# haystack/test/document_stores/test_sql.py

import logging

import pytest

from haystack.document_stores.sql import SQLDocumentStore
from haystack.schema import Document

from .test_base import DocumentStoreBaseTestAbstract


class TestSQLDocumentStore(DocumentStoreBaseTestAbstract):
    """Integration tests for ``SQLDocumentStore``.

    The class extends ``DocumentStoreBaseTestAbstract`` and provides a ``ds``
    fixture backed by a SQLite file in pytest's temporary directory. Tests
    defined here are either specific to the SQL store or customized/skipped
    because of the limitations described in the NOTE further down.
    """

    # Constants

    # Index name handed to the store constructor: the name of this module.
    index_name = __name__

    @pytest.fixture
    def ds(self, tmp_path):
        """Return a ``SQLDocumentStore`` backed by a SQLite file under ``tmp_path``."""
        db_url = f"sqlite:///{tmp_path}/haystack_test.db"
        return SQLDocumentStore(url=db_url, index=self.index_name, isolation_level="AUTOCOMMIT")

    @pytest.mark.integration
    def test_delete_index(self, ds, documents):
        """Contrary to other Document Stores, SQLDocumentStore doesn't raise if the index is empty.

        After ``delete_index`` the document count of the deleted index is
        expected to be 0 rather than an error.
        """
        ds.write_documents(documents, index="custom_index")
        assert ds.get_document_count(index="custom_index") == len(documents)
        ds.delete_index(index="custom_index")
        assert ds.get_document_count(index="custom_index") == 0

    @pytest.mark.integration
    def test_sql_write_different_documents_same_vector_id(self, ds):
        """Check that a ``vector_id`` can be reused across indices but not within one."""
        doc1 = {"content": "content 1", "name": "doc1", "id": "1", "vector_id": "vector_id"}
        doc2 = {"content": "content 2", "name": "doc2", "id": "2", "vector_id": "vector_id"}

        # Same vector_id in two different indices: both writes must succeed.
        ds.write_documents([doc1], index="index1")
        documents_in_index1 = ds.get_all_documents(index="index1")
        assert len(documents_in_index1) == 1
        ds.write_documents([doc2], index="index2")
        documents_in_index2 = ds.get_all_documents(index="index2")
        assert len(documents_in_index2) == 1

        # Same vector_id twice in the same index: the second write must fail
        # with an error mentioning "unique" (matched case-insensitively).
        ds.write_documents([doc1], index="index3")
        with pytest.raises(Exception, match=r"(?i)unique"):
            ds.write_documents([doc2], index="index3")

    @pytest.mark.integration
    def test_sql_get_documents_using_nested_filters_about_classification(self, ds):
        """Check filtering on nested ``classification`` metadata through dotted keys."""
        documents = [
            Document(
                content="That's good. I like it.",
                id="1",
                meta={
                    "classification": {
                        "label": "LABEL_1",
                        "score": 0.694,
                        "details": {"LABEL_1": 0.694, "LABEL_0": 0.306},
                    }
                },
            ),
            Document(
                content="That's bad. I don't like it.",
                id="2",
                meta={
                    "classification": {
                        "label": "LABEL_0",
                        "score": 0.898,
                        "details": {"LABEL_0": 0.898, "LABEL_1": 0.102},
                    }
                },
            ),
        ]
        ds.write_documents(documents)

        assert ds.get_document_count() == 2
        # Both documents match: scores 0.694 and 0.898, labels LABEL_1 and LABEL_0.
        assert len(ds.get_all_documents(filters={"classification.score": {"$gt": 0.1}})) == 2
        assert len(ds.get_all_documents(filters={"classification.label": ["LABEL_1", "LABEL_0"]})) == 2
        # Exactly one document matches: only 0.898 exceeds 0.8, only doc "1" is LABEL_1.
        assert len(ds.get_all_documents(filters={"classification.score": {"$gt": 0.8}})) == 1
        assert len(ds.get_all_documents(filters={"classification.label": ["LABEL_1"]})) == 1
        # No document matches: no score exceeds 0.95 and LABEL_100 is never used.
        assert len(ds.get_all_documents(filters={"classification.score": {"$gt": 0.95}})) == 0
        assert len(ds.get_all_documents(filters={"classification.label": ["LABEL_100"]})) == 0

    # NOTE: the SQLDocumentStore marshals metadata values with JSON so querying
    # using filters doesn't always work. While this should be considered a bug,
    # the related tests are either customized or skipped while we work on a fix.

    @pytest.mark.integration
    def test_ne_filters(self, ds, caplog):
        """Check that a ``$ne`` filter on a metadata field logs a warning."""
        with caplog.at_level(logging.WARNING):
            ds.get_all_documents(filters={"year": {"$ne": "2020"}})
            assert "filters won't work on metadata fields" in caplog.text

    @pytest.mark.skip(reason="metadata values are marshalled with JSON, filters don't always work")
    @pytest.mark.integration
    def test_nin_filters(self, ds, documents):
        pass

    @pytest.mark.skip(reason="metadata values are marshalled with JSON, filters don't always work")
    @pytest.mark.integration
    def test_comparison_filters(self, ds, documents):
        pass

    @pytest.mark.skip(reason="metadata values are marshalled with JSON, filters don't always work")
    @pytest.mark.integration
    def test_nested_condition_filters(self, ds, documents):
        pass

    @pytest.mark.skip(reason="metadata values are marshalled with JSON, filters don't always work")
    @pytest.mark.integration
    def test_nested_condition_not_filters(self, ds, documents):
        pass

    @pytest.mark.skip(reason="labels metadata are not supported")
    @pytest.mark.integration
    def test_delete_labels_by_filter(self, ds, labels):
        pass

    @pytest.mark.skip(reason="labels metadata are not supported")
    @pytest.mark.integration
    def test_delete_labels_by_filter_id(self, ds, labels):
        pass

    @pytest.mark.skip(reason="labels metadata are not supported")
    @pytest.mark.integration
    def test_multilabel_filter_aggregations(self):
        pass

    @pytest.mark.skip(reason="labels metadata are not supported")
    @pytest.mark.integration
    def test_multilabel_meta_aggregations(self):
        pass