mirror of https://github.com/deepset-ai/haystack.git, synced 2025-10-31 01:39:45 +00:00

import json
import logging

import pytest

from haystack.document_stores.sql import LabelORM, SQLDocumentStore
from haystack.schema import Document
from haystack.testing import DocumentStoreBaseTestAbstract


class TestSQLDocumentStore(DocumentStoreBaseTestAbstract):
    # Constants

    index_name = __name__

    @pytest.fixture
    def ds(self, tmp_path):
        db_url = f"sqlite:///{tmp_path}/haystack_test.db"
        return SQLDocumentStore(url=db_url, index=self.index_name, isolation_level="AUTOCOMMIT")

    @pytest.mark.integration
    def test_delete_index(self, ds, documents):
        """Contrary to other Document Stores, SQLDocumentStore doesn't raise if the index is empty"""
        ds.write_documents(documents, index="custom_index")
        assert ds.get_document_count(index="custom_index") == len(documents)
        ds.delete_index(index="custom_index")
        assert ds.get_document_count(index="custom_index") == 0

    @pytest.mark.integration
    def test_sql_write_different_documents_same_vector_id(self, ds):
        doc1 = {"content": "content 1", "name": "doc1", "id": "1", "vector_id": "vector_id"}
        doc2 = {"content": "content 2", "name": "doc2", "id": "2", "vector_id": "vector_id"}

        ds.write_documents([doc1], index="index1")
        documents_in_index1 = ds.get_all_documents(index="index1")
        assert len(documents_in_index1) == 1
        ds.write_documents([doc2], index="index2")
        documents_in_index2 = ds.get_all_documents(index="index2")
        assert len(documents_in_index2) == 1

        ds.write_documents([doc1], index="index3")
        with pytest.raises(Exception, match=r"(?i)unique"):
            ds.write_documents([doc2], index="index3")

    @pytest.mark.integration
    def test_sql_get_documents_using_nested_filters_about_classification(self, ds):
        documents = [
            Document(
                content="That's good. I like it.",
                id="1",
                meta={
                    "classification": {
                        "label": "LABEL_1",
                        "score": 0.694,
                        "details": {"LABEL_1": 0.694, "LABEL_0": 0.306},
                    }
                },
            ),
            Document(
                content="That's bad. I don't like it.",
                id="2",
                meta={
                    "classification": {
                        "label": "LABEL_0",
                        "score": 0.898,
                        "details": {"LABEL_0": 0.898, "LABEL_1": 0.102},
                    }
                },
            ),
        ]
        ds.write_documents(documents)

        assert ds.get_document_count() == 2
        assert len(ds.get_all_documents(filters={"classification.score": {"$gt": 0.1}})) == 2
        assert len(ds.get_all_documents(filters={"classification.label": ["LABEL_1", "LABEL_0"]})) == 2
        assert len(ds.get_all_documents(filters={"classification.score": {"$gt": 0.8}})) == 1
        assert len(ds.get_all_documents(filters={"classification.label": ["LABEL_1"]})) == 1
        assert len(ds.get_all_documents(filters={"classification.score": {"$gt": 0.95}})) == 0
        assert len(ds.get_all_documents(filters={"classification.label": ["LABEL_100"]})) == 0

    # NOTE: the SQLDocumentStore marshals metadata values with JSON, so querying
    # using filters doesn't always work. While this should be considered a bug,
    # the related tests are either customized or skipped while we work on a fix.

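    # A minimal illustration of the issue above (added here as a sketch, not part
    # of the upstream suite): once a metadata value has been marshalled with JSON,
    # comparisons effectively run on text, which is why "$ne", "$gt" and similar
    # filters on metadata can misbehave.
    def test_json_marshalled_metadata_compares_as_text_sketch(self):
        # The integer 2020 is serialized to the text '2020' before storage ...
        assert json.dumps(2020) == "2020"
        # ... and text ordering is lexicographic, not numeric: "1000" sorts before
        # "999", so a numeric "$gt" filter evaluated on serialized values goes wrong.
        assert "1000" < "999"
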
    @pytest.mark.integration
    def test_ne_filters(self, ds, caplog):
        with caplog.at_level(logging.WARNING):
            ds.get_all_documents(filters={"year": {"$ne": "2020"}})
            assert "filters won't work on metadata fields" in caplog.text

    @pytest.mark.integration
    def test_get_all_labels_legacy_document_id(self, ds):
        ds.session.add(
            LabelORM(
                id="123",
                no_answer=False,
                document=json.dumps(
                    {
                        "content": "Some content",
                        "content_type": "text",
                        "score": None,
                        "id": "fc18c987a8312e72a47fb1524f230bb0",
                        "meta": {},
                        "embedding": [0.1, 0.2, 0.3],
                    }
                ),
                origin="user-feedback",
                query="Who made the PDF specification?",
                is_correct_answer=True,
                is_correct_document=True,
                answer=json.dumps(
                    {
                        "answer": "Adobe Systems",
                        "type": "extractive",
                        "context": "Some content",
                        "offsets_in_context": [{"start": 60, "end": 73}],
                        "offsets_in_document": [{"start": 60, "end": 73}],
                        # legacy document_id answer
                        "document_id": "fc18c987a8312e72a47fb1524f230bb0",
                        "meta": {},
                        "score": None,
                    }
                ),
                pipeline_id="some-123",
                index=ds.label_index,
            )
        )
        labels = ds.get_all_labels()
        assert labels[0].answer.document_ids == ["fc18c987a8312e72a47fb1524f230bb0"]

    @pytest.mark.skip
    @pytest.mark.integration
    def test_nin_filters(self, ds, documents):
        pass

    @pytest.mark.skip
    @pytest.mark.integration
    def test_comparison_filters(self, ds, documents):
        pass

    @pytest.mark.skip
    @pytest.mark.integration
    def test_nested_condition_filters(self, ds, documents):
        pass

    @pytest.mark.skip
    @pytest.mark.integration
    def test_nested_condition_not_filters(self, ds, documents):
        pass

    @pytest.mark.skip(reason="labels metadata are not supported")
    @pytest.mark.integration
    def test_delete_labels_by_filter(self, ds, labels):
        pass

    @pytest.mark.skip(reason="labels metadata are not supported")
    @pytest.mark.integration
    def test_delete_labels_by_filter_id(self, ds, labels):
        pass

    @pytest.mark.skip(reason="labels metadata are not supported")
    @pytest.mark.integration
    def test_multilabel_filter_aggregations(self):
        pass

    @pytest.mark.skip(reason="labels metadata are not supported")
    @pytest.mark.integration
    def test_multilabel_meta_aggregations(self):
        pass

    @pytest.mark.skip(reason="embeddings are not supported")
    @pytest.mark.integration
    def test_get_embedding_count(self):
        pass

    @pytest.mark.skip(reason="embeddings are not supported")
    @pytest.mark.integration
    def test_custom_embedding_field(self, ds):
        pass
