mirror of
				https://github.com/deepset-ai/haystack.git
				synced 2025-10-31 09:49:48 +00:00 
			
		
		
		
	test: ease testing for 3rd parties (#5539)
* ease testing for 3rd parties * fix __all__ * uniform error management * raise the same filter error * raise the same filter error * fix circular import
This commit is contained in:
		
							parent
							
								
									168b7c806c
								
							
						
					
					
						commit
						d73d443bc0
					
				| @ -1 +1,3 @@ | ||||
| from haystack.preview.dataclasses.document import Document | ||||
| from haystack.preview.dataclasses.document import Document, ContentType | ||||
| 
 | ||||
| __all__ = ["Document", "ContentType"] | ||||
|  | ||||
| @ -2,6 +2,10 @@ class StoreError(Exception): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| class FilterError(StoreError): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| class DuplicateDocumentError(StoreError): | ||||
|     pass | ||||
| 
 | ||||
|  | ||||
| @ -1 +1,4 @@ | ||||
| from haystack.preview.document_stores.memory.document_store import MemoryDocumentStore | ||||
| from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError | ||||
| 
 | ||||
| __all__ = ["MemoryDocumentStore", "MemoryDocumentStoreFilterError"] | ||||
|  | ||||
| @ -3,18 +3,14 @@ from typing import List, Any | ||||
| import numpy as np | ||||
| import pandas as pd | ||||
| 
 | ||||
| from haystack.preview.document_stores.errors import StoreError | ||||
| from haystack.preview.dataclasses import Document | ||||
| from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError | ||||
| 
 | ||||
| 
 | ||||
| GT_TYPES = (int, float, np.number) | ||||
| IN_TYPES = (list, set, tuple) | ||||
| 
 | ||||
| 
 | ||||
| class MemoryDocumentStoreFilterError(StoreError): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| def not_operation(conditions: List[Any], document: Document, _current_key: str): | ||||
|     """ | ||||
|     Applies a NOT to all the nested conditions. | ||||
| @ -224,7 +220,7 @@ def match(conditions: Any, document: Document, _current_key=None): | ||||
|     if isinstance(conditions, dict): | ||||
|         # Check for malformed filters, like {"name": {"year": "2020"}} | ||||
|         if _current_key and any(key not in RESERVED_KEYS for key in conditions.keys()): | ||||
|             raise ValueError( | ||||
|             raise MemoryDocumentStoreFilterError( | ||||
|                 f"This filter ({{{_current_key}: {conditions}}}) seems to be malformed. " | ||||
|                 "Comparisons between dictionaries are not currently supported. " | ||||
|                 "Check the documentation to learn more about filters syntax." | ||||
| @ -245,7 +241,7 @@ def match(conditions: Any, document: Document, _current_key=None): | ||||
|         # A comparison operator ($eq, $in, $gte, ...) | ||||
|         if field_key in OPERATORS.keys(): | ||||
|             if not _current_key: | ||||
|                 raise ValueError( | ||||
|                 raise MemoryDocumentStoreFilterError( | ||||
|                     "Filters can't start with an operator like $eq and $in. You have to specify the field name first. " | ||||
|                     "See the examples in the documentation." | ||||
|                 ) | ||||
| @ -268,7 +264,9 @@ def match(conditions: Any, document: Document, _current_key=None): | ||||
|         # The default operator for a {key: value} filter is $eq | ||||
|         return eq_operation(fields=document.flatten(), field_name=_current_key, value=conditions) | ||||
| 
 | ||||
|     raise ValueError("Filters must be dictionaries or lists. See the examples in the documentation.") | ||||
|     raise MemoryDocumentStoreFilterError( | ||||
|         "Filters must be dictionaries or lists. See the examples in the documentation." | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| def _list_conditions(conditions: Any) -> List[Any]: | ||||
|  | ||||
							
								
								
									
										5
									
								
								haystack/preview/document_stores/memory/errors.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								haystack/preview/document_stores/memory/errors.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,5 @@ | ||||
| from haystack.preview.document_stores.errors import FilterError | ||||
| 
 | ||||
| 
 | ||||
| class MemoryDocumentStoreFilterError(FilterError): | ||||
|     pass | ||||
| @ -6,8 +6,8 @@ import numpy as np | ||||
| import pandas as pd | ||||
| 
 | ||||
| from haystack.preview.dataclasses import Document | ||||
| from haystack.preview.document_stores import Store, StoreError, DuplicatePolicy | ||||
| from haystack.preview.document_stores import MissingDocumentError, DuplicateDocumentError | ||||
| from haystack.preview.document_stores import Store, DuplicatePolicy | ||||
| from haystack.preview.document_stores.errors import FilterError, MissingDocumentError, DuplicateDocumentError | ||||
| 
 | ||||
| 
 | ||||
| class DocumentStoreBaseTests: | ||||
| @ -17,8 +17,8 @@ class DocumentStoreBaseTests: | ||||
| 
 | ||||
|     @pytest.fixture | ||||
|     def filterable_docs(self) -> List[Document]: | ||||
|         embedding_zero = np.zeros([768, 1]).astype(np.float32) | ||||
|         embedding_one = np.ones([768, 1]).astype(np.float32) | ||||
|         embedding_zero = np.zeros(768).astype(np.float32) | ||||
|         embedding_one = np.ones(768).astype(np.float32) | ||||
| 
 | ||||
|         documents = [] | ||||
|         for i in range(3): | ||||
| @ -142,7 +142,7 @@ class DocumentStoreBaseTests: | ||||
|     @pytest.mark.unit | ||||
|     def test_incorrect_filter_type(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         with pytest.raises(ValueError, match="dictionaries or lists"): | ||||
|         with pytest.raises(FilterError): | ||||
|             docstore.filter_documents(filters="something odd")  # type: ignore | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
| @ -154,13 +154,13 @@ class DocumentStoreBaseTests: | ||||
|     @pytest.mark.unit | ||||
|     def test_incorrect_filter_nesting(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         with pytest.raises(ValueError, match="malformed"): | ||||
|         with pytest.raises(FilterError): | ||||
|             docstore.filter_documents(filters={"number": {"page": "100"}}) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|     def test_deeper_incorrect_filter_nesting(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         with pytest.raises(ValueError, match="malformed"): | ||||
|         with pytest.raises(FilterError): | ||||
|             docstore.filter_documents(filters={"number": {"page": {"chapter": "intro"}}}) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
| @ -189,9 +189,9 @@ class DocumentStoreBaseTests: | ||||
|         ) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|     def test_eq_filter_tensor(self, docstore: Store, filterable_docs: List[Document]): | ||||
|     def test_eq_filter_embedding(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         embedding = np.zeros([768, 1]).astype(np.float32) | ||||
|         embedding = np.zeros(768).astype(np.float32) | ||||
|         result = docstore.filter_documents(filters={"embedding": embedding}) | ||||
|         assert self.contains_same_docs( | ||||
|             result, [doc for doc in filterable_docs if np.array_equal(embedding, doc.embedding)]  # type: ignore | ||||
| @ -228,10 +228,10 @@ class DocumentStoreBaseTests: | ||||
|         ) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|     def test_in_filter_tensor(self, docstore: Store, filterable_docs: List[Document]): | ||||
|     def test_in_filter_embedding(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         embedding_zero = np.zeros([768, 1]).astype(np.float32) | ||||
|         embedding_one = np.ones([768, 1]).astype(np.float32) | ||||
|         embedding_zero = np.zeros(768, np.float32) | ||||
|         embedding_one = np.ones(768, np.float32) | ||||
|         result = docstore.filter_documents(filters={"embedding": {"$in": [embedding_zero, embedding_one]}}) | ||||
|         assert self.contains_same_docs( | ||||
|             result, | ||||
| @ -263,7 +263,7 @@ class DocumentStoreBaseTests: | ||||
|         ) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|     def test_ne_filter_tensor(self, docstore: Store, filterable_docs: List[Document]): | ||||
|     def test_ne_filter_embedding(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         embedding = np.zeros([768, 1]).astype(np.float32) | ||||
|         result = docstore.filter_documents(filters={"embedding": {"$ne": embedding}}) | ||||
| @ -291,7 +291,7 @@ class DocumentStoreBaseTests: | ||||
|         ) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|     def test_nin_filter_tensor(self, docstore: Store, filterable_docs: List[Document]): | ||||
|     def test_nin_filter_embedding(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         embedding_zeros = np.zeros([768, 1]).astype(np.float32) | ||||
|         embedding_ones = np.zeros([768, 1]).astype(np.float32) | ||||
| @ -328,20 +328,20 @@ class DocumentStoreBaseTests: | ||||
|     @pytest.mark.unit | ||||
|     def test_gt_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         with pytest.raises(StoreError, match="Can't evaluate"): | ||||
|         with pytest.raises(FilterError): | ||||
|             docstore.filter_documents(filters={"page": {"$gt": "100"}}) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|     def test_gt_filter_table(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         with pytest.raises(StoreError, match="Can't evaluate"): | ||||
|         with pytest.raises(FilterError): | ||||
|             docstore.filter_documents(filters={"content": {"$gt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|     def test_gt_filter_tensor(self, docstore: Store, filterable_docs: List[Document]): | ||||
|     def test_gt_filter_embedding(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         embedding_zeros = np.zeros([768, 1]).astype(np.float32) | ||||
|         with pytest.raises(StoreError, match="Can't evaluate"): | ||||
|         with pytest.raises(FilterError): | ||||
|             docstore.filter_documents(filters={"embedding": {"$gt": embedding_zeros}}) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
| @ -355,20 +355,20 @@ class DocumentStoreBaseTests: | ||||
|     @pytest.mark.unit | ||||
|     def test_gte_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         with pytest.raises(StoreError, match="Can't evaluate"): | ||||
|         with pytest.raises(FilterError): | ||||
|             docstore.filter_documents(filters={"page": {"$gte": "100"}}) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|     def test_gte_filter_table(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         with pytest.raises(StoreError, match="Can't evaluate"): | ||||
|         with pytest.raises(FilterError): | ||||
|             docstore.filter_documents(filters={"content": {"$gte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|     def test_gte_filter_tensor(self, docstore: Store, filterable_docs: List[Document]): | ||||
|     def test_gte_filter_embedding(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         embedding_zeros = np.zeros([768, 1]).astype(np.float32) | ||||
|         with pytest.raises(StoreError, match="Can't evaluate"): | ||||
|         with pytest.raises(FilterError): | ||||
|             docstore.filter_documents(filters={"embedding": {"$gte": embedding_zeros}}) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
| @ -382,20 +382,20 @@ class DocumentStoreBaseTests: | ||||
|     @pytest.mark.unit | ||||
|     def test_lt_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         with pytest.raises(StoreError, match="Can't evaluate"): | ||||
|         with pytest.raises(FilterError): | ||||
|             docstore.filter_documents(filters={"page": {"$lt": "100"}}) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|     def test_lt_filter_table(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         with pytest.raises(StoreError, match="Can't evaluate"): | ||||
|         with pytest.raises(FilterError): | ||||
|             docstore.filter_documents(filters={"content": {"$lt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|     def test_lt_filter_tensor(self, docstore: Store, filterable_docs: List[Document]): | ||||
|     def test_lt_filter_embedding(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         embedding_ones = np.ones([768, 1]).astype(np.float32) | ||||
|         with pytest.raises(StoreError, match="Can't evaluate"): | ||||
|         with pytest.raises(FilterError): | ||||
|             docstore.filter_documents(filters={"embedding": {"$lt": embedding_ones}}) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
| @ -409,20 +409,20 @@ class DocumentStoreBaseTests: | ||||
|     @pytest.mark.unit | ||||
|     def test_lte_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         with pytest.raises(StoreError, match="Can't evaluate"): | ||||
|         with pytest.raises(FilterError): | ||||
|             docstore.filter_documents(filters={"page": {"$lte": "100"}}) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|     def test_lte_filter_table(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         with pytest.raises(StoreError, match="Can't evaluate"): | ||||
|         with pytest.raises(FilterError): | ||||
|             docstore.filter_documents(filters={"content": {"$lte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|     def test_lte_filter_tensor(self, docstore: Store, filterable_docs: List[Document]): | ||||
|     def test_lte_filter_embedding(self, docstore: Store, filterable_docs: List[Document]): | ||||
|         docstore.write_documents(filterable_docs) | ||||
|         embedding_ones = np.ones([768, 1]).astype(np.float32) | ||||
|         with pytest.raises(StoreError, match="Can't evaluate"): | ||||
|         with pytest.raises(FilterError): | ||||
|             docstore.filter_documents(filters={"embedding": {"$lte": embedding_ones}}) | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
| @ -677,12 +677,12 @@ class DocumentStoreBaseTests: | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|     def test_write_not_docs(self, docstore: Store): | ||||
|         with pytest.raises(ValueError, match="Please provide a list of Documents"): | ||||
|         with pytest.raises(ValueError): | ||||
|             docstore.write_documents(["not a document for sure"])  # type: ignore | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|     def test_write_not_list(self, docstore: Store): | ||||
|         with pytest.raises(ValueError, match="Please provide a list of Documents"): | ||||
|         with pytest.raises(ValueError): | ||||
|             docstore.write_documents("not a list actually")  # type: ignore | ||||
| 
 | ||||
|     @pytest.mark.unit | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Massimiliano Pippi
						Massimiliano Pippi