mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-09-07 15:23:31 +00:00
test: ease testing for 3rd parties (#5539)
* ease testing for 3rd parties
* fix __all__
* uniform error management
* raise the same filter error
* raise the same filter error
* fix circular import
This commit is contained in:
parent
168b7c806c
commit
d73d443bc0
@ -1 +1,3 @@
|
|||||||
from haystack.preview.dataclasses.document import Document
|
from haystack.preview.dataclasses.document import Document, ContentType
|
||||||
|
|
||||||
|
__all__ = ["Document", "ContentType"]
|
||||||
|
@ -2,6 +2,10 @@ class StoreError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class FilterError(StoreError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class DuplicateDocumentError(StoreError):
|
class DuplicateDocumentError(StoreError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -1 +1,4 @@
|
|||||||
from haystack.preview.document_stores.memory.document_store import MemoryDocumentStore
|
from haystack.preview.document_stores.memory.document_store import MemoryDocumentStore
|
||||||
|
from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError
|
||||||
|
|
||||||
|
__all__ = ["MemoryDocumentStore", "MemoryDocumentStoreFilterError"]
|
||||||
|
@ -3,18 +3,14 @@ from typing import List, Any
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from haystack.preview.document_stores.errors import StoreError
|
|
||||||
from haystack.preview.dataclasses import Document
|
from haystack.preview.dataclasses import Document
|
||||||
|
from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError
|
||||||
|
|
||||||
|
|
||||||
GT_TYPES = (int, float, np.number)
|
GT_TYPES = (int, float, np.number)
|
||||||
IN_TYPES = (list, set, tuple)
|
IN_TYPES = (list, set, tuple)
|
||||||
|
|
||||||
|
|
||||||
class MemoryDocumentStoreFilterError(StoreError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def not_operation(conditions: List[Any], document: Document, _current_key: str):
|
def not_operation(conditions: List[Any], document: Document, _current_key: str):
|
||||||
"""
|
"""
|
||||||
Applies a NOT to all the nested conditions.
|
Applies a NOT to all the nested conditions.
|
||||||
@ -224,7 +220,7 @@ def match(conditions: Any, document: Document, _current_key=None):
|
|||||||
if isinstance(conditions, dict):
|
if isinstance(conditions, dict):
|
||||||
# Check for malformed filters, like {"name": {"year": "2020"}}
|
# Check for malformed filters, like {"name": {"year": "2020"}}
|
||||||
if _current_key and any(key not in RESERVED_KEYS for key in conditions.keys()):
|
if _current_key and any(key not in RESERVED_KEYS for key in conditions.keys()):
|
||||||
raise ValueError(
|
raise MemoryDocumentStoreFilterError(
|
||||||
f"This filter ({{{_current_key}: {conditions}}}) seems to be malformed. "
|
f"This filter ({{{_current_key}: {conditions}}}) seems to be malformed. "
|
||||||
"Comparisons between dictionaries are not currently supported. "
|
"Comparisons between dictionaries are not currently supported. "
|
||||||
"Check the documentation to learn more about filters syntax."
|
"Check the documentation to learn more about filters syntax."
|
||||||
@ -245,7 +241,7 @@ def match(conditions: Any, document: Document, _current_key=None):
|
|||||||
# A comparison operator ($eq, $in, $gte, ...)
|
# A comparison operator ($eq, $in, $gte, ...)
|
||||||
if field_key in OPERATORS.keys():
|
if field_key in OPERATORS.keys():
|
||||||
if not _current_key:
|
if not _current_key:
|
||||||
raise ValueError(
|
raise MemoryDocumentStoreFilterError(
|
||||||
"Filters can't start with an operator like $eq and $in. You have to specify the field name first. "
|
"Filters can't start with an operator like $eq and $in. You have to specify the field name first. "
|
||||||
"See the examples in the documentation."
|
"See the examples in the documentation."
|
||||||
)
|
)
|
||||||
@ -268,7 +264,9 @@ def match(conditions: Any, document: Document, _current_key=None):
|
|||||||
# The default operator for a {key: value} filter is $eq
|
# The default operator for a {key: value} filter is $eq
|
||||||
return eq_operation(fields=document.flatten(), field_name=_current_key, value=conditions)
|
return eq_operation(fields=document.flatten(), field_name=_current_key, value=conditions)
|
||||||
|
|
||||||
raise ValueError("Filters must be dictionaries or lists. See the examples in the documentation.")
|
raise MemoryDocumentStoreFilterError(
|
||||||
|
"Filters must be dictionaries or lists. See the examples in the documentation."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _list_conditions(conditions: Any) -> List[Any]:
|
def _list_conditions(conditions: Any) -> List[Any]:
|
||||||
|
5
haystack/preview/document_stores/memory/errors.py
Normal file
5
haystack/preview/document_stores/memory/errors.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
from haystack.preview.document_stores.errors import FilterError
|
||||||
|
|
||||||
|
|
||||||
|
class MemoryDocumentStoreFilterError(FilterError):
|
||||||
|
pass
|
@ -6,8 +6,8 @@ import numpy as np
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from haystack.preview.dataclasses import Document
|
from haystack.preview.dataclasses import Document
|
||||||
from haystack.preview.document_stores import Store, StoreError, DuplicatePolicy
|
from haystack.preview.document_stores import Store, DuplicatePolicy
|
||||||
from haystack.preview.document_stores import MissingDocumentError, DuplicateDocumentError
|
from haystack.preview.document_stores.errors import FilterError, MissingDocumentError, DuplicateDocumentError
|
||||||
|
|
||||||
|
|
||||||
class DocumentStoreBaseTests:
|
class DocumentStoreBaseTests:
|
||||||
@ -17,8 +17,8 @@ class DocumentStoreBaseTests:
|
|||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def filterable_docs(self) -> List[Document]:
|
def filterable_docs(self) -> List[Document]:
|
||||||
embedding_zero = np.zeros([768, 1]).astype(np.float32)
|
embedding_zero = np.zeros(768).astype(np.float32)
|
||||||
embedding_one = np.ones([768, 1]).astype(np.float32)
|
embedding_one = np.ones(768).astype(np.float32)
|
||||||
|
|
||||||
documents = []
|
documents = []
|
||||||
for i in range(3):
|
for i in range(3):
|
||||||
@ -142,7 +142,7 @@ class DocumentStoreBaseTests:
|
|||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_incorrect_filter_type(self, docstore: Store, filterable_docs: List[Document]):
|
def test_incorrect_filter_type(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
with pytest.raises(ValueError, match="dictionaries or lists"):
|
with pytest.raises(FilterError):
|
||||||
docstore.filter_documents(filters="something odd") # type: ignore
|
docstore.filter_documents(filters="something odd") # type: ignore
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
@ -154,13 +154,13 @@ class DocumentStoreBaseTests:
|
|||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_incorrect_filter_nesting(self, docstore: Store, filterable_docs: List[Document]):
|
def test_incorrect_filter_nesting(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
with pytest.raises(ValueError, match="malformed"):
|
with pytest.raises(FilterError):
|
||||||
docstore.filter_documents(filters={"number": {"page": "100"}})
|
docstore.filter_documents(filters={"number": {"page": "100"}})
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_deeper_incorrect_filter_nesting(self, docstore: Store, filterable_docs: List[Document]):
|
def test_deeper_incorrect_filter_nesting(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
with pytest.raises(ValueError, match="malformed"):
|
with pytest.raises(FilterError):
|
||||||
docstore.filter_documents(filters={"number": {"page": {"chapter": "intro"}}})
|
docstore.filter_documents(filters={"number": {"page": {"chapter": "intro"}}})
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
@ -189,9 +189,9 @@ class DocumentStoreBaseTests:
|
|||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_eq_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
def test_eq_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
embedding = np.zeros([768, 1]).astype(np.float32)
|
embedding = np.zeros(768).astype(np.float32)
|
||||||
result = docstore.filter_documents(filters={"embedding": embedding})
|
result = docstore.filter_documents(filters={"embedding": embedding})
|
||||||
assert self.contains_same_docs(
|
assert self.contains_same_docs(
|
||||||
result, [doc for doc in filterable_docs if np.array_equal(embedding, doc.embedding)] # type: ignore
|
result, [doc for doc in filterable_docs if np.array_equal(embedding, doc.embedding)] # type: ignore
|
||||||
@ -228,10 +228,10 @@ class DocumentStoreBaseTests:
|
|||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_in_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
def test_in_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
embedding_zero = np.zeros([768, 1]).astype(np.float32)
|
embedding_zero = np.zeros(768, np.float32)
|
||||||
embedding_one = np.ones([768, 1]).astype(np.float32)
|
embedding_one = np.ones(768, np.float32)
|
||||||
result = docstore.filter_documents(filters={"embedding": {"$in": [embedding_zero, embedding_one]}})
|
result = docstore.filter_documents(filters={"embedding": {"$in": [embedding_zero, embedding_one]}})
|
||||||
assert self.contains_same_docs(
|
assert self.contains_same_docs(
|
||||||
result,
|
result,
|
||||||
@ -263,7 +263,7 @@ class DocumentStoreBaseTests:
|
|||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_ne_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
def test_ne_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
embedding = np.zeros([768, 1]).astype(np.float32)
|
embedding = np.zeros([768, 1]).astype(np.float32)
|
||||||
result = docstore.filter_documents(filters={"embedding": {"$ne": embedding}})
|
result = docstore.filter_documents(filters={"embedding": {"$ne": embedding}})
|
||||||
@ -291,7 +291,7 @@ class DocumentStoreBaseTests:
|
|||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_nin_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
def test_nin_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
|
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
|
||||||
embedding_ones = np.zeros([768, 1]).astype(np.float32)
|
embedding_ones = np.zeros([768, 1]).astype(np.float32)
|
||||||
@ -328,20 +328,20 @@ class DocumentStoreBaseTests:
|
|||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_gt_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
|
def test_gt_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
with pytest.raises(FilterError):
|
||||||
docstore.filter_documents(filters={"page": {"$gt": "100"}})
|
docstore.filter_documents(filters={"page": {"$gt": "100"}})
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_gt_filter_table(self, docstore: Store, filterable_docs: List[Document]):
|
def test_gt_filter_table(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
with pytest.raises(FilterError):
|
||||||
docstore.filter_documents(filters={"content": {"$gt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
docstore.filter_documents(filters={"content": {"$gt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_gt_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
def test_gt_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
|
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
|
||||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
with pytest.raises(FilterError):
|
||||||
docstore.filter_documents(filters={"embedding": {"$gt": embedding_zeros}})
|
docstore.filter_documents(filters={"embedding": {"$gt": embedding_zeros}})
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
@ -355,20 +355,20 @@ class DocumentStoreBaseTests:
|
|||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_gte_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
|
def test_gte_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
with pytest.raises(FilterError):
|
||||||
docstore.filter_documents(filters={"page": {"$gte": "100"}})
|
docstore.filter_documents(filters={"page": {"$gte": "100"}})
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_gte_filter_table(self, docstore: Store, filterable_docs: List[Document]):
|
def test_gte_filter_table(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
with pytest.raises(FilterError):
|
||||||
docstore.filter_documents(filters={"content": {"$gte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
docstore.filter_documents(filters={"content": {"$gte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_gte_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
def test_gte_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
|
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
|
||||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
with pytest.raises(FilterError):
|
||||||
docstore.filter_documents(filters={"embedding": {"$gte": embedding_zeros}})
|
docstore.filter_documents(filters={"embedding": {"$gte": embedding_zeros}})
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
@ -382,20 +382,20 @@ class DocumentStoreBaseTests:
|
|||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_lt_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
|
def test_lt_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
with pytest.raises(FilterError):
|
||||||
docstore.filter_documents(filters={"page": {"$lt": "100"}})
|
docstore.filter_documents(filters={"page": {"$lt": "100"}})
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_lt_filter_table(self, docstore: Store, filterable_docs: List[Document]):
|
def test_lt_filter_table(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
with pytest.raises(FilterError):
|
||||||
docstore.filter_documents(filters={"content": {"$lt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
docstore.filter_documents(filters={"content": {"$lt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_lt_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
def test_lt_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
embedding_ones = np.ones([768, 1]).astype(np.float32)
|
embedding_ones = np.ones([768, 1]).astype(np.float32)
|
||||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
with pytest.raises(FilterError):
|
||||||
docstore.filter_documents(filters={"embedding": {"$lt": embedding_ones}})
|
docstore.filter_documents(filters={"embedding": {"$lt": embedding_ones}})
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
@ -409,20 +409,20 @@ class DocumentStoreBaseTests:
|
|||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_lte_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
|
def test_lte_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
with pytest.raises(FilterError):
|
||||||
docstore.filter_documents(filters={"page": {"$lte": "100"}})
|
docstore.filter_documents(filters={"page": {"$lte": "100"}})
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_lte_filter_table(self, docstore: Store, filterable_docs: List[Document]):
|
def test_lte_filter_table(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
with pytest.raises(FilterError):
|
||||||
docstore.filter_documents(filters={"content": {"$lte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
docstore.filter_documents(filters={"content": {"$lte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_lte_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
def test_lte_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||||
docstore.write_documents(filterable_docs)
|
docstore.write_documents(filterable_docs)
|
||||||
embedding_ones = np.ones([768, 1]).astype(np.float32)
|
embedding_ones = np.ones([768, 1]).astype(np.float32)
|
||||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
with pytest.raises(FilterError):
|
||||||
docstore.filter_documents(filters={"embedding": {"$lte": embedding_ones}})
|
docstore.filter_documents(filters={"embedding": {"$lte": embedding_ones}})
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
@ -677,12 +677,12 @@ class DocumentStoreBaseTests:
|
|||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_write_not_docs(self, docstore: Store):
|
def test_write_not_docs(self, docstore: Store):
|
||||||
with pytest.raises(ValueError, match="Please provide a list of Documents"):
|
with pytest.raises(ValueError):
|
||||||
docstore.write_documents(["not a document for sure"]) # type: ignore
|
docstore.write_documents(["not a document for sure"]) # type: ignore
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_write_not_list(self, docstore: Store):
|
def test_write_not_list(self, docstore: Store):
|
||||||
with pytest.raises(ValueError, match="Please provide a list of Documents"):
|
with pytest.raises(ValueError):
|
||||||
docstore.write_documents("not a list actually") # type: ignore
|
docstore.write_documents("not a list actually") # type: ignore
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
|
Loading…
x
Reference in New Issue
Block a user