mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-09-06 23:03:54 +00:00
test: ease testing for 3rd parties (#5539)
* ease testing for 3rd parties * fix __all__ * uniform error management * raise the same filter error * raise the same filter error * fix circular import
This commit is contained in:
parent
168b7c806c
commit
d73d443bc0
@ -1 +1,3 @@
|
||||
from haystack.preview.dataclasses.document import Document
|
||||
from haystack.preview.dataclasses.document import Document, ContentType
|
||||
|
||||
__all__ = ["Document", "ContentType"]
|
||||
|
@ -2,6 +2,10 @@ class StoreError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class FilterError(StoreError):
|
||||
pass
|
||||
|
||||
|
||||
class DuplicateDocumentError(StoreError):
|
||||
pass
|
||||
|
||||
|
@ -1 +1,4 @@
|
||||
from haystack.preview.document_stores.memory.document_store import MemoryDocumentStore
|
||||
from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError
|
||||
|
||||
__all__ = ["MemoryDocumentStore", "MemoryDocumentStoreFilterError"]
|
||||
|
@ -3,18 +3,14 @@ from typing import List, Any
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from haystack.preview.document_stores.errors import StoreError
|
||||
from haystack.preview.dataclasses import Document
|
||||
from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError
|
||||
|
||||
|
||||
GT_TYPES = (int, float, np.number)
|
||||
IN_TYPES = (list, set, tuple)
|
||||
|
||||
|
||||
class MemoryDocumentStoreFilterError(StoreError):
|
||||
pass
|
||||
|
||||
|
||||
def not_operation(conditions: List[Any], document: Document, _current_key: str):
|
||||
"""
|
||||
Applies a NOT to all the nested conditions.
|
||||
@ -224,7 +220,7 @@ def match(conditions: Any, document: Document, _current_key=None):
|
||||
if isinstance(conditions, dict):
|
||||
# Check for malformed filters, like {"name": {"year": "2020"}}
|
||||
if _current_key and any(key not in RESERVED_KEYS for key in conditions.keys()):
|
||||
raise ValueError(
|
||||
raise MemoryDocumentStoreFilterError(
|
||||
f"This filter ({{{_current_key}: {conditions}}}) seems to be malformed. "
|
||||
"Comparisons between dictionaries are not currently supported. "
|
||||
"Check the documentation to learn more about filters syntax."
|
||||
@ -245,7 +241,7 @@ def match(conditions: Any, document: Document, _current_key=None):
|
||||
# A comparison operator ($eq, $in, $gte, ...)
|
||||
if field_key in OPERATORS.keys():
|
||||
if not _current_key:
|
||||
raise ValueError(
|
||||
raise MemoryDocumentStoreFilterError(
|
||||
"Filters can't start with an operator like $eq and $in. You have to specify the field name first. "
|
||||
"See the examples in the documentation."
|
||||
)
|
||||
@ -268,7 +264,9 @@ def match(conditions: Any, document: Document, _current_key=None):
|
||||
# The default operator for a {key: value} filter is $eq
|
||||
return eq_operation(fields=document.flatten(), field_name=_current_key, value=conditions)
|
||||
|
||||
raise ValueError("Filters must be dictionaries or lists. See the examples in the documentation.")
|
||||
raise MemoryDocumentStoreFilterError(
|
||||
"Filters must be dictionaries or lists. See the examples in the documentation."
|
||||
)
|
||||
|
||||
|
||||
def _list_conditions(conditions: Any) -> List[Any]:
|
||||
|
5
haystack/preview/document_stores/memory/errors.py
Normal file
5
haystack/preview/document_stores/memory/errors.py
Normal file
@ -0,0 +1,5 @@
|
||||
from haystack.preview.document_stores.errors import FilterError
|
||||
|
||||
|
||||
class MemoryDocumentStoreFilterError(FilterError):
|
||||
pass
|
@ -6,8 +6,8 @@ import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from haystack.preview.dataclasses import Document
|
||||
from haystack.preview.document_stores import Store, StoreError, DuplicatePolicy
|
||||
from haystack.preview.document_stores import MissingDocumentError, DuplicateDocumentError
|
||||
from haystack.preview.document_stores import Store, DuplicatePolicy
|
||||
from haystack.preview.document_stores.errors import FilterError, MissingDocumentError, DuplicateDocumentError
|
||||
|
||||
|
||||
class DocumentStoreBaseTests:
|
||||
@ -17,8 +17,8 @@ class DocumentStoreBaseTests:
|
||||
|
||||
@pytest.fixture
|
||||
def filterable_docs(self) -> List[Document]:
|
||||
embedding_zero = np.zeros([768, 1]).astype(np.float32)
|
||||
embedding_one = np.ones([768, 1]).astype(np.float32)
|
||||
embedding_zero = np.zeros(768).astype(np.float32)
|
||||
embedding_one = np.ones(768).astype(np.float32)
|
||||
|
||||
documents = []
|
||||
for i in range(3):
|
||||
@ -142,7 +142,7 @@ class DocumentStoreBaseTests:
|
||||
@pytest.mark.unit
|
||||
def test_incorrect_filter_type(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
with pytest.raises(ValueError, match="dictionaries or lists"):
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters="something odd") # type: ignore
|
||||
|
||||
@pytest.mark.unit
|
||||
@ -154,13 +154,13 @@ class DocumentStoreBaseTests:
|
||||
@pytest.mark.unit
|
||||
def test_incorrect_filter_nesting(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
with pytest.raises(ValueError, match="malformed"):
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"number": {"page": "100"}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_deeper_incorrect_filter_nesting(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
with pytest.raises(ValueError, match="malformed"):
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"number": {"page": {"chapter": "intro"}}})
|
||||
|
||||
@pytest.mark.unit
|
||||
@ -189,9 +189,9 @@ class DocumentStoreBaseTests:
|
||||
)
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_eq_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
||||
def test_eq_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
embedding = np.zeros([768, 1]).astype(np.float32)
|
||||
embedding = np.zeros(768).astype(np.float32)
|
||||
result = docstore.filter_documents(filters={"embedding": embedding})
|
||||
assert self.contains_same_docs(
|
||||
result, [doc for doc in filterable_docs if np.array_equal(embedding, doc.embedding)] # type: ignore
|
||||
@ -228,10 +228,10 @@ class DocumentStoreBaseTests:
|
||||
)
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_in_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
||||
def test_in_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
embedding_zero = np.zeros([768, 1]).astype(np.float32)
|
||||
embedding_one = np.ones([768, 1]).astype(np.float32)
|
||||
embedding_zero = np.zeros(768, np.float32)
|
||||
embedding_one = np.ones(768, np.float32)
|
||||
result = docstore.filter_documents(filters={"embedding": {"$in": [embedding_zero, embedding_one]}})
|
||||
assert self.contains_same_docs(
|
||||
result,
|
||||
@ -263,7 +263,7 @@ class DocumentStoreBaseTests:
|
||||
)
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_ne_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
||||
def test_ne_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
embedding = np.zeros([768, 1]).astype(np.float32)
|
||||
result = docstore.filter_documents(filters={"embedding": {"$ne": embedding}})
|
||||
@ -291,7 +291,7 @@ class DocumentStoreBaseTests:
|
||||
)
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_nin_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
||||
def test_nin_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
|
||||
embedding_ones = np.zeros([768, 1]).astype(np.float32)
|
||||
@ -328,20 +328,20 @@ class DocumentStoreBaseTests:
|
||||
@pytest.mark.unit
|
||||
def test_gt_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"page": {"$gt": "100"}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_gt_filter_table(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"content": {"$gt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_gt_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
||||
def test_gt_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
|
||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"embedding": {"$gt": embedding_zeros}})
|
||||
|
||||
@pytest.mark.unit
|
||||
@ -355,20 +355,20 @@ class DocumentStoreBaseTests:
|
||||
@pytest.mark.unit
|
||||
def test_gte_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"page": {"$gte": "100"}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_gte_filter_table(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"content": {"$gte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_gte_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
||||
def test_gte_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
|
||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"embedding": {"$gte": embedding_zeros}})
|
||||
|
||||
@pytest.mark.unit
|
||||
@ -382,20 +382,20 @@ class DocumentStoreBaseTests:
|
||||
@pytest.mark.unit
|
||||
def test_lt_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"page": {"$lt": "100"}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_lt_filter_table(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"content": {"$lt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_lt_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
||||
def test_lt_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
embedding_ones = np.ones([768, 1]).astype(np.float32)
|
||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"embedding": {"$lt": embedding_ones}})
|
||||
|
||||
@pytest.mark.unit
|
||||
@ -409,20 +409,20 @@ class DocumentStoreBaseTests:
|
||||
@pytest.mark.unit
|
||||
def test_lte_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"page": {"$lte": "100"}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_lte_filter_table(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"content": {"$lte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_lte_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
|
||||
def test_lte_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
embedding_ones = np.ones([768, 1]).astype(np.float32)
|
||||
with pytest.raises(StoreError, match="Can't evaluate"):
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"embedding": {"$lte": embedding_ones}})
|
||||
|
||||
@pytest.mark.unit
|
||||
@ -677,12 +677,12 @@ class DocumentStoreBaseTests:
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_write_not_docs(self, docstore: Store):
|
||||
with pytest.raises(ValueError, match="Please provide a list of Documents"):
|
||||
with pytest.raises(ValueError):
|
||||
docstore.write_documents(["not a document for sure"]) # type: ignore
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_write_not_list(self, docstore: Store):
|
||||
with pytest.raises(ValueError, match="Please provide a list of Documents"):
|
||||
with pytest.raises(ValueError):
|
||||
docstore.write_documents("not a list actually") # type: ignore
|
||||
|
||||
@pytest.mark.unit
|
||||
|
Loading…
x
Reference in New Issue
Block a user