test: ease testing for 3rd parties (#5539)

* ease testing for 3rd parties

* fix __all__

* uniform error management

* raise the same filter error

* raise the same filter error

* fix circular import
This commit is contained in:
Massimiliano Pippi 2023-08-10 17:13:15 +02:00 committed by GitHub
parent 168b7c806c
commit d73d443bc0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 53 additions and 41 deletions

View File

@ -1 +1,3 @@
from haystack.preview.dataclasses.document import Document
from haystack.preview.dataclasses.document import Document, ContentType
__all__ = ["Document", "ContentType"]

View File

@ -2,6 +2,10 @@ class StoreError(Exception):
pass
class FilterError(StoreError):
pass
class DuplicateDocumentError(StoreError):
pass

View File

@ -1 +1,4 @@
from haystack.preview.document_stores.memory.document_store import MemoryDocumentStore
from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError
__all__ = ["MemoryDocumentStore", "MemoryDocumentStoreFilterError"]

View File

@ -3,18 +3,14 @@ from typing import List, Any
import numpy as np
import pandas as pd
from haystack.preview.document_stores.errors import StoreError
from haystack.preview.dataclasses import Document
from haystack.preview.document_stores.memory.errors import MemoryDocumentStoreFilterError
GT_TYPES = (int, float, np.number)
IN_TYPES = (list, set, tuple)
class MemoryDocumentStoreFilterError(StoreError):
pass
def not_operation(conditions: List[Any], document: Document, _current_key: str):
"""
Applies a NOT to all the nested conditions.
@ -224,7 +220,7 @@ def match(conditions: Any, document: Document, _current_key=None):
if isinstance(conditions, dict):
# Check for malformed filters, like {"name": {"year": "2020"}}
if _current_key and any(key not in RESERVED_KEYS for key in conditions.keys()):
raise ValueError(
raise MemoryDocumentStoreFilterError(
f"This filter ({{{_current_key}: {conditions}}}) seems to be malformed. "
"Comparisons between dictionaries are not currently supported. "
"Check the documentation to learn more about filters syntax."
@ -245,7 +241,7 @@ def match(conditions: Any, document: Document, _current_key=None):
# A comparison operator ($eq, $in, $gte, ...)
if field_key in OPERATORS.keys():
if not _current_key:
raise ValueError(
raise MemoryDocumentStoreFilterError(
"Filters can't start with an operator like $eq and $in. You have to specify the field name first. "
"See the examples in the documentation."
)
@ -268,7 +264,9 @@ def match(conditions: Any, document: Document, _current_key=None):
# The default operator for a {key: value} filter is $eq
return eq_operation(fields=document.flatten(), field_name=_current_key, value=conditions)
raise ValueError("Filters must be dictionaries or lists. See the examples in the documentation.")
raise MemoryDocumentStoreFilterError(
"Filters must be dictionaries or lists. See the examples in the documentation."
)
def _list_conditions(conditions: Any) -> List[Any]:

View File

@ -0,0 +1,5 @@
from haystack.preview.document_stores.errors import FilterError
class MemoryDocumentStoreFilterError(FilterError):
pass

View File

@ -6,8 +6,8 @@ import numpy as np
import pandas as pd
from haystack.preview.dataclasses import Document
from haystack.preview.document_stores import Store, StoreError, DuplicatePolicy
from haystack.preview.document_stores import MissingDocumentError, DuplicateDocumentError
from haystack.preview.document_stores import Store, DuplicatePolicy
from haystack.preview.document_stores.errors import FilterError, MissingDocumentError, DuplicateDocumentError
class DocumentStoreBaseTests:
@ -17,8 +17,8 @@ class DocumentStoreBaseTests:
@pytest.fixture
def filterable_docs(self) -> List[Document]:
embedding_zero = np.zeros([768, 1]).astype(np.float32)
embedding_one = np.ones([768, 1]).astype(np.float32)
embedding_zero = np.zeros(768).astype(np.float32)
embedding_one = np.ones(768).astype(np.float32)
documents = []
for i in range(3):
@ -142,7 +142,7 @@ class DocumentStoreBaseTests:
@pytest.mark.unit
def test_incorrect_filter_type(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
with pytest.raises(ValueError, match="dictionaries or lists"):
with pytest.raises(FilterError):
docstore.filter_documents(filters="something odd") # type: ignore
@pytest.mark.unit
@ -154,13 +154,13 @@ class DocumentStoreBaseTests:
@pytest.mark.unit
def test_incorrect_filter_nesting(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
with pytest.raises(ValueError, match="malformed"):
with pytest.raises(FilterError):
docstore.filter_documents(filters={"number": {"page": "100"}})
@pytest.mark.unit
def test_deeper_incorrect_filter_nesting(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
with pytest.raises(ValueError, match="malformed"):
with pytest.raises(FilterError):
docstore.filter_documents(filters={"number": {"page": {"chapter": "intro"}}})
@pytest.mark.unit
@ -189,9 +189,9 @@ class DocumentStoreBaseTests:
)
@pytest.mark.unit
def test_eq_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
def test_eq_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
embedding = np.zeros([768, 1]).astype(np.float32)
embedding = np.zeros(768).astype(np.float32)
result = docstore.filter_documents(filters={"embedding": embedding})
assert self.contains_same_docs(
result, [doc for doc in filterable_docs if np.array_equal(embedding, doc.embedding)] # type: ignore
@ -228,10 +228,10 @@ class DocumentStoreBaseTests:
)
@pytest.mark.unit
def test_in_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
def test_in_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
embedding_zero = np.zeros([768, 1]).astype(np.float32)
embedding_one = np.ones([768, 1]).astype(np.float32)
embedding_zero = np.zeros(768, np.float32)
embedding_one = np.ones(768, np.float32)
result = docstore.filter_documents(filters={"embedding": {"$in": [embedding_zero, embedding_one]}})
assert self.contains_same_docs(
result,
@ -263,7 +263,7 @@ class DocumentStoreBaseTests:
)
@pytest.mark.unit
def test_ne_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
def test_ne_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
embedding = np.zeros([768, 1]).astype(np.float32)
result = docstore.filter_documents(filters={"embedding": {"$ne": embedding}})
@ -291,7 +291,7 @@ class DocumentStoreBaseTests:
)
@pytest.mark.unit
def test_nin_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
def test_nin_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
embedding_ones = np.zeros([768, 1]).astype(np.float32)
@ -328,20 +328,20 @@ class DocumentStoreBaseTests:
@pytest.mark.unit
def test_gt_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
with pytest.raises(StoreError, match="Can't evaluate"):
with pytest.raises(FilterError):
docstore.filter_documents(filters={"page": {"$gt": "100"}})
@pytest.mark.unit
def test_gt_filter_table(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
with pytest.raises(StoreError, match="Can't evaluate"):
with pytest.raises(FilterError):
docstore.filter_documents(filters={"content": {"$gt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
@pytest.mark.unit
def test_gt_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
def test_gt_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
with pytest.raises(StoreError, match="Can't evaluate"):
with pytest.raises(FilterError):
docstore.filter_documents(filters={"embedding": {"$gt": embedding_zeros}})
@pytest.mark.unit
@ -355,20 +355,20 @@ class DocumentStoreBaseTests:
@pytest.mark.unit
def test_gte_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
with pytest.raises(StoreError, match="Can't evaluate"):
with pytest.raises(FilterError):
docstore.filter_documents(filters={"page": {"$gte": "100"}})
@pytest.mark.unit
def test_gte_filter_table(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
with pytest.raises(StoreError, match="Can't evaluate"):
with pytest.raises(FilterError):
docstore.filter_documents(filters={"content": {"$gte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
@pytest.mark.unit
def test_gte_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
def test_gte_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
with pytest.raises(StoreError, match="Can't evaluate"):
with pytest.raises(FilterError):
docstore.filter_documents(filters={"embedding": {"$gte": embedding_zeros}})
@pytest.mark.unit
@ -382,20 +382,20 @@ class DocumentStoreBaseTests:
@pytest.mark.unit
def test_lt_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
with pytest.raises(StoreError, match="Can't evaluate"):
with pytest.raises(FilterError):
docstore.filter_documents(filters={"page": {"$lt": "100"}})
@pytest.mark.unit
def test_lt_filter_table(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
with pytest.raises(StoreError, match="Can't evaluate"):
with pytest.raises(FilterError):
docstore.filter_documents(filters={"content": {"$lt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
@pytest.mark.unit
def test_lt_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
def test_lt_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
embedding_ones = np.ones([768, 1]).astype(np.float32)
with pytest.raises(StoreError, match="Can't evaluate"):
with pytest.raises(FilterError):
docstore.filter_documents(filters={"embedding": {"$lt": embedding_ones}})
@pytest.mark.unit
@ -409,20 +409,20 @@ class DocumentStoreBaseTests:
@pytest.mark.unit
def test_lte_filter_non_numeric(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
with pytest.raises(StoreError, match="Can't evaluate"):
with pytest.raises(FilterError):
docstore.filter_documents(filters={"page": {"$lte": "100"}})
@pytest.mark.unit
def test_lte_filter_table(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
with pytest.raises(StoreError, match="Can't evaluate"):
with pytest.raises(FilterError):
docstore.filter_documents(filters={"content": {"$lte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
@pytest.mark.unit
def test_lte_filter_tensor(self, docstore: Store, filterable_docs: List[Document]):
def test_lte_filter_embedding(self, docstore: Store, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
embedding_ones = np.ones([768, 1]).astype(np.float32)
with pytest.raises(StoreError, match="Can't evaluate"):
with pytest.raises(FilterError):
docstore.filter_documents(filters={"embedding": {"$lte": embedding_ones}})
@pytest.mark.unit
@ -677,12 +677,12 @@ class DocumentStoreBaseTests:
@pytest.mark.unit
def test_write_not_docs(self, docstore: Store):
with pytest.raises(ValueError, match="Please provide a list of Documents"):
with pytest.raises(ValueError):
docstore.write_documents(["not a document for sure"]) # type: ignore
@pytest.mark.unit
def test_write_not_list(self, docstore: Store):
with pytest.raises(ValueError, match="Please provide a list of Documents"):
with pytest.raises(ValueError):
docstore.write_documents("not a list actually") # type: ignore
@pytest.mark.unit