mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-13 12:00:50 +00:00
refactor: Rename docstore fixture to document_store (#6360)
* Prevent pytest_generate_tests from polluting preview tests
* Rename docstore fixture to document_store
This commit is contained in:
parent
365127dc5b
commit
a7f742fdbd
@ -20,126 +20,126 @@ class CountDocumentsTest:
|
||||
"""
|
||||
Utility class to test a Document Store `count_documents` method.
|
||||
|
||||
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
|
||||
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
|
||||
Example usage:
|
||||
|
||||
```python
|
||||
class MyDocumentStoreTest(CountDocumentsTest):
|
||||
@pytest.fixture
|
||||
def docstore(self):
|
||||
def document_store(self):
|
||||
return MyDocumentStore()
|
||||
```
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_count_empty(self, docstore: DocumentStore):
|
||||
assert docstore.count_documents() == 0
|
||||
def test_count_empty(self, document_store: DocumentStore):
|
||||
assert document_store.count_documents() == 0
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_count_not_empty(self, docstore: DocumentStore):
|
||||
docstore.write_documents(
|
||||
def test_count_not_empty(self, document_store: DocumentStore):
|
||||
document_store.write_documents(
|
||||
[Document(content="test doc 1"), Document(content="test doc 2"), Document(content="test doc 3")]
|
||||
)
|
||||
assert docstore.count_documents() == 3
|
||||
assert document_store.count_documents() == 3
|
||||
|
||||
|
||||
class WriteDocumentsTest:
|
||||
"""
|
||||
Utility class to test a Document Store `write_documents` method.
|
||||
|
||||
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
|
||||
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
|
||||
Example usage:
|
||||
|
||||
```python
|
||||
class MyDocumentStoreTest(WriteDocumentsTest):
|
||||
@pytest.fixture
|
||||
def docstore(self):
|
||||
def document_store(self):
|
||||
return MyDocumentStore()
|
||||
```
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_write(self, docstore: DocumentStore):
|
||||
def test_write(self, document_store: DocumentStore):
|
||||
doc = Document(content="test doc")
|
||||
docstore.write_documents([doc])
|
||||
assert docstore.filter_documents(filters={"id": doc.id}) == [doc]
|
||||
document_store.write_documents([doc])
|
||||
assert document_store.filter_documents(filters={"id": doc.id}) == [doc]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_write_duplicate_fail(self, docstore: DocumentStore):
|
||||
def test_write_duplicate_fail(self, document_store: DocumentStore):
|
||||
doc = Document(content="test doc")
|
||||
docstore.write_documents([doc])
|
||||
document_store.write_documents([doc])
|
||||
with pytest.raises(DuplicateDocumentError, match=f"ID '{doc.id}' already exists."):
|
||||
docstore.write_documents(documents=[doc], policy=DuplicatePolicy.FAIL)
|
||||
assert docstore.filter_documents(filters={"id": doc.id}) == [doc]
|
||||
document_store.write_documents(documents=[doc], policy=DuplicatePolicy.FAIL)
|
||||
assert document_store.filter_documents(filters={"id": doc.id}) == [doc]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_write_duplicate_skip(self, docstore: DocumentStore):
|
||||
def test_write_duplicate_skip(self, document_store: DocumentStore):
|
||||
doc = Document(content="test doc")
|
||||
docstore.write_documents([doc])
|
||||
docstore.write_documents(documents=[doc], policy=DuplicatePolicy.SKIP)
|
||||
assert docstore.filter_documents(filters={"id": doc.id}) == [doc]
|
||||
document_store.write_documents([doc])
|
||||
document_store.write_documents(documents=[doc], policy=DuplicatePolicy.SKIP)
|
||||
assert document_store.filter_documents(filters={"id": doc.id}) == [doc]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_write_duplicate_overwrite(self, docstore: DocumentStore):
|
||||
def test_write_duplicate_overwrite(self, document_store: DocumentStore):
|
||||
doc1 = Document(content="test doc 1")
|
||||
doc2 = Document(content="test doc 2")
|
||||
object.__setattr__(doc2, "id", doc1.id) # Make two docs with different content but same ID
|
||||
|
||||
docstore.write_documents([doc2])
|
||||
assert docstore.filter_documents(filters={"id": doc1.id}) == [doc2]
|
||||
docstore.write_documents(documents=[doc1], policy=DuplicatePolicy.OVERWRITE)
|
||||
assert docstore.filter_documents(filters={"id": doc1.id}) == [doc1]
|
||||
document_store.write_documents([doc2])
|
||||
assert document_store.filter_documents(filters={"id": doc1.id}) == [doc2]
|
||||
document_store.write_documents(documents=[doc1], policy=DuplicatePolicy.OVERWRITE)
|
||||
assert document_store.filter_documents(filters={"id": doc1.id}) == [doc1]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_write_not_docs(self, docstore: DocumentStore):
|
||||
def test_write_not_docs(self, document_store: DocumentStore):
|
||||
with pytest.raises(ValueError):
|
||||
docstore.write_documents(["not a document for sure"]) # type: ignore
|
||||
document_store.write_documents(["not a document for sure"]) # type: ignore
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_write_not_list(self, docstore: DocumentStore):
|
||||
def test_write_not_list(self, document_store: DocumentStore):
|
||||
with pytest.raises(ValueError):
|
||||
docstore.write_documents("not a list actually") # type: ignore
|
||||
document_store.write_documents("not a list actually") # type: ignore
|
||||
|
||||
|
||||
class DeleteDocumentsTest:
|
||||
"""
|
||||
Utility class to test a Document Store `delete_documents` method.
|
||||
|
||||
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
|
||||
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
|
||||
Example usage:
|
||||
|
||||
```python
|
||||
class MyDocumentStoreTest(DeleteDocumentsTest):
|
||||
@pytest.fixture
|
||||
def docstore(self):
|
||||
def document_store(self):
|
||||
return MyDocumentStore()
|
||||
```
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_delete_empty(self, docstore: DocumentStore):
|
||||
def test_delete_empty(self, document_store: DocumentStore):
|
||||
with pytest.raises(MissingDocumentError):
|
||||
docstore.delete_documents(["test"])
|
||||
document_store.delete_documents(["test"])
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_delete_not_empty(self, docstore: DocumentStore):
|
||||
def test_delete_not_empty(self, document_store: DocumentStore):
|
||||
doc = Document(content="test doc")
|
||||
docstore.write_documents([doc])
|
||||
document_store.write_documents([doc])
|
||||
|
||||
docstore.delete_documents([doc.id])
|
||||
document_store.delete_documents([doc.id])
|
||||
|
||||
with pytest.raises(Exception):
|
||||
assert docstore.filter_documents(filters={"id": doc.id})
|
||||
assert document_store.filter_documents(filters={"id": doc.id})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_delete_not_empty_nonexisting(self, docstore: DocumentStore):
|
||||
def test_delete_not_empty_nonexisting(self, document_store: DocumentStore):
|
||||
doc = Document(content="test doc")
|
||||
docstore.write_documents([doc])
|
||||
document_store.write_documents([doc])
|
||||
|
||||
with pytest.raises(MissingDocumentError):
|
||||
docstore.delete_documents(["non_existing"])
|
||||
document_store.delete_documents(["non_existing"])
|
||||
|
||||
assert docstore.filter_documents(filters={"id": doc.id}) == [doc]
|
||||
assert document_store.filter_documents(filters={"id": doc.id}) == [doc]
|
||||
|
||||
|
||||
class FilterableDocsFixtureMixin:
|
||||
@ -195,87 +195,87 @@ class LegacyFilterDocumentsInvalidFiltersTest(FilterableDocsFixtureMixin):
|
||||
"""
|
||||
Utility class to test a Document Store `filter_documents` method using invalid legacy filters
|
||||
|
||||
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
|
||||
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
|
||||
Example usage:
|
||||
|
||||
```python
|
||||
class MyDocumentStoreTest(LegacyFilterDocumentsInvalidFiltersTest):
|
||||
@pytest.fixture
|
||||
def docstore(self):
|
||||
def document_store(self):
|
||||
return MyDocumentStore()
|
||||
```
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_incorrect_filter_type(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_incorrect_filter_type(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters="something odd") # type: ignore
|
||||
document_store.filter_documents(filters="something odd") # type: ignore
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_incorrect_filter_nesting(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_incorrect_filter_nesting(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"number": {"page": "100"}})
|
||||
document_store.filter_documents(filters={"number": {"page": "100"}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_deeper_incorrect_filter_nesting(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_deeper_incorrect_filter_nesting(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"number": {"page": {"chapter": "intro"}}})
|
||||
document_store.filter_documents(filters={"number": {"page": {"chapter": "intro"}}})
|
||||
|
||||
|
||||
class LegacyFilterDocumentsEqualTest(FilterableDocsFixtureMixin):
|
||||
"""
|
||||
Utility class to test a Document Store `filter_documents` method using implicit and explicit '$eq' legacy filters
|
||||
|
||||
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
|
||||
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
|
||||
Example usage:
|
||||
|
||||
```python
|
||||
class MyDocumentStoreTest(LegacyFilterDocumentsEqualTest):
|
||||
@pytest.fixture
|
||||
def docstore(self):
|
||||
def document_store(self):
|
||||
return MyDocumentStore()
|
||||
```
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_document_content(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"content": "A Foo Document 1"})
|
||||
def test_filter_document_content(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"content": "A Foo Document 1"})
|
||||
assert result == [doc for doc in filterable_docs if doc.content == "A Foo Document 1"]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_simple_metadata_value(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"page": "100"})
|
||||
def test_filter_simple_metadata_value(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"page": "100"})
|
||||
assert result == [doc for doc in filterable_docs if doc.meta.get("page") == "100"]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_document_dataframe(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"dataframe": pd.DataFrame([1])})
|
||||
def test_filter_document_dataframe(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"dataframe": pd.DataFrame([1])})
|
||||
assert result == [
|
||||
doc for doc in filterable_docs if doc.dataframe is not None and doc.dataframe.equals(pd.DataFrame([1]))
|
||||
]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_eq_filter_explicit(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"page": {"$eq": "100"}})
|
||||
def test_eq_filter_explicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"page": {"$eq": "100"}})
|
||||
assert result == [doc for doc in filterable_docs if doc.meta.get("page") == "100"]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_eq_filter_implicit(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"page": "100"})
|
||||
def test_eq_filter_implicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"page": "100"})
|
||||
assert result == [doc for doc in filterable_docs if doc.meta.get("page") == "100"]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_eq_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"dataframe": pd.DataFrame([1])})
|
||||
def test_eq_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"dataframe": pd.DataFrame([1])})
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -283,10 +283,10 @@ class LegacyFilterDocumentsEqualTest(FilterableDocsFixtureMixin):
|
||||
]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_eq_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_eq_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
embedding = [0.0] * 768
|
||||
result = docstore.filter_documents(filters={"embedding": embedding})
|
||||
result = document_store.filter_documents(filters={"embedding": embedding})
|
||||
assert result == [doc for doc in filterable_docs if embedding == doc.embedding]
|
||||
|
||||
|
||||
@ -294,27 +294,27 @@ class LegacyFilterDocumentsNotEqualTest(FilterableDocsFixtureMixin):
|
||||
"""
|
||||
Utility class to test a Document Store `filter_documents` method using explicit '$ne' legacy filters
|
||||
|
||||
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
|
||||
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
|
||||
Example usage:
|
||||
|
||||
```python
|
||||
class MyDocumentStoreTest(LegacyFilterDocumentsNotEqualTest):
|
||||
@pytest.fixture
|
||||
def docstore(self):
|
||||
def document_store(self):
|
||||
return MyDocumentStore()
|
||||
```
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_ne_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"page": {"$ne": "100"}})
|
||||
def test_ne_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"page": {"$ne": "100"}})
|
||||
assert result == [doc for doc in filterable_docs if doc.meta.get("page") != "100"]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_ne_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"dataframe": {"$ne": pd.DataFrame([1])}})
|
||||
def test_ne_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"dataframe": {"$ne": pd.DataFrame([1])}})
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -322,10 +322,10 @@ class LegacyFilterDocumentsNotEqualTest(FilterableDocsFixtureMixin):
|
||||
]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_ne_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_ne_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
embedding = np.zeros([768, 1]).astype(np.float32)
|
||||
result = docstore.filter_documents(filters={"embedding": {"$ne": embedding}})
|
||||
result = document_store.filter_documents(filters={"embedding": {"$ne": embedding}})
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -337,63 +337,63 @@ class LegacyFilterDocumentsInTest(FilterableDocsFixtureMixin):
|
||||
"""
|
||||
Utility class to test a Document Store `filter_documents` method using implicit and explicit '$in' legacy filters
|
||||
|
||||
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
|
||||
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
|
||||
Example usage:
|
||||
|
||||
```python
|
||||
class MyDocumentStoreTest(LegacyFilterDocumentsInTest):
|
||||
@pytest.fixture
|
||||
def docstore(self):
|
||||
def document_store(self):
|
||||
return MyDocumentStore()
|
||||
```
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_simple_list_single_element(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"page": ["100"]})
|
||||
def test_filter_simple_list_single_element(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"page": ["100"]})
|
||||
assert result == [doc for doc in filterable_docs if doc.meta.get("page") == "100"]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_simple_list_one_value(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"page": ["100"]})
|
||||
def test_filter_simple_list_one_value(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"page": ["100"]})
|
||||
assert result == [doc for doc in filterable_docs if doc.meta.get("page") in ["100"]]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_simple_list(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"page": ["100", "123"]})
|
||||
def test_filter_simple_list(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"page": ["100", "123"]})
|
||||
assert result == [doc for doc in filterable_docs if doc.meta.get("page") in ["100", "123"]]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_incorrect_filter_name(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"non_existing_meta_field": ["whatever"]})
|
||||
def test_incorrect_filter_name(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"non_existing_meta_field": ["whatever"]})
|
||||
assert len(result) == 0
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_incorrect_filter_value(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"page": ["nope"]})
|
||||
def test_incorrect_filter_value(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"page": ["nope"]})
|
||||
assert len(result) == 0
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_in_filter_explicit(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"page": {"$in": ["100", "123", "n.a."]}})
|
||||
def test_in_filter_explicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"page": {"$in": ["100", "123", "n.a."]}})
|
||||
assert result == [doc for doc in filterable_docs if doc.meta.get("page") in ["100", "123"]]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_in_filter_implicit(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"page": ["100", "123", "n.a."]})
|
||||
def test_in_filter_implicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"page": ["100", "123", "n.a."]})
|
||||
assert result == [doc for doc in filterable_docs if doc.meta.get("page") in ["100", "123"]]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_in_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"dataframe": {"$in": [pd.DataFrame([1]), pd.DataFrame([2])]}})
|
||||
def test_in_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"dataframe": {"$in": [pd.DataFrame([1]), pd.DataFrame([2])]}})
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -402,11 +402,11 @@ class LegacyFilterDocumentsInTest(FilterableDocsFixtureMixin):
|
||||
]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_in_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_in_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
embedding_zero = [0.0] * 768
|
||||
embedding_one = [1.0] * 768
|
||||
result = docstore.filter_documents(filters={"embedding": {"$in": [embedding_zero, embedding_one]}})
|
||||
result = document_store.filter_documents(filters={"embedding": {"$in": [embedding_zero, embedding_one]}})
|
||||
assert result == [
|
||||
doc for doc in filterable_docs if (embedding_zero == doc.embedding or embedding_one == doc.embedding)
|
||||
]
|
||||
@ -416,21 +416,23 @@ class LegacyFilterDocumentsNotInTest(FilterableDocsFixtureMixin):
|
||||
"""
|
||||
Utility class to test a Document Store `filter_documents` method using explicit '$nin' legacy filters
|
||||
|
||||
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
|
||||
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
|
||||
Example usage:
|
||||
|
||||
```python
|
||||
class MyDocumentStoreTest(LegacyFilterDocumentsNotInTest):
|
||||
@pytest.fixture
|
||||
def docstore(self):
|
||||
def document_store(self):
|
||||
return MyDocumentStore()
|
||||
```
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_nin_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"dataframe": {"$nin": [pd.DataFrame([1]), pd.DataFrame([0])]}})
|
||||
def test_nin_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(
|
||||
filters={"dataframe": {"$nin": [pd.DataFrame([1]), pd.DataFrame([0])]}}
|
||||
)
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -439,11 +441,11 @@ class LegacyFilterDocumentsNotInTest(FilterableDocsFixtureMixin):
|
||||
]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_nin_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_nin_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
|
||||
embedding_ones = np.zeros([768, 1]).astype(np.float32)
|
||||
result = docstore.filter_documents(filters={"embedding": {"$nin": [embedding_ones, embedding_zeros]}})
|
||||
result = document_store.filter_documents(filters={"embedding": {"$nin": [embedding_ones, embedding_zeros]}})
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -451,9 +453,9 @@ class LegacyFilterDocumentsNotInTest(FilterableDocsFixtureMixin):
|
||||
]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_nin_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"page": {"$nin": ["100", "123", "n.a."]}})
|
||||
def test_nin_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"page": {"$nin": ["100", "123", "n.a."]}})
|
||||
assert result == [doc for doc in filterable_docs if doc.meta.get("page") not in ["100", "123"]]
|
||||
|
||||
|
||||
@ -461,186 +463,186 @@ class LegacyFilterDocumentsGreaterThanTest(FilterableDocsFixtureMixin):
|
||||
"""
|
||||
Utility class to test a Document Store `filter_documents` method using explicit '$gt' legacy filters
|
||||
|
||||
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
|
||||
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
|
||||
Example usage:
|
||||
|
||||
```python
|
||||
class MyDocumentStoreTest(LegacyFilterDocumentsGreaterThanTest):
|
||||
@pytest.fixture
|
||||
def docstore(self):
|
||||
def document_store(self):
|
||||
return MyDocumentStore()
|
||||
```
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_gt_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"number": {"$gt": 0.0}})
|
||||
def test_gt_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"number": {"$gt": 0.0}})
|
||||
assert result == [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] > 0]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_gt_filter_non_numeric(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_gt_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"page": {"$gt": "100"}})
|
||||
document_store.filter_documents(filters={"page": {"$gt": "100"}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_gt_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_gt_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"dataframe": {"$gt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||
document_store.filter_documents(filters={"dataframe": {"$gt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_gt_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_gt_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"embedding": {"$gt": embedding_zeros}})
|
||||
document_store.filter_documents(filters={"embedding": {"$gt": embedding_zeros}})
|
||||
|
||||
|
||||
class LegacyFilterDocumentsGreaterThanEqualTest(FilterableDocsFixtureMixin):
|
||||
"""
|
||||
Utility class to test a Document Store `filter_documents` method using explicit '$gte' legacy filters
|
||||
|
||||
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
|
||||
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
|
||||
Example usage:
|
||||
|
||||
```python
|
||||
class MyDocumentStoreTest(LegacyFilterDocumentsGreaterThanEqualTest):
|
||||
@pytest.fixture
|
||||
def docstore(self):
|
||||
def document_store(self):
|
||||
return MyDocumentStore()
|
||||
```
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_gte_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"number": {"$gte": -2}})
|
||||
def test_gte_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"number": {"$gte": -2}})
|
||||
assert result == [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] >= -2]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_gte_filter_non_numeric(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_gte_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"page": {"$gte": "100"}})
|
||||
document_store.filter_documents(filters={"page": {"$gte": "100"}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_gte_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_gte_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"dataframe": {"$gte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||
document_store.filter_documents(filters={"dataframe": {"$gte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_gte_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_gte_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"embedding": {"$gte": embedding_zeros}})
|
||||
document_store.filter_documents(filters={"embedding": {"$gte": embedding_zeros}})
|
||||
|
||||
|
||||
class LegacyFilterDocumentsLessThanTest(FilterableDocsFixtureMixin):
|
||||
"""
|
||||
Utility class to test a Document Store `filter_documents` method using explicit '$lt' legacy filters
|
||||
|
||||
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
|
||||
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
|
||||
Example usage:
|
||||
|
||||
```python
|
||||
class MyDocumentStoreTest(LegacyFilterDocumentsLessThanTest):
|
||||
@pytest.fixture
|
||||
def docstore(self):
|
||||
def document_store(self):
|
||||
return MyDocumentStore()
|
||||
```
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_lt_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"number": {"$lt": 0.0}})
|
||||
def test_lt_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"number": {"$lt": 0.0}})
|
||||
assert result == [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] < 0]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_lt_filter_non_numeric(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_lt_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"page": {"$lt": "100"}})
|
||||
document_store.filter_documents(filters={"page": {"$lt": "100"}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_lt_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_lt_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"dataframe": {"$lt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||
document_store.filter_documents(filters={"dataframe": {"$lt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_lt_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_lt_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
embedding_ones = np.ones([768, 1]).astype(np.float32)
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"embedding": {"$lt": embedding_ones}})
|
||||
document_store.filter_documents(filters={"embedding": {"$lt": embedding_ones}})
|
||||
|
||||
|
||||
class LegacyFilterDocumentsLessThanEqualTest(FilterableDocsFixtureMixin):
|
||||
"""
|
||||
Utility class to test a Document Store `filter_documents` method using explicit '$lte' legacy filters
|
||||
|
||||
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
|
||||
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
|
||||
Example usage:
|
||||
|
||||
```python
|
||||
class MyDocumentStoreTest(LegacyFilterDocumentsLessThanEqualTest):
|
||||
@pytest.fixture
|
||||
def docstore(self):
|
||||
def document_store(self):
|
||||
return MyDocumentStore()
|
||||
```
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_lte_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"number": {"$lte": 2.0}})
|
||||
def test_lte_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"number": {"$lte": 2.0}})
|
||||
assert result == [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] <= 2.0]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_lte_filter_non_numeric(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_lte_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"page": {"$lte": "100"}})
|
||||
document_store.filter_documents(filters={"page": {"$lte": "100"}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_lte_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_lte_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"dataframe": {"$lte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||
document_store.filter_documents(filters={"dataframe": {"$lte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_lte_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_lte_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
embedding_ones = np.ones([768, 1]).astype(np.float32)
|
||||
with pytest.raises(FilterError):
|
||||
docstore.filter_documents(filters={"embedding": {"$lte": embedding_ones}})
|
||||
document_store.filter_documents(filters={"embedding": {"$lte": embedding_ones}})
|
||||
|
||||
|
||||
class LegacyFilterDocumentsSimpleLogicalTest(FilterableDocsFixtureMixin):
|
||||
"""
|
||||
Utility class to test a Document Store `filter_documents` method using logical '$and', '$or' and '$not' legacy filters
|
||||
|
||||
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
|
||||
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
|
||||
Example usage:
|
||||
|
||||
```python
|
||||
class MyDocumentStoreTest(LegacyFilterDocumentsSimpleLogicalTest):
|
||||
@pytest.fixture
|
||||
def docstore(self):
|
||||
def document_store(self):
|
||||
return MyDocumentStore()
|
||||
```
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_simple_or(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_filter_simple_or(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
filters = {"$or": {"name": {"$in": ["name_0", "name_1"]}, "number": {"$lt": 1.0}}}
|
||||
result = docstore.filter_documents(filters=filters)
|
||||
result = document_store.filter_documents(filters=filters)
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -649,10 +651,10 @@ class LegacyFilterDocumentsSimpleLogicalTest(FilterableDocsFixtureMixin):
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_simple_implicit_and_with_multi_key_dict(
|
||||
self, docstore: DocumentStore, filterable_docs: List[Document]
|
||||
self, document_store: DocumentStore, filterable_docs: List[Document]
|
||||
):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"number": {"$lte": 2.0, "$gte": 0.0}})
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"number": {"$lte": 2.0, "$gte": 0.0}})
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -661,16 +663,16 @@ class LegacyFilterDocumentsSimpleLogicalTest(FilterableDocsFixtureMixin):
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_simple_explicit_and_with_multikey_dict(
|
||||
self, docstore: DocumentStore, filterable_docs: List[Document]
|
||||
self, document_store: DocumentStore, filterable_docs: List[Document]
|
||||
):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"number": {"$and": {"$gte": 0, "$lte": 2}}})
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"number": {"$and": {"$gte": 0, "$lte": 2}}})
|
||||
assert result == [doc for doc in filterable_docs if "number" in doc.meta and 0 <= doc.meta["number"] <= 2]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_simple_explicit_and_with_list(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"number": {"$and": [{"$lte": 2}, {"$gte": 0}]}})
|
||||
def test_filter_simple_explicit_and_with_list(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"number": {"$and": [{"$lte": 2}, {"$gte": 0}]}})
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -678,9 +680,9 @@ class LegacyFilterDocumentsSimpleLogicalTest(FilterableDocsFixtureMixin):
|
||||
]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_simple_implicit_and(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
result = docstore.filter_documents(filters={"number": {"$lte": 2.0, "$gte": 0}})
|
||||
def test_filter_simple_implicit_and(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
result = document_store.filter_documents(filters={"number": {"$lte": 2.0, "$gte": 0}})
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -692,22 +694,22 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
|
||||
"""
|
||||
Utility class to test a Document Store `filter_documents` method using multiple nested logical '$and', '$or' and '$not' legacy filters
|
||||
|
||||
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
|
||||
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
|
||||
Example usage:
|
||||
|
||||
```python
|
||||
class MyDocumentStoreTest(LegacyFilterDocumentsNestedLogicalTest):
|
||||
@pytest.fixture
|
||||
def docstore(self):
|
||||
def document_store(self):
|
||||
return MyDocumentStore()
|
||||
```
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_nested_explicit_and(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_filter_nested_explicit_and(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
filters = {"$and": {"number": {"$and": {"$lte": 2, "$gte": 0}}, "name": {"$in": ["name_0", "name_1"]}}}
|
||||
result = docstore.filter_documents(filters=filters)
|
||||
result = document_store.filter_documents(filters=filters)
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -720,10 +722,10 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
|
||||
]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_nested_implicit_and(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_filter_nested_implicit_and(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
filters_simplified = {"number": {"$lte": 2, "$gte": 0}, "name": ["name_0", "name_1"]}
|
||||
result = docstore.filter_documents(filters=filters_simplified)
|
||||
result = document_store.filter_documents(filters=filters_simplified)
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -736,10 +738,10 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
|
||||
]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_nested_or(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_filter_nested_or(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
filters = {"$or": {"name": {"$or": [{"$eq": "name_0"}, {"$eq": "name_1"}]}, "number": {"$lt": 1.0}}}
|
||||
result = docstore.filter_documents(filters=filters)
|
||||
result = document_store.filter_documents(filters=filters)
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -747,12 +749,12 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
|
||||
]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_nested_and_or_explicit(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_filter_nested_and_or_explicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
filters_simplified = {
|
||||
"$and": {"page": {"$eq": "123"}, "$or": {"name": {"$in": ["name_0", "name_1"]}, "number": {"$lt": 1.0}}}
|
||||
}
|
||||
result = docstore.filter_documents(filters=filters_simplified)
|
||||
result = document_store.filter_documents(filters=filters_simplified)
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -763,13 +765,13 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
|
||||
]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_nested_and_or_implicit(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_filter_nested_and_or_implicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
filters_simplified = {
|
||||
"page": {"$eq": "123"},
|
||||
"$or": {"name": {"$in": ["name_0", "name_1"]}, "number": {"$lt": 1.0}},
|
||||
}
|
||||
result = docstore.filter_documents(filters=filters_simplified)
|
||||
result = document_store.filter_documents(filters=filters_simplified)
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -780,15 +782,15 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
|
||||
]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_nested_or_and(self, docstore: DocumentStore, filterable_docs: List[Document]):
|
||||
docstore.write_documents(filterable_docs)
|
||||
def test_filter_nested_or_and(self, document_store: DocumentStore, filterable_docs: List[Document]):
|
||||
document_store.write_documents(filterable_docs)
|
||||
filters_simplified = {
|
||||
"$or": {
|
||||
"number": {"$lt": 1},
|
||||
"$and": {"name": {"$in": ["name_0", "name_1"]}, "$not": {"chapter": {"$eq": "intro"}}},
|
||||
}
|
||||
}
|
||||
result = docstore.filter_documents(filters=filters_simplified)
|
||||
result = document_store.filter_documents(filters=filters_simplified)
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -803,16 +805,16 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_filter_nested_multiple_identical_operators_same_level(
|
||||
self, docstore: DocumentStore, filterable_docs: List[Document]
|
||||
self, document_store: DocumentStore, filterable_docs: List[Document]
|
||||
):
|
||||
docstore.write_documents(filterable_docs)
|
||||
document_store.write_documents(filterable_docs)
|
||||
filters = {
|
||||
"$or": [
|
||||
{"$and": {"name": {"$in": ["name_0", "name_1"]}, "page": "100"}},
|
||||
{"$and": {"chapter": {"$in": ["intro", "abstract"]}, "page": "123"}},
|
||||
]
|
||||
}
|
||||
result = docstore.filter_documents(filters=filters)
|
||||
result = document_store.filter_documents(filters=filters)
|
||||
assert result == [
|
||||
doc
|
||||
for doc in filterable_docs
|
||||
@ -839,33 +841,33 @@ class LegacyFilterDocumentsTest( # pylint: disable=too-many-ancestors
|
||||
"""
|
||||
Utility class to test a Document Store `filter_documents` method using different types of legacy filters
|
||||
|
||||
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
|
||||
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
|
||||
Example usage:
|
||||
|
||||
```python
|
||||
class MyDocumentStoreTest(LegacyFilterDocumentsTest):
|
||||
@pytest.fixture
|
||||
def docstore(self):
|
||||
def document_store(self):
|
||||
return MyDocumentStore()
|
||||
```
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_no_filter_empty(self, docstore: DocumentStore):
|
||||
assert docstore.filter_documents() == []
|
||||
assert docstore.filter_documents(filters={}) == []
|
||||
def test_no_filter_empty(self, document_store: DocumentStore):
|
||||
assert document_store.filter_documents() == []
|
||||
assert document_store.filter_documents(filters={}) == []
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_no_filter_not_empty(self, docstore: DocumentStore):
|
||||
def test_no_filter_not_empty(self, document_store: DocumentStore):
|
||||
docs = [Document(content="test doc")]
|
||||
docstore.write_documents(docs)
|
||||
assert docstore.filter_documents() == docs
|
||||
assert docstore.filter_documents(filters={}) == docs
|
||||
document_store.write_documents(docs)
|
||||
assert document_store.filter_documents() == docs
|
||||
assert document_store.filter_documents(filters={}) == docs
|
||||
|
||||
|
||||
class DocumentStoreBaseTests(
|
||||
CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, LegacyFilterDocumentsTest
|
||||
): # pylint: disable=too-many-ancestors
|
||||
@pytest.fixture
|
||||
def docstore(self) -> DocumentStore:
|
||||
def document_store(self) -> DocumentStore:
|
||||
raise NotImplementedError()
|
||||
|
@ -17,7 +17,7 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904
|
||||
"""
|
||||
|
||||
@pytest.fixture
|
||||
def docstore(self) -> InMemoryDocumentStore:
|
||||
def document_store(self) -> InMemoryDocumentStore:
|
||||
return InMemoryDocumentStore()
|
||||
|
||||
@pytest.mark.unit
|
||||
@ -71,45 +71,45 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904
|
||||
assert store.bm25_parameters == {"key": "value"}
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_written_documents_count(self, docstore: InMemoryDocumentStore):
|
||||
def test_written_documents_count(self, document_store: InMemoryDocumentStore):
|
||||
# FIXME Remove after the document store base tests have been rewritten
|
||||
documents = [Document(content=f"Hello world #{i}") for i in range(10)]
|
||||
docs_written = docstore.write_documents(documents[0:2])
|
||||
docs_written = document_store.write_documents(documents[0:2])
|
||||
assert docs_written == 2
|
||||
assert docstore.filter_documents() == documents[0:2]
|
||||
assert document_store.filter_documents() == documents[0:2]
|
||||
|
||||
docs_written = docstore.write_documents(documents, DuplicatePolicy.SKIP)
|
||||
docs_written = document_store.write_documents(documents, DuplicatePolicy.SKIP)
|
||||
assert docs_written == len(documents) - 2
|
||||
assert docstore.filter_documents() == documents
|
||||
assert document_store.filter_documents() == documents
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_bm25_retrieval(self, docstore: InMemoryDocumentStore):
|
||||
docstore = InMemoryDocumentStore()
|
||||
def test_bm25_retrieval(self, document_store: InMemoryDocumentStore):
|
||||
document_store = InMemoryDocumentStore()
|
||||
# Tests if the bm25_retrieval method returns the correct document based on the input query.
|
||||
docs = [Document(content="Hello world"), Document(content="Haystack supports multiple languages")]
|
||||
docstore.write_documents(docs)
|
||||
results = docstore.bm25_retrieval(query="What languages?", top_k=1)
|
||||
document_store.write_documents(docs)
|
||||
results = document_store.bm25_retrieval(query="What languages?", top_k=1)
|
||||
assert len(results) == 1
|
||||
assert results[0].content == "Haystack supports multiple languages"
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_bm25_retrieval_with_empty_document_store(self, docstore: InMemoryDocumentStore, caplog):
|
||||
def test_bm25_retrieval_with_empty_document_store(self, document_store: InMemoryDocumentStore, caplog):
|
||||
caplog.set_level(logging.INFO)
|
||||
# Tests if the bm25_retrieval method correctly returns an empty list when there are no documents in the DocumentStore.
|
||||
results = docstore.bm25_retrieval(query="How to test this?", top_k=2)
|
||||
results = document_store.bm25_retrieval(query="How to test this?", top_k=2)
|
||||
assert len(results) == 0
|
||||
assert "No documents found for BM25 retrieval. Returning empty list." in caplog.text
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_bm25_retrieval_empty_query(self, docstore: InMemoryDocumentStore):
|
||||
def test_bm25_retrieval_empty_query(self, document_store: InMemoryDocumentStore):
|
||||
# Tests if the bm25_retrieval method returns a document when the query is an empty string.
|
||||
docs = [Document(content="Hello world"), Document(content="Haystack supports multiple languages")]
|
||||
docstore.write_documents(docs)
|
||||
document_store.write_documents(docs)
|
||||
with pytest.raises(ValueError, match="Query should be a non-empty string"):
|
||||
docstore.bm25_retrieval(query="", top_k=1)
|
||||
document_store.bm25_retrieval(query="", top_k=1)
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_bm25_retrieval_with_different_top_k(self, docstore: InMemoryDocumentStore):
|
||||
def test_bm25_retrieval_with_different_top_k(self, document_store: InMemoryDocumentStore):
|
||||
# Tests if the bm25_retrieval method correctly changes the number of returned documents
|
||||
# based on the top_k parameter.
|
||||
docs = [
|
||||
@ -117,19 +117,19 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904
|
||||
Document(content="Haystack supports multiple languages"),
|
||||
Document(content="Python is a popular programming language"),
|
||||
]
|
||||
docstore.write_documents(docs)
|
||||
document_store.write_documents(docs)
|
||||
|
||||
# top_k = 2
|
||||
results = docstore.bm25_retrieval(query="languages", top_k=2)
|
||||
results = document_store.bm25_retrieval(query="languages", top_k=2)
|
||||
assert len(results) == 2
|
||||
|
||||
# top_k = 3
|
||||
results = docstore.bm25_retrieval(query="languages", top_k=3)
|
||||
results = document_store.bm25_retrieval(query="languages", top_k=3)
|
||||
assert len(results) == 3
|
||||
|
||||
# Test two queries and make sure the results are different
|
||||
@pytest.mark.unit
|
||||
def test_bm25_retrieval_with_two_queries(self, docstore: InMemoryDocumentStore):
|
||||
def test_bm25_retrieval_with_two_queries(self, document_store: InMemoryDocumentStore):
|
||||
# Tests if the bm25_retrieval method returns different documents for different queries.
|
||||
docs = [
|
||||
Document(content="Javascript is a popular programming language"),
|
||||
@ -138,60 +138,60 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904
|
||||
Document(content="Ruby is a popular programming language"),
|
||||
Document(content="PHP is a popular programming language"),
|
||||
]
|
||||
docstore.write_documents(docs)
|
||||
document_store.write_documents(docs)
|
||||
|
||||
results = docstore.bm25_retrieval(query="Java", top_k=1)
|
||||
results = document_store.bm25_retrieval(query="Java", top_k=1)
|
||||
assert results[0].content == "Java is a popular programming language"
|
||||
|
||||
results = docstore.bm25_retrieval(query="Python", top_k=1)
|
||||
results = document_store.bm25_retrieval(query="Python", top_k=1)
|
||||
assert results[0].content == "Python is a popular programming language"
|
||||
|
||||
@pytest.mark.skip(reason="Filter is not working properly, see https://github.com/deepset-ai/haystack/issues/6153")
|
||||
def test_eq_filter_embedding(self, docstore: InMemoryDocumentStore, filterable_docs):
|
||||
def test_eq_filter_embedding(self, document_store: InMemoryDocumentStore, filterable_docs):
|
||||
pass
|
||||
|
||||
# Test a query, add a new document and make sure results are appropriately updated
|
||||
@pytest.mark.unit
|
||||
def test_bm25_retrieval_with_updated_docs(self, docstore: InMemoryDocumentStore):
|
||||
def test_bm25_retrieval_with_updated_docs(self, document_store: InMemoryDocumentStore):
|
||||
# Tests if the bm25_retrieval method correctly updates the retrieved documents when new
|
||||
# documents are added to the DocumentStore.
|
||||
docs = [Document(content="Hello world")]
|
||||
docstore.write_documents(docs)
|
||||
document_store.write_documents(docs)
|
||||
|
||||
results = docstore.bm25_retrieval(query="Python", top_k=1)
|
||||
results = document_store.bm25_retrieval(query="Python", top_k=1)
|
||||
assert len(results) == 1
|
||||
|
||||
docstore.write_documents([Document(content="Python is a popular programming language")])
|
||||
results = docstore.bm25_retrieval(query="Python", top_k=1)
|
||||
document_store.write_documents([Document(content="Python is a popular programming language")])
|
||||
results = document_store.bm25_retrieval(query="Python", top_k=1)
|
||||
assert len(results) == 1
|
||||
assert results[0].content == "Python is a popular programming language"
|
||||
|
||||
docstore.write_documents([Document(content="Java is a popular programming language")])
|
||||
results = docstore.bm25_retrieval(query="Python", top_k=1)
|
||||
document_store.write_documents([Document(content="Java is a popular programming language")])
|
||||
results = document_store.bm25_retrieval(query="Python", top_k=1)
|
||||
assert len(results) == 1
|
||||
assert results[0].content == "Python is a popular programming language"
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_bm25_retrieval_with_scale_score(self, docstore: InMemoryDocumentStore):
|
||||
def test_bm25_retrieval_with_scale_score(self, document_store: InMemoryDocumentStore):
|
||||
docs = [Document(content="Python programming"), Document(content="Java programming")]
|
||||
docstore.write_documents(docs)
|
||||
document_store.write_documents(docs)
|
||||
|
||||
results1 = docstore.bm25_retrieval(query="Python", top_k=1, scale_score=True)
|
||||
results1 = document_store.bm25_retrieval(query="Python", top_k=1, scale_score=True)
|
||||
# Confirm that score is scaled between 0 and 1
|
||||
assert results1[0].score is not None
|
||||
assert 0.0 <= results1[0].score <= 1.0
|
||||
|
||||
# Same query, different scale, scores differ when not scaled
|
||||
results = docstore.bm25_retrieval(query="Python", top_k=1, scale_score=False)
|
||||
results = document_store.bm25_retrieval(query="Python", top_k=1, scale_score=False)
|
||||
assert results[0].score != results1[0].score
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_bm25_retrieval_with_table_content(self, docstore: InMemoryDocumentStore):
|
||||
def test_bm25_retrieval_with_table_content(self, document_store: InMemoryDocumentStore):
|
||||
# Tests if the bm25_retrieval method correctly returns a dataframe when the content_type is table.
|
||||
table_content = pd.DataFrame({"language": ["Python", "Java"], "use": ["Data Science", "Web Development"]})
|
||||
docs = [Document(dataframe=table_content), Document(content="Gardening"), Document(content="Bird watching")]
|
||||
docstore.write_documents(docs)
|
||||
results = docstore.bm25_retrieval(query="Java", top_k=1)
|
||||
document_store.write_documents(docs)
|
||||
results = document_store.bm25_retrieval(query="Java", top_k=1)
|
||||
assert len(results) == 1
|
||||
|
||||
df = results[0].dataframe
|
||||
@ -199,7 +199,7 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904
|
||||
assert df.equals(table_content)
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_bm25_retrieval_with_text_and_table_content(self, docstore: InMemoryDocumentStore, caplog):
|
||||
def test_bm25_retrieval_with_text_and_table_content(self, document_store: InMemoryDocumentStore, caplog):
|
||||
table_content = pd.DataFrame({"language": ["Python", "Java"], "use": ["Data Science", "Web Development"]})
|
||||
document = Document(content="Gardening", dataframe=table_content)
|
||||
docs = [
|
||||
@ -209,51 +209,51 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904
|
||||
Document(content="Gardening"),
|
||||
Document(content="Java"),
|
||||
]
|
||||
docstore.write_documents(docs)
|
||||
results = docstore.bm25_retrieval(query="Gardening", top_k=2)
|
||||
document_store.write_documents(docs)
|
||||
results = document_store.bm25_retrieval(query="Gardening", top_k=2)
|
||||
assert document.id in [d.id for d in results]
|
||||
assert "both text and dataframe content" in caplog.text
|
||||
results = docstore.bm25_retrieval(query="Python", top_k=2)
|
||||
results = document_store.bm25_retrieval(query="Python", top_k=2)
|
||||
assert document.id not in [d.id for d in results]
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_bm25_retrieval_default_filter_for_text_and_dataframes(self, docstore: InMemoryDocumentStore):
|
||||
def test_bm25_retrieval_default_filter_for_text_and_dataframes(self, document_store: InMemoryDocumentStore):
|
||||
docs = [Document(), Document(content="Gardening"), Document(content="Bird watching")]
|
||||
docstore.write_documents(docs)
|
||||
results = docstore.bm25_retrieval(query="doesn't matter, top_k is 10", top_k=10)
|
||||
document_store.write_documents(docs)
|
||||
results = document_store.bm25_retrieval(query="doesn't matter, top_k is 10", top_k=10)
|
||||
assert len(results) == 2
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_bm25_retrieval_with_filters(self, docstore: InMemoryDocumentStore):
|
||||
def test_bm25_retrieval_with_filters(self, document_store: InMemoryDocumentStore):
|
||||
selected_document = Document(content="Gardening", meta={"selected": True})
|
||||
docs = [Document(), selected_document, Document(content="Bird watching")]
|
||||
docstore.write_documents(docs)
|
||||
results = docstore.bm25_retrieval(query="Java", top_k=10, filters={"selected": True})
|
||||
document_store.write_documents(docs)
|
||||
results = document_store.bm25_retrieval(query="Java", top_k=10, filters={"selected": True})
|
||||
assert len(results) == 1
|
||||
assert results[0].id == selected_document.id
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_bm25_retrieval_with_filters_keeps_default_filters(self, docstore: InMemoryDocumentStore):
|
||||
def test_bm25_retrieval_with_filters_keeps_default_filters(self, document_store: InMemoryDocumentStore):
|
||||
docs = [Document(meta={"selected": True}), Document(content="Gardening"), Document(content="Bird watching")]
|
||||
docstore.write_documents(docs)
|
||||
results = docstore.bm25_retrieval(query="Java", top_k=10, filters={"selected": True})
|
||||
document_store.write_documents(docs)
|
||||
results = document_store.bm25_retrieval(query="Java", top_k=10, filters={"selected": True})
|
||||
assert len(results) == 0
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_bm25_retrieval_with_filters_on_text_or_dataframe(self, docstore: InMemoryDocumentStore):
|
||||
def test_bm25_retrieval_with_filters_on_text_or_dataframe(self, document_store: InMemoryDocumentStore):
|
||||
document = Document(dataframe=pd.DataFrame({"language": ["Python", "Java"], "use": ["Data Science", "Web"]}))
|
||||
docs = [Document(), Document(content="Gardening"), Document(content="Bird watching"), document]
|
||||
docstore.write_documents(docs)
|
||||
results = docstore.bm25_retrieval(query="Java", top_k=10, filters={"content": None})
|
||||
document_store.write_documents(docs)
|
||||
results = document_store.bm25_retrieval(query="Java", top_k=10, filters={"content": None})
|
||||
assert len(results) == 1
|
||||
assert results[0].id == document.id
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_bm25_retrieval_with_documents_with_mixed_content(self, docstore: InMemoryDocumentStore):
|
||||
def test_bm25_retrieval_with_documents_with_mixed_content(self, document_store: InMemoryDocumentStore):
|
||||
double_document = Document(content="Gardening", embedding=[1.0, 2.0, 3.0])
|
||||
docs = [Document(embedding=[1.0, 2.0, 3.0]), double_document, Document(content="Bird watching")]
|
||||
docstore.write_documents(docs)
|
||||
results = docstore.bm25_retrieval(query="Java", top_k=10, filters={"embedding": {"$not": None}})
|
||||
document_store.write_documents(docs)
|
||||
results = document_store.bm25_retrieval(query="Java", top_k=10, filters={"embedding": {"$not": None}})
|
||||
assert len(results) == 1
|
||||
assert results[0].id == double_document.id
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user