refactor: Rename docstore fixture to document_store (#6360)

* Prevent pytest_generate_tests from polluting preview tests

* Rename docstore fixture to document_store
This commit is contained in:
Silvano Cerza 2023-11-20 17:41:48 +01:00 committed by GitHub
parent 365127dc5b
commit a7f742fdbd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 291 additions and 289 deletions

View File

@ -20,126 +20,126 @@ class CountDocumentsTest:
"""
Utility class to test a Document Store `count_documents` method.
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
```python
class MyDocumentStoreTest(CountDocumentsTest):
@pytest.fixture
def docstore(self):
def document_store(self):
return MyDocumentStore()
```
"""
@pytest.mark.unit
def test_count_empty(self, docstore: DocumentStore):
assert docstore.count_documents() == 0
def test_count_empty(self, document_store: DocumentStore):
assert document_store.count_documents() == 0
@pytest.mark.unit
def test_count_not_empty(self, docstore: DocumentStore):
docstore.write_documents(
def test_count_not_empty(self, document_store: DocumentStore):
document_store.write_documents(
[Document(content="test doc 1"), Document(content="test doc 2"), Document(content="test doc 3")]
)
assert docstore.count_documents() == 3
assert document_store.count_documents() == 3
class WriteDocumentsTest:
"""
Utility class to test a Document Store `write_documents` method.
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
```python
class MyDocumentStoreTest(WriteDocumentsTest):
@pytest.fixture
def docstore(self):
def document_store(self):
return MyDocumentStore()
```
"""
@pytest.mark.unit
def test_write(self, docstore: DocumentStore):
def test_write(self, document_store: DocumentStore):
doc = Document(content="test doc")
docstore.write_documents([doc])
assert docstore.filter_documents(filters={"id": doc.id}) == [doc]
document_store.write_documents([doc])
assert document_store.filter_documents(filters={"id": doc.id}) == [doc]
@pytest.mark.unit
def test_write_duplicate_fail(self, docstore: DocumentStore):
def test_write_duplicate_fail(self, document_store: DocumentStore):
doc = Document(content="test doc")
docstore.write_documents([doc])
document_store.write_documents([doc])
with pytest.raises(DuplicateDocumentError, match=f"ID '{doc.id}' already exists."):
docstore.write_documents(documents=[doc], policy=DuplicatePolicy.FAIL)
assert docstore.filter_documents(filters={"id": doc.id}) == [doc]
document_store.write_documents(documents=[doc], policy=DuplicatePolicy.FAIL)
assert document_store.filter_documents(filters={"id": doc.id}) == [doc]
@pytest.mark.unit
def test_write_duplicate_skip(self, docstore: DocumentStore):
def test_write_duplicate_skip(self, document_store: DocumentStore):
doc = Document(content="test doc")
docstore.write_documents([doc])
docstore.write_documents(documents=[doc], policy=DuplicatePolicy.SKIP)
assert docstore.filter_documents(filters={"id": doc.id}) == [doc]
document_store.write_documents([doc])
document_store.write_documents(documents=[doc], policy=DuplicatePolicy.SKIP)
assert document_store.filter_documents(filters={"id": doc.id}) == [doc]
@pytest.mark.unit
def test_write_duplicate_overwrite(self, docstore: DocumentStore):
def test_write_duplicate_overwrite(self, document_store: DocumentStore):
doc1 = Document(content="test doc 1")
doc2 = Document(content="test doc 2")
object.__setattr__(doc2, "id", doc1.id) # Make two docs with different content but same ID
docstore.write_documents([doc2])
assert docstore.filter_documents(filters={"id": doc1.id}) == [doc2]
docstore.write_documents(documents=[doc1], policy=DuplicatePolicy.OVERWRITE)
assert docstore.filter_documents(filters={"id": doc1.id}) == [doc1]
document_store.write_documents([doc2])
assert document_store.filter_documents(filters={"id": doc1.id}) == [doc2]
document_store.write_documents(documents=[doc1], policy=DuplicatePolicy.OVERWRITE)
assert document_store.filter_documents(filters={"id": doc1.id}) == [doc1]
@pytest.mark.unit
def test_write_not_docs(self, docstore: DocumentStore):
def test_write_not_docs(self, document_store: DocumentStore):
with pytest.raises(ValueError):
docstore.write_documents(["not a document for sure"]) # type: ignore
document_store.write_documents(["not a document for sure"]) # type: ignore
@pytest.mark.unit
def test_write_not_list(self, docstore: DocumentStore):
def test_write_not_list(self, document_store: DocumentStore):
with pytest.raises(ValueError):
docstore.write_documents("not a list actually") # type: ignore
document_store.write_documents("not a list actually") # type: ignore
class DeleteDocumentsTest:
"""
Utility class to test a Document Store `delete_documents` method.
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
```python
class MyDocumentStoreTest(DeleteDocumentsTest):
@pytest.fixture
def docstore(self):
def document_store(self):
return MyDocumentStore()
```
"""
@pytest.mark.unit
def test_delete_empty(self, docstore: DocumentStore):
def test_delete_empty(self, document_store: DocumentStore):
with pytest.raises(MissingDocumentError):
docstore.delete_documents(["test"])
document_store.delete_documents(["test"])
@pytest.mark.unit
def test_delete_not_empty(self, docstore: DocumentStore):
def test_delete_not_empty(self, document_store: DocumentStore):
doc = Document(content="test doc")
docstore.write_documents([doc])
document_store.write_documents([doc])
docstore.delete_documents([doc.id])
document_store.delete_documents([doc.id])
with pytest.raises(Exception):
assert docstore.filter_documents(filters={"id": doc.id})
assert document_store.filter_documents(filters={"id": doc.id})
@pytest.mark.unit
def test_delete_not_empty_nonexisting(self, docstore: DocumentStore):
def test_delete_not_empty_nonexisting(self, document_store: DocumentStore):
doc = Document(content="test doc")
docstore.write_documents([doc])
document_store.write_documents([doc])
with pytest.raises(MissingDocumentError):
docstore.delete_documents(["non_existing"])
document_store.delete_documents(["non_existing"])
assert docstore.filter_documents(filters={"id": doc.id}) == [doc]
assert document_store.filter_documents(filters={"id": doc.id}) == [doc]
class FilterableDocsFixtureMixin:
@ -195,87 +195,87 @@ class LegacyFilterDocumentsInvalidFiltersTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using invalid legacy filters
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
```python
class MyDocumentStoreTest(LegacyFilterDocumentsInvalidFiltersTest):
@pytest.fixture
def docstore(self):
def document_store(self):
return MyDocumentStore()
```
"""
@pytest.mark.unit
def test_incorrect_filter_type(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_incorrect_filter_type(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
docstore.filter_documents(filters="something odd") # type: ignore
document_store.filter_documents(filters="something odd") # type: ignore
@pytest.mark.unit
def test_incorrect_filter_nesting(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_incorrect_filter_nesting(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
docstore.filter_documents(filters={"number": {"page": "100"}})
document_store.filter_documents(filters={"number": {"page": "100"}})
@pytest.mark.unit
def test_deeper_incorrect_filter_nesting(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_deeper_incorrect_filter_nesting(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
docstore.filter_documents(filters={"number": {"page": {"chapter": "intro"}}})
document_store.filter_documents(filters={"number": {"page": {"chapter": "intro"}}})
class LegacyFilterDocumentsEqualTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using implicit and explicit '$eq' legacy filters
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
```python
class MyDocumentStoreTest(LegacyFilterDocumentsEqualTest):
@pytest.fixture
def docstore(self):
def document_store(self):
return MyDocumentStore()
```
"""
@pytest.mark.unit
def test_filter_document_content(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"content": "A Foo Document 1"})
def test_filter_document_content(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"content": "A Foo Document 1"})
assert result == [doc for doc in filterable_docs if doc.content == "A Foo Document 1"]
@pytest.mark.unit
def test_filter_simple_metadata_value(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"page": "100"})
def test_filter_simple_metadata_value(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"page": "100"})
assert result == [doc for doc in filterable_docs if doc.meta.get("page") == "100"]
@pytest.mark.unit
def test_filter_document_dataframe(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"dataframe": pd.DataFrame([1])})
def test_filter_document_dataframe(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"dataframe": pd.DataFrame([1])})
assert result == [
doc for doc in filterable_docs if doc.dataframe is not None and doc.dataframe.equals(pd.DataFrame([1]))
]
@pytest.mark.unit
def test_eq_filter_explicit(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"page": {"$eq": "100"}})
def test_eq_filter_explicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"page": {"$eq": "100"}})
assert result == [doc for doc in filterable_docs if doc.meta.get("page") == "100"]
@pytest.mark.unit
def test_eq_filter_implicit(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"page": "100"})
def test_eq_filter_implicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"page": "100"})
assert result == [doc for doc in filterable_docs if doc.meta.get("page") == "100"]
@pytest.mark.unit
def test_eq_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"dataframe": pd.DataFrame([1])})
def test_eq_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"dataframe": pd.DataFrame([1])})
assert result == [
doc
for doc in filterable_docs
@ -283,10 +283,10 @@ class LegacyFilterDocumentsEqualTest(FilterableDocsFixtureMixin):
]
@pytest.mark.unit
def test_eq_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_eq_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
embedding = [0.0] * 768
result = docstore.filter_documents(filters={"embedding": embedding})
result = document_store.filter_documents(filters={"embedding": embedding})
assert result == [doc for doc in filterable_docs if embedding == doc.embedding]
@ -294,27 +294,27 @@ class LegacyFilterDocumentsNotEqualTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using explicit '$ne' legacy filters
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
```python
class MyDocumentStoreTest(LegacyFilterDocumentsNotEqualTest):
@pytest.fixture
def docstore(self):
def document_store(self):
return MyDocumentStore()
```
"""
@pytest.mark.unit
def test_ne_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"page": {"$ne": "100"}})
def test_ne_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"page": {"$ne": "100"}})
assert result == [doc for doc in filterable_docs if doc.meta.get("page") != "100"]
@pytest.mark.unit
def test_ne_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"dataframe": {"$ne": pd.DataFrame([1])}})
def test_ne_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"dataframe": {"$ne": pd.DataFrame([1])}})
assert result == [
doc
for doc in filterable_docs
@ -322,10 +322,10 @@ class LegacyFilterDocumentsNotEqualTest(FilterableDocsFixtureMixin):
]
@pytest.mark.unit
def test_ne_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_ne_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
embedding = np.zeros([768, 1]).astype(np.float32)
result = docstore.filter_documents(filters={"embedding": {"$ne": embedding}})
result = document_store.filter_documents(filters={"embedding": {"$ne": embedding}})
assert result == [
doc
for doc in filterable_docs
@ -337,63 +337,63 @@ class LegacyFilterDocumentsInTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using implicit and explicit '$in' legacy filters
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
```python
class MyDocumentStoreTest(LegacyFilterDocumentsInTest):
@pytest.fixture
def docstore(self):
def document_store(self):
return MyDocumentStore()
```
"""
@pytest.mark.unit
def test_filter_simple_list_single_element(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"page": ["100"]})
def test_filter_simple_list_single_element(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"page": ["100"]})
assert result == [doc for doc in filterable_docs if doc.meta.get("page") == "100"]
@pytest.mark.unit
def test_filter_simple_list_one_value(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"page": ["100"]})
def test_filter_simple_list_one_value(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"page": ["100"]})
assert result == [doc for doc in filterable_docs if doc.meta.get("page") in ["100"]]
@pytest.mark.unit
def test_filter_simple_list(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"page": ["100", "123"]})
def test_filter_simple_list(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"page": ["100", "123"]})
assert result == [doc for doc in filterable_docs if doc.meta.get("page") in ["100", "123"]]
@pytest.mark.unit
def test_incorrect_filter_name(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"non_existing_meta_field": ["whatever"]})
def test_incorrect_filter_name(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"non_existing_meta_field": ["whatever"]})
assert len(result) == 0
@pytest.mark.unit
def test_incorrect_filter_value(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"page": ["nope"]})
def test_incorrect_filter_value(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"page": ["nope"]})
assert len(result) == 0
@pytest.mark.unit
def test_in_filter_explicit(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"page": {"$in": ["100", "123", "n.a."]}})
def test_in_filter_explicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"page": {"$in": ["100", "123", "n.a."]}})
assert result == [doc for doc in filterable_docs if doc.meta.get("page") in ["100", "123"]]
@pytest.mark.unit
def test_in_filter_implicit(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"page": ["100", "123", "n.a."]})
def test_in_filter_implicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"page": ["100", "123", "n.a."]})
assert result == [doc for doc in filterable_docs if doc.meta.get("page") in ["100", "123"]]
@pytest.mark.unit
def test_in_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"dataframe": {"$in": [pd.DataFrame([1]), pd.DataFrame([2])]}})
def test_in_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"dataframe": {"$in": [pd.DataFrame([1]), pd.DataFrame([2])]}})
assert result == [
doc
for doc in filterable_docs
@ -402,11 +402,11 @@ class LegacyFilterDocumentsInTest(FilterableDocsFixtureMixin):
]
@pytest.mark.unit
def test_in_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_in_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
embedding_zero = [0.0] * 768
embedding_one = [1.0] * 768
result = docstore.filter_documents(filters={"embedding": {"$in": [embedding_zero, embedding_one]}})
result = document_store.filter_documents(filters={"embedding": {"$in": [embedding_zero, embedding_one]}})
assert result == [
doc for doc in filterable_docs if (embedding_zero == doc.embedding or embedding_one == doc.embedding)
]
@ -416,21 +416,23 @@ class LegacyFilterDocumentsNotInTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using explicit '$nin' legacy filters
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
```python
class MyDocumentStoreTest(LegacyFilterDocumentsNotInTest):
@pytest.fixture
def docstore(self):
def document_store(self):
return MyDocumentStore()
```
"""
@pytest.mark.unit
def test_nin_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"dataframe": {"$nin": [pd.DataFrame([1]), pd.DataFrame([0])]}})
def test_nin_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(
filters={"dataframe": {"$nin": [pd.DataFrame([1]), pd.DataFrame([0])]}}
)
assert result == [
doc
for doc in filterable_docs
@ -439,11 +441,11 @@ class LegacyFilterDocumentsNotInTest(FilterableDocsFixtureMixin):
]
@pytest.mark.unit
def test_nin_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_nin_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
embedding_ones = np.zeros([768, 1]).astype(np.float32)
result = docstore.filter_documents(filters={"embedding": {"$nin": [embedding_ones, embedding_zeros]}})
result = document_store.filter_documents(filters={"embedding": {"$nin": [embedding_ones, embedding_zeros]}})
assert result == [
doc
for doc in filterable_docs
@ -451,9 +453,9 @@ class LegacyFilterDocumentsNotInTest(FilterableDocsFixtureMixin):
]
@pytest.mark.unit
def test_nin_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"page": {"$nin": ["100", "123", "n.a."]}})
def test_nin_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"page": {"$nin": ["100", "123", "n.a."]}})
assert result == [doc for doc in filterable_docs if doc.meta.get("page") not in ["100", "123"]]
@ -461,186 +463,186 @@ class LegacyFilterDocumentsGreaterThanTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using explicit '$gt' legacy filters
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
```python
class MyDocumentStoreTest(LegacyFilterDocumentsGreaterThanTest):
@pytest.fixture
def docstore(self):
def document_store(self):
return MyDocumentStore()
```
"""
@pytest.mark.unit
def test_gt_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"number": {"$gt": 0.0}})
def test_gt_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"number": {"$gt": 0.0}})
assert result == [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] > 0]
@pytest.mark.unit
def test_gt_filter_non_numeric(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_gt_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
docstore.filter_documents(filters={"page": {"$gt": "100"}})
document_store.filter_documents(filters={"page": {"$gt": "100"}})
@pytest.mark.unit
def test_gt_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_gt_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
docstore.filter_documents(filters={"dataframe": {"$gt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
document_store.filter_documents(filters={"dataframe": {"$gt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
@pytest.mark.unit
def test_gt_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_gt_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
with pytest.raises(FilterError):
docstore.filter_documents(filters={"embedding": {"$gt": embedding_zeros}})
document_store.filter_documents(filters={"embedding": {"$gt": embedding_zeros}})
class LegacyFilterDocumentsGreaterThanEqualTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using explicit '$gte' legacy filters
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
```python
class MyDocumentStoreTest(LegacyFilterDocumentsGreaterThanEqualTest):
@pytest.fixture
def docstore(self):
def document_store(self):
return MyDocumentStore()
```
"""
@pytest.mark.unit
def test_gte_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"number": {"$gte": -2}})
def test_gte_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"number": {"$gte": -2}})
assert result == [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] >= -2]
@pytest.mark.unit
def test_gte_filter_non_numeric(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_gte_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
docstore.filter_documents(filters={"page": {"$gte": "100"}})
document_store.filter_documents(filters={"page": {"$gte": "100"}})
@pytest.mark.unit
def test_gte_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_gte_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
docstore.filter_documents(filters={"dataframe": {"$gte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
document_store.filter_documents(filters={"dataframe": {"$gte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
@pytest.mark.unit
def test_gte_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_gte_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
embedding_zeros = np.zeros([768, 1]).astype(np.float32)
with pytest.raises(FilterError):
docstore.filter_documents(filters={"embedding": {"$gte": embedding_zeros}})
document_store.filter_documents(filters={"embedding": {"$gte": embedding_zeros}})
class LegacyFilterDocumentsLessThanTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using explicit '$lt' legacy filters
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
```python
class MyDocumentStoreTest(LegacyFilterDocumentsLessThanTest):
@pytest.fixture
def docstore(self):
def document_store(self):
return MyDocumentStore()
```
"""
@pytest.mark.unit
def test_lt_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"number": {"$lt": 0.0}})
def test_lt_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"number": {"$lt": 0.0}})
assert result == [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] < 0]
@pytest.mark.unit
def test_lt_filter_non_numeric(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_lt_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
docstore.filter_documents(filters={"page": {"$lt": "100"}})
document_store.filter_documents(filters={"page": {"$lt": "100"}})
@pytest.mark.unit
def test_lt_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_lt_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
docstore.filter_documents(filters={"dataframe": {"$lt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
document_store.filter_documents(filters={"dataframe": {"$lt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})
@pytest.mark.unit
def test_lt_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_lt_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
embedding_ones = np.ones([768, 1]).astype(np.float32)
with pytest.raises(FilterError):
docstore.filter_documents(filters={"embedding": {"$lt": embedding_ones}})
document_store.filter_documents(filters={"embedding": {"$lt": embedding_ones}})
class LegacyFilterDocumentsLessThanEqualTest(FilterableDocsFixtureMixin):
    """
    Utility class to test a Document Store `filter_documents` method using explicit '$lte' legacy filters

    To use it create a custom test class and override the `document_store` fixture to return your Document Store.
    Example usage:

    ```python
    class MyDocumentStoreTest(LegacyFilterDocumentsLessThanEqualTest):
        @pytest.fixture
        def document_store(self):
            return MyDocumentStore()
    ```
    """

    @pytest.mark.unit
    def test_lte_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
        """Expect exactly the documents whose `number` meta value is <= 2.0, in input order."""
        document_store.write_documents(filterable_docs)
        result = document_store.filter_documents(filters={"number": {"$lte": 2.0}})
        assert result == [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] <= 2.0]

    @pytest.mark.unit
    def test_lte_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]):
        """Check that `$lte` with a string operand raises `FilterError`."""
        document_store.write_documents(filterable_docs)
        with pytest.raises(FilterError):
            document_store.filter_documents(filters={"page": {"$lte": "100"}})

    @pytest.mark.unit
    def test_lte_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]):
        """Check that `$lte` with a pandas DataFrame operand raises `FilterError`."""
        document_store.write_documents(filterable_docs)
        with pytest.raises(FilterError):
            document_store.filter_documents(filters={"dataframe": {"$lte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}})

    @pytest.mark.unit
    def test_lte_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]):
        """Check that `$lte` with a numpy array operand raises `FilterError`."""
        document_store.write_documents(filterable_docs)
        # A (768, 1) float32 array of ones used as the (invalid) comparison operand.
        embedding_ones = np.ones([768, 1]).astype(np.float32)
        with pytest.raises(FilterError):
            document_store.filter_documents(filters={"embedding": {"$lte": embedding_ones}})
class LegacyFilterDocumentsSimpleLogicalTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using logical '$and', '$or' and '$not' legacy filters
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
```python
class MyDocumentStoreTest(LegacyFilterDocumentsSimpleLogicalTest):
@pytest.fixture
def docstore(self):
def document_store(self):
return MyDocumentStore()
```
"""
@pytest.mark.unit
def test_filter_simple_or(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_filter_simple_or(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
filters = {"$or": {"name": {"$in": ["name_0", "name_1"]}, "number": {"$lt": 1.0}}}
result = docstore.filter_documents(filters=filters)
result = document_store.filter_documents(filters=filters)
assert result == [
doc
for doc in filterable_docs
@ -649,10 +651,10 @@ class LegacyFilterDocumentsSimpleLogicalTest(FilterableDocsFixtureMixin):
@pytest.mark.unit
def test_filter_simple_implicit_and_with_multi_key_dict(
self, docstore: DocumentStore, filterable_docs: List[Document]
self, document_store: DocumentStore, filterable_docs: List[Document]
):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"number": {"$lte": 2.0, "$gte": 0.0}})
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"number": {"$lte": 2.0, "$gte": 0.0}})
assert result == [
doc
for doc in filterable_docs
@ -661,16 +663,16 @@ class LegacyFilterDocumentsSimpleLogicalTest(FilterableDocsFixtureMixin):
@pytest.mark.unit
def test_filter_simple_explicit_and_with_multikey_dict(
    self, document_store: DocumentStore, filterable_docs: List[Document]
):
    """Expect an explicit `$and` over a two-key dict to select documents with 0 <= `number` <= 2."""
    document_store.write_documents(filterable_docs)
    result = document_store.filter_documents(filters={"number": {"$and": {"$gte": 0, "$lte": 2}}})
    assert result == [doc for doc in filterable_docs if "number" in doc.meta and 0 <= doc.meta["number"] <= 2]
@pytest.mark.unit
def test_filter_simple_explicit_and_with_list(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"number": {"$and": [{"$lte": 2}, {"$gte": 0}]}})
def test_filter_simple_explicit_and_with_list(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"number": {"$and": [{"$lte": 2}, {"$gte": 0}]}})
assert result == [
doc
for doc in filterable_docs
@ -678,9 +680,9 @@ class LegacyFilterDocumentsSimpleLogicalTest(FilterableDocsFixtureMixin):
]
@pytest.mark.unit
def test_filter_simple_implicit_and(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"number": {"$lte": 2.0, "$gte": 0}})
def test_filter_simple_implicit_and(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"number": {"$lte": 2.0, "$gte": 0}})
assert result == [
doc
for doc in filterable_docs
@ -692,22 +694,22 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using multiple nested logical '$and', '$or' and '$not' legacy filters
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
```python
class MyDocumentStoreTest(LegacyFilterDocumentsNestedLogicalTest):
@pytest.fixture
def docstore(self):
def document_store(self):
return MyDocumentStore()
```
"""
@pytest.mark.unit
def test_filter_nested_explicit_and(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_filter_nested_explicit_and(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
filters = {"$and": {"number": {"$and": {"$lte": 2, "$gte": 0}}, "name": {"$in": ["name_0", "name_1"]}}}
result = docstore.filter_documents(filters=filters)
result = document_store.filter_documents(filters=filters)
assert result == [
doc
for doc in filterable_docs
@ -720,10 +722,10 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
]
@pytest.mark.unit
def test_filter_nested_implicit_and(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_filter_nested_implicit_and(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
filters_simplified = {"number": {"$lte": 2, "$gte": 0}, "name": ["name_0", "name_1"]}
result = docstore.filter_documents(filters=filters_simplified)
result = document_store.filter_documents(filters=filters_simplified)
assert result == [
doc
for doc in filterable_docs
@ -736,10 +738,10 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
]
@pytest.mark.unit
def test_filter_nested_or(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_filter_nested_or(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
filters = {"$or": {"name": {"$or": [{"$eq": "name_0"}, {"$eq": "name_1"}]}, "number": {"$lt": 1.0}}}
result = docstore.filter_documents(filters=filters)
result = document_store.filter_documents(filters=filters)
assert result == [
doc
for doc in filterable_docs
@ -747,12 +749,12 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
]
@pytest.mark.unit
def test_filter_nested_and_or_explicit(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_filter_nested_and_or_explicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
filters_simplified = {
"$and": {"page": {"$eq": "123"}, "$or": {"name": {"$in": ["name_0", "name_1"]}, "number": {"$lt": 1.0}}}
}
result = docstore.filter_documents(filters=filters_simplified)
result = document_store.filter_documents(filters=filters_simplified)
assert result == [
doc
for doc in filterable_docs
@ -763,13 +765,13 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
]
@pytest.mark.unit
def test_filter_nested_and_or_implicit(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_filter_nested_and_or_implicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
filters_simplified = {
"page": {"$eq": "123"},
"$or": {"name": {"$in": ["name_0", "name_1"]}, "number": {"$lt": 1.0}},
}
result = docstore.filter_documents(filters=filters_simplified)
result = document_store.filter_documents(filters=filters_simplified)
assert result == [
doc
for doc in filterable_docs
@ -780,15 +782,15 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
]
@pytest.mark.unit
def test_filter_nested_or_and(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
def test_filter_nested_or_and(self, document_store: DocumentStore, filterable_docs: List[Document]):
document_store.write_documents(filterable_docs)
filters_simplified = {
"$or": {
"number": {"$lt": 1},
"$and": {"name": {"$in": ["name_0", "name_1"]}, "$not": {"chapter": {"$eq": "intro"}}},
}
}
result = docstore.filter_documents(filters=filters_simplified)
result = document_store.filter_documents(filters=filters_simplified)
assert result == [
doc
for doc in filterable_docs
@ -803,16 +805,16 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
@pytest.mark.unit
def test_filter_nested_multiple_identical_operators_same_level(
self, docstore: DocumentStore, filterable_docs: List[Document]
self, document_store: DocumentStore, filterable_docs: List[Document]
):
docstore.write_documents(filterable_docs)
document_store.write_documents(filterable_docs)
filters = {
"$or": [
{"$and": {"name": {"$in": ["name_0", "name_1"]}, "page": "100"}},
{"$and": {"chapter": {"$in": ["intro", "abstract"]}, "page": "123"}},
]
}
result = docstore.filter_documents(filters=filters)
result = document_store.filter_documents(filters=filters)
assert result == [
doc
for doc in filterable_docs
@ -839,33 +841,33 @@ class LegacyFilterDocumentsTest( # pylint: disable=too-many-ancestors
"""
Utility class to test a Document Store `filter_documents` method using different types of legacy filters
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
```python
class MyDocumentStoreTest(LegacyFilterDocumentsTest):
@pytest.fixture
def docstore(self):
def document_store(self):
return MyDocumentStore()
```
"""
@pytest.mark.unit
def test_no_filter_empty(self, document_store: DocumentStore):
    """Expect an empty list from `filter_documents` both with no filters and with `filters={}`."""
    assert document_store.filter_documents() == []
    assert document_store.filter_documents(filters={}) == []
@pytest.mark.unit
def test_no_filter_not_empty(self, document_store: DocumentStore):
    """Expect the written document back both with no filters and with `filters={}`."""
    docs = [Document(content="test doc")]
    document_store.write_documents(docs)
    assert document_store.filter_documents() == docs
    assert document_store.filter_documents(filters={}) == docs
class DocumentStoreBaseTests(
    CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, LegacyFilterDocumentsTest
):  # pylint: disable=too-many-ancestors
    """
    Combines the count, write, delete and legacy-filter test classes into a single base class.

    The `document_store` fixture defined here only raises `NotImplementedError`, so subclasses
    must override it to return the Document Store under test.
    """

    @pytest.fixture
    def document_store(self) -> DocumentStore:
        raise NotImplementedError()

View File

@ -17,7 +17,7 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904
"""
@pytest.fixture
def document_store(self) -> InMemoryDocumentStore:
    """Return a newly constructed `InMemoryDocumentStore` for the requesting test."""
    return InMemoryDocumentStore()
@pytest.mark.unit
@ -71,45 +71,45 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904
assert store.bm25_parameters == {"key": "value"}
@pytest.mark.unit
def test_written_documents_count(self, document_store: InMemoryDocumentStore):
    """Check the count returned by `write_documents`, including when duplicates are skipped."""
    # FIXME Remove after the document store base tests have been rewritten
    documents = [Document(content=f"Hello world #{i}") for i in range(10)]
    docs_written = document_store.write_documents(documents[0:2])
    assert docs_written == 2
    assert document_store.filter_documents() == documents[0:2]

    # The first two documents are already stored, so with SKIP only the remaining ones are counted.
    docs_written = document_store.write_documents(documents, DuplicatePolicy.SKIP)
    assert docs_written == len(documents) - 2
    assert document_store.filter_documents() == documents
@pytest.mark.unit
def test_bm25_retrieval(self, document_store: InMemoryDocumentStore):
    """Check that `bm25_retrieval` returns the document matching the query."""
    # The injected fixture already supplies a new InMemoryDocumentStore, so it is used directly
    # instead of being shadowed by a second, locally created instance.
    docs = [Document(content="Hello world"), Document(content="Haystack supports multiple languages")]
    document_store.write_documents(docs)
    results = document_store.bm25_retrieval(query="What languages?", top_k=1)
    assert len(results) == 1
    assert results[0].content == "Haystack supports multiple languages"
@pytest.mark.unit
def test_bm25_retrieval_with_empty_document_store(self, document_store: InMemoryDocumentStore, caplog):
    """Check that `bm25_retrieval` on a store with no documents returns an empty list and logs why."""
    # Capture INFO-level records so the log message asserted below is present in caplog.text.
    caplog.set_level(logging.INFO)
    results = document_store.bm25_retrieval(query="How to test this?", top_k=2)
    assert len(results) == 0
    assert "No documents found for BM25 retrieval. Returning empty list." in caplog.text
@pytest.mark.unit
def test_bm25_retrieval_empty_query(self, document_store: InMemoryDocumentStore):
    """Check that `bm25_retrieval` raises `ValueError` when the query is an empty string."""
    docs = [Document(content="Hello world"), Document(content="Haystack supports multiple languages")]
    document_store.write_documents(docs)
    with pytest.raises(ValueError, match="Query should be a non-empty string"):
        document_store.bm25_retrieval(query="", top_k=1)
@pytest.mark.unit
def test_bm25_retrieval_with_different_top_k(self, docstore: InMemoryDocumentStore):
def test_bm25_retrieval_with_different_top_k(self, document_store: InMemoryDocumentStore):
# Tests if the bm25_retrieval method correctly changes the number of returned documents
# based on the top_k parameter.
docs = [
@ -117,19 +117,19 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904
Document(content="Haystack supports multiple languages"),
Document(content="Python is a popular programming language"),
]
docstore.write_documents(docs)
document_store.write_documents(docs)
# top_k = 2
results = docstore.bm25_retrieval(query="languages", top_k=2)
results = document_store.bm25_retrieval(query="languages", top_k=2)
assert len(results) == 2
# top_k = 3
results = docstore.bm25_retrieval(query="languages", top_k=3)
results = document_store.bm25_retrieval(query="languages", top_k=3)
assert len(results) == 3
# Test two queries and make sure the results are different
@pytest.mark.unit
def test_bm25_retrieval_with_two_queries(self, docstore: InMemoryDocumentStore):
def test_bm25_retrieval_with_two_queries(self, document_store: InMemoryDocumentStore):
# Tests if the bm25_retrieval method returns different documents for different queries.
docs = [
Document(content="Javascript is a popular programming language"),
@ -138,60 +138,60 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904
Document(content="Ruby is a popular programming language"),
Document(content="PHP is a popular programming language"),
]
docstore.write_documents(docs)
document_store.write_documents(docs)
results = docstore.bm25_retrieval(query="Java", top_k=1)
results = document_store.bm25_retrieval(query="Java", top_k=1)
assert results[0].content == "Java is a popular programming language"
results = docstore.bm25_retrieval(query="Python", top_k=1)
results = document_store.bm25_retrieval(query="Python", top_k=1)
assert results[0].content == "Python is a popular programming language"
@pytest.mark.skip(reason="Filter is not working properly, see https://github.com/deepset-ai/haystack/issues/6153")
def test_eq_filter_embedding(self, document_store: InMemoryDocumentStore, filterable_docs):
    """Skipped placeholder with an empty body; see the skip reason for the tracking issue."""
    pass
# Test a query, add a new document and make sure results are appropriately updated
@pytest.mark.unit
def test_bm25_retrieval_with_updated_docs(self, document_store: InMemoryDocumentStore):
    """Check that `bm25_retrieval` results reflect documents written after an earlier query."""
    docs = [Document(content="Hello world")]
    document_store.write_documents(docs)

    # With a single stored document, top_k=1 still yields one result even though it lacks the query term.
    results = document_store.bm25_retrieval(query="Python", top_k=1)
    assert len(results) == 1

    document_store.write_documents([Document(content="Python is a popular programming language")])
    results = document_store.bm25_retrieval(query="Python", top_k=1)
    assert len(results) == 1
    assert results[0].content == "Python is a popular programming language"

    # Adding a non-matching document must not displace the matching one.
    document_store.write_documents([Document(content="Java is a popular programming language")])
    results = document_store.bm25_retrieval(query="Python", top_k=1)
    assert len(results) == 1
    assert results[0].content == "Python is a popular programming language"
@pytest.mark.unit
def test_bm25_retrieval_with_scale_score(self, document_store: InMemoryDocumentStore):
    """Check that `scale_score=True` yields a score in [0, 1] that differs from the unscaled score."""
    docs = [Document(content="Python programming"), Document(content="Java programming")]
    document_store.write_documents(docs)

    results1 = document_store.bm25_retrieval(query="Python", top_k=1, scale_score=True)
    # Confirm that score is scaled between 0 and 1
    assert results1[0].score is not None
    assert 0.0 <= results1[0].score <= 1.0

    # Same query, different scale, scores differ when not scaled
    results = document_store.bm25_retrieval(query="Python", top_k=1, scale_score=False)
    assert results[0].score != results1[0].score
@pytest.mark.unit
def test_bm25_retrieval_with_table_content(self, docstore: InMemoryDocumentStore):
def test_bm25_retrieval_with_table_content(self, document_store: InMemoryDocumentStore):
# Tests if the bm25_retrieval method correctly returns a dataframe when the content_type is table.
table_content = pd.DataFrame({"language": ["Python", "Java"], "use": ["Data Science", "Web Development"]})
docs = [Document(dataframe=table_content), Document(content="Gardening"), Document(content="Bird watching")]
docstore.write_documents(docs)
results = docstore.bm25_retrieval(query="Java", top_k=1)
document_store.write_documents(docs)
results = document_store.bm25_retrieval(query="Java", top_k=1)
assert len(results) == 1
df = results[0].dataframe
@ -199,7 +199,7 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904
assert df.equals(table_content)
@pytest.mark.unit
def test_bm25_retrieval_with_text_and_table_content(self, docstore: InMemoryDocumentStore, caplog):
def test_bm25_retrieval_with_text_and_table_content(self, document_store: InMemoryDocumentStore, caplog):
table_content = pd.DataFrame({"language": ["Python", "Java"], "use": ["Data Science", "Web Development"]})
document = Document(content="Gardening", dataframe=table_content)
docs = [
@ -209,51 +209,51 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904
Document(content="Gardening"),
Document(content="Java"),
]
docstore.write_documents(docs)
results = docstore.bm25_retrieval(query="Gardening", top_k=2)
document_store.write_documents(docs)
results = document_store.bm25_retrieval(query="Gardening", top_k=2)
assert document.id in [d.id for d in results]
assert "both text and dataframe content" in caplog.text
results = docstore.bm25_retrieval(query="Python", top_k=2)
results = document_store.bm25_retrieval(query="Python", top_k=2)
assert document.id not in [d.id for d in results]
@pytest.mark.unit
def test_bm25_retrieval_default_filter_for_text_and_dataframes(self, document_store: InMemoryDocumentStore):
    """Expect two results when three documents are stored and one of them is an empty `Document()`."""
    docs = [Document(), Document(content="Gardening"), Document(content="Bird watching")]
    document_store.write_documents(docs)
    # top_k exceeds the number of stored documents, so it does not limit the result.
    results = document_store.bm25_retrieval(query="doesn't matter, top_k is 10", top_k=10)
    assert len(results) == 2
@pytest.mark.unit
def test_bm25_retrieval_with_filters(self, document_store: InMemoryDocumentStore):
    """Expect only the document whose meta has `selected=True` when that filter is passed."""
    selected_document = Document(content="Gardening", meta={"selected": True})
    docs = [Document(), selected_document, Document(content="Bird watching")]
    document_store.write_documents(docs)
    results = document_store.bm25_retrieval(query="Java", top_k=10, filters={"selected": True})
    assert len(results) == 1
    assert results[0].id == selected_document.id
@pytest.mark.unit
def test_bm25_retrieval_with_filters_keeps_default_filters(self, document_store: InMemoryDocumentStore):
    """Expect no results when the only document matching the user filter was created without content."""
    docs = [Document(meta={"selected": True}), Document(content="Gardening"), Document(content="Bird watching")]
    document_store.write_documents(docs)
    results = document_store.bm25_retrieval(query="Java", top_k=10, filters={"selected": True})
    assert len(results) == 0
@pytest.mark.unit
def test_bm25_retrieval_with_filters_on_text_or_dataframe(self, document_store: InMemoryDocumentStore):
    """Expect only the dataframe document when filtering with `{"content": None}`."""
    document = Document(dataframe=pd.DataFrame({"language": ["Python", "Java"], "use": ["Data Science", "Web"]}))
    docs = [Document(), Document(content="Gardening"), Document(content="Bird watching"), document]
    document_store.write_documents(docs)
    results = document_store.bm25_retrieval(query="Java", top_k=10, filters={"content": None})
    assert len(results) == 1
    assert results[0].id == document.id
@pytest.mark.unit
def test_bm25_retrieval_with_documents_with_mixed_content(self, document_store: InMemoryDocumentStore):
    """Expect only the document that has both text content and an embedding when filtering on a non-None embedding."""
    double_document = Document(content="Gardening", embedding=[1.0, 2.0, 3.0])
    docs = [Document(embedding=[1.0, 2.0, 3.0]), double_document, Document(content="Bird watching")]
    document_store.write_documents(docs)
    results = document_store.bm25_retrieval(query="Java", top_k=10, filters={"embedding": {"$not": None}})
    assert len(results) == 1
    assert results[0].id == double_document.id