diff --git a/haystack/preview/testing/document_store.py b/haystack/preview/testing/document_store.py index c730e5531..a66c38094 100644 --- a/haystack/preview/testing/document_store.py +++ b/haystack/preview/testing/document_store.py @@ -20,126 +20,126 @@ class CountDocumentsTest: """ Utility class to test a Document Store `count_documents` method. - To use it create a custom test class and override the `docstore` fixture to return your Document Store. + To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: ```python class MyDocumentStoreTest(CountDocumentsTest): @pytest.fixture - def docstore(self): + def document_store(self): return MyDocumentStore() ``` """ @pytest.mark.unit - def test_count_empty(self, docstore: DocumentStore): - assert docstore.count_documents() == 0 + def test_count_empty(self, document_store: DocumentStore): + assert document_store.count_documents() == 0 @pytest.mark.unit - def test_count_not_empty(self, docstore: DocumentStore): - docstore.write_documents( + def test_count_not_empty(self, document_store: DocumentStore): + document_store.write_documents( [Document(content="test doc 1"), Document(content="test doc 2"), Document(content="test doc 3")] ) - assert docstore.count_documents() == 3 + assert document_store.count_documents() == 3 class WriteDocumentsTest: """ Utility class to test a Document Store `write_documents` method. - To use it create a custom test class and override the `docstore` fixture to return your Document Store. + To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: ```python class MyDocumentStoreTest(WriteDocumentsTest): @pytest.fixture - def docstore(self): + def document_store(self): return MyDocumentStore() ``` """ @pytest.mark.unit - def test_write(self, docstore: DocumentStore): + def test_write(self, document_store: DocumentStore): doc = Document(content="test doc") - docstore.write_documents([doc]) - assert docstore.filter_documents(filters={"id": doc.id}) == [doc] + document_store.write_documents([doc]) + assert document_store.filter_documents(filters={"id": doc.id}) == [doc] @pytest.mark.unit - def test_write_duplicate_fail(self, docstore: DocumentStore): + def test_write_duplicate_fail(self, document_store: DocumentStore): doc = Document(content="test doc") - docstore.write_documents([doc]) + document_store.write_documents([doc]) with pytest.raises(DuplicateDocumentError, match=f"ID '{doc.id}' already exists."): - docstore.write_documents(documents=[doc], policy=DuplicatePolicy.FAIL) - assert docstore.filter_documents(filters={"id": doc.id}) == [doc] + document_store.write_documents(documents=[doc], policy=DuplicatePolicy.FAIL) + assert document_store.filter_documents(filters={"id": doc.id}) == [doc] @pytest.mark.unit - def test_write_duplicate_skip(self, docstore: DocumentStore): + def test_write_duplicate_skip(self, document_store: DocumentStore): doc = Document(content="test doc") - docstore.write_documents([doc]) - docstore.write_documents(documents=[doc], policy=DuplicatePolicy.SKIP) - assert docstore.filter_documents(filters={"id": doc.id}) == [doc] + document_store.write_documents([doc]) + document_store.write_documents(documents=[doc], policy=DuplicatePolicy.SKIP) + assert document_store.filter_documents(filters={"id": doc.id}) == [doc] @pytest.mark.unit - def test_write_duplicate_overwrite(self, docstore: DocumentStore): + def test_write_duplicate_overwrite(self, document_store: DocumentStore): doc1 = Document(content="test doc 1") doc2 = Document(content="test doc 2") object.__setattr__(doc2, "id", doc1.id) # Make two docs with different content but same ID - docstore.write_documents([doc2]) - assert docstore.filter_documents(filters={"id": doc1.id}) == [doc2] - docstore.write_documents(documents=[doc1], policy=DuplicatePolicy.OVERWRITE) - assert docstore.filter_documents(filters={"id": doc1.id}) == [doc1] + document_store.write_documents([doc2]) + assert document_store.filter_documents(filters={"id": doc1.id}) == [doc2] + document_store.write_documents(documents=[doc1], policy=DuplicatePolicy.OVERWRITE) + assert document_store.filter_documents(filters={"id": doc1.id}) == [doc1] @pytest.mark.unit - def test_write_not_docs(self, docstore: DocumentStore): + def test_write_not_docs(self, document_store: DocumentStore): with pytest.raises(ValueError): - docstore.write_documents(["not a document for sure"]) # type: ignore + document_store.write_documents(["not a document for sure"]) # type: ignore @pytest.mark.unit - def test_write_not_list(self, docstore: DocumentStore): + def test_write_not_list(self, document_store: DocumentStore): with pytest.raises(ValueError): - docstore.write_documents("not a list actually") # type: ignore + document_store.write_documents("not a list actually") # type: ignore class DeleteDocumentsTest: """ Utility class to test a Document Store `delete_documents` method. - To use it create a custom test class and override the `docstore` fixture to return your Document Store. + To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: ```python class MyDocumentStoreTest(DeleteDocumentsTest): @pytest.fixture - def docstore(self): + def document_store(self): return MyDocumentStore() ``` """ @pytest.mark.unit - def test_delete_empty(self, docstore: DocumentStore): + def test_delete_empty(self, document_store: DocumentStore): with pytest.raises(MissingDocumentError): - docstore.delete_documents(["test"]) + document_store.delete_documents(["test"]) @pytest.mark.unit - def test_delete_not_empty(self, docstore: DocumentStore): + def test_delete_not_empty(self, document_store: DocumentStore): doc = Document(content="test doc") - docstore.write_documents([doc]) + document_store.write_documents([doc]) - docstore.delete_documents([doc.id]) + document_store.delete_documents([doc.id]) with pytest.raises(Exception): - assert docstore.filter_documents(filters={"id": doc.id}) + assert document_store.filter_documents(filters={"id": doc.id}) @pytest.mark.unit - def test_delete_not_empty_nonexisting(self, docstore: DocumentStore): + def test_delete_not_empty_nonexisting(self, document_store: DocumentStore): doc = Document(content="test doc") - docstore.write_documents([doc]) + document_store.write_documents([doc]) with pytest.raises(MissingDocumentError): - docstore.delete_documents(["non_existing"]) + document_store.delete_documents(["non_existing"]) - assert docstore.filter_documents(filters={"id": doc.id}) == [doc] + assert document_store.filter_documents(filters={"id": doc.id}) == [doc] class FilterableDocsFixtureMixin: @@ -195,87 +195,87 @@ class LegacyFilterDocumentsInvalidFiltersTest(FilterableDocsFixtureMixin): """ Utility class to test a Document Store `filter_documents` method using invalid legacy filters - To use it create a custom test class and override the `docstore` fixture to return your Document Store. + To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: ```python class MyDocumentStoreTest(LegacyFilterDocumentsInvalidFiltersTest): @pytest.fixture - def docstore(self): + def document_store(self): return MyDocumentStore() ``` """ @pytest.mark.unit - def test_incorrect_filter_type(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_incorrect_filter_type(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) with pytest.raises(FilterError): - docstore.filter_documents(filters="something odd") # type: ignore + document_store.filter_documents(filters="something odd") # type: ignore @pytest.mark.unit - def test_incorrect_filter_nesting(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_incorrect_filter_nesting(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) with pytest.raises(FilterError): - docstore.filter_documents(filters={"number": {"page": "100"}}) + document_store.filter_documents(filters={"number": {"page": "100"}}) @pytest.mark.unit - def test_deeper_incorrect_filter_nesting(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_deeper_incorrect_filter_nesting(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) with pytest.raises(FilterError): - docstore.filter_documents(filters={"number": {"page": {"chapter": "intro"}}}) + document_store.filter_documents(filters={"number": {"page": {"chapter": "intro"}}}) class LegacyFilterDocumentsEqualTest(FilterableDocsFixtureMixin): """ Utility class to test a Document Store `filter_documents` method using implicit and explicit '$eq' legacy filters - To use it create a custom test class and override the `docstore` fixture to return your Document Store. + To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: ```python class MyDocumentStoreTest(LegacyFilterDocumentsEqualTest): @pytest.fixture - def docstore(self): + def document_store(self): return MyDocumentStore() ``` """ @pytest.mark.unit - def test_filter_document_content(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"content": "A Foo Document 1"}) + def test_filter_document_content(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"content": "A Foo Document 1"}) assert result == [doc for doc in filterable_docs if doc.content == "A Foo Document 1"] @pytest.mark.unit - def test_filter_simple_metadata_value(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"page": "100"}) + def test_filter_simple_metadata_value(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"page": "100"}) assert result == [doc for doc in filterable_docs if doc.meta.get("page") == "100"] @pytest.mark.unit - def test_filter_document_dataframe(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"dataframe": pd.DataFrame([1])}) + def test_filter_document_dataframe(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"dataframe": pd.DataFrame([1])}) assert result == [ doc for doc in filterable_docs if doc.dataframe is not None and doc.dataframe.equals(pd.DataFrame([1])) ] @pytest.mark.unit - def test_eq_filter_explicit(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"page": {"$eq": "100"}}) + def test_eq_filter_explicit(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"page": {"$eq": "100"}}) assert result == [doc for doc in filterable_docs if doc.meta.get("page") == "100"] @pytest.mark.unit - def test_eq_filter_implicit(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"page": "100"}) + def test_eq_filter_implicit(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"page": "100"}) assert result == [doc for doc in filterable_docs if doc.meta.get("page") == "100"] @pytest.mark.unit - def test_eq_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"dataframe": pd.DataFrame([1])}) + def test_eq_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"dataframe": pd.DataFrame([1])}) assert result == [ doc for doc in filterable_docs @@ -283,10 +283,10 @@ class LegacyFilterDocumentsEqualTest(FilterableDocsFixtureMixin): ] @pytest.mark.unit - def test_eq_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_eq_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) embedding = [0.0] * 768 - result = docstore.filter_documents(filters={"embedding": embedding}) + result = document_store.filter_documents(filters={"embedding": embedding}) assert result == [doc for doc in filterable_docs if embedding == doc.embedding] @@ -294,27 +294,27 @@ class LegacyFilterDocumentsNotEqualTest(FilterableDocsFixtureMixin): """ Utility class to test a Document Store `filter_documents` method using explicit '$ne' legacy filters - To use it create a custom test class and override the `docstore` fixture to return your Document Store. + To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: ```python class MyDocumentStoreTest(LegacyFilterDocumentsNotEqualTest): @pytest.fixture - def docstore(self): + def document_store(self): return MyDocumentStore() ``` """ @pytest.mark.unit - def test_ne_filter(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"page": {"$ne": "100"}}) + def test_ne_filter(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"page": {"$ne": "100"}}) assert result == [doc for doc in filterable_docs if doc.meta.get("page") != "100"] @pytest.mark.unit - def test_ne_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"dataframe": {"$ne": pd.DataFrame([1])}}) + def test_ne_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"dataframe": {"$ne": pd.DataFrame([1])}}) assert result == [ doc for doc in filterable_docs @@ -322,10 +322,10 @@ class LegacyFilterDocumentsNotEqualTest(FilterableDocsFixtureMixin): ] @pytest.mark.unit - def test_ne_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_ne_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) embedding = np.zeros([768, 1]).astype(np.float32) - result = docstore.filter_documents(filters={"embedding": {"$ne": embedding}}) + result = document_store.filter_documents(filters={"embedding": {"$ne": embedding}}) assert result == [ doc for doc in filterable_docs @@ -337,63 +337,63 @@ class LegacyFilterDocumentsInTest(FilterableDocsFixtureMixin): """ Utility class to test a Document Store `filter_documents` method using implicit and explicit '$in' legacy filters - To use it create a custom test class and override the `docstore` fixture to return your Document Store. + To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: ```python class MyDocumentStoreTest(LegacyFilterDocumentsInTest): @pytest.fixture - def docstore(self): + def document_store(self): return MyDocumentStore() ``` """ @pytest.mark.unit - def test_filter_simple_list_single_element(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"page": ["100"]}) + def test_filter_simple_list_single_element(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"page": ["100"]}) assert result == [doc for doc in filterable_docs if doc.meta.get("page") == "100"] @pytest.mark.unit - def test_filter_simple_list_one_value(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"page": ["100"]}) + def test_filter_simple_list_one_value(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"page": ["100"]}) assert result == [doc for doc in filterable_docs if doc.meta.get("page") in ["100"]] @pytest.mark.unit - def test_filter_simple_list(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"page": ["100", "123"]}) + def test_filter_simple_list(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"page": ["100", "123"]}) assert result == [doc for doc in filterable_docs if doc.meta.get("page") in ["100", "123"]] @pytest.mark.unit - def test_incorrect_filter_name(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"non_existing_meta_field": ["whatever"]}) + def test_incorrect_filter_name(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"non_existing_meta_field": ["whatever"]}) assert len(result) == 0 @pytest.mark.unit - def test_incorrect_filter_value(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"page": ["nope"]}) + def test_incorrect_filter_value(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"page": ["nope"]}) assert len(result) == 0 @pytest.mark.unit - def test_in_filter_explicit(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"page": {"$in": ["100", "123", "n.a."]}}) + def test_in_filter_explicit(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"page": {"$in": ["100", "123", "n.a."]}}) assert result == [doc for doc in filterable_docs if doc.meta.get("page") in ["100", "123"]] @pytest.mark.unit - def test_in_filter_implicit(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"page": ["100", "123", "n.a."]}) + def test_in_filter_implicit(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"page": ["100", "123", "n.a."]}) assert result == [doc for doc in filterable_docs if doc.meta.get("page") in ["100", "123"]] @pytest.mark.unit - def test_in_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"dataframe": {"$in": [pd.DataFrame([1]), pd.DataFrame([2])]}}) + def test_in_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"dataframe": {"$in": [pd.DataFrame([1]), pd.DataFrame([2])]}}) assert result == [ doc for doc in filterable_docs @@ -402,11 +402,11 @@ class LegacyFilterDocumentsInTest(FilterableDocsFixtureMixin): ] @pytest.mark.unit - def test_in_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_in_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) embedding_zero = [0.0] * 768 embedding_one = [1.0] * 768 - result = docstore.filter_documents(filters={"embedding": {"$in": [embedding_zero, embedding_one]}}) + result = document_store.filter_documents(filters={"embedding": {"$in": [embedding_zero, embedding_one]}}) assert result == [ doc for doc in filterable_docs if (embedding_zero == doc.embedding or embedding_one == doc.embedding) ] @@ -416,21 +416,23 @@ class LegacyFilterDocumentsNotInTest(FilterableDocsFixtureMixin): """ Utility class to test a Document Store `filter_documents` method using explicit '$nin' legacy filters - To use it create a custom test class and override the `docstore` fixture to return your Document Store. + To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: ```python class MyDocumentStoreTest(LegacyFilterDocumentsNotInTest): @pytest.fixture - def docstore(self): + def document_store(self): return MyDocumentStore() ``` """ @pytest.mark.unit - def test_nin_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"dataframe": {"$nin": [pd.DataFrame([1]), pd.DataFrame([0])]}}) + def test_nin_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents( + filters={"dataframe": {"$nin": [pd.DataFrame([1]), pd.DataFrame([0])]}} + ) assert result == [ doc for doc in filterable_docs @@ -439,11 +441,11 @@ class LegacyFilterDocumentsNotInTest(FilterableDocsFixtureMixin): ] @pytest.mark.unit - def test_nin_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_nin_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) embedding_zeros = np.zeros([768, 1]).astype(np.float32) embedding_ones = np.zeros([768, 1]).astype(np.float32) - result = docstore.filter_documents(filters={"embedding": {"$nin": [embedding_ones, embedding_zeros]}}) + result = document_store.filter_documents(filters={"embedding": {"$nin": [embedding_ones, embedding_zeros]}}) assert result == [ doc for doc in filterable_docs @@ -451,9 +453,9 @@ class LegacyFilterDocumentsNotInTest(FilterableDocsFixtureMixin): ] @pytest.mark.unit - def test_nin_filter(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"page": {"$nin": ["100", "123", "n.a."]}}) + def test_nin_filter(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"page": {"$nin": ["100", "123", "n.a."]}}) assert result == [doc for doc in filterable_docs if doc.meta.get("page") not in ["100", "123"]] @@ -461,186 +463,186 @@ class LegacyFilterDocumentsGreaterThanTest(FilterableDocsFixtureMixin): """ Utility class to test a Document Store `filter_documents` method using explicit '$gt' legacy filters - To use it create a custom test class and override the `docstore` fixture to return your Document Store. + To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: ```python class MyDocumentStoreTest(LegacyFilterDocumentsGreaterThanTest): @pytest.fixture - def docstore(self): + def document_store(self): return MyDocumentStore() ``` """ @pytest.mark.unit - def test_gt_filter(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"number": {"$gt": 0.0}}) + def test_gt_filter(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"number": {"$gt": 0.0}}) assert result == [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] > 0] @pytest.mark.unit - def test_gt_filter_non_numeric(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_gt_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) with pytest.raises(FilterError): - docstore.filter_documents(filters={"page": {"$gt": "100"}}) + document_store.filter_documents(filters={"page": {"$gt": "100"}}) @pytest.mark.unit - def test_gt_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_gt_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) with pytest.raises(FilterError): - docstore.filter_documents(filters={"dataframe": {"$gt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) + document_store.filter_documents(filters={"dataframe": {"$gt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) @pytest.mark.unit - def test_gt_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_gt_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) embedding_zeros = np.zeros([768, 1]).astype(np.float32) with pytest.raises(FilterError): - docstore.filter_documents(filters={"embedding": {"$gt": embedding_zeros}}) + document_store.filter_documents(filters={"embedding": {"$gt": embedding_zeros}}) class LegacyFilterDocumentsGreaterThanEqualTest(FilterableDocsFixtureMixin): """ Utility class to test a Document Store `filter_documents` method using explicit '$gte' legacy filters - To use it create a custom test class and override the `docstore` fixture to return your Document Store. + To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: ```python class MyDocumentStoreTest(LegacyFilterDocumentsGreaterThanEqualTest): @pytest.fixture - def docstore(self): + def document_store(self): return MyDocumentStore() ``` """ @pytest.mark.unit - def test_gte_filter(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"number": {"$gte": -2}}) + def test_gte_filter(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"number": {"$gte": -2}}) assert result == [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] >= -2] @pytest.mark.unit - def test_gte_filter_non_numeric(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_gte_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) with pytest.raises(FilterError): - docstore.filter_documents(filters={"page": {"$gte": "100"}}) + document_store.filter_documents(filters={"page": {"$gte": "100"}}) @pytest.mark.unit - def test_gte_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_gte_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) with pytest.raises(FilterError): - docstore.filter_documents(filters={"dataframe": {"$gte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) + document_store.filter_documents(filters={"dataframe": {"$gte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) @pytest.mark.unit - def test_gte_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_gte_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) embedding_zeros = np.zeros([768, 1]).astype(np.float32) with pytest.raises(FilterError): - docstore.filter_documents(filters={"embedding": {"$gte": embedding_zeros}}) + document_store.filter_documents(filters={"embedding": {"$gte": embedding_zeros}}) class LegacyFilterDocumentsLessThanTest(FilterableDocsFixtureMixin): """ Utility class to test a Document Store `filter_documents` method using explicit '$lt' legacy filters - To use it create a custom test class and override the `docstore` fixture to return your Document Store. + To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: ```python class MyDocumentStoreTest(LegacyFilterDocumentsLessThanTest): @pytest.fixture - def docstore(self): + def document_store(self): return MyDocumentStore() ``` """ @pytest.mark.unit - def test_lt_filter(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"number": {"$lt": 0.0}}) + def test_lt_filter(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"number": {"$lt": 0.0}}) assert result == [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] < 0] @pytest.mark.unit - def test_lt_filter_non_numeric(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_lt_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) with pytest.raises(FilterError): - docstore.filter_documents(filters={"page": {"$lt": "100"}}) + document_store.filter_documents(filters={"page": {"$lt": "100"}}) @pytest.mark.unit - def test_lt_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_lt_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) with pytest.raises(FilterError): - docstore.filter_documents(filters={"dataframe": {"$lt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) + document_store.filter_documents(filters={"dataframe": {"$lt": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) @pytest.mark.unit - def test_lt_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_lt_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) embedding_ones = np.ones([768, 1]).astype(np.float32) with pytest.raises(FilterError): - docstore.filter_documents(filters={"embedding": {"$lt": embedding_ones}}) + document_store.filter_documents(filters={"embedding": {"$lt": embedding_ones}}) class LegacyFilterDocumentsLessThanEqualTest(FilterableDocsFixtureMixin): """ Utility class to test a Document Store `filter_documents` method using explicit '$lte' legacy filters - To use it create a custom test class and override the `docstore` fixture to return your Document Store. + To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: ```python class MyDocumentStoreTest(LegacyFilterDocumentsLessThanEqualTest): @pytest.fixture - def docstore(self): + def document_store(self): return MyDocumentStore() ``` """ @pytest.mark.unit - def test_lte_filter(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"number": {"$lte": 2.0}}) + def test_lte_filter(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"number": {"$lte": 2.0}}) assert result == [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] <= 2.0] @pytest.mark.unit - def test_lte_filter_non_numeric(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_lte_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) with pytest.raises(FilterError): - docstore.filter_documents(filters={"page": {"$lte": "100"}}) + document_store.filter_documents(filters={"page": {"$lte": "100"}}) @pytest.mark.unit - def test_lte_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_lte_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) with pytest.raises(FilterError): - docstore.filter_documents(filters={"dataframe": {"$lte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) + document_store.filter_documents(filters={"dataframe": {"$lte": pd.DataFrame([[1, 2, 3], [-1, -2, -3]])}}) @pytest.mark.unit - def test_lte_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_lte_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) embedding_ones = np.ones([768, 1]).astype(np.float32) with pytest.raises(FilterError): - docstore.filter_documents(filters={"embedding": {"$lte": embedding_ones}}) + document_store.filter_documents(filters={"embedding": {"$lte": embedding_ones}}) class LegacyFilterDocumentsSimpleLogicalTest(FilterableDocsFixtureMixin): """ Utility class to test a Document Store `filter_documents` method using logical '$and', '$or' and '$not' legacy filters - To use it create a custom test class and override the `docstore` fixture to return your Document Store. + To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: ```python class MyDocumentStoreTest(LegacyFilterDocumentsSimpleLogicalTest): @pytest.fixture - def docstore(self): + def document_store(self): return MyDocumentStore() ``` """ @pytest.mark.unit - def test_filter_simple_or(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_filter_simple_or(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) filters = {"$or": {"name": {"$in": ["name_0", "name_1"]}, "number": {"$lt": 1.0}}} - result = docstore.filter_documents(filters=filters) + result = document_store.filter_documents(filters=filters) assert result == [ doc for doc in filterable_docs @@ -649,10 +651,10 @@ class LegacyFilterDocumentsSimpleLogicalTest(FilterableDocsFixtureMixin): @pytest.mark.unit def test_filter_simple_implicit_and_with_multi_key_dict( - self, docstore: DocumentStore, filterable_docs: List[Document] + self, document_store: DocumentStore, filterable_docs: List[Document] ): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"number": {"$lte": 2.0, "$gte": 0.0}}) + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"number": {"$lte": 2.0, "$gte": 0.0}}) assert result == [ doc for doc in filterable_docs @@ -661,16 +663,16 @@ class LegacyFilterDocumentsSimpleLogicalTest(FilterableDocsFixtureMixin): @pytest.mark.unit def test_filter_simple_explicit_and_with_multikey_dict( - self, docstore: DocumentStore, filterable_docs: List[Document] + self, document_store: DocumentStore, filterable_docs: List[Document] ): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"number": {"$and": {"$gte": 0, "$lte": 2}}}) + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"number": {"$and": {"$gte": 0, "$lte": 2}}}) assert result == [doc for doc in filterable_docs if "number" in doc.meta and 0 <= doc.meta["number"] <= 2] @pytest.mark.unit - def test_filter_simple_explicit_and_with_list(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"number": {"$and": [{"$lte": 2}, {"$gte": 0}]}}) + def test_filter_simple_explicit_and_with_list(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"number": {"$and": [{"$lte": 2}, {"$gte": 0}]}}) assert result == [ doc for doc in filterable_docs @@ -678,9 +680,9 @@ class LegacyFilterDocumentsSimpleLogicalTest(FilterableDocsFixtureMixin): ] @pytest.mark.unit - def test_filter_simple_implicit_and(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) - result = docstore.filter_documents(filters={"number": {"$lte": 2.0, "$gte": 0}}) + def test_filter_simple_implicit_and(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) + result = document_store.filter_documents(filters={"number": {"$lte": 2.0, "$gte": 0}}) assert result == [ doc for doc in filterable_docs @@ -692,22 +694,22 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin): """ Utility class to test a Document Store `filter_documents` method using multiple nested logical '$and', '$or' and '$not' legacy filters - To use it create a custom test class and override the `docstore` fixture to return your Document Store. + To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: ```python class MyDocumentStoreTest(LegacyFilterDocumentsNestedLogicalTest): @pytest.fixture - def docstore(self): + def document_store(self): return MyDocumentStore() ``` """ @pytest.mark.unit - def test_filter_nested_explicit_and(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_filter_nested_explicit_and(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) filters = {"$and": {"number": {"$and": {"$lte": 2, "$gte": 0}}, "name": {"$in": ["name_0", "name_1"]}}} - result = docstore.filter_documents(filters=filters) + result = document_store.filter_documents(filters=filters) assert result == [ doc for doc in filterable_docs @@ -720,10 +722,10 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin): ] @pytest.mark.unit - def test_filter_nested_implicit_and(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_filter_nested_implicit_and(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) filters_simplified = {"number": {"$lte": 2, "$gte": 0}, "name": ["name_0", "name_1"]} - result = docstore.filter_documents(filters=filters_simplified) + result = document_store.filter_documents(filters=filters_simplified) assert result == [ doc for doc in filterable_docs @@ -736,10 +738,10 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin): ] @pytest.mark.unit - def test_filter_nested_or(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_filter_nested_or(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) filters = {"$or": {"name": {"$or": [{"$eq": "name_0"}, {"$eq": "name_1"}]}, "number": {"$lt": 1.0}}} - result = docstore.filter_documents(filters=filters) + result = document_store.filter_documents(filters=filters) assert result == [ doc for doc in filterable_docs @@ -747,12 +749,12 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin): ] @pytest.mark.unit - def test_filter_nested_and_or_explicit(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_filter_nested_and_or_explicit(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) filters_simplified = { "$and": {"page": {"$eq": "123"}, "$or": {"name": {"$in": ["name_0", "name_1"]}, "number": {"$lt": 1.0}}} } - result = docstore.filter_documents(filters=filters_simplified) + result = document_store.filter_documents(filters=filters_simplified) assert result == [ doc for doc in filterable_docs @@ -763,13 +765,13 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin): ] @pytest.mark.unit - def test_filter_nested_and_or_implicit(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_filter_nested_and_or_implicit(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) filters_simplified = { "page": {"$eq": "123"}, "$or": {"name": {"$in": ["name_0", "name_1"]}, "number": {"$lt": 1.0}}, } - result = docstore.filter_documents(filters=filters_simplified) + result = document_store.filter_documents(filters=filters_simplified) assert result == [ doc for doc in filterable_docs @@ -780,15 +782,15 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin): ] @pytest.mark.unit - def test_filter_nested_or_and(self, docstore: DocumentStore, filterable_docs: List[Document]): - docstore.write_documents(filterable_docs) + def test_filter_nested_or_and(self, document_store: DocumentStore, filterable_docs: List[Document]): + document_store.write_documents(filterable_docs) filters_simplified = { "$or": { "number": {"$lt": 1}, "$and": {"name": {"$in": ["name_0", "name_1"]}, "$not": {"chapter": {"$eq": "intro"}}}, } } - result = docstore.filter_documents(filters=filters_simplified) + result = document_store.filter_documents(filters=filters_simplified) assert result == [ doc for doc in filterable_docs @@ -803,16 +805,16 @@ class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin): @pytest.mark.unit def test_filter_nested_multiple_identical_operators_same_level( - self, docstore: DocumentStore, filterable_docs: List[Document] + self, document_store: DocumentStore, filterable_docs: List[Document] ): - docstore.write_documents(filterable_docs) + document_store.write_documents(filterable_docs) filters = { "$or": [ {"$and": {"name": {"$in": ["name_0", "name_1"]}, "page": "100"}}, {"$and": {"chapter": {"$in": ["intro", "abstract"]}, "page": "123"}}, ] } - result = docstore.filter_documents(filters=filters) + result = document_store.filter_documents(filters=filters) assert result == [ doc for doc in filterable_docs @@ -839,33 +841,33 @@ class LegacyFilterDocumentsTest( # pylint: disable=too-many-ancestors """ Utility class to test a Document Store `filter_documents` method using different types of legacy filters - To use it create a custom test class and override the `docstore` fixture to return your Document Store. + To use it create a custom test class and override the `document_store` fixture to return your Document Store. Example usage: ```python class MyDocumentStoreTest(LegacyFilterDocumentsTest): @pytest.fixture - def docstore(self): + def document_store(self): return MyDocumentStore() ``` """ @pytest.mark.unit - def test_no_filter_empty(self, docstore: DocumentStore): - assert docstore.filter_documents() == [] - assert docstore.filter_documents(filters={}) == [] + def test_no_filter_empty(self, document_store: DocumentStore): + assert document_store.filter_documents() == [] + assert document_store.filter_documents(filters={}) == [] @pytest.mark.unit - def test_no_filter_not_empty(self, docstore: DocumentStore): + def test_no_filter_not_empty(self, document_store: DocumentStore): docs = [Document(content="test doc")] - docstore.write_documents(docs) - assert docstore.filter_documents() == docs - assert docstore.filter_documents(filters={}) == docs + document_store.write_documents(docs) + assert document_store.filter_documents() == docs + assert document_store.filter_documents(filters={}) == docs class DocumentStoreBaseTests( CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, LegacyFilterDocumentsTest ): # pylint: disable=too-many-ancestors @pytest.fixture - def docstore(self) -> DocumentStore: + def document_store(self) -> DocumentStore: raise NotImplementedError() diff --git a/test/preview/document_stores/test_in_memory.py b/test/preview/document_stores/test_in_memory.py index edb949868..65d148c12 100644 --- a/test/preview/document_stores/test_in_memory.py +++ b/test/preview/document_stores/test_in_memory.py @@ -17,7 +17,7 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904 """ @pytest.fixture - def docstore(self) -> InMemoryDocumentStore: + def document_store(self) -> InMemoryDocumentStore: return InMemoryDocumentStore() @pytest.mark.unit @@ -71,45 +71,45 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904 assert store.bm25_parameters == {"key": "value"} @pytest.mark.unit - def test_written_documents_count(self, docstore: InMemoryDocumentStore): + def test_written_documents_count(self, document_store: InMemoryDocumentStore): # FIXME Remove after the document store base tests have been rewritten documents = [Document(content=f"Hello world #{i}") for i in range(10)] - docs_written = docstore.write_documents(documents[0:2]) + docs_written = document_store.write_documents(documents[0:2]) assert docs_written == 2 - assert docstore.filter_documents() == documents[0:2] + assert document_store.filter_documents() == documents[0:2] - docs_written = docstore.write_documents(documents, DuplicatePolicy.SKIP) + docs_written = document_store.write_documents(documents, DuplicatePolicy.SKIP) assert docs_written == len(documents) - 2 - assert docstore.filter_documents() == documents + assert document_store.filter_documents() == documents @pytest.mark.unit - def test_bm25_retrieval(self, docstore: InMemoryDocumentStore): - docstore = InMemoryDocumentStore() + def test_bm25_retrieval(self, document_store: InMemoryDocumentStore): + document_store = InMemoryDocumentStore() # Tests if the bm25_retrieval method returns the correct document based on the input query. docs = [Document(content="Hello world"), Document(content="Haystack supports multiple languages")] - docstore.write_documents(docs) - results = docstore.bm25_retrieval(query="What languages?", top_k=1) + document_store.write_documents(docs) + results = document_store.bm25_retrieval(query="What languages?", top_k=1) assert len(results) == 1 assert results[0].content == "Haystack supports multiple languages" @pytest.mark.unit - def test_bm25_retrieval_with_empty_document_store(self, docstore: InMemoryDocumentStore, caplog): + def test_bm25_retrieval_with_empty_document_store(self, document_store: InMemoryDocumentStore, caplog): caplog.set_level(logging.INFO) # Tests if the bm25_retrieval method correctly returns an empty list when there are no documents in the DocumentStore. - results = docstore.bm25_retrieval(query="How to test this?", top_k=2) + results = document_store.bm25_retrieval(query="How to test this?", top_k=2) assert len(results) == 0 assert "No documents found for BM25 retrieval. Returning empty list." in caplog.text @pytest.mark.unit - def test_bm25_retrieval_empty_query(self, docstore: InMemoryDocumentStore): + def test_bm25_retrieval_empty_query(self, document_store: InMemoryDocumentStore): # Tests if the bm25_retrieval method returns a document when the query is an empty string. docs = [Document(content="Hello world"), Document(content="Haystack supports multiple languages")] - docstore.write_documents(docs) + document_store.write_documents(docs) with pytest.raises(ValueError, match="Query should be a non-empty string"): - docstore.bm25_retrieval(query="", top_k=1) + document_store.bm25_retrieval(query="", top_k=1) @pytest.mark.unit - def test_bm25_retrieval_with_different_top_k(self, docstore: InMemoryDocumentStore): + def test_bm25_retrieval_with_different_top_k(self, document_store: InMemoryDocumentStore): # Tests if the bm25_retrieval method correctly changes the number of returned documents # based on the top_k parameter. docs = [ @@ -117,19 +117,19 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904 Document(content="Haystack supports multiple languages"), Document(content="Python is a popular programming language"), ] - docstore.write_documents(docs) + document_store.write_documents(docs) # top_k = 2 - results = docstore.bm25_retrieval(query="languages", top_k=2) + results = document_store.bm25_retrieval(query="languages", top_k=2) assert len(results) == 2 # top_k = 3 - results = docstore.bm25_retrieval(query="languages", top_k=3) + results = document_store.bm25_retrieval(query="languages", top_k=3) assert len(results) == 3 # Test two queries and make sure the results are different @pytest.mark.unit - def test_bm25_retrieval_with_two_queries(self, docstore: InMemoryDocumentStore): + def test_bm25_retrieval_with_two_queries(self, document_store: InMemoryDocumentStore): # Tests if the bm25_retrieval method returns different documents for different queries. docs = [ Document(content="Javascript is a popular programming language"), @@ -138,60 +138,60 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904 Document(content="Ruby is a popular programming language"), Document(content="PHP is a popular programming language"), ] - docstore.write_documents(docs) + document_store.write_documents(docs) - results = docstore.bm25_retrieval(query="Java", top_k=1) + results = document_store.bm25_retrieval(query="Java", top_k=1) assert results[0].content == "Java is a popular programming language" - results = docstore.bm25_retrieval(query="Python", top_k=1) + results = document_store.bm25_retrieval(query="Python", top_k=1) assert results[0].content == "Python is a popular programming language" @pytest.mark.skip(reason="Filter is not working properly, see https://github.com/deepset-ai/haystack/issues/6153") - def test_eq_filter_embedding(self, docstore: InMemoryDocumentStore, filterable_docs): + def test_eq_filter_embedding(self, document_store: InMemoryDocumentStore, filterable_docs): pass # Test a query, add a new document and make sure results are appropriately updated @pytest.mark.unit - def test_bm25_retrieval_with_updated_docs(self, docstore: InMemoryDocumentStore): + def test_bm25_retrieval_with_updated_docs(self, document_store: InMemoryDocumentStore): # Tests if the bm25_retrieval method correctly updates the retrieved documents when new # documents are added to the DocumentStore. docs = [Document(content="Hello world")] - docstore.write_documents(docs) + document_store.write_documents(docs) - results = docstore.bm25_retrieval(query="Python", top_k=1) + results = document_store.bm25_retrieval(query="Python", top_k=1) assert len(results) == 1 - docstore.write_documents([Document(content="Python is a popular programming language")]) - results = docstore.bm25_retrieval(query="Python", top_k=1) + document_store.write_documents([Document(content="Python is a popular programming language")]) + results = document_store.bm25_retrieval(query="Python", top_k=1) assert len(results) == 1 assert results[0].content == "Python is a popular programming language" - docstore.write_documents([Document(content="Java is a popular programming language")]) - results = docstore.bm25_retrieval(query="Python", top_k=1) + document_store.write_documents([Document(content="Java is a popular programming language")]) + results = document_store.bm25_retrieval(query="Python", top_k=1) assert len(results) == 1 assert results[0].content == "Python is a popular programming language" @pytest.mark.unit - def test_bm25_retrieval_with_scale_score(self, docstore: InMemoryDocumentStore): + def test_bm25_retrieval_with_scale_score(self, document_store: InMemoryDocumentStore): docs = [Document(content="Python programming"), Document(content="Java programming")] - docstore.write_documents(docs) + document_store.write_documents(docs) - results1 = docstore.bm25_retrieval(query="Python", top_k=1, scale_score=True) + results1 = document_store.bm25_retrieval(query="Python", top_k=1, scale_score=True) # Confirm that score is scaled between 0 and 1 assert results1[0].score is not None assert 0.0 <= results1[0].score <= 1.0 # Same query, different scale, scores differ when not scaled - results = docstore.bm25_retrieval(query="Python", top_k=1, scale_score=False) + results = document_store.bm25_retrieval(query="Python", top_k=1, scale_score=False) assert results[0].score != results1[0].score @pytest.mark.unit - def test_bm25_retrieval_with_table_content(self, docstore: InMemoryDocumentStore): + def test_bm25_retrieval_with_table_content(self, document_store: InMemoryDocumentStore): # Tests if the bm25_retrieval method correctly returns a dataframe when the content_type is table. table_content = pd.DataFrame({"language": ["Python", "Java"], "use": ["Data Science", "Web Development"]}) docs = [Document(dataframe=table_content), Document(content="Gardening"), Document(content="Bird watching")] - docstore.write_documents(docs) - results = docstore.bm25_retrieval(query="Java", top_k=1) + document_store.write_documents(docs) + results = document_store.bm25_retrieval(query="Java", top_k=1) assert len(results) == 1 df = results[0].dataframe @@ -199,7 +199,7 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904 assert df.equals(table_content) @pytest.mark.unit - def test_bm25_retrieval_with_text_and_table_content(self, docstore: InMemoryDocumentStore, caplog): + def test_bm25_retrieval_with_text_and_table_content(self, document_store: InMemoryDocumentStore, caplog): table_content = pd.DataFrame({"language": ["Python", "Java"], "use": ["Data Science", "Web Development"]}) document = Document(content="Gardening", dataframe=table_content) docs = [ @@ -209,51 +209,51 @@ class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904 Document(content="Gardening"), Document(content="Java"), ] - docstore.write_documents(docs) - results = docstore.bm25_retrieval(query="Gardening", top_k=2) + document_store.write_documents(docs) + results = document_store.bm25_retrieval(query="Gardening", top_k=2) assert document.id in [d.id for d in results] assert "both text and dataframe content" in caplog.text - results = docstore.bm25_retrieval(query="Python", top_k=2) + results = document_store.bm25_retrieval(query="Python", top_k=2) assert document.id not in [d.id for d in results] @pytest.mark.unit - def test_bm25_retrieval_default_filter_for_text_and_dataframes(self, docstore: InMemoryDocumentStore): + def test_bm25_retrieval_default_filter_for_text_and_dataframes(self, document_store: InMemoryDocumentStore): docs = [Document(), Document(content="Gardening"), Document(content="Bird watching")] - docstore.write_documents(docs) - results = docstore.bm25_retrieval(query="doesn't matter, top_k is 10", top_k=10) + document_store.write_documents(docs) + results = document_store.bm25_retrieval(query="doesn't matter, top_k is 10", top_k=10) assert len(results) == 2 @pytest.mark.unit - def test_bm25_retrieval_with_filters(self, docstore: InMemoryDocumentStore): + def test_bm25_retrieval_with_filters(self, document_store: InMemoryDocumentStore): selected_document = Document(content="Gardening", meta={"selected": True}) docs = [Document(), selected_document, Document(content="Bird watching")] - docstore.write_documents(docs) - results = docstore.bm25_retrieval(query="Java", top_k=10, filters={"selected": True}) + document_store.write_documents(docs) + results = document_store.bm25_retrieval(query="Java", top_k=10, filters={"selected": True}) assert len(results) == 1 assert results[0].id == selected_document.id @pytest.mark.unit - def test_bm25_retrieval_with_filters_keeps_default_filters(self, docstore: InMemoryDocumentStore): + def test_bm25_retrieval_with_filters_keeps_default_filters(self, document_store: InMemoryDocumentStore): docs = [Document(meta={"selected": True}), Document(content="Gardening"), Document(content="Bird watching")] - docstore.write_documents(docs) - results = docstore.bm25_retrieval(query="Java", top_k=10, filters={"selected": True}) + document_store.write_documents(docs) + results = document_store.bm25_retrieval(query="Java", top_k=10, filters={"selected": True}) assert len(results) == 0 @pytest.mark.unit - def test_bm25_retrieval_with_filters_on_text_or_dataframe(self, docstore: InMemoryDocumentStore): + def test_bm25_retrieval_with_filters_on_text_or_dataframe(self, document_store: InMemoryDocumentStore): document = Document(dataframe=pd.DataFrame({"language": ["Python", "Java"], "use": ["Data Science", "Web"]})) docs = [Document(), Document(content="Gardening"), Document(content="Bird watching"), document] - docstore.write_documents(docs) - results = docstore.bm25_retrieval(query="Java", top_k=10, filters={"content": None}) + document_store.write_documents(docs) + results = document_store.bm25_retrieval(query="Java", top_k=10, filters={"content": None}) assert len(results) == 1 assert results[0].id == document.id @pytest.mark.unit - def test_bm25_retrieval_with_documents_with_mixed_content(self, docstore: InMemoryDocumentStore): + def test_bm25_retrieval_with_documents_with_mixed_content(self, document_store: InMemoryDocumentStore): double_document = Document(content="Gardening", embedding=[1.0, 2.0, 3.0]) docs = [Document(embedding=[1.0, 2.0, 3.0]), double_document, Document(content="Bird watching")] - docstore.write_documents(docs) - results = docstore.bm25_retrieval(query="Java", top_k=10, filters={"embedding": {"$not": None}}) + document_store.write_documents(docs) + results = document_store.bm25_retrieval(query="Java", top_k=10, filters={"embedding": {"$not": None}}) assert len(results) == 1 assert results[0].id == double_document.id