mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-31 03:46:30 +00:00
refactor: Move InMemoryDocumentStore
tests to their own class (#3614)
* move tests to their own class
* move more tests
* add specific job
* fix test
* Update test/document_stores/test_memory.py

Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
This commit is contained in:
parent
0e05f71f33
commit
a15af7f8c3
28
.github/workflows/tests.yml
vendored
28
.github/workflows/tests.yml
vendored
@ -428,6 +428,34 @@ jobs:
|
||||
channel: '#haystack'
|
||||
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
|
||||
|
||||
integration-tests-memory:
  # Runs the tests in test/document_stores/test_memory.py once per OS in the
  # matrix, after the unit-tests job has completed.
  name: Integration / memory / ${{ matrix.os }}
  needs: [unit-tests]
  strategy:
    # Let every OS finish even when one of them fails.
    fail-fast: false
    matrix:
      os:
        - ubuntu-latest
        - macos-latest
        - windows-latest
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v3

    - name: Setup Python
      uses: ./.github/actions/python_cache/

    - name: Install Haystack
      run: pip install -U .

    - name: Run tests
      # Only tests matching both the document_store and integration markers are selected.
      run: |
        pytest --maxfail=5 -m "document_store and integration" test/document_stores/test_memory.py

    # Slack notification, sent only when the job fails on main of the deepset-ai repository.
    - uses: act10ns/slack@v1
      with:
        status: ${{ job.status }}
        channel: '#haystack'
      if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
|
||||
|
||||
|
||||
#
|
||||
# TODO: the following steps need to be revisited
|
||||
|
@ -1436,41 +1436,3 @@ def test_cosine_sanity_check(document_store_small):
|
||||
|
||||
# check if faiss returns the same cosine similarity. Manual testing with faiss yielded 0.9746318
|
||||
assert math.isclose(query_results[0].score, KNOWN_COSINE, abs_tol=0.0002)
|
||||
|
||||
|
||||
def test_normalize_embeddings_diff_shapes():
    """
    Check that normalize_embedding yields a unit-norm vector for both a flat
    (3,) array and a (1, 3) row array.

    The arrays are checked after the call without using a return value, so the
    test relies on normalize_embedding modifying its argument in place.
    """
    flat_vec = np.array([0.1, 0.2, 0.3], dtype="float32")
    BaseDocumentStore.normalize_embedding(flat_vec)
    # Compare the absolute deviation from 1: a one-sided `norm - 1 < 0.01`
    # check also passes for the un-normalized input, whose norm is about 0.37.
    assert abs(np.linalg.norm(flat_vec) - 1) < 0.01

    row_vec = np.array([0.1, 0.2, 0.3], dtype="float32").reshape(1, -1)
    BaseDocumentStore.normalize_embedding(row_vec)
    assert abs(np.linalg.norm(row_vec) - 1) < 0.01
|
||||
|
||||
|
||||
def test_memory_update_bm25():
    """
    Build the BM25 representation explicitly on a store created with
    use_bm25=False and check that it covers every written document.
    """
    store = InMemoryDocumentStore(use_bm25=False)
    store.write_documents(DOCUMENTS)
    store.update_bm25()

    index_bm25 = store.bm25[store.index]
    assert isinstance(index_bm25, BM25)
    assert index_bm25.corpus_size == store.get_document_count()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
def test_memory_query(document_store_with_docs):
    """Query the store for "Rome" with top_k=1 and check the single returned document."""
    hits = document_store_with_docs.query(query="Rome", top_k=1)

    assert len(hits) == 1
    assert hits[0].content == "My name is Matteo and I live in Rome"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
def test_memory_query_batch(document_store_with_docs):
    """
    Run two queries in one batch and check that each returns five documents,
    with the expected document ranked first.
    """
    expected_top_contents = [
        "My name is Christelle and I live in Paris",
        "My name is Camila and I live in Madrid",
    ]
    batches = document_store_with_docs.query_batch(queries=["Paris", "Madrid"], top_k=5)

    assert len(batches) == 2
    # One result list per query, in query order.
    for hits, top_content in zip(batches, expected_top_contents):
        assert len(hits) == 5
        assert hits[0].content == top_content
|
||||
|
93
test/document_stores/test_memory.py
Normal file
93
test/document_stores/test_memory.py
Normal file
@ -0,0 +1,93 @@
|
||||
import logging
|
||||
|
||||
import pytest
|
||||
from rank_bm25 import BM25
|
||||
|
||||
from haystack.document_stores.memory import InMemoryDocumentStore
|
||||
from haystack.schema import Document
|
||||
|
||||
from .test_base import DocumentStoreBaseTestAbstract
|
||||
|
||||
|
||||
class TestInMemoryDocumentStore(DocumentStoreBaseTestAbstract):
    """
    Test cases for InMemoryDocumentStore.

    The class extends DocumentStoreBaseTestAbstract; the methods below either
    replace a test of the same name, skip it, or add store-specific checks.
    """

    @pytest.fixture
    def ds(self):
        """Provide a store with BM25 enabled that returns embeddings."""
        store = InMemoryDocumentStore(use_bm25=True, return_embedding=True)
        return store

    @pytest.mark.integration
    def test_delete_index(self, ds, documents):
        """
        After delete_index the document count of the index is 0; unlike other
        document stores, no error is raised for the emptied index.
        """
        index_name = "custom_index"
        ds.write_documents(documents, index=index_name)
        assert ds.get_document_count(index=index_name) == len(documents)

        ds.delete_index(index=index_name)
        assert ds.get_document_count(index=index_name) == 0

    @pytest.mark.integration
    def test_ne_filters(self, ds, documents):
        """
        Customized $ne check: InMemory leaves out documents that do not have
        the filtered field at all.
        """
        ds.write_documents(documents)

        not_2020 = ds.get_all_documents(filters={"year": {"$ne": "2020"}})
        assert len(not_2020) == 3

    # NOTE(review): the four skipped tests below give no skip reason;
    # presumably these filter types are not supported by this store. Confirm.
    @pytest.mark.skip
    @pytest.mark.integration
    def test_nin_filters(self, ds, documents):
        """Skipped for InMemoryDocumentStore."""

    @pytest.mark.skip
    @pytest.mark.integration
    def test_comparison_filters(self, ds, documents):
        """Skipped for InMemoryDocumentStore."""

    @pytest.mark.skip
    @pytest.mark.integration
    def test_nested_condition_filters(self, ds, documents):
        """Skipped for InMemoryDocumentStore."""

    @pytest.mark.skip
    @pytest.mark.integration
    def test_nested_condition_not_filters(self, ds, documents):
        """Skipped for InMemoryDocumentStore."""

    @pytest.mark.integration
    def test_get_documents_by_id(self, ds, documents):
        """
        Override of the base test case, which passes a batch_size param that
        is not supported here.
        """
        ds.write_documents(documents)

        expected_ids = [document.id for document in documents]
        retrieved = ds.get_documents_by_id(expected_ids)
        assert set(expected_ids) == {document.id for document in retrieved}

    @pytest.mark.integration
    def test_update_bm25(self, documents):
        """
        Build the BM25 representation explicitly on a store created with
        use_bm25=False and check that it covers every written document.
        """
        store = InMemoryDocumentStore(use_bm25=False)
        store.write_documents(documents)
        store.update_bm25()

        index_bm25 = store.bm25[store.index]
        assert isinstance(index_bm25, BM25)
        assert index_bm25.corpus_size == store.get_document_count()

    @pytest.mark.integration
    def test_memory_query(self, ds, documents):
        """Query for "Bar" with top_k=1 and check the single returned document."""
        ds.write_documents(documents)

        hits = ds.query(query="Bar", top_k=1)
        assert len(hits) == 1
        assert "A Bar Document" in hits[0].content

    @pytest.mark.integration
    def test_memory_query_batch(self, ds, documents):
        """
        Run two queries in one batch and check that each returns five
        documents, with a matching document ranked first.
        """
        ds.write_documents(documents)

        expected_snippets = ["A Foo Document", "A Bar Document"]
        batches = ds.query_batch(queries=["Foo", "Bar"], top_k=5)

        assert len(batches) == 2
        # One result list per query, in query order.
        for hits, snippet in zip(batches, expected_snippets):
            assert len(hits) == 5
            assert snippet in hits[0].content
|
Loading…
x
Reference in New Issue
Block a user