mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-09-01 20:33:48 +00:00
refactor: Move InMemoryDocumentStore
tests to their own class (#3614)
* move tests to their own class
* move more tests
* add specific job
* fix test
* Update test/document_stores/test_memory.py

Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
This commit is contained in:
parent
0e05f71f33
commit
a15af7f8c3
28
.github/workflows/tests.yml
vendored
28
.github/workflows/tests.yml
vendored
@ -428,6 +428,34 @@ jobs:
|
|||||||
channel: '#haystack'
|
channel: '#haystack'
|
||||||
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
|
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
|
||||||
|
|
||||||
|
# Integration tests for the in-memory document store, run once per OS in the matrix.
integration-tests-memory:
  name: Integration / memory / ${{ matrix.os }}
  # Only starts after the unit-tests job.
  needs:
    - unit-tests
  runs-on: ${{ matrix.os }}
  strategy:
    # Let the remaining OS runs finish even if one of them fails.
    fail-fast: false
    matrix:
      os:
        - ubuntu-latest
        - macos-latest
        - windows-latest
  steps:
    - uses: actions/checkout@v3

    - name: Setup Python
      uses: ./.github/actions/python_cache/

    - name: Install Haystack
      run: pip install -U .

    # Restricted to the in-memory store's test module and to tests carrying
    # both the document_store and integration markers.
    - name: Run tests
      run: |
        pytest --maxfail=5 -m "document_store and integration" test/document_stores/test_memory.py

    # Slack notification: only on failure, only in the deepset-ai repository,
    # and only for the main branch.
    - uses: act10ns/slack@v1
      if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
      with:
        status: ${{ job.status }}
        channel: '#haystack'
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# TODO: the following steps need to be revisited
|
# TODO: the following steps need to be revisited
|
||||||
|
@ -1436,41 +1436,3 @@ def test_cosine_sanity_check(document_store_small):
|
|||||||
|
|
||||||
# check if faiss returns the same cosine similarity. Manual testing with faiss yielded 0.9746318
|
# check if faiss returns the same cosine similarity. Manual testing with faiss yielded 0.9746318
|
||||||
assert math.isclose(query_results[0].score, KNOWN_COSINE, abs_tol=0.0002)
|
assert math.isclose(query_results[0].score, KNOWN_COSINE, abs_tol=0.0002)
|
||||||
|
|
||||||
|
|
||||||
def test_normalize_embeddings_diff_shapes():
    """Check that ``normalize_embedding`` yields unit L2 norm for 1D and 2D input.

    The norm is read back from the array that was passed in, because the
    return value of the call is not used (presumably the normalization is
    done in place -- confirm against ``BaseDocumentStore``).
    """
    # 1D input: a single vector.
    vec_1d = np.array([0.1, 0.2, 0.3], dtype="float32")
    BaseDocumentStore.normalize_embedding(vec_1d)
    # Compare the absolute deviation from 1. A one-sided ``norm - 1 < 0.01``
    # is also true for every norm below 1, including the un-normalized input
    # (norm ~0.37), so it could never detect a missing normalization.
    assert abs(np.linalg.norm(vec_1d) - 1) < 0.01

    # 2D input: the same values as a single-row matrix.
    vec_2d = np.array([0.1, 0.2, 0.3], dtype="float32").reshape(1, -1)
    BaseDocumentStore.normalize_embedding(vec_2d)
    assert abs(np.linalg.norm(vec_2d) - 1) < 0.01
|
|
||||||
|
|
||||||
|
|
||||||
def test_memory_update_bm25():
    """Call ``update_bm25`` on a store created with ``use_bm25=False``.

    Afterwards the entry for the store's index must be a ``BM25`` object whose
    corpus size equals the number of stored documents.
    """
    store = InMemoryDocumentStore(use_bm25=False)
    store.write_documents(DOCUMENTS)
    store.update_bm25()

    index_bm25 = store.bm25[store.index]
    assert isinstance(index_bm25, BM25)
    assert index_bm25.corpus_size == store.get_document_count()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
def test_memory_query(document_store_with_docs):
    """Query for "Rome" with ``top_k=1`` and expect the single Rome document back."""
    results = document_store_with_docs.query(query="Rome", top_k=1)

    assert len(results) == 1
    assert results[0].content == "My name is Matteo and I live in Rome"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
def test_memory_query_batch(document_store_with_docs):
    """Run two queries in one batch and check each result list and its top hit."""
    expected_top_hits = [
        "My name is Christelle and I live in Paris",
        "My name is Camila and I live in Madrid",
    ]
    per_query = document_store_with_docs.query_batch(queries=["Paris", "Madrid"], top_k=5)

    # One result list per query.
    assert len(per_query) == 2
    for hits, top_content in zip(per_query, expected_top_hits):
        assert len(hits) == 5
        assert hits[0].content == top_content
|
|
||||||
|
93
test/document_stores/test_memory.py
Normal file
93
test/document_stores/test_memory.py
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from rank_bm25 import BM25
|
||||||
|
|
||||||
|
from haystack.document_stores.memory import InMemoryDocumentStore
|
||||||
|
from haystack.schema import Document
|
||||||
|
|
||||||
|
from .test_base import DocumentStoreBaseTestAbstract
|
||||||
|
|
||||||
|
|
||||||
|
class TestInMemoryDocumentStore(DocumentStoreBaseTestAbstract):
    """Test cases for ``InMemoryDocumentStore`` on top of the shared base suite.

    Besides the inherited tests, this class customizes or skips a handful of
    cases and adds tests for BM25 indexing and querying.
    """

    @pytest.fixture
    def ds(self):
        """Provide a fresh store with BM25 enabled that returns embeddings."""
        return InMemoryDocumentStore(use_bm25=True, return_embedding=True)

    @pytest.mark.integration
    def test_delete_index(self, ds, documents):
        """Unlike other Document Stores, counting on a deleted index doesn't raise."""
        index_name = "custom_index"

        ds.write_documents(documents, index=index_name)
        assert ds.get_document_count(index=index_name) == len(documents)

        ds.delete_index(index=index_name)
        assert ds.get_document_count(index=index_name) == 0

    @pytest.mark.integration
    def test_ne_filters(self, ds, documents):
        """
        Customized for this store: InMemory leaves out documents that
        don't have the filtered field at all.
        """
        ds.write_documents(documents)

        not_2020 = ds.get_all_documents(filters={"year": {"$ne": "2020"}})
        assert len(not_2020) == 3

    @pytest.mark.skip
    @pytest.mark.integration
    def test_nin_filters(self, ds, documents):
        """Skipped for this store."""

    @pytest.mark.skip
    @pytest.mark.integration
    def test_comparison_filters(self, ds, documents):
        """Skipped for this store."""

    @pytest.mark.skip
    @pytest.mark.integration
    def test_nested_condition_filters(self, ds, documents):
        """Skipped for this store."""

    @pytest.mark.skip
    @pytest.mark.integration
    def test_nested_condition_not_filters(self, ds, documents):
        """Skipped for this store."""

    @pytest.mark.integration
    def test_get_documents_by_id(self, ds, documents):
        """
        Overrides the base test case, which relies on the batch_size
        param that this store doesn't support.
        """
        ds.write_documents(documents)

        expected_ids = [doc.id for doc in documents]
        retrieved = ds.get_documents_by_id(expected_ids)
        assert set(expected_ids) == {doc.id for doc in retrieved}

    @pytest.mark.integration
    def test_update_bm25(self, documents):
        """Call ``update_bm25`` on a store that was created with ``use_bm25=False``."""
        # Deliberately not the ``ds`` fixture: that one enables BM25 up front.
        store = InMemoryDocumentStore(use_bm25=False)
        store.write_documents(documents)
        store.update_bm25()

        index_bm25 = store.bm25[store.index]
        assert isinstance(index_bm25, BM25)
        assert index_bm25.corpus_size == store.get_document_count()

    @pytest.mark.integration
    def test_memory_query(self, ds, documents):
        """Query for "Bar" with ``top_k=1`` and expect a single Bar document."""
        ds.write_documents(documents)

        results = ds.query(query="Bar", top_k=1)
        assert len(results) == 1
        assert "A Bar Document" in results[0].content

    @pytest.mark.integration
    def test_memory_query_batch(self, ds, documents):
        """Run two queries in one batch and check each result list and its top hit."""
        ds.write_documents(documents)
        expected_top_hits = ["A Foo Document", "A Bar Document"]

        per_query = ds.query_batch(queries=["Foo", "Bar"], top_k=5)

        # One result list per query.
        assert len(per_query) == 2
        for hits, expected_text in zip(per_query, expected_top_hits):
            assert len(hits) == 5
            assert expected_text in hits[0].content
|
Loading…
x
Reference in New Issue
Block a user