Document Store test refactoring (#3449)
* add new marker
* start using test hierarchies
* move ES tests into their own class
* refactor test workflow
* job steps
* add more tests
* move more tests
* more tests
* test labels
* add more tests
* Update tests.yml
* Update tests.yml
* fix
* typo
* fix es image tag
* map es ports
* try
* fix
* default port
* remove opensearch from the markers sorcery
* revert
* skip new tests in old jobs
* skip opensearch_faiss
Parent: 85cdc1040a
Commit: b694c7b5cb
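The core of the refactoring (made concrete in test/document_stores/test_base.py below): shared document store tests now live in suite classes without the `Test` name prefix, so pytest only collects them through backend-specific subclasses that provide a `ds` fixture. A minimal, self-contained sketch of the pattern; `FakeStore` and the test names are illustrative, not code from this commit:

    import pytest

    class FakeStore:
        """Illustrative in-memory stand-in for a real document store backend."""

        def __init__(self):
            self._docs = {}

        def write_documents(self, docs):
            self._docs.update({d["id"]: d for d in docs})

        def get_document_count(self):
            return len(self._docs)

    class DocumentStoreSuiteSketch:
        # No `Test` prefix: pytest won't collect this method here...
        def test_write_then_count(self, ds):
            ds.write_documents([{"content": "hello", "id": "1"}])
            assert ds.get_document_count() == 1

    class TestFakeStore(DocumentStoreSuiteSketch):
        # ...but it will collect it on this subclass, which only wires up the fixture.
        @pytest.fixture
        def ds(self):
            yield FakeStore()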
.github/workflows/tests.yml: 212 lines changed (vendored)
@@ -92,17 +92,22 @@ jobs:
      if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'

   unit-tests:
-    name: Unit / ${{ matrix.os }}
+    name: Unit / ${{ matrix.topic }} / ${{ matrix.os }}
     needs:
       - mypy
       - pylint
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, windows-latest, macos-latest]
+        os:
+          - ubuntu-latest
+          - windows-latest
+          - macos-latest
+        topic:
+          - document_stores
     runs-on: ${{ matrix.os }}
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3

       - name: Setup Python
         uses: ./.github/actions/python_cache/
@@ -111,7 +116,7 @@ jobs:
         run: pip install .[all]

       - name: Run
-        run: pytest -m "unit" test/
+        run: pytest -m "unit" test/${{ matrix.topic }}

       - uses: act10ns/slack@v1
         with:
@@ -119,6 +124,86 @@ jobs:
           channel: '#haystack'
         if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'

+  integration-tests-elasticsearch:
+    name: Integration / Elasticsearch / ${{ matrix.os }}
+    needs:
+      - unit-tests
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+    runs-on: ${{ matrix.os }}
+    services:
+      elasticsearch:
+        image: elasticsearch:7.17.6
+        env:
+          discovery.type: "single-node"
+          ES_JAVA_OPTS: "-Xms128m -Xmx256m"
+        ports:
+          - 9200:9200
+    # env:
+    #   ELASTICSEARCH_HOST: "elasticsearch"
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Setup Python
+        uses: ./.github/actions/python_cache/
+
+      - name: Install Haystack
+        run: pip install -U .[docstores]
+
+      - name: Run tests
+        run: |
+          pytest -x -m "document_store and integration" test/document_stores/test_elasticsearch.py
+
+      - uses: act10ns/slack@v1
+        with:
+          status: ${{ job.status }}
+          channel: '#haystack'
+        if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
+
+  integration-tests-opensearch:
+    name: Integration / Opensearch / ${{ matrix.os }}
+    needs:
+      - unit-tests
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+    runs-on: ${{ matrix.os }}
+    services:
+      opensearch:
+        image: opensearchproject/opensearch:1.3.5
+        env:
+          discovery.type: "single-node"
+          ES_JAVA_OPTS: "-Xms128m -Xmx256m"
+        ports:
+          - 9200:9200
+    # env:
+    #   OPENSEARCH_HOST: "opensearch"
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Setup Python
+        uses: ./.github/actions/python_cache/
+
+      - name: Install Haystack
+        run: pip install -U .[docstores]
+
+      - name: Run tests
+        run: |
+          pytest -x -m "document_store and integration" test/document_stores/test_opensearch.py
+
+      - uses: act10ns/slack@v1
+        with:
+          status: ${{ job.status }}
+          channel: '#haystack'
+        if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
+
+  #
+  # TODO: the following steps need to be revisited
+  #
+
   unit-tests-linux:
     needs:
       - mypy
@@ -216,117 +301,6 @@ jobs:
           channel: '#haystack'
         if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'

-  elasticsearch-tests-linux:
-    needs:
-      - mypy
-      - pylint
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-
-      - name: Setup Elasticsearch
-        run: |
-          docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx256m" elasticsearch:7.9.2
-
-      # TODO Let's try to remove this one from the unit tests
-      - name: Install pdftotext
-        run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin
-
-      - name: Setup Python
-        uses: ./.github/actions/python_cache/
-
-      - name: Install Haystack
-        run: pip install .
-
-      - name: Run tests
-        env:
-          TOKENIZERS_PARALLELISM: 'false'
-        run: |
-          pytest ${{ env.PYTEST_PARAMS }} -m "elasticsearch and not integration" test/document_stores/ --document_store_type=elasticsearch
-
-      - name: Dump docker logs on failure
-        if: failure()
-        uses: jwalton/gh-docker-logs@v1
-
-      - uses: act10ns/slack@v1
-        with:
-          status: ${{ job.status }}
-          channel: '#haystack'
-        if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
-
-  elasticsearch-tests-windows:
-    needs:
-      - mypy
-      - pylint
-    runs-on: windows-latest
-    if: contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft
-
-    steps:
-      - uses: actions/checkout@v2
-
-      - name: Install dependencies
-        run: |
-          choco install --no-progress xpdf-utils
-          choco install --no-progress openjdk --version=11.0.2.01
-          refreshenv
-          choco install --no-progress elasticsearch --version=7.9.2
-          refreshenv
-          Get-Service elasticsearch-service-x64 | Start-Service
-
-      - name: Setup Python
-        uses: ./.github/actions/python_cache/
-        with:
-          prefix: windows
-
-      - name: Run tests
-        env:
-          TOKENIZERS_PARALLELISM: 'false'
-        run: |
-          pytest ${{ env.PYTEST_PARAMS }} -m "elasticsearch and not integration" test/document_stores/ ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} --document_store_type=elasticsearch
-
-      - uses: act10ns/slack@v1
-        with:
-          status: ${{ job.status }}
-          channel: '#haystack'
-        if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
-
-  opensearch-tests-linux:
-    needs:
-      - mypy
-      - pylint
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-
-      - name: Setup Opensearch
-        run: |
-          docker run -d -p 9201:9200 -p 9600:9600 -e "discovery.type=single-node" opensearchproject/opensearch:1.3.5
-
-      # TODO Let's try to remove this one from the unit tests
-      - name: Install pdftotext
-        run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin
-
-      - name: Setup Python
-        uses: ./.github/actions/python_cache/
-
-      - name: Install Haystack
-        run: pip install .
-
-      - name: Run tests
-        env:
-          TOKENIZERS_PARALLELISM: 'false'
-        run: |
-          pytest ${{ env.PYTEST_PARAMS }} -m "opensearch and not integration" test/document_stores/test_document_store.py --document_store_type=opensearch
-
-      - name: Dump docker logs on failure
-        if: failure()
-        uses: jwalton/gh-docker-logs@v1
-
-      - uses: act10ns/slack@v1
-        with:
-          status: ${{ job.status }}
-          channel: '#haystack'
-        if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
-
   faiss-tests-linux:
     needs:
@@ -656,7 +630,6 @@ jobs:
   integration-tests-linux:
     needs:
       - unit-tests-linux
-      - elasticsearch-tests-linux

     timeout-minutes: 60
     strategy:
@@ -691,7 +664,6 @@ jobs:
         run: |
          python -c "from transformers import AutoModel;[AutoModel.from_pretrained(model_name) for model_name in ['vblagoje/bart_lfqa','yjernite/bart_eli5', 'vblagoje/dpr-ctx_encoder-single-lfqa-wiki', 'vblagoje/dpr-question_encoder-single-lfqa-wiki', 'facebook/dpr-question_encoder-single-nq-base', 'facebook/dpr-ctx_encoder-single-nq-base', 'elastic/distilbert-base-cased-finetuned-conll03-english', 'deepset/bert-medium-squad2-distilled']]"
-

       - name: Run Elasticsearch
         run: |
           docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx256m" elasticsearch:7.9.2
@@ -738,8 +710,9 @@ jobs:
       - name: Run tests
         env:
           TOKENIZERS_PARALLELISM: 'false' # Avoid logspam by tokenizers
+        # we add "and not document_store" to exclude the tests that were ported to the new strategy
         run: |
-          pytest ${{ env.PYTEST_PARAMS }} -m "integration" test/${{ matrix.folder }}
+          pytest ${{ env.PYTEST_PARAMS }} -m "integration and not document_store" test/${{ matrix.folder }}

       - name: Dump docker logs on failure
         if: failure()
@@ -754,7 +727,6 @@ jobs:
   integration-tests-windows:
     needs:
       - unit-tests-windows
-      - elasticsearch-tests-windows
    runs-on: windows-latest
    if: contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft

@@ -800,4 +772,4 @@ jobs:
         with:
           status: ${{ job.status }}
           channel: '#haystack'
-        if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
+        if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
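With the matrix above, unit tests are selected per topic folder via `pytest -m "unit" test/${{ matrix.topic }}`, while the new service-backed jobs select `-m "document_store and integration"`. A sketch of how a test opts into each lane, assuming the markers registered in pyproject.toml further down (test names are invented for the example):

    import pytest

    @pytest.mark.unit
    def test_pure_logic():
        # collected by the "Unit / document_stores / <os>" matrix job
        assert sorted([3, 1, 2]) == [1, 2, 3]

    @pytest.mark.document_store
    @pytest.mark.integration
    def test_against_live_backend():
        # collected only by the Elasticsearch/Opensearch integration jobs
        ...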
@@ -2,7 +2,7 @@ def pytest_addoption(parser):
     parser.addoption(
         "--document_store_type",
         action="store",
-        default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate, pinecone, opensearch",
+        default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate, pinecone",
     )
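The `--document_store_type` option registered here is consumed elsewhere in conftest by the older, marker-based suite. A hedged sketch (illustrative hook, not the repo's actual implementation) of reading the option back to parametrize tests:

    # Hypothetical consumer of the option above (not this repo's actual hook):
    def pytest_generate_tests(metafunc):
        if "document_store_type" in metafunc.fixturenames:
            raw = metafunc.config.getoption("--document_store_type")
            metafunc.parametrize("document_store_type", [t.strip() for t in raw.split(",")])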
@@ -351,6 +351,7 @@ markers = [
     "milvus: requires a Milvus 2 setup",
     "milvus1: requires a Milvus 1 container",
     "opensearch",
+    "document_store",
 ]
 log_cli = true
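Registering `document_store` here makes it legal in `-m` expressions, which is exactly how the new CI jobs, and the old jobs' new exclusion (`integration and not document_store`), slice the suite. The same selection, driven from Python for local runs:

    import pytest

    # Same selection the new Elasticsearch job runs; pytest.main returns an exit code.
    exit_code = pytest.main(
        ["-x", "-m", "document_store and integration", "test/document_stores/test_elasticsearch.py"]
    )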
@@ -152,7 +152,6 @@ def pytest_collection_modifyitems(config, items):
         "pinecone": [pytest.mark.pinecone],
         # FIXME GraphDB can't be treated as a regular docstore, it fails most of their tests
         "graphdb": [pytest.mark.integration],
-        "opensearch": [pytest.mark.opensearch],
     }
     for item in items:
         for name, markers in name_to_markers.items():
@@ -196,17 +195,7 @@ def infer_required_doc_store(item, keywords):
     # 2. if the test name contains the docstore name, we use that
     # 3. use an arbitrary one by calling set.pop()
     required_doc_store = None
-    all_doc_stores = {
-        "elasticsearch",
-        "faiss",
-        "sql",
-        "memory",
-        "milvus1",
-        "milvus",
-        "weaviate",
-        "pinecone",
-        "opensearch",
-    }
+    all_doc_stores = {"elasticsearch", "faiss", "sql", "memory", "milvus1", "milvus", "weaviate", "pinecone"}
     docstore_markers = set(keywords).intersection(all_doc_stores)
     if len(docstore_markers) > 1:
         # if parameterized infer the docstore from the parameter
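The inference above boils down to intersecting the keywords pytest attaches to a test item with the known store names. A toy illustration of that set logic (the sample keywords are invented for the example):

    all_doc_stores = {"elasticsearch", "faiss", "sql", "memory", "milvus1", "milvus", "weaviate", "pinecone"}
    keywords = {"test_write_documents", "elasticsearch", "integration"}  # invented sample item keywords
    docstore_markers = set(keywords).intersection(all_doc_stores)
    assert docstore_markers == {"elasticsearch"}  # exactly one match pins the required store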
@@ -1109,18 +1098,6 @@ def get_document_store(
             knn_engine="faiss",
         )

-    elif document_store_type == "opensearch":
-        document_store = OpenSearchDocumentStore(
-            index=index,
-            return_embedding=True,
-            embedding_dim=embedding_dim,
-            embedding_field=embedding_field,
-            similarity=similarity,
-            recreate_index=recreate_index,
-            port=9201,
-            knn_engine="nmslib",
-        )
-
     else:
         raise Exception(f"No document store fixture for '{document_store_type}'")
test/document_stores/test_base.py: 445 lines (new file)
@@ -0,0 +1,445 @@
import pytest
import numpy as np

from haystack.schema import Document, Label, Answer
from haystack.errors import DuplicateDocumentError
from haystack.document_stores import BaseDocumentStore


@pytest.mark.document_store
class DocumentStoreBaseTestAbstract:
    """
    This is a base class to test abstract methods from DocumentStoreBase to be inherited by any Document Store
    testsuite. It doesn't have the `Test` prefix in the name so that its methods won't be collected for this
    class but only for its subclasses.
    """

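    # As an illustration (hypothetical subclass, not part of this file), a concrete
    # suite inherits every test below by supplying just a `ds` fixture:
    #
    #     class TestSomeDocumentStore(DocumentStoreBaseTestAbstract):
    #         @pytest.fixture
    #         def ds(self):
    #             yield SomeDocumentStore()
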
    @pytest.fixture
    def documents(self):
        documents = []
        for i in range(3):
            documents.append(
                Document(
                    content=f"A Foo Document {i}",
                    meta={"name": f"name_{i}", "year": "2020", "month": "01", "numbers": [2, 4]},
                    embedding=np.random.rand(768).astype(np.float32),
                )
            )

            documents.append(
                Document(
                    content=f"A Bar Document {i}",
                    meta={"name": f"name_{i}", "year": "2021", "month": "02", "numbers": [-2, -4]},
                    embedding=np.random.rand(768).astype(np.float32),
                )
            )

            documents.append(
                Document(
                    content=f"Document {i} without embeddings",
                    meta={"name": f"name_{i}", "no_embedding": True, "month": "03"},
                )
            )

        return documents

    @pytest.fixture
    def labels(self, documents):
        labels = []
        for i, d in enumerate(documents):
            labels.append(
                Label(
                    query=f"query_{i}",
                    document=d,
                    is_correct_document=True,
                    is_correct_answer=False,
                    # create a mix set of labels
                    origin="user-feedback" if i % 2 else "gold-label",
                    answer=None if not i else Answer(f"the answer is {i}"),
                    meta={"name": f"label_{i}", "year": f"{2020 + i}"},
                )
            )
        return labels

    #
    # Integration tests
    #

    @pytest.mark.integration
    def test_write_documents(self, ds, documents):
        ds.write_documents(documents)
        docs = ds.get_all_documents()
        assert len(docs) == len(documents)
        for i, doc in enumerate(docs):
            expected = documents[i]
            assert doc.id == expected.id

    @pytest.mark.integration
    def test_write_labels(self, ds, labels):
        ds.write_labels(labels)
        assert ds.get_all_labels() == labels

    @pytest.mark.integration
    def test_write_with_duplicate_doc_ids(self, ds):
        duplicate_documents = [
            Document(content="Doc1", id_hash_keys=["content"]),
            Document(content="Doc1", id_hash_keys=["content"]),
        ]
        ds.write_documents(duplicate_documents, duplicate_documents="skip")
        assert len(ds.get_all_documents()) == 1
        with pytest.raises(Exception):
            ds.write_documents(duplicate_documents, duplicate_documents="fail")

    @pytest.mark.skip
    @pytest.mark.integration
    def test_get_all_documents_without_filters(self, ds, documents):
        ds.write_documents(documents)
        out = ds.get_all_documents()
        assert out == documents

    @pytest.mark.integration
    def test_get_all_document_filter_duplicate_text_value(self, ds):
        documents = [
            Document(content="duplicated", meta={"meta_field": "0"}, id_hash_keys=["meta"]),
            Document(content="duplicated", meta={"meta_field": "1", "name": "file.txt"}, id_hash_keys=["meta"]),
            Document(content="Doc2", meta={"name": "file_2.txt"}, id_hash_keys=["meta"]),
        ]
        ds.write_documents(documents)
        documents = ds.get_all_documents(filters={"meta_field": ["1"]})
        assert len(documents) == 1
        assert documents[0].content == "duplicated"
        assert documents[0].meta["name"] == "file.txt"

        documents = ds.get_all_documents(filters={"meta_field": ["0"]})
        assert len(documents) == 1
        assert documents[0].content == "duplicated"
        assert documents[0].meta.get("name") is None

        documents = ds.get_all_documents(filters={"name": ["file_2.txt"]})
        assert len(documents) == 1
        assert documents[0].content == "Doc2"
        assert documents[0].meta.get("meta_field") is None

    @pytest.mark.integration
    def test_get_all_documents_with_correct_filters(self, ds, documents):
        ds.write_documents(documents)
        result = ds.get_all_documents(filters={"year": ["2020"]})
        assert len(result) == 3

        documents = ds.get_all_documents(filters={"year": ["2020", "2021"]})
        assert len(documents) == 6

    @pytest.mark.integration
    def test_get_all_documents_with_incorrect_filter_name(self, ds, documents):
        ds.write_documents(documents)
        result = ds.get_all_documents(filters={"non_existing_meta_field": ["whatever"]})
        assert len(result) == 0

    @pytest.mark.integration
    def test_get_all_documents_with_incorrect_filter_value(self, ds, documents):
        ds.write_documents(documents)
        result = ds.get_all_documents(filters={"year": ["nope"]})
        assert len(result) == 0

    @pytest.mark.integration
    def test_extended_filter(self, ds, documents):
        ds.write_documents(documents)

        # Test comparison operators individually

        result = ds.get_all_documents(filters={"year": {"$eq": "2020"}})
        assert len(result) == 3
        result = ds.get_all_documents(filters={"year": "2020"})
        assert len(result) == 3

        result = ds.get_all_documents(filters={"year": {"$in": ["2020", "2021", "n.a."]}})
        assert len(result) == 6
        result = ds.get_all_documents(filters={"year": ["2020", "2021", "n.a."]})
        assert len(result) == 6

        result = ds.get_all_documents(filters={"year": {"$ne": "2020"}})
        assert len(result) == 6

        result = ds.get_all_documents(filters={"year": {"$nin": ["2020", "2021", "n.a."]}})
        assert len(result) == 3

        result = ds.get_all_documents(filters={"numbers": {"$gt": 0}})
        assert len(result) == 3

        result = ds.get_all_documents(filters={"numbers": {"$gte": -2}})
        assert len(result) == 6

        result = ds.get_all_documents(filters={"numbers": {"$lt": 0}})
        assert len(result) == 3

        result = ds.get_all_documents(filters={"numbers": {"$lte": 2.0}})
        assert len(result) == 6

        # Test compound filters

        result = ds.get_all_documents(filters={"year": {"$lte": "2021", "$gte": "2020"}})
        assert len(result) == 6

        filters = {"$and": {"year": {"$lte": "2021", "$gte": "2020"}, "name": {"$in": ["name_0", "name_1"]}}}
        result = ds.get_all_documents(filters=filters)
        assert len(result) == 4

        filters_simplified = {"year": {"$lte": "2021", "$gte": "2020"}, "name": ["name_0", "name_1"]}
        result = ds.get_all_documents(filters=filters_simplified)
        assert len(result) == 4

        filters = {
            "$and": {
                "year": {"$lte": "2021", "$gte": "2020"},
                "$or": {"name": {"$in": ["name_0", "name_1"]}, "numbers": {"$lt": 5.0}},
            }
        }
        result = ds.get_all_documents(filters=filters)
        assert len(result) == 6

        filters_simplified = {
            "year": {"$lte": "2021", "$gte": "2020"},
            "$or": {"name": {"$in": ["name_0", "name_2"]}, "numbers": {"$lt": 5.0}},
        }
        result = ds.get_all_documents(filters=filters_simplified)
        assert len(result) == 6

        filters = {
            "$and": {
                "year": {"$lte": "2021", "$gte": "2020"},
                "$or": {
                    "name": {"$in": ["name_0", "name_1"]},
                    "$and": {"numbers": {"$lt": 5.0}, "$not": {"month": {"$eq": "01"}}},
                },
            }
        }
        result = ds.get_all_documents(filters=filters)
        assert len(result) == 5

        filters_simplified = {
            "year": {"$lte": "2021", "$gte": "2020"},
            "$or": {"name": ["name_0", "name_1"], "$and": {"numbers": {"$lt": 5.0}, "$not": {"month": {"$eq": "01"}}}},
        }
        result = ds.get_all_documents(filters=filters_simplified)
        assert len(result) == 5

        # Test nested logical operations within "$not", important as we apply De Morgan's laws in WeaviateDocumentstore

        filters = {
            "$not": {
                "$or": {
                    "$and": {"numbers": {"$lt": 5.0}, "month": {"$ne": "01"}},
                    "$not": {"year": {"$lte": "2021", "$gte": "2020"}},
                }
            }
        }
        result = ds.get_all_documents(filters=filters)
        docs_meta = result[0].meta["numbers"]
        assert len(result) == 3
        assert [2, 4] == docs_meta

        # Test same logical operator twice on same level

        filters = {
            "$or": [
                {"$and": {"name": {"$in": ["name_0", "name_1"]}, "year": {"$gte": "2020"}}},
                {"$and": {"name": {"$in": ["name_0", "name_1"]}, "year": {"$lt": "2021"}}},
            ]
        }
        result = ds.get_all_documents(filters=filters)
        docs_meta = [doc.meta["name"] for doc in result]
        assert len(result) == 4
        assert "name_0" in docs_meta
        assert "name_2" not in docs_meta

    @pytest.mark.integration
    def test_get_document_by_id(self, ds, documents):
        ds.write_documents(documents)
        doc = ds.get_document_by_id(documents[0].id)
        assert doc.id == documents[0].id
        assert doc.content == documents[0].content

    @pytest.mark.integration
    def test_get_documents_by_id(self, ds, documents):
        ds.write_documents(documents)
        ids = [doc.id for doc in documents]
        result = {doc.id for doc in ds.get_documents_by_id(ids, batch_size=2)}
        assert set(ids) == result

    @pytest.mark.integration
    def test_get_document_count(self, ds, documents):
        ds.write_documents(documents)
        assert ds.get_document_count() == 9
        assert ds.get_document_count(filters={"year": ["2020"]}) == 3
        assert ds.get_document_count(filters={"month": ["02"]}) == 3

    @pytest.mark.integration
    def test_get_all_documents_generator(self, ds, documents):
        ds.write_documents(documents)
        assert len(list(ds.get_all_documents_generator(batch_size=2))) == 9

    @pytest.mark.integration
    def test_duplicate_documents_skip(self, ds, documents):
        ds.write_documents(documents)

        updated_docs = []
        for d in documents:
            updated_d = Document.from_dict(d.to_dict())
            updated_d.meta["name"] = "Updated"
            updated_docs.append(updated_d)

        ds.write_documents(updated_docs, duplicate_documents="skip")
        result = ds.get_all_documents()
        assert result[0].meta["name"] == "name_0"

    @pytest.mark.integration
    def test_duplicate_documents_overwrite(self, ds, documents):
        ds.write_documents(documents)

        updated_docs = []
        for d in documents:
            updated_d = Document.from_dict(d.to_dict())
            updated_d.meta["name"] = "Updated"
            updated_docs.append(updated_d)

        ds.write_documents(updated_docs, duplicate_documents="overwrite")
        for doc in ds.get_all_documents():
            assert doc.meta["name"] == "Updated"

    @pytest.mark.integration
    def test_duplicate_documents_fail(self, ds, documents):
        ds.write_documents(documents)

        updated_docs = []
        for d in documents:
            updated_d = Document.from_dict(d.to_dict())
            updated_d.meta["name"] = "Updated"
            updated_docs.append(updated_d)

        with pytest.raises(DuplicateDocumentError):
            ds.write_documents(updated_docs, duplicate_documents="fail")

    @pytest.mark.integration
    def test_write_document_meta(self, ds):
        ds.write_documents(
            [
                {"content": "dict_without_meta", "id": "1"},
                {"content": "dict_with_meta", "meta_field": "test2", "id": "2"},
                Document(content="document_object_without_meta", id="3"),
                Document(content="document_object_with_meta", meta={"meta_field": "test4"}, id="4"),
            ]
        )
        assert not ds.get_document_by_id("1").meta
        assert ds.get_document_by_id("2").meta["meta_field"] == "test2"
        assert not ds.get_document_by_id("3").meta
        assert ds.get_document_by_id("4").meta["meta_field"] == "test4"

    @pytest.mark.integration
    def test_delete_documents(self, ds, documents):
        ds.write_documents(documents)
        ds.delete_documents()
        assert ds.get_document_count() == 0

    @pytest.mark.integration
    def test_delete_documents_with_filters(self, ds, documents):
        ds.write_documents(documents)
        ds.delete_documents(filters={"year": ["2020", "2021"]})
        documents = ds.get_all_documents()
        assert ds.get_document_count() == 3

    @pytest.mark.integration
    def test_delete_documents_by_id(self, ds, documents):
        ds.write_documents(documents)
        docs_to_delete = ds.get_all_documents(filters={"year": ["2020"]})
        ds.delete_documents(ids=[doc.id for doc in docs_to_delete])
        assert ds.get_document_count() == 6

    @pytest.mark.integration
    def test_write_get_all_labels(self, ds, labels):
        ds.write_labels(labels)
        ds.write_labels(labels[:3], index="custom_index")
        assert len(ds.get_all_labels()) == 9
        assert len(ds.get_all_labels(index="custom_index")) == 3
        # remove the index we created in this test
        ds.delete_index("custom_index")

    @pytest.mark.integration
    def test_delete_labels(self, ds, labels):
        ds.write_labels(labels)
        ds.write_labels(labels[:3], index="custom_index")
        ds.delete_labels()
        ds.delete_labels(index="custom_index")
        assert len(ds.get_all_labels()) == 0
        assert len(ds.get_all_labels(index="custom_index")) == 0
        # remove the index we created in this test
        ds.delete_index("custom_index")

    @pytest.mark.integration
    def test_write_labels_duplicate(self, ds, labels):
        # create a duplicate
        dupe = Label.from_dict(labels[0].to_dict())

        ds.write_labels(labels + [dupe])

        # ensure the duplicate was discarded
        assert len(ds.get_all_labels()) == len(labels)

    @pytest.mark.integration
    def test_delete_labels_by_id(self, ds, labels):
        ds.write_labels(labels)
        ds.delete_labels(ids=[labels[0].id])
        assert len(ds.get_all_labels()) == len(labels) - 1

    @pytest.mark.integration
    def test_delete_labels_by_filter(self, ds, labels):
        ds.write_labels(labels)
        ds.delete_labels(filters={"query": "query_1"})
        assert len(ds.get_all_labels()) == len(labels) - 1

    @pytest.mark.integration
    def test_delete_labels_by_filter_id(self, ds, labels):
        ds.write_labels(labels)

        # ids and filters are ANDed, the following should have no effect
        ds.delete_labels(ids=[labels[0].id], filters={"query": "query_9"})
        assert len(ds.get_all_labels()) == len(labels)

        #
        ds.delete_labels(ids=[labels[0].id], filters={"query": "query_0"})
        assert len(ds.get_all_labels()) == len(labels) - 1

    @pytest.mark.integration
    def test_get_label_count(self, ds, labels):
        ds.write_labels(labels)
        assert ds.get_label_count() == len(labels)

    @pytest.mark.integration
    def test_delete_index(self, ds, documents):
        ds.write_documents(documents, index="custom_index")
        assert ds.get_document_count(index="custom_index") == len(documents)
        ds.delete_index(index="custom_index")
        with pytest.raises(Exception):
            ds.get_document_count(index="custom_index")

    @pytest.mark.integration
    def test_update_meta(self, ds, documents):
        ds.write_documents(documents)
        doc = documents[0]
        ds.update_document_meta(doc.id, meta={"year": "2099", "month": "12"})
        doc = ds.get_document_by_id(doc.id)
        assert doc.meta["year"] == "2099"
        assert doc.meta["month"] == "12"

    #
    # Unit tests
    #

    @pytest.mark.unit
    def test_normalize_embeddings_diff_shapes(self):
        VEC_1 = np.array([0.1, 0.2, 0.3], dtype="float32")
        BaseDocumentStore.normalize_embedding(VEC_1)
        assert np.linalg.norm(VEC_1) - 1 < 0.01

        VEC_1 = np.array([0.1, 0.2, 0.3], dtype="float32").reshape(1, -1)
        BaseDocumentStore.normalize_embedding(VEC_1)
        assert np.linalg.norm(VEC_1) - 1 < 0.01
@@ -77,84 +77,6 @@ DOCUMENTS = [
 ]


-@pytest.mark.elasticsearch
-def test_init_elastic_client():
-    # defaults
-    _ = ElasticsearchDocumentStore()
-
-    # list of hosts + single port
-    _ = ElasticsearchDocumentStore(host=["localhost", "127.0.0.1"], port=9200)
-
-    # list of hosts + list of ports (wrong)
-    with pytest.raises(Exception):
-        _ = ElasticsearchDocumentStore(host=["localhost", "127.0.0.1"], port=[9200])
-
-    # list of hosts + list
-    _ = ElasticsearchDocumentStore(host=["localhost", "127.0.0.1"], port=[9200, 9200])
-
-    # only api_key
-    with pytest.raises(Exception):
-        _ = ElasticsearchDocumentStore(host=["localhost"], port=[9200], api_key="test")
-
-    # api_key + id
-    _ = ElasticsearchDocumentStore(host=["localhost"], port=[9200], api_key="test", api_key_id="test")
-
-
-@pytest.mark.elasticsearch
-def test_init_elastic_doc_store_with_index_recreation():
-    index_name = "test_index_recreation"
-    label_index_name = "test_index_recreation_labels"
-
-    document_store = ElasticsearchDocumentStore(index=index_name, label_index=label_index_name)
-    documents = [Document(content="Doc1")]
-    labels = [
-        Label(
-            query="query",
-            document=documents[0],
-            is_correct_document=True,
-            is_correct_answer=False,
-            origin="user-feedback",
-            answer=None,
-        )
-    ]
-    document_store.write_documents(documents, index=index_name)
-    document_store.write_labels(labels, index=label_index_name)
-
-    document_store = ElasticsearchDocumentStore(index=index_name, label_index=label_index_name, recreate_index=True)
-    docs = document_store.get_all_documents(index=index_name)
-    labels = document_store.get_all_labels(index=label_index_name)
-
-    assert len(docs) == 0
-    assert len(labels) == 0
-
-
-@pytest.mark.elasticsearch
-def test_elasticsearch_eq_filter():
-    documents = [
-        {"content": "some text", "id": "1", "keyword_field": ["x", "y", "z"], "number_field": [1, 2, 3, 4]},
-        {"content": "some text", "id": "2", "keyword_field": ["x", "y", "w"], "number_field": [1, 2, 3]},
-        {"content": "some text", "id": "3", "keyword_field": ["x", "z"], "number_field": [2, 4]},
-        {"content": "some text", "id": "4", "keyword_field": ["z", "x"], "number_field": [5, 6]},
-        {"content": "some text", "id": "5", "keyword_field": ["x", "y"], "number_field": [2, 3]},
-    ]
-
-    index = "test_elasticsearch_eq_filter"
-    document_store = ElasticsearchDocumentStore(index=index, recreate_index=True)
-    document_store.write_documents(documents)
-
-    filter = {"keyword_field": {"$eq": ["z", "x"]}}
-    filtered_docs = document_store.get_all_documents(index=index, filters=filter)
-    assert len(filtered_docs) == 2
-    for doc in filtered_docs:
-        assert set(doc.meta["keyword_field"]) == {"x", "z"}
-
-    filter = {"number_field": {"$eq": [2, 3]}}
-    filtered_docs = document_store.query(query=None, index=index, filters=filter)
-    assert len(filtered_docs) == 1
-    assert filtered_docs[0].meta["number_field"] == [2, 3]
-    assert filtered_docs[0].id == "5"
-
-
 def test_write_with_duplicate_doc_ids(document_store: BaseDocumentStore):
     duplicate_documents = [
         Document(content="Doc1", id_hash_keys=["content"]),

@@ -1274,164 +1196,6 @@ def test_get_meta_values_by_key(document_store: BaseDocumentStore):
         assert bucket["count"] == 1


-@pytest.mark.elasticsearch
-def test_elasticsearch_custom_fields():
-    document_store = ElasticsearchDocumentStore(
-        index="haystack_test_custom",
-        content_field="custom_text_field",
-        embedding_field="custom_embedding_field",
-        recreate_index=True,
-    )
-
-    doc_to_write = {"custom_text_field": "test", "custom_embedding_field": np.random.rand(768).astype(np.float32)}
-    document_store.write_documents([doc_to_write])
-    documents = document_store.get_all_documents(return_embedding=True)
-    assert len(documents) == 1
-    assert documents[0].content == "test"
-    np.testing.assert_array_equal(doc_to_write["custom_embedding_field"], documents[0].embedding)
-
-
-@pytest.mark.elasticsearch
-def test_elasticsearch_delete_index():
-    client = Elasticsearch()
-    index_name = "haystack_test_deletion"
-
-    document_store = ElasticsearchDocumentStore(index=index_name)
-
-    # the index should exist
-    index_exists = client.indices.exists(index=index_name)
-    assert index_exists
-
-    document_store.delete_index(index_name)
-
-    # the index was deleted and should not exist
-    index_exists = client.indices.exists(index=index_name)
-    assert not index_exists
-
-
-@pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
-def test_elasticsearch_query_with_filters_and_missing_embeddings(document_store: ElasticsearchDocumentStore):
-    document_store.write_documents(DOCUMENTS)
-    document_without_embedding = Document(
-        content="Doc without embedding", meta={"name": "name_7", "year": "2021", "month": "04"}
-    )
-    document_store.write_documents([document_without_embedding])
-    filters = {"year": "2021"}
-    document_store.skip_missing_embeddings = False
-    with pytest.raises(RequestError):
-        document_store.query_by_embedding(np.random.rand(768), filters=filters)
-
-    document_store.skip_missing_embeddings = True
-    documents = document_store.query_by_embedding(np.random.rand(768), filters=filters)
-    assert len(documents) == 3
-
-
-@pytest.mark.elasticsearch
-def test_get_document_count_only_documents_without_embedding_arg():
-    documents = [
-        {
-            "content": "text1",
-            "id": "1",
-            "embedding": np.random.rand(768).astype(np.float32),
-            "meta_field_for_count": "a",
-        },
-        {
-            "content": "text2",
-            "id": "2",
-            "embedding": np.random.rand(768).astype(np.float64),
-            "meta_field_for_count": "b",
-        },
-        {"content": "text3", "id": "3", "embedding": np.random.rand(768).astype(np.float32).tolist()},
-        {"content": "text4", "id": "4", "meta_field_for_count": "b"},
-        {"content": "text5", "id": "5", "meta_field_for_count": "b"},
-        {"content": "text6", "id": "6", "meta_field_for_count": "c"},
-        {
-            "content": "text7",
-            "id": "7",
-            "embedding": np.random.rand(768).astype(np.float64),
-            "meta_field_for_count": "c",
-        },
-    ]
-
-    _index: str = "haystack_test_count"
-    document_store = ElasticsearchDocumentStore(index=_index, recreate_index=True)
-
-    document_store.write_documents(documents)
-
-    assert document_store.get_document_count() == 7
-    assert document_store.get_document_count(only_documents_without_embedding=True) == 3
-    assert (
-        document_store.get_document_count(
-            only_documents_without_embedding=True, filters={"meta_field_for_count": ["c"]}
-        )
-        == 1
-    )
-    assert (
-        document_store.get_document_count(
-            only_documents_without_embedding=True, filters={"meta_field_for_count": ["b"]}
-        )
-        == 2
-    )
-
-
-@pytest.mark.elasticsearch
-def test_skip_missing_embeddings(caplog):
-    documents = [
-        {"content": "text1", "id": "1"},  # a document without embeddings
-        {"content": "text2", "id": "2", "embedding": np.random.rand(768).astype(np.float64)},
-        {"content": "text3", "id": "3", "embedding": np.random.rand(768).astype(np.float32).tolist()},
-        {"content": "text4", "id": "4", "embedding": np.random.rand(768).astype(np.float32)},
-    ]
-    document_store = ElasticsearchDocumentStore(index="skip_missing_embedding_index", recreate_index=True)
-    document_store.write_documents(documents)
-
-    document_store.skip_missing_embeddings = True
-    retrieved_docs = document_store.query_by_embedding(np.random.rand(768).astype(np.float32))
-    assert len(retrieved_docs) == 3
-
-    document_store.skip_missing_embeddings = False
-    with pytest.raises(RequestError):
-        document_store.query_by_embedding(np.random.rand(768).astype(np.float32))
-
-    # Test scenario with no embeddings for the entire index
-    documents = [
-        {"content": "text1", "id": "1"},
-        {"content": "text2", "id": "2"},
-        {"content": "text3", "id": "3"},
-        {"content": "text4", "id": "4"},
-    ]
-
-    document_store.delete_documents()
-    document_store.write_documents(documents)
-
-    document_store.skip_missing_embeddings = True
-    with caplog.at_level(logging.WARNING):
-        document_store.query_by_embedding(np.random.rand(768).astype(np.float32))
-        assert "No documents with embeddings. Run the document store's update_embeddings() method." in caplog.text
-
-
-@pytest.mark.elasticsearch
-def test_elasticsearch_synonyms():
-    synonyms = ["i-pod, i pod, ipod", "sea biscuit, sea biscit, seabiscuit", "foo, foo bar, baz"]
-    synonym_type = "synonym_graph"
-
-    client = Elasticsearch()
-    client.indices.delete(index="haystack_synonym_arg", ignore=[404])
-    document_store = ElasticsearchDocumentStore(
-        index="haystack_synonym_arg", synonyms=synonyms, synonym_type=synonym_type
-    )
-    indexed_settings = client.indices.get_settings(index="haystack_synonym_arg")
-
-    assert (
-        synonym_type
-        == indexed_settings["haystack_synonym_arg"]["settings"]["index"]["analysis"]["filter"]["synonym"]["type"]
-    )
-    assert (
-        synonyms
-        == indexed_settings["haystack_synonym_arg"]["settings"]["index"]["analysis"]["filter"]["synonym"]["synonyms"]
-    )
-
-
 @pytest.mark.parametrize(
     "document_store_with_docs", ["memory", "faiss", "milvus1", "weaviate", "elasticsearch"], indirect=True
 )

@@ -1980,105 +1744,6 @@ def test_DeepsetCloudDocumentStore_query_without_index():
     assert document_store.query(query="some query") == []


-@pytest.mark.elasticsearch
-def test_elasticsearch_search_field_mapping():
-
-    client = Elasticsearch()
-    client.indices.delete(index="haystack_search_field_mapping", ignore=[404])
-
-    index_data = [
-        {
-            "title": "Green tea components",
-            "meta": {
-                "content": "The green tea plant contains a range of healthy compounds that make it into the final drink",
-                "sub_content": "Drink tip",
-            },
-            "id": "1",
-        },
-        {
-            "title": "Green tea catechin",
-            "meta": {
-                "content": "Green tea contains a catechin called epigallocatechin-3-gallate (EGCG).",
-                "sub_content": "Ingredients tip",
-            },
-            "id": "2",
-        },
-        {
-            "title": "Minerals in Green tea",
-            "meta": {
-                "content": "Green tea also has small amounts of minerals that can benefit your health.",
-                "sub_content": "Minerals tip",
-            },
-            "id": "3",
-        },
-        {
-            "title": "Green tea Benefits",
-            "meta": {
-                "content": "Green tea does more than just keep you alert, it may also help boost brain function.",
-                "sub_content": "Health tip",
-            },
-            "id": "4",
-        },
-    ]
-
-    document_store = ElasticsearchDocumentStore(
-        index="haystack_search_field_mapping", search_fields=["content", "sub_content"], content_field="title"
-    )
-    document_store.write_documents(index_data)
-
-    indexed_settings = client.indices.get_mapping(index="haystack_search_field_mapping")
-
-    assert indexed_settings["haystack_search_field_mapping"]["mappings"]["properties"]["content"]["type"] == "text"
-    assert indexed_settings["haystack_search_field_mapping"]["mappings"]["properties"]["sub_content"]["type"] == "text"
-
-
-@pytest.mark.elasticsearch
-def test_elasticsearch_existing_alias():
-
-    client = Elasticsearch()
-    client.indices.delete(index="haystack_existing_alias_1", ignore=[404])
-    client.indices.delete(index="haystack_existing_alias_2", ignore=[404])
-    client.indices.delete_alias(index="_all", name="haystack_existing_alias", ignore=[404])
-
-    settings = {"mappings": {"properties": {"content": {"type": "text"}}}}
-
-    client.indices.create(index="haystack_existing_alias_1", body=settings)
-    client.indices.create(index="haystack_existing_alias_2", body=settings)
-
-    client.indices.put_alias(
-        index="haystack_existing_alias_1,haystack_existing_alias_2", name="haystack_existing_alias"
-    )
-
-    # To be valid, all indices related to the alias must have content field of type text
-    _ = ElasticsearchDocumentStore(index="haystack_existing_alias", search_fields=["content"])
-
-
-@pytest.mark.elasticsearch
-def test_elasticsearch_existing_alias_missing_fields():
-
-    client = Elasticsearch()
-    client.indices.delete(index="haystack_existing_alias_1", ignore=[404])
-    client.indices.delete(index="haystack_existing_alias_2", ignore=[404])
-    client.indices.delete_alias(index="_all", name="haystack_existing_alias", ignore=[404])
-
-    right_settings = {"mappings": {"properties": {"content": {"type": "text"}}}}
-
-    wrong_settings = {"mappings": {"properties": {"content": {"type": "histogram"}}}}
-
-    client.indices.create(index="haystack_existing_alias_1", body=right_settings)
-    client.indices.create(index="haystack_existing_alias_2", body=wrong_settings)
-
-    client.indices.put_alias(
-        index="haystack_existing_alias_1,haystack_existing_alias_2", name="haystack_existing_alias"
-    )
-
-    with pytest.raises(Exception):
-        # wrong field type for "content" in index "haystack_existing_alias_2"
-        _ = ElasticsearchDocumentStore(
-            index="haystack_existing_alias", search_fields=["content"], content_field="title"
-        )
-
-
 @pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
 def test_elasticsearch_brownfield_support(document_store_with_docs):
     new_document_store = InMemoryDocumentStore()

@@ -2122,9 +1787,7 @@ def test_elasticsearch_brownfield_support(document_store_with_docs):


 @pytest.mark.parametrize(
-    "document_store",
-    ["faiss", "milvus1", "milvus", "weaviate", "opensearch_faiss", "opensearch", "elasticsearch", "memory"],
-    indirect=True,
+    "document_store", ["faiss", "milvus1", "milvus", "weaviate", "opensearch", "elasticsearch", "memory"], indirect=True
 )
 def test_cosine_similarity(document_store: BaseDocumentStore):
     # below we will write documents to the store and then query it to see if vectors were normalized or not

@@ -2166,9 +1829,7 @@ def test_cosine_similarity(document_store: BaseDocumentStore):


 @pytest.mark.parametrize(
-    "document_store",
-    ["faiss", "milvus1", "milvus", "weaviate", "opensearch_faiss", "opensearch", "elasticsearch", "memory"],
-    indirect=True,
+    "document_store", ["faiss", "milvus1", "milvus", "weaviate", "opensearch", "elasticsearch", "memory"], indirect=True
 )
 def test_update_embeddings_cosine_similarity(document_store: BaseDocumentStore):
     # below we will write documents to the store and then query it to see if vectors were normalized

@@ -2228,7 +1889,7 @@ def test_update_embeddings_cosine_similarity(document_store: BaseDocumentStore):

 @pytest.mark.parametrize(
     "document_store_small",
-    ["faiss", "milvus1", "milvus", "weaviate", "memory", "elasticsearch", "opensearch", "opensearch_faiss"],
+    ["faiss", "milvus1", "milvus", "weaviate", "memory", "elasticsearch", "opensearch"],
     indirect=True,
 )
 def test_cosine_sanity_check(document_store_small):
test/document_stores/test_elasticsearch.py: 225 lines (new file)
@@ -0,0 +1,225 @@
import os
import pytest

import numpy as np

from haystack.schema import Document
from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore

from .test_base import DocumentStoreBaseTestAbstract
from .test_search_engine import SearchEngineDocumentStoreTestAbstract


class TestElasticsearchDocumentStore(DocumentStoreBaseTestAbstract, SearchEngineDocumentStoreTestAbstract):
    # Constants

    index_name = __name__

    @pytest.fixture
    def ds(self):
        """
        This fixture provides a working document store and takes care of removing the indices when done
        """
        labels_index_name = f"{self.index_name}_labels"
        ds = ElasticsearchDocumentStore(
            index=self.index_name,
            label_index=labels_index_name,
            host=os.environ.get("ELASTICSEARCH_HOST", "localhost"),
            create_index=True,
        )
        yield ds
        ds.delete_index(self.index_name)
        ds.delete_index(labels_index_name)

    @pytest.mark.integration
    def test___init__(self):
        # defaults
        _ = ElasticsearchDocumentStore()

        # list of hosts + single port
        _ = ElasticsearchDocumentStore(host=["localhost", "127.0.0.1"], port=9200)

        # list of hosts + list of ports (wrong)
        with pytest.raises(Exception):
            _ = ElasticsearchDocumentStore(host=["localhost", "127.0.0.1"], port=[9200])

        # list of hosts + list
        _ = ElasticsearchDocumentStore(host=["localhost", "127.0.0.1"], port=[9200, 9200])

        # only api_key
        with pytest.raises(Exception):
            _ = ElasticsearchDocumentStore(host=["localhost"], port=[9200], api_key="test")

        # api_key + id
        _ = ElasticsearchDocumentStore(host=["localhost"], port=[9200], api_key="test", api_key_id="test")

    @pytest.mark.integration
    def test_recreate_index(self, ds, documents, labels):
        ds.write_documents(documents)
        ds.write_labels(labels)

        # Create another document store on top of the previous one
        ds = ElasticsearchDocumentStore(index=ds.index, label_index=ds.label_index, recreate_index=True)
        assert len(ds.get_all_documents(index=ds.index)) == 0
        assert len(ds.get_all_labels(index=ds.label_index)) == 0

    @pytest.mark.integration
    def test_eq_filter(self, ds, documents):
        ds.write_documents(documents)

        filter = {"name": {"$eq": ["name_0"]}}
        filtered_docs = ds.get_all_documents(filters=filter)
        assert len(filtered_docs) == 3
        for doc in filtered_docs:
            assert doc.meta["name"] == "name_0"

        filter = {"numbers": {"$eq": [2, 4]}}
        filtered_docs = ds.query(query=None, filters=filter)
        assert len(filtered_docs) == 3
        for doc in filtered_docs:
            assert doc.meta["month"] == "01"
            assert doc.meta["numbers"] == [2, 4]

    @pytest.mark.integration
    def test_custom_fields(self, ds):
        index = "haystack_test_custom"
        document_store = ElasticsearchDocumentStore(
            index=index,
            content_field="custom_text_field",
            embedding_field="custom_embedding_field",
            recreate_index=True,
        )
        doc_to_write = {"custom_text_field": "test", "custom_embedding_field": np.random.rand(768).astype(np.float32)}
        document_store.write_documents([doc_to_write])
        documents = document_store.get_all_documents(return_embedding=True)
        assert len(documents) == 1
        assert documents[0].content == "test"
        np.testing.assert_array_equal(doc_to_write["custom_embedding_field"], documents[0].embedding)
        document_store.delete_index(index)

    @pytest.mark.integration
    def test_query_with_filters_and_missing_embeddings(self, ds, documents):
        ds.write_documents(documents)
        filters = {"month": {"$in": ["01", "03"]}}
        ds.skip_missing_embeddings = False
        with pytest.raises(ds._RequestError):
            ds.query_by_embedding(np.random.rand(768), filters=filters)

        ds.skip_missing_embeddings = True
        documents = ds.query_by_embedding(np.random.rand(768), filters=filters)
        assert len(documents) == 3

    @pytest.mark.integration
    def test_synonyms(self, ds):
        synonyms = ["i-pod, i pod, ipod", "sea biscuit, sea biscit, seabiscuit", "foo, foo bar, baz"]
        synonym_type = "synonym_graph"

        client = ds.client
        index = "haystack_synonym_arg"
        client.indices.delete(index=index, ignore=[404])
        ElasticsearchDocumentStore(index=index, synonyms=synonyms, synonym_type=synonym_type)
        indexed_settings = client.indices.get_settings(index=index)

        assert synonym_type == indexed_settings[index]["settings"]["index"]["analysis"]["filter"]["synonym"]["type"]
        assert synonyms == indexed_settings[index]["settings"]["index"]["analysis"]["filter"]["synonym"]["synonyms"]

    @pytest.mark.integration
    def test_search_field_mapping(self):
        index = "haystack_search_field_mapping"
        document_store = ElasticsearchDocumentStore(
            index=index, search_fields=["content", "sub_content"], content_field="title"
        )

        document_store.write_documents(
            [
                {
                    "title": "Green tea components",
                    "meta": {
                        "content": "The green tea plant contains a range of healthy compounds that make it into the final drink",
                        "sub_content": "Drink tip",
                    },
                    "id": "1",
                },
                {
                    "title": "Green tea catechin",
                    "meta": {
                        "content": "Green tea contains a catechin called epigallocatechin-3-gallate (EGCG).",
                        "sub_content": "Ingredients tip",
                    },
                    "id": "2",
                },
                {
                    "title": "Minerals in Green tea",
                    "meta": {
                        "content": "Green tea also has small amounts of minerals that can benefit your health.",
                        "sub_content": "Minerals tip",
                    },
                    "id": "3",
                },
                {
                    "title": "Green tea Benefits",
                    "meta": {
                        "content": "Green tea does more than just keep you alert, it may also help boost brain function.",
                        "sub_content": "Health tip",
                    },
                    "id": "4",
                },
            ]
        )

        indexed_settings = document_store.client.indices.get_mapping(index=index)

        assert indexed_settings[index]["mappings"]["properties"]["content"]["type"] == "text"
        assert indexed_settings[index]["mappings"]["properties"]["sub_content"]["type"] == "text"
        document_store.delete_index(index)

    @pytest.mark.integration
    def test_existing_alias(self, ds):
        client = ds.client
        client.indices.delete(index="haystack_existing_alias_1", ignore=[404])
        client.indices.delete(index="haystack_existing_alias_2", ignore=[404])
        client.indices.delete_alias(index="_all", name="haystack_existing_alias", ignore=[404])

        settings = {"mappings": {"properties": {"content": {"type": "text"}}}}

        client.indices.create(index="haystack_existing_alias_1", body=settings)
        client.indices.create(index="haystack_existing_alias_2", body=settings)

        client.indices.put_alias(
            index="haystack_existing_alias_1,haystack_existing_alias_2", name="haystack_existing_alias"
        )

        # To be valid, all indices related to the alias must have content field of type text
        ElasticsearchDocumentStore(index="haystack_existing_alias", search_fields=["content"])

    @pytest.mark.integration
    def test_existing_alias_missing_fields(self, ds):

        client = ds.client
        client.indices.delete(index="haystack_existing_alias_1", ignore=[404])
        client.indices.delete(index="haystack_existing_alias_2", ignore=[404])
        client.indices.delete_alias(index="_all", name="haystack_existing_alias", ignore=[404])

        right_settings = {"mappings": {"properties": {"content": {"type": "text"}}}}
        wrong_settings = {"mappings": {"properties": {"content": {"type": "histogram"}}}}

        client.indices.create(index="haystack_existing_alias_1", body=right_settings)
        client.indices.create(index="haystack_existing_alias_2", body=wrong_settings)
        client.indices.put_alias(
            index="haystack_existing_alias_1,haystack_existing_alias_2", name="haystack_existing_alias"
        )

        with pytest.raises(Exception):
            # wrong field type for "content" in index "haystack_existing_alias_2"
            ElasticsearchDocumentStore(
                index="haystack_existing_alias", search_fields=["content"], content_field="title"
            )

    @pytest.mark.integration
    def test_get_document_count_only_documents_without_embedding_arg(self, ds, documents):
        ds.write_documents(documents)

        assert ds.get_document_count() == 9
        assert ds.get_document_count(only_documents_without_embedding=True) == 3
        assert ds.get_document_count(only_documents_without_embedding=True, filters={"month": ["01"]}) == 0
        assert ds.get_document_count(only_documents_without_embedding=True, filters={"month": ["03"]}) == 3
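Since the entire inherited suite hangs off the `ds` fixture, retargeting it is a one-fixture override. A hypothetical sketch (not part of this commit) pointing the same tests at another cluster:

    import os
    import pytest
    from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore

    class TestElasticsearchOnOtherCluster(TestElasticsearchDocumentStore):  # hypothetical subclass
        @pytest.fixture
        def ds(self):
            ds = ElasticsearchDocumentStore(
                index="smoke_test",
                host=os.environ.get("ELASTICSEARCH_HOST", "localhost"),  # CI sets the service hostname
                create_index=True,
            )
            yield ds
            ds.delete_index("smoke_test")  # clean up the test index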
@@ -1,3 +1,4 @@
+import os
 import logging

 from unittest.mock import MagicMock, patch

@@ -19,15 +20,16 @@ from haystack.document_stores.opensearch import (
 from haystack.schema import Document, Label, Answer
 from haystack.errors import DocumentStoreError

-# Being all the tests in this module, ideally we wouldn't need a marker here,
-# but this is to allow this test suite to be skipped when running (e.g.)
-# `pytest test/document_stores --document-store-type=faiss`
-class TestOpenSearchDocumentStore:
+from .test_base import DocumentStoreBaseTestAbstract
+from .test_search_engine import SearchEngineDocumentStoreTestAbstract
+
+
+class TestOpenSearchDocumentStore(DocumentStoreBaseTestAbstract, SearchEngineDocumentStoreTestAbstract):

     # Constants

     query_emb = np.random.random_sample(size=(2, 2))
-    index_name = "myindex"
+    index_name = __name__

     # Fixtures

@@ -36,11 +38,15 @@ class TestOpenSearchDocumentStore:
         """
         This fixture provides a working document store and takes care of removing the indices when done
         """
-        index_name = __name__
-        labels_index_name = f"{index_name}_labels"
-        ds = OpenSearchDocumentStore(index=index_name, label_index=labels_index_name, port=9201, create_index=True)
+        labels_index_name = f"{self.index_name}_labels"
+        ds = OpenSearchDocumentStore(
+            index=self.index_name,
+            label_index=labels_index_name,
+            host=os.environ.get("OPENSEARCH_HOST", "localhost"),
+            create_index=True,
+        )
         yield ds
-        ds.delete_index(index_name)
+        ds.delete_index(self.index_name)
         ds.delete_index(labels_index_name)

     @pytest.fixture

@@ -82,35 +88,6 @@ class TestOpenSearchDocumentStore:
             "use_system_proxy": True,
         }

-    @pytest.fixture
-    def documents(self):
-        documents = []
-        for i in range(3):
-            documents.append(
-                Document(
-                    content=f"A Foo Document {i}",
-                    meta={"name": f"name_{i}", "year": "2020", "month": "01"},
-                    embedding=np.random.rand(768).astype(np.float32),
-                )
-            )
-
-            documents.append(
-                Document(
-                    content=f"A Bar Document {i}",
-                    meta={"name": f"name_{i}", "year": "2021", "month": "02"},
-                    embedding=np.random.rand(768).astype(np.float32),
-                )
-            )
-
-            documents.append(
-                Document(
-                    content=f"Document {i} without embeddings",
-                    meta={"name": f"name_{i}", "no_embedding": True, "month": "03"},
-                )
-            )
-
-        return documents
-
     @pytest.fixture
     def index(self):
         return {

@@ -143,46 +120,15 @@ class TestOpenSearchDocumentStore:
             },
         }

-    @pytest.fixture
-    def labels(self, documents):
-        labels = []
-        for i, d in enumerate(documents):
-            labels.append(
-                Label(
-                    query="query",
-                    document=d,
-                    is_correct_document=True,
-                    is_correct_answer=False,
-                    # create a mix set of labels
-                    origin="user-feedback" if i % 2 else "gold-label",
-                    answer=None if not i else Answer(f"the answer is {i}"),
-                )
-            )
-        return labels
-
     # Integration tests

     @pytest.mark.integration
     def test___init__(self):
-        OpenSearchDocumentStore(index="default_index", port=9201, create_index=True)
+        OpenSearchDocumentStore(index="default_index", create_index=True)

     @pytest.mark.integration
     def test___init___faiss(self):
-        OpenSearchDocumentStore(index="faiss_index", port=9201, create_index=True, knn_engine="faiss")
-
-    @pytest.mark.integration
-    def test_write_documents(self, ds, documents):
-        ds.write_documents(documents)
-        docs = ds.get_all_documents()
-        assert len(docs) == len(documents)
-        for i, doc in enumerate(docs):
-            expected = documents[i]
-            assert doc.id == expected.id
-
-    @pytest.mark.integration
-    def test_write_labels(self, ds, labels):
-        ds.write_labels(labels)
-        assert ds.get_all_labels() == labels
+        OpenSearchDocumentStore(index="faiss_index", create_index=True, knn_engine="faiss")

     @pytest.mark.integration
     def test_recreate_index(self, ds, documents, labels):

@@ -190,7 +136,7 @@ class TestOpenSearchDocumentStore:
         ds.write_labels(labels)

         # Create another document store on top of the previous one
-        ds = OpenSearchDocumentStore(index=ds.index, label_index=ds.label_index, recreate_index=True, port=9201)
+        ds = OpenSearchDocumentStore(index=ds.index, label_index=ds.label_index, recreate_index=True)
         assert len(ds.get_all_documents(index=ds.index)) == 0
         assert len(ds.get_all_labels(index=ds.label_index)) == 0

@@ -213,7 +159,7 @@ class TestOpenSearchDocumentStore:
         assert ds.embeddings_field_supports_similarity == True
         index_name = ds.index
         with caplog.at_level(logging.WARNING):
-            ds = OpenSearchDocumentStore(port=9201, knn_engine="faiss", index=index_name)
+            ds = OpenSearchDocumentStore(knn_engine="faiss", index=index_name)
             warning = (
                 "Embedding field 'embedding' was initially created with knn_engine 'nmslib', but knn_engine was "
                 "set to 'faiss' when initializing OpenSearchDocumentStore. Falling back to slow exact vector "
test/document_stores/test_search_engine.py: 58 lines (new file)
@@ -0,0 +1,58 @@
import pytest
from haystack.document_stores.search_engine import SearchEngineDocumentStore, prepare_hosts


@pytest.mark.unit
def test_prepare_hosts():
    pass


@pytest.mark.document_store
class SearchEngineDocumentStoreTestAbstract:
    """
    This is the base class for any Searchengine Document Store testsuite, it doesn't have the `Test` prefix in the name
    because we want to run its methods only in subclasses.
    """

    @pytest.mark.integration
    def test___do_bulk(self):
        pass

    @pytest.mark.integration
    def test___do_scan(self):
        pass

    @pytest.mark.integration
    def test_query_by_embedding(self):
        pass

    @pytest.mark.integration
    def test_get_meta_values_by_key(self, ds, documents):
        ds.write_documents(documents)

        # test without filters or query
        result = ds.get_metadata_values_by_key(key="name")
        assert result == [
            {"count": 3, "value": "name_0"},
            {"count": 3, "value": "name_1"},
            {"count": 3, "value": "name_2"},
        ]

        # test with filters but no query
        result = ds.get_metadata_values_by_key(key="year", filters={"month": ["01"]})
        assert result == [{"count": 3, "value": "2020"}]

        # test with filters & query
        result = ds.get_metadata_values_by_key(key="year", query="Bar")
        assert result == [{"count": 3, "value": "2021"}]


@pytest.mark.document_store
class TestSearchEngineDocumentStore:
    """
    This class tests the concrete methods in SearchEngineDocumentStore
    """

    @pytest.mark.integration
    def test__split_document_list(self):
        pass