mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-09 22:33:47 +00:00
API tests (#2738)
* clean up tests and run earlier * use change detection * better naming, skip ES * more cleanup * fix job name * dummy commit to trigger the CI * mock away the PDF converter * make the test compatible with 3.7 * removed leftover * always run the api tests, use a matrix for the OS * refactor all the tests * remove outdated dependency * pylint * new abstract method * adjust for older python versions * rename pipeline file * address PR comments
This commit is contained in:
parent
0388284d71
commit
82df677ebf
102
.github/workflows/tests.yml
vendored
102
.github/workflows/tests.yml
vendored
@ -23,12 +23,12 @@ on:
|
|||||||
|
|
||||||
env:
|
env:
|
||||||
PYTEST_PARAMS: --maxfail=5 --durations=10 --suppress-no-test-exit-code
|
PYTEST_PARAMS: --maxfail=5 --durations=10 --suppress-no-test-exit-code
|
||||||
SUITES_EXCLUDED_FROM_WINDOWS:
|
SUITES_EXCLUDED_FROM_WINDOWS:
|
||||||
--ignore=test/pipelines/test_ray.py
|
--ignore=test/pipelines/test_ray.py
|
||||||
--ignore=test/document_stores/test_knowledge_graph.py
|
--ignore=test/document_stores/test_knowledge_graph.py
|
||||||
--ignore=test/nodes/test_audio.py
|
--ignore=test/nodes/test_audio.py
|
||||||
--ignore=test/nodes/test_connector.py
|
--ignore=test/nodes/test_connector.py
|
||||||
--ignore=test/nodes/test_summarizer_translation.py
|
--ignore=test/nodes/test_summarizer_translation.py
|
||||||
--ignore=test/nodes/test_summarizer.py
|
--ignore=test/nodes/test_summarizer.py
|
||||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||||
|
|
||||||
@ -55,11 +55,11 @@ jobs:
|
|||||||
# pip install rest_api/
|
# pip install rest_api/
|
||||||
# pip install ui/
|
# pip install ui/
|
||||||
# FIXME --install-types does not work properly yet, see https://github.com/python/mypy/issues/10600
|
# FIXME --install-types does not work properly yet, see https://github.com/python/mypy/issues/10600
|
||||||
# Hotfixing by installing type packages explicitly.
|
# Hotfixing by installing type packages explicitly.
|
||||||
# Run mypy --install-types haystack locally to ensure the list is still up to date
|
# Run mypy --install-types haystack locally to ensure the list is still up to date
|
||||||
# mypy --install-types --non-interactive .
|
# mypy --install-types --non-interactive .
|
||||||
pip install mypy pydantic types-Markdown types-PyYAML types-requests types-setuptools types-six types-tabulate types-chardet types-emoji types-protobuf
|
pip install mypy pydantic types-Markdown types-PyYAML types-requests types-setuptools types-six types-tabulate types-chardet types-emoji types-protobuf
|
||||||
|
|
||||||
- name: Mypy
|
- name: Mypy
|
||||||
run: |
|
run: |
|
||||||
echo "=== haystack/ ==="
|
echo "=== haystack/ ==="
|
||||||
@ -68,7 +68,7 @@ jobs:
|
|||||||
mypy rest_api --exclude=rest_api/build/ --exclude=rest_api/test/
|
mypy rest_api --exclude=rest_api/build/ --exclude=rest_api/test/
|
||||||
echo "=== ui/ ==="
|
echo "=== ui/ ==="
|
||||||
mypy ui --exclude=ui/build/ --exclude=ui/test/
|
mypy ui --exclude=ui/build/ --exclude=ui/test/
|
||||||
|
|
||||||
pylint:
|
pylint:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
@ -91,7 +91,7 @@ jobs:
|
|||||||
- pylint
|
- pylint
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false # Avoid cancelling the others if one of these fails
|
fail-fast: false # Avoid cancelling the others if one of these fails
|
||||||
matrix:
|
matrix:
|
||||||
folder:
|
folder:
|
||||||
- "nodes"
|
- "nodes"
|
||||||
- "pipelines"
|
- "pipelines"
|
||||||
@ -127,15 +127,15 @@ jobs:
|
|||||||
TOKENIZERS_PARALLELISM: 'false'
|
TOKENIZERS_PARALLELISM: 'false'
|
||||||
run: |
|
run: |
|
||||||
pytest ${{ env.PYTEST_PARAMS }} -m "not elasticsearch and not faiss and not milvus and not milvus1 and not weaviate and not pinecone and not integration" test/${{ matrix.folder }} --document_store_type=memory
|
pytest ${{ env.PYTEST_PARAMS }} -m "not elasticsearch and not faiss and not milvus and not milvus1 and not weaviate and not pinecone and not integration" test/${{ matrix.folder }} --document_store_type=memory
|
||||||
|
|
||||||
|
|
||||||
unit-tests-windows:
|
unit-tests-windows:
|
||||||
needs:
|
needs:
|
||||||
- mypy
|
- mypy
|
||||||
- pylint
|
- pylint
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false # Avoid cancelling the others if one of these fails
|
fail-fast: false # Avoid cancelling the others if one of these fails
|
||||||
matrix:
|
matrix:
|
||||||
folder:
|
folder:
|
||||||
- "nodes"
|
- "nodes"
|
||||||
- "pipelines"
|
- "pipelines"
|
||||||
@ -170,7 +170,7 @@ jobs:
|
|||||||
TOKENIZERS_PARALLELISM: 'false'
|
TOKENIZERS_PARALLELISM: 'false'
|
||||||
run: |
|
run: |
|
||||||
pytest ${{ env.PYTEST_PARAMS }} -m "not elasticsearch and not faiss and not milvus and not milvus1 and not weaviate and not pinecone and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/${{ matrix.folder }} --document_store_type=memory
|
pytest ${{ env.PYTEST_PARAMS }} -m "not elasticsearch and not faiss and not milvus and not milvus1 and not weaviate and not pinecone and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/${{ matrix.folder }} --document_store_type=memory
|
||||||
|
|
||||||
|
|
||||||
elasticsearch-tests-linux:
|
elasticsearch-tests-linux:
|
||||||
needs:
|
needs:
|
||||||
@ -233,7 +233,7 @@ jobs:
|
|||||||
TOKENIZERS_PARALLELISM: 'false'
|
TOKENIZERS_PARALLELISM: 'false'
|
||||||
run: |
|
run: |
|
||||||
pytest ${{ env.PYTEST_PARAMS }} -m "elasticsearch and not integration" test/document_stores/ ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} --document_store_type=elasticsearch
|
pytest ${{ env.PYTEST_PARAMS }} -m "elasticsearch and not integration" test/document_stores/ ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} --document_store_type=elasticsearch
|
||||||
|
|
||||||
|
|
||||||
faiss-tests-linux:
|
faiss-tests-linux:
|
||||||
needs:
|
needs:
|
||||||
@ -392,7 +392,7 @@ jobs:
|
|||||||
TOKENIZERS_PARALLELISM: 'false'
|
TOKENIZERS_PARALLELISM: 'false'
|
||||||
run: |
|
run: |
|
||||||
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=weaviate
|
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=weaviate
|
||||||
|
|
||||||
# FIXME: seems like we can't run containers on Windows
|
# FIXME: seems like we can't run containers on Windows
|
||||||
# weaviate-tests-windows:
|
# weaviate-tests-windows:
|
||||||
# needs:
|
# needs:
|
||||||
@ -491,14 +491,16 @@ jobs:
|
|||||||
# run: |
|
# run: |
|
||||||
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone
|
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone
|
||||||
|
|
||||||
|
rest-and-ui:
|
||||||
rest-and-ui-tests-linux:
|
|
||||||
needs:
|
needs:
|
||||||
- unit-tests-linux
|
- mypy
|
||||||
- elasticsearch-tests-linux
|
- pylint
|
||||||
|
|
||||||
runs-on: ubuntu-latest
|
strategy:
|
||||||
if: contains(github.event.pull_request.labels.*.name, 'topic:rest_api') || !github.event.pull_request.draft
|
matrix:
|
||||||
|
os: [windows-latest, ubuntu-latest]
|
||||||
|
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
@ -506,55 +508,15 @@ jobs:
|
|||||||
- name: Setup Python
|
- name: Setup Python
|
||||||
uses: ./.github/actions/python_cache/
|
uses: ./.github/actions/python_cache/
|
||||||
|
|
||||||
- name: Run Elasticsearch
|
|
||||||
run: docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx256m" elasticsearch:7.9.2
|
|
||||||
|
|
||||||
- name: Install pdftotext
|
|
||||||
run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin
|
|
||||||
|
|
||||||
- name: Install REST API and UI
|
- name: Install REST API and UI
|
||||||
run: |
|
run: |
|
||||||
pip install rest_api/
|
pip install rest_api/
|
||||||
pip install ui/
|
pip install ui/
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
env:
|
|
||||||
TOKENIZERS_PARALLELISM: 'false'
|
|
||||||
run: |
|
run: |
|
||||||
pytest ${{ env.PYTEST_PARAMS }} rest_api/ ui/
|
pytest ${{ env.PYTEST_PARAMS }} rest_api/ ui/
|
||||||
|
|
||||||
rest-and-ui-tests-windows:
|
|
||||||
needs:
|
|
||||||
- unit-tests-windows
|
|
||||||
- elasticsearch-tests-windows
|
|
||||||
runs-on: windows-latest
|
|
||||||
if: contains(github.event.pull_request.labels.*.name, 'topic:rest_api') && contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v2
|
|
||||||
|
|
||||||
- name: Setup Python
|
|
||||||
uses: ./.github/actions/python_cache/
|
|
||||||
with:
|
|
||||||
prefix: windows
|
|
||||||
|
|
||||||
- name: Set up Elasticsearch and pdftotext
|
|
||||||
run: |
|
|
||||||
choco install xpdf-utils
|
|
||||||
choco install openjdk11
|
|
||||||
refreshenv
|
|
||||||
choco install elasticsearch --version=7.9.2
|
|
||||||
refreshenv
|
|
||||||
Get-Service elasticsearch-service-x64 | Start-Service
|
|
||||||
- name: Install REST API and UI
|
|
||||||
run: |
|
|
||||||
pip install rest_api/
|
|
||||||
pip install ui/
|
|
||||||
- name: Run tests
|
|
||||||
env:
|
|
||||||
TOKENIZERS_PARALLELISM: 'false'
|
|
||||||
run: |
|
|
||||||
pytest ${{ env.PYTEST_PARAMS }} ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} rest_api/ ui/
|
|
||||||
|
|
||||||
|
|
||||||
integration-tests-linux:
|
integration-tests-linux:
|
||||||
needs:
|
needs:
|
||||||
@ -564,7 +526,7 @@ jobs:
|
|||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false # Avoid cancelling the others if one of these fails
|
fail-fast: false # Avoid cancelling the others if one of these fails
|
||||||
matrix:
|
matrix:
|
||||||
folder:
|
folder:
|
||||||
- "nodes"
|
- "nodes"
|
||||||
- "pipelines"
|
- "pipelines"
|
||||||
@ -626,7 +588,7 @@ jobs:
|
|||||||
|
|
||||||
|
|
||||||
integration-tests-windows:
|
integration-tests-windows:
|
||||||
needs:
|
needs:
|
||||||
- unit-tests-windows
|
- unit-tests-windows
|
||||||
- elasticsearch-tests-windows
|
- elasticsearch-tests-windows
|
||||||
runs-on: windows-latest
|
runs-on: windows-latest
|
||||||
@ -635,7 +597,7 @@ jobs:
|
|||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false # Avoid cancelling the others if one of these fails
|
fail-fast: false # Avoid cancelling the others if one of these fails
|
||||||
matrix:
|
matrix:
|
||||||
folder:
|
folder:
|
||||||
- "nodes"
|
- "nodes"
|
||||||
- "pipelines"
|
- "pipelines"
|
||||||
@ -669,10 +631,10 @@ jobs:
|
|||||||
# FIXME many tests are disabled here!
|
# FIXME many tests are disabled here!
|
||||||
run: |
|
run: |
|
||||||
pytest ${{ env.PYTEST_PARAMS }} -m "integration and not tika and not graphdb" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/${{ matrix.folder }} --document_store_type=memory,faiss,elasticsearch
|
pytest ${{ env.PYTEST_PARAMS }} -m "integration and not tika and not graphdb" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/${{ matrix.folder }} --document_store_type=memory,faiss,elasticsearch
|
||||||
|
|
||||||
|
|
||||||
# This CI action mirrors autoformat.yml, with the difference that it
|
# This CI action mirrors autoformat.yml, with the difference that it
|
||||||
# runs on Haystack's end. If the contributor hasn't run autoformat.yml,
|
# runs on Haystack's end. If the contributor hasn't run autoformat.yml,
|
||||||
# then this check will fail.
|
# then this check will fail.
|
||||||
bot-check:
|
bot-check:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@ -680,7 +642,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
|
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
|
|
||||||
- name: Setup Python
|
- name: Setup Python
|
||||||
uses: ./.github/actions/python_cache/
|
uses: ./.github/actions/python_cache/
|
||||||
|
|
||||||
|
|||||||
37
rest_api/test/samples/test.haystack-pipeline.yml
Normal file
37
rest_api/test/samples/test.haystack-pipeline.yml
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
version: 'unstable'
|
||||||
|
|
||||||
|
components:
|
||||||
|
- name: TestReader
|
||||||
|
type: MockReader
|
||||||
|
- name: TestRetriever
|
||||||
|
type: MockRetriever
|
||||||
|
params:
|
||||||
|
document_store: TestDocumentStore
|
||||||
|
- name: TestDocumentStore
|
||||||
|
type: MockDocumentStore
|
||||||
|
- name: TestPreprocessor
|
||||||
|
type: PreProcessor
|
||||||
|
params:
|
||||||
|
clean_whitespace: true
|
||||||
|
- name: TestPDFConverter
|
||||||
|
type: MockPDFToTextConverter
|
||||||
|
params:
|
||||||
|
remove_numeric_tables: false
|
||||||
|
|
||||||
|
|
||||||
|
pipelines:
|
||||||
|
- name: test-query
|
||||||
|
nodes:
|
||||||
|
- name: TestRetriever
|
||||||
|
inputs: [Query]
|
||||||
|
- name: TestReader
|
||||||
|
inputs: [TestRetriever]
|
||||||
|
|
||||||
|
- name: test-indexing
|
||||||
|
nodes:
|
||||||
|
- name: TestPDFConverter
|
||||||
|
inputs: [File]
|
||||||
|
- name: TestPreprocessor
|
||||||
|
inputs: [TestPDFConverter]
|
||||||
|
- name: TestDocumentStore
|
||||||
|
inputs: [TestPreprocessor]
|
||||||
@ -1,9 +1,10 @@
|
|||||||
from typing import Dict, List, Optional, Union
|
from typing import Dict, List, Optional, Union, Generator
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from copy import deepcopy
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
|
from unittest import mock
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from fastapi.testclient import TestClient
|
from fastapi.testclient import TestClient
|
||||||
@ -11,41 +12,12 @@ from haystack import Document, Answer
|
|||||||
from haystack.nodes import BaseReader, BaseRetriever
|
from haystack.nodes import BaseReader, BaseRetriever
|
||||||
from haystack.document_stores import BaseDocumentStore
|
from haystack.document_stores import BaseDocumentStore
|
||||||
from haystack.schema import Label
|
from haystack.schema import Label
|
||||||
|
from haystack.nodes.file_converter import BaseConverter
|
||||||
|
|
||||||
from rest_api.utils import get_app, get_pipelines
|
from rest_api.utils import get_app
|
||||||
|
|
||||||
|
|
||||||
FEEDBACK = {
|
TEST_QUERY = "Who made the PDF specification?"
|
||||||
"id": "123",
|
|
||||||
"query": "Who made the PDF specification?",
|
|
||||||
"document": {
|
|
||||||
"content": "A sample PDF file\n\nHistory and standardization\nFormat (PDF) Adobe Systems made the PDF specification available free of charge in 1993. In the early years PDF was popular mainly in desktop publishing workflows, and competed with a variety of formats such as DjVu, Envoy, Common Ground Digital Paper, Farallon Replica and even Adobe's own PostScript format. PDF was a proprietary format controlled by Adobe until it was released as an open standard on July 1, 2008, and published by the International Organization for Standardization as ISO 32000-1:2008, at which time control of the specification passed to an ISO Committee of volunteer industry experts. In 2008, Adobe published a Public Patent License to ISO 32000-1 granting royalty-free rights for all patents owned by Adobe that are necessary to make, use, sell, and distribute PDF-compliant implementations. PDF 1.7, the sixth edition of the PDF specification that became ISO 32000-1, includes some proprietary technologies defined only by Adobe, such as Adobe XML Forms Architecture (XFA) and JavaScript extension for Acrobat, which are referenced by ISO 32000-1 as normative and indispensable for the full implementation of the ISO 32000-1 specification. These proprietary technologies are not standardized and their specification is published only on Adobes website. Many of them are also not supported by popular third-party implementations of PDF. Column 1",
|
|
||||||
"content_type": "text",
|
|
||||||
"score": None,
|
|
||||||
"id": "fc18c987a8312e72a47fb1524f230bb0",
|
|
||||||
"meta": {},
|
|
||||||
"embedding": None,
|
|
||||||
},
|
|
||||||
"answer": {
|
|
||||||
"answer": "Adobe Systems",
|
|
||||||
"type": "extractive",
|
|
||||||
"context": "A sample PDF file\n\nHistory and standardization\nFormat (PDF) Adobe Systems made the PDF specification available free of charge in 1993. In the early ye",
|
|
||||||
"offsets_in_context": [{"start": 60, "end": 73}],
|
|
||||||
"offsets_in_document": [{"start": 60, "end": 73}],
|
|
||||||
"document_id": "fc18c987a8312e72a47fb1524f230bb0",
|
|
||||||
"meta": {},
|
|
||||||
"score": None,
|
|
||||||
},
|
|
||||||
"is_correct_answer": True,
|
|
||||||
"is_correct_document": True,
|
|
||||||
"origin": "user-feedback",
|
|
||||||
"pipeline_id": "some-123",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def exclude_no_answer(responses):
|
|
||||||
responses["answers"] = [response for response in responses["answers"] if response.get("answer", None)]
|
|
||||||
return responses
|
|
||||||
|
|
||||||
|
|
||||||
class MockReader(BaseReader):
|
class MockReader(BaseReader):
|
||||||
@ -97,83 +69,23 @@ class MockRetriever(BaseRetriever):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
class MockPDFToTextConverter(BaseConverter):
|
||||||
def yaml_pipeline_path(tmp_path_factory):
|
mocker = MagicMock()
|
||||||
root_temp = tmp_path_factory.mktemp("tests")
|
|
||||||
pipeline_path = root_temp / "test.haystack-pipeline.yml"
|
|
||||||
with open(pipeline_path, "w") as pipeline_file:
|
|
||||||
pipeline_file.write(
|
|
||||||
f"""
|
|
||||||
version: 'unstable'
|
|
||||||
|
|
||||||
components:
|
def convert(self, *args, **kwargs):
|
||||||
- name: TestReader
|
self.mocker.convert(*args, **kwargs)
|
||||||
type: MockReader
|
return []
|
||||||
- name: TestRetriever
|
|
||||||
type: MockRetriever
|
|
||||||
params:
|
|
||||||
document_store: TestDocumentStore
|
|
||||||
- name: TestDocumentStore
|
|
||||||
type: SQLDocumentStore
|
|
||||||
params:
|
|
||||||
url: sqlite:///{root_temp.absolute()}/test_docstore.db
|
|
||||||
- name: TestPreprocessor
|
|
||||||
type: PreProcessor
|
|
||||||
params:
|
|
||||||
clean_whitespace: true
|
|
||||||
- name: TestPDFConverter
|
|
||||||
type: PDFToTextConverter
|
|
||||||
params:
|
|
||||||
remove_numeric_tables: false
|
|
||||||
|
|
||||||
|
|
||||||
pipelines:
|
class MockDocumentStore(BaseDocumentStore):
|
||||||
- name: test-query
|
mocker = MagicMock()
|
||||||
nodes:
|
|
||||||
- name: TestRetriever
|
|
||||||
inputs: [Query]
|
|
||||||
- name: TestReader
|
|
||||||
inputs: [TestRetriever]
|
|
||||||
|
|
||||||
- name: test-indexing
|
def write_documents(self, *args, **kwargs):
|
||||||
nodes:
|
pass
|
||||||
- name: TestPDFConverter
|
|
||||||
inputs: [File]
|
|
||||||
- name: TestPreprocessor
|
|
||||||
inputs: [TestPDFConverter]
|
|
||||||
- name: TestDocumentStore
|
|
||||||
inputs: [TestPreprocessor]
|
|
||||||
"""
|
|
||||||
)
|
|
||||||
return pipeline_path
|
|
||||||
|
|
||||||
|
def get_all_documents(self, *args, **kwargs) -> List[Document]:
|
||||||
@pytest.fixture
|
self.mocker.get_all_documents(*args, **kwargs)
|
||||||
def client(yaml_pipeline_path):
|
return [
|
||||||
os.environ["PIPELINE_YAML_PATH"] = str(yaml_pipeline_path)
|
|
||||||
os.environ["INDEXING_PIPELINE_NAME"] = "test-indexing"
|
|
||||||
os.environ["QUERY_PIPELINE_NAME"] = "test-query"
|
|
||||||
|
|
||||||
app = get_app()
|
|
||||||
client = TestClient(app)
|
|
||||||
|
|
||||||
pipelines = get_pipelines()
|
|
||||||
document_store: BaseDocumentStore = pipelines["document_store"]
|
|
||||||
document_store.delete_documents()
|
|
||||||
document_store.delete_labels()
|
|
||||||
|
|
||||||
yield client
|
|
||||||
|
|
||||||
document_store.delete_documents()
|
|
||||||
document_store.delete_labels()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def populated_client(client: TestClient):
|
|
||||||
pipelines = get_pipelines()
|
|
||||||
document_store: BaseDocumentStore = pipelines["document_store"]
|
|
||||||
document_store.write_documents(
|
|
||||||
[
|
|
||||||
Document(
|
Document(
|
||||||
content=dedent(
|
content=dedent(
|
||||||
"""\
|
"""\
|
||||||
@ -208,217 +120,301 @@ def populated_client(client: TestClient):
|
|||||||
meta={"name": "test.txt", "test_key": "test_value", "test_index": "2"},
|
meta={"name": "test.txt", "test_key": "test_value", "test_index": "2"},
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
)
|
|
||||||
yield client
|
def get_all_documents_generator(self, *args, **kwargs) -> Generator[Document, None, None]:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def get_all_labels(self, *args, **kwargs) -> List[Label]:
|
||||||
|
self.mocker.get_all_labels(*args, **kwargs)
|
||||||
|
|
||||||
|
def get_document_by_id(self, *args, **kwargs) -> Optional[Document]:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def get_document_count(self, *args, **kwargs) -> int:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def query_by_embedding(self, *args, **kwargs) -> List[Document]:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def get_label_count(self, *args, **kwargs) -> int:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def write_labels(self, *args, **kwargs):
|
||||||
|
self.mocker.write_labels(*args, **kwargs)
|
||||||
|
|
||||||
|
def delete_documents(self, *args, **kwargs):
|
||||||
|
self.mocker.delete_documents(*args, **kwargs)
|
||||||
|
|
||||||
|
def delete_labels(self, *args, **kwargs):
|
||||||
|
self.mocker.delete_labels(*args, **kwargs)
|
||||||
|
|
||||||
|
def delete_index(self, index: str):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _create_document_field_map(self) -> Dict:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def get_documents_by_id(self, *args, **kwargs) -> List[Document]:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def update_document_meta(self, *args, **kwargs):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="function")
|
||||||
|
def feedback():
|
||||||
|
"""
|
||||||
|
Some test functions change the content of the `feedback` dictionary, let's keep
|
||||||
|
the default "function" scope so we don't need to deepcopy the dict each time
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
"id": "123",
|
||||||
|
"query": "Who made the PDF specification?",
|
||||||
|
"document": {
|
||||||
|
"content": "A sample PDF file\n\nHistory and standardization\nFormat (PDF) Adobe Systems made the PDF specification available free of charge in 1993. In the early years PDF was popular mainly in desktop publishing workflows, and competed with a variety of formats such as DjVu, Envoy, Common Ground Digital Paper, Farallon Replica and even Adobe's own PostScript format. PDF was a proprietary format controlled by Adobe until it was released as an open standard on July 1, 2008, and published by the International Organization for Standardization as ISO 32000-1:2008, at which time control of the specification passed to an ISO Committee of volunteer industry experts. In 2008, Adobe published a Public Patent License to ISO 32000-1 granting royalty-free rights for all patents owned by Adobe that are necessary to make, use, sell, and distribute PDF-compliant implementations. PDF 1.7, the sixth edition of the PDF specification that became ISO 32000-1, includes some proprietary technologies defined only by Adobe, such as Adobe XML Forms Architecture (XFA) and JavaScript extension for Acrobat, which are referenced by ISO 32000-1 as normative and indispensable for the full implementation of the ISO 32000-1 specification. These proprietary technologies are not standardized and their specification is published only on Adobes website. Many of them are also not supported by popular third-party implementations of PDF. Column 1",
|
||||||
|
"content_type": "text",
|
||||||
|
"score": None,
|
||||||
|
"id": "fc18c987a8312e72a47fb1524f230bb0",
|
||||||
|
"meta": {},
|
||||||
|
"embedding": None,
|
||||||
|
},
|
||||||
|
"answer": {
|
||||||
|
"answer": "Adobe Systems",
|
||||||
|
"type": "extractive",
|
||||||
|
"context": "A sample PDF file\n\nHistory and standardization\nFormat (PDF) Adobe Systems made the PDF specification available free of charge in 1993. In the early ye",
|
||||||
|
"offsets_in_context": [{"start": 60, "end": 73}],
|
||||||
|
"offsets_in_document": [{"start": 60, "end": 73}],
|
||||||
|
"document_id": "fc18c987a8312e72a47fb1524f230bb0",
|
||||||
|
"meta": {},
|
||||||
|
"score": None,
|
||||||
|
},
|
||||||
|
"is_correct_answer": True,
|
||||||
|
"is_correct_document": True,
|
||||||
|
"origin": "user-feedback",
|
||||||
|
"pipeline_id": "some-123",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def populated_client_with_feedback(populated_client: TestClient):
|
def client():
|
||||||
pipelines = get_pipelines()
|
yaml_pipeline_path = Path(__file__).parent.resolve() / "samples" / "test.haystack-pipeline.yml"
|
||||||
document_store: BaseDocumentStore = pipelines["document_store"]
|
os.environ["PIPELINE_YAML_PATH"] = str(yaml_pipeline_path)
|
||||||
document_store.write_labels([FEEDBACK])
|
os.environ["INDEXING_PIPELINE_NAME"] = "test-indexing"
|
||||||
yield populated_client
|
os.environ["QUERY_PIPELINE_NAME"] = "test-query"
|
||||||
|
|
||||||
|
app = get_app()
|
||||||
|
client = TestClient(app)
|
||||||
|
|
||||||
|
MockDocumentStore.mocker.reset_mock()
|
||||||
|
MockPDFToTextConverter.mocker.reset_mock()
|
||||||
|
|
||||||
|
return client
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
def test_get_all_documents(client):
|
||||||
def api_document_store():
|
response = client.post(url="/documents/get_by_filters", data='{"filters": {}}')
|
||||||
pipelines = get_pipelines()
|
|
||||||
yield pipelines["document_store"]
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_all_documents(populated_client: TestClient):
|
|
||||||
response = populated_client.post(url="/documents/get_by_filters", data='{"filters": {}}')
|
|
||||||
assert 200 == response.status_code
|
assert 200 == response.status_code
|
||||||
|
# Ensure `get_all_documents` was called with the expected `filters` param
|
||||||
|
MockDocumentStore.mocker.get_all_documents.assert_called_with(filters={})
|
||||||
|
# Ensure results are part of the response body
|
||||||
response_json = response.json()
|
response_json = response.json()
|
||||||
|
|
||||||
assert len(response_json) == 2
|
assert len(response_json) == 2
|
||||||
|
|
||||||
|
|
||||||
def test_get_documents_with_filters(populated_client: TestClient):
|
def test_get_documents_with_filters(client):
|
||||||
response = populated_client.post(url="/documents/get_by_filters", data='{"filters": {"test_index": ["2"]}}')
|
response = client.post(url="/documents/get_by_filters", data='{"filters": {"test_index": ["2"]}}')
|
||||||
assert 200 == response.status_code
|
assert 200 == response.status_code
|
||||||
response_json = response.json()
|
# Ensure `get_all_documents` was called with the expected `filters` param
|
||||||
|
MockDocumentStore.mocker.get_all_documents.assert_called_with(filters={"test_index": ["2"]})
|
||||||
assert len(response_json) == 1
|
|
||||||
assert response_json[0]["meta"]["test_index"] == "2"
|
|
||||||
|
|
||||||
|
|
||||||
def test_delete_all_documents(populated_client: TestClient, api_document_store: BaseDocumentStore):
|
def test_delete_all_documents(client):
|
||||||
response = populated_client.post(url="/documents/delete_by_filters", data='{"filters": {}}')
|
response = client.post(url="/documents/delete_by_filters", data='{"filters": {}}')
|
||||||
assert 200 == response.status_code
|
assert 200 == response.status_code
|
||||||
|
# Ensure `delete_documents` was called on the Document Store instance
|
||||||
remaining_docs = api_document_store.get_all_documents()
|
MockDocumentStore.mocker.delete_documents.assert_called_with(filters={})
|
||||||
assert len(remaining_docs) == 0
|
|
||||||
|
|
||||||
|
|
||||||
def test_delete_documents_with_filters(populated_client: TestClient, api_document_store: BaseDocumentStore):
|
def test_delete_documents_with_filters(client):
|
||||||
response = populated_client.post(url="/documents/delete_by_filters", data='{"filters": {"test_index": ["1"]}}')
|
response = client.post(url="/documents/delete_by_filters", data='{"filters": {"test_index": ["1"]}}')
|
||||||
assert 200 == response.status_code
|
assert 200 == response.status_code
|
||||||
|
# Ensure `delete_documents` was called on the Document Store instance with the same params
|
||||||
remaining_docs = api_document_store.get_all_documents()
|
MockDocumentStore.mocker.delete_documents.assert_called_with(filters={"test_index": ["1"]})
|
||||||
assert len(remaining_docs) == 1
|
|
||||||
assert remaining_docs[0].meta["test_index"] == "2"
|
|
||||||
|
|
||||||
|
|
||||||
def test_file_upload(client: TestClient, api_document_store: BaseDocumentStore):
|
def test_file_upload(client):
|
||||||
file_to_upload = {"files": (Path(__file__).parent / "samples" / "pdf" / "sample_pdf_1.pdf").open("rb")}
|
file_to_upload = {"files": (Path(__file__).parent / "samples" / "pdf" / "sample_pdf_1.pdf").open("rb")}
|
||||||
response = client.post(url="/file-upload", files=file_to_upload, data={"meta": '{"test_key": "test_value"}'})
|
response = client.post(url="/file-upload", files=file_to_upload, data={"meta": '{"test_key": "test_value"}'})
|
||||||
assert 200 == response.status_code
|
assert 200 == response.status_code
|
||||||
|
# Ensure the `convert` method was called with the right keyword params
|
||||||
documents = api_document_store.get_all_documents()
|
_, kwargs = MockPDFToTextConverter.mocker.convert.call_args
|
||||||
assert len(documents) > 0
|
# Files are renamed with random prefix like 83f4c1f5b2bd43f2af35923b9408076b_sample_pdf_1.pdf
|
||||||
for doc in documents:
|
# so we just ensure the original file name is contained in the converted file name
|
||||||
assert doc.meta["name"] == "sample_pdf_1.pdf"
|
assert "sample_pdf_1.pdf" in str(kwargs["file_path"])
|
||||||
assert doc.meta["test_key"] == "test_value"
|
assert kwargs["meta"]["test_key"] == "test_value"
|
||||||
|
|
||||||
|
|
||||||
def test_file_upload_with_no_meta(client: TestClient, api_document_store: BaseDocumentStore):
|
def test_file_upload_with_no_meta(client):
|
||||||
|
file_to_upload = {"files": (Path(__file__).parent / "samples" / "pdf" / "sample_pdf_1.pdf").open("rb")}
|
||||||
|
response = client.post(url="/file-upload", files=file_to_upload, data={})
|
||||||
|
assert 200 == response.status_code
|
||||||
|
# Ensure the `convert` method was called with the right keyword params
|
||||||
|
_, kwargs = MockPDFToTextConverter.mocker.convert.call_args
|
||||||
|
assert kwargs["meta"] == {"name": "sample_pdf_1.pdf"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_file_upload_with_empty_meta(client):
|
||||||
file_to_upload = {"files": (Path(__file__).parent / "samples" / "pdf" / "sample_pdf_1.pdf").open("rb")}
|
file_to_upload = {"files": (Path(__file__).parent / "samples" / "pdf" / "sample_pdf_1.pdf").open("rb")}
|
||||||
response = client.post(url="/file-upload", files=file_to_upload, data={"meta": ""})
|
response = client.post(url="/file-upload", files=file_to_upload, data={"meta": ""})
|
||||||
assert 200 == response.status_code
|
assert 200 == response.status_code
|
||||||
|
# Ensure the `convert` method was called with the right keyword params
|
||||||
documents = api_document_store.get_all_documents()
|
_, kwargs = MockPDFToTextConverter.mocker.convert.call_args
|
||||||
assert len(documents) > 0
|
assert kwargs["meta"] == {"name": "sample_pdf_1.pdf"}
|
||||||
for doc in documents:
|
|
||||||
assert doc.meta["name"] == "sample_pdf_1.pdf"
|
|
||||||
|
|
||||||
|
|
||||||
def test_file_upload_with_wrong_meta(client: TestClient, api_document_store: BaseDocumentStore):
|
def test_file_upload_with_wrong_meta(client):
|
||||||
file_to_upload = {"files": (Path(__file__).parent / "samples" / "pdf" / "sample_pdf_1.pdf").open("rb")}
|
file_to_upload = {"files": (Path(__file__).parent / "samples" / "pdf" / "sample_pdf_1.pdf").open("rb")}
|
||||||
response = client.post(url="/file-upload", files=file_to_upload, data={"meta": "1"})
|
response = client.post(url="/file-upload", files=file_to_upload, data={"meta": "1"})
|
||||||
assert 500 == response.status_code
|
assert 500 == response.status_code
|
||||||
assert api_document_store.get_document_count() == 0
|
# Ensure the `convert` method was never called
|
||||||
|
MockPDFToTextConverter.mocker.convert.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
def test_query_with_no_filter(populated_client: TestClient):
|
def test_query_with_no_filter(client):
|
||||||
query_with_no_filter_value = {"query": "Who made the PDF specification?"}
|
with mock.patch("rest_api.controller.search.query_pipeline") as mocked_pipeline:
|
||||||
response = populated_client.post(url="/query", json=query_with_no_filter_value)
|
# `run` must return a dictionary containing a `query` key
|
||||||
|
mocked_pipeline.run.return_value = {"query": TEST_QUERY}
|
||||||
|
response = client.post(url="/query", json={"query": TEST_QUERY})
|
||||||
|
assert 200 == response.status_code
|
||||||
|
# Ensure `run` was called with the expected parameters
|
||||||
|
mocked_pipeline.run.assert_called_with(query=TEST_QUERY, params={}, debug=False)
|
||||||
|
|
||||||
|
|
||||||
|
def test_query_with_one_filter(client):
|
||||||
|
params = {"TestRetriever": {"filters": {"test_key": ["test_value"]}}}
|
||||||
|
with mock.patch("rest_api.controller.search.query_pipeline") as mocked_pipeline:
|
||||||
|
# `run` must return a dictionary containing a `query` key
|
||||||
|
mocked_pipeline.run.return_value = {"query": TEST_QUERY}
|
||||||
|
response = client.post(url="/query", json={"query": TEST_QUERY, "params": params})
|
||||||
|
assert 200 == response.status_code
|
||||||
|
# Ensure `run` was called with the expected parameters
|
||||||
|
mocked_pipeline.run.assert_called_with(query=TEST_QUERY, params=params, debug=False)
|
||||||
|
|
||||||
|
|
||||||
|
def test_query_with_one_global_filter(client):
|
||||||
|
params = {"filters": {"test_key": ["test_value"]}}
|
||||||
|
with mock.patch("rest_api.controller.search.query_pipeline") as mocked_pipeline:
|
||||||
|
# `run` must return a dictionary containing a `query` key
|
||||||
|
mocked_pipeline.run.return_value = {"query": TEST_QUERY}
|
||||||
|
response = client.post(url="/query", json={"query": TEST_QUERY, "params": params})
|
||||||
|
assert 200 == response.status_code
|
||||||
|
# Ensure `run` was called with the expected parameters
|
||||||
|
mocked_pipeline.run.assert_called_with(query=TEST_QUERY, params=params, debug=False)
|
||||||
|
|
||||||
|
|
||||||
|
def test_query_with_filter_list(client):
|
||||||
|
params = {"TestRetriever": {"filters": {"test_key": ["test_value", "another_value"]}}}
|
||||||
|
with mock.patch("rest_api.controller.search.query_pipeline") as mocked_pipeline:
|
||||||
|
# `run` must return a dictionary containing a `query` key
|
||||||
|
mocked_pipeline.run.return_value = {"query": TEST_QUERY}
|
||||||
|
response = client.post(url="/query", json={"query": TEST_QUERY, "params": params})
|
||||||
|
assert 200 == response.status_code
|
||||||
|
# Ensure `run` was called with the expected parameters
|
||||||
|
mocked_pipeline.run.assert_called_with(query=TEST_QUERY, params=params, debug=False)
|
||||||
|
|
||||||
|
|
||||||
|
def test_query_with_deprecated_filter_format(client):
|
||||||
|
request_params = {"TestRetriever": {"filters": {"test_key": "i_should_be_a_list"}}}
|
||||||
|
expected_params = {"TestRetriever": {"filters": {"test_key": ["i_should_be_a_list"]}}}
|
||||||
|
with mock.patch("rest_api.controller.search.query_pipeline") as mocked_pipeline:
|
||||||
|
# `run` must return a dictionary containing a `query` key
|
||||||
|
mocked_pipeline.run.return_value = {"query": TEST_QUERY}
|
||||||
|
response = client.post(url="/query", json={"query": TEST_QUERY, "params": request_params})
|
||||||
|
assert 200 == response.status_code
|
||||||
|
# Ensure `run` was called with the expected parameters. In this case,
|
||||||
|
# `_format_filters` will fix the `filters` format within the params
|
||||||
|
mocked_pipeline.run.assert_called_with(query=TEST_QUERY, params=expected_params, debug=False)
|
||||||
|
|
||||||
|
|
||||||
|
def test_query_with_no_documents_and_no_answers(client):
|
||||||
|
with mock.patch("rest_api.controller.search.query_pipeline") as mocked_pipeline:
|
||||||
|
# `run` must return a dictionary containing a `query` key
|
||||||
|
mocked_pipeline.run.return_value = {"query": TEST_QUERY}
|
||||||
|
response = client.post(url="/query", json={"query": TEST_QUERY})
|
||||||
|
assert 200 == response.status_code
|
||||||
|
response_json = response.json()
|
||||||
|
assert response_json["documents"] == []
|
||||||
|
assert response_json["answers"] == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_feedback(client, feedback):
|
||||||
|
response = client.post(url="/feedback", json=feedback)
|
||||||
assert 200 == response.status_code
|
assert 200 == response.status_code
|
||||||
response_json = response.json()
|
# Ensure `write_labels` was called on the Document Store instance passing a list
|
||||||
response_json = exclude_no_answer(response_json)
|
# containing only one label
|
||||||
assert response_json["answers"][0]["answer"] == "Adobe Systems"
|
args, _ = MockDocumentStore.mocker.write_labels.call_args
|
||||||
|
labels = args[0]
|
||||||
|
assert len(labels) == 1
|
||||||
|
# Ensure all the items that were in `feedback` are also part of
|
||||||
|
# the stored label (which has several more keys)
|
||||||
|
label = labels[0].to_dict()
|
||||||
|
for k, v in feedback.items():
|
||||||
|
assert label[k] == v
|
||||||
|
|
||||||
|
|
||||||
def test_query_with_one_filter(populated_client: TestClient):
|
def test_write_feedback_without_id(client, feedback):
|
||||||
query_with_filter = {
|
|
||||||
"query": "Who made the PDF specification?",
|
|
||||||
"params": {"TestRetriever": {"filters": {"test_key": ["test_value"]}}},
|
|
||||||
}
|
|
||||||
response = populated_client.post(url="/query", json=query_with_filter)
|
|
||||||
assert 200 == response.status_code
|
|
||||||
response_json = response.json()
|
|
||||||
response_json = exclude_no_answer(response_json)
|
|
||||||
assert response_json["answers"][0]["answer"] == "Adobe Systems"
|
|
||||||
|
|
||||||
|
|
||||||
def test_query_with_one_global_filter(populated_client: TestClient):
|
|
||||||
query_with_filter = {
|
|
||||||
"query": "Who made the PDF specification?",
|
|
||||||
"params": {"filters": {"test_key": ["test_value"]}},
|
|
||||||
}
|
|
||||||
response = populated_client.post(url="/query", json=query_with_filter)
|
|
||||||
assert 200 == response.status_code
|
|
||||||
response_json = response.json()
|
|
||||||
response_json = exclude_no_answer(response_json)
|
|
||||||
assert response_json["answers"][0]["answer"] == "Adobe Systems"
|
|
||||||
|
|
||||||
|
|
||||||
def test_query_with_filter_list(populated_client: TestClient):
|
|
||||||
query_with_filter_list = {
|
|
||||||
"query": "Who made the PDF specification?",
|
|
||||||
"params": {"TestRetriever": {"filters": {"test_key": ["test_value", "another_value"]}}},
|
|
||||||
}
|
|
||||||
response = populated_client.post(url="/query", json=query_with_filter_list)
|
|
||||||
assert 200 == response.status_code
|
|
||||||
response_json = response.json()
|
|
||||||
response_json = exclude_no_answer(response_json)
|
|
||||||
assert response_json["answers"][0]["answer"] == "Adobe Systems"
|
|
||||||
|
|
||||||
|
|
||||||
def test_query_with_invalid_filter(populated_client: TestClient):
|
|
||||||
query_with_invalid_filter = {
|
|
||||||
"query": "Who made the PDF specification?",
|
|
||||||
"params": {"TestRetriever": {"filters": {"test_key": "invalid_value"}}},
|
|
||||||
}
|
|
||||||
response = populated_client.post(url="/query", json=query_with_invalid_filter)
|
|
||||||
assert 200 == response.status_code
|
|
||||||
response_json = response.json()
|
|
||||||
response_json = exclude_no_answer(response_json)
|
|
||||||
assert len(response_json["answers"]) == 0
|
|
||||||
|
|
||||||
|
|
||||||
def test_query_with_no_documents_and_no_answers(client: TestClient):
|
|
||||||
query = {"query": "Who made the PDF specification?"}
|
|
||||||
response = client.post(url="/query", json=query)
|
|
||||||
assert 200 == response.status_code
|
|
||||||
response_json = response.json()
|
|
||||||
assert response_json["documents"] == []
|
|
||||||
assert response_json["answers"] == []
|
|
||||||
|
|
||||||
|
|
||||||
def test_write_feedback(populated_client: TestClient, api_document_store: BaseDocumentStore):
|
|
||||||
response = populated_client.post(url="/feedback", json=FEEDBACK)
|
|
||||||
assert 200 == response.status_code
|
|
||||||
assert api_document_store.get_label_count() == 1
|
|
||||||
|
|
||||||
label: Label = api_document_store.get_all_labels()[0]
|
|
||||||
label_values = label.to_dict()
|
|
||||||
for actual_item, expected_item in [(label_values[key], value) for key, value in FEEDBACK.items()]:
|
|
||||||
assert actual_item == expected_item
|
|
||||||
|
|
||||||
|
|
||||||
def test_write_feedback_without_id(populated_client: TestClient, api_document_store: BaseDocumentStore):
|
|
||||||
feedback = deepcopy(FEEDBACK)
|
|
||||||
del feedback["id"]
|
del feedback["id"]
|
||||||
response = populated_client.post(url="/feedback", json=feedback)
|
response = client.post(url="/feedback", json=feedback)
|
||||||
assert 200 == response.status_code
|
assert 200 == response.status_code
|
||||||
assert api_document_store.get_label_count() == 1
|
# Ensure `write_labels` was called on the Document Store instance passing a list
|
||||||
|
# containing only one label
|
||||||
label: Label = api_document_store.get_all_labels()[0]
|
args, _ = MockDocumentStore.mocker.write_labels.call_args
|
||||||
label_values = label.to_dict()
|
labels = args[0]
|
||||||
for actual_item, expected_item in [(label_values[key], value) for key, value in FEEDBACK.items() if key != "id"]:
|
assert len(labels) == 1
|
||||||
assert actual_item == expected_item
|
# Ensure the `id` was automatically set before storing the label
|
||||||
|
label = labels[0].to_dict()
|
||||||
|
assert label["id"]
|
||||||
|
|
||||||
|
|
||||||
def test_get_feedback(populated_client_with_feedback: TestClient):
|
def test_get_feedback(client):
|
||||||
response = populated_client_with_feedback.get(url="/feedback")
|
response = client.get("/feedback")
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
json_response = response.json()
|
MockDocumentStore.mocker.get_all_labels.assert_called_once()
|
||||||
for response_item, expected_item in [(json_response[0][key], value) for key, value in FEEDBACK.items()]:
|
|
||||||
assert response_item == expected_item
|
|
||||||
|
|
||||||
|
|
||||||
def test_delete_feedback(populated_client_with_feedback: TestClient, api_document_store: BaseDocumentStore):
|
def test_delete_feedback(client, monkeypatch, feedback):
|
||||||
response = populated_client_with_feedback.delete(url="/feedback")
|
# This label contains `origin=user-feedback` and should be deleted
|
||||||
|
label_to_delete = Label.from_dict(feedback)
|
||||||
|
# This other label has a different origin and should NOT be deleted
|
||||||
|
label_to_keep = Label.from_dict(feedback)
|
||||||
|
label_to_keep.id = "42"
|
||||||
|
label_to_keep.origin = "not-from-api"
|
||||||
|
|
||||||
|
# Patch the Document Store so it returns the 2 labels above
|
||||||
|
def get_all_labels(*args, **kwargs):
|
||||||
|
return [label_to_delete, label_to_keep]
|
||||||
|
|
||||||
|
monkeypatch.setattr(MockDocumentStore, "get_all_labels", get_all_labels)
|
||||||
|
|
||||||
|
# Call the API and ensure `delete_labels` was called only on the label with id=123
|
||||||
|
response = client.delete(url="/feedback")
|
||||||
assert 200 == response.status_code
|
assert 200 == response.status_code
|
||||||
assert api_document_store.get_label_count() == 0
|
MockDocumentStore.mocker.delete_labels.assert_called_with(ids=["123"])
|
||||||
|
|
||||||
|
|
||||||
def test_do_not_delete_gold_labels(populated_client_with_feedback: TestClient, api_document_store: BaseDocumentStore):
|
def test_export_feedback(client, monkeypatch, feedback):
|
||||||
feedback = deepcopy(FEEDBACK)
|
def get_all_labels(*args, **kwargs):
|
||||||
feedback["id"] = "456"
|
return [Label.from_dict(feedback)]
|
||||||
feedback["origin"] = "gold-label"
|
|
||||||
api_document_store.write_labels([feedback])
|
|
||||||
|
|
||||||
response = populated_client_with_feedback.delete(url="/feedback")
|
monkeypatch.setattr(MockDocumentStore, "get_all_labels", get_all_labels)
|
||||||
assert 200 == response.status_code
|
|
||||||
|
|
||||||
assert api_document_store.get_label_count() == 1
|
|
||||||
|
|
||||||
label: Label = api_document_store.get_all_labels()[0]
|
|
||||||
label_values = label.to_dict()
|
|
||||||
for actual_item, expected_item in [(label_values[key], value) for key, value in feedback.items()]:
|
|
||||||
assert actual_item == expected_item
|
|
||||||
|
|
||||||
|
|
||||||
def test_export_feedback(populated_client_with_feedback: TestClient):
|
|
||||||
feedback_urls = [
|
feedback_urls = [
|
||||||
"/export-feedback?full_document_context=true",
|
"/export-feedback?full_document_context=true",
|
||||||
"/export-feedback?full_document_context=false&context_size=50",
|
"/export-feedback?full_document_context=false&context_size=50",
|
||||||
"/export-feedback?full_document_context=false&context_size=50000",
|
"/export-feedback?full_document_context=false&context_size=50000",
|
||||||
]
|
]
|
||||||
for url in feedback_urls:
|
for url in feedback_urls:
|
||||||
response = populated_client_with_feedback.get(url=url, json=FEEDBACK)
|
response = client.get(url)
|
||||||
response_json = response.json()
|
response_json = response.json()
|
||||||
context = response_json["data"][0]["paragraphs"][0]["context"]
|
context = response_json["data"][0]["paragraphs"][0]["context"]
|
||||||
answer_start = response_json["data"][0]["paragraphs"][0]["qas"][0]["answers"][0]["answer_start"]
|
answer_start = response_json["data"][0]["paragraphs"][0]["qas"][0]["answers"][0]["answer_start"]
|
||||||
@ -426,8 +422,7 @@ def test_export_feedback(populated_client_with_feedback: TestClient):
|
|||||||
assert context[answer_start : answer_start + len(answer)] == answer
|
assert context[answer_start : answer_start + len(answer)] == answer
|
||||||
|
|
||||||
|
|
||||||
def test_get_feedback_malformed_query(populated_client_with_feedback: TestClient):
|
def test_get_feedback_malformed_query(client, feedback):
|
||||||
feedback = deepcopy(FEEDBACK)
|
|
||||||
feedback["unexpected_field"] = "misplaced-value"
|
feedback["unexpected_field"] = "misplaced-value"
|
||||||
response = populated_client_with_feedback.post(url="/feedback", json=feedback)
|
response = client.post(url="/feedback", json=feedback)
|
||||||
assert response.status_code == 422
|
assert response.status_code == 422
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user