Mocking Pinecone tests (#2778)

* Integrating the mock into conftest.py

* re-enable workflow

* delete_all

* Update Documentation & Code Style

* remove ValueError

* Add empty response

* wrong condition

* return response

* revert removal of delete_all

* change mock

* Update Documentation & Code Style

* test for rest api, to revert

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Sara Zan 2022-07-14 19:03:33 +01:00 committed by GitHub
parent e6d8bcdf9b
commit 6b39fbd39c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 109 additions and 66 deletions

View File

@@ -428,68 +428,64 @@ jobs:
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=weaviate # pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=weaviate
# FIXME: This block should be uncommented as soon as Pinecone tests are fixed pinecone-tests-linux:
# as part of the fixes discussed in #2644. needs:
# Check locally for these tests to pass before uncommenting. - mypy
# - pylint
# pinecone-tests-linux: runs-on: ubuntu-latest
# needs: if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') || !github.event.pull_request.draft
# - mypy
# - pylint
# runs-on: ubuntu-latest
# if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') || !github.event.pull_request.draft
# steps: steps:
# - uses: actions/checkout@v2 - uses: actions/checkout@v2
# - name: Setup Python - name: Setup Python
# uses: ./.github/actions/python_cache/ uses: ./.github/actions/python_cache/
# # TODO Let's try to remove this one from the unit tests # TODO Let's try to remove this one from the unit tests
# - name: Install pdftotext - name: Install pdftotext
# run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin
# - name: Install Haystack - name: Install Haystack
# run: pip install .[pinecone] run: pip install .[pinecone]
# - name: Run tests - name: Run tests
# env: env:
# PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
# TOKENIZERS_PARALLELISM: 'false' TOKENIZERS_PARALLELISM: 'false'
# run: | run: |
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=pinecone pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=pinecone
# FIXME very slow and very little platform dependency, so to evaluate
# pinecone-tests-windows:
# needs:
# - mypy
# - pylint
# runs-on: windows-latest
# if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') && contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft
# steps: pinecone-tests-windows:
# - uses: actions/checkout@v2 needs:
- mypy
- pylint
runs-on: windows-latest
if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') && contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft
# - name: Setup Python steps:
# uses: ./.github/actions/python_cache/ - uses: actions/checkout@v2
# with:
# prefix: windows
# - name: Install pdftotext - name: Setup Python
# run: | uses: ./.github/actions/python_cache/
# choco install xpdf-utils with:
# choco install openjdk11 prefix: windows
# refreshenv
# - name: Install Haystack - name: Install pdftotext
# run: pip install .[pinecone] run: |
choco install xpdf-utils
choco install openjdk11
refreshenv
# - name: Run tests - name: Install Haystack
# env: run: pip install .[pinecone]
# TOKENIZERS_PARALLELISM: 'false'
# PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} - name: Run tests
# run: | env:
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone TOKENIZERS_PARALLELISM: 'false'
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone
rest-and-ui: rest-and-ui:
needs: needs:
@@ -510,9 +506,9 @@ jobs:
- name: Install REST API and UI - name: Install REST API and UI
run: | run: |
pip install rest_api/ pip install -U rest_api/
pip install ui/ pip install -U ui/
- name: Run tests - name: Run tests
run: | run: |
pytest ${{ env.PYTEST_PARAMS }} rest_api/ ui/ pytest ${{ env.PYTEST_PARAMS }} rest_api/ ui/

View File

@@ -72,6 +72,8 @@ from haystack.modeling.infer import Inferencer, QAInferencer
from haystack.schema import Document from haystack.schema import Document
from .mocks import pinecone as pinecone_mock
# To manually run the tests with default PostgreSQL instead of SQLite, switch the lines below # To manually run the tests with default PostgreSQL instead of SQLite, switch the lines below
SQL_TYPE = "sqlite" SQL_TYPE = "sqlite"
@@ -159,9 +161,9 @@ def pytest_collection_modifyitems(config, items):
item.add_marker(skip_milvus) item.add_marker(skip_milvus)
# Skip PineconeDocumentStore if PINECONE_API_KEY not in environment variables # Skip PineconeDocumentStore if PINECONE_API_KEY not in environment variables
if not os.environ.get("PINECONE_API_KEY", False) and "pinecone" in keywords: # if not os.environ.get("PINECONE_API_KEY", False) and "pinecone" in keywords:
skip_pinecone = pytest.mark.skip(reason="PINECONE_API_KEY not in environment variables.") # skip_pinecone = pytest.mark.skip(reason="PINECONE_API_KEY not in environment variables.")
item.add_marker(skip_pinecone) # item.add_marker(skip_pinecone)
# #
@@ -742,8 +744,22 @@ def ensure_ids_are_correct_uuids(docs: list, document_store: object) -> None:
d["id"] = str(uuid.uuid4()) d["id"] = str(uuid.uuid4())
# FIXME Fix this in the docstore tests refactoring
from inspect import getmembers, isclass, isfunction
def mock_pinecone(monkeypatch):
    """
    Install the members of the local Pinecone mock onto the `pinecone` package.

    Every function found in `pinecone_mock` is patched in first, then every
    class, each under the same name it has in the mock module.

    :param monkeypatch: pytest's `monkeypatch` fixture, used to apply the patches.
    """
    for predicate in (isfunction, isclass):
        for member_name, member in getmembers(pinecone_mock, predicate):
            # raising=False: per the pytest docs this suppresses the error for a
            # target attribute that does not exist yet on the patched module.
            monkeypatch.setattr(f"pinecone.{member_name}", member, raising=False)
@pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus1", "milvus", "weaviate", "pinecone"]) @pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus1", "milvus", "weaviate", "pinecone"])
def document_store_with_docs(request, docs, tmp_path): def document_store_with_docs(request, docs, tmp_path, monkeypatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)
embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768)) embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
document_store = get_document_store( document_store = get_document_store(
document_store_type=request.param, embedding_dim=embedding_dim.args[0], tmp_path=tmp_path document_store_type=request.param, embedding_dim=embedding_dim.args[0], tmp_path=tmp_path
@@ -754,7 +770,10 @@ def document_store_with_docs(request, docs, tmp_path):
@pytest.fixture @pytest.fixture
def document_store(request, tmp_path): def document_store(request, tmp_path, monkeypatch: pytest.MonkeyPatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)
embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768)) embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
document_store = get_document_store( document_store = get_document_store(
document_store_type=request.param, embedding_dim=embedding_dim.args[0], tmp_path=tmp_path document_store_type=request.param, embedding_dim=embedding_dim.args[0], tmp_path=tmp_path
@@ -764,7 +783,10 @@ def document_store(request, tmp_path):
@pytest.fixture(params=["memory", "faiss", "milvus1", "milvus", "elasticsearch", "pinecone"]) @pytest.fixture(params=["memory", "faiss", "milvus1", "milvus", "elasticsearch", "pinecone"])
def document_store_dot_product(request, tmp_path): def document_store_dot_product(request, tmp_path, monkeypatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)
embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768)) embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
document_store = get_document_store( document_store = get_document_store(
document_store_type=request.param, document_store_type=request.param,
@@ -777,7 +799,10 @@ def document_store_dot_product(request, tmp_path):
@pytest.fixture(params=["memory", "faiss", "milvus1", "milvus", "elasticsearch", "pinecone"]) @pytest.fixture(params=["memory", "faiss", "milvus1", "milvus", "elasticsearch", "pinecone"])
def document_store_dot_product_with_docs(request, docs, tmp_path): def document_store_dot_product_with_docs(request, docs, tmp_path, monkeypatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)
embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768)) embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
document_store = get_document_store( document_store = get_document_store(
document_store_type=request.param, document_store_type=request.param,
@@ -791,7 +816,10 @@ def document_store_dot_product_with_docs(request, docs, tmp_path):
@pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus1", "pinecone"]) @pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus1", "pinecone"])
def document_store_dot_product_small(request, tmp_path): def document_store_dot_product_small(request, tmp_path, monkeypatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)
embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(3)) embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(3))
document_store = get_document_store( document_store = get_document_store(
document_store_type=request.param, document_store_type=request.param,
@@ -804,7 +832,10 @@ def document_store_dot_product_small(request, tmp_path):
@pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus1", "milvus", "weaviate", "pinecone"]) @pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus1", "milvus", "weaviate", "pinecone"])
def document_store_small(request, tmp_path): def document_store_small(request, tmp_path, monkeypatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)
embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(3)) embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(3))
document_store = get_document_store( document_store = get_document_store(
document_store_type=request.param, embedding_dim=embedding_dim.args[0], similarity="cosine", tmp_path=tmp_path document_store_type=request.param, embedding_dim=embedding_dim.args[0], similarity="cosine", tmp_path=tmp_path
@@ -931,7 +962,7 @@ def get_document_store(
elif document_store_type == "pinecone": elif document_store_type == "pinecone":
document_store = PineconeDocumentStore( document_store = PineconeDocumentStore(
api_key=os.environ["PINECONE_API_KEY"], api_key=os.environ.get("PINECONE_API_KEY"),
embedding_dim=embedding_dim, embedding_dim=embedding_dim,
embedding_field=embedding_field, embedding_field=embedding_field,
index=index, index=index,

View File

@@ -1,5 +1,10 @@
from typing import Optional, List from typing import Optional, List
import logging
logger = logging.getLogger(__name__)
# Mock Pinecone instance # Mock Pinecone instance
CONFIG: dict = {"api_key": None, "environment": None, "indexes": {}} CONFIG: dict = {"api_key": None, "environment": None, "indexes": {}}
@@ -87,7 +92,9 @@ class Index:
def fetch(self, ids: List[str], namespace: str = ""): def fetch(self, ids: List[str], namespace: str = ""):
response: dict = {"namespace": namespace, "vectors": {}} response: dict = {"namespace": namespace, "vectors": {}}
if namespace not in self.index_config.namespaces: if namespace not in self.index_config.namespaces:
raise ValueError("Namespace not found") # If we query an empty/non-existent namespace, Pinecone will just return an empty response
logger.warning(f"No namespace called '{namespace}'")
return response
records = self.index_config.namespaces[namespace] records = self.index_config.namespaces[namespace]
for record in records: for record in records:
if record["id"] in ids.copy(): if record["id"] in ids.copy():
@@ -98,7 +105,16 @@ class Index:
} }
return response return response
def delete(self, ids: Optional[List[str]] = None, namespace: str = "", filters: Optional[dict] = None): def delete(
self,
ids: Optional[List[str]] = None,
namespace: str = "",
filters: Optional[dict] = None,
delete_all: bool = False,
):
if delete_all:
self.index_config.namespaces[namespace] = []
if namespace not in self.index_config.namespaces: if namespace not in self.index_config.namespaces:
pass pass
elif ids is not None: elif ids is not None: