Mirror of https://github.com/deepset-ai/haystack.git (synced 2025-12-29 07:59:27 +00:00)
test: Re-activate end-to-end tests workflow (#5343)
* Install haystack with required extras
* Remove whitespaces
* Add sleep
* Add "s" for seconds
* Move container initialization into the workflow
* Update e2e.yml: add nightly run
* Use new folder for initial e2e test
* Use file hash for caching and trigger on push to branch
* Remove \n from model names read from file
* Remove trigger on push to branch

Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com>
Co-authored-by: bogdankostic <bogdankostic@web.de>
parent f7642e83ea
commit eeb29b5686
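A note on the caching change ("use file hash for caching"): the HF model cache key now embeds a hash of e2e/models_to_cache.txt, so editing the model list invalidates the cache and forces a re-download on the next run. A minimal sketch of the idea in Python; the sha256 digest and the cache_key helper are illustrative stand-ins for GitHub's built-in hashFiles expression, not part of the workflow:

    import hashlib
    from pathlib import Path

    def cache_key(runner_os: str, model_list: Path) -> str:
        # Stand-in for the workflow's "${{ runner.os }}-${{ hashFiles('**/models_to_cache.txt') }}" key.
        digest = hashlib.sha256(model_list.read_bytes()).hexdigest()
        return f"{runner_os}-{digest}"

    # Adding or removing a model name changes the digest, so the next CI run
    # misses the old cache entry and downloads the models again.
    print(cache_key("Linux", Path("e2e/models_to_cache.txt")))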
.github/workflows/e2e.yml (vendored, 37 changed lines)
@@ -2,7 +2,9 @@
 name: end-to-end
 
 on:
-  workflow_dispatch:
+  workflow_dispatch: # Activate this workflow manually
+  schedule:
+    - cron: "0 0 * * *"
 
 env:
   PYTHON_VERSION: "3.8"
@@ -14,7 +16,7 @@ jobs:
       fail-fast: false # Avoid cancelling the others if one of these fails
       matrix:
         folder:
-          - "document_stores"
+          - "document_search"
 
     runs-on: ubuntu-latest
 
@@ -25,20 +27,6 @@
         with:
           python-version: ${{ env.PYTHON_VERSION }}
 
-      - name: Cache HF models
-        id: cache-hf-models
-        uses: actions/cache@v3
-        with:
-          path: ~/.cache/huggingface/transformers/
-          key: hf-models
-        env:
-          SEGMENT_DOWNLOAD_TIMEOUT_MINS: 15
-
-      - name: Download models
-        if: steps.cache-hf-models.outputs.cache-hit != 'true'
-        run: |
-          python -c "from transformers import AutoModel;[AutoModel.from_pretrained(model_name) for model_name in ['vblagoje/dpr-ctx_encoder-single-lfqa-wiki', 'vblagoje/dpr-question_encoder-single-lfqa-wiki', 'facebook/dpr-question_encoder-single-nq-base', 'facebook/dpr-ctx_encoder-single-nq-base', 'elastic/distilbert-base-cased-finetuned-conll03-english', 'deepset/bert-medium-squad2-distilled']]"
-
       - name: Run Elasticsearch
         run: |
           docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx256m" elasticsearch:7.9.2
@@ -51,8 +39,23 @@
         run: docker run -d -p 8080:8080 --name haystack_test_weaviate --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' --env ENABLE_EXPERIMENTAL_BM25='true' --env DISK_USE_READONLY_PERCENTAGE='95' semitechnologies/weaviate:1.17.2
 
       - name: Install Haystack
-        run: pip install .
+        run: pip install .[inference,elasticsearch7,faiss,weaviate,opensearch,dev]
 
+      - name: Cache HF models
+        id: cache-hf-models
+        uses: actions/cache@v3
+        with:
+          path: ./e2e
+          key: ${{ runner.os }}-${{ hashFiles('**/models_to_cache.txt') }}
+        env:
+          SEGMENT_DOWNLOAD_TIMEOUT_MINS: 15
+      - name: Download models
+        if: steps.cache-hf-models.outputs.cache-hit != 'true'
+        shell: python
+        run: |
+          from transformers import AutoModel
+          with open("./e2e/models_to_cache.txt") as file:
+              AutoModel.from_pretrained(file.readline().rstrip())
       - name: Run tests
         env:
           TOKENIZERS_PARALLELISM: 'false' # Avoid logspam by tokenizers
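One subtlety in the new "Download models" step: file.readline() reads only the first line, so only the first model listed in models_to_cache.txt is pre-downloaded. That is enough while the file contains a single entry (as it does in this commit), but a longer list would need a loop. A hedged sketch, assuming one model name per line:

    from transformers import AutoModel

    with open("./e2e/models_to_cache.txt") as file:
        for line in file:
            model_name = line.rstrip()  # drop the trailing newline ("remove \n" in the commit message)
            if model_name:  # tolerate blank lines
                AutoModel.from_pretrained(model_name)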
e2e/document_search/__init__.py (new empty file)
e2e/document_search/test_standard_pipeline.py (new file, 35 lines)
@@ -0,0 +1,35 @@
+import pytest
+
+from haystack.nodes import EmbeddingRetriever
+from haystack.pipelines import DocumentSearchPipeline
+
+from ..conftest import document_store
+
+
+@pytest.mark.parametrize("document_store_name", ["memory", "faiss", "weaviate", "elasticsearch"])
+def test_document_search_standard_pipeline(document_store_name, docs, tmp_path):
+    """
+    Testing the DocumentSearchPipeline with most common parameters according to our template:
+    https://github.com/deepset-ai/templates/blob/main/pipelines/DenseDocSearch.yaml
+    The common multi-qa-mpnet-base-dot-v1 model is replaced with the very similar paraphrase-MiniLM-L3-v2,
+    which reduces runtime and model size by ~6x
+    """
+    with document_store(document_store_name, docs, tmp_path, embedding_dim=384) as ds:
+        retriever = EmbeddingRetriever(
+            document_store=ds, embedding_model="sentence-transformers/paraphrase-MiniLM-L3-v2"
+        )
+        ds.update_embeddings(retriever)
+        pipeline = DocumentSearchPipeline(retriever)
+        prediction = pipeline.run("Paul lives in New York")
+        scores = [document.score for document in prediction["documents"]]
+        assert [document.content for document in prediction["documents"]] == [
+            "My name is Paul and I live in New York",
+            "My name is Matteo and I live in Rome",
+            "My name is Christelle and I live in Paris",
+            "My name is Carla and I live in Berlin",
+            "My name is Camila and I live in Madrid",
+        ]
+        assert scores == pytest.approx(
+            [0.9149981737136841, 0.6895168423652649, 0.641706794500351, 0.6206043660640717, 0.5837393924593925],
+            abs=1e-3,
+        )
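The document_store context manager imported from ..conftest is not part of this diff. As a rough sketch only (the store selection, arguments, and cleanup here are assumptions, not the repository's actual conftest), it presumably builds the requested store, writes the test documents, and cleans up afterwards so the parametrized runs stay isolated:

    # Hypothetical sketch of the ..conftest helper; the real implementation
    # lives in e2e/conftest.py and is not part of this commit.
    from contextlib import contextmanager

    from haystack.document_stores import InMemoryDocumentStore

    @contextmanager
    def document_store(name, docs, tmp_path, embedding_dim=768):
        if name == "memory":
            ds = InMemoryDocumentStore(embedding_dim=embedding_dim)
        else:
            # The real helper also covers faiss, weaviate, and elasticsearch.
            raise NotImplementedError(f"sketch covers only 'memory', not {name!r}")
        ds.write_documents(docs)
        try:
            yield ds
        finally:
            ds.delete_documents()  # clean up so the next parametrized run starts empty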
e2e/models_to_cache.txt (new file, 1 line)
@@ -0,0 +1 @@
+sentence-transformers/paraphrase-MiniLM-L3-v2