haystack/e2e/document_search/test_standard_pipeline.py
Julian Risch eeb29b5686
test: Re-activate end-to-end tests workflow (#5343)
* Install haystack with required extras

* remove whitespaces

Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com>

* Add sleep

* Add s for seconds

* Move container initialization in workflow

* Update e2e.yml

add nightly run

* use new folder for initial e2e test

* use file hash for caching and trigger on push to branch

* remove \n from model names read from file

* remove trigger on push to branch

---------

Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com>
Co-authored-by: bogdankostic <bogdankostic@web.de>
2023-07-20 11:48:51 +02:00

36 lines
1.6 KiB
Python

import pytest
from haystack.nodes import EmbeddingRetriever
from haystack.pipelines import DocumentSearchPipeline
from ..conftest import document_store
@pytest.mark.parametrize("document_store_name", ["memory", "faiss", "weaviate", "elasticsearch"])
def test_document_search_standard_pipeline(document_store_name, docs, tmp_path):
    """
    Run a DocumentSearchPipeline end to end against each parametrized document store.

    The setup follows the most common parameters of our template:
    https://github.com/deepset-ai/templates/blob/main/pipelines/DenseDocSearch.yaml
    Instead of the common multi-qa-mpnet-base-dot-v1 model, the very similar
    paraphrase-MiniLM-L3-v2 is used, which reduces runtime and model size by ~6x.

    The test passes when the retrieved documents come back in the expected order
    and their scores match the reference values within an absolute tolerance of 1e-3.
    """
    # Reference ranking and scores for the query issued below.
    expected_contents = [
        "My name is Paul and I live in New York",
        "My name is Matteo and I live in Rome",
        "My name is Christelle and I live in Paris",
        "My name is Carla and I live in Berlin",
        "My name is Camila and I live in Madrid",
    ]
    expected_scores = [
        0.9149981737136841,
        0.6895168423652649,
        0.641706794500351,
        0.6206043660640717,
        0.5837393924593925,
    ]

    # embedding_dim=384 is passed to the store helper alongside the MiniLM model choice.
    with document_store(document_store_name, docs, tmp_path, embedding_dim=384) as store:
        embedding_retriever = EmbeddingRetriever(
            document_store=store,
            embedding_model="sentence-transformers/paraphrase-MiniLM-L3-v2",
        )
        # Embeddings have to be written to the store before the retriever is queried.
        store.update_embeddings(embedding_retriever)

        search_pipeline = DocumentSearchPipeline(embedding_retriever)
        result = search_pipeline.run("Paul lives in New York")
        retrieved = result["documents"]

        actual_scores = [hit.score for hit in retrieved]
        actual_contents = [hit.content for hit in retrieved]

        assert actual_contents == expected_contents
        assert actual_scores == pytest.approx(expected_scores, abs=1e-3)