haystack/test/pipelines/test_ray.py
Zoltan Fedor 1e20818328
Ability to run Ray Serve detached (#2945)
* Ability to run Ray Serve detached

Fixes #2944

Ability to run Ray Serve detached - to allow running multiple instances of the app (HA).

See https://docs.ray.io/en/latest/serve/package-ref.html#core-apis

* Generating the docs

* Re-trigger the CI pipeline

* Retrigger the CI Pipeline

* Typo in docstrings

* Fixing docstring and typing issues

* Regenerating docs

* [EMPTY] Re-trigger CI

* [EMPTY] Re-trigger CI

* Refactoring to allow any number of args for the `serve.start()` method

There seem to be additional arguments to the `serve.start()` method, so we should probably cover all of them at once instead of only the `detached` option.

* [EMPTY] Re-trigger CI

* Test whether the ServeControllerClient in fact has the supplied `detached` parameter
2022-08-03 18:49:03 +02:00

42 lines
1.4 KiB
Python

from pathlib import Path
import pytest
import ray
from haystack.pipelines import RayPipeline
from ..conftest import SAMPLES_PATH
@pytest.fixture(autouse=True)
def shutdown_ray():
    """Shut down Ray Serve and Ray after every test in this module.

    The fixture is ``autouse``, so it wraps each test without being requested:
    the test body runs at the ``yield`` and the teardown below runs afterwards.

    Teardown is deliberately best-effort: a failure while shutting down must
    not turn a passing test into an error. Each shutdown step is guarded on
    its own so that a failure in ``ray.serve.shutdown()`` no longer prevents
    ``ray.shutdown()`` from running.
    """
    yield

    # Uses the module-level ``ray`` import; ``ray.serve`` is resolved inside
    # the guard because the attribute lookup itself may fail.
    try:
        ray.serve.shutdown()
    except Exception:
        # Best-effort cleanup. ``Exception`` (rather than a bare ``except``)
        # lets KeyboardInterrupt / SystemExit propagate so a test run can
        # still be interrupted during teardown.
        pass

    try:
        ray.shutdown()
    except Exception:
        # Best-effort cleanup, see above.
        pass
@pytest.mark.integration
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
@pytest.mark.parametrize("serve_detached", [True, False])
def test_load_pipeline(document_store_with_docs, serve_detached):
    """Load the sample Ray query pipeline from YAML, run it, and verify it.

    Runs once per ``serve_detached`` value and checks that:

    * the ``detached`` flag passed via ``serve_args`` is reflected on the
      pipeline's Serve controller client,
    * the ``ESRetriever`` and ``Reader`` deployments report the expected
      replica / concurrency / actor options (presumably the values declared
      in the sample YAML -- verify against that file),
    * the prediction echoes the query and returns the expected top answer.
    """
    yaml_path = SAMPLES_PATH / "pipeline" / "ray.haystack-pipeline.yml"
    query = "Who lives in Berlin?"
    run_params = {"Retriever": {"top_k": 10}, "Reader": {"top_k": 3}}

    pipeline = RayPipeline.load_from_yaml(
        yaml_path,
        pipeline_name="ray_query_pipeline",
        ray_args={"num_cpus": 8},
        serve_args={"detached": serve_detached},
    )
    prediction = pipeline.run(query=query, params=run_params)

    # The Serve controller must have been started with the requested mode.
    controller_client = pipeline._serve_controller_client
    assert controller_client._detached == serve_detached

    # Deployment settings, checked in the same order as before.
    retriever_deployment = ray.serve.get_deployment(name="ESRetriever")
    assert retriever_deployment.num_replicas == 2

    reader_deployment = ray.serve.get_deployment(name="Reader")
    assert reader_deployment.num_replicas == 1

    assert retriever_deployment.max_concurrent_queries == 17
    assert retriever_deployment.ray_actor_options["num_cpus"] == 0.5

    # End-to-end result of the query.
    assert prediction["query"] == query
    top_answer = prediction["answers"][0]
    assert top_answer.answer == "Carla"