Add cpu-remote-inference Docker image (#5225)

* Add cpu-remote-inference Docker image
* Add web lfqa pipeline as an example for cpu-remote-inference Docker image
* WebRetriever must have document_store attribute
* Add cpu-remote-inference-latest
* Add image testing in CI

---------

Co-authored-by: Silvano Cerza <silvanocerza@gmail.com>
2025-12-27 23:18:37 +00:00 · 2023-07-07 10:23:14 +02:00 · 2023-07-07 10:23:14 +02:00 · 395854d823
commit 395854d823
parent 08f1865ddd
4 changed files with 129 additions and 4 deletions
--- a/.github/workflows/docker_release.yml
+++ b/.github/workflows/docker_release.yml
@ -19,6 +19,7 @@ jobs:
      matrix:
        target:
          - "cpu"
+          - "cpu-remote-inference"
          - "gpu"

    steps:
@ -72,6 +73,34 @@ jobs:
          # Remove image after test to avoid filling the GitHub runner and prevent its failure
          docker rmi "deepset/haystack:$TAG"

+      - name: Test non-inference image
+        if: contains(matrix.target, 'inference') != true
+        run: |
+          # Targets without "inference" in their name must have torch in the base image on every platform.
+          TAG="base-${{ matrix.target }}-${{ steps.meta.outputs.version }}"
+
+          # `|| echo 'not found'` keeps the output non-empty; otherwise the step would exit abnormally
+          for PLATFORM in "linux/amd64" "linux/arm64"; do
+            TORCH_INSTALLED=$(docker run --platform "$PLATFORM" --rm "deepset/haystack:$TAG" pip list | grep torch || echo 'not found')
+            if [[ "$TORCH_INSTALLED" == "not found" ]]; then
+              echo "::error::Pytorch is not installed in deepset/haystack:$TAG image for $PLATFORM"
+            fi
+          done
+
+      - name: Test inference image
+        if: contains(matrix.target, 'inference')
+        run: |
+          # Targets with "inference" in their name must NOT have torch in the base image on any platform.
+          TAG="base-${{ matrix.target }}-${{ steps.meta.outputs.version }}"
+
+          # The in-container `|| echo 'not found'` keeps the output non-empty; otherwise the step would exit abnormally
+          for PLATFORM in "linux/amd64" "linux/arm64"; do
+            TORCH_INSTALLED=$(docker run --platform "$PLATFORM" --rm "deepset/haystack:$TAG" sh -c "pip list | grep torch || echo 'not found'")
+            if [[ "$TORCH_INSTALLED" != "not found" ]]; then
+              echo "::error::Pytorch is installed in deepset/haystack:$TAG image for $PLATFORM"
+            fi
+          done
+
      - name: Build api images
        uses: docker/bake-action@v2
        env:
@ -82,6 +111,36 @@ jobs:
          targets: ${{ matrix.target }}
          push: true

+      - name: Test inference API invocation
+        if: contains(matrix.target, 'inference')
+        env:
+          SERPERDEV_API_KEY: ${{ secrets.SERPERDEV_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: |
+          TAG="${{ matrix.target }}-${{ steps.meta.outputs.version }}"
+
+          # Smoke test per platform: start the API container, wait for Uvicorn, send one query.
+          for PLATFORM in "linux/amd64" "linux/arm64"; do
+            docker run --name test-container -d \
+              --platform "$PLATFORM" \
+              -e PIPELINE_YAML_PATH=/opt/venv/lib/python3.10/site-packages/rest_api/pipeline/pipelines_web_lfqa.haystack-pipeline.yaml \
+              -e "RETRIEVER_PARAMS_API_KEY=$SERPERDEV_API_KEY" \
+              -e "PROMPTNODE_PARAMS_API_KEY=$OPENAI_API_KEY" \
+              -p 8080:8000 "deepset/haystack:$TAG"
+
+            # Poll the logs every 2s; after 100 attempts report the timeout and fail instead of looping forever.
+            I=0
+            until docker logs test-container 2>&1 | grep "Uvicorn running"; do
+              echo "Waiting"
+              sleep 2
+              ((++I < 100)) || { echo "::error::Timeout waiting for Uvicorn to start using deepset/haystack:$TAG image for $PLATFORM"; docker rm -f test-container; exit 1; }
+            done
+
+            RESULT=$(curl -s -X POST -H "Content-Type: application/json" -d "{\"query\": \"Where in Europe, should I live?\"}" http://localhost:8080/query)
+            [[ -n "$RESULT" ]] || echo "::error::No response from inference API using deepset/haystack:$TAG image for $PLATFORM"
+            docker rm -f test-container
+          done
+
      - name: Get latest version of Haystack
        id: latest-version
        if: startsWith(github.ref, 'refs/tags/')
--- a/docker/docker-bake.hcl
+++ b/docker/docker-bake.hcl
@ -23,19 +23,19 @@ variable "HAYSTACK_EXTRAS" {
 }

 group "base" {
-  targets = ["base-cpu", "base-gpu"]
+  targets = ["base-cpu", "base-gpu", "base-cpu-remote-inference"]
 }

 group "api" {
-  targets = ["cpu", "gpu"]
+  targets = ["cpu", "gpu", "cpu-remote-inference"]
 }

 group "api-latest" {
-  targets = ["cpu-latest", "gpu-latest"]
+  targets = ["cpu-latest", "gpu-latest", "cpu-remote-inference-latest"]
 }

 group "all" {
-  targets = ["base", "base-gpu", "cpu", "gpu"]
+  targets = ["base", "base-gpu", "cpu", "gpu", "cpu-remote-inference"]
 }

 target "base-cpu" {
@ -50,6 +50,14 @@ target "base-cpu" {
  platforms = ["linux/amd64", "linux/arm64"]
 }

+# Base image for the remote-inference flavour: inherits base-cpu, but the Haystack
+# extras fall back to "[preprocessing]" when HAYSTACK_EXTRAS is empty.
+target "base-cpu-remote-inference" {
+  inherits = ["base-cpu"]
+  args = { haystack_extras = notequal("", HAYSTACK_EXTRAS) ? "${HAYSTACK_EXTRAS}" : "[preprocessing]" }
+  tags = ["${IMAGE_NAME}:base-cpu-remote-inference-${IMAGE_TAG_SUFFIX}"]
+}
+
 target "base-gpu" {
  dockerfile = "Dockerfile.base"
  tags = ["${IMAGE_NAME}:base-gpu-${IMAGE_TAG_SUFFIX}"]
@ -74,6 +82,21 @@ target "cpu" {
  platforms = ["linux/amd64", "linux/arm64"]
 }

+# API image for the remote-inference flavour.
+# Builds Dockerfile.api on top of the base-cpu-remote-inference base image
+# (selected through the base_image / base_image_tag build args) for amd64 and arm64.
+target "cpu-remote-inference" {
+  platforms = ["linux/amd64", "linux/arm64"]
+  dockerfile = "Dockerfile.api"
+  tags = ["${IMAGE_NAME}:cpu-remote-inference-${IMAGE_TAG_SUFFIX}"]
+  args = { base_image = "${IMAGE_NAME}", base_image_tag = "base-cpu-remote-inference-${BASE_IMAGE_TAG_SUFFIX}" }
+}
+
+target "cpu-remote-inference-latest" {
+  tags = ["${IMAGE_NAME}:cpu-remote-inference"]  # same build, tagged without the version suffix
+  inherits = ["cpu-remote-inference"]
+}
+
 target "cpu-latest" {
  inherits = ["cpu"]
  tags = ["${IMAGE_NAME}:cpu"]
--- a/haystack/nodes/retriever/web.py
+++ b/haystack/nodes/retriever/web.py
@ -81,6 +81,7 @@ class WebRetriever(BaseRetriever):
        )
        self.mode = mode
        self.cache_document_store = cache_document_store
+        self.document_store = cache_document_store
        self.cache_index = cache_index
        self.cache_headers = cache_headers
        self.cache_time = cache_time
--- a/rest_api/rest_api/pipeline/pipelines_web_lfqa.haystack-pipeline.yaml
+++ b/rest_api/rest_api/pipeline/pipelines_web_lfqa.haystack-pipeline.yaml
@ -0,0 +1,42 @@
+# Example web LFQA query pipeline: Query -> Retriever -> Shaper -> PromptNode.
+# The api_key values are placeholders named after the *_PARAMS_API_KEY variables set at deploy time.
+version: ignore
+
+components:
+- name: Retriever
+  type: WebRetriever
+  params:
+    api_key: RETRIEVER_PARAMS_API_KEY
+- name: Shaper
+  type: Shaper
+  params:
+    func: join_documents_and_scores
+    inputs:
+      documents: documents
+    outputs: [documents]
+- name: custom-at-query-time
+  type: PromptTemplate
+  params:
+    prompt: "\nSynthesize a comprehensive answer from the following most relevant paragraphs\
+      \ and the given question.\n\
+      Provide a clear and concise response that summarizes the key points and information\
+      \ presented in the paragraphs.\n\
+      Your answer should be in your own words and be no longer than 50 words.\n\n\n\
+      \ Paragraphs: {documents} \n\n\
+      \ Question: {query} \n\n Answer:\n"
+- name: PromptNode
+  type: PromptNode
+  params:
+    api_key: PROMPTNODE_PARAMS_API_KEY
+    default_prompt_template: custom-at-query-time
+    max_length: 256
+    model_name_or_path: gpt-3.5-turbo
+pipelines:
+- name: query
+  nodes:
+  - name: Retriever
+    inputs: [Query]
+  - name: Shaper
+    inputs: [Retriever]
+  - name: PromptNode
+    inputs: [Shaper]