feat: add health check endpoint to rest api (#3168)

* feat: add /health endpoint to rest api * refactor: adjust to new dir structure * fix: add new rest api dependency * docs: add new openapi schema * docs: manual black run * refactor: remove some sys-wide details * docs: minor description changes * docs: minor description changes * docs: generate openapi schemas * tests: improved tests * refactor: add cls method decorator
2025-12-10 14:27:38 +00:00 · 2022-09-08 13:24:16 -03:00 · 2022-09-08 13:24:16 -03:00 · 1a6cbca9b6
commit 1a6cbca9b6
parent e0d73f3ae0
6 changed files with 438 additions and 2 deletions
--- a/docs/_src/api/openapi/openapi-1.8.1rc0.json
+++ b/docs/_src/api/openapi/openapi-1.8.1rc0.json
@ -398,6 +398,28 @@
                    }
                }
            }
+        },
+        "/health": {
+            "get": {
+                "tags": [
+                    "health"
+                ],
+                "summary": "Get Health Status",
+                "description": "This endpoint allows external systems to monitor the health of the Haystack REST API.",
+                "operationId": "get_health_status",
+                "responses": {
+                    "200": {
+                        "description": "Successful Response",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/HealthResponse"
+                                }
+                            }
+                        }
+                    }
+                }
+            }
        }
    },
    "components": {
@ -511,6 +533,20 @@
                    }
                }
            },
+            "CPUUsage": {
+                "title": "CPUUsage",
+                "required": [
+                    "used"
+                ],
+                "type": "object",
+                "properties": {
+                    "used": {
+                        "title": "Used",
+                        "type": "number",
+                        "description": "REST API average CPU usage in percentage"
+                    }
+                }
+            },
            "CreateLabelSerialized": {
                "title": "CreateLabelSerialized",
                "required": [
@ -693,6 +729,56 @@
                },
                "additionalProperties": false
            },
+            "GPUInfo": {
+                "title": "GPUInfo",
+                "required": [
+                    "index",
+                    "usage"
+                ],
+                "type": "object",
+                "properties": {
+                    "index": {
+                        "title": "Index",
+                        "type": "integer",
+                        "description": "GPU index"
+                    },
+                    "usage": {
+                        "title": "Usage",
+                        "allOf": [
+                            {
+                                "$ref": "#/components/schemas/GPUUsage"
+                            }
+                        ],
+                        "description": "GPU usage details"
+                    }
+                }
+            },
+            "GPUUsage": {
+                "title": "GPUUsage",
+                "required": [
+                    "kernel_usage",
+                    "memory_total",
+                    "memory_used"
+                ],
+                "type": "object",
+                "properties": {
+                    "kernel_usage": {
+                        "title": "Kernel Usage",
+                        "type": "number",
+                        "description": "GPU kernel usage in percentage"
+                    },
+                    "memory_total": {
+                        "title": "Memory Total",
+                        "type": "integer",
+                        "description": "Total GPU memory in megabytes"
+                    },
+                    "memory_used": {
+                        "title": "Memory Used",
+                        "type": "integer",
+                        "description": "REST API used GPU memory in megabytes"
+                    }
+                }
+            },
            "HTTPValidationError": {
                "title": "HTTPValidationError",
                "type": "object",
@ -706,6 +792,48 @@
                    }
                }
            },
+            "HealthResponse": {
+                "title": "HealthResponse",
+                "required": [
+                    "version",
+                    "cpu",
+                    "memory"
+                ],
+                "type": "object",
+                "properties": {
+                    "version": {
+                        "title": "Version",
+                        "type": "string",
+                        "description": "Haystack version"
+                    },
+                    "cpu": {
+                        "title": "Cpu",
+                        "allOf": [
+                            {
+                                "$ref": "#/components/schemas/CPUUsage"
+                            }
+                        ],
+                        "description": "CPU usage details"
+                    },
+                    "memory": {
+                        "title": "Memory",
+                        "allOf": [
+                            {
+                                "$ref": "#/components/schemas/MemoryUsage"
+                            }
+                        ],
+                        "description": "Memory usage details"
+                    },
+                    "gpus": {
+                        "title": "Gpus",
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/GPUInfo"
+                        },
+                        "description": "GPU usage details"
+                    }
+                }
+            },
            "Label": {
                "title": "Label",
                "required": [
@ -774,6 +902,20 @@
                    }
                }
            },
+            "MemoryUsage": {
+                "title": "MemoryUsage",
+                "required": [
+                    "used"
+                ],
+                "type": "object",
+                "properties": {
+                    "used": {
+                        "title": "Used",
+                        "type": "number",
+                        "description": "REST API used memory in percentage"
+                    }
+                }
+            },
            "QueryRequest": {
                "title": "QueryRequest",
                "required": [
--- a/docs/_src/api/openapi/openapi.json
+++ b/docs/_src/api/openapi/openapi.json
@ -398,6 +398,28 @@
                    }
                }
            }
+        },
+        "/health": {
+            "get": {
+                "tags": [
+                    "health"
+                ],
+                "summary": "Get Health Status",
+                "description": "This endpoint allows external systems to monitor the health of the Haystack REST API.",
+                "operationId": "get_health_status",
+                "responses": {
+                    "200": {
+                        "description": "Successful Response",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/HealthResponse"
+                                }
+                            }
+                        }
+                    }
+                }
+            }
        }
    },
    "components": {
@ -511,6 +533,20 @@
                    }
                }
            },
+            "CPUUsage": {
+                "title": "CPUUsage",
+                "required": [
+                    "used"
+                ],
+                "type": "object",
+                "properties": {
+                    "used": {
+                        "title": "Used",
+                        "type": "number",
+                        "description": "REST API average CPU usage in percentage"
+                    }
+                }
+            },
            "CreateLabelSerialized": {
                "title": "CreateLabelSerialized",
                "required": [
@ -693,6 +729,56 @@
                },
                "additionalProperties": false
            },
+            "GPUInfo": {
+                "title": "GPUInfo",
+                "required": [
+                    "index",
+                    "usage"
+                ],
+                "type": "object",
+                "properties": {
+                    "index": {
+                        "title": "Index",
+                        "type": "integer",
+                        "description": "GPU index"
+                    },
+                    "usage": {
+                        "title": "Usage",
+                        "allOf": [
+                            {
+                                "$ref": "#/components/schemas/GPUUsage"
+                            }
+                        ],
+                        "description": "GPU usage details"
+                    }
+                }
+            },
+            "GPUUsage": {
+                "title": "GPUUsage",
+                "required": [
+                    "kernel_usage",
+                    "memory_total",
+                    "memory_used"
+                ],
+                "type": "object",
+                "properties": {
+                    "kernel_usage": {
+                        "title": "Kernel Usage",
+                        "type": "number",
+                        "description": "GPU kernel usage in percentage"
+                    },
+                    "memory_total": {
+                        "title": "Memory Total",
+                        "type": "integer",
+                        "description": "Total GPU memory in megabytes"
+                    },
+                    "memory_used": {
+                        "title": "Memory Used",
+                        "type": "integer",
+                        "description": "REST API used GPU memory in megabytes"
+                    }
+                }
+            },
            "HTTPValidationError": {
                "title": "HTTPValidationError",
                "type": "object",
@ -706,6 +792,48 @@
                    }
                }
            },
+            "HealthResponse": {
+                "title": "HealthResponse",
+                "required": [
+                    "version",
+                    "cpu",
+                    "memory"
+                ],
+                "type": "object",
+                "properties": {
+                    "version": {
+                        "title": "Version",
+                        "type": "string",
+                        "description": "Haystack version"
+                    },
+                    "cpu": {
+                        "title": "Cpu",
+                        "allOf": [
+                            {
+                                "$ref": "#/components/schemas/CPUUsage"
+                            }
+                        ],
+                        "description": "CPU usage details"
+                    },
+                    "memory": {
+                        "title": "Memory",
+                        "allOf": [
+                            {
+                                "$ref": "#/components/schemas/MemoryUsage"
+                            }
+                        ],
+                        "description": "Memory usage details"
+                    },
+                    "gpus": {
+                        "title": "Gpus",
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/GPUInfo"
+                        },
+                        "description": "GPU usage details"
+                    }
+                }
+            },
            "Label": {
                "title": "Label",
                "required": [
@ -774,6 +902,20 @@
                    }
                }
            },
+            "MemoryUsage": {
+                "title": "MemoryUsage",
+                "required": [
+                    "used"
+                ],
+                "type": "object",
+                "properties": {
+                    "used": {
+                        "title": "Used",
+                        "type": "number",
+                        "description": "REST API used memory in percentage"
+                    }
+                }
+            },
            "QueryRequest": {
                "title": "QueryRequest",
                "required": [
--- a/rest_api/pyproject.toml
+++ b/rest_api/pyproject.toml
@ -30,6 +30,8 @@ dependencies = [
    "uvicorn<1",
    "gunicorn<21",
    "python-multipart<1",  # optional FastAPI dependency for form data
+    "pynvml",
+    "psutil"
 ]
 dynamic = ["version"]

@ -69,3 +71,7 @@ exclude_lines = [
  "if __name__ == .__main__.:",
  "if TYPE_CHECKING:",
 ]
+
+[tool.black]
+line-length = 120
+skip_magic_trailing_comma = true  # For compatibility with pydoc>=4.6, check if still needed.
--- a/rest_api/rest_api/controller/health.py
+++ b/rest_api/rest_api/controller/health.py
@ -0,0 +1,110 @@
+from typing import List, Optional
+
+import logging
+
+import os
+import pynvml
+import psutil
+
+from pydantic import BaseModel, Field, validator
+
+from fastapi import FastAPI, APIRouter
+
+import haystack
+
+from rest_api.utils import get_app
+from rest_api.config import LOG_LEVEL
+
+logging.getLogger("haystack").setLevel(LOG_LEVEL)
+logger = logging.getLogger("haystack")
+
+
+router = APIRouter()
+app: FastAPI = get_app()
+
+
+# Schema of the "cpu" entry of the health response: a single percentage value.
+class CPUUsage(BaseModel):
+    # Required field; the description text is what the API schema exposes for it.
+    used: float = Field(..., description="REST API average CPU usage in percentage")
+
+    # Rounds the reported percentage to two decimal places when the model is validated.
+    @validator("used")
+    @classmethod
+    def used_check(cls, v: float) -> float:
+        return round(v, 2)
+
+
+# Schema of the "memory" entry of the health response: a single percentage value.
+class MemoryUsage(BaseModel):
+    # Required field; the description text is what the API schema exposes for it.
+    used: float = Field(..., description="REST API used memory in percentage")
+
+    # Rounds the reported percentage to two decimal places when the model is validated.
+    @validator("used")
+    @classmethod
+    def used_check(cls, v: float) -> float:
+        return round(v, 2)
+
+
+# Schema of the per-GPU usage figures nested inside `GPUInfo`.
+class GPUUsage(BaseModel):
+    kernel_usage: float = Field(..., description="GPU kernel usage in percentage")
+    memory_total: int = Field(..., description="Total GPU memory in megabytes")
+    # Must always be passed explicitly (`...`), but `None` is an accepted value; the endpoint passes `None` when
+    # the REST API process is not among the compute processes reported for the GPU.
+    memory_used: Optional[int] = Field(..., description="REST API used GPU memory in megabytes")
+
+    # Rounds the reported kernel utilization to two decimal places when the model is validated.
+    @validator("kernel_usage")
+    @classmethod
+    def kernel_usage_check(cls, v: float) -> float:
+        return round(v, 2)
+
+
+# Schema of one element of the "gpus" list: the device index paired with its usage figures.
+class GPUInfo(BaseModel):
+    # Index the device was enumerated under (the loop counter used when querying the GPUs).
+    index: int = Field(..., description="GPU index")
+    usage: GPUUsage = Field(..., description="GPU usage details")
+
+
+# Top-level schema returned by the `/health` endpoint.
+class HealthResponse(BaseModel):
+    version: str = Field(..., description="Haystack version")
+    cpu: CPUUsage = Field(..., description="CPU usage details")
+    memory: MemoryUsage = Field(..., description="Memory usage details")
+    # Optional: defaults to an empty list, which is also what the endpoint returns when no GPU could be queried.
+    gpus: List[GPUInfo] = Field(default_factory=list, description="GPU usage details")
+
+
+@router.get("/health", response_model=HealthResponse, status_code=200)
+def get_health_status():
+    """
+    This endpoint allows external systems to monitor the health of the Haystack REST API.
+    """
+
+    gpus: List[GPUInfo] = []
+
+    try:
+        pynvml.nvmlInit()
+        gpu_count = pynvml.nvmlDeviceGetCount()
+        for i in range(gpu_count):
+            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+            info = pynvml.nvmlDeviceGetMemoryInfo(handle)
+            gpu_mem_total = float(info.total) / 1024 / 1024
+            gpu_mem_used = None
+            for proc in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
+                if proc.pid == os.getpid():
+                    gpu_mem_used = float(proc.usedGpuMemory) / 1024 / 1024
+                    break
+            gpu_info = GPUInfo(
+                index=i,
+                usage=GPUUsage(
+                    memory_total=round(gpu_mem_total),
+                    kernel_usage=pynvml.nvmlDeviceGetUtilizationRates(handle).gpu,
+                    memory_used=round(gpu_mem_used) if gpu_mem_used is not None else None,
+                ),
+            )
+
+            gpus.append(gpu_info)
+    except pynvml.NVMLError:
+        logger.warning("No NVIDIA GPU found.")
+
+    p_cpu_usage = 0
+    p_memory_usage = 0
+    cpu_count = os.cpu_count() or 1
+    p = psutil.Process()
+    p_cpu_usage = p.cpu_percent() / cpu_count
+    p_memory_usage = p.memory_percent()
+
+    cpu_usage = CPUUsage(used=p_cpu_usage)
+    memory_usage = MemoryUsage(used=p_memory_usage)
+
+    return HealthResponse(version=haystack.__version__, cpu=cpu_usage, memory=memory_usage, gpus=gpus)
--- a/rest_api/rest_api/utils.py
+++ b/rest_api/rest_api/utils.py
@ -25,13 +25,14 @@ def get_app() -> FastAPI:
    app = FastAPI(title="Haystack REST API", debug=True, version=haystack_version, root_path=ROOT_PATH)

    # Creates the router for the API calls
-    from rest_api.controller import file_upload, search, feedback, document
+    from rest_api.controller import file_upload, search, feedback, document, health

    router = APIRouter()
    router.include_router(search.router, tags=["search"])
    router.include_router(feedback.router, tags=["feedback"])
    router.include_router(file_upload.router, tags=["file-upload"])
    router.include_router(document.router, tags=["document"])
+    router.include_router(health.router, tags=["health"])

    # This middleware enables allow all cross-domain requests to the API from a browser. For production
    # deployments, it could be made more restrictive.
--- a/rest_api/test/test_rest_api.py
+++ b/rest_api/test/test_rest_api.py
@ -4,13 +4,15 @@ import os
 from pathlib import Path
 from textwrap import dedent
 from unittest import mock
-from unittest.mock import MagicMock
+from unittest.mock import MagicMock, Mock
+import functools
 import numpy as np
 import pandas as pd

 import pytest
 from fastapi.testclient import TestClient
 from haystack import Document, Answer
+import haystack
 from haystack.nodes import BaseReader, BaseRetriever
 from haystack.document_stores import BaseDocumentStore
 from haystack.schema import Label
@ -499,3 +501,36 @@ def test_get_feedback_malformed_query(client, feedback):
    feedback["unexpected_field"] = "misplaced-value"
    response = client.post(url="/feedback", json=feedback)
    assert response.status_code == 422
+
+
+def test_get_health_check(client):
+    with mock.patch("rest_api.controller.health.os") as os:
+        os.cpu_count.return_value = 4
+        os.getpid.return_value = int(2345)
+        with mock.patch("rest_api.controller.health.pynvml") as pynvml:
+            pynvml.nvmlDeviceGetCount.return_value = 2
+            pynvml.nvmlDeviceGetHandleByIndex.return_value = "device"
+            pynvml.nvmlDeviceGetMemoryInfo.return_value = Mock(total=34359738368)
+            pynvml.nvmlDeviceGetComputeRunningProcesses.return_value = [
+                Mock(pid=int(1234), usedGpuMemory=4000000000),
+                Mock(pid=int(2345), usedGpuMemory=2097152000),
+                Mock(pid=int(3456), usedGpuMemory=2000000000),
+            ]
+            pynvml.nvmlDeviceGetUtilizationRates.return_value = Mock(gpu=45)
+            with mock.patch("rest_api.controller.health.psutil") as psutil:
+                psutil.virtual_memory.return_value = Mock(total=34359738368)
+                psutil.Process.return_value = Mock(
+                    cpu_percent=Mock(return_value=200), memory_percent=Mock(return_value=75)
+                )
+
+                response = client.get(url="/health")
+                assert response.status_code == 200
+                assert response.json() == {
+                    "version": haystack.__version__,
+                    "cpu": {"used": 50.0},
+                    "memory": {"used": 75.0},
+                    "gpus": [
+                        {"index": 0, "usage": {"kernel_usage": 45.0, "memory_total": 32768.0, "memory_used": 2000}},
+                        {"index": 1, "usage": {"kernel_usage": 45.0, "memory_total": 32768.0, "memory_used": 2000}},
+                    ],
+                }