mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-10 14:27:38 +00:00
feat: add health check endpoint to rest api (#3168)
* feat: add /health endpoint to rest api * refactor: adjust to new dir structure * fix: add new rest api dependency * docs: add new openapi schema * docs: manual black run * refactor: remove some sys-wide details * docs: minor description changes * docs: minor description changes * docs: generate openapi schemas * tests: improved tests * refactor: add cls method decorator
This commit is contained in:
parent
e0d73f3ae0
commit
1a6cbca9b6
@ -398,6 +398,28 @@
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/health": {
|
||||
"get": {
|
||||
"tags": [
|
||||
"health"
|
||||
],
|
||||
"summary": "Get Health Status",
|
||||
"description": "This endpoint allows external systems to monitor the health of the Haystack REST API.",
|
||||
"operationId": "get_health_status",
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Successful Response",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HealthResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"components": {
|
||||
@ -511,6 +533,20 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"CPUUsage": {
|
||||
"title": "CPUUsage",
|
||||
"required": [
|
||||
"used"
|
||||
],
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"used": {
|
||||
"title": "Used",
|
||||
"type": "number",
|
||||
"description": "REST API average CPU usage in percentage"
|
||||
}
|
||||
}
|
||||
},
|
||||
"CreateLabelSerialized": {
|
||||
"title": "CreateLabelSerialized",
|
||||
"required": [
|
||||
@ -693,6 +729,56 @@
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
"GPUInfo": {
|
||||
"title": "GPUInfo",
|
||||
"required": [
|
||||
"index",
|
||||
"usage"
|
||||
],
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"index": {
|
||||
"title": "Index",
|
||||
"type": "integer",
|
||||
"description": "GPU index"
|
||||
},
|
||||
"usage": {
|
||||
"title": "Usage",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/GPUUsage"
|
||||
}
|
||||
],
|
||||
"description": "GPU usage details"
|
||||
}
|
||||
}
|
||||
},
|
||||
"GPUUsage": {
|
||||
"title": "GPUUsage",
|
||||
"required": [
|
||||
"kernel_usage",
|
||||
"memory_total",
|
||||
"memory_used"
|
||||
],
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"kernel_usage": {
|
||||
"title": "Kernel Usage",
|
||||
"type": "number",
|
||||
"description": "GPU kernel usage in percentage"
|
||||
},
|
||||
"memory_total": {
|
||||
"title": "Memory Total",
|
||||
"type": "integer",
|
||||
"description": "Total GPU memory in megabytes"
|
||||
},
|
||||
"memory_used": {
|
||||
"title": "Memory Used",
|
||||
"type": "integer",
|
||||
"description": "REST API used GPU memory in megabytes"
|
||||
}
|
||||
}
|
||||
},
|
||||
"HTTPValidationError": {
|
||||
"title": "HTTPValidationError",
|
||||
"type": "object",
|
||||
@ -706,6 +792,48 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"HealthResponse": {
|
||||
"title": "HealthResponse",
|
||||
"required": [
|
||||
"version",
|
||||
"cpu",
|
||||
"memory"
|
||||
],
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"version": {
|
||||
"title": "Version",
|
||||
"type": "string",
|
||||
"description": "Haystack version"
|
||||
},
|
||||
"cpu": {
|
||||
"title": "Cpu",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/CPUUsage"
|
||||
}
|
||||
],
|
||||
"description": "CPU usage details"
|
||||
},
|
||||
"memory": {
|
||||
"title": "Memory",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/MemoryUsage"
|
||||
}
|
||||
],
|
||||
"description": "Memory usage details"
|
||||
},
|
||||
"gpus": {
|
||||
"title": "Gpus",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/GPUInfo"
|
||||
},
|
||||
"description": "GPU usage details"
|
||||
}
|
||||
}
|
||||
},
|
||||
"Label": {
|
||||
"title": "Label",
|
||||
"required": [
|
||||
@ -774,6 +902,20 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"MemoryUsage": {
|
||||
"title": "MemoryUsage",
|
||||
"required": [
|
||||
"used"
|
||||
],
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"used": {
|
||||
"title": "Used",
|
||||
"type": "number",
|
||||
"description": "REST API used memory in percentage"
|
||||
}
|
||||
}
|
||||
},
|
||||
"QueryRequest": {
|
||||
"title": "QueryRequest",
|
||||
"required": [
|
||||
|
||||
@ -398,6 +398,28 @@
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/health": {
|
||||
"get": {
|
||||
"tags": [
|
||||
"health"
|
||||
],
|
||||
"summary": "Get Health Status",
|
||||
"description": "This endpoint allows external systems to monitor the health of the Haystack REST API.",
|
||||
"operationId": "get_health_status",
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Successful Response",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HealthResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"components": {
|
||||
@ -511,6 +533,20 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"CPUUsage": {
|
||||
"title": "CPUUsage",
|
||||
"required": [
|
||||
"used"
|
||||
],
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"used": {
|
||||
"title": "Used",
|
||||
"type": "number",
|
||||
"description": "REST API average CPU usage in percentage"
|
||||
}
|
||||
}
|
||||
},
|
||||
"CreateLabelSerialized": {
|
||||
"title": "CreateLabelSerialized",
|
||||
"required": [
|
||||
@ -693,6 +729,56 @@
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
"GPUInfo": {
|
||||
"title": "GPUInfo",
|
||||
"required": [
|
||||
"index",
|
||||
"usage"
|
||||
],
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"index": {
|
||||
"title": "Index",
|
||||
"type": "integer",
|
||||
"description": "GPU index"
|
||||
},
|
||||
"usage": {
|
||||
"title": "Usage",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/GPUUsage"
|
||||
}
|
||||
],
|
||||
"description": "GPU usage details"
|
||||
}
|
||||
}
|
||||
},
|
||||
"GPUUsage": {
|
||||
"title": "GPUUsage",
|
||||
"required": [
|
||||
"kernel_usage",
|
||||
"memory_total",
|
||||
"memory_used"
|
||||
],
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"kernel_usage": {
|
||||
"title": "Kernel Usage",
|
||||
"type": "number",
|
||||
"description": "GPU kernel usage in percentage"
|
||||
},
|
||||
"memory_total": {
|
||||
"title": "Memory Total",
|
||||
"type": "integer",
|
||||
"description": "Total GPU memory in megabytes"
|
||||
},
|
||||
"memory_used": {
|
||||
"title": "Memory Used",
|
||||
"type": "integer",
|
||||
"description": "REST API used GPU memory in megabytes"
|
||||
}
|
||||
}
|
||||
},
|
||||
"HTTPValidationError": {
|
||||
"title": "HTTPValidationError",
|
||||
"type": "object",
|
||||
@ -706,6 +792,48 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"HealthResponse": {
|
||||
"title": "HealthResponse",
|
||||
"required": [
|
||||
"version",
|
||||
"cpu",
|
||||
"memory"
|
||||
],
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"version": {
|
||||
"title": "Version",
|
||||
"type": "string",
|
||||
"description": "Haystack version"
|
||||
},
|
||||
"cpu": {
|
||||
"title": "Cpu",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/CPUUsage"
|
||||
}
|
||||
],
|
||||
"description": "CPU usage details"
|
||||
},
|
||||
"memory": {
|
||||
"title": "Memory",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/MemoryUsage"
|
||||
}
|
||||
],
|
||||
"description": "Memory usage details"
|
||||
},
|
||||
"gpus": {
|
||||
"title": "Gpus",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/GPUInfo"
|
||||
},
|
||||
"description": "GPU usage details"
|
||||
}
|
||||
}
|
||||
},
|
||||
"Label": {
|
||||
"title": "Label",
|
||||
"required": [
|
||||
@ -774,6 +902,20 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"MemoryUsage": {
|
||||
"title": "MemoryUsage",
|
||||
"required": [
|
||||
"used"
|
||||
],
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"used": {
|
||||
"title": "Used",
|
||||
"type": "number",
|
||||
"description": "REST API used memory in percentage"
|
||||
}
|
||||
}
|
||||
},
|
||||
"QueryRequest": {
|
||||
"title": "QueryRequest",
|
||||
"required": [
|
||||
|
||||
@ -30,6 +30,8 @@ dependencies = [
|
||||
"uvicorn<1",
|
||||
"gunicorn<21",
|
||||
"python-multipart<1", # optional FastAPI dependency for form data
|
||||
"pynvml",
|
||||
"psutil"
|
||||
]
|
||||
dynamic = ["version"]
|
||||
|
||||
@ -69,3 +71,7 @@ exclude_lines = [
|
||||
"if __name__ == .__main__.:",
|
||||
"if TYPE_CHECKING:",
|
||||
]
|
||||
|
||||
[tool.black]
|
||||
line-length = 120
|
||||
skip_magic_trailing_comma = true # For compatibility with pydoc>=4.6, check if still needed.
|
||||
|
||||
110
rest_api/rest_api/controller/health.py
Normal file
110
rest_api/rest_api/controller/health.py
Normal file
@ -0,0 +1,110 @@
|
||||
from typing import List, Optional
|
||||
|
||||
import logging
|
||||
|
||||
import os
|
||||
import pynvml
|
||||
import psutil
|
||||
|
||||
from pydantic import BaseModel, Field, validator
|
||||
|
||||
from fastapi import FastAPI, APIRouter
|
||||
|
||||
import haystack
|
||||
|
||||
from rest_api.utils import get_app
|
||||
from rest_api.config import LOG_LEVEL
|
||||
|
||||
logging.getLogger("haystack").setLevel(LOG_LEVEL)
|
||||
logger = logging.getLogger("haystack")
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
app: FastAPI = get_app()
|
||||
|
||||
|
||||
class CPUUsage(BaseModel):
    # CPU section of the health payload.
    # Documented with comments instead of a class docstring on purpose: pydantic copies a
    # model docstring into the generated schema as a "description" entry, which would
    # change the published OpenAPI document.
    used: float = Field(
        ...,
        description="REST API average CPU usage in percentage",
    )

    @validator("used")
    @classmethod
    def used_check(cls, value):
        """Round the reported CPU percentage to two decimal places."""
        return round(value, 2)
|
||||
|
||||
|
||||
class MemoryUsage(BaseModel):
    # Memory section of the health payload.
    # Documented with comments instead of a class docstring on purpose: pydantic copies a
    # model docstring into the generated schema as a "description" entry, which would
    # change the published OpenAPI document.
    used: float = Field(
        ...,
        description="REST API used memory in percentage",
    )

    @validator("used")
    @classmethod
    def used_check(cls, value):
        """Round the reported memory percentage to two decimal places."""
        return round(value, 2)
|
||||
|
||||
|
||||
class GPUUsage(BaseModel):
    # Usage figures for a single GPU.
    # Documented with comments instead of a class docstring on purpose: pydantic copies a
    # model docstring into the generated schema as a "description" entry, which would
    # change the published OpenAPI document.
    kernel_usage: float = Field(
        ...,
        description="GPU kernel usage in percentage",
    )
    memory_total: int = Field(
        ...,
        description="Total GPU memory in megabytes",
    )
    # Must always be supplied by the caller, but may be None.
    memory_used: Optional[int] = Field(
        ...,
        description="REST API used GPU memory in megabytes",
    )

    @validator("kernel_usage")
    @classmethod
    def kernel_usage_check(cls, value):
        """Round the reported kernel usage percentage to two decimal places."""
        return round(value, 2)
|
||||
|
||||
|
||||
class GPUInfo(BaseModel):
    # One entry of the "gpus" list in the health payload: the device index paired with
    # its usage figures. Comments are used instead of a class docstring so that nothing
    # extra ends up in the generated schema.
    index: int = Field(
        ...,
        description="GPU index",
    )
    usage: GPUUsage = Field(
        ...,
        description="GPU usage details",
    )
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
    # Top-level body returned by the /health endpoint. Comments are used instead of a
    # class docstring so that nothing extra ends up in the generated schema.
    version: str = Field(
        ...,
        description="Haystack version",
    )
    cpu: CPUUsage = Field(
        ...,
        description="CPU usage details",
    )
    memory: MemoryUsage = Field(
        ...,
        description="Memory usage details",
    )
    # Optional: defaults to an empty list when no GPU entries are supplied.
    gpus: List[GPUInfo] = Field(
        default_factory=list,
        description="GPU usage details",
    )
|
||||
|
||||
|
||||
@router.get("/health", response_model=HealthResponse, status_code=200)
def get_health_status():
    """
    This endpoint allows external systems to monitor the health of the Haystack REST API.
    """
    # NOTE: the docstring above is kept verbatim because FastAPI publishes it as the
    # endpoint description in the generated OpenAPI schema.
    process = psutil.Process()

    # A freshly created psutil.Process has no previous CPU sample, so a non-blocking
    # cpu_percent() call would always report a meaningless 0.0. Sample over a short
    # window instead so the endpoint returns a real figure.
    cpu_sample_seconds = 0.1
    # cpu_percent() can exceed 100 when the process runs threads on several cores;
    # dividing by the core count keeps the reported value within 0-100.
    cpu_count = os.cpu_count() or 1
    cpu_usage = CPUUsage(used=process.cpu_percent(interval=cpu_sample_seconds) / cpu_count)
    memory_usage = MemoryUsage(used=process.memory_percent())

    return HealthResponse(
        version=haystack.__version__, cpu=cpu_usage, memory=memory_usage, gpus=_collect_gpu_info()
    )


def _bytes_to_megabytes(size_in_bytes) -> int:
    """Convert a byte count to whole megabytes (1 MB = 1024 * 1024 bytes), rounded."""
    return round(float(size_in_bytes) / 1024 / 1024)


def _collect_gpu_info() -> List[GPUInfo]:
    """
    Query NVML for every visible NVIDIA GPU.

    :return: One `GPUInfo` per GPU that could be read. The list is empty when NVML cannot be
             initialised and may be partial when a query fails midway.
    """
    try:
        pynvml.nvmlInit()
    except pynvml.NVMLError:
        logger.warning("No NVIDIA GPU found.")
        return []

    gpus: List[GPUInfo] = []
    current_pid = os.getpid()
    try:
        for index in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(index)
            memory_total = pynvml.nvmlDeviceGetMemoryInfo(handle).total

            # Memory used by this very process on the device; stays None when the process
            # has no compute context there or when NVML cannot report the figure.
            own_memory = next(
                (
                    proc.usedGpuMemory
                    for proc in pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
                    if proc.pid == current_pid
                ),
                None,
            )

            gpus.append(
                GPUInfo(
                    index=index,
                    usage=GPUUsage(
                        memory_total=_bytes_to_megabytes(memory_total),
                        kernel_usage=pynvml.nvmlDeviceGetUtilizationRates(handle).gpu,
                        memory_used=_bytes_to_megabytes(own_memory) if own_memory is not None else None,
                    ),
                )
            )
    except pynvml.NVMLError as error:
        # Initialisation succeeded, so this is a query failure rather than a missing GPU.
        logger.warning("Could not read NVIDIA GPU metrics: %s", error)
    finally:
        # Every successful nvmlInit() must be balanced by nvmlShutdown(), otherwise each
        # health check leaves another NVML initialisation behind.
        try:
            pynvml.nvmlShutdown()
        except pynvml.NVMLError as error:
            logger.warning("Could not shut down NVML: %s", error)

    return gpus
|
||||
@ -25,13 +25,14 @@ def get_app() -> FastAPI:
|
||||
app = FastAPI(title="Haystack REST API", debug=True, version=haystack_version, root_path=ROOT_PATH)
|
||||
|
||||
# Creates the router for the API calls
|
||||
from rest_api.controller import file_upload, search, feedback, document
|
||||
from rest_api.controller import file_upload, search, feedback, document, health
|
||||
|
||||
router = APIRouter()
|
||||
router.include_router(search.router, tags=["search"])
|
||||
router.include_router(feedback.router, tags=["feedback"])
|
||||
router.include_router(file_upload.router, tags=["file-upload"])
|
||||
router.include_router(document.router, tags=["document"])
|
||||
router.include_router(health.router, tags=["health"])
|
||||
|
||||
# This middleware allows all cross-domain requests to the API from a browser. For production
|
||||
# deployments, it could be made more restrictive.
|
||||
|
||||
@ -4,13 +4,15 @@ import os
|
||||
from pathlib import Path
|
||||
from textwrap import dedent
|
||||
from unittest import mock
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import MagicMock, Mock
|
||||
import functools
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
from haystack import Document, Answer
|
||||
import haystack
|
||||
from haystack.nodes import BaseReader, BaseRetriever
|
||||
from haystack.document_stores import BaseDocumentStore
|
||||
from haystack.schema import Label
|
||||
@ -499,3 +501,36 @@ def test_get_feedback_malformed_query(client, feedback):
|
||||
feedback["unexpected_field"] = "misplaced-value"
|
||||
response = client.post(url="/feedback", json=feedback)
|
||||
assert response.status_code == 422
|
||||
|
||||
|
||||
def test_get_health_check(client):
    # PID the patched `os.getpid()` reports; only the GPU process carrying this PID
    # should be counted towards "memory_used".
    own_pid = 2345
    gpu_processes = [
        Mock(pid=1234, usedGpuMemory=4000000000),
        Mock(pid=own_pid, usedGpuMemory=2097152000),
        Mock(pid=3456, usedGpuMemory=2000000000),
    ]
    # Both fake devices return the same readings, so both entries share these figures.
    expected_usage = {"kernel_usage": 45.0, "memory_total": 32768.0, "memory_used": 2000}

    with mock.patch("rest_api.controller.health.os") as os_mock, \
            mock.patch("rest_api.controller.health.pynvml") as pynvml_mock, \
            mock.patch("rest_api.controller.health.psutil") as psutil_mock:
        os_mock.cpu_count.return_value = 4
        os_mock.getpid.return_value = own_pid

        pynvml_mock.nvmlDeviceGetCount.return_value = 2
        pynvml_mock.nvmlDeviceGetHandleByIndex.return_value = "device"
        pynvml_mock.nvmlDeviceGetMemoryInfo.return_value = Mock(total=34359738368)
        pynvml_mock.nvmlDeviceGetComputeRunningProcesses.return_value = gpu_processes
        pynvml_mock.nvmlDeviceGetUtilizationRates.return_value = Mock(gpu=45)

        psutil_mock.virtual_memory.return_value = Mock(total=34359738368)
        psutil_mock.Process.return_value = Mock(
            cpu_percent=Mock(return_value=200), memory_percent=Mock(return_value=75)
        )

        response = client.get(url="/health")

        assert response.status_code == 200
        # 200% process CPU spread over 4 cores -> 50%.
        assert response.json() == {
            "version": haystack.__version__,
            "cpu": {"used": 50.0},
            "memory": {"used": 75.0},
            "gpus": [{"index": gpu_index, "usage": expected_usage} for gpu_index in range(2)],
        }
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user