feat: add health check endpoint to rest api (#3168)

* feat: add /health endpoint to rest api

* refactor: adjust to new dir structure

* fix: add new rest api dependency

* docs: add new openapi schema

* docs: manual black run

* refactor: remove some sys-wide details

* docs: minor description changes

* docs: minor description changes

* docs: generate openapi schemas

* tests: improved tests

* refactor: add cls method decorator
This commit is contained in:
Daniel Bichuetti 2022-09-08 13:24:16 -03:00 committed by GitHub
parent e0d73f3ae0
commit 1a6cbca9b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 438 additions and 2 deletions

View File

@ -398,6 +398,28 @@
}
}
}
},
"/health": {
"get": {
"tags": [
"health"
],
"summary": "Get Health Status",
"description": "This endpoint allows external systems to monitor the health of the Haystack REST API.",
"operationId": "get_health_status",
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HealthResponse"
}
}
}
}
}
}
}
},
"components": {
@ -511,6 +533,20 @@
}
}
},
"CPUUsage": {
"title": "CPUUsage",
"required": [
"used"
],
"type": "object",
"properties": {
"used": {
"title": "Used",
"type": "number",
"description": "REST API average CPU usage in percentage"
}
}
},
"CreateLabelSerialized": {
"title": "CreateLabelSerialized",
"required": [
@ -693,6 +729,56 @@
},
"additionalProperties": false
},
"GPUInfo": {
"title": "GPUInfo",
"required": [
"index",
"usage"
],
"type": "object",
"properties": {
"index": {
"title": "Index",
"type": "integer",
"description": "GPU index"
},
"usage": {
"title": "Usage",
"allOf": [
{
"$ref": "#/components/schemas/GPUUsage"
}
],
"description": "GPU usage details"
}
}
},
"GPUUsage": {
"title": "GPUUsage",
"required": [
"kernel_usage",
"memory_total",
"memory_used"
],
"type": "object",
"properties": {
"kernel_usage": {
"title": "Kernel Usage",
"type": "number",
"description": "GPU kernel usage in percentage"
},
"memory_total": {
"title": "Memory Total",
"type": "integer",
"description": "Total GPU memory in megabytes"
},
"memory_used": {
"title": "Memory Used",
"type": "integer",
"description": "REST API used GPU memory in megabytes"
}
}
},
"HTTPValidationError": {
"title": "HTTPValidationError",
"type": "object",
@ -706,6 +792,48 @@
}
}
},
"HealthResponse": {
"title": "HealthResponse",
"required": [
"version",
"cpu",
"memory"
],
"type": "object",
"properties": {
"version": {
"title": "Version",
"type": "string",
"description": "Haystack version"
},
"cpu": {
"title": "Cpu",
"allOf": [
{
"$ref": "#/components/schemas/CPUUsage"
}
],
"description": "CPU usage details"
},
"memory": {
"title": "Memory",
"allOf": [
{
"$ref": "#/components/schemas/MemoryUsage"
}
],
"description": "Memory usage details"
},
"gpus": {
"title": "Gpus",
"type": "array",
"items": {
"$ref": "#/components/schemas/GPUInfo"
},
"description": "GPU usage details"
}
}
},
"Label": {
"title": "Label",
"required": [
@ -774,6 +902,20 @@
}
}
},
"MemoryUsage": {
"title": "MemoryUsage",
"required": [
"used"
],
"type": "object",
"properties": {
"used": {
"title": "Used",
"type": "number",
"description": "REST API used memory in percentage"
}
}
},
"QueryRequest": {
"title": "QueryRequest",
"required": [

View File

@ -398,6 +398,28 @@
}
}
}
},
"/health": {
"get": {
"tags": [
"health"
],
"summary": "Get Health Status",
"description": "This endpoint allows external systems to monitor the health of the Haystack REST API.",
"operationId": "get_health_status",
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HealthResponse"
}
}
}
}
}
}
}
},
"components": {
@ -511,6 +533,20 @@
}
}
},
"CPUUsage": {
"title": "CPUUsage",
"required": [
"used"
],
"type": "object",
"properties": {
"used": {
"title": "Used",
"type": "number",
"description": "REST API average CPU usage in percentage"
}
}
},
"CreateLabelSerialized": {
"title": "CreateLabelSerialized",
"required": [
@ -693,6 +729,56 @@
},
"additionalProperties": false
},
"GPUInfo": {
"title": "GPUInfo",
"required": [
"index",
"usage"
],
"type": "object",
"properties": {
"index": {
"title": "Index",
"type": "integer",
"description": "GPU index"
},
"usage": {
"title": "Usage",
"allOf": [
{
"$ref": "#/components/schemas/GPUUsage"
}
],
"description": "GPU usage details"
}
}
},
"GPUUsage": {
"title": "GPUUsage",
"required": [
"kernel_usage",
"memory_total",
"memory_used"
],
"type": "object",
"properties": {
"kernel_usage": {
"title": "Kernel Usage",
"type": "number",
"description": "GPU kernel usage in percentage"
},
"memory_total": {
"title": "Memory Total",
"type": "integer",
"description": "Total GPU memory in megabytes"
},
"memory_used": {
"title": "Memory Used",
"type": "integer",
"description": "REST API used GPU memory in megabytes"
}
}
},
"HTTPValidationError": {
"title": "HTTPValidationError",
"type": "object",
@ -706,6 +792,48 @@
}
}
},
"HealthResponse": {
"title": "HealthResponse",
"required": [
"version",
"cpu",
"memory"
],
"type": "object",
"properties": {
"version": {
"title": "Version",
"type": "string",
"description": "Haystack version"
},
"cpu": {
"title": "Cpu",
"allOf": [
{
"$ref": "#/components/schemas/CPUUsage"
}
],
"description": "CPU usage details"
},
"memory": {
"title": "Memory",
"allOf": [
{
"$ref": "#/components/schemas/MemoryUsage"
}
],
"description": "Memory usage details"
},
"gpus": {
"title": "Gpus",
"type": "array",
"items": {
"$ref": "#/components/schemas/GPUInfo"
},
"description": "GPU usage details"
}
}
},
"Label": {
"title": "Label",
"required": [
@ -774,6 +902,20 @@
}
}
},
"MemoryUsage": {
"title": "MemoryUsage",
"required": [
"used"
],
"type": "object",
"properties": {
"used": {
"title": "Used",
"type": "number",
"description": "REST API used memory in percentage"
}
}
},
"QueryRequest": {
"title": "QueryRequest",
"required": [

View File

@ -30,6 +30,8 @@ dependencies = [
"uvicorn<1",
"gunicorn<21",
"python-multipart<1", # optional FastAPI dependency for form data
"pynvml",
"psutil"
]
dynamic = ["version"]
@ -69,3 +71,7 @@ exclude_lines = [
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
]
[tool.black]
line-length = 120
skip_magic_trailing_comma = true # For compatibility with pydoc>=4.6, check if still needed.

View File

@ -0,0 +1,110 @@
from typing import List, Optional
import logging
import os
import pynvml
import psutil
from pydantic import BaseModel, Field, validator
from fastapi import FastAPI, APIRouter
import haystack
from rest_api.utils import get_app
from rest_api.config import LOG_LEVEL
# Fetch the shared "haystack" logger once and apply the level configured for the REST API.
logger = logging.getLogger("haystack")
logger.setLevel(LOG_LEVEL)

# Router on which this module registers its endpoint(s).
router = APIRouter()

# Application object handed out by the project's get_app() helper.
app: FastAPI = get_app()
# CPU section of the health payload: a single percentage, rounded to two decimals.
# Documented with `#` comments instead of a class docstring so the generated schema stays unchanged.
class CPUUsage(BaseModel):
    used: float = Field(
        ...,  # required field
        description="REST API average CPU usage in percentage",
    )

    @validator("used")
    @classmethod
    def used_check(cls, value):
        # Normalise the incoming figure to two decimal places.
        rounded = round(value, 2)
        return rounded
# Memory section of the health payload: a single percentage, rounded to two decimals.
# Documented with `#` comments instead of a class docstring so the generated schema stays unchanged.
class MemoryUsage(BaseModel):
    used: float = Field(
        ...,  # required field
        description="REST API used memory in percentage",
    )

    @validator("used")
    @classmethod
    def used_check(cls, value):
        # Normalise the incoming figure to two decimal places.
        rounded = round(value, 2)
        return rounded
# Per-device GPU figures of the health payload.
# Documented with `#` comments instead of a class docstring so the generated schema stays unchanged.
class GPUUsage(BaseModel):
    kernel_usage: float = Field(
        ...,  # required field
        description="GPU kernel usage in percentage",
    )
    memory_total: int = Field(
        ...,  # required field
        description="Total GPU memory in megabytes",
    )
    # Must always be supplied, but None is an accepted value.
    memory_used: Optional[int] = Field(
        ...,
        description="REST API used GPU memory in megabytes",
    )

    @validator("kernel_usage")
    @classmethod
    def kernel_usage_check(cls, value):
        # Normalise the utilisation figure to two decimal places.
        rounded = round(value, 2)
        return rounded
# One entry of the health payload's GPU list: the device index plus its usage figures.
# Documented with `#` comments instead of a class docstring so the generated schema stays unchanged.
class GPUInfo(BaseModel):
    index: int = Field(
        ...,  # required field
        description="GPU index",
    )
    usage: GPUUsage = Field(
        ...,  # required field
        description="GPU usage details",
    )
# Top-level body returned by the /health endpoint.
# Documented with `#` comments instead of a class docstring so the generated schema stays unchanged.
class HealthResponse(BaseModel):
    version: str = Field(
        ...,  # required field
        description="Haystack version",
    )
    cpu: CPUUsage = Field(
        ...,  # required field
        description="CPU usage details",
    )
    memory: MemoryUsage = Field(
        ...,  # required field
        description="Memory usage details",
    )
    # Optional: falls back to a fresh empty list when no GPU entries are supplied.
    gpus: List[GPUInfo] = Field(
        default_factory=list,
        description="GPU usage details",
    )
@router.get("/health", response_model=HealthResponse, status_code=200)
def get_health_status():
    """
    This endpoint allows external systems to monitor the health of the Haystack REST API.
    """
    # NOTE: the docstring above is kept verbatim on purpose; FastAPI publishes it as the
    # endpoint description in the generated OpenAPI schema.

    # --- GPU section -------------------------------------------------------------------
    # Best effort: any NVML failure (no driver, no device, ...) leaves the collected list
    # as it is and never fails the health check itself.
    gpus: List[GPUInfo] = []
    nvml_initialized = False
    try:
        pynvml.nvmlInit()
        nvml_initialized = True

        own_pid = os.getpid()  # loop-invariant, so look it up once
        for i in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            gpu_mem_total = float(info.total) / 1024 / 1024  # bytes -> megabytes

            # Only the memory held by *this* process is reported; it stays None when the
            # process has no compute context on the device.
            gpu_mem_used = None
            for proc in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
                if proc.pid == own_pid:
                    # NVML may report the figure as unavailable (None); float(None) would
                    # raise a TypeError that the NVMLError handler below does not catch.
                    if proc.usedGpuMemory is not None:
                        gpu_mem_used = float(proc.usedGpuMemory) / 1024 / 1024
                    break

            gpus.append(
                GPUInfo(
                    index=i,
                    usage=GPUUsage(
                        memory_total=round(gpu_mem_total),
                        kernel_usage=pynvml.nvmlDeviceGetUtilizationRates(handle).gpu,
                        memory_used=round(gpu_mem_used) if gpu_mem_used is not None else None,
                    ),
                )
            )
    except pynvml.NVMLError:
        logger.warning("No NVIDIA GPU found.")
    finally:
        # Every successful nvmlInit() has to be paired with nvmlShutdown(); NVML keeps a
        # reference count that would otherwise grow with each request.
        if nvml_initialized:
            try:
                pynvml.nvmlShutdown()
            except pynvml.NVMLError:
                logger.warning("Could not shut down NVML cleanly.")

    # --- CPU / memory section ----------------------------------------------------------
    cpu_count = os.cpu_count() or 1  # os.cpu_count() may return None
    p = psutil.Process()
    # A freshly created Process has no previous sample, so cpu_percent() without an
    # interval always returns 0.0. Sample over a short window to get a real figure.
    # The per-process value can exceed 100 on multi-core hosts, hence the division.
    p_cpu_usage = p.cpu_percent(interval=0.1) / cpu_count
    p_memory_usage = p.memory_percent()

    return HealthResponse(
        version=haystack.__version__,
        cpu=CPUUsage(used=p_cpu_usage),
        memory=MemoryUsage(used=p_memory_usage),
        gpus=gpus,
    )

View File

@ -25,13 +25,14 @@ def get_app() -> FastAPI:
app = FastAPI(title="Haystack REST API", debug=True, version=haystack_version, root_path=ROOT_PATH)
# Creates the router for the API calls
from rest_api.controller import file_upload, search, feedback, document
from rest_api.controller import file_upload, search, feedback, document, health
router = APIRouter()
router.include_router(search.router, tags=["search"])
router.include_router(feedback.router, tags=["feedback"])
router.include_router(file_upload.router, tags=["file-upload"])
router.include_router(document.router, tags=["document"])
router.include_router(health.router, tags=["health"])
# This middleware allows all cross-domain requests to the API from a browser. For production
# deployments, it could be made more restrictive.

View File

@ -4,13 +4,15 @@ import os
from pathlib import Path
from textwrap import dedent
from unittest import mock
from unittest.mock import MagicMock
from unittest.mock import MagicMock, Mock
import functools
import numpy as np
import pandas as pd
import pytest
from fastapi.testclient import TestClient
from haystack import Document, Answer
import haystack
from haystack.nodes import BaseReader, BaseRetriever
from haystack.document_stores import BaseDocumentStore
from haystack.schema import Label
@ -499,3 +501,36 @@ def test_get_feedback_malformed_query(client, feedback):
feedback["unexpected_field"] = "misplaced-value"
response = client.post(url="/feedback", json=feedback)
assert response.status_code == 422
def test_get_health_check(client):
    # /health must report the mocked CPU, memory and GPU figures for the current process.
    health_module = "rest_api.controller.health"
    own_pid = 2345
    total_bytes = 34359738368  # 32768 MB

    with mock.patch(f"{health_module}.os") as os_mock, \
            mock.patch(f"{health_module}.pynvml") as pynvml_mock, \
            mock.patch(f"{health_module}.psutil") as psutil_mock:
        # Host: four cores, and the API process runs under `own_pid`.
        os_mock.cpu_count.return_value = 4
        os_mock.getpid.return_value = own_pid

        # Two identical GPUs; only the middle process entry belongs to the API.
        pynvml_mock.nvmlDeviceGetCount.return_value = 2
        pynvml_mock.nvmlDeviceGetHandleByIndex.return_value = "device"
        pynvml_mock.nvmlDeviceGetMemoryInfo.return_value = Mock(total=total_bytes)
        pynvml_mock.nvmlDeviceGetComputeRunningProcesses.return_value = [
            Mock(pid=1234, usedGpuMemory=4000000000),
            Mock(pid=own_pid, usedGpuMemory=2097152000),
            Mock(pid=3456, usedGpuMemory=2000000000),
        ]
        pynvml_mock.nvmlDeviceGetUtilizationRates.return_value = Mock(gpu=45)

        # Process figures: 200% over four cores -> 50%, and 75% memory.
        psutil_mock.virtual_memory.return_value = Mock(total=total_bytes)
        process_mock = Mock(cpu_percent=Mock(return_value=200), memory_percent=Mock(return_value=75))
        psutil_mock.Process.return_value = process_mock

        response = client.get(url="/health")

    expected_usage = {"kernel_usage": 45.0, "memory_total": 32768.0, "memory_used": 2000}
    expected_body = {
        "version": haystack.__version__,
        "cpu": {"used": 50.0},
        "memory": {"used": 75.0},
        "gpus": [{"index": gpu_index, "usage": dict(expected_usage)} for gpu_index in range(2)],
    }

    assert response.status_code == 200
    assert response.json() == expected_body