mirror of
https://github.com/HKUDS/LightRAG.git
synced 2025-11-05 20:34:04 +00:00
feat(api): add /ps endpoint to list running models for Ollama API
This commit is contained in:
parent
9351b09cc7
commit
c3f5c413fa
@ -95,6 +95,29 @@ class OllamaTagResponse(BaseModel):
|
|||||||
models: List[OllamaModel]
|
models: List[OllamaModel]
|
||||||
|
|
||||||
|
|
||||||
|
class OllamaRunningModelDetails(BaseModel):
    """Model metadata block inside an Ollama ``/api/ps`` entry.

    Mirrors the ``details`` object of Ollama's running-model schema so
    clients expecting a real Ollama server can parse the response.
    """

    parent_model: str
    format: str
    family: str
    families: List[str]
    parameter_size: str
    quantization_level: str
|
class OllamaRunningModel(BaseModel):
    """One entry in an Ollama ``/api/ps`` response.

    Describes a single currently-loaded model: its identifiers, sizes,
    nested :class:`OllamaRunningModelDetails`, and expiry timestamp.
    """

    name: str
    model: str
    size: int
    digest: str
    details: OllamaRunningModelDetails
    expires_at: str
    size_vram: int
|
class OllamaPsResponse(BaseModel):
    """Top-level payload of the Ollama ``/api/ps`` endpoint.

    Wraps the list of currently running models.
    """

    models: List[OllamaRunningModel]
async def parse_request_body(request: Request, model_class: Type[BaseModel]) -> BaseModel:
|
async def parse_request_body(request: Request, model_class: Type[BaseModel]) -> BaseModel:
|
||||||
"""
|
"""
|
||||||
Parse request body based on Content-Type header.
|
Parse request body based on Content-Type header.
|
||||||
@ -238,6 +261,32 @@ class OllamaAPI:
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Ollama-compatibility endpoint: emulate "ollama ps" by reporting LightRAG
# itself as the single always-running model.
@self.router.get("/ps", dependencies=[Depends(combined_auth)])
async def get_running_models():
    """List Running Models - returns currently running models"""
    # LightRAG is not an actual Ollama model runtime, so the response is a
    # single synthetic entry built from the configured server identity.
    return OllamaPsResponse(
        models=[
            {
                # Name and model id both come from the configured model string.
                "name": self.ollama_server_infos.LIGHTRAG_MODEL,
                "model": self.ollama_server_infos.LIGHTRAG_MODEL,
                "size": self.ollama_server_infos.LIGHTRAG_SIZE,
                "digest": self.ollama_server_infos.LIGHTRAG_DIGEST,
                # Static model details — presumably chosen to look like a
                # typical llama-family GGUF model to Ollama clients.
                # TODO confirm these placeholder values are acceptable.
                "details": {
                    "parent_model": "",
                    "format": "gguf",
                    "family": "llama",
                    "families": [
                        "llama"
                    ],
                    "parameter_size": "7.2B",
                    "quantization_level": "Q4_0"
                },
                # Far-future expiry so clients never treat the model as
                # unloading; NOTE(review): hardcoded timestamp — confirm
                # downstream clients only compare, not parse, this value.
                "expires_at": "2050-12-31T14:38:31.83753-07:00",
                # Report the same size for VRAM as for total size.
                "size_vram": self.ollama_server_infos.LIGHTRAG_SIZE
            }
        ]
    )
@self.router.post("/generate", dependencies=[Depends(combined_auth)], include_in_schema=True)
|
@self.router.post("/generate", dependencies=[Depends(combined_auth)], include_in_schema=True)
|
||||||
async def generate(raw_request: Request):
|
async def generate(raw_request: Request):
|
||||||
"""Handle generate completion requests acting as an Ollama model
|
"""Handle generate completion requests acting as an Ollama model
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user