feat(api): add /ps endpoint to list running models for Ollama API

Author: yangdx
Date:   2025-06-11 16:23:02 +08:00
Parent: 9351b09cc7
Commit: c3f5c413fa


@@ -95,6 +95,29 @@ class OllamaTagResponse(BaseModel):
    models: List[OllamaModel]


class OllamaRunningModelDetails(BaseModel):
    parent_model: str
    format: str
    family: str
    families: List[str]
    parameter_size: str
    quantization_level: str


class OllamaRunningModel(BaseModel):
    name: str
    model: str
    size: int
    digest: str
    details: OllamaRunningModelDetails
    expires_at: str
    size_vram: int


class OllamaPsResponse(BaseModel):
    models: List[OllamaRunningModel]


async def parse_request_body(request: Request, model_class: Type[BaseModel]) -> BaseModel:
    """
    Parse request body based on Content-Type header.
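
Note (not part of the commit): a minimal sketch of how these models nest when building a /ps payload by hand, assuming Pydantic v2 (on v1, .json() replaces .model_dump_json()). All field values below are placeholders, not values from the commit.

    details = OllamaRunningModelDetails(
        parent_model="",
        format="gguf",
        family="llama",
        families=["llama"],
        parameter_size="7.2B",
        quantization_level="Q4_0",
    )
    running = OllamaRunningModel(
        name="lightrag:latest",  # placeholder model name
        model="lightrag:latest",
        size=7365960935,         # placeholder byte count
        digest="sha256:abc123",  # placeholder digest
        details=details,
        expires_at="2050-12-31T14:38:31.83753-07:00",
        size_vram=7365960935,
    )
    # Dicts also coerce into these models, which is why the handler below
    # can return plain dicts inside OllamaPsResponse(models=[...]).
    print(OllamaPsResponse(models=[running]).model_dump_json(indent=2))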
@@ -237,6 +260,32 @@ class OllamaAPI:
                    }
                ]
            )

        @self.router.get("/ps", dependencies=[Depends(combined_auth)])
        async def get_running_models():
            """List Running Models - returns currently running models"""
            return OllamaPsResponse(
                models=[
                    {
                        "name": self.ollama_server_infos.LIGHTRAG_MODEL,
                        "model": self.ollama_server_infos.LIGHTRAG_MODEL,
                        "size": self.ollama_server_infos.LIGHTRAG_SIZE,
                        "digest": self.ollama_server_infos.LIGHTRAG_DIGEST,
                        "details": {
                            "parent_model": "",
                            "format": "gguf",
                            "family": "llama",
                            "families": ["llama"],
                            "parameter_size": "7.2B",
                            "quantization_level": "Q4_0",
                        },
                        "expires_at": "2050-12-31T14:38:31.83753-07:00",
                        "size_vram": self.ollama_server_infos.LIGHTRAG_SIZE,
                    }
                ]
            )

        @self.router.post("/generate", dependencies=[Depends(combined_auth)], include_in_schema=True)
        async def generate(raw_request: Request):
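
Note (not part of the commit): a hedged usage sketch of the new endpoint. The host, port, and /api route prefix are assumptions about a typical LightRAG deployment, not confirmed by this diff.

    import requests

    # Query the Ollama-compatible /ps endpoint and list the reported models.
    resp = requests.get("http://localhost:9621/api/ps")
    resp.raise_for_status()
    for m in resp.json()["models"]:
        print(m["name"], m["size_vram"])

The response mirrors Ollama's own /api/ps shape, so existing Ollama clients that poll for running models should parse it without changes.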