diff --git a/lightrag/api/routers/ollama_api.py b/lightrag/api/routers/ollama_api.py
index 8aeb21c8..25e0521f 100644
--- a/lightrag/api/routers/ollama_api.py
+++ b/lightrag/api/routers/ollama_api.py
@@ -95,6 +95,29 @@ class OllamaTagResponse(BaseModel):
     models: List[OllamaModel]
 
 
+class OllamaRunningModelDetails(BaseModel):
+    parent_model: str
+    format: str
+    family: str
+    families: List[str]
+    parameter_size: str
+    quantization_level: str
+
+
+class OllamaRunningModel(BaseModel):
+    name: str
+    model: str
+    size: int
+    digest: str
+    details: OllamaRunningModelDetails
+    expires_at: str
+    size_vram: int
+
+
+class OllamaPsResponse(BaseModel):
+    models: List[OllamaRunningModel]
+
+
 async def parse_request_body(request: Request, model_class: Type[BaseModel]) -> BaseModel:
     """
     Parse request body based on Content-Type header.
@@ -237,6 +260,32 @@ class OllamaAPI:
                     }
                 ]
             )
+
+        @self.router.get("/ps", dependencies=[Depends(combined_auth)])
+        async def get_running_models():
+            """List Running Models - returns currently running models"""
+            return OllamaPsResponse(
+                models=[
+                    {
+                        "name": self.ollama_server_infos.LIGHTRAG_MODEL,
+                        "model": self.ollama_server_infos.LIGHTRAG_MODEL,
+                        "size": self.ollama_server_infos.LIGHTRAG_SIZE,
+                        "digest": self.ollama_server_infos.LIGHTRAG_DIGEST,
+                        "details": {
+                            "parent_model": "",
+                            "format": "gguf",
+                            "family": "llama",
+                            "families": [
+                                "llama"
+                            ],
+                            "parameter_size": "7.2B",
+                            "quantization_level": "Q4_0"
+                        },
+                        "expires_at": "2050-12-31T14:38:31.83753-07:00",
+                        "size_vram": self.ollama_server_infos.LIGHTRAG_SIZE
+                    }
+                ]
+            )
 
         @self.router.post("/generate", dependencies=[Depends(combined_auth)], include_in_schema=True)
         async def generate(raw_request: Request):