Merge branch 'main' into delete_doc

commit 3abdc42549
Author: zrguo
Date: 2025-06-16 17:02:21 +08:00

4 changed files with 114 additions and 11 deletions

View File

@@ -1,5 +1,5 @@
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
__version__ = "1.3.8"
__version__ = "1.3.9"
__author__ = "Zirui Guo"
__url__ = "https://github.com/HKUDS/LightRAG"

View File

@@ -1 +1 @@
__api_version__ = "0172"
__api_version__ = "0173"

View File

@@ -1,7 +1,7 @@
from fastapi import APIRouter, HTTPException, Request
from pydantic import BaseModel
from typing import List, Dict, Any, Optional
import logging
from typing import List, Dict, Any, Optional, Type
from lightrag.utils import logger
import time
import json
import re
@@ -95,6 +95,68 @@ class OllamaTagResponse(BaseModel):
models: List[OllamaModel]
class OllamaRunningModelDetails(BaseModel):
parent_model: str
format: str
family: str
families: List[str]
parameter_size: str
quantization_level: str
class OllamaRunningModel(BaseModel):
name: str
model: str
size: int
digest: str
details: OllamaRunningModelDetails
expires_at: str
size_vram: int
class OllamaPsResponse(BaseModel):
models: List[OllamaRunningModel]
async def parse_request_body(
request: Request, model_class: Type[BaseModel]
) -> BaseModel:
"""
Parse request body based on Content-Type header.
Supports both application/json and application/octet-stream.
Args:
request: The FastAPI Request object
model_class: The Pydantic model class to parse the request into
Returns:
An instance of the provided model_class
"""
content_type = request.headers.get("content-type", "").lower()
try:
if content_type.startswith("application/json"):
# FastAPI already handles JSON parsing for us
body = await request.json()
elif content_type.startswith("application/octet-stream"):
# Manually parse octet-stream as JSON
body_bytes = await request.body()
body = json.loads(body_bytes.decode("utf-8"))
else:
# Try to parse as JSON for any other content type
body_bytes = await request.body()
body = json.loads(body_bytes.decode("utf-8"))
# Create an instance of the model
return model_class(**body)
except json.JSONDecodeError:
raise HTTPException(status_code=400, detail="Invalid JSON in request body")
except Exception as e:
raise HTTPException(
status_code=400, detail=f"Error parsing request body: {str(e)}"
)
def estimate_tokens(text: str) -> int:
"""Estimate the number of tokens in text using tiktoken"""
tokens = TiktokenTokenizer().encode(text)
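The new parse_request_body helper is what lets the Ollama-compatible endpoints accept a raw application/octet-stream body (as some clients send) in addition to ordinary JSON. Below is a minimal, self-contained sketch of that behavior using a throwaway FastAPI app; the EchoModel, the /echo route, and the parse_body stand-in are illustrative only and are not part of this commit.

```python
# Self-contained sketch: the same JSON payload is accepted under two
# different Content-Types, mirroring the behavior of the new helper.
import json

from fastapi import FastAPI, Request
from fastapi.testclient import TestClient
from pydantic import BaseModel


class EchoModel(BaseModel):
    # hypothetical request model, used only for this demo
    prompt: str
    stream: bool = False


async def parse_body(request: Request, model_class):
    # Stand-in with the same idea as parse_request_body: decode the body as
    # JSON whether it arrives as application/json, application/octet-stream,
    # or anything else.
    content_type = request.headers.get("content-type", "").lower()
    if content_type.startswith("application/json"):
        body = await request.json()
    else:
        body = json.loads((await request.body()).decode("utf-8"))
    return model_class(**body)


app = FastAPI()


@app.post("/echo")
async def echo(raw_request: Request):
    return await parse_body(raw_request, EchoModel)


client = TestClient(app)
payload = json.dumps({"prompt": "hello"}).encode("utf-8")

# Same bytes, two Content-Types -- both parse into EchoModel.
for ctype in ("application/json", "application/octet-stream"):
    r = client.post("/echo", content=payload, headers={"Content-Type": ctype})
    print(ctype, "->", r.json())
```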
@@ -197,13 +259,43 @@ class OllamaAPI:
]
)
@self.router.post("/generate", dependencies=[Depends(combined_auth)])
async def generate(raw_request: Request, request: OllamaGenerateRequest):
@self.router.get("/ps", dependencies=[Depends(combined_auth)])
async def get_running_models():
"""List Running Models - returns currently running models"""
return OllamaPsResponse(
models=[
{
"name": self.ollama_server_infos.LIGHTRAG_MODEL,
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"size": self.ollama_server_infos.LIGHTRAG_SIZE,
"digest": self.ollama_server_infos.LIGHTRAG_DIGEST,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": ["llama"],
"parameter_size": "7.2B",
"quantization_level": "Q4_0",
},
"expires_at": "2050-12-31T14:38:31.83753-07:00",
"size_vram": self.ollama_server_infos.LIGHTRAG_SIZE,
}
]
)
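The new /ps route mirrors Ollama's "list running models" endpoint so that frontends which poll it (for example Open WebUI) see LightRAG as a loaded model. A hedged client-side sketch, assuming the router is mounted under /api and the server listens on localhost:9621; adjust both, and add auth headers if an API key is configured:

```python
# Hedged sketch (not part of the commit): query the Ollama-compatible
# "ps" endpoint and print the advertised model details.
import requests

BASE_URL = "http://localhost:9621"  # assumed LightRAG server address

resp = requests.get(f"{BASE_URL}/api/ps", timeout=10)
resp.raise_for_status()

for model in resp.json()["models"]:
    details = model["details"]
    print(f'{model["name"]}: {details["parameter_size"]} '
          f'({details["quantization_level"]}), expires {model["expires_at"]}')
```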
@self.router.post(
"/generate", dependencies=[Depends(combined_auth)], include_in_schema=True
)
async def generate(raw_request: Request):
"""Handle generate completion requests acting as an Ollama model
For compatibility purposes, the request is not processed by LightRAG
and is handled directly by the underlying LLM.
Supports both application/json and application/octet-stream Content-Types.
"""
try:
# Parse the request body manually
request = await parse_request_body(raw_request, OllamaGenerateRequest)
query = request.prompt
start_time = time.time_ns()
prompt_tokens = estimate_tokens(query)
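With the manual body parsing in place, /generate accepts the same payload whether it is posted as application/json or as application/octet-stream. A hedged sketch of the latter; the base URL, the /api mount prefix, and the model name are assumptions about the deployment:

```python
# Hedged sketch (not part of the commit): post an Ollama-style generate
# request as raw bytes with an octet-stream Content-Type.
import json
import requests

BASE_URL = "http://localhost:9621"  # assumed LightRAG server address

payload = json.dumps({
    "model": "lightrag:latest",   # assumed LIGHTRAG_MODEL value
    "prompt": "Hello there",
    "stream": False,
}).encode("utf-8")

resp = requests.post(
    f"{BASE_URL}/api/generate",
    data=payload,
    headers={"Content-Type": "application/octet-stream"},
    timeout=60,
)
resp.raise_for_status()
print(resp.json().get("response", ""))
```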
@@ -278,7 +370,7 @@ class OllamaAPI:
else:
error_msg = f"Provider error: {error_msg}"
logging.error(f"Stream error: {error_msg}")
logger.error(f"Stream error: {error_msg}")
# Send error message to client
error_data = {
@@ -363,13 +455,19 @@ class OllamaAPI:
trace_exception(e)
raise HTTPException(status_code=500, detail=str(e))
@self.router.post("/chat", dependencies=[Depends(combined_auth)])
async def chat(raw_request: Request, request: OllamaChatRequest):
@self.router.post(
"/chat", dependencies=[Depends(combined_auth)], include_in_schema=True
)
async def chat(raw_request: Request):
"""Process chat completion requests acting as an Ollama model
Routes user queries through LightRAG by selecting query mode based on prefix indicators.
Detects and forwards OpenWebUI session-related requests (for metadata generation tasks) directly to the LLM.
Supports both application/json and application/octet-stream Content-Types.
"""
try:
# Parse the request body manually
request = await parse_request_body(raw_request, OllamaChatRequest)
# Get all messages
messages = request.messages
if not messages:
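A hedged sketch of calling the rewritten /chat handler. The "/local" mode prefix below is only an illustration of the prefix indicators mentioned in the docstring (check the LightRAG docs for the exact set); the base URL, /api prefix, and model name are likewise assumptions about the deployment:

```python
# Hedged sketch (not part of the commit): non-streaming chat request whose
# first user message carries an illustrative query-mode prefix.
import requests

BASE_URL = "http://localhost:9621"  # assumed LightRAG server address

resp = requests.post(
    f"{BASE_URL}/api/chat",
    json={
        "model": "lightrag:latest",  # assumed LIGHTRAG_MODEL value
        "messages": [
            {"role": "user", "content": "/local What does this repo do?"}
        ],
        "stream": False,
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json().get("message", {}).get("content", ""))
```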
@@ -496,7 +594,7 @@ class OllamaAPI:
else:
error_msg = f"Provider error: {error_msg}"
logging.error(f"Stream error: {error_msg}")
logger.error(f"Stream error: {error_msg}")
# Send error message to client
error_data = {
@@ -530,6 +628,11 @@ class OllamaAPI:
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": "",
"images": None,
},
"done": True,
"total_duration": total_time,
"load_duration": 0,

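The data dict above is the terminal chunk of a streamed chat response: an empty assistant message plus done=True and the timing fields. A hedged sketch of a client consuming that NDJSON stream and stopping on the done flag (base URL, /api prefix, and model name are assumptions; add auth headers if an API key is configured):

```python
# Hedged sketch (not part of the commit): stream a chat completion and stop
# when the final done=True chunk arrives.
import json
import requests

BASE_URL = "http://localhost:9621"  # assumed LightRAG server address

with requests.post(
    f"{BASE_URL}/api/chat",
    json={
        "model": "lightrag:latest",  # assumed LIGHTRAG_MODEL value
        "messages": [
            {"role": "user", "content": "Summarize the indexed documents"}
        ],
        "stream": True,
    },
    stream=True,
    timeout=120,
) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines():
        if not line:
            continue
        chunk = json.loads(line)
        if chunk.get("done"):
            # total_duration is reported in nanoseconds (time.time_ns)
            print(f'\n[done in {chunk.get("total_duration", 0)} ns]')
            break
        print(chunk.get("message", {}).get("content", ""), end="", flush=True)
```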
View File

@@ -1007,7 +1007,7 @@ class LightRAG:
except Exception as e:
# Log error and update pipeline status
logger.error(traceback.format_exc())
error_msg = f"Failed to extrat document {current_file_number}/{total_files}: {file_path}"
error_msg = f"Failed to extract document {current_file_number}/{total_files}: {file_path}"
logger.error(error_msg)
async with pipeline_status_lock:
pipeline_status["latest_message"] = error_msg