Mirror of https://github.com/HKUDS/LightRAG.git, synced 2025-12-30 00:13:15 +00:00
Merge branch 'main' into delete_doc
commit 3abdc42549
@@ -1,5 +1,5 @@
 from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam

-__version__ = "1.3.8"
+__version__ = "1.3.9"
 __author__ = "Zirui Guo"
 __url__ = "https://github.com/HKUDS/LightRAG"
@@ -1 +1 @@
-__api_version__ = "0172"
+__api_version__ = "0173"
@@ -1,7 +1,7 @@
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel
-from typing import List, Dict, Any, Optional
-import logging
+from typing import List, Dict, Any, Optional, Type
+from lightrag.utils import logger
 import time
 import json
 import re
@@ -95,6 +95,68 @@ class OllamaTagResponse(BaseModel):
     models: List[OllamaModel]


+class OllamaRunningModelDetails(BaseModel):
+    parent_model: str
+    format: str
+    family: str
+    families: List[str]
+    parameter_size: str
+    quantization_level: str
+
+
+class OllamaRunningModel(BaseModel):
+    name: str
+    model: str
+    size: int
+    digest: str
+    details: OllamaRunningModelDetails
+    expires_at: str
+    size_vram: int
+
+
+class OllamaPsResponse(BaseModel):
+    models: List[OllamaRunningModel]
+
+
+async def parse_request_body(
+    request: Request, model_class: Type[BaseModel]
+) -> BaseModel:
+    """
+    Parse request body based on Content-Type header.
+    Supports both application/json and application/octet-stream.
+
+    Args:
+        request: The FastAPI Request object
+        model_class: The Pydantic model class to parse the request into
+
+    Returns:
+        An instance of the provided model_class
+    """
+    content_type = request.headers.get("content-type", "").lower()
+
+    try:
+        if content_type.startswith("application/json"):
+            # FastAPI already handles JSON parsing for us
+            body = await request.json()
+        elif content_type.startswith("application/octet-stream"):
+            # Manually parse octet-stream as JSON
+            body_bytes = await request.body()
+            body = json.loads(body_bytes.decode("utf-8"))
+        else:
+            # Try to parse as JSON for any other content type
+            body_bytes = await request.body()
+            body = json.loads(body_bytes.decode("utf-8"))
+
+        # Create an instance of the model
+        return model_class(**body)
+    except json.JSONDecodeError:
+        raise HTTPException(status_code=400, detail="Invalid JSON in request body")
+    except Exception as e:
+        raise HTTPException(
+            status_code=400, detail=f"Error parsing request body: {str(e)}"
+        )
+
+
 def estimate_tokens(text: str) -> int:
     """Estimate the number of tokens in text using tiktoken"""
     tokens = TiktokenTokenizer().encode(text)
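The parse_request_body helper added above is what lets the Ollama-compatible endpoints accept application/octet-stream bodies in addition to application/json. Below is a minimal client-side sketch (not part of this commit) showing how that path can be exercised; the base URL http://localhost:9621 and the /api mount prefix are assumptions about a typical local deployment, adjust to yours.

    # Hedged client sketch: send a JSON payload as raw bytes with an
    # application/octet-stream Content-Type so the server parses it manually.
    import json
    import requests

    payload = {"model": "lightrag:latest", "prompt": "Hello", "stream": False}
    resp = requests.post(
        "http://localhost:9621/api/generate",          # assumed base URL and prefix
        data=json.dumps(payload).encode("utf-8"),      # raw bytes, not requests' json=
        headers={"Content-Type": "application/octet-stream"},
    )
    print(resp.status_code, resp.json())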
@@ -197,13 +259,43 @@ class OllamaAPI:
                 ]
             )

-        @self.router.post("/generate", dependencies=[Depends(combined_auth)])
-        async def generate(raw_request: Request, request: OllamaGenerateRequest):
+        @self.router.get("/ps", dependencies=[Depends(combined_auth)])
+        async def get_running_models():
+            """List Running Models - returns currently running models"""
+            return OllamaPsResponse(
+                models=[
+                    {
+                        "name": self.ollama_server_infos.LIGHTRAG_MODEL,
+                        "model": self.ollama_server_infos.LIGHTRAG_MODEL,
+                        "size": self.ollama_server_infos.LIGHTRAG_SIZE,
+                        "digest": self.ollama_server_infos.LIGHTRAG_DIGEST,
+                        "details": {
+                            "parent_model": "",
+                            "format": "gguf",
+                            "family": "llama",
+                            "families": ["llama"],
+                            "parameter_size": "7.2B",
+                            "quantization_level": "Q4_0",
+                        },
+                        "expires_at": "2050-12-31T14:38:31.83753-07:00",
+                        "size_vram": self.ollama_server_infos.LIGHTRAG_SIZE,
+                    }
+                ]
+            )
+
+        @self.router.post(
+            "/generate", dependencies=[Depends(combined_auth)], include_in_schema=True
+        )
+        async def generate(raw_request: Request):
             """Handle generate completion requests acting as an Ollama model
             For compatibility purpose, the request is not processed by LightRAG,
             and will be handled by underlying LLM model.
+            Supports both application/json and application/octet-stream Content-Types.
             """
+            try:
+                # Parse the request body manually
+                request = await parse_request_body(raw_request, OllamaGenerateRequest)
+
             query = request.prompt
             start_time = time.time_ns()
             prompt_tokens = estimate_tokens(query)
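The new GET /ps route mirrors Ollama's "list running models" API and always reports the single LightRAG pseudo-model. A quick way to check it, as a sketch under the same assumed local server as the example above:

    # Hedged sketch: query the new /ps endpoint and print fields that mirror
    # the OllamaPsResponse / OllamaRunningModel schemas defined in this commit.
    import requests

    resp = requests.get("http://localhost:9621/api/ps")   # assumed base URL and prefix
    resp.raise_for_status()
    for m in resp.json()["models"]:
        print(m["name"], m["size_vram"], m["details"]["quantization_level"])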
@@ -278,7 +370,7 @@ class OllamaAPI:
                     else:
                         error_msg = f"Provider error: {error_msg}"

-                    logging.error(f"Stream error: {error_msg}")
+                    logger.error(f"Stream error: {error_msg}")

                     # Send error message to client
                     error_data = {
@@ -363,13 +455,19 @@ class OllamaAPI:
                 trace_exception(e)
                 raise HTTPException(status_code=500, detail=str(e))

-        @self.router.post("/chat", dependencies=[Depends(combined_auth)])
-        async def chat(raw_request: Request, request: OllamaChatRequest):
+        @self.router.post(
+            "/chat", dependencies=[Depends(combined_auth)], include_in_schema=True
+        )
+        async def chat(raw_request: Request):
             """Process chat completion requests acting as an Ollama model
             Routes user queries through LightRAG by selecting query mode based on prefix indicators.
             Detects and forwards OpenWebUI session-related requests (for meta data generation task) directly to LLM.
+            Supports both application/json and application/octet-stream Content-Types.
             """
+            try:
+                # Parse the request body manually
+                request = await parse_request_body(raw_request, OllamaChatRequest)
+
             # Get all messages
             messages = request.messages
             if not messages:
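The rewritten /chat handler now parses the body itself (so octet-stream clients such as some OpenWebUI requests work) and still routes the query through LightRAG based on a mode prefix in the last user message. A hedged sketch of a non-streaming call; the "/hybrid " prefix, model name, and base URL are assumptions drawn from the LightRAG docs rather than this diff:

    # Hedged sketch: Ollama-style chat request against the rewritten /chat route.
    import requests

    body = {
        "model": "lightrag:latest",   # the server's configured LIGHTRAG_MODEL
        "messages": [{"role": "user", "content": "/hybrid What does this repo do?"}],
        "stream": False,
    }
    resp = requests.post("http://localhost:9621/api/chat", json=body)  # assumed URL
    print(resp.json()["message"]["content"])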
@@ -496,7 +594,7 @@ class OllamaAPI:
                     else:
                         error_msg = f"Provider error: {error_msg}"

-                    logging.error(f"Stream error: {error_msg}")
+                    logger.error(f"Stream error: {error_msg}")

                     # Send error message to client
                     error_data = {
@@ -530,6 +628,11 @@
                         data = {
                             "model": self.ollama_server_infos.LIGHTRAG_MODEL,
                             "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
+                            "message": {
+                                "role": "assistant",
+                                "content": "",
+                                "images": None,
+                            },
                             "done": True,
                             "total_duration": total_time,
                             "load_duration": 0,
@@ -1007,7 +1007,7 @@ class LightRAG:
         except Exception as e:
             # Log error and update pipeline status
             logger.error(traceback.format_exc())
-            error_msg = f"Failed to extrat document {current_file_number}/{total_files}: {file_path}"
+            error_msg = f"Failed to extract document {current_file_number}/{total_files}: {file_path}"
             logger.error(error_msg)
             async with pipeline_status_lock:
                 pipeline_status["latest_message"] = error_msg