"""
|
||
|
LoLLMs (Lord of Large Language Models) Interface Module
|
||
|
=====================================================
|
||
|
|
||
|
This module provides the official interface for interacting with LoLLMs (Lord of Large Language and multimodal Systems),
|
||
|
a unified framework for AI model interaction and deployment.
|
||
|
|
||
|
LoLLMs is designed as a "one tool to rule them all" solution, providing seamless integration
|
||
|
with various AI models while maintaining high performance and user-friendly interfaces.
|
||
|
|
||
|
Author: ParisNeo
|
||
|
Created: 2024-01-24
|
||
|
License: Apache 2.0
|
||
|
|
||
|
Copyright (c) 2024 ParisNeo
|
||
|
|
||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
you may not use this file except in compliance with the License.
|
||
|
You may obtain a copy of the License at
|
||
|
|
||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
||
|
Unless required by applicable law or agreed to in writing, software
|
||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
See the License for the specific language governing permissions and
|
||
|
limitations under the License.
|
||
|
|
||
|
Version: 2.0.0
|
||
|
|
||
|
Change Log:
|
||
|
- 2.0.0 (2024-01-24):
|
||
|
* Added async support for model inference
|
||
|
* Implemented streaming capabilities
|
||
|
* Added embedding generation functionality
|
||
|
* Enhanced parameter handling
|
||
|
* Improved error handling and timeout management
|
||
|
|
||
|
Dependencies:
|
||
|
- aiohttp
|
||
|
- numpy
|
||
|
- Python >= 3.10
|
||
|
|
||
|
Features:
|
||
|
- Async text generation with streaming support
|
||
|
- Embedding generation
|
||
|
- Configurable model parameters
|
||
|
- System prompt and chat history support
|
||
|
- Timeout handling
|
||
|
- API key authentication
|
||
|
|
||
|
Usage:
|
||
|
from llm_interfaces.lollms import lollms_model_complete, lollms_embed
|
||
|
|
||
|
Project Repository: https://github.com/ParisNeo/lollms
|
||
|
Documentation: https://github.com/ParisNeo/lollms/docs
|
||
|
"""
|
||
|
|
||
|
__version__ = "1.0.0"
__author__ = "ParisNeo"
__status__ = "Production"
__project_url__ = "https://github.com/ParisNeo/lollms"
__doc_url__ = "https://github.com/ParisNeo/lollms/docs"

import sys

if sys.version_info < (3, 9):
    from typing import AsyncIterator
else:
    from collections.abc import AsyncIterator

import pipmaster as pm  # Pipmaster for dynamic library install

if not pm.is_installed("aiohttp"):
    pm.install("aiohttp")
if not pm.is_installed("tenacity"):
    pm.install("tenacity")

import aiohttp
from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type,
)

from lightrag.exceptions import (
    APIConnectionError,
    RateLimitError,
    APITimeoutError,
)

from typing import Union, List

import numpy as np


@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    retry=retry_if_exception_type(
        (RateLimitError, APIConnectionError, APITimeoutError)
    ),
)
async def lollms_model_if_cache(
    model,
    prompt,
    system_prompt=None,
    history_messages=[],
    base_url="http://localhost:9600",
    **kwargs,
) -> Union[str, AsyncIterator[str]]:
    """Client implementation for lollms generation."""

    stream = True if kwargs.get("stream") else False
    api_key = kwargs.pop("api_key", None)
    headers = (
        {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
        if api_key
        else {"Content-Type": "application/json"}
    )

    # Extract lollms specific parameters
    request_data = {
        "prompt": prompt,
        "model_name": model,
        "personality": kwargs.get("personality", -1),
        "n_predict": kwargs.get("n_predict", None),
        "stream": stream,
        "temperature": kwargs.get("temperature", 0.1),
        "top_k": kwargs.get("top_k", 50),
        "top_p": kwargs.get("top_p", 0.95),
        "repeat_penalty": kwargs.get("repeat_penalty", 0.8),
        "repeat_last_n": kwargs.get("repeat_last_n", 40),
        "seed": kwargs.get("seed", None),
        "n_threads": kwargs.get("n_threads", 8),
    }

    # Prepare the full prompt including history
    full_prompt = ""
    if system_prompt:
        full_prompt += f"{system_prompt}\n"
    for msg in history_messages:
        full_prompt += f"{msg['role']}: {msg['content']}\n"
    full_prompt += prompt

    request_data["prompt"] = full_prompt
    timeout = aiohttp.ClientTimeout(total=kwargs.get("timeout", None))

    if stream:
        # Streaming mode: the generator owns its own client session so that the
        # HTTP connection stays open until the caller has consumed every chunk.
        async def inner():
            async with aiohttp.ClientSession(
                timeout=timeout, headers=headers
            ) as session:
                async with session.post(
                    f"{base_url}/lollms_generate", json=request_data
                ) as response:
                    async for line in response.content:
                        yield line.decode().strip()

        return inner()
    else:
        async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
            async with session.post(
                f"{base_url}/lollms_generate", json=request_data
            ) as response:
                return await response.text()


async def lollms_model_complete(
    prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
) -> Union[str, AsyncIterator[str]]:
    """Complete function for lollms model generation."""

    # Get model name from config
    model_name = kwargs["hashing_kv"].global_config["llm_model_name"]

    # If keyword extraction is needed, the prompt may have to be modified or
    # specific parameters added to request JSON output (if lollms supports it).
    if keyword_extraction:
        # Note: adjust this depending on how lollms handles structured output
        pass

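    # Untested sketch of one way the keyword_extraction branch above could be
    # filled in: append a plain-language instruction so the model returns JSON.
    # Both the wording and whether it is honored depend on the model served by
    # lollms, so this is illustrative only.
    #
    #     if keyword_extraction:
    #         prompt += "\nReturn the extracted keywords as a JSON object."
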
    return await lollms_model_if_cache(
        model_name,
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        **kwargs,
    )


async def lollms_embed(
    texts: List[str], embed_model=None, base_url="http://localhost:9600", **kwargs
) -> np.ndarray:
    """
    Generate embeddings for a list of texts using the lollms server.

    Args:
        texts: List of strings to embed
        embed_model: Model name (not used directly, as lollms uses its configured vectorizer)
        base_url: URL of the lollms server
        **kwargs: Additional arguments passed to the request

    Returns:
        np.ndarray: Array of embeddings, one row per input text
    """
    api_key = kwargs.pop("api_key", None)
    headers = (
        {"Content-Type": "application/json", "Authorization": api_key}
        if api_key
        else {"Content-Type": "application/json"}
    )
    async with aiohttp.ClientSession(headers=headers) as session:
        embeddings = []
        for text in texts:
            request_data = {"text": text}

            async with session.post(
                f"{base_url}/lollms_embed",
                json=request_data,
            ) as response:
                result = await response.json()
                embeddings.append(result["vector"])

        return np.array(embeddings)
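

# ---------------------------------------------------------------------------
# Illustrative usage sketch: assumes a lollms server is listening on the
# default http://localhost:9600 and serves a locally installed model named
# "mistral-7b" (the model name is a placeholder, not a project default).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import asyncio

    async def _demo():
        # Plain (non-streaming) generation; timeout is forwarded via kwargs.
        answer = await lollms_model_if_cache(
            "mistral-7b",
            "Summarize what this module does in one sentence.",
            system_prompt="You are a concise assistant.",
            timeout=60,
        )
        print(answer)

        # Streaming generation: the helper returns an async iterator of chunks.
        chunks = await lollms_model_if_cache(
            "mistral-7b",
            "List three uses of retrieval-augmented generation.",
            stream=True,
        )
        async for chunk in chunks:
            print(chunk, end="", flush=True)
        print()

        # Embedding generation: one row per input text.
        vectors = await lollms_embed(["hello world", "light retrieval"])
        print(vectors.shape)

    asyncio.run(_demo())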