| """
 | |
| LoLLMs (Lord of Large Language Models) Interface Module
 | |
| =====================================================
 | |
| 
 | |
| This module provides the official interface for interacting with LoLLMs (Lord of Large Language and multimodal Systems),
 | |
| a unified framework for AI model interaction and deployment.
 | |
| 
 | |
| LoLLMs is designed as a "one tool to rule them all" solution, providing seamless integration
 | |
| with various AI models while maintaining high performance and user-friendly interfaces.
 | |
| 
 | |
| Author: ParisNeo
 | |
| Created: 2024-01-24
 | |
| License: Apache 2.0
 | |
| 
 | |
| Copyright (c) 2024 ParisNeo
 | |
| 
 | |
| Licensed under the Apache License, Version 2.0 (the "License");
 | |
| you may not use this file except in compliance with the License.
 | |
| You may obtain a copy of the License at
 | |
| 
 | |
|     http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
| Unless required by applicable law or agreed to in writing, software
 | |
| distributed under the License is distributed on an "AS IS" BASIS,
 | |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| See the License for the specific language governing permissions and
 | |
| limitations under the License.
 | |
| 
 | |
| Version: 2.0.0
 | |
| 
 | |
| Change Log:
 | |
| - 2.0.0 (2024-01-24):
 | |
|     * Added async support for model inference
 | |
|     * Implemented streaming capabilities
 | |
|     * Added embedding generation functionality
 | |
|     * Enhanced parameter handling
 | |
|     * Improved error handling and timeout management
 | |
| 
 | |
| Dependencies:
 | |
|     - aiohttp
 | |
|     - numpy
 | |
|     - Python >= 3.10
 | |
| 
 | |
| Features:
 | |
|     - Async text generation with streaming support
 | |
|     - Embedding generation
 | |
|     - Configurable model parameters
 | |
|     - System prompt and chat history support
 | |
|     - Timeout handling
 | |
|     - API key authentication
 | |
| 
 | |
| Usage:
 | |
|     from llm_interfaces.lollms import lollms_model_complete, lollms_embed
 | |
| 
 | |
| Project Repository: https://github.com/ParisNeo/lollms
 | |
| Documentation: https://github.com/ParisNeo/lollms/docs
 | |
| """

__version__ = "2.0.0"
__author__ = "ParisNeo"
__status__ = "Production"
__project_url__ = "https://github.com/ParisNeo/lollms"
__doc_url__ = "https://github.com/ParisNeo/lollms/docs"

import sys

if sys.version_info < (3, 9):
    from typing import AsyncIterator
else:
    from collections.abc import AsyncIterator

import pipmaster as pm  # Pipmaster for dynamic library install

if not pm.is_installed("aiohttp"):
    pm.install("aiohttp")
if not pm.is_installed("tenacity"):
    pm.install("tenacity")

import aiohttp
from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type,
)

from lightrag.exceptions import (
    APIConnectionError,
    RateLimitError,
    APITimeoutError,
)

from typing import Union, List

import numpy as np

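# Retry transient failures: up to 3 attempts total, with exponential backoff
# clamped between 4 and 10 seconds between tries.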
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    retry=retry_if_exception_type(
        (RateLimitError, APIConnectionError, APITimeoutError)
    ),
)
async def lollms_model_if_cache(
    model,
    prompt,
    system_prompt=None,
    history_messages=None,
    base_url="http://localhost:9600",
    **kwargs,
) -> Union[str, AsyncIterator[str]]:
    """Client implementation for lollms generation."""
    if history_messages is None:  # avoid a shared mutable default argument
        history_messages = []

    stream = bool(kwargs.get("stream"))
    api_key = kwargs.pop("api_key", None)
    headers = (
        {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
        if api_key
        else {"Content-Type": "application/json"}
    )

    # Extract lollms specific parameters
    request_data = {
        "prompt": prompt,
        "model_name": model,
        "personality": kwargs.get("personality", -1),
        "n_predict": kwargs.get("n_predict"),
        "stream": stream,
        "temperature": kwargs.get("temperature", 0.1),
        "top_k": kwargs.get("top_k", 50),
        "top_p": kwargs.get("top_p", 0.95),
        "repeat_penalty": kwargs.get("repeat_penalty", 0.8),
        "repeat_last_n": kwargs.get("repeat_last_n", 40),
        "seed": kwargs.get("seed"),
        "n_threads": kwargs.get("n_threads", 8),
    }
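    # Keys in kwargs that are not read above (or consumed by the
    # stream/api_key/timeout handling) are ignored rather than forwarded.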

    # Prepare the full prompt including history
    full_prompt = ""
    if system_prompt:
        full_prompt += f"{system_prompt}\n"
    for msg in history_messages:
        full_prompt += f"{msg['role']}: {msg['content']}\n"
    full_prompt += prompt
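    # e.g. with a system prompt and one prior exchange, full_prompt becomes:
    #   <system prompt>
    #   user: <user message>
    #   assistant: <assistant reply>
    #   <current prompt>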

    request_data["prompt"] = full_prompt
    timeout = aiohttp.ClientTimeout(total=kwargs.get("timeout"))

    if stream:

        async def inner():
            # Open the session inside the generator so it stays alive for the
            # whole stream; an outer "async with" would close it as soon as
            # this function returned the generator.
            async with aiohttp.ClientSession(
                timeout=timeout, headers=headers
            ) as session:
                async with session.post(
                    f"{base_url}/lollms_generate", json=request_data
                ) as response:
                    response.raise_for_status()
                    async for line in response.content:
                        yield line.decode().strip()

        return inner()
    else:
        async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
            async with session.post(
                f"{base_url}/lollms_generate", json=request_data
            ) as response:
                # Surface HTTP errors instead of returning an error body as text
                response.raise_for_status()
                return await response.text()
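
# Illustrative only: a caller must handle the two possible return shapes of
# lollms_model_if_cache, a plain string or an async iterator of chunks when
# stream=True ("my_model" is a placeholder model name):
#
#     result = await lollms_model_if_cache("my_model", "Hi", stream=True)
#     if isinstance(result, str):
#         print(result)
#     else:
#         async for chunk in result:
#             print(chunk, end="", flush=True)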


async def lollms_model_complete(
    prompt, system_prompt=None, history_messages=None, keyword_extraction=False, **kwargs
) -> Union[str, AsyncIterator[str]]:
    """Complete function for lollms model generation."""

    # Get model name from config
    model_name = kwargs["hashing_kv"].global_config["llm_model_name"]

    # If keyword extraction is needed, the prompt might need modification
    # or specific parameters for JSON output (if lollms supports it)
    if keyword_extraction:
        # Note: adjust this based on how lollms handles structured output
        pass

    return await lollms_model_if_cache(
        model_name,
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        **kwargs,
    )


async def lollms_embed(
    texts: List[str], embed_model=None, base_url="http://localhost:9600", **kwargs
) -> np.ndarray:
    """
    Generate embeddings for a list of texts using lollms server.

    Args:
        texts: List of strings to embed
        embed_model: Model name (not used directly as lollms uses configured vectorizer)
        base_url: URL of the lollms server
        **kwargs: Additional arguments (currently only api_key is used)

    Returns:
        np.ndarray: Array of embeddings
    """
    api_key = kwargs.pop("api_key", None)
    headers = (
        # Note: unlike the generation endpoint, the key is sent here without
        # a "Bearer" prefix
        {"Content-Type": "application/json", "Authorization": api_key}
        if api_key
        else {"Content-Type": "application/json"}
    )
    async with aiohttp.ClientSession(headers=headers) as session:
        embeddings = []
        # The endpoint embeds one text per request, so loop over the inputs
        for text in texts:
            request_data = {"text": text}

            async with session.post(
                f"{base_url}/lollms_embed",
                json=request_data,
            ) as response:
                response.raise_for_status()
                result = await response.json()
                embeddings.append(result["vector"])

        return np.array(embeddings)
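

# Minimal illustrative usage, not part of the module's public surface: assumes
# a lollms server reachable on localhost:9600; "my_model" is a placeholder name,
# and lollms_model_if_cache is called directly because lollms_model_complete
# expects a LightRAG hashing_kv config object.
if __name__ == "__main__":
    import asyncio

    async def _demo():
        # Non-streaming generation request
        text = await lollms_model_if_cache(
            "my_model", "Say hello in one sentence.", temperature=0.7
        )
        print(text)

        # Embeddings come back as an (n_texts, dim) numpy array
        vectors = await lollms_embed(["hello", "world"])
        print(vectors.shape)

    asyncio.run(_demo())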