"""
|
|
Configs for the LightRAG API.
|
|
"""
|
|
|
|
import os
|
|
import argparse
|
|
import logging
|
|
from dotenv import load_dotenv
|
|
from lightrag.utils import get_env_value
|
|
from lightrag.llm.binding_options import OllamaEmbeddingOptions, OllamaLLMOptions
|
|
import sys
|
|
|
|
from lightrag.constants import (
    DEFAULT_WOKERS,
    DEFAULT_TIMEOUT,
    DEFAULT_TOP_K,
    DEFAULT_CHUNK_TOP_K,
    DEFAULT_HISTORY_TURNS,
    DEFAULT_MAX_ENTITY_TOKENS,
    DEFAULT_MAX_RELATION_TOKENS,
    DEFAULT_MAX_TOTAL_TOKENS,
    DEFAULT_COSINE_THRESHOLD,
    DEFAULT_RELATED_CHUNK_NUMBER,
    DEFAULT_MIN_RERANK_SCORE,
    DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
    DEFAULT_MAX_ASYNC,
    DEFAULT_SUMMARY_MAX_TOKENS,
    DEFAULT_SUMMARY_LANGUAGE,
    DEFAULT_EMBEDDING_FUNC_MAX_ASYNC,
    DEFAULT_EMBEDDING_BATCH_NUM,
    DEFAULT_OLLAMA_MODEL_NAME,
    DEFAULT_OLLAMA_MODEL_TAG,
    DEFAULT_OLLAMA_MODEL_SIZE,
    DEFAULT_OLLAMA_CREATED_AT,
    DEFAULT_OLLAMA_DIGEST,
)

# Use the .env file inside the current folder.
# This allows each LightRAG instance to use its own .env file.
# OS environment variables take precedence over the .env file.
load_dotenv(dotenv_path=".env", override=False)
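
# Illustrative example (not part of the original module): because
# override=False, variables already set in the OS environment win over .env.
# With `export PORT=9621` in the shell and `PORT=8000` in .env,
# os.getenv("PORT") returns "9621".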


class OllamaServerInfos:
    """Name, tag, and metadata for the Ollama model that LightRAG emulates."""

    def __init__(self, name=None, tag=None):
        self._lightrag_name = name or os.getenv(
            "OLLAMA_EMULATING_MODEL_NAME", DEFAULT_OLLAMA_MODEL_NAME
        )
        self._lightrag_tag = tag or os.getenv(
            "OLLAMA_EMULATING_MODEL_TAG", DEFAULT_OLLAMA_MODEL_TAG
        )
        self.LIGHTRAG_SIZE = DEFAULT_OLLAMA_MODEL_SIZE
        self.LIGHTRAG_CREATED_AT = DEFAULT_OLLAMA_CREATED_AT
        self.LIGHTRAG_DIGEST = DEFAULT_OLLAMA_DIGEST

    @property
    def LIGHTRAG_NAME(self):
        return self._lightrag_name

    @LIGHTRAG_NAME.setter
    def LIGHTRAG_NAME(self, value):
        self._lightrag_name = value

    @property
    def LIGHTRAG_TAG(self):
        return self._lightrag_tag

    @LIGHTRAG_TAG.setter
    def LIGHTRAG_TAG(self, value):
        self._lightrag_tag = value

    @property
    def LIGHTRAG_MODEL(self):
        return f"{self._lightrag_name}:{self._lightrag_tag}"


ollama_server_infos = OllamaServerInfos()
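
# Illustrative usage (not part of the original module): with the defaults
# ("lightrag" and "latest", per the CLI help below), the emulated model id
# combines name and tag:
#
#   infos = OllamaServerInfos(name="lightrag", tag="latest")
#   assert infos.LIGHTRAG_MODEL == "lightrag:latest"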


class DefaultRAGStorageConfig:
    KV_STORAGE = "JsonKVStorage"
    VECTOR_STORAGE = "NanoVectorDBStorage"
    GRAPH_STORAGE = "NetworkXStorage"
    DOC_STATUS_STORAGE = "JsonDocStatusStorage"


def get_default_host(binding_type: str) -> str:
    default_hosts = {
        "ollama": os.getenv("LLM_BINDING_HOST", "http://localhost:11434"),
        "lollms": os.getenv("LLM_BINDING_HOST", "http://localhost:9600"),
        "azure_openai": os.getenv("AZURE_OPENAI_ENDPOINT", "https://api.openai.com/v1"),
        "openai": os.getenv("LLM_BINDING_HOST", "https://api.openai.com/v1"),
    }
    # Fall back to the Ollama default host for unknown binding types.
    return default_hosts.get(
        binding_type, os.getenv("LLM_BINDING_HOST", "http://localhost:11434")
    )
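
# Illustrative behavior (derived from the mapping above):
#   get_default_host("lollms")   -> LLM_BINDING_HOST if set, else "http://localhost:9600"
#   get_default_host("unknown")  -> LLM_BINDING_HOST if set, else "http://localhost:11434"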


def parse_args() -> argparse.Namespace:
    """
    Parse command line arguments with environment variable fallback.

    Returns:
        argparse.Namespace: Parsed arguments
    """

    parser = argparse.ArgumentParser(
        description="LightRAG FastAPI Server with separate working and input directories"
    )

    # Server configuration
    parser.add_argument(
        "--host",
        default=get_env_value("HOST", "0.0.0.0"),
        help="Server host (default: from env or 0.0.0.0)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=get_env_value("PORT", 9621, int),
        help="Server port (default: from env or 9621)",
    )

    # Directory configuration
    parser.add_argument(
        "--working-dir",
        default=get_env_value("WORKING_DIR", "./rag_storage"),
        help="Working directory for RAG storage (default: from env or ./rag_storage)",
    )
    parser.add_argument(
        "--input-dir",
        default=get_env_value("INPUT_DIR", "./inputs"),
        help="Directory containing input documents (default: from env or ./inputs)",
    )

    parser.add_argument(
        "--timeout",
        default=get_env_value("TIMEOUT", DEFAULT_TIMEOUT, int, special_none=True),
        type=int,
        help="Timeout in seconds (useful when using slow AI). Use None for infinite timeout",
    )
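    # Note (assumption about get_env_value's special_none flag): an explicit
    # TIMEOUT=None in the environment is presumably passed through as Python
    # None, giving the infinite timeout mentioned in the help text.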

    # RAG configuration
    parser.add_argument(
        "--max-async",
        type=int,
        default=get_env_value("MAX_ASYNC", DEFAULT_MAX_ASYNC, int),
        help=f"Maximum async operations (default: from env or {DEFAULT_MAX_ASYNC})",
    )
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=get_env_value("MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS, int),
        help=f"Maximum token size (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
    )

    # Logging configuration
    parser.add_argument(
        "--log-level",
        default=get_env_value("LOG_LEVEL", "INFO"),
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        help="Logging level (default: from env or INFO)",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        default=get_env_value("VERBOSE", False, bool),
        help="Enable verbose debug output (only valid for DEBUG log-level)",
    )

    parser.add_argument(
        "--key",
        type=str,
        default=get_env_value("LIGHTRAG_API_KEY", None),
        help="API key for authentication. Protects the LightRAG server against unauthorized access",
    )

    # Optional HTTPS parameters
    parser.add_argument(
        "--ssl",
        action="store_true",
        default=get_env_value("SSL", False, bool),
        help="Enable HTTPS (default: from env or False)",
    )
    parser.add_argument(
        "--ssl-certfile",
        default=get_env_value("SSL_CERTFILE", None),
        help="Path to SSL certificate file (required if --ssl is enabled)",
    )
    parser.add_argument(
        "--ssl-keyfile",
        default=get_env_value("SSL_KEYFILE", None),
        help="Path to SSL private key file (required if --ssl is enabled)",
    )

    # Ollama model configuration
    parser.add_argument(
        "--simulated-model-name",
        type=str,
        default=get_env_value("OLLAMA_EMULATING_MODEL_NAME", DEFAULT_OLLAMA_MODEL_NAME),
        help="Name for the simulated Ollama model (default: from env or lightrag)",
    )
    parser.add_argument(
        "--simulated-model-tag",
        type=str,
        default=get_env_value("OLLAMA_EMULATING_MODEL_TAG", DEFAULT_OLLAMA_MODEL_TAG),
        help="Tag for the simulated Ollama model (default: from env or latest)",
    )

    # Namespace
    parser.add_argument(
        "--workspace",
        type=str,
        default=get_env_value("WORKSPACE", ""),
        help="Default workspace for all storage",
    )

    parser.add_argument(
        "--auto-scan-at-startup",
        action="store_true",
        default=False,
        help="Enable automatic scanning when the program starts",
    )

    # Server workers configuration
    parser.add_argument(
        "--workers",
        type=int,
        default=get_env_value("WORKERS", DEFAULT_WOKERS, int),
        help="Number of worker processes (default: from env or 1)",
    )

    # LLM and embedding bindings
    parser.add_argument(
        "--llm-binding",
        type=str,
        default=get_env_value("LLM_BINDING", "ollama"),
        choices=["lollms", "ollama", "openai", "openai-ollama", "azure_openai"],
        help="LLM binding type (default: from env or ollama)",
    )
    parser.add_argument(
        "--embedding-binding",
        type=str,
        default=get_env_value("EMBEDDING_BINDING", "ollama"),
        choices=["lollms", "ollama", "openai", "azure_openai"],
        help="Embedding binding type (default: from env or ollama)",
    )

    # Conditionally add binding options defined in the binding_options module.
    # This adds command line arguments for all binding options
    # (e.g., --ollama-embedding-num_ctx) and corresponding environment
    # variables (e.g., OLLAMA_EMBEDDING_NUM_CTX).
    # The bounds check below makes an IndexError impossible, so no
    # try/except is needed around the sys.argv lookup.
    if "--llm-binding" in sys.argv:
        idx = sys.argv.index("--llm-binding")
        if idx + 1 < len(sys.argv) and sys.argv[idx + 1] == "ollama":
            OllamaLLMOptions.add_args(parser)
    elif os.environ.get("LLM_BINDING") == "ollama":
        OllamaLLMOptions.add_args(parser)

    if "--embedding-binding" in sys.argv:
        idx = sys.argv.index("--embedding-binding")
        if idx + 1 < len(sys.argv) and sys.argv[idx + 1] == "ollama":
            OllamaEmbeddingOptions.add_args(parser)
    elif os.environ.get("EMBEDDING_BINDING") == "ollama":
        OllamaEmbeddingOptions.add_args(parser)
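
    # Note: the checks above only recognize the space-separated form
    # ("--llm-binding ollama"); "--llm-binding=ollama" is a single sys.argv
    # token, so it falls through to the environment-variable branch.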

    args = parser.parse_args()

    # Convert relative paths to absolute paths
    args.working_dir = os.path.abspath(args.working_dir)
    args.input_dir = os.path.abspath(args.input_dir)

    # Inject storage configuration from environment variables
    args.kv_storage = get_env_value(
        "LIGHTRAG_KV_STORAGE", DefaultRAGStorageConfig.KV_STORAGE
    )
    args.doc_status_storage = get_env_value(
        "LIGHTRAG_DOC_STATUS_STORAGE", DefaultRAGStorageConfig.DOC_STATUS_STORAGE
    )
    args.graph_storage = get_env_value(
        "LIGHTRAG_GRAPH_STORAGE", DefaultRAGStorageConfig.GRAPH_STORAGE
    )
    args.vector_storage = get_env_value(
        "LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE
    )

    # Get MAX_PARALLEL_INSERT from environment
    args.max_parallel_insert = get_env_value("MAX_PARALLEL_INSERT", 2, int)

    # Get MAX_GRAPH_NODES from environment
    args.max_graph_nodes = get_env_value("MAX_GRAPH_NODES", 1000, int)

    # Handle the openai-ollama special case: OpenAI serves the LLM while
    # Ollama serves the embeddings
    if args.llm_binding == "openai-ollama":
        args.llm_binding = "openai"
        args.embedding_binding = "ollama"

    # Ollama context window size
    args.ollama_num_ctx = get_env_value("OLLAMA_NUM_CTX", 32768, int)

    args.llm_binding_host = get_env_value(
        "LLM_BINDING_HOST", get_default_host(args.llm_binding)
    )
    args.embedding_binding_host = get_env_value(
        "EMBEDDING_BINDING_HOST", get_default_host(args.embedding_binding)
    )
    args.llm_binding_api_key = get_env_value("LLM_BINDING_API_KEY", None)
    args.embedding_binding_api_key = get_env_value("EMBEDDING_BINDING_API_KEY", "")

    # Inject model configuration
    args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
    args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest")
    args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int)
    args.max_embed_tokens = get_env_value("MAX_EMBED_TOKENS", 8192, int)

    # Inject chunk configuration
    args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
    args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int)

    # Inject LLM cache configuration
    args.enable_llm_cache_for_extract = get_env_value(
        "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
    )
    args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)

    # Inject LLM temperature configuration
    args.temperature = get_env_value("TEMPERATURE", 0.5, float)

    # Select document loading engine (DOCLING or DEFAULT)
    args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")

    # Environment variables that were previously read directly elsewhere
    args.cors_origins = get_env_value("CORS_ORIGINS", "*")
    args.summary_language = get_env_value("SUMMARY_LANGUAGE", DEFAULT_SUMMARY_LANGUAGE)
    args.whitelist_paths = get_env_value("WHITELIST_PATHS", "/health,/api/*")

    # JWT auth configuration
    args.auth_accounts = get_env_value("AUTH_ACCOUNTS", "")
    args.token_secret = get_env_value("TOKEN_SECRET", "lightrag-jwt-default-secret")
    args.token_expire_hours = get_env_value("TOKEN_EXPIRE_HOURS", 48, int)
    args.guest_token_expire_hours = get_env_value("GUEST_TOKEN_EXPIRE_HOURS", 24, int)
    args.jwt_algorithm = get_env_value("JWT_ALGORITHM", "HS256")
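    # Note: "lightrag-jwt-default-secret" is a publicly known fallback;
    # deployments that enable JWT auth should set TOKEN_SECRET explicitly.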

    # Rerank model configuration
    args.rerank_model = get_env_value("RERANK_MODEL", "BAAI/bge-reranker-v2-m3")
    args.rerank_binding_host = get_env_value("RERANK_BINDING_HOST", None)
    args.rerank_binding_api_key = get_env_value("RERANK_BINDING_API_KEY", None)

    # Minimum rerank score configuration
    args.min_rerank_score = get_env_value(
        "MIN_RERANK_SCORE", DEFAULT_MIN_RERANK_SCORE, float
    )

    # Query configuration
    args.history_turns = get_env_value("HISTORY_TURNS", DEFAULT_HISTORY_TURNS, int)
    args.top_k = get_env_value("TOP_K", DEFAULT_TOP_K, int)
    args.chunk_top_k = get_env_value("CHUNK_TOP_K", DEFAULT_CHUNK_TOP_K, int)
    args.max_entity_tokens = get_env_value(
        "MAX_ENTITY_TOKENS", DEFAULT_MAX_ENTITY_TOKENS, int
    )
    args.max_relation_tokens = get_env_value(
        "MAX_RELATION_TOKENS", DEFAULT_MAX_RELATION_TOKENS, int
    )
    args.max_total_tokens = get_env_value(
        "MAX_TOTAL_TOKENS", DEFAULT_MAX_TOTAL_TOKENS, int
    )
    args.cosine_threshold = get_env_value(
        "COSINE_THRESHOLD", DEFAULT_COSINE_THRESHOLD, float
    )
    args.related_chunk_number = get_env_value(
        "RELATED_CHUNK_NUMBER", DEFAULT_RELATED_CHUNK_NUMBER, int
    )

    # Environment variables surfaced by the health endpoint
    args.force_llm_summary_on_merge = get_env_value(
        "FORCE_LLM_SUMMARY_ON_MERGE", DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, int
    )
    args.embedding_func_max_async = get_env_value(
        "EMBEDDING_FUNC_MAX_ASYNC", DEFAULT_EMBEDDING_FUNC_MAX_ASYNC, int
    )
    args.embedding_batch_num = get_env_value(
        "EMBEDDING_BATCH_NUM", DEFAULT_EMBEDDING_BATCH_NUM, int
    )

    # Propagate the simulated model identity to the shared server info object
    ollama_server_infos.LIGHTRAG_NAME = args.simulated_model_name
    ollama_server_infos.LIGHTRAG_TAG = args.simulated_model_tag

    return args


def update_uvicorn_mode_config():
    # In uvicorn mode, multiple workers are not supported: if workers > 1,
    # force it to 1 and log a warning
    if global_args.workers > 1:
        original_workers = global_args.workers
        global_args.workers = 1
        logging.warning(
            f"In uvicorn mode, workers parameter was set to {original_workers}. "
            "Forcing workers=1"
        )


global_args = parse_args()
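
# Minimal usage sketch (illustrative; the import path is an assumption):
# other modules read the resolved configuration from `global_args`, e.g.
#
#   from lightrag.api.config import global_args, update_uvicorn_mode_config
#
#   update_uvicorn_mode_config()  # only when running under uvicorn
#   print(global_args.working_dir, global_args.llm_binding, global_args.port)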