### This is a sample .env file
###########################
### Server Configuration
###########################
HOST=0.0.0.0
PORT=9621
WEBUI_TITLE='My Graph KB'
WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
# WORKERS=2
### Gunicorn worker timeout (also used as the default LLM request timeout if LLM_TIMEOUT is not set)
# TIMEOUT=150
# CORS_ORIGINS=http://localhost:3000,http://localhost:8080

### Optional SSL Configuration
# SSL=true
# SSL_CERTFILE=/path/to/cert.pem
# SSL_KEYFILE=/path/to/key.pem

### Directory Configuration (defaults to current working directory)
### Default values are ./inputs and ./rag_storage
# INPUT_DIR=
# WORKING_DIR=

### Tiktoken cache directory (store cached files in this folder for offline deployment)
# TIKTOKEN_CACHE_DIR=/app/data/tiktoken

### Ollama Emulating Model and Tag
# OLLAMA_EMULATING_MODEL_NAME=lightrag
OLLAMA_EMULATING_MODEL_TAG=latest

### Max nodes for graph retrieval (the WebUI local setting is capped at this value; make sure it is updated as well)
# MAX_GRAPH_NODES=1000

### Logging level
# LOG_LEVEL=INFO
# VERBOSE=False
# LOG_MAX_BYTES=10485760
# LOG_BACKUP_COUNT=5
### Logfile location (defaults to current working directory)
# LOG_DIR=/path/to/log/directory

#####################################
### Login and API-Key Configuration
#####################################
# AUTH_ACCOUNTS='admin:admin123,user1:pass456'
# TOKEN_SECRET=Your-Key-For-LightRAG-API-Server
# TOKEN_EXPIRE_HOURS=48
# GUEST_TOKEN_EXPIRE_HOURS=24
# JWT_ALGORITHM=HS256

### Token Auto-Renewal Configuration (Sliding Window Expiration)
### Enable automatic token renewal to prevent active users from being logged out
### When enabled, tokens are automatically renewed when the remaining time < threshold
# TOKEN_AUTO_RENEW=true
### Token renewal threshold (0.0 - 1.0)
### Renew the token when remaining time < (total time * threshold)
### Default: 0.5 (renew when 50% of the time remains)
### Examples:
###   0.5  = renew when a 24h token has 12h left
###   0.25 = renew when a 24h token has 6h left
# TOKEN_RENEW_THRESHOLD=0.5
### Note: Token renewal is automatically skipped for certain endpoints:
###   - /health: health check endpoint (no authentication required)
###   - /documents/paginated: frequently polled by the client (5-30s interval)
###   - /documents/pipeline_status: very frequently polled by the client (2s interval)
###   - Rate limit: minimum 60 seconds between renewals for the same user

### API key to access the LightRAG Server API
### Use this key in HTTP requests with the 'X-API-Key' header
### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
# LIGHTRAG_API_KEY=your-secure-api-key-here
# WHITELIST_PATHS=/health,/api/*

######################################################################################
### Query Configuration
###
### How to control the context length sent to the LLM:
### MAX_ENTITY_TOKENS + MAX_RELATION_TOKENS < MAX_TOTAL_TOKENS
### Chunk_Tokens = MAX_TOTAL_TOKENS - Actual_Entity_Tokens - Actual_Relation_Tokens
######################################################################################
# LLM response cache for query (does not apply to streaming responses)
ENABLE_LLM_CACHE=true
# COSINE_THRESHOLD=0.2
### Number of entities or relations retrieved from the KG
# TOP_K=40
### Maximum number of chunks for naive vector search
# CHUNK_TOP_K=20
### Maximum tokens for entities sent to the LLM
# MAX_ENTITY_TOKENS=6000
### Maximum tokens for relations sent to the LLM
# MAX_RELATION_TOKENS=8000
### Maximum total tokens sent to the LLM (includes entities, relations and chunks)
# MAX_TOTAL_TOKENS=30000
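### Worked example (illustrative numbers, not defaults): with MAX_TOTAL_TOKENS=30000,
### if the retrieved entities actually use 5000 tokens and the relations use 7000 tokens,
### roughly 30000 - 5000 - 7000 = 18000 tokens remain for text chunks.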
### Chunk selection strategies
### VECTOR: pick KG chunks by vector similarity; the chunks delivered to the LLM align more closely with naive retrieval
### WEIGHT: pick KG chunks by entity and chunk weight; delivers chunks that are more strictly KG-related to the LLM
### If reranking is enabled, the impact of the chunk selection strategy is diminished.
# KG_CHUNK_PICK_METHOD=VECTOR

#########################################################
### Reranking configuration
### RERANK_BINDING type: null, cohere, jina, aliyun
### For a rerank model deployed with vLLM, use the cohere binding
#########################################################
RERANK_BINDING=null
### Enable rerank by default in query params when RERANK_BINDING is not null
# RERANK_BY_DEFAULT=True
### Rerank score filter for chunks (set to 0.0 to keep all chunks; use 0.6 or above if the LLM is not strong enough)
# MIN_RERANK_SCORE=0.0

### For local deployment with vLLM
# RERANK_MODEL=BAAI/bge-reranker-v2-m3
# RERANK_BINDING_HOST=http://localhost:8000/v1/rerank
# RERANK_BINDING_API_KEY=your_rerank_api_key_here

### Default values for Cohere AI
# RERANK_MODEL=rerank-v3.5
# RERANK_BINDING_HOST=https://api.cohere.com/v2/rerank
# RERANK_BINDING_API_KEY=your_rerank_api_key_here
### Cohere rerank chunking configuration (useful for models with token limits, such as ColBERT)
# RERANK_ENABLE_CHUNKING=true
# RERANK_MAX_TOKENS_PER_DOC=480

### Default values for Jina AI
# RERANK_MODEL=jina-reranker-v2-base-multilingual
# RERANK_BINDING_HOST=https://api.jina.ai/v1/rerank
# RERANK_BINDING_API_KEY=your_rerank_api_key_here

### Default values for Aliyun
# RERANK_MODEL=gte-rerank-v2
# RERANK_BINDING_HOST=https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank
# RERANK_BINDING_API_KEY=your_rerank_api_key_here
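### Illustrative smoke test for the vLLM reranker example above (payload follows the Cohere-style
### rerank schema; adjust endpoint, model and fields to your deployment):
### curl http://localhost:8000/v1/rerank -H "Content-Type: application/json" \
###   -d '{"model": "BAAI/bge-reranker-v2-m3", "query": "What is LightRAG?", "documents": ["LightRAG is a graph-based RAG system.", "Unrelated text."]}'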
########################################
### Document processing configuration
########################################
ENABLE_LLM_CACHE_FOR_EXTRACT=true
### Document processing output language: English, Chinese, French, German ...
SUMMARY_LANGUAGE=English
### PDF decryption password for protected PDF files
# PDF_DECRYPT_PASSWORD=your_pdf_password_here
### Entity types that the LLM will attempt to recognize
# ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]'
### Chunk size for document splitting; 500~1500 is recommended
# CHUNK_SIZE=1200
# CHUNK_OVERLAP_SIZE=100

### Number of summary segments or tokens that triggers an LLM summary on entity/relation merge (at least 3 is recommended)
# FORCE_LLM_SUMMARY_ON_MERGE=8
### Max description token size that triggers an LLM summary
# SUMMARY_MAX_TOKENS=1200
### Recommended LLM summary output length in tokens
# SUMMARY_LENGTH_RECOMMENDED=600
### Maximum context size sent to the LLM for description summary
# SUMMARY_CONTEXT_SIZE=12000

### Control the maximum number of chunk_ids stored in the vector and graph DB
# MAX_SOURCE_IDS_PER_ENTITY=300
# MAX_SOURCE_IDS_PER_RELATION=300
### Control the chunk_ids limitation method: FIFO, KEEP
### FIFO: first in, first out
### KEEP: keep the oldest (fewer merge actions and faster)
# SOURCE_IDS_LIMIT_METHOD=FIFO
# Maximum number of file paths stored in the entity/relation file_path field (for display only; does not affect query performance)
# MAX_FILE_PATHS=100

### Maximum number of related chunks per source entity or relation
### The chunk picker uses this value to determine the total number of chunks selected from the KG (knowledge graph)
### Higher values increase re-ranking time
# RELATED_CHUNK_NUMBER=5

###############################
### Concurrency Configuration
###############################
### Max concurrent LLM requests (for both query and document processing)
MAX_ASYNC=4
### Number of documents processed in parallel (between 2 and 10; MAX_ASYNC/3 is recommended)
MAX_PARALLEL_INSERT=2
### Max concurrent embedding requests
# EMBEDDING_FUNC_MAX_ASYNC=8
### Number of chunks sent to the embedding service in a single request
# EMBEDDING_BATCH_NUM=10

###########################################################################
### LLM Configuration
### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM services
### If LightRAG is deployed in Docker:
###   use host.docker.internal instead of localhost in LLM_BINDING_HOST
###########################################################################
### LLM request timeout for all LLMs (0 means no timeout for Ollama)
# LLM_TIMEOUT=180

LLM_BINDING=openai
LLM_MODEL=gpt-4o
LLM_BINDING_HOST=https://api.openai.com/v1
LLM_BINDING_API_KEY=your_api_key

### Azure OpenAI example
### Use the deployment name as the model name, or set AZURE_OPENAI_DEPLOYMENT instead
# AZURE_OPENAI_API_VERSION=2024-08-01-preview
# LLM_BINDING=azure_openai
# LLM_BINDING_HOST=https://xxxx.openai.azure.com/
# LLM_BINDING_API_KEY=your_api_key
# LLM_MODEL=my-gpt-mini-deployment

### OpenRouter example
# LLM_MODEL=google/gemini-2.5-flash
# LLM_BINDING_HOST=https://openrouter.ai/api/v1
# LLM_BINDING_API_KEY=your_api_key
# LLM_BINDING=openai

### Google Gemini example (AI Studio)
# LLM_BINDING=gemini
# LLM_MODEL=gemini-flash-latest
# LLM_BINDING_API_KEY=your_gemini_api_key
# LLM_BINDING_HOST=https://generativelanguage.googleapis.com
### Use the following command to see all supported options for Gemini
### lightrag-server --llm-binding gemini --help

### Gemini Specific Parameters
# GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
# GEMINI_LLM_TEMPERATURE=0.7
### Enable Thinking
# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
### Disable Thinking
# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'

### Google Vertex AI example
### Vertex AI uses GOOGLE_APPLICATION_CREDENTIALS instead of an API key for authentication
### LLM_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT means the endpoint is selected automatically based on project and location
# LLM_BINDING=gemini
# LLM_BINDING_HOST=https://aiplatform.googleapis.com
### or use DEFAULT_GEMINI_ENDPOINT to select the endpoint automatically based on project and location
# LLM_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT
# LLM_MODEL=gemini-2.5-flash
# GOOGLE_GENAI_USE_VERTEXAI=true
# GOOGLE_CLOUD_PROJECT='your-project-id'
# GOOGLE_CLOUD_LOCATION='us-central1'
# GOOGLE_APPLICATION_CREDENTIALS='/Users/xxxxx/your-service-account-credentials-file.json'

### Use the following command to see all supported options for OpenAI, azure_openai or OpenRouter
### lightrag-server --llm-binding openai --help

### OpenAI Specific Parameters
# OPENAI_LLM_REASONING_EFFORT=minimal
### OpenRouter Specific Parameters
# OPENAI_LLM_EXTRA_BODY='{"reasoning": {"enabled": false}}'
### Qwen3 Specific Parameters (deployed with vLLM)
# OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'

### OpenAI Compatible API Specific Parameters
### Increased temperature values may mitigate infinite inference loops in certain LLMs, such as Qwen3-30B.
# OPENAI_LLM_TEMPERATURE=0.9
### Set max_tokens to mitigate endless output from some LLMs (keep it below LLM_TIMEOUT * llm_output_tokens/second, e.g. 9000 = 180s * 50 tokens/s)
### Typically, max_tokens does not include prompt content
### For vLLM/SGLang-deployed models, or most OpenAI-compatible API providers
# OPENAI_LLM_MAX_TOKENS=9000
### OpenAI o1-mini and newer models use max_completion_tokens instead of max_tokens
OPENAI_LLM_MAX_COMPLETION_TOKENS=9000

### Use the following command to see all supported options for Ollama LLM
### lightrag-server --llm-binding ollama --help

### Ollama Server Specific Parameters
### OLLAMA_LLM_NUM_CTX must be provided and should be at least MAX_TOTAL_TOKENS + 2000
OLLAMA_LLM_NUM_CTX=32768
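### Worked example (illustrative): with MAX_TOTAL_TOKENS=30000 as above, the Ollama context window
### must be at least 30000 + 2000 = 32000 tokens, so 32768 (the next power of two) is used here.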
### Set max_output_tokens to mitigate endless output from some LLMs (keep it below LLM_TIMEOUT * llm_output_tokens/second, e.g. 9000 = 180s * 50 tokens/s)
# OLLAMA_LLM_NUM_PREDICT=9000
### Stop sequences for Ollama LLM
# OLLAMA_LLM_STOP='["", "<|EOT|>"]'

### Bedrock Specific Parameters
# BEDROCK_LLM_TEMPERATURE=1.0

#######################################################################################
### Embedding Configuration (should not be changed after the first file is processed)
### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other embedding services
### If LightRAG is deployed in Docker:
###   use host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
#######################################################################################
# EMBEDDING_TIMEOUT=30

### Control whether to send the embedding_dim parameter to the embedding API
### IMPORTANT: Jina ALWAYS sends the dimension parameter (API requirement) - this setting is ignored for Jina
### For OpenAI: set to 'true' to enable dynamic dimension adjustment
### For OpenAI: set to 'false' (default) to disable sending the dimension parameter
### Note: automatically ignored for backends that don't support the dimension parameter (e.g., Ollama)

# Ollama embedding
# EMBEDDING_BINDING=ollama
# EMBEDDING_MODEL=bge-m3:latest
# EMBEDDING_DIM=1024
# EMBEDDING_BINDING_API_KEY=your_api_key
### If LightRAG is deployed in Docker, use host.docker.internal instead of localhost
# EMBEDDING_BINDING_HOST=http://localhost:11434

### OpenAI compatible embedding
EMBEDDING_BINDING=openai
EMBEDDING_MODEL=text-embedding-3-large
EMBEDDING_DIM=3072
EMBEDDING_SEND_DIM=false
EMBEDDING_TOKEN_LIMIT=8192
EMBEDDING_BINDING_HOST=https://api.openai.com/v1
EMBEDDING_BINDING_API_KEY=your_api_key

### Optional for Azure embedding
### Use the deployment name as the model name, or set AZURE_EMBEDDING_DEPLOYMENT instead
# AZURE_EMBEDDING_API_VERSION=2024-08-01-preview
# EMBEDDING_BINDING=azure_openai
# EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/
# EMBEDDING_API_KEY=your_api_key
# EMBEDDING_MODEL=my-text-embedding-3-large-deployment
# EMBEDDING_DIM=3072

### Gemini embedding
# EMBEDDING_BINDING=gemini
# EMBEDDING_MODEL=gemini-embedding-001
# EMBEDDING_DIM=1536
# EMBEDDING_TOKEN_LIMIT=2048
# EMBEDDING_BINDING_HOST=https://generativelanguage.googleapis.com
# EMBEDDING_BINDING_API_KEY=your_api_key
### Gemini embedding requires sending the dimension to the server
# EMBEDDING_SEND_DIM=true

### Jina AI Embedding
# EMBEDDING_BINDING=jina
# EMBEDDING_BINDING_HOST=https://api.jina.ai/v1/embeddings
# EMBEDDING_MODEL=jina-embeddings-v4
# EMBEDDING_DIM=2048
# EMBEDDING_BINDING_API_KEY=your_api_key

### Optional for Ollama embedding
OLLAMA_EMBEDDING_NUM_CTX=8192
### Use the following command to see all supported options for Ollama embedding
### lightrag-server --embedding-binding ollama --help
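### Illustrative Docker example (assumes Ollama running on the host machine at its default port):
### when the server runs inside Docker, point binding hosts at host.docker.internal instead of localhost, e.g.:
# LLM_BINDING_HOST=http://host.docker.internal:11434
# EMBEDDING_BINDING_HOST=http://host.docker.internal:11434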
####################################################################
### WORKSPACE sets the workspace name for all storage types,
### isolating data between LightRAG instances.
### Valid workspace name characters: a-z, A-Z, 0-9, and _
####################################################################
# WORKSPACE=space1

############################
### Data storage selection
############################
### Default storage (recommended for small-scale deployment)
# LIGHTRAG_KV_STORAGE=JsonKVStorage
# LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
# LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
# LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage
### Redis Storage (recommended for production deployment)
# LIGHTRAG_KV_STORAGE=RedisKVStorage
# LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage
### Vector Storage (recommended for production deployment)
# LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage
# LIGHTRAG_VECTOR_STORAGE=QdrantVectorDBStorage
# LIGHTRAG_VECTOR_STORAGE=FaissVectorDBStorage
### Graph Storage (recommended for production deployment)
# LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
# LIGHTRAG_GRAPH_STORAGE=MemgraphStorage
### PostgreSQL
# LIGHTRAG_KV_STORAGE=PGKVStorage
# LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
# LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
# LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
### MongoDB (vector storage is only available on Atlas Cloud)
# LIGHTRAG_KV_STORAGE=MongoKVStorage
# LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage
# LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage
# LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage
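### Illustrative production combination (example only; pick one implementation per storage type
### and make sure the matching connection settings below are configured):
# LIGHTRAG_KV_STORAGE=RedisKVStorage
# LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage
# LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
# LIGHTRAG_VECTOR_STORAGE=QdrantVectorDBStorage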
### PostgreSQL Configuration
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
POSTGRES_USER=your_username
POSTGRES_PASSWORD='your_password'
POSTGRES_DATABASE=your_database
POSTGRES_MAX_CONNECTIONS=12
### A DB-specific workspace should not be set; it is kept for compatibility only
### POSTGRES_WORKSPACE=forced_workspace_name

### PostgreSQL Vector Storage Configuration
### Vector index type: HNSW, IVFFlat, VCHORDRQ
POSTGRES_VECTOR_INDEX_TYPE=HNSW
POSTGRES_HNSW_M=16
POSTGRES_HNSW_EF=200
POSTGRES_IVFFLAT_LISTS=100
POSTGRES_VCHORDRQ_BUILD_OPTIONS=
POSTGRES_VCHORDRQ_PROBES=
POSTGRES_VCHORDRQ_EPSILON=1.9

### PostgreSQL Connection Retry Configuration (Network Robustness)
### Number of retry attempts (1-10, default: 3)
### Initial retry backoff in seconds (0.1-5.0, default: 0.5)
### Maximum retry backoff in seconds (from the initial backoff up to 60.0, default: 5.0)
### Connection pool close timeout in seconds (1.0-30.0, default: 5.0)
# POSTGRES_CONNECTION_RETRIES=3
# POSTGRES_CONNECTION_RETRY_BACKOFF=0.5
# POSTGRES_CONNECTION_RETRY_BACKOFF_MAX=5.0
# POSTGRES_POOL_CLOSE_TIMEOUT=5.0

### PostgreSQL SSL Configuration (Optional)
# POSTGRES_SSL_MODE=require
# POSTGRES_SSL_CERT=/path/to/client-cert.pem
# POSTGRES_SSL_KEY=/path/to/client-key.pem
# POSTGRES_SSL_ROOT_CERT=/path/to/ca-cert.pem
# POSTGRES_SSL_CRL=/path/to/crl.pem

### PostgreSQL Server Settings (for Supabase Supavisor)
# Use this to pass extra options to the PostgreSQL connection string.
# For Supabase, you might need to set it like this:
# POSTGRES_SERVER_SETTINGS="options=reference%3D[project-ref]"
# Default is 100; set to 0 to disable
# POSTGRES_STATEMENT_CACHE_SIZE=100

### Neo4j Configuration
NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD='your_password'
NEO4J_DATABASE=neo4j
NEO4J_MAX_CONNECTION_POOL_SIZE=100
NEO4J_CONNECTION_TIMEOUT=30
NEO4J_CONNECTION_ACQUISITION_TIMEOUT=30
NEO4J_MAX_TRANSACTION_RETRY_TIME=30
NEO4J_MAX_CONNECTION_LIFETIME=300
NEO4J_LIVENESS_CHECK_TIMEOUT=30
NEO4J_KEEP_ALIVE=true
### A DB-specific workspace should not be set; it is kept for compatibility only
### NEO4J_WORKSPACE=forced_workspace_name

### MongoDB Configuration
MONGO_URI=mongodb://root:root@localhost:27017/
# MONGO_URI=mongodb+srv://xxxx
MONGO_DATABASE=LightRAG
# MONGODB_WORKSPACE=forced_workspace_name

### Milvus Configuration
MILVUS_URI=http://localhost:19530
MILVUS_DB_NAME=lightrag
# MILVUS_USER=root
# MILVUS_PASSWORD=your_password
# MILVUS_TOKEN=your_token
### A DB-specific workspace should not be set; it is kept for compatibility only
### MILVUS_WORKSPACE=forced_workspace_name

### Qdrant
QDRANT_URL=http://localhost:6333
# QDRANT_API_KEY=your-api-key
### A DB-specific workspace should not be set; it is kept for compatibility only
### QDRANT_WORKSPACE=forced_workspace_name

### Redis
REDIS_URI=redis://localhost:6379
REDIS_SOCKET_TIMEOUT=30
REDIS_CONNECT_TIMEOUT=10
REDIS_MAX_CONNECTIONS=100
REDIS_RETRY_ATTEMPTS=3
### A DB-specific workspace should not be set; it is kept for compatibility only
### REDIS_WORKSPACE=forced_workspace_name

### Memgraph Configuration
MEMGRAPH_URI=bolt://localhost:7687
MEMGRAPH_USERNAME=
MEMGRAPH_PASSWORD=
MEMGRAPH_DATABASE=memgraph
### A DB-specific workspace should not be set; it is kept for compatibility only
### MEMGRAPH_WORKSPACE=forced_workspace_name

###########################################################
### Langfuse Observability Configuration
### Only works with LLMs served via an OpenAI-compatible API
### Install with: pip install lightrag-hku[observability]
### Sign up at: https://cloud.langfuse.com or self-host
###########################################################
# LANGFUSE_SECRET_KEY=""
# LANGFUSE_PUBLIC_KEY=""
# LANGFUSE_HOST="https://cloud.langfuse.com"  # or the address of your self-hosted instance
# LANGFUSE_ENABLE_TRACE=true

############################
### Evaluation Configuration
############################
### RAGAS evaluation models (used for RAG quality assessment)
### ⚠️ IMPORTANT: both the LLM and embedding endpoints MUST be OpenAI-compatible
### Defaults use OpenAI models for evaluation

### LLM Configuration for Evaluation
# EVAL_LLM_MODEL=gpt-4o-mini
### API key for LLM evaluation (falls back to OPENAI_API_KEY if not set)
# EVAL_LLM_BINDING_API_KEY=your_api_key
### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
# EVAL_LLM_BINDING_HOST=https://api.openai.com/v1

### Embedding Configuration for Evaluation
# EVAL_EMBEDDING_MODEL=text-embedding-3-large
### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
# EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
# EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1

### Performance Tuning
### Number of concurrent test case evaluations
### Lower values reduce API rate limit issues but increase evaluation time
# EVAL_MAX_CONCURRENT=2
### TOP_K query parameter of LightRAG (default: 10)
### Number of entities or relations retrieved from the KG
# EVAL_QUERY_TOP_K=10

### LLM request retry and timeout settings for evaluation
# EVAL_LLM_MAX_RETRIES=5
# EVAL_LLM_TIMEOUT=180
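### Quick start (illustrative): place this file as .env in the server's working directory and run:
###   lightrag-server
### Run 'lightrag-server --help' to see the full list of options that can also be set on the command line.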