mirror of
https://github.com/HKUDS/LightRAG.git
synced 2025-12-27 14:52:34 +00:00
- Remove MAX_TOKEN_SUMMARY parameter and related configurations - Eliminate forced token-based truncation in entity/relationship descriptions - Switch to fragment-count based summarization logic using FORCE_LLM_SUMMARY_ON_MERGE - Update FORCE_LLM_SUMMARY_ON_MERGE default from 6 to 4 for better summarization - Clean up documentation, environment examples, and API display code - Preserve backward compatibility by graceful parameter removal This change resolves issues where LLMs were forcibly truncating entity relationship descriptions mid-sentence, leading to incomplete and potentially inaccurate knowledge graph content. The new approach allows LLMs to generate complete descriptions while still providing summarization when multiple fragments need to be merged. Breaking Change: None - parameter removal is backward compatible Fixes: Entity relationship description truncation issues
203 lines
6.6 KiB
Plaintext
203 lines
6.6 KiB
Plaintext
### This is a sample .env file
|
|
|
|
### Server Configuration
|
|
HOST=0.0.0.0
|
|
PORT=9621
|
|
WEBUI_TITLE='My Graph KB'
|
|
WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
|
|
OLLAMA_EMULATING_MODEL_TAG=latest
|
|
# WORKERS=2
|
|
# CORS_ORIGINS=http://localhost:3000,http://localhost:8080
|
|
|
|
### Login Configuration
|
|
# AUTH_ACCOUNTS='admin:admin123,user1:pass456'
|
|
# TOKEN_SECRET=Your-Key-For-LightRAG-API-Server
|
|
# TOKEN_EXPIRE_HOURS=48
|
|
# GUEST_TOKEN_EXPIRE_HOURS=24
|
|
# JWT_ALGORITHM=HS256
|
|
|
|
### API-Key to access LightRAG Server API
|
|
# LIGHTRAG_API_KEY=your-secure-api-key-here
|
|
# WHITELIST_PATHS=/health,/api/*
|
|
|
|
### Optional SSL Configuration
|
|
# SSL=true
|
|
# SSL_CERTFILE=/path/to/cert.pem
|
|
# SSL_KEYFILE=/path/to/key.pem
|
|
|
|
### Directory Configuration (defaults to current working directory)
|
|
### Should not be set when deploying with Docker (set by the Dockerfile instead of .env)
|
|
### Default value is ./inputs and ./rag_storage
|
|
# INPUT_DIR=<absolute_path_for_doc_input_dir>
|
|
# WORKING_DIR=<absolute_path_for_working_dir>
|
|
|
|
### Max nodes returned from graph retrieval
|
|
# MAX_GRAPH_NODES=1000
|
|
|
|
### Logging level
|
|
# LOG_LEVEL=INFO
|
|
# VERBOSE=False
|
|
# LOG_MAX_BYTES=10485760
|
|
# LOG_BACKUP_COUNT=5
|
|
### Logfile location (defaults to current working directory)
|
|
# LOG_DIR=/path/to/log/directory
|
|
|
|
### RAG Configuration
|
|
### Chunk size for document splitting, 500~1500 is recommended
|
|
# CHUNK_SIZE=1200
|
|
# CHUNK_OVERLAP_SIZE=100
|
|
|
|
### RAG Query Configuration
|
|
# HISTORY_TURNS=3
|
|
# MAX_TOKEN_TEXT_CHUNK=6000
|
|
# MAX_TOKEN_RELATION_DESC=4000
|
|
# MAX_TOKEN_ENTITY_DESC=4000
|
|
# COSINE_THRESHOLD=0.2
|
|
### Number of entities or relations to retrieve from KG
|
|
# TOP_K=60
|
|
### Number of text chunks to retrieve initially from vector search
|
|
# CHUNK_TOP_K=5
|
|
|
|
### Rerank Configuration
|
|
# ENABLE_RERANK=False
|
|
### Number of text chunks to keep after reranking (should be <= CHUNK_TOP_K)
|
|
# CHUNK_RERANK_TOP_K=5
|
|
### Rerank model configuration (required when ENABLE_RERANK=True)
|
|
# RERANK_MODEL=BAAI/bge-reranker-v2-m3
|
|
# RERANK_BINDING_HOST=https://api.your-rerank-provider.com/v1/rerank
|
|
# RERANK_BINDING_API_KEY=your_rerank_api_key_here
|
|
|
|
### Entity and relation summarization configuration
|
|
### Language: English, Chinese, French, German ...
|
|
SUMMARY_LANGUAGE=English
|
|
### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended)
|
|
# FORCE_LLM_SUMMARY_ON_MERGE=6
|
|
### Maximum number of entity extraction attempts for ambiguous content
|
|
# MAX_GLEANING=1
|
|
|
|
### Number of documents processed in parallel (less than MAX_ASYNC/2 is recommended)
|
|
# MAX_PARALLEL_INSERT=2
|
|
|
|
### LLM Configuration
|
|
ENABLE_LLM_CACHE=true
|
|
ENABLE_LLM_CACHE_FOR_EXTRACT=true
|
|
### Time out in seconds for LLM, None for infinite timeout
|
|
TIMEOUT=240
|
|
### Some models like o1-mini require temperature to be set to 1
|
|
TEMPERATURE=0
|
|
### Max concurrent requests to the LLM
|
|
MAX_ASYNC=4
|
|
### MAX_TOKENS: max tokens sent to the LLM for entity/relation summaries (must be less than the model's context size)
|
|
MAX_TOKENS=32000
|
|
### LLM Binding type: openai, ollama, lollms, azure_openai
|
|
LLM_BINDING=openai
|
|
LLM_MODEL=gpt-4o
|
|
LLM_BINDING_HOST=https://api.openai.com/v1
|
|
LLM_BINDING_API_KEY=your_api_key
|
|
### Optional for Azure
|
|
# AZURE_OPENAI_API_VERSION=2024-08-01-preview
|
|
# AZURE_OPENAI_DEPLOYMENT=gpt-4o
|
|
### set as num_ctx option for Ollama LLM
|
|
# OLLAMA_NUM_CTX=32768
|
|
|
|
### Embedding Configuration
|
|
### Embedding Binding type: openai, ollama, lollms, azure_openai
|
|
EMBEDDING_BINDING=ollama
|
|
EMBEDDING_MODEL=bge-m3:latest
|
|
EMBEDDING_DIM=1024
|
|
EMBEDDING_BINDING_API_KEY=your_api_key
|
|
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
|
|
EMBEDDING_BINDING_HOST=http://localhost:11434
|
|
### Number of chunks sent to Embedding in a single request
|
|
# EMBEDDING_BATCH_NUM=10
|
|
### Max concurrent requests for Embedding
|
|
# EMBEDDING_FUNC_MAX_ASYNC=8
|
|
### Maximum tokens sent to Embedding for each chunk (no longer in use?)
|
|
# MAX_EMBED_TOKENS=8192
|
|
### Optional for Azure
|
|
# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
|
|
# AZURE_EMBEDDING_API_VERSION=2023-05-15
|
|
# AZURE_EMBEDDING_ENDPOINT=your_endpoint
|
|
# AZURE_EMBEDDING_API_KEY=your_api_key
|
|
|
|
###########################
|
|
### Data storage selection
|
|
###########################
|
|
### In-memory database with local file persistence (recommended for small-scale deployment)
|
|
# LIGHTRAG_KV_STORAGE=JsonKVStorage
|
|
# LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
|
|
# LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
|
|
# LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage
|
|
# LIGHTRAG_VECTOR_STORAGE=FaissVectorDBStorage
|
|
### PostgreSQL
|
|
# LIGHTRAG_KV_STORAGE=PGKVStorage
|
|
# LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
|
|
# LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
|
|
# LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
|
|
### MongoDB (Vector storage only available on Atlas Cloud)
|
|
# LIGHTRAG_KV_STORAGE=MongoKVStorage
|
|
# LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage
|
|
# LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage
|
|
# LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage
|
|
### Redis Storage (Recommended for production deployment)
|
|
# LIGHTRAG_KV_STORAGE=RedisKVStorage
|
|
# LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage
|
|
### Vector Storage (Recommended for production deployment)
|
|
# LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage
|
|
# LIGHTRAG_VECTOR_STORAGE=QdrantVectorDBStorage
|
|
### Graph Storage (Recommended for production deployment)
|
|
# LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
|
|
# LIGHTRAG_GRAPH_STORAGE=MemgraphStorage
|
|
|
|
####################################################################
|
|
### Default workspace for all storage types
|
|
### For the purpose of isolation of data for each LightRAG instance
|
|
### Valid characters: a-z, A-Z, 0-9, and _
|
|
####################################################################
|
|
# WORKSPACE=space1
|
|
|
|
### PostgreSQL Configuration
|
|
POSTGRES_HOST=localhost
|
|
POSTGRES_PORT=5432
|
|
POSTGRES_USER=your_username
|
|
POSTGRES_PASSWORD='your_password'
|
|
POSTGRES_DATABASE=your_database
|
|
POSTGRES_MAX_CONNECTIONS=12
|
|
# POSTGRES_WORKSPACE=forced_workspace_name
|
|
|
|
### Neo4j Configuration
|
|
NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
|
|
NEO4J_USERNAME=neo4j
|
|
NEO4J_PASSWORD='your_password'
|
|
# NEO4J_WORKSPACE=forced_workspace_name
|
|
|
|
### MongoDB Configuration
|
|
MONGO_URI=mongodb://root:root@localhost:27017/
|
|
#MONGO_URI=mongodb+srv://xxxx
|
|
MONGO_DATABASE=LightRAG
|
|
# MONGODB_WORKSPACE=forced_workspace_name
|
|
|
|
### Milvus Configuration
|
|
MILVUS_URI=http://localhost:19530
|
|
MILVUS_DB_NAME=lightrag
|
|
# MILVUS_USER=root
|
|
# MILVUS_PASSWORD=your_password
|
|
# MILVUS_TOKEN=your_token
|
|
# MILVUS_WORKSPACE=forced_workspace_name
|
|
|
|
### Qdrant
|
|
QDRANT_URL=http://localhost:6333
|
|
# QDRANT_API_KEY=your-api-key
|
|
# QDRANT_WORKSPACE=forced_workspace_name
|
|
|
|
### Redis
|
|
REDIS_URI=redis://localhost:6379
|
|
# REDIS_WORKSPACE=forced_workspace_name
|
|
|
|
### Memgraph Configuration
|
|
MEMGRAPH_URI=bolt://localhost:7687
|
|
MEMGRAPH_USERNAME=
|
|
MEMGRAPH_PASSWORD=
|
|
MEMGRAPH_DATABASE=memgraph
|
|
# MEMGRAPH_WORKSPACE=forced_workspace_name
|