diff --git a/env.example b/env.example
index f98648d2..6bac24ae 100644
--- a/env.example
+++ b/env.example
@@ -58,6 +58,8 @@ SUMMARY_LANGUAGE=English
 # FORCE_LLM_SUMMARY_ON_MERGE=6
 ### Max tokens for entity/relations description after merge
 # MAX_TOKEN_SUMMARY=500
+### Maximum number of entity extraction attempts for ambiguous content
+# MAX_GLEANING=1
 
 ### Number of parallel processing documents(Less than MAX_ASYNC/2 is recommended)
 # MAX_PARALLEL_INSERT=2
diff --git a/lightrag/constants.py b/lightrag/constants.py
index f8345994..82451a36 100644
--- a/lightrag/constants.py
+++ b/lightrag/constants.py
@@ -7,6 +7,7 @@ consistency and makes maintenance easier.
 """
 
 # Default values for environment variables
+DEFAULT_MAX_GLEANING = 1
 DEFAULT_MAX_TOKEN_SUMMARY = 500
 DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 6
 DEFAULT_WOKERS = 2
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 2ca543b1..132075d6 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -22,6 +22,7 @@ from typing import (
     Dict,
 )
 from lightrag.constants import (
+    DEFAULT_MAX_GLEANING,
     DEFAULT_MAX_TOKEN_SUMMARY,
     DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
 )
@@ -124,7 +125,9 @@ class LightRAG:
     # Entity extraction
     # ---
 
-    entity_extract_max_gleaning: int = field(default=1)
+    entity_extract_max_gleaning: int = field(
+        default=get_env_value("MAX_GLEANING", DEFAULT_MAX_GLEANING, int)
+    )
     """Maximum number of entity extraction attempts for ambiguous content."""
 
     summary_to_max_tokens: int = field(