Merge pull request #1996 from danielaskdd/key-work-empty

feat: Improve Empty Keyword Handling logic
This commit is contained in:
Daniel.y 2025-08-23 11:54:26 +08:00 committed by GitHub
commit df4927f837
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 29 additions and 29 deletions

View File

@ -91,7 +91,7 @@ ENABLE_LLM_CACHE=true
### For rerank model deployed by vLLM use cohere binding ### For rerank model deployed by vLLM use cohere binding
######################################################### #########################################################
RERANK_BINDING=null RERANK_BINDING=null
### Enable rerank by default in query params ### Enable rerank by default in query params when RERANK_BINDING is not null
# RERANK_BY_DEFAULT=True # RERANK_BY_DEFAULT=True
### rerank score chunk filter (set to 0.0 to keep all chunks, 0.6 or above if LLM is not strong enough) ### rerank score chunk filter (set to 0.0 to keep all chunks, 0.6 or above if LLM is not strong enough)
# MIN_RERANK_SCORE=0.0 # MIN_RERANK_SCORE=0.0

View File

@ -1727,6 +1727,9 @@ async def kg_query(
system_prompt: str | None = None, system_prompt: str | None = None,
chunks_vdb: BaseVectorStorage = None, chunks_vdb: BaseVectorStorage = None,
) -> str | AsyncIterator[str]: ) -> str | AsyncIterator[str]:
if not query:
return PROMPTS["fail_response"]
if query_param.model_func: if query_param.model_func:
use_model_func = query_param.model_func use_model_func = query_param.model_func
else: else:
@ -1763,21 +1766,16 @@ async def kg_query(
logger.debug(f"Low-level keywords: {ll_keywords}") logger.debug(f"Low-level keywords: {ll_keywords}")
# Handle empty keywords # Handle empty keywords
if ll_keywords == [] and query_param.mode in ["local", "hybrid", "mix"]:
logger.warning("low_level_keywords is empty")
if hl_keywords == [] and query_param.mode in ["global", "hybrid", "mix"]:
logger.warning("high_level_keywords is empty")
if hl_keywords == [] and ll_keywords == []: if hl_keywords == [] and ll_keywords == []:
logger.warning("low_level_keywords and high_level_keywords is empty") if len(query) < 50:
return PROMPTS["fail_response"] logger.warning(f"Forced low_level_keywords to origin query: {query}")
if ll_keywords == [] and query_param.mode in ["local", "hybrid"]: ll_keywords = [query]
logger.warning( else:
"low_level_keywords is empty, switching from %s mode to global mode", return PROMPTS["fail_response"]
query_param.mode,
)
query_param.mode = "global"
if hl_keywords == [] and query_param.mode in ["global", "hybrid"]:
logger.warning(
"high_level_keywords is empty, switching from %s mode to local mode",
query_param.mode,
)
query_param.mode = "local"
ll_keywords_str = ", ".join(ll_keywords) if ll_keywords else "" ll_keywords_str = ", ".join(ll_keywords) if ll_keywords else ""
hl_keywords_str = ", ".join(hl_keywords) if hl_keywords else "" hl_keywords_str = ", ".join(hl_keywords) if hl_keywords else ""
@ -2133,7 +2131,7 @@ async def _build_query_context(
query_embedding = None query_embedding = None
# Handle local and global modes # Handle local and global modes
if query_param.mode == "local": if query_param.mode == "local" and len(ll_keywords) > 0:
local_entities, local_relations = await _get_node_data( local_entities, local_relations = await _get_node_data(
ll_keywords, ll_keywords,
knowledge_graph_inst, knowledge_graph_inst,
@ -2141,7 +2139,7 @@ async def _build_query_context(
query_param, query_param,
) )
elif query_param.mode == "global": elif query_param.mode == "global" and len(hl_keywords) > 0:
global_relations, global_entities = await _get_edge_data( global_relations, global_entities = await _get_edge_data(
hl_keywords, hl_keywords,
knowledge_graph_inst, knowledge_graph_inst,
@ -2150,18 +2148,20 @@ async def _build_query_context(
) )
else: # hybrid or mix mode else: # hybrid or mix mode
local_entities, local_relations = await _get_node_data( if len(ll_keywords) > 0:
ll_keywords, local_entities, local_relations = await _get_node_data(
knowledge_graph_inst, ll_keywords,
entities_vdb, knowledge_graph_inst,
query_param, entities_vdb,
) query_param,
global_relations, global_entities = await _get_edge_data( )
hl_keywords, if len(hl_keywords) > 0:
knowledge_graph_inst, global_relations, global_entities = await _get_edge_data(
relationships_vdb, hl_keywords,
query_param, knowledge_graph_inst,
) relationships_vdb,
query_param,
)
# Get vector chunks first if in mix mode # Get vector chunks first if in mix mode
if query_param.mode == "mix" and chunks_vdb: if query_param.mode == "mix" and chunks_vdb: