From f2d051eea5b636efbd6bb23dcad3b0ea3f16bf53 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 27 Jul 2025 21:10:47 +0800 Subject: [PATCH] Fix: Improve keyword extraction prompt for robust JSON output. * Emphasize strict JSON output in keyword extraction prompt * Clean up prompt examples in keyword extraction prompt * Log raw LLM response on JSON error --- lightrag/operate.py | 1 + lightrag/prompt.py | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index e976a5b4..108a8690 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -1852,6 +1852,7 @@ async def extract_keywords_only( keywords_data = json.loads(match.group(0)) except json.JSONDecodeError as e: logger.error(f"JSON parsing error: {e}") + logger.error(f"LLM respond: {result}") return [], [] hl_keywords = keywords_data.get("high_level_keywords", []) diff --git a/lightrag/prompt.py b/lightrag/prompt.py index f0d48a13..570ea7b8 100644 --- a/lightrag/prompt.py +++ b/lightrag/prompt.py @@ -251,7 +251,7 @@ Given the query and conversation history, list both high-level and low-level key ###################### {examples} -############################# +###################### ---Real Data--- ###################### Conversation History: @@ -259,42 +259,45 @@ Conversation History: Current Query: {query} ###################### -The `Output` should be human text, not unicode characters. Keep the same language as `Query`. -Output: +The `Output` should be in JSON format, with no other text before and after the JSON. Use the same language as `Current Query`. +Output: """ PROMPTS["keywords_extraction_examples"] = [ """Example 1: Query: "How does international trade influence global economic stability?" 
-################ + Output: { "high_level_keywords": ["International trade", "Global economic stability", "Economic impact"], "low_level_keywords": ["Trade agreements", "Tariffs", "Currency exchange", "Imports", "Exports"] } -#############################""", + +""", """Example 2: Query: "What are the environmental consequences of deforestation on biodiversity?" -################ + Output: { "high_level_keywords": ["Environmental consequences", "Deforestation", "Biodiversity loss"], "low_level_keywords": ["Species extinction", "Habitat destruction", "Carbon emissions", "Rainforest", "Ecosystem"] } -#############################""", + +""", """Example 3: Query: "What is the role of education in reducing poverty?" -################ + Output: { "high_level_keywords": ["Education", "Poverty reduction", "Socioeconomic development"], "low_level_keywords": ["School access", "Literacy rates", "Job training", "Income inequality"] } -#############################""", + +""", ] PROMPTS["naive_rag_response"] = """---Role---