From 9a9674d590a43f1c02c7fed164057a33d13db9f2 Mon Sep 17 00:00:00 2001
From: frankj
Date: Tue, 8 Jul 2025 10:24:19 +0800
Subject: [PATCH 1/6] Fix incorrect file path (404 Not Found)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Issue Description

A 404 error occurred when following the repository link that points to README_zh.md. On inspection, the actual file is named README-zh.md, so the original link references an incorrect path.

Fix Details

Corrected the broken link from README_zh.md to the correct path, README-zh.md.

Verification Method

After the change, the target file opens normally in the browser.

Hope this fix helps users access the Chinese documentation properly. Thanks for the review!
---
README-zh.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README-zh.md b/README-zh.md
index 45335489..e9599099 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -30,7 +30,7 @@
-
+
From 186c6411fbf9f4bb9673a39b8ff28525d079093a Mon Sep 17 00:00:00 2001
From: yangdx
Date: Tue, 8 Jul 2025 03:06:19 +0800
Subject: [PATCH 2/6] Update env.example: shorten the MongoDB Atlas URI example
---
env.example | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/env.example b/env.example
index 1efe4830..ef52bd53 100644
--- a/env.example
+++ b/env.example
@@ -159,7 +159,7 @@ NEO4J_PASSWORD='your_password'
### MongoDB Configuration
MONGO_URI=mongodb://root:root@localhost:27017/
-#MONGO_URI=mongodb+srv://root:rooot@cluster0.xxxx.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0
+#MONGO_URI=mongodb+srv://xxxx
MONGO_DATABASE=LightRAG
# MONGODB_WORKSPACE=forced_workspace_name
From 9b7b2a9b0f03801bad81b77e0cd32556f05dc097 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Tue, 8 Jul 2025 11:00:09 +0800
Subject: [PATCH 3/6] Reduce default embedding batch size from 32 to 10
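
For context, this knob controls how many chunks are packed into a single embedding request; a smaller default means more, smaller requests, which is gentler on providers with strict payload or rate limits (the rationale assumed here). A rough sketch of the behavior, where embed_in_batches is a hypothetical helper and not LightRAG's actual batching code:

    # Illustrative sketch, assuming an async embedding_func(list[str]) -> list of vectors.
    async def embed_in_batches(texts, embedding_func, batch_num=10):
        results = []
        for i in range(0, len(texts), batch_num):
            batch = texts[i : i + batch_num]  # at most batch_num chunks per request
            results.extend(await embedding_func(batch))
        return results
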
---
env.example | 2 +-
lightrag/lightrag.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/env.example b/env.example
index ef52bd53..f759ea92 100644
--- a/env.example
+++ b/env.example
@@ -96,7 +96,7 @@ EMBEDDING_BINDING_API_KEY=your_api_key
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
EMBEDDING_BINDING_HOST=http://localhost:11434
### Num of chunks send to Embedding in single request
-# EMBEDDING_BATCH_NUM=32
+# EMBEDDING_BATCH_NUM=10
### Max concurrency requests for Embedding
# EMBEDDING_FUNC_MAX_ASYNC=16
### Maximum tokens sent to Embedding for each chunk (no longer in use?)
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index cbb5e2a8..1f61a42e 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -201,7 +201,7 @@ class LightRAG:
embedding_func: EmbeddingFunc | None = field(default=None)
"""Function for computing text embeddings. Must be set before use."""
- embedding_batch_num: int = field(default=int(os.getenv("EMBEDDING_BATCH_NUM", 32)))
+ embedding_batch_num: int = field(default=int(os.getenv("EMBEDDING_BATCH_NUM", 10)))
"""Batch size for embedding computations."""
embedding_func_max_async: int = field(
From 8cbba6e9dbf2ae28760ae6986a81fa6d2bca371f Mon Sep 17 00:00:00 2001
From: Molion Surya
Date: Tue, 8 Jul 2025 13:25:52 +0800
Subject: [PATCH 4/6] Fix #1746: openai.py streaming completion logic
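
Background for reviewers: with the OpenAI streaming API, token usage is reported on a final chunk that carries a usage object and an empty choices list, and only when the request opts in (e.g. via stream_options={"include_usage": True}). Previously the stream handler only looked at delta content, so that final chunk was ignored; this change captures chunk.usage as chunks arrive and reports it to the token tracker after the stream completes. A minimal standalone sketch of the pattern (client setup and model name are illustrative, not LightRAG's code):

    from openai import AsyncOpenAI

    async def stream_with_usage(prompt: str):
        client = AsyncOpenAI()
        response = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            stream=True,
            stream_options={"include_usage": True},
        )
        usage = None
        async for chunk in response:
            if getattr(chunk, "usage", None):
                usage = chunk.usage  # present only on the final chunk
            if chunk.choices and chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end="")
        return usage  # prompt_tokens / completion_tokens / total_tokens
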
---
lightrag/llm/openai.py | 28 ++++++++++++++++++++++++----
1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py
index 57f016cf..30491476 100644
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -210,9 +210,16 @@ async def openai_complete_if_cache(
async def inner():
# Track if we've started iterating
iteration_started = False
+ final_chunk_usage = None
+
try:
iteration_started = True
async for chunk in response:
+ # Check if this chunk has usage information (final chunk)
+ if hasattr(chunk, "usage") and chunk.usage:
+ final_chunk_usage = chunk.usage
+ logger.debug(f"Received usage info in streaming chunk: {chunk.usage}")
+
# Check if choices exists and is not empty
if not hasattr(chunk, "choices") or not chunk.choices:
logger.warning(f"Received chunk without choices: {chunk}")
@@ -222,16 +229,29 @@ async def openai_complete_if_cache(
if not hasattr(chunk.choices[0], "delta") or not hasattr(
chunk.choices[0].delta, "content"
):
- logger.warning(
- f"Received chunk without delta content: {chunk.choices[0]}"
- )
+ # This might be the final chunk, continue to check for usage
continue
+
content = chunk.choices[0].delta.content
if content is None:
continue
if r"\u" in content:
content = safe_unicode_decode(content.encode("utf-8"))
+
yield content
+
+ # After streaming is complete, track token usage
+ if token_tracker and final_chunk_usage:
+ # Use actual usage from the API
+ token_counts = {
+ "prompt_tokens": getattr(final_chunk_usage, "prompt_tokens", 0),
+ "completion_tokens": getattr(final_chunk_usage, "completion_tokens", 0),
+ "total_tokens": getattr(final_chunk_usage, "total_tokens", 0),
+ }
+ token_tracker.add_usage(token_counts)
+ logger.debug(f"Streaming token usage (from API): {token_counts}")
+ elif token_tracker:
+ logger.debug("No usage information available in streaming response")
except Exception as e:
logger.error(f"Error in stream response: {str(e)}")
# Try to clean up resources if possible
@@ -451,4 +471,4 @@ async def openai_embed(
response = await openai_async_client.embeddings.create(
model=model, input=texts, encoding_format="float"
)
- return np.array([dp.embedding for dp in response.data])
+ return np.array([dp.embedding for dp in response.data])
\ No newline at end of file
From 5f330ec11a487753e9aa06a15fdeb5df782d9d49 Mon Sep 17 00:00:00 2001
From: SLKun
Date: Mon, 7 Jul 2025 10:31:46 +0800
Subject: [PATCH 5/6] Remove <think> tags for entities and keywords extraction
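
Reasoning models (DeepSeek-R1 style) prepend a <think>...</think> block to their output, which breaks JSON parsing of the entity and keyword extraction results. This patch strips a leading think block (or a dangling <think>) before parsing. A standalone illustration of the same helper added to lightrag/utils.py below, using a made-up sample response:

    import re

    def remove_think_tags(text: str) -> str:
        return re.sub(r"^(<think>.*?</think>|<think>)", "", text, flags=re.DOTALL).strip()

    raw = '<think>reasoning about keywords...</think>\n{"high_level_keywords": ["graph RAG"]}'
    print(remove_think_tags(raw))  # -> {"high_level_keywords": ["graph RAG"]}
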
---
lightrag/operate.py | 4 +++-
lightrag/utils.py | 11 +++++++++--
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 88837435..4e219cf8 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -26,6 +26,7 @@ from .utils import (
get_conversation_turns,
use_llm_func_with_cache,
update_chunk_cache_list,
+ remove_think_tags,
)
from .base import (
BaseGraphStorage,
@@ -1703,7 +1704,8 @@ async def extract_keywords_only(
result = await use_model_func(kw_prompt, keyword_extraction=True)
# 6. Parse out JSON from the LLM response
- match = re.search(r"\{.*\}", result, re.DOTALL)
+ result = remove_think_tags(result)
+ match = re.search(r"\{.*?\}", result, re.DOTALL)
if not match:
logger.error("No JSON-like structure found in the LLM respond.")
return [], []
diff --git a/lightrag/utils.py b/lightrag/utils.py
index c6e2def9..386de3ab 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -1465,6 +1465,11 @@ async def update_chunk_cache_list(
)
+def remove_think_tags(text: str) -> str:
+    """Remove <think> tags from the text"""
+    return re.sub(r"^(<think>.*?</think>|<think>)", "", text, flags=re.DOTALL).strip()
+
+
async def use_llm_func_with_cache(
input_text: str,
use_llm_func: callable,
@@ -1531,6 +1536,7 @@ async def use_llm_func_with_cache(
kwargs["max_tokens"] = max_tokens
res: str = await use_llm_func(input_text, **kwargs)
+ res = remove_think_tags(res)
if llm_response_cache.global_config.get("enable_llm_cache_for_entity_extract"):
await save_to_cache(
@@ -1557,8 +1563,9 @@ async def use_llm_func_with_cache(
if max_tokens is not None:
kwargs["max_tokens"] = max_tokens
- logger.info(f"Call LLM function with query text lenght: {len(input_text)}")
- return await use_llm_func(input_text, **kwargs)
+ logger.info(f"Call LLM function with query text length: {len(input_text)}")
+ res = await use_llm_func(input_text, **kwargs)
+ return remove_think_tags(res)
def get_content_summary(content: str, max_length: int = 250) -> str:
From 2a0cff3ed6ec69e0b5786bbcea7402b25b5c2dc0 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Tue, 8 Jul 2025 18:17:21 +0800
Subject: [PATCH 6/6] Fix linting
---
lightrag/llm/openai.py | 20 ++++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py
index 30491476..eb74c2f1 100644
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -211,15 +211,17 @@ async def openai_complete_if_cache(
# Track if we've started iterating
iteration_started = False
final_chunk_usage = None
-
+
try:
iteration_started = True
async for chunk in response:
# Check if this chunk has usage information (final chunk)
if hasattr(chunk, "usage") and chunk.usage:
final_chunk_usage = chunk.usage
- logger.debug(f"Received usage info in streaming chunk: {chunk.usage}")
-
+ logger.debug(
+ f"Received usage info in streaming chunk: {chunk.usage}"
+ )
+
# Check if choices exists and is not empty
if not hasattr(chunk, "choices") or not chunk.choices:
logger.warning(f"Received chunk without choices: {chunk}")
@@ -231,21 +233,23 @@ async def openai_complete_if_cache(
):
# This might be the final chunk, continue to check for usage
continue
-
+
content = chunk.choices[0].delta.content
if content is None:
continue
if r"\u" in content:
content = safe_unicode_decode(content.encode("utf-8"))
-
+
yield content
-
+
# After streaming is complete, track token usage
if token_tracker and final_chunk_usage:
# Use actual usage from the API
token_counts = {
"prompt_tokens": getattr(final_chunk_usage, "prompt_tokens", 0),
- "completion_tokens": getattr(final_chunk_usage, "completion_tokens", 0),
+ "completion_tokens": getattr(
+ final_chunk_usage, "completion_tokens", 0
+ ),
"total_tokens": getattr(final_chunk_usage, "total_tokens", 0),
}
token_tracker.add_usage(token_counts)
@@ -471,4 +475,4 @@ async def openai_embed(
response = await openai_async_client.embeddings.create(
model=model, input=texts, encoding_format="float"
)
- return np.array([dp.embedding for dp in response.data])
\ No newline at end of file
+ return np.array([dp.embedding for dp in response.data])