#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import inspect
import logging
import re
from functools import partial
from typing import Generator

from api.db.db_models import LLM
from api.db.services.common_service import CommonService
from api.db.services.tenant_llm_service import LLM4Tenant, TenantLLMService


class LLMService(CommonService):
    model = LLM


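# Builds the initial per-tenant LLM records for a newly created user from the factory
# configurations declared in api.settings (chat, embedding, ASR, image2text and rerank),
# plus the built-in embedding models when LIGHTEN is not set.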
def get_init_tenant_llm(user_id):
    from api import settings

    tenant_llm = []

    seen = set()
    factory_configs = []
    for factory_config in [
        settings.CHAT_CFG,
        settings.EMBEDDING_CFG,
        settings.ASR_CFG,
        settings.IMAGE2TEXT_CFG,
        settings.RERANK_CFG,
    ]:
        factory_name = factory_config["factory"]
        if factory_name not in seen:
            seen.add(factory_name)
            factory_configs.append(factory_config)

    for factory_config in factory_configs:
        for llm in LLMService.query(fid=factory_config["factory"]):
            tenant_llm.append(
                {
                    "tenant_id": user_id,
                    "llm_factory": factory_config["factory"],
                    "llm_name": llm.llm_name,
                    "model_type": llm.model_type,
                    "api_key": factory_config["api_key"],
                    "api_base": factory_config["base_url"],
                    "max_tokens": llm.max_tokens if llm.max_tokens else 8192,
                }
            )

    if settings.LIGHTEN != 1:
        for buildin_embedding_model in settings.BUILTIN_EMBEDDING_MODELS:
            mdlnm, fid = TenantLLMService.split_model_name_and_factory(buildin_embedding_model)
            tenant_llm.append(
                {
                    "tenant_id": user_id,
                    "llm_factory": fid,
                    "llm_name": mdlnm,
                    "model_type": "embedding",
                    "api_key": "",
                    "api_base": "",
                    "max_tokens": 1024 if buildin_embedding_model == "BAAI/bge-large-zh-v1.5@BAAI" else 512,
                }
            )

    unique = {}
    for item in tenant_llm:
        key = (item["tenant_id"], item["llm_factory"], item["llm_name"])
        if key not in unique:
            unique[key] = item
    return list(unique.values())


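# LLMBundle adds token-usage accounting and optional Langfuse tracing around the model handle
# resolved by LLM4Tenant (which provides self.mdl, self.llm_name, self.langfuse, etc.).
#
# Illustrative sketch only (assumes a valid tenant_id with a chat model configured and the
# LLMType enum from api.db):
#
#     chat_mdl = LLMBundle(tenant_id, LLMType.CHAT)
#     answer = chat_mdl.chat("You are a helpful assistant.",
#                            [{"role": "user", "content": "Hello"}],
#                            {"temperature": 0.1})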
class LLMBundle(LLM4Tenant):
    def __init__(self, tenant_id, llm_type, llm_name=None, lang="Chinese", **kwargs):
        super().__init__(tenant_id, llm_type, llm_name, lang, **kwargs)

    def bind_tools(self, toolcall_session, tools):
        if not self.is_tools:
            logging.warning(f"Model {self.llm_name} does not support tool call, but you have assigned one or more tools to it!")
            return
        self.mdl.bind_tools(toolcall_session, tools)

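    # The wrapper methods below share one pattern: optionally open a Langfuse generation for
    # tracing, delegate to the underlying model (self.mdl), record token usage through
    # TenantLLMService.increase_usage, then close the trace and return the model's output.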
    def encode(self, texts: list):
        if self.langfuse:
            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="encode", model=self.llm_name, input={"texts": texts})

        embeddings, used_tokens = self.mdl.encode(texts)
        llm_name = getattr(self, "llm_name", None)
        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
            logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))

        if self.langfuse:
            generation.update(usage_details={"total_tokens": used_tokens})
            generation.end()

        return embeddings, used_tokens

    def encode_queries(self, query: str):
        if self.langfuse:
            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="encode_queries", model=self.llm_name, input={"query": query})

        emd, used_tokens = self.mdl.encode_queries(query)
        llm_name = getattr(self, "llm_name", None)
        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
            logging.error("LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))

        if self.langfuse:
            generation.update(usage_details={"total_tokens": used_tokens})
            generation.end()

        return emd, used_tokens

    def similarity(self, query: str, texts: list):
        if self.langfuse:
            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="similarity", model=self.llm_name, input={"query": query, "texts": texts})

        sim, used_tokens = self.mdl.similarity(query, texts)
        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
            logging.error("LLMBundle.similarity can't update token usage for {}/RERANK used_tokens: {}".format(self.tenant_id, used_tokens))

        if self.langfuse:
            generation.update(usage_details={"total_tokens": used_tokens})
            generation.end()

        return sim, used_tokens

    def describe(self, image, max_tokens=300):
        if self.langfuse:
            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="describe", metadata={"model": self.llm_name})

        txt, used_tokens = self.mdl.describe(image)
        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
            logging.error("LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))

        if self.langfuse:
            generation.update(output={"output": txt}, usage_details={"total_tokens": used_tokens})
            generation.end()

        return txt

    def describe_with_prompt(self, image, prompt):
        if self.langfuse:
            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="describe_with_prompt", metadata={"model": self.llm_name, "prompt": prompt})

        txt, used_tokens = self.mdl.describe_with_prompt(image, prompt)
        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
            logging.error("LLMBundle.describe_with_prompt can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))

        if self.langfuse:
            generation.update(output={"output": txt}, usage_details={"total_tokens": used_tokens})
            generation.end()

        return txt

    def transcription(self, audio):
        if self.langfuse:
            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="transcription", metadata={"model": self.llm_name})

        txt, used_tokens = self.mdl.transcription(audio)
        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
            logging.error("LLMBundle.transcription can't update token usage for {}/SEQUENCE2TXT used_tokens: {}".format(self.tenant_id, used_tokens))

        if self.langfuse:
            generation.update(output={"output": txt}, usage_details={"total_tokens": used_tokens})
            generation.end()

        return txt

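    # Streams synthesized audio chunks; the wrapped TTS model reports the total token count by
    # yielding an int, which is recorded before the generator stops.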
    def tts(self, text: str) -> Generator[bytes, None, None]:
        if self.langfuse:
            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="tts", input={"text": text})

        for chunk in self.mdl.tts(text):
            if isinstance(chunk, int):
                if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, chunk, self.llm_name):
                    logging.error("LLMBundle.tts can't update token usage for {}/TTS".format(self.tenant_id))
                return
            yield chunk

        if self.langfuse:
            generation.end()

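    # Strips reasoning emitted between <think> ... </think> tags, keeping only the text after
    # the last closing tag; the input is returned unchanged when the tags are missing or
    # appear in the wrong order.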
    def _remove_reasoning_content(self, txt: str) -> str:
        first_think_start = txt.find("<think>")
        if first_think_start == -1:
            return txt

        last_think_end = txt.rfind("</think>")
        if last_think_end == -1:
            return txt

        if last_think_end < first_think_start:
            return txt

        return txt[last_think_end + len("</think>") :]

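    # Filters **kwargs down to the keyword-only parameters the wrapped chat function actually
    # accepts; if that function declares *args or **kwargs, everything is passed through as-is.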
    @staticmethod
    def _clean_param(chat_partial, **kwargs):
        func = chat_partial.func
        sig = inspect.signature(func)
        keyword_args = []
        support_var_args = False
        for param in sig.parameters.values():
            if param.kind == inspect.Parameter.VAR_KEYWORD or param.kind == inspect.Parameter.VAR_POSITIONAL:
                support_var_args = True
            elif param.kind == inspect.Parameter.KEYWORD_ONLY:
                keyword_args.append(param.name)

        use_kwargs = kwargs
        if not support_var_args:
            use_kwargs = {k: v for k, v in kwargs.items() if k in keyword_args}
        return use_kwargs

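    # Blocking chat completion: routes through chat_with_tools when both the bundle and the
    # underlying model support tools, removes <think> blocks and (unless verbose_tool_use is
    # set) <tool_call> markup, and records token usage.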
    def chat(self, system: str, history: list, gen_conf: dict = {}, **kwargs) -> str:
        if self.langfuse:
            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="chat", model=self.llm_name, input={"system": system, "history": history})

        chat_partial = partial(self.mdl.chat, system, history, gen_conf)
        if self.is_tools and self.mdl.is_tools:
            chat_partial = partial(self.mdl.chat_with_tools, system, history, gen_conf)

        use_kwargs = self._clean_param(chat_partial, **kwargs)
        txt, used_tokens = chat_partial(**use_kwargs)
        txt = self._remove_reasoning_content(txt)

        if not self.verbose_tool_use:
            txt = re.sub(r"<tool_call>.*?</tool_call>", "", txt, flags=re.DOTALL)

        if isinstance(txt, int) and not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, self.llm_name):
            logging.error("LLMBundle.chat can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))

        if self.langfuse:
            generation.update(output={"output": txt}, usage_details={"total_tokens": used_tokens})
            generation.end()

        return txt

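    # Streaming variant of chat(): yields the accumulated answer after every chunk. The model
    # signals the total token count by yielding an int, and usage is written to the database
    # only after the stream ends, since updating it mid-stream could hit a pooled MySQL
    # connection that expired during a long response (see #6548 / #7057).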
    def chat_streamly(self, system: str, history: list, gen_conf: dict = {}, **kwargs):
        if self.langfuse:
            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="chat_streamly", model=self.llm_name, input={"system": system, "history": history})

        ans = ""
        chat_partial = partial(self.mdl.chat_streamly, system, history, gen_conf)
        total_tokens = 0
        if self.is_tools and self.mdl.is_tools:
            chat_partial = partial(self.mdl.chat_streamly_with_tools, system, history, gen_conf)
        use_kwargs = self._clean_param(chat_partial, **kwargs)
        for txt in chat_partial(**use_kwargs):
            if isinstance(txt, int):
                total_tokens = txt
                if self.langfuse:
                    generation.update(output={"output": ans})
                    generation.end()
                break

            if txt.endswith("</think>"):
                ans = ans.rstrip("</think>")

            if not self.verbose_tool_use:
                txt = re.sub(r"<tool_call>.*?</tool_call>", "", txt, flags=re.DOTALL)

            ans += txt
            yield ans

        if total_tokens > 0:
            if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, txt, self.llm_name):
                logging.error("LLMBundle.chat_streamly can't update token usage for {}/CHAT llm_name: {}, content: {}".format(self.tenant_id, self.llm_name, txt))