From 0283e4098f842aa6fbd2d6ccb372af2c596449fc Mon Sep 17 00:00:00 2001
From: Günter Lukas
Date: Fri, 10 Oct 2025 13:18:24 +0200
Subject: [PATCH] Fix #10408 (#10471)

### What problem does this PR solve?

The Google Cloud model provider does not work correctly with gemini-2.5 models.
Close #10408

### Type of change

- [X] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Kevin Hu
---
 api/db/db_models.py   |  6 +++++-
 rag/llm/chat_model.py | 20 +++++++++++++++-----
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/api/db/db_models.py b/api/db/db_models.py
index 7f2e35497..c1a5fd5ed 100644
--- a/api/db/db_models.py
+++ b/api/db/db_models.py
@@ -641,7 +641,7 @@ class TenantLLM(DataBaseModel):
     llm_factory = CharField(max_length=128, null=False, help_text="LLM factory name", index=True)
     model_type = CharField(max_length=128, null=True, help_text="LLM, Text Embedding, Image2Text, ASR", index=True)
     llm_name = CharField(max_length=128, null=True, help_text="LLM name", default="", index=True)
-    api_key = CharField(max_length=2048, null=True, help_text="API KEY", index=True)
+    api_key = TextField(null=True, help_text="API KEY")
     api_base = CharField(max_length=255, null=True, help_text="API Base")
     max_tokens = IntegerField(default=8192, index=True)
     used_tokens = IntegerField(default=0, index=True)
@@ -1142,4 +1142,8 @@ def migrate_db():
         migrate(migrator.add_column("knowledgebase", "mindmap_task_finish_at", CharField(null=True)))
     except Exception:
         pass
+    try:
+        migrate(migrator.alter_column_type("tenant_llm", "api_key", TextField(null=True, help_text="API KEY")))
+    except Exception:
+        pass
     logging.disable(logging.NOTSET)
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 2a77a45eb..91507d147 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -145,7 +145,7 @@ class Base(ABC):
             logging.info(f"[INFO] {self.model_name} detected as reasoning model, using _chat_streamly")
 
             final_ans = ""
-            tol_token = 0
+            tol_token = 0
             for delta, tol in self._chat_streamly(history, gen_conf, with_reasoning=False, **kwargs):
                 if delta.startswith("<think>") or delta.endswith("</think>"):
                     continue
@@ -156,7 +156,7 @@ class Base(ABC):
                 final_ans = "**ERROR**: Empty response from reasoning model"
 
             return final_ans.strip(), tol_token
-
+
         if self.model_name.lower().find("qwen3") >= 0:
             kwargs["extra_body"] = {"enable_thinking": False}
 
@@ -1182,6 +1182,7 @@ class GoogleChat(Base):
         else:
             if "max_tokens" in gen_conf:
                 gen_conf["max_output_tokens"] = gen_conf["max_tokens"]
+                del gen_conf["max_tokens"]
             for k in list(gen_conf.keys()):
                 if k not in ["temperature", "top_p", "max_output_tokens"]:
                     del gen_conf[k]
@@ -1189,6 +1190,7 @@ class GoogleChat(Base):
 
     def _chat(self, history, gen_conf={}, **kwargs):
         system = history[0]["content"] if history and history[0]["role"] == "system" else ""
+        gen_conf = self._clean_conf(gen_conf)
         if "claude" in self.model_name:
             response = self.client.messages.create(
                 model=self.model_name,
@@ -1250,9 +1252,12 @@ class GoogleChat(Base):
                 yield total_tokens
 
         else:
+            response = None
+            total_tokens = 0
             self.client._system_instruction = system
             if "max_tokens" in gen_conf:
                 gen_conf["max_output_tokens"] = gen_conf["max_tokens"]
+                del gen_conf["max_tokens"]
             for k in list(gen_conf.keys()):
                 if k not in ["temperature", "top_p", "max_output_tokens"]:
                     del gen_conf[k]
@@ -1260,18 +1265,23 @@ class GoogleChat(Base):
                 if "role" in item and item["role"] == "assistant":
                     item["role"] = "model"
                 if "content" in item:
-                    item["parts"] = item.pop("content")
+                    item["parts"] = [
+                        {
+                            "text": item.pop("content"),
+                        }
+                    ]
             ans = ""
             try:
-                response = self.model.generate_content(history, generation_config=gen_conf, stream=True)
+                response = self.client.generate_content(history, generation_config=gen_conf, stream=True)
                 for resp in response:
                     ans = resp.text
+                    total_tokens += num_tokens_from_string(ans)
                     yield ans
 
             except Exception as e:
                 yield ans + "\n**ERROR**: " + str(e)
 
-            yield response._chunks[-1].usage_metadata.total_token_count
+            yield total_tokens
 
 
 class GPUStackChat(Base):
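For readers unfamiliar with the Gemini request shape, here is a minimal standalone sketch of the three behaviors the patched `GoogleChat` branch relies on. It is not part of the patch: the model name, API key, and prompt are placeholders, and it assumes the `google-generativeai` client that `chat_model.py` wraps.

```python
# Illustrative sketch only (not part of the patch). Assumes the google-generativeai
# client used by GoogleChat; model name, key, and prompt are placeholders.
import google.generativeai as genai

genai.configure(api_key="YOUR_API_KEY")  # placeholder key
model = genai.GenerativeModel("gemini-2.5-flash")

# 1. Gemini rejects the OpenAI-style "max_tokens" key; the patch renames it to
#    "max_output_tokens" and deletes the original key before filtering gen_conf.
gen_conf = {"temperature": 0.7, "max_tokens": 1024}
if "max_tokens" in gen_conf:
    gen_conf["max_output_tokens"] = gen_conf.pop("max_tokens")

# 2. Each history item carries its text as a "parts" list instead of a bare
#    "content" string, which is the shape gemini-2.5 expects.
history = [{"role": "user", "parts": [{"text": "Say hello."}]}]

# 3. Token usage is accumulated per streamed chunk rather than being read from
#    the private response._chunks attribute after streaming ends.
total_tokens = 0
for chunk in model.generate_content(history, generation_config=gen_conf, stream=True):
    print(chunk.text, end="")
    total_tokens += len(chunk.text.split())  # rough stand-in for num_tokens_from_string
```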