### What problem does this PR solve?

The Google Cloud model provider does not work correctly with gemini-2.5 models.
Close #10408

### Type of change

- [X] Bug Fix (non-breaking change which fixes an issue)
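
In short, the diff below makes two changes (the rationale for the schema change is inferred, not stated in the diff): `tenant_llm.api_key` is widened from `CharField(max_length=2048)` to `TextField`, presumably because Google Cloud credentials are a service-account JSON blob that can exceed 2048 characters, and `GoogleChat` streaming is repaired for Gemini models: chat history is converted to Gemini's `parts: [{"text": ...}]` format, `generate_content` is called on `self.client` instead of `self.model`, and token usage is accumulated via `num_tokens_from_string` rather than read from `response._chunks`.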

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
Günter Lukas authored 2025-10-10 13:18:24 +02:00, committed by GitHub
commit 0283e4098f (parent 2cdba3d1e6)
2 changed files with 20 additions and 6 deletions

api/db/db_models.py

@@ -641,7 +641,7 @@ class TenantLLM(DataBaseModel):
     llm_factory = CharField(max_length=128, null=False, help_text="LLM factory name", index=True)
     model_type = CharField(max_length=128, null=True, help_text="LLM, Text Embedding, Image2Text, ASR", index=True)
     llm_name = CharField(max_length=128, null=True, help_text="LLM name", default="", index=True)
-    api_key = CharField(max_length=2048, null=True, help_text="API KEY", index=True)
+    api_key = TextField(null=True, help_text="API KEY")
     api_base = CharField(max_length=255, null=True, help_text="API Base")
     max_tokens = IntegerField(default=8192, index=True)
     used_tokens = IntegerField(default=0, index=True)
@@ -1142,4 +1142,8 @@ def migrate_db():
         migrate(migrator.add_column("knowledgebase", "mindmap_task_finish_at", CharField(null=True)))
     except Exception:
         pass
+    try:
+        migrate(migrator.alter_column_type("tenant_llm", "api_key", TextField(null=True, help_text="API KEY")))
+    except Exception:
+        pass
     logging.disable(logging.NOTSET)
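
For context, here is a minimal, hedged sketch of the new migration step run standalone with peewee's `playhouse.migrate` (the MySQL connection parameters are hypothetical; RAGFlow's `migrate_db()` supplies its own database handle and migrator):

```python
# Standalone sketch of the added migration step; assumes a MySQL backend
# and hypothetical connection settings. migrate_db() wraps this in
# try/except so a re-run that fails (e.g. already applied) is a no-op.
from peewee import MySQLDatabase, TextField
from playhouse.migrate import MySQLMigrator, migrate

db = MySQLDatabase("rag_flow", host="localhost", user="root", password="")  # hypothetical
migrator = MySQLMigrator(db)

# Widen tenant_llm.api_key from VARCHAR(2048) to TEXT so long credentials
# (e.g. a Google service-account JSON) are no longer truncated on save.
migrate(migrator.alter_column_type("tenant_llm", "api_key", TextField(null=True)))
```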

rag/llm/chat_model.py

@@ -145,7 +145,7 @@ class Base(ABC):
             logging.info(f"[INFO] {self.model_name} detected as reasoning model, using _chat_streamly")
             final_ans = ""
-            tol_token = 0
+            tol_token = 0
             for delta, tol in self._chat_streamly(history, gen_conf, with_reasoning=False, **kwargs):
                 if delta.startswith("<think>") or delta.endswith("</think>"):
                     continue
@@ -156,7 +156,7 @@ class Base(ABC):
                 final_ans = "**ERROR**: Empty response from reasoning model"
             return final_ans.strip(), tol_token
         if self.model_name.lower().find("qwen3") >= 0:
             kwargs["extra_body"] = {"enable_thinking": False}
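
The reasoning-model fallback above boils down to the following hedged sketch (the `collect_stream` helper and its input shape are illustrative, not repo code): stream (delta, token-count) pairs, skip `<think>`/`</think>` marker chunks, and substitute an explicit error string when the visible answer comes back empty.

```python
# Illustrative reduction of the fallback: consume (delta, token_count)
# pairs, drop reasoning markers, and guard against an empty response.
def collect_stream(chunks):
    final_ans, tol_token = "", 0
    for delta, tol in chunks:
        if delta.startswith("<think>") or delta.endswith("</think>"):
            continue  # skip reasoning markers, keep only visible text
        final_ans += delta
        tol_token = tol  # running total reported by the stream
    if not final_ans:
        final_ans = "**ERROR**: Empty response from reasoning model"
    return final_ans.strip(), tol_token

print(collect_stream([("<think>", 1), ("Hello", 2), (" world", 3)]))
# -> ('Hello world', 3)
```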
@@ -1182,6 +1182,7 @@ class GoogleChat(Base):
         else:
             if "max_tokens" in gen_conf:
                 gen_conf["max_output_tokens"] = gen_conf["max_tokens"]
+                del gen_conf["max_tokens"]
             for k in list(gen_conf.keys()):
                 if k not in ["temperature", "top_p", "max_output_tokens"]:
                     del gen_conf[k]
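
To make the Gemini branch of `_clean_conf` concrete, here is a hedged standalone sketch (the function name `clean_gemini_conf` is mine): the OpenAI-style `max_tokens` key is renamed to Gemini's `max_output_tokens`, then every key outside Gemini's accepted set is dropped.

```python
# Sketch of the Gemini config cleanup after this PR.
def clean_gemini_conf(gen_conf: dict) -> dict:
    conf = dict(gen_conf)  # work on a copy
    if "max_tokens" in conf:
        conf["max_output_tokens"] = conf.pop("max_tokens")  # rename for Gemini
    # generation_config only accepts these keys in this code path
    return {k: v for k, v in conf.items() if k in ("temperature", "top_p", "max_output_tokens")}

assert clean_gemini_conf({"max_tokens": 1024, "frequency_penalty": 0.5}) == {"max_output_tokens": 1024}
```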
@@ -1189,6 +1190,7 @@ class GoogleChat(Base):
     def _chat(self, history, gen_conf={}, **kwargs):
         system = history[0]["content"] if history and history[0]["role"] == "system" else ""
+        gen_conf = self._clean_conf(gen_conf)
         if "claude" in self.model_name:
             response = self.client.messages.create(
                 model=self.model_name,
@@ -1250,9 +1252,12 @@ class GoogleChat(Base):
             yield total_tokens
         else:
+            response = None
+            total_tokens = 0
             self.client._system_instruction = system
             if "max_tokens" in gen_conf:
                 gen_conf["max_output_tokens"] = gen_conf["max_tokens"]
+                del gen_conf["max_tokens"]
             for k in list(gen_conf.keys()):
                 if k not in ["temperature", "top_p", "max_output_tokens"]:
                     del gen_conf[k]
@@ -1260,18 +1265,23 @@ class GoogleChat(Base):
                 if "role" in item and item["role"] == "assistant":
                     item["role"] = "model"
                 if "content" in item:
-                    item["parts"] = item.pop("content")
+                    item["parts"] = [
+                        {
+                            "text": item.pop("content"),
+                        }
+                    ]
             ans = ""
             try:
-                response = self.model.generate_content(history, generation_config=gen_conf, stream=True)
+                response = self.client.generate_content(history, generation_config=gen_conf, stream=True)
                 for resp in response:
                     ans = resp.text
+                    total_tokens += num_tokens_from_string(ans)
                     yield ans
             except Exception as e:
                 yield ans + "\n**ERROR**: " + str(e)
-            yield response._chunks[-1].usage_metadata.total_token_count
+            yield total_tokens
 
 
 class GPUStackChat(Base):
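
Finally, a hedged sketch of the history conversion the last hunk fixes (`to_gemini_history` is illustrative, not repo code): Gemini expects the role `model` instead of `assistant`, and message content as a list of `{"text": ...}` parts rather than a bare string, which is why `item.pop("content")` is now wrapped in a one-element list.

```python
# Illustrative OpenAI-style -> Gemini message conversion, mirroring the
# fixed loop in GoogleChat's streaming path.
def to_gemini_history(history):
    converted = []
    for item in history:
        role = "model" if item.get("role") == "assistant" else item.get("role", "user")
        converted.append({"role": role, "parts": [{"text": item["content"]}]})
    return converted

print(to_gemini_history([{"role": "user", "content": "Hi"},
                         {"role": "assistant", "content": "Hello!"}]))
# [{'role': 'user', 'parts': [{'text': 'Hi'}]},
#  {'role': 'model', 'parts': [{'text': 'Hello!'}]}]
```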