From 021e8b57ae61be83dce528db353bd0adb2fdfd3a Mon Sep 17 00:00:00 2001
From: 謝富祥 <44687588+es-zx@users.noreply.github.com>
Date: Wed, 30 Jul 2025 11:37:49 +0800
Subject: [PATCH] Fix: handle 429 API rate-limit errors when building
 knowledge graphs for all chat models and the Mistral embedding model
 (#9106)

### What problem does this PR solve?

Fixes 429 (API rate limit) errors when building knowledge graphs with any chat model and with the Mistral embedding model. The chat models' retry delay now adds a 60-150 second jitter instead of 10-150 seconds, and the Mistral embedding model retries each request up to five times with a randomized 20-60 second delay instead of failing on the first error.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---
 rag/llm/chat_model.py      |  2 +-
 rag/llm/embedding_model.py | 39 +++++++++++++++++++++++++++-----------
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 55c7aac63..28ecbc20e 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -73,7 +73,7 @@ class Base(ABC):
 
     def _get_delay(self):
         """Calculate retry delay time"""
-        return self.base_delay + random.uniform(10, 150)
+        return self.base_delay + random.uniform(60, 150)
 
     def _classify_error(self, error):
         """Classify error based on error message content"""
diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py
index d8de3e0de..f89f49e9b 100644
--- a/rag/llm/embedding_model.py
+++ b/rag/llm/embedding_model.py
@@ -463,25 +463,42 @@ class MistralEmbed(Base):
         self.model_name = model_name
 
     def encode(self, texts: list):
+        import time
+        import random
         texts = [truncate(t, 8196) for t in texts]
         batch_size = 16
         ress = []
         token_count = 0
         for i in range(0, len(texts), batch_size):
-            res = self.client.embeddings(input=texts[i : i + batch_size], model=self.model_name)
-            try:
-                ress.extend([d.embedding for d in res.data])
-                token_count += self.total_token_count(res)
-            except Exception as _e:
-                log_exception(_e, res)
+            retry_max = 5
+            while retry_max > 0:
+                try:
+                    res = self.client.embeddings(input=texts[i : i + batch_size], model=self.model_name)
+                    ress.extend([d.embedding for d in res.data])
+                    token_count += self.total_token_count(res)
+                    break
+                except Exception as _e:
+                    if retry_max == 1:
+                        log_exception(_e)
+                    delay = random.uniform(20, 60)
+                    time.sleep(delay)
+                    retry_max -= 1
         return np.array(ress), token_count
 
     def encode_queries(self, text):
-        res = self.client.embeddings(input=[truncate(text, 8196)], model=self.model_name)
-        try:
-            return np.array(res.data[0].embedding), self.total_token_count(res)
-        except Exception as _e:
-            log_exception(_e, res)
+        import time
+        import random
+        retry_max = 5
+        while retry_max > 0:
+            try:
+                res = self.client.embeddings(input=[truncate(text, 8196)], model=self.model_name)
+                return np.array(res.data[0].embedding), self.total_token_count(res)
+            except Exception as _e:
+                if retry_max == 1:
+                    log_exception(_e)
+                delay = random.randint(20, 60)
+                time.sleep(delay)
+                retry_max -= 1
 
 
 class BedrockEmbed(Base):
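The two retry loops added to `MistralEmbed` are nearly identical, so they could share one helper. Below is a minimal sketch of such a helper, not part of this patch; `_with_retries` is a hypothetical name, and it assumes, as the patched code does, that an exhausted retry budget should surface the last exception to the caller.

```python
import random
import time


def _with_retries(call, retries=5, min_delay=20, max_delay=60):
    """Invoke call() until it succeeds or the retry budget runs out.

    Hypothetical helper for illustration only; not part of this patch.
    """
    for attempt in range(retries):
        try:
            return call()
        except Exception:
            if attempt == retries - 1:
                raise  # budget exhausted; let the caller log and handle it
            # A randomized delay keeps concurrent workers from retrying in
            # lockstep against an endpoint that is already rate limiting.
            time.sleep(random.uniform(min_delay, max_delay))


# Sketch of how encode() could use it for one batch:
# res = _with_retries(lambda: self.client.embeddings(input=batch, model=self.model_name))
```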
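A shared helper would also iron out a small inconsistency in the patch: `encode` draws its delay with `random.uniform(20, 60)` while `encode_queries` uses `random.randint(20, 60)`. Both work, but one code path is easier to keep correct. Rate-limit handlers also commonly grow the delay per attempt (exponential backoff with jitter) and retry only errors classified as transient, which is the direction `_classify_error` in `chat_model.py` already points for the chat models.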