Fix: handle 429 API rate-limit errors when building knowledge graphs, for all chat models and the Mistral embedding model (#9106)

### What problem does this PR solve?

Fixes HTTP 429 (API rate limit) errors raised while building a knowledge graph, for all chat models and for the Mistral embedding model. The base chat-model retry delay now draws its jitter from 60–150 s instead of 10–150 s, and `MistralEmbed.encode` / `MistralEmbed.encode_queries` retry a failed embeddings call up to five times with a randomized 20–60 s delay instead of failing on the first error.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
Author: 謝富祥 (committed by GitHub)
Date: 2025-07-30 11:37:49 +08:00
Parent: e26f37351d
Commit: 021e8b57ae
2 changed files with 29 additions and 12 deletions


```diff
@@ -73,7 +73,7 @@ class Base(ABC):
     def _get_delay(self):
         """Calculate retry delay time"""
-        return self.base_delay + random.uniform(10, 150)
+        return self.base_delay + random.uniform(60, 150)
 
     def _classify_error(self, error):
         """Classify error based on error message content"""
```


```diff
@@ -463,25 +463,42 @@ class MistralEmbed(Base):
         self.model_name = model_name
 
     def encode(self, texts: list):
+        import time
+        import random
         texts = [truncate(t, 8196) for t in texts]
         batch_size = 16
         ress = []
         token_count = 0
         for i in range(0, len(texts), batch_size):
-            res = self.client.embeddings(input=texts[i : i + batch_size], model=self.model_name)
-            try:
-                ress.extend([d.embedding for d in res.data])
-                token_count += self.total_token_count(res)
-            except Exception as _e:
-                log_exception(_e, res)
+            retry_max = 5
+            while retry_max > 0:
+                try:
+                    res = self.client.embeddings(input=texts[i : i + batch_size], model=self.model_name)
+                    ress.extend([d.embedding for d in res.data])
+                    token_count += self.total_token_count(res)
+                    break
+                except Exception as _e:
+                    if retry_max == 1:
+                        log_exception(_e)
+                    delay = random.uniform(20, 60)
+                    time.sleep(delay)
+                    retry_max -= 1
         return np.array(ress), token_count
 
     def encode_queries(self, text):
-        res = self.client.embeddings(input=[truncate(text, 8196)], model=self.model_name)
-        try:
-            return np.array(res.data[0].embedding), self.total_token_count(res)
-        except Exception as _e:
-            log_exception(_e, res)
+        import time
+        import random
+        retry_max = 5
+        while retry_max > 0:
+            try:
+                res = self.client.embeddings(input=[truncate(text, 8196)], model=self.model_name)
+                return np.array(res.data[0].embedding), self.total_token_count(res)
+            except Exception as _e:
+                if retry_max == 1:
+                    log_exception(_e)
+                delay = random.randint(20, 60)
+                time.sleep(delay)
+                retry_max -= 1
 
 
 class BedrockEmbed(Base):
```
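Both methods now carry the same inline bounded-retry loop (note one draws its delay with `random.uniform(20, 60)` and the other with `random.randint(20, 60)`, though both stay in the same 20–60 s range). As a sketch of how the duplication could be factored out while preserving the added behavior, where only the final failure is logged and exhausted retries fall through to `None`; `with_retries` is a hypothetical helper, not part of this PR:

```python
import random
import time

def with_retries(call, retries=5, on_final_failure=print):
    """Run call() up to `retries` times with a 20-60 s randomized pause.

    Sketch mirroring the loops added above: intermediate failures are
    swallowed, only the last one is reported, and None falls through if
    every attempt fails (as in the patched methods).
    """
    for attempt in range(retries):
        try:
            return call()
        except Exception as e:
            if attempt == retries - 1:
                on_final_failure(e)
            time.sleep(random.uniform(20, 60))

# Hypothetical usage inside MistralEmbed.encode_queries:
#   res = with_retries(lambda: self.client.embeddings(
#       input=[truncate(text, 8196)], model=self.model_name))
```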