Add support for DeepSeek (#668)

### What problem does this PR solve?

#666

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2025-11-22 04:57:18 +00:00 · 2024-05-08 10:30:02 +08:00 · 2024-05-08 10:30:02 +08:00 · eb27a4309e
commit eb27a4309e
parent 48e1534bf4
3 changed files with 43 additions and 53 deletions
--- a/api/db/init_data.py
+++ b/api/db/init_data.py
@@ -124,6 +124,11 @@ factory_infos = [{
    "logo": "",
    "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
    "status": "1",
 },{
    "name": "DeepSeek",
    "logo": "",
    "tags": "LLM",
    "status": "1",
 },
    # {
    #     "name": "文心一言",
@@ -331,6 +336,21 @@ def init_llm_factory():
            "max_tokens": 512,
            "model_type": LLMType.EMBEDDING.value
        },
        # ------------------------ DeepSeek -----------------------
        {
            "fid": factory_infos[8]["name"],
            "llm_name": "deepseek-chat",
            "tags": "LLM,CHAT,",
            "max_tokens": 32768,
            "model_type": LLMType.CHAT.value
        },
        {
            "fid": factory_infos[8]["name"],
            "llm_name": "deepseek-coder",
            "tags": "LLM,CHAT,",
            "max_tokens": 16385,
            "model_type": LLMType.CHAT.value
        },
    ]
    for info in factory_infos:
        try:
--- a/rag/llm/__init__.py
+++ b/rag/llm/__init__.py
@@ -45,6 +45,7 @@ ChatModel = {
    "Tongyi-Qianwen": QWenChat,
    "Ollama": OllamaChat,
    "Xinference": XinferenceChat,
-    "Moonshot": MoonshotChat
+    "Moonshot": MoonshotChat,
    "DeepSeek": DeepSeekChat
 }
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -24,16 +24,7 @@ from rag.utils import num_tokens_from_string
 class Base(ABC):
-    def __init__(self, key, model_name):
+    def __init__(self, key, model_name, base_url):
        pass
    def chat(self, system, history, gen_conf):
        raise NotImplementedError("Please implement encode method!")
 class GptTurbo(Base):
    def __init__(self, key, model_name="gpt-3.5-turbo", base_url="https://api.openai.com/v1"):
        if not base_url: base_url="https://api.openai.com/v1"
        self.client = OpenAI(api_key=key, base_url=base_url)
        self.model_name = model_name
@@ -54,28 +45,28 @@ class GptTurbo(Base):
            return "**ERROR**: " + str(e), 0
-class MoonshotChat(GptTurbo):
+class GptTurbo(Base):
    def __init__(self, key, model_name="gpt-3.5-turbo", base_url="https://api.openai.com/v1"):
        if not base_url: base_url="https://api.openai.com/v1"
        super().__init__(key, model_name, base_url)
 class MoonshotChat(Base):
    def __init__(self, key, model_name="moonshot-v1-8k", base_url="https://api.moonshot.cn/v1"):
        if not base_url: base_url="https://api.moonshot.cn/v1"
-        self.client = OpenAI(
+        super().__init__(key, model_name, base_url)
            api_key=key, base_url=base_url)
        self.model_name = model_name
-    def chat(self, system, history, gen_conf):
+
-        if system:
+class XinferenceChat(Base):
-            history.insert(0, {"role": "system", "content": system})
+    def __init__(self, key=None, model_name="", base_url=""):
-        try:
+        key = "xxx"
-            response = self.client.chat.completions.create(
+        super().__init__(key, model_name, base_url)
-                model=self.model_name,
+
-                messages=history,
+
-                **gen_conf)
+class DeepSeekChat(Base):
-            ans = response.choices[0].message.content.strip()
+    def __init__(self, key, model_name="deepseek-chat", base_url="https://api.deepseek.com/v1"):
-            if response.choices[0].finish_reason == "length":
+        if not base_url: base_url="https://api.deepseek.com/v1"
-                ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
+        super().__init__(key, model_name, base_url)
                    [ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"
            return ans, response.usage.total_tokens
        except openai.APIError as e:
            return "**ERROR**: " + str(e), 0
 class QWenChat(Base):
@@ -157,25 +148,3 @@ class OllamaChat(Base):
        except Exception as e:
            return "**ERROR**: " + str(e), 0
 class XinferenceChat(Base):
    def __init__(self, key=None, model_name="", base_url=""):
        self.client = OpenAI(api_key="xxx", base_url=base_url)
        self.model_name = model_name
    def chat(self, system, history, gen_conf):
        if system:
            history.insert(0, {"role": "system", "content": system})
        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=history,
                **gen_conf)
            ans = response.choices[0].message.content.strip()
            if response.choices[0].finish_reason == "length":
                ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
                    [ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"
            return ans, response.usage.total_tokens
        except openai.APIError as e:
            return "**ERROR**: " + str(e), 0