diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py
index 708ed4372..9e7945d04 100644
--- a/rag/llm/__init__.py
+++ b/rag/llm/__init__.py
@@ -107,7 +107,8 @@ from .cv_model import (
     YiCV,
     HunyuanCV,
     AnthropicCV,
-    SILICONFLOWCV
+    SILICONFLOWCV,
+    GPUStackCV,
 )
 
 from .rerank_model import (
@@ -145,7 +146,7 @@ from .tts_model import (
     SparkTTS,
     XinferenceTTS,
     GPUStackTTS,
-    SILICONFLOWTTS
+    SILICONFLOWTTS,
 )
 
 EmbeddingModel = {
@@ -202,6 +203,7 @@ CvModel = {
     "Tencent Hunyuan": HunyuanCV,
     "Anthropic": AnthropicCV,
     "SILICONFLOW": SILICONFLOWCV,
+    "GPUStack": GPUStackCV,
 }
 
 ChatModel = {
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 46ea7b14e..480c5c6d4 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -1586,6 +1586,6 @@ class GPUStackChat(Base):
     def __init__(self, key=None, model_name="", base_url=""):
         if not base_url:
             raise ValueError("Local llm url cannot be None")
-        if base_url.split("/")[-1] != "v1-openai":
-            base_url = os.path.join(base_url, "v1-openai")
+        if base_url.split("/")[-1] != "v1":
+            base_url = os.path.join(base_url, "v1")
         super().__init__(key, model_name, base_url)
diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index ce317ea6f..e0dbea2e5 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -1028,4 +1028,14 @@ class AnthropicCV(Base):
         except Exception as e:
             yield ans + "\n**ERROR**: " + str(e)
 
-        yield total_tokens
\ No newline at end of file
+        yield total_tokens
+
+class GPUStackCV(GptV4):
+    def __init__(self, key, model_name, lang="Chinese", base_url=""):
+        if not base_url:
+            raise ValueError("Local llm url cannot be None")
+        if base_url.split("/")[-1] != "v1":
+            base_url = os.path.join(base_url, "v1")
+        self.client = OpenAI(api_key=key, base_url=base_url)
+        self.model_name = model_name
+        self.lang = lang
\ No newline at end of file
diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py
index c42e9b0f5..03cfcf0b0 100644
--- a/rag/llm/embedding_model.py
+++ b/rag/llm/embedding_model.py
@@ -832,9 +832,8 @@ class GPUStackEmbed(OpenAIEmbed):
     def __init__(self, key, model_name, base_url):
         if not base_url:
             raise ValueError("url cannot be None")
-        if base_url.split("/")[-1] != "v1-openai":
-            base_url = os.path.join(base_url, "v1-openai")
+        if base_url.split("/")[-1] != "v1":
+            base_url = os.path.join(base_url, "v1")
 
-        print(key,base_url)
         self.client = OpenAI(api_key=key, base_url=base_url)
         self.model_name = model_name
diff --git a/rag/llm/sequence2txt_model.py b/rag/llm/sequence2txt_model.py
index e5bcae4fa..01e529f86 100644
--- a/rag/llm/sequence2txt_model.py
+++ b/rag/llm/sequence2txt_model.py
@@ -198,8 +198,8 @@ class GPUStackSeq2txt(Base):
     def __init__(self, key, model_name, base_url):
         if not base_url:
             raise ValueError("url cannot be None")
-        if base_url.split("/")[-1] != "v1-openai":
-            base_url = os.path.join(base_url, "v1-openai")
+        if base_url.split("/")[-1] != "v1":
+            base_url = os.path.join(base_url, "v1")
         self.base_url = base_url
         self.model_name = model_name
         self.key = key
diff --git a/rag/llm/tts_model.py b/rag/llm/tts_model.py
index e07ba695a..7111b2f60 100644
--- a/rag/llm/tts_model.py
+++ b/rag/llm/tts_model.py
@@ -378,7 +378,7 @@ class GPUStackTTS:
         }
 
         response = requests.post(
-            f"{self.base_url}/v1-openai/audio/speech",
+            f"{self.base_url}/v1/audio/speech",
             headers=self.headers,
             json=payload,
             stream=stream