diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py
index 708ed4372..9e7945d04 100644
--- a/rag/llm/__init__.py
+++ b/rag/llm/__init__.py
@@ -107,7 +107,8 @@ from .cv_model import (
     YiCV,
     HunyuanCV,
     AnthropicCV,
-    SILICONFLOWCV
+    SILICONFLOWCV,
+    GPUStackCV,
 )
 
 from .rerank_model import (
@@ -145,7 +146,7 @@ from .tts_model import (
     SparkTTS,
     XinferenceTTS,
     GPUStackTTS,
-    SILICONFLOWTTS
+    SILICONFLOWTTS,
 )
 
 EmbeddingModel = {
@@ -202,6 +203,7 @@ CvModel = {
     "Tencent Hunyuan": HunyuanCV,
     "Anthropic": AnthropicCV,
     "SILICONFLOW": SILICONFLOWCV,
+    "GPUStack": GPUStackCV,
 }
 
 ChatModel = {
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 46ea7b14e..480c5c6d4 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -1586,6 +1586,6 @@ class GPUStackChat(Base):
     def __init__(self, key=None, model_name="", base_url=""):
         if not base_url:
             raise ValueError("Local llm url cannot be None")
-        if base_url.split("/")[-1] != "v1-openai":
-            base_url = os.path.join(base_url, "v1-openai")
+        if base_url.split("/")[-1] != "v1":
+            base_url = os.path.join(base_url, "v1")
         super().__init__(key, model_name, base_url)
diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index ce317ea6f..e0dbea2e5 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -1028,4 +1028,14 @@ class AnthropicCV(Base):
         except Exception as e:
             yield ans + "\n**ERROR**: " + str(e)
 
-        yield total_tokens
\ No newline at end of file
+        yield total_tokens
+
+class GPUStackCV(GptV4):
+    def __init__(self, key, model_name, lang="Chinese", base_url=""):
+        if not base_url:
+            raise ValueError("Local llm url cannot be None")
+        if base_url.split("/")[-1] != "v1":
+            base_url = os.path.join(base_url, "v1")
+        self.client = OpenAI(api_key=key, base_url=base_url)
+        self.model_name = model_name
+        self.lang = lang
\ No newline at end of file
diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py
index c42e9b0f5..03cfcf0b0 100644
--- a/rag/llm/embedding_model.py
+++ b/rag/llm/embedding_model.py
@@ -832,9 +832,8 @@ class GPUStackEmbed(OpenAIEmbed):
     def __init__(self, key, model_name, base_url):
         if not base_url:
             raise ValueError("url cannot be None")
-        if base_url.split("/")[-1] != "v1-openai":
-            base_url = os.path.join(base_url, "v1-openai")
+        if base_url.split("/")[-1] != "v1":
+            base_url = os.path.join(base_url, "v1")
 
-        print(key,base_url)
         self.client = OpenAI(api_key=key, base_url=base_url)
         self.model_name = model_name
diff --git a/rag/llm/sequence2txt_model.py b/rag/llm/sequence2txt_model.py
index e5bcae4fa..01e529f86 100644
--- a/rag/llm/sequence2txt_model.py
+++ b/rag/llm/sequence2txt_model.py
@@ -198,8 +198,8 @@ class GPUStackSeq2txt(Base):
     def __init__(self, key, model_name, base_url):
         if not base_url:
             raise ValueError("url cannot be None")
-        if base_url.split("/")[-1] != "v1-openai":
-            base_url = os.path.join(base_url, "v1-openai")
+        if base_url.split("/")[-1] != "v1":
+            base_url = os.path.join(base_url, "v1")
         self.base_url = base_url
         self.model_name = model_name
         self.key = key
diff --git a/rag/llm/tts_model.py b/rag/llm/tts_model.py
index e07ba695a..7111b2f60 100644
--- a/rag/llm/tts_model.py
+++ b/rag/llm/tts_model.py
@@ -378,7 +378,7 @@ class GPUStackTTS:
         }
 
         response = requests.post(
-            f"{self.base_url}/v1-openai/audio/speech",
+            f"{self.base_url}/v1/audio/speech",
             headers=self.headers,
             json=payload,
             stream=stream