2024-01-15 08:46:22 +08:00
|
|
|
#
|
2024-01-19 19:51:57 +08:00
|
|
|
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
2024-01-15 08:46:22 +08:00
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
#
|
|
|
|
from .embedding_model import *
|
|
|
|
from .chat_model import *
|
|
|
|
from .cv_model import *
|
2024-05-29 16:50:02 +08:00
|
|
|
from .rerank_model import *
|
2024-01-15 08:46:22 +08:00
|
|
|
|
|
|
|
|
|
|
|
# Registry of embedding back-ends, keyed by provider label.
# The classes on the right-hand side are expected to be supplied by the
# wildcard imports at the top of this module (presumably .embedding_model —
# verify if a name fails to resolve).
EmbeddingModel = {
    "Ollama": OllamaEmbed,
    "OpenAI": OpenAIEmbed,
    "Azure-OpenAI": AzureEmbed,
    "Xinference": XinferenceEmbed,
    "Tongyi-Qianwen": QWenEmbed,
    "ZHIPU-AI": ZhipuEmbed,
    "FastEmbed": FastEmbed,
    "Youdao": YoudaoEmbed,
    "BaiChuan": BaiChuanEmbed,
    "Jina": JinaEmbed,
    "BAAI": DefaultEmbedding,
    "Mistral": MistralEmbed,
    "Bedrock": BedrockEmbed,
    "Gemini": GeminiEmbed,
}
|
|
|
|
|
|
|
|
|
|
|
|
# Registry of computer-vision (image-to-text) back-ends, keyed by provider
# label. The classes are expected to be supplied by the wildcard imports at
# the top of this module (presumably .cv_model — verify).
CvModel = {
    "OpenAI": GptV4,
    "Azure-OpenAI": AzureGptV4,
    "Ollama": OllamaCV,
    "Xinference": XinferenceCV,
    "Tongyi-Qianwen": QWenCV,
    "ZHIPU-AI": Zhipu4V,
    # NOTE(review): Moonshot is mapped to LocalCV rather than a
    # Moonshot-specific class — confirm this is intentional.
    "Moonshot": LocalCV,
    "Gemini": GeminiCV,
}
|
|
|
|
|
2024-01-18 19:28:37 +08:00
|
|
|
|
|
|
|
# Registry of chat/completion back-ends, keyed by provider label.
# The classes are expected to be supplied by the wildcard imports at the top
# of this module (presumably .chat_model — verify).
ChatModel = {
    "OpenAI": GptTurbo,
    "Azure-OpenAI": AzureChat,
    "ZHIPU-AI": ZhipuChat,
    "Tongyi-Qianwen": QWenChat,
    "Ollama": OllamaChat,
    "Xinference": XinferenceChat,
    "Moonshot": MoonshotChat,
    "DeepSeek": DeepSeekChat,
    "VolcEngine": VolcEngineChat,
    "BaiChuan": BaiChuanChat,
    "MiniMax": MiniMaxChat,
    "Mistral": MistralChat,
    "Gemini": GeminiChat,
    "Bedrock": BedrockChat,
    "Groq": GroqChat,
}
|
|
|
|
|
2024-05-29 16:50:02 +08:00
|
|
|
|
|
|
|
# Registry of re-ranking back-ends, keyed by provider label.
# The classes are expected to be supplied by the wildcard imports at the top
# of this module (presumably .rerank_model — verify).
RerankModel = {
    "BAAI": DefaultRerank,
    "Jina": JinaRerank,
    "Youdao": YoudaoRerank,
    "Xinference": XInferenceRerank,
}
|