ragflow/rag/llm/chat_model.py

203 lines
7.6 KiB
Python
Raw Normal View History

#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
2024-03-27 11:33:46 +08:00
from zhipuai import ZhipuAI
from dashscope import Generation
from abc import ABC
from openai import OpenAI
2024-02-27 14:57:34 +08:00
import openai
from ollama import Client
from rag.nlp import is_english
2024-03-12 11:57:08 +08:00
from rag.utils import num_tokens_from_string
class Base(ABC):
    """Common interface shared by the chat-model wrappers in this module."""

    def __init__(self, key, model_name):
        """Accept an API key and a model name; the base class stores neither."""
        pass

    def chat(self, system, history, gen_conf):
        """Run one chat completion; every concrete wrapper must override this.

        Args:
            system: System prompt text; the subclasses in this file skip it
                when it is falsy.
            history: List of ``{"role": ..., "content": ...}`` message dicts.
            gen_conf: Dict of generation options forwarded to the backend.

        Raises:
            NotImplementedError: Always, because the base class has no backend.
        """
        # The original message referred to an "encode" method, which does not
        # exist on this class; name the method that actually needs overriding.
        raise NotImplementedError("Please implement chat method!")
class GptTurbo(Base):
    """Chat wrapper built on the ``openai`` client's chat-completions API."""

    def __init__(self, key, model_name="gpt-3.5-turbo", base_url="https://api.openai.com/v1"):
        """Create the OpenAI client.

        Args:
            key: API key handed to the client.
            model_name: Model identifier sent with every request.
            base_url: Endpoint URL; a falsy value falls back to the public
                OpenAI endpoint.
        """
        if not base_url:
            base_url = "https://api.openai.com/v1"
        self.client = OpenAI(api_key=key, base_url=base_url)
        self.model_name = model_name

    def chat(self, system, history, gen_conf):
        """Send the conversation and return ``(answer, completion_tokens)``.

        Args:
            system: System prompt; prepended as a ``system`` message when truthy.
            history: List of message dicts. It is copied, not modified.
            gen_conf: Extra keyword arguments for ``chat.completions.create``.

        Returns:
            A ``(text, token_count)`` tuple. On ``openai.APIError`` the text
            is ``"**ERROR**: <message>"`` and the count is 0.
        """
        # Work on a copy: inserting into the caller's list would stack up a
        # new system message every time the same history object is reused.
        messages = list(history)
        if system:
            messages.insert(0, {"role": "system", "content": system})
        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                **gen_conf)
            choice = response.choices[0]
            # ``content`` can be None; treat that as an empty answer instead
            # of failing with AttributeError on ``.strip()``.
            ans = (choice.message.content or "").strip()
            if choice.finish_reason == "length":
                # Tell the user the reply was cut off, in the reply's language.
                ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
                    [ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
            return ans, response.usage.completion_tokens
        except openai.APIError as e:
            return "**ERROR**: " + str(e), 0
class MoonshotChat(GptTurbo):
    """``GptTurbo`` pointed at the Moonshot endpoint.

    The original class repeated ``GptTurbo.__init__`` and ``GptTurbo.chat``
    line for line. Only the defaults differ, so the constructor now delegates
    to the parent and ``chat`` is inherited.
    """

    def __init__(self, key, model_name="moonshot-v1-8k", base_url="https://api.moonshot.cn/v1"):
        """Create the client against the Moonshot endpoint.

        Args:
            key: API key handed to the client.
            model_name: Model identifier sent with every request.
            base_url: Endpoint URL; a falsy value falls back to the Moonshot
                default.
        """
        # Resolve the fallback here, before delegating: the parent would
        # otherwise replace a falsy URL with the OpenAI endpoint.
        if not base_url:
            base_url = "https://api.moonshot.cn/v1"
        super().__init__(key, model_name, base_url)
2024-03-15 18:59:00 +08:00
class QWenChat(Base):
    """Chat wrapper around ``dashscope.Generation``."""

    def __init__(self, key, model_name=Generation.Models.qwen_turbo, **kwargs):
        """Register the API key with dashscope and remember the model name.

        Args:
            key: API key; assigned to the module-level ``dashscope.api_key``.
            model_name: Model identifier passed to ``Generation.call``.
            **kwargs: Accepted and ignored.
        """
        import dashscope
        dashscope.api_key = key
        self.model_name = model_name

    def chat(self, system, history, gen_conf):
        """Send the conversation and return ``(answer, total_tokens)``.

        Args:
            system: System prompt; prepended as a ``system`` message when truthy.
            history: List of message dicts. It is copied, not modified.
            gen_conf: Extra keyword arguments for ``Generation.call``.

        Returns:
            A ``(text, token_count)`` tuple. When the response status is not
            OK the text is ``"**ERROR**: <response.message>"`` and the count
            is 0.
        """
        from http import HTTPStatus
        # Work on a copy: inserting into the caller's list would stack up a
        # new system message every time the same history object is reused.
        messages = list(history)
        if system:
            messages.insert(0, {"role": "system", "content": system})
        response = Generation.call(
            self.model_name,
            messages=messages,
            result_format='message',
            **gen_conf
        )
        ans = ""
        tk_count = 0
        if response.status_code == HTTPStatus.OK:
            choice = response.output.choices[0]
            ans += choice['message']['content']
            # NOTE(review): this wrapper reports total_tokens, while the
            # OpenAI-style wrappers in this file report completion_tokens.
            tk_count += response.usage.total_tokens
            if choice.get("finish_reason", "") == "length":
                # Tell the user the reply was cut off, in the reply's language.
                ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
                    [ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
            return ans, tk_count
        return "**ERROR**: " + response.message, tk_count
class ZhipuChat(Base):
    """Chat wrapper around the ``zhipuai`` client."""

    def __init__(self, key, model_name="glm-3-turbo", **kwargs):
        """Create the ZhipuAI client.

        Args:
            key: API key handed to the client.
            model_name: Model identifier sent with every request.
            **kwargs: Accepted and ignored.
        """
        self.client = ZhipuAI(api_key=key)
        self.model_name = model_name

    def chat(self, system, history, gen_conf):
        """Send the conversation and return ``(answer, completion_tokens)``.

        Args:
            system: System prompt; prepended as a ``system`` message when truthy.
            history: List of message dicts. It is copied, not modified.
            gen_conf: Generation options. ``presence_penalty`` and
                ``frequency_penalty`` are left out of the request; the
                caller's dict is not modified.

        Returns:
            A ``(text, token_count)`` tuple. On any exception the text is
            ``"**ERROR**: <message>"`` and the count is 0.
        """
        # Work on a copy: inserting into the caller's list would stack up a
        # new system message every time the same history object is reused.
        messages = list(history)
        if system:
            messages.insert(0, {"role": "system", "content": system})
        try:
            # Filter into a new dict instead of deleting keys from the
            # caller's one, which may be reused for other models.
            # NOTE(review): presumably these two options are not accepted by
            # this API; the original code stripped them as well.
            conf = {k: v for k, v in gen_conf.items()
                    if k not in ("presence_penalty", "frequency_penalty")}
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                **conf
            )
            choice = response.choices[0]
            ans = choice.message.content.strip()
            if choice.finish_reason == "length":
                # Tell the user the reply was cut off, in the reply's language.
                ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
                    [ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
            return ans, response.usage.completion_tokens
        except Exception as e:
            return "**ERROR**: " + str(e), 0
2024-03-27 11:33:46 +08:00
class OllamaChat(Base):
    """Chat wrapper around the ``ollama`` client."""

    def __init__(self, key, model_name, **kwargs):
        """Create the Ollama client.

        Args:
            key: Accepted for interface parity with the other wrappers; unused.
            model_name: Model identifier sent with every request.
            **kwargs: Must contain ``base_url``, the address of the Ollama
                host.

        Raises:
            KeyError: If ``base_url`` is not supplied.
        """
        self.client = Client(host=kwargs["base_url"])
        self.model_name = model_name

    def chat(self, system, history, gen_conf):
        """Send the conversation and return ``(answer, eval_count)``.

        Args:
            system: System prompt; prepended as a ``system`` message when truthy.
            history: List of message dicts. It is copied, not modified.
            gen_conf: OpenAI-style generation options; the known ones are
                translated to Ollama option names, with defaults for any
                that are missing.

        Returns:
            A ``(text, token_count)`` tuple, the count taken from the
            response's ``eval_count``. On any exception the text is
            ``"**ERROR**: <message>"`` and the count is 0.
        """
        # Work on a copy: inserting into the caller's list would stack up a
        # new system message every time the same history object is reused.
        messages = list(history)
        if system:
            messages.insert(0, {"role": "system", "content": system})
        try:
            options = {
                "temperature": gen_conf.get("temperature", 0.1),
                # Ollama's name for the output-token limit.
                "num_predict": gen_conf.get("max_tokens", 128),
                # The original sent this value as "top_k", which is a
                # different (integer) sampling option; the nucleus-sampling
                # probability belongs under "top_p".
                "top_p": gen_conf.get("top_p", 0.3),
                "presence_penalty": gen_conf.get("presence_penalty", 0.4),
                "frequency_penalty": gen_conf.get("frequency_penalty", 0.7),
            }
            response = self.client.chat(
                model=self.model_name,
                messages=messages,
                options=options
            )
            ans = response["message"]["content"].strip()
            return ans, response["eval_count"]
        except Exception as e:
            return "**ERROR**: " + str(e), 0
2024-03-12 11:57:08 +08:00
class LocalLLM(Base):
    """Chat wrapper that forwards requests to a model server over local RPC."""

    class RPCProxy:
        """Turns attribute access into remote calls over a
        ``multiprocessing.connection`` client.

        ``proxy.foo(*args, **kwargs)`` sends the pickled tuple
        ``("foo", args, kwargs)`` and returns the unpickled reply.
        """

        def __init__(self, host, port):
            """Store the address and open the connection immediately."""
            self.host = host
            self.port = int(port)
            self.__conn()

        def __conn(self):
            """(Re)open the connection to ``(host, port)``."""
            from multiprocessing.connection import Client
            # NOTE(review): the authkey is hard-coded in source; consider
            # loading it from configuration.
            self._connection = Client(
                (self.host, self.port), authkey=b'infiniflow-token4kevinhu')

        def __getattr__(self, name):
            """Return a callable that performs the remote call ``name``."""
            import pickle

            def do_rpc(*args, **kwargs):
                last_error = None
                # Up to three attempts, reconnecting after each failure.
                for _ in range(3):
                    try:
                        self._connection.send(
                            pickle.dumps((name, args, kwargs)))
                        # SECURITY: pickle.loads runs on bytes read from the
                        # socket; this is only safe while the peer is trusted.
                        return pickle.loads(self._connection.recv())
                    except Exception as e:
                        last_error = e
                        self.__conn()
                # Chain the last failure so the root cause is not lost.
                raise Exception("RPC connection lost!") from last_error

            return do_rpc

    def __init__(self, *args, **kwargs):
        """Connect to the local model server; all arguments are ignored."""
        self.client = LocalLLM.RPCProxy("127.0.0.1", 7860)

    def chat(self, system, history, gen_conf):
        """Send the conversation and return ``(answer, token_count)``.

        Args:
            system: System prompt; prepended as a ``system`` message when truthy.
            history: List of message dicts. It is copied, not modified.
            gen_conf: Generation options, passed through to the remote
                ``chat`` call unchanged.

        Returns:
            A ``(text, token_count)`` tuple, the count computed with
            ``num_tokens_from_string`` on the answer. On any exception the
            text is ``"**ERROR**: <message>"`` and the count is 0.
        """
        # Work on a copy: inserting into the caller's list would stack up a
        # new system message every time the same history object is reused.
        messages = list(history)
        if system:
            messages.insert(0, {"role": "system", "content": system})
        try:
            ans = self.client.chat(
                messages,
                gen_conf
            )
            return ans, num_tokens_from_string(ans)
        except Exception as e:
            return "**ERROR**: " + str(e), 0