ragflow/rag/llm/chat_model.py

#
#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
from zhipuai import ZhipuAI
from dashscope import Generation
from abc import ABC
from openai import OpenAI
import openai
from ollama import Client
from rag.nlp import is_english


class Base(ABC):
    """Chat wrapper around an ``openai.OpenAI`` client.

    Provides a blocking ``chat`` call and a generator-based
    ``chat_streamly`` call. Subclasses in this module either reuse these
    methods with a different endpoint or override them for another SDK.
    """

    def __init__(self, key, model_name, base_url):
        """Create the client.

        Args:
            key: API key handed to ``OpenAI``.
            model_name: Model identifier sent with every request.
            base_url: Endpoint root handed to ``OpenAI``.
        """
        self.client = OpenAI(api_key=key, base_url=base_url)
        self.model_name = model_name

    @staticmethod
    def _truncation_notice(ans):
        """Return the "answer was cut off" suffix for ``ans``.

        The English text is used when ``is_english([ans])`` is truthy,
        otherwise the Chinese text.
        """
        if is_english([ans]):
            return "...\nFor the content length reason, it stopped, continue?"
        return "······\n由于长度的原因，回答被截断了，要继续吗？"

    def chat(self, system, history, gen_conf):
        """Send one blocking chat completion request.

        Args:
            system: System prompt; when truthy it is inserted at the front
                of ``history``. NOTE: this mutates the caller's list.
            history: List of ``{"role": ..., "content": ...}`` messages.
            gen_conf: Extra keyword arguments forwarded to
                ``chat.completions.create``.

        Returns:
            ``(answer, total_tokens)``. On ``openai.APIError`` the answer is
            an ``**ERROR**: ...`` string and the token count is 0.
        """
        if system:
            history.insert(0, {"role": "system", "content": system})
        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=history,
                **gen_conf)
            choice = response.choices[0]
            # The content can be None; treat that as an empty answer
            # instead of crashing on ``None.strip()``.
            ans = (choice.message.content or "").strip()
            if choice.finish_reason == "length":
                ans += self._truncation_notice(ans)
            return ans, response.usage.total_tokens
        except openai.APIError as e:
            return "**ERROR**: " + str(e), 0

    def chat_streamly(self, system, history, gen_conf):
        """Stream a chat completion.

        Yields the accumulated answer string after every chunk that adds
        text (or signals truncation), and finally yields an ``int``: the
        number of text chunks received, used as a token-count estimate.

        Args:
            system: System prompt; when truthy it is inserted at the front
                of ``history``. NOTE: this mutates the caller's list.
            history: List of ``{"role": ..., "content": ...}`` messages.
            gen_conf: Extra keyword arguments forwarded to
                ``chat.completions.create``.
        """
        if system:
            history.insert(0, {"role": "system", "content": system})
        ans = ""
        total_tokens = 0
        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=history,
                stream=True,
                **gen_conf)
            for resp in response:
                # Some chunks carry no choices at all; skip them.
                if not resp.choices:
                    continue
                choice = resp.choices[0]
                piece = getattr(choice.delta, "content", None) or ""
                truncated = choice.finish_reason == "length"
                # The finish reason may arrive on a chunk without text, so
                # it has to be inspected before an empty chunk is skipped.
                if not piece and not truncated:
                    continue
                if piece:
                    ans += piece
                    total_tokens += 1
                if truncated:
                    ans += self._truncation_notice(ans)
                yield ans

        except openai.APIError as e:
            yield ans + "\n**ERROR**: " + str(e)

        yield total_tokens


class GptTurbo(Base):
    """OpenAI chat model; defaults to ``gpt-3.5-turbo`` on the public API."""

    def __init__(self, key, model_name="gpt-3.5-turbo", base_url="https://api.openai.com/v1"):
        # An empty or None base_url falls back to the public endpoint.
        endpoint = base_url or "https://api.openai.com/v1"
        super().__init__(key, model_name, endpoint)


class MoonshotChat(Base):
    """Moonshot chat model served through its OpenAI-compatible endpoint."""

    def __init__(self, key, model_name="moonshot-v1-8k", base_url="https://api.moonshot.cn/v1"):
        # An empty or None base_url falls back to the default endpoint.
        endpoint = base_url or "https://api.moonshot.cn/v1"
        super().__init__(key, model_name, endpoint)


class XinferenceChat(Base):
    """Xinference chat model reached through the OpenAI client in ``Base``."""

    def __init__(self, key=None, model_name="", base_url=""):
        # The caller's key is ignored and a fixed placeholder is sent
        # instead (presumably the server does not validate keys -- confirm).
        super().__init__("xxx", model_name, base_url)


class DeepSeekChat(Base):
    """DeepSeek chat model served through its OpenAI-compatible endpoint."""

    def __init__(self, key, model_name="deepseek-chat", base_url="https://api.deepseek.com/v1"):
        # An empty or None base_url falls back to the default endpoint.
        endpoint = base_url or "https://api.deepseek.com/v1"
        super().__init__(key, model_name, endpoint)


class QWenChat(Base):
    """Chat model backed by DashScope's ``Generation`` API."""

    def __init__(self, key, model_name=Generation.Models.qwen_turbo, **kwargs):
        """Store the model name and register the API key with DashScope.

        ``Base.__init__`` is not called, so no OpenAI client is created.
        The key is written to the module-level ``dashscope.api_key``.
        Extra keyword arguments are accepted and ignored.
        """
        import dashscope
        dashscope.api_key = key
        self.model_name = model_name

    def chat(self, system, history, gen_conf):
        """Send one blocking request through ``Generation.call``.

        Args:
            system: System prompt; when truthy it is inserted at the front
                of ``history``. NOTE: this mutates the caller's list.
            history: List of ``{"role": ..., "content": ...}`` messages.
            gen_conf: Extra keyword arguments forwarded to
                ``Generation.call``.

        Returns:
            ``(answer, total_tokens)``. When the response status is not
            ``HTTPStatus.OK`` the answer is an ``**ERROR**: ...`` string
            and the token count is 0.
        """
        from http import HTTPStatus
        if system:
            history.insert(0, {"role": "system", "content": system})
        response = Generation.call(
            self.model_name,
            messages=history,
            result_format='message',
            **gen_conf
        )
        ans = ""
        tk_count = 0
        if response.status_code == HTTPStatus.OK:
            ans += response.output.choices[0]['message']['content']
            tk_count += response.usage.total_tokens
            if response.output.choices[0].get("finish_reason", "") == "length":
                ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
                    [ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"
            return ans, tk_count

        # str() guards against a non-string (e.g. None) message.
        return "**ERROR**: " + str(response.message), tk_count

    def chat_streamly(self, system, history, gen_conf):
        """Stream a request through ``Generation.call(stream=True)``.

        Yields the answer string taken from each chunk, or an error
        string, and finally yields an ``int`` with the last reported
        ``usage.total_tokens`` (0 if none was seen).

        Args:
            system: System prompt; when truthy it is inserted at the front
                of ``history``. NOTE: this mutates the caller's list.
            history: List of ``{"role": ..., "content": ...}`` messages.
            gen_conf: Extra keyword arguments forwarded to
                ``Generation.call``.
        """
        from http import HTTPStatus
        if system:
            history.insert(0, {"role": "system", "content": system})
        ans = ""
        # Initialised before the try block so the final yield is always
        # defined, even when Generation.call itself raises.
        tk_count = 0
        try:
            response = Generation.call(
                self.model_name,
                messages=history,
                result_format='message',
                stream=True,
                **gen_conf
            )
            for resp in response:
                if resp.status_code == HTTPStatus.OK:
                    # Each chunk's content replaces the previous answer.
                    ans = resp.output.choices[0]['message']['content']
                    tk_count = resp.usage.total_tokens
                    if resp.output.choices[0].get("finish_reason", "") == "length":
                        ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
                            [ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"
                    yield ans
                elif "Access" not in str(resp.message):
                    yield ans + "\n**ERROR**: " + str(resp.message)
                else:
                    # Messages containing "Access" are reported as a
                    # credit/API-key problem, without the partial answer.
                    yield "Out of credit. Please set the API key in **settings > Model providers.**"
        except Exception as e:
            yield ans + "\n**ERROR**: " + str(e)

        yield tk_count


class ZhipuChat(Base):
    """Chat model backed by the ``zhipuai`` SDK client."""

    def __init__(self, key, model_name="glm-3-turbo", **kwargs):
        """Create the ZhipuAI client.

        ``Base.__init__`` is not called, so no OpenAI client is created.
        Extra keyword arguments are accepted and ignored.
        """
        self.client = ZhipuAI(api_key=key)
        self.model_name = model_name

    def chat(self, system, history, gen_conf):
        """Send one blocking chat completion request.

        Args:
            system: System prompt; when truthy it is inserted at the front
                of ``history``. NOTE: this mutates the caller's list.
            history: List of ``{"role": ..., "content": ...}`` messages.
            gen_conf: Keyword arguments forwarded to
                ``chat.completions.create``. ``presence_penalty`` and
                ``frequency_penalty`` are removed from this dict in place
                before the call.

        Returns:
            ``(answer, total_tokens)``. On any exception the answer is an
            ``**ERROR**: ...`` string and the token count is 0.
        """
        if system:
            history.insert(0, {"role": "system", "content": system})
        try:
            gen_conf.pop("presence_penalty", None)
            gen_conf.pop("frequency_penalty", None)
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=history,
                **gen_conf
            )
            ans = response.choices[0].message.content.strip()
            if response.choices[0].finish_reason == "length":
                ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
                    [ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"
            return ans, response.usage.total_tokens
        except Exception as e:
            return "**ERROR**: " + str(e), 0

    def chat_streamly(self, system, history, gen_conf):
        """Stream a chat completion.

        Yields the accumulated answer string after every chunk that adds
        text (or signals truncation), and finally yields an ``int`` with
        the last ``usage.total_tokens`` reported by a chunk (0 if none).

        Args:
            system: System prompt; when truthy it is inserted at the front
                of ``history``. NOTE: this mutates the caller's list.
            history: List of ``{"role": ..., "content": ...}`` messages.
            gen_conf: Keyword arguments forwarded to
                ``chat.completions.create``. ``presence_penalty`` and
                ``frequency_penalty`` are removed from this dict in place
                before the call.
        """
        if system:
            history.insert(0, {"role": "system", "content": system})
        gen_conf.pop("presence_penalty", None)
        gen_conf.pop("frequency_penalty", None)
        ans = ""
        # Initialised before the try block so the final yield is always
        # defined, even when the request itself raises.
        tk_count = 0
        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=history,
                stream=True,
                **gen_conf
            )
            for resp in response:
                if not resp.choices:
                    continue
                choice = resp.choices[0]
                # Usage is read from the chunk, not from the stream object,
                # and only overwrites the count when it is actually present.
                usage = getattr(resp, "usage", None)
                if usage and usage.total_tokens:
                    tk_count = usage.total_tokens
                piece = getattr(choice.delta, "content", None) or ""
                truncated = choice.finish_reason == "length"
                # The finish reason may arrive on a chunk without text, so
                # it has to be inspected before an empty chunk is skipped.
                if not piece and not truncated:
                    continue
                ans += piece
                if truncated:
                    ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
                        [ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"
                yield ans
        except Exception as e:
            yield ans + "\n**ERROR**: " + str(e)

        yield tk_count


class OllamaChat(Base):
    """Chat model backed by an ``ollama.Client``."""

    def __init__(self, key, model_name, **kwargs):
        """Create the Ollama client.

        ``Base.__init__`` is not called, so no OpenAI client is created.

        Args:
            key: Unused.
            model_name: Model identifier sent with every request.
            **kwargs: Must contain ``base_url``, used as the client host.

        Raises:
            KeyError: If ``base_url`` is not supplied.
        """
        self.client = Client(host=kwargs["base_url"])
        self.model_name = model_name

    @staticmethod
    def _build_options(gen_conf):
        """Translate the generic ``gen_conf`` keys into Ollama ``options``."""
        # generic key -> Ollama option name
        key_map = (
            ("temperature", "temperature"),
            ("max_tokens", "num_predict"),
            ("top_p", "top_p"),
            ("presence_penalty", "presence_penalty"),
            ("frequency_penalty", "frequency_penalty"),
        )
        return {dst: gen_conf[src] for src, dst in key_map if src in gen_conf}

    def chat(self, system, history, gen_conf):
        """Send one blocking chat request.

        Args:
            system: System prompt; when truthy it is inserted at the front
                of ``history``. NOTE: this mutates the caller's list.
            history: List of ``{"role": ..., "content": ...}`` messages.
            gen_conf: Generic generation settings; see ``_build_options``.

        Returns:
            ``(answer, tokens)`` where ``tokens`` is ``eval_count`` plus
            ``prompt_eval_count`` (each treated as 0 when absent). On any
            exception the answer is an ``**ERROR**: ...`` string and the
            token count is 0.
        """
        if system:
            history.insert(0, {"role": "system", "content": system})
        try:
            response = self.client.chat(
                model=self.model_name,
                messages=history,
                options=self._build_options(gen_conf)
            )
            ans = response["message"]["content"].strip()
            return ans, response.get("eval_count", 0) + response.get("prompt_eval_count", 0)
        except Exception as e:
            return "**ERROR**: " + str(e), 0

    def chat_streamly(self, system, history, gen_conf):
        """Stream a chat request.

        Yields the accumulated answer string after each content fragment,
        and finally yields an ``int`` token count: ``prompt_eval_count`` +
        ``eval_count`` from the chunk marked ``done``, or 0 when the stream
        failed or ended without such a chunk.

        Args:
            system: System prompt; when truthy it is inserted at the front
                of ``history``. NOTE: this mutates the caller's list.
            history: List of ``{"role": ..., "content": ...}`` messages.
            gen_conf: Generic generation settings; see ``_build_options``.
        """
        if system:
            history.insert(0, {"role": "system", "content": system})
        options = self._build_options(gen_conf)
        ans = ""
        try:
            response = self.client.chat(
                model=self.model_name,
                messages=history,
                stream=True,
                options=options
            )
            for resp in response:
                if resp["done"]:
                    # The count must be yielded: a value passed to ``return``
                    # inside a generator is never seen by a plain for-loop.
                    yield resp.get("prompt_eval_count", 0) + resp.get("eval_count", 0)
                    return
                # Streamed chunks carry fragments, so accumulate them to
                # match the cumulative answers of the other providers.
                ans += resp["message"]["content"]
                yield ans
        except Exception as e:
            yield ans + "\n**ERROR**: " + str(e)
        yield 0
build python version rag-flow (#21) * clean rust version project * clean rust version project * build python version rag-flow 2024-01-15 08:46:22 +08:00			`#`
llm configuation refine and trievalTest API refine (#40) 2024-01-19 19:51:57 +08:00			`# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.`
build python version rag-flow (#21) * clean rust version project * clean rust version project * build python version rag-flow 2024-01-15 08:46:22 +08:00			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`#`
apply pep8 formalize (#155) 2024-03-27 11:33:46 +08:00			`from zhipuai import ZhipuAI`
			`from dashscope import Generation`
use minio to store uploaded files; build dialog server; (#16) * format code * use minio to store uploaded files; build dialog server; 2023-12-25 19:05:59 +08:00			`from abc import ABC`
add llm API (#19) * add llm API * refine llm API 2023-12-28 13:50:13 +08:00			`from openai import OpenAI`
refine admin initialization (#75) 2024-02-27 14:57:34 +08:00			`import openai`
Support Ollama (#261) ### What problem does this PR solve? Issue link:#221 ### Type of change - [x] New Feature (non-breaking change which adds functionality) 2024-04-08 19:20:57 +08:00			`from ollama import Client`
deal with stop reason being length problem (#109) 2024-03-07 16:12:01 +08:00			`from rag.nlp import is_english`

add llm API (#19) * add llm API * refine llm API 2023-12-28 13:50:13 +08:00
use minio to store uploaded files; build dialog server; (#16) * format code * use minio to store uploaded files; build dialog server; 2023-12-25 19:05:59 +08:00			`class Base(ABC):`
add support for deepseek (#668) ### What problem does this PR solve? #666 ### Type of change - [x] New Feature (non-breaking change which adds functionality) 2024-05-08 10:30:02 +08:00			`def __init__(self, key, model_name, base_url):`
add base url for OpenAI (#166) 2024-03-28 19:15:16 +08:00			`self.client = OpenAI(api_key=key, base_url=base_url)`
Test APIs and fix bugs (#41) 2024-01-22 19:51:38 +08:00			`self.model_name = model_name`
use minio to store uploaded files; build dialog server; (#16) * format code * use minio to store uploaded files; build dialog server; 2023-12-25 19:05:59 +08:00
			`def chat(self, system, history, gen_conf):`
apply pep8 formalize (#155) 2024-03-27 11:33:46 +08:00			`if system:`
			`history.insert(0, {"role": "system", "content": system})`
refine admin initialization (#75) 2024-02-27 14:57:34 +08:00			`try:`
deal with stop reason being length problem (#109) 2024-03-07 16:12:01 +08:00			`response = self.client.chat.completions.create(`
refine admin initialization (#75) 2024-02-27 14:57:34 +08:00			`model=self.model_name,`
			`messages=history,`
			`**gen_conf)`
refine OpenAi Api (#159) 2024-03-27 17:55:45 +08:00			`ans = response.choices[0].message.content.strip()`
			`if response.choices[0].finish_reason == "length":`
deal with stop reason being length problem (#109) 2024-03-07 16:12:01 +08:00			`ans += "...\nFor the content length reason, it stopped, continue?" if is_english(`
			`[ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"`
conversation API backend update (#360) ### What problem does this PR solve? Issue link:#345 ### Type of change - [x] New Feature (non-breaking change which adds functionality) 2024-04-15 14:43:44 +08:00			`return ans, response.usage.total_tokens`
refine admin initialization (#75) 2024-02-27 14:57:34 +08:00			`except openai.APIError as e:`
apply pep8 formalize (#155) 2024-03-27 11:33:46 +08:00			`return "ERROR: " + str(e), 0`
use minio to store uploaded files; build dialog server; (#16) * format code * use minio to store uploaded files; build dialog server; 2023-12-25 19:05:59 +08:00
add stream chat (#811) ### What problem does this PR solve? #709 ### Type of change - [x] New Feature (non-breaking change which adds functionality) 2024-05-16 20:14:53 +08:00			`def chat_streamly(self, system, history, gen_conf):`
			`if system:`
			`history.insert(0, {"role": "system", "content": system})`
			`ans = ""`
			`total_tokens = 0`
			`try:`
			`response = self.client.chat.completions.create(`
			`model=self.model_name,`
			`messages=history,`
			`stream=True,`
			`**gen_conf)`
			`for resp in response:`
			`if not resp.choices[0].delta.content:continue`
			`ans += resp.choices[0].delta.content`
			`total_tokens += 1`
			`if resp.choices[0].finish_reason == "length":`
			`ans += "...\nFor the content length reason, it stopped, continue?" if is_english(`
			`[ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"`
			`yield ans`

			`except openai.APIError as e:`
			`yield ans + "\nERROR: " + str(e)`

			`yield total_tokens`

use minio to store uploaded files; build dialog server; (#16) * format code * use minio to store uploaded files; build dialog server; 2023-12-25 19:05:59 +08:00
add support for deepseek (#668) ### What problem does this PR solve? #666 ### Type of change - [x] New Feature (non-breaking change which adds functionality) 2024-05-08 10:30:02 +08:00			`class GptTurbo(Base):`
			`def __init__(self, key, model_name="gpt-3.5-turbo", base_url="https://api.openai.com/v1"):`
			`if not base_url: base_url="https://api.openai.com/v1"`
			`super().__init__(key, model_name, base_url)`


			`class MoonshotChat(Base):`
add base url for OpenAI (#166) 2024-03-28 19:15:16 +08:00			`def __init__(self, key, model_name="moonshot-v1-8k", base_url="https://api.moonshot.cn/v1"):`
			`if not base_url: base_url="https://api.moonshot.cn/v1"`
add support for deepseek (#668) ### What problem does this PR solve? #666 ### Type of change - [x] New Feature (non-breaking change which adds functionality) 2024-05-08 10:30:02 +08:00			`super().__init__(key, model_name, base_url)`
add dockerfile for cuda envirement. Refine table search strategy, (#123) 2024-03-14 19:45:29 +08:00
add support for deepseek (#668) ### What problem does this PR solve? #666 ### Type of change - [x] New Feature (non-breaking change which adds functionality) 2024-05-08 10:30:02 +08:00
			`class XinferenceChat(Base):`
			`def __init__(self, key=None, model_name="", base_url=""):`
			`key = "xxx"`
			`super().__init__(key, model_name, base_url)`


			`class DeepSeekChat(Base):`
			`def __init__(self, key, model_name="deepseek-chat", base_url="https://api.deepseek.com/v1"):`
			`if not base_url: base_url="https://api.deepseek.com/v1"`
			`super().__init__(key, model_name, base_url)`
add Moonshot, debug my_llm (#126) 2024-03-15 18:59:00 +08:00
add dockerfile for cuda envirement. Refine table search strategy, (#123) 2024-03-14 19:45:29 +08:00
add llm API (#19) * add llm API * refine llm API 2023-12-28 13:50:13 +08:00			`class QWenChat(Base):`
add base url for OpenAI (#166) 2024-03-28 19:15:16 +08:00			`def __init__(self, key, model_name=Generation.Models.qwen_turbo, **kwargs):`
Test APIs and fix bugs (#41) 2024-01-22 19:51:38 +08:00			`import dashscope`
			`dashscope.api_key = key`
			`self.model_name = model_name`

use minio to store uploaded files; build dialog server; (#16) * format code * use minio to store uploaded files; build dialog server; 2023-12-25 19:05:59 +08:00			`def chat(self, system, history, gen_conf):`
			`from http import HTTPStatus`
apply pep8 formalize (#155) 2024-03-27 11:33:46 +08:00			`if system:`
			`history.insert(0, {"role": "system", "content": system})`
use minio to store uploaded files; build dialog server; (#16) * format code * use minio to store uploaded files; build dialog server; 2023-12-25 19:05:59 +08:00			`response = Generation.call(`
Test APIs and fix bugs (#41) 2024-01-22 19:51:38 +08:00			`self.model_name,`
add llm API (#19) * add llm API * refine llm API 2023-12-28 13:50:13 +08:00			`messages=history,`
Refine resume parts and fix bugs in retrival using sql (#66) 2024-02-19 19:22:17 +08:00			`result_format='message',`
			`**gen_conf`
use minio to store uploaded files; build dialog server; (#16) * format code * use minio to store uploaded files; build dialog server; 2023-12-25 19:05:59 +08:00			`)`
deal with stop reason being length problem (#109) 2024-03-07 16:12:01 +08:00			`ans = ""`
			`tk_count = 0`
use minio to store uploaded files; build dialog server; (#16) * format code * use minio to store uploaded files; build dialog server; 2023-12-25 19:05:59 +08:00			`if response.status_code == HTTPStatus.OK:`
deal with stop reason being length problem (#109) 2024-03-07 16:12:01 +08:00			`ans += response.output.choices[0]['message']['content']`
refine log format (#312) ### What problem does this PR solve? Issue link:#264 ### Type of change - [x] Documentation Update - [x] Refactoring 2024-04-11 10:13:43 +08:00			`tk_count += response.usage.total_tokens`
deal with stop reason being length problem (#109) 2024-03-07 16:12:01 +08:00			`if response.output.choices[0].get("finish_reason", "") == "length":`
apply pep8 formalize (#155) 2024-03-27 11:33:46 +08:00			`ans += "...\nFor the content length reason, it stopped, continue?" if is_english(`
			`[ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"`
deal with stop reason being length problem (#109) 2024-03-07 16:12:01 +08:00			`return ans, tk_count`

			`return "ERROR: " + response.message, tk_count`
refactor retieval_test, add SQl retrieval methods (#61) 2024-02-08 17:01:01 +08:00
add stream chat (#811) ### What problem does this PR solve? #709 ### Type of change - [x] New Feature (non-breaking change which adds functionality) 2024-05-16 20:14:53 +08:00			`def chat_streamly(self, system, history, gen_conf):`
			`from http import HTTPStatus`
			`if system:`
			`history.insert(0, {"role": "system", "content": system})`
			`ans = ""`
			`try:`
			`response = Generation.call(`
			`self.model_name,`
			`messages=history,`
			`result_format='message',`
			`stream=True,`
			`**gen_conf`
			`)`
			`tk_count = 0`
			`for resp in response:`
			`if resp.status_code == HTTPStatus.OK:`
			`ans = resp.output.choices[0]['message']['content']`
			`tk_count = resp.usage.total_tokens`
			`if resp.output.choices[0].get("finish_reason", "") == "length":`
			`ans += "...\nFor the content length reason, it stopped, continue?" if is_english(`
			`[ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"`
			`yield ans`
			`else:`
			`yield ans + "\nERROR: " + resp.message if str(resp.message).find("Access")<0 else "Out of credit. Please set the API key in settings > Model providers."`
			`except Exception as e:`
			`yield ans + "\nERROR: " + str(e)`

			`yield tk_count`

refactor retieval_test, add SQl retrieval methods (#61) 2024-02-08 17:01:01 +08:00
			`class ZhipuChat(Base):`
add base url for OpenAI (#166) 2024-03-28 19:15:16 +08:00			`def __init__(self, key, model_name="glm-3-turbo", **kwargs):`
refactor retieval_test, add SQl retrieval methods (#61) 2024-02-08 17:01:01 +08:00			`self.client = ZhipuAI(api_key=key)`
			`self.model_name = model_name`

			`def chat(self, system, history, gen_conf):`
apply pep8 formalize (#155) 2024-03-27 11:33:46 +08:00			`if system:`
			`history.insert(0, {"role": "system", "content": system})`
deal with stop reason being length problem (#109) 2024-03-07 16:12:01 +08:00			`try:`
remove presence_penalty for chatglm (#268) ### What problem does this PR solve? Issue link:#265 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) 2024-04-09 09:24:08 +08:00			`if "presence_penalty" in gen_conf: del gen_conf["presence_penalty"]`
resolve issure to call ZH?IPUAI (#277) ### What problem does this PR solve? Issue link:#265 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) 2024-04-09 16:16:10 +08:00			`if "frequency_penalty" in gen_conf: del gen_conf["frequency_penalty"]`
deal with stop reason being length problem (#109) 2024-03-07 16:12:01 +08:00			`response = self.client.chat.completions.create(`
refine OpenAi Api (#159) 2024-03-27 17:55:45 +08:00			`model=self.model_name,`
deal with stop reason being length problem (#109) 2024-03-07 16:12:01 +08:00			`messages=history,`
			`**gen_conf`
			`)`
refine OpenAi Api (#159) 2024-03-27 17:55:45 +08:00			`ans = response.choices[0].message.content.strip()`
			`if response.choices[0].finish_reason == "length":`
deal with stop reason being length problem (#109) 2024-03-07 16:12:01 +08:00			`ans += "...\nFor the content length reason, it stopped, continue?" if is_english(`
			`[ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"`
conversation API backend update (#360) ### What problem does this PR solve? Issue link:#345 ### Type of change - [x] New Feature (non-breaking change which adds functionality) 2024-04-15 14:43:44 +08:00			`return ans, response.usage.total_tokens`
deal with stop reason being length problem (#109) 2024-03-07 16:12:01 +08:00			`except Exception as e:`
add local llm implementation (#119) 2024-03-12 11:57:08 +08:00			`return "ERROR: " + str(e), 0`

add stream chat (#811) ### What problem does this PR solve? #709 ### Type of change - [x] New Feature (non-breaking change which adds functionality) 2024-05-16 20:14:53 +08:00			`def chat_streamly(self, system, history, gen_conf):`
			`if system:`
			`history.insert(0, {"role": "system", "content": system})`
			`if "presence_penalty" in gen_conf: del gen_conf["presence_penalty"]`
			`if "frequency_penalty" in gen_conf: del gen_conf["frequency_penalty"]`
			`ans = ""`
			`try:`
			`response = self.client.chat.completions.create(`
			`model=self.model_name,`
			`messages=history,`
			`stream=True,`
			`**gen_conf`
			`)`
			`tk_count = 0`
			`for resp in response:`
			`if not resp.choices[0].delta.content:continue`
			`delta = resp.choices[0].delta.content`
			`ans += delta`
			`tk_count = resp.usage.total_tokens if response.usage else 0`
			`if resp.output.choices[0].finish_reason == "length":`
			`ans += "...\nFor the content length reason, it stopped, continue?" if is_english(`
			`[ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"`
			`yield ans`
			`except Exception as e:`
			`yield ans + "\nERROR: " + str(e)`

			`yield tk_count`

apply pep8 formalize (#155) 2024-03-27 11:33:46 +08:00
Support Ollama (#261) ### What problem does this PR solve? Issue link:#221 ### Type of change - [x] New Feature (non-breaking change which adds functionality) 2024-04-08 19:20:57 +08:00			`class OllamaChat(Base):`
			`def __init__(self, key, model_name, **kwargs):`
			`self.client = Client(host=kwargs["base_url"])`
			`self.model_name = model_name`

			`def chat(self, system, history, gen_conf):`
			`if system:`
			`history.insert(0, {"role": "system", "content": system})`
			`try:`
fix disable and enable llm setting in dialog (#616) ### What problem does this PR solve? #614 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) 2024-04-30 11:04:14 +08:00			`options = {}`
			`if "temperature" in gen_conf: options["temperature"] = gen_conf["temperature"]`
			`if "max_tokens" in gen_conf: options["num_predict"] = gen_conf["max_tokens"]`
			`if "top_p" in gen_conf: options["top_k"] = gen_conf["top_p"]`
			`if "presence_penalty" in gen_conf: options["presence_penalty"] = gen_conf["presence_penalty"]`
			`if "frequency_penalty" in gen_conf: options["frequency_penalty"] = gen_conf["frequency_penalty"]`
Support Ollama (#261) ### What problem does this PR solve? Issue link:#221 ### Type of change - [x] New Feature (non-breaking change which adds functionality) 2024-04-08 19:20:57 +08:00			`response = self.client.chat(`
			`model=self.model_name,`
			`messages=history,`
			`options=options`
			`)`
			`ans = response["message"]["content"].strip()`
fix ollama issuet push (#486) ### What problem does this PR solve? #477 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) 2024-04-22 15:13:01 +08:00			`return ans, response["eval_count"] + response.get("prompt_eval_count", 0)`
Support Ollama (#261) ### What problem does this PR solve? Issue link:#221 ### Type of change - [x] New Feature (non-breaking change which adds functionality) 2024-04-08 19:20:57 +08:00			`except Exception as e:`
			`return "ERROR: " + str(e), 0`

add stream chat (#811) ### What problem does this PR solve? #709 ### Type of change - [x] New Feature (non-breaking change which adds functionality) 2024-05-16 20:14:53 +08:00			`def chat_streamly(self, system, history, gen_conf):`
			`if system:`
			`history.insert(0, {"role": "system", "content": system})`
			`options = {}`
			`if "temperature" in gen_conf: options["temperature"] = gen_conf["temperature"]`
			`if "max_tokens" in gen_conf: options["num_predict"] = gen_conf["max_tokens"]`
			`if "top_p" in gen_conf: options["top_k"] = gen_conf["top_p"]`
			`if "presence_penalty" in gen_conf: options["presence_penalty"] = gen_conf["presence_penalty"]`
			`if "frequency_penalty" in gen_conf: options["frequency_penalty"] = gen_conf["frequency_penalty"]`
			`ans = ""`
			`try:`
			`response = self.client.chat(`
			`model=self.model_name,`
			`messages=history,`
			`stream=True,`
			`options=options`
			`)`
			`for resp in response:`
			`if resp["done"]:`
			`return resp["prompt_eval_count"] + resp["eval_count"]`
			`ans = resp["message"]["content"]`
			`yield ans`
			`except Exception as e:`
			`yield ans + "\nERROR: " + str(e)`
			`yield 0`