Fix issue with keep_alive=-1 for ollama chat model by allowing a user to set an additional configuration option (#9017)
### What problem does this PR solve?

Fixes an issue with `keep_alive=-1` for the ollama chat model by allowing a user to set an additional configuration option. This is a non-breaking change because the previous default, `keep_alive=-1`, is still used when the option is not set.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Performance Improvement
- [x] Other (please describe): an additional configuration option has been added to control the behavior of RAGFlow when working with an ollama LLM
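For illustration, here is a minimal sketch of the precedence the patch introduces: an explicit `ollama_keep_alive` kwarg wins, then the `OLLAMA_KEEP_ALIVE` environment variable, then the previous hard-coded default of `-1`. The standalone helper name `resolve_keep_alive` is hypothetical and exists only to show the lookup order.

```python
import os

def resolve_keep_alive(**kwargs) -> int:
    """Hypothetical helper mirroring the patch: kwarg > env var > old default (-1)."""
    return kwargs.get("ollama_keep_alive", int(os.environ.get("OLLAMA_KEEP_ALIVE", -1)))

print(resolve_keep_alive())                       # -1 unless OLLAMA_KEEP_ALIVE is set
print(resolve_keep_alive(ollama_keep_alive=300))  # explicit kwarg wins: 300
```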
parent 3db819f011
commit b47dcc9108
```diff
@@ -663,6 +663,7 @@ class OllamaChat(Base):
         self.client = Client(host=base_url) if not key or key == "x" else Client(host=base_url, headers={"Authorization": f"Bearer {key}"})
         self.model_name = model_name
+        self.keep_alive = kwargs.get("ollama_keep_alive", int(os.environ.get("OLLAMA_KEEP_ALIVE", -1)))

     def _clean_conf(self, gen_conf):
         options = {}
```
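For context, ollama's `keep_alive` controls how long a model stays loaded in memory after a request: a positive value is a duration (seconds, or a string such as `"5m"`), `0` unloads the model immediately, and `-1` keeps it loaded indefinitely, which is why `-1` was the previous hard-coded value. A minimal sketch of the call the patch parameterizes, assuming a local ollama server with `llama3` already pulled:

```python
from ollama import Client

client = Client(host="http://localhost:11434")

# keep_alive=300 keeps the model in memory for 5 minutes after this call;
# -1 would pin it indefinitely, 0 would unload it right away.
response = client.chat(
    model="llama3",
    messages=[{"role": "user", "content": "Hello!"}],
    keep_alive=300,
)
print(response["message"]["content"])
```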
```diff
@@ -679,7 +680,7 @@ class OllamaChat(Base):
             ctx_size = self._calculate_dynamic_ctx(history)

             gen_conf["num_ctx"] = ctx_size
-            response = self.client.chat(model=self.model_name, messages=history, options=gen_conf, keep_alive=-1)
+            response = self.client.chat(model=self.model_name, messages=history, options=gen_conf, keep_alive=self.keep_alive)
             ans = response["message"]["content"].strip()
             token_count = response.get("eval_count", 0) + response.get("prompt_eval_count", 0)
             return ans, token_count
```
```diff
@@ -706,7 +707,7 @@ class OllamaChat(Base):

         ans = ""
         try:
-            response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options, keep_alive=-1)
+            response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options, keep_alive=self.keep_alive)
             for resp in response:
                 if resp["done"]:
                     token_count = resp.get("prompt_eval_count", 0) + resp.get("eval_count", 0)
```
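With this change a deployment can override the default without touching code. A sketch of both paths, with the caveat that the positional constructor signature `(key, model_name, base_url)` is assumed from the constructor body in the first hunk rather than shown in this diff:

```python
import os
from rag.llm.chat_model import OllamaChat  # module path assumed from ragflow's layout

# Option 1: set the environment variable before RAGFlow starts.
os.environ["OLLAMA_KEEP_ALIVE"] = "300"  # seconds; omit or use -1 for the old behavior

# Option 2: pass the new kwarg directly; only ollama_keep_alive is introduced by this patch.
chat = OllamaChat("x", "llama3", base_url="http://localhost:11434", ollama_keep_alive=300)
```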