From d5236b71f4898d5396cd5e917617e6c2058c38e6 Mon Sep 17 00:00:00 2001
From: Kevin Hu
Date: Thu, 12 Jun 2025 15:09:40 +0800
Subject: [PATCH] Refa: ollama keep alive issue. (#8216)

### What problem does this PR solve?

#8122

### Type of change

- [x] Refactoring
---
 rag/llm/chat_model.py | 4 ++--
 rag/llm/cv_model.py   | 6 ++++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 820dbec53..a7a088077 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -871,7 +871,7 @@ class OllamaChat(Base):
             ctx_size = self._calculate_dynamic_ctx(history)

             gen_conf["num_ctx"] = ctx_size
-            response = self.client.chat(model=self.model_name, messages=history, options=gen_conf)
+            response = self.client.chat(model=self.model_name, messages=history, options=gen_conf, keep_alive=-1)
             ans = response["message"]["content"].strip()
             token_count = response.get("eval_count", 0) + response.get("prompt_eval_count", 0)
             return ans, token_count
@@ -898,7 +898,7 @@ class OllamaChat(Base):

             ans = ""
             try:
-                response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options)
+                response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options, keep_alive=-1)
                 for resp in response:
                     if resp["done"]:
                         token_count = resp.get("prompt_eval_count", 0) + resp.get("eval_count", 0)
diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index 9be83ace5..82640b56f 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -506,7 +506,8 @@ class OllamaCV(Base):
             response = self.client.chat(
                 model=self.model_name,
                 messages=history,
-                options=options
+                options=options,
+                keep_alive=-1
             )

             ans = response["message"]["content"].strip()
@@ -536,7 +537,8 @@
                 model=self.model_name,
                 messages=history,
                 stream=True,
-                options=options
+                options=options,
+                keep_alive=-1
             )
             for resp in response:
                 if resp["done"]:
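
Note (not part of the patch): in the Ollama Python client, `keep_alive` controls how long the server keeps the model loaded in memory after a request; a negative value such as `-1` keeps it resident indefinitely instead of unloading it after the default idle timeout. Below is a minimal standalone sketch of the same call pattern the patch adopts, assuming the `ollama` package and a locally running Ollama server; the host and model name are placeholders, not values from this repository.

```python
from ollama import Client

# Placeholder host and model name; adjust to your local setup.
client = Client(host="http://localhost:11434")

response = client.chat(
    model="llama3",
    messages=[{"role": "user", "content": "ping"}],
    options={"num_ctx": 4096},  # same kind of options dict the patch passes
    keep_alive=-1,              # keep the model loaded rather than unloading it after the idle timeout
)
print(response["message"]["content"])
```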