mirror of https://github.com/Cinnamon/kotaemon.git
synced 2025-06-26 23:19:56 +00:00

Feat/local endpoint llm (#148)

* serve local model in a different process from the app

Co-authored-by: albert <albert@cinnamon.is>
Co-authored-by: trducng <trungduc1992@gmail.com>

parent 2950e6ed02
commit df12dec732
.gitattributes (vendored, new file, 1 line)
@@ -0,0 +1 @@
+*.bat text eol=crlf
.gitignore (vendored, 3 changes)
@@ -466,4 +466,5 @@ examples/example1/assets
 storage/*

 # Conda and env storages
-install_dir/
+*install_dir/
+doc_env
@@ -5,7 +5,7 @@ from dataclasses import dataclass
 from enum import Enum
 from typing import Any, Dict, Literal, NamedTuple, Optional, Union

-from pydantic import Extra
+from pydantic import ConfigDict

 from kotaemon.base import LLMInterface
@@ -238,7 +238,7 @@ class AgentFinish(NamedTuple):
     log: str


-class AgentOutput(LLMInterface, extra=Extra.allow):  # type: ignore [call-arg]
+class AgentOutput(LLMInterface):
     """Output from an agent.

     Args:
@@ -248,6 +248,8 @@ class AgentOutput(LLMInterface, extra=Extra.allow):  # type: ignore [call-arg]
         error: The error message if any.
     """

+    model_config = ConfigDict(extra="allow")
+
     text: str
     type: str = "agent"
     agent_type: AgentType
@@ -1,4 +1,5 @@
 from .base import BaseEmbeddings
+from .endpoint_based import EndpointEmbeddings
 from .langchain_based import (
     AzureOpenAIEmbeddings,
     CohereEmbdeddings,
@@ -8,6 +9,7 @@ from .langchain_based import (

 __all__ = [
     "BaseEmbeddings",
+    "EndpointEmbeddings",
     "OpenAIEmbeddings",
     "AzureOpenAIEmbeddings",
     "CohereEmbdeddings",
libs/kotaemon/kotaemon/embeddings/endpoint_based.py (new file, 46 lines)
@@ -0,0 +1,46 @@
+import requests
+
+from kotaemon.base import Document, DocumentWithEmbedding
+
+from .base import BaseEmbeddings
+
+
+class EndpointEmbeddings(BaseEmbeddings):
+    """
+    An Embeddings component that uses an OpenAI API compatible endpoint.
+
+    Attributes:
+        endpoint_url (str): The url of an OpenAI API compatible endpoint.
+    """
+
+    endpoint_url: str
+
+    def run(
+        self, text: str | list[str] | Document | list[Document]
+    ) -> list[DocumentWithEmbedding]:
+        """
+        Generate embeddings from text
+
+        Args:
+            text (str | list[str] | Document | list[Document]): text to generate
+                embeddings from
+        Returns:
+            list[DocumentWithEmbedding]: embeddings
+        """
+        if not isinstance(text, list):
+            text = [text]
+
+        outputs = []
+
+        for item in text:
+            response = requests.post(
+                self.endpoint_url, json={"input": str(item)}
+            ).json()
+            outputs.append(
+                DocumentWithEmbedding(
+                    text=str(item),
+                    embedding=response["data"][0]["embedding"],
+                    total_tokens=response["usage"]["total_tokens"],
+                    prompt_tokens=response["usage"]["prompt_tokens"],
+                )
+            )
+
+        return outputs
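For reference, a minimal usage sketch of the new class, assuming a llama-cpp-python server is already listening locally (the URL mirrors the "local" embeddings entry in the settings change further down; it is not part of the class itself):

    from kotaemon.embeddings import EndpointEmbeddings

    embeddings = EndpointEmbeddings(
        endpoint_url="http://localhost:31415/v1/embeddings"
    )
    # one POST request is made per input item
    docs = embeddings.run(["hello world", "a second sentence"])
    print(docs[0].text, docs[0].embedding[:3])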
@@ -108,6 +108,9 @@ class CitationPipeline(BaseComponent):
             print(e)
             return None

+        if not llm_output.messages:
+            return None
+
         function_output = llm_output.messages[0].additional_kwargs["function_call"][
             "arguments"
         ]
@@ -126,6 +129,9 @@ class CitationPipeline(BaseComponent):
             print(e)
             return None

+        if not llm_output.messages:
+            return None
+
         function_output = llm_output.messages[0].additional_kwargs["function_call"][
             "arguments"
         ]
@@ -2,7 +2,7 @@ from kotaemon.base.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage

 from .base import BaseLLM
 from .branching import GatedBranchingPipeline, SimpleBranchingPipeline
-from .chats import AzureChatOpenAI, ChatLLM, LlamaCppChat
+from .chats import AzureChatOpenAI, ChatLLM, EndpointChatLLM, LlamaCppChat
 from .completions import LLM, AzureOpenAI, LlamaCpp, OpenAI
 from .cot import ManualSequentialChainOfThought, Thought
 from .linear import GatedLinearPipeline, SimpleLinearPipeline
@@ -12,6 +12,7 @@ __all__ = [
     "BaseLLM",
     # chat-specific components
     "ChatLLM",
+    "EndpointChatLLM",
     "BaseMessage",
     "HumanMessage",
     "AIMessage",
@@ -1,5 +1,12 @@
 from .base import ChatLLM
+from .endpoint_based import EndpointChatLLM
 from .langchain_based import AzureChatOpenAI, LCChatMixin
 from .llamacpp import LlamaCppChat

-__all__ = ["ChatLLM", "AzureChatOpenAI", "LCChatMixin", "LlamaCppChat"]
+__all__ = [
+    "ChatLLM",
+    "EndpointChatLLM",
+    "AzureChatOpenAI",
+    "LCChatMixin",
+    "LlamaCppChat",
+]
libs/kotaemon/kotaemon/llms/chats/endpoint_based.py (new file, 85 lines)
@@ -0,0 +1,85 @@
+import requests
+
+from kotaemon.base import (
+    AIMessage,
+    BaseMessage,
+    HumanMessage,
+    LLMInterface,
+    SystemMessage,
+)
+
+from .base import ChatLLM
+
+
+class EndpointChatLLM(ChatLLM):
+    """
+    A ChatLLM that uses an endpoint to generate responses. This expects an OpenAI API
+    compatible endpoint.
+
+    Attributes:
+        endpoint_url (str): The url of an OpenAI API compatible endpoint.
+    """
+
+    endpoint_url: str
+
+    def run(
+        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
+    ) -> LLMInterface:
+        """
+        Generate response from messages
+
+        Args:
+            messages (str | BaseMessage | list[BaseMessage]): history of messages to
+                generate response from
+            **kwargs: additional arguments to pass to the OpenAI API
+        Returns:
+            LLMInterface: generated response
+        """
+        if isinstance(messages, str):
+            input_ = [HumanMessage(content=messages)]
+        elif isinstance(messages, BaseMessage):
+            input_ = [messages]
+        else:
+            input_ = messages
+
+        def decide_role(message: BaseMessage):
+            if isinstance(message, SystemMessage):
+                return "system"
+            elif isinstance(message, AIMessage):
+                return "assistant"
+            else:
+                return "user"
+
+        request_json = {
+            "messages": [{"content": m.text, "role": decide_role(m)} for m in input_]
+        }
+
+        response = requests.post(self.endpoint_url, json=request_json).json()
+
+        content = ""
+        candidates = []
+        if response["choices"]:
+            candidates = [
+                each["message"]["content"]
+                for each in response["choices"]
+                if each["message"]["content"]
+            ]
+            content = candidates[0]
+
+        return LLMInterface(
+            content=content,
+            candidates=candidates,
+            completion_tokens=response["usage"]["completion_tokens"],
+            total_tokens=response["usage"]["total_tokens"],
+            prompt_tokens=response["usage"]["prompt_tokens"],
+        )
+
+    def invoke(
+        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
+    ) -> LLMInterface:
+        """Same as run"""
+        return self.run(messages, **kwargs)
+
+    async def ainvoke(
+        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
+    ) -> LLMInterface:
+        return self.invoke(messages, **kwargs)
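A minimal usage sketch for the chat counterpart, under the same assumption that the local server is running (the endpoint URL matches the "local" LLM entry in the settings change below):

    from kotaemon.base import HumanMessage, SystemMessage
    from kotaemon.llms import EndpointChatLLM

    llm = EndpointChatLLM(
        endpoint_url="http://localhost:31415/v1/chat/completions"
    )
    # run() accepts a plain string, a single message, or a message history
    result = llm.run(
        [SystemMessage(content="You are terse."), HumanMessage(content="What is 2 + 2?")]
    )
    print(result.content)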
@@ -12,7 +12,7 @@ user_cache_dir.mkdir(parents=True, exist_ok=True)

 COHERE_API_KEY = config("COHERE_API_KEY", default="")
 KH_MODE = "dev"
-KH_FEATURE_USER_MANAGEMENT = True
+KH_FEATURE_USER_MANAGEMENT = False
 KH_FEATURE_USER_MANAGEMENT_ADMIN = str(
     config("KH_FEATURE_USER_MANAGEMENT_ADMIN", default="admin")
 )
@@ -21,6 +21,8 @@ KH_FEATURE_USER_MANAGEMENT_PASSWORD = str(
 )
 KH_ENABLE_ALEMBIC = False
 KH_DATABASE = f"sqlite:///{user_cache_dir / 'sql.db'}"
+KH_FILESTORAGE_PATH = str(user_cache_dir / "files")
+
 KH_DOCSTORE = {
     "__type__": "kotaemon.storages.SimpleFileDocumentStore",
     "path": str(user_cache_dir / "docstore"),
@@ -29,51 +31,68 @@ KH_VECTORSTORE = {
     "__type__": "kotaemon.storages.ChromaVectorStore",
     "path": str(user_cache_dir / "vectorstore"),
 }
-KH_FILESTORAGE_PATH = str(user_cache_dir / "files")
 KH_LLMS = {
-    "gpt4": {
+    # example for using Azure OpenAI, the config variables can be set as environment
+    # variables or in the .env file
+    # "gpt4": {
+    #     "def": {
+    #         "__type__": "kotaemon.llms.AzureChatOpenAI",
+    #         "temperature": 0,
+    #         "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
+    #         "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
+    #         "openai_api_version": config("OPENAI_API_VERSION", default=""),
+    #         "deployment_name": "<your deployment name>",
+    #         "stream": True,
+    #     },
+    #     "accuracy": 10,
+    #     "cost": 10,
+    #     "default": False,
+    # },
+    # "gpt35": {
+    #     "def": {
+    #         "__type__": "kotaemon.llms.AzureChatOpenAI",
+    #         "temperature": 0,
+    #         "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
+    #         "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
+    #         "openai_api_version": config("OPENAI_API_VERSION", default=""),
+    #         "deployment_name": "<your deployment name>",
+    #         "request_timeout": 10,
+    #         "stream": False,
+    #     },
+    #     "accuracy": 5,
+    #     "cost": 5,
+    #     "default": False,
+    # },
+    "local": {
         "def": {
-            "__type__": "kotaemon.llms.AzureChatOpenAI",
-            "temperature": 0,
-            "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
-            "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
-            "openai_api_version": config("OPENAI_API_VERSION", default=""),
-            "deployment_name": "dummy-q2",
-            "stream": True,
+            "__type__": "kotaemon.llms.EndpointChatLLM",
+            "endpoint_url": "http://localhost:31415/v1/chat/completions",
         },
         "accuracy": 10,
         "cost": 10,
         "default": False,
     },
     "gpt35": {
         "def": {
             "__type__": "kotaemon.llms.AzureChatOpenAI",
             "temperature": 0,
             "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
             "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
             "openai_api_version": config("OPENAI_API_VERSION", default=""),
             "deployment_name": "dummy-q2",
             "request_timeout": 10,
             "stream": False,
         },
         "accuracy": 5,
         "cost": 5,
         "default": True,
     },
 }
 KH_EMBEDDINGS = {
-    "ada": {
+    # example for using Azure OpenAI, the config variables can be set as environment
+    # variables or in the .env file
+    # "ada": {
+    #     "def": {
+    #         "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
+    #         "model": "text-embedding-ada-002",
+    #         "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
+    #         "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
+    #         "deployment": "<your deployment name>",
+    #         "chunk_size": 16,
+    #     },
+    #     "accuracy": 5,
+    #     "cost": 5,
+    #     "default": True,
+    # },
+    "local": {
         "def": {
-            "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
-            "model": "text-embedding-ada-002",
-            "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
-            "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
-            "deployment": "dummy-q2-text-embedding",
-            "chunk_size": 16,
+            "__type__": "kotaemon.embeddings.EndpointEmbeddings",
+            "endpoint_url": "http://localhost:31415/v1/embeddings",
         },
         "accuracy": 5,
         "cost": 5,
-        "default": True,
+        "default": False,
     },
 }
 KH_REASONINGS = ["ktem.reasoning.simple.FullQAPipeline"]
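For orientation, a sketch of what the two new "local" declarations amount to once the "__type__" strings are resolved (the actual resolution goes through ktem's settings machinery, which is not part of this diff):

    from kotaemon.embeddings import EndpointEmbeddings
    from kotaemon.llms import EndpointChatLLM

    local_llm = EndpointChatLLM(
        endpoint_url="http://localhost:31415/v1/chat/completions"
    )
    local_embeddings = EndpointEmbeddings(
        endpoint_url="http://localhost:31415/v1/embeddings"
    )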
@@ -118,7 +118,7 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):

         # rerank
         docs = self.vector_retrieval(text=text, top_k=top_k, **kwargs)
-        if self.get_from_path("reranker"):
+        if docs and self.get_from_path("reranker"):
             docs = self.reranker(docs, query=text)

         if not self.get_extra_table:
@@ -200,24 +200,37 @@ class AnswerWithContextPipeline(BaseComponent):
             lang=self.lang,
         )

-        citation_task = asyncio.create_task(
-            self.citation_pipeline.ainvoke(context=evidence, question=question)
-        )
-        print("Citation task created")
+        if evidence:
+            citation_task = asyncio.create_task(
+                self.citation_pipeline.ainvoke(context=evidence, question=question)
+            )
+            print("Citation task created")

         messages = []
         if self.system_prompt:
             messages.append(SystemMessage(content=self.system_prompt))
         messages.append(HumanMessage(content=prompt))

         output = ""
-        for text in self.llm.stream(messages):
-            output += text.text
-            self.report_output({"output": text.text})
-            await asyncio.sleep(0)
+        try:
+            # try streaming first
+            print("Trying LLM streaming")
+            for text in self.llm.stream(messages):
+                output += text.text
+                self.report_output({"output": text.text})
+                await asyncio.sleep(0)
+        except NotImplementedError:
+            print("Streaming is not supported, falling back to normal processing")
+            output = self.llm(messages).text
+            self.report_output({"output": output})

         # retrieve the citation
         print("Waiting for citation task")
-        citation = await citation_task
+        if evidence:
+            citation = await citation_task
+        else:
+            citation = None

         answer = Document(text=output, metadata={"citation": citation})

         return answer
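The streaming change above reduces to a simple pattern worth seeing in isolation (an illustrative sketch only; `llm` and `report` are hypothetical stand-ins for the pipeline's LLM component and its report_output callback):

    def generate_with_fallback(llm, messages, report):
        output = ""
        try:
            # endpoint-backed LLMs may not implement stream()
            for chunk in llm.stream(messages):
                output += chunk.text
                report(chunk.text)
        except NotImplementedError:
            # fall back to a single blocking call
            output = llm(messages).text
            report(output)
        return output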
@@ -2,4 +2,4 @@ from ktem.main import App

 app = App()
 demo = app.make()
-demo.queue().launch(favicon_path=app._favicon)
+demo.queue().launch(favicon_path=app._favicon, inbrowser=True)
@@ -12,23 +12,23 @@ function install_miniconda() {
     # Miniconda installer is limited to two main architectures: x86_64 and arm64
     local sys_arch=$(uname -m)
     case "${sys_arch}" in
-    x86_64*) sys_arch="x86_64";;
-    arm64*) sys_arch="aarch64";;
-    aarch64*) sys_arch="aarch64";;
-    *) {
-        echo "Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64"
-        exit 1
-    };;
+    x86_64*) sys_arch="x86_64" ;;
+    arm64*) sys_arch="aarch64" ;;
+    aarch64*) sys_arch="aarch64" ;;
+    *) {
+        echo "Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64"
+        exit 1
+    } ;;
     esac

     # if miniconda has not been installed, download and install it
-    if ! "${conda_root}/bin/conda" --version &>/dev/null ; then
+    if ! "${conda_root}/bin/conda" --version &>/dev/null; then
         if [ ! -d "$install_dir/miniconda_installer.sh" ]; then
             echo "Downloading Miniconda from $miniconda_url"
             local miniconda_url="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${sys_arch}.sh"

             mkdir -p "$install_dir"
-            curl -Lk "$miniconda_url" > "$install_dir/miniconda_installer.sh"
+            curl -Lk "$miniconda_url" >"$install_dir/miniconda_installer.sh"
         fi

         echo "Installing Miniconda to $conda_root"
@@ -64,7 +64,7 @@ function create_conda_env() {

 function activate_conda_env() {
     # deactivate the current env(s) to avoid conflicts
-    { conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
+    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null

     # check if conda env is broken (because of interruption during creation)
     if [ ! -f "$env_dir/bin/python" ]; then
@@ -80,7 +80,7 @@ function activate_conda_env() {
     echo "Activate conda environment at $CONDA_PREFIX"
 }

-function deactivate_conda_env(){
+function deactivate_conda_env() {
     # Conda deactivate if we are in the right env
     if [ "$CONDA_PREFIX" == "$env_dir" ]; then
         conda deactivate
@@ -89,7 +89,7 @@ function deactivate_conda_env(){
 }

 function install_dependencies() {
-    if pip list 2> /dev/null | grep -q "kotaemon"; then
+    if pip list 2>/dev/null | grep -q "kotaemon"; then
         echo "Requirements are already installed"
     else
         local kotaemon_root="$(pwd)/libs/kotaemon/.[dev]"
@@ -101,7 +101,7 @@ function install_dependencies() {
         echo "" && echo "Install ktem's requirements"
         python -m pip install -e "$ktem_root"

-        if ! pip list 2> /dev/null | grep -q "kotaemon"; then
+        if ! pip list 2>/dev/null | grep -q "kotaemon"; then
             echo "Installation failed. You may need to run the installer again."
             deactivate_conda_env
             exit 1
@@ -123,6 +123,10 @@ function install_dependencies() {
     fi
 }

+function setup_local_model() {
+    python $(pwd)/scripts/serve_local.py
+}
+
 function launch_ui() {
     gradio $(pwd)/libs/ktem/launch.py || {
         echo "" && echo "Will exit now..."
@@ -159,6 +163,9 @@ activate_conda_env
 print_highlight "Install requirements"
 install_dependencies

+print_highlight "Setting up a local model"
+setup_local_model
+
 print_highlight "Launching web UI. Please wait..."
 launch_ui
scripts/run_macos.sh (36 changes; mode: Normal file → Executable file)
@@ -12,22 +12,22 @@ function install_miniconda() {
     # Miniconda installer is limited to two main architectures: x86_64 and arm64
     local sys_arch=$(uname -m)
     case "${sys_arch}" in
-    x86_64*) sys_arch="x86_64";;
-    arm64*) sys_arch="arm64";;
-    *) {
-        echo "Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64"
-        exit 1
-    };;
+    x86_64*) sys_arch="x86_64" ;;
+    arm64*) sys_arch="arm64" ;;
+    *) {
+        echo "Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64"
+        exit 1
+    } ;;
     esac

     # if miniconda has not been installed, download and install it
-    if ! "${conda_root}/bin/conda" --version &>/dev/null ; then
+    if ! "${conda_root}/bin/conda" --version &>/dev/null; then
         if [ ! -d "$install_dir/miniconda_installer.sh" ]; then
             local miniconda_url="https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-${sys_arch}.sh"
             echo "Downloading Miniconda from $miniconda_url"

             mkdir -p "$install_dir"
-            curl -Lk "$miniconda_url" > "$install_dir/miniconda_installer.sh"
+            curl -Lk "$miniconda_url" >"$install_dir/miniconda_installer.sh"
         fi

         echo "Installing Miniconda to $conda_root"
@@ -63,7 +63,7 @@ function create_conda_env() {

 function activate_conda_env() {
     # deactivate the current env(s) to avoid conflicts
-    { conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
+    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null

     # check if conda env is broken (because of interruption during creation)
     if [ ! -f "$env_dir/bin/python" ]; then
@@ -79,7 +79,7 @@ function activate_conda_env() {
     echo "Activate conda environment at $CONDA_PREFIX"
 }

-function deactivate_conda_env(){
+function deactivate_conda_env() {
     # Conda deactivate if we are in the right env
     if [[ "$CONDA_PREFIX" == "$env_dir" ]]; then
         conda deactivate
@@ -89,7 +89,7 @@ function deactivate_conda_env(){
 }

 function install_dependencies() {
     # check if the env is already setup by finding 'kotaemon' in 'pip list'
-    if pip list 2> /dev/null | grep -q "kotaemon"; then
+    if pip list 2>/dev/null | grep -q "kotaemon"; then
         echo "Requirements are already installed"
     else
         local kotaemon_root="$(pwd)/libs/kotaemon/.[dev]"
@@ -101,7 +101,7 @@ function install_dependencies() {
         echo "" && echo "Install ktem's requirements"
         python -m pip install -e "$ktem_root"

-        if ! pip list 2> /dev/null | grep -q "kotaemon"; then
+        if ! pip list 2>/dev/null | grep -q "kotaemon"; then
             echo "Installation failed. You may need to run the installer again."
             deactivate_conda_env
             exit 1
@@ -124,6 +124,10 @@ function install_dependencies() {
     fi
 }

+function setup_local_model() {
+    python $(pwd)/scripts/serve_local.py
+}
+
 function launch_ui() {
     gradio $(pwd)/libs/ktem/launch.py || {
         echo "" && echo "Will exit now..."
@@ -141,7 +145,10 @@ function print_highlight() {
 # Main script execution

 # move two levels up from the dir where this script resides
-cd "$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" && cd ..
+cd "$(
+    cd -- "$(dirname "$0")" >/dev/null 2>&1
+    pwd -P
+)" && cd ..

 install_dir="$(pwd)/install_dir"
 conda_root="${install_dir}/conda"
@@ -160,6 +167,9 @@ activate_conda_env
 print_highlight "Install requirements"
 install_dependencies

+print_highlight "Setting up a local model"
+setup_local_model
+
 print_highlight "Launching web UI. Please wait..."
 launch_ui
@@ -14,6 +14,7 @@ IF %ERRORLEVEL% EQU 0 (
     ECHO The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later.
     GOTO :end
 )
+
 CALL :print_highlight "Setup Anaconda/Miniconda"
 CALL :download_and_install_miniconda
 :: check if function run fail, then exit the script
@@ -30,6 +31,10 @@ CALL :print_highlight "Install requirements"
 CALL :install_dependencies
 IF ERRORLEVEL 1 GOTO :end

+CALL :print_highlight "Setting up a local model"
+CALL :setup_local_model
+IF ERRORLEVEL 1 GOTO :end
+
 CALL :print_highlight "Launching web UI. Please wait..."
 CALL :launch_ui

@@ -126,6 +131,10 @@ IF %ERRORLEVEL% == 0 (
 )
 GOTO :eof

+:setup_local_model
+python "%CD%\scripts\serve_local.py"
+GOTO :eof
+
 :launch_ui
 CALL gradio "%CD%\libs\ktem\launch.py" || ( ECHO. && ECHO Will exit now... && GOTO :exit_func_with_error )
 GOTO :eof
scripts/serve_local.py (new file, 81 lines)
@@ -0,0 +1,81 @@
+import platform
+import subprocess
+from inspect import currentframe, getframeinfo
+from pathlib import Path
+
+import dotenv
+
+configs = dotenv.dotenv_values(".env")
+
+system_name = platform.system()
+
+cur_frame = currentframe()
+if cur_frame is None:
+    raise ValueError("Cannot get the current frame.")
+this_file = getframeinfo(cur_frame).filename
+this_dir = Path(this_file).parent
+
+
+def serve_llamacpp_python(local_model_file: Path, **kwargs):
+    def guess_chat_format(local_model_file):
+        model_name = local_model_file.stem
+
+        # handle known cases that the server backends handle incorrectly
+        # this is highly heuristic and should be expanded later
+        # server backends usually have logic for this but they could still be wrong
+        if "qwen" in model_name:
+            return "qwen"
+
+        return None
+
+    # default port
+    if "port" not in kwargs:
+        kwargs["port"] = 31415
+
+    chat_format = guess_chat_format(local_model_file)
+    if chat_format:
+        kwargs = {**kwargs, "chat_format": chat_format}
+
+    # these scripts create a separate conda env and run the server
+    if system_name == "Windows":
+        script_file = this_dir / "server_llamacpp_windows.bat"
+    elif system_name == "Linux":
+        script_file = this_dir / "server_llamacpp_linux.sh"
+    elif system_name == "Darwin":
+        script_file = this_dir / "server_llamacpp_macos.sh"
+    else:
+        raise ValueError(f"Unsupported system: {system_name}")
+
+    args = " ".join(f"--{k} {v}" for k, v in kwargs.items())
+
+    cmd = f"{script_file} --model {local_model_file} {args}"
+    subprocess.Popen(cmd, shell=True)
+
+
+def main():
+    local_model_file = configs.get("LOCAL_MODEL", "")
+
+    if not local_model_file:
+        print("LOCAL_MODEL not set in the `.env` file.")
+        return
+
+    local_model_file = Path(local_model_file)
+    if not local_model_file.exists():
+        print(f"Local model not found: {local_model_file}")
+        return
+
+    print(f"Local model found: {local_model_file}")
+    will_start_server = input("Do you want to use this local model? (y/n): ")
+
+    if will_start_server.lower().strip() not in ["y", "yes"]:
+        return
+
+    print("Starting the local server...")
+    if local_model_file.suffix == ".gguf":
+        serve_llamacpp_python(local_model_file)
+    else:
+        raise ValueError(f"Unsupported model file type: {local_model_file.suffix}")
+
+
+if __name__ == "__main__":
+    main()
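For illustration, what the script above ends up doing for a Qwen GGUF model on Linux (the model path here is hypothetical; the platform script, default port, and chat-format guess all come from the code above):

    from pathlib import Path

    model = Path("install_dir/models/qwen-7b-chat.gguf")  # hypothetical path
    serve_llamacpp_python(model)
    # spawns roughly:
    #   scripts/server_llamacpp_linux.sh --model install_dir/models/qwen-7b-chat.gguf \
    #       --port 31415 --chat_format qwen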
scripts/server_llamacpp_linux.sh (new Executable file, 95 lines)
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+# functions used in the main code execution
+function print_highlight() {
+    local message="${1}"
+    echo "" && echo "******************************************************"
+    echo $message
+    echo "******************************************************" && echo ""
+}
+
+function path_sanity_check() {
+    echo "Path sanity checking"
+    if [[ $PWD =~ \ ]]; then
+        print_highlight "This script relies on Miniconda which can't be silently installed under a path with spaces. Please run it from a path without spaces."
+        exit 1
+    fi
+}
+
+function deactivate_environment() {
+    echo "Deactivate existing environment(s)"
+    # deactivate existing conda envs as needed to avoid conflicts
+    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null
+}
+
+function check_conda_existence() {
+    echo "Check for conda existence"
+    conda_exists="F"
+
+    # figure out whether conda exists
+    if "$CONDA_ROOT_PREFIX/bin/conda" --version &>/dev/null; then conda_exists="T"; fi
+
+    # verify if conda is installed by the main app, if not then raise error
+    if [ "$conda_exists" == "F" ]; then
+        # test the conda binary
+        print_highlight "conda is not installed, seems like the app wasn't installed correctly."
+        exit
+    fi
+}
+
+function create_conda_environment() {
+    # create the environment if needed
+    if [ ! -e "$INSTALL_ENV_DIR" ]; then
+        echo "Create conda environment"
+        "$CONDA_ROOT_PREFIX/bin/conda" create -y -k --prefix "$INSTALL_ENV_DIR" python="$PYTHON_VERSION" || {
+            echo && print_highlight "Conda environment creation failed." && exit 1
+        }
+    fi
+
+    # check if conda environment was actually created
+    if [ ! -e "$INSTALL_ENV_DIR/bin/python" ]; then
+        print_highlight "Conda environment was not correctly created."
+        exit 1
+    fi
+}
+
+function isolate_environment() {
+    echo "Isolate environment"
+    export PYTHONNOUSERSITE=1
+    unset PYTHONPATH
+    unset PYTHONHOME
+}
+
+function activate_environment() {
+    echo "Activate conda environment"
+    source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script)
+    conda activate "$INSTALL_ENV_DIR"
+}
+
+# main code execution
+
+cd "$(dirname "${BASH_SOURCE[0]}")/.."
+echo "Changed the current directory to: $(pwd)"
+
+path_sanity_check
+deactivate_environment
+
+# config
+ENV_NAME="llama-cpp-python-server"
+PYTHON_VERSION="3.10"
+CONDA_ROOT_PREFIX="$(pwd)/install_dir/conda"
+INSTALL_ENV_DIR="$(pwd)/install_dir/server_envs/${ENV_NAME}"
+
+check_conda_existence
+create_conda_environment
+isolate_environment
+activate_environment
+
+# install dependencies
+# ver 0.2.56 produces a segmentation fault for /embeddings on macOS
+python -m pip install llama-cpp-python[server]!=0.2.56
+
+# start the server with passed params
+python -m llama_cpp.server $@
+
+conda deactivate
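Once one of these server scripts is running, a quick smoke test against the OpenAI-compatible endpoints could look like the sketch below (it assumes the default port 31415 set by serve_local.py):

    import requests

    resp = requests.post(
        "http://localhost:31415/v1/chat/completions",
        json={"messages": [{"role": "user", "content": "ping"}]},
        timeout=60,
    )
    print(resp.json()["choices"][0]["message"]["content"])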
scripts/server_llamacpp_macos.sh (new Executable file, 96 lines)
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+# functions used in the main code execution
+function print_highlight() {
+    local message="${1}"
+    echo "" && echo "******************************************************"
+    echo $message
+    echo "******************************************************" && echo ""
+}
+
+function path_sanity_check() {
+    echo "Path sanity checking"
+    if [[ "$(pwd)" =~ " " ]]; then
+        print_highlight "This script relies on Miniconda which can't be silently installed under a path with spaces. Please run it from a path without spaces."
+        exit 1
+    fi
+}
+
+function deactivate_environment() {
+    echo "Deactivate existing environment(s)"
+    # deactivate existing conda envs as needed to avoid conflicts
+    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null
+}
+
+function check_conda_existence() {
+    echo "Check for conda existence"
+    conda_exists="F"
+
+    # figure out whether conda exists
+    if "$CONDA_ROOT_PREFIX/bin/conda" --version &>/dev/null; then conda_exists="T"; fi
+
+    # verify if conda is installed by the main app, if not then raise error
+    if [ "$conda_exists" == "F" ]; then
+        # test the conda binary
+        print_highlight "conda is not installed, seems like the app wasn't installed correctly."
+        exit
+    fi
+}
+
+function create_conda_environment() {
+    # create the environment if needed
+    if [ ! -d "${INSTALL_ENV_DIR}" ]; then
+        echo "Create conda environment"
+        "${CONDA_ROOT_PREFIX}/bin/conda" create -y -k --prefix "$INSTALL_ENV_DIR" python="$PYTHON_VERSION" || (echo && print_highlight "Conda environment creation failed." && exit 1)
+    fi
+
+    # check if conda environment was actually created
+    if [ ! -f "$INSTALL_ENV_DIR/bin/python" ]; then
+        print_highlight "Conda environment was not correctly created."
+        exit 1
+    fi
+}
+
+function isolate_environment() {
+    echo "Isolate environment"
+    export PYTHONNOUSERSITE=1
+    unset PYTHONPATH
+    unset PYTHONHOME
+}
+
+function activate_environment() {
+    echo "Activate conda environment"
+    source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script)
+    conda activate "$INSTALL_ENV_DIR"
+}
+
+# main code execution
+
+cd "$(
+    cd -- "$(dirname "$0")" >/dev/null 2>&1
+    pwd -P
+)" && cd ..
+echo "Changed the current directory to: $(pwd)"
+
+path_sanity_check
+deactivate_environment
+
+# config
+ENV_NAME="llama-cpp-python-server"
+PYTHON_VERSION="3.10"
+CONDA_ROOT_PREFIX="$(pwd)/install_dir/conda"
+INSTALL_ENV_DIR="$(pwd)/install_dir/server_envs/${ENV_NAME}"
+
+check_conda_existence
+create_conda_environment
+isolate_environment
+activate_environment
+
+# install dependencies
+# ver 0.2.56 produces a segmentation fault for /embeddings on macOS
+python -m pip install llama-cpp-python[server]!=0.2.56
+
+# start the server with passed params
+python -m llama_cpp.server $@
+
+conda deactivate
scripts/server_llamacpp_windows.bat (new file, 115 lines)
@@ -0,0 +1,115 @@
+@echo off
+
+@rem main code execution
+
+call :print_highlight "Starting inference server for llama-cpp"
+
+cd /D "%~dp0\.."
+echo "Change the current directory to: %cd%"
+
+call :path_sanity_check
+call :deactivate_environment
+
+@rem config
+set ENV_NAME=llama-cpp-python-server
+set PYTHON_VERSION=3.10
+set CONDA_ROOT_PREFIX=%cd%\install_dir\conda
+set INSTALL_ENV_DIR=%cd%\install_dir\server_envs\%ENV_NAME%
+
+echo "Python version: %PYTHON_VERSION%"
+echo "Conda prefix: %CONDA_ROOT_PREFIX%"
+echo "Environment path: %INSTALL_ENV_DIR%"
+
+@rem handle conda environment
+call :check_conda_existence
+call :create_conda_environment
+call :isolate_environment
+call :activate_environment
+
+@rem install dependencies
+@rem ver 0.2.56 produces a segmentation fault for /embeddings on macOS
+call python -m pip install llama-cpp-python[server]!=0.2.56
+
+@rem start the server with passed params
+call python -m llama_cpp.server %*
+call conda deactivate
+
+goto :end
+@rem the end of main code execution
+
+
+@rem below are the functions used in the above execution
+
+
+:print_highlight
+echo.
+echo ******************************************************
+echo %~1
+echo ******************************************************
+echo.
+goto :eof
+
+
+:path_sanity_check
+echo "Path sanity checking"
+echo "%cd%"| findstr /C:" " >nul ^
+&& (call :print_highlight "This script relies on Miniconda which can not be silently installed under a path with spaces." ^
+&& goto :end)
+goto :eof
+
+
+:deactivate_environment
+echo "Deactivate existing environment(s)"
+(call conda deactivate && call conda deactivate && call conda deactivate) 2>nul
+goto :eof
+
+
+:check_conda_existence
+echo "Check for conda existence"
+set conda_exists=F
+
+@rem figure out whether conda exists
+call "%CONDA_ROOT_PREFIX%\_conda.exe" --version >nul 2>&1
+if "%ERRORLEVEL%" EQU "0" set conda_exists=T
+
+@rem verify if conda is installed by the main app, if not then raise error
+if "%conda_exists%" == "F" (
+    call :print_highlight "conda is not installed, seems like the app wasn't installed correctly."
+    goto :end
+)
+goto :eof
+
+
+:create_conda_environment
+@rem create the environment if needed
+if not exist "%INSTALL_ENV_DIR%" (
+    echo "Create conda environment"
+    call "%CONDA_ROOT_PREFIX%\_conda.exe" create ^
+    --no-shortcuts -y -k --prefix "%INSTALL_ENV_DIR%" python="%PYTHON_VERSION%" || ^
+    ( echo. && call :print_highlight "Conda environment creation failed." && goto :end )
+)
+
+@rem check if conda environment was actually created
+if not exist "%INSTALL_ENV_DIR%\python.exe" (
+    call :print_highlight "Conda environment was not correctly created."
+    goto :end
+)
+goto :eof
+
+
+:isolate_environment
+echo "Isolate environment"
+set PYTHONNOUSERSITE=1
+set PYTHONPATH=
+set PYTHONHOME=
+goto :eof
+
+
+:activate_environment
+echo "Activate conda environment"
+call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ^
+( echo. && call :print_highlight "Miniconda hook not found." && goto :end )
+goto :eof
+
+
+:end