Feat/local endpoint llm (#148)

* serve local model in a different process from the app
---------

Co-authored-by: albert <albert@cinnamon.is>
Co-authored-by: trducng <trungduc1992@gmail.com>
ian_Cin 2024-03-15 16:17:33 +07:00 committed by GitHub
parent 2950e6ed02
commit df12dec732
20 changed files with 675 additions and 79 deletions

.gitattributes vendored Normal file

@@ -0,0 +1 @@
*.bat text eol=crlf

.gitignore vendored

@@ -466,4 +466,5 @@ examples/example1/assets
storage/*
# Conda and env storages
install_dir/
*install_dir/
doc_env


@@ -5,7 +5,7 @@ from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, Literal, NamedTuple, Optional, Union
from pydantic import Extra
from pydantic import ConfigDict
from kotaemon.base import LLMInterface
@@ -238,7 +238,7 @@ class AgentFinish(NamedTuple):
log: str
class AgentOutput(LLMInterface, extra=Extra.allow): # type: ignore [call-arg]
class AgentOutput(LLMInterface):
"""Output from an agent.
Args:
@@ -248,6 +248,8 @@ class AgentOutput(LLMInterface, extra=Extra.allow): # type: ignore [call-arg]
error: The error message if any.
"""
model_config = ConfigDict(extra="allow")
text: str
type: str = "agent"
agent_type: AgentType
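This hunk is the pydantic v1 to v2 migration: the extra=Extra.allow class argument becomes model_config = ConfigDict(extra="allow"). A minimal standalone sketch of the v2 pattern (the class name and extra field below are illustrative, not part of the commit):

    from pydantic import BaseModel, ConfigDict

    class Output(BaseModel):
        # pydantic v2: keep unknown fields instead of rejecting them
        model_config = ConfigDict(extra="allow")
        text: str

    out = Output(text="hi", score=0.9)  # extra field is accepted
    print(out.score)  # -> 0.9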

libs/kotaemon/kotaemon/embeddings/__init__.py

@@ -1,4 +1,5 @@
from .base import BaseEmbeddings
from .endpoint_based import EndpointEmbeddings
from .langchain_based import (
AzureOpenAIEmbeddings,
CohereEmbdeddings,
@@ -8,6 +9,7 @@ from .langchain_based import (
__all__ = [
"BaseEmbeddings",
"EndpointEmbeddings",
"OpenAIEmbeddings",
"AzureOpenAIEmbeddings",
"CohereEmbdeddings",

libs/kotaemon/kotaemon/embeddings/endpoint_based.py Normal file

@@ -0,0 +1,46 @@
import requests
from kotaemon.base import Document, DocumentWithEmbedding
from .base import BaseEmbeddings
class EndpointEmbeddings(BaseEmbeddings):
"""
An Embeddings component that uses an OpenAI API compatible endpoint.
Attributes:
endpoint_url (str): The URL of an OpenAI API compatible endpoint.
"""
endpoint_url: str
def run(
self, text: str | list[str] | Document | list[Document]
) -> list[DocumentWithEmbedding]:
"""
Generate embeddings from text.

Args:
text (str | list[str] | Document | list[Document]): text to generate
embeddings from
Returns:
list[DocumentWithEmbedding]: embeddings
"""
if not isinstance(text, list):
text = [text]
outputs = []
for item in text:
response = requests.post(
self.endpoint_url, json={"input": str(item)}
).json()
outputs.append(
DocumentWithEmbedding(
text=str(item),
embedding=response["data"][0]["embedding"],
total_tokens=response["usage"]["total_tokens"],
prompt_tokens=response["usage"]["prompt_tokens"],
)
)
return outputs
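A minimal usage sketch for the new component, assuming an OpenAI API compatible embeddings server is already listening on the port the installer scripts use; the URL and input text are illustrative:

    from kotaemon.embeddings import EndpointEmbeddings

    embedder = EndpointEmbeddings(
        endpoint_url="http://localhost:31415/v1/embeddings"  # assumed local server
    )
    docs = embedder.run("How do I serve a local model?")
    print(len(docs[0].embedding), docs[0].total_tokens)

Note that the component sends one request per input item, so embedding a list of documents costs one round trip each.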


@@ -108,6 +108,9 @@ class CitationPipeline(BaseComponent):
print(e)
return None
if not llm_output.messages:
return None
function_output = llm_output.messages[0].additional_kwargs["function_call"][
"arguments"
]
@@ -126,6 +129,9 @@ class CitationPipeline(BaseComponent):
print(e)
return None
if not llm_output.messages:
return None
function_output = llm_output.messages[0].additional_kwargs["function_call"][
"arguments"
]

libs/kotaemon/kotaemon/llms/__init__.py

@@ -2,7 +2,7 @@ from kotaemon.base.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage
from .base import BaseLLM
from .branching import GatedBranchingPipeline, SimpleBranchingPipeline
from .chats import AzureChatOpenAI, ChatLLM, LlamaCppChat
from .chats import AzureChatOpenAI, ChatLLM, EndpointChatLLM, LlamaCppChat
from .completions import LLM, AzureOpenAI, LlamaCpp, OpenAI
from .cot import ManualSequentialChainOfThought, Thought
from .linear import GatedLinearPipeline, SimpleLinearPipeline
@@ -12,6 +12,7 @@ __all__ = [
"BaseLLM",
# chat-specific components
"ChatLLM",
"EndpointChatLLM",
"BaseMessage",
"HumanMessage",
"AIMessage",

libs/kotaemon/kotaemon/llms/chats/__init__.py

@@ -1,5 +1,12 @@
from .base import ChatLLM
from .endpoint_based import EndpointChatLLM
from .langchain_based import AzureChatOpenAI, LCChatMixin
from .llamacpp import LlamaCppChat
__all__ = ["ChatLLM", "AzureChatOpenAI", "LCChatMixin", "LlamaCppChat"]
__all__ = [
"ChatLLM",
"EndpointChatLLM",
"AzureChatOpenAI",
"LCChatMixin",
"LlamaCppChat",
]

libs/kotaemon/kotaemon/llms/chats/endpoint_based.py Normal file

@@ -0,0 +1,85 @@
import requests
from kotaemon.base import (
AIMessage,
BaseMessage,
HumanMessage,
LLMInterface,
SystemMessage,
)
from .base import ChatLLM
class EndpointChatLLM(ChatLLM):
"""
A ChatLLM that uses an endpoint to generate responses. This expects an OpenAI API
compatible endpoint.
Attributes:
endpoint_url (str): The URL of an OpenAI API compatible endpoint.
"""
endpoint_url: str
def run(
self, messages: str | BaseMessage | list[BaseMessage], **kwargs
) -> LLMInterface:
"""
Generate response from messages
Args:
messages (str | BaseMessage | list[BaseMessage]): history of messages to
generate response from
**kwargs: additional arguments to pass to the OpenAI API
Returns:
LLMInterface: generated response
"""
if isinstance(messages, str):
input_ = [HumanMessage(content=messages)]
elif isinstance(messages, BaseMessage):
input_ = [messages]
else:
input_ = messages
def decide_role(message: BaseMessage):
if isinstance(message, SystemMessage):
return "system"
elif isinstance(message, AIMessage):
return "assistant"
else:
return "user"
request_json = {
"messages": [{"content": m.text, "role": decide_role(m)} for m in input_]
}
response = requests.post(self.endpoint_url, json=request_json).json()
content = ""
candidates = []
if response["choices"]:
candidates = [
each["message"]["content"]
for each in response["choices"]
if each["message"]["content"]
]
content = candidates[0]
return LLMInterface(
content=content,
candidates=candidates,
completion_tokens=response["usage"]["completion_tokens"],
total_tokens=response["usage"]["total_tokens"],
prompt_tokens=response["usage"]["prompt_tokens"],
)
def invoke(
self, messages: str | BaseMessage | list[BaseMessage], **kwargs
) -> LLMInterface:
"""Same as run"""
return self.run(messages, **kwargs)
async def ainvoke(
self, messages: str | BaseMessage | list[BaseMessage], **kwargs
) -> LLMInterface:
return self.invoke(messages, **kwargs)
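A matching usage sketch, under the same assumption of a local OpenAI API compatible server on the installer's default port; the URL and messages are illustrative:

    from kotaemon.base import HumanMessage, SystemMessage
    from kotaemon.llms import EndpointChatLLM

    llm = EndpointChatLLM(
        endpoint_url="http://localhost:31415/v1/chat/completions"  # assumed local server
    )
    result = llm.run(
        [
            SystemMessage(content="You are a concise assistant."),
            HumanMessage(content="Say hello in one word."),
        ]
    )
    print(result.content, result.total_tokens)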


@@ -12,7 +12,7 @@ user_cache_dir.mkdir(parents=True, exist_ok=True)
COHERE_API_KEY = config("COHERE_API_KEY", default="")
KH_MODE = "dev"
KH_FEATURE_USER_MANAGEMENT = True
KH_FEATURE_USER_MANAGEMENT = False
KH_FEATURE_USER_MANAGEMENT_ADMIN = str(
config("KH_FEATURE_USER_MANAGEMENT_ADMIN", default="admin")
)
@@ -21,6 +21,8 @@ KH_FEATURE_USER_MANAGEMENT_PASSWORD = str(
)
KH_ENABLE_ALEMBIC = False
KH_DATABASE = f"sqlite:///{user_cache_dir / 'sql.db'}"
KH_FILESTORAGE_PATH = str(user_cache_dir / "files")
KH_DOCSTORE = {
"__type__": "kotaemon.storages.SimpleFileDocumentStore",
"path": str(user_cache_dir / "docstore"),
@@ -29,51 +31,68 @@ KH_VECTORSTORE = {
"__type__": "kotaemon.storages.ChromaVectorStore",
"path": str(user_cache_dir / "vectorstore"),
}
KH_FILESTORAGE_PATH = str(user_cache_dir / "files")
KH_LLMS = {
"gpt4": {
# example of using Azure OpenAI; the config variables can be set as environment
# variables or in the .env file
# "gpt4": {
# "def": {
# "__type__": "kotaemon.llms.AzureChatOpenAI",
# "temperature": 0,
# "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
# "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
# "openai_api_version": config("OPENAI_API_VERSION", default=""),
# "deployment_name": "<your deployment name>",
# "stream": True,
# },
# "accuracy": 10,
# "cost": 10,
# "default": False,
# },
# "gpt35": {
# "def": {
# "__type__": "kotaemon.llms.AzureChatOpenAI",
# "temperature": 0,
# "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
# "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
# "openai_api_version": config("OPENAI_API_VERSION", default=""),
# "deployment_name": "<your deployment name>",
# "request_timeout": 10,
# "stream": False,
# },
# "accuracy": 5,
# "cost": 5,
# "default": False,
# },
"local": {
"def": {
"__type__": "kotaemon.llms.AzureChatOpenAI",
"temperature": 0,
"azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
"openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
"openai_api_version": config("OPENAI_API_VERSION", default=""),
"deployment_name": "dummy-q2",
"stream": True,
"__type__": "kotaemon.llms.EndpointChatLLM",
"endpoint_url": "http://localhost:31415/v1/chat/completions",
},
"accuracy": 10,
"cost": 10,
"default": False,
},
"gpt35": {
"def": {
"__type__": "kotaemon.llms.AzureChatOpenAI",
"temperature": 0,
"azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
"openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
"openai_api_version": config("OPENAI_API_VERSION", default=""),
"deployment_name": "dummy-q2",
"request_timeout": 10,
"stream": False,
},
"accuracy": 5,
"cost": 5,
"default": True,
},
}
KH_EMBEDDINGS = {
"ada": {
# example of using Azure OpenAI; the config variables can be set as environment
# variables or in the .env file
# "ada": {
# "def": {
# "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
# "model": "text-embedding-ada-002",
# "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
# "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
# "deployment": "<your deployment name>",
# "chunk_size": 16,
# },
# "accuracy": 5,
# "cost": 5,
# "default": True,
# },
"local": {
"def": {
"__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
"model": "text-embedding-ada-002",
"azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
"openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
"deployment": "dummy-q2-text-embedding",
"chunk_size": 16,
"__type__": "kotaemon.embeddings.EndpointEmbeddings",
"endpoint_url": "http://localhost:31415/v1/embeddings",
},
"accuracy": 5,
"cost": 5,
"default": True,
"default": False,
},
}
KH_REASONINGS = ["ktem.reasoning.simple.FullQAPipeline"]
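Both "local" entries point at the llama-cpp-python server that the installer starts on port 31415. If the server runs elsewhere, only the two endpoint_url values need to change; a sketch assuming a server on port 8000:

    KH_LLMS["local"]["def"]["endpoint_url"] = "http://localhost:8000/v1/chat/completions"
    KH_EMBEDDINGS["local"]["def"]["endpoint_url"] = "http://localhost:8000/v1/embeddings"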


@@ -118,7 +118,7 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
# rerank
docs = self.vector_retrieval(text=text, top_k=top_k, **kwargs)
if self.get_from_path("reranker"):
if docs and self.get_from_path("reranker"):
docs = self.reranker(docs, query=text)
if not self.get_extra_table:


@@ -200,24 +200,37 @@ class AnswerWithContextPipeline(BaseComponent):
lang=self.lang,
)
citation_task = asyncio.create_task(
self.citation_pipeline.ainvoke(context=evidence, question=question)
)
print("Citation task created")
if evidence:
citation_task = asyncio.create_task(
self.citation_pipeline.ainvoke(context=evidence, question=question)
)
print("Citation task created")
messages = []
if self.system_prompt:
messages.append(SystemMessage(content=self.system_prompt))
messages.append(HumanMessage(content=prompt))
output = ""
for text in self.llm.stream(messages):
output += text.text
self.report_output({"output": text.text})
await asyncio.sleep(0)
try:
# try streaming first
print("Trying LLM streaming")
for text in self.llm.stream(messages):
output += text.text
self.report_output({"output": text.text})
await asyncio.sleep(0)
except NotImplementedError:
print("Streaming is not supported, falling back to normal processing")
output = self.llm(messages).text
self.report_output({"output": output})
# retrieve the citation
print("Waiting for citation task")
citation = await citation_task
if evidence:
citation = await citation_task
else:
citation = None
answer = Document(text=output, metadata={"citation": citation})
return answer
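The control flow above in isolation: try stream() first and fall back to a single blocking call when the backend raises NotImplementedError. A minimal sketch (llm stands in for any kotaemon chat model):

    from kotaemon.llms import ChatLLM

    def generate(llm: ChatLLM, messages) -> str:
        output = ""
        try:
            # stream incrementally when the backend supports it
            for chunk in llm.stream(messages):
                output += chunk.text
        except NotImplementedError:
            # backend cannot stream: do one blocking call instead
            output = llm(messages).text
        return output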

libs/ktem/launch.py

@@ -2,4 +2,4 @@ from ktem.main import App
app = App()
demo = app.make()
demo.queue().launch(favicon_path=app._favicon)
demo.queue().launch(favicon_path=app._favicon, inbrowser=True)

scripts/run_linux.sh

@@ -12,23 +12,23 @@ function install_miniconda() {
# Miniconda installer is limited to two main architectures: x86_64 and arm64
local sys_arch=$(uname -m)
case "${sys_arch}" in
x86_64*) sys_arch="x86_64";;
arm64*) sys_arch="aarch64";;
aarch64*) sys_arch="aarch64";;
*) {
echo "Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64"
exit 1
};;
x86_64*) sys_arch="x86_64" ;;
arm64*) sys_arch="aarch64" ;;
aarch64*) sys_arch="aarch64" ;;
*) {
echo "Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64"
exit 1
} ;;
esac
# if miniconda has not been installed, download and install it
if ! "${conda_root}/bin/conda" --version &>/dev/null ; then
if ! "${conda_root}/bin/conda" --version &>/dev/null; then
if [ ! -f "$install_dir/miniconda_installer.sh" ]; then
local miniconda_url="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${sys_arch}.sh"
echo "Downloading Miniconda from $miniconda_url"
mkdir -p "$install_dir"
curl -Lk "$miniconda_url" > "$install_dir/miniconda_installer.sh"
curl -Lk "$miniconda_url" >"$install_dir/miniconda_installer.sh"
fi
echo "Installing Miniconda to $conda_root"
@@ -64,7 +64,7 @@ function create_conda_env() {
function activate_conda_env() {
# deactivate the current env(s) to avoid conflicts
{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
{ conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null
# check if conda env is broken (because of interruption during creation)
if [ ! -f "$env_dir/bin/python" ]; then
@@ -80,7 +80,7 @@ function activate_conda_env() {
echo "Activate conda environment at $CONDA_PREFIX"
}
function deactivate_conda_env(){
function deactivate_conda_env() {
# Conda deactivate if we are in the right env
if [ "$CONDA_PREFIX" == "$env_dir" ]; then
conda deactivate
@@ -89,7 +89,7 @@ function deactivate_conda_env(){
}
function install_dependencies() {
if pip list 2> /dev/null | grep -q "kotaemon"; then
if pip list 2>/dev/null | grep -q "kotaemon"; then
echo "Requirements are already installed"
else
local kotaemon_root="$(pwd)/libs/kotaemon/.[dev]"
@@ -101,7 +101,7 @@ function install_dependencies() {
echo "" && echo "Install ktem's requirements"
python -m pip install -e "$ktem_root"
if ! pip list 2> /dev/null | grep -q "kotaemon"; then
if ! pip list 2>/dev/null | grep -q "kotaemon"; then
echo "Installation failed. You may need to run the installer again."
deactivate_conda_env
exit 1
@@ -123,6 +123,10 @@ function install_dependencies() {
fi
}
function setup_local_model() {
python $(pwd)/scripts/serve_local.py
}
function launch_ui() {
gradio $(pwd)/libs/ktem/launch.py || {
echo "" && echo "Will exit now..."
@@ -159,6 +163,9 @@ activate_conda_env
print_highlight "Install requirements"
install_dependencies
print_highlight "Setting up a local model"
setup_local_model
print_highlight "Launching web UI. Please wait..."
launch_ui

scripts/run_macos.sh Normal file → Executable file

@@ -12,22 +12,22 @@ function install_miniconda() {
# Miniconda installer is limited to two main architectures: x86_64 and arm64
local sys_arch=$(uname -m)
case "${sys_arch}" in
x86_64*) sys_arch="x86_64";;
arm64*) sys_arch="arm64";;
*) {
echo "Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64"
exit 1
};;
x86_64*) sys_arch="x86_64" ;;
arm64*) sys_arch="arm64" ;;
*) {
echo "Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64"
exit 1
} ;;
esac
# if miniconda has not been installed, download and install it
if ! "${conda_root}/bin/conda" --version &>/dev/null ; then
if ! "${conda_root}/bin/conda" --version &>/dev/null; then
if [ ! -f "$install_dir/miniconda_installer.sh" ]; then
local miniconda_url="https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-${sys_arch}.sh"
echo "Downloading Miniconda from $miniconda_url"
mkdir -p "$install_dir"
curl -Lk "$miniconda_url" > "$install_dir/miniconda_installer.sh"
curl -Lk "$miniconda_url" >"$install_dir/miniconda_installer.sh"
fi
echo "Installing Miniconda to $conda_root"
@@ -63,7 +63,7 @@ function create_conda_env() {
function activate_conda_env() {
# deactivate the current env(s) to avoid conflicts
{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
{ conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null
# check if conda env is broken (because of interruption during creation)
if [ ! -f "$env_dir/bin/python" ]; then
@@ -79,7 +79,7 @@ function activate_conda_env() {
echo "Activate conda environment at $CONDA_PREFIX"
}
function deactivate_conda_env(){
function deactivate_conda_env() {
# Conda deactivate if we are in the right env
if [[ "$CONDA_PREFIX" == "$env_dir" ]]; then
conda deactivate
@@ -89,7 +89,7 @@ function deactivate_conda_env(){
function install_dependencies() {
# check if the env is already setup by finding 'kotaemon' in 'pip list'
if pip list 2> /dev/null | grep -q "kotaemon"; then
if pip list 2>/dev/null | grep -q "kotaemon"; then
echo "Requirements are already installed"
else
local kotaemon_root="$(pwd)/libs/kotaemon/.[dev]"
@@ -101,7 +101,7 @@ function install_dependencies() {
echo "" && echo "Install ktem's requirements"
python -m pip install -e "$ktem_root"
if ! pip list 2> /dev/null | grep -q "kotaemon"; then
if ! pip list 2>/dev/null | grep -q "kotaemon"; then
echo "Installation failed. You may need to run the installer again."
deactivate_conda_env
exit 1
@@ -124,6 +124,10 @@ function install_dependencies() {
fi
}
function setup_local_model() {
python $(pwd)/scripts/serve_local.py
}
function launch_ui() {
gradio $(pwd)/libs/ktem/launch.py || {
echo "" && echo "Will exit now..."
@@ -141,7 +145,10 @@ function print_highlight() {
# Main script execution
# move two levels up from the dir where this script resides
cd "$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" && cd ..
cd "$(
cd -- "$(dirname "$0")" >/dev/null 2>&1
pwd -P
)" && cd ..
install_dir="$(pwd)/install_dir"
conda_root="${install_dir}/conda"
@@ -160,6 +167,9 @@ activate_conda_env
print_highlight "Install requirements"
install_dependencies
print_highlight "Setting up a local model"
setup_local_model
print_highlight "Launching web UI. Please wait..."
launch_ui

scripts/run_windows.bat

@@ -14,6 +14,7 @@ IF %ERRORLEVEL% EQU 0 (
ECHO The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later.
GOTO :end
)
CALL :print_highlight "Setup Anaconda/Miniconda"
CALL :download_and_install_miniconda
:: check if function run fail, then exit the script
@@ -30,6 +31,10 @@ CALL :print_highlight "Install requirements"
CALL :install_dependencies
IF ERRORLEVEL 1 GOTO :end
CALL :print_highlight "Setting up a local model"
CALL :setup_local_model
IF ERRORLEVEL 1 GOTO :end
CALL :print_highlight "Launching web UI. Please wait..."
CALL :launch_ui
@@ -126,6 +131,10 @@ IF %ERRORLEVEL% == 0 (
)
GOTO :eof
:setup_local_model
python "%CD%\scripts\serve_local.py"
GOTO :eof
:launch_ui
CALL gradio "%CD%\libs\ktem\launch.py" || ( ECHO. && ECHO Will exit now... && GOTO :exit_func_with_error )
GOTO :eof

scripts/serve_local.py Normal file

@@ -0,0 +1,81 @@
import platform
import subprocess
from inspect import currentframe, getframeinfo
from pathlib import Path
import dotenv
configs = dotenv.dotenv_values(".env")
system_name = platform.system()
cur_frame = currentframe()
if cur_frame is None:
raise ValueError("Cannot get the current frame.")
this_file = getframeinfo(cur_frame).filename
this_dir = Path(this_file).parent
def serve_llamacpp_python(local_model_file: Path, **kwargs):
def guess_chat_format(local_model_file):
model_name = local_model_file.stem
# handle known cases that the server backends handle incorrectly
# this is highly heuristic and should be expanded later
# server backends usually have logic for this, but they could still be wrong
if "qwen" in model_name:
return "qwen"
return None
# default port
if "port" not in kwargs:
kwargs["port"] = 31415
chat_format = guess_chat_format(local_model_file)
if chat_format:
kwargs = {**kwargs, "chat_format": chat_format}
# these scripts create a separate conda env and run the server
if system_name == "Windows":
script_file = this_dir / "server_llamacpp_windows.bat"
elif system_name == "Linux":
script_file = this_dir / "server_llamacpp_linux.sh"
elif system_name == "Darwin":
script_file = this_dir / "server_llamacpp_macos.sh"
else:
raise ValueError(f"Unsupported system: {system_name}")
args = " ".join(f"--{k} {v}" for k, v in kwargs.items())
cmd = f"{script_file} --model {local_model_file} {args}"
subprocess.Popen(cmd, shell=True)
def main():
local_model_file = configs.get("LOCAL_MODEL", "")
if not local_model_file:
print("LOCAL_MODEL not set in the `.env` file.")
return
local_model_file = Path(local_model_file)
if not local_model_file.exists():
print(f"Local model not found: {local_model_file}")
return
print(f"Local model found: {local_model_file}")
will_start_server = input("Do you want to use this local model? (y/n): ")
if will_start_server.lower().strip() not in ["y", "yes"]:
return
print("Starting the local server...")
if local_model_file.suffix == ".gguf":
serve_llamacpp_python(local_model_file)
else:
raise ValueError(f"Unsupported model file type: {local_model_file.suffix}")
if __name__ == "__main__":
main()
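The script is driven by a single .env entry: point LOCAL_MODEL at a GGUF checkpoint to opt in (the path below is illustrative):

    # .env
    LOCAL_MODEL=./models/qwen1_5-1_8b-chat-q4_k_m.gguf

Since serve_llamacpp_python defaults the port to 31415, the resulting server answers exactly where the endpoint_url values in the settings file point.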

scripts/server_llamacpp_linux.sh Normal file

@@ -0,0 +1,95 @@
#!/bin/bash
# functions used in the main code execution
function print_highlight() {
local message="${1}"
echo "" && echo "******************************************************"
echo "$message"
echo "******************************************************" && echo ""
}
function path_sanity_check() {
echo "Path sanity checking"
if [[ $PWD =~ \ ]]; then
print_highlight "This script relies on Miniconda which can't be silently installed under a path with spaces. Please run it from a path without spaces."
exit 1
fi
}
function deactivate_environment() {
echo "Deactivate existing environment(s)"
# deactivate existing conda envs as needed to avoid conflicts
{ conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null
}
function check_conda_existence() {
echo "Check for conda existence"
conda_exists="F"
# figure out whether conda exists
if "$CONDA_ROOT_PREFIX/bin/conda" --version &>/dev/null; then conda_exists="T"; fi
# verify if conda is installed by the main app, if not then raise error
if [ "$conda_exists" == "F" ]; then
# test the conda binary
print_highlight "conda is not installed, seems like the app wasn't installed correctly."
exit
fi
}
function create_conda_environment() {
# create the environment if needed
if [ ! -e "$INSTALL_ENV_DIR" ]; then
echo "Create conda environment"
"$CONDA_ROOT_PREFIX/bin/conda" create -y -k --prefix "$INSTALL_ENV_DIR" python="$PYTHON_VERSION" || {
echo && print_highlight "Conda environment creation failed." && exit 1
}
fi
# check if conda environment was actually created
if [ ! -e "$INSTALL_ENV_DIR/bin/python" ]; then
print_highlight "Conda environment was not correctly created."
exit 1
fi
}
function isolate_environment() {
echo "Isolate environment"
export PYTHONNOUSERSITE=1
unset PYTHONPATH
unset PYTHONHOME
}
function activate_environment() {
echo "Activate conda environment"
source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script)
conda activate "$INSTALL_ENV_DIR"
}
# main code execution
cd "$(dirname "${BASH_SOURCE[0]}")/.."
echo "Changed the current directory to: $(pwd)"
path_sanity_check
deactivate_environment
# config
ENV_NAME="llama-cpp-python-server"
PYTHON_VERSION="3.10"
CONDA_ROOT_PREFIX="$(pwd)/install_dir/conda"
INSTALL_ENV_DIR="$(pwd)/install_dir/server_envs/${ENV_NAME}"
check_conda_existence
create_conda_environment
isolate_environment
activate_environment
# install dependencies
# ver 0.2.56 produces a segmentation fault for /embeddings on macOS
python -m pip install "llama-cpp-python[server]!=0.2.56"
# start the server with passed params
python -m llama_cpp.server "$@"
conda deactivate

scripts/server_llamacpp_macos.sh Normal file

@@ -0,0 +1,96 @@
#!/bin/bash
# functions used in the main code execution
function print_highlight() {
local message="${1}"
echo "" && echo "******************************************************"
echo "$message"
echo "******************************************************" && echo ""
}
function path_sanity_check() {
echo "Path sanity checking"
if [[ "$(pwd)" =~ " " ]]; then
print_highlight "This script relies on Miniconda which can't be silently installed under a path with spaces. Please run it from a path without spaces."
exit 1
fi
}
function deactivate_environment() {
echo "Deactivate existing environment(s)"
# deactivate existing conda envs as needed to avoid conflicts
{ conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null
}
function check_conda_existence() {
echo "Check for conda existence"
conda_exists="F"
# figure out whether conda exists
if "$CONDA_ROOT_PREFIX/bin/conda" --version &>/dev/null; then conda_exists="T"; fi
# verify if conda is installed by the main app, if not then raise error
if [ "$conda_exists" == "F" ]; then
# test the conda binary
print_highlight "conda is not installed, seems like the app wasn't installed correctly."
exit
fi
}
function create_conda_environment() {
# create the environment if needed
if [ ! -d "${INSTALL_ENV_DIR}" ]; then
echo "Create conda environment"
"${CONDA_ROOT_PREFIX}/bin/conda" create -y -k --prefix "$INSTALL_ENV_DIR" python="$PYTHON_VERSION" || (echo && print_highlight "Conda environment creation failed." && exit 1)
fi
# check if conda environment was actually created
if [ ! -f "$INSTALL_ENV_DIR/bin/python" ]; then
print_highlight "Conda environment was not correctly created."
exit 1
fi
}
function isolate_environment() {
echo "Isolate environment"
export PYTHONNOUSERSITE=1
unset PYTHONPATH
unset PYTHONHOME
}
function activate_environment() {
echo "Activate conda environment"
source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script)
conda activate "$INSTALL_ENV_DIR"
}
# main code execution
cd "$(
cd -- "$(dirname "$0")" >/dev/null 2>&1
pwd -P
)" && cd ..
echo "Changed the current directory to: $(pwd)"
path_sanity_check
deactivate_environment
# config
ENV_NAME="llama-cpp-python-server"
PYTHON_VERSION="3.10"
CONDA_ROOT_PREFIX="$(pwd)/install_dir/conda"
INSTALL_ENV_DIR="$(pwd)/install_dir/server_envs/${ENV_NAME}"
check_conda_existence
create_conda_environment
isolate_environment
activate_environment
# install dependencies
# ver 0.2.56 produces a segmentation fault for /embeddings on macOS
python -m pip install "llama-cpp-python[server]!=0.2.56"
# start the server with passed params
python -m llama_cpp.server "$@"
conda deactivate

scripts/server_llamacpp_windows.bat Normal file

@@ -0,0 +1,115 @@
@echo off
@rem main code execution
call :print_highlight "Starting inference server for llama-cpp"
cd /D "%~dp0\.."
echo "Change the current directory to: %cd%"
call :path_sanity_check
call :deactivate_environment
@rem config
set ENV_NAME=llama-cpp-python-server
set PYTHON_VERSION=3.10
set CONDA_ROOT_PREFIX=%cd%\install_dir\conda
set INSTALL_ENV_DIR=%cd%\install_dir\server_envs\%ENV_NAME%
echo "Python version: %PYTHON_VERSION%"
echo "Conda prefix: %CONDA_ROOT_PREFIX%"
echo "Environment path: %INSTALL_ENV_DIR%"
@rem handle conda environment
call :check_conda_existence
call :create_conda_environment
call :isolate_environment
call :activate_environment
@rem install dependencies
@rem ver 0.2.56 produces a segmentation fault for /embeddings on macOS
call python -m pip install llama-cpp-python[server]!=0.2.56
@rem start the server with passed params
call python -m llama_cpp.server %*
call conda deactivate
goto :end
@rem the end of main code execution
@rem below are the functions used in the above execution
:print_highlight
echo.
echo ******************************************************
echo %~1
echo ******************************************************
echo.
goto :eof
:path_sanity_check
echo "Path sanity checking"
echo "%cd%"| findstr /C:" " >nul ^
&& (call :print_highlight "This script relies on Miniconda which can not be silently installed under a path with spaces." ^
&& goto :end)
goto :eof
:deactivate_environment
echo "Deactivate existing environment(s)"
(call conda deactivate && call conda deactivate && call conda deactivate) 2>nul
goto :eof
:check_conda_existence
echo "Check for conda existence"
set conda_exists=F
@rem figure out whether conda exists
call "%CONDA_ROOT_PREFIX%\_conda.exe" --version >nul 2>&1
if "%ERRORLEVEL%" EQU "0" set conda_exists=T
@rem verify if conda is installed by the main app, if not then raise error
if "%conda_exists%" == "F" (
call :print_highlight "conda is not installed, seems like the app wasn't installed correctly."
goto :end
)
goto :eof
:create_conda_environment
@rem create the environment if needed
if not exist "%INSTALL_ENV_DIR%" (
echo "Create conda environment"
call "%CONDA_ROOT_PREFIX%\_conda.exe" create ^
--no-shortcuts -y -k --prefix "%INSTALL_ENV_DIR%" python="%PYTHON_VERSION%" || ^
( echo. && call :print_highlight "Conda environment creation failed." && goto :end )
)
@rem check if conda environment was actually created
if not exist "%INSTALL_ENV_DIR%\python.exe" (
call :print_highlight "Conda environment was not correctly created."
goto :end
)
goto :eof
:isolate_environment
echo "Isolate environment"
set PYTHONNOUSERSITE=1
set PYTHONPATH=
set PYTHONHOME=
goto :eof
:activate_environment
echo "Activate conda environment"
call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ^
( echo. && call :print_highlight "Miniconda hook not found." && goto :end )
goto :eof
:end