diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..d552f2d7
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+*.bat text eol=crlf
diff --git a/.gitignore b/.gitignore
index 9e9e64c2..47b78c70 100644
--- a/.gitignore
+++ b/.gitignore
@@ -466,4 +466,5 @@ examples/example1/assets
 storage/*
 
 # Conda and env storages
-install_dir/
+*install_dir/
+doc_env
diff --git a/libs/kotaemon/kotaemon/agents/io/base.py b/libs/kotaemon/kotaemon/agents/io/base.py
index c27eed03..c6dd1cab 100644
--- a/libs/kotaemon/kotaemon/agents/io/base.py
+++ b/libs/kotaemon/kotaemon/agents/io/base.py
@@ -5,7 +5,7 @@ from dataclasses import dataclass
 from enum import Enum
 from typing import Any, Dict, Literal, NamedTuple, Optional, Union
 
-from pydantic import Extra
+from pydantic import ConfigDict
 
 from kotaemon.base import LLMInterface
 
@@ -238,7 +238,7 @@ class AgentFinish(NamedTuple):
     log: str
 
 
-class AgentOutput(LLMInterface, extra=Extra.allow):  # type: ignore [call-arg]
+class AgentOutput(LLMInterface):
     """Output from an agent.
 
     Args:
@@ -248,6 +248,8 @@ class AgentOutput(LLMInterface, extra=Extra.allow):  # type: ignore [call-arg]
         error: The error message if any.
     """
 
+    model_config = ConfigDict(extra="allow")
+
     text: str
     type: str = "agent"
     agent_type: AgentType
diff --git a/libs/kotaemon/kotaemon/embeddings/__init__.py b/libs/kotaemon/kotaemon/embeddings/__init__.py
index 1b8b4527..245c9b3b 100644
--- a/libs/kotaemon/kotaemon/embeddings/__init__.py
+++ b/libs/kotaemon/kotaemon/embeddings/__init__.py
@@ -1,4 +1,5 @@
 from .base import BaseEmbeddings
+from .endpoint_based import EndpointEmbeddings
 from .langchain_based import (
     AzureOpenAIEmbeddings,
     CohereEmbdeddings,
@@ -8,6 +9,7 @@ from .langchain_based import (
 
 __all__ = [
     "BaseEmbeddings",
+    "EndpointEmbeddings",
     "OpenAIEmbeddings",
     "AzureOpenAIEmbeddings",
     "CohereEmbdeddings",
diff --git a/libs/kotaemon/kotaemon/embeddings/endpoint_based.py b/libs/kotaemon/kotaemon/embeddings/endpoint_based.py
new file mode 100644
index 00000000..fa9a99b7
--- /dev/null
+++ b/libs/kotaemon/kotaemon/embeddings/endpoint_based.py
@@ -0,0 +1,46 @@
+import requests
+
+from kotaemon.base import Document, DocumentWithEmbedding
+
+from .base import BaseEmbeddings
+
+
+class EndpointEmbeddings(BaseEmbeddings):
+    """
+    An Embeddings component that uses an OpenAI API compatible endpoint.
+
+    Attributes:
+        endpoint_url (str): The url of an OpenAI API compatible endpoint.
+    """
+
+    endpoint_url: str
+
+    def run(
+        self, text: str | list[str] | Document | list[Document]
+    ) -> list[DocumentWithEmbedding]:
+        """
+        Generate embeddings from text
+        Args:
+            text (str | list[str] | Document | list[Document]): text to generate
+                embeddings from
+        Returns:
+            list[DocumentWithEmbedding]: embeddings
+        """
+        if not isinstance(text, list):
+            text = [text]
+
+        outputs = []
+
+        for item in text:
+            response = requests.post(
+                self.endpoint_url, json={"input": str(item)}
+            ).json()
+            outputs.append(
+                DocumentWithEmbedding(
+                    text=str(item),
+                    embedding=response["data"][0]["embedding"],
+                    total_tokens=response["usage"]["total_tokens"],
+                    prompt_tokens=response["usage"]["prompt_tokens"],
+                )
+            )
+
+        return outputs
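The new component is self-contained enough to try by hand. A minimal sketch, assuming a llama-cpp-python server (started by scripts/serve_local.py later in this diff) is already listening on localhost:31415:

    from kotaemon.embeddings import EndpointEmbeddings

    # the endpoint URL mirrors the "local" entry added to flowsettings.py below
    embeddings = EndpointEmbeddings(
        endpoint_url="http://localhost:31415/v1/embeddings"
    )
    docs = embeddings.run("hello world")  # accepts str, list[str], or Document(s)
    print(len(docs[0].embedding))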
diff --git a/libs/kotaemon/kotaemon/indices/qa/citation.py b/libs/kotaemon/kotaemon/indices/qa/citation.py
index 4fe86001..f1a53c79 100644
--- a/libs/kotaemon/kotaemon/indices/qa/citation.py
+++ b/libs/kotaemon/kotaemon/indices/qa/citation.py
@@ -108,6 +108,9 @@ class CitationPipeline(BaseComponent):
             print(e)
             return None
 
+        if not llm_output.messages:
+            return None
+
         function_output = llm_output.messages[0].additional_kwargs["function_call"][
             "arguments"
         ]
@@ -126,6 +129,9 @@ class CitationPipeline(BaseComponent):
             print(e)
             return None
 
+        if not llm_output.messages:
+            return None
+
         function_output = llm_output.messages[0].additional_kwargs["function_call"][
             "arguments"
         ]
diff --git a/libs/kotaemon/kotaemon/llms/__init__.py b/libs/kotaemon/kotaemon/llms/__init__.py
index 1968df47..4e81d21e 100644
--- a/libs/kotaemon/kotaemon/llms/__init__.py
+++ b/libs/kotaemon/kotaemon/llms/__init__.py
@@ -2,7 +2,7 @@ from kotaemon.base.schema import AIMessage, BaseMessage, HumanMessage, SystemMes
 
 from .base import BaseLLM
 from .branching import GatedBranchingPipeline, SimpleBranchingPipeline
-from .chats import AzureChatOpenAI, ChatLLM, LlamaCppChat
+from .chats import AzureChatOpenAI, ChatLLM, EndpointChatLLM, LlamaCppChat
 from .completions import LLM, AzureOpenAI, LlamaCpp, OpenAI
 from .cot import ManualSequentialChainOfThought, Thought
 from .linear import GatedLinearPipeline, SimpleLinearPipeline
@@ -12,6 +12,7 @@ __all__ = [
     "BaseLLM",
     # chat-specific components
     "ChatLLM",
+    "EndpointChatLLM",
     "BaseMessage",
     "HumanMessage",
     "AIMessage",
diff --git a/libs/kotaemon/kotaemon/llms/chats/__init__.py b/libs/kotaemon/kotaemon/llms/chats/__init__.py
index 8d2e12ee..53d44b2b 100644
--- a/libs/kotaemon/kotaemon/llms/chats/__init__.py
+++ b/libs/kotaemon/kotaemon/llms/chats/__init__.py
@@ -1,5 +1,12 @@
 from .base import ChatLLM
+from .endpoint_based import EndpointChatLLM
 from .langchain_based import AzureChatOpenAI, LCChatMixin
 from .llamacpp import LlamaCppChat
 
-__all__ = ["ChatLLM", "AzureChatOpenAI", "LCChatMixin", "LlamaCppChat"]
+__all__ = [
+    "ChatLLM",
+    "EndpointChatLLM",
+    "AzureChatOpenAI",
+    "LCChatMixin",
+    "LlamaCppChat",
+]
diff --git a/libs/kotaemon/kotaemon/llms/chats/endpoint_based.py b/libs/kotaemon/kotaemon/llms/chats/endpoint_based.py
new file mode 100644
index 00000000..170ec8bb
--- /dev/null
+++ b/libs/kotaemon/kotaemon/llms/chats/endpoint_based.py
@@ -0,0 +1,85 @@
+import requests
+
+from kotaemon.base import (
+    AIMessage,
+    BaseMessage,
+    HumanMessage,
+    LLMInterface,
+    SystemMessage,
+)
+
+from .base import ChatLLM
+
+
+class EndpointChatLLM(ChatLLM):
+    """
+    A ChatLLM that uses an endpoint to generate responses. This expects an OpenAI API
+    compatible endpoint.
+
+    Attributes:
+        endpoint_url (str): The url of an OpenAI API compatible endpoint.
+    """
+
+    endpoint_url: str
+
+    def run(
+        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
+    ) -> LLMInterface:
+        """
+        Generate response from messages
+        Args:
+            messages (str | BaseMessage | list[BaseMessage]): history of messages to
+                generate response from
+            **kwargs: additional arguments to pass to the OpenAI API
+        Returns:
+            LLMInterface: generated response
+        """
+        if isinstance(messages, str):
+            input_ = [HumanMessage(content=messages)]
+        elif isinstance(messages, BaseMessage):
+            input_ = [messages]
+        else:
+            input_ = messages
+
+        def decide_role(message: BaseMessage):
+            if isinstance(message, SystemMessage):
+                return "system"
+            elif isinstance(message, AIMessage):
+                return "assistant"
+            else:
+                return "user"
+
+        request_json = {
+            "messages": [{"content": m.text, "role": decide_role(m)} for m in input_]
+        }
+
+        response = requests.post(self.endpoint_url, json=request_json).json()
+
+        content = ""
+        candidates = []
+        if response["choices"]:
+            candidates = [
+                each["message"]["content"]
+                for each in response["choices"]
+                if each["message"]["content"]
+            ]
+            content = candidates[0] if candidates else ""
+
+        return LLMInterface(
+            content=content,
+            candidates=candidates,
+            completion_tokens=response["usage"]["completion_tokens"],
+            total_tokens=response["usage"]["total_tokens"],
+            prompt_tokens=response["usage"]["prompt_tokens"],
+        )
+
+    def invoke(
+        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
+    ) -> LLMInterface:
+        """Same as run"""
+        return self.run(messages, **kwargs)
+
+    async def ainvoke(
+        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
+    ) -> LLMInterface:
+        return self.invoke(messages, **kwargs)
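EndpointChatLLM follows the same pattern on the chat side. A hypothetical usage sketch (run() accepts a plain string, a single message, or a message list):

    from kotaemon.base import HumanMessage, SystemMessage
    from kotaemon.llms import EndpointChatLLM

    llm = EndpointChatLLM(
        endpoint_url="http://localhost:31415/v1/chat/completions"
    )
    result = llm.run(
        [
            SystemMessage(content="You answer briefly."),
            HumanMessage(content="What is kotaemon?"),
        ]
    )
    print(result.content, result.total_tokens)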
diff --git a/libs/ktem/flowsettings.py b/libs/ktem/flowsettings.py
index c9bdaca8..52ebf863 100644
--- a/libs/ktem/flowsettings.py
+++ b/libs/ktem/flowsettings.py
@@ -12,7 +12,7 @@ user_cache_dir.mkdir(parents=True, exist_ok=True)
 
 COHERE_API_KEY = config("COHERE_API_KEY", default="")
 KH_MODE = "dev"
-KH_FEATURE_USER_MANAGEMENT = True
+KH_FEATURE_USER_MANAGEMENT = False
 KH_FEATURE_USER_MANAGEMENT_ADMIN = str(
     config("KH_FEATURE_USER_MANAGEMENT_ADMIN", default="admin")
 )
@@ -21,6 +21,8 @@ KH_FEATURE_USER_MANAGEMENT_PASSWORD = str(
 )
 KH_ENABLE_ALEMBIC = False
 KH_DATABASE = f"sqlite:///{user_cache_dir / 'sql.db'}"
+KH_FILESTORAGE_PATH = str(user_cache_dir / "files")
+
 KH_DOCSTORE = {
     "__type__": "kotaemon.storages.SimpleFileDocumentStore",
     "path": str(user_cache_dir / "docstore"),
@@ -29,51 +31,68 @@ KH_VECTORSTORE = {
     "__type__": "kotaemon.storages.ChromaVectorStore",
     "path": str(user_cache_dir / "vectorstore"),
 }
-KH_FILESTORAGE_PATH = str(user_cache_dir / "files")
 KH_LLMS = {
-    "gpt4": {
+    # example for using Azure OpenAI; the config variables can be set as environment
+    # variables or in the .env file
+    # "gpt4": {
+    #     "def": {
+    #         "__type__": "kotaemon.llms.AzureChatOpenAI",
+    #         "temperature": 0,
+    #         "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
+    #         "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
+    #         "openai_api_version": config("OPENAI_API_VERSION", default=""),
+    #         "deployment_name": "",
+    #         "stream": True,
+    #     },
+    #     "accuracy": 10,
+    #     "cost": 10,
+    #     "default": False,
+    # },
+    # "gpt35": {
+    #     "def": {
+    #         "__type__": "kotaemon.llms.AzureChatOpenAI",
+    #         "temperature": 0,
+    #         "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
+    #         "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
+    #         "openai_api_version": config("OPENAI_API_VERSION", default=""),
+    #         "deployment_name": "",
+    #         "request_timeout": 10,
+    #         "stream": False,
+    #     },
+    #     "accuracy": 5,
+    #     "cost": 5,
+    #     "default": False,
+    # },
+    "local": {
         "def": {
-            "__type__": "kotaemon.llms.AzureChatOpenAI",
-            "temperature": 0,
-            "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
-            "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
-            "openai_api_version": config("OPENAI_API_VERSION", default=""),
-            "deployment_name": "dummy-q2",
-            "stream": True,
+            "__type__": "kotaemon.llms.EndpointChatLLM",
+            "endpoint_url": "http://localhost:31415/v1/chat/completions",
         },
-        "accuracy": 10,
-        "cost": 10,
         "default": False,
     },
-    "gpt35": {
-        "def": {
-            "__type__": "kotaemon.llms.AzureChatOpenAI",
-            "temperature": 0,
-            "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
-            "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
-            "openai_api_version": config("OPENAI_API_VERSION", default=""),
-            "deployment_name": "dummy-q2",
-            "request_timeout": 10,
-            "stream": False,
-        },
-        "accuracy": 5,
-        "cost": 5,
-        "default": True,
-    },
 }
 KH_EMBEDDINGS = {
-    "ada": {
+    # example for using Azure OpenAI; the config variables can be set as environment
+    # variables or in the .env file
+    # "ada": {
+    #     "def": {
+    #         "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
+    #         "model": "text-embedding-ada-002",
+    #         "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
+    #         "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
+    #         "deployment": "",
+    #         "chunk_size": 16,
+    #     },
+    #     "accuracy": 5,
+    #     "cost": 5,
+    #     "default": True,
+    # },
+    "local": {
         "def": {
-            "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
-            "model": "text-embedding-ada-002",
-            "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
-            "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
-            "deployment": "dummy-q2-text-embedding",
-            "chunk_size": 16,
+            "__type__": "kotaemon.embeddings.EndpointEmbeddings",
+            "endpoint_url": "http://localhost:31415/v1/embeddings",
         },
-        "accuracy": 5,
-        "cost": 5,
-        "default": True,
+        "default": False,
     },
 }
 KH_REASONINGS = ["ktem.reasoning.simple.FullQAPipeline"]
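Both "local" entries point at the server that scripts/serve_local.py (added below) starts. For a quick sanity check outside kotaemon, this is the request shape EndpointChatLLM posts to that endpoint:

    import requests

    resp = requests.post(
        "http://localhost:31415/v1/chat/completions",
        json={"messages": [{"role": "user", "content": "ping"}]},
    ).json()
    print(resp["choices"][0]["message"]["content"])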
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 7fe6da6b..ff7c8955 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -118,7 +118,7 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
 
         # rerank
         docs = self.vector_retrieval(text=text, top_k=top_k, **kwargs)
-        if self.get_from_path("reranker"):
+        if docs and self.get_from_path("reranker"):
             docs = self.reranker(docs, query=text)
 
         if not self.get_extra_table:
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index acd768f4..9bcc3c4a 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -200,24 +200,37 @@ class AnswerWithContextPipeline(BaseComponent):
             lang=self.lang,
         )
 
-        citation_task = asyncio.create_task(
-            self.citation_pipeline.ainvoke(context=evidence, question=question)
-        )
-        print("Citation task created")
+        if evidence:
+            citation_task = asyncio.create_task(
+                self.citation_pipeline.ainvoke(context=evidence, question=question)
+            )
+            print("Citation task created")
 
         messages = []
         if self.system_prompt:
             messages.append(SystemMessage(content=self.system_prompt))
         messages.append(HumanMessage(content=prompt))
+
         output = ""
-        for text in self.llm.stream(messages):
-            output += text.text
-            self.report_output({"output": text.text})
-            await asyncio.sleep(0)
+        try:
+            # try streaming first
+            print("Trying LLM streaming")
+            for text in self.llm.stream(messages):
+                output += text.text
+                self.report_output({"output": text.text})
+                await asyncio.sleep(0)
+        except NotImplementedError:
+            print("Streaming is not supported, falling back to normal processing")
+            output = self.llm(messages).text
+            self.report_output({"output": output})
 
         # retrieve the citation
         print("Waiting for citation task")
-        citation = await citation_task
+        if evidence:
+            citation = await citation_task
+        else:
+            citation = None
+
         answer = Document(text=output, metadata={"citation": citation})
 
         return answer
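The stream-then-fallback logic above generalizes to any pipeline that wants streaming when available. In isolation, a sketch assuming only that the LLM yields chunks with a .text attribute and raises NotImplementedError when it cannot stream:

    def generate(llm, messages) -> str:
        """Prefer token streaming; fall back to one blocking call."""
        output = ""
        try:
            for chunk in llm.stream(messages):
                output += chunk.text
        except NotImplementedError:
            output = llm(messages).text
        return output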
diff --git a/libs/ktem/launch.py b/libs/ktem/launch.py
index 1f436c5b..2ac7a1aa 100644
--- a/libs/ktem/launch.py
+++ b/libs/ktem/launch.py
@@ -2,4 +2,4 @@ from ktem.main import App
 
 app = App()
 demo = app.make()
-demo.queue().launch(favicon_path=app._favicon)
+demo.queue().launch(favicon_path=app._favicon, inbrowser=True)
diff --git a/scripts/run_linux.sh b/scripts/run_linux.sh
index 723f90e2..7298b87b 100755
--- a/scripts/run_linux.sh
+++ b/scripts/run_linux.sh
@@ -12,23 +12,23 @@ function install_miniconda() {
     # Miniconda installer is limited to two main architectures: x86_64 and arm64
     local sys_arch=$(uname -m)
     case "${sys_arch}" in
-    x86_64*) sys_arch="x86_64";;
-    arm64*) sys_arch="aarch64";;
-    aarch64*) sys_arch="aarch64";;
-    *) {
-        echo "Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64"
-        exit 1
-    };;
+    x86_64*) sys_arch="x86_64" ;;
+    arm64*) sys_arch="aarch64" ;;
+    aarch64*) sys_arch="aarch64" ;;
+    *) {
+        echo "Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64"
+        exit 1
+    } ;;
     esac
 
     # if miniconda has not been installed, download and install it
-    if ! "${conda_root}/bin/conda" --version &>/dev/null ; then
+    if ! "${conda_root}/bin/conda" --version &>/dev/null; then
         if [ ! -d "$install_dir/miniconda_installer.sh" ]; then
             echo "Downloading Miniconda from $miniconda_url"
             local miniconda_url="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${sys_arch}.sh"
             mkdir -p "$install_dir"
-            curl -Lk "$miniconda_url" > "$install_dir/miniconda_installer.sh"
+            curl -Lk "$miniconda_url" >"$install_dir/miniconda_installer.sh"
         fi
 
         echo "Installing Miniconda to $conda_root"
@@ -64,7 +64,7 @@ function create_conda_env() {
 
 function activate_conda_env() {
     # deactivate the current env(s) to avoid conflicts
-    { conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
+    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null
 
     # check if conda env is broken (because of interruption during creation)
     if [ ! -f "$env_dir/bin/python" ]; then
@@ -80,7 +80,7 @@ function activate_conda_env() {
     echo "Activate conda environment at $CONDA_PREFIX"
 }
 
-function deactivate_conda_env(){
+function deactivate_conda_env() {
     # Conda deactivate if we are in the right env
     if [ "$CONDA_PREFIX" == "$env_dir" ]; then
         conda deactivate
@@ -89,7 +89,7 @@ function deactivate_conda_env(){
 }
 
 function install_dependencies() {
-    if pip list 2> /dev/null | grep -q "kotaemon"; then
+    if pip list 2>/dev/null | grep -q "kotaemon"; then
         echo "Requirements are already installed"
     else
         local kotaemon_root="$(pwd)/libs/kotaemon/.[dev]"
@@ -101,7 +101,7 @@ function install_dependencies() {
         echo "" && echo "Install ktem's requirements"
         python -m pip install -e "$ktem_root"
 
-        if ! pip list 2> /dev/null | grep -q "kotaemon"; then
+        if ! pip list 2>/dev/null | grep -q "kotaemon"; then
            echo "Installation failed. You may need to run the installer again."
             deactivate_conda_env
             exit 1
@@ -123,6 +123,10 @@ function install_dependencies() {
     fi
 }
 
+function setup_local_model() {
+    python "$(pwd)/scripts/serve_local.py"
+}
+
 function launch_ui() {
     gradio $(pwd)/libs/ktem/launch.py || {
         echo "" && echo "Will exit now..."
@@ -159,6 +163,9 @@ activate_conda_env
 print_highlight "Install requirements"
 install_dependencies
 
+print_highlight "Setting up a local model"
+setup_local_model
+
 print_highlight "Launching web UI. Please wait..."
 launch_ui
diff --git a/scripts/run_macos.sh b/scripts/run_macos.sh
old mode 100644
new mode 100755
index ba573397..6ad9901d
--- a/scripts/run_macos.sh
+++ b/scripts/run_macos.sh
@@ -12,22 +12,22 @@ function install_miniconda() {
     # Miniconda installer is limited to two main architectures: x86_64 and arm64
     local sys_arch=$(uname -m)
     case "${sys_arch}" in
-    x86_64*) sys_arch="x86_64";;
-    arm64*) sys_arch="arm64";;
-    *) {
-        echo "Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64"
-        exit 1
-    };;
+    x86_64*) sys_arch="x86_64" ;;
+    arm64*) sys_arch="arm64" ;;
+    *) {
+        echo "Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64"
+        exit 1
+    } ;;
     esac
 
     # if miniconda has not been installed, download and install it
-    if ! "${conda_root}/bin/conda" --version &>/dev/null ; then
+    if ! "${conda_root}/bin/conda" --version &>/dev/null; then
         if [ ! -d "$install_dir/miniconda_installer.sh" ]; then
             local miniconda_url="https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-${sys_arch}.sh"
             echo "Downloading Miniconda from $miniconda_url"
             mkdir -p "$install_dir"
-            curl -Lk "$miniconda_url" > "$install_dir/miniconda_installer.sh"
+            curl -Lk "$miniconda_url" >"$install_dir/miniconda_installer.sh"
         fi
 
         echo "Installing Miniconda to $conda_root"
@@ -63,7 +63,7 @@ function create_conda_env() {
 
 function activate_conda_env() {
     # deactivate the current env(s) to avoid conflicts
-    { conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
+    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null
 
     # check if conda env is broken (because of interruption during creation)
     if [ ! -f "$env_dir/bin/python" ]; then
@@ -79,7 +79,7 @@ function activate_conda_env() {
     echo "Activate conda environment at $CONDA_PREFIX"
 }
 
-function deactivate_conda_env(){
+function deactivate_conda_env() {
     # Conda deactivate if we are in the right env
     if [[ "$CONDA_PREFIX" == "$env_dir" ]]; then
         conda deactivate
@@ -89,7 +89,7 @@ function deactivate_conda_env(){
 
 function install_dependencies() {
     # check if the env is already setup by finding 'kotaemon' in 'pip list'
-    if pip list 2> /dev/null | grep -q "kotaemon"; then
+    if pip list 2>/dev/null | grep -q "kotaemon"; then
         echo "Requirements are already installed"
     else
         local kotaemon_root="$(pwd)/libs/kotaemon/.[dev]"
@@ -101,7 +101,7 @@ function install_dependencies() {
         echo "" && echo "Install ktem's requirements"
         python -m pip install -e "$ktem_root"
 
-        if ! pip list 2> /dev/null | grep -q "kotaemon"; then
+        if ! pip list 2>/dev/null | grep -q "kotaemon"; then
             echo "Installation failed. You may need to run the installer again."
             deactivate_conda_env
             exit 1
@@ -124,6 +124,10 @@ function install_dependencies() {
     fi
 }
 
+function setup_local_model() {
+    python "$(pwd)/scripts/serve_local.py"
+}
+
 function launch_ui() {
     gradio $(pwd)/libs/ktem/launch.py || {
         echo "" && echo "Will exit now..."
@@ -141,7 +145,10 @@ function print_highlight() {
 # Main script execution
 
 # move two levels up from the dir where this script resides
-cd "$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" && cd ..
+cd "$(
+    cd -- "$(dirname "$0")" >/dev/null 2>&1
+    pwd -P
+)" && cd ..
 
 install_dir="$(pwd)/install_dir"
 conda_root="${install_dir}/conda"
@@ -160,6 +167,9 @@ activate_conda_env
 print_highlight "Install requirements"
 install_dependencies
 
+print_highlight "Setting up a local model"
+setup_local_model
+
 print_highlight "Launching web UI. Please wait..."
 launch_ui
diff --git a/scripts/run_windows.bat b/scripts/run_windows.bat
index 257e232c..4e5db862 100644
--- a/scripts/run_windows.bat
+++ b/scripts/run_windows.bat
@@ -14,6 +14,7 @@ IF %ERRORLEVEL% EQU 0 (
     ECHO The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later.
     GOTO :end
 )
+
 CALL :print_highlight "Setup Anaconda/Miniconda"
 CALL :download_and_install_miniconda
 :: check if function run fail, then exit the script
@@ -30,6 +31,10 @@ CALL :print_highlight "Install requirements"
 CALL :install_dependencies
 IF ERRORLEVEL 1 GOTO :end
 
+CALL :print_highlight "Setting up a local model"
+CALL :setup_local_model
+IF ERRORLEVEL 1 GOTO :end
+
 CALL :print_highlight "Launching web UI. Please wait..."
 CALL :launch_ui
 
@@ -126,6 +131,10 @@ IF %ERRORLEVEL% == 0 (
 )
 GOTO :eof
 
+:setup_local_model
+python "%CD%\scripts\serve_local.py"
+GOTO :eof
+
 :launch_ui
 CALL gradio "%CD%\libs\ktem\launch.py" || ( ECHO. && ECHO Will exit now... && GOTO :exit_func_with_error )
 GOTO :eof
diff --git a/scripts/serve_local.py b/scripts/serve_local.py
new file mode 100644
index 00000000..61b8f778
--- /dev/null
+++ b/scripts/serve_local.py
@@ -0,0 +1,81 @@
+import platform
+import subprocess
+from inspect import currentframe, getframeinfo
+from pathlib import Path
+
+import dotenv
+
+configs = dotenv.dotenv_values(".env")
+
+system_name = platform.system()
+
+cur_frame = currentframe()
+if cur_frame is None:
+    raise ValueError("Cannot get the current frame.")
+this_file = getframeinfo(cur_frame).filename
+this_dir = Path(this_file).parent
+
+
+def serve_llamacpp_python(local_model_file: Path, **kwargs):
+    def guess_chat_format(local_model_file):
+        model_name = local_model_file.stem
+
+        # handle known cases that the server backends handle incorrectly
+        # this is highly heuristic and should be expanded later
+        # server backends usually have logic for this but they could still be wrong
+        if "qwen" in model_name:
+            return "qwen"
+
+        return None
+
+    # default port
+    if "port" not in kwargs:
+        kwargs["port"] = 31415
+
+    chat_format = guess_chat_format(local_model_file)
+    if chat_format:
+        kwargs = {**kwargs, "chat_format": chat_format}
+
+    # these scripts create a separate conda env and run the server
+    if system_name == "Windows":
+        script_file = this_dir / "server_llamacpp_windows.bat"
+    elif system_name == "Linux":
+        script_file = this_dir / "server_llamacpp_linux.sh"
+    elif system_name == "Darwin":
+        script_file = this_dir / "server_llamacpp_macos.sh"
+    else:
+        raise ValueError(f"Unsupported system: {system_name}")
+
+    args = " ".join(f"--{k} {v}" for k, v in kwargs.items())
+
+    cmd = f"{script_file} --model {local_model_file} {args}"
+    subprocess.Popen(cmd, shell=True)
+
+
+def main():
+    local_model_file = configs.get("LOCAL_MODEL", "")
+
+    if not local_model_file:
+        print("LOCAL_MODEL not set in the `.env` file.")
+        return
+
+    local_model_file = Path(local_model_file)
+    if not local_model_file.exists():
+        print(f"Local model not found: {local_model_file}")
+        return
+
+    print(f"Local model found: {local_model_file}")
+    will_start_server = input("Do you want to use this local model? (y/n): ")
+
+    if will_start_server.lower().strip() not in ["y", "yes"]:
+        return
+
+    print("Starting the local server...")
+    if local_model_file.suffix == ".gguf":
+        serve_llamacpp_python(local_model_file)
+    else:
+        raise ValueError(f"Unsupported model file type: {local_model_file.suffix}")
+
+
+if __name__ == "__main__":
+    main()
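serve_local.py reads only the LOCAL_MODEL key from .env. With a hypothetical LOCAL_MODEL=models/qwen-7b-chat.gguf, serve_llamacpp_python assembles roughly the following command (the qwen chat format is guessed from the file stem):

    from pathlib import Path

    # hypothetical model path, not part of the repository
    model = Path("models/qwen-7b-chat.gguf")
    kwargs = {"port": 31415, "chat_format": "qwen"}
    args = " ".join(f"--{k} {v}" for k, v in kwargs.items())
    print(f"server_llamacpp_linux.sh --model {model} {args}")
    # server_llamacpp_linux.sh --model models/qwen-7b-chat.gguf --port 31415 --chat_format qwen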
diff --git a/scripts/server_llamacpp_linux.sh b/scripts/server_llamacpp_linux.sh
new file mode 100755
index 00000000..f72ccde5
--- /dev/null
+++ b/scripts/server_llamacpp_linux.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+# functions used in the main code execution
+function print_highlight() {
+    local message="${1}"
+    echo "" && echo "******************************************************"
+    echo "$message"
+    echo "******************************************************" && echo ""
+}
+
+function path_sanity_check() {
+    echo "Path sanity checking"
+    if [[ $PWD =~ \ ]]; then
+        print_highlight "This script relies on Miniconda which can't be silently installed under a path with spaces. Please run it from a path without spaces."
+        exit 1
+    fi
+}
+
+function deactivate_environment() {
+    echo "Deactivate existing environment(s)"
+    # deactivate existing conda envs as needed to avoid conflicts
+    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null
+}
+
+function check_conda_existence() {
+    echo "Check for conda existence"
+    conda_exists="F"
+
+    # figure out whether conda exists
+    if "$CONDA_ROOT_PREFIX/bin/conda" --version &>/dev/null; then conda_exists="T"; fi
+
+    # verify if conda is installed by the main app, if not then raise error
+    if [ "$conda_exists" == "F" ]; then
+        # test the conda binary
+        print_highlight "conda is not installed, seems like the app wasn't installed correctly."
+        exit 1
+    fi
+}
+
+function create_conda_environment() {
+    # create the environment if needed
+    if [ ! -e "$INSTALL_ENV_DIR" ]; then
+        echo "Create conda environment"
+        "$CONDA_ROOT_PREFIX/bin/conda" create -y -k --prefix "$INSTALL_ENV_DIR" python="$PYTHON_VERSION" || {
+            echo && print_highlight "Conda environment creation failed." && exit 1
+        }
+    fi
+
+    # check if conda environment was actually created
+    if [ ! -e "$INSTALL_ENV_DIR/bin/python" ]; then
+        print_highlight "Conda environment was not correctly created."
+        exit 1
+    fi
+}
+
+function isolate_environment() {
+    echo "Isolate environment"
+    export PYTHONNOUSERSITE=1
+    unset PYTHONPATH
+    unset PYTHONHOME
+}
+
+function activate_environment() {
+    echo "Activate conda environment"
+    source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script)
+    conda activate "$INSTALL_ENV_DIR"
+}
+
+# main code execution
+
+cd "$(dirname "${BASH_SOURCE[0]}")/.."
+echo "Changed the current directory to: $(pwd)"
+
+path_sanity_check
+deactivate_environment
+
+# config
+ENV_NAME="llama-cpp-python-server"
+PYTHON_VERSION="3.10"
+CONDA_ROOT_PREFIX="$(pwd)/install_dir/conda"
+INSTALL_ENV_DIR="$(pwd)/install_dir/server_envs/${ENV_NAME}"
+
+check_conda_existence
+create_conda_environment
+isolate_environment
+activate_environment
+
+# install dependencies
+# ver 0.2.56 produces a segmentation fault for /embeddings on macOS
+python -m pip install "llama-cpp-python[server]!=0.2.56"
+
+# start the server with the passed params
+python -m llama_cpp.server "$@"
+
+conda deactivate
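The run scripts launch this server in the background (via subprocess.Popen) just before starting the UI, so there is a window where the endpoint is not yet up. A readiness probe is straightforward; a sketch, assuming the /v1/models route that llama-cpp-python's OpenAI-compatible server exposes:

    import time

    import requests

    def wait_until_ready(url="http://localhost:31415/v1/models", timeout=60.0):
        """Poll the server until it responds or the timeout elapses."""
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            try:
                if requests.get(url, timeout=2).ok:
                    return True
            except requests.ConnectionError:
                pass
            time.sleep(1)
        return False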
+echo "Changed the current directory to: $(pwd)" + +path_sanity_check +deactivate_environment + +# config +ENV_NAME="llama-cpp-python-server" +PYTHON_VERSION="3.10" +CONDA_ROOT_PREFIX="$(pwd)/install_dir/conda" +INSTALL_ENV_DIR="$(pwd)/install_dir/server_envs/${ENV_NAME}" + +check_conda_existence +create_conda_environment +isolate_environment +activate_environment + +# install dependencies +# ver 0.2.56 produces segment error for /embeddings on MacOS +python -m pip install llama-cpp-python[server]!=0.2.56 + +# start the server with passed params +python -m llama_cpp.server $@ + +conda deactivate diff --git a/scripts/server_llamacpp_macos.sh b/scripts/server_llamacpp_macos.sh new file mode 100755 index 00000000..4ed9ac2a --- /dev/null +++ b/scripts/server_llamacpp_macos.sh @@ -0,0 +1,96 @@ +#!/bin/bash + +# functions used in the main code execution +function print_highlight() { + local message="${1}" + echo "" && echo "******************************************************" + echo $message + echo "******************************************************" && echo "" +} + +function path_sanity_check() { + echo "Path sanity checking" + if [[ "$(pwd)" =~ " " ]]; then + print_highlight "This script relies on Miniconda which can't be silently installed under a path with spaces. Please run it from a path without spaces." + exit 1 + fi +} + +function deactivate_environment() { + echo "Deactivate existing environment(s)" + # deactivate existing conda envs as needed to avoid conflicts + { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null +} + +function check_conda_existence() { + echo "Check for conda existence" + conda_exists="F" + + # figure out whether conda exists + if "$CONDA_ROOT_PREFIX/bin/conda" --version &>/dev/null; then conda_exists="T"; fi + + # verify if conda is installed by the main app, if not then raise error + if [ "$conda_exists" == "F" ]; then + # test the conda binary + print_highlight "conda is not installed, seems like the app wasn't installed correctly." + exit + fi +} + +function create_conda_environment() { + # create the environment if needed + if [ ! -d "${INSTALL_ENV_DIR}" ]; then + echo "Create conda environment" + "${CONDA_ROOT_PREFIX}/bin/conda" create -y -k --prefix "$INSTALL_ENV_DIR" python="$PYTHON_VERSION" || (echo && print_highlight "Conda environment creation failed." && exit 1) + fi + + # check if conda environment was actually created + if [ ! -f "$INSTALL_ENV_DIR/bin/python" ]; then + print_highlight "Conda environment was not correctly created." + exit 1 + fi +} + +function isolate_environment() { + echo "Isolate environment" + export PYTHONNOUSERSITE=1 + unset PYTHONPATH + unset PYTHONHOME +} + +function activate_environment() { + echo "Activate conda environment" + source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script) + conda activate "$INSTALL_ENV_DIR" +} + +# main code execution + +cd "$( + cd -- "$(dirname "$0")" >/dev/null 2>&1 + pwd -P +)" && cd .. 
+echo "Changed the current directory to: $(pwd)" + +path_sanity_check +deactivate_environment + +# config +ENV_NAME="llama-cpp-python-server" +PYTHON_VERSION="3.10" +CONDA_ROOT_PREFIX="$(pwd)/install_dir/conda" +INSTALL_ENV_DIR="$(pwd)/install_dir/server_envs/${ENV_NAME}" + +check_conda_existence +create_conda_environment +isolate_environment +activate_environment + +# install dependencies +# ver 0.2.56 produces segment error for /embeddings on MacOS +python -m pip install llama-cpp-python[server]!=0.2.56 + +# start the server with passed params +python -m llama_cpp.server $@ + +conda deactivate diff --git a/scripts/server_llamacpp_windows.bat b/scripts/server_llamacpp_windows.bat new file mode 100644 index 00000000..48779dbe --- /dev/null +++ b/scripts/server_llamacpp_windows.bat @@ -0,0 +1,115 @@ +@echo off + +@rem main code execution + +call :print_highlight "Starting inference server for llama-cpp" + +cd /D "%~dp0\.." +echo "Change the current directory to: %cd%" + +call :path_sanity_check +call :deactivate_environment + +@rem config +set ENV_NAME=llama-cpp-python-server +set PYTHON_VERSION=3.10 +set CONDA_ROOT_PREFIX=%cd%\install_dir\conda +set INSTALL_ENV_DIR=%cd%\install_dir\server_envs\%ENV_NAME% + +echo "Python version: %PYTHON_VERSION%" +echo "Conda prefix: %CONDA_ROOT_PREFIX%" +echo "Environment path: %INSTALL_ENV_DIR%" + +@rem handle conda environment +call :check_conda_existence +call :create_conda_environment +call :isolate_environment +call :activate_environment + +@rem install dependencies +@rem ver 0.2.56 produces segment error for /embeddings on MacOS +call python -m pip install llama-cpp-python[server]!=0.2.56 + +@REM @rem start the server with passed params +call python -m llama_cpp.server %* +call conda deactivate + +goto :end +@rem the end of main code execution + + +@rem below are the functions used in the above execution + + +:print_highlight +echo. +echo ****************************************************** +echo %~1 +echo ****************************************************** +echo. +goto :eof + + +:path_sanity_check +echo "Path sanity checking" +echo "%cd%"| findstr /C:" " >nul ^ +&& (call :print_highlight "This script relies on Miniconda which can not be silently installed under a path with spaces." ^ +&& goto :end) +goto :eof + + +:deactivate_environment +echo "Deactivate existing environment(s)" +(call conda deactivate && call conda deactivate && call conda deactivate) 2>nul +goto :eof + + +:check_conda_existence +echo "Check for conda existence" +set conda_exists=F + +@rem figure out whether conda exists +call "%CONDA_ROOT_PREFIX%\_conda.exe" --version >nul 2>&1 +if "%ERRORLEVEL%" EQU "0" set conda_exists=T + +@rem verify if conda is installed by the main app, if not then raise error +if "%conda_exists%" == "F" ( + call :print_highlight "conda is not installed, seems like the app wasn't installed correctly." + goto :end +) +goto :eof + + +:create_conda_environment +@rem create the environment if needed +if not exist "%INSTALL_ENV_DIR%" ( + echo "Create conda environment" + call "%CONDA_ROOT_PREFIX%\_conda.exe" create ^ + --no-shortcuts -y -k --prefix "%INSTALL_ENV_DIR%" python="%PYTHON_VERSION%" || ^ + ( echo. && call :print_highlight "Conda environment creation failed." && goto :end ) +) + +@rem check if conda environment was actually created +if not exist "%INSTALL_ENV_DIR%\python.exe" ( + call :print_highlight "Conda environment was not correctly created." 
+    goto :end
+)
+goto :eof
+
+
+:isolate_environment
+echo "Isolate environment"
+set PYTHONNOUSERSITE=1
+set PYTHONPATH=
+set PYTHONHOME=
+goto :eof
+
+
+:activate_environment
+echo "Activate conda environment"
+call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ^
+( echo. && call :print_highlight "Miniconda hook not found." && goto :end )
+goto :eof
+
+
+:end