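"""Serve a local LLM over a llama.cpp-based server.

Reads the LOCAL_MODEL path from the environment (via python-decouple's
`.env` support), confirms with the user, and launches an OS-specific
launcher script in a background process.
"""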
import platform
import subprocess
from inspect import currentframe, getframeinfo
from pathlib import Path

from decouple import config
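
# platform.system() reports "Windows", "Linux", or "Darwin" (macOS) on the
# platforms supported below; used to pick the matching launcher script.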
system_name = platform.system()
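
# Locate this file via the current frame so the launcher scripts, which
# sit in the same directory, can be found.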
cur_frame = currentframe()
if cur_frame is None:
    raise ValueError("Cannot get the current frame.")
this_file = getframeinfo(cur_frame).filename
this_dir = Path(this_file).parent


def serve_llamacpp_python(local_model_file: Path, **kwargs):
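    """Start a llama.cpp server for `local_model_file` via an OS-specific script.

    Any extra keyword arguments are forwarded to the launcher script as
    `--key value` command-line flags.
    """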
    def guess_chat_format(local_model_file):
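        """Best-effort guess of the chat format from the model file name.

        Returns a chat-format name the server understands (e.g. "qwen"),
        or None to let the server backend decide.
        """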
        model_name = local_model_file.stem

        # Handle known cases that the server backends get wrong. This is
        # highly heuristic and should be expanded later; server backends
        # usually have logic for this, but they can still be wrong.
        if "qwen" in model_name:
            return "qwen"

        return None

    # Default port if the caller did not specify one.
    if "port" not in kwargs:
        kwargs["port"] = 31415

    chat_format = guess_chat_format(local_model_file)
    if chat_format:
        kwargs = {**kwargs, "chat_format": chat_format}

    # These scripts create a separate conda env and run the server.
    if system_name == "Windows":
        script_file = this_dir / "server_llamacpp_windows.bat"
    elif system_name == "Linux":
        script_file = this_dir / "server_llamacpp_linux.sh"
    elif system_name == "Darwin":
        script_file = this_dir / "server_llamacpp_macos.sh"
    else:
        raise ValueError(f"Unsupported system: {system_name}")
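
    # Render the remaining kwargs as `--key value` CLI flags for the script.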
    args = " ".join(f"--{k} {v}" for k, v in kwargs.items())

    # shell=True with bare string interpolation assumes paths without
    # spaces or shell metacharacters.
    cmd = f"{script_file} --model {local_model_file} {args}"
    subprocess.Popen(cmd, shell=True)


def main():
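    """Check the model configured in `.env` and start a local server for it.

    Expects a line like (path hypothetical):
        LOCAL_MODEL=/models/my-model.gguf
    """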
    local_model_file = config("LOCAL_MODEL", default="")

    if not local_model_file:
        print("LOCAL_MODEL not set in the `.env` file.")
        return
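
    # Normalize to a Path so we can check existence and file type.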
    local_model_file = Path(local_model_file)
    if not local_model_file.exists():
        print(f"Local model not found: {local_model_file}")
        return

    print(f"Local model found: {local_model_file}")
    will_start_server = input("Do you want to use this local model? (y/n): ")

    if will_start_server.lower().strip() not in ["y", "yes"]:
        return
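
    # Only GGUF models are supported so far; dispatch to the llama.cpp launcher.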
    print("Starting the local server...")
    if local_model_file.suffix == ".gguf":
        serve_llamacpp_python(local_model_file)
    else:
        raise ValueError(f"Unsupported model file type: {local_model_file.suffix}")


if __name__ == "__main__":
    main()