LLMs-from-scratch/ch05/06_user_interface/app_own.py

# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
#   - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch

from pathlib import Path
import sys

import tiktoken
import torch
import chainlit

# For llms_from_scratch installation instructions, see:
# https://github.com/rasbt/LLMs-from-scratch/tree/main/pkg
from llms_from_scratch.ch04 import GPTModel
from llms_from_scratch.ch05 import (
    generate,
    text_to_token_ids,
    token_ids_to_text,
)


# Run on a CUDA GPU when PyTorch reports one as available; otherwise use the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


def get_model_and_tokenizer():
    """
    Code to load a GPT-2 model with pretrained weights generated in chapter 5.
    This requires that you run the code in chapter 5 first, which generates the necessary model.pth file.

    The model.pth path is relative to the current working directory, so the
    app has to be started from the folder that contains this file.

    Returns:
        A `(tokenizer, model, config)` tuple: the tiktoken "gpt2" encoding, the
        `GPTModel` instance (moved to `device` and switched to evaluation mode),
        and the configuration dictionary the model was built from.

    Raises:
        SystemExit: With exit status 1 if the model.pth file does not exist.
    """

    GPT_CONFIG_124M = {
        "vocab_size": 50257,    # Vocabulary size
        "context_length": 256,  # Shortened context length (orig: 1024)
        "emb_dim": 768,         # Embedding dimension
        "n_heads": 12,          # Number of attention heads
        "n_layers": 12,         # Number of layers
        "drop_rate": 0.1,       # Dropout rate
        "qkv_bias": False       # Query-key-value bias
    }

    tokenizer = tiktoken.get_encoding("gpt2")

    model_path = Path("..") / "01_main-chapter-code" / "model.pth"
    if not model_path.exists():
        print(f"Could not find the {model_path} file. Please run the chapter 5 code (ch05.ipynb) to generate the model.pth file.")
        # Exit with a non-zero status so shells and scripts see this as a failure
        sys.exit(1)

    # `map_location` remaps the stored tensors onto the device chosen above, so
    # a checkpoint saved from a GPU run can still be loaded on a CPU-only machine
    checkpoint = torch.load(model_path, map_location=device, weights_only=True)
    model = GPTModel(GPT_CONFIG_124M)
    model.load_state_dict(checkpoint)
    model.to(device)
    # Switch to evaluation mode: with a drop rate of 0.1 the dropout layers
    # would otherwise stay active and randomly perturb the generated text
    model.eval()

    return tokenizer, model, GPT_CONFIG_124M


# Load the tokenizer, model, and model configuration once when this module is
# imported, so the chainlit function below can reuse them for every message
tokenizer, model, model_config = get_model_and_tokenizer()


@chainlit.on_message
async def main(message: chainlit.Message):
    """
    The main Chainlit function; it is registered as the handler for user messages.

    Generates up to 50 new tokens from the text in `message.content` and sends
    the decoded result back to the interface as a new message.
    """
    # The user text is provided via `message.content`
    prompt_ids = text_to_token_ids(message.content, tokenizer).to(device)

    # `generate` is a synchronous, compute-heavy call. Running it through
    # `chainlit.make_async` executes it in a worker thread, so the event loop
    # is not blocked while the model produces tokens.
    run_generate = chainlit.make_async(generate)
    token_ids = await run_generate(  # function uses `with torch.no_grad()` internally already
        model=model,
        idx=prompt_ids,
        max_new_tokens=50,
        context_size=model_config["context_length"],
        top_k=1,
        temperature=0.0
    )

    text = token_ids_to_text(token_ids, tokenizer)

    await chainlit.Message(
        content=text,  # This returns the model response to the interface
    ).send()
Add chatpgpt-like user interface (#360) * Add chatpgpt-like user interface * fixes 2024-09-17 08:26:44 -05:00			`# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).`
			`# Source for "Build a Large Language Model From Scratch"`
			`# - https://www.manning.com/books/build-a-large-language-model-from-scratch`
			`# Code: https://github.com/rasbt/LLMs-from-scratch`

			`from pathlib import Path`
			`import sys`

			`import tiktoken`
			`import torch`
			`import chainlit`

Add PyPI package (#576) * Add PyPI package * fixes * fixes 2025-03-23 19:28:49 -05:00			`# For llms_from_scratch installation instructions, see:`
			`# https://github.com/rasbt/LLMs-from-scratch/tree/main/pkg`
			`from llms_from_scratch.ch04 import GPTModel`
			`from llms_from_scratch.ch05 import (`
Add chatpgpt-like user interface (#360) * Add chatpgpt-like user interface * fixes 2024-09-17 08:26:44 -05:00			`generate,`
			`text_to_token_ids,`
			`token_ids_to_text,`
			`)`

Add PyPI package (#576) * Add PyPI package * fixes * fixes 2025-03-23 19:28:49 -05:00
Chainlit bonus material fixes (#361) * fix cmd * moved idx to device * improved code with clone().detach() * fixed path * fix: added extra line for pep8 * updated .gitginore * Update ch05/06_user_interface/app_orig.py * Update ch05/06_user_interface/app_own.py * Apply suggestions from code review --------- Co-authored-by: Sebastian Raschka <mail@sebastianraschka.com> 2024-09-18 17:08:50 +02:00			`device = torch.device("cuda" if torch.cuda.is_available() else "cpu")`

Add chatpgpt-like user interface (#360) * Add chatpgpt-like user interface * fixes 2024-09-17 08:26:44 -05:00
			`def get_model_and_tokenizer():`
			`"""`
Add user interface to ch06 and ch07 (#366) * Add user interface to ch06 and ch07 * pep8 * fix url 2024-09-21 18:33:00 -07:00			`Code to load a GPT-2 model with pretrained weights generated in chapter 5.`
Add chatpgpt-like user interface (#360) * Add chatpgpt-like user interface * fixes 2024-09-17 08:26:44 -05:00			`This requires that you run the code in chapter 5 first, which generates the necessary model.pth file.`
			`"""`

			`GPT_CONFIG_124M = {`
			`"vocab_size": 50257, # Vocabulary size`
			`"context_length": 256, # Shortened context length (orig: 1024)`
			`"emb_dim": 768, # Embedding dimension`
			`"n_heads": 12, # Number of attention heads`
			`"n_layers": 12, # Number of layers`
			`"drop_rate": 0.1, # Dropout rate`
			`"qkv_bias": False # Query-key-value bias`
			`}`

			`tokenizer = tiktoken.get_encoding("gpt2")`

			`model_path = Path("..") / "01_main-chapter-code" / "model.pth"`
			`if not model_path.exists():`
			`print(f"Could not find the {model_path} file. Please run the chapter 5 code (ch05.ipynb) to generate the model.pth file.")`
			`sys.exit()`

Chainlit bonus material fixes (#361) * fix cmd * moved idx to device * improved code with clone().detach() * fixed path * fix: added extra line for pep8 * updated .gitginore * Update ch05/06_user_interface/app_orig.py * Update ch05/06_user_interface/app_own.py * Apply suggestions from code review --------- Co-authored-by: Sebastian Raschka <mail@sebastianraschka.com> 2024-09-18 17:08:50 +02:00			`checkpoint = torch.load(model_path, weights_only=True)`
Add chatpgpt-like user interface (#360) * Add chatpgpt-like user interface * fixes 2024-09-17 08:26:44 -05:00			`model = GPTModel(GPT_CONFIG_124M)`
			`model.load_state_dict(checkpoint)`
			`model.to(device)`

			`return tokenizer, model, GPT_CONFIG_124M`


			`# Obtain the necessary tokenizer and model files for the chainlit function below`
			`tokenizer, model, model_config = get_model_and_tokenizer()`


			`@chainlit.on_message`
			`async def main(message: chainlit.Message):`
			`"""`
			`The main Chainlit function.`
			`"""`
Chainlit bonus material fixes (#361) * fix cmd * moved idx to device * improved code with clone().detach() * fixed path * fix: added extra line for pep8 * updated .gitginore * Update ch05/06_user_interface/app_orig.py * Update ch05/06_user_interface/app_own.py * Apply suggestions from code review --------- Co-authored-by: Sebastian Raschka <mail@sebastianraschka.com> 2024-09-18 17:08:50 +02:00			token_ids = generate( # function uses `with torch.no_grad()` internally already
Add chatpgpt-like user interface (#360) * Add chatpgpt-like user interface * fixes 2024-09-17 08:26:44 -05:00			`model=model,`
Chainlit bonus material fixes (#361) * fix cmd * moved idx to device * improved code with clone().detach() * fixed path * fix: added extra line for pep8 * updated .gitginore * Update ch05/06_user_interface/app_orig.py * Update ch05/06_user_interface/app_own.py * Apply suggestions from code review --------- Co-authored-by: Sebastian Raschka <mail@sebastianraschka.com> 2024-09-18 17:08:50 +02:00			idx=text_to_token_ids(message.content, tokenizer).to(device), # The user text is provided via as `message.content`
Add chatpgpt-like user interface (#360) * Add chatpgpt-like user interface * fixes 2024-09-17 08:26:44 -05:00			`max_new_tokens=50,`
			`context_size=model_config["context_length"],`
			`top_k=1,`
			`temperature=0.0`
			`)`

			`text = token_ids_to_text(token_ids, tokenizer)`

			`await chainlit.Message(`
			`content=f"{text}", # This returns the model response to the interface`
			`).send()`