2024-09-17 08:26:44 -05:00
|
|
|
# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
|
|
|
|
# Source for "Build a Large Language Model From Scratch"
|
|
|
|
# - https://www.manning.com/books/build-a-large-language-model-from-scratch
|
|
|
|
# Code: https://github.com/rasbt/LLMs-from-scratch
|
|
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
import sys
|
|
|
|
|
|
|
|
import tiktoken
|
|
|
|
import torch
|
|
|
|
import chainlit
|
|
|
|
|
2025-03-23 19:28:49 -05:00
|
|
|
# For llms_from_scratch installation instructions, see:
|
|
|
|
# https://github.com/rasbt/LLMs-from-scratch/tree/main/pkg
|
|
|
|
from llms_from_scratch.ch04 import GPTModel
|
|
|
|
from llms_from_scratch.ch05 import (
|
2024-09-17 08:26:44 -05:00
|
|
|
generate,
|
|
|
|
text_to_token_ids,
|
|
|
|
token_ids_to_text,
|
|
|
|
)
|
|
|
|
|
2025-03-23 19:28:49 -05:00
|
|
|
|
2024-09-18 17:08:50 +02:00
|
|
|
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# The model and the input token tensors below are moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
2024-09-17 08:26:44 -05:00
|
|
|
|
|
|
|
def get_model_and_tokenizer():
    """
    Code to load a GPT-2 model with pretrained weights generated in chapter 5.
    This requires that you run the code in chapter 5 first, which generates the necessary model.pth file.

    The checkpoint is looked up at ../01_main-chapter-code/model.pth, relative to the
    current working directory. If it is missing, a hint is printed and the process
    exits via `sys.exit()`.

    Returns:
        A `(tokenizer, model, config)` tuple: the tiktoken "gpt2" encoding, the
        `GPTModel` with the checkpoint weights loaded (moved to `device` and set to
        evaluation mode), and the configuration dict used to build the model.
    """

    GPT_CONFIG_124M = {
        "vocab_size": 50257,     # Vocabulary size
        "context_length": 256,   # Shortened context length (orig: 1024)
        "emb_dim": 768,          # Embedding dimension
        "n_heads": 12,           # Number of attention heads
        "n_layers": 12,          # Number of layers
        "drop_rate": 0.1,        # Dropout rate
        "qkv_bias": False        # Query-key-value bias
    }

    tokenizer = tiktoken.get_encoding("gpt2")

    model_path = Path("..") / "01_main-chapter-code" / "model.pth"
    if not model_path.exists():
        print(f"Could not find the {model_path} file. Please run the chapter 5 code (ch05.ipynb) to generate the model.pth file.")
        sys.exit()

    # map_location remaps the stored tensors onto the device selected at module level,
    # so a checkpoint that was saved on a GPU machine can still be loaded on a CPU-only host
    checkpoint = torch.load(model_path, map_location=device, weights_only=True)
    model = GPTModel(GPT_CONFIG_124M)
    model.load_state_dict(checkpoint)
    model.to(device)
    # Switch to evaluation mode so that dropout (drop_rate above) is disabled while generating text
    model.eval()

    return tokenizer, model, GPT_CONFIG_124M
|
|
|
|
|
|
|
|
|
|
|
|
# Obtain the necessary tokenizer and model files for the chainlit function below
|
|
|
|
# Executed once when this module is imported; `main` below reads these three module-level names
tokenizer, model, model_config = get_model_and_tokenizer()
|
|
|
|
|
|
|
|
|
|
|
|
@chainlit.on_message
async def main(message: chainlit.Message):
    """
    Chainlit message handler.

    Encodes the text of the incoming message, lets the model generate up to
    50 additional tokens, and sends the decoded token sequence back to the
    interface as a new message.
    """
    # The text typed by the user arrives as `message.content`
    prompt_ids = text_to_token_ids(message.content, tokenizer).to(device)

    # No explicit `torch.no_grad()` here: according to the original author's note,
    # `generate` already uses it internally.
    # NOTE(review): top_k=1 with temperature=0.0 presumably selects the most likely
    # token at each step; confirm against the `generate` implementation.
    generated_ids = generate(
        model=model,
        idx=prompt_ids,
        max_new_tokens=50,
        context_size=model_config["context_length"],
        top_k=1,
        temperature=0.0,
    )

    reply = token_ids_to_text(generated_ids, tokenizer)

    # Sending the message is what displays the model response in the interface
    response = chainlit.Message(content=f"{reply}")
    await response.send()
|