LLMs-from-scratch/pkg/llms_from_scratch/tests/test_ch04.py

# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
#   - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch

from llms_from_scratch.ch04 import GPTModel
from llms_from_scratch.ch04 import generate_text_simple

import torch
import tiktoken


def test_GPTModel():
    """Check that a seeded, untrained GPTModel generates a fixed token sequence.

    The model is built from the 124M-parameter configuration after seeding
    the torch RNG with 123, put in eval mode, and asked to extend the prompt
    "Hello, I am" by 10 tokens via ``generate_text_simple``. The resulting
    token IDs must match the stored reference tensor exactly.

    Raises:
        AssertionError: If the generated token IDs differ from the reference.
    """
    GPT_CONFIG_124M = {
        "vocab_size": 50257,     # Vocabulary size
        "context_length": 1024,  # Context length
        "emb_dim": 768,          # Embedding dimension
        "n_heads": 12,           # Number of attention heads
        "n_layers": 12,          # Number of layers
        "drop_rate": 0.1,        # Dropout rate
        "qkv_bias": False        # Query-Key-Value bias
    }

    # Seed before constructing the model so the weight initialization (and
    # therefore the generated tokens) is reproducible across runs.
    torch.manual_seed(123)
    model = GPTModel(GPT_CONFIG_124M)
    model.eval()  # disable dropout

    start_context = "Hello, I am"

    tokenizer = tiktoken.get_encoding("gpt2")
    encoded = tokenizer.encode(start_context)
    # unsqueeze(0) adds a leading dimension of size 1 in front of the token IDs
    encoded_tensor = torch.tensor(encoded).unsqueeze(0)

    print(f"\n{50*'='}\n{22*' '}IN\n{50*'='}")
    print("\nInput text:", start_context)
    print("Encoded input text:", encoded)
    print("encoded_tensor.shape:", encoded_tensor.shape)

    out = generate_text_simple(
        model=model,
        idx=encoded_tensor,
        max_new_tokens=10,
        context_size=GPT_CONFIG_124M["context_length"]
    )

    # Reference output: 14 token IDs in a single row
    # (presumably the 4 prompt tokens followed by the 10 generated ones).
    expect = torch.tensor([
        [15496,   11,   314,   716, 27018, 24086, 47843, 30961, 42348,  7267,
         49706, 43231, 47062, 34657]
    ])
    # torch.equal only returns a bool; without `assert` the comparison result
    # is thrown away and the test can never fail.
    assert torch.equal(expect, out), (
        "Generated token IDs do not match the expected reference.\n"
        f"expected: {expect.tolist()}\n"
        f"actual:   {out.tolist()}"
    )
Add PyPI package (#576) * Add PyPI package * fixes * fixes 2025-03-23 19:28:49 -05:00			`# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).`
			`# Source for "Build a Large Language Model From Scratch"`
			`# - https://www.manning.com/books/build-a-large-language-model-from-scratch`
			`# Code: https://github.com/rasbt/LLMs-from-scratch`

			`from llms_from_scratch.ch04 import GPTModel`
			`from llms_from_scratch.ch04 import generate_text_simple`

			`import torch`
			`import tiktoken`


			`def test_GPTModel():`
			`GPT_CONFIG_124M = {`
			`"vocab_size": 50257, # Vocabulary size`
			`"context_length": 1024, # Context length`
			`"emb_dim": 768, # Embedding dimension`
			`"n_heads": 12, # Number of attention heads`
			`"n_layers": 12, # Number of layers`
			`"drop_rate": 0.1, # Dropout rate`
			`"qkv_bias": False # Query-Key-Value bias`
			`}`

			`torch.manual_seed(123)`
			`model = GPTModel(GPT_CONFIG_124M)`
			`model.eval() # disable dropout`

			`start_context = "Hello, I am"`

			`tokenizer = tiktoken.get_encoding("gpt2")`
			`encoded = tokenizer.encode(start_context)`
			`encoded_tensor = torch.tensor(encoded).unsqueeze(0)`

			`print(f"\n{50*'='}\n{22*' '}IN\n{50*'='}")`
			`print("\nInput text:", start_context)`
			`print("Encoded input text:", encoded)`
			`print("encoded_tensor.shape:", encoded_tensor.shape)`

			`out = generate_text_simple(`
			`model=model,`
			`idx=encoded_tensor,`
			`max_new_tokens=10,`
			`context_size=GPT_CONFIG_124M["context_length"]`
			`)`

			`expect = torch.tensor([`
			`[15496, 11, 314, 716, 27018, 24086, 47843, 30961, 42348, 7267,`
			`49706, 43231, 47062, 34657]`
			`])`
			`torch.equal(expect, out)`