LLMs-from-scratch/ch05/07_gpt_to_llama/tests/tests.py

# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
#   - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch

# File for internal use (unit tests)

import io
import os
import sys
import types
import nbformat
import torch
import pytest
from transformers.models.llama.modeling_llama import LlamaRotaryEmbedding, apply_rotary_pos_emb


@pytest.fixture(scope="module")
def notebook():
    """Load selected definitions from the chapter notebooks as modules.

    Returns:
        dict: Maps each notebook name (file name without ``.ipynb``) to a
        ``types.ModuleType`` whose namespace holds whatever the matching
        notebook cells define. Each module is also registered in
        ``sys.modules`` under the notebook name.

    Raises:
        FileNotFoundError: If a notebook is missing from the directory above
        this test file.
    """
    def import_definitions_from_notebook(notebooks):
        imported_modules = {}

        # Notebooks are looked up one directory above this test file
        notebook_dir = os.path.join(os.path.dirname(__file__), "..")

        for fullname, names in notebooks.items():
            path = os.path.normpath(os.path.join(notebook_dir, fullname + ".ipynb"))

            # Load the notebook
            if not os.path.exists(path):
                raise FileNotFoundError(f"Notebook file not found at: {path}")

            with open(path, "r", encoding="utf-8") as f:
                nb = nbformat.read(f, as_version=4)

            # Create a module to store the imported functions and classes
            mod = types.ModuleType(fullname)
            sys.modules[fullname] = mod

            # Substrings that mark a cell as defining one of the requested names
            markers = tuple(
                f"{keyword} {name}"
                for name in names
                for keyword in ("def", "class")
            )

            # Go through the notebook cells and only execute function or class definitions
            for cell in nb.cells:
                if cell.cell_type != "code":
                    continue

                cell_code = cell.source
                # Run a matching cell exactly once, even when it defines several
                # of the requested names (previously it ran once per matching name).
                # NOTE: the whole cell source is executed as-is, so this must only
                # be pointed at trusted notebooks.
                if any(marker in cell_code for marker in markers):
                    exec(cell_code, mod.__dict__)

            imported_modules[fullname] = mod

        return imported_modules

    notebooks = {
        "converting-gpt-to-llama2": ["SiLU", "RMSNorm", "precompute_rope_params", "compute_rope"],
        "converting-llama2-to-llama3": ["precompute_rope_params"]
    }

    return import_definitions_from_notebook(notebooks)


@pytest.fixture(autouse=True)
def set_seed():
    """Reseed PyTorch's global RNG; as an autouse fixture this runs for every test."""
    seed = 123
    torch.manual_seed(seed)


def test_rope_llama2(notebook):
    """Compare the Llama 2 notebook's RoPE code with transformers' LlamaRotaryEmbedding."""
    llama2_nb = notebook["converting-gpt-to-llama2"]

    # Tensor dimensions shared by the queries and keys
    batch_size, num_heads = 1, 4
    context_len, head_dim = 4096, 16
    qk_shape = (batch_size, num_heads, context_len, head_dim)

    # cos/sin tables from the notebook implementation
    cos, sin = llama2_nb.precompute_rope_params(head_dim=head_dim, context_length=context_len)

    # Random inputs; queries are drawn before keys
    queries = torch.randn(*qk_shape)
    keys = torch.randn(*qk_shape)

    # Rotate with the notebook implementation
    queries_rot = llama2_nb.compute_rope(queries, cos, sin)
    keys_rot = llama2_nb.compute_rope(keys, cos, sin)

    # Reference values from transformers
    hf_rope = LlamaRotaryEmbedding(dim=head_dim, max_position_embeddings=context_len, base=10_000)
    position_ids = torch.arange(context_len, dtype=torch.long).unsqueeze(0)
    ref_cos, ref_sin = hf_rope(queries, position_ids)
    ref_queries_rot, ref_keys_rot = apply_rotary_pos_emb(queries, keys, ref_cos, ref_sin)

    # The reference tables carry a leading dimension that is squeezed before comparing
    comparisons = (
        (sin, ref_sin.squeeze(0)),
        (cos, ref_cos.squeeze(0)),
        (keys_rot, ref_keys_rot),
        (queries_rot, ref_queries_rot),
    )
    for actual, expected in comparisons:
        torch.testing.assert_close(actual, expected)


def test_rope_llama3(notebook):
    """Compare the Llama 3 notebook's RoPE tables (theta base 500,000) with transformers.

    The cos/sin tables come from the Llama 3 notebook, while the rotation
    itself uses ``compute_rope`` from the Llama 2 notebook.
    """
    llama2_nb = notebook["converting-gpt-to-llama2"]
    llama3_nb = notebook["converting-llama2-to-llama3"]

    # Tensor dimensions and RoPE base
    batch_size, num_heads = 1, 4
    context_len, head_dim = 8192, 16
    theta_base = 500_000
    qk_shape = (batch_size, num_heads, context_len, head_dim)

    # cos/sin tables from the Llama 3 notebook
    cos, sin = llama3_nb.precompute_rope_params(
        head_dim=head_dim, context_length=context_len, theta_base=theta_base
    )

    # Random inputs; queries are drawn before keys
    torch.manual_seed(123)
    queries = torch.randn(*qk_shape)
    keys = torch.randn(*qk_shape)

    # Rotate with the notebook implementation
    queries_rot = llama2_nb.compute_rope(queries, cos, sin)
    keys_rot = llama2_nb.compute_rope(keys, cos, sin)

    # Reference values from transformers
    hf_rope = LlamaRotaryEmbedding(dim=head_dim, max_position_embeddings=context_len, base=theta_base)
    position_ids = torch.arange(context_len, dtype=torch.long).unsqueeze(0)
    ref_cos, ref_sin = hf_rope(queries, position_ids)
    ref_queries_rot, ref_keys_rot = apply_rotary_pos_emb(queries, keys, ref_cos, ref_sin)

    # The reference tables carry a leading dimension that is squeezed before comparing
    comparisons = (
        (sin, ref_sin.squeeze(0)),
        (cos, ref_cos.squeeze(0)),
        (keys_rot, ref_keys_rot),
        (queries_rot, ref_queries_rot),
    )
    for actual, expected in comparisons:
        torch.testing.assert_close(actual, expected)


def test_rope_llama3_12(notebook):
    """Compare the notebook's frequency-scaled RoPE with transformers' "llama3" rope type.

    The cos/sin tables come from the Llama 3 notebook with a ``freq_config``,
    while the rotation itself uses ``compute_rope`` from the Llama 2 notebook.
    """
    llama2_nb = notebook["converting-gpt-to-llama2"]
    llama3_nb = notebook["converting-llama2-to-llama3"]

    # Tensor dimensions and RoPE base
    batch_size, num_heads = 1, 4
    context_len, head_dim = 8192, 16
    rope_theta = 500_000
    qk_shape = (batch_size, num_heads, context_len, head_dim)

    # Frequency-scaling values shared by the notebook config and the reference config
    scale_factor, low_freq_factor, high_freq_factor = 8.0, 1.0, 4.0
    original_context_len = 8192

    rope_config = {
        "factor": scale_factor,
        "low_freq_factor": low_freq_factor,
        "high_freq_factor": high_freq_factor,
        "original_context_length": original_context_len,
    }

    # cos/sin tables from the Llama 3 notebook
    cos, sin = llama3_nb.precompute_rope_params(
        head_dim=head_dim,
        theta_base=rope_theta,
        context_length=context_len,
        freq_config=rope_config,
    )

    # Random inputs; queries are drawn before keys
    torch.manual_seed(123)
    queries = torch.randn(*qk_shape)
    keys = torch.randn(*qk_shape)

    # Rotate with the notebook implementation
    queries_rot = llama2_nb.compute_rope(queries, cos, sin)
    keys_rot = llama2_nb.compute_rope(keys, cos, sin)

    # Same scaling values under the key names the reference implementation reads
    hf_rope_params = {
        "factor": scale_factor,
        "low_freq_factor": low_freq_factor,
        "high_freq_factor": high_freq_factor,
        "original_max_position_embeddings": original_context_len,
        "rope_type": "llama3"
    }

    # Minimal stand-in for a model config, passed to LlamaRotaryEmbedding below
    class RoPEConfig:
        rope_type = "llama3"
        rope_scaling = hf_rope_params
        factor = 1.0
        dim: int = head_dim
        # Literal on purpose: a class-body assignment to `rope_theta` cannot
        # read the enclosing function's local of the same name
        rope_theta = 500_000
        max_position_embeddings: int = 8192
        hidden_size = head_dim * num_heads
        num_attention_heads = num_heads

    config = RoPEConfig()

    # Reference values from transformers
    hf_rope = LlamaRotaryEmbedding(config=config)
    position_ids = torch.arange(context_len, dtype=torch.long).unsqueeze(0)
    ref_cos, ref_sin = hf_rope(queries, position_ids)
    ref_queries_rot, ref_keys_rot = apply_rotary_pos_emb(queries, keys, ref_cos, ref_sin)

    # The reference tables carry a leading dimension that is squeezed before comparing
    comparisons = (
        (sin, ref_sin.squeeze(0)),
        (cos, ref_cos.squeeze(0)),
        (keys_rot, ref_keys_rot),
        (queries_rot, ref_queries_rot),
    )
    for actual, expected in comparisons:
        torch.testing.assert_close(actual, expected)


def test_silu(notebook):
    """Check the notebook's SiLU module against torch.nn.functional.silu."""
    example_batch = torch.randn(2, 3, 4)
    llama2_nb = notebook["converting-gpt-to-llama2"]
    silu = llama2_nb.SiLU()

    actual = silu(example_batch)
    expected = torch.nn.functional.silu(example_batch)
    assert torch.allclose(actual, expected)


@pytest.mark.skipif(torch.__version__ < "2.4", reason="Requires PyTorch 2.4 or newer")
def test_rmsnorm(notebook):
    """Check the notebook's RMSNorm module against torch.nn.RMSNorm."""
    example_batch = torch.randn(2, 3, 4)
    emb_dim = example_batch.shape[-1]

    # Both layers normalize over the last dimension with the same epsilon
    rms_norm = notebook["converting-gpt-to-llama2"].RMSNorm(emb_dim=emb_dim, eps=1e-5)
    rmsnorm_pytorch = torch.nn.RMSNorm(emb_dim, eps=1e-5)

    actual = rms_norm(example_batch)
    expected = rmsnorm_pytorch(example_batch)
    assert torch.allclose(actual, expected)
Update tests.py 2024-10-23 07:48:33 -05:00			`# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).`
			`# Source for "Build a Large Language Model From Scratch"`
			`# - https://www.manning.com/books/build-a-large-language-model-from-scratch`
			`# Code: https://github.com/rasbt/LLMs-from-scratch`

			`# File for internal use (unit tests)`

Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00			`import io`
			`import os`
			`import sys`
			`import types`
			`import nbformat`
			`import torch`
			`import pytest`
			`from transformers.models.llama.modeling_llama import LlamaRotaryEmbedding, apply_rotary_pos_emb`


			`@pytest.fixture(scope="module")`
			`def notebook():`
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`def import_definitions_from_notebook(notebooks):`
			`imported_modules = {}`
Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`for fullname, names in notebooks.items():`
			`# Get the directory of the current test file`
			`current_dir = os.path.dirname(__file__)`
			`path = os.path.join(current_dir, "..", fullname + ".ipynb")`
			`path = os.path.normpath(path)`
Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`# Load the notebook`
			`if not os.path.exists(path):`
			`raise FileNotFoundError(f"Notebook file not found at: {path}")`
Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`with io.open(path, "r", encoding="utf-8") as f:`
			`nb = nbformat.read(f, as_version=4)`
Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`# Create a module to store the imported functions and classes`
			`mod = types.ModuleType(fullname)`
			`sys.modules[fullname] = mod`
Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`# Go through the notebook cells and only execute function or class definitions`
			`for cell in nb.cells:`
			`if cell.cell_type == "code":`
			`cell_code = cell.source`
			`for name in names:`
			`# Check for function or class definitions`
			`if f"def {name}" in cell_code or f"class {name}" in cell_code:`
			`exec(cell_code, mod.__dict__)`
Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`imported_modules[fullname] = mod`

			`return imported_modules`

			`notebooks = {`
			`"converting-gpt-to-llama2": ["SiLU", "RMSNorm", "precompute_rope_params", "compute_rope"],`
			`"converting-llama2-to-llama3": ["precompute_rope_params"]`
			`}`

			`return import_definitions_from_notebook(notebooks)`
Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00

			`@pytest.fixture(autouse=True)`
			`def set_seed():`
			`torch.manual_seed(123)`


Improve rope settings for llama3 (#380) 2024-10-03 08:29:54 -05:00			`def test_rope_llama2(notebook):`
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00
			`this_nb = notebook["converting-gpt-to-llama2"]`

Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00			`# Settings`
			`batch_size = 1`
Improve rope settings for llama3 (#380) 2024-10-03 08:29:54 -05:00			`context_len = 4096`
Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00			`num_heads = 4`
			`head_dim = 16`

			`# Instantiate RoPE parameters`
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`cos, sin = this_nb.precompute_rope_params(head_dim=head_dim, context_length=context_len)`
Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00
			`# Dummy query and key tensors`
			`queries = torch.randn(batch_size, num_heads, context_len, head_dim)`
			`keys = torch.randn(batch_size, num_heads, context_len, head_dim)`

			`# Apply rotary position embeddings`
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`queries_rot = this_nb.compute_rope(queries, cos, sin)`
			`keys_rot = this_nb.compute_rope(keys, cos, sin)`
Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00
Improve rope settings for llama3 (#380) 2024-10-03 08:29:54 -05:00			`rot_emb = LlamaRotaryEmbedding(`
			`dim=head_dim,`
			`max_position_embeddings=context_len,`
			`base=10_000`
			`)`
Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00
Improve rope settings for llama3 (#380) 2024-10-03 08:29:54 -05:00			`position_ids = torch.arange(context_len, dtype=torch.long).unsqueeze(0)`
			`ref_cos, ref_sin = rot_emb(queries, position_ids)`
			`ref_queries_rot, ref_keys_rot = apply_rotary_pos_emb(queries, keys, ref_cos, ref_sin)`

			`torch.testing.assert_close(sin, ref_sin.squeeze(0))`
			`torch.testing.assert_close(cos, ref_cos.squeeze(0))`
			`torch.testing.assert_close(keys_rot, ref_keys_rot)`
			`torch.testing.assert_close(queries_rot, ref_queries_rot)`


			`def test_rope_llama3(notebook):`
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00
			`nb1 = notebook["converting-gpt-to-llama2"]`
			`nb2 = notebook["converting-llama2-to-llama3"]`

Improve rope settings for llama3 (#380) 2024-10-03 08:29:54 -05:00			`# Settings`
			`batch_size = 1`
			`context_len = 8192`
			`num_heads = 4`
			`head_dim = 16`
RoPE increase (#407) 2024-10-21 19:58:38 -05:00			`theta_base = 500_000`
Improve rope settings for llama3 (#380) 2024-10-03 08:29:54 -05:00
			`# Instantiate RoPE parameters`
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`cos, sin = nb2.precompute_rope_params(`
Improve rope settings for llama3 (#380) 2024-10-03 08:29:54 -05:00			`head_dim=head_dim,`
			`context_length=context_len,`
			`theta_base=theta_base`
			`)`

			`# Dummy query and key tensors`
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`torch.manual_seed(123)`
Improve rope settings for llama3 (#380) 2024-10-03 08:29:54 -05:00			`queries = torch.randn(batch_size, num_heads, context_len, head_dim)`
			`keys = torch.randn(batch_size, num_heads, context_len, head_dim)`

			`# Apply rotary position embeddings`
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`queries_rot = nb1.compute_rope(queries, cos, sin)`
			`keys_rot = nb1.compute_rope(keys, cos, sin)`
Improve rope settings for llama3 (#380) 2024-10-03 08:29:54 -05:00
			`rot_emb = LlamaRotaryEmbedding(`
			`dim=head_dim,`
			`max_position_embeddings=context_len,`
			`base=theta_base`
			`)`
Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00
			`position_ids = torch.arange(context_len, dtype=torch.long).unsqueeze(0)`
			`ref_cos, ref_sin = rot_emb(queries, position_ids)`
			`ref_queries_rot, ref_keys_rot = apply_rotary_pos_emb(queries, keys, ref_cos, ref_sin)`

			`torch.testing.assert_close(sin, ref_sin.squeeze(0))`
			`torch.testing.assert_close(cos, ref_cos.squeeze(0))`
			`torch.testing.assert_close(keys_rot, ref_keys_rot)`
			`torch.testing.assert_close(queries_rot, ref_queries_rot)`


Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`def test_rope_llama3_12(notebook):`

			`nb1 = notebook["converting-gpt-to-llama2"]`
			`nb2 = notebook["converting-llama2-to-llama3"]`

			`# Settings`
			`batch_size = 1`
			`context_len = 8192`
			`num_heads = 4`
			`head_dim = 16`
RoPE increase (#407) 2024-10-21 19:58:38 -05:00			`rope_theta = 500_000`
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00
			`rope_config = {`
			`"factor": 8.0,`
			`"low_freq_factor": 1.0,`
			`"high_freq_factor": 4.0,`
			`"original_context_length": 8192,`
			`}`

			`# Instantiate RoPE parameters`
			`cos, sin = nb2.precompute_rope_params(`
			`head_dim=head_dim,`
			`theta_base=rope_theta,`
			`context_length=context_len,`
			`freq_config=rope_config,`
			`)`

			`# Dummy query and key tensors`
			`torch.manual_seed(123)`
			`queries = torch.randn(batch_size, num_heads, context_len, head_dim)`
			`keys = torch.randn(batch_size, num_heads, context_len, head_dim)`

			`# Apply rotary position embeddings`
			`queries_rot = nb1.compute_rope(queries, cos, sin)`
			`keys_rot = nb1.compute_rope(keys, cos, sin)`

			`hf_rope_params = {`
			`"factor": 8.0,`
			`"low_freq_factor": 1.0,`
			`"high_freq_factor": 4.0,`
			`"original_max_position_embeddings": 8192,`
			`"rope_type": "llama3"`
			`}`

			`class RoPEConfig:`
			`rope_type = "llama3"`
			`rope_scaling = hf_rope_params`
			`factor = 1.0`
			`dim: int = head_dim`
RoPE increase (#407) 2024-10-21 19:58:38 -05:00			`rope_theta = 500_000`
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`max_position_embeddings: int = 8192`
			`hidden_size = head_dim * num_heads`
			`num_attention_heads = num_heads`

			`config = RoPEConfig()`

			`rot_emb = LlamaRotaryEmbedding(config=config)`
			`position_ids = torch.arange(context_len, dtype=torch.long).unsqueeze(0)`
			`ref_cos, ref_sin = rot_emb(queries, position_ids)`
			`ref_queries_rot, ref_keys_rot = apply_rotary_pos_emb(queries, keys, ref_cos, ref_sin)`

			`torch.testing.assert_close(sin, ref_sin.squeeze(0))`
			`torch.testing.assert_close(cos, ref_cos.squeeze(0))`
			`torch.testing.assert_close(keys_rot, ref_keys_rot)`
			`torch.testing.assert_close(queries_rot, ref_queries_rot)`


Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00			`def test_silu(notebook):`
			`example_batch = torch.randn(2, 3, 4)`
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`silu = notebook["converting-gpt-to-llama2"].SiLU()`
Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00			`assert torch.allclose(silu(example_batch), torch.nn.functional.silu(example_batch))`


			`@pytest.mark.skipif(torch.__version__ < "2.4", reason="Requires PyTorch 2.4 or newer")`
			`def test_rmsnorm(notebook):`
			`example_batch = torch.randn(2, 3, 4)`
Add Llama 3.2 RoPE to CI (#391) * add Llama 3.2 RoPE to CI * update 2024-10-08 08:28:34 -05:00			`rms_norm = notebook["converting-gpt-to-llama2"].RMSNorm(emb_dim=example_batch.shape[-1], eps=1e-5)`
Improve rope settings for llama3 (#380) 2024-10-03 08:29:54 -05:00			`rmsnorm_pytorch = torch.nn.RMSNorm(example_batch.shape[-1], eps=1e-5)`
Add llama2 unit tests (#372) * add llama2 unit tests * update * updates * updates * update file path * update requirements file * rmsnorm test * update 2024-09-25 19:40:36 -05:00
			`assert torch.allclose(rms_norm(example_batch), rmsnorm_pytorch(example_batch))`