From 4aa398c79d09a7bf6a8a50af6bda470cbb58f725 Mon Sep 17 00:00:00 2001 From: rasbt Date: Wed, 23 Jul 2025 08:16:30 -0500 Subject: [PATCH] Comment typo: head_dim -> head_dim // 2 --- ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb | 2 +- ch05/07_gpt_to_llama/standalone-llama32.ipynb | 2 +- ch05/11_qwen3/standalone-qwen3.ipynb | 4 ++-- pkg/llms_from_scratch/kv_cache/llama3.py | 2 +- pkg/llms_from_scratch/kv_cache/qwen3.py | 2 +- pkg/llms_from_scratch/llama3.py | 2 +- pkg/llms_from_scratch/qwen3.py | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb b/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb index 0feaced..ae4a9ef 100644 --- a/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb +++ b/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb @@ -453,7 +453,7 @@ " x2 = x[..., head_dim // 2 :] # Second half\n", "\n", " # Adjust sin and cos shapes\n", - " cos = cos[:seq_len, :].unsqueeze(0).unsqueeze(0) # Shape: (1, 1, seq_len, head_dim)\n", + " cos = cos[:seq_len, :].unsqueeze(0).unsqueeze(0) # Shape: (1, 1, seq_len, head_dim // 2)\n", " sin = sin[:seq_len, :].unsqueeze(0).unsqueeze(0)\n", "\n", " # Apply the rotary transformation\n", diff --git a/ch05/07_gpt_to_llama/standalone-llama32.ipynb b/ch05/07_gpt_to_llama/standalone-llama32.ipynb index afb27c2..6f7ca60 100644 --- a/ch05/07_gpt_to_llama/standalone-llama32.ipynb +++ b/ch05/07_gpt_to_llama/standalone-llama32.ipynb @@ -202,7 +202,7 @@ " x2 = x[..., head_dim // 2 :] # Second half\n", "\n", " # Adjust sin and cos shapes\n", - " cos = cos[:seq_len, :].unsqueeze(0).unsqueeze(0) # Shape: (1, 1, seq_len, head_dim)\n", + " cos = cos[:seq_len, :].unsqueeze(0).unsqueeze(0) # Shape: (1, 1, seq_len, head_dim // 2)\n", " sin = sin[:seq_len, :].unsqueeze(0).unsqueeze(0)\n", "\n", " # Apply the rotary transformation\n", diff --git a/ch05/11_qwen3/standalone-qwen3.ipynb b/ch05/11_qwen3/standalone-qwen3.ipynb index 9dcff2f..a6126d5 100644 --- a/ch05/11_qwen3/standalone-qwen3.ipynb +++ b/ch05/11_qwen3/standalone-qwen3.ipynb @@ -226,7 +226,7 @@ " x2 = x[..., head_dim // 2 :] # Second half\n", "\n", " # Adjust sin and cos shapes\n", - " cos = cos[:seq_len, :].unsqueeze(0).unsqueeze(0) # Shape: (1, 1, seq_len, head_dim)\n", + " cos = cos[:seq_len, :].unsqueeze(0).unsqueeze(0) # Shape: (1, 1, seq_len, head_dim // 2)\n", " sin = sin[:seq_len, :].unsqueeze(0).unsqueeze(0)\n", "\n", " # Apply the rotary transformation\n", @@ -1201,7 +1201,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.10.16" } }, "nbformat": 4, diff --git a/pkg/llms_from_scratch/kv_cache/llama3.py b/pkg/llms_from_scratch/kv_cache/llama3.py index 70258d0..74cabdd 100644 --- a/pkg/llms_from_scratch/kv_cache/llama3.py +++ b/pkg/llms_from_scratch/kv_cache/llama3.py @@ -292,7 +292,7 @@ def apply_rope(x, cos, sin, offset=0): x2 = x[..., head_dim // 2:] # Second half # Adjust sin and cos shapes - cos = cos[offset:offset + seq_len, :].unsqueeze(0).unsqueeze(0) # Shape: (1, 1, seq_len, head_dim) + cos = cos[offset:offset + seq_len, :].unsqueeze(0).unsqueeze(0) # Shape: (1, 1, seq_len, head_dim // 2) sin = sin[offset:offset + seq_len, :].unsqueeze(0).unsqueeze(0) # Apply the rotary transformation diff --git a/pkg/llms_from_scratch/kv_cache/qwen3.py b/pkg/llms_from_scratch/kv_cache/qwen3.py index cb60112..4d842d9 100644 --- a/pkg/llms_from_scratch/kv_cache/qwen3.py +++ b/pkg/llms_from_scratch/kv_cache/qwen3.py @@ -236,7 +236,7 @@ def apply_rope(x, cos, sin, offset=0): x2 = x[..., head_dim // 2:] # Second half # Adjust sin and cos shapes - cos = cos[offset:offset + seq_len, :].unsqueeze(0).unsqueeze(0) # Shape: (1, 1, seq_len, head_dim) + cos = cos[offset:offset + seq_len, :].unsqueeze(0).unsqueeze(0) # Shape: (1, 1, seq_len, head_dim // 2) sin = sin[offset:offset + seq_len, :].unsqueeze(0).unsqueeze(0) # Apply the rotary transformation diff --git a/pkg/llms_from_scratch/llama3.py b/pkg/llms_from_scratch/llama3.py index ddd4cde..585c174 100644 --- a/pkg/llms_from_scratch/llama3.py +++ b/pkg/llms_from_scratch/llama3.py @@ -260,7 +260,7 @@ def apply_rope(x, cos, sin): x2 = x[..., head_dim // 2:] # Second half # Adjust sin and cos shapes - cos = cos[:seq_len, :].unsqueeze(0).unsqueeze(0) # Shape: (1, 1, seq_len, head_dim) + cos = cos[:seq_len, :].unsqueeze(0).unsqueeze(0) # Shape: (1, 1, seq_len, head_dim // 2) sin = sin[:seq_len, :].unsqueeze(0).unsqueeze(0) # Apply the rotary transformation diff --git a/pkg/llms_from_scratch/qwen3.py b/pkg/llms_from_scratch/qwen3.py index 33cf047..71a1e3b 100644 --- a/pkg/llms_from_scratch/qwen3.py +++ b/pkg/llms_from_scratch/qwen3.py @@ -288,7 +288,7 @@ def apply_rope(x, cos, sin): x2 = x[..., head_dim // 2:] # Second half # Adjust sin and cos shapes - cos = cos[:seq_len, :].unsqueeze(0).unsqueeze(0) # Shape: (1, 1, seq_len, head_dim) + cos = cos[:seq_len, :].unsqueeze(0).unsqueeze(0) # Shape: (1, 1, seq_len, head_dim // 2) sin = sin[:seq_len, :].unsqueeze(0).unsqueeze(0) # Apply the rotary transformation