From 984cca3f64b65fc5b5cc5d6268d547cd1b13b51d Mon Sep 17 00:00:00 2001
From: Sebastian Raschka
Date: Sun, 22 Jun 2025 16:36:39 -0500
Subject: [PATCH] Fix code comment: embed_dim -> d_out (#698)

---
 .../mha-implementations.ipynb                      | 4 ++--
 ch05/10_llm-training-speed/01_opt_single_gpu.py    | 2 +-
 ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb b/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb
index 15f83d7..077cc1d 100644
--- a/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb
+++ b/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb
@@ -352,7 +352,7 @@
     "    def __init__(self, d_in, d_out, num_heads, context_length, dropout=0.0, qkv_bias=False):\n",
     "        super().__init__()\n",
     "\n",
-    "        assert d_out % num_heads == 0, \"embed_dim is indivisible by num_heads\"\n",
+    "        assert d_out % num_heads == 0, \"d_out is indivisible by num_heads\"\n",
     "\n",
     "        self.num_heads = num_heads\n",
     "        self.context_length = context_length\n",
@@ -588,7 +588,7 @@
     "    def __init__(self, d_in, d_out, num_heads, context_length, dropout=0.0, qkv_bias=False):\n",
     "        super().__init__()\n",
     "\n",
-    "        assert d_out % num_heads == 0, \"embed_dim is indivisible by num_heads\"\n",
+    "        assert d_out % num_heads == 0, \"d_out is indivisible by num_heads\"\n",
     "\n",
     "        self.num_heads = num_heads\n",
     "        self.context_length = context_length\n",
diff --git a/ch05/10_llm-training-speed/01_opt_single_gpu.py b/ch05/10_llm-training-speed/01_opt_single_gpu.py
index 891e424..155b57e 100644
--- a/ch05/10_llm-training-speed/01_opt_single_gpu.py
+++ b/ch05/10_llm-training-speed/01_opt_single_gpu.py
@@ -65,7 +65,7 @@ class PyTorchMultiHeadAttention(nn.Module):
     def __init__(self, d_in, d_out, num_heads, dropout=0.0, qkv_bias=False):
         super().__init__()
 
-        assert d_out % num_heads == 0, "embed_dim is indivisible by num_heads"
+        assert d_out % num_heads == 0, "d_out is indivisible by num_heads"
 
         self.num_heads = num_heads
         self.head_dim = d_out // num_heads
diff --git a/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py b/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py
index e9b9767..29db397 100644
--- a/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py
+++ b/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py
@@ -108,7 +108,7 @@ class PyTorchMultiHeadAttention(nn.Module):
     def __init__(self, d_in, d_out, num_heads, dropout=0.0, qkv_bias=False):
         super().__init__()
 
-        assert d_out % num_heads == 0, "embed_dim is indivisible by num_heads"
+        assert d_out % num_heads == 0, "d_out is indivisible by num_heads"
 
         self.num_heads = num_heads
         self.head_dim = d_out // num_heads
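
Note on the fix: the constructors touched above take no `embed_dim` parameter; the quantity being checked is `d_out`, which is later split into `head_dim = d_out // num_heads`, so the assertion message now names the actual argument. Below is a minimal, hedged sketch of that constructor logic in isolation. Only the lines visible in the diff are taken from the patch; the class name, the QKV projection, and the usage comments are illustrative assumptions, not the repository's actual implementation.

import torch.nn as nn


class MiniMultiHeadAttention(nn.Module):
    # Sketch of the constructor logic shown in the patch; everything past
    # head_dim is an illustrative placeholder, not the book's code.
    def __init__(self, d_in, d_out, num_heads, dropout=0.0, qkv_bias=False):
        super().__init__()

        # Corrected message: the checked parameter is d_out, not embed_dim
        assert d_out % num_heads == 0, "d_out is indivisible by num_heads"

        self.num_heads = num_heads
        self.head_dim = d_out // num_heads  # why d_out must divide evenly

        # Illustrative fused QKV projection (assumption, not from the patch)
        self.qkv = nn.Linear(d_in, 3 * d_out, bias=qkv_bias)
        self.dropout = nn.Dropout(dropout)


# Usage sketch:
#   MiniMultiHeadAttention(d_in=768, d_out=768, num_heads=12)  # ok: head_dim = 64
#   MiniMultiHeadAttention(d_in=768, d_out=768, num_heads=10)  # AssertionError: d_out is indivisible by num_heads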