diff --git a/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb b/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb index 15f83d7..077cc1d 100644 --- a/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb +++ b/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb @@ -352,7 +352,7 @@ " def __init__(self, d_in, d_out, num_heads, context_length, dropout=0.0, qkv_bias=False):\n", " super().__init__()\n", "\n", - " assert d_out % num_heads == 0, \"embed_dim is indivisible by num_heads\"\n", + " assert d_out % num_heads == 0, \"d_out is indivisible by num_heads\"\n", "\n", " self.num_heads = num_heads\n", " self.context_length = context_length\n", @@ -588,7 +588,7 @@ " def __init__(self, d_in, d_out, num_heads, context_length, dropout=0.0, qkv_bias=False):\n", " super().__init__()\n", "\n", - " assert d_out % num_heads == 0, \"embed_dim is indivisible by num_heads\"\n", + " assert d_out % num_heads == 0, \"d_out is indivisible by num_heads\"\n", "\n", " self.num_heads = num_heads\n", " self.context_length = context_length\n", diff --git a/ch05/10_llm-training-speed/01_opt_single_gpu.py b/ch05/10_llm-training-speed/01_opt_single_gpu.py index 891e424..155b57e 100644 --- a/ch05/10_llm-training-speed/01_opt_single_gpu.py +++ b/ch05/10_llm-training-speed/01_opt_single_gpu.py @@ -65,7 +65,7 @@ class PyTorchMultiHeadAttention(nn.Module): def __init__(self, d_in, d_out, num_heads, dropout=0.0, qkv_bias=False): super().__init__() - assert d_out % num_heads == 0, "embed_dim is indivisible by num_heads" + assert d_out % num_heads == 0, "d_out is indivisible by num_heads" self.num_heads = num_heads self.head_dim = d_out // num_heads diff --git a/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py b/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py index e9b9767..29db397 100644 --- a/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py +++ b/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py @@ -108,7 +108,7 @@ class PyTorchMultiHeadAttention(nn.Module): def __init__(self, d_in, d_out, num_heads, dropout=0.0, qkv_bias=False): super().__init__() - assert d_out % num_heads == 0, "embed_dim is indivisible by num_heads" + assert d_out % num_heads == 0, "d_out is indivisible by num_heads" self.num_heads = num_heads self.head_dim = d_out // num_heads