diff --git a/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb b/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb index 82f5cde..918ce00 100644 --- a/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb +++ b/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb @@ -239,7 +239,7 @@ " self.head_dim = d_out // num_heads\n", "\n", " self.qkv = nn.Linear(d_in, 3 * d_out, bias=qkv_bias)\n", - " self.proj = nn.Linear(d_in, d_out)\n", + " self.proj = nn.Linear(d_out, d_out)\n", " self.dropout = nn.Dropout(dropout)\n", "\n", " self.register_buffer(\n",