diff --git a/ch03/01_main-chapter-code/multihead-attention.ipynb b/ch03/01_main-chapter-code/multihead-attention.ipynb index 1748607..b788040 100644 --- a/ch03/01_main-chapter-code/multihead-attention.ipynb +++ b/ch03/01_main-chapter-code/multihead-attention.ipynb @@ -38,7 +38,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "torch version: 2.2.1\n" + "torch version: 2.2.2\n" ] } ], @@ -365,6 +365,14 @@ "\n", "print(\"context_vecs.shape:\", context_vecs.shape)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1d965a5-9b98-4554-8646-7ecd497874cb", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb b/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb index ce5a33e..82f5cde 100644 --- a/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb +++ b/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb @@ -341,7 +341,7 @@ " self.d_out = d_out\n", "\n", " self.qkv = nn.Linear(d_in, 3 * d_out, bias=qkv_bias)\n", - " self.proj = nn.Linear(d_in, d_out)\n", + " self.proj = nn.Linear(d_out, d_out)\n", " self.dropout = dropout\n", "\n", " def forward(self, x):\n",