mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2025-09-04 13:58:25 +00:00
Use dim=-1 for consistency (#122)
This commit is contained in:
parent
49f01d06d0
commit
b5878a80ff
@ -37,7 +37,7 @@
|
|||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"torch version: 2.2.1\n"
|
"torch version: 2.2.2\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -625,7 +625,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"attn_weights = torch.softmax(attn_scores, dim=1)\n",
|
"attn_weights = torch.softmax(attn_scores, dim=-1)\n",
|
||||||
"print(attn_weights)"
|
"print(attn_weights)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -656,7 +656,7 @@
|
|||||||
"row_2_sum = sum([0.1385, 0.2379, 0.2333, 0.1240, 0.1082, 0.1581])\n",
|
"row_2_sum = sum([0.1385, 0.2379, 0.2333, 0.1240, 0.1082, 0.1581])\n",
|
||||||
"print(\"Row 2 sum:\", row_2_sum)\n",
|
"print(\"Row 2 sum:\", row_2_sum)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"All row sums:\", attn_weights.sum(dim=1))"
|
"print(\"All row sums:\", attn_weights.sum(dim=-1))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -1139,7 +1139,7 @@
|
|||||||
" values = self.W_value(x)\n",
|
" values = self.W_value(x)\n",
|
||||||
" \n",
|
" \n",
|
||||||
" attn_scores = queries @ keys.T\n",
|
" attn_scores = queries @ keys.T\n",
|
||||||
" attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=1)\n",
|
" attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=-1)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" context_vec = attn_weights @ values\n",
|
" context_vec = attn_weights @ values\n",
|
||||||
" return context_vec\n",
|
" return context_vec\n",
|
||||||
@ -1243,7 +1243,7 @@
|
|||||||
"keys = sa_v2.W_key(inputs) \n",
|
"keys = sa_v2.W_key(inputs) \n",
|
||||||
"attn_scores = queries @ keys.T\n",
|
"attn_scores = queries @ keys.T\n",
|
||||||
"\n",
|
"\n",
|
||||||
"attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=1)\n",
|
"attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=-1)\n",
|
||||||
"print(attn_weights)"
|
"print(attn_weights)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -1429,7 +1429,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"attn_weights = torch.softmax(masked / keys.shape[-1]**0.5, dim=1)\n",
|
"attn_weights = torch.softmax(masked / keys.shape[-1]**0.5, dim=-1)\n",
|
||||||
"print(attn_weights)"
|
"print(attn_weights)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -1765,7 +1765,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 42,
|
"execution_count": 36,
|
||||||
"id": "110b0188-6e9e-4e56-a988-10523c6c8538",
|
"id": "110b0188-6e9e-4e56-a988-10523c6c8538",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -1894,7 +1894,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 38,
|
"execution_count": 37,
|
||||||
"id": "e8cfc1ae-78ab-4faa-bc73-98bd054806c9",
|
"id": "e8cfc1ae-78ab-4faa-bc73-98bd054806c9",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -1937,7 +1937,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 39,
|
"execution_count": 38,
|
||||||
"id": "053760f1-1a02-42f0-b3bf-3d939e407039",
|
"id": "053760f1-1a02-42f0-b3bf-3d939e407039",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -2000,7 +2000,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.10.6"
|
"version": "3.11.4"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user