mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2025-12-27 07:02:08 +00:00
use block size variable in positional embedding layer
This commit is contained in:
parent
10aa40ba6a
commit
4f161bd549
@ -1593,7 +1593,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"id": "0ed4b7db-3b47-4fd3-a4a6-5f4ed5dd166e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -74,8 +74,11 @@
|
||||
"\n",
|
||||
"vocab_size = 50257\n",
|
||||
"output_dim = 256\n",
|
||||
"block_size = 1024\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"token_embedding_layer = torch.nn.Embedding(vocab_size, output_dim)\n",
|
||||
"pos_embedding_layer = torch.nn.Embedding(vocab_size, output_dim)\n",
|
||||
"pos_embedding_layer = torch.nn.Embedding(block_size, output_dim)\n",
|
||||
"\n",
|
||||
"max_length = 4\n",
|
||||
"dataloader = create_dataloader(raw_text, batch_size=8, max_length=max_length, stride=5)"
|
||||
@ -83,7 +86,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"id": "664397bc-6daa-4b88-90aa-e8fc1fbd5846",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -101,7 +104,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"id": "d3664332-e6bb-447e-8b96-203aafde8b24",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
||||
@ -30,7 +30,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"id": "664397bc-6daa-4b88-90aa-e8fc1fbd5846",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -40,7 +40,7 @@
|
||||
"[33901]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -51,7 +51,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"id": "d3664332-e6bb-447e-8b96-203aafde8b24",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -61,7 +61,7 @@
|
||||
"[86]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -72,7 +72,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"id": "2773c09d-c136-4372-a2be-04b58d292842",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -82,7 +82,7 @@
|
||||
"[343]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -93,7 +93,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"id": "8a6abd32-1e0a-4038-9dd2-673f47bcdeb5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -103,7 +103,7 @@
|
||||
"[86]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -114,7 +114,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"id": "26ae940a-9841-4e27-a1df-b83fc8a488b3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -124,7 +124,7 @@
|
||||
"[220]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -135,7 +135,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"id": "a606c39a-6747-4cd8-bb38-e3183f80908d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -145,7 +145,7 @@
|
||||
"[959]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -156,7 +156,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 8,
|
||||
"id": "47c7268d-8fdc-4957-bc68-5be6113f45a7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -166,7 +166,7 @@
|
||||
"'Akwirw ier'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -185,7 +185,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 10,
|
||||
"id": "4d50af16-937b-49e0-8ffd-42d30cbb41c9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -239,13 +239,16 @@
|
||||
"\n",
|
||||
"vocab_size = 50257\n",
|
||||
"output_dim = 256\n",
|
||||
"token_embedding_layer = torch.nn.Embedding(vocab_size, output_dim)\n",
|
||||
"max_len = 4\n",
|
||||
"block_size = max_len\n",
|
||||
"\n",
|
||||
"token_embedding_layer = torch.nn.Embedding(block_size, output_dim)\n",
|
||||
"pos_embedding_layer = torch.nn.Embedding(vocab_size, output_dim)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 11,
|
||||
"id": "0128eefa-d7c8-4f76-9851-566dfa7c3745",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -258,7 +261,7 @@
|
||||
" [ 402, 271]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -275,7 +278,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 12,
|
||||
"id": "ff5c1e90-c6de-4a87-adf6-7e19f603291c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -288,7 +291,7 @@
|
||||
" [ 402, 271, 10899, 2138, 257, 7026, 15632, 438]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
||||
@ -158,7 +158,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"id": "6fb5b2f8-dd2c-4a6d-94ef-a0e9ad163951",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -190,7 +190,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"id": "9842f39b-1654-410e-88bf-d1b899bf0241",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -224,7 +224,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 4,
|
||||
"id": "e3ccc99c-33ce-4f11-b7f2-353cf1cbdaba",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -255,7 +255,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 5,
|
||||
"id": "07b2e58d-a6ed-49f0-a1cd-2463e8d53a20",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -289,7 +289,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 6,
|
||||
"id": "2d99cac4-45ea-46b3-b3c1-e000ad16e158",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -319,7 +319,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 7,
|
||||
"id": "8fcb96f0-14e5-4973-a50e-79ea7c6af99f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -378,7 +378,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 8,
|
||||
"id": "04004be8-07a1-468b-ab33-32e16a551b45",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -415,7 +415,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 9,
|
||||
"id": "2cea69d0-9a47-45da-8d5a-47ceef2df673",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -447,7 +447,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 10,
|
||||
"id": "fa4ef062-de81-47ee-8415-bfe1708c81b8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -479,7 +479,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 11,
|
||||
"id": "112b492c-fb6f-4e6d-8df5-518ae83363d5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -509,7 +509,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 12,
|
||||
"id": "ba8eafcf-f7f7-4989-b8dc-61b50c4f81dc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -541,7 +541,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 13,
|
||||
"id": "2570eb7d-aee1-457a-a61e-7544478219fa",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -620,7 +620,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 14,
|
||||
"id": "8250fdc6-6cd6-4c5b-b9c0-8c643aadb7db",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -640,7 +640,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 15,
|
||||
"id": "bfd7259a-f26c-4cea-b8fc-282b5cae1e00",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -662,7 +662,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 16,
|
||||
"id": "73cedd62-01e1-4196-a575-baecc6095601",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -692,7 +692,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 17,
|
||||
"id": "8c1c3949-fc08-4d19-a41e-1c235b4e631b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -731,7 +731,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"execution_count": 18,
|
||||
"id": "64cbc253-a182-4490-a765-246979ea0a28",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -759,7 +759,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"execution_count": 19,
|
||||
"id": "b14e44b5-d170-40f9-8847-8990804af26d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -795,7 +795,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"execution_count": 20,
|
||||
"id": "146f5587-c845-4e30-9894-c7ed3a248153",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -830,7 +830,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"execution_count": 21,
|
||||
"id": "e138f033-fa7e-4e3a-8764-b53a96b26397",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -865,7 +865,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"execution_count": 22,
|
||||
"id": "51590326-cdbe-4e62-93b1-17df71c11ee4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -921,7 +921,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"execution_count": 23,
|
||||
"id": "73f411e3-e231-464a-89fe-0a9035e5f839",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -1017,7 +1017,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 67,
|
||||
"execution_count": 24,
|
||||
"id": "1933940d-0fa5-4b17-a3ce-388e5314a1bb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -1049,7 +1049,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 68,
|
||||
"execution_count": 25,
|
||||
"id": "43f3d2e3-185b-4184-9f98-edde5e6df746",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -1082,7 +1082,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 69,
|
||||
"execution_count": 26,
|
||||
"id": "9f531e2e-f4d2-4fea-a87f-4c132e48b9e7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -1122,7 +1122,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 70,
|
||||
"execution_count": 27,
|
||||
"id": "a2be2f43-9cf0-44f6-8d8b-68ef2fb3cc39",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -1155,7 +1155,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"execution_count": 28,
|
||||
"id": "b1cd6d7f-16f2-43c1-915e-0824f1a4bc52",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -1217,7 +1217,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 75,
|
||||
"execution_count": 29,
|
||||
"id": "0de578db-8289-41d6-b377-ef645751e33f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -1244,7 +1244,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"execution_count": 30,
|
||||
"id": "b16c5edb-942b-458c-8e95-25e4e355381e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -1278,7 +1278,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 78,
|
||||
"execution_count": 31,
|
||||
"id": "977a5fa7-a9d5-4e2e-8a32-8e0331ccfe28",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -1305,7 +1305,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 84,
|
||||
"execution_count": 32,
|
||||
"id": "60d8c2eb-2d8e-4d2c-99bc-9eef8cc53ca0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -1313,19 +1313,19 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"tensor([[[-0.0827, 0.0405],\n",
|
||||
" [-0.2249, -0.0036],\n",
|
||||
" [-0.4126, -0.0555],\n",
|
||||
" [-0.5054, -0.1016],\n",
|
||||
" [-0.7738, -0.1860],\n",
|
||||
" [-1.1677, -0.3309]],\n",
|
||||
"tensor([[[-0.0844, 0.0414],\n",
|
||||
" [-0.2264, -0.0039],\n",
|
||||
" [-0.4163, -0.0564],\n",
|
||||
" [-0.5014, -0.1011],\n",
|
||||
" [-0.7754, -0.1867],\n",
|
||||
" [-1.1632, -0.3303]],\n",
|
||||
"\n",
|
||||
" [[-0.0827, 0.0405],\n",
|
||||
" [-0.2249, -0.0036],\n",
|
||||
" [-0.4126, -0.0555],\n",
|
||||
" [-0.5054, -0.1016],\n",
|
||||
" [-0.7738, -0.1860],\n",
|
||||
" [-1.1677, -0.3309]]], grad_fn=<UnsafeViewBackward0>)\n",
|
||||
" [[-0.0844, 0.0414],\n",
|
||||
" [-0.2264, -0.0039],\n",
|
||||
" [-0.4163, -0.0564],\n",
|
||||
" [-0.5014, -0.1011],\n",
|
||||
" [-0.7754, -0.1867],\n",
|
||||
" [-1.1632, -0.3303]]], grad_fn=<UnsafeViewBackward0>)\n",
|
||||
"context_vecs.shape: torch.Size([2, 6, 2])\n"
|
||||
]
|
||||
}
|
||||
@ -1412,7 +1412,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 91,
|
||||
"execution_count": 33,
|
||||
"id": "b9a66e11-7105-4bb4-be84-041f1a1f3bd2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -1420,19 +1420,19 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"tensor([[[-0.0827, 0.0405, 0.0771, 0.0172],\n",
|
||||
" [-0.2249, -0.0036, 0.2144, 0.1183],\n",
|
||||
" [-0.4126, -0.0555, 0.3871, 0.2445],\n",
|
||||
" [-0.5054, -0.1016, 0.4995, 0.3406],\n",
|
||||
" [-0.7738, -0.1860, 0.7385, 0.4875],\n",
|
||||
" [-1.1677, -0.3309, 1.1223, 0.8457]],\n",
|
||||
"tensor([[[-0.0844, 0.0414, 0.0766, 0.0171],\n",
|
||||
" [-0.2264, -0.0039, 0.2143, 0.1185],\n",
|
||||
" [-0.4163, -0.0564, 0.3878, 0.2453],\n",
|
||||
" [-0.5014, -0.1011, 0.4992, 0.3401],\n",
|
||||
" [-0.7754, -0.1867, 0.7387, 0.4868],\n",
|
||||
" [-1.1632, -0.3303, 1.1224, 0.8460]],\n",
|
||||
"\n",
|
||||
" [[-0.0827, 0.0405, 0.0771, 0.0172],\n",
|
||||
" [-0.2249, -0.0036, 0.2144, 0.1183],\n",
|
||||
" [-0.4126, -0.0555, 0.3871, 0.2445],\n",
|
||||
" [-0.5054, -0.1016, 0.4995, 0.3406],\n",
|
||||
" [-0.7738, -0.1860, 0.7385, 0.4875],\n",
|
||||
" [-1.1677, -0.3309, 1.1223, 0.8457]]], grad_fn=<CatBackward0>)\n",
|
||||
" [[-0.0844, 0.0414, 0.0766, 0.0171],\n",
|
||||
" [-0.2264, -0.0039, 0.2143, 0.1185],\n",
|
||||
" [-0.4163, -0.0564, 0.3878, 0.2453],\n",
|
||||
" [-0.5014, -0.1011, 0.4992, 0.3401],\n",
|
||||
" [-0.7754, -0.1867, 0.7387, 0.4868],\n",
|
||||
" [-1.1632, -0.3303, 1.1224, 0.8460]]], grad_fn=<CatBackward0>)\n",
|
||||
"context_vecs.shape: torch.Size([2, 6, 4])\n"
|
||||
]
|
||||
}
|
||||
@ -1474,7 +1474,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 92,
|
||||
"execution_count": 34,
|
||||
"id": "dc9a4375-068b-4b2a-aabb-a29347ca5ecd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -1482,19 +1482,19 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"tensor([[[-0.0932, 0.0353],\n",
|
||||
" [-0.2688, -0.0017],\n",
|
||||
" [-0.4848, -0.0500],\n",
|
||||
" [-0.6469, -0.1051],\n",
|
||||
" [-0.8832, -0.1764],\n",
|
||||
" [-1.4730, -0.3391]],\n",
|
||||
"tensor([[[-9.1476e-02, 3.4164e-02],\n",
|
||||
" [-2.6796e-01, -1.3427e-03],\n",
|
||||
" [-4.8421e-01, -4.8909e-02],\n",
|
||||
" [-6.4808e-01, -1.0625e-01],\n",
|
||||
" [-8.8380e-01, -1.7140e-01],\n",
|
||||
" [-1.4744e+00, -3.4327e-01]],\n",
|
||||
"\n",
|
||||
" [[-0.0932, 0.0353],\n",
|
||||
" [-0.2688, -0.0017],\n",
|
||||
" [-0.4848, -0.0500],\n",
|
||||
" [-0.6469, -0.1051],\n",
|
||||
" [-0.8832, -0.1764],\n",
|
||||
" [-1.4730, -0.3391]]], grad_fn=<CatBackward0>)\n",
|
||||
" [[-9.1476e-02, 3.4164e-02],\n",
|
||||
" [-2.6796e-01, -1.3427e-03],\n",
|
||||
" [-4.8421e-01, -4.8909e-02],\n",
|
||||
" [-6.4808e-01, -1.0625e-01],\n",
|
||||
" [-8.8380e-01, -1.7140e-01],\n",
|
||||
" [-1.4744e+00, -3.4327e-01]]], grad_fn=<CatBackward0>)\n",
|
||||
"context_vecs.shape: torch.Size([2, 6, 2])\n"
|
||||
]
|
||||
}
|
||||
@ -1531,7 +1531,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 96,
|
||||
"execution_count": 35,
|
||||
"id": "110b0188-6e9e-4e56-a988-10523c6c8538",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -1646,7 +1646,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 98,
|
||||
"execution_count": 36,
|
||||
"id": "e8cfc1ae-78ab-4faa-bc73-98bd054806c9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -1689,7 +1689,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 100,
|
||||
"execution_count": 37,
|
||||
"id": "053760f1-1a02-42f0-b3bf-3d939e407039",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -1760,7 +1760,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -28,7 +28,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 2,
|
||||
"id": "0ed4b7db-3b47-4fd3-a4a6-5f4ed5dd166e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -82,6 +82,10 @@
|
||||
"\n",
|
||||
"vocab_size = 50257\n",
|
||||
"output_dim = 256\n",
|
||||
"max_len = 1024\n",
|
||||
"block_size = max_len\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"token_embedding_layer = torch.nn.Embedding(vocab_size, output_dim)\n",
|
||||
"pos_embedding_layer = torch.nn.Embedding(vocab_size, output_dim)\n",
|
||||
"\n",
|
||||
@ -91,7 +95,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 3,
|
||||
"id": "664397bc-6daa-4b88-90aa-e8fc1fbd5846",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -109,7 +113,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 4,
|
||||
"id": "d3664332-e6bb-447e-8b96-203aafde8b24",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -143,7 +147,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 5,
|
||||
"id": "a44e682d-1c3c-445d-85fa-b142f89f8503",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -193,7 +197,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 6,
|
||||
"id": "7898551e-f582-48ac-9f66-3632abe2a93f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -232,7 +236,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"execution_count": 7,
|
||||
"id": "2773c09d-c136-4372-a2be-04b58d292842",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -280,7 +284,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"execution_count": 8,
|
||||
"id": "779fdd04-0152-4308-af08-840800a7f395",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -324,7 +328,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
9
ch03/01_main-chapter-code/small-text-sample.txt
Normal file
9
ch03/01_main-chapter-code/small-text-sample.txt
Normal file
@ -0,0 +1,9 @@
|
||||
Once upon a time in a quiet village nestled among rolling hills and whispering forests, there lived a young girl named Elara. Elara was known for her boundless curiosity and her love for the stars. Every night, she would climb to the highest hill near her home to gaze at the glittering sky, dreaming of distant worlds and galaxies.
|
||||
|
||||
In the heart of the village, there was an ancient library, tended by an old, wise librarian named Mr. Bramwell. This library was a treasure trove of books on every subject, but most importantly, it housed a collection of old star maps and celestial guides. Elara, fascinated by these books, spent countless hours with Mr. Bramwell, learning about constellations, planets, and the mysteries of the universe.
|
||||
|
||||
One evening, while studying an old star map, Elara noticed a small, uncharted star that twinkled differently. She shared this discovery with Mr. Bramwell, who was equally intrigued. They decided to observe this star every night, noting its unique patterns and movements. This small, mysterious star, which they named "Elara's Star," became the center of their nightly adventures.
|
||||
|
||||
As days turned into weeks, the villagers began to take notice of Elara's star. The uncharted star brought the community together, with people of all ages joining Elara and Mr. Bramwell on the hill each night to gaze at the sky. The nightly gatherings turned into a festival of stars, where stories were shared, friendships were formed, and the mysteries of the cosmos were contemplated.
|
||||
|
||||
The story of Elara and her star spread far and wide, attracting astronomers and dreamers from distant lands. The once quiet village became a beacon of wonder, a place where the sky seemed a little closer and the stars a bit friendlier. Elara's curiosity had not only unveiled a hidden star but had also brought her community together, reminding everyone that sometimes, the most extraordinary discoveries are waiting just above us, in the starlit sky.
|
||||
Loading…
x
Reference in New Issue
Block a user