mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2025-11-14 00:55:11 +00:00
format the other GPT architecture sizes
This commit is contained in:
parent
40477c55b3
commit
496b52f842
@ -884,7 +884,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"torch.manual_seed(123)\n",
|
"torch.manual_seed(123)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"x = torch.rand(2, 6, 768)\n",
|
"x = torch.rand(2, 6, 768) # Shape: [batch_size, num_tokens, emb_dim]\n",
|
||||||
"block = TransformerBlock(GPT_CONFIG_124M)\n",
|
"block = TransformerBlock(GPT_CONFIG_124M)\n",
|
||||||
"output = block(x)\n",
|
"output = block(x)\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -1140,27 +1140,27 @@
|
|||||||
"id": "309a3be4-c20a-4657-b4e0-77c97510b47c",
|
"id": "309a3be4-c20a-4657-b4e0-77c97510b47c",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"- Exercise: you can try the other configurations as well:\n",
|
"- Exercise: you can also try the following configurations, which are referenced in the [GPT-2 paper](https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf):\n",
|
||||||
"\n",
|
"\n",
|
||||||
"- GPT2-small (the 124M configuration we implemented):\n",
|
" - **GPT2-small** (the 124M configuration we already implemented):\n",
|
||||||
" - \"emb_dim\" = 768\n",
|
" - \"emb_dim\" = 768\n",
|
||||||
" - \"n_layers\" = 12\n",
|
" - \"n_layers\" = 12\n",
|
||||||
" - \"n_heads\" = 12\n",
|
" - \"n_heads\" = 12\n",
|
||||||
"\n",
|
"\n",
|
||||||
"- GPT2-medium:\n",
|
" - **GPT2-medium:**\n",
|
||||||
" - \"emb_dim\" = 1024\n",
|
" - \"emb_dim\" = 1024\n",
|
||||||
" - \"n_layers\" = 24\n",
|
" - \"n_layers\" = 24\n",
|
||||||
" - \"n_heads\" = 16\n",
|
" - \"n_heads\" = 16\n",
|
||||||
"\n",
|
" \n",
|
||||||
"- GPT2-large:\n",
|
" - **GPT2-large:**\n",
|
||||||
" - \"emb_dim\" = 1280\n",
|
" - \"emb_dim\" = 1280\n",
|
||||||
" - \"n_layers\" = 36\n",
|
" - \"n_layers\" = 36\n",
|
||||||
" - \"n_heads\" = 20\n",
|
" - \"n_heads\" = 20\n",
|
||||||
"\n",
|
" \n",
|
||||||
"- GPT2-XL:\n",
|
" - **GPT2-XL:**\n",
|
||||||
" - \"emb_dim\" = 1600\n",
|
" - \"emb_dim\" = 1600\n",
|
||||||
" - \"n_layers\" = 48\n",
|
" - \"n_layers\" = 48\n",
|
||||||
" - \"n_heads\" = 25"
|
" - \"n_heads\" = 25"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user