potential little fixes appendix-D4.ipynb (#427)

* Update appendix-D.ipynb

- add the missing lr argument so that peak_lr is actually passed to the optimizer
- close the 1-step gap in gradient clipping right after warmup

* adjustments

---------

Co-authored-by: rasbt <mail@sebastianraschka.com>
Authored by casinca on 2024-11-03 19:12:58 +01:00, committed by GitHub
parent 95f8a4084f
commit 2fd07e2cfd


@@ -552,6 +552,8 @@
 "source": [
 "from previous_chapters import evaluate_model, generate_and_print_sample\n",
 "\n",
+"BOOK_VERSION = True\n",
+"\n",
 "\n",
 "def train_model(model, train_loader, val_loader, optimizer, device,\n",
 "                n_epochs, eval_freq, eval_iter, start_context, tokenizer,\n",
@@ -595,7 +597,12 @@
 "        loss.backward()\n",
 "\n",
 "        # Apply gradient clipping after the warmup phase to avoid exploding gradients\n",
-"        if global_step > warmup_steps:\n",
-"            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)\n",
+"\n",
+"        if BOOK_VERSION:\n",
+"            if global_step > warmup_steps:\n",
+"                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) \n",
+"        else:\n",
+"            if global_step >= warmup_steps: # the book originally used global_step > warmup_steps, which lead to a skipped clipping step after warmup\n",
+"                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)\n",
 "        \n",
 "        optimizer.step()\n",
@@ -691,8 +698,8 @@
 "model = GPTModel(GPT_CONFIG_124M)\n",
 "model.to(device)\n",
 "\n",
-"peak_lr = 5e-4\n",
-"optimizer = torch.optim.AdamW(model.parameters(), weight_decay=0.1)\n",
+"peak_lr = 0.001 # this was originally set to 5e-4 in the book by mistake\n",
+"optimizer = torch.optim.AdamW(model.parameters(), lr=peak_lr, weight_decay=0.1) # the book accidentally omitted the lr assignment\n",
 "tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
 "\n",
 "n_epochs = 15\n",
@@ -817,7 +824,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.6"
+"version": "3.11.4"
 }
 },
 "nbformat": 4,