Mirror of https://github.com/rasbt/LLMs-from-scratch.git (synced 2025-08-28 18:40:01 +00:00)
Potential little fixes for appendix-D.ipynb (#427)
* Update appendix-D.ipynb

  - add the missing lr argument so that peak_lr is actually passed to the optimizer
  - close the 1-step gap in gradient clipping after the warmup phase

* adjustments

Co-authored-by: rasbt <mail@sebastianraschka.com>
This commit is contained in:
parent 95f8a4084f
commit 2fd07e2cfd
@@ -552,6 +552,8 @@
 "source": [
 "from previous_chapters import evaluate_model, generate_and_print_sample\n",
 "\n",
+"BOOK_VERSION = True\n",
+"\n",
 "\n",
 "def train_model(model, train_loader, val_loader, optimizer, device,\n",
 "                n_epochs, eval_freq, eval_iter, start_context, tokenizer,\n",
@@ -595,8 +597,13 @@
 "        loss.backward()\n",
 "\n",
 "        # Apply gradient clipping after the warmup phase to avoid exploding gradients\n",
-"        if global_step > warmup_steps:\n",
-"            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) \n",
+"\n",
+"        if BOOK_VERSION:\n",
+"            if global_step > warmup_steps:\n",
+"                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) \n",
+"        else:\n",
+"            if global_step >= warmup_steps: # the book originally used global_step > warmup_steps, which lead to a skipped clipping step after warmup\n",
+"                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)\n",
 "    \n",
 "        optimizer.step()\n",
 "        tokens_seen += input_batch.numel()\n",
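The else branch above closes a small off-by-one: with the original condition global_step > warmup_steps, the step at which global_step equals warmup_steps runs without clipping, which is the "1 step gap" named in the commit message. A minimal sketch of the two conditions, assuming a toy linear model, random data, and an arbitrary warmup_steps value rather than the notebook's GPTModel and train_model():

import torch

# Minimal sketch, not the notebook's training loop: toy model, toy data,
# and an arbitrary warmup_steps, just to show the one skipped clipping step.
model = torch.nn.Linear(4, 4)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4)
warmup_steps = 3

for global_step in range(6):
    loss = model(torch.randn(8, 4)).pow(2).mean()
    loss.backward()

    book_version_clips = global_step > warmup_steps    # original: step 3 is not clipped
    fixed_version_clips = global_step >= warmup_steps  # fix: clipping starts at step 3
    if fixed_version_clips:
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    print(global_step, book_version_clips, fixed_version_clips)

    optimizer.step()
    optimizer.zero_grad()

Under these assumptions the two conditions differ only at step 3, exactly the single skipped clipping step; the BOOK_VERSION flag keeps the book's original behaviour reproducible.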
@@ -691,8 +698,8 @@
 "model = GPTModel(GPT_CONFIG_124M)\n",
 "model.to(device)\n",
 "\n",
-"peak_lr = 5e-4\n",
+"peak_lr = 0.001 # this was originally set to 5e-4 in the book by mistake\n",
-"optimizer = torch.optim.AdamW(model.parameters(), weight_decay=0.1)\n",
+"optimizer = torch.optim.AdamW(model.parameters(), lr=peak_lr, weight_decay=0.1) # the book accidentally omitted the lr assignment\n",
 "tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
 "\n",
 "n_epochs = 15\n",
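The other fix sits in the hunk above: when no lr argument is given, PyTorch's AdamW falls back to its default learning rate of 1e-3, so the original call silently ignored peak_lr. Passing lr=peak_lr makes the optimizer's learning rate match the peak value the schedule is built around. A minimal sketch with a toy model (not the notebook's GPTModel) showing the difference:

import torch

# Minimal sketch: a toy model standing in for GPTModel, just to show that
# omitting lr makes AdamW use its built-in default of 1e-3 instead of peak_lr.
model = torch.nn.Linear(4, 4)
peak_lr = 5e-4  # the value the book intended to pass

opt_without_lr = torch.optim.AdamW(model.parameters(), weight_decay=0.1)
opt_with_lr = torch.optim.AdamW(model.parameters(), lr=peak_lr, weight_decay=0.1)

print(opt_without_lr.param_groups[0]["lr"])  # 0.001, the AdamW default, not peak_lr
print(opt_with_lr.param_groups[0]["lr"])     # 0.0005, the intended peak learning rate

That default of 0.001 is presumably also why the hunk changes peak_lr to 0.001: it matches the learning rate the original runs effectively used.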
@@ -817,7 +824,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.6"
+"version": "3.11.4"
 }
 },
 "nbformat": 4,