Mirror of https://github.com/rasbt/LLMs-from-scratch.git, synced 2025-09-27 09:09:33 +00:00
Fix: Typo in appendix_d.py comments. (#682)

* Fix: pkg/llms_from_scratch/appendix_d.py
* minor language typo fix
* fix 691

---------

Co-authored-by: PrinceSajjadHussain <PrinceSajjadHussain@users.noreply.github.com>
Co-authored-by: rasbt <mail@sebastianraschka.com>
This commit is contained in:
parent c4b19d7eb6
commit cfdf22330b
@@ -615,7 +615,7 @@
     "        if global_step > warmup_steps:\n",
     "            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) \n",
     "    else:\n",
-    "        if global_step >= warmup_steps:  # the book originally used global_step > warmup_steps, which lead to a skipped clipping step after warmup\n",
+    "        if global_step >= warmup_steps:  # the book originally used global_step > warmup_steps, which led to a skipped clipping step after warmup\n",
     "            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)\n",
     "    \n",
     "        optimizer.step()\n",
@@ -66,7 +66,7 @@ def train_model(model, train_loader, val_loader, optimizer, device,
             if global_step > warmup_steps:
                 torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
         else:
-            if global_step >= warmup_steps:  # the book originally used global_step > warmup_steps, which lead to a skipped clipping step after warmup
+            if global_step >= warmup_steps:  # the book originally used global_step > warmup_steps, which led to a skipped clipping step after warmup
                 torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

             optimizer.step()
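The ">" vs. ">=" distinction above is an off-by-one: with global_step > warmup_steps, the first batch after warmup is never clipped. A minimal sketch of the effect, assuming global_step starts at -1 and is incremented once per batch as in the book's loop (warmup_steps = 3 and the six-batch count are made-up values):

warmup_steps = 3

def clipped_steps(should_clip):
    # Records the steps on which gradient clipping would run;
    # appending here stands in for torch.nn.utils.clip_grad_norm_.
    clipped = []
    global_step = -1
    for _ in range(6):  # six training batches
        global_step += 1
        if should_clip(global_step):
            clipped.append(global_step)
    return clipped

print(clipped_steps(lambda s: s > warmup_steps))   # [4, 5]    -> step 3 skipped
print(clipped_steps(lambda s: s >= warmup_steps))  # [3, 4, 5] -> clips from the first post-warmup step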
@@ -160,7 +160,7 @@ class PyTorchMultiHeadAttention(nn.Module):
     def __init__(self, d_in, d_out, num_heads, dropout=0.0, qkv_bias=False):
         super().__init__()

-        assert d_out % num_heads == 0, "embed_dim is indivisible by num_heads"
+        assert d_out % num_heads == 0, "d_out is indivisible by num_heads"

         self.num_heads = num_heads
         self.head_dim = d_out // num_heads
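The corrected message matches the class's actual parameter: PyTorchMultiHeadAttention takes d_out (there is no embed_dim argument) and splits it evenly across heads. A minimal standalone sketch of why the divisibility assert exists, with illustrative dimensions (the reshape below mirrors the head-splitting idea, not the class's exact implementation):

import torch

d_in, d_out, num_heads = 16, 12, 4  # illustrative values
assert d_out % num_heads == 0, "d_out is indivisible by num_heads"
head_dim = d_out // num_heads       # 12 // 4 = 3

x = torch.randn(2, 5, d_in)                # (batch, num_tokens, d_in)
qkv = torch.nn.Linear(d_in, 3 * d_out)(x)  # joint q/k/v projection -> (2, 5, 36)
# This view is the step that fails whenever d_out % num_heads != 0:
qkv = qkv.view(2, 5, 3, num_heads, head_dim)
print(qkv.shape)  # torch.Size([2, 5, 3, 4, 3])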