mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2025-08-31 20:08:08 +00:00
Update how the learning rate is retrieved: read optimizer.defaults["lr"] instead of optimizer.param_groups[0]["lr"]
This commit is contained in:
parent
30ba6a3f4b
commit
aa084656e0
@ -231,7 +231,7 @@
|
|||||||
" # Apply the calculated learning rate to the optimizer\n",
|
" # Apply the calculated learning rate to the optimizer\n",
|
||||||
" for param_group in optimizer.param_groups:\n",
|
" for param_group in optimizer.param_groups:\n",
|
||||||
" param_group[\"lr\"] = lr\n",
|
" param_group[\"lr\"] = lr\n",
|
||||||
" track_lrs.append(optimizer.param_groups[0][\"lr\"])\n",
|
" track_lrs.append(optimizer.defaults[\"lr\"])\n",
|
||||||
" \n",
|
" \n",
|
||||||
" # Calculate loss and update weights\n",
|
" # Calculate loss and update weights\n",
|
||||||
" # ..."
|
" # ..."
|
||||||
@ -318,7 +318,7 @@
|
|||||||
" # Apply the calculated learning rate to the optimizer\n",
|
" # Apply the calculated learning rate to the optimizer\n",
|
||||||
" for param_group in optimizer.param_groups:\n",
|
" for param_group in optimizer.param_groups:\n",
|
||||||
" param_group[\"lr\"] = lr\n",
|
" param_group[\"lr\"] = lr\n",
|
||||||
" track_lrs.append(optimizer.param_groups[0][\"lr\"])\n",
|
" track_lrs.append(optimizer.defaults[\"lr\"])\n",
|
||||||
" \n",
|
" \n",
|
||||||
" # Calculate loss and update weights"
|
" # Calculate loss and update weights"
|
||||||
]
|
]
|
||||||
@ -529,7 +529,7 @@
|
|||||||
" tokens_seen, global_step = 0, -1\n",
|
" tokens_seen, global_step = 0, -1\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Retrieve the maximum learning rate from the optimizer\n",
|
" # Retrieve the maximum learning rate from the optimizer\n",
|
||||||
" peak_lr = optimizer.param_groups[0][\"lr\"]\n",
|
" peak_lr = optimizer.defaults[\"lr\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # Calculate the total number of iterations in the training process\n",
|
" # Calculate the total number of iterations in the training process\n",
|
||||||
" total_training_steps = len(train_loader) * n_epochs\n",
|
" total_training_steps = len(train_loader) * n_epochs\n",
|
||||||
@ -780,7 +780,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.10.6"
|
"version": "3.11.4"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
@ -65,13 +65,13 @@ def train_model(model, train_loader, val_loader, optimizer, device,
|
|||||||
initial_lr=3e-05, min_lr=1e-6):
|
initial_lr=3e-05, min_lr=1e-6):
|
||||||
global_step = 0
|
global_step = 0
|
||||||
|
|
||||||
max_lr = optimizer.param_groups[0]["lr"]
|
max_lr = optimizer.defaults["lr"]
|
||||||
|
|
||||||
# Calculate total number of iterations
|
# Calculate total number of iterations
|
||||||
total_training_iters = len(train_loader) * n_epochs
|
total_training_iters = len(train_loader) * n_epochs
|
||||||
|
|
||||||
# Calculate the learning rate increment at each step during warmup
|
# Calculate the learning rate increment at each step during warmup
|
||||||
lr_increment = (optimizer.param_groups[0]["lr"] - initial_lr) / warmup_iters
|
lr_increment = (optimizer.defaults["lr"] - initial_lr) / warmup_iters
|
||||||
|
|
||||||
for epoch in range(n_epochs):
|
for epoch in range(n_epochs):
|
||||||
model.train()
|
model.train()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user