From d16527ddf2fe3ae996093aed127af133b74b1ab0 Mon Sep 17 00:00:00 2001
From: TITC <35098797+TITC@users.noreply.github.com>
Date: Tue, 6 Aug 2024 20:10:05 +0800
Subject: [PATCH] total training iters may equal warmup_iters (#301)

When total_training_iters equals warmup_iters, a ZeroDivisionError
occurs. For example, with total_training_iters = 20 and
warmup_iters = 20 (len(train_loader) = 4 multiplied by n_epochs = 5):

```shell
Traceback (most recent call last):
  File "LLMs-from-scratch/ch05/05_bonus_hparam_tuning/hparam_search.py", line 191, in <module>
    train_loss, val_loss = train_model(
                           ^^^^^^^^^^^^
  File "/mnt/raid1/docker/ai/LLMs-from-scratch/ch05/05_bonus_hparam_tuning/hparam_search.py", line 90, in train_model
    progress = (global_step - warmup_iters) / (total_training_iters - warmup_iters)
               ~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
ZeroDivisionError: division by zero
```
---
 ch05/05_bonus_hparam_tuning/hparam_search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ch05/05_bonus_hparam_tuning/hparam_search.py b/ch05/05_bonus_hparam_tuning/hparam_search.py
index 9c689f1..3e0604e 100644
--- a/ch05/05_bonus_hparam_tuning/hparam_search.py
+++ b/ch05/05_bonus_hparam_tuning/hparam_search.py
@@ -82,7 +82,7 @@ def train_model(model, train_loader, val_loader, optimizer, device,
             global_step += 1
 
             # Warmup: adjust learning rate linearly
-            if global_step < warmup_iters:
+            if global_step <= warmup_iters:
                 lr = initial_lr + global_step * lr_increment
             # Cosine annealing phase
             else:
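
For context, a minimal, self-contained sketch of the warmup plus cosine-annealing
schedule that reproduces the degenerate case; the function name `lr_at_step` and
the concrete learning-rate values are illustrative assumptions, not code taken
from hparam_search.py:

```python
# A minimal sketch of the schedule logic behind the patched branch in
# train_model. The function name lr_at_step and the learning-rate defaults
# are hypothetical, chosen only to make the example runnable.
import math

def lr_at_step(global_step, total_training_iters, warmup_iters,
               initial_lr=1e-5, peak_lr=1e-3, min_lr=1e-5):
    lr_increment = (peak_lr - initial_lr) / warmup_iters
    # With `<` instead of `<=`, the step where global_step == warmup_iters
    # falls through to the cosine branch; if total_training_iters also
    # equals warmup_iters, the denominator below becomes zero.
    if global_step <= warmup_iters:
        return initial_lr + global_step * lr_increment  # linear warmup
    # Cosine annealing phase
    progress = (global_step - warmup_iters) / (total_training_iters - warmup_iters)
    return min_lr + (peak_lr - min_lr) * 0.5 * (1 + math.cos(math.pi * progress))

# Degenerate case from the commit message: 4 batches * 5 epochs = 20 iters,
# all of them warmup. With `<=` this completes without a ZeroDivisionError.
for step in range(1, 21):
    lr_at_step(step, total_training_iters=20, warmup_iters=20)
```

With `<=`, the final warmup step reaches peak_lr exactly, and the cosine branch
is only entered when total_training_iters strictly exceeds warmup_iters, so its
denominator is always nonzero.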