From c31e99720d86b5ea3ee7907c80d63d287dc323a9 Mon Sep 17 00:00:00 2001
From: rasbt
Date: Fri, 5 Apr 2024 07:24:46 -0500
Subject: [PATCH] rename hparams to settings

Only the Python variable is renamed. The downloaded file keeps its original
name, hparams.json, because that is the filename hosted on OpenAI's server
and it cannot be renamed remotely.
---
 ch05/01_main-chapter-code/ch05.ipynb          |  6 +++---
 .../exercise-solutions.ipynb                  | 12 ++++++------
 ch05/01_main-chapter-code/gpt_download.py     | 12 ++++++------
 ch05/01_main-chapter-code/gpt_generate.py     | 14 +++++++-------
 ch05/01_main-chapter-code/gpt_train.py        | 16 ++++++++--------
 ch05/01_main-chapter-code/tests.py            |  6 +++---
 6 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/ch05/01_main-chapter-code/ch05.ipynb b/ch05/01_main-chapter-code/ch05.ipynb
index e93fb39..61ea3ca 100644
--- a/ch05/01_main-chapter-code/ch05.ipynb
+++ b/ch05/01_main-chapter-code/ch05.ipynb
@@ -2106,16 +2106,16 @@
    "text": [
     "File already exists and is up-to-date: gpt2/124M/checkpoint\n",
     "File already exists and is up-to-date: gpt2/124M/encoder.json\n",
     "File already exists and is up-to-date: gpt2/124M/hparams.json\n",
     "File already exists and is up-to-date: gpt2/124M/model.ckpt.data-00000-of-00001\n",
     "File already exists and is up-to-date: gpt2/124M/model.ckpt.index\n",
     "File already exists and is up-to-date: gpt2/124M/model.ckpt.meta\n",
     "File already exists and is up-to-date: gpt2/124M/vocab.bpe\n"
    ]
   }
  ],
  "source": [
-   "hparams, params = download_and_load_gpt2(model_size=\"124M\", models_dir=\"gpt2\")"
+   "settings, params = download_and_load_gpt2(model_size=\"124M\", models_dir=\"gpt2\")"
  ]
 },
 {
@@ -2133,7 +2133,7 @@
   }
  ],
  "source": [
-   "print(\"Settings:\", hparams)"
+   "print(\"Settings:\", settings)"
  ]
 },
 {
@@ -2401,7 +2401,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.11.4"
+ "version": "3.10.12"
 }
 },
 "nbformat": 4,
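Note (illustrative, not part of the patch): the renamed `settings` dictionary
carries OpenAI's original GPT-2 hyperparameter names, while the book's config
dicts use their own keys (see GPT_CONFIG_124M in gpt_train.py below). A minimal
sketch of the correspondence; `settings_to_config` is a hypothetical helper,
not a function from this repository:

```python
def settings_to_config(settings, base_config):
    # Translate OpenAI's hparams.json keys into book-style config keys.
    # The commented values are the 124M-model entries from hparams.json.
    new_config = base_config.copy()
    new_config.update({
        "vocab_size": settings["n_vocab"],     # 50257
        "context_length": settings["n_ctx"],   # 1024
        "emb_dim": settings["n_embd"],         # 768
        "n_heads": settings["n_head"],         # 12
        "n_layers": settings["n_layer"],       # 12
    })
    return new_config
```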
diff --git a/ch05/01_main-chapter-code/exercise-solutions.ipynb b/ch05/01_main-chapter-code/exercise-solutions.ipynb
index c7dd5d7..99a8626 100644
--- a/ch05/01_main-chapter-code/exercise-solutions.ipynb
+++ b/ch05/01_main-chapter-code/exercise-solutions.ipynb
@@ -584,18 +584,18 @@
   "text": [
    "File already exists and is up-to-date: gpt2/124M/checkpoint\n",
    "File already exists and is up-to-date: gpt2/124M/encoder.json\n",
    "File already exists and is up-to-date: gpt2/124M/hparams.json\n",
    "File already exists and is up-to-date: gpt2/124M/model.ckpt.data-00000-of-00001\n",
    "File already exists and is up-to-date: gpt2/124M/model.ckpt.index\n",
    "File already exists and is up-to-date: gpt2/124M/model.ckpt.meta\n",
    "File already exists and is up-to-date: gpt2/124M/vocab.bpe\n"
   ]
  }
 ],
 "source": [
  "from gpt_download import download_and_load_gpt2\n",
  "\n",
-  "hparams, params = download_and_load_gpt2(model_size=\"124M\", models_dir=\"gpt2\")"
+  "settings, params = download_and_load_gpt2(model_size=\"124M\", models_dir=\"gpt2\")"
 ]
},
{
@@ -737,16 +737,16 @@
   "text": [
    "File already exists and is up-to-date: gpt2/1558M/checkpoint\n",
    "File already exists and is up-to-date: gpt2/1558M/encoder.json\n",
    "File already exists and is up-to-date: gpt2/1558M/hparams.json\n",
    "File already exists and is up-to-date: gpt2/1558M/model.ckpt.data-00000-of-00001\n",
    "File already exists and is up-to-date: gpt2/1558M/model.ckpt.index\n",
    "File already exists and is up-to-date: gpt2/1558M/model.ckpt.meta\n",
    "File already exists and is up-to-date: gpt2/1558M/vocab.bpe\n"
   ]
  }
 ],
 "source": [
-  "hparams, params = download_and_load_gpt2(model_size=\"1558M\", models_dir=\"gpt2\")\n",
+  "settings, params = download_and_load_gpt2(model_size=\"1558M\", models_dir=\"gpt2\")\n",
  "\n",
  "model_name = \"gpt2-xl (1558M)\"\n",
  "NEW_CONFIG = GPT_CONFIG_124M.copy()\n",
@@ -785,7 +785,7 @@
  "In the main chapter, we experimented with the smallest GPT-2 model, which has only 124M parameters. The reason was to keep the resource requirements as low as possible. However, you can easily experiment with larger models with minimal code changes. For example, to load the 1558M model instead of the 124M model in chapter 5, the only 2 lines of code that we have to change are\n",
  "\n",
  "```\n",
-  "hparams, params = download_and_load_gpt2(model_size=\"124M\", models_dir=\"gpt2\")\n",
+  "settings, params = download_and_load_gpt2(model_size=\"124M\", models_dir=\"gpt2\")\n",
  "model_name = \"gpt2-small (124M)\"\n",
  "```\n",
  "\n",
@@ -793,7 +793,7 @@
  "\n",
  "\n",
  "```\n",
-  "hparams, params = download_and_load_gpt2(model_size=\"1558M\", models_dir=\"gpt2\")\n",
+  "settings, params = download_and_load_gpt2(model_size=\"1558M\", models_dir=\"gpt2\")\n",
  "model_name = \"gpt2-xl (1558M)\"\n",
  "```"
 ]
@@ -864,7 +864,7 @@
  "gpt = GPTModel(NEW_CONFIG)\n",
  "gpt.eval()\n",
  "\n",
-  "hparams, params = download_and_load_gpt2(model_size=\"1558M\", models_dir=\"gpt2\")\n",
+  "settings, params = download_and_load_gpt2(model_size=\"1558M\", models_dir=\"gpt2\")\n",
  "load_weights_into_gpt(gpt, params)"
 ]
},
@@ -926,7 +926,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.10.6"
+ "version": "3.10.12"
 }
},
"nbformat": 4,
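Note (illustrative, not part of the patch): assembled end to end, the exercise's
workflow with the renamed variable looks as follows, assuming GPT_CONFIG_124M,
GPTModel, and load_weights_into_gpt from the chapter code are in scope; the
NEW_CONFIG values are the standard GPT-2 XL dimensions:

```python
from gpt_download import download_and_load_gpt2

# Download the GPT-2 XL weights; `settings` is the renamed first return value.
settings, params = download_and_load_gpt2(model_size="1558M", models_dir="gpt2")
model_name = "gpt2-xl (1558M)"

# Widen the 124M base config to XL size (1600-dim embeddings, 48 layers,
# 25 heads) and enable the query-key-value bias used by the OpenAI weights.
NEW_CONFIG = GPT_CONFIG_124M.copy()
NEW_CONFIG.update({"emb_dim": 1600, "n_layers": 48, "n_heads": 25})
NEW_CONFIG.update({"context_length": 1024, "qkv_bias": True})

gpt = GPTModel(NEW_CONFIG)
gpt.eval()
load_weights_into_gpt(gpt, params)
```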
diff --git a/ch05/01_main-chapter-code/gpt_download.py b/ch05/01_main-chapter-code/gpt_download.py
index 89f2bc7..690d2ce 100644
--- a/ch05/01_main-chapter-code/gpt_download.py
+++ b/ch05/01_main-chapter-code/gpt_download.py
@@ -16,24 +16,24 @@ def download_and_load_gpt2(model_size, models_dir):
     model_dir = os.path.join(models_dir, model_size)
     base_url = "https://openaipublic.blob.core.windows.net/gpt-2/models"
     filenames = [
         "checkpoint", "encoder.json", "hparams.json",
         "model.ckpt.data-00000-of-00001", "model.ckpt.index",
         "model.ckpt.meta", "vocab.bpe"
     ]
 
     os.makedirs(model_dir, exist_ok=True)
 
     for filename in filenames:
         file_url = os.path.join(base_url, model_size, filename)
         file_path = os.path.join(model_dir, filename)
         download_file(file_url, file_path)
 
-    # Load hparams and params
+    # Load settings and params
     tf_ckpt_path = tf.train.latest_checkpoint(model_dir)
-    hparams = json.load(open(os.path.join(model_dir, "hparams.json")))
-    params = load_gpt2_params_from_tf_ckpt(tf_ckpt_path, hparams)
+    settings = json.load(open(os.path.join(model_dir, "hparams.json")))
+    params = load_gpt2_params_from_tf_ckpt(tf_ckpt_path, settings)
 
-    return hparams, params
+    return settings, params
 
 
 def download_file(url, destination):
@@ -64,9 +64,9 @@ def download_file(url, destination):
             file.write(chunk)  # Write the chunk to the file
 
 
-def load_gpt2_params_from_tf_ckpt(ckpt_path, hparams):
+def load_gpt2_params_from_tf_ckpt(ckpt_path, settings):
     # Initialize parameters dictionary with empty blocks for each layer
-    params = {"blocks": [{} for _ in range(hparams["n_layer"])]}
+    params = {"blocks": [{} for _ in range(settings["n_layer"])]}
 
     # Iterate over each variable in the checkpoint
     for name, _ in tf.train.list_variables(ckpt_path):
diff --git a/ch05/01_main-chapter-code/gpt_generate.py b/ch05/01_main-chapter-code/gpt_generate.py
index d35274f..948dcff 100644
--- a/ch05/01_main-chapter-code/gpt_generate.py
+++ b/ch05/01_main-chapter-code/gpt_generate.py
@@ -37,24 +37,24 @@ def download_and_load_gpt2(model_size, models_dir):
     model_dir = os.path.join(models_dir, model_size)
     base_url = "https://openaipublic.blob.core.windows.net/gpt-2/models"
     filenames = [
         "checkpoint", "encoder.json", "hparams.json",
         "model.ckpt.data-00000-of-00001", "model.ckpt.index",
         "model.ckpt.meta", "vocab.bpe"
     ]
 
     os.makedirs(model_dir, exist_ok=True)
 
     for filename in filenames:
         file_url = os.path.join(base_url, model_size, filename)
         file_path = os.path.join(model_dir, filename)
         download_file(file_url, file_path)
 
-    # Load hparams and params
+    # Load settings and params
     tf_ckpt_path = tf.train.latest_checkpoint(model_dir)
-    hparams = json.load(open(os.path.join(model_dir, "hparams.json")))
-    params = load_gpt2_params_from_tf_ckpt(tf_ckpt_path, hparams)
+    settings = json.load(open(os.path.join(model_dir, "hparams.json")))
+    params = load_gpt2_params_from_tf_ckpt(tf_ckpt_path, settings)
 
-    return hparams, params
+    return settings, params
 
 
 def download_file(url, destination):
@@ -85,9 +85,9 @@ def download_file(url, destination):
             file.write(chunk)  # Write the chunk to the file
 
 
-def load_gpt2_params_from_tf_ckpt(ckpt_path, hparams):
+def load_gpt2_params_from_tf_ckpt(ckpt_path, settings):
     # Initialize parameters dictionary with empty blocks for each layer
-    params = {"blocks": [{} for _ in range(hparams["n_layer"])]}
+    params = {"blocks": [{} for _ in range(settings["n_layer"])]}
 
     # Iterate over each variable in the checkpoint
     for name, _ in tf.train.list_variables(ckpt_path):
@@ -221,7 +221,7 @@ def main(gpt_config, input_prompt, model_size):
 
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-    hparams, params = download_and_load_gpt2(model_size=model_size, models_dir="gpt2")
+    settings, params = download_and_load_gpt2(model_size=model_size, models_dir="gpt2")
 
     gpt = GPTModel(gpt_config)
     load_weights_into_gpt(gpt, params)
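Note (illustrative, not part of the patch): OpenAI's server hosts the file
under the fixed name hparams.json, so the loaders above keep that string and
only the Python variable becomes `settings`. A standalone sketch of the load
step with a context manager instead of a bare open(); `load_settings` is a
hypothetical helper name:

```python
import json
import os

def load_settings(model_dir):
    # The on-disk filename stays "hparams.json"; only the variable is renamed.
    with open(os.path.join(model_dir, "hparams.json"), "r", encoding="utf-8") as f:
        return json.load(f)

# Example usage (assumes the files were already downloaded):
# settings = load_settings("gpt2/124M")
```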
diff --git a/ch05/01_main-chapter-code/gpt_train.py b/ch05/01_main-chapter-code/gpt_train.py
index 421409e..8382aba 100644
--- a/ch05/01_main-chapter-code/gpt_train.py
+++ b/ch05/01_main-chapter-code/gpt_train.py
@@ -124,7 +124,7 @@ def plot_losses(epochs_seen, tokens_seen, train_losses, val_losses):
     # plt.show()
 
 
-def main(gpt_config, hparams):
+def main(gpt_config, settings):
     torch.manual_seed(123)
 
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -152,7 +152,7 @@ def main(gpt_config, hparams):
     model = GPTModel(gpt_config)
     model.to(device)  # no assignment model = model.to(device) necessary for nn.Module classes
     optimizer = torch.optim.AdamW(
-        model.parameters(), lr=hparams["learning_rate"], weight_decay=hparams["weight_decay"]
+        model.parameters(), lr=settings["learning_rate"], weight_decay=settings["weight_decay"]
     )
 
     ##############################
@@ -165,7 +165,7 @@ def main(gpt_config, hparams):
 
     train_loader = create_dataloader_v1(
         text_data[:split_idx],
-        batch_size=hparams["batch_size"],
+        batch_size=settings["batch_size"],
         max_length=gpt_config["context_length"],
         stride=gpt_config["context_length"],
         drop_last=True,
@@ -174,7 +174,7 @@ def main(gpt_config, hparams):
 
     val_loader = create_dataloader_v1(
         text_data[split_idx:],
-        batch_size=hparams["batch_size"],
+        batch_size=settings["batch_size"],
         max_length=gpt_config["context_length"],
         stride=gpt_config["context_length"],
         drop_last=False,
@@ -187,7 +187,7 @@ def main(gpt_config, hparams):
 
     train_losses, val_losses, tokens_seen = train_model_simple(
         model, train_loader, val_loader, optimizer, device,
-        num_epochs=hparams["num_epochs"], eval_freq=5, eval_iter=1,
+        num_epochs=settings["num_epochs"], eval_freq=5, eval_iter=1,
         start_context="Every effort moves you",
     )
 
@@ -206,7 +206,7 @@ if __name__ == "__main__":
         "qkv_bias": False       # Query-key-value bias
     }
 
-    OTHER_HPARAMS = {
+    OTHER_SETTINGS = {
         "learning_rate": 5e-4,
         "num_epochs": 10,
         "batch_size": 2,
@@ -217,14 +217,14 @@ if __name__ == "__main__":
     # Initiate training
     ###########################
 
-    train_losses, val_losses, tokens_seen, model = main(GPT_CONFIG_124M, OTHER_HPARAMS)
+    train_losses, val_losses, tokens_seen, model = main(GPT_CONFIG_124M, OTHER_SETTINGS)
 
     ###########################
     # After training
     ###########################
 
     # Plot results
-    epochs_tensor = torch.linspace(0, OTHER_HPARAMS["num_epochs"], len(train_losses))
+    epochs_tensor = torch.linspace(0, OTHER_SETTINGS["num_epochs"], len(train_losses))
     plot_losses(epochs_tensor, tokens_seen, train_losses, val_losses)
     plt.savefig("loss.pdf")
diff --git a/ch05/01_main-chapter-code/tests.py b/ch05/01_main-chapter-code/tests.py
index 176a092..4ffcb1e 100644
--- a/ch05/01_main-chapter-code/tests.py
+++ b/ch05/01_main-chapter-code/tests.py
@@ -23,7 +23,7 @@ def gpt_config():
 
 
 @pytest.fixture
-def other_hparams():
+def other_settings():
     return {
         "learning_rate": 5e-4,
         "num_epochs": 1,    # small for testing efficiency
@@ -32,8 +32,8 @@ def other_hparams():
     }
 
 
-def test_main(gpt_config, other_hparams):
-    train_losses, val_losses, tokens_seen, model = main(gpt_config, other_hparams)
+def test_main(gpt_config, other_settings):
+    train_losses, val_losses, tokens_seen, model = main(gpt_config, other_settings)
 
     assert len(train_losses) == 39, "Unexpected number of training losses"
     assert len(val_losses) == 39, "Unexpected number of validation losses"
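Note (illustrative, not part of the patch): a quick end-to-end check of the
renamed API after applying the patch. The expected settings values are the
124M hyperparameters from OpenAI's hparams.json, and params["blocks"] is the
per-layer list initialized in load_gpt2_params_from_tf_ckpt above:

```python
from gpt_download import download_and_load_gpt2

settings, params = download_and_load_gpt2(model_size="124M", models_dir="gpt2")

print("Settings:", settings)
# Settings: {'n_vocab': 50257, 'n_ctx': 1024, 'n_embd': 768, 'n_head': 12, 'n_layer': 12}

# One parameter dict per transformer block:
assert len(params["blocks"]) == settings["n_layer"]
```

The renamed fixtures can then be exercised by running pytest on
ch05/01_main-chapter-code/tests.py.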