rename hparams to settings

rasbt 2024-04-05 07:24:46 -05:00
parent 7d0b9b78b0
commit c31e99720d
6 changed files with 39 additions and 39 deletions

View File

@@ -2106,7 +2106,7 @@
 "text": [
 "File already exists and is up-to-date: gpt2/124M/checkpoint\n",
 "File already exists and is up-to-date: gpt2/124M/encoder.json\n",
-"File already exists and is up-to-date: gpt2/124M/hparams.json\n",
+"File already exists and is up-to-date: gpt2/124M/settings.json\n",
 "File already exists and is up-to-date: gpt2/124M/model.ckpt.data-00000-of-00001\n",
 "File already exists and is up-to-date: gpt2/124M/model.ckpt.index\n",
 "File already exists and is up-to-date: gpt2/124M/model.ckpt.meta\n",
@@ -2115,7 +2115,7 @@
 }
 ],
 "source": [
-"hparams, params = download_and_load_gpt2(model_size=\"124M\", models_dir=\"gpt2\")"
+"settings, params = download_and_load_gpt2(model_size=\"124M\", models_dir=\"gpt2\")"
 ]
 },
 {
@@ -2133,7 +2133,7 @@
 }
 ],
 "source": [
-"print(\"Settings:\", hparams)"
+"print(\"Settings:\", settings)"
 ]
 },
 {
@@ -2401,7 +2401,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.11.4"
+"version": "3.10.12"
 }
 },
 "nbformat": 4,

View File

@@ -584,7 +584,7 @@
 "text": [
 "File already exists and is up-to-date: gpt2/124M/checkpoint\n",
 "File already exists and is up-to-date: gpt2/124M/encoder.json\n",
-"File already exists and is up-to-date: gpt2/124M/hparams.json\n",
+"File already exists and is up-to-date: gpt2/124M/settings.json\n",
 "File already exists and is up-to-date: gpt2/124M/model.ckpt.data-00000-of-00001\n",
 "File already exists and is up-to-date: gpt2/124M/model.ckpt.index\n",
 "File already exists and is up-to-date: gpt2/124M/model.ckpt.meta\n",
@@ -595,7 +595,7 @@
 "source": [
 "from gpt_download import download_and_load_gpt2\n",
 "\n",
-"hparams, params = download_and_load_gpt2(model_size=\"124M\", models_dir=\"gpt2\")"
+"settings, params = download_and_load_gpt2(model_size=\"124M\", models_dir=\"gpt2\")"
 ]
 },
 {
@@ -737,7 +737,7 @@
 "text": [
 "File already exists and is up-to-date: gpt2/1558M/checkpoint\n",
 "File already exists and is up-to-date: gpt2/1558M/encoder.json\n",
-"File already exists and is up-to-date: gpt2/1558M/hparams.json\n",
+"File already exists and is up-to-date: gpt2/1558M/settings.json\n",
 "File already exists and is up-to-date: gpt2/1558M/model.ckpt.data-00000-of-00001\n",
 "File already exists and is up-to-date: gpt2/1558M/model.ckpt.index\n",
 "File already exists and is up-to-date: gpt2/1558M/model.ckpt.meta\n",
@@ -748,7 +748,7 @@
 }
 ],
 "source": [
-"hparams, params = download_and_load_gpt2(model_size=\"1558M\", models_dir=\"gpt2\")\n",
+"settings, params = download_and_load_gpt2(model_size=\"1558M\", models_dir=\"gpt2\")\n",
 "\n",
 "model_name = \"gpt2-xl (1558M)\"\n",
 "NEW_CONFIG = GPT_CONFIG_124M.copy()\n",
@@ -785,7 +785,7 @@
 "In the main chapter, we experimented with the smallest GPT-2 model, which has only 124M parameters. The reason was to keep the resource requirements as low as possible. However, you can easily experiment with larger models with minimal code changes. For example, instead of loading the 1558M instead of 124M model in chapter 5, the only 2 lines of code that we have to change are\n",
 "\n",
 "```\n",
-"hparams, params = download_and_load_gpt2(model_size=\"124M\", models_dir=\"gpt2\")\n",
+"settings, params = download_and_load_gpt2(model_size=\"124M\", models_dir=\"gpt2\")\n",
 "model_name = \"gpt2-small (124M)\"\n",
 "```\n",
 "\n",
@@ -793,7 +793,7 @@
 "\n",
 "\n",
 "```\n",
-"hparams, params = download_and_load_gpt2(model_size=\"1558M\", models_dir=\"gpt2\")\n",
+"settings, params = download_and_load_gpt2(model_size=\"1558M\", models_dir=\"gpt2\")\n",
 "model_name = \"gpt2-xl (1558M)\"\n",
 "```"
 ]
@@ -836,7 +836,7 @@
 "text": [
 "File already exists and is up-to-date: gpt2/1558M/checkpoint\n",
 "File already exists and is up-to-date: gpt2/1558M/encoder.json\n",
-"File already exists and is up-to-date: gpt2/1558M/hparams.json\n",
+"File already exists and is up-to-date: gpt2/1558M/settings.json\n",
 "File already exists and is up-to-date: gpt2/1558M/model.ckpt.data-00000-of-00001\n",
 "File already exists and is up-to-date: gpt2/1558M/model.ckpt.index\n",
 "File already exists and is up-to-date: gpt2/1558M/model.ckpt.meta\n",
@@ -864,7 +864,7 @@
 "gpt = GPTModel(NEW_CONFIG)\n",
 "gpt.eval()\n",
 "\n",
-"hparams, params = download_and_load_gpt2(model_size=\"1558M\", models_dir=\"gpt2\")\n",
+"settings, params = download_and_load_gpt2(model_size=\"1558M\", models_dir=\"gpt2\")\n",
 "load_weights_into_gpt(gpt, params)"
 ]
 },
@@ -926,7 +926,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.6"
+"version": "3.10.12"
 }
 },
 "nbformat": 4,

View File

@@ -16,7 +16,7 @@ def download_and_load_gpt2(model_size, models_dir):
     model_dir = os.path.join(models_dir, model_size)
     base_url = "https://openaipublic.blob.core.windows.net/gpt-2/models"
     filenames = [
-        "checkpoint", "encoder.json", "hparams.json",
+        "checkpoint", "encoder.json", "settings.json",
         "model.ckpt.data-00000-of-00001", "model.ckpt.index",
         "model.ckpt.meta", "vocab.bpe"
     ]
@@ -28,12 +28,12 @@ def download_and_load_gpt2(model_size, models_dir):
         file_path = os.path.join(model_dir, filename)
         download_file(file_url, file_path)

-    # Load hparams and params
+    # Load settings and params
     tf_ckpt_path = tf.train.latest_checkpoint(model_dir)
-    hparams = json.load(open(os.path.join(model_dir, "hparams.json")))
-    params = load_gpt2_params_from_tf_ckpt(tf_ckpt_path, hparams)
+    settings = json.load(open(os.path.join(model_dir, "settings.json")))
+    params = load_gpt2_params_from_tf_ckpt(tf_ckpt_path, settings)

-    return hparams, params
+    return settings, params


 def download_file(url, destination):
@@ -64,9 +64,9 @@ def download_file(url, destination):
             file.write(chunk)  # Write the chunk to the file


-def load_gpt2_params_from_tf_ckpt(ckpt_path, hparams):
+def load_gpt2_params_from_tf_ckpt(ckpt_path, settings):
     # Initialize parameters dictionary with empty blocks for each layer
-    params = {"blocks": [{} for _ in range(hparams["n_layer"])]}
+    params = {"blocks": [{} for _ in range(settings["n_layer"])]}

     # Iterate over each variable in the checkpoint
     for name, _ in tf.train.list_variables(ckpt_path):
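The rename here is mechanical: the function still parses the same JSON and threads it into `load_gpt2_params_from_tf_ckpt`. A sketch of consuming the renamed return values; the left-hand config keys are assumptions modeled on `GPT_CONFIG_124M` from the chapter code:

```
from gpt_download import download_and_load_gpt2

settings, params = download_and_load_gpt2(model_size="124M", models_dir="gpt2")

# Map the loaded settings onto a book-style config dict
# (key names on the left are assumed, not part of this commit):
config = {
    "vocab_size": settings["n_vocab"],
    "context_length": settings["n_ctx"],
    "emb_dim": settings["n_embd"],
    "n_heads": settings["n_head"],
    "n_layers": settings["n_layer"],
}
```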

View File

@@ -37,7 +37,7 @@ def download_and_load_gpt2(model_size, models_dir):
     model_dir = os.path.join(models_dir, model_size)
     base_url = "https://openaipublic.blob.core.windows.net/gpt-2/models"
     filenames = [
-        "checkpoint", "encoder.json", "hparams.json",
+        "checkpoint", "encoder.json", "settings.json",
         "model.ckpt.data-00000-of-00001", "model.ckpt.index",
         "model.ckpt.meta", "vocab.bpe"
     ]
@@ -49,12 +49,12 @@ def download_and_load_gpt2(model_size, models_dir):
         file_path = os.path.join(model_dir, filename)
         download_file(file_url, file_path)

-    # Load hparams and params
+    # Load settings and params
     tf_ckpt_path = tf.train.latest_checkpoint(model_dir)
-    hparams = json.load(open(os.path.join(model_dir, "hparams.json")))
-    params = load_gpt2_params_from_tf_ckpt(tf_ckpt_path, hparams)
+    settings = json.load(open(os.path.join(model_dir, "settings.json")))
+    params = load_gpt2_params_from_tf_ckpt(tf_ckpt_path, settings)

-    return hparams, params
+    return settings, params


 def download_file(url, destination):
@@ -85,9 +85,9 @@ def download_file(url, destination):
             file.write(chunk)  # Write the chunk to the file


-def load_gpt2_params_from_tf_ckpt(ckpt_path, hparams):
+def load_gpt2_params_from_tf_ckpt(ckpt_path, settings):
     # Initialize parameters dictionary with empty blocks for each layer
-    params = {"blocks": [{} for _ in range(hparams["n_layer"])]}
+    params = {"blocks": [{} for _ in range(settings["n_layer"])]}

     # Iterate over each variable in the checkpoint
     for name, _ in tf.train.list_variables(ckpt_path):
@@ -221,7 +221,7 @@ def main(gpt_config, input_prompt, model_size):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

-    hparams, params = download_and_load_gpt2(model_size=model_size, models_dir="gpt2")
+    settings, params = download_and_load_gpt2(model_size=model_size, models_dir="gpt2")

     gpt = GPTModel(gpt_config)
     load_weights_into_gpt(gpt, params)
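Note that in this path only `params` is consumed after loading; `settings` is returned but unused in the lines shown. A minimal end-to-end sketch under the renamed API, assuming `GPTModel`, `GPT_CONFIG_124M`, and `load_weights_into_gpt` as defined in the chapter code:

```
import torch

settings, params = download_and_load_gpt2(model_size="124M", models_dir="gpt2")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
gpt = GPTModel(GPT_CONFIG_124M)
load_weights_into_gpt(gpt, params)  # only `params` is needed for the weights
gpt.to(device)
gpt.eval()
```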

View File

@@ -124,7 +124,7 @@ def plot_losses(epochs_seen, tokens_seen, train_losses, val_losses):
     # plt.show()


-def main(gpt_config, hparams):
+def main(gpt_config, settings):
     torch.manual_seed(123)
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -152,7 +152,7 @@ def main(gpt_config, hparams):
     model = GPTModel(gpt_config)
     model.to(device)  # no assignment model = model.to(device) necessary for nn.Module classes
     optimizer = torch.optim.AdamW(
-        model.parameters(), lr=hparams["learning_rate"], weight_decay=hparams["weight_decay"]
+        model.parameters(), lr=settings["learning_rate"], weight_decay=settings["weight_decay"]
     )

     ##############################
@@ -165,7 +165,7 @@ def main(gpt_config, hparams):
     train_loader = create_dataloader_v1(
         text_data[:split_idx],
-        batch_size=hparams["batch_size"],
+        batch_size=settings["batch_size"],
         max_length=gpt_config["context_length"],
         stride=gpt_config["context_length"],
         drop_last=True,
@@ -174,7 +174,7 @@ def main(gpt_config, hparams):
     val_loader = create_dataloader_v1(
         text_data[split_idx:],
-        batch_size=hparams["batch_size"],
+        batch_size=settings["batch_size"],
         max_length=gpt_config["context_length"],
         stride=gpt_config["context_length"],
         drop_last=False,
@@ -187,7 +187,7 @@ def main(gpt_config, hparams):
     train_losses, val_losses, tokens_seen = train_model_simple(
         model, train_loader, val_loader, optimizer, device,
-        num_epochs=hparams["num_epochs"], eval_freq=5, eval_iter=1,
+        num_epochs=settings["num_epochs"], eval_freq=5, eval_iter=1,
         start_context="Every effort moves you",
     )
@@ -206,7 +206,7 @@ if __name__ == "__main__":
         "qkv_bias": False  # Query-key-value bias
     }

-    OTHER_HPARAMS = {
+    OTHER_SETTINGS = {
         "learning_rate": 5e-4,
         "num_epochs": 10,
         "batch_size": 2,
@@ -217,14 +217,14 @@ if __name__ == "__main__":
     # Initiate training
     ###########################

-    train_losses, val_losses, tokens_seen, model = main(GPT_CONFIG_124M, OTHER_HPARAMS)
+    train_losses, val_losses, tokens_seen, model = main(GPT_CONFIG_124M, OTHER_SETTINGS)

     ###########################
     # After training
     ###########################

     # Plot results
-    epochs_tensor = torch.linspace(0, OTHER_HPARAMS["num_epochs"], len(train_losses))
+    epochs_tensor = torch.linspace(0, OTHER_SETTINGS["num_epochs"], len(train_losses))
     plot_losses(epochs_tensor, tokens_seen, train_losses, val_losses)
     plt.savefig("loss.pdf")

View File

@@ -23,7 +23,7 @@ def gpt_config():

 @pytest.fixture
-def other_hparams():
+def other_settings():
     return {
         "learning_rate": 5e-4,
         "num_epochs": 1,  # small for testing efficiency
@@ -32,8 +32,8 @@ def other_hparams():
     }


-def test_main(gpt_config, other_hparams):
-    train_losses, val_losses, tokens_seen, model = main(gpt_config, other_hparams)
+def test_main(gpt_config, other_settings):
+    train_losses, val_losses, tokens_seen, model = main(gpt_config, other_settings)
     assert len(train_losses) == 39, "Unexpected number of training losses"
     assert len(val_losses) == 39, "Unexpected number of validation losses"
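Because pytest injects fixtures by name, the rename has to touch both the fixture definition and the test signature, as the hunks above do; a mismatch would surface as a "fixture not found" error rather than a `NameError`. A minimal sketch of invoking the suite from Python (the module name `tests.py` is an assumption):

```
import pytest

# Equivalent to running `pytest -q tests.py` on the command line.
raise SystemExit(pytest.main(["-q", "tests.py"]))
```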