diff --git a/ch05/10_llm-training-speed/01_opt_single_gpu.py b/ch05/10_llm-training-speed/01_opt_single_gpu.py index 9ec9aa9..891e424 100644 --- a/ch05/10_llm-training-speed/01_opt_single_gpu.py +++ b/ch05/10_llm-training-speed/01_opt_single_gpu.py @@ -502,6 +502,12 @@ if __name__ == "__main__": plt.savefig("loss.pdf") # Save and load model - # torch.save(model.state_dict(), "model.pth") + # + # compiled = hasattr(model, "_orig_mod") + # if compiled: + # torch.save(model._orig_mod.state_dict(), "model.pth") + # else: + # torch.save(model.state_dict(), "model.pth") + # # model = GPTModel(GPT_CONFIG_124M) # model.load_state_dict(torch.load("model.pth", weights_only=True)) diff --git a/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py b/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py index 86ef3ec..e9b9767 100644 --- a/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py +++ b/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py @@ -598,6 +598,12 @@ if __name__ == "__main__": plt.savefig("loss.pdf") # Save and load model - # torch.save(model.state_dict(), "model.pth") + # + # compiled = hasattr(model, "_orig_mod") + # if compiled: + # torch.save(model._orig_mod.state_dict(), "model.pth") + # else: + # torch.save(model.state_dict(), "model.pth") + # # model = GPTModel(GPT_CONFIG_124M) # model.load_state_dict(torch.load("model.pth", weights_only=True))