diff --git a/.gitignore b/.gitignore index f91cc5c..4385a0b 100644 --- a/.gitignore +++ b/.gitignore @@ -35,12 +35,15 @@ ch05/01_main-chapter-code/model.pth ch05/01_main-chapter-code/model_and_optimizer.pth ch05/03_bonus_pretraining_on_gutenberg/model_checkpoints ch05/06_user_interface/gpt2 +ch05/07_gpt_to_llama/.cache ch05/07_gpt_to_llama/Llama-2-7b ch05/07_gpt_to_llama/Llama-2-7b-chat -ch05/07_gpt_to_llama/.cache -ch05/07_gpt_to_llama/llama3-files -ch05/07_gpt_to_llama/llama31-files -ch05/07_gpt_to_llama/llama32-files +ch05/07_gpt_to_llama/Llama-3-8B +ch05/07_gpt_to_llama/Llama-3-8B-Instruct +ch05/07_gpt_to_llama/Llama-3.1-8B +ch05/07_gpt_to_llama/Llama-3.1-8B-Instruct +ch05/07_gpt_to_llama/Llama-3.2-1B +ch05/07_gpt_to_llama/Llama-3.2-1B-Instruct ch06/01_main-chapter-code/gpt2 ch06/02_bonus_additional-experiments/gpt2 diff --git a/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb b/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb index e7f459e..1ff5a42 100644 --- a/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb +++ b/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb @@ -1189,7 +1189,7 @@ "tokenizer_file = hf_hub_download(\n", " repo_id=\"meta-llama/Llama-2-7b\",\n", " filename=\"tokenizer.model\",\n", - " local_dir=\"Llama-2-7B\"\n", + " local_dir=\"Llama-2-7b\"\n", ")" ] }, diff --git a/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb b/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb index bf62d9f..1c0dc34 100644 --- a/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb +++ b/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb @@ -1252,7 +1252,7 @@ "tokenizer_file_path = hf_hub_download(\n", " repo_id=\"meta-llama/Meta-Llama-3-8B\",\n", " filename=\"original/tokenizer.model\",\n", - " local_dir=\"llama3-files\"\n", + " local_dir=\"Llama-3-8B\"\n", ")" ] }, @@ -1458,7 +1458,7 @@ " weights_file = hf_hub_download(\n", " repo_id=\"meta-llama/Meta-Llama-3-8B\",\n", " filename=f\"model-0000{i}-of-00004.safetensors\",\n", - " 
local_dir=\"llama3-files\"\n", + " local_dir=\"Llama-3-8B\"\n", " )\n", " current_weights = load_file(weights_file)\n", " combined_weights.update(current_weights)" @@ -1677,7 +1677,7 @@ "id": "akyo7WNyF_YL" }, "source": [ - "- Above, we used the pretrained base model; if you want to use a model capable of following instructions, use the `\"meta-llama/Llama-3-8b-Instruct\"` model instead, as shown below" + "- Above, we used the pretrained base model; if you want to use a model capable of following instructions, use the `\"meta-llama/Meta-Llama-3-8B-Instruct\"` model instead, as shown below" ] }, { @@ -1824,7 +1824,7 @@ " weights_file = hf_hub_download(\n", " repo_id=\"meta-llama/Meta-Llama-3-8B-Instruct\",\n", " filename=f\"model-0000{i}-of-00004.safetensors\",\n", - " local_dir=\"llama3-files\"\n", + " local_dir=\"Llama-3-8B-Instruct\"\n", " )\n", " current_weights = load_file(weights_file)\n", " combined_weights.update(current_weights)\n", @@ -2157,7 +2157,7 @@ "tokenizer_file_path = hf_hub_download(\n", " repo_id=\"meta-llama/Llama-3.1-8B\",\n", " filename=\"original/tokenizer.model\",\n", - " local_dir=\"llama31-files\"\n", + " local_dir=\"Llama-3.1-8B\"\n", ")\n", "\n", "tokenizer = Tokenizer(tokenizer_file_path)" @@ -2313,7 +2313,7 @@ " weights_file = hf_hub_download(\n", " repo_id=\"meta-llama/Llama-3.1-8B\",\n", " filename=f\"model-0000{i}-of-00004.safetensors\",\n", - " local_dir=\"llama31-files\"\n", + " local_dir=\"Llama-3.1-8B\"\n", " )\n", " current_weights = load_file(weights_file)\n", " combined_weights.update(current_weights)\n", @@ -2512,7 +2512,7 @@ "tokenizer_file_path = hf_hub_download(\n", " repo_id=\"meta-llama/Llama-3.2-1B\",\n", " filename=\"original/tokenizer.model\",\n", - " local_dir=\"llama32-files\"\n", + " local_dir=\"Llama-3.2-1B\"\n", ")\n", "\n", "tokenizer = Tokenizer(tokenizer_file_path)" @@ -2589,7 +2589,7 @@ "weights_file = hf_hub_download(\n", " repo_id=\"meta-llama/Llama-3.2-1B\",\n", " filename=f\"model.safetensors\",\n", - " 
local_dir=\"llama32-files\"\n", + " local_dir=\"Llama-3.2-1B\"\n", ")\n", "current_weights = load_file(weights_file)\n", "\n", @@ -2687,7 +2687,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "pt", "language": "python", "name": "python3" }, @@ -2701,7 +2701,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.11.9" }, "widgets": { "application/vnd.jupyter.widget-state+json": { diff --git a/ch05/07_gpt_to_llama/standalone-llama32.ipynb b/ch05/07_gpt_to_llama/standalone-llama32.ipynb index b3d80c9..dd8fdf5 100644 --- a/ch05/07_gpt_to_llama/standalone-llama32.ipynb +++ b/ch05/07_gpt_to_llama/standalone-llama32.ipynb @@ -733,7 +733,7 @@ "tokenizer_file_path = hf_hub_download(\n", " repo_id=f\"meta-llama/Llama-3.2-{LLAMA_SIZE_STR}-Instruct\",\n", " filename=\"original/tokenizer.model\",\n", - " local_dir=\"llama32-files\"\n", + " local_dir=\"Llama-3.2-1B-Instruct\"\n", ")" ] }, @@ -860,7 +860,7 @@ " weights_file = hf_hub_download(\n", " repo_id=f\"meta-llama/Llama-3.2-{LLAMA_SIZE_STR}-Instruct\",\n", " filename=f\"model.safetensors\",\n", - " local_dir=\"llama32-files\"\n", + " local_dir=\"Llama-3.2-1B-Instruct\"\n", " )\n", " combined_weights = load_file(weights_file)\n", "\n", @@ -871,7 +871,7 @@ " weights_file = hf_hub_download(\n", " repo_id=f\"meta-llama/Llama-3.2-{LLAMA_SIZE_STR}-Instruct\",\n", " filename=f\"model-0000{i}-of-00002.safetensors\",\n", - " local_dir=\"llama32-files\"\n", + " local_dir=\"Llama-3.2-1B-Instruct\"\n", " )\n", " current_weights = load_file(weights_file)\n", " combined_weights.update(current_weights)\n", @@ -1047,7 +1047,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "pt", "language": "python", "name": "python3" }, @@ -1061,7 +1061,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.11.9" } }, 
"nbformat": 4,