mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2025-09-26 16:52:04 +00:00
Updated Llama 2 to 3 paths (#413)
* llama 2 and 3 path fixes
* updated llama 3, 3.1 and 3.2 paths
* updated .gitignore
* Typo fix
---------
Co-authored-by: Sebastian Raschka <mail@sebastianraschka.com>
This commit is contained in:
parent
632d7772b2
commit
8b60460319
11
.gitignore
vendored
11
.gitignore
vendored
@ -35,12 +35,15 @@ ch05/01_main-chapter-code/model.pth
|
|||||||
ch05/01_main-chapter-code/model_and_optimizer.pth
|
ch05/01_main-chapter-code/model_and_optimizer.pth
|
||||||
ch05/03_bonus_pretraining_on_gutenberg/model_checkpoints
|
ch05/03_bonus_pretraining_on_gutenberg/model_checkpoints
|
||||||
ch05/06_user_interface/gpt2
|
ch05/06_user_interface/gpt2
|
||||||
|
ch05/07_gpt_to_llama/.cache
|
||||||
ch05/07_gpt_to_llama/Llama-2-7b
|
ch05/07_gpt_to_llama/Llama-2-7b
|
||||||
ch05/07_gpt_to_llama/Llama-2-7b-chat
|
ch05/07_gpt_to_llama/Llama-2-7b-chat
|
||||||
ch05/07_gpt_to_llama/.cache
|
ch05/07_gpt_to_llama/Llama-3-8B
|
||||||
ch05/07_gpt_to_llama/llama3-files
|
ch05/07_gpt_to_llama/Llama-3-8B-Instruct
|
||||||
ch05/07_gpt_to_llama/llama31-files
|
ch05/07_gpt_to_llama/Llama-3.1-8B
|
||||||
ch05/07_gpt_to_llama/llama32-files
|
ch05/07_gpt_to_llama/Llama-3.1-8B-Instruct
|
||||||
|
ch05/07_gpt_to_llama/Llama-3.2-1B
|
||||||
|
ch05/07_gpt_to_llama/Llama-3.2-1B-Instruct
|
||||||
|
|
||||||
ch06/01_main-chapter-code/gpt2
|
ch06/01_main-chapter-code/gpt2
|
||||||
ch06/02_bonus_additional-experiments/gpt2
|
ch06/02_bonus_additional-experiments/gpt2
|
||||||
|
@ -1189,7 +1189,7 @@
|
|||||||
"tokenizer_file = hf_hub_download(\n",
|
"tokenizer_file = hf_hub_download(\n",
|
||||||
" repo_id=\"meta-llama/Llama-2-7b\",\n",
|
" repo_id=\"meta-llama/Llama-2-7b\",\n",
|
||||||
" filename=\"tokenizer.model\",\n",
|
" filename=\"tokenizer.model\",\n",
|
||||||
" local_dir=\"Llama-2-7B\"\n",
|
" local_dir=\"Llama-2-7b\"\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -1252,7 +1252,7 @@
|
|||||||
"tokenizer_file_path = hf_hub_download(\n",
|
"tokenizer_file_path = hf_hub_download(\n",
|
||||||
" repo_id=\"meta-llama/Meta-Llama-3-8B\",\n",
|
" repo_id=\"meta-llama/Meta-Llama-3-8B\",\n",
|
||||||
" filename=\"original/tokenizer.model\",\n",
|
" filename=\"original/tokenizer.model\",\n",
|
||||||
" local_dir=\"llama3-files\"\n",
|
" local_dir=\"Llama-3-8B\"\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -1458,7 +1458,7 @@
|
|||||||
" weights_file = hf_hub_download(\n",
|
" weights_file = hf_hub_download(\n",
|
||||||
" repo_id=\"meta-llama/Meta-Llama-3-8B\",\n",
|
" repo_id=\"meta-llama/Meta-Llama-3-8B\",\n",
|
||||||
" filename=f\"model-0000{i}-of-00004.safetensors\",\n",
|
" filename=f\"model-0000{i}-of-00004.safetensors\",\n",
|
||||||
" local_dir=\"llama3-files\"\n",
|
" local_dir=\"Llama-3-8B\"\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" current_weights = load_file(weights_file)\n",
|
" current_weights = load_file(weights_file)\n",
|
||||||
" combined_weights.update(current_weights)"
|
" combined_weights.update(current_weights)"
|
||||||
@ -1677,7 +1677,7 @@
|
|||||||
"id": "akyo7WNyF_YL"
|
"id": "akyo7WNyF_YL"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"- Above, we used the pretrained base model; if you want to use a model capable of following instructions, use the `\"meta-llama/Llama-3-8b-Instruct\"` model instead, as shown below"
|
"- Above, we used the pretrained base model; if you want to use a model capable of following instructions, use the `\"meta-llama/Meta-Llama-3-8B-Instruct\"` model instead, as shown below"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -1824,7 +1824,7 @@
|
|||||||
" weights_file = hf_hub_download(\n",
|
" weights_file = hf_hub_download(\n",
|
||||||
" repo_id=\"meta-llama/Meta-Llama-3-8B-Instruct\",\n",
|
" repo_id=\"meta-llama/Meta-Llama-3-8B-Instruct\",\n",
|
||||||
" filename=f\"model-0000{i}-of-00004.safetensors\",\n",
|
" filename=f\"model-0000{i}-of-00004.safetensors\",\n",
|
||||||
" local_dir=\"llama3-files\"\n",
|
" local_dir=\"Llama-3-8B-Instruct\"\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" current_weights = load_file(weights_file)\n",
|
" current_weights = load_file(weights_file)\n",
|
||||||
" combined_weights.update(current_weights)\n",
|
" combined_weights.update(current_weights)\n",
|
||||||
@ -2157,7 +2157,7 @@
|
|||||||
"tokenizer_file_path = hf_hub_download(\n",
|
"tokenizer_file_path = hf_hub_download(\n",
|
||||||
" repo_id=\"meta-llama/Llama-3.1-8B\",\n",
|
" repo_id=\"meta-llama/Llama-3.1-8B\",\n",
|
||||||
" filename=\"original/tokenizer.model\",\n",
|
" filename=\"original/tokenizer.model\",\n",
|
||||||
" local_dir=\"llama31-files\"\n",
|
" local_dir=\"Llama-3.1-8B\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"tokenizer = Tokenizer(tokenizer_file_path)"
|
"tokenizer = Tokenizer(tokenizer_file_path)"
|
||||||
@ -2313,7 +2313,7 @@
|
|||||||
" weights_file = hf_hub_download(\n",
|
" weights_file = hf_hub_download(\n",
|
||||||
" repo_id=\"meta-llama/Llama-3.1-8B\",\n",
|
" repo_id=\"meta-llama/Llama-3.1-8B\",\n",
|
||||||
" filename=f\"model-0000{i}-of-00004.safetensors\",\n",
|
" filename=f\"model-0000{i}-of-00004.safetensors\",\n",
|
||||||
" local_dir=\"llama31-files\"\n",
|
" local_dir=\"Llama-3.1-8B\"\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" current_weights = load_file(weights_file)\n",
|
" current_weights = load_file(weights_file)\n",
|
||||||
" combined_weights.update(current_weights)\n",
|
" combined_weights.update(current_weights)\n",
|
||||||
@ -2512,7 +2512,7 @@
|
|||||||
"tokenizer_file_path = hf_hub_download(\n",
|
"tokenizer_file_path = hf_hub_download(\n",
|
||||||
" repo_id=\"meta-llama/Llama-3.2-1B\",\n",
|
" repo_id=\"meta-llama/Llama-3.2-1B\",\n",
|
||||||
" filename=\"original/tokenizer.model\",\n",
|
" filename=\"original/tokenizer.model\",\n",
|
||||||
" local_dir=\"llama32-files\"\n",
|
" local_dir=\"Llama-3.2-1B\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"tokenizer = Tokenizer(tokenizer_file_path)"
|
"tokenizer = Tokenizer(tokenizer_file_path)"
|
||||||
@ -2589,7 +2589,7 @@
|
|||||||
"weights_file = hf_hub_download(\n",
|
"weights_file = hf_hub_download(\n",
|
||||||
" repo_id=\"meta-llama/Llama-3.2-1B\",\n",
|
" repo_id=\"meta-llama/Llama-3.2-1B\",\n",
|
||||||
" filename=f\"model.safetensors\",\n",
|
" filename=f\"model.safetensors\",\n",
|
||||||
" local_dir=\"llama32-files\"\n",
|
" local_dir=\"Llama-3.2-1B\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"current_weights = load_file(weights_file)\n",
|
"current_weights = load_file(weights_file)\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -2687,7 +2687,7 @@
|
|||||||
"provenance": []
|
"provenance": []
|
||||||
},
|
},
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "pt",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
@ -2701,7 +2701,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.10.6"
|
"version": "3.11.9"
|
||||||
},
|
},
|
||||||
"widgets": {
|
"widgets": {
|
||||||
"application/vnd.jupyter.widget-state+json": {
|
"application/vnd.jupyter.widget-state+json": {
|
||||||
|
@ -733,7 +733,7 @@
|
|||||||
"tokenizer_file_path = hf_hub_download(\n",
|
"tokenizer_file_path = hf_hub_download(\n",
|
||||||
" repo_id=f\"meta-llama/Llama-3.2-{LLAMA_SIZE_STR}-Instruct\",\n",
|
" repo_id=f\"meta-llama/Llama-3.2-{LLAMA_SIZE_STR}-Instruct\",\n",
|
||||||
" filename=\"original/tokenizer.model\",\n",
|
" filename=\"original/tokenizer.model\",\n",
|
||||||
" local_dir=\"llama32-files\"\n",
|
" local_dir=\"Llama-3.2-1B-Instruct\"\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -860,7 +860,7 @@
|
|||||||
" weights_file = hf_hub_download(\n",
|
" weights_file = hf_hub_download(\n",
|
||||||
" repo_id=f\"meta-llama/Llama-3.2-{LLAMA_SIZE_STR}-Instruct\",\n",
|
" repo_id=f\"meta-llama/Llama-3.2-{LLAMA_SIZE_STR}-Instruct\",\n",
|
||||||
" filename=f\"model.safetensors\",\n",
|
" filename=f\"model.safetensors\",\n",
|
||||||
" local_dir=\"llama32-files\"\n",
|
" local_dir=\"Llama-3.2-1B-Instruct\"\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" combined_weights = load_file(weights_file)\n",
|
" combined_weights = load_file(weights_file)\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -871,7 +871,7 @@
|
|||||||
" weights_file = hf_hub_download(\n",
|
" weights_file = hf_hub_download(\n",
|
||||||
" repo_id=f\"meta-llama/Llama-3.2-{LLAMA_SIZE_STR}-Instruct\",\n",
|
" repo_id=f\"meta-llama/Llama-3.2-{LLAMA_SIZE_STR}-Instruct\",\n",
|
||||||
" filename=f\"model-0000{i}-of-00002.safetensors\",\n",
|
" filename=f\"model-0000{i}-of-00002.safetensors\",\n",
|
||||||
" local_dir=\"llama32-files\"\n",
|
" local_dir=\"Llama-3.2-1B-Instruct\"\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" current_weights = load_file(weights_file)\n",
|
" current_weights = load_file(weights_file)\n",
|
||||||
" combined_weights.update(current_weights)\n",
|
" combined_weights.update(current_weights)\n",
|
||||||
@ -1047,7 +1047,7 @@
|
|||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "pt",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
@ -1061,7 +1061,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.10.6"
|
"version": "3.11.9"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user