diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index decf415..1627f29 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -11,7 +11,8 @@
         "ms-python.python",
         "ms-azuretools.vscode-docker",
         "ms-toolsai.jupyter",
-        "yahyabatulu.vscode-markdown-alert"
+        "yahyabatulu.vscode-markdown-alert",
+        "tomoki1207.pdf"
       ]
     }
   }
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
index 60c5b2b..daccfa3 100644
--- a/.vscode/extensions.json
+++ b/.vscode/extensions.json
@@ -5,5 +5,6 @@
     "ms-azuretools.vscode-docker",
     "ms-vscode-remote.vscode-remote-extensionpack",
     "yahyabatulu.vscode-markdown-alert",
+    "tomoki1207.pdf",
   ]
 }
\ No newline at end of file
diff --git a/ch05/03_bonus_pretraining_on_gutenberg/README.md b/ch05/03_bonus_pretraining_on_gutenberg/README.md
index bef41d1..dcb3245 100644
--- a/ch05/03_bonus_pretraining_on_gutenberg/README.md
+++ b/ch05/03_bonus_pretraining_on_gutenberg/README.md
@@ -23,7 +23,7 @@ As of this writing, this will require approximately 50 GB of disk space, but it
 
 Linux and macOS users can follow these steps to download the dataset (if you are a Windows user, please see the note below):
 
-Set the `03_bonus_pretraining_on_gutenberg` folder as working directory to clone the `gutenberg` repository locally in this folder (this is necessary to run the provided scripts `prepare_dataset.py` and `pretraining_simple.py`). For instance, when being in the `LLMs-from-scratch` repository's folder, navigate into the *03_bonus_pretraining_on_gutenberg* folder via:
+1. Set the `03_bonus_pretraining_on_gutenberg` folder as working directory to clone the `gutenberg` repository locally in this folder (this is necessary to run the provided scripts `prepare_dataset.py` and `pretraining_simple.py`). For instance, when being in the `LLMs-from-scratch` repository's folder, navigate into the *03_bonus_pretraining_on_gutenberg* folder via:
 ```bash
 cd ch05/03_bonus_pretraining_on_gutenberg
 ```