From 222f7b16f89f45db79e3667c6b8abcc5e92dfcd4 Mon Sep 17 00:00:00 2001
From: Sebastian Raschka
Date: Fri, 20 Sep 2024 07:00:06 -0700
Subject: [PATCH] update gpt-2 paper url

---
 ch04/01_main-chapter-code/ch04.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ch04/01_main-chapter-code/ch04.ipynb b/ch04/01_main-chapter-code/ch04.ipynb
index 4a4c1da..26b9506 100644
--- a/ch04/01_main-chapter-code/ch04.ipynb
+++ b/ch04/01_main-chapter-code/ch04.ipynb
@@ -106,7 +106,7 @@
    "source": [
     "- In previous chapters, we used small embedding dimensions for token inputs and outputs for ease of illustration, ensuring they fit on a single page\n",
     "- In this chapter, we consider embedding and model sizes akin to a small GPT-2 model\n",
-    "- We'll specifically code the architecture of the smallest GPT-2 model (124 million parameters), as outlined in Radford et al.'s [Language Models are Unsupervised Multitask Learners](https://scholar.google.com/citations?view_op=view_citation&hl=en&user=dOad5HoAAAAJ&citation_for_view=dOad5HoAAAAJ:YsMSGLbcyi4C) (note that the initial report lists it as 117M parameters, but this was later corrected in the model weight repository)\n",
+    "- We'll specifically code the architecture of the smallest GPT-2 model (124 million parameters), as outlined in Radford et al.'s [Language Models are Unsupervised Multitask Learners](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf) (note that the initial report lists it as 117M parameters, but this was later corrected in the model weight repository)\n",
     "- Chapter 6 will show how to load pretrained weights into our implementation, which will be compatible with model sizes of 345, 762, and 1542 million parameters"
    ]
   },
@@ -1509,7 +1509,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.10.10"
   }
  },
 "nbformat": 4,
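
For reference, the bullet updated in the first hunk describes the GPT-2 "small" (124M-parameter) architecture. The sketch below lists its published hyperparameters; the variable name and key names are illustrative only and are not taken from ch04.ipynb:

    # Hypothetical config sketch for the GPT-2 "small" (124M) architecture.
    # Key names are illustrative and may differ from those used in the notebook.
    GPT2_SMALL_CONFIG = {
        "vocab_size": 50257,     # BPE vocabulary size used by GPT-2
        "context_length": 1024,  # maximum sequence length
        "emb_dim": 768,          # embedding / hidden dimension
        "n_heads": 12,           # attention heads per transformer block
        "n_layers": 12,          # number of transformer blocks
    }

The larger model sizes mentioned in the unchanged bullet (345M, 762M, 1542M) scale up emb_dim, n_heads, and n_layers while keeping the same overall architecture.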