mirror of
				https://github.com/rasbt/LLMs-from-scratch.git
				synced 2025-11-04 11:50:14 +00:00 
			
		
		
		
	* Organized setup instructions * update tests * link checker action * raise error upon broken link * fix links * fix links * delete duplicated paragraph
		
			
				
	
	
		
			187 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			187 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
{
 | 
						|
 "cells": [
 | 
						|
  {
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "<font size=\"1\">\n",
 | 
						|
    "Supplementary code for \"Build a Large Language Model From Scratch\": <a href=\"https://www.manning.com/books/build-a-large-language-model-from-scratch\">https://www.manning.com/books/build-a-large-language-model-from-scratch</a> by <a href=\"https://sebastianraschka.com\">Sebastian Raschka</a><br>\n",
 | 
						|
    "Code repository: <a href=\"https://github.com/rasbt/LLMs-from-scratch\">https://github.com/rasbt/LLMs-from-scratch</a>\n",
 | 
						|
    "</font>"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "## Exercise A.3"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 2,
 | 
						|
   "metadata": {},
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "import torch\n",
 | 
						|
    "\n",
 | 
						|
    "class NeuralNetwork(torch.nn.Module):\n",
 | 
						|
    "    def __init__(self, num_inputs, num_outputs):\n",
 | 
						|
    "        super().__init__()\n",
 | 
						|
    "\n",
 | 
						|
    "        self.layers = torch.nn.Sequential(\n",
 | 
						|
    "                \n",
 | 
						|
    "            # 1st hidden layer\n",
 | 
						|
    "            torch.nn.Linear(num_inputs, 30),\n",
 | 
						|
    "            torch.nn.ReLU(),\n",
 | 
						|
    "\n",
 | 
						|
    "            # 2nd hidden layer\n",
 | 
						|
    "            torch.nn.Linear(30, 20),\n",
 | 
						|
    "            torch.nn.ReLU(),\n",
 | 
						|
    "\n",
 | 
						|
    "            # output layer\n",
 | 
						|
    "            torch.nn.Linear(20, num_outputs),\n",
 | 
						|
    "        )\n",
 | 
						|
    "\n",
 | 
						|
    "    def forward(self, x):\n",
 | 
						|
    "        logits = self.layers(x)\n",
 | 
						|
    "        return logits"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 3,
 | 
						|
   "metadata": {},
 | 
						|
   "outputs": [
 | 
						|
    {
 | 
						|
     "name": "stdout",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "Total number of trainable model parameters: 752\n"
 | 
						|
     ]
 | 
						|
    }
 | 
						|
   ],
 | 
						|
   "source": [
 | 
						|
    "model = NeuralNetwork(2, 2)\n",
 | 
						|
    "\n",
 | 
						|
    "num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
 | 
						|
    "print(\"Total number of trainable model parameters:\", num_params)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "## Exercise A.4"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 1,
 | 
						|
   "metadata": {
 | 
						|
    "id": "qGgnamiyLJxp"
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "import torch\n",
 | 
						|
    "\n",
 | 
						|
    "a = torch.rand(100, 200)\n",
 | 
						|
    "b = torch.rand(200, 300)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 2,
 | 
						|
   "metadata": {
 | 
						|
    "colab": {
 | 
						|
     "base_uri": "https://localhost:8080/"
 | 
						|
    },
 | 
						|
    "id": "CvGvIeVkLzXE",
 | 
						|
    "outputId": "44d027be-0787-4348-9c06-4e559d94d0e1"
 | 
						|
   },
 | 
						|
   "outputs": [
 | 
						|
    {
 | 
						|
     "name": "stdout",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "63.8 µs ± 8.7 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
 | 
						|
     ]
 | 
						|
    }
 | 
						|
   ],
 | 
						|
   "source": [
 | 
						|
    "%timeit a @ b"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 3,
 | 
						|
   "metadata": {
 | 
						|
    "id": "OmRtZLa9L2ZG"
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "a, b = a.to(\"cuda\"), b.to(\"cuda\")"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 4,
 | 
						|
   "metadata": {
 | 
						|
    "colab": {
 | 
						|
     "base_uri": "https://localhost:8080/"
 | 
						|
    },
 | 
						|
    "id": "duLEhXDPL6k0",
 | 
						|
    "outputId": "3486471d-fd62-446f-9855-2d01f41fd101"
 | 
						|
   },
 | 
						|
   "outputs": [
 | 
						|
    {
 | 
						|
     "name": "stdout",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "13.8 µs ± 425 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
 | 
						|
     ]
 | 
						|
    }
 | 
						|
   ],
 | 
						|
   "source": [
 | 
						|
    "%timeit a @ b"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {
 | 
						|
    "id": "Zqqa-To2L749"
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": []
 | 
						|
  }
 | 
						|
 ],
 | 
						|
 "metadata": {
 | 
						|
  "accelerator": "GPU",
 | 
						|
  "colab": {
 | 
						|
   "gpuType": "V100",
 | 
						|
   "machine_shape": "hm",
 | 
						|
   "provenance": []
 | 
						|
  },
 | 
						|
  "kernelspec": {
 | 
						|
   "display_name": "Python 3 (ipykernel)",
 | 
						|
   "language": "python",
 | 
						|
   "name": "python3"
 | 
						|
  },
 | 
						|
  "language_info": {
 | 
						|
   "codemirror_mode": {
 | 
						|
    "name": "ipython",
 | 
						|
    "version": 3
 | 
						|
   },
 | 
						|
   "file_extension": ".py",
 | 
						|
   "mimetype": "text/x-python",
 | 
						|
   "name": "python",
 | 
						|
   "nbconvert_exporter": "python",
 | 
						|
   "pygments_lexer": "ipython3",
 | 
						|
   "version": "3.10.6"
 | 
						|
  }
 | 
						|
 },
 | 
						|
 "nbformat": 4,
 | 
						|
 "nbformat_minor": 4
 | 
						|
}
 |