LLMs-from-scratch/appendix-A/01_main-chapter-code/code-part2.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<table style=\"width:100%\">\n",
    "<tr>\n",
    "<td style=\"vertical-align:middle; text-align:left;\">\n",
    "<font size=\"2\">\n",
    "Supplementary code for the <a href=\"http://mng.bz/orYv\">Build a Large Language Model From Scratch</a> book by <a href=\"https://sebastianraschka.com\">Sebastian Raschka</a><br>\n",
    "<br>Code repository: <a href=\"https://github.com/rasbt/LLMs-from-scratch\">https://github.com/rasbt/LLMs-from-scratch</a>\n",
    "</font>\n",
    "</td>\n",
    "<td style=\"vertical-align:middle; text-align:left;\">\n",
    "<a href=\"http://mng.bz/orYv\"><img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/cover-small.webp\" width=\"100px\"></a>\n",
    "</td>\n",
    "</tr>\n",
    "</table>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "O9i6kzBsZVaZ"
   },
   "source": [
    "# Appendix A: Introduction to PyTorch (Part 2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "ppbG5d-NZezH"
   },
   "source": [
    "## A.9 Optimizing training performance with GPUs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "6jH0J_DPZhbn"
   },
   "source": [
    "### A.9.1 PyTorch computations on GPU devices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "RM7kGhwMF_nO",
    "outputId": "ac60b048-b81f-4bb0-90fa-1ca474f04e9a"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.0.1+cu118\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "print(torch.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "OXLCKXhiUkZt",
    "outputId": "39fe5366-287e-47eb-cc34-3508d616c4f9"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n"
     ]
    }
   ],
   "source": [
    "print(torch.cuda.is_available())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "MTTlfh53Va-T",
    "outputId": "f31d8bbe-577f-4db4-9939-02e66b9f96d1"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([5., 7., 9.])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Two small float tensors created without a device argument.\n",
    "tensor_1 = torch.tensor([1., 2., 3.])\n",
    "tensor_2 = torch.tensor([4., 5., 6.])\n",
    "\n",
    "# Element-wise sum; the stored output carries no device tag.\n",
    "print(tensor_1 + tensor_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Z4LwTNw7Vmmb",
    "outputId": "1c025c6a-e3ed-4c7c-f5fd-86c14607036e"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([5., 7., 9.], device='cuda:0')\n"
     ]
    }
   ],
   "source": [
    "# Move both operands to the GPU by rebinding each name to the result of .to(\"cuda\").\n",
    "# NOTE: the hard-coded \"cuda\" target means this cell needs a CUDA-capable machine.\n",
    "tensor_1 = tensor_1.to(\"cuda\")\n",
    "tensor_2 = tensor_2.to(\"cuda\")\n",
    "\n",
    "# Same element-wise sum as before; the stored output is tagged device='cuda:0'.\n",
    "print(tensor_1 + tensor_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 184
    },
    "id": "tKT6URN1Vuft",
    "outputId": "e6f01e7f-d9cf-44cb-cc6d-46fc7907d5c0"
   },
   "outputs": [
    {
     "ename": "RuntimeError",
     "evalue": "ignored",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-7-4ff3c4d20fc3>\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mtensor_1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtensor_1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_1\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtensor_2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mRuntimeError\u001b[0m: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!"
     ]
    }
   ],
   "source": [
    "# Intentional failure: tensor_1 is moved back to the CPU while tensor_2 is still on\n",
    "# the GPU, so the addition raises a RuntimeError about tensors on different devices.\n",
    "# NOTE: execution stops here on \"Run All\"; continue manually from the next cell.\n",
    "tensor_1 = tensor_1.to(\"cpu\")\n",
    "print(tensor_1 + tensor_2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "c8j1cWDcWAMf"
   },
   "source": [
    "### A.9.2 Single-GPU training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "id": "GyY59cjieitv"
   },
   "outputs": [],
   "source": [
    "# Training features: 5 examples with 2 features each.\n",
    "X_train = torch.tensor([\n",
    "    [-1.2, 3.1],\n",
    "    [-0.9, 2.9],\n",
    "    [-0.5, 2.6],\n",
    "    [2.3, -1.1],\n",
    "    [2.7, -1.5]\n",
    "])\n",
    "\n",
    "# Integer class labels (0 or 1), one per training example.\n",
    "y_train = torch.tensor([0, 0, 0, 1, 1])\n",
    "\n",
    "# Test features: 2 examples with the same 2-feature layout.\n",
    "X_test = torch.tensor([\n",
    "    [-0.8, 2.8],\n",
    "    [2.6, -1.6],\n",
    "])\n",
    "\n",
    "# Integer class labels for the two test examples.\n",
    "y_test = torch.tensor([0, 1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "id": "v41gKqEJempa"
   },
   "outputs": [],
   "source": [
    "from torch.utils.data import Dataset\n",
    "\n",
    "\n",
    "class ToyDataset(Dataset):\n",
    "    def __init__(self, X, y):\n",
    "        self.features = X\n",
    "        self.labels = y\n",
    "\n",
    "    def __getitem__(self, index):\n",
    "        one_x = self.features[index]\n",
    "        one_y = self.labels[index]\n",
    "        return one_x, one_y\n",
    "\n",
    "    def __len__(self):\n",
    "        return self.labels.shape[0]\n",
    "\n",
    "train_ds = ToyDataset(X_train, y_train)\n",
    "test_ds = ToyDataset(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "id": "UPGVRuylep8Y"
   },
   "outputs": [],
   "source": [
    "from torch.utils.data import DataLoader\n",
    "\n",
    "# Fix the global seed before the loaders are built (the training loader shuffles).\n",
    "torch.manual_seed(123)\n",
    "\n",
    "# With 5 training examples and batch_size=2, drop_last=True discards the leftover\n",
    "# single-example batch, so each epoch yields 2 full batches.\n",
    "train_loader = DataLoader(\n",
    "    dataset=train_ds,\n",
    "    batch_size=2,\n",
    "    shuffle=True,\n",
    "    num_workers=1,\n",
    "    drop_last=True\n",
    ")\n",
    "\n",
    "# The test set is iterated in its stored order (shuffle=False).\n",
    "test_loader = DataLoader(\n",
    "    dataset=test_ds,\n",
    "    batch_size=2,\n",
    "    shuffle=False,\n",
    "    num_workers=1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "id": "drhg6IXofAXh"
   },
   "outputs": [],
   "source": [
    "class NeuralNetwork(torch.nn.Module):\n",
    "    def __init__(self, num_inputs, num_outputs):\n",
    "        super().__init__()\n",
    "\n",
    "        self.layers = torch.nn.Sequential(\n",
    "\n",
    "            # 1st hidden layer\n",
    "            torch.nn.Linear(num_inputs, 30),\n",
    "            torch.nn.ReLU(),\n",
    "\n",
    "            # 2nd hidden layer\n",
    "            torch.nn.Linear(30, 20),\n",
    "            torch.nn.ReLU(),\n",
    "\n",
    "            # output layer\n",
    "            torch.nn.Linear(20, num_outputs),\n",
    "        )\n",
    "\n",
    "    def forward(self, x):\n",
    "        logits = self.layers(x)\n",
    "        return logits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "7jaS5sqPWCY0",
    "outputId": "84c74615-38f2-48b8-eeda-b5912fed1d3a"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 001/003 | Batch 000/002 | Train/Val Loss: 0.75\n",
      "Epoch: 001/003 | Batch 001/002 | Train/Val Loss: 0.65\n",
      "Epoch: 002/003 | Batch 000/002 | Train/Val Loss: 0.44\n",
      "Epoch: 002/003 | Batch 001/002 | Train/Val Loss: 0.13\n",
      "Epoch: 003/003 | Batch 000/002 | Train/Val Loss: 0.03\n",
      "Epoch: 003/003 | Batch 001/002 | Train/Val Loss: 0.00\n"
     ]
    }
   ],
   "source": [
    "import torch.nn.functional as F\n",
    "\n",
    "\n",
    "# Seed before constructing the model so the initial weights are reproducible.\n",
    "torch.manual_seed(123)\n",
    "model = NeuralNetwork(num_inputs=2, num_outputs=2)\n",
    "\n",
    "# Use the GPU when one is available, otherwise fall back to the CPU.\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\") # NEW\n",
    "model = model.to(device) # NEW\n",
    "\n",
    "optimizer = torch.optim.SGD(model.parameters(), lr=0.5)\n",
    "\n",
    "num_epochs = 3\n",
    "\n",
    "for epoch in range(num_epochs):\n",
    "\n",
    "    model.train()\n",
    "    for batch_idx, (features, labels) in enumerate(train_loader):\n",
    "\n",
    "        # Each batch is moved to the same device the model was moved to above.\n",
    "        features, labels = features.to(device), labels.to(device) # NEW\n",
    "        logits = model(features)\n",
    "        loss = F.cross_entropy(logits, labels) # Loss function\n",
    "\n",
    "        # Clear gradients from the previous step, backpropagate, then update the weights.\n",
    "        optimizer.zero_grad()\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "\n",
    "        ### LOGGING\n",
    "        # NOTE(review): `loss` is the loss of the current training batch; no validation\n",
    "        # loss is computed in this loop despite the \"Train/Val\" label.\n",
    "        print(f\"Epoch: {epoch+1:03d}/{num_epochs:03d}\"\n",
    "              f\" | Batch {batch_idx:03d}/{len(train_loader):03d}\"\n",
    "              f\" | Train/Val Loss: {loss:.2f}\")\n",
    "\n",
    "    model.eval()\n",
    "    # Optional model evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "id": "4qrlmnPPe7FO"
   },
   "outputs": [],
   "source": [
    "def compute_accuracy(model, dataloader, device):\n",
    "\n",
    "    model = model.eval()\n",
    "    correct = 0.0\n",
    "    total_examples = 0\n",
    "\n",
    "    for idx, (features, labels) in enumerate(dataloader):\n",
    "\n",
    "        features, labels = features.to(device), labels.to(device) # New\n",
    "\n",
    "        with torch.no_grad():\n",
    "            logits = model(features)\n",
    "\n",
    "        predictions = torch.argmax(logits, dim=1)\n",
    "        compare = labels == predictions\n",
    "        correct += torch.sum(compare)\n",
    "        total_examples += len(compare)\n",
    "\n",
    "    return (correct / total_examples).item()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "1_-BfkfEf4HX",
    "outputId": "473bf21d-5880-4de3-fc8a-051d75315b94"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "compute_accuracy(model, train_loader, device=device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "iYtXKBGEgKss",
    "outputId": "508edd84-3fb7-4d04-cb23-9df0c3d24170"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "compute_accuracy(model, test_loader, device=device)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### A.9.3 Training with multiple GPUs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "See [DDP-script.py](DDP-script.py)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/12.webp\" width=\"600px\">\n",
    "<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/13.webp\" width=\"600px\">"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "gpuType": "T4",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
first sync 2023-07-23 13:18:13 -05:00			`{`
			`"cells": [`
Ch05 supplementary code (#81) 2024-03-19 09:26:26 -05:00			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
update formatting 2024-05-24 07:20:37 -05:00			`"<table style=\"width:100%\">\n",`
			`"<tr>\n",`
			`"<td style=\"vertical-align:middle; text-align:left;\">\n",`
			`"<font size=\"2\">\n",`
			`"Supplementary code for the <a href=\"http://mng.bz/orYv\">Build a Large Language Model From Scratch</a> book by <a href=\"https://sebastianraschka.com\">Sebastian Raschka</a><br>\n",`
			`"<br>Code repository: <a href=\"https://github.com/rasbt/LLMs-from-scratch\">https://github.com/rasbt/LLMs-from-scratch</a>\n",`
			`"</font>\n",`
			`"</td>\n",`
			`"<td style=\"vertical-align:middle; text-align:left;\">\n",`
			`"<a href=\"http://mng.bz/orYv\"><img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/cover-small.webp\" width=\"100px\"></a>\n",`
			`"</td>\n",`
			`"</tr>\n",`
			`"</table>\n"`
Ch05 supplementary code (#81) 2024-03-19 09:26:26 -05:00			`]`
			`},`
first sync 2023-07-23 13:18:13 -05:00			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "O9i6kzBsZVaZ"`
			`},`
			`"source": [`
restruture old ch02 into appendix A 2023-09-22 07:01:08 -05:00			`"# Appendix A: Introduction to PyTorch (Part 2)"`
first sync 2023-07-23 13:18:13 -05:00			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "ppbG5d-NZezH"`
			`},`
			`"source": [`
restruture old ch02 into appendix A 2023-09-22 07:01:08 -05:00			`"## A.9 Optimizing training performance with GPUs"`
first sync 2023-07-23 13:18:13 -05:00			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "6jH0J_DPZhbn"`
			`},`
			`"source": [`
restruture old ch02 into appendix A 2023-09-22 07:01:08 -05:00			`"### A.9.1 PyTorch computations on GPU devices"`
first sync 2023-07-23 13:18:13 -05:00			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 1,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/"`
			`},`
			`"id": "RM7kGhwMF_nO",`
			`"outputId": "ac60b048-b81f-4bb0-90fa-1ca474f04e9a"`
			`},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"2.0.1+cu118\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"import torch\n",`
			`"\n",`
			`"print(torch.__version__)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 2,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/"`
			`},`
			`"id": "OXLCKXhiUkZt",`
			`"outputId": "39fe5366-287e-47eb-cc34-3508d616c4f9"`
			`},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"True\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"print(torch.cuda.is_available())"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 3,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/"`
			`},`
			`"id": "MTTlfh53Va-T",`
			`"outputId": "f31d8bbe-577f-4db4-9939-02e66b9f96d1"`
			`},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"tensor([5., 7., 9.])"`
			`]`
			`},`
			`"execution_count": 3,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"tensor_1 = torch.tensor([1., 2., 3.])\n",`
			`"tensor_2 = torch.tensor([4., 5., 6.])\n",`
			`"\n",`
			`"print(tensor_1 + tensor_2)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 5,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/"`
			`},`
			`"id": "Z4LwTNw7Vmmb",`
			`"outputId": "1c025c6a-e3ed-4c7c-f5fd-86c14607036e"`
			`},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"tensor([5., 7., 9.], device='cuda:0')\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"tensor_1 = tensor_1.to(\"cuda\")\n",`
			`"tensor_2 = tensor_2.to(\"cuda\")\n",`
			`"\n",`
			`"print(tensor_1 + tensor_2)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 7,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/",`
			`"height": 184`
			`},`
			`"id": "tKT6URN1Vuft",`
			`"outputId": "e6f01e7f-d9cf-44cb-cc6d-46fc7907d5c0"`
			`},`
			`"outputs": [`
			`{`
			`"ename": "RuntimeError",`
			`"evalue": "ignored",`
			`"output_type": "error",`
			`"traceback": [`
			`"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",`
			`"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",`
			"\u001b[0;32m<ipython-input-7-4ff3c4d20fc3>\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mtensor_1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtensor_1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_1\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtensor_2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
			`"\u001b[0;31mRuntimeError\u001b[0m: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!"`
			`]`
			`}`
			`],`
			`"source": [`
			`"tensor_1 = tensor_1.to(\"cpu\")\n",`
			`"print(tensor_1 + tensor_2)"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "c8j1cWDcWAMf"`
			`},`
			`"source": [`
minor updates 2024-03-29 20:42:32 -05:00			`"### A.9.2 Single-GPU training"`
first sync 2023-07-23 13:18:13 -05:00			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 8,`
			`"metadata": {`
			`"id": "GyY59cjieitv"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"X_train = torch.tensor([\n",`
			`" [-1.2, 3.1],\n",`
			`" [-0.9, 2.9],\n",`
			`" [-0.5, 2.6],\n",`
			`" [2.3, -1.1],\n",`
			`" [2.7, -1.5]\n",`
			`"])\n",`
			`"\n",`
			`"y_train = torch.tensor([0, 0, 0, 1, 1])\n",`
			`"\n",`
			`"X_test = torch.tensor([\n",`
			`" [-0.8, 2.8],\n",`
			`" [2.6, -1.6],\n",`
			`"])\n",`
			`"\n",`
			`"y_test = torch.tensor([0, 1])"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 9,`
			`"metadata": {`
			`"id": "v41gKqEJempa"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"from torch.utils.data import Dataset\n",`
			`"\n",`
			`"\n",`
			`"class ToyDataset(Dataset):\n",`
			`" def __init__(self, X, y):\n",`
			`" self.features = X\n",`
			`" self.labels = y\n",`
			`"\n",`
			`" def __getitem__(self, index):\n",`
			`" one_x = self.features[index]\n",`
			`" one_y = self.labels[index]\n",`
			`" return one_x, one_y\n",`
			`"\n",`
			`" def __len__(self):\n",`
			`" return self.labels.shape[0]\n",`
			`"\n",`
			`"train_ds = ToyDataset(X_train, y_train)\n",`
			`"test_ds = ToyDataset(X_test, y_test)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 23,`
			`"metadata": {`
			`"id": "UPGVRuylep8Y"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"from torch.utils.data import DataLoader\n",`
			`"\n",`
			`"torch.manual_seed(123)\n",`
			`"\n",`
			`"train_loader = DataLoader(\n",`
			`" dataset=train_ds,\n",`
			`" batch_size=2,\n",`
			`" shuffle=True,\n",`
			`" num_workers=1,\n",`
			`" drop_last=True\n",`
			`")\n",`
			`"\n",`
			`"test_loader = DataLoader(\n",`
			`" dataset=test_ds,\n",`
			`" batch_size=2,\n",`
			`" shuffle=False,\n",`
			`" num_workers=1\n",`
			`")"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 24,`
			`"metadata": {`
			`"id": "drhg6IXofAXh"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"class NeuralNetwork(torch.nn.Module):\n",`
			`" def __init__(self, num_inputs, num_outputs):\n",`
			`" super().__init__()\n",`
			`"\n",`
			`" self.layers = torch.nn.Sequential(\n",`
			`"\n",`
			`" # 1st hidden layer\n",`
			`" torch.nn.Linear(num_inputs, 30),\n",`
			`" torch.nn.ReLU(),\n",`
			`"\n",`
			`" # 2nd hidden layer\n",`
			`" torch.nn.Linear(30, 20),\n",`
			`" torch.nn.ReLU(),\n",`
			`"\n",`
			`" # output layer\n",`
			`" torch.nn.Linear(20, num_outputs),\n",`
			`" )\n",`
			`"\n",`
			`" def forward(self, x):\n",`
			`" logits = self.layers(x)\n",`
			`" return logits"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 25,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/"`
			`},`
			`"id": "7jaS5sqPWCY0",`
			`"outputId": "84c74615-38f2-48b8-eeda-b5912fed1d3a"`
			`},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"Epoch: 001/003 \| Batch 000/002 \| Train/Val Loss: 0.75\n",`
			`"Epoch: 001/003 \| Batch 001/002 \| Train/Val Loss: 0.65\n",`
			`"Epoch: 002/003 \| Batch 000/002 \| Train/Val Loss: 0.44\n",`
			`"Epoch: 002/003 \| Batch 001/002 \| Train/Val Loss: 0.13\n",`
			`"Epoch: 003/003 \| Batch 000/002 \| Train/Val Loss: 0.03\n",`
			`"Epoch: 003/003 \| Batch 001/002 \| Train/Val Loss: 0.00\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"import torch.nn.functional as F\n",`
			`"\n",`
			`"\n",`
			`"torch.manual_seed(123)\n",`
			`"model = NeuralNetwork(num_inputs=2, num_outputs=2)\n",`
			`"\n",`
			`"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\") # NEW\n",`
			`"model = model.to(device) # NEW\n",`
			`"\n",`
			`"optimizer = torch.optim.SGD(model.parameters(), lr=0.5)\n",`
			`"\n",`
			`"num_epochs = 3\n",`
			`"\n",`
			`"for epoch in range(num_epochs):\n",`
			`"\n",`
			`" model.train()\n",`
			`" for batch_idx, (features, labels) in enumerate(train_loader):\n",`
			`"\n",`
			`" features, labels = features.to(device), labels.to(device) # NEW\n",`
			`" logits = model(features)\n",`
			`" loss = F.cross_entropy(logits, labels) # Loss function\n",`
			`"\n",`
			`" optimizer.zero_grad()\n",`
			`" loss.backward()\n",`
			`" optimizer.step()\n",`
			`"\n",`
			`" ### LOGGING\n",`
			`" print(f\"Epoch: {epoch+1:03d}/{num_epochs:03d}\"\n",`
			`" f\" \| Batch {batch_idx:03d}/{len(train_loader):03d}\"\n",`
			`" f\" \| Train/Val Loss: {loss:.2f}\")\n",`
			`"\n",`
			`" model.eval()\n",`
			`" # Optional model evaluation"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 26,`
			`"metadata": {`
			`"id": "4qrlmnPPe7FO"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"def compute_accuracy(model, dataloader, device):\n",`
			`"\n",`
			`" model = model.eval()\n",`
			`" correct = 0.0\n",`
			`" total_examples = 0\n",`
			`"\n",`
			`" for idx, (features, labels) in enumerate(dataloader):\n",`
			`"\n",`
			`" features, labels = features.to(device), labels.to(device) # New\n",`
			`"\n",`
			`" with torch.no_grad():\n",`
			`" logits = model(features)\n",`
			`"\n",`
			`" predictions = torch.argmax(logits, dim=1)\n",`
			`" compare = labels == predictions\n",`
			`" correct += torch.sum(compare)\n",`
			`" total_examples += len(compare)\n",`
			`"\n",`
			`" return (correct / total_examples).item()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 27,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/"`
			`},`
			`"id": "1_-BfkfEf4HX",`
			`"outputId": "473bf21d-5880-4de3-fc8a-051d75315b94"`
			`},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"1.0"`
			`]`
			`},`
			`"execution_count": 27,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"compute_accuracy(model, train_loader, device=device)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 21,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/"`
			`},`
			`"id": "iYtXKBGEgKss",`
			`"outputId": "508edd84-3fb7-4d04-cb23-9df0c3d24170"`
			`},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"1.0"`
			`]`
			`},`
			`"execution_count": 21,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"compute_accuracy(model, test_loader, device=device)"`
			`]`
minor updates 2024-03-29 20:42:32 -05:00			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"### A.9.3 Training with multiple GPUs"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"See [DDP-script.py](DDP-script.py)"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/12.webp\" width=\"600px\">\n",`
			`"<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/13.webp\" width=\"600px\">"`
			`]`
first sync 2023-07-23 13:18:13 -05:00			`}`
			`],`
			`"metadata": {`
			`"accelerator": "GPU",`
			`"colab": {`
			`"gpuType": "T4",`
			`"provenance": []`
			`},`
			`"kernelspec": {`
			`"display_name": "Python 3 (ipykernel)",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
minor updates 2024-03-29 20:42:32 -05:00			`"version": "3.11.4"`
first sync 2023-07-23 13:18:13 -05:00			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 4`
			`}`