mirror of
				https://github.com/rasbt/LLMs-from-scratch.git
				synced 2025-10-31 18:00:08 +00:00 
			
		
		
		
	
		
			
	
	
		
			177 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			177 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|   | { | ||
|  |  "cells": [ | ||
|  |   { | ||
|  |    "cell_type": "markdown", | ||
|  |    "metadata": {}, | ||
|  |    "source": [ | ||
|  |     "## Exercise A.3" | ||
|  |    ] | ||
|  |   }, | ||
|  |   { | ||
|  |    "cell_type": "code", | ||
|  |    "execution_count": 2, | ||
|  |    "metadata": {}, | ||
|  |    "outputs": [], | ||
|  |    "source": [ | ||
|  |     "import torch\n", | ||
|  |     "\n", | ||
|  |     "class NeuralNetwork(torch.nn.Module):\n", | ||
|  |     "    def __init__(self, num_inputs, num_outputs):\n", | ||
|  |     "        super().__init__()\n", | ||
|  |     "\n", | ||
|  |     "        self.layers = torch.nn.Sequential(\n", | ||
|  |     "\n", | ||
|  |     "            # 1st hidden layer\n", | ||
|  |     "            torch.nn.Linear(num_inputs, 30),\n", | ||
|  |     "            torch.nn.ReLU(),\n", | ||
|  |     "\n", | ||
|  |     "            # 2nd hidden layer\n", | ||
|  |     "            torch.nn.Linear(30, 20),\n", | ||
|  |     "            torch.nn.ReLU(),\n", | ||
|  |     "\n", | ||
|  |     "            # output layer\n", | ||
|  |     "            torch.nn.Linear(20, num_outputs),\n", | ||
|  |     "        )\n", | ||
|  |     "\n", | ||
|  |     "    def forward(self, x):\n", | ||
|  |     "        logits = self.layers(x)\n", | ||
|  |     "        return logits" | ||
|  |    ] | ||
|  |   }, | ||
|  |   { | ||
|  |    "cell_type": "code", | ||
|  |    "execution_count": 3, | ||
|  |    "metadata": {}, | ||
|  |    "outputs": [ | ||
|  |     { | ||
|  |      "name": "stdout", | ||
|  |      "output_type": "stream", | ||
|  |      "text": [ | ||
|  |       "Total number of trainable model parameters: 752\n" | ||
|  |      ] | ||
|  |     } | ||
|  |    ], | ||
|  |    "source": [ | ||
|  |     "model = NeuralNetwork(2, 2)\n", | ||
|  |     "\n", | ||
|  |     "num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n", | ||
|  |     "print(\"Total number of trainable model parameters:\", num_params)" | ||
|  |    ] | ||
|  |   }, | ||
|  |   { | ||
|  |    "cell_type": "markdown", | ||
|  |    "metadata": {}, | ||
|  |    "source": [ | ||
|  |     "## Exercise A.4" | ||
|  |    ] | ||
|  |   }, | ||
|  |   { | ||
|  |    "cell_type": "code", | ||
|  |    "execution_count": 1, | ||
|  |    "metadata": { | ||
|  |     "id": "qGgnamiyLJxp" | ||
|  |    }, | ||
|  |    "outputs": [], | ||
|  |    "source": [ | ||
|  |     "import torch\n", | ||
|  |     "\n", | ||
|  |     "a = torch.rand(100, 200)\n", | ||
|  |     "b = torch.rand(200, 300)" | ||
|  |    ] | ||
|  |   }, | ||
|  |   { | ||
|  |    "cell_type": "code", | ||
|  |    "execution_count": 2, | ||
|  |    "metadata": { | ||
|  |     "colab": { | ||
|  |      "base_uri": "https://localhost:8080/" | ||
|  |     }, | ||
|  |     "id": "CvGvIeVkLzXE", | ||
|  |     "outputId": "44d027be-0787-4348-9c06-4e559d94d0e1" | ||
|  |    }, | ||
|  |    "outputs": [ | ||
|  |     { | ||
|  |      "name": "stdout", | ||
|  |      "output_type": "stream", | ||
|  |      "text": [ | ||
|  |       "63.8 µs ± 8.7 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" | ||
|  |      ] | ||
|  |     } | ||
|  |    ], | ||
|  |    "source": [ | ||
|  |     "%timeit a @ b" | ||
|  |    ] | ||
|  |   }, | ||
|  |   { | ||
|  |    "cell_type": "code", | ||
|  |    "execution_count": 3, | ||
|  |    "metadata": { | ||
|  |     "id": "OmRtZLa9L2ZG" | ||
|  |    }, | ||
|  |    "outputs": [], | ||
|  |    "source": [ | ||
|  |     "a, b = a.to(\"cuda\"), b.to(\"cuda\")" | ||
|  |    ] | ||
|  |   }, | ||
|  |   { | ||
|  |    "cell_type": "code", | ||
|  |    "execution_count": 4, | ||
|  |    "metadata": { | ||
|  |     "colab": { | ||
|  |      "base_uri": "https://localhost:8080/" | ||
|  |     }, | ||
|  |     "id": "duLEhXDPL6k0", | ||
|  |     "outputId": "3486471d-fd62-446f-9855-2d01f41fd101" | ||
|  |    }, | ||
|  |    "outputs": [ | ||
|  |     { | ||
|  |      "name": "stdout", | ||
|  |      "output_type": "stream", | ||
|  |      "text": [ | ||
|  |       "13.8 µs ± 425 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n" | ||
|  |      ] | ||
|  |     } | ||
|  |    ], | ||
|  |    "source": [ | ||
|  |     "%timeit a @ b" | ||
|  |    ] | ||
|  |   }, | ||
|  |   { | ||
|  |    "cell_type": "code", | ||
|  |    "execution_count": null, | ||
|  |    "metadata": { | ||
|  |     "id": "Zqqa-To2L749" | ||
|  |    }, | ||
|  |    "outputs": [], | ||
|  |    "source": [] | ||
|  |   } | ||
|  |  ], | ||
|  |  "metadata": { | ||
|  |   "accelerator": "GPU", | ||
|  |   "colab": { | ||
|  |    "gpuType": "V100", | ||
|  |    "machine_shape": "hm", | ||
|  |    "provenance": [] | ||
|  |   }, | ||
|  |   "kernelspec": { | ||
|  |    "display_name": "Python 3 (ipykernel)", | ||
|  |    "language": "python", | ||
|  |    "name": "python3" | ||
|  |   }, | ||
|  |   "language_info": { | ||
|  |    "codemirror_mode": { | ||
|  |     "name": "ipython", | ||
|  |     "version": 3 | ||
|  |    }, | ||
|  |    "file_extension": ".py", | ||
|  |    "mimetype": "text/x-python", | ||
|  |    "name": "python", | ||
|  |    "nbconvert_exporter": "python", | ||
|  |    "pygments_lexer": "ipython3", | ||
|  |    "version": "3.10.6" | ||
|  |   } | ||
|  |  }, | ||
|  |  "nbformat": 4, | ||
|  |  "nbformat_minor": 4 | ||
|  | } |