diff --git a/ch05/02_alternative_weight_loading/weight-loading-hf-transformers.ipynb b/ch05/02_alternative_weight_loading/weight-loading-hf-transformers.ipynb index 7561bf6..0f92a87 100644 --- a/ch05/02_alternative_weight_loading/weight-loading-hf-transformers.ipynb +++ b/ch05/02_alternative_weight_loading/weight-loading-hf-transformers.ipynb @@ -172,7 +172,7 @@ "def assign_check(left, right):\n", " if left.shape != right.shape:\n", " raise ValueError(f\"Shape mismatch. Left: {left.shape}, Right: {right.shape}\")\n", - " return torch.nn.Parameter(torch.tensor(right))" + " return torch.nn.Parameter(right.clone().detach())" ] }, { @@ -227,16 +227,7 @@ "execution_count": 7, "id": "cda44d37-92c0-4c19-a70a-15711513afce", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_9385/3877979348.py:4: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", - " return torch.nn.Parameter(torch.tensor(right))\n" - ] - } - ], + "outputs": [], "source": [ "import torch\n", "from previous_chapters import GPTModel\n", @@ -250,7 +241,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "4ddd0d51-3ade-4890-9bab-d63f141d095f", "metadata": {}, "outputs": [ @@ -302,7 +293,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.10.11" } }, "nbformat": 4, diff --git a/ch06/02_bonus_additional-experiments/additional-experiments.py b/ch06/02_bonus_additional-experiments/additional-experiments.py index f3217ed..1ba1ae5 100644 --- a/ch06/02_bonus_additional-experiments/additional-experiments.py +++ b/ch06/02_bonus_additional-experiments/additional-experiments.py @@ -117,7 +117,7 @@ def random_split(df, train_frac, validation_frac): return train_df, validation_df, test_df -def create_dataset_csvs(data_file_path): +def create_dataset_csvs(new_file_path): df = pd.read_csv(new_file_path, sep="\t", header=None, names=["Label", "Text"]) # Create balanced dataset