Make saving more explicit in tutorial 2 (#95)

2025-12-29 16:08:38 +00:00 · 2020-05-06 12:13:49 +02:00 · 2020-05-06 12:13:49 +02:00 · f58f58fc86
commit f58f58fc86
parent 9437daba98
2 changed files with 47 additions and 3 deletions
--- a/tutorials/Tutorial2_Finetune_a_model_on_your_data.ipynb
+++ b/tutorials/Tutorial2_Finetune_a_model_on_your_data.ipynb
@ -56,7 +56,11 @@
    "\n",
    "Once you have collected training data, you can fine-tune your base models.\n",
    "We initialize a reader as a base model and fine-tune it on our own custom dataset (should be in SQuAD-like format).\n",
-    "We recommend using a base model that was trained on SQuAD or a similar QA dataset before to benefit from Transfer Learning effects."
+    "We recommend using a base model that was trained on SQuAD or a similar QA dataset before to benefit from Transfer Learning effects.\n",
+    "\n",
+    "**Recommendation: Run training on a GPU.**\n",
+    "If you are using Colab: Enable this in the menu \"Runtime\" > \"Change Runtime type\" > Select \"GPU\" in dropdown.\n",
+    "Then change the `use_gpu` arguments below to `True`"
   ]
  },
  {
@ -86,11 +90,42 @@
    }
   ],
   "source": [
-    "reader = FARMReader(model_name_or_path=\"distilbert-base-uncased-distilled-squad\", use_gpu=False)\n",
+    "reader = FARMReader(model_name_or_path=\"distilbert-base-uncased-distilled-squad\", use_gpu=False, save_dir=\"my_model\")\n",
    "train_data = \"data/squad20\"\n",
    "# train_data = \"PATH/TO_YOUR/TRAIN_DATA\" \n",
    "reader.train(data_dir=train_data, train_filename=\"dev-v2.0.json\", use_gpu=False, n_epochs=1)"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "# Saving the model happens automatically at the end of training into the `save_dir` you specified\n",
+    "# However, you could also save a reader manually again via:\n",
+    "reader.save(directory=\"my_model\")"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "# If you want to load it at a later point, just do:\n",
+    "new_reader = FARMReader(model_name_or_path=\"my_model\")"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
  }
 ],
 "metadata": {
--- a/tutorials/Tutorial2_Finetune_a_model_on_your_data.py
+++ b/tutorials/Tutorial2_Finetune_a_model_on_your_data.py
@ -32,7 +32,16 @@ from haystack.reader.farm import FARMReader
 # We recommend using a base model that was trained on SQuAD or a similar QA dataset before to benefit from Transfer
 # Learning effects.

+# Recommendation: Run training on a GPU. To do so, change the `use_gpu` arguments below to `True`.
+
 reader = FARMReader(model_name_or_path="distilbert-base-uncased-distilled-squad", use_gpu=False)
 train_data = "data/squad20"
 # train_data = "PATH/TO_YOUR/TRAIN_DATA" 
-reader.train(data_dir=train_data, train_filename="dev-v2.0.json", use_gpu=False, n_epochs=1)
+reader.train(data_dir=train_data, train_filename="dev-v2.0.json", use_gpu=False, n_epochs=1, save_dir="my_model")
+
+# Saving the model happens automatically at the end of training into the `save_dir` you specified
+# However, you could also save a reader manually again via:
+reader.save(directory="my_model")
+
+# If you want to load it at a later point, just do:
+new_reader = FARMReader(model_name_or_path="my_model")