Make saving more explicit in tutorial 2 (#95)

This commit is contained in:
Malte Pietsch 2020-05-06 12:13:49 +02:00 committed by GitHub
parent 9437daba98
commit f58f58fc86
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 47 additions and 3 deletions

View File

@ -56,7 +56,11 @@
"\n",
"Once you have collected training data, you can fine-tune your base models.\n",
"We initialize a reader as a base model and fine-tune it on our own custom dataset (should be in SQuAD-like format).\n",
"We recommend using a base model that was trained on SQuAD or a similar QA dataset before to benefit from Transfer Learning effects."
"We recommend using a base model that was trained on SQuAD or a similar QA dataset before to benefit from Transfer Learning effects.\n",
"\n",
"**Recommendation: Run training on a GPU.\n",
"If you are using Colab: Enable this in the menu \"Runtime\" > \"Change Runtime type\" > Select \"GPU\" in dropdown.\n",
"Then change the `use_gpu` arguments below to `True`"
]
},
{
@ -86,11 +90,42 @@
}
],
"source": [
"reader = FARMReader(model_name_or_path=\"distilbert-base-uncased-distilled-squad\", use_gpu=False)\n",
"reader = FARMReader(model_name_or_path=\"distilbert-base-uncased-distilled-squad\", use_gpu=False, save_dir=\"my_model\")\n",
"train_data = \"data/squad20\"\n",
"# train_data = \"PATH/TO_YOUR/TRAIN_DATA\" \n",
"reader.train(data_dir=train_data, train_filename=\"dev-v2.0.json\", use_gpu=False, n_epochs=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Saving the model happens automatically at the end of training into the `save_dir` you specified\n",
"# However, you could also save a reader manually again via:\n",
"reader.save(directory=\"my_model\")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# If you want to load it at a later point, just do:\n",
"new_reader = FARMReader(model_name_or_path=\"my_model\")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {

View File

@ -32,7 +32,16 @@ from haystack.reader.farm import FARMReader
# We recommend using a base model that has already been trained on SQuAD or a similar QA dataset, so that
# fine-tuning benefits from Transfer Learning effects.
# Recommendation: Run training on a GPU. To do so, change the `use_gpu` arguments below to `True`.
reader = FARMReader(model_name_or_path="distilbert-base-uncased-distilled-squad", use_gpu=False)
train_data = "data/squad20"
# train_data = "PATH/TO_YOUR/TRAIN_DATA"
reader.train(data_dir=train_data, train_filename="dev-v2.0.json", use_gpu=False, n_epochs=1)
reader.train(data_dir=train_data, train_filename="dev-v2.0.json", use_gpu=False, n_epochs=1, save_dir="my_model")
# Saving the model happens automatically at the end of training into the `save_dir` you specified.
# However, you can also save the reader manually via:
reader.save(directory="my_model")
# To load the saved model at a later point, just do:
new_reader = FARMReader(model_name_or_path="my_model")