Mirror of https://github.com/deepset-ai/haystack.git
Fix DPR training batch size (#898)
* Adjust batch size
* Add latest docstring and tutorial changes
* Update training results
* Add latest docstring and tutorial changes

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
parent 992277e812
commit 24d0c4d42d
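The substance of the change is the effective batch size under gradient accumulation: the optimizer steps once every `grad_acc_steps` micro-batches, so the simulated batch is `batch_size * grad_acc_steps`. A minimal sketch of the arithmetic behind the old and new settings in the hunks below (plain Python, no Haystack dependency):

```python
# Effective batch size under gradient accumulation: gradients are
# accumulated over `grad_acc_steps` micro-batches before each optimizer
# step, so the simulated batch is batch_size * grad_acc_steps.
def effective_batch_size(batch_size: int, grad_acc_steps: int) -> int:
    return batch_size * grad_acc_steps

# Old tutorial settings only simulated batch size 16.
assert effective_batch_size(batch_size=4, grad_acc_steps=4) == 16

# New settings match the batch size 128 of the original DPR experiment.
assert effective_batch_size(batch_size=16, grad_acc_steps=8) == 128
```

The same `batch_size=16`, `grad_acc_steps=8` change is applied three times below: in the markdown docs, the tutorial notebook, and the tutorial script.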
@@ -174,8 +174,8 @@ retriever = DensePassageRetriever(
 
 Let's start training and save our trained model!
 
-On a V100 GPU, you can fit up to batch size 4 so we set gradient accumulation steps to 4 in order
-to simulate the batch size 16 of the original DPR experiment.
+On a V100 GPU, you can fit up to batch size 16 so we set gradient accumulation steps to 8 in order
+to simulate the batch size 128 of the original DPR experiment.
 
 When `embed_title=True`, the document title is prepended to the input text sequence with a `[SEP]` token
 between it and document text.
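As an aside on the `embed_title` flag mentioned in the hunk above: the tutorial only describes the resulting input format, so the following is an illustrative sketch of that formatting (a hypothetical helper, not Haystack's internal code), assuming a BERT-style `[SEP]` separator:

```python
# Illustration only: mimics the described input format, where the document
# title is prepended to the text with a [SEP] token between them.
def format_passage(title: str, text: str, embed_title: bool = True) -> str:
    if embed_title and title:
        return f"{title} [SEP] {text}"
    return text

print(format_passage("Haystack", "Haystack is an open-source framework."))
# -> Haystack [SEP] Haystack is an open-source framework.
```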
@@ -183,11 +183,22 @@ between it and document text.
 When training from scratch with the above variables, 1 epoch takes around an hour and we reached the following performance:
 
 ```
-loss: 0.09334952129693501
-acc: 0.984035000191887
-f1: 0.936147352264006
-acc_and_f1: 0.9600911762279465
-average_rank: 0.07075978511128166
+loss: 0.046580662854042276
+task_name: text_similarity
+acc: 0.992524064068483
+f1: 0.8804297774366846
+acc_and_f1: 0.9364769207525838
+average_rank: 0.19631619339984652
+report:
+                precision    recall  f1-score   support
+
+hard_negative      0.9961    0.9961    0.9961    201887
+     positive      0.8804    0.8804    0.8804      6515
+
+     accuracy                          0.9925    208402
+    macro avg      0.9383    0.9383    0.9383    208402
+ weighted avg      0.9925    0.9925    0.9925    208402
+
 ```
 
 
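One note on the terse metric names in the results block: `acc_and_f1` is the arithmetic mean of `acc` and `f1`, which both the old and new figures satisfy exactly; a quick check in plain Python:

```python
# Sanity check: acc_and_f1 = (acc + f1) / 2 for both result blocks.
old = (0.984035000191887 + 0.936147352264006) / 2
new = (0.992524064068483 + 0.8804297774366846) / 2

assert abs(old - 0.9600911762279465) < 1e-12
assert abs(new - 0.9364769207525838) < 1e-12
```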
@@ -200,8 +211,8 @@ retriever.train(
     dev_filename=dev_filename,
     test_filename=dev_filename,
     n_epochs=1,
-    batch_size=4,
-    grad_acc_steps=4,
+    batch_size=16,
+    grad_acc_steps=8,
     save_dir=save_dir,
     evaluate_every=3000,
     embed_title=True,
@@ -294,8 +294,8 @@
     "\n",
     "Let's start training and save our trained model!\n",
     "\n",
-    "On a V100 GPU, you can fit up to batch size 4 so we set gradient accumulation steps to 4 in order\n",
-    "to simulate the batch size 16 of the original DPR experiment.\n",
+    "On a V100 GPU, you can fit up to batch size 16 so we set gradient accumulation steps to 8 in order\n",
+    "to simulate the batch size 128 of the original DPR experiment.\n",
     "\n",
     "When `embed_title=True`, the document title is prepended to the input text sequence with a `[SEP]` token\n",
     "between it and document text."
@@ -313,11 +313,22 @@
     "When training from scratch with the above variables, 1 epoch takes around an hour and we reached the following performance:\n",
     "\n",
     "```\n",
-    "loss: 0.09334952129693501\n",
-    "acc: 0.984035000191887\n",
-    "f1: 0.936147352264006\n",
-    "acc_and_f1: 0.9600911762279465\n",
-    "average_rank: 0.07075978511128166\n",
+    "loss: 0.046580662854042276\n",
+    "task_name: text_similarity\n",
+    "acc: 0.992524064068483\n",
+    "f1: 0.8804297774366846\n",
+    "acc_and_f1: 0.9364769207525838\n",
+    "average_rank: 0.19631619339984652\n",
+    "report:\n",
+    "                precision    recall  f1-score   support\n",
+    "\n",
+    "hard_negative      0.9961    0.9961    0.9961    201887\n",
+    "     positive      0.8804    0.8804    0.8804      6515\n",
+    "\n",
+    "     accuracy                          0.9925    208402\n",
+    "    macro avg      0.9383    0.9383    0.9383    208402\n",
+    " weighted avg      0.9925    0.9925    0.9925    208402\n",
+    "\n",
+    "```"
    ],
    "metadata": {
@@ -340,8 +351,8 @@
     "    dev_filename=dev_filename,\n",
     "    test_filename=dev_filename,\n",
     "    n_epochs=1,\n",
-    "    batch_size=4,\n",
-    "    grad_acc_steps=4,\n",
+    "    batch_size=16,\n",
+    "    grad_acc_steps=8,\n",
     "    save_dir=save_dir,\n",
     "    evaluate_every=3000,\n",
     "    embed_title=True,\n",
@@ -65,8 +65,8 @@ def tutorial9_dpr_training():
         dev_filename=dev_filename,
         test_filename=dev_filename,
         n_epochs=1,
-        batch_size=4,
-        grad_acc_steps=4,
+        batch_size=16,
+        grad_acc_steps=8,
         save_dir=save_dir,
         evaluate_every=3000,
         embed_title=True,