From 0dbc203f66bf454e5396db9934a948a4ac9ba2c2 Mon Sep 17 00:00:00 2001
From: Jinge Wang
Date: Sun, 15 Sep 2024 20:36:22 +0800
Subject: [PATCH] Fix 2 typos in 04_preference-tuning-with-dpo (#356)

---
 ch07/04_preference-tuning-with-dpo/dpo-from-scratch.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ch07/04_preference-tuning-with-dpo/dpo-from-scratch.ipynb b/ch07/04_preference-tuning-with-dpo/dpo-from-scratch.ipynb
index d161e17..9513724 100644
--- a/ch07/04_preference-tuning-with-dpo/dpo-from-scratch.ipynb
+++ b/ch07/04_preference-tuning-with-dpo/dpo-from-scratch.ipynb
@@ -2774,7 +2774,7 @@
    },
    "source": [
     "- As we can see above, the loss continues to improve, which is a good sign\n",
-    "- Based on the downward slope, one might be tempted to train the model a bit further (and readers are encouraged to try this), but not that DPO is prone to collapse, where the model may start generating nonsensical responses\n",
+    "- Based on the downward slope, one might be tempted to train the model a bit further (and readers are encouraged to try this), but note that DPO is prone to collapse, where the model may start generating nonsensical responses\n",
     "- Next, let's take a look at the reward margins:"
    ]
   },
@@ -2823,7 +2823,7 @@
    },
    "source": [
     "- As we can see, and as it's desired, the reward margins improve; this mirrors the loss curve and is a good sign\n",
-    "- Note that DPO losses and reward margins are valuable metrics to track during training; however, they don't tell the whole store\n",
+    "- Note that DPO losses and reward margins are valuable metrics to track during training; however, they don't tell the whole story\n",
     "- Lastly, and most importantly, we have to conduct a qualitative check of the responses\n",
     "- Here, we will look at the response (in addition, you could use an LLM to score the responses similar to chapter 7)"
    ]