Merge pull request #1183 from baochi0212/master

Bug of BGE M3 training
This commit is contained in:
chaofan 2024-11-04 16:11:42 +08:00 committed by GitHub
commit a055d48886
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -221,7 +221,7 @@ class BGEM3Model(nn.Module):
if teacher_scores is not None:
# print("Use soft-label distillation...")
teacher_targets = F.softmax(teacher_scores, dim=-1) # B N
- group_size = p_sparse_vecs.size(0) // q_sparse_vecs.size(0)
+ group_size = p_dense_vecs.size(0) // q_dense_vecs.size(0)
# dense loss
dense_scores = self.dense_score(q_dense_vecs, p_dense_vecs) # B, B * N