This commit is contained in:
Jake Poznanski 2025-08-21 17:17:43 +00:00
parent 892429965a
commit 5556e204cf

View File

@ -170,10 +170,11 @@ class OlmOCRDataset(Dataset):
# Return None if processing fails # Return None if processing fails
return None return None
def simple_length_reward(completions_ids=None, **kwargs):
    """Reward function that assigns higher scores to longer completions (in terms of token count).

    Args:
        completions_ids: Iterable of token-id sequences, one per completion.
            May be given positionally or by keyword.
        **kwargs: Any other keyword arguments from the caller. They are logged
            and otherwise ignored, except that ``completion_ids`` is used as a
            fallback source of token ids when ``completions_ids`` is absent.

    Returns:
        A list with one float per completion, equal to its token count.

    Raises:
        TypeError: If no token ids were supplied under either name.
    """
    logger.info(f"Reward function called {kwargs}")

    if completions_ids is None:
        # NOTE(review): the trainer may pass the tokenized completions under
        # the keyword `completion_ids` rather than `completions_ids` -- confirm
        # against the installed trainer version. Accept both spellings.
        completions_ids = kwargs.get("completion_ids")

    if completions_ids is None:
        # Fail loudly instead of returning a placeholder reward: a constant or
        # random score would silently train the policy on noise.
        raise TypeError(
            "simple_length_reward() requires 'completions_ids' (or 'completion_ids')"
        )

    # One score per completion: its length in tokens.
    return [float(len(ids)) for ids in completions_ids]
def main(): def main():