|
{ |
|
"epoch": 3.0, |
|
"eval_dpo_losses": 0.6309279203414917, |
|
"eval_logits/chosen": -2.586883544921875, |
|
"eval_logits/rejected": -2.536296844482422, |
|
"eval_logps/chosen": -330.7057189941406, |
|
"eval_logps/rejected": -333.7965087890625, |
|
"eval_loss": 5.204337120056152, |
|
"eval_positive_losses": 49.692630767822266, |
|
"eval_rewards/accuracies": 0.670634925365448, |
|
"eval_rewards/chosen": -0.4548453986644745, |
|
"eval_rewards/margins": 0.2912927269935608, |
|
"eval_rewards/margins_max": 1.0734634399414062, |
|
"eval_rewards/margins_min": -0.5345237851142883, |
|
"eval_rewards/margins_std": 0.7254678010940552, |
|
"eval_rewards/rejected": -0.7461380958557129, |
|
"eval_runtime": 284.396, |
|
"eval_samples": 2000, |
|
"eval_samples_per_second": 7.032, |
|
"eval_steps_per_second": 0.222, |
|
"train_loss": 0.48024289137880566, |
|
"train_runtime": 8933.1726, |
|
"train_samples": 5678, |
|
"train_samples_per_second": 1.907, |
|
"train_steps_per_second": 0.119 |
|
} |