|
{ |
|
"epoch": 1.0, |
|
"eval_dpo_losses": 0.6446115970611572, |
|
"eval_logits/chosen": -2.713839530944824, |
|
"eval_logits/rejected": -2.6717445850372314, |
|
"eval_logps/chosen": -284.5663146972656, |
|
"eval_logps/rejected": -271.5415954589844, |
|
"eval_loss": 1.616790771484375, |
|
"eval_positive_losses": 8.828282356262207, |
|
"eval_rewards/accuracies": 0.6499999761581421, |
|
"eval_rewards/chosen": 0.0002709717955440283, |
|
"eval_rewards/margins": 0.12989762425422668, |
|
"eval_rewards/margins_max": 0.6433730721473694, |
|
"eval_rewards/margins_min": -0.34936216473579407, |
|
"eval_rewards/margins_std": 0.33273470401763916, |
|
"eval_rewards/rejected": -0.12962664663791656, |
|
"eval_runtime": 428.5272, |
|
"eval_samples": 2000, |
|
"eval_samples_per_second": 4.667, |
|
"eval_steps_per_second": 0.292, |
|
"train_loss": 0.5743894765074824, |
|
"train_runtime": 4311.1014, |
|
"train_samples": 5678, |
|
"train_samples_per_second": 1.317, |
|
"train_steps_per_second": 0.082 |
|
} |