{
  "epoch": 1.0,
  "eval_logits/chosen": -3.900073289871216,
  "eval_logits/rejected": -3.9142141342163086,
  "eval_logps/chosen": -293.32354736328125,
  "eval_logps/rejected": -187.59616088867188,
  "eval_loss": 0.6149587035179138,
  "eval_rewards/accuracies": 0.6875,
  "eval_rewards/chosen": -0.1575067937374115,
  "eval_rewards/diff": -2.2334296703338623,
  "eval_rewards/diff_abs": 2.236445188522339,
  "eval_rewards/rejected": -0.33032703399658203,
  "eval_rewards/student_margin": 0.17282025516033173,
  "eval_rewards/teacher_margin": 2.40625,
  "eval_runtime": 11.4547,
  "eval_samples": 1470,
  "eval_samples_per_second": 128.332,
  "eval_steps_per_second": 0.175
}