{ "epoch": 9.95, "eval_logits/chosen": 15.895672798156738, "eval_logits/rejected": 15.476293563842773, "eval_logps/chosen": -503.22802734375, "eval_logps/rejected": -549.6417846679688, "eval_loss": 0.581115186214447, "eval_rewards/accuracies": 0.796407163143158, "eval_rewards/chosen": -0.5595874786376953, "eval_rewards/margins": 0.3732680380344391, "eval_rewards/rejected": -0.932855486869812, "eval_runtime": 55.3448, "eval_samples": 994, "eval_samples_per_second": 18.069, "eval_steps_per_second": 3.017 }