{ "epoch": 1.0, "eval_logits/chosen": -0.4874803125858307, "eval_logits/rejected": -0.5277639627456665, "eval_logps/chosen": -418.41363525390625, "eval_logps/rejected": -439.39300537109375, "eval_loss": 0.5796785354614258, "eval_rewards/accuracies": 0.716269850730896, "eval_rewards/chosen": -0.7179543972015381, "eval_rewards/margins": 0.5342229008674622, "eval_rewards/rejected": -1.2521772384643555, "eval_runtime": 305.3815, "eval_samples": 2000, "eval_samples_per_second": 6.549, "eval_steps_per_second": 0.206 }