{ "epoch": 2.0, "eval_logits/chosen": 0.7331738471984863, "eval_logits/rejected": 0.7520135045051575, "eval_logps/chosen": -93.26776885986328, "eval_logps/rejected": -96.37944030761719, "eval_loss": 0.00928194634616375, "eval_rewards/accuracies": 0.3499999940395355, "eval_rewards/chosen": -0.015403981320559978, "eval_rewards/margins": 0.006420300807803869, "eval_rewards/rejected": -0.02182428352534771, "eval_runtime": 272.4398, "eval_samples": 2000, "eval_samples_per_second": 7.341, "eval_steps_per_second": 0.459 }