{ "epoch": 0.9988571428571429, "eval_logits/chosen": -0.5392131209373474, "eval_logits/rejected": 0.8578556776046753, "eval_logps/chosen": -395.783447265625, "eval_logps/rejected": -463.0950012207031, "eval_loss": 0.5202247500419617, "eval_rewards/accuracies": 0.7715517282485962, "eval_rewards/chosen": -1.2017946243286133, "eval_rewards/margins": 1.2383939027786255, "eval_rewards/rejected": -2.440188407897949, "eval_runtime": 91.4513, "eval_samples": 1831, "eval_samples_per_second": 20.022, "eval_steps_per_second": 0.317 }