{ "epoch": 1.0, "eval_logits/chosen": -2.444389581680298, "eval_logits/rejected": -2.4428083896636963, "eval_logps/chosen": -65.44485473632812, "eval_logps/rejected": -71.30088806152344, "eval_loss": 0.68132084608078, "eval_rewards/accuracies": 0.2919999957084656, "eval_rewards/chosen": -0.0008568476187065244, "eval_rewards/margins": 0.02430289424955845, "eval_rewards/rejected": -0.025159740820527077, "eval_runtime": 376.3354, "eval_samples": 2000, "eval_samples_per_second": 5.314, "eval_steps_per_second": 0.664 }