{ "epoch": 0.99, "eval_logits/chosen": 103.4466323852539, "eval_logits/rejected": 94.834228515625, "eval_logps/chosen": -781.325927734375, "eval_logps/rejected": -823.1282348632812, "eval_loss": 0.5184370875358582, "eval_rewards/accuracies": 0.7604166865348816, "eval_rewards/chosen": 0.5471681356430054, "eval_rewards/margins": 1.347060203552246, "eval_rewards/rejected": -0.7998921275138855, "eval_runtime": 51.097, "eval_samples": 750, "eval_samples_per_second": 14.678, "eval_steps_per_second": 0.47 }