{ "epoch": 1.0, "eval_logits/chosen": -1.803659439086914, "eval_logits/rejected": -1.6524428129196167, "eval_logps/chosen": -296.7821350097656, "eval_logps/rejected": -284.1953430175781, "eval_loss": 0.7084454298019409, "eval_rewards/accuracies": 0.4640718698501587, "eval_rewards/chosen": -0.33870866894721985, "eval_rewards/margins": 0.03748469054698944, "eval_rewards/rejected": -0.3761933445930481, "eval_runtime": 1205.3044, "eval_samples": 2000, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.277 }