{ "epoch": 2.0, "eval_logits/chosen": -2.4070470333099365, "eval_logits/rejected": -2.210773468017578, "eval_logps/chosen": -274.3865966796875, "eval_logps/rejected": -249.44677734375, "eval_loss": 0.0258334930986166, "eval_rewards/accuracies": 0.33799999952316284, "eval_rewards/chosen": -0.05798804759979248, "eval_rewards/margins": -0.05191566422581673, "eval_rewards/rejected": -0.006072388496249914, "eval_runtime": 1420.6371, "eval_samples": 2000, "eval_samples_per_second": 1.408, "eval_steps_per_second": 0.704 }