{ "epoch": 2.0, "eval_logits/chosen": 0.6308508515357971, "eval_logits/rejected": 0.7334519028663635, "eval_logps/chosen": -285.9486999511719, "eval_logps/rejected": -269.1014404296875, "eval_loss": 0.6537346243858337, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": -0.2570453882217407, "eval_rewards/margins": 0.11960798501968384, "eval_rewards/rejected": -0.37665337324142456, "eval_runtime": 325.7825, "eval_samples": 2000, "eval_samples_per_second": 6.139, "eval_steps_per_second": 0.384, "train_loss": 0.6680307045922589, "train_runtime": 18174.3674, "train_samples": 30567, "train_samples_per_second": 3.364, "train_steps_per_second": 0.052 }