{ "epoch": 1.0, "eval_logits/chosen": -3.0808565616607666, "eval_logits/rejected": -2.90731143951416, "eval_logps/chosen": -808.923095703125, "eval_logps/rejected": -1705.3643798828125, "eval_loss": 0.3800393342971802, "eval_rewards/accuracies": 0.8601694703102112, "eval_rewards/chosen": -2.4212381839752197, "eval_rewards/margins": 4.509303569793701, "eval_rewards/rejected": -6.9305419921875, "eval_runtime": 193.7756, "eval_samples": 1868, "eval_samples_per_second": 9.64, "eval_steps_per_second": 0.304 }