{ "epoch": 3.97, "eval_logits/chosen": 0.8417125940322876, "eval_logits/rejected": 0.8990198373794556, "eval_logps/chosen": -256.497314453125, "eval_logps/rejected": -233.4380340576172, "eval_loss": 0.0003914303961209953, "eval_rewards/accuracies": 0.49950000643730164, "eval_rewards/chosen": 0.001168824383057654, "eval_rewards/margins": 0.00016647118900436908, "eval_rewards/rejected": 0.001002353266812861, "eval_runtime": 491.0166, "eval_samples": 2000, "eval_samples_per_second": 4.073, "eval_steps_per_second": 1.018 }