{ "epoch": 1.0, "eval_logits/chosen": -0.786741316318512, "eval_logits/rejected": -0.567284107208252, "eval_logps/chosen": -312.4064025878906, "eval_logps/rejected": -771.6371459960938, "eval_loss": 0.27778080105781555, "eval_rewards/accuracies": 0.949999988079071, "eval_rewards/chosen": -0.10158518701791763, "eval_rewards/margins": 2.0500597953796387, "eval_rewards/rejected": -2.1516449451446533, "eval_runtime": 15.9542, "eval_samples": 152, "eval_samples_per_second": 9.527, "eval_steps_per_second": 0.313 }