{ "epoch": 1.0, "eval_logits/chosen": -2.13159441947937, "eval_logits/rejected": -2.0261080265045166, "eval_logps/chosen": -353.0312805175781, "eval_logps/rejected": -402.0422058105469, "eval_loss": 0.5784093141555786, "eval_rewards/accuracies": 0.6890000104904175, "eval_rewards/chosen": -0.8136823177337646, "eval_rewards/margins": 0.5943145751953125, "eval_rewards/margins_max": 2.7059268951416016, "eval_rewards/margins_min": -0.7893158197402954, "eval_rewards/margins_std": 1.1535371541976929, "eval_rewards/rejected": -1.4079967737197876, "eval_runtime": 1498.5961, "eval_samples": 4000, "eval_samples_per_second": 2.669, "eval_steps_per_second": 0.167 }