{ "epoch": 1.971563981042654, "eval_logits/chosen": 92.7257308959961, "eval_logits/rejected": 86.90755462646484, "eval_logps/chosen": -470.2870178222656, "eval_logps/rejected": -508.6237487792969, "eval_loss": 0.2685789465904236, "eval_rewards/accuracies": 0.7291666865348816, "eval_rewards/chosen": -5.330941677093506, "eval_rewards/margins": 1.9966983795166016, "eval_rewards/rejected": -7.327640533447266, "eval_runtime": 118.8627, "eval_samples": 750, "eval_samples_per_second": 6.31, "eval_steps_per_second": 0.202 }