{ "epoch": 1.0, "eval_logits/chosen": 0.9743706583976746, "eval_logits/rejected": 0.6065909266471863, "eval_logps/chosen": -139.52996826171875, "eval_logps/rejected": -142.2351837158203, "eval_loss": 0.01678086631000042, "eval_rewards/accuracies": 0.9956116080284119, "eval_rewards/chosen": -1.5749770402908325, "eval_rewards/margins": 9.825251579284668, "eval_rewards/rejected": -11.400227546691895, "eval_runtime": 379.7007, "eval_samples_per_second": 9.6, "eval_steps_per_second": 4.801 }