{ "epoch": 1.0, "eval_logits/chosen": -1.0555651187896729, "eval_logits/rejected": -0.9260634779930115, "eval_logps/chosen": -431.970947265625, "eval_logps/rejected": -520.1134643554688, "eval_loss": 0.48531633615493774, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -1.9996598958969116, "eval_rewards/margins": 1.0853556394577026, "eval_rewards/rejected": -3.085015296936035, "eval_runtime": 730.1693, "eval_samples": 2000, "eval_samples_per_second": 2.739, "eval_steps_per_second": 1.37 }