{ "epoch": 1.0, "eval_logits/chosen": -3.411515712738037, "eval_logits/rejected": -3.456860065460205, "eval_logps/chosen": -564.323974609375, "eval_logps/rejected": -567.8529052734375, "eval_loss": 0.7979298830032349, "eval_rewards/accuracies": 0.46875, "eval_rewards/chosen": 4.5177483558654785, "eval_rewards/diff": -0.33996284008026123, "eval_rewards/diff_abs": 1.2063032388687134, "eval_rewards/rejected": 4.610641002655029, "eval_rewards/student_margin": -0.09289252758026123, "eval_rewards/teacher_margin": 0.2470703125, "eval_runtime": 26.855, "eval_samples": 1543, "eval_samples_per_second": 57.457, "eval_steps_per_second": 0.149 }