{ "epoch": 1.0, "eval_logits/chosen": -3.900073289871216, "eval_logits/rejected": -3.9142141342163086, "eval_logps/chosen": -293.32354736328125, "eval_logps/rejected": -187.59616088867188, "eval_loss": 0.6149587035179138, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.1575067937374115, "eval_rewards/diff": -2.2334296703338623, "eval_rewards/diff_abs": 2.236445188522339, "eval_rewards/rejected": -0.33032703399658203, "eval_rewards/student_margin": 0.17282025516033173, "eval_rewards/teacher_margin": 2.40625, "eval_runtime": 11.4547, "eval_samples": 1470, "eval_samples_per_second": 128.332, "eval_steps_per_second": 0.175, "train_loss": 0.6488963676806219, "train_runtime": 2900.3403, "train_samples": 147002, "train_samples_per_second": 50.684, "train_steps_per_second": 0.132 }