{ "epoch": 1.0, "eval_logits/chosen": -1.0169163942337036, "eval_logits/rejected": -0.8867645263671875, "eval_logps/chosen": -412.1428527832031, "eval_logps/rejected": -479.4647521972656, "eval_loss": 0.02242046222090721, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -0.18013788759708405, "eval_rewards/margins": 0.08771497756242752, "eval_rewards/rejected": -0.26785287261009216, "eval_runtime": 713.9076, "eval_samples": 2000, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.401 }