{ "epoch": 1.0, "eval_logits/chosen": 0.9053679704666138, "eval_logits/rejected": 1.7481720447540283, "eval_logps/chosen": -228.00465393066406, "eval_logps/rejected": -198.00367736816406, "eval_loss": 4449.90234375, "eval_rewards/accuracies": 0.616104006767273, "eval_rewards/chosen": -0.8765509724617004, "eval_rewards/margins": 0.08215557038784027, "eval_rewards/rejected": -0.9587064981460571, "eval_rewards/safe_rewards": -0.865267813205719, "eval_rewards/unsafe_rewards": -0.860846996307373, "eval_runtime": 2354.4025, "eval_samples": 35044, "eval_samples_per_second": 14.884, "eval_steps_per_second": 0.466 }