{ "epoch": 3.0, "eval_logits/chosen": -2.9668068885803223, "eval_logits/rejected": -2.877352237701416, "eval_logps/chosen": -266.8033752441406, "eval_logps/rejected": -229.52769470214844, "eval_loss": 0.5254282355308533, "eval_rewards/accuracies": 0.7400000095367432, "eval_rewards/chosen": -0.12839017808437347, "eval_rewards/margins": 0.7881025671958923, "eval_rewards/rejected": -0.9164927005767822, "eval_runtime": 537.7252, "eval_samples": 2000, "eval_samples_per_second": 3.719, "eval_steps_per_second": 1.86, "train_loss": 0.5633651232629111, "train_runtime": 75701.2005, "train_samples": 61966, "train_samples_per_second": 2.456, "train_steps_per_second": 0.038 }