{ "epoch": 1.0, "eval_logits/chosen": 1.789110541343689, "eval_logits/rejected": 2.363781213760376, "eval_logps/chosen": -477.48095703125, "eval_logps/rejected": -560.772216796875, "eval_loss": 0.5036382079124451, "eval_rewards/accuracies": 0.7294999957084656, "eval_rewards/chosen": -2.0892136096954346, "eval_rewards/margins": 1.0304385423660278, "eval_rewards/rejected": -3.119652509689331, "eval_runtime": 1346.3395, "eval_samples": 2000, "eval_samples_per_second": 1.486, "eval_steps_per_second": 0.186 }