{ "epoch": 1.0, "eval_logits/chosen": 1.6140714883804321, "eval_logits/rejected": 2.3821487426757812, "eval_logps/chosen": -368.41351318359375, "eval_logps/rejected": -468.14495849609375, "eval_loss": 0.5042223334312439, "eval_rewards/accuracies": 0.75390625, "eval_rewards/chosen": -1.049981713294983, "eval_rewards/margins": 0.9979785680770874, "eval_rewards/rejected": -2.0479602813720703, "eval_runtime": 91.1991, "eval_samples": 2000, "eval_samples_per_second": 21.93, "eval_steps_per_second": 0.351 }