{ "epoch": 1.0, "eval_logits/chosen": 1.0124144554138184, "eval_logits/rejected": 1.7555558681488037, "eval_logps/chosen": -594.8843383789062, "eval_logps/rejected": -706.2915649414062, "eval_loss": 0.4887983202934265, "eval_rewards/accuracies": 0.7509999871253967, "eval_rewards/chosen": -3.302633047103882, "eval_rewards/margins": 1.3144937753677368, "eval_rewards/rejected": -4.61712646484375, "eval_runtime": 1589.7118, "eval_samples": 2000, "eval_samples_per_second": 1.258, "eval_steps_per_second": 0.315 }