{ "epoch": 1.0, "eval_logits/chosen": 83.16413879394531, "eval_logits/rejected": 83.84245300292969, "eval_logps/chosen": -392.341552734375, "eval_logps/rejected": -414.519775390625, "eval_loss": 0.40556877851486206, "eval_rewards/accuracies": 0.792553186416626, "eval_rewards/chosen": -0.3995126485824585, "eval_rewards/margins": 3.1725716590881348, "eval_rewards/rejected": -3.5720841884613037, "eval_runtime": 140.3273, "eval_samples": 3000, "eval_samples_per_second": 21.379, "eval_steps_per_second": 0.67 }