{ "epoch": 1.0, "eval_logits/chosen": -0.8900327086448669, "eval_logits/rejected": -0.8877933025360107, "eval_logps/chosen": -986.1032104492188, "eval_logps/rejected": -1205.3543701171875, "eval_loss": 0.5355749130249023, "eval_rewards/accuracies": 0.692307710647583, "eval_rewards/chosen": -0.28710055351257324, "eval_rewards/margins": 0.4889349639415741, "eval_rewards/rejected": -0.7760356068611145, "eval_runtime": 47.8688, "eval_samples": 416, "eval_samples_per_second": 8.69, "eval_steps_per_second": 0.272 }