{ "epoch": 0.99, "eval_logits/chosen": 106.95464324951172, "eval_logits/rejected": 98.64165496826172, "eval_logps/chosen": -782.9426879882812, "eval_logps/rejected": -810.2578125, "eval_loss": 1.7132372856140137, "eval_rewards/accuracies": 0.7291666865348816, "eval_rewards/chosen": 3.7306249141693115, "eval_rewards/margins": 4.981531620025635, "eval_rewards/rejected": -1.2509063482284546, "eval_runtime": 48.4896, "eval_samples": 750, "eval_samples_per_second": 15.467, "eval_steps_per_second": 0.495 }