{ "epoch": 0.9988571428571429, "eval_logits/chosen": -0.5392131209373474, "eval_logits/rejected": 0.8578556776046753, "eval_logps/chosen": -395.783447265625, "eval_logps/rejected": -463.0950012207031, "eval_loss": 0.5202247500419617, "eval_rewards/accuracies": 0.7715517282485962, "eval_rewards/chosen": -1.2017946243286133, "eval_rewards/margins": 1.2383939027786255, "eval_rewards/rejected": -2.440188407897949, "eval_runtime": 91.4513, "eval_samples": 1831, "eval_samples_per_second": 20.022, "eval_steps_per_second": 0.317, "total_flos": 0.0, "train_loss": 0.5630035629534339, "train_runtime": 11387.5716, "train_samples": 55999, "train_samples_per_second": 4.918, "train_steps_per_second": 0.038 }