{ "epoch": 0.99, "eval_logits/chosen": 110.9354476928711, "eval_logits/rejected": 104.462890625, "eval_logps/chosen": -833.5565795898438, "eval_logps/rejected": -861.5226440429688, "eval_loss": 0.621110200881958, "eval_rewards/accuracies": 0.7291666865348816, "eval_rewards/chosen": -0.26271167397499084, "eval_rewards/margins": 2.0050528049468994, "eval_rewards/rejected": -2.2677645683288574, "eval_runtime": 51.7287, "eval_samples": 750, "eval_samples_per_second": 14.499, "eval_steps_per_second": 0.464, "train_loss": 0.7234916411913358, "train_runtime": 575.1359, "train_samples": 6750, "train_samples_per_second": 11.736, "train_steps_per_second": 0.09 }