{ "epoch": 3.97, "eval_logits/chosen": 0.8074077367782593, "eval_logits/rejected": 0.8652730584144592, "eval_logps/chosen": -257.0081481933594, "eval_logps/rejected": -233.95510864257812, "eval_loss": 0.001172678079456091, "eval_rewards/accuracies": 0.5059999823570251, "eval_rewards/chosen": -0.003939597401767969, "eval_rewards/margins": 0.00022889903630129993, "eval_rewards/rejected": -0.0041684964671730995, "eval_runtime": 749.2839, "eval_samples": 2000, "eval_samples_per_second": 2.669, "eval_steps_per_second": 0.667, "train_loss": 0.0010566485528421077, "train_runtime": 4635.3882, "train_samples": 61135, "train_samples_per_second": 0.863, "train_steps_per_second": 0.054 }