{ "epoch": 0.9965156794425087, "eval_logits/chosen": -0.16245996952056885, "eval_logits/rejected": 0.3127332031726837, "eval_logps/chosen": -328.00042724609375, "eval_logps/rejected": -417.0811462402344, "eval_loss": 0.5565306544303894, "eval_rewards/accuracies": 0.7578125, "eval_rewards/chosen": -0.653814435005188, "eval_rewards/margins": 0.8903896808624268, "eval_rewards/rejected": -1.5442042350769043, "eval_runtime": 102.1382, "eval_samples": 2000, "eval_samples_per_second": 19.581, "eval_steps_per_second": 0.313, "total_flos": 0.0, "train_loss": 0.4296882511018873, "train_runtime": 3529.992, "train_samples": 18339, "train_samples_per_second": 5.195, "train_steps_per_second": 0.041 }