{ "epoch": 0.9996020692399522, "eval_logits/chosen": -1.4212305545806885, "eval_logits/rejected": -1.432730793952942, "eval_logps/chosen": -228.8310546875, "eval_logps/rejected": -246.39952087402344, "eval_loss": 0.6470152735710144, "eval_rewards/accuracies": 0.6128731369972229, "eval_rewards/chosen": -0.3647947907447815, "eval_rewards/margins": 0.11783836036920547, "eval_rewards/rejected": -0.48263317346572876, "eval_runtime": 326.0, "eval_samples": 8552, "eval_samples_per_second": 26.233, "eval_steps_per_second": 0.411, "total_flos": 0.0, "train_loss": 0.6662861807331159, "train_runtime": 15158.4407, "train_samples": 160800, "train_samples_per_second": 10.608, "train_steps_per_second": 0.041 }