{ "epoch": 3.0, "eval_logits/chosen": -2.649322271347046, "eval_logits/rejected": -2.6439428329467773, "eval_logps/chosen": -197.75340270996094, "eval_logps/rejected": -183.8756866455078, "eval_loss": 0.9304828643798828, "eval_rewards/accuracies": 0.582426905632019, "eval_rewards/chosen": -0.21480964124202728, "eval_rewards/margins": 0.08063079416751862, "eval_rewards/rejected": -0.2954404354095459, "eval_runtime": 443.5148, "eval_samples": 11765, "eval_samples_per_second": 26.527, "eval_steps_per_second": 3.317 }