{ "epoch": 0.9981298423724285, "eval_logits/chosen": -1.1943395137786865, "eval_logits/rejected": -1.1611931324005127, "eval_logps/chosen": -1.2059502601623535, "eval_logps/rejected": -1.4740996360778809, "eval_loss": 1.373450517654419, "eval_rewards/accuracies": 0.6653226017951965, "eval_rewards/chosen": -3.0148754119873047, "eval_rewards/margins": 0.6703741550445557, "eval_rewards/rejected": -3.6852493286132812, "eval_runtime": 71.4975, "eval_samples": 1961, "eval_samples_per_second": 27.428, "eval_steps_per_second": 0.867 }