{ "epoch": 1.0, "eval_dpo_losses": 0.6899768114089966, "eval_logits/chosen": -2.8065316677093506, "eval_logits/rejected": -2.7679667472839355, "eval_logps/chosen": -281.7566833496094, "eval_logps/rejected": -256.38348388671875, "eval_loss": 0.691593587398529, "eval_positive_losses": 0.009106353856623173, "eval_rewards/accuracies": 0.6570000052452087, "eval_rewards/chosen": 0.028367336839437485, "eval_rewards/margins": 0.006412810645997524, "eval_rewards/margins_max": 0.030472885817289352, "eval_rewards/margins_min": -0.014914236031472683, "eval_rewards/margins_std": 0.014994381926953793, "eval_rewards/rejected": 0.021954525262117386, "eval_runtime": 428.2088, "eval_samples": 2000, "eval_samples_per_second": 4.671, "eval_steps_per_second": 0.292, "train_loss": 0.6924228537771114, "train_runtime": 3896.0637, "train_samples": 5263, "train_samples_per_second": 1.351, "train_steps_per_second": 0.084 }