{ "epoch": 1.0, "eval_dpo_losses": 0.6691617965698242, "eval_logits/chosen": -2.305570602416992, "eval_logits/rejected": -2.1975905895233154, "eval_logps/chosen": -262.83721923828125, "eval_logps/rejected": -254.97503662109375, "eval_loss": 0.6798878312110901, "eval_positive_losses": 0.08152038604021072, "eval_rewards/accuracies": 0.7123016119003296, "eval_rewards/chosen": 0.12936948239803314, "eval_rewards/margins": 0.05091732367873192, "eval_rewards/margins_max": 0.19700397551059723, "eval_rewards/margins_min": -0.07996664941310883, "eval_rewards/margins_std": 0.09219963103532791, "eval_rewards/rejected": 0.07845214754343033, "eval_runtime": 387.9178, "eval_samples": 2000, "eval_samples_per_second": 5.156, "eval_steps_per_second": 0.162, "train_loss": 0.68390608707419, "train_runtime": 41762.7734, "train_samples": 61134, "train_samples_per_second": 1.464, "train_steps_per_second": 0.091 }