{ "epoch": 1.0, "eval_logits/chosen": -1.9487414360046387, "eval_logits/rejected": -1.7909051179885864, "eval_logps/chosen": -291.80194091796875, "eval_logps/rejected": -278.0226135253906, "eval_loss": 0.05501076579093933, "eval_rewards/accuracies": 0.38749998807907104, "eval_rewards/chosen": -0.0251043438911438, "eval_rewards/margins": -0.0020215215627104044, "eval_rewards/rejected": -0.023082822561264038, "eval_runtime": 728.1274, "eval_samples": 2000, "eval_samples_per_second": 2.747, "eval_steps_per_second": 1.373 }