{ "epoch": 3.0, "eval_logits/chosen": -2.4555959701538086, "eval_logits/rejected": -2.1643569469451904, "eval_logps/chosen": -334.3052978515625, "eval_logps/rejected": -316.3751220703125, "eval_loss": 0.4730003774166107, "eval_rewards/accuracies": 0.8015872836112976, "eval_rewards/chosen": -3.5288658142089844, "eval_rewards/margins": 3.8411812782287598, "eval_rewards/rejected": -7.370047569274902, "eval_runtime": 139.3206, "eval_samples": 2000, "eval_samples_per_second": 14.355, "eval_steps_per_second": 0.452 }