{ "epoch": 1.0, "eval_logits/chosen": -1.563957691192627, "eval_logits/rejected": -1.5160561800003052, "eval_logps/chosen": -331.9000549316406, "eval_logps/rejected": -348.5905456542969, "eval_loss": 0.5600629448890686, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -0.40962255001068115, "eval_rewards/margins": 0.5067135691642761, "eval_rewards/rejected": -0.9163362383842468, "eval_runtime": 469.9532, "eval_samples": 2000, "eval_samples_per_second": 4.256, "eval_steps_per_second": 1.064 }