{ "epoch": 3.97, "eval_logits/chosen": -0.0017466560238972306, "eval_logits/rejected": 0.09523628652095795, "eval_logps/chosen": -306.2304382324219, "eval_logps/rejected": -278.4661865234375, "eval_loss": 0.0019584796391427517, "eval_rewards/accuracies": 0.49900001287460327, "eval_rewards/chosen": 0.0013964117970317602, "eval_rewards/margins": -2.741074604273308e-05, "eval_rewards/rejected": 0.0014238222502171993, "eval_runtime": 420.8882, "eval_samples": 2000, "eval_samples_per_second": 4.752, "eval_steps_per_second": 1.188 }