{ "epoch": 1.98, "eval_logits/chosen": -0.010728351771831512, "eval_logits/rejected": 0.08673479408025742, "eval_logps/chosen": -306.4167785644531, "eval_logps/rejected": -278.7133483886719, "eval_loss": 0.01081769447773695, "eval_rewards/accuracies": 0.49950000643730164, "eval_rewards/chosen": -0.0007103218231350183, "eval_rewards/margins": -0.00023564710863865912, "eval_rewards/rejected": -0.00047467477270402014, "eval_runtime": 470.0123, "eval_samples": 2000, "eval_samples_per_second": 4.255, "eval_steps_per_second": 1.064 }