{ "epoch": 3.97, "eval_logits/chosen": -0.05778733268380165, "eval_logits/rejected": 0.0396571010351181, "eval_logps/chosen": -307.2596130371094, "eval_logps/rejected": -279.42071533203125, "eval_loss": 0.0018671295838430524, "eval_rewards/accuracies": 0.4869999885559082, "eval_rewards/chosen": -0.006934665143489838, "eval_rewards/margins": -0.0012784178834408522, "eval_rewards/rejected": -0.0056562479585409164, "eval_runtime": 412.8752, "eval_samples": 2000, "eval_samples_per_second": 4.844, "eval_steps_per_second": 1.211 }