{ "epoch": 1.971563981042654, "eval_logits/chosen": 113.6686782836914, "eval_logits/rejected": 108.56292724609375, "eval_logps/chosen": -332.4104919433594, "eval_logps/rejected": -363.9787902832031, "eval_loss": 0.29782694578170776, "eval_rewards/accuracies": 0.6979166865348816, "eval_rewards/chosen": 1.5628873109817505, "eval_rewards/margins": 1.6582797765731812, "eval_rewards/rejected": -0.09539230912923813, "eval_runtime": 121.439, "eval_samples": 750, "eval_samples_per_second": 6.176, "eval_steps_per_second": 0.198 }