{ "epoch": 1.0, "eval_logits/chosen": -0.7062377333641052, "eval_logits/rejected": -0.9040690064430237, "eval_logps/chosen": -834.8738403320312, "eval_logps/rejected": -1456.362060546875, "eval_loss": 0.5792528390884399, "eval_rewards/accuracies": 0.77173912525177, "eval_rewards/chosen": -0.1323184221982956, "eval_rewards/margins": 0.34403395652770996, "eval_rewards/rejected": -0.47635239362716675, "eval_runtime": 330.6189, "eval_samples": 2927, "eval_samples_per_second": 8.853, "eval_steps_per_second": 0.278 }