{ "epoch": 1.0, "eval_logits/chosen": -3.3306896686553955, "eval_logits/rejected": -3.137249708175659, "eval_logps/chosen": -1001.12890625, "eval_logps/rejected": -1768.898193359375, "eval_loss": 0.2304462045431137, "eval_rewards/accuracies": 0.8472222089767456, "eval_rewards/chosen": -1.4963774681091309, "eval_rewards/margins": 7.075459003448486, "eval_rewards/rejected": -8.5718355178833, "eval_runtime": 34.9747, "eval_samples": 284, "eval_samples_per_second": 8.12, "eval_steps_per_second": 0.257 }