{ "epoch": 1.0, "eval_logits/chosen": -3.3306896686553955, "eval_logits/rejected": -3.137249708175659, "eval_logps/chosen": -1001.12890625, "eval_logps/rejected": -1768.898193359375, "eval_loss": 0.2304462045431137, "eval_rewards/accuracies": 0.8472222089767456, "eval_rewards/chosen": -1.4963774681091309, "eval_rewards/margins": 7.075459003448486, "eval_rewards/rejected": -8.5718355178833, "eval_runtime": 34.9747, "eval_samples": 284, "eval_samples_per_second": 8.12, "eval_steps_per_second": 0.257, "train_loss": 0.2672084366101675, "train_runtime": 3083.1249, "train_samples": 12328, "train_samples_per_second": 3.999, "train_steps_per_second": 0.063 }