{ "epoch": 1.0, "eval_logits/chosen": -3.13935923576355, "eval_logits/rejected": -2.883375644683838, "eval_logps/chosen": -720.7828369140625, "eval_logps/rejected": -1537.8291015625, "eval_loss": 0.21815194189548492, "eval_rewards/accuracies": 0.875, "eval_rewards/chosen": -1.5509830713272095, "eval_rewards/margins": 4.800168991088867, "eval_rewards/rejected": -6.351152420043945, "eval_runtime": 66.6237, "eval_samples": 616, "eval_samples_per_second": 9.246, "eval_steps_per_second": 0.3, "train_loss": 0.2699868052719749, "train_runtime": 2833.2764, "train_samples": 11996, "train_samples_per_second": 4.234, "train_steps_per_second": 0.066 }