{ "epoch": 1.0, "eval_logits/chosen": -0.15863379836082458, "eval_logits/rejected": -0.3815014362335205, "eval_logps/chosen": -523.9338989257812, "eval_logps/rejected": -821.0360717773438, "eval_loss": 0.6073186993598938, "eval_rewards/accuracies": 0.8214285969734192, "eval_rewards/chosen": 0.07047966867685318, "eval_rewards/margins": 0.24993744492530823, "eval_rewards/rejected": -0.17945778369903564, "eval_runtime": 22.8385, "eval_samples": 208, "eval_samples_per_second": 9.107, "eval_steps_per_second": 0.307, "train_loss": 0.6189163742643414, "train_runtime": 2381.9724, "train_samples": 10591, "train_samples_per_second": 4.446, "train_steps_per_second": 0.069 }