{ "epoch": 1.0, "eval_logits/chosen": -4.123082160949707, "eval_logits/rejected": -4.0820841789245605, "eval_logps/chosen": -456.7123107910156, "eval_logps/rejected": -693.0343627929688, "eval_loss": 0.4655996561050415, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -0.5065802931785583, "eval_rewards/margins": 0.8316203951835632, "eval_rewards/rejected": -1.338200569152832, "eval_runtime": 12.1695, "eval_samples": 76, "eval_samples_per_second": 6.245, "eval_steps_per_second": 0.247, "train_loss": 0.021612109477854958, "train_runtime": 343.2656, "train_samples": 7736, "train_samples_per_second": 22.536, "train_steps_per_second": 0.352 }