{ "epoch": 1.0, "eval_logits/chosen": -2.5045135021209717, "eval_logits/rejected": -2.4882149696350098, "eval_logps/chosen": -248.94781494140625, "eval_logps/rejected": -191.02655029296875, "eval_loss": 0.21413667500019073, "eval_rewards/accuracies": 0.9140625, "eval_rewards/chosen": 4.242725372314453, "eval_rewards/margins": 9.778908729553223, "eval_rewards/rejected": -5.536184310913086, "eval_runtime": 97.1606, "eval_samples": 2000, "eval_samples_per_second": 20.584, "eval_steps_per_second": 0.329, "train_loss": 0.4018711235732713, "train_runtime": 7633.33, "train_samples": 61135, "train_samples_per_second": 8.009, "train_steps_per_second": 0.063 }