{ "epoch": 3.97, "eval_logits/chosen": -0.06497720628976822, "eval_logits/rejected": 0.03257768973708153, "eval_logps/chosen": -307.4852600097656, "eval_logps/rejected": -279.59686279296875, "eval_loss": 0.001842426834627986, "eval_rewards/accuracies": 0.5015000104904175, "eval_rewards/chosen": -0.006085487548261881, "eval_rewards/margins": -0.0004582978435792029, "eval_rewards/rejected": -0.005627189297229052, "eval_runtime": 412.5036, "eval_samples": 2000, "eval_samples_per_second": 4.848, "eval_steps_per_second": 1.212, "train_loss": 0.0010713144931228712, "train_runtime": 2635.242, "train_samples": 61135, "train_samples_per_second": 1.518, "train_steps_per_second": 0.094 }