{ "epoch": 1.0, "eval_logits/chosen": -1.563957691192627, "eval_logits/rejected": -1.5160561800003052, "eval_logps/chosen": -331.9000549316406, "eval_logps/rejected": -348.5905456542969, "eval_loss": 0.5600629448890686, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -0.40962255001068115, "eval_rewards/margins": 0.5067135691642761, "eval_rewards/rejected": -0.9163362383842468, "eval_runtime": 469.9532, "eval_samples": 2000, "eval_samples_per_second": 4.256, "eval_steps_per_second": 1.064, "train_loss": 0.589801928247341, "train_runtime": 31982.1679, "train_samples": 61135, "train_samples_per_second": 1.912, "train_steps_per_second": 0.239 }