{ "epoch": 1.0, "eval_logits/chosen": -1.6772518157958984, "eval_logits/rejected": -1.6514736413955688, "eval_logps/chosen": -158.52154541015625, "eval_logps/rejected": -170.3629150390625, "eval_loss": 0.6668696403503418, "eval_rewards/accuracies": 0.6976549625396729, "eval_rewards/chosen": 0.07404651492834091, "eval_rewards/margins": 0.05502632260322571, "eval_rewards/rejected": 0.019020190462470055, "eval_runtime": 1787.1604, "eval_samples": 19103, "eval_samples_per_second": 10.689, "eval_steps_per_second": 0.334, "train_loss": 0.2519006322069866, "train_runtime": 787.0698, "train_samples": 10477, "train_samples_per_second": 13.311, "train_steps_per_second": 0.208 }