{ "epoch": 1.0, "eval_logits/chosen": 1.1976563930511475, "eval_logits/rejected": 1.104769229888916, "eval_logps/chosen": -349.1275329589844, "eval_logps/rejected": -429.5989685058594, "eval_loss": 0.5278915762901306, "eval_rewards/accuracies": 0.76171875, "eval_rewards/chosen": -1.0268259048461914, "eval_rewards/margins": 0.7935623526573181, "eval_rewards/rejected": -1.8203881978988647, "eval_runtime": 72.3078, "eval_samples": 2000, "eval_samples_per_second": 27.66, "eval_steps_per_second": 0.443, "train_loss": 0.5624207920498319, "train_runtime": 4926.1363, "train_samples": 61135, "train_samples_per_second": 12.41, "train_steps_per_second": 0.097 }