{ "epoch": 1.0, "eval_logits/chosen": -0.7592486143112183, "eval_logits/rejected": -0.87989342212677, "eval_logps/chosen": -974.4986572265625, "eval_logps/rejected": -1658.228271484375, "eval_loss": 0.5849764347076416, "eval_rewards/accuracies": 0.7881355881690979, "eval_rewards/chosen": -0.20753850042819977, "eval_rewards/margins": 0.3774545192718506, "eval_rewards/rejected": -0.5849930644035339, "eval_runtime": 215.1803, "eval_samples": 1864, "eval_samples_per_second": 8.663, "eval_steps_per_second": 0.274, "train_loss": 0.5914445311983646, "train_runtime": 2922.1625, "train_samples": 11603, "train_samples_per_second": 3.971, "train_steps_per_second": 0.062 }