{ "epoch": 3.99, "eval_logits/chosen": 0.3525713086128235, "eval_logits/rejected": 0.4171118438243866, "eval_logps/chosen": -272.5227355957031, "eval_logps/rejected": -249.6534423828125, "eval_loss": 2546.4375, "eval_rewards/accuracies": 0.5220000147819519, "eval_rewards/chosen": -0.15908558666706085, "eval_rewards/margins": 0.0020662047900259495, "eval_rewards/rejected": -0.16115178167819977, "eval_runtime": 411.7057, "eval_samples": 2000, "eval_samples_per_second": 4.858, "eval_steps_per_second": 1.214, "train_loss": 1918.4646684695513, "train_runtime": 14252.7073, "train_samples": 30567, "train_samples_per_second": 1.403, "train_steps_per_second": 0.088 }