{ "epoch": 1.0, "eval_logits/chosen": 0.0034948738757520914, "eval_logits/rejected": -0.007565807551145554, "eval_logps/chosen": -2.20627498626709, "eval_logps/rejected": -60.60332489013672, "eval_loss": 0.15350937843322754, "eval_rewards/accuracies": 0.9610000848770142, "eval_rewards/chosen": 17.28226661682129, "eval_rewards/margins": 5.981877326965332, "eval_rewards/rejected": 11.30038833618164, "eval_runtime": 361.5652, "eval_samples_per_second": 2.766, "eval_steps_per_second": 0.346, "total_flos": 1.4338459346927616e+18, "train_loss": 0.23597783709896936, "train_runtime": 13850.7044, "train_samples_per_second": 0.65, "train_steps_per_second": 0.081 }