{ "epoch": 1.0, "eval_logits/chosen": -1.099204421043396, "eval_logits/rejected": -0.6287172436714172, "eval_logps/chosen": -231.0450897216797, "eval_logps/rejected": -222.87437438964844, "eval_loss": 0.34230488538742065, "eval_rewards/accuracies": 0.7313649654388428, "eval_rewards/chosen": -1.0060617923736572, "eval_rewards/margins": 0.29796916246414185, "eval_rewards/rejected": -1.3040310144424438, "eval_rewards/safe_rewards": -0.9952626824378967, "eval_rewards/unsafe_rewards": -1.0112754106521606, "eval_runtime": 1122.5675, "eval_samples": 33044, "eval_samples_per_second": 29.436, "eval_steps_per_second": 0.92, "train_loss": 1.018996798697021, "train_runtime": 22449.6551, "train_samples": 59478, "train_samples_per_second": 2.649, "train_steps_per_second": 0.083 }