{ "epoch": 3.0, "eval_logits/chosen": -1.4396553039550781, "eval_logits/rejected": -1.4439030885696411, "eval_logps/chosen": -151.50469970703125, "eval_logps/rejected": -180.52976989746094, "eval_loss": 2.0568761825561523, "eval_rewards/accuracies": 0.6666666865348816, "eval_rewards/chosen": 9.014835357666016, "eval_rewards/margins": 1.1928315162658691, "eval_rewards/rejected": 7.822003364562988, "eval_runtime": 280.3771, "eval_samples_per_second": 2.514, "eval_steps_per_second": 2.514, "train_loss": 0.8950637202828078, "train_runtime": 14615.0375, "train_samples_per_second": 0.985, "train_steps_per_second": 0.123 }