{ "epoch": 1.0, "eval_logits/chosen": -2.5471837520599365, "eval_logits/rejected": -2.4984941482543945, "eval_logps/chosen": -255.5736846923828, "eval_logps/rejected": -296.26116943359375, "eval_loss": 0.5782328248023987, "eval_rewards/accuracies": 0.6926282048225403, "eval_rewards/chosen": -0.21196967363357544, "eval_rewards/margins": 0.4882549047470093, "eval_rewards/rejected": -0.7002245783805847, "eval_runtime": 862.1588, "eval_samples": 12451, "eval_samples_per_second": 14.442, "eval_steps_per_second": 0.452, "train_loss": 0.5964619764459262, "train_runtime": 39064.845, "train_samples": 112052, "train_samples_per_second": 2.868, "train_steps_per_second": 0.045 }