{ "epoch": 3.0, "eval_logits/chosen": -5.246756553649902, "eval_logits/rejected": -4.960155010223389, "eval_logps/chosen": -588.7877197265625, "eval_logps/rejected": -450.20184326171875, "eval_loss": 0.5464638471603394, "eval_rewards/accuracies": 0.7170000076293945, "eval_rewards/chosen": -0.05181777477264404, "eval_rewards/margins": 0.7143149375915527, "eval_rewards/rejected": -0.7661327123641968, "eval_runtime": 103.5267, "eval_samples": 2000, "eval_samples_per_second": 19.319, "eval_steps_per_second": 1.207, "train_loss": 0.49318724932753905, "train_runtime": 14700.0435, "train_samples": 61966, "train_samples_per_second": 12.646, "train_steps_per_second": 0.79 }