{ "epoch": 3.0, "eval_logits/chosen": -2.4403114318847656, "eval_logits/rejected": -2.160489320755005, "eval_logps/chosen": -84.75238800048828, "eval_logps/rejected": -121.9200210571289, "eval_loss": 0.009359963238239288, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -1.2833248376846313, "eval_rewards/margins": 28.446029663085938, "eval_rewards/rejected": -29.729354858398438, "eval_runtime": 194.4792, "eval_samples": 2862, "eval_samples_per_second": 14.716, "eval_steps_per_second": 0.92, "train_loss": 0.020571088252061828, "train_runtime": 85394.2588, "train_samples": 140201, "train_samples_per_second": 4.925, "train_steps_per_second": 0.077 }