{ "epoch": 1.971563981042654, "eval_logits/chosen": 113.8158187866211, "eval_logits/rejected": 108.58926391601562, "eval_logps/chosen": -333.64825439453125, "eval_logps/rejected": -365.6023254394531, "eval_loss": 0.2953260540962219, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": 1.500998854637146, "eval_rewards/margins": 1.6775684356689453, "eval_rewards/rejected": -0.17656946182250977, "eval_runtime": 122.5277, "eval_samples": 750, "eval_samples_per_second": 6.121, "eval_steps_per_second": 0.196 }