{ "epoch": 0.9995419147961521, "eval_logits/chosen": -0.6407761573791504, "eval_logits/rejected": -0.6567662358283997, "eval_logps/chosen": -200.93621826171875, "eval_logps/rejected": -276.8379821777344, "eval_loss": 0.30047258734703064, "eval_rewards/accuracies": 0.8699421882629395, "eval_rewards/chosen": 1.6737332344055176, "eval_rewards/margins": 2.166951894760132, "eval_rewards/rejected": -0.4932188391685486, "eval_runtime": 252.7931, "eval_samples": 2763, "eval_samples_per_second": 10.93, "eval_steps_per_second": 1.369, "total_flos": 0.0, "train_loss": 0.37405444834616947, "train_runtime": 8738.9435, "train_samples": 34924, "train_samples_per_second": 3.996, "train_steps_per_second": 0.125 }