{ "epoch": 1.0, "eval_logits/chosen": -1.2245557308197021, "eval_logits/rejected": 1.1489198207855225, "eval_logps/chosen": -197.46658325195312, "eval_logps/rejected": -414.81317138671875, "eval_loss": 0.4863126873970032, "eval_rewards/accuracies": 0.77734375, "eval_rewards/chosen": -0.45359015464782715, "eval_rewards/margins": 1.201076626777649, "eval_rewards/rejected": -1.6546669006347656, "eval_runtime": 103.3892, "eval_samples": 1000, "eval_samples_per_second": 9.672, "eval_steps_per_second": 0.31 }