{ "epoch": 2.0, "eval_logits/chosen": 0.6623885631561279, "eval_logits/rejected": 0.7668179869651794, "eval_logps/chosen": -271.2955017089844, "eval_logps/rejected": -249.1475830078125, "eval_loss": 2156.2255859375, "eval_rewards/accuracies": 0.6940000057220459, "eval_rewards/chosen": -0.11051338165998459, "eval_rewards/margins": 0.06660113483667374, "eval_rewards/rejected": -0.17711451649665833, "eval_runtime": 325.4702, "eval_samples": 2000, "eval_samples_per_second": 6.145, "eval_steps_per_second": 0.384 }