{ "epoch": 3.97, "eval_logits/chosen": -0.005432075820863247, "eval_logits/rejected": 0.09337250888347626, "eval_logps/chosen": -306.2527160644531, "eval_logps/rejected": -278.61090087890625, "eval_loss": 0.0015043426537886262, "eval_rewards/accuracies": 0.5360000133514404, "eval_rewards/chosen": 0.00021375658980105072, "eval_rewards/margins": 0.000962120364420116, "eval_rewards/rejected": -0.0007483638473786414, "eval_runtime": 420.5209, "eval_samples": 2000, "eval_samples_per_second": 4.756, "eval_steps_per_second": 1.189 }