{ "epoch": 1.0, "eval_logits/chosen": -2.5045135021209717, "eval_logits/rejected": -2.4882149696350098, "eval_logps/chosen": -248.94781494140625, "eval_logps/rejected": -191.02655029296875, "eval_loss": 0.21413667500019073, "eval_rewards/accuracies": 0.9140625, "eval_rewards/chosen": 4.242725372314453, "eval_rewards/margins": 9.778908729553223, "eval_rewards/rejected": -5.536184310913086, "eval_runtime": 97.1606, "eval_samples": 2000, "eval_samples_per_second": 20.584, "eval_steps_per_second": 0.329 }