{ "epoch": 5.0, "eval_logits/chosen": -1.985146403312683, "eval_logits/rejected": -1.8253288269042969, "eval_logps/chosen": -287.1579895019531, "eval_logps/rejected": -271.7846374511719, "eval_loss": 0.09761956334114075, "eval_rewards/accuracies": 0.3440000116825104, "eval_rewards/chosen": -0.20460382103919983, "eval_rewards/margins": -0.03615570068359375, "eval_rewards/rejected": -0.16844810545444489, "eval_runtime": 700.8137, "eval_samples": 2000, "eval_samples_per_second": 2.854, "eval_steps_per_second": 1.427 }