{ "epoch": 0.9988571428571429, "eval_logits/chosen": 0.6892262101173401, "eval_logits/rejected": 2.4422545433044434, "eval_logps/chosen": -432.7330322265625, "eval_logps/rejected": -518.172607421875, "eval_loss": 0.541784942150116, "eval_rewards/accuracies": 0.767241358757019, "eval_rewards/chosen": -1.5712907314300537, "eval_rewards/margins": 1.419674038887024, "eval_rewards/rejected": -2.990964651107788, "eval_runtime": 91.4769, "eval_samples": 1831, "eval_samples_per_second": 20.016, "eval_steps_per_second": 0.317 }