{ "epoch": 1.0, "eval_logits/chosen": -2.0103824138641357, "eval_logits/rejected": -1.8318537473678589, "eval_logps/chosen": -559.1896362304688, "eval_logps/rejected": -652.67919921875, "eval_loss": 0.5344099402427673, "eval_rewards/accuracies": 0.7459999918937683, "eval_rewards/chosen": -2.6374406814575195, "eval_rewards/margins": 1.1352916955947876, "eval_rewards/rejected": -3.772732734680176, "eval_runtime": 464.7199, "eval_samples": 2000, "eval_samples_per_second": 4.304, "eval_steps_per_second": 2.152 }