{ "epoch": 1.0, "eval_logits/chosen": -2.0103824138641357, "eval_logits/rejected": -1.8318537473678589, "eval_logps/chosen": -559.1896362304688, "eval_logps/rejected": -652.67919921875, "eval_loss": 0.5344099402427673, "eval_rewards/accuracies": 0.7459999918937683, "eval_rewards/chosen": -2.6374406814575195, "eval_rewards/margins": 1.1352916955947876, "eval_rewards/rejected": -3.772732734680176, "eval_runtime": 464.7199, "eval_samples": 2000, "eval_samples_per_second": 4.304, "eval_steps_per_second": 2.152, "train_loss": 0.5599543302822287, "train_runtime": 34322.3587, "train_samples": 61135, "train_samples_per_second": 1.781, "train_steps_per_second": 0.445 }