{ "epoch": 0.9997382884061764, "eval_logits/chosen": -0.8381435871124268, "eval_logits/rejected": -0.8568249344825745, "eval_logps/chosen": -507.26611328125, "eval_logps/rejected": -604.2300415039062, "eval_loss": 0.4785204529762268, "eval_rewards/accuracies": 0.7760000228881836, "eval_rewards/chosen": -2.3087284564971924, "eval_rewards/margins": 1.200958251953125, "eval_rewards/rejected": -3.5096867084503174, "eval_runtime": 490.7419, "eval_samples": 2000, "eval_samples_per_second": 4.075, "eval_steps_per_second": 0.255 }