{ "epoch": 1.98, "eval_logits/chosen": -0.006220105104148388, "eval_logits/rejected": 0.09270644932985306, "eval_logps/chosen": -306.42120361328125, "eval_logps/rejected": -278.68194580078125, "eval_loss": 0.010818002745509148, "eval_rewards/accuracies": 0.5, "eval_rewards/chosen": -0.0003898772120010108, "eval_rewards/margins": -4.452037319424562e-05, "eval_rewards/rejected": -0.00034535673330537975, "eval_runtime": 710.2034, "eval_samples": 2000, "eval_samples_per_second": 2.816, "eval_steps_per_second": 0.704, "train_loss": 0.010306676939850854, "train_runtime": 2255.3236, "train_samples": 61135, "train_samples_per_second": 0.887, "train_steps_per_second": 0.055 }