{ "epoch": 0.1, "eval_logits/chosen": -2.449439287185669, "eval_logits/rejected": -2.416422128677368, "eval_logps/chosen": -258.90673828125, "eval_logps/rejected": -260.843017578125, "eval_loss": 0.6843283772468567, "eval_rewards/accuracies": 0.62109375, "eval_rewards/chosen": 0.023392992094159126, "eval_rewards/margins": 0.019943276420235634, "eval_rewards/rejected": 0.003449714742600918, "eval_runtime": 144.9425, "eval_samples": 2000, "eval_samples_per_second": 13.799, "eval_steps_per_second": 0.221 }