{ "epoch": 1.0, "eval_logits/chosen": -0.8344432711601257, "eval_logits/rejected": -0.6401407718658447, "eval_logps/chosen": -361.0932312011719, "eval_logps/rejected": -682.3729858398438, "eval_loss": 0.46492835879325867, "eval_rewards/accuracies": 0.9166666865348816, "eval_rewards/chosen": -0.1737493872642517, "eval_rewards/margins": 0.431477814912796, "eval_rewards/rejected": -0.6052272319793701, "eval_runtime": 12.8431, "eval_samples": 96, "eval_samples_per_second": 7.475, "eval_steps_per_second": 0.234 }