{ "epoch": 1.0, "eval_logits/chosen": -0.30772945284843445, "eval_logits/rejected": 1.072676658630371, "eval_logps/chosen": -474.955810546875, "eval_logps/rejected": -812.6320190429688, "eval_loss": 0.21300281584262848, "eval_rewards/accuracies": 0.8984375, "eval_rewards/chosen": -2.175225019454956, "eval_rewards/margins": 4.151553153991699, "eval_rewards/rejected": -6.326778411865234, "eval_runtime": 97.1627, "eval_samples": 2000, "eval_samples_per_second": 20.584, "eval_steps_per_second": 0.329 }