{ "epoch": 0.99, "eval_logits/chosen": 106.62171173095703, "eval_logits/rejected": 100.2883529663086, "eval_logps/chosen": -476.6144104003906, "eval_logps/rejected": -493.8050231933594, "eval_loss": 0.5058174729347229, "eval_rewards/accuracies": 0.7083333134651184, "eval_rewards/chosen": -1.1604968309402466, "eval_rewards/margins": 0.9522747993469238, "eval_rewards/rejected": -2.11277174949646, "eval_runtime": 51.3345, "eval_samples": 750, "eval_samples_per_second": 14.61, "eval_steps_per_second": 0.468 }