{ "epoch": 1.0, "eval_logits/chosen": -1.3208973407745361, "eval_logits/rejected": -1.0479389429092407, "eval_logps/chosen": -157.5604248046875, "eval_logps/rejected": -693.1373291015625, "eval_loss": 0.6415364146232605, "eval_rewards/accuracies": 0.7749999761581421, "eval_rewards/chosen": -0.018806004896759987, "eval_rewards/margins": 0.12330160290002823, "eval_rewards/rejected": -0.14210760593414307, "eval_runtime": 16.2949, "eval_samples": 144, "eval_samples_per_second": 8.837, "eval_steps_per_second": 0.307 }