{ "epoch": 0.99, "eval_logits/chosen": 106.41616821289062, "eval_logits/rejected": 100.24935150146484, "eval_logps/chosen": -458.2794494628906, "eval_logps/rejected": -466.465576171875, "eval_loss": 1.2743279933929443, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -1.9499748945236206, "eval_rewards/margins": 4.016444206237793, "eval_rewards/rejected": -5.966419219970703, "eval_runtime": 51.5123, "eval_samples": 750, "eval_samples_per_second": 14.56, "eval_steps_per_second": 0.466 }