{ "epoch": 2.986666666666667, "eval_logits/chosen": -0.4098573923110962, "eval_logits/rejected": -0.48421889543533325, "eval_logps/chosen": -0.8694608807563782, "eval_logps/rejected": -1.295823335647583, "eval_loss": 0.9151002764701843, "eval_odds_ratio_loss": 8.100966453552246, "eval_rewards/accuracies": 0.8399999737739563, "eval_rewards/chosen": -0.08694608509540558, "eval_rewards/margins": 0.04263625293970108, "eval_rewards/rejected": -0.12958233058452606, "eval_runtime": 2.0333, "eval_samples_per_second": 24.59, "eval_sft_loss": 0.10500368475914001, "eval_steps_per_second": 12.295 }