{ "epoch": 1.0, "eval_logits/chosen": -2.8302695751190186, "eval_logits/rejected": -2.8291618824005127, "eval_logps/chosen": -283.5841369628906, "eval_logps/rejected": -264.64422607421875, "eval_loss": 0.5655555129051208, "eval_pred_label": 0.0, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": 0.06732505559921265, "eval_rewards/margins": 0.6017746329307556, "eval_rewards/rejected": -0.534449577331543, "eval_runtime": 457.1465, "eval_samples": 2000, "eval_samples_per_second": 4.375, "eval_steps_per_second": 0.273, "eval_use_label": 0.0 }