{ "epoch": 1.0, "eval_logits/chosen": -2.822110891342163, "eval_logits/rejected": -2.8178136348724365, "eval_logps/chosen": -285.4951171875, "eval_logps/rejected": -270.55743408203125, "eval_loss": 0.5198934078216553, "eval_pred_label": 0.0, "eval_rewards/accuracies": 0.7300000190734863, "eval_rewards/chosen": -0.12377375364303589, "eval_rewards/margins": 1.001997470855713, "eval_rewards/rejected": -1.1257712841033936, "eval_runtime": 453.8128, "eval_samples": 2000, "eval_samples_per_second": 4.407, "eval_steps_per_second": 0.275, "eval_use_label": 0.0, "train_loss": 0.5321606655040877, "train_runtime": 24451.2028, "train_samples": 61135, "train_samples_per_second": 2.5, "train_steps_per_second": 0.02 }