{ "epoch": 1.0, "eval_logits/chosen": -2.7731637954711914, "eval_logits/rejected": -2.7412285804748535, "eval_logps/chosen": -303.5331115722656, "eval_logps/rejected": -303.9143371582031, "eval_loss": 0.6104059815406799, "eval_rewards/accuracies": 0.6869999766349792, "eval_rewards/chosen": -0.1893969476222992, "eval_rewards/margins": 0.26395705342292786, "eval_rewards/margins_max": 1.0323494672775269, "eval_rewards/margins_min": -0.46421656012535095, "eval_rewards/margins_std": 0.5045729875564575, "eval_rewards/rejected": -0.45335400104522705, "eval_runtime": 428.0701, "eval_samples": 2000, "eval_samples_per_second": 4.672, "eval_steps_per_second": 0.292, "train_loss": 0.6308559574254741, "train_runtime": 3887.3613, "train_samples": 5263, "train_samples_per_second": 1.354, "train_steps_per_second": 0.085 }