{ "epoch": 1.971563981042654, "eval_logits/chosen": 113.6686782836914, "eval_logits/rejected": 108.56292724609375, "eval_logps/chosen": -332.4104919433594, "eval_logps/rejected": -363.9787902832031, "eval_loss": 0.29782694578170776, "eval_rewards/accuracies": 0.6979166865348816, "eval_rewards/chosen": 1.5628873109817505, "eval_rewards/margins": 1.6582797765731812, "eval_rewards/rejected": -0.09539230912923813, "eval_runtime": 121.439, "eval_samples": 750, "eval_samples_per_second": 6.176, "eval_steps_per_second": 0.198, "total_flos": 0.0, "train_loss": 0.29816230271871275, "train_runtime": 2165.9381, "train_samples": 6750, "train_samples_per_second": 6.233, "train_steps_per_second": 0.048 }