{ "epoch": 2.0, "eval_logits/chosen": 0.7325530052185059, "eval_logits/rejected": 0.7531598806381226, "eval_logps/chosen": -94.8434829711914, "eval_logps/rejected": -98.55415344238281, "eval_loss": 0.687246561050415, "eval_rewards/accuracies": 0.33399999141693115, "eval_rewards/chosen": -0.03116113506257534, "eval_rewards/margins": 0.012410260736942291, "eval_rewards/rejected": -0.04357139766216278, "eval_runtime": 272.5724, "eval_samples": 2000, "eval_samples_per_second": 7.337, "eval_steps_per_second": 0.459, "train_loss": 0.3994182998029441, "train_runtime": 8026.3391, "train_samples": 30567, "train_samples_per_second": 7.617, "train_steps_per_second": 0.119 }