{ "epoch": 3.97, "eval_logits/chosen": -0.0017466560238972306, "eval_logits/rejected": 0.09523628652095795, "eval_logps/chosen": -306.2304382324219, "eval_logps/rejected": -278.4661865234375, "eval_loss": 0.0019584796391427517, "eval_rewards/accuracies": 0.49900001287460327, "eval_rewards/chosen": 0.0013964117970317602, "eval_rewards/margins": -2.741074604273308e-05, "eval_rewards/rejected": 0.0014238222502171993, "eval_runtime": 420.8882, "eval_samples": 2000, "eval_samples_per_second": 4.752, "eval_steps_per_second": 1.188, "train_loss": 0.0014411601897013643, "train_runtime": 2755.6406, "train_samples": 61135, "train_samples_per_second": 1.452, "train_steps_per_second": 0.09 }