{ "epoch": 3.97, "eval_logits/chosen": -0.06500499695539474, "eval_logits/rejected": 0.03410104289650917, "eval_logps/chosen": -307.4422302246094, "eval_logps/rejected": -279.5290832519531, "eval_loss": 0.0018018082482740283, "eval_rewards/accuracies": 0.49549999833106995, "eval_rewards/chosen": -0.006038154941052198, "eval_rewards/margins": -0.00034422188764438033, "eval_rewards/rejected": -0.0056939334608614445, "eval_runtime": 512.3455, "eval_samples": 2000, "eval_samples_per_second": 3.904, "eval_steps_per_second": 0.976, "train_loss": 0.0010581601813863663, "train_runtime": 3353.9187, "train_samples": 61135, "train_samples_per_second": 1.193, "train_steps_per_second": 0.074 }