{ "epoch": 1.0, "eval_logits/chosen": -1.8041099309921265, "eval_logits/rejected": -1.6730901002883911, "eval_logps/chosen": -373.1398620605469, "eval_logps/rejected": -354.2070617675781, "eval_loss": 0.6285176873207092, "eval_rewards/accuracies": 0.6805555820465088, "eval_rewards/chosen": -0.30501797795295715, "eval_rewards/margins": 0.23024281859397888, "eval_rewards/rejected": -0.535260796546936, "eval_runtime": 337.0045, "eval_samples": 2000, "eval_samples_per_second": 5.935, "eval_steps_per_second": 0.187, "train_loss": 0.6456358959537526, "train_runtime": 17140.5232, "train_samples": 61135, "train_samples_per_second": 3.567, "train_steps_per_second": 0.056 }