{ "epoch": 1.0, "eval_logits/chosen": -0.4878213703632355, "eval_logits/rejected": -0.4049469530582428, "eval_logps/chosen": -456.8681945800781, "eval_logps/rejected": -1991.3533935546875, "eval_loss": 0.14216427505016327, "eval_rewards/accuracies": 0.9166666865348816, "eval_rewards/chosen": -1.2709063291549683, "eval_rewards/margins": 11.992403984069824, "eval_rewards/rejected": -13.263309478759766, "eval_runtime": 29.9163, "eval_samples": 192, "eval_samples_per_second": 6.418, "eval_steps_per_second": 0.201, "train_loss": 0.22599055091055428, "train_runtime": 7017.6011, "train_samples": 20961, "train_samples_per_second": 2.987, "train_steps_per_second": 0.047 }