{ "epoch": 1.0, "eval_logits/chosen": -2.103424549102783, "eval_logits/rejected": -2.0066123008728027, "eval_logps/chosen": -373.6857604980469, "eval_logps/rejected": -462.4768981933594, "eval_loss": 0.5127353072166443, "eval_rewards/accuracies": 0.7539682388305664, "eval_rewards/chosen": -0.9791162014007568, "eval_rewards/margins": 1.0174498558044434, "eval_rewards/margins_max": 3.5694239139556885, "eval_rewards/margins_min": -0.9504349827766418, "eval_rewards/margins_std": 1.5236564874649048, "eval_rewards/rejected": -1.9965660572052002, "eval_runtime": 386.5493, "eval_samples": 2000, "eval_samples_per_second": 5.174, "eval_steps_per_second": 0.163, "train_loss": 0.5503434051989385, "train_runtime": 43811.8216, "train_samples": 61134, "train_samples_per_second": 1.395, "train_steps_per_second": 0.087 }