{ "epoch": 2.0, "eval_logits/chosen": -2.3005340099334717, "eval_logits/rejected": -2.3817031383514404, "eval_logps/chosen": -316.1898498535156, "eval_logps/rejected": -322.1933898925781, "eval_loss": 0.4346597194671631, "eval_rewards/accuracies": 0.7658227682113647, "eval_rewards/chosen": -0.9460535049438477, "eval_rewards/margins": 1.8284220695495605, "eval_rewards/rejected": -2.7744758129119873, "eval_runtime": 117.6177, "eval_samples": 2500, "eval_samples_per_second": 21.255, "eval_steps_per_second": 0.672, "train_loss": 0.5184940074794384, "train_runtime": 19743.6623, "train_samples": 73494, "train_samples_per_second": 7.445, "train_steps_per_second": 0.058 }