{ "epoch": 0.99, "eval_logits/chosen": 105.48260498046875, "eval_logits/rejected": 99.1939697265625, "eval_logps/chosen": -473.2679748535156, "eval_logps/rejected": -486.23388671875, "eval_loss": 0.6159864068031311, "eval_rewards/accuracies": 0.7083333134651184, "eval_rewards/chosen": -1.986350178718567, "eval_rewards/margins": 1.4820828437805176, "eval_rewards/rejected": -3.468433141708374, "eval_runtime": 50.9713, "eval_samples": 750, "eval_samples_per_second": 14.714, "eval_steps_per_second": 0.471, "train_loss": 0.6733019076860868, "train_runtime": 575.7905, "train_samples": 6750, "train_samples_per_second": 11.723, "train_steps_per_second": 0.09 }