{ "epoch": 0.9984301412872841, "eval_logits/chosen": 2.7991323471069336, "eval_logits/rejected": 3.318207263946533, "eval_logps/chosen": -358.6744689941406, "eval_logps/rejected": -462.7060546875, "eval_loss": 0.5032069683074951, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -0.9892802238464355, "eval_rewards/margins": 1.0340925455093384, "eval_rewards/rejected": -2.0233726501464844, "eval_runtime": 134.6793, "eval_samples": 2000, "eval_samples_per_second": 14.85, "eval_steps_per_second": 0.238, "total_flos": 0.0, "train_loss": 0.5498417743346976, "train_runtime": 10726.7175, "train_samples": 61134, "train_samples_per_second": 5.699, "train_steps_per_second": 0.044 }