{ "epoch": 1.0, "eval_logits/chosen": -2.550062417984009, "eval_logits/rejected": -2.519716262817383, "eval_logps/chosen": -309.5232849121094, "eval_logps/rejected": -306.5199279785156, "eval_loss": 0.5869344472885132, "eval_rewards/accuracies": 0.7089999914169312, "eval_rewards/chosen": -0.2480395883321762, "eval_rewards/margins": 0.3042061924934387, "eval_rewards/rejected": -0.5522457957267761, "eval_runtime": 469.9661, "eval_samples": 2000, "eval_samples_per_second": 4.256, "eval_steps_per_second": 1.064, "train_loss": 0.6071469678956156, "train_runtime": 31823.939, "train_samples": 61135, "train_samples_per_second": 1.921, "train_steps_per_second": 0.24 }