{ "epoch": 1.0, "eval_logits/chosen": -0.4925585687160492, "eval_logits/rejected": -0.15963555872440338, "eval_logps/chosen": -334.72607421875, "eval_logps/rejected": -671.7160034179688, "eval_loss": 0.6914218664169312, "eval_rewards/accuracies": 0.6104797720909119, "eval_rewards/chosen": -0.001606568112038076, "eval_rewards/margins": 0.003300695214420557, "eval_rewards/rejected": -0.004907263442873955, "eval_runtime": 630.223, "eval_samples": 6317, "eval_samples_per_second": 10.023, "eval_steps_per_second": 0.314, "train_loss": 0.6694252616480777, "train_runtime": 502.7814, "train_samples": 2413, "train_samples_per_second": 4.799, "train_steps_per_second": 0.076 }