{ "epoch": 1.971563981042654, "eval_logits/chosen": 96.71578216552734, "eval_logits/rejected": 90.98221588134766, "eval_logps/chosen": -423.6227722167969, "eval_logps/rejected": -453.7782287597656, "eval_loss": 0.468290776014328, "eval_rewards/accuracies": 0.7708333134651184, "eval_rewards/chosen": -3.0221338272094727, "eval_rewards/margins": 1.6591955423355103, "eval_rewards/rejected": -4.681329727172852, "eval_runtime": 58.6185, "eval_samples": 750, "eval_samples_per_second": 12.795, "eval_steps_per_second": 0.409, "total_flos": 0.0, "train_loss": 0.3921648321243433, "train_runtime": 1190.3032, "train_samples": 6750, "train_samples_per_second": 11.342, "train_steps_per_second": 0.087 }