{ "epoch": 1.0, "eval_logits/chosen": -2.6475114822387695, "eval_logits/rejected": -3.464193344116211, "eval_logps/chosen": -984.8101806640625, "eval_logps/rejected": -1480.8428955078125, "eval_loss": 0.4465055763721466, "eval_rewards/accuracies": 0.7845849990844727, "eval_rewards/chosen": -0.5929392576217651, "eval_rewards/margins": 1.0842887163162231, "eval_rewards/rejected": -1.6772277355194092, "eval_runtime": 1610.6329, "eval_samples": 16161, "eval_samples_per_second": 10.034, "eval_steps_per_second": 0.314, "train_loss": 0.2915988182410216, "train_runtime": 1026.489, "train_samples": 4992, "train_samples_per_second": 4.863, "train_steps_per_second": 0.076 }