{ "epoch": 1.0, "eval_logits/chosen": 0.47494596242904663, "eval_logits/rejected": -0.5924796462059021, "eval_logps/chosen": -408.63677978515625, "eval_logps/rejected": -1010.3619384765625, "eval_loss": 0.6588297486305237, "eval_rewards/accuracies": 0.7911764979362488, "eval_rewards/chosen": 0.047554861754179, "eval_rewards/margins": 0.07667416334152222, "eval_rewards/rejected": -0.029119295999407768, "eval_runtime": 302.3402, "eval_samples": 2712, "eval_samples_per_second": 8.97, "eval_steps_per_second": 0.281, "train_loss": 0.6376642821327089, "train_runtime": 2213.5067, "train_samples": 8087, "train_samples_per_second": 3.653, "train_steps_per_second": 0.057 }