{ "epoch": 1.0, "eval_logits/chosen": -1.4990795850753784, "eval_logits/rejected": -1.133651852607727, "eval_logps/chosen": -672.6758422851562, "eval_logps/rejected": -832.4777221679688, "eval_loss": 0.5156374573707581, "eval_rewards/accuracies": 0.7494999766349792, "eval_rewards/chosen": -4.080562114715576, "eval_rewards/margins": 1.7984988689422607, "eval_rewards/rejected": -5.879061698913574, "eval_runtime": 1082.4235, "eval_samples": 2000, "eval_samples_per_second": 1.848, "eval_steps_per_second": 1.848, "train_loss": 0.5464587531257954, "train_runtime": 239691.3187, "train_samples": 61135, "train_samples_per_second": 0.255, "train_steps_per_second": 0.064 }