{ "epoch": 1.98, "eval_logits/chosen": -0.009708462283015251, "eval_logits/rejected": 0.08755116909742355, "eval_logps/chosen": -306.4169006347656, "eval_logps/rejected": -278.6773681640625, "eval_loss": 0.010828138329088688, "eval_rewards/accuracies": 0.49149999022483826, "eval_rewards/chosen": -0.00047626858577132225, "eval_rewards/margins": -0.00013606167340185493, "eval_rewards/rejected": -0.00034020686871372163, "eval_runtime": 840.6964, "eval_samples": 2000, "eval_samples_per_second": 2.379, "eval_steps_per_second": 0.595, "train_loss": 0.0102488252845022, "train_runtime": 3093.4739, "train_samples": 61135, "train_samples_per_second": 0.647, "train_steps_per_second": 0.04 }