{ "epoch": 1.0, "eval_logits/chosen": -1.9163823127746582, "eval_logits/rejected": -1.5818859338760376, "eval_logps/chosen": -168.4323272705078, "eval_logps/rejected": -2053.068603515625, "eval_loss": 0.04686330258846283, "eval_rewards/accuracies": 0.875, "eval_rewards/chosen": -0.778435230255127, "eval_rewards/margins": 13.74771499633789, "eval_rewards/rejected": -14.52614974975586, "eval_runtime": 4.8308, "eval_samples": 24, "eval_samples_per_second": 4.968, "eval_steps_per_second": 0.207, "train_loss": 0.23545521443209996, "train_runtime": 6883.5239, "train_samples": 20968, "train_samples_per_second": 3.046, "train_steps_per_second": 0.048 }