{ "epoch": 1.0, "eval_dpo_losses": 0.6679562926292419, "eval_logits/chosen": -2.784212112426758, "eval_logits/rejected": -2.7470052242279053, "eval_logps/chosen": -269.742919921875, "eval_logps/rejected": -249.1260223388672, "eval_loss": 0.688105046749115, "eval_positive_losses": 0.09528277069330215, "eval_rewards/accuracies": 0.6919999718666077, "eval_rewards/chosen": 0.14850492775440216, "eval_rewards/margins": 0.05397578701376915, "eval_rewards/margins_max": 0.21397222578525543, "eval_rewards/margins_min": -0.09459882229566574, "eval_rewards/margins_std": 0.10261555016040802, "eval_rewards/rejected": 0.09452912956476212, "eval_runtime": 428.7571, "eval_samples": 2000, "eval_samples_per_second": 4.665, "eval_steps_per_second": 0.292 }