{ "epoch": 2.997999555456768, "eval_logits/chosen": -0.3320940136909485, "eval_logits/rejected": -0.29884636402130127, "eval_logps/chosen": -0.9399133324623108, "eval_logps/rejected": -1.080655574798584, "eval_loss": 1.0073015689849854, "eval_odds_ratio_loss": 0.6738813519477844, "eval_rewards/accuracies": 0.515999972820282, "eval_rewards/chosen": -0.09399133920669556, "eval_rewards/margins": 0.01407422125339508, "eval_rewards/rejected": -0.10806556046009064, "eval_runtime": 185.8105, "eval_samples_per_second": 5.382, "eval_sft_loss": 0.9399133324623108, "eval_steps_per_second": 2.691, "total_flos": 1.8817568285770383e+18, "train_loss": 1.0353579054523618, "train_runtime": 16950.0138, "train_samples_per_second": 1.593, "train_steps_per_second": 0.099 }