{ "epoch": 3.0, "eval_log_odds_chosen": 0.23976314067840576, "eval_log_odds_ratio": -0.6928443908691406, "eval_logits/chosen": 340.5321350097656, "eval_logits/rejected": 312.9670104980469, "eval_logps/chosen": -1.0267834663391113, "eval_logps/rejected": -1.1786600351333618, "eval_loss": 1.455647587776184, "eval_nll_loss": 1.4095592498779297, "eval_rewards/accuracies": 0.5107913613319397, "eval_rewards/chosen": -0.051339175552129745, "eval_rewards/margins": 0.0075938161462545395, "eval_rewards/rejected": -0.058932989835739136, "eval_runtime": 90.3296, "eval_samples": 553, "eval_samples_per_second": 6.122, "eval_steps_per_second": 1.539, "total_flos": 0.0, "train_loss": 0.968865410152301, "train_runtime": 16784.8411, "train_samples": 5034, "train_samples_per_second": 0.9, "train_steps_per_second": 0.225 }