{ "epoch": 1.0, "eval_logits/chosen": 0.39978858828544617, "eval_logits/rejected": 1.363898754119873, "eval_logps/chosen": -552.1046142578125, "eval_logps/rejected": -778.7791137695312, "eval_loss": 0.4203870892524719, "eval_rewards/accuracies": 0.8019999861717224, "eval_rewards/chosen": -1.9644380807876587, "eval_rewards/margins": 1.633329153060913, "eval_rewards/rejected": -3.5977675914764404, "eval_runtime": 1376.4504, "eval_samples": 2000, "eval_samples_per_second": 1.453, "eval_steps_per_second": 0.363, "train_loss": 0.43769783746998087, "train_runtime": 91189.3347, "train_samples": 61135, "train_samples_per_second": 0.67, "train_steps_per_second": 0.042 }