{ "epoch": 3.0, "eval_logits/chosen": -2.3604142665863037, "eval_logits/rejected": -2.260056734085083, "eval_logps/chosen": -263.6833801269531, "eval_logps/rejected": -220.46438598632812, "eval_loss": 0.5873599648475647, "eval_rewards/accuracies": 0.6919999718666077, "eval_rewards/chosen": 0.033877428621053696, "eval_rewards/margins": 0.3345241844654083, "eval_rewards/rejected": -0.30064672231674194, "eval_runtime": 1147.3002, "eval_samples": 2000, "eval_samples_per_second": 1.743, "eval_steps_per_second": 0.109, "train_loss": 0.6181704732833159, "train_runtime": 220426.8026, "train_samples": 61966, "train_samples_per_second": 0.843, "train_steps_per_second": 0.003 }