{ "epoch": 0.9230769230769231, "eval_logits/chosen": -2.3219950199127197, "eval_logits/rejected": -2.219332218170166, "eval_logps/chosen": -277.3203430175781, "eval_logps/rejected": -226.97976684570312, "eval_loss": 0.6930660009384155, "eval_rewards/accuracies": 0.4642857015132904, "eval_rewards/chosen": -0.0008508563041687012, "eval_rewards/margins": -0.00032856108737178147, "eval_rewards/rejected": -0.0005222952459007502, "eval_runtime": 163.6884, "eval_samples": 1000, "eval_samples_per_second": 6.109, "eval_steps_per_second": 0.385, "total_flos": 0.0, "train_loss": 0.6933046579360962, "train_runtime": 107.7106, "train_samples": 200, "train_samples_per_second": 1.857, "train_steps_per_second": 0.028 }