{ "epoch": 1.0, "eval_logits/chosen": -0.3531719446182251, "eval_logits/rejected": -0.21002119779586792, "eval_logps/chosen": -567.3484497070312, "eval_logps/rejected": -2031.565185546875, "eval_loss": 0.240110382437706, "eval_rewards/accuracies": 0.8942307829856873, "eval_rewards/chosen": -1.0927813053131104, "eval_rewards/margins": 12.077652931213379, "eval_rewards/rejected": -13.17043399810791, "eval_runtime": 41.3566, "eval_samples": 396, "eval_samples_per_second": 9.575, "eval_steps_per_second": 0.314, "train_loss": 0.23018195102980107, "train_runtime": 4792.5755, "train_samples": 20757, "train_samples_per_second": 4.331, "train_steps_per_second": 0.068 }