{ "epoch": 0.99, "eval_logits/chosen": -1.2204631567001343, "eval_logits/rejected": -1.2738878726959229, "eval_logps/chosen": -92.67973327636719, "eval_logps/rejected": -656.5872802734375, "eval_loss": 0.6677024960517883, "eval_rewards/accuracies": 0.875, "eval_rewards/chosen": -0.04467713087797165, "eval_rewards/margins": 0.0837688222527504, "eval_rewards/rejected": -0.12844595313072205, "eval_runtime": 9.5521, "eval_samples": 56, "eval_samples_per_second": 5.863, "eval_steps_per_second": 0.209, "train_loss": 0.5556549103029312, "train_runtime": 1091.242, "train_samples": 3974, "train_samples_per_second": 3.642, "train_steps_per_second": 0.057 }