{ "epoch": 1.0, "eval_logits/chosen": -0.24525223672389984, "eval_logits/rejected": -0.1380803883075714, "eval_logps/chosen": -103.02954864501953, "eval_logps/rejected": -131.91891479492188, "eval_loss": 0.6906174421310425, "eval_pred_label": 4192.14306640625, "eval_rewards/accuracies": 0.363095223903656, "eval_rewards/chosen": -0.3412899374961853, "eval_rewards/margins": 0.22391849756240845, "eval_rewards/rejected": -0.5652084350585938, "eval_runtime": 247.5585, "eval_samples": 2000, "eval_samples_per_second": 8.079, "eval_steps_per_second": 0.254, "eval_use_label": 15879.857421875, "train_loss": 0.6880922077838039, "train_runtime": 20023.3666, "train_samples": 61135, "train_samples_per_second": 3.053, "train_steps_per_second": 0.048 }