{ "epoch": 1.0, "eval_logits/chosen": -2.7866196632385254, "eval_logits/rejected": -2.781510353088379, "eval_logps/chosen": -289.3619689941406, "eval_logps/rejected": -273.2136535644531, "eval_loss": 0.5273504853248596, "eval_pred_label": 0.0, "eval_rewards/accuracies": 0.7579365372657776, "eval_rewards/chosen": -0.08383701741695404, "eval_rewards/margins": 1.0735403299331665, "eval_rewards/rejected": -1.1573774814605713, "eval_runtime": 151.8269, "eval_samples": 2000, "eval_samples_per_second": 13.173, "eval_steps_per_second": 0.415, "eval_use_label": 0.0, "train_loss": 0.5461576643349236, "train_runtime": 10580.1696, "train_samples": 61135, "train_samples_per_second": 5.778, "train_steps_per_second": 0.09 }