{ "epoch": 1.971563981042654, "eval_logits/chosen": 113.8158187866211, "eval_logits/rejected": 108.58926391601562, "eval_logps/chosen": -333.64825439453125, "eval_logps/rejected": -365.6023254394531, "eval_loss": 0.2953260540962219, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": 1.500998854637146, "eval_rewards/margins": 1.6775684356689453, "eval_rewards/rejected": -0.17656946182250977, "eval_runtime": 122.5277, "eval_samples": 750, "eval_samples_per_second": 6.121, "eval_steps_per_second": 0.196, "total_flos": 0.0, "train_loss": 0.2986852119748409, "train_runtime": 2293.3212, "train_samples": 6750, "train_samples_per_second": 5.887, "train_steps_per_second": 0.045 }