{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9947089947089947, "eval_steps": 500, "global_step": 94, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 25.005165779900267, "learning_rate": 1e-08, "logits/chosen": -1.9501205682754517, "logits/rejected": -2.513594388961792, "logps/chosen": -348.5884704589844, "logps/rejected": -166.58517456054688, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.11, "grad_norm": 23.68906488321, "learning_rate": 1e-07, "logits/chosen": -2.640984535217285, "logits/rejected": -2.219906806945801, "logps/chosen": -213.95584106445312, "logps/rejected": -198.17874145507812, "loss": 0.6932, "rewards/accuracies": 0.4930555522441864, "rewards/chosen": 0.0007135343039408326, "rewards/margins": 0.0006534373969770968, "rewards/rejected": 6.009703065501526e-05, "step": 10 }, { "epoch": 0.21, "grad_norm": 22.52231020802604, "learning_rate": 9.65436874322102e-08, "logits/chosen": -2.397062301635742, "logits/rejected": -2.3303606510162354, "logps/chosen": -257.8389587402344, "logps/rejected": -222.90444946289062, "loss": 0.6917, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.0030278589110821486, "rewards/margins": 0.002860091160982847, "rewards/rejected": 0.00016776802658569068, "step": 20 }, { "epoch": 0.32, "grad_norm": 24.931881082321315, "learning_rate": 8.665259359149131e-08, "logits/chosen": -2.534593105316162, "logits/rejected": -2.4346184730529785, "logps/chosen": -227.6776885986328, "logps/rejected": -204.8966064453125, "loss": 0.689, "rewards/accuracies": 0.75, "rewards/chosen": 0.00954088568687439, "rewards/margins": 0.00823338981717825, "rewards/rejected": 0.0013074951712042093, "step": 30 }, { "epoch": 0.42, "grad_norm": 24.657287539381663, "learning_rate": 7.16941869558779e-08, "logits/chosen": -2.358189105987549, "logits/rejected": -2.440410614013672, "logps/chosen": -230.328857421875, "logps/rejected": -210.7056427001953, "loss": 0.6846, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.018901044502854347, "rewards/margins": 0.01690484955906868, "rewards/rejected": 0.0019961954094469547, "step": 40 }, { "epoch": 0.53, "grad_norm": 24.026917311824274, "learning_rate": 5.373650467932121e-08, "logits/chosen": -2.379296064376831, "logits/rejected": -2.6683709621429443, "logps/chosen": -233.249267578125, "logps/rejected": -199.204833984375, "loss": 0.6805, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": 0.02693324163556099, "rewards/margins": 0.024523768573999405, "rewards/rejected": 0.002409472828730941, "step": 50 }, { "epoch": 0.63, "grad_norm": 23.069042390909715, "learning_rate": 3.5262241279454787e-08, "logits/chosen": -2.233121156692505, "logits/rejected": -2.644624710083008, "logps/chosen": -250.5662078857422, "logps/rejected": -173.79067993164062, "loss": 0.6762, "rewards/accuracies": 0.90625, "rewards/chosen": 0.03840692713856697, "rewards/margins": 0.03592415899038315, "rewards/rejected": 0.0024827648885548115, "step": 60 }, { "epoch": 0.74, "grad_norm": 23.60279896856753, "learning_rate": 1.8825509907063325e-08, "logits/chosen": -2.354218006134033, "logits/rejected": -2.420661449432373, "logps/chosen": -241.6086883544922, "logps/rejected": -207.097900390625, "loss": 0.6725, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.04547766596078873, "rewards/margins": 0.04097073897719383, "rewards/rejected": 0.004506924655288458, "step": 70 }, { "epoch": 0.85, "grad_norm": 23.640093404183123, "learning_rate": 6.698729810778064e-09, "logits/chosen": -2.2659950256347656, "logits/rejected": -2.467071056365967, "logps/chosen": -243.5118408203125, "logps/rejected": -205.4396209716797, "loss": 0.6739, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": 0.0479864627122879, "rewards/margins": 0.04288201406598091, "rewards/rejected": 0.005104447714984417, "step": 80 }, { "epoch": 0.95, "grad_norm": 22.97821730211384, "learning_rate": 5.584586887435739e-10, "logits/chosen": -2.3436455726623535, "logits/rejected": -2.367281436920166, "logps/chosen": -226.5314178466797, "logps/rejected": -200.30862426757812, "loss": 0.6745, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.043580617755651474, "rewards/margins": 0.037381939589977264, "rewards/rejected": 0.006198678631335497, "step": 90 }, { "epoch": 0.99, "step": 94, "total_flos": 0.0, "train_loss": 0.6816894855905087, "train_runtime": 1070.9433, "train_samples_per_second": 5.639, "train_steps_per_second": 0.088 } ], "logging_steps": 10, "max_steps": 94, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }