{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.995053003533569, "eval_steps": 500, "global_step": 88, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05653710247349823, "grad_norm": 0.23830582201480865, "learning_rate": 2.7777777777777783e-06, "logits/chosen": 0.5033482313156128, "logits/rejected": 0.08856754004955292, "logps/chosen": -0.1221993938088417, "logps/rejected": -1.4663223028182983, "loss": 0.8881, "rewards/accuracies": 1.0, "rewards/chosen": -0.012219938449561596, "rewards/margins": 0.13441228866577148, "rewards/rejected": -0.14663222432136536, "step": 5 }, { "epoch": 0.11307420494699646, "grad_norm": 0.20615892112255096, "learning_rate": 4.998023493068255e-06, "logits/chosen": 0.5648357272148132, "logits/rejected": 0.2297288179397583, "logps/chosen": -0.16152355074882507, "logps/rejected": -1.4754002094268799, "loss": 0.8913, "rewards/accuracies": 1.0, "rewards/chosen": -0.016152355819940567, "rewards/margins": 0.13138766586780548, "rewards/rejected": -0.14754004776477814, "step": 10 }, { "epoch": 0.1696113074204947, "grad_norm": 0.22748805582523346, "learning_rate": 4.929173350101025e-06, "logits/chosen": 0.5755559206008911, "logits/rejected": 0.2600894570350647, "logps/chosen": -0.16774430871009827, "logps/rejected": -1.666832685470581, "loss": 0.8848, "rewards/accuracies": 1.0, "rewards/chosen": -0.016774429008364677, "rewards/margins": 0.1499088555574417, "rewards/rejected": -0.16668327152729034, "step": 15 }, { "epoch": 0.22614840989399293, "grad_norm": 0.23899538815021515, "learning_rate": 4.764600984163809e-06, "logits/chosen": 0.6155421137809753, "logits/rejected": 0.31905263662338257, "logps/chosen": -0.1800321787595749, "logps/rejected": -1.646435022354126, "loss": 0.8808, "rewards/accuracies": 1.0, "rewards/chosen": -0.01800321787595749, "rewards/margins": 0.14664028584957123, "rewards/rejected": -0.16464349627494812, "step": 20 }, { "epoch": 0.2826855123674912, "grad_norm": 0.2510228455066681, "learning_rate": 4.510791413176912e-06, "logits/chosen": 0.9448992609977722, "logits/rejected": 0.43348661065101624, "logps/chosen": -0.43118977546691895, "logps/rejected": -2.275934934616089, "loss": 0.8694, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.043118976056575775, "rewards/margins": 0.18447449803352356, "rewards/rejected": -0.22759349644184113, "step": 25 }, { "epoch": 0.3392226148409894, "grad_norm": 0.33294302225112915, "learning_rate": 4.177746070897593e-06, "logits/chosen": 1.037989616394043, "logits/rejected": 0.6013290286064148, "logps/chosen": -0.41320762038230896, "logps/rejected": -2.21561598777771, "loss": 0.864, "rewards/accuracies": 1.0, "rewards/chosen": -0.041320763528347015, "rewards/margins": 0.18024082481861115, "rewards/rejected": -0.22156158089637756, "step": 30 }, { "epoch": 0.3957597173144876, "grad_norm": 0.3022996783256531, "learning_rate": 3.7785886977585562e-06, "logits/chosen": 1.0428460836410522, "logits/rejected": 0.7501012086868286, "logps/chosen": -0.5169800519943237, "logps/rejected": -2.2174105644226074, "loss": 0.8575, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.05169800668954849, "rewards/margins": 0.17004306614398956, "rewards/rejected": -0.22174108028411865, "step": 35 }, { "epoch": 0.45229681978798586, "grad_norm": 0.4065157175064087, "learning_rate": 3.32904819638017e-06, "logits/chosen": 1.144087553024292, "logits/rejected": 0.8252680897712708, "logps/chosen": -0.7040660381317139, "logps/rejected": -2.8310651779174805, "loss": 0.853, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07040660083293915, "rewards/margins": 0.21269993484020233, "rewards/rejected": -0.2831065058708191, "step": 40 }, { "epoch": 0.508833922261484, "grad_norm": 0.39310887455940247, "learning_rate": 2.8468388299726714e-06, "logits/chosen": 1.2338650226593018, "logits/rejected": 0.9706697463989258, "logps/chosen": -0.7117434740066528, "logps/rejected": -2.958108425140381, "loss": 0.8404, "rewards/accuracies": 1.0, "rewards/chosen": -0.07117435336112976, "rewards/margins": 0.224636510014534, "rewards/rejected": -0.29581087827682495, "step": 45 }, { "epoch": 0.5653710247349824, "grad_norm": 0.40744879841804504, "learning_rate": 2.3509621870754505e-06, "logits/chosen": 1.4577362537384033, "logits/rejected": 1.0853009223937988, "logps/chosen": -0.9320166707038879, "logps/rejected": -3.0959839820861816, "loss": 0.8421, "rewards/accuracies": 1.0, "rewards/chosen": -0.0932016670703888, "rewards/margins": 0.216396763920784, "rewards/rejected": -0.309598445892334, "step": 50 }, { "epoch": 0.6219081272084805, "grad_norm": 0.5040230751037598, "learning_rate": 1.8609584188988135e-06, "logits/chosen": 1.6823539733886719, "logits/rejected": 1.301403284072876, "logps/chosen": -1.3072410821914673, "logps/rejected": -3.916767120361328, "loss": 0.8195, "rewards/accuracies": 1.0, "rewards/chosen": -0.13072410225868225, "rewards/margins": 0.2609526216983795, "rewards/rejected": -0.39167672395706177, "step": 55 }, { "epoch": 0.6784452296819788, "grad_norm": 0.4802953600883484, "learning_rate": 1.3961362544602215e-06, "logits/chosen": 1.7860639095306396, "logits/rejected": 1.3450249433517456, "logps/chosen": -1.3377199172973633, "logps/rejected": -3.9463648796081543, "loss": 0.825, "rewards/accuracies": 1.0, "rewards/chosen": -0.13377198576927185, "rewards/margins": 0.2608645260334015, "rewards/rejected": -0.39463648200035095, "step": 60 }, { "epoch": 0.734982332155477, "grad_norm": 0.5378066897392273, "learning_rate": 9.74812134973689e-07, "logits/chosen": 1.9130666255950928, "logits/rejected": 1.3870166540145874, "logps/chosen": -1.5210387706756592, "logps/rejected": -4.175502777099609, "loss": 0.8111, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.15210385620594025, "rewards/margins": 0.2654464542865753, "rewards/rejected": -0.417550265789032, "step": 65 }, { "epoch": 0.7915194346289752, "grad_norm": 0.646070122718811, "learning_rate": 6.135884496044245e-07, "logits/chosen": 1.7755920886993408, "logits/rejected": 1.4310863018035889, "logps/chosen": -1.7617400884628296, "logps/rejected": -4.471417427062988, "loss": 0.8105, "rewards/accuracies": 1.0, "rewards/chosen": -0.17617401480674744, "rewards/margins": 0.2709676921367645, "rewards/rejected": -0.44714173674583435, "step": 70 }, { "epoch": 0.8480565371024735, "grad_norm": 0.691250741481781, "learning_rate": 3.266993139010438e-07, "logits/chosen": 2.0647330284118652, "logits/rejected": 1.6562068462371826, "logps/chosen": -1.9144256114959717, "logps/rejected": -5.297122955322266, "loss": 0.798, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.1914425641298294, "rewards/margins": 0.33826974034309387, "rewards/rejected": -0.5297123193740845, "step": 75 }, { "epoch": 0.9045936395759717, "grad_norm": 0.7118372321128845, "learning_rate": 1.2544967068054332e-07, "logits/chosen": 2.001556158065796, "logits/rejected": 1.5731197595596313, "logps/chosen": -1.788537621498108, "logps/rejected": -4.781277656555176, "loss": 0.81, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.17885378003120422, "rewards/margins": 0.29927393794059753, "rewards/rejected": -0.47812777757644653, "step": 80 }, { "epoch": 0.9611307420494699, "grad_norm": 0.649433434009552, "learning_rate": 1.7769815745066476e-08, "logits/chosen": 1.9461923837661743, "logits/rejected": 1.6117980480194092, "logps/chosen": -2.07262921333313, "logps/rejected": -4.415001392364502, "loss": 0.8108, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.20726290345191956, "rewards/margins": 0.23423722386360168, "rewards/rejected": -0.44150012731552124, "step": 85 }, { "epoch": 0.995053003533569, "step": 88, "total_flos": 7.975833940315341e+17, "train_loss": 0.8439869067885659, "train_runtime": 2265.8916, "train_samples_per_second": 4.995, "train_steps_per_second": 0.039 } ], "logging_steps": 5, "max_steps": 88, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.975833940315341e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }