{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.976501305483029, "eval_steps": 500, "global_step": 285, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05221932114882506, "grad_norm": 4.916470527648926, "learning_rate": 4.996203791083291e-05, "loss": 3.0294, "step": 5 }, { "epoch": 0.10443864229765012, "grad_norm": 2.873619318008423, "learning_rate": 4.984826693294874e-05, "loss": 2.7312, "step": 10 }, { "epoch": 0.1566579634464752, "grad_norm": 1.968766689300537, "learning_rate": 4.965903258506806e-05, "loss": 2.6282, "step": 15 }, { "epoch": 0.20887728459530025, "grad_norm": 2.165731430053711, "learning_rate": 4.9394909565685894e-05, "loss": 2.5533, "step": 20 }, { "epoch": 0.26109660574412535, "grad_norm": 2.0350794792175293, "learning_rate": 4.905670000773126e-05, "loss": 2.5412, "step": 25 }, { "epoch": 0.3133159268929504, "grad_norm": 1.8538161516189575, "learning_rate": 4.864543104251587e-05, "loss": 2.5643, "step": 30 }, { "epoch": 0.36553524804177545, "grad_norm": 1.6806731224060059, "learning_rate": 4.8162351680370044e-05, "loss": 2.5158, "step": 35 }, { "epoch": 0.4177545691906005, "grad_norm": 1.7877177000045776, "learning_rate": 4.760892901743944e-05, "loss": 2.5183, "step": 40 }, { "epoch": 0.4699738903394256, "grad_norm": 1.8654683828353882, "learning_rate": 4.698684378016222e-05, "loss": 2.496, "step": 45 }, { "epoch": 0.5221932114882507, "grad_norm": 1.7663869857788086, "learning_rate": 4.629798522095818e-05, "loss": 2.5069, "step": 50 }, { "epoch": 0.5744125326370757, "grad_norm": 1.914778470993042, "learning_rate": 4.554444538063113e-05, "loss": 2.4605, "step": 55 }, { "epoch": 0.6266318537859008, "grad_norm": 1.7610474824905396, "learning_rate": 4.4728512734909844e-05, "loss": 2.4223, "step": 60 }, { "epoch": 0.6788511749347258, "grad_norm": 1.6278265714645386, "learning_rate": 4.385266524442241e-05, "loss": 2.4468, "step": 65 }, { "epoch": 0.7310704960835509, "grad_norm": 1.709999442100525, "learning_rate": 4.2919562829211283e-05, "loss": 2.3933, "step": 70 }, { "epoch": 0.783289817232376, "grad_norm": 1.6477118730545044, "learning_rate": 4.193203929064353e-05, "loss": 2.454, "step": 75 }, { "epoch": 0.835509138381201, "grad_norm": 1.584774136543274, "learning_rate": 4.089309370524921e-05, "loss": 2.4139, "step": 80 }, { "epoch": 0.8877284595300261, "grad_norm": 1.5124833583831787, "learning_rate": 3.9805881316624506e-05, "loss": 2.3528, "step": 85 }, { "epoch": 0.9399477806788512, "grad_norm": 1.6704648733139038, "learning_rate": 3.867370395306068e-05, "loss": 2.3643, "step": 90 }, { "epoch": 0.9921671018276762, "grad_norm": 1.619065761566162, "learning_rate": 3.7500000000000003e-05, "loss": 2.3584, "step": 95 }, { "epoch": 1.0443864229765012, "grad_norm": 1.49176824092865, "learning_rate": 3.628833395777224e-05, "loss": 2.1535, "step": 100 }, { "epoch": 1.0966057441253263, "grad_norm": 1.5439894199371338, "learning_rate": 3.504238561632424e-05, "loss": 2.0786, "step": 105 }, { "epoch": 1.1488250652741514, "grad_norm": 1.5870767831802368, "learning_rate": 3.376593887981887e-05, "loss": 2.0251, "step": 110 }, { "epoch": 1.2010443864229765, "grad_norm": 1.5849578380584717, "learning_rate": 3.246287027504237e-05, "loss": 2.0486, "step": 115 }, { "epoch": 1.2532637075718016, "grad_norm": 1.5910005569458008, "learning_rate": 3.1137137178519985e-05, "loss": 2.003, "step": 120 }, { "epoch": 1.3054830287206267, "grad_norm": 1.5487288236618042, "learning_rate": 2.9792765798093465e-05, "loss": 2.0196, "step": 125 }, { "epoch": 1.3577023498694518, "grad_norm": 1.572863221168518, "learning_rate": 2.8433838945460205e-05, "loss": 1.9855, "step": 130 }, { "epoch": 1.4099216710182767, "grad_norm": 1.5365219116210938, "learning_rate": 2.7064483636808313e-05, "loss": 2.0136, "step": 135 }, { "epoch": 1.4621409921671018, "grad_norm": 1.619452714920044, "learning_rate": 2.5688858559204053e-05, "loss": 1.9636, "step": 140 }, { "epoch": 1.514360313315927, "grad_norm": 1.5345263481140137, "learning_rate": 2.4311141440795953e-05, "loss": 1.9941, "step": 145 }, { "epoch": 1.566579634464752, "grad_norm": 1.59757661819458, "learning_rate": 2.2935516363191693e-05, "loss": 1.9606, "step": 150 }, { "epoch": 1.6187989556135771, "grad_norm": 1.493499994277954, "learning_rate": 2.1566161054539798e-05, "loss": 2.0351, "step": 155 }, { "epoch": 1.671018276762402, "grad_norm": 1.5033268928527832, "learning_rate": 2.0207234201906547e-05, "loss": 1.9508, "step": 160 }, { "epoch": 1.723237597911227, "grad_norm": 1.487410545349121, "learning_rate": 1.8862862821480025e-05, "loss": 1.9504, "step": 165 }, { "epoch": 1.7754569190600522, "grad_norm": 1.6940875053405762, "learning_rate": 1.7537129724957642e-05, "loss": 1.9508, "step": 170 }, { "epoch": 1.8276762402088773, "grad_norm": 1.555407166481018, "learning_rate": 1.6234061120181142e-05, "loss": 1.8806, "step": 175 }, { "epoch": 1.8798955613577024, "grad_norm": 1.6240324974060059, "learning_rate": 1.495761438367577e-05, "loss": 1.9759, "step": 180 }, { "epoch": 1.9321148825065273, "grad_norm": 1.5653289556503296, "learning_rate": 1.3711666042227772e-05, "loss": 1.9918, "step": 185 }, { "epoch": 1.9843342036553526, "grad_norm": 1.5873417854309082, "learning_rate": 1.2500000000000006e-05, "loss": 1.8922, "step": 190 }, { "epoch": 2.0365535248041775, "grad_norm": 1.4565200805664062, "learning_rate": 1.1326296046939333e-05, "loss": 1.7482, "step": 195 }, { "epoch": 2.0887728459530024, "grad_norm": 1.5391967296600342, "learning_rate": 1.0194118683375503e-05, "loss": 1.6307, "step": 200 }, { "epoch": 2.1409921671018277, "grad_norm": 1.497523307800293, "learning_rate": 9.106906294750805e-06, "loss": 1.6288, "step": 205 }, { "epoch": 2.1932114882506526, "grad_norm": 1.5081855058670044, "learning_rate": 8.067960709356478e-06, "loss": 1.6239, "step": 210 }, { "epoch": 2.245430809399478, "grad_norm": 1.4794273376464844, "learning_rate": 7.080437170788723e-06, "loss": 1.6385, "step": 215 }, { "epoch": 2.297650130548303, "grad_norm": 1.590654730796814, "learning_rate": 6.147334755577596e-06, "loss": 1.6511, "step": 220 }, { "epoch": 2.349869451697128, "grad_norm": 1.516768217086792, "learning_rate": 5.271487265090163e-06, "loss": 1.6356, "step": 225 }, { "epoch": 2.402088772845953, "grad_norm": 1.5770176649093628, "learning_rate": 4.4555546193688735e-06, "loss": 1.6163, "step": 230 }, { "epoch": 2.454308093994778, "grad_norm": 1.5710504055023193, "learning_rate": 3.7020147790418263e-06, "loss": 1.6386, "step": 235 }, { "epoch": 2.506527415143603, "grad_norm": 1.5108474493026733, "learning_rate": 3.013156219837776e-06, "loss": 1.6722, "step": 240 }, { "epoch": 2.558746736292428, "grad_norm": 1.5271434783935547, "learning_rate": 2.391070982560564e-06, "loss": 1.6134, "step": 245 }, { "epoch": 2.6109660574412534, "grad_norm": 1.5538915395736694, "learning_rate": 1.837648319629956e-06, "loss": 1.6621, "step": 250 }, { "epoch": 2.6631853785900783, "grad_norm": 1.4325404167175293, "learning_rate": 1.3545689574841342e-06, "loss": 1.5881, "step": 255 }, { "epoch": 2.7154046997389036, "grad_norm": 1.505527138710022, "learning_rate": 9.432999922687396e-07, "loss": 1.6241, "step": 260 }, { "epoch": 2.7676240208877285, "grad_norm": 1.491540789604187, "learning_rate": 6.050904343141095e-07, "loss": 1.5779, "step": 265 }, { "epoch": 2.8198433420365534, "grad_norm": 1.508405327796936, "learning_rate": 3.4096741493194197e-07, "loss": 1.5785, "step": 270 }, { "epoch": 2.8720626631853787, "grad_norm": 1.4897537231445312, "learning_rate": 1.517330670512629e-07, "loss": 1.6248, "step": 275 }, { "epoch": 2.9242819843342036, "grad_norm": 1.4672536849975586, "learning_rate": 3.796208916709565e-08, "loss": 1.5898, "step": 280 }, { "epoch": 2.976501305483029, "grad_norm": 1.4578866958618164, "learning_rate": 0.0, "loss": 1.5727, "step": 285 }, { "epoch": 2.976501305483029, "step": 285, "total_flos": 1.0513138638127104e+17, "train_loss": 2.0445492710983544, "train_runtime": 467.486, "train_samples_per_second": 4.916, "train_steps_per_second": 0.61 } ], "logging_steps": 5, "max_steps": 285, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0513138638127104e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }