{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5000.0, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 100.0, "grad_norm": 0.0051079667173326015, "learning_rate": 0.00019616000000000002, "loss": 0.1768, "step": 100 }, { "epoch": 200.0, "grad_norm": 0.0013037772150710225, "learning_rate": 0.00019216, "loss": 0.0, "step": 200 }, { "epoch": 300.0, "grad_norm": 0.00030190523830242455, "learning_rate": 0.00018816000000000001, "loss": 0.0, "step": 300 }, { "epoch": 400.0, "grad_norm": 0.00017417919298168272, "learning_rate": 0.00018416, "loss": 0.0, "step": 400 }, { "epoch": 500.0, "grad_norm": 0.00014137968537397683, "learning_rate": 0.00018016, "loss": 0.0, "step": 500 }, { "epoch": 600.0, "grad_norm": 0.0001240275305463001, "learning_rate": 0.00017616000000000002, "loss": 0.0, "step": 600 }, { "epoch": 700.0, "grad_norm": 9.807997412281111e-05, "learning_rate": 0.00017216, "loss": 0.0, "step": 700 }, { "epoch": 800.0, "grad_norm": 6.768624734831974e-05, "learning_rate": 0.00016816000000000002, "loss": 0.0, "step": 800 }, { "epoch": 900.0, "grad_norm": 5.961491842754185e-05, "learning_rate": 0.00016416, "loss": 0.0, "step": 900 }, { "epoch": 1000.0, "grad_norm": 5.017322473577224e-05, "learning_rate": 0.00016016, "loss": 0.0, "step": 1000 }, { "epoch": 1100.0, "grad_norm": 5.257365410216153e-05, "learning_rate": 0.00015616000000000002, "loss": 0.0, "step": 1100 }, { "epoch": 1200.0, "grad_norm": 5.0212354835821316e-05, "learning_rate": 0.00015216, "loss": 0.0, "step": 1200 }, { "epoch": 1300.0, "grad_norm": 0.00011130324128316715, "learning_rate": 0.00014816000000000002, "loss": 0.0, "step": 1300 }, { "epoch": 1400.0, "grad_norm": 3.4537704777903855e-05, "learning_rate": 0.00014416, "loss": 0.0, "step": 1400 }, { "epoch": 1500.0, "grad_norm": 2.7689882699633017e-05, "learning_rate": 0.00014016, "loss": 0.0, "step": 1500 }, { "epoch": 1600.0, "grad_norm": 2.726606180658564e-05, "learning_rate": 0.00013616, "loss": 0.0, "step": 1600 }, { "epoch": 1700.0, "grad_norm": 2.1775686036562547e-05, "learning_rate": 0.00013216, "loss": 0.0, "step": 1700 }, { "epoch": 1800.0, "grad_norm": 2.3525770302512683e-05, "learning_rate": 0.00012816000000000002, "loss": 0.0, "step": 1800 }, { "epoch": 1900.0, "grad_norm": 1.902567055367399e-05, "learning_rate": 0.00012416, "loss": 0.0, "step": 1900 }, { "epoch": 2000.0, "grad_norm": 2.1888447008677758e-05, "learning_rate": 0.00012016, "loss": 0.0, "step": 2000 }, { "epoch": 2100.0, "grad_norm": 1.896571302495431e-05, "learning_rate": 0.00011616, "loss": 0.0, "step": 2100 }, { "epoch": 2200.0, "grad_norm": 1.5480936781386845e-05, "learning_rate": 0.00011216, "loss": 0.0, "step": 2200 }, { "epoch": 2300.0, "grad_norm": 1.3961292097519618e-05, "learning_rate": 0.00010816, "loss": 0.0, "step": 2300 }, { "epoch": 2400.0, "grad_norm": 1.4109475159784779e-05, "learning_rate": 0.00010416000000000002, "loss": 0.0, "step": 2400 }, { "epoch": 2500.0, "grad_norm": 1.2665558642765973e-05, "learning_rate": 0.00010016, "loss": 0.0, "step": 2500 }, { "epoch": 2600.0, "grad_norm": 1.5646817701053806e-05, "learning_rate": 9.616e-05, "loss": 0.0, "step": 2600 }, { "epoch": 2700.0, "grad_norm": 1.2876950677309651e-05, "learning_rate": 9.216e-05, "loss": 0.0, "step": 2700 }, { "epoch": 2800.0, "grad_norm": 1.2121616236981936e-05, "learning_rate": 8.816000000000001e-05, "loss": 0.0, "step": 2800 }, { "epoch": 2900.0, "grad_norm": 1.4524578546115663e-05, "learning_rate": 8.416000000000001e-05, "loss": 0.0, "step": 2900 }, { "epoch": 3000.0, "grad_norm": 1.1223896763112862e-05, "learning_rate": 8.016e-05, "loss": 0.0, "step": 3000 }, { "epoch": 3100.0, "grad_norm": 8.85269673744915e-06, "learning_rate": 7.616e-05, "loss": 0.0, "step": 3100 }, { "epoch": 3200.0, "grad_norm": 1.264509955944959e-05, "learning_rate": 7.216e-05, "loss": 0.0, "step": 3200 }, { "epoch": 3300.0, "grad_norm": 8.284540854219813e-06, "learning_rate": 6.816e-05, "loss": 0.0, "step": 3300 }, { "epoch": 3400.0, "grad_norm": 8.871616046235431e-06, "learning_rate": 6.416e-05, "loss": 0.0, "step": 3400 }, { "epoch": 3500.0, "grad_norm": 9.966872312361374e-06, "learning_rate": 6.016000000000001e-05, "loss": 0.0, "step": 3500 }, { "epoch": 3600.0, "grad_norm": 2.9739601814071648e-05, "learning_rate": 5.6160000000000004e-05, "loss": 0.0, "step": 3600 }, { "epoch": 3700.0, "grad_norm": 7.714033927186392e-06, "learning_rate": 5.2159999999999995e-05, "loss": 0.0, "step": 3700 }, { "epoch": 3800.0, "grad_norm": 1.497406901762588e-05, "learning_rate": 4.816e-05, "loss": 0.0, "step": 3800 }, { "epoch": 3900.0, "grad_norm": 7.307490250241244e-06, "learning_rate": 4.4160000000000004e-05, "loss": 0.0, "step": 3900 }, { "epoch": 4000.0, "grad_norm": 6.682894763798686e-06, "learning_rate": 4.016e-05, "loss": 0.0, "step": 4000 }, { "epoch": 4100.0, "grad_norm": 7.749928954581264e-06, "learning_rate": 3.616e-05, "loss": 0.0, "step": 4100 }, { "epoch": 4200.0, "grad_norm": 1.01770574474358e-05, "learning_rate": 3.2160000000000004e-05, "loss": 0.0, "step": 4200 }, { "epoch": 4300.0, "grad_norm": 6.606936040043365e-06, "learning_rate": 2.816e-05, "loss": 0.0, "step": 4300 }, { "epoch": 4400.0, "grad_norm": 6.749212843715213e-06, "learning_rate": 2.4160000000000002e-05, "loss": 0.0, "step": 4400 }, { "epoch": 4500.0, "grad_norm": 8.575744686822873e-06, "learning_rate": 2.016e-05, "loss": 0.0, "step": 4500 }, { "epoch": 4600.0, "grad_norm": 6.673930329270661e-06, "learning_rate": 1.616e-05, "loss": 0.0, "step": 4600 }, { "epoch": 4700.0, "grad_norm": 6.32612272966071e-06, "learning_rate": 1.216e-05, "loss": 0.0, "step": 4700 }, { "epoch": 4800.0, "grad_norm": 6.985771960899001e-06, "learning_rate": 8.160000000000001e-06, "loss": 0.0, "step": 4800 }, { "epoch": 4900.0, "grad_norm": 5.245818101684563e-06, "learning_rate": 4.16e-06, "loss": 0.0, "step": 4900 }, { "epoch": 5000.0, "grad_norm": 5.854470146005042e-06, "learning_rate": 1.6e-07, "loss": 0.0, "step": 5000 } ], "logging_steps": 100, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 5000, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6755965747200000.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }