{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.159821000479463, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00010400000000000001, "loss": 1.4239, "step": 13 }, { "epoch": 0.01, "learning_rate": 0.0001999978128380225, "loss": 1.3151, "step": 26 }, { "epoch": 0.01, "learning_rate": 0.0001995716208873644, "loss": 1.0994, "step": 39 }, { "epoch": 0.02, "learning_rate": 0.0001984097857063434, "loss": 1.0521, "step": 52 }, { "epoch": 0.02, "learning_rate": 0.00019652089102773488, "loss": 0.9963, "step": 65 }, { "epoch": 0.02, "learning_rate": 0.00019391889215899299, "loss": 1.0038, "step": 78 }, { "epoch": 0.03, "learning_rate": 0.00019062301287930446, "loss": 0.9647, "step": 91 }, { "epoch": 0.03, "learning_rate": 0.00018665760341274505, "loss": 1.0262, "step": 104 }, { "epoch": 0.04, "learning_rate": 0.00018205196052684445, "loss": 0.9811, "step": 117 }, { "epoch": 0.04, "learning_rate": 0.00017684011108568592, "loss": 0.9759, "step": 130 }, { "epoch": 0.05, "learning_rate": 0.00017106056065666793, "loss": 1.0042, "step": 143 }, { "epoch": 0.05, "learning_rate": 0.0001647560090282419, "loss": 0.9669, "step": 156 }, { "epoch": 0.05, "learning_rate": 0.00015797303474040332, "loss": 0.9218, "step": 169 }, { "epoch": 0.06, "learning_rate": 0.0001507617509586517, "loss": 0.939, "step": 182 }, { "epoch": 0.06, "learning_rate": 0.00014317543523384928, "loss": 0.8893, "step": 195 }, { "epoch": 0.07, "learning_rate": 0.00013527013588334415, "loss": 0.9348, "step": 208 }, { "epoch": 0.07, "learning_rate": 0.00012710425790144446, "loss": 0.9399, "step": 221 }, { "epoch": 0.07, "learning_rate": 0.00011873813145857249, "loss": 0.8956, "step": 234 }, { "epoch": 0.08, "learning_rate": 0.00011023356617706052, "loss": 0.8905, "step": 247 }, { "epoch": 0.08, "learning_rate": 0.00010165339447663587, "loss": 0.8825, "step": 260 }, { "epoch": 0.09, "learning_rate": 9.30610073633956e-05, "loss": 0.8853, "step": 273 }, { "epoch": 0.09, "learning_rate": 8.451988609189987e-05, "loss": 0.9191, "step": 286 }, { "epoch": 0.1, "learning_rate": 7.6093133160502e-05, "loss": 0.8472, "step": 299 }, { "epoch": 0.1, "learning_rate": 6.784300610496048e-05, "loss": 0.9249, "step": 312 }, { "epoch": 0.1, "learning_rate": 5.983045753470308e-05, "loss": 0.8763, "step": 325 }, { "epoch": 0.11, "learning_rate": 5.2114684809993044e-05, "loss": 0.824, "step": 338 }, { "epoch": 0.11, "learning_rate": 4.475269268701868e-05, "loss": 0.8627, "step": 351 }, { "epoch": 0.12, "learning_rate": 3.779887216211995e-05, "loss": 0.8474, "step": 364 }, { "epoch": 0.12, "learning_rate": 3.1304598626685545e-05, "loss": 0.8444, "step": 377 }, { "epoch": 0.12, "learning_rate": 2.5317852301584643e-05, "loss": 0.837, "step": 390 }, { "epoch": 0.13, "learning_rate": 1.988286375539391e-05, "loss": 0.8613, "step": 403 }, { "epoch": 0.13, "learning_rate": 1.5039787125361326e-05, "loss": 0.8878, "step": 416 }, { "epoch": 0.14, "learning_rate": 1.0824403455375288e-05, "loss": 0.8382, "step": 429 }, { "epoch": 0.14, "learning_rate": 7.267856342703461e-06, "loss": 0.8802, "step": 442 }, { "epoch": 0.15, "learning_rate": 4.3964218465642355e-06, "loss": 0.9216, "step": 455 }, { "epoch": 0.15, "learning_rate": 2.2313143584648423e-06, "loss": 0.8493, "step": 468 }, { "epoch": 0.15, "learning_rate": 7.885298685522235e-07, "loss": 0.8862, "step": 481 }, { "epoch": 0.16, "learning_rate": 7.872778593728258e-08, "loss": 0.8264, "step": 494 } ], "max_steps": 500, "num_train_epochs": 1, "total_flos": 1.7103152756921088e+16, "trial_name": null, "trial_params": null }