{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.159821000479463,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 0.00010400000000000001,
      "loss": 1.4239,
      "step": 13
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0001999978128380225,
      "loss": 1.3151,
      "step": 26
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0001995716208873644,
      "loss": 1.0994,
      "step": 39
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0001984097857063434,
      "loss": 1.0521,
      "step": 52
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019652089102773488,
      "loss": 0.9963,
      "step": 65
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019391889215899299,
      "loss": 1.0038,
      "step": 78
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00019062301287930446,
      "loss": 0.9647,
      "step": 91
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00018665760341274505,
      "loss": 1.0262,
      "step": 104
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00018205196052684445,
      "loss": 0.9811,
      "step": 117
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00017684011108568592,
      "loss": 0.9759,
      "step": 130
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00017106056065666793,
      "loss": 1.0042,
      "step": 143
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0001647560090282419,
      "loss": 0.9669,
      "step": 156
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00015797303474040332,
      "loss": 0.9218,
      "step": 169
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0001507617509586517,
      "loss": 0.939,
      "step": 182
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00014317543523384928,
      "loss": 0.8893,
      "step": 195
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00013527013588334415,
      "loss": 0.9348,
      "step": 208
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00012710425790144446,
      "loss": 0.9399,
      "step": 221
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00011873813145857249,
      "loss": 0.8956,
      "step": 234
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00011023356617706052,
      "loss": 0.8905,
      "step": 247
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00010165339447663587,
      "loss": 0.8825,
      "step": 260
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.30610073633956e-05,
      "loss": 0.8853,
      "step": 273
    },
    {
      "epoch": 0.09,
      "learning_rate": 8.451988609189987e-05,
      "loss": 0.9191,
      "step": 286
    },
    {
      "epoch": 0.1,
      "learning_rate": 7.6093133160502e-05,
      "loss": 0.8472,
      "step": 299
    },
    {
      "epoch": 0.1,
      "learning_rate": 6.784300610496048e-05,
      "loss": 0.9249,
      "step": 312
    },
    {
      "epoch": 0.1,
      "learning_rate": 5.983045753470308e-05,
      "loss": 0.8763,
      "step": 325
    },
    {
      "epoch": 0.11,
      "learning_rate": 5.2114684809993044e-05,
      "loss": 0.824,
      "step": 338
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.475269268701868e-05,
      "loss": 0.8627,
      "step": 351
    },
    {
      "epoch": 0.12,
      "learning_rate": 3.779887216211995e-05,
      "loss": 0.8474,
      "step": 364
    },
    {
      "epoch": 0.12,
      "learning_rate": 3.1304598626685545e-05,
      "loss": 0.8444,
      "step": 377
    },
    {
      "epoch": 0.12,
      "learning_rate": 2.5317852301584643e-05,
      "loss": 0.837,
      "step": 390
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.988286375539391e-05,
      "loss": 0.8613,
      "step": 403
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.5039787125361326e-05,
      "loss": 0.8878,
      "step": 416
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.0824403455375288e-05,
      "loss": 0.8382,
      "step": 429
    },
    {
      "epoch": 0.14,
      "learning_rate": 7.267856342703461e-06,
      "loss": 0.8802,
      "step": 442
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.3964218465642355e-06,
      "loss": 0.9216,
      "step": 455
    },
    {
      "epoch": 0.15,
      "learning_rate": 2.2313143584648423e-06,
      "loss": 0.8493,
      "step": 468
    },
    {
      "epoch": 0.15,
      "learning_rate": 7.885298685522235e-07,
      "loss": 0.8862,
      "step": 481
    },
    {
      "epoch": 0.16,
      "learning_rate": 7.872778593728258e-08,
      "loss": 0.8264,
      "step": 494
    }
  ],
  "max_steps": 500,
  "num_train_epochs": 1,
  "total_flos": 1.7103152756921088e+16,
  "trial_name": null,
  "trial_params": null
}