|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 69.0909090909091, |
|
"eval_steps": 500, |
|
"global_step": 350, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 5.194805194805195e-06, |
|
"loss": 1.8662, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 1.5883, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 1.6623376623376623e-05, |
|
"loss": 1.0967, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 2.2337662337662336e-05, |
|
"loss": 0.7579, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 10.09, |
|
"learning_rate": 2.8051948051948052e-05, |
|
"loss": 0.5478, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 3.324675324675325e-05, |
|
"loss": 0.4326, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 15.04, |
|
"learning_rate": 3.8441558441558445e-05, |
|
"loss": 0.3292, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"learning_rate": 4.415584415584416e-05, |
|
"loss": 0.2404, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 19.07, |
|
"learning_rate": 4.987012987012987e-05, |
|
"loss": 0.1739, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 21.09, |
|
"learning_rate": 5.558441558441558e-05, |
|
"loss": 0.1312, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"learning_rate": 6.12987012987013e-05, |
|
"loss": 0.0961, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 26.04, |
|
"learning_rate": 6.701298701298702e-05, |
|
"loss": 0.0595, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 28.05, |
|
"learning_rate": 7.16883116883117e-05, |
|
"loss": 0.1897, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 30.07, |
|
"learning_rate": 7.74025974025974e-05, |
|
"loss": 0.0537, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 32.09, |
|
"learning_rate": 8.311688311688312e-05, |
|
"loss": 0.0264, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 35.02, |
|
"learning_rate": 8.883116883116883e-05, |
|
"loss": 0.0199, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 37.04, |
|
"learning_rate": 9.454545454545455e-05, |
|
"loss": 0.0176, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 39.05, |
|
"learning_rate": 0.00010025974025974026, |
|
"loss": 0.015, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 41.07, |
|
"learning_rate": 0.00010597402597402598, |
|
"loss": 0.0134, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 43.09, |
|
"learning_rate": 0.00011168831168831168, |
|
"loss": 0.0094, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 46.02, |
|
"learning_rate": 0.0001174025974025974, |
|
"loss": 0.0064, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 48.04, |
|
"learning_rate": 0.0001231168831168831, |
|
"loss": 0.0084, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 50.05, |
|
"learning_rate": 0.00012883116883116884, |
|
"loss": 0.0077, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 52.07, |
|
"learning_rate": 0.00013454545454545455, |
|
"loss": 0.0082, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 54.09, |
|
"learning_rate": 0.00014025974025974028, |
|
"loss": 0.0058, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 57.02, |
|
"learning_rate": 0.00014597402597402599, |
|
"loss": 0.006, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 59.04, |
|
"learning_rate": 0.0001516883116883117, |
|
"loss": 0.0095, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 61.05, |
|
"learning_rate": 0.00015740259740259742, |
|
"loss": 0.006, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 63.07, |
|
"learning_rate": 0.00016311688311688313, |
|
"loss": 0.0068, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 65.09, |
|
"learning_rate": 0.00016883116883116884, |
|
"loss": 0.0086, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 68.02, |
|
"learning_rate": 0.00017454545454545454, |
|
"loss": 0.0036, |
|
"step": 341 |
|
} |
|
], |
|
"logging_steps": 11, |
|
"max_steps": 3850, |
|
"num_train_epochs": 70, |
|
"save_steps": 500, |
|
"total_flos": 6.12217506496512e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|