|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.0, |
|
"global_step": 2850, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1e-05, |
|
"loss": 0.65, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4943, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4019, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4193, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3961, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.787234042553192e-05, |
|
"loss": 0.324, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.574468085106383e-05, |
|
"loss": 0.3471, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.3617021276595746e-05, |
|
"loss": 0.287, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 4.148936170212766e-05, |
|
"loss": 0.2373, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.936170212765958e-05, |
|
"loss": 0.2729, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.723404255319149e-05, |
|
"loss": 0.1955, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.5106382978723407e-05, |
|
"loss": 0.1929, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 3.2978723404255317e-05, |
|
"loss": 0.1849, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.085106382978723e-05, |
|
"loss": 0.2012, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 2.8723404255319154e-05, |
|
"loss": 0.152, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 2.6595744680851064e-05, |
|
"loss": 0.1388, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 2.446808510638298e-05, |
|
"loss": 0.1337, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 2.2340425531914894e-05, |
|
"loss": 0.1571, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.0212765957446807e-05, |
|
"loss": 0.1088, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 1.8085106382978724e-05, |
|
"loss": 0.0701, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.595744680851064e-05, |
|
"loss": 0.0853, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 1.3829787234042554e-05, |
|
"loss": 0.0923, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 1.170212765957447e-05, |
|
"loss": 0.0695, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 9.574468085106383e-06, |
|
"loss": 0.0867, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 7.446808510638298e-06, |
|
"loss": 0.0371, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 5.319148936170213e-06, |
|
"loss": 0.0201, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 3.1914893617021277e-06, |
|
"loss": 0.0141, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 1.0638297872340427e-06, |
|
"loss": 0.1063, |
|
"step": 2800 |
|
} |
|
], |
|
"max_steps": 2850, |
|
"num_train_epochs": 6, |
|
"total_flos": 5994196063211520.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|