|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"global_step": 9860, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8985801217038542e-05, |
|
"loss": 2.0844, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.797160243407708e-05, |
|
"loss": 1.322, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.6957403651115622e-05, |
|
"loss": 1.1934, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.594320486815416e-05, |
|
"loss": 1.1366, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.4929006085192699e-05, |
|
"loss": 1.0634, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.3914807302231239e-05, |
|
"loss": 0.855, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.2900608519269777e-05, |
|
"loss": 0.8663, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.1886409736308317e-05, |
|
"loss": 0.8364, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.0872210953346858e-05, |
|
"loss": 0.835, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.858012170385396e-06, |
|
"loss": 0.8107, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 8.843813387423936e-06, |
|
"loss": 0.6625, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 7.829614604462476e-06, |
|
"loss": 0.6576, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.815415821501015e-06, |
|
"loss": 0.6617, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 5.801217038539554e-06, |
|
"loss": 0.6515, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 4.787018255578094e-06, |
|
"loss": 0.6179, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 3.7728194726166332e-06, |
|
"loss": 0.5304, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 2.7586206896551725e-06, |
|
"loss": 0.5401, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 1.7444219066937122e-06, |
|
"loss": 0.5549, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 7.302231237322515e-07, |
|
"loss": 0.5237, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 9860, |
|
"total_flos": 6.955379978528563e+16, |
|
"train_loss": 0.8512596145846539, |
|
"train_runtime": 3575.2739, |
|
"train_samples_per_second": 99.27, |
|
"train_steps_per_second": 2.758 |
|
} |
|
], |
|
"max_steps": 9860, |
|
"num_train_epochs": 4, |
|
"total_flos": 6.955379978528563e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|