|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 9654, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.742075823492853e-05, |
|
"loss": 3.736, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.483115806919412e-05, |
|
"loss": 3.6583, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.224155790345971e-05, |
|
"loss": 3.6089, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.96519577377253e-05, |
|
"loss": 3.5785, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.706235757199089e-05, |
|
"loss": 3.5712, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.447275740625648e-05, |
|
"loss": 3.5492, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.188315724052207e-05, |
|
"loss": 3.5441, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9293557074787652e-05, |
|
"loss": 3.528, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6703956909053245e-05, |
|
"loss": 3.5161, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4114356743318832e-05, |
|
"loss": 3.4965, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1524756577584422e-05, |
|
"loss": 3.502, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8935156411850012e-05, |
|
"loss": 3.4984, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.6345556246115602e-05, |
|
"loss": 3.4832, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.375595608038119e-05, |
|
"loss": 3.4808, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.116635591464678e-05, |
|
"loss": 3.4683, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.576755748912369e-06, |
|
"loss": 3.4613, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.992334783509427e-06, |
|
"loss": 3.4763, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.402734617775016e-06, |
|
"loss": 3.4537, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.131344520406049e-07, |
|
"loss": 3.4605, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 9654, |
|
"total_flos": 5044921687867392.0, |
|
"train_loss": 3.5289945189493994, |
|
"train_runtime": 1832.9089, |
|
"train_samples_per_second": 42.135, |
|
"train_steps_per_second": 5.267 |
|
} |
|
], |
|
"max_steps": 9654, |
|
"num_train_epochs": 1, |
|
"total_flos": 5044921687867392.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|