{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5741726452210136,
  "global_step": 4962,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.9997685720897943e-05,
      "loss": 2.316,
      "step": 1
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9259430687340893e-05,
      "loss": 2.541,
      "step": 324
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.850960425827355e-05,
      "loss": 2.3336,
      "step": 648
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.7759777829206203e-05,
      "loss": 2.1896,
      "step": 972
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.7009951400138856e-05,
      "loss": 2.2036,
      "step": 1296
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.667900948854432e-05,
      "loss": 2.122,
      "step": 1442
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.620226799352002e-05,
      "loss": 2.1447,
      "step": 1648
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.572552649849572e-05,
      "loss": 2.0965,
      "step": 1854
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.524878500347142e-05,
      "loss": 2.1287,
      "step": 2060
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.477435778754918e-05,
      "loss": 2.1001,
      "step": 2266
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.429761629252488e-05,
      "loss": 2.1086,
      "step": 2472
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.382087479750058e-05,
      "loss": 2.0791,
      "step": 2678
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.3344133302476279e-05,
      "loss": 2.0026,
      "step": 2884
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.2867391807451979e-05,
      "loss": 1.9953,
      "step": 3090
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.2390650312427679e-05,
      "loss": 2.0375,
      "step": 3296
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.19185373756075e-05,
      "loss": 2.0465,
      "step": 3502
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.144411015968526e-05,
      "loss": 2.0147,
      "step": 3708
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.096736866466096e-05,
      "loss": 2.0066,
      "step": 3914
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.0490627169636659e-05,
      "loss": 2.0109,
      "step": 4120
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.0013885674612359e-05,
      "loss": 1.9669,
      "step": 4326
    },
    {
      "epoch": 0.52,
      "learning_rate": 9.537144179588059e-06,
      "loss": 1.9822,
      "step": 4532
    },
    {
      "epoch": 0.55,
      "learning_rate": 9.062716963665819e-06,
      "loss": 1.9876,
      "step": 4738
    },
    {
      "epoch": 0.57,
      "learning_rate": 8.585975468641519e-06,
      "loss": 1.9557,
      "step": 4944
    }
  ],
  "max_steps": 8642,
  "num_train_epochs": 1,
  "total_flos": 9216488128905216.0,
  "trial_name": null,
  "trial_params": null
}