{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 210,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.19,
      "learning_rate": 1.8571428571428572e-05,
      "loss": 0.474,
      "step": 13
    },
    {
      "epoch": 0.37,
      "learning_rate": 2.9206349206349206e-05,
      "loss": 0.4638,
      "step": 26
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.7142857142857144e-05,
      "loss": 0.4295,
      "step": 39
    },
    {
      "epoch": 0.74,
      "learning_rate": 2.507936507936508e-05,
      "loss": 0.4306,
      "step": 52
    },
    {
      "epoch": 0.93,
      "learning_rate": 2.301587301587302e-05,
      "loss": 0.3871,
      "step": 65
    },
    {
      "epoch": 1.11,
      "learning_rate": 2.095238095238095e-05,
      "loss": 0.3386,
      "step": 78
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.888888888888889e-05,
      "loss": 0.3411,
      "step": 91
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.6825396825396824e-05,
      "loss": 0.3176,
      "step": 104
    },
    {
      "epoch": 1.67,
      "learning_rate": 1.4761904761904761e-05,
      "loss": 0.2954,
      "step": 117
    },
    {
      "epoch": 1.86,
      "learning_rate": 1.2698412698412699e-05,
      "loss": 0.2926,
      "step": 130
    },
    {
      "epoch": 2.04,
      "learning_rate": 1.0634920634920634e-05,
      "loss": 0.2705,
      "step": 143
    },
    {
      "epoch": 2.23,
      "learning_rate": 8.571428571428571e-06,
      "loss": 0.2638,
      "step": 156
    },
    {
      "epoch": 2.41,
      "learning_rate": 6.507936507936508e-06,
      "loss": 0.2702,
      "step": 169
    },
    {
      "epoch": 2.6,
      "learning_rate": 4.444444444444444e-06,
      "loss": 0.2727,
      "step": 182
    },
    {
      "epoch": 2.79,
      "learning_rate": 2.3809523809523808e-06,
      "loss": 0.2775,
      "step": 195
    },
    {
      "epoch": 2.97,
      "learning_rate": 3.1746031746031743e-07,
      "loss": 0.2542,
      "step": 208
    }
  ],
  "logging_steps": 13,
  "max_steps": 210,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 1.694999141941248e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}