|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.31412202892914304, |
|
"global_step": 105000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.925209040731157e-05, |
|
"loss": 0.0716, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.850418081462313e-05, |
|
"loss": 0.0741, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.77562712219347e-05, |
|
"loss": 0.0753, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.7008361629246264e-05, |
|
"loss": 0.0709, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.6260452036557824e-05, |
|
"loss": 0.0754, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 0.07123171538114548, |
|
"eval_runtime": 1351.5318, |
|
"eval_samples_per_second": 13.741, |
|
"eval_steps_per_second": 13.741, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.551254244386939e-05, |
|
"loss": 0.0745, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.476463285118095e-05, |
|
"loss": 0.0671, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.401672325849251e-05, |
|
"loss": 0.0687, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.326881366580408e-05, |
|
"loss": 0.0718, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.252090407311564e-05, |
|
"loss": 0.0692, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 0.06770400702953339, |
|
"eval_runtime": 1359.9214, |
|
"eval_samples_per_second": 13.656, |
|
"eval_steps_per_second": 13.656, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.1772994480427206e-05, |
|
"loss": 0.0695, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.102508488773877e-05, |
|
"loss": 0.0696, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.0277175295050333e-05, |
|
"loss": 0.0689, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.95292657023619e-05, |
|
"loss": 0.0679, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.878135610967347e-05, |
|
"loss": 0.071, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 0.05809599161148071, |
|
"eval_runtime": 2705.4449, |
|
"eval_samples_per_second": 6.864, |
|
"eval_steps_per_second": 6.864, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.803344651698503e-05, |
|
"loss": 0.0712, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.7285536924296595e-05, |
|
"loss": 0.0683, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.653762733160816e-05, |
|
"loss": 0.0673, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.578971773891972e-05, |
|
"loss": 0.0685, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.504180814623129e-05, |
|
"loss": 0.0676, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 0.06122186779975891, |
|
"eval_runtime": 2772.9648, |
|
"eval_samples_per_second": 6.697, |
|
"eval_steps_per_second": 6.697, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.429389855354285e-05, |
|
"loss": 0.0672, |
|
"step": 105000 |
|
} |
|
], |
|
"max_steps": 334265, |
|
"num_train_epochs": 1, |
|
"total_flos": 6.792868897849498e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|