alex-gpt-finetune / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9175784099197664,
"global_step": 24000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 5e-05,
"loss": 3.2675,
"step": 500
},
{
"epoch": 0.12,
"learning_rate": 4.896600215071553e-05,
"loss": 2.9196,
"step": 1000
},
{
"epoch": 0.18,
"learning_rate": 4.793200430143105e-05,
"loss": 2.8585,
"step": 1500
},
{
"epoch": 0.24,
"learning_rate": 4.689800645214658e-05,
"loss": 2.8352,
"step": 2000
},
{
"epoch": 0.3,
"learning_rate": 4.586400860286211e-05,
"loss": 2.8187,
"step": 2500
},
{
"epoch": 0.36,
"learning_rate": 4.483001075357763e-05,
"loss": 2.8061,
"step": 3000
},
{
"epoch": 0.43,
"learning_rate": 4.379601290429316e-05,
"loss": 2.8031,
"step": 3500
},
{
"epoch": 0.49,
"learning_rate": 4.276201505500869e-05,
"loss": 2.7748,
"step": 4000
},
{
"epoch": 0.55,
"learning_rate": 4.1728017205724215e-05,
"loss": 2.7805,
"step": 4500
},
{
"epoch": 0.61,
"learning_rate": 4.069401935643974e-05,
"loss": 2.7626,
"step": 5000
},
{
"epoch": 0.67,
"learning_rate": 3.9660021507155265e-05,
"loss": 2.7667,
"step": 5500
},
{
"epoch": 0.73,
"learning_rate": 3.8626023657870794e-05,
"loss": 2.7543,
"step": 6000
},
{
"epoch": 0.79,
"learning_rate": 3.7592025808586315e-05,
"loss": 2.7481,
"step": 6500
},
{
"epoch": 0.85,
"learning_rate": 3.6558027959301844e-05,
"loss": 2.756,
"step": 7000
},
{
"epoch": 0.91,
"learning_rate": 3.552403011001738e-05,
"loss": 2.7437,
"step": 7500
},
{
"epoch": 0.97,
"learning_rate": 3.44900322607329e-05,
"loss": 2.7375,
"step": 8000
},
{
"epoch": 1.03,
"learning_rate": 3.345603441144843e-05,
"loss": 2.7159,
"step": 8500
},
{
"epoch": 1.09,
"learning_rate": 3.242203656216396e-05,
"loss": 2.697,
"step": 9000
},
{
"epoch": 1.15,
"learning_rate": 3.138803871287948e-05,
"loss": 2.697,
"step": 9500
},
{
"epoch": 1.22,
"learning_rate": 3.0354040863595007e-05,
"loss": 2.6973,
"step": 10000
},
{
"epoch": 1.28,
"learning_rate": 2.932004301431053e-05,
"loss": 2.6924,
"step": 10500
},
{
"epoch": 1.34,
"learning_rate": 2.828604516502606e-05,
"loss": 2.6973,
"step": 11000
},
{
"epoch": 1.4,
"learning_rate": 2.7252047315741585e-05,
"loss": 2.6975,
"step": 11500
},
{
"epoch": 1.46,
"learning_rate": 2.6218049466457113e-05,
"loss": 2.6978,
"step": 12000
},
{
"epoch": 1.52,
"learning_rate": 2.5184051617172638e-05,
"loss": 2.6867,
"step": 12500
},
{
"epoch": 1.58,
"learning_rate": 2.4150053767888163e-05,
"loss": 2.6771,
"step": 13000
},
{
"epoch": 1.64,
"learning_rate": 2.311605591860369e-05,
"loss": 2.6919,
"step": 13500
},
{
"epoch": 1.7,
"learning_rate": 2.2082058069319216e-05,
"loss": 2.6826,
"step": 14000
},
{
"epoch": 1.76,
"learning_rate": 2.1048060220034745e-05,
"loss": 2.6865,
"step": 14500
},
{
"epoch": 1.82,
"learning_rate": 2.001406237075027e-05,
"loss": 2.6724,
"step": 15000
},
{
"epoch": 1.88,
"learning_rate": 1.8980064521465795e-05,
"loss": 2.6768,
"step": 15500
},
{
"epoch": 1.95,
"learning_rate": 1.7946066672181323e-05,
"loss": 2.68,
"step": 16000
},
{
"epoch": 2.01,
"learning_rate": 1.6912068822896848e-05,
"loss": 2.6798,
"step": 16500
},
{
"epoch": 2.07,
"learning_rate": 1.5878070973612376e-05,
"loss": 2.6573,
"step": 17000
},
{
"epoch": 2.13,
"learning_rate": 1.4844073124327901e-05,
"loss": 2.6498,
"step": 17500
},
{
"epoch": 2.19,
"learning_rate": 1.381007527504343e-05,
"loss": 2.648,
"step": 18000
},
{
"epoch": 2.25,
"learning_rate": 1.2776077425758956e-05,
"loss": 2.6586,
"step": 18500
},
{
"epoch": 2.31,
"learning_rate": 1.1742079576474481e-05,
"loss": 2.6553,
"step": 19000
},
{
"epoch": 2.37,
"learning_rate": 1.070808172719001e-05,
"loss": 2.6484,
"step": 19500
},
{
"epoch": 2.43,
"learning_rate": 9.674083877905534e-06,
"loss": 2.656,
"step": 20000
},
{
"epoch": 2.49,
"learning_rate": 8.640086028621061e-06,
"loss": 2.6524,
"step": 20500
},
{
"epoch": 2.55,
"learning_rate": 7.6060881793365875e-06,
"loss": 2.6489,
"step": 21000
},
{
"epoch": 2.61,
"learning_rate": 6.572090330052113e-06,
"loss": 2.6452,
"step": 21500
},
{
"epoch": 2.67,
"learning_rate": 5.538092480767641e-06,
"loss": 2.6397,
"step": 22000
},
{
"epoch": 2.74,
"learning_rate": 4.5040946314831665e-06,
"loss": 2.6359,
"step": 22500
},
{
"epoch": 2.8,
"learning_rate": 3.470096782198693e-06,
"loss": 2.6491,
"step": 23000
},
{
"epoch": 2.86,
"learning_rate": 2.43609893291422e-06,
"loss": 2.6369,
"step": 23500
},
{
"epoch": 2.92,
"learning_rate": 1.402101083629746e-06,
"loss": 2.6461,
"step": 24000
}
],
"max_steps": 24678,
"num_train_epochs": 3,
"total_flos": 48313248320323584,
"trial_name": null,
"trial_params": null
}
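
For reference, trainer_state.json is the training log that the Hugging Face Transformers Trainer saves alongside each checkpoint: global_step and epoch mark where training stopped, and log_history records the running loss and learning rate at every logging step (here, every 500 steps). A minimal Python sketch for inspecting the file follows; the local file path and the output format are illustrative assumptions, not part of this repository.

import json

# Load the trainer state written by the Transformers Trainer.
# "trainer_state.json" is an assumed local path to the file above.
with open("trainer_state.json") as f:
    state = json.load(f)

print(f"stopped at epoch {state['epoch']:.2f}, "
      f"step {state['global_step']} of {state['max_steps']}")

# Walk the logged history: each entry holds the step, epoch,
# learning rate, and running training loss at that logging step.
for entry in state["log_history"]:
    print(f"step {entry['step']:>6}  "
          f"lr {entry['learning_rate']:.3e}  "
          f"loss {entry['loss']:.4f}")

Read this way, the log shows the training loss falling from about 3.27 at step 500 to roughly 2.65 by step 24000 (epoch 2.92 of 3), while the learning rate decays from 5e-05 toward zero as the run approaches max_steps 24678.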