bert-base-uncased-squad1 / trainer_state.json
sguskin's picture
Upload trainer_state.json
ee7c533
{
"best_metric": 88.58308022828253,
"best_model_checkpoint": "output/bert/checkpoint-20500",
"epoch": 1.8525212362190493,
"global_step": 20500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 2.9322248328212544e-05,
"loss": 2.6168,
"step": 500
},
{
"epoch": 0.05,
"eval_exact_match": 60.0,
"eval_f1": 71.21519064840525,
"step": 500
},
{
"epoch": 0.09,
"learning_rate": 2.8644496656425085e-05,
"loss": 1.6041,
"step": 1000
},
{
"epoch": 0.09,
"eval_exact_match": 67.11447492904446,
"eval_f1": 77.66743510634613,
"step": 1000
},
{
"epoch": 0.14,
"learning_rate": 2.7966744984637632e-05,
"loss": 1.4496,
"step": 1500
},
{
"epoch": 0.14,
"eval_exact_match": 69.66887417218543,
"eval_f1": 79.43837393741833,
"step": 1500
},
{
"epoch": 0.18,
"learning_rate": 2.7288993312850172e-05,
"loss": 1.3674,
"step": 2000
},
{
"epoch": 0.18,
"eval_exact_match": 72.5922421948912,
"eval_f1": 81.81276872830003,
"step": 2000
},
{
"epoch": 0.23,
"learning_rate": 2.6611241641062716e-05,
"loss": 1.3504,
"step": 2500
},
{
"epoch": 0.23,
"eval_exact_match": 73.52885525070955,
"eval_f1": 82.34072045279031,
"step": 2500
},
{
"epoch": 0.27,
"learning_rate": 2.5933489969275256e-05,
"loss": 1.3002,
"step": 3000
},
{
"epoch": 0.27,
"eval_exact_match": 74.13434247871334,
"eval_f1": 83.19942219951206,
"step": 3000
},
{
"epoch": 0.32,
"learning_rate": 2.5255738297487804e-05,
"loss": 1.2499,
"step": 3500
},
{
"epoch": 0.32,
"eval_exact_match": 74.12488174077578,
"eval_f1": 83.16211440023876,
"step": 3500
},
{
"epoch": 0.36,
"learning_rate": 2.4577986625700344e-05,
"loss": 1.1953,
"step": 4000
},
{
"epoch": 0.36,
"eval_exact_match": 75.37369914853359,
"eval_f1": 84.23388802584633,
"step": 4000
},
{
"epoch": 0.41,
"learning_rate": 2.3900234953912888e-05,
"loss": 1.1952,
"step": 4500
},
{
"epoch": 0.41,
"eval_exact_match": 76.14001892147587,
"eval_f1": 84.76099418267141,
"step": 4500
},
{
"epoch": 0.45,
"learning_rate": 2.3222483282125428e-05,
"loss": 1.2084,
"step": 5000
},
{
"epoch": 0.45,
"eval_exact_match": 77.11447492904446,
"eval_f1": 85.29225320990346,
"step": 5000
},
{
"epoch": 0.5,
"learning_rate": 2.2544731610337975e-05,
"loss": 1.1546,
"step": 5500
},
{
"epoch": 0.5,
"eval_exact_match": 76.65089877010406,
"eval_f1": 85.47014269865286,
"step": 5500
},
{
"epoch": 0.54,
"learning_rate": 2.1866979938550515e-05,
"loss": 1.1018,
"step": 6000
},
{
"epoch": 0.54,
"eval_exact_match": 76.9441816461684,
"eval_f1": 85.33429382849097,
"step": 6000
},
{
"epoch": 0.59,
"learning_rate": 2.118922826676306e-05,
"loss": 1.0937,
"step": 6500
},
{
"epoch": 0.59,
"eval_exact_match": 77.07663197729423,
"eval_f1": 85.52813865025963,
"step": 6500
},
{
"epoch": 0.63,
"learning_rate": 2.05114765949756e-05,
"loss": 1.0422,
"step": 7000
},
{
"epoch": 0.63,
"eval_exact_match": 78.0794701986755,
"eval_f1": 86.03681982738262,
"step": 7000
},
{
"epoch": 0.68,
"learning_rate": 1.9833724923188147e-05,
"loss": 1.116,
"step": 7500
},
{
"epoch": 0.68,
"eval_exact_match": 77.95648060548723,
"eval_f1": 86.26399611648696,
"step": 7500
},
{
"epoch": 0.72,
"learning_rate": 1.9155973251400687e-05,
"loss": 1.1176,
"step": 8000
},
{
"epoch": 0.72,
"eval_exact_match": 78.59035004730369,
"eval_f1": 86.54207970028193,
"step": 8000
},
{
"epoch": 0.77,
"learning_rate": 1.847822157961323e-05,
"loss": 1.1029,
"step": 8500
},
{
"epoch": 0.77,
"eval_exact_match": 78.66603595080416,
"eval_f1": 86.56865525427538,
"step": 8500
},
{
"epoch": 0.81,
"learning_rate": 1.780046990782577e-05,
"loss": 1.0594,
"step": 9000
},
{
"epoch": 0.81,
"eval_exact_match": 78.9120151371807,
"eval_f1": 86.80153948101524,
"step": 9000
},
{
"epoch": 0.86,
"learning_rate": 1.7122718236038318e-05,
"loss": 1.0266,
"step": 9500
},
{
"epoch": 0.86,
"eval_exact_match": 78.90255439924314,
"eval_f1": 86.69867179951433,
"step": 9500
},
{
"epoch": 0.9,
"learning_rate": 1.644496656425086e-05,
"loss": 1.063,
"step": 10000
},
{
"epoch": 0.9,
"eval_exact_match": 79.5837275307474,
"eval_f1": 87.50572394546504,
"step": 10000
},
{
"epoch": 0.95,
"learning_rate": 1.5767214892463402e-05,
"loss": 1.0353,
"step": 10500
},
{
"epoch": 0.95,
"eval_exact_match": 80.10406811731315,
"eval_f1": 87.58460117305698,
"step": 10500
},
{
"epoch": 0.99,
"learning_rate": 1.5089463220675944e-05,
"loss": 1.0198,
"step": 11000
},
{
"epoch": 0.99,
"eval_exact_match": 80.1135288552507,
"eval_f1": 87.63553208656376,
"step": 11000
},
{
"epoch": 1.04,
"learning_rate": 1.4411711548888486e-05,
"loss": 0.7558,
"step": 11500
},
{
"epoch": 1.04,
"eval_exact_match": 80.00946073793756,
"eval_f1": 87.60930859917372,
"step": 11500
},
{
"epoch": 1.08,
"learning_rate": 1.373395987710103e-05,
"loss": 0.7337,
"step": 12000
},
{
"epoch": 1.08,
"eval_exact_match": 79.92431409649953,
"eval_f1": 87.57399698225302,
"step": 12000
},
{
"epoch": 1.13,
"learning_rate": 1.3056208205313572e-05,
"loss": 0.6884,
"step": 12500
},
{
"epoch": 1.13,
"eval_exact_match": 80.2554399243141,
"eval_f1": 87.80990616116381,
"step": 12500
},
{
"epoch": 1.17,
"learning_rate": 1.2378456533526116e-05,
"loss": 0.7082,
"step": 13000
},
{
"epoch": 1.17,
"eval_exact_match": 80.17975402081362,
"eval_f1": 87.80432576834488,
"step": 13000
},
{
"epoch": 1.22,
"learning_rate": 1.1700704861738658e-05,
"loss": 0.6914,
"step": 13500
},
{
"epoch": 1.22,
"eval_exact_match": 79.87701040681173,
"eval_f1": 87.89607002450978,
"step": 13500
},
{
"epoch": 1.27,
"learning_rate": 1.1022953189951202e-05,
"loss": 0.7051,
"step": 14000
},
{
"epoch": 1.27,
"eval_exact_match": 79.80132450331126,
"eval_f1": 87.69384875801946,
"step": 14000
},
{
"epoch": 1.31,
"learning_rate": 1.0345201518163744e-05,
"loss": 0.7302,
"step": 14500
},
{
"epoch": 1.31,
"eval_exact_match": 80.2081362346263,
"eval_f1": 88.06697491299282,
"step": 14500
},
{
"epoch": 1.36,
"learning_rate": 9.667449846376287e-06,
"loss": 0.6824,
"step": 15000
},
{
"epoch": 1.36,
"eval_exact_match": 80.50141911069063,
"eval_f1": 87.97379997606235,
"step": 15000
},
{
"epoch": 1.4,
"learning_rate": 8.98969817458883e-06,
"loss": 0.7169,
"step": 15500
},
{
"epoch": 1.4,
"eval_exact_match": 80.35004730368969,
"eval_f1": 88.00649157738846,
"step": 15500
},
{
"epoch": 1.45,
"learning_rate": 8.311946502801373e-06,
"loss": 0.7149,
"step": 16000
},
{
"epoch": 1.45,
"eval_exact_match": 80.74739829706716,
"eval_f1": 87.99961546836674,
"step": 16000
},
{
"epoch": 1.49,
"learning_rate": 7.634194831013915e-06,
"loss": 0.6726,
"step": 16500
},
{
"epoch": 1.49,
"eval_exact_match": 80.93661305581836,
"eval_f1": 88.06055493342305,
"step": 16500
},
{
"epoch": 1.54,
"learning_rate": 6.95644315922646e-06,
"loss": 0.6761,
"step": 17000
},
{
"epoch": 1.54,
"eval_exact_match": 80.50141911069063,
"eval_f1": 87.98739385338236,
"step": 17000
},
{
"epoch": 1.58,
"learning_rate": 6.278691487439003e-06,
"loss": 0.712,
"step": 17500
},
{
"epoch": 1.58,
"eval_exact_match": 81.0406811731315,
"eval_f1": 88.4045499003394,
"step": 17500
},
{
"epoch": 1.63,
"learning_rate": 5.6009398156515455e-06,
"loss": 0.693,
"step": 18000
},
{
"epoch": 1.63,
"eval_exact_match": 80.69063386944181,
"eval_f1": 88.19624181405425,
"step": 18000
},
{
"epoch": 1.67,
"learning_rate": 4.923188143864088e-06,
"loss": 0.6594,
"step": 18500
},
{
"epoch": 1.67,
"eval_exact_match": 80.97445600756859,
"eval_f1": 88.26284168848002,
"step": 18500
},
{
"epoch": 1.72,
"learning_rate": 4.245436472076631e-06,
"loss": 0.6743,
"step": 19000
},
{
"epoch": 1.72,
"eval_exact_match": 80.43519394512772,
"eval_f1": 88.07199660434318,
"step": 19000
},
{
"epoch": 1.76,
"learning_rate": 3.567684800289174e-06,
"loss": 0.6876,
"step": 19500
},
{
"epoch": 1.76,
"eval_exact_match": 80.88930936613056,
"eval_f1": 88.35346447229631,
"step": 19500
},
{
"epoch": 1.81,
"learning_rate": 2.889933128501717e-06,
"loss": 0.6913,
"step": 20000
},
{
"epoch": 1.81,
"eval_exact_match": 81.22989593188268,
"eval_f1": 88.55310003155216,
"step": 20000
},
{
"epoch": 1.85,
"learning_rate": 2.21218145671426e-06,
"loss": 0.6383,
"step": 20500
},
{
"epoch": 1.85,
"eval_exact_match": 81.35288552507096,
"eval_f1": 88.58308022828253,
"step": 20500
}
],
"max_steps": 22132,
"num_train_epochs": 2,
"total_flos": 3.213871718825779e+16,
"trial_name": null,
"trial_params": null
}