diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3360 @@ +{ + "best_metric": 57.56979472888117, + "best_model_checkpoint": "./results_train_sae/bert-base-uncased/coqa/checkpoint-28000", + "epoch": 9.999931351685316, + "global_step": 72830, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07, + "learning_rate": 2.2883295194508013e-06, + "loss": 5.9079, + "step": 500 + }, + { + "epoch": 0.07, + "eval_HasAns_exact": 0.0, + "eval_HasAns_f1": 0.058699242323848086, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 98.46153846153847, + "eval_NoAns_f1": 98.46153846153847, + "eval_NoAns_total": 65, + "eval_best_exact": 0.8142302392584242, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 0.859924915535542, + "eval_best_f1_thresh": 0.0, + "eval_exact": 0.80170362019291, + "eval_f1": 0.859924915535542, + "eval_total": 7983, + "step": 500 + }, + { + "epoch": 0.14, + "learning_rate": 4.576659038901603e-06, + "loss": 4.7938, + "step": 1000 + }, + { + "epoch": 0.14, + "eval_HasAns_exact": 6.870421823692852, + "eval_HasAns_f1": 27.70107501407698, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 1.5384615384615385, + "eval_NoAns_f1": 1.5384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 6.827007390705249, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 27.48805110377818, + "eval_best_f1_thresh": 0.0, + "eval_exact": 6.827007390705249, + "eval_f1": 27.48805110377822, + "eval_total": 7983, + "step": 1000 + }, + { + "epoch": 0.21, + "learning_rate": 6.8649885583524035e-06, + "loss": 3.991, + "step": 1500 + }, + { + "epoch": 0.21, + "eval_HasAns_exact": 11.896943672644607, + "eval_HasAns_f1": 37.05032211892552, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 7.6923076923076925, + "eval_NoAns_f1": 7.6923076923076925, + "eval_NoAns_total": 65, + "eval_best_exact": 11.862708255041964, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 36.81128028781815, + "eval_best_f1_thresh": 0.0, + "eval_exact": 11.862708255041964, + "eval_f1": 36.81128028781814, + "eval_total": 7983, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 9.153318077803205e-06, + "loss": 3.7176, + "step": 2000 + }, + { + "epoch": 0.27, + "eval_HasAns_exact": 14.52386966405658, + "eval_HasAns_f1": 42.09036402930084, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 3.076923076923077, + "eval_NoAns_f1": 3.076923076923077, + "eval_NoAns_total": 65, + "eval_best_exact": 14.418138544406865, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 41.76017817662584, + "eval_best_f1_thresh": 0.0, + "eval_exact": 14.430665163472378, + "eval_f1": 41.77270479569133, + "eval_total": 7983, + "step": 2000 + }, + { + "epoch": 0.34, + "learning_rate": 1.1441647597254006e-05, + "loss": 3.4929, + "step": 2500 + }, + { + "epoch": 0.34, + "eval_HasAns_exact": 15.635261429653953, + "eval_HasAns_f1": 42.60135406380397, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 18.46153846153846, + "eval_NoAns_f1": 18.46153846153846, + "eval_NoAns_total": 65, + "eval_best_exact": 15.658273831892773, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 42.40480038546909, + "eval_best_f1_thresh": 0.0, + "eval_exact": 15.658273831892773, + "eval_f1": 42.404800385469095, + "eval_total": 7983, + "step": 2500 + }, + { + "epoch": 0.41, + "learning_rate": 1.3729977116704807e-05, + "loss": 3.3632, + "step": 3000 + }, + { + "epoch": 0.41, + "eval_HasAns_exact": 17.70649153826724, + "eval_HasAns_f1": 46.86612402307515, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 1.5384615384615385, + "eval_NoAns_f1": 1.5384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 17.574846548916447, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 46.49705248837645, + "eval_best_f1_thresh": 0.0, + "eval_exact": 17.574846548916447, + "eval_f1": 46.49705248837643, + "eval_total": 7983, + "step": 3000 + }, + { + "epoch": 0.48, + "learning_rate": 1.6018306636155606e-05, + "loss": 3.2348, + "step": 3500 + }, + { + "epoch": 0.48, + "eval_HasAns_exact": 18.615812073756, + "eval_HasAns_f1": 48.88254968548444, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 0.0, + "eval_NoAns_f1": 0.0, + "eval_NoAns_total": 65, + "eval_best_exact": 18.464236502567957, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 48.484533184224716, + "eval_best_f1_thresh": 0.0, + "eval_exact": 18.464236502567957, + "eval_f1": 48.4845331842247, + "eval_total": 7983, + "step": 3500 + }, + { + "epoch": 0.55, + "learning_rate": 1.830663615560641e-05, + "loss": 3.0918, + "step": 4000 + }, + { + "epoch": 0.55, + "eval_HasAns_exact": 19.790351098762315, + "eval_HasAns_f1": 50.19536205413491, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 0.0, + "eval_NoAns_f1": 0.0, + "eval_NoAns_total": 65, + "eval_best_exact": 19.629212075660778, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 49.7866562375849, + "eval_best_f1_thresh": 0.0, + "eval_exact": 19.629212075660778, + "eval_f1": 49.7866562375849, + "eval_total": 7983, + "step": 4000 + }, + { + "epoch": 0.62, + "learning_rate": 1.996202161846334e-05, + "loss": 2.9685, + "step": 4500 + }, + { + "epoch": 0.62, + "eval_HasAns_exact": 20.661783278605707, + "eval_HasAns_f1": 51.24202719367006, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 0.0, + "eval_NoAns_f1": 0.0, + "eval_NoAns_total": 65, + "eval_best_exact": 20.49354879118126, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 50.8247991130502, + "eval_best_f1_thresh": 0.0, + "eval_exact": 20.49354879118126, + "eval_f1": 50.824799113050176, + "eval_total": 7983, + "step": 4500 + }, + { + "epoch": 0.69, + "learning_rate": 1.98159509202454e-05, + "loss": 2.8876, + "step": 5000 + }, + { + "epoch": 0.69, + "eval_HasAns_exact": 21.987875726193483, + "eval_HasAns_f1": 53.06234486539824, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 0.0, + "eval_NoAns_f1": 0.0, + "eval_NoAns_total": 65, + "eval_best_exact": 21.80884379306025, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 52.63029520784457, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.80884379306025, + "eval_f1": 52.630295207844576, + "eval_total": 7983, + "step": 5000 + }, + { + "epoch": 0.76, + "learning_rate": 1.9669880222027462e-05, + "loss": 2.8069, + "step": 5500 + }, + { + "epoch": 0.76, + "eval_HasAns_exact": 22.139429148774944, + "eval_HasAns_f1": 52.947930590885036, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 0.0, + "eval_NoAns_f1": 0.0, + "eval_NoAns_total": 65, + "eval_best_exact": 21.959163221846424, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 52.5168125289525, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.959163221846424, + "eval_f1": 52.51681252895249, + "eval_total": 7983, + "step": 5500 + }, + { + "epoch": 0.82, + "learning_rate": 1.9523809523809524e-05, + "loss": 2.753, + "step": 6000 + }, + { + "epoch": 0.82, + "eval_HasAns_exact": 22.467794897701438, + "eval_HasAns_f1": 52.74131115227161, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 0.0, + "eval_NoAns_f1": 0.0, + "eval_NoAns_total": 65, + "eval_best_exact": 22.284855317549795, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 52.31187544828844, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.284855317549795, + "eval_f1": 52.31187544828843, + "eval_total": 7983, + "step": 6000 + }, + { + "epoch": 0.89, + "learning_rate": 1.937773882559159e-05, + "loss": 2.7304, + "step": 6500 + }, + { + "epoch": 0.89, + "eval_HasAns_exact": 24.05910583480677, + "eval_HasAns_f1": 55.094001124088884, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 0.0, + "eval_NoAns_f1": 0.0, + "eval_NoAns_total": 65, + "eval_best_exact": 23.863209319804586, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 54.64540910691915, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.863209319804586, + "eval_f1": 54.645409106919175, + "eval_total": 7983, + "step": 6500 + }, + { + "epoch": 0.96, + "learning_rate": 1.923166812737365e-05, + "loss": 2.6887, + "step": 7000 + }, + { + "epoch": 0.96, + "eval_HasAns_exact": 23.503409952008084, + "eval_HasAns_f1": 54.51083938244222, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 1.5384615384615385, + "eval_NoAns_f1": 1.5384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 23.324564699987473, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 54.07952226358227, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.324564699987473, + "eval_f1": 54.07952226358229, + "eval_total": 7983, + "step": 7000 + }, + { + "epoch": 1.03, + "learning_rate": 1.9085597429155715e-05, + "loss": 2.5921, + "step": 7500 + }, + { + "epoch": 1.03, + "eval_HasAns_exact": 22.467794897701438, + "eval_HasAns_f1": 53.881350612333, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 0.0, + "eval_NoAns_f1": 0.0, + "eval_NoAns_total": 65, + "eval_best_exact": 22.284855317549795, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 53.44263236232651, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.284855317549795, + "eval_f1": 53.44263236232653, + "eval_total": 7983, + "step": 7500 + }, + { + "epoch": 1.1, + "learning_rate": 1.8939526730937776e-05, + "loss": 2.4825, + "step": 8000 + }, + { + "epoch": 1.1, + "eval_HasAns_exact": 23.137155847436222, + "eval_HasAns_f1": 53.99581621835903, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 0.0, + "eval_NoAns_f1": 0.0, + "eval_NoAns_total": 65, + "eval_best_exact": 22.948766128022047, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 53.55616595477474, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.948766128022047, + "eval_f1": 53.55616595477475, + "eval_total": 7983, + "step": 8000 + }, + { + "epoch": 1.17, + "learning_rate": 1.8793456032719838e-05, + "loss": 2.4281, + "step": 8500 + }, + { + "epoch": 1.17, + "eval_HasAns_exact": 23.553927759535235, + "eval_HasAns_f1": 54.02861374681815, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 3.076923076923077, + "eval_NoAns_f1": 3.076923076923077, + "eval_NoAns_total": 65, + "eval_best_exact": 23.387197795315043, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 53.61374967397043, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.387197795315043, + "eval_f1": 53.61374967397047, + "eval_total": 7983, + "step": 8500 + }, + { + "epoch": 1.24, + "learning_rate": 1.86473853345019e-05, + "loss": 2.4263, + "step": 9000 + }, + { + "epoch": 1.24, + "eval_HasAns_exact": 24.109623642333922, + "eval_HasAns_f1": 54.76436323474705, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 0.0, + "eval_NoAns_f1": 0.0, + "eval_NoAns_total": 65, + "eval_best_exact": 23.91331579606664, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 54.3184552289524, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.91331579606664, + "eval_f1": 54.318455228952416, + "eval_total": 7983, + "step": 9000 + }, + { + "epoch": 1.3, + "learning_rate": 1.8501314636283964e-05, + "loss": 2.4629, + "step": 9500 + }, + { + "epoch": 1.3, + "eval_HasAns_exact": 24.475877746905784, + "eval_HasAns_f1": 55.617867547126586, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 0.0, + "eval_NoAns_f1": 0.0, + "eval_NoAns_total": 65, + "eval_best_exact": 24.276587748966556, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.16501005112716, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.276587748966556, + "eval_f1": 55.16501005112718, + "eval_total": 7983, + "step": 9500 + }, + { + "epoch": 1.37, + "learning_rate": 1.8355243938066025e-05, + "loss": 2.4136, + "step": 10000 + }, + { + "epoch": 1.37, + "eval_HasAns_exact": 24.475877746905784, + "eval_HasAns_f1": 56.18918648654074, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 0.0, + "eval_NoAns_f1": 0.0, + "eval_NoAns_total": 65, + "eval_best_exact": 24.276587748966556, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.731677138974, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.276587748966556, + "eval_f1": 55.73167713897402, + "eval_total": 7983, + "step": 10000 + }, + { + "epoch": 1.44, + "learning_rate": 1.8209173239848087e-05, + "loss": 2.4286, + "step": 10500 + }, + { + "epoch": 1.44, + "eval_HasAns_exact": 24.299065420560748, + "eval_HasAns_f1": 56.63451506748617, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 0.0, + "eval_NoAns_f1": 0.0, + "eval_NoAns_total": 65, + "eval_best_exact": 24.101215082049354, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.17337971994934, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.101215082049354, + "eval_f1": 56.17337971994933, + "eval_total": 7983, + "step": 10500 + }, + { + "epoch": 1.51, + "learning_rate": 1.806310254163015e-05, + "loss": 2.421, + "step": 11000 + }, + { + "epoch": 1.51, + "eval_HasAns_exact": 24.791614043950492, + "eval_HasAns_f1": 56.09913988305096, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 4.615384615384615, + "eval_NoAns_f1": 4.615384615384615, + "eval_NoAns_total": 65, + "eval_best_exact": 24.627333082800952, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.67994357935579, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.627333082800952, + "eval_f1": 55.679943579355815, + "eval_total": 7983, + "step": 11000 + }, + { + "epoch": 1.58, + "learning_rate": 1.7917031843412213e-05, + "loss": 2.4138, + "step": 11500 + }, + { + "epoch": 1.58, + "eval_HasAns_exact": 23.869664056579943, + "eval_HasAns_f1": 55.71886205295582, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 10.76923076923077, + "eval_NoAns_f1": 10.76923076923077, + "eval_NoAns_total": 65, + "eval_best_exact": 23.762996367280472, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.35286856260852, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.762996367280472, + "eval_f1": 55.35286856260858, + "eval_total": 7983, + "step": 11500 + }, + { + "epoch": 1.65, + "learning_rate": 1.7770961145194278e-05, + "loss": 2.3941, + "step": 12000 + }, + { + "epoch": 1.65, + "eval_HasAns_exact": 24.917908562768375, + "eval_HasAns_f1": 55.98821830578535, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 4.615384615384615, + "eval_NoAns_f1": 4.615384615384615, + "eval_NoAns_total": 65, + "eval_best_exact": 24.752599273456095, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.569925159114156, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.752599273456095, + "eval_f1": 55.56992515911417, + "eval_total": 7983, + "step": 12000 + }, + { + "epoch": 1.72, + "learning_rate": 1.7624890446976336e-05, + "loss": 2.3891, + "step": 12500 + }, + { + "epoch": 1.72, + "eval_HasAns_exact": 24.65269007325082, + "eval_HasAns_f1": 56.420675481209294, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 6.153846153846154, + "eval_NoAns_f1": 6.153846153846154, + "eval_NoAns_total": 65, + "eval_best_exact": 24.489540273080294, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.998861137443946, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.50206689214581, + "eval_f1": 56.01138775650948, + "eval_total": 7983, + "step": 12500 + }, + { + "epoch": 1.78, + "learning_rate": 1.74788197487584e-05, + "loss": 2.3541, + "step": 13000 + }, + { + "epoch": 1.78, + "eval_HasAns_exact": 24.65269007325082, + "eval_HasAns_f1": 56.3286384049731, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 4.615384615384615, + "eval_NoAns_f1": 4.615384615384615, + "eval_NoAns_total": 65, + "eval_best_exact": 24.489540273080294, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.90757345491381, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.489540273080294, + "eval_f1": 55.907573454913816, + "eval_total": 7983, + "step": 13000 + }, + { + "epoch": 1.85, + "learning_rate": 1.7332749050540462e-05, + "loss": 2.3694, + "step": 13500 + }, + { + "epoch": 1.85, + "eval_HasAns_exact": 25.34730992674918, + "eval_HasAns_f1": 56.89667335733098, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 0.0, + "eval_NoAns_f1": 0.0, + "eval_NoAns_total": 65, + "eval_best_exact": 25.140924464487036, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.43340343772345, + "eval_best_f1_thresh": 0.0, + "eval_exact": 25.140924464487036, + "eval_f1": 56.433403437723506, + "eval_total": 7983, + "step": 13500 + }, + { + "epoch": 1.92, + "learning_rate": 1.7186678352322527e-05, + "loss": 2.3684, + "step": 14000 + }, + { + "epoch": 1.92, + "eval_HasAns_exact": 25.082091437231625, + "eval_HasAns_f1": 56.242751844782845, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 3.076923076923077, + "eval_NoAns_f1": 3.076923076923077, + "eval_NoAns_total": 65, + "eval_best_exact": 24.902918702242264, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.809859590002546, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.902918702242264, + "eval_f1": 55.809859590002574, + "eval_total": 7983, + "step": 14000 + }, + { + "epoch": 1.99, + "learning_rate": 1.7040607654104588e-05, + "loss": 2.3991, + "step": 14500 + }, + { + "epoch": 1.99, + "eval_HasAns_exact": 23.99595857539783, + "eval_HasAns_f1": 56.43417395738433, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 3.076923076923077, + "eval_NoAns_f1": 3.076923076923077, + "eval_NoAns_total": 65, + "eval_best_exact": 23.825629462608042, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.99972308587861, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.825629462608042, + "eval_f1": 55.999723085878635, + "eval_total": 7983, + "step": 14500 + }, + { + "epoch": 2.06, + "learning_rate": 1.689453695588665e-05, + "loss": 2.0923, + "step": 15000 + }, + { + "epoch": 2.06, + "eval_HasAns_exact": 24.981055822177318, + "eval_HasAns_f1": 56.740647572969316, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 4.615384615384615, + "eval_NoAns_f1": 4.615384615384615, + "eval_NoAns_total": 65, + "eval_best_exact": 24.815232368783665, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.316227919675676, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.815232368783665, + "eval_f1": 56.31622791967569, + "eval_total": 7983, + "step": 15000 + }, + { + "epoch": 2.13, + "learning_rate": 1.6748466257668714e-05, + "loss": 2.0467, + "step": 15500 + }, + { + "epoch": 2.13, + "eval_HasAns_exact": 25.284162667340237, + "eval_HasAns_f1": 56.17685433436681, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 4.615384615384615, + "eval_NoAns_f1": 4.615384615384615, + "eval_NoAns_total": 65, + "eval_best_exact": 25.115871226356006, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.75702525610878, + "eval_best_f1_thresh": 0.0, + "eval_exact": 25.115871226356006, + "eval_f1": 55.75702525610878, + "eval_total": 7983, + "step": 15500 + }, + { + "epoch": 2.2, + "learning_rate": 1.6602395559450776e-05, + "loss": 2.0438, + "step": 16000 + }, + { + "epoch": 2.2, + "eval_HasAns_exact": 24.324324324324323, + "eval_HasAns_f1": 56.49055815994823, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 0.0, + "eval_NoAns_f1": 0.0, + "eval_NoAns_total": 65, + "eval_best_exact": 24.126268320180383, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.03059495308402, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.126268320180383, + "eval_f1": 56.03059495308406, + "eval_total": 7983, + "step": 16000 + }, + { + "epoch": 2.27, + "learning_rate": 1.6456324861232837e-05, + "loss": 2.0502, + "step": 16500 + }, + { + "epoch": 2.27, + "eval_HasAns_exact": 23.89492296034352, + "eval_HasAns_f1": 55.95219524867858, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 1.5384615384615385, + "eval_NoAns_f1": 1.5384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 23.712889891018413, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.50914217450042, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.712889891018413, + "eval_f1": 55.50914217450044, + "eval_total": 7983, + "step": 16500 + }, + { + "epoch": 2.33, + "learning_rate": 1.63102541630149e-05, + "loss": 2.0676, + "step": 17000 + }, + { + "epoch": 2.33, + "eval_HasAns_exact": 24.463248295023995, + "eval_HasAns_f1": 55.285627746177674, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 6.153846153846154, + "eval_NoAns_f1": 6.153846153846154, + "eval_NoAns_total": 65, + "eval_best_exact": 24.314167606163096, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 54.8855819233665, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.314167606163096, + "eval_f1": 54.88558192336651, + "eval_total": 7983, + "step": 17000 + }, + { + "epoch": 2.4, + "learning_rate": 1.6164183464796963e-05, + "loss": 2.0655, + "step": 17500 + }, + { + "epoch": 2.4, + "eval_HasAns_exact": 25.372568830512755, + "eval_HasAns_f1": 57.0197153609346, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 7.6923076923076925, + "eval_NoAns_f1": 7.6923076923076925, + "eval_NoAns_total": 65, + "eval_best_exact": 25.228610797945635, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.61807669145432, + "eval_best_f1_thresh": 0.0, + "eval_exact": 25.228610797945635, + "eval_f1": 56.61807669145436, + "eval_total": 7983, + "step": 17500 + }, + { + "epoch": 2.47, + "learning_rate": 1.6018112766579025e-05, + "loss": 2.0479, + "step": 18000 + }, + { + "epoch": 2.47, + "eval_HasAns_exact": 25.71356403132104, + "eval_HasAns_f1": 57.821942508234095, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 3.076923076923077, + "eval_NoAns_f1": 3.076923076923077, + "eval_NoAns_total": 65, + "eval_best_exact": 25.529249655517976, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 57.37619200553649, + "eval_best_f1_thresh": 0.0, + "eval_exact": 25.529249655517976, + "eval_f1": 57.37619200553646, + "eval_total": 7983, + "step": 18000 + }, + { + "epoch": 2.54, + "learning_rate": 1.587204206836109e-05, + "loss": 2.0603, + "step": 18500 + }, + { + "epoch": 2.54, + "eval_HasAns_exact": 25.018944177822682, + "eval_HasAns_f1": 57.38397521872463, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 4.615384615384615, + "eval_NoAns_f1": 4.615384615384615, + "eval_NoAns_total": 65, + "eval_best_exact": 24.85281222598021, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.954317397201734, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.85281222598021, + "eval_f1": 56.95431739720177, + "eval_total": 7983, + "step": 18500 + }, + { + "epoch": 2.61, + "learning_rate": 1.572597137014315e-05, + "loss": 2.0788, + "step": 19000 + }, + { + "epoch": 2.61, + "eval_HasAns_exact": 24.715837332659763, + "eval_HasAns_f1": 57.37643493713972, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 3.076923076923077, + "eval_NoAns_f1": 3.076923076923077, + "eval_NoAns_total": 65, + "eval_best_exact": 24.539646749342353, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.93431189180411, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.539646749342353, + "eval_f1": 56.93431189180413, + "eval_total": 7983, + "step": 19000 + }, + { + "epoch": 2.68, + "learning_rate": 1.5579900671925213e-05, + "loss": 2.076, + "step": 19500 + }, + { + "epoch": 2.68, + "eval_HasAns_exact": 24.917908562768375, + "eval_HasAns_f1": 57.176560884370765, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 4.615384615384615, + "eval_NoAns_f1": 4.615384615384615, + "eval_NoAns_total": 65, + "eval_best_exact": 24.752599273456095, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.748591893078725, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.752599273456095, + "eval_f1": 56.74859189307877, + "eval_total": 7983, + "step": 19500 + }, + { + "epoch": 2.75, + "learning_rate": 1.5433829973707274e-05, + "loss": 2.0729, + "step": 20000 + }, + { + "epoch": 2.75, + "eval_HasAns_exact": 25.663046223793888, + "eval_HasAns_f1": 57.259618778706425, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 4.615384615384615, + "eval_NoAns_f1": 4.615384615384615, + "eval_NoAns_total": 65, + "eval_best_exact": 25.491669798321432, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.83097350492259, + "eval_best_f1_thresh": 0.0, + "eval_exact": 25.491669798321432, + "eval_f1": 56.830973504922646, + "eval_total": 7983, + "step": 20000 + }, + { + "epoch": 2.81, + "learning_rate": 1.528775927548934e-05, + "loss": 2.0671, + "step": 20500 + }, + { + "epoch": 2.81, + "eval_HasAns_exact": 24.66531952513261, + "eval_HasAns_f1": 57.27218980931721, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 1.5384615384615385, + "eval_NoAns_f1": 1.5384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 24.477013654014783, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.81838894026976, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.477013654014783, + "eval_f1": 56.818388940269784, + "eval_total": 7983, + "step": 20500 + }, + { + "epoch": 2.88, + "learning_rate": 1.51416885772714e-05, + "loss": 2.0876, + "step": 21000 + }, + { + "epoch": 2.88, + "eval_HasAns_exact": 24.842131851477646, + "eval_HasAns_f1": 57.122549033702775, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 7.6923076923076925, + "eval_NoAns_f1": 7.6923076923076925, + "eval_NoAns_total": 65, + "eval_best_exact": 24.702492797194036, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.72007306136267, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.702492797194036, + "eval_f1": 56.720073061362704, + "eval_total": 7983, + "step": 21000 + }, + { + "epoch": 2.95, + "learning_rate": 1.4995617879053463e-05, + "loss": 2.0692, + "step": 21500 + }, + { + "epoch": 2.95, + "eval_HasAns_exact": 25.63778732003031, + "eval_HasAns_f1": 57.346401166726885, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 9.23076923076923, + "eval_NoAns_f1": 9.23076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 25.504196417386947, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.95462914169401, + "eval_best_f1_thresh": 0.0, + "eval_exact": 25.504196417386947, + "eval_f1": 56.95462914169406, + "eval_total": 7983, + "step": 21500 + }, + { + "epoch": 3.02, + "learning_rate": 1.4849547180835526e-05, + "loss": 1.9856, + "step": 22000 + }, + { + "epoch": 3.02, + "eval_HasAns_exact": 24.51376610255115, + "eval_HasAns_f1": 56.65859501004218, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 7.6923076923076925, + "eval_NoAns_f1": 7.6923076923076925, + "eval_NoAns_total": 65, + "eval_best_exact": 24.37680070149067, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.259896691658966, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.37680070149067, + "eval_f1": 56.25989669165901, + "eval_total": 7983, + "step": 22000 + }, + { + "epoch": 3.09, + "learning_rate": 1.470347648261759e-05, + "loss": 1.7772, + "step": 22500 + }, + { + "epoch": 3.09, + "eval_HasAns_exact": 25.208385956049508, + "eval_HasAns_f1": 57.541934442000205, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 9.23076923076923, + "eval_NoAns_f1": 9.23076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 25.078291369159462, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 57.14857032591221, + "eval_best_f1_thresh": 0.0, + "eval_exact": 25.078291369159462, + "eval_f1": 57.14857032591224, + "eval_total": 7983, + "step": 22500 + }, + { + "epoch": 3.16, + "learning_rate": 1.455740578439965e-05, + "loss": 1.7533, + "step": 23000 + }, + { + "epoch": 3.16, + "eval_HasAns_exact": 25.852488002020714, + "eval_HasAns_f1": 57.54889352900813, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 4.615384615384615, + "eval_NoAns_f1": 4.615384615384615, + "eval_NoAns_total": 65, + "eval_best_exact": 25.679569084304145, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 57.117892892732826, + "eval_best_f1_thresh": 0.0, + "eval_exact": 25.679569084304145, + "eval_f1": 57.11789289273286, + "eval_total": 7983, + "step": 23000 + }, + { + "epoch": 3.23, + "learning_rate": 1.4411335086181712e-05, + "loss": 1.7728, + "step": 23500 + }, + { + "epoch": 3.23, + "eval_HasAns_exact": 25.599898964384945, + "eval_HasAns_f1": 57.81775981872567, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 6.153846153846154, + "eval_NoAns_f1": 6.153846153846154, + "eval_NoAns_total": 65, + "eval_best_exact": 25.441563322059377, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 57.397096610881825, + "eval_best_f1_thresh": 0.0, + "eval_exact": 25.441563322059377, + "eval_f1": 57.39709661088185, + "eval_total": 7983, + "step": 23500 + }, + { + "epoch": 3.3, + "learning_rate": 1.4265264387963775e-05, + "loss": 1.7535, + "step": 24000 + }, + { + "epoch": 3.3, + "eval_HasAns_exact": 24.86739075524122, + "eval_HasAns_f1": 57.557703123488565, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 4.615384615384615, + "eval_NoAns_f1": 4.615384615384615, + "eval_NoAns_total": 65, + "eval_best_exact": 24.702492797194036, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 57.126630756831055, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.702492797194036, + "eval_f1": 57.12663075683106, + "eval_total": 7983, + "step": 24000 + }, + { + "epoch": 3.36, + "learning_rate": 1.4119193689745838e-05, + "loss": 1.764, + "step": 24500 + }, + { + "epoch": 3.36, + "eval_HasAns_exact": 24.943167466531953, + "eval_HasAns_f1": 56.947380755274594, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 7.6923076923076925, + "eval_NoAns_f1": 7.6923076923076925, + "eval_NoAns_total": 65, + "eval_best_exact": 24.80270574971815, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.54633105602702, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.80270574971815, + "eval_f1": 56.546331056027086, + "eval_total": 7983, + "step": 24500 + }, + { + "epoch": 3.43, + "learning_rate": 1.3973122991527902e-05, + "loss": 1.799, + "step": 25000 + }, + { + "epoch": 3.43, + "eval_HasAns_exact": 24.564283910078302, + "eval_HasAns_f1": 55.87293948312867, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 29.23076923076923, + "eval_NoAns_f1": 29.23076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 24.602279844669923, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.65601087653921, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.602279844669923, + "eval_f1": 55.65601087653924, + "eval_total": 7983, + "step": 25000 + }, + { + "epoch": 3.5, + "learning_rate": 1.3827052293309963e-05, + "loss": 1.8033, + "step": 25500 + }, + { + "epoch": 3.5, + "eval_HasAns_exact": 24.943167466531953, + "eval_HasAns_f1": 56.693149175873096, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 10.76923076923077, + "eval_NoAns_f1": 10.76923076923077, + "eval_NoAns_total": 65, + "eval_best_exact": 24.82775898784918, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.319222745153795, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.82775898784918, + "eval_f1": 56.31922274515385, + "eval_total": 7983, + "step": 25500 + }, + { + "epoch": 3.57, + "learning_rate": 1.3680981595092026e-05, + "loss": 1.7901, + "step": 26000 + }, + { + "epoch": 3.57, + "eval_HasAns_exact": 24.488507198787573, + "eval_HasAns_f1": 56.74255621835055, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 7.6923076923076925, + "eval_NoAns_f1": 7.6923076923076925, + "eval_NoAns_total": 65, + "eval_best_exact": 24.35174746335964, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.34317426242004, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.35174746335964, + "eval_f1": 56.3431742624201, + "eval_total": 7983, + "step": 26000 + }, + { + "epoch": 3.64, + "learning_rate": 1.3534910896874088e-05, + "loss": 1.7752, + "step": 26500 + }, + { + "epoch": 3.64, + "eval_HasAns_exact": 25.09472088911341, + "eval_HasAns_f1": 57.45911652355072, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 9.23076923076923, + "eval_NoAns_f1": 9.23076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 24.965551797569837, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 57.066426735998284, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.965551797569837, + "eval_f1": 57.0664267359983, + "eval_total": 7983, + "step": 26500 + }, + { + "epoch": 3.71, + "learning_rate": 1.338884019865615e-05, + "loss": 1.8101, + "step": 27000 + }, + { + "epoch": 3.71, + "eval_HasAns_exact": 24.854761303359435, + "eval_HasAns_f1": 56.59464728748924, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 24.77765251158712, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.259102746127944, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.77765251158712, + "eval_f1": 56.25910274612799, + "eval_total": 7983, + "step": 27000 + }, + { + "epoch": 3.78, + "learning_rate": 1.3242769500438212e-05, + "loss": 1.779, + "step": 27500 + }, + { + "epoch": 3.78, + "eval_HasAns_exact": 25.157868148522354, + "eval_HasAns_f1": 57.443891564398484, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 4.615384615384615, + "eval_NoAns_f1": 4.615384615384615, + "eval_NoAns_total": 65, + "eval_best_exact": 24.990605035700863, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 57.013745885870875, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.990605035700863, + "eval_f1": 57.013745885870904, + "eval_total": 7983, + "step": 27500 + }, + { + "epoch": 3.84, + "learning_rate": 1.3096698802220275e-05, + "loss": 1.7697, + "step": 28000 + }, + { + "epoch": 3.84, + "eval_HasAns_exact": 24.981055822177318, + "eval_HasAns_f1": 57.991875640396366, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 6.153846153846154, + "eval_NoAns_f1": 6.153846153846154, + "eval_NoAns_total": 65, + "eval_best_exact": 24.82775898784918, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 57.56979472888116, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.82775898784918, + "eval_f1": 57.56979472888117, + "eval_total": 7983, + "step": 28000 + }, + { + "epoch": 3.91, + "learning_rate": 1.2950628104002338e-05, + "loss": 1.8014, + "step": 28500 + }, + { + "epoch": 3.91, + "eval_HasAns_exact": 23.958070219752464, + "eval_HasAns_f1": 56.83764415249211, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 4.615384615384615, + "eval_NoAns_f1": 4.615384615384615, + "eval_NoAns_total": 65, + "eval_best_exact": 23.800576224477012, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.41243472371692, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.800576224477012, + "eval_f1": 56.41243472371696, + "eval_total": 7983, + "step": 28500 + }, + { + "epoch": 3.98, + "learning_rate": 1.2804557405784401e-05, + "loss": 1.7952, + "step": 29000 + }, + { + "epoch": 3.98, + "eval_HasAns_exact": 24.501136650669363, + "eval_HasAns_f1": 56.987466541223796, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 6.153846153846154, + "eval_NoAns_f1": 6.153846153846154, + "eval_NoAns_total": 65, + "eval_best_exact": 24.35174746335964, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.5735638323199, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.35174746335964, + "eval_f1": 56.57356383231995, + "eval_total": 7983, + "step": 29000 + }, + { + "epoch": 4.05, + "learning_rate": 1.2658486707566464e-05, + "loss": 1.5768, + "step": 29500 + }, + { + "epoch": 4.05, + "eval_HasAns_exact": 24.172770901742865, + "eval_HasAns_f1": 56.488669720521, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 24.101215082049354, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.15398808055679, + "eval_best_f1_thresh": 0.0, + "eval_exact": 24.101215082049354, + "eval_f1": 56.15398808055684, + "eval_total": 7983, + "step": 29500 + }, + { + "epoch": 4.12, + "learning_rate": 1.2512416009348524e-05, + "loss": 1.5075, + "step": 30000 + }, + { + "epoch": 4.12, + "eval_HasAns_exact": 23.869664056579943, + "eval_HasAns_f1": 55.37001243988256, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 12.307692307692308, + "eval_NoAns_f1": 12.307692307692308, + "eval_NoAns_total": 65, + "eval_best_exact": 23.775522986345987, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.01938600763996, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.775522986345987, + "eval_f1": 55.01938600763998, + "eval_total": 7983, + "step": 30000 + }, + { + "epoch": 4.19, + "learning_rate": 1.2366345311130587e-05, + "loss": 1.5191, + "step": 30500 + }, + { + "epoch": 4.19, + "eval_HasAns_exact": 23.30133872189947, + "eval_HasAns_f1": 55.78659265350773, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 13.846153846153847, + "eval_NoAns_f1": 13.846153846153847, + "eval_NoAns_total": 65, + "eval_best_exact": 23.22435174746336, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.4451009182606, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.22435174746336, + "eval_f1": 55.4451009182606, + "eval_total": 7983, + "step": 30500 + }, + { + "epoch": 4.26, + "learning_rate": 1.222027461291265e-05, + "loss": 1.5108, + "step": 31000 + }, + { + "epoch": 4.26, + "eval_HasAns_exact": 23.250820914372316, + "eval_HasAns_f1": 56.43743176160512, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 10.76923076923077, + "eval_NoAns_f1": 10.76923076923077, + "eval_NoAns_total": 65, + "eval_best_exact": 23.149192033070275, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.065587459399865, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.149192033070275, + "eval_f1": 56.065587459399865, + "eval_total": 7983, + "step": 31000 + }, + { + "epoch": 4.33, + "learning_rate": 1.2074203914694714e-05, + "loss": 1.5025, + "step": 31500 + }, + { + "epoch": 4.33, + "eval_HasAns_exact": 23.45289214448093, + "eval_HasAns_f1": 56.04845888253506, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 23.399724414380557, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.729888191395766, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.399724414380557, + "eval_f1": 55.72988819139578, + "eval_total": 7983, + "step": 31500 + }, + { + "epoch": 4.39, + "learning_rate": 1.1928133216476777e-05, + "loss": 1.5335, + "step": 32000 + }, + { + "epoch": 4.39, + "eval_HasAns_exact": 22.85930790603688, + "eval_HasAns_f1": 56.68438606813033, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 12.307692307692308, + "eval_NoAns_f1": 12.307692307692308, + "eval_NoAns_total": 65, + "eval_best_exact": 22.77339346110485, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.32305760834969, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.77339346110485, + "eval_f1": 56.323057608349735, + "eval_total": 7983, + "step": 32000 + }, + { + "epoch": 4.46, + "learning_rate": 1.1782062518258838e-05, + "loss": 1.5225, + "step": 32500 + }, + { + "epoch": 4.46, + "eval_HasAns_exact": 23.60444556706239, + "eval_HasAns_f1": 56.20113671315002, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 6.153846153846154, + "eval_NoAns_f1": 6.153846153846154, + "eval_NoAns_total": 65, + "eval_best_exact": 23.46235750970813, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.793636539486585, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.46235750970813, + "eval_f1": 55.793636539486634, + "eval_total": 7983, + "step": 32500 + }, + { + "epoch": 4.53, + "learning_rate": 1.1635991820040901e-05, + "loss": 1.5258, + "step": 33000 + }, + { + "epoch": 4.53, + "eval_HasAns_exact": 23.68022227835312, + "eval_HasAns_f1": 56.10398795615236, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 9.23076923076923, + "eval_NoAns_f1": 9.23076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 23.562570462232244, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.722332035176514, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.562570462232244, + "eval_f1": 55.72233203517653, + "eval_total": 7983, + "step": 33000 + }, + { + "epoch": 4.6, + "learning_rate": 1.1489921121822963e-05, + "loss": 1.5243, + "step": 33500 + }, + { + "epoch": 4.6, + "eval_HasAns_exact": 23.124526395554433, + "eval_HasAns_f1": 55.16381098253084, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 13.846153846153847, + "eval_NoAns_f1": 13.846153846153847, + "eval_NoAns_total": 65, + "eval_best_exact": 23.04897908054616, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 54.827390123973345, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.04897908054616, + "eval_f1": 54.82739012397335, + "eval_total": 7983, + "step": 33500 + }, + { + "epoch": 4.67, + "learning_rate": 1.1343850423605026e-05, + "loss": 1.5106, + "step": 34000 + }, + { + "epoch": 4.67, + "eval_HasAns_exact": 23.857034604698157, + "eval_HasAns_f1": 55.75167317754203, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 13.846153846153847, + "eval_NoAns_f1": 13.846153846153847, + "eval_NoAns_total": 65, + "eval_best_exact": 23.775522986345987, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.41046576722756, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.775522986345987, + "eval_f1": 55.41046576722758, + "eval_total": 7983, + "step": 34000 + }, + { + "epoch": 4.74, + "learning_rate": 1.1197779725387087e-05, + "loss": 1.5087, + "step": 34500 + }, + { + "epoch": 4.74, + "eval_HasAns_exact": 23.617075018944178, + "eval_HasAns_f1": 56.55795709995034, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 12.307692307692308, + "eval_NoAns_f1": 12.307692307692308, + "eval_NoAns_total": 65, + "eval_best_exact": 23.5249906050357, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.19765806305982, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.5249906050357, + "eval_f1": 56.197658063059876, + "eval_total": 7983, + "step": 34500 + }, + { + "epoch": 4.81, + "learning_rate": 1.105170902716915e-05, + "loss": 1.5146, + "step": 35000 + }, + { + "epoch": 4.81, + "eval_HasAns_exact": 23.654963374589542, + "eval_HasAns_f1": 56.11957102159652, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 23.600150319428785, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.80042131391719, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.600150319428785, + "eval_f1": 55.80042131391721, + "eval_total": 7983, + "step": 35000 + }, + { + "epoch": 4.87, + "learning_rate": 1.0905638328951213e-05, + "loss": 1.5457, + "step": 35500 + }, + { + "epoch": 4.87, + "eval_HasAns_exact": 23.187673654963376, + "eval_HasAns_f1": 55.30196544515009, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 21.53846153846154, + "eval_NoAns_f1": 21.53846153846154, + "eval_NoAns_total": 65, + "eval_best_exact": 23.174245271201304, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.02705278650863, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.174245271201304, + "eval_f1": 55.027052786508634, + "eval_total": 7983, + "step": 35500 + }, + { + "epoch": 4.94, + "learning_rate": 1.0759567630733276e-05, + "loss": 1.5391, + "step": 36000 + }, + { + "epoch": 4.94, + "eval_HasAns_exact": 22.909825713564032, + "eval_HasAns_f1": 54.75304453221414, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 13.846153846153847, + "eval_NoAns_f1": 13.846153846153847, + "eval_NoAns_total": 65, + "eval_best_exact": 22.83602655643242, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 54.419968258307854, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.83602655643242, + "eval_f1": 54.41996825830788, + "eval_total": 7983, + "step": 36000 + }, + { + "epoch": 5.01, + "learning_rate": 1.061349693251534e-05, + "loss": 1.4678, + "step": 36500 + }, + { + "epoch": 5.01, + "eval_HasAns_exact": 23.263450366254105, + "eval_HasAns_f1": 55.457692051771524, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 23.19929850933233, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.13140494374632, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.19929850933233, + "eval_f1": 55.13140494374634, + "eval_total": 7983, + "step": 36500 + }, + { + "epoch": 5.08, + "learning_rate": 1.04674262342974e-05, + "loss": 1.231, + "step": 37000 + }, + { + "epoch": 5.08, + "eval_HasAns_exact": 23.07400858802728, + "eval_HasAns_f1": 55.804051165907055, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 20.0, + "eval_NoAns_f1": 20.0, + "eval_NoAns_total": 65, + "eval_best_exact": 23.04897908054616, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.51252375443468, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.04897908054616, + "eval_f1": 55.512523754434696, + "eval_total": 7983, + "step": 37000 + }, + { + "epoch": 5.15, + "learning_rate": 1.0321355536079462e-05, + "loss": 1.2651, + "step": 37500 + }, + { + "epoch": 5.15, + "eval_HasAns_exact": 24.033846931043193, + "eval_HasAns_f1": 56.78984223580061, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 23.975948891394214, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.46523497721016, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.975948891394214, + "eval_f1": 56.46523497721018, + "eval_total": 7983, + "step": 37500 + }, + { + "epoch": 5.22, + "learning_rate": 1.0175284837861525e-05, + "loss": 1.2697, + "step": 38000 + }, + { + "epoch": 5.22, + "eval_HasAns_exact": 23.54129830765345, + "eval_HasAns_f1": 56.15134431131498, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 23.487410747839156, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.83193589590276, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.487410747839156, + "eval_f1": 55.831935895902795, + "eval_total": 7983, + "step": 38000 + }, + { + "epoch": 5.29, + "learning_rate": 1.0029214139643589e-05, + "loss": 1.2678, + "step": 38500 + }, + { + "epoch": 5.29, + "eval_HasAns_exact": 22.972972972972972, + "eval_HasAns_f1": 56.30865181101293, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 9.23076923076923, + "eval_NoAns_f1": 9.23076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 22.861079794563448, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.925329455041975, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.861079794563448, + "eval_f1": 55.925329455042025, + "eval_total": 7983, + "step": 38500 + }, + { + "epoch": 5.35, + "learning_rate": 9.883143441425652e-06, + "loss": 1.2719, + "step": 39000 + }, + { + "epoch": 5.35, + "eval_HasAns_exact": 22.98560242485476, + "eval_HasAns_f1": 56.43712830994619, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 22.936239508956533, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.115392954798146, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.936239508956533, + "eval_f1": 56.11539295479819, + "eval_total": 7983, + "step": 39000 + }, + { + "epoch": 5.42, + "learning_rate": 9.737072743207713e-06, + "loss": 1.2461, + "step": 39500 + }, + { + "epoch": 5.42, + "eval_HasAns_exact": 21.94998737054812, + "eval_HasAns_f1": 54.461077183587264, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 20.0, + "eval_NoAns_f1": 20.0, + "eval_NoAns_total": 65, + "eval_best_exact": 21.934109983715395, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 54.180484672384274, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.934109983715395, + "eval_f1": 54.18048467238429, + "eval_total": 7983, + "step": 39500 + }, + { + "epoch": 5.49, + "learning_rate": 9.591002044989776e-06, + "loss": 1.2682, + "step": 40000 + }, + { + "epoch": 5.49, + "eval_HasAns_exact": 22.480424349583227, + "eval_HasAns_f1": 56.09513335441032, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 9.23076923076923, + "eval_NoAns_f1": 9.23076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 22.36001503194288, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.701022911213904, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.372541651008394, + "eval_f1": 55.713549530279444, + "eval_total": 7983, + "step": 40000 + }, + { + "epoch": 5.56, + "learning_rate": 9.44493134677184e-06, + "loss": 1.2697, + "step": 40500 + }, + { + "epoch": 5.56, + "eval_HasAns_exact": 21.735286688557718, + "eval_HasAns_f1": 53.78512964065436, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 26.153846153846153, + "eval_NoAns_f1": 26.153846153846153, + "eval_NoAns_total": 65, + "eval_best_exact": 21.77126393586371, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 53.56014737500954, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.77126393586371, + "eval_f1": 53.56014737500955, + "eval_total": 7983, + "step": 40500 + }, + { + "epoch": 5.63, + "learning_rate": 9.2988606485539e-06, + "loss": 1.2917, + "step": 41000 + }, + { + "epoch": 5.63, + "eval_HasAns_exact": 23.023490780500126, + "eval_HasAns_f1": 55.033207325250956, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 21.53846153846154, + "eval_NoAns_f1": 21.53846153846154, + "eval_NoAns_total": 65, + "eval_best_exact": 23.011399223349617, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 54.760482976492185, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.011399223349617, + "eval_f1": 54.76048297649218, + "eval_total": 7983, + "step": 41000 + }, + { + "epoch": 5.7, + "learning_rate": 9.152789950335964e-06, + "loss": 1.2778, + "step": 41500 + }, + { + "epoch": 5.7, + "eval_HasAns_exact": 22.480424349583227, + "eval_HasAns_f1": 55.70683691098, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 12.307692307692308, + "eval_NoAns_f1": 12.307692307692308, + "eval_NoAns_total": 65, + "eval_best_exact": 22.39759488913942, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.3534679520405, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.39759488913942, + "eval_f1": 55.35346795204054, + "eval_total": 7983, + "step": 41500 + }, + { + "epoch": 5.77, + "learning_rate": 9.006719252118025e-06, + "loss": 1.2687, + "step": 42000 + }, + { + "epoch": 5.77, + "eval_HasAns_exact": 22.796160646627936, + "eval_HasAns_f1": 56.24362857300818, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 18.46153846153846, + "eval_NoAns_f1": 18.46153846153846, + "eval_NoAns_total": 65, + "eval_best_exact": 22.760866842039334, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.9359953702967, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.760866842039334, + "eval_f1": 55.93599537029674, + "eval_total": 7983, + "step": 42000 + }, + { + "epoch": 5.84, + "learning_rate": 8.860648553900088e-06, + "loss": 1.2824, + "step": 42500 + }, + { + "epoch": 5.84, + "eval_HasAns_exact": 22.745642839100782, + "eval_HasAns_f1": 56.39484118651382, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 10.76923076923077, + "eval_NoAns_f1": 10.76923076923077, + "eval_NoAns_total": 65, + "eval_best_exact": 22.648127270449706, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.02334366964999, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.648127270449706, + "eval_f1": 56.023343669650046, + "eval_total": 7983, + "step": 42500 + }, + { + "epoch": 5.9, + "learning_rate": 8.714577855682151e-06, + "loss": 1.2765, + "step": 43000 + }, + { + "epoch": 5.9, + "eval_HasAns_exact": 23.08663803990907, + "eval_HasAns_f1": 56.275076616780346, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 23.02392584241513, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.94213411645579, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.02392584241513, + "eval_f1": 55.94213411645581, + "eval_total": 7983, + "step": 43000 + }, + { + "epoch": 5.97, + "learning_rate": 8.568507157464213e-06, + "loss": 1.2842, + "step": 43500 + }, + { + "epoch": 5.97, + "eval_HasAns_exact": 22.935084617327608, + "eval_HasAns_f1": 56.49077118373285, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 22.886133032694477, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.16859905208523, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.886133032694477, + "eval_f1": 56.16859905208528, + "eval_total": 7983, + "step": 43500 + }, + { + "epoch": 6.04, + "learning_rate": 8.422436459246276e-06, + "loss": 1.1443, + "step": 44000 + }, + { + "epoch": 6.04, + "eval_HasAns_exact": 22.417277090174288, + "eval_HasAns_f1": 55.25309346308609, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 23.076923076923077, + "eval_NoAns_f1": 23.076923076923077, + "eval_NoAns_total": 65, + "eval_best_exact": 22.42264812727045, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 54.9911053539666, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.42264812727045, + "eval_f1": 54.99110535396661, + "eval_total": 7983, + "step": 44000 + }, + { + "epoch": 6.11, + "learning_rate": 8.276365761028339e-06, + "loss": 1.0544, + "step": 44500 + }, + { + "epoch": 6.11, + "eval_HasAns_exact": 23.21293255872695, + "eval_HasAns_f1": 56.57201194681735, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 23.149192033070275, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.236651709244576, + "eval_best_f1_thresh": 0.0, + "eval_exact": 23.16171865213579, + "eval_f1": 56.24917832831013, + "eval_total": 7983, + "step": 44500 + }, + { + "epoch": 6.18, + "learning_rate": 8.1302950628104e-06, + "loss": 1.0672, + "step": 45000 + }, + { + "epoch": 6.18, + "eval_HasAns_exact": 22.354129830765345, + "eval_HasAns_f1": 55.7700117261907, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 22.297381936615306, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.441181616933214, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.297381936615306, + "eval_f1": 55.44118161693323, + "eval_total": 7983, + "step": 45000 + }, + { + "epoch": 6.25, + "learning_rate": 7.984224364592464e-06, + "loss": 1.0696, + "step": 45500 + }, + { + "epoch": 6.25, + "eval_HasAns_exact": 23.07400858802728, + "eval_HasAns_f1": 56.442304320306086, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 12.307692307692308, + "eval_NoAns_f1": 12.307692307692308, + "eval_NoAns_total": 65, + "eval_best_exact": 22.98634598521859, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.08294696331998, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.98634598521859, + "eval_f1": 56.082946963320026, + "eval_total": 7983, + "step": 45500 + }, + { + "epoch": 6.32, + "learning_rate": 7.838153666374527e-06, + "loss": 1.0671, + "step": 46000 + }, + { + "epoch": 6.32, + "eval_HasAns_exact": 22.202576408183884, + "eval_HasAns_f1": 55.34797695561554, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 21.53846153846154, + "eval_NoAns_f1": 21.53846153846154, + "eval_NoAns_total": 65, + "eval_best_exact": 22.197168984091192, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.072689657342316, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.197168984091192, + "eval_f1": 55.07268965734232, + "eval_total": 7983, + "step": 46000 + }, + { + "epoch": 6.38, + "learning_rate": 7.69208296815659e-06, + "loss": 1.0649, + "step": 46500 + }, + { + "epoch": 6.38, + "eval_HasAns_exact": 22.32887092700177, + "eval_HasAns_f1": 55.65813462809328, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 20.0, + "eval_NoAns_f1": 20.0, + "eval_NoAns_total": 65, + "eval_best_exact": 22.30990855568082, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.36779531319585, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.30990855568082, + "eval_f1": 55.36779531319583, + "eval_total": 7983, + "step": 46500 + }, + { + "epoch": 6.45, + "learning_rate": 7.54601226993865e-06, + "loss": 1.0704, + "step": 47000 + }, + { + "epoch": 6.45, + "eval_HasAns_exact": 22.682495579691842, + "eval_HasAns_f1": 56.39872408814156, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 12.307692307692308, + "eval_NoAns_f1": 12.307692307692308, + "eval_NoAns_total": 65, + "eval_best_exact": 22.598020794187647, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.03972157458405, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.598020794187647, + "eval_f1": 56.03972157458409, + "eval_total": 7983, + "step": 47000 + }, + { + "epoch": 6.52, + "learning_rate": 7.3999415717207134e-06, + "loss": 1.0793, + "step": 47500 + }, + { + "epoch": 6.52, + "eval_HasAns_exact": 22.063652437484212, + "eval_HasAns_f1": 55.758375206459505, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 18.46153846153846, + "eval_NoAns_f1": 18.46153846153846, + "eval_NoAns_total": 65, + "eval_best_exact": 22.03432293623951, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.45469308339549, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.03432293623951, + "eval_f1": 55.454693083395476, + "eval_total": 7983, + "step": 47500 + }, + { + "epoch": 6.59, + "learning_rate": 7.2538708735027766e-06, + "loss": 1.0882, + "step": 48000 + }, + { + "epoch": 6.59, + "eval_HasAns_exact": 22.85930790603688, + "eval_HasAns_f1": 56.577537520045155, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 13.846153846153847, + "eval_NoAns_f1": 13.846153846153847, + "eval_NoAns_total": 65, + "eval_best_exact": 22.785920080170364, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.229605672518765, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.785920080170364, + "eval_f1": 56.229605672518794, + "eval_total": 7983, + "step": 48000 + }, + { + "epoch": 6.66, + "learning_rate": 7.107800175284838e-06, + "loss": 1.0902, + "step": 48500 + }, + { + "epoch": 6.66, + "eval_HasAns_exact": 22.139429148774944, + "eval_HasAns_f1": 56.283972169058366, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 22.084429412501567, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.95095723845719, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.084429412501567, + "eval_f1": 55.950957238457235, + "eval_total": 7983, + "step": 48500 + }, + { + "epoch": 6.73, + "learning_rate": 6.961729477066901e-06, + "loss": 1.0719, + "step": 49000 + }, + { + "epoch": 6.73, + "eval_HasAns_exact": 22.455165445819652, + "eval_HasAns_f1": 55.93362321999497, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 18.46153846153846, + "eval_NoAns_f1": 18.46153846153846, + "eval_NoAns_total": 65, + "eval_best_exact": 22.410121508204934, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.61598755554552, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.42264812727045, + "eval_f1": 55.6285141746111, + "eval_total": 7983, + "step": 49000 + }, + { + "epoch": 6.8, + "learning_rate": 6.815658778848963e-06, + "loss": 1.0736, + "step": 49500 + }, + { + "epoch": 6.8, + "eval_HasAns_exact": 21.71002778479414, + "eval_HasAns_f1": 55.39234728917103, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 21.671050983339597, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.079118857028206, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.671050983339597, + "eval_f1": 55.07911885702822, + "eval_total": 7983, + "step": 49500 + }, + { + "epoch": 6.87, + "learning_rate": 6.6695880806310264e-06, + "loss": 1.099, + "step": 50000 + }, + { + "epoch": 6.87, + "eval_HasAns_exact": 22.240464763829248, + "eval_HasAns_f1": 55.81572714282118, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 21.53846153846154, + "eval_NoAns_f1": 21.53846153846154, + "eval_NoAns_total": 65, + "eval_best_exact": 22.234748841287736, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.53663128107956, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.234748841287736, + "eval_f1": 55.53663128107958, + "eval_total": 7983, + "step": 50000 + }, + { + "epoch": 6.93, + "learning_rate": 6.523517382413088e-06, + "loss": 1.0901, + "step": 50500 + }, + { + "epoch": 6.93, + "eval_HasAns_exact": 21.987875726193483, + "eval_HasAns_f1": 54.90813130105857, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 21.934109983715395, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 54.5863188828488, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.934109983715395, + "eval_f1": 54.58631888284879, + "eval_total": 7983, + "step": 50500 + }, + { + "epoch": 7.0, + "learning_rate": 6.377446684195151e-06, + "loss": 1.0659, + "step": 51000 + }, + { + "epoch": 7.0, + "eval_HasAns_exact": 21.811063399848447, + "eval_HasAns_f1": 54.90414620994612, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 21.53846153846154, + "eval_NoAns_f1": 21.53846153846154, + "eval_NoAns_total": 65, + "eval_best_exact": 21.80884379306025, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 54.63247271581528, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.80884379306025, + "eval_f1": 54.63247271581526, + "eval_total": 7983, + "step": 51000 + }, + { + "epoch": 7.07, + "learning_rate": 6.231375985977214e-06, + "loss": 0.9032, + "step": 51500 + }, + { + "epoch": 7.07, + "eval_HasAns_exact": 22.15205860065673, + "eval_HasAns_f1": 55.72255495660219, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 22.09695603156708, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.39411125471326, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.09695603156708, + "eval_f1": 55.39411125471329, + "eval_total": 7983, + "step": 51500 + }, + { + "epoch": 7.14, + "learning_rate": 6.0853052877592755e-06, + "loss": 0.9125, + "step": 52000 + }, + { + "epoch": 7.14, + "eval_HasAns_exact": 22.278353119474616, + "eval_HasAns_f1": 55.7348659358302, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 18.46153846153846, + "eval_NoAns_f1": 18.46153846153846, + "eval_NoAns_total": 65, + "eval_best_exact": 22.24727546035325, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.43137523235669, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.24727546035325, + "eval_f1": 55.43137523235669, + "eval_total": 7983, + "step": 52000 + }, + { + "epoch": 7.21, + "learning_rate": 5.939234589541339e-06, + "loss": 0.8917, + "step": 52500 + }, + { + "epoch": 7.21, + "eval_HasAns_exact": 22.75827229098257, + "eval_HasAns_f1": 56.48355048657508, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 13.846153846153847, + "eval_NoAns_f1": 13.846153846153847, + "eval_NoAns_total": 65, + "eval_best_exact": 22.68570712764625, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.13638390989616, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.68570712764625, + "eval_f1": 56.13638390989625, + "eval_total": 7983, + "step": 52500 + }, + { + "epoch": 7.28, + "learning_rate": 5.793163891323401e-06, + "loss": 0.9111, + "step": 53000 + }, + { + "epoch": 7.28, + "eval_HasAns_exact": 21.71002778479414, + "eval_HasAns_f1": 55.32799628852263, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 21.671050983339597, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.015291821686354, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.671050983339597, + "eval_f1": 55.01529182168635, + "eval_total": 7983, + "step": 53000 + }, + { + "epoch": 7.35, + "learning_rate": 5.647093193105464e-06, + "loss": 0.929, + "step": 53500 + }, + { + "epoch": 7.35, + "eval_HasAns_exact": 21.92472846678454, + "eval_HasAns_f1": 55.97435992895805, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 21.871476888387825, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.643865954840265, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.871476888387825, + "eval_f1": 55.6438659548403, + "eval_total": 7983, + "step": 53500 + }, + { + "epoch": 7.41, + "learning_rate": 5.501022494887525e-06, + "loss": 0.9245, + "step": 54000 + }, + { + "epoch": 7.41, + "eval_HasAns_exact": 21.444809295276585, + "eval_HasAns_f1": 54.25388000235857, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 24.615384615384617, + "eval_NoAns_f1": 24.615384615384617, + "eval_NoAns_total": 65, + "eval_best_exact": 21.47062507829137, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 54.01255441045664, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.47062507829137, + "eval_f1": 54.01255441045667, + "eval_total": 7983, + "step": 54000 + }, + { + "epoch": 7.48, + "learning_rate": 5.3549517966695885e-06, + "loss": 0.9281, + "step": 54500 + }, + { + "epoch": 7.48, + "eval_HasAns_exact": 21.217479161404395, + "eval_HasAns_f1": 55.51878832249496, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 21.182512839784543, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.204530369223946, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.182512839784543, + "eval_f1": 55.204530369223974, + "eval_total": 7983, + "step": 54500 + }, + { + "epoch": 7.55, + "learning_rate": 5.2088810984516516e-06, + "loss": 0.921, + "step": 55000 + }, + { + "epoch": 7.55, + "eval_HasAns_exact": 21.55847436221268, + "eval_HasAns_f1": 55.80802472366072, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 21.4956783164224, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.46635848201745, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.50820493548791, + "eval_f1": 55.478885101083, + "eval_total": 7983, + "step": 55000 + }, + { + "epoch": 7.62, + "learning_rate": 5.062810400233713e-06, + "loss": 0.9311, + "step": 55500 + }, + { + "epoch": 7.62, + "eval_HasAns_exact": 21.962616822429908, + "eval_HasAns_f1": 56.38546877505227, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 12.307692307692308, + "eval_NoAns_f1": 12.307692307692308, + "eval_NoAns_total": 65, + "eval_best_exact": 21.88400350745334, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.02657419026227, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.88400350745334, + "eval_f1": 56.026574190262274, + "eval_total": 7983, + "step": 55500 + }, + { + "epoch": 7.69, + "learning_rate": 4.916739702015776e-06, + "loss": 0.9116, + "step": 56000 + }, + { + "epoch": 7.69, + "eval_HasAns_exact": 21.71002778479414, + "eval_HasAns_f1": 55.8268981189265, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 24.615384615384617, + "eval_NoAns_f1": 24.615384615384617, + "eval_NoAns_total": 65, + "eval_best_exact": 21.733684078667167, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.57276453785041, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.733684078667167, + "eval_f1": 55.57276453785043, + "eval_total": 7983, + "step": 56000 + }, + { + "epoch": 7.76, + "learning_rate": 4.770669003797838e-06, + "loss": 0.9173, + "step": 56500 + }, + { + "epoch": 7.76, + "eval_HasAns_exact": 21.596362717858046, + "eval_HasAns_f1": 56.19351406929686, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 23.076923076923077, + "eval_NoAns_f1": 23.076923076923077, + "eval_NoAns_total": 65, + "eval_best_exact": 21.608417888012024, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.9238687712254, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.608417888012024, + "eval_f1": 55.92386877122544, + "eval_total": 7983, + "step": 56500 + }, + { + "epoch": 7.83, + "learning_rate": 4.624598305579901e-06, + "loss": 0.922, + "step": 57000 + }, + { + "epoch": 7.83, + "eval_HasAns_exact": 22.17731750442031, + "eval_HasAns_f1": 55.756455451415086, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 24.615384615384617, + "eval_NoAns_f1": 24.615384615384617, + "eval_NoAns_total": 65, + "eval_best_exact": 22.197168984091192, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.50289543583923, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.197168984091192, + "eval_f1": 55.502895435839214, + "eval_total": 7983, + "step": 57000 + }, + { + "epoch": 7.9, + "learning_rate": 4.478527607361964e-06, + "loss": 0.9138, + "step": 57500 + }, + { + "epoch": 7.9, + "eval_HasAns_exact": 22.08891134124779, + "eval_HasAns_f1": 56.82319956291719, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 18.46153846153846, + "eval_NoAns_f1": 18.46153846153846, + "eval_NoAns_total": 65, + "eval_best_exact": 22.046849555305023, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.49832069888238, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.059376174370538, + "eval_f1": 56.51084731794791, + "eval_total": 7983, + "step": 57500 + }, + { + "epoch": 7.96, + "learning_rate": 4.332456909144026e-06, + "loss": 0.9051, + "step": 58000 + }, + { + "epoch": 7.96, + "eval_HasAns_exact": 21.886840111139175, + "eval_HasAns_f1": 55.728471748611895, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 20.0, + "eval_NoAns_f1": 20.0, + "eval_NoAns_total": 65, + "eval_best_exact": 21.871476888387825, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.437559727609774, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.871476888387825, + "eval_f1": 55.43755972760981, + "eval_total": 7983, + "step": 58000 + }, + { + "epoch": 8.03, + "learning_rate": 4.186386210926088e-06, + "loss": 0.8438, + "step": 58500 + }, + { + "epoch": 8.03, + "eval_HasAns_exact": 21.92472846678454, + "eval_HasAns_f1": 56.222570513713045, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 18.46153846153846, + "eval_NoAns_f1": 18.46153846153846, + "eval_NoAns_total": 65, + "eval_best_exact": 21.88400350745334, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.9025821530226, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.896530126518854, + "eval_f1": 55.91510877208819, + "eval_total": 7983, + "step": 58500 + }, + { + "epoch": 8.1, + "learning_rate": 4.040315512708151e-06, + "loss": 0.776, + "step": 59000 + }, + { + "epoch": 8.1, + "eval_HasAns_exact": 22.227835311947462, + "eval_HasAns_f1": 56.1608777793549, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 18.46153846153846, + "eval_NoAns_f1": 18.46153846153846, + "eval_NoAns_total": 65, + "eval_best_exact": 22.18464236502568, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.84139173956304, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.197168984091192, + "eval_f1": 55.85391835862862, + "eval_total": 7983, + "step": 59000 + }, + { + "epoch": 8.17, + "learning_rate": 3.894244814490214e-06, + "loss": 0.7949, + "step": 59500 + }, + { + "epoch": 8.17, + "eval_HasAns_exact": 22.025764081838847, + "eval_HasAns_f1": 56.274619626461195, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 21.959163221846424, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.92915422802447, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.97168984091194, + "eval_f1": 55.94168084709003, + "eval_total": 7983, + "step": 59500 + }, + { + "epoch": 8.24, + "learning_rate": 3.7481741162722763e-06, + "loss": 0.7951, + "step": 60000 + }, + { + "epoch": 8.24, + "eval_HasAns_exact": 21.684768881030564, + "eval_HasAns_f1": 56.08759973467501, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 20.0, + "eval_NoAns_f1": 20.0, + "eval_NoAns_total": 65, + "eval_best_exact": 21.671050983339597, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.79376358501269, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.671050983339597, + "eval_f1": 55.79376358501277, + "eval_total": 7983, + "step": 60000 + }, + { + "epoch": 8.31, + "learning_rate": 3.6021034180543385e-06, + "loss": 0.7791, + "step": 60500 + }, + { + "epoch": 8.31, + "eval_HasAns_exact": 22.051022985602426, + "eval_HasAns_f1": 56.14666242246213, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 21.996743079042965, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.81476550933922, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.996743079042965, + "eval_f1": 55.81476550933923, + "eval_total": 7983, + "step": 60500 + }, + { + "epoch": 8.38, + "learning_rate": 3.4560327198364012e-06, + "loss": 0.8014, + "step": 61000 + }, + { + "epoch": 8.38, + "eval_HasAns_exact": 22.354129830765345, + "eval_HasAns_f1": 56.44277640131104, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 22.284855317549795, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.0959418195641, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.297381936615306, + "eval_f1": 56.10846843862965, + "eval_total": 7983, + "step": 61000 + }, + { + "epoch": 8.44, + "learning_rate": 3.3099620216184635e-06, + "loss": 0.7841, + "step": 61500 + }, + { + "epoch": 8.44, + "eval_HasAns_exact": 22.215205860065673, + "eval_HasAns_f1": 56.09094279428335, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 22.172115745960166, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.77202618628775, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.172115745960166, + "eval_f1": 55.77202618628781, + "eval_total": 7983, + "step": 61500 + }, + { + "epoch": 8.51, + "learning_rate": 3.1638913234005257e-06, + "loss": 0.8008, + "step": 62000 + }, + { + "epoch": 8.51, + "eval_HasAns_exact": 22.063652437484212, + "eval_HasAns_f1": 56.17916434394415, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 20.0, + "eval_NoAns_f1": 20.0, + "eval_NoAns_total": 65, + "eval_best_exact": 22.046849555305023, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.88458264754471, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.046849555305023, + "eval_f1": 55.88458264754477, + "eval_total": 7983, + "step": 62000 + }, + { + "epoch": 8.58, + "learning_rate": 3.017820625182589e-06, + "loss": 0.7879, + "step": 62500 + }, + { + "epoch": 8.58, + "eval_HasAns_exact": 22.10154079312958, + "eval_HasAns_f1": 56.32847927477324, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 13.846153846153847, + "eval_NoAns_f1": 13.846153846153847, + "eval_NoAns_total": 65, + "eval_best_exact": 22.021796317173994, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.9700487157277, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.03432293623951, + "eval_f1": 55.98257533479327, + "eval_total": 7983, + "step": 62500 + }, + { + "epoch": 8.65, + "learning_rate": 2.871749926964651e-06, + "loss": 0.7893, + "step": 63000 + }, + { + "epoch": 8.65, + "eval_HasAns_exact": 21.811063399848447, + "eval_HasAns_f1": 55.61502667468564, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 21.53846153846154, + "eval_NoAns_f1": 21.53846153846154, + "eval_NoAns_total": 65, + "eval_best_exact": 21.79631717399474, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.32503835778039, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.80884379306025, + "eval_f1": 55.33756497684592, + "eval_total": 7983, + "step": 63000 + }, + { + "epoch": 8.72, + "learning_rate": 2.725679228746714e-06, + "loss": 0.8072, + "step": 63500 + }, + { + "epoch": 8.72, + "eval_HasAns_exact": 21.94998737054812, + "eval_HasAns_f1": 56.147388772329045, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 18.46153846153846, + "eval_NoAns_f1": 18.46153846153846, + "eval_NoAns_total": 65, + "eval_best_exact": 21.909056745584365, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.82801256411139, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.92158336464988, + "eval_f1": 55.84053918317692, + "eval_total": 7983, + "step": 63500 + }, + { + "epoch": 8.79, + "learning_rate": 2.579608530528776e-06, + "loss": 0.7824, + "step": 64000 + }, + { + "epoch": 8.79, + "eval_HasAns_exact": 22.038393533720637, + "eval_HasAns_f1": 56.069960647102285, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 18.46153846153846, + "eval_NoAns_f1": 18.46153846153846, + "eval_NoAns_total": 65, + "eval_best_exact": 21.996743079042965, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.751214882093905, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.00926969810848, + "eval_f1": 55.763741501159465, + "eval_total": 7983, + "step": 64000 + }, + { + "epoch": 8.86, + "learning_rate": 2.4335378323108387e-06, + "loss": 0.7966, + "step": 64500 + }, + { + "epoch": 8.86, + "eval_HasAns_exact": 22.10154079312958, + "eval_HasAns_f1": 56.371415222133756, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 18.46153846153846, + "eval_NoAns_f1": 18.46153846153846, + "eval_NoAns_total": 65, + "eval_best_exact": 22.059376174370538, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 56.05021492281784, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.071902793436053, + "eval_f1": 56.06274154188338, + "eval_total": 7983, + "step": 64500 + }, + { + "epoch": 8.92, + "learning_rate": 2.287467134092901e-06, + "loss": 0.7966, + "step": 65000 + }, + { + "epoch": 8.92, + "eval_HasAns_exact": 21.94998737054812, + "eval_HasAns_f1": 55.94050441592927, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 20.0, + "eval_NoAns_f1": 20.0, + "eval_NoAns_total": 65, + "eval_best_exact": 21.92158336464988, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.63533934176723, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.934109983715395, + "eval_f1": 55.6478659608328, + "eval_total": 7983, + "step": 65000 + }, + { + "epoch": 8.99, + "learning_rate": 2.1413964358749637e-06, + "loss": 0.7947, + "step": 65500 + }, + { + "epoch": 8.99, + "eval_HasAns_exact": 21.62162162162162, + "eval_HasAns_f1": 56.009996934703764, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 20.0, + "eval_NoAns_f1": 20.0, + "eval_NoAns_total": 65, + "eval_best_exact": 21.595891268946513, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.70426603143982, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.608417888012024, + "eval_f1": 55.7167926505054, + "eval_total": 7983, + "step": 65500 + }, + { + "epoch": 9.06, + "learning_rate": 1.9953257376570264e-06, + "loss": 0.6858, + "step": 66000 + }, + { + "epoch": 9.06, + "eval_HasAns_exact": 21.596362717858046, + "eval_HasAns_f1": 55.77625294716333, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 18.46153846153846, + "eval_NoAns_f1": 18.46153846153846, + "eval_NoAns_total": 65, + "eval_best_exact": 21.55831141174997, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.459898639062885, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.570838030815484, + "eval_f1": 55.47242525812843, + "eval_total": 7983, + "step": 66000 + }, + { + "epoch": 9.13, + "learning_rate": 1.8492550394390888e-06, + "loss": 0.7001, + "step": 66500 + }, + { + "epoch": 9.13, + "eval_HasAns_exact": 21.912099014902754, + "eval_HasAns_f1": 56.00529549829087, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 21.846423650256796, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.66202301834734, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.85895026932231, + "eval_f1": 55.6745496374129, + "eval_total": 7983, + "step": 66500 + }, + { + "epoch": 9.2, + "learning_rate": 1.7031843412211513e-06, + "loss": 0.7017, + "step": 67000 + }, + { + "epoch": 9.2, + "eval_HasAns_exact": 21.48269765092195, + "eval_HasAns_f1": 55.98671029097834, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 21.433045221094826, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.6561157564783, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.44557184016034, + "eval_f1": 55.668642375543875, + "eval_total": 7983, + "step": 67000 + }, + { + "epoch": 9.27, + "learning_rate": 1.5571136430032136e-06, + "loss": 0.7042, + "step": 67500 + }, + { + "epoch": 9.27, + "eval_HasAns_exact": 21.84895175549381, + "eval_HasAns_f1": 55.89859581000308, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 20.0, + "eval_NoAns_f1": 20.0, + "eval_NoAns_total": 65, + "eval_best_exact": 21.821370412125766, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.59377196838335, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.83389703119128, + "eval_f1": 55.60629858744889, + "eval_total": 7983, + "step": 67500 + }, + { + "epoch": 9.34, + "learning_rate": 1.411042944785276e-06, + "loss": 0.7119, + "step": 68000 + }, + { + "epoch": 9.34, + "eval_HasAns_exact": 21.899469563020965, + "eval_HasAns_f1": 56.260524050489586, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 21.846423650256796, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.92770004156034, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.85895026932231, + "eval_f1": 55.940226660625896, + "eval_total": 7983, + "step": 68000 + }, + { + "epoch": 9.41, + "learning_rate": 1.2649722465673387e-06, + "loss": 0.7158, + "step": 68500 + }, + { + "epoch": 9.41, + "eval_HasAns_exact": 21.242738065167973, + "eval_HasAns_f1": 55.89174064933859, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 21.195039458850058, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.561919386378875, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.20756607791557, + "eval_f1": 55.57444600544444, + "eval_total": 7983, + "step": 68500 + }, + { + "epoch": 9.47, + "learning_rate": 1.1189015483494012e-06, + "loss": 0.6944, + "step": 69000 + }, + { + "epoch": 9.47, + "eval_HasAns_exact": 21.672139429148775, + "eval_HasAns_f1": 55.86251843204239, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 21.608417888012024, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.52040848614698, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.62094450707754, + "eval_f1": 55.53293510521254, + "eval_total": 7983, + "step": 69000 + }, + { + "epoch": 9.54, + "learning_rate": 9.728308501314637e-07, + "loss": 0.7007, + "step": 69500 + }, + { + "epoch": 9.54, + "eval_HasAns_exact": 21.672139429148775, + "eval_HasAns_f1": 55.84201571688193, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 15.384615384615385, + "eval_NoAns_f1": 15.384615384615385, + "eval_NoAns_total": 65, + "eval_best_exact": 21.608417888012024, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.50007271029319, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.62094450707754, + "eval_f1": 55.51259932935878, + "eval_total": 7983, + "step": 69500 + }, + { + "epoch": 9.61, + "learning_rate": 8.267601519135262e-07, + "loss": 0.7084, + "step": 70000 + }, + { + "epoch": 9.61, + "eval_HasAns_exact": 21.747916140439504, + "eval_HasAns_f1": 56.25415794858626, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 18.46153846153846, + "eval_NoAns_f1": 18.46153846153846, + "eval_NoAns_total": 65, + "eval_best_exact": 21.708630840536138, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.9339123934493, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.721157459601653, + "eval_f1": 55.94643901251486, + "eval_total": 7983, + "step": 70000 + }, + { + "epoch": 9.68, + "learning_rate": 6.806894536955887e-07, + "loss": 0.7008, + "step": 70500 + }, + { + "epoch": 9.68, + "eval_HasAns_exact": 21.747916140439504, + "eval_HasAns_f1": 56.058704157634516, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 18.46153846153846, + "eval_NoAns_f1": 18.46153846153846, + "eval_NoAns_total": 65, + "eval_best_exact": 21.708630840536138, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.740050046367244, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.721157459601653, + "eval_f1": 55.75257666543282, + "eval_total": 7983, + "step": 70500 + }, + { + "epoch": 9.75, + "learning_rate": 5.346187554776512e-07, + "loss": 0.6984, + "step": 71000 + }, + { + "epoch": 9.75, + "eval_HasAns_exact": 21.78580449608487, + "eval_HasAns_f1": 56.249722496375995, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 21.733684078667167, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.91698643696666, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.746210697732682, + "eval_f1": 55.92951305603222, + "eval_total": 7983, + "step": 71000 + }, + { + "epoch": 9.82, + "learning_rate": 3.8854805725971376e-07, + "loss": 0.7118, + "step": 71500 + }, + { + "epoch": 9.82, + "eval_HasAns_exact": 22.051022985602426, + "eval_HasAns_f1": 56.254669133111044, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 21.996743079042965, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.92189279668958, + "eval_best_f1_thresh": 0.0, + "eval_exact": 22.00926969810848, + "eval_f1": 55.934419415755144, + "eval_total": 7983, + "step": 71500 + }, + { + "epoch": 9.89, + "learning_rate": 2.4247735904177623e-07, + "loss": 0.6861, + "step": 72000 + }, + { + "epoch": 9.89, + "eval_HasAns_exact": 22.025764081838847, + "eval_HasAns_f1": 56.10462500608517, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 21.97168984091194, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.77307037431819, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.984216459977453, + "eval_f1": 55.785596993383734, + "eval_total": 7983, + "step": 72000 + }, + { + "epoch": 9.95, + "learning_rate": 9.640666082383875e-08, + "loss": 0.6937, + "step": 72500 + }, + { + "epoch": 9.95, + "eval_HasAns_exact": 21.94998737054812, + "eval_HasAns_f1": 56.1125349455772, + "eval_HasAns_total": 7918, + "eval_NoAns_exact": 16.923076923076923, + "eval_NoAns_f1": 16.923076923076923, + "eval_NoAns_total": 65, + "eval_best_exact": 21.896530126518854, + "eval_best_exact_thresh": 0.0, + "eval_best_f1": 55.78091590869094, + "eval_best_f1_thresh": 0.0, + "eval_exact": 21.909056745584365, + "eval_f1": 55.793442527756504, + "eval_total": 7983, + "step": 72500 + }, + { + "epoch": 10.0, + "step": 72830, + "total_flos": 3.045026980558111e+17, + "train_loss": 1.5957024367517223, + "train_runtime": 72711.6119, + "train_samples_per_second": 16.027, + "train_steps_per_second": 1.002 + } + ], + "max_steps": 72830, + "num_train_epochs": 10, + "total_flos": 3.045026980558111e+17, + "trial_name": null, + "trial_params": null +}