{ "best_metric": 0.7562862669245648, "best_model_checkpoint": "/train_db_id/checkpoint-66500", "epoch": 62.03357817418678, "global_step": 66500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999988339552239e-05, "loss": 3.3987, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.999953358208955e-05, "loss": 2.8913, "step": 4 }, { "epoch": 0.01, "learning_rate": 4.9999067164179105e-05, "loss": 1.6031, "step": 8 }, { "epoch": 0.01, "learning_rate": 4.999860074626866e-05, "loss": 1.5674, "step": 12 }, { "epoch": 0.01, "learning_rate": 4.999813432835821e-05, "loss": 1.075, "step": 16 }, { "epoch": 0.02, "learning_rate": 4.999766791044776e-05, "loss": 1.0344, "step": 20 }, { "epoch": 0.02, "learning_rate": 4.999720149253732e-05, "loss": 0.839, "step": 24 }, { "epoch": 0.03, "learning_rate": 4.9996735074626866e-05, "loss": 0.6872, "step": 28 }, { "epoch": 0.03, "learning_rate": 4.999626865671642e-05, "loss": 0.6713, "step": 32 }, { "epoch": 0.03, "learning_rate": 4.999580223880597e-05, "loss": 0.707, "step": 36 }, { "epoch": 0.04, "learning_rate": 4.9995335820895524e-05, "loss": 0.6456, "step": 40 }, { "epoch": 0.04, "learning_rate": 4.999486940298508e-05, "loss": 0.6796, "step": 44 }, { "epoch": 0.04, "learning_rate": 4.999440298507463e-05, "loss": 0.5273, "step": 48 }, { "epoch": 0.05, "learning_rate": 4.999393656716418e-05, "loss": 0.9018, "step": 52 }, { "epoch": 0.05, "learning_rate": 4.999347014925374e-05, "loss": 0.6358, "step": 56 }, { "epoch": 0.06, "learning_rate": 4.9993003731343285e-05, "loss": 0.462, "step": 60 }, { "epoch": 0.06, "learning_rate": 4.9992537313432833e-05, "loss": 0.5077, "step": 64 }, { "epoch": 0.06, "learning_rate": 4.999207089552239e-05, "loss": 0.5189, "step": 68 }, { "epoch": 0.07, "learning_rate": 4.999160447761194e-05, "loss": 0.5189, "step": 72 }, { "epoch": 0.07, "learning_rate": 4.999113805970149e-05, "loss": 0.4512, "step": 76 }, { "epoch": 0.07, "learning_rate": 4.9990671641791046e-05, "loss": 0.4906, "step": 80 }, { "epoch": 0.08, "learning_rate": 4.99902052238806e-05, "loss": 0.4084, "step": 84 }, { "epoch": 0.08, "learning_rate": 4.9989738805970156e-05, "loss": 0.3692, "step": 88 }, { "epoch": 0.09, "learning_rate": 4.9989272388059704e-05, "loss": 0.459, "step": 92 }, { "epoch": 0.09, "learning_rate": 4.998880597014925e-05, "loss": 0.4347, "step": 96 }, { "epoch": 0.09, "learning_rate": 4.9988339552238814e-05, "loss": 0.39, "step": 100 }, { "epoch": 0.1, "learning_rate": 4.998787313432836e-05, "loss": 0.4953, "step": 104 }, { "epoch": 0.1, "learning_rate": 4.998740671641791e-05, "loss": 0.4506, "step": 108 }, { "epoch": 0.1, "learning_rate": 4.9986940298507465e-05, "loss": 0.407, "step": 112 }, { "epoch": 0.11, "learning_rate": 4.998647388059702e-05, "loss": 0.3425, "step": 116 }, { "epoch": 0.11, "learning_rate": 4.998600746268657e-05, "loss": 0.4349, "step": 120 }, { "epoch": 0.12, "learning_rate": 4.998554104477612e-05, "loss": 0.3413, "step": 124 }, { "epoch": 0.12, "learning_rate": 4.998507462686567e-05, "loss": 0.3923, "step": 128 }, { "epoch": 0.12, "learning_rate": 4.9984608208955226e-05, "loss": 0.4086, "step": 132 }, { "epoch": 0.13, "learning_rate": 4.998414179104478e-05, "loss": 0.3972, "step": 136 }, { "epoch": 0.13, "learning_rate": 4.998367537313433e-05, "loss": 0.3259, "step": 140 }, { "epoch": 0.13, "learning_rate": 4.9983208955223884e-05, "loss": 0.3852, "step": 144 }, { "epoch": 0.14, "learning_rate": 4.998274253731344e-05, "loss": 0.3536, "step": 148 }, { "epoch": 0.14, "learning_rate": 4.998227611940299e-05, "loss": 0.3187, "step": 152 }, { "epoch": 0.15, "learning_rate": 4.9981809701492535e-05, "loss": 0.2841, "step": 156 }, { "epoch": 0.15, "learning_rate": 4.99813432835821e-05, "loss": 0.4, "step": 160 }, { "epoch": 0.15, "learning_rate": 4.9980876865671645e-05, "loss": 0.319, "step": 164 }, { "epoch": 0.16, "learning_rate": 4.998041044776119e-05, "loss": 0.285, "step": 168 }, { "epoch": 0.16, "learning_rate": 4.997994402985075e-05, "loss": 0.3721, "step": 172 }, { "epoch": 0.16, "learning_rate": 4.99794776119403e-05, "loss": 0.2752, "step": 176 }, { "epoch": 0.17, "learning_rate": 4.997901119402985e-05, "loss": 0.2895, "step": 180 }, { "epoch": 0.17, "learning_rate": 4.9978544776119406e-05, "loss": 0.2652, "step": 184 }, { "epoch": 0.18, "learning_rate": 4.9978078358208954e-05, "loss": 0.4065, "step": 188 }, { "epoch": 0.18, "learning_rate": 4.997761194029851e-05, "loss": 0.2846, "step": 192 }, { "epoch": 0.18, "learning_rate": 4.9977145522388064e-05, "loss": 0.3481, "step": 196 }, { "epoch": 0.19, "learning_rate": 4.997667910447761e-05, "loss": 0.33, "step": 200 }, { "epoch": 0.19, "learning_rate": 4.997621268656717e-05, "loss": 0.3122, "step": 204 }, { "epoch": 0.19, "learning_rate": 4.997574626865672e-05, "loss": 0.2949, "step": 208 }, { "epoch": 0.2, "learning_rate": 4.997527985074627e-05, "loss": 0.2282, "step": 212 }, { "epoch": 0.2, "learning_rate": 4.997481343283582e-05, "loss": 0.2633, "step": 216 }, { "epoch": 0.21, "learning_rate": 4.997434701492538e-05, "loss": 0.2847, "step": 220 }, { "epoch": 0.21, "learning_rate": 4.997388059701493e-05, "loss": 0.252, "step": 224 }, { "epoch": 0.21, "learning_rate": 4.9973414179104476e-05, "loss": 0.2747, "step": 228 }, { "epoch": 0.22, "learning_rate": 4.997294776119403e-05, "loss": 0.2968, "step": 232 }, { "epoch": 0.22, "learning_rate": 4.9972481343283586e-05, "loss": 0.3299, "step": 236 }, { "epoch": 0.22, "learning_rate": 4.9972014925373134e-05, "loss": 0.3038, "step": 240 }, { "epoch": 0.23, "learning_rate": 4.997154850746269e-05, "loss": 0.214, "step": 244 }, { "epoch": 0.23, "learning_rate": 4.997108208955224e-05, "loss": 0.2138, "step": 248 }, { "epoch": 0.24, "learning_rate": 4.99706156716418e-05, "loss": 0.2308, "step": 252 }, { "epoch": 0.24, "learning_rate": 4.997014925373135e-05, "loss": 0.3071, "step": 256 }, { "epoch": 0.24, "learning_rate": 4.9969682835820895e-05, "loss": 0.3456, "step": 260 }, { "epoch": 0.25, "learning_rate": 4.996921641791045e-05, "loss": 0.2254, "step": 264 }, { "epoch": 0.25, "learning_rate": 4.9968750000000005e-05, "loss": 0.2141, "step": 268 }, { "epoch": 0.25, "learning_rate": 4.996828358208955e-05, "loss": 0.1988, "step": 272 }, { "epoch": 0.26, "learning_rate": 4.996781716417911e-05, "loss": 0.2797, "step": 276 }, { "epoch": 0.26, "learning_rate": 4.996735074626866e-05, "loss": 0.265, "step": 280 }, { "epoch": 0.26, "learning_rate": 4.996688432835821e-05, "loss": 0.3751, "step": 284 }, { "epoch": 0.27, "learning_rate": 4.9966417910447766e-05, "loss": 0.2993, "step": 288 }, { "epoch": 0.27, "learning_rate": 4.9965951492537314e-05, "loss": 0.2626, "step": 292 }, { "epoch": 0.28, "learning_rate": 4.996548507462687e-05, "loss": 0.2083, "step": 296 }, { "epoch": 0.28, "learning_rate": 4.9965018656716424e-05, "loss": 0.1907, "step": 300 }, { "epoch": 0.28, "learning_rate": 4.996455223880597e-05, "loss": 0.2737, "step": 304 }, { "epoch": 0.29, "learning_rate": 4.996408582089552e-05, "loss": 0.3005, "step": 308 }, { "epoch": 0.29, "learning_rate": 4.996361940298508e-05, "loss": 0.2399, "step": 312 }, { "epoch": 0.29, "learning_rate": 4.996315298507463e-05, "loss": 0.2554, "step": 316 }, { "epoch": 0.3, "learning_rate": 4.996268656716418e-05, "loss": 0.2626, "step": 320 }, { "epoch": 0.3, "learning_rate": 4.996222014925373e-05, "loss": 0.2391, "step": 324 }, { "epoch": 0.31, "learning_rate": 4.996175373134329e-05, "loss": 0.2105, "step": 328 }, { "epoch": 0.31, "learning_rate": 4.9961287313432836e-05, "loss": 0.2333, "step": 332 }, { "epoch": 0.31, "learning_rate": 4.996082089552239e-05, "loss": 0.2182, "step": 336 }, { "epoch": 0.32, "learning_rate": 4.9960354477611946e-05, "loss": 0.2743, "step": 340 }, { "epoch": 0.32, "learning_rate": 4.9959888059701494e-05, "loss": 0.1997, "step": 344 }, { "epoch": 0.32, "learning_rate": 4.995942164179105e-05, "loss": 0.3057, "step": 348 }, { "epoch": 0.33, "learning_rate": 4.99589552238806e-05, "loss": 0.1665, "step": 352 }, { "epoch": 0.33, "learning_rate": 4.995848880597015e-05, "loss": 0.2077, "step": 356 }, { "epoch": 0.34, "learning_rate": 4.995802238805971e-05, "loss": 0.229, "step": 360 }, { "epoch": 0.34, "learning_rate": 4.9957555970149255e-05, "loss": 0.1626, "step": 364 }, { "epoch": 0.34, "learning_rate": 4.99570895522388e-05, "loss": 0.2124, "step": 368 }, { "epoch": 0.35, "learning_rate": 4.9956623134328365e-05, "loss": 0.1981, "step": 372 }, { "epoch": 0.35, "learning_rate": 4.995615671641791e-05, "loss": 0.1941, "step": 376 }, { "epoch": 0.35, "learning_rate": 4.995569029850746e-05, "loss": 0.1985, "step": 380 }, { "epoch": 0.36, "learning_rate": 4.9955223880597016e-05, "loss": 0.2346, "step": 384 }, { "epoch": 0.36, "learning_rate": 4.995475746268657e-05, "loss": 0.1867, "step": 388 }, { "epoch": 0.37, "learning_rate": 4.995429104477612e-05, "loss": 0.1955, "step": 392 }, { "epoch": 0.37, "learning_rate": 4.9953824626865674e-05, "loss": 0.2611, "step": 396 }, { "epoch": 0.37, "learning_rate": 4.995335820895522e-05, "loss": 0.2636, "step": 400 }, { "epoch": 0.38, "learning_rate": 4.995289179104478e-05, "loss": 0.2358, "step": 404 }, { "epoch": 0.38, "learning_rate": 4.995242537313433e-05, "loss": 0.2215, "step": 408 }, { "epoch": 0.38, "learning_rate": 4.995195895522388e-05, "loss": 0.1965, "step": 412 }, { "epoch": 0.39, "learning_rate": 4.9951492537313435e-05, "loss": 0.2427, "step": 416 }, { "epoch": 0.39, "learning_rate": 4.995102611940299e-05, "loss": 0.1671, "step": 420 }, { "epoch": 0.4, "learning_rate": 4.995055970149254e-05, "loss": 0.1296, "step": 424 }, { "epoch": 0.4, "learning_rate": 4.995009328358209e-05, "loss": 0.2114, "step": 428 }, { "epoch": 0.4, "learning_rate": 4.994962686567165e-05, "loss": 0.1675, "step": 432 }, { "epoch": 0.41, "learning_rate": 4.9949160447761196e-05, "loss": 0.162, "step": 436 }, { "epoch": 0.41, "learning_rate": 4.994869402985075e-05, "loss": 0.1415, "step": 440 }, { "epoch": 0.41, "learning_rate": 4.99482276119403e-05, "loss": 0.148, "step": 444 }, { "epoch": 0.42, "learning_rate": 4.9947761194029854e-05, "loss": 0.1717, "step": 448 }, { "epoch": 0.42, "learning_rate": 4.994729477611941e-05, "loss": 0.157, "step": 452 }, { "epoch": 0.43, "learning_rate": 4.994682835820896e-05, "loss": 0.226, "step": 456 }, { "epoch": 0.43, "learning_rate": 4.9946361940298505e-05, "loss": 0.2185, "step": 460 }, { "epoch": 0.43, "learning_rate": 4.994589552238807e-05, "loss": 0.2423, "step": 464 }, { "epoch": 0.44, "learning_rate": 4.9945429104477615e-05, "loss": 0.2652, "step": 468 }, { "epoch": 0.44, "learning_rate": 4.994496268656716e-05, "loss": 0.1864, "step": 472 }, { "epoch": 0.44, "learning_rate": 4.994449626865672e-05, "loss": 0.1418, "step": 476 }, { "epoch": 0.45, "learning_rate": 4.994402985074627e-05, "loss": 0.1907, "step": 480 }, { "epoch": 0.45, "learning_rate": 4.994356343283582e-05, "loss": 0.1826, "step": 484 }, { "epoch": 0.46, "learning_rate": 4.9943097014925376e-05, "loss": 0.1699, "step": 488 }, { "epoch": 0.46, "learning_rate": 4.994263059701493e-05, "loss": 0.1956, "step": 492 }, { "epoch": 0.46, "learning_rate": 4.994216417910448e-05, "loss": 0.1386, "step": 496 }, { "epoch": 0.47, "learning_rate": 4.9941697761194034e-05, "loss": 0.1573, "step": 500 }, { "epoch": 0.47, "eval_exact_match": 0.4410058027079304, "eval_exec": 0.534816247582205, "eval_loss": 0.1741107553243637, "eval_runtime": 975.177, "eval_samples_per_second": 1.06, "step": 500 }, { "epoch": 0.47, "learning_rate": 4.994123134328358e-05, "loss": 0.165, "step": 504 }, { "epoch": 0.47, "learning_rate": 4.994076492537314e-05, "loss": 0.1845, "step": 508 }, { "epoch": 0.48, "learning_rate": 4.994029850746269e-05, "loss": 0.1452, "step": 512 }, { "epoch": 0.48, "learning_rate": 4.993983208955224e-05, "loss": 0.1927, "step": 516 }, { "epoch": 0.49, "learning_rate": 4.993936567164179e-05, "loss": 0.1153, "step": 520 }, { "epoch": 0.49, "learning_rate": 4.993889925373135e-05, "loss": 0.1446, "step": 524 }, { "epoch": 0.49, "learning_rate": 4.99384328358209e-05, "loss": 0.1632, "step": 528 }, { "epoch": 0.5, "learning_rate": 4.9937966417910446e-05, "loss": 0.1256, "step": 532 }, { "epoch": 0.5, "learning_rate": 4.99375e-05, "loss": 0.2019, "step": 536 }, { "epoch": 0.5, "learning_rate": 4.9937033582089556e-05, "loss": 0.1862, "step": 540 }, { "epoch": 0.51, "learning_rate": 4.9936567164179104e-05, "loss": 0.1931, "step": 544 }, { "epoch": 0.51, "learning_rate": 4.993610074626866e-05, "loss": 0.16, "step": 548 }, { "epoch": 0.51, "learning_rate": 4.9935634328358214e-05, "loss": 0.2193, "step": 552 }, { "epoch": 0.52, "learning_rate": 4.993516791044776e-05, "loss": 0.1208, "step": 556 }, { "epoch": 0.52, "learning_rate": 4.993470149253732e-05, "loss": 0.1524, "step": 560 }, { "epoch": 0.53, "learning_rate": 4.9934235074626865e-05, "loss": 0.1348, "step": 564 }, { "epoch": 0.53, "learning_rate": 4.993376865671642e-05, "loss": 0.1796, "step": 568 }, { "epoch": 0.53, "learning_rate": 4.9933302238805975e-05, "loss": 0.1142, "step": 572 }, { "epoch": 0.54, "learning_rate": 4.993283582089552e-05, "loss": 0.1417, "step": 576 }, { "epoch": 0.54, "learning_rate": 4.993236940298508e-05, "loss": 0.1696, "step": 580 }, { "epoch": 0.54, "learning_rate": 4.993190298507463e-05, "loss": 0.1412, "step": 584 }, { "epoch": 0.55, "learning_rate": 4.993143656716418e-05, "loss": 0.1188, "step": 588 }, { "epoch": 0.55, "learning_rate": 4.9930970149253736e-05, "loss": 0.1944, "step": 592 }, { "epoch": 0.56, "learning_rate": 4.9930503731343284e-05, "loss": 0.0786, "step": 596 }, { "epoch": 0.56, "learning_rate": 4.993003731343284e-05, "loss": 0.1258, "step": 600 }, { "epoch": 0.56, "learning_rate": 4.9929570895522394e-05, "loss": 0.1359, "step": 604 }, { "epoch": 0.57, "learning_rate": 4.992910447761194e-05, "loss": 0.1297, "step": 608 }, { "epoch": 0.57, "learning_rate": 4.99286380597015e-05, "loss": 0.2223, "step": 612 }, { "epoch": 0.57, "learning_rate": 4.992817164179105e-05, "loss": 0.1363, "step": 616 }, { "epoch": 0.58, "learning_rate": 4.99277052238806e-05, "loss": 0.1362, "step": 620 }, { "epoch": 0.58, "learning_rate": 4.992723880597015e-05, "loss": 0.1977, "step": 624 }, { "epoch": 0.59, "learning_rate": 4.99267723880597e-05, "loss": 0.1176, "step": 628 }, { "epoch": 0.59, "learning_rate": 4.992630597014926e-05, "loss": 0.1254, "step": 632 }, { "epoch": 0.59, "learning_rate": 4.9925839552238806e-05, "loss": 0.1825, "step": 636 }, { "epoch": 0.6, "learning_rate": 4.992537313432836e-05, "loss": 0.1363, "step": 640 }, { "epoch": 0.6, "learning_rate": 4.9924906716417916e-05, "loss": 0.1388, "step": 644 }, { "epoch": 0.6, "learning_rate": 4.9924440298507464e-05, "loss": 0.1409, "step": 648 }, { "epoch": 0.61, "learning_rate": 4.992397388059702e-05, "loss": 0.1481, "step": 652 }, { "epoch": 0.61, "learning_rate": 4.992350746268657e-05, "loss": 0.183, "step": 656 }, { "epoch": 0.62, "learning_rate": 4.992304104477612e-05, "loss": 0.1344, "step": 660 }, { "epoch": 0.62, "learning_rate": 4.9922574626865677e-05, "loss": 0.1149, "step": 664 }, { "epoch": 0.62, "learning_rate": 4.9922108208955225e-05, "loss": 0.1584, "step": 668 }, { "epoch": 0.63, "learning_rate": 4.992164179104478e-05, "loss": 0.1784, "step": 672 }, { "epoch": 0.63, "learning_rate": 4.9921175373134335e-05, "loss": 0.066, "step": 676 }, { "epoch": 0.63, "learning_rate": 4.992070895522388e-05, "loss": 0.1293, "step": 680 }, { "epoch": 0.64, "learning_rate": 4.992024253731343e-05, "loss": 0.1601, "step": 684 }, { "epoch": 0.64, "learning_rate": 4.9919776119402986e-05, "loss": 0.1549, "step": 688 }, { "epoch": 0.65, "learning_rate": 4.991930970149254e-05, "loss": 0.102, "step": 692 }, { "epoch": 0.65, "learning_rate": 4.991884328358209e-05, "loss": 0.1538, "step": 696 }, { "epoch": 0.65, "learning_rate": 4.9918376865671644e-05, "loss": 0.1375, "step": 700 }, { "epoch": 0.66, "learning_rate": 4.99179104477612e-05, "loss": 0.1162, "step": 704 }, { "epoch": 0.66, "learning_rate": 4.991744402985075e-05, "loss": 0.1154, "step": 708 }, { "epoch": 0.66, "learning_rate": 4.99169776119403e-05, "loss": 0.2463, "step": 712 }, { "epoch": 0.67, "learning_rate": 4.991651119402985e-05, "loss": 0.1519, "step": 716 }, { "epoch": 0.67, "learning_rate": 4.9916044776119405e-05, "loss": 0.0805, "step": 720 }, { "epoch": 0.68, "learning_rate": 4.991557835820896e-05, "loss": 0.1146, "step": 724 }, { "epoch": 0.68, "learning_rate": 4.991511194029851e-05, "loss": 0.1365, "step": 728 }, { "epoch": 0.68, "learning_rate": 4.991464552238806e-05, "loss": 0.1944, "step": 732 }, { "epoch": 0.69, "learning_rate": 4.991417910447762e-05, "loss": 0.1529, "step": 736 }, { "epoch": 0.69, "learning_rate": 4.9913712686567166e-05, "loss": 0.1301, "step": 740 }, { "epoch": 0.69, "learning_rate": 4.991324626865672e-05, "loss": 0.1387, "step": 744 }, { "epoch": 0.7, "learning_rate": 4.991277985074627e-05, "loss": 0.1241, "step": 748 }, { "epoch": 0.7, "learning_rate": 4.9912313432835824e-05, "loss": 0.1165, "step": 752 }, { "epoch": 0.71, "learning_rate": 4.991184701492538e-05, "loss": 0.1206, "step": 756 }, { "epoch": 0.71, "learning_rate": 4.991138059701493e-05, "loss": 0.1771, "step": 760 }, { "epoch": 0.71, "learning_rate": 4.991091417910448e-05, "loss": 0.1229, "step": 764 }, { "epoch": 0.72, "learning_rate": 4.9910447761194036e-05, "loss": 0.1462, "step": 768 }, { "epoch": 0.72, "learning_rate": 4.9909981343283585e-05, "loss": 0.119, "step": 772 }, { "epoch": 0.72, "learning_rate": 4.990951492537313e-05, "loss": 0.1318, "step": 776 }, { "epoch": 0.73, "learning_rate": 4.9909048507462694e-05, "loss": 0.1514, "step": 780 }, { "epoch": 0.73, "learning_rate": 4.990858208955224e-05, "loss": 0.1043, "step": 784 }, { "epoch": 0.73, "learning_rate": 4.990811567164179e-05, "loss": 0.1466, "step": 788 }, { "epoch": 0.74, "learning_rate": 4.9907649253731346e-05, "loss": 0.1052, "step": 792 }, { "epoch": 0.74, "learning_rate": 4.99071828358209e-05, "loss": 0.1098, "step": 796 }, { "epoch": 0.75, "learning_rate": 4.990671641791045e-05, "loss": 0.2, "step": 800 }, { "epoch": 0.75, "learning_rate": 4.9906250000000004e-05, "loss": 0.1416, "step": 804 }, { "epoch": 0.75, "learning_rate": 4.990578358208955e-05, "loss": 0.1137, "step": 808 }, { "epoch": 0.76, "learning_rate": 4.9905317164179107e-05, "loss": 0.14, "step": 812 }, { "epoch": 0.76, "learning_rate": 4.990485074626866e-05, "loss": 0.1023, "step": 816 }, { "epoch": 0.76, "learning_rate": 4.990438432835821e-05, "loss": 0.1276, "step": 820 }, { "epoch": 0.77, "learning_rate": 4.9903917910447764e-05, "loss": 0.1445, "step": 824 }, { "epoch": 0.77, "learning_rate": 4.990345149253732e-05, "loss": 0.1165, "step": 828 }, { "epoch": 0.78, "learning_rate": 4.990298507462687e-05, "loss": 0.1369, "step": 832 }, { "epoch": 0.78, "learning_rate": 4.9902518656716416e-05, "loss": 0.1656, "step": 836 }, { "epoch": 0.78, "learning_rate": 4.990205223880598e-05, "loss": 0.1383, "step": 840 }, { "epoch": 0.79, "learning_rate": 4.9901585820895525e-05, "loss": 0.1065, "step": 844 }, { "epoch": 0.79, "learning_rate": 4.9901119402985074e-05, "loss": 0.1279, "step": 848 }, { "epoch": 0.79, "learning_rate": 4.990065298507463e-05, "loss": 0.111, "step": 852 }, { "epoch": 0.8, "learning_rate": 4.9900186567164183e-05, "loss": 0.1729, "step": 856 }, { "epoch": 0.8, "learning_rate": 4.989972014925373e-05, "loss": 0.1522, "step": 860 }, { "epoch": 0.81, "learning_rate": 4.9899253731343286e-05, "loss": 0.1157, "step": 864 }, { "epoch": 0.81, "learning_rate": 4.9898787313432835e-05, "loss": 0.0954, "step": 868 }, { "epoch": 0.81, "learning_rate": 4.989832089552239e-05, "loss": 0.1079, "step": 872 }, { "epoch": 0.82, "learning_rate": 4.9897854477611944e-05, "loss": 0.094, "step": 876 }, { "epoch": 0.82, "learning_rate": 4.989738805970149e-05, "loss": 0.1084, "step": 880 }, { "epoch": 0.82, "learning_rate": 4.989692164179105e-05, "loss": 0.1579, "step": 884 }, { "epoch": 0.83, "learning_rate": 4.98964552238806e-05, "loss": 0.1245, "step": 888 }, { "epoch": 0.83, "learning_rate": 4.989598880597015e-05, "loss": 0.1674, "step": 892 }, { "epoch": 0.84, "learning_rate": 4.98955223880597e-05, "loss": 0.0896, "step": 896 }, { "epoch": 0.84, "learning_rate": 4.989505597014926e-05, "loss": 0.1532, "step": 900 }, { "epoch": 0.84, "learning_rate": 4.989458955223881e-05, "loss": 0.1221, "step": 904 }, { "epoch": 0.85, "learning_rate": 4.989412313432836e-05, "loss": 0.1003, "step": 908 }, { "epoch": 0.85, "learning_rate": 4.989365671641791e-05, "loss": 0.149, "step": 912 }, { "epoch": 0.85, "learning_rate": 4.9893190298507466e-05, "loss": 0.1192, "step": 916 }, { "epoch": 0.86, "learning_rate": 4.989272388059702e-05, "loss": 0.122, "step": 920 }, { "epoch": 0.86, "learning_rate": 4.989225746268657e-05, "loss": 0.0945, "step": 924 }, { "epoch": 0.87, "learning_rate": 4.989179104477612e-05, "loss": 0.1248, "step": 928 }, { "epoch": 0.87, "learning_rate": 4.989132462686568e-05, "loss": 0.0974, "step": 932 }, { "epoch": 0.87, "learning_rate": 4.989085820895523e-05, "loss": 0.0956, "step": 936 }, { "epoch": 0.88, "learning_rate": 4.9890391791044775e-05, "loss": 0.1214, "step": 940 }, { "epoch": 0.88, "learning_rate": 4.988992537313433e-05, "loss": 0.1591, "step": 944 }, { "epoch": 0.88, "learning_rate": 4.9889458955223885e-05, "loss": 0.1303, "step": 948 }, { "epoch": 0.89, "learning_rate": 4.9888992537313433e-05, "loss": 0.1071, "step": 952 }, { "epoch": 0.89, "learning_rate": 4.988852611940299e-05, "loss": 0.1266, "step": 956 }, { "epoch": 0.9, "learning_rate": 4.988805970149254e-05, "loss": 0.1142, "step": 960 }, { "epoch": 0.9, "learning_rate": 4.988759328358209e-05, "loss": 0.0919, "step": 964 }, { "epoch": 0.9, "learning_rate": 4.9887126865671646e-05, "loss": 0.1704, "step": 968 }, { "epoch": 0.91, "learning_rate": 4.9886660447761194e-05, "loss": 0.1074, "step": 972 }, { "epoch": 0.91, "learning_rate": 4.988619402985075e-05, "loss": 0.1104, "step": 976 }, { "epoch": 0.91, "learning_rate": 4.9885727611940304e-05, "loss": 0.1298, "step": 980 }, { "epoch": 0.92, "learning_rate": 4.988526119402985e-05, "loss": 0.1069, "step": 984 }, { "epoch": 0.92, "learning_rate": 4.98847947761194e-05, "loss": 0.0986, "step": 988 }, { "epoch": 0.93, "learning_rate": 4.988432835820896e-05, "loss": 0.1057, "step": 992 }, { "epoch": 0.93, "learning_rate": 4.988386194029851e-05, "loss": 0.1513, "step": 996 }, { "epoch": 0.93, "learning_rate": 4.988339552238806e-05, "loss": 0.16, "step": 1000 }, { "epoch": 0.93, "eval_exact_match": 0.597678916827853, "eval_exec": 0.6266924564796905, "eval_loss": 0.14075149595737457, "eval_runtime": 1596.4093, "eval_samples_per_second": 0.648, "step": 1000 }, { "epoch": 0.94, "learning_rate": 4.988292910447761e-05, "loss": 0.1095, "step": 1004 }, { "epoch": 0.94, "learning_rate": 4.988246268656717e-05, "loss": 0.0819, "step": 1008 }, { "epoch": 0.94, "learning_rate": 4.9881996268656716e-05, "loss": 0.1231, "step": 1012 }, { "epoch": 0.95, "learning_rate": 4.988152985074627e-05, "loss": 0.0878, "step": 1016 }, { "epoch": 0.95, "learning_rate": 4.9881063432835826e-05, "loss": 0.1158, "step": 1020 }, { "epoch": 0.96, "learning_rate": 4.9880597014925374e-05, "loss": 0.1312, "step": 1024 }, { "epoch": 0.96, "learning_rate": 4.988013059701493e-05, "loss": 0.1238, "step": 1028 }, { "epoch": 0.96, "learning_rate": 4.987966417910448e-05, "loss": 0.1167, "step": 1032 }, { "epoch": 0.97, "learning_rate": 4.987919776119403e-05, "loss": 0.0814, "step": 1036 }, { "epoch": 0.97, "learning_rate": 4.987873134328359e-05, "loss": 0.0817, "step": 1040 }, { "epoch": 0.97, "learning_rate": 4.9878264925373135e-05, "loss": 0.1221, "step": 1044 }, { "epoch": 0.98, "learning_rate": 4.9877798507462683e-05, "loss": 0.131, "step": 1048 }, { "epoch": 0.98, "learning_rate": 4.9877332089552245e-05, "loss": 0.1129, "step": 1052 }, { "epoch": 0.98, "learning_rate": 4.987686567164179e-05, "loss": 0.1051, "step": 1056 }, { "epoch": 0.99, "learning_rate": 4.987639925373134e-05, "loss": 0.0943, "step": 1060 }, { "epoch": 0.99, "learning_rate": 4.9875932835820896e-05, "loss": 0.0652, "step": 1064 }, { "epoch": 1.0, "learning_rate": 4.987546641791045e-05, "loss": 0.109, "step": 1068 }, { "epoch": 1.0, "learning_rate": 4.9875000000000006e-05, "loss": 0.1217, "step": 1072 }, { "epoch": 1.0, "learning_rate": 4.9874533582089554e-05, "loss": 0.0951, "step": 1076 }, { "epoch": 1.01, "learning_rate": 4.98740671641791e-05, "loss": 0.0788, "step": 1080 }, { "epoch": 1.01, "learning_rate": 4.9873600746268664e-05, "loss": 0.0627, "step": 1084 }, { "epoch": 1.01, "learning_rate": 4.987313432835821e-05, "loss": 0.1035, "step": 1088 }, { "epoch": 1.02, "learning_rate": 4.987266791044776e-05, "loss": 0.0611, "step": 1092 }, { "epoch": 1.02, "learning_rate": 4.9872201492537315e-05, "loss": 0.0593, "step": 1096 }, { "epoch": 1.03, "learning_rate": 4.987173507462687e-05, "loss": 0.0997, "step": 1100 }, { "epoch": 1.03, "learning_rate": 4.987126865671642e-05, "loss": 0.0991, "step": 1104 }, { "epoch": 1.03, "learning_rate": 4.987080223880597e-05, "loss": 0.0917, "step": 1108 }, { "epoch": 1.04, "learning_rate": 4.987033582089553e-05, "loss": 0.0519, "step": 1112 }, { "epoch": 1.04, "learning_rate": 4.9869869402985076e-05, "loss": 0.0923, "step": 1116 }, { "epoch": 1.04, "learning_rate": 4.986940298507463e-05, "loss": 0.071, "step": 1120 }, { "epoch": 1.05, "learning_rate": 4.986893656716418e-05, "loss": 0.0654, "step": 1124 }, { "epoch": 1.05, "learning_rate": 4.9868470149253734e-05, "loss": 0.0823, "step": 1128 }, { "epoch": 1.06, "learning_rate": 4.986800373134329e-05, "loss": 0.0674, "step": 1132 }, { "epoch": 1.06, "learning_rate": 4.986753731343284e-05, "loss": 0.0838, "step": 1136 }, { "epoch": 1.06, "learning_rate": 4.9867070895522385e-05, "loss": 0.115, "step": 1140 }, { "epoch": 1.07, "learning_rate": 4.986660447761195e-05, "loss": 0.0886, "step": 1144 }, { "epoch": 1.07, "learning_rate": 4.9866138059701495e-05, "loss": 0.086, "step": 1148 }, { "epoch": 1.07, "learning_rate": 4.986567164179104e-05, "loss": 0.0902, "step": 1152 }, { "epoch": 1.08, "learning_rate": 4.98652052238806e-05, "loss": 0.0724, "step": 1156 }, { "epoch": 1.08, "learning_rate": 4.986473880597015e-05, "loss": 0.0832, "step": 1160 }, { "epoch": 1.09, "learning_rate": 4.98642723880597e-05, "loss": 0.0507, "step": 1164 }, { "epoch": 1.09, "learning_rate": 4.9863805970149256e-05, "loss": 0.0754, "step": 1168 }, { "epoch": 1.09, "learning_rate": 4.986333955223881e-05, "loss": 0.0738, "step": 1172 }, { "epoch": 1.1, "learning_rate": 4.986287313432836e-05, "loss": 0.0834, "step": 1176 }, { "epoch": 1.1, "learning_rate": 4.9862406716417914e-05, "loss": 0.0867, "step": 1180 }, { "epoch": 1.1, "learning_rate": 4.986194029850746e-05, "loss": 0.0967, "step": 1184 }, { "epoch": 1.11, "learning_rate": 4.986147388059702e-05, "loss": 0.0759, "step": 1188 }, { "epoch": 1.11, "learning_rate": 4.986100746268657e-05, "loss": 0.0858, "step": 1192 }, { "epoch": 1.12, "learning_rate": 4.986054104477612e-05, "loss": 0.143, "step": 1196 }, { "epoch": 1.12, "learning_rate": 4.986007462686567e-05, "loss": 0.0648, "step": 1200 }, { "epoch": 1.12, "learning_rate": 4.985960820895523e-05, "loss": 0.0706, "step": 1204 }, { "epoch": 1.13, "learning_rate": 4.985914179104478e-05, "loss": 0.0687, "step": 1208 }, { "epoch": 1.13, "learning_rate": 4.9858675373134326e-05, "loss": 0.098, "step": 1212 }, { "epoch": 1.13, "learning_rate": 4.985820895522388e-05, "loss": 0.0809, "step": 1216 }, { "epoch": 1.14, "learning_rate": 4.9857742537313436e-05, "loss": 0.0634, "step": 1220 }, { "epoch": 1.14, "learning_rate": 4.9857276119402984e-05, "loss": 0.0899, "step": 1224 }, { "epoch": 1.15, "learning_rate": 4.985680970149254e-05, "loss": 0.0905, "step": 1228 }, { "epoch": 1.15, "learning_rate": 4.9856343283582094e-05, "loss": 0.0669, "step": 1232 }, { "epoch": 1.15, "learning_rate": 4.985587686567165e-05, "loss": 0.0898, "step": 1236 }, { "epoch": 1.16, "learning_rate": 4.98554104477612e-05, "loss": 0.1037, "step": 1240 }, { "epoch": 1.16, "learning_rate": 4.9854944029850745e-05, "loss": 0.1286, "step": 1244 }, { "epoch": 1.16, "learning_rate": 4.98544776119403e-05, "loss": 0.0882, "step": 1248 }, { "epoch": 1.17, "learning_rate": 4.9854011194029855e-05, "loss": 0.0847, "step": 1252 }, { "epoch": 1.17, "learning_rate": 4.98535447761194e-05, "loss": 0.0579, "step": 1256 }, { "epoch": 1.18, "learning_rate": 4.985307835820896e-05, "loss": 0.0864, "step": 1260 }, { "epoch": 1.18, "learning_rate": 4.985261194029851e-05, "loss": 0.0831, "step": 1264 }, { "epoch": 1.18, "learning_rate": 4.985214552238806e-05, "loss": 0.1051, "step": 1268 }, { "epoch": 1.19, "learning_rate": 4.9851679104477616e-05, "loss": 0.0406, "step": 1272 }, { "epoch": 1.19, "learning_rate": 4.9851212686567164e-05, "loss": 0.1047, "step": 1276 }, { "epoch": 1.19, "learning_rate": 4.985074626865672e-05, "loss": 0.0855, "step": 1280 }, { "epoch": 1.2, "learning_rate": 4.9850279850746274e-05, "loss": 0.1054, "step": 1284 }, { "epoch": 1.2, "learning_rate": 4.984981343283582e-05, "loss": 0.102, "step": 1288 }, { "epoch": 1.21, "learning_rate": 4.984934701492538e-05, "loss": 0.0599, "step": 1292 }, { "epoch": 1.21, "learning_rate": 4.984888059701493e-05, "loss": 0.054, "step": 1296 }, { "epoch": 1.21, "learning_rate": 4.984841417910448e-05, "loss": 0.0712, "step": 1300 }, { "epoch": 1.22, "learning_rate": 4.984794776119403e-05, "loss": 0.0768, "step": 1304 }, { "epoch": 1.22, "learning_rate": 4.984748134328358e-05, "loss": 0.0646, "step": 1308 }, { "epoch": 1.22, "learning_rate": 4.984701492537314e-05, "loss": 0.0775, "step": 1312 }, { "epoch": 1.23, "learning_rate": 4.9846548507462686e-05, "loss": 0.0643, "step": 1316 }, { "epoch": 1.23, "learning_rate": 4.984608208955224e-05, "loss": 0.0972, "step": 1320 }, { "epoch": 1.24, "learning_rate": 4.9845615671641796e-05, "loss": 0.0801, "step": 1324 }, { "epoch": 1.24, "learning_rate": 4.9845149253731344e-05, "loss": 0.114, "step": 1328 }, { "epoch": 1.24, "learning_rate": 4.98446828358209e-05, "loss": 0.0623, "step": 1332 }, { "epoch": 1.25, "learning_rate": 4.984421641791045e-05, "loss": 0.07, "step": 1336 }, { "epoch": 1.25, "learning_rate": 4.984375e-05, "loss": 0.0768, "step": 1340 }, { "epoch": 1.25, "learning_rate": 4.984328358208956e-05, "loss": 0.0684, "step": 1344 }, { "epoch": 1.26, "learning_rate": 4.9842817164179105e-05, "loss": 0.08, "step": 1348 }, { "epoch": 1.26, "learning_rate": 4.984235074626866e-05, "loss": 0.062, "step": 1352 }, { "epoch": 1.26, "learning_rate": 4.9841884328358215e-05, "loss": 0.1145, "step": 1356 }, { "epoch": 1.27, "learning_rate": 4.984141791044776e-05, "loss": 0.088, "step": 1360 }, { "epoch": 1.27, "learning_rate": 4.984095149253731e-05, "loss": 0.0713, "step": 1364 }, { "epoch": 1.28, "learning_rate": 4.9840485074626866e-05, "loss": 0.0372, "step": 1368 }, { "epoch": 1.28, "learning_rate": 4.984001865671642e-05, "loss": 0.0618, "step": 1372 }, { "epoch": 1.28, "learning_rate": 4.983955223880597e-05, "loss": 0.119, "step": 1376 }, { "epoch": 1.29, "learning_rate": 4.9839085820895524e-05, "loss": 0.0979, "step": 1380 }, { "epoch": 1.29, "learning_rate": 4.983861940298508e-05, "loss": 0.056, "step": 1384 }, { "epoch": 1.29, "learning_rate": 4.983815298507463e-05, "loss": 0.0805, "step": 1388 }, { "epoch": 1.3, "learning_rate": 4.983768656716418e-05, "loss": 0.0768, "step": 1392 }, { "epoch": 1.3, "learning_rate": 4.983722014925373e-05, "loss": 0.0488, "step": 1396 }, { "epoch": 1.31, "learning_rate": 4.983675373134329e-05, "loss": 0.0914, "step": 1400 }, { "epoch": 1.31, "learning_rate": 4.983628731343284e-05, "loss": 0.0765, "step": 1404 }, { "epoch": 1.31, "learning_rate": 4.983582089552239e-05, "loss": 0.0862, "step": 1408 }, { "epoch": 1.32, "learning_rate": 4.983535447761194e-05, "loss": 0.0692, "step": 1412 }, { "epoch": 1.32, "learning_rate": 4.98348880597015e-05, "loss": 0.0611, "step": 1416 }, { "epoch": 1.32, "learning_rate": 4.9834421641791046e-05, "loss": 0.0947, "step": 1420 }, { "epoch": 1.33, "learning_rate": 4.98339552238806e-05, "loss": 0.0615, "step": 1424 }, { "epoch": 1.33, "learning_rate": 4.983348880597015e-05, "loss": 0.0774, "step": 1428 }, { "epoch": 1.34, "learning_rate": 4.9833022388059704e-05, "loss": 0.0725, "step": 1432 }, { "epoch": 1.34, "learning_rate": 4.983255597014926e-05, "loss": 0.0784, "step": 1436 }, { "epoch": 1.34, "learning_rate": 4.983208955223881e-05, "loss": 0.061, "step": 1440 }, { "epoch": 1.35, "learning_rate": 4.983162313432836e-05, "loss": 0.1046, "step": 1444 }, { "epoch": 1.35, "learning_rate": 4.983115671641792e-05, "loss": 0.0768, "step": 1448 }, { "epoch": 1.35, "learning_rate": 4.9830690298507465e-05, "loss": 0.0608, "step": 1452 }, { "epoch": 1.36, "learning_rate": 4.983022388059701e-05, "loss": 0.0959, "step": 1456 }, { "epoch": 1.36, "learning_rate": 4.9829757462686575e-05, "loss": 0.0602, "step": 1460 }, { "epoch": 1.37, "learning_rate": 4.982929104477612e-05, "loss": 0.0655, "step": 1464 }, { "epoch": 1.37, "learning_rate": 4.982882462686567e-05, "loss": 0.1009, "step": 1468 }, { "epoch": 1.37, "learning_rate": 4.9828358208955226e-05, "loss": 0.0563, "step": 1472 }, { "epoch": 1.38, "learning_rate": 4.982789179104478e-05, "loss": 0.0763, "step": 1476 }, { "epoch": 1.38, "learning_rate": 4.982742537313433e-05, "loss": 0.0651, "step": 1480 }, { "epoch": 1.38, "learning_rate": 4.9826958955223884e-05, "loss": 0.0346, "step": 1484 }, { "epoch": 1.39, "learning_rate": 4.982649253731343e-05, "loss": 0.0437, "step": 1488 }, { "epoch": 1.39, "learning_rate": 4.982602611940299e-05, "loss": 0.0944, "step": 1492 }, { "epoch": 1.4, "learning_rate": 4.982555970149254e-05, "loss": 0.0718, "step": 1496 }, { "epoch": 1.4, "learning_rate": 4.982509328358209e-05, "loss": 0.098, "step": 1500 }, { "epoch": 1.4, "eval_exact_match": 0.660541586073501, "eval_exec": 0.6818181818181818, "eval_loss": 0.1437511444091797, "eval_runtime": 1123.3017, "eval_samples_per_second": 0.921, "step": 1500 }, { "epoch": 1.4, "learning_rate": 4.9824626865671645e-05, "loss": 0.087, "step": 1504 }, { "epoch": 1.41, "learning_rate": 4.98241604477612e-05, "loss": 0.0929, "step": 1508 }, { "epoch": 1.41, "learning_rate": 4.982369402985075e-05, "loss": 0.1199, "step": 1512 }, { "epoch": 1.41, "learning_rate": 4.9823227611940296e-05, "loss": 0.0896, "step": 1516 }, { "epoch": 1.42, "learning_rate": 4.982276119402986e-05, "loss": 0.0901, "step": 1520 }, { "epoch": 1.42, "learning_rate": 4.9822294776119406e-05, "loss": 0.0724, "step": 1524 }, { "epoch": 1.43, "learning_rate": 4.9821828358208954e-05, "loss": 0.067, "step": 1528 }, { "epoch": 1.43, "learning_rate": 4.982136194029851e-05, "loss": 0.0706, "step": 1532 }, { "epoch": 1.43, "learning_rate": 4.9820895522388064e-05, "loss": 0.0507, "step": 1536 }, { "epoch": 1.44, "learning_rate": 4.982042910447761e-05, "loss": 0.0632, "step": 1540 }, { "epoch": 1.44, "learning_rate": 4.981996268656717e-05, "loss": 0.0833, "step": 1544 }, { "epoch": 1.44, "learning_rate": 4.9819496268656715e-05, "loss": 0.1091, "step": 1548 }, { "epoch": 1.45, "learning_rate": 4.981902985074627e-05, "loss": 0.0629, "step": 1552 }, { "epoch": 1.45, "learning_rate": 4.9818563432835825e-05, "loss": 0.1016, "step": 1556 }, { "epoch": 1.46, "learning_rate": 4.981809701492537e-05, "loss": 0.0879, "step": 1560 }, { "epoch": 1.46, "learning_rate": 4.981763059701493e-05, "loss": 0.0938, "step": 1564 }, { "epoch": 1.46, "learning_rate": 4.981716417910448e-05, "loss": 0.0596, "step": 1568 }, { "epoch": 1.47, "learning_rate": 4.981669776119403e-05, "loss": 0.1145, "step": 1572 }, { "epoch": 1.47, "learning_rate": 4.9816231343283586e-05, "loss": 0.0723, "step": 1576 }, { "epoch": 1.47, "learning_rate": 4.981576492537314e-05, "loss": 0.0335, "step": 1580 }, { "epoch": 1.48, "learning_rate": 4.981529850746269e-05, "loss": 0.0567, "step": 1584 }, { "epoch": 1.48, "learning_rate": 4.9814832089552244e-05, "loss": 0.0653, "step": 1588 }, { "epoch": 1.49, "learning_rate": 4.981436567164179e-05, "loss": 0.0643, "step": 1592 }, { "epoch": 1.49, "learning_rate": 4.981389925373135e-05, "loss": 0.0719, "step": 1596 }, { "epoch": 1.49, "learning_rate": 4.98134328358209e-05, "loss": 0.0477, "step": 1600 }, { "epoch": 1.5, "learning_rate": 4.981296641791045e-05, "loss": 0.0702, "step": 1604 }, { "epoch": 1.5, "learning_rate": 4.98125e-05, "loss": 0.0685, "step": 1608 }, { "epoch": 1.5, "learning_rate": 4.981203358208956e-05, "loss": 0.0908, "step": 1612 }, { "epoch": 1.51, "learning_rate": 4.981156716417911e-05, "loss": 0.1234, "step": 1616 }, { "epoch": 1.51, "learning_rate": 4.9811100746268656e-05, "loss": 0.0749, "step": 1620 }, { "epoch": 1.51, "learning_rate": 4.981063432835821e-05, "loss": 0.0627, "step": 1624 }, { "epoch": 1.52, "learning_rate": 4.9810167910447766e-05, "loss": 0.0642, "step": 1628 }, { "epoch": 1.52, "learning_rate": 4.9809701492537314e-05, "loss": 0.0632, "step": 1632 }, { "epoch": 1.53, "learning_rate": 4.980923507462687e-05, "loss": 0.0874, "step": 1636 }, { "epoch": 1.53, "learning_rate": 4.9808768656716424e-05, "loss": 0.1122, "step": 1640 }, { "epoch": 1.53, "learning_rate": 4.980830223880597e-05, "loss": 0.0583, "step": 1644 }, { "epoch": 1.54, "learning_rate": 4.9807835820895527e-05, "loss": 0.0584, "step": 1648 }, { "epoch": 1.54, "learning_rate": 4.9807369402985075e-05, "loss": 0.1076, "step": 1652 }, { "epoch": 1.54, "learning_rate": 4.980690298507463e-05, "loss": 0.0854, "step": 1656 }, { "epoch": 1.55, "learning_rate": 4.9806436567164185e-05, "loss": 0.0862, "step": 1660 }, { "epoch": 1.55, "learning_rate": 4.980597014925373e-05, "loss": 0.0705, "step": 1664 }, { "epoch": 1.56, "learning_rate": 4.980550373134328e-05, "loss": 0.0803, "step": 1668 }, { "epoch": 1.56, "learning_rate": 4.980503731343284e-05, "loss": 0.0387, "step": 1672 }, { "epoch": 1.56, "learning_rate": 4.980457089552239e-05, "loss": 0.0556, "step": 1676 }, { "epoch": 1.57, "learning_rate": 4.980410447761194e-05, "loss": 0.0559, "step": 1680 }, { "epoch": 1.57, "learning_rate": 4.9803638059701494e-05, "loss": 0.0766, "step": 1684 }, { "epoch": 1.57, "learning_rate": 4.980317164179105e-05, "loss": 0.0778, "step": 1688 }, { "epoch": 1.58, "learning_rate": 4.98027052238806e-05, "loss": 0.0976, "step": 1692 }, { "epoch": 1.58, "learning_rate": 4.980223880597015e-05, "loss": 0.0818, "step": 1696 }, { "epoch": 1.59, "learning_rate": 4.9801772388059707e-05, "loss": 0.0597, "step": 1700 }, { "epoch": 1.59, "learning_rate": 4.9801305970149255e-05, "loss": 0.0626, "step": 1704 }, { "epoch": 1.59, "learning_rate": 4.980083955223881e-05, "loss": 0.117, "step": 1708 }, { "epoch": 1.6, "learning_rate": 4.980037313432836e-05, "loss": 0.0725, "step": 1712 }, { "epoch": 1.6, "learning_rate": 4.979990671641791e-05, "loss": 0.0788, "step": 1716 }, { "epoch": 1.6, "learning_rate": 4.979944029850747e-05, "loss": 0.0874, "step": 1720 }, { "epoch": 1.61, "learning_rate": 4.9798973880597016e-05, "loss": 0.0781, "step": 1724 }, { "epoch": 1.61, "learning_rate": 4.979850746268657e-05, "loss": 0.0971, "step": 1728 }, { "epoch": 1.62, "learning_rate": 4.9798041044776125e-05, "loss": 0.1099, "step": 1732 }, { "epoch": 1.62, "learning_rate": 4.9797574626865674e-05, "loss": 0.0748, "step": 1736 }, { "epoch": 1.62, "learning_rate": 4.979710820895523e-05, "loss": 0.073, "step": 1740 }, { "epoch": 1.63, "learning_rate": 4.9796641791044777e-05, "loss": 0.0584, "step": 1744 }, { "epoch": 1.63, "learning_rate": 4.979617537313433e-05, "loss": 0.0749, "step": 1748 }, { "epoch": 1.63, "learning_rate": 4.9795708955223886e-05, "loss": 0.0712, "step": 1752 }, { "epoch": 1.64, "learning_rate": 4.9795242537313435e-05, "loss": 0.082, "step": 1756 }, { "epoch": 1.64, "learning_rate": 4.979477611940298e-05, "loss": 0.0701, "step": 1760 }, { "epoch": 1.65, "learning_rate": 4.9794309701492544e-05, "loss": 0.0424, "step": 1764 }, { "epoch": 1.65, "learning_rate": 4.979384328358209e-05, "loss": 0.1098, "step": 1768 }, { "epoch": 1.65, "learning_rate": 4.979337686567164e-05, "loss": 0.0737, "step": 1772 }, { "epoch": 1.66, "learning_rate": 4.9792910447761196e-05, "loss": 0.0571, "step": 1776 }, { "epoch": 1.66, "learning_rate": 4.979244402985075e-05, "loss": 0.0605, "step": 1780 }, { "epoch": 1.66, "learning_rate": 4.97919776119403e-05, "loss": 0.0686, "step": 1784 }, { "epoch": 1.67, "learning_rate": 4.9791511194029853e-05, "loss": 0.0497, "step": 1788 }, { "epoch": 1.67, "learning_rate": 4.979104477611941e-05, "loss": 0.0528, "step": 1792 }, { "epoch": 1.68, "learning_rate": 4.9790578358208957e-05, "loss": 0.0618, "step": 1796 }, { "epoch": 1.68, "learning_rate": 4.979011194029851e-05, "loss": 0.0575, "step": 1800 }, { "epoch": 1.68, "learning_rate": 4.978964552238806e-05, "loss": 0.0448, "step": 1804 }, { "epoch": 1.69, "learning_rate": 4.9789179104477614e-05, "loss": 0.0505, "step": 1808 }, { "epoch": 1.69, "learning_rate": 4.978871268656717e-05, "loss": 0.0813, "step": 1812 }, { "epoch": 1.69, "learning_rate": 4.978824626865672e-05, "loss": 0.0487, "step": 1816 }, { "epoch": 1.7, "learning_rate": 4.9787779850746266e-05, "loss": 0.0912, "step": 1820 }, { "epoch": 1.7, "learning_rate": 4.978731343283583e-05, "loss": 0.0839, "step": 1824 }, { "epoch": 1.71, "learning_rate": 4.9786847014925375e-05, "loss": 0.0734, "step": 1828 }, { "epoch": 1.71, "learning_rate": 4.9786380597014924e-05, "loss": 0.0664, "step": 1832 }, { "epoch": 1.71, "learning_rate": 4.978591417910448e-05, "loss": 0.0435, "step": 1836 }, { "epoch": 1.72, "learning_rate": 4.9785447761194033e-05, "loss": 0.0893, "step": 1840 }, { "epoch": 1.72, "learning_rate": 4.978498134328358e-05, "loss": 0.0807, "step": 1844 }, { "epoch": 1.72, "learning_rate": 4.9784514925373136e-05, "loss": 0.0587, "step": 1848 }, { "epoch": 1.73, "learning_rate": 4.978404850746269e-05, "loss": 0.072, "step": 1852 }, { "epoch": 1.73, "learning_rate": 4.978358208955224e-05, "loss": 0.0707, "step": 1856 }, { "epoch": 1.73, "learning_rate": 4.9783115671641794e-05, "loss": 0.0658, "step": 1860 }, { "epoch": 1.74, "learning_rate": 4.978264925373134e-05, "loss": 0.0782, "step": 1864 }, { "epoch": 1.74, "learning_rate": 4.97821828358209e-05, "loss": 0.124, "step": 1868 }, { "epoch": 1.75, "learning_rate": 4.978171641791045e-05, "loss": 0.0764, "step": 1872 }, { "epoch": 1.75, "learning_rate": 4.978125e-05, "loss": 0.0901, "step": 1876 }, { "epoch": 1.75, "learning_rate": 4.9780783582089555e-05, "loss": 0.0531, "step": 1880 }, { "epoch": 1.76, "learning_rate": 4.978031716417911e-05, "loss": 0.0964, "step": 1884 }, { "epoch": 1.76, "learning_rate": 4.977985074626866e-05, "loss": 0.0847, "step": 1888 }, { "epoch": 1.76, "learning_rate": 4.977938432835821e-05, "loss": 0.0643, "step": 1892 }, { "epoch": 1.77, "learning_rate": 4.977891791044776e-05, "loss": 0.0743, "step": 1896 }, { "epoch": 1.77, "learning_rate": 4.9778451492537316e-05, "loss": 0.0533, "step": 1900 }, { "epoch": 1.78, "learning_rate": 4.977798507462687e-05, "loss": 0.0736, "step": 1904 }, { "epoch": 1.78, "learning_rate": 4.977751865671642e-05, "loss": 0.0535, "step": 1908 }, { "epoch": 1.78, "learning_rate": 4.9777052238805974e-05, "loss": 0.0636, "step": 1912 }, { "epoch": 1.79, "learning_rate": 4.977658582089553e-05, "loss": 0.0381, "step": 1916 }, { "epoch": 1.79, "learning_rate": 4.977611940298508e-05, "loss": 0.0298, "step": 1920 }, { "epoch": 1.79, "learning_rate": 4.9775652985074625e-05, "loss": 0.0838, "step": 1924 }, { "epoch": 1.8, "learning_rate": 4.977518656716418e-05, "loss": 0.1048, "step": 1928 }, { "epoch": 1.8, "learning_rate": 4.9774720149253735e-05, "loss": 0.031, "step": 1932 }, { "epoch": 1.81, "learning_rate": 4.9774253731343283e-05, "loss": 0.0617, "step": 1936 }, { "epoch": 1.81, "learning_rate": 4.977378731343284e-05, "loss": 0.1009, "step": 1940 }, { "epoch": 1.81, "learning_rate": 4.977332089552239e-05, "loss": 0.057, "step": 1944 }, { "epoch": 1.82, "learning_rate": 4.977285447761194e-05, "loss": 0.0519, "step": 1948 }, { "epoch": 1.82, "learning_rate": 4.9772388059701496e-05, "loss": 0.061, "step": 1952 }, { "epoch": 1.82, "learning_rate": 4.9771921641791044e-05, "loss": 0.1017, "step": 1956 }, { "epoch": 1.83, "learning_rate": 4.97714552238806e-05, "loss": 0.0552, "step": 1960 }, { "epoch": 1.83, "learning_rate": 4.9770988805970154e-05, "loss": 0.0943, "step": 1964 }, { "epoch": 1.84, "learning_rate": 4.97705223880597e-05, "loss": 0.0545, "step": 1968 }, { "epoch": 1.84, "learning_rate": 4.977005597014926e-05, "loss": 0.0931, "step": 1972 }, { "epoch": 1.84, "learning_rate": 4.976958955223881e-05, "loss": 0.0426, "step": 1976 }, { "epoch": 1.85, "learning_rate": 4.976912313432836e-05, "loss": 0.0939, "step": 1980 }, { "epoch": 1.85, "learning_rate": 4.976865671641791e-05, "loss": 0.086, "step": 1984 }, { "epoch": 1.85, "learning_rate": 4.976819029850746e-05, "loss": 0.0391, "step": 1988 }, { "epoch": 1.86, "learning_rate": 4.976772388059702e-05, "loss": 0.0389, "step": 1992 }, { "epoch": 1.86, "learning_rate": 4.9767257462686566e-05, "loss": 0.0725, "step": 1996 }, { "epoch": 1.87, "learning_rate": 4.976679104477612e-05, "loss": 0.074, "step": 2000 }, { "epoch": 1.87, "eval_exact_match": 0.6721470019342359, "eval_exec": 0.7147001934235977, "eval_loss": 0.14614403247833252, "eval_runtime": 1069.1464, "eval_samples_per_second": 0.967, "step": 2000 }, { "epoch": 1.87, "learning_rate": 4.9766324626865676e-05, "loss": 0.0657, "step": 2004 }, { "epoch": 1.87, "learning_rate": 4.9765858208955224e-05, "loss": 0.0452, "step": 2008 }, { "epoch": 1.88, "learning_rate": 4.976539179104478e-05, "loss": 0.0343, "step": 2012 }, { "epoch": 1.88, "learning_rate": 4.976492537313433e-05, "loss": 0.0896, "step": 2016 }, { "epoch": 1.88, "learning_rate": 4.976445895522388e-05, "loss": 0.043, "step": 2020 }, { "epoch": 1.89, "learning_rate": 4.976399253731344e-05, "loss": 0.0543, "step": 2024 }, { "epoch": 1.89, "learning_rate": 4.9763526119402985e-05, "loss": 0.0976, "step": 2028 }, { "epoch": 1.9, "learning_rate": 4.976305970149254e-05, "loss": 0.0623, "step": 2032 }, { "epoch": 1.9, "learning_rate": 4.9762593283582095e-05, "loss": 0.0817, "step": 2036 }, { "epoch": 1.9, "learning_rate": 4.976212686567164e-05, "loss": 0.0442, "step": 2040 }, { "epoch": 1.91, "learning_rate": 4.97616604477612e-05, "loss": 0.0688, "step": 2044 }, { "epoch": 1.91, "learning_rate": 4.9761194029850746e-05, "loss": 0.0729, "step": 2048 }, { "epoch": 1.91, "learning_rate": 4.97607276119403e-05, "loss": 0.055, "step": 2052 }, { "epoch": 1.92, "learning_rate": 4.9760261194029856e-05, "loss": 0.055, "step": 2056 }, { "epoch": 1.92, "learning_rate": 4.9759794776119404e-05, "loss": 0.0533, "step": 2060 }, { "epoch": 1.93, "learning_rate": 4.975932835820896e-05, "loss": 0.0656, "step": 2064 }, { "epoch": 1.93, "learning_rate": 4.9758861940298514e-05, "loss": 0.0534, "step": 2068 }, { "epoch": 1.93, "learning_rate": 4.975839552238806e-05, "loss": 0.0565, "step": 2072 }, { "epoch": 1.94, "learning_rate": 4.975792910447761e-05, "loss": 0.0608, "step": 2076 }, { "epoch": 1.94, "learning_rate": 4.975746268656717e-05, "loss": 0.0383, "step": 2080 }, { "epoch": 1.94, "learning_rate": 4.975699626865672e-05, "loss": 0.1284, "step": 2084 }, { "epoch": 1.95, "learning_rate": 4.975652985074627e-05, "loss": 0.0556, "step": 2088 }, { "epoch": 1.95, "learning_rate": 4.975606343283582e-05, "loss": 0.0666, "step": 2092 }, { "epoch": 1.96, "learning_rate": 4.975559701492538e-05, "loss": 0.0705, "step": 2096 }, { "epoch": 1.96, "learning_rate": 4.9755130597014926e-05, "loss": 0.0421, "step": 2100 }, { "epoch": 1.96, "learning_rate": 4.975466417910448e-05, "loss": 0.0666, "step": 2104 }, { "epoch": 1.97, "learning_rate": 4.975419776119403e-05, "loss": 0.0333, "step": 2108 }, { "epoch": 1.97, "learning_rate": 4.9753731343283584e-05, "loss": 0.05, "step": 2112 }, { "epoch": 1.97, "learning_rate": 4.975326492537314e-05, "loss": 0.059, "step": 2116 }, { "epoch": 1.98, "learning_rate": 4.975279850746269e-05, "loss": 0.0567, "step": 2120 }, { "epoch": 1.98, "learning_rate": 4.975233208955224e-05, "loss": 0.0447, "step": 2124 }, { "epoch": 1.98, "learning_rate": 4.97518656716418e-05, "loss": 0.0653, "step": 2128 }, { "epoch": 1.99, "learning_rate": 4.9751399253731345e-05, "loss": 0.0653, "step": 2132 }, { "epoch": 1.99, "learning_rate": 4.975093283582089e-05, "loss": 0.0908, "step": 2136 }, { "epoch": 2.0, "learning_rate": 4.9750466417910455e-05, "loss": 0.086, "step": 2140 }, { "epoch": 2.0, "learning_rate": 4.975e-05, "loss": 0.0909, "step": 2144 }, { "epoch": 2.0, "learning_rate": 4.974953358208955e-05, "loss": 0.0282, "step": 2148 }, { "epoch": 2.01, "learning_rate": 4.9749067164179106e-05, "loss": 0.0412, "step": 2152 }, { "epoch": 2.01, "learning_rate": 4.974860074626866e-05, "loss": 0.0324, "step": 2156 }, { "epoch": 2.01, "learning_rate": 4.974813432835821e-05, "loss": 0.0381, "step": 2160 }, { "epoch": 2.02, "learning_rate": 4.9747667910447764e-05, "loss": 0.0609, "step": 2164 }, { "epoch": 2.02, "learning_rate": 4.974720149253731e-05, "loss": 0.0252, "step": 2168 }, { "epoch": 2.03, "learning_rate": 4.974673507462687e-05, "loss": 0.0282, "step": 2172 }, { "epoch": 2.03, "learning_rate": 4.974626865671642e-05, "loss": 0.0329, "step": 2176 }, { "epoch": 2.03, "learning_rate": 4.974580223880597e-05, "loss": 0.0479, "step": 2180 }, { "epoch": 2.04, "learning_rate": 4.9745335820895525e-05, "loss": 0.0323, "step": 2184 }, { "epoch": 2.04, "learning_rate": 4.974486940298508e-05, "loss": 0.0309, "step": 2188 }, { "epoch": 2.04, "learning_rate": 4.974440298507463e-05, "loss": 0.0482, "step": 2192 }, { "epoch": 2.05, "learning_rate": 4.9743936567164176e-05, "loss": 0.0378, "step": 2196 }, { "epoch": 2.05, "learning_rate": 4.974347014925374e-05, "loss": 0.057, "step": 2200 }, { "epoch": 2.06, "learning_rate": 4.9743003731343286e-05, "loss": 0.0485, "step": 2204 }, { "epoch": 2.06, "learning_rate": 4.974253731343284e-05, "loss": 0.0409, "step": 2208 }, { "epoch": 2.06, "learning_rate": 4.974207089552239e-05, "loss": 0.0497, "step": 2212 }, { "epoch": 2.07, "learning_rate": 4.9741604477611944e-05, "loss": 0.0286, "step": 2216 }, { "epoch": 2.07, "learning_rate": 4.97411380597015e-05, "loss": 0.0323, "step": 2220 }, { "epoch": 2.07, "learning_rate": 4.974067164179105e-05, "loss": 0.0414, "step": 2224 }, { "epoch": 2.08, "learning_rate": 4.9740205223880595e-05, "loss": 0.0469, "step": 2228 }, { "epoch": 2.08, "learning_rate": 4.973973880597016e-05, "loss": 0.0267, "step": 2232 }, { "epoch": 2.09, "learning_rate": 4.9739272388059705e-05, "loss": 0.0438, "step": 2236 }, { "epoch": 2.09, "learning_rate": 4.973880597014925e-05, "loss": 0.0423, "step": 2240 }, { "epoch": 2.09, "learning_rate": 4.973833955223881e-05, "loss": 0.0424, "step": 2244 }, { "epoch": 2.1, "learning_rate": 4.973787313432836e-05, "loss": 0.0562, "step": 2248 }, { "epoch": 2.1, "learning_rate": 4.973740671641791e-05, "loss": 0.0538, "step": 2252 }, { "epoch": 2.1, "learning_rate": 4.9736940298507466e-05, "loss": 0.0554, "step": 2256 }, { "epoch": 2.11, "learning_rate": 4.973647388059702e-05, "loss": 0.0595, "step": 2260 }, { "epoch": 2.11, "learning_rate": 4.973600746268657e-05, "loss": 0.0652, "step": 2264 }, { "epoch": 2.12, "learning_rate": 4.9735541044776124e-05, "loss": 0.05, "step": 2268 }, { "epoch": 2.12, "learning_rate": 4.973507462686567e-05, "loss": 0.0699, "step": 2272 }, { "epoch": 2.12, "learning_rate": 4.973460820895523e-05, "loss": 0.0484, "step": 2276 }, { "epoch": 2.13, "learning_rate": 4.973414179104478e-05, "loss": 0.0502, "step": 2280 }, { "epoch": 2.13, "learning_rate": 4.973367537313433e-05, "loss": 0.0437, "step": 2284 }, { "epoch": 2.13, "learning_rate": 4.973320895522388e-05, "loss": 0.072, "step": 2288 }, { "epoch": 2.14, "learning_rate": 4.973274253731344e-05, "loss": 0.0548, "step": 2292 }, { "epoch": 2.14, "learning_rate": 4.973227611940299e-05, "loss": 0.0734, "step": 2296 }, { "epoch": 2.15, "learning_rate": 4.9731809701492536e-05, "loss": 0.0451, "step": 2300 }, { "epoch": 2.15, "learning_rate": 4.973134328358209e-05, "loss": 0.0202, "step": 2304 }, { "epoch": 2.15, "learning_rate": 4.9730876865671646e-05, "loss": 0.0281, "step": 2308 }, { "epoch": 2.16, "learning_rate": 4.9730410447761194e-05, "loss": 0.0335, "step": 2312 }, { "epoch": 2.16, "learning_rate": 4.972994402985075e-05, "loss": 0.042, "step": 2316 }, { "epoch": 2.16, "learning_rate": 4.9729477611940304e-05, "loss": 0.0429, "step": 2320 }, { "epoch": 2.17, "learning_rate": 4.972901119402985e-05, "loss": 0.0548, "step": 2324 }, { "epoch": 2.17, "learning_rate": 4.972854477611941e-05, "loss": 0.0355, "step": 2328 }, { "epoch": 2.18, "learning_rate": 4.9728078358208955e-05, "loss": 0.0399, "step": 2332 }, { "epoch": 2.18, "learning_rate": 4.972761194029851e-05, "loss": 0.0273, "step": 2336 }, { "epoch": 2.18, "learning_rate": 4.9727145522388065e-05, "loss": 0.0439, "step": 2340 }, { "epoch": 2.19, "learning_rate": 4.972667910447761e-05, "loss": 0.0303, "step": 2344 }, { "epoch": 2.19, "learning_rate": 4.972621268656716e-05, "loss": 0.0439, "step": 2348 }, { "epoch": 2.19, "learning_rate": 4.972574626865672e-05, "loss": 0.0417, "step": 2352 }, { "epoch": 2.2, "learning_rate": 4.972527985074627e-05, "loss": 0.0286, "step": 2356 }, { "epoch": 2.2, "learning_rate": 4.972481343283582e-05, "loss": 0.0292, "step": 2360 }, { "epoch": 2.21, "learning_rate": 4.9724347014925374e-05, "loss": 0.0515, "step": 2364 }, { "epoch": 2.21, "learning_rate": 4.972388059701493e-05, "loss": 0.0473, "step": 2368 }, { "epoch": 2.21, "learning_rate": 4.9723414179104484e-05, "loss": 0.0406, "step": 2372 }, { "epoch": 2.22, "learning_rate": 4.972294776119403e-05, "loss": 0.0629, "step": 2376 }, { "epoch": 2.22, "learning_rate": 4.972248134328359e-05, "loss": 0.0504, "step": 2380 }, { "epoch": 2.22, "learning_rate": 4.972201492537314e-05, "loss": 0.0267, "step": 2384 }, { "epoch": 2.23, "learning_rate": 4.972154850746269e-05, "loss": 0.0302, "step": 2388 }, { "epoch": 2.23, "learning_rate": 4.972108208955224e-05, "loss": 0.0223, "step": 2392 }, { "epoch": 2.24, "learning_rate": 4.972061567164179e-05, "loss": 0.0636, "step": 2396 }, { "epoch": 2.24, "learning_rate": 4.972014925373135e-05, "loss": 0.0347, "step": 2400 }, { "epoch": 2.24, "learning_rate": 4.9719682835820896e-05, "loss": 0.0412, "step": 2404 }, { "epoch": 2.25, "learning_rate": 4.971921641791045e-05, "loss": 0.0416, "step": 2408 }, { "epoch": 2.25, "learning_rate": 4.9718750000000006e-05, "loss": 0.0442, "step": 2412 }, { "epoch": 2.25, "learning_rate": 4.9718283582089554e-05, "loss": 0.0268, "step": 2416 }, { "epoch": 2.26, "learning_rate": 4.971781716417911e-05, "loss": 0.043, "step": 2420 }, { "epoch": 2.26, "learning_rate": 4.971735074626866e-05, "loss": 0.0315, "step": 2424 }, { "epoch": 2.26, "learning_rate": 4.971688432835821e-05, "loss": 0.0238, "step": 2428 }, { "epoch": 2.27, "learning_rate": 4.971641791044777e-05, "loss": 0.0405, "step": 2432 }, { "epoch": 2.27, "learning_rate": 4.9715951492537315e-05, "loss": 0.0445, "step": 2436 }, { "epoch": 2.28, "learning_rate": 4.971548507462686e-05, "loss": 0.0537, "step": 2440 }, { "epoch": 2.28, "learning_rate": 4.9715018656716425e-05, "loss": 0.0656, "step": 2444 }, { "epoch": 2.28, "learning_rate": 4.971455223880597e-05, "loss": 0.0381, "step": 2448 }, { "epoch": 2.29, "learning_rate": 4.971408582089552e-05, "loss": 0.0455, "step": 2452 }, { "epoch": 2.29, "learning_rate": 4.9713619402985076e-05, "loss": 0.0489, "step": 2456 }, { "epoch": 2.29, "learning_rate": 4.971315298507463e-05, "loss": 0.0505, "step": 2460 }, { "epoch": 2.3, "learning_rate": 4.971268656716418e-05, "loss": 0.0454, "step": 2464 }, { "epoch": 2.3, "learning_rate": 4.9712220149253734e-05, "loss": 0.0409, "step": 2468 }, { "epoch": 2.31, "learning_rate": 4.971175373134329e-05, "loss": 0.0429, "step": 2472 }, { "epoch": 2.31, "learning_rate": 4.971128731343284e-05, "loss": 0.0383, "step": 2476 }, { "epoch": 2.31, "learning_rate": 4.971082089552239e-05, "loss": 0.0349, "step": 2480 }, { "epoch": 2.32, "learning_rate": 4.971035447761194e-05, "loss": 0.0369, "step": 2484 }, { "epoch": 2.32, "learning_rate": 4.9709888059701495e-05, "loss": 0.052, "step": 2488 }, { "epoch": 2.32, "learning_rate": 4.970942164179105e-05, "loss": 0.0564, "step": 2492 }, { "epoch": 2.33, "learning_rate": 4.97089552238806e-05, "loss": 0.0585, "step": 2496 }, { "epoch": 2.33, "learning_rate": 4.9708488805970146e-05, "loss": 0.0382, "step": 2500 }, { "epoch": 2.33, "eval_exact_match": 0.6847195357833655, "eval_exec": 0.7408123791102514, "eval_loss": 0.16533711552619934, "eval_runtime": 1046.3454, "eval_samples_per_second": 0.988, "step": 2500 }, { "epoch": 2.34, "learning_rate": 4.970802238805971e-05, "loss": 0.0608, "step": 2504 }, { "epoch": 2.34, "learning_rate": 4.9707555970149256e-05, "loss": 0.0526, "step": 2508 }, { "epoch": 2.34, "learning_rate": 4.9707089552238804e-05, "loss": 0.0666, "step": 2512 }, { "epoch": 2.35, "learning_rate": 4.970662313432836e-05, "loss": 0.019, "step": 2516 }, { "epoch": 2.35, "learning_rate": 4.9706156716417914e-05, "loss": 0.0384, "step": 2520 }, { "epoch": 2.35, "learning_rate": 4.970569029850746e-05, "loss": 0.0553, "step": 2524 }, { "epoch": 2.36, "learning_rate": 4.970522388059702e-05, "loss": 0.0517, "step": 2528 }, { "epoch": 2.36, "learning_rate": 4.970475746268657e-05, "loss": 0.086, "step": 2532 }, { "epoch": 2.37, "learning_rate": 4.9704291044776127e-05, "loss": 0.0685, "step": 2536 }, { "epoch": 2.37, "learning_rate": 4.9703824626865675e-05, "loss": 0.0479, "step": 2540 }, { "epoch": 2.37, "learning_rate": 4.970335820895522e-05, "loss": 0.0378, "step": 2544 }, { "epoch": 2.38, "learning_rate": 4.9702891791044784e-05, "loss": 0.046, "step": 2548 }, { "epoch": 2.38, "learning_rate": 4.970242537313433e-05, "loss": 0.056, "step": 2552 }, { "epoch": 2.38, "learning_rate": 4.970195895522388e-05, "loss": 0.0294, "step": 2556 }, { "epoch": 2.39, "learning_rate": 4.9701492537313436e-05, "loss": 0.0497, "step": 2560 }, { "epoch": 2.39, "learning_rate": 4.970102611940299e-05, "loss": 0.039, "step": 2564 }, { "epoch": 2.4, "learning_rate": 4.970055970149254e-05, "loss": 0.0395, "step": 2568 }, { "epoch": 2.4, "learning_rate": 4.9700093283582094e-05, "loss": 0.0542, "step": 2572 }, { "epoch": 2.4, "learning_rate": 4.969962686567164e-05, "loss": 0.0596, "step": 2576 }, { "epoch": 2.41, "learning_rate": 4.96991604477612e-05, "loss": 0.0358, "step": 2580 }, { "epoch": 2.41, "learning_rate": 4.969869402985075e-05, "loss": 0.0467, "step": 2584 }, { "epoch": 2.41, "learning_rate": 4.96982276119403e-05, "loss": 0.0431, "step": 2588 }, { "epoch": 2.42, "learning_rate": 4.9697761194029855e-05, "loss": 0.0355, "step": 2592 }, { "epoch": 2.42, "learning_rate": 4.969729477611941e-05, "loss": 0.0251, "step": 2596 }, { "epoch": 2.43, "learning_rate": 4.969682835820896e-05, "loss": 0.0417, "step": 2600 }, { "epoch": 2.43, "learning_rate": 4.9696361940298506e-05, "loss": 0.063, "step": 2604 }, { "epoch": 2.43, "learning_rate": 4.969589552238806e-05, "loss": 0.0454, "step": 2608 }, { "epoch": 2.44, "learning_rate": 4.9695429104477616e-05, "loss": 0.0351, "step": 2612 }, { "epoch": 2.44, "learning_rate": 4.9694962686567164e-05, "loss": 0.0543, "step": 2616 }, { "epoch": 2.44, "learning_rate": 4.969449626865672e-05, "loss": 0.0422, "step": 2620 }, { "epoch": 2.45, "learning_rate": 4.9694029850746274e-05, "loss": 0.0427, "step": 2624 }, { "epoch": 2.45, "learning_rate": 4.969356343283582e-05, "loss": 0.0595, "step": 2628 }, { "epoch": 2.46, "learning_rate": 4.9693097014925377e-05, "loss": 0.0437, "step": 2632 }, { "epoch": 2.46, "learning_rate": 4.9692630597014925e-05, "loss": 0.0358, "step": 2636 }, { "epoch": 2.46, "learning_rate": 4.969216417910448e-05, "loss": 0.041, "step": 2640 }, { "epoch": 2.47, "learning_rate": 4.9691697761194035e-05, "loss": 0.0497, "step": 2644 }, { "epoch": 2.47, "learning_rate": 4.969123134328358e-05, "loss": 0.0494, "step": 2648 }, { "epoch": 2.47, "learning_rate": 4.969076492537314e-05, "loss": 0.0541, "step": 2652 }, { "epoch": 2.48, "learning_rate": 4.969029850746269e-05, "loss": 0.0668, "step": 2656 }, { "epoch": 2.48, "learning_rate": 4.968983208955224e-05, "loss": 0.0314, "step": 2660 }, { "epoch": 2.49, "learning_rate": 4.968936567164179e-05, "loss": 0.0576, "step": 2664 }, { "epoch": 2.49, "learning_rate": 4.9688899253731344e-05, "loss": 0.0514, "step": 2668 }, { "epoch": 2.49, "learning_rate": 4.96884328358209e-05, "loss": 0.0398, "step": 2672 }, { "epoch": 2.5, "learning_rate": 4.968796641791045e-05, "loss": 0.0481, "step": 2676 }, { "epoch": 2.5, "learning_rate": 4.96875e-05, "loss": 0.0431, "step": 2680 }, { "epoch": 2.5, "learning_rate": 4.9687033582089556e-05, "loss": 0.0419, "step": 2684 }, { "epoch": 2.51, "learning_rate": 4.9686567164179105e-05, "loss": 0.0343, "step": 2688 }, { "epoch": 2.51, "learning_rate": 4.968610074626866e-05, "loss": 0.0421, "step": 2692 }, { "epoch": 2.51, "learning_rate": 4.968563432835821e-05, "loss": 0.0585, "step": 2696 }, { "epoch": 2.52, "learning_rate": 4.968516791044777e-05, "loss": 0.0442, "step": 2700 }, { "epoch": 2.52, "learning_rate": 4.968470149253732e-05, "loss": 0.0315, "step": 2704 }, { "epoch": 2.53, "learning_rate": 4.9684235074626866e-05, "loss": 0.0563, "step": 2708 }, { "epoch": 2.53, "learning_rate": 4.968376865671642e-05, "loss": 0.05, "step": 2712 }, { "epoch": 2.53, "learning_rate": 4.9683302238805975e-05, "loss": 0.0356, "step": 2716 }, { "epoch": 2.54, "learning_rate": 4.9682835820895524e-05, "loss": 0.0399, "step": 2720 }, { "epoch": 2.54, "learning_rate": 4.968236940298508e-05, "loss": 0.035, "step": 2724 }, { "epoch": 2.54, "learning_rate": 4.9681902985074627e-05, "loss": 0.0222, "step": 2728 }, { "epoch": 2.55, "learning_rate": 4.968143656716418e-05, "loss": 0.0527, "step": 2732 }, { "epoch": 2.55, "learning_rate": 4.9680970149253736e-05, "loss": 0.0451, "step": 2736 }, { "epoch": 2.56, "learning_rate": 4.9680503731343285e-05, "loss": 0.0488, "step": 2740 }, { "epoch": 2.56, "learning_rate": 4.968003731343284e-05, "loss": 0.0794, "step": 2744 }, { "epoch": 2.56, "learning_rate": 4.9679570895522394e-05, "loss": 0.0571, "step": 2748 }, { "epoch": 2.57, "learning_rate": 4.967910447761194e-05, "loss": 0.0391, "step": 2752 }, { "epoch": 2.57, "learning_rate": 4.967863805970149e-05, "loss": 0.048, "step": 2756 }, { "epoch": 2.57, "learning_rate": 4.967817164179105e-05, "loss": 0.051, "step": 2760 }, { "epoch": 2.58, "learning_rate": 4.96777052238806e-05, "loss": 0.0402, "step": 2764 }, { "epoch": 2.58, "learning_rate": 4.967723880597015e-05, "loss": 0.0392, "step": 2768 }, { "epoch": 2.59, "learning_rate": 4.9676772388059703e-05, "loss": 0.0518, "step": 2772 }, { "epoch": 2.59, "learning_rate": 4.967630597014926e-05, "loss": 0.0494, "step": 2776 }, { "epoch": 2.59, "learning_rate": 4.9675839552238807e-05, "loss": 0.0324, "step": 2780 }, { "epoch": 2.6, "learning_rate": 4.967537313432836e-05, "loss": 0.0357, "step": 2784 }, { "epoch": 2.6, "learning_rate": 4.967490671641791e-05, "loss": 0.0458, "step": 2788 }, { "epoch": 2.6, "learning_rate": 4.9674440298507464e-05, "loss": 0.0593, "step": 2792 }, { "epoch": 2.61, "learning_rate": 4.967397388059702e-05, "loss": 0.0454, "step": 2796 }, { "epoch": 2.61, "learning_rate": 4.967350746268657e-05, "loss": 0.0629, "step": 2800 }, { "epoch": 2.62, "learning_rate": 4.967304104477612e-05, "loss": 0.0287, "step": 2804 }, { "epoch": 2.62, "learning_rate": 4.967257462686568e-05, "loss": 0.0413, "step": 2808 }, { "epoch": 2.62, "learning_rate": 4.9672108208955225e-05, "loss": 0.0231, "step": 2812 }, { "epoch": 2.63, "learning_rate": 4.9671641791044774e-05, "loss": 0.0362, "step": 2816 }, { "epoch": 2.63, "learning_rate": 4.9671175373134335e-05, "loss": 0.055, "step": 2820 }, { "epoch": 2.63, "learning_rate": 4.967070895522388e-05, "loss": 0.0448, "step": 2824 }, { "epoch": 2.64, "learning_rate": 4.967024253731343e-05, "loss": 0.0337, "step": 2828 }, { "epoch": 2.64, "learning_rate": 4.9669776119402986e-05, "loss": 0.048, "step": 2832 }, { "epoch": 2.65, "learning_rate": 4.966930970149254e-05, "loss": 0.0534, "step": 2836 }, { "epoch": 2.65, "learning_rate": 4.966884328358209e-05, "loss": 0.0296, "step": 2840 }, { "epoch": 2.65, "learning_rate": 4.9668376865671644e-05, "loss": 0.0418, "step": 2844 }, { "epoch": 2.66, "learning_rate": 4.966791044776119e-05, "loss": 0.0556, "step": 2848 }, { "epoch": 2.66, "learning_rate": 4.966744402985075e-05, "loss": 0.0262, "step": 2852 }, { "epoch": 2.66, "learning_rate": 4.96669776119403e-05, "loss": 0.0494, "step": 2856 }, { "epoch": 2.67, "learning_rate": 4.966651119402985e-05, "loss": 0.0361, "step": 2860 }, { "epoch": 2.67, "learning_rate": 4.9666044776119405e-05, "loss": 0.0495, "step": 2864 }, { "epoch": 2.68, "learning_rate": 4.966557835820896e-05, "loss": 0.0491, "step": 2868 }, { "epoch": 2.68, "learning_rate": 4.966511194029851e-05, "loss": 0.023, "step": 2872 }, { "epoch": 2.68, "learning_rate": 4.966464552238806e-05, "loss": 0.0381, "step": 2876 }, { "epoch": 2.69, "learning_rate": 4.966417910447762e-05, "loss": 0.0618, "step": 2880 }, { "epoch": 2.69, "learning_rate": 4.9663712686567166e-05, "loss": 0.0352, "step": 2884 }, { "epoch": 2.69, "learning_rate": 4.966324626865672e-05, "loss": 0.0534, "step": 2888 }, { "epoch": 2.7, "learning_rate": 4.966277985074627e-05, "loss": 0.1242, "step": 2892 }, { "epoch": 2.7, "learning_rate": 4.9662313432835824e-05, "loss": 0.0236, "step": 2896 }, { "epoch": 2.71, "learning_rate": 4.966184701492538e-05, "loss": 0.0396, "step": 2900 }, { "epoch": 2.71, "learning_rate": 4.966138059701493e-05, "loss": 0.0516, "step": 2904 }, { "epoch": 2.71, "learning_rate": 4.9660914179104475e-05, "loss": 0.0378, "step": 2908 }, { "epoch": 2.72, "learning_rate": 4.966044776119404e-05, "loss": 0.049, "step": 2912 }, { "epoch": 2.72, "learning_rate": 4.9659981343283585e-05, "loss": 0.0445, "step": 2916 }, { "epoch": 2.72, "learning_rate": 4.9659514925373133e-05, "loss": 0.0205, "step": 2920 }, { "epoch": 2.73, "learning_rate": 4.965904850746269e-05, "loss": 0.046, "step": 2924 }, { "epoch": 2.73, "learning_rate": 4.965858208955224e-05, "loss": 0.0493, "step": 2928 }, { "epoch": 2.73, "learning_rate": 4.965811567164179e-05, "loss": 0.054, "step": 2932 }, { "epoch": 2.74, "learning_rate": 4.9657649253731346e-05, "loss": 0.0409, "step": 2936 }, { "epoch": 2.74, "learning_rate": 4.96571828358209e-05, "loss": 0.0396, "step": 2940 }, { "epoch": 2.75, "learning_rate": 4.965671641791045e-05, "loss": 0.0362, "step": 2944 }, { "epoch": 2.75, "learning_rate": 4.9656250000000004e-05, "loss": 0.0487, "step": 2948 }, { "epoch": 2.75, "learning_rate": 4.965578358208955e-05, "loss": 0.0336, "step": 2952 }, { "epoch": 2.76, "learning_rate": 4.965531716417911e-05, "loss": 0.041, "step": 2956 }, { "epoch": 2.76, "learning_rate": 4.965485074626866e-05, "loss": 0.0395, "step": 2960 }, { "epoch": 2.76, "learning_rate": 4.965438432835821e-05, "loss": 0.032, "step": 2964 }, { "epoch": 2.77, "learning_rate": 4.965391791044776e-05, "loss": 0.0231, "step": 2968 }, { "epoch": 2.77, "learning_rate": 4.965345149253732e-05, "loss": 0.0594, "step": 2972 }, { "epoch": 2.78, "learning_rate": 4.965298507462687e-05, "loss": 0.0319, "step": 2976 }, { "epoch": 2.78, "learning_rate": 4.9652518656716416e-05, "loss": 0.031, "step": 2980 }, { "epoch": 2.78, "learning_rate": 4.965205223880597e-05, "loss": 0.0608, "step": 2984 }, { "epoch": 2.79, "learning_rate": 4.9651585820895526e-05, "loss": 0.0273, "step": 2988 }, { "epoch": 2.79, "learning_rate": 4.9651119402985074e-05, "loss": 0.0461, "step": 2992 }, { "epoch": 2.79, "learning_rate": 4.965065298507463e-05, "loss": 0.0546, "step": 2996 }, { "epoch": 2.8, "learning_rate": 4.9650186567164184e-05, "loss": 0.0209, "step": 3000 }, { "epoch": 2.8, "eval_exact_match": 0.6721470019342359, "eval_exec": 0.7224371373307543, "eval_loss": 0.16579200327396393, "eval_runtime": 1120.3474, "eval_samples_per_second": 0.923, "step": 3000 }, { "epoch": 2.8, "learning_rate": 4.964972014925373e-05, "loss": 0.0316, "step": 3004 }, { "epoch": 2.81, "learning_rate": 4.964925373134329e-05, "loss": 0.0333, "step": 3008 }, { "epoch": 2.81, "learning_rate": 4.9648787313432835e-05, "loss": 0.0722, "step": 3012 }, { "epoch": 2.81, "learning_rate": 4.964832089552239e-05, "loss": 0.0709, "step": 3016 }, { "epoch": 2.82, "learning_rate": 4.9647854477611945e-05, "loss": 0.0526, "step": 3020 }, { "epoch": 2.82, "learning_rate": 4.964738805970149e-05, "loss": 0.0346, "step": 3024 }, { "epoch": 2.82, "learning_rate": 4.964692164179105e-05, "loss": 0.0917, "step": 3028 }, { "epoch": 2.83, "learning_rate": 4.96464552238806e-05, "loss": 0.0397, "step": 3032 }, { "epoch": 2.83, "learning_rate": 4.964598880597015e-05, "loss": 0.0284, "step": 3036 }, { "epoch": 2.84, "learning_rate": 4.9645522388059706e-05, "loss": 0.0392, "step": 3040 }, { "epoch": 2.84, "learning_rate": 4.9645055970149254e-05, "loss": 0.0406, "step": 3044 }, { "epoch": 2.84, "learning_rate": 4.964458955223881e-05, "loss": 0.0423, "step": 3048 }, { "epoch": 2.85, "learning_rate": 4.9644123134328364e-05, "loss": 0.0459, "step": 3052 }, { "epoch": 2.85, "learning_rate": 4.964365671641791e-05, "loss": 0.0497, "step": 3056 }, { "epoch": 2.85, "learning_rate": 4.964319029850747e-05, "loss": 0.0248, "step": 3060 }, { "epoch": 2.86, "learning_rate": 4.964272388059702e-05, "loss": 0.0581, "step": 3064 }, { "epoch": 2.86, "learning_rate": 4.964225746268657e-05, "loss": 0.0254, "step": 3068 }, { "epoch": 2.87, "learning_rate": 4.964179104477612e-05, "loss": 0.029, "step": 3072 }, { "epoch": 2.87, "learning_rate": 4.964132462686567e-05, "loss": 0.0354, "step": 3076 }, { "epoch": 2.87, "learning_rate": 4.964085820895523e-05, "loss": 0.0398, "step": 3080 }, { "epoch": 2.88, "learning_rate": 4.9640391791044776e-05, "loss": 0.0325, "step": 3084 }, { "epoch": 2.88, "learning_rate": 4.963992537313433e-05, "loss": 0.0627, "step": 3088 }, { "epoch": 2.88, "learning_rate": 4.9639458955223886e-05, "loss": 0.0465, "step": 3092 }, { "epoch": 2.89, "learning_rate": 4.9638992537313434e-05, "loss": 0.0338, "step": 3096 }, { "epoch": 2.89, "learning_rate": 4.963852611940299e-05, "loss": 0.0313, "step": 3100 }, { "epoch": 2.9, "learning_rate": 4.963805970149254e-05, "loss": 0.0247, "step": 3104 }, { "epoch": 2.9, "learning_rate": 4.963759328358209e-05, "loss": 0.029, "step": 3108 }, { "epoch": 2.9, "learning_rate": 4.963712686567165e-05, "loss": 0.0319, "step": 3112 }, { "epoch": 2.91, "learning_rate": 4.9636660447761195e-05, "loss": 0.0358, "step": 3116 }, { "epoch": 2.91, "learning_rate": 4.963619402985074e-05, "loss": 0.0499, "step": 3120 }, { "epoch": 2.91, "learning_rate": 4.9635727611940305e-05, "loss": 0.0388, "step": 3124 }, { "epoch": 2.92, "learning_rate": 4.963526119402985e-05, "loss": 0.0304, "step": 3128 }, { "epoch": 2.92, "learning_rate": 4.96347947761194e-05, "loss": 0.02, "step": 3132 }, { "epoch": 2.93, "learning_rate": 4.9634328358208956e-05, "loss": 0.0344, "step": 3136 }, { "epoch": 2.93, "learning_rate": 4.963386194029851e-05, "loss": 0.0321, "step": 3140 }, { "epoch": 2.93, "learning_rate": 4.963339552238806e-05, "loss": 0.0496, "step": 3144 }, { "epoch": 2.94, "learning_rate": 4.9632929104477614e-05, "loss": 0.0624, "step": 3148 }, { "epoch": 2.94, "learning_rate": 4.963246268656717e-05, "loss": 0.039, "step": 3152 }, { "epoch": 2.94, "learning_rate": 4.963199626865672e-05, "loss": 0.0444, "step": 3156 }, { "epoch": 2.95, "learning_rate": 4.963152985074627e-05, "loss": 0.0342, "step": 3160 }, { "epoch": 2.95, "learning_rate": 4.963106343283582e-05, "loss": 0.0329, "step": 3164 }, { "epoch": 2.96, "learning_rate": 4.9630597014925375e-05, "loss": 0.0226, "step": 3168 }, { "epoch": 2.96, "learning_rate": 4.963013059701493e-05, "loss": 0.0301, "step": 3172 }, { "epoch": 2.96, "learning_rate": 4.962966417910448e-05, "loss": 0.019, "step": 3176 }, { "epoch": 2.97, "learning_rate": 4.9629197761194026e-05, "loss": 0.0331, "step": 3180 }, { "epoch": 2.97, "learning_rate": 4.962873134328359e-05, "loss": 0.0497, "step": 3184 }, { "epoch": 2.97, "learning_rate": 4.9628264925373136e-05, "loss": 0.0405, "step": 3188 }, { "epoch": 2.98, "learning_rate": 4.962779850746269e-05, "loss": 0.0312, "step": 3192 }, { "epoch": 2.98, "learning_rate": 4.962733208955224e-05, "loss": 0.0289, "step": 3196 }, { "epoch": 2.98, "learning_rate": 4.9626865671641794e-05, "loss": 0.0514, "step": 3200 }, { "epoch": 2.99, "learning_rate": 4.962639925373135e-05, "loss": 0.0698, "step": 3204 }, { "epoch": 2.99, "learning_rate": 4.96259328358209e-05, "loss": 0.0424, "step": 3208 }, { "epoch": 3.0, "learning_rate": 4.962546641791045e-05, "loss": 0.0357, "step": 3212 }, { "epoch": 3.0, "learning_rate": 4.962500000000001e-05, "loss": 0.0193, "step": 3216 }, { "epoch": 3.0, "learning_rate": 4.9624533582089555e-05, "loss": 0.0471, "step": 3220 }, { "epoch": 3.01, "learning_rate": 4.96240671641791e-05, "loss": 0.0428, "step": 3224 }, { "epoch": 3.01, "learning_rate": 4.9623600746268665e-05, "loss": 0.0268, "step": 3228 }, { "epoch": 3.01, "learning_rate": 4.962313432835821e-05, "loss": 0.0359, "step": 3232 }, { "epoch": 3.02, "learning_rate": 4.962266791044776e-05, "loss": 0.0468, "step": 3236 }, { "epoch": 3.02, "learning_rate": 4.9622201492537316e-05, "loss": 0.0176, "step": 3240 }, { "epoch": 3.03, "learning_rate": 4.962173507462687e-05, "loss": 0.0279, "step": 3244 }, { "epoch": 3.03, "learning_rate": 4.962126865671642e-05, "loss": 0.0193, "step": 3248 }, { "epoch": 3.03, "learning_rate": 4.9620802238805974e-05, "loss": 0.0468, "step": 3252 }, { "epoch": 3.04, "learning_rate": 4.962033582089552e-05, "loss": 0.0284, "step": 3256 }, { "epoch": 3.04, "learning_rate": 4.961986940298508e-05, "loss": 0.0166, "step": 3260 }, { "epoch": 3.04, "learning_rate": 4.961940298507463e-05, "loss": 0.0197, "step": 3264 }, { "epoch": 3.05, "learning_rate": 4.961893656716418e-05, "loss": 0.0305, "step": 3268 }, { "epoch": 3.05, "learning_rate": 4.9618470149253735e-05, "loss": 0.026, "step": 3272 }, { "epoch": 3.06, "learning_rate": 4.961800373134329e-05, "loss": 0.0427, "step": 3276 }, { "epoch": 3.06, "learning_rate": 4.961753731343284e-05, "loss": 0.0206, "step": 3280 }, { "epoch": 3.06, "learning_rate": 4.9617070895522386e-05, "loss": 0.017, "step": 3284 }, { "epoch": 3.07, "learning_rate": 4.961660447761194e-05, "loss": 0.0299, "step": 3288 }, { "epoch": 3.07, "learning_rate": 4.9616138059701496e-05, "loss": 0.0169, "step": 3292 }, { "epoch": 3.07, "learning_rate": 4.9615671641791044e-05, "loss": 0.0345, "step": 3296 }, { "epoch": 3.08, "learning_rate": 4.96152052238806e-05, "loss": 0.0367, "step": 3300 }, { "epoch": 3.08, "learning_rate": 4.9614738805970154e-05, "loss": 0.0288, "step": 3304 }, { "epoch": 3.09, "learning_rate": 4.96142723880597e-05, "loss": 0.0272, "step": 3308 }, { "epoch": 3.09, "learning_rate": 4.961380597014926e-05, "loss": 0.0117, "step": 3312 }, { "epoch": 3.09, "learning_rate": 4.9613339552238805e-05, "loss": 0.0219, "step": 3316 }, { "epoch": 3.1, "learning_rate": 4.961287313432836e-05, "loss": 0.0071, "step": 3320 }, { "epoch": 3.1, "learning_rate": 4.9612406716417915e-05, "loss": 0.0183, "step": 3324 }, { "epoch": 3.1, "learning_rate": 4.961194029850746e-05, "loss": 0.0314, "step": 3328 }, { "epoch": 3.11, "learning_rate": 4.961147388059702e-05, "loss": 0.0123, "step": 3332 }, { "epoch": 3.11, "learning_rate": 4.961100746268657e-05, "loss": 0.0308, "step": 3336 }, { "epoch": 3.12, "learning_rate": 4.961054104477612e-05, "loss": 0.0281, "step": 3340 }, { "epoch": 3.12, "learning_rate": 4.961007462686567e-05, "loss": 0.0215, "step": 3344 }, { "epoch": 3.12, "learning_rate": 4.9609608208955224e-05, "loss": 0.017, "step": 3348 }, { "epoch": 3.13, "learning_rate": 4.960914179104478e-05, "loss": 0.0152, "step": 3352 }, { "epoch": 3.13, "learning_rate": 4.9608675373134334e-05, "loss": 0.044, "step": 3356 }, { "epoch": 3.13, "learning_rate": 4.960820895522388e-05, "loss": 0.0241, "step": 3360 }, { "epoch": 3.14, "learning_rate": 4.960774253731344e-05, "loss": 0.0159, "step": 3364 }, { "epoch": 3.14, "learning_rate": 4.960727611940299e-05, "loss": 0.0282, "step": 3368 }, { "epoch": 3.15, "learning_rate": 4.960680970149254e-05, "loss": 0.0336, "step": 3372 }, { "epoch": 3.15, "learning_rate": 4.960634328358209e-05, "loss": 0.038, "step": 3376 }, { "epoch": 3.15, "learning_rate": 4.960587686567165e-05, "loss": 0.0369, "step": 3380 }, { "epoch": 3.16, "learning_rate": 4.96054104477612e-05, "loss": 0.0246, "step": 3384 }, { "epoch": 3.16, "learning_rate": 4.9604944029850746e-05, "loss": 0.0163, "step": 3388 }, { "epoch": 3.16, "learning_rate": 4.96044776119403e-05, "loss": 0.0241, "step": 3392 }, { "epoch": 3.17, "learning_rate": 4.9604011194029856e-05, "loss": 0.0232, "step": 3396 }, { "epoch": 3.17, "learning_rate": 4.9603544776119404e-05, "loss": 0.0355, "step": 3400 }, { "epoch": 3.18, "learning_rate": 4.960307835820896e-05, "loss": 0.0187, "step": 3404 }, { "epoch": 3.18, "learning_rate": 4.960261194029851e-05, "loss": 0.0208, "step": 3408 }, { "epoch": 3.18, "learning_rate": 4.960214552238806e-05, "loss": 0.0233, "step": 3412 }, { "epoch": 3.19, "learning_rate": 4.960167910447762e-05, "loss": 0.0418, "step": 3416 }, { "epoch": 3.19, "learning_rate": 4.9601212686567165e-05, "loss": 0.0465, "step": 3420 }, { "epoch": 3.19, "learning_rate": 4.960074626865672e-05, "loss": 0.028, "step": 3424 }, { "epoch": 3.2, "learning_rate": 4.9600279850746275e-05, "loss": 0.0257, "step": 3428 }, { "epoch": 3.2, "learning_rate": 4.959981343283582e-05, "loss": 0.0357, "step": 3432 }, { "epoch": 3.21, "learning_rate": 4.959934701492537e-05, "loss": 0.017, "step": 3436 }, { "epoch": 3.21, "learning_rate": 4.959888059701493e-05, "loss": 0.0296, "step": 3440 }, { "epoch": 3.21, "learning_rate": 4.959841417910448e-05, "loss": 0.0336, "step": 3444 }, { "epoch": 3.22, "learning_rate": 4.959794776119403e-05, "loss": 0.0194, "step": 3448 }, { "epoch": 3.22, "learning_rate": 4.9597481343283584e-05, "loss": 0.0231, "step": 3452 }, { "epoch": 3.22, "learning_rate": 4.959701492537314e-05, "loss": 0.0186, "step": 3456 }, { "epoch": 3.23, "learning_rate": 4.959654850746269e-05, "loss": 0.0477, "step": 3460 }, { "epoch": 3.23, "learning_rate": 4.959608208955224e-05, "loss": 0.041, "step": 3464 }, { "epoch": 3.24, "learning_rate": 4.959561567164179e-05, "loss": 0.0304, "step": 3468 }, { "epoch": 3.24, "learning_rate": 4.9595149253731345e-05, "loss": 0.0318, "step": 3472 }, { "epoch": 3.24, "learning_rate": 4.95946828358209e-05, "loss": 0.0789, "step": 3476 }, { "epoch": 3.25, "learning_rate": 4.959421641791045e-05, "loss": 0.0251, "step": 3480 }, { "epoch": 3.25, "learning_rate": 4.959375e-05, "loss": 0.0254, "step": 3484 }, { "epoch": 3.25, "learning_rate": 4.959328358208956e-05, "loss": 0.0178, "step": 3488 }, { "epoch": 3.26, "learning_rate": 4.9592817164179106e-05, "loss": 0.03, "step": 3492 }, { "epoch": 3.26, "learning_rate": 4.9592350746268654e-05, "loss": 0.0202, "step": 3496 }, { "epoch": 3.26, "learning_rate": 4.9591884328358216e-05, "loss": 0.0243, "step": 3500 }, { "epoch": 3.26, "eval_exact_match": 0.7108317214700194, "eval_exec": 0.7543520309477756, "eval_loss": 0.17954690754413605, "eval_runtime": 1070.1188, "eval_samples_per_second": 0.966, "step": 3500 }, { "epoch": 3.27, "learning_rate": 4.9591417910447764e-05, "loss": 0.0259, "step": 3504 }, { "epoch": 3.27, "learning_rate": 4.959095149253731e-05, "loss": 0.0117, "step": 3508 }, { "epoch": 3.28, "learning_rate": 4.959048507462687e-05, "loss": 0.0275, "step": 3512 }, { "epoch": 3.28, "learning_rate": 4.959001865671642e-05, "loss": 0.0219, "step": 3516 }, { "epoch": 3.28, "learning_rate": 4.9589552238805977e-05, "loss": 0.0259, "step": 3520 }, { "epoch": 3.29, "learning_rate": 4.9589085820895525e-05, "loss": 0.0362, "step": 3524 }, { "epoch": 3.29, "learning_rate": 4.958861940298507e-05, "loss": 0.0362, "step": 3528 }, { "epoch": 3.29, "learning_rate": 4.9588152985074634e-05, "loss": 0.0363, "step": 3532 }, { "epoch": 3.3, "learning_rate": 4.958768656716418e-05, "loss": 0.0603, "step": 3536 }, { "epoch": 3.3, "learning_rate": 4.958722014925373e-05, "loss": 0.0303, "step": 3540 }, { "epoch": 3.31, "learning_rate": 4.9586753731343286e-05, "loss": 0.0152, "step": 3544 }, { "epoch": 3.31, "learning_rate": 4.958628731343284e-05, "loss": 0.0177, "step": 3548 }, { "epoch": 3.31, "learning_rate": 4.958582089552239e-05, "loss": 0.03, "step": 3552 }, { "epoch": 3.32, "learning_rate": 4.9585354477611944e-05, "loss": 0.0267, "step": 3556 }, { "epoch": 3.32, "learning_rate": 4.95848880597015e-05, "loss": 0.016, "step": 3560 }, { "epoch": 3.32, "learning_rate": 4.958442164179105e-05, "loss": 0.0345, "step": 3564 }, { "epoch": 3.33, "learning_rate": 4.95839552238806e-05, "loss": 0.0247, "step": 3568 }, { "epoch": 3.33, "learning_rate": 4.958348880597015e-05, "loss": 0.0342, "step": 3572 }, { "epoch": 3.34, "learning_rate": 4.9583022388059705e-05, "loss": 0.0234, "step": 3576 }, { "epoch": 3.34, "learning_rate": 4.958255597014926e-05, "loss": 0.034, "step": 3580 }, { "epoch": 3.34, "learning_rate": 4.958208955223881e-05, "loss": 0.0257, "step": 3584 }, { "epoch": 3.35, "learning_rate": 4.9581623134328356e-05, "loss": 0.0194, "step": 3588 }, { "epoch": 3.35, "learning_rate": 4.958115671641792e-05, "loss": 0.0289, "step": 3592 }, { "epoch": 3.35, "learning_rate": 4.9580690298507466e-05, "loss": 0.0336, "step": 3596 }, { "epoch": 3.36, "learning_rate": 4.9580223880597014e-05, "loss": 0.0295, "step": 3600 }, { "epoch": 3.36, "learning_rate": 4.957975746268657e-05, "loss": 0.0334, "step": 3604 }, { "epoch": 3.37, "learning_rate": 4.9579291044776124e-05, "loss": 0.0198, "step": 3608 }, { "epoch": 3.37, "learning_rate": 4.957882462686567e-05, "loss": 0.0195, "step": 3612 }, { "epoch": 3.37, "learning_rate": 4.9578358208955227e-05, "loss": 0.0281, "step": 3616 }, { "epoch": 3.38, "learning_rate": 4.957789179104478e-05, "loss": 0.0192, "step": 3620 }, { "epoch": 3.38, "learning_rate": 4.957742537313433e-05, "loss": 0.0361, "step": 3624 }, { "epoch": 3.38, "learning_rate": 4.9576958955223884e-05, "loss": 0.0094, "step": 3628 }, { "epoch": 3.39, "learning_rate": 4.957649253731343e-05, "loss": 0.0212, "step": 3632 }, { "epoch": 3.39, "learning_rate": 4.957602611940299e-05, "loss": 0.0237, "step": 3636 }, { "epoch": 3.4, "learning_rate": 4.957555970149254e-05, "loss": 0.0295, "step": 3640 }, { "epoch": 3.4, "learning_rate": 4.957509328358209e-05, "loss": 0.0373, "step": 3644 }, { "epoch": 3.4, "learning_rate": 4.957462686567164e-05, "loss": 0.0255, "step": 3648 }, { "epoch": 3.41, "learning_rate": 4.95741604477612e-05, "loss": 0.0313, "step": 3652 }, { "epoch": 3.41, "learning_rate": 4.957369402985075e-05, "loss": 0.0548, "step": 3656 }, { "epoch": 3.41, "learning_rate": 4.95732276119403e-05, "loss": 0.0417, "step": 3660 }, { "epoch": 3.42, "learning_rate": 4.957276119402985e-05, "loss": 0.0232, "step": 3664 }, { "epoch": 3.42, "learning_rate": 4.9572294776119406e-05, "loss": 0.0203, "step": 3668 }, { "epoch": 3.43, "learning_rate": 4.9571828358208955e-05, "loss": 0.0422, "step": 3672 }, { "epoch": 3.43, "learning_rate": 4.957136194029851e-05, "loss": 0.0225, "step": 3676 }, { "epoch": 3.43, "learning_rate": 4.9570895522388064e-05, "loss": 0.0162, "step": 3680 }, { "epoch": 3.44, "learning_rate": 4.957042910447762e-05, "loss": 0.0359, "step": 3684 }, { "epoch": 3.44, "learning_rate": 4.956996268656717e-05, "loss": 0.0186, "step": 3688 }, { "epoch": 3.44, "learning_rate": 4.9569496268656716e-05, "loss": 0.0165, "step": 3692 }, { "epoch": 3.45, "learning_rate": 4.956902985074627e-05, "loss": 0.0148, "step": 3696 }, { "epoch": 3.45, "learning_rate": 4.9568563432835825e-05, "loss": 0.026, "step": 3700 }, { "epoch": 3.46, "learning_rate": 4.9568097014925374e-05, "loss": 0.0157, "step": 3704 }, { "epoch": 3.46, "learning_rate": 4.956763059701493e-05, "loss": 0.0465, "step": 3708 }, { "epoch": 3.46, "learning_rate": 4.956716417910448e-05, "loss": 0.0174, "step": 3712 }, { "epoch": 3.47, "learning_rate": 4.956669776119403e-05, "loss": 0.0233, "step": 3716 }, { "epoch": 3.47, "learning_rate": 4.9566231343283586e-05, "loss": 0.0347, "step": 3720 }, { "epoch": 3.47, "learning_rate": 4.9565764925373135e-05, "loss": 0.0201, "step": 3724 }, { "epoch": 3.48, "learning_rate": 4.956529850746269e-05, "loss": 0.0182, "step": 3728 }, { "epoch": 3.48, "learning_rate": 4.9564832089552244e-05, "loss": 0.0306, "step": 3732 }, { "epoch": 3.49, "learning_rate": 4.956436567164179e-05, "loss": 0.0205, "step": 3736 }, { "epoch": 3.49, "learning_rate": 4.956389925373135e-05, "loss": 0.0255, "step": 3740 }, { "epoch": 3.49, "learning_rate": 4.95634328358209e-05, "loss": 0.0309, "step": 3744 }, { "epoch": 3.5, "learning_rate": 4.956296641791045e-05, "loss": 0.018, "step": 3748 }, { "epoch": 3.5, "learning_rate": 4.95625e-05, "loss": 0.0175, "step": 3752 }, { "epoch": 3.5, "learning_rate": 4.9562033582089553e-05, "loss": 0.0334, "step": 3756 }, { "epoch": 3.51, "learning_rate": 4.956156716417911e-05, "loss": 0.0213, "step": 3760 }, { "epoch": 3.51, "learning_rate": 4.9561100746268656e-05, "loss": 0.0153, "step": 3764 }, { "epoch": 3.51, "learning_rate": 4.956063432835821e-05, "loss": 0.0223, "step": 3768 }, { "epoch": 3.52, "learning_rate": 4.9560167910447766e-05, "loss": 0.0178, "step": 3772 }, { "epoch": 3.52, "learning_rate": 4.9559701492537314e-05, "loss": 0.0278, "step": 3776 }, { "epoch": 3.53, "learning_rate": 4.955923507462687e-05, "loss": 0.0264, "step": 3780 }, { "epoch": 3.53, "learning_rate": 4.955876865671642e-05, "loss": 0.0204, "step": 3784 }, { "epoch": 3.53, "learning_rate": 4.955830223880597e-05, "loss": 0.0252, "step": 3788 }, { "epoch": 3.54, "learning_rate": 4.955783582089553e-05, "loss": 0.044, "step": 3792 }, { "epoch": 3.54, "learning_rate": 4.9557369402985075e-05, "loss": 0.0266, "step": 3796 }, { "epoch": 3.54, "learning_rate": 4.9556902985074624e-05, "loss": 0.0191, "step": 3800 }, { "epoch": 3.55, "learning_rate": 4.9556436567164185e-05, "loss": 0.0422, "step": 3804 }, { "epoch": 3.55, "learning_rate": 4.955597014925373e-05, "loss": 0.0284, "step": 3808 }, { "epoch": 3.56, "learning_rate": 4.955550373134328e-05, "loss": 0.0344, "step": 3812 }, { "epoch": 3.56, "learning_rate": 4.9555037313432836e-05, "loss": 0.0295, "step": 3816 }, { "epoch": 3.56, "learning_rate": 4.955457089552239e-05, "loss": 0.0331, "step": 3820 }, { "epoch": 3.57, "learning_rate": 4.955410447761194e-05, "loss": 0.0436, "step": 3824 }, { "epoch": 3.57, "learning_rate": 4.9553638059701494e-05, "loss": 0.0292, "step": 3828 }, { "epoch": 3.57, "learning_rate": 4.955317164179105e-05, "loss": 0.0356, "step": 3832 }, { "epoch": 3.58, "learning_rate": 4.95527052238806e-05, "loss": 0.0315, "step": 3836 }, { "epoch": 3.58, "learning_rate": 4.955223880597015e-05, "loss": 0.0295, "step": 3840 }, { "epoch": 3.59, "learning_rate": 4.95517723880597e-05, "loss": 0.0413, "step": 3844 }, { "epoch": 3.59, "learning_rate": 4.955130597014926e-05, "loss": 0.0287, "step": 3848 }, { "epoch": 3.59, "learning_rate": 4.955083955223881e-05, "loss": 0.0223, "step": 3852 }, { "epoch": 3.6, "learning_rate": 4.955037313432836e-05, "loss": 0.0355, "step": 3856 }, { "epoch": 3.6, "learning_rate": 4.954990671641791e-05, "loss": 0.0244, "step": 3860 }, { "epoch": 3.6, "learning_rate": 4.954944029850747e-05, "loss": 0.0149, "step": 3864 }, { "epoch": 3.61, "learning_rate": 4.9548973880597016e-05, "loss": 0.0225, "step": 3868 }, { "epoch": 3.61, "learning_rate": 4.954850746268657e-05, "loss": 0.0261, "step": 3872 }, { "epoch": 3.62, "learning_rate": 4.954804104477612e-05, "loss": 0.0264, "step": 3876 }, { "epoch": 3.62, "learning_rate": 4.9547574626865674e-05, "loss": 0.0282, "step": 3880 }, { "epoch": 3.62, "learning_rate": 4.954710820895523e-05, "loss": 0.0511, "step": 3884 }, { "epoch": 3.63, "learning_rate": 4.954664179104478e-05, "loss": 0.0487, "step": 3888 }, { "epoch": 3.63, "learning_rate": 4.954617537313433e-05, "loss": 0.0169, "step": 3892 }, { "epoch": 3.63, "learning_rate": 4.954570895522389e-05, "loss": 0.0243, "step": 3896 }, { "epoch": 3.64, "learning_rate": 4.9545242537313435e-05, "loss": 0.0247, "step": 3900 }, { "epoch": 3.64, "learning_rate": 4.954477611940298e-05, "loss": 0.0258, "step": 3904 }, { "epoch": 3.65, "learning_rate": 4.9544309701492545e-05, "loss": 0.0328, "step": 3908 }, { "epoch": 3.65, "learning_rate": 4.954384328358209e-05, "loss": 0.0328, "step": 3912 }, { "epoch": 3.65, "learning_rate": 4.954337686567164e-05, "loss": 0.0265, "step": 3916 }, { "epoch": 3.66, "learning_rate": 4.9542910447761196e-05, "loss": 0.0349, "step": 3920 }, { "epoch": 3.66, "learning_rate": 4.954244402985075e-05, "loss": 0.0307, "step": 3924 }, { "epoch": 3.66, "learning_rate": 4.95419776119403e-05, "loss": 0.0256, "step": 3928 }, { "epoch": 3.67, "learning_rate": 4.9541511194029854e-05, "loss": 0.0246, "step": 3932 }, { "epoch": 3.67, "learning_rate": 4.95410447761194e-05, "loss": 0.0212, "step": 3936 }, { "epoch": 3.68, "learning_rate": 4.954057835820896e-05, "loss": 0.0548, "step": 3940 }, { "epoch": 3.68, "learning_rate": 4.954011194029851e-05, "loss": 0.05, "step": 3944 }, { "epoch": 3.68, "learning_rate": 4.953964552238806e-05, "loss": 0.036, "step": 3948 }, { "epoch": 3.69, "learning_rate": 4.9539179104477615e-05, "loss": 0.0122, "step": 3952 }, { "epoch": 3.69, "learning_rate": 4.953871268656717e-05, "loss": 0.0352, "step": 3956 }, { "epoch": 3.69, "learning_rate": 4.953824626865672e-05, "loss": 0.0316, "step": 3960 }, { "epoch": 3.7, "learning_rate": 4.9537779850746266e-05, "loss": 0.0243, "step": 3964 }, { "epoch": 3.7, "learning_rate": 4.953731343283582e-05, "loss": 0.0224, "step": 3968 }, { "epoch": 3.71, "learning_rate": 4.9536847014925376e-05, "loss": 0.0249, "step": 3972 }, { "epoch": 3.71, "learning_rate": 4.9536380597014924e-05, "loss": 0.0368, "step": 3976 }, { "epoch": 3.71, "learning_rate": 4.953591417910448e-05, "loss": 0.0412, "step": 3980 }, { "epoch": 3.72, "learning_rate": 4.9535447761194034e-05, "loss": 0.0248, "step": 3984 }, { "epoch": 3.72, "learning_rate": 4.953498134328358e-05, "loss": 0.0399, "step": 3988 }, { "epoch": 3.72, "learning_rate": 4.953451492537314e-05, "loss": 0.0156, "step": 3992 }, { "epoch": 3.73, "learning_rate": 4.9534048507462685e-05, "loss": 0.0109, "step": 3996 }, { "epoch": 3.73, "learning_rate": 4.953358208955224e-05, "loss": 0.0412, "step": 4000 }, { "epoch": 3.73, "eval_exact_match": 0.7030947775628626, "eval_exec": 0.7369439071566731, "eval_loss": 0.189029723405838, "eval_runtime": 1025.0796, "eval_samples_per_second": 1.009, "step": 4000 }, { "epoch": 3.73, "learning_rate": 4.9533115671641795e-05, "loss": 0.0135, "step": 4004 }, { "epoch": 3.74, "learning_rate": 4.953264925373134e-05, "loss": 0.0238, "step": 4008 }, { "epoch": 3.74, "learning_rate": 4.95321828358209e-05, "loss": 0.0143, "step": 4012 }, { "epoch": 3.75, "learning_rate": 4.953171641791045e-05, "loss": 0.0691, "step": 4016 }, { "epoch": 3.75, "learning_rate": 4.953125e-05, "loss": 0.0458, "step": 4020 }, { "epoch": 3.75, "learning_rate": 4.9530783582089556e-05, "loss": 0.0638, "step": 4024 }, { "epoch": 3.76, "learning_rate": 4.9530317164179104e-05, "loss": 0.022, "step": 4028 }, { "epoch": 3.76, "learning_rate": 4.952985074626866e-05, "loss": 0.0205, "step": 4032 }, { "epoch": 3.76, "learning_rate": 4.9529384328358214e-05, "loss": 0.0268, "step": 4036 }, { "epoch": 3.77, "learning_rate": 4.952891791044776e-05, "loss": 0.0292, "step": 4040 }, { "epoch": 3.77, "learning_rate": 4.952845149253732e-05, "loss": 0.0362, "step": 4044 }, { "epoch": 3.78, "learning_rate": 4.952798507462687e-05, "loss": 0.0272, "step": 4048 }, { "epoch": 3.78, "learning_rate": 4.952751865671642e-05, "loss": 0.0357, "step": 4052 }, { "epoch": 3.78, "learning_rate": 4.952705223880597e-05, "loss": 0.0465, "step": 4056 }, { "epoch": 3.79, "learning_rate": 4.952658582089553e-05, "loss": 0.0329, "step": 4060 }, { "epoch": 3.79, "learning_rate": 4.952611940298508e-05, "loss": 0.0191, "step": 4064 }, { "epoch": 3.79, "learning_rate": 4.9525652985074626e-05, "loss": 0.0204, "step": 4068 }, { "epoch": 3.8, "learning_rate": 4.952518656716418e-05, "loss": 0.0362, "step": 4072 }, { "epoch": 3.8, "learning_rate": 4.9524720149253736e-05, "loss": 0.0175, "step": 4076 }, { "epoch": 3.81, "learning_rate": 4.9524253731343284e-05, "loss": 0.0609, "step": 4080 }, { "epoch": 3.81, "learning_rate": 4.952378731343284e-05, "loss": 0.0194, "step": 4084 }, { "epoch": 3.81, "learning_rate": 4.952332089552239e-05, "loss": 0.0331, "step": 4088 }, { "epoch": 3.82, "learning_rate": 4.952285447761194e-05, "loss": 0.0464, "step": 4092 }, { "epoch": 3.82, "learning_rate": 4.95223880597015e-05, "loss": 0.0154, "step": 4096 }, { "epoch": 3.82, "learning_rate": 4.9521921641791045e-05, "loss": 0.0402, "step": 4100 }, { "epoch": 3.83, "learning_rate": 4.95214552238806e-05, "loss": 0.0201, "step": 4104 }, { "epoch": 3.83, "learning_rate": 4.9520988805970155e-05, "loss": 0.0329, "step": 4108 }, { "epoch": 3.84, "learning_rate": 4.95205223880597e-05, "loss": 0.0357, "step": 4112 }, { "epoch": 3.84, "learning_rate": 4.952005597014925e-05, "loss": 0.0322, "step": 4116 }, { "epoch": 3.84, "learning_rate": 4.951958955223881e-05, "loss": 0.0286, "step": 4120 }, { "epoch": 3.85, "learning_rate": 4.951912313432836e-05, "loss": 0.0334, "step": 4124 }, { "epoch": 3.85, "learning_rate": 4.951865671641791e-05, "loss": 0.0324, "step": 4128 }, { "epoch": 3.85, "learning_rate": 4.9518190298507464e-05, "loss": 0.0235, "step": 4132 }, { "epoch": 3.86, "learning_rate": 4.951772388059702e-05, "loss": 0.0456, "step": 4136 }, { "epoch": 3.86, "learning_rate": 4.951725746268657e-05, "loss": 0.029, "step": 4140 }, { "epoch": 3.87, "learning_rate": 4.951679104477612e-05, "loss": 0.039, "step": 4144 }, { "epoch": 3.87, "learning_rate": 4.951632462686567e-05, "loss": 0.0576, "step": 4148 }, { "epoch": 3.87, "learning_rate": 4.9515858208955225e-05, "loss": 0.0086, "step": 4152 }, { "epoch": 3.88, "learning_rate": 4.951539179104478e-05, "loss": 0.0461, "step": 4156 }, { "epoch": 3.88, "learning_rate": 4.951492537313433e-05, "loss": 0.0401, "step": 4160 }, { "epoch": 3.88, "learning_rate": 4.951445895522388e-05, "loss": 0.0242, "step": 4164 }, { "epoch": 3.89, "learning_rate": 4.951399253731344e-05, "loss": 0.0193, "step": 4168 }, { "epoch": 3.89, "learning_rate": 4.9513526119402986e-05, "loss": 0.018, "step": 4172 }, { "epoch": 3.9, "learning_rate": 4.951305970149254e-05, "loss": 0.0306, "step": 4176 }, { "epoch": 3.9, "learning_rate": 4.9512593283582096e-05, "loss": 0.0333, "step": 4180 }, { "epoch": 3.9, "learning_rate": 4.9512126865671644e-05, "loss": 0.0168, "step": 4184 }, { "epoch": 3.91, "learning_rate": 4.95116604477612e-05, "loss": 0.0275, "step": 4188 }, { "epoch": 3.91, "learning_rate": 4.951119402985075e-05, "loss": 0.0228, "step": 4192 }, { "epoch": 3.91, "learning_rate": 4.95107276119403e-05, "loss": 0.0351, "step": 4196 }, { "epoch": 3.92, "learning_rate": 4.951026119402986e-05, "loss": 0.0217, "step": 4200 }, { "epoch": 3.92, "learning_rate": 4.9509794776119405e-05, "loss": 0.0157, "step": 4204 }, { "epoch": 3.93, "learning_rate": 4.950932835820895e-05, "loss": 0.031, "step": 4208 }, { "epoch": 3.93, "learning_rate": 4.9508861940298515e-05, "loss": 0.0352, "step": 4212 }, { "epoch": 3.93, "learning_rate": 4.950839552238806e-05, "loss": 0.0201, "step": 4216 }, { "epoch": 3.94, "learning_rate": 4.950792910447761e-05, "loss": 0.03, "step": 4220 }, { "epoch": 3.94, "learning_rate": 4.9507462686567166e-05, "loss": 0.0345, "step": 4224 }, { "epoch": 3.94, "learning_rate": 4.950699626865672e-05, "loss": 0.043, "step": 4228 }, { "epoch": 3.95, "learning_rate": 4.950652985074627e-05, "loss": 0.0389, "step": 4232 }, { "epoch": 3.95, "learning_rate": 4.9506063432835824e-05, "loss": 0.0203, "step": 4236 }, { "epoch": 3.96, "learning_rate": 4.950559701492538e-05, "loss": 0.0464, "step": 4240 }, { "epoch": 3.96, "learning_rate": 4.950513059701493e-05, "loss": 0.0171, "step": 4244 }, { "epoch": 3.96, "learning_rate": 4.950466417910448e-05, "loss": 0.0478, "step": 4248 }, { "epoch": 3.97, "learning_rate": 4.950419776119403e-05, "loss": 0.0148, "step": 4252 }, { "epoch": 3.97, "learning_rate": 4.9503731343283585e-05, "loss": 0.0182, "step": 4256 }, { "epoch": 3.97, "learning_rate": 4.950326492537314e-05, "loss": 0.0165, "step": 4260 }, { "epoch": 3.98, "learning_rate": 4.950279850746269e-05, "loss": 0.0097, "step": 4264 }, { "epoch": 3.98, "learning_rate": 4.9502332089552236e-05, "loss": 0.0343, "step": 4268 }, { "epoch": 3.98, "learning_rate": 4.95018656716418e-05, "loss": 0.0079, "step": 4272 }, { "epoch": 3.99, "learning_rate": 4.9501399253731346e-05, "loss": 0.0332, "step": 4276 }, { "epoch": 3.99, "learning_rate": 4.9500932835820894e-05, "loss": 0.0205, "step": 4280 }, { "epoch": 4.0, "learning_rate": 4.950046641791045e-05, "loss": 0.0333, "step": 4284 }, { "epoch": 4.0, "learning_rate": 4.9500000000000004e-05, "loss": 0.0344, "step": 4288 }, { "epoch": 4.0, "learning_rate": 4.949953358208955e-05, "loss": 0.016, "step": 4292 }, { "epoch": 4.01, "learning_rate": 4.949906716417911e-05, "loss": 0.0103, "step": 4296 }, { "epoch": 4.01, "learning_rate": 4.949860074626866e-05, "loss": 0.0192, "step": 4300 }, { "epoch": 4.01, "learning_rate": 4.949813432835821e-05, "loss": 0.0157, "step": 4304 }, { "epoch": 4.02, "learning_rate": 4.9497667910447765e-05, "loss": 0.0178, "step": 4308 }, { "epoch": 4.02, "learning_rate": 4.949720149253731e-05, "loss": 0.0073, "step": 4312 }, { "epoch": 4.03, "learning_rate": 4.949673507462687e-05, "loss": 0.0299, "step": 4316 }, { "epoch": 4.03, "learning_rate": 4.949626865671642e-05, "loss": 0.0182, "step": 4320 }, { "epoch": 4.03, "learning_rate": 4.949580223880597e-05, "loss": 0.0246, "step": 4324 }, { "epoch": 4.04, "learning_rate": 4.949533582089552e-05, "loss": 0.0322, "step": 4328 }, { "epoch": 4.04, "learning_rate": 4.949486940298508e-05, "loss": 0.0391, "step": 4332 }, { "epoch": 4.04, "learning_rate": 4.949440298507463e-05, "loss": 0.0207, "step": 4336 }, { "epoch": 4.05, "learning_rate": 4.9493936567164184e-05, "loss": 0.0201, "step": 4340 }, { "epoch": 4.05, "learning_rate": 4.949347014925373e-05, "loss": 0.0194, "step": 4344 }, { "epoch": 4.06, "learning_rate": 4.949300373134329e-05, "loss": 0.0162, "step": 4348 }, { "epoch": 4.06, "learning_rate": 4.949253731343284e-05, "loss": 0.0393, "step": 4352 }, { "epoch": 4.06, "learning_rate": 4.949207089552239e-05, "loss": 0.0156, "step": 4356 }, { "epoch": 4.07, "learning_rate": 4.9491604477611945e-05, "loss": 0.0182, "step": 4360 }, { "epoch": 4.07, "learning_rate": 4.94911380597015e-05, "loss": 0.0119, "step": 4364 }, { "epoch": 4.07, "learning_rate": 4.949067164179105e-05, "loss": 0.0116, "step": 4368 }, { "epoch": 4.08, "learning_rate": 4.9490205223880596e-05, "loss": 0.0179, "step": 4372 }, { "epoch": 4.08, "learning_rate": 4.948973880597015e-05, "loss": 0.0153, "step": 4376 }, { "epoch": 4.09, "learning_rate": 4.9489272388059706e-05, "loss": 0.0362, "step": 4380 }, { "epoch": 4.09, "learning_rate": 4.9488805970149254e-05, "loss": 0.0082, "step": 4384 }, { "epoch": 4.09, "learning_rate": 4.948833955223881e-05, "loss": 0.0287, "step": 4388 }, { "epoch": 4.1, "learning_rate": 4.9487873134328364e-05, "loss": 0.0125, "step": 4392 }, { "epoch": 4.1, "learning_rate": 4.948740671641791e-05, "loss": 0.0094, "step": 4396 }, { "epoch": 4.1, "learning_rate": 4.948694029850747e-05, "loss": 0.0187, "step": 4400 }, { "epoch": 4.11, "learning_rate": 4.9486473880597015e-05, "loss": 0.0197, "step": 4404 }, { "epoch": 4.11, "learning_rate": 4.948600746268657e-05, "loss": 0.0104, "step": 4408 }, { "epoch": 4.12, "learning_rate": 4.9485541044776125e-05, "loss": 0.0424, "step": 4412 }, { "epoch": 4.12, "learning_rate": 4.948507462686567e-05, "loss": 0.0181, "step": 4416 }, { "epoch": 4.12, "learning_rate": 4.948460820895523e-05, "loss": 0.0187, "step": 4420 }, { "epoch": 4.13, "learning_rate": 4.948414179104478e-05, "loss": 0.0154, "step": 4424 }, { "epoch": 4.13, "learning_rate": 4.948367537313433e-05, "loss": 0.0218, "step": 4428 }, { "epoch": 4.13, "learning_rate": 4.948320895522388e-05, "loss": 0.0119, "step": 4432 }, { "epoch": 4.14, "learning_rate": 4.9482742537313434e-05, "loss": 0.0215, "step": 4436 }, { "epoch": 4.14, "learning_rate": 4.948227611940299e-05, "loss": 0.0112, "step": 4440 }, { "epoch": 4.15, "learning_rate": 4.948180970149254e-05, "loss": 0.0187, "step": 4444 }, { "epoch": 4.15, "learning_rate": 4.948134328358209e-05, "loss": 0.0083, "step": 4448 }, { "epoch": 4.15, "learning_rate": 4.9480876865671647e-05, "loss": 0.0153, "step": 4452 }, { "epoch": 4.16, "learning_rate": 4.9480410447761195e-05, "loss": 0.0342, "step": 4456 }, { "epoch": 4.16, "learning_rate": 4.947994402985075e-05, "loss": 0.0264, "step": 4460 }, { "epoch": 4.16, "learning_rate": 4.94794776119403e-05, "loss": 0.0149, "step": 4464 }, { "epoch": 4.17, "learning_rate": 4.947901119402985e-05, "loss": 0.0193, "step": 4468 }, { "epoch": 4.17, "learning_rate": 4.947854477611941e-05, "loss": 0.011, "step": 4472 }, { "epoch": 4.18, "learning_rate": 4.9478078358208956e-05, "loss": 0.0224, "step": 4476 }, { "epoch": 4.18, "learning_rate": 4.9477611940298504e-05, "loss": 0.0225, "step": 4480 }, { "epoch": 4.18, "learning_rate": 4.9477145522388066e-05, "loss": 0.0176, "step": 4484 }, { "epoch": 4.19, "learning_rate": 4.9476679104477614e-05, "loss": 0.0262, "step": 4488 }, { "epoch": 4.19, "learning_rate": 4.947621268656716e-05, "loss": 0.0165, "step": 4492 }, { "epoch": 4.19, "learning_rate": 4.947574626865672e-05, "loss": 0.0167, "step": 4496 }, { "epoch": 4.2, "learning_rate": 4.947527985074627e-05, "loss": 0.0262, "step": 4500 }, { "epoch": 4.2, "eval_exact_match": 0.6992263056092843, "eval_exec": 0.7437137330754352, "eval_loss": 0.18752895295619965, "eval_runtime": 1059.8867, "eval_samples_per_second": 0.976, "step": 4500 }, { "epoch": 4.2, "learning_rate": 4.9474813432835827e-05, "loss": 0.0147, "step": 4504 }, { "epoch": 4.21, "learning_rate": 4.9474347014925375e-05, "loss": 0.0179, "step": 4508 }, { "epoch": 4.21, "learning_rate": 4.947388059701493e-05, "loss": 0.0218, "step": 4512 }, { "epoch": 4.21, "learning_rate": 4.9473414179104484e-05, "loss": 0.0346, "step": 4516 }, { "epoch": 4.22, "learning_rate": 4.947294776119403e-05, "loss": 0.0181, "step": 4520 }, { "epoch": 4.22, "learning_rate": 4.947248134328358e-05, "loss": 0.0138, "step": 4524 }, { "epoch": 4.22, "learning_rate": 4.947201492537314e-05, "loss": 0.0184, "step": 4528 }, { "epoch": 4.23, "learning_rate": 4.947154850746269e-05, "loss": 0.0319, "step": 4532 }, { "epoch": 4.23, "learning_rate": 4.947108208955224e-05, "loss": 0.0194, "step": 4536 }, { "epoch": 4.24, "learning_rate": 4.9470615671641794e-05, "loss": 0.0148, "step": 4540 }, { "epoch": 4.24, "learning_rate": 4.947014925373135e-05, "loss": 0.0233, "step": 4544 }, { "epoch": 4.24, "learning_rate": 4.9469682835820897e-05, "loss": 0.0273, "step": 4548 }, { "epoch": 4.25, "learning_rate": 4.946921641791045e-05, "loss": 0.0139, "step": 4552 }, { "epoch": 4.25, "learning_rate": 4.946875e-05, "loss": 0.0158, "step": 4556 }, { "epoch": 4.25, "learning_rate": 4.9468283582089555e-05, "loss": 0.0123, "step": 4560 }, { "epoch": 4.26, "learning_rate": 4.946781716417911e-05, "loss": 0.0205, "step": 4564 }, { "epoch": 4.26, "learning_rate": 4.946735074626866e-05, "loss": 0.0181, "step": 4568 }, { "epoch": 4.26, "learning_rate": 4.946688432835821e-05, "loss": 0.0365, "step": 4572 }, { "epoch": 4.27, "learning_rate": 4.946641791044777e-05, "loss": 0.0174, "step": 4576 }, { "epoch": 4.27, "learning_rate": 4.9465951492537316e-05, "loss": 0.0239, "step": 4580 }, { "epoch": 4.28, "learning_rate": 4.9465485074626864e-05, "loss": 0.0113, "step": 4584 }, { "epoch": 4.28, "learning_rate": 4.9465018656716425e-05, "loss": 0.0309, "step": 4588 }, { "epoch": 4.28, "learning_rate": 4.9464552238805973e-05, "loss": 0.0245, "step": 4592 }, { "epoch": 4.29, "learning_rate": 4.946408582089552e-05, "loss": 0.0252, "step": 4596 }, { "epoch": 4.29, "learning_rate": 4.9463619402985077e-05, "loss": 0.0233, "step": 4600 }, { "epoch": 4.29, "learning_rate": 4.946315298507463e-05, "loss": 0.0214, "step": 4604 }, { "epoch": 4.3, "learning_rate": 4.946268656716418e-05, "loss": 0.0286, "step": 4608 }, { "epoch": 4.3, "learning_rate": 4.9462220149253734e-05, "loss": 0.0199, "step": 4612 }, { "epoch": 4.31, "learning_rate": 4.946175373134328e-05, "loss": 0.0373, "step": 4616 }, { "epoch": 4.31, "learning_rate": 4.946128731343284e-05, "loss": 0.0142, "step": 4620 }, { "epoch": 4.31, "learning_rate": 4.946082089552239e-05, "loss": 0.0189, "step": 4624 }, { "epoch": 4.32, "learning_rate": 4.946035447761194e-05, "loss": 0.0208, "step": 4628 }, { "epoch": 4.32, "learning_rate": 4.9459888059701495e-05, "loss": 0.0197, "step": 4632 }, { "epoch": 4.32, "learning_rate": 4.945942164179105e-05, "loss": 0.0124, "step": 4636 }, { "epoch": 4.33, "learning_rate": 4.94589552238806e-05, "loss": 0.0308, "step": 4640 }, { "epoch": 4.33, "learning_rate": 4.945848880597015e-05, "loss": 0.0192, "step": 4644 }, { "epoch": 4.34, "learning_rate": 4.94580223880597e-05, "loss": 0.0239, "step": 4648 }, { "epoch": 4.34, "learning_rate": 4.9457555970149256e-05, "loss": 0.0087, "step": 4652 }, { "epoch": 4.34, "learning_rate": 4.9457089552238805e-05, "loss": 0.0256, "step": 4656 }, { "epoch": 4.35, "learning_rate": 4.945662313432836e-05, "loss": 0.0396, "step": 4660 }, { "epoch": 4.35, "learning_rate": 4.9456156716417914e-05, "loss": 0.0162, "step": 4664 }, { "epoch": 4.35, "learning_rate": 4.945569029850747e-05, "loss": 0.0233, "step": 4668 }, { "epoch": 4.36, "learning_rate": 4.945522388059702e-05, "loss": 0.0246, "step": 4672 }, { "epoch": 4.36, "learning_rate": 4.9454757462686566e-05, "loss": 0.0165, "step": 4676 }, { "epoch": 4.37, "learning_rate": 4.945429104477613e-05, "loss": 0.027, "step": 4680 }, { "epoch": 4.37, "learning_rate": 4.9453824626865675e-05, "loss": 0.0126, "step": 4684 }, { "epoch": 4.37, "learning_rate": 4.9453358208955224e-05, "loss": 0.0182, "step": 4688 }, { "epoch": 4.38, "learning_rate": 4.945289179104478e-05, "loss": 0.0226, "step": 4692 }, { "epoch": 4.38, "learning_rate": 4.945242537313433e-05, "loss": 0.0264, "step": 4696 }, { "epoch": 4.38, "learning_rate": 4.945195895522388e-05, "loss": 0.0378, "step": 4700 }, { "epoch": 4.39, "learning_rate": 4.9451492537313436e-05, "loss": 0.0161, "step": 4704 }, { "epoch": 4.39, "learning_rate": 4.9451026119402984e-05, "loss": 0.012, "step": 4708 }, { "epoch": 4.4, "learning_rate": 4.945055970149254e-05, "loss": 0.0132, "step": 4712 }, { "epoch": 4.4, "learning_rate": 4.9450093283582094e-05, "loss": 0.0185, "step": 4716 }, { "epoch": 4.4, "learning_rate": 4.944962686567164e-05, "loss": 0.0214, "step": 4720 }, { "epoch": 4.41, "learning_rate": 4.94491604477612e-05, "loss": 0.0057, "step": 4724 }, { "epoch": 4.41, "learning_rate": 4.944869402985075e-05, "loss": 0.0158, "step": 4728 }, { "epoch": 4.41, "learning_rate": 4.94482276119403e-05, "loss": 0.0063, "step": 4732 }, { "epoch": 4.42, "learning_rate": 4.944776119402985e-05, "loss": 0.0059, "step": 4736 }, { "epoch": 4.42, "learning_rate": 4.944729477611941e-05, "loss": 0.03, "step": 4740 }, { "epoch": 4.43, "learning_rate": 4.944682835820896e-05, "loss": 0.0185, "step": 4744 }, { "epoch": 4.43, "learning_rate": 4.9446361940298506e-05, "loss": 0.0241, "step": 4748 }, { "epoch": 4.43, "learning_rate": 4.944589552238806e-05, "loss": 0.0162, "step": 4752 }, { "epoch": 4.44, "learning_rate": 4.9445429104477616e-05, "loss": 0.0138, "step": 4756 }, { "epoch": 4.44, "learning_rate": 4.9444962686567164e-05, "loss": 0.0128, "step": 4760 }, { "epoch": 4.44, "learning_rate": 4.944449626865672e-05, "loss": 0.0107, "step": 4764 }, { "epoch": 4.45, "learning_rate": 4.944402985074627e-05, "loss": 0.0171, "step": 4768 }, { "epoch": 4.45, "learning_rate": 4.944356343283582e-05, "loss": 0.0238, "step": 4772 }, { "epoch": 4.46, "learning_rate": 4.944309701492538e-05, "loss": 0.0075, "step": 4776 }, { "epoch": 4.46, "learning_rate": 4.9442630597014925e-05, "loss": 0.0178, "step": 4780 }, { "epoch": 4.46, "learning_rate": 4.944216417910448e-05, "loss": 0.0115, "step": 4784 }, { "epoch": 4.47, "learning_rate": 4.9441697761194035e-05, "loss": 0.0189, "step": 4788 }, { "epoch": 4.47, "learning_rate": 4.944123134328358e-05, "loss": 0.026, "step": 4792 }, { "epoch": 4.47, "learning_rate": 4.944076492537313e-05, "loss": 0.0286, "step": 4796 }, { "epoch": 4.48, "learning_rate": 4.944029850746269e-05, "loss": 0.0529, "step": 4800 }, { "epoch": 4.48, "learning_rate": 4.943983208955224e-05, "loss": 0.0191, "step": 4804 }, { "epoch": 4.49, "learning_rate": 4.943936567164179e-05, "loss": 0.0323, "step": 4808 }, { "epoch": 4.49, "learning_rate": 4.9438899253731344e-05, "loss": 0.0208, "step": 4812 }, { "epoch": 4.49, "learning_rate": 4.94384328358209e-05, "loss": 0.0217, "step": 4816 }, { "epoch": 4.5, "learning_rate": 4.943796641791045e-05, "loss": 0.0251, "step": 4820 }, { "epoch": 4.5, "learning_rate": 4.94375e-05, "loss": 0.0133, "step": 4824 }, { "epoch": 4.5, "learning_rate": 4.943703358208955e-05, "loss": 0.02, "step": 4828 }, { "epoch": 4.51, "learning_rate": 4.943656716417911e-05, "loss": 0.0258, "step": 4832 }, { "epoch": 4.51, "learning_rate": 4.943610074626866e-05, "loss": 0.0172, "step": 4836 }, { "epoch": 4.51, "learning_rate": 4.943563432835821e-05, "loss": 0.0382, "step": 4840 }, { "epoch": 4.52, "learning_rate": 4.943516791044776e-05, "loss": 0.014, "step": 4844 }, { "epoch": 4.52, "learning_rate": 4.943470149253732e-05, "loss": 0.0078, "step": 4848 }, { "epoch": 4.53, "learning_rate": 4.9434235074626866e-05, "loss": 0.0191, "step": 4852 }, { "epoch": 4.53, "learning_rate": 4.943376865671642e-05, "loss": 0.025, "step": 4856 }, { "epoch": 4.53, "learning_rate": 4.9433302238805976e-05, "loss": 0.0247, "step": 4860 }, { "epoch": 4.54, "learning_rate": 4.9432835820895524e-05, "loss": 0.0205, "step": 4864 }, { "epoch": 4.54, "learning_rate": 4.943236940298508e-05, "loss": 0.0087, "step": 4868 }, { "epoch": 4.54, "learning_rate": 4.943190298507463e-05, "loss": 0.0133, "step": 4872 }, { "epoch": 4.55, "learning_rate": 4.943143656716418e-05, "loss": 0.0116, "step": 4876 }, { "epoch": 4.55, "learning_rate": 4.943097014925374e-05, "loss": 0.0378, "step": 4880 }, { "epoch": 4.56, "learning_rate": 4.9430503731343285e-05, "loss": 0.0138, "step": 4884 }, { "epoch": 4.56, "learning_rate": 4.943003731343283e-05, "loss": 0.0109, "step": 4888 }, { "epoch": 4.56, "learning_rate": 4.9429570895522395e-05, "loss": 0.0393, "step": 4892 }, { "epoch": 4.57, "learning_rate": 4.942910447761194e-05, "loss": 0.0111, "step": 4896 }, { "epoch": 4.57, "learning_rate": 4.942863805970149e-05, "loss": 0.0159, "step": 4900 }, { "epoch": 4.57, "learning_rate": 4.9428171641791046e-05, "loss": 0.0169, "step": 4904 }, { "epoch": 4.58, "learning_rate": 4.94277052238806e-05, "loss": 0.0271, "step": 4908 }, { "epoch": 4.58, "learning_rate": 4.942723880597015e-05, "loss": 0.0064, "step": 4912 }, { "epoch": 4.59, "learning_rate": 4.9426772388059704e-05, "loss": 0.016, "step": 4916 }, { "epoch": 4.59, "learning_rate": 4.942630597014926e-05, "loss": 0.0194, "step": 4920 }, { "epoch": 4.59, "learning_rate": 4.942583955223881e-05, "loss": 0.0241, "step": 4924 }, { "epoch": 4.6, "learning_rate": 4.942537313432836e-05, "loss": 0.0217, "step": 4928 }, { "epoch": 4.6, "learning_rate": 4.942490671641791e-05, "loss": 0.0054, "step": 4932 }, { "epoch": 4.6, "learning_rate": 4.9424440298507465e-05, "loss": 0.0156, "step": 4936 }, { "epoch": 4.61, "learning_rate": 4.942397388059702e-05, "loss": 0.016, "step": 4940 }, { "epoch": 4.61, "learning_rate": 4.942350746268657e-05, "loss": 0.0283, "step": 4944 }, { "epoch": 4.62, "learning_rate": 4.9423041044776116e-05, "loss": 0.0189, "step": 4948 }, { "epoch": 4.62, "learning_rate": 4.942257462686568e-05, "loss": 0.0236, "step": 4952 }, { "epoch": 4.62, "learning_rate": 4.9422108208955226e-05, "loss": 0.0101, "step": 4956 }, { "epoch": 4.63, "learning_rate": 4.9421641791044774e-05, "loss": 0.0299, "step": 4960 }, { "epoch": 4.63, "learning_rate": 4.942117537313433e-05, "loss": 0.0215, "step": 4964 }, { "epoch": 4.63, "learning_rate": 4.9420708955223884e-05, "loss": 0.0109, "step": 4968 }, { "epoch": 4.64, "learning_rate": 4.942024253731343e-05, "loss": 0.0157, "step": 4972 }, { "epoch": 4.64, "learning_rate": 4.941977611940299e-05, "loss": 0.0241, "step": 4976 }, { "epoch": 4.65, "learning_rate": 4.941930970149254e-05, "loss": 0.0279, "step": 4980 }, { "epoch": 4.65, "learning_rate": 4.941884328358209e-05, "loss": 0.0318, "step": 4984 }, { "epoch": 4.65, "learning_rate": 4.9418376865671645e-05, "loss": 0.022, "step": 4988 }, { "epoch": 4.66, "learning_rate": 4.941791044776119e-05, "loss": 0.0137, "step": 4992 }, { "epoch": 4.66, "learning_rate": 4.941744402985075e-05, "loss": 0.0226, "step": 4996 }, { "epoch": 4.66, "learning_rate": 4.94169776119403e-05, "loss": 0.009, "step": 5000 }, { "epoch": 4.66, "eval_exact_match": 0.7030947775628626, "eval_exec": 0.746615087040619, "eval_loss": 0.18529821932315826, "eval_runtime": 1152.0827, "eval_samples_per_second": 0.898, "step": 5000 }, { "epoch": 4.67, "learning_rate": 4.941651119402985e-05, "loss": 0.0145, "step": 5004 }, { "epoch": 4.67, "learning_rate": 4.9416044776119406e-05, "loss": 0.0094, "step": 5008 }, { "epoch": 4.68, "learning_rate": 4.941557835820896e-05, "loss": 0.0242, "step": 5012 }, { "epoch": 4.68, "learning_rate": 4.941511194029851e-05, "loss": 0.0222, "step": 5016 }, { "epoch": 4.68, "learning_rate": 4.9414645522388064e-05, "loss": 0.0191, "step": 5020 }, { "epoch": 4.69, "learning_rate": 4.941417910447761e-05, "loss": 0.0174, "step": 5024 }, { "epoch": 4.69, "learning_rate": 4.941371268656717e-05, "loss": 0.0283, "step": 5028 }, { "epoch": 4.69, "learning_rate": 4.941324626865672e-05, "loss": 0.0104, "step": 5032 }, { "epoch": 4.7, "learning_rate": 4.941277985074627e-05, "loss": 0.0104, "step": 5036 }, { "epoch": 4.7, "learning_rate": 4.9412313432835825e-05, "loss": 0.0192, "step": 5040 }, { "epoch": 4.71, "learning_rate": 4.941184701492538e-05, "loss": 0.0129, "step": 5044 }, { "epoch": 4.71, "learning_rate": 4.941138059701493e-05, "loss": 0.0177, "step": 5048 }, { "epoch": 4.71, "learning_rate": 4.9410914179104476e-05, "loss": 0.0157, "step": 5052 }, { "epoch": 4.72, "learning_rate": 4.941044776119403e-05, "loss": 0.0169, "step": 5056 }, { "epoch": 4.72, "learning_rate": 4.9409981343283586e-05, "loss": 0.0124, "step": 5060 }, { "epoch": 4.72, "learning_rate": 4.9409514925373134e-05, "loss": 0.0167, "step": 5064 }, { "epoch": 4.73, "learning_rate": 4.940904850746269e-05, "loss": 0.0112, "step": 5068 }, { "epoch": 4.73, "learning_rate": 4.9408582089552244e-05, "loss": 0.0228, "step": 5072 }, { "epoch": 4.73, "learning_rate": 4.940811567164179e-05, "loss": 0.0211, "step": 5076 }, { "epoch": 4.74, "learning_rate": 4.940764925373135e-05, "loss": 0.0306, "step": 5080 }, { "epoch": 4.74, "learning_rate": 4.9407182835820895e-05, "loss": 0.0173, "step": 5084 }, { "epoch": 4.75, "learning_rate": 4.940671641791045e-05, "loss": 0.0061, "step": 5088 }, { "epoch": 4.75, "learning_rate": 4.9406250000000005e-05, "loss": 0.0432, "step": 5092 }, { "epoch": 4.75, "learning_rate": 4.940578358208955e-05, "loss": 0.0349, "step": 5096 }, { "epoch": 4.76, "learning_rate": 4.940531716417911e-05, "loss": 0.0483, "step": 5100 }, { "epoch": 4.76, "learning_rate": 4.940485074626866e-05, "loss": 0.0176, "step": 5104 }, { "epoch": 4.76, "learning_rate": 4.940438432835821e-05, "loss": 0.0256, "step": 5108 }, { "epoch": 4.77, "learning_rate": 4.940391791044776e-05, "loss": 0.0236, "step": 5112 }, { "epoch": 4.77, "learning_rate": 4.9403451492537314e-05, "loss": 0.0167, "step": 5116 }, { "epoch": 4.78, "learning_rate": 4.940298507462687e-05, "loss": 0.0294, "step": 5120 }, { "epoch": 4.78, "learning_rate": 4.940251865671642e-05, "loss": 0.0245, "step": 5124 }, { "epoch": 4.78, "learning_rate": 4.940205223880597e-05, "loss": 0.0169, "step": 5128 }, { "epoch": 4.79, "learning_rate": 4.940158582089553e-05, "loss": 0.0202, "step": 5132 }, { "epoch": 4.79, "learning_rate": 4.9401119402985075e-05, "loss": 0.0286, "step": 5136 }, { "epoch": 4.79, "learning_rate": 4.940065298507463e-05, "loss": 0.0245, "step": 5140 }, { "epoch": 4.8, "learning_rate": 4.940018656716418e-05, "loss": 0.0287, "step": 5144 }, { "epoch": 4.8, "learning_rate": 4.939972014925374e-05, "loss": 0.0185, "step": 5148 }, { "epoch": 4.81, "learning_rate": 4.939925373134329e-05, "loss": 0.0274, "step": 5152 }, { "epoch": 4.81, "learning_rate": 4.9398787313432836e-05, "loss": 0.0052, "step": 5156 }, { "epoch": 4.81, "learning_rate": 4.939832089552239e-05, "loss": 0.0217, "step": 5160 }, { "epoch": 4.82, "learning_rate": 4.9397854477611946e-05, "loss": 0.0214, "step": 5164 }, { "epoch": 4.82, "learning_rate": 4.9397388059701494e-05, "loss": 0.0243, "step": 5168 }, { "epoch": 4.82, "learning_rate": 4.939692164179105e-05, "loss": 0.0253, "step": 5172 }, { "epoch": 4.83, "learning_rate": 4.93964552238806e-05, "loss": 0.013, "step": 5176 }, { "epoch": 4.83, "learning_rate": 4.939598880597015e-05, "loss": 0.0193, "step": 5180 }, { "epoch": 4.84, "learning_rate": 4.939552238805971e-05, "loss": 0.0098, "step": 5184 }, { "epoch": 4.84, "learning_rate": 4.9395055970149255e-05, "loss": 0.0193, "step": 5188 }, { "epoch": 4.84, "learning_rate": 4.939458955223881e-05, "loss": 0.0238, "step": 5192 }, { "epoch": 4.85, "learning_rate": 4.9394123134328365e-05, "loss": 0.0204, "step": 5196 }, { "epoch": 4.85, "learning_rate": 4.939365671641791e-05, "loss": 0.0424, "step": 5200 }, { "epoch": 4.85, "learning_rate": 4.939319029850746e-05, "loss": 0.012, "step": 5204 }, { "epoch": 4.86, "learning_rate": 4.939272388059702e-05, "loss": 0.0168, "step": 5208 }, { "epoch": 4.86, "learning_rate": 4.939225746268657e-05, "loss": 0.0328, "step": 5212 }, { "epoch": 4.87, "learning_rate": 4.939179104477612e-05, "loss": 0.0151, "step": 5216 }, { "epoch": 4.87, "learning_rate": 4.9391324626865674e-05, "loss": 0.0222, "step": 5220 }, { "epoch": 4.87, "learning_rate": 4.939085820895523e-05, "loss": 0.0315, "step": 5224 }, { "epoch": 4.88, "learning_rate": 4.939039179104478e-05, "loss": 0.0255, "step": 5228 }, { "epoch": 4.88, "learning_rate": 4.938992537313433e-05, "loss": 0.0187, "step": 5232 }, { "epoch": 4.88, "learning_rate": 4.938945895522388e-05, "loss": 0.0132, "step": 5236 }, { "epoch": 4.89, "learning_rate": 4.9388992537313435e-05, "loss": 0.0296, "step": 5240 }, { "epoch": 4.89, "learning_rate": 4.938852611940299e-05, "loss": 0.0223, "step": 5244 }, { "epoch": 4.9, "learning_rate": 4.938805970149254e-05, "loss": 0.0189, "step": 5248 }, { "epoch": 4.9, "learning_rate": 4.938759328358209e-05, "loss": 0.0232, "step": 5252 }, { "epoch": 4.9, "learning_rate": 4.938712686567165e-05, "loss": 0.0243, "step": 5256 }, { "epoch": 4.91, "learning_rate": 4.9386660447761196e-05, "loss": 0.0218, "step": 5260 }, { "epoch": 4.91, "learning_rate": 4.9386194029850744e-05, "loss": 0.0371, "step": 5264 }, { "epoch": 4.91, "learning_rate": 4.9385727611940306e-05, "loss": 0.0128, "step": 5268 }, { "epoch": 4.92, "learning_rate": 4.9385261194029854e-05, "loss": 0.0266, "step": 5272 }, { "epoch": 4.92, "learning_rate": 4.93847947761194e-05, "loss": 0.0152, "step": 5276 }, { "epoch": 4.93, "learning_rate": 4.938432835820896e-05, "loss": 0.019, "step": 5280 }, { "epoch": 4.93, "learning_rate": 4.938386194029851e-05, "loss": 0.0055, "step": 5284 }, { "epoch": 4.93, "learning_rate": 4.938339552238806e-05, "loss": 0.023, "step": 5288 }, { "epoch": 4.94, "learning_rate": 4.9382929104477615e-05, "loss": 0.011, "step": 5292 }, { "epoch": 4.94, "learning_rate": 4.938246268656716e-05, "loss": 0.0152, "step": 5296 }, { "epoch": 4.94, "learning_rate": 4.938199626865672e-05, "loss": 0.0356, "step": 5300 }, { "epoch": 4.95, "learning_rate": 4.938152985074627e-05, "loss": 0.0317, "step": 5304 }, { "epoch": 4.95, "learning_rate": 4.938106343283582e-05, "loss": 0.0347, "step": 5308 }, { "epoch": 4.96, "learning_rate": 4.9380597014925376e-05, "loss": 0.0067, "step": 5312 }, { "epoch": 4.96, "learning_rate": 4.938013059701493e-05, "loss": 0.0202, "step": 5316 }, { "epoch": 4.96, "learning_rate": 4.937966417910448e-05, "loss": 0.0275, "step": 5320 }, { "epoch": 4.97, "learning_rate": 4.9379197761194034e-05, "loss": 0.0165, "step": 5324 }, { "epoch": 4.97, "learning_rate": 4.937873134328358e-05, "loss": 0.019, "step": 5328 }, { "epoch": 4.97, "learning_rate": 4.937826492537314e-05, "loss": 0.0157, "step": 5332 }, { "epoch": 4.98, "learning_rate": 4.937779850746269e-05, "loss": 0.0303, "step": 5336 }, { "epoch": 4.98, "learning_rate": 4.937733208955224e-05, "loss": 0.0178, "step": 5340 }, { "epoch": 4.98, "learning_rate": 4.9376865671641795e-05, "loss": 0.0162, "step": 5344 }, { "epoch": 4.99, "learning_rate": 4.937639925373135e-05, "loss": 0.0235, "step": 5348 }, { "epoch": 4.99, "learning_rate": 4.93759328358209e-05, "loss": 0.0154, "step": 5352 }, { "epoch": 5.0, "learning_rate": 4.9375466417910446e-05, "loss": 0.0167, "step": 5356 }, { "epoch": 5.0, "learning_rate": 4.937500000000001e-05, "loss": 0.0238, "step": 5360 }, { "epoch": 5.0, "learning_rate": 4.9374533582089556e-05, "loss": 0.0116, "step": 5364 }, { "epoch": 5.01, "learning_rate": 4.9374067164179104e-05, "loss": 0.0073, "step": 5368 }, { "epoch": 5.01, "learning_rate": 4.937360074626866e-05, "loss": 0.0178, "step": 5372 }, { "epoch": 5.01, "learning_rate": 4.9373134328358214e-05, "loss": 0.0102, "step": 5376 }, { "epoch": 5.02, "learning_rate": 4.937266791044776e-05, "loss": 0.0109, "step": 5380 }, { "epoch": 5.02, "learning_rate": 4.937220149253732e-05, "loss": 0.0288, "step": 5384 }, { "epoch": 5.03, "learning_rate": 4.9371735074626865e-05, "loss": 0.0073, "step": 5388 }, { "epoch": 5.03, "learning_rate": 4.937126865671642e-05, "loss": 0.0181, "step": 5392 }, { "epoch": 5.03, "learning_rate": 4.9370802238805975e-05, "loss": 0.0159, "step": 5396 }, { "epoch": 5.04, "learning_rate": 4.937033582089552e-05, "loss": 0.0234, "step": 5400 }, { "epoch": 5.04, "learning_rate": 4.936986940298508e-05, "loss": 0.0203, "step": 5404 }, { "epoch": 5.04, "learning_rate": 4.936940298507463e-05, "loss": 0.0132, "step": 5408 }, { "epoch": 5.05, "learning_rate": 4.936893656716418e-05, "loss": 0.0069, "step": 5412 }, { "epoch": 5.05, "learning_rate": 4.936847014925373e-05, "loss": 0.0182, "step": 5416 }, { "epoch": 5.06, "learning_rate": 4.936800373134329e-05, "loss": 0.0064, "step": 5420 }, { "epoch": 5.06, "learning_rate": 4.936753731343284e-05, "loss": 0.0057, "step": 5424 }, { "epoch": 5.06, "learning_rate": 4.936707089552239e-05, "loss": 0.0206, "step": 5428 }, { "epoch": 5.07, "learning_rate": 4.936660447761194e-05, "loss": 0.0148, "step": 5432 }, { "epoch": 5.07, "learning_rate": 4.9366138059701497e-05, "loss": 0.0233, "step": 5436 }, { "epoch": 5.07, "learning_rate": 4.9365671641791045e-05, "loss": 0.0297, "step": 5440 }, { "epoch": 5.08, "learning_rate": 4.93652052238806e-05, "loss": 0.0138, "step": 5444 }, { "epoch": 5.08, "learning_rate": 4.936473880597015e-05, "loss": 0.0223, "step": 5448 }, { "epoch": 5.09, "learning_rate": 4.93642723880597e-05, "loss": 0.0104, "step": 5452 }, { "epoch": 5.09, "learning_rate": 4.936380597014926e-05, "loss": 0.0088, "step": 5456 }, { "epoch": 5.09, "learning_rate": 4.9363339552238806e-05, "loss": 0.0276, "step": 5460 }, { "epoch": 5.1, "learning_rate": 4.936287313432836e-05, "loss": 0.0533, "step": 5464 }, { "epoch": 5.1, "learning_rate": 4.9362406716417916e-05, "loss": 0.0108, "step": 5468 }, { "epoch": 5.1, "learning_rate": 4.9361940298507464e-05, "loss": 0.0145, "step": 5472 }, { "epoch": 5.11, "learning_rate": 4.936147388059702e-05, "loss": 0.0045, "step": 5476 }, { "epoch": 5.11, "learning_rate": 4.9361007462686573e-05, "loss": 0.0249, "step": 5480 }, { "epoch": 5.12, "learning_rate": 4.936054104477612e-05, "loss": 0.0071, "step": 5484 }, { "epoch": 5.12, "learning_rate": 4.9360074626865676e-05, "loss": 0.0101, "step": 5488 }, { "epoch": 5.12, "learning_rate": 4.9359608208955225e-05, "loss": 0.0113, "step": 5492 }, { "epoch": 5.13, "learning_rate": 4.935914179104478e-05, "loss": 0.0135, "step": 5496 }, { "epoch": 5.13, "learning_rate": 4.9358675373134334e-05, "loss": 0.0129, "step": 5500 }, { "epoch": 5.13, "eval_exact_match": 0.723404255319149, "eval_exec": 0.7562862669245648, "eval_loss": 0.20738151669502258, "eval_runtime": 1207.3594, "eval_samples_per_second": 0.856, "step": 5500 }, { "epoch": 5.13, "learning_rate": 4.935820895522388e-05, "loss": 0.0183, "step": 5504 }, { "epoch": 5.14, "learning_rate": 4.935774253731343e-05, "loss": 0.0204, "step": 5508 }, { "epoch": 5.14, "learning_rate": 4.935727611940299e-05, "loss": 0.0059, "step": 5512 }, { "epoch": 5.15, "learning_rate": 4.935680970149254e-05, "loss": 0.017, "step": 5516 }, { "epoch": 5.15, "learning_rate": 4.935634328358209e-05, "loss": 0.0125, "step": 5520 }, { "epoch": 5.15, "learning_rate": 4.9355876865671644e-05, "loss": 0.022, "step": 5524 }, { "epoch": 5.16, "learning_rate": 4.93554104477612e-05, "loss": 0.0079, "step": 5528 }, { "epoch": 5.16, "learning_rate": 4.9354944029850747e-05, "loss": 0.0082, "step": 5532 }, { "epoch": 5.16, "learning_rate": 4.93544776119403e-05, "loss": 0.0078, "step": 5536 }, { "epoch": 5.17, "learning_rate": 4.9354011194029856e-05, "loss": 0.0172, "step": 5540 }, { "epoch": 5.17, "learning_rate": 4.9353544776119405e-05, "loss": 0.0133, "step": 5544 }, { "epoch": 5.18, "learning_rate": 4.935307835820896e-05, "loss": 0.0171, "step": 5548 }, { "epoch": 5.18, "learning_rate": 4.935261194029851e-05, "loss": 0.0192, "step": 5552 }, { "epoch": 5.18, "learning_rate": 4.935214552238806e-05, "loss": 0.0066, "step": 5556 }, { "epoch": 5.19, "learning_rate": 4.935167910447762e-05, "loss": 0.019, "step": 5560 }, { "epoch": 5.19, "learning_rate": 4.9351212686567166e-05, "loss": 0.0155, "step": 5564 }, { "epoch": 5.19, "learning_rate": 4.9350746268656714e-05, "loss": 0.0426, "step": 5568 }, { "epoch": 5.2, "learning_rate": 4.9350279850746275e-05, "loss": 0.0177, "step": 5572 }, { "epoch": 5.2, "learning_rate": 4.9349813432835823e-05, "loss": 0.0092, "step": 5576 }, { "epoch": 5.21, "learning_rate": 4.934934701492537e-05, "loss": 0.0128, "step": 5580 }, { "epoch": 5.21, "learning_rate": 4.9348880597014927e-05, "loss": 0.0208, "step": 5584 }, { "epoch": 5.21, "learning_rate": 4.934841417910448e-05, "loss": 0.0207, "step": 5588 }, { "epoch": 5.22, "learning_rate": 4.934794776119403e-05, "loss": 0.0251, "step": 5592 }, { "epoch": 5.22, "learning_rate": 4.9347481343283584e-05, "loss": 0.0099, "step": 5596 }, { "epoch": 5.22, "learning_rate": 4.934701492537314e-05, "loss": 0.0138, "step": 5600 }, { "epoch": 5.23, "learning_rate": 4.934654850746269e-05, "loss": 0.0153, "step": 5604 }, { "epoch": 5.23, "learning_rate": 4.934608208955224e-05, "loss": 0.0268, "step": 5608 }, { "epoch": 5.24, "learning_rate": 4.934561567164179e-05, "loss": 0.0149, "step": 5612 }, { "epoch": 5.24, "learning_rate": 4.9345149253731345e-05, "loss": 0.0148, "step": 5616 }, { "epoch": 5.24, "learning_rate": 4.93446828358209e-05, "loss": 0.0114, "step": 5620 }, { "epoch": 5.25, "learning_rate": 4.934421641791045e-05, "loss": 0.0064, "step": 5624 }, { "epoch": 5.25, "learning_rate": 4.9343749999999997e-05, "loss": 0.0176, "step": 5628 }, { "epoch": 5.25, "learning_rate": 4.934328358208956e-05, "loss": 0.0175, "step": 5632 }, { "epoch": 5.26, "learning_rate": 4.9342817164179106e-05, "loss": 0.028, "step": 5636 }, { "epoch": 5.26, "learning_rate": 4.934235074626866e-05, "loss": 0.0124, "step": 5640 }, { "epoch": 5.26, "learning_rate": 4.934188432835821e-05, "loss": 0.0196, "step": 5644 }, { "epoch": 5.27, "learning_rate": 4.9341417910447764e-05, "loss": 0.0129, "step": 5648 }, { "epoch": 5.27, "learning_rate": 4.934095149253732e-05, "loss": 0.0173, "step": 5652 }, { "epoch": 5.28, "learning_rate": 4.934048507462687e-05, "loss": 0.042, "step": 5656 }, { "epoch": 5.28, "learning_rate": 4.934001865671642e-05, "loss": 0.0103, "step": 5660 }, { "epoch": 5.28, "learning_rate": 4.933955223880598e-05, "loss": 0.0108, "step": 5664 }, { "epoch": 5.29, "learning_rate": 4.9339085820895525e-05, "loss": 0.0123, "step": 5668 }, { "epoch": 5.29, "learning_rate": 4.9338619402985073e-05, "loss": 0.0061, "step": 5672 }, { "epoch": 5.29, "learning_rate": 4.933815298507463e-05, "loss": 0.0136, "step": 5676 }, { "epoch": 5.3, "learning_rate": 4.933768656716418e-05, "loss": 0.0152, "step": 5680 }, { "epoch": 5.3, "learning_rate": 4.933722014925373e-05, "loss": 0.0209, "step": 5684 }, { "epoch": 5.31, "learning_rate": 4.9336753731343286e-05, "loss": 0.0259, "step": 5688 }, { "epoch": 5.31, "learning_rate": 4.933628731343284e-05, "loss": 0.0155, "step": 5692 }, { "epoch": 5.31, "learning_rate": 4.933582089552239e-05, "loss": 0.0257, "step": 5696 }, { "epoch": 5.32, "learning_rate": 4.9335354477611944e-05, "loss": 0.0359, "step": 5700 }, { "epoch": 5.32, "learning_rate": 4.933488805970149e-05, "loss": 0.0091, "step": 5704 }, { "epoch": 5.32, "learning_rate": 4.933442164179105e-05, "loss": 0.021, "step": 5708 }, { "epoch": 5.33, "learning_rate": 4.93339552238806e-05, "loss": 0.0175, "step": 5712 }, { "epoch": 5.33, "learning_rate": 4.933348880597015e-05, "loss": 0.0134, "step": 5716 }, { "epoch": 5.34, "learning_rate": 4.9333022388059705e-05, "loss": 0.0152, "step": 5720 }, { "epoch": 5.34, "learning_rate": 4.933255597014926e-05, "loss": 0.014, "step": 5724 }, { "epoch": 5.34, "learning_rate": 4.933208955223881e-05, "loss": 0.0205, "step": 5728 }, { "epoch": 5.35, "learning_rate": 4.9331623134328356e-05, "loss": 0.0145, "step": 5732 }, { "epoch": 5.35, "learning_rate": 4.933115671641791e-05, "loss": 0.0054, "step": 5736 }, { "epoch": 5.35, "learning_rate": 4.9330690298507466e-05, "loss": 0.0197, "step": 5740 }, { "epoch": 5.36, "learning_rate": 4.9330223880597014e-05, "loss": 0.0167, "step": 5744 }, { "epoch": 5.36, "learning_rate": 4.932975746268657e-05, "loss": 0.0326, "step": 5748 }, { "epoch": 5.37, "learning_rate": 4.9329291044776124e-05, "loss": 0.0252, "step": 5752 }, { "epoch": 5.37, "learning_rate": 4.932882462686567e-05, "loss": 0.0169, "step": 5756 }, { "epoch": 5.37, "learning_rate": 4.932835820895523e-05, "loss": 0.0097, "step": 5760 }, { "epoch": 5.38, "learning_rate": 4.9327891791044775e-05, "loss": 0.0059, "step": 5764 }, { "epoch": 5.38, "learning_rate": 4.932742537313433e-05, "loss": 0.0128, "step": 5768 }, { "epoch": 5.38, "learning_rate": 4.9326958955223885e-05, "loss": 0.012, "step": 5772 }, { "epoch": 5.39, "learning_rate": 4.932649253731343e-05, "loss": 0.0202, "step": 5776 }, { "epoch": 5.39, "learning_rate": 4.932602611940299e-05, "loss": 0.0112, "step": 5780 }, { "epoch": 5.4, "learning_rate": 4.932555970149254e-05, "loss": 0.0114, "step": 5784 }, { "epoch": 5.4, "learning_rate": 4.932509328358209e-05, "loss": 0.0085, "step": 5788 }, { "epoch": 5.4, "learning_rate": 4.932462686567164e-05, "loss": 0.0115, "step": 5792 }, { "epoch": 5.41, "learning_rate": 4.9324160447761194e-05, "loss": 0.0318, "step": 5796 }, { "epoch": 5.41, "learning_rate": 4.932369402985075e-05, "loss": 0.032, "step": 5800 }, { "epoch": 5.41, "learning_rate": 4.9323227611940304e-05, "loss": 0.015, "step": 5804 }, { "epoch": 5.42, "learning_rate": 4.932276119402985e-05, "loss": 0.0194, "step": 5808 }, { "epoch": 5.42, "learning_rate": 4.932229477611941e-05, "loss": 0.017, "step": 5812 }, { "epoch": 5.43, "learning_rate": 4.932182835820896e-05, "loss": 0.0081, "step": 5816 }, { "epoch": 5.43, "learning_rate": 4.932136194029851e-05, "loss": 0.0084, "step": 5820 }, { "epoch": 5.43, "learning_rate": 4.932089552238806e-05, "loss": 0.0069, "step": 5824 }, { "epoch": 5.44, "learning_rate": 4.932042910447762e-05, "loss": 0.0175, "step": 5828 }, { "epoch": 5.44, "learning_rate": 4.931996268656717e-05, "loss": 0.021, "step": 5832 }, { "epoch": 5.44, "learning_rate": 4.9319496268656716e-05, "loss": 0.0136, "step": 5836 }, { "epoch": 5.45, "learning_rate": 4.931902985074627e-05, "loss": 0.012, "step": 5840 }, { "epoch": 5.45, "learning_rate": 4.9318563432835826e-05, "loss": 0.0068, "step": 5844 }, { "epoch": 5.46, "learning_rate": 4.9318097014925374e-05, "loss": 0.0151, "step": 5848 }, { "epoch": 5.46, "learning_rate": 4.931763059701493e-05, "loss": 0.0189, "step": 5852 }, { "epoch": 5.46, "learning_rate": 4.931716417910448e-05, "loss": 0.0198, "step": 5856 }, { "epoch": 5.47, "learning_rate": 4.931669776119403e-05, "loss": 0.0086, "step": 5860 }, { "epoch": 5.47, "learning_rate": 4.931623134328359e-05, "loss": 0.0126, "step": 5864 }, { "epoch": 5.47, "learning_rate": 4.9315764925373135e-05, "loss": 0.0093, "step": 5868 }, { "epoch": 5.48, "learning_rate": 4.931529850746269e-05, "loss": 0.0232, "step": 5872 }, { "epoch": 5.48, "learning_rate": 4.9314832089552245e-05, "loss": 0.0138, "step": 5876 }, { "epoch": 5.49, "learning_rate": 4.931436567164179e-05, "loss": 0.0332, "step": 5880 }, { "epoch": 5.49, "learning_rate": 4.931389925373134e-05, "loss": 0.0102, "step": 5884 }, { "epoch": 5.49, "learning_rate": 4.93134328358209e-05, "loss": 0.015, "step": 5888 }, { "epoch": 5.5, "learning_rate": 4.931296641791045e-05, "loss": 0.0099, "step": 5892 }, { "epoch": 5.5, "learning_rate": 4.93125e-05, "loss": 0.0109, "step": 5896 }, { "epoch": 5.5, "learning_rate": 4.9312033582089554e-05, "loss": 0.0084, "step": 5900 }, { "epoch": 5.51, "learning_rate": 4.931156716417911e-05, "loss": 0.0145, "step": 5904 }, { "epoch": 5.51, "learning_rate": 4.931110074626866e-05, "loss": 0.0085, "step": 5908 }, { "epoch": 5.51, "learning_rate": 4.931063432835821e-05, "loss": 0.0101, "step": 5912 }, { "epoch": 5.52, "learning_rate": 4.931016791044776e-05, "loss": 0.0131, "step": 5916 }, { "epoch": 5.52, "learning_rate": 4.9309701492537315e-05, "loss": 0.021, "step": 5920 }, { "epoch": 5.53, "learning_rate": 4.930923507462687e-05, "loss": 0.0113, "step": 5924 }, { "epoch": 5.53, "learning_rate": 4.930876865671642e-05, "loss": 0.0208, "step": 5928 }, { "epoch": 5.53, "learning_rate": 4.930830223880597e-05, "loss": 0.0187, "step": 5932 }, { "epoch": 5.54, "learning_rate": 4.930783582089553e-05, "loss": 0.0216, "step": 5936 }, { "epoch": 5.54, "learning_rate": 4.9307369402985076e-05, "loss": 0.0101, "step": 5940 }, { "epoch": 5.54, "learning_rate": 4.9306902985074624e-05, "loss": 0.0108, "step": 5944 }, { "epoch": 5.55, "learning_rate": 4.9306436567164186e-05, "loss": 0.0206, "step": 5948 }, { "epoch": 5.55, "learning_rate": 4.9305970149253734e-05, "loss": 0.0134, "step": 5952 }, { "epoch": 5.56, "learning_rate": 4.930550373134328e-05, "loss": 0.0075, "step": 5956 }, { "epoch": 5.56, "learning_rate": 4.930503731343284e-05, "loss": 0.0154, "step": 5960 }, { "epoch": 5.56, "learning_rate": 4.930457089552239e-05, "loss": 0.0136, "step": 5964 }, { "epoch": 5.57, "learning_rate": 4.930410447761195e-05, "loss": 0.011, "step": 5968 }, { "epoch": 5.57, "learning_rate": 4.9303638059701495e-05, "loss": 0.0114, "step": 5972 }, { "epoch": 5.57, "learning_rate": 4.930317164179104e-05, "loss": 0.0133, "step": 5976 }, { "epoch": 5.58, "learning_rate": 4.9302705223880605e-05, "loss": 0.0109, "step": 5980 }, { "epoch": 5.58, "learning_rate": 4.930223880597015e-05, "loss": 0.0084, "step": 5984 }, { "epoch": 5.59, "learning_rate": 4.93017723880597e-05, "loss": 0.0147, "step": 5988 }, { "epoch": 5.59, "learning_rate": 4.9301305970149256e-05, "loss": 0.0139, "step": 5992 }, { "epoch": 5.59, "learning_rate": 4.930083955223881e-05, "loss": 0.009, "step": 5996 }, { "epoch": 5.6, "learning_rate": 4.930037313432836e-05, "loss": 0.0127, "step": 6000 }, { "epoch": 5.6, "eval_exact_match": 0.7224371373307543, "eval_exec": 0.7446808510638298, "eval_loss": 0.20770786702632904, "eval_runtime": 1400.4625, "eval_samples_per_second": 0.738, "step": 6000 }, { "epoch": 5.6, "learning_rate": 4.9299906716417914e-05, "loss": 0.0203, "step": 6004 }, { "epoch": 5.6, "learning_rate": 4.929944029850746e-05, "loss": 0.0078, "step": 6008 }, { "epoch": 5.61, "learning_rate": 4.929897388059702e-05, "loss": 0.0127, "step": 6012 }, { "epoch": 5.61, "learning_rate": 4.929850746268657e-05, "loss": 0.0099, "step": 6016 }, { "epoch": 5.62, "learning_rate": 4.929804104477612e-05, "loss": 0.0149, "step": 6020 }, { "epoch": 5.62, "learning_rate": 4.9297574626865675e-05, "loss": 0.0142, "step": 6024 }, { "epoch": 5.62, "learning_rate": 4.929710820895523e-05, "loss": 0.0129, "step": 6028 }, { "epoch": 5.63, "learning_rate": 4.929664179104478e-05, "loss": 0.015, "step": 6032 }, { "epoch": 5.63, "learning_rate": 4.9296175373134326e-05, "loss": 0.0056, "step": 6036 }, { "epoch": 5.63, "learning_rate": 4.929570895522389e-05, "loss": 0.0085, "step": 6040 }, { "epoch": 5.64, "learning_rate": 4.9295242537313436e-05, "loss": 0.0157, "step": 6044 }, { "epoch": 5.64, "learning_rate": 4.9294776119402984e-05, "loss": 0.0248, "step": 6048 }, { "epoch": 5.65, "learning_rate": 4.929430970149254e-05, "loss": 0.0234, "step": 6052 }, { "epoch": 5.65, "learning_rate": 4.9293843283582094e-05, "loss": 0.0103, "step": 6056 }, { "epoch": 5.65, "learning_rate": 4.929337686567164e-05, "loss": 0.0106, "step": 6060 }, { "epoch": 5.66, "learning_rate": 4.92929104477612e-05, "loss": 0.0075, "step": 6064 }, { "epoch": 5.66, "learning_rate": 4.9292444029850745e-05, "loss": 0.0082, "step": 6068 }, { "epoch": 5.66, "learning_rate": 4.92919776119403e-05, "loss": 0.0289, "step": 6072 }, { "epoch": 5.67, "learning_rate": 4.9291511194029855e-05, "loss": 0.0079, "step": 6076 }, { "epoch": 5.67, "learning_rate": 4.92910447761194e-05, "loss": 0.0148, "step": 6080 }, { "epoch": 5.68, "learning_rate": 4.929057835820896e-05, "loss": 0.0072, "step": 6084 }, { "epoch": 5.68, "learning_rate": 4.929011194029851e-05, "loss": 0.0087, "step": 6088 }, { "epoch": 5.68, "learning_rate": 4.928964552238806e-05, "loss": 0.0075, "step": 6092 }, { "epoch": 5.69, "learning_rate": 4.928917910447761e-05, "loss": 0.0202, "step": 6096 }, { "epoch": 5.69, "learning_rate": 4.928871268656717e-05, "loss": 0.0076, "step": 6100 }, { "epoch": 5.69, "learning_rate": 4.928824626865672e-05, "loss": 0.0187, "step": 6104 }, { "epoch": 5.7, "learning_rate": 4.928777985074627e-05, "loss": 0.0158, "step": 6108 }, { "epoch": 5.7, "learning_rate": 4.928731343283582e-05, "loss": 0.0142, "step": 6112 }, { "epoch": 5.71, "learning_rate": 4.928684701492538e-05, "loss": 0.0116, "step": 6116 }, { "epoch": 5.71, "learning_rate": 4.9286380597014925e-05, "loss": 0.0218, "step": 6120 }, { "epoch": 5.71, "learning_rate": 4.928591417910448e-05, "loss": 0.0132, "step": 6124 }, { "epoch": 5.72, "learning_rate": 4.928544776119403e-05, "loss": 0.0084, "step": 6128 }, { "epoch": 5.72, "learning_rate": 4.928498134328359e-05, "loss": 0.0063, "step": 6132 }, { "epoch": 5.72, "learning_rate": 4.928451492537314e-05, "loss": 0.008, "step": 6136 }, { "epoch": 5.73, "learning_rate": 4.9284048507462686e-05, "loss": 0.0157, "step": 6140 }, { "epoch": 5.73, "learning_rate": 4.928358208955224e-05, "loss": 0.0041, "step": 6144 }, { "epoch": 5.73, "learning_rate": 4.9283115671641796e-05, "loss": 0.0168, "step": 6148 }, { "epoch": 5.74, "learning_rate": 4.9282649253731344e-05, "loss": 0.039, "step": 6152 }, { "epoch": 5.74, "learning_rate": 4.92821828358209e-05, "loss": 0.0105, "step": 6156 }, { "epoch": 5.75, "learning_rate": 4.9281716417910454e-05, "loss": 0.0158, "step": 6160 }, { "epoch": 5.75, "learning_rate": 4.928125e-05, "loss": 0.0184, "step": 6164 }, { "epoch": 5.75, "learning_rate": 4.928078358208956e-05, "loss": 0.01, "step": 6168 }, { "epoch": 5.76, "learning_rate": 4.9280317164179105e-05, "loss": 0.0229, "step": 6172 }, { "epoch": 5.76, "learning_rate": 4.927985074626866e-05, "loss": 0.0083, "step": 6176 }, { "epoch": 5.76, "learning_rate": 4.9279384328358215e-05, "loss": 0.016, "step": 6180 }, { "epoch": 5.77, "learning_rate": 4.927891791044776e-05, "loss": 0.0097, "step": 6184 }, { "epoch": 5.77, "learning_rate": 4.927845149253731e-05, "loss": 0.0119, "step": 6188 }, { "epoch": 5.78, "learning_rate": 4.927798507462687e-05, "loss": 0.0089, "step": 6192 }, { "epoch": 5.78, "learning_rate": 4.927751865671642e-05, "loss": 0.0472, "step": 6196 }, { "epoch": 5.78, "learning_rate": 4.927705223880597e-05, "loss": 0.0154, "step": 6200 }, { "epoch": 5.79, "learning_rate": 4.9276585820895524e-05, "loss": 0.0054, "step": 6204 }, { "epoch": 5.79, "learning_rate": 4.927611940298508e-05, "loss": 0.0133, "step": 6208 }, { "epoch": 5.79, "learning_rate": 4.927565298507463e-05, "loss": 0.0163, "step": 6212 }, { "epoch": 5.8, "learning_rate": 4.927518656716418e-05, "loss": 0.0296, "step": 6216 }, { "epoch": 5.8, "learning_rate": 4.927472014925374e-05, "loss": 0.0075, "step": 6220 }, { "epoch": 5.81, "learning_rate": 4.9274253731343285e-05, "loss": 0.0128, "step": 6224 }, { "epoch": 5.81, "learning_rate": 4.927378731343284e-05, "loss": 0.0127, "step": 6228 }, { "epoch": 5.81, "learning_rate": 4.927332089552239e-05, "loss": 0.0246, "step": 6232 }, { "epoch": 5.82, "learning_rate": 4.927285447761194e-05, "loss": 0.0223, "step": 6236 }, { "epoch": 5.82, "learning_rate": 4.92723880597015e-05, "loss": 0.0312, "step": 6240 }, { "epoch": 5.82, "learning_rate": 4.9271921641791046e-05, "loss": 0.0177, "step": 6244 }, { "epoch": 5.83, "learning_rate": 4.9271455223880594e-05, "loss": 0.0184, "step": 6248 }, { "epoch": 5.83, "learning_rate": 4.9270988805970156e-05, "loss": 0.0151, "step": 6252 }, { "epoch": 5.84, "learning_rate": 4.9270522388059704e-05, "loss": 0.0263, "step": 6256 }, { "epoch": 5.84, "learning_rate": 4.927005597014925e-05, "loss": 0.0185, "step": 6260 }, { "epoch": 5.84, "learning_rate": 4.926958955223881e-05, "loss": 0.0235, "step": 6264 }, { "epoch": 5.85, "learning_rate": 4.926912313432836e-05, "loss": 0.0132, "step": 6268 }, { "epoch": 5.85, "learning_rate": 4.926865671641791e-05, "loss": 0.0167, "step": 6272 }, { "epoch": 5.85, "learning_rate": 4.9268190298507465e-05, "loss": 0.0097, "step": 6276 }, { "epoch": 5.86, "learning_rate": 4.926772388059702e-05, "loss": 0.0157, "step": 6280 }, { "epoch": 5.86, "learning_rate": 4.926725746268657e-05, "loss": 0.0161, "step": 6284 }, { "epoch": 5.87, "learning_rate": 4.926679104477612e-05, "loss": 0.0129, "step": 6288 }, { "epoch": 5.87, "learning_rate": 4.926632462686567e-05, "loss": 0.0307, "step": 6292 }, { "epoch": 5.87, "learning_rate": 4.9265858208955226e-05, "loss": 0.0091, "step": 6296 }, { "epoch": 5.88, "learning_rate": 4.926539179104478e-05, "loss": 0.0108, "step": 6300 }, { "epoch": 5.88, "learning_rate": 4.926492537313433e-05, "loss": 0.0164, "step": 6304 }, { "epoch": 5.88, "learning_rate": 4.9264458955223884e-05, "loss": 0.0067, "step": 6308 }, { "epoch": 5.89, "learning_rate": 4.926399253731344e-05, "loss": 0.0078, "step": 6312 }, { "epoch": 5.89, "learning_rate": 4.926352611940299e-05, "loss": 0.0118, "step": 6316 }, { "epoch": 5.9, "learning_rate": 4.926305970149254e-05, "loss": 0.0174, "step": 6320 }, { "epoch": 5.9, "learning_rate": 4.926259328358209e-05, "loss": 0.0226, "step": 6324 }, { "epoch": 5.9, "learning_rate": 4.9262126865671645e-05, "loss": 0.013, "step": 6328 }, { "epoch": 5.91, "learning_rate": 4.92616604477612e-05, "loss": 0.0241, "step": 6332 }, { "epoch": 5.91, "learning_rate": 4.926119402985075e-05, "loss": 0.0154, "step": 6336 }, { "epoch": 5.91, "learning_rate": 4.92607276119403e-05, "loss": 0.0339, "step": 6340 }, { "epoch": 5.92, "learning_rate": 4.926026119402986e-05, "loss": 0.0147, "step": 6344 }, { "epoch": 5.92, "learning_rate": 4.9259794776119406e-05, "loss": 0.0203, "step": 6348 }, { "epoch": 5.93, "learning_rate": 4.9259328358208954e-05, "loss": 0.008, "step": 6352 }, { "epoch": 5.93, "learning_rate": 4.925886194029851e-05, "loss": 0.0371, "step": 6356 }, { "epoch": 5.93, "learning_rate": 4.9258395522388064e-05, "loss": 0.0161, "step": 6360 }, { "epoch": 5.94, "learning_rate": 4.925792910447761e-05, "loss": 0.02, "step": 6364 }, { "epoch": 5.94, "learning_rate": 4.925746268656717e-05, "loss": 0.0117, "step": 6368 }, { "epoch": 5.94, "learning_rate": 4.925699626865672e-05, "loss": 0.0057, "step": 6372 }, { "epoch": 5.95, "learning_rate": 4.925652985074627e-05, "loss": 0.0157, "step": 6376 }, { "epoch": 5.95, "learning_rate": 4.9256063432835825e-05, "loss": 0.0115, "step": 6380 }, { "epoch": 5.96, "learning_rate": 4.925559701492537e-05, "loss": 0.0082, "step": 6384 }, { "epoch": 5.96, "learning_rate": 4.925513059701493e-05, "loss": 0.009, "step": 6388 }, { "epoch": 5.96, "learning_rate": 4.925466417910448e-05, "loss": 0.0169, "step": 6392 }, { "epoch": 5.97, "learning_rate": 4.925419776119403e-05, "loss": 0.0204, "step": 6396 }, { "epoch": 5.97, "learning_rate": 4.9253731343283586e-05, "loss": 0.0356, "step": 6400 }, { "epoch": 5.97, "learning_rate": 4.925326492537314e-05, "loss": 0.0079, "step": 6404 }, { "epoch": 5.98, "learning_rate": 4.925279850746269e-05, "loss": 0.008, "step": 6408 }, { "epoch": 5.98, "learning_rate": 4.925233208955224e-05, "loss": 0.01, "step": 6412 }, { "epoch": 5.98, "learning_rate": 4.925186567164179e-05, "loss": 0.0309, "step": 6416 }, { "epoch": 5.99, "learning_rate": 4.9251399253731347e-05, "loss": 0.023, "step": 6420 }, { "epoch": 5.99, "learning_rate": 4.9250932835820895e-05, "loss": 0.0079, "step": 6424 }, { "epoch": 6.0, "learning_rate": 4.925046641791045e-05, "loss": 0.0145, "step": 6428 }, { "epoch": 6.0, "learning_rate": 4.9250000000000004e-05, "loss": 0.0435, "step": 6432 }, { "epoch": 6.0, "learning_rate": 4.924953358208955e-05, "loss": 0.0114, "step": 6436 }, { "epoch": 6.01, "learning_rate": 4.924906716417911e-05, "loss": 0.0078, "step": 6440 }, { "epoch": 6.01, "learning_rate": 4.9248600746268656e-05, "loss": 0.0081, "step": 6444 }, { "epoch": 6.01, "learning_rate": 4.924813432835821e-05, "loss": 0.013, "step": 6448 }, { "epoch": 6.02, "learning_rate": 4.9247667910447765e-05, "loss": 0.0108, "step": 6452 }, { "epoch": 6.02, "learning_rate": 4.9247201492537314e-05, "loss": 0.0088, "step": 6456 }, { "epoch": 6.03, "learning_rate": 4.924673507462687e-05, "loss": 0.0064, "step": 6460 }, { "epoch": 6.03, "learning_rate": 4.9246268656716423e-05, "loss": 0.0124, "step": 6464 }, { "epoch": 6.03, "learning_rate": 4.924580223880597e-05, "loss": 0.0052, "step": 6468 }, { "epoch": 6.04, "learning_rate": 4.9245335820895526e-05, "loss": 0.0062, "step": 6472 }, { "epoch": 6.04, "learning_rate": 4.9244869402985075e-05, "loss": 0.0173, "step": 6476 }, { "epoch": 6.04, "learning_rate": 4.924440298507463e-05, "loss": 0.014, "step": 6480 }, { "epoch": 6.05, "learning_rate": 4.9243936567164184e-05, "loss": 0.0144, "step": 6484 }, { "epoch": 6.05, "learning_rate": 4.924347014925373e-05, "loss": 0.0037, "step": 6488 }, { "epoch": 6.06, "learning_rate": 4.924300373134329e-05, "loss": 0.0062, "step": 6492 }, { "epoch": 6.06, "learning_rate": 4.924253731343284e-05, "loss": 0.0095, "step": 6496 }, { "epoch": 6.06, "learning_rate": 4.924207089552239e-05, "loss": 0.0078, "step": 6500 }, { "epoch": 6.06, "eval_exact_match": 0.7195357833655706, "eval_exec": 0.741779497098646, "eval_loss": 0.2259366363286972, "eval_runtime": 1072.2544, "eval_samples_per_second": 0.964, "step": 6500 }, { "epoch": 6.07, "learning_rate": 4.924160447761194e-05, "loss": 0.0215, "step": 6504 }, { "epoch": 6.07, "learning_rate": 4.92411380597015e-05, "loss": 0.005, "step": 6508 }, { "epoch": 6.07, "learning_rate": 4.924067164179105e-05, "loss": 0.0141, "step": 6512 }, { "epoch": 6.08, "learning_rate": 4.9240205223880597e-05, "loss": 0.0077, "step": 6516 }, { "epoch": 6.08, "learning_rate": 4.923973880597015e-05, "loss": 0.0067, "step": 6520 }, { "epoch": 6.09, "learning_rate": 4.9239272388059706e-05, "loss": 0.0072, "step": 6524 }, { "epoch": 6.09, "learning_rate": 4.9238805970149255e-05, "loss": 0.0193, "step": 6528 }, { "epoch": 6.09, "learning_rate": 4.923833955223881e-05, "loss": 0.0103, "step": 6532 }, { "epoch": 6.1, "learning_rate": 4.923787313432836e-05, "loss": 0.0157, "step": 6536 }, { "epoch": 6.1, "learning_rate": 4.923740671641791e-05, "loss": 0.0103, "step": 6540 }, { "epoch": 6.1, "learning_rate": 4.923694029850747e-05, "loss": 0.0052, "step": 6544 }, { "epoch": 6.11, "learning_rate": 4.9236473880597016e-05, "loss": 0.0028, "step": 6548 }, { "epoch": 6.11, "learning_rate": 4.923600746268657e-05, "loss": 0.0065, "step": 6552 }, { "epoch": 6.12, "learning_rate": 4.9235541044776125e-05, "loss": 0.0114, "step": 6556 }, { "epoch": 6.12, "learning_rate": 4.9235074626865673e-05, "loss": 0.0147, "step": 6560 }, { "epoch": 6.12, "learning_rate": 4.923460820895522e-05, "loss": 0.0049, "step": 6564 }, { "epoch": 6.13, "learning_rate": 4.923414179104478e-05, "loss": 0.0079, "step": 6568 }, { "epoch": 6.13, "learning_rate": 4.923367537313433e-05, "loss": 0.0099, "step": 6572 }, { "epoch": 6.13, "learning_rate": 4.923320895522388e-05, "loss": 0.0079, "step": 6576 }, { "epoch": 6.14, "learning_rate": 4.9232742537313434e-05, "loss": 0.0042, "step": 6580 }, { "epoch": 6.14, "learning_rate": 4.923227611940299e-05, "loss": 0.0112, "step": 6584 }, { "epoch": 6.15, "learning_rate": 4.923180970149254e-05, "loss": 0.006, "step": 6588 }, { "epoch": 6.15, "learning_rate": 4.923134328358209e-05, "loss": 0.0107, "step": 6592 }, { "epoch": 6.15, "learning_rate": 4.923087686567164e-05, "loss": 0.0112, "step": 6596 }, { "epoch": 6.16, "learning_rate": 4.9230410447761195e-05, "loss": 0.0251, "step": 6600 }, { "epoch": 6.16, "learning_rate": 4.922994402985075e-05, "loss": 0.023, "step": 6604 }, { "epoch": 6.16, "learning_rate": 4.92294776119403e-05, "loss": 0.016, "step": 6608 }, { "epoch": 6.17, "learning_rate": 4.922901119402985e-05, "loss": 0.0044, "step": 6612 }, { "epoch": 6.17, "learning_rate": 4.922854477611941e-05, "loss": 0.018, "step": 6616 }, { "epoch": 6.18, "learning_rate": 4.9228078358208956e-05, "loss": 0.0172, "step": 6620 }, { "epoch": 6.18, "learning_rate": 4.922761194029851e-05, "loss": 0.0419, "step": 6624 }, { "epoch": 6.18, "learning_rate": 4.9227145522388066e-05, "loss": 0.0044, "step": 6628 }, { "epoch": 6.19, "learning_rate": 4.9226679104477614e-05, "loss": 0.0193, "step": 6632 }, { "epoch": 6.19, "learning_rate": 4.922621268656717e-05, "loss": 0.0122, "step": 6636 }, { "epoch": 6.19, "learning_rate": 4.922574626865672e-05, "loss": 0.009, "step": 6640 }, { "epoch": 6.2, "learning_rate": 4.922527985074627e-05, "loss": 0.0184, "step": 6644 }, { "epoch": 6.2, "learning_rate": 4.922481343283583e-05, "loss": 0.0084, "step": 6648 }, { "epoch": 6.21, "learning_rate": 4.9224347014925375e-05, "loss": 0.0035, "step": 6652 }, { "epoch": 6.21, "learning_rate": 4.9223880597014923e-05, "loss": 0.0089, "step": 6656 }, { "epoch": 6.21, "learning_rate": 4.9223414179104485e-05, "loss": 0.0073, "step": 6660 }, { "epoch": 6.22, "learning_rate": 4.922294776119403e-05, "loss": 0.0126, "step": 6664 }, { "epoch": 6.22, "learning_rate": 4.922248134328358e-05, "loss": 0.0028, "step": 6668 }, { "epoch": 6.22, "learning_rate": 4.9222014925373136e-05, "loss": 0.0066, "step": 6672 }, { "epoch": 6.23, "learning_rate": 4.922154850746269e-05, "loss": 0.0041, "step": 6676 }, { "epoch": 6.23, "learning_rate": 4.922108208955224e-05, "loss": 0.0064, "step": 6680 }, { "epoch": 6.24, "learning_rate": 4.9220615671641794e-05, "loss": 0.0171, "step": 6684 }, { "epoch": 6.24, "learning_rate": 4.922014925373134e-05, "loss": 0.0186, "step": 6688 }, { "epoch": 6.24, "learning_rate": 4.92196828358209e-05, "loss": 0.0028, "step": 6692 }, { "epoch": 6.25, "learning_rate": 4.921921641791045e-05, "loss": 0.0133, "step": 6696 }, { "epoch": 6.25, "learning_rate": 4.921875e-05, "loss": 0.0054, "step": 6700 }, { "epoch": 6.25, "learning_rate": 4.9218283582089555e-05, "loss": 0.0071, "step": 6704 }, { "epoch": 6.26, "learning_rate": 4.921781716417911e-05, "loss": 0.0216, "step": 6708 }, { "epoch": 6.26, "learning_rate": 4.921735074626866e-05, "loss": 0.0213, "step": 6712 }, { "epoch": 6.26, "learning_rate": 4.9216884328358206e-05, "loss": 0.0134, "step": 6716 }, { "epoch": 6.27, "learning_rate": 4.921641791044777e-05, "loss": 0.0077, "step": 6720 }, { "epoch": 6.27, "learning_rate": 4.9215951492537316e-05, "loss": 0.0022, "step": 6724 }, { "epoch": 6.28, "learning_rate": 4.9215485074626864e-05, "loss": 0.0227, "step": 6728 }, { "epoch": 6.28, "learning_rate": 4.921501865671642e-05, "loss": 0.0049, "step": 6732 }, { "epoch": 6.28, "learning_rate": 4.9214552238805974e-05, "loss": 0.0083, "step": 6736 }, { "epoch": 6.29, "learning_rate": 4.921408582089552e-05, "loss": 0.0151, "step": 6740 }, { "epoch": 6.29, "learning_rate": 4.921361940298508e-05, "loss": 0.0093, "step": 6744 }, { "epoch": 6.29, "learning_rate": 4.9213152985074625e-05, "loss": 0.0131, "step": 6748 }, { "epoch": 6.3, "learning_rate": 4.921268656716418e-05, "loss": 0.0173, "step": 6752 }, { "epoch": 6.3, "learning_rate": 4.9212220149253735e-05, "loss": 0.0198, "step": 6756 }, { "epoch": 6.31, "learning_rate": 4.921175373134328e-05, "loss": 0.014, "step": 6760 }, { "epoch": 6.31, "learning_rate": 4.921128731343284e-05, "loss": 0.0136, "step": 6764 }, { "epoch": 6.31, "learning_rate": 4.921082089552239e-05, "loss": 0.011, "step": 6768 }, { "epoch": 6.32, "learning_rate": 4.921035447761194e-05, "loss": 0.0175, "step": 6772 }, { "epoch": 6.32, "learning_rate": 4.920988805970149e-05, "loss": 0.0123, "step": 6776 }, { "epoch": 6.32, "learning_rate": 4.920942164179105e-05, "loss": 0.0137, "step": 6780 }, { "epoch": 6.33, "learning_rate": 4.92089552238806e-05, "loss": 0.0129, "step": 6784 }, { "epoch": 6.33, "learning_rate": 4.9208488805970154e-05, "loss": 0.0116, "step": 6788 }, { "epoch": 6.34, "learning_rate": 4.92080223880597e-05, "loss": 0.0142, "step": 6792 }, { "epoch": 6.34, "learning_rate": 4.920755597014926e-05, "loss": 0.0161, "step": 6796 }, { "epoch": 6.34, "learning_rate": 4.920708955223881e-05, "loss": 0.0078, "step": 6800 }, { "epoch": 6.35, "learning_rate": 4.920662313432836e-05, "loss": 0.0183, "step": 6804 }, { "epoch": 6.35, "learning_rate": 4.920615671641791e-05, "loss": 0.0138, "step": 6808 }, { "epoch": 6.35, "learning_rate": 4.920569029850747e-05, "loss": 0.0128, "step": 6812 }, { "epoch": 6.36, "learning_rate": 4.920522388059702e-05, "loss": 0.0105, "step": 6816 }, { "epoch": 6.36, "learning_rate": 4.9204757462686566e-05, "loss": 0.0246, "step": 6820 }, { "epoch": 6.37, "learning_rate": 4.920429104477612e-05, "loss": 0.0076, "step": 6824 }, { "epoch": 6.37, "learning_rate": 4.9203824626865676e-05, "loss": 0.0074, "step": 6828 }, { "epoch": 6.37, "learning_rate": 4.9203358208955224e-05, "loss": 0.0065, "step": 6832 }, { "epoch": 6.38, "learning_rate": 4.920289179104478e-05, "loss": 0.0156, "step": 6836 }, { "epoch": 6.38, "learning_rate": 4.9202425373134334e-05, "loss": 0.0128, "step": 6840 }, { "epoch": 6.38, "learning_rate": 4.920195895522388e-05, "loss": 0.015, "step": 6844 }, { "epoch": 6.39, "learning_rate": 4.920149253731344e-05, "loss": 0.0054, "step": 6848 }, { "epoch": 6.39, "learning_rate": 4.9201026119402985e-05, "loss": 0.0092, "step": 6852 }, { "epoch": 6.4, "learning_rate": 4.920055970149254e-05, "loss": 0.0159, "step": 6856 }, { "epoch": 6.4, "learning_rate": 4.9200093283582095e-05, "loss": 0.0132, "step": 6860 }, { "epoch": 6.4, "learning_rate": 4.919962686567164e-05, "loss": 0.0106, "step": 6864 }, { "epoch": 6.41, "learning_rate": 4.919916044776119e-05, "loss": 0.0067, "step": 6868 }, { "epoch": 6.41, "learning_rate": 4.919869402985075e-05, "loss": 0.0088, "step": 6872 }, { "epoch": 6.41, "learning_rate": 4.91982276119403e-05, "loss": 0.0098, "step": 6876 }, { "epoch": 6.42, "learning_rate": 4.919776119402985e-05, "loss": 0.0177, "step": 6880 }, { "epoch": 6.42, "learning_rate": 4.9197294776119404e-05, "loss": 0.0097, "step": 6884 }, { "epoch": 6.43, "learning_rate": 4.919682835820896e-05, "loss": 0.0176, "step": 6888 }, { "epoch": 6.43, "learning_rate": 4.919636194029851e-05, "loss": 0.0085, "step": 6892 }, { "epoch": 6.43, "learning_rate": 4.919589552238806e-05, "loss": 0.0129, "step": 6896 }, { "epoch": 6.44, "learning_rate": 4.919542910447762e-05, "loss": 0.0131, "step": 6900 }, { "epoch": 6.44, "learning_rate": 4.9194962686567165e-05, "loss": 0.0155, "step": 6904 }, { "epoch": 6.44, "learning_rate": 4.919449626865672e-05, "loss": 0.009, "step": 6908 }, { "epoch": 6.45, "learning_rate": 4.919402985074627e-05, "loss": 0.0105, "step": 6912 }, { "epoch": 6.45, "learning_rate": 4.919356343283582e-05, "loss": 0.013, "step": 6916 }, { "epoch": 6.46, "learning_rate": 4.919309701492538e-05, "loss": 0.0181, "step": 6920 }, { "epoch": 6.46, "learning_rate": 4.9192630597014926e-05, "loss": 0.0118, "step": 6924 }, { "epoch": 6.46, "learning_rate": 4.9192164179104474e-05, "loss": 0.0109, "step": 6928 }, { "epoch": 6.47, "learning_rate": 4.9191697761194036e-05, "loss": 0.011, "step": 6932 }, { "epoch": 6.47, "learning_rate": 4.9191231343283584e-05, "loss": 0.0091, "step": 6936 }, { "epoch": 6.47, "learning_rate": 4.919076492537313e-05, "loss": 0.0233, "step": 6940 }, { "epoch": 6.48, "learning_rate": 4.919029850746269e-05, "loss": 0.0207, "step": 6944 }, { "epoch": 6.48, "learning_rate": 4.918983208955224e-05, "loss": 0.0153, "step": 6948 }, { "epoch": 6.49, "learning_rate": 4.91893656716418e-05, "loss": 0.0155, "step": 6952 }, { "epoch": 6.49, "learning_rate": 4.9188899253731345e-05, "loss": 0.015, "step": 6956 }, { "epoch": 6.49, "learning_rate": 4.91884328358209e-05, "loss": 0.0072, "step": 6960 }, { "epoch": 6.5, "learning_rate": 4.9187966417910455e-05, "loss": 0.0077, "step": 6964 }, { "epoch": 6.5, "learning_rate": 4.91875e-05, "loss": 0.017, "step": 6968 }, { "epoch": 6.5, "learning_rate": 4.918703358208955e-05, "loss": 0.0154, "step": 6972 }, { "epoch": 6.51, "learning_rate": 4.9186567164179106e-05, "loss": 0.0102, "step": 6976 }, { "epoch": 6.51, "learning_rate": 4.918610074626866e-05, "loss": 0.0213, "step": 6980 }, { "epoch": 6.51, "learning_rate": 4.918563432835821e-05, "loss": 0.0095, "step": 6984 }, { "epoch": 6.52, "learning_rate": 4.9185167910447764e-05, "loss": 0.0187, "step": 6988 }, { "epoch": 6.52, "learning_rate": 4.918470149253732e-05, "loss": 0.0093, "step": 6992 }, { "epoch": 6.53, "learning_rate": 4.918423507462687e-05, "loss": 0.0095, "step": 6996 }, { "epoch": 6.53, "learning_rate": 4.918376865671642e-05, "loss": 0.0084, "step": 7000 }, { "epoch": 6.53, "eval_exact_match": 0.7030947775628626, "eval_exec": 0.7504835589941973, "eval_loss": 0.20910172164440155, "eval_runtime": 1063.5178, "eval_samples_per_second": 0.972, "step": 7000 }, { "epoch": 6.53, "learning_rate": 4.918330223880597e-05, "loss": 0.0181, "step": 7004 }, { "epoch": 6.54, "learning_rate": 4.9182835820895525e-05, "loss": 0.011, "step": 7008 }, { "epoch": 6.54, "learning_rate": 4.918236940298508e-05, "loss": 0.0015, "step": 7012 }, { "epoch": 6.54, "learning_rate": 4.918190298507463e-05, "loss": 0.0112, "step": 7016 }, { "epoch": 6.55, "learning_rate": 4.918143656716418e-05, "loss": 0.0163, "step": 7020 }, { "epoch": 6.55, "learning_rate": 4.918097014925374e-05, "loss": 0.0113, "step": 7024 }, { "epoch": 6.56, "learning_rate": 4.9180503731343286e-05, "loss": 0.0139, "step": 7028 }, { "epoch": 6.56, "learning_rate": 4.9180037313432834e-05, "loss": 0.019, "step": 7032 }, { "epoch": 6.56, "learning_rate": 4.917957089552239e-05, "loss": 0.0094, "step": 7036 }, { "epoch": 6.57, "learning_rate": 4.9179104477611944e-05, "loss": 0.0109, "step": 7040 }, { "epoch": 6.57, "learning_rate": 4.917863805970149e-05, "loss": 0.0132, "step": 7044 }, { "epoch": 6.57, "learning_rate": 4.917817164179105e-05, "loss": 0.0169, "step": 7048 }, { "epoch": 6.58, "learning_rate": 4.91777052238806e-05, "loss": 0.0235, "step": 7052 }, { "epoch": 6.58, "learning_rate": 4.917723880597015e-05, "loss": 0.0129, "step": 7056 }, { "epoch": 6.59, "learning_rate": 4.9176772388059705e-05, "loss": 0.0266, "step": 7060 }, { "epoch": 6.59, "learning_rate": 4.917630597014925e-05, "loss": 0.0084, "step": 7064 }, { "epoch": 6.59, "learning_rate": 4.917583955223881e-05, "loss": 0.0167, "step": 7068 }, { "epoch": 6.6, "learning_rate": 4.917537313432836e-05, "loss": 0.0045, "step": 7072 }, { "epoch": 6.6, "learning_rate": 4.917490671641791e-05, "loss": 0.0207, "step": 7076 }, { "epoch": 6.6, "learning_rate": 4.9174440298507466e-05, "loss": 0.0144, "step": 7080 }, { "epoch": 6.61, "learning_rate": 4.917397388059702e-05, "loss": 0.0044, "step": 7084 }, { "epoch": 6.61, "learning_rate": 4.917350746268657e-05, "loss": 0.0197, "step": 7088 }, { "epoch": 6.62, "learning_rate": 4.917304104477612e-05, "loss": 0.0137, "step": 7092 }, { "epoch": 6.62, "learning_rate": 4.917257462686567e-05, "loss": 0.0139, "step": 7096 }, { "epoch": 6.62, "learning_rate": 4.917210820895523e-05, "loss": 0.0188, "step": 7100 }, { "epoch": 6.63, "learning_rate": 4.9171641791044775e-05, "loss": 0.0072, "step": 7104 }, { "epoch": 6.63, "learning_rate": 4.917117537313433e-05, "loss": 0.01, "step": 7108 }, { "epoch": 6.63, "learning_rate": 4.9170708955223885e-05, "loss": 0.0185, "step": 7112 }, { "epoch": 6.64, "learning_rate": 4.917024253731344e-05, "loss": 0.005, "step": 7116 }, { "epoch": 6.64, "learning_rate": 4.916977611940299e-05, "loss": 0.0143, "step": 7120 }, { "epoch": 6.65, "learning_rate": 4.9169309701492536e-05, "loss": 0.0176, "step": 7124 }, { "epoch": 6.65, "learning_rate": 4.91688432835821e-05, "loss": 0.0129, "step": 7128 }, { "epoch": 6.65, "learning_rate": 4.9168376865671646e-05, "loss": 0.0038, "step": 7132 }, { "epoch": 6.66, "learning_rate": 4.9167910447761194e-05, "loss": 0.0117, "step": 7136 }, { "epoch": 6.66, "learning_rate": 4.916744402985075e-05, "loss": 0.0063, "step": 7140 }, { "epoch": 6.66, "learning_rate": 4.9166977611940304e-05, "loss": 0.0198, "step": 7144 }, { "epoch": 6.67, "learning_rate": 4.916651119402985e-05, "loss": 0.0205, "step": 7148 }, { "epoch": 6.67, "learning_rate": 4.916604477611941e-05, "loss": 0.0097, "step": 7152 }, { "epoch": 6.68, "learning_rate": 4.9165578358208955e-05, "loss": 0.0118, "step": 7156 }, { "epoch": 6.68, "learning_rate": 4.916511194029851e-05, "loss": 0.0137, "step": 7160 }, { "epoch": 6.68, "learning_rate": 4.9164645522388065e-05, "loss": 0.0047, "step": 7164 }, { "epoch": 6.69, "learning_rate": 4.916417910447761e-05, "loss": 0.0149, "step": 7168 }, { "epoch": 6.69, "learning_rate": 4.916371268656717e-05, "loss": 0.0069, "step": 7172 }, { "epoch": 6.69, "learning_rate": 4.916324626865672e-05, "loss": 0.0114, "step": 7176 }, { "epoch": 6.7, "learning_rate": 4.916277985074627e-05, "loss": 0.0087, "step": 7180 }, { "epoch": 6.7, "learning_rate": 4.916231343283582e-05, "loss": 0.0021, "step": 7184 }, { "epoch": 6.71, "learning_rate": 4.916184701492538e-05, "loss": 0.0121, "step": 7188 }, { "epoch": 6.71, "learning_rate": 4.916138059701493e-05, "loss": 0.0042, "step": 7192 }, { "epoch": 6.71, "learning_rate": 4.916091417910448e-05, "loss": 0.0052, "step": 7196 }, { "epoch": 6.72, "learning_rate": 4.916044776119403e-05, "loss": 0.0108, "step": 7200 }, { "epoch": 6.72, "learning_rate": 4.915998134328359e-05, "loss": 0.0175, "step": 7204 }, { "epoch": 6.72, "learning_rate": 4.9159514925373135e-05, "loss": 0.0045, "step": 7208 }, { "epoch": 6.73, "learning_rate": 4.915904850746269e-05, "loss": 0.0068, "step": 7212 }, { "epoch": 6.73, "learning_rate": 4.915858208955224e-05, "loss": 0.0096, "step": 7216 }, { "epoch": 6.73, "learning_rate": 4.915811567164179e-05, "loss": 0.0232, "step": 7220 }, { "epoch": 6.74, "learning_rate": 4.915764925373135e-05, "loss": 0.0145, "step": 7224 }, { "epoch": 6.74, "learning_rate": 4.9157182835820896e-05, "loss": 0.0063, "step": 7228 }, { "epoch": 6.75, "learning_rate": 4.915671641791045e-05, "loss": 0.0228, "step": 7232 }, { "epoch": 6.75, "learning_rate": 4.9156250000000006e-05, "loss": 0.0146, "step": 7236 }, { "epoch": 6.75, "learning_rate": 4.9155783582089554e-05, "loss": 0.0097, "step": 7240 }, { "epoch": 6.76, "learning_rate": 4.91553171641791e-05, "loss": 0.0089, "step": 7244 }, { "epoch": 6.76, "learning_rate": 4.9154850746268664e-05, "loss": 0.0091, "step": 7248 }, { "epoch": 6.76, "learning_rate": 4.915438432835821e-05, "loss": 0.0064, "step": 7252 }, { "epoch": 6.77, "learning_rate": 4.915391791044776e-05, "loss": 0.0121, "step": 7256 }, { "epoch": 6.77, "learning_rate": 4.9153451492537315e-05, "loss": 0.0026, "step": 7260 }, { "epoch": 6.78, "learning_rate": 4.915298507462687e-05, "loss": 0.0141, "step": 7264 }, { "epoch": 6.78, "learning_rate": 4.915251865671642e-05, "loss": 0.004, "step": 7268 }, { "epoch": 6.78, "learning_rate": 4.915205223880597e-05, "loss": 0.0124, "step": 7272 }, { "epoch": 6.79, "learning_rate": 4.915158582089552e-05, "loss": 0.0115, "step": 7276 }, { "epoch": 6.79, "learning_rate": 4.915111940298508e-05, "loss": 0.0193, "step": 7280 }, { "epoch": 6.79, "learning_rate": 4.915065298507463e-05, "loss": 0.0088, "step": 7284 }, { "epoch": 6.8, "learning_rate": 4.915018656716418e-05, "loss": 0.0076, "step": 7288 }, { "epoch": 6.8, "learning_rate": 4.9149720149253734e-05, "loss": 0.0137, "step": 7292 }, { "epoch": 6.81, "learning_rate": 4.914925373134329e-05, "loss": 0.0108, "step": 7296 }, { "epoch": 6.81, "learning_rate": 4.914878731343284e-05, "loss": 0.008, "step": 7300 }, { "epoch": 6.81, "learning_rate": 4.914832089552239e-05, "loss": 0.0146, "step": 7304 }, { "epoch": 6.82, "learning_rate": 4.9147854477611947e-05, "loss": 0.0134, "step": 7308 }, { "epoch": 6.82, "learning_rate": 4.9147388059701495e-05, "loss": 0.0099, "step": 7312 }, { "epoch": 6.82, "learning_rate": 4.914692164179105e-05, "loss": 0.0079, "step": 7316 }, { "epoch": 6.83, "learning_rate": 4.91464552238806e-05, "loss": 0.0115, "step": 7320 }, { "epoch": 6.83, "learning_rate": 4.914598880597015e-05, "loss": 0.0088, "step": 7324 }, { "epoch": 6.84, "learning_rate": 4.914552238805971e-05, "loss": 0.0175, "step": 7328 }, { "epoch": 6.84, "learning_rate": 4.9145055970149256e-05, "loss": 0.0159, "step": 7332 }, { "epoch": 6.84, "learning_rate": 4.9144589552238804e-05, "loss": 0.0211, "step": 7336 }, { "epoch": 6.85, "learning_rate": 4.9144123134328365e-05, "loss": 0.02, "step": 7340 }, { "epoch": 6.85, "learning_rate": 4.9143656716417914e-05, "loss": 0.0059, "step": 7344 }, { "epoch": 6.85, "learning_rate": 4.914319029850746e-05, "loss": 0.0227, "step": 7348 }, { "epoch": 6.86, "learning_rate": 4.9142723880597017e-05, "loss": 0.0125, "step": 7352 }, { "epoch": 6.86, "learning_rate": 4.914225746268657e-05, "loss": 0.0047, "step": 7356 }, { "epoch": 6.87, "learning_rate": 4.914179104477612e-05, "loss": 0.0119, "step": 7360 }, { "epoch": 6.87, "learning_rate": 4.9141324626865675e-05, "loss": 0.0123, "step": 7364 }, { "epoch": 6.87, "learning_rate": 4.914085820895522e-05, "loss": 0.0082, "step": 7368 }, { "epoch": 6.88, "learning_rate": 4.914039179104478e-05, "loss": 0.0152, "step": 7372 }, { "epoch": 6.88, "learning_rate": 4.913992537313433e-05, "loss": 0.0106, "step": 7376 }, { "epoch": 6.88, "learning_rate": 4.913945895522388e-05, "loss": 0.0141, "step": 7380 }, { "epoch": 6.89, "learning_rate": 4.9138992537313436e-05, "loss": 0.0142, "step": 7384 }, { "epoch": 6.89, "learning_rate": 4.913852611940299e-05, "loss": 0.0166, "step": 7388 }, { "epoch": 6.9, "learning_rate": 4.913805970149254e-05, "loss": 0.0135, "step": 7392 }, { "epoch": 6.9, "learning_rate": 4.913759328358209e-05, "loss": 0.0169, "step": 7396 }, { "epoch": 6.9, "learning_rate": 4.913712686567165e-05, "loss": 0.011, "step": 7400 }, { "epoch": 6.91, "learning_rate": 4.9136660447761197e-05, "loss": 0.0163, "step": 7404 }, { "epoch": 6.91, "learning_rate": 4.9136194029850745e-05, "loss": 0.0102, "step": 7408 }, { "epoch": 6.91, "learning_rate": 4.91357276119403e-05, "loss": 0.0066, "step": 7412 }, { "epoch": 6.92, "learning_rate": 4.9135261194029854e-05, "loss": 0.0132, "step": 7416 }, { "epoch": 6.92, "learning_rate": 4.91347947761194e-05, "loss": 0.0091, "step": 7420 }, { "epoch": 6.93, "learning_rate": 4.913432835820896e-05, "loss": 0.0095, "step": 7424 }, { "epoch": 6.93, "learning_rate": 4.9133861940298506e-05, "loss": 0.0222, "step": 7428 }, { "epoch": 6.93, "learning_rate": 4.913339552238806e-05, "loss": 0.0078, "step": 7432 }, { "epoch": 6.94, "learning_rate": 4.9132929104477615e-05, "loss": 0.0157, "step": 7436 }, { "epoch": 6.94, "learning_rate": 4.9132462686567164e-05, "loss": 0.0103, "step": 7440 }, { "epoch": 6.94, "learning_rate": 4.913199626865672e-05, "loss": 0.0178, "step": 7444 }, { "epoch": 6.95, "learning_rate": 4.9131529850746273e-05, "loss": 0.0115, "step": 7448 }, { "epoch": 6.95, "learning_rate": 4.913106343283582e-05, "loss": 0.0083, "step": 7452 }, { "epoch": 6.96, "learning_rate": 4.9130597014925376e-05, "loss": 0.0242, "step": 7456 }, { "epoch": 6.96, "learning_rate": 4.913013059701493e-05, "loss": 0.0086, "step": 7460 }, { "epoch": 6.96, "learning_rate": 4.912966417910448e-05, "loss": 0.0056, "step": 7464 }, { "epoch": 6.97, "learning_rate": 4.9129197761194034e-05, "loss": 0.0148, "step": 7468 }, { "epoch": 6.97, "learning_rate": 4.912873134328358e-05, "loss": 0.0127, "step": 7472 }, { "epoch": 6.97, "learning_rate": 4.912826492537314e-05, "loss": 0.0049, "step": 7476 }, { "epoch": 6.98, "learning_rate": 4.912779850746269e-05, "loss": 0.0044, "step": 7480 }, { "epoch": 6.98, "learning_rate": 4.912733208955224e-05, "loss": 0.0056, "step": 7484 }, { "epoch": 6.98, "learning_rate": 4.912686567164179e-05, "loss": 0.0335, "step": 7488 }, { "epoch": 6.99, "learning_rate": 4.912639925373135e-05, "loss": 0.0055, "step": 7492 }, { "epoch": 6.99, "learning_rate": 4.91259328358209e-05, "loss": 0.0129, "step": 7496 }, { "epoch": 7.0, "learning_rate": 4.9125466417910447e-05, "loss": 0.0122, "step": 7500 }, { "epoch": 7.0, "eval_exact_match": 0.730174081237911, "eval_exec": 0.7591876208897486, "eval_loss": 0.21238785982131958, "eval_runtime": 1160.0553, "eval_samples_per_second": 0.891, "step": 7500 }, { "epoch": 7.0, "learning_rate": 4.9125e-05, "loss": 0.0162, "step": 7504 }, { "epoch": 7.0, "learning_rate": 4.9124533582089556e-05, "loss": 0.0051, "step": 7508 }, { "epoch": 7.01, "learning_rate": 4.9124067164179104e-05, "loss": 0.0151, "step": 7512 }, { "epoch": 7.01, "learning_rate": 4.912360074626866e-05, "loss": 0.0096, "step": 7516 }, { "epoch": 7.01, "learning_rate": 4.9123134328358214e-05, "loss": 0.0107, "step": 7520 }, { "epoch": 7.02, "learning_rate": 4.912266791044776e-05, "loss": 0.0088, "step": 7524 }, { "epoch": 7.02, "learning_rate": 4.912220149253732e-05, "loss": 0.0022, "step": 7528 }, { "epoch": 7.03, "learning_rate": 4.9121735074626865e-05, "loss": 0.0129, "step": 7532 }, { "epoch": 7.03, "learning_rate": 4.912126865671642e-05, "loss": 0.0049, "step": 7536 }, { "epoch": 7.03, "learning_rate": 4.9120802238805975e-05, "loss": 0.0068, "step": 7540 }, { "epoch": 7.04, "learning_rate": 4.9120335820895523e-05, "loss": 0.0109, "step": 7544 }, { "epoch": 7.04, "learning_rate": 4.911986940298507e-05, "loss": 0.0266, "step": 7548 }, { "epoch": 7.04, "learning_rate": 4.911940298507463e-05, "loss": 0.0092, "step": 7552 }, { "epoch": 7.05, "learning_rate": 4.911893656716418e-05, "loss": 0.0166, "step": 7556 }, { "epoch": 7.05, "learning_rate": 4.911847014925373e-05, "loss": 0.007, "step": 7560 }, { "epoch": 7.06, "learning_rate": 4.9118003731343284e-05, "loss": 0.0054, "step": 7564 }, { "epoch": 7.06, "learning_rate": 4.911753731343284e-05, "loss": 0.0135, "step": 7568 }, { "epoch": 7.06, "learning_rate": 4.911707089552239e-05, "loss": 0.0149, "step": 7572 }, { "epoch": 7.07, "learning_rate": 4.911660447761194e-05, "loss": 0.0119, "step": 7576 }, { "epoch": 7.07, "learning_rate": 4.91161380597015e-05, "loss": 0.0054, "step": 7580 }, { "epoch": 7.07, "learning_rate": 4.9115671641791045e-05, "loss": 0.0073, "step": 7584 }, { "epoch": 7.08, "learning_rate": 4.91152052238806e-05, "loss": 0.0045, "step": 7588 }, { "epoch": 7.08, "learning_rate": 4.911473880597015e-05, "loss": 0.0114, "step": 7592 }, { "epoch": 7.09, "learning_rate": 4.91142723880597e-05, "loss": 0.0094, "step": 7596 }, { "epoch": 7.09, "learning_rate": 4.911380597014926e-05, "loss": 0.0177, "step": 7600 }, { "epoch": 7.09, "learning_rate": 4.9113339552238806e-05, "loss": 0.0134, "step": 7604 }, { "epoch": 7.1, "learning_rate": 4.911287313432836e-05, "loss": 0.0129, "step": 7608 }, { "epoch": 7.1, "learning_rate": 4.9112406716417916e-05, "loss": 0.0205, "step": 7612 }, { "epoch": 7.1, "learning_rate": 4.9111940298507464e-05, "loss": 0.0096, "step": 7616 }, { "epoch": 7.11, "learning_rate": 4.911147388059702e-05, "loss": 0.0108, "step": 7620 }, { "epoch": 7.11, "learning_rate": 4.911100746268657e-05, "loss": 0.0313, "step": 7624 }, { "epoch": 7.12, "learning_rate": 4.911054104477612e-05, "loss": 0.0134, "step": 7628 }, { "epoch": 7.12, "learning_rate": 4.911007462686568e-05, "loss": 0.005, "step": 7632 }, { "epoch": 7.12, "learning_rate": 4.9109608208955225e-05, "loss": 0.0154, "step": 7636 }, { "epoch": 7.13, "learning_rate": 4.910914179104478e-05, "loss": 0.0044, "step": 7640 }, { "epoch": 7.13, "learning_rate": 4.9108675373134335e-05, "loss": 0.0115, "step": 7644 }, { "epoch": 7.13, "learning_rate": 4.910820895522388e-05, "loss": 0.0098, "step": 7648 }, { "epoch": 7.14, "learning_rate": 4.910774253731343e-05, "loss": 0.003, "step": 7652 }, { "epoch": 7.14, "learning_rate": 4.9107276119402986e-05, "loss": 0.0067, "step": 7656 }, { "epoch": 7.15, "learning_rate": 4.910680970149254e-05, "loss": 0.0123, "step": 7660 }, { "epoch": 7.15, "learning_rate": 4.910634328358209e-05, "loss": 0.0124, "step": 7664 }, { "epoch": 7.15, "learning_rate": 4.9105876865671644e-05, "loss": 0.0063, "step": 7668 }, { "epoch": 7.16, "learning_rate": 4.91054104477612e-05, "loss": 0.0181, "step": 7672 }, { "epoch": 7.16, "learning_rate": 4.910494402985075e-05, "loss": 0.005, "step": 7676 }, { "epoch": 7.16, "learning_rate": 4.91044776119403e-05, "loss": 0.0052, "step": 7680 }, { "epoch": 7.17, "learning_rate": 4.910401119402985e-05, "loss": 0.0135, "step": 7684 }, { "epoch": 7.17, "learning_rate": 4.9103544776119405e-05, "loss": 0.0053, "step": 7688 }, { "epoch": 7.18, "learning_rate": 4.910307835820896e-05, "loss": 0.0116, "step": 7692 }, { "epoch": 7.18, "learning_rate": 4.910261194029851e-05, "loss": 0.0078, "step": 7696 }, { "epoch": 7.18, "learning_rate": 4.910214552238806e-05, "loss": 0.0033, "step": 7700 }, { "epoch": 7.19, "learning_rate": 4.910167910447762e-05, "loss": 0.0107, "step": 7704 }, { "epoch": 7.19, "learning_rate": 4.9101212686567166e-05, "loss": 0.0045, "step": 7708 }, { "epoch": 7.19, "learning_rate": 4.9100746268656714e-05, "loss": 0.0127, "step": 7712 }, { "epoch": 7.2, "learning_rate": 4.910027985074627e-05, "loss": 0.0106, "step": 7716 }, { "epoch": 7.2, "learning_rate": 4.9099813432835824e-05, "loss": 0.0105, "step": 7720 }, { "epoch": 7.21, "learning_rate": 4.909934701492537e-05, "loss": 0.0073, "step": 7724 }, { "epoch": 7.21, "learning_rate": 4.909888059701493e-05, "loss": 0.0075, "step": 7728 }, { "epoch": 7.21, "learning_rate": 4.909841417910448e-05, "loss": 0.013, "step": 7732 }, { "epoch": 7.22, "learning_rate": 4.909794776119403e-05, "loss": 0.0041, "step": 7736 }, { "epoch": 7.22, "learning_rate": 4.9097481343283585e-05, "loss": 0.0102, "step": 7740 }, { "epoch": 7.22, "learning_rate": 4.909701492537313e-05, "loss": 0.0078, "step": 7744 }, { "epoch": 7.23, "learning_rate": 4.909654850746269e-05, "loss": 0.0059, "step": 7748 }, { "epoch": 7.23, "learning_rate": 4.909608208955224e-05, "loss": 0.0029, "step": 7752 }, { "epoch": 7.24, "learning_rate": 4.909561567164179e-05, "loss": 0.0101, "step": 7756 }, { "epoch": 7.24, "learning_rate": 4.9095149253731346e-05, "loss": 0.0113, "step": 7760 }, { "epoch": 7.24, "learning_rate": 4.90946828358209e-05, "loss": 0.0097, "step": 7764 }, { "epoch": 7.25, "learning_rate": 4.909421641791045e-05, "loss": 0.003, "step": 7768 }, { "epoch": 7.25, "learning_rate": 4.9093750000000004e-05, "loss": 0.0047, "step": 7772 }, { "epoch": 7.25, "learning_rate": 4.909328358208955e-05, "loss": 0.0113, "step": 7776 }, { "epoch": 7.26, "learning_rate": 4.909281716417911e-05, "loss": 0.0074, "step": 7780 }, { "epoch": 7.26, "learning_rate": 4.909235074626866e-05, "loss": 0.0122, "step": 7784 }, { "epoch": 7.26, "learning_rate": 4.909188432835821e-05, "loss": 0.0312, "step": 7788 }, { "epoch": 7.27, "learning_rate": 4.9091417910447765e-05, "loss": 0.0104, "step": 7792 }, { "epoch": 7.27, "learning_rate": 4.909095149253732e-05, "loss": 0.0124, "step": 7796 }, { "epoch": 7.28, "learning_rate": 4.909048507462687e-05, "loss": 0.0217, "step": 7800 }, { "epoch": 7.28, "learning_rate": 4.9090018656716416e-05, "loss": 0.0164, "step": 7804 }, { "epoch": 7.28, "learning_rate": 4.908955223880598e-05, "loss": 0.0105, "step": 7808 }, { "epoch": 7.29, "learning_rate": 4.9089085820895526e-05, "loss": 0.01, "step": 7812 }, { "epoch": 7.29, "learning_rate": 4.9088619402985074e-05, "loss": 0.0267, "step": 7816 }, { "epoch": 7.29, "learning_rate": 4.908815298507463e-05, "loss": 0.0062, "step": 7820 }, { "epoch": 7.3, "learning_rate": 4.9087686567164184e-05, "loss": 0.0099, "step": 7824 }, { "epoch": 7.3, "learning_rate": 4.908722014925373e-05, "loss": 0.0089, "step": 7828 }, { "epoch": 7.31, "learning_rate": 4.908675373134329e-05, "loss": 0.0067, "step": 7832 }, { "epoch": 7.31, "learning_rate": 4.9086287313432835e-05, "loss": 0.0028, "step": 7836 }, { "epoch": 7.31, "learning_rate": 4.908582089552239e-05, "loss": 0.0052, "step": 7840 }, { "epoch": 7.32, "learning_rate": 4.9085354477611945e-05, "loss": 0.0033, "step": 7844 }, { "epoch": 7.32, "learning_rate": 4.908488805970149e-05, "loss": 0.0082, "step": 7848 }, { "epoch": 7.32, "learning_rate": 4.908442164179105e-05, "loss": 0.0134, "step": 7852 }, { "epoch": 7.33, "learning_rate": 4.90839552238806e-05, "loss": 0.0053, "step": 7856 }, { "epoch": 7.33, "learning_rate": 4.908348880597015e-05, "loss": 0.0142, "step": 7860 }, { "epoch": 7.34, "learning_rate": 4.90830223880597e-05, "loss": 0.0097, "step": 7864 }, { "epoch": 7.34, "learning_rate": 4.908255597014926e-05, "loss": 0.0063, "step": 7868 }, { "epoch": 7.34, "learning_rate": 4.908208955223881e-05, "loss": 0.0141, "step": 7872 }, { "epoch": 7.35, "learning_rate": 4.908162313432836e-05, "loss": 0.0046, "step": 7876 }, { "epoch": 7.35, "learning_rate": 4.908115671641791e-05, "loss": 0.0068, "step": 7880 }, { "epoch": 7.35, "learning_rate": 4.908069029850747e-05, "loss": 0.006, "step": 7884 }, { "epoch": 7.36, "learning_rate": 4.9080223880597015e-05, "loss": 0.0082, "step": 7888 }, { "epoch": 7.36, "learning_rate": 4.907975746268657e-05, "loss": 0.0071, "step": 7892 }, { "epoch": 7.37, "learning_rate": 4.907929104477612e-05, "loss": 0.0034, "step": 7896 }, { "epoch": 7.37, "learning_rate": 4.907882462686567e-05, "loss": 0.0028, "step": 7900 }, { "epoch": 7.37, "learning_rate": 4.907835820895523e-05, "loss": 0.004, "step": 7904 }, { "epoch": 7.38, "learning_rate": 4.9077891791044776e-05, "loss": 0.0083, "step": 7908 }, { "epoch": 7.38, "learning_rate": 4.907742537313433e-05, "loss": 0.0091, "step": 7912 }, { "epoch": 7.38, "learning_rate": 4.9076958955223886e-05, "loss": 0.0068, "step": 7916 }, { "epoch": 7.39, "learning_rate": 4.9076492537313434e-05, "loss": 0.0053, "step": 7920 }, { "epoch": 7.39, "learning_rate": 4.907602611940298e-05, "loss": 0.0092, "step": 7924 }, { "epoch": 7.4, "learning_rate": 4.9075559701492544e-05, "loss": 0.002, "step": 7928 }, { "epoch": 7.4, "learning_rate": 4.907509328358209e-05, "loss": 0.0121, "step": 7932 }, { "epoch": 7.4, "learning_rate": 4.907462686567165e-05, "loss": 0.0158, "step": 7936 }, { "epoch": 7.41, "learning_rate": 4.9074160447761195e-05, "loss": 0.0135, "step": 7940 }, { "epoch": 7.41, "learning_rate": 4.907369402985075e-05, "loss": 0.0088, "step": 7944 }, { "epoch": 7.41, "learning_rate": 4.9073227611940305e-05, "loss": 0.0071, "step": 7948 }, { "epoch": 7.42, "learning_rate": 4.907276119402985e-05, "loss": 0.0072, "step": 7952 }, { "epoch": 7.42, "learning_rate": 4.90722947761194e-05, "loss": 0.0344, "step": 7956 }, { "epoch": 7.43, "learning_rate": 4.907182835820896e-05, "loss": 0.0059, "step": 7960 }, { "epoch": 7.43, "learning_rate": 4.907136194029851e-05, "loss": 0.0101, "step": 7964 }, { "epoch": 7.43, "learning_rate": 4.907089552238806e-05, "loss": 0.013, "step": 7968 }, { "epoch": 7.44, "learning_rate": 4.9070429104477614e-05, "loss": 0.0268, "step": 7972 }, { "epoch": 7.44, "learning_rate": 4.906996268656717e-05, "loss": 0.0088, "step": 7976 }, { "epoch": 7.44, "learning_rate": 4.906949626865672e-05, "loss": 0.01, "step": 7980 }, { "epoch": 7.45, "learning_rate": 4.906902985074627e-05, "loss": 0.0091, "step": 7984 }, { "epoch": 7.45, "learning_rate": 4.906856343283583e-05, "loss": 0.0176, "step": 7988 }, { "epoch": 7.46, "learning_rate": 4.9068097014925375e-05, "loss": 0.0144, "step": 7992 }, { "epoch": 7.46, "learning_rate": 4.906763059701493e-05, "loss": 0.016, "step": 7996 }, { "epoch": 7.46, "learning_rate": 4.906716417910448e-05, "loss": 0.0184, "step": 8000 }, { "epoch": 7.46, "eval_exact_match": 0.7059961315280464, "eval_exec": 0.7427466150870407, "eval_loss": 0.19857120513916016, "eval_runtime": 1295.3702, "eval_samples_per_second": 0.798, "step": 8000 }, { "epoch": 7.47, "learning_rate": 4.906669776119403e-05, "loss": 0.0074, "step": 8004 }, { "epoch": 7.47, "learning_rate": 4.906623134328359e-05, "loss": 0.0099, "step": 8008 }, { "epoch": 7.47, "learning_rate": 4.9065764925373136e-05, "loss": 0.0031, "step": 8012 }, { "epoch": 7.48, "learning_rate": 4.9065298507462684e-05, "loss": 0.0014, "step": 8016 }, { "epoch": 7.48, "learning_rate": 4.9064832089552246e-05, "loss": 0.0187, "step": 8020 }, { "epoch": 7.49, "learning_rate": 4.9064365671641794e-05, "loss": 0.0078, "step": 8024 }, { "epoch": 7.49, "learning_rate": 4.906389925373134e-05, "loss": 0.0109, "step": 8028 }, { "epoch": 7.49, "learning_rate": 4.90634328358209e-05, "loss": 0.0088, "step": 8032 }, { "epoch": 7.5, "learning_rate": 4.906296641791045e-05, "loss": 0.0195, "step": 8036 }, { "epoch": 7.5, "learning_rate": 4.90625e-05, "loss": 0.0075, "step": 8040 }, { "epoch": 7.5, "learning_rate": 4.9062033582089555e-05, "loss": 0.0075, "step": 8044 }, { "epoch": 7.51, "learning_rate": 4.906156716417911e-05, "loss": 0.0111, "step": 8048 }, { "epoch": 7.51, "learning_rate": 4.906110074626866e-05, "loss": 0.0068, "step": 8052 }, { "epoch": 7.51, "learning_rate": 4.906063432835821e-05, "loss": 0.0266, "step": 8056 }, { "epoch": 7.52, "learning_rate": 4.906016791044776e-05, "loss": 0.0099, "step": 8060 }, { "epoch": 7.52, "learning_rate": 4.9059701492537316e-05, "loss": 0.0185, "step": 8064 }, { "epoch": 7.53, "learning_rate": 4.905923507462687e-05, "loss": 0.0554, "step": 8068 }, { "epoch": 7.53, "learning_rate": 4.905876865671642e-05, "loss": 0.0089, "step": 8072 }, { "epoch": 7.53, "learning_rate": 4.905830223880597e-05, "loss": 0.0142, "step": 8076 }, { "epoch": 7.54, "learning_rate": 4.905783582089553e-05, "loss": 0.0093, "step": 8080 }, { "epoch": 7.54, "learning_rate": 4.905736940298508e-05, "loss": 0.004, "step": 8084 }, { "epoch": 7.54, "learning_rate": 4.9056902985074625e-05, "loss": 0.0191, "step": 8088 }, { "epoch": 7.55, "learning_rate": 4.905643656716418e-05, "loss": 0.0087, "step": 8092 }, { "epoch": 7.55, "learning_rate": 4.9055970149253735e-05, "loss": 0.0024, "step": 8096 }, { "epoch": 7.56, "learning_rate": 4.905550373134329e-05, "loss": 0.0119, "step": 8100 }, { "epoch": 7.56, "learning_rate": 4.905503731343284e-05, "loss": 0.0183, "step": 8104 }, { "epoch": 7.56, "learning_rate": 4.9054570895522386e-05, "loss": 0.0335, "step": 8108 }, { "epoch": 7.57, "learning_rate": 4.905410447761195e-05, "loss": 0.0109, "step": 8112 }, { "epoch": 7.57, "learning_rate": 4.9053638059701496e-05, "loss": 0.0087, "step": 8116 }, { "epoch": 7.57, "learning_rate": 4.9053171641791044e-05, "loss": 0.0147, "step": 8120 }, { "epoch": 7.58, "learning_rate": 4.90527052238806e-05, "loss": 0.0057, "step": 8124 }, { "epoch": 7.58, "learning_rate": 4.9052238805970154e-05, "loss": 0.0076, "step": 8128 }, { "epoch": 7.59, "learning_rate": 4.90517723880597e-05, "loss": 0.0141, "step": 8132 }, { "epoch": 7.59, "learning_rate": 4.905130597014926e-05, "loss": 0.011, "step": 8136 }, { "epoch": 7.59, "learning_rate": 4.905083955223881e-05, "loss": 0.0131, "step": 8140 }, { "epoch": 7.6, "learning_rate": 4.905037313432836e-05, "loss": 0.0042, "step": 8144 }, { "epoch": 7.6, "learning_rate": 4.9049906716417915e-05, "loss": 0.0029, "step": 8148 }, { "epoch": 7.6, "learning_rate": 4.904944029850746e-05, "loss": 0.0093, "step": 8152 }, { "epoch": 7.61, "learning_rate": 4.904897388059702e-05, "loss": 0.0169, "step": 8156 }, { "epoch": 7.61, "learning_rate": 4.904850746268657e-05, "loss": 0.0051, "step": 8160 }, { "epoch": 7.62, "learning_rate": 4.904804104477612e-05, "loss": 0.0164, "step": 8164 }, { "epoch": 7.62, "learning_rate": 4.904757462686567e-05, "loss": 0.0187, "step": 8168 }, { "epoch": 7.62, "learning_rate": 4.904710820895523e-05, "loss": 0.0233, "step": 8172 }, { "epoch": 7.63, "learning_rate": 4.904664179104478e-05, "loss": 0.0178, "step": 8176 }, { "epoch": 7.63, "learning_rate": 4.904617537313433e-05, "loss": 0.0074, "step": 8180 }, { "epoch": 7.63, "learning_rate": 4.904570895522388e-05, "loss": 0.0159, "step": 8184 }, { "epoch": 7.64, "learning_rate": 4.904524253731344e-05, "loss": 0.0035, "step": 8188 }, { "epoch": 7.64, "learning_rate": 4.9044776119402985e-05, "loss": 0.0147, "step": 8192 }, { "epoch": 7.65, "learning_rate": 4.904430970149254e-05, "loss": 0.0221, "step": 8196 }, { "epoch": 7.65, "learning_rate": 4.9043843283582095e-05, "loss": 0.0231, "step": 8200 }, { "epoch": 7.65, "learning_rate": 4.904337686567164e-05, "loss": 0.0097, "step": 8204 }, { "epoch": 7.66, "learning_rate": 4.90429104477612e-05, "loss": 0.0051, "step": 8208 }, { "epoch": 7.66, "learning_rate": 4.9042444029850746e-05, "loss": 0.0112, "step": 8212 }, { "epoch": 7.66, "learning_rate": 4.90419776119403e-05, "loss": 0.0071, "step": 8216 }, { "epoch": 7.67, "learning_rate": 4.9041511194029856e-05, "loss": 0.0031, "step": 8220 }, { "epoch": 7.67, "learning_rate": 4.9041044776119404e-05, "loss": 0.0922, "step": 8224 }, { "epoch": 7.68, "learning_rate": 4.904057835820895e-05, "loss": 0.0062, "step": 8228 }, { "epoch": 7.68, "learning_rate": 4.9040111940298514e-05, "loss": 0.0073, "step": 8232 }, { "epoch": 7.68, "learning_rate": 4.903964552238806e-05, "loss": 0.008, "step": 8236 }, { "epoch": 7.69, "learning_rate": 4.903917910447761e-05, "loss": 0.0109, "step": 8240 }, { "epoch": 7.69, "learning_rate": 4.9038712686567165e-05, "loss": 0.0079, "step": 8244 }, { "epoch": 7.69, "learning_rate": 4.903824626865672e-05, "loss": 0.0109, "step": 8248 }, { "epoch": 7.7, "learning_rate": 4.903777985074627e-05, "loss": 0.0054, "step": 8252 }, { "epoch": 7.7, "learning_rate": 4.903731343283582e-05, "loss": 0.005, "step": 8256 }, { "epoch": 7.71, "learning_rate": 4.903684701492538e-05, "loss": 0.0063, "step": 8260 }, { "epoch": 7.71, "learning_rate": 4.903638059701493e-05, "loss": 0.009, "step": 8264 }, { "epoch": 7.71, "learning_rate": 4.903591417910448e-05, "loss": 0.0047, "step": 8268 }, { "epoch": 7.72, "learning_rate": 4.903544776119403e-05, "loss": 0.0079, "step": 8272 }, { "epoch": 7.72, "learning_rate": 4.9034981343283584e-05, "loss": 0.0158, "step": 8276 }, { "epoch": 7.72, "learning_rate": 4.903451492537314e-05, "loss": 0.0092, "step": 8280 }, { "epoch": 7.73, "learning_rate": 4.903404850746269e-05, "loss": 0.013, "step": 8284 }, { "epoch": 7.73, "learning_rate": 4.903358208955224e-05, "loss": 0.0055, "step": 8288 }, { "epoch": 7.73, "learning_rate": 4.9033115671641796e-05, "loss": 0.0108, "step": 8292 }, { "epoch": 7.74, "learning_rate": 4.9032649253731345e-05, "loss": 0.0144, "step": 8296 }, { "epoch": 7.74, "learning_rate": 4.90321828358209e-05, "loss": 0.0122, "step": 8300 }, { "epoch": 7.75, "learning_rate": 4.903171641791045e-05, "loss": 0.0042, "step": 8304 }, { "epoch": 7.75, "learning_rate": 4.903125e-05, "loss": 0.0077, "step": 8308 }, { "epoch": 7.75, "learning_rate": 4.903078358208956e-05, "loss": 0.0076, "step": 8312 }, { "epoch": 7.76, "learning_rate": 4.9030317164179106e-05, "loss": 0.0096, "step": 8316 }, { "epoch": 7.76, "learning_rate": 4.902985074626866e-05, "loss": 0.0126, "step": 8320 }, { "epoch": 7.76, "learning_rate": 4.9029384328358215e-05, "loss": 0.0178, "step": 8324 }, { "epoch": 7.77, "learning_rate": 4.9028917910447764e-05, "loss": 0.0056, "step": 8328 }, { "epoch": 7.77, "learning_rate": 4.902845149253731e-05, "loss": 0.0135, "step": 8332 }, { "epoch": 7.78, "learning_rate": 4.9027985074626867e-05, "loss": 0.0065, "step": 8336 }, { "epoch": 7.78, "learning_rate": 4.902751865671642e-05, "loss": 0.0056, "step": 8340 }, { "epoch": 7.78, "learning_rate": 4.902705223880597e-05, "loss": 0.006, "step": 8344 }, { "epoch": 7.79, "learning_rate": 4.9026585820895525e-05, "loss": 0.0108, "step": 8348 }, { "epoch": 7.79, "learning_rate": 4.902611940298508e-05, "loss": 0.0076, "step": 8352 }, { "epoch": 7.79, "learning_rate": 4.902565298507463e-05, "loss": 0.0112, "step": 8356 }, { "epoch": 7.8, "learning_rate": 4.902518656716418e-05, "loss": 0.0044, "step": 8360 }, { "epoch": 7.8, "learning_rate": 4.902472014925373e-05, "loss": 0.0029, "step": 8364 }, { "epoch": 7.81, "learning_rate": 4.9024253731343286e-05, "loss": 0.0062, "step": 8368 }, { "epoch": 7.81, "learning_rate": 4.902378731343284e-05, "loss": 0.0114, "step": 8372 }, { "epoch": 7.81, "learning_rate": 4.902332089552239e-05, "loss": 0.0344, "step": 8376 }, { "epoch": 7.82, "learning_rate": 4.9022854477611943e-05, "loss": 0.0113, "step": 8380 }, { "epoch": 7.82, "learning_rate": 4.90223880597015e-05, "loss": 0.0098, "step": 8384 }, { "epoch": 7.82, "learning_rate": 4.9021921641791047e-05, "loss": 0.011, "step": 8388 }, { "epoch": 7.83, "learning_rate": 4.9021455223880595e-05, "loss": 0.0147, "step": 8392 }, { "epoch": 7.83, "learning_rate": 4.902098880597015e-05, "loss": 0.0191, "step": 8396 }, { "epoch": 7.84, "learning_rate": 4.9020522388059704e-05, "loss": 0.0161, "step": 8400 }, { "epoch": 7.84, "learning_rate": 4.902005597014925e-05, "loss": 0.0097, "step": 8404 }, { "epoch": 7.84, "learning_rate": 4.901958955223881e-05, "loss": 0.0107, "step": 8408 }, { "epoch": 7.85, "learning_rate": 4.901912313432836e-05, "loss": 0.0112, "step": 8412 }, { "epoch": 7.85, "learning_rate": 4.901865671641792e-05, "loss": 0.0135, "step": 8416 }, { "epoch": 7.85, "learning_rate": 4.9018190298507465e-05, "loss": 0.0098, "step": 8420 }, { "epoch": 7.86, "learning_rate": 4.9017723880597014e-05, "loss": 0.0128, "step": 8424 }, { "epoch": 7.86, "learning_rate": 4.9017257462686575e-05, "loss": 0.0111, "step": 8428 }, { "epoch": 7.87, "learning_rate": 4.901679104477612e-05, "loss": 0.0231, "step": 8432 }, { "epoch": 7.87, "learning_rate": 4.901632462686567e-05, "loss": 0.0146, "step": 8436 }, { "epoch": 7.87, "learning_rate": 4.9015858208955226e-05, "loss": 0.0016, "step": 8440 }, { "epoch": 7.88, "learning_rate": 4.901539179104478e-05, "loss": 0.0031, "step": 8444 }, { "epoch": 7.88, "learning_rate": 4.901492537313433e-05, "loss": 0.0071, "step": 8448 }, { "epoch": 7.88, "learning_rate": 4.9014458955223884e-05, "loss": 0.0152, "step": 8452 }, { "epoch": 7.89, "learning_rate": 4.901399253731343e-05, "loss": 0.0076, "step": 8456 }, { "epoch": 7.89, "learning_rate": 4.901352611940299e-05, "loss": 0.0204, "step": 8460 }, { "epoch": 7.9, "learning_rate": 4.901305970149254e-05, "loss": 0.0027, "step": 8464 }, { "epoch": 7.9, "learning_rate": 4.901259328358209e-05, "loss": 0.012, "step": 8468 }, { "epoch": 7.9, "learning_rate": 4.9012126865671645e-05, "loss": 0.0056, "step": 8472 }, { "epoch": 7.91, "learning_rate": 4.90116604477612e-05, "loss": 0.0064, "step": 8476 }, { "epoch": 7.91, "learning_rate": 4.901119402985075e-05, "loss": 0.0082, "step": 8480 }, { "epoch": 7.91, "learning_rate": 4.9010727611940297e-05, "loss": 0.0071, "step": 8484 }, { "epoch": 7.92, "learning_rate": 4.901026119402986e-05, "loss": 0.0152, "step": 8488 }, { "epoch": 7.92, "learning_rate": 4.9009794776119406e-05, "loss": 0.008, "step": 8492 }, { "epoch": 7.93, "learning_rate": 4.9009328358208954e-05, "loss": 0.0148, "step": 8496 }, { "epoch": 7.93, "learning_rate": 4.900886194029851e-05, "loss": 0.0038, "step": 8500 }, { "epoch": 7.93, "eval_exact_match": 0.7127659574468085, "eval_exec": 0.7504835589941973, "eval_loss": 0.23233897984027863, "eval_runtime": 1425.0133, "eval_samples_per_second": 0.726, "step": 8500 }, { "epoch": 7.93, "learning_rate": 4.9008395522388064e-05, "loss": 0.0051, "step": 8504 }, { "epoch": 7.94, "learning_rate": 4.900792910447761e-05, "loss": 0.0202, "step": 8508 }, { "epoch": 7.94, "learning_rate": 4.900746268656717e-05, "loss": 0.0163, "step": 8512 }, { "epoch": 7.94, "learning_rate": 4.9006996268656715e-05, "loss": 0.0055, "step": 8516 }, { "epoch": 7.95, "learning_rate": 4.900652985074627e-05, "loss": 0.0053, "step": 8520 }, { "epoch": 7.95, "learning_rate": 4.9006063432835825e-05, "loss": 0.0126, "step": 8524 }, { "epoch": 7.96, "learning_rate": 4.9005597014925373e-05, "loss": 0.0098, "step": 8528 }, { "epoch": 7.96, "learning_rate": 4.900513059701493e-05, "loss": 0.0072, "step": 8532 }, { "epoch": 7.96, "learning_rate": 4.900466417910448e-05, "loss": 0.0078, "step": 8536 }, { "epoch": 7.97, "learning_rate": 4.900419776119403e-05, "loss": 0.0104, "step": 8540 }, { "epoch": 7.97, "learning_rate": 4.900373134328358e-05, "loss": 0.0062, "step": 8544 }, { "epoch": 7.97, "learning_rate": 4.900326492537314e-05, "loss": 0.0079, "step": 8548 }, { "epoch": 7.98, "learning_rate": 4.900279850746269e-05, "loss": 0.0096, "step": 8552 }, { "epoch": 7.98, "learning_rate": 4.900233208955224e-05, "loss": 0.0116, "step": 8556 }, { "epoch": 7.98, "learning_rate": 4.900186567164179e-05, "loss": 0.01, "step": 8560 }, { "epoch": 7.99, "learning_rate": 4.900139925373135e-05, "loss": 0.0133, "step": 8564 }, { "epoch": 7.99, "learning_rate": 4.9000932835820895e-05, "loss": 0.0058, "step": 8568 }, { "epoch": 8.0, "learning_rate": 4.900046641791045e-05, "loss": 0.0039, "step": 8572 }, { "epoch": 8.0, "learning_rate": 4.9e-05, "loss": 0.0034, "step": 8576 }, { "epoch": 8.0, "learning_rate": 4.899953358208956e-05, "loss": 0.0095, "step": 8580 }, { "epoch": 8.01, "learning_rate": 4.899906716417911e-05, "loss": 0.0008, "step": 8584 }, { "epoch": 8.01, "learning_rate": 4.8998600746268656e-05, "loss": 0.0054, "step": 8588 }, { "epoch": 8.01, "learning_rate": 4.899813432835821e-05, "loss": 0.0044, "step": 8592 }, { "epoch": 8.02, "learning_rate": 4.8997667910447766e-05, "loss": 0.0024, "step": 8596 }, { "epoch": 8.02, "learning_rate": 4.8997201492537314e-05, "loss": 0.0031, "step": 8600 }, { "epoch": 8.03, "learning_rate": 4.899673507462687e-05, "loss": 0.0033, "step": 8604 }, { "epoch": 8.03, "learning_rate": 4.8996268656716424e-05, "loss": 0.0056, "step": 8608 }, { "epoch": 8.03, "learning_rate": 4.899580223880597e-05, "loss": 0.0025, "step": 8612 }, { "epoch": 8.04, "learning_rate": 4.899533582089553e-05, "loss": 0.0038, "step": 8616 }, { "epoch": 8.04, "learning_rate": 4.8994869402985075e-05, "loss": 0.0088, "step": 8620 }, { "epoch": 8.04, "learning_rate": 4.899440298507463e-05, "loss": 0.0041, "step": 8624 }, { "epoch": 8.05, "learning_rate": 4.8993936567164185e-05, "loss": 0.0068, "step": 8628 }, { "epoch": 8.05, "learning_rate": 4.899347014925373e-05, "loss": 0.011, "step": 8632 }, { "epoch": 8.06, "learning_rate": 4.899300373134328e-05, "loss": 0.0037, "step": 8636 }, { "epoch": 8.06, "learning_rate": 4.899253731343284e-05, "loss": 0.0043, "step": 8640 }, { "epoch": 8.06, "learning_rate": 4.899207089552239e-05, "loss": 0.0058, "step": 8644 }, { "epoch": 8.07, "learning_rate": 4.899160447761194e-05, "loss": 0.005, "step": 8648 }, { "epoch": 8.07, "learning_rate": 4.8991138059701494e-05, "loss": 0.0084, "step": 8652 }, { "epoch": 8.07, "learning_rate": 4.899067164179105e-05, "loss": 0.0252, "step": 8656 }, { "epoch": 8.08, "learning_rate": 4.89902052238806e-05, "loss": 0.0063, "step": 8660 }, { "epoch": 8.08, "learning_rate": 4.898973880597015e-05, "loss": 0.0119, "step": 8664 }, { "epoch": 8.09, "learning_rate": 4.898927238805971e-05, "loss": 0.0016, "step": 8668 }, { "epoch": 8.09, "learning_rate": 4.8988805970149255e-05, "loss": 0.0067, "step": 8672 }, { "epoch": 8.09, "learning_rate": 4.898833955223881e-05, "loss": 0.0108, "step": 8676 }, { "epoch": 8.1, "learning_rate": 4.898787313432836e-05, "loss": 0.0061, "step": 8680 }, { "epoch": 8.1, "learning_rate": 4.898740671641791e-05, "loss": 0.0072, "step": 8684 }, { "epoch": 8.1, "learning_rate": 4.898694029850747e-05, "loss": 0.0017, "step": 8688 }, { "epoch": 8.11, "learning_rate": 4.8986473880597016e-05, "loss": 0.0038, "step": 8692 }, { "epoch": 8.11, "learning_rate": 4.8986007462686564e-05, "loss": 0.0169, "step": 8696 }, { "epoch": 8.12, "learning_rate": 4.8985541044776126e-05, "loss": 0.0112, "step": 8700 }, { "epoch": 8.12, "learning_rate": 4.8985074626865674e-05, "loss": 0.0025, "step": 8704 }, { "epoch": 8.12, "learning_rate": 4.898460820895522e-05, "loss": 0.0019, "step": 8708 }, { "epoch": 8.13, "learning_rate": 4.898414179104478e-05, "loss": 0.0063, "step": 8712 }, { "epoch": 8.13, "learning_rate": 4.898367537313433e-05, "loss": 0.0054, "step": 8716 }, { "epoch": 8.13, "learning_rate": 4.898320895522388e-05, "loss": 0.004, "step": 8720 }, { "epoch": 8.14, "learning_rate": 4.8982742537313435e-05, "loss": 0.0049, "step": 8724 }, { "epoch": 8.14, "learning_rate": 4.898227611940299e-05, "loss": 0.0072, "step": 8728 }, { "epoch": 8.15, "learning_rate": 4.898180970149254e-05, "loss": 0.0028, "step": 8732 }, { "epoch": 8.15, "learning_rate": 4.898134328358209e-05, "loss": 0.0143, "step": 8736 }, { "epoch": 8.15, "learning_rate": 4.898087686567164e-05, "loss": 0.0043, "step": 8740 }, { "epoch": 8.16, "learning_rate": 4.8980410447761196e-05, "loss": 0.0136, "step": 8744 }, { "epoch": 8.16, "learning_rate": 4.897994402985075e-05, "loss": 0.0029, "step": 8748 }, { "epoch": 8.16, "learning_rate": 4.89794776119403e-05, "loss": 0.0028, "step": 8752 }, { "epoch": 8.17, "learning_rate": 4.8979011194029854e-05, "loss": 0.0205, "step": 8756 }, { "epoch": 8.17, "learning_rate": 4.897854477611941e-05, "loss": 0.005, "step": 8760 }, { "epoch": 8.18, "learning_rate": 4.897807835820896e-05, "loss": 0.0037, "step": 8764 }, { "epoch": 8.18, "learning_rate": 4.897761194029851e-05, "loss": 0.0069, "step": 8768 }, { "epoch": 8.18, "learning_rate": 4.897714552238806e-05, "loss": 0.0075, "step": 8772 }, { "epoch": 8.19, "learning_rate": 4.8976679104477615e-05, "loss": 0.0102, "step": 8776 }, { "epoch": 8.19, "learning_rate": 4.897621268656717e-05, "loss": 0.0185, "step": 8780 }, { "epoch": 8.19, "learning_rate": 4.897574626865672e-05, "loss": 0.0029, "step": 8784 }, { "epoch": 8.2, "learning_rate": 4.8975279850746266e-05, "loss": 0.003, "step": 8788 }, { "epoch": 8.2, "learning_rate": 4.897481343283583e-05, "loss": 0.0142, "step": 8792 }, { "epoch": 8.21, "learning_rate": 4.8974347014925376e-05, "loss": 0.0086, "step": 8796 }, { "epoch": 8.21, "learning_rate": 4.8973880597014924e-05, "loss": 0.0037, "step": 8800 }, { "epoch": 8.21, "learning_rate": 4.897341417910448e-05, "loss": 0.0189, "step": 8804 }, { "epoch": 8.22, "learning_rate": 4.8972947761194034e-05, "loss": 0.0031, "step": 8808 }, { "epoch": 8.22, "learning_rate": 4.897248134328358e-05, "loss": 0.0061, "step": 8812 }, { "epoch": 8.22, "learning_rate": 4.897201492537314e-05, "loss": 0.0047, "step": 8816 }, { "epoch": 8.23, "learning_rate": 4.897154850746269e-05, "loss": 0.0041, "step": 8820 }, { "epoch": 8.23, "learning_rate": 4.897108208955224e-05, "loss": 0.0055, "step": 8824 }, { "epoch": 8.24, "learning_rate": 4.8970615671641795e-05, "loss": 0.0222, "step": 8828 }, { "epoch": 8.24, "learning_rate": 4.897014925373134e-05, "loss": 0.0059, "step": 8832 }, { "epoch": 8.24, "learning_rate": 4.89696828358209e-05, "loss": 0.016, "step": 8836 }, { "epoch": 8.25, "learning_rate": 4.896921641791045e-05, "loss": 0.0095, "step": 8840 }, { "epoch": 8.25, "learning_rate": 4.896875e-05, "loss": 0.0062, "step": 8844 }, { "epoch": 8.25, "learning_rate": 4.896828358208955e-05, "loss": 0.0012, "step": 8848 }, { "epoch": 8.26, "learning_rate": 4.896781716417911e-05, "loss": 0.0032, "step": 8852 }, { "epoch": 8.26, "learning_rate": 4.896735074626866e-05, "loss": 0.0146, "step": 8856 }, { "epoch": 8.26, "learning_rate": 4.896688432835821e-05, "loss": 0.0123, "step": 8860 }, { "epoch": 8.27, "learning_rate": 4.896641791044776e-05, "loss": 0.0066, "step": 8864 }, { "epoch": 8.27, "learning_rate": 4.896595149253732e-05, "loss": 0.0026, "step": 8868 }, { "epoch": 8.28, "learning_rate": 4.8965485074626865e-05, "loss": 0.0051, "step": 8872 }, { "epoch": 8.28, "learning_rate": 4.896501865671642e-05, "loss": 0.0045, "step": 8876 }, { "epoch": 8.28, "learning_rate": 4.8964552238805975e-05, "loss": 0.004, "step": 8880 }, { "epoch": 8.29, "learning_rate": 4.896408582089552e-05, "loss": 0.0113, "step": 8884 }, { "epoch": 8.29, "learning_rate": 4.896361940298508e-05, "loss": 0.005, "step": 8888 }, { "epoch": 8.29, "learning_rate": 4.8963152985074626e-05, "loss": 0.0062, "step": 8892 }, { "epoch": 8.3, "learning_rate": 4.896268656716418e-05, "loss": 0.0053, "step": 8896 }, { "epoch": 8.3, "learning_rate": 4.8962220149253736e-05, "loss": 0.0049, "step": 8900 }, { "epoch": 8.31, "learning_rate": 4.8961753731343284e-05, "loss": 0.0037, "step": 8904 }, { "epoch": 8.31, "learning_rate": 4.896128731343284e-05, "loss": 0.0359, "step": 8908 }, { "epoch": 8.31, "learning_rate": 4.8960820895522394e-05, "loss": 0.0034, "step": 8912 }, { "epoch": 8.32, "learning_rate": 4.896035447761194e-05, "loss": 0.0019, "step": 8916 }, { "epoch": 8.32, "learning_rate": 4.89598880597015e-05, "loss": 0.0039, "step": 8920 }, { "epoch": 8.32, "learning_rate": 4.8959421641791045e-05, "loss": 0.0091, "step": 8924 }, { "epoch": 8.33, "learning_rate": 4.89589552238806e-05, "loss": 0.0103, "step": 8928 }, { "epoch": 8.33, "learning_rate": 4.8958488805970155e-05, "loss": 0.0076, "step": 8932 }, { "epoch": 8.34, "learning_rate": 4.89580223880597e-05, "loss": 0.0653, "step": 8936 }, { "epoch": 8.34, "learning_rate": 4.895755597014926e-05, "loss": 0.0164, "step": 8940 }, { "epoch": 8.34, "learning_rate": 4.895708955223881e-05, "loss": 0.0133, "step": 8944 }, { "epoch": 8.35, "learning_rate": 4.895662313432836e-05, "loss": 0.0063, "step": 8948 }, { "epoch": 8.35, "learning_rate": 4.895615671641791e-05, "loss": 0.0046, "step": 8952 }, { "epoch": 8.35, "learning_rate": 4.8955690298507464e-05, "loss": 0.002, "step": 8956 }, { "epoch": 8.36, "learning_rate": 4.895522388059702e-05, "loss": 0.0034, "step": 8960 }, { "epoch": 8.36, "learning_rate": 4.895475746268657e-05, "loss": 0.003, "step": 8964 }, { "epoch": 8.37, "learning_rate": 4.895429104477612e-05, "loss": 0.0112, "step": 8968 }, { "epoch": 8.37, "learning_rate": 4.895382462686568e-05, "loss": 0.0156, "step": 8972 }, { "epoch": 8.37, "learning_rate": 4.8953358208955225e-05, "loss": 0.0045, "step": 8976 }, { "epoch": 8.38, "learning_rate": 4.895289179104478e-05, "loss": 0.003, "step": 8980 }, { "epoch": 8.38, "learning_rate": 4.895242537313433e-05, "loss": 0.0061, "step": 8984 }, { "epoch": 8.38, "learning_rate": 4.895195895522388e-05, "loss": 0.0125, "step": 8988 }, { "epoch": 8.39, "learning_rate": 4.895149253731344e-05, "loss": 0.0068, "step": 8992 }, { "epoch": 8.39, "learning_rate": 4.8951026119402986e-05, "loss": 0.0072, "step": 8996 }, { "epoch": 8.4, "learning_rate": 4.895055970149254e-05, "loss": 0.0054, "step": 9000 }, { "epoch": 8.4, "eval_exact_match": 0.7321083172147002, "eval_exec": 0.7572533849129593, "eval_loss": 0.24979576468467712, "eval_runtime": 1232.0481, "eval_samples_per_second": 0.839, "step": 9000 }, { "epoch": 8.4, "learning_rate": 4.8950093283582096e-05, "loss": 0.0038, "step": 9004 }, { "epoch": 8.4, "learning_rate": 4.8949626865671644e-05, "loss": 0.007, "step": 9008 }, { "epoch": 8.41, "learning_rate": 4.894916044776119e-05, "loss": 0.0187, "step": 9012 }, { "epoch": 8.41, "learning_rate": 4.894869402985075e-05, "loss": 0.0081, "step": 9016 }, { "epoch": 8.41, "learning_rate": 4.89482276119403e-05, "loss": 0.0047, "step": 9020 }, { "epoch": 8.42, "learning_rate": 4.894776119402985e-05, "loss": 0.0043, "step": 9024 }, { "epoch": 8.42, "learning_rate": 4.8947294776119405e-05, "loss": 0.028, "step": 9028 }, { "epoch": 8.43, "learning_rate": 4.894682835820896e-05, "loss": 0.0063, "step": 9032 }, { "epoch": 8.43, "learning_rate": 4.894636194029851e-05, "loss": 0.0072, "step": 9036 }, { "epoch": 8.43, "learning_rate": 4.894589552238806e-05, "loss": 0.0169, "step": 9040 }, { "epoch": 8.44, "learning_rate": 4.894542910447761e-05, "loss": 0.0325, "step": 9044 }, { "epoch": 8.44, "learning_rate": 4.8944962686567166e-05, "loss": 0.0085, "step": 9048 }, { "epoch": 8.44, "learning_rate": 4.894449626865672e-05, "loss": 0.0022, "step": 9052 }, { "epoch": 8.45, "learning_rate": 4.894402985074627e-05, "loss": 0.0097, "step": 9056 }, { "epoch": 8.45, "learning_rate": 4.8943563432835824e-05, "loss": 0.0049, "step": 9060 }, { "epoch": 8.46, "learning_rate": 4.894309701492538e-05, "loss": 0.0099, "step": 9064 }, { "epoch": 8.46, "learning_rate": 4.894263059701493e-05, "loss": 0.003, "step": 9068 }, { "epoch": 8.46, "learning_rate": 4.894216417910448e-05, "loss": 0.0065, "step": 9072 }, { "epoch": 8.47, "learning_rate": 4.894169776119403e-05, "loss": 0.0048, "step": 9076 }, { "epoch": 8.47, "learning_rate": 4.8941231343283585e-05, "loss": 0.0018, "step": 9080 }, { "epoch": 8.47, "learning_rate": 4.894076492537314e-05, "loss": 0.0039, "step": 9084 }, { "epoch": 8.48, "learning_rate": 4.894029850746269e-05, "loss": 0.0058, "step": 9088 }, { "epoch": 8.48, "learning_rate": 4.893983208955224e-05, "loss": 0.0059, "step": 9092 }, { "epoch": 8.49, "learning_rate": 4.89393656716418e-05, "loss": 0.0059, "step": 9096 }, { "epoch": 8.49, "learning_rate": 4.8938899253731346e-05, "loss": 0.0076, "step": 9100 }, { "epoch": 8.49, "learning_rate": 4.8938432835820894e-05, "loss": 0.0044, "step": 9104 }, { "epoch": 8.5, "learning_rate": 4.8937966417910456e-05, "loss": 0.0052, "step": 9108 }, { "epoch": 8.5, "learning_rate": 4.8937500000000004e-05, "loss": 0.0091, "step": 9112 }, { "epoch": 8.5, "learning_rate": 4.893703358208955e-05, "loss": 0.007, "step": 9116 }, { "epoch": 8.51, "learning_rate": 4.893656716417911e-05, "loss": 0.0019, "step": 9120 }, { "epoch": 8.51, "learning_rate": 4.893610074626866e-05, "loss": 0.0059, "step": 9124 }, { "epoch": 8.51, "learning_rate": 4.893563432835821e-05, "loss": 0.0049, "step": 9128 }, { "epoch": 8.52, "learning_rate": 4.8935167910447765e-05, "loss": 0.0033, "step": 9132 }, { "epoch": 8.52, "learning_rate": 4.893470149253731e-05, "loss": 0.0168, "step": 9136 }, { "epoch": 8.53, "learning_rate": 4.893423507462687e-05, "loss": 0.0114, "step": 9140 }, { "epoch": 8.53, "learning_rate": 4.893376865671642e-05, "loss": 0.0085, "step": 9144 }, { "epoch": 8.53, "learning_rate": 4.893330223880597e-05, "loss": 0.0111, "step": 9148 }, { "epoch": 8.54, "learning_rate": 4.8932835820895526e-05, "loss": 0.0053, "step": 9152 }, { "epoch": 8.54, "learning_rate": 4.893236940298508e-05, "loss": 0.0014, "step": 9156 }, { "epoch": 8.54, "learning_rate": 4.893190298507463e-05, "loss": 0.008, "step": 9160 }, { "epoch": 8.55, "learning_rate": 4.893143656716418e-05, "loss": 0.0079, "step": 9164 }, { "epoch": 8.55, "learning_rate": 4.893097014925374e-05, "loss": 0.0069, "step": 9168 }, { "epoch": 8.56, "learning_rate": 4.893050373134329e-05, "loss": 0.0094, "step": 9172 }, { "epoch": 8.56, "learning_rate": 4.8930037313432835e-05, "loss": 0.0086, "step": 9176 }, { "epoch": 8.56, "learning_rate": 4.892957089552239e-05, "loss": 0.0151, "step": 9180 }, { "epoch": 8.57, "learning_rate": 4.8929104477611945e-05, "loss": 0.0061, "step": 9184 }, { "epoch": 8.57, "learning_rate": 4.892863805970149e-05, "loss": 0.0031, "step": 9188 }, { "epoch": 8.57, "learning_rate": 4.892817164179105e-05, "loss": 0.013, "step": 9192 }, { "epoch": 8.58, "learning_rate": 4.8927705223880596e-05, "loss": 0.0115, "step": 9196 }, { "epoch": 8.58, "learning_rate": 4.892723880597015e-05, "loss": 0.0132, "step": 9200 }, { "epoch": 8.59, "learning_rate": 4.8926772388059706e-05, "loss": 0.016, "step": 9204 }, { "epoch": 8.59, "learning_rate": 4.8926305970149254e-05, "loss": 0.0091, "step": 9208 }, { "epoch": 8.59, "learning_rate": 4.892583955223881e-05, "loss": 0.0022, "step": 9212 }, { "epoch": 8.6, "learning_rate": 4.8925373134328364e-05, "loss": 0.006, "step": 9216 }, { "epoch": 8.6, "learning_rate": 4.892490671641791e-05, "loss": 0.01, "step": 9220 }, { "epoch": 8.6, "learning_rate": 4.892444029850746e-05, "loss": 0.0023, "step": 9224 }, { "epoch": 8.61, "learning_rate": 4.892397388059702e-05, "loss": 0.0046, "step": 9228 }, { "epoch": 8.61, "learning_rate": 4.892350746268657e-05, "loss": 0.0106, "step": 9232 }, { "epoch": 8.62, "learning_rate": 4.8923041044776124e-05, "loss": 0.0152, "step": 9236 }, { "epoch": 8.62, "learning_rate": 4.892257462686567e-05, "loss": 0.0108, "step": 9240 }, { "epoch": 8.62, "learning_rate": 4.892210820895523e-05, "loss": 0.0146, "step": 9244 }, { "epoch": 8.63, "learning_rate": 4.892164179104478e-05, "loss": 0.0046, "step": 9248 }, { "epoch": 8.63, "learning_rate": 4.892117537313433e-05, "loss": 0.0106, "step": 9252 }, { "epoch": 8.63, "learning_rate": 4.892070895522388e-05, "loss": 0.002, "step": 9256 }, { "epoch": 8.64, "learning_rate": 4.892024253731344e-05, "loss": 0.0041, "step": 9260 }, { "epoch": 8.64, "learning_rate": 4.891977611940299e-05, "loss": 0.0086, "step": 9264 }, { "epoch": 8.65, "learning_rate": 4.891930970149254e-05, "loss": 0.0109, "step": 9268 }, { "epoch": 8.65, "learning_rate": 4.891884328358209e-05, "loss": 0.0073, "step": 9272 }, { "epoch": 8.65, "learning_rate": 4.8918376865671646e-05, "loss": 0.0168, "step": 9276 }, { "epoch": 8.66, "learning_rate": 4.8917910447761195e-05, "loss": 0.0122, "step": 9280 }, { "epoch": 8.66, "learning_rate": 4.891744402985075e-05, "loss": 0.0049, "step": 9284 }, { "epoch": 8.66, "learning_rate": 4.8916977611940304e-05, "loss": 0.008, "step": 9288 }, { "epoch": 8.67, "learning_rate": 4.891651119402985e-05, "loss": 0.0071, "step": 9292 }, { "epoch": 8.67, "learning_rate": 4.891604477611941e-05, "loss": 0.0187, "step": 9296 }, { "epoch": 8.68, "learning_rate": 4.8915578358208956e-05, "loss": 0.006, "step": 9300 }, { "epoch": 8.68, "learning_rate": 4.891511194029851e-05, "loss": 0.0052, "step": 9304 }, { "epoch": 8.68, "learning_rate": 4.8914645522388065e-05, "loss": 0.0079, "step": 9308 }, { "epoch": 8.69, "learning_rate": 4.8914179104477614e-05, "loss": 0.0029, "step": 9312 }, { "epoch": 8.69, "learning_rate": 4.891371268656716e-05, "loss": 0.0057, "step": 9316 }, { "epoch": 8.69, "learning_rate": 4.891324626865672e-05, "loss": 0.0256, "step": 9320 }, { "epoch": 8.7, "learning_rate": 4.891277985074627e-05, "loss": 0.0075, "step": 9324 }, { "epoch": 8.7, "learning_rate": 4.891231343283582e-05, "loss": 0.0223, "step": 9328 }, { "epoch": 8.71, "learning_rate": 4.8911847014925375e-05, "loss": 0.013, "step": 9332 }, { "epoch": 8.71, "learning_rate": 4.891138059701493e-05, "loss": 0.0038, "step": 9336 }, { "epoch": 8.71, "learning_rate": 4.891091417910448e-05, "loss": 0.014, "step": 9340 }, { "epoch": 8.72, "learning_rate": 4.891044776119403e-05, "loss": 0.004, "step": 9344 }, { "epoch": 8.72, "learning_rate": 4.890998134328359e-05, "loss": 0.0035, "step": 9348 }, { "epoch": 8.72, "learning_rate": 4.8909514925373136e-05, "loss": 0.0139, "step": 9352 }, { "epoch": 8.73, "learning_rate": 4.890904850746269e-05, "loss": 0.0054, "step": 9356 }, { "epoch": 8.73, "learning_rate": 4.890858208955224e-05, "loss": 0.007, "step": 9360 }, { "epoch": 8.73, "learning_rate": 4.8908115671641793e-05, "loss": 0.0136, "step": 9364 }, { "epoch": 8.74, "learning_rate": 4.890764925373135e-05, "loss": 0.0025, "step": 9368 }, { "epoch": 8.74, "learning_rate": 4.8907182835820896e-05, "loss": 0.0116, "step": 9372 }, { "epoch": 8.75, "learning_rate": 4.8906716417910445e-05, "loss": 0.013, "step": 9376 }, { "epoch": 8.75, "learning_rate": 4.8906250000000006e-05, "loss": 0.0061, "step": 9380 }, { "epoch": 8.75, "learning_rate": 4.8905783582089554e-05, "loss": 0.0027, "step": 9384 }, { "epoch": 8.76, "learning_rate": 4.89053171641791e-05, "loss": 0.0024, "step": 9388 }, { "epoch": 8.76, "learning_rate": 4.890485074626866e-05, "loss": 0.0027, "step": 9392 }, { "epoch": 8.76, "learning_rate": 4.890438432835821e-05, "loss": 0.005, "step": 9396 }, { "epoch": 8.77, "learning_rate": 4.890391791044777e-05, "loss": 0.0056, "step": 9400 }, { "epoch": 8.77, "learning_rate": 4.8903451492537315e-05, "loss": 0.0092, "step": 9404 }, { "epoch": 8.78, "learning_rate": 4.890298507462687e-05, "loss": 0.0028, "step": 9408 }, { "epoch": 8.78, "learning_rate": 4.8902518656716425e-05, "loss": 0.0263, "step": 9412 }, { "epoch": 8.78, "learning_rate": 4.890205223880597e-05, "loss": 0.0037, "step": 9416 }, { "epoch": 8.79, "learning_rate": 4.890158582089552e-05, "loss": 0.0013, "step": 9420 }, { "epoch": 8.79, "learning_rate": 4.8901119402985076e-05, "loss": 0.0043, "step": 9424 }, { "epoch": 8.79, "learning_rate": 4.890065298507463e-05, "loss": 0.016, "step": 9428 }, { "epoch": 8.8, "learning_rate": 4.890018656716418e-05, "loss": 0.0099, "step": 9432 }, { "epoch": 8.8, "learning_rate": 4.8899720149253734e-05, "loss": 0.0034, "step": 9436 }, { "epoch": 8.81, "learning_rate": 4.889925373134329e-05, "loss": 0.0022, "step": 9440 }, { "epoch": 8.81, "learning_rate": 4.889878731343284e-05, "loss": 0.0035, "step": 9444 }, { "epoch": 8.81, "learning_rate": 4.889832089552239e-05, "loss": 0.0063, "step": 9448 }, { "epoch": 8.82, "learning_rate": 4.889785447761194e-05, "loss": 0.0102, "step": 9452 }, { "epoch": 8.82, "learning_rate": 4.8897388059701495e-05, "loss": 0.0085, "step": 9456 }, { "epoch": 8.82, "learning_rate": 4.889692164179105e-05, "loss": 0.0024, "step": 9460 }, { "epoch": 8.83, "learning_rate": 4.88964552238806e-05, "loss": 0.0029, "step": 9464 }, { "epoch": 8.83, "learning_rate": 4.8895988805970147e-05, "loss": 0.0111, "step": 9468 }, { "epoch": 8.84, "learning_rate": 4.889552238805971e-05, "loss": 0.0018, "step": 9472 }, { "epoch": 8.84, "learning_rate": 4.8895055970149256e-05, "loss": 0.005, "step": 9476 }, { "epoch": 8.84, "learning_rate": 4.8894589552238804e-05, "loss": 0.011, "step": 9480 }, { "epoch": 8.85, "learning_rate": 4.889412313432836e-05, "loss": 0.0065, "step": 9484 }, { "epoch": 8.85, "learning_rate": 4.8893656716417914e-05, "loss": 0.0071, "step": 9488 }, { "epoch": 8.85, "learning_rate": 4.889319029850746e-05, "loss": 0.0172, "step": 9492 }, { "epoch": 8.86, "learning_rate": 4.889272388059702e-05, "loss": 0.0057, "step": 9496 }, { "epoch": 8.86, "learning_rate": 4.889225746268657e-05, "loss": 0.0052, "step": 9500 }, { "epoch": 8.86, "eval_exact_match": 0.718568665377176, "eval_exec": 0.7475822050290135, "eval_loss": 0.24757526814937592, "eval_runtime": 1186.3447, "eval_samples_per_second": 0.872, "step": 9500 }, { "epoch": 8.87, "learning_rate": 4.889179104477612e-05, "loss": 0.0078, "step": 9504 }, { "epoch": 8.87, "learning_rate": 4.8891324626865675e-05, "loss": 0.0044, "step": 9508 }, { "epoch": 8.87, "learning_rate": 4.889085820895522e-05, "loss": 0.0049, "step": 9512 }, { "epoch": 8.88, "learning_rate": 4.889039179104478e-05, "loss": 0.0046, "step": 9516 }, { "epoch": 8.88, "learning_rate": 4.888992537313433e-05, "loss": 0.0027, "step": 9520 }, { "epoch": 8.88, "learning_rate": 4.888945895522388e-05, "loss": 0.0052, "step": 9524 }, { "epoch": 8.89, "learning_rate": 4.888899253731343e-05, "loss": 0.0024, "step": 9528 }, { "epoch": 8.89, "learning_rate": 4.888852611940299e-05, "loss": 0.0125, "step": 9532 }, { "epoch": 8.9, "learning_rate": 4.888805970149254e-05, "loss": 0.0113, "step": 9536 }, { "epoch": 8.9, "learning_rate": 4.888759328358209e-05, "loss": 0.0031, "step": 9540 }, { "epoch": 8.9, "learning_rate": 4.888712686567164e-05, "loss": 0.0017, "step": 9544 }, { "epoch": 8.91, "learning_rate": 4.88866604477612e-05, "loss": 0.0101, "step": 9548 }, { "epoch": 8.91, "learning_rate": 4.8886194029850745e-05, "loss": 0.0128, "step": 9552 }, { "epoch": 8.91, "learning_rate": 4.88857276119403e-05, "loss": 0.002, "step": 9556 }, { "epoch": 8.92, "learning_rate": 4.8885261194029855e-05, "loss": 0.0032, "step": 9560 }, { "epoch": 8.92, "learning_rate": 4.888479477611941e-05, "loss": 0.0099, "step": 9564 }, { "epoch": 8.93, "learning_rate": 4.888432835820896e-05, "loss": 0.0095, "step": 9568 }, { "epoch": 8.93, "learning_rate": 4.8883861940298506e-05, "loss": 0.0131, "step": 9572 }, { "epoch": 8.93, "learning_rate": 4.888339552238806e-05, "loss": 0.004, "step": 9576 }, { "epoch": 8.94, "learning_rate": 4.8882929104477616e-05, "loss": 0.0112, "step": 9580 }, { "epoch": 8.94, "learning_rate": 4.8882462686567164e-05, "loss": 0.0112, "step": 9584 }, { "epoch": 8.94, "learning_rate": 4.888199626865672e-05, "loss": 0.0062, "step": 9588 }, { "epoch": 8.95, "learning_rate": 4.8881529850746274e-05, "loss": 0.0102, "step": 9592 }, { "epoch": 8.95, "learning_rate": 4.888106343283582e-05, "loss": 0.0127, "step": 9596 }, { "epoch": 8.96, "learning_rate": 4.888059701492538e-05, "loss": 0.0036, "step": 9600 }, { "epoch": 8.96, "learning_rate": 4.8880130597014925e-05, "loss": 0.0038, "step": 9604 }, { "epoch": 8.96, "learning_rate": 4.887966417910448e-05, "loss": 0.0052, "step": 9608 }, { "epoch": 8.97, "learning_rate": 4.8879197761194035e-05, "loss": 0.0101, "step": 9612 }, { "epoch": 8.97, "learning_rate": 4.887873134328358e-05, "loss": 0.0144, "step": 9616 }, { "epoch": 8.97, "learning_rate": 4.887826492537314e-05, "loss": 0.0155, "step": 9620 }, { "epoch": 8.98, "learning_rate": 4.887779850746269e-05, "loss": 0.0092, "step": 9624 }, { "epoch": 8.98, "learning_rate": 4.887733208955224e-05, "loss": 0.0083, "step": 9628 }, { "epoch": 8.98, "learning_rate": 4.887686567164179e-05, "loss": 0.0097, "step": 9632 }, { "epoch": 8.99, "learning_rate": 4.8876399253731344e-05, "loss": 0.0148, "step": 9636 }, { "epoch": 8.99, "learning_rate": 4.88759328358209e-05, "loss": 0.0022, "step": 9640 }, { "epoch": 9.0, "learning_rate": 4.887546641791045e-05, "loss": 0.0131, "step": 9644 }, { "epoch": 9.0, "learning_rate": 4.8875e-05, "loss": 0.0049, "step": 9648 }, { "epoch": 9.0, "learning_rate": 4.887453358208956e-05, "loss": 0.004, "step": 9652 }, { "epoch": 9.01, "learning_rate": 4.8874067164179105e-05, "loss": 0.0092, "step": 9656 }, { "epoch": 9.01, "learning_rate": 4.887360074626866e-05, "loss": 0.0087, "step": 9660 }, { "epoch": 9.01, "learning_rate": 4.887313432835821e-05, "loss": 0.0054, "step": 9664 }, { "epoch": 9.02, "learning_rate": 4.887266791044776e-05, "loss": 0.0041, "step": 9668 }, { "epoch": 9.02, "learning_rate": 4.887220149253732e-05, "loss": 0.0093, "step": 9672 }, { "epoch": 9.03, "learning_rate": 4.8871735074626866e-05, "loss": 0.005, "step": 9676 }, { "epoch": 9.03, "learning_rate": 4.887126865671642e-05, "loss": 0.0079, "step": 9680 }, { "epoch": 9.03, "learning_rate": 4.8870802238805976e-05, "loss": 0.0252, "step": 9684 }, { "epoch": 9.04, "learning_rate": 4.8870335820895524e-05, "loss": 0.0045, "step": 9688 }, { "epoch": 9.04, "learning_rate": 4.886986940298507e-05, "loss": 0.0063, "step": 9692 }, { "epoch": 9.04, "learning_rate": 4.886940298507463e-05, "loss": 0.0023, "step": 9696 }, { "epoch": 9.05, "learning_rate": 4.886893656716418e-05, "loss": 0.0042, "step": 9700 }, { "epoch": 9.05, "learning_rate": 4.886847014925373e-05, "loss": 0.0088, "step": 9704 }, { "epoch": 9.06, "learning_rate": 4.8868003731343285e-05, "loss": 0.0078, "step": 9708 }, { "epoch": 9.06, "learning_rate": 4.886753731343284e-05, "loss": 0.0094, "step": 9712 }, { "epoch": 9.06, "learning_rate": 4.886707089552239e-05, "loss": 0.0031, "step": 9716 }, { "epoch": 9.07, "learning_rate": 4.886660447761194e-05, "loss": 0.008, "step": 9720 }, { "epoch": 9.07, "learning_rate": 4.886613805970149e-05, "loss": 0.0017, "step": 9724 }, { "epoch": 9.07, "learning_rate": 4.886567164179105e-05, "loss": 0.007, "step": 9728 }, { "epoch": 9.08, "learning_rate": 4.88652052238806e-05, "loss": 0.0045, "step": 9732 }, { "epoch": 9.08, "learning_rate": 4.886473880597015e-05, "loss": 0.0098, "step": 9736 }, { "epoch": 9.09, "learning_rate": 4.8864272388059704e-05, "loss": 0.0131, "step": 9740 }, { "epoch": 9.09, "learning_rate": 4.886380597014926e-05, "loss": 0.0022, "step": 9744 }, { "epoch": 9.09, "learning_rate": 4.886333955223881e-05, "loss": 0.0044, "step": 9748 }, { "epoch": 9.1, "learning_rate": 4.886287313432836e-05, "loss": 0.0078, "step": 9752 }, { "epoch": 9.1, "learning_rate": 4.886240671641791e-05, "loss": 0.0047, "step": 9756 }, { "epoch": 9.1, "learning_rate": 4.8861940298507465e-05, "loss": 0.008, "step": 9760 }, { "epoch": 9.11, "learning_rate": 4.886147388059702e-05, "loss": 0.0065, "step": 9764 }, { "epoch": 9.11, "learning_rate": 4.886100746268657e-05, "loss": 0.0024, "step": 9768 }, { "epoch": 9.12, "learning_rate": 4.886054104477612e-05, "loss": 0.0123, "step": 9772 }, { "epoch": 9.12, "learning_rate": 4.886007462686568e-05, "loss": 0.0016, "step": 9776 }, { "epoch": 9.12, "learning_rate": 4.8859608208955226e-05, "loss": 0.0018, "step": 9780 }, { "epoch": 9.13, "learning_rate": 4.8859141791044774e-05, "loss": 0.0096, "step": 9784 }, { "epoch": 9.13, "learning_rate": 4.8858675373134336e-05, "loss": 0.0037, "step": 9788 }, { "epoch": 9.13, "learning_rate": 4.8858208955223884e-05, "loss": 0.0041, "step": 9792 }, { "epoch": 9.14, "learning_rate": 4.885774253731343e-05, "loss": 0.0031, "step": 9796 }, { "epoch": 9.14, "learning_rate": 4.885727611940299e-05, "loss": 0.0084, "step": 9800 }, { "epoch": 9.15, "learning_rate": 4.885680970149254e-05, "loss": 0.0042, "step": 9804 }, { "epoch": 9.15, "learning_rate": 4.885634328358209e-05, "loss": 0.0035, "step": 9808 }, { "epoch": 9.15, "learning_rate": 4.8855876865671645e-05, "loss": 0.008, "step": 9812 }, { "epoch": 9.16, "learning_rate": 4.885541044776119e-05, "loss": 0.0092, "step": 9816 }, { "epoch": 9.16, "learning_rate": 4.885494402985075e-05, "loss": 0.0018, "step": 9820 }, { "epoch": 9.16, "learning_rate": 4.88544776119403e-05, "loss": 0.0074, "step": 9824 }, { "epoch": 9.17, "learning_rate": 4.885401119402985e-05, "loss": 0.0033, "step": 9828 }, { "epoch": 9.17, "learning_rate": 4.8853544776119406e-05, "loss": 0.0062, "step": 9832 }, { "epoch": 9.18, "learning_rate": 4.885307835820896e-05, "loss": 0.0108, "step": 9836 }, { "epoch": 9.18, "learning_rate": 4.885261194029851e-05, "loss": 0.0013, "step": 9840 }, { "epoch": 9.18, "learning_rate": 4.885214552238806e-05, "loss": 0.0041, "step": 9844 }, { "epoch": 9.19, "learning_rate": 4.885167910447762e-05, "loss": 0.0033, "step": 9848 }, { "epoch": 9.19, "learning_rate": 4.885121268656717e-05, "loss": 0.0097, "step": 9852 }, { "epoch": 9.19, "learning_rate": 4.8850746268656715e-05, "loss": 0.0038, "step": 9856 }, { "epoch": 9.2, "learning_rate": 4.885027985074627e-05, "loss": 0.0038, "step": 9860 }, { "epoch": 9.2, "learning_rate": 4.8849813432835825e-05, "loss": 0.0096, "step": 9864 }, { "epoch": 9.21, "learning_rate": 4.884934701492537e-05, "loss": 0.0028, "step": 9868 }, { "epoch": 9.21, "learning_rate": 4.884888059701493e-05, "loss": 0.0026, "step": 9872 }, { "epoch": 9.21, "learning_rate": 4.8848414179104476e-05, "loss": 0.0077, "step": 9876 }, { "epoch": 9.22, "learning_rate": 4.884794776119403e-05, "loss": 0.0042, "step": 9880 }, { "epoch": 9.22, "learning_rate": 4.8847481343283586e-05, "loss": 0.0126, "step": 9884 }, { "epoch": 9.22, "learning_rate": 4.8847014925373134e-05, "loss": 0.0051, "step": 9888 }, { "epoch": 9.23, "learning_rate": 4.884654850746269e-05, "loss": 0.0021, "step": 9892 }, { "epoch": 9.23, "learning_rate": 4.8846082089552244e-05, "loss": 0.0047, "step": 9896 }, { "epoch": 9.24, "learning_rate": 4.884561567164179e-05, "loss": 0.0127, "step": 9900 }, { "epoch": 9.24, "learning_rate": 4.884514925373135e-05, "loss": 0.0092, "step": 9904 }, { "epoch": 9.24, "learning_rate": 4.88446828358209e-05, "loss": 0.0014, "step": 9908 }, { "epoch": 9.25, "learning_rate": 4.884421641791045e-05, "loss": 0.0029, "step": 9912 }, { "epoch": 9.25, "learning_rate": 4.8843750000000005e-05, "loss": 0.0095, "step": 9916 }, { "epoch": 9.25, "learning_rate": 4.884328358208955e-05, "loss": 0.0046, "step": 9920 }, { "epoch": 9.26, "learning_rate": 4.884281716417911e-05, "loss": 0.0055, "step": 9924 }, { "epoch": 9.26, "learning_rate": 4.884235074626866e-05, "loss": 0.0018, "step": 9928 }, { "epoch": 9.26, "learning_rate": 4.884188432835821e-05, "loss": 0.0035, "step": 9932 }, { "epoch": 9.27, "learning_rate": 4.884141791044776e-05, "loss": 0.0005, "step": 9936 }, { "epoch": 9.27, "learning_rate": 4.884095149253732e-05, "loss": 0.0007, "step": 9940 }, { "epoch": 9.28, "learning_rate": 4.884048507462687e-05, "loss": 0.0067, "step": 9944 }, { "epoch": 9.28, "learning_rate": 4.884001865671642e-05, "loss": 0.0027, "step": 9948 }, { "epoch": 9.28, "learning_rate": 4.883955223880597e-05, "loss": 0.0144, "step": 9952 }, { "epoch": 9.29, "learning_rate": 4.883908582089553e-05, "loss": 0.0074, "step": 9956 }, { "epoch": 9.29, "learning_rate": 4.8838619402985075e-05, "loss": 0.0047, "step": 9960 }, { "epoch": 9.29, "learning_rate": 4.883815298507463e-05, "loss": 0.0044, "step": 9964 }, { "epoch": 9.3, "learning_rate": 4.8837686567164185e-05, "loss": 0.0054, "step": 9968 }, { "epoch": 9.3, "learning_rate": 4.883722014925373e-05, "loss": 0.0024, "step": 9972 }, { "epoch": 9.31, "learning_rate": 4.883675373134329e-05, "loss": 0.0095, "step": 9976 }, { "epoch": 9.31, "learning_rate": 4.8836287313432836e-05, "loss": 0.0029, "step": 9980 }, { "epoch": 9.31, "learning_rate": 4.883582089552239e-05, "loss": 0.0036, "step": 9984 }, { "epoch": 9.32, "learning_rate": 4.8835354477611946e-05, "loss": 0.0026, "step": 9988 }, { "epoch": 9.32, "learning_rate": 4.8834888059701494e-05, "loss": 0.0053, "step": 9992 }, { "epoch": 9.32, "learning_rate": 4.883442164179104e-05, "loss": 0.0005, "step": 9996 }, { "epoch": 9.33, "learning_rate": 4.8833955223880604e-05, "loss": 0.005, "step": 10000 }, { "epoch": 9.33, "eval_exact_match": 0.7166344294003868, "eval_exec": 0.7543520309477756, "eval_loss": 0.2650771737098694, "eval_runtime": 1372.3967, "eval_samples_per_second": 0.753, "step": 10000 }, { "epoch": 9.33, "learning_rate": 4.883348880597015e-05, "loss": 0.0067, "step": 10004 }, { "epoch": 9.34, "learning_rate": 4.88330223880597e-05, "loss": 0.0106, "step": 10008 }, { "epoch": 9.34, "learning_rate": 4.8832555970149255e-05, "loss": 0.003, "step": 10012 }, { "epoch": 9.34, "learning_rate": 4.883208955223881e-05, "loss": 0.0035, "step": 10016 }, { "epoch": 9.35, "learning_rate": 4.883162313432836e-05, "loss": 0.0091, "step": 10020 }, { "epoch": 9.35, "learning_rate": 4.883115671641791e-05, "loss": 0.0061, "step": 10024 }, { "epoch": 9.35, "learning_rate": 4.883069029850747e-05, "loss": 0.0104, "step": 10028 }, { "epoch": 9.36, "learning_rate": 4.8830223880597016e-05, "loss": 0.0064, "step": 10032 }, { "epoch": 9.36, "learning_rate": 4.882975746268657e-05, "loss": 0.0049, "step": 10036 }, { "epoch": 9.37, "learning_rate": 4.882929104477612e-05, "loss": 0.0095, "step": 10040 }, { "epoch": 9.37, "learning_rate": 4.8828824626865674e-05, "loss": 0.0055, "step": 10044 }, { "epoch": 9.37, "learning_rate": 4.882835820895523e-05, "loss": 0.0069, "step": 10048 }, { "epoch": 9.38, "learning_rate": 4.882789179104478e-05, "loss": 0.0087, "step": 10052 }, { "epoch": 9.38, "learning_rate": 4.882742537313433e-05, "loss": 0.006, "step": 10056 }, { "epoch": 9.38, "learning_rate": 4.8826958955223887e-05, "loss": 0.0081, "step": 10060 }, { "epoch": 9.39, "learning_rate": 4.8826492537313435e-05, "loss": 0.0057, "step": 10064 }, { "epoch": 9.39, "learning_rate": 4.882602611940299e-05, "loss": 0.0143, "step": 10068 }, { "epoch": 9.4, "learning_rate": 4.882555970149254e-05, "loss": 0.0006, "step": 10072 }, { "epoch": 9.4, "learning_rate": 4.882509328358209e-05, "loss": 0.0026, "step": 10076 }, { "epoch": 9.4, "learning_rate": 4.882462686567165e-05, "loss": 0.0029, "step": 10080 }, { "epoch": 9.41, "learning_rate": 4.8824160447761196e-05, "loss": 0.0095, "step": 10084 }, { "epoch": 9.41, "learning_rate": 4.882369402985075e-05, "loss": 0.0035, "step": 10088 }, { "epoch": 9.41, "learning_rate": 4.8823227611940306e-05, "loss": 0.005, "step": 10092 }, { "epoch": 9.42, "learning_rate": 4.8822761194029854e-05, "loss": 0.0078, "step": 10096 }, { "epoch": 9.42, "learning_rate": 4.88222947761194e-05, "loss": 0.0091, "step": 10100 }, { "epoch": 9.43, "learning_rate": 4.882182835820896e-05, "loss": 0.0016, "step": 10104 }, { "epoch": 9.43, "learning_rate": 4.882136194029851e-05, "loss": 0.0048, "step": 10108 }, { "epoch": 9.43, "learning_rate": 4.882089552238806e-05, "loss": 0.0057, "step": 10112 }, { "epoch": 9.44, "learning_rate": 4.8820429104477615e-05, "loss": 0.0101, "step": 10116 }, { "epoch": 9.44, "learning_rate": 4.881996268656717e-05, "loss": 0.0057, "step": 10120 }, { "epoch": 9.44, "learning_rate": 4.881949626865672e-05, "loss": 0.0196, "step": 10124 }, { "epoch": 9.45, "learning_rate": 4.881902985074627e-05, "loss": 0.0033, "step": 10128 }, { "epoch": 9.45, "learning_rate": 4.881856343283582e-05, "loss": 0.0071, "step": 10132 }, { "epoch": 9.46, "learning_rate": 4.8818097014925376e-05, "loss": 0.007, "step": 10136 }, { "epoch": 9.46, "learning_rate": 4.881763059701493e-05, "loss": 0.0107, "step": 10140 }, { "epoch": 9.46, "learning_rate": 4.881716417910448e-05, "loss": 0.0095, "step": 10144 }, { "epoch": 9.47, "learning_rate": 4.881669776119403e-05, "loss": 0.0059, "step": 10148 }, { "epoch": 9.47, "learning_rate": 4.881623134328359e-05, "loss": 0.0046, "step": 10152 }, { "epoch": 9.47, "learning_rate": 4.881576492537314e-05, "loss": 0.01, "step": 10156 }, { "epoch": 9.48, "learning_rate": 4.8815298507462685e-05, "loss": 0.0111, "step": 10160 }, { "epoch": 9.48, "learning_rate": 4.881483208955224e-05, "loss": 0.0019, "step": 10164 }, { "epoch": 9.49, "learning_rate": 4.8814365671641795e-05, "loss": 0.0036, "step": 10168 }, { "epoch": 9.49, "learning_rate": 4.881389925373134e-05, "loss": 0.0077, "step": 10172 }, { "epoch": 9.49, "learning_rate": 4.88134328358209e-05, "loss": 0.0048, "step": 10176 }, { "epoch": 9.5, "learning_rate": 4.881296641791045e-05, "loss": 0.0053, "step": 10180 }, { "epoch": 9.5, "learning_rate": 4.88125e-05, "loss": 0.009, "step": 10184 }, { "epoch": 9.5, "learning_rate": 4.8812033582089556e-05, "loss": 0.0019, "step": 10188 }, { "epoch": 9.51, "learning_rate": 4.8811567164179104e-05, "loss": 0.0117, "step": 10192 }, { "epoch": 9.51, "learning_rate": 4.881110074626866e-05, "loss": 0.0055, "step": 10196 }, { "epoch": 9.51, "learning_rate": 4.8810634328358213e-05, "loss": 0.0021, "step": 10200 }, { "epoch": 9.52, "learning_rate": 4.881016791044776e-05, "loss": 0.0033, "step": 10204 }, { "epoch": 9.52, "learning_rate": 4.880970149253731e-05, "loss": 0.0086, "step": 10208 }, { "epoch": 9.53, "learning_rate": 4.880923507462687e-05, "loss": 0.002, "step": 10212 }, { "epoch": 9.53, "learning_rate": 4.880876865671642e-05, "loss": 0.0155, "step": 10216 }, { "epoch": 9.53, "learning_rate": 4.8808302238805974e-05, "loss": 0.0059, "step": 10220 }, { "epoch": 9.54, "learning_rate": 4.880783582089552e-05, "loss": 0.0176, "step": 10224 }, { "epoch": 9.54, "learning_rate": 4.880736940298508e-05, "loss": 0.0054, "step": 10228 }, { "epoch": 9.54, "learning_rate": 4.880690298507463e-05, "loss": 0.0152, "step": 10232 }, { "epoch": 9.55, "learning_rate": 4.880643656716418e-05, "loss": 0.0048, "step": 10236 }, { "epoch": 9.55, "learning_rate": 4.8805970149253735e-05, "loss": 0.0032, "step": 10240 }, { "epoch": 9.56, "learning_rate": 4.880550373134329e-05, "loss": 0.0011, "step": 10244 }, { "epoch": 9.56, "learning_rate": 4.880503731343284e-05, "loss": 0.0076, "step": 10248 }, { "epoch": 9.56, "learning_rate": 4.880457089552239e-05, "loss": 0.0022, "step": 10252 }, { "epoch": 9.57, "learning_rate": 4.880410447761195e-05, "loss": 0.0075, "step": 10256 }, { "epoch": 9.57, "learning_rate": 4.8803638059701496e-05, "loss": 0.0046, "step": 10260 }, { "epoch": 9.57, "learning_rate": 4.8803171641791045e-05, "loss": 0.0122, "step": 10264 }, { "epoch": 9.58, "learning_rate": 4.88027052238806e-05, "loss": 0.0022, "step": 10268 }, { "epoch": 9.58, "learning_rate": 4.8802238805970154e-05, "loss": 0.0089, "step": 10272 }, { "epoch": 9.59, "learning_rate": 4.88017723880597e-05, "loss": 0.0036, "step": 10276 }, { "epoch": 9.59, "learning_rate": 4.880130597014926e-05, "loss": 0.0034, "step": 10280 }, { "epoch": 9.59, "learning_rate": 4.8800839552238806e-05, "loss": 0.0018, "step": 10284 }, { "epoch": 9.6, "learning_rate": 4.880037313432836e-05, "loss": 0.013, "step": 10288 }, { "epoch": 9.6, "learning_rate": 4.8799906716417915e-05, "loss": 0.0064, "step": 10292 }, { "epoch": 9.6, "learning_rate": 4.8799440298507464e-05, "loss": 0.0188, "step": 10296 }, { "epoch": 9.61, "learning_rate": 4.879897388059702e-05, "loss": 0.0066, "step": 10300 }, { "epoch": 9.61, "learning_rate": 4.879850746268657e-05, "loss": 0.002, "step": 10304 }, { "epoch": 9.62, "learning_rate": 4.879804104477612e-05, "loss": 0.0185, "step": 10308 }, { "epoch": 9.62, "learning_rate": 4.879757462686567e-05, "loss": 0.0018, "step": 10312 }, { "epoch": 9.62, "learning_rate": 4.8797108208955224e-05, "loss": 0.0054, "step": 10316 }, { "epoch": 9.63, "learning_rate": 4.879664179104478e-05, "loss": 0.0053, "step": 10320 }, { "epoch": 9.63, "learning_rate": 4.879617537313433e-05, "loss": 0.0041, "step": 10324 }, { "epoch": 9.63, "learning_rate": 4.879570895522388e-05, "loss": 0.0044, "step": 10328 }, { "epoch": 9.64, "learning_rate": 4.879524253731344e-05, "loss": 0.0053, "step": 10332 }, { "epoch": 9.64, "learning_rate": 4.8794776119402985e-05, "loss": 0.0015, "step": 10336 }, { "epoch": 9.65, "learning_rate": 4.879430970149254e-05, "loss": 0.0031, "step": 10340 }, { "epoch": 9.65, "learning_rate": 4.879384328358209e-05, "loss": 0.0079, "step": 10344 }, { "epoch": 9.65, "learning_rate": 4.8793376865671643e-05, "loss": 0.0049, "step": 10348 }, { "epoch": 9.66, "learning_rate": 4.87929104477612e-05, "loss": 0.0065, "step": 10352 }, { "epoch": 9.66, "learning_rate": 4.8792444029850746e-05, "loss": 0.0036, "step": 10356 }, { "epoch": 9.66, "learning_rate": 4.87919776119403e-05, "loss": 0.004, "step": 10360 }, { "epoch": 9.67, "learning_rate": 4.8791511194029856e-05, "loss": 0.0037, "step": 10364 }, { "epoch": 9.67, "learning_rate": 4.8791044776119404e-05, "loss": 0.007, "step": 10368 }, { "epoch": 9.68, "learning_rate": 4.879057835820895e-05, "loss": 0.0059, "step": 10372 }, { "epoch": 9.68, "learning_rate": 4.879011194029851e-05, "loss": 0.0114, "step": 10376 }, { "epoch": 9.68, "learning_rate": 4.878964552238806e-05, "loss": 0.0061, "step": 10380 }, { "epoch": 9.69, "learning_rate": 4.878917910447762e-05, "loss": 0.0068, "step": 10384 }, { "epoch": 9.69, "learning_rate": 4.8788712686567165e-05, "loss": 0.0023, "step": 10388 }, { "epoch": 9.69, "learning_rate": 4.878824626865672e-05, "loss": 0.0013, "step": 10392 }, { "epoch": 9.7, "learning_rate": 4.8787779850746275e-05, "loss": 0.018, "step": 10396 }, { "epoch": 9.7, "learning_rate": 4.878731343283582e-05, "loss": 0.0046, "step": 10400 }, { "epoch": 9.71, "learning_rate": 4.878684701492537e-05, "loss": 0.0023, "step": 10404 }, { "epoch": 9.71, "learning_rate": 4.878638059701493e-05, "loss": 0.0012, "step": 10408 }, { "epoch": 9.71, "learning_rate": 4.878591417910448e-05, "loss": 0.0094, "step": 10412 }, { "epoch": 9.72, "learning_rate": 4.878544776119403e-05, "loss": 0.0019, "step": 10416 }, { "epoch": 9.72, "learning_rate": 4.8784981343283584e-05, "loss": 0.0021, "step": 10420 }, { "epoch": 9.72, "learning_rate": 4.878451492537314e-05, "loss": 0.0051, "step": 10424 }, { "epoch": 9.73, "learning_rate": 4.878404850746269e-05, "loss": 0.0066, "step": 10428 }, { "epoch": 9.73, "learning_rate": 4.878358208955224e-05, "loss": 0.0152, "step": 10432 }, { "epoch": 9.73, "learning_rate": 4.878311567164179e-05, "loss": 0.0071, "step": 10436 }, { "epoch": 9.74, "learning_rate": 4.8782649253731345e-05, "loss": 0.0122, "step": 10440 }, { "epoch": 9.74, "learning_rate": 4.87821828358209e-05, "loss": 0.0071, "step": 10444 }, { "epoch": 9.75, "learning_rate": 4.878171641791045e-05, "loss": 0.0062, "step": 10448 }, { "epoch": 9.75, "learning_rate": 4.878125e-05, "loss": 0.0091, "step": 10452 }, { "epoch": 9.75, "learning_rate": 4.878078358208956e-05, "loss": 0.0056, "step": 10456 }, { "epoch": 9.76, "learning_rate": 4.8780317164179106e-05, "loss": 0.0059, "step": 10460 }, { "epoch": 9.76, "learning_rate": 4.8779850746268654e-05, "loss": 0.0024, "step": 10464 }, { "epoch": 9.76, "learning_rate": 4.8779384328358216e-05, "loss": 0.0057, "step": 10468 }, { "epoch": 9.77, "learning_rate": 4.8778917910447764e-05, "loss": 0.0105, "step": 10472 }, { "epoch": 9.77, "learning_rate": 4.877845149253731e-05, "loss": 0.0117, "step": 10476 }, { "epoch": 9.78, "learning_rate": 4.877798507462687e-05, "loss": 0.0049, "step": 10480 }, { "epoch": 9.78, "learning_rate": 4.877751865671642e-05, "loss": 0.0039, "step": 10484 }, { "epoch": 9.78, "learning_rate": 4.877705223880597e-05, "loss": 0.0077, "step": 10488 }, { "epoch": 9.79, "learning_rate": 4.8776585820895525e-05, "loss": 0.0077, "step": 10492 }, { "epoch": 9.79, "learning_rate": 4.877611940298507e-05, "loss": 0.0067, "step": 10496 }, { "epoch": 9.79, "learning_rate": 4.877565298507463e-05, "loss": 0.0076, "step": 10500 }, { "epoch": 9.79, "eval_exact_match": 0.7263056092843327, "eval_exec": 0.746615087040619, "eval_loss": 0.25435617566108704, "eval_runtime": 1163.3507, "eval_samples_per_second": 0.889, "step": 10500 }, { "epoch": 9.8, "learning_rate": 4.877518656716418e-05, "loss": 0.0073, "step": 10504 }, { "epoch": 9.8, "learning_rate": 4.877472014925373e-05, "loss": 0.0052, "step": 10508 }, { "epoch": 9.81, "learning_rate": 4.8774253731343286e-05, "loss": 0.0037, "step": 10512 }, { "epoch": 9.81, "learning_rate": 4.877378731343284e-05, "loss": 0.0038, "step": 10516 }, { "epoch": 9.81, "learning_rate": 4.877332089552239e-05, "loss": 0.0067, "step": 10520 }, { "epoch": 9.82, "learning_rate": 4.877285447761194e-05, "loss": 0.0172, "step": 10524 }, { "epoch": 9.82, "learning_rate": 4.87723880597015e-05, "loss": 0.0064, "step": 10528 }, { "epoch": 9.82, "learning_rate": 4.877192164179105e-05, "loss": 0.0025, "step": 10532 }, { "epoch": 9.83, "learning_rate": 4.8771455223880595e-05, "loss": 0.0071, "step": 10536 }, { "epoch": 9.83, "learning_rate": 4.877098880597015e-05, "loss": 0.0036, "step": 10540 }, { "epoch": 9.84, "learning_rate": 4.8770522388059705e-05, "loss": 0.002, "step": 10544 }, { "epoch": 9.84, "learning_rate": 4.877005597014926e-05, "loss": 0.0026, "step": 10548 }, { "epoch": 9.84, "learning_rate": 4.876958955223881e-05, "loss": 0.0103, "step": 10552 }, { "epoch": 9.85, "learning_rate": 4.8769123134328356e-05, "loss": 0.022, "step": 10556 }, { "epoch": 9.85, "learning_rate": 4.876865671641792e-05, "loss": 0.0156, "step": 10560 }, { "epoch": 9.85, "learning_rate": 4.8768190298507466e-05, "loss": 0.0061, "step": 10564 }, { "epoch": 9.86, "learning_rate": 4.8767723880597014e-05, "loss": 0.0035, "step": 10568 }, { "epoch": 9.86, "learning_rate": 4.876725746268657e-05, "loss": 0.0074, "step": 10572 }, { "epoch": 9.87, "learning_rate": 4.8766791044776124e-05, "loss": 0.0039, "step": 10576 }, { "epoch": 9.87, "learning_rate": 4.876632462686567e-05, "loss": 0.0038, "step": 10580 }, { "epoch": 9.87, "learning_rate": 4.876585820895523e-05, "loss": 0.0179, "step": 10584 }, { "epoch": 9.88, "learning_rate": 4.876539179104478e-05, "loss": 0.0066, "step": 10588 }, { "epoch": 9.88, "learning_rate": 4.876492537313433e-05, "loss": 0.0219, "step": 10592 }, { "epoch": 9.88, "learning_rate": 4.8764458955223885e-05, "loss": 0.0091, "step": 10596 }, { "epoch": 9.89, "learning_rate": 4.876399253731343e-05, "loss": 0.0163, "step": 10600 }, { "epoch": 9.89, "learning_rate": 4.876352611940299e-05, "loss": 0.0063, "step": 10604 }, { "epoch": 9.9, "learning_rate": 4.876305970149254e-05, "loss": 0.0016, "step": 10608 }, { "epoch": 9.9, "learning_rate": 4.876259328358209e-05, "loss": 0.0107, "step": 10612 }, { "epoch": 9.9, "learning_rate": 4.876212686567164e-05, "loss": 0.0038, "step": 10616 }, { "epoch": 9.91, "learning_rate": 4.87616604477612e-05, "loss": 0.0051, "step": 10620 }, { "epoch": 9.91, "learning_rate": 4.876119402985075e-05, "loss": 0.0046, "step": 10624 }, { "epoch": 9.91, "learning_rate": 4.87607276119403e-05, "loss": 0.0087, "step": 10628 }, { "epoch": 9.92, "learning_rate": 4.876026119402985e-05, "loss": 0.0055, "step": 10632 }, { "epoch": 9.92, "learning_rate": 4.875979477611941e-05, "loss": 0.0079, "step": 10636 }, { "epoch": 9.93, "learning_rate": 4.8759328358208955e-05, "loss": 0.0017, "step": 10640 }, { "epoch": 9.93, "learning_rate": 4.875886194029851e-05, "loss": 0.0048, "step": 10644 }, { "epoch": 9.93, "learning_rate": 4.8758395522388065e-05, "loss": 0.0052, "step": 10648 }, { "epoch": 9.94, "learning_rate": 4.875792910447761e-05, "loss": 0.0089, "step": 10652 }, { "epoch": 9.94, "learning_rate": 4.875746268656717e-05, "loss": 0.0057, "step": 10656 }, { "epoch": 9.94, "learning_rate": 4.8756996268656716e-05, "loss": 0.0079, "step": 10660 }, { "epoch": 9.95, "learning_rate": 4.875652985074627e-05, "loss": 0.0027, "step": 10664 }, { "epoch": 9.95, "learning_rate": 4.8756063432835826e-05, "loss": 0.0058, "step": 10668 }, { "epoch": 9.96, "learning_rate": 4.8755597014925374e-05, "loss": 0.0024, "step": 10672 }, { "epoch": 9.96, "learning_rate": 4.875513059701492e-05, "loss": 0.0095, "step": 10676 }, { "epoch": 9.96, "learning_rate": 4.8754664179104484e-05, "loss": 0.0044, "step": 10680 }, { "epoch": 9.97, "learning_rate": 4.875419776119403e-05, "loss": 0.0091, "step": 10684 }, { "epoch": 9.97, "learning_rate": 4.875373134328358e-05, "loss": 0.0044, "step": 10688 }, { "epoch": 9.97, "learning_rate": 4.8753264925373135e-05, "loss": 0.015, "step": 10692 }, { "epoch": 9.98, "learning_rate": 4.875279850746269e-05, "loss": 0.0052, "step": 10696 }, { "epoch": 9.98, "learning_rate": 4.875233208955224e-05, "loss": 0.0128, "step": 10700 }, { "epoch": 9.98, "learning_rate": 4.875186567164179e-05, "loss": 0.0034, "step": 10704 }, { "epoch": 9.99, "learning_rate": 4.875139925373135e-05, "loss": 0.0081, "step": 10708 }, { "epoch": 9.99, "learning_rate": 4.87509328358209e-05, "loss": 0.0177, "step": 10712 }, { "epoch": 10.0, "learning_rate": 4.875046641791045e-05, "loss": 0.0081, "step": 10716 }, { "epoch": 10.0, "learning_rate": 4.875e-05, "loss": 0.0112, "step": 10720 }, { "epoch": 10.0, "learning_rate": 4.8749533582089554e-05, "loss": 0.0034, "step": 10724 }, { "epoch": 10.01, "learning_rate": 4.874906716417911e-05, "loss": 0.0058, "step": 10728 }, { "epoch": 10.01, "learning_rate": 4.874860074626866e-05, "loss": 0.0068, "step": 10732 }, { "epoch": 10.01, "learning_rate": 4.874813432835821e-05, "loss": 0.003, "step": 10736 }, { "epoch": 10.02, "learning_rate": 4.874766791044777e-05, "loss": 0.0021, "step": 10740 }, { "epoch": 10.02, "learning_rate": 4.8747201492537315e-05, "loss": 0.0023, "step": 10744 }, { "epoch": 10.03, "learning_rate": 4.874673507462687e-05, "loss": 0.0125, "step": 10748 }, { "epoch": 10.03, "learning_rate": 4.874626865671642e-05, "loss": 0.0033, "step": 10752 }, { "epoch": 10.03, "learning_rate": 4.874580223880597e-05, "loss": 0.0033, "step": 10756 }, { "epoch": 10.04, "learning_rate": 4.874533582089553e-05, "loss": 0.004, "step": 10760 }, { "epoch": 10.04, "learning_rate": 4.8744869402985076e-05, "loss": 0.0077, "step": 10764 }, { "epoch": 10.04, "learning_rate": 4.874440298507463e-05, "loss": 0.0011, "step": 10768 }, { "epoch": 10.05, "learning_rate": 4.8743936567164186e-05, "loss": 0.018, "step": 10772 }, { "epoch": 10.05, "learning_rate": 4.8743470149253734e-05, "loss": 0.0127, "step": 10776 }, { "epoch": 10.06, "learning_rate": 4.874300373134328e-05, "loss": 0.0034, "step": 10780 }, { "epoch": 10.06, "learning_rate": 4.874253731343284e-05, "loss": 0.0034, "step": 10784 }, { "epoch": 10.06, "learning_rate": 4.874207089552239e-05, "loss": 0.0028, "step": 10788 }, { "epoch": 10.07, "learning_rate": 4.874160447761194e-05, "loss": 0.0033, "step": 10792 }, { "epoch": 10.07, "learning_rate": 4.8741138059701495e-05, "loss": 0.0063, "step": 10796 }, { "epoch": 10.07, "learning_rate": 4.874067164179105e-05, "loss": 0.0076, "step": 10800 }, { "epoch": 10.08, "learning_rate": 4.87402052238806e-05, "loss": 0.0073, "step": 10804 }, { "epoch": 10.08, "learning_rate": 4.873973880597015e-05, "loss": 0.0182, "step": 10808 }, { "epoch": 10.09, "learning_rate": 4.87392723880597e-05, "loss": 0.0039, "step": 10812 }, { "epoch": 10.09, "learning_rate": 4.8738805970149256e-05, "loss": 0.0044, "step": 10816 }, { "epoch": 10.09, "learning_rate": 4.873833955223881e-05, "loss": 0.0031, "step": 10820 }, { "epoch": 10.1, "learning_rate": 4.873787313432836e-05, "loss": 0.004, "step": 10824 }, { "epoch": 10.1, "learning_rate": 4.873740671641791e-05, "loss": 0.0014, "step": 10828 }, { "epoch": 10.1, "learning_rate": 4.873694029850747e-05, "loss": 0.0011, "step": 10832 }, { "epoch": 10.11, "learning_rate": 4.873647388059702e-05, "loss": 0.0087, "step": 10836 }, { "epoch": 10.11, "learning_rate": 4.8736007462686565e-05, "loss": 0.0035, "step": 10840 }, { "epoch": 10.12, "learning_rate": 4.873554104477612e-05, "loss": 0.0023, "step": 10844 }, { "epoch": 10.12, "learning_rate": 4.8735074626865675e-05, "loss": 0.0029, "step": 10848 }, { "epoch": 10.12, "learning_rate": 4.873460820895522e-05, "loss": 0.0066, "step": 10852 }, { "epoch": 10.13, "learning_rate": 4.873414179104478e-05, "loss": 0.0055, "step": 10856 }, { "epoch": 10.13, "learning_rate": 4.873367537313433e-05, "loss": 0.0012, "step": 10860 }, { "epoch": 10.13, "learning_rate": 4.873320895522388e-05, "loss": 0.0036, "step": 10864 }, { "epoch": 10.14, "learning_rate": 4.8732742537313436e-05, "loss": 0.0055, "step": 10868 }, { "epoch": 10.14, "learning_rate": 4.8732276119402984e-05, "loss": 0.006, "step": 10872 }, { "epoch": 10.15, "learning_rate": 4.8731809701492546e-05, "loss": 0.0038, "step": 10876 }, { "epoch": 10.15, "learning_rate": 4.8731343283582094e-05, "loss": 0.0035, "step": 10880 }, { "epoch": 10.15, "learning_rate": 4.873087686567164e-05, "loss": 0.0044, "step": 10884 }, { "epoch": 10.16, "learning_rate": 4.87304104477612e-05, "loss": 0.0012, "step": 10888 }, { "epoch": 10.16, "learning_rate": 4.872994402985075e-05, "loss": 0.0067, "step": 10892 }, { "epoch": 10.16, "learning_rate": 4.87294776119403e-05, "loss": 0.0057, "step": 10896 }, { "epoch": 10.17, "learning_rate": 4.8729011194029855e-05, "loss": 0.0012, "step": 10900 }, { "epoch": 10.17, "learning_rate": 4.87285447761194e-05, "loss": 0.0095, "step": 10904 }, { "epoch": 10.18, "learning_rate": 4.872807835820896e-05, "loss": 0.003, "step": 10908 }, { "epoch": 10.18, "learning_rate": 4.872761194029851e-05, "loss": 0.0048, "step": 10912 }, { "epoch": 10.18, "learning_rate": 4.872714552238806e-05, "loss": 0.0052, "step": 10916 }, { "epoch": 10.19, "learning_rate": 4.8726679104477616e-05, "loss": 0.0047, "step": 10920 }, { "epoch": 10.19, "learning_rate": 4.872621268656717e-05, "loss": 0.0015, "step": 10924 }, { "epoch": 10.19, "learning_rate": 4.872574626865672e-05, "loss": 0.0007, "step": 10928 }, { "epoch": 10.2, "learning_rate": 4.872527985074627e-05, "loss": 0.0024, "step": 10932 }, { "epoch": 10.2, "learning_rate": 4.872481343283583e-05, "loss": 0.003, "step": 10936 }, { "epoch": 10.21, "learning_rate": 4.872434701492538e-05, "loss": 0.0018, "step": 10940 }, { "epoch": 10.21, "learning_rate": 4.8723880597014925e-05, "loss": 0.0084, "step": 10944 }, { "epoch": 10.21, "learning_rate": 4.872341417910448e-05, "loss": 0.0087, "step": 10948 }, { "epoch": 10.22, "learning_rate": 4.8722947761194035e-05, "loss": 0.0147, "step": 10952 }, { "epoch": 10.22, "learning_rate": 4.872248134328358e-05, "loss": 0.0021, "step": 10956 }, { "epoch": 10.22, "learning_rate": 4.872201492537314e-05, "loss": 0.0037, "step": 10960 }, { "epoch": 10.23, "learning_rate": 4.8721548507462686e-05, "loss": 0.0021, "step": 10964 }, { "epoch": 10.23, "learning_rate": 4.872108208955224e-05, "loss": 0.0016, "step": 10968 }, { "epoch": 10.24, "learning_rate": 4.8720615671641796e-05, "loss": 0.003, "step": 10972 }, { "epoch": 10.24, "learning_rate": 4.8720149253731344e-05, "loss": 0.0034, "step": 10976 }, { "epoch": 10.24, "learning_rate": 4.87196828358209e-05, "loss": 0.0032, "step": 10980 }, { "epoch": 10.25, "learning_rate": 4.8719216417910454e-05, "loss": 0.0095, "step": 10984 }, { "epoch": 10.25, "learning_rate": 4.871875e-05, "loss": 0.0083, "step": 10988 }, { "epoch": 10.25, "learning_rate": 4.871828358208955e-05, "loss": 0.0156, "step": 10992 }, { "epoch": 10.26, "learning_rate": 4.8717817164179105e-05, "loss": 0.0089, "step": 10996 }, { "epoch": 10.26, "learning_rate": 4.871735074626866e-05, "loss": 0.0049, "step": 11000 }, { "epoch": 10.26, "eval_exact_match": 0.7388781431334622, "eval_exec": 0.7514506769825918, "eval_loss": 0.25768712162971497, "eval_runtime": 1364.5681, "eval_samples_per_second": 0.758, "step": 11000 }, { "epoch": 10.26, "learning_rate": 4.871688432835821e-05, "loss": 0.0159, "step": 11004 }, { "epoch": 10.27, "learning_rate": 4.871641791044776e-05, "loss": 0.0124, "step": 11008 }, { "epoch": 10.27, "learning_rate": 4.871595149253732e-05, "loss": 0.0136, "step": 11012 }, { "epoch": 10.28, "learning_rate": 4.8715485074626866e-05, "loss": 0.0046, "step": 11016 }, { "epoch": 10.28, "learning_rate": 4.871501865671642e-05, "loss": 0.0018, "step": 11020 }, { "epoch": 10.28, "learning_rate": 4.871455223880597e-05, "loss": 0.0029, "step": 11024 }, { "epoch": 10.29, "learning_rate": 4.8714085820895524e-05, "loss": 0.002, "step": 11028 }, { "epoch": 10.29, "learning_rate": 4.871361940298508e-05, "loss": 0.0022, "step": 11032 }, { "epoch": 10.29, "learning_rate": 4.871315298507463e-05, "loss": 0.006, "step": 11036 }, { "epoch": 10.3, "learning_rate": 4.871268656716418e-05, "loss": 0.005, "step": 11040 }, { "epoch": 10.3, "learning_rate": 4.8712220149253737e-05, "loss": 0.0033, "step": 11044 }, { "epoch": 10.31, "learning_rate": 4.8711753731343285e-05, "loss": 0.0016, "step": 11048 }, { "epoch": 10.31, "learning_rate": 4.871128731343284e-05, "loss": 0.0011, "step": 11052 }, { "epoch": 10.31, "learning_rate": 4.871082089552239e-05, "loss": 0.0031, "step": 11056 }, { "epoch": 10.32, "learning_rate": 4.871035447761194e-05, "loss": 0.0041, "step": 11060 }, { "epoch": 10.32, "learning_rate": 4.87098880597015e-05, "loss": 0.0029, "step": 11064 }, { "epoch": 10.32, "learning_rate": 4.8709421641791046e-05, "loss": 0.0194, "step": 11068 }, { "epoch": 10.33, "learning_rate": 4.87089552238806e-05, "loss": 0.0036, "step": 11072 }, { "epoch": 10.33, "learning_rate": 4.8708488805970156e-05, "loss": 0.0092, "step": 11076 }, { "epoch": 10.34, "learning_rate": 4.8708022388059704e-05, "loss": 0.0026, "step": 11080 }, { "epoch": 10.34, "learning_rate": 4.870755597014925e-05, "loss": 0.0023, "step": 11084 }, { "epoch": 10.34, "learning_rate": 4.8707089552238813e-05, "loss": 0.0055, "step": 11088 }, { "epoch": 10.35, "learning_rate": 4.870662313432836e-05, "loss": 0.0009, "step": 11092 }, { "epoch": 10.35, "learning_rate": 4.870615671641791e-05, "loss": 0.0041, "step": 11096 }, { "epoch": 10.35, "learning_rate": 4.8705690298507465e-05, "loss": 0.0043, "step": 11100 }, { "epoch": 10.36, "learning_rate": 4.870522388059702e-05, "loss": 0.0057, "step": 11104 }, { "epoch": 10.36, "learning_rate": 4.870475746268657e-05, "loss": 0.0052, "step": 11108 }, { "epoch": 10.37, "learning_rate": 4.870429104477612e-05, "loss": 0.003, "step": 11112 }, { "epoch": 10.37, "learning_rate": 4.870382462686567e-05, "loss": 0.0026, "step": 11116 }, { "epoch": 10.37, "learning_rate": 4.8703358208955226e-05, "loss": 0.0012, "step": 11120 }, { "epoch": 10.38, "learning_rate": 4.870289179104478e-05, "loss": 0.0201, "step": 11124 }, { "epoch": 10.38, "learning_rate": 4.870242537313433e-05, "loss": 0.0014, "step": 11128 }, { "epoch": 10.38, "learning_rate": 4.8701958955223884e-05, "loss": 0.0032, "step": 11132 }, { "epoch": 10.39, "learning_rate": 4.870149253731344e-05, "loss": 0.0047, "step": 11136 }, { "epoch": 10.39, "learning_rate": 4.8701026119402987e-05, "loss": 0.0053, "step": 11140 }, { "epoch": 10.4, "learning_rate": 4.8700559701492535e-05, "loss": 0.009, "step": 11144 }, { "epoch": 10.4, "learning_rate": 4.8700093283582096e-05, "loss": 0.0018, "step": 11148 }, { "epoch": 10.4, "learning_rate": 4.8699626865671645e-05, "loss": 0.0007, "step": 11152 }, { "epoch": 10.41, "learning_rate": 4.869916044776119e-05, "loss": 0.005, "step": 11156 }, { "epoch": 10.41, "learning_rate": 4.869869402985075e-05, "loss": 0.0029, "step": 11160 }, { "epoch": 10.41, "learning_rate": 4.86982276119403e-05, "loss": 0.002, "step": 11164 }, { "epoch": 10.42, "learning_rate": 4.869776119402985e-05, "loss": 0.0049, "step": 11168 }, { "epoch": 10.42, "learning_rate": 4.8697294776119406e-05, "loss": 0.0034, "step": 11172 }, { "epoch": 10.43, "learning_rate": 4.8696828358208954e-05, "loss": 0.0078, "step": 11176 }, { "epoch": 10.43, "learning_rate": 4.869636194029851e-05, "loss": 0.0016, "step": 11180 }, { "epoch": 10.43, "learning_rate": 4.8695895522388063e-05, "loss": 0.0114, "step": 11184 }, { "epoch": 10.44, "learning_rate": 4.869542910447761e-05, "loss": 0.0186, "step": 11188 }, { "epoch": 10.44, "learning_rate": 4.8694962686567167e-05, "loss": 0.0076, "step": 11192 }, { "epoch": 10.44, "learning_rate": 4.869449626865672e-05, "loss": 0.0042, "step": 11196 }, { "epoch": 10.45, "learning_rate": 4.869402985074627e-05, "loss": 0.019, "step": 11200 }, { "epoch": 10.45, "learning_rate": 4.8693563432835824e-05, "loss": 0.0029, "step": 11204 }, { "epoch": 10.46, "learning_rate": 4.869309701492538e-05, "loss": 0.0135, "step": 11208 }, { "epoch": 10.46, "learning_rate": 4.869263059701493e-05, "loss": 0.002, "step": 11212 }, { "epoch": 10.46, "learning_rate": 4.869216417910448e-05, "loss": 0.0021, "step": 11216 }, { "epoch": 10.47, "learning_rate": 4.869169776119403e-05, "loss": 0.003, "step": 11220 }, { "epoch": 10.47, "learning_rate": 4.8691231343283585e-05, "loss": 0.0094, "step": 11224 }, { "epoch": 10.47, "learning_rate": 4.869076492537314e-05, "loss": 0.0035, "step": 11228 }, { "epoch": 10.48, "learning_rate": 4.869029850746269e-05, "loss": 0.0061, "step": 11232 }, { "epoch": 10.48, "learning_rate": 4.868983208955224e-05, "loss": 0.0027, "step": 11236 }, { "epoch": 10.49, "learning_rate": 4.86893656716418e-05, "loss": 0.003, "step": 11240 }, { "epoch": 10.49, "learning_rate": 4.8688899253731346e-05, "loss": 0.0033, "step": 11244 }, { "epoch": 10.49, "learning_rate": 4.8688432835820895e-05, "loss": 0.004, "step": 11248 }, { "epoch": 10.5, "learning_rate": 4.868796641791045e-05, "loss": 0.0024, "step": 11252 }, { "epoch": 10.5, "learning_rate": 4.8687500000000004e-05, "loss": 0.0114, "step": 11256 }, { "epoch": 10.5, "learning_rate": 4.868703358208955e-05, "loss": 0.0017, "step": 11260 }, { "epoch": 10.51, "learning_rate": 4.868656716417911e-05, "loss": 0.0173, "step": 11264 }, { "epoch": 10.51, "learning_rate": 4.868610074626866e-05, "loss": 0.0101, "step": 11268 }, { "epoch": 10.51, "learning_rate": 4.868563432835821e-05, "loss": 0.0054, "step": 11272 }, { "epoch": 10.52, "learning_rate": 4.8685167910447765e-05, "loss": 0.0031, "step": 11276 }, { "epoch": 10.52, "learning_rate": 4.8684701492537313e-05, "loss": 0.0042, "step": 11280 }, { "epoch": 10.53, "learning_rate": 4.868423507462687e-05, "loss": 0.01, "step": 11284 }, { "epoch": 10.53, "learning_rate": 4.868376865671642e-05, "loss": 0.0073, "step": 11288 }, { "epoch": 10.53, "learning_rate": 4.868330223880597e-05, "loss": 0.002, "step": 11292 }, { "epoch": 10.54, "learning_rate": 4.868283582089552e-05, "loss": 0.0045, "step": 11296 }, { "epoch": 10.54, "learning_rate": 4.868236940298508e-05, "loss": 0.0062, "step": 11300 }, { "epoch": 10.54, "learning_rate": 4.868190298507463e-05, "loss": 0.0057, "step": 11304 }, { "epoch": 10.55, "learning_rate": 4.868143656716418e-05, "loss": 0.0037, "step": 11308 }, { "epoch": 10.55, "learning_rate": 4.868097014925373e-05, "loss": 0.0071, "step": 11312 }, { "epoch": 10.56, "learning_rate": 4.868050373134329e-05, "loss": 0.0073, "step": 11316 }, { "epoch": 10.56, "learning_rate": 4.8680037313432835e-05, "loss": 0.0121, "step": 11320 }, { "epoch": 10.56, "learning_rate": 4.867957089552239e-05, "loss": 0.0091, "step": 11324 }, { "epoch": 10.57, "learning_rate": 4.8679104477611945e-05, "loss": 0.0058, "step": 11328 }, { "epoch": 10.57, "learning_rate": 4.8678638059701493e-05, "loss": 0.0047, "step": 11332 }, { "epoch": 10.57, "learning_rate": 4.867817164179105e-05, "loss": 0.002, "step": 11336 }, { "epoch": 10.58, "learning_rate": 4.8677705223880596e-05, "loss": 0.0051, "step": 11340 }, { "epoch": 10.58, "learning_rate": 4.867723880597015e-05, "loss": 0.0096, "step": 11344 }, { "epoch": 10.59, "learning_rate": 4.8676772388059706e-05, "loss": 0.0019, "step": 11348 }, { "epoch": 10.59, "learning_rate": 4.8676305970149254e-05, "loss": 0.006, "step": 11352 }, { "epoch": 10.59, "learning_rate": 4.86758395522388e-05, "loss": 0.0018, "step": 11356 }, { "epoch": 10.6, "learning_rate": 4.8675373134328364e-05, "loss": 0.0004, "step": 11360 }, { "epoch": 10.6, "learning_rate": 4.867490671641791e-05, "loss": 0.0032, "step": 11364 }, { "epoch": 10.6, "learning_rate": 4.867444029850747e-05, "loss": 0.0023, "step": 11368 }, { "epoch": 10.61, "learning_rate": 4.8673973880597015e-05, "loss": 0.0312, "step": 11372 }, { "epoch": 10.61, "learning_rate": 4.867350746268657e-05, "loss": 0.0034, "step": 11376 }, { "epoch": 10.62, "learning_rate": 4.8673041044776125e-05, "loss": 0.0044, "step": 11380 }, { "epoch": 10.62, "learning_rate": 4.867257462686567e-05, "loss": 0.0065, "step": 11384 }, { "epoch": 10.62, "learning_rate": 4.867210820895523e-05, "loss": 0.0032, "step": 11388 }, { "epoch": 10.63, "learning_rate": 4.867164179104478e-05, "loss": 0.0054, "step": 11392 }, { "epoch": 10.63, "learning_rate": 4.867117537313433e-05, "loss": 0.0187, "step": 11396 }, { "epoch": 10.63, "learning_rate": 4.867070895522388e-05, "loss": 0.0059, "step": 11400 }, { "epoch": 10.64, "learning_rate": 4.8670242537313434e-05, "loss": 0.0104, "step": 11404 }, { "epoch": 10.64, "learning_rate": 4.866977611940299e-05, "loss": 0.0055, "step": 11408 }, { "epoch": 10.65, "learning_rate": 4.866930970149254e-05, "loss": 0.0007, "step": 11412 }, { "epoch": 10.65, "learning_rate": 4.866884328358209e-05, "loss": 0.0154, "step": 11416 }, { "epoch": 10.65, "learning_rate": 4.866837686567165e-05, "loss": 0.0075, "step": 11420 }, { "epoch": 10.66, "learning_rate": 4.8667910447761195e-05, "loss": 0.0022, "step": 11424 }, { "epoch": 10.66, "learning_rate": 4.866744402985075e-05, "loss": 0.0005, "step": 11428 }, { "epoch": 10.66, "learning_rate": 4.86669776119403e-05, "loss": 0.0053, "step": 11432 }, { "epoch": 10.67, "learning_rate": 4.866651119402985e-05, "loss": 0.0014, "step": 11436 }, { "epoch": 10.67, "learning_rate": 4.866604477611941e-05, "loss": 0.0055, "step": 11440 }, { "epoch": 10.68, "learning_rate": 4.8665578358208956e-05, "loss": 0.0081, "step": 11444 }, { "epoch": 10.68, "learning_rate": 4.866511194029851e-05, "loss": 0.0083, "step": 11448 }, { "epoch": 10.68, "learning_rate": 4.8664645522388066e-05, "loss": 0.0017, "step": 11452 }, { "epoch": 10.69, "learning_rate": 4.8664179104477614e-05, "loss": 0.0027, "step": 11456 }, { "epoch": 10.69, "learning_rate": 4.866371268656716e-05, "loss": 0.0168, "step": 11460 }, { "epoch": 10.69, "learning_rate": 4.866324626865672e-05, "loss": 0.0013, "step": 11464 }, { "epoch": 10.7, "learning_rate": 4.866277985074627e-05, "loss": 0.0057, "step": 11468 }, { "epoch": 10.7, "learning_rate": 4.866231343283582e-05, "loss": 0.0095, "step": 11472 }, { "epoch": 10.71, "learning_rate": 4.8661847014925375e-05, "loss": 0.0029, "step": 11476 }, { "epoch": 10.71, "learning_rate": 4.866138059701493e-05, "loss": 0.0151, "step": 11480 }, { "epoch": 10.71, "learning_rate": 4.866091417910448e-05, "loss": 0.0043, "step": 11484 }, { "epoch": 10.72, "learning_rate": 4.866044776119403e-05, "loss": 0.001, "step": 11488 }, { "epoch": 10.72, "learning_rate": 4.865998134328358e-05, "loss": 0.0074, "step": 11492 }, { "epoch": 10.72, "learning_rate": 4.8659514925373136e-05, "loss": 0.0067, "step": 11496 }, { "epoch": 10.73, "learning_rate": 4.865904850746269e-05, "loss": 0.0019, "step": 11500 }, { "epoch": 10.73, "eval_exact_match": 0.7321083172147002, "eval_exec": 0.7620889748549323, "eval_loss": 0.27294236421585083, "eval_runtime": 4988.8833, "eval_samples_per_second": 0.207, "step": 11500 }, { "epoch": 10.73, "learning_rate": 4.865858208955224e-05, "loss": 0.0054, "step": 11504 }, { "epoch": 10.73, "learning_rate": 4.865811567164179e-05, "loss": 0.0097, "step": 11508 }, { "epoch": 10.74, "learning_rate": 4.865764925373135e-05, "loss": 0.0018, "step": 11512 }, { "epoch": 10.74, "learning_rate": 4.86571828358209e-05, "loss": 0.0024, "step": 11516 }, { "epoch": 10.75, "learning_rate": 4.8656716417910445e-05, "loss": 0.0051, "step": 11520 }, { "epoch": 10.75, "learning_rate": 4.865625e-05, "loss": 0.0032, "step": 11524 }, { "epoch": 10.75, "learning_rate": 4.8655783582089555e-05, "loss": 0.0028, "step": 11528 }, { "epoch": 10.76, "learning_rate": 4.865531716417911e-05, "loss": 0.0004, "step": 11532 }, { "epoch": 10.76, "learning_rate": 4.865485074626866e-05, "loss": 0.0465, "step": 11536 }, { "epoch": 10.76, "learning_rate": 4.865438432835821e-05, "loss": 0.0039, "step": 11540 }, { "epoch": 10.77, "learning_rate": 4.865391791044777e-05, "loss": 0.0051, "step": 11544 }, { "epoch": 10.77, "learning_rate": 4.8653451492537316e-05, "loss": 0.0056, "step": 11548 }, { "epoch": 10.78, "learning_rate": 4.8652985074626864e-05, "loss": 0.0043, "step": 11552 }, { "epoch": 10.78, "learning_rate": 4.8652518656716426e-05, "loss": 0.0012, "step": 11556 }, { "epoch": 10.78, "learning_rate": 4.8652052238805974e-05, "loss": 0.0004, "step": 11560 }, { "epoch": 10.79, "learning_rate": 4.865158582089552e-05, "loss": 0.0015, "step": 11564 }, { "epoch": 10.79, "learning_rate": 4.865111940298508e-05, "loss": 0.0031, "step": 11568 }, { "epoch": 10.79, "learning_rate": 4.865065298507463e-05, "loss": 0.0018, "step": 11572 }, { "epoch": 10.8, "learning_rate": 4.865018656716418e-05, "loss": 0.0042, "step": 11576 }, { "epoch": 10.8, "learning_rate": 4.8649720149253735e-05, "loss": 0.0076, "step": 11580 }, { "epoch": 10.81, "learning_rate": 4.864925373134328e-05, "loss": 0.0032, "step": 11584 }, { "epoch": 10.81, "learning_rate": 4.864878731343284e-05, "loss": 0.0011, "step": 11588 }, { "epoch": 10.81, "learning_rate": 4.864832089552239e-05, "loss": 0.0061, "step": 11592 }, { "epoch": 10.82, "learning_rate": 4.864785447761194e-05, "loss": 0.0019, "step": 11596 }, { "epoch": 10.82, "learning_rate": 4.8647388059701496e-05, "loss": 0.0035, "step": 11600 }, { "epoch": 10.82, "learning_rate": 4.864692164179105e-05, "loss": 0.0057, "step": 11604 }, { "epoch": 10.83, "learning_rate": 4.86464552238806e-05, "loss": 0.0037, "step": 11608 }, { "epoch": 10.83, "learning_rate": 4.864598880597015e-05, "loss": 0.0068, "step": 11612 }, { "epoch": 10.84, "learning_rate": 4.864552238805971e-05, "loss": 0.0087, "step": 11616 }, { "epoch": 10.84, "learning_rate": 4.864505597014926e-05, "loss": 0.0074, "step": 11620 }, { "epoch": 10.84, "learning_rate": 4.8644589552238805e-05, "loss": 0.0016, "step": 11624 }, { "epoch": 10.85, "learning_rate": 4.864412313432836e-05, "loss": 0.0041, "step": 11628 }, { "epoch": 10.85, "learning_rate": 4.8643656716417915e-05, "loss": 0.0054, "step": 11632 }, { "epoch": 10.85, "learning_rate": 4.864319029850746e-05, "loss": 0.0204, "step": 11636 }, { "epoch": 10.86, "learning_rate": 4.864272388059702e-05, "loss": 0.003, "step": 11640 }, { "epoch": 10.86, "learning_rate": 4.8642257462686566e-05, "loss": 0.0014, "step": 11644 }, { "epoch": 10.87, "learning_rate": 4.864179104477612e-05, "loss": 0.0107, "step": 11648 }, { "epoch": 10.87, "learning_rate": 4.8641324626865676e-05, "loss": 0.0018, "step": 11652 }, { "epoch": 10.87, "learning_rate": 4.8640858208955224e-05, "loss": 0.0025, "step": 11656 }, { "epoch": 10.88, "learning_rate": 4.864039179104478e-05, "loss": 0.0021, "step": 11660 }, { "epoch": 10.88, "learning_rate": 4.8639925373134334e-05, "loss": 0.0039, "step": 11664 }, { "epoch": 10.88, "learning_rate": 4.863945895522388e-05, "loss": 0.0034, "step": 11668 }, { "epoch": 10.89, "learning_rate": 4.863899253731343e-05, "loss": 0.0057, "step": 11672 }, { "epoch": 10.89, "learning_rate": 4.8638526119402985e-05, "loss": 0.0024, "step": 11676 }, { "epoch": 10.9, "learning_rate": 4.863805970149254e-05, "loss": 0.0056, "step": 11680 }, { "epoch": 10.9, "learning_rate": 4.863759328358209e-05, "loss": 0.0083, "step": 11684 }, { "epoch": 10.9, "learning_rate": 4.863712686567164e-05, "loss": 0.0061, "step": 11688 }, { "epoch": 10.91, "learning_rate": 4.86366604477612e-05, "loss": 0.0049, "step": 11692 }, { "epoch": 10.91, "learning_rate": 4.863619402985075e-05, "loss": 0.0035, "step": 11696 }, { "epoch": 10.91, "learning_rate": 4.86357276119403e-05, "loss": 0.0211, "step": 11700 }, { "epoch": 10.92, "learning_rate": 4.863526119402985e-05, "loss": 0.012, "step": 11704 }, { "epoch": 10.92, "learning_rate": 4.863479477611941e-05, "loss": 0.0067, "step": 11708 }, { "epoch": 10.93, "learning_rate": 4.863432835820896e-05, "loss": 0.0035, "step": 11712 }, { "epoch": 10.93, "learning_rate": 4.863386194029851e-05, "loss": 0.0078, "step": 11716 }, { "epoch": 10.93, "learning_rate": 4.863339552238806e-05, "loss": 0.0042, "step": 11720 }, { "epoch": 10.94, "learning_rate": 4.863292910447762e-05, "loss": 0.0036, "step": 11724 }, { "epoch": 10.94, "learning_rate": 4.8632462686567165e-05, "loss": 0.0036, "step": 11728 }, { "epoch": 10.94, "learning_rate": 4.863199626865672e-05, "loss": 0.0044, "step": 11732 }, { "epoch": 10.95, "learning_rate": 4.863152985074627e-05, "loss": 0.0137, "step": 11736 }, { "epoch": 10.95, "learning_rate": 4.863106343283582e-05, "loss": 0.0015, "step": 11740 }, { "epoch": 10.96, "learning_rate": 4.863059701492538e-05, "loss": 0.0024, "step": 11744 }, { "epoch": 10.96, "learning_rate": 4.8630130597014926e-05, "loss": 0.0024, "step": 11748 }, { "epoch": 10.96, "learning_rate": 4.862966417910448e-05, "loss": 0.0022, "step": 11752 }, { "epoch": 10.97, "learning_rate": 4.8629197761194036e-05, "loss": 0.0009, "step": 11756 }, { "epoch": 10.97, "learning_rate": 4.8628731343283584e-05, "loss": 0.0066, "step": 11760 }, { "epoch": 10.97, "learning_rate": 4.862826492537313e-05, "loss": 0.0165, "step": 11764 }, { "epoch": 10.98, "learning_rate": 4.8627798507462694e-05, "loss": 0.0085, "step": 11768 }, { "epoch": 10.98, "learning_rate": 4.862733208955224e-05, "loss": 0.0094, "step": 11772 }, { "epoch": 10.98, "learning_rate": 4.862686567164179e-05, "loss": 0.0009, "step": 11776 }, { "epoch": 10.99, "learning_rate": 4.8626399253731345e-05, "loss": 0.0039, "step": 11780 }, { "epoch": 10.99, "learning_rate": 4.86259328358209e-05, "loss": 0.0113, "step": 11784 }, { "epoch": 11.0, "learning_rate": 4.862546641791045e-05, "loss": 0.0044, "step": 11788 }, { "epoch": 11.0, "learning_rate": 4.8625e-05, "loss": 0.0027, "step": 11792 }, { "epoch": 11.0, "learning_rate": 4.862453358208955e-05, "loss": 0.0014, "step": 11796 }, { "epoch": 11.01, "learning_rate": 4.8624067164179106e-05, "loss": 0.0013, "step": 11800 }, { "epoch": 11.01, "learning_rate": 4.862360074626866e-05, "loss": 0.0007, "step": 11804 }, { "epoch": 11.01, "learning_rate": 4.862313432835821e-05, "loss": 0.004, "step": 11808 }, { "epoch": 11.02, "learning_rate": 4.8622667910447764e-05, "loss": 0.0021, "step": 11812 }, { "epoch": 11.02, "learning_rate": 4.862220149253732e-05, "loss": 0.0034, "step": 11816 }, { "epoch": 11.03, "learning_rate": 4.862173507462687e-05, "loss": 0.0059, "step": 11820 }, { "epoch": 11.03, "learning_rate": 4.8621268656716415e-05, "loss": 0.0025, "step": 11824 }, { "epoch": 11.03, "learning_rate": 4.862080223880598e-05, "loss": 0.0009, "step": 11828 }, { "epoch": 11.04, "learning_rate": 4.8620335820895525e-05, "loss": 0.0025, "step": 11832 }, { "epoch": 11.04, "learning_rate": 4.861986940298507e-05, "loss": 0.0071, "step": 11836 }, { "epoch": 11.04, "learning_rate": 4.861940298507463e-05, "loss": 0.0012, "step": 11840 }, { "epoch": 11.05, "learning_rate": 4.861893656716418e-05, "loss": 0.001, "step": 11844 }, { "epoch": 11.05, "learning_rate": 4.861847014925374e-05, "loss": 0.0013, "step": 11848 }, { "epoch": 11.06, "learning_rate": 4.8618003731343286e-05, "loss": 0.0019, "step": 11852 }, { "epoch": 11.06, "learning_rate": 4.8617537313432834e-05, "loss": 0.0056, "step": 11856 }, { "epoch": 11.06, "learning_rate": 4.8617070895522396e-05, "loss": 0.0038, "step": 11860 }, { "epoch": 11.07, "learning_rate": 4.8616604477611944e-05, "loss": 0.0036, "step": 11864 }, { "epoch": 11.07, "learning_rate": 4.861613805970149e-05, "loss": 0.0014, "step": 11868 }, { "epoch": 11.07, "learning_rate": 4.861567164179105e-05, "loss": 0.0019, "step": 11872 }, { "epoch": 11.08, "learning_rate": 4.86152052238806e-05, "loss": 0.0009, "step": 11876 }, { "epoch": 11.08, "learning_rate": 4.861473880597015e-05, "loss": 0.0039, "step": 11880 }, { "epoch": 11.09, "learning_rate": 4.8614272388059705e-05, "loss": 0.0012, "step": 11884 }, { "epoch": 11.09, "learning_rate": 4.861380597014926e-05, "loss": 0.0022, "step": 11888 }, { "epoch": 11.09, "learning_rate": 4.861333955223881e-05, "loss": 0.0032, "step": 11892 }, { "epoch": 11.1, "learning_rate": 4.861287313432836e-05, "loss": 0.002, "step": 11896 }, { "epoch": 11.1, "learning_rate": 4.861240671641791e-05, "loss": 0.0079, "step": 11900 }, { "epoch": 11.1, "learning_rate": 4.8611940298507466e-05, "loss": 0.0097, "step": 11904 }, { "epoch": 11.11, "learning_rate": 4.861147388059702e-05, "loss": 0.0023, "step": 11908 }, { "epoch": 11.11, "learning_rate": 4.861100746268657e-05, "loss": 0.0062, "step": 11912 }, { "epoch": 11.12, "learning_rate": 4.861054104477612e-05, "loss": 0.0034, "step": 11916 }, { "epoch": 11.12, "learning_rate": 4.861007462686568e-05, "loss": 0.001, "step": 11920 }, { "epoch": 11.12, "learning_rate": 4.860960820895523e-05, "loss": 0.0027, "step": 11924 }, { "epoch": 11.13, "learning_rate": 4.8609141791044775e-05, "loss": 0.0022, "step": 11928 }, { "epoch": 11.13, "learning_rate": 4.860867537313433e-05, "loss": 0.0078, "step": 11932 }, { "epoch": 11.13, "learning_rate": 4.8608208955223885e-05, "loss": 0.0028, "step": 11936 }, { "epoch": 11.14, "learning_rate": 4.860774253731343e-05, "loss": 0.0079, "step": 11940 }, { "epoch": 11.14, "learning_rate": 4.860727611940299e-05, "loss": 0.0014, "step": 11944 }, { "epoch": 11.15, "learning_rate": 4.860680970149254e-05, "loss": 0.0053, "step": 11948 }, { "epoch": 11.15, "learning_rate": 4.860634328358209e-05, "loss": 0.0048, "step": 11952 }, { "epoch": 11.15, "learning_rate": 4.8605876865671646e-05, "loss": 0.0078, "step": 11956 }, { "epoch": 11.16, "learning_rate": 4.8605410447761194e-05, "loss": 0.0035, "step": 11960 }, { "epoch": 11.16, "learning_rate": 4.860494402985075e-05, "loss": 0.0044, "step": 11964 }, { "epoch": 11.16, "learning_rate": 4.8604477611940304e-05, "loss": 0.0121, "step": 11968 }, { "epoch": 11.17, "learning_rate": 4.860401119402985e-05, "loss": 0.0056, "step": 11972 }, { "epoch": 11.17, "learning_rate": 4.86035447761194e-05, "loss": 0.0039, "step": 11976 }, { "epoch": 11.18, "learning_rate": 4.860307835820896e-05, "loss": 0.0053, "step": 11980 }, { "epoch": 11.18, "learning_rate": 4.860261194029851e-05, "loss": 0.0029, "step": 11984 }, { "epoch": 11.18, "learning_rate": 4.860214552238806e-05, "loss": 0.0017, "step": 11988 }, { "epoch": 11.19, "learning_rate": 4.860167910447761e-05, "loss": 0.0027, "step": 11992 }, { "epoch": 11.19, "learning_rate": 4.860121268656717e-05, "loss": 0.0033, "step": 11996 }, { "epoch": 11.19, "learning_rate": 4.8600746268656716e-05, "loss": 0.0026, "step": 12000 }, { "epoch": 11.19, "eval_exact_match": 0.7292069632495164, "eval_exec": 0.7553191489361702, "eval_loss": 0.28799450397491455, "eval_runtime": 1479.0438, "eval_samples_per_second": 0.699, "step": 12000 }, { "epoch": 11.2, "learning_rate": 4.860027985074627e-05, "loss": 0.0073, "step": 12004 }, { "epoch": 11.2, "learning_rate": 4.8599813432835826e-05, "loss": 0.0067, "step": 12008 }, { "epoch": 11.21, "learning_rate": 4.859934701492538e-05, "loss": 0.0064, "step": 12012 }, { "epoch": 11.21, "learning_rate": 4.859888059701493e-05, "loss": 0.041, "step": 12016 }, { "epoch": 11.21, "learning_rate": 4.859841417910448e-05, "loss": 0.0005, "step": 12020 }, { "epoch": 11.22, "learning_rate": 4.859794776119403e-05, "loss": 0.004, "step": 12024 }, { "epoch": 11.22, "learning_rate": 4.8597481343283587e-05, "loss": 0.0102, "step": 12028 }, { "epoch": 11.22, "learning_rate": 4.8597014925373135e-05, "loss": 0.0062, "step": 12032 }, { "epoch": 11.23, "learning_rate": 4.859654850746269e-05, "loss": 0.0041, "step": 12036 }, { "epoch": 11.23, "learning_rate": 4.8596082089552244e-05, "loss": 0.0034, "step": 12040 }, { "epoch": 11.24, "learning_rate": 4.859561567164179e-05, "loss": 0.0085, "step": 12044 }, { "epoch": 11.24, "learning_rate": 4.859514925373135e-05, "loss": 0.0106, "step": 12048 }, { "epoch": 11.24, "learning_rate": 4.8594682835820896e-05, "loss": 0.0103, "step": 12052 }, { "epoch": 11.25, "learning_rate": 4.859421641791045e-05, "loss": 0.006, "step": 12056 }, { "epoch": 11.25, "learning_rate": 4.8593750000000005e-05, "loss": 0.0047, "step": 12060 }, { "epoch": 11.25, "learning_rate": 4.8593283582089554e-05, "loss": 0.0104, "step": 12064 }, { "epoch": 11.26, "learning_rate": 4.859281716417911e-05, "loss": 0.0043, "step": 12068 }, { "epoch": 11.26, "learning_rate": 4.8592350746268663e-05, "loss": 0.0063, "step": 12072 }, { "epoch": 11.26, "learning_rate": 4.859188432835821e-05, "loss": 0.0032, "step": 12076 }, { "epoch": 11.27, "learning_rate": 4.859141791044776e-05, "loss": 0.0088, "step": 12080 }, { "epoch": 11.27, "learning_rate": 4.8590951492537315e-05, "loss": 0.0046, "step": 12084 }, { "epoch": 11.28, "learning_rate": 4.859048507462687e-05, "loss": 0.0052, "step": 12088 }, { "epoch": 11.28, "learning_rate": 4.859001865671642e-05, "loss": 0.0082, "step": 12092 }, { "epoch": 11.28, "learning_rate": 4.858955223880597e-05, "loss": 0.002, "step": 12096 }, { "epoch": 11.29, "learning_rate": 4.858908582089553e-05, "loss": 0.0014, "step": 12100 }, { "epoch": 11.29, "learning_rate": 4.8588619402985076e-05, "loss": 0.0029, "step": 12104 }, { "epoch": 11.29, "learning_rate": 4.858815298507463e-05, "loss": 0.0057, "step": 12108 }, { "epoch": 11.3, "learning_rate": 4.858768656716418e-05, "loss": 0.0019, "step": 12112 }, { "epoch": 11.3, "learning_rate": 4.8587220149253734e-05, "loss": 0.011, "step": 12116 }, { "epoch": 11.31, "learning_rate": 4.858675373134329e-05, "loss": 0.0029, "step": 12120 }, { "epoch": 11.31, "learning_rate": 4.8586287313432837e-05, "loss": 0.0103, "step": 12124 }, { "epoch": 11.31, "learning_rate": 4.858582089552239e-05, "loss": 0.0034, "step": 12128 }, { "epoch": 11.32, "learning_rate": 4.8585354477611946e-05, "loss": 0.0027, "step": 12132 }, { "epoch": 11.32, "learning_rate": 4.8584888059701495e-05, "loss": 0.0065, "step": 12136 }, { "epoch": 11.32, "learning_rate": 4.858442164179104e-05, "loss": 0.0038, "step": 12140 }, { "epoch": 11.33, "learning_rate": 4.85839552238806e-05, "loss": 0.0018, "step": 12144 }, { "epoch": 11.33, "learning_rate": 4.858348880597015e-05, "loss": 0.003, "step": 12148 }, { "epoch": 11.34, "learning_rate": 4.85830223880597e-05, "loss": 0.0033, "step": 12152 }, { "epoch": 11.34, "learning_rate": 4.8582555970149256e-05, "loss": 0.0043, "step": 12156 }, { "epoch": 11.34, "learning_rate": 4.858208955223881e-05, "loss": 0.0072, "step": 12160 }, { "epoch": 11.35, "learning_rate": 4.858162313432836e-05, "loss": 0.0037, "step": 12164 }, { "epoch": 11.35, "learning_rate": 4.8581156716417913e-05, "loss": 0.0056, "step": 12168 }, { "epoch": 11.35, "learning_rate": 4.858069029850746e-05, "loss": 0.0006, "step": 12172 }, { "epoch": 11.36, "learning_rate": 4.858022388059702e-05, "loss": 0.0046, "step": 12176 }, { "epoch": 11.36, "learning_rate": 4.857975746268657e-05, "loss": 0.003, "step": 12180 }, { "epoch": 11.37, "learning_rate": 4.857929104477612e-05, "loss": 0.0136, "step": 12184 }, { "epoch": 11.37, "learning_rate": 4.8578824626865674e-05, "loss": 0.0166, "step": 12188 }, { "epoch": 11.37, "learning_rate": 4.857835820895523e-05, "loss": 0.0101, "step": 12192 }, { "epoch": 11.38, "learning_rate": 4.857789179104478e-05, "loss": 0.0007, "step": 12196 }, { "epoch": 11.38, "learning_rate": 4.857742537313433e-05, "loss": 0.0022, "step": 12200 }, { "epoch": 11.38, "learning_rate": 4.857695895522388e-05, "loss": 0.0046, "step": 12204 }, { "epoch": 11.39, "learning_rate": 4.8576492537313435e-05, "loss": 0.0103, "step": 12208 }, { "epoch": 11.39, "learning_rate": 4.857602611940299e-05, "loss": 0.0057, "step": 12212 }, { "epoch": 11.4, "learning_rate": 4.857555970149254e-05, "loss": 0.001, "step": 12216 }, { "epoch": 11.4, "learning_rate": 4.857509328358209e-05, "loss": 0.0023, "step": 12220 }, { "epoch": 11.4, "learning_rate": 4.857462686567165e-05, "loss": 0.0158, "step": 12224 }, { "epoch": 11.41, "learning_rate": 4.8574160447761196e-05, "loss": 0.0056, "step": 12228 }, { "epoch": 11.41, "learning_rate": 4.8573694029850745e-05, "loss": 0.0043, "step": 12232 }, { "epoch": 11.41, "learning_rate": 4.8573227611940306e-05, "loss": 0.007, "step": 12236 }, { "epoch": 11.42, "learning_rate": 4.8572761194029854e-05, "loss": 0.01, "step": 12240 }, { "epoch": 11.42, "learning_rate": 4.85722947761194e-05, "loss": 0.0065, "step": 12244 }, { "epoch": 11.43, "learning_rate": 4.857182835820896e-05, "loss": 0.0031, "step": 12248 }, { "epoch": 11.43, "learning_rate": 4.857136194029851e-05, "loss": 0.0028, "step": 12252 }, { "epoch": 11.43, "learning_rate": 4.857089552238806e-05, "loss": 0.0023, "step": 12256 }, { "epoch": 11.44, "learning_rate": 4.8570429104477615e-05, "loss": 0.0022, "step": 12260 }, { "epoch": 11.44, "learning_rate": 4.8569962686567163e-05, "loss": 0.0092, "step": 12264 }, { "epoch": 11.44, "learning_rate": 4.856949626865672e-05, "loss": 0.0047, "step": 12268 }, { "epoch": 11.45, "learning_rate": 4.856902985074627e-05, "loss": 0.0081, "step": 12272 }, { "epoch": 11.45, "learning_rate": 4.856856343283582e-05, "loss": 0.002, "step": 12276 }, { "epoch": 11.46, "learning_rate": 4.8568097014925376e-05, "loss": 0.0027, "step": 12280 }, { "epoch": 11.46, "learning_rate": 4.856763059701493e-05, "loss": 0.0015, "step": 12284 }, { "epoch": 11.46, "learning_rate": 4.856716417910448e-05, "loss": 0.003, "step": 12288 }, { "epoch": 11.47, "learning_rate": 4.856669776119403e-05, "loss": 0.0054, "step": 12292 }, { "epoch": 11.47, "learning_rate": 4.856623134328359e-05, "loss": 0.0237, "step": 12296 }, { "epoch": 11.47, "learning_rate": 4.856576492537314e-05, "loss": 0.006, "step": 12300 }, { "epoch": 11.48, "learning_rate": 4.8565298507462685e-05, "loss": 0.0008, "step": 12304 }, { "epoch": 11.48, "learning_rate": 4.856483208955224e-05, "loss": 0.0012, "step": 12308 }, { "epoch": 11.49, "learning_rate": 4.8564365671641795e-05, "loss": 0.0025, "step": 12312 }, { "epoch": 11.49, "learning_rate": 4.8563899253731343e-05, "loss": 0.0073, "step": 12316 }, { "epoch": 11.49, "learning_rate": 4.85634328358209e-05, "loss": 0.0037, "step": 12320 }, { "epoch": 11.5, "learning_rate": 4.8562966417910446e-05, "loss": 0.0057, "step": 12324 }, { "epoch": 11.5, "learning_rate": 4.85625e-05, "loss": 0.006, "step": 12328 }, { "epoch": 11.5, "learning_rate": 4.8562033582089556e-05, "loss": 0.0032, "step": 12332 }, { "epoch": 11.51, "learning_rate": 4.8561567164179104e-05, "loss": 0.0137, "step": 12336 }, { "epoch": 11.51, "learning_rate": 4.856110074626866e-05, "loss": 0.0022, "step": 12340 }, { "epoch": 11.51, "learning_rate": 4.8560634328358214e-05, "loss": 0.0046, "step": 12344 }, { "epoch": 11.52, "learning_rate": 4.856016791044776e-05, "loss": 0.0016, "step": 12348 }, { "epoch": 11.52, "learning_rate": 4.855970149253732e-05, "loss": 0.0054, "step": 12352 }, { "epoch": 11.53, "learning_rate": 4.8559235074626865e-05, "loss": 0.015, "step": 12356 }, { "epoch": 11.53, "learning_rate": 4.855876865671642e-05, "loss": 0.0028, "step": 12360 }, { "epoch": 11.53, "learning_rate": 4.8558302238805975e-05, "loss": 0.004, "step": 12364 }, { "epoch": 11.54, "learning_rate": 4.855783582089552e-05, "loss": 0.0046, "step": 12368 }, { "epoch": 11.54, "learning_rate": 4.855736940298508e-05, "loss": 0.0031, "step": 12372 }, { "epoch": 11.54, "learning_rate": 4.855690298507463e-05, "loss": 0.0008, "step": 12376 }, { "epoch": 11.55, "learning_rate": 4.855643656716418e-05, "loss": 0.0011, "step": 12380 }, { "epoch": 11.55, "learning_rate": 4.855597014925373e-05, "loss": 0.0039, "step": 12384 }, { "epoch": 11.56, "learning_rate": 4.855550373134329e-05, "loss": 0.0028, "step": 12388 }, { "epoch": 11.56, "learning_rate": 4.855503731343284e-05, "loss": 0.0044, "step": 12392 }, { "epoch": 11.56, "learning_rate": 4.855457089552239e-05, "loss": 0.0022, "step": 12396 }, { "epoch": 11.57, "learning_rate": 4.855410447761194e-05, "loss": 0.013, "step": 12400 }, { "epoch": 11.57, "learning_rate": 4.85536380597015e-05, "loss": 0.0035, "step": 12404 }, { "epoch": 11.57, "learning_rate": 4.8553171641791045e-05, "loss": 0.0038, "step": 12408 }, { "epoch": 11.58, "learning_rate": 4.85527052238806e-05, "loss": 0.0004, "step": 12412 }, { "epoch": 11.58, "learning_rate": 4.855223880597015e-05, "loss": 0.0019, "step": 12416 }, { "epoch": 11.59, "learning_rate": 4.85517723880597e-05, "loss": 0.0123, "step": 12420 }, { "epoch": 11.59, "learning_rate": 4.855130597014926e-05, "loss": 0.0087, "step": 12424 }, { "epoch": 11.59, "learning_rate": 4.8550839552238806e-05, "loss": 0.0008, "step": 12428 }, { "epoch": 11.6, "learning_rate": 4.855037313432836e-05, "loss": 0.0046, "step": 12432 }, { "epoch": 11.6, "learning_rate": 4.8549906716417916e-05, "loss": 0.0057, "step": 12436 }, { "epoch": 11.6, "learning_rate": 4.8549440298507464e-05, "loss": 0.0011, "step": 12440 }, { "epoch": 11.61, "learning_rate": 4.854897388059701e-05, "loss": 0.0024, "step": 12444 }, { "epoch": 11.61, "learning_rate": 4.8548507462686574e-05, "loss": 0.0038, "step": 12448 }, { "epoch": 11.62, "learning_rate": 4.854804104477612e-05, "loss": 0.0114, "step": 12452 }, { "epoch": 11.62, "learning_rate": 4.854757462686567e-05, "loss": 0.007, "step": 12456 }, { "epoch": 11.62, "learning_rate": 4.8547108208955225e-05, "loss": 0.0028, "step": 12460 }, { "epoch": 11.63, "learning_rate": 4.854664179104478e-05, "loss": 0.0039, "step": 12464 }, { "epoch": 11.63, "learning_rate": 4.854617537313433e-05, "loss": 0.0057, "step": 12468 }, { "epoch": 11.63, "learning_rate": 4.854570895522388e-05, "loss": 0.0133, "step": 12472 }, { "epoch": 11.64, "learning_rate": 4.854524253731343e-05, "loss": 0.0005, "step": 12476 }, { "epoch": 11.64, "learning_rate": 4.8544776119402986e-05, "loss": 0.0009, "step": 12480 }, { "epoch": 11.65, "learning_rate": 4.854430970149254e-05, "loss": 0.0055, "step": 12484 }, { "epoch": 11.65, "learning_rate": 4.854384328358209e-05, "loss": 0.0012, "step": 12488 }, { "epoch": 11.65, "learning_rate": 4.8543376865671644e-05, "loss": 0.0037, "step": 12492 }, { "epoch": 11.66, "learning_rate": 4.85429104477612e-05, "loss": 0.0054, "step": 12496 }, { "epoch": 11.66, "learning_rate": 4.854244402985075e-05, "loss": 0.0035, "step": 12500 }, { "epoch": 11.66, "eval_exact_match": 0.7340425531914894, "eval_exec": 0.7611218568665378, "eval_loss": 0.2715718448162079, "eval_runtime": 1831.9341, "eval_samples_per_second": 0.564, "step": 12500 }, { "epoch": 11.66, "learning_rate": 4.85419776119403e-05, "loss": 0.004, "step": 12504 }, { "epoch": 11.67, "learning_rate": 4.854151119402986e-05, "loss": 0.0061, "step": 12508 }, { "epoch": 11.67, "learning_rate": 4.8541044776119405e-05, "loss": 0.0124, "step": 12512 }, { "epoch": 11.68, "learning_rate": 4.854057835820896e-05, "loss": 0.004, "step": 12516 }, { "epoch": 11.68, "learning_rate": 4.854011194029851e-05, "loss": 0.0018, "step": 12520 }, { "epoch": 11.68, "learning_rate": 4.853964552238806e-05, "loss": 0.0019, "step": 12524 }, { "epoch": 11.69, "learning_rate": 4.853917910447762e-05, "loss": 0.0036, "step": 12528 }, { "epoch": 11.69, "learning_rate": 4.8538712686567166e-05, "loss": 0.0024, "step": 12532 }, { "epoch": 11.69, "learning_rate": 4.8538246268656714e-05, "loss": 0.0188, "step": 12536 }, { "epoch": 11.7, "learning_rate": 4.8537779850746276e-05, "loss": 0.0057, "step": 12540 }, { "epoch": 11.7, "learning_rate": 4.8537313432835824e-05, "loss": 0.0071, "step": 12544 }, { "epoch": 11.71, "learning_rate": 4.853684701492537e-05, "loss": 0.004, "step": 12548 }, { "epoch": 11.71, "learning_rate": 4.853638059701493e-05, "loss": 0.0049, "step": 12552 }, { "epoch": 11.71, "learning_rate": 4.853591417910448e-05, "loss": 0.0012, "step": 12556 }, { "epoch": 11.72, "learning_rate": 4.853544776119403e-05, "loss": 0.0031, "step": 12560 }, { "epoch": 11.72, "learning_rate": 4.8534981343283585e-05, "loss": 0.0016, "step": 12564 }, { "epoch": 11.72, "learning_rate": 4.853451492537314e-05, "loss": 0.0092, "step": 12568 }, { "epoch": 11.73, "learning_rate": 4.853404850746269e-05, "loss": 0.0102, "step": 12572 }, { "epoch": 11.73, "learning_rate": 4.853358208955224e-05, "loss": 0.0059, "step": 12576 }, { "epoch": 11.73, "learning_rate": 4.853311567164179e-05, "loss": 0.0085, "step": 12580 }, { "epoch": 11.74, "learning_rate": 4.8532649253731346e-05, "loss": 0.0012, "step": 12584 }, { "epoch": 11.74, "learning_rate": 4.85321828358209e-05, "loss": 0.0033, "step": 12588 }, { "epoch": 11.75, "learning_rate": 4.853171641791045e-05, "loss": 0.0124, "step": 12592 }, { "epoch": 11.75, "learning_rate": 4.853125e-05, "loss": 0.0054, "step": 12596 }, { "epoch": 11.75, "learning_rate": 4.853078358208956e-05, "loss": 0.0055, "step": 12600 }, { "epoch": 11.76, "learning_rate": 4.853031716417911e-05, "loss": 0.0073, "step": 12604 }, { "epoch": 11.76, "learning_rate": 4.8529850746268655e-05, "loss": 0.0036, "step": 12608 }, { "epoch": 11.76, "learning_rate": 4.852938432835821e-05, "loss": 0.01, "step": 12612 }, { "epoch": 11.77, "learning_rate": 4.8528917910447765e-05, "loss": 0.0048, "step": 12616 }, { "epoch": 11.77, "learning_rate": 4.852845149253731e-05, "loss": 0.0093, "step": 12620 }, { "epoch": 11.78, "learning_rate": 4.852798507462687e-05, "loss": 0.0096, "step": 12624 }, { "epoch": 11.78, "learning_rate": 4.852751865671642e-05, "loss": 0.0075, "step": 12628 }, { "epoch": 11.78, "learning_rate": 4.852705223880597e-05, "loss": 0.0062, "step": 12632 }, { "epoch": 11.79, "learning_rate": 4.8526585820895526e-05, "loss": 0.0034, "step": 12636 }, { "epoch": 11.79, "learning_rate": 4.8526119402985074e-05, "loss": 0.0076, "step": 12640 }, { "epoch": 11.79, "learning_rate": 4.852565298507463e-05, "loss": 0.0033, "step": 12644 }, { "epoch": 11.8, "learning_rate": 4.8525186567164184e-05, "loss": 0.0045, "step": 12648 }, { "epoch": 11.8, "learning_rate": 4.852472014925373e-05, "loss": 0.0009, "step": 12652 }, { "epoch": 11.81, "learning_rate": 4.852425373134328e-05, "loss": 0.0036, "step": 12656 }, { "epoch": 11.81, "learning_rate": 4.852378731343284e-05, "loss": 0.0031, "step": 12660 }, { "epoch": 11.81, "learning_rate": 4.852332089552239e-05, "loss": 0.0046, "step": 12664 }, { "epoch": 11.82, "learning_rate": 4.8522854477611945e-05, "loss": 0.0096, "step": 12668 }, { "epoch": 11.82, "learning_rate": 4.852238805970149e-05, "loss": 0.0056, "step": 12672 }, { "epoch": 11.82, "learning_rate": 4.852192164179105e-05, "loss": 0.001, "step": 12676 }, { "epoch": 11.83, "learning_rate": 4.85214552238806e-05, "loss": 0.0037, "step": 12680 }, { "epoch": 11.83, "learning_rate": 4.852098880597015e-05, "loss": 0.0024, "step": 12684 }, { "epoch": 11.84, "learning_rate": 4.8520522388059706e-05, "loss": 0.0048, "step": 12688 }, { "epoch": 11.84, "learning_rate": 4.852005597014926e-05, "loss": 0.0019, "step": 12692 }, { "epoch": 11.84, "learning_rate": 4.851958955223881e-05, "loss": 0.0137, "step": 12696 }, { "epoch": 11.85, "learning_rate": 4.851912313432836e-05, "loss": 0.0009, "step": 12700 }, { "epoch": 11.85, "learning_rate": 4.851865671641791e-05, "loss": 0.0021, "step": 12704 }, { "epoch": 11.85, "learning_rate": 4.851819029850747e-05, "loss": 0.0109, "step": 12708 }, { "epoch": 11.86, "learning_rate": 4.8517723880597015e-05, "loss": 0.0013, "step": 12712 }, { "epoch": 11.86, "learning_rate": 4.851725746268657e-05, "loss": 0.0026, "step": 12716 }, { "epoch": 11.87, "learning_rate": 4.8516791044776125e-05, "loss": 0.0037, "step": 12720 }, { "epoch": 11.87, "learning_rate": 4.851632462686567e-05, "loss": 0.007, "step": 12724 }, { "epoch": 11.87, "learning_rate": 4.851585820895523e-05, "loss": 0.0014, "step": 12728 }, { "epoch": 11.88, "learning_rate": 4.8515391791044776e-05, "loss": 0.0035, "step": 12732 }, { "epoch": 11.88, "learning_rate": 4.851492537313433e-05, "loss": 0.0008, "step": 12736 }, { "epoch": 11.88, "learning_rate": 4.8514458955223886e-05, "loss": 0.0022, "step": 12740 }, { "epoch": 11.89, "learning_rate": 4.8513992537313434e-05, "loss": 0.0071, "step": 12744 }, { "epoch": 11.89, "learning_rate": 4.851352611940299e-05, "loss": 0.0099, "step": 12748 }, { "epoch": 11.9, "learning_rate": 4.8513059701492544e-05, "loss": 0.0049, "step": 12752 }, { "epoch": 11.9, "learning_rate": 4.851259328358209e-05, "loss": 0.0013, "step": 12756 }, { "epoch": 11.9, "learning_rate": 4.851212686567164e-05, "loss": 0.0058, "step": 12760 }, { "epoch": 11.91, "learning_rate": 4.8511660447761195e-05, "loss": 0.0041, "step": 12764 }, { "epoch": 11.91, "learning_rate": 4.851119402985075e-05, "loss": 0.0051, "step": 12768 }, { "epoch": 11.91, "learning_rate": 4.85107276119403e-05, "loss": 0.0015, "step": 12772 }, { "epoch": 11.92, "learning_rate": 4.851026119402985e-05, "loss": 0.0047, "step": 12776 }, { "epoch": 11.92, "learning_rate": 4.850979477611941e-05, "loss": 0.0118, "step": 12780 }, { "epoch": 11.93, "learning_rate": 4.8509328358208956e-05, "loss": 0.0017, "step": 12784 }, { "epoch": 11.93, "learning_rate": 4.850886194029851e-05, "loss": 0.0031, "step": 12788 }, { "epoch": 11.93, "learning_rate": 4.850839552238806e-05, "loss": 0.001, "step": 12792 }, { "epoch": 11.94, "learning_rate": 4.8507929104477614e-05, "loss": 0.0053, "step": 12796 }, { "epoch": 11.94, "learning_rate": 4.850746268656717e-05, "loss": 0.0016, "step": 12800 }, { "epoch": 11.94, "learning_rate": 4.850699626865672e-05, "loss": 0.0067, "step": 12804 }, { "epoch": 11.95, "learning_rate": 4.850652985074627e-05, "loss": 0.0027, "step": 12808 }, { "epoch": 11.95, "learning_rate": 4.850606343283583e-05, "loss": 0.0021, "step": 12812 }, { "epoch": 11.96, "learning_rate": 4.8505597014925375e-05, "loss": 0.0151, "step": 12816 }, { "epoch": 11.96, "learning_rate": 4.850513059701492e-05, "loss": 0.0073, "step": 12820 }, { "epoch": 11.96, "learning_rate": 4.850466417910448e-05, "loss": 0.0051, "step": 12824 }, { "epoch": 11.97, "learning_rate": 4.850419776119403e-05, "loss": 0.0044, "step": 12828 }, { "epoch": 11.97, "learning_rate": 4.850373134328359e-05, "loss": 0.0032, "step": 12832 }, { "epoch": 11.97, "learning_rate": 4.8503264925373136e-05, "loss": 0.0065, "step": 12836 }, { "epoch": 11.98, "learning_rate": 4.850279850746269e-05, "loss": 0.0048, "step": 12840 }, { "epoch": 11.98, "learning_rate": 4.8502332089552246e-05, "loss": 0.0049, "step": 12844 }, { "epoch": 11.98, "learning_rate": 4.8501865671641794e-05, "loss": 0.0006, "step": 12848 }, { "epoch": 11.99, "learning_rate": 4.850139925373134e-05, "loss": 0.0011, "step": 12852 }, { "epoch": 11.99, "learning_rate": 4.8500932835820904e-05, "loss": 0.0028, "step": 12856 }, { "epoch": 12.0, "learning_rate": 4.850046641791045e-05, "loss": 0.0016, "step": 12860 }, { "epoch": 12.0, "learning_rate": 4.85e-05, "loss": 0.004, "step": 12864 }, { "epoch": 12.0, "learning_rate": 4.8499533582089555e-05, "loss": 0.0025, "step": 12868 }, { "epoch": 12.01, "learning_rate": 4.849906716417911e-05, "loss": 0.0103, "step": 12872 }, { "epoch": 12.01, "learning_rate": 4.849860074626866e-05, "loss": 0.0008, "step": 12876 }, { "epoch": 12.01, "learning_rate": 4.849813432835821e-05, "loss": 0.0046, "step": 12880 }, { "epoch": 12.02, "learning_rate": 4.849766791044776e-05, "loss": 0.0017, "step": 12884 }, { "epoch": 12.02, "learning_rate": 4.8497201492537316e-05, "loss": 0.002, "step": 12888 }, { "epoch": 12.03, "learning_rate": 4.849673507462687e-05, "loss": 0.0037, "step": 12892 }, { "epoch": 12.03, "learning_rate": 4.849626865671642e-05, "loss": 0.0072, "step": 12896 }, { "epoch": 12.03, "learning_rate": 4.8495802238805974e-05, "loss": 0.0018, "step": 12900 }, { "epoch": 12.04, "learning_rate": 4.849533582089553e-05, "loss": 0.0022, "step": 12904 }, { "epoch": 12.04, "learning_rate": 4.849486940298508e-05, "loss": 0.0014, "step": 12908 }, { "epoch": 12.04, "learning_rate": 4.8494402985074625e-05, "loss": 0.0023, "step": 12912 }, { "epoch": 12.05, "learning_rate": 4.8493936567164187e-05, "loss": 0.0034, "step": 12916 }, { "epoch": 12.05, "learning_rate": 4.8493470149253735e-05, "loss": 0.0044, "step": 12920 }, { "epoch": 12.06, "learning_rate": 4.849300373134328e-05, "loss": 0.0052, "step": 12924 }, { "epoch": 12.06, "learning_rate": 4.849253731343284e-05, "loss": 0.0061, "step": 12928 }, { "epoch": 12.06, "learning_rate": 4.849207089552239e-05, "loss": 0.0022, "step": 12932 }, { "epoch": 12.07, "learning_rate": 4.849160447761194e-05, "loss": 0.0068, "step": 12936 }, { "epoch": 12.07, "learning_rate": 4.8491138059701496e-05, "loss": 0.005, "step": 12940 }, { "epoch": 12.07, "learning_rate": 4.8490671641791044e-05, "loss": 0.015, "step": 12944 }, { "epoch": 12.08, "learning_rate": 4.84902052238806e-05, "loss": 0.0009, "step": 12948 }, { "epoch": 12.08, "learning_rate": 4.8489738805970154e-05, "loss": 0.002, "step": 12952 }, { "epoch": 12.09, "learning_rate": 4.84892723880597e-05, "loss": 0.0019, "step": 12956 }, { "epoch": 12.09, "learning_rate": 4.848880597014926e-05, "loss": 0.0029, "step": 12960 }, { "epoch": 12.09, "learning_rate": 4.848833955223881e-05, "loss": 0.0012, "step": 12964 }, { "epoch": 12.1, "learning_rate": 4.848787313432836e-05, "loss": 0.0198, "step": 12968 }, { "epoch": 12.1, "learning_rate": 4.848740671641791e-05, "loss": 0.0013, "step": 12972 }, { "epoch": 12.1, "learning_rate": 4.848694029850747e-05, "loss": 0.0032, "step": 12976 }, { "epoch": 12.11, "learning_rate": 4.848647388059702e-05, "loss": 0.0141, "step": 12980 }, { "epoch": 12.11, "learning_rate": 4.8486007462686566e-05, "loss": 0.0038, "step": 12984 }, { "epoch": 12.12, "learning_rate": 4.848554104477612e-05, "loss": 0.0037, "step": 12988 }, { "epoch": 12.12, "learning_rate": 4.8485074626865676e-05, "loss": 0.0028, "step": 12992 }, { "epoch": 12.12, "learning_rate": 4.848460820895523e-05, "loss": 0.0139, "step": 12996 }, { "epoch": 12.13, "learning_rate": 4.848414179104478e-05, "loss": 0.0063, "step": 13000 }, { "epoch": 12.13, "eval_exact_match": 0.730174081237911, "eval_exec": 0.7572533849129593, "eval_loss": 0.28556960821151733, "eval_runtime": 1682.081, "eval_samples_per_second": 0.615, "step": 13000 }, { "epoch": 12.13, "learning_rate": 4.848367537313433e-05, "loss": 0.0037, "step": 13004 }, { "epoch": 12.13, "learning_rate": 4.848320895522389e-05, "loss": 0.0027, "step": 13008 }, { "epoch": 12.14, "learning_rate": 4.8482742537313437e-05, "loss": 0.004, "step": 13012 }, { "epoch": 12.14, "learning_rate": 4.8482276119402985e-05, "loss": 0.0039, "step": 13016 }, { "epoch": 12.15, "learning_rate": 4.848180970149254e-05, "loss": 0.0056, "step": 13020 }, { "epoch": 12.15, "learning_rate": 4.8481343283582094e-05, "loss": 0.0067, "step": 13024 }, { "epoch": 12.15, "learning_rate": 4.848087686567164e-05, "loss": 0.0021, "step": 13028 }, { "epoch": 12.16, "learning_rate": 4.84804104477612e-05, "loss": 0.0053, "step": 13032 }, { "epoch": 12.16, "learning_rate": 4.8479944029850746e-05, "loss": 0.0016, "step": 13036 }, { "epoch": 12.16, "learning_rate": 4.84794776119403e-05, "loss": 0.0004, "step": 13040 }, { "epoch": 12.17, "learning_rate": 4.8479011194029855e-05, "loss": 0.0014, "step": 13044 }, { "epoch": 12.17, "learning_rate": 4.8478544776119404e-05, "loss": 0.0163, "step": 13048 }, { "epoch": 12.18, "learning_rate": 4.847807835820896e-05, "loss": 0.0108, "step": 13052 }, { "epoch": 12.18, "learning_rate": 4.8477611940298513e-05, "loss": 0.006, "step": 13056 }, { "epoch": 12.18, "learning_rate": 4.847714552238806e-05, "loss": 0.0039, "step": 13060 }, { "epoch": 12.19, "learning_rate": 4.847667910447761e-05, "loss": 0.002, "step": 13064 }, { "epoch": 12.19, "learning_rate": 4.847621268656717e-05, "loss": 0.0018, "step": 13068 }, { "epoch": 12.19, "learning_rate": 4.847574626865672e-05, "loss": 0.0013, "step": 13072 }, { "epoch": 12.2, "learning_rate": 4.847527985074627e-05, "loss": 0.0022, "step": 13076 }, { "epoch": 12.2, "learning_rate": 4.847481343283582e-05, "loss": 0.0071, "step": 13080 }, { "epoch": 12.21, "learning_rate": 4.847434701492538e-05, "loss": 0.0003, "step": 13084 }, { "epoch": 12.21, "learning_rate": 4.8473880597014926e-05, "loss": 0.0025, "step": 13088 }, { "epoch": 12.21, "learning_rate": 4.847341417910448e-05, "loss": 0.0033, "step": 13092 }, { "epoch": 12.22, "learning_rate": 4.847294776119403e-05, "loss": 0.001, "step": 13096 }, { "epoch": 12.22, "learning_rate": 4.8472481343283584e-05, "loss": 0.0064, "step": 13100 }, { "epoch": 12.22, "learning_rate": 4.847201492537314e-05, "loss": 0.0025, "step": 13104 }, { "epoch": 12.23, "learning_rate": 4.8471548507462687e-05, "loss": 0.0011, "step": 13108 }, { "epoch": 12.23, "learning_rate": 4.847108208955224e-05, "loss": 0.0005, "step": 13112 }, { "epoch": 12.24, "learning_rate": 4.8470615671641796e-05, "loss": 0.0029, "step": 13116 }, { "epoch": 12.24, "learning_rate": 4.8470149253731344e-05, "loss": 0.0021, "step": 13120 }, { "epoch": 12.24, "learning_rate": 4.846968283582089e-05, "loss": 0.0043, "step": 13124 }, { "epoch": 12.25, "learning_rate": 4.8469216417910454e-05, "loss": 0.0226, "step": 13128 }, { "epoch": 12.25, "learning_rate": 4.846875e-05, "loss": 0.0006, "step": 13132 }, { "epoch": 12.25, "learning_rate": 4.846828358208955e-05, "loss": 0.0005, "step": 13136 }, { "epoch": 12.26, "learning_rate": 4.8467817164179105e-05, "loss": 0.0057, "step": 13140 }, { "epoch": 12.26, "learning_rate": 4.846735074626866e-05, "loss": 0.006, "step": 13144 }, { "epoch": 12.26, "learning_rate": 4.846688432835821e-05, "loss": 0.0037, "step": 13148 }, { "epoch": 12.27, "learning_rate": 4.8466417910447763e-05, "loss": 0.0027, "step": 13152 }, { "epoch": 12.27, "learning_rate": 4.846595149253731e-05, "loss": 0.0009, "step": 13156 }, { "epoch": 12.28, "learning_rate": 4.846548507462687e-05, "loss": 0.0017, "step": 13160 }, { "epoch": 12.28, "learning_rate": 4.846501865671642e-05, "loss": 0.004, "step": 13164 }, { "epoch": 12.28, "learning_rate": 4.846455223880597e-05, "loss": 0.0093, "step": 13168 }, { "epoch": 12.29, "learning_rate": 4.8464085820895524e-05, "loss": 0.0036, "step": 13172 }, { "epoch": 12.29, "learning_rate": 4.846361940298508e-05, "loss": 0.0018, "step": 13176 }, { "epoch": 12.29, "learning_rate": 4.846315298507463e-05, "loss": 0.0036, "step": 13180 }, { "epoch": 12.3, "learning_rate": 4.846268656716418e-05, "loss": 0.0029, "step": 13184 }, { "epoch": 12.3, "learning_rate": 4.846222014925374e-05, "loss": 0.0133, "step": 13188 }, { "epoch": 12.31, "learning_rate": 4.8461753731343285e-05, "loss": 0.0097, "step": 13192 }, { "epoch": 12.31, "learning_rate": 4.846128731343284e-05, "loss": 0.0028, "step": 13196 }, { "epoch": 12.31, "learning_rate": 4.846082089552239e-05, "loss": 0.0009, "step": 13200 }, { "epoch": 12.32, "learning_rate": 4.846035447761194e-05, "loss": 0.0009, "step": 13204 }, { "epoch": 12.32, "learning_rate": 4.84598880597015e-05, "loss": 0.0101, "step": 13208 }, { "epoch": 12.32, "learning_rate": 4.8459421641791046e-05, "loss": 0.0021, "step": 13212 }, { "epoch": 12.33, "learning_rate": 4.8458955223880595e-05, "loss": 0.0066, "step": 13216 }, { "epoch": 12.33, "learning_rate": 4.8458488805970156e-05, "loss": 0.0027, "step": 13220 }, { "epoch": 12.34, "learning_rate": 4.8458022388059704e-05, "loss": 0.0007, "step": 13224 }, { "epoch": 12.34, "learning_rate": 4.845755597014925e-05, "loss": 0.0025, "step": 13228 }, { "epoch": 12.34, "learning_rate": 4.845708955223881e-05, "loss": 0.0007, "step": 13232 }, { "epoch": 12.35, "learning_rate": 4.845662313432836e-05, "loss": 0.003, "step": 13236 }, { "epoch": 12.35, "learning_rate": 4.845615671641791e-05, "loss": 0.0023, "step": 13240 }, { "epoch": 12.35, "learning_rate": 4.8455690298507465e-05, "loss": 0.0033, "step": 13244 }, { "epoch": 12.36, "learning_rate": 4.845522388059702e-05, "loss": 0.0045, "step": 13248 }, { "epoch": 12.36, "learning_rate": 4.845475746268657e-05, "loss": 0.0027, "step": 13252 }, { "epoch": 12.37, "learning_rate": 4.845429104477612e-05, "loss": 0.0039, "step": 13256 }, { "epoch": 12.37, "learning_rate": 4.845382462686567e-05, "loss": 0.0004, "step": 13260 }, { "epoch": 12.37, "learning_rate": 4.8453358208955226e-05, "loss": 0.0049, "step": 13264 }, { "epoch": 12.38, "learning_rate": 4.845289179104478e-05, "loss": 0.0062, "step": 13268 }, { "epoch": 12.38, "learning_rate": 4.845242537313433e-05, "loss": 0.0009, "step": 13272 }, { "epoch": 12.38, "learning_rate": 4.845195895522388e-05, "loss": 0.005, "step": 13276 }, { "epoch": 12.39, "learning_rate": 4.845149253731344e-05, "loss": 0.0025, "step": 13280 }, { "epoch": 12.39, "learning_rate": 4.845102611940299e-05, "loss": 0.0037, "step": 13284 }, { "epoch": 12.4, "learning_rate": 4.8450559701492535e-05, "loss": 0.0039, "step": 13288 }, { "epoch": 12.4, "learning_rate": 4.845009328358209e-05, "loss": 0.002, "step": 13292 }, { "epoch": 12.4, "learning_rate": 4.8449626865671645e-05, "loss": 0.0055, "step": 13296 }, { "epoch": 12.41, "learning_rate": 4.844916044776119e-05, "loss": 0.001, "step": 13300 }, { "epoch": 12.41, "learning_rate": 4.844869402985075e-05, "loss": 0.0008, "step": 13304 }, { "epoch": 12.41, "learning_rate": 4.84482276119403e-05, "loss": 0.0092, "step": 13308 }, { "epoch": 12.42, "learning_rate": 4.844776119402985e-05, "loss": 0.0028, "step": 13312 }, { "epoch": 12.42, "learning_rate": 4.8447294776119406e-05, "loss": 0.0009, "step": 13316 }, { "epoch": 12.43, "learning_rate": 4.8446828358208954e-05, "loss": 0.0088, "step": 13320 }, { "epoch": 12.43, "learning_rate": 4.844636194029851e-05, "loss": 0.0014, "step": 13324 }, { "epoch": 12.43, "learning_rate": 4.8445895522388064e-05, "loss": 0.0012, "step": 13328 }, { "epoch": 12.44, "learning_rate": 4.844542910447761e-05, "loss": 0.002, "step": 13332 }, { "epoch": 12.44, "learning_rate": 4.844496268656717e-05, "loss": 0.0031, "step": 13336 }, { "epoch": 12.44, "learning_rate": 4.844449626865672e-05, "loss": 0.0013, "step": 13340 }, { "epoch": 12.45, "learning_rate": 4.844402985074627e-05, "loss": 0.0059, "step": 13344 }, { "epoch": 12.45, "learning_rate": 4.8443563432835825e-05, "loss": 0.0025, "step": 13348 }, { "epoch": 12.46, "learning_rate": 4.844309701492537e-05, "loss": 0.0002, "step": 13352 }, { "epoch": 12.46, "learning_rate": 4.844263059701493e-05, "loss": 0.002, "step": 13356 }, { "epoch": 12.46, "learning_rate": 4.844216417910448e-05, "loss": 0.0007, "step": 13360 }, { "epoch": 12.47, "learning_rate": 4.844169776119403e-05, "loss": 0.009, "step": 13364 }, { "epoch": 12.47, "learning_rate": 4.8441231343283586e-05, "loss": 0.0065, "step": 13368 }, { "epoch": 12.47, "learning_rate": 4.844076492537314e-05, "loss": 0.0114, "step": 13372 }, { "epoch": 12.48, "learning_rate": 4.844029850746269e-05, "loss": 0.0021, "step": 13376 }, { "epoch": 12.48, "learning_rate": 4.843983208955224e-05, "loss": 0.0036, "step": 13380 }, { "epoch": 12.49, "learning_rate": 4.843936567164179e-05, "loss": 0.0027, "step": 13384 }, { "epoch": 12.49, "learning_rate": 4.843889925373135e-05, "loss": 0.0042, "step": 13388 }, { "epoch": 12.49, "learning_rate": 4.8438432835820895e-05, "loss": 0.0013, "step": 13392 }, { "epoch": 12.5, "learning_rate": 4.843796641791045e-05, "loss": 0.0008, "step": 13396 }, { "epoch": 12.5, "learning_rate": 4.8437500000000005e-05, "loss": 0.0047, "step": 13400 }, { "epoch": 12.5, "learning_rate": 4.843703358208955e-05, "loss": 0.0012, "step": 13404 }, { "epoch": 12.51, "learning_rate": 4.843656716417911e-05, "loss": 0.0024, "step": 13408 }, { "epoch": 12.51, "learning_rate": 4.8436100746268656e-05, "loss": 0.0128, "step": 13412 }, { "epoch": 12.51, "learning_rate": 4.843563432835821e-05, "loss": 0.0007, "step": 13416 }, { "epoch": 12.52, "learning_rate": 4.8435167910447766e-05, "loss": 0.0055, "step": 13420 }, { "epoch": 12.52, "learning_rate": 4.8434701492537314e-05, "loss": 0.0122, "step": 13424 }, { "epoch": 12.53, "learning_rate": 4.843423507462687e-05, "loss": 0.0029, "step": 13428 }, { "epoch": 12.53, "learning_rate": 4.8433768656716424e-05, "loss": 0.0019, "step": 13432 }, { "epoch": 12.53, "learning_rate": 4.843330223880597e-05, "loss": 0.0009, "step": 13436 }, { "epoch": 12.54, "learning_rate": 4.843283582089552e-05, "loss": 0.0006, "step": 13440 }, { "epoch": 12.54, "learning_rate": 4.8432369402985075e-05, "loss": 0.004, "step": 13444 }, { "epoch": 12.54, "learning_rate": 4.843190298507463e-05, "loss": 0.0022, "step": 13448 }, { "epoch": 12.55, "learning_rate": 4.843143656716418e-05, "loss": 0.005, "step": 13452 }, { "epoch": 12.55, "learning_rate": 4.843097014925373e-05, "loss": 0.0061, "step": 13456 }, { "epoch": 12.56, "learning_rate": 4.843050373134329e-05, "loss": 0.0062, "step": 13460 }, { "epoch": 12.56, "learning_rate": 4.8430037313432836e-05, "loss": 0.0023, "step": 13464 }, { "epoch": 12.56, "learning_rate": 4.842957089552239e-05, "loss": 0.0005, "step": 13468 }, { "epoch": 12.57, "learning_rate": 4.842910447761194e-05, "loss": 0.008, "step": 13472 }, { "epoch": 12.57, "learning_rate": 4.8428638059701494e-05, "loss": 0.0039, "step": 13476 }, { "epoch": 12.57, "learning_rate": 4.842817164179105e-05, "loss": 0.0017, "step": 13480 }, { "epoch": 12.58, "learning_rate": 4.84277052238806e-05, "loss": 0.0037, "step": 13484 }, { "epoch": 12.58, "learning_rate": 4.842723880597015e-05, "loss": 0.0091, "step": 13488 }, { "epoch": 12.59, "learning_rate": 4.842677238805971e-05, "loss": 0.0006, "step": 13492 }, { "epoch": 12.59, "learning_rate": 4.8426305970149255e-05, "loss": 0.0026, "step": 13496 }, { "epoch": 12.59, "learning_rate": 4.842583955223881e-05, "loss": 0.0058, "step": 13500 }, { "epoch": 12.59, "eval_exact_match": 0.7156673114119922, "eval_exec": 0.7562862669245648, "eval_loss": 0.295368492603302, "eval_runtime": 1541.5565, "eval_samples_per_second": 0.671, "step": 13500 }, { "epoch": 12.6, "learning_rate": 4.842537313432836e-05, "loss": 0.0075, "step": 13504 }, { "epoch": 12.6, "learning_rate": 4.842490671641791e-05, "loss": 0.002, "step": 13508 }, { "epoch": 12.6, "learning_rate": 4.842444029850747e-05, "loss": 0.0156, "step": 13512 }, { "epoch": 12.61, "learning_rate": 4.8423973880597016e-05, "loss": 0.0082, "step": 13516 }, { "epoch": 12.61, "learning_rate": 4.842350746268657e-05, "loss": 0.0057, "step": 13520 }, { "epoch": 12.62, "learning_rate": 4.8423041044776126e-05, "loss": 0.0022, "step": 13524 }, { "epoch": 12.62, "learning_rate": 4.8422574626865674e-05, "loss": 0.0029, "step": 13528 }, { "epoch": 12.62, "learning_rate": 4.842210820895522e-05, "loss": 0.0014, "step": 13532 }, { "epoch": 12.63, "learning_rate": 4.8421641791044784e-05, "loss": 0.0013, "step": 13536 }, { "epoch": 12.63, "learning_rate": 4.842117537313433e-05, "loss": 0.002, "step": 13540 }, { "epoch": 12.63, "learning_rate": 4.842070895522388e-05, "loss": 0.0019, "step": 13544 }, { "epoch": 12.64, "learning_rate": 4.8420242537313435e-05, "loss": 0.0006, "step": 13548 }, { "epoch": 12.64, "learning_rate": 4.841977611940299e-05, "loss": 0.0011, "step": 13552 }, { "epoch": 12.65, "learning_rate": 4.841930970149254e-05, "loss": 0.0063, "step": 13556 }, { "epoch": 12.65, "learning_rate": 4.841884328358209e-05, "loss": 0.0033, "step": 13560 }, { "epoch": 12.65, "learning_rate": 4.841837686567164e-05, "loss": 0.0013, "step": 13564 }, { "epoch": 12.66, "learning_rate": 4.8417910447761196e-05, "loss": 0.0014, "step": 13568 }, { "epoch": 12.66, "learning_rate": 4.841744402985075e-05, "loss": 0.0036, "step": 13572 }, { "epoch": 12.66, "learning_rate": 4.84169776119403e-05, "loss": 0.0034, "step": 13576 }, { "epoch": 12.67, "learning_rate": 4.8416511194029854e-05, "loss": 0.0062, "step": 13580 }, { "epoch": 12.67, "learning_rate": 4.841604477611941e-05, "loss": 0.0032, "step": 13584 }, { "epoch": 12.68, "learning_rate": 4.841557835820896e-05, "loss": 0.005, "step": 13588 }, { "epoch": 12.68, "learning_rate": 4.8415111940298505e-05, "loss": 0.008, "step": 13592 }, { "epoch": 12.68, "learning_rate": 4.841464552238807e-05, "loss": 0.0038, "step": 13596 }, { "epoch": 12.69, "learning_rate": 4.8414179104477615e-05, "loss": 0.0033, "step": 13600 }, { "epoch": 12.69, "learning_rate": 4.841371268656716e-05, "loss": 0.0008, "step": 13604 }, { "epoch": 12.69, "learning_rate": 4.841324626865672e-05, "loss": 0.0041, "step": 13608 }, { "epoch": 12.7, "learning_rate": 4.841277985074627e-05, "loss": 0.006, "step": 13612 }, { "epoch": 12.7, "learning_rate": 4.841231343283582e-05, "loss": 0.0007, "step": 13616 }, { "epoch": 12.71, "learning_rate": 4.8411847014925376e-05, "loss": 0.0035, "step": 13620 }, { "epoch": 12.71, "learning_rate": 4.8411380597014924e-05, "loss": 0.0067, "step": 13624 }, { "epoch": 12.71, "learning_rate": 4.841091417910448e-05, "loss": 0.0056, "step": 13628 }, { "epoch": 12.72, "learning_rate": 4.8410447761194034e-05, "loss": 0.0396, "step": 13632 }, { "epoch": 12.72, "learning_rate": 4.840998134328358e-05, "loss": 0.0055, "step": 13636 }, { "epoch": 12.72, "learning_rate": 4.840951492537314e-05, "loss": 0.0048, "step": 13640 }, { "epoch": 12.73, "learning_rate": 4.840904850746269e-05, "loss": 0.0046, "step": 13644 }, { "epoch": 12.73, "learning_rate": 4.840858208955224e-05, "loss": 0.0037, "step": 13648 }, { "epoch": 12.73, "learning_rate": 4.8408115671641795e-05, "loss": 0.0061, "step": 13652 }, { "epoch": 12.74, "learning_rate": 4.840764925373135e-05, "loss": 0.0053, "step": 13656 }, { "epoch": 12.74, "learning_rate": 4.84071828358209e-05, "loss": 0.0048, "step": 13660 }, { "epoch": 12.75, "learning_rate": 4.840671641791045e-05, "loss": 0.0038, "step": 13664 }, { "epoch": 12.75, "learning_rate": 4.840625e-05, "loss": 0.0069, "step": 13668 }, { "epoch": 12.75, "learning_rate": 4.8405783582089556e-05, "loss": 0.0086, "step": 13672 }, { "epoch": 12.76, "learning_rate": 4.840531716417911e-05, "loss": 0.0017, "step": 13676 }, { "epoch": 12.76, "learning_rate": 4.840485074626866e-05, "loss": 0.0021, "step": 13680 }, { "epoch": 12.76, "learning_rate": 4.840438432835821e-05, "loss": 0.004, "step": 13684 }, { "epoch": 12.77, "learning_rate": 4.840391791044777e-05, "loss": 0.0007, "step": 13688 }, { "epoch": 12.77, "learning_rate": 4.840345149253732e-05, "loss": 0.0118, "step": 13692 }, { "epoch": 12.78, "learning_rate": 4.8402985074626865e-05, "loss": 0.0022, "step": 13696 }, { "epoch": 12.78, "learning_rate": 4.840251865671642e-05, "loss": 0.0033, "step": 13700 }, { "epoch": 12.78, "learning_rate": 4.8402052238805975e-05, "loss": 0.0048, "step": 13704 }, { "epoch": 12.79, "learning_rate": 4.840158582089552e-05, "loss": 0.004, "step": 13708 }, { "epoch": 12.79, "learning_rate": 4.840111940298508e-05, "loss": 0.0018, "step": 13712 }, { "epoch": 12.79, "learning_rate": 4.8400652985074626e-05, "loss": 0.0021, "step": 13716 }, { "epoch": 12.8, "learning_rate": 4.840018656716418e-05, "loss": 0.007, "step": 13720 }, { "epoch": 12.8, "learning_rate": 4.8399720149253736e-05, "loss": 0.0012, "step": 13724 }, { "epoch": 12.81, "learning_rate": 4.8399253731343284e-05, "loss": 0.0032, "step": 13728 }, { "epoch": 12.81, "learning_rate": 4.839878731343284e-05, "loss": 0.0031, "step": 13732 }, { "epoch": 12.81, "learning_rate": 4.8398320895522394e-05, "loss": 0.0064, "step": 13736 }, { "epoch": 12.82, "learning_rate": 4.839785447761194e-05, "loss": 0.0105, "step": 13740 }, { "epoch": 12.82, "learning_rate": 4.839738805970149e-05, "loss": 0.004, "step": 13744 }, { "epoch": 12.82, "learning_rate": 4.839692164179105e-05, "loss": 0.0012, "step": 13748 }, { "epoch": 12.83, "learning_rate": 4.83964552238806e-05, "loss": 0.0024, "step": 13752 }, { "epoch": 12.83, "learning_rate": 4.839598880597015e-05, "loss": 0.0046, "step": 13756 }, { "epoch": 12.84, "learning_rate": 4.83955223880597e-05, "loss": 0.003, "step": 13760 }, { "epoch": 12.84, "learning_rate": 4.839505597014926e-05, "loss": 0.0049, "step": 13764 }, { "epoch": 12.84, "learning_rate": 4.8394589552238806e-05, "loss": 0.0031, "step": 13768 }, { "epoch": 12.85, "learning_rate": 4.839412313432836e-05, "loss": 0.0045, "step": 13772 }, { "epoch": 12.85, "learning_rate": 4.839365671641791e-05, "loss": 0.0035, "step": 13776 }, { "epoch": 12.85, "learning_rate": 4.8393190298507464e-05, "loss": 0.0107, "step": 13780 }, { "epoch": 12.86, "learning_rate": 4.839272388059702e-05, "loss": 0.0014, "step": 13784 }, { "epoch": 12.86, "learning_rate": 4.839225746268657e-05, "loss": 0.0063, "step": 13788 }, { "epoch": 12.87, "learning_rate": 4.839179104477612e-05, "loss": 0.001, "step": 13792 }, { "epoch": 12.87, "learning_rate": 4.839132462686568e-05, "loss": 0.0022, "step": 13796 }, { "epoch": 12.87, "learning_rate": 4.8390858208955225e-05, "loss": 0.0021, "step": 13800 }, { "epoch": 12.88, "learning_rate": 4.839039179104477e-05, "loss": 0.0031, "step": 13804 }, { "epoch": 12.88, "learning_rate": 4.8389925373134335e-05, "loss": 0.0099, "step": 13808 }, { "epoch": 12.88, "learning_rate": 4.838945895522388e-05, "loss": 0.0008, "step": 13812 }, { "epoch": 12.89, "learning_rate": 4.838899253731344e-05, "loss": 0.009, "step": 13816 }, { "epoch": 12.89, "learning_rate": 4.8388526119402986e-05, "loss": 0.01, "step": 13820 }, { "epoch": 12.9, "learning_rate": 4.838805970149254e-05, "loss": 0.0051, "step": 13824 }, { "epoch": 12.9, "learning_rate": 4.8387593283582096e-05, "loss": 0.0097, "step": 13828 }, { "epoch": 12.9, "learning_rate": 4.8387126865671644e-05, "loss": 0.0004, "step": 13832 }, { "epoch": 12.91, "learning_rate": 4.838666044776119e-05, "loss": 0.001, "step": 13836 }, { "epoch": 12.91, "learning_rate": 4.8386194029850754e-05, "loss": 0.0049, "step": 13840 }, { "epoch": 12.91, "learning_rate": 4.83857276119403e-05, "loss": 0.0015, "step": 13844 }, { "epoch": 12.92, "learning_rate": 4.838526119402985e-05, "loss": 0.0044, "step": 13848 }, { "epoch": 12.92, "learning_rate": 4.8384794776119405e-05, "loss": 0.002, "step": 13852 }, { "epoch": 12.93, "learning_rate": 4.838432835820896e-05, "loss": 0.0018, "step": 13856 }, { "epoch": 12.93, "learning_rate": 4.838386194029851e-05, "loss": 0.0216, "step": 13860 }, { "epoch": 12.93, "learning_rate": 4.838339552238806e-05, "loss": 0.0043, "step": 13864 }, { "epoch": 12.94, "learning_rate": 4.838292910447762e-05, "loss": 0.0007, "step": 13868 }, { "epoch": 12.94, "learning_rate": 4.8382462686567166e-05, "loss": 0.0054, "step": 13872 }, { "epoch": 12.94, "learning_rate": 4.838199626865672e-05, "loss": 0.0009, "step": 13876 }, { "epoch": 12.95, "learning_rate": 4.838152985074627e-05, "loss": 0.0131, "step": 13880 }, { "epoch": 12.95, "learning_rate": 4.8381063432835824e-05, "loss": 0.0049, "step": 13884 }, { "epoch": 12.96, "learning_rate": 4.838059701492538e-05, "loss": 0.0013, "step": 13888 }, { "epoch": 12.96, "learning_rate": 4.838013059701493e-05, "loss": 0.0049, "step": 13892 }, { "epoch": 12.96, "learning_rate": 4.8379664179104475e-05, "loss": 0.004, "step": 13896 }, { "epoch": 12.97, "learning_rate": 4.8379197761194036e-05, "loss": 0.0028, "step": 13900 }, { "epoch": 12.97, "learning_rate": 4.8378731343283585e-05, "loss": 0.0072, "step": 13904 }, { "epoch": 12.97, "learning_rate": 4.837826492537313e-05, "loss": 0.0047, "step": 13908 }, { "epoch": 12.98, "learning_rate": 4.837779850746269e-05, "loss": 0.0005, "step": 13912 }, { "epoch": 12.98, "learning_rate": 4.837733208955224e-05, "loss": 0.0068, "step": 13916 }, { "epoch": 12.98, "learning_rate": 4.837686567164179e-05, "loss": 0.0106, "step": 13920 }, { "epoch": 12.99, "learning_rate": 4.8376399253731346e-05, "loss": 0.0077, "step": 13924 }, { "epoch": 12.99, "learning_rate": 4.83759328358209e-05, "loss": 0.003, "step": 13928 }, { "epoch": 13.0, "learning_rate": 4.837546641791045e-05, "loss": 0.0073, "step": 13932 }, { "epoch": 13.0, "learning_rate": 4.8375000000000004e-05, "loss": 0.0009, "step": 13936 }, { "epoch": 13.0, "learning_rate": 4.837453358208955e-05, "loss": 0.0026, "step": 13940 }, { "epoch": 13.01, "learning_rate": 4.8374067164179107e-05, "loss": 0.0009, "step": 13944 }, { "epoch": 13.01, "learning_rate": 4.837360074626866e-05, "loss": 0.0012, "step": 13948 }, { "epoch": 13.01, "learning_rate": 4.837313432835821e-05, "loss": 0.0035, "step": 13952 }, { "epoch": 13.02, "learning_rate": 4.837266791044776e-05, "loss": 0.0041, "step": 13956 }, { "epoch": 13.02, "learning_rate": 4.837220149253732e-05, "loss": 0.0033, "step": 13960 }, { "epoch": 13.03, "learning_rate": 4.837173507462687e-05, "loss": 0.0029, "step": 13964 }, { "epoch": 13.03, "learning_rate": 4.8371268656716416e-05, "loss": 0.0017, "step": 13968 }, { "epoch": 13.03, "learning_rate": 4.837080223880597e-05, "loss": 0.0047, "step": 13972 }, { "epoch": 13.04, "learning_rate": 4.8370335820895526e-05, "loss": 0.0019, "step": 13976 }, { "epoch": 13.04, "learning_rate": 4.836986940298508e-05, "loss": 0.0022, "step": 13980 }, { "epoch": 13.04, "learning_rate": 4.836940298507463e-05, "loss": 0.0007, "step": 13984 }, { "epoch": 13.05, "learning_rate": 4.8368936567164183e-05, "loss": 0.0045, "step": 13988 }, { "epoch": 13.05, "learning_rate": 4.836847014925374e-05, "loss": 0.0013, "step": 13992 }, { "epoch": 13.06, "learning_rate": 4.8368003731343287e-05, "loss": 0.0051, "step": 13996 }, { "epoch": 13.06, "learning_rate": 4.8367537313432835e-05, "loss": 0.0061, "step": 14000 }, { "epoch": 13.06, "eval_exact_match": 0.7108317214700194, "eval_exec": 0.7456479690522244, "eval_loss": 0.29429176449775696, "eval_runtime": 1353.2304, "eval_samples_per_second": 0.764, "step": 14000 }, { "epoch": 13.06, "learning_rate": 4.836707089552239e-05, "loss": 0.0029, "step": 14004 }, { "epoch": 13.07, "learning_rate": 4.8366604477611944e-05, "loss": 0.0007, "step": 14008 }, { "epoch": 13.07, "learning_rate": 4.836613805970149e-05, "loss": 0.0118, "step": 14012 }, { "epoch": 13.07, "learning_rate": 4.836567164179105e-05, "loss": 0.0014, "step": 14016 }, { "epoch": 13.08, "learning_rate": 4.83652052238806e-05, "loss": 0.0061, "step": 14020 }, { "epoch": 13.08, "learning_rate": 4.836473880597015e-05, "loss": 0.0007, "step": 14024 }, { "epoch": 13.09, "learning_rate": 4.8364272388059705e-05, "loss": 0.0039, "step": 14028 }, { "epoch": 13.09, "learning_rate": 4.8363805970149254e-05, "loss": 0.0063, "step": 14032 }, { "epoch": 13.09, "learning_rate": 4.836333955223881e-05, "loss": 0.0004, "step": 14036 }, { "epoch": 13.1, "learning_rate": 4.8362873134328363e-05, "loss": 0.0054, "step": 14040 }, { "epoch": 13.1, "learning_rate": 4.836240671641791e-05, "loss": 0.0014, "step": 14044 }, { "epoch": 13.1, "learning_rate": 4.8361940298507466e-05, "loss": 0.002, "step": 14048 }, { "epoch": 13.11, "learning_rate": 4.836147388059702e-05, "loss": 0.006, "step": 14052 }, { "epoch": 13.11, "learning_rate": 4.836100746268657e-05, "loss": 0.0012, "step": 14056 }, { "epoch": 13.12, "learning_rate": 4.836054104477612e-05, "loss": 0.0007, "step": 14060 }, { "epoch": 13.12, "learning_rate": 4.836007462686567e-05, "loss": 0.0023, "step": 14064 }, { "epoch": 13.12, "learning_rate": 4.835960820895523e-05, "loss": 0.001, "step": 14068 }, { "epoch": 13.13, "learning_rate": 4.8359141791044776e-05, "loss": 0.0075, "step": 14072 }, { "epoch": 13.13, "learning_rate": 4.835867537313433e-05, "loss": 0.0018, "step": 14076 }, { "epoch": 13.13, "learning_rate": 4.8358208955223885e-05, "loss": 0.0024, "step": 14080 }, { "epoch": 13.14, "learning_rate": 4.8357742537313433e-05, "loss": 0.0012, "step": 14084 }, { "epoch": 13.14, "learning_rate": 4.835727611940299e-05, "loss": 0.002, "step": 14088 }, { "epoch": 13.15, "learning_rate": 4.8356809701492537e-05, "loss": 0.0019, "step": 14092 }, { "epoch": 13.15, "learning_rate": 4.835634328358209e-05, "loss": 0.0041, "step": 14096 }, { "epoch": 13.15, "learning_rate": 4.8355876865671646e-05, "loss": 0.006, "step": 14100 }, { "epoch": 13.16, "learning_rate": 4.8355410447761194e-05, "loss": 0.0048, "step": 14104 }, { "epoch": 13.16, "learning_rate": 4.835494402985075e-05, "loss": 0.0118, "step": 14108 }, { "epoch": 13.16, "learning_rate": 4.8354477611940304e-05, "loss": 0.0067, "step": 14112 }, { "epoch": 13.17, "learning_rate": 4.835401119402985e-05, "loss": 0.0075, "step": 14116 }, { "epoch": 13.17, "learning_rate": 4.83535447761194e-05, "loss": 0.0042, "step": 14120 }, { "epoch": 13.18, "learning_rate": 4.8353078358208955e-05, "loss": 0.0119, "step": 14124 }, { "epoch": 13.18, "learning_rate": 4.835261194029851e-05, "loss": 0.0032, "step": 14128 }, { "epoch": 13.18, "learning_rate": 4.835214552238806e-05, "loss": 0.0019, "step": 14132 }, { "epoch": 13.19, "learning_rate": 4.8351679104477613e-05, "loss": 0.0052, "step": 14136 }, { "epoch": 13.19, "learning_rate": 4.835121268656717e-05, "loss": 0.0137, "step": 14140 }, { "epoch": 13.19, "learning_rate": 4.835074626865672e-05, "loss": 0.0016, "step": 14144 }, { "epoch": 13.2, "learning_rate": 4.835027985074627e-05, "loss": 0.0104, "step": 14148 }, { "epoch": 13.2, "learning_rate": 4.834981343283582e-05, "loss": 0.0018, "step": 14152 }, { "epoch": 13.21, "learning_rate": 4.834934701492538e-05, "loss": 0.0009, "step": 14156 }, { "epoch": 13.21, "learning_rate": 4.834888059701493e-05, "loss": 0.0051, "step": 14160 }, { "epoch": 13.21, "learning_rate": 4.834841417910448e-05, "loss": 0.0082, "step": 14164 }, { "epoch": 13.22, "learning_rate": 4.834794776119403e-05, "loss": 0.0052, "step": 14168 }, { "epoch": 13.22, "learning_rate": 4.834748134328359e-05, "loss": 0.0013, "step": 14172 }, { "epoch": 13.22, "learning_rate": 4.8347014925373135e-05, "loss": 0.0037, "step": 14176 }, { "epoch": 13.23, "learning_rate": 4.834654850746269e-05, "loss": 0.0014, "step": 14180 }, { "epoch": 13.23, "learning_rate": 4.834608208955224e-05, "loss": 0.0066, "step": 14184 }, { "epoch": 13.24, "learning_rate": 4.834561567164179e-05, "loss": 0.0038, "step": 14188 }, { "epoch": 13.24, "learning_rate": 4.834514925373135e-05, "loss": 0.0055, "step": 14192 }, { "epoch": 13.24, "learning_rate": 4.8344682835820896e-05, "loss": 0.0034, "step": 14196 }, { "epoch": 13.25, "learning_rate": 4.834421641791045e-05, "loss": 0.0021, "step": 14200 }, { "epoch": 13.25, "learning_rate": 4.8343750000000006e-05, "loss": 0.006, "step": 14204 }, { "epoch": 13.25, "learning_rate": 4.8343283582089554e-05, "loss": 0.001, "step": 14208 }, { "epoch": 13.26, "learning_rate": 4.83428171641791e-05, "loss": 0.0006, "step": 14212 }, { "epoch": 13.26, "learning_rate": 4.8342350746268664e-05, "loss": 0.0009, "step": 14216 }, { "epoch": 13.26, "learning_rate": 4.834188432835821e-05, "loss": 0.0015, "step": 14220 }, { "epoch": 13.27, "learning_rate": 4.834141791044776e-05, "loss": 0.0008, "step": 14224 }, { "epoch": 13.27, "learning_rate": 4.8340951492537315e-05, "loss": 0.0043, "step": 14228 }, { "epoch": 13.28, "learning_rate": 4.834048507462687e-05, "loss": 0.0044, "step": 14232 }, { "epoch": 13.28, "learning_rate": 4.834001865671642e-05, "loss": 0.002, "step": 14236 }, { "epoch": 13.28, "learning_rate": 4.833955223880597e-05, "loss": 0.0025, "step": 14240 }, { "epoch": 13.29, "learning_rate": 4.833908582089552e-05, "loss": 0.007, "step": 14244 }, { "epoch": 13.29, "learning_rate": 4.8338619402985076e-05, "loss": 0.0061, "step": 14248 }, { "epoch": 13.29, "learning_rate": 4.833815298507463e-05, "loss": 0.0038, "step": 14252 }, { "epoch": 13.3, "learning_rate": 4.833768656716418e-05, "loss": 0.0027, "step": 14256 }, { "epoch": 13.3, "learning_rate": 4.8337220149253734e-05, "loss": 0.0008, "step": 14260 }, { "epoch": 13.31, "learning_rate": 4.833675373134329e-05, "loss": 0.0024, "step": 14264 }, { "epoch": 13.31, "learning_rate": 4.833628731343284e-05, "loss": 0.0032, "step": 14268 }, { "epoch": 13.31, "learning_rate": 4.8335820895522385e-05, "loss": 0.0079, "step": 14272 }, { "epoch": 13.32, "learning_rate": 4.833535447761195e-05, "loss": 0.0065, "step": 14276 }, { "epoch": 13.32, "learning_rate": 4.8334888059701495e-05, "loss": 0.0045, "step": 14280 }, { "epoch": 13.32, "learning_rate": 4.833442164179104e-05, "loss": 0.0006, "step": 14284 }, { "epoch": 13.33, "learning_rate": 4.83339552238806e-05, "loss": 0.0027, "step": 14288 }, { "epoch": 13.33, "learning_rate": 4.833348880597015e-05, "loss": 0.0089, "step": 14292 }, { "epoch": 13.34, "learning_rate": 4.83330223880597e-05, "loss": 0.0009, "step": 14296 }, { "epoch": 13.34, "learning_rate": 4.8332555970149256e-05, "loss": 0.0052, "step": 14300 }, { "epoch": 13.34, "learning_rate": 4.8332089552238804e-05, "loss": 0.0019, "step": 14304 }, { "epoch": 13.35, "learning_rate": 4.8331623134328366e-05, "loss": 0.0033, "step": 14308 }, { "epoch": 13.35, "learning_rate": 4.8331156716417914e-05, "loss": 0.0011, "step": 14312 }, { "epoch": 13.35, "learning_rate": 4.833069029850746e-05, "loss": 0.0017, "step": 14316 }, { "epoch": 13.36, "learning_rate": 4.833022388059702e-05, "loss": 0.0008, "step": 14320 }, { "epoch": 13.36, "learning_rate": 4.832975746268657e-05, "loss": 0.001, "step": 14324 }, { "epoch": 13.37, "learning_rate": 4.832929104477612e-05, "loss": 0.0044, "step": 14328 }, { "epoch": 13.37, "learning_rate": 4.8328824626865675e-05, "loss": 0.0029, "step": 14332 }, { "epoch": 13.37, "learning_rate": 4.832835820895523e-05, "loss": 0.0106, "step": 14336 }, { "epoch": 13.38, "learning_rate": 4.832789179104478e-05, "loss": 0.016, "step": 14340 }, { "epoch": 13.38, "learning_rate": 4.832742537313433e-05, "loss": 0.0078, "step": 14344 }, { "epoch": 13.38, "learning_rate": 4.832695895522388e-05, "loss": 0.0097, "step": 14348 }, { "epoch": 13.39, "learning_rate": 4.8326492537313436e-05, "loss": 0.0039, "step": 14352 }, { "epoch": 13.39, "learning_rate": 4.832602611940299e-05, "loss": 0.0068, "step": 14356 }, { "epoch": 13.4, "learning_rate": 4.832555970149254e-05, "loss": 0.0163, "step": 14360 }, { "epoch": 13.4, "learning_rate": 4.832509328358209e-05, "loss": 0.0011, "step": 14364 }, { "epoch": 13.4, "learning_rate": 4.832462686567165e-05, "loss": 0.0113, "step": 14368 }, { "epoch": 13.41, "learning_rate": 4.83241604477612e-05, "loss": 0.0027, "step": 14372 }, { "epoch": 13.41, "learning_rate": 4.8323694029850745e-05, "loss": 0.0008, "step": 14376 }, { "epoch": 13.41, "learning_rate": 4.83232276119403e-05, "loss": 0.003, "step": 14380 }, { "epoch": 13.42, "learning_rate": 4.8322761194029855e-05, "loss": 0.0014, "step": 14384 }, { "epoch": 13.42, "learning_rate": 4.83222947761194e-05, "loss": 0.0042, "step": 14388 }, { "epoch": 13.43, "learning_rate": 4.832182835820896e-05, "loss": 0.0017, "step": 14392 }, { "epoch": 13.43, "learning_rate": 4.8321361940298506e-05, "loss": 0.0041, "step": 14396 }, { "epoch": 13.43, "learning_rate": 4.832089552238806e-05, "loss": 0.0012, "step": 14400 }, { "epoch": 13.44, "learning_rate": 4.8320429104477616e-05, "loss": 0.0005, "step": 14404 }, { "epoch": 13.44, "learning_rate": 4.8319962686567164e-05, "loss": 0.0005, "step": 14408 }, { "epoch": 13.44, "learning_rate": 4.831949626865672e-05, "loss": 0.0031, "step": 14412 }, { "epoch": 13.45, "learning_rate": 4.8319029850746274e-05, "loss": 0.007, "step": 14416 }, { "epoch": 13.45, "learning_rate": 4.831856343283582e-05, "loss": 0.0006, "step": 14420 }, { "epoch": 13.46, "learning_rate": 4.831809701492537e-05, "loss": 0.0038, "step": 14424 }, { "epoch": 13.46, "learning_rate": 4.831763059701493e-05, "loss": 0.0033, "step": 14428 }, { "epoch": 13.46, "learning_rate": 4.831716417910448e-05, "loss": 0.0012, "step": 14432 }, { "epoch": 13.47, "learning_rate": 4.831669776119403e-05, "loss": 0.005, "step": 14436 }, { "epoch": 13.47, "learning_rate": 4.831623134328358e-05, "loss": 0.01, "step": 14440 }, { "epoch": 13.47, "learning_rate": 4.831576492537314e-05, "loss": 0.0034, "step": 14444 }, { "epoch": 13.48, "learning_rate": 4.8315298507462686e-05, "loss": 0.0139, "step": 14448 }, { "epoch": 13.48, "learning_rate": 4.831483208955224e-05, "loss": 0.0069, "step": 14452 }, { "epoch": 13.49, "learning_rate": 4.831436567164179e-05, "loss": 0.0012, "step": 14456 }, { "epoch": 13.49, "learning_rate": 4.8313899253731344e-05, "loss": 0.004, "step": 14460 }, { "epoch": 13.49, "learning_rate": 4.83134328358209e-05, "loss": 0.0009, "step": 14464 }, { "epoch": 13.5, "learning_rate": 4.831296641791045e-05, "loss": 0.0009, "step": 14468 }, { "epoch": 13.5, "learning_rate": 4.83125e-05, "loss": 0.0009, "step": 14472 }, { "epoch": 13.5, "learning_rate": 4.831203358208956e-05, "loss": 0.0011, "step": 14476 }, { "epoch": 13.51, "learning_rate": 4.8311567164179105e-05, "loss": 0.0003, "step": 14480 }, { "epoch": 13.51, "learning_rate": 4.831110074626866e-05, "loss": 0.0001, "step": 14484 }, { "epoch": 13.51, "learning_rate": 4.8310634328358215e-05, "loss": 0.0037, "step": 14488 }, { "epoch": 13.52, "learning_rate": 4.831016791044776e-05, "loss": 0.0035, "step": 14492 }, { "epoch": 13.52, "learning_rate": 4.830970149253732e-05, "loss": 0.0006, "step": 14496 }, { "epoch": 13.53, "learning_rate": 4.8309235074626866e-05, "loss": 0.0156, "step": 14500 }, { "epoch": 13.53, "eval_exact_match": 0.7224371373307543, "eval_exec": 0.7591876208897486, "eval_loss": 0.2937206029891968, "eval_runtime": 1346.8122, "eval_samples_per_second": 0.768, "step": 14500 }, { "epoch": 13.53, "learning_rate": 4.830876865671642e-05, "loss": 0.0009, "step": 14504 }, { "epoch": 13.53, "learning_rate": 4.8308302238805976e-05, "loss": 0.0014, "step": 14508 }, { "epoch": 13.54, "learning_rate": 4.8307835820895524e-05, "loss": 0.0018, "step": 14512 }, { "epoch": 13.54, "learning_rate": 4.830736940298507e-05, "loss": 0.0028, "step": 14516 }, { "epoch": 13.54, "learning_rate": 4.8306902985074634e-05, "loss": 0.0014, "step": 14520 }, { "epoch": 13.55, "learning_rate": 4.830643656716418e-05, "loss": 0.0018, "step": 14524 }, { "epoch": 13.55, "learning_rate": 4.830597014925373e-05, "loss": 0.0011, "step": 14528 }, { "epoch": 13.56, "learning_rate": 4.8305503731343285e-05, "loss": 0.0041, "step": 14532 }, { "epoch": 13.56, "learning_rate": 4.830503731343284e-05, "loss": 0.0035, "step": 14536 }, { "epoch": 13.56, "learning_rate": 4.830457089552239e-05, "loss": 0.0096, "step": 14540 }, { "epoch": 13.57, "learning_rate": 4.830410447761194e-05, "loss": 0.0062, "step": 14544 }, { "epoch": 13.57, "learning_rate": 4.83036380597015e-05, "loss": 0.0106, "step": 14548 }, { "epoch": 13.57, "learning_rate": 4.8303171641791046e-05, "loss": 0.0049, "step": 14552 }, { "epoch": 13.58, "learning_rate": 4.83027052238806e-05, "loss": 0.0069, "step": 14556 }, { "epoch": 13.58, "learning_rate": 4.830223880597015e-05, "loss": 0.0088, "step": 14560 }, { "epoch": 13.59, "learning_rate": 4.8301772388059704e-05, "loss": 0.0012, "step": 14564 }, { "epoch": 13.59, "learning_rate": 4.830130597014926e-05, "loss": 0.0029, "step": 14568 }, { "epoch": 13.59, "learning_rate": 4.830083955223881e-05, "loss": 0.0005, "step": 14572 }, { "epoch": 13.6, "learning_rate": 4.8300373134328355e-05, "loss": 0.001, "step": 14576 }, { "epoch": 13.6, "learning_rate": 4.829990671641792e-05, "loss": 0.0041, "step": 14580 }, { "epoch": 13.6, "learning_rate": 4.8299440298507465e-05, "loss": 0.0037, "step": 14584 }, { "epoch": 13.61, "learning_rate": 4.829897388059701e-05, "loss": 0.0035, "step": 14588 }, { "epoch": 13.61, "learning_rate": 4.829850746268657e-05, "loss": 0.0068, "step": 14592 }, { "epoch": 13.62, "learning_rate": 4.829804104477612e-05, "loss": 0.0018, "step": 14596 }, { "epoch": 13.62, "learning_rate": 4.829757462686567e-05, "loss": 0.0029, "step": 14600 }, { "epoch": 13.62, "learning_rate": 4.8297108208955226e-05, "loss": 0.0013, "step": 14604 }, { "epoch": 13.63, "learning_rate": 4.829664179104478e-05, "loss": 0.0051, "step": 14608 }, { "epoch": 13.63, "learning_rate": 4.829617537313433e-05, "loss": 0.0012, "step": 14612 }, { "epoch": 13.63, "learning_rate": 4.8295708955223884e-05, "loss": 0.0021, "step": 14616 }, { "epoch": 13.64, "learning_rate": 4.829524253731343e-05, "loss": 0.0019, "step": 14620 }, { "epoch": 13.64, "learning_rate": 4.829477611940299e-05, "loss": 0.0023, "step": 14624 }, { "epoch": 13.65, "learning_rate": 4.829430970149254e-05, "loss": 0.0015, "step": 14628 }, { "epoch": 13.65, "learning_rate": 4.829384328358209e-05, "loss": 0.002, "step": 14632 }, { "epoch": 13.65, "learning_rate": 4.8293376865671645e-05, "loss": 0.0095, "step": 14636 }, { "epoch": 13.66, "learning_rate": 4.82929104477612e-05, "loss": 0.006, "step": 14640 }, { "epoch": 13.66, "learning_rate": 4.829244402985075e-05, "loss": 0.0024, "step": 14644 }, { "epoch": 13.66, "learning_rate": 4.82919776119403e-05, "loss": 0.0022, "step": 14648 }, { "epoch": 13.67, "learning_rate": 4.829151119402985e-05, "loss": 0.0016, "step": 14652 }, { "epoch": 13.67, "learning_rate": 4.8291044776119406e-05, "loss": 0.0011, "step": 14656 }, { "epoch": 13.68, "learning_rate": 4.829057835820896e-05, "loss": 0.001, "step": 14660 }, { "epoch": 13.68, "learning_rate": 4.829011194029851e-05, "loss": 0.0068, "step": 14664 }, { "epoch": 13.68, "learning_rate": 4.8289645522388064e-05, "loss": 0.0007, "step": 14668 }, { "epoch": 13.69, "learning_rate": 4.828917910447762e-05, "loss": 0.0044, "step": 14672 }, { "epoch": 13.69, "learning_rate": 4.828871268656717e-05, "loss": 0.0097, "step": 14676 }, { "epoch": 13.69, "learning_rate": 4.8288246268656715e-05, "loss": 0.0004, "step": 14680 }, { "epoch": 13.7, "learning_rate": 4.828777985074627e-05, "loss": 0.0077, "step": 14684 }, { "epoch": 13.7, "learning_rate": 4.8287313432835825e-05, "loss": 0.0011, "step": 14688 }, { "epoch": 13.71, "learning_rate": 4.828684701492537e-05, "loss": 0.0008, "step": 14692 }, { "epoch": 13.71, "learning_rate": 4.828638059701493e-05, "loss": 0.0111, "step": 14696 }, { "epoch": 13.71, "learning_rate": 4.828591417910448e-05, "loss": 0.0005, "step": 14700 }, { "epoch": 13.72, "learning_rate": 4.828544776119403e-05, "loss": 0.0037, "step": 14704 }, { "epoch": 13.72, "learning_rate": 4.8284981343283586e-05, "loss": 0.0024, "step": 14708 }, { "epoch": 13.72, "learning_rate": 4.8284514925373134e-05, "loss": 0.0024, "step": 14712 }, { "epoch": 13.73, "learning_rate": 4.828404850746269e-05, "loss": 0.0052, "step": 14716 }, { "epoch": 13.73, "learning_rate": 4.8283582089552244e-05, "loss": 0.0005, "step": 14720 }, { "epoch": 13.73, "learning_rate": 4.828311567164179e-05, "loss": 0.006, "step": 14724 }, { "epoch": 13.74, "learning_rate": 4.828264925373135e-05, "loss": 0.0023, "step": 14728 }, { "epoch": 13.74, "learning_rate": 4.82821828358209e-05, "loss": 0.0033, "step": 14732 }, { "epoch": 13.75, "learning_rate": 4.828171641791045e-05, "loss": 0.0012, "step": 14736 }, { "epoch": 13.75, "learning_rate": 4.828125e-05, "loss": 0.0076, "step": 14740 }, { "epoch": 13.75, "learning_rate": 4.828078358208955e-05, "loss": 0.0063, "step": 14744 }, { "epoch": 13.76, "learning_rate": 4.828031716417911e-05, "loss": 0.0086, "step": 14748 }, { "epoch": 13.76, "learning_rate": 4.8279850746268656e-05, "loss": 0.0102, "step": 14752 }, { "epoch": 13.76, "learning_rate": 4.827938432835821e-05, "loss": 0.0011, "step": 14756 }, { "epoch": 13.77, "learning_rate": 4.8278917910447766e-05, "loss": 0.0056, "step": 14760 }, { "epoch": 13.77, "learning_rate": 4.8278451492537314e-05, "loss": 0.0025, "step": 14764 }, { "epoch": 13.78, "learning_rate": 4.827798507462687e-05, "loss": 0.0076, "step": 14768 }, { "epoch": 13.78, "learning_rate": 4.827751865671642e-05, "loss": 0.0081, "step": 14772 }, { "epoch": 13.78, "learning_rate": 4.827705223880597e-05, "loss": 0.0039, "step": 14776 }, { "epoch": 13.79, "learning_rate": 4.827658582089553e-05, "loss": 0.0013, "step": 14780 }, { "epoch": 13.79, "learning_rate": 4.8276119402985075e-05, "loss": 0.0019, "step": 14784 }, { "epoch": 13.79, "learning_rate": 4.827565298507463e-05, "loss": 0.0004, "step": 14788 }, { "epoch": 13.8, "learning_rate": 4.8275186567164185e-05, "loss": 0.0035, "step": 14792 }, { "epoch": 13.8, "learning_rate": 4.827472014925373e-05, "loss": 0.0006, "step": 14796 }, { "epoch": 13.81, "learning_rate": 4.827425373134329e-05, "loss": 0.0058, "step": 14800 }, { "epoch": 13.81, "learning_rate": 4.8273787313432836e-05, "loss": 0.0014, "step": 14804 }, { "epoch": 13.81, "learning_rate": 4.827332089552239e-05, "loss": 0.0023, "step": 14808 }, { "epoch": 13.82, "learning_rate": 4.8272854477611946e-05, "loss": 0.0006, "step": 14812 }, { "epoch": 13.82, "learning_rate": 4.8272388059701494e-05, "loss": 0.0057, "step": 14816 }, { "epoch": 13.82, "learning_rate": 4.827192164179105e-05, "loss": 0.0039, "step": 14820 }, { "epoch": 13.83, "learning_rate": 4.8271455223880604e-05, "loss": 0.0086, "step": 14824 }, { "epoch": 13.83, "learning_rate": 4.827098880597015e-05, "loss": 0.0012, "step": 14828 }, { "epoch": 13.84, "learning_rate": 4.82705223880597e-05, "loss": 0.0007, "step": 14832 }, { "epoch": 13.84, "learning_rate": 4.827005597014926e-05, "loss": 0.0006, "step": 14836 }, { "epoch": 13.84, "learning_rate": 4.826958955223881e-05, "loss": 0.0049, "step": 14840 }, { "epoch": 13.85, "learning_rate": 4.826912313432836e-05, "loss": 0.0008, "step": 14844 }, { "epoch": 13.85, "learning_rate": 4.826865671641791e-05, "loss": 0.001, "step": 14848 }, { "epoch": 13.85, "learning_rate": 4.826819029850747e-05, "loss": 0.0003, "step": 14852 }, { "epoch": 13.86, "learning_rate": 4.8267723880597016e-05, "loss": 0.0022, "step": 14856 }, { "epoch": 13.86, "learning_rate": 4.826725746268657e-05, "loss": 0.0009, "step": 14860 }, { "epoch": 13.87, "learning_rate": 4.826679104477612e-05, "loss": 0.0005, "step": 14864 }, { "epoch": 13.87, "learning_rate": 4.8266324626865674e-05, "loss": 0.0038, "step": 14868 }, { "epoch": 13.87, "learning_rate": 4.826585820895523e-05, "loss": 0.002, "step": 14872 }, { "epoch": 13.88, "learning_rate": 4.826539179104478e-05, "loss": 0.01, "step": 14876 }, { "epoch": 13.88, "learning_rate": 4.826492537313433e-05, "loss": 0.006, "step": 14880 }, { "epoch": 13.88, "learning_rate": 4.8264458955223886e-05, "loss": 0.011, "step": 14884 }, { "epoch": 13.89, "learning_rate": 4.8263992537313435e-05, "loss": 0.0027, "step": 14888 }, { "epoch": 13.89, "learning_rate": 4.826352611940298e-05, "loss": 0.0025, "step": 14892 }, { "epoch": 13.9, "learning_rate": 4.8263059701492544e-05, "loss": 0.0008, "step": 14896 }, { "epoch": 13.9, "learning_rate": 4.826259328358209e-05, "loss": 0.0017, "step": 14900 }, { "epoch": 13.9, "learning_rate": 4.826212686567164e-05, "loss": 0.001, "step": 14904 }, { "epoch": 13.91, "learning_rate": 4.8261660447761196e-05, "loss": 0.0004, "step": 14908 }, { "epoch": 13.91, "learning_rate": 4.826119402985075e-05, "loss": 0.0025, "step": 14912 }, { "epoch": 13.91, "learning_rate": 4.82607276119403e-05, "loss": 0.0011, "step": 14916 }, { "epoch": 13.92, "learning_rate": 4.8260261194029854e-05, "loss": 0.0007, "step": 14920 }, { "epoch": 13.92, "learning_rate": 4.82597947761194e-05, "loss": 0.0015, "step": 14924 }, { "epoch": 13.93, "learning_rate": 4.8259328358208957e-05, "loss": 0.0101, "step": 14928 }, { "epoch": 13.93, "learning_rate": 4.825886194029851e-05, "loss": 0.0003, "step": 14932 }, { "epoch": 13.93, "learning_rate": 4.825839552238806e-05, "loss": 0.0052, "step": 14936 }, { "epoch": 13.94, "learning_rate": 4.8257929104477615e-05, "loss": 0.0044, "step": 14940 }, { "epoch": 13.94, "learning_rate": 4.825746268656717e-05, "loss": 0.0005, "step": 14944 }, { "epoch": 13.94, "learning_rate": 4.825699626865672e-05, "loss": 0.0054, "step": 14948 }, { "epoch": 13.95, "learning_rate": 4.8256529850746266e-05, "loss": 0.0016, "step": 14952 }, { "epoch": 13.95, "learning_rate": 4.825606343283583e-05, "loss": 0.0017, "step": 14956 }, { "epoch": 13.96, "learning_rate": 4.8255597014925376e-05, "loss": 0.0065, "step": 14960 }, { "epoch": 13.96, "learning_rate": 4.825513059701493e-05, "loss": 0.001, "step": 14964 }, { "epoch": 13.96, "learning_rate": 4.825466417910448e-05, "loss": 0.012, "step": 14968 }, { "epoch": 13.97, "learning_rate": 4.8254197761194033e-05, "loss": 0.0023, "step": 14972 }, { "epoch": 13.97, "learning_rate": 4.825373134328359e-05, "loss": 0.0053, "step": 14976 }, { "epoch": 13.97, "learning_rate": 4.8253264925373136e-05, "loss": 0.0021, "step": 14980 }, { "epoch": 13.98, "learning_rate": 4.8252798507462685e-05, "loss": 0.0019, "step": 14984 }, { "epoch": 13.98, "learning_rate": 4.8252332089552246e-05, "loss": 0.002, "step": 14988 }, { "epoch": 13.98, "learning_rate": 4.8251865671641794e-05, "loss": 0.0069, "step": 14992 }, { "epoch": 13.99, "learning_rate": 4.825139925373134e-05, "loss": 0.0035, "step": 14996 }, { "epoch": 13.99, "learning_rate": 4.82509328358209e-05, "loss": 0.0026, "step": 15000 }, { "epoch": 13.99, "eval_exact_match": 0.7156673114119922, "eval_exec": 0.7524177949709865, "eval_loss": 0.31172001361846924, "eval_runtime": 1126.7509, "eval_samples_per_second": 0.918, "step": 15000 }, { "epoch": 14.0, "learning_rate": 4.825046641791045e-05, "loss": 0.0005, "step": 15004 }, { "epoch": 14.0, "learning_rate": 4.825e-05, "loss": 0.0089, "step": 15008 }, { "epoch": 14.0, "learning_rate": 4.8249533582089555e-05, "loss": 0.0092, "step": 15012 }, { "epoch": 14.01, "learning_rate": 4.824906716417911e-05, "loss": 0.001, "step": 15016 }, { "epoch": 14.01, "learning_rate": 4.824860074626866e-05, "loss": 0.0052, "step": 15020 }, { "epoch": 14.01, "learning_rate": 4.824813432835821e-05, "loss": 0.0005, "step": 15024 }, { "epoch": 14.02, "learning_rate": 4.824766791044776e-05, "loss": 0.0014, "step": 15028 }, { "epoch": 14.02, "learning_rate": 4.8247201492537316e-05, "loss": 0.0028, "step": 15032 }, { "epoch": 14.03, "learning_rate": 4.824673507462687e-05, "loss": 0.0008, "step": 15036 }, { "epoch": 14.03, "learning_rate": 4.824626865671642e-05, "loss": 0.0031, "step": 15040 }, { "epoch": 14.03, "learning_rate": 4.824580223880597e-05, "loss": 0.0055, "step": 15044 }, { "epoch": 14.04, "learning_rate": 4.824533582089553e-05, "loss": 0.0049, "step": 15048 }, { "epoch": 14.04, "learning_rate": 4.824486940298508e-05, "loss": 0.0045, "step": 15052 }, { "epoch": 14.04, "learning_rate": 4.8244402985074626e-05, "loss": 0.0033, "step": 15056 }, { "epoch": 14.05, "learning_rate": 4.824393656716418e-05, "loss": 0.0015, "step": 15060 }, { "epoch": 14.05, "learning_rate": 4.8243470149253735e-05, "loss": 0.0027, "step": 15064 }, { "epoch": 14.06, "learning_rate": 4.8243003731343283e-05, "loss": 0.0007, "step": 15068 }, { "epoch": 14.06, "learning_rate": 4.824253731343284e-05, "loss": 0.001, "step": 15072 }, { "epoch": 14.06, "learning_rate": 4.8242070895522387e-05, "loss": 0.001, "step": 15076 }, { "epoch": 14.07, "learning_rate": 4.824160447761194e-05, "loss": 0.0022, "step": 15080 }, { "epoch": 14.07, "learning_rate": 4.8241138059701496e-05, "loss": 0.0005, "step": 15084 }, { "epoch": 14.07, "learning_rate": 4.8240671641791044e-05, "loss": 0.0004, "step": 15088 }, { "epoch": 14.08, "learning_rate": 4.82402052238806e-05, "loss": 0.0027, "step": 15092 }, { "epoch": 14.08, "learning_rate": 4.8239738805970154e-05, "loss": 0.0034, "step": 15096 }, { "epoch": 14.09, "learning_rate": 4.82392723880597e-05, "loss": 0.0063, "step": 15100 }, { "epoch": 14.09, "learning_rate": 4.823880597014925e-05, "loss": 0.0004, "step": 15104 }, { "epoch": 14.09, "learning_rate": 4.823833955223881e-05, "loss": 0.0002, "step": 15108 }, { "epoch": 14.1, "learning_rate": 4.823787313432836e-05, "loss": 0.0029, "step": 15112 }, { "epoch": 14.1, "learning_rate": 4.8237406716417915e-05, "loss": 0.0011, "step": 15116 }, { "epoch": 14.1, "learning_rate": 4.8236940298507463e-05, "loss": 0.0043, "step": 15120 }, { "epoch": 14.11, "learning_rate": 4.823647388059702e-05, "loss": 0.0084, "step": 15124 }, { "epoch": 14.11, "learning_rate": 4.823600746268657e-05, "loss": 0.0062, "step": 15128 }, { "epoch": 14.12, "learning_rate": 4.823554104477612e-05, "loss": 0.0088, "step": 15132 }, { "epoch": 14.12, "learning_rate": 4.823507462686567e-05, "loss": 0.0005, "step": 15136 }, { "epoch": 14.12, "learning_rate": 4.823460820895523e-05, "loss": 0.0078, "step": 15140 }, { "epoch": 14.13, "learning_rate": 4.823414179104478e-05, "loss": 0.0007, "step": 15144 }, { "epoch": 14.13, "learning_rate": 4.823367537313433e-05, "loss": 0.0056, "step": 15148 }, { "epoch": 14.13, "learning_rate": 4.823320895522388e-05, "loss": 0.0005, "step": 15152 }, { "epoch": 14.14, "learning_rate": 4.823274253731344e-05, "loss": 0.0117, "step": 15156 }, { "epoch": 14.14, "learning_rate": 4.8232276119402985e-05, "loss": 0.0032, "step": 15160 }, { "epoch": 14.15, "learning_rate": 4.823180970149254e-05, "loss": 0.0032, "step": 15164 }, { "epoch": 14.15, "learning_rate": 4.8231343283582095e-05, "loss": 0.0061, "step": 15168 }, { "epoch": 14.15, "learning_rate": 4.823087686567164e-05, "loss": 0.0018, "step": 15172 }, { "epoch": 14.16, "learning_rate": 4.82304104477612e-05, "loss": 0.0101, "step": 15176 }, { "epoch": 14.16, "learning_rate": 4.8229944029850746e-05, "loss": 0.0014, "step": 15180 }, { "epoch": 14.16, "learning_rate": 4.82294776119403e-05, "loss": 0.0058, "step": 15184 }, { "epoch": 14.17, "learning_rate": 4.8229011194029856e-05, "loss": 0.0036, "step": 15188 }, { "epoch": 14.17, "learning_rate": 4.8228544776119404e-05, "loss": 0.0013, "step": 15192 }, { "epoch": 14.18, "learning_rate": 4.822807835820895e-05, "loss": 0.0029, "step": 15196 }, { "epoch": 14.18, "learning_rate": 4.8227611940298514e-05, "loss": 0.0063, "step": 15200 }, { "epoch": 14.18, "learning_rate": 4.822714552238806e-05, "loss": 0.0028, "step": 15204 }, { "epoch": 14.19, "learning_rate": 4.822667910447761e-05, "loss": 0.0006, "step": 15208 }, { "epoch": 14.19, "learning_rate": 4.8226212686567165e-05, "loss": 0.0014, "step": 15212 }, { "epoch": 14.19, "learning_rate": 4.822574626865672e-05, "loss": 0.0005, "step": 15216 }, { "epoch": 14.2, "learning_rate": 4.822527985074627e-05, "loss": 0.0019, "step": 15220 }, { "epoch": 14.2, "learning_rate": 4.822481343283582e-05, "loss": 0.0003, "step": 15224 }, { "epoch": 14.21, "learning_rate": 4.822434701492538e-05, "loss": 0.001, "step": 15228 }, { "epoch": 14.21, "learning_rate": 4.8223880597014926e-05, "loss": 0.0025, "step": 15232 }, { "epoch": 14.21, "learning_rate": 4.822341417910448e-05, "loss": 0.0007, "step": 15236 }, { "epoch": 14.22, "learning_rate": 4.822294776119403e-05, "loss": 0.0043, "step": 15240 }, { "epoch": 14.22, "learning_rate": 4.8222481343283584e-05, "loss": 0.0014, "step": 15244 }, { "epoch": 14.22, "learning_rate": 4.822201492537314e-05, "loss": 0.0025, "step": 15248 }, { "epoch": 14.23, "learning_rate": 4.822154850746269e-05, "loss": 0.0002, "step": 15252 }, { "epoch": 14.23, "learning_rate": 4.8221082089552235e-05, "loss": 0.0012, "step": 15256 }, { "epoch": 14.24, "learning_rate": 4.82206156716418e-05, "loss": 0.0026, "step": 15260 }, { "epoch": 14.24, "learning_rate": 4.8220149253731345e-05, "loss": 0.0005, "step": 15264 }, { "epoch": 14.24, "learning_rate": 4.821968283582089e-05, "loss": 0.0004, "step": 15268 }, { "epoch": 14.25, "learning_rate": 4.821921641791045e-05, "loss": 0.0002, "step": 15272 }, { "epoch": 14.25, "learning_rate": 4.821875e-05, "loss": 0.0069, "step": 15276 }, { "epoch": 14.25, "learning_rate": 4.821828358208956e-05, "loss": 0.0019, "step": 15280 }, { "epoch": 14.26, "learning_rate": 4.8217817164179106e-05, "loss": 0.0076, "step": 15284 }, { "epoch": 14.26, "learning_rate": 4.821735074626866e-05, "loss": 0.0015, "step": 15288 }, { "epoch": 14.26, "learning_rate": 4.8216884328358216e-05, "loss": 0.0013, "step": 15292 }, { "epoch": 14.27, "learning_rate": 4.8216417910447764e-05, "loss": 0.0014, "step": 15296 }, { "epoch": 14.27, "learning_rate": 4.821595149253731e-05, "loss": 0.0048, "step": 15300 }, { "epoch": 14.28, "learning_rate": 4.821548507462687e-05, "loss": 0.002, "step": 15304 }, { "epoch": 14.28, "learning_rate": 4.821501865671642e-05, "loss": 0.0023, "step": 15308 }, { "epoch": 14.28, "learning_rate": 4.821455223880597e-05, "loss": 0.0034, "step": 15312 }, { "epoch": 14.29, "learning_rate": 4.8214085820895525e-05, "loss": 0.0047, "step": 15316 }, { "epoch": 14.29, "learning_rate": 4.821361940298508e-05, "loss": 0.0092, "step": 15320 }, { "epoch": 14.29, "learning_rate": 4.821315298507463e-05, "loss": 0.0144, "step": 15324 }, { "epoch": 14.3, "learning_rate": 4.821268656716418e-05, "loss": 0.0068, "step": 15328 }, { "epoch": 14.3, "learning_rate": 4.821222014925373e-05, "loss": 0.0042, "step": 15332 }, { "epoch": 14.31, "learning_rate": 4.8211753731343286e-05, "loss": 0.0021, "step": 15336 }, { "epoch": 14.31, "learning_rate": 4.821128731343284e-05, "loss": 0.004, "step": 15340 }, { "epoch": 14.31, "learning_rate": 4.821082089552239e-05, "loss": 0.0018, "step": 15344 }, { "epoch": 14.32, "learning_rate": 4.8210354477611944e-05, "loss": 0.0032, "step": 15348 }, { "epoch": 14.32, "learning_rate": 4.82098880597015e-05, "loss": 0.0047, "step": 15352 }, { "epoch": 14.32, "learning_rate": 4.820942164179105e-05, "loss": 0.0024, "step": 15356 }, { "epoch": 14.33, "learning_rate": 4.8208955223880595e-05, "loss": 0.0012, "step": 15360 }, { "epoch": 14.33, "learning_rate": 4.820848880597015e-05, "loss": 0.0011, "step": 15364 }, { "epoch": 14.34, "learning_rate": 4.8208022388059705e-05, "loss": 0.0041, "step": 15368 }, { "epoch": 14.34, "learning_rate": 4.820755597014925e-05, "loss": 0.0026, "step": 15372 }, { "epoch": 14.34, "learning_rate": 4.820708955223881e-05, "loss": 0.0012, "step": 15376 }, { "epoch": 14.35, "learning_rate": 4.820662313432836e-05, "loss": 0.0038, "step": 15380 }, { "epoch": 14.35, "learning_rate": 4.820615671641791e-05, "loss": 0.0011, "step": 15384 }, { "epoch": 14.35, "learning_rate": 4.8205690298507466e-05, "loss": 0.0007, "step": 15388 }, { "epoch": 14.36, "learning_rate": 4.8205223880597014e-05, "loss": 0.0003, "step": 15392 }, { "epoch": 14.36, "learning_rate": 4.820475746268657e-05, "loss": 0.0002, "step": 15396 }, { "epoch": 14.37, "learning_rate": 4.8204291044776124e-05, "loss": 0.0008, "step": 15400 }, { "epoch": 14.37, "learning_rate": 4.820382462686567e-05, "loss": 0.0011, "step": 15404 }, { "epoch": 14.37, "learning_rate": 4.820335820895523e-05, "loss": 0.0026, "step": 15408 }, { "epoch": 14.38, "learning_rate": 4.820289179104478e-05, "loss": 0.0019, "step": 15412 }, { "epoch": 14.38, "learning_rate": 4.820242537313433e-05, "loss": 0.0036, "step": 15416 }, { "epoch": 14.38, "learning_rate": 4.820195895522388e-05, "loss": 0.003, "step": 15420 }, { "epoch": 14.39, "learning_rate": 4.820149253731343e-05, "loss": 0.0109, "step": 15424 }, { "epoch": 14.39, "learning_rate": 4.820102611940299e-05, "loss": 0.0034, "step": 15428 }, { "epoch": 14.4, "learning_rate": 4.8200559701492536e-05, "loss": 0.0005, "step": 15432 }, { "epoch": 14.4, "learning_rate": 4.820009328358209e-05, "loss": 0.0084, "step": 15436 }, { "epoch": 14.4, "learning_rate": 4.8199626865671646e-05, "loss": 0.0008, "step": 15440 }, { "epoch": 14.41, "learning_rate": 4.81991604477612e-05, "loss": 0.009, "step": 15444 }, { "epoch": 14.41, "learning_rate": 4.819869402985075e-05, "loss": 0.0021, "step": 15448 }, { "epoch": 14.41, "learning_rate": 4.81982276119403e-05, "loss": 0.0029, "step": 15452 }, { "epoch": 14.42, "learning_rate": 4.819776119402986e-05, "loss": 0.0013, "step": 15456 }, { "epoch": 14.42, "learning_rate": 4.819729477611941e-05, "loss": 0.0014, "step": 15460 }, { "epoch": 14.43, "learning_rate": 4.8196828358208955e-05, "loss": 0.0073, "step": 15464 }, { "epoch": 14.43, "learning_rate": 4.819636194029851e-05, "loss": 0.0027, "step": 15468 }, { "epoch": 14.43, "learning_rate": 4.8195895522388065e-05, "loss": 0.0027, "step": 15472 }, { "epoch": 14.44, "learning_rate": 4.819542910447761e-05, "loss": 0.0005, "step": 15476 }, { "epoch": 14.44, "learning_rate": 4.819496268656717e-05, "loss": 0.0028, "step": 15480 }, { "epoch": 14.44, "learning_rate": 4.8194496268656716e-05, "loss": 0.0005, "step": 15484 }, { "epoch": 14.45, "learning_rate": 4.819402985074627e-05, "loss": 0.0063, "step": 15488 }, { "epoch": 14.45, "learning_rate": 4.8193563432835826e-05, "loss": 0.0037, "step": 15492 }, { "epoch": 14.46, "learning_rate": 4.8193097014925374e-05, "loss": 0.0025, "step": 15496 }, { "epoch": 14.46, "learning_rate": 4.819263059701493e-05, "loss": 0.0044, "step": 15500 }, { "epoch": 14.46, "eval_exact_match": 0.7340425531914894, "eval_exec": 0.7727272727272727, "eval_loss": 0.30817216634750366, "eval_runtime": 1067.8388, "eval_samples_per_second": 0.968, "step": 15500 }, { "epoch": 14.46, "learning_rate": 4.8192164179104484e-05, "loss": 0.0109, "step": 15504 }, { "epoch": 14.47, "learning_rate": 4.819169776119403e-05, "loss": 0.0041, "step": 15508 }, { "epoch": 14.47, "learning_rate": 4.819123134328358e-05, "loss": 0.0005, "step": 15512 }, { "epoch": 14.47, "learning_rate": 4.819076492537314e-05, "loss": 0.0005, "step": 15516 }, { "epoch": 14.48, "learning_rate": 4.819029850746269e-05, "loss": 0.0044, "step": 15520 }, { "epoch": 14.48, "learning_rate": 4.818983208955224e-05, "loss": 0.0005, "step": 15524 }, { "epoch": 14.49, "learning_rate": 4.818936567164179e-05, "loss": 0.0006, "step": 15528 }, { "epoch": 14.49, "learning_rate": 4.818889925373135e-05, "loss": 0.0026, "step": 15532 }, { "epoch": 14.49, "learning_rate": 4.8188432835820896e-05, "loss": 0.0042, "step": 15536 }, { "epoch": 14.5, "learning_rate": 4.818796641791045e-05, "loss": 0.0017, "step": 15540 }, { "epoch": 14.5, "learning_rate": 4.81875e-05, "loss": 0.0013, "step": 15544 }, { "epoch": 14.5, "learning_rate": 4.8187033582089554e-05, "loss": 0.0041, "step": 15548 }, { "epoch": 14.51, "learning_rate": 4.818656716417911e-05, "loss": 0.0098, "step": 15552 }, { "epoch": 14.51, "learning_rate": 4.818610074626866e-05, "loss": 0.0025, "step": 15556 }, { "epoch": 14.51, "learning_rate": 4.818563432835821e-05, "loss": 0.0019, "step": 15560 }, { "epoch": 14.52, "learning_rate": 4.818516791044777e-05, "loss": 0.0021, "step": 15564 }, { "epoch": 14.52, "learning_rate": 4.8184701492537315e-05, "loss": 0.0054, "step": 15568 }, { "epoch": 14.53, "learning_rate": 4.818423507462686e-05, "loss": 0.0031, "step": 15572 }, { "epoch": 14.53, "learning_rate": 4.8183768656716425e-05, "loss": 0.0079, "step": 15576 }, { "epoch": 14.53, "learning_rate": 4.818330223880597e-05, "loss": 0.0037, "step": 15580 }, { "epoch": 14.54, "learning_rate": 4.818283582089552e-05, "loss": 0.0011, "step": 15584 }, { "epoch": 14.54, "learning_rate": 4.8182369402985076e-05, "loss": 0.0073, "step": 15588 }, { "epoch": 14.54, "learning_rate": 4.818190298507463e-05, "loss": 0.0069, "step": 15592 }, { "epoch": 14.55, "learning_rate": 4.818143656716418e-05, "loss": 0.006, "step": 15596 }, { "epoch": 14.55, "learning_rate": 4.8180970149253734e-05, "loss": 0.0053, "step": 15600 }, { "epoch": 14.56, "learning_rate": 4.818050373134328e-05, "loss": 0.0039, "step": 15604 }, { "epoch": 14.56, "learning_rate": 4.8180037313432844e-05, "loss": 0.0132, "step": 15608 }, { "epoch": 14.56, "learning_rate": 4.817957089552239e-05, "loss": 0.0041, "step": 15612 }, { "epoch": 14.57, "learning_rate": 4.817910447761194e-05, "loss": 0.0063, "step": 15616 }, { "epoch": 14.57, "learning_rate": 4.8178638059701495e-05, "loss": 0.0051, "step": 15620 }, { "epoch": 14.57, "learning_rate": 4.817817164179105e-05, "loss": 0.0036, "step": 15624 }, { "epoch": 14.58, "learning_rate": 4.81777052238806e-05, "loss": 0.0027, "step": 15628 }, { "epoch": 14.58, "learning_rate": 4.817723880597015e-05, "loss": 0.0041, "step": 15632 }, { "epoch": 14.59, "learning_rate": 4.817677238805971e-05, "loss": 0.0017, "step": 15636 }, { "epoch": 14.59, "learning_rate": 4.8176305970149256e-05, "loss": 0.0036, "step": 15640 }, { "epoch": 14.59, "learning_rate": 4.817583955223881e-05, "loss": 0.0112, "step": 15644 }, { "epoch": 14.6, "learning_rate": 4.817537313432836e-05, "loss": 0.0043, "step": 15648 }, { "epoch": 14.6, "learning_rate": 4.8174906716417914e-05, "loss": 0.0023, "step": 15652 }, { "epoch": 14.6, "learning_rate": 4.817444029850747e-05, "loss": 0.0029, "step": 15656 }, { "epoch": 14.61, "learning_rate": 4.817397388059702e-05, "loss": 0.001, "step": 15660 }, { "epoch": 14.61, "learning_rate": 4.8173507462686565e-05, "loss": 0.001, "step": 15664 }, { "epoch": 14.62, "learning_rate": 4.8173041044776127e-05, "loss": 0.0011, "step": 15668 }, { "epoch": 14.62, "learning_rate": 4.8172574626865675e-05, "loss": 0.009, "step": 15672 }, { "epoch": 14.62, "learning_rate": 4.817210820895522e-05, "loss": 0.0054, "step": 15676 }, { "epoch": 14.63, "learning_rate": 4.817164179104478e-05, "loss": 0.0043, "step": 15680 }, { "epoch": 14.63, "learning_rate": 4.817117537313433e-05, "loss": 0.0026, "step": 15684 }, { "epoch": 14.63, "learning_rate": 4.817070895522388e-05, "loss": 0.0013, "step": 15688 }, { "epoch": 14.64, "learning_rate": 4.8170242537313436e-05, "loss": 0.0018, "step": 15692 }, { "epoch": 14.64, "learning_rate": 4.816977611940299e-05, "loss": 0.0052, "step": 15696 }, { "epoch": 14.65, "learning_rate": 4.816930970149254e-05, "loss": 0.0049, "step": 15700 }, { "epoch": 14.65, "learning_rate": 4.8168843283582094e-05, "loss": 0.0006, "step": 15704 }, { "epoch": 14.65, "learning_rate": 4.816837686567164e-05, "loss": 0.0119, "step": 15708 }, { "epoch": 14.66, "learning_rate": 4.81679104477612e-05, "loss": 0.0032, "step": 15712 }, { "epoch": 14.66, "learning_rate": 4.816744402985075e-05, "loss": 0.0048, "step": 15716 }, { "epoch": 14.66, "learning_rate": 4.81669776119403e-05, "loss": 0.0005, "step": 15720 }, { "epoch": 14.67, "learning_rate": 4.816651119402985e-05, "loss": 0.001, "step": 15724 }, { "epoch": 14.67, "learning_rate": 4.816604477611941e-05, "loss": 0.0078, "step": 15728 }, { "epoch": 14.68, "learning_rate": 4.816557835820896e-05, "loss": 0.0037, "step": 15732 }, { "epoch": 14.68, "learning_rate": 4.8165111940298506e-05, "loss": 0.0027, "step": 15736 }, { "epoch": 14.68, "learning_rate": 4.816464552238806e-05, "loss": 0.0025, "step": 15740 }, { "epoch": 14.69, "learning_rate": 4.8164179104477616e-05, "loss": 0.0017, "step": 15744 }, { "epoch": 14.69, "learning_rate": 4.8163712686567164e-05, "loss": 0.0008, "step": 15748 }, { "epoch": 14.69, "learning_rate": 4.816324626865672e-05, "loss": 0.0007, "step": 15752 }, { "epoch": 14.7, "learning_rate": 4.8162779850746274e-05, "loss": 0.0105, "step": 15756 }, { "epoch": 14.7, "learning_rate": 4.816231343283582e-05, "loss": 0.0056, "step": 15760 }, { "epoch": 14.71, "learning_rate": 4.816184701492538e-05, "loss": 0.0056, "step": 15764 }, { "epoch": 14.71, "learning_rate": 4.8161380597014925e-05, "loss": 0.009, "step": 15768 }, { "epoch": 14.71, "learning_rate": 4.816091417910448e-05, "loss": 0.0036, "step": 15772 }, { "epoch": 14.72, "learning_rate": 4.8160447761194035e-05, "loss": 0.0007, "step": 15776 }, { "epoch": 14.72, "learning_rate": 4.815998134328358e-05, "loss": 0.002, "step": 15780 }, { "epoch": 14.72, "learning_rate": 4.815951492537314e-05, "loss": 0.0027, "step": 15784 }, { "epoch": 14.73, "learning_rate": 4.815904850746269e-05, "loss": 0.002, "step": 15788 }, { "epoch": 14.73, "learning_rate": 4.815858208955224e-05, "loss": 0.0004, "step": 15792 }, { "epoch": 14.73, "learning_rate": 4.8158115671641796e-05, "loss": 0.0035, "step": 15796 }, { "epoch": 14.74, "learning_rate": 4.8157649253731344e-05, "loss": 0.0032, "step": 15800 }, { "epoch": 14.74, "learning_rate": 4.81571828358209e-05, "loss": 0.0004, "step": 15804 }, { "epoch": 14.75, "learning_rate": 4.8156716417910453e-05, "loss": 0.0007, "step": 15808 }, { "epoch": 14.75, "learning_rate": 4.815625e-05, "loss": 0.002, "step": 15812 }, { "epoch": 14.75, "learning_rate": 4.815578358208955e-05, "loss": 0.0007, "step": 15816 }, { "epoch": 14.76, "learning_rate": 4.815531716417911e-05, "loss": 0.0001, "step": 15820 }, { "epoch": 14.76, "learning_rate": 4.815485074626866e-05, "loss": 0.0011, "step": 15824 }, { "epoch": 14.76, "learning_rate": 4.815438432835821e-05, "loss": 0.0041, "step": 15828 }, { "epoch": 14.77, "learning_rate": 4.815391791044776e-05, "loss": 0.0061, "step": 15832 }, { "epoch": 14.77, "learning_rate": 4.815345149253732e-05, "loss": 0.0028, "step": 15836 }, { "epoch": 14.78, "learning_rate": 4.8152985074626866e-05, "loss": 0.0028, "step": 15840 }, { "epoch": 14.78, "learning_rate": 4.815251865671642e-05, "loss": 0.0051, "step": 15844 }, { "epoch": 14.78, "learning_rate": 4.8152052238805975e-05, "loss": 0.0054, "step": 15848 }, { "epoch": 14.79, "learning_rate": 4.8151585820895524e-05, "loss": 0.0016, "step": 15852 }, { "epoch": 14.79, "learning_rate": 4.815111940298508e-05, "loss": 0.0024, "step": 15856 }, { "epoch": 14.79, "learning_rate": 4.815065298507463e-05, "loss": 0.0041, "step": 15860 }, { "epoch": 14.8, "learning_rate": 4.815018656716418e-05, "loss": 0.0025, "step": 15864 }, { "epoch": 14.8, "learning_rate": 4.8149720149253736e-05, "loss": 0.003, "step": 15868 }, { "epoch": 14.81, "learning_rate": 4.8149253731343285e-05, "loss": 0.0043, "step": 15872 }, { "epoch": 14.81, "learning_rate": 4.814878731343283e-05, "loss": 0.014, "step": 15876 }, { "epoch": 14.81, "learning_rate": 4.8148320895522394e-05, "loss": 0.0046, "step": 15880 }, { "epoch": 14.82, "learning_rate": 4.814785447761194e-05, "loss": 0.0089, "step": 15884 }, { "epoch": 14.82, "learning_rate": 4.814738805970149e-05, "loss": 0.0005, "step": 15888 }, { "epoch": 14.82, "learning_rate": 4.8146921641791046e-05, "loss": 0.0025, "step": 15892 }, { "epoch": 14.83, "learning_rate": 4.81464552238806e-05, "loss": 0.0027, "step": 15896 }, { "epoch": 14.83, "learning_rate": 4.814598880597015e-05, "loss": 0.0082, "step": 15900 }, { "epoch": 14.84, "learning_rate": 4.8145522388059704e-05, "loss": 0.0016, "step": 15904 }, { "epoch": 14.84, "learning_rate": 4.814505597014926e-05, "loss": 0.0556, "step": 15908 }, { "epoch": 14.84, "learning_rate": 4.8144589552238807e-05, "loss": 0.0032, "step": 15912 }, { "epoch": 14.85, "learning_rate": 4.814412313432836e-05, "loss": 0.0014, "step": 15916 }, { "epoch": 14.85, "learning_rate": 4.814365671641791e-05, "loss": 0.0013, "step": 15920 }, { "epoch": 14.85, "learning_rate": 4.8143190298507465e-05, "loss": 0.0037, "step": 15924 }, { "epoch": 14.86, "learning_rate": 4.814272388059702e-05, "loss": 0.002, "step": 15928 }, { "epoch": 14.86, "learning_rate": 4.814225746268657e-05, "loss": 0.0032, "step": 15932 }, { "epoch": 14.87, "learning_rate": 4.814179104477612e-05, "loss": 0.0133, "step": 15936 }, { "epoch": 14.87, "learning_rate": 4.814132462686568e-05, "loss": 0.0004, "step": 15940 }, { "epoch": 14.87, "learning_rate": 4.8140858208955225e-05, "loss": 0.0026, "step": 15944 }, { "epoch": 14.88, "learning_rate": 4.814039179104478e-05, "loss": 0.0004, "step": 15948 }, { "epoch": 14.88, "learning_rate": 4.813992537313433e-05, "loss": 0.0043, "step": 15952 }, { "epoch": 14.88, "learning_rate": 4.8139458955223883e-05, "loss": 0.0121, "step": 15956 }, { "epoch": 14.89, "learning_rate": 4.813899253731344e-05, "loss": 0.0007, "step": 15960 }, { "epoch": 14.89, "learning_rate": 4.8138526119402986e-05, "loss": 0.0046, "step": 15964 }, { "epoch": 14.9, "learning_rate": 4.813805970149254e-05, "loss": 0.0008, "step": 15968 }, { "epoch": 14.9, "learning_rate": 4.8137593283582096e-05, "loss": 0.0106, "step": 15972 }, { "epoch": 14.9, "learning_rate": 4.8137126865671644e-05, "loss": 0.0034, "step": 15976 }, { "epoch": 14.91, "learning_rate": 4.813666044776119e-05, "loss": 0.0062, "step": 15980 }, { "epoch": 14.91, "learning_rate": 4.813619402985075e-05, "loss": 0.0059, "step": 15984 }, { "epoch": 14.91, "learning_rate": 4.81357276119403e-05, "loss": 0.0028, "step": 15988 }, { "epoch": 14.92, "learning_rate": 4.813526119402985e-05, "loss": 0.0056, "step": 15992 }, { "epoch": 14.92, "learning_rate": 4.8134794776119405e-05, "loss": 0.0029, "step": 15996 }, { "epoch": 14.93, "learning_rate": 4.813432835820896e-05, "loss": 0.0031, "step": 16000 }, { "epoch": 14.93, "eval_exact_match": 0.7166344294003868, "eval_exec": 0.7553191489361702, "eval_loss": 0.2952513098716736, "eval_runtime": 1078.2591, "eval_samples_per_second": 0.959, "step": 16000 }, { "epoch": 14.93, "learning_rate": 4.813386194029851e-05, "loss": 0.005, "step": 16004 }, { "epoch": 14.93, "learning_rate": 4.813339552238806e-05, "loss": 0.0055, "step": 16008 }, { "epoch": 14.94, "learning_rate": 4.813292910447761e-05, "loss": 0.0085, "step": 16012 }, { "epoch": 14.94, "learning_rate": 4.8132462686567166e-05, "loss": 0.0029, "step": 16016 }, { "epoch": 14.94, "learning_rate": 4.813199626865672e-05, "loss": 0.0035, "step": 16020 }, { "epoch": 14.95, "learning_rate": 4.813152985074627e-05, "loss": 0.0051, "step": 16024 }, { "epoch": 14.95, "learning_rate": 4.8131063432835824e-05, "loss": 0.0015, "step": 16028 }, { "epoch": 14.96, "learning_rate": 4.813059701492538e-05, "loss": 0.005, "step": 16032 }, { "epoch": 14.96, "learning_rate": 4.813013059701493e-05, "loss": 0.0011, "step": 16036 }, { "epoch": 14.96, "learning_rate": 4.8129664179104476e-05, "loss": 0.002, "step": 16040 }, { "epoch": 14.97, "learning_rate": 4.812919776119403e-05, "loss": 0.0034, "step": 16044 }, { "epoch": 14.97, "learning_rate": 4.8128731343283585e-05, "loss": 0.0025, "step": 16048 }, { "epoch": 14.97, "learning_rate": 4.8128264925373133e-05, "loss": 0.002, "step": 16052 }, { "epoch": 14.98, "learning_rate": 4.812779850746269e-05, "loss": 0.0081, "step": 16056 }, { "epoch": 14.98, "learning_rate": 4.812733208955224e-05, "loss": 0.0041, "step": 16060 }, { "epoch": 14.98, "learning_rate": 4.812686567164179e-05, "loss": 0.0021, "step": 16064 }, { "epoch": 14.99, "learning_rate": 4.8126399253731346e-05, "loss": 0.0031, "step": 16068 }, { "epoch": 14.99, "learning_rate": 4.8125932835820894e-05, "loss": 0.0066, "step": 16072 }, { "epoch": 15.0, "learning_rate": 4.812546641791045e-05, "loss": 0.0015, "step": 16076 }, { "epoch": 15.0, "learning_rate": 4.8125000000000004e-05, "loss": 0.0011, "step": 16080 }, { "epoch": 15.0, "learning_rate": 4.812453358208955e-05, "loss": 0.0049, "step": 16084 }, { "epoch": 15.01, "learning_rate": 4.812406716417911e-05, "loss": 0.0013, "step": 16088 }, { "epoch": 15.01, "learning_rate": 4.812360074626866e-05, "loss": 0.0014, "step": 16092 }, { "epoch": 15.01, "learning_rate": 4.812313432835821e-05, "loss": 0.0009, "step": 16096 }, { "epoch": 15.02, "learning_rate": 4.8122667910447765e-05, "loss": 0.0009, "step": 16100 }, { "epoch": 15.02, "learning_rate": 4.812220149253731e-05, "loss": 0.0004, "step": 16104 }, { "epoch": 15.03, "learning_rate": 4.812173507462687e-05, "loss": 0.015, "step": 16108 }, { "epoch": 15.03, "learning_rate": 4.812126865671642e-05, "loss": 0.0018, "step": 16112 }, { "epoch": 15.03, "learning_rate": 4.812080223880597e-05, "loss": 0.002, "step": 16116 }, { "epoch": 15.04, "learning_rate": 4.8120335820895526e-05, "loss": 0.0027, "step": 16120 }, { "epoch": 15.04, "learning_rate": 4.811986940298508e-05, "loss": 0.0018, "step": 16124 }, { "epoch": 15.04, "learning_rate": 4.811940298507463e-05, "loss": 0.0019, "step": 16128 }, { "epoch": 15.05, "learning_rate": 4.811893656716418e-05, "loss": 0.0064, "step": 16132 }, { "epoch": 15.05, "learning_rate": 4.811847014925374e-05, "loss": 0.0063, "step": 16136 }, { "epoch": 15.06, "learning_rate": 4.811800373134329e-05, "loss": 0.0016, "step": 16140 }, { "epoch": 15.06, "learning_rate": 4.8117537313432835e-05, "loss": 0.0042, "step": 16144 }, { "epoch": 15.06, "learning_rate": 4.811707089552239e-05, "loss": 0.0011, "step": 16148 }, { "epoch": 15.07, "learning_rate": 4.8116604477611945e-05, "loss": 0.0019, "step": 16152 }, { "epoch": 15.07, "learning_rate": 4.811613805970149e-05, "loss": 0.0003, "step": 16156 }, { "epoch": 15.07, "learning_rate": 4.811567164179105e-05, "loss": 0.0019, "step": 16160 }, { "epoch": 15.08, "learning_rate": 4.8115205223880596e-05, "loss": 0.0049, "step": 16164 }, { "epoch": 15.08, "learning_rate": 4.811473880597015e-05, "loss": 0.0036, "step": 16168 }, { "epoch": 15.09, "learning_rate": 4.8114272388059706e-05, "loss": 0.0012, "step": 16172 }, { "epoch": 15.09, "learning_rate": 4.8113805970149254e-05, "loss": 0.0004, "step": 16176 }, { "epoch": 15.09, "learning_rate": 4.811333955223881e-05, "loss": 0.0018, "step": 16180 }, { "epoch": 15.1, "learning_rate": 4.8112873134328364e-05, "loss": 0.0003, "step": 16184 }, { "epoch": 15.1, "learning_rate": 4.811240671641791e-05, "loss": 0.003, "step": 16188 }, { "epoch": 15.1, "learning_rate": 4.811194029850746e-05, "loss": 0.0029, "step": 16192 }, { "epoch": 15.11, "learning_rate": 4.811147388059702e-05, "loss": 0.0081, "step": 16196 }, { "epoch": 15.11, "learning_rate": 4.811100746268657e-05, "loss": 0.0094, "step": 16200 }, { "epoch": 15.12, "learning_rate": 4.811054104477612e-05, "loss": 0.0017, "step": 16204 }, { "epoch": 15.12, "learning_rate": 4.811007462686567e-05, "loss": 0.0049, "step": 16208 }, { "epoch": 15.12, "learning_rate": 4.810960820895523e-05, "loss": 0.0044, "step": 16212 }, { "epoch": 15.13, "learning_rate": 4.8109141791044776e-05, "loss": 0.0012, "step": 16216 }, { "epoch": 15.13, "learning_rate": 4.810867537313433e-05, "loss": 0.0013, "step": 16220 }, { "epoch": 15.13, "learning_rate": 4.810820895522388e-05, "loss": 0.0024, "step": 16224 }, { "epoch": 15.14, "learning_rate": 4.8107742537313434e-05, "loss": 0.0004, "step": 16228 }, { "epoch": 15.14, "learning_rate": 4.810727611940299e-05, "loss": 0.0072, "step": 16232 }, { "epoch": 15.15, "learning_rate": 4.810680970149254e-05, "loss": 0.0043, "step": 16236 }, { "epoch": 15.15, "learning_rate": 4.810634328358209e-05, "loss": 0.0004, "step": 16240 }, { "epoch": 15.15, "learning_rate": 4.810587686567165e-05, "loss": 0.0024, "step": 16244 }, { "epoch": 15.16, "learning_rate": 4.8105410447761195e-05, "loss": 0.0038, "step": 16248 }, { "epoch": 15.16, "learning_rate": 4.810494402985074e-05, "loss": 0.0016, "step": 16252 }, { "epoch": 15.16, "learning_rate": 4.8104477611940305e-05, "loss": 0.0045, "step": 16256 }, { "epoch": 15.17, "learning_rate": 4.810401119402985e-05, "loss": 0.0002, "step": 16260 }, { "epoch": 15.17, "learning_rate": 4.810354477611941e-05, "loss": 0.0052, "step": 16264 }, { "epoch": 15.18, "learning_rate": 4.8103078358208956e-05, "loss": 0.0047, "step": 16268 }, { "epoch": 15.18, "learning_rate": 4.810261194029851e-05, "loss": 0.0022, "step": 16272 }, { "epoch": 15.18, "learning_rate": 4.8102145522388066e-05, "loss": 0.0205, "step": 16276 }, { "epoch": 15.19, "learning_rate": 4.8101679104477614e-05, "loss": 0.0053, "step": 16280 }, { "epoch": 15.19, "learning_rate": 4.810121268656716e-05, "loss": 0.0004, "step": 16284 }, { "epoch": 15.19, "learning_rate": 4.8100746268656724e-05, "loss": 0.0042, "step": 16288 }, { "epoch": 15.2, "learning_rate": 4.810027985074627e-05, "loss": 0.0027, "step": 16292 }, { "epoch": 15.2, "learning_rate": 4.809981343283582e-05, "loss": 0.0143, "step": 16296 }, { "epoch": 15.21, "learning_rate": 4.8099347014925375e-05, "loss": 0.0028, "step": 16300 }, { "epoch": 15.21, "learning_rate": 4.809888059701493e-05, "loss": 0.0093, "step": 16304 }, { "epoch": 15.21, "learning_rate": 4.809841417910448e-05, "loss": 0.0039, "step": 16308 }, { "epoch": 15.22, "learning_rate": 4.809794776119403e-05, "loss": 0.0009, "step": 16312 }, { "epoch": 15.22, "learning_rate": 4.809748134328359e-05, "loss": 0.0022, "step": 16316 }, { "epoch": 15.22, "learning_rate": 4.8097014925373136e-05, "loss": 0.0019, "step": 16320 }, { "epoch": 15.23, "learning_rate": 4.809654850746269e-05, "loss": 0.0058, "step": 16324 }, { "epoch": 15.23, "learning_rate": 4.809608208955224e-05, "loss": 0.0013, "step": 16328 }, { "epoch": 15.24, "learning_rate": 4.8095615671641794e-05, "loss": 0.0025, "step": 16332 }, { "epoch": 15.24, "learning_rate": 4.809514925373135e-05, "loss": 0.0045, "step": 16336 }, { "epoch": 15.24, "learning_rate": 4.80946828358209e-05, "loss": 0.0026, "step": 16340 }, { "epoch": 15.25, "learning_rate": 4.8094216417910445e-05, "loss": 0.0062, "step": 16344 }, { "epoch": 15.25, "learning_rate": 4.809375000000001e-05, "loss": 0.0044, "step": 16348 }, { "epoch": 15.25, "learning_rate": 4.8093283582089555e-05, "loss": 0.0026, "step": 16352 }, { "epoch": 15.26, "learning_rate": 4.80928171641791e-05, "loss": 0.003, "step": 16356 }, { "epoch": 15.26, "learning_rate": 4.809235074626866e-05, "loss": 0.0039, "step": 16360 }, { "epoch": 15.26, "learning_rate": 4.809188432835821e-05, "loss": 0.012, "step": 16364 }, { "epoch": 15.27, "learning_rate": 4.809141791044776e-05, "loss": 0.0023, "step": 16368 }, { "epoch": 15.27, "learning_rate": 4.8090951492537316e-05, "loss": 0.0057, "step": 16372 }, { "epoch": 15.28, "learning_rate": 4.809048507462687e-05, "loss": 0.0011, "step": 16376 }, { "epoch": 15.28, "learning_rate": 4.809001865671642e-05, "loss": 0.009, "step": 16380 }, { "epoch": 15.28, "learning_rate": 4.8089552238805974e-05, "loss": 0.0026, "step": 16384 }, { "epoch": 15.29, "learning_rate": 4.808908582089552e-05, "loss": 0.0005, "step": 16388 }, { "epoch": 15.29, "learning_rate": 4.808861940298508e-05, "loss": 0.0054, "step": 16392 }, { "epoch": 15.29, "learning_rate": 4.808815298507463e-05, "loss": 0.0035, "step": 16396 }, { "epoch": 15.3, "learning_rate": 4.808768656716418e-05, "loss": 0.0049, "step": 16400 }, { "epoch": 15.3, "learning_rate": 4.808722014925373e-05, "loss": 0.0028, "step": 16404 }, { "epoch": 15.31, "learning_rate": 4.808675373134329e-05, "loss": 0.0002, "step": 16408 }, { "epoch": 15.31, "learning_rate": 4.808628731343284e-05, "loss": 0.0003, "step": 16412 }, { "epoch": 15.31, "learning_rate": 4.8085820895522386e-05, "loss": 0.0014, "step": 16416 }, { "epoch": 15.32, "learning_rate": 4.808535447761194e-05, "loss": 0.009, "step": 16420 }, { "epoch": 15.32, "learning_rate": 4.8084888059701496e-05, "loss": 0.0002, "step": 16424 }, { "epoch": 15.32, "learning_rate": 4.808442164179105e-05, "loss": 0.0004, "step": 16428 }, { "epoch": 15.33, "learning_rate": 4.80839552238806e-05, "loss": 0.0043, "step": 16432 }, { "epoch": 15.33, "learning_rate": 4.8083488805970154e-05, "loss": 0.0091, "step": 16436 }, { "epoch": 15.34, "learning_rate": 4.808302238805971e-05, "loss": 0.0007, "step": 16440 }, { "epoch": 15.34, "learning_rate": 4.808255597014926e-05, "loss": 0.0021, "step": 16444 }, { "epoch": 15.34, "learning_rate": 4.8082089552238805e-05, "loss": 0.0036, "step": 16448 }, { "epoch": 15.35, "learning_rate": 4.808162313432836e-05, "loss": 0.0018, "step": 16452 }, { "epoch": 15.35, "learning_rate": 4.8081156716417915e-05, "loss": 0.0222, "step": 16456 }, { "epoch": 15.35, "learning_rate": 4.808069029850746e-05, "loss": 0.0022, "step": 16460 }, { "epoch": 15.36, "learning_rate": 4.808022388059702e-05, "loss": 0.0023, "step": 16464 }, { "epoch": 15.36, "learning_rate": 4.807975746268657e-05, "loss": 0.0022, "step": 16468 }, { "epoch": 15.37, "learning_rate": 4.807929104477612e-05, "loss": 0.0056, "step": 16472 }, { "epoch": 15.37, "learning_rate": 4.8078824626865676e-05, "loss": 0.0005, "step": 16476 }, { "epoch": 15.37, "learning_rate": 4.8078358208955224e-05, "loss": 0.0017, "step": 16480 }, { "epoch": 15.38, "learning_rate": 4.807789179104478e-05, "loss": 0.0005, "step": 16484 }, { "epoch": 15.38, "learning_rate": 4.8077425373134334e-05, "loss": 0.0085, "step": 16488 }, { "epoch": 15.38, "learning_rate": 4.807695895522388e-05, "loss": 0.0005, "step": 16492 }, { "epoch": 15.39, "learning_rate": 4.807649253731343e-05, "loss": 0.0032, "step": 16496 }, { "epoch": 15.39, "learning_rate": 4.807602611940299e-05, "loss": 0.0007, "step": 16500 }, { "epoch": 15.39, "eval_exact_match": 0.7272727272727273, "eval_exec": 0.7669245647969052, "eval_loss": 0.3177631199359894, "eval_runtime": 1071.5811, "eval_samples_per_second": 0.965, "step": 16500 }, { "epoch": 15.4, "learning_rate": 4.807555970149254e-05, "loss": 0.0019, "step": 16504 }, { "epoch": 15.4, "learning_rate": 4.807509328358209e-05, "loss": 0.0072, "step": 16508 }, { "epoch": 15.4, "learning_rate": 4.807462686567164e-05, "loss": 0.0004, "step": 16512 }, { "epoch": 15.41, "learning_rate": 4.80741604477612e-05, "loss": 0.0006, "step": 16516 }, { "epoch": 15.41, "learning_rate": 4.8073694029850746e-05, "loss": 0.0023, "step": 16520 }, { "epoch": 15.41, "learning_rate": 4.80732276119403e-05, "loss": 0.0017, "step": 16524 }, { "epoch": 15.42, "learning_rate": 4.8072761194029856e-05, "loss": 0.0038, "step": 16528 }, { "epoch": 15.42, "learning_rate": 4.8072294776119404e-05, "loss": 0.0014, "step": 16532 }, { "epoch": 15.43, "learning_rate": 4.807182835820896e-05, "loss": 0.0032, "step": 16536 }, { "epoch": 15.43, "learning_rate": 4.807136194029851e-05, "loss": 0.0007, "step": 16540 }, { "epoch": 15.43, "learning_rate": 4.807089552238806e-05, "loss": 0.0019, "step": 16544 }, { "epoch": 15.44, "learning_rate": 4.807042910447762e-05, "loss": 0.0026, "step": 16548 }, { "epoch": 15.44, "learning_rate": 4.8069962686567165e-05, "loss": 0.004, "step": 16552 }, { "epoch": 15.44, "learning_rate": 4.806949626865671e-05, "loss": 0.0006, "step": 16556 }, { "epoch": 15.45, "learning_rate": 4.8069029850746275e-05, "loss": 0.0016, "step": 16560 }, { "epoch": 15.45, "learning_rate": 4.806856343283582e-05, "loss": 0.0025, "step": 16564 }, { "epoch": 15.46, "learning_rate": 4.806809701492537e-05, "loss": 0.0006, "step": 16568 }, { "epoch": 15.46, "learning_rate": 4.8067630597014926e-05, "loss": 0.0008, "step": 16572 }, { "epoch": 15.46, "learning_rate": 4.806716417910448e-05, "loss": 0.0043, "step": 16576 }, { "epoch": 15.47, "learning_rate": 4.806669776119403e-05, "loss": 0.0013, "step": 16580 }, { "epoch": 15.47, "learning_rate": 4.8066231343283584e-05, "loss": 0.0018, "step": 16584 }, { "epoch": 15.47, "learning_rate": 4.806576492537314e-05, "loss": 0.0001, "step": 16588 }, { "epoch": 15.48, "learning_rate": 4.8065298507462694e-05, "loss": 0.0008, "step": 16592 }, { "epoch": 15.48, "learning_rate": 4.806483208955224e-05, "loss": 0.0017, "step": 16596 }, { "epoch": 15.49, "learning_rate": 4.806436567164179e-05, "loss": 0.0005, "step": 16600 }, { "epoch": 15.49, "learning_rate": 4.8063899253731345e-05, "loss": 0.0004, "step": 16604 }, { "epoch": 15.49, "learning_rate": 4.80634328358209e-05, "loss": 0.0003, "step": 16608 }, { "epoch": 15.5, "learning_rate": 4.806296641791045e-05, "loss": 0.0063, "step": 16612 }, { "epoch": 15.5, "learning_rate": 4.80625e-05, "loss": 0.0138, "step": 16616 }, { "epoch": 15.5, "learning_rate": 4.806203358208956e-05, "loss": 0.0003, "step": 16620 }, { "epoch": 15.51, "learning_rate": 4.8061567164179106e-05, "loss": 0.0004, "step": 16624 }, { "epoch": 15.51, "learning_rate": 4.806110074626866e-05, "loss": 0.0017, "step": 16628 }, { "epoch": 15.51, "learning_rate": 4.806063432835821e-05, "loss": 0.0007, "step": 16632 }, { "epoch": 15.52, "learning_rate": 4.8060167910447764e-05, "loss": 0.0026, "step": 16636 }, { "epoch": 15.52, "learning_rate": 4.805970149253732e-05, "loss": 0.0024, "step": 16640 }, { "epoch": 15.53, "learning_rate": 4.805923507462687e-05, "loss": 0.0096, "step": 16644 }, { "epoch": 15.53, "learning_rate": 4.805876865671642e-05, "loss": 0.0074, "step": 16648 }, { "epoch": 15.53, "learning_rate": 4.8058302238805977e-05, "loss": 0.0007, "step": 16652 }, { "epoch": 15.54, "learning_rate": 4.8057835820895525e-05, "loss": 0.0049, "step": 16656 }, { "epoch": 15.54, "learning_rate": 4.805736940298507e-05, "loss": 0.001, "step": 16660 }, { "epoch": 15.54, "learning_rate": 4.805690298507463e-05, "loss": 0.007, "step": 16664 }, { "epoch": 15.55, "learning_rate": 4.805643656716418e-05, "loss": 0.0027, "step": 16668 }, { "epoch": 15.55, "learning_rate": 4.805597014925373e-05, "loss": 0.0018, "step": 16672 }, { "epoch": 15.56, "learning_rate": 4.8055503731343286e-05, "loss": 0.0017, "step": 16676 }, { "epoch": 15.56, "learning_rate": 4.805503731343284e-05, "loss": 0.0009, "step": 16680 }, { "epoch": 15.56, "learning_rate": 4.805457089552239e-05, "loss": 0.005, "step": 16684 }, { "epoch": 15.57, "learning_rate": 4.8054104477611944e-05, "loss": 0.0016, "step": 16688 }, { "epoch": 15.57, "learning_rate": 4.805363805970149e-05, "loss": 0.0005, "step": 16692 }, { "epoch": 15.57, "learning_rate": 4.805317164179105e-05, "loss": 0.0041, "step": 16696 }, { "epoch": 15.58, "learning_rate": 4.80527052238806e-05, "loss": 0.0019, "step": 16700 }, { "epoch": 15.58, "learning_rate": 4.805223880597015e-05, "loss": 0.0024, "step": 16704 }, { "epoch": 15.59, "learning_rate": 4.8051772388059705e-05, "loss": 0.0122, "step": 16708 }, { "epoch": 15.59, "learning_rate": 4.805130597014926e-05, "loss": 0.0028, "step": 16712 }, { "epoch": 15.59, "learning_rate": 4.805083955223881e-05, "loss": 0.003, "step": 16716 }, { "epoch": 15.6, "learning_rate": 4.8050373134328356e-05, "loss": 0.0032, "step": 16720 }, { "epoch": 15.6, "learning_rate": 4.804990671641791e-05, "loss": 0.0014, "step": 16724 }, { "epoch": 15.6, "learning_rate": 4.8049440298507466e-05, "loss": 0.0014, "step": 16728 }, { "epoch": 15.61, "learning_rate": 4.8048973880597014e-05, "loss": 0.001, "step": 16732 }, { "epoch": 15.61, "learning_rate": 4.804850746268657e-05, "loss": 0.0025, "step": 16736 }, { "epoch": 15.62, "learning_rate": 4.8048041044776124e-05, "loss": 0.0017, "step": 16740 }, { "epoch": 15.62, "learning_rate": 4.804757462686567e-05, "loss": 0.0124, "step": 16744 }, { "epoch": 15.62, "learning_rate": 4.8047108208955227e-05, "loss": 0.0022, "step": 16748 }, { "epoch": 15.63, "learning_rate": 4.8046641791044775e-05, "loss": 0.0093, "step": 16752 }, { "epoch": 15.63, "learning_rate": 4.8046175373134336e-05, "loss": 0.0019, "step": 16756 }, { "epoch": 15.63, "learning_rate": 4.8045708955223885e-05, "loss": 0.003, "step": 16760 }, { "epoch": 15.64, "learning_rate": 4.804524253731343e-05, "loss": 0.0019, "step": 16764 }, { "epoch": 15.64, "learning_rate": 4.804477611940299e-05, "loss": 0.0017, "step": 16768 }, { "epoch": 15.65, "learning_rate": 4.804430970149254e-05, "loss": 0.0004, "step": 16772 }, { "epoch": 15.65, "learning_rate": 4.804384328358209e-05, "loss": 0.0017, "step": 16776 }, { "epoch": 15.65, "learning_rate": 4.8043376865671646e-05, "loss": 0.0031, "step": 16780 }, { "epoch": 15.66, "learning_rate": 4.8042910447761194e-05, "loss": 0.0042, "step": 16784 }, { "epoch": 15.66, "learning_rate": 4.804244402985075e-05, "loss": 0.002, "step": 16788 }, { "epoch": 15.66, "learning_rate": 4.8041977611940303e-05, "loss": 0.0007, "step": 16792 }, { "epoch": 15.67, "learning_rate": 4.804151119402985e-05, "loss": 0.0008, "step": 16796 }, { "epoch": 15.67, "learning_rate": 4.8041044776119407e-05, "loss": 0.0008, "step": 16800 }, { "epoch": 15.68, "learning_rate": 4.804057835820896e-05, "loss": 0.0017, "step": 16804 }, { "epoch": 15.68, "learning_rate": 4.804011194029851e-05, "loss": 0.0055, "step": 16808 }, { "epoch": 15.68, "learning_rate": 4.803964552238806e-05, "loss": 0.0021, "step": 16812 }, { "epoch": 15.69, "learning_rate": 4.803917910447762e-05, "loss": 0.0046, "step": 16816 }, { "epoch": 15.69, "learning_rate": 4.803871268656717e-05, "loss": 0.0013, "step": 16820 }, { "epoch": 15.69, "learning_rate": 4.8038246268656716e-05, "loss": 0.0049, "step": 16824 }, { "epoch": 15.7, "learning_rate": 4.803777985074627e-05, "loss": 0.0023, "step": 16828 }, { "epoch": 15.7, "learning_rate": 4.8037313432835825e-05, "loss": 0.0031, "step": 16832 }, { "epoch": 15.71, "learning_rate": 4.8036847014925374e-05, "loss": 0.0127, "step": 16836 }, { "epoch": 15.71, "learning_rate": 4.803638059701493e-05, "loss": 0.0041, "step": 16840 }, { "epoch": 15.71, "learning_rate": 4.803591417910448e-05, "loss": 0.0102, "step": 16844 }, { "epoch": 15.72, "learning_rate": 4.803544776119403e-05, "loss": 0.0031, "step": 16848 }, { "epoch": 15.72, "learning_rate": 4.8034981343283586e-05, "loss": 0.0038, "step": 16852 }, { "epoch": 15.72, "learning_rate": 4.8034514925373135e-05, "loss": 0.0079, "step": 16856 }, { "epoch": 15.73, "learning_rate": 4.803404850746269e-05, "loss": 0.0113, "step": 16860 }, { "epoch": 15.73, "learning_rate": 4.8033582089552244e-05, "loss": 0.0009, "step": 16864 }, { "epoch": 15.73, "learning_rate": 4.803311567164179e-05, "loss": 0.0019, "step": 16868 }, { "epoch": 15.74, "learning_rate": 4.803264925373134e-05, "loss": 0.0053, "step": 16872 }, { "epoch": 15.74, "learning_rate": 4.80321828358209e-05, "loss": 0.0009, "step": 16876 }, { "epoch": 15.75, "learning_rate": 4.803171641791045e-05, "loss": 0.001, "step": 16880 }, { "epoch": 15.75, "learning_rate": 4.803125e-05, "loss": 0.0125, "step": 16884 }, { "epoch": 15.75, "learning_rate": 4.8030783582089553e-05, "loss": 0.0018, "step": 16888 }, { "epoch": 15.76, "learning_rate": 4.803031716417911e-05, "loss": 0.0032, "step": 16892 }, { "epoch": 15.76, "learning_rate": 4.8029850746268657e-05, "loss": 0.0013, "step": 16896 }, { "epoch": 15.76, "learning_rate": 4.802938432835821e-05, "loss": 0.0017, "step": 16900 }, { "epoch": 15.77, "learning_rate": 4.802891791044776e-05, "loss": 0.0011, "step": 16904 }, { "epoch": 15.77, "learning_rate": 4.8028451492537314e-05, "loss": 0.0013, "step": 16908 }, { "epoch": 15.78, "learning_rate": 4.802798507462687e-05, "loss": 0.0009, "step": 16912 }, { "epoch": 15.78, "learning_rate": 4.802751865671642e-05, "loss": 0.0007, "step": 16916 }, { "epoch": 15.78, "learning_rate": 4.802705223880597e-05, "loss": 0.001, "step": 16920 }, { "epoch": 15.79, "learning_rate": 4.802658582089553e-05, "loss": 0.0007, "step": 16924 }, { "epoch": 15.79, "learning_rate": 4.8026119402985075e-05, "loss": 0.0075, "step": 16928 }, { "epoch": 15.79, "learning_rate": 4.802565298507463e-05, "loss": 0.0008, "step": 16932 }, { "epoch": 15.8, "learning_rate": 4.8025186567164185e-05, "loss": 0.0015, "step": 16936 }, { "epoch": 15.8, "learning_rate": 4.8024720149253733e-05, "loss": 0.0034, "step": 16940 }, { "epoch": 15.81, "learning_rate": 4.802425373134329e-05, "loss": 0.0054, "step": 16944 }, { "epoch": 15.81, "learning_rate": 4.8023787313432836e-05, "loss": 0.0013, "step": 16948 }, { "epoch": 15.81, "learning_rate": 4.802332089552239e-05, "loss": 0.001, "step": 16952 }, { "epoch": 15.82, "learning_rate": 4.8022854477611946e-05, "loss": 0.0023, "step": 16956 }, { "epoch": 15.82, "learning_rate": 4.8022388059701494e-05, "loss": 0.0005, "step": 16960 }, { "epoch": 15.82, "learning_rate": 4.802192164179104e-05, "loss": 0.0018, "step": 16964 }, { "epoch": 15.83, "learning_rate": 4.8021455223880604e-05, "loss": 0.0038, "step": 16968 }, { "epoch": 15.83, "learning_rate": 4.802098880597015e-05, "loss": 0.0024, "step": 16972 }, { "epoch": 15.84, "learning_rate": 4.80205223880597e-05, "loss": 0.0078, "step": 16976 }, { "epoch": 15.84, "learning_rate": 4.8020055970149255e-05, "loss": 0.0027, "step": 16980 }, { "epoch": 15.84, "learning_rate": 4.801958955223881e-05, "loss": 0.0014, "step": 16984 }, { "epoch": 15.85, "learning_rate": 4.801912313432836e-05, "loss": 0.0037, "step": 16988 }, { "epoch": 15.85, "learning_rate": 4.801865671641791e-05, "loss": 0.0042, "step": 16992 }, { "epoch": 15.85, "learning_rate": 4.801819029850747e-05, "loss": 0.0025, "step": 16996 }, { "epoch": 15.86, "learning_rate": 4.8017723880597016e-05, "loss": 0.0119, "step": 17000 }, { "epoch": 15.86, "eval_exact_match": 0.7321083172147002, "eval_exec": 0.7611218568665378, "eval_loss": 0.2985568642616272, "eval_runtime": 1073.3502, "eval_samples_per_second": 0.963, "step": 17000 }, { "epoch": 15.86, "learning_rate": 4.801725746268657e-05, "loss": 0.0016, "step": 17004 }, { "epoch": 15.87, "learning_rate": 4.801679104477612e-05, "loss": 0.0039, "step": 17008 }, { "epoch": 15.87, "learning_rate": 4.8016324626865674e-05, "loss": 0.0053, "step": 17012 }, { "epoch": 15.87, "learning_rate": 4.801585820895523e-05, "loss": 0.0005, "step": 17016 }, { "epoch": 15.88, "learning_rate": 4.801539179104478e-05, "loss": 0.0023, "step": 17020 }, { "epoch": 15.88, "learning_rate": 4.8014925373134325e-05, "loss": 0.0009, "step": 17024 }, { "epoch": 15.88, "learning_rate": 4.801445895522389e-05, "loss": 0.0106, "step": 17028 }, { "epoch": 15.89, "learning_rate": 4.8013992537313435e-05, "loss": 0.0086, "step": 17032 }, { "epoch": 15.89, "learning_rate": 4.8013526119402983e-05, "loss": 0.0005, "step": 17036 }, { "epoch": 15.9, "learning_rate": 4.801305970149254e-05, "loss": 0.0011, "step": 17040 }, { "epoch": 15.9, "learning_rate": 4.801259328358209e-05, "loss": 0.0035, "step": 17044 }, { "epoch": 15.9, "learning_rate": 4.801212686567164e-05, "loss": 0.0006, "step": 17048 }, { "epoch": 15.91, "learning_rate": 4.8011660447761196e-05, "loss": 0.0013, "step": 17052 }, { "epoch": 15.91, "learning_rate": 4.801119402985075e-05, "loss": 0.005, "step": 17056 }, { "epoch": 15.91, "learning_rate": 4.80107276119403e-05, "loss": 0.0029, "step": 17060 }, { "epoch": 15.92, "learning_rate": 4.8010261194029854e-05, "loss": 0.0006, "step": 17064 }, { "epoch": 15.92, "learning_rate": 4.80097947761194e-05, "loss": 0.0028, "step": 17068 }, { "epoch": 15.93, "learning_rate": 4.800932835820896e-05, "loss": 0.0086, "step": 17072 }, { "epoch": 15.93, "learning_rate": 4.800886194029851e-05, "loss": 0.0046, "step": 17076 }, { "epoch": 15.93, "learning_rate": 4.800839552238806e-05, "loss": 0.0011, "step": 17080 }, { "epoch": 15.94, "learning_rate": 4.8007929104477615e-05, "loss": 0.0025, "step": 17084 }, { "epoch": 15.94, "learning_rate": 4.800746268656717e-05, "loss": 0.0012, "step": 17088 }, { "epoch": 15.94, "learning_rate": 4.800699626865672e-05, "loss": 0.006, "step": 17092 }, { "epoch": 15.95, "learning_rate": 4.800652985074627e-05, "loss": 0.0031, "step": 17096 }, { "epoch": 15.95, "learning_rate": 4.800606343283582e-05, "loss": 0.0008, "step": 17100 }, { "epoch": 15.96, "learning_rate": 4.8005597014925376e-05, "loss": 0.0068, "step": 17104 }, { "epoch": 15.96, "learning_rate": 4.800513059701493e-05, "loss": 0.0005, "step": 17108 }, { "epoch": 15.96, "learning_rate": 4.800466417910448e-05, "loss": 0.0005, "step": 17112 }, { "epoch": 15.97, "learning_rate": 4.8004197761194034e-05, "loss": 0.0024, "step": 17116 }, { "epoch": 15.97, "learning_rate": 4.800373134328359e-05, "loss": 0.0016, "step": 17120 }, { "epoch": 15.97, "learning_rate": 4.800326492537314e-05, "loss": 0.0004, "step": 17124 }, { "epoch": 15.98, "learning_rate": 4.8002798507462685e-05, "loss": 0.0037, "step": 17128 }, { "epoch": 15.98, "learning_rate": 4.800233208955224e-05, "loss": 0.0018, "step": 17132 }, { "epoch": 15.98, "learning_rate": 4.8001865671641795e-05, "loss": 0.0022, "step": 17136 }, { "epoch": 15.99, "learning_rate": 4.800139925373134e-05, "loss": 0.0007, "step": 17140 }, { "epoch": 15.99, "learning_rate": 4.80009328358209e-05, "loss": 0.0013, "step": 17144 }, { "epoch": 16.0, "learning_rate": 4.800046641791045e-05, "loss": 0.0039, "step": 17148 }, { "epoch": 16.0, "learning_rate": 4.8e-05, "loss": 0.0049, "step": 17152 }, { "epoch": 16.0, "learning_rate": 4.7999533582089556e-05, "loss": 0.0026, "step": 17156 }, { "epoch": 16.01, "learning_rate": 4.7999067164179104e-05, "loss": 0.0088, "step": 17160 }, { "epoch": 16.01, "learning_rate": 4.799860074626866e-05, "loss": 0.0007, "step": 17164 }, { "epoch": 16.01, "learning_rate": 4.7998134328358214e-05, "loss": 0.0012, "step": 17168 }, { "epoch": 16.02, "learning_rate": 4.799766791044776e-05, "loss": 0.0009, "step": 17172 }, { "epoch": 16.02, "learning_rate": 4.799720149253731e-05, "loss": 0.0035, "step": 17176 }, { "epoch": 16.03, "learning_rate": 4.799673507462687e-05, "loss": 0.0006, "step": 17180 }, { "epoch": 16.03, "learning_rate": 4.799626865671642e-05, "loss": 0.0043, "step": 17184 }, { "epoch": 16.03, "learning_rate": 4.799580223880597e-05, "loss": 0.0002, "step": 17188 }, { "epoch": 16.04, "learning_rate": 4.799533582089552e-05, "loss": 0.0001, "step": 17192 }, { "epoch": 16.04, "learning_rate": 4.799486940298508e-05, "loss": 0.0037, "step": 17196 }, { "epoch": 16.04, "learning_rate": 4.7994402985074626e-05, "loss": 0.0033, "step": 17200 }, { "epoch": 16.05, "learning_rate": 4.799393656716418e-05, "loss": 0.0002, "step": 17204 }, { "epoch": 16.05, "learning_rate": 4.7993470149253736e-05, "loss": 0.0011, "step": 17208 }, { "epoch": 16.06, "learning_rate": 4.7993003731343284e-05, "loss": 0.0001, "step": 17212 }, { "epoch": 16.06, "learning_rate": 4.799253731343284e-05, "loss": 0.0005, "step": 17216 }, { "epoch": 16.06, "learning_rate": 4.799207089552239e-05, "loss": 0.0023, "step": 17220 }, { "epoch": 16.07, "learning_rate": 4.799160447761194e-05, "loss": 0.0112, "step": 17224 }, { "epoch": 16.07, "learning_rate": 4.79911380597015e-05, "loss": 0.0062, "step": 17228 }, { "epoch": 16.07, "learning_rate": 4.7990671641791045e-05, "loss": 0.001, "step": 17232 }, { "epoch": 16.08, "learning_rate": 4.799020522388059e-05, "loss": 0.0012, "step": 17236 }, { "epoch": 16.08, "learning_rate": 4.7989738805970155e-05, "loss": 0.0004, "step": 17240 }, { "epoch": 16.09, "learning_rate": 4.79892723880597e-05, "loss": 0.0001, "step": 17244 }, { "epoch": 16.09, "learning_rate": 4.798880597014926e-05, "loss": 0.0014, "step": 17248 }, { "epoch": 16.09, "learning_rate": 4.7988339552238806e-05, "loss": 0.003, "step": 17252 }, { "epoch": 16.1, "learning_rate": 4.798787313432836e-05, "loss": 0.0062, "step": 17256 }, { "epoch": 16.1, "learning_rate": 4.7987406716417916e-05, "loss": 0.0018, "step": 17260 }, { "epoch": 16.1, "learning_rate": 4.7986940298507464e-05, "loss": 0.001, "step": 17264 }, { "epoch": 16.11, "learning_rate": 4.798647388059702e-05, "loss": 0.0036, "step": 17268 }, { "epoch": 16.11, "learning_rate": 4.7986007462686574e-05, "loss": 0.0008, "step": 17272 }, { "epoch": 16.12, "learning_rate": 4.798554104477612e-05, "loss": 0.0004, "step": 17276 }, { "epoch": 16.12, "learning_rate": 4.798507462686567e-05, "loss": 0.0041, "step": 17280 }, { "epoch": 16.12, "learning_rate": 4.7984608208955225e-05, "loss": 0.0003, "step": 17284 }, { "epoch": 16.13, "learning_rate": 4.798414179104478e-05, "loss": 0.0012, "step": 17288 }, { "epoch": 16.13, "learning_rate": 4.798367537313433e-05, "loss": 0.0057, "step": 17292 }, { "epoch": 16.13, "learning_rate": 4.798320895522388e-05, "loss": 0.0005, "step": 17296 }, { "epoch": 16.14, "learning_rate": 4.798274253731344e-05, "loss": 0.0002, "step": 17300 }, { "epoch": 16.14, "learning_rate": 4.7982276119402986e-05, "loss": 0.0036, "step": 17304 }, { "epoch": 16.15, "learning_rate": 4.798180970149254e-05, "loss": 0.0022, "step": 17308 }, { "epoch": 16.15, "learning_rate": 4.798134328358209e-05, "loss": 0.0042, "step": 17312 }, { "epoch": 16.15, "learning_rate": 4.7980876865671644e-05, "loss": 0.0023, "step": 17316 }, { "epoch": 16.16, "learning_rate": 4.79804104477612e-05, "loss": 0.0008, "step": 17320 }, { "epoch": 16.16, "learning_rate": 4.797994402985075e-05, "loss": 0.0009, "step": 17324 }, { "epoch": 16.16, "learning_rate": 4.79794776119403e-05, "loss": 0.0075, "step": 17328 }, { "epoch": 16.17, "learning_rate": 4.797901119402986e-05, "loss": 0.0036, "step": 17332 }, { "epoch": 16.17, "learning_rate": 4.7978544776119405e-05, "loss": 0.0057, "step": 17336 }, { "epoch": 16.18, "learning_rate": 4.797807835820895e-05, "loss": 0.0024, "step": 17340 }, { "epoch": 16.18, "learning_rate": 4.797761194029851e-05, "loss": 0.0023, "step": 17344 }, { "epoch": 16.18, "learning_rate": 4.797714552238806e-05, "loss": 0.0398, "step": 17348 }, { "epoch": 16.19, "learning_rate": 4.797667910447761e-05, "loss": 0.0049, "step": 17352 }, { "epoch": 16.19, "learning_rate": 4.7976212686567166e-05, "loss": 0.0004, "step": 17356 }, { "epoch": 16.19, "learning_rate": 4.797574626865672e-05, "loss": 0.0033, "step": 17360 }, { "epoch": 16.2, "learning_rate": 4.797527985074627e-05, "loss": 0.0007, "step": 17364 }, { "epoch": 16.2, "learning_rate": 4.7974813432835824e-05, "loss": 0.0017, "step": 17368 }, { "epoch": 16.21, "learning_rate": 4.797434701492537e-05, "loss": 0.0024, "step": 17372 }, { "epoch": 16.21, "learning_rate": 4.797388059701493e-05, "loss": 0.0026, "step": 17376 }, { "epoch": 16.21, "learning_rate": 4.797341417910448e-05, "loss": 0.0003, "step": 17380 }, { "epoch": 16.22, "learning_rate": 4.797294776119403e-05, "loss": 0.0032, "step": 17384 }, { "epoch": 16.22, "learning_rate": 4.7972481343283585e-05, "loss": 0.0009, "step": 17388 }, { "epoch": 16.22, "learning_rate": 4.797201492537314e-05, "loss": 0.002, "step": 17392 }, { "epoch": 16.23, "learning_rate": 4.797154850746269e-05, "loss": 0.001, "step": 17396 }, { "epoch": 16.23, "learning_rate": 4.7971082089552236e-05, "loss": 0.0016, "step": 17400 }, { "epoch": 16.24, "learning_rate": 4.797061567164179e-05, "loss": 0.0039, "step": 17404 }, { "epoch": 16.24, "learning_rate": 4.7970149253731346e-05, "loss": 0.002, "step": 17408 }, { "epoch": 16.24, "learning_rate": 4.79696828358209e-05, "loss": 0.0006, "step": 17412 }, { "epoch": 16.25, "learning_rate": 4.796921641791045e-05, "loss": 0.0004, "step": 17416 }, { "epoch": 16.25, "learning_rate": 4.7968750000000004e-05, "loss": 0.0054, "step": 17420 }, { "epoch": 16.25, "learning_rate": 4.796828358208956e-05, "loss": 0.0098, "step": 17424 }, { "epoch": 16.26, "learning_rate": 4.796781716417911e-05, "loss": 0.0011, "step": 17428 }, { "epoch": 16.26, "learning_rate": 4.7967350746268655e-05, "loss": 0.0047, "step": 17432 }, { "epoch": 16.26, "learning_rate": 4.796688432835822e-05, "loss": 0.007, "step": 17436 }, { "epoch": 16.27, "learning_rate": 4.7966417910447765e-05, "loss": 0.0004, "step": 17440 }, { "epoch": 16.27, "learning_rate": 4.796595149253731e-05, "loss": 0.0062, "step": 17444 }, { "epoch": 16.28, "learning_rate": 4.796548507462687e-05, "loss": 0.0034, "step": 17448 }, { "epoch": 16.28, "learning_rate": 4.796501865671642e-05, "loss": 0.0049, "step": 17452 }, { "epoch": 16.28, "learning_rate": 4.796455223880597e-05, "loss": 0.0079, "step": 17456 }, { "epoch": 16.29, "learning_rate": 4.7964085820895526e-05, "loss": 0.0007, "step": 17460 }, { "epoch": 16.29, "learning_rate": 4.7963619402985074e-05, "loss": 0.0051, "step": 17464 }, { "epoch": 16.29, "learning_rate": 4.796315298507463e-05, "loss": 0.0125, "step": 17468 }, { "epoch": 16.3, "learning_rate": 4.7962686567164184e-05, "loss": 0.0018, "step": 17472 }, { "epoch": 16.3, "learning_rate": 4.796222014925373e-05, "loss": 0.0008, "step": 17476 }, { "epoch": 16.31, "learning_rate": 4.796175373134329e-05, "loss": 0.0007, "step": 17480 }, { "epoch": 16.31, "learning_rate": 4.796128731343284e-05, "loss": 0.0022, "step": 17484 }, { "epoch": 16.31, "learning_rate": 4.796082089552239e-05, "loss": 0.0008, "step": 17488 }, { "epoch": 16.32, "learning_rate": 4.796035447761194e-05, "loss": 0.0038, "step": 17492 }, { "epoch": 16.32, "learning_rate": 4.79598880597015e-05, "loss": 0.0036, "step": 17496 }, { "epoch": 16.32, "learning_rate": 4.795942164179105e-05, "loss": 0.0002, "step": 17500 }, { "epoch": 16.32, "eval_exact_match": 0.7330754352030948, "eval_exec": 0.7475822050290135, "eval_loss": 0.3184237778186798, "eval_runtime": 1056.9157, "eval_samples_per_second": 0.978, "step": 17500 }, { "epoch": 16.33, "learning_rate": 4.7958955223880596e-05, "loss": 0.0139, "step": 17504 }, { "epoch": 16.33, "learning_rate": 4.795848880597015e-05, "loss": 0.0024, "step": 17508 }, { "epoch": 16.34, "learning_rate": 4.7958022388059706e-05, "loss": 0.005, "step": 17512 }, { "epoch": 16.34, "learning_rate": 4.7957555970149254e-05, "loss": 0.001, "step": 17516 }, { "epoch": 16.34, "learning_rate": 4.795708955223881e-05, "loss": 0.0053, "step": 17520 }, { "epoch": 16.35, "learning_rate": 4.795662313432836e-05, "loss": 0.0021, "step": 17524 }, { "epoch": 16.35, "learning_rate": 4.795615671641791e-05, "loss": 0.0059, "step": 17528 }, { "epoch": 16.35, "learning_rate": 4.795569029850747e-05, "loss": 0.0008, "step": 17532 }, { "epoch": 16.36, "learning_rate": 4.7955223880597015e-05, "loss": 0.0151, "step": 17536 }, { "epoch": 16.36, "learning_rate": 4.795475746268657e-05, "loss": 0.0023, "step": 17540 }, { "epoch": 16.37, "learning_rate": 4.7954291044776125e-05, "loss": 0.0022, "step": 17544 }, { "epoch": 16.37, "learning_rate": 4.795382462686567e-05, "loss": 0.0004, "step": 17548 }, { "epoch": 16.37, "learning_rate": 4.795335820895522e-05, "loss": 0.0005, "step": 17552 }, { "epoch": 16.38, "learning_rate": 4.795289179104478e-05, "loss": 0.0019, "step": 17556 }, { "epoch": 16.38, "learning_rate": 4.795242537313433e-05, "loss": 0.0057, "step": 17560 }, { "epoch": 16.38, "learning_rate": 4.795195895522388e-05, "loss": 0.0046, "step": 17564 }, { "epoch": 16.39, "learning_rate": 4.7951492537313434e-05, "loss": 0.0009, "step": 17568 }, { "epoch": 16.39, "learning_rate": 4.795102611940299e-05, "loss": 0.0035, "step": 17572 }, { "epoch": 16.4, "learning_rate": 4.7950559701492544e-05, "loss": 0.0062, "step": 17576 }, { "epoch": 16.4, "learning_rate": 4.795009328358209e-05, "loss": 0.0036, "step": 17580 }, { "epoch": 16.4, "learning_rate": 4.794962686567164e-05, "loss": 0.0003, "step": 17584 }, { "epoch": 16.41, "learning_rate": 4.79491604477612e-05, "loss": 0.011, "step": 17588 }, { "epoch": 16.41, "learning_rate": 4.794869402985075e-05, "loss": 0.0021, "step": 17592 }, { "epoch": 16.41, "learning_rate": 4.79482276119403e-05, "loss": 0.0015, "step": 17596 }, { "epoch": 16.42, "learning_rate": 4.794776119402985e-05, "loss": 0.0022, "step": 17600 }, { "epoch": 16.42, "learning_rate": 4.794729477611941e-05, "loss": 0.0033, "step": 17604 }, { "epoch": 16.43, "learning_rate": 4.7946828358208956e-05, "loss": 0.0012, "step": 17608 }, { "epoch": 16.43, "learning_rate": 4.794636194029851e-05, "loss": 0.0006, "step": 17612 }, { "epoch": 16.43, "learning_rate": 4.7945895522388066e-05, "loss": 0.0052, "step": 17616 }, { "epoch": 16.44, "learning_rate": 4.7945429104477614e-05, "loss": 0.0002, "step": 17620 }, { "epoch": 16.44, "learning_rate": 4.794496268656717e-05, "loss": 0.0016, "step": 17624 }, { "epoch": 16.44, "learning_rate": 4.794449626865672e-05, "loss": 0.0009, "step": 17628 }, { "epoch": 16.45, "learning_rate": 4.794402985074627e-05, "loss": 0.0031, "step": 17632 }, { "epoch": 16.45, "learning_rate": 4.7943563432835827e-05, "loss": 0.0077, "step": 17636 }, { "epoch": 16.46, "learning_rate": 4.7943097014925375e-05, "loss": 0.0037, "step": 17640 }, { "epoch": 16.46, "learning_rate": 4.794263059701492e-05, "loss": 0.0023, "step": 17644 }, { "epoch": 16.46, "learning_rate": 4.7942164179104485e-05, "loss": 0.0008, "step": 17648 }, { "epoch": 16.47, "learning_rate": 4.794169776119403e-05, "loss": 0.0013, "step": 17652 }, { "epoch": 16.47, "learning_rate": 4.794123134328358e-05, "loss": 0.0015, "step": 17656 }, { "epoch": 16.47, "learning_rate": 4.7940764925373136e-05, "loss": 0.0071, "step": 17660 }, { "epoch": 16.48, "learning_rate": 4.794029850746269e-05, "loss": 0.0004, "step": 17664 }, { "epoch": 16.48, "learning_rate": 4.793983208955224e-05, "loss": 0.0041, "step": 17668 }, { "epoch": 16.49, "learning_rate": 4.7939365671641794e-05, "loss": 0.0022, "step": 17672 }, { "epoch": 16.49, "learning_rate": 4.793889925373135e-05, "loss": 0.0052, "step": 17676 }, { "epoch": 16.49, "learning_rate": 4.79384328358209e-05, "loss": 0.0029, "step": 17680 }, { "epoch": 16.5, "learning_rate": 4.793796641791045e-05, "loss": 0.0017, "step": 17684 }, { "epoch": 16.5, "learning_rate": 4.79375e-05, "loss": 0.0033, "step": 17688 }, { "epoch": 16.5, "learning_rate": 4.7937033582089555e-05, "loss": 0.0054, "step": 17692 }, { "epoch": 16.51, "learning_rate": 4.793656716417911e-05, "loss": 0.0005, "step": 17696 }, { "epoch": 16.51, "learning_rate": 4.793610074626866e-05, "loss": 0.0047, "step": 17700 }, { "epoch": 16.51, "learning_rate": 4.7935634328358206e-05, "loss": 0.0014, "step": 17704 }, { "epoch": 16.52, "learning_rate": 4.793516791044777e-05, "loss": 0.0014, "step": 17708 }, { "epoch": 16.52, "learning_rate": 4.7934701492537316e-05, "loss": 0.0051, "step": 17712 }, { "epoch": 16.53, "learning_rate": 4.7934235074626864e-05, "loss": 0.0008, "step": 17716 }, { "epoch": 16.53, "learning_rate": 4.793376865671642e-05, "loss": 0.0046, "step": 17720 }, { "epoch": 16.53, "learning_rate": 4.7933302238805974e-05, "loss": 0.0013, "step": 17724 }, { "epoch": 16.54, "learning_rate": 4.793283582089552e-05, "loss": 0.0003, "step": 17728 }, { "epoch": 16.54, "learning_rate": 4.7932369402985077e-05, "loss": 0.0048, "step": 17732 }, { "epoch": 16.54, "learning_rate": 4.793190298507463e-05, "loss": 0.0008, "step": 17736 }, { "epoch": 16.55, "learning_rate": 4.7931436567164186e-05, "loss": 0.0007, "step": 17740 }, { "epoch": 16.55, "learning_rate": 4.7930970149253735e-05, "loss": 0.001, "step": 17744 }, { "epoch": 16.56, "learning_rate": 4.793050373134328e-05, "loss": 0.005, "step": 17748 }, { "epoch": 16.56, "learning_rate": 4.793003731343284e-05, "loss": 0.0033, "step": 17752 }, { "epoch": 16.56, "learning_rate": 4.792957089552239e-05, "loss": 0.0031, "step": 17756 }, { "epoch": 16.57, "learning_rate": 4.792910447761194e-05, "loss": 0.0004, "step": 17760 }, { "epoch": 16.57, "learning_rate": 4.7928638059701496e-05, "loss": 0.0024, "step": 17764 }, { "epoch": 16.57, "learning_rate": 4.792817164179105e-05, "loss": 0.0059, "step": 17768 }, { "epoch": 16.58, "learning_rate": 4.79277052238806e-05, "loss": 0.0031, "step": 17772 }, { "epoch": 16.58, "learning_rate": 4.7927238805970153e-05, "loss": 0.0009, "step": 17776 }, { "epoch": 16.59, "learning_rate": 4.79267723880597e-05, "loss": 0.0044, "step": 17780 }, { "epoch": 16.59, "learning_rate": 4.7926305970149256e-05, "loss": 0.0076, "step": 17784 }, { "epoch": 16.59, "learning_rate": 4.792583955223881e-05, "loss": 0.0002, "step": 17788 }, { "epoch": 16.6, "learning_rate": 4.792537313432836e-05, "loss": 0.0005, "step": 17792 }, { "epoch": 16.6, "learning_rate": 4.7924906716417914e-05, "loss": 0.0018, "step": 17796 }, { "epoch": 16.6, "learning_rate": 4.792444029850747e-05, "loss": 0.0041, "step": 17800 }, { "epoch": 16.61, "learning_rate": 4.792397388059702e-05, "loss": 0.0115, "step": 17804 }, { "epoch": 16.61, "learning_rate": 4.7923507462686566e-05, "loss": 0.0022, "step": 17808 }, { "epoch": 16.62, "learning_rate": 4.792304104477612e-05, "loss": 0.0012, "step": 17812 }, { "epoch": 16.62, "learning_rate": 4.7922574626865675e-05, "loss": 0.0002, "step": 17816 }, { "epoch": 16.62, "learning_rate": 4.7922108208955224e-05, "loss": 0.0014, "step": 17820 }, { "epoch": 16.63, "learning_rate": 4.792164179104478e-05, "loss": 0.0007, "step": 17824 }, { "epoch": 16.63, "learning_rate": 4.792117537313433e-05, "loss": 0.0051, "step": 17828 }, { "epoch": 16.63, "learning_rate": 4.792070895522388e-05, "loss": 0.0007, "step": 17832 }, { "epoch": 16.64, "learning_rate": 4.7920242537313436e-05, "loss": 0.0012, "step": 17836 }, { "epoch": 16.64, "learning_rate": 4.7919776119402985e-05, "loss": 0.0037, "step": 17840 }, { "epoch": 16.65, "learning_rate": 4.791930970149254e-05, "loss": 0.0002, "step": 17844 }, { "epoch": 16.65, "learning_rate": 4.7918843283582094e-05, "loss": 0.0069, "step": 17848 }, { "epoch": 16.65, "learning_rate": 4.791837686567164e-05, "loss": 0.0016, "step": 17852 }, { "epoch": 16.66, "learning_rate": 4.791791044776119e-05, "loss": 0.0029, "step": 17856 }, { "epoch": 16.66, "learning_rate": 4.791744402985075e-05, "loss": 0.005, "step": 17860 }, { "epoch": 16.66, "learning_rate": 4.79169776119403e-05, "loss": 0.0005, "step": 17864 }, { "epoch": 16.67, "learning_rate": 4.791651119402985e-05, "loss": 0.0014, "step": 17868 }, { "epoch": 16.67, "learning_rate": 4.7916044776119403e-05, "loss": 0.0003, "step": 17872 }, { "epoch": 16.68, "learning_rate": 4.791557835820896e-05, "loss": 0.0012, "step": 17876 }, { "epoch": 16.68, "learning_rate": 4.7915111940298507e-05, "loss": 0.0041, "step": 17880 }, { "epoch": 16.68, "learning_rate": 4.791464552238806e-05, "loss": 0.0016, "step": 17884 }, { "epoch": 16.69, "learning_rate": 4.7914179104477616e-05, "loss": 0.0005, "step": 17888 }, { "epoch": 16.69, "learning_rate": 4.7913712686567164e-05, "loss": 0.0002, "step": 17892 }, { "epoch": 16.69, "learning_rate": 4.791324626865672e-05, "loss": 0.0017, "step": 17896 }, { "epoch": 16.7, "learning_rate": 4.791277985074627e-05, "loss": 0.0069, "step": 17900 }, { "epoch": 16.7, "learning_rate": 4.791231343283583e-05, "loss": 0.0039, "step": 17904 }, { "epoch": 16.71, "learning_rate": 4.791184701492538e-05, "loss": 0.0006, "step": 17908 }, { "epoch": 16.71, "learning_rate": 4.7911380597014925e-05, "loss": 0.0, "step": 17912 }, { "epoch": 16.71, "learning_rate": 4.791091417910448e-05, "loss": 0.0031, "step": 17916 }, { "epoch": 16.72, "learning_rate": 4.7910447761194035e-05, "loss": 0.0014, "step": 17920 }, { "epoch": 16.72, "learning_rate": 4.7909981343283583e-05, "loss": 0.0024, "step": 17924 }, { "epoch": 16.72, "learning_rate": 4.790951492537314e-05, "loss": 0.0006, "step": 17928 }, { "epoch": 16.73, "learning_rate": 4.7909048507462686e-05, "loss": 0.0009, "step": 17932 }, { "epoch": 16.73, "learning_rate": 4.790858208955224e-05, "loss": 0.0012, "step": 17936 }, { "epoch": 16.73, "learning_rate": 4.7908115671641796e-05, "loss": 0.0021, "step": 17940 }, { "epoch": 16.74, "learning_rate": 4.7907649253731344e-05, "loss": 0.0082, "step": 17944 }, { "epoch": 16.74, "learning_rate": 4.79071828358209e-05, "loss": 0.0014, "step": 17948 }, { "epoch": 16.75, "learning_rate": 4.7906716417910454e-05, "loss": 0.0013, "step": 17952 }, { "epoch": 16.75, "learning_rate": 4.790625e-05, "loss": 0.0008, "step": 17956 }, { "epoch": 16.75, "learning_rate": 4.790578358208955e-05, "loss": 0.0026, "step": 17960 }, { "epoch": 16.76, "learning_rate": 4.790531716417911e-05, "loss": 0.0003, "step": 17964 }, { "epoch": 16.76, "learning_rate": 4.790485074626866e-05, "loss": 0.0055, "step": 17968 }, { "epoch": 16.76, "learning_rate": 4.790438432835821e-05, "loss": 0.0009, "step": 17972 }, { "epoch": 16.77, "learning_rate": 4.790391791044776e-05, "loss": 0.0008, "step": 17976 }, { "epoch": 16.77, "learning_rate": 4.790345149253732e-05, "loss": 0.0027, "step": 17980 }, { "epoch": 16.78, "learning_rate": 4.7902985074626866e-05, "loss": 0.0003, "step": 17984 }, { "epoch": 16.78, "learning_rate": 4.790251865671642e-05, "loss": 0.0023, "step": 17988 }, { "epoch": 16.78, "learning_rate": 4.790205223880597e-05, "loss": 0.0005, "step": 17992 }, { "epoch": 16.79, "learning_rate": 4.7901585820895524e-05, "loss": 0.0024, "step": 17996 }, { "epoch": 16.79, "learning_rate": 4.790111940298508e-05, "loss": 0.0005, "step": 18000 }, { "epoch": 16.79, "eval_exact_match": 0.730174081237911, "eval_exec": 0.7553191489361702, "eval_loss": 0.3473448157310486, "eval_runtime": 1359.5008, "eval_samples_per_second": 0.761, "step": 18000 }, { "epoch": 16.79, "learning_rate": 4.790065298507463e-05, "loss": 0.0041, "step": 18004 }, { "epoch": 16.8, "learning_rate": 4.790018656716418e-05, "loss": 0.0034, "step": 18008 }, { "epoch": 16.8, "learning_rate": 4.789972014925374e-05, "loss": 0.0004, "step": 18012 }, { "epoch": 16.81, "learning_rate": 4.7899253731343285e-05, "loss": 0.0046, "step": 18016 }, { "epoch": 16.81, "learning_rate": 4.7898787313432833e-05, "loss": 0.001, "step": 18020 }, { "epoch": 16.81, "learning_rate": 4.789832089552239e-05, "loss": 0.0069, "step": 18024 }, { "epoch": 16.82, "learning_rate": 4.789785447761194e-05, "loss": 0.0081, "step": 18028 }, { "epoch": 16.82, "learning_rate": 4.789738805970149e-05, "loss": 0.0012, "step": 18032 }, { "epoch": 16.82, "learning_rate": 4.7896921641791046e-05, "loss": 0.0023, "step": 18036 }, { "epoch": 16.83, "learning_rate": 4.78964552238806e-05, "loss": 0.0049, "step": 18040 }, { "epoch": 16.83, "learning_rate": 4.789598880597015e-05, "loss": 0.0011, "step": 18044 }, { "epoch": 16.84, "learning_rate": 4.7895522388059704e-05, "loss": 0.0028, "step": 18048 }, { "epoch": 16.84, "learning_rate": 4.789505597014925e-05, "loss": 0.0016, "step": 18052 }, { "epoch": 16.84, "learning_rate": 4.789458955223881e-05, "loss": 0.0007, "step": 18056 }, { "epoch": 16.85, "learning_rate": 4.789412313432836e-05, "loss": 0.0008, "step": 18060 }, { "epoch": 16.85, "learning_rate": 4.789365671641791e-05, "loss": 0.0013, "step": 18064 }, { "epoch": 16.85, "learning_rate": 4.7893190298507465e-05, "loss": 0.0038, "step": 18068 }, { "epoch": 16.86, "learning_rate": 4.789272388059702e-05, "loss": 0.0002, "step": 18072 }, { "epoch": 16.86, "learning_rate": 4.789225746268657e-05, "loss": 0.0016, "step": 18076 }, { "epoch": 16.87, "learning_rate": 4.789179104477612e-05, "loss": 0.0003, "step": 18080 }, { "epoch": 16.87, "learning_rate": 4.789132462686567e-05, "loss": 0.0013, "step": 18084 }, { "epoch": 16.87, "learning_rate": 4.7890858208955226e-05, "loss": 0.0124, "step": 18088 }, { "epoch": 16.88, "learning_rate": 4.789039179104478e-05, "loss": 0.0009, "step": 18092 }, { "epoch": 16.88, "learning_rate": 4.788992537313433e-05, "loss": 0.0034, "step": 18096 }, { "epoch": 16.88, "learning_rate": 4.7889458955223884e-05, "loss": 0.0118, "step": 18100 }, { "epoch": 16.89, "learning_rate": 4.788899253731344e-05, "loss": 0.003, "step": 18104 }, { "epoch": 16.89, "learning_rate": 4.788852611940299e-05, "loss": 0.0029, "step": 18108 }, { "epoch": 16.9, "learning_rate": 4.7888059701492535e-05, "loss": 0.0013, "step": 18112 }, { "epoch": 16.9, "learning_rate": 4.78875932835821e-05, "loss": 0.0057, "step": 18116 }, { "epoch": 16.9, "learning_rate": 4.7887126865671645e-05, "loss": 0.0001, "step": 18120 }, { "epoch": 16.91, "learning_rate": 4.788666044776119e-05, "loss": 0.001, "step": 18124 }, { "epoch": 16.91, "learning_rate": 4.788619402985075e-05, "loss": 0.011, "step": 18128 }, { "epoch": 16.91, "learning_rate": 4.78857276119403e-05, "loss": 0.0044, "step": 18132 }, { "epoch": 16.92, "learning_rate": 4.788526119402985e-05, "loss": 0.0017, "step": 18136 }, { "epoch": 16.92, "learning_rate": 4.7884794776119406e-05, "loss": 0.0047, "step": 18140 }, { "epoch": 16.93, "learning_rate": 4.7884328358208954e-05, "loss": 0.0044, "step": 18144 }, { "epoch": 16.93, "learning_rate": 4.788386194029851e-05, "loss": 0.0012, "step": 18148 }, { "epoch": 16.93, "learning_rate": 4.7883395522388064e-05, "loss": 0.0003, "step": 18152 }, { "epoch": 16.94, "learning_rate": 4.788292910447761e-05, "loss": 0.0006, "step": 18156 }, { "epoch": 16.94, "learning_rate": 4.788246268656717e-05, "loss": 0.0041, "step": 18160 }, { "epoch": 16.94, "learning_rate": 4.788199626865672e-05, "loss": 0.0014, "step": 18164 }, { "epoch": 16.95, "learning_rate": 4.788152985074627e-05, "loss": 0.0039, "step": 18168 }, { "epoch": 16.95, "learning_rate": 4.788106343283582e-05, "loss": 0.0021, "step": 18172 }, { "epoch": 16.96, "learning_rate": 4.788059701492538e-05, "loss": 0.0052, "step": 18176 }, { "epoch": 16.96, "learning_rate": 4.788013059701493e-05, "loss": 0.0009, "step": 18180 }, { "epoch": 16.96, "learning_rate": 4.7879664179104476e-05, "loss": 0.0008, "step": 18184 }, { "epoch": 16.97, "learning_rate": 4.787919776119403e-05, "loss": 0.0023, "step": 18188 }, { "epoch": 16.97, "learning_rate": 4.7878731343283586e-05, "loss": 0.0028, "step": 18192 }, { "epoch": 16.97, "learning_rate": 4.7878264925373134e-05, "loss": 0.0007, "step": 18196 }, { "epoch": 16.98, "learning_rate": 4.787779850746269e-05, "loss": 0.0004, "step": 18200 }, { "epoch": 16.98, "learning_rate": 4.787733208955224e-05, "loss": 0.0063, "step": 18204 }, { "epoch": 16.98, "learning_rate": 4.787686567164179e-05, "loss": 0.0018, "step": 18208 }, { "epoch": 16.99, "learning_rate": 4.787639925373135e-05, "loss": 0.0034, "step": 18212 }, { "epoch": 16.99, "learning_rate": 4.7875932835820895e-05, "loss": 0.0019, "step": 18216 }, { "epoch": 17.0, "learning_rate": 4.787546641791045e-05, "loss": 0.0065, "step": 18220 }, { "epoch": 17.0, "learning_rate": 4.7875000000000005e-05, "loss": 0.0032, "step": 18224 }, { "epoch": 17.0, "learning_rate": 4.787453358208955e-05, "loss": 0.0003, "step": 18228 }, { "epoch": 17.01, "learning_rate": 4.787406716417911e-05, "loss": 0.0036, "step": 18232 }, { "epoch": 17.01, "learning_rate": 4.787360074626866e-05, "loss": 0.0004, "step": 18236 }, { "epoch": 17.01, "learning_rate": 4.787313432835821e-05, "loss": 0.0008, "step": 18240 }, { "epoch": 17.02, "learning_rate": 4.7872667910447766e-05, "loss": 0.0037, "step": 18244 }, { "epoch": 17.02, "learning_rate": 4.7872201492537314e-05, "loss": 0.0003, "step": 18248 }, { "epoch": 17.03, "learning_rate": 4.787173507462687e-05, "loss": 0.0202, "step": 18252 }, { "epoch": 17.03, "learning_rate": 4.7871268656716424e-05, "loss": 0.0002, "step": 18256 }, { "epoch": 17.03, "learning_rate": 4.787080223880597e-05, "loss": 0.0032, "step": 18260 }, { "epoch": 17.04, "learning_rate": 4.787033582089552e-05, "loss": 0.0015, "step": 18264 }, { "epoch": 17.04, "learning_rate": 4.786986940298508e-05, "loss": 0.0018, "step": 18268 }, { "epoch": 17.04, "learning_rate": 4.786940298507463e-05, "loss": 0.0034, "step": 18272 }, { "epoch": 17.05, "learning_rate": 4.786893656716418e-05, "loss": 0.0008, "step": 18276 }, { "epoch": 17.05, "learning_rate": 4.786847014925373e-05, "loss": 0.0004, "step": 18280 }, { "epoch": 17.06, "learning_rate": 4.786800373134329e-05, "loss": 0.0002, "step": 18284 }, { "epoch": 17.06, "learning_rate": 4.7867537313432836e-05, "loss": 0.0006, "step": 18288 }, { "epoch": 17.06, "learning_rate": 4.786707089552239e-05, "loss": 0.0016, "step": 18292 }, { "epoch": 17.07, "learning_rate": 4.7866604477611946e-05, "loss": 0.0019, "step": 18296 }, { "epoch": 17.07, "learning_rate": 4.7866138059701494e-05, "loss": 0.003, "step": 18300 }, { "epoch": 17.07, "learning_rate": 4.786567164179105e-05, "loss": 0.0014, "step": 18304 }, { "epoch": 17.08, "learning_rate": 4.78652052238806e-05, "loss": 0.0056, "step": 18308 }, { "epoch": 17.08, "learning_rate": 4.786473880597015e-05, "loss": 0.0004, "step": 18312 }, { "epoch": 17.09, "learning_rate": 4.786427238805971e-05, "loss": 0.0002, "step": 18316 }, { "epoch": 17.09, "learning_rate": 4.7863805970149255e-05, "loss": 0.0025, "step": 18320 }, { "epoch": 17.09, "learning_rate": 4.78633395522388e-05, "loss": 0.0019, "step": 18324 }, { "epoch": 17.1, "learning_rate": 4.7862873134328365e-05, "loss": 0.0006, "step": 18328 }, { "epoch": 17.1, "learning_rate": 4.786240671641791e-05, "loss": 0.0006, "step": 18332 }, { "epoch": 17.1, "learning_rate": 4.786194029850746e-05, "loss": 0.0004, "step": 18336 }, { "epoch": 17.11, "learning_rate": 4.7861473880597016e-05, "loss": 0.0008, "step": 18340 }, { "epoch": 17.11, "learning_rate": 4.786100746268657e-05, "loss": 0.0019, "step": 18344 }, { "epoch": 17.12, "learning_rate": 4.786054104477612e-05, "loss": 0.0002, "step": 18348 }, { "epoch": 17.12, "learning_rate": 4.7860074626865674e-05, "loss": 0.0008, "step": 18352 }, { "epoch": 17.12, "learning_rate": 4.785960820895523e-05, "loss": 0.0015, "step": 18356 }, { "epoch": 17.13, "learning_rate": 4.785914179104478e-05, "loss": 0.0041, "step": 18360 }, { "epoch": 17.13, "learning_rate": 4.785867537313433e-05, "loss": 0.0029, "step": 18364 }, { "epoch": 17.13, "learning_rate": 4.785820895522388e-05, "loss": 0.0002, "step": 18368 }, { "epoch": 17.14, "learning_rate": 4.7857742537313435e-05, "loss": 0.0004, "step": 18372 }, { "epoch": 17.14, "learning_rate": 4.785727611940299e-05, "loss": 0.0019, "step": 18376 }, { "epoch": 17.15, "learning_rate": 4.785680970149254e-05, "loss": 0.0011, "step": 18380 }, { "epoch": 17.15, "learning_rate": 4.7856343283582086e-05, "loss": 0.0027, "step": 18384 }, { "epoch": 17.15, "learning_rate": 4.785587686567165e-05, "loss": 0.0055, "step": 18388 }, { "epoch": 17.16, "learning_rate": 4.7855410447761196e-05, "loss": 0.0001, "step": 18392 }, { "epoch": 17.16, "learning_rate": 4.785494402985075e-05, "loss": 0.0004, "step": 18396 }, { "epoch": 17.16, "learning_rate": 4.78544776119403e-05, "loss": 0.0002, "step": 18400 }, { "epoch": 17.17, "learning_rate": 4.7854011194029854e-05, "loss": 0.006, "step": 18404 }, { "epoch": 17.17, "learning_rate": 4.785354477611941e-05, "loss": 0.0001, "step": 18408 }, { "epoch": 17.18, "learning_rate": 4.785307835820896e-05, "loss": 0.0029, "step": 18412 }, { "epoch": 17.18, "learning_rate": 4.785261194029851e-05, "loss": 0.0004, "step": 18416 }, { "epoch": 17.18, "learning_rate": 4.785214552238807e-05, "loss": 0.0008, "step": 18420 }, { "epoch": 17.19, "learning_rate": 4.7851679104477615e-05, "loss": 0.0004, "step": 18424 }, { "epoch": 17.19, "learning_rate": 4.785121268656716e-05, "loss": 0.0022, "step": 18428 }, { "epoch": 17.19, "learning_rate": 4.785074626865672e-05, "loss": 0.0009, "step": 18432 }, { "epoch": 17.2, "learning_rate": 4.785027985074627e-05, "loss": 0.0009, "step": 18436 }, { "epoch": 17.2, "learning_rate": 4.784981343283582e-05, "loss": 0.0053, "step": 18440 }, { "epoch": 17.21, "learning_rate": 4.7849347014925376e-05, "loss": 0.0019, "step": 18444 }, { "epoch": 17.21, "learning_rate": 4.784888059701493e-05, "loss": 0.0028, "step": 18448 }, { "epoch": 17.21, "learning_rate": 4.784841417910448e-05, "loss": 0.0016, "step": 18452 }, { "epoch": 17.22, "learning_rate": 4.7847947761194034e-05, "loss": 0.0012, "step": 18456 }, { "epoch": 17.22, "learning_rate": 4.784748134328358e-05, "loss": 0.0004, "step": 18460 }, { "epoch": 17.22, "learning_rate": 4.784701492537314e-05, "loss": 0.0002, "step": 18464 }, { "epoch": 17.23, "learning_rate": 4.784654850746269e-05, "loss": 0.0049, "step": 18468 }, { "epoch": 17.23, "learning_rate": 4.784608208955224e-05, "loss": 0.0037, "step": 18472 }, { "epoch": 17.24, "learning_rate": 4.7845615671641795e-05, "loss": 0.0102, "step": 18476 }, { "epoch": 17.24, "learning_rate": 4.784514925373135e-05, "loss": 0.0021, "step": 18480 }, { "epoch": 17.24, "learning_rate": 4.78446828358209e-05, "loss": 0.0025, "step": 18484 }, { "epoch": 17.25, "learning_rate": 4.7844216417910446e-05, "loss": 0.0012, "step": 18488 }, { "epoch": 17.25, "learning_rate": 4.784375e-05, "loss": 0.0019, "step": 18492 }, { "epoch": 17.25, "learning_rate": 4.7843283582089556e-05, "loss": 0.0006, "step": 18496 }, { "epoch": 17.26, "learning_rate": 4.7842817164179104e-05, "loss": 0.007, "step": 18500 }, { "epoch": 17.26, "eval_exact_match": 0.7292069632495164, "eval_exec": 0.7562862669245648, "eval_loss": 0.35152459144592285, "eval_runtime": 1127.9193, "eval_samples_per_second": 0.917, "step": 18500 }, { "epoch": 17.26, "learning_rate": 4.784235074626866e-05, "loss": 0.0043, "step": 18504 }, { "epoch": 17.26, "learning_rate": 4.7841884328358214e-05, "loss": 0.0025, "step": 18508 }, { "epoch": 17.27, "learning_rate": 4.784141791044776e-05, "loss": 0.0137, "step": 18512 }, { "epoch": 17.27, "learning_rate": 4.784095149253732e-05, "loss": 0.002, "step": 18516 }, { "epoch": 17.28, "learning_rate": 4.7840485074626865e-05, "loss": 0.0014, "step": 18520 }, { "epoch": 17.28, "learning_rate": 4.784001865671642e-05, "loss": 0.0019, "step": 18524 }, { "epoch": 17.28, "learning_rate": 4.7839552238805975e-05, "loss": 0.0002, "step": 18528 }, { "epoch": 17.29, "learning_rate": 4.783908582089552e-05, "loss": 0.0006, "step": 18532 }, { "epoch": 17.29, "learning_rate": 4.783861940298507e-05, "loss": 0.0048, "step": 18536 }, { "epoch": 17.29, "learning_rate": 4.783815298507463e-05, "loss": 0.0017, "step": 18540 }, { "epoch": 17.3, "learning_rate": 4.783768656716418e-05, "loss": 0.0053, "step": 18544 }, { "epoch": 17.3, "learning_rate": 4.7837220149253736e-05, "loss": 0.0008, "step": 18548 }, { "epoch": 17.31, "learning_rate": 4.7836753731343284e-05, "loss": 0.0023, "step": 18552 }, { "epoch": 17.31, "learning_rate": 4.783628731343284e-05, "loss": 0.0049, "step": 18556 }, { "epoch": 17.31, "learning_rate": 4.7835820895522394e-05, "loss": 0.0028, "step": 18560 }, { "epoch": 17.32, "learning_rate": 4.783535447761194e-05, "loss": 0.0002, "step": 18564 }, { "epoch": 17.32, "learning_rate": 4.78348880597015e-05, "loss": 0.001, "step": 18568 }, { "epoch": 17.32, "learning_rate": 4.783442164179105e-05, "loss": 0.0032, "step": 18572 }, { "epoch": 17.33, "learning_rate": 4.78339552238806e-05, "loss": 0.0009, "step": 18576 }, { "epoch": 17.33, "learning_rate": 4.783348880597015e-05, "loss": 0.0019, "step": 18580 }, { "epoch": 17.34, "learning_rate": 4.783302238805971e-05, "loss": 0.0008, "step": 18584 }, { "epoch": 17.34, "learning_rate": 4.783255597014926e-05, "loss": 0.0029, "step": 18588 }, { "epoch": 17.34, "learning_rate": 4.7832089552238806e-05, "loss": 0.0024, "step": 18592 }, { "epoch": 17.35, "learning_rate": 4.783162313432836e-05, "loss": 0.0064, "step": 18596 }, { "epoch": 17.35, "learning_rate": 4.7831156716417916e-05, "loss": 0.0005, "step": 18600 }, { "epoch": 17.35, "learning_rate": 4.7830690298507464e-05, "loss": 0.0026, "step": 18604 }, { "epoch": 17.36, "learning_rate": 4.783022388059702e-05, "loss": 0.0002, "step": 18608 }, { "epoch": 17.36, "learning_rate": 4.782975746268657e-05, "loss": 0.0007, "step": 18612 }, { "epoch": 17.37, "learning_rate": 4.782929104477612e-05, "loss": 0.0005, "step": 18616 }, { "epoch": 17.37, "learning_rate": 4.7828824626865677e-05, "loss": 0.0015, "step": 18620 }, { "epoch": 17.37, "learning_rate": 4.7828358208955225e-05, "loss": 0.002, "step": 18624 }, { "epoch": 17.38, "learning_rate": 4.782789179104478e-05, "loss": 0.0133, "step": 18628 }, { "epoch": 17.38, "learning_rate": 4.7827425373134334e-05, "loss": 0.0004, "step": 18632 }, { "epoch": 17.38, "learning_rate": 4.782695895522388e-05, "loss": 0.0002, "step": 18636 }, { "epoch": 17.39, "learning_rate": 4.782649253731343e-05, "loss": 0.0052, "step": 18640 }, { "epoch": 17.39, "learning_rate": 4.782602611940299e-05, "loss": 0.0012, "step": 18644 }, { "epoch": 17.4, "learning_rate": 4.782555970149254e-05, "loss": 0.0045, "step": 18648 }, { "epoch": 17.4, "learning_rate": 4.782509328358209e-05, "loss": 0.0024, "step": 18652 }, { "epoch": 17.4, "learning_rate": 4.7824626865671644e-05, "loss": 0.0025, "step": 18656 }, { "epoch": 17.41, "learning_rate": 4.78241604477612e-05, "loss": 0.004, "step": 18660 }, { "epoch": 17.41, "learning_rate": 4.782369402985075e-05, "loss": 0.0038, "step": 18664 }, { "epoch": 17.41, "learning_rate": 4.78232276119403e-05, "loss": 0.0013, "step": 18668 }, { "epoch": 17.42, "learning_rate": 4.782276119402985e-05, "loss": 0.002, "step": 18672 }, { "epoch": 17.42, "learning_rate": 4.7822294776119405e-05, "loss": 0.0007, "step": 18676 }, { "epoch": 17.43, "learning_rate": 4.782182835820896e-05, "loss": 0.0028, "step": 18680 }, { "epoch": 17.43, "learning_rate": 4.782136194029851e-05, "loss": 0.0001, "step": 18684 }, { "epoch": 17.43, "learning_rate": 4.782089552238806e-05, "loss": 0.0058, "step": 18688 }, { "epoch": 17.44, "learning_rate": 4.782042910447762e-05, "loss": 0.0038, "step": 18692 }, { "epoch": 17.44, "learning_rate": 4.7819962686567166e-05, "loss": 0.0014, "step": 18696 }, { "epoch": 17.44, "learning_rate": 4.7819496268656714e-05, "loss": 0.0005, "step": 18700 }, { "epoch": 17.45, "learning_rate": 4.781902985074627e-05, "loss": 0.0137, "step": 18704 }, { "epoch": 17.45, "learning_rate": 4.7818563432835824e-05, "loss": 0.0038, "step": 18708 }, { "epoch": 17.46, "learning_rate": 4.781809701492538e-05, "loss": 0.0009, "step": 18712 }, { "epoch": 17.46, "learning_rate": 4.7817630597014927e-05, "loss": 0.0006, "step": 18716 }, { "epoch": 17.46, "learning_rate": 4.781716417910448e-05, "loss": 0.0013, "step": 18720 }, { "epoch": 17.47, "learning_rate": 4.7816697761194036e-05, "loss": 0.0049, "step": 18724 }, { "epoch": 17.47, "learning_rate": 4.7816231343283585e-05, "loss": 0.0006, "step": 18728 }, { "epoch": 17.47, "learning_rate": 4.781576492537313e-05, "loss": 0.004, "step": 18732 }, { "epoch": 17.48, "learning_rate": 4.7815298507462694e-05, "loss": 0.0003, "step": 18736 }, { "epoch": 17.48, "learning_rate": 4.781483208955224e-05, "loss": 0.0011, "step": 18740 }, { "epoch": 17.49, "learning_rate": 4.781436567164179e-05, "loss": 0.0006, "step": 18744 }, { "epoch": 17.49, "learning_rate": 4.7813899253731345e-05, "loss": 0.0025, "step": 18748 }, { "epoch": 17.49, "learning_rate": 4.78134328358209e-05, "loss": 0.0009, "step": 18752 }, { "epoch": 17.5, "learning_rate": 4.781296641791045e-05, "loss": 0.0027, "step": 18756 }, { "epoch": 17.5, "learning_rate": 4.7812500000000003e-05, "loss": 0.0025, "step": 18760 }, { "epoch": 17.5, "learning_rate": 4.781203358208955e-05, "loss": 0.0008, "step": 18764 }, { "epoch": 17.51, "learning_rate": 4.7811567164179106e-05, "loss": 0.0051, "step": 18768 }, { "epoch": 17.51, "learning_rate": 4.781110074626866e-05, "loss": 0.0012, "step": 18772 }, { "epoch": 17.51, "learning_rate": 4.781063432835821e-05, "loss": 0.0003, "step": 18776 }, { "epoch": 17.52, "learning_rate": 4.7810167910447764e-05, "loss": 0.0024, "step": 18780 }, { "epoch": 17.52, "learning_rate": 4.780970149253732e-05, "loss": 0.0002, "step": 18784 }, { "epoch": 17.53, "learning_rate": 4.780923507462687e-05, "loss": 0.0074, "step": 18788 }, { "epoch": 17.53, "learning_rate": 4.7808768656716416e-05, "loss": 0.0023, "step": 18792 }, { "epoch": 17.53, "learning_rate": 4.780830223880598e-05, "loss": 0.0009, "step": 18796 }, { "epoch": 17.54, "learning_rate": 4.7807835820895525e-05, "loss": 0.0157, "step": 18800 }, { "epoch": 17.54, "learning_rate": 4.7807369402985074e-05, "loss": 0.0032, "step": 18804 }, { "epoch": 17.54, "learning_rate": 4.780690298507463e-05, "loss": 0.0005, "step": 18808 }, { "epoch": 17.55, "learning_rate": 4.780643656716418e-05, "loss": 0.0012, "step": 18812 }, { "epoch": 17.55, "learning_rate": 4.780597014925373e-05, "loss": 0.0014, "step": 18816 }, { "epoch": 17.56, "learning_rate": 4.7805503731343286e-05, "loss": 0.0055, "step": 18820 }, { "epoch": 17.56, "learning_rate": 4.7805037313432835e-05, "loss": 0.0026, "step": 18824 }, { "epoch": 17.56, "learning_rate": 4.780457089552239e-05, "loss": 0.0003, "step": 18828 }, { "epoch": 17.57, "learning_rate": 4.7804104477611944e-05, "loss": 0.0013, "step": 18832 }, { "epoch": 17.57, "learning_rate": 4.780363805970149e-05, "loss": 0.0062, "step": 18836 }, { "epoch": 17.57, "learning_rate": 4.780317164179105e-05, "loss": 0.0007, "step": 18840 }, { "epoch": 17.58, "learning_rate": 4.78027052238806e-05, "loss": 0.001, "step": 18844 }, { "epoch": 17.58, "learning_rate": 4.780223880597015e-05, "loss": 0.0005, "step": 18848 }, { "epoch": 17.59, "learning_rate": 4.78017723880597e-05, "loss": 0.0013, "step": 18852 }, { "epoch": 17.59, "learning_rate": 4.780130597014926e-05, "loss": 0.0033, "step": 18856 }, { "epoch": 17.59, "learning_rate": 4.780083955223881e-05, "loss": 0.0028, "step": 18860 }, { "epoch": 17.6, "learning_rate": 4.7800373134328356e-05, "loss": 0.0045, "step": 18864 }, { "epoch": 17.6, "learning_rate": 4.779990671641791e-05, "loss": 0.001, "step": 18868 }, { "epoch": 17.6, "learning_rate": 4.7799440298507466e-05, "loss": 0.0095, "step": 18872 }, { "epoch": 17.61, "learning_rate": 4.779897388059702e-05, "loss": 0.0001, "step": 18876 }, { "epoch": 17.61, "learning_rate": 4.779850746268657e-05, "loss": 0.0029, "step": 18880 }, { "epoch": 17.62, "learning_rate": 4.779804104477612e-05, "loss": 0.0007, "step": 18884 }, { "epoch": 17.62, "learning_rate": 4.779757462686568e-05, "loss": 0.0042, "step": 18888 }, { "epoch": 17.62, "learning_rate": 4.779710820895523e-05, "loss": 0.0029, "step": 18892 }, { "epoch": 17.63, "learning_rate": 4.7796641791044775e-05, "loss": 0.0007, "step": 18896 }, { "epoch": 17.63, "learning_rate": 4.779617537313433e-05, "loss": 0.001, "step": 18900 }, { "epoch": 17.63, "learning_rate": 4.7795708955223885e-05, "loss": 0.003, "step": 18904 }, { "epoch": 17.64, "learning_rate": 4.779524253731343e-05, "loss": 0.0074, "step": 18908 }, { "epoch": 17.64, "learning_rate": 4.779477611940299e-05, "loss": 0.0013, "step": 18912 }, { "epoch": 17.65, "learning_rate": 4.779430970149254e-05, "loss": 0.0011, "step": 18916 }, { "epoch": 17.65, "learning_rate": 4.779384328358209e-05, "loss": 0.004, "step": 18920 }, { "epoch": 17.65, "learning_rate": 4.7793376865671646e-05, "loss": 0.0014, "step": 18924 }, { "epoch": 17.66, "learning_rate": 4.7792910447761194e-05, "loss": 0.0015, "step": 18928 }, { "epoch": 17.66, "learning_rate": 4.779244402985075e-05, "loss": 0.0012, "step": 18932 }, { "epoch": 17.66, "learning_rate": 4.7791977611940304e-05, "loss": 0.0003, "step": 18936 }, { "epoch": 17.67, "learning_rate": 4.779151119402985e-05, "loss": 0.0001, "step": 18940 }, { "epoch": 17.67, "learning_rate": 4.77910447761194e-05, "loss": 0.0025, "step": 18944 }, { "epoch": 17.68, "learning_rate": 4.779057835820896e-05, "loss": 0.0009, "step": 18948 }, { "epoch": 17.68, "learning_rate": 4.779011194029851e-05, "loss": 0.0003, "step": 18952 }, { "epoch": 17.68, "learning_rate": 4.778964552238806e-05, "loss": 0.0022, "step": 18956 }, { "epoch": 17.69, "learning_rate": 4.778917910447761e-05, "loss": 0.0035, "step": 18960 }, { "epoch": 17.69, "learning_rate": 4.778871268656717e-05, "loss": 0.0004, "step": 18964 }, { "epoch": 17.69, "learning_rate": 4.7788246268656716e-05, "loss": 0.0001, "step": 18968 }, { "epoch": 17.7, "learning_rate": 4.778777985074627e-05, "loss": 0.0002, "step": 18972 }, { "epoch": 17.7, "learning_rate": 4.7787313432835826e-05, "loss": 0.0026, "step": 18976 }, { "epoch": 17.71, "learning_rate": 4.7786847014925374e-05, "loss": 0.0026, "step": 18980 }, { "epoch": 17.71, "learning_rate": 4.778638059701493e-05, "loss": 0.0017, "step": 18984 }, { "epoch": 17.71, "learning_rate": 4.778591417910448e-05, "loss": 0.0053, "step": 18988 }, { "epoch": 17.72, "learning_rate": 4.778544776119403e-05, "loss": 0.0042, "step": 18992 }, { "epoch": 17.72, "learning_rate": 4.778498134328359e-05, "loss": 0.0018, "step": 18996 }, { "epoch": 17.72, "learning_rate": 4.7784514925373135e-05, "loss": 0.0001, "step": 19000 }, { "epoch": 17.72, "eval_exact_match": 0.7282398452611218, "eval_exec": 0.7601547388781431, "eval_loss": 0.3401549458503723, "eval_runtime": 1055.5133, "eval_samples_per_second": 0.98, "step": 19000 }, { "epoch": 17.73, "learning_rate": 4.7784048507462683e-05, "loss": 0.0016, "step": 19004 }, { "epoch": 17.73, "learning_rate": 4.7783582089552245e-05, "loss": 0.0027, "step": 19008 }, { "epoch": 17.73, "learning_rate": 4.778311567164179e-05, "loss": 0.0041, "step": 19012 }, { "epoch": 17.74, "learning_rate": 4.778264925373134e-05, "loss": 0.0009, "step": 19016 }, { "epoch": 17.74, "learning_rate": 4.7782182835820896e-05, "loss": 0.0073, "step": 19020 }, { "epoch": 17.75, "learning_rate": 4.778171641791045e-05, "loss": 0.0015, "step": 19024 }, { "epoch": 17.75, "learning_rate": 4.778125e-05, "loss": 0.0036, "step": 19028 }, { "epoch": 17.75, "learning_rate": 4.7780783582089554e-05, "loss": 0.0074, "step": 19032 }, { "epoch": 17.76, "learning_rate": 4.778031716417911e-05, "loss": 0.0003, "step": 19036 }, { "epoch": 17.76, "learning_rate": 4.7779850746268664e-05, "loss": 0.0006, "step": 19040 }, { "epoch": 17.76, "learning_rate": 4.777938432835821e-05, "loss": 0.001, "step": 19044 }, { "epoch": 17.77, "learning_rate": 4.777891791044776e-05, "loss": 0.0008, "step": 19048 }, { "epoch": 17.77, "learning_rate": 4.7778451492537315e-05, "loss": 0.0005, "step": 19052 }, { "epoch": 17.78, "learning_rate": 4.777798507462687e-05, "loss": 0.0007, "step": 19056 }, { "epoch": 17.78, "learning_rate": 4.777751865671642e-05, "loss": 0.0002, "step": 19060 }, { "epoch": 17.78, "learning_rate": 4.777705223880597e-05, "loss": 0.0003, "step": 19064 }, { "epoch": 17.79, "learning_rate": 4.777658582089553e-05, "loss": 0.001, "step": 19068 }, { "epoch": 17.79, "learning_rate": 4.7776119402985076e-05, "loss": 0.0006, "step": 19072 }, { "epoch": 17.79, "learning_rate": 4.777565298507463e-05, "loss": 0.0043, "step": 19076 }, { "epoch": 17.8, "learning_rate": 4.777518656716418e-05, "loss": 0.002, "step": 19080 }, { "epoch": 17.8, "learning_rate": 4.7774720149253734e-05, "loss": 0.0013, "step": 19084 }, { "epoch": 17.81, "learning_rate": 4.777425373134329e-05, "loss": 0.0001, "step": 19088 }, { "epoch": 17.81, "learning_rate": 4.777378731343284e-05, "loss": 0.004, "step": 19092 }, { "epoch": 17.81, "learning_rate": 4.777332089552239e-05, "loss": 0.0023, "step": 19096 }, { "epoch": 17.82, "learning_rate": 4.777285447761195e-05, "loss": 0.0003, "step": 19100 }, { "epoch": 17.82, "learning_rate": 4.7772388059701495e-05, "loss": 0.001, "step": 19104 }, { "epoch": 17.82, "learning_rate": 4.777192164179104e-05, "loss": 0.0035, "step": 19108 }, { "epoch": 17.83, "learning_rate": 4.77714552238806e-05, "loss": 0.0021, "step": 19112 }, { "epoch": 17.83, "learning_rate": 4.777098880597015e-05, "loss": 0.0005, "step": 19116 }, { "epoch": 17.84, "learning_rate": 4.77705223880597e-05, "loss": 0.0005, "step": 19120 }, { "epoch": 17.84, "learning_rate": 4.7770055970149256e-05, "loss": 0.0004, "step": 19124 }, { "epoch": 17.84, "learning_rate": 4.776958955223881e-05, "loss": 0.0001, "step": 19128 }, { "epoch": 17.85, "learning_rate": 4.776912313432836e-05, "loss": 0.0002, "step": 19132 }, { "epoch": 17.85, "learning_rate": 4.7768656716417914e-05, "loss": 0.0117, "step": 19136 }, { "epoch": 17.85, "learning_rate": 4.776819029850746e-05, "loss": 0.0008, "step": 19140 }, { "epoch": 17.86, "learning_rate": 4.776772388059702e-05, "loss": 0.0037, "step": 19144 }, { "epoch": 17.86, "learning_rate": 4.776725746268657e-05, "loss": 0.0002, "step": 19148 }, { "epoch": 17.87, "learning_rate": 4.776679104477612e-05, "loss": 0.0005, "step": 19152 }, { "epoch": 17.87, "learning_rate": 4.7766324626865675e-05, "loss": 0.0026, "step": 19156 }, { "epoch": 17.87, "learning_rate": 4.776585820895523e-05, "loss": 0.0021, "step": 19160 }, { "epoch": 17.88, "learning_rate": 4.776539179104478e-05, "loss": 0.0062, "step": 19164 }, { "epoch": 17.88, "learning_rate": 4.7764925373134326e-05, "loss": 0.002, "step": 19168 }, { "epoch": 17.88, "learning_rate": 4.776445895522388e-05, "loss": 0.0011, "step": 19172 }, { "epoch": 17.89, "learning_rate": 4.7763992537313436e-05, "loss": 0.0026, "step": 19176 }, { "epoch": 17.89, "learning_rate": 4.7763526119402984e-05, "loss": 0.0024, "step": 19180 }, { "epoch": 17.9, "learning_rate": 4.776305970149254e-05, "loss": 0.0022, "step": 19184 }, { "epoch": 17.9, "learning_rate": 4.7762593283582094e-05, "loss": 0.0018, "step": 19188 }, { "epoch": 17.9, "learning_rate": 4.776212686567164e-05, "loss": 0.0019, "step": 19192 }, { "epoch": 17.91, "learning_rate": 4.77616604477612e-05, "loss": 0.0001, "step": 19196 }, { "epoch": 17.91, "learning_rate": 4.7761194029850745e-05, "loss": 0.0007, "step": 19200 }, { "epoch": 17.91, "learning_rate": 4.776072761194031e-05, "loss": 0.0002, "step": 19204 }, { "epoch": 17.92, "learning_rate": 4.7760261194029855e-05, "loss": 0.0001, "step": 19208 }, { "epoch": 17.92, "learning_rate": 4.77597947761194e-05, "loss": 0.0004, "step": 19212 }, { "epoch": 17.93, "learning_rate": 4.775932835820896e-05, "loss": 0.0008, "step": 19216 }, { "epoch": 17.93, "learning_rate": 4.775886194029851e-05, "loss": 0.001, "step": 19220 }, { "epoch": 17.93, "learning_rate": 4.775839552238806e-05, "loss": 0.001, "step": 19224 }, { "epoch": 17.94, "learning_rate": 4.7757929104477616e-05, "loss": 0.0003, "step": 19228 }, { "epoch": 17.94, "learning_rate": 4.7757462686567164e-05, "loss": 0.0016, "step": 19232 }, { "epoch": 17.94, "learning_rate": 4.775699626865672e-05, "loss": 0.002, "step": 19236 }, { "epoch": 17.95, "learning_rate": 4.7756529850746274e-05, "loss": 0.0033, "step": 19240 }, { "epoch": 17.95, "learning_rate": 4.775606343283582e-05, "loss": 0.0007, "step": 19244 }, { "epoch": 17.96, "learning_rate": 4.775559701492538e-05, "loss": 0.0015, "step": 19248 }, { "epoch": 17.96, "learning_rate": 4.775513059701493e-05, "loss": 0.0029, "step": 19252 }, { "epoch": 17.96, "learning_rate": 4.775466417910448e-05, "loss": 0.0053, "step": 19256 }, { "epoch": 17.97, "learning_rate": 4.775419776119403e-05, "loss": 0.0033, "step": 19260 }, { "epoch": 17.97, "learning_rate": 4.775373134328359e-05, "loss": 0.0094, "step": 19264 }, { "epoch": 17.97, "learning_rate": 4.775326492537314e-05, "loss": 0.0032, "step": 19268 }, { "epoch": 17.98, "learning_rate": 4.7752798507462686e-05, "loss": 0.0007, "step": 19272 }, { "epoch": 17.98, "learning_rate": 4.775233208955224e-05, "loss": 0.0167, "step": 19276 }, { "epoch": 17.98, "learning_rate": 4.7751865671641796e-05, "loss": 0.0062, "step": 19280 }, { "epoch": 17.99, "learning_rate": 4.7751399253731344e-05, "loss": 0.005, "step": 19284 }, { "epoch": 17.99, "learning_rate": 4.77509328358209e-05, "loss": 0.0014, "step": 19288 }, { "epoch": 18.0, "learning_rate": 4.775046641791045e-05, "loss": 0.0047, "step": 19292 }, { "epoch": 18.0, "learning_rate": 4.775e-05, "loss": 0.0049, "step": 19296 }, { "epoch": 18.0, "learning_rate": 4.774953358208956e-05, "loss": 0.0003, "step": 19300 }, { "epoch": 18.01, "learning_rate": 4.7749067164179105e-05, "loss": 0.0048, "step": 19304 }, { "epoch": 18.01, "learning_rate": 4.774860074626866e-05, "loss": 0.001, "step": 19308 }, { "epoch": 18.01, "learning_rate": 4.7748134328358215e-05, "loss": 0.0018, "step": 19312 }, { "epoch": 18.02, "learning_rate": 4.774766791044776e-05, "loss": 0.0004, "step": 19316 }, { "epoch": 18.02, "learning_rate": 4.774720149253731e-05, "loss": 0.0014, "step": 19320 }, { "epoch": 18.03, "learning_rate": 4.774673507462687e-05, "loss": 0.0046, "step": 19324 }, { "epoch": 18.03, "learning_rate": 4.774626865671642e-05, "loss": 0.0002, "step": 19328 }, { "epoch": 18.03, "learning_rate": 4.774580223880597e-05, "loss": 0.0082, "step": 19332 }, { "epoch": 18.04, "learning_rate": 4.7745335820895524e-05, "loss": 0.0046, "step": 19336 }, { "epoch": 18.04, "learning_rate": 4.774486940298508e-05, "loss": 0.0008, "step": 19340 }, { "epoch": 18.04, "learning_rate": 4.774440298507463e-05, "loss": 0.0003, "step": 19344 }, { "epoch": 18.05, "learning_rate": 4.774393656716418e-05, "loss": 0.001, "step": 19348 }, { "epoch": 18.05, "learning_rate": 4.774347014925373e-05, "loss": 0.002, "step": 19352 }, { "epoch": 18.06, "learning_rate": 4.7743003731343285e-05, "loss": 0.0033, "step": 19356 }, { "epoch": 18.06, "learning_rate": 4.774253731343284e-05, "loss": 0.0005, "step": 19360 }, { "epoch": 18.06, "learning_rate": 4.774207089552239e-05, "loss": 0.0007, "step": 19364 }, { "epoch": 18.07, "learning_rate": 4.774160447761194e-05, "loss": 0.0035, "step": 19368 }, { "epoch": 18.07, "learning_rate": 4.77411380597015e-05, "loss": 0.0001, "step": 19372 }, { "epoch": 18.07, "learning_rate": 4.7740671641791046e-05, "loss": 0.0002, "step": 19376 }, { "epoch": 18.08, "learning_rate": 4.77402052238806e-05, "loss": 0.0058, "step": 19380 }, { "epoch": 18.08, "learning_rate": 4.773973880597015e-05, "loss": 0.0005, "step": 19384 }, { "epoch": 18.09, "learning_rate": 4.7739272388059704e-05, "loss": 0.002, "step": 19388 }, { "epoch": 18.09, "learning_rate": 4.773880597014926e-05, "loss": 0.0095, "step": 19392 }, { "epoch": 18.09, "learning_rate": 4.773833955223881e-05, "loss": 0.0022, "step": 19396 }, { "epoch": 18.1, "learning_rate": 4.773787313432836e-05, "loss": 0.0041, "step": 19400 }, { "epoch": 18.1, "learning_rate": 4.773740671641792e-05, "loss": 0.0015, "step": 19404 }, { "epoch": 18.1, "learning_rate": 4.7736940298507465e-05, "loss": 0.0038, "step": 19408 }, { "epoch": 18.11, "learning_rate": 4.773647388059701e-05, "loss": 0.0002, "step": 19412 }, { "epoch": 18.11, "learning_rate": 4.7736007462686575e-05, "loss": 0.0033, "step": 19416 }, { "epoch": 18.12, "learning_rate": 4.773554104477612e-05, "loss": 0.0003, "step": 19420 }, { "epoch": 18.12, "learning_rate": 4.773507462686567e-05, "loss": 0.0005, "step": 19424 }, { "epoch": 18.12, "learning_rate": 4.7734608208955226e-05, "loss": 0.0003, "step": 19428 }, { "epoch": 18.13, "learning_rate": 4.773414179104478e-05, "loss": 0.0009, "step": 19432 }, { "epoch": 18.13, "learning_rate": 4.773367537313433e-05, "loss": 0.0189, "step": 19436 }, { "epoch": 18.13, "learning_rate": 4.7733208955223884e-05, "loss": 0.0036, "step": 19440 }, { "epoch": 18.14, "learning_rate": 4.773274253731343e-05, "loss": 0.0008, "step": 19444 }, { "epoch": 18.14, "learning_rate": 4.773227611940299e-05, "loss": 0.0014, "step": 19448 }, { "epoch": 18.15, "learning_rate": 4.773180970149254e-05, "loss": 0.001, "step": 19452 }, { "epoch": 18.15, "learning_rate": 4.773134328358209e-05, "loss": 0.0032, "step": 19456 }, { "epoch": 18.15, "learning_rate": 4.7730876865671645e-05, "loss": 0.0025, "step": 19460 }, { "epoch": 18.16, "learning_rate": 4.77304104477612e-05, "loss": 0.0018, "step": 19464 }, { "epoch": 18.16, "learning_rate": 4.772994402985075e-05, "loss": 0.0002, "step": 19468 }, { "epoch": 18.16, "learning_rate": 4.7729477611940296e-05, "loss": 0.0008, "step": 19472 }, { "epoch": 18.17, "learning_rate": 4.772901119402986e-05, "loss": 0.001, "step": 19476 }, { "epoch": 18.17, "learning_rate": 4.7728544776119406e-05, "loss": 0.0002, "step": 19480 }, { "epoch": 18.18, "learning_rate": 4.7728078358208954e-05, "loss": 0.0, "step": 19484 }, { "epoch": 18.18, "learning_rate": 4.772761194029851e-05, "loss": 0.0005, "step": 19488 }, { "epoch": 18.18, "learning_rate": 4.7727145522388064e-05, "loss": 0.0026, "step": 19492 }, { "epoch": 18.19, "learning_rate": 4.772667910447761e-05, "loss": 0.001, "step": 19496 }, { "epoch": 18.19, "learning_rate": 4.772621268656717e-05, "loss": 0.0037, "step": 19500 }, { "epoch": 18.19, "eval_exact_match": 0.7292069632495164, "eval_exec": 0.7562862669245648, "eval_loss": 0.34727153182029724, "eval_runtime": 1047.0187, "eval_samples_per_second": 0.988, "step": 19500 }, { "epoch": 18.19, "learning_rate": 4.7725746268656715e-05, "loss": 0.0025, "step": 19504 }, { "epoch": 18.2, "learning_rate": 4.772527985074627e-05, "loss": 0.0047, "step": 19508 }, { "epoch": 18.2, "learning_rate": 4.7724813432835825e-05, "loss": 0.0004, "step": 19512 }, { "epoch": 18.21, "learning_rate": 4.772434701492537e-05, "loss": 0.0015, "step": 19516 }, { "epoch": 18.21, "learning_rate": 4.772388059701493e-05, "loss": 0.0003, "step": 19520 }, { "epoch": 18.21, "learning_rate": 4.772341417910448e-05, "loss": 0.0003, "step": 19524 }, { "epoch": 18.22, "learning_rate": 4.772294776119403e-05, "loss": 0.001, "step": 19528 }, { "epoch": 18.22, "learning_rate": 4.7722481343283586e-05, "loss": 0.003, "step": 19532 }, { "epoch": 18.22, "learning_rate": 4.772201492537314e-05, "loss": 0.0019, "step": 19536 }, { "epoch": 18.23, "learning_rate": 4.772154850746269e-05, "loss": 0.0001, "step": 19540 }, { "epoch": 18.23, "learning_rate": 4.7721082089552244e-05, "loss": 0.0022, "step": 19544 }, { "epoch": 18.24, "learning_rate": 4.772061567164179e-05, "loss": 0.0008, "step": 19548 }, { "epoch": 18.24, "learning_rate": 4.7720149253731347e-05, "loss": 0.001, "step": 19552 }, { "epoch": 18.24, "learning_rate": 4.77196828358209e-05, "loss": 0.0003, "step": 19556 }, { "epoch": 18.25, "learning_rate": 4.771921641791045e-05, "loss": 0.0001, "step": 19560 }, { "epoch": 18.25, "learning_rate": 4.771875e-05, "loss": 0.0001, "step": 19564 }, { "epoch": 18.25, "learning_rate": 4.771828358208956e-05, "loss": 0.0018, "step": 19568 }, { "epoch": 18.26, "learning_rate": 4.771781716417911e-05, "loss": 0.0018, "step": 19572 }, { "epoch": 18.26, "learning_rate": 4.7717350746268656e-05, "loss": 0.001, "step": 19576 }, { "epoch": 18.26, "learning_rate": 4.771688432835821e-05, "loss": 0.0007, "step": 19580 }, { "epoch": 18.27, "learning_rate": 4.7716417910447766e-05, "loss": 0.0019, "step": 19584 }, { "epoch": 18.27, "learning_rate": 4.7715951492537314e-05, "loss": 0.0018, "step": 19588 }, { "epoch": 18.28, "learning_rate": 4.771548507462687e-05, "loss": 0.0076, "step": 19592 }, { "epoch": 18.28, "learning_rate": 4.7715018656716423e-05, "loss": 0.0002, "step": 19596 }, { "epoch": 18.28, "learning_rate": 4.771455223880597e-05, "loss": 0.0102, "step": 19600 }, { "epoch": 18.29, "learning_rate": 4.7714085820895527e-05, "loss": 0.0018, "step": 19604 }, { "epoch": 18.29, "learning_rate": 4.7713619402985075e-05, "loss": 0.0008, "step": 19608 }, { "epoch": 18.29, "learning_rate": 4.771315298507463e-05, "loss": 0.0012, "step": 19612 }, { "epoch": 18.3, "learning_rate": 4.7712686567164184e-05, "loss": 0.002, "step": 19616 }, { "epoch": 18.3, "learning_rate": 4.771222014925373e-05, "loss": 0.0046, "step": 19620 }, { "epoch": 18.31, "learning_rate": 4.771175373134328e-05, "loss": 0.0061, "step": 19624 }, { "epoch": 18.31, "learning_rate": 4.771128731343284e-05, "loss": 0.001, "step": 19628 }, { "epoch": 18.31, "learning_rate": 4.771082089552239e-05, "loss": 0.002, "step": 19632 }, { "epoch": 18.32, "learning_rate": 4.771035447761194e-05, "loss": 0.0012, "step": 19636 }, { "epoch": 18.32, "learning_rate": 4.7709888059701494e-05, "loss": 0.0, "step": 19640 }, { "epoch": 18.32, "learning_rate": 4.770942164179105e-05, "loss": 0.0019, "step": 19644 }, { "epoch": 18.33, "learning_rate": 4.77089552238806e-05, "loss": 0.0005, "step": 19648 }, { "epoch": 18.33, "learning_rate": 4.770848880597015e-05, "loss": 0.0058, "step": 19652 }, { "epoch": 18.34, "learning_rate": 4.7708022388059706e-05, "loss": 0.0007, "step": 19656 }, { "epoch": 18.34, "learning_rate": 4.7707555970149255e-05, "loss": 0.001, "step": 19660 }, { "epoch": 18.34, "learning_rate": 4.770708955223881e-05, "loss": 0.0023, "step": 19664 }, { "epoch": 18.35, "learning_rate": 4.770662313432836e-05, "loss": 0.0056, "step": 19668 }, { "epoch": 18.35, "learning_rate": 4.770615671641791e-05, "loss": 0.0024, "step": 19672 }, { "epoch": 18.35, "learning_rate": 4.770569029850747e-05, "loss": 0.0051, "step": 19676 }, { "epoch": 18.36, "learning_rate": 4.7705223880597016e-05, "loss": 0.0016, "step": 19680 }, { "epoch": 18.36, "learning_rate": 4.7704757462686564e-05, "loss": 0.0104, "step": 19684 }, { "epoch": 18.37, "learning_rate": 4.7704291044776125e-05, "loss": 0.0003, "step": 19688 }, { "epoch": 18.37, "learning_rate": 4.7703824626865673e-05, "loss": 0.0033, "step": 19692 }, { "epoch": 18.37, "learning_rate": 4.770335820895523e-05, "loss": 0.0005, "step": 19696 }, { "epoch": 18.38, "learning_rate": 4.7702891791044777e-05, "loss": 0.0015, "step": 19700 }, { "epoch": 18.38, "learning_rate": 4.770242537313433e-05, "loss": 0.0002, "step": 19704 }, { "epoch": 18.38, "learning_rate": 4.7701958955223886e-05, "loss": 0.0003, "step": 19708 }, { "epoch": 18.39, "learning_rate": 4.7701492537313434e-05, "loss": 0.0054, "step": 19712 }, { "epoch": 18.39, "learning_rate": 4.770102611940299e-05, "loss": 0.0025, "step": 19716 }, { "epoch": 18.4, "learning_rate": 4.7700559701492544e-05, "loss": 0.0014, "step": 19720 }, { "epoch": 18.4, "learning_rate": 4.770009328358209e-05, "loss": 0.0006, "step": 19724 }, { "epoch": 18.4, "learning_rate": 4.769962686567164e-05, "loss": 0.0008, "step": 19728 }, { "epoch": 18.41, "learning_rate": 4.7699160447761195e-05, "loss": 0.0006, "step": 19732 }, { "epoch": 18.41, "learning_rate": 4.769869402985075e-05, "loss": 0.0002, "step": 19736 }, { "epoch": 18.41, "learning_rate": 4.76982276119403e-05, "loss": 0.0044, "step": 19740 }, { "epoch": 18.42, "learning_rate": 4.7697761194029853e-05, "loss": 0.001, "step": 19744 }, { "epoch": 18.42, "learning_rate": 4.769729477611941e-05, "loss": 0.0011, "step": 19748 }, { "epoch": 18.43, "learning_rate": 4.7696828358208956e-05, "loss": 0.0009, "step": 19752 }, { "epoch": 18.43, "learning_rate": 4.769636194029851e-05, "loss": 0.0032, "step": 19756 }, { "epoch": 18.43, "learning_rate": 4.769589552238806e-05, "loss": 0.001, "step": 19760 }, { "epoch": 18.44, "learning_rate": 4.7695429104477614e-05, "loss": 0.0008, "step": 19764 }, { "epoch": 18.44, "learning_rate": 4.769496268656717e-05, "loss": 0.0022, "step": 19768 }, { "epoch": 18.44, "learning_rate": 4.769449626865672e-05, "loss": 0.0001, "step": 19772 }, { "epoch": 18.45, "learning_rate": 4.769402985074627e-05, "loss": 0.0003, "step": 19776 }, { "epoch": 18.45, "learning_rate": 4.769356343283583e-05, "loss": 0.0043, "step": 19780 }, { "epoch": 18.46, "learning_rate": 4.7693097014925375e-05, "loss": 0.0003, "step": 19784 }, { "epoch": 18.46, "learning_rate": 4.7692630597014924e-05, "loss": 0.0007, "step": 19788 }, { "epoch": 18.46, "learning_rate": 4.769216417910448e-05, "loss": 0.0013, "step": 19792 }, { "epoch": 18.47, "learning_rate": 4.769169776119403e-05, "loss": 0.0001, "step": 19796 }, { "epoch": 18.47, "learning_rate": 4.769123134328358e-05, "loss": 0.0005, "step": 19800 }, { "epoch": 18.47, "learning_rate": 4.7690764925373136e-05, "loss": 0.0015, "step": 19804 }, { "epoch": 18.48, "learning_rate": 4.769029850746269e-05, "loss": 0.0018, "step": 19808 }, { "epoch": 18.48, "learning_rate": 4.768983208955224e-05, "loss": 0.0104, "step": 19812 }, { "epoch": 18.49, "learning_rate": 4.7689365671641794e-05, "loss": 0.0005, "step": 19816 }, { "epoch": 18.49, "learning_rate": 4.768889925373134e-05, "loss": 0.0006, "step": 19820 }, { "epoch": 18.49, "learning_rate": 4.76884328358209e-05, "loss": 0.0001, "step": 19824 }, { "epoch": 18.5, "learning_rate": 4.768796641791045e-05, "loss": 0.0131, "step": 19828 }, { "epoch": 18.5, "learning_rate": 4.76875e-05, "loss": 0.0006, "step": 19832 }, { "epoch": 18.5, "learning_rate": 4.7687033582089555e-05, "loss": 0.0004, "step": 19836 }, { "epoch": 18.51, "learning_rate": 4.768656716417911e-05, "loss": 0.0004, "step": 19840 }, { "epoch": 18.51, "learning_rate": 4.768610074626866e-05, "loss": 0.0006, "step": 19844 }, { "epoch": 18.51, "learning_rate": 4.7685634328358206e-05, "loss": 0.0004, "step": 19848 }, { "epoch": 18.52, "learning_rate": 4.768516791044776e-05, "loss": 0.003, "step": 19852 }, { "epoch": 18.52, "learning_rate": 4.7684701492537316e-05, "loss": 0.0117, "step": 19856 }, { "epoch": 18.53, "learning_rate": 4.768423507462687e-05, "loss": 0.0017, "step": 19860 }, { "epoch": 18.53, "learning_rate": 4.768376865671642e-05, "loss": 0.003, "step": 19864 }, { "epoch": 18.53, "learning_rate": 4.7683302238805974e-05, "loss": 0.0009, "step": 19868 }, { "epoch": 18.54, "learning_rate": 4.768283582089553e-05, "loss": 0.0057, "step": 19872 }, { "epoch": 18.54, "learning_rate": 4.768236940298508e-05, "loss": 0.0018, "step": 19876 }, { "epoch": 18.54, "learning_rate": 4.7681902985074625e-05, "loss": 0.0009, "step": 19880 }, { "epoch": 18.55, "learning_rate": 4.768143656716419e-05, "loss": 0.0014, "step": 19884 }, { "epoch": 18.55, "learning_rate": 4.7680970149253735e-05, "loss": 0.0004, "step": 19888 }, { "epoch": 18.56, "learning_rate": 4.768050373134328e-05, "loss": 0.0036, "step": 19892 }, { "epoch": 18.56, "learning_rate": 4.768003731343284e-05, "loss": 0.0022, "step": 19896 }, { "epoch": 18.56, "learning_rate": 4.767957089552239e-05, "loss": 0.0007, "step": 19900 }, { "epoch": 18.57, "learning_rate": 4.767910447761194e-05, "loss": 0.0018, "step": 19904 }, { "epoch": 18.57, "learning_rate": 4.7678638059701496e-05, "loss": 0.0004, "step": 19908 }, { "epoch": 18.57, "learning_rate": 4.7678171641791044e-05, "loss": 0.0002, "step": 19912 }, { "epoch": 18.58, "learning_rate": 4.76777052238806e-05, "loss": 0.0021, "step": 19916 }, { "epoch": 18.58, "learning_rate": 4.7677238805970154e-05, "loss": 0.0014, "step": 19920 }, { "epoch": 18.59, "learning_rate": 4.76767723880597e-05, "loss": 0.014, "step": 19924 }, { "epoch": 18.59, "learning_rate": 4.767630597014926e-05, "loss": 0.0028, "step": 19928 }, { "epoch": 18.59, "learning_rate": 4.767583955223881e-05, "loss": 0.0004, "step": 19932 }, { "epoch": 18.6, "learning_rate": 4.767537313432836e-05, "loss": 0.0009, "step": 19936 }, { "epoch": 18.6, "learning_rate": 4.767490671641791e-05, "loss": 0.0011, "step": 19940 }, { "epoch": 18.6, "learning_rate": 4.767444029850747e-05, "loss": 0.0016, "step": 19944 }, { "epoch": 18.61, "learning_rate": 4.767397388059702e-05, "loss": 0.0238, "step": 19948 }, { "epoch": 18.61, "learning_rate": 4.7673507462686566e-05, "loss": 0.0036, "step": 19952 }, { "epoch": 18.62, "learning_rate": 4.767304104477612e-05, "loss": 0.0089, "step": 19956 }, { "epoch": 18.62, "learning_rate": 4.7672574626865676e-05, "loss": 0.0041, "step": 19960 }, { "epoch": 18.62, "learning_rate": 4.7672108208955224e-05, "loss": 0.0005, "step": 19964 }, { "epoch": 18.63, "learning_rate": 4.767164179104478e-05, "loss": 0.0002, "step": 19968 }, { "epoch": 18.63, "learning_rate": 4.767117537313433e-05, "loss": 0.0013, "step": 19972 }, { "epoch": 18.63, "learning_rate": 4.767070895522388e-05, "loss": 0.0007, "step": 19976 }, { "epoch": 18.64, "learning_rate": 4.767024253731344e-05, "loss": 0.0004, "step": 19980 }, { "epoch": 18.64, "learning_rate": 4.7669776119402985e-05, "loss": 0.001, "step": 19984 }, { "epoch": 18.65, "learning_rate": 4.766930970149254e-05, "loss": 0.0006, "step": 19988 }, { "epoch": 18.65, "learning_rate": 4.7668843283582095e-05, "loss": 0.0007, "step": 19992 }, { "epoch": 18.65, "learning_rate": 4.766837686567164e-05, "loss": 0.0006, "step": 19996 }, { "epoch": 18.66, "learning_rate": 4.766791044776119e-05, "loss": 0.0025, "step": 20000 }, { "epoch": 18.66, "eval_exact_match": 0.7263056092843327, "eval_exec": 0.7543520309477756, "eval_loss": 0.3301263451576233, "eval_runtime": 1520.1707, "eval_samples_per_second": 0.68, "step": 20000 }, { "epoch": 18.66, "learning_rate": 4.766744402985075e-05, "loss": 0.0024, "step": 20004 }, { "epoch": 18.66, "learning_rate": 4.76669776119403e-05, "loss": 0.0036, "step": 20008 }, { "epoch": 18.67, "learning_rate": 4.766651119402985e-05, "loss": 0.0009, "step": 20012 }, { "epoch": 18.67, "learning_rate": 4.7666044776119404e-05, "loss": 0.001, "step": 20016 }, { "epoch": 18.68, "learning_rate": 4.766557835820896e-05, "loss": 0.0146, "step": 20020 }, { "epoch": 18.68, "learning_rate": 4.7665111940298514e-05, "loss": 0.0006, "step": 20024 }, { "epoch": 18.68, "learning_rate": 4.766464552238806e-05, "loss": 0.0068, "step": 20028 }, { "epoch": 18.69, "learning_rate": 4.766417910447761e-05, "loss": 0.0021, "step": 20032 }, { "epoch": 18.69, "learning_rate": 4.766371268656717e-05, "loss": 0.0053, "step": 20036 }, { "epoch": 18.69, "learning_rate": 4.766324626865672e-05, "loss": 0.0006, "step": 20040 }, { "epoch": 18.7, "learning_rate": 4.766277985074627e-05, "loss": 0.0008, "step": 20044 }, { "epoch": 18.7, "learning_rate": 4.766231343283582e-05, "loss": 0.0032, "step": 20048 }, { "epoch": 18.71, "learning_rate": 4.766184701492538e-05, "loss": 0.0026, "step": 20052 }, { "epoch": 18.71, "learning_rate": 4.7661380597014926e-05, "loss": 0.0032, "step": 20056 }, { "epoch": 18.71, "learning_rate": 4.766091417910448e-05, "loss": 0.0131, "step": 20060 }, { "epoch": 18.72, "learning_rate": 4.766044776119403e-05, "loss": 0.0026, "step": 20064 }, { "epoch": 18.72, "learning_rate": 4.7659981343283584e-05, "loss": 0.0009, "step": 20068 }, { "epoch": 18.72, "learning_rate": 4.765951492537314e-05, "loss": 0.0003, "step": 20072 }, { "epoch": 18.73, "learning_rate": 4.765904850746269e-05, "loss": 0.0007, "step": 20076 }, { "epoch": 18.73, "learning_rate": 4.765858208955224e-05, "loss": 0.0048, "step": 20080 }, { "epoch": 18.73, "learning_rate": 4.76581156716418e-05, "loss": 0.0032, "step": 20084 }, { "epoch": 18.74, "learning_rate": 4.7657649253731345e-05, "loss": 0.0023, "step": 20088 }, { "epoch": 18.74, "learning_rate": 4.765718283582089e-05, "loss": 0.0005, "step": 20092 }, { "epoch": 18.75, "learning_rate": 4.7656716417910455e-05, "loss": 0.0006, "step": 20096 }, { "epoch": 18.75, "learning_rate": 4.765625e-05, "loss": 0.0026, "step": 20100 }, { "epoch": 18.75, "learning_rate": 4.765578358208955e-05, "loss": 0.0002, "step": 20104 }, { "epoch": 18.76, "learning_rate": 4.7655317164179106e-05, "loss": 0.0031, "step": 20108 }, { "epoch": 18.76, "learning_rate": 4.765485074626866e-05, "loss": 0.0003, "step": 20112 }, { "epoch": 18.76, "learning_rate": 4.765438432835821e-05, "loss": 0.0017, "step": 20116 }, { "epoch": 18.77, "learning_rate": 4.7653917910447764e-05, "loss": 0.0003, "step": 20120 }, { "epoch": 18.77, "learning_rate": 4.765345149253731e-05, "loss": 0.0011, "step": 20124 }, { "epoch": 18.78, "learning_rate": 4.765298507462687e-05, "loss": 0.0013, "step": 20128 }, { "epoch": 18.78, "learning_rate": 4.765251865671642e-05, "loss": 0.0071, "step": 20132 }, { "epoch": 18.78, "learning_rate": 4.765205223880597e-05, "loss": 0.0006, "step": 20136 }, { "epoch": 18.79, "learning_rate": 4.7651585820895525e-05, "loss": 0.0009, "step": 20140 }, { "epoch": 18.79, "learning_rate": 4.765111940298508e-05, "loss": 0.0018, "step": 20144 }, { "epoch": 18.79, "learning_rate": 4.765065298507463e-05, "loss": 0.0144, "step": 20148 }, { "epoch": 18.8, "learning_rate": 4.7650186567164176e-05, "loss": 0.0011, "step": 20152 }, { "epoch": 18.8, "learning_rate": 4.764972014925374e-05, "loss": 0.0033, "step": 20156 }, { "epoch": 18.81, "learning_rate": 4.7649253731343286e-05, "loss": 0.0064, "step": 20160 }, { "epoch": 18.81, "learning_rate": 4.7648787313432834e-05, "loss": 0.0016, "step": 20164 }, { "epoch": 18.81, "learning_rate": 4.764832089552239e-05, "loss": 0.0014, "step": 20168 }, { "epoch": 18.82, "learning_rate": 4.7647854477611944e-05, "loss": 0.0002, "step": 20172 }, { "epoch": 18.82, "learning_rate": 4.764738805970149e-05, "loss": 0.0019, "step": 20176 }, { "epoch": 18.82, "learning_rate": 4.764692164179105e-05, "loss": 0.0024, "step": 20180 }, { "epoch": 18.83, "learning_rate": 4.7646455223880595e-05, "loss": 0.005, "step": 20184 }, { "epoch": 18.83, "learning_rate": 4.764598880597016e-05, "loss": 0.0004, "step": 20188 }, { "epoch": 18.84, "learning_rate": 4.7645522388059705e-05, "loss": 0.0003, "step": 20192 }, { "epoch": 18.84, "learning_rate": 4.764505597014925e-05, "loss": 0.0008, "step": 20196 }, { "epoch": 18.84, "learning_rate": 4.764458955223881e-05, "loss": 0.0009, "step": 20200 }, { "epoch": 18.85, "learning_rate": 4.764412313432836e-05, "loss": 0.0004, "step": 20204 }, { "epoch": 18.85, "learning_rate": 4.764365671641791e-05, "loss": 0.0062, "step": 20208 }, { "epoch": 18.85, "learning_rate": 4.7643190298507466e-05, "loss": 0.0042, "step": 20212 }, { "epoch": 18.86, "learning_rate": 4.764272388059702e-05, "loss": 0.0016, "step": 20216 }, { "epoch": 18.86, "learning_rate": 4.764225746268657e-05, "loss": 0.0016, "step": 20220 }, { "epoch": 18.87, "learning_rate": 4.7641791044776124e-05, "loss": 0.0009, "step": 20224 }, { "epoch": 18.87, "learning_rate": 4.764132462686567e-05, "loss": 0.0018, "step": 20228 }, { "epoch": 18.87, "learning_rate": 4.764085820895523e-05, "loss": 0.0009, "step": 20232 }, { "epoch": 18.88, "learning_rate": 4.764039179104478e-05, "loss": 0.0021, "step": 20236 }, { "epoch": 18.88, "learning_rate": 4.763992537313433e-05, "loss": 0.0036, "step": 20240 }, { "epoch": 18.88, "learning_rate": 4.763945895522388e-05, "loss": 0.0006, "step": 20244 }, { "epoch": 18.89, "learning_rate": 4.763899253731344e-05, "loss": 0.0013, "step": 20248 }, { "epoch": 18.89, "learning_rate": 4.763852611940299e-05, "loss": 0.0019, "step": 20252 }, { "epoch": 18.9, "learning_rate": 4.7638059701492536e-05, "loss": 0.0003, "step": 20256 }, { "epoch": 18.9, "learning_rate": 4.763759328358209e-05, "loss": 0.0237, "step": 20260 }, { "epoch": 18.9, "learning_rate": 4.7637126865671646e-05, "loss": 0.0001, "step": 20264 }, { "epoch": 18.91, "learning_rate": 4.7636660447761194e-05, "loss": 0.0005, "step": 20268 }, { "epoch": 18.91, "learning_rate": 4.763619402985075e-05, "loss": 0.0016, "step": 20272 }, { "epoch": 18.91, "learning_rate": 4.7635727611940304e-05, "loss": 0.0026, "step": 20276 }, { "epoch": 18.92, "learning_rate": 4.763526119402985e-05, "loss": 0.0013, "step": 20280 }, { "epoch": 18.92, "learning_rate": 4.763479477611941e-05, "loss": 0.0007, "step": 20284 }, { "epoch": 18.93, "learning_rate": 4.7634328358208955e-05, "loss": 0.0037, "step": 20288 }, { "epoch": 18.93, "learning_rate": 4.763386194029851e-05, "loss": 0.0016, "step": 20292 }, { "epoch": 18.93, "learning_rate": 4.7633395522388065e-05, "loss": 0.001, "step": 20296 }, { "epoch": 18.94, "learning_rate": 4.763292910447761e-05, "loss": 0.0004, "step": 20300 }, { "epoch": 18.94, "learning_rate": 4.763246268656716e-05, "loss": 0.0015, "step": 20304 }, { "epoch": 18.94, "learning_rate": 4.763199626865672e-05, "loss": 0.0057, "step": 20308 }, { "epoch": 18.95, "learning_rate": 4.763152985074627e-05, "loss": 0.0001, "step": 20312 }, { "epoch": 18.95, "learning_rate": 4.763106343283582e-05, "loss": 0.0113, "step": 20316 }, { "epoch": 18.96, "learning_rate": 4.7630597014925374e-05, "loss": 0.0017, "step": 20320 }, { "epoch": 18.96, "learning_rate": 4.763013059701493e-05, "loss": 0.0024, "step": 20324 }, { "epoch": 18.96, "learning_rate": 4.762966417910448e-05, "loss": 0.0005, "step": 20328 }, { "epoch": 18.97, "learning_rate": 4.762919776119403e-05, "loss": 0.0051, "step": 20332 }, { "epoch": 18.97, "learning_rate": 4.762873134328359e-05, "loss": 0.0017, "step": 20336 }, { "epoch": 18.97, "learning_rate": 4.7628264925373135e-05, "loss": 0.0002, "step": 20340 }, { "epoch": 18.98, "learning_rate": 4.762779850746269e-05, "loss": 0.0024, "step": 20344 }, { "epoch": 18.98, "learning_rate": 4.762733208955224e-05, "loss": 0.007, "step": 20348 }, { "epoch": 18.98, "learning_rate": 4.762686567164179e-05, "loss": 0.0002, "step": 20352 }, { "epoch": 18.99, "learning_rate": 4.762639925373135e-05, "loss": 0.0005, "step": 20356 }, { "epoch": 18.99, "learning_rate": 4.7625932835820896e-05, "loss": 0.0012, "step": 20360 }, { "epoch": 19.0, "learning_rate": 4.762546641791045e-05, "loss": 0.0012, "step": 20364 }, { "epoch": 19.0, "learning_rate": 4.7625000000000006e-05, "loss": 0.0002, "step": 20368 }, { "epoch": 19.0, "learning_rate": 4.7624533582089554e-05, "loss": 0.0021, "step": 20372 }, { "epoch": 19.01, "learning_rate": 4.762406716417911e-05, "loss": 0.0027, "step": 20376 }, { "epoch": 19.01, "learning_rate": 4.762360074626866e-05, "loss": 0.0007, "step": 20380 }, { "epoch": 19.01, "learning_rate": 4.762313432835821e-05, "loss": 0.004, "step": 20384 }, { "epoch": 19.02, "learning_rate": 4.762266791044777e-05, "loss": 0.0004, "step": 20388 }, { "epoch": 19.02, "learning_rate": 4.7622201492537315e-05, "loss": 0.0001, "step": 20392 }, { "epoch": 19.03, "learning_rate": 4.762173507462687e-05, "loss": 0.0009, "step": 20396 }, { "epoch": 19.03, "learning_rate": 4.7621268656716425e-05, "loss": 0.0089, "step": 20400 }, { "epoch": 19.03, "learning_rate": 4.762080223880597e-05, "loss": 0.0002, "step": 20404 }, { "epoch": 19.04, "learning_rate": 4.762033582089552e-05, "loss": 0.0019, "step": 20408 }, { "epoch": 19.04, "learning_rate": 4.7619869402985076e-05, "loss": 0.0003, "step": 20412 }, { "epoch": 19.04, "learning_rate": 4.761940298507463e-05, "loss": 0.0013, "step": 20416 }, { "epoch": 19.05, "learning_rate": 4.761893656716418e-05, "loss": 0.0034, "step": 20420 }, { "epoch": 19.05, "learning_rate": 4.7618470149253734e-05, "loss": 0.0009, "step": 20424 }, { "epoch": 19.06, "learning_rate": 4.761800373134329e-05, "loss": 0.0014, "step": 20428 }, { "epoch": 19.06, "learning_rate": 4.761753731343284e-05, "loss": 0.0013, "step": 20432 }, { "epoch": 19.06, "learning_rate": 4.761707089552239e-05, "loss": 0.0007, "step": 20436 }, { "epoch": 19.07, "learning_rate": 4.761660447761194e-05, "loss": 0.0006, "step": 20440 }, { "epoch": 19.07, "learning_rate": 4.7616138059701495e-05, "loss": 0.0049, "step": 20444 }, { "epoch": 19.07, "learning_rate": 4.761567164179105e-05, "loss": 0.0066, "step": 20448 }, { "epoch": 19.08, "learning_rate": 4.76152052238806e-05, "loss": 0.0008, "step": 20452 }, { "epoch": 19.08, "learning_rate": 4.761473880597015e-05, "loss": 0.0011, "step": 20456 }, { "epoch": 19.09, "learning_rate": 4.761427238805971e-05, "loss": 0.0002, "step": 20460 }, { "epoch": 19.09, "learning_rate": 4.7613805970149256e-05, "loss": 0.0002, "step": 20464 }, { "epoch": 19.09, "learning_rate": 4.7613339552238804e-05, "loss": 0.0, "step": 20468 }, { "epoch": 19.1, "learning_rate": 4.761287313432836e-05, "loss": 0.0014, "step": 20472 }, { "epoch": 19.1, "learning_rate": 4.7612406716417914e-05, "loss": 0.0011, "step": 20476 }, { "epoch": 19.1, "learning_rate": 4.761194029850746e-05, "loss": 0.0005, "step": 20480 }, { "epoch": 19.11, "learning_rate": 4.761147388059702e-05, "loss": 0.0002, "step": 20484 }, { "epoch": 19.11, "learning_rate": 4.761100746268657e-05, "loss": 0.0027, "step": 20488 }, { "epoch": 19.12, "learning_rate": 4.761054104477612e-05, "loss": 0.0006, "step": 20492 }, { "epoch": 19.12, "learning_rate": 4.7610074626865675e-05, "loss": 0.0019, "step": 20496 }, { "epoch": 19.12, "learning_rate": 4.760960820895522e-05, "loss": 0.0001, "step": 20500 }, { "epoch": 19.12, "eval_exact_match": 0.7340425531914894, "eval_exec": 0.7601547388781431, "eval_loss": 0.35038140416145325, "eval_runtime": 1096.2977, "eval_samples_per_second": 0.943, "step": 20500 }, { "epoch": 19.13, "learning_rate": 4.760914179104478e-05, "loss": 0.001, "step": 20504 }, { "epoch": 19.13, "learning_rate": 4.760867537313433e-05, "loss": 0.0015, "step": 20508 }, { "epoch": 19.13, "learning_rate": 4.760820895522388e-05, "loss": 0.0236, "step": 20512 }, { "epoch": 19.14, "learning_rate": 4.7607742537313436e-05, "loss": 0.0016, "step": 20516 }, { "epoch": 19.14, "learning_rate": 4.760727611940299e-05, "loss": 0.0006, "step": 20520 }, { "epoch": 19.15, "learning_rate": 4.760680970149254e-05, "loss": 0.0004, "step": 20524 }, { "epoch": 19.15, "learning_rate": 4.7606343283582094e-05, "loss": 0.0001, "step": 20528 }, { "epoch": 19.15, "learning_rate": 4.760587686567164e-05, "loss": 0.0006, "step": 20532 }, { "epoch": 19.16, "learning_rate": 4.7605410447761197e-05, "loss": 0.0021, "step": 20536 }, { "epoch": 19.16, "learning_rate": 4.760494402985075e-05, "loss": 0.0039, "step": 20540 }, { "epoch": 19.16, "learning_rate": 4.76044776119403e-05, "loss": 0.0032, "step": 20544 }, { "epoch": 19.17, "learning_rate": 4.7604011194029855e-05, "loss": 0.0026, "step": 20548 }, { "epoch": 19.17, "learning_rate": 4.760354477611941e-05, "loss": 0.0011, "step": 20552 }, { "epoch": 19.18, "learning_rate": 4.760307835820896e-05, "loss": 0.0015, "step": 20556 }, { "epoch": 19.18, "learning_rate": 4.7602611940298506e-05, "loss": 0.0033, "step": 20560 }, { "epoch": 19.18, "learning_rate": 4.760214552238807e-05, "loss": 0.0002, "step": 20564 }, { "epoch": 19.19, "learning_rate": 4.7601679104477616e-05, "loss": 0.002, "step": 20568 }, { "epoch": 19.19, "learning_rate": 4.7601212686567164e-05, "loss": 0.009, "step": 20572 }, { "epoch": 19.19, "learning_rate": 4.760074626865672e-05, "loss": 0.0006, "step": 20576 }, { "epoch": 19.2, "learning_rate": 4.7600279850746273e-05, "loss": 0.0002, "step": 20580 }, { "epoch": 19.2, "learning_rate": 4.759981343283582e-05, "loss": 0.0012, "step": 20584 }, { "epoch": 19.21, "learning_rate": 4.7599347014925376e-05, "loss": 0.0019, "step": 20588 }, { "epoch": 19.21, "learning_rate": 4.7598880597014925e-05, "loss": 0.0005, "step": 20592 }, { "epoch": 19.21, "learning_rate": 4.759841417910448e-05, "loss": 0.0133, "step": 20596 }, { "epoch": 19.22, "learning_rate": 4.7597947761194034e-05, "loss": 0.0001, "step": 20600 }, { "epoch": 19.22, "learning_rate": 4.759748134328358e-05, "loss": 0.0013, "step": 20604 }, { "epoch": 19.22, "learning_rate": 4.759701492537314e-05, "loss": 0.0002, "step": 20608 }, { "epoch": 19.23, "learning_rate": 4.759654850746269e-05, "loss": 0.0044, "step": 20612 }, { "epoch": 19.23, "learning_rate": 4.759608208955224e-05, "loss": 0.0002, "step": 20616 }, { "epoch": 19.24, "learning_rate": 4.759561567164179e-05, "loss": 0.0007, "step": 20620 }, { "epoch": 19.24, "learning_rate": 4.759514925373135e-05, "loss": 0.0018, "step": 20624 }, { "epoch": 19.24, "learning_rate": 4.75946828358209e-05, "loss": 0.0061, "step": 20628 }, { "epoch": 19.25, "learning_rate": 4.7594216417910447e-05, "loss": 0.0014, "step": 20632 }, { "epoch": 19.25, "learning_rate": 4.759375e-05, "loss": 0.0011, "step": 20636 }, { "epoch": 19.25, "learning_rate": 4.7593283582089556e-05, "loss": 0.0002, "step": 20640 }, { "epoch": 19.26, "learning_rate": 4.7592817164179105e-05, "loss": 0.0028, "step": 20644 }, { "epoch": 19.26, "learning_rate": 4.759235074626866e-05, "loss": 0.0021, "step": 20648 }, { "epoch": 19.26, "learning_rate": 4.759188432835821e-05, "loss": 0.0002, "step": 20652 }, { "epoch": 19.27, "learning_rate": 4.759141791044776e-05, "loss": 0.0008, "step": 20656 }, { "epoch": 19.27, "learning_rate": 4.759095149253732e-05, "loss": 0.0021, "step": 20660 }, { "epoch": 19.28, "learning_rate": 4.7590485074626866e-05, "loss": 0.0035, "step": 20664 }, { "epoch": 19.28, "learning_rate": 4.759001865671642e-05, "loss": 0.0002, "step": 20668 }, { "epoch": 19.28, "learning_rate": 4.7589552238805975e-05, "loss": 0.0004, "step": 20672 }, { "epoch": 19.29, "learning_rate": 4.7589085820895523e-05, "loss": 0.0007, "step": 20676 }, { "epoch": 19.29, "learning_rate": 4.758861940298508e-05, "loss": 0.0005, "step": 20680 }, { "epoch": 19.29, "learning_rate": 4.758815298507463e-05, "loss": 0.0002, "step": 20684 }, { "epoch": 19.3, "learning_rate": 4.758768656716418e-05, "loss": 0.0005, "step": 20688 }, { "epoch": 19.3, "learning_rate": 4.7587220149253736e-05, "loss": 0.0001, "step": 20692 }, { "epoch": 19.31, "learning_rate": 4.7586753731343284e-05, "loss": 0.0017, "step": 20696 }, { "epoch": 19.31, "learning_rate": 4.758628731343284e-05, "loss": 0.0006, "step": 20700 }, { "epoch": 19.31, "learning_rate": 4.7585820895522394e-05, "loss": 0.0003, "step": 20704 }, { "epoch": 19.32, "learning_rate": 4.758535447761194e-05, "loss": 0.0007, "step": 20708 }, { "epoch": 19.32, "learning_rate": 4.758488805970149e-05, "loss": 0.0018, "step": 20712 }, { "epoch": 19.32, "learning_rate": 4.758442164179105e-05, "loss": 0.0003, "step": 20716 }, { "epoch": 19.33, "learning_rate": 4.75839552238806e-05, "loss": 0.0039, "step": 20720 }, { "epoch": 19.33, "learning_rate": 4.758348880597015e-05, "loss": 0.002, "step": 20724 }, { "epoch": 19.34, "learning_rate": 4.7583022388059703e-05, "loss": 0.0006, "step": 20728 }, { "epoch": 19.34, "learning_rate": 4.758255597014926e-05, "loss": 0.0009, "step": 20732 }, { "epoch": 19.34, "learning_rate": 4.7582089552238806e-05, "loss": 0.0011, "step": 20736 }, { "epoch": 19.35, "learning_rate": 4.758162313432836e-05, "loss": 0.0001, "step": 20740 }, { "epoch": 19.35, "learning_rate": 4.758115671641791e-05, "loss": 0.0014, "step": 20744 }, { "epoch": 19.35, "learning_rate": 4.7580690298507464e-05, "loss": 0.0003, "step": 20748 }, { "epoch": 19.36, "learning_rate": 4.758022388059702e-05, "loss": 0.0003, "step": 20752 }, { "epoch": 19.36, "learning_rate": 4.757975746268657e-05, "loss": 0.0028, "step": 20756 }, { "epoch": 19.37, "learning_rate": 4.757929104477612e-05, "loss": 0.0005, "step": 20760 }, { "epoch": 19.37, "learning_rate": 4.757882462686568e-05, "loss": 0.0004, "step": 20764 }, { "epoch": 19.37, "learning_rate": 4.7578358208955225e-05, "loss": 0.0011, "step": 20768 }, { "epoch": 19.38, "learning_rate": 4.7577891791044773e-05, "loss": 0.0003, "step": 20772 }, { "epoch": 19.38, "learning_rate": 4.7577425373134335e-05, "loss": 0.0002, "step": 20776 }, { "epoch": 19.38, "learning_rate": 4.757695895522388e-05, "loss": 0.0001, "step": 20780 }, { "epoch": 19.39, "learning_rate": 4.757649253731343e-05, "loss": 0.0002, "step": 20784 }, { "epoch": 19.39, "learning_rate": 4.7576026119402986e-05, "loss": 0.003, "step": 20788 }, { "epoch": 19.4, "learning_rate": 4.757555970149254e-05, "loss": 0.0002, "step": 20792 }, { "epoch": 19.4, "learning_rate": 4.757509328358209e-05, "loss": 0.0017, "step": 20796 }, { "epoch": 19.4, "learning_rate": 4.7574626865671644e-05, "loss": 0.0032, "step": 20800 }, { "epoch": 19.41, "learning_rate": 4.757416044776119e-05, "loss": 0.0023, "step": 20804 }, { "epoch": 19.41, "learning_rate": 4.757369402985075e-05, "loss": 0.0003, "step": 20808 }, { "epoch": 19.41, "learning_rate": 4.75732276119403e-05, "loss": 0.0037, "step": 20812 }, { "epoch": 19.42, "learning_rate": 4.757276119402985e-05, "loss": 0.0008, "step": 20816 }, { "epoch": 19.42, "learning_rate": 4.7572294776119405e-05, "loss": 0.0021, "step": 20820 }, { "epoch": 19.43, "learning_rate": 4.757182835820896e-05, "loss": 0.0005, "step": 20824 }, { "epoch": 19.43, "learning_rate": 4.757136194029851e-05, "loss": 0.0001, "step": 20828 }, { "epoch": 19.43, "learning_rate": 4.7570895522388056e-05, "loss": 0.0001, "step": 20832 }, { "epoch": 19.44, "learning_rate": 4.757042910447762e-05, "loss": 0.0005, "step": 20836 }, { "epoch": 19.44, "learning_rate": 4.7569962686567166e-05, "loss": 0.002, "step": 20840 }, { "epoch": 19.44, "learning_rate": 4.756949626865672e-05, "loss": 0.0021, "step": 20844 }, { "epoch": 19.45, "learning_rate": 4.756902985074627e-05, "loss": 0.0001, "step": 20848 }, { "epoch": 19.45, "learning_rate": 4.7568563432835824e-05, "loss": 0.0032, "step": 20852 }, { "epoch": 19.46, "learning_rate": 4.756809701492538e-05, "loss": 0.0003, "step": 20856 }, { "epoch": 19.46, "learning_rate": 4.756763059701493e-05, "loss": 0.0038, "step": 20860 }, { "epoch": 19.46, "learning_rate": 4.7567164179104475e-05, "loss": 0.0087, "step": 20864 }, { "epoch": 19.47, "learning_rate": 4.756669776119404e-05, "loss": 0.0025, "step": 20868 }, { "epoch": 19.47, "learning_rate": 4.7566231343283585e-05, "loss": 0.0016, "step": 20872 }, { "epoch": 19.47, "learning_rate": 4.756576492537313e-05, "loss": 0.0013, "step": 20876 }, { "epoch": 19.48, "learning_rate": 4.756529850746269e-05, "loss": 0.0016, "step": 20880 }, { "epoch": 19.48, "learning_rate": 4.756483208955224e-05, "loss": 0.0001, "step": 20884 }, { "epoch": 19.49, "learning_rate": 4.756436567164179e-05, "loss": 0.0016, "step": 20888 }, { "epoch": 19.49, "learning_rate": 4.7563899253731346e-05, "loss": 0.0056, "step": 20892 }, { "epoch": 19.49, "learning_rate": 4.75634328358209e-05, "loss": 0.0002, "step": 20896 }, { "epoch": 19.5, "learning_rate": 4.756296641791045e-05, "loss": 0.0001, "step": 20900 }, { "epoch": 19.5, "learning_rate": 4.7562500000000004e-05, "loss": 0.0047, "step": 20904 }, { "epoch": 19.5, "learning_rate": 4.756203358208955e-05, "loss": 0.0059, "step": 20908 }, { "epoch": 19.51, "learning_rate": 4.756156716417911e-05, "loss": 0.0003, "step": 20912 }, { "epoch": 19.51, "learning_rate": 4.756110074626866e-05, "loss": 0.0013, "step": 20916 }, { "epoch": 19.51, "learning_rate": 4.756063432835821e-05, "loss": 0.0007, "step": 20920 }, { "epoch": 19.52, "learning_rate": 4.756016791044776e-05, "loss": 0.0058, "step": 20924 }, { "epoch": 19.52, "learning_rate": 4.755970149253732e-05, "loss": 0.0018, "step": 20928 }, { "epoch": 19.53, "learning_rate": 4.755923507462687e-05, "loss": 0.0001, "step": 20932 }, { "epoch": 19.53, "learning_rate": 4.7558768656716416e-05, "loss": 0.0082, "step": 20936 }, { "epoch": 19.53, "learning_rate": 4.755830223880597e-05, "loss": 0.0066, "step": 20940 }, { "epoch": 19.54, "learning_rate": 4.7557835820895526e-05, "loss": 0.0064, "step": 20944 }, { "epoch": 19.54, "learning_rate": 4.7557369402985074e-05, "loss": 0.0015, "step": 20948 }, { "epoch": 19.54, "learning_rate": 4.755690298507463e-05, "loss": 0.0042, "step": 20952 }, { "epoch": 19.55, "learning_rate": 4.7556436567164184e-05, "loss": 0.0006, "step": 20956 }, { "epoch": 19.55, "learning_rate": 4.755597014925373e-05, "loss": 0.0001, "step": 20960 }, { "epoch": 19.56, "learning_rate": 4.755550373134329e-05, "loss": 0.0042, "step": 20964 }, { "epoch": 19.56, "learning_rate": 4.7555037313432835e-05, "loss": 0.0001, "step": 20968 }, { "epoch": 19.56, "learning_rate": 4.755457089552239e-05, "loss": 0.0044, "step": 20972 }, { "epoch": 19.57, "learning_rate": 4.7554104477611945e-05, "loss": 0.0087, "step": 20976 }, { "epoch": 19.57, "learning_rate": 4.755363805970149e-05, "loss": 0.0023, "step": 20980 }, { "epoch": 19.57, "learning_rate": 4.755317164179104e-05, "loss": 0.0004, "step": 20984 }, { "epoch": 19.58, "learning_rate": 4.75527052238806e-05, "loss": 0.0004, "step": 20988 }, { "epoch": 19.58, "learning_rate": 4.755223880597015e-05, "loss": 0.0007, "step": 20992 }, { "epoch": 19.59, "learning_rate": 4.75517723880597e-05, "loss": 0.0063, "step": 20996 }, { "epoch": 19.59, "learning_rate": 4.7551305970149254e-05, "loss": 0.0003, "step": 21000 }, { "epoch": 19.59, "eval_exact_match": 0.7253384912959381, "eval_exec": 0.7553191489361702, "eval_loss": 0.33756983280181885, "eval_runtime": 1072.0397, "eval_samples_per_second": 0.965, "step": 21000 }, { "epoch": 19.59, "learning_rate": 4.755083955223881e-05, "loss": 0.0037, "step": 21004 }, { "epoch": 19.6, "learning_rate": 4.7550373134328364e-05, "loss": 0.0005, "step": 21008 }, { "epoch": 19.6, "learning_rate": 4.754990671641791e-05, "loss": 0.0002, "step": 21012 }, { "epoch": 19.6, "learning_rate": 4.754944029850747e-05, "loss": 0.0008, "step": 21016 }, { "epoch": 19.61, "learning_rate": 4.754897388059702e-05, "loss": 0.0061, "step": 21020 }, { "epoch": 19.61, "learning_rate": 4.754850746268657e-05, "loss": 0.0014, "step": 21024 }, { "epoch": 19.62, "learning_rate": 4.754804104477612e-05, "loss": 0.0056, "step": 21028 }, { "epoch": 19.62, "learning_rate": 4.754757462686567e-05, "loss": 0.0009, "step": 21032 }, { "epoch": 19.62, "learning_rate": 4.754710820895523e-05, "loss": 0.0099, "step": 21036 }, { "epoch": 19.63, "learning_rate": 4.7546641791044776e-05, "loss": 0.0005, "step": 21040 }, { "epoch": 19.63, "learning_rate": 4.754617537313433e-05, "loss": 0.0065, "step": 21044 }, { "epoch": 19.63, "learning_rate": 4.7545708955223886e-05, "loss": 0.0012, "step": 21048 }, { "epoch": 19.64, "learning_rate": 4.7545242537313434e-05, "loss": 0.0002, "step": 21052 }, { "epoch": 19.64, "learning_rate": 4.754477611940299e-05, "loss": 0.0021, "step": 21056 }, { "epoch": 19.65, "learning_rate": 4.754430970149254e-05, "loss": 0.0005, "step": 21060 }, { "epoch": 19.65, "learning_rate": 4.754384328358209e-05, "loss": 0.0007, "step": 21064 }, { "epoch": 19.65, "learning_rate": 4.754337686567165e-05, "loss": 0.0002, "step": 21068 }, { "epoch": 19.66, "learning_rate": 4.7542910447761195e-05, "loss": 0.0032, "step": 21072 }, { "epoch": 19.66, "learning_rate": 4.754244402985075e-05, "loss": 0.0073, "step": 21076 }, { "epoch": 19.66, "learning_rate": 4.7541977611940305e-05, "loss": 0.0034, "step": 21080 }, { "epoch": 19.67, "learning_rate": 4.754151119402985e-05, "loss": 0.0011, "step": 21084 }, { "epoch": 19.67, "learning_rate": 4.75410447761194e-05, "loss": 0.0085, "step": 21088 }, { "epoch": 19.68, "learning_rate": 4.7540578358208956e-05, "loss": 0.0041, "step": 21092 }, { "epoch": 19.68, "learning_rate": 4.754011194029851e-05, "loss": 0.0002, "step": 21096 }, { "epoch": 19.68, "learning_rate": 4.753964552238806e-05, "loss": 0.0033, "step": 21100 }, { "epoch": 19.69, "learning_rate": 4.7539179104477614e-05, "loss": 0.0009, "step": 21104 }, { "epoch": 19.69, "learning_rate": 4.753871268656717e-05, "loss": 0.0004, "step": 21108 }, { "epoch": 19.69, "learning_rate": 4.753824626865672e-05, "loss": 0.0017, "step": 21112 }, { "epoch": 19.7, "learning_rate": 4.753777985074627e-05, "loss": 0.0024, "step": 21116 }, { "epoch": 19.7, "learning_rate": 4.753731343283582e-05, "loss": 0.0003, "step": 21120 }, { "epoch": 19.71, "learning_rate": 4.7536847014925375e-05, "loss": 0.0007, "step": 21124 }, { "epoch": 19.71, "learning_rate": 4.753638059701493e-05, "loss": 0.0053, "step": 21128 }, { "epoch": 19.71, "learning_rate": 4.753591417910448e-05, "loss": 0.0009, "step": 21132 }, { "epoch": 19.72, "learning_rate": 4.753544776119403e-05, "loss": 0.0008, "step": 21136 }, { "epoch": 19.72, "learning_rate": 4.753498134328359e-05, "loss": 0.0035, "step": 21140 }, { "epoch": 19.72, "learning_rate": 4.7534514925373136e-05, "loss": 0.0008, "step": 21144 }, { "epoch": 19.73, "learning_rate": 4.7534048507462684e-05, "loss": 0.0029, "step": 21148 }, { "epoch": 19.73, "learning_rate": 4.753358208955224e-05, "loss": 0.0089, "step": 21152 }, { "epoch": 19.73, "learning_rate": 4.7533115671641794e-05, "loss": 0.0034, "step": 21156 }, { "epoch": 19.74, "learning_rate": 4.753264925373134e-05, "loss": 0.0016, "step": 21160 }, { "epoch": 19.74, "learning_rate": 4.75321828358209e-05, "loss": 0.0006, "step": 21164 }, { "epoch": 19.75, "learning_rate": 4.753171641791045e-05, "loss": 0.0003, "step": 21168 }, { "epoch": 19.75, "learning_rate": 4.753125000000001e-05, "loss": 0.0036, "step": 21172 }, { "epoch": 19.75, "learning_rate": 4.7530783582089555e-05, "loss": 0.0001, "step": 21176 }, { "epoch": 19.76, "learning_rate": 4.75303171641791e-05, "loss": 0.0003, "step": 21180 }, { "epoch": 19.76, "learning_rate": 4.7529850746268665e-05, "loss": 0.001, "step": 21184 }, { "epoch": 19.76, "learning_rate": 4.752938432835821e-05, "loss": 0.0008, "step": 21188 }, { "epoch": 19.77, "learning_rate": 4.752891791044776e-05, "loss": 0.0002, "step": 21192 }, { "epoch": 19.77, "learning_rate": 4.7528451492537316e-05, "loss": 0.0001, "step": 21196 }, { "epoch": 19.78, "learning_rate": 4.752798507462687e-05, "loss": 0.0001, "step": 21200 }, { "epoch": 19.78, "learning_rate": 4.752751865671642e-05, "loss": 0.0011, "step": 21204 }, { "epoch": 19.78, "learning_rate": 4.7527052238805974e-05, "loss": 0.0002, "step": 21208 }, { "epoch": 19.79, "learning_rate": 4.752658582089552e-05, "loss": 0.0024, "step": 21212 }, { "epoch": 19.79, "learning_rate": 4.752611940298508e-05, "loss": 0.0023, "step": 21216 }, { "epoch": 19.79, "learning_rate": 4.752565298507463e-05, "loss": 0.0044, "step": 21220 }, { "epoch": 19.8, "learning_rate": 4.752518656716418e-05, "loss": 0.001, "step": 21224 }, { "epoch": 19.8, "learning_rate": 4.7524720149253735e-05, "loss": 0.001, "step": 21228 }, { "epoch": 19.81, "learning_rate": 4.752425373134329e-05, "loss": 0.0075, "step": 21232 }, { "epoch": 19.81, "learning_rate": 4.752378731343284e-05, "loss": 0.0005, "step": 21236 }, { "epoch": 19.81, "learning_rate": 4.7523320895522386e-05, "loss": 0.0036, "step": 21240 }, { "epoch": 19.82, "learning_rate": 4.752285447761195e-05, "loss": 0.0015, "step": 21244 }, { "epoch": 19.82, "learning_rate": 4.7522388059701496e-05, "loss": 0.0062, "step": 21248 }, { "epoch": 19.82, "learning_rate": 4.7521921641791044e-05, "loss": 0.0002, "step": 21252 }, { "epoch": 19.83, "learning_rate": 4.75214552238806e-05, "loss": 0.0007, "step": 21256 }, { "epoch": 19.83, "learning_rate": 4.7520988805970154e-05, "loss": 0.0002, "step": 21260 }, { "epoch": 19.84, "learning_rate": 4.75205223880597e-05, "loss": 0.0016, "step": 21264 }, { "epoch": 19.84, "learning_rate": 4.752005597014926e-05, "loss": 0.0025, "step": 21268 }, { "epoch": 19.84, "learning_rate": 4.7519589552238805e-05, "loss": 0.0002, "step": 21272 }, { "epoch": 19.85, "learning_rate": 4.751912313432836e-05, "loss": 0.0004, "step": 21276 }, { "epoch": 19.85, "learning_rate": 4.7518656716417915e-05, "loss": 0.0004, "step": 21280 }, { "epoch": 19.85, "learning_rate": 4.751819029850746e-05, "loss": 0.0002, "step": 21284 }, { "epoch": 19.86, "learning_rate": 4.751772388059702e-05, "loss": 0.0125, "step": 21288 }, { "epoch": 19.86, "learning_rate": 4.751725746268657e-05, "loss": 0.0006, "step": 21292 }, { "epoch": 19.87, "learning_rate": 4.751679104477612e-05, "loss": 0.0034, "step": 21296 }, { "epoch": 19.87, "learning_rate": 4.751632462686567e-05, "loss": 0.011, "step": 21300 }, { "epoch": 19.87, "learning_rate": 4.751585820895523e-05, "loss": 0.0026, "step": 21304 }, { "epoch": 19.88, "learning_rate": 4.751539179104478e-05, "loss": 0.0015, "step": 21308 }, { "epoch": 19.88, "learning_rate": 4.751492537313433e-05, "loss": 0.0013, "step": 21312 }, { "epoch": 19.88, "learning_rate": 4.751445895522388e-05, "loss": 0.0002, "step": 21316 }, { "epoch": 19.89, "learning_rate": 4.751399253731344e-05, "loss": 0.0001, "step": 21320 }, { "epoch": 19.89, "learning_rate": 4.7513526119402985e-05, "loss": 0.0036, "step": 21324 }, { "epoch": 19.9, "learning_rate": 4.751305970149254e-05, "loss": 0.0013, "step": 21328 }, { "epoch": 19.9, "learning_rate": 4.751259328358209e-05, "loss": 0.0009, "step": 21332 }, { "epoch": 19.9, "learning_rate": 4.751212686567165e-05, "loss": 0.0024, "step": 21336 }, { "epoch": 19.91, "learning_rate": 4.75116604477612e-05, "loss": 0.008, "step": 21340 }, { "epoch": 19.91, "learning_rate": 4.7511194029850746e-05, "loss": 0.0018, "step": 21344 }, { "epoch": 19.91, "learning_rate": 4.75107276119403e-05, "loss": 0.0031, "step": 21348 }, { "epoch": 19.92, "learning_rate": 4.7510261194029856e-05, "loss": 0.0002, "step": 21352 }, { "epoch": 19.92, "learning_rate": 4.7509794776119404e-05, "loss": 0.0017, "step": 21356 }, { "epoch": 19.93, "learning_rate": 4.750932835820896e-05, "loss": 0.0004, "step": 21360 }, { "epoch": 19.93, "learning_rate": 4.7508861940298514e-05, "loss": 0.0022, "step": 21364 }, { "epoch": 19.93, "learning_rate": 4.750839552238806e-05, "loss": 0.0005, "step": 21368 }, { "epoch": 19.94, "learning_rate": 4.750792910447762e-05, "loss": 0.0028, "step": 21372 }, { "epoch": 19.94, "learning_rate": 4.7507462686567165e-05, "loss": 0.0007, "step": 21376 }, { "epoch": 19.94, "learning_rate": 4.750699626865672e-05, "loss": 0.0053, "step": 21380 }, { "epoch": 19.95, "learning_rate": 4.7506529850746275e-05, "loss": 0.0003, "step": 21384 }, { "epoch": 19.95, "learning_rate": 4.750606343283582e-05, "loss": 0.0004, "step": 21388 }, { "epoch": 19.96, "learning_rate": 4.750559701492537e-05, "loss": 0.0046, "step": 21392 }, { "epoch": 19.96, "learning_rate": 4.750513059701493e-05, "loss": 0.0003, "step": 21396 }, { "epoch": 19.96, "learning_rate": 4.750466417910448e-05, "loss": 0.0001, "step": 21400 }, { "epoch": 19.97, "learning_rate": 4.750419776119403e-05, "loss": 0.0012, "step": 21404 }, { "epoch": 19.97, "learning_rate": 4.7503731343283584e-05, "loss": 0.0019, "step": 21408 }, { "epoch": 19.97, "learning_rate": 4.750326492537314e-05, "loss": 0.0003, "step": 21412 }, { "epoch": 19.98, "learning_rate": 4.750279850746269e-05, "loss": 0.0007, "step": 21416 }, { "epoch": 19.98, "learning_rate": 4.750233208955224e-05, "loss": 0.0089, "step": 21420 }, { "epoch": 19.98, "learning_rate": 4.750186567164179e-05, "loss": 0.0011, "step": 21424 }, { "epoch": 19.99, "learning_rate": 4.7501399253731345e-05, "loss": 0.0011, "step": 21428 }, { "epoch": 19.99, "learning_rate": 4.75009328358209e-05, "loss": 0.001, "step": 21432 }, { "epoch": 20.0, "learning_rate": 4.750046641791045e-05, "loss": 0.0058, "step": 21436 }, { "epoch": 20.0, "learning_rate": 4.75e-05, "loss": 0.0003, "step": 21440 }, { "epoch": 20.0, "learning_rate": 4.749953358208956e-05, "loss": 0.0001, "step": 21444 }, { "epoch": 20.01, "learning_rate": 4.7499067164179106e-05, "loss": 0.0005, "step": 21448 }, { "epoch": 20.01, "learning_rate": 4.7498600746268654e-05, "loss": 0.0018, "step": 21452 }, { "epoch": 20.01, "learning_rate": 4.7498134328358215e-05, "loss": 0.0005, "step": 21456 }, { "epoch": 20.02, "learning_rate": 4.7497667910447764e-05, "loss": 0.0027, "step": 21460 }, { "epoch": 20.02, "learning_rate": 4.749720149253731e-05, "loss": 0.0005, "step": 21464 }, { "epoch": 20.03, "learning_rate": 4.749673507462687e-05, "loss": 0.0005, "step": 21468 }, { "epoch": 20.03, "learning_rate": 4.749626865671642e-05, "loss": 0.0014, "step": 21472 }, { "epoch": 20.03, "learning_rate": 4.749580223880597e-05, "loss": 0.001, "step": 21476 }, { "epoch": 20.04, "learning_rate": 4.7495335820895525e-05, "loss": 0.0003, "step": 21480 }, { "epoch": 20.04, "learning_rate": 4.749486940298507e-05, "loss": 0.0005, "step": 21484 }, { "epoch": 20.04, "learning_rate": 4.749440298507463e-05, "loss": 0.0003, "step": 21488 }, { "epoch": 20.05, "learning_rate": 4.749393656716418e-05, "loss": 0.0037, "step": 21492 }, { "epoch": 20.05, "learning_rate": 4.749347014925373e-05, "loss": 0.0005, "step": 21496 }, { "epoch": 20.06, "learning_rate": 4.7493003731343286e-05, "loss": 0.0023, "step": 21500 }, { "epoch": 20.06, "eval_exact_match": 0.7224371373307543, "eval_exec": 0.7562862669245648, "eval_loss": 0.34417060017585754, "eval_runtime": 1073.2583, "eval_samples_per_second": 0.963, "step": 21500 }, { "epoch": 20.06, "learning_rate": 4.749253731343284e-05, "loss": 0.0007, "step": 21504 }, { "epoch": 20.06, "learning_rate": 4.749207089552239e-05, "loss": 0.0026, "step": 21508 }, { "epoch": 20.07, "learning_rate": 4.7491604477611944e-05, "loss": 0.0012, "step": 21512 }, { "epoch": 20.07, "learning_rate": 4.74911380597015e-05, "loss": 0.0002, "step": 21516 }, { "epoch": 20.07, "learning_rate": 4.7490671641791047e-05, "loss": 0.0004, "step": 21520 }, { "epoch": 20.08, "learning_rate": 4.74902052238806e-05, "loss": 0.009, "step": 21524 }, { "epoch": 20.08, "learning_rate": 4.748973880597015e-05, "loss": 0.0002, "step": 21528 }, { "epoch": 20.09, "learning_rate": 4.7489272388059705e-05, "loss": 0.0004, "step": 21532 }, { "epoch": 20.09, "learning_rate": 4.748880597014926e-05, "loss": 0.0026, "step": 21536 }, { "epoch": 20.09, "learning_rate": 4.748833955223881e-05, "loss": 0.0007, "step": 21540 }, { "epoch": 20.1, "learning_rate": 4.7487873134328356e-05, "loss": 0.0021, "step": 21544 }, { "epoch": 20.1, "learning_rate": 4.748740671641792e-05, "loss": 0.0003, "step": 21548 }, { "epoch": 20.1, "learning_rate": 4.7486940298507465e-05, "loss": 0.0002, "step": 21552 }, { "epoch": 20.11, "learning_rate": 4.7486473880597014e-05, "loss": 0.0034, "step": 21556 }, { "epoch": 20.11, "learning_rate": 4.748600746268657e-05, "loss": 0.0003, "step": 21560 }, { "epoch": 20.12, "learning_rate": 4.7485541044776123e-05, "loss": 0.0003, "step": 21564 }, { "epoch": 20.12, "learning_rate": 4.748507462686567e-05, "loss": 0.0048, "step": 21568 }, { "epoch": 20.12, "learning_rate": 4.7484608208955226e-05, "loss": 0.0008, "step": 21572 }, { "epoch": 20.13, "learning_rate": 4.748414179104478e-05, "loss": 0.002, "step": 21576 }, { "epoch": 20.13, "learning_rate": 4.748367537313433e-05, "loss": 0.0005, "step": 21580 }, { "epoch": 20.13, "learning_rate": 4.7483208955223884e-05, "loss": 0.0014, "step": 21584 }, { "epoch": 20.14, "learning_rate": 4.748274253731343e-05, "loss": 0.0035, "step": 21588 }, { "epoch": 20.14, "learning_rate": 4.748227611940299e-05, "loss": 0.001, "step": 21592 }, { "epoch": 20.15, "learning_rate": 4.748180970149254e-05, "loss": 0.0008, "step": 21596 }, { "epoch": 20.15, "learning_rate": 4.748134328358209e-05, "loss": 0.0169, "step": 21600 }, { "epoch": 20.15, "learning_rate": 4.748087686567164e-05, "loss": 0.0008, "step": 21604 }, { "epoch": 20.16, "learning_rate": 4.74804104477612e-05, "loss": 0.0003, "step": 21608 }, { "epoch": 20.16, "learning_rate": 4.747994402985075e-05, "loss": 0.0003, "step": 21612 }, { "epoch": 20.16, "learning_rate": 4.7479477611940297e-05, "loss": 0.0005, "step": 21616 }, { "epoch": 20.17, "learning_rate": 4.747901119402985e-05, "loss": 0.0, "step": 21620 }, { "epoch": 20.17, "learning_rate": 4.7478544776119406e-05, "loss": 0.0018, "step": 21624 }, { "epoch": 20.18, "learning_rate": 4.7478078358208955e-05, "loss": 0.0007, "step": 21628 }, { "epoch": 20.18, "learning_rate": 4.747761194029851e-05, "loss": 0.0013, "step": 21632 }, { "epoch": 20.18, "learning_rate": 4.7477145522388064e-05, "loss": 0.0001, "step": 21636 }, { "epoch": 20.19, "learning_rate": 4.747667910447761e-05, "loss": 0.0017, "step": 21640 }, { "epoch": 20.19, "learning_rate": 4.747621268656717e-05, "loss": 0.002, "step": 21644 }, { "epoch": 20.19, "learning_rate": 4.7475746268656716e-05, "loss": 0.0002, "step": 21648 }, { "epoch": 20.2, "learning_rate": 4.747527985074627e-05, "loss": 0.0025, "step": 21652 }, { "epoch": 20.2, "learning_rate": 4.7474813432835825e-05, "loss": 0.0003, "step": 21656 }, { "epoch": 20.21, "learning_rate": 4.7474347014925373e-05, "loss": 0.0018, "step": 21660 }, { "epoch": 20.21, "learning_rate": 4.747388059701493e-05, "loss": 0.0071, "step": 21664 }, { "epoch": 20.21, "learning_rate": 4.747341417910448e-05, "loss": 0.0013, "step": 21668 }, { "epoch": 20.22, "learning_rate": 4.747294776119403e-05, "loss": 0.0014, "step": 21672 }, { "epoch": 20.22, "learning_rate": 4.7472481343283586e-05, "loss": 0.0003, "step": 21676 }, { "epoch": 20.22, "learning_rate": 4.7472014925373134e-05, "loss": 0.0003, "step": 21680 }, { "epoch": 20.23, "learning_rate": 4.747154850746269e-05, "loss": 0.0034, "step": 21684 }, { "epoch": 20.23, "learning_rate": 4.7471082089552244e-05, "loss": 0.0001, "step": 21688 }, { "epoch": 20.24, "learning_rate": 4.747061567164179e-05, "loss": 0.0003, "step": 21692 }, { "epoch": 20.24, "learning_rate": 4.747014925373135e-05, "loss": 0.0041, "step": 21696 }, { "epoch": 20.24, "learning_rate": 4.74696828358209e-05, "loss": 0.0008, "step": 21700 }, { "epoch": 20.25, "learning_rate": 4.746921641791045e-05, "loss": 0.0004, "step": 21704 }, { "epoch": 20.25, "learning_rate": 4.746875e-05, "loss": 0.0005, "step": 21708 }, { "epoch": 20.25, "learning_rate": 4.746828358208955e-05, "loss": 0.0003, "step": 21712 }, { "epoch": 20.26, "learning_rate": 4.746781716417911e-05, "loss": 0.0005, "step": 21716 }, { "epoch": 20.26, "learning_rate": 4.7467350746268656e-05, "loss": 0.0001, "step": 21720 }, { "epoch": 20.26, "learning_rate": 4.746688432835821e-05, "loss": 0.0039, "step": 21724 }, { "epoch": 20.27, "learning_rate": 4.7466417910447766e-05, "loss": 0.0005, "step": 21728 }, { "epoch": 20.27, "learning_rate": 4.7465951492537314e-05, "loss": 0.0046, "step": 21732 }, { "epoch": 20.28, "learning_rate": 4.746548507462687e-05, "loss": 0.0003, "step": 21736 }, { "epoch": 20.28, "learning_rate": 4.746501865671642e-05, "loss": 0.0025, "step": 21740 }, { "epoch": 20.28, "learning_rate": 4.746455223880597e-05, "loss": 0.0016, "step": 21744 }, { "epoch": 20.29, "learning_rate": 4.746408582089553e-05, "loss": 0.0019, "step": 21748 }, { "epoch": 20.29, "learning_rate": 4.7463619402985075e-05, "loss": 0.0016, "step": 21752 }, { "epoch": 20.29, "learning_rate": 4.746315298507463e-05, "loss": 0.0004, "step": 21756 }, { "epoch": 20.3, "learning_rate": 4.7462686567164185e-05, "loss": 0.0004, "step": 21760 }, { "epoch": 20.3, "learning_rate": 4.746222014925373e-05, "loss": 0.0007, "step": 21764 }, { "epoch": 20.31, "learning_rate": 4.746175373134328e-05, "loss": 0.0002, "step": 21768 }, { "epoch": 20.31, "learning_rate": 4.7461287313432836e-05, "loss": 0.0006, "step": 21772 }, { "epoch": 20.31, "learning_rate": 4.746082089552239e-05, "loss": 0.0003, "step": 21776 }, { "epoch": 20.32, "learning_rate": 4.746035447761194e-05, "loss": 0.0005, "step": 21780 }, { "epoch": 20.32, "learning_rate": 4.7459888059701494e-05, "loss": 0.0007, "step": 21784 }, { "epoch": 20.32, "learning_rate": 4.745942164179105e-05, "loss": 0.0005, "step": 21788 }, { "epoch": 20.33, "learning_rate": 4.74589552238806e-05, "loss": 0.0005, "step": 21792 }, { "epoch": 20.33, "learning_rate": 4.745848880597015e-05, "loss": 0.0004, "step": 21796 }, { "epoch": 20.34, "learning_rate": 4.74580223880597e-05, "loss": 0.0003, "step": 21800 }, { "epoch": 20.34, "learning_rate": 4.7457555970149255e-05, "loss": 0.0032, "step": 21804 }, { "epoch": 20.34, "learning_rate": 4.745708955223881e-05, "loss": 0.0004, "step": 21808 }, { "epoch": 20.35, "learning_rate": 4.745662313432836e-05, "loss": 0.0013, "step": 21812 }, { "epoch": 20.35, "learning_rate": 4.745615671641791e-05, "loss": 0.0008, "step": 21816 }, { "epoch": 20.35, "learning_rate": 4.745569029850747e-05, "loss": 0.0047, "step": 21820 }, { "epoch": 20.36, "learning_rate": 4.7455223880597016e-05, "loss": 0.0001, "step": 21824 }, { "epoch": 20.36, "learning_rate": 4.745475746268657e-05, "loss": 0.0012, "step": 21828 }, { "epoch": 20.37, "learning_rate": 4.745429104477612e-05, "loss": 0.0035, "step": 21832 }, { "epoch": 20.37, "learning_rate": 4.7453824626865674e-05, "loss": 0.0052, "step": 21836 }, { "epoch": 20.37, "learning_rate": 4.745335820895523e-05, "loss": 0.0006, "step": 21840 }, { "epoch": 20.38, "learning_rate": 4.745289179104478e-05, "loss": 0.002, "step": 21844 }, { "epoch": 20.38, "learning_rate": 4.745242537313433e-05, "loss": 0.0063, "step": 21848 }, { "epoch": 20.38, "learning_rate": 4.745195895522389e-05, "loss": 0.0005, "step": 21852 }, { "epoch": 20.39, "learning_rate": 4.7451492537313435e-05, "loss": 0.0214, "step": 21856 }, { "epoch": 20.39, "learning_rate": 4.745102611940298e-05, "loss": 0.0038, "step": 21860 }, { "epoch": 20.4, "learning_rate": 4.7450559701492545e-05, "loss": 0.0043, "step": 21864 }, { "epoch": 20.4, "learning_rate": 4.745009328358209e-05, "loss": 0.0023, "step": 21868 }, { "epoch": 20.4, "learning_rate": 4.744962686567164e-05, "loss": 0.0001, "step": 21872 }, { "epoch": 20.41, "learning_rate": 4.7449160447761196e-05, "loss": 0.0002, "step": 21876 }, { "epoch": 20.41, "learning_rate": 4.744869402985075e-05, "loss": 0.0021, "step": 21880 }, { "epoch": 20.41, "learning_rate": 4.74482276119403e-05, "loss": 0.0018, "step": 21884 }, { "epoch": 20.42, "learning_rate": 4.7447761194029854e-05, "loss": 0.0012, "step": 21888 }, { "epoch": 20.42, "learning_rate": 4.74472947761194e-05, "loss": 0.0003, "step": 21892 }, { "epoch": 20.43, "learning_rate": 4.744682835820896e-05, "loss": 0.0016, "step": 21896 }, { "epoch": 20.43, "learning_rate": 4.744636194029851e-05, "loss": 0.0029, "step": 21900 }, { "epoch": 20.43, "learning_rate": 4.744589552238806e-05, "loss": 0.0005, "step": 21904 }, { "epoch": 20.44, "learning_rate": 4.7445429104477615e-05, "loss": 0.0028, "step": 21908 }, { "epoch": 20.44, "learning_rate": 4.744496268656717e-05, "loss": 0.0006, "step": 21912 }, { "epoch": 20.44, "learning_rate": 4.744449626865672e-05, "loss": 0.0002, "step": 21916 }, { "epoch": 20.45, "learning_rate": 4.7444029850746266e-05, "loss": 0.0009, "step": 21920 }, { "epoch": 20.45, "learning_rate": 4.744356343283583e-05, "loss": 0.0019, "step": 21924 }, { "epoch": 20.46, "learning_rate": 4.7443097014925376e-05, "loss": 0.0005, "step": 21928 }, { "epoch": 20.46, "learning_rate": 4.7442630597014924e-05, "loss": 0.0018, "step": 21932 }, { "epoch": 20.46, "learning_rate": 4.744216417910448e-05, "loss": 0.0024, "step": 21936 }, { "epoch": 20.47, "learning_rate": 4.7441697761194034e-05, "loss": 0.0007, "step": 21940 }, { "epoch": 20.47, "learning_rate": 4.744123134328358e-05, "loss": 0.0129, "step": 21944 }, { "epoch": 20.47, "learning_rate": 4.744076492537314e-05, "loss": 0.005, "step": 21948 }, { "epoch": 20.48, "learning_rate": 4.7440298507462685e-05, "loss": 0.0038, "step": 21952 }, { "epoch": 20.48, "learning_rate": 4.743983208955224e-05, "loss": 0.0001, "step": 21956 }, { "epoch": 20.49, "learning_rate": 4.7439365671641795e-05, "loss": 0.0003, "step": 21960 }, { "epoch": 20.49, "learning_rate": 4.743889925373134e-05, "loss": 0.0015, "step": 21964 }, { "epoch": 20.49, "learning_rate": 4.74384328358209e-05, "loss": 0.0002, "step": 21968 }, { "epoch": 20.5, "learning_rate": 4.743796641791045e-05, "loss": 0.0009, "step": 21972 }, { "epoch": 20.5, "learning_rate": 4.74375e-05, "loss": 0.0006, "step": 21976 }, { "epoch": 20.5, "learning_rate": 4.7437033582089556e-05, "loss": 0.0001, "step": 21980 }, { "epoch": 20.51, "learning_rate": 4.743656716417911e-05, "loss": 0.0005, "step": 21984 }, { "epoch": 20.51, "learning_rate": 4.743610074626866e-05, "loss": 0.0021, "step": 21988 }, { "epoch": 20.51, "learning_rate": 4.7435634328358214e-05, "loss": 0.0008, "step": 21992 }, { "epoch": 20.52, "learning_rate": 4.743516791044776e-05, "loss": 0.0022, "step": 21996 }, { "epoch": 20.52, "learning_rate": 4.743470149253732e-05, "loss": 0.009, "step": 22000 }, { "epoch": 20.52, "eval_exact_match": 0.718568665377176, "eval_exec": 0.746615087040619, "eval_loss": 0.36730751395225525, "eval_runtime": 1232.9452, "eval_samples_per_second": 0.839, "step": 22000 }, { "epoch": 20.53, "learning_rate": 4.743423507462687e-05, "loss": 0.0004, "step": 22004 }, { "epoch": 20.53, "learning_rate": 4.743376865671642e-05, "loss": 0.0028, "step": 22008 }, { "epoch": 20.53, "learning_rate": 4.743330223880597e-05, "loss": 0.0001, "step": 22012 }, { "epoch": 20.54, "learning_rate": 4.743283582089553e-05, "loss": 0.0002, "step": 22016 }, { "epoch": 20.54, "learning_rate": 4.743236940298508e-05, "loss": 0.0049, "step": 22020 }, { "epoch": 20.54, "learning_rate": 4.7431902985074626e-05, "loss": 0.0001, "step": 22024 }, { "epoch": 20.55, "learning_rate": 4.743143656716418e-05, "loss": 0.0003, "step": 22028 }, { "epoch": 20.55, "learning_rate": 4.7430970149253736e-05, "loss": 0.0003, "step": 22032 }, { "epoch": 20.56, "learning_rate": 4.7430503731343284e-05, "loss": 0.0009, "step": 22036 }, { "epoch": 20.56, "learning_rate": 4.743003731343284e-05, "loss": 0.0002, "step": 22040 }, { "epoch": 20.56, "learning_rate": 4.7429570895522394e-05, "loss": 0.0001, "step": 22044 }, { "epoch": 20.57, "learning_rate": 4.742910447761194e-05, "loss": 0.0001, "step": 22048 }, { "epoch": 20.57, "learning_rate": 4.74286380597015e-05, "loss": 0.0337, "step": 22052 }, { "epoch": 20.57, "learning_rate": 4.7428171641791045e-05, "loss": 0.0002, "step": 22056 }, { "epoch": 20.58, "learning_rate": 4.74277052238806e-05, "loss": 0.0001, "step": 22060 }, { "epoch": 20.58, "learning_rate": 4.7427238805970155e-05, "loss": 0.0001, "step": 22064 }, { "epoch": 20.59, "learning_rate": 4.74267723880597e-05, "loss": 0.0031, "step": 22068 }, { "epoch": 20.59, "learning_rate": 4.742630597014925e-05, "loss": 0.0008, "step": 22072 }, { "epoch": 20.59, "learning_rate": 4.742583955223881e-05, "loss": 0.0003, "step": 22076 }, { "epoch": 20.6, "learning_rate": 4.742537313432836e-05, "loss": 0.0011, "step": 22080 }, { "epoch": 20.6, "learning_rate": 4.742490671641791e-05, "loss": 0.0006, "step": 22084 }, { "epoch": 20.6, "learning_rate": 4.7424440298507464e-05, "loss": 0.0001, "step": 22088 }, { "epoch": 20.61, "learning_rate": 4.742397388059702e-05, "loss": 0.0025, "step": 22092 }, { "epoch": 20.61, "learning_rate": 4.742350746268657e-05, "loss": 0.0034, "step": 22096 }, { "epoch": 20.62, "learning_rate": 4.742304104477612e-05, "loss": 0.0001, "step": 22100 }, { "epoch": 20.62, "learning_rate": 4.742257462686567e-05, "loss": 0.0001, "step": 22104 }, { "epoch": 20.62, "learning_rate": 4.7422108208955225e-05, "loss": 0.0015, "step": 22108 }, { "epoch": 20.63, "learning_rate": 4.742164179104478e-05, "loss": 0.0001, "step": 22112 }, { "epoch": 20.63, "learning_rate": 4.742117537313433e-05, "loss": 0.0002, "step": 22116 }, { "epoch": 20.63, "learning_rate": 4.742070895522388e-05, "loss": 0.0075, "step": 22120 }, { "epoch": 20.64, "learning_rate": 4.742024253731344e-05, "loss": 0.0001, "step": 22124 }, { "epoch": 20.64, "learning_rate": 4.7419776119402986e-05, "loss": 0.0042, "step": 22128 }, { "epoch": 20.65, "learning_rate": 4.7419309701492534e-05, "loss": 0.0004, "step": 22132 }, { "epoch": 20.65, "learning_rate": 4.7418843283582096e-05, "loss": 0.0016, "step": 22136 }, { "epoch": 20.65, "learning_rate": 4.7418376865671644e-05, "loss": 0.0008, "step": 22140 }, { "epoch": 20.66, "learning_rate": 4.74179104477612e-05, "loss": 0.0014, "step": 22144 }, { "epoch": 20.66, "learning_rate": 4.741744402985075e-05, "loss": 0.0001, "step": 22148 }, { "epoch": 20.66, "learning_rate": 4.74169776119403e-05, "loss": 0.0036, "step": 22152 }, { "epoch": 20.67, "learning_rate": 4.741651119402986e-05, "loss": 0.0004, "step": 22156 }, { "epoch": 20.67, "learning_rate": 4.7416044776119405e-05, "loss": 0.0022, "step": 22160 }, { "epoch": 20.68, "learning_rate": 4.741557835820895e-05, "loss": 0.0009, "step": 22164 }, { "epoch": 20.68, "learning_rate": 4.7415111940298515e-05, "loss": 0.0004, "step": 22168 }, { "epoch": 20.68, "learning_rate": 4.741464552238806e-05, "loss": 0.0057, "step": 22172 }, { "epoch": 20.69, "learning_rate": 4.741417910447761e-05, "loss": 0.0005, "step": 22176 }, { "epoch": 20.69, "learning_rate": 4.7413712686567166e-05, "loss": 0.0015, "step": 22180 }, { "epoch": 20.69, "learning_rate": 4.741324626865672e-05, "loss": 0.0002, "step": 22184 }, { "epoch": 20.7, "learning_rate": 4.741277985074627e-05, "loss": 0.0002, "step": 22188 }, { "epoch": 20.7, "learning_rate": 4.7412313432835824e-05, "loss": 0.0003, "step": 22192 }, { "epoch": 20.71, "learning_rate": 4.741184701492538e-05, "loss": 0.0003, "step": 22196 }, { "epoch": 20.71, "learning_rate": 4.741138059701493e-05, "loss": 0.0023, "step": 22200 }, { "epoch": 20.71, "learning_rate": 4.741091417910448e-05, "loss": 0.0016, "step": 22204 }, { "epoch": 20.72, "learning_rate": 4.741044776119403e-05, "loss": 0.0012, "step": 22208 }, { "epoch": 20.72, "learning_rate": 4.7409981343283585e-05, "loss": 0.0025, "step": 22212 }, { "epoch": 20.72, "learning_rate": 4.740951492537314e-05, "loss": 0.0003, "step": 22216 }, { "epoch": 20.73, "learning_rate": 4.740904850746269e-05, "loss": 0.0009, "step": 22220 }, { "epoch": 20.73, "learning_rate": 4.7408582089552236e-05, "loss": 0.0008, "step": 22224 }, { "epoch": 20.73, "learning_rate": 4.74081156716418e-05, "loss": 0.004, "step": 22228 }, { "epoch": 20.74, "learning_rate": 4.7407649253731346e-05, "loss": 0.0004, "step": 22232 }, { "epoch": 20.74, "learning_rate": 4.7407182835820894e-05, "loss": 0.0026, "step": 22236 }, { "epoch": 20.75, "learning_rate": 4.740671641791045e-05, "loss": 0.0016, "step": 22240 }, { "epoch": 20.75, "learning_rate": 4.7406250000000004e-05, "loss": 0.0011, "step": 22244 }, { "epoch": 20.75, "learning_rate": 4.740578358208955e-05, "loss": 0.0045, "step": 22248 }, { "epoch": 20.76, "learning_rate": 4.740531716417911e-05, "loss": 0.0007, "step": 22252 }, { "epoch": 20.76, "learning_rate": 4.740485074626866e-05, "loss": 0.0013, "step": 22256 }, { "epoch": 20.76, "learning_rate": 4.740438432835821e-05, "loss": 0.0009, "step": 22260 }, { "epoch": 20.77, "learning_rate": 4.7403917910447765e-05, "loss": 0.0041, "step": 22264 }, { "epoch": 20.77, "learning_rate": 4.740345149253731e-05, "loss": 0.0016, "step": 22268 }, { "epoch": 20.78, "learning_rate": 4.740298507462687e-05, "loss": 0.0008, "step": 22272 }, { "epoch": 20.78, "learning_rate": 4.740251865671642e-05, "loss": 0.0174, "step": 22276 }, { "epoch": 20.78, "learning_rate": 4.740205223880597e-05, "loss": 0.0067, "step": 22280 }, { "epoch": 20.79, "learning_rate": 4.740158582089552e-05, "loss": 0.0009, "step": 22284 }, { "epoch": 20.79, "learning_rate": 4.740111940298508e-05, "loss": 0.0, "step": 22288 }, { "epoch": 20.79, "learning_rate": 4.740065298507463e-05, "loss": 0.0009, "step": 22292 }, { "epoch": 20.8, "learning_rate": 4.740018656716418e-05, "loss": 0.0014, "step": 22296 }, { "epoch": 20.8, "learning_rate": 4.739972014925373e-05, "loss": 0.0009, "step": 22300 }, { "epoch": 20.81, "learning_rate": 4.739925373134329e-05, "loss": 0.0001, "step": 22304 }, { "epoch": 20.81, "learning_rate": 4.739878731343284e-05, "loss": 0.0046, "step": 22308 }, { "epoch": 20.81, "learning_rate": 4.739832089552239e-05, "loss": 0.0004, "step": 22312 }, { "epoch": 20.82, "learning_rate": 4.7397854477611945e-05, "loss": 0.0076, "step": 22316 }, { "epoch": 20.82, "learning_rate": 4.73973880597015e-05, "loss": 0.0117, "step": 22320 }, { "epoch": 20.82, "learning_rate": 4.739692164179105e-05, "loss": 0.0002, "step": 22324 }, { "epoch": 20.83, "learning_rate": 4.7396455223880596e-05, "loss": 0.0009, "step": 22328 }, { "epoch": 20.83, "learning_rate": 4.739598880597015e-05, "loss": 0.0028, "step": 22332 }, { "epoch": 20.84, "learning_rate": 4.7395522388059706e-05, "loss": 0.0009, "step": 22336 }, { "epoch": 20.84, "learning_rate": 4.7395055970149254e-05, "loss": 0.0007, "step": 22340 }, { "epoch": 20.84, "learning_rate": 4.739458955223881e-05, "loss": 0.001, "step": 22344 }, { "epoch": 20.85, "learning_rate": 4.7394123134328364e-05, "loss": 0.0058, "step": 22348 }, { "epoch": 20.85, "learning_rate": 4.739365671641791e-05, "loss": 0.0018, "step": 22352 }, { "epoch": 20.85, "learning_rate": 4.7393190298507467e-05, "loss": 0.0022, "step": 22356 }, { "epoch": 20.86, "learning_rate": 4.7392723880597015e-05, "loss": 0.0006, "step": 22360 }, { "epoch": 20.86, "learning_rate": 4.739225746268657e-05, "loss": 0.0006, "step": 22364 }, { "epoch": 20.87, "learning_rate": 4.7391791044776125e-05, "loss": 0.0003, "step": 22368 }, { "epoch": 20.87, "learning_rate": 4.739132462686567e-05, "loss": 0.0001, "step": 22372 }, { "epoch": 20.87, "learning_rate": 4.739085820895523e-05, "loss": 0.0002, "step": 22376 }, { "epoch": 20.88, "learning_rate": 4.739039179104478e-05, "loss": 0.0007, "step": 22380 }, { "epoch": 20.88, "learning_rate": 4.738992537313433e-05, "loss": 0.0015, "step": 22384 }, { "epoch": 20.88, "learning_rate": 4.738945895522388e-05, "loss": 0.0007, "step": 22388 }, { "epoch": 20.89, "learning_rate": 4.7388992537313434e-05, "loss": 0.0003, "step": 22392 }, { "epoch": 20.89, "learning_rate": 4.738852611940299e-05, "loss": 0.0024, "step": 22396 }, { "epoch": 20.9, "learning_rate": 4.738805970149254e-05, "loss": 0.0006, "step": 22400 }, { "epoch": 20.9, "learning_rate": 4.738759328358209e-05, "loss": 0.0005, "step": 22404 }, { "epoch": 20.9, "learning_rate": 4.7387126865671647e-05, "loss": 0.0026, "step": 22408 }, { "epoch": 20.91, "learning_rate": 4.7386660447761195e-05, "loss": 0.0015, "step": 22412 }, { "epoch": 20.91, "learning_rate": 4.738619402985075e-05, "loss": 0.0008, "step": 22416 }, { "epoch": 20.91, "learning_rate": 4.73857276119403e-05, "loss": 0.0009, "step": 22420 }, { "epoch": 20.92, "learning_rate": 4.738526119402985e-05, "loss": 0.0006, "step": 22424 }, { "epoch": 20.92, "learning_rate": 4.738479477611941e-05, "loss": 0.0038, "step": 22428 }, { "epoch": 20.93, "learning_rate": 4.7384328358208956e-05, "loss": 0.0001, "step": 22432 }, { "epoch": 20.93, "learning_rate": 4.738386194029851e-05, "loss": 0.0005, "step": 22436 }, { "epoch": 20.93, "learning_rate": 4.7383395522388065e-05, "loss": 0.0003, "step": 22440 }, { "epoch": 20.94, "learning_rate": 4.7382929104477614e-05, "loss": 0.0004, "step": 22444 }, { "epoch": 20.94, "learning_rate": 4.738246268656716e-05, "loss": 0.0036, "step": 22448 }, { "epoch": 20.94, "learning_rate": 4.738199626865672e-05, "loss": 0.0002, "step": 22452 }, { "epoch": 20.95, "learning_rate": 4.738152985074627e-05, "loss": 0.0025, "step": 22456 }, { "epoch": 20.95, "learning_rate": 4.738106343283582e-05, "loss": 0.0004, "step": 22460 }, { "epoch": 20.96, "learning_rate": 4.7380597014925375e-05, "loss": 0.0019, "step": 22464 }, { "epoch": 20.96, "learning_rate": 4.738013059701493e-05, "loss": 0.005, "step": 22468 }, { "epoch": 20.96, "learning_rate": 4.7379664179104484e-05, "loss": 0.0005, "step": 22472 }, { "epoch": 20.97, "learning_rate": 4.737919776119403e-05, "loss": 0.0002, "step": 22476 }, { "epoch": 20.97, "learning_rate": 4.737873134328358e-05, "loss": 0.0046, "step": 22480 }, { "epoch": 20.97, "learning_rate": 4.737826492537314e-05, "loss": 0.0005, "step": 22484 }, { "epoch": 20.98, "learning_rate": 4.737779850746269e-05, "loss": 0.0011, "step": 22488 }, { "epoch": 20.98, "learning_rate": 4.737733208955224e-05, "loss": 0.0024, "step": 22492 }, { "epoch": 20.98, "learning_rate": 4.7376865671641793e-05, "loss": 0.001, "step": 22496 }, { "epoch": 20.99, "learning_rate": 4.737639925373135e-05, "loss": 0.0001, "step": 22500 }, { "epoch": 20.99, "eval_exact_match": 0.7321083172147002, "eval_exec": 0.7669245647969052, "eval_loss": 0.3726440668106079, "eval_runtime": 1238.1908, "eval_samples_per_second": 0.835, "step": 22500 }, { "epoch": 20.99, "learning_rate": 4.7375932835820897e-05, "loss": 0.0001, "step": 22504 }, { "epoch": 21.0, "learning_rate": 4.737546641791045e-05, "loss": 0.0035, "step": 22508 }, { "epoch": 21.0, "learning_rate": 4.7375e-05, "loss": 0.0003, "step": 22512 }, { "epoch": 21.0, "learning_rate": 4.7374533582089554e-05, "loss": 0.0002, "step": 22516 }, { "epoch": 21.01, "learning_rate": 4.737406716417911e-05, "loss": 0.0005, "step": 22520 }, { "epoch": 21.01, "learning_rate": 4.737360074626866e-05, "loss": 0.006, "step": 22524 }, { "epoch": 21.01, "learning_rate": 4.737313432835821e-05, "loss": 0.0019, "step": 22528 }, { "epoch": 21.02, "learning_rate": 4.737266791044777e-05, "loss": 0.0008, "step": 22532 }, { "epoch": 21.02, "learning_rate": 4.7372201492537315e-05, "loss": 0.002, "step": 22536 }, { "epoch": 21.03, "learning_rate": 4.7371735074626864e-05, "loss": 0.0002, "step": 22540 }, { "epoch": 21.03, "learning_rate": 4.7371268656716425e-05, "loss": 0.0007, "step": 22544 }, { "epoch": 21.03, "learning_rate": 4.7370802238805973e-05, "loss": 0.0004, "step": 22548 }, { "epoch": 21.04, "learning_rate": 4.737033582089552e-05, "loss": 0.0002, "step": 22552 }, { "epoch": 21.04, "learning_rate": 4.7369869402985076e-05, "loss": 0.0005, "step": 22556 }, { "epoch": 21.04, "learning_rate": 4.736940298507463e-05, "loss": 0.0001, "step": 22560 }, { "epoch": 21.05, "learning_rate": 4.736893656716418e-05, "loss": 0.0003, "step": 22564 }, { "epoch": 21.05, "learning_rate": 4.7368470149253734e-05, "loss": 0.0008, "step": 22568 }, { "epoch": 21.06, "learning_rate": 4.736800373134328e-05, "loss": 0.0027, "step": 22572 }, { "epoch": 21.06, "learning_rate": 4.736753731343284e-05, "loss": 0.0021, "step": 22576 }, { "epoch": 21.06, "learning_rate": 4.736707089552239e-05, "loss": 0.0071, "step": 22580 }, { "epoch": 21.07, "learning_rate": 4.736660447761194e-05, "loss": 0.001, "step": 22584 }, { "epoch": 21.07, "learning_rate": 4.7366138059701495e-05, "loss": 0.0019, "step": 22588 }, { "epoch": 21.07, "learning_rate": 4.736567164179105e-05, "loss": 0.0006, "step": 22592 }, { "epoch": 21.08, "learning_rate": 4.73652052238806e-05, "loss": 0.0009, "step": 22596 }, { "epoch": 21.08, "learning_rate": 4.7364738805970147e-05, "loss": 0.0016, "step": 22600 }, { "epoch": 21.09, "learning_rate": 4.736427238805971e-05, "loss": 0.0001, "step": 22604 }, { "epoch": 21.09, "learning_rate": 4.7363805970149256e-05, "loss": 0.0004, "step": 22608 }, { "epoch": 21.09, "learning_rate": 4.7363339552238805e-05, "loss": 0.0074, "step": 22612 }, { "epoch": 21.1, "learning_rate": 4.736287313432836e-05, "loss": 0.0042, "step": 22616 }, { "epoch": 21.1, "learning_rate": 4.7362406716417914e-05, "loss": 0.0001, "step": 22620 }, { "epoch": 21.1, "learning_rate": 4.736194029850746e-05, "loss": 0.0036, "step": 22624 }, { "epoch": 21.11, "learning_rate": 4.736147388059702e-05, "loss": 0.0009, "step": 22628 }, { "epoch": 21.11, "learning_rate": 4.7361007462686565e-05, "loss": 0.0002, "step": 22632 }, { "epoch": 21.12, "learning_rate": 4.736054104477613e-05, "loss": 0.0, "step": 22636 }, { "epoch": 21.12, "learning_rate": 4.7360074626865675e-05, "loss": 0.0076, "step": 22640 }, { "epoch": 21.12, "learning_rate": 4.7359608208955223e-05, "loss": 0.0015, "step": 22644 }, { "epoch": 21.13, "learning_rate": 4.735914179104478e-05, "loss": 0.003, "step": 22648 }, { "epoch": 21.13, "learning_rate": 4.735867537313433e-05, "loss": 0.0014, "step": 22652 }, { "epoch": 21.13, "learning_rate": 4.735820895522388e-05, "loss": 0.0033, "step": 22656 }, { "epoch": 21.14, "learning_rate": 4.7357742537313436e-05, "loss": 0.0001, "step": 22660 }, { "epoch": 21.14, "learning_rate": 4.735727611940299e-05, "loss": 0.0011, "step": 22664 }, { "epoch": 21.15, "learning_rate": 4.735680970149254e-05, "loss": 0.0007, "step": 22668 }, { "epoch": 21.15, "learning_rate": 4.7356343283582094e-05, "loss": 0.0007, "step": 22672 }, { "epoch": 21.15, "learning_rate": 4.735587686567164e-05, "loss": 0.0041, "step": 22676 }, { "epoch": 21.16, "learning_rate": 4.73554104477612e-05, "loss": 0.0011, "step": 22680 }, { "epoch": 21.16, "learning_rate": 4.735494402985075e-05, "loss": 0.0036, "step": 22684 }, { "epoch": 21.16, "learning_rate": 4.73544776119403e-05, "loss": 0.0049, "step": 22688 }, { "epoch": 21.17, "learning_rate": 4.735401119402985e-05, "loss": 0.0011, "step": 22692 }, { "epoch": 21.17, "learning_rate": 4.735354477611941e-05, "loss": 0.0011, "step": 22696 }, { "epoch": 21.18, "learning_rate": 4.735307835820896e-05, "loss": 0.0031, "step": 22700 }, { "epoch": 21.18, "learning_rate": 4.7352611940298506e-05, "loss": 0.0002, "step": 22704 }, { "epoch": 21.18, "learning_rate": 4.735214552238806e-05, "loss": 0.0002, "step": 22708 }, { "epoch": 21.19, "learning_rate": 4.7351679104477616e-05, "loss": 0.0005, "step": 22712 }, { "epoch": 21.19, "learning_rate": 4.7351212686567164e-05, "loss": 0.0044, "step": 22716 }, { "epoch": 21.19, "learning_rate": 4.735074626865672e-05, "loss": 0.0009, "step": 22720 }, { "epoch": 21.2, "learning_rate": 4.7350279850746274e-05, "loss": 0.0002, "step": 22724 }, { "epoch": 21.2, "learning_rate": 4.734981343283582e-05, "loss": 0.0003, "step": 22728 }, { "epoch": 21.21, "learning_rate": 4.734934701492538e-05, "loss": 0.0001, "step": 22732 }, { "epoch": 21.21, "learning_rate": 4.7348880597014925e-05, "loss": 0.0013, "step": 22736 }, { "epoch": 21.21, "learning_rate": 4.734841417910448e-05, "loss": 0.0047, "step": 22740 }, { "epoch": 21.22, "learning_rate": 4.7347947761194035e-05, "loss": 0.0031, "step": 22744 }, { "epoch": 21.22, "learning_rate": 4.734748134328358e-05, "loss": 0.0044, "step": 22748 }, { "epoch": 21.22, "learning_rate": 4.734701492537313e-05, "loss": 0.0001, "step": 22752 }, { "epoch": 21.23, "learning_rate": 4.734654850746269e-05, "loss": 0.0006, "step": 22756 }, { "epoch": 21.23, "learning_rate": 4.734608208955224e-05, "loss": 0.0004, "step": 22760 }, { "epoch": 21.24, "learning_rate": 4.734561567164179e-05, "loss": 0.0019, "step": 22764 }, { "epoch": 21.24, "learning_rate": 4.7345149253731344e-05, "loss": 0.0003, "step": 22768 }, { "epoch": 21.24, "learning_rate": 4.73446828358209e-05, "loss": 0.0009, "step": 22772 }, { "epoch": 21.25, "learning_rate": 4.734421641791045e-05, "loss": 0.0006, "step": 22776 }, { "epoch": 21.25, "learning_rate": 4.734375e-05, "loss": 0.0015, "step": 22780 }, { "epoch": 21.25, "learning_rate": 4.734328358208955e-05, "loss": 0.0011, "step": 22784 }, { "epoch": 21.26, "learning_rate": 4.7342817164179105e-05, "loss": 0.0038, "step": 22788 }, { "epoch": 21.26, "learning_rate": 4.734235074626866e-05, "loss": 0.0001, "step": 22792 }, { "epoch": 21.26, "learning_rate": 4.734188432835821e-05, "loss": 0.0001, "step": 22796 }, { "epoch": 21.27, "learning_rate": 4.734141791044776e-05, "loss": 0.0037, "step": 22800 }, { "epoch": 21.27, "learning_rate": 4.734095149253732e-05, "loss": 0.0003, "step": 22804 }, { "epoch": 21.28, "learning_rate": 4.7340485074626866e-05, "loss": 0.0006, "step": 22808 }, { "epoch": 21.28, "learning_rate": 4.734001865671642e-05, "loss": 0.0003, "step": 22812 }, { "epoch": 21.28, "learning_rate": 4.7339552238805976e-05, "loss": 0.0018, "step": 22816 }, { "epoch": 21.29, "learning_rate": 4.7339085820895524e-05, "loss": 0.0003, "step": 22820 }, { "epoch": 21.29, "learning_rate": 4.733861940298508e-05, "loss": 0.0003, "step": 22824 }, { "epoch": 21.29, "learning_rate": 4.733815298507463e-05, "loss": 0.004, "step": 22828 }, { "epoch": 21.3, "learning_rate": 4.733768656716418e-05, "loss": 0.0005, "step": 22832 }, { "epoch": 21.3, "learning_rate": 4.733722014925374e-05, "loss": 0.0001, "step": 22836 }, { "epoch": 21.31, "learning_rate": 4.7336753731343285e-05, "loss": 0.0006, "step": 22840 }, { "epoch": 21.31, "learning_rate": 4.733628731343283e-05, "loss": 0.0018, "step": 22844 }, { "epoch": 21.31, "learning_rate": 4.7335820895522395e-05, "loss": 0.0, "step": 22848 }, { "epoch": 21.32, "learning_rate": 4.733535447761194e-05, "loss": 0.0008, "step": 22852 }, { "epoch": 21.32, "learning_rate": 4.733488805970149e-05, "loss": 0.0045, "step": 22856 }, { "epoch": 21.32, "learning_rate": 4.7334421641791046e-05, "loss": 0.0006, "step": 22860 }, { "epoch": 21.33, "learning_rate": 4.73339552238806e-05, "loss": 0.0014, "step": 22864 }, { "epoch": 21.33, "learning_rate": 4.733348880597015e-05, "loss": 0.0005, "step": 22868 }, { "epoch": 21.34, "learning_rate": 4.7333022388059704e-05, "loss": 0.0001, "step": 22872 }, { "epoch": 21.34, "learning_rate": 4.733255597014926e-05, "loss": 0.0032, "step": 22876 }, { "epoch": 21.34, "learning_rate": 4.733208955223881e-05, "loss": 0.0015, "step": 22880 }, { "epoch": 21.35, "learning_rate": 4.733162313432836e-05, "loss": 0.0006, "step": 22884 }, { "epoch": 21.35, "learning_rate": 4.733115671641791e-05, "loss": 0.0001, "step": 22888 }, { "epoch": 21.35, "learning_rate": 4.7330690298507465e-05, "loss": 0.0023, "step": 22892 }, { "epoch": 21.36, "learning_rate": 4.733022388059702e-05, "loss": 0.0013, "step": 22896 }, { "epoch": 21.36, "learning_rate": 4.732975746268657e-05, "loss": 0.0022, "step": 22900 }, { "epoch": 21.37, "learning_rate": 4.7329291044776116e-05, "loss": 0.0071, "step": 22904 }, { "epoch": 21.37, "learning_rate": 4.732882462686568e-05, "loss": 0.0038, "step": 22908 }, { "epoch": 21.37, "learning_rate": 4.7328358208955226e-05, "loss": 0.0001, "step": 22912 }, { "epoch": 21.38, "learning_rate": 4.7327891791044774e-05, "loss": 0.0005, "step": 22916 }, { "epoch": 21.38, "learning_rate": 4.732742537313433e-05, "loss": 0.0001, "step": 22920 }, { "epoch": 21.38, "learning_rate": 4.7326958955223884e-05, "loss": 0.0085, "step": 22924 }, { "epoch": 21.39, "learning_rate": 4.732649253731343e-05, "loss": 0.0053, "step": 22928 }, { "epoch": 21.39, "learning_rate": 4.732602611940299e-05, "loss": 0.0007, "step": 22932 }, { "epoch": 21.4, "learning_rate": 4.732555970149254e-05, "loss": 0.0013, "step": 22936 }, { "epoch": 21.4, "learning_rate": 4.732509328358209e-05, "loss": 0.0002, "step": 22940 }, { "epoch": 21.4, "learning_rate": 4.7324626865671645e-05, "loss": 0.0027, "step": 22944 }, { "epoch": 21.41, "learning_rate": 4.732416044776119e-05, "loss": 0.0005, "step": 22948 }, { "epoch": 21.41, "learning_rate": 4.732369402985075e-05, "loss": 0.0004, "step": 22952 }, { "epoch": 21.41, "learning_rate": 4.73232276119403e-05, "loss": 0.0006, "step": 22956 }, { "epoch": 21.42, "learning_rate": 4.732276119402985e-05, "loss": 0.0047, "step": 22960 }, { "epoch": 21.42, "learning_rate": 4.7322294776119406e-05, "loss": 0.0008, "step": 22964 }, { "epoch": 21.43, "learning_rate": 4.732182835820896e-05, "loss": 0.0019, "step": 22968 }, { "epoch": 21.43, "learning_rate": 4.732136194029851e-05, "loss": 0.0008, "step": 22972 }, { "epoch": 21.43, "learning_rate": 4.7320895522388064e-05, "loss": 0.0001, "step": 22976 }, { "epoch": 21.44, "learning_rate": 4.732042910447761e-05, "loss": 0.0002, "step": 22980 }, { "epoch": 21.44, "learning_rate": 4.731996268656717e-05, "loss": 0.0008, "step": 22984 }, { "epoch": 21.44, "learning_rate": 4.731949626865672e-05, "loss": 0.0007, "step": 22988 }, { "epoch": 21.45, "learning_rate": 4.731902985074627e-05, "loss": 0.0002, "step": 22992 }, { "epoch": 21.45, "learning_rate": 4.7318563432835825e-05, "loss": 0.0017, "step": 22996 }, { "epoch": 21.46, "learning_rate": 4.731809701492538e-05, "loss": 0.0074, "step": 23000 }, { "epoch": 21.46, "eval_exact_match": 0.7263056092843327, "eval_exec": 0.7630560928433269, "eval_loss": 0.3736368417739868, "eval_runtime": 1044.2285, "eval_samples_per_second": 0.99, "step": 23000 }, { "epoch": 21.46, "learning_rate": 4.731763059701493e-05, "loss": 0.0002, "step": 23004 }, { "epoch": 21.46, "learning_rate": 4.7317164179104476e-05, "loss": 0.0008, "step": 23008 }, { "epoch": 21.47, "learning_rate": 4.731669776119403e-05, "loss": 0.0008, "step": 23012 }, { "epoch": 21.47, "learning_rate": 4.7316231343283586e-05, "loss": 0.0004, "step": 23016 }, { "epoch": 21.47, "learning_rate": 4.7315764925373134e-05, "loss": 0.0002, "step": 23020 }, { "epoch": 21.48, "learning_rate": 4.731529850746269e-05, "loss": 0.0021, "step": 23024 }, { "epoch": 21.48, "learning_rate": 4.7314832089552244e-05, "loss": 0.0001, "step": 23028 }, { "epoch": 21.49, "learning_rate": 4.731436567164179e-05, "loss": 0.0001, "step": 23032 }, { "epoch": 21.49, "learning_rate": 4.731389925373135e-05, "loss": 0.0063, "step": 23036 }, { "epoch": 21.49, "learning_rate": 4.7313432835820895e-05, "loss": 0.0068, "step": 23040 }, { "epoch": 21.5, "learning_rate": 4.731296641791045e-05, "loss": 0.0021, "step": 23044 }, { "epoch": 21.5, "learning_rate": 4.7312500000000005e-05, "loss": 0.0038, "step": 23048 }, { "epoch": 21.5, "learning_rate": 4.731203358208955e-05, "loss": 0.0005, "step": 23052 }, { "epoch": 21.51, "learning_rate": 4.731156716417911e-05, "loss": 0.0001, "step": 23056 }, { "epoch": 21.51, "learning_rate": 4.731110074626866e-05, "loss": 0.0007, "step": 23060 }, { "epoch": 21.51, "learning_rate": 4.731063432835821e-05, "loss": 0.0004, "step": 23064 }, { "epoch": 21.52, "learning_rate": 4.731016791044776e-05, "loss": 0.0005, "step": 23068 }, { "epoch": 21.52, "learning_rate": 4.7309701492537314e-05, "loss": 0.0006, "step": 23072 }, { "epoch": 21.53, "learning_rate": 4.730923507462687e-05, "loss": 0.0016, "step": 23076 }, { "epoch": 21.53, "learning_rate": 4.730876865671642e-05, "loss": 0.0008, "step": 23080 }, { "epoch": 21.53, "learning_rate": 4.730830223880597e-05, "loss": 0.0009, "step": 23084 }, { "epoch": 21.54, "learning_rate": 4.730783582089553e-05, "loss": 0.0008, "step": 23088 }, { "epoch": 21.54, "learning_rate": 4.7307369402985075e-05, "loss": 0.0016, "step": 23092 }, { "epoch": 21.54, "learning_rate": 4.730690298507463e-05, "loss": 0.0001, "step": 23096 }, { "epoch": 21.55, "learning_rate": 4.730643656716418e-05, "loss": 0.0047, "step": 23100 }, { "epoch": 21.55, "learning_rate": 4.730597014925373e-05, "loss": 0.0004, "step": 23104 }, { "epoch": 21.56, "learning_rate": 4.730550373134329e-05, "loss": 0.0008, "step": 23108 }, { "epoch": 21.56, "learning_rate": 4.7305037313432836e-05, "loss": 0.0011, "step": 23112 }, { "epoch": 21.56, "learning_rate": 4.730457089552239e-05, "loss": 0.0015, "step": 23116 }, { "epoch": 21.57, "learning_rate": 4.7304104477611946e-05, "loss": 0.0034, "step": 23120 }, { "epoch": 21.57, "learning_rate": 4.7303638059701494e-05, "loss": 0.0004, "step": 23124 }, { "epoch": 21.57, "learning_rate": 4.730317164179105e-05, "loss": 0.0009, "step": 23128 }, { "epoch": 21.58, "learning_rate": 4.73027052238806e-05, "loss": 0.0052, "step": 23132 }, { "epoch": 21.58, "learning_rate": 4.730223880597015e-05, "loss": 0.0007, "step": 23136 }, { "epoch": 21.59, "learning_rate": 4.730177238805971e-05, "loss": 0.0061, "step": 23140 }, { "epoch": 21.59, "learning_rate": 4.7301305970149255e-05, "loss": 0.0036, "step": 23144 }, { "epoch": 21.59, "learning_rate": 4.730083955223881e-05, "loss": 0.0002, "step": 23148 }, { "epoch": 21.6, "learning_rate": 4.7300373134328365e-05, "loss": 0.0001, "step": 23152 }, { "epoch": 21.6, "learning_rate": 4.729990671641791e-05, "loss": 0.0001, "step": 23156 }, { "epoch": 21.6, "learning_rate": 4.729944029850746e-05, "loss": 0.0013, "step": 23160 }, { "epoch": 21.61, "learning_rate": 4.729897388059702e-05, "loss": 0.0001, "step": 23164 }, { "epoch": 21.61, "learning_rate": 4.729850746268657e-05, "loss": 0.003, "step": 23168 }, { "epoch": 21.62, "learning_rate": 4.729804104477612e-05, "loss": 0.0053, "step": 23172 }, { "epoch": 21.62, "learning_rate": 4.7297574626865674e-05, "loss": 0.0004, "step": 23176 }, { "epoch": 21.62, "learning_rate": 4.729710820895523e-05, "loss": 0.0028, "step": 23180 }, { "epoch": 21.63, "learning_rate": 4.729664179104478e-05, "loss": 0.0004, "step": 23184 }, { "epoch": 21.63, "learning_rate": 4.729617537313433e-05, "loss": 0.0004, "step": 23188 }, { "epoch": 21.63, "learning_rate": 4.729570895522388e-05, "loss": 0.0018, "step": 23192 }, { "epoch": 21.64, "learning_rate": 4.7295242537313435e-05, "loss": 0.0004, "step": 23196 }, { "epoch": 21.64, "learning_rate": 4.729477611940299e-05, "loss": 0.001, "step": 23200 }, { "epoch": 21.65, "learning_rate": 4.729430970149254e-05, "loss": 0.0012, "step": 23204 }, { "epoch": 21.65, "learning_rate": 4.729384328358209e-05, "loss": 0.001, "step": 23208 }, { "epoch": 21.65, "learning_rate": 4.729337686567165e-05, "loss": 0.005, "step": 23212 }, { "epoch": 21.66, "learning_rate": 4.7292910447761196e-05, "loss": 0.0017, "step": 23216 }, { "epoch": 21.66, "learning_rate": 4.7292444029850744e-05, "loss": 0.0002, "step": 23220 }, { "epoch": 21.66, "learning_rate": 4.7291977611940306e-05, "loss": 0.0004, "step": 23224 }, { "epoch": 21.67, "learning_rate": 4.7291511194029854e-05, "loss": 0.0004, "step": 23228 }, { "epoch": 21.67, "learning_rate": 4.72910447761194e-05, "loss": 0.001, "step": 23232 }, { "epoch": 21.68, "learning_rate": 4.729057835820896e-05, "loss": 0.0002, "step": 23236 }, { "epoch": 21.68, "learning_rate": 4.729011194029851e-05, "loss": 0.0063, "step": 23240 }, { "epoch": 21.68, "learning_rate": 4.728964552238806e-05, "loss": 0.0015, "step": 23244 }, { "epoch": 21.69, "learning_rate": 4.7289179104477615e-05, "loss": 0.0066, "step": 23248 }, { "epoch": 21.69, "learning_rate": 4.728871268656716e-05, "loss": 0.0004, "step": 23252 }, { "epoch": 21.69, "learning_rate": 4.728824626865672e-05, "loss": 0.0006, "step": 23256 }, { "epoch": 21.7, "learning_rate": 4.728777985074627e-05, "loss": 0.0007, "step": 23260 }, { "epoch": 21.7, "learning_rate": 4.728731343283582e-05, "loss": 0.0007, "step": 23264 }, { "epoch": 21.71, "learning_rate": 4.7286847014925376e-05, "loss": 0.0005, "step": 23268 }, { "epoch": 21.71, "learning_rate": 4.728638059701493e-05, "loss": 0.0004, "step": 23272 }, { "epoch": 21.71, "learning_rate": 4.728591417910448e-05, "loss": 0.0004, "step": 23276 }, { "epoch": 21.72, "learning_rate": 4.728544776119403e-05, "loss": 0.0002, "step": 23280 }, { "epoch": 21.72, "learning_rate": 4.728498134328359e-05, "loss": 0.0005, "step": 23284 }, { "epoch": 21.72, "learning_rate": 4.728451492537314e-05, "loss": 0.0003, "step": 23288 }, { "epoch": 21.73, "learning_rate": 4.728404850746269e-05, "loss": 0.0016, "step": 23292 }, { "epoch": 21.73, "learning_rate": 4.728358208955224e-05, "loss": 0.0005, "step": 23296 }, { "epoch": 21.73, "learning_rate": 4.7283115671641795e-05, "loss": 0.0026, "step": 23300 }, { "epoch": 21.74, "learning_rate": 4.728264925373135e-05, "loss": 0.0004, "step": 23304 }, { "epoch": 21.74, "learning_rate": 4.72821828358209e-05, "loss": 0.0041, "step": 23308 }, { "epoch": 21.75, "learning_rate": 4.7281716417910446e-05, "loss": 0.0022, "step": 23312 }, { "epoch": 21.75, "learning_rate": 4.728125000000001e-05, "loss": 0.0001, "step": 23316 }, { "epoch": 21.75, "learning_rate": 4.7280783582089556e-05, "loss": 0.0013, "step": 23320 }, { "epoch": 21.76, "learning_rate": 4.7280317164179104e-05, "loss": 0.0005, "step": 23324 }, { "epoch": 21.76, "learning_rate": 4.727985074626866e-05, "loss": 0.0039, "step": 23328 }, { "epoch": 21.76, "learning_rate": 4.7279384328358214e-05, "loss": 0.0002, "step": 23332 }, { "epoch": 21.77, "learning_rate": 4.727891791044776e-05, "loss": 0.0032, "step": 23336 }, { "epoch": 21.77, "learning_rate": 4.7278451492537317e-05, "loss": 0.003, "step": 23340 }, { "epoch": 21.78, "learning_rate": 4.727798507462687e-05, "loss": 0.0012, "step": 23344 }, { "epoch": 21.78, "learning_rate": 4.727751865671642e-05, "loss": 0.0001, "step": 23348 }, { "epoch": 21.78, "learning_rate": 4.7277052238805975e-05, "loss": 0.0053, "step": 23352 }, { "epoch": 21.79, "learning_rate": 4.727658582089552e-05, "loss": 0.0026, "step": 23356 }, { "epoch": 21.79, "learning_rate": 4.727611940298508e-05, "loss": 0.0003, "step": 23360 }, { "epoch": 21.79, "learning_rate": 4.727565298507463e-05, "loss": 0.0002, "step": 23364 }, { "epoch": 21.8, "learning_rate": 4.727518656716418e-05, "loss": 0.0061, "step": 23368 }, { "epoch": 21.8, "learning_rate": 4.727472014925373e-05, "loss": 0.0089, "step": 23372 }, { "epoch": 21.81, "learning_rate": 4.727425373134329e-05, "loss": 0.0052, "step": 23376 }, { "epoch": 21.81, "learning_rate": 4.727378731343284e-05, "loss": 0.0004, "step": 23380 }, { "epoch": 21.81, "learning_rate": 4.727332089552239e-05, "loss": 0.0003, "step": 23384 }, { "epoch": 21.82, "learning_rate": 4.727285447761194e-05, "loss": 0.0017, "step": 23388 }, { "epoch": 21.82, "learning_rate": 4.7272388059701496e-05, "loss": 0.0068, "step": 23392 }, { "epoch": 21.82, "learning_rate": 4.7271921641791045e-05, "loss": 0.0007, "step": 23396 }, { "epoch": 21.83, "learning_rate": 4.72714552238806e-05, "loss": 0.0001, "step": 23400 }, { "epoch": 21.83, "learning_rate": 4.7270988805970154e-05, "loss": 0.0002, "step": 23404 }, { "epoch": 21.84, "learning_rate": 4.72705223880597e-05, "loss": 0.0011, "step": 23408 }, { "epoch": 21.84, "learning_rate": 4.727005597014926e-05, "loss": 0.0083, "step": 23412 }, { "epoch": 21.84, "learning_rate": 4.7269589552238806e-05, "loss": 0.0012, "step": 23416 }, { "epoch": 21.85, "learning_rate": 4.726912313432836e-05, "loss": 0.0002, "step": 23420 }, { "epoch": 21.85, "learning_rate": 4.7268656716417915e-05, "loss": 0.0012, "step": 23424 }, { "epoch": 21.85, "learning_rate": 4.7268190298507464e-05, "loss": 0.0065, "step": 23428 }, { "epoch": 21.86, "learning_rate": 4.726772388059701e-05, "loss": 0.0005, "step": 23432 }, { "epoch": 21.86, "learning_rate": 4.726725746268657e-05, "loss": 0.0027, "step": 23436 }, { "epoch": 21.87, "learning_rate": 4.726679104477612e-05, "loss": 0.0001, "step": 23440 }, { "epoch": 21.87, "learning_rate": 4.726632462686567e-05, "loss": 0.0003, "step": 23444 }, { "epoch": 21.87, "learning_rate": 4.7265858208955225e-05, "loss": 0.0003, "step": 23448 }, { "epoch": 21.88, "learning_rate": 4.726539179104478e-05, "loss": 0.0001, "step": 23452 }, { "epoch": 21.88, "learning_rate": 4.7264925373134334e-05, "loss": 0.0002, "step": 23456 }, { "epoch": 21.88, "learning_rate": 4.726445895522388e-05, "loss": 0.0014, "step": 23460 }, { "epoch": 21.89, "learning_rate": 4.726399253731344e-05, "loss": 0.0018, "step": 23464 }, { "epoch": 21.89, "learning_rate": 4.726352611940299e-05, "loss": 0.0012, "step": 23468 }, { "epoch": 21.9, "learning_rate": 4.726305970149254e-05, "loss": 0.0036, "step": 23472 }, { "epoch": 21.9, "learning_rate": 4.726259328358209e-05, "loss": 0.0006, "step": 23476 }, { "epoch": 21.9, "learning_rate": 4.7262126865671643e-05, "loss": 0.0003, "step": 23480 }, { "epoch": 21.91, "learning_rate": 4.72616604477612e-05, "loss": 0.0003, "step": 23484 }, { "epoch": 21.91, "learning_rate": 4.7261194029850747e-05, "loss": 0.0009, "step": 23488 }, { "epoch": 21.91, "learning_rate": 4.72607276119403e-05, "loss": 0.0003, "step": 23492 }, { "epoch": 21.92, "learning_rate": 4.7260261194029856e-05, "loss": 0.0002, "step": 23496 }, { "epoch": 21.92, "learning_rate": 4.7259794776119404e-05, "loss": 0.0011, "step": 23500 }, { "epoch": 21.92, "eval_exact_match": 0.7272727272727273, "eval_exec": 0.7630560928433269, "eval_loss": 0.36809346079826355, "eval_runtime": 1056.1475, "eval_samples_per_second": 0.979, "step": 23500 }, { "epoch": 21.93, "learning_rate": 4.725932835820896e-05, "loss": 0.0008, "step": 23504 }, { "epoch": 21.93, "learning_rate": 4.725886194029851e-05, "loss": 0.0004, "step": 23508 }, { "epoch": 21.93, "learning_rate": 4.725839552238806e-05, "loss": 0.0012, "step": 23512 }, { "epoch": 21.94, "learning_rate": 4.725792910447762e-05, "loss": 0.0006, "step": 23516 }, { "epoch": 21.94, "learning_rate": 4.7257462686567165e-05, "loss": 0.0019, "step": 23520 }, { "epoch": 21.94, "learning_rate": 4.7256996268656714e-05, "loss": 0.0013, "step": 23524 }, { "epoch": 21.95, "learning_rate": 4.7256529850746275e-05, "loss": 0.0033, "step": 23528 }, { "epoch": 21.95, "learning_rate": 4.7256063432835823e-05, "loss": 0.0003, "step": 23532 }, { "epoch": 21.96, "learning_rate": 4.725559701492537e-05, "loss": 0.0004, "step": 23536 }, { "epoch": 21.96, "learning_rate": 4.7255130597014926e-05, "loss": 0.001, "step": 23540 }, { "epoch": 21.96, "learning_rate": 4.725466417910448e-05, "loss": 0.0015, "step": 23544 }, { "epoch": 21.97, "learning_rate": 4.725419776119403e-05, "loss": 0.0011, "step": 23548 }, { "epoch": 21.97, "learning_rate": 4.7253731343283584e-05, "loss": 0.0004, "step": 23552 }, { "epoch": 21.97, "learning_rate": 4.725326492537314e-05, "loss": 0.0005, "step": 23556 }, { "epoch": 21.98, "learning_rate": 4.725279850746269e-05, "loss": 0.0012, "step": 23560 }, { "epoch": 21.98, "learning_rate": 4.725233208955224e-05, "loss": 0.003, "step": 23564 }, { "epoch": 21.98, "learning_rate": 4.725186567164179e-05, "loss": 0.0087, "step": 23568 }, { "epoch": 21.99, "learning_rate": 4.7251399253731345e-05, "loss": 0.0005, "step": 23572 }, { "epoch": 21.99, "learning_rate": 4.72509328358209e-05, "loss": 0.0026, "step": 23576 }, { "epoch": 22.0, "learning_rate": 4.725046641791045e-05, "loss": 0.0004, "step": 23580 }, { "epoch": 22.0, "learning_rate": 4.7249999999999997e-05, "loss": 0.0038, "step": 23584 }, { "epoch": 22.0, "learning_rate": 4.724953358208956e-05, "loss": 0.0006, "step": 23588 }, { "epoch": 22.01, "learning_rate": 4.7249067164179106e-05, "loss": 0.0012, "step": 23592 }, { "epoch": 22.01, "learning_rate": 4.7248600746268654e-05, "loss": 0.0009, "step": 23596 }, { "epoch": 22.01, "learning_rate": 4.724813432835821e-05, "loss": 0.0002, "step": 23600 }, { "epoch": 22.02, "learning_rate": 4.7247667910447764e-05, "loss": 0.0009, "step": 23604 }, { "epoch": 22.02, "learning_rate": 4.724720149253731e-05, "loss": 0.0002, "step": 23608 }, { "epoch": 22.03, "learning_rate": 4.724673507462687e-05, "loss": 0.0005, "step": 23612 }, { "epoch": 22.03, "learning_rate": 4.724626865671642e-05, "loss": 0.0022, "step": 23616 }, { "epoch": 22.03, "learning_rate": 4.724580223880598e-05, "loss": 0.0006, "step": 23620 }, { "epoch": 22.04, "learning_rate": 4.7245335820895525e-05, "loss": 0.0001, "step": 23624 }, { "epoch": 22.04, "learning_rate": 4.7244869402985073e-05, "loss": 0.0013, "step": 23628 }, { "epoch": 22.04, "learning_rate": 4.724440298507463e-05, "loss": 0.0006, "step": 23632 }, { "epoch": 22.05, "learning_rate": 4.724393656716418e-05, "loss": 0.0001, "step": 23636 }, { "epoch": 22.05, "learning_rate": 4.724347014925373e-05, "loss": 0.0021, "step": 23640 }, { "epoch": 22.06, "learning_rate": 4.7243003731343286e-05, "loss": 0.001, "step": 23644 }, { "epoch": 22.06, "learning_rate": 4.724253731343284e-05, "loss": 0.002, "step": 23648 }, { "epoch": 22.06, "learning_rate": 4.724207089552239e-05, "loss": 0.0002, "step": 23652 }, { "epoch": 22.07, "learning_rate": 4.7241604477611944e-05, "loss": 0.0001, "step": 23656 }, { "epoch": 22.07, "learning_rate": 4.724113805970149e-05, "loss": 0.0002, "step": 23660 }, { "epoch": 22.07, "learning_rate": 4.724067164179105e-05, "loss": 0.0004, "step": 23664 }, { "epoch": 22.08, "learning_rate": 4.72402052238806e-05, "loss": 0.0013, "step": 23668 }, { "epoch": 22.08, "learning_rate": 4.723973880597015e-05, "loss": 0.0005, "step": 23672 }, { "epoch": 22.09, "learning_rate": 4.7239272388059705e-05, "loss": 0.0027, "step": 23676 }, { "epoch": 22.09, "learning_rate": 4.723880597014926e-05, "loss": 0.0012, "step": 23680 }, { "epoch": 22.09, "learning_rate": 4.723833955223881e-05, "loss": 0.0015, "step": 23684 }, { "epoch": 22.1, "learning_rate": 4.7237873134328356e-05, "loss": 0.003, "step": 23688 }, { "epoch": 22.1, "learning_rate": 4.723740671641791e-05, "loss": 0.0004, "step": 23692 }, { "epoch": 22.1, "learning_rate": 4.7236940298507466e-05, "loss": 0.0002, "step": 23696 }, { "epoch": 22.11, "learning_rate": 4.7236473880597014e-05, "loss": 0.0003, "step": 23700 }, { "epoch": 22.11, "learning_rate": 4.723600746268657e-05, "loss": 0.0019, "step": 23704 }, { "epoch": 22.12, "learning_rate": 4.7235541044776124e-05, "loss": 0.0005, "step": 23708 }, { "epoch": 22.12, "learning_rate": 4.723507462686567e-05, "loss": 0.0004, "step": 23712 }, { "epoch": 22.12, "learning_rate": 4.723460820895523e-05, "loss": 0.0002, "step": 23716 }, { "epoch": 22.13, "learning_rate": 4.7234141791044775e-05, "loss": 0.0002, "step": 23720 }, { "epoch": 22.13, "learning_rate": 4.723367537313433e-05, "loss": 0.0002, "step": 23724 }, { "epoch": 22.13, "learning_rate": 4.7233208955223885e-05, "loss": 0.0082, "step": 23728 }, { "epoch": 22.14, "learning_rate": 4.723274253731343e-05, "loss": 0.0003, "step": 23732 }, { "epoch": 22.14, "learning_rate": 4.723227611940299e-05, "loss": 0.0001, "step": 23736 }, { "epoch": 22.15, "learning_rate": 4.723180970149254e-05, "loss": 0.0039, "step": 23740 }, { "epoch": 22.15, "learning_rate": 4.723134328358209e-05, "loss": 0.0001, "step": 23744 }, { "epoch": 22.15, "learning_rate": 4.723087686567164e-05, "loss": 0.0098, "step": 23748 }, { "epoch": 22.16, "learning_rate": 4.7230410447761194e-05, "loss": 0.0001, "step": 23752 }, { "epoch": 22.16, "learning_rate": 4.722994402985075e-05, "loss": 0.0003, "step": 23756 }, { "epoch": 22.16, "learning_rate": 4.72294776119403e-05, "loss": 0.001, "step": 23760 }, { "epoch": 22.17, "learning_rate": 4.722901119402985e-05, "loss": 0.0, "step": 23764 }, { "epoch": 22.17, "learning_rate": 4.722854477611941e-05, "loss": 0.0005, "step": 23768 }, { "epoch": 22.18, "learning_rate": 4.7228078358208955e-05, "loss": 0.0013, "step": 23772 }, { "epoch": 22.18, "learning_rate": 4.722761194029851e-05, "loss": 0.0003, "step": 23776 }, { "epoch": 22.18, "learning_rate": 4.722714552238806e-05, "loss": 0.0004, "step": 23780 }, { "epoch": 22.19, "learning_rate": 4.722667910447762e-05, "loss": 0.0005, "step": 23784 }, { "epoch": 22.19, "learning_rate": 4.722621268656717e-05, "loss": 0.0001, "step": 23788 }, { "epoch": 22.19, "learning_rate": 4.7225746268656716e-05, "loss": 0.0005, "step": 23792 }, { "epoch": 22.2, "learning_rate": 4.722527985074627e-05, "loss": 0.0002, "step": 23796 }, { "epoch": 22.2, "learning_rate": 4.7224813432835826e-05, "loss": 0.0021, "step": 23800 }, { "epoch": 22.21, "learning_rate": 4.7224347014925374e-05, "loss": 0.0011, "step": 23804 }, { "epoch": 22.21, "learning_rate": 4.722388059701493e-05, "loss": 0.001, "step": 23808 }, { "epoch": 22.21, "learning_rate": 4.722341417910448e-05, "loss": 0.0004, "step": 23812 }, { "epoch": 22.22, "learning_rate": 4.722294776119403e-05, "loss": 0.0082, "step": 23816 }, { "epoch": 22.22, "learning_rate": 4.722248134328359e-05, "loss": 0.0006, "step": 23820 }, { "epoch": 22.22, "learning_rate": 4.7222014925373135e-05, "loss": 0.0021, "step": 23824 }, { "epoch": 22.23, "learning_rate": 4.722154850746269e-05, "loss": 0.0002, "step": 23828 }, { "epoch": 22.23, "learning_rate": 4.7221082089552245e-05, "loss": 0.0003, "step": 23832 }, { "epoch": 22.24, "learning_rate": 4.722061567164179e-05, "loss": 0.0034, "step": 23836 }, { "epoch": 22.24, "learning_rate": 4.722014925373134e-05, "loss": 0.0007, "step": 23840 }, { "epoch": 22.24, "learning_rate": 4.72196828358209e-05, "loss": 0.0041, "step": 23844 }, { "epoch": 22.25, "learning_rate": 4.721921641791045e-05, "loss": 0.0001, "step": 23848 }, { "epoch": 22.25, "learning_rate": 4.721875e-05, "loss": 0.001, "step": 23852 }, { "epoch": 22.25, "learning_rate": 4.7218283582089554e-05, "loss": 0.0001, "step": 23856 }, { "epoch": 22.26, "learning_rate": 4.721781716417911e-05, "loss": 0.001, "step": 23860 }, { "epoch": 22.26, "learning_rate": 4.721735074626866e-05, "loss": 0.0025, "step": 23864 }, { "epoch": 22.26, "learning_rate": 4.721688432835821e-05, "loss": 0.0001, "step": 23868 }, { "epoch": 22.27, "learning_rate": 4.721641791044776e-05, "loss": 0.0023, "step": 23872 }, { "epoch": 22.27, "learning_rate": 4.7215951492537315e-05, "loss": 0.0054, "step": 23876 }, { "epoch": 22.28, "learning_rate": 4.721548507462687e-05, "loss": 0.0035, "step": 23880 }, { "epoch": 22.28, "learning_rate": 4.721501865671642e-05, "loss": 0.0015, "step": 23884 }, { "epoch": 22.28, "learning_rate": 4.721455223880597e-05, "loss": 0.0002, "step": 23888 }, { "epoch": 22.29, "learning_rate": 4.721408582089553e-05, "loss": 0.0007, "step": 23892 }, { "epoch": 22.29, "learning_rate": 4.7213619402985076e-05, "loss": 0.0007, "step": 23896 }, { "epoch": 22.29, "learning_rate": 4.7213152985074624e-05, "loss": 0.0122, "step": 23900 }, { "epoch": 22.3, "learning_rate": 4.7212686567164186e-05, "loss": 0.0012, "step": 23904 }, { "epoch": 22.3, "learning_rate": 4.7212220149253734e-05, "loss": 0.0023, "step": 23908 }, { "epoch": 22.31, "learning_rate": 4.721175373134328e-05, "loss": 0.0001, "step": 23912 }, { "epoch": 22.31, "learning_rate": 4.721128731343284e-05, "loss": 0.0003, "step": 23916 }, { "epoch": 22.31, "learning_rate": 4.721082089552239e-05, "loss": 0.0008, "step": 23920 }, { "epoch": 22.32, "learning_rate": 4.721035447761194e-05, "loss": 0.0004, "step": 23924 }, { "epoch": 22.32, "learning_rate": 4.7209888059701495e-05, "loss": 0.0005, "step": 23928 }, { "epoch": 22.32, "learning_rate": 4.720942164179104e-05, "loss": 0.0058, "step": 23932 }, { "epoch": 22.33, "learning_rate": 4.72089552238806e-05, "loss": 0.0006, "step": 23936 }, { "epoch": 22.33, "learning_rate": 4.720848880597015e-05, "loss": 0.0009, "step": 23940 }, { "epoch": 22.34, "learning_rate": 4.72080223880597e-05, "loss": 0.0004, "step": 23944 }, { "epoch": 22.34, "learning_rate": 4.7207555970149256e-05, "loss": 0.0005, "step": 23948 }, { "epoch": 22.34, "learning_rate": 4.720708955223881e-05, "loss": 0.0012, "step": 23952 }, { "epoch": 22.35, "learning_rate": 4.720662313432836e-05, "loss": 0.0026, "step": 23956 }, { "epoch": 22.35, "learning_rate": 4.7206156716417914e-05, "loss": 0.0011, "step": 23960 }, { "epoch": 22.35, "learning_rate": 4.720569029850747e-05, "loss": 0.0041, "step": 23964 }, { "epoch": 22.36, "learning_rate": 4.720522388059702e-05, "loss": 0.0008, "step": 23968 }, { "epoch": 22.36, "learning_rate": 4.720475746268657e-05, "loss": 0.0008, "step": 23972 }, { "epoch": 22.37, "learning_rate": 4.720429104477612e-05, "loss": 0.0001, "step": 23976 }, { "epoch": 22.37, "learning_rate": 4.7203824626865675e-05, "loss": 0.007, "step": 23980 }, { "epoch": 22.37, "learning_rate": 4.720335820895523e-05, "loss": 0.0003, "step": 23984 }, { "epoch": 22.38, "learning_rate": 4.720289179104478e-05, "loss": 0.0006, "step": 23988 }, { "epoch": 22.38, "learning_rate": 4.7202425373134326e-05, "loss": 0.0001, "step": 23992 }, { "epoch": 22.38, "learning_rate": 4.720195895522389e-05, "loss": 0.0005, "step": 23996 }, { "epoch": 22.39, "learning_rate": 4.7201492537313436e-05, "loss": 0.0002, "step": 24000 }, { "epoch": 22.39, "eval_exact_match": 0.7214700193423598, "eval_exec": 0.7601547388781431, "eval_loss": 0.3911060094833374, "eval_runtime": 1226.8058, "eval_samples_per_second": 0.843, "step": 24000 }, { "epoch": 22.39, "learning_rate": 4.7201026119402984e-05, "loss": 0.0042, "step": 24004 }, { "epoch": 22.4, "learning_rate": 4.720055970149254e-05, "loss": 0.0012, "step": 24008 }, { "epoch": 22.4, "learning_rate": 4.7200093283582094e-05, "loss": 0.0001, "step": 24012 }, { "epoch": 22.4, "learning_rate": 4.719962686567164e-05, "loss": 0.0004, "step": 24016 }, { "epoch": 22.41, "learning_rate": 4.71991604477612e-05, "loss": 0.0045, "step": 24020 }, { "epoch": 22.41, "learning_rate": 4.719869402985075e-05, "loss": 0.0001, "step": 24024 }, { "epoch": 22.41, "learning_rate": 4.71982276119403e-05, "loss": 0.0062, "step": 24028 }, { "epoch": 22.42, "learning_rate": 4.7197761194029855e-05, "loss": 0.0037, "step": 24032 }, { "epoch": 22.42, "learning_rate": 4.71972947761194e-05, "loss": 0.0016, "step": 24036 }, { "epoch": 22.43, "learning_rate": 4.719682835820896e-05, "loss": 0.0004, "step": 24040 }, { "epoch": 22.43, "learning_rate": 4.719636194029851e-05, "loss": 0.001, "step": 24044 }, { "epoch": 22.43, "learning_rate": 4.719589552238806e-05, "loss": 0.0024, "step": 24048 }, { "epoch": 22.44, "learning_rate": 4.719542910447761e-05, "loss": 0.0011, "step": 24052 }, { "epoch": 22.44, "learning_rate": 4.719496268656717e-05, "loss": 0.0002, "step": 24056 }, { "epoch": 22.44, "learning_rate": 4.719449626865672e-05, "loss": 0.0004, "step": 24060 }, { "epoch": 22.45, "learning_rate": 4.719402985074627e-05, "loss": 0.0029, "step": 24064 }, { "epoch": 22.45, "learning_rate": 4.719356343283582e-05, "loss": 0.0002, "step": 24068 }, { "epoch": 22.46, "learning_rate": 4.719309701492538e-05, "loss": 0.0043, "step": 24072 }, { "epoch": 22.46, "learning_rate": 4.7192630597014925e-05, "loss": 0.0003, "step": 24076 }, { "epoch": 22.46, "learning_rate": 4.719216417910448e-05, "loss": 0.0002, "step": 24080 }, { "epoch": 22.47, "learning_rate": 4.7191697761194035e-05, "loss": 0.0014, "step": 24084 }, { "epoch": 22.47, "learning_rate": 4.719123134328358e-05, "loss": 0.0008, "step": 24088 }, { "epoch": 22.47, "learning_rate": 4.719076492537314e-05, "loss": 0.0002, "step": 24092 }, { "epoch": 22.48, "learning_rate": 4.7190298507462686e-05, "loss": 0.002, "step": 24096 }, { "epoch": 22.48, "learning_rate": 4.718983208955224e-05, "loss": 0.0002, "step": 24100 }, { "epoch": 22.49, "learning_rate": 4.7189365671641796e-05, "loss": 0.0012, "step": 24104 }, { "epoch": 22.49, "learning_rate": 4.7188899253731344e-05, "loss": 0.002, "step": 24108 }, { "epoch": 22.49, "learning_rate": 4.71884328358209e-05, "loss": 0.0002, "step": 24112 }, { "epoch": 22.5, "learning_rate": 4.7187966417910454e-05, "loss": 0.0003, "step": 24116 }, { "epoch": 22.5, "learning_rate": 4.71875e-05, "loss": 0.0018, "step": 24120 }, { "epoch": 22.5, "learning_rate": 4.718703358208956e-05, "loss": 0.0011, "step": 24124 }, { "epoch": 22.51, "learning_rate": 4.7186567164179105e-05, "loss": 0.0014, "step": 24128 }, { "epoch": 22.51, "learning_rate": 4.718610074626866e-05, "loss": 0.0001, "step": 24132 }, { "epoch": 22.51, "learning_rate": 4.7185634328358215e-05, "loss": 0.0001, "step": 24136 }, { "epoch": 22.52, "learning_rate": 4.718516791044776e-05, "loss": 0.0007, "step": 24140 }, { "epoch": 22.52, "learning_rate": 4.718470149253732e-05, "loss": 0.002, "step": 24144 }, { "epoch": 22.53, "learning_rate": 4.718423507462687e-05, "loss": 0.0022, "step": 24148 }, { "epoch": 22.53, "learning_rate": 4.718376865671642e-05, "loss": 0.0014, "step": 24152 }, { "epoch": 22.53, "learning_rate": 4.718330223880597e-05, "loss": 0.0003, "step": 24156 }, { "epoch": 22.54, "learning_rate": 4.7182835820895524e-05, "loss": 0.0009, "step": 24160 }, { "epoch": 22.54, "learning_rate": 4.718236940298508e-05, "loss": 0.0016, "step": 24164 }, { "epoch": 22.54, "learning_rate": 4.718190298507463e-05, "loss": 0.0, "step": 24168 }, { "epoch": 22.55, "learning_rate": 4.718143656716418e-05, "loss": 0.0005, "step": 24172 }, { "epoch": 22.55, "learning_rate": 4.718097014925374e-05, "loss": 0.0004, "step": 24176 }, { "epoch": 22.56, "learning_rate": 4.7180503731343285e-05, "loss": 0.0014, "step": 24180 }, { "epoch": 22.56, "learning_rate": 4.718003731343284e-05, "loss": 0.0001, "step": 24184 }, { "epoch": 22.56, "learning_rate": 4.717957089552239e-05, "loss": 0.001, "step": 24188 }, { "epoch": 22.57, "learning_rate": 4.717910447761194e-05, "loss": 0.0002, "step": 24192 }, { "epoch": 22.57, "learning_rate": 4.71786380597015e-05, "loss": 0.0005, "step": 24196 }, { "epoch": 22.57, "learning_rate": 4.7178171641791046e-05, "loss": 0.0005, "step": 24200 }, { "epoch": 22.58, "learning_rate": 4.7177705223880594e-05, "loss": 0.0023, "step": 24204 }, { "epoch": 22.58, "learning_rate": 4.7177238805970156e-05, "loss": 0.0051, "step": 24208 }, { "epoch": 22.59, "learning_rate": 4.7176772388059704e-05, "loss": 0.0009, "step": 24212 }, { "epoch": 22.59, "learning_rate": 4.717630597014925e-05, "loss": 0.0091, "step": 24216 }, { "epoch": 22.59, "learning_rate": 4.717583955223881e-05, "loss": 0.0002, "step": 24220 }, { "epoch": 22.6, "learning_rate": 4.717537313432836e-05, "loss": 0.0014, "step": 24224 }, { "epoch": 22.6, "learning_rate": 4.717490671641791e-05, "loss": 0.0001, "step": 24228 }, { "epoch": 22.6, "learning_rate": 4.7174440298507465e-05, "loss": 0.0034, "step": 24232 }, { "epoch": 22.61, "learning_rate": 4.717397388059702e-05, "loss": 0.0047, "step": 24236 }, { "epoch": 22.61, "learning_rate": 4.717350746268657e-05, "loss": 0.0017, "step": 24240 }, { "epoch": 22.62, "learning_rate": 4.717304104477612e-05, "loss": 0.0013, "step": 24244 }, { "epoch": 22.62, "learning_rate": 4.717257462686567e-05, "loss": 0.0008, "step": 24248 }, { "epoch": 22.62, "learning_rate": 4.7172108208955226e-05, "loss": 0.0029, "step": 24252 }, { "epoch": 22.63, "learning_rate": 4.717164179104478e-05, "loss": 0.0023, "step": 24256 }, { "epoch": 22.63, "learning_rate": 4.717117537313433e-05, "loss": 0.0011, "step": 24260 }, { "epoch": 22.63, "learning_rate": 4.717070895522388e-05, "loss": 0.0121, "step": 24264 }, { "epoch": 22.64, "learning_rate": 4.717024253731344e-05, "loss": 0.0003, "step": 24268 }, { "epoch": 22.64, "learning_rate": 4.716977611940299e-05, "loss": 0.0011, "step": 24272 }, { "epoch": 22.65, "learning_rate": 4.716930970149254e-05, "loss": 0.0001, "step": 24276 }, { "epoch": 22.65, "learning_rate": 4.716884328358209e-05, "loss": 0.0002, "step": 24280 }, { "epoch": 22.65, "learning_rate": 4.7168376865671645e-05, "loss": 0.0005, "step": 24284 }, { "epoch": 22.66, "learning_rate": 4.71679104477612e-05, "loss": 0.0006, "step": 24288 }, { "epoch": 22.66, "learning_rate": 4.716744402985075e-05, "loss": 0.0003, "step": 24292 }, { "epoch": 22.66, "learning_rate": 4.71669776119403e-05, "loss": 0.0103, "step": 24296 }, { "epoch": 22.67, "learning_rate": 4.716651119402986e-05, "loss": 0.0001, "step": 24300 }, { "epoch": 22.67, "learning_rate": 4.7166044776119406e-05, "loss": 0.0017, "step": 24304 }, { "epoch": 22.68, "learning_rate": 4.7165578358208954e-05, "loss": 0.0011, "step": 24308 }, { "epoch": 22.68, "learning_rate": 4.716511194029851e-05, "loss": 0.0001, "step": 24312 }, { "epoch": 22.68, "learning_rate": 4.7164645522388064e-05, "loss": 0.0017, "step": 24316 }, { "epoch": 22.69, "learning_rate": 4.716417910447761e-05, "loss": 0.0002, "step": 24320 }, { "epoch": 22.69, "learning_rate": 4.7163712686567167e-05, "loss": 0.0004, "step": 24324 }, { "epoch": 22.69, "learning_rate": 4.716324626865672e-05, "loss": 0.0014, "step": 24328 }, { "epoch": 22.7, "learning_rate": 4.716277985074627e-05, "loss": 0.0087, "step": 24332 }, { "epoch": 22.7, "learning_rate": 4.7162313432835825e-05, "loss": 0.0003, "step": 24336 }, { "epoch": 22.71, "learning_rate": 4.716184701492537e-05, "loss": 0.0002, "step": 24340 }, { "epoch": 22.71, "learning_rate": 4.716138059701493e-05, "loss": 0.0077, "step": 24344 }, { "epoch": 22.71, "learning_rate": 4.716091417910448e-05, "loss": 0.0058, "step": 24348 }, { "epoch": 22.72, "learning_rate": 4.716044776119403e-05, "loss": 0.0052, "step": 24352 }, { "epoch": 22.72, "learning_rate": 4.7159981343283585e-05, "loss": 0.0028, "step": 24356 }, { "epoch": 22.72, "learning_rate": 4.715951492537314e-05, "loss": 0.0002, "step": 24360 }, { "epoch": 22.73, "learning_rate": 4.715904850746269e-05, "loss": 0.0023, "step": 24364 }, { "epoch": 22.73, "learning_rate": 4.715858208955224e-05, "loss": 0.0002, "step": 24368 }, { "epoch": 22.73, "learning_rate": 4.715811567164179e-05, "loss": 0.0004, "step": 24372 }, { "epoch": 22.74, "learning_rate": 4.7157649253731346e-05, "loss": 0.0006, "step": 24376 }, { "epoch": 22.74, "learning_rate": 4.7157182835820895e-05, "loss": 0.0004, "step": 24380 }, { "epoch": 22.75, "learning_rate": 4.715671641791045e-05, "loss": 0.0004, "step": 24384 }, { "epoch": 22.75, "learning_rate": 4.7156250000000004e-05, "loss": 0.0012, "step": 24388 }, { "epoch": 22.75, "learning_rate": 4.715578358208955e-05, "loss": 0.0012, "step": 24392 }, { "epoch": 22.76, "learning_rate": 4.715531716417911e-05, "loss": 0.0009, "step": 24396 }, { "epoch": 22.76, "learning_rate": 4.7154850746268656e-05, "loss": 0.0009, "step": 24400 }, { "epoch": 22.76, "learning_rate": 4.715438432835821e-05, "loss": 0.0004, "step": 24404 }, { "epoch": 22.77, "learning_rate": 4.7153917910447765e-05, "loss": 0.0004, "step": 24408 }, { "epoch": 22.77, "learning_rate": 4.7153451492537314e-05, "loss": 0.001, "step": 24412 }, { "epoch": 22.78, "learning_rate": 4.715298507462687e-05, "loss": 0.0005, "step": 24416 }, { "epoch": 22.78, "learning_rate": 4.715251865671642e-05, "loss": 0.0003, "step": 24420 }, { "epoch": 22.78, "learning_rate": 4.715205223880597e-05, "loss": 0.0019, "step": 24424 }, { "epoch": 22.79, "learning_rate": 4.715158582089552e-05, "loss": 0.0008, "step": 24428 }, { "epoch": 22.79, "learning_rate": 4.7151119402985075e-05, "loss": 0.0007, "step": 24432 }, { "epoch": 22.79, "learning_rate": 4.715065298507463e-05, "loss": 0.0027, "step": 24436 }, { "epoch": 22.8, "learning_rate": 4.7150186567164184e-05, "loss": 0.0009, "step": 24440 }, { "epoch": 22.8, "learning_rate": 4.714972014925373e-05, "loss": 0.0034, "step": 24444 }, { "epoch": 22.81, "learning_rate": 4.714925373134329e-05, "loss": 0.0026, "step": 24448 }, { "epoch": 22.81, "learning_rate": 4.714878731343284e-05, "loss": 0.001, "step": 24452 }, { "epoch": 22.81, "learning_rate": 4.714832089552239e-05, "loss": 0.0003, "step": 24456 }, { "epoch": 22.82, "learning_rate": 4.714785447761194e-05, "loss": 0.0001, "step": 24460 }, { "epoch": 22.82, "learning_rate": 4.71473880597015e-05, "loss": 0.0009, "step": 24464 }, { "epoch": 22.82, "learning_rate": 4.714692164179105e-05, "loss": 0.0019, "step": 24468 }, { "epoch": 22.83, "learning_rate": 4.7146455223880596e-05, "loss": 0.0003, "step": 24472 }, { "epoch": 22.83, "learning_rate": 4.714598880597015e-05, "loss": 0.0001, "step": 24476 }, { "epoch": 22.84, "learning_rate": 4.7145522388059706e-05, "loss": 0.0003, "step": 24480 }, { "epoch": 22.84, "learning_rate": 4.7145055970149254e-05, "loss": 0.0013, "step": 24484 }, { "epoch": 22.84, "learning_rate": 4.714458955223881e-05, "loss": 0.0095, "step": 24488 }, { "epoch": 22.85, "learning_rate": 4.714412313432836e-05, "loss": 0.0002, "step": 24492 }, { "epoch": 22.85, "learning_rate": 4.714365671641791e-05, "loss": 0.0009, "step": 24496 }, { "epoch": 22.85, "learning_rate": 4.714319029850747e-05, "loss": 0.0002, "step": 24500 }, { "epoch": 22.85, "eval_exact_match": 0.7292069632495164, "eval_exec": 0.7572533849129593, "eval_loss": 0.35489827394485474, "eval_runtime": 1237.9317, "eval_samples_per_second": 0.835, "step": 24500 }, { "epoch": 22.86, "learning_rate": 4.7142723880597015e-05, "loss": 0.001, "step": 24504 }, { "epoch": 22.86, "learning_rate": 4.714225746268657e-05, "loss": 0.0014, "step": 24508 }, { "epoch": 22.87, "learning_rate": 4.7141791044776125e-05, "loss": 0.0068, "step": 24512 }, { "epoch": 22.87, "learning_rate": 4.714132462686567e-05, "loss": 0.0008, "step": 24516 }, { "epoch": 22.87, "learning_rate": 4.714085820895522e-05, "loss": 0.0002, "step": 24520 }, { "epoch": 22.88, "learning_rate": 4.714039179104478e-05, "loss": 0.0031, "step": 24524 }, { "epoch": 22.88, "learning_rate": 4.713992537313433e-05, "loss": 0.0003, "step": 24528 }, { "epoch": 22.88, "learning_rate": 4.713945895522388e-05, "loss": 0.0001, "step": 24532 }, { "epoch": 22.89, "learning_rate": 4.7138992537313434e-05, "loss": 0.0041, "step": 24536 }, { "epoch": 22.89, "learning_rate": 4.713852611940299e-05, "loss": 0.0003, "step": 24540 }, { "epoch": 22.9, "learning_rate": 4.713805970149254e-05, "loss": 0.0001, "step": 24544 }, { "epoch": 22.9, "learning_rate": 4.713759328358209e-05, "loss": 0.0058, "step": 24548 }, { "epoch": 22.9, "learning_rate": 4.713712686567164e-05, "loss": 0.0006, "step": 24552 }, { "epoch": 22.91, "learning_rate": 4.7136660447761195e-05, "loss": 0.0001, "step": 24556 }, { "epoch": 22.91, "learning_rate": 4.713619402985075e-05, "loss": 0.0011, "step": 24560 }, { "epoch": 22.91, "learning_rate": 4.71357276119403e-05, "loss": 0.0019, "step": 24564 }, { "epoch": 22.92, "learning_rate": 4.713526119402985e-05, "loss": 0.0006, "step": 24568 }, { "epoch": 22.92, "learning_rate": 4.713479477611941e-05, "loss": 0.0015, "step": 24572 }, { "epoch": 22.93, "learning_rate": 4.7134328358208956e-05, "loss": 0.0004, "step": 24576 }, { "epoch": 22.93, "learning_rate": 4.7133861940298504e-05, "loss": 0.0049, "step": 24580 }, { "epoch": 22.93, "learning_rate": 4.7133395522388066e-05, "loss": 0.0001, "step": 24584 }, { "epoch": 22.94, "learning_rate": 4.7132929104477614e-05, "loss": 0.0026, "step": 24588 }, { "epoch": 22.94, "learning_rate": 4.713246268656716e-05, "loss": 0.0007, "step": 24592 }, { "epoch": 22.94, "learning_rate": 4.713199626865672e-05, "loss": 0.0002, "step": 24596 }, { "epoch": 22.95, "learning_rate": 4.713152985074627e-05, "loss": 0.0001, "step": 24600 }, { "epoch": 22.95, "learning_rate": 4.713106343283583e-05, "loss": 0.0002, "step": 24604 }, { "epoch": 22.96, "learning_rate": 4.7130597014925375e-05, "loss": 0.0204, "step": 24608 }, { "epoch": 22.96, "learning_rate": 4.7130130597014923e-05, "loss": 0.0018, "step": 24612 }, { "epoch": 22.96, "learning_rate": 4.7129664179104485e-05, "loss": 0.0037, "step": 24616 }, { "epoch": 22.97, "learning_rate": 4.712919776119403e-05, "loss": 0.0002, "step": 24620 }, { "epoch": 22.97, "learning_rate": 4.712873134328358e-05, "loss": 0.0006, "step": 24624 }, { "epoch": 22.97, "learning_rate": 4.7128264925373136e-05, "loss": 0.0034, "step": 24628 }, { "epoch": 22.98, "learning_rate": 4.712779850746269e-05, "loss": 0.0018, "step": 24632 }, { "epoch": 22.98, "learning_rate": 4.712733208955224e-05, "loss": 0.0031, "step": 24636 }, { "epoch": 22.98, "learning_rate": 4.7126865671641794e-05, "loss": 0.0002, "step": 24640 }, { "epoch": 22.99, "learning_rate": 4.712639925373135e-05, "loss": 0.002, "step": 24644 }, { "epoch": 22.99, "learning_rate": 4.71259328358209e-05, "loss": 0.0027, "step": 24648 }, { "epoch": 23.0, "learning_rate": 4.712546641791045e-05, "loss": 0.0018, "step": 24652 }, { "epoch": 23.0, "learning_rate": 4.7125e-05, "loss": 0.0015, "step": 24656 }, { "epoch": 23.0, "learning_rate": 4.7124533582089555e-05, "loss": 0.0004, "step": 24660 }, { "epoch": 23.01, "learning_rate": 4.712406716417911e-05, "loss": 0.0035, "step": 24664 }, { "epoch": 23.01, "learning_rate": 4.712360074626866e-05, "loss": 0.0002, "step": 24668 }, { "epoch": 23.01, "learning_rate": 4.7123134328358206e-05, "loss": 0.0024, "step": 24672 }, { "epoch": 23.02, "learning_rate": 4.712266791044777e-05, "loss": 0.0001, "step": 24676 }, { "epoch": 23.02, "learning_rate": 4.7122201492537316e-05, "loss": 0.0006, "step": 24680 }, { "epoch": 23.03, "learning_rate": 4.7121735074626864e-05, "loss": 0.0048, "step": 24684 }, { "epoch": 23.03, "learning_rate": 4.712126865671642e-05, "loss": 0.0005, "step": 24688 }, { "epoch": 23.03, "learning_rate": 4.7120802238805974e-05, "loss": 0.0001, "step": 24692 }, { "epoch": 23.04, "learning_rate": 4.712033582089552e-05, "loss": 0.0008, "step": 24696 }, { "epoch": 23.04, "learning_rate": 4.711986940298508e-05, "loss": 0.0006, "step": 24700 }, { "epoch": 23.04, "learning_rate": 4.711940298507463e-05, "loss": 0.0002, "step": 24704 }, { "epoch": 23.05, "learning_rate": 4.711893656716418e-05, "loss": 0.0008, "step": 24708 }, { "epoch": 23.05, "learning_rate": 4.7118470149253735e-05, "loss": 0.0002, "step": 24712 }, { "epoch": 23.06, "learning_rate": 4.711800373134328e-05, "loss": 0.0002, "step": 24716 }, { "epoch": 23.06, "learning_rate": 4.711753731343284e-05, "loss": 0.0002, "step": 24720 }, { "epoch": 23.06, "learning_rate": 4.711707089552239e-05, "loss": 0.0005, "step": 24724 }, { "epoch": 23.07, "learning_rate": 4.711660447761194e-05, "loss": 0.0009, "step": 24728 }, { "epoch": 23.07, "learning_rate": 4.711613805970149e-05, "loss": 0.0001, "step": 24732 }, { "epoch": 23.07, "learning_rate": 4.711567164179105e-05, "loss": 0.0002, "step": 24736 }, { "epoch": 23.08, "learning_rate": 4.71152052238806e-05, "loss": 0.0012, "step": 24740 }, { "epoch": 23.08, "learning_rate": 4.711473880597015e-05, "loss": 0.0002, "step": 24744 }, { "epoch": 23.09, "learning_rate": 4.71142723880597e-05, "loss": 0.0017, "step": 24748 }, { "epoch": 23.09, "learning_rate": 4.711380597014926e-05, "loss": 0.0016, "step": 24752 }, { "epoch": 23.09, "learning_rate": 4.7113339552238805e-05, "loss": 0.0005, "step": 24756 }, { "epoch": 23.1, "learning_rate": 4.711287313432836e-05, "loss": 0.0002, "step": 24760 }, { "epoch": 23.1, "learning_rate": 4.7112406716417915e-05, "loss": 0.0002, "step": 24764 }, { "epoch": 23.1, "learning_rate": 4.711194029850747e-05, "loss": 0.0092, "step": 24768 }, { "epoch": 23.11, "learning_rate": 4.711147388059702e-05, "loss": 0.0022, "step": 24772 }, { "epoch": 23.11, "learning_rate": 4.7111007462686566e-05, "loss": 0.0004, "step": 24776 }, { "epoch": 23.12, "learning_rate": 4.711054104477612e-05, "loss": 0.0006, "step": 24780 }, { "epoch": 23.12, "learning_rate": 4.7110074626865676e-05, "loss": 0.0004, "step": 24784 }, { "epoch": 23.12, "learning_rate": 4.7109608208955224e-05, "loss": 0.001, "step": 24788 }, { "epoch": 23.13, "learning_rate": 4.710914179104478e-05, "loss": 0.0002, "step": 24792 }, { "epoch": 23.13, "learning_rate": 4.7108675373134334e-05, "loss": 0.0017, "step": 24796 }, { "epoch": 23.13, "learning_rate": 4.710820895522388e-05, "loss": 0.0001, "step": 24800 }, { "epoch": 23.14, "learning_rate": 4.710774253731344e-05, "loss": 0.0017, "step": 24804 }, { "epoch": 23.14, "learning_rate": 4.7107276119402985e-05, "loss": 0.0002, "step": 24808 }, { "epoch": 23.15, "learning_rate": 4.710680970149254e-05, "loss": 0.0002, "step": 24812 }, { "epoch": 23.15, "learning_rate": 4.7106343283582095e-05, "loss": 0.0, "step": 24816 }, { "epoch": 23.15, "learning_rate": 4.710587686567164e-05, "loss": 0.0, "step": 24820 }, { "epoch": 23.16, "learning_rate": 4.71054104477612e-05, "loss": 0.0001, "step": 24824 }, { "epoch": 23.16, "learning_rate": 4.710494402985075e-05, "loss": 0.0001, "step": 24828 }, { "epoch": 23.16, "learning_rate": 4.71044776119403e-05, "loss": 0.0011, "step": 24832 }, { "epoch": 23.17, "learning_rate": 4.710401119402985e-05, "loss": 0.0004, "step": 24836 }, { "epoch": 23.17, "learning_rate": 4.7103544776119404e-05, "loss": 0.0014, "step": 24840 }, { "epoch": 23.18, "learning_rate": 4.710307835820896e-05, "loss": 0.0002, "step": 24844 }, { "epoch": 23.18, "learning_rate": 4.710261194029851e-05, "loss": 0.001, "step": 24848 }, { "epoch": 23.18, "learning_rate": 4.710214552238806e-05, "loss": 0.0003, "step": 24852 }, { "epoch": 23.19, "learning_rate": 4.710167910447762e-05, "loss": 0.0003, "step": 24856 }, { "epoch": 23.19, "learning_rate": 4.7101212686567165e-05, "loss": 0.0049, "step": 24860 }, { "epoch": 23.19, "learning_rate": 4.710074626865672e-05, "loss": 0.0021, "step": 24864 }, { "epoch": 23.2, "learning_rate": 4.710027985074627e-05, "loss": 0.0002, "step": 24868 }, { "epoch": 23.2, "learning_rate": 4.709981343283582e-05, "loss": 0.0063, "step": 24872 }, { "epoch": 23.21, "learning_rate": 4.709934701492538e-05, "loss": 0.0003, "step": 24876 }, { "epoch": 23.21, "learning_rate": 4.7098880597014926e-05, "loss": 0.0009, "step": 24880 }, { "epoch": 23.21, "learning_rate": 4.7098414179104474e-05, "loss": 0.0044, "step": 24884 }, { "epoch": 23.22, "learning_rate": 4.7097947761194036e-05, "loss": 0.0003, "step": 24888 }, { "epoch": 23.22, "learning_rate": 4.7097481343283584e-05, "loss": 0.0002, "step": 24892 }, { "epoch": 23.22, "learning_rate": 4.709701492537313e-05, "loss": 0.0008, "step": 24896 }, { "epoch": 23.23, "learning_rate": 4.709654850746269e-05, "loss": 0.0021, "step": 24900 }, { "epoch": 23.23, "learning_rate": 4.709608208955224e-05, "loss": 0.0001, "step": 24904 }, { "epoch": 23.24, "learning_rate": 4.709561567164179e-05, "loss": 0.0002, "step": 24908 }, { "epoch": 23.24, "learning_rate": 4.7095149253731345e-05, "loss": 0.0008, "step": 24912 }, { "epoch": 23.24, "learning_rate": 4.70946828358209e-05, "loss": 0.0011, "step": 24916 }, { "epoch": 23.25, "learning_rate": 4.709421641791045e-05, "loss": 0.0003, "step": 24920 }, { "epoch": 23.25, "learning_rate": 4.709375e-05, "loss": 0.0007, "step": 24924 }, { "epoch": 23.25, "learning_rate": 4.709328358208955e-05, "loss": 0.0031, "step": 24928 }, { "epoch": 23.26, "learning_rate": 4.709281716417911e-05, "loss": 0.008, "step": 24932 }, { "epoch": 23.26, "learning_rate": 4.709235074626866e-05, "loss": 0.0002, "step": 24936 }, { "epoch": 23.26, "learning_rate": 4.709188432835821e-05, "loss": 0.0032, "step": 24940 }, { "epoch": 23.27, "learning_rate": 4.7091417910447764e-05, "loss": 0.0001, "step": 24944 }, { "epoch": 23.27, "learning_rate": 4.709095149253732e-05, "loss": 0.0024, "step": 24948 }, { "epoch": 23.28, "learning_rate": 4.709048507462687e-05, "loss": 0.0003, "step": 24952 }, { "epoch": 23.28, "learning_rate": 4.709001865671642e-05, "loss": 0.0001, "step": 24956 }, { "epoch": 23.28, "learning_rate": 4.708955223880597e-05, "loss": 0.0003, "step": 24960 }, { "epoch": 23.29, "learning_rate": 4.7089085820895525e-05, "loss": 0.0003, "step": 24964 }, { "epoch": 23.29, "learning_rate": 4.708861940298508e-05, "loss": 0.0076, "step": 24968 }, { "epoch": 23.29, "learning_rate": 4.708815298507463e-05, "loss": 0.0044, "step": 24972 }, { "epoch": 23.3, "learning_rate": 4.708768656716418e-05, "loss": 0.0026, "step": 24976 }, { "epoch": 23.3, "learning_rate": 4.708722014925374e-05, "loss": 0.0001, "step": 24980 }, { "epoch": 23.31, "learning_rate": 4.7086753731343286e-05, "loss": 0.0015, "step": 24984 }, { "epoch": 23.31, "learning_rate": 4.7086287313432834e-05, "loss": 0.0001, "step": 24988 }, { "epoch": 23.31, "learning_rate": 4.708582089552239e-05, "loss": 0.0101, "step": 24992 }, { "epoch": 23.32, "learning_rate": 4.7085354477611944e-05, "loss": 0.0002, "step": 24996 }, { "epoch": 23.32, "learning_rate": 4.708488805970149e-05, "loss": 0.0001, "step": 25000 }, { "epoch": 23.32, "eval_exact_match": 0.7263056092843327, "eval_exec": 0.7620889748549323, "eval_loss": 0.3771933615207672, "eval_runtime": 1213.1376, "eval_samples_per_second": 0.852, "step": 25000 }, { "epoch": 23.32, "learning_rate": 4.708442164179105e-05, "loss": 0.0006, "step": 25004 }, { "epoch": 23.33, "learning_rate": 4.70839552238806e-05, "loss": 0.0035, "step": 25008 }, { "epoch": 23.33, "learning_rate": 4.708348880597015e-05, "loss": 0.0, "step": 25012 }, { "epoch": 23.34, "learning_rate": 4.7083022388059705e-05, "loss": 0.0009, "step": 25016 }, { "epoch": 23.34, "learning_rate": 4.708255597014925e-05, "loss": 0.0004, "step": 25020 }, { "epoch": 23.34, "learning_rate": 4.708208955223881e-05, "loss": 0.0027, "step": 25024 }, { "epoch": 23.35, "learning_rate": 4.708162313432836e-05, "loss": 0.0013, "step": 25028 }, { "epoch": 23.35, "learning_rate": 4.708115671641791e-05, "loss": 0.0016, "step": 25032 }, { "epoch": 23.35, "learning_rate": 4.7080690298507466e-05, "loss": 0.0002, "step": 25036 }, { "epoch": 23.36, "learning_rate": 4.708022388059702e-05, "loss": 0.0001, "step": 25040 }, { "epoch": 23.36, "learning_rate": 4.707975746268657e-05, "loss": 0.0003, "step": 25044 }, { "epoch": 23.37, "learning_rate": 4.707929104477612e-05, "loss": 0.0005, "step": 25048 }, { "epoch": 23.37, "learning_rate": 4.707882462686567e-05, "loss": 0.0006, "step": 25052 }, { "epoch": 23.37, "learning_rate": 4.707835820895523e-05, "loss": 0.008, "step": 25056 }, { "epoch": 23.38, "learning_rate": 4.7077891791044775e-05, "loss": 0.0058, "step": 25060 }, { "epoch": 23.38, "learning_rate": 4.707742537313433e-05, "loss": 0.0025, "step": 25064 }, { "epoch": 23.38, "learning_rate": 4.7076958955223885e-05, "loss": 0.0001, "step": 25068 }, { "epoch": 23.39, "learning_rate": 4.707649253731343e-05, "loss": 0.0002, "step": 25072 }, { "epoch": 23.39, "learning_rate": 4.707602611940299e-05, "loss": 0.0004, "step": 25076 }, { "epoch": 23.4, "learning_rate": 4.7075559701492536e-05, "loss": 0.0101, "step": 25080 }, { "epoch": 23.4, "learning_rate": 4.707509328358209e-05, "loss": 0.0012, "step": 25084 }, { "epoch": 23.4, "learning_rate": 4.7074626865671646e-05, "loss": 0.0007, "step": 25088 }, { "epoch": 23.41, "learning_rate": 4.7074160447761194e-05, "loss": 0.0014, "step": 25092 }, { "epoch": 23.41, "learning_rate": 4.707369402985075e-05, "loss": 0.0009, "step": 25096 }, { "epoch": 23.41, "learning_rate": 4.7073227611940304e-05, "loss": 0.0004, "step": 25100 }, { "epoch": 23.42, "learning_rate": 4.707276119402985e-05, "loss": 0.0006, "step": 25104 }, { "epoch": 23.42, "learning_rate": 4.707229477611941e-05, "loss": 0.0011, "step": 25108 }, { "epoch": 23.43, "learning_rate": 4.7071828358208955e-05, "loss": 0.0, "step": 25112 }, { "epoch": 23.43, "learning_rate": 4.707136194029851e-05, "loss": 0.0004, "step": 25116 }, { "epoch": 23.43, "learning_rate": 4.7070895522388065e-05, "loss": 0.0003, "step": 25120 }, { "epoch": 23.44, "learning_rate": 4.707042910447761e-05, "loss": 0.0005, "step": 25124 }, { "epoch": 23.44, "learning_rate": 4.706996268656717e-05, "loss": 0.0012, "step": 25128 }, { "epoch": 23.44, "learning_rate": 4.706949626865672e-05, "loss": 0.0, "step": 25132 }, { "epoch": 23.45, "learning_rate": 4.706902985074627e-05, "loss": 0.0012, "step": 25136 }, { "epoch": 23.45, "learning_rate": 4.706856343283582e-05, "loss": 0.0002, "step": 25140 }, { "epoch": 23.46, "learning_rate": 4.706809701492538e-05, "loss": 0.0078, "step": 25144 }, { "epoch": 23.46, "learning_rate": 4.706763059701493e-05, "loss": 0.0002, "step": 25148 }, { "epoch": 23.46, "learning_rate": 4.706716417910448e-05, "loss": 0.0062, "step": 25152 }, { "epoch": 23.47, "learning_rate": 4.706669776119403e-05, "loss": 0.0002, "step": 25156 }, { "epoch": 23.47, "learning_rate": 4.7066231343283587e-05, "loss": 0.0002, "step": 25160 }, { "epoch": 23.47, "learning_rate": 4.7065764925373135e-05, "loss": 0.003, "step": 25164 }, { "epoch": 23.48, "learning_rate": 4.706529850746269e-05, "loss": 0.0006, "step": 25168 }, { "epoch": 23.48, "learning_rate": 4.706483208955224e-05, "loss": 0.0003, "step": 25172 }, { "epoch": 23.49, "learning_rate": 4.706436567164179e-05, "loss": 0.0007, "step": 25176 }, { "epoch": 23.49, "learning_rate": 4.706389925373135e-05, "loss": 0.0002, "step": 25180 }, { "epoch": 23.49, "learning_rate": 4.7063432835820896e-05, "loss": 0.0005, "step": 25184 }, { "epoch": 23.5, "learning_rate": 4.706296641791045e-05, "loss": 0.0004, "step": 25188 }, { "epoch": 23.5, "learning_rate": 4.7062500000000006e-05, "loss": 0.0025, "step": 25192 }, { "epoch": 23.5, "learning_rate": 4.7062033582089554e-05, "loss": 0.0001, "step": 25196 }, { "epoch": 23.51, "learning_rate": 4.70615671641791e-05, "loss": 0.0006, "step": 25200 }, { "epoch": 23.51, "learning_rate": 4.7061100746268663e-05, "loss": 0.0012, "step": 25204 }, { "epoch": 23.51, "learning_rate": 4.706063432835821e-05, "loss": 0.0002, "step": 25208 }, { "epoch": 23.52, "learning_rate": 4.706016791044776e-05, "loss": 0.0003, "step": 25212 }, { "epoch": 23.52, "learning_rate": 4.7059701492537315e-05, "loss": 0.0005, "step": 25216 }, { "epoch": 23.53, "learning_rate": 4.705923507462687e-05, "loss": 0.0005, "step": 25220 }, { "epoch": 23.53, "learning_rate": 4.705876865671642e-05, "loss": 0.0001, "step": 25224 }, { "epoch": 23.53, "learning_rate": 4.705830223880597e-05, "loss": 0.0005, "step": 25228 }, { "epoch": 23.54, "learning_rate": 4.705783582089552e-05, "loss": 0.0128, "step": 25232 }, { "epoch": 23.54, "learning_rate": 4.7057369402985076e-05, "loss": 0.0123, "step": 25236 }, { "epoch": 23.54, "learning_rate": 4.705690298507463e-05, "loss": 0.0012, "step": 25240 }, { "epoch": 23.55, "learning_rate": 4.705643656716418e-05, "loss": 0.0005, "step": 25244 }, { "epoch": 23.55, "learning_rate": 4.7055970149253734e-05, "loss": 0.0001, "step": 25248 }, { "epoch": 23.56, "learning_rate": 4.705550373134329e-05, "loss": 0.0008, "step": 25252 }, { "epoch": 23.56, "learning_rate": 4.705503731343284e-05, "loss": 0.0002, "step": 25256 }, { "epoch": 23.56, "learning_rate": 4.705457089552239e-05, "loss": 0.0005, "step": 25260 }, { "epoch": 23.57, "learning_rate": 4.7054104477611946e-05, "loss": 0.0, "step": 25264 }, { "epoch": 23.57, "learning_rate": 4.7053638059701495e-05, "loss": 0.0, "step": 25268 }, { "epoch": 23.57, "learning_rate": 4.705317164179105e-05, "loss": 0.0008, "step": 25272 }, { "epoch": 23.58, "learning_rate": 4.70527052238806e-05, "loss": 0.0023, "step": 25276 }, { "epoch": 23.58, "learning_rate": 4.705223880597015e-05, "loss": 0.0004, "step": 25280 }, { "epoch": 23.59, "learning_rate": 4.705177238805971e-05, "loss": 0.0054, "step": 25284 }, { "epoch": 23.59, "learning_rate": 4.7051305970149256e-05, "loss": 0.0, "step": 25288 }, { "epoch": 23.59, "learning_rate": 4.7050839552238804e-05, "loss": 0.0004, "step": 25292 }, { "epoch": 23.6, "learning_rate": 4.7050373134328365e-05, "loss": 0.0001, "step": 25296 }, { "epoch": 23.6, "learning_rate": 4.7049906716417914e-05, "loss": 0.0001, "step": 25300 }, { "epoch": 23.6, "learning_rate": 4.704944029850746e-05, "loss": 0.001, "step": 25304 }, { "epoch": 23.61, "learning_rate": 4.7048973880597017e-05, "loss": 0.0003, "step": 25308 }, { "epoch": 23.61, "learning_rate": 4.704850746268657e-05, "loss": 0.0029, "step": 25312 }, { "epoch": 23.62, "learning_rate": 4.704804104477612e-05, "loss": 0.0015, "step": 25316 }, { "epoch": 23.62, "learning_rate": 4.7047574626865674e-05, "loss": 0.0019, "step": 25320 }, { "epoch": 23.62, "learning_rate": 4.704710820895523e-05, "loss": 0.0018, "step": 25324 }, { "epoch": 23.63, "learning_rate": 4.704664179104478e-05, "loss": 0.0, "step": 25328 }, { "epoch": 23.63, "learning_rate": 4.704617537313433e-05, "loss": 0.0007, "step": 25332 }, { "epoch": 23.63, "learning_rate": 4.704570895522388e-05, "loss": 0.004, "step": 25336 }, { "epoch": 23.64, "learning_rate": 4.7045242537313435e-05, "loss": 0.0004, "step": 25340 }, { "epoch": 23.64, "learning_rate": 4.704477611940299e-05, "loss": 0.0002, "step": 25344 }, { "epoch": 23.65, "learning_rate": 4.704430970149254e-05, "loss": 0.0033, "step": 25348 }, { "epoch": 23.65, "learning_rate": 4.704384328358209e-05, "loss": 0.0004, "step": 25352 }, { "epoch": 23.65, "learning_rate": 4.704337686567165e-05, "loss": 0.0073, "step": 25356 }, { "epoch": 23.66, "learning_rate": 4.7042910447761196e-05, "loss": 0.0004, "step": 25360 }, { "epoch": 23.66, "learning_rate": 4.7042444029850745e-05, "loss": 0.0017, "step": 25364 }, { "epoch": 23.66, "learning_rate": 4.70419776119403e-05, "loss": 0.0078, "step": 25368 }, { "epoch": 23.67, "learning_rate": 4.7041511194029854e-05, "loss": 0.0002, "step": 25372 }, { "epoch": 23.67, "learning_rate": 4.70410447761194e-05, "loss": 0.0006, "step": 25376 }, { "epoch": 23.68, "learning_rate": 4.704057835820896e-05, "loss": 0.0027, "step": 25380 }, { "epoch": 23.68, "learning_rate": 4.704011194029851e-05, "loss": 0.0021, "step": 25384 }, { "epoch": 23.68, "learning_rate": 4.703964552238806e-05, "loss": 0.0059, "step": 25388 }, { "epoch": 23.69, "learning_rate": 4.7039179104477615e-05, "loss": 0.0002, "step": 25392 }, { "epoch": 23.69, "learning_rate": 4.7038712686567164e-05, "loss": 0.0048, "step": 25396 }, { "epoch": 23.69, "learning_rate": 4.703824626865672e-05, "loss": 0.0018, "step": 25400 }, { "epoch": 23.7, "learning_rate": 4.703777985074627e-05, "loss": 0.0001, "step": 25404 }, { "epoch": 23.7, "learning_rate": 4.703731343283582e-05, "loss": 0.0035, "step": 25408 }, { "epoch": 23.71, "learning_rate": 4.7036847014925376e-05, "loss": 0.0006, "step": 25412 }, { "epoch": 23.71, "learning_rate": 4.703638059701493e-05, "loss": 0.0004, "step": 25416 }, { "epoch": 23.71, "learning_rate": 4.703591417910448e-05, "loss": 0.0, "step": 25420 }, { "epoch": 23.72, "learning_rate": 4.7035447761194034e-05, "loss": 0.0002, "step": 25424 }, { "epoch": 23.72, "learning_rate": 4.703498134328358e-05, "loss": 0.0019, "step": 25428 }, { "epoch": 23.72, "learning_rate": 4.703451492537314e-05, "loss": 0.0001, "step": 25432 }, { "epoch": 23.73, "learning_rate": 4.703404850746269e-05, "loss": 0.0087, "step": 25436 }, { "epoch": 23.73, "learning_rate": 4.703358208955224e-05, "loss": 0.0002, "step": 25440 }, { "epoch": 23.73, "learning_rate": 4.7033115671641795e-05, "loss": 0.0046, "step": 25444 }, { "epoch": 23.74, "learning_rate": 4.703264925373135e-05, "loss": 0.0002, "step": 25448 }, { "epoch": 23.74, "learning_rate": 4.70321828358209e-05, "loss": 0.0007, "step": 25452 }, { "epoch": 23.75, "learning_rate": 4.7031716417910446e-05, "loss": 0.0033, "step": 25456 }, { "epoch": 23.75, "learning_rate": 4.703125e-05, "loss": 0.0002, "step": 25460 }, { "epoch": 23.75, "learning_rate": 4.7030783582089556e-05, "loss": 0.0004, "step": 25464 }, { "epoch": 23.76, "learning_rate": 4.7030317164179104e-05, "loss": 0.0019, "step": 25468 }, { "epoch": 23.76, "learning_rate": 4.702985074626866e-05, "loss": 0.0056, "step": 25472 }, { "epoch": 23.76, "learning_rate": 4.7029384328358214e-05, "loss": 0.0001, "step": 25476 }, { "epoch": 23.77, "learning_rate": 4.702891791044776e-05, "loss": 0.0001, "step": 25480 }, { "epoch": 23.77, "learning_rate": 4.702845149253732e-05, "loss": 0.0007, "step": 25484 }, { "epoch": 23.78, "learning_rate": 4.7027985074626865e-05, "loss": 0.0028, "step": 25488 }, { "epoch": 23.78, "learning_rate": 4.702751865671642e-05, "loss": 0.0002, "step": 25492 }, { "epoch": 23.78, "learning_rate": 4.7027052238805975e-05, "loss": 0.0005, "step": 25496 }, { "epoch": 23.79, "learning_rate": 4.702658582089552e-05, "loss": 0.0029, "step": 25500 }, { "epoch": 23.79, "eval_exact_match": 0.7369439071566731, "eval_exec": 0.7611218568665378, "eval_loss": 0.3535132110118866, "eval_runtime": 1197.1055, "eval_samples_per_second": 0.864, "step": 25500 }, { "epoch": 23.79, "learning_rate": 4.702611940298508e-05, "loss": 0.0041, "step": 25504 }, { "epoch": 23.79, "learning_rate": 4.702565298507463e-05, "loss": 0.0005, "step": 25508 }, { "epoch": 23.8, "learning_rate": 4.702518656716418e-05, "loss": 0.0014, "step": 25512 }, { "epoch": 23.8, "learning_rate": 4.702472014925373e-05, "loss": 0.0017, "step": 25516 }, { "epoch": 23.81, "learning_rate": 4.7024253731343284e-05, "loss": 0.0065, "step": 25520 }, { "epoch": 23.81, "learning_rate": 4.702378731343284e-05, "loss": 0.0002, "step": 25524 }, { "epoch": 23.81, "learning_rate": 4.702332089552239e-05, "loss": 0.0034, "step": 25528 }, { "epoch": 23.82, "learning_rate": 4.702285447761194e-05, "loss": 0.0004, "step": 25532 }, { "epoch": 23.82, "learning_rate": 4.70223880597015e-05, "loss": 0.0005, "step": 25536 }, { "epoch": 23.82, "learning_rate": 4.7021921641791045e-05, "loss": 0.004, "step": 25540 }, { "epoch": 23.83, "learning_rate": 4.70214552238806e-05, "loss": 0.002, "step": 25544 }, { "epoch": 23.83, "learning_rate": 4.702098880597015e-05, "loss": 0.0006, "step": 25548 }, { "epoch": 23.84, "learning_rate": 4.70205223880597e-05, "loss": 0.0009, "step": 25552 }, { "epoch": 23.84, "learning_rate": 4.702005597014926e-05, "loss": 0.0043, "step": 25556 }, { "epoch": 23.84, "learning_rate": 4.7019589552238806e-05, "loss": 0.0052, "step": 25560 }, { "epoch": 23.85, "learning_rate": 4.7019123134328354e-05, "loss": 0.0047, "step": 25564 }, { "epoch": 23.85, "learning_rate": 4.7018656716417916e-05, "loss": 0.0028, "step": 25568 }, { "epoch": 23.85, "learning_rate": 4.7018190298507464e-05, "loss": 0.0003, "step": 25572 }, { "epoch": 23.86, "learning_rate": 4.701772388059702e-05, "loss": 0.0004, "step": 25576 }, { "epoch": 23.86, "learning_rate": 4.701725746268657e-05, "loss": 0.0006, "step": 25580 }, { "epoch": 23.87, "learning_rate": 4.701679104477612e-05, "loss": 0.0011, "step": 25584 }, { "epoch": 23.87, "learning_rate": 4.701632462686568e-05, "loss": 0.0001, "step": 25588 }, { "epoch": 23.87, "learning_rate": 4.7015858208955225e-05, "loss": 0.0003, "step": 25592 }, { "epoch": 23.88, "learning_rate": 4.701539179104478e-05, "loss": 0.0046, "step": 25596 }, { "epoch": 23.88, "learning_rate": 4.7014925373134335e-05, "loss": 0.0064, "step": 25600 }, { "epoch": 23.88, "learning_rate": 4.701445895522388e-05, "loss": 0.0011, "step": 25604 }, { "epoch": 23.89, "learning_rate": 4.701399253731343e-05, "loss": 0.0002, "step": 25608 }, { "epoch": 23.89, "learning_rate": 4.701352611940299e-05, "loss": 0.0006, "step": 25612 }, { "epoch": 23.9, "learning_rate": 4.701305970149254e-05, "loss": 0.0022, "step": 25616 }, { "epoch": 23.9, "learning_rate": 4.701259328358209e-05, "loss": 0.0024, "step": 25620 }, { "epoch": 23.9, "learning_rate": 4.7012126865671644e-05, "loss": 0.0019, "step": 25624 }, { "epoch": 23.91, "learning_rate": 4.70116604477612e-05, "loss": 0.0004, "step": 25628 }, { "epoch": 23.91, "learning_rate": 4.701119402985075e-05, "loss": 0.0001, "step": 25632 }, { "epoch": 23.91, "learning_rate": 4.70107276119403e-05, "loss": 0.0006, "step": 25636 }, { "epoch": 23.92, "learning_rate": 4.701026119402985e-05, "loss": 0.0044, "step": 25640 }, { "epoch": 23.92, "learning_rate": 4.7009794776119405e-05, "loss": 0.0003, "step": 25644 }, { "epoch": 23.93, "learning_rate": 4.700932835820896e-05, "loss": 0.0001, "step": 25648 }, { "epoch": 23.93, "learning_rate": 4.700886194029851e-05, "loss": 0.0002, "step": 25652 }, { "epoch": 23.93, "learning_rate": 4.700839552238806e-05, "loss": 0.0016, "step": 25656 }, { "epoch": 23.94, "learning_rate": 4.700792910447762e-05, "loss": 0.0002, "step": 25660 }, { "epoch": 23.94, "learning_rate": 4.7007462686567166e-05, "loss": 0.0004, "step": 25664 }, { "epoch": 23.94, "learning_rate": 4.7006996268656714e-05, "loss": 0.0001, "step": 25668 }, { "epoch": 23.95, "learning_rate": 4.7006529850746276e-05, "loss": 0.0001, "step": 25672 }, { "epoch": 23.95, "learning_rate": 4.7006063432835824e-05, "loss": 0.0003, "step": 25676 }, { "epoch": 23.96, "learning_rate": 4.700559701492537e-05, "loss": 0.0012, "step": 25680 }, { "epoch": 23.96, "learning_rate": 4.700513059701493e-05, "loss": 0.0006, "step": 25684 }, { "epoch": 23.96, "learning_rate": 4.700466417910448e-05, "loss": 0.0028, "step": 25688 }, { "epoch": 23.97, "learning_rate": 4.700419776119403e-05, "loss": 0.0014, "step": 25692 }, { "epoch": 23.97, "learning_rate": 4.7003731343283585e-05, "loss": 0.0002, "step": 25696 }, { "epoch": 23.97, "learning_rate": 4.700326492537313e-05, "loss": 0.0001, "step": 25700 }, { "epoch": 23.98, "learning_rate": 4.700279850746269e-05, "loss": 0.0004, "step": 25704 }, { "epoch": 23.98, "learning_rate": 4.700233208955224e-05, "loss": 0.0058, "step": 25708 }, { "epoch": 23.98, "learning_rate": 4.700186567164179e-05, "loss": 0.0001, "step": 25712 }, { "epoch": 23.99, "learning_rate": 4.7001399253731346e-05, "loss": 0.005, "step": 25716 }, { "epoch": 23.99, "learning_rate": 4.70009328358209e-05, "loss": 0.0002, "step": 25720 }, { "epoch": 24.0, "learning_rate": 4.700046641791045e-05, "loss": 0.0001, "step": 25724 }, { "epoch": 24.0, "learning_rate": 4.7e-05, "loss": 0.0021, "step": 25728 }, { "epoch": 24.0, "learning_rate": 4.699953358208955e-05, "loss": 0.0013, "step": 25732 }, { "epoch": 24.01, "learning_rate": 4.699906716417911e-05, "loss": 0.0007, "step": 25736 }, { "epoch": 24.01, "learning_rate": 4.699860074626866e-05, "loss": 0.0076, "step": 25740 }, { "epoch": 24.01, "learning_rate": 4.699813432835821e-05, "loss": 0.0079, "step": 25744 }, { "epoch": 24.02, "learning_rate": 4.6997667910447765e-05, "loss": 0.0001, "step": 25748 }, { "epoch": 24.02, "learning_rate": 4.699720149253732e-05, "loss": 0.0084, "step": 25752 }, { "epoch": 24.03, "learning_rate": 4.699673507462687e-05, "loss": 0.0021, "step": 25756 }, { "epoch": 24.03, "learning_rate": 4.6996268656716416e-05, "loss": 0.0029, "step": 25760 }, { "epoch": 24.03, "learning_rate": 4.699580223880598e-05, "loss": 0.0023, "step": 25764 }, { "epoch": 24.04, "learning_rate": 4.6995335820895526e-05, "loss": 0.0001, "step": 25768 }, { "epoch": 24.04, "learning_rate": 4.6994869402985074e-05, "loss": 0.0003, "step": 25772 }, { "epoch": 24.04, "learning_rate": 4.699440298507463e-05, "loss": 0.0002, "step": 25776 }, { "epoch": 24.05, "learning_rate": 4.6993936567164184e-05, "loss": 0.001, "step": 25780 }, { "epoch": 24.05, "learning_rate": 4.699347014925373e-05, "loss": 0.0001, "step": 25784 }, { "epoch": 24.06, "learning_rate": 4.699300373134329e-05, "loss": 0.0008, "step": 25788 }, { "epoch": 24.06, "learning_rate": 4.6992537313432835e-05, "loss": 0.0007, "step": 25792 }, { "epoch": 24.06, "learning_rate": 4.699207089552239e-05, "loss": 0.0001, "step": 25796 }, { "epoch": 24.07, "learning_rate": 4.6991604477611945e-05, "loss": 0.0021, "step": 25800 }, { "epoch": 24.07, "learning_rate": 4.699113805970149e-05, "loss": 0.0002, "step": 25804 }, { "epoch": 24.07, "learning_rate": 4.699067164179105e-05, "loss": 0.0013, "step": 25808 }, { "epoch": 24.08, "learning_rate": 4.69902052238806e-05, "loss": 0.0077, "step": 25812 }, { "epoch": 24.08, "learning_rate": 4.698973880597015e-05, "loss": 0.0002, "step": 25816 }, { "epoch": 24.09, "learning_rate": 4.69892723880597e-05, "loss": 0.0005, "step": 25820 }, { "epoch": 24.09, "learning_rate": 4.698880597014926e-05, "loss": 0.0043, "step": 25824 }, { "epoch": 24.09, "learning_rate": 4.698833955223881e-05, "loss": 0.0001, "step": 25828 }, { "epoch": 24.1, "learning_rate": 4.698787313432836e-05, "loss": 0.0002, "step": 25832 }, { "epoch": 24.1, "learning_rate": 4.698740671641791e-05, "loss": 0.0002, "step": 25836 }, { "epoch": 24.1, "learning_rate": 4.698694029850747e-05, "loss": 0.0008, "step": 25840 }, { "epoch": 24.11, "learning_rate": 4.6986473880597015e-05, "loss": 0.0008, "step": 25844 }, { "epoch": 24.11, "learning_rate": 4.698600746268657e-05, "loss": 0.0014, "step": 25848 }, { "epoch": 24.12, "learning_rate": 4.698554104477612e-05, "loss": 0.0018, "step": 25852 }, { "epoch": 24.12, "learning_rate": 4.698507462686567e-05, "loss": 0.0002, "step": 25856 }, { "epoch": 24.12, "learning_rate": 4.698460820895523e-05, "loss": 0.0042, "step": 25860 }, { "epoch": 24.13, "learning_rate": 4.6984141791044776e-05, "loss": 0.0026, "step": 25864 }, { "epoch": 24.13, "learning_rate": 4.698367537313433e-05, "loss": 0.002, "step": 25868 }, { "epoch": 24.13, "learning_rate": 4.6983208955223886e-05, "loss": 0.001, "step": 25872 }, { "epoch": 24.14, "learning_rate": 4.6982742537313434e-05, "loss": 0.0002, "step": 25876 }, { "epoch": 24.14, "learning_rate": 4.698227611940298e-05, "loss": 0.0003, "step": 25880 }, { "epoch": 24.15, "learning_rate": 4.6981809701492544e-05, "loss": 0.0003, "step": 25884 }, { "epoch": 24.15, "learning_rate": 4.698134328358209e-05, "loss": 0.0002, "step": 25888 }, { "epoch": 24.15, "learning_rate": 4.698087686567164e-05, "loss": 0.0003, "step": 25892 }, { "epoch": 24.16, "learning_rate": 4.6980410447761195e-05, "loss": 0.0001, "step": 25896 }, { "epoch": 24.16, "learning_rate": 4.697994402985075e-05, "loss": 0.0001, "step": 25900 }, { "epoch": 24.16, "learning_rate": 4.6979477611940305e-05, "loss": 0.0001, "step": 25904 }, { "epoch": 24.17, "learning_rate": 4.697901119402985e-05, "loss": 0.0002, "step": 25908 }, { "epoch": 24.17, "learning_rate": 4.69785447761194e-05, "loss": 0.0003, "step": 25912 }, { "epoch": 24.18, "learning_rate": 4.697807835820896e-05, "loss": 0.0002, "step": 25916 }, { "epoch": 24.18, "learning_rate": 4.697761194029851e-05, "loss": 0.0002, "step": 25920 }, { "epoch": 24.18, "learning_rate": 4.697714552238806e-05, "loss": 0.0034, "step": 25924 }, { "epoch": 24.19, "learning_rate": 4.6976679104477614e-05, "loss": 0.0027, "step": 25928 }, { "epoch": 24.19, "learning_rate": 4.697621268656717e-05, "loss": 0.001, "step": 25932 }, { "epoch": 24.19, "learning_rate": 4.697574626865672e-05, "loss": 0.0009, "step": 25936 }, { "epoch": 24.2, "learning_rate": 4.697527985074627e-05, "loss": 0.0001, "step": 25940 }, { "epoch": 24.2, "learning_rate": 4.697481343283583e-05, "loss": 0.0003, "step": 25944 }, { "epoch": 24.21, "learning_rate": 4.6974347014925375e-05, "loss": 0.0002, "step": 25948 }, { "epoch": 24.21, "learning_rate": 4.697388059701493e-05, "loss": 0.0006, "step": 25952 }, { "epoch": 24.21, "learning_rate": 4.697341417910448e-05, "loss": 0.0006, "step": 25956 }, { "epoch": 24.22, "learning_rate": 4.697294776119403e-05, "loss": 0.0022, "step": 25960 }, { "epoch": 24.22, "learning_rate": 4.697248134328359e-05, "loss": 0.0067, "step": 25964 }, { "epoch": 24.22, "learning_rate": 4.6972014925373136e-05, "loss": 0.0022, "step": 25968 }, { "epoch": 24.23, "learning_rate": 4.6971548507462684e-05, "loss": 0.0004, "step": 25972 }, { "epoch": 24.23, "learning_rate": 4.6971082089552246e-05, "loss": 0.0001, "step": 25976 }, { "epoch": 24.24, "learning_rate": 4.6970615671641794e-05, "loss": 0.0108, "step": 25980 }, { "epoch": 24.24, "learning_rate": 4.697014925373134e-05, "loss": 0.0007, "step": 25984 }, { "epoch": 24.24, "learning_rate": 4.69696828358209e-05, "loss": 0.0003, "step": 25988 }, { "epoch": 24.25, "learning_rate": 4.696921641791045e-05, "loss": 0.0008, "step": 25992 }, { "epoch": 24.25, "learning_rate": 4.696875e-05, "loss": 0.0006, "step": 25996 }, { "epoch": 24.25, "learning_rate": 4.6968283582089555e-05, "loss": 0.0008, "step": 26000 }, { "epoch": 24.25, "eval_exact_match": 0.7330754352030948, "eval_exec": 0.7678916827852998, "eval_loss": 0.36795204877853394, "eval_runtime": 1555.2766, "eval_samples_per_second": 0.665, "step": 26000 }, { "epoch": 24.26, "learning_rate": 4.696781716417911e-05, "loss": 0.0003, "step": 26004 }, { "epoch": 24.26, "learning_rate": 4.696735074626866e-05, "loss": 0.0002, "step": 26008 }, { "epoch": 24.26, "learning_rate": 4.696688432835821e-05, "loss": 0.0018, "step": 26012 }, { "epoch": 24.27, "learning_rate": 4.696641791044776e-05, "loss": 0.0021, "step": 26016 }, { "epoch": 24.27, "learning_rate": 4.6965951492537316e-05, "loss": 0.0, "step": 26020 }, { "epoch": 24.28, "learning_rate": 4.696548507462687e-05, "loss": 0.0002, "step": 26024 }, { "epoch": 24.28, "learning_rate": 4.696501865671642e-05, "loss": 0.0002, "step": 26028 }, { "epoch": 24.28, "learning_rate": 4.696455223880597e-05, "loss": 0.0048, "step": 26032 }, { "epoch": 24.29, "learning_rate": 4.696408582089553e-05, "loss": 0.0005, "step": 26036 }, { "epoch": 24.29, "learning_rate": 4.696361940298508e-05, "loss": 0.0003, "step": 26040 }, { "epoch": 24.29, "learning_rate": 4.6963152985074625e-05, "loss": 0.0004, "step": 26044 }, { "epoch": 24.3, "learning_rate": 4.696268656716418e-05, "loss": 0.0064, "step": 26048 }, { "epoch": 24.3, "learning_rate": 4.6962220149253735e-05, "loss": 0.0003, "step": 26052 }, { "epoch": 24.31, "learning_rate": 4.696175373134328e-05, "loss": 0.0032, "step": 26056 }, { "epoch": 24.31, "learning_rate": 4.696128731343284e-05, "loss": 0.001, "step": 26060 }, { "epoch": 24.31, "learning_rate": 4.696082089552239e-05, "loss": 0.0002, "step": 26064 }, { "epoch": 24.32, "learning_rate": 4.696035447761195e-05, "loss": 0.0021, "step": 26068 }, { "epoch": 24.32, "learning_rate": 4.6959888059701496e-05, "loss": 0.0005, "step": 26072 }, { "epoch": 24.32, "learning_rate": 4.6959421641791044e-05, "loss": 0.0007, "step": 26076 }, { "epoch": 24.33, "learning_rate": 4.69589552238806e-05, "loss": 0.0006, "step": 26080 }, { "epoch": 24.33, "learning_rate": 4.6958488805970154e-05, "loss": 0.0029, "step": 26084 }, { "epoch": 24.34, "learning_rate": 4.69580223880597e-05, "loss": 0.0002, "step": 26088 }, { "epoch": 24.34, "learning_rate": 4.695755597014926e-05, "loss": 0.0014, "step": 26092 }, { "epoch": 24.34, "learning_rate": 4.695708955223881e-05, "loss": 0.0, "step": 26096 }, { "epoch": 24.35, "learning_rate": 4.695662313432836e-05, "loss": 0.0015, "step": 26100 }, { "epoch": 24.35, "learning_rate": 4.6956156716417915e-05, "loss": 0.0001, "step": 26104 }, { "epoch": 24.35, "learning_rate": 4.695569029850746e-05, "loss": 0.0002, "step": 26108 }, { "epoch": 24.36, "learning_rate": 4.695522388059702e-05, "loss": 0.0002, "step": 26112 }, { "epoch": 24.36, "learning_rate": 4.695475746268657e-05, "loss": 0.0002, "step": 26116 }, { "epoch": 24.37, "learning_rate": 4.695429104477612e-05, "loss": 0.0002, "step": 26120 }, { "epoch": 24.37, "learning_rate": 4.6953824626865676e-05, "loss": 0.0012, "step": 26124 }, { "epoch": 24.37, "learning_rate": 4.695335820895523e-05, "loss": 0.0001, "step": 26128 }, { "epoch": 24.38, "learning_rate": 4.695289179104478e-05, "loss": 0.0002, "step": 26132 }, { "epoch": 24.38, "learning_rate": 4.695242537313433e-05, "loss": 0.0003, "step": 26136 }, { "epoch": 24.38, "learning_rate": 4.695195895522388e-05, "loss": 0.0046, "step": 26140 }, { "epoch": 24.39, "learning_rate": 4.6951492537313437e-05, "loss": 0.0003, "step": 26144 }, { "epoch": 24.39, "learning_rate": 4.6951026119402985e-05, "loss": 0.0003, "step": 26148 }, { "epoch": 24.4, "learning_rate": 4.695055970149254e-05, "loss": 0.0003, "step": 26152 }, { "epoch": 24.4, "learning_rate": 4.6950093283582095e-05, "loss": 0.0004, "step": 26156 }, { "epoch": 24.4, "learning_rate": 4.694962686567164e-05, "loss": 0.0003, "step": 26160 }, { "epoch": 24.41, "learning_rate": 4.69491604477612e-05, "loss": 0.0001, "step": 26164 }, { "epoch": 24.41, "learning_rate": 4.6948694029850746e-05, "loss": 0.001, "step": 26168 }, { "epoch": 24.41, "learning_rate": 4.69482276119403e-05, "loss": 0.0015, "step": 26172 }, { "epoch": 24.42, "learning_rate": 4.6947761194029856e-05, "loss": 0.0002, "step": 26176 }, { "epoch": 24.42, "learning_rate": 4.6947294776119404e-05, "loss": 0.0003, "step": 26180 }, { "epoch": 24.43, "learning_rate": 4.694682835820896e-05, "loss": 0.0017, "step": 26184 }, { "epoch": 24.43, "learning_rate": 4.6946361940298513e-05, "loss": 0.0001, "step": 26188 }, { "epoch": 24.43, "learning_rate": 4.694589552238806e-05, "loss": 0.0001, "step": 26192 }, { "epoch": 24.44, "learning_rate": 4.694542910447761e-05, "loss": 0.001, "step": 26196 }, { "epoch": 24.44, "learning_rate": 4.6944962686567165e-05, "loss": 0.0061, "step": 26200 }, { "epoch": 24.44, "learning_rate": 4.694449626865672e-05, "loss": 0.0005, "step": 26204 }, { "epoch": 24.45, "learning_rate": 4.694402985074627e-05, "loss": 0.0012, "step": 26208 }, { "epoch": 24.45, "learning_rate": 4.694356343283582e-05, "loss": 0.0001, "step": 26212 }, { "epoch": 24.46, "learning_rate": 4.694309701492538e-05, "loss": 0.0, "step": 26216 }, { "epoch": 24.46, "learning_rate": 4.6942630597014926e-05, "loss": 0.0, "step": 26220 }, { "epoch": 24.46, "learning_rate": 4.694216417910448e-05, "loss": 0.0006, "step": 26224 }, { "epoch": 24.47, "learning_rate": 4.694169776119403e-05, "loss": 0.0, "step": 26228 }, { "epoch": 24.47, "learning_rate": 4.694123134328359e-05, "loss": 0.0011, "step": 26232 }, { "epoch": 24.47, "learning_rate": 4.694076492537314e-05, "loss": 0.0003, "step": 26236 }, { "epoch": 24.48, "learning_rate": 4.6940298507462687e-05, "loss": 0.0007, "step": 26240 }, { "epoch": 24.48, "learning_rate": 4.693983208955224e-05, "loss": 0.0036, "step": 26244 }, { "epoch": 24.49, "learning_rate": 4.6939365671641796e-05, "loss": 0.0004, "step": 26248 }, { "epoch": 24.49, "learning_rate": 4.6938899253731345e-05, "loss": 0.0017, "step": 26252 }, { "epoch": 24.49, "learning_rate": 4.69384328358209e-05, "loss": 0.0001, "step": 26256 }, { "epoch": 24.5, "learning_rate": 4.693796641791045e-05, "loss": 0.0001, "step": 26260 }, { "epoch": 24.5, "learning_rate": 4.69375e-05, "loss": 0.0002, "step": 26264 }, { "epoch": 24.5, "learning_rate": 4.693703358208956e-05, "loss": 0.0016, "step": 26268 }, { "epoch": 24.51, "learning_rate": 4.6936567164179106e-05, "loss": 0.0008, "step": 26272 }, { "epoch": 24.51, "learning_rate": 4.693610074626866e-05, "loss": 0.0031, "step": 26276 }, { "epoch": 24.51, "learning_rate": 4.6935634328358215e-05, "loss": 0.0004, "step": 26280 }, { "epoch": 24.52, "learning_rate": 4.6935167910447763e-05, "loss": 0.0013, "step": 26284 }, { "epoch": 24.52, "learning_rate": 4.693470149253731e-05, "loss": 0.0001, "step": 26288 }, { "epoch": 24.53, "learning_rate": 4.693423507462687e-05, "loss": 0.0001, "step": 26292 }, { "epoch": 24.53, "learning_rate": 4.693376865671642e-05, "loss": 0.0015, "step": 26296 }, { "epoch": 24.53, "learning_rate": 4.693330223880597e-05, "loss": 0.0008, "step": 26300 }, { "epoch": 24.54, "learning_rate": 4.6932835820895524e-05, "loss": 0.0001, "step": 26304 }, { "epoch": 24.54, "learning_rate": 4.693236940298508e-05, "loss": 0.0003, "step": 26308 }, { "epoch": 24.54, "learning_rate": 4.693190298507463e-05, "loss": 0.0002, "step": 26312 }, { "epoch": 24.55, "learning_rate": 4.693143656716418e-05, "loss": 0.0005, "step": 26316 }, { "epoch": 24.55, "learning_rate": 4.693097014925373e-05, "loss": 0.0002, "step": 26320 }, { "epoch": 24.56, "learning_rate": 4.6930503731343285e-05, "loss": 0.0001, "step": 26324 }, { "epoch": 24.56, "learning_rate": 4.693003731343284e-05, "loss": 0.0004, "step": 26328 }, { "epoch": 24.56, "learning_rate": 4.692957089552239e-05, "loss": 0.002, "step": 26332 }, { "epoch": 24.57, "learning_rate": 4.6929104477611943e-05, "loss": 0.0001, "step": 26336 }, { "epoch": 24.57, "learning_rate": 4.69286380597015e-05, "loss": 0.0005, "step": 26340 }, { "epoch": 24.57, "learning_rate": 4.6928171641791046e-05, "loss": 0.0063, "step": 26344 }, { "epoch": 24.58, "learning_rate": 4.6927705223880595e-05, "loss": 0.0003, "step": 26348 }, { "epoch": 24.58, "learning_rate": 4.6927238805970156e-05, "loss": 0.0011, "step": 26352 }, { "epoch": 24.59, "learning_rate": 4.6926772388059704e-05, "loss": 0.0027, "step": 26356 }, { "epoch": 24.59, "learning_rate": 4.692630597014925e-05, "loss": 0.0006, "step": 26360 }, { "epoch": 24.59, "learning_rate": 4.692583955223881e-05, "loss": 0.0011, "step": 26364 }, { "epoch": 24.6, "learning_rate": 4.692537313432836e-05, "loss": 0.0004, "step": 26368 }, { "epoch": 24.6, "learning_rate": 4.692490671641791e-05, "loss": 0.0005, "step": 26372 }, { "epoch": 24.6, "learning_rate": 4.6924440298507465e-05, "loss": 0.0002, "step": 26376 }, { "epoch": 24.61, "learning_rate": 4.6923973880597014e-05, "loss": 0.0004, "step": 26380 }, { "epoch": 24.61, "learning_rate": 4.692350746268657e-05, "loss": 0.0011, "step": 26384 }, { "epoch": 24.62, "learning_rate": 4.692304104477612e-05, "loss": 0.0001, "step": 26388 }, { "epoch": 24.62, "learning_rate": 4.692257462686567e-05, "loss": 0.0, "step": 26392 }, { "epoch": 24.62, "learning_rate": 4.6922108208955226e-05, "loss": 0.0006, "step": 26396 }, { "epoch": 24.63, "learning_rate": 4.692164179104478e-05, "loss": 0.0, "step": 26400 }, { "epoch": 24.63, "learning_rate": 4.692117537313433e-05, "loss": 0.0031, "step": 26404 }, { "epoch": 24.63, "learning_rate": 4.6920708955223884e-05, "loss": 0.0009, "step": 26408 }, { "epoch": 24.64, "learning_rate": 4.692024253731343e-05, "loss": 0.0048, "step": 26412 }, { "epoch": 24.64, "learning_rate": 4.691977611940299e-05, "loss": 0.0003, "step": 26416 }, { "epoch": 24.65, "learning_rate": 4.691930970149254e-05, "loss": 0.0, "step": 26420 }, { "epoch": 24.65, "learning_rate": 4.691884328358209e-05, "loss": 0.0005, "step": 26424 }, { "epoch": 24.65, "learning_rate": 4.6918376865671645e-05, "loss": 0.0022, "step": 26428 }, { "epoch": 24.66, "learning_rate": 4.69179104477612e-05, "loss": 0.0002, "step": 26432 }, { "epoch": 24.66, "learning_rate": 4.691744402985075e-05, "loss": 0.0001, "step": 26436 }, { "epoch": 24.66, "learning_rate": 4.6916977611940296e-05, "loss": 0.0015, "step": 26440 }, { "epoch": 24.67, "learning_rate": 4.691651119402986e-05, "loss": 0.0003, "step": 26444 }, { "epoch": 24.67, "learning_rate": 4.6916044776119406e-05, "loss": 0.0035, "step": 26448 }, { "epoch": 24.68, "learning_rate": 4.6915578358208954e-05, "loss": 0.0034, "step": 26452 }, { "epoch": 24.68, "learning_rate": 4.691511194029851e-05, "loss": 0.0004, "step": 26456 }, { "epoch": 24.68, "learning_rate": 4.6914645522388064e-05, "loss": 0.0001, "step": 26460 }, { "epoch": 24.69, "learning_rate": 4.691417910447761e-05, "loss": 0.0001, "step": 26464 }, { "epoch": 24.69, "learning_rate": 4.691371268656717e-05, "loss": 0.001, "step": 26468 }, { "epoch": 24.69, "learning_rate": 4.6913246268656715e-05, "loss": 0.0022, "step": 26472 }, { "epoch": 24.7, "learning_rate": 4.691277985074627e-05, "loss": 0.0001, "step": 26476 }, { "epoch": 24.7, "learning_rate": 4.6912313432835825e-05, "loss": 0.0, "step": 26480 }, { "epoch": 24.71, "learning_rate": 4.691184701492537e-05, "loss": 0.0005, "step": 26484 }, { "epoch": 24.71, "learning_rate": 4.691138059701493e-05, "loss": 0.0031, "step": 26488 }, { "epoch": 24.71, "learning_rate": 4.691091417910448e-05, "loss": 0.0005, "step": 26492 }, { "epoch": 24.72, "learning_rate": 4.691044776119403e-05, "loss": 0.0001, "step": 26496 }, { "epoch": 24.72, "learning_rate": 4.690998134328358e-05, "loss": 0.0001, "step": 26500 }, { "epoch": 24.72, "eval_exact_match": 0.7446808510638298, "eval_exec": 0.7736943907156673, "eval_loss": 0.403182715177536, "eval_runtime": 1851.9773, "eval_samples_per_second": 0.558, "step": 26500 }, { "epoch": 24.72, "learning_rate": 4.690951492537314e-05, "loss": 0.0009, "step": 26504 }, { "epoch": 24.73, "learning_rate": 4.690904850746269e-05, "loss": 0.0003, "step": 26508 }, { "epoch": 24.73, "learning_rate": 4.690858208955224e-05, "loss": 0.0001, "step": 26512 }, { "epoch": 24.73, "learning_rate": 4.690811567164179e-05, "loss": 0.0015, "step": 26516 }, { "epoch": 24.74, "learning_rate": 4.690764925373135e-05, "loss": 0.0003, "step": 26520 }, { "epoch": 24.74, "learning_rate": 4.6907182835820895e-05, "loss": 0.0021, "step": 26524 }, { "epoch": 24.75, "learning_rate": 4.690671641791045e-05, "loss": 0.0006, "step": 26528 }, { "epoch": 24.75, "learning_rate": 4.690625e-05, "loss": 0.0009, "step": 26532 }, { "epoch": 24.75, "learning_rate": 4.690578358208955e-05, "loss": 0.0, "step": 26536 }, { "epoch": 24.76, "learning_rate": 4.690531716417911e-05, "loss": 0.0002, "step": 26540 }, { "epoch": 24.76, "learning_rate": 4.6904850746268656e-05, "loss": 0.0004, "step": 26544 }, { "epoch": 24.76, "learning_rate": 4.690438432835821e-05, "loss": 0.0109, "step": 26548 }, { "epoch": 24.77, "learning_rate": 4.6903917910447766e-05, "loss": 0.0008, "step": 26552 }, { "epoch": 24.77, "learning_rate": 4.6903451492537314e-05, "loss": 0.001, "step": 26556 }, { "epoch": 24.78, "learning_rate": 4.690298507462687e-05, "loss": 0.0027, "step": 26560 }, { "epoch": 24.78, "learning_rate": 4.6902518656716424e-05, "loss": 0.0001, "step": 26564 }, { "epoch": 24.78, "learning_rate": 4.690205223880597e-05, "loss": 0.0004, "step": 26568 }, { "epoch": 24.79, "learning_rate": 4.690158582089553e-05, "loss": 0.0099, "step": 26572 }, { "epoch": 24.79, "learning_rate": 4.6901119402985075e-05, "loss": 0.0017, "step": 26576 }, { "epoch": 24.79, "learning_rate": 4.690065298507463e-05, "loss": 0.0046, "step": 26580 }, { "epoch": 24.8, "learning_rate": 4.6900186567164185e-05, "loss": 0.0015, "step": 26584 }, { "epoch": 24.8, "learning_rate": 4.689972014925373e-05, "loss": 0.001, "step": 26588 }, { "epoch": 24.81, "learning_rate": 4.689925373134328e-05, "loss": 0.001, "step": 26592 }, { "epoch": 24.81, "learning_rate": 4.689878731343284e-05, "loss": 0.0003, "step": 26596 }, { "epoch": 24.81, "learning_rate": 4.689832089552239e-05, "loss": 0.0036, "step": 26600 }, { "epoch": 24.82, "learning_rate": 4.689785447761194e-05, "loss": 0.0021, "step": 26604 }, { "epoch": 24.82, "learning_rate": 4.6897388059701494e-05, "loss": 0.0002, "step": 26608 }, { "epoch": 24.82, "learning_rate": 4.689692164179105e-05, "loss": 0.0001, "step": 26612 }, { "epoch": 24.83, "learning_rate": 4.68964552238806e-05, "loss": 0.0015, "step": 26616 }, { "epoch": 24.83, "learning_rate": 4.689598880597015e-05, "loss": 0.0015, "step": 26620 }, { "epoch": 24.84, "learning_rate": 4.689552238805971e-05, "loss": 0.0032, "step": 26624 }, { "epoch": 24.84, "learning_rate": 4.6895055970149255e-05, "loss": 0.0009, "step": 26628 }, { "epoch": 24.84, "learning_rate": 4.689458955223881e-05, "loss": 0.0013, "step": 26632 }, { "epoch": 24.85, "learning_rate": 4.689412313432836e-05, "loss": 0.0004, "step": 26636 }, { "epoch": 24.85, "learning_rate": 4.689365671641791e-05, "loss": 0.0029, "step": 26640 }, { "epoch": 24.85, "learning_rate": 4.689319029850747e-05, "loss": 0.0001, "step": 26644 }, { "epoch": 24.86, "learning_rate": 4.6892723880597016e-05, "loss": 0.0026, "step": 26648 }, { "epoch": 24.86, "learning_rate": 4.6892257462686564e-05, "loss": 0.0009, "step": 26652 }, { "epoch": 24.87, "learning_rate": 4.6891791044776126e-05, "loss": 0.0006, "step": 26656 }, { "epoch": 24.87, "learning_rate": 4.6891324626865674e-05, "loss": 0.0002, "step": 26660 }, { "epoch": 24.87, "learning_rate": 4.689085820895522e-05, "loss": 0.0002, "step": 26664 }, { "epoch": 24.88, "learning_rate": 4.689039179104478e-05, "loss": 0.0037, "step": 26668 }, { "epoch": 24.88, "learning_rate": 4.688992537313433e-05, "loss": 0.0004, "step": 26672 }, { "epoch": 24.88, "learning_rate": 4.688945895522388e-05, "loss": 0.0035, "step": 26676 }, { "epoch": 24.89, "learning_rate": 4.6888992537313435e-05, "loss": 0.0003, "step": 26680 }, { "epoch": 24.89, "learning_rate": 4.688852611940299e-05, "loss": 0.0004, "step": 26684 }, { "epoch": 24.9, "learning_rate": 4.688805970149254e-05, "loss": 0.0002, "step": 26688 }, { "epoch": 24.9, "learning_rate": 4.688759328358209e-05, "loss": 0.0, "step": 26692 }, { "epoch": 24.9, "learning_rate": 4.688712686567164e-05, "loss": 0.003, "step": 26696 }, { "epoch": 24.91, "learning_rate": 4.6886660447761196e-05, "loss": 0.0013, "step": 26700 }, { "epoch": 24.91, "learning_rate": 4.688619402985075e-05, "loss": 0.0, "step": 26704 }, { "epoch": 24.91, "learning_rate": 4.68857276119403e-05, "loss": 0.0002, "step": 26708 }, { "epoch": 24.92, "learning_rate": 4.688526119402985e-05, "loss": 0.0008, "step": 26712 }, { "epoch": 24.92, "learning_rate": 4.688479477611941e-05, "loss": 0.0005, "step": 26716 }, { "epoch": 24.93, "learning_rate": 4.688432835820896e-05, "loss": 0.0007, "step": 26720 }, { "epoch": 24.93, "learning_rate": 4.688386194029851e-05, "loss": 0.0, "step": 26724 }, { "epoch": 24.93, "learning_rate": 4.688339552238806e-05, "loss": 0.0001, "step": 26728 }, { "epoch": 24.94, "learning_rate": 4.6882929104477615e-05, "loss": 0.0001, "step": 26732 }, { "epoch": 24.94, "learning_rate": 4.688246268656717e-05, "loss": 0.0008, "step": 26736 }, { "epoch": 24.94, "learning_rate": 4.688199626865672e-05, "loss": 0.0001, "step": 26740 }, { "epoch": 24.95, "learning_rate": 4.688152985074627e-05, "loss": 0.0001, "step": 26744 }, { "epoch": 24.95, "learning_rate": 4.688106343283583e-05, "loss": 0.0013, "step": 26748 }, { "epoch": 24.96, "learning_rate": 4.6880597014925376e-05, "loss": 0.0001, "step": 26752 }, { "epoch": 24.96, "learning_rate": 4.6880130597014924e-05, "loss": 0.0005, "step": 26756 }, { "epoch": 24.96, "learning_rate": 4.687966417910448e-05, "loss": 0.0015, "step": 26760 }, { "epoch": 24.97, "learning_rate": 4.6879197761194034e-05, "loss": 0.0004, "step": 26764 }, { "epoch": 24.97, "learning_rate": 4.687873134328358e-05, "loss": 0.0023, "step": 26768 }, { "epoch": 24.97, "learning_rate": 4.687826492537314e-05, "loss": 0.0001, "step": 26772 }, { "epoch": 24.98, "learning_rate": 4.687779850746269e-05, "loss": 0.0001, "step": 26776 }, { "epoch": 24.98, "learning_rate": 4.687733208955224e-05, "loss": 0.0015, "step": 26780 }, { "epoch": 24.98, "learning_rate": 4.6876865671641795e-05, "loss": 0.0002, "step": 26784 }, { "epoch": 24.99, "learning_rate": 4.687639925373134e-05, "loss": 0.0001, "step": 26788 }, { "epoch": 24.99, "learning_rate": 4.68759328358209e-05, "loss": 0.0003, "step": 26792 }, { "epoch": 25.0, "learning_rate": 4.687546641791045e-05, "loss": 0.0088, "step": 26796 }, { "epoch": 25.0, "learning_rate": 4.6875e-05, "loss": 0.0008, "step": 26800 }, { "epoch": 25.0, "learning_rate": 4.6874533582089556e-05, "loss": 0.0018, "step": 26804 }, { "epoch": 25.01, "learning_rate": 4.687406716417911e-05, "loss": 0.0, "step": 26808 }, { "epoch": 25.01, "learning_rate": 4.687360074626866e-05, "loss": 0.0, "step": 26812 }, { "epoch": 25.01, "learning_rate": 4.687313432835821e-05, "loss": 0.0, "step": 26816 }, { "epoch": 25.02, "learning_rate": 4.687266791044776e-05, "loss": 0.0002, "step": 26820 }, { "epoch": 25.02, "learning_rate": 4.687220149253732e-05, "loss": 0.0018, "step": 26824 }, { "epoch": 25.03, "learning_rate": 4.6871735074626865e-05, "loss": 0.0002, "step": 26828 }, { "epoch": 25.03, "learning_rate": 4.687126865671642e-05, "loss": 0.0037, "step": 26832 }, { "epoch": 25.03, "learning_rate": 4.6870802238805975e-05, "loss": 0.0019, "step": 26836 }, { "epoch": 25.04, "learning_rate": 4.687033582089552e-05, "loss": 0.0003, "step": 26840 }, { "epoch": 25.04, "learning_rate": 4.686986940298508e-05, "loss": 0.0004, "step": 26844 }, { "epoch": 25.04, "learning_rate": 4.6869402985074626e-05, "loss": 0.0001, "step": 26848 }, { "epoch": 25.05, "learning_rate": 4.686893656716418e-05, "loss": 0.0002, "step": 26852 }, { "epoch": 25.05, "learning_rate": 4.6868470149253736e-05, "loss": 0.0001, "step": 26856 }, { "epoch": 25.06, "learning_rate": 4.6868003731343284e-05, "loss": 0.0017, "step": 26860 }, { "epoch": 25.06, "learning_rate": 4.686753731343284e-05, "loss": 0.0001, "step": 26864 }, { "epoch": 25.06, "learning_rate": 4.6867070895522394e-05, "loss": 0.0009, "step": 26868 }, { "epoch": 25.07, "learning_rate": 4.686660447761194e-05, "loss": 0.0085, "step": 26872 }, { "epoch": 25.07, "learning_rate": 4.686613805970149e-05, "loss": 0.0021, "step": 26876 }, { "epoch": 25.07, "learning_rate": 4.6865671641791045e-05, "loss": 0.0007, "step": 26880 }, { "epoch": 25.08, "learning_rate": 4.68652052238806e-05, "loss": 0.0021, "step": 26884 }, { "epoch": 25.08, "learning_rate": 4.6864738805970155e-05, "loss": 0.0012, "step": 26888 }, { "epoch": 25.09, "learning_rate": 4.68642723880597e-05, "loss": 0.0, "step": 26892 }, { "epoch": 25.09, "learning_rate": 4.686380597014926e-05, "loss": 0.0011, "step": 26896 }, { "epoch": 25.09, "learning_rate": 4.686333955223881e-05, "loss": 0.001, "step": 26900 }, { "epoch": 25.1, "learning_rate": 4.686287313432836e-05, "loss": 0.0006, "step": 26904 }, { "epoch": 25.1, "learning_rate": 4.686240671641791e-05, "loss": 0.0019, "step": 26908 }, { "epoch": 25.1, "learning_rate": 4.686194029850747e-05, "loss": 0.0002, "step": 26912 }, { "epoch": 25.11, "learning_rate": 4.686147388059702e-05, "loss": 0.0002, "step": 26916 }, { "epoch": 25.11, "learning_rate": 4.686100746268657e-05, "loss": 0.0014, "step": 26920 }, { "epoch": 25.12, "learning_rate": 4.686054104477612e-05, "loss": 0.0001, "step": 26924 }, { "epoch": 25.12, "learning_rate": 4.686007462686568e-05, "loss": 0.0036, "step": 26928 }, { "epoch": 25.12, "learning_rate": 4.6859608208955225e-05, "loss": 0.0018, "step": 26932 }, { "epoch": 25.13, "learning_rate": 4.685914179104478e-05, "loss": 0.0045, "step": 26936 }, { "epoch": 25.13, "learning_rate": 4.685867537313433e-05, "loss": 0.0017, "step": 26940 }, { "epoch": 25.13, "learning_rate": 4.685820895522388e-05, "loss": 0.0011, "step": 26944 }, { "epoch": 25.14, "learning_rate": 4.685774253731344e-05, "loss": 0.0015, "step": 26948 }, { "epoch": 25.14, "learning_rate": 4.6857276119402986e-05, "loss": 0.0408, "step": 26952 }, { "epoch": 25.15, "learning_rate": 4.685680970149254e-05, "loss": 0.008, "step": 26956 }, { "epoch": 25.15, "learning_rate": 4.6856343283582096e-05, "loss": 0.0, "step": 26960 }, { "epoch": 25.15, "learning_rate": 4.6855876865671644e-05, "loss": 0.0036, "step": 26964 }, { "epoch": 25.16, "learning_rate": 4.685541044776119e-05, "loss": 0.0021, "step": 26968 }, { "epoch": 25.16, "learning_rate": 4.6854944029850754e-05, "loss": 0.001, "step": 26972 }, { "epoch": 25.16, "learning_rate": 4.68544776119403e-05, "loss": 0.0101, "step": 26976 }, { "epoch": 25.17, "learning_rate": 4.685401119402985e-05, "loss": 0.0008, "step": 26980 }, { "epoch": 25.17, "learning_rate": 4.6853544776119405e-05, "loss": 0.0, "step": 26984 }, { "epoch": 25.18, "learning_rate": 4.685307835820896e-05, "loss": 0.001, "step": 26988 }, { "epoch": 25.18, "learning_rate": 4.685261194029851e-05, "loss": 0.0004, "step": 26992 }, { "epoch": 25.18, "learning_rate": 4.685214552238806e-05, "loss": 0.0001, "step": 26996 }, { "epoch": 25.19, "learning_rate": 4.685167910447761e-05, "loss": 0.0002, "step": 27000 }, { "epoch": 25.19, "eval_exact_match": 0.7427466150870407, "eval_exec": 0.7794970986460348, "eval_loss": 0.3887414336204529, "eval_runtime": 1208.0912, "eval_samples_per_second": 0.856, "step": 27000 }, { "epoch": 25.19, "learning_rate": 4.6851212686567166e-05, "loss": 0.0008, "step": 27004 }, { "epoch": 25.19, "learning_rate": 4.685074626865672e-05, "loss": 0.0002, "step": 27008 }, { "epoch": 25.2, "learning_rate": 4.685027985074627e-05, "loss": 0.0004, "step": 27012 }, { "epoch": 25.2, "learning_rate": 4.6849813432835824e-05, "loss": 0.0028, "step": 27016 }, { "epoch": 25.21, "learning_rate": 4.684934701492538e-05, "loss": 0.0006, "step": 27020 }, { "epoch": 25.21, "learning_rate": 4.684888059701493e-05, "loss": 0.0005, "step": 27024 }, { "epoch": 25.21, "learning_rate": 4.6848414179104475e-05, "loss": 0.0001, "step": 27028 }, { "epoch": 25.22, "learning_rate": 4.6847947761194037e-05, "loss": 0.0013, "step": 27032 }, { "epoch": 25.22, "learning_rate": 4.6847481343283585e-05, "loss": 0.0019, "step": 27036 }, { "epoch": 25.22, "learning_rate": 4.684701492537313e-05, "loss": 0.0004, "step": 27040 }, { "epoch": 25.23, "learning_rate": 4.684654850746269e-05, "loss": 0.0012, "step": 27044 }, { "epoch": 25.23, "learning_rate": 4.684608208955224e-05, "loss": 0.0008, "step": 27048 }, { "epoch": 25.24, "learning_rate": 4.68456156716418e-05, "loss": 0.0028, "step": 27052 }, { "epoch": 25.24, "learning_rate": 4.6845149253731346e-05, "loss": 0.0005, "step": 27056 }, { "epoch": 25.24, "learning_rate": 4.6844682835820894e-05, "loss": 0.0012, "step": 27060 }, { "epoch": 25.25, "learning_rate": 4.6844216417910455e-05, "loss": 0.0002, "step": 27064 }, { "epoch": 25.25, "learning_rate": 4.6843750000000004e-05, "loss": 0.0007, "step": 27068 }, { "epoch": 25.25, "learning_rate": 4.684328358208955e-05, "loss": 0.0023, "step": 27072 }, { "epoch": 25.26, "learning_rate": 4.684281716417911e-05, "loss": 0.0008, "step": 27076 }, { "epoch": 25.26, "learning_rate": 4.684235074626866e-05, "loss": 0.0001, "step": 27080 }, { "epoch": 25.26, "learning_rate": 4.684188432835821e-05, "loss": 0.0008, "step": 27084 }, { "epoch": 25.27, "learning_rate": 4.6841417910447765e-05, "loss": 0.004, "step": 27088 }, { "epoch": 25.27, "learning_rate": 4.684095149253731e-05, "loss": 0.0075, "step": 27092 }, { "epoch": 25.28, "learning_rate": 4.684048507462687e-05, "loss": 0.0006, "step": 27096 }, { "epoch": 25.28, "learning_rate": 4.684001865671642e-05, "loss": 0.0002, "step": 27100 }, { "epoch": 25.28, "learning_rate": 4.683955223880597e-05, "loss": 0.0002, "step": 27104 }, { "epoch": 25.29, "learning_rate": 4.6839085820895526e-05, "loss": 0.0052, "step": 27108 }, { "epoch": 25.29, "learning_rate": 4.683861940298508e-05, "loss": 0.0016, "step": 27112 }, { "epoch": 25.29, "learning_rate": 4.683815298507463e-05, "loss": 0.0021, "step": 27116 }, { "epoch": 25.3, "learning_rate": 4.683768656716418e-05, "loss": 0.0004, "step": 27120 }, { "epoch": 25.3, "learning_rate": 4.683722014925374e-05, "loss": 0.0011, "step": 27124 }, { "epoch": 25.31, "learning_rate": 4.6836753731343287e-05, "loss": 0.0, "step": 27128 }, { "epoch": 25.31, "learning_rate": 4.6836287313432835e-05, "loss": 0.0018, "step": 27132 }, { "epoch": 25.31, "learning_rate": 4.683582089552239e-05, "loss": 0.001, "step": 27136 }, { "epoch": 25.32, "learning_rate": 4.6835354477611945e-05, "loss": 0.0001, "step": 27140 }, { "epoch": 25.32, "learning_rate": 4.683488805970149e-05, "loss": 0.0094, "step": 27144 }, { "epoch": 25.32, "learning_rate": 4.683442164179105e-05, "loss": 0.0019, "step": 27148 }, { "epoch": 25.33, "learning_rate": 4.6833955223880596e-05, "loss": 0.0067, "step": 27152 }, { "epoch": 25.33, "learning_rate": 4.683348880597015e-05, "loss": 0.0001, "step": 27156 }, { "epoch": 25.34, "learning_rate": 4.6833022388059705e-05, "loss": 0.0008, "step": 27160 }, { "epoch": 25.34, "learning_rate": 4.6832555970149254e-05, "loss": 0.0013, "step": 27164 }, { "epoch": 25.34, "learning_rate": 4.683208955223881e-05, "loss": 0.0013, "step": 27168 }, { "epoch": 25.35, "learning_rate": 4.6831623134328363e-05, "loss": 0.0001, "step": 27172 }, { "epoch": 25.35, "learning_rate": 4.683115671641791e-05, "loss": 0.0001, "step": 27176 }, { "epoch": 25.35, "learning_rate": 4.683069029850746e-05, "loss": 0.0005, "step": 27180 }, { "epoch": 25.36, "learning_rate": 4.683022388059702e-05, "loss": 0.0006, "step": 27184 }, { "epoch": 25.36, "learning_rate": 4.682975746268657e-05, "loss": 0.0001, "step": 27188 }, { "epoch": 25.37, "learning_rate": 4.682929104477612e-05, "loss": 0.0001, "step": 27192 }, { "epoch": 25.37, "learning_rate": 4.682882462686567e-05, "loss": 0.0012, "step": 27196 }, { "epoch": 25.37, "learning_rate": 4.682835820895523e-05, "loss": 0.005, "step": 27200 }, { "epoch": 25.38, "learning_rate": 4.6827891791044776e-05, "loss": 0.0013, "step": 27204 }, { "epoch": 25.38, "learning_rate": 4.682742537313433e-05, "loss": 0.0078, "step": 27208 }, { "epoch": 25.38, "learning_rate": 4.682695895522388e-05, "loss": 0.0002, "step": 27212 }, { "epoch": 25.39, "learning_rate": 4.682649253731344e-05, "loss": 0.0006, "step": 27216 }, { "epoch": 25.39, "learning_rate": 4.682602611940299e-05, "loss": 0.0002, "step": 27220 }, { "epoch": 25.4, "learning_rate": 4.6825559701492537e-05, "loss": 0.0005, "step": 27224 }, { "epoch": 25.4, "learning_rate": 4.682509328358209e-05, "loss": 0.004, "step": 27228 }, { "epoch": 25.4, "learning_rate": 4.6824626865671646e-05, "loss": 0.0108, "step": 27232 }, { "epoch": 25.41, "learning_rate": 4.6824160447761195e-05, "loss": 0.0004, "step": 27236 }, { "epoch": 25.41, "learning_rate": 4.682369402985075e-05, "loss": 0.0009, "step": 27240 }, { "epoch": 25.41, "learning_rate": 4.6823227611940304e-05, "loss": 0.0001, "step": 27244 }, { "epoch": 25.42, "learning_rate": 4.682276119402985e-05, "loss": 0.0001, "step": 27248 }, { "epoch": 25.42, "learning_rate": 4.682229477611941e-05, "loss": 0.0002, "step": 27252 }, { "epoch": 25.43, "learning_rate": 4.6821828358208956e-05, "loss": 0.0003, "step": 27256 }, { "epoch": 25.43, "learning_rate": 4.682136194029851e-05, "loss": 0.0007, "step": 27260 }, { "epoch": 25.43, "learning_rate": 4.6820895522388065e-05, "loss": 0.0006, "step": 27264 }, { "epoch": 25.44, "learning_rate": 4.6820429104477613e-05, "loss": 0.0002, "step": 27268 }, { "epoch": 25.44, "learning_rate": 4.681996268656716e-05, "loss": 0.0004, "step": 27272 }, { "epoch": 25.44, "learning_rate": 4.681949626865672e-05, "loss": 0.0001, "step": 27276 }, { "epoch": 25.45, "learning_rate": 4.681902985074627e-05, "loss": 0.005, "step": 27280 }, { "epoch": 25.45, "learning_rate": 4.681856343283582e-05, "loss": 0.0094, "step": 27284 }, { "epoch": 25.46, "learning_rate": 4.6818097014925374e-05, "loss": 0.0005, "step": 27288 }, { "epoch": 25.46, "learning_rate": 4.681763059701493e-05, "loss": 0.0038, "step": 27292 }, { "epoch": 25.46, "learning_rate": 4.681716417910448e-05, "loss": 0.0005, "step": 27296 }, { "epoch": 25.47, "learning_rate": 4.681669776119403e-05, "loss": 0.0001, "step": 27300 }, { "epoch": 25.47, "learning_rate": 4.681623134328359e-05, "loss": 0.0018, "step": 27304 }, { "epoch": 25.47, "learning_rate": 4.6815764925373135e-05, "loss": 0.0006, "step": 27308 }, { "epoch": 25.48, "learning_rate": 4.681529850746269e-05, "loss": 0.0001, "step": 27312 }, { "epoch": 25.48, "learning_rate": 4.681483208955224e-05, "loss": 0.0006, "step": 27316 }, { "epoch": 25.49, "learning_rate": 4.681436567164179e-05, "loss": 0.0018, "step": 27320 }, { "epoch": 25.49, "learning_rate": 4.681389925373135e-05, "loss": 0.0086, "step": 27324 }, { "epoch": 25.49, "learning_rate": 4.6813432835820896e-05, "loss": 0.0001, "step": 27328 }, { "epoch": 25.5, "learning_rate": 4.6812966417910445e-05, "loss": 0.0012, "step": 27332 }, { "epoch": 25.5, "learning_rate": 4.6812500000000006e-05, "loss": 0.0001, "step": 27336 }, { "epoch": 25.5, "learning_rate": 4.6812033582089554e-05, "loss": 0.0015, "step": 27340 }, { "epoch": 25.51, "learning_rate": 4.68115671641791e-05, "loss": 0.0023, "step": 27344 }, { "epoch": 25.51, "learning_rate": 4.681110074626866e-05, "loss": 0.0007, "step": 27348 }, { "epoch": 25.51, "learning_rate": 4.681063432835821e-05, "loss": 0.0034, "step": 27352 }, { "epoch": 25.52, "learning_rate": 4.681016791044776e-05, "loss": 0.0001, "step": 27356 }, { "epoch": 25.52, "learning_rate": 4.6809701492537315e-05, "loss": 0.0001, "step": 27360 }, { "epoch": 25.53, "learning_rate": 4.680923507462687e-05, "loss": 0.0019, "step": 27364 }, { "epoch": 25.53, "learning_rate": 4.680876865671642e-05, "loss": 0.0001, "step": 27368 }, { "epoch": 25.53, "learning_rate": 4.680830223880597e-05, "loss": 0.0042, "step": 27372 }, { "epoch": 25.54, "learning_rate": 4.680783582089552e-05, "loss": 0.0001, "step": 27376 }, { "epoch": 25.54, "learning_rate": 4.6807369402985076e-05, "loss": 0.0011, "step": 27380 }, { "epoch": 25.54, "learning_rate": 4.680690298507463e-05, "loss": 0.0004, "step": 27384 }, { "epoch": 25.55, "learning_rate": 4.680643656716418e-05, "loss": 0.0008, "step": 27388 }, { "epoch": 25.55, "learning_rate": 4.6805970149253734e-05, "loss": 0.0003, "step": 27392 }, { "epoch": 25.56, "learning_rate": 4.680550373134329e-05, "loss": 0.0003, "step": 27396 }, { "epoch": 25.56, "learning_rate": 4.680503731343284e-05, "loss": 0.0021, "step": 27400 }, { "epoch": 25.56, "learning_rate": 4.680457089552239e-05, "loss": 0.0001, "step": 27404 }, { "epoch": 25.57, "learning_rate": 4.680410447761194e-05, "loss": 0.0024, "step": 27408 }, { "epoch": 25.57, "learning_rate": 4.6803638059701495e-05, "loss": 0.0041, "step": 27412 }, { "epoch": 25.57, "learning_rate": 4.680317164179105e-05, "loss": 0.001, "step": 27416 }, { "epoch": 25.58, "learning_rate": 4.68027052238806e-05, "loss": 0.0008, "step": 27420 }, { "epoch": 25.58, "learning_rate": 4.680223880597015e-05, "loss": 0.0029, "step": 27424 }, { "epoch": 25.59, "learning_rate": 4.680177238805971e-05, "loss": 0.0012, "step": 27428 }, { "epoch": 25.59, "learning_rate": 4.6801305970149256e-05, "loss": 0.0008, "step": 27432 }, { "epoch": 25.59, "learning_rate": 4.6800839552238804e-05, "loss": 0.0002, "step": 27436 }, { "epoch": 25.6, "learning_rate": 4.680037313432836e-05, "loss": 0.0016, "step": 27440 }, { "epoch": 25.6, "learning_rate": 4.6799906716417914e-05, "loss": 0.0009, "step": 27444 }, { "epoch": 25.6, "learning_rate": 4.679944029850746e-05, "loss": 0.0008, "step": 27448 }, { "epoch": 25.61, "learning_rate": 4.679897388059702e-05, "loss": 0.0012, "step": 27452 }, { "epoch": 25.61, "learning_rate": 4.679850746268657e-05, "loss": 0.0012, "step": 27456 }, { "epoch": 25.62, "learning_rate": 4.679804104477612e-05, "loss": 0.0018, "step": 27460 }, { "epoch": 25.62, "learning_rate": 4.6797574626865675e-05, "loss": 0.0002, "step": 27464 }, { "epoch": 25.62, "learning_rate": 4.679710820895522e-05, "loss": 0.0001, "step": 27468 }, { "epoch": 25.63, "learning_rate": 4.679664179104478e-05, "loss": 0.0017, "step": 27472 }, { "epoch": 25.63, "learning_rate": 4.679617537313433e-05, "loss": 0.0011, "step": 27476 }, { "epoch": 25.63, "learning_rate": 4.679570895522388e-05, "loss": 0.0003, "step": 27480 }, { "epoch": 25.64, "learning_rate": 4.6795242537313436e-05, "loss": 0.01, "step": 27484 }, { "epoch": 25.64, "learning_rate": 4.679477611940299e-05, "loss": 0.0016, "step": 27488 }, { "epoch": 25.65, "learning_rate": 4.679430970149254e-05, "loss": 0.0048, "step": 27492 }, { "epoch": 25.65, "learning_rate": 4.679384328358209e-05, "loss": 0.0038, "step": 27496 }, { "epoch": 25.65, "learning_rate": 4.679337686567164e-05, "loss": 0.0004, "step": 27500 }, { "epoch": 25.65, "eval_exact_match": 0.7427466150870407, "eval_exec": 0.7833655705996132, "eval_loss": 0.3488633632659912, "eval_runtime": 1170.5529, "eval_samples_per_second": 0.883, "step": 27500 }, { "epoch": 25.66, "learning_rate": 4.67929104477612e-05, "loss": 0.0005, "step": 27504 }, { "epoch": 25.66, "learning_rate": 4.6792444029850745e-05, "loss": 0.0003, "step": 27508 }, { "epoch": 25.66, "learning_rate": 4.67919776119403e-05, "loss": 0.0001, "step": 27512 }, { "epoch": 25.67, "learning_rate": 4.6791511194029855e-05, "loss": 0.0045, "step": 27516 }, { "epoch": 25.67, "learning_rate": 4.67910447761194e-05, "loss": 0.0001, "step": 27520 }, { "epoch": 25.68, "learning_rate": 4.679057835820896e-05, "loss": 0.0014, "step": 27524 }, { "epoch": 25.68, "learning_rate": 4.6790111940298506e-05, "loss": 0.0004, "step": 27528 }, { "epoch": 25.68, "learning_rate": 4.678964552238806e-05, "loss": 0.0047, "step": 27532 }, { "epoch": 25.69, "learning_rate": 4.6789179104477616e-05, "loss": 0.0002, "step": 27536 }, { "epoch": 25.69, "learning_rate": 4.6788712686567164e-05, "loss": 0.0001, "step": 27540 }, { "epoch": 25.69, "learning_rate": 4.678824626865672e-05, "loss": 0.0021, "step": 27544 }, { "epoch": 25.7, "learning_rate": 4.6787779850746274e-05, "loss": 0.0018, "step": 27548 }, { "epoch": 25.7, "learning_rate": 4.678731343283582e-05, "loss": 0.0036, "step": 27552 }, { "epoch": 25.71, "learning_rate": 4.678684701492538e-05, "loss": 0.0001, "step": 27556 }, { "epoch": 25.71, "learning_rate": 4.6786380597014925e-05, "loss": 0.0041, "step": 27560 }, { "epoch": 25.71, "learning_rate": 4.678591417910448e-05, "loss": 0.0001, "step": 27564 }, { "epoch": 25.72, "learning_rate": 4.6785447761194035e-05, "loss": 0.0009, "step": 27568 }, { "epoch": 25.72, "learning_rate": 4.678498134328358e-05, "loss": 0.0035, "step": 27572 }, { "epoch": 25.72, "learning_rate": 4.678451492537314e-05, "loss": 0.002, "step": 27576 }, { "epoch": 25.73, "learning_rate": 4.678404850746269e-05, "loss": 0.0001, "step": 27580 }, { "epoch": 25.73, "learning_rate": 4.678358208955224e-05, "loss": 0.0027, "step": 27584 }, { "epoch": 25.73, "learning_rate": 4.678311567164179e-05, "loss": 0.0073, "step": 27588 }, { "epoch": 25.74, "learning_rate": 4.678264925373135e-05, "loss": 0.0004, "step": 27592 }, { "epoch": 25.74, "learning_rate": 4.67821828358209e-05, "loss": 0.0008, "step": 27596 }, { "epoch": 25.75, "learning_rate": 4.678171641791045e-05, "loss": 0.0004, "step": 27600 }, { "epoch": 25.75, "learning_rate": 4.678125e-05, "loss": 0.0095, "step": 27604 }, { "epoch": 25.75, "learning_rate": 4.678078358208956e-05, "loss": 0.0004, "step": 27608 }, { "epoch": 25.76, "learning_rate": 4.6780317164179105e-05, "loss": 0.0162, "step": 27612 }, { "epoch": 25.76, "learning_rate": 4.677985074626866e-05, "loss": 0.0008, "step": 27616 }, { "epoch": 25.76, "learning_rate": 4.677938432835821e-05, "loss": 0.0013, "step": 27620 }, { "epoch": 25.77, "learning_rate": 4.677891791044776e-05, "loss": 0.0006, "step": 27624 }, { "epoch": 25.77, "learning_rate": 4.677845149253732e-05, "loss": 0.0004, "step": 27628 }, { "epoch": 25.78, "learning_rate": 4.6777985074626866e-05, "loss": 0.0016, "step": 27632 }, { "epoch": 25.78, "learning_rate": 4.677751865671642e-05, "loss": 0.0001, "step": 27636 }, { "epoch": 25.78, "learning_rate": 4.6777052238805976e-05, "loss": 0.0001, "step": 27640 }, { "epoch": 25.79, "learning_rate": 4.6776585820895524e-05, "loss": 0.0002, "step": 27644 }, { "epoch": 25.79, "learning_rate": 4.677611940298507e-05, "loss": 0.0003, "step": 27648 }, { "epoch": 25.79, "learning_rate": 4.6775652985074634e-05, "loss": 0.0002, "step": 27652 }, { "epoch": 25.8, "learning_rate": 4.677518656716418e-05, "loss": 0.0, "step": 27656 }, { "epoch": 25.8, "learning_rate": 4.677472014925373e-05, "loss": 0.0011, "step": 27660 }, { "epoch": 25.81, "learning_rate": 4.6774253731343285e-05, "loss": 0.0002, "step": 27664 }, { "epoch": 25.81, "learning_rate": 4.677378731343284e-05, "loss": 0.0009, "step": 27668 }, { "epoch": 25.81, "learning_rate": 4.677332089552239e-05, "loss": 0.0003, "step": 27672 }, { "epoch": 25.82, "learning_rate": 4.677285447761194e-05, "loss": 0.0011, "step": 27676 }, { "epoch": 25.82, "learning_rate": 4.677238805970149e-05, "loss": 0.0007, "step": 27680 }, { "epoch": 25.82, "learning_rate": 4.6771921641791046e-05, "loss": 0.0129, "step": 27684 }, { "epoch": 25.83, "learning_rate": 4.67714552238806e-05, "loss": 0.0053, "step": 27688 }, { "epoch": 25.83, "learning_rate": 4.677098880597015e-05, "loss": 0.0096, "step": 27692 }, { "epoch": 25.84, "learning_rate": 4.6770522388059704e-05, "loss": 0.0001, "step": 27696 }, { "epoch": 25.84, "learning_rate": 4.677005597014926e-05, "loss": 0.0019, "step": 27700 }, { "epoch": 25.84, "learning_rate": 4.676958955223881e-05, "loss": 0.0001, "step": 27704 }, { "epoch": 25.85, "learning_rate": 4.676912313432836e-05, "loss": 0.0017, "step": 27708 }, { "epoch": 25.85, "learning_rate": 4.676865671641792e-05, "loss": 0.0008, "step": 27712 }, { "epoch": 25.85, "learning_rate": 4.6768190298507465e-05, "loss": 0.0004, "step": 27716 }, { "epoch": 25.86, "learning_rate": 4.676772388059702e-05, "loss": 0.0052, "step": 27720 }, { "epoch": 25.86, "learning_rate": 4.676725746268657e-05, "loss": 0.0012, "step": 27724 }, { "epoch": 25.87, "learning_rate": 4.676679104477612e-05, "loss": 0.001, "step": 27728 }, { "epoch": 25.87, "learning_rate": 4.676632462686568e-05, "loss": 0.0001, "step": 27732 }, { "epoch": 25.87, "learning_rate": 4.6765858208955226e-05, "loss": 0.0006, "step": 27736 }, { "epoch": 25.88, "learning_rate": 4.6765391791044774e-05, "loss": 0.0004, "step": 27740 }, { "epoch": 25.88, "learning_rate": 4.6764925373134336e-05, "loss": 0.0002, "step": 27744 }, { "epoch": 25.88, "learning_rate": 4.6764458955223884e-05, "loss": 0.0022, "step": 27748 }, { "epoch": 25.89, "learning_rate": 4.676399253731343e-05, "loss": 0.001, "step": 27752 }, { "epoch": 25.89, "learning_rate": 4.676352611940299e-05, "loss": 0.0005, "step": 27756 }, { "epoch": 25.9, "learning_rate": 4.676305970149254e-05, "loss": 0.0013, "step": 27760 }, { "epoch": 25.9, "learning_rate": 4.676259328358209e-05, "loss": 0.0015, "step": 27764 }, { "epoch": 25.9, "learning_rate": 4.6762126865671645e-05, "loss": 0.0012, "step": 27768 }, { "epoch": 25.91, "learning_rate": 4.676166044776119e-05, "loss": 0.0005, "step": 27772 }, { "epoch": 25.91, "learning_rate": 4.676119402985075e-05, "loss": 0.0005, "step": 27776 }, { "epoch": 25.91, "learning_rate": 4.67607276119403e-05, "loss": 0.0037, "step": 27780 }, { "epoch": 25.92, "learning_rate": 4.676026119402985e-05, "loss": 0.0003, "step": 27784 }, { "epoch": 25.92, "learning_rate": 4.6759794776119406e-05, "loss": 0.0002, "step": 27788 }, { "epoch": 25.93, "learning_rate": 4.675932835820896e-05, "loss": 0.0004, "step": 27792 }, { "epoch": 25.93, "learning_rate": 4.675886194029851e-05, "loss": 0.0013, "step": 27796 }, { "epoch": 25.93, "learning_rate": 4.675839552238806e-05, "loss": 0.0001, "step": 27800 }, { "epoch": 25.94, "learning_rate": 4.675792910447762e-05, "loss": 0.0, "step": 27804 }, { "epoch": 25.94, "learning_rate": 4.675746268656717e-05, "loss": 0.0001, "step": 27808 }, { "epoch": 25.94, "learning_rate": 4.6756996268656715e-05, "loss": 0.0005, "step": 27812 }, { "epoch": 25.95, "learning_rate": 4.675652985074627e-05, "loss": 0.0002, "step": 27816 }, { "epoch": 25.95, "learning_rate": 4.6756063432835825e-05, "loss": 0.0018, "step": 27820 }, { "epoch": 25.96, "learning_rate": 4.675559701492537e-05, "loss": 0.0099, "step": 27824 }, { "epoch": 25.96, "learning_rate": 4.675513059701493e-05, "loss": 0.004, "step": 27828 }, { "epoch": 25.96, "learning_rate": 4.6754664179104476e-05, "loss": 0.0025, "step": 27832 }, { "epoch": 25.97, "learning_rate": 4.675419776119403e-05, "loss": 0.0001, "step": 27836 }, { "epoch": 25.97, "learning_rate": 4.6753731343283586e-05, "loss": 0.0002, "step": 27840 }, { "epoch": 25.97, "learning_rate": 4.6753264925373134e-05, "loss": 0.0, "step": 27844 }, { "epoch": 25.98, "learning_rate": 4.675279850746269e-05, "loss": 0.0001, "step": 27848 }, { "epoch": 25.98, "learning_rate": 4.6752332089552244e-05, "loss": 0.0013, "step": 27852 }, { "epoch": 25.98, "learning_rate": 4.675186567164179e-05, "loss": 0.0002, "step": 27856 }, { "epoch": 25.99, "learning_rate": 4.675139925373134e-05, "loss": 0.0009, "step": 27860 }, { "epoch": 25.99, "learning_rate": 4.67509328358209e-05, "loss": 0.001, "step": 27864 }, { "epoch": 26.0, "learning_rate": 4.675046641791045e-05, "loss": 0.0001, "step": 27868 }, { "epoch": 26.0, "learning_rate": 4.6750000000000005e-05, "loss": 0.005, "step": 27872 }, { "epoch": 26.0, "learning_rate": 4.674953358208955e-05, "loss": 0.001, "step": 27876 }, { "epoch": 26.01, "learning_rate": 4.674906716417911e-05, "loss": 0.0177, "step": 27880 }, { "epoch": 26.01, "learning_rate": 4.674860074626866e-05, "loss": 0.0028, "step": 27884 }, { "epoch": 26.01, "learning_rate": 4.674813432835821e-05, "loss": 0.001, "step": 27888 }, { "epoch": 26.02, "learning_rate": 4.674766791044776e-05, "loss": 0.0001, "step": 27892 }, { "epoch": 26.02, "learning_rate": 4.674720149253732e-05, "loss": 0.001, "step": 27896 }, { "epoch": 26.03, "learning_rate": 4.674673507462687e-05, "loss": 0.0004, "step": 27900 }, { "epoch": 26.03, "learning_rate": 4.674626865671642e-05, "loss": 0.0003, "step": 27904 }, { "epoch": 26.03, "learning_rate": 4.674580223880597e-05, "loss": 0.0005, "step": 27908 }, { "epoch": 26.04, "learning_rate": 4.674533582089553e-05, "loss": 0.0001, "step": 27912 }, { "epoch": 26.04, "learning_rate": 4.6744869402985075e-05, "loss": 0.0012, "step": 27916 }, { "epoch": 26.04, "learning_rate": 4.674440298507463e-05, "loss": 0.0008, "step": 27920 }, { "epoch": 26.05, "learning_rate": 4.6743936567164185e-05, "loss": 0.0008, "step": 27924 }, { "epoch": 26.05, "learning_rate": 4.674347014925373e-05, "loss": 0.0003, "step": 27928 }, { "epoch": 26.06, "learning_rate": 4.674300373134329e-05, "loss": 0.0002, "step": 27932 }, { "epoch": 26.06, "learning_rate": 4.6742537313432836e-05, "loss": 0.0047, "step": 27936 }, { "epoch": 26.06, "learning_rate": 4.674207089552239e-05, "loss": 0.0001, "step": 27940 }, { "epoch": 26.07, "learning_rate": 4.6741604477611946e-05, "loss": 0.0004, "step": 27944 }, { "epoch": 26.07, "learning_rate": 4.6741138059701494e-05, "loss": 0.0001, "step": 27948 }, { "epoch": 26.07, "learning_rate": 4.674067164179104e-05, "loss": 0.0002, "step": 27952 }, { "epoch": 26.08, "learning_rate": 4.6740205223880604e-05, "loss": 0.0034, "step": 27956 }, { "epoch": 26.08, "learning_rate": 4.673973880597015e-05, "loss": 0.0018, "step": 27960 }, { "epoch": 26.09, "learning_rate": 4.67392723880597e-05, "loss": 0.0017, "step": 27964 }, { "epoch": 26.09, "learning_rate": 4.6738805970149255e-05, "loss": 0.0006, "step": 27968 }, { "epoch": 26.09, "learning_rate": 4.673833955223881e-05, "loss": 0.0081, "step": 27972 }, { "epoch": 26.1, "learning_rate": 4.673787313432836e-05, "loss": 0.0001, "step": 27976 }, { "epoch": 26.1, "learning_rate": 4.673740671641791e-05, "loss": 0.0002, "step": 27980 }, { "epoch": 26.1, "learning_rate": 4.673694029850747e-05, "loss": 0.0002, "step": 27984 }, { "epoch": 26.11, "learning_rate": 4.6736473880597016e-05, "loss": 0.0027, "step": 27988 }, { "epoch": 26.11, "learning_rate": 4.673600746268657e-05, "loss": 0.0018, "step": 27992 }, { "epoch": 26.12, "learning_rate": 4.673554104477612e-05, "loss": 0.0001, "step": 27996 }, { "epoch": 26.12, "learning_rate": 4.6735074626865674e-05, "loss": 0.0, "step": 28000 }, { "epoch": 26.12, "eval_exact_match": 0.7437137330754352, "eval_exec": 0.7785299806576402, "eval_loss": 0.38033804297447205, "eval_runtime": 1184.2263, "eval_samples_per_second": 0.873, "step": 28000 }, { "epoch": 26.12, "learning_rate": 4.673460820895523e-05, "loss": 0.0016, "step": 28004 }, { "epoch": 26.13, "learning_rate": 4.673414179104478e-05, "loss": 0.0084, "step": 28008 }, { "epoch": 26.13, "learning_rate": 4.6733675373134325e-05, "loss": 0.0003, "step": 28012 }, { "epoch": 26.13, "learning_rate": 4.6733208955223887e-05, "loss": 0.0001, "step": 28016 }, { "epoch": 26.14, "learning_rate": 4.6732742537313435e-05, "loss": 0.0006, "step": 28020 }, { "epoch": 26.14, "learning_rate": 4.673227611940298e-05, "loss": 0.0018, "step": 28024 }, { "epoch": 26.15, "learning_rate": 4.673180970149254e-05, "loss": 0.0001, "step": 28028 }, { "epoch": 26.15, "learning_rate": 4.673134328358209e-05, "loss": 0.0002, "step": 28032 }, { "epoch": 26.15, "learning_rate": 4.673087686567165e-05, "loss": 0.0003, "step": 28036 }, { "epoch": 26.16, "learning_rate": 4.6730410447761196e-05, "loss": 0.0005, "step": 28040 }, { "epoch": 26.16, "learning_rate": 4.672994402985075e-05, "loss": 0.0001, "step": 28044 }, { "epoch": 26.16, "learning_rate": 4.6729477611940305e-05, "loss": 0.0034, "step": 28048 }, { "epoch": 26.17, "learning_rate": 4.6729011194029854e-05, "loss": 0.0013, "step": 28052 }, { "epoch": 26.17, "learning_rate": 4.67285447761194e-05, "loss": 0.0001, "step": 28056 }, { "epoch": 26.18, "learning_rate": 4.672807835820896e-05, "loss": 0.0014, "step": 28060 }, { "epoch": 26.18, "learning_rate": 4.672761194029851e-05, "loss": 0.0001, "step": 28064 }, { "epoch": 26.18, "learning_rate": 4.672714552238806e-05, "loss": 0.0106, "step": 28068 }, { "epoch": 26.19, "learning_rate": 4.6726679104477615e-05, "loss": 0.0022, "step": 28072 }, { "epoch": 26.19, "learning_rate": 4.672621268656717e-05, "loss": 0.0007, "step": 28076 }, { "epoch": 26.19, "learning_rate": 4.672574626865672e-05, "loss": 0.0005, "step": 28080 }, { "epoch": 26.2, "learning_rate": 4.672527985074627e-05, "loss": 0.0035, "step": 28084 }, { "epoch": 26.2, "learning_rate": 4.672481343283582e-05, "loss": 0.0045, "step": 28088 }, { "epoch": 26.21, "learning_rate": 4.6724347014925376e-05, "loss": 0.0002, "step": 28092 }, { "epoch": 26.21, "learning_rate": 4.672388059701493e-05, "loss": 0.0033, "step": 28096 }, { "epoch": 26.21, "learning_rate": 4.672341417910448e-05, "loss": 0.001, "step": 28100 }, { "epoch": 26.22, "learning_rate": 4.6722947761194034e-05, "loss": 0.0002, "step": 28104 }, { "epoch": 26.22, "learning_rate": 4.672248134328359e-05, "loss": 0.0014, "step": 28108 }, { "epoch": 26.22, "learning_rate": 4.6722014925373137e-05, "loss": 0.0095, "step": 28112 }, { "epoch": 26.23, "learning_rate": 4.6721548507462685e-05, "loss": 0.0016, "step": 28116 }, { "epoch": 26.23, "learning_rate": 4.672108208955224e-05, "loss": 0.0088, "step": 28120 }, { "epoch": 26.24, "learning_rate": 4.6720615671641794e-05, "loss": 0.0013, "step": 28124 }, { "epoch": 26.24, "learning_rate": 4.672014925373134e-05, "loss": 0.0004, "step": 28128 }, { "epoch": 26.24, "learning_rate": 4.67196828358209e-05, "loss": 0.0004, "step": 28132 }, { "epoch": 26.25, "learning_rate": 4.671921641791045e-05, "loss": 0.0002, "step": 28136 }, { "epoch": 26.25, "learning_rate": 4.671875e-05, "loss": 0.0006, "step": 28140 }, { "epoch": 26.25, "learning_rate": 4.6718283582089555e-05, "loss": 0.0003, "step": 28144 }, { "epoch": 26.26, "learning_rate": 4.6717817164179104e-05, "loss": 0.0004, "step": 28148 }, { "epoch": 26.26, "learning_rate": 4.671735074626866e-05, "loss": 0.0003, "step": 28152 }, { "epoch": 26.26, "learning_rate": 4.6716884328358213e-05, "loss": 0.0008, "step": 28156 }, { "epoch": 26.27, "learning_rate": 4.671641791044776e-05, "loss": 0.0, "step": 28160 }, { "epoch": 26.27, "learning_rate": 4.6715951492537316e-05, "loss": 0.0065, "step": 28164 }, { "epoch": 26.28, "learning_rate": 4.671548507462687e-05, "loss": 0.0005, "step": 28168 }, { "epoch": 26.28, "learning_rate": 4.671501865671642e-05, "loss": 0.0013, "step": 28172 }, { "epoch": 26.28, "learning_rate": 4.671455223880597e-05, "loss": 0.001, "step": 28176 }, { "epoch": 26.29, "learning_rate": 4.671408582089552e-05, "loss": 0.0159, "step": 28180 }, { "epoch": 26.29, "learning_rate": 4.671361940298508e-05, "loss": 0.0004, "step": 28184 }, { "epoch": 26.29, "learning_rate": 4.6713152985074626e-05, "loss": 0.0047, "step": 28188 }, { "epoch": 26.3, "learning_rate": 4.671268656716418e-05, "loss": 0.0001, "step": 28192 }, { "epoch": 26.3, "learning_rate": 4.6712220149253735e-05, "loss": 0.0007, "step": 28196 }, { "epoch": 26.31, "learning_rate": 4.671175373134329e-05, "loss": 0.0003, "step": 28200 }, { "epoch": 26.31, "learning_rate": 4.671128731343284e-05, "loss": 0.0004, "step": 28204 }, { "epoch": 26.31, "learning_rate": 4.6710820895522387e-05, "loss": 0.0002, "step": 28208 }, { "epoch": 26.32, "learning_rate": 4.671035447761195e-05, "loss": 0.0001, "step": 28212 }, { "epoch": 26.32, "learning_rate": 4.6709888059701496e-05, "loss": 0.0028, "step": 28216 }, { "epoch": 26.32, "learning_rate": 4.6709421641791045e-05, "loss": 0.0002, "step": 28220 }, { "epoch": 26.33, "learning_rate": 4.67089552238806e-05, "loss": 0.0002, "step": 28224 }, { "epoch": 26.33, "learning_rate": 4.6708488805970154e-05, "loss": 0.001, "step": 28228 }, { "epoch": 26.34, "learning_rate": 4.67080223880597e-05, "loss": 0.0003, "step": 28232 }, { "epoch": 26.34, "learning_rate": 4.670755597014926e-05, "loss": 0.0005, "step": 28236 }, { "epoch": 26.34, "learning_rate": 4.6707089552238805e-05, "loss": 0.0007, "step": 28240 }, { "epoch": 26.35, "learning_rate": 4.670662313432836e-05, "loss": 0.0002, "step": 28244 }, { "epoch": 26.35, "learning_rate": 4.6706156716417915e-05, "loss": 0.0001, "step": 28248 }, { "epoch": 26.35, "learning_rate": 4.6705690298507463e-05, "loss": 0.0002, "step": 28252 }, { "epoch": 26.36, "learning_rate": 4.670522388059702e-05, "loss": 0.0017, "step": 28256 }, { "epoch": 26.36, "learning_rate": 4.670475746268657e-05, "loss": 0.0005, "step": 28260 }, { "epoch": 26.37, "learning_rate": 4.670429104477612e-05, "loss": 0.0003, "step": 28264 }, { "epoch": 26.37, "learning_rate": 4.670382462686567e-05, "loss": 0.0, "step": 28268 }, { "epoch": 26.37, "learning_rate": 4.670335820895523e-05, "loss": 0.0008, "step": 28272 }, { "epoch": 26.38, "learning_rate": 4.670289179104478e-05, "loss": 0.0004, "step": 28276 }, { "epoch": 26.38, "learning_rate": 4.670242537313433e-05, "loss": 0.0003, "step": 28280 }, { "epoch": 26.38, "learning_rate": 4.670195895522388e-05, "loss": 0.0003, "step": 28284 }, { "epoch": 26.39, "learning_rate": 4.670149253731344e-05, "loss": 0.0025, "step": 28288 }, { "epoch": 26.39, "learning_rate": 4.6701026119402985e-05, "loss": 0.0014, "step": 28292 }, { "epoch": 26.4, "learning_rate": 4.670055970149254e-05, "loss": 0.0002, "step": 28296 }, { "epoch": 26.4, "learning_rate": 4.670009328358209e-05, "loss": 0.0002, "step": 28300 }, { "epoch": 26.4, "learning_rate": 4.669962686567164e-05, "loss": 0.0004, "step": 28304 }, { "epoch": 26.41, "learning_rate": 4.66991604477612e-05, "loss": 0.0002, "step": 28308 }, { "epoch": 26.41, "learning_rate": 4.6698694029850746e-05, "loss": 0.0004, "step": 28312 }, { "epoch": 26.41, "learning_rate": 4.66982276119403e-05, "loss": 0.0001, "step": 28316 }, { "epoch": 26.42, "learning_rate": 4.6697761194029856e-05, "loss": 0.0004, "step": 28320 }, { "epoch": 26.42, "learning_rate": 4.6697294776119404e-05, "loss": 0.0008, "step": 28324 }, { "epoch": 26.43, "learning_rate": 4.669682835820895e-05, "loss": 0.0024, "step": 28328 }, { "epoch": 26.43, "learning_rate": 4.6696361940298514e-05, "loss": 0.0005, "step": 28332 }, { "epoch": 26.43, "learning_rate": 4.669589552238806e-05, "loss": 0.006, "step": 28336 }, { "epoch": 26.44, "learning_rate": 4.669542910447761e-05, "loss": 0.0046, "step": 28340 }, { "epoch": 26.44, "learning_rate": 4.6694962686567165e-05, "loss": 0.0001, "step": 28344 }, { "epoch": 26.44, "learning_rate": 4.669449626865672e-05, "loss": 0.0009, "step": 28348 }, { "epoch": 26.45, "learning_rate": 4.669402985074627e-05, "loss": 0.0006, "step": 28352 }, { "epoch": 26.45, "learning_rate": 4.669356343283582e-05, "loss": 0.0001, "step": 28356 }, { "epoch": 26.46, "learning_rate": 4.669309701492537e-05, "loss": 0.0001, "step": 28360 }, { "epoch": 26.46, "learning_rate": 4.669263059701493e-05, "loss": 0.0004, "step": 28364 }, { "epoch": 26.46, "learning_rate": 4.669216417910448e-05, "loss": 0.0003, "step": 28368 }, { "epoch": 26.47, "learning_rate": 4.669169776119403e-05, "loss": 0.0002, "step": 28372 }, { "epoch": 26.47, "learning_rate": 4.6691231343283584e-05, "loss": 0.0001, "step": 28376 }, { "epoch": 26.47, "learning_rate": 4.669076492537314e-05, "loss": 0.0043, "step": 28380 }, { "epoch": 26.48, "learning_rate": 4.669029850746269e-05, "loss": 0.0052, "step": 28384 }, { "epoch": 26.48, "learning_rate": 4.668983208955224e-05, "loss": 0.0012, "step": 28388 }, { "epoch": 26.49, "learning_rate": 4.66893656716418e-05, "loss": 0.0005, "step": 28392 }, { "epoch": 26.49, "learning_rate": 4.6688899253731345e-05, "loss": 0.001, "step": 28396 }, { "epoch": 26.49, "learning_rate": 4.66884328358209e-05, "loss": 0.0001, "step": 28400 }, { "epoch": 26.5, "learning_rate": 4.668796641791045e-05, "loss": 0.0012, "step": 28404 }, { "epoch": 26.5, "learning_rate": 4.66875e-05, "loss": 0.0002, "step": 28408 }, { "epoch": 26.5, "learning_rate": 4.668703358208956e-05, "loss": 0.0002, "step": 28412 }, { "epoch": 26.51, "learning_rate": 4.6686567164179106e-05, "loss": 0.0001, "step": 28416 }, { "epoch": 26.51, "learning_rate": 4.6686100746268654e-05, "loss": 0.0001, "step": 28420 }, { "epoch": 26.51, "learning_rate": 4.6685634328358216e-05, "loss": 0.0009, "step": 28424 }, { "epoch": 26.52, "learning_rate": 4.6685167910447764e-05, "loss": 0.0003, "step": 28428 }, { "epoch": 26.52, "learning_rate": 4.668470149253731e-05, "loss": 0.0001, "step": 28432 }, { "epoch": 26.53, "learning_rate": 4.668423507462687e-05, "loss": 0.0021, "step": 28436 }, { "epoch": 26.53, "learning_rate": 4.668376865671642e-05, "loss": 0.0024, "step": 28440 }, { "epoch": 26.53, "learning_rate": 4.668330223880597e-05, "loss": 0.001, "step": 28444 }, { "epoch": 26.54, "learning_rate": 4.6682835820895525e-05, "loss": 0.0006, "step": 28448 }, { "epoch": 26.54, "learning_rate": 4.668236940298507e-05, "loss": 0.0001, "step": 28452 }, { "epoch": 26.54, "learning_rate": 4.668190298507463e-05, "loss": 0.0005, "step": 28456 }, { "epoch": 26.55, "learning_rate": 4.668143656716418e-05, "loss": 0.0041, "step": 28460 }, { "epoch": 26.55, "learning_rate": 4.668097014925373e-05, "loss": 0.0049, "step": 28464 }, { "epoch": 26.56, "learning_rate": 4.6680503731343286e-05, "loss": 0.0023, "step": 28468 }, { "epoch": 26.56, "learning_rate": 4.668003731343284e-05, "loss": 0.0006, "step": 28472 }, { "epoch": 26.56, "learning_rate": 4.667957089552239e-05, "loss": 0.0001, "step": 28476 }, { "epoch": 26.57, "learning_rate": 4.667910447761194e-05, "loss": 0.0009, "step": 28480 }, { "epoch": 26.57, "learning_rate": 4.66786380597015e-05, "loss": 0.0008, "step": 28484 }, { "epoch": 26.57, "learning_rate": 4.667817164179105e-05, "loss": 0.0014, "step": 28488 }, { "epoch": 26.58, "learning_rate": 4.6677705223880595e-05, "loss": 0.0002, "step": 28492 }, { "epoch": 26.58, "learning_rate": 4.667723880597015e-05, "loss": 0.001, "step": 28496 }, { "epoch": 26.59, "learning_rate": 4.6676772388059705e-05, "loss": 0.0014, "step": 28500 }, { "epoch": 26.59, "eval_exact_match": 0.7321083172147002, "eval_exec": 0.7688588007736944, "eval_loss": 0.3977905809879303, "eval_runtime": 1155.6229, "eval_samples_per_second": 0.895, "step": 28500 }, { "epoch": 26.59, "learning_rate": 4.667630597014925e-05, "loss": 0.0002, "step": 28504 }, { "epoch": 26.59, "learning_rate": 4.667583955223881e-05, "loss": 0.0001, "step": 28508 }, { "epoch": 26.6, "learning_rate": 4.6675373134328356e-05, "loss": 0.0006, "step": 28512 }, { "epoch": 26.6, "learning_rate": 4.667490671641792e-05, "loss": 0.0018, "step": 28516 }, { "epoch": 26.6, "learning_rate": 4.6674440298507466e-05, "loss": 0.0003, "step": 28520 }, { "epoch": 26.61, "learning_rate": 4.6673973880597014e-05, "loss": 0.0005, "step": 28524 }, { "epoch": 26.61, "learning_rate": 4.667350746268657e-05, "loss": 0.0001, "step": 28528 }, { "epoch": 26.62, "learning_rate": 4.6673041044776124e-05, "loss": 0.0022, "step": 28532 }, { "epoch": 26.62, "learning_rate": 4.667257462686567e-05, "loss": 0.0006, "step": 28536 }, { "epoch": 26.62, "learning_rate": 4.667210820895523e-05, "loss": 0.0002, "step": 28540 }, { "epoch": 26.63, "learning_rate": 4.667164179104478e-05, "loss": 0.0001, "step": 28544 }, { "epoch": 26.63, "learning_rate": 4.667117537313433e-05, "loss": 0.0001, "step": 28548 }, { "epoch": 26.63, "learning_rate": 4.6670708955223885e-05, "loss": 0.0036, "step": 28552 }, { "epoch": 26.64, "learning_rate": 4.667024253731343e-05, "loss": 0.0002, "step": 28556 }, { "epoch": 26.64, "learning_rate": 4.666977611940299e-05, "loss": 0.0001, "step": 28560 }, { "epoch": 26.65, "learning_rate": 4.666930970149254e-05, "loss": 0.0002, "step": 28564 }, { "epoch": 26.65, "learning_rate": 4.666884328358209e-05, "loss": 0.001, "step": 28568 }, { "epoch": 26.65, "learning_rate": 4.666837686567164e-05, "loss": 0.0032, "step": 28572 }, { "epoch": 26.66, "learning_rate": 4.66679104477612e-05, "loss": 0.0002, "step": 28576 }, { "epoch": 26.66, "learning_rate": 4.666744402985075e-05, "loss": 0.0015, "step": 28580 }, { "epoch": 26.66, "learning_rate": 4.66669776119403e-05, "loss": 0.0034, "step": 28584 }, { "epoch": 26.67, "learning_rate": 4.666651119402985e-05, "loss": 0.0009, "step": 28588 }, { "epoch": 26.67, "learning_rate": 4.666604477611941e-05, "loss": 0.0019, "step": 28592 }, { "epoch": 26.68, "learning_rate": 4.6665578358208955e-05, "loss": 0.0029, "step": 28596 }, { "epoch": 26.68, "learning_rate": 4.666511194029851e-05, "loss": 0.0, "step": 28600 }, { "epoch": 26.68, "learning_rate": 4.6664645522388065e-05, "loss": 0.0006, "step": 28604 }, { "epoch": 26.69, "learning_rate": 4.666417910447761e-05, "loss": 0.001, "step": 28608 }, { "epoch": 26.69, "learning_rate": 4.666371268656717e-05, "loss": 0.0, "step": 28612 }, { "epoch": 26.69, "learning_rate": 4.6663246268656716e-05, "loss": 0.0001, "step": 28616 }, { "epoch": 26.7, "learning_rate": 4.666277985074627e-05, "loss": 0.003, "step": 28620 }, { "epoch": 26.7, "learning_rate": 4.6662313432835826e-05, "loss": 0.0009, "step": 28624 }, { "epoch": 26.71, "learning_rate": 4.6661847014925374e-05, "loss": 0.0001, "step": 28628 }, { "epoch": 26.71, "learning_rate": 4.666138059701492e-05, "loss": 0.0007, "step": 28632 }, { "epoch": 26.71, "learning_rate": 4.6660914179104484e-05, "loss": 0.0061, "step": 28636 }, { "epoch": 26.72, "learning_rate": 4.666044776119403e-05, "loss": 0.0041, "step": 28640 }, { "epoch": 26.72, "learning_rate": 4.665998134328358e-05, "loss": 0.0067, "step": 28644 }, { "epoch": 26.72, "learning_rate": 4.6659514925373135e-05, "loss": 0.0009, "step": 28648 }, { "epoch": 26.73, "learning_rate": 4.665904850746269e-05, "loss": 0.0003, "step": 28652 }, { "epoch": 26.73, "learning_rate": 4.665858208955224e-05, "loss": 0.0005, "step": 28656 }, { "epoch": 26.73, "learning_rate": 4.665811567164179e-05, "loss": 0.0002, "step": 28660 }, { "epoch": 26.74, "learning_rate": 4.665764925373135e-05, "loss": 0.0002, "step": 28664 }, { "epoch": 26.74, "learning_rate": 4.6657182835820896e-05, "loss": 0.0076, "step": 28668 }, { "epoch": 26.75, "learning_rate": 4.665671641791045e-05, "loss": 0.0014, "step": 28672 }, { "epoch": 26.75, "learning_rate": 4.665625e-05, "loss": 0.0002, "step": 28676 }, { "epoch": 26.75, "learning_rate": 4.6655783582089554e-05, "loss": 0.0002, "step": 28680 }, { "epoch": 26.76, "learning_rate": 4.665531716417911e-05, "loss": 0.0006, "step": 28684 }, { "epoch": 26.76, "learning_rate": 4.665485074626866e-05, "loss": 0.0048, "step": 28688 }, { "epoch": 26.76, "learning_rate": 4.665438432835821e-05, "loss": 0.0002, "step": 28692 }, { "epoch": 26.77, "learning_rate": 4.665391791044777e-05, "loss": 0.0012, "step": 28696 }, { "epoch": 26.77, "learning_rate": 4.6653451492537315e-05, "loss": 0.0002, "step": 28700 }, { "epoch": 26.78, "learning_rate": 4.665298507462687e-05, "loss": 0.0004, "step": 28704 }, { "epoch": 26.78, "learning_rate": 4.665251865671642e-05, "loss": 0.0001, "step": 28708 }, { "epoch": 26.78, "learning_rate": 4.665205223880597e-05, "loss": 0.0001, "step": 28712 }, { "epoch": 26.79, "learning_rate": 4.665158582089553e-05, "loss": 0.0023, "step": 28716 }, { "epoch": 26.79, "learning_rate": 4.6651119402985076e-05, "loss": 0.0012, "step": 28720 }, { "epoch": 26.79, "learning_rate": 4.665065298507463e-05, "loss": 0.0002, "step": 28724 }, { "epoch": 26.8, "learning_rate": 4.6650186567164186e-05, "loss": 0.0015, "step": 28728 }, { "epoch": 26.8, "learning_rate": 4.6649720149253734e-05, "loss": 0.0008, "step": 28732 }, { "epoch": 26.81, "learning_rate": 4.664925373134328e-05, "loss": 0.0019, "step": 28736 }, { "epoch": 26.81, "learning_rate": 4.664878731343284e-05, "loss": 0.0001, "step": 28740 }, { "epoch": 26.81, "learning_rate": 4.664832089552239e-05, "loss": 0.0007, "step": 28744 }, { "epoch": 26.82, "learning_rate": 4.664785447761194e-05, "loss": 0.0013, "step": 28748 }, { "epoch": 26.82, "learning_rate": 4.6647388059701495e-05, "loss": 0.0018, "step": 28752 }, { "epoch": 26.82, "learning_rate": 4.664692164179105e-05, "loss": 0.0005, "step": 28756 }, { "epoch": 26.83, "learning_rate": 4.66464552238806e-05, "loss": 0.0042, "step": 28760 }, { "epoch": 26.83, "learning_rate": 4.664598880597015e-05, "loss": 0.0186, "step": 28764 }, { "epoch": 26.84, "learning_rate": 4.66455223880597e-05, "loss": 0.0004, "step": 28768 }, { "epoch": 26.84, "learning_rate": 4.6645055970149256e-05, "loss": 0.0026, "step": 28772 }, { "epoch": 26.84, "learning_rate": 4.664458955223881e-05, "loss": 0.0029, "step": 28776 }, { "epoch": 26.85, "learning_rate": 4.664412313432836e-05, "loss": 0.0073, "step": 28780 }, { "epoch": 26.85, "learning_rate": 4.6643656716417914e-05, "loss": 0.0005, "step": 28784 }, { "epoch": 26.85, "learning_rate": 4.664319029850747e-05, "loss": 0.0006, "step": 28788 }, { "epoch": 26.86, "learning_rate": 4.664272388059702e-05, "loss": 0.0002, "step": 28792 }, { "epoch": 26.86, "learning_rate": 4.6642257462686565e-05, "loss": 0.0004, "step": 28796 }, { "epoch": 26.87, "learning_rate": 4.664179104477612e-05, "loss": 0.0002, "step": 28800 }, { "epoch": 26.87, "learning_rate": 4.6641324626865675e-05, "loss": 0.0021, "step": 28804 }, { "epoch": 26.87, "learning_rate": 4.664085820895522e-05, "loss": 0.0002, "step": 28808 }, { "epoch": 26.88, "learning_rate": 4.664039179104478e-05, "loss": 0.0009, "step": 28812 }, { "epoch": 26.88, "learning_rate": 4.663992537313433e-05, "loss": 0.0011, "step": 28816 }, { "epoch": 26.88, "learning_rate": 4.663945895522388e-05, "loss": 0.0003, "step": 28820 }, { "epoch": 26.89, "learning_rate": 4.6638992537313436e-05, "loss": 0.0001, "step": 28824 }, { "epoch": 26.89, "learning_rate": 4.6638526119402984e-05, "loss": 0.0001, "step": 28828 }, { "epoch": 26.9, "learning_rate": 4.663805970149254e-05, "loss": 0.0011, "step": 28832 }, { "epoch": 26.9, "learning_rate": 4.6637593283582094e-05, "loss": 0.0003, "step": 28836 }, { "epoch": 26.9, "learning_rate": 4.663712686567164e-05, "loss": 0.0001, "step": 28840 }, { "epoch": 26.91, "learning_rate": 4.66366604477612e-05, "loss": 0.0015, "step": 28844 }, { "epoch": 26.91, "learning_rate": 4.663619402985075e-05, "loss": 0.0002, "step": 28848 }, { "epoch": 26.91, "learning_rate": 4.66357276119403e-05, "loss": 0.0007, "step": 28852 }, { "epoch": 26.92, "learning_rate": 4.6635261194029855e-05, "loss": 0.001, "step": 28856 }, { "epoch": 26.92, "learning_rate": 4.66347947761194e-05, "loss": 0.0003, "step": 28860 }, { "epoch": 26.93, "learning_rate": 4.663432835820896e-05, "loss": 0.0026, "step": 28864 }, { "epoch": 26.93, "learning_rate": 4.663386194029851e-05, "loss": 0.0001, "step": 28868 }, { "epoch": 26.93, "learning_rate": 4.663339552238806e-05, "loss": 0.0007, "step": 28872 }, { "epoch": 26.94, "learning_rate": 4.6632929104477616e-05, "loss": 0.0001, "step": 28876 }, { "epoch": 26.94, "learning_rate": 4.663246268656717e-05, "loss": 0.0009, "step": 28880 }, { "epoch": 26.94, "learning_rate": 4.663199626865672e-05, "loss": 0.0006, "step": 28884 }, { "epoch": 26.95, "learning_rate": 4.663152985074627e-05, "loss": 0.0013, "step": 28888 }, { "epoch": 26.95, "learning_rate": 4.663106343283583e-05, "loss": 0.001, "step": 28892 }, { "epoch": 26.96, "learning_rate": 4.663059701492538e-05, "loss": 0.0058, "step": 28896 }, { "epoch": 26.96, "learning_rate": 4.6630130597014925e-05, "loss": 0.0191, "step": 28900 }, { "epoch": 26.96, "learning_rate": 4.662966417910448e-05, "loss": 0.0006, "step": 28904 }, { "epoch": 26.97, "learning_rate": 4.6629197761194035e-05, "loss": 0.0017, "step": 28908 }, { "epoch": 26.97, "learning_rate": 4.662873134328358e-05, "loss": 0.0013, "step": 28912 }, { "epoch": 26.97, "learning_rate": 4.662826492537314e-05, "loss": 0.0019, "step": 28916 }, { "epoch": 26.98, "learning_rate": 4.6627798507462686e-05, "loss": 0.0083, "step": 28920 }, { "epoch": 26.98, "learning_rate": 4.662733208955224e-05, "loss": 0.0022, "step": 28924 }, { "epoch": 26.98, "learning_rate": 4.6626865671641796e-05, "loss": 0.0003, "step": 28928 }, { "epoch": 26.99, "learning_rate": 4.6626399253731344e-05, "loss": 0.0031, "step": 28932 }, { "epoch": 26.99, "learning_rate": 4.66259328358209e-05, "loss": 0.0046, "step": 28936 }, { "epoch": 27.0, "learning_rate": 4.6625466417910454e-05, "loss": 0.0008, "step": 28940 }, { "epoch": 27.0, "learning_rate": 4.6625e-05, "loss": 0.0012, "step": 28944 }, { "epoch": 27.0, "learning_rate": 4.662453358208955e-05, "loss": 0.0001, "step": 28948 }, { "epoch": 27.01, "learning_rate": 4.662406716417911e-05, "loss": 0.0021, "step": 28952 }, { "epoch": 27.01, "learning_rate": 4.662360074626866e-05, "loss": 0.0017, "step": 28956 }, { "epoch": 27.01, "learning_rate": 4.662313432835821e-05, "loss": 0.0023, "step": 28960 }, { "epoch": 27.02, "learning_rate": 4.662266791044776e-05, "loss": 0.0001, "step": 28964 }, { "epoch": 27.02, "learning_rate": 4.662220149253732e-05, "loss": 0.001, "step": 28968 }, { "epoch": 27.03, "learning_rate": 4.6621735074626866e-05, "loss": 0.0003, "step": 28972 }, { "epoch": 27.03, "learning_rate": 4.662126865671642e-05, "loss": 0.0002, "step": 28976 }, { "epoch": 27.03, "learning_rate": 4.662080223880597e-05, "loss": 0.0003, "step": 28980 }, { "epoch": 27.04, "learning_rate": 4.6620335820895524e-05, "loss": 0.0007, "step": 28984 }, { "epoch": 27.04, "learning_rate": 4.661986940298508e-05, "loss": 0.0008, "step": 28988 }, { "epoch": 27.04, "learning_rate": 4.661940298507463e-05, "loss": 0.0025, "step": 28992 }, { "epoch": 27.05, "learning_rate": 4.661893656716418e-05, "loss": 0.0004, "step": 28996 }, { "epoch": 27.05, "learning_rate": 4.6618470149253737e-05, "loss": 0.0005, "step": 29000 }, { "epoch": 27.05, "eval_exact_match": 0.7330754352030948, "eval_exec": 0.7736943907156673, "eval_loss": 0.374284952878952, "eval_runtime": 1172.5719, "eval_samples_per_second": 0.882, "step": 29000 }, { "epoch": 27.06, "learning_rate": 4.6618003731343285e-05, "loss": 0.0004, "step": 29004 }, { "epoch": 27.06, "learning_rate": 4.661753731343284e-05, "loss": 0.0003, "step": 29008 }, { "epoch": 27.06, "learning_rate": 4.6617070895522394e-05, "loss": 0.0004, "step": 29012 }, { "epoch": 27.07, "learning_rate": 4.661660447761194e-05, "loss": 0.0001, "step": 29016 }, { "epoch": 27.07, "learning_rate": 4.66161380597015e-05, "loss": 0.0004, "step": 29020 }, { "epoch": 27.07, "learning_rate": 4.6615671641791046e-05, "loss": 0.0005, "step": 29024 }, { "epoch": 27.08, "learning_rate": 4.66152052238806e-05, "loss": 0.0149, "step": 29028 }, { "epoch": 27.08, "learning_rate": 4.6614738805970155e-05, "loss": 0.0012, "step": 29032 }, { "epoch": 27.09, "learning_rate": 4.6614272388059704e-05, "loss": 0.0, "step": 29036 }, { "epoch": 27.09, "learning_rate": 4.661380597014925e-05, "loss": 0.0001, "step": 29040 }, { "epoch": 27.09, "learning_rate": 4.661333955223881e-05, "loss": 0.0014, "step": 29044 }, { "epoch": 27.1, "learning_rate": 4.661287313432836e-05, "loss": 0.0009, "step": 29048 }, { "epoch": 27.1, "learning_rate": 4.661240671641791e-05, "loss": 0.003, "step": 29052 }, { "epoch": 27.1, "learning_rate": 4.6611940298507465e-05, "loss": 0.0019, "step": 29056 }, { "epoch": 27.11, "learning_rate": 4.661147388059702e-05, "loss": 0.0001, "step": 29060 }, { "epoch": 27.11, "learning_rate": 4.661100746268657e-05, "loss": 0.0001, "step": 29064 }, { "epoch": 27.12, "learning_rate": 4.661054104477612e-05, "loss": 0.0005, "step": 29068 }, { "epoch": 27.12, "learning_rate": 4.661007462686568e-05, "loss": 0.0, "step": 29072 }, { "epoch": 27.12, "learning_rate": 4.6609608208955226e-05, "loss": 0.0, "step": 29076 }, { "epoch": 27.13, "learning_rate": 4.660914179104478e-05, "loss": 0.0008, "step": 29080 }, { "epoch": 27.13, "learning_rate": 4.660867537313433e-05, "loss": 0.0003, "step": 29084 }, { "epoch": 27.13, "learning_rate": 4.6608208955223883e-05, "loss": 0.0001, "step": 29088 }, { "epoch": 27.14, "learning_rate": 4.660774253731344e-05, "loss": 0.001, "step": 29092 }, { "epoch": 27.14, "learning_rate": 4.6607276119402987e-05, "loss": 0.0016, "step": 29096 }, { "epoch": 27.15, "learning_rate": 4.6606809701492535e-05, "loss": 0.0016, "step": 29100 }, { "epoch": 27.15, "learning_rate": 4.6606343283582096e-05, "loss": 0.0003, "step": 29104 }, { "epoch": 27.15, "learning_rate": 4.6605876865671644e-05, "loss": 0.0002, "step": 29108 }, { "epoch": 27.16, "learning_rate": 4.660541044776119e-05, "loss": 0.0002, "step": 29112 }, { "epoch": 27.16, "learning_rate": 4.660494402985075e-05, "loss": 0.0001, "step": 29116 }, { "epoch": 27.16, "learning_rate": 4.66044776119403e-05, "loss": 0.0, "step": 29120 }, { "epoch": 27.17, "learning_rate": 4.660401119402985e-05, "loss": 0.0002, "step": 29124 }, { "epoch": 27.17, "learning_rate": 4.6603544776119405e-05, "loss": 0.0005, "step": 29128 }, { "epoch": 27.18, "learning_rate": 4.6603078358208954e-05, "loss": 0.0001, "step": 29132 }, { "epoch": 27.18, "learning_rate": 4.660261194029851e-05, "loss": 0.0, "step": 29136 }, { "epoch": 27.18, "learning_rate": 4.6602145522388063e-05, "loss": 0.0, "step": 29140 }, { "epoch": 27.19, "learning_rate": 4.660167910447761e-05, "loss": 0.0002, "step": 29144 }, { "epoch": 27.19, "learning_rate": 4.6601212686567166e-05, "loss": 0.0005, "step": 29148 }, { "epoch": 27.19, "learning_rate": 4.660074626865672e-05, "loss": 0.0007, "step": 29152 }, { "epoch": 27.2, "learning_rate": 4.660027985074627e-05, "loss": 0.0002, "step": 29156 }, { "epoch": 27.2, "learning_rate": 4.659981343283582e-05, "loss": 0.0, "step": 29160 }, { "epoch": 27.21, "learning_rate": 4.659934701492538e-05, "loss": 0.0026, "step": 29164 }, { "epoch": 27.21, "learning_rate": 4.659888059701493e-05, "loss": 0.0001, "step": 29168 }, { "epoch": 27.21, "learning_rate": 4.659841417910448e-05, "loss": 0.0012, "step": 29172 }, { "epoch": 27.22, "learning_rate": 4.659794776119403e-05, "loss": 0.0158, "step": 29176 }, { "epoch": 27.22, "learning_rate": 4.6597481343283585e-05, "loss": 0.0003, "step": 29180 }, { "epoch": 27.22, "learning_rate": 4.659701492537314e-05, "loss": 0.0022, "step": 29184 }, { "epoch": 27.23, "learning_rate": 4.659654850746269e-05, "loss": 0.0002, "step": 29188 }, { "epoch": 27.23, "learning_rate": 4.6596082089552237e-05, "loss": 0.0001, "step": 29192 }, { "epoch": 27.24, "learning_rate": 4.65956156716418e-05, "loss": 0.0028, "step": 29196 }, { "epoch": 27.24, "learning_rate": 4.6595149253731346e-05, "loss": 0.0001, "step": 29200 }, { "epoch": 27.24, "learning_rate": 4.6594682835820894e-05, "loss": 0.0009, "step": 29204 }, { "epoch": 27.25, "learning_rate": 4.659421641791045e-05, "loss": 0.0, "step": 29208 }, { "epoch": 27.25, "learning_rate": 4.6593750000000004e-05, "loss": 0.0012, "step": 29212 }, { "epoch": 27.25, "learning_rate": 4.659328358208955e-05, "loss": 0.0034, "step": 29216 }, { "epoch": 27.26, "learning_rate": 4.659281716417911e-05, "loss": 0.0001, "step": 29220 }, { "epoch": 27.26, "learning_rate": 4.659235074626866e-05, "loss": 0.0, "step": 29224 }, { "epoch": 27.26, "learning_rate": 4.659188432835821e-05, "loss": 0.0002, "step": 29228 }, { "epoch": 27.27, "learning_rate": 4.6591417910447765e-05, "loss": 0.0052, "step": 29232 }, { "epoch": 27.27, "learning_rate": 4.6590951492537313e-05, "loss": 0.0002, "step": 29236 }, { "epoch": 27.28, "learning_rate": 4.659048507462687e-05, "loss": 0.0, "step": 29240 }, { "epoch": 27.28, "learning_rate": 4.659001865671642e-05, "loss": 0.0001, "step": 29244 }, { "epoch": 27.28, "learning_rate": 4.658955223880597e-05, "loss": 0.0018, "step": 29248 }, { "epoch": 27.29, "learning_rate": 4.658908582089552e-05, "loss": 0.0002, "step": 29252 }, { "epoch": 27.29, "learning_rate": 4.658861940298508e-05, "loss": 0.0019, "step": 29256 }, { "epoch": 27.29, "learning_rate": 4.658815298507463e-05, "loss": 0.0004, "step": 29260 }, { "epoch": 27.3, "learning_rate": 4.658768656716418e-05, "loss": 0.0008, "step": 29264 }, { "epoch": 27.3, "learning_rate": 4.658722014925373e-05, "loss": 0.0001, "step": 29268 }, { "epoch": 27.31, "learning_rate": 4.658675373134329e-05, "loss": 0.0013, "step": 29272 }, { "epoch": 27.31, "learning_rate": 4.6586287313432835e-05, "loss": 0.0, "step": 29276 }, { "epoch": 27.31, "learning_rate": 4.658582089552239e-05, "loss": 0.0001, "step": 29280 }, { "epoch": 27.32, "learning_rate": 4.6585354477611945e-05, "loss": 0.0021, "step": 29284 }, { "epoch": 27.32, "learning_rate": 4.658488805970149e-05, "loss": 0.0001, "step": 29288 }, { "epoch": 27.32, "learning_rate": 4.658442164179105e-05, "loss": 0.0001, "step": 29292 }, { "epoch": 27.33, "learning_rate": 4.6583955223880596e-05, "loss": 0.0003, "step": 29296 }, { "epoch": 27.33, "learning_rate": 4.658348880597015e-05, "loss": 0.0001, "step": 29300 }, { "epoch": 27.34, "learning_rate": 4.6583022388059706e-05, "loss": 0.0001, "step": 29304 }, { "epoch": 27.34, "learning_rate": 4.6582555970149254e-05, "loss": 0.0, "step": 29308 }, { "epoch": 27.34, "learning_rate": 4.65820895522388e-05, "loss": 0.0001, "step": 29312 }, { "epoch": 27.35, "learning_rate": 4.6581623134328364e-05, "loss": 0.0003, "step": 29316 }, { "epoch": 27.35, "learning_rate": 4.658115671641791e-05, "loss": 0.0, "step": 29320 }, { "epoch": 27.35, "learning_rate": 4.658069029850746e-05, "loss": 0.0, "step": 29324 }, { "epoch": 27.36, "learning_rate": 4.6580223880597015e-05, "loss": 0.0042, "step": 29328 }, { "epoch": 27.36, "learning_rate": 4.657975746268657e-05, "loss": 0.0003, "step": 29332 }, { "epoch": 27.37, "learning_rate": 4.6579291044776125e-05, "loss": 0.0, "step": 29336 }, { "epoch": 27.37, "learning_rate": 4.657882462686567e-05, "loss": 0.0093, "step": 29340 }, { "epoch": 27.37, "learning_rate": 4.657835820895523e-05, "loss": 0.0035, "step": 29344 }, { "epoch": 27.38, "learning_rate": 4.657789179104478e-05, "loss": 0.004, "step": 29348 }, { "epoch": 27.38, "learning_rate": 4.657742537313433e-05, "loss": 0.0006, "step": 29352 }, { "epoch": 27.38, "learning_rate": 4.657695895522388e-05, "loss": 0.0009, "step": 29356 }, { "epoch": 27.39, "learning_rate": 4.6576492537313434e-05, "loss": 0.0006, "step": 29360 }, { "epoch": 27.39, "learning_rate": 4.657602611940299e-05, "loss": 0.0001, "step": 29364 }, { "epoch": 27.4, "learning_rate": 4.657555970149254e-05, "loss": 0.0001, "step": 29368 }, { "epoch": 27.4, "learning_rate": 4.657509328358209e-05, "loss": 0.0066, "step": 29372 }, { "epoch": 27.4, "learning_rate": 4.657462686567165e-05, "loss": 0.0003, "step": 29376 }, { "epoch": 27.41, "learning_rate": 4.6574160447761195e-05, "loss": 0.0004, "step": 29380 }, { "epoch": 27.41, "learning_rate": 4.657369402985075e-05, "loss": 0.003, "step": 29384 }, { "epoch": 27.41, "learning_rate": 4.65732276119403e-05, "loss": 0.0017, "step": 29388 }, { "epoch": 27.42, "learning_rate": 4.657276119402985e-05, "loss": 0.002, "step": 29392 }, { "epoch": 27.42, "learning_rate": 4.657229477611941e-05, "loss": 0.0001, "step": 29396 }, { "epoch": 27.43, "learning_rate": 4.6571828358208956e-05, "loss": 0.0082, "step": 29400 }, { "epoch": 27.43, "learning_rate": 4.657136194029851e-05, "loss": 0.001, "step": 29404 }, { "epoch": 27.43, "learning_rate": 4.6570895522388066e-05, "loss": 0.0002, "step": 29408 }, { "epoch": 27.44, "learning_rate": 4.6570429104477614e-05, "loss": 0.0013, "step": 29412 }, { "epoch": 27.44, "learning_rate": 4.656996268656716e-05, "loss": 0.0003, "step": 29416 }, { "epoch": 27.44, "learning_rate": 4.656949626865672e-05, "loss": 0.0004, "step": 29420 }, { "epoch": 27.45, "learning_rate": 4.656902985074627e-05, "loss": 0.0001, "step": 29424 }, { "epoch": 27.45, "learning_rate": 4.656856343283582e-05, "loss": 0.0029, "step": 29428 }, { "epoch": 27.46, "learning_rate": 4.6568097014925375e-05, "loss": 0.0001, "step": 29432 }, { "epoch": 27.46, "learning_rate": 4.656763059701493e-05, "loss": 0.0055, "step": 29436 }, { "epoch": 27.46, "learning_rate": 4.656716417910448e-05, "loss": 0.0005, "step": 29440 }, { "epoch": 27.47, "learning_rate": 4.656669776119403e-05, "loss": 0.0002, "step": 29444 }, { "epoch": 27.47, "learning_rate": 4.656623134328358e-05, "loss": 0.0033, "step": 29448 }, { "epoch": 27.47, "learning_rate": 4.6565764925373136e-05, "loss": 0.0005, "step": 29452 }, { "epoch": 27.48, "learning_rate": 4.656529850746269e-05, "loss": 0.0002, "step": 29456 }, { "epoch": 27.48, "learning_rate": 4.656483208955224e-05, "loss": 0.0003, "step": 29460 }, { "epoch": 27.49, "learning_rate": 4.6564365671641794e-05, "loss": 0.0001, "step": 29464 }, { "epoch": 27.49, "learning_rate": 4.656389925373135e-05, "loss": 0.001, "step": 29468 }, { "epoch": 27.49, "learning_rate": 4.65634328358209e-05, "loss": 0.0001, "step": 29472 }, { "epoch": 27.5, "learning_rate": 4.6562966417910445e-05, "loss": 0.0062, "step": 29476 }, { "epoch": 27.5, "learning_rate": 4.65625e-05, "loss": 0.0055, "step": 29480 }, { "epoch": 27.5, "learning_rate": 4.6562033582089555e-05, "loss": 0.0001, "step": 29484 }, { "epoch": 27.51, "learning_rate": 4.65615671641791e-05, "loss": 0.0002, "step": 29488 }, { "epoch": 27.51, "learning_rate": 4.656110074626866e-05, "loss": 0.0049, "step": 29492 }, { "epoch": 27.51, "learning_rate": 4.656063432835821e-05, "loss": 0.0002, "step": 29496 }, { "epoch": 27.52, "learning_rate": 4.656016791044777e-05, "loss": 0.0001, "step": 29500 }, { "epoch": 27.52, "eval_exact_match": 0.741779497098646, "eval_exec": 0.769825918762089, "eval_loss": 0.3933384418487549, "eval_runtime": 1168.2038, "eval_samples_per_second": 0.885, "step": 29500 }, { "epoch": 27.52, "learning_rate": 4.6559701492537316e-05, "loss": 0.0001, "step": 29504 }, { "epoch": 27.53, "learning_rate": 4.6559235074626864e-05, "loss": 0.0025, "step": 29508 }, { "epoch": 27.53, "learning_rate": 4.6558768656716426e-05, "loss": 0.0003, "step": 29512 }, { "epoch": 27.53, "learning_rate": 4.6558302238805974e-05, "loss": 0.0003, "step": 29516 }, { "epoch": 27.54, "learning_rate": 4.655783582089552e-05, "loss": 0.0016, "step": 29520 }, { "epoch": 27.54, "learning_rate": 4.655736940298508e-05, "loss": 0.0001, "step": 29524 }, { "epoch": 27.54, "learning_rate": 4.655690298507463e-05, "loss": 0.0007, "step": 29528 }, { "epoch": 27.55, "learning_rate": 4.655643656716418e-05, "loss": 0.0013, "step": 29532 }, { "epoch": 27.55, "learning_rate": 4.6555970149253735e-05, "loss": 0.0001, "step": 29536 }, { "epoch": 27.56, "learning_rate": 4.655550373134328e-05, "loss": 0.0009, "step": 29540 }, { "epoch": 27.56, "learning_rate": 4.655503731343284e-05, "loss": 0.0015, "step": 29544 }, { "epoch": 27.56, "learning_rate": 4.655457089552239e-05, "loss": 0.0002, "step": 29548 }, { "epoch": 27.57, "learning_rate": 4.655410447761194e-05, "loss": 0.0001, "step": 29552 }, { "epoch": 27.57, "learning_rate": 4.6553638059701496e-05, "loss": 0.001, "step": 29556 }, { "epoch": 27.57, "learning_rate": 4.655317164179105e-05, "loss": 0.0001, "step": 29560 }, { "epoch": 27.58, "learning_rate": 4.65527052238806e-05, "loss": 0.0001, "step": 29564 }, { "epoch": 27.58, "learning_rate": 4.655223880597015e-05, "loss": 0.0067, "step": 29568 }, { "epoch": 27.59, "learning_rate": 4.655177238805971e-05, "loss": 0.0029, "step": 29572 }, { "epoch": 27.59, "learning_rate": 4.655130597014926e-05, "loss": 0.0007, "step": 29576 }, { "epoch": 27.59, "learning_rate": 4.6550839552238805e-05, "loss": 0.0004, "step": 29580 }, { "epoch": 27.6, "learning_rate": 4.655037313432836e-05, "loss": 0.0001, "step": 29584 }, { "epoch": 27.6, "learning_rate": 4.6549906716417915e-05, "loss": 0.0002, "step": 29588 }, { "epoch": 27.6, "learning_rate": 4.654944029850746e-05, "loss": 0.0001, "step": 29592 }, { "epoch": 27.61, "learning_rate": 4.654897388059702e-05, "loss": 0.0003, "step": 29596 }, { "epoch": 27.61, "learning_rate": 4.6548507462686566e-05, "loss": 0.0017, "step": 29600 }, { "epoch": 27.62, "learning_rate": 4.654804104477612e-05, "loss": 0.0031, "step": 29604 }, { "epoch": 27.62, "learning_rate": 4.6547574626865676e-05, "loss": 0.0004, "step": 29608 }, { "epoch": 27.62, "learning_rate": 4.6547108208955224e-05, "loss": 0.0003, "step": 29612 }, { "epoch": 27.63, "learning_rate": 4.654664179104478e-05, "loss": 0.0002, "step": 29616 }, { "epoch": 27.63, "learning_rate": 4.6546175373134334e-05, "loss": 0.0034, "step": 29620 }, { "epoch": 27.63, "learning_rate": 4.654570895522388e-05, "loss": 0.0008, "step": 29624 }, { "epoch": 27.64, "learning_rate": 4.654524253731343e-05, "loss": 0.0001, "step": 29628 }, { "epoch": 27.64, "learning_rate": 4.654477611940299e-05, "loss": 0.0001, "step": 29632 }, { "epoch": 27.65, "learning_rate": 4.654430970149254e-05, "loss": 0.0002, "step": 29636 }, { "epoch": 27.65, "learning_rate": 4.654384328358209e-05, "loss": 0.0035, "step": 29640 }, { "epoch": 27.65, "learning_rate": 4.654337686567164e-05, "loss": 0.0001, "step": 29644 }, { "epoch": 27.66, "learning_rate": 4.65429104477612e-05, "loss": 0.0001, "step": 29648 }, { "epoch": 27.66, "learning_rate": 4.6542444029850746e-05, "loss": 0.0001, "step": 29652 }, { "epoch": 27.66, "learning_rate": 4.65419776119403e-05, "loss": 0.0007, "step": 29656 }, { "epoch": 27.67, "learning_rate": 4.654151119402985e-05, "loss": 0.0005, "step": 29660 }, { "epoch": 27.67, "learning_rate": 4.654104477611941e-05, "loss": 0.0015, "step": 29664 }, { "epoch": 27.68, "learning_rate": 4.654057835820896e-05, "loss": 0.0003, "step": 29668 }, { "epoch": 27.68, "learning_rate": 4.654011194029851e-05, "loss": 0.0001, "step": 29672 }, { "epoch": 27.68, "learning_rate": 4.653964552238806e-05, "loss": 0.0027, "step": 29676 }, { "epoch": 27.69, "learning_rate": 4.653917910447762e-05, "loss": 0.0002, "step": 29680 }, { "epoch": 27.69, "learning_rate": 4.6538712686567165e-05, "loss": 0.0016, "step": 29684 }, { "epoch": 27.69, "learning_rate": 4.653824626865672e-05, "loss": 0.0001, "step": 29688 }, { "epoch": 27.7, "learning_rate": 4.6537779850746275e-05, "loss": 0.0004, "step": 29692 }, { "epoch": 27.7, "learning_rate": 4.653731343283582e-05, "loss": 0.0001, "step": 29696 }, { "epoch": 27.71, "learning_rate": 4.653684701492538e-05, "loss": 0.0019, "step": 29700 }, { "epoch": 27.71, "learning_rate": 4.6536380597014926e-05, "loss": 0.0015, "step": 29704 }, { "epoch": 27.71, "learning_rate": 4.653591417910448e-05, "loss": 0.0001, "step": 29708 }, { "epoch": 27.72, "learning_rate": 4.6535447761194036e-05, "loss": 0.0005, "step": 29712 }, { "epoch": 27.72, "learning_rate": 4.6534981343283584e-05, "loss": 0.0011, "step": 29716 }, { "epoch": 27.72, "learning_rate": 4.653451492537313e-05, "loss": 0.0086, "step": 29720 }, { "epoch": 27.73, "learning_rate": 4.6534048507462694e-05, "loss": 0.0006, "step": 29724 }, { "epoch": 27.73, "learning_rate": 4.653358208955224e-05, "loss": 0.0001, "step": 29728 }, { "epoch": 27.73, "learning_rate": 4.653311567164179e-05, "loss": 0.001, "step": 29732 }, { "epoch": 27.74, "learning_rate": 4.6532649253731345e-05, "loss": 0.0023, "step": 29736 }, { "epoch": 27.74, "learning_rate": 4.65321828358209e-05, "loss": 0.0094, "step": 29740 }, { "epoch": 27.75, "learning_rate": 4.653171641791045e-05, "loss": 0.0033, "step": 29744 }, { "epoch": 27.75, "learning_rate": 4.653125e-05, "loss": 0.0016, "step": 29748 }, { "epoch": 27.75, "learning_rate": 4.653078358208956e-05, "loss": 0.0007, "step": 29752 }, { "epoch": 27.76, "learning_rate": 4.6530317164179106e-05, "loss": 0.0053, "step": 29756 }, { "epoch": 27.76, "learning_rate": 4.652985074626866e-05, "loss": 0.0008, "step": 29760 }, { "epoch": 27.76, "learning_rate": 4.652938432835821e-05, "loss": 0.0001, "step": 29764 }, { "epoch": 27.77, "learning_rate": 4.6528917910447764e-05, "loss": 0.0009, "step": 29768 }, { "epoch": 27.77, "learning_rate": 4.652845149253732e-05, "loss": 0.0, "step": 29772 }, { "epoch": 27.78, "learning_rate": 4.652798507462687e-05, "loss": 0.0078, "step": 29776 }, { "epoch": 27.78, "learning_rate": 4.6527518656716415e-05, "loss": 0.0015, "step": 29780 }, { "epoch": 27.78, "learning_rate": 4.652705223880598e-05, "loss": 0.0003, "step": 29784 }, { "epoch": 27.79, "learning_rate": 4.6526585820895525e-05, "loss": 0.0029, "step": 29788 }, { "epoch": 27.79, "learning_rate": 4.652611940298507e-05, "loss": 0.0012, "step": 29792 }, { "epoch": 27.79, "learning_rate": 4.652565298507463e-05, "loss": 0.0003, "step": 29796 }, { "epoch": 27.8, "learning_rate": 4.652518656716418e-05, "loss": 0.0, "step": 29800 }, { "epoch": 27.8, "learning_rate": 4.652472014925373e-05, "loss": 0.0024, "step": 29804 }, { "epoch": 27.81, "learning_rate": 4.6524253731343286e-05, "loss": 0.0003, "step": 29808 }, { "epoch": 27.81, "learning_rate": 4.6523787313432834e-05, "loss": 0.0006, "step": 29812 }, { "epoch": 27.81, "learning_rate": 4.652332089552239e-05, "loss": 0.0004, "step": 29816 }, { "epoch": 27.82, "learning_rate": 4.6522854477611944e-05, "loss": 0.0001, "step": 29820 }, { "epoch": 27.82, "learning_rate": 4.652238805970149e-05, "loss": 0.0, "step": 29824 }, { "epoch": 27.82, "learning_rate": 4.652192164179105e-05, "loss": 0.0001, "step": 29828 }, { "epoch": 27.83, "learning_rate": 4.65214552238806e-05, "loss": 0.0023, "step": 29832 }, { "epoch": 27.83, "learning_rate": 4.652098880597015e-05, "loss": 0.0027, "step": 29836 }, { "epoch": 27.84, "learning_rate": 4.6520522388059705e-05, "loss": 0.0025, "step": 29840 }, { "epoch": 27.84, "learning_rate": 4.652005597014926e-05, "loss": 0.0049, "step": 29844 }, { "epoch": 27.84, "learning_rate": 4.651958955223881e-05, "loss": 0.0001, "step": 29848 }, { "epoch": 27.85, "learning_rate": 4.651912313432836e-05, "loss": 0.0002, "step": 29852 }, { "epoch": 27.85, "learning_rate": 4.651865671641791e-05, "loss": 0.0004, "step": 29856 }, { "epoch": 27.85, "learning_rate": 4.6518190298507466e-05, "loss": 0.0039, "step": 29860 }, { "epoch": 27.86, "learning_rate": 4.651772388059702e-05, "loss": 0.0001, "step": 29864 }, { "epoch": 27.86, "learning_rate": 4.651725746268657e-05, "loss": 0.0002, "step": 29868 }, { "epoch": 27.87, "learning_rate": 4.651679104477612e-05, "loss": 0.0003, "step": 29872 }, { "epoch": 27.87, "learning_rate": 4.651632462686568e-05, "loss": 0.0078, "step": 29876 }, { "epoch": 27.87, "learning_rate": 4.651585820895523e-05, "loss": 0.0001, "step": 29880 }, { "epoch": 27.88, "learning_rate": 4.6515391791044775e-05, "loss": 0.0015, "step": 29884 }, { "epoch": 27.88, "learning_rate": 4.651492537313433e-05, "loss": 0.0007, "step": 29888 }, { "epoch": 27.88, "learning_rate": 4.6514458955223885e-05, "loss": 0.0002, "step": 29892 }, { "epoch": 27.89, "learning_rate": 4.651399253731343e-05, "loss": 0.0013, "step": 29896 }, { "epoch": 27.89, "learning_rate": 4.651352611940299e-05, "loss": 0.0008, "step": 29900 }, { "epoch": 27.9, "learning_rate": 4.651305970149254e-05, "loss": 0.0056, "step": 29904 }, { "epoch": 27.9, "learning_rate": 4.651259328358209e-05, "loss": 0.0073, "step": 29908 }, { "epoch": 27.9, "learning_rate": 4.6512126865671646e-05, "loss": 0.0015, "step": 29912 }, { "epoch": 27.91, "learning_rate": 4.6511660447761194e-05, "loss": 0.0005, "step": 29916 }, { "epoch": 27.91, "learning_rate": 4.651119402985075e-05, "loss": 0.0001, "step": 29920 }, { "epoch": 27.91, "learning_rate": 4.6510727611940304e-05, "loss": 0.0006, "step": 29924 }, { "epoch": 27.92, "learning_rate": 4.651026119402985e-05, "loss": 0.0004, "step": 29928 }, { "epoch": 27.92, "learning_rate": 4.65097947761194e-05, "loss": 0.0015, "step": 29932 }, { "epoch": 27.93, "learning_rate": 4.650932835820896e-05, "loss": 0.002, "step": 29936 }, { "epoch": 27.93, "learning_rate": 4.650886194029851e-05, "loss": 0.0002, "step": 29940 }, { "epoch": 27.93, "learning_rate": 4.650839552238806e-05, "loss": 0.0006, "step": 29944 }, { "epoch": 27.94, "learning_rate": 4.650792910447761e-05, "loss": 0.0062, "step": 29948 }, { "epoch": 27.94, "learning_rate": 4.650746268656717e-05, "loss": 0.0003, "step": 29952 }, { "epoch": 27.94, "learning_rate": 4.6506996268656716e-05, "loss": 0.0018, "step": 29956 }, { "epoch": 27.95, "learning_rate": 4.650652985074627e-05, "loss": 0.0003, "step": 29960 }, { "epoch": 27.95, "learning_rate": 4.6506063432835825e-05, "loss": 0.0001, "step": 29964 }, { "epoch": 27.96, "learning_rate": 4.6505597014925374e-05, "loss": 0.0017, "step": 29968 }, { "epoch": 27.96, "learning_rate": 4.650513059701493e-05, "loss": 0.0019, "step": 29972 }, { "epoch": 27.96, "learning_rate": 4.650466417910448e-05, "loss": 0.0003, "step": 29976 }, { "epoch": 27.97, "learning_rate": 4.650419776119403e-05, "loss": 0.0001, "step": 29980 }, { "epoch": 27.97, "learning_rate": 4.6503731343283586e-05, "loss": 0.0011, "step": 29984 }, { "epoch": 27.97, "learning_rate": 4.6503264925373135e-05, "loss": 0.009, "step": 29988 }, { "epoch": 27.98, "learning_rate": 4.650279850746269e-05, "loss": 0.0013, "step": 29992 }, { "epoch": 27.98, "learning_rate": 4.6502332089552244e-05, "loss": 0.0, "step": 29996 }, { "epoch": 27.98, "learning_rate": 4.650186567164179e-05, "loss": 0.0007, "step": 30000 }, { "epoch": 27.98, "eval_exact_match": 0.7350096711798839, "eval_exec": 0.769825918762089, "eval_loss": 0.3727231025695801, "eval_runtime": 1201.9878, "eval_samples_per_second": 0.86, "step": 30000 }, { "epoch": 27.99, "learning_rate": 4.650139925373135e-05, "loss": 0.0001, "step": 30004 }, { "epoch": 27.99, "learning_rate": 4.6500932835820896e-05, "loss": 0.0002, "step": 30008 }, { "epoch": 28.0, "learning_rate": 4.650046641791045e-05, "loss": 0.0004, "step": 30012 }, { "epoch": 28.0, "learning_rate": 4.6500000000000005e-05, "loss": 0.0001, "step": 30016 }, { "epoch": 28.0, "learning_rate": 4.6499533582089554e-05, "loss": 0.0026, "step": 30020 }, { "epoch": 28.01, "learning_rate": 4.649906716417911e-05, "loss": 0.0003, "step": 30024 }, { "epoch": 28.01, "learning_rate": 4.649860074626866e-05, "loss": 0.0002, "step": 30028 }, { "epoch": 28.01, "learning_rate": 4.649813432835821e-05, "loss": 0.0043, "step": 30032 }, { "epoch": 28.02, "learning_rate": 4.649766791044776e-05, "loss": 0.0, "step": 30036 }, { "epoch": 28.02, "learning_rate": 4.6497201492537315e-05, "loss": 0.0001, "step": 30040 }, { "epoch": 28.03, "learning_rate": 4.649673507462687e-05, "loss": 0.0041, "step": 30044 }, { "epoch": 28.03, "learning_rate": 4.649626865671642e-05, "loss": 0.0003, "step": 30048 }, { "epoch": 28.03, "learning_rate": 4.649580223880597e-05, "loss": 0.0, "step": 30052 }, { "epoch": 28.04, "learning_rate": 4.649533582089553e-05, "loss": 0.0002, "step": 30056 }, { "epoch": 28.04, "learning_rate": 4.6494869402985076e-05, "loss": 0.0005, "step": 30060 }, { "epoch": 28.04, "learning_rate": 4.649440298507463e-05, "loss": 0.002, "step": 30064 }, { "epoch": 28.05, "learning_rate": 4.649393656716418e-05, "loss": 0.0003, "step": 30068 }, { "epoch": 28.05, "learning_rate": 4.6493470149253733e-05, "loss": 0.0008, "step": 30072 }, { "epoch": 28.06, "learning_rate": 4.649300373134329e-05, "loss": 0.0001, "step": 30076 }, { "epoch": 28.06, "learning_rate": 4.6492537313432837e-05, "loss": 0.0001, "step": 30080 }, { "epoch": 28.06, "learning_rate": 4.649207089552239e-05, "loss": 0.0001, "step": 30084 }, { "epoch": 28.07, "learning_rate": 4.6491604477611946e-05, "loss": 0.0005, "step": 30088 }, { "epoch": 28.07, "learning_rate": 4.6491138059701494e-05, "loss": 0.0009, "step": 30092 }, { "epoch": 28.07, "learning_rate": 4.649067164179104e-05, "loss": 0.0015, "step": 30096 }, { "epoch": 28.08, "learning_rate": 4.64902052238806e-05, "loss": 0.0009, "step": 30100 }, { "epoch": 28.08, "learning_rate": 4.648973880597015e-05, "loss": 0.0013, "step": 30104 }, { "epoch": 28.09, "learning_rate": 4.64892723880597e-05, "loss": 0.0003, "step": 30108 }, { "epoch": 28.09, "learning_rate": 4.6488805970149255e-05, "loss": 0.0005, "step": 30112 }, { "epoch": 28.09, "learning_rate": 4.648833955223881e-05, "loss": 0.0006, "step": 30116 }, { "epoch": 28.1, "learning_rate": 4.648787313432836e-05, "loss": 0.0001, "step": 30120 }, { "epoch": 28.1, "learning_rate": 4.648740671641791e-05, "loss": 0.0002, "step": 30124 }, { "epoch": 28.1, "learning_rate": 4.648694029850746e-05, "loss": 0.0013, "step": 30128 }, { "epoch": 28.11, "learning_rate": 4.6486473880597016e-05, "loss": 0.0001, "step": 30132 }, { "epoch": 28.11, "learning_rate": 4.648600746268657e-05, "loss": 0.0031, "step": 30136 }, { "epoch": 28.12, "learning_rate": 4.648554104477612e-05, "loss": 0.0004, "step": 30140 }, { "epoch": 28.12, "learning_rate": 4.6485074626865674e-05, "loss": 0.0002, "step": 30144 }, { "epoch": 28.12, "learning_rate": 4.648460820895523e-05, "loss": 0.0003, "step": 30148 }, { "epoch": 28.13, "learning_rate": 4.648414179104478e-05, "loss": 0.0041, "step": 30152 }, { "epoch": 28.13, "learning_rate": 4.648367537313433e-05, "loss": 0.0002, "step": 30156 }, { "epoch": 28.13, "learning_rate": 4.648320895522388e-05, "loss": 0.0002, "step": 30160 }, { "epoch": 28.14, "learning_rate": 4.6482742537313435e-05, "loss": 0.0001, "step": 30164 }, { "epoch": 28.14, "learning_rate": 4.648227611940299e-05, "loss": 0.0004, "step": 30168 }, { "epoch": 28.15, "learning_rate": 4.648180970149254e-05, "loss": 0.0033, "step": 30172 }, { "epoch": 28.15, "learning_rate": 4.648134328358209e-05, "loss": 0.0013, "step": 30176 }, { "epoch": 28.15, "learning_rate": 4.648087686567165e-05, "loss": 0.0021, "step": 30180 }, { "epoch": 28.16, "learning_rate": 4.6480410447761196e-05, "loss": 0.0003, "step": 30184 }, { "epoch": 28.16, "learning_rate": 4.6479944029850744e-05, "loss": 0.0004, "step": 30188 }, { "epoch": 28.16, "learning_rate": 4.6479477611940306e-05, "loss": 0.0002, "step": 30192 }, { "epoch": 28.17, "learning_rate": 4.6479011194029854e-05, "loss": 0.004, "step": 30196 }, { "epoch": 28.17, "learning_rate": 4.64785447761194e-05, "loss": 0.0082, "step": 30200 }, { "epoch": 28.18, "learning_rate": 4.647807835820896e-05, "loss": 0.0004, "step": 30204 }, { "epoch": 28.18, "learning_rate": 4.647761194029851e-05, "loss": 0.0002, "step": 30208 }, { "epoch": 28.18, "learning_rate": 4.647714552238806e-05, "loss": 0.0001, "step": 30212 }, { "epoch": 28.19, "learning_rate": 4.6476679104477615e-05, "loss": 0.0004, "step": 30216 }, { "epoch": 28.19, "learning_rate": 4.6476212686567163e-05, "loss": 0.0022, "step": 30220 }, { "epoch": 28.19, "learning_rate": 4.647574626865672e-05, "loss": 0.0004, "step": 30224 }, { "epoch": 28.2, "learning_rate": 4.647527985074627e-05, "loss": 0.0001, "step": 30228 }, { "epoch": 28.2, "learning_rate": 4.647481343283582e-05, "loss": 0.0005, "step": 30232 }, { "epoch": 28.21, "learning_rate": 4.6474347014925376e-05, "loss": 0.0001, "step": 30236 }, { "epoch": 28.21, "learning_rate": 4.647388059701493e-05, "loss": 0.003, "step": 30240 }, { "epoch": 28.21, "learning_rate": 4.647341417910448e-05, "loss": 0.0, "step": 30244 }, { "epoch": 28.22, "learning_rate": 4.647294776119403e-05, "loss": 0.0043, "step": 30248 }, { "epoch": 28.22, "learning_rate": 4.647248134328359e-05, "loss": 0.0021, "step": 30252 }, { "epoch": 28.22, "learning_rate": 4.647201492537314e-05, "loss": 0.0038, "step": 30256 }, { "epoch": 28.23, "learning_rate": 4.6471548507462685e-05, "loss": 0.0008, "step": 30260 }, { "epoch": 28.23, "learning_rate": 4.647108208955224e-05, "loss": 0.0005, "step": 30264 }, { "epoch": 28.24, "learning_rate": 4.6470615671641795e-05, "loss": 0.0001, "step": 30268 }, { "epoch": 28.24, "learning_rate": 4.647014925373134e-05, "loss": 0.0038, "step": 30272 }, { "epoch": 28.24, "learning_rate": 4.64696828358209e-05, "loss": 0.0013, "step": 30276 }, { "epoch": 28.25, "learning_rate": 4.6469216417910446e-05, "loss": 0.0002, "step": 30280 }, { "epoch": 28.25, "learning_rate": 4.646875e-05, "loss": 0.0029, "step": 30284 }, { "epoch": 28.25, "learning_rate": 4.6468283582089556e-05, "loss": 0.0001, "step": 30288 }, { "epoch": 28.26, "learning_rate": 4.6467817164179104e-05, "loss": 0.0001, "step": 30292 }, { "epoch": 28.26, "learning_rate": 4.646735074626866e-05, "loss": 0.0012, "step": 30296 }, { "epoch": 28.26, "learning_rate": 4.6466884328358214e-05, "loss": 0.0002, "step": 30300 }, { "epoch": 28.27, "learning_rate": 4.646641791044776e-05, "loss": 0.0018, "step": 30304 }, { "epoch": 28.27, "learning_rate": 4.646595149253731e-05, "loss": 0.0, "step": 30308 }, { "epoch": 28.28, "learning_rate": 4.646548507462687e-05, "loss": 0.0003, "step": 30312 }, { "epoch": 28.28, "learning_rate": 4.646501865671642e-05, "loss": 0.0001, "step": 30316 }, { "epoch": 28.28, "learning_rate": 4.6464552238805975e-05, "loss": 0.0003, "step": 30320 }, { "epoch": 28.29, "learning_rate": 4.646408582089552e-05, "loss": 0.0025, "step": 30324 }, { "epoch": 28.29, "learning_rate": 4.646361940298508e-05, "loss": 0.0001, "step": 30328 }, { "epoch": 28.29, "learning_rate": 4.646315298507463e-05, "loss": 0.0018, "step": 30332 }, { "epoch": 28.3, "learning_rate": 4.646268656716418e-05, "loss": 0.0011, "step": 30336 }, { "epoch": 28.3, "learning_rate": 4.646222014925373e-05, "loss": 0.0006, "step": 30340 }, { "epoch": 28.31, "learning_rate": 4.646175373134329e-05, "loss": 0.0001, "step": 30344 }, { "epoch": 28.31, "learning_rate": 4.646128731343284e-05, "loss": 0.0004, "step": 30348 }, { "epoch": 28.31, "learning_rate": 4.646082089552239e-05, "loss": 0.0002, "step": 30352 }, { "epoch": 28.32, "learning_rate": 4.646035447761194e-05, "loss": 0.0016, "step": 30356 }, { "epoch": 28.32, "learning_rate": 4.64598880597015e-05, "loss": 0.0006, "step": 30360 }, { "epoch": 28.32, "learning_rate": 4.6459421641791045e-05, "loss": 0.004, "step": 30364 }, { "epoch": 28.33, "learning_rate": 4.64589552238806e-05, "loss": 0.0001, "step": 30368 }, { "epoch": 28.33, "learning_rate": 4.6458488805970155e-05, "loss": 0.0001, "step": 30372 }, { "epoch": 28.34, "learning_rate": 4.64580223880597e-05, "loss": 0.0029, "step": 30376 }, { "epoch": 28.34, "learning_rate": 4.645755597014926e-05, "loss": 0.0078, "step": 30380 }, { "epoch": 28.34, "learning_rate": 4.6457089552238806e-05, "loss": 0.0001, "step": 30384 }, { "epoch": 28.35, "learning_rate": 4.645662313432836e-05, "loss": 0.001, "step": 30388 }, { "epoch": 28.35, "learning_rate": 4.6456156716417916e-05, "loss": 0.0031, "step": 30392 }, { "epoch": 28.35, "learning_rate": 4.6455690298507464e-05, "loss": 0.0004, "step": 30396 }, { "epoch": 28.36, "learning_rate": 4.645522388059701e-05, "loss": 0.0001, "step": 30400 }, { "epoch": 28.36, "learning_rate": 4.6454757462686574e-05, "loss": 0.0002, "step": 30404 }, { "epoch": 28.37, "learning_rate": 4.645429104477612e-05, "loss": 0.0005, "step": 30408 }, { "epoch": 28.37, "learning_rate": 4.645382462686567e-05, "loss": 0.0001, "step": 30412 }, { "epoch": 28.37, "learning_rate": 4.6453358208955225e-05, "loss": 0.0001, "step": 30416 }, { "epoch": 28.38, "learning_rate": 4.645289179104478e-05, "loss": 0.0012, "step": 30420 }, { "epoch": 28.38, "learning_rate": 4.645242537313433e-05, "loss": 0.0074, "step": 30424 }, { "epoch": 28.38, "learning_rate": 4.645195895522388e-05, "loss": 0.002, "step": 30428 }, { "epoch": 28.39, "learning_rate": 4.645149253731344e-05, "loss": 0.0008, "step": 30432 }, { "epoch": 28.39, "learning_rate": 4.6451026119402986e-05, "loss": 0.0001, "step": 30436 }, { "epoch": 28.4, "learning_rate": 4.645055970149254e-05, "loss": 0.004, "step": 30440 }, { "epoch": 28.4, "learning_rate": 4.645009328358209e-05, "loss": 0.0011, "step": 30444 }, { "epoch": 28.4, "learning_rate": 4.6449626865671644e-05, "loss": 0.0064, "step": 30448 }, { "epoch": 28.41, "learning_rate": 4.64491604477612e-05, "loss": 0.0002, "step": 30452 }, { "epoch": 28.41, "learning_rate": 4.644869402985075e-05, "loss": 0.0027, "step": 30456 }, { "epoch": 28.41, "learning_rate": 4.6448227611940295e-05, "loss": 0.0004, "step": 30460 }, { "epoch": 28.42, "learning_rate": 4.644776119402986e-05, "loss": 0.0001, "step": 30464 }, { "epoch": 28.42, "learning_rate": 4.6447294776119405e-05, "loss": 0.0006, "step": 30468 }, { "epoch": 28.43, "learning_rate": 4.644682835820895e-05, "loss": 0.0003, "step": 30472 }, { "epoch": 28.43, "learning_rate": 4.644636194029851e-05, "loss": 0.0026, "step": 30476 }, { "epoch": 28.43, "learning_rate": 4.644589552238806e-05, "loss": 0.0056, "step": 30480 }, { "epoch": 28.44, "learning_rate": 4.644542910447762e-05, "loss": 0.0002, "step": 30484 }, { "epoch": 28.44, "learning_rate": 4.6444962686567166e-05, "loss": 0.0002, "step": 30488 }, { "epoch": 28.44, "learning_rate": 4.6444496268656714e-05, "loss": 0.0001, "step": 30492 }, { "epoch": 28.45, "learning_rate": 4.6444029850746276e-05, "loss": 0.0002, "step": 30496 }, { "epoch": 28.45, "learning_rate": 4.6443563432835824e-05, "loss": 0.0003, "step": 30500 }, { "epoch": 28.45, "eval_exact_match": 0.718568665377176, "eval_exec": 0.7514506769825918, "eval_loss": 0.3722231984138489, "eval_runtime": 1165.7612, "eval_samples_per_second": 0.887, "step": 30500 }, { "epoch": 28.46, "learning_rate": 4.644309701492537e-05, "loss": 0.0001, "step": 30504 }, { "epoch": 28.46, "learning_rate": 4.644263059701493e-05, "loss": 0.0008, "step": 30508 }, { "epoch": 28.46, "learning_rate": 4.644216417910448e-05, "loss": 0.0006, "step": 30512 }, { "epoch": 28.47, "learning_rate": 4.644169776119403e-05, "loss": 0.0001, "step": 30516 }, { "epoch": 28.47, "learning_rate": 4.6441231343283585e-05, "loss": 0.0001, "step": 30520 }, { "epoch": 28.47, "learning_rate": 4.644076492537314e-05, "loss": 0.0, "step": 30524 }, { "epoch": 28.48, "learning_rate": 4.644029850746269e-05, "loss": 0.0001, "step": 30528 }, { "epoch": 28.48, "learning_rate": 4.643983208955224e-05, "loss": 0.0025, "step": 30532 }, { "epoch": 28.49, "learning_rate": 4.643936567164179e-05, "loss": 0.0004, "step": 30536 }, { "epoch": 28.49, "learning_rate": 4.6438899253731346e-05, "loss": 0.0, "step": 30540 }, { "epoch": 28.49, "learning_rate": 4.64384328358209e-05, "loss": 0.0002, "step": 30544 }, { "epoch": 28.5, "learning_rate": 4.643796641791045e-05, "loss": 0.0001, "step": 30548 }, { "epoch": 28.5, "learning_rate": 4.64375e-05, "loss": 0.0001, "step": 30552 }, { "epoch": 28.5, "learning_rate": 4.643703358208956e-05, "loss": 0.0002, "step": 30556 }, { "epoch": 28.51, "learning_rate": 4.643656716417911e-05, "loss": 0.0001, "step": 30560 }, { "epoch": 28.51, "learning_rate": 4.6436100746268655e-05, "loss": 0.0009, "step": 30564 }, { "epoch": 28.51, "learning_rate": 4.643563432835821e-05, "loss": 0.0066, "step": 30568 }, { "epoch": 28.52, "learning_rate": 4.6435167910447765e-05, "loss": 0.0007, "step": 30572 }, { "epoch": 28.52, "learning_rate": 4.643470149253731e-05, "loss": 0.0004, "step": 30576 }, { "epoch": 28.53, "learning_rate": 4.643423507462687e-05, "loss": 0.0007, "step": 30580 }, { "epoch": 28.53, "learning_rate": 4.643376865671642e-05, "loss": 0.0012, "step": 30584 }, { "epoch": 28.53, "learning_rate": 4.643330223880597e-05, "loss": 0.0001, "step": 30588 }, { "epoch": 28.54, "learning_rate": 4.6432835820895526e-05, "loss": 0.0001, "step": 30592 }, { "epoch": 28.54, "learning_rate": 4.6432369402985074e-05, "loss": 0.004, "step": 30596 }, { "epoch": 28.54, "learning_rate": 4.643190298507463e-05, "loss": 0.0001, "step": 30600 }, { "epoch": 28.55, "learning_rate": 4.6431436567164184e-05, "loss": 0.0005, "step": 30604 }, { "epoch": 28.55, "learning_rate": 4.643097014925373e-05, "loss": 0.0011, "step": 30608 }, { "epoch": 28.56, "learning_rate": 4.643050373134328e-05, "loss": 0.0001, "step": 30612 }, { "epoch": 28.56, "learning_rate": 4.643003731343284e-05, "loss": 0.0005, "step": 30616 }, { "epoch": 28.56, "learning_rate": 4.642957089552239e-05, "loss": 0.0004, "step": 30620 }, { "epoch": 28.57, "learning_rate": 4.642910447761194e-05, "loss": 0.0, "step": 30624 }, { "epoch": 28.57, "learning_rate": 4.642863805970149e-05, "loss": 0.0001, "step": 30628 }, { "epoch": 28.57, "learning_rate": 4.642817164179105e-05, "loss": 0.0033, "step": 30632 }, { "epoch": 28.58, "learning_rate": 4.6427705223880596e-05, "loss": 0.0002, "step": 30636 }, { "epoch": 28.58, "learning_rate": 4.642723880597015e-05, "loss": 0.0046, "step": 30640 }, { "epoch": 28.59, "learning_rate": 4.6426772388059706e-05, "loss": 0.0022, "step": 30644 }, { "epoch": 28.59, "learning_rate": 4.642630597014926e-05, "loss": 0.0011, "step": 30648 }, { "epoch": 28.59, "learning_rate": 4.642583955223881e-05, "loss": 0.0047, "step": 30652 }, { "epoch": 28.6, "learning_rate": 4.642537313432836e-05, "loss": 0.0001, "step": 30656 }, { "epoch": 28.6, "learning_rate": 4.642490671641791e-05, "loss": 0.0002, "step": 30660 }, { "epoch": 28.6, "learning_rate": 4.642444029850747e-05, "loss": 0.0005, "step": 30664 }, { "epoch": 28.61, "learning_rate": 4.6423973880597015e-05, "loss": 0.0007, "step": 30668 }, { "epoch": 28.61, "learning_rate": 4.642350746268657e-05, "loss": 0.0027, "step": 30672 }, { "epoch": 28.62, "learning_rate": 4.6423041044776125e-05, "loss": 0.0001, "step": 30676 }, { "epoch": 28.62, "learning_rate": 4.642257462686567e-05, "loss": 0.0003, "step": 30680 }, { "epoch": 28.62, "learning_rate": 4.642210820895523e-05, "loss": 0.0053, "step": 30684 }, { "epoch": 28.63, "learning_rate": 4.6421641791044776e-05, "loss": 0.0001, "step": 30688 }, { "epoch": 28.63, "learning_rate": 4.642117537313433e-05, "loss": 0.0002, "step": 30692 }, { "epoch": 28.63, "learning_rate": 4.6420708955223886e-05, "loss": 0.0011, "step": 30696 }, { "epoch": 28.64, "learning_rate": 4.6420242537313434e-05, "loss": 0.0009, "step": 30700 }, { "epoch": 28.64, "learning_rate": 4.641977611940299e-05, "loss": 0.0007, "step": 30704 }, { "epoch": 28.65, "learning_rate": 4.6419309701492544e-05, "loss": 0.0011, "step": 30708 }, { "epoch": 28.65, "learning_rate": 4.641884328358209e-05, "loss": 0.0003, "step": 30712 }, { "epoch": 28.65, "learning_rate": 4.641837686567164e-05, "loss": 0.0016, "step": 30716 }, { "epoch": 28.66, "learning_rate": 4.6417910447761195e-05, "loss": 0.0002, "step": 30720 }, { "epoch": 28.66, "learning_rate": 4.641744402985075e-05, "loss": 0.0001, "step": 30724 }, { "epoch": 28.66, "learning_rate": 4.64169776119403e-05, "loss": 0.0003, "step": 30728 }, { "epoch": 28.67, "learning_rate": 4.641651119402985e-05, "loss": 0.0002, "step": 30732 }, { "epoch": 28.67, "learning_rate": 4.641604477611941e-05, "loss": 0.0003, "step": 30736 }, { "epoch": 28.68, "learning_rate": 4.6415578358208956e-05, "loss": 0.0001, "step": 30740 }, { "epoch": 28.68, "learning_rate": 4.641511194029851e-05, "loss": 0.0001, "step": 30744 }, { "epoch": 28.68, "learning_rate": 4.641464552238806e-05, "loss": 0.0, "step": 30748 }, { "epoch": 28.69, "learning_rate": 4.6414179104477614e-05, "loss": 0.0014, "step": 30752 }, { "epoch": 28.69, "learning_rate": 4.641371268656717e-05, "loss": 0.0002, "step": 30756 }, { "epoch": 28.69, "learning_rate": 4.641324626865672e-05, "loss": 0.0, "step": 30760 }, { "epoch": 28.7, "learning_rate": 4.641277985074627e-05, "loss": 0.0006, "step": 30764 }, { "epoch": 28.7, "learning_rate": 4.6412313432835827e-05, "loss": 0.0004, "step": 30768 }, { "epoch": 28.71, "learning_rate": 4.6411847014925375e-05, "loss": 0.0, "step": 30772 }, { "epoch": 28.71, "learning_rate": 4.641138059701492e-05, "loss": 0.0, "step": 30776 }, { "epoch": 28.71, "learning_rate": 4.641091417910448e-05, "loss": 0.0002, "step": 30780 }, { "epoch": 28.72, "learning_rate": 4.641044776119403e-05, "loss": 0.0029, "step": 30784 }, { "epoch": 28.72, "learning_rate": 4.640998134328358e-05, "loss": 0.0002, "step": 30788 }, { "epoch": 28.72, "learning_rate": 4.6409514925373136e-05, "loss": 0.0094, "step": 30792 }, { "epoch": 28.73, "learning_rate": 4.640904850746269e-05, "loss": 0.0011, "step": 30796 }, { "epoch": 28.73, "learning_rate": 4.640858208955224e-05, "loss": 0.0027, "step": 30800 }, { "epoch": 28.73, "learning_rate": 4.6408115671641794e-05, "loss": 0.0009, "step": 30804 }, { "epoch": 28.74, "learning_rate": 4.640764925373134e-05, "loss": 0.0007, "step": 30808 }, { "epoch": 28.74, "learning_rate": 4.6407182835820903e-05, "loss": 0.0001, "step": 30812 }, { "epoch": 28.75, "learning_rate": 4.640671641791045e-05, "loss": 0.0018, "step": 30816 }, { "epoch": 28.75, "learning_rate": 4.640625e-05, "loss": 0.0005, "step": 30820 }, { "epoch": 28.75, "learning_rate": 4.6405783582089555e-05, "loss": 0.0003, "step": 30824 }, { "epoch": 28.76, "learning_rate": 4.640531716417911e-05, "loss": 0.0003, "step": 30828 }, { "epoch": 28.76, "learning_rate": 4.640485074626866e-05, "loss": 0.0011, "step": 30832 }, { "epoch": 28.76, "learning_rate": 4.640438432835821e-05, "loss": 0.0047, "step": 30836 }, { "epoch": 28.77, "learning_rate": 4.640391791044776e-05, "loss": 0.0002, "step": 30840 }, { "epoch": 28.77, "learning_rate": 4.6403451492537316e-05, "loss": 0.0002, "step": 30844 }, { "epoch": 28.78, "learning_rate": 4.640298507462687e-05, "loss": 0.0, "step": 30848 }, { "epoch": 28.78, "learning_rate": 4.640251865671642e-05, "loss": 0.0, "step": 30852 }, { "epoch": 28.78, "learning_rate": 4.6402052238805974e-05, "loss": 0.0, "step": 30856 }, { "epoch": 28.79, "learning_rate": 4.640158582089553e-05, "loss": 0.0002, "step": 30860 }, { "epoch": 28.79, "learning_rate": 4.640111940298508e-05, "loss": 0.0031, "step": 30864 }, { "epoch": 28.79, "learning_rate": 4.6400652985074625e-05, "loss": 0.0001, "step": 30868 }, { "epoch": 28.8, "learning_rate": 4.6400186567164186e-05, "loss": 0.0001, "step": 30872 }, { "epoch": 28.8, "learning_rate": 4.6399720149253735e-05, "loss": 0.0007, "step": 30876 }, { "epoch": 28.81, "learning_rate": 4.639925373134328e-05, "loss": 0.0001, "step": 30880 }, { "epoch": 28.81, "learning_rate": 4.639878731343284e-05, "loss": 0.0004, "step": 30884 }, { "epoch": 28.81, "learning_rate": 4.639832089552239e-05, "loss": 0.0003, "step": 30888 }, { "epoch": 28.82, "learning_rate": 4.639785447761194e-05, "loss": 0.0042, "step": 30892 }, { "epoch": 28.82, "learning_rate": 4.6397388059701496e-05, "loss": 0.0001, "step": 30896 }, { "epoch": 28.82, "learning_rate": 4.6396921641791044e-05, "loss": 0.0, "step": 30900 }, { "epoch": 28.83, "learning_rate": 4.63964552238806e-05, "loss": 0.0001, "step": 30904 }, { "epoch": 28.83, "learning_rate": 4.6395988805970154e-05, "loss": 0.0, "step": 30908 }, { "epoch": 28.84, "learning_rate": 4.63955223880597e-05, "loss": 0.0007, "step": 30912 }, { "epoch": 28.84, "learning_rate": 4.6395055970149257e-05, "loss": 0.0009, "step": 30916 }, { "epoch": 28.84, "learning_rate": 4.639458955223881e-05, "loss": 0.0066, "step": 30920 }, { "epoch": 28.85, "learning_rate": 4.639412313432836e-05, "loss": 0.0005, "step": 30924 }, { "epoch": 28.85, "learning_rate": 4.639365671641791e-05, "loss": 0.0012, "step": 30928 }, { "epoch": 28.85, "learning_rate": 4.639319029850747e-05, "loss": 0.0002, "step": 30932 }, { "epoch": 28.86, "learning_rate": 4.639272388059702e-05, "loss": 0.0, "step": 30936 }, { "epoch": 28.86, "learning_rate": 4.6392257462686566e-05, "loss": 0.0003, "step": 30940 }, { "epoch": 28.87, "learning_rate": 4.639179104477612e-05, "loss": 0.0007, "step": 30944 }, { "epoch": 28.87, "learning_rate": 4.6391324626865675e-05, "loss": 0.0001, "step": 30948 }, { "epoch": 28.87, "learning_rate": 4.6390858208955224e-05, "loss": 0.0001, "step": 30952 }, { "epoch": 28.88, "learning_rate": 4.639039179104478e-05, "loss": 0.0002, "step": 30956 }, { "epoch": 28.88, "learning_rate": 4.638992537313433e-05, "loss": 0.0007, "step": 30960 }, { "epoch": 28.88, "learning_rate": 4.638945895522388e-05, "loss": 0.0002, "step": 30964 }, { "epoch": 28.89, "learning_rate": 4.6388992537313436e-05, "loss": 0.0039, "step": 30968 }, { "epoch": 28.89, "learning_rate": 4.6388526119402985e-05, "loss": 0.0, "step": 30972 }, { "epoch": 28.9, "learning_rate": 4.638805970149254e-05, "loss": 0.0006, "step": 30976 }, { "epoch": 28.9, "learning_rate": 4.6387593283582094e-05, "loss": 0.0, "step": 30980 }, { "epoch": 28.9, "learning_rate": 4.638712686567164e-05, "loss": 0.0018, "step": 30984 }, { "epoch": 28.91, "learning_rate": 4.63866604477612e-05, "loss": 0.0024, "step": 30988 }, { "epoch": 28.91, "learning_rate": 4.638619402985075e-05, "loss": 0.0023, "step": 30992 }, { "epoch": 28.91, "learning_rate": 4.63857276119403e-05, "loss": 0.001, "step": 30996 }, { "epoch": 28.92, "learning_rate": 4.6385261194029855e-05, "loss": 0.0003, "step": 31000 }, { "epoch": 28.92, "eval_exact_match": 0.7263056092843327, "eval_exec": 0.7649903288201161, "eval_loss": 0.41026368737220764, "eval_runtime": 1150.5617, "eval_samples_per_second": 0.899, "step": 31000 }, { "epoch": 28.92, "learning_rate": 4.6384794776119404e-05, "loss": 0.0009, "step": 31004 }, { "epoch": 28.93, "learning_rate": 4.638432835820896e-05, "loss": 0.0017, "step": 31008 }, { "epoch": 28.93, "learning_rate": 4.638386194029851e-05, "loss": 0.0015, "step": 31012 }, { "epoch": 28.93, "learning_rate": 4.638339552238806e-05, "loss": 0.0, "step": 31016 }, { "epoch": 28.94, "learning_rate": 4.638292910447761e-05, "loss": 0.0016, "step": 31020 }, { "epoch": 28.94, "learning_rate": 4.638246268656717e-05, "loss": 0.0142, "step": 31024 }, { "epoch": 28.94, "learning_rate": 4.638199626865672e-05, "loss": 0.0027, "step": 31028 }, { "epoch": 28.95, "learning_rate": 4.638152985074627e-05, "loss": 0.0, "step": 31032 }, { "epoch": 28.95, "learning_rate": 4.638106343283582e-05, "loss": 0.0025, "step": 31036 }, { "epoch": 28.96, "learning_rate": 4.638059701492538e-05, "loss": 0.0004, "step": 31040 }, { "epoch": 28.96, "learning_rate": 4.6380130597014925e-05, "loss": 0.0007, "step": 31044 }, { "epoch": 28.96, "learning_rate": 4.637966417910448e-05, "loss": 0.0001, "step": 31048 }, { "epoch": 28.97, "learning_rate": 4.6379197761194035e-05, "loss": 0.0002, "step": 31052 }, { "epoch": 28.97, "learning_rate": 4.6378731343283583e-05, "loss": 0.0045, "step": 31056 }, { "epoch": 28.97, "learning_rate": 4.637826492537314e-05, "loss": 0.0011, "step": 31060 }, { "epoch": 28.98, "learning_rate": 4.6377798507462686e-05, "loss": 0.0001, "step": 31064 }, { "epoch": 28.98, "learning_rate": 4.637733208955224e-05, "loss": 0.0005, "step": 31068 }, { "epoch": 28.98, "learning_rate": 4.6376865671641796e-05, "loss": 0.0009, "step": 31072 }, { "epoch": 28.99, "learning_rate": 4.6376399253731344e-05, "loss": 0.0003, "step": 31076 }, { "epoch": 28.99, "learning_rate": 4.637593283582089e-05, "loss": 0.0017, "step": 31080 }, { "epoch": 29.0, "learning_rate": 4.6375466417910454e-05, "loss": 0.0004, "step": 31084 }, { "epoch": 29.0, "learning_rate": 4.6375e-05, "loss": 0.0047, "step": 31088 }, { "epoch": 29.0, "learning_rate": 4.637453358208955e-05, "loss": 0.0004, "step": 31092 }, { "epoch": 29.01, "learning_rate": 4.6374067164179105e-05, "loss": 0.0001, "step": 31096 }, { "epoch": 29.01, "learning_rate": 4.637360074626866e-05, "loss": 0.0001, "step": 31100 }, { "epoch": 29.01, "learning_rate": 4.637313432835821e-05, "loss": 0.0003, "step": 31104 }, { "epoch": 29.02, "learning_rate": 4.637266791044776e-05, "loss": 0.0004, "step": 31108 }, { "epoch": 29.02, "learning_rate": 4.637220149253732e-05, "loss": 0.0036, "step": 31112 }, { "epoch": 29.03, "learning_rate": 4.6371735074626866e-05, "loss": 0.0001, "step": 31116 }, { "epoch": 29.03, "learning_rate": 4.637126865671642e-05, "loss": 0.0001, "step": 31120 }, { "epoch": 29.03, "learning_rate": 4.637080223880597e-05, "loss": 0.0019, "step": 31124 }, { "epoch": 29.04, "learning_rate": 4.6370335820895524e-05, "loss": 0.0003, "step": 31128 }, { "epoch": 29.04, "learning_rate": 4.636986940298508e-05, "loss": 0.0018, "step": 31132 }, { "epoch": 29.04, "learning_rate": 4.636940298507463e-05, "loss": 0.0001, "step": 31136 }, { "epoch": 29.05, "learning_rate": 4.636893656716418e-05, "loss": 0.0005, "step": 31140 }, { "epoch": 29.05, "learning_rate": 4.636847014925374e-05, "loss": 0.0001, "step": 31144 }, { "epoch": 29.06, "learning_rate": 4.6368003731343285e-05, "loss": 0.0001, "step": 31148 }, { "epoch": 29.06, "learning_rate": 4.636753731343284e-05, "loss": 0.0, "step": 31152 }, { "epoch": 29.06, "learning_rate": 4.636707089552239e-05, "loss": 0.0, "step": 31156 }, { "epoch": 29.07, "learning_rate": 4.636660447761194e-05, "loss": 0.0001, "step": 31160 }, { "epoch": 29.07, "learning_rate": 4.63661380597015e-05, "loss": 0.0001, "step": 31164 }, { "epoch": 29.07, "learning_rate": 4.6365671641791046e-05, "loss": 0.0, "step": 31168 }, { "epoch": 29.08, "learning_rate": 4.63652052238806e-05, "loss": 0.0006, "step": 31172 }, { "epoch": 29.08, "learning_rate": 4.6364738805970156e-05, "loss": 0.0001, "step": 31176 }, { "epoch": 29.09, "learning_rate": 4.6364272388059704e-05, "loss": 0.0007, "step": 31180 }, { "epoch": 29.09, "learning_rate": 4.636380597014925e-05, "loss": 0.0053, "step": 31184 }, { "epoch": 29.09, "learning_rate": 4.636333955223881e-05, "loss": 0.0003, "step": 31188 }, { "epoch": 29.1, "learning_rate": 4.636287313432836e-05, "loss": 0.0001, "step": 31192 }, { "epoch": 29.1, "learning_rate": 4.636240671641791e-05, "loss": 0.0003, "step": 31196 }, { "epoch": 29.1, "learning_rate": 4.6361940298507465e-05, "loss": 0.0001, "step": 31200 }, { "epoch": 29.11, "learning_rate": 4.636147388059702e-05, "loss": 0.0007, "step": 31204 }, { "epoch": 29.11, "learning_rate": 4.636100746268657e-05, "loss": 0.0002, "step": 31208 }, { "epoch": 29.12, "learning_rate": 4.636054104477612e-05, "loss": 0.0013, "step": 31212 }, { "epoch": 29.12, "learning_rate": 4.636007462686567e-05, "loss": 0.0014, "step": 31216 }, { "epoch": 29.12, "learning_rate": 4.6359608208955226e-05, "loss": 0.0, "step": 31220 }, { "epoch": 29.13, "learning_rate": 4.635914179104478e-05, "loss": 0.0011, "step": 31224 }, { "epoch": 29.13, "learning_rate": 4.635867537313433e-05, "loss": 0.0001, "step": 31228 }, { "epoch": 29.13, "learning_rate": 4.635820895522388e-05, "loss": 0.0006, "step": 31232 }, { "epoch": 29.14, "learning_rate": 4.635774253731344e-05, "loss": 0.0071, "step": 31236 }, { "epoch": 29.14, "learning_rate": 4.635727611940299e-05, "loss": 0.0001, "step": 31240 }, { "epoch": 29.15, "learning_rate": 4.6356809701492535e-05, "loss": 0.0003, "step": 31244 }, { "epoch": 29.15, "learning_rate": 4.635634328358209e-05, "loss": 0.0029, "step": 31248 }, { "epoch": 29.15, "learning_rate": 4.6355876865671645e-05, "loss": 0.0001, "step": 31252 }, { "epoch": 29.16, "learning_rate": 4.635541044776119e-05, "loss": 0.0001, "step": 31256 }, { "epoch": 29.16, "learning_rate": 4.635494402985075e-05, "loss": 0.0004, "step": 31260 }, { "epoch": 29.16, "learning_rate": 4.63544776119403e-05, "loss": 0.0, "step": 31264 }, { "epoch": 29.17, "learning_rate": 4.635401119402985e-05, "loss": 0.0007, "step": 31268 }, { "epoch": 29.17, "learning_rate": 4.6353544776119406e-05, "loss": 0.0059, "step": 31272 }, { "epoch": 29.18, "learning_rate": 4.6353078358208954e-05, "loss": 0.0023, "step": 31276 }, { "epoch": 29.18, "learning_rate": 4.635261194029851e-05, "loss": 0.0004, "step": 31280 }, { "epoch": 29.18, "learning_rate": 4.6352145522388064e-05, "loss": 0.0004, "step": 31284 }, { "epoch": 29.19, "learning_rate": 4.635167910447761e-05, "loss": 0.0006, "step": 31288 }, { "epoch": 29.19, "learning_rate": 4.635121268656716e-05, "loss": 0.0001, "step": 31292 }, { "epoch": 29.19, "learning_rate": 4.635074626865672e-05, "loss": 0.0001, "step": 31296 }, { "epoch": 29.2, "learning_rate": 4.635027985074627e-05, "loss": 0.0003, "step": 31300 }, { "epoch": 29.2, "learning_rate": 4.6349813432835825e-05, "loss": 0.0, "step": 31304 }, { "epoch": 29.21, "learning_rate": 4.634934701492537e-05, "loss": 0.0007, "step": 31308 }, { "epoch": 29.21, "learning_rate": 4.634888059701493e-05, "loss": 0.0015, "step": 31312 }, { "epoch": 29.21, "learning_rate": 4.634841417910448e-05, "loss": 0.0005, "step": 31316 }, { "epoch": 29.22, "learning_rate": 4.634794776119403e-05, "loss": 0.0006, "step": 31320 }, { "epoch": 29.22, "learning_rate": 4.6347481343283586e-05, "loss": 0.0005, "step": 31324 }, { "epoch": 29.22, "learning_rate": 4.634701492537314e-05, "loss": 0.0001, "step": 31328 }, { "epoch": 29.23, "learning_rate": 4.634654850746269e-05, "loss": 0.0018, "step": 31332 }, { "epoch": 29.23, "learning_rate": 4.634608208955224e-05, "loss": 0.0002, "step": 31336 }, { "epoch": 29.24, "learning_rate": 4.634561567164179e-05, "loss": 0.0, "step": 31340 }, { "epoch": 29.24, "learning_rate": 4.634514925373135e-05, "loss": 0.0001, "step": 31344 }, { "epoch": 29.24, "learning_rate": 4.6344682835820895e-05, "loss": 0.0, "step": 31348 }, { "epoch": 29.25, "learning_rate": 4.634421641791045e-05, "loss": 0.0, "step": 31352 }, { "epoch": 29.25, "learning_rate": 4.6343750000000005e-05, "loss": 0.0006, "step": 31356 }, { "epoch": 29.25, "learning_rate": 4.634328358208955e-05, "loss": 0.0002, "step": 31360 }, { "epoch": 29.26, "learning_rate": 4.634281716417911e-05, "loss": 0.0002, "step": 31364 }, { "epoch": 29.26, "learning_rate": 4.6342350746268656e-05, "loss": 0.0097, "step": 31368 }, { "epoch": 29.26, "learning_rate": 4.634188432835821e-05, "loss": 0.0, "step": 31372 }, { "epoch": 29.27, "learning_rate": 4.6341417910447766e-05, "loss": 0.0008, "step": 31376 }, { "epoch": 29.27, "learning_rate": 4.6340951492537314e-05, "loss": 0.0001, "step": 31380 }, { "epoch": 29.28, "learning_rate": 4.634048507462687e-05, "loss": 0.0015, "step": 31384 }, { "epoch": 29.28, "learning_rate": 4.6340018656716424e-05, "loss": 0.0018, "step": 31388 }, { "epoch": 29.28, "learning_rate": 4.633955223880597e-05, "loss": 0.0008, "step": 31392 }, { "epoch": 29.29, "learning_rate": 4.633908582089552e-05, "loss": 0.0001, "step": 31396 }, { "epoch": 29.29, "learning_rate": 4.6338619402985075e-05, "loss": 0.0008, "step": 31400 }, { "epoch": 29.29, "learning_rate": 4.633815298507463e-05, "loss": 0.0, "step": 31404 }, { "epoch": 29.3, "learning_rate": 4.633768656716418e-05, "loss": 0.0063, "step": 31408 }, { "epoch": 29.3, "learning_rate": 4.633722014925373e-05, "loss": 0.0001, "step": 31412 }, { "epoch": 29.31, "learning_rate": 4.633675373134329e-05, "loss": 0.0002, "step": 31416 }, { "epoch": 29.31, "learning_rate": 4.6336287313432836e-05, "loss": 0.0, "step": 31420 }, { "epoch": 29.31, "learning_rate": 4.633582089552239e-05, "loss": 0.0003, "step": 31424 }, { "epoch": 29.32, "learning_rate": 4.633535447761194e-05, "loss": 0.0007, "step": 31428 }, { "epoch": 29.32, "learning_rate": 4.6334888059701494e-05, "loss": 0.0002, "step": 31432 }, { "epoch": 29.32, "learning_rate": 4.633442164179105e-05, "loss": 0.0011, "step": 31436 }, { "epoch": 29.33, "learning_rate": 4.63339552238806e-05, "loss": 0.0022, "step": 31440 }, { "epoch": 29.33, "learning_rate": 4.633348880597015e-05, "loss": 0.0, "step": 31444 }, { "epoch": 29.34, "learning_rate": 4.633302238805971e-05, "loss": 0.0002, "step": 31448 }, { "epoch": 29.34, "learning_rate": 4.6332555970149255e-05, "loss": 0.0003, "step": 31452 }, { "epoch": 29.34, "learning_rate": 4.63320895522388e-05, "loss": 0.0002, "step": 31456 }, { "epoch": 29.35, "learning_rate": 4.633162313432836e-05, "loss": 0.0002, "step": 31460 }, { "epoch": 29.35, "learning_rate": 4.633115671641791e-05, "loss": 0.0003, "step": 31464 }, { "epoch": 29.35, "learning_rate": 4.633069029850747e-05, "loss": 0.0003, "step": 31468 }, { "epoch": 29.36, "learning_rate": 4.6330223880597016e-05, "loss": 0.0035, "step": 31472 }, { "epoch": 29.36, "learning_rate": 4.632975746268657e-05, "loss": 0.0011, "step": 31476 }, { "epoch": 29.37, "learning_rate": 4.6329291044776126e-05, "loss": 0.0001, "step": 31480 }, { "epoch": 29.37, "learning_rate": 4.6328824626865674e-05, "loss": 0.0002, "step": 31484 }, { "epoch": 29.37, "learning_rate": 4.632835820895522e-05, "loss": 0.0005, "step": 31488 }, { "epoch": 29.38, "learning_rate": 4.6327891791044784e-05, "loss": 0.0004, "step": 31492 }, { "epoch": 29.38, "learning_rate": 4.632742537313433e-05, "loss": 0.0008, "step": 31496 }, { "epoch": 29.38, "learning_rate": 4.632695895522388e-05, "loss": 0.0002, "step": 31500 }, { "epoch": 29.38, "eval_exact_match": 0.741779497098646, "eval_exec": 0.7746615087040619, "eval_loss": 0.42285799980163574, "eval_runtime": 1077.7296, "eval_samples_per_second": 0.959, "step": 31500 }, { "epoch": 29.39, "learning_rate": 4.6326492537313435e-05, "loss": 0.0001, "step": 31504 }, { "epoch": 29.39, "learning_rate": 4.632602611940299e-05, "loss": 0.0, "step": 31508 }, { "epoch": 29.4, "learning_rate": 4.632555970149254e-05, "loss": 0.0004, "step": 31512 }, { "epoch": 29.4, "learning_rate": 4.632509328358209e-05, "loss": 0.0, "step": 31516 }, { "epoch": 29.4, "learning_rate": 4.632462686567164e-05, "loss": 0.0014, "step": 31520 }, { "epoch": 29.41, "learning_rate": 4.6324160447761196e-05, "loss": 0.037, "step": 31524 }, { "epoch": 29.41, "learning_rate": 4.632369402985075e-05, "loss": 0.004, "step": 31528 }, { "epoch": 29.41, "learning_rate": 4.63232276119403e-05, "loss": 0.0, "step": 31532 }, { "epoch": 29.42, "learning_rate": 4.6322761194029854e-05, "loss": 0.0001, "step": 31536 }, { "epoch": 29.42, "learning_rate": 4.632229477611941e-05, "loss": 0.0005, "step": 31540 }, { "epoch": 29.43, "learning_rate": 4.632182835820896e-05, "loss": 0.0022, "step": 31544 }, { "epoch": 29.43, "learning_rate": 4.6321361940298505e-05, "loss": 0.0001, "step": 31548 }, { "epoch": 29.43, "learning_rate": 4.632089552238807e-05, "loss": 0.001, "step": 31552 }, { "epoch": 29.44, "learning_rate": 4.6320429104477615e-05, "loss": 0.001, "step": 31556 }, { "epoch": 29.44, "learning_rate": 4.631996268656716e-05, "loss": 0.0, "step": 31560 }, { "epoch": 29.44, "learning_rate": 4.631949626865672e-05, "loss": 0.0012, "step": 31564 }, { "epoch": 29.45, "learning_rate": 4.631902985074627e-05, "loss": 0.0011, "step": 31568 }, { "epoch": 29.45, "learning_rate": 4.631856343283582e-05, "loss": 0.0005, "step": 31572 }, { "epoch": 29.46, "learning_rate": 4.6318097014925376e-05, "loss": 0.0018, "step": 31576 }, { "epoch": 29.46, "learning_rate": 4.6317630597014924e-05, "loss": 0.0, "step": 31580 }, { "epoch": 29.46, "learning_rate": 4.631716417910448e-05, "loss": 0.0001, "step": 31584 }, { "epoch": 29.47, "learning_rate": 4.6316697761194034e-05, "loss": 0.002, "step": 31588 }, { "epoch": 29.47, "learning_rate": 4.631623134328358e-05, "loss": 0.0004, "step": 31592 }, { "epoch": 29.47, "learning_rate": 4.631576492537314e-05, "loss": 0.0172, "step": 31596 }, { "epoch": 29.48, "learning_rate": 4.631529850746269e-05, "loss": 0.0001, "step": 31600 }, { "epoch": 29.48, "learning_rate": 4.631483208955224e-05, "loss": 0.0021, "step": 31604 }, { "epoch": 29.49, "learning_rate": 4.631436567164179e-05, "loss": 0.0003, "step": 31608 }, { "epoch": 29.49, "learning_rate": 4.631389925373135e-05, "loss": 0.0005, "step": 31612 }, { "epoch": 29.49, "learning_rate": 4.63134328358209e-05, "loss": 0.0004, "step": 31616 }, { "epoch": 29.5, "learning_rate": 4.6312966417910446e-05, "loss": 0.0002, "step": 31620 }, { "epoch": 29.5, "learning_rate": 4.63125e-05, "loss": 0.0, "step": 31624 }, { "epoch": 29.5, "learning_rate": 4.6312033582089556e-05, "loss": 0.0007, "step": 31628 }, { "epoch": 29.51, "learning_rate": 4.631156716417911e-05, "loss": 0.0001, "step": 31632 }, { "epoch": 29.51, "learning_rate": 4.631110074626866e-05, "loss": 0.0015, "step": 31636 }, { "epoch": 29.51, "learning_rate": 4.631063432835821e-05, "loss": 0.0001, "step": 31640 }, { "epoch": 29.52, "learning_rate": 4.631016791044777e-05, "loss": 0.0019, "step": 31644 }, { "epoch": 29.52, "learning_rate": 4.630970149253732e-05, "loss": 0.0003, "step": 31648 }, { "epoch": 29.53, "learning_rate": 4.6309235074626865e-05, "loss": 0.0, "step": 31652 }, { "epoch": 29.53, "learning_rate": 4.630876865671642e-05, "loss": 0.0235, "step": 31656 }, { "epoch": 29.53, "learning_rate": 4.6308302238805975e-05, "loss": 0.0001, "step": 31660 }, { "epoch": 29.54, "learning_rate": 4.630783582089552e-05, "loss": 0.0015, "step": 31664 }, { "epoch": 29.54, "learning_rate": 4.630736940298508e-05, "loss": 0.0002, "step": 31668 }, { "epoch": 29.54, "learning_rate": 4.630690298507463e-05, "loss": 0.0001, "step": 31672 }, { "epoch": 29.55, "learning_rate": 4.630643656716418e-05, "loss": 0.0002, "step": 31676 }, { "epoch": 29.55, "learning_rate": 4.6305970149253736e-05, "loss": 0.0, "step": 31680 }, { "epoch": 29.56, "learning_rate": 4.6305503731343284e-05, "loss": 0.0001, "step": 31684 }, { "epoch": 29.56, "learning_rate": 4.630503731343284e-05, "loss": 0.0003, "step": 31688 }, { "epoch": 29.56, "learning_rate": 4.6304570895522394e-05, "loss": 0.0, "step": 31692 }, { "epoch": 29.57, "learning_rate": 4.630410447761194e-05, "loss": 0.0001, "step": 31696 }, { "epoch": 29.57, "learning_rate": 4.630363805970149e-05, "loss": 0.0002, "step": 31700 }, { "epoch": 29.57, "learning_rate": 4.630317164179105e-05, "loss": 0.0009, "step": 31704 }, { "epoch": 29.58, "learning_rate": 4.63027052238806e-05, "loss": 0.0001, "step": 31708 }, { "epoch": 29.58, "learning_rate": 4.630223880597015e-05, "loss": 0.0001, "step": 31712 }, { "epoch": 29.59, "learning_rate": 4.63017723880597e-05, "loss": 0.0, "step": 31716 }, { "epoch": 29.59, "learning_rate": 4.630130597014926e-05, "loss": 0.0001, "step": 31720 }, { "epoch": 29.59, "learning_rate": 4.6300839552238806e-05, "loss": 0.0023, "step": 31724 }, { "epoch": 29.6, "learning_rate": 4.630037313432836e-05, "loss": 0.001, "step": 31728 }, { "epoch": 29.6, "learning_rate": 4.6299906716417916e-05, "loss": 0.0001, "step": 31732 }, { "epoch": 29.6, "learning_rate": 4.6299440298507464e-05, "loss": 0.0034, "step": 31736 }, { "epoch": 29.61, "learning_rate": 4.629897388059702e-05, "loss": 0.0007, "step": 31740 }, { "epoch": 29.61, "learning_rate": 4.629850746268657e-05, "loss": 0.0002, "step": 31744 }, { "epoch": 29.62, "learning_rate": 4.629804104477612e-05, "loss": 0.0001, "step": 31748 }, { "epoch": 29.62, "learning_rate": 4.6297574626865677e-05, "loss": 0.0007, "step": 31752 }, { "epoch": 29.62, "learning_rate": 4.6297108208955225e-05, "loss": 0.0001, "step": 31756 }, { "epoch": 29.63, "learning_rate": 4.629664179104477e-05, "loss": 0.0028, "step": 31760 }, { "epoch": 29.63, "learning_rate": 4.6296175373134335e-05, "loss": 0.0001, "step": 31764 }, { "epoch": 29.63, "learning_rate": 4.629570895522388e-05, "loss": 0.0111, "step": 31768 }, { "epoch": 29.64, "learning_rate": 4.629524253731343e-05, "loss": 0.0001, "step": 31772 }, { "epoch": 29.64, "learning_rate": 4.6294776119402986e-05, "loss": 0.0001, "step": 31776 }, { "epoch": 29.65, "learning_rate": 4.629430970149254e-05, "loss": 0.0013, "step": 31780 }, { "epoch": 29.65, "learning_rate": 4.629384328358209e-05, "loss": 0.0004, "step": 31784 }, { "epoch": 29.65, "learning_rate": 4.6293376865671644e-05, "loss": 0.0011, "step": 31788 }, { "epoch": 29.66, "learning_rate": 4.62929104477612e-05, "loss": 0.0001, "step": 31792 }, { "epoch": 29.66, "learning_rate": 4.6292444029850753e-05, "loss": 0.0015, "step": 31796 }, { "epoch": 29.66, "learning_rate": 4.62919776119403e-05, "loss": 0.001, "step": 31800 }, { "epoch": 29.67, "learning_rate": 4.629151119402985e-05, "loss": 0.0008, "step": 31804 }, { "epoch": 29.67, "learning_rate": 4.6291044776119405e-05, "loss": 0.0002, "step": 31808 }, { "epoch": 29.68, "learning_rate": 4.629057835820896e-05, "loss": 0.0002, "step": 31812 }, { "epoch": 29.68, "learning_rate": 4.629011194029851e-05, "loss": 0.0117, "step": 31816 }, { "epoch": 29.68, "learning_rate": 4.628964552238806e-05, "loss": 0.0005, "step": 31820 }, { "epoch": 29.69, "learning_rate": 4.628917910447762e-05, "loss": 0.0025, "step": 31824 }, { "epoch": 29.69, "learning_rate": 4.6288712686567166e-05, "loss": 0.0005, "step": 31828 }, { "epoch": 29.69, "learning_rate": 4.628824626865672e-05, "loss": 0.003, "step": 31832 }, { "epoch": 29.7, "learning_rate": 4.628777985074627e-05, "loss": 0.0, "step": 31836 }, { "epoch": 29.7, "learning_rate": 4.6287313432835824e-05, "loss": 0.0002, "step": 31840 }, { "epoch": 29.71, "learning_rate": 4.628684701492538e-05, "loss": 0.0016, "step": 31844 }, { "epoch": 29.71, "learning_rate": 4.6286380597014927e-05, "loss": 0.0002, "step": 31848 }, { "epoch": 29.71, "learning_rate": 4.628591417910448e-05, "loss": 0.0002, "step": 31852 }, { "epoch": 29.72, "learning_rate": 4.6285447761194036e-05, "loss": 0.0002, "step": 31856 }, { "epoch": 29.72, "learning_rate": 4.6284981343283585e-05, "loss": 0.0003, "step": 31860 }, { "epoch": 29.72, "learning_rate": 4.628451492537313e-05, "loss": 0.002, "step": 31864 }, { "epoch": 29.73, "learning_rate": 4.628404850746269e-05, "loss": 0.0025, "step": 31868 }, { "epoch": 29.73, "learning_rate": 4.628358208955224e-05, "loss": 0.0001, "step": 31872 }, { "epoch": 29.73, "learning_rate": 4.628311567164179e-05, "loss": 0.0001, "step": 31876 }, { "epoch": 29.74, "learning_rate": 4.6282649253731346e-05, "loss": 0.0001, "step": 31880 }, { "epoch": 29.74, "learning_rate": 4.62821828358209e-05, "loss": 0.0002, "step": 31884 }, { "epoch": 29.75, "learning_rate": 4.628171641791045e-05, "loss": 0.0001, "step": 31888 }, { "epoch": 29.75, "learning_rate": 4.6281250000000003e-05, "loss": 0.0004, "step": 31892 }, { "epoch": 29.75, "learning_rate": 4.628078358208955e-05, "loss": 0.0013, "step": 31896 }, { "epoch": 29.76, "learning_rate": 4.6280317164179107e-05, "loss": 0.0084, "step": 31900 }, { "epoch": 29.76, "learning_rate": 4.627985074626866e-05, "loss": 0.002, "step": 31904 }, { "epoch": 29.76, "learning_rate": 4.627938432835821e-05, "loss": 0.0001, "step": 31908 }, { "epoch": 29.77, "learning_rate": 4.627891791044776e-05, "loss": 0.0001, "step": 31912 }, { "epoch": 29.77, "learning_rate": 4.627845149253732e-05, "loss": 0.0007, "step": 31916 }, { "epoch": 29.78, "learning_rate": 4.627798507462687e-05, "loss": 0.0002, "step": 31920 }, { "epoch": 29.78, "learning_rate": 4.6277518656716416e-05, "loss": 0.0034, "step": 31924 }, { "epoch": 29.78, "learning_rate": 4.627705223880597e-05, "loss": 0.0002, "step": 31928 }, { "epoch": 29.79, "learning_rate": 4.6276585820895525e-05, "loss": 0.0013, "step": 31932 }, { "epoch": 29.79, "learning_rate": 4.6276119402985074e-05, "loss": 0.0002, "step": 31936 }, { "epoch": 29.79, "learning_rate": 4.627565298507463e-05, "loss": 0.0, "step": 31940 }, { "epoch": 29.8, "learning_rate": 4.6275186567164183e-05, "loss": 0.0003, "step": 31944 }, { "epoch": 29.8, "learning_rate": 4.627472014925374e-05, "loss": 0.0001, "step": 31948 }, { "epoch": 29.81, "learning_rate": 4.6274253731343286e-05, "loss": 0.0002, "step": 31952 }, { "epoch": 29.81, "learning_rate": 4.6273787313432835e-05, "loss": 0.0003, "step": 31956 }, { "epoch": 29.81, "learning_rate": 4.6273320895522396e-05, "loss": 0.001, "step": 31960 }, { "epoch": 29.82, "learning_rate": 4.6272854477611944e-05, "loss": 0.0, "step": 31964 }, { "epoch": 29.82, "learning_rate": 4.627238805970149e-05, "loss": 0.0015, "step": 31968 }, { "epoch": 29.82, "learning_rate": 4.627192164179105e-05, "loss": 0.001, "step": 31972 }, { "epoch": 29.83, "learning_rate": 4.62714552238806e-05, "loss": 0.0001, "step": 31976 }, { "epoch": 29.83, "learning_rate": 4.627098880597015e-05, "loss": 0.0011, "step": 31980 }, { "epoch": 29.84, "learning_rate": 4.6270522388059705e-05, "loss": 0.0006, "step": 31984 }, { "epoch": 29.84, "learning_rate": 4.6270055970149254e-05, "loss": 0.0002, "step": 31988 }, { "epoch": 29.84, "learning_rate": 4.626958955223881e-05, "loss": 0.0014, "step": 31992 }, { "epoch": 29.85, "learning_rate": 4.626912313432836e-05, "loss": 0.0007, "step": 31996 }, { "epoch": 29.85, "learning_rate": 4.626865671641791e-05, "loss": 0.0037, "step": 32000 }, { "epoch": 29.85, "eval_exact_match": 0.7437137330754352, "eval_exec": 0.7717601547388782, "eval_loss": 0.40884315967559814, "eval_runtime": 1094.761, "eval_samples_per_second": 0.944, "step": 32000 }, { "epoch": 29.85, "learning_rate": 4.6268190298507466e-05, "loss": 0.0, "step": 32004 }, { "epoch": 29.86, "learning_rate": 4.626772388059702e-05, "loss": 0.0149, "step": 32008 }, { "epoch": 29.86, "learning_rate": 4.626725746268657e-05, "loss": 0.0004, "step": 32012 }, { "epoch": 29.87, "learning_rate": 4.626679104477612e-05, "loss": 0.0004, "step": 32016 }, { "epoch": 29.87, "learning_rate": 4.626632462686567e-05, "loss": 0.0001, "step": 32020 }, { "epoch": 29.87, "learning_rate": 4.626585820895523e-05, "loss": 0.0001, "step": 32024 }, { "epoch": 29.88, "learning_rate": 4.6265391791044775e-05, "loss": 0.0, "step": 32028 }, { "epoch": 29.88, "learning_rate": 4.626492537313433e-05, "loss": 0.0046, "step": 32032 }, { "epoch": 29.88, "learning_rate": 4.6264458955223885e-05, "loss": 0.0023, "step": 32036 }, { "epoch": 29.89, "learning_rate": 4.6263992537313433e-05, "loss": 0.0013, "step": 32040 }, { "epoch": 29.89, "learning_rate": 4.626352611940299e-05, "loss": 0.0003, "step": 32044 }, { "epoch": 29.9, "learning_rate": 4.6263059701492536e-05, "loss": 0.0, "step": 32048 }, { "epoch": 29.9, "learning_rate": 4.626259328358209e-05, "loss": 0.0001, "step": 32052 }, { "epoch": 29.9, "learning_rate": 4.6262126865671646e-05, "loss": 0.0001, "step": 32056 }, { "epoch": 29.91, "learning_rate": 4.6261660447761194e-05, "loss": 0.0001, "step": 32060 }, { "epoch": 29.91, "learning_rate": 4.626119402985075e-05, "loss": 0.0023, "step": 32064 }, { "epoch": 29.91, "learning_rate": 4.6260727611940304e-05, "loss": 0.0001, "step": 32068 }, { "epoch": 29.92, "learning_rate": 4.626026119402985e-05, "loss": 0.0004, "step": 32072 }, { "epoch": 29.92, "learning_rate": 4.62597947761194e-05, "loss": 0.0009, "step": 32076 }, { "epoch": 29.93, "learning_rate": 4.6259328358208955e-05, "loss": 0.0062, "step": 32080 }, { "epoch": 29.93, "learning_rate": 4.625886194029851e-05, "loss": 0.002, "step": 32084 }, { "epoch": 29.93, "learning_rate": 4.625839552238806e-05, "loss": 0.0001, "step": 32088 }, { "epoch": 29.94, "learning_rate": 4.625792910447761e-05, "loss": 0.0055, "step": 32092 }, { "epoch": 29.94, "learning_rate": 4.625746268656717e-05, "loss": 0.0002, "step": 32096 }, { "epoch": 29.94, "learning_rate": 4.6256996268656716e-05, "loss": 0.0003, "step": 32100 }, { "epoch": 29.95, "learning_rate": 4.625652985074627e-05, "loss": 0.0006, "step": 32104 }, { "epoch": 29.95, "learning_rate": 4.625606343283582e-05, "loss": 0.0005, "step": 32108 }, { "epoch": 29.96, "learning_rate": 4.625559701492538e-05, "loss": 0.0001, "step": 32112 }, { "epoch": 29.96, "learning_rate": 4.625513059701493e-05, "loss": 0.0001, "step": 32116 }, { "epoch": 29.96, "learning_rate": 4.625466417910448e-05, "loss": 0.0003, "step": 32120 }, { "epoch": 29.97, "learning_rate": 4.625419776119403e-05, "loss": 0.0014, "step": 32124 }, { "epoch": 29.97, "learning_rate": 4.625373134328359e-05, "loss": 0.0, "step": 32128 }, { "epoch": 29.97, "learning_rate": 4.6253264925373135e-05, "loss": 0.0021, "step": 32132 }, { "epoch": 29.98, "learning_rate": 4.625279850746269e-05, "loss": 0.0006, "step": 32136 }, { "epoch": 29.98, "learning_rate": 4.625233208955224e-05, "loss": 0.0003, "step": 32140 }, { "epoch": 29.98, "learning_rate": 4.625186567164179e-05, "loss": 0.0011, "step": 32144 }, { "epoch": 29.99, "learning_rate": 4.625139925373135e-05, "loss": 0.0005, "step": 32148 }, { "epoch": 29.99, "learning_rate": 4.6250932835820896e-05, "loss": 0.0001, "step": 32152 }, { "epoch": 30.0, "learning_rate": 4.625046641791045e-05, "loss": 0.0002, "step": 32156 }, { "epoch": 30.0, "learning_rate": 4.6250000000000006e-05, "loss": 0.0002, "step": 32160 }, { "epoch": 30.0, "learning_rate": 4.6249533582089554e-05, "loss": 0.0002, "step": 32164 }, { "epoch": 30.01, "learning_rate": 4.62490671641791e-05, "loss": 0.0005, "step": 32168 }, { "epoch": 30.01, "learning_rate": 4.6248600746268664e-05, "loss": 0.0006, "step": 32172 }, { "epoch": 30.01, "learning_rate": 4.624813432835821e-05, "loss": 0.0003, "step": 32176 }, { "epoch": 30.02, "learning_rate": 4.624766791044776e-05, "loss": 0.0139, "step": 32180 }, { "epoch": 30.02, "learning_rate": 4.6247201492537315e-05, "loss": 0.0009, "step": 32184 }, { "epoch": 30.03, "learning_rate": 4.624673507462687e-05, "loss": 0.0002, "step": 32188 }, { "epoch": 30.03, "learning_rate": 4.624626865671642e-05, "loss": 0.0001, "step": 32192 }, { "epoch": 30.03, "learning_rate": 4.624580223880597e-05, "loss": 0.0001, "step": 32196 }, { "epoch": 30.04, "learning_rate": 4.624533582089552e-05, "loss": 0.0001, "step": 32200 }, { "epoch": 30.04, "learning_rate": 4.6244869402985076e-05, "loss": 0.0008, "step": 32204 }, { "epoch": 30.04, "learning_rate": 4.624440298507463e-05, "loss": 0.0071, "step": 32208 }, { "epoch": 30.05, "learning_rate": 4.624393656716418e-05, "loss": 0.0001, "step": 32212 }, { "epoch": 30.05, "learning_rate": 4.6243470149253734e-05, "loss": 0.0001, "step": 32216 }, { "epoch": 30.06, "learning_rate": 4.624300373134329e-05, "loss": 0.0002, "step": 32220 }, { "epoch": 30.06, "learning_rate": 4.624253731343284e-05, "loss": 0.0, "step": 32224 }, { "epoch": 30.06, "learning_rate": 4.6242070895522385e-05, "loss": 0.0001, "step": 32228 }, { "epoch": 30.07, "learning_rate": 4.624160447761195e-05, "loss": 0.0008, "step": 32232 }, { "epoch": 30.07, "learning_rate": 4.6241138059701495e-05, "loss": 0.0003, "step": 32236 }, { "epoch": 30.07, "learning_rate": 4.624067164179104e-05, "loss": 0.0002, "step": 32240 }, { "epoch": 30.08, "learning_rate": 4.62402052238806e-05, "loss": 0.0013, "step": 32244 }, { "epoch": 30.08, "learning_rate": 4.623973880597015e-05, "loss": 0.0013, "step": 32248 }, { "epoch": 30.09, "learning_rate": 4.62392723880597e-05, "loss": 0.0003, "step": 32252 }, { "epoch": 30.09, "learning_rate": 4.6238805970149256e-05, "loss": 0.0002, "step": 32256 }, { "epoch": 30.09, "learning_rate": 4.6238339552238804e-05, "loss": 0.0004, "step": 32260 }, { "epoch": 30.1, "learning_rate": 4.623787313432836e-05, "loss": 0.0005, "step": 32264 }, { "epoch": 30.1, "learning_rate": 4.6237406716417914e-05, "loss": 0.0001, "step": 32268 }, { "epoch": 30.1, "learning_rate": 4.623694029850746e-05, "loss": 0.0003, "step": 32272 }, { "epoch": 30.11, "learning_rate": 4.623647388059702e-05, "loss": 0.0003, "step": 32276 }, { "epoch": 30.11, "learning_rate": 4.623600746268657e-05, "loss": 0.0002, "step": 32280 }, { "epoch": 30.12, "learning_rate": 4.623554104477612e-05, "loss": 0.0001, "step": 32284 }, { "epoch": 30.12, "learning_rate": 4.6235074626865675e-05, "loss": 0.0, "step": 32288 }, { "epoch": 30.12, "learning_rate": 4.623460820895523e-05, "loss": 0.0004, "step": 32292 }, { "epoch": 30.13, "learning_rate": 4.623414179104478e-05, "loss": 0.0, "step": 32296 }, { "epoch": 30.13, "learning_rate": 4.623367537313433e-05, "loss": 0.0006, "step": 32300 }, { "epoch": 30.13, "learning_rate": 4.623320895522388e-05, "loss": 0.001, "step": 32304 }, { "epoch": 30.14, "learning_rate": 4.6232742537313436e-05, "loss": 0.005, "step": 32308 }, { "epoch": 30.14, "learning_rate": 4.623227611940299e-05, "loss": 0.0001, "step": 32312 }, { "epoch": 30.15, "learning_rate": 4.623180970149254e-05, "loss": 0.001, "step": 32316 }, { "epoch": 30.15, "learning_rate": 4.623134328358209e-05, "loss": 0.0, "step": 32320 }, { "epoch": 30.15, "learning_rate": 4.623087686567165e-05, "loss": 0.0001, "step": 32324 }, { "epoch": 30.16, "learning_rate": 4.62304104477612e-05, "loss": 0.0008, "step": 32328 }, { "epoch": 30.16, "learning_rate": 4.6229944029850745e-05, "loss": 0.0002, "step": 32332 }, { "epoch": 30.16, "learning_rate": 4.62294776119403e-05, "loss": 0.0001, "step": 32336 }, { "epoch": 30.17, "learning_rate": 4.6229011194029855e-05, "loss": 0.0008, "step": 32340 }, { "epoch": 30.17, "learning_rate": 4.62285447761194e-05, "loss": 0.003, "step": 32344 }, { "epoch": 30.18, "learning_rate": 4.622807835820896e-05, "loss": 0.0002, "step": 32348 }, { "epoch": 30.18, "learning_rate": 4.622761194029851e-05, "loss": 0.0005, "step": 32352 }, { "epoch": 30.18, "learning_rate": 4.622714552238806e-05, "loss": 0.0007, "step": 32356 }, { "epoch": 30.19, "learning_rate": 4.6226679104477616e-05, "loss": 0.0001, "step": 32360 }, { "epoch": 30.19, "learning_rate": 4.6226212686567164e-05, "loss": 0.0001, "step": 32364 }, { "epoch": 30.19, "learning_rate": 4.622574626865672e-05, "loss": 0.0001, "step": 32368 }, { "epoch": 30.2, "learning_rate": 4.6225279850746274e-05, "loss": 0.0004, "step": 32372 }, { "epoch": 30.2, "learning_rate": 4.622481343283582e-05, "loss": 0.0002, "step": 32376 }, { "epoch": 30.21, "learning_rate": 4.622434701492537e-05, "loss": 0.0, "step": 32380 }, { "epoch": 30.21, "learning_rate": 4.622388059701493e-05, "loss": 0.0017, "step": 32384 }, { "epoch": 30.21, "learning_rate": 4.622341417910448e-05, "loss": 0.0007, "step": 32388 }, { "epoch": 30.22, "learning_rate": 4.622294776119403e-05, "loss": 0.0014, "step": 32392 }, { "epoch": 30.22, "learning_rate": 4.622248134328358e-05, "loss": 0.0003, "step": 32396 }, { "epoch": 30.22, "learning_rate": 4.622201492537314e-05, "loss": 0.0131, "step": 32400 }, { "epoch": 30.23, "learning_rate": 4.6221548507462686e-05, "loss": 0.0001, "step": 32404 }, { "epoch": 30.23, "learning_rate": 4.622108208955224e-05, "loss": 0.0023, "step": 32408 }, { "epoch": 30.24, "learning_rate": 4.6220615671641796e-05, "loss": 0.0002, "step": 32412 }, { "epoch": 30.24, "learning_rate": 4.6220149253731344e-05, "loss": 0.0004, "step": 32416 }, { "epoch": 30.24, "learning_rate": 4.62196828358209e-05, "loss": 0.0001, "step": 32420 }, { "epoch": 30.25, "learning_rate": 4.621921641791045e-05, "loss": 0.001, "step": 32424 }, { "epoch": 30.25, "learning_rate": 4.621875e-05, "loss": 0.0001, "step": 32428 }, { "epoch": 30.25, "learning_rate": 4.621828358208956e-05, "loss": 0.0006, "step": 32432 }, { "epoch": 30.26, "learning_rate": 4.6217817164179105e-05, "loss": 0.0003, "step": 32436 }, { "epoch": 30.26, "learning_rate": 4.621735074626866e-05, "loss": 0.0002, "step": 32440 }, { "epoch": 30.26, "learning_rate": 4.6216884328358215e-05, "loss": 0.0003, "step": 32444 }, { "epoch": 30.27, "learning_rate": 4.621641791044776e-05, "loss": 0.0001, "step": 32448 }, { "epoch": 30.27, "learning_rate": 4.621595149253732e-05, "loss": 0.0006, "step": 32452 }, { "epoch": 30.28, "learning_rate": 4.6215485074626866e-05, "loss": 0.0, "step": 32456 }, { "epoch": 30.28, "learning_rate": 4.621501865671642e-05, "loss": 0.0002, "step": 32460 }, { "epoch": 30.28, "learning_rate": 4.6214552238805976e-05, "loss": 0.0003, "step": 32464 }, { "epoch": 30.29, "learning_rate": 4.6214085820895524e-05, "loss": 0.0016, "step": 32468 }, { "epoch": 30.29, "learning_rate": 4.621361940298508e-05, "loss": 0.0013, "step": 32472 }, { "epoch": 30.29, "learning_rate": 4.6213152985074634e-05, "loss": 0.0, "step": 32476 }, { "epoch": 30.3, "learning_rate": 4.621268656716418e-05, "loss": 0.0, "step": 32480 }, { "epoch": 30.3, "learning_rate": 4.621222014925373e-05, "loss": 0.0001, "step": 32484 }, { "epoch": 30.31, "learning_rate": 4.6211753731343285e-05, "loss": 0.0008, "step": 32488 }, { "epoch": 30.31, "learning_rate": 4.621128731343284e-05, "loss": 0.0003, "step": 32492 }, { "epoch": 30.31, "learning_rate": 4.621082089552239e-05, "loss": 0.0001, "step": 32496 }, { "epoch": 30.32, "learning_rate": 4.621035447761194e-05, "loss": 0.0, "step": 32500 }, { "epoch": 30.32, "eval_exact_match": 0.7388781431334622, "eval_exec": 0.7678916827852998, "eval_loss": 0.4083205461502075, "eval_runtime": 1184.6901, "eval_samples_per_second": 0.873, "step": 32500 }, { "epoch": 30.32, "learning_rate": 4.62098880597015e-05, "loss": 0.0, "step": 32504 }, { "epoch": 30.32, "learning_rate": 4.6209421641791046e-05, "loss": 0.0046, "step": 32508 }, { "epoch": 30.33, "learning_rate": 4.62089552238806e-05, "loss": 0.0001, "step": 32512 }, { "epoch": 30.33, "learning_rate": 4.620848880597015e-05, "loss": 0.0001, "step": 32516 }, { "epoch": 30.34, "learning_rate": 4.6208022388059704e-05, "loss": 0.0014, "step": 32520 }, { "epoch": 30.34, "learning_rate": 4.620755597014926e-05, "loss": 0.0004, "step": 32524 }, { "epoch": 30.34, "learning_rate": 4.620708955223881e-05, "loss": 0.0001, "step": 32528 }, { "epoch": 30.35, "learning_rate": 4.620662313432836e-05, "loss": 0.0028, "step": 32532 }, { "epoch": 30.35, "learning_rate": 4.620615671641792e-05, "loss": 0.0, "step": 32536 }, { "epoch": 30.35, "learning_rate": 4.6205690298507465e-05, "loss": 0.0001, "step": 32540 }, { "epoch": 30.36, "learning_rate": 4.620522388059701e-05, "loss": 0.0002, "step": 32544 }, { "epoch": 30.36, "learning_rate": 4.620475746268657e-05, "loss": 0.0001, "step": 32548 }, { "epoch": 30.37, "learning_rate": 4.620429104477612e-05, "loss": 0.001, "step": 32552 }, { "epoch": 30.37, "learning_rate": 4.620382462686567e-05, "loss": 0.0002, "step": 32556 }, { "epoch": 30.37, "learning_rate": 4.6203358208955226e-05, "loss": 0.0167, "step": 32560 }, { "epoch": 30.38, "learning_rate": 4.620289179104478e-05, "loss": 0.0051, "step": 32564 }, { "epoch": 30.38, "learning_rate": 4.620242537313433e-05, "loss": 0.0002, "step": 32568 }, { "epoch": 30.38, "learning_rate": 4.6201958955223884e-05, "loss": 0.0, "step": 32572 }, { "epoch": 30.39, "learning_rate": 4.620149253731343e-05, "loss": 0.0027, "step": 32576 }, { "epoch": 30.39, "learning_rate": 4.620102611940299e-05, "loss": 0.007, "step": 32580 }, { "epoch": 30.4, "learning_rate": 4.620055970149254e-05, "loss": 0.0001, "step": 32584 }, { "epoch": 30.4, "learning_rate": 4.620009328358209e-05, "loss": 0.0037, "step": 32588 }, { "epoch": 30.4, "learning_rate": 4.619962686567164e-05, "loss": 0.0001, "step": 32592 }, { "epoch": 30.41, "learning_rate": 4.61991604477612e-05, "loss": 0.0018, "step": 32596 }, { "epoch": 30.41, "learning_rate": 4.619869402985075e-05, "loss": 0.0007, "step": 32600 }, { "epoch": 30.41, "learning_rate": 4.61982276119403e-05, "loss": 0.0001, "step": 32604 }, { "epoch": 30.42, "learning_rate": 4.619776119402985e-05, "loss": 0.0002, "step": 32608 }, { "epoch": 30.42, "learning_rate": 4.6197294776119406e-05, "loss": 0.0001, "step": 32612 }, { "epoch": 30.43, "learning_rate": 4.619682835820896e-05, "loss": 0.0016, "step": 32616 }, { "epoch": 30.43, "learning_rate": 4.619636194029851e-05, "loss": 0.0, "step": 32620 }, { "epoch": 30.43, "learning_rate": 4.6195895522388064e-05, "loss": 0.0002, "step": 32624 }, { "epoch": 30.44, "learning_rate": 4.619542910447762e-05, "loss": 0.0003, "step": 32628 }, { "epoch": 30.44, "learning_rate": 4.619496268656717e-05, "loss": 0.0002, "step": 32632 }, { "epoch": 30.44, "learning_rate": 4.6194496268656715e-05, "loss": 0.0001, "step": 32636 }, { "epoch": 30.45, "learning_rate": 4.6194029850746277e-05, "loss": 0.0002, "step": 32640 }, { "epoch": 30.45, "learning_rate": 4.6193563432835825e-05, "loss": 0.0001, "step": 32644 }, { "epoch": 30.46, "learning_rate": 4.619309701492537e-05, "loss": 0.0003, "step": 32648 }, { "epoch": 30.46, "learning_rate": 4.619263059701493e-05, "loss": 0.0014, "step": 32652 }, { "epoch": 30.46, "learning_rate": 4.619216417910448e-05, "loss": 0.0, "step": 32656 }, { "epoch": 30.47, "learning_rate": 4.619169776119403e-05, "loss": 0.0085, "step": 32660 }, { "epoch": 30.47, "learning_rate": 4.6191231343283586e-05, "loss": 0.0002, "step": 32664 }, { "epoch": 30.47, "learning_rate": 4.6190764925373134e-05, "loss": 0.0008, "step": 32668 }, { "epoch": 30.48, "learning_rate": 4.619029850746269e-05, "loss": 0.0007, "step": 32672 }, { "epoch": 30.48, "learning_rate": 4.6189832089552244e-05, "loss": 0.0025, "step": 32676 }, { "epoch": 30.49, "learning_rate": 4.618936567164179e-05, "loss": 0.0002, "step": 32680 }, { "epoch": 30.49, "learning_rate": 4.618889925373135e-05, "loss": 0.0009, "step": 32684 }, { "epoch": 30.49, "learning_rate": 4.61884328358209e-05, "loss": 0.0041, "step": 32688 }, { "epoch": 30.5, "learning_rate": 4.618796641791045e-05, "loss": 0.0002, "step": 32692 }, { "epoch": 30.5, "learning_rate": 4.61875e-05, "loss": 0.0012, "step": 32696 }, { "epoch": 30.5, "learning_rate": 4.618703358208955e-05, "loss": 0.0005, "step": 32700 }, { "epoch": 30.51, "learning_rate": 4.618656716417911e-05, "loss": 0.0004, "step": 32704 }, { "epoch": 30.51, "learning_rate": 4.6186100746268656e-05, "loss": 0.0019, "step": 32708 }, { "epoch": 30.51, "learning_rate": 4.618563432835821e-05, "loss": 0.001, "step": 32712 }, { "epoch": 30.52, "learning_rate": 4.6185167910447766e-05, "loss": 0.0, "step": 32716 }, { "epoch": 30.52, "learning_rate": 4.6184701492537314e-05, "loss": 0.001, "step": 32720 }, { "epoch": 30.53, "learning_rate": 4.618423507462687e-05, "loss": 0.0001, "step": 32724 }, { "epoch": 30.53, "learning_rate": 4.618376865671642e-05, "loss": 0.0002, "step": 32728 }, { "epoch": 30.53, "learning_rate": 4.618330223880597e-05, "loss": 0.0001, "step": 32732 }, { "epoch": 30.54, "learning_rate": 4.6182835820895527e-05, "loss": 0.0002, "step": 32736 }, { "epoch": 30.54, "learning_rate": 4.6182369402985075e-05, "loss": 0.0002, "step": 32740 }, { "epoch": 30.54, "learning_rate": 4.618190298507463e-05, "loss": 0.0, "step": 32744 }, { "epoch": 30.55, "learning_rate": 4.6181436567164185e-05, "loss": 0.0012, "step": 32748 }, { "epoch": 30.55, "learning_rate": 4.618097014925373e-05, "loss": 0.0002, "step": 32752 }, { "epoch": 30.56, "learning_rate": 4.618050373134328e-05, "loss": 0.0014, "step": 32756 }, { "epoch": 30.56, "learning_rate": 4.6180037313432836e-05, "loss": 0.0005, "step": 32760 }, { "epoch": 30.56, "learning_rate": 4.617957089552239e-05, "loss": 0.0001, "step": 32764 }, { "epoch": 30.57, "learning_rate": 4.6179104477611945e-05, "loss": 0.0012, "step": 32768 }, { "epoch": 30.57, "learning_rate": 4.6178638059701494e-05, "loss": 0.0006, "step": 32772 }, { "epoch": 30.57, "learning_rate": 4.617817164179105e-05, "loss": 0.0002, "step": 32776 }, { "epoch": 30.58, "learning_rate": 4.6177705223880603e-05, "loss": 0.0023, "step": 32780 }, { "epoch": 30.58, "learning_rate": 4.617723880597015e-05, "loss": 0.0011, "step": 32784 }, { "epoch": 30.59, "learning_rate": 4.61767723880597e-05, "loss": 0.0008, "step": 32788 }, { "epoch": 30.59, "learning_rate": 4.617630597014926e-05, "loss": 0.0011, "step": 32792 }, { "epoch": 30.59, "learning_rate": 4.617583955223881e-05, "loss": 0.0, "step": 32796 }, { "epoch": 30.6, "learning_rate": 4.617537313432836e-05, "loss": 0.0021, "step": 32800 }, { "epoch": 30.6, "learning_rate": 4.617490671641791e-05, "loss": 0.001, "step": 32804 }, { "epoch": 30.6, "learning_rate": 4.617444029850747e-05, "loss": 0.0003, "step": 32808 }, { "epoch": 30.61, "learning_rate": 4.6173973880597016e-05, "loss": 0.0001, "step": 32812 }, { "epoch": 30.61, "learning_rate": 4.617350746268657e-05, "loss": 0.0076, "step": 32816 }, { "epoch": 30.62, "learning_rate": 4.617304104477612e-05, "loss": 0.0001, "step": 32820 }, { "epoch": 30.62, "learning_rate": 4.6172574626865674e-05, "loss": 0.0002, "step": 32824 }, { "epoch": 30.62, "learning_rate": 4.617210820895523e-05, "loss": 0.0001, "step": 32828 }, { "epoch": 30.63, "learning_rate": 4.6171641791044777e-05, "loss": 0.002, "step": 32832 }, { "epoch": 30.63, "learning_rate": 4.617117537313433e-05, "loss": 0.0037, "step": 32836 }, { "epoch": 30.63, "learning_rate": 4.6170708955223886e-05, "loss": 0.0004, "step": 32840 }, { "epoch": 30.64, "learning_rate": 4.6170242537313435e-05, "loss": 0.001, "step": 32844 }, { "epoch": 30.64, "learning_rate": 4.616977611940298e-05, "loss": 0.0001, "step": 32848 }, { "epoch": 30.65, "learning_rate": 4.6169309701492544e-05, "loss": 0.0001, "step": 32852 }, { "epoch": 30.65, "learning_rate": 4.616884328358209e-05, "loss": 0.0, "step": 32856 }, { "epoch": 30.65, "learning_rate": 4.616837686567164e-05, "loss": 0.0011, "step": 32860 }, { "epoch": 30.66, "learning_rate": 4.6167910447761196e-05, "loss": 0.0023, "step": 32864 }, { "epoch": 30.66, "learning_rate": 4.616744402985075e-05, "loss": 0.0, "step": 32868 }, { "epoch": 30.66, "learning_rate": 4.61669776119403e-05, "loss": 0.0001, "step": 32872 }, { "epoch": 30.67, "learning_rate": 4.6166511194029853e-05, "loss": 0.0013, "step": 32876 }, { "epoch": 30.67, "learning_rate": 4.61660447761194e-05, "loss": 0.0, "step": 32880 }, { "epoch": 30.68, "learning_rate": 4.6165578358208957e-05, "loss": 0.0006, "step": 32884 }, { "epoch": 30.68, "learning_rate": 4.616511194029851e-05, "loss": 0.0001, "step": 32888 }, { "epoch": 30.68, "learning_rate": 4.616464552238806e-05, "loss": 0.0001, "step": 32892 }, { "epoch": 30.69, "learning_rate": 4.6164179104477614e-05, "loss": 0.0009, "step": 32896 }, { "epoch": 30.69, "learning_rate": 4.616371268656717e-05, "loss": 0.0, "step": 32900 }, { "epoch": 30.69, "learning_rate": 4.616324626865672e-05, "loss": 0.002, "step": 32904 }, { "epoch": 30.7, "learning_rate": 4.6162779850746266e-05, "loss": 0.0001, "step": 32908 }, { "epoch": 30.7, "learning_rate": 4.616231343283583e-05, "loss": 0.0, "step": 32912 }, { "epoch": 30.71, "learning_rate": 4.6161847014925375e-05, "loss": 0.0041, "step": 32916 }, { "epoch": 30.71, "learning_rate": 4.6161380597014924e-05, "loss": 0.0001, "step": 32920 }, { "epoch": 30.71, "learning_rate": 4.616091417910448e-05, "loss": 0.0002, "step": 32924 }, { "epoch": 30.72, "learning_rate": 4.616044776119403e-05, "loss": 0.0001, "step": 32928 }, { "epoch": 30.72, "learning_rate": 4.615998134328359e-05, "loss": 0.0005, "step": 32932 }, { "epoch": 30.72, "learning_rate": 4.6159514925373136e-05, "loss": 0.0006, "step": 32936 }, { "epoch": 30.73, "learning_rate": 4.6159048507462685e-05, "loss": 0.0002, "step": 32940 }, { "epoch": 30.73, "learning_rate": 4.6158582089552246e-05, "loss": 0.0004, "step": 32944 }, { "epoch": 30.73, "learning_rate": 4.6158115671641794e-05, "loss": 0.0068, "step": 32948 }, { "epoch": 30.74, "learning_rate": 4.615764925373134e-05, "loss": 0.0001, "step": 32952 }, { "epoch": 30.74, "learning_rate": 4.61571828358209e-05, "loss": 0.0001, "step": 32956 }, { "epoch": 30.75, "learning_rate": 4.615671641791045e-05, "loss": 0.0022, "step": 32960 }, { "epoch": 30.75, "learning_rate": 4.615625e-05, "loss": 0.0028, "step": 32964 }, { "epoch": 30.75, "learning_rate": 4.6155783582089555e-05, "loss": 0.0011, "step": 32968 }, { "epoch": 30.76, "learning_rate": 4.615531716417911e-05, "loss": 0.0004, "step": 32972 }, { "epoch": 30.76, "learning_rate": 4.615485074626866e-05, "loss": 0.0, "step": 32976 }, { "epoch": 30.76, "learning_rate": 4.615438432835821e-05, "loss": 0.0038, "step": 32980 }, { "epoch": 30.77, "learning_rate": 4.615391791044776e-05, "loss": 0.0007, "step": 32984 }, { "epoch": 30.77, "learning_rate": 4.6153451492537316e-05, "loss": 0.0, "step": 32988 }, { "epoch": 30.78, "learning_rate": 4.615298507462687e-05, "loss": 0.0004, "step": 32992 }, { "epoch": 30.78, "learning_rate": 4.615251865671642e-05, "loss": 0.0, "step": 32996 }, { "epoch": 30.78, "learning_rate": 4.615205223880597e-05, "loss": 0.0, "step": 33000 }, { "epoch": 30.78, "eval_exact_match": 0.7427466150870407, "eval_exec": 0.7707930367504836, "eval_loss": 0.3826291561126709, "eval_runtime": 1590.6112, "eval_samples_per_second": 0.65, "step": 33000 }, { "epoch": 30.79, "learning_rate": 4.615158582089553e-05, "loss": 0.0001, "step": 33004 }, { "epoch": 30.79, "learning_rate": 4.615111940298508e-05, "loss": 0.0, "step": 33008 }, { "epoch": 30.79, "learning_rate": 4.6150652985074625e-05, "loss": 0.0005, "step": 33012 }, { "epoch": 30.8, "learning_rate": 4.615018656716418e-05, "loss": 0.0007, "step": 33016 }, { "epoch": 30.8, "learning_rate": 4.6149720149253735e-05, "loss": 0.002, "step": 33020 }, { "epoch": 30.81, "learning_rate": 4.6149253731343283e-05, "loss": 0.0002, "step": 33024 }, { "epoch": 30.81, "learning_rate": 4.614878731343284e-05, "loss": 0.0003, "step": 33028 }, { "epoch": 30.81, "learning_rate": 4.614832089552239e-05, "loss": 0.0012, "step": 33032 }, { "epoch": 30.82, "learning_rate": 4.614785447761194e-05, "loss": 0.0001, "step": 33036 }, { "epoch": 30.82, "learning_rate": 4.6147388059701496e-05, "loss": 0.0003, "step": 33040 }, { "epoch": 30.82, "learning_rate": 4.6146921641791044e-05, "loss": 0.0001, "step": 33044 }, { "epoch": 30.83, "learning_rate": 4.61464552238806e-05, "loss": 0.0, "step": 33048 }, { "epoch": 30.83, "learning_rate": 4.6145988805970154e-05, "loss": 0.0001, "step": 33052 }, { "epoch": 30.84, "learning_rate": 4.61455223880597e-05, "loss": 0.0002, "step": 33056 }, { "epoch": 30.84, "learning_rate": 4.614505597014925e-05, "loss": 0.0001, "step": 33060 }, { "epoch": 30.84, "learning_rate": 4.614458955223881e-05, "loss": 0.0002, "step": 33064 }, { "epoch": 30.85, "learning_rate": 4.614412313432836e-05, "loss": 0.0001, "step": 33068 }, { "epoch": 30.85, "learning_rate": 4.614365671641791e-05, "loss": 0.001, "step": 33072 }, { "epoch": 30.85, "learning_rate": 4.614319029850746e-05, "loss": 0.0001, "step": 33076 }, { "epoch": 30.86, "learning_rate": 4.614272388059702e-05, "loss": 0.0031, "step": 33080 }, { "epoch": 30.86, "learning_rate": 4.6142257462686566e-05, "loss": 0.0001, "step": 33084 }, { "epoch": 30.87, "learning_rate": 4.614179104477612e-05, "loss": 0.001, "step": 33088 }, { "epoch": 30.87, "learning_rate": 4.6141324626865676e-05, "loss": 0.0007, "step": 33092 }, { "epoch": 30.87, "learning_rate": 4.614085820895523e-05, "loss": 0.0015, "step": 33096 }, { "epoch": 30.88, "learning_rate": 4.614039179104478e-05, "loss": 0.0001, "step": 33100 }, { "epoch": 30.88, "learning_rate": 4.613992537313433e-05, "loss": 0.0, "step": 33104 }, { "epoch": 30.88, "learning_rate": 4.613945895522388e-05, "loss": 0.0024, "step": 33108 }, { "epoch": 30.89, "learning_rate": 4.613899253731344e-05, "loss": 0.0012, "step": 33112 }, { "epoch": 30.89, "learning_rate": 4.6138526119402985e-05, "loss": 0.0003, "step": 33116 }, { "epoch": 30.9, "learning_rate": 4.613805970149254e-05, "loss": 0.0001, "step": 33120 }, { "epoch": 30.9, "learning_rate": 4.6137593283582095e-05, "loss": 0.0001, "step": 33124 }, { "epoch": 30.9, "learning_rate": 4.613712686567164e-05, "loss": 0.0004, "step": 33128 }, { "epoch": 30.91, "learning_rate": 4.61366604477612e-05, "loss": 0.0007, "step": 33132 }, { "epoch": 30.91, "learning_rate": 4.6136194029850746e-05, "loss": 0.0001, "step": 33136 }, { "epoch": 30.91, "learning_rate": 4.61357276119403e-05, "loss": 0.0002, "step": 33140 }, { "epoch": 30.92, "learning_rate": 4.6135261194029856e-05, "loss": 0.0002, "step": 33144 }, { "epoch": 30.92, "learning_rate": 4.6134794776119404e-05, "loss": 0.0, "step": 33148 }, { "epoch": 30.93, "learning_rate": 4.613432835820896e-05, "loss": 0.0001, "step": 33152 }, { "epoch": 30.93, "learning_rate": 4.6133861940298514e-05, "loss": 0.0001, "step": 33156 }, { "epoch": 30.93, "learning_rate": 4.613339552238806e-05, "loss": 0.0002, "step": 33160 }, { "epoch": 30.94, "learning_rate": 4.613292910447761e-05, "loss": 0.0013, "step": 33164 }, { "epoch": 30.94, "learning_rate": 4.6132462686567165e-05, "loss": 0.0011, "step": 33168 }, { "epoch": 30.94, "learning_rate": 4.613199626865672e-05, "loss": 0.0, "step": 33172 }, { "epoch": 30.95, "learning_rate": 4.613152985074627e-05, "loss": 0.0, "step": 33176 }, { "epoch": 30.95, "learning_rate": 4.613106343283582e-05, "loss": 0.0003, "step": 33180 }, { "epoch": 30.96, "learning_rate": 4.613059701492538e-05, "loss": 0.0002, "step": 33184 }, { "epoch": 30.96, "learning_rate": 4.6130130597014926e-05, "loss": 0.005, "step": 33188 }, { "epoch": 30.96, "learning_rate": 4.612966417910448e-05, "loss": 0.0004, "step": 33192 }, { "epoch": 30.97, "learning_rate": 4.612919776119403e-05, "loss": 0.0026, "step": 33196 }, { "epoch": 30.97, "learning_rate": 4.6128731343283584e-05, "loss": 0.0001, "step": 33200 }, { "epoch": 30.97, "learning_rate": 4.612826492537314e-05, "loss": 0.0016, "step": 33204 }, { "epoch": 30.98, "learning_rate": 4.612779850746269e-05, "loss": 0.0008, "step": 33208 }, { "epoch": 30.98, "learning_rate": 4.612733208955224e-05, "loss": 0.0031, "step": 33212 }, { "epoch": 30.98, "learning_rate": 4.61268656716418e-05, "loss": 0.0001, "step": 33216 }, { "epoch": 30.99, "learning_rate": 4.6126399253731345e-05, "loss": 0.0002, "step": 33220 }, { "epoch": 30.99, "learning_rate": 4.612593283582089e-05, "loss": 0.0014, "step": 33224 }, { "epoch": 31.0, "learning_rate": 4.612546641791045e-05, "loss": 0.0001, "step": 33228 }, { "epoch": 31.0, "learning_rate": 4.6125e-05, "loss": 0.003, "step": 33232 }, { "epoch": 31.0, "learning_rate": 4.612453358208955e-05, "loss": 0.0009, "step": 33236 }, { "epoch": 31.01, "learning_rate": 4.6124067164179106e-05, "loss": 0.001, "step": 33240 }, { "epoch": 31.01, "learning_rate": 4.612360074626866e-05, "loss": 0.003, "step": 33244 }, { "epoch": 31.01, "learning_rate": 4.612313432835821e-05, "loss": 0.0005, "step": 33248 }, { "epoch": 31.02, "learning_rate": 4.6122667910447764e-05, "loss": 0.0011, "step": 33252 }, { "epoch": 31.02, "learning_rate": 4.612220149253731e-05, "loss": 0.0022, "step": 33256 }, { "epoch": 31.03, "learning_rate": 4.6121735074626874e-05, "loss": 0.0002, "step": 33260 }, { "epoch": 31.03, "learning_rate": 4.612126865671642e-05, "loss": 0.0028, "step": 33264 }, { "epoch": 31.03, "learning_rate": 4.612080223880597e-05, "loss": 0.0013, "step": 33268 }, { "epoch": 31.04, "learning_rate": 4.6120335820895525e-05, "loss": 0.001, "step": 33272 }, { "epoch": 31.04, "learning_rate": 4.611986940298508e-05, "loss": 0.0004, "step": 33276 }, { "epoch": 31.04, "learning_rate": 4.611940298507463e-05, "loss": 0.0002, "step": 33280 }, { "epoch": 31.05, "learning_rate": 4.611893656716418e-05, "loss": 0.0001, "step": 33284 }, { "epoch": 31.05, "learning_rate": 4.611847014925373e-05, "loss": 0.005, "step": 33288 }, { "epoch": 31.06, "learning_rate": 4.6118003731343286e-05, "loss": 0.0001, "step": 33292 }, { "epoch": 31.06, "learning_rate": 4.611753731343284e-05, "loss": 0.0, "step": 33296 }, { "epoch": 31.06, "learning_rate": 4.611707089552239e-05, "loss": 0.0003, "step": 33300 }, { "epoch": 31.07, "learning_rate": 4.6116604477611944e-05, "loss": 0.0015, "step": 33304 }, { "epoch": 31.07, "learning_rate": 4.61161380597015e-05, "loss": 0.0002, "step": 33308 }, { "epoch": 31.07, "learning_rate": 4.611567164179105e-05, "loss": 0.0001, "step": 33312 }, { "epoch": 31.08, "learning_rate": 4.6115205223880595e-05, "loss": 0.0001, "step": 33316 }, { "epoch": 31.08, "learning_rate": 4.611473880597016e-05, "loss": 0.0, "step": 33320 }, { "epoch": 31.09, "learning_rate": 4.6114272388059705e-05, "loss": 0.0, "step": 33324 }, { "epoch": 31.09, "learning_rate": 4.611380597014925e-05, "loss": 0.0006, "step": 33328 }, { "epoch": 31.09, "learning_rate": 4.611333955223881e-05, "loss": 0.0002, "step": 33332 }, { "epoch": 31.1, "learning_rate": 4.611287313432836e-05, "loss": 0.0004, "step": 33336 }, { "epoch": 31.1, "learning_rate": 4.611240671641791e-05, "loss": 0.0, "step": 33340 }, { "epoch": 31.1, "learning_rate": 4.6111940298507466e-05, "loss": 0.0001, "step": 33344 }, { "epoch": 31.11, "learning_rate": 4.6111473880597014e-05, "loss": 0.0, "step": 33348 }, { "epoch": 31.11, "learning_rate": 4.611100746268657e-05, "loss": 0.0006, "step": 33352 }, { "epoch": 31.12, "learning_rate": 4.6110541044776124e-05, "loss": 0.0013, "step": 33356 }, { "epoch": 31.12, "learning_rate": 4.611007462686567e-05, "loss": 0.0001, "step": 33360 }, { "epoch": 31.12, "learning_rate": 4.610960820895523e-05, "loss": 0.0038, "step": 33364 }, { "epoch": 31.13, "learning_rate": 4.610914179104478e-05, "loss": 0.0016, "step": 33368 }, { "epoch": 31.13, "learning_rate": 4.610867537313433e-05, "loss": 0.0002, "step": 33372 }, { "epoch": 31.13, "learning_rate": 4.610820895522388e-05, "loss": 0.0001, "step": 33376 }, { "epoch": 31.14, "learning_rate": 4.610774253731344e-05, "loss": 0.0003, "step": 33380 }, { "epoch": 31.14, "learning_rate": 4.610727611940299e-05, "loss": 0.0107, "step": 33384 }, { "epoch": 31.15, "learning_rate": 4.6106809701492536e-05, "loss": 0.0001, "step": 33388 }, { "epoch": 31.15, "learning_rate": 4.610634328358209e-05, "loss": 0.0005, "step": 33392 }, { "epoch": 31.15, "learning_rate": 4.6105876865671646e-05, "loss": 0.0004, "step": 33396 }, { "epoch": 31.16, "learning_rate": 4.6105410447761194e-05, "loss": 0.0072, "step": 33400 }, { "epoch": 31.16, "learning_rate": 4.610494402985075e-05, "loss": 0.0, "step": 33404 }, { "epoch": 31.16, "learning_rate": 4.61044776119403e-05, "loss": 0.0003, "step": 33408 }, { "epoch": 31.17, "learning_rate": 4.610401119402985e-05, "loss": 0.0001, "step": 33412 }, { "epoch": 31.17, "learning_rate": 4.610354477611941e-05, "loss": 0.0004, "step": 33416 }, { "epoch": 31.18, "learning_rate": 4.6103078358208955e-05, "loss": 0.0, "step": 33420 }, { "epoch": 31.18, "learning_rate": 4.610261194029851e-05, "loss": 0.0001, "step": 33424 }, { "epoch": 31.18, "learning_rate": 4.6102145522388065e-05, "loss": 0.0001, "step": 33428 }, { "epoch": 31.19, "learning_rate": 4.610167910447761e-05, "loss": 0.0002, "step": 33432 }, { "epoch": 31.19, "learning_rate": 4.610121268656717e-05, "loss": 0.0032, "step": 33436 }, { "epoch": 31.19, "learning_rate": 4.6100746268656716e-05, "loss": 0.0002, "step": 33440 }, { "epoch": 31.2, "learning_rate": 4.610027985074627e-05, "loss": 0.0001, "step": 33444 }, { "epoch": 31.2, "learning_rate": 4.6099813432835826e-05, "loss": 0.0005, "step": 33448 }, { "epoch": 31.21, "learning_rate": 4.6099347014925374e-05, "loss": 0.0001, "step": 33452 }, { "epoch": 31.21, "learning_rate": 4.609888059701493e-05, "loss": 0.0001, "step": 33456 }, { "epoch": 31.21, "learning_rate": 4.6098414179104484e-05, "loss": 0.0, "step": 33460 }, { "epoch": 31.22, "learning_rate": 4.609794776119403e-05, "loss": 0.0001, "step": 33464 }, { "epoch": 31.22, "learning_rate": 4.609748134328358e-05, "loss": 0.0014, "step": 33468 }, { "epoch": 31.22, "learning_rate": 4.609701492537314e-05, "loss": 0.0008, "step": 33472 }, { "epoch": 31.23, "learning_rate": 4.609654850746269e-05, "loss": 0.0004, "step": 33476 }, { "epoch": 31.23, "learning_rate": 4.609608208955224e-05, "loss": 0.0002, "step": 33480 }, { "epoch": 31.24, "learning_rate": 4.609561567164179e-05, "loss": 0.0001, "step": 33484 }, { "epoch": 31.24, "learning_rate": 4.609514925373135e-05, "loss": 0.0001, "step": 33488 }, { "epoch": 31.24, "learning_rate": 4.6094682835820896e-05, "loss": 0.0036, "step": 33492 }, { "epoch": 31.25, "learning_rate": 4.609421641791045e-05, "loss": 0.0031, "step": 33496 }, { "epoch": 31.25, "learning_rate": 4.609375e-05, "loss": 0.005, "step": 33500 }, { "epoch": 31.25, "eval_exact_match": 0.7456479690522244, "eval_exec": 0.7659574468085106, "eval_loss": 0.4037356376647949, "eval_runtime": 1057.7845, "eval_samples_per_second": 0.978, "step": 33500 }, { "epoch": 31.25, "learning_rate": 4.6093283582089554e-05, "loss": 0.0006, "step": 33504 }, { "epoch": 31.26, "learning_rate": 4.609281716417911e-05, "loss": 0.0002, "step": 33508 }, { "epoch": 31.26, "learning_rate": 4.609235074626866e-05, "loss": 0.0019, "step": 33512 }, { "epoch": 31.26, "learning_rate": 4.609188432835821e-05, "loss": 0.0002, "step": 33516 }, { "epoch": 31.27, "learning_rate": 4.609141791044777e-05, "loss": 0.0003, "step": 33520 }, { "epoch": 31.27, "learning_rate": 4.6090951492537315e-05, "loss": 0.0, "step": 33524 }, { "epoch": 31.28, "learning_rate": 4.609048507462686e-05, "loss": 0.0033, "step": 33528 }, { "epoch": 31.28, "learning_rate": 4.6090018656716425e-05, "loss": 0.0, "step": 33532 }, { "epoch": 31.28, "learning_rate": 4.608955223880597e-05, "loss": 0.0001, "step": 33536 }, { "epoch": 31.29, "learning_rate": 4.608908582089552e-05, "loss": 0.0003, "step": 33540 }, { "epoch": 31.29, "learning_rate": 4.6088619402985076e-05, "loss": 0.0002, "step": 33544 }, { "epoch": 31.29, "learning_rate": 4.608815298507463e-05, "loss": 0.0035, "step": 33548 }, { "epoch": 31.3, "learning_rate": 4.608768656716418e-05, "loss": 0.0, "step": 33552 }, { "epoch": 31.3, "learning_rate": 4.6087220149253734e-05, "loss": 0.0004, "step": 33556 }, { "epoch": 31.31, "learning_rate": 4.608675373134328e-05, "loss": 0.0001, "step": 33560 }, { "epoch": 31.31, "learning_rate": 4.608628731343284e-05, "loss": 0.0001, "step": 33564 }, { "epoch": 31.31, "learning_rate": 4.608582089552239e-05, "loss": 0.0002, "step": 33568 }, { "epoch": 31.32, "learning_rate": 4.608535447761194e-05, "loss": 0.0005, "step": 33572 }, { "epoch": 31.32, "learning_rate": 4.6084888059701495e-05, "loss": 0.0007, "step": 33576 }, { "epoch": 31.32, "learning_rate": 4.608442164179105e-05, "loss": 0.0002, "step": 33580 }, { "epoch": 31.33, "learning_rate": 4.60839552238806e-05, "loss": 0.0003, "step": 33584 }, { "epoch": 31.33, "learning_rate": 4.608348880597015e-05, "loss": 0.0001, "step": 33588 }, { "epoch": 31.34, "learning_rate": 4.608302238805971e-05, "loss": 0.0, "step": 33592 }, { "epoch": 31.34, "learning_rate": 4.6082555970149256e-05, "loss": 0.0001, "step": 33596 }, { "epoch": 31.34, "learning_rate": 4.608208955223881e-05, "loss": 0.0, "step": 33600 }, { "epoch": 31.35, "learning_rate": 4.608162313432836e-05, "loss": 0.0013, "step": 33604 }, { "epoch": 31.35, "learning_rate": 4.6081156716417914e-05, "loss": 0.0, "step": 33608 }, { "epoch": 31.35, "learning_rate": 4.608069029850747e-05, "loss": 0.0002, "step": 33612 }, { "epoch": 31.36, "learning_rate": 4.608022388059702e-05, "loss": 0.002, "step": 33616 }, { "epoch": 31.36, "learning_rate": 4.6079757462686565e-05, "loss": 0.0, "step": 33620 }, { "epoch": 31.37, "learning_rate": 4.6079291044776127e-05, "loss": 0.0001, "step": 33624 }, { "epoch": 31.37, "learning_rate": 4.6078824626865675e-05, "loss": 0.0053, "step": 33628 }, { "epoch": 31.37, "learning_rate": 4.607835820895522e-05, "loss": 0.0003, "step": 33632 }, { "epoch": 31.38, "learning_rate": 4.607789179104478e-05, "loss": 0.0008, "step": 33636 }, { "epoch": 31.38, "learning_rate": 4.607742537313433e-05, "loss": 0.0, "step": 33640 }, { "epoch": 31.38, "learning_rate": 4.607695895522388e-05, "loss": 0.0028, "step": 33644 }, { "epoch": 31.39, "learning_rate": 4.6076492537313436e-05, "loss": 0.0001, "step": 33648 }, { "epoch": 31.39, "learning_rate": 4.607602611940299e-05, "loss": 0.001, "step": 33652 }, { "epoch": 31.4, "learning_rate": 4.607555970149254e-05, "loss": 0.0003, "step": 33656 }, { "epoch": 31.4, "learning_rate": 4.6075093283582094e-05, "loss": 0.0077, "step": 33660 }, { "epoch": 31.4, "learning_rate": 4.607462686567164e-05, "loss": 0.0003, "step": 33664 }, { "epoch": 31.41, "learning_rate": 4.60741604477612e-05, "loss": 0.0001, "step": 33668 }, { "epoch": 31.41, "learning_rate": 4.607369402985075e-05, "loss": 0.0001, "step": 33672 }, { "epoch": 31.41, "learning_rate": 4.60732276119403e-05, "loss": 0.0019, "step": 33676 }, { "epoch": 31.42, "learning_rate": 4.607276119402985e-05, "loss": 0.0003, "step": 33680 }, { "epoch": 31.42, "learning_rate": 4.607229477611941e-05, "loss": 0.0001, "step": 33684 }, { "epoch": 31.43, "learning_rate": 4.607182835820896e-05, "loss": 0.0036, "step": 33688 }, { "epoch": 31.43, "learning_rate": 4.6071361940298506e-05, "loss": 0.0, "step": 33692 }, { "epoch": 31.43, "learning_rate": 4.607089552238806e-05, "loss": 0.0002, "step": 33696 }, { "epoch": 31.44, "learning_rate": 4.6070429104477616e-05, "loss": 0.0004, "step": 33700 }, { "epoch": 31.44, "learning_rate": 4.6069962686567164e-05, "loss": 0.0, "step": 33704 }, { "epoch": 31.44, "learning_rate": 4.606949626865672e-05, "loss": 0.0001, "step": 33708 }, { "epoch": 31.45, "learning_rate": 4.6069029850746274e-05, "loss": 0.0001, "step": 33712 }, { "epoch": 31.45, "learning_rate": 4.606856343283582e-05, "loss": 0.0005, "step": 33716 }, { "epoch": 31.46, "learning_rate": 4.6068097014925377e-05, "loss": 0.0005, "step": 33720 }, { "epoch": 31.46, "learning_rate": 4.6067630597014925e-05, "loss": 0.001, "step": 33724 }, { "epoch": 31.46, "learning_rate": 4.606716417910448e-05, "loss": 0.0002, "step": 33728 }, { "epoch": 31.47, "learning_rate": 4.6066697761194034e-05, "loss": 0.0002, "step": 33732 }, { "epoch": 31.47, "learning_rate": 4.606623134328358e-05, "loss": 0.0006, "step": 33736 }, { "epoch": 31.47, "learning_rate": 4.606576492537313e-05, "loss": 0.0002, "step": 33740 }, { "epoch": 31.48, "learning_rate": 4.606529850746269e-05, "loss": 0.0, "step": 33744 }, { "epoch": 31.48, "learning_rate": 4.606483208955224e-05, "loss": 0.0001, "step": 33748 }, { "epoch": 31.49, "learning_rate": 4.6064365671641795e-05, "loss": 0.0, "step": 33752 }, { "epoch": 31.49, "learning_rate": 4.6063899253731344e-05, "loss": 0.0014, "step": 33756 }, { "epoch": 31.49, "learning_rate": 4.60634328358209e-05, "loss": 0.0001, "step": 33760 }, { "epoch": 31.5, "learning_rate": 4.6062966417910453e-05, "loss": 0.0001, "step": 33764 }, { "epoch": 31.5, "learning_rate": 4.60625e-05, "loss": 0.0038, "step": 33768 }, { "epoch": 31.5, "learning_rate": 4.6062033582089556e-05, "loss": 0.0001, "step": 33772 }, { "epoch": 31.51, "learning_rate": 4.606156716417911e-05, "loss": 0.0, "step": 33776 }, { "epoch": 31.51, "learning_rate": 4.606110074626866e-05, "loss": 0.0001, "step": 33780 }, { "epoch": 31.51, "learning_rate": 4.606063432835821e-05, "loss": 0.0028, "step": 33784 }, { "epoch": 31.52, "learning_rate": 4.606016791044776e-05, "loss": 0.0003, "step": 33788 }, { "epoch": 31.52, "learning_rate": 4.605970149253732e-05, "loss": 0.0, "step": 33792 }, { "epoch": 31.53, "learning_rate": 4.6059235074626866e-05, "loss": 0.0, "step": 33796 }, { "epoch": 31.53, "learning_rate": 4.605876865671642e-05, "loss": 0.0, "step": 33800 }, { "epoch": 31.53, "learning_rate": 4.6058302238805975e-05, "loss": 0.0001, "step": 33804 }, { "epoch": 31.54, "learning_rate": 4.6057835820895524e-05, "loss": 0.0001, "step": 33808 }, { "epoch": 31.54, "learning_rate": 4.605736940298508e-05, "loss": 0.0105, "step": 33812 }, { "epoch": 31.54, "learning_rate": 4.6056902985074627e-05, "loss": 0.0003, "step": 33816 }, { "epoch": 31.55, "learning_rate": 4.605643656716418e-05, "loss": 0.0014, "step": 33820 }, { "epoch": 31.55, "learning_rate": 4.6055970149253736e-05, "loss": 0.0003, "step": 33824 }, { "epoch": 31.56, "learning_rate": 4.6055503731343285e-05, "loss": 0.0152, "step": 33828 }, { "epoch": 31.56, "learning_rate": 4.605503731343284e-05, "loss": 0.0, "step": 33832 }, { "epoch": 31.56, "learning_rate": 4.6054570895522394e-05, "loss": 0.0018, "step": 33836 }, { "epoch": 31.57, "learning_rate": 4.605410447761194e-05, "loss": 0.0004, "step": 33840 }, { "epoch": 31.57, "learning_rate": 4.605363805970149e-05, "loss": 0.0031, "step": 33844 }, { "epoch": 31.57, "learning_rate": 4.6053171641791045e-05, "loss": 0.0, "step": 33848 }, { "epoch": 31.58, "learning_rate": 4.60527052238806e-05, "loss": 0.0001, "step": 33852 }, { "epoch": 31.58, "learning_rate": 4.605223880597015e-05, "loss": 0.0002, "step": 33856 }, { "epoch": 31.59, "learning_rate": 4.6051772388059703e-05, "loss": 0.0066, "step": 33860 }, { "epoch": 31.59, "learning_rate": 4.605130597014926e-05, "loss": 0.0006, "step": 33864 }, { "epoch": 31.59, "learning_rate": 4.6050839552238806e-05, "loss": 0.0001, "step": 33868 }, { "epoch": 31.6, "learning_rate": 4.605037313432836e-05, "loss": 0.0, "step": 33872 }, { "epoch": 31.6, "learning_rate": 4.604990671641791e-05, "loss": 0.0017, "step": 33876 }, { "epoch": 31.6, "learning_rate": 4.6049440298507464e-05, "loss": 0.0001, "step": 33880 }, { "epoch": 31.61, "learning_rate": 4.604897388059702e-05, "loss": 0.0003, "step": 33884 }, { "epoch": 31.61, "learning_rate": 4.604850746268657e-05, "loss": 0.0007, "step": 33888 }, { "epoch": 31.62, "learning_rate": 4.604804104477612e-05, "loss": 0.0003, "step": 33892 }, { "epoch": 31.62, "learning_rate": 4.604757462686568e-05, "loss": 0.001, "step": 33896 }, { "epoch": 31.62, "learning_rate": 4.6047108208955225e-05, "loss": 0.0001, "step": 33900 }, { "epoch": 31.63, "learning_rate": 4.6046641791044774e-05, "loss": 0.0033, "step": 33904 }, { "epoch": 31.63, "learning_rate": 4.604617537313433e-05, "loss": 0.0002, "step": 33908 }, { "epoch": 31.63, "learning_rate": 4.604570895522388e-05, "loss": 0.0012, "step": 33912 }, { "epoch": 31.64, "learning_rate": 4.604524253731344e-05, "loss": 0.0001, "step": 33916 }, { "epoch": 31.64, "learning_rate": 4.6044776119402986e-05, "loss": 0.0001, "step": 33920 }, { "epoch": 31.65, "learning_rate": 4.604430970149254e-05, "loss": 0.0001, "step": 33924 }, { "epoch": 31.65, "learning_rate": 4.6043843283582096e-05, "loss": 0.0002, "step": 33928 }, { "epoch": 31.65, "learning_rate": 4.6043376865671644e-05, "loss": 0.0003, "step": 33932 }, { "epoch": 31.66, "learning_rate": 4.604291044776119e-05, "loss": 0.0003, "step": 33936 }, { "epoch": 31.66, "learning_rate": 4.6042444029850754e-05, "loss": 0.0039, "step": 33940 }, { "epoch": 31.66, "learning_rate": 4.60419776119403e-05, "loss": 0.0004, "step": 33944 }, { "epoch": 31.67, "learning_rate": 4.604151119402985e-05, "loss": 0.0002, "step": 33948 }, { "epoch": 31.67, "learning_rate": 4.6041044776119405e-05, "loss": 0.0, "step": 33952 }, { "epoch": 31.68, "learning_rate": 4.604057835820896e-05, "loss": 0.0001, "step": 33956 }, { "epoch": 31.68, "learning_rate": 4.604011194029851e-05, "loss": 0.0003, "step": 33960 }, { "epoch": 31.68, "learning_rate": 4.603964552238806e-05, "loss": 0.0001, "step": 33964 }, { "epoch": 31.69, "learning_rate": 4.603917910447761e-05, "loss": 0.0018, "step": 33968 }, { "epoch": 31.69, "learning_rate": 4.6038712686567166e-05, "loss": 0.0001, "step": 33972 }, { "epoch": 31.69, "learning_rate": 4.603824626865672e-05, "loss": 0.0002, "step": 33976 }, { "epoch": 31.7, "learning_rate": 4.603777985074627e-05, "loss": 0.0014, "step": 33980 }, { "epoch": 31.7, "learning_rate": 4.6037313432835824e-05, "loss": 0.0003, "step": 33984 }, { "epoch": 31.71, "learning_rate": 4.603684701492538e-05, "loss": 0.0001, "step": 33988 }, { "epoch": 31.71, "learning_rate": 4.603638059701493e-05, "loss": 0.0005, "step": 33992 }, { "epoch": 31.71, "learning_rate": 4.6035914179104475e-05, "loss": 0.0028, "step": 33996 }, { "epoch": 31.72, "learning_rate": 4.603544776119404e-05, "loss": 0.0, "step": 34000 }, { "epoch": 31.72, "eval_exact_match": 0.7379110251450677, "eval_exec": 0.7572533849129593, "eval_loss": 0.42316770553588867, "eval_runtime": 1045.9822, "eval_samples_per_second": 0.989, "step": 34000 }, { "epoch": 31.72, "learning_rate": 4.6034981343283585e-05, "loss": 0.0001, "step": 34004 }, { "epoch": 31.72, "learning_rate": 4.603451492537313e-05, "loss": 0.0, "step": 34008 }, { "epoch": 31.73, "learning_rate": 4.603404850746269e-05, "loss": 0.0, "step": 34012 }, { "epoch": 31.73, "learning_rate": 4.603358208955224e-05, "loss": 0.0002, "step": 34016 }, { "epoch": 31.73, "learning_rate": 4.603311567164179e-05, "loss": 0.0004, "step": 34020 }, { "epoch": 31.74, "learning_rate": 4.6032649253731346e-05, "loss": 0.0029, "step": 34024 }, { "epoch": 31.74, "learning_rate": 4.6032182835820894e-05, "loss": 0.0001, "step": 34028 }, { "epoch": 31.75, "learning_rate": 4.603171641791045e-05, "loss": 0.0004, "step": 34032 }, { "epoch": 31.75, "learning_rate": 4.6031250000000004e-05, "loss": 0.0001, "step": 34036 }, { "epoch": 31.75, "learning_rate": 4.603078358208955e-05, "loss": 0.0, "step": 34040 }, { "epoch": 31.76, "learning_rate": 4.603031716417911e-05, "loss": 0.0021, "step": 34044 }, { "epoch": 31.76, "learning_rate": 4.602985074626866e-05, "loss": 0.0, "step": 34048 }, { "epoch": 31.76, "learning_rate": 4.602938432835821e-05, "loss": 0.001, "step": 34052 }, { "epoch": 31.77, "learning_rate": 4.602891791044776e-05, "loss": 0.0001, "step": 34056 }, { "epoch": 31.77, "learning_rate": 4.602845149253732e-05, "loss": 0.0001, "step": 34060 }, { "epoch": 31.78, "learning_rate": 4.602798507462687e-05, "loss": 0.0004, "step": 34064 }, { "epoch": 31.78, "learning_rate": 4.6027518656716416e-05, "loss": 0.0, "step": 34068 }, { "epoch": 31.78, "learning_rate": 4.602705223880597e-05, "loss": 0.0, "step": 34072 }, { "epoch": 31.79, "learning_rate": 4.6026585820895526e-05, "loss": 0.0002, "step": 34076 }, { "epoch": 31.79, "learning_rate": 4.602611940298508e-05, "loss": 0.0005, "step": 34080 }, { "epoch": 31.79, "learning_rate": 4.602565298507463e-05, "loss": 0.0015, "step": 34084 }, { "epoch": 31.8, "learning_rate": 4.602518656716418e-05, "loss": 0.0002, "step": 34088 }, { "epoch": 31.8, "learning_rate": 4.602472014925374e-05, "loss": 0.0008, "step": 34092 }, { "epoch": 31.81, "learning_rate": 4.602425373134329e-05, "loss": 0.0009, "step": 34096 }, { "epoch": 31.81, "learning_rate": 4.6023787313432835e-05, "loss": 0.0005, "step": 34100 }, { "epoch": 31.81, "learning_rate": 4.602332089552239e-05, "loss": 0.0059, "step": 34104 }, { "epoch": 31.82, "learning_rate": 4.6022854477611945e-05, "loss": 0.0012, "step": 34108 }, { "epoch": 31.82, "learning_rate": 4.602238805970149e-05, "loss": 0.0017, "step": 34112 }, { "epoch": 31.82, "learning_rate": 4.602192164179105e-05, "loss": 0.0003, "step": 34116 }, { "epoch": 31.83, "learning_rate": 4.6021455223880596e-05, "loss": 0.0021, "step": 34120 }, { "epoch": 31.83, "learning_rate": 4.602098880597015e-05, "loss": 0.0008, "step": 34124 }, { "epoch": 31.84, "learning_rate": 4.6020522388059706e-05, "loss": 0.0001, "step": 34128 }, { "epoch": 31.84, "learning_rate": 4.6020055970149254e-05, "loss": 0.0001, "step": 34132 }, { "epoch": 31.84, "learning_rate": 4.601958955223881e-05, "loss": 0.0, "step": 34136 }, { "epoch": 31.85, "learning_rate": 4.6019123134328364e-05, "loss": 0.0001, "step": 34140 }, { "epoch": 31.85, "learning_rate": 4.601865671641791e-05, "loss": 0.0032, "step": 34144 }, { "epoch": 31.85, "learning_rate": 4.601819029850746e-05, "loss": 0.0001, "step": 34148 }, { "epoch": 31.86, "learning_rate": 4.601772388059702e-05, "loss": 0.0007, "step": 34152 }, { "epoch": 31.86, "learning_rate": 4.601725746268657e-05, "loss": 0.0001, "step": 34156 }, { "epoch": 31.87, "learning_rate": 4.601679104477612e-05, "loss": 0.003, "step": 34160 }, { "epoch": 31.87, "learning_rate": 4.601632462686567e-05, "loss": 0.0003, "step": 34164 }, { "epoch": 31.87, "learning_rate": 4.601585820895523e-05, "loss": 0.0003, "step": 34168 }, { "epoch": 31.88, "learning_rate": 4.6015391791044776e-05, "loss": 0.0027, "step": 34172 }, { "epoch": 31.88, "learning_rate": 4.601492537313433e-05, "loss": 0.0, "step": 34176 }, { "epoch": 31.88, "learning_rate": 4.601445895522388e-05, "loss": 0.0005, "step": 34180 }, { "epoch": 31.89, "learning_rate": 4.6013992537313434e-05, "loss": 0.0002, "step": 34184 }, { "epoch": 31.89, "learning_rate": 4.601352611940299e-05, "loss": 0.0001, "step": 34188 }, { "epoch": 31.9, "learning_rate": 4.601305970149254e-05, "loss": 0.0, "step": 34192 }, { "epoch": 31.9, "learning_rate": 4.601259328358209e-05, "loss": 0.0004, "step": 34196 }, { "epoch": 31.9, "learning_rate": 4.601212686567165e-05, "loss": 0.0, "step": 34200 }, { "epoch": 31.91, "learning_rate": 4.6011660447761195e-05, "loss": 0.0008, "step": 34204 }, { "epoch": 31.91, "learning_rate": 4.601119402985074e-05, "loss": 0.0017, "step": 34208 }, { "epoch": 31.91, "learning_rate": 4.6010727611940305e-05, "loss": 0.0002, "step": 34212 }, { "epoch": 31.92, "learning_rate": 4.601026119402985e-05, "loss": 0.0001, "step": 34216 }, { "epoch": 31.92, "learning_rate": 4.60097947761194e-05, "loss": 0.0001, "step": 34220 }, { "epoch": 31.93, "learning_rate": 4.6009328358208956e-05, "loss": 0.0009, "step": 34224 }, { "epoch": 31.93, "learning_rate": 4.600886194029851e-05, "loss": 0.001, "step": 34228 }, { "epoch": 31.93, "learning_rate": 4.600839552238806e-05, "loss": 0.0001, "step": 34232 }, { "epoch": 31.94, "learning_rate": 4.6007929104477614e-05, "loss": 0.001, "step": 34236 }, { "epoch": 31.94, "learning_rate": 4.600746268656716e-05, "loss": 0.0001, "step": 34240 }, { "epoch": 31.94, "learning_rate": 4.6006996268656724e-05, "loss": 0.0, "step": 34244 }, { "epoch": 31.95, "learning_rate": 4.600652985074627e-05, "loss": 0.0044, "step": 34248 }, { "epoch": 31.95, "learning_rate": 4.600606343283582e-05, "loss": 0.0002, "step": 34252 }, { "epoch": 31.96, "learning_rate": 4.6005597014925375e-05, "loss": 0.0004, "step": 34256 }, { "epoch": 31.96, "learning_rate": 4.600513059701493e-05, "loss": 0.0011, "step": 34260 }, { "epoch": 31.96, "learning_rate": 4.600466417910448e-05, "loss": 0.0005, "step": 34264 }, { "epoch": 31.97, "learning_rate": 4.600419776119403e-05, "loss": 0.0033, "step": 34268 }, { "epoch": 31.97, "learning_rate": 4.600373134328359e-05, "loss": 0.0001, "step": 34272 }, { "epoch": 31.97, "learning_rate": 4.6003264925373136e-05, "loss": 0.0001, "step": 34276 }, { "epoch": 31.98, "learning_rate": 4.600279850746269e-05, "loss": 0.0001, "step": 34280 }, { "epoch": 31.98, "learning_rate": 4.600233208955224e-05, "loss": 0.0, "step": 34284 }, { "epoch": 31.98, "learning_rate": 4.6001865671641794e-05, "loss": 0.0001, "step": 34288 }, { "epoch": 31.99, "learning_rate": 4.600139925373135e-05, "loss": 0.0015, "step": 34292 }, { "epoch": 31.99, "learning_rate": 4.60009328358209e-05, "loss": 0.0002, "step": 34296 }, { "epoch": 32.0, "learning_rate": 4.6000466417910445e-05, "loss": 0.0002, "step": 34300 }, { "epoch": 32.0, "learning_rate": 4.600000000000001e-05, "loss": 0.001, "step": 34304 }, { "epoch": 32.0, "learning_rate": 4.5999533582089555e-05, "loss": 0.0001, "step": 34308 }, { "epoch": 32.01, "learning_rate": 4.59990671641791e-05, "loss": 0.0013, "step": 34312 }, { "epoch": 32.01, "learning_rate": 4.599860074626866e-05, "loss": 0.0017, "step": 34316 }, { "epoch": 32.01, "learning_rate": 4.599813432835821e-05, "loss": 0.0014, "step": 34320 }, { "epoch": 32.02, "learning_rate": 4.599766791044776e-05, "loss": 0.0001, "step": 34324 }, { "epoch": 32.02, "learning_rate": 4.5997201492537316e-05, "loss": 0.0007, "step": 34328 }, { "epoch": 32.03, "learning_rate": 4.599673507462687e-05, "loss": 0.0001, "step": 34332 }, { "epoch": 32.03, "learning_rate": 4.599626865671642e-05, "loss": 0.0001, "step": 34336 }, { "epoch": 32.03, "learning_rate": 4.5995802238805974e-05, "loss": 0.0001, "step": 34340 }, { "epoch": 32.04, "learning_rate": 4.599533582089552e-05, "loss": 0.0001, "step": 34344 }, { "epoch": 32.04, "learning_rate": 4.599486940298508e-05, "loss": 0.0, "step": 34348 }, { "epoch": 32.04, "learning_rate": 4.599440298507463e-05, "loss": 0.0, "step": 34352 }, { "epoch": 32.05, "learning_rate": 4.599393656716418e-05, "loss": 0.0, "step": 34356 }, { "epoch": 32.05, "learning_rate": 4.599347014925373e-05, "loss": 0.0003, "step": 34360 }, { "epoch": 32.06, "learning_rate": 4.599300373134329e-05, "loss": 0.0, "step": 34364 }, { "epoch": 32.06, "learning_rate": 4.599253731343284e-05, "loss": 0.0004, "step": 34368 }, { "epoch": 32.06, "learning_rate": 4.5992070895522386e-05, "loss": 0.0, "step": 34372 }, { "epoch": 32.07, "learning_rate": 4.599160447761194e-05, "loss": 0.0001, "step": 34376 }, { "epoch": 32.07, "learning_rate": 4.5991138059701496e-05, "loss": 0.0001, "step": 34380 }, { "epoch": 32.07, "learning_rate": 4.5990671641791044e-05, "loss": 0.0128, "step": 34384 }, { "epoch": 32.08, "learning_rate": 4.59902052238806e-05, "loss": 0.0002, "step": 34388 }, { "epoch": 32.08, "learning_rate": 4.5989738805970154e-05, "loss": 0.0038, "step": 34392 }, { "epoch": 32.09, "learning_rate": 4.59892723880597e-05, "loss": 0.0015, "step": 34396 }, { "epoch": 32.09, "learning_rate": 4.598880597014926e-05, "loss": 0.0, "step": 34400 }, { "epoch": 32.09, "learning_rate": 4.5988339552238805e-05, "loss": 0.0067, "step": 34404 }, { "epoch": 32.1, "learning_rate": 4.598787313432836e-05, "loss": 0.0001, "step": 34408 }, { "epoch": 32.1, "learning_rate": 4.5987406716417915e-05, "loss": 0.0, "step": 34412 }, { "epoch": 32.1, "learning_rate": 4.598694029850746e-05, "loss": 0.0002, "step": 34416 }, { "epoch": 32.11, "learning_rate": 4.598647388059702e-05, "loss": 0.0, "step": 34420 }, { "epoch": 32.11, "learning_rate": 4.598600746268657e-05, "loss": 0.0002, "step": 34424 }, { "epoch": 32.12, "learning_rate": 4.598554104477612e-05, "loss": 0.0001, "step": 34428 }, { "epoch": 32.12, "learning_rate": 4.5985074626865676e-05, "loss": 0.0, "step": 34432 }, { "epoch": 32.12, "learning_rate": 4.5984608208955224e-05, "loss": 0.0, "step": 34436 }, { "epoch": 32.13, "learning_rate": 4.598414179104478e-05, "loss": 0.0003, "step": 34440 }, { "epoch": 32.13, "learning_rate": 4.5983675373134334e-05, "loss": 0.0, "step": 34444 }, { "epoch": 32.13, "learning_rate": 4.598320895522388e-05, "loss": 0.0, "step": 34448 }, { "epoch": 32.14, "learning_rate": 4.598274253731344e-05, "loss": 0.0001, "step": 34452 }, { "epoch": 32.14, "learning_rate": 4.598227611940299e-05, "loss": 0.0005, "step": 34456 }, { "epoch": 32.15, "learning_rate": 4.598180970149254e-05, "loss": 0.0066, "step": 34460 }, { "epoch": 32.15, "learning_rate": 4.598134328358209e-05, "loss": 0.0007, "step": 34464 }, { "epoch": 32.15, "learning_rate": 4.598087686567164e-05, "loss": 0.0022, "step": 34468 }, { "epoch": 32.16, "learning_rate": 4.59804104477612e-05, "loss": 0.0, "step": 34472 }, { "epoch": 32.16, "learning_rate": 4.5979944029850746e-05, "loss": 0.001, "step": 34476 }, { "epoch": 32.16, "learning_rate": 4.59794776119403e-05, "loss": 0.0, "step": 34480 }, { "epoch": 32.17, "learning_rate": 4.5979011194029856e-05, "loss": 0.0001, "step": 34484 }, { "epoch": 32.17, "learning_rate": 4.5978544776119404e-05, "loss": 0.0052, "step": 34488 }, { "epoch": 32.18, "learning_rate": 4.597807835820896e-05, "loss": 0.0, "step": 34492 }, { "epoch": 32.18, "learning_rate": 4.597761194029851e-05, "loss": 0.0, "step": 34496 }, { "epoch": 32.18, "learning_rate": 4.597714552238806e-05, "loss": 0.0005, "step": 34500 }, { "epoch": 32.18, "eval_exact_match": 0.7282398452611218, "eval_exec": 0.7495164410058027, "eval_loss": 0.42082998156547546, "eval_runtime": 1063.8322, "eval_samples_per_second": 0.972, "step": 34500 }, { "epoch": 32.19, "learning_rate": 4.597667910447762e-05, "loss": 0.0003, "step": 34504 }, { "epoch": 32.19, "learning_rate": 4.5976212686567165e-05, "loss": 0.0007, "step": 34508 }, { "epoch": 32.19, "learning_rate": 4.597574626865672e-05, "loss": 0.0003, "step": 34512 }, { "epoch": 32.2, "learning_rate": 4.5975279850746275e-05, "loss": 0.0014, "step": 34516 }, { "epoch": 32.2, "learning_rate": 4.597481343283582e-05, "loss": 0.0015, "step": 34520 }, { "epoch": 32.21, "learning_rate": 4.597434701492537e-05, "loss": 0.0, "step": 34524 }, { "epoch": 32.21, "learning_rate": 4.5973880597014926e-05, "loss": 0.0003, "step": 34528 }, { "epoch": 32.21, "learning_rate": 4.597341417910448e-05, "loss": 0.0005, "step": 34532 }, { "epoch": 32.22, "learning_rate": 4.597294776119403e-05, "loss": 0.0, "step": 34536 }, { "epoch": 32.22, "learning_rate": 4.5972481343283584e-05, "loss": 0.0002, "step": 34540 }, { "epoch": 32.22, "learning_rate": 4.597201492537314e-05, "loss": 0.0, "step": 34544 }, { "epoch": 32.23, "learning_rate": 4.597154850746269e-05, "loss": 0.0001, "step": 34548 }, { "epoch": 32.23, "learning_rate": 4.597108208955224e-05, "loss": 0.0026, "step": 34552 }, { "epoch": 32.24, "learning_rate": 4.597061567164179e-05, "loss": 0.0001, "step": 34556 }, { "epoch": 32.24, "learning_rate": 4.5970149253731345e-05, "loss": 0.0009, "step": 34560 }, { "epoch": 32.24, "learning_rate": 4.59696828358209e-05, "loss": 0.0001, "step": 34564 }, { "epoch": 32.25, "learning_rate": 4.596921641791045e-05, "loss": 0.008, "step": 34568 }, { "epoch": 32.25, "learning_rate": 4.596875e-05, "loss": 0.0003, "step": 34572 }, { "epoch": 32.25, "learning_rate": 4.596828358208956e-05, "loss": 0.0008, "step": 34576 }, { "epoch": 32.26, "learning_rate": 4.5967817164179106e-05, "loss": 0.0063, "step": 34580 }, { "epoch": 32.26, "learning_rate": 4.596735074626866e-05, "loss": 0.0002, "step": 34584 }, { "epoch": 32.26, "learning_rate": 4.596688432835821e-05, "loss": 0.0002, "step": 34588 }, { "epoch": 32.27, "learning_rate": 4.5966417910447764e-05, "loss": 0.0001, "step": 34592 }, { "epoch": 32.27, "learning_rate": 4.596595149253732e-05, "loss": 0.0008, "step": 34596 }, { "epoch": 32.28, "learning_rate": 4.596548507462687e-05, "loss": 0.0, "step": 34600 }, { "epoch": 32.28, "learning_rate": 4.596501865671642e-05, "loss": 0.0031, "step": 34604 }, { "epoch": 32.28, "learning_rate": 4.5964552238805977e-05, "loss": 0.0001, "step": 34608 }, { "epoch": 32.29, "learning_rate": 4.5964085820895525e-05, "loss": 0.0, "step": 34612 }, { "epoch": 32.29, "learning_rate": 4.596361940298507e-05, "loss": 0.0001, "step": 34616 }, { "epoch": 32.29, "learning_rate": 4.5963152985074634e-05, "loss": 0.0011, "step": 34620 }, { "epoch": 32.3, "learning_rate": 4.596268656716418e-05, "loss": 0.0011, "step": 34624 }, { "epoch": 32.3, "learning_rate": 4.596222014925373e-05, "loss": 0.0004, "step": 34628 }, { "epoch": 32.31, "learning_rate": 4.5961753731343286e-05, "loss": 0.0001, "step": 34632 }, { "epoch": 32.31, "learning_rate": 4.596128731343284e-05, "loss": 0.0006, "step": 34636 }, { "epoch": 32.31, "learning_rate": 4.596082089552239e-05, "loss": 0.0007, "step": 34640 }, { "epoch": 32.32, "learning_rate": 4.5960354477611944e-05, "loss": 0.0005, "step": 34644 }, { "epoch": 32.32, "learning_rate": 4.595988805970149e-05, "loss": 0.0015, "step": 34648 }, { "epoch": 32.32, "learning_rate": 4.5959421641791047e-05, "loss": 0.0017, "step": 34652 }, { "epoch": 32.33, "learning_rate": 4.59589552238806e-05, "loss": 0.0, "step": 34656 }, { "epoch": 32.33, "learning_rate": 4.595848880597015e-05, "loss": 0.0, "step": 34660 }, { "epoch": 32.34, "learning_rate": 4.5958022388059705e-05, "loss": 0.0, "step": 34664 }, { "epoch": 32.34, "learning_rate": 4.595755597014926e-05, "loss": 0.0001, "step": 34668 }, { "epoch": 32.34, "learning_rate": 4.595708955223881e-05, "loss": 0.0005, "step": 34672 }, { "epoch": 32.35, "learning_rate": 4.5956623134328356e-05, "loss": 0.0001, "step": 34676 }, { "epoch": 32.35, "learning_rate": 4.595615671641792e-05, "loss": 0.0001, "step": 34680 }, { "epoch": 32.35, "learning_rate": 4.5955690298507466e-05, "loss": 0.0005, "step": 34684 }, { "epoch": 32.36, "learning_rate": 4.5955223880597014e-05, "loss": 0.0002, "step": 34688 }, { "epoch": 32.36, "learning_rate": 4.595475746268657e-05, "loss": 0.0002, "step": 34692 }, { "epoch": 32.37, "learning_rate": 4.5954291044776123e-05, "loss": 0.0003, "step": 34696 }, { "epoch": 32.37, "learning_rate": 4.595382462686567e-05, "loss": 0.0009, "step": 34700 }, { "epoch": 32.37, "learning_rate": 4.5953358208955227e-05, "loss": 0.0001, "step": 34704 }, { "epoch": 32.38, "learning_rate": 4.5952891791044775e-05, "loss": 0.0001, "step": 34708 }, { "epoch": 32.38, "learning_rate": 4.595242537313433e-05, "loss": 0.0015, "step": 34712 }, { "epoch": 32.38, "learning_rate": 4.5951958955223884e-05, "loss": 0.0001, "step": 34716 }, { "epoch": 32.39, "learning_rate": 4.595149253731343e-05, "loss": 0.0001, "step": 34720 }, { "epoch": 32.39, "learning_rate": 4.595102611940299e-05, "loss": 0.0005, "step": 34724 }, { "epoch": 32.4, "learning_rate": 4.595055970149254e-05, "loss": 0.0, "step": 34728 }, { "epoch": 32.4, "learning_rate": 4.595009328358209e-05, "loss": 0.0002, "step": 34732 }, { "epoch": 32.4, "learning_rate": 4.5949626865671645e-05, "loss": 0.0, "step": 34736 }, { "epoch": 32.41, "learning_rate": 4.59491604477612e-05, "loss": 0.0, "step": 34740 }, { "epoch": 32.41, "learning_rate": 4.594869402985075e-05, "loss": 0.0, "step": 34744 }, { "epoch": 32.41, "learning_rate": 4.5948227611940303e-05, "loss": 0.0006, "step": 34748 }, { "epoch": 32.42, "learning_rate": 4.594776119402985e-05, "loss": 0.0009, "step": 34752 }, { "epoch": 32.42, "learning_rate": 4.5947294776119406e-05, "loss": 0.0001, "step": 34756 }, { "epoch": 32.43, "learning_rate": 4.594682835820896e-05, "loss": 0.0004, "step": 34760 }, { "epoch": 32.43, "learning_rate": 4.594636194029851e-05, "loss": 0.002, "step": 34764 }, { "epoch": 32.43, "learning_rate": 4.594589552238806e-05, "loss": 0.0014, "step": 34768 }, { "epoch": 32.44, "learning_rate": 4.594542910447762e-05, "loss": 0.0021, "step": 34772 }, { "epoch": 32.44, "learning_rate": 4.594496268656717e-05, "loss": 0.0005, "step": 34776 }, { "epoch": 32.44, "learning_rate": 4.5944496268656716e-05, "loss": 0.0, "step": 34780 }, { "epoch": 32.45, "learning_rate": 4.594402985074627e-05, "loss": 0.0006, "step": 34784 }, { "epoch": 32.45, "learning_rate": 4.5943563432835825e-05, "loss": 0.0, "step": 34788 }, { "epoch": 32.46, "learning_rate": 4.5943097014925374e-05, "loss": 0.0002, "step": 34792 }, { "epoch": 32.46, "learning_rate": 4.594263059701493e-05, "loss": 0.0047, "step": 34796 }, { "epoch": 32.46, "learning_rate": 4.5942164179104477e-05, "loss": 0.0005, "step": 34800 }, { "epoch": 32.47, "learning_rate": 4.594169776119403e-05, "loss": 0.0003, "step": 34804 }, { "epoch": 32.47, "learning_rate": 4.5941231343283586e-05, "loss": 0.0183, "step": 34808 }, { "epoch": 32.47, "learning_rate": 4.5940764925373134e-05, "loss": 0.0, "step": 34812 }, { "epoch": 32.48, "learning_rate": 4.594029850746269e-05, "loss": 0.0005, "step": 34816 }, { "epoch": 32.48, "learning_rate": 4.5939832089552244e-05, "loss": 0.0013, "step": 34820 }, { "epoch": 32.49, "learning_rate": 4.593936567164179e-05, "loss": 0.0042, "step": 34824 }, { "epoch": 32.49, "learning_rate": 4.593889925373134e-05, "loss": 0.0003, "step": 34828 }, { "epoch": 32.49, "learning_rate": 4.59384328358209e-05, "loss": 0.0, "step": 34832 }, { "epoch": 32.5, "learning_rate": 4.593796641791045e-05, "loss": 0.0001, "step": 34836 }, { "epoch": 32.5, "learning_rate": 4.59375e-05, "loss": 0.0001, "step": 34840 }, { "epoch": 32.5, "learning_rate": 4.5937033582089553e-05, "loss": 0.0001, "step": 34844 }, { "epoch": 32.51, "learning_rate": 4.593656716417911e-05, "loss": 0.0002, "step": 34848 }, { "epoch": 32.51, "learning_rate": 4.5936100746268656e-05, "loss": 0.0001, "step": 34852 }, { "epoch": 32.51, "learning_rate": 4.593563432835821e-05, "loss": 0.0, "step": 34856 }, { "epoch": 32.52, "learning_rate": 4.593516791044776e-05, "loss": 0.0035, "step": 34860 }, { "epoch": 32.52, "learning_rate": 4.5934701492537314e-05, "loss": 0.0043, "step": 34864 }, { "epoch": 32.53, "learning_rate": 4.593423507462687e-05, "loss": 0.0, "step": 34868 }, { "epoch": 32.53, "learning_rate": 4.593376865671642e-05, "loss": 0.0001, "step": 34872 }, { "epoch": 32.53, "learning_rate": 4.593330223880597e-05, "loss": 0.0007, "step": 34876 }, { "epoch": 32.54, "learning_rate": 4.593283582089553e-05, "loss": 0.0002, "step": 34880 }, { "epoch": 32.54, "learning_rate": 4.5932369402985075e-05, "loss": 0.0004, "step": 34884 }, { "epoch": 32.54, "learning_rate": 4.5931902985074624e-05, "loss": 0.0, "step": 34888 }, { "epoch": 32.55, "learning_rate": 4.5931436567164185e-05, "loss": 0.0021, "step": 34892 }, { "epoch": 32.55, "learning_rate": 4.593097014925373e-05, "loss": 0.0, "step": 34896 }, { "epoch": 32.56, "learning_rate": 4.593050373134329e-05, "loss": 0.0002, "step": 34900 }, { "epoch": 32.56, "learning_rate": 4.5930037313432836e-05, "loss": 0.0002, "step": 34904 }, { "epoch": 32.56, "learning_rate": 4.592957089552239e-05, "loss": 0.0001, "step": 34908 }, { "epoch": 32.57, "learning_rate": 4.5929104477611946e-05, "loss": 0.0036, "step": 34912 }, { "epoch": 32.57, "learning_rate": 4.5928638059701494e-05, "loss": 0.0019, "step": 34916 }, { "epoch": 32.57, "learning_rate": 4.592817164179104e-05, "loss": 0.0009, "step": 34920 }, { "epoch": 32.58, "learning_rate": 4.5927705223880604e-05, "loss": 0.0015, "step": 34924 }, { "epoch": 32.58, "learning_rate": 4.592723880597015e-05, "loss": 0.0002, "step": 34928 }, { "epoch": 32.59, "learning_rate": 4.59267723880597e-05, "loss": 0.0, "step": 34932 }, { "epoch": 32.59, "learning_rate": 4.5926305970149255e-05, "loss": 0.0001, "step": 34936 }, { "epoch": 32.59, "learning_rate": 4.592583955223881e-05, "loss": 0.0014, "step": 34940 }, { "epoch": 32.6, "learning_rate": 4.592537313432836e-05, "loss": 0.0001, "step": 34944 }, { "epoch": 32.6, "learning_rate": 4.592490671641791e-05, "loss": 0.0001, "step": 34948 }, { "epoch": 32.6, "learning_rate": 4.592444029850747e-05, "loss": 0.0002, "step": 34952 }, { "epoch": 32.61, "learning_rate": 4.5923973880597016e-05, "loss": 0.0001, "step": 34956 }, { "epoch": 32.61, "learning_rate": 4.592350746268657e-05, "loss": 0.001, "step": 34960 }, { "epoch": 32.62, "learning_rate": 4.592304104477612e-05, "loss": 0.0002, "step": 34964 }, { "epoch": 32.62, "learning_rate": 4.5922574626865674e-05, "loss": 0.0014, "step": 34968 }, { "epoch": 32.62, "learning_rate": 4.592210820895523e-05, "loss": 0.0, "step": 34972 }, { "epoch": 32.63, "learning_rate": 4.592164179104478e-05, "loss": 0.0013, "step": 34976 }, { "epoch": 32.63, "learning_rate": 4.5921175373134325e-05, "loss": 0.0001, "step": 34980 }, { "epoch": 32.63, "learning_rate": 4.592070895522389e-05, "loss": 0.0029, "step": 34984 }, { "epoch": 32.64, "learning_rate": 4.5920242537313435e-05, "loss": 0.0, "step": 34988 }, { "epoch": 32.64, "learning_rate": 4.591977611940298e-05, "loss": 0.0009, "step": 34992 }, { "epoch": 32.65, "learning_rate": 4.591930970149254e-05, "loss": 0.0, "step": 34996 }, { "epoch": 32.65, "learning_rate": 4.591884328358209e-05, "loss": 0.0, "step": 35000 }, { "epoch": 32.65, "eval_exact_match": 0.7350096711798839, "eval_exec": 0.7543520309477756, "eval_loss": 0.41416093707084656, "eval_runtime": 1061.3828, "eval_samples_per_second": 0.974, "step": 35000 }, { "epoch": 32.65, "learning_rate": 4.591837686567164e-05, "loss": 0.0063, "step": 35004 }, { "epoch": 32.66, "learning_rate": 4.5917910447761196e-05, "loss": 0.0001, "step": 35008 }, { "epoch": 32.66, "learning_rate": 4.591744402985075e-05, "loss": 0.0001, "step": 35012 }, { "epoch": 32.66, "learning_rate": 4.59169776119403e-05, "loss": 0.0014, "step": 35016 }, { "epoch": 32.67, "learning_rate": 4.5916511194029854e-05, "loss": 0.0, "step": 35020 }, { "epoch": 32.67, "learning_rate": 4.59160447761194e-05, "loss": 0.0001, "step": 35024 }, { "epoch": 32.68, "learning_rate": 4.591557835820896e-05, "loss": 0.0005, "step": 35028 }, { "epoch": 32.68, "learning_rate": 4.591511194029851e-05, "loss": 0.0, "step": 35032 }, { "epoch": 32.68, "learning_rate": 4.591464552238806e-05, "loss": 0.0, "step": 35036 }, { "epoch": 32.69, "learning_rate": 4.591417910447761e-05, "loss": 0.0001, "step": 35040 }, { "epoch": 32.69, "learning_rate": 4.591371268656717e-05, "loss": 0.0002, "step": 35044 }, { "epoch": 32.69, "learning_rate": 4.591324626865672e-05, "loss": 0.0001, "step": 35048 }, { "epoch": 32.7, "learning_rate": 4.5912779850746266e-05, "loss": 0.0016, "step": 35052 }, { "epoch": 32.7, "learning_rate": 4.591231343283582e-05, "loss": 0.0025, "step": 35056 }, { "epoch": 32.71, "learning_rate": 4.5911847014925376e-05, "loss": 0.0, "step": 35060 }, { "epoch": 32.71, "learning_rate": 4.591138059701493e-05, "loss": 0.0001, "step": 35064 }, { "epoch": 32.71, "learning_rate": 4.591091417910448e-05, "loss": 0.0001, "step": 35068 }, { "epoch": 32.72, "learning_rate": 4.5910447761194034e-05, "loss": 0.0002, "step": 35072 }, { "epoch": 32.72, "learning_rate": 4.590998134328359e-05, "loss": 0.0001, "step": 35076 }, { "epoch": 32.72, "learning_rate": 4.590951492537314e-05, "loss": 0.0014, "step": 35080 }, { "epoch": 32.73, "learning_rate": 4.5909048507462685e-05, "loss": 0.0007, "step": 35084 }, { "epoch": 32.73, "learning_rate": 4.590858208955224e-05, "loss": 0.0001, "step": 35088 }, { "epoch": 32.73, "learning_rate": 4.5908115671641795e-05, "loss": 0.0001, "step": 35092 }, { "epoch": 32.74, "learning_rate": 4.590764925373134e-05, "loss": 0.0001, "step": 35096 }, { "epoch": 32.74, "learning_rate": 4.59071828358209e-05, "loss": 0.0, "step": 35100 }, { "epoch": 32.75, "learning_rate": 4.590671641791045e-05, "loss": 0.0, "step": 35104 }, { "epoch": 32.75, "learning_rate": 4.590625e-05, "loss": 0.0, "step": 35108 }, { "epoch": 32.75, "learning_rate": 4.5905783582089556e-05, "loss": 0.0004, "step": 35112 }, { "epoch": 32.76, "learning_rate": 4.5905317164179104e-05, "loss": 0.0, "step": 35116 }, { "epoch": 32.76, "learning_rate": 4.590485074626866e-05, "loss": 0.0, "step": 35120 }, { "epoch": 32.76, "learning_rate": 4.5904384328358214e-05, "loss": 0.0003, "step": 35124 }, { "epoch": 32.77, "learning_rate": 4.590391791044776e-05, "loss": 0.0003, "step": 35128 }, { "epoch": 32.77, "learning_rate": 4.590345149253732e-05, "loss": 0.0002, "step": 35132 }, { "epoch": 32.78, "learning_rate": 4.590298507462687e-05, "loss": 0.0001, "step": 35136 }, { "epoch": 32.78, "learning_rate": 4.590251865671642e-05, "loss": 0.0014, "step": 35140 }, { "epoch": 32.78, "learning_rate": 4.590205223880597e-05, "loss": 0.0, "step": 35144 }, { "epoch": 32.79, "learning_rate": 4.590158582089552e-05, "loss": 0.0, "step": 35148 }, { "epoch": 32.79, "learning_rate": 4.590111940298508e-05, "loss": 0.0005, "step": 35152 }, { "epoch": 32.79, "learning_rate": 4.5900652985074626e-05, "loss": 0.0022, "step": 35156 }, { "epoch": 32.8, "learning_rate": 4.590018656716418e-05, "loss": 0.0002, "step": 35160 }, { "epoch": 32.8, "learning_rate": 4.5899720149253736e-05, "loss": 0.0002, "step": 35164 }, { "epoch": 32.81, "learning_rate": 4.5899253731343284e-05, "loss": 0.0079, "step": 35168 }, { "epoch": 32.81, "learning_rate": 4.589878731343284e-05, "loss": 0.0002, "step": 35172 }, { "epoch": 32.81, "learning_rate": 4.589832089552239e-05, "loss": 0.0001, "step": 35176 }, { "epoch": 32.82, "learning_rate": 4.589785447761194e-05, "loss": 0.0002, "step": 35180 }, { "epoch": 32.82, "learning_rate": 4.58973880597015e-05, "loss": 0.0001, "step": 35184 }, { "epoch": 32.82, "learning_rate": 4.5896921641791045e-05, "loss": 0.0001, "step": 35188 }, { "epoch": 32.83, "learning_rate": 4.58964552238806e-05, "loss": 0.0, "step": 35192 }, { "epoch": 32.83, "learning_rate": 4.5895988805970155e-05, "loss": 0.0001, "step": 35196 }, { "epoch": 32.84, "learning_rate": 4.58955223880597e-05, "loss": 0.0, "step": 35200 }, { "epoch": 32.84, "learning_rate": 4.589505597014925e-05, "loss": 0.0008, "step": 35204 }, { "epoch": 32.84, "learning_rate": 4.5894589552238806e-05, "loss": 0.0003, "step": 35208 }, { "epoch": 32.85, "learning_rate": 4.589412313432836e-05, "loss": 0.0003, "step": 35212 }, { "epoch": 32.85, "learning_rate": 4.5893656716417916e-05, "loss": 0.0001, "step": 35216 }, { "epoch": 32.85, "learning_rate": 4.5893190298507464e-05, "loss": 0.0, "step": 35220 }, { "epoch": 32.86, "learning_rate": 4.589272388059702e-05, "loss": 0.0, "step": 35224 }, { "epoch": 32.86, "learning_rate": 4.5892257462686574e-05, "loss": 0.0002, "step": 35228 }, { "epoch": 32.87, "learning_rate": 4.589179104477612e-05, "loss": 0.0002, "step": 35232 }, { "epoch": 32.87, "learning_rate": 4.589132462686567e-05, "loss": 0.0, "step": 35236 }, { "epoch": 32.87, "learning_rate": 4.589085820895523e-05, "loss": 0.0014, "step": 35240 }, { "epoch": 32.88, "learning_rate": 4.589039179104478e-05, "loss": 0.0002, "step": 35244 }, { "epoch": 32.88, "learning_rate": 4.588992537313433e-05, "loss": 0.0006, "step": 35248 }, { "epoch": 32.88, "learning_rate": 4.588945895522388e-05, "loss": 0.0001, "step": 35252 }, { "epoch": 32.89, "learning_rate": 4.588899253731344e-05, "loss": 0.0019, "step": 35256 }, { "epoch": 32.89, "learning_rate": 4.5888526119402986e-05, "loss": 0.0007, "step": 35260 }, { "epoch": 32.9, "learning_rate": 4.588805970149254e-05, "loss": 0.0, "step": 35264 }, { "epoch": 32.9, "learning_rate": 4.588759328358209e-05, "loss": 0.0004, "step": 35268 }, { "epoch": 32.9, "learning_rate": 4.5887126865671644e-05, "loss": 0.0, "step": 35272 }, { "epoch": 32.91, "learning_rate": 4.58866604477612e-05, "loss": 0.0009, "step": 35276 }, { "epoch": 32.91, "learning_rate": 4.588619402985075e-05, "loss": 0.0001, "step": 35280 }, { "epoch": 32.91, "learning_rate": 4.58857276119403e-05, "loss": 0.0014, "step": 35284 }, { "epoch": 32.92, "learning_rate": 4.588526119402986e-05, "loss": 0.0025, "step": 35288 }, { "epoch": 32.92, "learning_rate": 4.5884794776119405e-05, "loss": 0.0001, "step": 35292 }, { "epoch": 32.93, "learning_rate": 4.588432835820895e-05, "loss": 0.0064, "step": 35296 }, { "epoch": 32.93, "learning_rate": 4.5883861940298515e-05, "loss": 0.0001, "step": 35300 }, { "epoch": 32.93, "learning_rate": 4.588339552238806e-05, "loss": 0.0006, "step": 35304 }, { "epoch": 32.94, "learning_rate": 4.588292910447761e-05, "loss": 0.0004, "step": 35308 }, { "epoch": 32.94, "learning_rate": 4.5882462686567166e-05, "loss": 0.0076, "step": 35312 }, { "epoch": 32.94, "learning_rate": 4.588199626865672e-05, "loss": 0.0001, "step": 35316 }, { "epoch": 32.95, "learning_rate": 4.588152985074627e-05, "loss": 0.0001, "step": 35320 }, { "epoch": 32.95, "learning_rate": 4.5881063432835824e-05, "loss": 0.0001, "step": 35324 }, { "epoch": 32.96, "learning_rate": 4.588059701492537e-05, "loss": 0.0, "step": 35328 }, { "epoch": 32.96, "learning_rate": 4.588013059701493e-05, "loss": 0.001, "step": 35332 }, { "epoch": 32.96, "learning_rate": 4.587966417910448e-05, "loss": 0.0001, "step": 35336 }, { "epoch": 32.97, "learning_rate": 4.587919776119403e-05, "loss": 0.0001, "step": 35340 }, { "epoch": 32.97, "learning_rate": 4.5878731343283585e-05, "loss": 0.0001, "step": 35344 }, { "epoch": 32.97, "learning_rate": 4.587826492537314e-05, "loss": 0.0001, "step": 35348 }, { "epoch": 32.98, "learning_rate": 4.587779850746269e-05, "loss": 0.0, "step": 35352 }, { "epoch": 32.98, "learning_rate": 4.5877332089552236e-05, "loss": 0.0001, "step": 35356 }, { "epoch": 32.98, "learning_rate": 4.58768656716418e-05, "loss": 0.0001, "step": 35360 }, { "epoch": 32.99, "learning_rate": 4.5876399253731346e-05, "loss": 0.0, "step": 35364 }, { "epoch": 32.99, "learning_rate": 4.5875932835820894e-05, "loss": 0.0003, "step": 35368 }, { "epoch": 33.0, "learning_rate": 4.587546641791045e-05, "loss": 0.0008, "step": 35372 }, { "epoch": 33.0, "learning_rate": 4.5875000000000004e-05, "loss": 0.0001, "step": 35376 }, { "epoch": 33.0, "learning_rate": 4.587453358208956e-05, "loss": 0.0008, "step": 35380 }, { "epoch": 33.01, "learning_rate": 4.587406716417911e-05, "loss": 0.0006, "step": 35384 }, { "epoch": 33.01, "learning_rate": 4.5873600746268655e-05, "loss": 0.0, "step": 35388 }, { "epoch": 33.01, "learning_rate": 4.587313432835822e-05, "loss": 0.0007, "step": 35392 }, { "epoch": 33.02, "learning_rate": 4.5872667910447765e-05, "loss": 0.0001, "step": 35396 }, { "epoch": 33.02, "learning_rate": 4.587220149253731e-05, "loss": 0.0, "step": 35400 }, { "epoch": 33.03, "learning_rate": 4.587173507462687e-05, "loss": 0.0001, "step": 35404 }, { "epoch": 33.03, "learning_rate": 4.587126865671642e-05, "loss": 0.001, "step": 35408 }, { "epoch": 33.03, "learning_rate": 4.587080223880597e-05, "loss": 0.0012, "step": 35412 }, { "epoch": 33.04, "learning_rate": 4.5870335820895526e-05, "loss": 0.0008, "step": 35416 }, { "epoch": 33.04, "learning_rate": 4.586986940298508e-05, "loss": 0.0001, "step": 35420 }, { "epoch": 33.04, "learning_rate": 4.586940298507463e-05, "loss": 0.0, "step": 35424 }, { "epoch": 33.05, "learning_rate": 4.5868936567164184e-05, "loss": 0.0005, "step": 35428 }, { "epoch": 33.05, "learning_rate": 4.586847014925373e-05, "loss": 0.0, "step": 35432 }, { "epoch": 33.06, "learning_rate": 4.586800373134329e-05, "loss": 0.0001, "step": 35436 }, { "epoch": 33.06, "learning_rate": 4.586753731343284e-05, "loss": 0.0009, "step": 35440 }, { "epoch": 33.06, "learning_rate": 4.586707089552239e-05, "loss": 0.0002, "step": 35444 }, { "epoch": 33.07, "learning_rate": 4.586660447761194e-05, "loss": 0.0005, "step": 35448 }, { "epoch": 33.07, "learning_rate": 4.58661380597015e-05, "loss": 0.0005, "step": 35452 }, { "epoch": 33.07, "learning_rate": 4.586567164179105e-05, "loss": 0.0042, "step": 35456 }, { "epoch": 33.08, "learning_rate": 4.5865205223880596e-05, "loss": 0.0, "step": 35460 }, { "epoch": 33.08, "learning_rate": 4.586473880597015e-05, "loss": 0.0001, "step": 35464 }, { "epoch": 33.09, "learning_rate": 4.5864272388059706e-05, "loss": 0.0064, "step": 35468 }, { "epoch": 33.09, "learning_rate": 4.5863805970149254e-05, "loss": 0.0004, "step": 35472 }, { "epoch": 33.09, "learning_rate": 4.586333955223881e-05, "loss": 0.0001, "step": 35476 }, { "epoch": 33.1, "learning_rate": 4.586287313432836e-05, "loss": 0.0, "step": 35480 }, { "epoch": 33.1, "learning_rate": 4.586240671641791e-05, "loss": 0.0005, "step": 35484 }, { "epoch": 33.1, "learning_rate": 4.586194029850747e-05, "loss": 0.0002, "step": 35488 }, { "epoch": 33.11, "learning_rate": 4.5861473880597015e-05, "loss": 0.0002, "step": 35492 }, { "epoch": 33.11, "learning_rate": 4.586100746268657e-05, "loss": 0.0002, "step": 35496 }, { "epoch": 33.12, "learning_rate": 4.5860541044776125e-05, "loss": 0.0068, "step": 35500 }, { "epoch": 33.12, "eval_exact_match": 0.7379110251450677, "eval_exec": 0.758220502901354, "eval_loss": 0.4318758547306061, "eval_runtime": 1370.5932, "eval_samples_per_second": 0.754, "step": 35500 }, { "epoch": 33.12, "learning_rate": 4.586007462686567e-05, "loss": 0.0, "step": 35504 }, { "epoch": 33.12, "learning_rate": 4.585960820895522e-05, "loss": 0.0004, "step": 35508 }, { "epoch": 33.13, "learning_rate": 4.585914179104478e-05, "loss": 0.0012, "step": 35512 }, { "epoch": 33.13, "learning_rate": 4.585867537313433e-05, "loss": 0.0001, "step": 35516 }, { "epoch": 33.13, "learning_rate": 4.585820895522388e-05, "loss": 0.0006, "step": 35520 }, { "epoch": 33.14, "learning_rate": 4.5857742537313434e-05, "loss": 0.0001, "step": 35524 }, { "epoch": 33.14, "learning_rate": 4.585727611940299e-05, "loss": 0.0, "step": 35528 }, { "epoch": 33.15, "learning_rate": 4.585680970149254e-05, "loss": 0.0002, "step": 35532 }, { "epoch": 33.15, "learning_rate": 4.585634328358209e-05, "loss": 0.0001, "step": 35536 }, { "epoch": 33.15, "learning_rate": 4.585587686567164e-05, "loss": 0.0, "step": 35540 }, { "epoch": 33.16, "learning_rate": 4.58554104477612e-05, "loss": 0.0011, "step": 35544 }, { "epoch": 33.16, "learning_rate": 4.585494402985075e-05, "loss": 0.0001, "step": 35548 }, { "epoch": 33.16, "learning_rate": 4.58544776119403e-05, "loss": 0.0, "step": 35552 }, { "epoch": 33.17, "learning_rate": 4.585401119402985e-05, "loss": 0.0001, "step": 35556 }, { "epoch": 33.17, "learning_rate": 4.585354477611941e-05, "loss": 0.0, "step": 35560 }, { "epoch": 33.18, "learning_rate": 4.5853078358208956e-05, "loss": 0.0, "step": 35564 }, { "epoch": 33.18, "learning_rate": 4.585261194029851e-05, "loss": 0.0001, "step": 35568 }, { "epoch": 33.18, "learning_rate": 4.5852145522388065e-05, "loss": 0.0003, "step": 35572 }, { "epoch": 33.19, "learning_rate": 4.5851679104477614e-05, "loss": 0.0001, "step": 35576 }, { "epoch": 33.19, "learning_rate": 4.585121268656717e-05, "loss": 0.0, "step": 35580 }, { "epoch": 33.19, "learning_rate": 4.585074626865672e-05, "loss": 0.0004, "step": 35584 }, { "epoch": 33.2, "learning_rate": 4.585027985074627e-05, "loss": 0.0002, "step": 35588 }, { "epoch": 33.2, "learning_rate": 4.5849813432835826e-05, "loss": 0.0006, "step": 35592 }, { "epoch": 33.21, "learning_rate": 4.5849347014925375e-05, "loss": 0.0, "step": 35596 }, { "epoch": 33.21, "learning_rate": 4.584888059701492e-05, "loss": 0.0001, "step": 35600 }, { "epoch": 33.21, "learning_rate": 4.5848414179104484e-05, "loss": 0.0003, "step": 35604 }, { "epoch": 33.22, "learning_rate": 4.584794776119403e-05, "loss": 0.0001, "step": 35608 }, { "epoch": 33.22, "learning_rate": 4.584748134328358e-05, "loss": 0.001, "step": 35612 }, { "epoch": 33.22, "learning_rate": 4.5847014925373136e-05, "loss": 0.0, "step": 35616 }, { "epoch": 33.23, "learning_rate": 4.584654850746269e-05, "loss": 0.0016, "step": 35620 }, { "epoch": 33.23, "learning_rate": 4.584608208955224e-05, "loss": 0.0, "step": 35624 }, { "epoch": 33.24, "learning_rate": 4.5845615671641794e-05, "loss": 0.0, "step": 35628 }, { "epoch": 33.24, "learning_rate": 4.584514925373135e-05, "loss": 0.0001, "step": 35632 }, { "epoch": 33.24, "learning_rate": 4.5844682835820897e-05, "loss": 0.0, "step": 35636 }, { "epoch": 33.25, "learning_rate": 4.584421641791045e-05, "loss": 0.0, "step": 35640 }, { "epoch": 33.25, "learning_rate": 4.584375e-05, "loss": 0.0, "step": 35644 }, { "epoch": 33.25, "learning_rate": 4.5843283582089555e-05, "loss": 0.0017, "step": 35648 }, { "epoch": 33.26, "learning_rate": 4.584281716417911e-05, "loss": 0.0, "step": 35652 }, { "epoch": 33.26, "learning_rate": 4.584235074626866e-05, "loss": 0.0001, "step": 35656 }, { "epoch": 33.26, "learning_rate": 4.5841884328358206e-05, "loss": 0.0, "step": 35660 }, { "epoch": 33.27, "learning_rate": 4.584141791044777e-05, "loss": 0.0, "step": 35664 }, { "epoch": 33.27, "learning_rate": 4.5840951492537316e-05, "loss": 0.0, "step": 35668 }, { "epoch": 33.28, "learning_rate": 4.5840485074626864e-05, "loss": 0.0, "step": 35672 }, { "epoch": 33.28, "learning_rate": 4.584001865671642e-05, "loss": 0.0001, "step": 35676 }, { "epoch": 33.28, "learning_rate": 4.5839552238805973e-05, "loss": 0.0001, "step": 35680 }, { "epoch": 33.29, "learning_rate": 4.583908582089552e-05, "loss": 0.0, "step": 35684 }, { "epoch": 33.29, "learning_rate": 4.5838619402985077e-05, "loss": 0.0, "step": 35688 }, { "epoch": 33.29, "learning_rate": 4.583815298507463e-05, "loss": 0.0027, "step": 35692 }, { "epoch": 33.3, "learning_rate": 4.583768656716418e-05, "loss": 0.0003, "step": 35696 }, { "epoch": 33.3, "learning_rate": 4.5837220149253734e-05, "loss": 0.0032, "step": 35700 }, { "epoch": 33.31, "learning_rate": 4.583675373134328e-05, "loss": 0.0007, "step": 35704 }, { "epoch": 33.31, "learning_rate": 4.583628731343284e-05, "loss": 0.0005, "step": 35708 }, { "epoch": 33.31, "learning_rate": 4.583582089552239e-05, "loss": 0.0032, "step": 35712 }, { "epoch": 33.32, "learning_rate": 4.583535447761194e-05, "loss": 0.0002, "step": 35716 }, { "epoch": 33.32, "learning_rate": 4.5834888059701495e-05, "loss": 0.0027, "step": 35720 }, { "epoch": 33.32, "learning_rate": 4.583442164179105e-05, "loss": 0.0004, "step": 35724 }, { "epoch": 33.33, "learning_rate": 4.58339552238806e-05, "loss": 0.0, "step": 35728 }, { "epoch": 33.33, "learning_rate": 4.583348880597015e-05, "loss": 0.0, "step": 35732 }, { "epoch": 33.34, "learning_rate": 4.58330223880597e-05, "loss": 0.0001, "step": 35736 }, { "epoch": 33.34, "learning_rate": 4.5832555970149256e-05, "loss": 0.0, "step": 35740 }, { "epoch": 33.34, "learning_rate": 4.583208955223881e-05, "loss": 0.0023, "step": 35744 }, { "epoch": 33.35, "learning_rate": 4.583162313432836e-05, "loss": 0.0001, "step": 35748 }, { "epoch": 33.35, "learning_rate": 4.5831156716417914e-05, "loss": 0.0145, "step": 35752 }, { "epoch": 33.35, "learning_rate": 4.583069029850747e-05, "loss": 0.0028, "step": 35756 }, { "epoch": 33.36, "learning_rate": 4.583022388059702e-05, "loss": 0.0007, "step": 35760 }, { "epoch": 33.36, "learning_rate": 4.5829757462686566e-05, "loss": 0.0, "step": 35764 }, { "epoch": 33.37, "learning_rate": 4.582929104477612e-05, "loss": 0.0007, "step": 35768 }, { "epoch": 33.37, "learning_rate": 4.5828824626865675e-05, "loss": 0.0005, "step": 35772 }, { "epoch": 33.37, "learning_rate": 4.5828358208955223e-05, "loss": 0.0, "step": 35776 }, { "epoch": 33.38, "learning_rate": 4.582789179104478e-05, "loss": 0.001, "step": 35780 }, { "epoch": 33.38, "learning_rate": 4.582742537313433e-05, "loss": 0.0002, "step": 35784 }, { "epoch": 33.38, "learning_rate": 4.582695895522388e-05, "loss": 0.0001, "step": 35788 }, { "epoch": 33.39, "learning_rate": 4.5826492537313436e-05, "loss": 0.0011, "step": 35792 }, { "epoch": 33.39, "learning_rate": 4.5826026119402984e-05, "loss": 0.0091, "step": 35796 }, { "epoch": 33.4, "learning_rate": 4.582555970149254e-05, "loss": 0.0017, "step": 35800 }, { "epoch": 33.4, "learning_rate": 4.5825093283582094e-05, "loss": 0.0003, "step": 35804 }, { "epoch": 33.4, "learning_rate": 4.582462686567164e-05, "loss": 0.002, "step": 35808 }, { "epoch": 33.41, "learning_rate": 4.58241604477612e-05, "loss": 0.0002, "step": 35812 }, { "epoch": 33.41, "learning_rate": 4.582369402985075e-05, "loss": 0.0001, "step": 35816 }, { "epoch": 33.41, "learning_rate": 4.58232276119403e-05, "loss": 0.0098, "step": 35820 }, { "epoch": 33.42, "learning_rate": 4.582276119402985e-05, "loss": 0.0001, "step": 35824 }, { "epoch": 33.42, "learning_rate": 4.5822294776119403e-05, "loss": 0.0004, "step": 35828 }, { "epoch": 33.43, "learning_rate": 4.582182835820896e-05, "loss": 0.0001, "step": 35832 }, { "epoch": 33.43, "learning_rate": 4.5821361940298506e-05, "loss": 0.0001, "step": 35836 }, { "epoch": 33.43, "learning_rate": 4.582089552238806e-05, "loss": 0.0001, "step": 35840 }, { "epoch": 33.44, "learning_rate": 4.5820429104477616e-05, "loss": 0.0, "step": 35844 }, { "epoch": 33.44, "learning_rate": 4.5819962686567164e-05, "loss": 0.0018, "step": 35848 }, { "epoch": 33.44, "learning_rate": 4.581949626865672e-05, "loss": 0.0005, "step": 35852 }, { "epoch": 33.45, "learning_rate": 4.581902985074627e-05, "loss": 0.0002, "step": 35856 }, { "epoch": 33.45, "learning_rate": 4.581856343283582e-05, "loss": 0.0006, "step": 35860 }, { "epoch": 33.46, "learning_rate": 4.581809701492538e-05, "loss": 0.0001, "step": 35864 }, { "epoch": 33.46, "learning_rate": 4.5817630597014925e-05, "loss": 0.0001, "step": 35868 }, { "epoch": 33.46, "learning_rate": 4.581716417910448e-05, "loss": 0.0001, "step": 35872 }, { "epoch": 33.47, "learning_rate": 4.5816697761194035e-05, "loss": 0.0001, "step": 35876 }, { "epoch": 33.47, "learning_rate": 4.581623134328358e-05, "loss": 0.0007, "step": 35880 }, { "epoch": 33.47, "learning_rate": 4.581576492537314e-05, "loss": 0.0047, "step": 35884 }, { "epoch": 33.48, "learning_rate": 4.5815298507462686e-05, "loss": 0.0004, "step": 35888 }, { "epoch": 33.48, "learning_rate": 4.581483208955224e-05, "loss": 0.0001, "step": 35892 }, { "epoch": 33.49, "learning_rate": 4.5814365671641796e-05, "loss": 0.0003, "step": 35896 }, { "epoch": 33.49, "learning_rate": 4.5813899253731344e-05, "loss": 0.0047, "step": 35900 }, { "epoch": 33.49, "learning_rate": 4.58134328358209e-05, "loss": 0.0012, "step": 35904 }, { "epoch": 33.5, "learning_rate": 4.5812966417910454e-05, "loss": 0.0042, "step": 35908 }, { "epoch": 33.5, "learning_rate": 4.58125e-05, "loss": 0.0, "step": 35912 }, { "epoch": 33.5, "learning_rate": 4.581203358208955e-05, "loss": 0.0256, "step": 35916 }, { "epoch": 33.51, "learning_rate": 4.581156716417911e-05, "loss": 0.0033, "step": 35920 }, { "epoch": 33.51, "learning_rate": 4.581110074626866e-05, "loss": 0.0001, "step": 35924 }, { "epoch": 33.51, "learning_rate": 4.581063432835821e-05, "loss": 0.0015, "step": 35928 }, { "epoch": 33.52, "learning_rate": 4.581016791044776e-05, "loss": 0.0003, "step": 35932 }, { "epoch": 33.52, "learning_rate": 4.580970149253732e-05, "loss": 0.0001, "step": 35936 }, { "epoch": 33.53, "learning_rate": 4.5809235074626866e-05, "loss": 0.0003, "step": 35940 }, { "epoch": 33.53, "learning_rate": 4.580876865671642e-05, "loss": 0.0, "step": 35944 }, { "epoch": 33.53, "learning_rate": 4.580830223880597e-05, "loss": 0.0015, "step": 35948 }, { "epoch": 33.54, "learning_rate": 4.5807835820895524e-05, "loss": 0.0002, "step": 35952 }, { "epoch": 33.54, "learning_rate": 4.580736940298508e-05, "loss": 0.0005, "step": 35956 }, { "epoch": 33.54, "learning_rate": 4.580690298507463e-05, "loss": 0.0001, "step": 35960 }, { "epoch": 33.55, "learning_rate": 4.580643656716418e-05, "loss": 0.0033, "step": 35964 }, { "epoch": 33.55, "learning_rate": 4.580597014925374e-05, "loss": 0.0027, "step": 35968 }, { "epoch": 33.56, "learning_rate": 4.5805503731343285e-05, "loss": 0.0024, "step": 35972 }, { "epoch": 33.56, "learning_rate": 4.580503731343283e-05, "loss": 0.0041, "step": 35976 }, { "epoch": 33.56, "learning_rate": 4.5804570895522395e-05, "loss": 0.0017, "step": 35980 }, { "epoch": 33.57, "learning_rate": 4.580410447761194e-05, "loss": 0.0017, "step": 35984 }, { "epoch": 33.57, "learning_rate": 4.580363805970149e-05, "loss": 0.0001, "step": 35988 }, { "epoch": 33.57, "learning_rate": 4.5803171641791046e-05, "loss": 0.0015, "step": 35992 }, { "epoch": 33.58, "learning_rate": 4.58027052238806e-05, "loss": 0.0001, "step": 35996 }, { "epoch": 33.58, "learning_rate": 4.580223880597015e-05, "loss": 0.0006, "step": 36000 }, { "epoch": 33.58, "eval_exact_match": 0.723404255319149, "eval_exec": 0.7630560928433269, "eval_loss": 0.4088174104690552, "eval_runtime": 1192.806, "eval_samples_per_second": 0.867, "step": 36000 }, { "epoch": 33.59, "learning_rate": 4.5801772388059704e-05, "loss": 0.0, "step": 36004 }, { "epoch": 33.59, "learning_rate": 4.580130597014925e-05, "loss": 0.0005, "step": 36008 }, { "epoch": 33.59, "learning_rate": 4.580083955223881e-05, "loss": 0.0001, "step": 36012 }, { "epoch": 33.6, "learning_rate": 4.580037313432836e-05, "loss": 0.0003, "step": 36016 }, { "epoch": 33.6, "learning_rate": 4.579990671641791e-05, "loss": 0.0, "step": 36020 }, { "epoch": 33.6, "learning_rate": 4.5799440298507465e-05, "loss": 0.0, "step": 36024 }, { "epoch": 33.61, "learning_rate": 4.579897388059702e-05, "loss": 0.0002, "step": 36028 }, { "epoch": 33.61, "learning_rate": 4.579850746268657e-05, "loss": 0.0064, "step": 36032 }, { "epoch": 33.62, "learning_rate": 4.579804104477612e-05, "loss": 0.0014, "step": 36036 }, { "epoch": 33.62, "learning_rate": 4.579757462686568e-05, "loss": 0.0011, "step": 36040 }, { "epoch": 33.62, "learning_rate": 4.5797108208955226e-05, "loss": 0.001, "step": 36044 }, { "epoch": 33.63, "learning_rate": 4.579664179104478e-05, "loss": 0.0271, "step": 36048 }, { "epoch": 33.63, "learning_rate": 4.579617537313433e-05, "loss": 0.0015, "step": 36052 }, { "epoch": 33.63, "learning_rate": 4.5795708955223884e-05, "loss": 0.0014, "step": 36056 }, { "epoch": 33.64, "learning_rate": 4.579524253731344e-05, "loss": 0.0008, "step": 36060 }, { "epoch": 33.64, "learning_rate": 4.579477611940299e-05, "loss": 0.0, "step": 36064 }, { "epoch": 33.65, "learning_rate": 4.5794309701492535e-05, "loss": 0.0001, "step": 36068 }, { "epoch": 33.65, "learning_rate": 4.57938432835821e-05, "loss": 0.0007, "step": 36072 }, { "epoch": 33.65, "learning_rate": 4.5793376865671645e-05, "loss": 0.0004, "step": 36076 }, { "epoch": 33.66, "learning_rate": 4.579291044776119e-05, "loss": 0.0001, "step": 36080 }, { "epoch": 33.66, "learning_rate": 4.579244402985075e-05, "loss": 0.0001, "step": 36084 }, { "epoch": 33.66, "learning_rate": 4.57919776119403e-05, "loss": 0.0002, "step": 36088 }, { "epoch": 33.67, "learning_rate": 4.579151119402985e-05, "loss": 0.0001, "step": 36092 }, { "epoch": 33.67, "learning_rate": 4.5791044776119406e-05, "loss": 0.0001, "step": 36096 }, { "epoch": 33.68, "learning_rate": 4.579057835820896e-05, "loss": 0.0032, "step": 36100 }, { "epoch": 33.68, "learning_rate": 4.579011194029851e-05, "loss": 0.0002, "step": 36104 }, { "epoch": 33.68, "learning_rate": 4.5789645522388064e-05, "loss": 0.0025, "step": 36108 }, { "epoch": 33.69, "learning_rate": 4.578917910447761e-05, "loss": 0.0007, "step": 36112 }, { "epoch": 33.69, "learning_rate": 4.578871268656717e-05, "loss": 0.0001, "step": 36116 }, { "epoch": 33.69, "learning_rate": 4.578824626865672e-05, "loss": 0.0003, "step": 36120 }, { "epoch": 33.7, "learning_rate": 4.578777985074627e-05, "loss": 0.001, "step": 36124 }, { "epoch": 33.7, "learning_rate": 4.578731343283582e-05, "loss": 0.0001, "step": 36128 }, { "epoch": 33.71, "learning_rate": 4.578684701492538e-05, "loss": 0.0, "step": 36132 }, { "epoch": 33.71, "learning_rate": 4.578638059701493e-05, "loss": 0.0002, "step": 36136 }, { "epoch": 33.71, "learning_rate": 4.5785914179104476e-05, "loss": 0.0029, "step": 36140 }, { "epoch": 33.72, "learning_rate": 4.578544776119403e-05, "loss": 0.0002, "step": 36144 }, { "epoch": 33.72, "learning_rate": 4.5784981343283586e-05, "loss": 0.0, "step": 36148 }, { "epoch": 33.72, "learning_rate": 4.5784514925373134e-05, "loss": 0.0, "step": 36152 }, { "epoch": 33.73, "learning_rate": 4.578404850746269e-05, "loss": 0.0066, "step": 36156 }, { "epoch": 33.73, "learning_rate": 4.578358208955224e-05, "loss": 0.0017, "step": 36160 }, { "epoch": 33.73, "learning_rate": 4.578311567164179e-05, "loss": 0.0028, "step": 36164 }, { "epoch": 33.74, "learning_rate": 4.578264925373135e-05, "loss": 0.0022, "step": 36168 }, { "epoch": 33.74, "learning_rate": 4.5782182835820895e-05, "loss": 0.0001, "step": 36172 }, { "epoch": 33.75, "learning_rate": 4.578171641791045e-05, "loss": 0.0001, "step": 36176 }, { "epoch": 33.75, "learning_rate": 4.5781250000000005e-05, "loss": 0.0001, "step": 36180 }, { "epoch": 33.75, "learning_rate": 4.578078358208955e-05, "loss": 0.0003, "step": 36184 }, { "epoch": 33.76, "learning_rate": 4.57803171641791e-05, "loss": 0.0, "step": 36188 }, { "epoch": 33.76, "learning_rate": 4.577985074626866e-05, "loss": 0.0001, "step": 36192 }, { "epoch": 33.76, "learning_rate": 4.577938432835821e-05, "loss": 0.0005, "step": 36196 }, { "epoch": 33.77, "learning_rate": 4.5778917910447766e-05, "loss": 0.0019, "step": 36200 }, { "epoch": 33.77, "learning_rate": 4.5778451492537314e-05, "loss": 0.0, "step": 36204 }, { "epoch": 33.78, "learning_rate": 4.577798507462687e-05, "loss": 0.0004, "step": 36208 }, { "epoch": 33.78, "learning_rate": 4.5777518656716424e-05, "loss": 0.0003, "step": 36212 }, { "epoch": 33.78, "learning_rate": 4.577705223880597e-05, "loss": 0.0003, "step": 36216 }, { "epoch": 33.79, "learning_rate": 4.577658582089552e-05, "loss": 0.0013, "step": 36220 }, { "epoch": 33.79, "learning_rate": 4.577611940298508e-05, "loss": 0.0005, "step": 36224 }, { "epoch": 33.79, "learning_rate": 4.577565298507463e-05, "loss": 0.0002, "step": 36228 }, { "epoch": 33.8, "learning_rate": 4.577518656716418e-05, "loss": 0.0006, "step": 36232 }, { "epoch": 33.8, "learning_rate": 4.577472014925373e-05, "loss": 0.0065, "step": 36236 }, { "epoch": 33.81, "learning_rate": 4.577425373134329e-05, "loss": 0.0, "step": 36240 }, { "epoch": 33.81, "learning_rate": 4.5773787313432836e-05, "loss": 0.0014, "step": 36244 }, { "epoch": 33.81, "learning_rate": 4.577332089552239e-05, "loss": 0.0, "step": 36248 }, { "epoch": 33.82, "learning_rate": 4.5772854477611946e-05, "loss": 0.0003, "step": 36252 }, { "epoch": 33.82, "learning_rate": 4.5772388059701494e-05, "loss": 0.0, "step": 36256 }, { "epoch": 33.82, "learning_rate": 4.577192164179105e-05, "loss": 0.0006, "step": 36260 }, { "epoch": 33.83, "learning_rate": 4.57714552238806e-05, "loss": 0.0001, "step": 36264 }, { "epoch": 33.83, "learning_rate": 4.577098880597015e-05, "loss": 0.0007, "step": 36268 }, { "epoch": 33.84, "learning_rate": 4.577052238805971e-05, "loss": 0.0001, "step": 36272 }, { "epoch": 33.84, "learning_rate": 4.5770055970149255e-05, "loss": 0.0029, "step": 36276 }, { "epoch": 33.84, "learning_rate": 4.57695895522388e-05, "loss": 0.0002, "step": 36280 }, { "epoch": 33.85, "learning_rate": 4.5769123134328365e-05, "loss": 0.0001, "step": 36284 }, { "epoch": 33.85, "learning_rate": 4.576865671641791e-05, "loss": 0.0002, "step": 36288 }, { "epoch": 33.85, "learning_rate": 4.576819029850746e-05, "loss": 0.0014, "step": 36292 }, { "epoch": 33.86, "learning_rate": 4.5767723880597016e-05, "loss": 0.0, "step": 36296 }, { "epoch": 33.86, "learning_rate": 4.576725746268657e-05, "loss": 0.0003, "step": 36300 }, { "epoch": 33.87, "learning_rate": 4.576679104477612e-05, "loss": 0.0002, "step": 36304 }, { "epoch": 33.87, "learning_rate": 4.5766324626865674e-05, "loss": 0.0005, "step": 36308 }, { "epoch": 33.87, "learning_rate": 4.576585820895523e-05, "loss": 0.0, "step": 36312 }, { "epoch": 33.88, "learning_rate": 4.576539179104478e-05, "loss": 0.0, "step": 36316 }, { "epoch": 33.88, "learning_rate": 4.576492537313433e-05, "loss": 0.0026, "step": 36320 }, { "epoch": 33.88, "learning_rate": 4.576445895522388e-05, "loss": 0.0037, "step": 36324 }, { "epoch": 33.89, "learning_rate": 4.5763992537313435e-05, "loss": 0.0001, "step": 36328 }, { "epoch": 33.89, "learning_rate": 4.576352611940299e-05, "loss": 0.0001, "step": 36332 }, { "epoch": 33.9, "learning_rate": 4.576305970149254e-05, "loss": 0.0008, "step": 36336 }, { "epoch": 33.9, "learning_rate": 4.5762593283582086e-05, "loss": 0.0, "step": 36340 }, { "epoch": 33.9, "learning_rate": 4.576212686567165e-05, "loss": 0.0, "step": 36344 }, { "epoch": 33.91, "learning_rate": 4.5761660447761196e-05, "loss": 0.0, "step": 36348 }, { "epoch": 33.91, "learning_rate": 4.5761194029850744e-05, "loss": 0.0001, "step": 36352 }, { "epoch": 33.91, "learning_rate": 4.57607276119403e-05, "loss": 0.0002, "step": 36356 }, { "epoch": 33.92, "learning_rate": 4.5760261194029854e-05, "loss": 0.0018, "step": 36360 }, { "epoch": 33.92, "learning_rate": 4.575979477611941e-05, "loss": 0.0018, "step": 36364 }, { "epoch": 33.93, "learning_rate": 4.575932835820896e-05, "loss": 0.0017, "step": 36368 }, { "epoch": 33.93, "learning_rate": 4.575886194029851e-05, "loss": 0.0, "step": 36372 }, { "epoch": 33.93, "learning_rate": 4.5758395522388067e-05, "loss": 0.0001, "step": 36376 }, { "epoch": 33.94, "learning_rate": 4.5757929104477615e-05, "loss": 0.0009, "step": 36380 }, { "epoch": 33.94, "learning_rate": 4.575746268656716e-05, "loss": 0.0002, "step": 36384 }, { "epoch": 33.94, "learning_rate": 4.575699626865672e-05, "loss": 0.0, "step": 36388 }, { "epoch": 33.95, "learning_rate": 4.575652985074627e-05, "loss": 0.0055, "step": 36392 }, { "epoch": 33.95, "learning_rate": 4.575606343283582e-05, "loss": 0.0003, "step": 36396 }, { "epoch": 33.96, "learning_rate": 4.5755597014925376e-05, "loss": 0.0001, "step": 36400 }, { "epoch": 33.96, "learning_rate": 4.575513059701493e-05, "loss": 0.0001, "step": 36404 }, { "epoch": 33.96, "learning_rate": 4.575466417910448e-05, "loss": 0.001, "step": 36408 }, { "epoch": 33.97, "learning_rate": 4.5754197761194034e-05, "loss": 0.0001, "step": 36412 }, { "epoch": 33.97, "learning_rate": 4.575373134328358e-05, "loss": 0.0, "step": 36416 }, { "epoch": 33.97, "learning_rate": 4.575326492537314e-05, "loss": 0.0, "step": 36420 }, { "epoch": 33.98, "learning_rate": 4.575279850746269e-05, "loss": 0.0009, "step": 36424 }, { "epoch": 33.98, "learning_rate": 4.575233208955224e-05, "loss": 0.0045, "step": 36428 }, { "epoch": 33.98, "learning_rate": 4.5751865671641795e-05, "loss": 0.0003, "step": 36432 }, { "epoch": 33.99, "learning_rate": 4.575139925373135e-05, "loss": 0.0, "step": 36436 }, { "epoch": 33.99, "learning_rate": 4.57509328358209e-05, "loss": 0.0, "step": 36440 }, { "epoch": 34.0, "learning_rate": 4.5750466417910446e-05, "loss": 0.0079, "step": 36444 }, { "epoch": 34.0, "learning_rate": 4.575e-05, "loss": 0.0003, "step": 36448 }, { "epoch": 34.0, "learning_rate": 4.5749533582089556e-05, "loss": 0.0001, "step": 36452 }, { "epoch": 34.01, "learning_rate": 4.5749067164179104e-05, "loss": 0.0001, "step": 36456 }, { "epoch": 34.01, "learning_rate": 4.574860074626866e-05, "loss": 0.0002, "step": 36460 }, { "epoch": 34.01, "learning_rate": 4.5748134328358214e-05, "loss": 0.0031, "step": 36464 }, { "epoch": 34.02, "learning_rate": 4.574766791044776e-05, "loss": 0.0002, "step": 36468 }, { "epoch": 34.02, "learning_rate": 4.574720149253732e-05, "loss": 0.0013, "step": 36472 }, { "epoch": 34.03, "learning_rate": 4.5746735074626865e-05, "loss": 0.0006, "step": 36476 }, { "epoch": 34.03, "learning_rate": 4.574626865671642e-05, "loss": 0.0052, "step": 36480 }, { "epoch": 34.03, "learning_rate": 4.5745802238805975e-05, "loss": 0.001, "step": 36484 }, { "epoch": 34.04, "learning_rate": 4.574533582089552e-05, "loss": 0.0001, "step": 36488 }, { "epoch": 34.04, "learning_rate": 4.574486940298508e-05, "loss": 0.0011, "step": 36492 }, { "epoch": 34.04, "learning_rate": 4.574440298507463e-05, "loss": 0.0001, "step": 36496 }, { "epoch": 34.05, "learning_rate": 4.574393656716418e-05, "loss": 0.0, "step": 36500 }, { "epoch": 34.05, "eval_exact_match": 0.730174081237911, "eval_exec": 0.7659574468085106, "eval_loss": 0.4040814936161041, "eval_runtime": 1222.2246, "eval_samples_per_second": 0.846, "step": 36500 }, { "epoch": 34.05, "learning_rate": 4.574347014925373e-05, "loss": 0.0004, "step": 36504 }, { "epoch": 34.06, "learning_rate": 4.5743003731343284e-05, "loss": 0.0036, "step": 36508 }, { "epoch": 34.06, "learning_rate": 4.574253731343284e-05, "loss": 0.0008, "step": 36512 }, { "epoch": 34.06, "learning_rate": 4.574207089552239e-05, "loss": 0.0002, "step": 36516 }, { "epoch": 34.07, "learning_rate": 4.574160447761194e-05, "loss": 0.0006, "step": 36520 }, { "epoch": 34.07, "learning_rate": 4.5741138059701497e-05, "loss": 0.0, "step": 36524 }, { "epoch": 34.07, "learning_rate": 4.574067164179105e-05, "loss": 0.0017, "step": 36528 }, { "epoch": 34.08, "learning_rate": 4.57402052238806e-05, "loss": 0.0001, "step": 36532 }, { "epoch": 34.08, "learning_rate": 4.573973880597015e-05, "loss": 0.0001, "step": 36536 }, { "epoch": 34.09, "learning_rate": 4.573927238805971e-05, "loss": 0.0003, "step": 36540 }, { "epoch": 34.09, "learning_rate": 4.573880597014926e-05, "loss": 0.0013, "step": 36544 }, { "epoch": 34.09, "learning_rate": 4.5738339552238806e-05, "loss": 0.0003, "step": 36548 }, { "epoch": 34.1, "learning_rate": 4.573787313432836e-05, "loss": 0.0004, "step": 36552 }, { "epoch": 34.1, "learning_rate": 4.5737406716417915e-05, "loss": 0.0003, "step": 36556 }, { "epoch": 34.1, "learning_rate": 4.5736940298507464e-05, "loss": 0.0016, "step": 36560 }, { "epoch": 34.11, "learning_rate": 4.573647388059702e-05, "loss": 0.0024, "step": 36564 }, { "epoch": 34.11, "learning_rate": 4.573600746268657e-05, "loss": 0.0003, "step": 36568 }, { "epoch": 34.12, "learning_rate": 4.573554104477612e-05, "loss": 0.0001, "step": 36572 }, { "epoch": 34.12, "learning_rate": 4.5735074626865676e-05, "loss": 0.0005, "step": 36576 }, { "epoch": 34.12, "learning_rate": 4.5734608208955225e-05, "loss": 0.0008, "step": 36580 }, { "epoch": 34.13, "learning_rate": 4.573414179104478e-05, "loss": 0.0001, "step": 36584 }, { "epoch": 34.13, "learning_rate": 4.5733675373134334e-05, "loss": 0.013, "step": 36588 }, { "epoch": 34.13, "learning_rate": 4.573320895522388e-05, "loss": 0.0001, "step": 36592 }, { "epoch": 34.14, "learning_rate": 4.573274253731343e-05, "loss": 0.0001, "step": 36596 }, { "epoch": 34.14, "learning_rate": 4.573227611940299e-05, "loss": 0.0003, "step": 36600 }, { "epoch": 34.15, "learning_rate": 4.573180970149254e-05, "loss": 0.0001, "step": 36604 }, { "epoch": 34.15, "learning_rate": 4.573134328358209e-05, "loss": 0.0, "step": 36608 }, { "epoch": 34.15, "learning_rate": 4.5730876865671644e-05, "loss": 0.0001, "step": 36612 }, { "epoch": 34.16, "learning_rate": 4.57304104477612e-05, "loss": 0.002, "step": 36616 }, { "epoch": 34.16, "learning_rate": 4.5729944029850747e-05, "loss": 0.0002, "step": 36620 }, { "epoch": 34.16, "learning_rate": 4.57294776119403e-05, "loss": 0.0003, "step": 36624 }, { "epoch": 34.17, "learning_rate": 4.572901119402985e-05, "loss": 0.0003, "step": 36628 }, { "epoch": 34.17, "learning_rate": 4.5728544776119405e-05, "loss": 0.0001, "step": 36632 }, { "epoch": 34.18, "learning_rate": 4.572807835820896e-05, "loss": 0.0002, "step": 36636 }, { "epoch": 34.18, "learning_rate": 4.572761194029851e-05, "loss": 0.0001, "step": 36640 }, { "epoch": 34.18, "learning_rate": 4.572714552238806e-05, "loss": 0.0015, "step": 36644 }, { "epoch": 34.19, "learning_rate": 4.572667910447762e-05, "loss": 0.0003, "step": 36648 }, { "epoch": 34.19, "learning_rate": 4.5726212686567165e-05, "loss": 0.0016, "step": 36652 }, { "epoch": 34.19, "learning_rate": 4.5725746268656714e-05, "loss": 0.0003, "step": 36656 }, { "epoch": 34.2, "learning_rate": 4.5725279850746275e-05, "loss": 0.0002, "step": 36660 }, { "epoch": 34.2, "learning_rate": 4.5724813432835823e-05, "loss": 0.0, "step": 36664 }, { "epoch": 34.21, "learning_rate": 4.572434701492537e-05, "loss": 0.0001, "step": 36668 }, { "epoch": 34.21, "learning_rate": 4.5723880597014926e-05, "loss": 0.0001, "step": 36672 }, { "epoch": 34.21, "learning_rate": 4.572341417910448e-05, "loss": 0.0003, "step": 36676 }, { "epoch": 34.22, "learning_rate": 4.572294776119403e-05, "loss": 0.0001, "step": 36680 }, { "epoch": 34.22, "learning_rate": 4.5722481343283584e-05, "loss": 0.0002, "step": 36684 }, { "epoch": 34.22, "learning_rate": 4.572201492537313e-05, "loss": 0.0016, "step": 36688 }, { "epoch": 34.23, "learning_rate": 4.5721548507462694e-05, "loss": 0.0011, "step": 36692 }, { "epoch": 34.23, "learning_rate": 4.572108208955224e-05, "loss": 0.0, "step": 36696 }, { "epoch": 34.24, "learning_rate": 4.572061567164179e-05, "loss": 0.0001, "step": 36700 }, { "epoch": 34.24, "learning_rate": 4.5720149253731345e-05, "loss": 0.0, "step": 36704 }, { "epoch": 34.24, "learning_rate": 4.57196828358209e-05, "loss": 0.0, "step": 36708 }, { "epoch": 34.25, "learning_rate": 4.571921641791045e-05, "loss": 0.0, "step": 36712 }, { "epoch": 34.25, "learning_rate": 4.571875e-05, "loss": 0.0022, "step": 36716 }, { "epoch": 34.25, "learning_rate": 4.571828358208956e-05, "loss": 0.0002, "step": 36720 }, { "epoch": 34.26, "learning_rate": 4.5717817164179106e-05, "loss": 0.0001, "step": 36724 }, { "epoch": 34.26, "learning_rate": 4.571735074626866e-05, "loss": 0.0001, "step": 36728 }, { "epoch": 34.26, "learning_rate": 4.571688432835821e-05, "loss": 0.0064, "step": 36732 }, { "epoch": 34.27, "learning_rate": 4.5716417910447764e-05, "loss": 0.0001, "step": 36736 }, { "epoch": 34.27, "learning_rate": 4.571595149253732e-05, "loss": 0.0001, "step": 36740 }, { "epoch": 34.28, "learning_rate": 4.571548507462687e-05, "loss": 0.0001, "step": 36744 }, { "epoch": 34.28, "learning_rate": 4.5715018656716416e-05, "loss": 0.0001, "step": 36748 }, { "epoch": 34.28, "learning_rate": 4.571455223880598e-05, "loss": 0.0, "step": 36752 }, { "epoch": 34.29, "learning_rate": 4.5714085820895525e-05, "loss": 0.012, "step": 36756 }, { "epoch": 34.29, "learning_rate": 4.5713619402985073e-05, "loss": 0.0025, "step": 36760 }, { "epoch": 34.29, "learning_rate": 4.571315298507463e-05, "loss": 0.0001, "step": 36764 }, { "epoch": 34.3, "learning_rate": 4.571268656716418e-05, "loss": 0.0016, "step": 36768 }, { "epoch": 34.3, "learning_rate": 4.571222014925373e-05, "loss": 0.0001, "step": 36772 }, { "epoch": 34.31, "learning_rate": 4.5711753731343286e-05, "loss": 0.0007, "step": 36776 }, { "epoch": 34.31, "learning_rate": 4.571128731343284e-05, "loss": 0.0011, "step": 36780 }, { "epoch": 34.31, "learning_rate": 4.571082089552239e-05, "loss": 0.0, "step": 36784 }, { "epoch": 34.32, "learning_rate": 4.5710354477611944e-05, "loss": 0.0008, "step": 36788 }, { "epoch": 34.32, "learning_rate": 4.570988805970149e-05, "loss": 0.002, "step": 36792 }, { "epoch": 34.32, "learning_rate": 4.570942164179105e-05, "loss": 0.0001, "step": 36796 }, { "epoch": 34.33, "learning_rate": 4.57089552238806e-05, "loss": 0.0, "step": 36800 }, { "epoch": 34.33, "learning_rate": 4.570848880597015e-05, "loss": 0.0001, "step": 36804 }, { "epoch": 34.34, "learning_rate": 4.57080223880597e-05, "loss": 0.0005, "step": 36808 }, { "epoch": 34.34, "learning_rate": 4.570755597014926e-05, "loss": 0.0002, "step": 36812 }, { "epoch": 34.34, "learning_rate": 4.570708955223881e-05, "loss": 0.0001, "step": 36816 }, { "epoch": 34.35, "learning_rate": 4.5706623134328356e-05, "loss": 0.0001, "step": 36820 }, { "epoch": 34.35, "learning_rate": 4.570615671641791e-05, "loss": 0.0008, "step": 36824 }, { "epoch": 34.35, "learning_rate": 4.5705690298507466e-05, "loss": 0.0006, "step": 36828 }, { "epoch": 34.36, "learning_rate": 4.5705223880597014e-05, "loss": 0.0, "step": 36832 }, { "epoch": 34.36, "learning_rate": 4.570475746268657e-05, "loss": 0.0003, "step": 36836 }, { "epoch": 34.37, "learning_rate": 4.570429104477612e-05, "loss": 0.0036, "step": 36840 }, { "epoch": 34.37, "learning_rate": 4.570382462686567e-05, "loss": 0.0, "step": 36844 }, { "epoch": 34.37, "learning_rate": 4.570335820895523e-05, "loss": 0.0, "step": 36848 }, { "epoch": 34.38, "learning_rate": 4.5702891791044775e-05, "loss": 0.0, "step": 36852 }, { "epoch": 34.38, "learning_rate": 4.570242537313433e-05, "loss": 0.0, "step": 36856 }, { "epoch": 34.38, "learning_rate": 4.5701958955223885e-05, "loss": 0.001, "step": 36860 }, { "epoch": 34.39, "learning_rate": 4.570149253731343e-05, "loss": 0.0139, "step": 36864 }, { "epoch": 34.39, "learning_rate": 4.570102611940299e-05, "loss": 0.0001, "step": 36868 }, { "epoch": 34.4, "learning_rate": 4.570055970149254e-05, "loss": 0.0005, "step": 36872 }, { "epoch": 34.4, "learning_rate": 4.570009328358209e-05, "loss": 0.0002, "step": 36876 }, { "epoch": 34.4, "learning_rate": 4.5699626865671646e-05, "loss": 0.0008, "step": 36880 }, { "epoch": 34.41, "learning_rate": 4.5699160447761194e-05, "loss": 0.0003, "step": 36884 }, { "epoch": 34.41, "learning_rate": 4.569869402985075e-05, "loss": 0.0, "step": 36888 }, { "epoch": 34.41, "learning_rate": 4.5698227611940304e-05, "loss": 0.0003, "step": 36892 }, { "epoch": 34.42, "learning_rate": 4.569776119402985e-05, "loss": 0.0001, "step": 36896 }, { "epoch": 34.42, "learning_rate": 4.56972947761194e-05, "loss": 0.0002, "step": 36900 }, { "epoch": 34.43, "learning_rate": 4.569682835820896e-05, "loss": 0.0035, "step": 36904 }, { "epoch": 34.43, "learning_rate": 4.569636194029851e-05, "loss": 0.0023, "step": 36908 }, { "epoch": 34.43, "learning_rate": 4.569589552238806e-05, "loss": 0.0009, "step": 36912 }, { "epoch": 34.44, "learning_rate": 4.569542910447761e-05, "loss": 0.0073, "step": 36916 }, { "epoch": 34.44, "learning_rate": 4.569496268656717e-05, "loss": 0.0006, "step": 36920 }, { "epoch": 34.44, "learning_rate": 4.5694496268656716e-05, "loss": 0.0004, "step": 36924 }, { "epoch": 34.45, "learning_rate": 4.569402985074627e-05, "loss": 0.0003, "step": 36928 }, { "epoch": 34.45, "learning_rate": 4.5693563432835826e-05, "loss": 0.0001, "step": 36932 }, { "epoch": 34.46, "learning_rate": 4.5693097014925374e-05, "loss": 0.0001, "step": 36936 }, { "epoch": 34.46, "learning_rate": 4.569263059701493e-05, "loss": 0.0029, "step": 36940 }, { "epoch": 34.46, "learning_rate": 4.569216417910448e-05, "loss": 0.0003, "step": 36944 }, { "epoch": 34.47, "learning_rate": 4.569169776119403e-05, "loss": 0.0005, "step": 36948 }, { "epoch": 34.47, "learning_rate": 4.569123134328359e-05, "loss": 0.001, "step": 36952 }, { "epoch": 34.47, "learning_rate": 4.5690764925373135e-05, "loss": 0.0001, "step": 36956 }, { "epoch": 34.48, "learning_rate": 4.569029850746268e-05, "loss": 0.0004, "step": 36960 }, { "epoch": 34.48, "learning_rate": 4.5689832089552245e-05, "loss": 0.0003, "step": 36964 }, { "epoch": 34.49, "learning_rate": 4.568936567164179e-05, "loss": 0.0001, "step": 36968 }, { "epoch": 34.49, "learning_rate": 4.568889925373134e-05, "loss": 0.0025, "step": 36972 }, { "epoch": 34.49, "learning_rate": 4.5688432835820896e-05, "loss": 0.0008, "step": 36976 }, { "epoch": 34.5, "learning_rate": 4.568796641791045e-05, "loss": 0.0018, "step": 36980 }, { "epoch": 34.5, "learning_rate": 4.56875e-05, "loss": 0.0023, "step": 36984 }, { "epoch": 34.5, "learning_rate": 4.5687033582089554e-05, "loss": 0.0002, "step": 36988 }, { "epoch": 34.51, "learning_rate": 4.568656716417911e-05, "loss": 0.0002, "step": 36992 }, { "epoch": 34.51, "learning_rate": 4.568610074626866e-05, "loss": 0.0044, "step": 36996 }, { "epoch": 34.51, "learning_rate": 4.568563432835821e-05, "loss": 0.0001, "step": 37000 }, { "epoch": 34.51, "eval_exact_match": 0.7504835589941973, "eval_exec": 0.7794970986460348, "eval_loss": 0.40531110763549805, "eval_runtime": 1164.9382, "eval_samples_per_second": 0.888, "step": 37000 }, { "epoch": 34.52, "learning_rate": 4.568516791044776e-05, "loss": 0.0, "step": 37004 }, { "epoch": 34.52, "learning_rate": 4.5684701492537315e-05, "loss": 0.0019, "step": 37008 }, { "epoch": 34.53, "learning_rate": 4.568423507462687e-05, "loss": 0.0003, "step": 37012 }, { "epoch": 34.53, "learning_rate": 4.568376865671642e-05, "loss": 0.0016, "step": 37016 }, { "epoch": 34.53, "learning_rate": 4.568330223880597e-05, "loss": 0.0001, "step": 37020 }, { "epoch": 34.54, "learning_rate": 4.568283582089553e-05, "loss": 0.0, "step": 37024 }, { "epoch": 34.54, "learning_rate": 4.5682369402985076e-05, "loss": 0.0001, "step": 37028 }, { "epoch": 34.54, "learning_rate": 4.568190298507463e-05, "loss": 0.0001, "step": 37032 }, { "epoch": 34.55, "learning_rate": 4.568143656716418e-05, "loss": 0.0, "step": 37036 }, { "epoch": 34.55, "learning_rate": 4.5680970149253734e-05, "loss": 0.0003, "step": 37040 }, { "epoch": 34.56, "learning_rate": 4.568050373134329e-05, "loss": 0.0035, "step": 37044 }, { "epoch": 34.56, "learning_rate": 4.568003731343284e-05, "loss": 0.0, "step": 37048 }, { "epoch": 34.56, "learning_rate": 4.567957089552239e-05, "loss": 0.0002, "step": 37052 }, { "epoch": 34.57, "learning_rate": 4.567910447761195e-05, "loss": 0.0077, "step": 37056 }, { "epoch": 34.57, "learning_rate": 4.5678638059701495e-05, "loss": 0.0022, "step": 37060 }, { "epoch": 34.57, "learning_rate": 4.567817164179104e-05, "loss": 0.0003, "step": 37064 }, { "epoch": 34.58, "learning_rate": 4.56777052238806e-05, "loss": 0.0, "step": 37068 }, { "epoch": 34.58, "learning_rate": 4.567723880597015e-05, "loss": 0.0, "step": 37072 }, { "epoch": 34.59, "learning_rate": 4.56767723880597e-05, "loss": 0.0001, "step": 37076 }, { "epoch": 34.59, "learning_rate": 4.5676305970149256e-05, "loss": 0.004, "step": 37080 }, { "epoch": 34.59, "learning_rate": 4.567583955223881e-05, "loss": 0.0001, "step": 37084 }, { "epoch": 34.6, "learning_rate": 4.567537313432836e-05, "loss": 0.0003, "step": 37088 }, { "epoch": 34.6, "learning_rate": 4.5674906716417914e-05, "loss": 0.0006, "step": 37092 }, { "epoch": 34.6, "learning_rate": 4.567444029850746e-05, "loss": 0.0001, "step": 37096 }, { "epoch": 34.61, "learning_rate": 4.567397388059702e-05, "loss": 0.0001, "step": 37100 }, { "epoch": 34.61, "learning_rate": 4.567350746268657e-05, "loss": 0.0, "step": 37104 }, { "epoch": 34.62, "learning_rate": 4.567304104477612e-05, "loss": 0.0009, "step": 37108 }, { "epoch": 34.62, "learning_rate": 4.5672574626865675e-05, "loss": 0.0002, "step": 37112 }, { "epoch": 34.62, "learning_rate": 4.567210820895523e-05, "loss": 0.0011, "step": 37116 }, { "epoch": 34.63, "learning_rate": 4.567164179104478e-05, "loss": 0.0, "step": 37120 }, { "epoch": 34.63, "learning_rate": 4.5671175373134326e-05, "loss": 0.0011, "step": 37124 }, { "epoch": 34.63, "learning_rate": 4.567070895522388e-05, "loss": 0.0004, "step": 37128 }, { "epoch": 34.64, "learning_rate": 4.5670242537313436e-05, "loss": 0.0008, "step": 37132 }, { "epoch": 34.64, "learning_rate": 4.5669776119402984e-05, "loss": 0.0001, "step": 37136 }, { "epoch": 34.65, "learning_rate": 4.566930970149254e-05, "loss": 0.0002, "step": 37140 }, { "epoch": 34.65, "learning_rate": 4.5668843283582094e-05, "loss": 0.0001, "step": 37144 }, { "epoch": 34.65, "learning_rate": 4.566837686567164e-05, "loss": 0.0009, "step": 37148 }, { "epoch": 34.66, "learning_rate": 4.56679104477612e-05, "loss": 0.0001, "step": 37152 }, { "epoch": 34.66, "learning_rate": 4.5667444029850745e-05, "loss": 0.0003, "step": 37156 }, { "epoch": 34.66, "learning_rate": 4.56669776119403e-05, "loss": 0.0029, "step": 37160 }, { "epoch": 34.67, "learning_rate": 4.5666511194029855e-05, "loss": 0.0001, "step": 37164 }, { "epoch": 34.67, "learning_rate": 4.56660447761194e-05, "loss": 0.0136, "step": 37168 }, { "epoch": 34.68, "learning_rate": 4.566557835820896e-05, "loss": 0.0019, "step": 37172 }, { "epoch": 34.68, "learning_rate": 4.566511194029851e-05, "loss": 0.0005, "step": 37176 }, { "epoch": 34.68, "learning_rate": 4.566464552238806e-05, "loss": 0.0005, "step": 37180 }, { "epoch": 34.69, "learning_rate": 4.5664179104477616e-05, "loss": 0.0008, "step": 37184 }, { "epoch": 34.69, "learning_rate": 4.5663712686567164e-05, "loss": 0.0, "step": 37188 }, { "epoch": 34.69, "learning_rate": 4.566324626865672e-05, "loss": 0.0, "step": 37192 }, { "epoch": 34.7, "learning_rate": 4.5662779850746274e-05, "loss": 0.0022, "step": 37196 }, { "epoch": 34.7, "learning_rate": 4.566231343283582e-05, "loss": 0.0001, "step": 37200 }, { "epoch": 34.71, "learning_rate": 4.566184701492538e-05, "loss": 0.0001, "step": 37204 }, { "epoch": 34.71, "learning_rate": 4.566138059701493e-05, "loss": 0.003, "step": 37208 }, { "epoch": 34.71, "learning_rate": 4.566091417910448e-05, "loss": 0.0002, "step": 37212 }, { "epoch": 34.72, "learning_rate": 4.566044776119403e-05, "loss": 0.0001, "step": 37216 }, { "epoch": 34.72, "learning_rate": 4.565998134328359e-05, "loss": 0.0001, "step": 37220 }, { "epoch": 34.72, "learning_rate": 4.565951492537314e-05, "loss": 0.005, "step": 37224 }, { "epoch": 34.73, "learning_rate": 4.5659048507462686e-05, "loss": 0.0066, "step": 37228 }, { "epoch": 34.73, "learning_rate": 4.565858208955224e-05, "loss": 0.0001, "step": 37232 }, { "epoch": 34.73, "learning_rate": 4.5658115671641796e-05, "loss": 0.0005, "step": 37236 }, { "epoch": 34.74, "learning_rate": 4.5657649253731344e-05, "loss": 0.0005, "step": 37240 }, { "epoch": 34.74, "learning_rate": 4.56571828358209e-05, "loss": 0.0, "step": 37244 }, { "epoch": 34.75, "learning_rate": 4.565671641791045e-05, "loss": 0.0042, "step": 37248 }, { "epoch": 34.75, "learning_rate": 4.565625e-05, "loss": 0.0, "step": 37252 }, { "epoch": 34.75, "learning_rate": 4.565578358208956e-05, "loss": 0.0, "step": 37256 }, { "epoch": 34.76, "learning_rate": 4.5655317164179105e-05, "loss": 0.0009, "step": 37260 }, { "epoch": 34.76, "learning_rate": 4.565485074626866e-05, "loss": 0.0001, "step": 37264 }, { "epoch": 34.76, "learning_rate": 4.5654384328358215e-05, "loss": 0.0011, "step": 37268 }, { "epoch": 34.77, "learning_rate": 4.565391791044776e-05, "loss": 0.0009, "step": 37272 }, { "epoch": 34.77, "learning_rate": 4.565345149253731e-05, "loss": 0.0, "step": 37276 }, { "epoch": 34.78, "learning_rate": 4.565298507462687e-05, "loss": 0.0012, "step": 37280 }, { "epoch": 34.78, "learning_rate": 4.565251865671642e-05, "loss": 0.0041, "step": 37284 }, { "epoch": 34.78, "learning_rate": 4.565205223880597e-05, "loss": 0.0, "step": 37288 }, { "epoch": 34.79, "learning_rate": 4.5651585820895524e-05, "loss": 0.0002, "step": 37292 }, { "epoch": 34.79, "learning_rate": 4.565111940298508e-05, "loss": 0.0001, "step": 37296 }, { "epoch": 34.79, "learning_rate": 4.565065298507463e-05, "loss": 0.0002, "step": 37300 }, { "epoch": 34.8, "learning_rate": 4.565018656716418e-05, "loss": 0.0, "step": 37304 }, { "epoch": 34.8, "learning_rate": 4.564972014925373e-05, "loss": 0.0, "step": 37308 }, { "epoch": 34.81, "learning_rate": 4.5649253731343285e-05, "loss": 0.0013, "step": 37312 }, { "epoch": 34.81, "learning_rate": 4.564878731343284e-05, "loss": 0.0001, "step": 37316 }, { "epoch": 34.81, "learning_rate": 4.564832089552239e-05, "loss": 0.0008, "step": 37320 }, { "epoch": 34.82, "learning_rate": 4.564785447761194e-05, "loss": 0.0007, "step": 37324 }, { "epoch": 34.82, "learning_rate": 4.56473880597015e-05, "loss": 0.0, "step": 37328 }, { "epoch": 34.82, "learning_rate": 4.5646921641791046e-05, "loss": 0.0019, "step": 37332 }, { "epoch": 34.83, "learning_rate": 4.5646455223880594e-05, "loss": 0.0001, "step": 37336 }, { "epoch": 34.83, "learning_rate": 4.5645988805970156e-05, "loss": 0.0046, "step": 37340 }, { "epoch": 34.84, "learning_rate": 4.5645522388059704e-05, "loss": 0.001, "step": 37344 }, { "epoch": 34.84, "learning_rate": 4.564505597014926e-05, "loss": 0.0003, "step": 37348 }, { "epoch": 34.84, "learning_rate": 4.564458955223881e-05, "loss": 0.0002, "step": 37352 }, { "epoch": 34.85, "learning_rate": 4.564412313432836e-05, "loss": 0.0004, "step": 37356 }, { "epoch": 34.85, "learning_rate": 4.5643656716417917e-05, "loss": 0.0009, "step": 37360 }, { "epoch": 34.85, "learning_rate": 4.5643190298507465e-05, "loss": 0.0, "step": 37364 }, { "epoch": 34.86, "learning_rate": 4.564272388059701e-05, "loss": 0.0001, "step": 37368 }, { "epoch": 34.86, "learning_rate": 4.5642257462686575e-05, "loss": 0.0054, "step": 37372 }, { "epoch": 34.87, "learning_rate": 4.564179104477612e-05, "loss": 0.0017, "step": 37376 }, { "epoch": 34.87, "learning_rate": 4.564132462686567e-05, "loss": 0.0002, "step": 37380 }, { "epoch": 34.87, "learning_rate": 4.5640858208955226e-05, "loss": 0.0001, "step": 37384 }, { "epoch": 34.88, "learning_rate": 4.564039179104478e-05, "loss": 0.0002, "step": 37388 }, { "epoch": 34.88, "learning_rate": 4.563992537313433e-05, "loss": 0.0001, "step": 37392 }, { "epoch": 34.88, "learning_rate": 4.5639458955223884e-05, "loss": 0.0001, "step": 37396 }, { "epoch": 34.89, "learning_rate": 4.563899253731344e-05, "loss": 0.0001, "step": 37400 }, { "epoch": 34.89, "learning_rate": 4.563852611940299e-05, "loss": 0.0001, "step": 37404 }, { "epoch": 34.9, "learning_rate": 4.563805970149254e-05, "loss": 0.0001, "step": 37408 }, { "epoch": 34.9, "learning_rate": 4.563759328358209e-05, "loss": 0.0003, "step": 37412 }, { "epoch": 34.9, "learning_rate": 4.5637126865671645e-05, "loss": 0.0, "step": 37416 }, { "epoch": 34.91, "learning_rate": 4.56366604477612e-05, "loss": 0.0, "step": 37420 }, { "epoch": 34.91, "learning_rate": 4.563619402985075e-05, "loss": 0.0013, "step": 37424 }, { "epoch": 34.91, "learning_rate": 4.5635727611940296e-05, "loss": 0.0, "step": 37428 }, { "epoch": 34.92, "learning_rate": 4.563526119402986e-05, "loss": 0.0, "step": 37432 }, { "epoch": 34.92, "learning_rate": 4.5634794776119406e-05, "loss": 0.0028, "step": 37436 }, { "epoch": 34.93, "learning_rate": 4.5634328358208954e-05, "loss": 0.0, "step": 37440 }, { "epoch": 34.93, "learning_rate": 4.563386194029851e-05, "loss": 0.0, "step": 37444 }, { "epoch": 34.93, "learning_rate": 4.5633395522388064e-05, "loss": 0.0005, "step": 37448 }, { "epoch": 34.94, "learning_rate": 4.563292910447761e-05, "loss": 0.0001, "step": 37452 }, { "epoch": 34.94, "learning_rate": 4.5632462686567167e-05, "loss": 0.0021, "step": 37456 }, { "epoch": 34.94, "learning_rate": 4.563199626865672e-05, "loss": 0.0, "step": 37460 }, { "epoch": 34.95, "learning_rate": 4.563152985074627e-05, "loss": 0.0005, "step": 37464 }, { "epoch": 34.95, "learning_rate": 4.5631063432835825e-05, "loss": 0.0005, "step": 37468 }, { "epoch": 34.96, "learning_rate": 4.563059701492537e-05, "loss": 0.0001, "step": 37472 }, { "epoch": 34.96, "learning_rate": 4.563013059701493e-05, "loss": 0.0101, "step": 37476 }, { "epoch": 34.96, "learning_rate": 4.562966417910448e-05, "loss": 0.0, "step": 37480 }, { "epoch": 34.97, "learning_rate": 4.562919776119403e-05, "loss": 0.0001, "step": 37484 }, { "epoch": 34.97, "learning_rate": 4.562873134328358e-05, "loss": 0.0044, "step": 37488 }, { "epoch": 34.97, "learning_rate": 4.562826492537314e-05, "loss": 0.0001, "step": 37492 }, { "epoch": 34.98, "learning_rate": 4.562779850746269e-05, "loss": 0.0002, "step": 37496 }, { "epoch": 34.98, "learning_rate": 4.562733208955224e-05, "loss": 0.0012, "step": 37500 }, { "epoch": 34.98, "eval_exact_match": 0.7282398452611218, "eval_exec": 0.7630560928433269, "eval_loss": 0.4141908288002014, "eval_runtime": 1965.9572, "eval_samples_per_second": 0.526, "step": 37500 }, { "epoch": 34.98, "learning_rate": 4.562686567164179e-05, "loss": 0.0023, "step": 37504 }, { "epoch": 34.99, "learning_rate": 4.5626399253731347e-05, "loss": 0.0028, "step": 37508 }, { "epoch": 34.99, "learning_rate": 4.56259328358209e-05, "loss": 0.0001, "step": 37512 }, { "epoch": 35.0, "learning_rate": 4.562546641791045e-05, "loss": 0.0001, "step": 37516 }, { "epoch": 35.0, "learning_rate": 4.5625e-05, "loss": 0.0002, "step": 37520 }, { "epoch": 35.0, "learning_rate": 4.562453358208956e-05, "loss": 0.0001, "step": 37524 }, { "epoch": 35.01, "learning_rate": 4.562406716417911e-05, "loss": 0.0069, "step": 37528 }, { "epoch": 35.01, "learning_rate": 4.5623600746268656e-05, "loss": 0.0007, "step": 37532 }, { "epoch": 35.01, "learning_rate": 4.562313432835821e-05, "loss": 0.0001, "step": 37536 }, { "epoch": 35.02, "learning_rate": 4.5622667910447765e-05, "loss": 0.0001, "step": 37540 }, { "epoch": 35.02, "learning_rate": 4.5622201492537314e-05, "loss": 0.0001, "step": 37544 }, { "epoch": 35.03, "learning_rate": 4.562173507462687e-05, "loss": 0.0001, "step": 37548 }, { "epoch": 35.03, "learning_rate": 4.5621268656716423e-05, "loss": 0.0, "step": 37552 }, { "epoch": 35.03, "learning_rate": 4.562080223880597e-05, "loss": 0.0, "step": 37556 }, { "epoch": 35.04, "learning_rate": 4.5620335820895526e-05, "loss": 0.0, "step": 37560 }, { "epoch": 35.04, "learning_rate": 4.5619869402985075e-05, "loss": 0.0001, "step": 37564 }, { "epoch": 35.04, "learning_rate": 4.561940298507463e-05, "loss": 0.0009, "step": 37568 }, { "epoch": 35.05, "learning_rate": 4.5618936567164184e-05, "loss": 0.0, "step": 37572 }, { "epoch": 35.05, "learning_rate": 4.561847014925373e-05, "loss": 0.0001, "step": 37576 }, { "epoch": 35.06, "learning_rate": 4.561800373134328e-05, "loss": 0.0008, "step": 37580 }, { "epoch": 35.06, "learning_rate": 4.561753731343284e-05, "loss": 0.0001, "step": 37584 }, { "epoch": 35.06, "learning_rate": 4.561707089552239e-05, "loss": 0.0007, "step": 37588 }, { "epoch": 35.07, "learning_rate": 4.561660447761194e-05, "loss": 0.0001, "step": 37592 }, { "epoch": 35.07, "learning_rate": 4.5616138059701494e-05, "loss": 0.0002, "step": 37596 }, { "epoch": 35.07, "learning_rate": 4.561567164179105e-05, "loss": 0.0003, "step": 37600 }, { "epoch": 35.08, "learning_rate": 4.5615205223880597e-05, "loss": 0.0, "step": 37604 }, { "epoch": 35.08, "learning_rate": 4.561473880597015e-05, "loss": 0.0, "step": 37608 }, { "epoch": 35.09, "learning_rate": 4.5614272388059706e-05, "loss": 0.0, "step": 37612 }, { "epoch": 35.09, "learning_rate": 4.5613805970149254e-05, "loss": 0.0, "step": 37616 }, { "epoch": 35.09, "learning_rate": 4.561333955223881e-05, "loss": 0.0, "step": 37620 }, { "epoch": 35.1, "learning_rate": 4.561287313432836e-05, "loss": 0.0002, "step": 37624 }, { "epoch": 35.1, "learning_rate": 4.561240671641791e-05, "loss": 0.0, "step": 37628 }, { "epoch": 35.1, "learning_rate": 4.561194029850747e-05, "loss": 0.0, "step": 37632 }, { "epoch": 35.11, "learning_rate": 4.5611473880597015e-05, "loss": 0.0001, "step": 37636 }, { "epoch": 35.11, "learning_rate": 4.5611007462686564e-05, "loss": 0.0, "step": 37640 }, { "epoch": 35.12, "learning_rate": 4.5610541044776125e-05, "loss": 0.0, "step": 37644 }, { "epoch": 35.12, "learning_rate": 4.5610074626865673e-05, "loss": 0.0019, "step": 37648 }, { "epoch": 35.12, "learning_rate": 4.560960820895522e-05, "loss": 0.0007, "step": 37652 }, { "epoch": 35.13, "learning_rate": 4.5609141791044776e-05, "loss": 0.0001, "step": 37656 }, { "epoch": 35.13, "learning_rate": 4.560867537313433e-05, "loss": 0.0001, "step": 37660 }, { "epoch": 35.13, "learning_rate": 4.560820895522388e-05, "loss": 0.0, "step": 37664 }, { "epoch": 35.14, "learning_rate": 4.5607742537313434e-05, "loss": 0.0001, "step": 37668 }, { "epoch": 35.14, "learning_rate": 4.560727611940299e-05, "loss": 0.0004, "step": 37672 }, { "epoch": 35.15, "learning_rate": 4.5606809701492544e-05, "loss": 0.0002, "step": 37676 }, { "epoch": 35.15, "learning_rate": 4.560634328358209e-05, "loss": 0.0, "step": 37680 }, { "epoch": 35.15, "learning_rate": 4.560587686567164e-05, "loss": 0.0038, "step": 37684 }, { "epoch": 35.16, "learning_rate": 4.5605410447761195e-05, "loss": 0.0, "step": 37688 }, { "epoch": 35.16, "learning_rate": 4.560494402985075e-05, "loss": 0.0001, "step": 37692 }, { "epoch": 35.16, "learning_rate": 4.56044776119403e-05, "loss": 0.0, "step": 37696 }, { "epoch": 35.17, "learning_rate": 4.560401119402985e-05, "loss": 0.0001, "step": 37700 }, { "epoch": 35.17, "learning_rate": 4.560354477611941e-05, "loss": 0.0006, "step": 37704 }, { "epoch": 35.18, "learning_rate": 4.5603078358208956e-05, "loss": 0.0, "step": 37708 }, { "epoch": 35.18, "learning_rate": 4.560261194029851e-05, "loss": 0.0003, "step": 37712 }, { "epoch": 35.18, "learning_rate": 4.560214552238806e-05, "loss": 0.0001, "step": 37716 }, { "epoch": 35.19, "learning_rate": 4.5601679104477614e-05, "loss": 0.0001, "step": 37720 }, { "epoch": 35.19, "learning_rate": 4.560121268656717e-05, "loss": 0.0031, "step": 37724 }, { "epoch": 35.19, "learning_rate": 4.560074626865672e-05, "loss": 0.0024, "step": 37728 }, { "epoch": 35.2, "learning_rate": 4.560027985074627e-05, "loss": 0.0009, "step": 37732 }, { "epoch": 35.2, "learning_rate": 4.559981343283583e-05, "loss": 0.0, "step": 37736 }, { "epoch": 35.21, "learning_rate": 4.5599347014925375e-05, "loss": 0.0002, "step": 37740 }, { "epoch": 35.21, "learning_rate": 4.5598880597014923e-05, "loss": 0.0001, "step": 37744 }, { "epoch": 35.21, "learning_rate": 4.559841417910448e-05, "loss": 0.0006, "step": 37748 }, { "epoch": 35.22, "learning_rate": 4.559794776119403e-05, "loss": 0.0, "step": 37752 }, { "epoch": 35.22, "learning_rate": 4.559748134328358e-05, "loss": 0.0005, "step": 37756 }, { "epoch": 35.22, "learning_rate": 4.5597014925373136e-05, "loss": 0.0035, "step": 37760 }, { "epoch": 35.23, "learning_rate": 4.559654850746269e-05, "loss": 0.0001, "step": 37764 }, { "epoch": 35.23, "learning_rate": 4.559608208955224e-05, "loss": 0.0012, "step": 37768 }, { "epoch": 35.24, "learning_rate": 4.5595615671641794e-05, "loss": 0.0001, "step": 37772 }, { "epoch": 35.24, "learning_rate": 4.559514925373134e-05, "loss": 0.0022, "step": 37776 }, { "epoch": 35.24, "learning_rate": 4.55946828358209e-05, "loss": 0.0003, "step": 37780 }, { "epoch": 35.25, "learning_rate": 4.559421641791045e-05, "loss": 0.0002, "step": 37784 }, { "epoch": 35.25, "learning_rate": 4.559375e-05, "loss": 0.0003, "step": 37788 }, { "epoch": 35.25, "learning_rate": 4.5593283582089555e-05, "loss": 0.0008, "step": 37792 }, { "epoch": 35.26, "learning_rate": 4.559281716417911e-05, "loss": 0.0, "step": 37796 }, { "epoch": 35.26, "learning_rate": 4.559235074626866e-05, "loss": 0.0, "step": 37800 }, { "epoch": 35.26, "learning_rate": 4.5591884328358206e-05, "loss": 0.0, "step": 37804 }, { "epoch": 35.27, "learning_rate": 4.559141791044776e-05, "loss": 0.0002, "step": 37808 }, { "epoch": 35.27, "learning_rate": 4.5590951492537316e-05, "loss": 0.0001, "step": 37812 }, { "epoch": 35.28, "learning_rate": 4.5590485074626864e-05, "loss": 0.0009, "step": 37816 }, { "epoch": 35.28, "learning_rate": 4.559001865671642e-05, "loss": 0.0006, "step": 37820 }, { "epoch": 35.28, "learning_rate": 4.5589552238805974e-05, "loss": 0.0, "step": 37824 }, { "epoch": 35.29, "learning_rate": 4.558908582089552e-05, "loss": 0.0002, "step": 37828 }, { "epoch": 35.29, "learning_rate": 4.558861940298508e-05, "loss": 0.0002, "step": 37832 }, { "epoch": 35.29, "learning_rate": 4.5588152985074625e-05, "loss": 0.0002, "step": 37836 }, { "epoch": 35.3, "learning_rate": 4.558768656716419e-05, "loss": 0.0001, "step": 37840 }, { "epoch": 35.3, "learning_rate": 4.5587220149253735e-05, "loss": 0.0, "step": 37844 }, { "epoch": 35.31, "learning_rate": 4.558675373134328e-05, "loss": 0.0001, "step": 37848 }, { "epoch": 35.31, "learning_rate": 4.558628731343284e-05, "loss": 0.0001, "step": 37852 }, { "epoch": 35.31, "learning_rate": 4.558582089552239e-05, "loss": 0.0, "step": 37856 }, { "epoch": 35.32, "learning_rate": 4.558535447761194e-05, "loss": 0.0, "step": 37860 }, { "epoch": 35.32, "learning_rate": 4.5584888059701496e-05, "loss": 0.0001, "step": 37864 }, { "epoch": 35.32, "learning_rate": 4.5584421641791044e-05, "loss": 0.0, "step": 37868 }, { "epoch": 35.33, "learning_rate": 4.55839552238806e-05, "loss": 0.0004, "step": 37872 }, { "epoch": 35.33, "learning_rate": 4.5583488805970154e-05, "loss": 0.0001, "step": 37876 }, { "epoch": 35.34, "learning_rate": 4.55830223880597e-05, "loss": 0.0, "step": 37880 }, { "epoch": 35.34, "learning_rate": 4.558255597014926e-05, "loss": 0.0002, "step": 37884 }, { "epoch": 35.34, "learning_rate": 4.558208955223881e-05, "loss": 0.0, "step": 37888 }, { "epoch": 35.35, "learning_rate": 4.558162313432836e-05, "loss": 0.0, "step": 37892 }, { "epoch": 35.35, "learning_rate": 4.558115671641791e-05, "loss": 0.0001, "step": 37896 }, { "epoch": 35.35, "learning_rate": 4.558069029850747e-05, "loss": 0.0, "step": 37900 }, { "epoch": 35.36, "learning_rate": 4.558022388059702e-05, "loss": 0.0001, "step": 37904 }, { "epoch": 35.36, "learning_rate": 4.5579757462686566e-05, "loss": 0.0, "step": 37908 }, { "epoch": 35.37, "learning_rate": 4.557929104477612e-05, "loss": 0.0, "step": 37912 }, { "epoch": 35.37, "learning_rate": 4.5578824626865676e-05, "loss": 0.0004, "step": 37916 }, { "epoch": 35.37, "learning_rate": 4.5578358208955224e-05, "loss": 0.0003, "step": 37920 }, { "epoch": 35.38, "learning_rate": 4.557789179104478e-05, "loss": 0.0082, "step": 37924 }, { "epoch": 35.38, "learning_rate": 4.557742537313433e-05, "loss": 0.0001, "step": 37928 }, { "epoch": 35.38, "learning_rate": 4.557695895522388e-05, "loss": 0.0007, "step": 37932 }, { "epoch": 35.39, "learning_rate": 4.557649253731344e-05, "loss": 0.0001, "step": 37936 }, { "epoch": 35.39, "learning_rate": 4.5576026119402985e-05, "loss": 0.0013, "step": 37940 }, { "epoch": 35.4, "learning_rate": 4.557555970149254e-05, "loss": 0.0005, "step": 37944 }, { "epoch": 35.4, "learning_rate": 4.5575093283582095e-05, "loss": 0.0002, "step": 37948 }, { "epoch": 35.4, "learning_rate": 4.557462686567164e-05, "loss": 0.0001, "step": 37952 }, { "epoch": 35.41, "learning_rate": 4.557416044776119e-05, "loss": 0.0, "step": 37956 }, { "epoch": 35.41, "learning_rate": 4.557369402985075e-05, "loss": 0.0006, "step": 37960 }, { "epoch": 35.41, "learning_rate": 4.55732276119403e-05, "loss": 0.0, "step": 37964 }, { "epoch": 35.42, "learning_rate": 4.557276119402985e-05, "loss": 0.0001, "step": 37968 }, { "epoch": 35.42, "learning_rate": 4.5572294776119404e-05, "loss": 0.0, "step": 37972 }, { "epoch": 35.43, "learning_rate": 4.557182835820896e-05, "loss": 0.0014, "step": 37976 }, { "epoch": 35.43, "learning_rate": 4.557136194029851e-05, "loss": 0.0, "step": 37980 }, { "epoch": 35.43, "learning_rate": 4.557089552238806e-05, "loss": 0.0019, "step": 37984 }, { "epoch": 35.44, "learning_rate": 4.557042910447761e-05, "loss": 0.0021, "step": 37988 }, { "epoch": 35.44, "learning_rate": 4.5569962686567165e-05, "loss": 0.0, "step": 37992 }, { "epoch": 35.44, "learning_rate": 4.556949626865672e-05, "loss": 0.0, "step": 37996 }, { "epoch": 35.45, "learning_rate": 4.556902985074627e-05, "loss": 0.0001, "step": 38000 }, { "epoch": 35.45, "eval_exact_match": 0.7427466150870407, "eval_exec": 0.7717601547388782, "eval_loss": 0.43170931935310364, "eval_runtime": 1115.1121, "eval_samples_per_second": 0.927, "step": 38000 }, { "epoch": 35.45, "learning_rate": 4.556856343283582e-05, "loss": 0.001, "step": 38004 }, { "epoch": 35.46, "learning_rate": 4.556809701492538e-05, "loss": 0.0019, "step": 38008 }, { "epoch": 35.46, "learning_rate": 4.5567630597014926e-05, "loss": 0.001, "step": 38012 }, { "epoch": 35.46, "learning_rate": 4.556716417910448e-05, "loss": 0.0006, "step": 38016 }, { "epoch": 35.47, "learning_rate": 4.5566697761194036e-05, "loss": 0.0, "step": 38020 }, { "epoch": 35.47, "learning_rate": 4.5566231343283584e-05, "loss": 0.0018, "step": 38024 }, { "epoch": 35.47, "learning_rate": 4.556576492537314e-05, "loss": 0.0001, "step": 38028 }, { "epoch": 35.48, "learning_rate": 4.556529850746269e-05, "loss": 0.0003, "step": 38032 }, { "epoch": 35.48, "learning_rate": 4.556483208955224e-05, "loss": 0.0003, "step": 38036 }, { "epoch": 35.49, "learning_rate": 4.55643656716418e-05, "loss": 0.0, "step": 38040 }, { "epoch": 35.49, "learning_rate": 4.5563899253731345e-05, "loss": 0.0, "step": 38044 }, { "epoch": 35.49, "learning_rate": 4.556343283582089e-05, "loss": 0.0, "step": 38048 }, { "epoch": 35.5, "learning_rate": 4.5562966417910455e-05, "loss": 0.0004, "step": 38052 }, { "epoch": 35.5, "learning_rate": 4.55625e-05, "loss": 0.0002, "step": 38056 }, { "epoch": 35.5, "learning_rate": 4.556203358208955e-05, "loss": 0.0001, "step": 38060 }, { "epoch": 35.51, "learning_rate": 4.5561567164179106e-05, "loss": 0.0, "step": 38064 }, { "epoch": 35.51, "learning_rate": 4.556110074626866e-05, "loss": 0.0002, "step": 38068 }, { "epoch": 35.51, "learning_rate": 4.556063432835821e-05, "loss": 0.0, "step": 38072 }, { "epoch": 35.52, "learning_rate": 4.5560167910447764e-05, "loss": 0.0011, "step": 38076 }, { "epoch": 35.52, "learning_rate": 4.555970149253732e-05, "loss": 0.0, "step": 38080 }, { "epoch": 35.53, "learning_rate": 4.555923507462687e-05, "loss": 0.0002, "step": 38084 }, { "epoch": 35.53, "learning_rate": 4.555876865671642e-05, "loss": 0.0001, "step": 38088 }, { "epoch": 35.53, "learning_rate": 4.555830223880597e-05, "loss": 0.0004, "step": 38092 }, { "epoch": 35.54, "learning_rate": 4.5557835820895525e-05, "loss": 0.0011, "step": 38096 }, { "epoch": 35.54, "learning_rate": 4.555736940298508e-05, "loss": 0.0, "step": 38100 }, { "epoch": 35.54, "learning_rate": 4.555690298507463e-05, "loss": 0.0001, "step": 38104 }, { "epoch": 35.55, "learning_rate": 4.5556436567164176e-05, "loss": 0.0011, "step": 38108 }, { "epoch": 35.55, "learning_rate": 4.555597014925374e-05, "loss": 0.0001, "step": 38112 }, { "epoch": 35.56, "learning_rate": 4.5555503731343286e-05, "loss": 0.0, "step": 38116 }, { "epoch": 35.56, "learning_rate": 4.5555037313432834e-05, "loss": 0.0002, "step": 38120 }, { "epoch": 35.56, "learning_rate": 4.555457089552239e-05, "loss": 0.0009, "step": 38124 }, { "epoch": 35.57, "learning_rate": 4.5554104477611944e-05, "loss": 0.0007, "step": 38128 }, { "epoch": 35.57, "learning_rate": 4.555363805970149e-05, "loss": 0.0001, "step": 38132 }, { "epoch": 35.57, "learning_rate": 4.555317164179105e-05, "loss": 0.0, "step": 38136 }, { "epoch": 35.58, "learning_rate": 4.55527052238806e-05, "loss": 0.0003, "step": 38140 }, { "epoch": 35.58, "learning_rate": 4.555223880597015e-05, "loss": 0.0007, "step": 38144 }, { "epoch": 35.59, "learning_rate": 4.5551772388059705e-05, "loss": 0.0008, "step": 38148 }, { "epoch": 35.59, "learning_rate": 4.555130597014925e-05, "loss": 0.0, "step": 38152 }, { "epoch": 35.59, "learning_rate": 4.555083955223881e-05, "loss": 0.0138, "step": 38156 }, { "epoch": 35.6, "learning_rate": 4.555037313432836e-05, "loss": 0.0019, "step": 38160 }, { "epoch": 35.6, "learning_rate": 4.554990671641791e-05, "loss": 0.0, "step": 38164 }, { "epoch": 35.6, "learning_rate": 4.5549440298507466e-05, "loss": 0.0026, "step": 38168 }, { "epoch": 35.61, "learning_rate": 4.554897388059702e-05, "loss": 0.0, "step": 38172 }, { "epoch": 35.61, "learning_rate": 4.554850746268657e-05, "loss": 0.0, "step": 38176 }, { "epoch": 35.62, "learning_rate": 4.5548041044776124e-05, "loss": 0.0003, "step": 38180 }, { "epoch": 35.62, "learning_rate": 4.554757462686567e-05, "loss": 0.0006, "step": 38184 }, { "epoch": 35.62, "learning_rate": 4.554710820895523e-05, "loss": 0.0007, "step": 38188 }, { "epoch": 35.63, "learning_rate": 4.554664179104478e-05, "loss": 0.001, "step": 38192 }, { "epoch": 35.63, "learning_rate": 4.554617537313433e-05, "loss": 0.0048, "step": 38196 }, { "epoch": 35.63, "learning_rate": 4.554570895522388e-05, "loss": 0.0004, "step": 38200 }, { "epoch": 35.64, "learning_rate": 4.554524253731344e-05, "loss": 0.0, "step": 38204 }, { "epoch": 35.64, "learning_rate": 4.554477611940299e-05, "loss": 0.0025, "step": 38208 }, { "epoch": 35.65, "learning_rate": 4.5544309701492536e-05, "loss": 0.0042, "step": 38212 }, { "epoch": 35.65, "learning_rate": 4.554384328358209e-05, "loss": 0.0001, "step": 38216 }, { "epoch": 35.65, "learning_rate": 4.5543376865671646e-05, "loss": 0.0001, "step": 38220 }, { "epoch": 35.66, "learning_rate": 4.5542910447761194e-05, "loss": 0.0016, "step": 38224 }, { "epoch": 35.66, "learning_rate": 4.554244402985075e-05, "loss": 0.0008, "step": 38228 }, { "epoch": 35.66, "learning_rate": 4.5541977611940304e-05, "loss": 0.0003, "step": 38232 }, { "epoch": 35.67, "learning_rate": 4.554151119402985e-05, "loss": 0.0009, "step": 38236 }, { "epoch": 35.67, "learning_rate": 4.554104477611941e-05, "loss": 0.0, "step": 38240 }, { "epoch": 35.68, "learning_rate": 4.5540578358208955e-05, "loss": 0.0, "step": 38244 }, { "epoch": 35.68, "learning_rate": 4.554011194029851e-05, "loss": 0.0001, "step": 38248 }, { "epoch": 35.68, "learning_rate": 4.5539645522388065e-05, "loss": 0.0003, "step": 38252 }, { "epoch": 35.69, "learning_rate": 4.553917910447761e-05, "loss": 0.0004, "step": 38256 }, { "epoch": 35.69, "learning_rate": 4.553871268656716e-05, "loss": 0.0001, "step": 38260 }, { "epoch": 35.69, "learning_rate": 4.553824626865672e-05, "loss": 0.0, "step": 38264 }, { "epoch": 35.7, "learning_rate": 4.553777985074627e-05, "loss": 0.0001, "step": 38268 }, { "epoch": 35.7, "learning_rate": 4.553731343283582e-05, "loss": 0.0001, "step": 38272 }, { "epoch": 35.71, "learning_rate": 4.5536847014925374e-05, "loss": 0.0, "step": 38276 }, { "epoch": 35.71, "learning_rate": 4.553638059701493e-05, "loss": 0.0005, "step": 38280 }, { "epoch": 35.71, "learning_rate": 4.553591417910448e-05, "loss": 0.0001, "step": 38284 }, { "epoch": 35.72, "learning_rate": 4.553544776119403e-05, "loss": 0.0013, "step": 38288 }, { "epoch": 35.72, "learning_rate": 4.553498134328359e-05, "loss": 0.0002, "step": 38292 }, { "epoch": 35.72, "learning_rate": 4.5534514925373135e-05, "loss": 0.0001, "step": 38296 }, { "epoch": 35.73, "learning_rate": 4.553404850746269e-05, "loss": 0.0001, "step": 38300 }, { "epoch": 35.73, "learning_rate": 4.553358208955224e-05, "loss": 0.0, "step": 38304 }, { "epoch": 35.73, "learning_rate": 4.553311567164179e-05, "loss": 0.0002, "step": 38308 }, { "epoch": 35.74, "learning_rate": 4.553264925373135e-05, "loss": 0.0036, "step": 38312 }, { "epoch": 35.74, "learning_rate": 4.5532182835820896e-05, "loss": 0.0001, "step": 38316 }, { "epoch": 35.75, "learning_rate": 4.5531716417910444e-05, "loss": 0.0002, "step": 38320 }, { "epoch": 35.75, "learning_rate": 4.5531250000000006e-05, "loss": 0.0003, "step": 38324 }, { "epoch": 35.75, "learning_rate": 4.5530783582089554e-05, "loss": 0.0, "step": 38328 }, { "epoch": 35.76, "learning_rate": 4.553031716417911e-05, "loss": 0.0006, "step": 38332 }, { "epoch": 35.76, "learning_rate": 4.552985074626866e-05, "loss": 0.0, "step": 38336 }, { "epoch": 35.76, "learning_rate": 4.552938432835821e-05, "loss": 0.0001, "step": 38340 }, { "epoch": 35.77, "learning_rate": 4.5528917910447767e-05, "loss": 0.0, "step": 38344 }, { "epoch": 35.77, "learning_rate": 4.5528451492537315e-05, "loss": 0.0028, "step": 38348 }, { "epoch": 35.78, "learning_rate": 4.552798507462687e-05, "loss": 0.0001, "step": 38352 }, { "epoch": 35.78, "learning_rate": 4.5527518656716425e-05, "loss": 0.0001, "step": 38356 }, { "epoch": 35.78, "learning_rate": 4.552705223880597e-05, "loss": 0.0007, "step": 38360 }, { "epoch": 35.79, "learning_rate": 4.552658582089552e-05, "loss": 0.0001, "step": 38364 }, { "epoch": 35.79, "learning_rate": 4.5526119402985076e-05, "loss": 0.0001, "step": 38368 }, { "epoch": 35.79, "learning_rate": 4.552565298507463e-05, "loss": 0.0001, "step": 38372 }, { "epoch": 35.8, "learning_rate": 4.552518656716418e-05, "loss": 0.0002, "step": 38376 }, { "epoch": 35.8, "learning_rate": 4.5524720149253734e-05, "loss": 0.0034, "step": 38380 }, { "epoch": 35.81, "learning_rate": 4.552425373134329e-05, "loss": 0.0, "step": 38384 }, { "epoch": 35.81, "learning_rate": 4.552378731343284e-05, "loss": 0.0005, "step": 38388 }, { "epoch": 35.81, "learning_rate": 4.552332089552239e-05, "loss": 0.0, "step": 38392 }, { "epoch": 35.82, "learning_rate": 4.552285447761194e-05, "loss": 0.0001, "step": 38396 }, { "epoch": 35.82, "learning_rate": 4.5522388059701495e-05, "loss": 0.0, "step": 38400 }, { "epoch": 35.82, "learning_rate": 4.552192164179105e-05, "loss": 0.0, "step": 38404 }, { "epoch": 35.83, "learning_rate": 4.55214552238806e-05, "loss": 0.0, "step": 38408 }, { "epoch": 35.83, "learning_rate": 4.552098880597015e-05, "loss": 0.0026, "step": 38412 }, { "epoch": 35.84, "learning_rate": 4.552052238805971e-05, "loss": 0.0008, "step": 38416 }, { "epoch": 35.84, "learning_rate": 4.5520055970149256e-05, "loss": 0.0, "step": 38420 }, { "epoch": 35.84, "learning_rate": 4.5519589552238804e-05, "loss": 0.0, "step": 38424 }, { "epoch": 35.85, "learning_rate": 4.551912313432836e-05, "loss": 0.0, "step": 38428 }, { "epoch": 35.85, "learning_rate": 4.5518656716417914e-05, "loss": 0.0002, "step": 38432 }, { "epoch": 35.85, "learning_rate": 4.551819029850746e-05, "loss": 0.0, "step": 38436 }, { "epoch": 35.86, "learning_rate": 4.5517723880597017e-05, "loss": 0.0011, "step": 38440 }, { "epoch": 35.86, "learning_rate": 4.551725746268657e-05, "loss": 0.0001, "step": 38444 }, { "epoch": 35.87, "learning_rate": 4.551679104477612e-05, "loss": 0.0, "step": 38448 }, { "epoch": 35.87, "learning_rate": 4.5516324626865675e-05, "loss": 0.0001, "step": 38452 }, { "epoch": 35.87, "learning_rate": 4.551585820895522e-05, "loss": 0.0003, "step": 38456 }, { "epoch": 35.88, "learning_rate": 4.551539179104478e-05, "loss": 0.0002, "step": 38460 }, { "epoch": 35.88, "learning_rate": 4.551492537313433e-05, "loss": 0.0004, "step": 38464 }, { "epoch": 35.88, "learning_rate": 4.551445895522388e-05, "loss": 0.0, "step": 38468 }, { "epoch": 35.89, "learning_rate": 4.5513992537313436e-05, "loss": 0.0001, "step": 38472 }, { "epoch": 35.89, "learning_rate": 4.551352611940299e-05, "loss": 0.0, "step": 38476 }, { "epoch": 35.9, "learning_rate": 4.551305970149254e-05, "loss": 0.0, "step": 38480 }, { "epoch": 35.9, "learning_rate": 4.551259328358209e-05, "loss": 0.0001, "step": 38484 }, { "epoch": 35.9, "learning_rate": 4.551212686567164e-05, "loss": 0.0001, "step": 38488 }, { "epoch": 35.91, "learning_rate": 4.5511660447761197e-05, "loss": 0.0011, "step": 38492 }, { "epoch": 35.91, "learning_rate": 4.551119402985075e-05, "loss": 0.0003, "step": 38496 }, { "epoch": 35.91, "learning_rate": 4.55107276119403e-05, "loss": 0.0001, "step": 38500 }, { "epoch": 35.91, "eval_exact_match": 0.7543520309477756, "eval_exec": 0.7794970986460348, "eval_loss": 0.45771145820617676, "eval_runtime": 1150.8415, "eval_samples_per_second": 0.898, "step": 38500 }, { "epoch": 35.92, "learning_rate": 4.5510261194029854e-05, "loss": 0.0, "step": 38504 }, { "epoch": 35.92, "learning_rate": 4.550979477611941e-05, "loss": 0.0, "step": 38508 }, { "epoch": 35.93, "learning_rate": 4.550932835820896e-05, "loss": 0.0005, "step": 38512 }, { "epoch": 35.93, "learning_rate": 4.5508861940298506e-05, "loss": 0.0, "step": 38516 }, { "epoch": 35.93, "learning_rate": 4.550839552238807e-05, "loss": 0.0001, "step": 38520 }, { "epoch": 35.94, "learning_rate": 4.5507929104477615e-05, "loss": 0.0026, "step": 38524 }, { "epoch": 35.94, "learning_rate": 4.5507462686567164e-05, "loss": 0.0004, "step": 38528 }, { "epoch": 35.94, "learning_rate": 4.550699626865672e-05, "loss": 0.0002, "step": 38532 }, { "epoch": 35.95, "learning_rate": 4.550652985074627e-05, "loss": 0.0, "step": 38536 }, { "epoch": 35.95, "learning_rate": 4.550606343283582e-05, "loss": 0.0, "step": 38540 }, { "epoch": 35.96, "learning_rate": 4.5505597014925376e-05, "loss": 0.0015, "step": 38544 }, { "epoch": 35.96, "learning_rate": 4.5505130597014925e-05, "loss": 0.0002, "step": 38548 }, { "epoch": 35.96, "learning_rate": 4.550466417910448e-05, "loss": 0.0001, "step": 38552 }, { "epoch": 35.97, "learning_rate": 4.5504197761194034e-05, "loss": 0.0001, "step": 38556 }, { "epoch": 35.97, "learning_rate": 4.550373134328358e-05, "loss": 0.0, "step": 38560 }, { "epoch": 35.97, "learning_rate": 4.550326492537314e-05, "loss": 0.0, "step": 38564 }, { "epoch": 35.98, "learning_rate": 4.550279850746269e-05, "loss": 0.0002, "step": 38568 }, { "epoch": 35.98, "learning_rate": 4.550233208955224e-05, "loss": 0.0, "step": 38572 }, { "epoch": 35.98, "learning_rate": 4.550186567164179e-05, "loss": 0.0, "step": 38576 }, { "epoch": 35.99, "learning_rate": 4.550139925373135e-05, "loss": 0.0055, "step": 38580 }, { "epoch": 35.99, "learning_rate": 4.55009328358209e-05, "loss": 0.0001, "step": 38584 }, { "epoch": 36.0, "learning_rate": 4.5500466417910447e-05, "loss": 0.0002, "step": 38588 }, { "epoch": 36.0, "learning_rate": 4.55e-05, "loss": 0.0001, "step": 38592 }, { "epoch": 36.0, "learning_rate": 4.5499533582089556e-05, "loss": 0.0011, "step": 38596 }, { "epoch": 36.01, "learning_rate": 4.5499067164179104e-05, "loss": 0.0008, "step": 38600 }, { "epoch": 36.01, "learning_rate": 4.549860074626866e-05, "loss": 0.0, "step": 38604 }, { "epoch": 36.01, "learning_rate": 4.549813432835821e-05, "loss": 0.0, "step": 38608 }, { "epoch": 36.02, "learning_rate": 4.549766791044776e-05, "loss": 0.0, "step": 38612 }, { "epoch": 36.02, "learning_rate": 4.549720149253732e-05, "loss": 0.0001, "step": 38616 }, { "epoch": 36.03, "learning_rate": 4.5496735074626865e-05, "loss": 0.0002, "step": 38620 }, { "epoch": 36.03, "learning_rate": 4.549626865671642e-05, "loss": 0.0, "step": 38624 }, { "epoch": 36.03, "learning_rate": 4.5495802238805975e-05, "loss": 0.0008, "step": 38628 }, { "epoch": 36.04, "learning_rate": 4.5495335820895523e-05, "loss": 0.0006, "step": 38632 }, { "epoch": 36.04, "learning_rate": 4.549486940298507e-05, "loss": 0.0002, "step": 38636 }, { "epoch": 36.04, "learning_rate": 4.549440298507463e-05, "loss": 0.0, "step": 38640 }, { "epoch": 36.05, "learning_rate": 4.549393656716418e-05, "loss": 0.0, "step": 38644 }, { "epoch": 36.05, "learning_rate": 4.5493470149253736e-05, "loss": 0.0001, "step": 38648 }, { "epoch": 36.06, "learning_rate": 4.5493003731343284e-05, "loss": 0.0, "step": 38652 }, { "epoch": 36.06, "learning_rate": 4.549253731343284e-05, "loss": 0.0, "step": 38656 }, { "epoch": 36.06, "learning_rate": 4.5492070895522394e-05, "loss": 0.0, "step": 38660 }, { "epoch": 36.07, "learning_rate": 4.549160447761194e-05, "loss": 0.0006, "step": 38664 }, { "epoch": 36.07, "learning_rate": 4.549113805970149e-05, "loss": 0.0, "step": 38668 }, { "epoch": 36.07, "learning_rate": 4.549067164179105e-05, "loss": 0.0003, "step": 38672 }, { "epoch": 36.08, "learning_rate": 4.54902052238806e-05, "loss": 0.0006, "step": 38676 }, { "epoch": 36.08, "learning_rate": 4.548973880597015e-05, "loss": 0.0, "step": 38680 }, { "epoch": 36.09, "learning_rate": 4.54892723880597e-05, "loss": 0.0, "step": 38684 }, { "epoch": 36.09, "learning_rate": 4.548880597014926e-05, "loss": 0.001, "step": 38688 }, { "epoch": 36.09, "learning_rate": 4.5488339552238806e-05, "loss": 0.0001, "step": 38692 }, { "epoch": 36.1, "learning_rate": 4.548787313432836e-05, "loss": 0.0, "step": 38696 }, { "epoch": 36.1, "learning_rate": 4.5487406716417916e-05, "loss": 0.0014, "step": 38700 }, { "epoch": 36.1, "learning_rate": 4.5486940298507464e-05, "loss": 0.0001, "step": 38704 }, { "epoch": 36.11, "learning_rate": 4.548647388059702e-05, "loss": 0.0002, "step": 38708 }, { "epoch": 36.11, "learning_rate": 4.548600746268657e-05, "loss": 0.0002, "step": 38712 }, { "epoch": 36.12, "learning_rate": 4.548554104477612e-05, "loss": 0.0001, "step": 38716 }, { "epoch": 36.12, "learning_rate": 4.548507462686568e-05, "loss": 0.0017, "step": 38720 }, { "epoch": 36.12, "learning_rate": 4.5484608208955225e-05, "loss": 0.0003, "step": 38724 }, { "epoch": 36.13, "learning_rate": 4.5484141791044773e-05, "loss": 0.0176, "step": 38728 }, { "epoch": 36.13, "learning_rate": 4.5483675373134335e-05, "loss": 0.0012, "step": 38732 }, { "epoch": 36.13, "learning_rate": 4.548320895522388e-05, "loss": 0.0001, "step": 38736 }, { "epoch": 36.14, "learning_rate": 4.548274253731343e-05, "loss": 0.0043, "step": 38740 }, { "epoch": 36.14, "learning_rate": 4.5482276119402986e-05, "loss": 0.0004, "step": 38744 }, { "epoch": 36.15, "learning_rate": 4.548180970149254e-05, "loss": 0.0002, "step": 38748 }, { "epoch": 36.15, "learning_rate": 4.548134328358209e-05, "loss": 0.0001, "step": 38752 }, { "epoch": 36.15, "learning_rate": 4.5480876865671644e-05, "loss": 0.0, "step": 38756 }, { "epoch": 36.16, "learning_rate": 4.54804104477612e-05, "loss": 0.0, "step": 38760 }, { "epoch": 36.16, "learning_rate": 4.547994402985075e-05, "loss": 0.0, "step": 38764 }, { "epoch": 36.16, "learning_rate": 4.54794776119403e-05, "loss": 0.0251, "step": 38768 }, { "epoch": 36.17, "learning_rate": 4.547901119402985e-05, "loss": 0.001, "step": 38772 }, { "epoch": 36.17, "learning_rate": 4.5478544776119405e-05, "loss": 0.0011, "step": 38776 }, { "epoch": 36.18, "learning_rate": 4.547807835820896e-05, "loss": 0.0, "step": 38780 }, { "epoch": 36.18, "learning_rate": 4.547761194029851e-05, "loss": 0.0, "step": 38784 }, { "epoch": 36.18, "learning_rate": 4.5477145522388056e-05, "loss": 0.0001, "step": 38788 }, { "epoch": 36.19, "learning_rate": 4.547667910447762e-05, "loss": 0.0, "step": 38792 }, { "epoch": 36.19, "learning_rate": 4.5476212686567166e-05, "loss": 0.0001, "step": 38796 }, { "epoch": 36.19, "learning_rate": 4.5475746268656714e-05, "loss": 0.0001, "step": 38800 }, { "epoch": 36.2, "learning_rate": 4.547527985074627e-05, "loss": 0.0001, "step": 38804 }, { "epoch": 36.2, "learning_rate": 4.5474813432835824e-05, "loss": 0.0001, "step": 38808 }, { "epoch": 36.21, "learning_rate": 4.547434701492538e-05, "loss": 0.0001, "step": 38812 }, { "epoch": 36.21, "learning_rate": 4.547388059701493e-05, "loss": 0.0001, "step": 38816 }, { "epoch": 36.21, "learning_rate": 4.547341417910448e-05, "loss": 0.0004, "step": 38820 }, { "epoch": 36.22, "learning_rate": 4.547294776119404e-05, "loss": 0.001, "step": 38824 }, { "epoch": 36.22, "learning_rate": 4.5472481343283585e-05, "loss": 0.0011, "step": 38828 }, { "epoch": 36.22, "learning_rate": 4.547201492537313e-05, "loss": 0.001, "step": 38832 }, { "epoch": 36.23, "learning_rate": 4.547154850746269e-05, "loss": 0.0, "step": 38836 }, { "epoch": 36.23, "learning_rate": 4.547108208955224e-05, "loss": 0.0002, "step": 38840 }, { "epoch": 36.24, "learning_rate": 4.547061567164179e-05, "loss": 0.0001, "step": 38844 }, { "epoch": 36.24, "learning_rate": 4.5470149253731346e-05, "loss": 0.0001, "step": 38848 }, { "epoch": 36.24, "learning_rate": 4.54696828358209e-05, "loss": 0.0035, "step": 38852 }, { "epoch": 36.25, "learning_rate": 4.546921641791045e-05, "loss": 0.0001, "step": 38856 }, { "epoch": 36.25, "learning_rate": 4.5468750000000004e-05, "loss": 0.0008, "step": 38860 }, { "epoch": 36.25, "learning_rate": 4.546828358208955e-05, "loss": 0.0001, "step": 38864 }, { "epoch": 36.26, "learning_rate": 4.546781716417911e-05, "loss": 0.0001, "step": 38868 }, { "epoch": 36.26, "learning_rate": 4.546735074626866e-05, "loss": 0.0006, "step": 38872 }, { "epoch": 36.26, "learning_rate": 4.546688432835821e-05, "loss": 0.0001, "step": 38876 }, { "epoch": 36.27, "learning_rate": 4.5466417910447765e-05, "loss": 0.0, "step": 38880 }, { "epoch": 36.27, "learning_rate": 4.546595149253732e-05, "loss": 0.0008, "step": 38884 }, { "epoch": 36.28, "learning_rate": 4.546548507462687e-05, "loss": 0.0001, "step": 38888 }, { "epoch": 36.28, "learning_rate": 4.5465018656716416e-05, "loss": 0.0017, "step": 38892 }, { "epoch": 36.28, "learning_rate": 4.546455223880597e-05, "loss": 0.0, "step": 38896 }, { "epoch": 36.29, "learning_rate": 4.5464085820895526e-05, "loss": 0.0043, "step": 38900 }, { "epoch": 36.29, "learning_rate": 4.5463619402985074e-05, "loss": 0.0006, "step": 38904 }, { "epoch": 36.29, "learning_rate": 4.546315298507463e-05, "loss": 0.0, "step": 38908 }, { "epoch": 36.3, "learning_rate": 4.5462686567164184e-05, "loss": 0.0001, "step": 38912 }, { "epoch": 36.3, "learning_rate": 4.546222014925373e-05, "loss": 0.0014, "step": 38916 }, { "epoch": 36.31, "learning_rate": 4.546175373134329e-05, "loss": 0.0001, "step": 38920 }, { "epoch": 36.31, "learning_rate": 4.5461287313432835e-05, "loss": 0.0006, "step": 38924 }, { "epoch": 36.31, "learning_rate": 4.546082089552239e-05, "loss": 0.0004, "step": 38928 }, { "epoch": 36.32, "learning_rate": 4.5460354477611945e-05, "loss": 0.0022, "step": 38932 }, { "epoch": 36.32, "learning_rate": 4.545988805970149e-05, "loss": 0.0, "step": 38936 }, { "epoch": 36.32, "learning_rate": 4.545942164179104e-05, "loss": 0.0, "step": 38940 }, { "epoch": 36.33, "learning_rate": 4.54589552238806e-05, "loss": 0.0007, "step": 38944 }, { "epoch": 36.33, "learning_rate": 4.545848880597015e-05, "loss": 0.0001, "step": 38948 }, { "epoch": 36.34, "learning_rate": 4.54580223880597e-05, "loss": 0.0027, "step": 38952 }, { "epoch": 36.34, "learning_rate": 4.5457555970149254e-05, "loss": 0.0004, "step": 38956 }, { "epoch": 36.34, "learning_rate": 4.545708955223881e-05, "loss": 0.0001, "step": 38960 }, { "epoch": 36.35, "learning_rate": 4.545662313432836e-05, "loss": 0.0001, "step": 38964 }, { "epoch": 36.35, "learning_rate": 4.545615671641791e-05, "loss": 0.0035, "step": 38968 }, { "epoch": 36.35, "learning_rate": 4.545569029850747e-05, "loss": 0.0002, "step": 38972 }, { "epoch": 36.36, "learning_rate": 4.545522388059702e-05, "loss": 0.0072, "step": 38976 }, { "epoch": 36.36, "learning_rate": 4.545475746268657e-05, "loss": 0.0, "step": 38980 }, { "epoch": 36.37, "learning_rate": 4.545429104477612e-05, "loss": 0.0, "step": 38984 }, { "epoch": 36.37, "learning_rate": 4.545382462686568e-05, "loss": 0.0001, "step": 38988 }, { "epoch": 36.37, "learning_rate": 4.545335820895523e-05, "loss": 0.0007, "step": 38992 }, { "epoch": 36.38, "learning_rate": 4.5452891791044776e-05, "loss": 0.0015, "step": 38996 }, { "epoch": 36.38, "learning_rate": 4.545242537313433e-05, "loss": 0.0001, "step": 39000 }, { "epoch": 36.38, "eval_exact_match": 0.7456479690522244, "eval_exec": 0.769825918762089, "eval_loss": 0.4195860028266907, "eval_runtime": 1174.4998, "eval_samples_per_second": 0.88, "step": 39000 }, { "epoch": 36.38, "learning_rate": 4.5451958955223886e-05, "loss": 0.0005, "step": 39004 }, { "epoch": 36.39, "learning_rate": 4.5451492537313434e-05, "loss": 0.0, "step": 39008 }, { "epoch": 36.39, "learning_rate": 4.545102611940299e-05, "loss": 0.0001, "step": 39012 }, { "epoch": 36.4, "learning_rate": 4.545055970149254e-05, "loss": 0.0, "step": 39016 }, { "epoch": 36.4, "learning_rate": 4.545009328358209e-05, "loss": 0.0003, "step": 39020 }, { "epoch": 36.4, "learning_rate": 4.544962686567165e-05, "loss": 0.0, "step": 39024 }, { "epoch": 36.41, "learning_rate": 4.5449160447761195e-05, "loss": 0.0001, "step": 39028 }, { "epoch": 36.41, "learning_rate": 4.544869402985075e-05, "loss": 0.0001, "step": 39032 }, { "epoch": 36.41, "learning_rate": 4.5448227611940305e-05, "loss": 0.0033, "step": 39036 }, { "epoch": 36.42, "learning_rate": 4.544776119402985e-05, "loss": 0.0002, "step": 39040 }, { "epoch": 36.42, "learning_rate": 4.54472947761194e-05, "loss": 0.0001, "step": 39044 }, { "epoch": 36.43, "learning_rate": 4.5446828358208956e-05, "loss": 0.0, "step": 39048 }, { "epoch": 36.43, "learning_rate": 4.544636194029851e-05, "loss": 0.0002, "step": 39052 }, { "epoch": 36.43, "learning_rate": 4.544589552238806e-05, "loss": 0.0, "step": 39056 }, { "epoch": 36.44, "learning_rate": 4.5445429104477614e-05, "loss": 0.0002, "step": 39060 }, { "epoch": 36.44, "learning_rate": 4.544496268656717e-05, "loss": 0.0009, "step": 39064 }, { "epoch": 36.44, "learning_rate": 4.544449626865672e-05, "loss": 0.0001, "step": 39068 }, { "epoch": 36.45, "learning_rate": 4.544402985074627e-05, "loss": 0.0, "step": 39072 }, { "epoch": 36.45, "learning_rate": 4.544356343283582e-05, "loss": 0.0001, "step": 39076 }, { "epoch": 36.46, "learning_rate": 4.5443097014925375e-05, "loss": 0.0, "step": 39080 }, { "epoch": 36.46, "learning_rate": 4.544263059701493e-05, "loss": 0.0, "step": 39084 }, { "epoch": 36.46, "learning_rate": 4.544216417910448e-05, "loss": 0.0002, "step": 39088 }, { "epoch": 36.47, "learning_rate": 4.544169776119403e-05, "loss": 0.003, "step": 39092 }, { "epoch": 36.47, "learning_rate": 4.544123134328359e-05, "loss": 0.0001, "step": 39096 }, { "epoch": 36.47, "learning_rate": 4.5440764925373136e-05, "loss": 0.0002, "step": 39100 }, { "epoch": 36.48, "learning_rate": 4.5440298507462684e-05, "loss": 0.0001, "step": 39104 }, { "epoch": 36.48, "learning_rate": 4.543983208955224e-05, "loss": 0.0004, "step": 39108 }, { "epoch": 36.49, "learning_rate": 4.5439365671641794e-05, "loss": 0.0192, "step": 39112 }, { "epoch": 36.49, "learning_rate": 4.543889925373134e-05, "loss": 0.0006, "step": 39116 }, { "epoch": 36.49, "learning_rate": 4.54384328358209e-05, "loss": 0.0017, "step": 39120 }, { "epoch": 36.5, "learning_rate": 4.543796641791045e-05, "loss": 0.0049, "step": 39124 }, { "epoch": 36.5, "learning_rate": 4.54375e-05, "loss": 0.0001, "step": 39128 }, { "epoch": 36.5, "learning_rate": 4.5437033582089555e-05, "loss": 0.0003, "step": 39132 }, { "epoch": 36.51, "learning_rate": 4.54365671641791e-05, "loss": 0.0, "step": 39136 }, { "epoch": 36.51, "learning_rate": 4.5436100746268665e-05, "loss": 0.0001, "step": 39140 }, { "epoch": 36.51, "learning_rate": 4.543563432835821e-05, "loss": 0.0001, "step": 39144 }, { "epoch": 36.52, "learning_rate": 4.543516791044776e-05, "loss": 0.0001, "step": 39148 }, { "epoch": 36.52, "learning_rate": 4.5434701492537316e-05, "loss": 0.0001, "step": 39152 }, { "epoch": 36.53, "learning_rate": 4.543423507462687e-05, "loss": 0.0001, "step": 39156 }, { "epoch": 36.53, "learning_rate": 4.543376865671642e-05, "loss": 0.0001, "step": 39160 }, { "epoch": 36.53, "learning_rate": 4.5433302238805974e-05, "loss": 0.0, "step": 39164 }, { "epoch": 36.54, "learning_rate": 4.543283582089552e-05, "loss": 0.0002, "step": 39168 }, { "epoch": 36.54, "learning_rate": 4.543236940298508e-05, "loss": 0.0, "step": 39172 }, { "epoch": 36.54, "learning_rate": 4.543190298507463e-05, "loss": 0.0012, "step": 39176 }, { "epoch": 36.55, "learning_rate": 4.543143656716418e-05, "loss": 0.0001, "step": 39180 }, { "epoch": 36.55, "learning_rate": 4.5430970149253735e-05, "loss": 0.0009, "step": 39184 }, { "epoch": 36.56, "learning_rate": 4.543050373134329e-05, "loss": 0.0001, "step": 39188 }, { "epoch": 36.56, "learning_rate": 4.543003731343284e-05, "loss": 0.0006, "step": 39192 }, { "epoch": 36.56, "learning_rate": 4.5429570895522386e-05, "loss": 0.0001, "step": 39196 }, { "epoch": 36.57, "learning_rate": 4.542910447761195e-05, "loss": 0.0, "step": 39200 }, { "epoch": 36.57, "learning_rate": 4.5428638059701496e-05, "loss": 0.0002, "step": 39204 }, { "epoch": 36.57, "learning_rate": 4.5428171641791044e-05, "loss": 0.0, "step": 39208 }, { "epoch": 36.58, "learning_rate": 4.54277052238806e-05, "loss": 0.0001, "step": 39212 }, { "epoch": 36.58, "learning_rate": 4.5427238805970154e-05, "loss": 0.0, "step": 39216 }, { "epoch": 36.59, "learning_rate": 4.54267723880597e-05, "loss": 0.0001, "step": 39220 }, { "epoch": 36.59, "learning_rate": 4.542630597014926e-05, "loss": 0.0001, "step": 39224 }, { "epoch": 36.59, "learning_rate": 4.5425839552238805e-05, "loss": 0.0, "step": 39228 }, { "epoch": 36.6, "learning_rate": 4.542537313432836e-05, "loss": 0.0, "step": 39232 }, { "epoch": 36.6, "learning_rate": 4.5424906716417915e-05, "loss": 0.0001, "step": 39236 }, { "epoch": 36.6, "learning_rate": 4.542444029850746e-05, "loss": 0.0025, "step": 39240 }, { "epoch": 36.61, "learning_rate": 4.542397388059702e-05, "loss": 0.0007, "step": 39244 }, { "epoch": 36.61, "learning_rate": 4.542350746268657e-05, "loss": 0.0016, "step": 39248 }, { "epoch": 36.62, "learning_rate": 4.542304104477612e-05, "loss": 0.0003, "step": 39252 }, { "epoch": 36.62, "learning_rate": 4.542257462686567e-05, "loss": 0.0, "step": 39256 }, { "epoch": 36.62, "learning_rate": 4.542210820895523e-05, "loss": 0.0, "step": 39260 }, { "epoch": 36.63, "learning_rate": 4.542164179104478e-05, "loss": 0.0004, "step": 39264 }, { "epoch": 36.63, "learning_rate": 4.542117537313433e-05, "loss": 0.0013, "step": 39268 }, { "epoch": 36.63, "learning_rate": 4.542070895522388e-05, "loss": 0.0, "step": 39272 }, { "epoch": 36.64, "learning_rate": 4.542024253731344e-05, "loss": 0.0, "step": 39276 }, { "epoch": 36.64, "learning_rate": 4.5419776119402985e-05, "loss": 0.0001, "step": 39280 }, { "epoch": 36.65, "learning_rate": 4.541930970149254e-05, "loss": 0.0001, "step": 39284 }, { "epoch": 36.65, "learning_rate": 4.541884328358209e-05, "loss": 0.0, "step": 39288 }, { "epoch": 36.65, "learning_rate": 4.541837686567164e-05, "loss": 0.0, "step": 39292 }, { "epoch": 36.66, "learning_rate": 4.54179104477612e-05, "loss": 0.0, "step": 39296 }, { "epoch": 36.66, "learning_rate": 4.5417444029850746e-05, "loss": 0.0001, "step": 39300 }, { "epoch": 36.66, "learning_rate": 4.54169776119403e-05, "loss": 0.0014, "step": 39304 }, { "epoch": 36.67, "learning_rate": 4.5416511194029856e-05, "loss": 0.0, "step": 39308 }, { "epoch": 36.67, "learning_rate": 4.5416044776119404e-05, "loss": 0.0003, "step": 39312 }, { "epoch": 36.68, "learning_rate": 4.541557835820896e-05, "loss": 0.0, "step": 39316 }, { "epoch": 36.68, "learning_rate": 4.5415111940298514e-05, "loss": 0.0046, "step": 39320 }, { "epoch": 36.68, "learning_rate": 4.541464552238806e-05, "loss": 0.0013, "step": 39324 }, { "epoch": 36.69, "learning_rate": 4.5414179104477617e-05, "loss": 0.0006, "step": 39328 }, { "epoch": 36.69, "learning_rate": 4.5413712686567165e-05, "loss": 0.0003, "step": 39332 }, { "epoch": 36.69, "learning_rate": 4.541324626865672e-05, "loss": 0.0001, "step": 39336 }, { "epoch": 36.7, "learning_rate": 4.5412779850746274e-05, "loss": 0.0006, "step": 39340 }, { "epoch": 36.7, "learning_rate": 4.541231343283582e-05, "loss": 0.0, "step": 39344 }, { "epoch": 36.71, "learning_rate": 4.541184701492537e-05, "loss": 0.0007, "step": 39348 }, { "epoch": 36.71, "learning_rate": 4.541138059701493e-05, "loss": 0.0008, "step": 39352 }, { "epoch": 36.71, "learning_rate": 4.541091417910448e-05, "loss": 0.0, "step": 39356 }, { "epoch": 36.72, "learning_rate": 4.541044776119403e-05, "loss": 0.0, "step": 39360 }, { "epoch": 36.72, "learning_rate": 4.5409981343283584e-05, "loss": 0.0, "step": 39364 }, { "epoch": 36.72, "learning_rate": 4.540951492537314e-05, "loss": 0.0002, "step": 39368 }, { "epoch": 36.73, "learning_rate": 4.540904850746269e-05, "loss": 0.0005, "step": 39372 }, { "epoch": 36.73, "learning_rate": 4.540858208955224e-05, "loss": 0.0001, "step": 39376 }, { "epoch": 36.73, "learning_rate": 4.5408115671641796e-05, "loss": 0.0001, "step": 39380 }, { "epoch": 36.74, "learning_rate": 4.5407649253731345e-05, "loss": 0.0, "step": 39384 }, { "epoch": 36.74, "learning_rate": 4.54071828358209e-05, "loss": 0.0, "step": 39388 }, { "epoch": 36.75, "learning_rate": 4.540671641791045e-05, "loss": 0.0, "step": 39392 }, { "epoch": 36.75, "learning_rate": 4.540625e-05, "loss": 0.0005, "step": 39396 }, { "epoch": 36.75, "learning_rate": 4.540578358208956e-05, "loss": 0.0004, "step": 39400 }, { "epoch": 36.76, "learning_rate": 4.5405317164179106e-05, "loss": 0.0001, "step": 39404 }, { "epoch": 36.76, "learning_rate": 4.5404850746268654e-05, "loss": 0.0, "step": 39408 }, { "epoch": 36.76, "learning_rate": 4.5404384328358215e-05, "loss": 0.0, "step": 39412 }, { "epoch": 36.77, "learning_rate": 4.5403917910447764e-05, "loss": 0.0001, "step": 39416 }, { "epoch": 36.77, "learning_rate": 4.540345149253731e-05, "loss": 0.0, "step": 39420 }, { "epoch": 36.78, "learning_rate": 4.5402985074626867e-05, "loss": 0.0001, "step": 39424 }, { "epoch": 36.78, "learning_rate": 4.540251865671642e-05, "loss": 0.0, "step": 39428 }, { "epoch": 36.78, "learning_rate": 4.540205223880597e-05, "loss": 0.0009, "step": 39432 }, { "epoch": 36.79, "learning_rate": 4.5401585820895525e-05, "loss": 0.0, "step": 39436 }, { "epoch": 36.79, "learning_rate": 4.540111940298508e-05, "loss": 0.0, "step": 39440 }, { "epoch": 36.79, "learning_rate": 4.540065298507463e-05, "loss": 0.0, "step": 39444 }, { "epoch": 36.8, "learning_rate": 4.540018656716418e-05, "loss": 0.0001, "step": 39448 }, { "epoch": 36.8, "learning_rate": 4.539972014925373e-05, "loss": 0.0027, "step": 39452 }, { "epoch": 36.81, "learning_rate": 4.5399253731343286e-05, "loss": 0.0, "step": 39456 }, { "epoch": 36.81, "learning_rate": 4.539878731343284e-05, "loss": 0.0002, "step": 39460 }, { "epoch": 36.81, "learning_rate": 4.539832089552239e-05, "loss": 0.0001, "step": 39464 }, { "epoch": 36.82, "learning_rate": 4.5397854477611943e-05, "loss": 0.0, "step": 39468 }, { "epoch": 36.82, "learning_rate": 4.53973880597015e-05, "loss": 0.0009, "step": 39472 }, { "epoch": 36.82, "learning_rate": 4.5396921641791046e-05, "loss": 0.0038, "step": 39476 }, { "epoch": 36.83, "learning_rate": 4.53964552238806e-05, "loss": 0.0022, "step": 39480 }, { "epoch": 36.83, "learning_rate": 4.539598880597015e-05, "loss": 0.0005, "step": 39484 }, { "epoch": 36.84, "learning_rate": 4.5395522388059704e-05, "loss": 0.0, "step": 39488 }, { "epoch": 36.84, "learning_rate": 4.539505597014926e-05, "loss": 0.0001, "step": 39492 }, { "epoch": 36.84, "learning_rate": 4.539458955223881e-05, "loss": 0.0, "step": 39496 }, { "epoch": 36.85, "learning_rate": 4.539412313432836e-05, "loss": 0.0001, "step": 39500 }, { "epoch": 36.85, "eval_exact_match": 0.7495164410058027, "eval_exec": 0.7852998065764023, "eval_loss": 0.4335377514362335, "eval_runtime": 1568.2028, "eval_samples_per_second": 0.659, "step": 39500 }, { "epoch": 36.85, "learning_rate": 4.539365671641792e-05, "loss": 0.0002, "step": 39504 }, { "epoch": 36.85, "learning_rate": 4.5393190298507465e-05, "loss": 0.0, "step": 39508 }, { "epoch": 36.86, "learning_rate": 4.5392723880597014e-05, "loss": 0.0001, "step": 39512 }, { "epoch": 36.86, "learning_rate": 4.539225746268657e-05, "loss": 0.0041, "step": 39516 }, { "epoch": 36.87, "learning_rate": 4.539179104477612e-05, "loss": 0.0001, "step": 39520 }, { "epoch": 36.87, "learning_rate": 4.539132462686567e-05, "loss": 0.0001, "step": 39524 }, { "epoch": 36.87, "learning_rate": 4.5390858208955226e-05, "loss": 0.0, "step": 39528 }, { "epoch": 36.88, "learning_rate": 4.539039179104478e-05, "loss": 0.0055, "step": 39532 }, { "epoch": 36.88, "learning_rate": 4.538992537313433e-05, "loss": 0.0002, "step": 39536 }, { "epoch": 36.88, "learning_rate": 4.5389458955223884e-05, "loss": 0.0001, "step": 39540 }, { "epoch": 36.89, "learning_rate": 4.538899253731343e-05, "loss": 0.0002, "step": 39544 }, { "epoch": 36.89, "learning_rate": 4.538852611940299e-05, "loss": 0.0, "step": 39548 }, { "epoch": 36.9, "learning_rate": 4.538805970149254e-05, "loss": 0.0, "step": 39552 }, { "epoch": 36.9, "learning_rate": 4.538759328358209e-05, "loss": 0.0, "step": 39556 }, { "epoch": 36.9, "learning_rate": 4.5387126865671645e-05, "loss": 0.0015, "step": 39560 }, { "epoch": 36.91, "learning_rate": 4.53866604477612e-05, "loss": 0.0001, "step": 39564 }, { "epoch": 36.91, "learning_rate": 4.538619402985075e-05, "loss": 0.0001, "step": 39568 }, { "epoch": 36.91, "learning_rate": 4.5385727611940297e-05, "loss": 0.0041, "step": 39572 }, { "epoch": 36.92, "learning_rate": 4.538526119402985e-05, "loss": 0.0001, "step": 39576 }, { "epoch": 36.92, "learning_rate": 4.5384794776119406e-05, "loss": 0.0001, "step": 39580 }, { "epoch": 36.93, "learning_rate": 4.5384328358208954e-05, "loss": 0.0004, "step": 39584 }, { "epoch": 36.93, "learning_rate": 4.538386194029851e-05, "loss": 0.0002, "step": 39588 }, { "epoch": 36.93, "learning_rate": 4.5383395522388064e-05, "loss": 0.0001, "step": 39592 }, { "epoch": 36.94, "learning_rate": 4.538292910447761e-05, "loss": 0.0, "step": 39596 }, { "epoch": 36.94, "learning_rate": 4.538246268656717e-05, "loss": 0.0011, "step": 39600 }, { "epoch": 36.94, "learning_rate": 4.5381996268656715e-05, "loss": 0.0004, "step": 39604 }, { "epoch": 36.95, "learning_rate": 4.538152985074627e-05, "loss": 0.0002, "step": 39608 }, { "epoch": 36.95, "learning_rate": 4.5381063432835825e-05, "loss": 0.0101, "step": 39612 }, { "epoch": 36.96, "learning_rate": 4.538059701492537e-05, "loss": 0.0006, "step": 39616 }, { "epoch": 36.96, "learning_rate": 4.538013059701492e-05, "loss": 0.0001, "step": 39620 }, { "epoch": 36.96, "learning_rate": 4.537966417910448e-05, "loss": 0.0042, "step": 39624 }, { "epoch": 36.97, "learning_rate": 4.537919776119403e-05, "loss": 0.0004, "step": 39628 }, { "epoch": 36.97, "learning_rate": 4.5378731343283586e-05, "loss": 0.0002, "step": 39632 }, { "epoch": 36.97, "learning_rate": 4.5378264925373134e-05, "loss": 0.001, "step": 39636 }, { "epoch": 36.98, "learning_rate": 4.537779850746269e-05, "loss": 0.0011, "step": 39640 }, { "epoch": 36.98, "learning_rate": 4.5377332089552244e-05, "loss": 0.0018, "step": 39644 }, { "epoch": 36.98, "learning_rate": 4.537686567164179e-05, "loss": 0.0001, "step": 39648 }, { "epoch": 36.99, "learning_rate": 4.537639925373135e-05, "loss": 0.0, "step": 39652 }, { "epoch": 36.99, "learning_rate": 4.53759328358209e-05, "loss": 0.0001, "step": 39656 }, { "epoch": 37.0, "learning_rate": 4.537546641791045e-05, "loss": 0.0, "step": 39660 }, { "epoch": 37.0, "learning_rate": 4.5375e-05, "loss": 0.0, "step": 39664 }, { "epoch": 37.0, "learning_rate": 4.537453358208956e-05, "loss": 0.0002, "step": 39668 }, { "epoch": 37.01, "learning_rate": 4.537406716417911e-05, "loss": 0.0, "step": 39672 }, { "epoch": 37.01, "learning_rate": 4.5373600746268656e-05, "loss": 0.0, "step": 39676 }, { "epoch": 37.01, "learning_rate": 4.537313432835821e-05, "loss": 0.0, "step": 39680 }, { "epoch": 37.02, "learning_rate": 4.5372667910447766e-05, "loss": 0.0, "step": 39684 }, { "epoch": 37.02, "learning_rate": 4.5372201492537314e-05, "loss": 0.0001, "step": 39688 }, { "epoch": 37.03, "learning_rate": 4.537173507462687e-05, "loss": 0.0005, "step": 39692 }, { "epoch": 37.03, "learning_rate": 4.537126865671642e-05, "loss": 0.0, "step": 39696 }, { "epoch": 37.03, "learning_rate": 4.537080223880597e-05, "loss": 0.0, "step": 39700 }, { "epoch": 37.04, "learning_rate": 4.537033582089553e-05, "loss": 0.0011, "step": 39704 }, { "epoch": 37.04, "learning_rate": 4.5369869402985075e-05, "loss": 0.0004, "step": 39708 }, { "epoch": 37.04, "learning_rate": 4.536940298507463e-05, "loss": 0.0002, "step": 39712 }, { "epoch": 37.05, "learning_rate": 4.5368936567164185e-05, "loss": 0.0001, "step": 39716 }, { "epoch": 37.05, "learning_rate": 4.536847014925373e-05, "loss": 0.0, "step": 39720 }, { "epoch": 37.06, "learning_rate": 4.536800373134328e-05, "loss": 0.0, "step": 39724 }, { "epoch": 37.06, "learning_rate": 4.5367537313432836e-05, "loss": 0.0006, "step": 39728 }, { "epoch": 37.06, "learning_rate": 4.536707089552239e-05, "loss": 0.0001, "step": 39732 }, { "epoch": 37.07, "learning_rate": 4.536660447761194e-05, "loss": 0.0005, "step": 39736 }, { "epoch": 37.07, "learning_rate": 4.5366138059701494e-05, "loss": 0.0002, "step": 39740 }, { "epoch": 37.07, "learning_rate": 4.536567164179105e-05, "loss": 0.0, "step": 39744 }, { "epoch": 37.08, "learning_rate": 4.53652052238806e-05, "loss": 0.0088, "step": 39748 }, { "epoch": 37.08, "learning_rate": 4.536473880597015e-05, "loss": 0.0005, "step": 39752 }, { "epoch": 37.09, "learning_rate": 4.53642723880597e-05, "loss": 0.0001, "step": 39756 }, { "epoch": 37.09, "learning_rate": 4.5363805970149255e-05, "loss": 0.0, "step": 39760 }, { "epoch": 37.09, "learning_rate": 4.536333955223881e-05, "loss": 0.0002, "step": 39764 }, { "epoch": 37.1, "learning_rate": 4.536287313432836e-05, "loss": 0.0011, "step": 39768 }, { "epoch": 37.1, "learning_rate": 4.536240671641791e-05, "loss": 0.0, "step": 39772 }, { "epoch": 37.1, "learning_rate": 4.536194029850747e-05, "loss": 0.0, "step": 39776 }, { "epoch": 37.11, "learning_rate": 4.5361473880597016e-05, "loss": 0.0001, "step": 39780 }, { "epoch": 37.11, "learning_rate": 4.5361007462686564e-05, "loss": 0.0004, "step": 39784 }, { "epoch": 37.12, "learning_rate": 4.536054104477612e-05, "loss": 0.0015, "step": 39788 }, { "epoch": 37.12, "learning_rate": 4.5360074626865674e-05, "loss": 0.0009, "step": 39792 }, { "epoch": 37.12, "learning_rate": 4.535960820895523e-05, "loss": 0.0, "step": 39796 }, { "epoch": 37.13, "learning_rate": 4.535914179104478e-05, "loss": 0.0, "step": 39800 }, { "epoch": 37.13, "learning_rate": 4.535867537313433e-05, "loss": 0.0018, "step": 39804 }, { "epoch": 37.13, "learning_rate": 4.535820895522389e-05, "loss": 0.0001, "step": 39808 }, { "epoch": 37.14, "learning_rate": 4.5357742537313435e-05, "loss": 0.0003, "step": 39812 }, { "epoch": 37.14, "learning_rate": 4.535727611940298e-05, "loss": 0.002, "step": 39816 }, { "epoch": 37.15, "learning_rate": 4.5356809701492545e-05, "loss": 0.0006, "step": 39820 }, { "epoch": 37.15, "learning_rate": 4.535634328358209e-05, "loss": 0.001, "step": 39824 }, { "epoch": 37.15, "learning_rate": 4.535587686567164e-05, "loss": 0.0003, "step": 39828 }, { "epoch": 37.16, "learning_rate": 4.5355410447761196e-05, "loss": 0.0011, "step": 39832 }, { "epoch": 37.16, "learning_rate": 4.535494402985075e-05, "loss": 0.0001, "step": 39836 }, { "epoch": 37.16, "learning_rate": 4.53544776119403e-05, "loss": 0.0002, "step": 39840 }, { "epoch": 37.17, "learning_rate": 4.5354011194029854e-05, "loss": 0.0001, "step": 39844 }, { "epoch": 37.17, "learning_rate": 4.53535447761194e-05, "loss": 0.0, "step": 39848 }, { "epoch": 37.18, "learning_rate": 4.535307835820896e-05, "loss": 0.0, "step": 39852 }, { "epoch": 37.18, "learning_rate": 4.535261194029851e-05, "loss": 0.0001, "step": 39856 }, { "epoch": 37.18, "learning_rate": 4.535214552238806e-05, "loss": 0.0, "step": 39860 }, { "epoch": 37.19, "learning_rate": 4.5351679104477615e-05, "loss": 0.0002, "step": 39864 }, { "epoch": 37.19, "learning_rate": 4.535121268656717e-05, "loss": 0.001, "step": 39868 }, { "epoch": 37.19, "learning_rate": 4.535074626865672e-05, "loss": 0.0001, "step": 39872 }, { "epoch": 37.2, "learning_rate": 4.5350279850746266e-05, "loss": 0.0003, "step": 39876 }, { "epoch": 37.2, "learning_rate": 4.534981343283583e-05, "loss": 0.0, "step": 39880 }, { "epoch": 37.21, "learning_rate": 4.5349347014925376e-05, "loss": 0.0006, "step": 39884 }, { "epoch": 37.21, "learning_rate": 4.5348880597014924e-05, "loss": 0.0006, "step": 39888 }, { "epoch": 37.21, "learning_rate": 4.534841417910448e-05, "loss": 0.0009, "step": 39892 }, { "epoch": 37.22, "learning_rate": 4.5347947761194034e-05, "loss": 0.0, "step": 39896 }, { "epoch": 37.22, "learning_rate": 4.534748134328358e-05, "loss": 0.0002, "step": 39900 }, { "epoch": 37.22, "learning_rate": 4.534701492537314e-05, "loss": 0.0, "step": 39904 }, { "epoch": 37.23, "learning_rate": 4.5346548507462685e-05, "loss": 0.0004, "step": 39908 }, { "epoch": 37.23, "learning_rate": 4.534608208955224e-05, "loss": 0.0, "step": 39912 }, { "epoch": 37.24, "learning_rate": 4.5345615671641795e-05, "loss": 0.0, "step": 39916 }, { "epoch": 37.24, "learning_rate": 4.534514925373134e-05, "loss": 0.0002, "step": 39920 }, { "epoch": 37.24, "learning_rate": 4.53446828358209e-05, "loss": 0.0001, "step": 39924 }, { "epoch": 37.25, "learning_rate": 4.534421641791045e-05, "loss": 0.0015, "step": 39928 }, { "epoch": 37.25, "learning_rate": 4.534375e-05, "loss": 0.0, "step": 39932 }, { "epoch": 37.25, "learning_rate": 4.534328358208955e-05, "loss": 0.0, "step": 39936 }, { "epoch": 37.26, "learning_rate": 4.534281716417911e-05, "loss": 0.0, "step": 39940 }, { "epoch": 37.26, "learning_rate": 4.534235074626866e-05, "loss": 0.0, "step": 39944 }, { "epoch": 37.26, "learning_rate": 4.534188432835821e-05, "loss": 0.0003, "step": 39948 }, { "epoch": 37.27, "learning_rate": 4.534141791044776e-05, "loss": 0.0, "step": 39952 }, { "epoch": 37.27, "learning_rate": 4.534095149253732e-05, "loss": 0.0006, "step": 39956 }, { "epoch": 37.28, "learning_rate": 4.534048507462687e-05, "loss": 0.0017, "step": 39960 }, { "epoch": 37.28, "learning_rate": 4.534001865671642e-05, "loss": 0.0028, "step": 39964 }, { "epoch": 37.28, "learning_rate": 4.533955223880597e-05, "loss": 0.0019, "step": 39968 }, { "epoch": 37.29, "learning_rate": 4.533908582089553e-05, "loss": 0.0001, "step": 39972 }, { "epoch": 37.29, "learning_rate": 4.533861940298508e-05, "loss": 0.0009, "step": 39976 }, { "epoch": 37.29, "learning_rate": 4.5338152985074626e-05, "loss": 0.0059, "step": 39980 }, { "epoch": 37.3, "learning_rate": 4.533768656716418e-05, "loss": 0.0, "step": 39984 }, { "epoch": 37.3, "learning_rate": 4.5337220149253736e-05, "loss": 0.0034, "step": 39988 }, { "epoch": 37.31, "learning_rate": 4.5336753731343284e-05, "loss": 0.0, "step": 39992 }, { "epoch": 37.31, "learning_rate": 4.533628731343284e-05, "loss": 0.0012, "step": 39996 }, { "epoch": 37.31, "learning_rate": 4.5335820895522394e-05, "loss": 0.0001, "step": 40000 }, { "epoch": 37.31, "eval_exact_match": 0.7282398452611218, "eval_exec": 0.7514506769825918, "eval_loss": 0.42848145961761475, "eval_runtime": 1107.5429, "eval_samples_per_second": 0.934, "step": 40000 }, { "epoch": 37.32, "learning_rate": 4.533535447761194e-05, "loss": 0.0, "step": 40004 }, { "epoch": 37.32, "learning_rate": 4.53348880597015e-05, "loss": 0.0039, "step": 40008 }, { "epoch": 37.32, "learning_rate": 4.5334421641791045e-05, "loss": 0.0006, "step": 40012 }, { "epoch": 37.33, "learning_rate": 4.53339552238806e-05, "loss": 0.0014, "step": 40016 }, { "epoch": 37.33, "learning_rate": 4.5333488805970155e-05, "loss": 0.0025, "step": 40020 }, { "epoch": 37.34, "learning_rate": 4.53330223880597e-05, "loss": 0.0002, "step": 40024 }, { "epoch": 37.34, "learning_rate": 4.533255597014925e-05, "loss": 0.0001, "step": 40028 }, { "epoch": 37.34, "learning_rate": 4.533208955223881e-05, "loss": 0.0017, "step": 40032 }, { "epoch": 37.35, "learning_rate": 4.533162313432836e-05, "loss": 0.0, "step": 40036 }, { "epoch": 37.35, "learning_rate": 4.533115671641791e-05, "loss": 0.0001, "step": 40040 }, { "epoch": 37.35, "learning_rate": 4.5330690298507464e-05, "loss": 0.0001, "step": 40044 }, { "epoch": 37.36, "learning_rate": 4.533022388059702e-05, "loss": 0.0, "step": 40048 }, { "epoch": 37.36, "learning_rate": 4.532975746268657e-05, "loss": 0.0002, "step": 40052 }, { "epoch": 37.37, "learning_rate": 4.532929104477612e-05, "loss": 0.0016, "step": 40056 }, { "epoch": 37.37, "learning_rate": 4.532882462686568e-05, "loss": 0.0, "step": 40060 }, { "epoch": 37.37, "learning_rate": 4.5328358208955225e-05, "loss": 0.0003, "step": 40064 }, { "epoch": 37.38, "learning_rate": 4.532789179104478e-05, "loss": 0.0003, "step": 40068 }, { "epoch": 37.38, "learning_rate": 4.532742537313433e-05, "loss": 0.0012, "step": 40072 }, { "epoch": 37.38, "learning_rate": 4.532695895522388e-05, "loss": 0.0, "step": 40076 }, { "epoch": 37.39, "learning_rate": 4.532649253731344e-05, "loss": 0.0, "step": 40080 }, { "epoch": 37.39, "learning_rate": 4.5326026119402986e-05, "loss": 0.0002, "step": 40084 }, { "epoch": 37.4, "learning_rate": 4.5325559701492534e-05, "loss": 0.0001, "step": 40088 }, { "epoch": 37.4, "learning_rate": 4.5325093283582096e-05, "loss": 0.0004, "step": 40092 }, { "epoch": 37.4, "learning_rate": 4.5324626865671644e-05, "loss": 0.0038, "step": 40096 }, { "epoch": 37.41, "learning_rate": 4.532416044776119e-05, "loss": 0.0, "step": 40100 }, { "epoch": 37.41, "learning_rate": 4.532369402985075e-05, "loss": 0.001, "step": 40104 }, { "epoch": 37.41, "learning_rate": 4.53232276119403e-05, "loss": 0.0088, "step": 40108 }, { "epoch": 37.42, "learning_rate": 4.532276119402985e-05, "loss": 0.0, "step": 40112 }, { "epoch": 37.42, "learning_rate": 4.5322294776119405e-05, "loss": 0.001, "step": 40116 }, { "epoch": 37.43, "learning_rate": 4.532182835820896e-05, "loss": 0.0009, "step": 40120 }, { "epoch": 37.43, "learning_rate": 4.5321361940298515e-05, "loss": 0.0002, "step": 40124 }, { "epoch": 37.43, "learning_rate": 4.532089552238806e-05, "loss": 0.0002, "step": 40128 }, { "epoch": 37.44, "learning_rate": 4.532042910447761e-05, "loss": 0.0, "step": 40132 }, { "epoch": 37.44, "learning_rate": 4.5319962686567166e-05, "loss": 0.0004, "step": 40136 }, { "epoch": 37.44, "learning_rate": 4.531949626865672e-05, "loss": 0.0, "step": 40140 }, { "epoch": 37.45, "learning_rate": 4.531902985074627e-05, "loss": 0.0, "step": 40144 }, { "epoch": 37.45, "learning_rate": 4.5318563432835824e-05, "loss": 0.0, "step": 40148 }, { "epoch": 37.46, "learning_rate": 4.531809701492538e-05, "loss": 0.0001, "step": 40152 }, { "epoch": 37.46, "learning_rate": 4.531763059701493e-05, "loss": 0.0, "step": 40156 }, { "epoch": 37.46, "learning_rate": 4.531716417910448e-05, "loss": 0.0, "step": 40160 }, { "epoch": 37.47, "learning_rate": 4.531669776119403e-05, "loss": 0.0004, "step": 40164 }, { "epoch": 37.47, "learning_rate": 4.5316231343283585e-05, "loss": 0.0003, "step": 40168 }, { "epoch": 37.47, "learning_rate": 4.531576492537314e-05, "loss": 0.002, "step": 40172 }, { "epoch": 37.48, "learning_rate": 4.531529850746269e-05, "loss": 0.0, "step": 40176 }, { "epoch": 37.48, "learning_rate": 4.531483208955224e-05, "loss": 0.0005, "step": 40180 }, { "epoch": 37.49, "learning_rate": 4.53143656716418e-05, "loss": 0.0011, "step": 40184 }, { "epoch": 37.49, "learning_rate": 4.5313899253731346e-05, "loss": 0.0002, "step": 40188 }, { "epoch": 37.49, "learning_rate": 4.5313432835820894e-05, "loss": 0.0003, "step": 40192 }, { "epoch": 37.5, "learning_rate": 4.531296641791045e-05, "loss": 0.0002, "step": 40196 }, { "epoch": 37.5, "learning_rate": 4.5312500000000004e-05, "loss": 0.0, "step": 40200 }, { "epoch": 37.5, "learning_rate": 4.531203358208955e-05, "loss": 0.0, "step": 40204 }, { "epoch": 37.51, "learning_rate": 4.531156716417911e-05, "loss": 0.0011, "step": 40208 }, { "epoch": 37.51, "learning_rate": 4.531110074626866e-05, "loss": 0.0003, "step": 40212 }, { "epoch": 37.51, "learning_rate": 4.531063432835821e-05, "loss": 0.0001, "step": 40216 }, { "epoch": 37.52, "learning_rate": 4.5310167910447765e-05, "loss": 0.0002, "step": 40220 }, { "epoch": 37.52, "learning_rate": 4.530970149253731e-05, "loss": 0.0004, "step": 40224 }, { "epoch": 37.53, "learning_rate": 4.530923507462687e-05, "loss": 0.0001, "step": 40228 }, { "epoch": 37.53, "learning_rate": 4.530876865671642e-05, "loss": 0.0, "step": 40232 }, { "epoch": 37.53, "learning_rate": 4.530830223880597e-05, "loss": 0.0, "step": 40236 }, { "epoch": 37.54, "learning_rate": 4.5307835820895526e-05, "loss": 0.0, "step": 40240 }, { "epoch": 37.54, "learning_rate": 4.530736940298508e-05, "loss": 0.0001, "step": 40244 }, { "epoch": 37.54, "learning_rate": 4.530690298507463e-05, "loss": 0.0001, "step": 40248 }, { "epoch": 37.55, "learning_rate": 4.530643656716418e-05, "loss": 0.0005, "step": 40252 }, { "epoch": 37.55, "learning_rate": 4.530597014925373e-05, "loss": 0.0003, "step": 40256 }, { "epoch": 37.56, "learning_rate": 4.5305503731343287e-05, "loss": 0.0009, "step": 40260 }, { "epoch": 37.56, "learning_rate": 4.5305037313432835e-05, "loss": 0.0, "step": 40264 }, { "epoch": 37.56, "learning_rate": 4.530457089552239e-05, "loss": 0.0017, "step": 40268 }, { "epoch": 37.57, "learning_rate": 4.5304104477611945e-05, "loss": 0.0001, "step": 40272 }, { "epoch": 37.57, "learning_rate": 4.530363805970149e-05, "loss": 0.0, "step": 40276 }, { "epoch": 37.57, "learning_rate": 4.530317164179105e-05, "loss": 0.0001, "step": 40280 }, { "epoch": 37.58, "learning_rate": 4.5302705223880596e-05, "loss": 0.0002, "step": 40284 }, { "epoch": 37.58, "learning_rate": 4.530223880597016e-05, "loss": 0.0019, "step": 40288 }, { "epoch": 37.59, "learning_rate": 4.5301772388059706e-05, "loss": 0.0, "step": 40292 }, { "epoch": 37.59, "learning_rate": 4.5301305970149254e-05, "loss": 0.0, "step": 40296 }, { "epoch": 37.59, "learning_rate": 4.530083955223881e-05, "loss": 0.0057, "step": 40300 }, { "epoch": 37.6, "learning_rate": 4.5300373134328363e-05, "loss": 0.0, "step": 40304 }, { "epoch": 37.6, "learning_rate": 4.529990671641791e-05, "loss": 0.0, "step": 40308 }, { "epoch": 37.6, "learning_rate": 4.5299440298507467e-05, "loss": 0.0036, "step": 40312 }, { "epoch": 37.61, "learning_rate": 4.5298973880597015e-05, "loss": 0.0002, "step": 40316 }, { "epoch": 37.61, "learning_rate": 4.529850746268657e-05, "loss": 0.0001, "step": 40320 }, { "epoch": 37.62, "learning_rate": 4.5298041044776124e-05, "loss": 0.0006, "step": 40324 }, { "epoch": 37.62, "learning_rate": 4.529757462686567e-05, "loss": 0.0001, "step": 40328 }, { "epoch": 37.62, "learning_rate": 4.529710820895523e-05, "loss": 0.0, "step": 40332 }, { "epoch": 37.63, "learning_rate": 4.529664179104478e-05, "loss": 0.0013, "step": 40336 }, { "epoch": 37.63, "learning_rate": 4.529617537313433e-05, "loss": 0.0, "step": 40340 }, { "epoch": 37.63, "learning_rate": 4.529570895522388e-05, "loss": 0.0001, "step": 40344 }, { "epoch": 37.64, "learning_rate": 4.529524253731344e-05, "loss": 0.0, "step": 40348 }, { "epoch": 37.64, "learning_rate": 4.529477611940299e-05, "loss": 0.0002, "step": 40352 }, { "epoch": 37.65, "learning_rate": 4.529430970149254e-05, "loss": 0.0017, "step": 40356 }, { "epoch": 37.65, "learning_rate": 4.529384328358209e-05, "loss": 0.0001, "step": 40360 }, { "epoch": 37.65, "learning_rate": 4.5293376865671646e-05, "loss": 0.0, "step": 40364 }, { "epoch": 37.66, "learning_rate": 4.5292910447761195e-05, "loss": 0.0007, "step": 40368 }, { "epoch": 37.66, "learning_rate": 4.529244402985075e-05, "loss": 0.0, "step": 40372 }, { "epoch": 37.66, "learning_rate": 4.52919776119403e-05, "loss": 0.0, "step": 40376 }, { "epoch": 37.67, "learning_rate": 4.529151119402985e-05, "loss": 0.0, "step": 40380 }, { "epoch": 37.67, "learning_rate": 4.529104477611941e-05, "loss": 0.0001, "step": 40384 }, { "epoch": 37.68, "learning_rate": 4.5290578358208956e-05, "loss": 0.0013, "step": 40388 }, { "epoch": 37.68, "learning_rate": 4.529011194029851e-05, "loss": 0.02, "step": 40392 }, { "epoch": 37.68, "learning_rate": 4.5289645522388065e-05, "loss": 0.0, "step": 40396 }, { "epoch": 37.69, "learning_rate": 4.5289179104477614e-05, "loss": 0.0, "step": 40400 }, { "epoch": 37.69, "learning_rate": 4.528871268656716e-05, "loss": 0.0001, "step": 40404 }, { "epoch": 37.69, "learning_rate": 4.5288246268656717e-05, "loss": 0.0001, "step": 40408 }, { "epoch": 37.7, "learning_rate": 4.528777985074627e-05, "loss": 0.0001, "step": 40412 }, { "epoch": 37.7, "learning_rate": 4.528731343283582e-05, "loss": 0.0001, "step": 40416 }, { "epoch": 37.71, "learning_rate": 4.5286847014925374e-05, "loss": 0.0, "step": 40420 }, { "epoch": 37.71, "learning_rate": 4.528638059701493e-05, "loss": 0.0002, "step": 40424 }, { "epoch": 37.71, "learning_rate": 4.528591417910448e-05, "loss": 0.0071, "step": 40428 }, { "epoch": 37.72, "learning_rate": 4.528544776119403e-05, "loss": 0.0001, "step": 40432 }, { "epoch": 37.72, "learning_rate": 4.528498134328358e-05, "loss": 0.0001, "step": 40436 }, { "epoch": 37.72, "learning_rate": 4.5284514925373135e-05, "loss": 0.0004, "step": 40440 }, { "epoch": 37.73, "learning_rate": 4.528404850746269e-05, "loss": 0.0006, "step": 40444 }, { "epoch": 37.73, "learning_rate": 4.528358208955224e-05, "loss": 0.0, "step": 40448 }, { "epoch": 37.73, "learning_rate": 4.5283115671641793e-05, "loss": 0.0, "step": 40452 }, { "epoch": 37.74, "learning_rate": 4.528264925373135e-05, "loss": 0.0001, "step": 40456 }, { "epoch": 37.74, "learning_rate": 4.5282182835820896e-05, "loss": 0.0001, "step": 40460 }, { "epoch": 37.75, "learning_rate": 4.528171641791045e-05, "loss": 0.0003, "step": 40464 }, { "epoch": 37.75, "learning_rate": 4.528125e-05, "loss": 0.0015, "step": 40468 }, { "epoch": 37.75, "learning_rate": 4.5280783582089554e-05, "loss": 0.0029, "step": 40472 }, { "epoch": 37.76, "learning_rate": 4.528031716417911e-05, "loss": 0.0001, "step": 40476 }, { "epoch": 37.76, "learning_rate": 4.527985074626866e-05, "loss": 0.0, "step": 40480 }, { "epoch": 37.76, "learning_rate": 4.527938432835821e-05, "loss": 0.0002, "step": 40484 }, { "epoch": 37.77, "learning_rate": 4.527891791044777e-05, "loss": 0.0014, "step": 40488 }, { "epoch": 37.77, "learning_rate": 4.5278451492537315e-05, "loss": 0.0001, "step": 40492 }, { "epoch": 37.78, "learning_rate": 4.5277985074626864e-05, "loss": 0.0, "step": 40496 }, { "epoch": 37.78, "learning_rate": 4.5277518656716425e-05, "loss": 0.012, "step": 40500 }, { "epoch": 37.78, "eval_exact_match": 0.7379110251450677, "eval_exec": 0.769825918762089, "eval_loss": 0.41693997383117676, "eval_runtime": 1159.3081, "eval_samples_per_second": 0.892, "step": 40500 }, { "epoch": 37.78, "learning_rate": 4.527705223880597e-05, "loss": 0.0001, "step": 40504 }, { "epoch": 37.79, "learning_rate": 4.527658582089552e-05, "loss": 0.0, "step": 40508 }, { "epoch": 37.79, "learning_rate": 4.5276119402985076e-05, "loss": 0.0003, "step": 40512 }, { "epoch": 37.79, "learning_rate": 4.527565298507463e-05, "loss": 0.0001, "step": 40516 }, { "epoch": 37.8, "learning_rate": 4.527518656716418e-05, "loss": 0.0007, "step": 40520 }, { "epoch": 37.8, "learning_rate": 4.5274720149253734e-05, "loss": 0.0003, "step": 40524 }, { "epoch": 37.81, "learning_rate": 4.527425373134328e-05, "loss": 0.0, "step": 40528 }, { "epoch": 37.81, "learning_rate": 4.527378731343284e-05, "loss": 0.0003, "step": 40532 }, { "epoch": 37.81, "learning_rate": 4.527332089552239e-05, "loss": 0.0007, "step": 40536 }, { "epoch": 37.82, "learning_rate": 4.527285447761194e-05, "loss": 0.0001, "step": 40540 }, { "epoch": 37.82, "learning_rate": 4.5272388059701495e-05, "loss": 0.0029, "step": 40544 }, { "epoch": 37.82, "learning_rate": 4.527192164179105e-05, "loss": 0.0, "step": 40548 }, { "epoch": 37.83, "learning_rate": 4.52714552238806e-05, "loss": 0.0005, "step": 40552 }, { "epoch": 37.83, "learning_rate": 4.5270988805970146e-05, "loss": 0.0001, "step": 40556 }, { "epoch": 37.84, "learning_rate": 4.527052238805971e-05, "loss": 0.0002, "step": 40560 }, { "epoch": 37.84, "learning_rate": 4.5270055970149256e-05, "loss": 0.0, "step": 40564 }, { "epoch": 37.84, "learning_rate": 4.5269589552238804e-05, "loss": 0.0, "step": 40568 }, { "epoch": 37.85, "learning_rate": 4.526912313432836e-05, "loss": 0.0019, "step": 40572 }, { "epoch": 37.85, "learning_rate": 4.5268656716417914e-05, "loss": 0.0003, "step": 40576 }, { "epoch": 37.85, "learning_rate": 4.526819029850746e-05, "loss": 0.0002, "step": 40580 }, { "epoch": 37.86, "learning_rate": 4.526772388059702e-05, "loss": 0.0005, "step": 40584 }, { "epoch": 37.86, "learning_rate": 4.5267257462686565e-05, "loss": 0.0, "step": 40588 }, { "epoch": 37.87, "learning_rate": 4.526679104477612e-05, "loss": 0.0014, "step": 40592 }, { "epoch": 37.87, "learning_rate": 4.5266324626865675e-05, "loss": 0.0, "step": 40596 }, { "epoch": 37.87, "learning_rate": 4.526585820895522e-05, "loss": 0.0076, "step": 40600 }, { "epoch": 37.88, "learning_rate": 4.526539179104478e-05, "loss": 0.0003, "step": 40604 }, { "epoch": 37.88, "learning_rate": 4.526492537313433e-05, "loss": 0.0031, "step": 40608 }, { "epoch": 37.88, "learning_rate": 4.526445895522388e-05, "loss": 0.0001, "step": 40612 }, { "epoch": 37.89, "learning_rate": 4.5263992537313436e-05, "loss": 0.0002, "step": 40616 }, { "epoch": 37.89, "learning_rate": 4.526352611940299e-05, "loss": 0.0002, "step": 40620 }, { "epoch": 37.9, "learning_rate": 4.526305970149254e-05, "loss": 0.0004, "step": 40624 }, { "epoch": 37.9, "learning_rate": 4.5262593283582094e-05, "loss": 0.0, "step": 40628 }, { "epoch": 37.9, "learning_rate": 4.526212686567164e-05, "loss": 0.0001, "step": 40632 }, { "epoch": 37.91, "learning_rate": 4.52616604477612e-05, "loss": 0.0016, "step": 40636 }, { "epoch": 37.91, "learning_rate": 4.526119402985075e-05, "loss": 0.0, "step": 40640 }, { "epoch": 37.91, "learning_rate": 4.52607276119403e-05, "loss": 0.0002, "step": 40644 }, { "epoch": 37.92, "learning_rate": 4.526026119402985e-05, "loss": 0.0, "step": 40648 }, { "epoch": 37.92, "learning_rate": 4.525979477611941e-05, "loss": 0.0011, "step": 40652 }, { "epoch": 37.93, "learning_rate": 4.525932835820896e-05, "loss": 0.0, "step": 40656 }, { "epoch": 37.93, "learning_rate": 4.5258861940298506e-05, "loss": 0.0002, "step": 40660 }, { "epoch": 37.93, "learning_rate": 4.525839552238806e-05, "loss": 0.0008, "step": 40664 }, { "epoch": 37.94, "learning_rate": 4.5257929104477616e-05, "loss": 0.0001, "step": 40668 }, { "epoch": 37.94, "learning_rate": 4.5257462686567164e-05, "loss": 0.0021, "step": 40672 }, { "epoch": 37.94, "learning_rate": 4.525699626865672e-05, "loss": 0.0029, "step": 40676 }, { "epoch": 37.95, "learning_rate": 4.5256529850746274e-05, "loss": 0.0002, "step": 40680 }, { "epoch": 37.95, "learning_rate": 4.525606343283582e-05, "loss": 0.0024, "step": 40684 }, { "epoch": 37.96, "learning_rate": 4.525559701492538e-05, "loss": 0.0222, "step": 40688 }, { "epoch": 37.96, "learning_rate": 4.5255130597014925e-05, "loss": 0.0001, "step": 40692 }, { "epoch": 37.96, "learning_rate": 4.525466417910448e-05, "loss": 0.0001, "step": 40696 }, { "epoch": 37.97, "learning_rate": 4.5254197761194035e-05, "loss": 0.0037, "step": 40700 }, { "epoch": 37.97, "learning_rate": 4.525373134328358e-05, "loss": 0.0008, "step": 40704 }, { "epoch": 37.97, "learning_rate": 4.525326492537313e-05, "loss": 0.0002, "step": 40708 }, { "epoch": 37.98, "learning_rate": 4.525279850746269e-05, "loss": 0.0007, "step": 40712 }, { "epoch": 37.98, "learning_rate": 4.525233208955224e-05, "loss": 0.0, "step": 40716 }, { "epoch": 37.98, "learning_rate": 4.525186567164179e-05, "loss": 0.0, "step": 40720 }, { "epoch": 37.99, "learning_rate": 4.5251399253731344e-05, "loss": 0.0, "step": 40724 }, { "epoch": 37.99, "learning_rate": 4.52509328358209e-05, "loss": 0.0001, "step": 40728 }, { "epoch": 38.0, "learning_rate": 4.525046641791045e-05, "loss": 0.001, "step": 40732 }, { "epoch": 38.0, "learning_rate": 4.525e-05, "loss": 0.0001, "step": 40736 }, { "epoch": 38.0, "learning_rate": 4.524953358208956e-05, "loss": 0.0, "step": 40740 }, { "epoch": 38.01, "learning_rate": 4.5249067164179105e-05, "loss": 0.0, "step": 40744 }, { "epoch": 38.01, "learning_rate": 4.524860074626866e-05, "loss": 0.0, "step": 40748 }, { "epoch": 38.01, "learning_rate": 4.524813432835821e-05, "loss": 0.0, "step": 40752 }, { "epoch": 38.02, "learning_rate": 4.524766791044776e-05, "loss": 0.0001, "step": 40756 }, { "epoch": 38.02, "learning_rate": 4.524720149253732e-05, "loss": 0.0, "step": 40760 }, { "epoch": 38.03, "learning_rate": 4.5246735074626866e-05, "loss": 0.0001, "step": 40764 }, { "epoch": 38.03, "learning_rate": 4.5246268656716414e-05, "loss": 0.0001, "step": 40768 }, { "epoch": 38.03, "learning_rate": 4.5245802238805976e-05, "loss": 0.0001, "step": 40772 }, { "epoch": 38.04, "learning_rate": 4.5245335820895524e-05, "loss": 0.0012, "step": 40776 }, { "epoch": 38.04, "learning_rate": 4.524486940298508e-05, "loss": 0.0003, "step": 40780 }, { "epoch": 38.04, "learning_rate": 4.524440298507463e-05, "loss": 0.0003, "step": 40784 }, { "epoch": 38.05, "learning_rate": 4.524393656716418e-05, "loss": 0.0001, "step": 40788 }, { "epoch": 38.05, "learning_rate": 4.524347014925374e-05, "loss": 0.0014, "step": 40792 }, { "epoch": 38.06, "learning_rate": 4.5243003731343285e-05, "loss": 0.0007, "step": 40796 }, { "epoch": 38.06, "learning_rate": 4.524253731343284e-05, "loss": 0.0045, "step": 40800 }, { "epoch": 38.06, "learning_rate": 4.5242070895522395e-05, "loss": 0.0002, "step": 40804 }, { "epoch": 38.07, "learning_rate": 4.524160447761194e-05, "loss": 0.0011, "step": 40808 }, { "epoch": 38.07, "learning_rate": 4.524113805970149e-05, "loss": 0.0001, "step": 40812 }, { "epoch": 38.07, "learning_rate": 4.5240671641791046e-05, "loss": 0.0, "step": 40816 }, { "epoch": 38.08, "learning_rate": 4.52402052238806e-05, "loss": 0.0006, "step": 40820 }, { "epoch": 38.08, "learning_rate": 4.523973880597015e-05, "loss": 0.001, "step": 40824 }, { "epoch": 38.09, "learning_rate": 4.5239272388059704e-05, "loss": 0.0, "step": 40828 }, { "epoch": 38.09, "learning_rate": 4.523880597014926e-05, "loss": 0.0, "step": 40832 }, { "epoch": 38.09, "learning_rate": 4.523833955223881e-05, "loss": 0.0007, "step": 40836 }, { "epoch": 38.1, "learning_rate": 4.523787313432836e-05, "loss": 0.0, "step": 40840 }, { "epoch": 38.1, "learning_rate": 4.523740671641791e-05, "loss": 0.0003, "step": 40844 }, { "epoch": 38.1, "learning_rate": 4.5236940298507465e-05, "loss": 0.0009, "step": 40848 }, { "epoch": 38.11, "learning_rate": 4.523647388059702e-05, "loss": 0.0041, "step": 40852 }, { "epoch": 38.11, "learning_rate": 4.523600746268657e-05, "loss": 0.0011, "step": 40856 }, { "epoch": 38.12, "learning_rate": 4.523554104477612e-05, "loss": 0.0, "step": 40860 }, { "epoch": 38.12, "learning_rate": 4.523507462686568e-05, "loss": 0.0002, "step": 40864 }, { "epoch": 38.12, "learning_rate": 4.5234608208955226e-05, "loss": 0.0001, "step": 40868 }, { "epoch": 38.13, "learning_rate": 4.5234141791044774e-05, "loss": 0.0, "step": 40872 }, { "epoch": 38.13, "learning_rate": 4.523367537313433e-05, "loss": 0.0001, "step": 40876 }, { "epoch": 38.13, "learning_rate": 4.5233208955223884e-05, "loss": 0.0, "step": 40880 }, { "epoch": 38.14, "learning_rate": 4.523274253731343e-05, "loss": 0.0003, "step": 40884 }, { "epoch": 38.14, "learning_rate": 4.523227611940299e-05, "loss": 0.0001, "step": 40888 }, { "epoch": 38.15, "learning_rate": 4.523180970149254e-05, "loss": 0.0015, "step": 40892 }, { "epoch": 38.15, "learning_rate": 4.523134328358209e-05, "loss": 0.0003, "step": 40896 }, { "epoch": 38.15, "learning_rate": 4.5230876865671645e-05, "loss": 0.001, "step": 40900 }, { "epoch": 38.16, "learning_rate": 4.523041044776119e-05, "loss": 0.0016, "step": 40904 }, { "epoch": 38.16, "learning_rate": 4.522994402985075e-05, "loss": 0.0023, "step": 40908 }, { "epoch": 38.16, "learning_rate": 4.52294776119403e-05, "loss": 0.0, "step": 40912 }, { "epoch": 38.17, "learning_rate": 4.522901119402985e-05, "loss": 0.0004, "step": 40916 }, { "epoch": 38.17, "learning_rate": 4.5228544776119406e-05, "loss": 0.0001, "step": 40920 }, { "epoch": 38.18, "learning_rate": 4.522807835820896e-05, "loss": 0.0, "step": 40924 }, { "epoch": 38.18, "learning_rate": 4.522761194029851e-05, "loss": 0.0017, "step": 40928 }, { "epoch": 38.18, "learning_rate": 4.522714552238806e-05, "loss": 0.0002, "step": 40932 }, { "epoch": 38.19, "learning_rate": 4.522667910447761e-05, "loss": 0.0008, "step": 40936 }, { "epoch": 38.19, "learning_rate": 4.522621268656717e-05, "loss": 0.0008, "step": 40940 }, { "epoch": 38.19, "learning_rate": 4.522574626865672e-05, "loss": 0.0, "step": 40944 }, { "epoch": 38.2, "learning_rate": 4.522527985074627e-05, "loss": 0.0019, "step": 40948 }, { "epoch": 38.2, "learning_rate": 4.5224813432835825e-05, "loss": 0.0011, "step": 40952 }, { "epoch": 38.21, "learning_rate": 4.522434701492538e-05, "loss": 0.0005, "step": 40956 }, { "epoch": 38.21, "learning_rate": 4.522388059701493e-05, "loss": 0.0, "step": 40960 }, { "epoch": 38.21, "learning_rate": 4.5223414179104476e-05, "loss": 0.0003, "step": 40964 }, { "epoch": 38.22, "learning_rate": 4.522294776119404e-05, "loss": 0.0, "step": 40968 }, { "epoch": 38.22, "learning_rate": 4.5222481343283586e-05, "loss": 0.0027, "step": 40972 }, { "epoch": 38.22, "learning_rate": 4.5222014925373134e-05, "loss": 0.0, "step": 40976 }, { "epoch": 38.23, "learning_rate": 4.522154850746269e-05, "loss": 0.0003, "step": 40980 }, { "epoch": 38.23, "learning_rate": 4.5221082089552244e-05, "loss": 0.0002, "step": 40984 }, { "epoch": 38.24, "learning_rate": 4.522061567164179e-05, "loss": 0.0001, "step": 40988 }, { "epoch": 38.24, "learning_rate": 4.522014925373135e-05, "loss": 0.0, "step": 40992 }, { "epoch": 38.24, "learning_rate": 4.5219682835820895e-05, "loss": 0.0001, "step": 40996 }, { "epoch": 38.25, "learning_rate": 4.521921641791045e-05, "loss": 0.0, "step": 41000 }, { "epoch": 38.25, "eval_exact_match": 0.7359767891682786, "eval_exec": 0.7649903288201161, "eval_loss": 0.43792104721069336, "eval_runtime": 1152.442, "eval_samples_per_second": 0.897, "step": 41000 }, { "epoch": 38.25, "learning_rate": 4.5218750000000005e-05, "loss": 0.0001, "step": 41004 }, { "epoch": 38.25, "learning_rate": 4.521828358208955e-05, "loss": 0.0003, "step": 41008 }, { "epoch": 38.26, "learning_rate": 4.521781716417911e-05, "loss": 0.0, "step": 41012 }, { "epoch": 38.26, "learning_rate": 4.521735074626866e-05, "loss": 0.0003, "step": 41016 }, { "epoch": 38.26, "learning_rate": 4.521688432835821e-05, "loss": 0.0002, "step": 41020 }, { "epoch": 38.27, "learning_rate": 4.521641791044776e-05, "loss": 0.0002, "step": 41024 }, { "epoch": 38.27, "learning_rate": 4.521595149253732e-05, "loss": 0.0, "step": 41028 }, { "epoch": 38.28, "learning_rate": 4.521548507462687e-05, "loss": 0.0004, "step": 41032 }, { "epoch": 38.28, "learning_rate": 4.521501865671642e-05, "loss": 0.0, "step": 41036 }, { "epoch": 38.28, "learning_rate": 4.521455223880597e-05, "loss": 0.0008, "step": 41040 }, { "epoch": 38.29, "learning_rate": 4.521408582089553e-05, "loss": 0.0001, "step": 41044 }, { "epoch": 38.29, "learning_rate": 4.5213619402985075e-05, "loss": 0.0, "step": 41048 }, { "epoch": 38.29, "learning_rate": 4.521315298507463e-05, "loss": 0.0, "step": 41052 }, { "epoch": 38.3, "learning_rate": 4.521268656716418e-05, "loss": 0.0, "step": 41056 }, { "epoch": 38.3, "learning_rate": 4.521222014925373e-05, "loss": 0.0003, "step": 41060 }, { "epoch": 38.31, "learning_rate": 4.521175373134329e-05, "loss": 0.0001, "step": 41064 }, { "epoch": 38.31, "learning_rate": 4.5211287313432836e-05, "loss": 0.0003, "step": 41068 }, { "epoch": 38.31, "learning_rate": 4.521082089552239e-05, "loss": 0.0001, "step": 41072 }, { "epoch": 38.32, "learning_rate": 4.5210354477611946e-05, "loss": 0.0004, "step": 41076 }, { "epoch": 38.32, "learning_rate": 4.5209888059701494e-05, "loss": 0.0073, "step": 41080 }, { "epoch": 38.32, "learning_rate": 4.520942164179104e-05, "loss": 0.0, "step": 41084 }, { "epoch": 38.33, "learning_rate": 4.5208955223880604e-05, "loss": 0.0018, "step": 41088 }, { "epoch": 38.33, "learning_rate": 4.520848880597015e-05, "loss": 0.0053, "step": 41092 }, { "epoch": 38.34, "learning_rate": 4.52080223880597e-05, "loss": 0.0001, "step": 41096 }, { "epoch": 38.34, "learning_rate": 4.5207555970149255e-05, "loss": 0.0004, "step": 41100 }, { "epoch": 38.34, "learning_rate": 4.520708955223881e-05, "loss": 0.0002, "step": 41104 }, { "epoch": 38.35, "learning_rate": 4.5206623134328365e-05, "loss": 0.0, "step": 41108 }, { "epoch": 38.35, "learning_rate": 4.520615671641791e-05, "loss": 0.0, "step": 41112 }, { "epoch": 38.35, "learning_rate": 4.520569029850746e-05, "loss": 0.0016, "step": 41116 }, { "epoch": 38.36, "learning_rate": 4.520522388059702e-05, "loss": 0.0, "step": 41120 }, { "epoch": 38.36, "learning_rate": 4.520475746268657e-05, "loss": 0.0, "step": 41124 }, { "epoch": 38.37, "learning_rate": 4.520429104477612e-05, "loss": 0.0001, "step": 41128 }, { "epoch": 38.37, "learning_rate": 4.5203824626865674e-05, "loss": 0.0, "step": 41132 }, { "epoch": 38.37, "learning_rate": 4.520335820895523e-05, "loss": 0.0, "step": 41136 }, { "epoch": 38.38, "learning_rate": 4.520289179104478e-05, "loss": 0.0, "step": 41140 }, { "epoch": 38.38, "learning_rate": 4.520242537313433e-05, "loss": 0.0, "step": 41144 }, { "epoch": 38.38, "learning_rate": 4.520195895522388e-05, "loss": 0.0002, "step": 41148 }, { "epoch": 38.39, "learning_rate": 4.5201492537313435e-05, "loss": 0.0001, "step": 41152 }, { "epoch": 38.39, "learning_rate": 4.520102611940299e-05, "loss": 0.0018, "step": 41156 }, { "epoch": 38.4, "learning_rate": 4.520055970149254e-05, "loss": 0.0, "step": 41160 }, { "epoch": 38.4, "learning_rate": 4.520009328358209e-05, "loss": 0.0, "step": 41164 }, { "epoch": 38.4, "learning_rate": 4.519962686567165e-05, "loss": 0.0032, "step": 41168 }, { "epoch": 38.41, "learning_rate": 4.5199160447761196e-05, "loss": 0.0019, "step": 41172 }, { "epoch": 38.41, "learning_rate": 4.5198694029850744e-05, "loss": 0.0002, "step": 41176 }, { "epoch": 38.41, "learning_rate": 4.5198227611940306e-05, "loss": 0.0, "step": 41180 }, { "epoch": 38.42, "learning_rate": 4.5197761194029854e-05, "loss": 0.0001, "step": 41184 }, { "epoch": 38.42, "learning_rate": 4.51972947761194e-05, "loss": 0.0002, "step": 41188 }, { "epoch": 38.43, "learning_rate": 4.519682835820896e-05, "loss": 0.0034, "step": 41192 }, { "epoch": 38.43, "learning_rate": 4.519636194029851e-05, "loss": 0.0011, "step": 41196 }, { "epoch": 38.43, "learning_rate": 4.519589552238806e-05, "loss": 0.0003, "step": 41200 }, { "epoch": 38.44, "learning_rate": 4.5195429104477615e-05, "loss": 0.0002, "step": 41204 }, { "epoch": 38.44, "learning_rate": 4.519496268656716e-05, "loss": 0.0008, "step": 41208 }, { "epoch": 38.44, "learning_rate": 4.519449626865672e-05, "loss": 0.0, "step": 41212 }, { "epoch": 38.45, "learning_rate": 4.519402985074627e-05, "loss": 0.0001, "step": 41216 }, { "epoch": 38.45, "learning_rate": 4.519356343283582e-05, "loss": 0.0002, "step": 41220 }, { "epoch": 38.46, "learning_rate": 4.5193097014925376e-05, "loss": 0.0, "step": 41224 }, { "epoch": 38.46, "learning_rate": 4.519263059701493e-05, "loss": 0.0001, "step": 41228 }, { "epoch": 38.46, "learning_rate": 4.519216417910448e-05, "loss": 0.0005, "step": 41232 }, { "epoch": 38.47, "learning_rate": 4.519169776119403e-05, "loss": 0.0, "step": 41236 }, { "epoch": 38.47, "learning_rate": 4.519123134328359e-05, "loss": 0.0, "step": 41240 }, { "epoch": 38.47, "learning_rate": 4.5190764925373137e-05, "loss": 0.0, "step": 41244 }, { "epoch": 38.48, "learning_rate": 4.5190298507462685e-05, "loss": 0.0001, "step": 41248 }, { "epoch": 38.48, "learning_rate": 4.518983208955224e-05, "loss": 0.0003, "step": 41252 }, { "epoch": 38.49, "learning_rate": 4.5189365671641795e-05, "loss": 0.0, "step": 41256 }, { "epoch": 38.49, "learning_rate": 4.518889925373134e-05, "loss": 0.0, "step": 41260 }, { "epoch": 38.49, "learning_rate": 4.51884328358209e-05, "loss": 0.0, "step": 41264 }, { "epoch": 38.5, "learning_rate": 4.5187966417910446e-05, "loss": 0.0, "step": 41268 }, { "epoch": 38.5, "learning_rate": 4.518750000000001e-05, "loss": 0.0001, "step": 41272 }, { "epoch": 38.5, "learning_rate": 4.5187033582089556e-05, "loss": 0.0044, "step": 41276 }, { "epoch": 38.51, "learning_rate": 4.5186567164179104e-05, "loss": 0.0001, "step": 41280 }, { "epoch": 38.51, "learning_rate": 4.518610074626866e-05, "loss": 0.0, "step": 41284 }, { "epoch": 38.51, "learning_rate": 4.5185634328358213e-05, "loss": 0.0, "step": 41288 }, { "epoch": 38.52, "learning_rate": 4.518516791044776e-05, "loss": 0.0022, "step": 41292 }, { "epoch": 38.52, "learning_rate": 4.5184701492537317e-05, "loss": 0.0013, "step": 41296 }, { "epoch": 38.53, "learning_rate": 4.518423507462687e-05, "loss": 0.0003, "step": 41300 }, { "epoch": 38.53, "learning_rate": 4.518376865671642e-05, "loss": 0.0, "step": 41304 }, { "epoch": 38.53, "learning_rate": 4.5183302238805974e-05, "loss": 0.0, "step": 41308 }, { "epoch": 38.54, "learning_rate": 4.518283582089552e-05, "loss": 0.0001, "step": 41312 }, { "epoch": 38.54, "learning_rate": 4.518236940298508e-05, "loss": 0.0003, "step": 41316 }, { "epoch": 38.54, "learning_rate": 4.518190298507463e-05, "loss": 0.0, "step": 41320 }, { "epoch": 38.55, "learning_rate": 4.518143656716418e-05, "loss": 0.0, "step": 41324 }, { "epoch": 38.55, "learning_rate": 4.518097014925373e-05, "loss": 0.0002, "step": 41328 }, { "epoch": 38.56, "learning_rate": 4.518050373134329e-05, "loss": 0.0066, "step": 41332 }, { "epoch": 38.56, "learning_rate": 4.518003731343284e-05, "loss": 0.0001, "step": 41336 }, { "epoch": 38.56, "learning_rate": 4.5179570895522387e-05, "loss": 0.0005, "step": 41340 }, { "epoch": 38.57, "learning_rate": 4.517910447761194e-05, "loss": 0.0003, "step": 41344 }, { "epoch": 38.57, "learning_rate": 4.5178638059701496e-05, "loss": 0.0009, "step": 41348 }, { "epoch": 38.57, "learning_rate": 4.5178171641791045e-05, "loss": 0.0, "step": 41352 }, { "epoch": 38.58, "learning_rate": 4.51777052238806e-05, "loss": 0.0, "step": 41356 }, { "epoch": 38.58, "learning_rate": 4.5177238805970154e-05, "loss": 0.0002, "step": 41360 }, { "epoch": 38.59, "learning_rate": 4.51767723880597e-05, "loss": 0.0, "step": 41364 }, { "epoch": 38.59, "learning_rate": 4.517630597014926e-05, "loss": 0.0001, "step": 41368 }, { "epoch": 38.59, "learning_rate": 4.5175839552238806e-05, "loss": 0.0, "step": 41372 }, { "epoch": 38.6, "learning_rate": 4.517537313432836e-05, "loss": 0.0, "step": 41376 }, { "epoch": 38.6, "learning_rate": 4.5174906716417915e-05, "loss": 0.0, "step": 41380 }, { "epoch": 38.6, "learning_rate": 4.5174440298507463e-05, "loss": 0.0012, "step": 41384 }, { "epoch": 38.61, "learning_rate": 4.517397388059701e-05, "loss": 0.0001, "step": 41388 }, { "epoch": 38.61, "learning_rate": 4.517350746268657e-05, "loss": 0.0, "step": 41392 }, { "epoch": 38.62, "learning_rate": 4.517304104477612e-05, "loss": 0.0001, "step": 41396 }, { "epoch": 38.62, "learning_rate": 4.517257462686567e-05, "loss": 0.0001, "step": 41400 }, { "epoch": 38.62, "learning_rate": 4.5172108208955224e-05, "loss": 0.0016, "step": 41404 }, { "epoch": 38.63, "learning_rate": 4.517164179104478e-05, "loss": 0.0024, "step": 41408 }, { "epoch": 38.63, "learning_rate": 4.517117537313433e-05, "loss": 0.0001, "step": 41412 }, { "epoch": 38.63, "learning_rate": 4.517070895522388e-05, "loss": 0.0, "step": 41416 }, { "epoch": 38.64, "learning_rate": 4.517024253731344e-05, "loss": 0.0019, "step": 41420 }, { "epoch": 38.64, "learning_rate": 4.5169776119402985e-05, "loss": 0.0, "step": 41424 }, { "epoch": 38.65, "learning_rate": 4.516930970149254e-05, "loss": 0.0032, "step": 41428 }, { "epoch": 38.65, "learning_rate": 4.516884328358209e-05, "loss": 0.0009, "step": 41432 }, { "epoch": 38.65, "learning_rate": 4.5168376865671643e-05, "loss": 0.0049, "step": 41436 }, { "epoch": 38.66, "learning_rate": 4.51679104477612e-05, "loss": 0.0, "step": 41440 }, { "epoch": 38.66, "learning_rate": 4.5167444029850746e-05, "loss": 0.0004, "step": 41444 }, { "epoch": 38.66, "learning_rate": 4.51669776119403e-05, "loss": 0.0015, "step": 41448 }, { "epoch": 38.67, "learning_rate": 4.5166511194029856e-05, "loss": 0.0, "step": 41452 }, { "epoch": 38.67, "learning_rate": 4.5166044776119404e-05, "loss": 0.0008, "step": 41456 }, { "epoch": 38.68, "learning_rate": 4.516557835820896e-05, "loss": 0.0, "step": 41460 }, { "epoch": 38.68, "learning_rate": 4.516511194029851e-05, "loss": 0.0, "step": 41464 }, { "epoch": 38.68, "learning_rate": 4.516464552238806e-05, "loss": 0.0, "step": 41468 }, { "epoch": 38.69, "learning_rate": 4.516417910447762e-05, "loss": 0.0008, "step": 41472 }, { "epoch": 38.69, "learning_rate": 4.5163712686567165e-05, "loss": 0.0001, "step": 41476 }, { "epoch": 38.69, "learning_rate": 4.516324626865672e-05, "loss": 0.0002, "step": 41480 }, { "epoch": 38.7, "learning_rate": 4.5162779850746275e-05, "loss": 0.0001, "step": 41484 }, { "epoch": 38.7, "learning_rate": 4.516231343283582e-05, "loss": 0.0001, "step": 41488 }, { "epoch": 38.71, "learning_rate": 4.516184701492537e-05, "loss": 0.0001, "step": 41492 }, { "epoch": 38.71, "learning_rate": 4.5161380597014926e-05, "loss": 0.0, "step": 41496 }, { "epoch": 38.71, "learning_rate": 4.516091417910448e-05, "loss": 0.0035, "step": 41500 }, { "epoch": 38.71, "eval_exact_match": 0.7263056092843327, "eval_exec": 0.7562862669245648, "eval_loss": 0.440681517124176, "eval_runtime": 1097.6911, "eval_samples_per_second": 0.942, "step": 41500 }, { "epoch": 38.72, "learning_rate": 4.516044776119403e-05, "loss": 0.0, "step": 41504 }, { "epoch": 38.72, "learning_rate": 4.5159981343283584e-05, "loss": 0.0001, "step": 41508 }, { "epoch": 38.72, "learning_rate": 4.515951492537314e-05, "loss": 0.0014, "step": 41512 }, { "epoch": 38.73, "learning_rate": 4.515904850746269e-05, "loss": 0.0014, "step": 41516 }, { "epoch": 38.73, "learning_rate": 4.515858208955224e-05, "loss": 0.0, "step": 41520 }, { "epoch": 38.73, "learning_rate": 4.515811567164179e-05, "loss": 0.0002, "step": 41524 }, { "epoch": 38.74, "learning_rate": 4.5157649253731345e-05, "loss": 0.002, "step": 41528 }, { "epoch": 38.74, "learning_rate": 4.51571828358209e-05, "loss": 0.0015, "step": 41532 }, { "epoch": 38.75, "learning_rate": 4.515671641791045e-05, "loss": 0.0, "step": 41536 }, { "epoch": 38.75, "learning_rate": 4.515625e-05, "loss": 0.0, "step": 41540 }, { "epoch": 38.75, "learning_rate": 4.515578358208956e-05, "loss": 0.0013, "step": 41544 }, { "epoch": 38.76, "learning_rate": 4.5155317164179106e-05, "loss": 0.0, "step": 41548 }, { "epoch": 38.76, "learning_rate": 4.5154850746268654e-05, "loss": 0.0001, "step": 41552 }, { "epoch": 38.76, "learning_rate": 4.515438432835821e-05, "loss": 0.0022, "step": 41556 }, { "epoch": 38.77, "learning_rate": 4.5153917910447764e-05, "loss": 0.0011, "step": 41560 }, { "epoch": 38.77, "learning_rate": 4.515345149253731e-05, "loss": 0.0001, "step": 41564 }, { "epoch": 38.78, "learning_rate": 4.515298507462687e-05, "loss": 0.0001, "step": 41568 }, { "epoch": 38.78, "learning_rate": 4.515251865671642e-05, "loss": 0.0004, "step": 41572 }, { "epoch": 38.78, "learning_rate": 4.515205223880597e-05, "loss": 0.0, "step": 41576 }, { "epoch": 38.79, "learning_rate": 4.5151585820895525e-05, "loss": 0.0, "step": 41580 }, { "epoch": 38.79, "learning_rate": 4.515111940298507e-05, "loss": 0.0006, "step": 41584 }, { "epoch": 38.79, "learning_rate": 4.515065298507463e-05, "loss": 0.0001, "step": 41588 }, { "epoch": 38.8, "learning_rate": 4.515018656716418e-05, "loss": 0.0016, "step": 41592 }, { "epoch": 38.8, "learning_rate": 4.514972014925373e-05, "loss": 0.0007, "step": 41596 }, { "epoch": 38.81, "learning_rate": 4.5149253731343286e-05, "loss": 0.0, "step": 41600 }, { "epoch": 38.81, "learning_rate": 4.514878731343284e-05, "loss": 0.0, "step": 41604 }, { "epoch": 38.81, "learning_rate": 4.514832089552239e-05, "loss": 0.0014, "step": 41608 }, { "epoch": 38.82, "learning_rate": 4.5147854477611944e-05, "loss": 0.0007, "step": 41612 }, { "epoch": 38.82, "learning_rate": 4.514738805970149e-05, "loss": 0.0, "step": 41616 }, { "epoch": 38.82, "learning_rate": 4.514692164179105e-05, "loss": 0.0002, "step": 41620 }, { "epoch": 38.83, "learning_rate": 4.51464552238806e-05, "loss": 0.0001, "step": 41624 }, { "epoch": 38.83, "learning_rate": 4.514598880597015e-05, "loss": 0.0015, "step": 41628 }, { "epoch": 38.84, "learning_rate": 4.5145522388059705e-05, "loss": 0.0, "step": 41632 }, { "epoch": 38.84, "learning_rate": 4.514505597014926e-05, "loss": 0.0023, "step": 41636 }, { "epoch": 38.84, "learning_rate": 4.514458955223881e-05, "loss": 0.0008, "step": 41640 }, { "epoch": 38.85, "learning_rate": 4.5144123134328356e-05, "loss": 0.0001, "step": 41644 }, { "epoch": 38.85, "learning_rate": 4.514365671641792e-05, "loss": 0.0004, "step": 41648 }, { "epoch": 38.85, "learning_rate": 4.5143190298507466e-05, "loss": 0.0014, "step": 41652 }, { "epoch": 38.86, "learning_rate": 4.5142723880597014e-05, "loss": 0.0, "step": 41656 }, { "epoch": 38.86, "learning_rate": 4.514225746268657e-05, "loss": 0.0041, "step": 41660 }, { "epoch": 38.87, "learning_rate": 4.5141791044776124e-05, "loss": 0.0014, "step": 41664 }, { "epoch": 38.87, "learning_rate": 4.514132462686567e-05, "loss": 0.0019, "step": 41668 }, { "epoch": 38.87, "learning_rate": 4.514085820895523e-05, "loss": 0.0, "step": 41672 }, { "epoch": 38.88, "learning_rate": 4.5140391791044775e-05, "loss": 0.0001, "step": 41676 }, { "epoch": 38.88, "learning_rate": 4.513992537313433e-05, "loss": 0.0018, "step": 41680 }, { "epoch": 38.88, "learning_rate": 4.5139458955223885e-05, "loss": 0.0001, "step": 41684 }, { "epoch": 38.89, "learning_rate": 4.513899253731343e-05, "loss": 0.0001, "step": 41688 }, { "epoch": 38.89, "learning_rate": 4.513852611940299e-05, "loss": 0.0001, "step": 41692 }, { "epoch": 38.9, "learning_rate": 4.513805970149254e-05, "loss": 0.0003, "step": 41696 }, { "epoch": 38.9, "learning_rate": 4.513759328358209e-05, "loss": 0.0003, "step": 41700 }, { "epoch": 38.9, "learning_rate": 4.513712686567164e-05, "loss": 0.0004, "step": 41704 }, { "epoch": 38.91, "learning_rate": 4.51366604477612e-05, "loss": 0.0001, "step": 41708 }, { "epoch": 38.91, "learning_rate": 4.513619402985075e-05, "loss": 0.0031, "step": 41712 }, { "epoch": 38.91, "learning_rate": 4.51357276119403e-05, "loss": 0.0006, "step": 41716 }, { "epoch": 38.92, "learning_rate": 4.513526119402985e-05, "loss": 0.0009, "step": 41720 }, { "epoch": 38.92, "learning_rate": 4.513479477611941e-05, "loss": 0.0001, "step": 41724 }, { "epoch": 38.93, "learning_rate": 4.5134328358208955e-05, "loss": 0.0, "step": 41728 }, { "epoch": 38.93, "learning_rate": 4.513386194029851e-05, "loss": 0.0048, "step": 41732 }, { "epoch": 38.93, "learning_rate": 4.513339552238806e-05, "loss": 0.0001, "step": 41736 }, { "epoch": 38.94, "learning_rate": 4.513292910447761e-05, "loss": 0.0022, "step": 41740 }, { "epoch": 38.94, "learning_rate": 4.513246268656717e-05, "loss": 0.0, "step": 41744 }, { "epoch": 38.94, "learning_rate": 4.5131996268656716e-05, "loss": 0.0003, "step": 41748 }, { "epoch": 38.95, "learning_rate": 4.513152985074627e-05, "loss": 0.0146, "step": 41752 }, { "epoch": 38.95, "learning_rate": 4.5131063432835826e-05, "loss": 0.0001, "step": 41756 }, { "epoch": 38.96, "learning_rate": 4.5130597014925374e-05, "loss": 0.0, "step": 41760 }, { "epoch": 38.96, "learning_rate": 4.513013059701493e-05, "loss": 0.0001, "step": 41764 }, { "epoch": 38.96, "learning_rate": 4.5129664179104484e-05, "loss": 0.0, "step": 41768 }, { "epoch": 38.97, "learning_rate": 4.512919776119403e-05, "loss": 0.0001, "step": 41772 }, { "epoch": 38.97, "learning_rate": 4.512873134328359e-05, "loss": 0.0052, "step": 41776 }, { "epoch": 38.97, "learning_rate": 4.5128264925373135e-05, "loss": 0.0008, "step": 41780 }, { "epoch": 38.98, "learning_rate": 4.512779850746269e-05, "loss": 0.0001, "step": 41784 }, { "epoch": 38.98, "learning_rate": 4.5127332089552245e-05, "loss": 0.0001, "step": 41788 }, { "epoch": 38.98, "learning_rate": 4.512686567164179e-05, "loss": 0.0, "step": 41792 }, { "epoch": 38.99, "learning_rate": 4.512639925373134e-05, "loss": 0.0007, "step": 41796 }, { "epoch": 38.99, "learning_rate": 4.51259328358209e-05, "loss": 0.0001, "step": 41800 }, { "epoch": 39.0, "learning_rate": 4.512546641791045e-05, "loss": 0.0001, "step": 41804 }, { "epoch": 39.0, "learning_rate": 4.5125e-05, "loss": 0.0, "step": 41808 }, { "epoch": 39.0, "learning_rate": 4.5124533582089554e-05, "loss": 0.0, "step": 41812 }, { "epoch": 39.01, "learning_rate": 4.512406716417911e-05, "loss": 0.0001, "step": 41816 }, { "epoch": 39.01, "learning_rate": 4.512360074626866e-05, "loss": 0.0003, "step": 41820 }, { "epoch": 39.01, "learning_rate": 4.512313432835821e-05, "loss": 0.0001, "step": 41824 }, { "epoch": 39.02, "learning_rate": 4.512266791044776e-05, "loss": 0.0002, "step": 41828 }, { "epoch": 39.02, "learning_rate": 4.5122201492537315e-05, "loss": 0.0001, "step": 41832 }, { "epoch": 39.03, "learning_rate": 4.512173507462687e-05, "loss": 0.0, "step": 41836 }, { "epoch": 39.03, "learning_rate": 4.512126865671642e-05, "loss": 0.0009, "step": 41840 }, { "epoch": 39.03, "learning_rate": 4.512080223880597e-05, "loss": 0.0002, "step": 41844 }, { "epoch": 39.04, "learning_rate": 4.512033582089553e-05, "loss": 0.0073, "step": 41848 }, { "epoch": 39.04, "learning_rate": 4.5119869402985076e-05, "loss": 0.0, "step": 41852 }, { "epoch": 39.04, "learning_rate": 4.5119402985074624e-05, "loss": 0.0, "step": 41856 }, { "epoch": 39.05, "learning_rate": 4.5118936567164186e-05, "loss": 0.0001, "step": 41860 }, { "epoch": 39.05, "learning_rate": 4.5118470149253734e-05, "loss": 0.0, "step": 41864 }, { "epoch": 39.06, "learning_rate": 4.511800373134328e-05, "loss": 0.0001, "step": 41868 }, { "epoch": 39.06, "learning_rate": 4.511753731343284e-05, "loss": 0.0, "step": 41872 }, { "epoch": 39.06, "learning_rate": 4.511707089552239e-05, "loss": 0.0, "step": 41876 }, { "epoch": 39.07, "learning_rate": 4.511660447761194e-05, "loss": 0.0087, "step": 41880 }, { "epoch": 39.07, "learning_rate": 4.5116138059701495e-05, "loss": 0.0001, "step": 41884 }, { "epoch": 39.07, "learning_rate": 4.511567164179104e-05, "loss": 0.0001, "step": 41888 }, { "epoch": 39.08, "learning_rate": 4.51152052238806e-05, "loss": 0.0, "step": 41892 }, { "epoch": 39.08, "learning_rate": 4.511473880597015e-05, "loss": 0.0004, "step": 41896 }, { "epoch": 39.09, "learning_rate": 4.51142723880597e-05, "loss": 0.0007, "step": 41900 }, { "epoch": 39.09, "learning_rate": 4.5113805970149256e-05, "loss": 0.0012, "step": 41904 }, { "epoch": 39.09, "learning_rate": 4.511333955223881e-05, "loss": 0.0003, "step": 41908 }, { "epoch": 39.1, "learning_rate": 4.511287313432836e-05, "loss": 0.0005, "step": 41912 }, { "epoch": 39.1, "learning_rate": 4.5112406716417914e-05, "loss": 0.0007, "step": 41916 }, { "epoch": 39.1, "learning_rate": 4.511194029850747e-05, "loss": 0.0, "step": 41920 }, { "epoch": 39.11, "learning_rate": 4.511147388059702e-05, "loss": 0.0, "step": 41924 }, { "epoch": 39.11, "learning_rate": 4.511100746268657e-05, "loss": 0.0003, "step": 41928 }, { "epoch": 39.12, "learning_rate": 4.511054104477612e-05, "loss": 0.0015, "step": 41932 }, { "epoch": 39.12, "learning_rate": 4.5110074626865675e-05, "loss": 0.0, "step": 41936 }, { "epoch": 39.12, "learning_rate": 4.510960820895523e-05, "loss": 0.0, "step": 41940 }, { "epoch": 39.13, "learning_rate": 4.510914179104478e-05, "loss": 0.0004, "step": 41944 }, { "epoch": 39.13, "learning_rate": 4.5108675373134326e-05, "loss": 0.0, "step": 41948 }, { "epoch": 39.13, "learning_rate": 4.510820895522389e-05, "loss": 0.0001, "step": 41952 }, { "epoch": 39.14, "learning_rate": 4.5107742537313436e-05, "loss": 0.0001, "step": 41956 }, { "epoch": 39.14, "learning_rate": 4.5107276119402984e-05, "loss": 0.0001, "step": 41960 }, { "epoch": 39.15, "learning_rate": 4.510680970149254e-05, "loss": 0.0001, "step": 41964 }, { "epoch": 39.15, "learning_rate": 4.5106343283582094e-05, "loss": 0.005, "step": 41968 }, { "epoch": 39.15, "learning_rate": 4.510587686567164e-05, "loss": 0.0011, "step": 41972 }, { "epoch": 39.16, "learning_rate": 4.51054104477612e-05, "loss": 0.0019, "step": 41976 }, { "epoch": 39.16, "learning_rate": 4.510494402985075e-05, "loss": 0.0013, "step": 41980 }, { "epoch": 39.16, "learning_rate": 4.51044776119403e-05, "loss": 0.0001, "step": 41984 }, { "epoch": 39.17, "learning_rate": 4.5104011194029855e-05, "loss": 0.0, "step": 41988 }, { "epoch": 39.17, "learning_rate": 4.51035447761194e-05, "loss": 0.0001, "step": 41992 }, { "epoch": 39.18, "learning_rate": 4.510307835820896e-05, "loss": 0.0001, "step": 41996 }, { "epoch": 39.18, "learning_rate": 4.510261194029851e-05, "loss": 0.0001, "step": 42000 }, { "epoch": 39.18, "eval_exact_match": 0.7350096711798839, "eval_exec": 0.7630560928433269, "eval_loss": 0.40719056129455566, "eval_runtime": 1122.0735, "eval_samples_per_second": 0.922, "step": 42000 }, { "epoch": 39.18, "learning_rate": 4.510214552238806e-05, "loss": 0.0, "step": 42004 }, { "epoch": 39.19, "learning_rate": 4.510167910447761e-05, "loss": 0.0002, "step": 42008 }, { "epoch": 39.19, "learning_rate": 4.510121268656717e-05, "loss": 0.0005, "step": 42012 }, { "epoch": 39.19, "learning_rate": 4.510074626865672e-05, "loss": 0.0002, "step": 42016 }, { "epoch": 39.2, "learning_rate": 4.510027985074627e-05, "loss": 0.0, "step": 42020 }, { "epoch": 39.2, "learning_rate": 4.509981343283582e-05, "loss": 0.0, "step": 42024 }, { "epoch": 39.21, "learning_rate": 4.509934701492538e-05, "loss": 0.0, "step": 42028 }, { "epoch": 39.21, "learning_rate": 4.5098880597014925e-05, "loss": 0.0, "step": 42032 }, { "epoch": 39.21, "learning_rate": 4.509841417910448e-05, "loss": 0.0064, "step": 42036 }, { "epoch": 39.22, "learning_rate": 4.5097947761194035e-05, "loss": 0.0003, "step": 42040 }, { "epoch": 39.22, "learning_rate": 4.509748134328358e-05, "loss": 0.0094, "step": 42044 }, { "epoch": 39.22, "learning_rate": 4.509701492537314e-05, "loss": 0.0001, "step": 42048 }, { "epoch": 39.23, "learning_rate": 4.5096548507462686e-05, "loss": 0.0002, "step": 42052 }, { "epoch": 39.23, "learning_rate": 4.509608208955224e-05, "loss": 0.0006, "step": 42056 }, { "epoch": 39.24, "learning_rate": 4.5095615671641796e-05, "loss": 0.0011, "step": 42060 }, { "epoch": 39.24, "learning_rate": 4.5095149253731344e-05, "loss": 0.0003, "step": 42064 }, { "epoch": 39.24, "learning_rate": 4.509468283582089e-05, "loss": 0.0008, "step": 42068 }, { "epoch": 39.25, "learning_rate": 4.5094216417910454e-05, "loss": 0.0, "step": 42072 }, { "epoch": 39.25, "learning_rate": 4.509375e-05, "loss": 0.0001, "step": 42076 }, { "epoch": 39.25, "learning_rate": 4.509328358208956e-05, "loss": 0.0001, "step": 42080 }, { "epoch": 39.26, "learning_rate": 4.5092817164179105e-05, "loss": 0.0, "step": 42084 }, { "epoch": 39.26, "learning_rate": 4.509235074626866e-05, "loss": 0.0, "step": 42088 }, { "epoch": 39.26, "learning_rate": 4.5091884328358215e-05, "loss": 0.0019, "step": 42092 }, { "epoch": 39.27, "learning_rate": 4.509141791044776e-05, "loss": 0.0, "step": 42096 }, { "epoch": 39.27, "learning_rate": 4.509095149253732e-05, "loss": 0.0014, "step": 42100 }, { "epoch": 39.28, "learning_rate": 4.509048507462687e-05, "loss": 0.0001, "step": 42104 }, { "epoch": 39.28, "learning_rate": 4.509001865671642e-05, "loss": 0.0002, "step": 42108 }, { "epoch": 39.28, "learning_rate": 4.508955223880597e-05, "loss": 0.0001, "step": 42112 }, { "epoch": 39.29, "learning_rate": 4.5089085820895524e-05, "loss": 0.0046, "step": 42116 }, { "epoch": 39.29, "learning_rate": 4.508861940298508e-05, "loss": 0.0, "step": 42120 }, { "epoch": 39.29, "learning_rate": 4.508815298507463e-05, "loss": 0.0, "step": 42124 }, { "epoch": 39.3, "learning_rate": 4.508768656716418e-05, "loss": 0.0001, "step": 42128 }, { "epoch": 39.3, "learning_rate": 4.5087220149253737e-05, "loss": 0.0006, "step": 42132 }, { "epoch": 39.31, "learning_rate": 4.5086753731343285e-05, "loss": 0.0001, "step": 42136 }, { "epoch": 39.31, "learning_rate": 4.508628731343284e-05, "loss": 0.0, "step": 42140 }, { "epoch": 39.31, "learning_rate": 4.508582089552239e-05, "loss": 0.0008, "step": 42144 }, { "epoch": 39.32, "learning_rate": 4.508535447761194e-05, "loss": 0.0, "step": 42148 }, { "epoch": 39.32, "learning_rate": 4.50848880597015e-05, "loss": 0.0, "step": 42152 }, { "epoch": 39.32, "learning_rate": 4.5084421641791046e-05, "loss": 0.002, "step": 42156 }, { "epoch": 39.33, "learning_rate": 4.50839552238806e-05, "loss": 0.0001, "step": 42160 }, { "epoch": 39.33, "learning_rate": 4.5083488805970155e-05, "loss": 0.0001, "step": 42164 }, { "epoch": 39.34, "learning_rate": 4.5083022388059704e-05, "loss": 0.0012, "step": 42168 }, { "epoch": 39.34, "learning_rate": 4.508255597014925e-05, "loss": 0.0014, "step": 42172 }, { "epoch": 39.34, "learning_rate": 4.508208955223881e-05, "loss": 0.0, "step": 42176 }, { "epoch": 39.35, "learning_rate": 4.508162313432836e-05, "loss": 0.0024, "step": 42180 }, { "epoch": 39.35, "learning_rate": 4.508115671641791e-05, "loss": 0.0, "step": 42184 }, { "epoch": 39.35, "learning_rate": 4.5080690298507465e-05, "loss": 0.0, "step": 42188 }, { "epoch": 39.36, "learning_rate": 4.508022388059702e-05, "loss": 0.0, "step": 42192 }, { "epoch": 39.36, "learning_rate": 4.507975746268657e-05, "loss": 0.0001, "step": 42196 }, { "epoch": 39.37, "learning_rate": 4.507929104477612e-05, "loss": 0.0, "step": 42200 }, { "epoch": 39.37, "learning_rate": 4.507882462686567e-05, "loss": 0.0002, "step": 42204 }, { "epoch": 39.37, "learning_rate": 4.5078358208955226e-05, "loss": 0.0, "step": 42208 }, { "epoch": 39.38, "learning_rate": 4.507789179104478e-05, "loss": 0.0001, "step": 42212 }, { "epoch": 39.38, "learning_rate": 4.507742537313433e-05, "loss": 0.0, "step": 42216 }, { "epoch": 39.38, "learning_rate": 4.5076958955223884e-05, "loss": 0.0, "step": 42220 }, { "epoch": 39.39, "learning_rate": 4.507649253731344e-05, "loss": 0.0002, "step": 42224 }, { "epoch": 39.39, "learning_rate": 4.5076026119402987e-05, "loss": 0.0001, "step": 42228 }, { "epoch": 39.4, "learning_rate": 4.5075559701492535e-05, "loss": 0.0001, "step": 42232 }, { "epoch": 39.4, "learning_rate": 4.507509328358209e-05, "loss": 0.0, "step": 42236 }, { "epoch": 39.4, "learning_rate": 4.5074626865671645e-05, "loss": 0.0, "step": 42240 }, { "epoch": 39.41, "learning_rate": 4.50741604477612e-05, "loss": 0.0002, "step": 42244 }, { "epoch": 39.41, "learning_rate": 4.507369402985075e-05, "loss": 0.0, "step": 42248 }, { "epoch": 39.41, "learning_rate": 4.50732276119403e-05, "loss": 0.0, "step": 42252 }, { "epoch": 39.42, "learning_rate": 4.507276119402986e-05, "loss": 0.0021, "step": 42256 }, { "epoch": 39.42, "learning_rate": 4.5072294776119406e-05, "loss": 0.0, "step": 42260 }, { "epoch": 39.43, "learning_rate": 4.5071828358208954e-05, "loss": 0.0011, "step": 42264 }, { "epoch": 39.43, "learning_rate": 4.5071361940298515e-05, "loss": 0.0001, "step": 42268 }, { "epoch": 39.43, "learning_rate": 4.5070895522388063e-05, "loss": 0.0, "step": 42272 }, { "epoch": 39.44, "learning_rate": 4.507042910447761e-05, "loss": 0.0, "step": 42276 }, { "epoch": 39.44, "learning_rate": 4.5069962686567166e-05, "loss": 0.0004, "step": 42280 }, { "epoch": 39.44, "learning_rate": 4.506949626865672e-05, "loss": 0.003, "step": 42284 }, { "epoch": 39.45, "learning_rate": 4.506902985074627e-05, "loss": 0.0007, "step": 42288 }, { "epoch": 39.45, "learning_rate": 4.5068563432835824e-05, "loss": 0.0001, "step": 42292 }, { "epoch": 39.46, "learning_rate": 4.506809701492537e-05, "loss": 0.0, "step": 42296 }, { "epoch": 39.46, "learning_rate": 4.506763059701493e-05, "loss": 0.0012, "step": 42300 }, { "epoch": 39.46, "learning_rate": 4.506716417910448e-05, "loss": 0.0109, "step": 42304 }, { "epoch": 39.47, "learning_rate": 4.506669776119403e-05, "loss": 0.0, "step": 42308 }, { "epoch": 39.47, "learning_rate": 4.5066231343283585e-05, "loss": 0.0053, "step": 42312 }, { "epoch": 39.47, "learning_rate": 4.506576492537314e-05, "loss": 0.0001, "step": 42316 }, { "epoch": 39.48, "learning_rate": 4.506529850746269e-05, "loss": 0.0001, "step": 42320 }, { "epoch": 39.48, "learning_rate": 4.5064832089552237e-05, "loss": 0.0001, "step": 42324 }, { "epoch": 39.49, "learning_rate": 4.50643656716418e-05, "loss": 0.0002, "step": 42328 }, { "epoch": 39.49, "learning_rate": 4.5063899253731346e-05, "loss": 0.0001, "step": 42332 }, { "epoch": 39.49, "learning_rate": 4.5063432835820895e-05, "loss": 0.0, "step": 42336 }, { "epoch": 39.5, "learning_rate": 4.506296641791045e-05, "loss": 0.0001, "step": 42340 }, { "epoch": 39.5, "learning_rate": 4.5062500000000004e-05, "loss": 0.0037, "step": 42344 }, { "epoch": 39.5, "learning_rate": 4.506203358208955e-05, "loss": 0.0001, "step": 42348 }, { "epoch": 39.51, "learning_rate": 4.506156716417911e-05, "loss": 0.0001, "step": 42352 }, { "epoch": 39.51, "learning_rate": 4.5061100746268656e-05, "loss": 0.0, "step": 42356 }, { "epoch": 39.51, "learning_rate": 4.506063432835821e-05, "loss": 0.0, "step": 42360 }, { "epoch": 39.52, "learning_rate": 4.5060167910447765e-05, "loss": 0.0, "step": 42364 }, { "epoch": 39.52, "learning_rate": 4.5059701492537313e-05, "loss": 0.0001, "step": 42368 }, { "epoch": 39.53, "learning_rate": 4.505923507462687e-05, "loss": 0.0, "step": 42372 }, { "epoch": 39.53, "learning_rate": 4.505876865671642e-05, "loss": 0.0001, "step": 42376 }, { "epoch": 39.53, "learning_rate": 4.505830223880597e-05, "loss": 0.0, "step": 42380 }, { "epoch": 39.54, "learning_rate": 4.505783582089552e-05, "loss": 0.0001, "step": 42384 }, { "epoch": 39.54, "learning_rate": 4.505736940298508e-05, "loss": 0.0001, "step": 42388 }, { "epoch": 39.54, "learning_rate": 4.505690298507463e-05, "loss": 0.0004, "step": 42392 }, { "epoch": 39.55, "learning_rate": 4.505643656716418e-05, "loss": 0.0005, "step": 42396 }, { "epoch": 39.55, "learning_rate": 4.505597014925373e-05, "loss": 0.0, "step": 42400 }, { "epoch": 39.56, "learning_rate": 4.505550373134329e-05, "loss": 0.0008, "step": 42404 }, { "epoch": 39.56, "learning_rate": 4.505503731343284e-05, "loss": 0.0, "step": 42408 }, { "epoch": 39.56, "learning_rate": 4.505457089552239e-05, "loss": 0.0002, "step": 42412 }, { "epoch": 39.57, "learning_rate": 4.505410447761194e-05, "loss": 0.0011, "step": 42416 }, { "epoch": 39.57, "learning_rate": 4.50536380597015e-05, "loss": 0.0007, "step": 42420 }, { "epoch": 39.57, "learning_rate": 4.505317164179105e-05, "loss": 0.0, "step": 42424 }, { "epoch": 39.58, "learning_rate": 4.5052705223880596e-05, "loss": 0.0, "step": 42428 }, { "epoch": 39.58, "learning_rate": 4.505223880597015e-05, "loss": 0.0001, "step": 42432 }, { "epoch": 39.59, "learning_rate": 4.5051772388059706e-05, "loss": 0.0001, "step": 42436 }, { "epoch": 39.59, "learning_rate": 4.5051305970149254e-05, "loss": 0.0003, "step": 42440 }, { "epoch": 39.59, "learning_rate": 4.505083955223881e-05, "loss": 0.0003, "step": 42444 }, { "epoch": 39.6, "learning_rate": 4.5050373134328364e-05, "loss": 0.0001, "step": 42448 }, { "epoch": 39.6, "learning_rate": 4.504990671641791e-05, "loss": 0.0006, "step": 42452 }, { "epoch": 39.6, "learning_rate": 4.504944029850747e-05, "loss": 0.0, "step": 42456 }, { "epoch": 39.61, "learning_rate": 4.5048973880597015e-05, "loss": 0.0004, "step": 42460 }, { "epoch": 39.61, "learning_rate": 4.504850746268657e-05, "loss": 0.0001, "step": 42464 }, { "epoch": 39.62, "learning_rate": 4.5048041044776125e-05, "loss": 0.0, "step": 42468 }, { "epoch": 39.62, "learning_rate": 4.504757462686567e-05, "loss": 0.0, "step": 42472 }, { "epoch": 39.62, "learning_rate": 4.504710820895522e-05, "loss": 0.0008, "step": 42476 }, { "epoch": 39.63, "learning_rate": 4.504664179104478e-05, "loss": 0.0, "step": 42480 }, { "epoch": 39.63, "learning_rate": 4.504617537313433e-05, "loss": 0.0015, "step": 42484 }, { "epoch": 39.63, "learning_rate": 4.504570895522388e-05, "loss": 0.0004, "step": 42488 }, { "epoch": 39.64, "learning_rate": 4.5045242537313434e-05, "loss": 0.0001, "step": 42492 }, { "epoch": 39.64, "learning_rate": 4.504477611940299e-05, "loss": 0.0002, "step": 42496 }, { "epoch": 39.65, "learning_rate": 4.504430970149254e-05, "loss": 0.0002, "step": 42500 }, { "epoch": 39.65, "eval_exact_match": 0.7253384912959381, "eval_exec": 0.758220502901354, "eval_loss": 0.42694738507270813, "eval_runtime": 1134.6223, "eval_samples_per_second": 0.911, "step": 42500 }, { "epoch": 39.65, "learning_rate": 4.504384328358209e-05, "loss": 0.0001, "step": 42504 }, { "epoch": 39.65, "learning_rate": 4.504337686567164e-05, "loss": 0.0, "step": 42508 }, { "epoch": 39.66, "learning_rate": 4.5042910447761195e-05, "loss": 0.0, "step": 42512 }, { "epoch": 39.66, "learning_rate": 4.504244402985075e-05, "loss": 0.0001, "step": 42516 }, { "epoch": 39.66, "learning_rate": 4.50419776119403e-05, "loss": 0.0, "step": 42520 }, { "epoch": 39.67, "learning_rate": 4.504151119402985e-05, "loss": 0.0001, "step": 42524 }, { "epoch": 39.67, "learning_rate": 4.504104477611941e-05, "loss": 0.0, "step": 42528 }, { "epoch": 39.68, "learning_rate": 4.5040578358208956e-05, "loss": 0.0, "step": 42532 }, { "epoch": 39.68, "learning_rate": 4.5040111940298504e-05, "loss": 0.0016, "step": 42536 }, { "epoch": 39.68, "learning_rate": 4.5039645522388066e-05, "loss": 0.0002, "step": 42540 }, { "epoch": 39.69, "learning_rate": 4.5039179104477614e-05, "loss": 0.0003, "step": 42544 }, { "epoch": 39.69, "learning_rate": 4.503871268656716e-05, "loss": 0.0, "step": 42548 }, { "epoch": 39.69, "learning_rate": 4.503824626865672e-05, "loss": 0.0002, "step": 42552 }, { "epoch": 39.7, "learning_rate": 4.503777985074627e-05, "loss": 0.0, "step": 42556 }, { "epoch": 39.7, "learning_rate": 4.503731343283582e-05, "loss": 0.0, "step": 42560 }, { "epoch": 39.71, "learning_rate": 4.5036847014925375e-05, "loss": 0.0002, "step": 42564 }, { "epoch": 39.71, "learning_rate": 4.503638059701492e-05, "loss": 0.0001, "step": 42568 }, { "epoch": 39.71, "learning_rate": 4.5035914179104485e-05, "loss": 0.0001, "step": 42572 }, { "epoch": 39.72, "learning_rate": 4.503544776119403e-05, "loss": 0.0008, "step": 42576 }, { "epoch": 39.72, "learning_rate": 4.503498134328358e-05, "loss": 0.0001, "step": 42580 }, { "epoch": 39.72, "learning_rate": 4.5034514925373136e-05, "loss": 0.0006, "step": 42584 }, { "epoch": 39.73, "learning_rate": 4.503404850746269e-05, "loss": 0.0005, "step": 42588 }, { "epoch": 39.73, "learning_rate": 4.503358208955224e-05, "loss": 0.0, "step": 42592 }, { "epoch": 39.73, "learning_rate": 4.5033115671641794e-05, "loss": 0.0, "step": 42596 }, { "epoch": 39.74, "learning_rate": 4.503264925373135e-05, "loss": 0.0001, "step": 42600 }, { "epoch": 39.74, "learning_rate": 4.50321828358209e-05, "loss": 0.0, "step": 42604 }, { "epoch": 39.75, "learning_rate": 4.503171641791045e-05, "loss": 0.0004, "step": 42608 }, { "epoch": 39.75, "learning_rate": 4.503125e-05, "loss": 0.0001, "step": 42612 }, { "epoch": 39.75, "learning_rate": 4.5030783582089555e-05, "loss": 0.0002, "step": 42616 }, { "epoch": 39.76, "learning_rate": 4.503031716417911e-05, "loss": 0.0, "step": 42620 }, { "epoch": 39.76, "learning_rate": 4.502985074626866e-05, "loss": 0.0, "step": 42624 }, { "epoch": 39.76, "learning_rate": 4.5029384328358206e-05, "loss": 0.0, "step": 42628 }, { "epoch": 39.77, "learning_rate": 4.502891791044777e-05, "loss": 0.0, "step": 42632 }, { "epoch": 39.77, "learning_rate": 4.5028451492537316e-05, "loss": 0.0, "step": 42636 }, { "epoch": 39.78, "learning_rate": 4.5027985074626864e-05, "loss": 0.0001, "step": 42640 }, { "epoch": 39.78, "learning_rate": 4.502751865671642e-05, "loss": 0.0004, "step": 42644 }, { "epoch": 39.78, "learning_rate": 4.5027052238805974e-05, "loss": 0.0, "step": 42648 }, { "epoch": 39.79, "learning_rate": 4.502658582089552e-05, "loss": 0.0, "step": 42652 }, { "epoch": 39.79, "learning_rate": 4.502611940298508e-05, "loss": 0.0, "step": 42656 }, { "epoch": 39.79, "learning_rate": 4.502565298507463e-05, "loss": 0.0002, "step": 42660 }, { "epoch": 39.8, "learning_rate": 4.502518656716418e-05, "loss": 0.0, "step": 42664 }, { "epoch": 39.8, "learning_rate": 4.5024720149253735e-05, "loss": 0.0007, "step": 42668 }, { "epoch": 39.81, "learning_rate": 4.502425373134328e-05, "loss": 0.0002, "step": 42672 }, { "epoch": 39.81, "learning_rate": 4.502378731343284e-05, "loss": 0.0, "step": 42676 }, { "epoch": 39.81, "learning_rate": 4.502332089552239e-05, "loss": 0.0, "step": 42680 }, { "epoch": 39.82, "learning_rate": 4.502285447761194e-05, "loss": 0.0008, "step": 42684 }, { "epoch": 39.82, "learning_rate": 4.502238805970149e-05, "loss": 0.0002, "step": 42688 }, { "epoch": 39.82, "learning_rate": 4.502192164179105e-05, "loss": 0.0004, "step": 42692 }, { "epoch": 39.83, "learning_rate": 4.50214552238806e-05, "loss": 0.0031, "step": 42696 }, { "epoch": 39.83, "learning_rate": 4.502098880597015e-05, "loss": 0.0, "step": 42700 }, { "epoch": 39.84, "learning_rate": 4.50205223880597e-05, "loss": 0.0003, "step": 42704 }, { "epoch": 39.84, "learning_rate": 4.502005597014926e-05, "loss": 0.0, "step": 42708 }, { "epoch": 39.84, "learning_rate": 4.5019589552238805e-05, "loss": 0.001, "step": 42712 }, { "epoch": 39.85, "learning_rate": 4.501912313432836e-05, "loss": 0.0, "step": 42716 }, { "epoch": 39.85, "learning_rate": 4.5018656716417915e-05, "loss": 0.0, "step": 42720 }, { "epoch": 39.85, "learning_rate": 4.501819029850746e-05, "loss": 0.0, "step": 42724 }, { "epoch": 39.86, "learning_rate": 4.501772388059702e-05, "loss": 0.0001, "step": 42728 }, { "epoch": 39.86, "learning_rate": 4.5017257462686566e-05, "loss": 0.0026, "step": 42732 }, { "epoch": 39.87, "learning_rate": 4.501679104477612e-05, "loss": 0.0008, "step": 42736 }, { "epoch": 39.87, "learning_rate": 4.5016324626865676e-05, "loss": 0.0, "step": 42740 }, { "epoch": 39.87, "learning_rate": 4.5015858208955224e-05, "loss": 0.0, "step": 42744 }, { "epoch": 39.88, "learning_rate": 4.501539179104478e-05, "loss": 0.0001, "step": 42748 }, { "epoch": 39.88, "learning_rate": 4.5014925373134334e-05, "loss": 0.0, "step": 42752 }, { "epoch": 39.88, "learning_rate": 4.501445895522388e-05, "loss": 0.0, "step": 42756 }, { "epoch": 39.89, "learning_rate": 4.501399253731344e-05, "loss": 0.0001, "step": 42760 }, { "epoch": 39.89, "learning_rate": 4.5013526119402985e-05, "loss": 0.0055, "step": 42764 }, { "epoch": 39.9, "learning_rate": 4.501305970149254e-05, "loss": 0.0, "step": 42768 }, { "epoch": 39.9, "learning_rate": 4.5012593283582095e-05, "loss": 0.0006, "step": 42772 }, { "epoch": 39.9, "learning_rate": 4.501212686567164e-05, "loss": 0.0, "step": 42776 }, { "epoch": 39.91, "learning_rate": 4.50116604477612e-05, "loss": 0.0004, "step": 42780 }, { "epoch": 39.91, "learning_rate": 4.501119402985075e-05, "loss": 0.0001, "step": 42784 }, { "epoch": 39.91, "learning_rate": 4.50107276119403e-05, "loss": 0.0001, "step": 42788 }, { "epoch": 39.92, "learning_rate": 4.501026119402985e-05, "loss": 0.0006, "step": 42792 }, { "epoch": 39.92, "learning_rate": 4.5009794776119404e-05, "loss": 0.0028, "step": 42796 }, { "epoch": 39.93, "learning_rate": 4.500932835820896e-05, "loss": 0.0006, "step": 42800 }, { "epoch": 39.93, "learning_rate": 4.500886194029851e-05, "loss": 0.0002, "step": 42804 }, { "epoch": 39.93, "learning_rate": 4.500839552238806e-05, "loss": 0.0001, "step": 42808 }, { "epoch": 39.94, "learning_rate": 4.500792910447762e-05, "loss": 0.0001, "step": 42812 }, { "epoch": 39.94, "learning_rate": 4.5007462686567165e-05, "loss": 0.0001, "step": 42816 }, { "epoch": 39.94, "learning_rate": 4.500699626865672e-05, "loss": 0.0, "step": 42820 }, { "epoch": 39.95, "learning_rate": 4.500652985074627e-05, "loss": 0.0002, "step": 42824 }, { "epoch": 39.95, "learning_rate": 4.500606343283582e-05, "loss": 0.0001, "step": 42828 }, { "epoch": 39.96, "learning_rate": 4.500559701492538e-05, "loss": 0.0004, "step": 42832 }, { "epoch": 39.96, "learning_rate": 4.5005130597014926e-05, "loss": 0.0, "step": 42836 }, { "epoch": 39.96, "learning_rate": 4.500466417910448e-05, "loss": 0.0006, "step": 42840 }, { "epoch": 39.97, "learning_rate": 4.5004197761194036e-05, "loss": 0.0023, "step": 42844 }, { "epoch": 39.97, "learning_rate": 4.5003731343283584e-05, "loss": 0.0, "step": 42848 }, { "epoch": 39.97, "learning_rate": 4.500326492537313e-05, "loss": 0.0045, "step": 42852 }, { "epoch": 39.98, "learning_rate": 4.500279850746269e-05, "loss": 0.0001, "step": 42856 }, { "epoch": 39.98, "learning_rate": 4.500233208955224e-05, "loss": 0.0001, "step": 42860 }, { "epoch": 39.98, "learning_rate": 4.500186567164179e-05, "loss": 0.0188, "step": 42864 }, { "epoch": 39.99, "learning_rate": 4.5001399253731345e-05, "loss": 0.0, "step": 42868 }, { "epoch": 39.99, "learning_rate": 4.50009328358209e-05, "loss": 0.0, "step": 42872 }, { "epoch": 40.0, "learning_rate": 4.500046641791045e-05, "loss": 0.0005, "step": 42876 }, { "epoch": 40.0, "learning_rate": 4.5e-05, "loss": 0.0057, "step": 42880 }, { "epoch": 40.0, "learning_rate": 4.499953358208955e-05, "loss": 0.0001, "step": 42884 }, { "epoch": 40.01, "learning_rate": 4.4999067164179106e-05, "loss": 0.0001, "step": 42888 }, { "epoch": 40.01, "learning_rate": 4.499860074626866e-05, "loss": 0.0, "step": 42892 }, { "epoch": 40.01, "learning_rate": 4.499813432835821e-05, "loss": 0.0, "step": 42896 }, { "epoch": 40.02, "learning_rate": 4.4997667910447764e-05, "loss": 0.0, "step": 42900 }, { "epoch": 40.02, "learning_rate": 4.499720149253732e-05, "loss": 0.0, "step": 42904 }, { "epoch": 40.03, "learning_rate": 4.499673507462687e-05, "loss": 0.0001, "step": 42908 }, { "epoch": 40.03, "learning_rate": 4.499626865671642e-05, "loss": 0.0015, "step": 42912 }, { "epoch": 40.03, "learning_rate": 4.499580223880597e-05, "loss": 0.001, "step": 42916 }, { "epoch": 40.04, "learning_rate": 4.4995335820895525e-05, "loss": 0.0001, "step": 42920 }, { "epoch": 40.04, "learning_rate": 4.499486940298508e-05, "loss": 0.0004, "step": 42924 }, { "epoch": 40.04, "learning_rate": 4.499440298507463e-05, "loss": 0.0, "step": 42928 }, { "epoch": 40.05, "learning_rate": 4.499393656716418e-05, "loss": 0.0004, "step": 42932 }, { "epoch": 40.05, "learning_rate": 4.499347014925374e-05, "loss": 0.0017, "step": 42936 }, { "epoch": 40.06, "learning_rate": 4.4993003731343286e-05, "loss": 0.0006, "step": 42940 }, { "epoch": 40.06, "learning_rate": 4.4992537313432834e-05, "loss": 0.0, "step": 42944 }, { "epoch": 40.06, "learning_rate": 4.4992070895522396e-05, "loss": 0.0004, "step": 42948 }, { "epoch": 40.07, "learning_rate": 4.4991604477611944e-05, "loss": 0.0, "step": 42952 }, { "epoch": 40.07, "learning_rate": 4.499113805970149e-05, "loss": 0.0001, "step": 42956 }, { "epoch": 40.07, "learning_rate": 4.499067164179105e-05, "loss": 0.0, "step": 42960 }, { "epoch": 40.08, "learning_rate": 4.49902052238806e-05, "loss": 0.0, "step": 42964 }, { "epoch": 40.08, "learning_rate": 4.498973880597015e-05, "loss": 0.0002, "step": 42968 }, { "epoch": 40.09, "learning_rate": 4.4989272388059705e-05, "loss": 0.0001, "step": 42972 }, { "epoch": 40.09, "learning_rate": 4.498880597014925e-05, "loss": 0.0001, "step": 42976 }, { "epoch": 40.09, "learning_rate": 4.498833955223881e-05, "loss": 0.0, "step": 42980 }, { "epoch": 40.1, "learning_rate": 4.498787313432836e-05, "loss": 0.0001, "step": 42984 }, { "epoch": 40.1, "learning_rate": 4.498740671641791e-05, "loss": 0.0, "step": 42988 }, { "epoch": 40.1, "learning_rate": 4.4986940298507466e-05, "loss": 0.0, "step": 42992 }, { "epoch": 40.11, "learning_rate": 4.498647388059702e-05, "loss": 0.0001, "step": 42996 }, { "epoch": 40.11, "learning_rate": 4.498600746268657e-05, "loss": 0.0, "step": 43000 }, { "epoch": 40.11, "eval_exact_match": 0.7475822050290135, "eval_exec": 0.7736943907156673, "eval_loss": 0.4348256289958954, "eval_runtime": 1267.9199, "eval_samples_per_second": 0.816, "step": 43000 }, { "epoch": 40.12, "learning_rate": 4.498554104477612e-05, "loss": 0.0, "step": 43004 }, { "epoch": 40.12, "learning_rate": 4.498507462686568e-05, "loss": 0.0, "step": 43008 }, { "epoch": 40.12, "learning_rate": 4.498460820895523e-05, "loss": 0.0, "step": 43012 }, { "epoch": 40.13, "learning_rate": 4.4984141791044775e-05, "loss": 0.0013, "step": 43016 }, { "epoch": 40.13, "learning_rate": 4.498367537313433e-05, "loss": 0.0, "step": 43020 }, { "epoch": 40.13, "learning_rate": 4.4983208955223885e-05, "loss": 0.0, "step": 43024 }, { "epoch": 40.14, "learning_rate": 4.498274253731343e-05, "loss": 0.0024, "step": 43028 }, { "epoch": 40.14, "learning_rate": 4.498227611940299e-05, "loss": 0.0, "step": 43032 }, { "epoch": 40.15, "learning_rate": 4.4981809701492536e-05, "loss": 0.0001, "step": 43036 }, { "epoch": 40.15, "learning_rate": 4.498134328358209e-05, "loss": 0.0, "step": 43040 }, { "epoch": 40.15, "learning_rate": 4.4980876865671646e-05, "loss": 0.0001, "step": 43044 }, { "epoch": 40.16, "learning_rate": 4.4980410447761194e-05, "loss": 0.0001, "step": 43048 }, { "epoch": 40.16, "learning_rate": 4.497994402985075e-05, "loss": 0.0, "step": 43052 }, { "epoch": 40.16, "learning_rate": 4.4979477611940304e-05, "loss": 0.0, "step": 43056 }, { "epoch": 40.17, "learning_rate": 4.497901119402985e-05, "loss": 0.0, "step": 43060 }, { "epoch": 40.17, "learning_rate": 4.497854477611941e-05, "loss": 0.0, "step": 43064 }, { "epoch": 40.18, "learning_rate": 4.497807835820896e-05, "loss": 0.0012, "step": 43068 }, { "epoch": 40.18, "learning_rate": 4.497761194029851e-05, "loss": 0.0, "step": 43072 }, { "epoch": 40.18, "learning_rate": 4.4977145522388065e-05, "loss": 0.0006, "step": 43076 }, { "epoch": 40.19, "learning_rate": 4.497667910447761e-05, "loss": 0.0001, "step": 43080 }, { "epoch": 40.19, "learning_rate": 4.497621268656717e-05, "loss": 0.0, "step": 43084 }, { "epoch": 40.19, "learning_rate": 4.497574626865672e-05, "loss": 0.0, "step": 43088 }, { "epoch": 40.2, "learning_rate": 4.497527985074627e-05, "loss": 0.0001, "step": 43092 }, { "epoch": 40.2, "learning_rate": 4.497481343283582e-05, "loss": 0.0, "step": 43096 }, { "epoch": 40.21, "learning_rate": 4.497434701492538e-05, "loss": 0.0, "step": 43100 }, { "epoch": 40.21, "learning_rate": 4.497388059701493e-05, "loss": 0.0004, "step": 43104 }, { "epoch": 40.21, "learning_rate": 4.497341417910448e-05, "loss": 0.0, "step": 43108 }, { "epoch": 40.22, "learning_rate": 4.497294776119403e-05, "loss": 0.0, "step": 43112 }, { "epoch": 40.22, "learning_rate": 4.4972481343283587e-05, "loss": 0.0, "step": 43116 }, { "epoch": 40.22, "learning_rate": 4.4972014925373135e-05, "loss": 0.0003, "step": 43120 }, { "epoch": 40.23, "learning_rate": 4.497154850746269e-05, "loss": 0.0002, "step": 43124 }, { "epoch": 40.23, "learning_rate": 4.4971082089552244e-05, "loss": 0.0001, "step": 43128 }, { "epoch": 40.24, "learning_rate": 4.497061567164179e-05, "loss": 0.0003, "step": 43132 }, { "epoch": 40.24, "learning_rate": 4.497014925373135e-05, "loss": 0.0, "step": 43136 }, { "epoch": 40.24, "learning_rate": 4.4969682835820896e-05, "loss": 0.0018, "step": 43140 }, { "epoch": 40.25, "learning_rate": 4.496921641791045e-05, "loss": 0.0003, "step": 43144 }, { "epoch": 40.25, "learning_rate": 4.4968750000000005e-05, "loss": 0.0001, "step": 43148 }, { "epoch": 40.25, "learning_rate": 4.4968283582089554e-05, "loss": 0.0001, "step": 43152 }, { "epoch": 40.26, "learning_rate": 4.49678171641791e-05, "loss": 0.0, "step": 43156 }, { "epoch": 40.26, "learning_rate": 4.4967350746268663e-05, "loss": 0.0, "step": 43160 }, { "epoch": 40.26, "learning_rate": 4.496688432835821e-05, "loss": 0.0, "step": 43164 }, { "epoch": 40.27, "learning_rate": 4.496641791044776e-05, "loss": 0.0013, "step": 43168 }, { "epoch": 40.27, "learning_rate": 4.4965951492537315e-05, "loss": 0.0022, "step": 43172 }, { "epoch": 40.28, "learning_rate": 4.496548507462687e-05, "loss": 0.0003, "step": 43176 }, { "epoch": 40.28, "learning_rate": 4.496501865671642e-05, "loss": 0.0008, "step": 43180 }, { "epoch": 40.28, "learning_rate": 4.496455223880597e-05, "loss": 0.0, "step": 43184 }, { "epoch": 40.29, "learning_rate": 4.496408582089552e-05, "loss": 0.0001, "step": 43188 }, { "epoch": 40.29, "learning_rate": 4.4963619402985076e-05, "loss": 0.0, "step": 43192 }, { "epoch": 40.29, "learning_rate": 4.496315298507463e-05, "loss": 0.0001, "step": 43196 }, { "epoch": 40.3, "learning_rate": 4.496268656716418e-05, "loss": 0.0, "step": 43200 }, { "epoch": 40.3, "learning_rate": 4.4962220149253734e-05, "loss": 0.0001, "step": 43204 }, { "epoch": 40.31, "learning_rate": 4.496175373134329e-05, "loss": 0.0001, "step": 43208 }, { "epoch": 40.31, "learning_rate": 4.4961287313432837e-05, "loss": 0.0, "step": 43212 }, { "epoch": 40.31, "learning_rate": 4.4960820895522385e-05, "loss": 0.0008, "step": 43216 }, { "epoch": 40.32, "learning_rate": 4.4960354477611946e-05, "loss": 0.0002, "step": 43220 }, { "epoch": 40.32, "learning_rate": 4.4959888059701494e-05, "loss": 0.0, "step": 43224 }, { "epoch": 40.32, "learning_rate": 4.495942164179105e-05, "loss": 0.0025, "step": 43228 }, { "epoch": 40.33, "learning_rate": 4.49589552238806e-05, "loss": 0.0001, "step": 43232 }, { "epoch": 40.33, "learning_rate": 4.495848880597015e-05, "loss": 0.0, "step": 43236 }, { "epoch": 40.34, "learning_rate": 4.495802238805971e-05, "loss": 0.0032, "step": 43240 }, { "epoch": 40.34, "learning_rate": 4.4957555970149255e-05, "loss": 0.0, "step": 43244 }, { "epoch": 40.34, "learning_rate": 4.4957089552238804e-05, "loss": 0.0, "step": 43248 }, { "epoch": 40.35, "learning_rate": 4.4956623134328365e-05, "loss": 0.0097, "step": 43252 }, { "epoch": 40.35, "learning_rate": 4.4956156716417913e-05, "loss": 0.0077, "step": 43256 }, { "epoch": 40.35, "learning_rate": 4.495569029850746e-05, "loss": 0.0002, "step": 43260 }, { "epoch": 40.36, "learning_rate": 4.4955223880597016e-05, "loss": 0.0002, "step": 43264 }, { "epoch": 40.36, "learning_rate": 4.495475746268657e-05, "loss": 0.0, "step": 43268 }, { "epoch": 40.37, "learning_rate": 4.495429104477612e-05, "loss": 0.0004, "step": 43272 }, { "epoch": 40.37, "learning_rate": 4.4953824626865674e-05, "loss": 0.0, "step": 43276 }, { "epoch": 40.37, "learning_rate": 4.495335820895523e-05, "loss": 0.0011, "step": 43280 }, { "epoch": 40.38, "learning_rate": 4.495289179104478e-05, "loss": 0.0002, "step": 43284 }, { "epoch": 40.38, "learning_rate": 4.495242537313433e-05, "loss": 0.0, "step": 43288 }, { "epoch": 40.38, "learning_rate": 4.495195895522388e-05, "loss": 0.0, "step": 43292 }, { "epoch": 40.39, "learning_rate": 4.4951492537313435e-05, "loss": 0.0039, "step": 43296 }, { "epoch": 40.39, "learning_rate": 4.495102611940299e-05, "loss": 0.0004, "step": 43300 }, { "epoch": 40.4, "learning_rate": 4.495055970149254e-05, "loss": 0.0003, "step": 43304 }, { "epoch": 40.4, "learning_rate": 4.4950093283582087e-05, "loss": 0.0001, "step": 43308 }, { "epoch": 40.4, "learning_rate": 4.494962686567165e-05, "loss": 0.0001, "step": 43312 }, { "epoch": 40.41, "learning_rate": 4.4949160447761196e-05, "loss": 0.0006, "step": 43316 }, { "epoch": 40.41, "learning_rate": 4.4948694029850745e-05, "loss": 0.0, "step": 43320 }, { "epoch": 40.41, "learning_rate": 4.49482276119403e-05, "loss": 0.0, "step": 43324 }, { "epoch": 40.42, "learning_rate": 4.4947761194029854e-05, "loss": 0.0001, "step": 43328 }, { "epoch": 40.42, "learning_rate": 4.49472947761194e-05, "loss": 0.0, "step": 43332 }, { "epoch": 40.43, "learning_rate": 4.494682835820896e-05, "loss": 0.0058, "step": 43336 }, { "epoch": 40.43, "learning_rate": 4.494636194029851e-05, "loss": 0.0001, "step": 43340 }, { "epoch": 40.43, "learning_rate": 4.494589552238806e-05, "loss": 0.0001, "step": 43344 }, { "epoch": 40.44, "learning_rate": 4.4945429104477615e-05, "loss": 0.0006, "step": 43348 }, { "epoch": 40.44, "learning_rate": 4.4944962686567163e-05, "loss": 0.0006, "step": 43352 }, { "epoch": 40.44, "learning_rate": 4.494449626865672e-05, "loss": 0.0001, "step": 43356 }, { "epoch": 40.45, "learning_rate": 4.494402985074627e-05, "loss": 0.0, "step": 43360 }, { "epoch": 40.45, "learning_rate": 4.494356343283582e-05, "loss": 0.0013, "step": 43364 }, { "epoch": 40.46, "learning_rate": 4.494309701492537e-05, "loss": 0.0009, "step": 43368 }, { "epoch": 40.46, "learning_rate": 4.494263059701493e-05, "loss": 0.0001, "step": 43372 }, { "epoch": 40.46, "learning_rate": 4.494216417910448e-05, "loss": 0.0, "step": 43376 }, { "epoch": 40.47, "learning_rate": 4.494169776119403e-05, "loss": 0.0, "step": 43380 }, { "epoch": 40.47, "learning_rate": 4.494123134328358e-05, "loss": 0.0001, "step": 43384 }, { "epoch": 40.47, "learning_rate": 4.494076492537314e-05, "loss": 0.0, "step": 43388 }, { "epoch": 40.48, "learning_rate": 4.494029850746269e-05, "loss": 0.0, "step": 43392 }, { "epoch": 40.48, "learning_rate": 4.493983208955224e-05, "loss": 0.001, "step": 43396 }, { "epoch": 40.49, "learning_rate": 4.4939365671641795e-05, "loss": 0.0, "step": 43400 }, { "epoch": 40.49, "learning_rate": 4.493889925373135e-05, "loss": 0.0, "step": 43404 }, { "epoch": 40.49, "learning_rate": 4.49384328358209e-05, "loss": 0.0, "step": 43408 }, { "epoch": 40.5, "learning_rate": 4.4937966417910446e-05, "loss": 0.0002, "step": 43412 }, { "epoch": 40.5, "learning_rate": 4.49375e-05, "loss": 0.0018, "step": 43416 }, { "epoch": 40.5, "learning_rate": 4.4937033582089556e-05, "loss": 0.0, "step": 43420 }, { "epoch": 40.51, "learning_rate": 4.4936567164179104e-05, "loss": 0.003, "step": 43424 }, { "epoch": 40.51, "learning_rate": 4.493610074626866e-05, "loss": 0.0008, "step": 43428 }, { "epoch": 40.51, "learning_rate": 4.4935634328358214e-05, "loss": 0.0, "step": 43432 }, { "epoch": 40.52, "learning_rate": 4.493516791044776e-05, "loss": 0.009, "step": 43436 }, { "epoch": 40.52, "learning_rate": 4.493470149253732e-05, "loss": 0.0, "step": 43440 }, { "epoch": 40.53, "learning_rate": 4.4934235074626865e-05, "loss": 0.0, "step": 43444 }, { "epoch": 40.53, "learning_rate": 4.493376865671642e-05, "loss": 0.0, "step": 43448 }, { "epoch": 40.53, "learning_rate": 4.4933302238805975e-05, "loss": 0.0, "step": 43452 }, { "epoch": 40.54, "learning_rate": 4.493283582089552e-05, "loss": 0.0004, "step": 43456 }, { "epoch": 40.54, "learning_rate": 4.493236940298508e-05, "loss": 0.0004, "step": 43460 }, { "epoch": 40.54, "learning_rate": 4.493190298507463e-05, "loss": 0.0, "step": 43464 }, { "epoch": 40.55, "learning_rate": 4.493143656716418e-05, "loss": 0.0, "step": 43468 }, { "epoch": 40.55, "learning_rate": 4.493097014925373e-05, "loss": 0.0, "step": 43472 }, { "epoch": 40.56, "learning_rate": 4.4930503731343284e-05, "loss": 0.0001, "step": 43476 }, { "epoch": 40.56, "learning_rate": 4.493003731343284e-05, "loss": 0.0008, "step": 43480 }, { "epoch": 40.56, "learning_rate": 4.492957089552239e-05, "loss": 0.0001, "step": 43484 }, { "epoch": 40.57, "learning_rate": 4.492910447761194e-05, "loss": 0.0, "step": 43488 }, { "epoch": 40.57, "learning_rate": 4.49286380597015e-05, "loss": 0.0012, "step": 43492 }, { "epoch": 40.57, "learning_rate": 4.4928171641791045e-05, "loss": 0.0, "step": 43496 }, { "epoch": 40.58, "learning_rate": 4.49277052238806e-05, "loss": 0.0013, "step": 43500 }, { "epoch": 40.58, "eval_exact_match": 0.7292069632495164, "eval_exec": 0.758220502901354, "eval_loss": 0.4297082722187042, "eval_runtime": 1140.8475, "eval_samples_per_second": 0.906, "step": 43500 }, { "epoch": 40.58, "learning_rate": 4.492723880597015e-05, "loss": 0.0, "step": 43504 }, { "epoch": 40.59, "learning_rate": 4.49267723880597e-05, "loss": 0.0001, "step": 43508 }, { "epoch": 40.59, "learning_rate": 4.492630597014926e-05, "loss": 0.0003, "step": 43512 }, { "epoch": 40.59, "learning_rate": 4.4925839552238806e-05, "loss": 0.0001, "step": 43516 }, { "epoch": 40.6, "learning_rate": 4.492537313432836e-05, "loss": 0.0108, "step": 43520 }, { "epoch": 40.6, "learning_rate": 4.4924906716417916e-05, "loss": 0.0, "step": 43524 }, { "epoch": 40.6, "learning_rate": 4.4924440298507464e-05, "loss": 0.0001, "step": 43528 }, { "epoch": 40.61, "learning_rate": 4.492397388059701e-05, "loss": 0.0, "step": 43532 }, { "epoch": 40.61, "learning_rate": 4.492350746268657e-05, "loss": 0.0001, "step": 43536 }, { "epoch": 40.62, "learning_rate": 4.492304104477612e-05, "loss": 0.0, "step": 43540 }, { "epoch": 40.62, "learning_rate": 4.492257462686567e-05, "loss": 0.005, "step": 43544 }, { "epoch": 40.62, "learning_rate": 4.4922108208955225e-05, "loss": 0.0, "step": 43548 }, { "epoch": 40.63, "learning_rate": 4.492164179104478e-05, "loss": 0.0001, "step": 43552 }, { "epoch": 40.63, "learning_rate": 4.4921175373134335e-05, "loss": 0.0003, "step": 43556 }, { "epoch": 40.63, "learning_rate": 4.492070895522388e-05, "loss": 0.0024, "step": 43560 }, { "epoch": 40.64, "learning_rate": 4.492024253731343e-05, "loss": 0.0004, "step": 43564 }, { "epoch": 40.64, "learning_rate": 4.491977611940299e-05, "loss": 0.0016, "step": 43568 }, { "epoch": 40.65, "learning_rate": 4.491930970149254e-05, "loss": 0.0014, "step": 43572 }, { "epoch": 40.65, "learning_rate": 4.491884328358209e-05, "loss": 0.0, "step": 43576 }, { "epoch": 40.65, "learning_rate": 4.4918376865671644e-05, "loss": 0.0018, "step": 43580 }, { "epoch": 40.66, "learning_rate": 4.49179104477612e-05, "loss": 0.0004, "step": 43584 }, { "epoch": 40.66, "learning_rate": 4.491744402985075e-05, "loss": 0.0006, "step": 43588 }, { "epoch": 40.66, "learning_rate": 4.49169776119403e-05, "loss": 0.0, "step": 43592 }, { "epoch": 40.67, "learning_rate": 4.491651119402985e-05, "loss": 0.0, "step": 43596 }, { "epoch": 40.67, "learning_rate": 4.4916044776119405e-05, "loss": 0.0014, "step": 43600 }, { "epoch": 40.68, "learning_rate": 4.491557835820896e-05, "loss": 0.0001, "step": 43604 }, { "epoch": 40.68, "learning_rate": 4.491511194029851e-05, "loss": 0.0, "step": 43608 }, { "epoch": 40.68, "learning_rate": 4.491464552238806e-05, "loss": 0.0, "step": 43612 }, { "epoch": 40.69, "learning_rate": 4.491417910447762e-05, "loss": 0.0, "step": 43616 }, { "epoch": 40.69, "learning_rate": 4.4913712686567166e-05, "loss": 0.0028, "step": 43620 }, { "epoch": 40.69, "learning_rate": 4.4913246268656714e-05, "loss": 0.0, "step": 43624 }, { "epoch": 40.7, "learning_rate": 4.4912779850746276e-05, "loss": 0.0, "step": 43628 }, { "epoch": 40.7, "learning_rate": 4.4912313432835824e-05, "loss": 0.0001, "step": 43632 }, { "epoch": 40.71, "learning_rate": 4.491184701492537e-05, "loss": 0.0001, "step": 43636 }, { "epoch": 40.71, "learning_rate": 4.491138059701493e-05, "loss": 0.0, "step": 43640 }, { "epoch": 40.71, "learning_rate": 4.491091417910448e-05, "loss": 0.0, "step": 43644 }, { "epoch": 40.72, "learning_rate": 4.491044776119403e-05, "loss": 0.0, "step": 43648 }, { "epoch": 40.72, "learning_rate": 4.4909981343283585e-05, "loss": 0.0002, "step": 43652 }, { "epoch": 40.72, "learning_rate": 4.490951492537313e-05, "loss": 0.0, "step": 43656 }, { "epoch": 40.73, "learning_rate": 4.490904850746269e-05, "loss": 0.0001, "step": 43660 }, { "epoch": 40.73, "learning_rate": 4.490858208955224e-05, "loss": 0.0001, "step": 43664 }, { "epoch": 40.73, "learning_rate": 4.490811567164179e-05, "loss": 0.0, "step": 43668 }, { "epoch": 40.74, "learning_rate": 4.4907649253731346e-05, "loss": 0.0, "step": 43672 }, { "epoch": 40.74, "learning_rate": 4.49071828358209e-05, "loss": 0.0042, "step": 43676 }, { "epoch": 40.75, "learning_rate": 4.490671641791045e-05, "loss": 0.0, "step": 43680 }, { "epoch": 40.75, "learning_rate": 4.490625e-05, "loss": 0.0001, "step": 43684 }, { "epoch": 40.75, "learning_rate": 4.490578358208956e-05, "loss": 0.0001, "step": 43688 }, { "epoch": 40.76, "learning_rate": 4.490531716417911e-05, "loss": 0.0002, "step": 43692 }, { "epoch": 40.76, "learning_rate": 4.4904850746268655e-05, "loss": 0.0001, "step": 43696 }, { "epoch": 40.76, "learning_rate": 4.490438432835821e-05, "loss": 0.0003, "step": 43700 }, { "epoch": 40.77, "learning_rate": 4.4903917910447765e-05, "loss": 0.0, "step": 43704 }, { "epoch": 40.77, "learning_rate": 4.490345149253731e-05, "loss": 0.0001, "step": 43708 }, { "epoch": 40.78, "learning_rate": 4.490298507462687e-05, "loss": 0.0, "step": 43712 }, { "epoch": 40.78, "learning_rate": 4.4902518656716416e-05, "loss": 0.0007, "step": 43716 }, { "epoch": 40.78, "learning_rate": 4.490205223880598e-05, "loss": 0.0001, "step": 43720 }, { "epoch": 40.79, "learning_rate": 4.4901585820895526e-05, "loss": 0.0003, "step": 43724 }, { "epoch": 40.79, "learning_rate": 4.4901119402985074e-05, "loss": 0.0, "step": 43728 }, { "epoch": 40.79, "learning_rate": 4.490065298507463e-05, "loss": 0.0001, "step": 43732 }, { "epoch": 40.8, "learning_rate": 4.4900186567164184e-05, "loss": 0.0014, "step": 43736 }, { "epoch": 40.8, "learning_rate": 4.489972014925373e-05, "loss": 0.0, "step": 43740 }, { "epoch": 40.81, "learning_rate": 4.489925373134329e-05, "loss": 0.0016, "step": 43744 }, { "epoch": 40.81, "learning_rate": 4.489878731343284e-05, "loss": 0.0, "step": 43748 }, { "epoch": 40.81, "learning_rate": 4.489832089552239e-05, "loss": 0.0, "step": 43752 }, { "epoch": 40.82, "learning_rate": 4.4897854477611945e-05, "loss": 0.0, "step": 43756 }, { "epoch": 40.82, "learning_rate": 4.489738805970149e-05, "loss": 0.0, "step": 43760 }, { "epoch": 40.82, "learning_rate": 4.489692164179105e-05, "loss": 0.0001, "step": 43764 }, { "epoch": 40.83, "learning_rate": 4.48964552238806e-05, "loss": 0.0001, "step": 43768 }, { "epoch": 40.83, "learning_rate": 4.489598880597015e-05, "loss": 0.0, "step": 43772 }, { "epoch": 40.84, "learning_rate": 4.48955223880597e-05, "loss": 0.0, "step": 43776 }, { "epoch": 40.84, "learning_rate": 4.489505597014926e-05, "loss": 0.0, "step": 43780 }, { "epoch": 40.84, "learning_rate": 4.489458955223881e-05, "loss": 0.0, "step": 43784 }, { "epoch": 40.85, "learning_rate": 4.489412313432836e-05, "loss": 0.0, "step": 43788 }, { "epoch": 40.85, "learning_rate": 4.489365671641791e-05, "loss": 0.0009, "step": 43792 }, { "epoch": 40.85, "learning_rate": 4.489319029850747e-05, "loss": 0.0001, "step": 43796 }, { "epoch": 40.86, "learning_rate": 4.4892723880597015e-05, "loss": 0.0002, "step": 43800 }, { "epoch": 40.86, "learning_rate": 4.489225746268657e-05, "loss": 0.0, "step": 43804 }, { "epoch": 40.87, "learning_rate": 4.4891791044776125e-05, "loss": 0.0022, "step": 43808 }, { "epoch": 40.87, "learning_rate": 4.489132462686567e-05, "loss": 0.0, "step": 43812 }, { "epoch": 40.87, "learning_rate": 4.489085820895523e-05, "loss": 0.0, "step": 43816 }, { "epoch": 40.88, "learning_rate": 4.4890391791044776e-05, "loss": 0.0017, "step": 43820 }, { "epoch": 40.88, "learning_rate": 4.488992537313433e-05, "loss": 0.0, "step": 43824 }, { "epoch": 40.88, "learning_rate": 4.4889458955223886e-05, "loss": 0.0002, "step": 43828 }, { "epoch": 40.89, "learning_rate": 4.4888992537313434e-05, "loss": 0.0004, "step": 43832 }, { "epoch": 40.89, "learning_rate": 4.488852611940298e-05, "loss": 0.0003, "step": 43836 }, { "epoch": 40.9, "learning_rate": 4.4888059701492544e-05, "loss": 0.0, "step": 43840 }, { "epoch": 40.9, "learning_rate": 4.488759328358209e-05, "loss": 0.0001, "step": 43844 }, { "epoch": 40.9, "learning_rate": 4.488712686567164e-05, "loss": 0.0009, "step": 43848 }, { "epoch": 40.91, "learning_rate": 4.4886660447761195e-05, "loss": 0.0002, "step": 43852 }, { "epoch": 40.91, "learning_rate": 4.488619402985075e-05, "loss": 0.0005, "step": 43856 }, { "epoch": 40.91, "learning_rate": 4.48857276119403e-05, "loss": 0.0003, "step": 43860 }, { "epoch": 40.92, "learning_rate": 4.488526119402985e-05, "loss": 0.0, "step": 43864 }, { "epoch": 40.92, "learning_rate": 4.48847947761194e-05, "loss": 0.003, "step": 43868 }, { "epoch": 40.93, "learning_rate": 4.4884328358208956e-05, "loss": 0.0, "step": 43872 }, { "epoch": 40.93, "learning_rate": 4.488386194029851e-05, "loss": 0.0001, "step": 43876 }, { "epoch": 40.93, "learning_rate": 4.488339552238806e-05, "loss": 0.0018, "step": 43880 }, { "epoch": 40.94, "learning_rate": 4.4882929104477614e-05, "loss": 0.0003, "step": 43884 }, { "epoch": 40.94, "learning_rate": 4.488246268656717e-05, "loss": 0.0004, "step": 43888 }, { "epoch": 40.94, "learning_rate": 4.488199626865672e-05, "loss": 0.0, "step": 43892 }, { "epoch": 40.95, "learning_rate": 4.488152985074627e-05, "loss": 0.0, "step": 43896 }, { "epoch": 40.95, "learning_rate": 4.488106343283583e-05, "loss": 0.0001, "step": 43900 }, { "epoch": 40.96, "learning_rate": 4.4880597014925375e-05, "loss": 0.0001, "step": 43904 }, { "epoch": 40.96, "learning_rate": 4.488013059701493e-05, "loss": 0.0, "step": 43908 }, { "epoch": 40.96, "learning_rate": 4.487966417910448e-05, "loss": 0.0, "step": 43912 }, { "epoch": 40.97, "learning_rate": 4.487919776119403e-05, "loss": 0.0001, "step": 43916 }, { "epoch": 40.97, "learning_rate": 4.487873134328359e-05, "loss": 0.0001, "step": 43920 }, { "epoch": 40.97, "learning_rate": 4.4878264925373136e-05, "loss": 0.0001, "step": 43924 }, { "epoch": 40.98, "learning_rate": 4.4877798507462684e-05, "loss": 0.0003, "step": 43928 }, { "epoch": 40.98, "learning_rate": 4.4877332089552246e-05, "loss": 0.0009, "step": 43932 }, { "epoch": 40.98, "learning_rate": 4.4876865671641794e-05, "loss": 0.0008, "step": 43936 }, { "epoch": 40.99, "learning_rate": 4.487639925373134e-05, "loss": 0.0021, "step": 43940 }, { "epoch": 40.99, "learning_rate": 4.48759328358209e-05, "loss": 0.0, "step": 43944 }, { "epoch": 41.0, "learning_rate": 4.487546641791045e-05, "loss": 0.0003, "step": 43948 }, { "epoch": 41.0, "learning_rate": 4.4875e-05, "loss": 0.0001, "step": 43952 }, { "epoch": 41.0, "learning_rate": 4.4874533582089555e-05, "loss": 0.0001, "step": 43956 }, { "epoch": 41.01, "learning_rate": 4.487406716417911e-05, "loss": 0.0003, "step": 43960 }, { "epoch": 41.01, "learning_rate": 4.487360074626866e-05, "loss": 0.0, "step": 43964 }, { "epoch": 41.01, "learning_rate": 4.487313432835821e-05, "loss": 0.0003, "step": 43968 }, { "epoch": 41.02, "learning_rate": 4.487266791044776e-05, "loss": 0.0003, "step": 43972 }, { "epoch": 41.02, "learning_rate": 4.4872201492537316e-05, "loss": 0.0, "step": 43976 }, { "epoch": 41.03, "learning_rate": 4.487173507462687e-05, "loss": 0.0017, "step": 43980 }, { "epoch": 41.03, "learning_rate": 4.487126865671642e-05, "loss": 0.0002, "step": 43984 }, { "epoch": 41.03, "learning_rate": 4.487080223880597e-05, "loss": 0.0002, "step": 43988 }, { "epoch": 41.04, "learning_rate": 4.487033582089553e-05, "loss": 0.0, "step": 43992 }, { "epoch": 41.04, "learning_rate": 4.486986940298508e-05, "loss": 0.0002, "step": 43996 }, { "epoch": 41.04, "learning_rate": 4.4869402985074625e-05, "loss": 0.0001, "step": 44000 }, { "epoch": 41.04, "eval_exact_match": 0.7379110251450677, "eval_exec": 0.7543520309477756, "eval_loss": 0.43217331171035767, "eval_runtime": 1129.1199, "eval_samples_per_second": 0.916, "step": 44000 }, { "epoch": 41.05, "learning_rate": 4.486893656716418e-05, "loss": 0.0008, "step": 44004 }, { "epoch": 41.05, "learning_rate": 4.4868470149253735e-05, "loss": 0.0, "step": 44008 }, { "epoch": 41.06, "learning_rate": 4.486800373134328e-05, "loss": 0.0001, "step": 44012 }, { "epoch": 41.06, "learning_rate": 4.486753731343284e-05, "loss": 0.0, "step": 44016 }, { "epoch": 41.06, "learning_rate": 4.486707089552239e-05, "loss": 0.0, "step": 44020 }, { "epoch": 41.07, "learning_rate": 4.486660447761194e-05, "loss": 0.0, "step": 44024 }, { "epoch": 41.07, "learning_rate": 4.4866138059701496e-05, "loss": 0.0001, "step": 44028 }, { "epoch": 41.07, "learning_rate": 4.4865671641791044e-05, "loss": 0.0, "step": 44032 }, { "epoch": 41.08, "learning_rate": 4.48652052238806e-05, "loss": 0.0, "step": 44036 }, { "epoch": 41.08, "learning_rate": 4.4864738805970154e-05, "loss": 0.0, "step": 44040 }, { "epoch": 41.09, "learning_rate": 4.48642723880597e-05, "loss": 0.0001, "step": 44044 }, { "epoch": 41.09, "learning_rate": 4.4863805970149257e-05, "loss": 0.0, "step": 44048 }, { "epoch": 41.09, "learning_rate": 4.486333955223881e-05, "loss": 0.0011, "step": 44052 }, { "epoch": 41.1, "learning_rate": 4.486287313432836e-05, "loss": 0.0001, "step": 44056 }, { "epoch": 41.1, "learning_rate": 4.4862406716417915e-05, "loss": 0.0003, "step": 44060 }, { "epoch": 41.1, "learning_rate": 4.486194029850746e-05, "loss": 0.0003, "step": 44064 }, { "epoch": 41.11, "learning_rate": 4.486147388059702e-05, "loss": 0.0001, "step": 44068 }, { "epoch": 41.11, "learning_rate": 4.486100746268657e-05, "loss": 0.0, "step": 44072 }, { "epoch": 41.12, "learning_rate": 4.486054104477612e-05, "loss": 0.0, "step": 44076 }, { "epoch": 41.12, "learning_rate": 4.4860074626865676e-05, "loss": 0.0, "step": 44080 }, { "epoch": 41.12, "learning_rate": 4.485960820895523e-05, "loss": 0.0, "step": 44084 }, { "epoch": 41.13, "learning_rate": 4.485914179104478e-05, "loss": 0.0001, "step": 44088 }, { "epoch": 41.13, "learning_rate": 4.485867537313433e-05, "loss": 0.0, "step": 44092 }, { "epoch": 41.13, "learning_rate": 4.485820895522388e-05, "loss": 0.0001, "step": 44096 }, { "epoch": 41.14, "learning_rate": 4.4857742537313437e-05, "loss": 0.0, "step": 44100 }, { "epoch": 41.14, "learning_rate": 4.4857276119402985e-05, "loss": 0.0001, "step": 44104 }, { "epoch": 41.15, "learning_rate": 4.485680970149254e-05, "loss": 0.0, "step": 44108 }, { "epoch": 41.15, "learning_rate": 4.4856343283582094e-05, "loss": 0.0001, "step": 44112 }, { "epoch": 41.15, "learning_rate": 4.485587686567164e-05, "loss": 0.0, "step": 44116 }, { "epoch": 41.16, "learning_rate": 4.48554104477612e-05, "loss": 0.0, "step": 44120 }, { "epoch": 41.16, "learning_rate": 4.4854944029850746e-05, "loss": 0.0, "step": 44124 }, { "epoch": 41.16, "learning_rate": 4.48544776119403e-05, "loss": 0.0002, "step": 44128 }, { "epoch": 41.17, "learning_rate": 4.4854011194029855e-05, "loss": 0.0001, "step": 44132 }, { "epoch": 41.17, "learning_rate": 4.4853544776119404e-05, "loss": 0.0, "step": 44136 }, { "epoch": 41.18, "learning_rate": 4.485307835820896e-05, "loss": 0.0009, "step": 44140 }, { "epoch": 41.18, "learning_rate": 4.4852611940298513e-05, "loss": 0.0006, "step": 44144 }, { "epoch": 41.18, "learning_rate": 4.485214552238806e-05, "loss": 0.0, "step": 44148 }, { "epoch": 41.19, "learning_rate": 4.485167910447761e-05, "loss": 0.0001, "step": 44152 }, { "epoch": 41.19, "learning_rate": 4.4851212686567165e-05, "loss": 0.0001, "step": 44156 }, { "epoch": 41.19, "learning_rate": 4.485074626865672e-05, "loss": 0.0, "step": 44160 }, { "epoch": 41.2, "learning_rate": 4.485027985074627e-05, "loss": 0.0, "step": 44164 }, { "epoch": 41.2, "learning_rate": 4.484981343283582e-05, "loss": 0.0, "step": 44168 }, { "epoch": 41.21, "learning_rate": 4.484934701492538e-05, "loss": 0.0, "step": 44172 }, { "epoch": 41.21, "learning_rate": 4.4848880597014926e-05, "loss": 0.0, "step": 44176 }, { "epoch": 41.21, "learning_rate": 4.484841417910448e-05, "loss": 0.0, "step": 44180 }, { "epoch": 41.22, "learning_rate": 4.484794776119403e-05, "loss": 0.0023, "step": 44184 }, { "epoch": 41.22, "learning_rate": 4.4847481343283583e-05, "loss": 0.0003, "step": 44188 }, { "epoch": 41.22, "learning_rate": 4.484701492537314e-05, "loss": 0.0, "step": 44192 }, { "epoch": 41.23, "learning_rate": 4.4846548507462687e-05, "loss": 0.0001, "step": 44196 }, { "epoch": 41.23, "learning_rate": 4.484608208955224e-05, "loss": 0.0001, "step": 44200 }, { "epoch": 41.24, "learning_rate": 4.4845615671641796e-05, "loss": 0.0002, "step": 44204 }, { "epoch": 41.24, "learning_rate": 4.4845149253731344e-05, "loss": 0.0, "step": 44208 }, { "epoch": 41.24, "learning_rate": 4.48446828358209e-05, "loss": 0.0, "step": 44212 }, { "epoch": 41.25, "learning_rate": 4.484421641791045e-05, "loss": 0.0, "step": 44216 }, { "epoch": 41.25, "learning_rate": 4.484375e-05, "loss": 0.0, "step": 44220 }, { "epoch": 41.25, "learning_rate": 4.484328358208956e-05, "loss": 0.0004, "step": 44224 }, { "epoch": 41.26, "learning_rate": 4.4842817164179105e-05, "loss": 0.0, "step": 44228 }, { "epoch": 41.26, "learning_rate": 4.484235074626866e-05, "loss": 0.0015, "step": 44232 }, { "epoch": 41.26, "learning_rate": 4.4841884328358215e-05, "loss": 0.0, "step": 44236 }, { "epoch": 41.27, "learning_rate": 4.4841417910447763e-05, "loss": 0.0005, "step": 44240 }, { "epoch": 41.27, "learning_rate": 4.484095149253731e-05, "loss": 0.0, "step": 44244 }, { "epoch": 41.28, "learning_rate": 4.484048507462687e-05, "loss": 0.0, "step": 44248 }, { "epoch": 41.28, "learning_rate": 4.484001865671642e-05, "loss": 0.0001, "step": 44252 }, { "epoch": 41.28, "learning_rate": 4.483955223880597e-05, "loss": 0.0, "step": 44256 }, { "epoch": 41.29, "learning_rate": 4.4839085820895524e-05, "loss": 0.0001, "step": 44260 }, { "epoch": 41.29, "learning_rate": 4.483861940298508e-05, "loss": 0.0003, "step": 44264 }, { "epoch": 41.29, "learning_rate": 4.483815298507463e-05, "loss": 0.0, "step": 44268 }, { "epoch": 41.3, "learning_rate": 4.483768656716418e-05, "loss": 0.0, "step": 44272 }, { "epoch": 41.3, "learning_rate": 4.483722014925373e-05, "loss": 0.0001, "step": 44276 }, { "epoch": 41.31, "learning_rate": 4.4836753731343285e-05, "loss": 0.0006, "step": 44280 }, { "epoch": 41.31, "learning_rate": 4.483628731343284e-05, "loss": 0.0, "step": 44284 }, { "epoch": 41.31, "learning_rate": 4.483582089552239e-05, "loss": 0.0, "step": 44288 }, { "epoch": 41.32, "learning_rate": 4.483535447761194e-05, "loss": 0.0001, "step": 44292 }, { "epoch": 41.32, "learning_rate": 4.48348880597015e-05, "loss": 0.0001, "step": 44296 }, { "epoch": 41.32, "learning_rate": 4.4834421641791046e-05, "loss": 0.0, "step": 44300 }, { "epoch": 41.33, "learning_rate": 4.4833955223880594e-05, "loss": 0.002, "step": 44304 }, { "epoch": 41.33, "learning_rate": 4.4833488805970156e-05, "loss": 0.0003, "step": 44308 }, { "epoch": 41.34, "learning_rate": 4.4833022388059704e-05, "loss": 0.0, "step": 44312 }, { "epoch": 41.34, "learning_rate": 4.483255597014925e-05, "loss": 0.0, "step": 44316 }, { "epoch": 41.34, "learning_rate": 4.483208955223881e-05, "loss": 0.0039, "step": 44320 }, { "epoch": 41.35, "learning_rate": 4.483162313432836e-05, "loss": 0.0029, "step": 44324 }, { "epoch": 41.35, "learning_rate": 4.483115671641791e-05, "loss": 0.0016, "step": 44328 }, { "epoch": 41.35, "learning_rate": 4.4830690298507465e-05, "loss": 0.0003, "step": 44332 }, { "epoch": 41.36, "learning_rate": 4.4830223880597013e-05, "loss": 0.0001, "step": 44336 }, { "epoch": 41.36, "learning_rate": 4.482975746268657e-05, "loss": 0.0, "step": 44340 }, { "epoch": 41.37, "learning_rate": 4.482929104477612e-05, "loss": 0.0001, "step": 44344 }, { "epoch": 41.37, "learning_rate": 4.482882462686567e-05, "loss": 0.0034, "step": 44348 }, { "epoch": 41.37, "learning_rate": 4.4828358208955226e-05, "loss": 0.0032, "step": 44352 }, { "epoch": 41.38, "learning_rate": 4.482789179104478e-05, "loss": 0.0004, "step": 44356 }, { "epoch": 41.38, "learning_rate": 4.482742537313433e-05, "loss": 0.0, "step": 44360 }, { "epoch": 41.38, "learning_rate": 4.482695895522388e-05, "loss": 0.0013, "step": 44364 }, { "epoch": 41.39, "learning_rate": 4.482649253731344e-05, "loss": 0.0055, "step": 44368 }, { "epoch": 41.39, "learning_rate": 4.482602611940299e-05, "loss": 0.0, "step": 44372 }, { "epoch": 41.4, "learning_rate": 4.482555970149254e-05, "loss": 0.0, "step": 44376 }, { "epoch": 41.4, "learning_rate": 4.482509328358209e-05, "loss": 0.0028, "step": 44380 }, { "epoch": 41.4, "learning_rate": 4.4824626865671645e-05, "loss": 0.0, "step": 44384 }, { "epoch": 41.41, "learning_rate": 4.48241604477612e-05, "loss": 0.0, "step": 44388 }, { "epoch": 41.41, "learning_rate": 4.482369402985075e-05, "loss": 0.0001, "step": 44392 }, { "epoch": 41.41, "learning_rate": 4.4823227611940296e-05, "loss": 0.0, "step": 44396 }, { "epoch": 41.42, "learning_rate": 4.482276119402986e-05, "loss": 0.0, "step": 44400 }, { "epoch": 41.42, "learning_rate": 4.4822294776119406e-05, "loss": 0.0, "step": 44404 }, { "epoch": 41.43, "learning_rate": 4.4821828358208954e-05, "loss": 0.0, "step": 44408 }, { "epoch": 41.43, "learning_rate": 4.482136194029851e-05, "loss": 0.0004, "step": 44412 }, { "epoch": 41.43, "learning_rate": 4.4820895522388064e-05, "loss": 0.0, "step": 44416 }, { "epoch": 41.44, "learning_rate": 4.482042910447761e-05, "loss": 0.0004, "step": 44420 }, { "epoch": 41.44, "learning_rate": 4.481996268656717e-05, "loss": 0.0, "step": 44424 }, { "epoch": 41.44, "learning_rate": 4.481949626865672e-05, "loss": 0.0, "step": 44428 }, { "epoch": 41.45, "learning_rate": 4.481902985074627e-05, "loss": 0.0029, "step": 44432 }, { "epoch": 41.45, "learning_rate": 4.4818563432835825e-05, "loss": 0.0, "step": 44436 }, { "epoch": 41.46, "learning_rate": 4.481809701492537e-05, "loss": 0.0001, "step": 44440 }, { "epoch": 41.46, "learning_rate": 4.481763059701493e-05, "loss": 0.0004, "step": 44444 }, { "epoch": 41.46, "learning_rate": 4.481716417910448e-05, "loss": 0.0, "step": 44448 }, { "epoch": 41.47, "learning_rate": 4.481669776119403e-05, "loss": 0.0, "step": 44452 }, { "epoch": 41.47, "learning_rate": 4.481623134328358e-05, "loss": 0.0001, "step": 44456 }, { "epoch": 41.47, "learning_rate": 4.481576492537314e-05, "loss": 0.0001, "step": 44460 }, { "epoch": 41.48, "learning_rate": 4.481529850746269e-05, "loss": 0.0, "step": 44464 }, { "epoch": 41.48, "learning_rate": 4.481483208955224e-05, "loss": 0.0, "step": 44468 }, { "epoch": 41.49, "learning_rate": 4.481436567164179e-05, "loss": 0.0009, "step": 44472 }, { "epoch": 41.49, "learning_rate": 4.481389925373135e-05, "loss": 0.0001, "step": 44476 }, { "epoch": 41.49, "learning_rate": 4.4813432835820895e-05, "loss": 0.0, "step": 44480 }, { "epoch": 41.5, "learning_rate": 4.481296641791045e-05, "loss": 0.0, "step": 44484 }, { "epoch": 41.5, "learning_rate": 4.4812500000000005e-05, "loss": 0.0, "step": 44488 }, { "epoch": 41.5, "learning_rate": 4.481203358208955e-05, "loss": 0.0001, "step": 44492 }, { "epoch": 41.51, "learning_rate": 4.481156716417911e-05, "loss": 0.0027, "step": 44496 }, { "epoch": 41.51, "learning_rate": 4.4811100746268656e-05, "loss": 0.0001, "step": 44500 }, { "epoch": 41.51, "eval_exact_match": 0.7350096711798839, "eval_exec": 0.7543520309477756, "eval_loss": 0.42939507961273193, "eval_runtime": 1157.4066, "eval_samples_per_second": 0.893, "step": 44500 }, { "epoch": 41.51, "learning_rate": 4.481063432835821e-05, "loss": 0.0, "step": 44504 }, { "epoch": 41.52, "learning_rate": 4.4810167910447766e-05, "loss": 0.0, "step": 44508 }, { "epoch": 41.52, "learning_rate": 4.4809701492537314e-05, "loss": 0.0, "step": 44512 }, { "epoch": 41.53, "learning_rate": 4.480923507462686e-05, "loss": 0.0003, "step": 44516 }, { "epoch": 41.53, "learning_rate": 4.4808768656716424e-05, "loss": 0.0, "step": 44520 }, { "epoch": 41.53, "learning_rate": 4.480830223880597e-05, "loss": 0.0011, "step": 44524 }, { "epoch": 41.54, "learning_rate": 4.480783582089552e-05, "loss": 0.0003, "step": 44528 }, { "epoch": 41.54, "learning_rate": 4.4807369402985075e-05, "loss": 0.0, "step": 44532 }, { "epoch": 41.54, "learning_rate": 4.480690298507463e-05, "loss": 0.0002, "step": 44536 }, { "epoch": 41.55, "learning_rate": 4.4806436567164185e-05, "loss": 0.0012, "step": 44540 }, { "epoch": 41.55, "learning_rate": 4.480597014925373e-05, "loss": 0.0001, "step": 44544 }, { "epoch": 41.56, "learning_rate": 4.480550373134328e-05, "loss": 0.0, "step": 44548 }, { "epoch": 41.56, "learning_rate": 4.480503731343284e-05, "loss": 0.0, "step": 44552 }, { "epoch": 41.56, "learning_rate": 4.480457089552239e-05, "loss": 0.0, "step": 44556 }, { "epoch": 41.57, "learning_rate": 4.480410447761194e-05, "loss": 0.0, "step": 44560 }, { "epoch": 41.57, "learning_rate": 4.4803638059701494e-05, "loss": 0.0001, "step": 44564 }, { "epoch": 41.57, "learning_rate": 4.480317164179105e-05, "loss": 0.0, "step": 44568 }, { "epoch": 41.58, "learning_rate": 4.48027052238806e-05, "loss": 0.0, "step": 44572 }, { "epoch": 41.58, "learning_rate": 4.480223880597015e-05, "loss": 0.0001, "step": 44576 }, { "epoch": 41.59, "learning_rate": 4.480177238805971e-05, "loss": 0.0003, "step": 44580 }, { "epoch": 41.59, "learning_rate": 4.4801305970149255e-05, "loss": 0.0, "step": 44584 }, { "epoch": 41.59, "learning_rate": 4.480083955223881e-05, "loss": 0.0013, "step": 44588 }, { "epoch": 41.6, "learning_rate": 4.480037313432836e-05, "loss": 0.0021, "step": 44592 }, { "epoch": 41.6, "learning_rate": 4.479990671641791e-05, "loss": 0.0, "step": 44596 }, { "epoch": 41.6, "learning_rate": 4.479944029850747e-05, "loss": 0.0001, "step": 44600 }, { "epoch": 41.61, "learning_rate": 4.4798973880597016e-05, "loss": 0.0043, "step": 44604 }, { "epoch": 41.61, "learning_rate": 4.4798507462686564e-05, "loss": 0.0014, "step": 44608 }, { "epoch": 41.62, "learning_rate": 4.4798041044776126e-05, "loss": 0.0001, "step": 44612 }, { "epoch": 41.62, "learning_rate": 4.4797574626865674e-05, "loss": 0.0002, "step": 44616 }, { "epoch": 41.62, "learning_rate": 4.479710820895522e-05, "loss": 0.0001, "step": 44620 }, { "epoch": 41.63, "learning_rate": 4.479664179104478e-05, "loss": 0.0001, "step": 44624 }, { "epoch": 41.63, "learning_rate": 4.479617537313433e-05, "loss": 0.0007, "step": 44628 }, { "epoch": 41.63, "learning_rate": 4.479570895522388e-05, "loss": 0.0006, "step": 44632 }, { "epoch": 41.64, "learning_rate": 4.4795242537313435e-05, "loss": 0.0002, "step": 44636 }, { "epoch": 41.64, "learning_rate": 4.479477611940299e-05, "loss": 0.0001, "step": 44640 }, { "epoch": 41.65, "learning_rate": 4.479430970149254e-05, "loss": 0.0, "step": 44644 }, { "epoch": 41.65, "learning_rate": 4.479384328358209e-05, "loss": 0.0001, "step": 44648 }, { "epoch": 41.65, "learning_rate": 4.479337686567164e-05, "loss": 0.0, "step": 44652 }, { "epoch": 41.66, "learning_rate": 4.4792910447761196e-05, "loss": 0.0, "step": 44656 }, { "epoch": 41.66, "learning_rate": 4.479244402985075e-05, "loss": 0.0016, "step": 44660 }, { "epoch": 41.66, "learning_rate": 4.47919776119403e-05, "loss": 0.0019, "step": 44664 }, { "epoch": 41.67, "learning_rate": 4.479151119402985e-05, "loss": 0.0009, "step": 44668 }, { "epoch": 41.67, "learning_rate": 4.479104477611941e-05, "loss": 0.0006, "step": 44672 }, { "epoch": 41.68, "learning_rate": 4.479057835820896e-05, "loss": 0.0, "step": 44676 }, { "epoch": 41.68, "learning_rate": 4.4790111940298505e-05, "loss": 0.0, "step": 44680 }, { "epoch": 41.68, "learning_rate": 4.478964552238806e-05, "loss": 0.0001, "step": 44684 }, { "epoch": 41.69, "learning_rate": 4.4789179104477615e-05, "loss": 0.0008, "step": 44688 }, { "epoch": 41.69, "learning_rate": 4.478871268656716e-05, "loss": 0.0019, "step": 44692 }, { "epoch": 41.69, "learning_rate": 4.478824626865672e-05, "loss": 0.0001, "step": 44696 }, { "epoch": 41.7, "learning_rate": 4.478777985074627e-05, "loss": 0.0, "step": 44700 }, { "epoch": 41.7, "learning_rate": 4.478731343283583e-05, "loss": 0.0, "step": 44704 }, { "epoch": 41.71, "learning_rate": 4.4786847014925376e-05, "loss": 0.0001, "step": 44708 }, { "epoch": 41.71, "learning_rate": 4.4786380597014924e-05, "loss": 0.0004, "step": 44712 }, { "epoch": 41.71, "learning_rate": 4.478591417910448e-05, "loss": 0.0005, "step": 44716 }, { "epoch": 41.72, "learning_rate": 4.4785447761194034e-05, "loss": 0.0002, "step": 44720 }, { "epoch": 41.72, "learning_rate": 4.478498134328358e-05, "loss": 0.0002, "step": 44724 }, { "epoch": 41.72, "learning_rate": 4.478451492537314e-05, "loss": 0.0001, "step": 44728 }, { "epoch": 41.73, "learning_rate": 4.478404850746269e-05, "loss": 0.0, "step": 44732 }, { "epoch": 41.73, "learning_rate": 4.478358208955224e-05, "loss": 0.0019, "step": 44736 }, { "epoch": 41.73, "learning_rate": 4.4783115671641795e-05, "loss": 0.0001, "step": 44740 }, { "epoch": 41.74, "learning_rate": 4.478264925373134e-05, "loss": 0.0006, "step": 44744 }, { "epoch": 41.74, "learning_rate": 4.47821828358209e-05, "loss": 0.0004, "step": 44748 }, { "epoch": 41.75, "learning_rate": 4.478171641791045e-05, "loss": 0.0001, "step": 44752 }, { "epoch": 41.75, "learning_rate": 4.478125e-05, "loss": 0.0, "step": 44756 }, { "epoch": 41.75, "learning_rate": 4.4780783582089556e-05, "loss": 0.0001, "step": 44760 }, { "epoch": 41.76, "learning_rate": 4.478031716417911e-05, "loss": 0.0, "step": 44764 }, { "epoch": 41.76, "learning_rate": 4.477985074626866e-05, "loss": 0.0, "step": 44768 }, { "epoch": 41.76, "learning_rate": 4.477938432835821e-05, "loss": 0.0, "step": 44772 }, { "epoch": 41.77, "learning_rate": 4.477891791044776e-05, "loss": 0.002, "step": 44776 }, { "epoch": 41.77, "learning_rate": 4.477845149253732e-05, "loss": 0.0001, "step": 44780 }, { "epoch": 41.78, "learning_rate": 4.4777985074626865e-05, "loss": 0.0, "step": 44784 }, { "epoch": 41.78, "learning_rate": 4.477751865671642e-05, "loss": 0.0, "step": 44788 }, { "epoch": 41.78, "learning_rate": 4.4777052238805975e-05, "loss": 0.0, "step": 44792 }, { "epoch": 41.79, "learning_rate": 4.477658582089552e-05, "loss": 0.0029, "step": 44796 }, { "epoch": 41.79, "learning_rate": 4.477611940298508e-05, "loss": 0.0, "step": 44800 }, { "epoch": 41.79, "learning_rate": 4.4775652985074626e-05, "loss": 0.0002, "step": 44804 }, { "epoch": 41.8, "learning_rate": 4.477518656716418e-05, "loss": 0.0, "step": 44808 }, { "epoch": 41.8, "learning_rate": 4.4774720149253736e-05, "loss": 0.0001, "step": 44812 }, { "epoch": 41.81, "learning_rate": 4.4774253731343284e-05, "loss": 0.0, "step": 44816 }, { "epoch": 41.81, "learning_rate": 4.477378731343284e-05, "loss": 0.0, "step": 44820 }, { "epoch": 41.81, "learning_rate": 4.4773320895522394e-05, "loss": 0.0006, "step": 44824 }, { "epoch": 41.82, "learning_rate": 4.477285447761194e-05, "loss": 0.0006, "step": 44828 }, { "epoch": 41.82, "learning_rate": 4.477238805970149e-05, "loss": 0.0013, "step": 44832 }, { "epoch": 41.82, "learning_rate": 4.4771921641791045e-05, "loss": 0.0014, "step": 44836 }, { "epoch": 41.83, "learning_rate": 4.47714552238806e-05, "loss": 0.0001, "step": 44840 }, { "epoch": 41.83, "learning_rate": 4.477098880597015e-05, "loss": 0.0001, "step": 44844 }, { "epoch": 41.84, "learning_rate": 4.47705223880597e-05, "loss": 0.0, "step": 44848 }, { "epoch": 41.84, "learning_rate": 4.477005597014926e-05, "loss": 0.0001, "step": 44852 }, { "epoch": 41.84, "learning_rate": 4.4769589552238806e-05, "loss": 0.003, "step": 44856 }, { "epoch": 41.85, "learning_rate": 4.476912313432836e-05, "loss": 0.0001, "step": 44860 }, { "epoch": 41.85, "learning_rate": 4.476865671641791e-05, "loss": 0.0, "step": 44864 }, { "epoch": 41.85, "learning_rate": 4.476819029850747e-05, "loss": 0.0001, "step": 44868 }, { "epoch": 41.86, "learning_rate": 4.476772388059702e-05, "loss": 0.0054, "step": 44872 }, { "epoch": 41.86, "learning_rate": 4.476725746268657e-05, "loss": 0.0003, "step": 44876 }, { "epoch": 41.87, "learning_rate": 4.476679104477612e-05, "loss": 0.0001, "step": 44880 }, { "epoch": 41.87, "learning_rate": 4.476632462686568e-05, "loss": 0.0001, "step": 44884 }, { "epoch": 41.87, "learning_rate": 4.4765858208955225e-05, "loss": 0.0, "step": 44888 }, { "epoch": 41.88, "learning_rate": 4.476539179104478e-05, "loss": 0.0001, "step": 44892 }, { "epoch": 41.88, "learning_rate": 4.476492537313433e-05, "loss": 0.0, "step": 44896 }, { "epoch": 41.88, "learning_rate": 4.476445895522388e-05, "loss": 0.0, "step": 44900 }, { "epoch": 41.89, "learning_rate": 4.476399253731344e-05, "loss": 0.0001, "step": 44904 }, { "epoch": 41.89, "learning_rate": 4.4763526119402986e-05, "loss": 0.0, "step": 44908 }, { "epoch": 41.9, "learning_rate": 4.476305970149254e-05, "loss": 0.0026, "step": 44912 }, { "epoch": 41.9, "learning_rate": 4.4762593283582096e-05, "loss": 0.0, "step": 44916 }, { "epoch": 41.9, "learning_rate": 4.4762126865671644e-05, "loss": 0.0, "step": 44920 }, { "epoch": 41.91, "learning_rate": 4.476166044776119e-05, "loss": 0.0001, "step": 44924 }, { "epoch": 41.91, "learning_rate": 4.4761194029850754e-05, "loss": 0.0001, "step": 44928 }, { "epoch": 41.91, "learning_rate": 4.47607276119403e-05, "loss": 0.0, "step": 44932 }, { "epoch": 41.92, "learning_rate": 4.476026119402985e-05, "loss": 0.0, "step": 44936 }, { "epoch": 41.92, "learning_rate": 4.4759794776119405e-05, "loss": 0.0032, "step": 44940 }, { "epoch": 41.93, "learning_rate": 4.475932835820896e-05, "loss": 0.001, "step": 44944 }, { "epoch": 41.93, "learning_rate": 4.475886194029851e-05, "loss": 0.0, "step": 44948 }, { "epoch": 41.93, "learning_rate": 4.475839552238806e-05, "loss": 0.0, "step": 44952 }, { "epoch": 41.94, "learning_rate": 4.475792910447761e-05, "loss": 0.0, "step": 44956 }, { "epoch": 41.94, "learning_rate": 4.4757462686567166e-05, "loss": 0.0, "step": 44960 }, { "epoch": 41.94, "learning_rate": 4.475699626865672e-05, "loss": 0.0045, "step": 44964 }, { "epoch": 41.95, "learning_rate": 4.475652985074627e-05, "loss": 0.0001, "step": 44968 }, { "epoch": 41.95, "learning_rate": 4.4756063432835824e-05, "loss": 0.0, "step": 44972 }, { "epoch": 41.96, "learning_rate": 4.475559701492538e-05, "loss": 0.0, "step": 44976 }, { "epoch": 41.96, "learning_rate": 4.475513059701493e-05, "loss": 0.0002, "step": 44980 }, { "epoch": 41.96, "learning_rate": 4.4754664179104475e-05, "loss": 0.0, "step": 44984 }, { "epoch": 41.97, "learning_rate": 4.4754197761194036e-05, "loss": 0.0015, "step": 44988 }, { "epoch": 41.97, "learning_rate": 4.4753731343283585e-05, "loss": 0.0, "step": 44992 }, { "epoch": 41.97, "learning_rate": 4.475326492537313e-05, "loss": 0.0, "step": 44996 }, { "epoch": 41.98, "learning_rate": 4.475279850746269e-05, "loss": 0.0, "step": 45000 }, { "epoch": 41.98, "eval_exact_match": 0.7340425531914894, "eval_exec": 0.758220502901354, "eval_loss": 0.4176974892616272, "eval_runtime": 1170.0858, "eval_samples_per_second": 0.884, "step": 45000 }, { "epoch": 41.98, "learning_rate": 4.475233208955224e-05, "loss": 0.0081, "step": 45004 }, { "epoch": 41.98, "learning_rate": 4.475186567164179e-05, "loss": 0.0001, "step": 45008 }, { "epoch": 41.99, "learning_rate": 4.4751399253731346e-05, "loss": 0.0001, "step": 45012 }, { "epoch": 41.99, "learning_rate": 4.4750932835820894e-05, "loss": 0.0002, "step": 45016 }, { "epoch": 42.0, "learning_rate": 4.475046641791045e-05, "loss": 0.0002, "step": 45020 }, { "epoch": 42.0, "learning_rate": 4.4750000000000004e-05, "loss": 0.0002, "step": 45024 }, { "epoch": 42.0, "learning_rate": 4.474953358208955e-05, "loss": 0.0011, "step": 45028 }, { "epoch": 42.01, "learning_rate": 4.4749067164179107e-05, "loss": 0.0001, "step": 45032 }, { "epoch": 42.01, "learning_rate": 4.474860074626866e-05, "loss": 0.0, "step": 45036 }, { "epoch": 42.01, "learning_rate": 4.474813432835821e-05, "loss": 0.0003, "step": 45040 }, { "epoch": 42.02, "learning_rate": 4.4747667910447765e-05, "loss": 0.0004, "step": 45044 }, { "epoch": 42.02, "learning_rate": 4.474720149253732e-05, "loss": 0.0, "step": 45048 }, { "epoch": 42.03, "learning_rate": 4.474673507462687e-05, "loss": 0.0, "step": 45052 }, { "epoch": 42.03, "learning_rate": 4.474626865671642e-05, "loss": 0.0, "step": 45056 }, { "epoch": 42.03, "learning_rate": 4.474580223880597e-05, "loss": 0.0, "step": 45060 }, { "epoch": 42.04, "learning_rate": 4.4745335820895526e-05, "loss": 0.0, "step": 45064 }, { "epoch": 42.04, "learning_rate": 4.474486940298508e-05, "loss": 0.0001, "step": 45068 }, { "epoch": 42.04, "learning_rate": 4.474440298507463e-05, "loss": 0.002, "step": 45072 }, { "epoch": 42.05, "learning_rate": 4.474393656716418e-05, "loss": 0.0003, "step": 45076 }, { "epoch": 42.05, "learning_rate": 4.474347014925374e-05, "loss": 0.0001, "step": 45080 }, { "epoch": 42.06, "learning_rate": 4.4743003731343286e-05, "loss": 0.0003, "step": 45084 }, { "epoch": 42.06, "learning_rate": 4.4742537313432835e-05, "loss": 0.0027, "step": 45088 }, { "epoch": 42.06, "learning_rate": 4.474207089552239e-05, "loss": 0.0005, "step": 45092 }, { "epoch": 42.07, "learning_rate": 4.4741604477611944e-05, "loss": 0.0, "step": 45096 }, { "epoch": 42.07, "learning_rate": 4.474113805970149e-05, "loss": 0.0014, "step": 45100 }, { "epoch": 42.07, "learning_rate": 4.474067164179105e-05, "loss": 0.0001, "step": 45104 }, { "epoch": 42.08, "learning_rate": 4.47402052238806e-05, "loss": 0.0013, "step": 45108 }, { "epoch": 42.08, "learning_rate": 4.473973880597015e-05, "loss": 0.0, "step": 45112 }, { "epoch": 42.09, "learning_rate": 4.4739272388059705e-05, "loss": 0.0, "step": 45116 }, { "epoch": 42.09, "learning_rate": 4.4738805970149254e-05, "loss": 0.0001, "step": 45120 }, { "epoch": 42.09, "learning_rate": 4.473833955223881e-05, "loss": 0.0002, "step": 45124 }, { "epoch": 42.1, "learning_rate": 4.473787313432836e-05, "loss": 0.0008, "step": 45128 }, { "epoch": 42.1, "learning_rate": 4.473740671641791e-05, "loss": 0.0001, "step": 45132 }, { "epoch": 42.1, "learning_rate": 4.473694029850746e-05, "loss": 0.0, "step": 45136 }, { "epoch": 42.11, "learning_rate": 4.473647388059702e-05, "loss": 0.0, "step": 45140 }, { "epoch": 42.11, "learning_rate": 4.473600746268657e-05, "loss": 0.0014, "step": 45144 }, { "epoch": 42.12, "learning_rate": 4.473554104477612e-05, "loss": 0.0001, "step": 45148 }, { "epoch": 42.12, "learning_rate": 4.473507462686567e-05, "loss": 0.0015, "step": 45152 }, { "epoch": 42.12, "learning_rate": 4.473460820895523e-05, "loss": 0.0, "step": 45156 }, { "epoch": 42.13, "learning_rate": 4.4734141791044776e-05, "loss": 0.0055, "step": 45160 }, { "epoch": 42.13, "learning_rate": 4.473367537313433e-05, "loss": 0.0001, "step": 45164 }, { "epoch": 42.13, "learning_rate": 4.4733208955223885e-05, "loss": 0.0002, "step": 45168 }, { "epoch": 42.14, "learning_rate": 4.4732742537313433e-05, "loss": 0.0001, "step": 45172 }, { "epoch": 42.14, "learning_rate": 4.473227611940299e-05, "loss": 0.0001, "step": 45176 }, { "epoch": 42.15, "learning_rate": 4.4731809701492537e-05, "loss": 0.0, "step": 45180 }, { "epoch": 42.15, "learning_rate": 4.473134328358209e-05, "loss": 0.0, "step": 45184 }, { "epoch": 42.15, "learning_rate": 4.4730876865671646e-05, "loss": 0.0, "step": 45188 }, { "epoch": 42.16, "learning_rate": 4.4730410447761194e-05, "loss": 0.0003, "step": 45192 }, { "epoch": 42.16, "learning_rate": 4.472994402985075e-05, "loss": 0.0033, "step": 45196 }, { "epoch": 42.16, "learning_rate": 4.4729477611940304e-05, "loss": 0.0011, "step": 45200 }, { "epoch": 42.17, "learning_rate": 4.472901119402985e-05, "loss": 0.0, "step": 45204 }, { "epoch": 42.17, "learning_rate": 4.472854477611941e-05, "loss": 0.0017, "step": 45208 }, { "epoch": 42.18, "learning_rate": 4.4728078358208955e-05, "loss": 0.0001, "step": 45212 }, { "epoch": 42.18, "learning_rate": 4.472761194029851e-05, "loss": 0.0001, "step": 45216 }, { "epoch": 42.18, "learning_rate": 4.4727145522388065e-05, "loss": 0.0, "step": 45220 }, { "epoch": 42.19, "learning_rate": 4.4726679104477613e-05, "loss": 0.0, "step": 45224 }, { "epoch": 42.19, "learning_rate": 4.472621268656716e-05, "loss": 0.0, "step": 45228 }, { "epoch": 42.19, "learning_rate": 4.472574626865672e-05, "loss": 0.0005, "step": 45232 }, { "epoch": 42.2, "learning_rate": 4.472527985074627e-05, "loss": 0.0001, "step": 45236 }, { "epoch": 42.2, "learning_rate": 4.472481343283582e-05, "loss": 0.0001, "step": 45240 }, { "epoch": 42.21, "learning_rate": 4.4724347014925374e-05, "loss": 0.001, "step": 45244 }, { "epoch": 42.21, "learning_rate": 4.472388059701493e-05, "loss": 0.0001, "step": 45248 }, { "epoch": 42.21, "learning_rate": 4.472341417910448e-05, "loss": 0.0, "step": 45252 }, { "epoch": 42.22, "learning_rate": 4.472294776119403e-05, "loss": 0.0018, "step": 45256 }, { "epoch": 42.22, "learning_rate": 4.472248134328359e-05, "loss": 0.0006, "step": 45260 }, { "epoch": 42.22, "learning_rate": 4.4722014925373135e-05, "loss": 0.0, "step": 45264 }, { "epoch": 42.23, "learning_rate": 4.472154850746269e-05, "loss": 0.0001, "step": 45268 }, { "epoch": 42.23, "learning_rate": 4.472108208955224e-05, "loss": 0.0004, "step": 45272 }, { "epoch": 42.24, "learning_rate": 4.472061567164179e-05, "loss": 0.0028, "step": 45276 }, { "epoch": 42.24, "learning_rate": 4.472014925373135e-05, "loss": 0.0, "step": 45280 }, { "epoch": 42.24, "learning_rate": 4.4719682835820896e-05, "loss": 0.0001, "step": 45284 }, { "epoch": 42.25, "learning_rate": 4.4719216417910444e-05, "loss": 0.0, "step": 45288 }, { "epoch": 42.25, "learning_rate": 4.4718750000000006e-05, "loss": 0.0003, "step": 45292 }, { "epoch": 42.25, "learning_rate": 4.4718283582089554e-05, "loss": 0.0, "step": 45296 }, { "epoch": 42.26, "learning_rate": 4.47178171641791e-05, "loss": 0.004, "step": 45300 }, { "epoch": 42.26, "learning_rate": 4.471735074626866e-05, "loss": 0.0, "step": 45304 }, { "epoch": 42.26, "learning_rate": 4.471688432835821e-05, "loss": 0.0, "step": 45308 }, { "epoch": 42.27, "learning_rate": 4.471641791044776e-05, "loss": 0.0014, "step": 45312 }, { "epoch": 42.27, "learning_rate": 4.4715951492537315e-05, "loss": 0.0, "step": 45316 }, { "epoch": 42.28, "learning_rate": 4.471548507462687e-05, "loss": 0.0, "step": 45320 }, { "epoch": 42.28, "learning_rate": 4.471501865671642e-05, "loss": 0.0001, "step": 45324 }, { "epoch": 42.28, "learning_rate": 4.471455223880597e-05, "loss": 0.0005, "step": 45328 }, { "epoch": 42.29, "learning_rate": 4.471408582089552e-05, "loss": 0.0, "step": 45332 }, { "epoch": 42.29, "learning_rate": 4.4713619402985076e-05, "loss": 0.0002, "step": 45336 }, { "epoch": 42.29, "learning_rate": 4.471315298507463e-05, "loss": 0.0, "step": 45340 }, { "epoch": 42.3, "learning_rate": 4.471268656716418e-05, "loss": 0.0031, "step": 45344 }, { "epoch": 42.3, "learning_rate": 4.4712220149253734e-05, "loss": 0.0, "step": 45348 }, { "epoch": 42.31, "learning_rate": 4.471175373134329e-05, "loss": 0.0, "step": 45352 }, { "epoch": 42.31, "learning_rate": 4.471128731343284e-05, "loss": 0.0001, "step": 45356 }, { "epoch": 42.31, "learning_rate": 4.471082089552239e-05, "loss": 0.0, "step": 45360 }, { "epoch": 42.32, "learning_rate": 4.471035447761194e-05, "loss": 0.0, "step": 45364 }, { "epoch": 42.32, "learning_rate": 4.4709888059701495e-05, "loss": 0.0001, "step": 45368 }, { "epoch": 42.32, "learning_rate": 4.470942164179105e-05, "loss": 0.0006, "step": 45372 }, { "epoch": 42.33, "learning_rate": 4.47089552238806e-05, "loss": 0.0, "step": 45376 }, { "epoch": 42.33, "learning_rate": 4.470848880597015e-05, "loss": 0.0045, "step": 45380 }, { "epoch": 42.34, "learning_rate": 4.470802238805971e-05, "loss": 0.0004, "step": 45384 }, { "epoch": 42.34, "learning_rate": 4.4707555970149256e-05, "loss": 0.0, "step": 45388 }, { "epoch": 42.34, "learning_rate": 4.4707089552238804e-05, "loss": 0.0006, "step": 45392 }, { "epoch": 42.35, "learning_rate": 4.470662313432836e-05, "loss": 0.0, "step": 45396 }, { "epoch": 42.35, "learning_rate": 4.4706156716417914e-05, "loss": 0.0001, "step": 45400 }, { "epoch": 42.35, "learning_rate": 4.470569029850746e-05, "loss": 0.0, "step": 45404 }, { "epoch": 42.36, "learning_rate": 4.470522388059702e-05, "loss": 0.0005, "step": 45408 }, { "epoch": 42.36, "learning_rate": 4.470475746268657e-05, "loss": 0.0001, "step": 45412 }, { "epoch": 42.37, "learning_rate": 4.470429104477612e-05, "loss": 0.0004, "step": 45416 }, { "epoch": 42.37, "learning_rate": 4.4703824626865675e-05, "loss": 0.0001, "step": 45420 }, { "epoch": 42.37, "learning_rate": 4.470335820895522e-05, "loss": 0.0008, "step": 45424 }, { "epoch": 42.38, "learning_rate": 4.470289179104478e-05, "loss": 0.0, "step": 45428 }, { "epoch": 42.38, "learning_rate": 4.470242537313433e-05, "loss": 0.0001, "step": 45432 }, { "epoch": 42.38, "learning_rate": 4.470195895522388e-05, "loss": 0.0001, "step": 45436 }, { "epoch": 42.39, "learning_rate": 4.4701492537313436e-05, "loss": 0.0002, "step": 45440 }, { "epoch": 42.39, "learning_rate": 4.470102611940299e-05, "loss": 0.0, "step": 45444 }, { "epoch": 42.4, "learning_rate": 4.470055970149254e-05, "loss": 0.0, "step": 45448 }, { "epoch": 42.4, "learning_rate": 4.470009328358209e-05, "loss": 0.0, "step": 45452 }, { "epoch": 42.4, "learning_rate": 4.469962686567164e-05, "loss": 0.0, "step": 45456 }, { "epoch": 42.41, "learning_rate": 4.46991604477612e-05, "loss": 0.0, "step": 45460 }, { "epoch": 42.41, "learning_rate": 4.4698694029850745e-05, "loss": 0.0, "step": 45464 }, { "epoch": 42.41, "learning_rate": 4.46982276119403e-05, "loss": 0.0, "step": 45468 }, { "epoch": 42.42, "learning_rate": 4.4697761194029855e-05, "loss": 0.0001, "step": 45472 }, { "epoch": 42.42, "learning_rate": 4.46972947761194e-05, "loss": 0.0001, "step": 45476 }, { "epoch": 42.43, "learning_rate": 4.469682835820896e-05, "loss": 0.0, "step": 45480 }, { "epoch": 42.43, "learning_rate": 4.4696361940298506e-05, "loss": 0.001, "step": 45484 }, { "epoch": 42.43, "learning_rate": 4.469589552238806e-05, "loss": 0.0001, "step": 45488 }, { "epoch": 42.44, "learning_rate": 4.4695429104477616e-05, "loss": 0.0, "step": 45492 }, { "epoch": 42.44, "learning_rate": 4.4694962686567164e-05, "loss": 0.0, "step": 45496 }, { "epoch": 42.44, "learning_rate": 4.469449626865672e-05, "loss": 0.0, "step": 45500 }, { "epoch": 42.44, "eval_exact_match": 0.7485493230174082, "eval_exec": 0.7669245647969052, "eval_loss": 0.42073753476142883, "eval_runtime": 1143.3015, "eval_samples_per_second": 0.904, "step": 45500 }, { "epoch": 42.45, "learning_rate": 4.4694029850746274e-05, "loss": 0.0001, "step": 45504 }, { "epoch": 42.45, "learning_rate": 4.469356343283582e-05, "loss": 0.0002, "step": 45508 }, { "epoch": 42.46, "learning_rate": 4.469309701492538e-05, "loss": 0.0001, "step": 45512 }, { "epoch": 42.46, "learning_rate": 4.4692630597014925e-05, "loss": 0.0, "step": 45516 }, { "epoch": 42.46, "learning_rate": 4.469216417910448e-05, "loss": 0.0001, "step": 45520 }, { "epoch": 42.47, "learning_rate": 4.4691697761194035e-05, "loss": 0.0001, "step": 45524 }, { "epoch": 42.47, "learning_rate": 4.469123134328358e-05, "loss": 0.0003, "step": 45528 }, { "epoch": 42.47, "learning_rate": 4.469076492537314e-05, "loss": 0.0, "step": 45532 }, { "epoch": 42.48, "learning_rate": 4.469029850746269e-05, "loss": 0.001, "step": 45536 }, { "epoch": 42.48, "learning_rate": 4.468983208955224e-05, "loss": 0.0009, "step": 45540 }, { "epoch": 42.49, "learning_rate": 4.468936567164179e-05, "loss": 0.0001, "step": 45544 }, { "epoch": 42.49, "learning_rate": 4.468889925373135e-05, "loss": 0.0, "step": 45548 }, { "epoch": 42.49, "learning_rate": 4.46884328358209e-05, "loss": 0.0004, "step": 45552 }, { "epoch": 42.5, "learning_rate": 4.468796641791045e-05, "loss": 0.0, "step": 45556 }, { "epoch": 42.5, "learning_rate": 4.46875e-05, "loss": 0.0, "step": 45560 }, { "epoch": 42.5, "learning_rate": 4.468703358208956e-05, "loss": 0.0068, "step": 45564 }, { "epoch": 42.51, "learning_rate": 4.4686567164179105e-05, "loss": 0.0, "step": 45568 }, { "epoch": 42.51, "learning_rate": 4.468610074626866e-05, "loss": 0.0, "step": 45572 }, { "epoch": 42.51, "learning_rate": 4.468563432835821e-05, "loss": 0.0, "step": 45576 }, { "epoch": 42.52, "learning_rate": 4.468516791044776e-05, "loss": 0.0, "step": 45580 }, { "epoch": 42.52, "learning_rate": 4.468470149253732e-05, "loss": 0.0, "step": 45584 }, { "epoch": 42.53, "learning_rate": 4.4684235074626866e-05, "loss": 0.0003, "step": 45588 }, { "epoch": 42.53, "learning_rate": 4.468376865671642e-05, "loss": 0.0, "step": 45592 }, { "epoch": 42.53, "learning_rate": 4.4683302238805976e-05, "loss": 0.0002, "step": 45596 }, { "epoch": 42.54, "learning_rate": 4.4682835820895524e-05, "loss": 0.0001, "step": 45600 }, { "epoch": 42.54, "learning_rate": 4.468236940298507e-05, "loss": 0.0002, "step": 45604 }, { "epoch": 42.54, "learning_rate": 4.4681902985074634e-05, "loss": 0.0, "step": 45608 }, { "epoch": 42.55, "learning_rate": 4.468143656716418e-05, "loss": 0.002, "step": 45612 }, { "epoch": 42.55, "learning_rate": 4.468097014925373e-05, "loss": 0.0003, "step": 45616 }, { "epoch": 42.56, "learning_rate": 4.4680503731343285e-05, "loss": 0.0, "step": 45620 }, { "epoch": 42.56, "learning_rate": 4.468003731343284e-05, "loss": 0.0, "step": 45624 }, { "epoch": 42.56, "learning_rate": 4.467957089552239e-05, "loss": 0.0001, "step": 45628 }, { "epoch": 42.57, "learning_rate": 4.467910447761194e-05, "loss": 0.0001, "step": 45632 }, { "epoch": 42.57, "learning_rate": 4.467863805970149e-05, "loss": 0.0, "step": 45636 }, { "epoch": 42.57, "learning_rate": 4.4678171641791046e-05, "loss": 0.0, "step": 45640 }, { "epoch": 42.58, "learning_rate": 4.46777052238806e-05, "loss": 0.0, "step": 45644 }, { "epoch": 42.58, "learning_rate": 4.467723880597015e-05, "loss": 0.0, "step": 45648 }, { "epoch": 42.59, "learning_rate": 4.4676772388059704e-05, "loss": 0.0, "step": 45652 }, { "epoch": 42.59, "learning_rate": 4.467630597014926e-05, "loss": 0.0031, "step": 45656 }, { "epoch": 42.59, "learning_rate": 4.467583955223881e-05, "loss": 0.0, "step": 45660 }, { "epoch": 42.6, "learning_rate": 4.4675373134328355e-05, "loss": 0.0002, "step": 45664 }, { "epoch": 42.6, "learning_rate": 4.467490671641792e-05, "loss": 0.0, "step": 45668 }, { "epoch": 42.6, "learning_rate": 4.4674440298507465e-05, "loss": 0.0006, "step": 45672 }, { "epoch": 42.61, "learning_rate": 4.467397388059702e-05, "loss": 0.0003, "step": 45676 }, { "epoch": 42.61, "learning_rate": 4.467350746268657e-05, "loss": 0.0002, "step": 45680 }, { "epoch": 42.62, "learning_rate": 4.467304104477612e-05, "loss": 0.0, "step": 45684 }, { "epoch": 42.62, "learning_rate": 4.467257462686568e-05, "loss": 0.0, "step": 45688 }, { "epoch": 42.62, "learning_rate": 4.4672108208955226e-05, "loss": 0.0, "step": 45692 }, { "epoch": 42.63, "learning_rate": 4.4671641791044774e-05, "loss": 0.0, "step": 45696 }, { "epoch": 42.63, "learning_rate": 4.4671175373134336e-05, "loss": 0.0002, "step": 45700 }, { "epoch": 42.63, "learning_rate": 4.4670708955223884e-05, "loss": 0.0, "step": 45704 }, { "epoch": 42.64, "learning_rate": 4.467024253731343e-05, "loss": 0.0001, "step": 45708 }, { "epoch": 42.64, "learning_rate": 4.466977611940299e-05, "loss": 0.0, "step": 45712 }, { "epoch": 42.65, "learning_rate": 4.466930970149254e-05, "loss": 0.0, "step": 45716 }, { "epoch": 42.65, "learning_rate": 4.466884328358209e-05, "loss": 0.0002, "step": 45720 }, { "epoch": 42.65, "learning_rate": 4.4668376865671645e-05, "loss": 0.0002, "step": 45724 }, { "epoch": 42.66, "learning_rate": 4.46679104477612e-05, "loss": 0.0031, "step": 45728 }, { "epoch": 42.66, "learning_rate": 4.466744402985075e-05, "loss": 0.0003, "step": 45732 }, { "epoch": 42.66, "learning_rate": 4.46669776119403e-05, "loss": 0.0001, "step": 45736 }, { "epoch": 42.67, "learning_rate": 4.466651119402985e-05, "loss": 0.0, "step": 45740 }, { "epoch": 42.67, "learning_rate": 4.4666044776119406e-05, "loss": 0.0, "step": 45744 }, { "epoch": 42.68, "learning_rate": 4.466557835820896e-05, "loss": 0.0001, "step": 45748 }, { "epoch": 42.68, "learning_rate": 4.466511194029851e-05, "loss": 0.0001, "step": 45752 }, { "epoch": 42.68, "learning_rate": 4.466464552238806e-05, "loss": 0.0, "step": 45756 }, { "epoch": 42.69, "learning_rate": 4.466417910447762e-05, "loss": 0.0, "step": 45760 }, { "epoch": 42.69, "learning_rate": 4.466371268656717e-05, "loss": 0.0004, "step": 45764 }, { "epoch": 42.69, "learning_rate": 4.4663246268656715e-05, "loss": 0.0001, "step": 45768 }, { "epoch": 42.7, "learning_rate": 4.466277985074627e-05, "loss": 0.0004, "step": 45772 }, { "epoch": 42.7, "learning_rate": 4.4662313432835825e-05, "loss": 0.0003, "step": 45776 }, { "epoch": 42.71, "learning_rate": 4.466184701492537e-05, "loss": 0.0, "step": 45780 }, { "epoch": 42.71, "learning_rate": 4.466138059701493e-05, "loss": 0.0, "step": 45784 }, { "epoch": 42.71, "learning_rate": 4.466091417910448e-05, "loss": 0.0001, "step": 45788 }, { "epoch": 42.72, "learning_rate": 4.466044776119403e-05, "loss": 0.0, "step": 45792 }, { "epoch": 42.72, "learning_rate": 4.4659981343283586e-05, "loss": 0.0001, "step": 45796 }, { "epoch": 42.72, "learning_rate": 4.4659514925373134e-05, "loss": 0.0002, "step": 45800 }, { "epoch": 42.73, "learning_rate": 4.465904850746269e-05, "loss": 0.0003, "step": 45804 }, { "epoch": 42.73, "learning_rate": 4.4658582089552244e-05, "loss": 0.0, "step": 45808 }, { "epoch": 42.73, "learning_rate": 4.465811567164179e-05, "loss": 0.0002, "step": 45812 }, { "epoch": 42.74, "learning_rate": 4.465764925373134e-05, "loss": 0.0, "step": 45816 }, { "epoch": 42.74, "learning_rate": 4.46571828358209e-05, "loss": 0.0016, "step": 45820 }, { "epoch": 42.75, "learning_rate": 4.465671641791045e-05, "loss": 0.0005, "step": 45824 }, { "epoch": 42.75, "learning_rate": 4.465625e-05, "loss": 0.0003, "step": 45828 }, { "epoch": 42.75, "learning_rate": 4.465578358208955e-05, "loss": 0.0, "step": 45832 }, { "epoch": 42.76, "learning_rate": 4.465531716417911e-05, "loss": 0.0033, "step": 45836 }, { "epoch": 42.76, "learning_rate": 4.465485074626866e-05, "loss": 0.0, "step": 45840 }, { "epoch": 42.76, "learning_rate": 4.465438432835821e-05, "loss": 0.0001, "step": 45844 }, { "epoch": 42.77, "learning_rate": 4.4653917910447766e-05, "loss": 0.0, "step": 45848 }, { "epoch": 42.77, "learning_rate": 4.465345149253732e-05, "loss": 0.0007, "step": 45852 }, { "epoch": 42.78, "learning_rate": 4.465298507462687e-05, "loss": 0.0, "step": 45856 }, { "epoch": 42.78, "learning_rate": 4.465251865671642e-05, "loss": 0.0, "step": 45860 }, { "epoch": 42.78, "learning_rate": 4.465205223880597e-05, "loss": 0.0, "step": 45864 }, { "epoch": 42.79, "learning_rate": 4.465158582089553e-05, "loss": 0.0, "step": 45868 }, { "epoch": 42.79, "learning_rate": 4.4651119402985075e-05, "loss": 0.0001, "step": 45872 }, { "epoch": 42.79, "learning_rate": 4.465065298507463e-05, "loss": 0.0013, "step": 45876 }, { "epoch": 42.8, "learning_rate": 4.4650186567164185e-05, "loss": 0.0, "step": 45880 }, { "epoch": 42.8, "learning_rate": 4.464972014925373e-05, "loss": 0.0, "step": 45884 }, { "epoch": 42.81, "learning_rate": 4.464925373134329e-05, "loss": 0.0, "step": 45888 }, { "epoch": 42.81, "learning_rate": 4.4648787313432836e-05, "loss": 0.0005, "step": 45892 }, { "epoch": 42.81, "learning_rate": 4.464832089552239e-05, "loss": 0.0, "step": 45896 }, { "epoch": 42.82, "learning_rate": 4.4647854477611946e-05, "loss": 0.0001, "step": 45900 }, { "epoch": 42.82, "learning_rate": 4.4647388059701494e-05, "loss": 0.0, "step": 45904 }, { "epoch": 42.82, "learning_rate": 4.464692164179104e-05, "loss": 0.0105, "step": 45908 }, { "epoch": 42.83, "learning_rate": 4.4646455223880603e-05, "loss": 0.0007, "step": 45912 }, { "epoch": 42.83, "learning_rate": 4.464598880597015e-05, "loss": 0.0002, "step": 45916 }, { "epoch": 42.84, "learning_rate": 4.46455223880597e-05, "loss": 0.0, "step": 45920 }, { "epoch": 42.84, "learning_rate": 4.4645055970149255e-05, "loss": 0.0043, "step": 45924 }, { "epoch": 42.84, "learning_rate": 4.464458955223881e-05, "loss": 0.0, "step": 45928 }, { "epoch": 42.85, "learning_rate": 4.464412313432836e-05, "loss": 0.0002, "step": 45932 }, { "epoch": 42.85, "learning_rate": 4.464365671641791e-05, "loss": 0.0033, "step": 45936 }, { "epoch": 42.85, "learning_rate": 4.464319029850747e-05, "loss": 0.0001, "step": 45940 }, { "epoch": 42.86, "learning_rate": 4.4642723880597016e-05, "loss": 0.0001, "step": 45944 }, { "epoch": 42.86, "learning_rate": 4.464225746268657e-05, "loss": 0.0, "step": 45948 }, { "epoch": 42.87, "learning_rate": 4.464179104477612e-05, "loss": 0.0024, "step": 45952 }, { "epoch": 42.87, "learning_rate": 4.4641324626865674e-05, "loss": 0.0008, "step": 45956 }, { "epoch": 42.87, "learning_rate": 4.464085820895523e-05, "loss": 0.0011, "step": 45960 }, { "epoch": 42.88, "learning_rate": 4.464039179104478e-05, "loss": 0.0015, "step": 45964 }, { "epoch": 42.88, "learning_rate": 4.4639925373134325e-05, "loss": 0.0, "step": 45968 }, { "epoch": 42.88, "learning_rate": 4.4639458955223886e-05, "loss": 0.0001, "step": 45972 }, { "epoch": 42.89, "learning_rate": 4.4638992537313435e-05, "loss": 0.0009, "step": 45976 }, { "epoch": 42.89, "learning_rate": 4.463852611940298e-05, "loss": 0.0063, "step": 45980 }, { "epoch": 42.9, "learning_rate": 4.463805970149254e-05, "loss": 0.0, "step": 45984 }, { "epoch": 42.9, "learning_rate": 4.463759328358209e-05, "loss": 0.0006, "step": 45988 }, { "epoch": 42.9, "learning_rate": 4.463712686567164e-05, "loss": 0.0001, "step": 45992 }, { "epoch": 42.91, "learning_rate": 4.4636660447761196e-05, "loss": 0.0009, "step": 45996 }, { "epoch": 42.91, "learning_rate": 4.463619402985075e-05, "loss": 0.0002, "step": 46000 }, { "epoch": 42.91, "eval_exact_match": 0.7340425531914894, "eval_exec": 0.7678916827852998, "eval_loss": 0.4125958979129791, "eval_runtime": 1108.5153, "eval_samples_per_second": 0.933, "step": 46000 }, { "epoch": 42.91, "learning_rate": 4.4635727611940305e-05, "loss": 0.0128, "step": 46004 }, { "epoch": 42.92, "learning_rate": 4.4635261194029854e-05, "loss": 0.0002, "step": 46008 }, { "epoch": 42.92, "learning_rate": 4.46347947761194e-05, "loss": 0.0003, "step": 46012 }, { "epoch": 42.93, "learning_rate": 4.463432835820896e-05, "loss": 0.0, "step": 46016 }, { "epoch": 42.93, "learning_rate": 4.463386194029851e-05, "loss": 0.0, "step": 46020 }, { "epoch": 42.93, "learning_rate": 4.463339552238806e-05, "loss": 0.0002, "step": 46024 }, { "epoch": 42.94, "learning_rate": 4.4632929104477614e-05, "loss": 0.0, "step": 46028 }, { "epoch": 42.94, "learning_rate": 4.463246268656717e-05, "loss": 0.0, "step": 46032 }, { "epoch": 42.94, "learning_rate": 4.463199626865672e-05, "loss": 0.0019, "step": 46036 }, { "epoch": 42.95, "learning_rate": 4.463152985074627e-05, "loss": 0.0003, "step": 46040 }, { "epoch": 42.95, "learning_rate": 4.463106343283582e-05, "loss": 0.0, "step": 46044 }, { "epoch": 42.96, "learning_rate": 4.4630597014925375e-05, "loss": 0.0, "step": 46048 }, { "epoch": 42.96, "learning_rate": 4.463013059701493e-05, "loss": 0.0001, "step": 46052 }, { "epoch": 42.96, "learning_rate": 4.462966417910448e-05, "loss": 0.0005, "step": 46056 }, { "epoch": 42.97, "learning_rate": 4.4629197761194033e-05, "loss": 0.0, "step": 46060 }, { "epoch": 42.97, "learning_rate": 4.462873134328359e-05, "loss": 0.0011, "step": 46064 }, { "epoch": 42.97, "learning_rate": 4.4628264925373136e-05, "loss": 0.0, "step": 46068 }, { "epoch": 42.98, "learning_rate": 4.4627798507462685e-05, "loss": 0.0, "step": 46072 }, { "epoch": 42.98, "learning_rate": 4.462733208955224e-05, "loss": 0.0008, "step": 46076 }, { "epoch": 42.98, "learning_rate": 4.4626865671641794e-05, "loss": 0.0051, "step": 46080 }, { "epoch": 42.99, "learning_rate": 4.462639925373134e-05, "loss": 0.0002, "step": 46084 }, { "epoch": 42.99, "learning_rate": 4.46259328358209e-05, "loss": 0.0, "step": 46088 }, { "epoch": 43.0, "learning_rate": 4.462546641791045e-05, "loss": 0.0, "step": 46092 }, { "epoch": 43.0, "learning_rate": 4.4625e-05, "loss": 0.0002, "step": 46096 }, { "epoch": 43.0, "learning_rate": 4.4624533582089555e-05, "loss": 0.0, "step": 46100 }, { "epoch": 43.01, "learning_rate": 4.4624067164179104e-05, "loss": 0.007, "step": 46104 }, { "epoch": 43.01, "learning_rate": 4.462360074626866e-05, "loss": 0.0003, "step": 46108 }, { "epoch": 43.01, "learning_rate": 4.462313432835821e-05, "loss": 0.0023, "step": 46112 }, { "epoch": 43.02, "learning_rate": 4.462266791044776e-05, "loss": 0.0001, "step": 46116 }, { "epoch": 43.02, "learning_rate": 4.4622201492537316e-05, "loss": 0.0003, "step": 46120 }, { "epoch": 43.03, "learning_rate": 4.462173507462687e-05, "loss": 0.0001, "step": 46124 }, { "epoch": 43.03, "learning_rate": 4.462126865671642e-05, "loss": 0.0001, "step": 46128 }, { "epoch": 43.03, "learning_rate": 4.462080223880597e-05, "loss": 0.0006, "step": 46132 }, { "epoch": 43.04, "learning_rate": 4.462033582089552e-05, "loss": 0.0, "step": 46136 }, { "epoch": 43.04, "learning_rate": 4.461986940298508e-05, "loss": 0.0001, "step": 46140 }, { "epoch": 43.04, "learning_rate": 4.4619402985074626e-05, "loss": 0.0, "step": 46144 }, { "epoch": 43.05, "learning_rate": 4.461893656716418e-05, "loss": 0.0, "step": 46148 }, { "epoch": 43.05, "learning_rate": 4.4618470149253735e-05, "loss": 0.0001, "step": 46152 }, { "epoch": 43.06, "learning_rate": 4.4618003731343283e-05, "loss": 0.0, "step": 46156 }, { "epoch": 43.06, "learning_rate": 4.461753731343284e-05, "loss": 0.0, "step": 46160 }, { "epoch": 43.06, "learning_rate": 4.4617070895522386e-05, "loss": 0.0, "step": 46164 }, { "epoch": 43.07, "learning_rate": 4.461660447761195e-05, "loss": 0.0, "step": 46168 }, { "epoch": 43.07, "learning_rate": 4.4616138059701496e-05, "loss": 0.0, "step": 46172 }, { "epoch": 43.07, "learning_rate": 4.4615671641791044e-05, "loss": 0.0001, "step": 46176 }, { "epoch": 43.08, "learning_rate": 4.46152052238806e-05, "loss": 0.0003, "step": 46180 }, { "epoch": 43.08, "learning_rate": 4.4614738805970154e-05, "loss": 0.0001, "step": 46184 }, { "epoch": 43.09, "learning_rate": 4.46142723880597e-05, "loss": 0.0, "step": 46188 }, { "epoch": 43.09, "learning_rate": 4.461380597014926e-05, "loss": 0.0, "step": 46192 }, { "epoch": 43.09, "learning_rate": 4.4613339552238805e-05, "loss": 0.0001, "step": 46196 }, { "epoch": 43.1, "learning_rate": 4.461287313432836e-05, "loss": 0.0, "step": 46200 }, { "epoch": 43.1, "learning_rate": 4.4612406716417915e-05, "loss": 0.0, "step": 46204 }, { "epoch": 43.1, "learning_rate": 4.461194029850746e-05, "loss": 0.0002, "step": 46208 }, { "epoch": 43.11, "learning_rate": 4.461147388059702e-05, "loss": 0.0, "step": 46212 }, { "epoch": 43.11, "learning_rate": 4.461100746268657e-05, "loss": 0.0012, "step": 46216 }, { "epoch": 43.12, "learning_rate": 4.461054104477612e-05, "loss": 0.0009, "step": 46220 }, { "epoch": 43.12, "learning_rate": 4.461007462686567e-05, "loss": 0.0003, "step": 46224 }, { "epoch": 43.12, "learning_rate": 4.460960820895523e-05, "loss": 0.0, "step": 46228 }, { "epoch": 43.13, "learning_rate": 4.460914179104478e-05, "loss": 0.0, "step": 46232 }, { "epoch": 43.13, "learning_rate": 4.460867537313433e-05, "loss": 0.0001, "step": 46236 }, { "epoch": 43.13, "learning_rate": 4.460820895522388e-05, "loss": 0.0, "step": 46240 }, { "epoch": 43.14, "learning_rate": 4.460774253731344e-05, "loss": 0.0, "step": 46244 }, { "epoch": 43.14, "learning_rate": 4.4607276119402985e-05, "loss": 0.0, "step": 46248 }, { "epoch": 43.15, "learning_rate": 4.460680970149254e-05, "loss": 0.0, "step": 46252 }, { "epoch": 43.15, "learning_rate": 4.460634328358209e-05, "loss": 0.0, "step": 46256 }, { "epoch": 43.15, "learning_rate": 4.460587686567164e-05, "loss": 0.0, "step": 46260 }, { "epoch": 43.16, "learning_rate": 4.46054104477612e-05, "loss": 0.0001, "step": 46264 }, { "epoch": 43.16, "learning_rate": 4.4604944029850746e-05, "loss": 0.0, "step": 46268 }, { "epoch": 43.16, "learning_rate": 4.46044776119403e-05, "loss": 0.0077, "step": 46272 }, { "epoch": 43.17, "learning_rate": 4.4604011194029856e-05, "loss": 0.0, "step": 46276 }, { "epoch": 43.17, "learning_rate": 4.4603544776119404e-05, "loss": 0.0, "step": 46280 }, { "epoch": 43.18, "learning_rate": 4.460307835820895e-05, "loss": 0.0, "step": 46284 }, { "epoch": 43.18, "learning_rate": 4.4602611940298514e-05, "loss": 0.0001, "step": 46288 }, { "epoch": 43.18, "learning_rate": 4.460214552238806e-05, "loss": 0.0015, "step": 46292 }, { "epoch": 43.19, "learning_rate": 4.460167910447761e-05, "loss": 0.0, "step": 46296 }, { "epoch": 43.19, "learning_rate": 4.4601212686567165e-05, "loss": 0.0, "step": 46300 }, { "epoch": 43.19, "learning_rate": 4.460074626865672e-05, "loss": 0.0015, "step": 46304 }, { "epoch": 43.2, "learning_rate": 4.460027985074627e-05, "loss": 0.0, "step": 46308 }, { "epoch": 43.2, "learning_rate": 4.459981343283582e-05, "loss": 0.0, "step": 46312 }, { "epoch": 43.21, "learning_rate": 4.459934701492537e-05, "loss": 0.0002, "step": 46316 }, { "epoch": 43.21, "learning_rate": 4.4598880597014926e-05, "loss": 0.0, "step": 46320 }, { "epoch": 43.21, "learning_rate": 4.459841417910448e-05, "loss": 0.0046, "step": 46324 }, { "epoch": 43.22, "learning_rate": 4.459794776119403e-05, "loss": 0.0, "step": 46328 }, { "epoch": 43.22, "learning_rate": 4.4597481343283584e-05, "loss": 0.0005, "step": 46332 }, { "epoch": 43.22, "learning_rate": 4.459701492537314e-05, "loss": 0.0001, "step": 46336 }, { "epoch": 43.23, "learning_rate": 4.459654850746269e-05, "loss": 0.0002, "step": 46340 }, { "epoch": 43.23, "learning_rate": 4.459608208955224e-05, "loss": 0.0006, "step": 46344 }, { "epoch": 43.24, "learning_rate": 4.45956156716418e-05, "loss": 0.0006, "step": 46348 }, { "epoch": 43.24, "learning_rate": 4.4595149253731345e-05, "loss": 0.0003, "step": 46352 }, { "epoch": 43.24, "learning_rate": 4.45946828358209e-05, "loss": 0.0, "step": 46356 }, { "epoch": 43.25, "learning_rate": 4.459421641791045e-05, "loss": 0.0003, "step": 46360 }, { "epoch": 43.25, "learning_rate": 4.459375e-05, "loss": 0.0005, "step": 46364 }, { "epoch": 43.25, "learning_rate": 4.459328358208956e-05, "loss": 0.0, "step": 46368 }, { "epoch": 43.26, "learning_rate": 4.4592817164179106e-05, "loss": 0.0005, "step": 46372 }, { "epoch": 43.26, "learning_rate": 4.4592350746268654e-05, "loss": 0.0, "step": 46376 }, { "epoch": 43.26, "learning_rate": 4.4591884328358216e-05, "loss": 0.0, "step": 46380 }, { "epoch": 43.27, "learning_rate": 4.4591417910447764e-05, "loss": 0.0, "step": 46384 }, { "epoch": 43.27, "learning_rate": 4.459095149253731e-05, "loss": 0.0001, "step": 46388 }, { "epoch": 43.28, "learning_rate": 4.459048507462687e-05, "loss": 0.0, "step": 46392 }, { "epoch": 43.28, "learning_rate": 4.459001865671642e-05, "loss": 0.001, "step": 46396 }, { "epoch": 43.28, "learning_rate": 4.458955223880597e-05, "loss": 0.0014, "step": 46400 }, { "epoch": 43.29, "learning_rate": 4.4589085820895525e-05, "loss": 0.0008, "step": 46404 }, { "epoch": 43.29, "learning_rate": 4.458861940298508e-05, "loss": 0.0, "step": 46408 }, { "epoch": 43.29, "learning_rate": 4.458815298507463e-05, "loss": 0.0, "step": 46412 }, { "epoch": 43.3, "learning_rate": 4.458768656716418e-05, "loss": 0.0011, "step": 46416 }, { "epoch": 43.3, "learning_rate": 4.458722014925373e-05, "loss": 0.0, "step": 46420 }, { "epoch": 43.31, "learning_rate": 4.4586753731343286e-05, "loss": 0.0, "step": 46424 }, { "epoch": 43.31, "learning_rate": 4.458628731343284e-05, "loss": 0.0001, "step": 46428 }, { "epoch": 43.31, "learning_rate": 4.458582089552239e-05, "loss": 0.0, "step": 46432 }, { "epoch": 43.32, "learning_rate": 4.458535447761194e-05, "loss": 0.0, "step": 46436 }, { "epoch": 43.32, "learning_rate": 4.45848880597015e-05, "loss": 0.0009, "step": 46440 }, { "epoch": 43.32, "learning_rate": 4.458442164179105e-05, "loss": 0.0002, "step": 46444 }, { "epoch": 43.33, "learning_rate": 4.4583955223880595e-05, "loss": 0.0, "step": 46448 }, { "epoch": 43.33, "learning_rate": 4.458348880597015e-05, "loss": 0.0021, "step": 46452 }, { "epoch": 43.34, "learning_rate": 4.4583022388059705e-05, "loss": 0.0, "step": 46456 }, { "epoch": 43.34, "learning_rate": 4.458255597014925e-05, "loss": 0.0012, "step": 46460 }, { "epoch": 43.34, "learning_rate": 4.458208955223881e-05, "loss": 0.0027, "step": 46464 }, { "epoch": 43.35, "learning_rate": 4.458162313432836e-05, "loss": 0.0, "step": 46468 }, { "epoch": 43.35, "learning_rate": 4.458115671641791e-05, "loss": 0.0001, "step": 46472 }, { "epoch": 43.35, "learning_rate": 4.4580690298507466e-05, "loss": 0.0002, "step": 46476 }, { "epoch": 43.36, "learning_rate": 4.4580223880597014e-05, "loss": 0.0002, "step": 46480 }, { "epoch": 43.36, "learning_rate": 4.457975746268657e-05, "loss": 0.0007, "step": 46484 }, { "epoch": 43.37, "learning_rate": 4.4579291044776124e-05, "loss": 0.0002, "step": 46488 }, { "epoch": 43.37, "learning_rate": 4.457882462686567e-05, "loss": 0.0009, "step": 46492 }, { "epoch": 43.37, "learning_rate": 4.457835820895523e-05, "loss": 0.0063, "step": 46496 }, { "epoch": 43.38, "learning_rate": 4.457789179104478e-05, "loss": 0.0004, "step": 46500 }, { "epoch": 43.38, "eval_exact_match": 0.7263056092843327, "eval_exec": 0.758220502901354, "eval_loss": 0.437593549489975, "eval_runtime": 1140.2178, "eval_samples_per_second": 0.907, "step": 46500 }, { "epoch": 43.38, "learning_rate": 4.457742537313433e-05, "loss": 0.0002, "step": 46504 }, { "epoch": 43.38, "learning_rate": 4.4576958955223885e-05, "loss": 0.0, "step": 46508 }, { "epoch": 43.39, "learning_rate": 4.457649253731343e-05, "loss": 0.0, "step": 46512 }, { "epoch": 43.39, "learning_rate": 4.457602611940299e-05, "loss": 0.0004, "step": 46516 }, { "epoch": 43.4, "learning_rate": 4.457555970149254e-05, "loss": 0.0001, "step": 46520 }, { "epoch": 43.4, "learning_rate": 4.457509328358209e-05, "loss": 0.0053, "step": 46524 }, { "epoch": 43.4, "learning_rate": 4.4574626865671646e-05, "loss": 0.0002, "step": 46528 }, { "epoch": 43.41, "learning_rate": 4.45741604477612e-05, "loss": 0.0, "step": 46532 }, { "epoch": 43.41, "learning_rate": 4.457369402985075e-05, "loss": 0.0007, "step": 46536 }, { "epoch": 43.41, "learning_rate": 4.45732276119403e-05, "loss": 0.0001, "step": 46540 }, { "epoch": 43.42, "learning_rate": 4.457276119402985e-05, "loss": 0.0001, "step": 46544 }, { "epoch": 43.42, "learning_rate": 4.457229477611941e-05, "loss": 0.0, "step": 46548 }, { "epoch": 43.43, "learning_rate": 4.4571828358208955e-05, "loss": 0.0, "step": 46552 }, { "epoch": 43.43, "learning_rate": 4.457136194029851e-05, "loss": 0.0051, "step": 46556 }, { "epoch": 43.43, "learning_rate": 4.4570895522388065e-05, "loss": 0.0003, "step": 46560 }, { "epoch": 43.44, "learning_rate": 4.457042910447761e-05, "loss": 0.0001, "step": 46564 }, { "epoch": 43.44, "learning_rate": 4.456996268656717e-05, "loss": 0.0003, "step": 46568 }, { "epoch": 43.44, "learning_rate": 4.4569496268656716e-05, "loss": 0.0001, "step": 46572 }, { "epoch": 43.45, "learning_rate": 4.456902985074627e-05, "loss": 0.0, "step": 46576 }, { "epoch": 43.45, "learning_rate": 4.4568563432835826e-05, "loss": 0.0, "step": 46580 }, { "epoch": 43.46, "learning_rate": 4.4568097014925374e-05, "loss": 0.0002, "step": 46584 }, { "epoch": 43.46, "learning_rate": 4.456763059701493e-05, "loss": 0.0, "step": 46588 }, { "epoch": 43.46, "learning_rate": 4.4567164179104484e-05, "loss": 0.0, "step": 46592 }, { "epoch": 43.47, "learning_rate": 4.456669776119403e-05, "loss": 0.0, "step": 46596 }, { "epoch": 43.47, "learning_rate": 4.456623134328358e-05, "loss": 0.0, "step": 46600 }, { "epoch": 43.47, "learning_rate": 4.4565764925373135e-05, "loss": 0.0, "step": 46604 }, { "epoch": 43.48, "learning_rate": 4.456529850746269e-05, "loss": 0.0005, "step": 46608 }, { "epoch": 43.48, "learning_rate": 4.456483208955224e-05, "loss": 0.0, "step": 46612 }, { "epoch": 43.49, "learning_rate": 4.456436567164179e-05, "loss": 0.0, "step": 46616 }, { "epoch": 43.49, "learning_rate": 4.456389925373135e-05, "loss": 0.0001, "step": 46620 }, { "epoch": 43.49, "learning_rate": 4.4563432835820896e-05, "loss": 0.0, "step": 46624 }, { "epoch": 43.5, "learning_rate": 4.456296641791045e-05, "loss": 0.0015, "step": 46628 }, { "epoch": 43.5, "learning_rate": 4.45625e-05, "loss": 0.0, "step": 46632 }, { "epoch": 43.5, "learning_rate": 4.4562033582089554e-05, "loss": 0.0, "step": 46636 }, { "epoch": 43.51, "learning_rate": 4.456156716417911e-05, "loss": 0.0002, "step": 46640 }, { "epoch": 43.51, "learning_rate": 4.456110074626866e-05, "loss": 0.0, "step": 46644 }, { "epoch": 43.51, "learning_rate": 4.4560634328358205e-05, "loss": 0.0002, "step": 46648 }, { "epoch": 43.52, "learning_rate": 4.456016791044777e-05, "loss": 0.0001, "step": 46652 }, { "epoch": 43.52, "learning_rate": 4.4559701492537315e-05, "loss": 0.0001, "step": 46656 }, { "epoch": 43.53, "learning_rate": 4.455923507462687e-05, "loss": 0.0, "step": 46660 }, { "epoch": 43.53, "learning_rate": 4.455876865671642e-05, "loss": 0.0002, "step": 46664 }, { "epoch": 43.53, "learning_rate": 4.455830223880597e-05, "loss": 0.0, "step": 46668 }, { "epoch": 43.54, "learning_rate": 4.455783582089553e-05, "loss": 0.0003, "step": 46672 }, { "epoch": 43.54, "learning_rate": 4.4557369402985076e-05, "loss": 0.0001, "step": 46676 }, { "epoch": 43.54, "learning_rate": 4.455690298507463e-05, "loss": 0.0001, "step": 46680 }, { "epoch": 43.55, "learning_rate": 4.4556436567164186e-05, "loss": 0.0, "step": 46684 }, { "epoch": 43.55, "learning_rate": 4.4555970149253734e-05, "loss": 0.0, "step": 46688 }, { "epoch": 43.56, "learning_rate": 4.455550373134328e-05, "loss": 0.0007, "step": 46692 }, { "epoch": 43.56, "learning_rate": 4.4555037313432844e-05, "loss": 0.0042, "step": 46696 }, { "epoch": 43.56, "learning_rate": 4.455457089552239e-05, "loss": 0.0004, "step": 46700 }, { "epoch": 43.57, "learning_rate": 4.455410447761194e-05, "loss": 0.0001, "step": 46704 }, { "epoch": 43.57, "learning_rate": 4.4553638059701495e-05, "loss": 0.0, "step": 46708 }, { "epoch": 43.57, "learning_rate": 4.455317164179105e-05, "loss": 0.0002, "step": 46712 }, { "epoch": 43.58, "learning_rate": 4.45527052238806e-05, "loss": 0.0013, "step": 46716 }, { "epoch": 43.58, "learning_rate": 4.455223880597015e-05, "loss": 0.0001, "step": 46720 }, { "epoch": 43.59, "learning_rate": 4.45517723880597e-05, "loss": 0.0, "step": 46724 }, { "epoch": 43.59, "learning_rate": 4.4551305970149256e-05, "loss": 0.0, "step": 46728 }, { "epoch": 43.59, "learning_rate": 4.455083955223881e-05, "loss": 0.0, "step": 46732 }, { "epoch": 43.6, "learning_rate": 4.455037313432836e-05, "loss": 0.0001, "step": 46736 }, { "epoch": 43.6, "learning_rate": 4.4549906716417914e-05, "loss": 0.0001, "step": 46740 }, { "epoch": 43.6, "learning_rate": 4.454944029850747e-05, "loss": 0.0, "step": 46744 }, { "epoch": 43.61, "learning_rate": 4.454897388059702e-05, "loss": 0.0, "step": 46748 }, { "epoch": 43.61, "learning_rate": 4.4548507462686565e-05, "loss": 0.0, "step": 46752 }, { "epoch": 43.62, "learning_rate": 4.454804104477612e-05, "loss": 0.0001, "step": 46756 }, { "epoch": 43.62, "learning_rate": 4.4547574626865675e-05, "loss": 0.0, "step": 46760 }, { "epoch": 43.62, "learning_rate": 4.454710820895522e-05, "loss": 0.0, "step": 46764 }, { "epoch": 43.63, "learning_rate": 4.454664179104478e-05, "loss": 0.0, "step": 46768 }, { "epoch": 43.63, "learning_rate": 4.454617537313433e-05, "loss": 0.0, "step": 46772 }, { "epoch": 43.63, "learning_rate": 4.454570895522388e-05, "loss": 0.0004, "step": 46776 }, { "epoch": 43.64, "learning_rate": 4.4545242537313436e-05, "loss": 0.0009, "step": 46780 }, { "epoch": 43.64, "learning_rate": 4.4544776119402984e-05, "loss": 0.0, "step": 46784 }, { "epoch": 43.65, "learning_rate": 4.454430970149254e-05, "loss": 0.0, "step": 46788 }, { "epoch": 43.65, "learning_rate": 4.4543843283582094e-05, "loss": 0.0025, "step": 46792 }, { "epoch": 43.65, "learning_rate": 4.454337686567164e-05, "loss": 0.0, "step": 46796 }, { "epoch": 43.66, "learning_rate": 4.45429104477612e-05, "loss": 0.002, "step": 46800 }, { "epoch": 43.66, "learning_rate": 4.454244402985075e-05, "loss": 0.0, "step": 46804 }, { "epoch": 43.66, "learning_rate": 4.45419776119403e-05, "loss": 0.0002, "step": 46808 }, { "epoch": 43.67, "learning_rate": 4.454151119402985e-05, "loss": 0.0, "step": 46812 }, { "epoch": 43.67, "learning_rate": 4.45410447761194e-05, "loss": 0.0014, "step": 46816 }, { "epoch": 43.68, "learning_rate": 4.454057835820896e-05, "loss": 0.0003, "step": 46820 }, { "epoch": 43.68, "learning_rate": 4.454011194029851e-05, "loss": 0.0001, "step": 46824 }, { "epoch": 43.68, "learning_rate": 4.453964552238806e-05, "loss": 0.0, "step": 46828 }, { "epoch": 43.69, "learning_rate": 4.4539179104477616e-05, "loss": 0.0008, "step": 46832 }, { "epoch": 43.69, "learning_rate": 4.453871268656717e-05, "loss": 0.0144, "step": 46836 }, { "epoch": 43.69, "learning_rate": 4.453824626865672e-05, "loss": 0.0022, "step": 46840 }, { "epoch": 43.7, "learning_rate": 4.453777985074627e-05, "loss": 0.0, "step": 46844 }, { "epoch": 43.7, "learning_rate": 4.453731343283583e-05, "loss": 0.0001, "step": 46848 }, { "epoch": 43.71, "learning_rate": 4.4536847014925377e-05, "loss": 0.0, "step": 46852 }, { "epoch": 43.71, "learning_rate": 4.4536380597014925e-05, "loss": 0.0012, "step": 46856 }, { "epoch": 43.71, "learning_rate": 4.453591417910448e-05, "loss": 0.0, "step": 46860 }, { "epoch": 43.72, "learning_rate": 4.4535447761194035e-05, "loss": 0.0001, "step": 46864 }, { "epoch": 43.72, "learning_rate": 4.453498134328358e-05, "loss": 0.0003, "step": 46868 }, { "epoch": 43.72, "learning_rate": 4.453451492537314e-05, "loss": 0.0, "step": 46872 }, { "epoch": 43.73, "learning_rate": 4.4534048507462686e-05, "loss": 0.0012, "step": 46876 }, { "epoch": 43.73, "learning_rate": 4.453358208955224e-05, "loss": 0.0, "step": 46880 }, { "epoch": 43.73, "learning_rate": 4.4533115671641796e-05, "loss": 0.0, "step": 46884 }, { "epoch": 43.74, "learning_rate": 4.4532649253731344e-05, "loss": 0.0, "step": 46888 }, { "epoch": 43.74, "learning_rate": 4.45321828358209e-05, "loss": 0.0001, "step": 46892 }, { "epoch": 43.75, "learning_rate": 4.4531716417910453e-05, "loss": 0.0, "step": 46896 }, { "epoch": 43.75, "learning_rate": 4.453125e-05, "loss": 0.0226, "step": 46900 }, { "epoch": 43.75, "learning_rate": 4.453078358208955e-05, "loss": 0.0001, "step": 46904 }, { "epoch": 43.76, "learning_rate": 4.453031716417911e-05, "loss": 0.0018, "step": 46908 }, { "epoch": 43.76, "learning_rate": 4.452985074626866e-05, "loss": 0.0001, "step": 46912 }, { "epoch": 43.76, "learning_rate": 4.452938432835821e-05, "loss": 0.0017, "step": 46916 }, { "epoch": 43.77, "learning_rate": 4.452891791044776e-05, "loss": 0.0, "step": 46920 }, { "epoch": 43.77, "learning_rate": 4.452845149253732e-05, "loss": 0.0002, "step": 46924 }, { "epoch": 43.78, "learning_rate": 4.4527985074626866e-05, "loss": 0.0, "step": 46928 }, { "epoch": 43.78, "learning_rate": 4.452751865671642e-05, "loss": 0.0, "step": 46932 }, { "epoch": 43.78, "learning_rate": 4.452705223880597e-05, "loss": 0.0019, "step": 46936 }, { "epoch": 43.79, "learning_rate": 4.4526585820895524e-05, "loss": 0.0, "step": 46940 }, { "epoch": 43.79, "learning_rate": 4.452611940298508e-05, "loss": 0.0003, "step": 46944 }, { "epoch": 43.79, "learning_rate": 4.452565298507463e-05, "loss": 0.0004, "step": 46948 }, { "epoch": 43.8, "learning_rate": 4.452518656716418e-05, "loss": 0.0, "step": 46952 }, { "epoch": 43.8, "learning_rate": 4.4524720149253736e-05, "loss": 0.0001, "step": 46956 }, { "epoch": 43.81, "learning_rate": 4.4524253731343285e-05, "loss": 0.0, "step": 46960 }, { "epoch": 43.81, "learning_rate": 4.452378731343283e-05, "loss": 0.0004, "step": 46964 }, { "epoch": 43.81, "learning_rate": 4.4523320895522394e-05, "loss": 0.0001, "step": 46968 }, { "epoch": 43.82, "learning_rate": 4.452285447761194e-05, "loss": 0.0, "step": 46972 }, { "epoch": 43.82, "learning_rate": 4.452238805970149e-05, "loss": 0.0, "step": 46976 }, { "epoch": 43.82, "learning_rate": 4.4521921641791046e-05, "loss": 0.0001, "step": 46980 }, { "epoch": 43.83, "learning_rate": 4.45214552238806e-05, "loss": 0.0084, "step": 46984 }, { "epoch": 43.83, "learning_rate": 4.4520988805970155e-05, "loss": 0.0007, "step": 46988 }, { "epoch": 43.84, "learning_rate": 4.4520522388059703e-05, "loss": 0.0, "step": 46992 }, { "epoch": 43.84, "learning_rate": 4.452005597014925e-05, "loss": 0.0, "step": 46996 }, { "epoch": 43.84, "learning_rate": 4.451958955223881e-05, "loss": 0.0, "step": 47000 }, { "epoch": 43.84, "eval_exact_match": 0.7321083172147002, "eval_exec": 0.7688588007736944, "eval_loss": 0.4131454825401306, "eval_runtime": 1115.6019, "eval_samples_per_second": 0.927, "step": 47000 }, { "epoch": 43.85, "learning_rate": 4.451912313432836e-05, "loss": 0.0, "step": 47004 }, { "epoch": 43.85, "learning_rate": 4.451865671641791e-05, "loss": 0.0021, "step": 47008 }, { "epoch": 43.85, "learning_rate": 4.4518190298507464e-05, "loss": 0.002, "step": 47012 }, { "epoch": 43.86, "learning_rate": 4.451772388059702e-05, "loss": 0.0084, "step": 47016 }, { "epoch": 43.86, "learning_rate": 4.451725746268657e-05, "loss": 0.0002, "step": 47020 }, { "epoch": 43.87, "learning_rate": 4.451679104477612e-05, "loss": 0.0007, "step": 47024 }, { "epoch": 43.87, "learning_rate": 4.451632462686568e-05, "loss": 0.0007, "step": 47028 }, { "epoch": 43.87, "learning_rate": 4.4515858208955225e-05, "loss": 0.0001, "step": 47032 }, { "epoch": 43.88, "learning_rate": 4.451539179104478e-05, "loss": 0.0001, "step": 47036 }, { "epoch": 43.88, "learning_rate": 4.451492537313433e-05, "loss": 0.0, "step": 47040 }, { "epoch": 43.88, "learning_rate": 4.4514458955223883e-05, "loss": 0.0002, "step": 47044 }, { "epoch": 43.89, "learning_rate": 4.451399253731344e-05, "loss": 0.0051, "step": 47048 }, { "epoch": 43.89, "learning_rate": 4.4513526119402986e-05, "loss": 0.0004, "step": 47052 }, { "epoch": 43.9, "learning_rate": 4.4513059701492535e-05, "loss": 0.0001, "step": 47056 }, { "epoch": 43.9, "learning_rate": 4.4512593283582096e-05, "loss": 0.0, "step": 47060 }, { "epoch": 43.9, "learning_rate": 4.4512126865671644e-05, "loss": 0.0005, "step": 47064 }, { "epoch": 43.91, "learning_rate": 4.451166044776119e-05, "loss": 0.0, "step": 47068 }, { "epoch": 43.91, "learning_rate": 4.451119402985075e-05, "loss": 0.0033, "step": 47072 }, { "epoch": 43.91, "learning_rate": 4.45107276119403e-05, "loss": 0.0003, "step": 47076 }, { "epoch": 43.92, "learning_rate": 4.451026119402985e-05, "loss": 0.0016, "step": 47080 }, { "epoch": 43.92, "learning_rate": 4.4509794776119405e-05, "loss": 0.0, "step": 47084 }, { "epoch": 43.93, "learning_rate": 4.450932835820896e-05, "loss": 0.001, "step": 47088 }, { "epoch": 43.93, "learning_rate": 4.450886194029851e-05, "loss": 0.0, "step": 47092 }, { "epoch": 43.93, "learning_rate": 4.450839552238806e-05, "loss": 0.0015, "step": 47096 }, { "epoch": 43.94, "learning_rate": 4.450792910447761e-05, "loss": 0.0, "step": 47100 }, { "epoch": 43.94, "learning_rate": 4.4507462686567166e-05, "loss": 0.001, "step": 47104 }, { "epoch": 43.94, "learning_rate": 4.450699626865672e-05, "loss": 0.0, "step": 47108 }, { "epoch": 43.95, "learning_rate": 4.450652985074627e-05, "loss": 0.0012, "step": 47112 }, { "epoch": 43.95, "learning_rate": 4.450606343283582e-05, "loss": 0.0, "step": 47116 }, { "epoch": 43.96, "learning_rate": 4.450559701492538e-05, "loss": 0.0, "step": 47120 }, { "epoch": 43.96, "learning_rate": 4.450513059701493e-05, "loss": 0.0018, "step": 47124 }, { "epoch": 43.96, "learning_rate": 4.4504664179104475e-05, "loss": 0.0002, "step": 47128 }, { "epoch": 43.97, "learning_rate": 4.450419776119403e-05, "loss": 0.0, "step": 47132 }, { "epoch": 43.97, "learning_rate": 4.4503731343283585e-05, "loss": 0.001, "step": 47136 }, { "epoch": 43.97, "learning_rate": 4.4503264925373133e-05, "loss": 0.0003, "step": 47140 }, { "epoch": 43.98, "learning_rate": 4.450279850746269e-05, "loss": 0.0081, "step": 47144 }, { "epoch": 43.98, "learning_rate": 4.450233208955224e-05, "loss": 0.0001, "step": 47148 }, { "epoch": 43.98, "learning_rate": 4.45018656716418e-05, "loss": 0.0001, "step": 47152 }, { "epoch": 43.99, "learning_rate": 4.4501399253731346e-05, "loss": 0.0, "step": 47156 }, { "epoch": 43.99, "learning_rate": 4.4500932835820894e-05, "loss": 0.0002, "step": 47160 }, { "epoch": 44.0, "learning_rate": 4.450046641791045e-05, "loss": 0.0, "step": 47164 }, { "epoch": 44.0, "learning_rate": 4.4500000000000004e-05, "loss": 0.0001, "step": 47168 }, { "epoch": 44.0, "learning_rate": 4.449953358208955e-05, "loss": 0.0001, "step": 47172 }, { "epoch": 44.01, "learning_rate": 4.449906716417911e-05, "loss": 0.0, "step": 47176 }, { "epoch": 44.01, "learning_rate": 4.449860074626866e-05, "loss": 0.0001, "step": 47180 }, { "epoch": 44.01, "learning_rate": 4.449813432835821e-05, "loss": 0.0017, "step": 47184 }, { "epoch": 44.02, "learning_rate": 4.4497667910447765e-05, "loss": 0.0003, "step": 47188 }, { "epoch": 44.02, "learning_rate": 4.449720149253731e-05, "loss": 0.0002, "step": 47192 }, { "epoch": 44.03, "learning_rate": 4.449673507462687e-05, "loss": 0.0, "step": 47196 }, { "epoch": 44.03, "learning_rate": 4.449626865671642e-05, "loss": 0.0003, "step": 47200 }, { "epoch": 44.03, "learning_rate": 4.449580223880597e-05, "loss": 0.0058, "step": 47204 }, { "epoch": 44.04, "learning_rate": 4.4495335820895526e-05, "loss": 0.0, "step": 47208 }, { "epoch": 44.04, "learning_rate": 4.449486940298508e-05, "loss": 0.0086, "step": 47212 }, { "epoch": 44.04, "learning_rate": 4.449440298507463e-05, "loss": 0.001, "step": 47216 }, { "epoch": 44.05, "learning_rate": 4.449393656716418e-05, "loss": 0.0016, "step": 47220 }, { "epoch": 44.05, "learning_rate": 4.449347014925373e-05, "loss": 0.0015, "step": 47224 }, { "epoch": 44.06, "learning_rate": 4.449300373134329e-05, "loss": 0.0006, "step": 47228 }, { "epoch": 44.06, "learning_rate": 4.4492537313432835e-05, "loss": 0.0002, "step": 47232 }, { "epoch": 44.06, "learning_rate": 4.449207089552239e-05, "loss": 0.0002, "step": 47236 }, { "epoch": 44.07, "learning_rate": 4.4491604477611945e-05, "loss": 0.0002, "step": 47240 }, { "epoch": 44.07, "learning_rate": 4.449113805970149e-05, "loss": 0.0001, "step": 47244 }, { "epoch": 44.07, "learning_rate": 4.449067164179105e-05, "loss": 0.0001, "step": 47248 }, { "epoch": 44.08, "learning_rate": 4.4490205223880596e-05, "loss": 0.002, "step": 47252 }, { "epoch": 44.08, "learning_rate": 4.448973880597015e-05, "loss": 0.0001, "step": 47256 }, { "epoch": 44.09, "learning_rate": 4.4489272388059706e-05, "loss": 0.0001, "step": 47260 }, { "epoch": 44.09, "learning_rate": 4.4488805970149254e-05, "loss": 0.0001, "step": 47264 }, { "epoch": 44.09, "learning_rate": 4.448833955223881e-05, "loss": 0.0, "step": 47268 }, { "epoch": 44.1, "learning_rate": 4.4487873134328364e-05, "loss": 0.0, "step": 47272 }, { "epoch": 44.1, "learning_rate": 4.448740671641791e-05, "loss": 0.0034, "step": 47276 }, { "epoch": 44.1, "learning_rate": 4.448694029850746e-05, "loss": 0.0004, "step": 47280 }, { "epoch": 44.11, "learning_rate": 4.4486473880597015e-05, "loss": 0.0, "step": 47284 }, { "epoch": 44.11, "learning_rate": 4.448600746268657e-05, "loss": 0.0001, "step": 47288 }, { "epoch": 44.12, "learning_rate": 4.448554104477612e-05, "loss": 0.0002, "step": 47292 }, { "epoch": 44.12, "learning_rate": 4.448507462686567e-05, "loss": 0.001, "step": 47296 }, { "epoch": 44.12, "learning_rate": 4.448460820895523e-05, "loss": 0.0006, "step": 47300 }, { "epoch": 44.13, "learning_rate": 4.4484141791044776e-05, "loss": 0.0003, "step": 47304 }, { "epoch": 44.13, "learning_rate": 4.448367537313433e-05, "loss": 0.0, "step": 47308 }, { "epoch": 44.13, "learning_rate": 4.448320895522388e-05, "loss": 0.0082, "step": 47312 }, { "epoch": 44.14, "learning_rate": 4.448274253731344e-05, "loss": 0.0, "step": 47316 }, { "epoch": 44.14, "learning_rate": 4.448227611940299e-05, "loss": 0.0001, "step": 47320 }, { "epoch": 44.15, "learning_rate": 4.448180970149254e-05, "loss": 0.0001, "step": 47324 }, { "epoch": 44.15, "learning_rate": 4.448134328358209e-05, "loss": 0.0, "step": 47328 }, { "epoch": 44.15, "learning_rate": 4.448087686567165e-05, "loss": 0.0004, "step": 47332 }, { "epoch": 44.16, "learning_rate": 4.4480410447761195e-05, "loss": 0.0002, "step": 47336 }, { "epoch": 44.16, "learning_rate": 4.447994402985075e-05, "loss": 0.0002, "step": 47340 }, { "epoch": 44.16, "learning_rate": 4.44794776119403e-05, "loss": 0.0, "step": 47344 }, { "epoch": 44.17, "learning_rate": 4.447901119402985e-05, "loss": 0.0, "step": 47348 }, { "epoch": 44.17, "learning_rate": 4.447854477611941e-05, "loss": 0.0047, "step": 47352 }, { "epoch": 44.18, "learning_rate": 4.4478078358208956e-05, "loss": 0.0003, "step": 47356 }, { "epoch": 44.18, "learning_rate": 4.447761194029851e-05, "loss": 0.0, "step": 47360 }, { "epoch": 44.18, "learning_rate": 4.4477145522388066e-05, "loss": 0.0006, "step": 47364 }, { "epoch": 44.19, "learning_rate": 4.4476679104477614e-05, "loss": 0.0, "step": 47368 }, { "epoch": 44.19, "learning_rate": 4.447621268656716e-05, "loss": 0.0005, "step": 47372 }, { "epoch": 44.19, "learning_rate": 4.4475746268656724e-05, "loss": 0.002, "step": 47376 }, { "epoch": 44.2, "learning_rate": 4.447527985074627e-05, "loss": 0.0, "step": 47380 }, { "epoch": 44.2, "learning_rate": 4.447481343283582e-05, "loss": 0.0141, "step": 47384 }, { "epoch": 44.21, "learning_rate": 4.4474347014925375e-05, "loss": 0.0001, "step": 47388 }, { "epoch": 44.21, "learning_rate": 4.447388059701493e-05, "loss": 0.0002, "step": 47392 }, { "epoch": 44.21, "learning_rate": 4.447341417910448e-05, "loss": 0.0001, "step": 47396 }, { "epoch": 44.22, "learning_rate": 4.447294776119403e-05, "loss": 0.0, "step": 47400 }, { "epoch": 44.22, "learning_rate": 4.447248134328358e-05, "loss": 0.0027, "step": 47404 }, { "epoch": 44.22, "learning_rate": 4.4472014925373136e-05, "loss": 0.0, "step": 47408 }, { "epoch": 44.23, "learning_rate": 4.447154850746269e-05, "loss": 0.0005, "step": 47412 }, { "epoch": 44.23, "learning_rate": 4.447108208955224e-05, "loss": 0.0, "step": 47416 }, { "epoch": 44.24, "learning_rate": 4.4470615671641794e-05, "loss": 0.0002, "step": 47420 }, { "epoch": 44.24, "learning_rate": 4.447014925373135e-05, "loss": 0.0, "step": 47424 }, { "epoch": 44.24, "learning_rate": 4.44696828358209e-05, "loss": 0.0004, "step": 47428 }, { "epoch": 44.25, "learning_rate": 4.4469216417910445e-05, "loss": 0.0, "step": 47432 }, { "epoch": 44.25, "learning_rate": 4.446875e-05, "loss": 0.001, "step": 47436 }, { "epoch": 44.25, "learning_rate": 4.4468283582089555e-05, "loss": 0.0, "step": 47440 }, { "epoch": 44.26, "learning_rate": 4.44678171641791e-05, "loss": 0.0, "step": 47444 }, { "epoch": 44.26, "learning_rate": 4.446735074626866e-05, "loss": 0.0, "step": 47448 }, { "epoch": 44.26, "learning_rate": 4.446688432835821e-05, "loss": 0.0012, "step": 47452 }, { "epoch": 44.27, "learning_rate": 4.446641791044776e-05, "loss": 0.0001, "step": 47456 }, { "epoch": 44.27, "learning_rate": 4.4465951492537316e-05, "loss": 0.0, "step": 47460 }, { "epoch": 44.28, "learning_rate": 4.4465485074626864e-05, "loss": 0.0, "step": 47464 }, { "epoch": 44.28, "learning_rate": 4.446501865671642e-05, "loss": 0.0001, "step": 47468 }, { "epoch": 44.28, "learning_rate": 4.4464552238805974e-05, "loss": 0.0012, "step": 47472 }, { "epoch": 44.29, "learning_rate": 4.446408582089552e-05, "loss": 0.0035, "step": 47476 }, { "epoch": 44.29, "learning_rate": 4.446361940298508e-05, "loss": 0.007, "step": 47480 }, { "epoch": 44.29, "learning_rate": 4.446315298507463e-05, "loss": 0.0001, "step": 47484 }, { "epoch": 44.3, "learning_rate": 4.446268656716418e-05, "loss": 0.0, "step": 47488 }, { "epoch": 44.3, "learning_rate": 4.4462220149253735e-05, "loss": 0.0, "step": 47492 }, { "epoch": 44.31, "learning_rate": 4.446175373134328e-05, "loss": 0.0, "step": 47496 }, { "epoch": 44.31, "learning_rate": 4.446128731343284e-05, "loss": 0.0004, "step": 47500 }, { "epoch": 44.31, "eval_exact_match": 0.7330754352030948, "eval_exec": 0.7707930367504836, "eval_loss": 0.42296141386032104, "eval_runtime": 1217.964, "eval_samples_per_second": 0.849, "step": 47500 }, { "epoch": 44.31, "learning_rate": 4.446082089552239e-05, "loss": 0.0009, "step": 47504 }, { "epoch": 44.32, "learning_rate": 4.446035447761194e-05, "loss": 0.0001, "step": 47508 }, { "epoch": 44.32, "learning_rate": 4.4459888059701496e-05, "loss": 0.0001, "step": 47512 }, { "epoch": 44.32, "learning_rate": 4.445942164179105e-05, "loss": 0.0003, "step": 47516 }, { "epoch": 44.33, "learning_rate": 4.44589552238806e-05, "loss": 0.0, "step": 47520 }, { "epoch": 44.33, "learning_rate": 4.445848880597015e-05, "loss": 0.0, "step": 47524 }, { "epoch": 44.34, "learning_rate": 4.445802238805971e-05, "loss": 0.0, "step": 47528 }, { "epoch": 44.34, "learning_rate": 4.445755597014926e-05, "loss": 0.0001, "step": 47532 }, { "epoch": 44.34, "learning_rate": 4.4457089552238805e-05, "loss": 0.0001, "step": 47536 }, { "epoch": 44.35, "learning_rate": 4.445662313432836e-05, "loss": 0.0001, "step": 47540 }, { "epoch": 44.35, "learning_rate": 4.4456156716417915e-05, "loss": 0.0, "step": 47544 }, { "epoch": 44.35, "learning_rate": 4.445569029850746e-05, "loss": 0.0001, "step": 47548 }, { "epoch": 44.36, "learning_rate": 4.445522388059702e-05, "loss": 0.0, "step": 47552 }, { "epoch": 44.36, "learning_rate": 4.4454757462686566e-05, "loss": 0.0001, "step": 47556 }, { "epoch": 44.37, "learning_rate": 4.445429104477612e-05, "loss": 0.0001, "step": 47560 }, { "epoch": 44.37, "learning_rate": 4.4453824626865676e-05, "loss": 0.0001, "step": 47564 }, { "epoch": 44.37, "learning_rate": 4.4453358208955224e-05, "loss": 0.0001, "step": 47568 }, { "epoch": 44.38, "learning_rate": 4.445289179104478e-05, "loss": 0.0013, "step": 47572 }, { "epoch": 44.38, "learning_rate": 4.4452425373134334e-05, "loss": 0.0, "step": 47576 }, { "epoch": 44.38, "learning_rate": 4.445195895522388e-05, "loss": 0.0, "step": 47580 }, { "epoch": 44.39, "learning_rate": 4.445149253731343e-05, "loss": 0.0, "step": 47584 }, { "epoch": 44.39, "learning_rate": 4.445102611940299e-05, "loss": 0.0, "step": 47588 }, { "epoch": 44.4, "learning_rate": 4.445055970149254e-05, "loss": 0.0015, "step": 47592 }, { "epoch": 44.4, "learning_rate": 4.445009328358209e-05, "loss": 0.0003, "step": 47596 }, { "epoch": 44.4, "learning_rate": 4.444962686567164e-05, "loss": 0.0001, "step": 47600 }, { "epoch": 44.41, "learning_rate": 4.44491604477612e-05, "loss": 0.0001, "step": 47604 }, { "epoch": 44.41, "learning_rate": 4.4448694029850746e-05, "loss": 0.0, "step": 47608 }, { "epoch": 44.41, "learning_rate": 4.44482276119403e-05, "loss": 0.0, "step": 47612 }, { "epoch": 44.42, "learning_rate": 4.444776119402985e-05, "loss": 0.0001, "step": 47616 }, { "epoch": 44.42, "learning_rate": 4.4447294776119404e-05, "loss": 0.0001, "step": 47620 }, { "epoch": 44.43, "learning_rate": 4.444682835820896e-05, "loss": 0.0068, "step": 47624 }, { "epoch": 44.43, "learning_rate": 4.444636194029851e-05, "loss": 0.0001, "step": 47628 }, { "epoch": 44.43, "learning_rate": 4.444589552238806e-05, "loss": 0.0024, "step": 47632 }, { "epoch": 44.44, "learning_rate": 4.444542910447762e-05, "loss": 0.0, "step": 47636 }, { "epoch": 44.44, "learning_rate": 4.4444962686567165e-05, "loss": 0.0, "step": 47640 }, { "epoch": 44.44, "learning_rate": 4.444449626865672e-05, "loss": 0.0049, "step": 47644 }, { "epoch": 44.45, "learning_rate": 4.4444029850746275e-05, "loss": 0.0, "step": 47648 }, { "epoch": 44.45, "learning_rate": 4.444356343283582e-05, "loss": 0.0, "step": 47652 }, { "epoch": 44.46, "learning_rate": 4.444309701492538e-05, "loss": 0.0, "step": 47656 }, { "epoch": 44.46, "learning_rate": 4.4442630597014926e-05, "loss": 0.0002, "step": 47660 }, { "epoch": 44.46, "learning_rate": 4.444216417910448e-05, "loss": 0.0009, "step": 47664 }, { "epoch": 44.47, "learning_rate": 4.4441697761194036e-05, "loss": 0.0001, "step": 47668 }, { "epoch": 44.47, "learning_rate": 4.4441231343283584e-05, "loss": 0.0001, "step": 47672 }, { "epoch": 44.47, "learning_rate": 4.444076492537313e-05, "loss": 0.0007, "step": 47676 }, { "epoch": 44.48, "learning_rate": 4.4440298507462694e-05, "loss": 0.0, "step": 47680 }, { "epoch": 44.48, "learning_rate": 4.443983208955224e-05, "loss": 0.0, "step": 47684 }, { "epoch": 44.49, "learning_rate": 4.443936567164179e-05, "loss": 0.0003, "step": 47688 }, { "epoch": 44.49, "learning_rate": 4.4438899253731345e-05, "loss": 0.0021, "step": 47692 }, { "epoch": 44.49, "learning_rate": 4.44384328358209e-05, "loss": 0.0001, "step": 47696 }, { "epoch": 44.5, "learning_rate": 4.443796641791045e-05, "loss": 0.0, "step": 47700 }, { "epoch": 44.5, "learning_rate": 4.44375e-05, "loss": 0.0, "step": 47704 }, { "epoch": 44.5, "learning_rate": 4.443703358208956e-05, "loss": 0.0001, "step": 47708 }, { "epoch": 44.51, "learning_rate": 4.4436567164179106e-05, "loss": 0.0002, "step": 47712 }, { "epoch": 44.51, "learning_rate": 4.443610074626866e-05, "loss": 0.0, "step": 47716 }, { "epoch": 44.51, "learning_rate": 4.443563432835821e-05, "loss": 0.0001, "step": 47720 }, { "epoch": 44.52, "learning_rate": 4.4435167910447764e-05, "loss": 0.0001, "step": 47724 }, { "epoch": 44.52, "learning_rate": 4.443470149253732e-05, "loss": 0.0, "step": 47728 }, { "epoch": 44.53, "learning_rate": 4.443423507462687e-05, "loss": 0.0, "step": 47732 }, { "epoch": 44.53, "learning_rate": 4.4433768656716415e-05, "loss": 0.0, "step": 47736 }, { "epoch": 44.53, "learning_rate": 4.4433302238805977e-05, "loss": 0.0035, "step": 47740 }, { "epoch": 44.54, "learning_rate": 4.4432835820895525e-05, "loss": 0.0025, "step": 47744 }, { "epoch": 44.54, "learning_rate": 4.443236940298507e-05, "loss": 0.0, "step": 47748 }, { "epoch": 44.54, "learning_rate": 4.443190298507463e-05, "loss": 0.0, "step": 47752 }, { "epoch": 44.55, "learning_rate": 4.443143656716418e-05, "loss": 0.0031, "step": 47756 }, { "epoch": 44.55, "learning_rate": 4.443097014925373e-05, "loss": 0.0, "step": 47760 }, { "epoch": 44.56, "learning_rate": 4.4430503731343286e-05, "loss": 0.0004, "step": 47764 }, { "epoch": 44.56, "learning_rate": 4.443003731343284e-05, "loss": 0.0003, "step": 47768 }, { "epoch": 44.56, "learning_rate": 4.442957089552239e-05, "loss": 0.0, "step": 47772 }, { "epoch": 44.57, "learning_rate": 4.4429104477611944e-05, "loss": 0.0, "step": 47776 }, { "epoch": 44.57, "learning_rate": 4.442863805970149e-05, "loss": 0.0739, "step": 47780 }, { "epoch": 44.57, "learning_rate": 4.442817164179105e-05, "loss": 0.0, "step": 47784 }, { "epoch": 44.58, "learning_rate": 4.44277052238806e-05, "loss": 0.0, "step": 47788 }, { "epoch": 44.58, "learning_rate": 4.442723880597015e-05, "loss": 0.0, "step": 47792 }, { "epoch": 44.59, "learning_rate": 4.44267723880597e-05, "loss": 0.0002, "step": 47796 }, { "epoch": 44.59, "learning_rate": 4.442630597014926e-05, "loss": 0.0038, "step": 47800 }, { "epoch": 44.59, "learning_rate": 4.442583955223881e-05, "loss": 0.0001, "step": 47804 }, { "epoch": 44.6, "learning_rate": 4.442537313432836e-05, "loss": 0.0005, "step": 47808 }, { "epoch": 44.6, "learning_rate": 4.442490671641791e-05, "loss": 0.0001, "step": 47812 }, { "epoch": 44.6, "learning_rate": 4.4424440298507466e-05, "loss": 0.0005, "step": 47816 }, { "epoch": 44.61, "learning_rate": 4.442397388059702e-05, "loss": 0.0001, "step": 47820 }, { "epoch": 44.61, "learning_rate": 4.442350746268657e-05, "loss": 0.0047, "step": 47824 }, { "epoch": 44.62, "learning_rate": 4.4423041044776124e-05, "loss": 0.0, "step": 47828 }, { "epoch": 44.62, "learning_rate": 4.442257462686568e-05, "loss": 0.0, "step": 47832 }, { "epoch": 44.62, "learning_rate": 4.4422108208955227e-05, "loss": 0.0001, "step": 47836 }, { "epoch": 44.63, "learning_rate": 4.4421641791044775e-05, "loss": 0.0006, "step": 47840 }, { "epoch": 44.63, "learning_rate": 4.442117537313433e-05, "loss": 0.0001, "step": 47844 }, { "epoch": 44.63, "learning_rate": 4.4420708955223885e-05, "loss": 0.0003, "step": 47848 }, { "epoch": 44.64, "learning_rate": 4.442024253731343e-05, "loss": 0.0001, "step": 47852 }, { "epoch": 44.64, "learning_rate": 4.441977611940299e-05, "loss": 0.0005, "step": 47856 }, { "epoch": 44.65, "learning_rate": 4.441930970149254e-05, "loss": 0.0, "step": 47860 }, { "epoch": 44.65, "learning_rate": 4.441884328358209e-05, "loss": 0.0, "step": 47864 }, { "epoch": 44.65, "learning_rate": 4.4418376865671646e-05, "loss": 0.0, "step": 47868 }, { "epoch": 44.66, "learning_rate": 4.4417910447761194e-05, "loss": 0.0, "step": 47872 }, { "epoch": 44.66, "learning_rate": 4.441744402985075e-05, "loss": 0.0003, "step": 47876 }, { "epoch": 44.66, "learning_rate": 4.4416977611940303e-05, "loss": 0.0191, "step": 47880 }, { "epoch": 44.67, "learning_rate": 4.441651119402985e-05, "loss": 0.0, "step": 47884 }, { "epoch": 44.67, "learning_rate": 4.4416044776119406e-05, "loss": 0.0, "step": 47888 }, { "epoch": 44.68, "learning_rate": 4.441557835820896e-05, "loss": 0.0, "step": 47892 }, { "epoch": 44.68, "learning_rate": 4.441511194029851e-05, "loss": 0.0002, "step": 47896 }, { "epoch": 44.68, "learning_rate": 4.441464552238806e-05, "loss": 0.0, "step": 47900 }, { "epoch": 44.69, "learning_rate": 4.441417910447761e-05, "loss": 0.0, "step": 47904 }, { "epoch": 44.69, "learning_rate": 4.441371268656717e-05, "loss": 0.0, "step": 47908 }, { "epoch": 44.69, "learning_rate": 4.4413246268656716e-05, "loss": 0.0, "step": 47912 }, { "epoch": 44.7, "learning_rate": 4.441277985074627e-05, "loss": 0.0001, "step": 47916 }, { "epoch": 44.7, "learning_rate": 4.4412313432835825e-05, "loss": 0.0, "step": 47920 }, { "epoch": 44.71, "learning_rate": 4.4411847014925374e-05, "loss": 0.0049, "step": 47924 }, { "epoch": 44.71, "learning_rate": 4.441138059701493e-05, "loss": 0.0, "step": 47928 }, { "epoch": 44.71, "learning_rate": 4.4410914179104477e-05, "loss": 0.0, "step": 47932 }, { "epoch": 44.72, "learning_rate": 4.441044776119403e-05, "loss": 0.0001, "step": 47936 }, { "epoch": 44.72, "learning_rate": 4.4409981343283586e-05, "loss": 0.0, "step": 47940 }, { "epoch": 44.72, "learning_rate": 4.4409514925373135e-05, "loss": 0.0, "step": 47944 }, { "epoch": 44.73, "learning_rate": 4.440904850746269e-05, "loss": 0.0001, "step": 47948 }, { "epoch": 44.73, "learning_rate": 4.4408582089552244e-05, "loss": 0.0002, "step": 47952 }, { "epoch": 44.73, "learning_rate": 4.440811567164179e-05, "loss": 0.0001, "step": 47956 }, { "epoch": 44.74, "learning_rate": 4.440764925373134e-05, "loss": 0.0, "step": 47960 }, { "epoch": 44.74, "learning_rate": 4.4407182835820896e-05, "loss": 0.0001, "step": 47964 }, { "epoch": 44.75, "learning_rate": 4.440671641791045e-05, "loss": 0.0, "step": 47968 }, { "epoch": 44.75, "learning_rate": 4.4406250000000005e-05, "loss": 0.0001, "step": 47972 }, { "epoch": 44.75, "learning_rate": 4.4405783582089553e-05, "loss": 0.0003, "step": 47976 }, { "epoch": 44.76, "learning_rate": 4.440531716417911e-05, "loss": 0.0, "step": 47980 }, { "epoch": 44.76, "learning_rate": 4.440485074626866e-05, "loss": 0.0, "step": 47984 }, { "epoch": 44.76, "learning_rate": 4.440438432835821e-05, "loss": 0.0001, "step": 47988 }, { "epoch": 44.77, "learning_rate": 4.440391791044776e-05, "loss": 0.0, "step": 47992 }, { "epoch": 44.77, "learning_rate": 4.440345149253732e-05, "loss": 0.0, "step": 47996 }, { "epoch": 44.78, "learning_rate": 4.440298507462687e-05, "loss": 0.0, "step": 48000 }, { "epoch": 44.78, "eval_exact_match": 0.7263056092843327, "eval_exec": 0.7678916827852998, "eval_loss": 0.43220871686935425, "eval_runtime": 1142.6437, "eval_samples_per_second": 0.905, "step": 48000 }, { "epoch": 44.78, "learning_rate": 4.440251865671642e-05, "loss": 0.0002, "step": 48004 }, { "epoch": 44.78, "learning_rate": 4.440205223880597e-05, "loss": 0.0, "step": 48008 }, { "epoch": 44.79, "learning_rate": 4.440158582089553e-05, "loss": 0.0, "step": 48012 }, { "epoch": 44.79, "learning_rate": 4.4401119402985075e-05, "loss": 0.0001, "step": 48016 }, { "epoch": 44.79, "learning_rate": 4.440065298507463e-05, "loss": 0.0003, "step": 48020 }, { "epoch": 44.8, "learning_rate": 4.440018656716418e-05, "loss": 0.0, "step": 48024 }, { "epoch": 44.8, "learning_rate": 4.4399720149253733e-05, "loss": 0.0001, "step": 48028 }, { "epoch": 44.81, "learning_rate": 4.439925373134329e-05, "loss": 0.0, "step": 48032 }, { "epoch": 44.81, "learning_rate": 4.4398787313432836e-05, "loss": 0.0, "step": 48036 }, { "epoch": 44.81, "learning_rate": 4.439832089552239e-05, "loss": 0.0001, "step": 48040 }, { "epoch": 44.82, "learning_rate": 4.4397854477611946e-05, "loss": 0.0, "step": 48044 }, { "epoch": 44.82, "learning_rate": 4.4397388059701494e-05, "loss": 0.0, "step": 48048 }, { "epoch": 44.82, "learning_rate": 4.439692164179104e-05, "loss": 0.0, "step": 48052 }, { "epoch": 44.83, "learning_rate": 4.4396455223880604e-05, "loss": 0.0, "step": 48056 }, { "epoch": 44.83, "learning_rate": 4.439598880597015e-05, "loss": 0.0043, "step": 48060 }, { "epoch": 44.84, "learning_rate": 4.43955223880597e-05, "loss": 0.0002, "step": 48064 }, { "epoch": 44.84, "learning_rate": 4.4395055970149255e-05, "loss": 0.0003, "step": 48068 }, { "epoch": 44.84, "learning_rate": 4.439458955223881e-05, "loss": 0.0001, "step": 48072 }, { "epoch": 44.85, "learning_rate": 4.439412313432836e-05, "loss": 0.0, "step": 48076 }, { "epoch": 44.85, "learning_rate": 4.439365671641791e-05, "loss": 0.0002, "step": 48080 }, { "epoch": 44.85, "learning_rate": 4.439319029850746e-05, "loss": 0.0075, "step": 48084 }, { "epoch": 44.86, "learning_rate": 4.4392723880597016e-05, "loss": 0.0, "step": 48088 }, { "epoch": 44.86, "learning_rate": 4.439225746268657e-05, "loss": 0.0001, "step": 48092 }, { "epoch": 44.87, "learning_rate": 4.439179104477612e-05, "loss": 0.0, "step": 48096 }, { "epoch": 44.87, "learning_rate": 4.4391324626865674e-05, "loss": 0.0, "step": 48100 }, { "epoch": 44.87, "learning_rate": 4.439085820895523e-05, "loss": 0.0038, "step": 48104 }, { "epoch": 44.88, "learning_rate": 4.439039179104478e-05, "loss": 0.0002, "step": 48108 }, { "epoch": 44.88, "learning_rate": 4.4389925373134325e-05, "loss": 0.0, "step": 48112 }, { "epoch": 44.88, "learning_rate": 4.438945895522388e-05, "loss": 0.0, "step": 48116 }, { "epoch": 44.89, "learning_rate": 4.4388992537313435e-05, "loss": 0.0055, "step": 48120 }, { "epoch": 44.89, "learning_rate": 4.4388526119402983e-05, "loss": 0.0001, "step": 48124 }, { "epoch": 44.9, "learning_rate": 4.438805970149254e-05, "loss": 0.0001, "step": 48128 }, { "epoch": 44.9, "learning_rate": 4.438759328358209e-05, "loss": 0.0003, "step": 48132 }, { "epoch": 44.9, "learning_rate": 4.438712686567165e-05, "loss": 0.0001, "step": 48136 }, { "epoch": 44.91, "learning_rate": 4.4386660447761196e-05, "loss": 0.0013, "step": 48140 }, { "epoch": 44.91, "learning_rate": 4.4386194029850744e-05, "loss": 0.0001, "step": 48144 }, { "epoch": 44.91, "learning_rate": 4.4385727611940306e-05, "loss": 0.0, "step": 48148 }, { "epoch": 44.92, "learning_rate": 4.4385261194029854e-05, "loss": 0.0, "step": 48152 }, { "epoch": 44.92, "learning_rate": 4.43847947761194e-05, "loss": 0.0, "step": 48156 }, { "epoch": 44.93, "learning_rate": 4.438432835820896e-05, "loss": 0.0001, "step": 48160 }, { "epoch": 44.93, "learning_rate": 4.438386194029851e-05, "loss": 0.0, "step": 48164 }, { "epoch": 44.93, "learning_rate": 4.438339552238806e-05, "loss": 0.0001, "step": 48168 }, { "epoch": 44.94, "learning_rate": 4.4382929104477615e-05, "loss": 0.0, "step": 48172 }, { "epoch": 44.94, "learning_rate": 4.438246268656716e-05, "loss": 0.0009, "step": 48176 }, { "epoch": 44.94, "learning_rate": 4.438199626865672e-05, "loss": 0.0, "step": 48180 }, { "epoch": 44.95, "learning_rate": 4.438152985074627e-05, "loss": 0.0001, "step": 48184 }, { "epoch": 44.95, "learning_rate": 4.438106343283582e-05, "loss": 0.0001, "step": 48188 }, { "epoch": 44.96, "learning_rate": 4.4380597014925376e-05, "loss": 0.0003, "step": 48192 }, { "epoch": 44.96, "learning_rate": 4.438013059701493e-05, "loss": 0.0, "step": 48196 }, { "epoch": 44.96, "learning_rate": 4.437966417910448e-05, "loss": 0.0, "step": 48200 }, { "epoch": 44.97, "learning_rate": 4.437919776119403e-05, "loss": 0.0, "step": 48204 }, { "epoch": 44.97, "learning_rate": 4.437873134328359e-05, "loss": 0.0, "step": 48208 }, { "epoch": 44.97, "learning_rate": 4.437826492537314e-05, "loss": 0.0002, "step": 48212 }, { "epoch": 44.98, "learning_rate": 4.4377798507462685e-05, "loss": 0.0, "step": 48216 }, { "epoch": 44.98, "learning_rate": 4.437733208955224e-05, "loss": 0.0, "step": 48220 }, { "epoch": 44.98, "learning_rate": 4.4376865671641795e-05, "loss": 0.0, "step": 48224 }, { "epoch": 44.99, "learning_rate": 4.437639925373134e-05, "loss": 0.0072, "step": 48228 }, { "epoch": 44.99, "learning_rate": 4.43759328358209e-05, "loss": 0.0, "step": 48232 }, { "epoch": 45.0, "learning_rate": 4.4375466417910446e-05, "loss": 0.0013, "step": 48236 }, { "epoch": 45.0, "learning_rate": 4.4375e-05, "loss": 0.0002, "step": 48240 }, { "epoch": 45.0, "learning_rate": 4.4374533582089556e-05, "loss": 0.0003, "step": 48244 }, { "epoch": 45.01, "learning_rate": 4.4374067164179104e-05, "loss": 0.0002, "step": 48248 }, { "epoch": 45.01, "learning_rate": 4.437360074626866e-05, "loss": 0.0009, "step": 48252 }, { "epoch": 45.01, "learning_rate": 4.4373134328358214e-05, "loss": 0.0, "step": 48256 }, { "epoch": 45.02, "learning_rate": 4.437266791044776e-05, "loss": 0.0001, "step": 48260 }, { "epoch": 45.02, "learning_rate": 4.437220149253731e-05, "loss": 0.0, "step": 48264 }, { "epoch": 45.03, "learning_rate": 4.437173507462687e-05, "loss": 0.0, "step": 48268 }, { "epoch": 45.03, "learning_rate": 4.437126865671642e-05, "loss": 0.0, "step": 48272 }, { "epoch": 45.03, "learning_rate": 4.437080223880597e-05, "loss": 0.0, "step": 48276 }, { "epoch": 45.04, "learning_rate": 4.437033582089552e-05, "loss": 0.0007, "step": 48280 }, { "epoch": 45.04, "learning_rate": 4.436986940298508e-05, "loss": 0.0005, "step": 48284 }, { "epoch": 45.04, "learning_rate": 4.4369402985074626e-05, "loss": 0.0, "step": 48288 }, { "epoch": 45.05, "learning_rate": 4.436893656716418e-05, "loss": 0.0002, "step": 48292 }, { "epoch": 45.05, "learning_rate": 4.436847014925373e-05, "loss": 0.0004, "step": 48296 }, { "epoch": 45.06, "learning_rate": 4.436800373134329e-05, "loss": 0.0005, "step": 48300 }, { "epoch": 45.06, "learning_rate": 4.436753731343284e-05, "loss": 0.0, "step": 48304 }, { "epoch": 45.06, "learning_rate": 4.436707089552239e-05, "loss": 0.0007, "step": 48308 }, { "epoch": 45.07, "learning_rate": 4.436660447761194e-05, "loss": 0.0, "step": 48312 }, { "epoch": 45.07, "learning_rate": 4.43661380597015e-05, "loss": 0.0, "step": 48316 }, { "epoch": 45.07, "learning_rate": 4.4365671641791045e-05, "loss": 0.0, "step": 48320 }, { "epoch": 45.08, "learning_rate": 4.43652052238806e-05, "loss": 0.001, "step": 48324 }, { "epoch": 45.08, "learning_rate": 4.4364738805970155e-05, "loss": 0.0001, "step": 48328 }, { "epoch": 45.09, "learning_rate": 4.43642723880597e-05, "loss": 0.001, "step": 48332 }, { "epoch": 45.09, "learning_rate": 4.436380597014926e-05, "loss": 0.0, "step": 48336 }, { "epoch": 45.09, "learning_rate": 4.4363339552238806e-05, "loss": 0.0, "step": 48340 }, { "epoch": 45.1, "learning_rate": 4.436287313432836e-05, "loss": 0.0044, "step": 48344 }, { "epoch": 45.1, "learning_rate": 4.4362406716417916e-05, "loss": 0.0, "step": 48348 }, { "epoch": 45.1, "learning_rate": 4.4361940298507464e-05, "loss": 0.0006, "step": 48352 }, { "epoch": 45.11, "learning_rate": 4.436147388059701e-05, "loss": 0.0, "step": 48356 }, { "epoch": 45.11, "learning_rate": 4.4361007462686574e-05, "loss": 0.0002, "step": 48360 }, { "epoch": 45.12, "learning_rate": 4.436054104477612e-05, "loss": 0.0002, "step": 48364 }, { "epoch": 45.12, "learning_rate": 4.436007462686567e-05, "loss": 0.0, "step": 48368 }, { "epoch": 45.12, "learning_rate": 4.4359608208955225e-05, "loss": 0.0002, "step": 48372 }, { "epoch": 45.13, "learning_rate": 4.435914179104478e-05, "loss": 0.0, "step": 48376 }, { "epoch": 45.13, "learning_rate": 4.435867537313433e-05, "loss": 0.0001, "step": 48380 }, { "epoch": 45.13, "learning_rate": 4.435820895522388e-05, "loss": 0.0, "step": 48384 }, { "epoch": 45.14, "learning_rate": 4.435774253731344e-05, "loss": 0.0, "step": 48388 }, { "epoch": 45.14, "learning_rate": 4.4357276119402986e-05, "loss": 0.0, "step": 48392 }, { "epoch": 45.15, "learning_rate": 4.435680970149254e-05, "loss": 0.0, "step": 48396 }, { "epoch": 45.15, "learning_rate": 4.435634328358209e-05, "loss": 0.0015, "step": 48400 }, { "epoch": 45.15, "learning_rate": 4.4355876865671644e-05, "loss": 0.0062, "step": 48404 }, { "epoch": 45.16, "learning_rate": 4.43554104477612e-05, "loss": 0.0, "step": 48408 }, { "epoch": 45.16, "learning_rate": 4.435494402985075e-05, "loss": 0.0, "step": 48412 }, { "epoch": 45.16, "learning_rate": 4.4354477611940295e-05, "loss": 0.0003, "step": 48416 }, { "epoch": 45.17, "learning_rate": 4.435401119402986e-05, "loss": 0.0006, "step": 48420 }, { "epoch": 45.17, "learning_rate": 4.4353544776119405e-05, "loss": 0.0, "step": 48424 }, { "epoch": 45.18, "learning_rate": 4.435307835820895e-05, "loss": 0.0, "step": 48428 }, { "epoch": 45.18, "learning_rate": 4.435261194029851e-05, "loss": 0.0, "step": 48432 }, { "epoch": 45.18, "learning_rate": 4.435214552238806e-05, "loss": 0.001, "step": 48436 }, { "epoch": 45.19, "learning_rate": 4.435167910447761e-05, "loss": 0.0001, "step": 48440 }, { "epoch": 45.19, "learning_rate": 4.4351212686567166e-05, "loss": 0.0, "step": 48444 }, { "epoch": 45.19, "learning_rate": 4.435074626865672e-05, "loss": 0.0001, "step": 48448 }, { "epoch": 45.2, "learning_rate": 4.435027985074627e-05, "loss": 0.0003, "step": 48452 }, { "epoch": 45.2, "learning_rate": 4.4349813432835824e-05, "loss": 0.0, "step": 48456 }, { "epoch": 45.21, "learning_rate": 4.434934701492537e-05, "loss": 0.0001, "step": 48460 }, { "epoch": 45.21, "learning_rate": 4.434888059701493e-05, "loss": 0.0004, "step": 48464 }, { "epoch": 45.21, "learning_rate": 4.434841417910448e-05, "loss": 0.0001, "step": 48468 }, { "epoch": 45.22, "learning_rate": 4.434794776119403e-05, "loss": 0.0, "step": 48472 }, { "epoch": 45.22, "learning_rate": 4.4347481343283585e-05, "loss": 0.0, "step": 48476 }, { "epoch": 45.22, "learning_rate": 4.434701492537314e-05, "loss": 0.0001, "step": 48480 }, { "epoch": 45.23, "learning_rate": 4.434654850746269e-05, "loss": 0.0009, "step": 48484 }, { "epoch": 45.23, "learning_rate": 4.434608208955224e-05, "loss": 0.0001, "step": 48488 }, { "epoch": 45.24, "learning_rate": 4.434561567164179e-05, "loss": 0.0, "step": 48492 }, { "epoch": 45.24, "learning_rate": 4.4345149253731346e-05, "loss": 0.0, "step": 48496 }, { "epoch": 45.24, "learning_rate": 4.43446828358209e-05, "loss": 0.0, "step": 48500 }, { "epoch": 45.24, "eval_exact_match": 0.7272727272727273, "eval_exec": 0.7659574468085106, "eval_loss": 0.433497816324234, "eval_runtime": 1151.4161, "eval_samples_per_second": 0.898, "step": 48500 }, { "epoch": 45.25, "learning_rate": 4.434421641791045e-05, "loss": 0.0001, "step": 48504 }, { "epoch": 45.25, "learning_rate": 4.4343750000000004e-05, "loss": 0.0, "step": 48508 }, { "epoch": 45.25, "learning_rate": 4.434328358208956e-05, "loss": 0.0, "step": 48512 }, { "epoch": 45.26, "learning_rate": 4.434281716417911e-05, "loss": 0.0, "step": 48516 }, { "epoch": 45.26, "learning_rate": 4.4342350746268655e-05, "loss": 0.0, "step": 48520 }, { "epoch": 45.26, "learning_rate": 4.434188432835821e-05, "loss": 0.0, "step": 48524 }, { "epoch": 45.27, "learning_rate": 4.4341417910447765e-05, "loss": 0.0001, "step": 48528 }, { "epoch": 45.27, "learning_rate": 4.434095149253731e-05, "loss": 0.0, "step": 48532 }, { "epoch": 45.28, "learning_rate": 4.434048507462687e-05, "loss": 0.004, "step": 48536 }, { "epoch": 45.28, "learning_rate": 4.434001865671642e-05, "loss": 0.0, "step": 48540 }, { "epoch": 45.28, "learning_rate": 4.433955223880597e-05, "loss": 0.0, "step": 48544 }, { "epoch": 45.29, "learning_rate": 4.4339085820895526e-05, "loss": 0.0, "step": 48548 }, { "epoch": 45.29, "learning_rate": 4.4338619402985074e-05, "loss": 0.0, "step": 48552 }, { "epoch": 45.29, "learning_rate": 4.433815298507463e-05, "loss": 0.0, "step": 48556 }, { "epoch": 45.3, "learning_rate": 4.4337686567164184e-05, "loss": 0.0008, "step": 48560 }, { "epoch": 45.3, "learning_rate": 4.433722014925373e-05, "loss": 0.0, "step": 48564 }, { "epoch": 45.31, "learning_rate": 4.433675373134329e-05, "loss": 0.0003, "step": 48568 }, { "epoch": 45.31, "learning_rate": 4.433628731343284e-05, "loss": 0.0, "step": 48572 }, { "epoch": 45.31, "learning_rate": 4.433582089552239e-05, "loss": 0.0, "step": 48576 }, { "epoch": 45.32, "learning_rate": 4.433535447761194e-05, "loss": 0.0, "step": 48580 }, { "epoch": 45.32, "learning_rate": 4.433488805970149e-05, "loss": 0.0, "step": 48584 }, { "epoch": 45.32, "learning_rate": 4.433442164179105e-05, "loss": 0.0, "step": 48588 }, { "epoch": 45.33, "learning_rate": 4.4333955223880596e-05, "loss": 0.0001, "step": 48592 }, { "epoch": 45.33, "learning_rate": 4.433348880597015e-05, "loss": 0.0, "step": 48596 }, { "epoch": 45.34, "learning_rate": 4.4333022388059706e-05, "loss": 0.0, "step": 48600 }, { "epoch": 45.34, "learning_rate": 4.4332555970149254e-05, "loss": 0.0, "step": 48604 }, { "epoch": 45.34, "learning_rate": 4.433208955223881e-05, "loss": 0.0002, "step": 48608 }, { "epoch": 45.35, "learning_rate": 4.433162313432836e-05, "loss": 0.0, "step": 48612 }, { "epoch": 45.35, "learning_rate": 4.433115671641792e-05, "loss": 0.0001, "step": 48616 }, { "epoch": 45.35, "learning_rate": 4.433069029850747e-05, "loss": 0.0, "step": 48620 }, { "epoch": 45.36, "learning_rate": 4.4330223880597015e-05, "loss": 0.0027, "step": 48624 }, { "epoch": 45.36, "learning_rate": 4.432975746268657e-05, "loss": 0.0, "step": 48628 }, { "epoch": 45.37, "learning_rate": 4.4329291044776125e-05, "loss": 0.0002, "step": 48632 }, { "epoch": 45.37, "learning_rate": 4.432882462686567e-05, "loss": 0.0001, "step": 48636 }, { "epoch": 45.37, "learning_rate": 4.432835820895523e-05, "loss": 0.0, "step": 48640 }, { "epoch": 45.38, "learning_rate": 4.4327891791044776e-05, "loss": 0.0001, "step": 48644 }, { "epoch": 45.38, "learning_rate": 4.432742537313433e-05, "loss": 0.0001, "step": 48648 }, { "epoch": 45.38, "learning_rate": 4.4326958955223886e-05, "loss": 0.0, "step": 48652 }, { "epoch": 45.39, "learning_rate": 4.4326492537313434e-05, "loss": 0.0001, "step": 48656 }, { "epoch": 45.39, "learning_rate": 4.432602611940299e-05, "loss": 0.0, "step": 48660 }, { "epoch": 45.4, "learning_rate": 4.4325559701492544e-05, "loss": 0.0, "step": 48664 }, { "epoch": 45.4, "learning_rate": 4.432509328358209e-05, "loss": 0.0, "step": 48668 }, { "epoch": 45.4, "learning_rate": 4.432462686567164e-05, "loss": 0.0, "step": 48672 }, { "epoch": 45.41, "learning_rate": 4.43241604477612e-05, "loss": 0.0, "step": 48676 }, { "epoch": 45.41, "learning_rate": 4.432369402985075e-05, "loss": 0.0001, "step": 48680 }, { "epoch": 45.41, "learning_rate": 4.43232276119403e-05, "loss": 0.0, "step": 48684 }, { "epoch": 45.42, "learning_rate": 4.432276119402985e-05, "loss": 0.0006, "step": 48688 }, { "epoch": 45.42, "learning_rate": 4.432229477611941e-05, "loss": 0.0, "step": 48692 }, { "epoch": 45.43, "learning_rate": 4.4321828358208956e-05, "loss": 0.0005, "step": 48696 }, { "epoch": 45.43, "learning_rate": 4.432136194029851e-05, "loss": 0.0001, "step": 48700 }, { "epoch": 45.43, "learning_rate": 4.432089552238806e-05, "loss": 0.0, "step": 48704 }, { "epoch": 45.44, "learning_rate": 4.4320429104477614e-05, "loss": 0.0, "step": 48708 }, { "epoch": 45.44, "learning_rate": 4.431996268656717e-05, "loss": 0.0005, "step": 48712 }, { "epoch": 45.44, "learning_rate": 4.431949626865672e-05, "loss": 0.0, "step": 48716 }, { "epoch": 45.45, "learning_rate": 4.431902985074627e-05, "loss": 0.0, "step": 48720 }, { "epoch": 45.45, "learning_rate": 4.4318563432835827e-05, "loss": 0.0, "step": 48724 }, { "epoch": 45.46, "learning_rate": 4.4318097014925375e-05, "loss": 0.0, "step": 48728 }, { "epoch": 45.46, "learning_rate": 4.431763059701492e-05, "loss": 0.0001, "step": 48732 }, { "epoch": 45.46, "learning_rate": 4.4317164179104484e-05, "loss": 0.0, "step": 48736 }, { "epoch": 45.47, "learning_rate": 4.431669776119403e-05, "loss": 0.0, "step": 48740 }, { "epoch": 45.47, "learning_rate": 4.431623134328358e-05, "loss": 0.0001, "step": 48744 }, { "epoch": 45.47, "learning_rate": 4.4315764925373136e-05, "loss": 0.0, "step": 48748 }, { "epoch": 45.48, "learning_rate": 4.431529850746269e-05, "loss": 0.0, "step": 48752 }, { "epoch": 45.48, "learning_rate": 4.431483208955224e-05, "loss": 0.0, "step": 48756 }, { "epoch": 45.49, "learning_rate": 4.4314365671641794e-05, "loss": 0.0, "step": 48760 }, { "epoch": 45.49, "learning_rate": 4.431389925373134e-05, "loss": 0.0001, "step": 48764 }, { "epoch": 45.49, "learning_rate": 4.43134328358209e-05, "loss": 0.0, "step": 48768 }, { "epoch": 45.5, "learning_rate": 4.431296641791045e-05, "loss": 0.0, "step": 48772 }, { "epoch": 45.5, "learning_rate": 4.43125e-05, "loss": 0.0003, "step": 48776 }, { "epoch": 45.5, "learning_rate": 4.4312033582089555e-05, "loss": 0.0, "step": 48780 }, { "epoch": 45.51, "learning_rate": 4.431156716417911e-05, "loss": 0.0, "step": 48784 }, { "epoch": 45.51, "learning_rate": 4.431110074626866e-05, "loss": 0.0022, "step": 48788 }, { "epoch": 45.51, "learning_rate": 4.431063432835821e-05, "loss": 0.0019, "step": 48792 }, { "epoch": 45.52, "learning_rate": 4.431016791044777e-05, "loss": 0.0002, "step": 48796 }, { "epoch": 45.52, "learning_rate": 4.4309701492537316e-05, "loss": 0.0, "step": 48800 }, { "epoch": 45.53, "learning_rate": 4.430923507462687e-05, "loss": 0.0001, "step": 48804 }, { "epoch": 45.53, "learning_rate": 4.430876865671642e-05, "loss": 0.0, "step": 48808 }, { "epoch": 45.53, "learning_rate": 4.4308302238805974e-05, "loss": 0.0002, "step": 48812 }, { "epoch": 45.54, "learning_rate": 4.430783582089553e-05, "loss": 0.0, "step": 48816 }, { "epoch": 45.54, "learning_rate": 4.4307369402985077e-05, "loss": 0.0, "step": 48820 }, { "epoch": 45.54, "learning_rate": 4.4306902985074625e-05, "loss": 0.0, "step": 48824 }, { "epoch": 45.55, "learning_rate": 4.4306436567164186e-05, "loss": 0.0001, "step": 48828 }, { "epoch": 45.55, "learning_rate": 4.4305970149253735e-05, "loss": 0.005, "step": 48832 }, { "epoch": 45.56, "learning_rate": 4.430550373134328e-05, "loss": 0.0, "step": 48836 }, { "epoch": 45.56, "learning_rate": 4.430503731343284e-05, "loss": 0.0, "step": 48840 }, { "epoch": 45.56, "learning_rate": 4.430457089552239e-05, "loss": 0.0001, "step": 48844 }, { "epoch": 45.57, "learning_rate": 4.430410447761194e-05, "loss": 0.0, "step": 48848 }, { "epoch": 45.57, "learning_rate": 4.4303638059701495e-05, "loss": 0.0, "step": 48852 }, { "epoch": 45.57, "learning_rate": 4.4303171641791044e-05, "loss": 0.0, "step": 48856 }, { "epoch": 45.58, "learning_rate": 4.43027052238806e-05, "loss": 0.0, "step": 48860 }, { "epoch": 45.58, "learning_rate": 4.4302238805970153e-05, "loss": 0.0, "step": 48864 }, { "epoch": 45.59, "learning_rate": 4.43017723880597e-05, "loss": 0.0, "step": 48868 }, { "epoch": 45.59, "learning_rate": 4.4301305970149256e-05, "loss": 0.0001, "step": 48872 }, { "epoch": 45.59, "learning_rate": 4.430083955223881e-05, "loss": 0.0, "step": 48876 }, { "epoch": 45.6, "learning_rate": 4.430037313432836e-05, "loss": 0.0, "step": 48880 }, { "epoch": 45.6, "learning_rate": 4.429990671641791e-05, "loss": 0.0, "step": 48884 }, { "epoch": 45.6, "learning_rate": 4.429944029850747e-05, "loss": 0.0, "step": 48888 }, { "epoch": 45.61, "learning_rate": 4.429897388059702e-05, "loss": 0.0, "step": 48892 }, { "epoch": 45.61, "learning_rate": 4.4298507462686566e-05, "loss": 0.0002, "step": 48896 }, { "epoch": 45.62, "learning_rate": 4.429804104477612e-05, "loss": 0.0, "step": 48900 }, { "epoch": 45.62, "learning_rate": 4.4297574626865675e-05, "loss": 0.0, "step": 48904 }, { "epoch": 45.62, "learning_rate": 4.4297108208955224e-05, "loss": 0.0, "step": 48908 }, { "epoch": 45.63, "learning_rate": 4.429664179104478e-05, "loss": 0.0, "step": 48912 }, { "epoch": 45.63, "learning_rate": 4.4296175373134327e-05, "loss": 0.0057, "step": 48916 }, { "epoch": 45.63, "learning_rate": 4.429570895522388e-05, "loss": 0.0, "step": 48920 }, { "epoch": 45.64, "learning_rate": 4.4295242537313436e-05, "loss": 0.0002, "step": 48924 }, { "epoch": 45.64, "learning_rate": 4.4294776119402985e-05, "loss": 0.0003, "step": 48928 }, { "epoch": 45.65, "learning_rate": 4.429430970149254e-05, "loss": 0.0, "step": 48932 }, { "epoch": 45.65, "learning_rate": 4.4293843283582094e-05, "loss": 0.0003, "step": 48936 }, { "epoch": 45.65, "learning_rate": 4.429337686567164e-05, "loss": 0.0, "step": 48940 }, { "epoch": 45.66, "learning_rate": 4.42929104477612e-05, "loss": 0.0, "step": 48944 }, { "epoch": 45.66, "learning_rate": 4.429244402985075e-05, "loss": 0.0, "step": 48948 }, { "epoch": 45.66, "learning_rate": 4.42919776119403e-05, "loss": 0.0, "step": 48952 }, { "epoch": 45.67, "learning_rate": 4.4291511194029855e-05, "loss": 0.0, "step": 48956 }, { "epoch": 45.67, "learning_rate": 4.4291044776119403e-05, "loss": 0.0001, "step": 48960 }, { "epoch": 45.68, "learning_rate": 4.429057835820896e-05, "loss": 0.0, "step": 48964 }, { "epoch": 45.68, "learning_rate": 4.429011194029851e-05, "loss": 0.0, "step": 48968 }, { "epoch": 45.68, "learning_rate": 4.428964552238806e-05, "loss": 0.0004, "step": 48972 }, { "epoch": 45.69, "learning_rate": 4.428917910447761e-05, "loss": 0.0, "step": 48976 }, { "epoch": 45.69, "learning_rate": 4.428871268656717e-05, "loss": 0.0, "step": 48980 }, { "epoch": 45.69, "learning_rate": 4.428824626865672e-05, "loss": 0.0, "step": 48984 }, { "epoch": 45.7, "learning_rate": 4.428777985074627e-05, "loss": 0.0, "step": 48988 }, { "epoch": 45.7, "learning_rate": 4.428731343283582e-05, "loss": 0.0, "step": 48992 }, { "epoch": 45.71, "learning_rate": 4.428684701492538e-05, "loss": 0.0, "step": 48996 }, { "epoch": 45.71, "learning_rate": 4.4286380597014925e-05, "loss": 0.001, "step": 49000 }, { "epoch": 45.71, "eval_exact_match": 0.7321083172147002, "eval_exec": 0.7707930367504836, "eval_loss": 0.4455174505710602, "eval_runtime": 1081.8406, "eval_samples_per_second": 0.956, "step": 49000 }, { "epoch": 45.71, "learning_rate": 4.428591417910448e-05, "loss": 0.0, "step": 49004 }, { "epoch": 45.72, "learning_rate": 4.4285447761194035e-05, "loss": 0.0001, "step": 49008 }, { "epoch": 45.72, "learning_rate": 4.428498134328358e-05, "loss": 0.0005, "step": 49012 }, { "epoch": 45.72, "learning_rate": 4.428451492537314e-05, "loss": 0.0, "step": 49016 }, { "epoch": 45.73, "learning_rate": 4.4284048507462686e-05, "loss": 0.0001, "step": 49020 }, { "epoch": 45.73, "learning_rate": 4.428358208955224e-05, "loss": 0.0, "step": 49024 }, { "epoch": 45.73, "learning_rate": 4.4283115671641796e-05, "loss": 0.0, "step": 49028 }, { "epoch": 45.74, "learning_rate": 4.4282649253731344e-05, "loss": 0.0002, "step": 49032 }, { "epoch": 45.74, "learning_rate": 4.428218283582089e-05, "loss": 0.0002, "step": 49036 }, { "epoch": 45.75, "learning_rate": 4.4281716417910454e-05, "loss": 0.0, "step": 49040 }, { "epoch": 45.75, "learning_rate": 4.428125e-05, "loss": 0.0, "step": 49044 }, { "epoch": 45.75, "learning_rate": 4.428078358208955e-05, "loss": 0.0, "step": 49048 }, { "epoch": 45.76, "learning_rate": 4.4280317164179105e-05, "loss": 0.0, "step": 49052 }, { "epoch": 45.76, "learning_rate": 4.427985074626866e-05, "loss": 0.0001, "step": 49056 }, { "epoch": 45.76, "learning_rate": 4.427938432835821e-05, "loss": 0.0003, "step": 49060 }, { "epoch": 45.77, "learning_rate": 4.427891791044776e-05, "loss": 0.0, "step": 49064 }, { "epoch": 45.77, "learning_rate": 4.427845149253732e-05, "loss": 0.0, "step": 49068 }, { "epoch": 45.78, "learning_rate": 4.4277985074626866e-05, "loss": 0.0001, "step": 49072 }, { "epoch": 45.78, "learning_rate": 4.427751865671642e-05, "loss": 0.0001, "step": 49076 }, { "epoch": 45.78, "learning_rate": 4.427705223880597e-05, "loss": 0.0, "step": 49080 }, { "epoch": 45.79, "learning_rate": 4.4276585820895524e-05, "loss": 0.0, "step": 49084 }, { "epoch": 45.79, "learning_rate": 4.427611940298508e-05, "loss": 0.0001, "step": 49088 }, { "epoch": 45.79, "learning_rate": 4.427565298507463e-05, "loss": 0.0011, "step": 49092 }, { "epoch": 45.8, "learning_rate": 4.4275186567164175e-05, "loss": 0.0001, "step": 49096 }, { "epoch": 45.8, "learning_rate": 4.427472014925374e-05, "loss": 0.0, "step": 49100 }, { "epoch": 45.81, "learning_rate": 4.4274253731343285e-05, "loss": 0.0, "step": 49104 }, { "epoch": 45.81, "learning_rate": 4.427378731343284e-05, "loss": 0.0, "step": 49108 }, { "epoch": 45.81, "learning_rate": 4.427332089552239e-05, "loss": 0.0, "step": 49112 }, { "epoch": 45.82, "learning_rate": 4.427285447761194e-05, "loss": 0.0001, "step": 49116 }, { "epoch": 45.82, "learning_rate": 4.42723880597015e-05, "loss": 0.0, "step": 49120 }, { "epoch": 45.82, "learning_rate": 4.4271921641791046e-05, "loss": 0.0005, "step": 49124 }, { "epoch": 45.83, "learning_rate": 4.42714552238806e-05, "loss": 0.0001, "step": 49128 }, { "epoch": 45.83, "learning_rate": 4.4270988805970156e-05, "loss": 0.0, "step": 49132 }, { "epoch": 45.84, "learning_rate": 4.4270522388059704e-05, "loss": 0.0, "step": 49136 }, { "epoch": 45.84, "learning_rate": 4.427005597014925e-05, "loss": 0.0, "step": 49140 }, { "epoch": 45.84, "learning_rate": 4.426958955223881e-05, "loss": 0.0005, "step": 49144 }, { "epoch": 45.85, "learning_rate": 4.426912313432836e-05, "loss": 0.0, "step": 49148 }, { "epoch": 45.85, "learning_rate": 4.426865671641791e-05, "loss": 0.0001, "step": 49152 }, { "epoch": 45.85, "learning_rate": 4.4268190298507465e-05, "loss": 0.0, "step": 49156 }, { "epoch": 45.86, "learning_rate": 4.426772388059702e-05, "loss": 0.0, "step": 49160 }, { "epoch": 45.86, "learning_rate": 4.426725746268657e-05, "loss": 0.0, "step": 49164 }, { "epoch": 45.87, "learning_rate": 4.426679104477612e-05, "loss": 0.0, "step": 49168 }, { "epoch": 45.87, "learning_rate": 4.426632462686567e-05, "loss": 0.0001, "step": 49172 }, { "epoch": 45.87, "learning_rate": 4.4265858208955226e-05, "loss": 0.0, "step": 49176 }, { "epoch": 45.88, "learning_rate": 4.426539179104478e-05, "loss": 0.0014, "step": 49180 }, { "epoch": 45.88, "learning_rate": 4.426492537313433e-05, "loss": 0.0, "step": 49184 }, { "epoch": 45.88, "learning_rate": 4.4264458955223884e-05, "loss": 0.0, "step": 49188 }, { "epoch": 45.89, "learning_rate": 4.426399253731344e-05, "loss": 0.0, "step": 49192 }, { "epoch": 45.89, "learning_rate": 4.426352611940299e-05, "loss": 0.0012, "step": 49196 }, { "epoch": 45.9, "learning_rate": 4.4263059701492535e-05, "loss": 0.0, "step": 49200 }, { "epoch": 45.9, "learning_rate": 4.426259328358209e-05, "loss": 0.0, "step": 49204 }, { "epoch": 45.9, "learning_rate": 4.4262126865671645e-05, "loss": 0.0, "step": 49208 }, { "epoch": 45.91, "learning_rate": 4.426166044776119e-05, "loss": 0.0, "step": 49212 }, { "epoch": 45.91, "learning_rate": 4.426119402985075e-05, "loss": 0.0, "step": 49216 }, { "epoch": 45.91, "learning_rate": 4.42607276119403e-05, "loss": 0.0003, "step": 49220 }, { "epoch": 45.92, "learning_rate": 4.426026119402985e-05, "loss": 0.0004, "step": 49224 }, { "epoch": 45.92, "learning_rate": 4.4259794776119406e-05, "loss": 0.0, "step": 49228 }, { "epoch": 45.93, "learning_rate": 4.4259328358208954e-05, "loss": 0.0, "step": 49232 }, { "epoch": 45.93, "learning_rate": 4.425886194029851e-05, "loss": 0.0021, "step": 49236 }, { "epoch": 45.93, "learning_rate": 4.4258395522388064e-05, "loss": 0.0017, "step": 49240 }, { "epoch": 45.94, "learning_rate": 4.425792910447761e-05, "loss": 0.0001, "step": 49244 }, { "epoch": 45.94, "learning_rate": 4.425746268656717e-05, "loss": 0.0, "step": 49248 }, { "epoch": 45.94, "learning_rate": 4.425699626865672e-05, "loss": 0.0017, "step": 49252 }, { "epoch": 45.95, "learning_rate": 4.425652985074627e-05, "loss": 0.0, "step": 49256 }, { "epoch": 45.95, "learning_rate": 4.425606343283582e-05, "loss": 0.0, "step": 49260 }, { "epoch": 45.96, "learning_rate": 4.425559701492537e-05, "loss": 0.0001, "step": 49264 }, { "epoch": 45.96, "learning_rate": 4.425513059701493e-05, "loss": 0.0, "step": 49268 }, { "epoch": 45.96, "learning_rate": 4.425466417910448e-05, "loss": 0.0034, "step": 49272 }, { "epoch": 45.97, "learning_rate": 4.425419776119403e-05, "loss": 0.0, "step": 49276 }, { "epoch": 45.97, "learning_rate": 4.4253731343283586e-05, "loss": 0.0001, "step": 49280 }, { "epoch": 45.97, "learning_rate": 4.425326492537314e-05, "loss": 0.0, "step": 49284 }, { "epoch": 45.98, "learning_rate": 4.425279850746269e-05, "loss": 0.0, "step": 49288 }, { "epoch": 45.98, "learning_rate": 4.425233208955224e-05, "loss": 0.0004, "step": 49292 }, { "epoch": 45.98, "learning_rate": 4.42518656716418e-05, "loss": 0.0001, "step": 49296 }, { "epoch": 45.99, "learning_rate": 4.425139925373135e-05, "loss": 0.0, "step": 49300 }, { "epoch": 45.99, "learning_rate": 4.4250932835820895e-05, "loss": 0.0, "step": 49304 }, { "epoch": 46.0, "learning_rate": 4.425046641791045e-05, "loss": 0.0039, "step": 49308 }, { "epoch": 46.0, "learning_rate": 4.4250000000000005e-05, "loss": 0.0, "step": 49312 }, { "epoch": 46.0, "learning_rate": 4.424953358208955e-05, "loss": 0.0001, "step": 49316 }, { "epoch": 46.01, "learning_rate": 4.424906716417911e-05, "loss": 0.0008, "step": 49320 }, { "epoch": 46.01, "learning_rate": 4.4248600746268656e-05, "loss": 0.0, "step": 49324 }, { "epoch": 46.01, "learning_rate": 4.424813432835821e-05, "loss": 0.0004, "step": 49328 }, { "epoch": 46.02, "learning_rate": 4.4247667910447766e-05, "loss": 0.0, "step": 49332 }, { "epoch": 46.02, "learning_rate": 4.4247201492537314e-05, "loss": 0.0, "step": 49336 }, { "epoch": 46.03, "learning_rate": 4.424673507462687e-05, "loss": 0.0, "step": 49340 }, { "epoch": 46.03, "learning_rate": 4.4246268656716424e-05, "loss": 0.0, "step": 49344 }, { "epoch": 46.03, "learning_rate": 4.424580223880597e-05, "loss": 0.0, "step": 49348 }, { "epoch": 46.04, "learning_rate": 4.424533582089552e-05, "loss": 0.0, "step": 49352 }, { "epoch": 46.04, "learning_rate": 4.424486940298508e-05, "loss": 0.0, "step": 49356 }, { "epoch": 46.04, "learning_rate": 4.424440298507463e-05, "loss": 0.0, "step": 49360 }, { "epoch": 46.05, "learning_rate": 4.424393656716418e-05, "loss": 0.0, "step": 49364 }, { "epoch": 46.05, "learning_rate": 4.424347014925373e-05, "loss": 0.0065, "step": 49368 }, { "epoch": 46.06, "learning_rate": 4.424300373134329e-05, "loss": 0.0, "step": 49372 }, { "epoch": 46.06, "learning_rate": 4.4242537313432836e-05, "loss": 0.0, "step": 49376 }, { "epoch": 46.06, "learning_rate": 4.424207089552239e-05, "loss": 0.0, "step": 49380 }, { "epoch": 46.07, "learning_rate": 4.424160447761194e-05, "loss": 0.0014, "step": 49384 }, { "epoch": 46.07, "learning_rate": 4.4241138059701494e-05, "loss": 0.0, "step": 49388 }, { "epoch": 46.07, "learning_rate": 4.424067164179105e-05, "loss": 0.0, "step": 49392 }, { "epoch": 46.08, "learning_rate": 4.42402052238806e-05, "loss": 0.0001, "step": 49396 }, { "epoch": 46.08, "learning_rate": 4.423973880597015e-05, "loss": 0.0001, "step": 49400 }, { "epoch": 46.09, "learning_rate": 4.423927238805971e-05, "loss": 0.0, "step": 49404 }, { "epoch": 46.09, "learning_rate": 4.4238805970149255e-05, "loss": 0.0, "step": 49408 }, { "epoch": 46.09, "learning_rate": 4.42383395522388e-05, "loss": 0.0, "step": 49412 }, { "epoch": 46.1, "learning_rate": 4.4237873134328365e-05, "loss": 0.0, "step": 49416 }, { "epoch": 46.1, "learning_rate": 4.423740671641791e-05, "loss": 0.0, "step": 49420 }, { "epoch": 46.1, "learning_rate": 4.423694029850746e-05, "loss": 0.0001, "step": 49424 }, { "epoch": 46.11, "learning_rate": 4.4236473880597016e-05, "loss": 0.0, "step": 49428 }, { "epoch": 46.11, "learning_rate": 4.423600746268657e-05, "loss": 0.0, "step": 49432 }, { "epoch": 46.12, "learning_rate": 4.4235541044776126e-05, "loss": 0.0, "step": 49436 }, { "epoch": 46.12, "learning_rate": 4.4235074626865674e-05, "loss": 0.0, "step": 49440 }, { "epoch": 46.12, "learning_rate": 4.423460820895522e-05, "loss": 0.0, "step": 49444 }, { "epoch": 46.13, "learning_rate": 4.4234141791044784e-05, "loss": 0.0001, "step": 49448 }, { "epoch": 46.13, "learning_rate": 4.423367537313433e-05, "loss": 0.0002, "step": 49452 }, { "epoch": 46.13, "learning_rate": 4.423320895522388e-05, "loss": 0.0, "step": 49456 }, { "epoch": 46.14, "learning_rate": 4.4232742537313435e-05, "loss": 0.0001, "step": 49460 }, { "epoch": 46.14, "learning_rate": 4.423227611940299e-05, "loss": 0.0061, "step": 49464 }, { "epoch": 46.15, "learning_rate": 4.423180970149254e-05, "loss": 0.0004, "step": 49468 }, { "epoch": 46.15, "learning_rate": 4.423134328358209e-05, "loss": 0.0, "step": 49472 }, { "epoch": 46.15, "learning_rate": 4.423087686567165e-05, "loss": 0.0001, "step": 49476 }, { "epoch": 46.16, "learning_rate": 4.4230410447761196e-05, "loss": 0.0, "step": 49480 }, { "epoch": 46.16, "learning_rate": 4.422994402985075e-05, "loss": 0.0, "step": 49484 }, { "epoch": 46.16, "learning_rate": 4.42294776119403e-05, "loss": 0.0002, "step": 49488 }, { "epoch": 46.17, "learning_rate": 4.4229011194029854e-05, "loss": 0.0, "step": 49492 }, { "epoch": 46.17, "learning_rate": 4.422854477611941e-05, "loss": 0.0, "step": 49496 }, { "epoch": 46.18, "learning_rate": 4.422807835820896e-05, "loss": 0.0006, "step": 49500 }, { "epoch": 46.18, "eval_exact_match": 0.7214700193423598, "eval_exec": 0.7572533849129593, "eval_loss": 0.45067957043647766, "eval_runtime": 1072.9162, "eval_samples_per_second": 0.964, "step": 49500 }, { "epoch": 46.18, "learning_rate": 4.4227611940298505e-05, "loss": 0.0001, "step": 49504 }, { "epoch": 46.18, "learning_rate": 4.422714552238807e-05, "loss": 0.0, "step": 49508 }, { "epoch": 46.19, "learning_rate": 4.4226679104477615e-05, "loss": 0.0001, "step": 49512 }, { "epoch": 46.19, "learning_rate": 4.422621268656716e-05, "loss": 0.0, "step": 49516 }, { "epoch": 46.19, "learning_rate": 4.422574626865672e-05, "loss": 0.0001, "step": 49520 }, { "epoch": 46.2, "learning_rate": 4.422527985074627e-05, "loss": 0.0009, "step": 49524 }, { "epoch": 46.2, "learning_rate": 4.422481343283582e-05, "loss": 0.0, "step": 49528 }, { "epoch": 46.21, "learning_rate": 4.4224347014925376e-05, "loss": 0.0, "step": 49532 }, { "epoch": 46.21, "learning_rate": 4.4223880597014924e-05, "loss": 0.0, "step": 49536 }, { "epoch": 46.21, "learning_rate": 4.422341417910448e-05, "loss": 0.0001, "step": 49540 }, { "epoch": 46.22, "learning_rate": 4.4222947761194034e-05, "loss": 0.0, "step": 49544 }, { "epoch": 46.22, "learning_rate": 4.422248134328358e-05, "loss": 0.0009, "step": 49548 }, { "epoch": 46.22, "learning_rate": 4.422201492537314e-05, "loss": 0.0003, "step": 49552 }, { "epoch": 46.23, "learning_rate": 4.422154850746269e-05, "loss": 0.0022, "step": 49556 }, { "epoch": 46.23, "learning_rate": 4.422108208955224e-05, "loss": 0.0003, "step": 49560 }, { "epoch": 46.24, "learning_rate": 4.422061567164179e-05, "loss": 0.0001, "step": 49564 }, { "epoch": 46.24, "learning_rate": 4.422014925373135e-05, "loss": 0.0, "step": 49568 }, { "epoch": 46.24, "learning_rate": 4.42196828358209e-05, "loss": 0.0003, "step": 49572 }, { "epoch": 46.25, "learning_rate": 4.4219216417910446e-05, "loss": 0.0, "step": 49576 }, { "epoch": 46.25, "learning_rate": 4.421875e-05, "loss": 0.0, "step": 49580 }, { "epoch": 46.25, "learning_rate": 4.4218283582089556e-05, "loss": 0.0, "step": 49584 }, { "epoch": 46.26, "learning_rate": 4.4217817164179104e-05, "loss": 0.0004, "step": 49588 }, { "epoch": 46.26, "learning_rate": 4.421735074626866e-05, "loss": 0.0, "step": 49592 }, { "epoch": 46.26, "learning_rate": 4.421688432835821e-05, "loss": 0.0, "step": 49596 }, { "epoch": 46.27, "learning_rate": 4.421641791044777e-05, "loss": 0.0, "step": 49600 }, { "epoch": 46.27, "learning_rate": 4.421595149253732e-05, "loss": 0.0006, "step": 49604 }, { "epoch": 46.28, "learning_rate": 4.4215485074626865e-05, "loss": 0.0, "step": 49608 }, { "epoch": 46.28, "learning_rate": 4.421501865671642e-05, "loss": 0.0, "step": 49612 }, { "epoch": 46.28, "learning_rate": 4.4214552238805975e-05, "loss": 0.0008, "step": 49616 }, { "epoch": 46.29, "learning_rate": 4.421408582089552e-05, "loss": 0.0, "step": 49620 }, { "epoch": 46.29, "learning_rate": 4.421361940298508e-05, "loss": 0.0, "step": 49624 }, { "epoch": 46.29, "learning_rate": 4.421315298507463e-05, "loss": 0.0, "step": 49628 }, { "epoch": 46.3, "learning_rate": 4.421268656716418e-05, "loss": 0.0032, "step": 49632 }, { "epoch": 46.3, "learning_rate": 4.4212220149253736e-05, "loss": 0.0, "step": 49636 }, { "epoch": 46.31, "learning_rate": 4.4211753731343284e-05, "loss": 0.0001, "step": 49640 }, { "epoch": 46.31, "learning_rate": 4.421128731343284e-05, "loss": 0.0003, "step": 49644 }, { "epoch": 46.31, "learning_rate": 4.4210820895522394e-05, "loss": 0.0011, "step": 49648 }, { "epoch": 46.32, "learning_rate": 4.421035447761194e-05, "loss": 0.0, "step": 49652 }, { "epoch": 46.32, "learning_rate": 4.420988805970149e-05, "loss": 0.0, "step": 49656 }, { "epoch": 46.32, "learning_rate": 4.420942164179105e-05, "loss": 0.0, "step": 49660 }, { "epoch": 46.33, "learning_rate": 4.42089552238806e-05, "loss": 0.0, "step": 49664 }, { "epoch": 46.33, "learning_rate": 4.420848880597015e-05, "loss": 0.0004, "step": 49668 }, { "epoch": 46.34, "learning_rate": 4.42080223880597e-05, "loss": 0.0, "step": 49672 }, { "epoch": 46.34, "learning_rate": 4.420755597014926e-05, "loss": 0.0007, "step": 49676 }, { "epoch": 46.34, "learning_rate": 4.4207089552238806e-05, "loss": 0.0, "step": 49680 }, { "epoch": 46.35, "learning_rate": 4.420662313432836e-05, "loss": 0.0001, "step": 49684 }, { "epoch": 46.35, "learning_rate": 4.4206156716417916e-05, "loss": 0.0, "step": 49688 }, { "epoch": 46.35, "learning_rate": 4.4205690298507464e-05, "loss": 0.0006, "step": 49692 }, { "epoch": 46.36, "learning_rate": 4.420522388059702e-05, "loss": 0.0027, "step": 49696 }, { "epoch": 46.36, "learning_rate": 4.420475746268657e-05, "loss": 0.007, "step": 49700 }, { "epoch": 46.37, "learning_rate": 4.420429104477612e-05, "loss": 0.0, "step": 49704 }, { "epoch": 46.37, "learning_rate": 4.4203824626865677e-05, "loss": 0.0, "step": 49708 }, { "epoch": 46.37, "learning_rate": 4.4203358208955225e-05, "loss": 0.0001, "step": 49712 }, { "epoch": 46.38, "learning_rate": 4.420289179104477e-05, "loss": 0.0, "step": 49716 }, { "epoch": 46.38, "learning_rate": 4.4202425373134334e-05, "loss": 0.0003, "step": 49720 }, { "epoch": 46.38, "learning_rate": 4.420195895522388e-05, "loss": 0.0004, "step": 49724 }, { "epoch": 46.39, "learning_rate": 4.420149253731343e-05, "loss": 0.0, "step": 49728 }, { "epoch": 46.39, "learning_rate": 4.4201026119402986e-05, "loss": 0.0, "step": 49732 }, { "epoch": 46.4, "learning_rate": 4.420055970149254e-05, "loss": 0.0, "step": 49736 }, { "epoch": 46.4, "learning_rate": 4.420009328358209e-05, "loss": 0.0, "step": 49740 }, { "epoch": 46.4, "learning_rate": 4.4199626865671644e-05, "loss": 0.0, "step": 49744 }, { "epoch": 46.41, "learning_rate": 4.41991604477612e-05, "loss": 0.0, "step": 49748 }, { "epoch": 46.41, "learning_rate": 4.419869402985075e-05, "loss": 0.0, "step": 49752 }, { "epoch": 46.41, "learning_rate": 4.41982276119403e-05, "loss": 0.0, "step": 49756 }, { "epoch": 46.42, "learning_rate": 4.419776119402985e-05, "loss": 0.0004, "step": 49760 }, { "epoch": 46.42, "learning_rate": 4.4197294776119405e-05, "loss": 0.0001, "step": 49764 }, { "epoch": 46.43, "learning_rate": 4.419682835820896e-05, "loss": 0.0, "step": 49768 }, { "epoch": 46.43, "learning_rate": 4.419636194029851e-05, "loss": 0.0, "step": 49772 }, { "epoch": 46.43, "learning_rate": 4.419589552238806e-05, "loss": 0.0, "step": 49776 }, { "epoch": 46.44, "learning_rate": 4.419542910447762e-05, "loss": 0.0001, "step": 49780 }, { "epoch": 46.44, "learning_rate": 4.4194962686567166e-05, "loss": 0.0, "step": 49784 }, { "epoch": 46.44, "learning_rate": 4.419449626865672e-05, "loss": 0.0, "step": 49788 }, { "epoch": 46.45, "learning_rate": 4.419402985074627e-05, "loss": 0.0001, "step": 49792 }, { "epoch": 46.45, "learning_rate": 4.4193563432835823e-05, "loss": 0.0, "step": 49796 }, { "epoch": 46.46, "learning_rate": 4.419309701492538e-05, "loss": 0.0, "step": 49800 }, { "epoch": 46.46, "learning_rate": 4.4192630597014927e-05, "loss": 0.0, "step": 49804 }, { "epoch": 46.46, "learning_rate": 4.419216417910448e-05, "loss": 0.0, "step": 49808 }, { "epoch": 46.47, "learning_rate": 4.4191697761194036e-05, "loss": 0.0, "step": 49812 }, { "epoch": 46.47, "learning_rate": 4.4191231343283584e-05, "loss": 0.0, "step": 49816 }, { "epoch": 46.47, "learning_rate": 4.419076492537313e-05, "loss": 0.001, "step": 49820 }, { "epoch": 46.48, "learning_rate": 4.419029850746269e-05, "loss": 0.0, "step": 49824 }, { "epoch": 46.48, "learning_rate": 4.418983208955224e-05, "loss": 0.0006, "step": 49828 }, { "epoch": 46.49, "learning_rate": 4.418936567164179e-05, "loss": 0.0, "step": 49832 }, { "epoch": 46.49, "learning_rate": 4.4188899253731345e-05, "loss": 0.001, "step": 49836 }, { "epoch": 46.49, "learning_rate": 4.41884328358209e-05, "loss": 0.0, "step": 49840 }, { "epoch": 46.5, "learning_rate": 4.418796641791045e-05, "loss": 0.0, "step": 49844 }, { "epoch": 46.5, "learning_rate": 4.4187500000000003e-05, "loss": 0.0, "step": 49848 }, { "epoch": 46.5, "learning_rate": 4.418703358208955e-05, "loss": 0.0013, "step": 49852 }, { "epoch": 46.51, "learning_rate": 4.4186567164179106e-05, "loss": 0.0, "step": 49856 }, { "epoch": 46.51, "learning_rate": 4.418610074626866e-05, "loss": 0.0, "step": 49860 }, { "epoch": 46.51, "learning_rate": 4.418563432835821e-05, "loss": 0.0004, "step": 49864 }, { "epoch": 46.52, "learning_rate": 4.4185167910447764e-05, "loss": 0.0, "step": 49868 }, { "epoch": 46.52, "learning_rate": 4.418470149253732e-05, "loss": 0.0006, "step": 49872 }, { "epoch": 46.53, "learning_rate": 4.418423507462687e-05, "loss": 0.0, "step": 49876 }, { "epoch": 46.53, "learning_rate": 4.4183768656716416e-05, "loss": 0.0, "step": 49880 }, { "epoch": 46.53, "learning_rate": 4.418330223880597e-05, "loss": 0.0001, "step": 49884 }, { "epoch": 46.54, "learning_rate": 4.4182835820895525e-05, "loss": 0.0, "step": 49888 }, { "epoch": 46.54, "learning_rate": 4.4182369402985074e-05, "loss": 0.0, "step": 49892 }, { "epoch": 46.54, "learning_rate": 4.418190298507463e-05, "loss": 0.0013, "step": 49896 }, { "epoch": 46.55, "learning_rate": 4.418143656716418e-05, "loss": 0.0, "step": 49900 }, { "epoch": 46.55, "learning_rate": 4.418097014925373e-05, "loss": 0.0, "step": 49904 }, { "epoch": 46.56, "learning_rate": 4.4180503731343286e-05, "loss": 0.0012, "step": 49908 }, { "epoch": 46.56, "learning_rate": 4.4180037313432835e-05, "loss": 0.0005, "step": 49912 }, { "epoch": 46.56, "learning_rate": 4.417957089552239e-05, "loss": 0.0, "step": 49916 }, { "epoch": 46.57, "learning_rate": 4.4179104477611944e-05, "loss": 0.0027, "step": 49920 }, { "epoch": 46.57, "learning_rate": 4.417863805970149e-05, "loss": 0.0001, "step": 49924 }, { "epoch": 46.57, "learning_rate": 4.417817164179105e-05, "loss": 0.0, "step": 49928 }, { "epoch": 46.58, "learning_rate": 4.41777052238806e-05, "loss": 0.0, "step": 49932 }, { "epoch": 46.58, "learning_rate": 4.417723880597015e-05, "loss": 0.0002, "step": 49936 }, { "epoch": 46.59, "learning_rate": 4.4176772388059705e-05, "loss": 0.0, "step": 49940 }, { "epoch": 46.59, "learning_rate": 4.4176305970149253e-05, "loss": 0.0, "step": 49944 }, { "epoch": 46.59, "learning_rate": 4.417583955223881e-05, "loss": 0.0028, "step": 49948 }, { "epoch": 46.6, "learning_rate": 4.417537313432836e-05, "loss": 0.0, "step": 49952 }, { "epoch": 46.6, "learning_rate": 4.417490671641791e-05, "loss": 0.0002, "step": 49956 }, { "epoch": 46.6, "learning_rate": 4.4174440298507466e-05, "loss": 0.0001, "step": 49960 }, { "epoch": 46.61, "learning_rate": 4.417397388059702e-05, "loss": 0.0, "step": 49964 }, { "epoch": 46.61, "learning_rate": 4.417350746268657e-05, "loss": 0.0, "step": 49968 }, { "epoch": 46.62, "learning_rate": 4.417304104477612e-05, "loss": 0.0, "step": 49972 }, { "epoch": 46.62, "learning_rate": 4.417257462686568e-05, "loss": 0.0, "step": 49976 }, { "epoch": 46.62, "learning_rate": 4.417210820895523e-05, "loss": 0.0004, "step": 49980 }, { "epoch": 46.63, "learning_rate": 4.4171641791044775e-05, "loss": 0.0001, "step": 49984 }, { "epoch": 46.63, "learning_rate": 4.417117537313433e-05, "loss": 0.0002, "step": 49988 }, { "epoch": 46.63, "learning_rate": 4.4170708955223885e-05, "loss": 0.0, "step": 49992 }, { "epoch": 46.64, "learning_rate": 4.417024253731343e-05, "loss": 0.0, "step": 49996 }, { "epoch": 46.64, "learning_rate": 4.416977611940299e-05, "loss": 0.0001, "step": 50000 }, { "epoch": 46.64, "eval_exact_match": 0.7156673114119922, "eval_exec": 0.7485493230174082, "eval_loss": 0.4576376974582672, "eval_runtime": 1189.333, "eval_samples_per_second": 0.869, "step": 50000 }, { "epoch": 46.65, "learning_rate": 4.4169309701492536e-05, "loss": 0.0001, "step": 50004 }, { "epoch": 46.65, "learning_rate": 4.416884328358209e-05, "loss": 0.0004, "step": 50008 }, { "epoch": 46.65, "learning_rate": 4.4168376865671646e-05, "loss": 0.0, "step": 50012 }, { "epoch": 46.66, "learning_rate": 4.4167910447761194e-05, "loss": 0.0, "step": 50016 }, { "epoch": 46.66, "learning_rate": 4.416744402985075e-05, "loss": 0.0001, "step": 50020 }, { "epoch": 46.66, "learning_rate": 4.4166977611940304e-05, "loss": 0.0, "step": 50024 }, { "epoch": 46.67, "learning_rate": 4.416651119402985e-05, "loss": 0.0, "step": 50028 }, { "epoch": 46.67, "learning_rate": 4.41660447761194e-05, "loss": 0.0, "step": 50032 }, { "epoch": 46.68, "learning_rate": 4.416557835820896e-05, "loss": 0.0002, "step": 50036 }, { "epoch": 46.68, "learning_rate": 4.416511194029851e-05, "loss": 0.0, "step": 50040 }, { "epoch": 46.68, "learning_rate": 4.416464552238806e-05, "loss": 0.0, "step": 50044 }, { "epoch": 46.69, "learning_rate": 4.416417910447761e-05, "loss": 0.0041, "step": 50048 }, { "epoch": 46.69, "learning_rate": 4.416371268656717e-05, "loss": 0.0001, "step": 50052 }, { "epoch": 46.69, "learning_rate": 4.4163246268656716e-05, "loss": 0.0001, "step": 50056 }, { "epoch": 46.7, "learning_rate": 4.416277985074627e-05, "loss": 0.0019, "step": 50060 }, { "epoch": 46.7, "learning_rate": 4.416231343283582e-05, "loss": 0.0, "step": 50064 }, { "epoch": 46.71, "learning_rate": 4.4161847014925374e-05, "loss": 0.0, "step": 50068 }, { "epoch": 46.71, "learning_rate": 4.416138059701493e-05, "loss": 0.0003, "step": 50072 }, { "epoch": 46.71, "learning_rate": 4.416091417910448e-05, "loss": 0.0, "step": 50076 }, { "epoch": 46.72, "learning_rate": 4.416044776119403e-05, "loss": 0.0, "step": 50080 }, { "epoch": 46.72, "learning_rate": 4.415998134328359e-05, "loss": 0.0, "step": 50084 }, { "epoch": 46.72, "learning_rate": 4.4159514925373135e-05, "loss": 0.0026, "step": 50088 }, { "epoch": 46.73, "learning_rate": 4.415904850746269e-05, "loss": 0.0001, "step": 50092 }, { "epoch": 46.73, "learning_rate": 4.4158582089552245e-05, "loss": 0.0, "step": 50096 }, { "epoch": 46.73, "learning_rate": 4.415811567164179e-05, "loss": 0.0, "step": 50100 }, { "epoch": 46.74, "learning_rate": 4.415764925373135e-05, "loss": 0.0, "step": 50104 }, { "epoch": 46.74, "learning_rate": 4.4157182835820896e-05, "loss": 0.0, "step": 50108 }, { "epoch": 46.75, "learning_rate": 4.415671641791045e-05, "loss": 0.0, "step": 50112 }, { "epoch": 46.75, "learning_rate": 4.4156250000000006e-05, "loss": 0.0001, "step": 50116 }, { "epoch": 46.75, "learning_rate": 4.4155783582089554e-05, "loss": 0.0018, "step": 50120 }, { "epoch": 46.76, "learning_rate": 4.41553171641791e-05, "loss": 0.0001, "step": 50124 }, { "epoch": 46.76, "learning_rate": 4.4154850746268664e-05, "loss": 0.0, "step": 50128 }, { "epoch": 46.76, "learning_rate": 4.415438432835821e-05, "loss": 0.0, "step": 50132 }, { "epoch": 46.77, "learning_rate": 4.415391791044776e-05, "loss": 0.0001, "step": 50136 }, { "epoch": 46.77, "learning_rate": 4.4153451492537315e-05, "loss": 0.0014, "step": 50140 }, { "epoch": 46.78, "learning_rate": 4.415298507462687e-05, "loss": 0.0, "step": 50144 }, { "epoch": 46.78, "learning_rate": 4.415251865671642e-05, "loss": 0.0001, "step": 50148 }, { "epoch": 46.78, "learning_rate": 4.415205223880597e-05, "loss": 0.0, "step": 50152 }, { "epoch": 46.79, "learning_rate": 4.415158582089553e-05, "loss": 0.0056, "step": 50156 }, { "epoch": 46.79, "learning_rate": 4.4151119402985076e-05, "loss": 0.0, "step": 50160 }, { "epoch": 46.79, "learning_rate": 4.415065298507463e-05, "loss": 0.0, "step": 50164 }, { "epoch": 46.8, "learning_rate": 4.415018656716418e-05, "loss": 0.0, "step": 50168 }, { "epoch": 46.8, "learning_rate": 4.4149720149253734e-05, "loss": 0.0001, "step": 50172 }, { "epoch": 46.81, "learning_rate": 4.414925373134329e-05, "loss": 0.0, "step": 50176 }, { "epoch": 46.81, "learning_rate": 4.414878731343284e-05, "loss": 0.0001, "step": 50180 }, { "epoch": 46.81, "learning_rate": 4.4148320895522385e-05, "loss": 0.0, "step": 50184 }, { "epoch": 46.82, "learning_rate": 4.414785447761195e-05, "loss": 0.0001, "step": 50188 }, { "epoch": 46.82, "learning_rate": 4.4147388059701495e-05, "loss": 0.0001, "step": 50192 }, { "epoch": 46.82, "learning_rate": 4.414692164179104e-05, "loss": 0.0, "step": 50196 }, { "epoch": 46.83, "learning_rate": 4.41464552238806e-05, "loss": 0.0, "step": 50200 }, { "epoch": 46.83, "learning_rate": 4.414598880597015e-05, "loss": 0.0, "step": 50204 }, { "epoch": 46.84, "learning_rate": 4.41455223880597e-05, "loss": 0.0, "step": 50208 }, { "epoch": 46.84, "learning_rate": 4.4145055970149256e-05, "loss": 0.0001, "step": 50212 }, { "epoch": 46.84, "learning_rate": 4.4144589552238804e-05, "loss": 0.0, "step": 50216 }, { "epoch": 46.85, "learning_rate": 4.414412313432836e-05, "loss": 0.0, "step": 50220 }, { "epoch": 46.85, "learning_rate": 4.4143656716417914e-05, "loss": 0.0001, "step": 50224 }, { "epoch": 46.85, "learning_rate": 4.414319029850746e-05, "loss": 0.0002, "step": 50228 }, { "epoch": 46.86, "learning_rate": 4.414272388059702e-05, "loss": 0.0, "step": 50232 }, { "epoch": 46.86, "learning_rate": 4.414225746268657e-05, "loss": 0.0001, "step": 50236 }, { "epoch": 46.87, "learning_rate": 4.414179104477612e-05, "loss": 0.0, "step": 50240 }, { "epoch": 46.87, "learning_rate": 4.414132462686567e-05, "loss": 0.0003, "step": 50244 }, { "epoch": 46.87, "learning_rate": 4.414085820895523e-05, "loss": 0.0, "step": 50248 }, { "epoch": 46.88, "learning_rate": 4.414039179104478e-05, "loss": 0.0001, "step": 50252 }, { "epoch": 46.88, "learning_rate": 4.413992537313433e-05, "loss": 0.0, "step": 50256 }, { "epoch": 46.88, "learning_rate": 4.413945895522388e-05, "loss": 0.0, "step": 50260 }, { "epoch": 46.89, "learning_rate": 4.4138992537313436e-05, "loss": 0.0, "step": 50264 }, { "epoch": 46.89, "learning_rate": 4.413852611940299e-05, "loss": 0.0001, "step": 50268 }, { "epoch": 46.9, "learning_rate": 4.413805970149254e-05, "loss": 0.0019, "step": 50272 }, { "epoch": 46.9, "learning_rate": 4.413759328358209e-05, "loss": 0.005, "step": 50276 }, { "epoch": 46.9, "learning_rate": 4.413712686567165e-05, "loss": 0.0, "step": 50280 }, { "epoch": 46.91, "learning_rate": 4.41366604477612e-05, "loss": 0.0008, "step": 50284 }, { "epoch": 46.91, "learning_rate": 4.4136194029850745e-05, "loss": 0.0, "step": 50288 }, { "epoch": 46.91, "learning_rate": 4.41357276119403e-05, "loss": 0.0001, "step": 50292 }, { "epoch": 46.92, "learning_rate": 4.4135261194029855e-05, "loss": 0.0001, "step": 50296 }, { "epoch": 46.92, "learning_rate": 4.41347947761194e-05, "loss": 0.0002, "step": 50300 }, { "epoch": 46.93, "learning_rate": 4.413432835820896e-05, "loss": 0.0, "step": 50304 }, { "epoch": 46.93, "learning_rate": 4.413386194029851e-05, "loss": 0.0022, "step": 50308 }, { "epoch": 46.93, "learning_rate": 4.413339552238806e-05, "loss": 0.0002, "step": 50312 }, { "epoch": 46.94, "learning_rate": 4.4132929104477616e-05, "loss": 0.0001, "step": 50316 }, { "epoch": 46.94, "learning_rate": 4.4132462686567164e-05, "loss": 0.009, "step": 50320 }, { "epoch": 46.94, "learning_rate": 4.413199626865672e-05, "loss": 0.0, "step": 50324 }, { "epoch": 46.95, "learning_rate": 4.4131529850746274e-05, "loss": 0.0002, "step": 50328 }, { "epoch": 46.95, "learning_rate": 4.413106343283582e-05, "loss": 0.0, "step": 50332 }, { "epoch": 46.96, "learning_rate": 4.413059701492537e-05, "loss": 0.0001, "step": 50336 }, { "epoch": 46.96, "learning_rate": 4.413013059701493e-05, "loss": 0.0023, "step": 50340 }, { "epoch": 46.96, "learning_rate": 4.412966417910448e-05, "loss": 0.0002, "step": 50344 }, { "epoch": 46.97, "learning_rate": 4.412919776119403e-05, "loss": 0.003, "step": 50348 }, { "epoch": 46.97, "learning_rate": 4.412873134328358e-05, "loss": 0.0, "step": 50352 }, { "epoch": 46.97, "learning_rate": 4.412826492537314e-05, "loss": 0.0, "step": 50356 }, { "epoch": 46.98, "learning_rate": 4.4127798507462686e-05, "loss": 0.0, "step": 50360 }, { "epoch": 46.98, "learning_rate": 4.412733208955224e-05, "loss": 0.0025, "step": 50364 }, { "epoch": 46.98, "learning_rate": 4.4126865671641796e-05, "loss": 0.0001, "step": 50368 }, { "epoch": 46.99, "learning_rate": 4.4126399253731344e-05, "loss": 0.0, "step": 50372 }, { "epoch": 46.99, "learning_rate": 4.41259328358209e-05, "loss": 0.0001, "step": 50376 }, { "epoch": 47.0, "learning_rate": 4.412546641791045e-05, "loss": 0.0002, "step": 50380 }, { "epoch": 47.0, "learning_rate": 4.4125e-05, "loss": 0.0, "step": 50384 }, { "epoch": 47.0, "learning_rate": 4.412453358208956e-05, "loss": 0.0026, "step": 50388 }, { "epoch": 47.01, "learning_rate": 4.4124067164179105e-05, "loss": 0.0, "step": 50392 }, { "epoch": 47.01, "learning_rate": 4.412360074626865e-05, "loss": 0.0049, "step": 50396 }, { "epoch": 47.01, "learning_rate": 4.4123134328358215e-05, "loss": 0.0, "step": 50400 }, { "epoch": 47.02, "learning_rate": 4.412266791044776e-05, "loss": 0.0, "step": 50404 }, { "epoch": 47.02, "learning_rate": 4.412220149253731e-05, "loss": 0.0013, "step": 50408 }, { "epoch": 47.03, "learning_rate": 4.4121735074626866e-05, "loss": 0.0011, "step": 50412 }, { "epoch": 47.03, "learning_rate": 4.412126865671642e-05, "loss": 0.0001, "step": 50416 }, { "epoch": 47.03, "learning_rate": 4.4120802238805976e-05, "loss": 0.0005, "step": 50420 }, { "epoch": 47.04, "learning_rate": 4.4120335820895524e-05, "loss": 0.0, "step": 50424 }, { "epoch": 47.04, "learning_rate": 4.411986940298508e-05, "loss": 0.0, "step": 50428 }, { "epoch": 47.04, "learning_rate": 4.4119402985074634e-05, "loss": 0.0, "step": 50432 }, { "epoch": 47.05, "learning_rate": 4.411893656716418e-05, "loss": 0.0, "step": 50436 }, { "epoch": 47.05, "learning_rate": 4.411847014925373e-05, "loss": 0.0, "step": 50440 }, { "epoch": 47.06, "learning_rate": 4.4118003731343285e-05, "loss": 0.0, "step": 50444 }, { "epoch": 47.06, "learning_rate": 4.411753731343284e-05, "loss": 0.0002, "step": 50448 }, { "epoch": 47.06, "learning_rate": 4.411707089552239e-05, "loss": 0.0, "step": 50452 }, { "epoch": 47.07, "learning_rate": 4.411660447761194e-05, "loss": 0.0, "step": 50456 }, { "epoch": 47.07, "learning_rate": 4.41161380597015e-05, "loss": 0.0, "step": 50460 }, { "epoch": 47.07, "learning_rate": 4.4115671641791046e-05, "loss": 0.0, "step": 50464 }, { "epoch": 47.08, "learning_rate": 4.41152052238806e-05, "loss": 0.0001, "step": 50468 }, { "epoch": 47.08, "learning_rate": 4.411473880597015e-05, "loss": 0.0002, "step": 50472 }, { "epoch": 47.09, "learning_rate": 4.4114272388059704e-05, "loss": 0.0018, "step": 50476 }, { "epoch": 47.09, "learning_rate": 4.411380597014926e-05, "loss": 0.0001, "step": 50480 }, { "epoch": 47.09, "learning_rate": 4.411333955223881e-05, "loss": 0.0001, "step": 50484 }, { "epoch": 47.1, "learning_rate": 4.411287313432836e-05, "loss": 0.0, "step": 50488 }, { "epoch": 47.1, "learning_rate": 4.411240671641792e-05, "loss": 0.0, "step": 50492 }, { "epoch": 47.1, "learning_rate": 4.4111940298507465e-05, "loss": 0.0, "step": 50496 }, { "epoch": 47.11, "learning_rate": 4.411147388059701e-05, "loss": 0.0001, "step": 50500 }, { "epoch": 47.11, "eval_exact_match": 0.7321083172147002, "eval_exec": 0.7649903288201161, "eval_loss": 0.4581345319747925, "eval_runtime": 1124.8565, "eval_samples_per_second": 0.919, "step": 50500 }, { "epoch": 47.11, "learning_rate": 4.411100746268657e-05, "loss": 0.0001, "step": 50504 }, { "epoch": 47.12, "learning_rate": 4.411054104477612e-05, "loss": 0.0, "step": 50508 }, { "epoch": 47.12, "learning_rate": 4.411007462686567e-05, "loss": 0.0, "step": 50512 }, { "epoch": 47.12, "learning_rate": 4.4109608208955226e-05, "loss": 0.0, "step": 50516 }, { "epoch": 47.13, "learning_rate": 4.410914179104478e-05, "loss": 0.0001, "step": 50520 }, { "epoch": 47.13, "learning_rate": 4.410867537313433e-05, "loss": 0.0001, "step": 50524 }, { "epoch": 47.13, "learning_rate": 4.4108208955223884e-05, "loss": 0.0002, "step": 50528 }, { "epoch": 47.14, "learning_rate": 4.410774253731343e-05, "loss": 0.0001, "step": 50532 }, { "epoch": 47.14, "learning_rate": 4.410727611940299e-05, "loss": 0.0, "step": 50536 }, { "epoch": 47.15, "learning_rate": 4.410680970149254e-05, "loss": 0.0006, "step": 50540 }, { "epoch": 47.15, "learning_rate": 4.410634328358209e-05, "loss": 0.0, "step": 50544 }, { "epoch": 47.15, "learning_rate": 4.4105876865671645e-05, "loss": 0.0, "step": 50548 }, { "epoch": 47.16, "learning_rate": 4.41054104477612e-05, "loss": 0.0001, "step": 50552 }, { "epoch": 47.16, "learning_rate": 4.410494402985075e-05, "loss": 0.0, "step": 50556 }, { "epoch": 47.16, "learning_rate": 4.4104477611940296e-05, "loss": 0.0, "step": 50560 }, { "epoch": 47.17, "learning_rate": 4.410401119402985e-05, "loss": 0.0001, "step": 50564 }, { "epoch": 47.17, "learning_rate": 4.4103544776119406e-05, "loss": 0.0, "step": 50568 }, { "epoch": 47.18, "learning_rate": 4.4103078358208954e-05, "loss": 0.0, "step": 50572 }, { "epoch": 47.18, "learning_rate": 4.410261194029851e-05, "loss": 0.0001, "step": 50576 }, { "epoch": 47.18, "learning_rate": 4.4102145522388064e-05, "loss": 0.0, "step": 50580 }, { "epoch": 47.19, "learning_rate": 4.410167910447762e-05, "loss": 0.0, "step": 50584 }, { "epoch": 47.19, "learning_rate": 4.410121268656717e-05, "loss": 0.0001, "step": 50588 }, { "epoch": 47.19, "learning_rate": 4.4100746268656715e-05, "loss": 0.0, "step": 50592 }, { "epoch": 47.2, "learning_rate": 4.4100279850746276e-05, "loss": 0.0, "step": 50596 }, { "epoch": 47.2, "learning_rate": 4.4099813432835825e-05, "loss": 0.0, "step": 50600 }, { "epoch": 47.21, "learning_rate": 4.409934701492537e-05, "loss": 0.0008, "step": 50604 }, { "epoch": 47.21, "learning_rate": 4.409888059701493e-05, "loss": 0.0, "step": 50608 }, { "epoch": 47.21, "learning_rate": 4.409841417910448e-05, "loss": 0.0, "step": 50612 }, { "epoch": 47.22, "learning_rate": 4.409794776119403e-05, "loss": 0.0, "step": 50616 }, { "epoch": 47.22, "learning_rate": 4.4097481343283586e-05, "loss": 0.0, "step": 50620 }, { "epoch": 47.22, "learning_rate": 4.4097014925373134e-05, "loss": 0.0, "step": 50624 }, { "epoch": 47.23, "learning_rate": 4.409654850746269e-05, "loss": 0.0001, "step": 50628 }, { "epoch": 47.23, "learning_rate": 4.4096082089552244e-05, "loss": 0.0011, "step": 50632 }, { "epoch": 47.24, "learning_rate": 4.409561567164179e-05, "loss": 0.0024, "step": 50636 }, { "epoch": 47.24, "learning_rate": 4.4095149253731347e-05, "loss": 0.0, "step": 50640 }, { "epoch": 47.24, "learning_rate": 4.40946828358209e-05, "loss": 0.0002, "step": 50644 }, { "epoch": 47.25, "learning_rate": 4.409421641791045e-05, "loss": 0.0, "step": 50648 }, { "epoch": 47.25, "learning_rate": 4.409375e-05, "loss": 0.0, "step": 50652 }, { "epoch": 47.25, "learning_rate": 4.409328358208956e-05, "loss": 0.0, "step": 50656 }, { "epoch": 47.26, "learning_rate": 4.409281716417911e-05, "loss": 0.0, "step": 50660 }, { "epoch": 47.26, "learning_rate": 4.4092350746268656e-05, "loss": 0.0, "step": 50664 }, { "epoch": 47.26, "learning_rate": 4.409188432835821e-05, "loss": 0.0, "step": 50668 }, { "epoch": 47.27, "learning_rate": 4.4091417910447766e-05, "loss": 0.0, "step": 50672 }, { "epoch": 47.27, "learning_rate": 4.4090951492537314e-05, "loss": 0.0, "step": 50676 }, { "epoch": 47.28, "learning_rate": 4.409048507462687e-05, "loss": 0.0001, "step": 50680 }, { "epoch": 47.28, "learning_rate": 4.409001865671642e-05, "loss": 0.0002, "step": 50684 }, { "epoch": 47.28, "learning_rate": 4.408955223880597e-05, "loss": 0.0001, "step": 50688 }, { "epoch": 47.29, "learning_rate": 4.4089085820895526e-05, "loss": 0.0, "step": 50692 }, { "epoch": 47.29, "learning_rate": 4.4088619402985075e-05, "loss": 0.0003, "step": 50696 }, { "epoch": 47.29, "learning_rate": 4.408815298507463e-05, "loss": 0.0, "step": 50700 }, { "epoch": 47.3, "learning_rate": 4.4087686567164184e-05, "loss": 0.0, "step": 50704 }, { "epoch": 47.3, "learning_rate": 4.408722014925373e-05, "loss": 0.0, "step": 50708 }, { "epoch": 47.31, "learning_rate": 4.408675373134328e-05, "loss": 0.0, "step": 50712 }, { "epoch": 47.31, "learning_rate": 4.408628731343284e-05, "loss": 0.0003, "step": 50716 }, { "epoch": 47.31, "learning_rate": 4.408582089552239e-05, "loss": 0.0, "step": 50720 }, { "epoch": 47.32, "learning_rate": 4.408535447761194e-05, "loss": 0.0, "step": 50724 }, { "epoch": 47.32, "learning_rate": 4.4084888059701494e-05, "loss": 0.0, "step": 50728 }, { "epoch": 47.32, "learning_rate": 4.408442164179105e-05, "loss": 0.0, "step": 50732 }, { "epoch": 47.33, "learning_rate": 4.4083955223880597e-05, "loss": 0.0, "step": 50736 }, { "epoch": 47.33, "learning_rate": 4.408348880597015e-05, "loss": 0.0, "step": 50740 }, { "epoch": 47.34, "learning_rate": 4.40830223880597e-05, "loss": 0.0, "step": 50744 }, { "epoch": 47.34, "learning_rate": 4.408255597014926e-05, "loss": 0.0, "step": 50748 }, { "epoch": 47.34, "learning_rate": 4.408208955223881e-05, "loss": 0.0001, "step": 50752 }, { "epoch": 47.35, "learning_rate": 4.408162313432836e-05, "loss": 0.0, "step": 50756 }, { "epoch": 47.35, "learning_rate": 4.408115671641791e-05, "loss": 0.0, "step": 50760 }, { "epoch": 47.35, "learning_rate": 4.408069029850747e-05, "loss": 0.0, "step": 50764 }, { "epoch": 47.36, "learning_rate": 4.4080223880597016e-05, "loss": 0.0001, "step": 50768 }, { "epoch": 47.36, "learning_rate": 4.407975746268657e-05, "loss": 0.0, "step": 50772 }, { "epoch": 47.37, "learning_rate": 4.4079291044776125e-05, "loss": 0.0, "step": 50776 }, { "epoch": 47.37, "learning_rate": 4.4078824626865673e-05, "loss": 0.0, "step": 50780 }, { "epoch": 47.37, "learning_rate": 4.407835820895523e-05, "loss": 0.0004, "step": 50784 }, { "epoch": 47.38, "learning_rate": 4.4077891791044777e-05, "loss": 0.0, "step": 50788 }, { "epoch": 47.38, "learning_rate": 4.407742537313433e-05, "loss": 0.0, "step": 50792 }, { "epoch": 47.38, "learning_rate": 4.4076958955223886e-05, "loss": 0.0, "step": 50796 }, { "epoch": 47.39, "learning_rate": 4.4076492537313434e-05, "loss": 0.0, "step": 50800 }, { "epoch": 47.39, "learning_rate": 4.407602611940298e-05, "loss": 0.0002, "step": 50804 }, { "epoch": 47.4, "learning_rate": 4.4075559701492544e-05, "loss": 0.0, "step": 50808 }, { "epoch": 47.4, "learning_rate": 4.407509328358209e-05, "loss": 0.0012, "step": 50812 }, { "epoch": 47.4, "learning_rate": 4.407462686567164e-05, "loss": 0.0003, "step": 50816 }, { "epoch": 47.41, "learning_rate": 4.4074160447761195e-05, "loss": 0.0, "step": 50820 }, { "epoch": 47.41, "learning_rate": 4.407369402985075e-05, "loss": 0.0004, "step": 50824 }, { "epoch": 47.41, "learning_rate": 4.40732276119403e-05, "loss": 0.0024, "step": 50828 }, { "epoch": 47.42, "learning_rate": 4.4072761194029853e-05, "loss": 0.0017, "step": 50832 }, { "epoch": 47.42, "learning_rate": 4.407229477611941e-05, "loss": 0.0005, "step": 50836 }, { "epoch": 47.43, "learning_rate": 4.4071828358208956e-05, "loss": 0.0, "step": 50840 }, { "epoch": 47.43, "learning_rate": 4.407136194029851e-05, "loss": 0.0, "step": 50844 }, { "epoch": 47.43, "learning_rate": 4.407089552238806e-05, "loss": 0.0, "step": 50848 }, { "epoch": 47.44, "learning_rate": 4.4070429104477614e-05, "loss": 0.0003, "step": 50852 }, { "epoch": 47.44, "learning_rate": 4.406996268656717e-05, "loss": 0.0, "step": 50856 }, { "epoch": 47.44, "learning_rate": 4.406949626865672e-05, "loss": 0.0, "step": 50860 }, { "epoch": 47.45, "learning_rate": 4.4069029850746266e-05, "loss": 0.0001, "step": 50864 }, { "epoch": 47.45, "learning_rate": 4.406856343283583e-05, "loss": 0.0, "step": 50868 }, { "epoch": 47.46, "learning_rate": 4.4068097014925375e-05, "loss": 0.0009, "step": 50872 }, { "epoch": 47.46, "learning_rate": 4.4067630597014923e-05, "loss": 0.0001, "step": 50876 }, { "epoch": 47.46, "learning_rate": 4.406716417910448e-05, "loss": 0.0, "step": 50880 }, { "epoch": 47.47, "learning_rate": 4.406669776119403e-05, "loss": 0.0003, "step": 50884 }, { "epoch": 47.47, "learning_rate": 4.406623134328358e-05, "loss": 0.0002, "step": 50888 }, { "epoch": 47.47, "learning_rate": 4.4065764925373136e-05, "loss": 0.0, "step": 50892 }, { "epoch": 47.48, "learning_rate": 4.4065298507462684e-05, "loss": 0.0001, "step": 50896 }, { "epoch": 47.48, "learning_rate": 4.406483208955224e-05, "loss": 0.0, "step": 50900 }, { "epoch": 47.49, "learning_rate": 4.4064365671641794e-05, "loss": 0.0, "step": 50904 }, { "epoch": 47.49, "learning_rate": 4.406389925373134e-05, "loss": 0.0015, "step": 50908 }, { "epoch": 47.49, "learning_rate": 4.40634328358209e-05, "loss": 0.0, "step": 50912 }, { "epoch": 47.5, "learning_rate": 4.406296641791045e-05, "loss": 0.0011, "step": 50916 }, { "epoch": 47.5, "learning_rate": 4.40625e-05, "loss": 0.0001, "step": 50920 }, { "epoch": 47.5, "learning_rate": 4.4062033582089555e-05, "loss": 0.0, "step": 50924 }, { "epoch": 47.51, "learning_rate": 4.406156716417911e-05, "loss": 0.0006, "step": 50928 }, { "epoch": 47.51, "learning_rate": 4.406110074626866e-05, "loss": 0.0, "step": 50932 }, { "epoch": 47.51, "learning_rate": 4.406063432835821e-05, "loss": 0.0, "step": 50936 }, { "epoch": 47.52, "learning_rate": 4.406016791044776e-05, "loss": 0.0003, "step": 50940 }, { "epoch": 47.52, "learning_rate": 4.4059701492537316e-05, "loss": 0.0, "step": 50944 }, { "epoch": 47.53, "learning_rate": 4.405923507462687e-05, "loss": 0.0001, "step": 50948 }, { "epoch": 47.53, "learning_rate": 4.405876865671642e-05, "loss": 0.0, "step": 50952 }, { "epoch": 47.53, "learning_rate": 4.405830223880597e-05, "loss": 0.0001, "step": 50956 }, { "epoch": 47.54, "learning_rate": 4.405783582089553e-05, "loss": 0.0, "step": 50960 }, { "epoch": 47.54, "learning_rate": 4.405736940298508e-05, "loss": 0.0, "step": 50964 }, { "epoch": 47.54, "learning_rate": 4.4056902985074625e-05, "loss": 0.0, "step": 50968 }, { "epoch": 47.55, "learning_rate": 4.405643656716418e-05, "loss": 0.0001, "step": 50972 }, { "epoch": 47.55, "learning_rate": 4.4055970149253735e-05, "loss": 0.0, "step": 50976 }, { "epoch": 47.56, "learning_rate": 4.405550373134328e-05, "loss": 0.0016, "step": 50980 }, { "epoch": 47.56, "learning_rate": 4.405503731343284e-05, "loss": 0.0, "step": 50984 }, { "epoch": 47.56, "learning_rate": 4.405457089552239e-05, "loss": 0.0, "step": 50988 }, { "epoch": 47.57, "learning_rate": 4.405410447761194e-05, "loss": 0.0022, "step": 50992 }, { "epoch": 47.57, "learning_rate": 4.4053638059701496e-05, "loss": 0.0, "step": 50996 }, { "epoch": 47.57, "learning_rate": 4.4053171641791044e-05, "loss": 0.0, "step": 51000 }, { "epoch": 47.57, "eval_exact_match": 0.7369439071566731, "eval_exec": 0.7649903288201161, "eval_loss": 0.47057628631591797, "eval_runtime": 1132.9681, "eval_samples_per_second": 0.913, "step": 51000 }, { "epoch": 47.58, "learning_rate": 4.40527052238806e-05, "loss": 0.0, "step": 51004 }, { "epoch": 47.58, "learning_rate": 4.4052238805970154e-05, "loss": 0.0002, "step": 51008 }, { "epoch": 47.59, "learning_rate": 4.40517723880597e-05, "loss": 0.0029, "step": 51012 }, { "epoch": 47.59, "learning_rate": 4.405130597014925e-05, "loss": 0.0, "step": 51016 }, { "epoch": 47.59, "learning_rate": 4.405083955223881e-05, "loss": 0.0055, "step": 51020 }, { "epoch": 47.6, "learning_rate": 4.405037313432836e-05, "loss": 0.0, "step": 51024 }, { "epoch": 47.6, "learning_rate": 4.404990671641791e-05, "loss": 0.0, "step": 51028 }, { "epoch": 47.6, "learning_rate": 4.404944029850746e-05, "loss": 0.0, "step": 51032 }, { "epoch": 47.61, "learning_rate": 4.404897388059702e-05, "loss": 0.0, "step": 51036 }, { "epoch": 47.61, "learning_rate": 4.4048507462686566e-05, "loss": 0.0, "step": 51040 }, { "epoch": 47.62, "learning_rate": 4.404804104477612e-05, "loss": 0.0, "step": 51044 }, { "epoch": 47.62, "learning_rate": 4.4047574626865676e-05, "loss": 0.0007, "step": 51048 }, { "epoch": 47.62, "learning_rate": 4.4047108208955224e-05, "loss": 0.0, "step": 51052 }, { "epoch": 47.63, "learning_rate": 4.404664179104478e-05, "loss": 0.0, "step": 51056 }, { "epoch": 47.63, "learning_rate": 4.404617537313433e-05, "loss": 0.0001, "step": 51060 }, { "epoch": 47.63, "learning_rate": 4.404570895522388e-05, "loss": 0.0011, "step": 51064 }, { "epoch": 47.64, "learning_rate": 4.404524253731344e-05, "loss": 0.0, "step": 51068 }, { "epoch": 47.64, "learning_rate": 4.4044776119402985e-05, "loss": 0.0027, "step": 51072 }, { "epoch": 47.65, "learning_rate": 4.404430970149254e-05, "loss": 0.0003, "step": 51076 }, { "epoch": 47.65, "learning_rate": 4.4043843283582095e-05, "loss": 0.0, "step": 51080 }, { "epoch": 47.65, "learning_rate": 4.404337686567164e-05, "loss": 0.0002, "step": 51084 }, { "epoch": 47.66, "learning_rate": 4.40429104477612e-05, "loss": 0.0011, "step": 51088 }, { "epoch": 47.66, "learning_rate": 4.4042444029850746e-05, "loss": 0.0, "step": 51092 }, { "epoch": 47.66, "learning_rate": 4.40419776119403e-05, "loss": 0.0048, "step": 51096 }, { "epoch": 47.67, "learning_rate": 4.4041511194029856e-05, "loss": 0.0, "step": 51100 }, { "epoch": 47.67, "learning_rate": 4.4041044776119404e-05, "loss": 0.0, "step": 51104 }, { "epoch": 47.68, "learning_rate": 4.404057835820896e-05, "loss": 0.0, "step": 51108 }, { "epoch": 47.68, "learning_rate": 4.4040111940298514e-05, "loss": 0.0, "step": 51112 }, { "epoch": 47.68, "learning_rate": 4.403964552238806e-05, "loss": 0.0026, "step": 51116 }, { "epoch": 47.69, "learning_rate": 4.403917910447761e-05, "loss": 0.0002, "step": 51120 }, { "epoch": 47.69, "learning_rate": 4.4038712686567165e-05, "loss": 0.0, "step": 51124 }, { "epoch": 47.69, "learning_rate": 4.403824626865672e-05, "loss": 0.0021, "step": 51128 }, { "epoch": 47.7, "learning_rate": 4.403777985074627e-05, "loss": 0.0008, "step": 51132 }, { "epoch": 47.7, "learning_rate": 4.403731343283582e-05, "loss": 0.0, "step": 51136 }, { "epoch": 47.71, "learning_rate": 4.403684701492538e-05, "loss": 0.0001, "step": 51140 }, { "epoch": 47.71, "learning_rate": 4.4036380597014926e-05, "loss": 0.0002, "step": 51144 }, { "epoch": 47.71, "learning_rate": 4.403591417910448e-05, "loss": 0.0, "step": 51148 }, { "epoch": 47.72, "learning_rate": 4.403544776119403e-05, "loss": 0.0, "step": 51152 }, { "epoch": 47.72, "learning_rate": 4.4034981343283584e-05, "loss": 0.0, "step": 51156 }, { "epoch": 47.72, "learning_rate": 4.403451492537314e-05, "loss": 0.0003, "step": 51160 }, { "epoch": 47.73, "learning_rate": 4.403404850746269e-05, "loss": 0.0004, "step": 51164 }, { "epoch": 47.73, "learning_rate": 4.403358208955224e-05, "loss": 0.0, "step": 51168 }, { "epoch": 47.73, "learning_rate": 4.40331156716418e-05, "loss": 0.0, "step": 51172 }, { "epoch": 47.74, "learning_rate": 4.4032649253731345e-05, "loss": 0.0021, "step": 51176 }, { "epoch": 47.74, "learning_rate": 4.403218283582089e-05, "loss": 0.0, "step": 51180 }, { "epoch": 47.75, "learning_rate": 4.403171641791045e-05, "loss": 0.0, "step": 51184 }, { "epoch": 47.75, "learning_rate": 4.403125e-05, "loss": 0.0001, "step": 51188 }, { "epoch": 47.75, "learning_rate": 4.403078358208955e-05, "loss": 0.0001, "step": 51192 }, { "epoch": 47.76, "learning_rate": 4.4030317164179106e-05, "loss": 0.0041, "step": 51196 }, { "epoch": 47.76, "learning_rate": 4.402985074626866e-05, "loss": 0.0, "step": 51200 }, { "epoch": 47.76, "learning_rate": 4.402938432835821e-05, "loss": 0.0, "step": 51204 }, { "epoch": 47.77, "learning_rate": 4.4028917910447764e-05, "loss": 0.0, "step": 51208 }, { "epoch": 47.77, "learning_rate": 4.402845149253731e-05, "loss": 0.0, "step": 51212 }, { "epoch": 47.78, "learning_rate": 4.402798507462687e-05, "loss": 0.0, "step": 51216 }, { "epoch": 47.78, "learning_rate": 4.402751865671642e-05, "loss": 0.0018, "step": 51220 }, { "epoch": 47.78, "learning_rate": 4.402705223880597e-05, "loss": 0.0, "step": 51224 }, { "epoch": 47.79, "learning_rate": 4.4026585820895525e-05, "loss": 0.0, "step": 51228 }, { "epoch": 47.79, "learning_rate": 4.402611940298508e-05, "loss": 0.0001, "step": 51232 }, { "epoch": 47.79, "learning_rate": 4.402565298507463e-05, "loss": 0.0, "step": 51236 }, { "epoch": 47.8, "learning_rate": 4.402518656716418e-05, "loss": 0.0001, "step": 51240 }, { "epoch": 47.8, "learning_rate": 4.402472014925373e-05, "loss": 0.0001, "step": 51244 }, { "epoch": 47.81, "learning_rate": 4.4024253731343286e-05, "loss": 0.0, "step": 51248 }, { "epoch": 47.81, "learning_rate": 4.402378731343284e-05, "loss": 0.0005, "step": 51252 }, { "epoch": 47.81, "learning_rate": 4.402332089552239e-05, "loss": 0.0004, "step": 51256 }, { "epoch": 47.82, "learning_rate": 4.4022854477611944e-05, "loss": 0.0, "step": 51260 }, { "epoch": 47.82, "learning_rate": 4.40223880597015e-05, "loss": 0.0, "step": 51264 }, { "epoch": 47.82, "learning_rate": 4.402192164179105e-05, "loss": 0.0, "step": 51268 }, { "epoch": 47.83, "learning_rate": 4.4021455223880595e-05, "loss": 0.0, "step": 51272 }, { "epoch": 47.83, "learning_rate": 4.402098880597016e-05, "loss": 0.0008, "step": 51276 }, { "epoch": 47.84, "learning_rate": 4.4020522388059705e-05, "loss": 0.0001, "step": 51280 }, { "epoch": 47.84, "learning_rate": 4.402005597014925e-05, "loss": 0.0001, "step": 51284 }, { "epoch": 47.84, "learning_rate": 4.401958955223881e-05, "loss": 0.0, "step": 51288 }, { "epoch": 47.85, "learning_rate": 4.401912313432836e-05, "loss": 0.0, "step": 51292 }, { "epoch": 47.85, "learning_rate": 4.401865671641791e-05, "loss": 0.0, "step": 51296 }, { "epoch": 47.85, "learning_rate": 4.4018190298507466e-05, "loss": 0.0001, "step": 51300 }, { "epoch": 47.86, "learning_rate": 4.4017723880597014e-05, "loss": 0.0, "step": 51304 }, { "epoch": 47.86, "learning_rate": 4.401725746268657e-05, "loss": 0.0001, "step": 51308 }, { "epoch": 47.87, "learning_rate": 4.4016791044776124e-05, "loss": 0.0, "step": 51312 }, { "epoch": 47.87, "learning_rate": 4.401632462686567e-05, "loss": 0.0, "step": 51316 }, { "epoch": 47.87, "learning_rate": 4.401585820895523e-05, "loss": 0.0, "step": 51320 }, { "epoch": 47.88, "learning_rate": 4.401539179104478e-05, "loss": 0.0, "step": 51324 }, { "epoch": 47.88, "learning_rate": 4.401492537313433e-05, "loss": 0.0, "step": 51328 }, { "epoch": 47.88, "learning_rate": 4.401445895522388e-05, "loss": 0.0, "step": 51332 }, { "epoch": 47.89, "learning_rate": 4.401399253731344e-05, "loss": 0.0, "step": 51336 }, { "epoch": 47.89, "learning_rate": 4.401352611940299e-05, "loss": 0.0, "step": 51340 }, { "epoch": 47.9, "learning_rate": 4.4013059701492536e-05, "loss": 0.0001, "step": 51344 }, { "epoch": 47.9, "learning_rate": 4.401259328358209e-05, "loss": 0.0, "step": 51348 }, { "epoch": 47.9, "learning_rate": 4.4012126865671646e-05, "loss": 0.0, "step": 51352 }, { "epoch": 47.91, "learning_rate": 4.4011660447761194e-05, "loss": 0.0048, "step": 51356 }, { "epoch": 47.91, "learning_rate": 4.401119402985075e-05, "loss": 0.0, "step": 51360 }, { "epoch": 47.91, "learning_rate": 4.40107276119403e-05, "loss": 0.0, "step": 51364 }, { "epoch": 47.92, "learning_rate": 4.401026119402985e-05, "loss": 0.0, "step": 51368 }, { "epoch": 47.92, "learning_rate": 4.400979477611941e-05, "loss": 0.0014, "step": 51372 }, { "epoch": 47.93, "learning_rate": 4.4009328358208955e-05, "loss": 0.0012, "step": 51376 }, { "epoch": 47.93, "learning_rate": 4.400886194029851e-05, "loss": 0.0, "step": 51380 }, { "epoch": 47.93, "learning_rate": 4.4008395522388065e-05, "loss": 0.0, "step": 51384 }, { "epoch": 47.94, "learning_rate": 4.400792910447761e-05, "loss": 0.0, "step": 51388 }, { "epoch": 47.94, "learning_rate": 4.400746268656716e-05, "loss": 0.0, "step": 51392 }, { "epoch": 47.94, "learning_rate": 4.400699626865672e-05, "loss": 0.0, "step": 51396 }, { "epoch": 47.95, "learning_rate": 4.400652985074627e-05, "loss": 0.0002, "step": 51400 }, { "epoch": 47.95, "learning_rate": 4.4006063432835826e-05, "loss": 0.0001, "step": 51404 }, { "epoch": 47.96, "learning_rate": 4.4005597014925374e-05, "loss": 0.0003, "step": 51408 }, { "epoch": 47.96, "learning_rate": 4.400513059701493e-05, "loss": 0.0002, "step": 51412 }, { "epoch": 47.96, "learning_rate": 4.4004664179104484e-05, "loss": 0.0001, "step": 51416 }, { "epoch": 47.97, "learning_rate": 4.400419776119403e-05, "loss": 0.0, "step": 51420 }, { "epoch": 47.97, "learning_rate": 4.400373134328358e-05, "loss": 0.0, "step": 51424 }, { "epoch": 47.97, "learning_rate": 4.400326492537314e-05, "loss": 0.0, "step": 51428 }, { "epoch": 47.98, "learning_rate": 4.400279850746269e-05, "loss": 0.0, "step": 51432 }, { "epoch": 47.98, "learning_rate": 4.400233208955224e-05, "loss": 0.0, "step": 51436 }, { "epoch": 47.98, "learning_rate": 4.400186567164179e-05, "loss": 0.0, "step": 51440 }, { "epoch": 47.99, "learning_rate": 4.400139925373135e-05, "loss": 0.0, "step": 51444 }, { "epoch": 47.99, "learning_rate": 4.4000932835820896e-05, "loss": 0.0001, "step": 51448 }, { "epoch": 48.0, "learning_rate": 4.400046641791045e-05, "loss": 0.0008, "step": 51452 }, { "epoch": 48.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.0, "step": 51456 }, { "epoch": 48.0, "learning_rate": 4.3999533582089554e-05, "loss": 0.0001, "step": 51460 }, { "epoch": 48.01, "learning_rate": 4.399906716417911e-05, "loss": 0.0, "step": 51464 }, { "epoch": 48.01, "learning_rate": 4.399860074626866e-05, "loss": 0.0, "step": 51468 }, { "epoch": 48.01, "learning_rate": 4.399813432835821e-05, "loss": 0.0, "step": 51472 }, { "epoch": 48.02, "learning_rate": 4.399766791044777e-05, "loss": 0.0, "step": 51476 }, { "epoch": 48.02, "learning_rate": 4.3997201492537315e-05, "loss": 0.0, "step": 51480 }, { "epoch": 48.03, "learning_rate": 4.399673507462686e-05, "loss": 0.0005, "step": 51484 }, { "epoch": 48.03, "learning_rate": 4.3996268656716425e-05, "loss": 0.0012, "step": 51488 }, { "epoch": 48.03, "learning_rate": 4.399580223880597e-05, "loss": 0.0146, "step": 51492 }, { "epoch": 48.04, "learning_rate": 4.399533582089552e-05, "loss": 0.0004, "step": 51496 }, { "epoch": 48.04, "learning_rate": 4.3994869402985076e-05, "loss": 0.0, "step": 51500 }, { "epoch": 48.04, "eval_exact_match": 0.741779497098646, "eval_exec": 0.776595744680851, "eval_loss": 0.44945940375328064, "eval_runtime": 1251.2204, "eval_samples_per_second": 0.826, "step": 51500 }, { "epoch": 48.04, "learning_rate": 4.399440298507463e-05, "loss": 0.0, "step": 51504 }, { "epoch": 48.05, "learning_rate": 4.399393656716418e-05, "loss": 0.0001, "step": 51508 }, { "epoch": 48.05, "learning_rate": 4.3993470149253734e-05, "loss": 0.0, "step": 51512 }, { "epoch": 48.06, "learning_rate": 4.399300373134329e-05, "loss": 0.0, "step": 51516 }, { "epoch": 48.06, "learning_rate": 4.399253731343284e-05, "loss": 0.0, "step": 51520 }, { "epoch": 48.06, "learning_rate": 4.399207089552239e-05, "loss": 0.0, "step": 51524 }, { "epoch": 48.07, "learning_rate": 4.399160447761194e-05, "loss": 0.0004, "step": 51528 }, { "epoch": 48.07, "learning_rate": 4.3991138059701495e-05, "loss": 0.0, "step": 51532 }, { "epoch": 48.07, "learning_rate": 4.399067164179105e-05, "loss": 0.0001, "step": 51536 }, { "epoch": 48.08, "learning_rate": 4.39902052238806e-05, "loss": 0.0022, "step": 51540 }, { "epoch": 48.08, "learning_rate": 4.3989738805970146e-05, "loss": 0.0001, "step": 51544 }, { "epoch": 48.09, "learning_rate": 4.398927238805971e-05, "loss": 0.0, "step": 51548 }, { "epoch": 48.09, "learning_rate": 4.3988805970149256e-05, "loss": 0.0, "step": 51552 }, { "epoch": 48.09, "learning_rate": 4.3988339552238804e-05, "loss": 0.0001, "step": 51556 }, { "epoch": 48.1, "learning_rate": 4.398787313432836e-05, "loss": 0.0, "step": 51560 }, { "epoch": 48.1, "learning_rate": 4.3987406716417914e-05, "loss": 0.0003, "step": 51564 }, { "epoch": 48.1, "learning_rate": 4.398694029850747e-05, "loss": 0.0, "step": 51568 }, { "epoch": 48.11, "learning_rate": 4.398647388059702e-05, "loss": 0.0001, "step": 51572 }, { "epoch": 48.11, "learning_rate": 4.3986007462686565e-05, "loss": 0.0, "step": 51576 }, { "epoch": 48.12, "learning_rate": 4.3985541044776126e-05, "loss": 0.0, "step": 51580 }, { "epoch": 48.12, "learning_rate": 4.3985074626865675e-05, "loss": 0.0002, "step": 51584 }, { "epoch": 48.12, "learning_rate": 4.398460820895522e-05, "loss": 0.0009, "step": 51588 }, { "epoch": 48.13, "learning_rate": 4.398414179104478e-05, "loss": 0.0, "step": 51592 }, { "epoch": 48.13, "learning_rate": 4.398367537313433e-05, "loss": 0.0001, "step": 51596 }, { "epoch": 48.13, "learning_rate": 4.398320895522388e-05, "loss": 0.0, "step": 51600 }, { "epoch": 48.14, "learning_rate": 4.3982742537313436e-05, "loss": 0.0, "step": 51604 }, { "epoch": 48.14, "learning_rate": 4.398227611940299e-05, "loss": 0.0, "step": 51608 }, { "epoch": 48.15, "learning_rate": 4.398180970149254e-05, "loss": 0.0007, "step": 51612 }, { "epoch": 48.15, "learning_rate": 4.3981343283582094e-05, "loss": 0.0, "step": 51616 }, { "epoch": 48.15, "learning_rate": 4.398087686567164e-05, "loss": 0.0001, "step": 51620 }, { "epoch": 48.16, "learning_rate": 4.3980410447761197e-05, "loss": 0.0, "step": 51624 }, { "epoch": 48.16, "learning_rate": 4.397994402985075e-05, "loss": 0.0, "step": 51628 }, { "epoch": 48.16, "learning_rate": 4.39794776119403e-05, "loss": 0.0082, "step": 51632 }, { "epoch": 48.17, "learning_rate": 4.397901119402985e-05, "loss": 0.0001, "step": 51636 }, { "epoch": 48.17, "learning_rate": 4.397854477611941e-05, "loss": 0.0, "step": 51640 }, { "epoch": 48.18, "learning_rate": 4.397807835820896e-05, "loss": 0.0001, "step": 51644 }, { "epoch": 48.18, "learning_rate": 4.3977611940298506e-05, "loss": 0.0, "step": 51648 }, { "epoch": 48.18, "learning_rate": 4.397714552238806e-05, "loss": 0.0, "step": 51652 }, { "epoch": 48.19, "learning_rate": 4.3976679104477615e-05, "loss": 0.0001, "step": 51656 }, { "epoch": 48.19, "learning_rate": 4.3976212686567164e-05, "loss": 0.0, "step": 51660 }, { "epoch": 48.19, "learning_rate": 4.397574626865672e-05, "loss": 0.0139, "step": 51664 }, { "epoch": 48.2, "learning_rate": 4.3975279850746273e-05, "loss": 0.0, "step": 51668 }, { "epoch": 48.2, "learning_rate": 4.397481343283582e-05, "loss": 0.0007, "step": 51672 }, { "epoch": 48.21, "learning_rate": 4.3974347014925376e-05, "loss": 0.0, "step": 51676 }, { "epoch": 48.21, "learning_rate": 4.3973880597014925e-05, "loss": 0.0, "step": 51680 }, { "epoch": 48.21, "learning_rate": 4.397341417910448e-05, "loss": 0.0, "step": 51684 }, { "epoch": 48.22, "learning_rate": 4.3972947761194034e-05, "loss": 0.0001, "step": 51688 }, { "epoch": 48.22, "learning_rate": 4.397248134328358e-05, "loss": 0.0, "step": 51692 }, { "epoch": 48.22, "learning_rate": 4.397201492537313e-05, "loss": 0.0, "step": 51696 }, { "epoch": 48.23, "learning_rate": 4.397154850746269e-05, "loss": 0.001, "step": 51700 }, { "epoch": 48.23, "learning_rate": 4.397108208955224e-05, "loss": 0.001, "step": 51704 }, { "epoch": 48.24, "learning_rate": 4.397061567164179e-05, "loss": 0.0, "step": 51708 }, { "epoch": 48.24, "learning_rate": 4.3970149253731344e-05, "loss": 0.0001, "step": 51712 }, { "epoch": 48.24, "learning_rate": 4.39696828358209e-05, "loss": 0.0001, "step": 51716 }, { "epoch": 48.25, "learning_rate": 4.3969216417910447e-05, "loss": 0.0027, "step": 51720 }, { "epoch": 48.25, "learning_rate": 4.396875e-05, "loss": 0.0, "step": 51724 }, { "epoch": 48.25, "learning_rate": 4.3968283582089556e-05, "loss": 0.0, "step": 51728 }, { "epoch": 48.26, "learning_rate": 4.396781716417911e-05, "loss": 0.0004, "step": 51732 }, { "epoch": 48.26, "learning_rate": 4.396735074626866e-05, "loss": 0.0001, "step": 51736 }, { "epoch": 48.26, "learning_rate": 4.396688432835821e-05, "loss": 0.0, "step": 51740 }, { "epoch": 48.27, "learning_rate": 4.396641791044776e-05, "loss": 0.0, "step": 51744 }, { "epoch": 48.27, "learning_rate": 4.396595149253732e-05, "loss": 0.0, "step": 51748 }, { "epoch": 48.28, "learning_rate": 4.3965485074626866e-05, "loss": 0.0001, "step": 51752 }, { "epoch": 48.28, "learning_rate": 4.396501865671642e-05, "loss": 0.0, "step": 51756 }, { "epoch": 48.28, "learning_rate": 4.3964552238805975e-05, "loss": 0.0001, "step": 51760 }, { "epoch": 48.29, "learning_rate": 4.3964085820895523e-05, "loss": 0.0001, "step": 51764 }, { "epoch": 48.29, "learning_rate": 4.396361940298508e-05, "loss": 0.0, "step": 51768 }, { "epoch": 48.29, "learning_rate": 4.3963152985074626e-05, "loss": 0.0001, "step": 51772 }, { "epoch": 48.3, "learning_rate": 4.396268656716418e-05, "loss": 0.0004, "step": 51776 }, { "epoch": 48.3, "learning_rate": 4.3962220149253736e-05, "loss": 0.0, "step": 51780 }, { "epoch": 48.31, "learning_rate": 4.3961753731343284e-05, "loss": 0.0, "step": 51784 }, { "epoch": 48.31, "learning_rate": 4.396128731343284e-05, "loss": 0.0022, "step": 51788 }, { "epoch": 48.31, "learning_rate": 4.3960820895522394e-05, "loss": 0.0, "step": 51792 }, { "epoch": 48.32, "learning_rate": 4.396035447761194e-05, "loss": 0.0007, "step": 51796 }, { "epoch": 48.32, "learning_rate": 4.395988805970149e-05, "loss": 0.0, "step": 51800 }, { "epoch": 48.32, "learning_rate": 4.3959421641791045e-05, "loss": 0.0, "step": 51804 }, { "epoch": 48.33, "learning_rate": 4.39589552238806e-05, "loss": 0.0, "step": 51808 }, { "epoch": 48.33, "learning_rate": 4.395848880597015e-05, "loss": 0.0, "step": 51812 }, { "epoch": 48.34, "learning_rate": 4.39580223880597e-05, "loss": 0.0, "step": 51816 }, { "epoch": 48.34, "learning_rate": 4.395755597014926e-05, "loss": 0.0, "step": 51820 }, { "epoch": 48.34, "learning_rate": 4.3957089552238806e-05, "loss": 0.0, "step": 51824 }, { "epoch": 48.35, "learning_rate": 4.395662313432836e-05, "loss": 0.0, "step": 51828 }, { "epoch": 48.35, "learning_rate": 4.395615671641791e-05, "loss": 0.0001, "step": 51832 }, { "epoch": 48.35, "learning_rate": 4.3955690298507464e-05, "loss": 0.0001, "step": 51836 }, { "epoch": 48.36, "learning_rate": 4.395522388059702e-05, "loss": 0.0001, "step": 51840 }, { "epoch": 48.36, "learning_rate": 4.395475746268657e-05, "loss": 0.0, "step": 51844 }, { "epoch": 48.37, "learning_rate": 4.395429104477612e-05, "loss": 0.0028, "step": 51848 }, { "epoch": 48.37, "learning_rate": 4.395382462686568e-05, "loss": 0.0024, "step": 51852 }, { "epoch": 48.37, "learning_rate": 4.3953358208955225e-05, "loss": 0.0002, "step": 51856 }, { "epoch": 48.38, "learning_rate": 4.3952891791044773e-05, "loss": 0.0001, "step": 51860 }, { "epoch": 48.38, "learning_rate": 4.395242537313433e-05, "loss": 0.0001, "step": 51864 }, { "epoch": 48.38, "learning_rate": 4.395195895522388e-05, "loss": 0.0, "step": 51868 }, { "epoch": 48.39, "learning_rate": 4.395149253731343e-05, "loss": 0.0002, "step": 51872 }, { "epoch": 48.39, "learning_rate": 4.3951026119402986e-05, "loss": 0.0, "step": 51876 }, { "epoch": 48.4, "learning_rate": 4.395055970149254e-05, "loss": 0.0, "step": 51880 }, { "epoch": 48.4, "learning_rate": 4.395009328358209e-05, "loss": 0.0, "step": 51884 }, { "epoch": 48.4, "learning_rate": 4.3949626865671644e-05, "loss": 0.0, "step": 51888 }, { "epoch": 48.41, "learning_rate": 4.394916044776119e-05, "loss": 0.0, "step": 51892 }, { "epoch": 48.41, "learning_rate": 4.3948694029850754e-05, "loss": 0.0, "step": 51896 }, { "epoch": 48.41, "learning_rate": 4.39482276119403e-05, "loss": 0.0, "step": 51900 }, { "epoch": 48.42, "learning_rate": 4.394776119402985e-05, "loss": 0.0, "step": 51904 }, { "epoch": 48.42, "learning_rate": 4.3947294776119405e-05, "loss": 0.0, "step": 51908 }, { "epoch": 48.43, "learning_rate": 4.394682835820896e-05, "loss": 0.0003, "step": 51912 }, { "epoch": 48.43, "learning_rate": 4.394636194029851e-05, "loss": 0.0024, "step": 51916 }, { "epoch": 48.43, "learning_rate": 4.394589552238806e-05, "loss": 0.0, "step": 51920 }, { "epoch": 48.44, "learning_rate": 4.394542910447761e-05, "loss": 0.0001, "step": 51924 }, { "epoch": 48.44, "learning_rate": 4.3944962686567166e-05, "loss": 0.0, "step": 51928 }, { "epoch": 48.44, "learning_rate": 4.394449626865672e-05, "loss": 0.0, "step": 51932 }, { "epoch": 48.45, "learning_rate": 4.394402985074627e-05, "loss": 0.0, "step": 51936 }, { "epoch": 48.45, "learning_rate": 4.3943563432835824e-05, "loss": 0.0001, "step": 51940 }, { "epoch": 48.46, "learning_rate": 4.394309701492538e-05, "loss": 0.0, "step": 51944 }, { "epoch": 48.46, "learning_rate": 4.394263059701493e-05, "loss": 0.0029, "step": 51948 }, { "epoch": 48.46, "learning_rate": 4.3942164179104475e-05, "loss": 0.0, "step": 51952 }, { "epoch": 48.47, "learning_rate": 4.394169776119404e-05, "loss": 0.0, "step": 51956 }, { "epoch": 48.47, "learning_rate": 4.3941231343283585e-05, "loss": 0.0, "step": 51960 }, { "epoch": 48.47, "learning_rate": 4.394076492537313e-05, "loss": 0.0002, "step": 51964 }, { "epoch": 48.48, "learning_rate": 4.394029850746269e-05, "loss": 0.0, "step": 51968 }, { "epoch": 48.48, "learning_rate": 4.393983208955224e-05, "loss": 0.0, "step": 51972 }, { "epoch": 48.49, "learning_rate": 4.393936567164179e-05, "loss": 0.002, "step": 51976 }, { "epoch": 48.49, "learning_rate": 4.3938899253731346e-05, "loss": 0.0007, "step": 51980 }, { "epoch": 48.49, "learning_rate": 4.3938432835820894e-05, "loss": 0.0, "step": 51984 }, { "epoch": 48.5, "learning_rate": 4.393796641791045e-05, "loss": 0.001, "step": 51988 }, { "epoch": 48.5, "learning_rate": 4.3937500000000004e-05, "loss": 0.0, "step": 51992 }, { "epoch": 48.5, "learning_rate": 4.393703358208955e-05, "loss": 0.0, "step": 51996 }, { "epoch": 48.51, "learning_rate": 4.393656716417911e-05, "loss": 0.0018, "step": 52000 }, { "epoch": 48.51, "eval_exact_match": 0.7446808510638298, "eval_exec": 0.7746615087040619, "eval_loss": 0.44412875175476074, "eval_runtime": 1559.1098, "eval_samples_per_second": 0.663, "step": 52000 }, { "epoch": 48.51, "learning_rate": 4.393610074626866e-05, "loss": 0.0, "step": 52004 }, { "epoch": 48.51, "learning_rate": 4.393563432835821e-05, "loss": 0.0, "step": 52008 }, { "epoch": 48.52, "learning_rate": 4.393516791044776e-05, "loss": 0.0, "step": 52012 }, { "epoch": 48.52, "learning_rate": 4.393470149253732e-05, "loss": 0.0, "step": 52016 }, { "epoch": 48.53, "learning_rate": 4.393423507462687e-05, "loss": 0.0, "step": 52020 }, { "epoch": 48.53, "learning_rate": 4.3933768656716416e-05, "loss": 0.0, "step": 52024 }, { "epoch": 48.53, "learning_rate": 4.393330223880597e-05, "loss": 0.0, "step": 52028 }, { "epoch": 48.54, "learning_rate": 4.3932835820895526e-05, "loss": 0.0003, "step": 52032 }, { "epoch": 48.54, "learning_rate": 4.3932369402985074e-05, "loss": 0.0018, "step": 52036 }, { "epoch": 48.54, "learning_rate": 4.393190298507463e-05, "loss": 0.0, "step": 52040 }, { "epoch": 48.55, "learning_rate": 4.393143656716418e-05, "loss": 0.0, "step": 52044 }, { "epoch": 48.55, "learning_rate": 4.393097014925374e-05, "loss": 0.0, "step": 52048 }, { "epoch": 48.56, "learning_rate": 4.393050373134329e-05, "loss": 0.0, "step": 52052 }, { "epoch": 48.56, "learning_rate": 4.3930037313432835e-05, "loss": 0.0, "step": 52056 }, { "epoch": 48.56, "learning_rate": 4.392957089552239e-05, "loss": 0.0022, "step": 52060 }, { "epoch": 48.57, "learning_rate": 4.3929104477611945e-05, "loss": 0.0001, "step": 52064 }, { "epoch": 48.57, "learning_rate": 4.392863805970149e-05, "loss": 0.0056, "step": 52068 }, { "epoch": 48.57, "learning_rate": 4.392817164179105e-05, "loss": 0.0, "step": 52072 }, { "epoch": 48.58, "learning_rate": 4.39277052238806e-05, "loss": 0.0, "step": 52076 }, { "epoch": 48.58, "learning_rate": 4.392723880597015e-05, "loss": 0.0, "step": 52080 }, { "epoch": 48.59, "learning_rate": 4.3926772388059706e-05, "loss": 0.0, "step": 52084 }, { "epoch": 48.59, "learning_rate": 4.3926305970149254e-05, "loss": 0.0, "step": 52088 }, { "epoch": 48.59, "learning_rate": 4.392583955223881e-05, "loss": 0.0, "step": 52092 }, { "epoch": 48.6, "learning_rate": 4.3925373134328364e-05, "loss": 0.0, "step": 52096 }, { "epoch": 48.6, "learning_rate": 4.392490671641791e-05, "loss": 0.0003, "step": 52100 }, { "epoch": 48.6, "learning_rate": 4.392444029850746e-05, "loss": 0.0, "step": 52104 }, { "epoch": 48.61, "learning_rate": 4.392397388059702e-05, "loss": 0.0, "step": 52108 }, { "epoch": 48.61, "learning_rate": 4.392350746268657e-05, "loss": 0.0, "step": 52112 }, { "epoch": 48.62, "learning_rate": 4.392304104477612e-05, "loss": 0.0019, "step": 52116 }, { "epoch": 48.62, "learning_rate": 4.392257462686567e-05, "loss": 0.0, "step": 52120 }, { "epoch": 48.62, "learning_rate": 4.392210820895523e-05, "loss": 0.0, "step": 52124 }, { "epoch": 48.63, "learning_rate": 4.3921641791044776e-05, "loss": 0.0, "step": 52128 }, { "epoch": 48.63, "learning_rate": 4.392117537313433e-05, "loss": 0.0005, "step": 52132 }, { "epoch": 48.63, "learning_rate": 4.3920708955223886e-05, "loss": 0.0001, "step": 52136 }, { "epoch": 48.64, "learning_rate": 4.3920242537313434e-05, "loss": 0.0001, "step": 52140 }, { "epoch": 48.64, "learning_rate": 4.391977611940299e-05, "loss": 0.0, "step": 52144 }, { "epoch": 48.65, "learning_rate": 4.391930970149254e-05, "loss": 0.0002, "step": 52148 }, { "epoch": 48.65, "learning_rate": 4.391884328358209e-05, "loss": 0.0001, "step": 52152 }, { "epoch": 48.65, "learning_rate": 4.391837686567165e-05, "loss": 0.0, "step": 52156 }, { "epoch": 48.66, "learning_rate": 4.3917910447761195e-05, "loss": 0.0, "step": 52160 }, { "epoch": 48.66, "learning_rate": 4.391744402985074e-05, "loss": 0.0, "step": 52164 }, { "epoch": 48.66, "learning_rate": 4.3916977611940305e-05, "loss": 0.0, "step": 52168 }, { "epoch": 48.67, "learning_rate": 4.391651119402985e-05, "loss": 0.0, "step": 52172 }, { "epoch": 48.67, "learning_rate": 4.39160447761194e-05, "loss": 0.0, "step": 52176 }, { "epoch": 48.68, "learning_rate": 4.3915578358208956e-05, "loss": 0.0001, "step": 52180 }, { "epoch": 48.68, "learning_rate": 4.391511194029851e-05, "loss": 0.0024, "step": 52184 }, { "epoch": 48.68, "learning_rate": 4.391464552238806e-05, "loss": 0.0001, "step": 52188 }, { "epoch": 48.69, "learning_rate": 4.3914179104477614e-05, "loss": 0.0, "step": 52192 }, { "epoch": 48.69, "learning_rate": 4.391371268656717e-05, "loss": 0.0, "step": 52196 }, { "epoch": 48.69, "learning_rate": 4.391324626865672e-05, "loss": 0.0, "step": 52200 }, { "epoch": 48.7, "learning_rate": 4.391277985074627e-05, "loss": 0.0, "step": 52204 }, { "epoch": 48.7, "learning_rate": 4.391231343283582e-05, "loss": 0.0001, "step": 52208 }, { "epoch": 48.71, "learning_rate": 4.3911847014925375e-05, "loss": 0.0, "step": 52212 }, { "epoch": 48.71, "learning_rate": 4.391138059701493e-05, "loss": 0.0, "step": 52216 }, { "epoch": 48.71, "learning_rate": 4.391091417910448e-05, "loss": 0.0016, "step": 52220 }, { "epoch": 48.72, "learning_rate": 4.391044776119403e-05, "loss": 0.0001, "step": 52224 }, { "epoch": 48.72, "learning_rate": 4.390998134328359e-05, "loss": 0.0, "step": 52228 }, { "epoch": 48.72, "learning_rate": 4.3909514925373136e-05, "loss": 0.0009, "step": 52232 }, { "epoch": 48.73, "learning_rate": 4.390904850746269e-05, "loss": 0.0, "step": 52236 }, { "epoch": 48.73, "learning_rate": 4.390858208955224e-05, "loss": 0.0, "step": 52240 }, { "epoch": 48.73, "learning_rate": 4.3908115671641794e-05, "loss": 0.0001, "step": 52244 }, { "epoch": 48.74, "learning_rate": 4.390764925373135e-05, "loss": 0.0, "step": 52248 }, { "epoch": 48.74, "learning_rate": 4.39071828358209e-05, "loss": 0.0005, "step": 52252 }, { "epoch": 48.75, "learning_rate": 4.3906716417910445e-05, "loss": 0.0001, "step": 52256 }, { "epoch": 48.75, "learning_rate": 4.390625000000001e-05, "loss": 0.0, "step": 52260 }, { "epoch": 48.75, "learning_rate": 4.3905783582089555e-05, "loss": 0.0, "step": 52264 }, { "epoch": 48.76, "learning_rate": 4.39053171641791e-05, "loss": 0.0, "step": 52268 }, { "epoch": 48.76, "learning_rate": 4.390485074626866e-05, "loss": 0.0, "step": 52272 }, { "epoch": 48.76, "learning_rate": 4.390438432835821e-05, "loss": 0.0, "step": 52276 }, { "epoch": 48.77, "learning_rate": 4.390391791044776e-05, "loss": 0.0, "step": 52280 }, { "epoch": 48.77, "learning_rate": 4.3903451492537316e-05, "loss": 0.0001, "step": 52284 }, { "epoch": 48.78, "learning_rate": 4.390298507462687e-05, "loss": 0.0, "step": 52288 }, { "epoch": 48.78, "learning_rate": 4.390251865671642e-05, "loss": 0.0, "step": 52292 }, { "epoch": 48.78, "learning_rate": 4.3902052238805974e-05, "loss": 0.0, "step": 52296 }, { "epoch": 48.79, "learning_rate": 4.390158582089552e-05, "loss": 0.0, "step": 52300 }, { "epoch": 48.79, "learning_rate": 4.390111940298508e-05, "loss": 0.0113, "step": 52304 }, { "epoch": 48.79, "learning_rate": 4.390065298507463e-05, "loss": 0.0003, "step": 52308 }, { "epoch": 48.8, "learning_rate": 4.390018656716418e-05, "loss": 0.0004, "step": 52312 }, { "epoch": 48.8, "learning_rate": 4.389972014925373e-05, "loss": 0.0001, "step": 52316 }, { "epoch": 48.81, "learning_rate": 4.389925373134329e-05, "loss": 0.0, "step": 52320 }, { "epoch": 48.81, "learning_rate": 4.389878731343284e-05, "loss": 0.0, "step": 52324 }, { "epoch": 48.81, "learning_rate": 4.3898320895522386e-05, "loss": 0.0001, "step": 52328 }, { "epoch": 48.82, "learning_rate": 4.389785447761194e-05, "loss": 0.0, "step": 52332 }, { "epoch": 48.82, "learning_rate": 4.3897388059701496e-05, "loss": 0.0, "step": 52336 }, { "epoch": 48.82, "learning_rate": 4.3896921641791044e-05, "loss": 0.0, "step": 52340 }, { "epoch": 48.83, "learning_rate": 4.38964552238806e-05, "loss": 0.0, "step": 52344 }, { "epoch": 48.83, "learning_rate": 4.3895988805970154e-05, "loss": 0.0, "step": 52348 }, { "epoch": 48.84, "learning_rate": 4.38955223880597e-05, "loss": 0.0, "step": 52352 }, { "epoch": 48.84, "learning_rate": 4.389505597014926e-05, "loss": 0.0008, "step": 52356 }, { "epoch": 48.84, "learning_rate": 4.3894589552238805e-05, "loss": 0.0001, "step": 52360 }, { "epoch": 48.85, "learning_rate": 4.389412313432836e-05, "loss": 0.0, "step": 52364 }, { "epoch": 48.85, "learning_rate": 4.3893656716417915e-05, "loss": 0.0, "step": 52368 }, { "epoch": 48.85, "learning_rate": 4.389319029850746e-05, "loss": 0.0001, "step": 52372 }, { "epoch": 48.86, "learning_rate": 4.389272388059702e-05, "loss": 0.0, "step": 52376 }, { "epoch": 48.86, "learning_rate": 4.389225746268657e-05, "loss": 0.0, "step": 52380 }, { "epoch": 48.87, "learning_rate": 4.389179104477612e-05, "loss": 0.0, "step": 52384 }, { "epoch": 48.87, "learning_rate": 4.3891324626865676e-05, "loss": 0.0003, "step": 52388 }, { "epoch": 48.87, "learning_rate": 4.3890858208955224e-05, "loss": 0.0, "step": 52392 }, { "epoch": 48.88, "learning_rate": 4.389039179104478e-05, "loss": 0.0, "step": 52396 }, { "epoch": 48.88, "learning_rate": 4.3889925373134334e-05, "loss": 0.0, "step": 52400 }, { "epoch": 48.88, "learning_rate": 4.388945895522388e-05, "loss": 0.0, "step": 52404 }, { "epoch": 48.89, "learning_rate": 4.388899253731344e-05, "loss": 0.0, "step": 52408 }, { "epoch": 48.89, "learning_rate": 4.388852611940299e-05, "loss": 0.0003, "step": 52412 }, { "epoch": 48.9, "learning_rate": 4.388805970149254e-05, "loss": 0.0, "step": 52416 }, { "epoch": 48.9, "learning_rate": 4.388759328358209e-05, "loss": 0.0, "step": 52420 }, { "epoch": 48.9, "learning_rate": 4.388712686567164e-05, "loss": 0.0, "step": 52424 }, { "epoch": 48.91, "learning_rate": 4.38866604477612e-05, "loss": 0.0, "step": 52428 }, { "epoch": 48.91, "learning_rate": 4.3886194029850746e-05, "loss": 0.0011, "step": 52432 }, { "epoch": 48.91, "learning_rate": 4.38857276119403e-05, "loss": 0.0, "step": 52436 }, { "epoch": 48.92, "learning_rate": 4.3885261194029856e-05, "loss": 0.0003, "step": 52440 }, { "epoch": 48.92, "learning_rate": 4.3884794776119404e-05, "loss": 0.0, "step": 52444 }, { "epoch": 48.93, "learning_rate": 4.388432835820896e-05, "loss": 0.0, "step": 52448 }, { "epoch": 48.93, "learning_rate": 4.388386194029851e-05, "loss": 0.0, "step": 52452 }, { "epoch": 48.93, "learning_rate": 4.388339552238806e-05, "loss": 0.0001, "step": 52456 }, { "epoch": 48.94, "learning_rate": 4.3882929104477617e-05, "loss": 0.0, "step": 52460 }, { "epoch": 48.94, "learning_rate": 4.3882462686567165e-05, "loss": 0.0, "step": 52464 }, { "epoch": 48.94, "learning_rate": 4.388199626865672e-05, "loss": 0.0, "step": 52468 }, { "epoch": 48.95, "learning_rate": 4.3881529850746275e-05, "loss": 0.0005, "step": 52472 }, { "epoch": 48.95, "learning_rate": 4.388106343283582e-05, "loss": 0.003, "step": 52476 }, { "epoch": 48.96, "learning_rate": 4.388059701492537e-05, "loss": 0.0, "step": 52480 }, { "epoch": 48.96, "learning_rate": 4.3880130597014926e-05, "loss": 0.0029, "step": 52484 }, { "epoch": 48.96, "learning_rate": 4.387966417910448e-05, "loss": 0.002, "step": 52488 }, { "epoch": 48.97, "learning_rate": 4.387919776119403e-05, "loss": 0.0, "step": 52492 }, { "epoch": 48.97, "learning_rate": 4.3878731343283584e-05, "loss": 0.0, "step": 52496 }, { "epoch": 48.97, "learning_rate": 4.387826492537314e-05, "loss": 0.0016, "step": 52500 }, { "epoch": 48.97, "eval_exact_match": 0.741779497098646, "eval_exec": 0.769825918762089, "eval_loss": 0.4436146914958954, "eval_runtime": 1145.0497, "eval_samples_per_second": 0.903, "step": 52500 }, { "epoch": 48.98, "learning_rate": 4.387779850746269e-05, "loss": 0.0001, "step": 52504 }, { "epoch": 48.98, "learning_rate": 4.387733208955224e-05, "loss": 0.0001, "step": 52508 }, { "epoch": 48.98, "learning_rate": 4.387686567164179e-05, "loss": 0.0, "step": 52512 }, { "epoch": 48.99, "learning_rate": 4.3876399253731345e-05, "loss": 0.0019, "step": 52516 }, { "epoch": 48.99, "learning_rate": 4.38759328358209e-05, "loss": 0.0001, "step": 52520 }, { "epoch": 49.0, "learning_rate": 4.387546641791045e-05, "loss": 0.0, "step": 52524 }, { "epoch": 49.0, "learning_rate": 4.3875e-05, "loss": 0.0, "step": 52528 }, { "epoch": 49.0, "learning_rate": 4.387453358208956e-05, "loss": 0.0, "step": 52532 }, { "epoch": 49.01, "learning_rate": 4.3874067164179106e-05, "loss": 0.0, "step": 52536 }, { "epoch": 49.01, "learning_rate": 4.387360074626866e-05, "loss": 0.001, "step": 52540 }, { "epoch": 49.01, "learning_rate": 4.387313432835821e-05, "loss": 0.0, "step": 52544 }, { "epoch": 49.02, "learning_rate": 4.3872667910447764e-05, "loss": 0.0, "step": 52548 }, { "epoch": 49.02, "learning_rate": 4.387220149253732e-05, "loss": 0.0006, "step": 52552 }, { "epoch": 49.03, "learning_rate": 4.387173507462687e-05, "loss": 0.0, "step": 52556 }, { "epoch": 49.03, "learning_rate": 4.387126865671642e-05, "loss": 0.0029, "step": 52560 }, { "epoch": 49.03, "learning_rate": 4.3870802238805976e-05, "loss": 0.0, "step": 52564 }, { "epoch": 49.04, "learning_rate": 4.3870335820895525e-05, "loss": 0.0, "step": 52568 }, { "epoch": 49.04, "learning_rate": 4.386986940298507e-05, "loss": 0.0001, "step": 52572 }, { "epoch": 49.04, "learning_rate": 4.3869402985074634e-05, "loss": 0.0005, "step": 52576 }, { "epoch": 49.05, "learning_rate": 4.386893656716418e-05, "loss": 0.0, "step": 52580 }, { "epoch": 49.05, "learning_rate": 4.386847014925373e-05, "loss": 0.0, "step": 52584 }, { "epoch": 49.06, "learning_rate": 4.3868003731343286e-05, "loss": 0.0001, "step": 52588 }, { "epoch": 49.06, "learning_rate": 4.386753731343284e-05, "loss": 0.0, "step": 52592 }, { "epoch": 49.06, "learning_rate": 4.386707089552239e-05, "loss": 0.0001, "step": 52596 }, { "epoch": 49.07, "learning_rate": 4.3866604477611943e-05, "loss": 0.0009, "step": 52600 }, { "epoch": 49.07, "learning_rate": 4.386613805970149e-05, "loss": 0.0, "step": 52604 }, { "epoch": 49.07, "learning_rate": 4.3865671641791047e-05, "loss": 0.0001, "step": 52608 }, { "epoch": 49.08, "learning_rate": 4.38652052238806e-05, "loss": 0.0, "step": 52612 }, { "epoch": 49.08, "learning_rate": 4.386473880597015e-05, "loss": 0.0001, "step": 52616 }, { "epoch": 49.09, "learning_rate": 4.3864272388059704e-05, "loss": 0.0, "step": 52620 }, { "epoch": 49.09, "learning_rate": 4.386380597014926e-05, "loss": 0.0006, "step": 52624 }, { "epoch": 49.09, "learning_rate": 4.386333955223881e-05, "loss": 0.0008, "step": 52628 }, { "epoch": 49.1, "learning_rate": 4.3862873134328356e-05, "loss": 0.0001, "step": 52632 }, { "epoch": 49.1, "learning_rate": 4.386240671641792e-05, "loss": 0.0001, "step": 52636 }, { "epoch": 49.1, "learning_rate": 4.3861940298507465e-05, "loss": 0.0, "step": 52640 }, { "epoch": 49.11, "learning_rate": 4.3861473880597014e-05, "loss": 0.0003, "step": 52644 }, { "epoch": 49.11, "learning_rate": 4.386100746268657e-05, "loss": 0.0111, "step": 52648 }, { "epoch": 49.12, "learning_rate": 4.3860541044776123e-05, "loss": 0.0, "step": 52652 }, { "epoch": 49.12, "learning_rate": 4.386007462686567e-05, "loss": 0.0003, "step": 52656 }, { "epoch": 49.12, "learning_rate": 4.3859608208955226e-05, "loss": 0.0016, "step": 52660 }, { "epoch": 49.13, "learning_rate": 4.3859141791044775e-05, "loss": 0.0, "step": 52664 }, { "epoch": 49.13, "learning_rate": 4.385867537313433e-05, "loss": 0.0, "step": 52668 }, { "epoch": 49.13, "learning_rate": 4.3858208955223884e-05, "loss": 0.0, "step": 52672 }, { "epoch": 49.14, "learning_rate": 4.385774253731343e-05, "loss": 0.0005, "step": 52676 }, { "epoch": 49.14, "learning_rate": 4.385727611940299e-05, "loss": 0.0, "step": 52680 }, { "epoch": 49.15, "learning_rate": 4.385680970149254e-05, "loss": 0.0, "step": 52684 }, { "epoch": 49.15, "learning_rate": 4.385634328358209e-05, "loss": 0.0001, "step": 52688 }, { "epoch": 49.15, "learning_rate": 4.385587686567164e-05, "loss": 0.0, "step": 52692 }, { "epoch": 49.16, "learning_rate": 4.38554104477612e-05, "loss": 0.0001, "step": 52696 }, { "epoch": 49.16, "learning_rate": 4.385494402985075e-05, "loss": 0.0, "step": 52700 }, { "epoch": 49.16, "learning_rate": 4.38544776119403e-05, "loss": 0.0, "step": 52704 }, { "epoch": 49.17, "learning_rate": 4.385401119402985e-05, "loss": 0.0, "step": 52708 }, { "epoch": 49.17, "learning_rate": 4.3853544776119406e-05, "loss": 0.0003, "step": 52712 }, { "epoch": 49.18, "learning_rate": 4.385307835820896e-05, "loss": 0.0009, "step": 52716 }, { "epoch": 49.18, "learning_rate": 4.385261194029851e-05, "loss": 0.0001, "step": 52720 }, { "epoch": 49.18, "learning_rate": 4.385214552238806e-05, "loss": 0.0008, "step": 52724 }, { "epoch": 49.19, "learning_rate": 4.385167910447762e-05, "loss": 0.005, "step": 52728 }, { "epoch": 49.19, "learning_rate": 4.385121268656717e-05, "loss": 0.0, "step": 52732 }, { "epoch": 49.19, "learning_rate": 4.3850746268656715e-05, "loss": 0.0001, "step": 52736 }, { "epoch": 49.2, "learning_rate": 4.385027985074627e-05, "loss": 0.0, "step": 52740 }, { "epoch": 49.2, "learning_rate": 4.3849813432835825e-05, "loss": 0.0, "step": 52744 }, { "epoch": 49.21, "learning_rate": 4.3849347014925373e-05, "loss": 0.0, "step": 52748 }, { "epoch": 49.21, "learning_rate": 4.384888059701493e-05, "loss": 0.0002, "step": 52752 }, { "epoch": 49.21, "learning_rate": 4.384841417910448e-05, "loss": 0.0001, "step": 52756 }, { "epoch": 49.22, "learning_rate": 4.384794776119403e-05, "loss": 0.0, "step": 52760 }, { "epoch": 49.22, "learning_rate": 4.3847481343283586e-05, "loss": 0.0006, "step": 52764 }, { "epoch": 49.22, "learning_rate": 4.3847014925373134e-05, "loss": 0.0009, "step": 52768 }, { "epoch": 49.23, "learning_rate": 4.384654850746269e-05, "loss": 0.0, "step": 52772 }, { "epoch": 49.23, "learning_rate": 4.3846082089552244e-05, "loss": 0.0003, "step": 52776 }, { "epoch": 49.24, "learning_rate": 4.384561567164179e-05, "loss": 0.0001, "step": 52780 }, { "epoch": 49.24, "learning_rate": 4.384514925373134e-05, "loss": 0.0, "step": 52784 }, { "epoch": 49.24, "learning_rate": 4.38446828358209e-05, "loss": 0.0, "step": 52788 }, { "epoch": 49.25, "learning_rate": 4.384421641791045e-05, "loss": 0.0, "step": 52792 }, { "epoch": 49.25, "learning_rate": 4.384375e-05, "loss": 0.0008, "step": 52796 }, { "epoch": 49.25, "learning_rate": 4.384328358208955e-05, "loss": 0.0001, "step": 52800 }, { "epoch": 49.26, "learning_rate": 4.384281716417911e-05, "loss": 0.0, "step": 52804 }, { "epoch": 49.26, "learning_rate": 4.3842350746268656e-05, "loss": 0.0, "step": 52808 }, { "epoch": 49.26, "learning_rate": 4.384188432835821e-05, "loss": 0.0127, "step": 52812 }, { "epoch": 49.27, "learning_rate": 4.3841417910447766e-05, "loss": 0.0, "step": 52816 }, { "epoch": 49.27, "learning_rate": 4.3840951492537314e-05, "loss": 0.0001, "step": 52820 }, { "epoch": 49.28, "learning_rate": 4.384048507462687e-05, "loss": 0.0, "step": 52824 }, { "epoch": 49.28, "learning_rate": 4.384001865671642e-05, "loss": 0.0, "step": 52828 }, { "epoch": 49.28, "learning_rate": 4.383955223880597e-05, "loss": 0.0002, "step": 52832 }, { "epoch": 49.29, "learning_rate": 4.383908582089553e-05, "loss": 0.0, "step": 52836 }, { "epoch": 49.29, "learning_rate": 4.3838619402985075e-05, "loss": 0.0007, "step": 52840 }, { "epoch": 49.29, "learning_rate": 4.3838152985074623e-05, "loss": 0.0006, "step": 52844 }, { "epoch": 49.3, "learning_rate": 4.3837686567164185e-05, "loss": 0.0001, "step": 52848 }, { "epoch": 49.3, "learning_rate": 4.383722014925373e-05, "loss": 0.0, "step": 52852 }, { "epoch": 49.31, "learning_rate": 4.383675373134328e-05, "loss": 0.0001, "step": 52856 }, { "epoch": 49.31, "learning_rate": 4.3836287313432836e-05, "loss": 0.0004, "step": 52860 }, { "epoch": 49.31, "learning_rate": 4.383582089552239e-05, "loss": 0.0001, "step": 52864 }, { "epoch": 49.32, "learning_rate": 4.3835354477611946e-05, "loss": 0.0017, "step": 52868 }, { "epoch": 49.32, "learning_rate": 4.3834888059701494e-05, "loss": 0.0015, "step": 52872 }, { "epoch": 49.32, "learning_rate": 4.383442164179105e-05, "loss": 0.0, "step": 52876 }, { "epoch": 49.33, "learning_rate": 4.3833955223880604e-05, "loss": 0.0, "step": 52880 }, { "epoch": 49.33, "learning_rate": 4.383348880597015e-05, "loss": 0.0, "step": 52884 }, { "epoch": 49.34, "learning_rate": 4.38330223880597e-05, "loss": 0.0006, "step": 52888 }, { "epoch": 49.34, "learning_rate": 4.3832555970149255e-05, "loss": 0.0, "step": 52892 }, { "epoch": 49.34, "learning_rate": 4.383208955223881e-05, "loss": 0.0, "step": 52896 }, { "epoch": 49.35, "learning_rate": 4.383162313432836e-05, "loss": 0.0005, "step": 52900 }, { "epoch": 49.35, "learning_rate": 4.383115671641791e-05, "loss": 0.0, "step": 52904 }, { "epoch": 49.35, "learning_rate": 4.383069029850747e-05, "loss": 0.0005, "step": 52908 }, { "epoch": 49.36, "learning_rate": 4.3830223880597016e-05, "loss": 0.0, "step": 52912 }, { "epoch": 49.36, "learning_rate": 4.382975746268657e-05, "loss": 0.0001, "step": 52916 }, { "epoch": 49.37, "learning_rate": 4.382929104477612e-05, "loss": 0.0003, "step": 52920 }, { "epoch": 49.37, "learning_rate": 4.3828824626865674e-05, "loss": 0.0, "step": 52924 }, { "epoch": 49.37, "learning_rate": 4.382835820895523e-05, "loss": 0.0, "step": 52928 }, { "epoch": 49.38, "learning_rate": 4.382789179104478e-05, "loss": 0.0, "step": 52932 }, { "epoch": 49.38, "learning_rate": 4.3827425373134325e-05, "loss": 0.0002, "step": 52936 }, { "epoch": 49.38, "learning_rate": 4.382695895522389e-05, "loss": 0.0002, "step": 52940 }, { "epoch": 49.39, "learning_rate": 4.3826492537313435e-05, "loss": 0.0, "step": 52944 }, { "epoch": 49.39, "learning_rate": 4.382602611940298e-05, "loss": 0.0, "step": 52948 }, { "epoch": 49.4, "learning_rate": 4.382555970149254e-05, "loss": 0.0007, "step": 52952 }, { "epoch": 49.4, "learning_rate": 4.382509328358209e-05, "loss": 0.0001, "step": 52956 }, { "epoch": 49.4, "learning_rate": 4.382462686567164e-05, "loss": 0.0, "step": 52960 }, { "epoch": 49.41, "learning_rate": 4.3824160447761196e-05, "loss": 0.0001, "step": 52964 }, { "epoch": 49.41, "learning_rate": 4.382369402985075e-05, "loss": 0.0003, "step": 52968 }, { "epoch": 49.41, "learning_rate": 4.38232276119403e-05, "loss": 0.0006, "step": 52972 }, { "epoch": 49.42, "learning_rate": 4.3822761194029854e-05, "loss": 0.0, "step": 52976 }, { "epoch": 49.42, "learning_rate": 4.38222947761194e-05, "loss": 0.0, "step": 52980 }, { "epoch": 49.43, "learning_rate": 4.382182835820896e-05, "loss": 0.0001, "step": 52984 }, { "epoch": 49.43, "learning_rate": 4.382136194029851e-05, "loss": 0.0, "step": 52988 }, { "epoch": 49.43, "learning_rate": 4.382089552238806e-05, "loss": 0.0, "step": 52992 }, { "epoch": 49.44, "learning_rate": 4.382042910447761e-05, "loss": 0.0, "step": 52996 }, { "epoch": 49.44, "learning_rate": 4.381996268656717e-05, "loss": 0.0, "step": 53000 }, { "epoch": 49.44, "eval_exact_match": 0.7485493230174082, "eval_exec": 0.7804642166344294, "eval_loss": 0.4629165828227997, "eval_runtime": 1125.3708, "eval_samples_per_second": 0.919, "step": 53000 }, { "epoch": 49.44, "learning_rate": 4.381949626865672e-05, "loss": 0.0061, "step": 53004 }, { "epoch": 49.45, "learning_rate": 4.3819029850746266e-05, "loss": 0.0, "step": 53008 }, { "epoch": 49.45, "learning_rate": 4.381856343283582e-05, "loss": 0.0001, "step": 53012 }, { "epoch": 49.46, "learning_rate": 4.3818097014925376e-05, "loss": 0.0, "step": 53016 }, { "epoch": 49.46, "learning_rate": 4.3817630597014924e-05, "loss": 0.0, "step": 53020 }, { "epoch": 49.46, "learning_rate": 4.381716417910448e-05, "loss": 0.0, "step": 53024 }, { "epoch": 49.47, "learning_rate": 4.3816697761194034e-05, "loss": 0.0013, "step": 53028 }, { "epoch": 49.47, "learning_rate": 4.381623134328359e-05, "loss": 0.0001, "step": 53032 }, { "epoch": 49.47, "learning_rate": 4.381576492537314e-05, "loss": 0.0, "step": 53036 }, { "epoch": 49.48, "learning_rate": 4.3815298507462685e-05, "loss": 0.0, "step": 53040 }, { "epoch": 49.48, "learning_rate": 4.381483208955225e-05, "loss": 0.0003, "step": 53044 }, { "epoch": 49.49, "learning_rate": 4.3814365671641795e-05, "loss": 0.0003, "step": 53048 }, { "epoch": 49.49, "learning_rate": 4.381389925373134e-05, "loss": 0.0, "step": 53052 }, { "epoch": 49.49, "learning_rate": 4.38134328358209e-05, "loss": 0.0, "step": 53056 }, { "epoch": 49.5, "learning_rate": 4.381296641791045e-05, "loss": 0.0001, "step": 53060 }, { "epoch": 49.5, "learning_rate": 4.38125e-05, "loss": 0.0, "step": 53064 }, { "epoch": 49.5, "learning_rate": 4.3812033582089556e-05, "loss": 0.0, "step": 53068 }, { "epoch": 49.51, "learning_rate": 4.3811567164179104e-05, "loss": 0.0002, "step": 53072 }, { "epoch": 49.51, "learning_rate": 4.381110074626866e-05, "loss": 0.0001, "step": 53076 }, { "epoch": 49.51, "learning_rate": 4.3810634328358214e-05, "loss": 0.0, "step": 53080 }, { "epoch": 49.52, "learning_rate": 4.381016791044776e-05, "loss": 0.0001, "step": 53084 }, { "epoch": 49.52, "learning_rate": 4.380970149253732e-05, "loss": 0.0, "step": 53088 }, { "epoch": 49.53, "learning_rate": 4.380923507462687e-05, "loss": 0.0006, "step": 53092 }, { "epoch": 49.53, "learning_rate": 4.380876865671642e-05, "loss": 0.0001, "step": 53096 }, { "epoch": 49.53, "learning_rate": 4.380830223880597e-05, "loss": 0.0, "step": 53100 }, { "epoch": 49.54, "learning_rate": 4.380783582089552e-05, "loss": 0.0028, "step": 53104 }, { "epoch": 49.54, "learning_rate": 4.380736940298508e-05, "loss": 0.0, "step": 53108 }, { "epoch": 49.54, "learning_rate": 4.3806902985074626e-05, "loss": 0.0001, "step": 53112 }, { "epoch": 49.55, "learning_rate": 4.380643656716418e-05, "loss": 0.0, "step": 53116 }, { "epoch": 49.55, "learning_rate": 4.3805970149253736e-05, "loss": 0.0, "step": 53120 }, { "epoch": 49.56, "learning_rate": 4.3805503731343284e-05, "loss": 0.0001, "step": 53124 }, { "epoch": 49.56, "learning_rate": 4.380503731343284e-05, "loss": 0.0, "step": 53128 }, { "epoch": 49.56, "learning_rate": 4.380457089552239e-05, "loss": 0.0, "step": 53132 }, { "epoch": 49.57, "learning_rate": 4.380410447761194e-05, "loss": 0.0, "step": 53136 }, { "epoch": 49.57, "learning_rate": 4.38036380597015e-05, "loss": 0.0001, "step": 53140 }, { "epoch": 49.57, "learning_rate": 4.3803171641791045e-05, "loss": 0.0001, "step": 53144 }, { "epoch": 49.58, "learning_rate": 4.38027052238806e-05, "loss": 0.0001, "step": 53148 }, { "epoch": 49.58, "learning_rate": 4.3802238805970155e-05, "loss": 0.0, "step": 53152 }, { "epoch": 49.59, "learning_rate": 4.38017723880597e-05, "loss": 0.0, "step": 53156 }, { "epoch": 49.59, "learning_rate": 4.380130597014925e-05, "loss": 0.0, "step": 53160 }, { "epoch": 49.59, "learning_rate": 4.3800839552238806e-05, "loss": 0.0001, "step": 53164 }, { "epoch": 49.6, "learning_rate": 4.380037313432836e-05, "loss": 0.0, "step": 53168 }, { "epoch": 49.6, "learning_rate": 4.379990671641791e-05, "loss": 0.0001, "step": 53172 }, { "epoch": 49.6, "learning_rate": 4.3799440298507464e-05, "loss": 0.0015, "step": 53176 }, { "epoch": 49.61, "learning_rate": 4.379897388059702e-05, "loss": 0.0, "step": 53180 }, { "epoch": 49.61, "learning_rate": 4.379850746268657e-05, "loss": 0.0, "step": 53184 }, { "epoch": 49.62, "learning_rate": 4.379804104477612e-05, "loss": 0.0, "step": 53188 }, { "epoch": 49.62, "learning_rate": 4.379757462686567e-05, "loss": 0.0007, "step": 53192 }, { "epoch": 49.62, "learning_rate": 4.379710820895523e-05, "loss": 0.0007, "step": 53196 }, { "epoch": 49.63, "learning_rate": 4.379664179104478e-05, "loss": 0.0, "step": 53200 }, { "epoch": 49.63, "learning_rate": 4.379617537313433e-05, "loss": 0.0002, "step": 53204 }, { "epoch": 49.63, "learning_rate": 4.379570895522388e-05, "loss": 0.0001, "step": 53208 }, { "epoch": 49.64, "learning_rate": 4.379524253731344e-05, "loss": 0.0003, "step": 53212 }, { "epoch": 49.64, "learning_rate": 4.3794776119402986e-05, "loss": 0.0, "step": 53216 }, { "epoch": 49.65, "learning_rate": 4.379430970149254e-05, "loss": 0.0, "step": 53220 }, { "epoch": 49.65, "learning_rate": 4.379384328358209e-05, "loss": 0.0, "step": 53224 }, { "epoch": 49.65, "learning_rate": 4.3793376865671644e-05, "loss": 0.0, "step": 53228 }, { "epoch": 49.66, "learning_rate": 4.37929104477612e-05, "loss": 0.0, "step": 53232 }, { "epoch": 49.66, "learning_rate": 4.379244402985075e-05, "loss": 0.0001, "step": 53236 }, { "epoch": 49.66, "learning_rate": 4.37919776119403e-05, "loss": 0.001, "step": 53240 }, { "epoch": 49.67, "learning_rate": 4.379151119402986e-05, "loss": 0.0, "step": 53244 }, { "epoch": 49.67, "learning_rate": 4.3791044776119405e-05, "loss": 0.0, "step": 53248 }, { "epoch": 49.68, "learning_rate": 4.379057835820895e-05, "loss": 0.0005, "step": 53252 }, { "epoch": 49.68, "learning_rate": 4.3790111940298515e-05, "loss": 0.0, "step": 53256 }, { "epoch": 49.68, "learning_rate": 4.378964552238806e-05, "loss": 0.0001, "step": 53260 }, { "epoch": 49.69, "learning_rate": 4.378917910447761e-05, "loss": 0.0, "step": 53264 }, { "epoch": 49.69, "learning_rate": 4.3788712686567166e-05, "loss": 0.0, "step": 53268 }, { "epoch": 49.69, "learning_rate": 4.378824626865672e-05, "loss": 0.0, "step": 53272 }, { "epoch": 49.7, "learning_rate": 4.378777985074627e-05, "loss": 0.0021, "step": 53276 }, { "epoch": 49.7, "learning_rate": 4.3787313432835824e-05, "loss": 0.0, "step": 53280 }, { "epoch": 49.71, "learning_rate": 4.378684701492537e-05, "loss": 0.0, "step": 53284 }, { "epoch": 49.71, "learning_rate": 4.378638059701493e-05, "loss": 0.0001, "step": 53288 }, { "epoch": 49.71, "learning_rate": 4.378591417910448e-05, "loss": 0.0, "step": 53292 }, { "epoch": 49.72, "learning_rate": 4.378544776119403e-05, "loss": 0.0, "step": 53296 }, { "epoch": 49.72, "learning_rate": 4.3784981343283585e-05, "loss": 0.0, "step": 53300 }, { "epoch": 49.72, "learning_rate": 4.378451492537314e-05, "loss": 0.0, "step": 53304 }, { "epoch": 49.73, "learning_rate": 4.378404850746269e-05, "loss": 0.0, "step": 53308 }, { "epoch": 49.73, "learning_rate": 4.3783582089552236e-05, "loss": 0.0, "step": 53312 }, { "epoch": 49.73, "learning_rate": 4.37831156716418e-05, "loss": 0.0, "step": 53316 }, { "epoch": 49.74, "learning_rate": 4.3782649253731346e-05, "loss": 0.0015, "step": 53320 }, { "epoch": 49.74, "learning_rate": 4.3782182835820894e-05, "loss": 0.0, "step": 53324 }, { "epoch": 49.75, "learning_rate": 4.378171641791045e-05, "loss": 0.0002, "step": 53328 }, { "epoch": 49.75, "learning_rate": 4.3781250000000004e-05, "loss": 0.0, "step": 53332 }, { "epoch": 49.75, "learning_rate": 4.378078358208955e-05, "loss": 0.001, "step": 53336 }, { "epoch": 49.76, "learning_rate": 4.378031716417911e-05, "loss": 0.0, "step": 53340 }, { "epoch": 49.76, "learning_rate": 4.3779850746268655e-05, "loss": 0.0007, "step": 53344 }, { "epoch": 49.76, "learning_rate": 4.377938432835821e-05, "loss": 0.0, "step": 53348 }, { "epoch": 49.77, "learning_rate": 4.3778917910447765e-05, "loss": 0.0, "step": 53352 }, { "epoch": 49.77, "learning_rate": 4.377845149253731e-05, "loss": 0.013, "step": 53356 }, { "epoch": 49.78, "learning_rate": 4.377798507462687e-05, "loss": 0.0007, "step": 53360 }, { "epoch": 49.78, "learning_rate": 4.377751865671642e-05, "loss": 0.0, "step": 53364 }, { "epoch": 49.78, "learning_rate": 4.377705223880597e-05, "loss": 0.0, "step": 53368 }, { "epoch": 49.79, "learning_rate": 4.3776585820895526e-05, "loss": 0.0, "step": 53372 }, { "epoch": 49.79, "learning_rate": 4.377611940298508e-05, "loss": 0.0, "step": 53376 }, { "epoch": 49.79, "learning_rate": 4.377565298507463e-05, "loss": 0.0, "step": 53380 }, { "epoch": 49.8, "learning_rate": 4.3775186567164184e-05, "loss": 0.0, "step": 53384 }, { "epoch": 49.8, "learning_rate": 4.377472014925373e-05, "loss": 0.0006, "step": 53388 }, { "epoch": 49.81, "learning_rate": 4.377425373134329e-05, "loss": 0.0032, "step": 53392 }, { "epoch": 49.81, "learning_rate": 4.377378731343284e-05, "loss": 0.0001, "step": 53396 }, { "epoch": 49.81, "learning_rate": 4.377332089552239e-05, "loss": 0.0, "step": 53400 }, { "epoch": 49.82, "learning_rate": 4.377285447761194e-05, "loss": 0.0, "step": 53404 }, { "epoch": 49.82, "learning_rate": 4.37723880597015e-05, "loss": 0.0, "step": 53408 }, { "epoch": 49.82, "learning_rate": 4.377192164179105e-05, "loss": 0.0001, "step": 53412 }, { "epoch": 49.83, "learning_rate": 4.3771455223880596e-05, "loss": 0.0, "step": 53416 }, { "epoch": 49.83, "learning_rate": 4.377098880597015e-05, "loss": 0.0, "step": 53420 }, { "epoch": 49.84, "learning_rate": 4.3770522388059706e-05, "loss": 0.0, "step": 53424 }, { "epoch": 49.84, "learning_rate": 4.3770055970149254e-05, "loss": 0.0, "step": 53428 }, { "epoch": 49.84, "learning_rate": 4.376958955223881e-05, "loss": 0.0, "step": 53432 }, { "epoch": 49.85, "learning_rate": 4.3769123134328364e-05, "loss": 0.0, "step": 53436 }, { "epoch": 49.85, "learning_rate": 4.376865671641791e-05, "loss": 0.0, "step": 53440 }, { "epoch": 49.85, "learning_rate": 4.3768190298507467e-05, "loss": 0.0021, "step": 53444 }, { "epoch": 49.86, "learning_rate": 4.3767723880597015e-05, "loss": 0.0, "step": 53448 }, { "epoch": 49.86, "learning_rate": 4.376725746268657e-05, "loss": 0.0002, "step": 53452 }, { "epoch": 49.87, "learning_rate": 4.3766791044776125e-05, "loss": 0.0021, "step": 53456 }, { "epoch": 49.87, "learning_rate": 4.376632462686567e-05, "loss": 0.0, "step": 53460 }, { "epoch": 49.87, "learning_rate": 4.376585820895522e-05, "loss": 0.0, "step": 53464 }, { "epoch": 49.88, "learning_rate": 4.376539179104478e-05, "loss": 0.0001, "step": 53468 }, { "epoch": 49.88, "learning_rate": 4.376492537313433e-05, "loss": 0.0, "step": 53472 }, { "epoch": 49.88, "learning_rate": 4.376445895522388e-05, "loss": 0.0, "step": 53476 }, { "epoch": 49.89, "learning_rate": 4.3763992537313434e-05, "loss": 0.0, "step": 53480 }, { "epoch": 49.89, "learning_rate": 4.376352611940299e-05, "loss": 0.0005, "step": 53484 }, { "epoch": 49.9, "learning_rate": 4.376305970149254e-05, "loss": 0.0, "step": 53488 }, { "epoch": 49.9, "learning_rate": 4.376259328358209e-05, "loss": 0.0, "step": 53492 }, { "epoch": 49.9, "learning_rate": 4.3762126865671646e-05, "loss": 0.0, "step": 53496 }, { "epoch": 49.91, "learning_rate": 4.3761660447761195e-05, "loss": 0.0, "step": 53500 }, { "epoch": 49.91, "eval_exact_match": 0.7495164410058027, "eval_exec": 0.7833655705996132, "eval_loss": 0.4703017771244049, "eval_runtime": 1159.4214, "eval_samples_per_second": 0.892, "step": 53500 }, { "epoch": 49.91, "learning_rate": 4.376119402985075e-05, "loss": 0.001, "step": 53504 }, { "epoch": 49.91, "learning_rate": 4.37607276119403e-05, "loss": 0.0, "step": 53508 }, { "epoch": 49.92, "learning_rate": 4.376026119402985e-05, "loss": 0.0, "step": 53512 }, { "epoch": 49.92, "learning_rate": 4.375979477611941e-05, "loss": 0.0004, "step": 53516 }, { "epoch": 49.93, "learning_rate": 4.3759328358208956e-05, "loss": 0.0, "step": 53520 }, { "epoch": 49.93, "learning_rate": 4.375886194029851e-05, "loss": 0.0, "step": 53524 }, { "epoch": 49.93, "learning_rate": 4.3758395522388065e-05, "loss": 0.0, "step": 53528 }, { "epoch": 49.94, "learning_rate": 4.3757929104477614e-05, "loss": 0.0, "step": 53532 }, { "epoch": 49.94, "learning_rate": 4.375746268656717e-05, "loss": 0.0, "step": 53536 }, { "epoch": 49.94, "learning_rate": 4.3756996268656717e-05, "loss": 0.0023, "step": 53540 }, { "epoch": 49.95, "learning_rate": 4.375652985074627e-05, "loss": 0.0001, "step": 53544 }, { "epoch": 49.95, "learning_rate": 4.3756063432835826e-05, "loss": 0.0, "step": 53548 }, { "epoch": 49.96, "learning_rate": 4.3755597014925375e-05, "loss": 0.0, "step": 53552 }, { "epoch": 49.96, "learning_rate": 4.375513059701493e-05, "loss": 0.0, "step": 53556 }, { "epoch": 49.96, "learning_rate": 4.3754664179104484e-05, "loss": 0.0001, "step": 53560 }, { "epoch": 49.97, "learning_rate": 4.375419776119403e-05, "loss": 0.0001, "step": 53564 }, { "epoch": 49.97, "learning_rate": 4.375373134328358e-05, "loss": 0.0, "step": 53568 }, { "epoch": 49.97, "learning_rate": 4.3753264925373136e-05, "loss": 0.0001, "step": 53572 }, { "epoch": 49.98, "learning_rate": 4.375279850746269e-05, "loss": 0.0002, "step": 53576 }, { "epoch": 49.98, "learning_rate": 4.375233208955224e-05, "loss": 0.0003, "step": 53580 }, { "epoch": 49.98, "learning_rate": 4.3751865671641793e-05, "loss": 0.0001, "step": 53584 }, { "epoch": 49.99, "learning_rate": 4.375139925373135e-05, "loss": 0.0024, "step": 53588 }, { "epoch": 49.99, "learning_rate": 4.3750932835820897e-05, "loss": 0.0, "step": 53592 }, { "epoch": 50.0, "learning_rate": 4.375046641791045e-05, "loss": 0.0, "step": 53596 }, { "epoch": 50.0, "learning_rate": 4.375e-05, "loss": 0.0, "step": 53600 }, { "epoch": 50.0, "learning_rate": 4.3749533582089554e-05, "loss": 0.0, "step": 53604 }, { "epoch": 50.01, "learning_rate": 4.374906716417911e-05, "loss": 0.0, "step": 53608 }, { "epoch": 50.01, "learning_rate": 4.374860074626866e-05, "loss": 0.0008, "step": 53612 }, { "epoch": 50.01, "learning_rate": 4.3748134328358206e-05, "loss": 0.0, "step": 53616 }, { "epoch": 50.02, "learning_rate": 4.374766791044777e-05, "loss": 0.0003, "step": 53620 }, { "epoch": 50.02, "learning_rate": 4.3747201492537315e-05, "loss": 0.0001, "step": 53624 }, { "epoch": 50.03, "learning_rate": 4.3746735074626864e-05, "loss": 0.003, "step": 53628 }, { "epoch": 50.03, "learning_rate": 4.374626865671642e-05, "loss": 0.0002, "step": 53632 }, { "epoch": 50.03, "learning_rate": 4.3745802238805973e-05, "loss": 0.0001, "step": 53636 }, { "epoch": 50.04, "learning_rate": 4.374533582089552e-05, "loss": 0.0, "step": 53640 }, { "epoch": 50.04, "learning_rate": 4.3744869402985076e-05, "loss": 0.0003, "step": 53644 }, { "epoch": 50.04, "learning_rate": 4.374440298507463e-05, "loss": 0.0, "step": 53648 }, { "epoch": 50.05, "learning_rate": 4.374393656716418e-05, "loss": 0.0, "step": 53652 }, { "epoch": 50.05, "learning_rate": 4.3743470149253734e-05, "loss": 0.0, "step": 53656 }, { "epoch": 50.06, "learning_rate": 4.374300373134328e-05, "loss": 0.0002, "step": 53660 }, { "epoch": 50.06, "learning_rate": 4.374253731343284e-05, "loss": 0.0001, "step": 53664 }, { "epoch": 50.06, "learning_rate": 4.374207089552239e-05, "loss": 0.0005, "step": 53668 }, { "epoch": 50.07, "learning_rate": 4.374160447761194e-05, "loss": 0.0001, "step": 53672 }, { "epoch": 50.07, "learning_rate": 4.374113805970149e-05, "loss": 0.0001, "step": 53676 }, { "epoch": 50.07, "learning_rate": 4.374067164179105e-05, "loss": 0.0, "step": 53680 }, { "epoch": 50.08, "learning_rate": 4.37402052238806e-05, "loss": 0.006, "step": 53684 }, { "epoch": 50.08, "learning_rate": 4.373973880597015e-05, "loss": 0.0, "step": 53688 }, { "epoch": 50.09, "learning_rate": 4.37392723880597e-05, "loss": 0.0, "step": 53692 }, { "epoch": 50.09, "learning_rate": 4.3738805970149256e-05, "loss": 0.0001, "step": 53696 }, { "epoch": 50.09, "learning_rate": 4.373833955223881e-05, "loss": 0.0004, "step": 53700 }, { "epoch": 50.1, "learning_rate": 4.373787313432836e-05, "loss": 0.0, "step": 53704 }, { "epoch": 50.1, "learning_rate": 4.3737406716417914e-05, "loss": 0.0, "step": 53708 }, { "epoch": 50.1, "learning_rate": 4.373694029850747e-05, "loss": 0.0, "step": 53712 }, { "epoch": 50.11, "learning_rate": 4.373647388059702e-05, "loss": 0.0001, "step": 53716 }, { "epoch": 50.11, "learning_rate": 4.3736007462686565e-05, "loss": 0.0, "step": 53720 }, { "epoch": 50.12, "learning_rate": 4.373554104477613e-05, "loss": 0.0, "step": 53724 }, { "epoch": 50.12, "learning_rate": 4.3735074626865675e-05, "loss": 0.0, "step": 53728 }, { "epoch": 50.12, "learning_rate": 4.3734608208955223e-05, "loss": 0.0001, "step": 53732 }, { "epoch": 50.13, "learning_rate": 4.373414179104478e-05, "loss": 0.0022, "step": 53736 }, { "epoch": 50.13, "learning_rate": 4.373367537313433e-05, "loss": 0.0, "step": 53740 }, { "epoch": 50.13, "learning_rate": 4.373320895522388e-05, "loss": 0.0, "step": 53744 }, { "epoch": 50.14, "learning_rate": 4.3732742537313436e-05, "loss": 0.0, "step": 53748 }, { "epoch": 50.14, "learning_rate": 4.3732276119402984e-05, "loss": 0.0001, "step": 53752 }, { "epoch": 50.15, "learning_rate": 4.373180970149254e-05, "loss": 0.0, "step": 53756 }, { "epoch": 50.15, "learning_rate": 4.3731343283582094e-05, "loss": 0.0001, "step": 53760 }, { "epoch": 50.15, "learning_rate": 4.373087686567164e-05, "loss": 0.0001, "step": 53764 }, { "epoch": 50.16, "learning_rate": 4.37304104477612e-05, "loss": 0.0, "step": 53768 }, { "epoch": 50.16, "learning_rate": 4.372994402985075e-05, "loss": 0.0002, "step": 53772 }, { "epoch": 50.16, "learning_rate": 4.37294776119403e-05, "loss": 0.0, "step": 53776 }, { "epoch": 50.17, "learning_rate": 4.372901119402985e-05, "loss": 0.0, "step": 53780 }, { "epoch": 50.17, "learning_rate": 4.37285447761194e-05, "loss": 0.0, "step": 53784 }, { "epoch": 50.18, "learning_rate": 4.372807835820896e-05, "loss": 0.0, "step": 53788 }, { "epoch": 50.18, "learning_rate": 4.3727611940298506e-05, "loss": 0.0001, "step": 53792 }, { "epoch": 50.18, "learning_rate": 4.372714552238806e-05, "loss": 0.0002, "step": 53796 }, { "epoch": 50.19, "learning_rate": 4.3726679104477616e-05, "loss": 0.0003, "step": 53800 }, { "epoch": 50.19, "learning_rate": 4.3726212686567164e-05, "loss": 0.0, "step": 53804 }, { "epoch": 50.19, "learning_rate": 4.372574626865672e-05, "loss": 0.0006, "step": 53808 }, { "epoch": 50.2, "learning_rate": 4.372527985074627e-05, "loss": 0.0, "step": 53812 }, { "epoch": 50.2, "learning_rate": 4.372481343283582e-05, "loss": 0.0, "step": 53816 }, { "epoch": 50.21, "learning_rate": 4.372434701492538e-05, "loss": 0.0003, "step": 53820 }, { "epoch": 50.21, "learning_rate": 4.3723880597014925e-05, "loss": 0.0001, "step": 53824 }, { "epoch": 50.21, "learning_rate": 4.372341417910448e-05, "loss": 0.0001, "step": 53828 }, { "epoch": 50.22, "learning_rate": 4.3722947761194035e-05, "loss": 0.0, "step": 53832 }, { "epoch": 50.22, "learning_rate": 4.372248134328358e-05, "loss": 0.0003, "step": 53836 }, { "epoch": 50.22, "learning_rate": 4.372201492537313e-05, "loss": 0.0001, "step": 53840 }, { "epoch": 50.23, "learning_rate": 4.3721548507462686e-05, "loss": 0.0, "step": 53844 }, { "epoch": 50.23, "learning_rate": 4.372108208955224e-05, "loss": 0.0026, "step": 53848 }, { "epoch": 50.24, "learning_rate": 4.3720615671641796e-05, "loss": 0.0, "step": 53852 }, { "epoch": 50.24, "learning_rate": 4.3720149253731344e-05, "loss": 0.0017, "step": 53856 }, { "epoch": 50.24, "learning_rate": 4.37196828358209e-05, "loss": 0.0, "step": 53860 }, { "epoch": 50.25, "learning_rate": 4.3719216417910454e-05, "loss": 0.0, "step": 53864 }, { "epoch": 50.25, "learning_rate": 4.371875e-05, "loss": 0.0001, "step": 53868 }, { "epoch": 50.25, "learning_rate": 4.371828358208955e-05, "loss": 0.0, "step": 53872 }, { "epoch": 50.26, "learning_rate": 4.371781716417911e-05, "loss": 0.0003, "step": 53876 }, { "epoch": 50.26, "learning_rate": 4.371735074626866e-05, "loss": 0.0, "step": 53880 }, { "epoch": 50.26, "learning_rate": 4.371688432835821e-05, "loss": 0.0001, "step": 53884 }, { "epoch": 50.27, "learning_rate": 4.371641791044776e-05, "loss": 0.0, "step": 53888 }, { "epoch": 50.27, "learning_rate": 4.371595149253732e-05, "loss": 0.0, "step": 53892 }, { "epoch": 50.28, "learning_rate": 4.3715485074626866e-05, "loss": 0.0001, "step": 53896 }, { "epoch": 50.28, "learning_rate": 4.371501865671642e-05, "loss": 0.0001, "step": 53900 }, { "epoch": 50.28, "learning_rate": 4.371455223880597e-05, "loss": 0.0001, "step": 53904 }, { "epoch": 50.29, "learning_rate": 4.3714085820895524e-05, "loss": 0.0001, "step": 53908 }, { "epoch": 50.29, "learning_rate": 4.371361940298508e-05, "loss": 0.0, "step": 53912 }, { "epoch": 50.29, "learning_rate": 4.371315298507463e-05, "loss": 0.0006, "step": 53916 }, { "epoch": 50.3, "learning_rate": 4.371268656716418e-05, "loss": 0.0, "step": 53920 }, { "epoch": 50.3, "learning_rate": 4.371222014925374e-05, "loss": 0.0003, "step": 53924 }, { "epoch": 50.31, "learning_rate": 4.3711753731343285e-05, "loss": 0.0, "step": 53928 }, { "epoch": 50.31, "learning_rate": 4.371128731343283e-05, "loss": 0.0, "step": 53932 }, { "epoch": 50.31, "learning_rate": 4.3710820895522395e-05, "loss": 0.0071, "step": 53936 }, { "epoch": 50.32, "learning_rate": 4.371035447761194e-05, "loss": 0.0003, "step": 53940 }, { "epoch": 50.32, "learning_rate": 4.370988805970149e-05, "loss": 0.0, "step": 53944 }, { "epoch": 50.32, "learning_rate": 4.3709421641791046e-05, "loss": 0.0, "step": 53948 }, { "epoch": 50.33, "learning_rate": 4.37089552238806e-05, "loss": 0.0017, "step": 53952 }, { "epoch": 50.33, "learning_rate": 4.370848880597015e-05, "loss": 0.0029, "step": 53956 }, { "epoch": 50.34, "learning_rate": 4.3708022388059704e-05, "loss": 0.0, "step": 53960 }, { "epoch": 50.34, "learning_rate": 4.370755597014925e-05, "loss": 0.0, "step": 53964 }, { "epoch": 50.34, "learning_rate": 4.370708955223881e-05, "loss": 0.0, "step": 53968 }, { "epoch": 50.35, "learning_rate": 4.370662313432836e-05, "loss": 0.0, "step": 53972 }, { "epoch": 50.35, "learning_rate": 4.370615671641791e-05, "loss": 0.0, "step": 53976 }, { "epoch": 50.35, "learning_rate": 4.3705690298507465e-05, "loss": 0.0004, "step": 53980 }, { "epoch": 50.36, "learning_rate": 4.370522388059702e-05, "loss": 0.0, "step": 53984 }, { "epoch": 50.36, "learning_rate": 4.370475746268657e-05, "loss": 0.001, "step": 53988 }, { "epoch": 50.37, "learning_rate": 4.3704291044776116e-05, "loss": 0.0, "step": 53992 }, { "epoch": 50.37, "learning_rate": 4.370382462686568e-05, "loss": 0.0, "step": 53996 }, { "epoch": 50.37, "learning_rate": 4.3703358208955226e-05, "loss": 0.0, "step": 54000 }, { "epoch": 50.37, "eval_exact_match": 0.746615087040619, "eval_exec": 0.7669245647969052, "eval_loss": 0.46396273374557495, "eval_runtime": 1103.7037, "eval_samples_per_second": 0.937, "step": 54000 }, { "epoch": 50.38, "learning_rate": 4.3702891791044774e-05, "loss": 0.0, "step": 54004 }, { "epoch": 50.38, "learning_rate": 4.370242537313433e-05, "loss": 0.0, "step": 54008 }, { "epoch": 50.38, "learning_rate": 4.3701958955223884e-05, "loss": 0.0002, "step": 54012 }, { "epoch": 50.39, "learning_rate": 4.370149253731344e-05, "loss": 0.0, "step": 54016 }, { "epoch": 50.39, "learning_rate": 4.370102611940299e-05, "loss": 0.0011, "step": 54020 }, { "epoch": 50.4, "learning_rate": 4.3700559701492535e-05, "loss": 0.0, "step": 54024 }, { "epoch": 50.4, "learning_rate": 4.37000932835821e-05, "loss": 0.0, "step": 54028 }, { "epoch": 50.4, "learning_rate": 4.3699626865671645e-05, "loss": 0.0, "step": 54032 }, { "epoch": 50.41, "learning_rate": 4.369916044776119e-05, "loss": 0.0, "step": 54036 }, { "epoch": 50.41, "learning_rate": 4.369869402985075e-05, "loss": 0.0, "step": 54040 }, { "epoch": 50.41, "learning_rate": 4.36982276119403e-05, "loss": 0.0001, "step": 54044 }, { "epoch": 50.42, "learning_rate": 4.369776119402985e-05, "loss": 0.0, "step": 54048 }, { "epoch": 50.42, "learning_rate": 4.3697294776119406e-05, "loss": 0.0, "step": 54052 }, { "epoch": 50.43, "learning_rate": 4.369682835820896e-05, "loss": 0.0, "step": 54056 }, { "epoch": 50.43, "learning_rate": 4.369636194029851e-05, "loss": 0.0004, "step": 54060 }, { "epoch": 50.43, "learning_rate": 4.3695895522388064e-05, "loss": 0.0, "step": 54064 }, { "epoch": 50.44, "learning_rate": 4.369542910447761e-05, "loss": 0.0, "step": 54068 }, { "epoch": 50.44, "learning_rate": 4.369496268656717e-05, "loss": 0.0004, "step": 54072 }, { "epoch": 50.44, "learning_rate": 4.369449626865672e-05, "loss": 0.0001, "step": 54076 }, { "epoch": 50.45, "learning_rate": 4.369402985074627e-05, "loss": 0.0, "step": 54080 }, { "epoch": 50.45, "learning_rate": 4.369356343283582e-05, "loss": 0.0, "step": 54084 }, { "epoch": 50.46, "learning_rate": 4.369309701492538e-05, "loss": 0.0, "step": 54088 }, { "epoch": 50.46, "learning_rate": 4.369263059701493e-05, "loss": 0.0, "step": 54092 }, { "epoch": 50.46, "learning_rate": 4.3692164179104476e-05, "loss": 0.0001, "step": 54096 }, { "epoch": 50.47, "learning_rate": 4.369169776119403e-05, "loss": 0.0, "step": 54100 }, { "epoch": 50.47, "learning_rate": 4.3691231343283586e-05, "loss": 0.0, "step": 54104 }, { "epoch": 50.47, "learning_rate": 4.3690764925373134e-05, "loss": 0.0, "step": 54108 }, { "epoch": 50.48, "learning_rate": 4.369029850746269e-05, "loss": 0.0003, "step": 54112 }, { "epoch": 50.48, "learning_rate": 4.3689832089552244e-05, "loss": 0.0, "step": 54116 }, { "epoch": 50.49, "learning_rate": 4.368936567164179e-05, "loss": 0.0, "step": 54120 }, { "epoch": 50.49, "learning_rate": 4.368889925373135e-05, "loss": 0.0009, "step": 54124 }, { "epoch": 50.49, "learning_rate": 4.3688432835820895e-05, "loss": 0.0, "step": 54128 }, { "epoch": 50.5, "learning_rate": 4.368796641791045e-05, "loss": 0.0001, "step": 54132 }, { "epoch": 50.5, "learning_rate": 4.3687500000000005e-05, "loss": 0.0, "step": 54136 }, { "epoch": 50.5, "learning_rate": 4.368703358208955e-05, "loss": 0.0003, "step": 54140 }, { "epoch": 50.51, "learning_rate": 4.36865671641791e-05, "loss": 0.0001, "step": 54144 }, { "epoch": 50.51, "learning_rate": 4.368610074626866e-05, "loss": 0.0003, "step": 54148 }, { "epoch": 50.51, "learning_rate": 4.368563432835821e-05, "loss": 0.0005, "step": 54152 }, { "epoch": 50.52, "learning_rate": 4.368516791044776e-05, "loss": 0.0, "step": 54156 }, { "epoch": 50.52, "learning_rate": 4.3684701492537314e-05, "loss": 0.0, "step": 54160 }, { "epoch": 50.53, "learning_rate": 4.368423507462687e-05, "loss": 0.0, "step": 54164 }, { "epoch": 50.53, "learning_rate": 4.368376865671642e-05, "loss": 0.0004, "step": 54168 }, { "epoch": 50.53, "learning_rate": 4.368330223880597e-05, "loss": 0.0002, "step": 54172 }, { "epoch": 50.54, "learning_rate": 4.368283582089553e-05, "loss": 0.0, "step": 54176 }, { "epoch": 50.54, "learning_rate": 4.368236940298508e-05, "loss": 0.0003, "step": 54180 }, { "epoch": 50.54, "learning_rate": 4.368190298507463e-05, "loss": 0.0, "step": 54184 }, { "epoch": 50.55, "learning_rate": 4.368143656716418e-05, "loss": 0.0, "step": 54188 }, { "epoch": 50.55, "learning_rate": 4.368097014925373e-05, "loss": 0.0, "step": 54192 }, { "epoch": 50.56, "learning_rate": 4.368050373134329e-05, "loss": 0.0, "step": 54196 }, { "epoch": 50.56, "learning_rate": 4.3680037313432836e-05, "loss": 0.0, "step": 54200 }, { "epoch": 50.56, "learning_rate": 4.367957089552239e-05, "loss": 0.0141, "step": 54204 }, { "epoch": 50.57, "learning_rate": 4.3679104477611946e-05, "loss": 0.0001, "step": 54208 }, { "epoch": 50.57, "learning_rate": 4.3678638059701494e-05, "loss": 0.0006, "step": 54212 }, { "epoch": 50.57, "learning_rate": 4.367817164179105e-05, "loss": 0.0, "step": 54216 }, { "epoch": 50.58, "learning_rate": 4.36777052238806e-05, "loss": 0.0, "step": 54220 }, { "epoch": 50.58, "learning_rate": 4.367723880597015e-05, "loss": 0.0, "step": 54224 }, { "epoch": 50.59, "learning_rate": 4.367677238805971e-05, "loss": 0.0, "step": 54228 }, { "epoch": 50.59, "learning_rate": 4.3676305970149255e-05, "loss": 0.0, "step": 54232 }, { "epoch": 50.59, "learning_rate": 4.367583955223881e-05, "loss": 0.0, "step": 54236 }, { "epoch": 50.6, "learning_rate": 4.3675373134328365e-05, "loss": 0.0006, "step": 54240 }, { "epoch": 50.6, "learning_rate": 4.367490671641791e-05, "loss": 0.0, "step": 54244 }, { "epoch": 50.6, "learning_rate": 4.367444029850746e-05, "loss": 0.0001, "step": 54248 }, { "epoch": 50.61, "learning_rate": 4.3673973880597016e-05, "loss": 0.0, "step": 54252 }, { "epoch": 50.61, "learning_rate": 4.367350746268657e-05, "loss": 0.0002, "step": 54256 }, { "epoch": 50.62, "learning_rate": 4.367304104477612e-05, "loss": 0.0, "step": 54260 }, { "epoch": 50.62, "learning_rate": 4.3672574626865674e-05, "loss": 0.0, "step": 54264 }, { "epoch": 50.62, "learning_rate": 4.367210820895523e-05, "loss": 0.0, "step": 54268 }, { "epoch": 50.63, "learning_rate": 4.367164179104478e-05, "loss": 0.0001, "step": 54272 }, { "epoch": 50.63, "learning_rate": 4.367117537313433e-05, "loss": 0.0057, "step": 54276 }, { "epoch": 50.63, "learning_rate": 4.367070895522388e-05, "loss": 0.0004, "step": 54280 }, { "epoch": 50.64, "learning_rate": 4.3670242537313435e-05, "loss": 0.0, "step": 54284 }, { "epoch": 50.64, "learning_rate": 4.366977611940299e-05, "loss": 0.0, "step": 54288 }, { "epoch": 50.65, "learning_rate": 4.366930970149254e-05, "loss": 0.0006, "step": 54292 }, { "epoch": 50.65, "learning_rate": 4.366884328358209e-05, "loss": 0.0, "step": 54296 }, { "epoch": 50.65, "learning_rate": 4.366837686567165e-05, "loss": 0.0, "step": 54300 }, { "epoch": 50.66, "learning_rate": 4.3667910447761196e-05, "loss": 0.0, "step": 54304 }, { "epoch": 50.66, "learning_rate": 4.3667444029850744e-05, "loss": 0.0001, "step": 54308 }, { "epoch": 50.66, "learning_rate": 4.36669776119403e-05, "loss": 0.0012, "step": 54312 }, { "epoch": 50.67, "learning_rate": 4.3666511194029854e-05, "loss": 0.0, "step": 54316 }, { "epoch": 50.67, "learning_rate": 4.36660447761194e-05, "loss": 0.0, "step": 54320 }, { "epoch": 50.68, "learning_rate": 4.366557835820896e-05, "loss": 0.0009, "step": 54324 }, { "epoch": 50.68, "learning_rate": 4.366511194029851e-05, "loss": 0.0001, "step": 54328 }, { "epoch": 50.68, "learning_rate": 4.366464552238806e-05, "loss": 0.0, "step": 54332 }, { "epoch": 50.69, "learning_rate": 4.3664179104477615e-05, "loss": 0.0, "step": 54336 }, { "epoch": 50.69, "learning_rate": 4.366371268656716e-05, "loss": 0.0, "step": 54340 }, { "epoch": 50.69, "learning_rate": 4.3663246268656724e-05, "loss": 0.0, "step": 54344 }, { "epoch": 50.7, "learning_rate": 4.366277985074627e-05, "loss": 0.0, "step": 54348 }, { "epoch": 50.7, "learning_rate": 4.366231343283582e-05, "loss": 0.0002, "step": 54352 }, { "epoch": 50.71, "learning_rate": 4.3661847014925376e-05, "loss": 0.0, "step": 54356 }, { "epoch": 50.71, "learning_rate": 4.366138059701493e-05, "loss": 0.0, "step": 54360 }, { "epoch": 50.71, "learning_rate": 4.366091417910448e-05, "loss": 0.0, "step": 54364 }, { "epoch": 50.72, "learning_rate": 4.3660447761194034e-05, "loss": 0.0, "step": 54368 }, { "epoch": 50.72, "learning_rate": 4.365998134328358e-05, "loss": 0.0, "step": 54372 }, { "epoch": 50.72, "learning_rate": 4.365951492537314e-05, "loss": 0.0, "step": 54376 }, { "epoch": 50.73, "learning_rate": 4.365904850746269e-05, "loss": 0.0001, "step": 54380 }, { "epoch": 50.73, "learning_rate": 4.365858208955224e-05, "loss": 0.0034, "step": 54384 }, { "epoch": 50.73, "learning_rate": 4.3658115671641795e-05, "loss": 0.0045, "step": 54388 }, { "epoch": 50.74, "learning_rate": 4.365764925373135e-05, "loss": 0.0, "step": 54392 }, { "epoch": 50.74, "learning_rate": 4.36571828358209e-05, "loss": 0.0, "step": 54396 }, { "epoch": 50.75, "learning_rate": 4.3656716417910446e-05, "loss": 0.0016, "step": 54400 }, { "epoch": 50.75, "learning_rate": 4.365625000000001e-05, "loss": 0.0011, "step": 54404 }, { "epoch": 50.75, "learning_rate": 4.3655783582089556e-05, "loss": 0.0026, "step": 54408 }, { "epoch": 50.76, "learning_rate": 4.3655317164179104e-05, "loss": 0.0, "step": 54412 }, { "epoch": 50.76, "learning_rate": 4.365485074626866e-05, "loss": 0.0, "step": 54416 }, { "epoch": 50.76, "learning_rate": 4.3654384328358214e-05, "loss": 0.0004, "step": 54420 }, { "epoch": 50.77, "learning_rate": 4.365391791044776e-05, "loss": 0.0006, "step": 54424 }, { "epoch": 50.77, "learning_rate": 4.3653451492537317e-05, "loss": 0.0, "step": 54428 }, { "epoch": 50.78, "learning_rate": 4.3652985074626865e-05, "loss": 0.0001, "step": 54432 }, { "epoch": 50.78, "learning_rate": 4.365251865671642e-05, "loss": 0.0, "step": 54436 }, { "epoch": 50.78, "learning_rate": 4.3652052238805975e-05, "loss": 0.0, "step": 54440 }, { "epoch": 50.79, "learning_rate": 4.365158582089552e-05, "loss": 0.0, "step": 54444 }, { "epoch": 50.79, "learning_rate": 4.365111940298508e-05, "loss": 0.0013, "step": 54448 }, { "epoch": 50.79, "learning_rate": 4.365065298507463e-05, "loss": 0.0, "step": 54452 }, { "epoch": 50.8, "learning_rate": 4.365018656716418e-05, "loss": 0.0, "step": 54456 }, { "epoch": 50.8, "learning_rate": 4.364972014925373e-05, "loss": 0.0, "step": 54460 }, { "epoch": 50.81, "learning_rate": 4.3649253731343284e-05, "loss": 0.0, "step": 54464 }, { "epoch": 50.81, "learning_rate": 4.364878731343284e-05, "loss": 0.0014, "step": 54468 }, { "epoch": 50.81, "learning_rate": 4.364832089552239e-05, "loss": 0.0028, "step": 54472 }, { "epoch": 50.82, "learning_rate": 4.364785447761194e-05, "loss": 0.0, "step": 54476 }, { "epoch": 50.82, "learning_rate": 4.3647388059701496e-05, "loss": 0.0, "step": 54480 }, { "epoch": 50.82, "learning_rate": 4.3646921641791045e-05, "loss": 0.0002, "step": 54484 }, { "epoch": 50.83, "learning_rate": 4.36464552238806e-05, "loss": 0.0, "step": 54488 }, { "epoch": 50.83, "learning_rate": 4.364598880597015e-05, "loss": 0.0, "step": 54492 }, { "epoch": 50.84, "learning_rate": 4.36455223880597e-05, "loss": 0.0017, "step": 54496 }, { "epoch": 50.84, "learning_rate": 4.364505597014926e-05, "loss": 0.0037, "step": 54500 }, { "epoch": 50.84, "eval_exact_match": 0.7446808510638298, "eval_exec": 0.7756286266924565, "eval_loss": 0.45397448539733887, "eval_runtime": 1155.302, "eval_samples_per_second": 0.895, "step": 54500 }, { "epoch": 50.84, "learning_rate": 4.3644589552238806e-05, "loss": 0.0, "step": 54504 }, { "epoch": 50.85, "learning_rate": 4.364412313432836e-05, "loss": 0.0, "step": 54508 }, { "epoch": 50.85, "learning_rate": 4.3643656716417915e-05, "loss": 0.0, "step": 54512 }, { "epoch": 50.85, "learning_rate": 4.3643190298507464e-05, "loss": 0.0, "step": 54516 }, { "epoch": 50.86, "learning_rate": 4.364272388059702e-05, "loss": 0.0, "step": 54520 }, { "epoch": 50.86, "learning_rate": 4.3642257462686567e-05, "loss": 0.0, "step": 54524 }, { "epoch": 50.87, "learning_rate": 4.364179104477612e-05, "loss": 0.0001, "step": 54528 }, { "epoch": 50.87, "learning_rate": 4.3641324626865676e-05, "loss": 0.0, "step": 54532 }, { "epoch": 50.87, "learning_rate": 4.3640858208955225e-05, "loss": 0.0, "step": 54536 }, { "epoch": 50.88, "learning_rate": 4.364039179104478e-05, "loss": 0.0023, "step": 54540 }, { "epoch": 50.88, "learning_rate": 4.3639925373134334e-05, "loss": 0.0, "step": 54544 }, { "epoch": 50.88, "learning_rate": 4.363945895522388e-05, "loss": 0.0, "step": 54548 }, { "epoch": 50.89, "learning_rate": 4.363899253731343e-05, "loss": 0.0, "step": 54552 }, { "epoch": 50.89, "learning_rate": 4.363852611940299e-05, "loss": 0.0, "step": 54556 }, { "epoch": 50.9, "learning_rate": 4.363805970149254e-05, "loss": 0.0066, "step": 54560 }, { "epoch": 50.9, "learning_rate": 4.363759328358209e-05, "loss": 0.0, "step": 54564 }, { "epoch": 50.9, "learning_rate": 4.3637126865671643e-05, "loss": 0.0, "step": 54568 }, { "epoch": 50.91, "learning_rate": 4.36366604477612e-05, "loss": 0.0029, "step": 54572 }, { "epoch": 50.91, "learning_rate": 4.3636194029850746e-05, "loss": 0.0005, "step": 54576 }, { "epoch": 50.91, "learning_rate": 4.36357276119403e-05, "loss": 0.0, "step": 54580 }, { "epoch": 50.92, "learning_rate": 4.363526119402985e-05, "loss": 0.0, "step": 54584 }, { "epoch": 50.92, "learning_rate": 4.3634794776119404e-05, "loss": 0.0003, "step": 54588 }, { "epoch": 50.93, "learning_rate": 4.363432835820896e-05, "loss": 0.0001, "step": 54592 }, { "epoch": 50.93, "learning_rate": 4.363386194029851e-05, "loss": 0.0, "step": 54596 }, { "epoch": 50.93, "learning_rate": 4.363339552238806e-05, "loss": 0.0, "step": 54600 }, { "epoch": 50.94, "learning_rate": 4.363292910447762e-05, "loss": 0.0, "step": 54604 }, { "epoch": 50.94, "learning_rate": 4.3632462686567165e-05, "loss": 0.0032, "step": 54608 }, { "epoch": 50.94, "learning_rate": 4.3631996268656714e-05, "loss": 0.0, "step": 54612 }, { "epoch": 50.95, "learning_rate": 4.3631529850746275e-05, "loss": 0.0, "step": 54616 }, { "epoch": 50.95, "learning_rate": 4.363106343283582e-05, "loss": 0.0, "step": 54620 }, { "epoch": 50.96, "learning_rate": 4.363059701492537e-05, "loss": 0.0057, "step": 54624 }, { "epoch": 50.96, "learning_rate": 4.3630130597014926e-05, "loss": 0.0001, "step": 54628 }, { "epoch": 50.96, "learning_rate": 4.362966417910448e-05, "loss": 0.0, "step": 54632 }, { "epoch": 50.97, "learning_rate": 4.362919776119403e-05, "loss": 0.0001, "step": 54636 }, { "epoch": 50.97, "learning_rate": 4.3628731343283584e-05, "loss": 0.0, "step": 54640 }, { "epoch": 50.97, "learning_rate": 4.362826492537313e-05, "loss": 0.0, "step": 54644 }, { "epoch": 50.98, "learning_rate": 4.362779850746269e-05, "loss": 0.0001, "step": 54648 }, { "epoch": 50.98, "learning_rate": 4.362733208955224e-05, "loss": 0.0001, "step": 54652 }, { "epoch": 50.98, "learning_rate": 4.362686567164179e-05, "loss": 0.0, "step": 54656 }, { "epoch": 50.99, "learning_rate": 4.3626399253731345e-05, "loss": 0.0003, "step": 54660 }, { "epoch": 50.99, "learning_rate": 4.36259328358209e-05, "loss": 0.0, "step": 54664 }, { "epoch": 51.0, "learning_rate": 4.362546641791045e-05, "loss": 0.0, "step": 54668 }, { "epoch": 51.0, "learning_rate": 4.3625e-05, "loss": 0.0013, "step": 54672 }, { "epoch": 51.0, "learning_rate": 4.362453358208956e-05, "loss": 0.0006, "step": 54676 }, { "epoch": 51.01, "learning_rate": 4.3624067164179106e-05, "loss": 0.0006, "step": 54680 }, { "epoch": 51.01, "learning_rate": 4.362360074626866e-05, "loss": 0.0001, "step": 54684 }, { "epoch": 51.01, "learning_rate": 4.362313432835821e-05, "loss": 0.0001, "step": 54688 }, { "epoch": 51.02, "learning_rate": 4.3622667910447764e-05, "loss": 0.0001, "step": 54692 }, { "epoch": 51.02, "learning_rate": 4.362220149253732e-05, "loss": 0.0001, "step": 54696 }, { "epoch": 51.03, "learning_rate": 4.362173507462687e-05, "loss": 0.0, "step": 54700 }, { "epoch": 51.03, "learning_rate": 4.3621268656716415e-05, "loss": 0.0, "step": 54704 }, { "epoch": 51.03, "learning_rate": 4.362080223880598e-05, "loss": 0.0, "step": 54708 }, { "epoch": 51.04, "learning_rate": 4.3620335820895525e-05, "loss": 0.0001, "step": 54712 }, { "epoch": 51.04, "learning_rate": 4.3619869402985073e-05, "loss": 0.0023, "step": 54716 }, { "epoch": 51.04, "learning_rate": 4.361940298507463e-05, "loss": 0.0, "step": 54720 }, { "epoch": 51.05, "learning_rate": 4.361893656716418e-05, "loss": 0.0, "step": 54724 }, { "epoch": 51.05, "learning_rate": 4.361847014925373e-05, "loss": 0.0, "step": 54728 }, { "epoch": 51.06, "learning_rate": 4.3618003731343286e-05, "loss": 0.0, "step": 54732 }, { "epoch": 51.06, "learning_rate": 4.361753731343284e-05, "loss": 0.0, "step": 54736 }, { "epoch": 51.06, "learning_rate": 4.361707089552239e-05, "loss": 0.0002, "step": 54740 }, { "epoch": 51.07, "learning_rate": 4.3616604477611944e-05, "loss": 0.0007, "step": 54744 }, { "epoch": 51.07, "learning_rate": 4.361613805970149e-05, "loss": 0.0002, "step": 54748 }, { "epoch": 51.07, "learning_rate": 4.361567164179105e-05, "loss": 0.0005, "step": 54752 }, { "epoch": 51.08, "learning_rate": 4.36152052238806e-05, "loss": 0.0, "step": 54756 }, { "epoch": 51.08, "learning_rate": 4.361473880597015e-05, "loss": 0.0, "step": 54760 }, { "epoch": 51.09, "learning_rate": 4.36142723880597e-05, "loss": 0.0, "step": 54764 }, { "epoch": 51.09, "learning_rate": 4.361380597014926e-05, "loss": 0.0, "step": 54768 }, { "epoch": 51.09, "learning_rate": 4.361333955223881e-05, "loss": 0.0092, "step": 54772 }, { "epoch": 51.1, "learning_rate": 4.3612873134328356e-05, "loss": 0.0002, "step": 54776 }, { "epoch": 51.1, "learning_rate": 4.361240671641791e-05, "loss": 0.0003, "step": 54780 }, { "epoch": 51.1, "learning_rate": 4.3611940298507466e-05, "loss": 0.0001, "step": 54784 }, { "epoch": 51.11, "learning_rate": 4.3611473880597014e-05, "loss": 0.0, "step": 54788 }, { "epoch": 51.11, "learning_rate": 4.361100746268657e-05, "loss": 0.0, "step": 54792 }, { "epoch": 51.12, "learning_rate": 4.3610541044776124e-05, "loss": 0.0007, "step": 54796 }, { "epoch": 51.12, "learning_rate": 4.361007462686567e-05, "loss": 0.0, "step": 54800 }, { "epoch": 51.12, "learning_rate": 4.360960820895523e-05, "loss": 0.0001, "step": 54804 }, { "epoch": 51.13, "learning_rate": 4.3609141791044775e-05, "loss": 0.0, "step": 54808 }, { "epoch": 51.13, "learning_rate": 4.360867537313433e-05, "loss": 0.0, "step": 54812 }, { "epoch": 51.13, "learning_rate": 4.3608208955223885e-05, "loss": 0.0, "step": 54816 }, { "epoch": 51.14, "learning_rate": 4.360774253731343e-05, "loss": 0.0, "step": 54820 }, { "epoch": 51.14, "learning_rate": 4.360727611940298e-05, "loss": 0.0, "step": 54824 }, { "epoch": 51.15, "learning_rate": 4.360680970149254e-05, "loss": 0.0, "step": 54828 }, { "epoch": 51.15, "learning_rate": 4.360634328358209e-05, "loss": 0.0, "step": 54832 }, { "epoch": 51.15, "learning_rate": 4.3605876865671646e-05, "loss": 0.0012, "step": 54836 }, { "epoch": 51.16, "learning_rate": 4.3605410447761194e-05, "loss": 0.0001, "step": 54840 }, { "epoch": 51.16, "learning_rate": 4.360494402985075e-05, "loss": 0.0, "step": 54844 }, { "epoch": 51.16, "learning_rate": 4.3604477611940304e-05, "loss": 0.0, "step": 54848 }, { "epoch": 51.17, "learning_rate": 4.360401119402985e-05, "loss": 0.0, "step": 54852 }, { "epoch": 51.17, "learning_rate": 4.360354477611941e-05, "loss": 0.0, "step": 54856 }, { "epoch": 51.18, "learning_rate": 4.360307835820896e-05, "loss": 0.0, "step": 54860 }, { "epoch": 51.18, "learning_rate": 4.360261194029851e-05, "loss": 0.0003, "step": 54864 }, { "epoch": 51.18, "learning_rate": 4.360214552238806e-05, "loss": 0.0001, "step": 54868 }, { "epoch": 51.19, "learning_rate": 4.360167910447761e-05, "loss": 0.0, "step": 54872 }, { "epoch": 51.19, "learning_rate": 4.360121268656717e-05, "loss": 0.0001, "step": 54876 }, { "epoch": 51.19, "learning_rate": 4.3600746268656716e-05, "loss": 0.0009, "step": 54880 }, { "epoch": 51.2, "learning_rate": 4.360027985074627e-05, "loss": 0.0003, "step": 54884 }, { "epoch": 51.2, "learning_rate": 4.3599813432835826e-05, "loss": 0.0, "step": 54888 }, { "epoch": 51.21, "learning_rate": 4.3599347014925374e-05, "loss": 0.0001, "step": 54892 }, { "epoch": 51.21, "learning_rate": 4.359888059701493e-05, "loss": 0.0, "step": 54896 }, { "epoch": 51.21, "learning_rate": 4.359841417910448e-05, "loss": 0.0002, "step": 54900 }, { "epoch": 51.22, "learning_rate": 4.359794776119403e-05, "loss": 0.0, "step": 54904 }, { "epoch": 51.22, "learning_rate": 4.359748134328359e-05, "loss": 0.0, "step": 54908 }, { "epoch": 51.22, "learning_rate": 4.3597014925373135e-05, "loss": 0.0, "step": 54912 }, { "epoch": 51.23, "learning_rate": 4.359654850746269e-05, "loss": 0.0, "step": 54916 }, { "epoch": 51.23, "learning_rate": 4.3596082089552245e-05, "loss": 0.0, "step": 54920 }, { "epoch": 51.24, "learning_rate": 4.359561567164179e-05, "loss": 0.004, "step": 54924 }, { "epoch": 51.24, "learning_rate": 4.359514925373134e-05, "loss": 0.0, "step": 54928 }, { "epoch": 51.24, "learning_rate": 4.3594682835820896e-05, "loss": 0.0001, "step": 54932 }, { "epoch": 51.25, "learning_rate": 4.359421641791045e-05, "loss": 0.0001, "step": 54936 }, { "epoch": 51.25, "learning_rate": 4.359375e-05, "loss": 0.0, "step": 54940 }, { "epoch": 51.25, "learning_rate": 4.3593283582089554e-05, "loss": 0.0, "step": 54944 }, { "epoch": 51.26, "learning_rate": 4.359281716417911e-05, "loss": 0.0, "step": 54948 }, { "epoch": 51.26, "learning_rate": 4.359235074626866e-05, "loss": 0.0, "step": 54952 }, { "epoch": 51.26, "learning_rate": 4.359188432835821e-05, "loss": 0.0, "step": 54956 }, { "epoch": 51.27, "learning_rate": 4.359141791044776e-05, "loss": 0.0, "step": 54960 }, { "epoch": 51.27, "learning_rate": 4.3590951492537315e-05, "loss": 0.0, "step": 54964 }, { "epoch": 51.28, "learning_rate": 4.359048507462687e-05, "loss": 0.0001, "step": 54968 }, { "epoch": 51.28, "learning_rate": 4.359001865671642e-05, "loss": 0.0, "step": 54972 }, { "epoch": 51.28, "learning_rate": 4.358955223880597e-05, "loss": 0.0006, "step": 54976 }, { "epoch": 51.29, "learning_rate": 4.358908582089553e-05, "loss": 0.0003, "step": 54980 }, { "epoch": 51.29, "learning_rate": 4.3588619402985076e-05, "loss": 0.0, "step": 54984 }, { "epoch": 51.29, "learning_rate": 4.3588152985074624e-05, "loss": 0.0, "step": 54988 }, { "epoch": 51.3, "learning_rate": 4.358768656716418e-05, "loss": 0.0001, "step": 54992 }, { "epoch": 51.3, "learning_rate": 4.3587220149253734e-05, "loss": 0.0, "step": 54996 }, { "epoch": 51.31, "learning_rate": 4.358675373134329e-05, "loss": 0.0, "step": 55000 }, { "epoch": 51.31, "eval_exact_match": 0.7379110251450677, "eval_exec": 0.758220502901354, "eval_loss": 0.4803534746170044, "eval_runtime": 1126.6002, "eval_samples_per_second": 0.918, "step": 55000 }, { "epoch": 51.31, "learning_rate": 4.358628731343284e-05, "loss": 0.0, "step": 55004 }, { "epoch": 51.31, "learning_rate": 4.358582089552239e-05, "loss": 0.0, "step": 55008 }, { "epoch": 51.32, "learning_rate": 4.358535447761195e-05, "loss": 0.0004, "step": 55012 }, { "epoch": 51.32, "learning_rate": 4.3584888059701495e-05, "loss": 0.001, "step": 55016 }, { "epoch": 51.32, "learning_rate": 4.358442164179104e-05, "loss": 0.0, "step": 55020 }, { "epoch": 51.33, "learning_rate": 4.3583955223880605e-05, "loss": 0.0, "step": 55024 }, { "epoch": 51.33, "learning_rate": 4.358348880597015e-05, "loss": 0.001, "step": 55028 }, { "epoch": 51.34, "learning_rate": 4.35830223880597e-05, "loss": 0.0003, "step": 55032 }, { "epoch": 51.34, "learning_rate": 4.3582555970149256e-05, "loss": 0.0001, "step": 55036 }, { "epoch": 51.34, "learning_rate": 4.358208955223881e-05, "loss": 0.0013, "step": 55040 }, { "epoch": 51.35, "learning_rate": 4.358162313432836e-05, "loss": 0.0, "step": 55044 }, { "epoch": 51.35, "learning_rate": 4.3581156716417914e-05, "loss": 0.0, "step": 55048 }, { "epoch": 51.35, "learning_rate": 4.358069029850746e-05, "loss": 0.0, "step": 55052 }, { "epoch": 51.36, "learning_rate": 4.358022388059702e-05, "loss": 0.0, "step": 55056 }, { "epoch": 51.36, "learning_rate": 4.357975746268657e-05, "loss": 0.0, "step": 55060 }, { "epoch": 51.37, "learning_rate": 4.357929104477612e-05, "loss": 0.0, "step": 55064 }, { "epoch": 51.37, "learning_rate": 4.3578824626865675e-05, "loss": 0.0, "step": 55068 }, { "epoch": 51.37, "learning_rate": 4.357835820895523e-05, "loss": 0.0053, "step": 55072 }, { "epoch": 51.38, "learning_rate": 4.357789179104478e-05, "loss": 0.0002, "step": 55076 }, { "epoch": 51.38, "learning_rate": 4.3577425373134326e-05, "loss": 0.0, "step": 55080 }, { "epoch": 51.38, "learning_rate": 4.357695895522389e-05, "loss": 0.0, "step": 55084 }, { "epoch": 51.39, "learning_rate": 4.3576492537313436e-05, "loss": 0.0, "step": 55088 }, { "epoch": 51.39, "learning_rate": 4.3576026119402984e-05, "loss": 0.0001, "step": 55092 }, { "epoch": 51.4, "learning_rate": 4.357555970149254e-05, "loss": 0.0, "step": 55096 }, { "epoch": 51.4, "learning_rate": 4.3575093283582094e-05, "loss": 0.0, "step": 55100 }, { "epoch": 51.4, "learning_rate": 4.357462686567164e-05, "loss": 0.0, "step": 55104 }, { "epoch": 51.41, "learning_rate": 4.35741604477612e-05, "loss": 0.0, "step": 55108 }, { "epoch": 51.41, "learning_rate": 4.3573694029850745e-05, "loss": 0.0002, "step": 55112 }, { "epoch": 51.41, "learning_rate": 4.35732276119403e-05, "loss": 0.0002, "step": 55116 }, { "epoch": 51.42, "learning_rate": 4.3572761194029855e-05, "loss": 0.0, "step": 55120 }, { "epoch": 51.42, "learning_rate": 4.35722947761194e-05, "loss": 0.0, "step": 55124 }, { "epoch": 51.43, "learning_rate": 4.357182835820896e-05, "loss": 0.0, "step": 55128 }, { "epoch": 51.43, "learning_rate": 4.357136194029851e-05, "loss": 0.0, "step": 55132 }, { "epoch": 51.43, "learning_rate": 4.357089552238806e-05, "loss": 0.0, "step": 55136 }, { "epoch": 51.44, "learning_rate": 4.357042910447761e-05, "loss": 0.0001, "step": 55140 }, { "epoch": 51.44, "learning_rate": 4.3569962686567164e-05, "loss": 0.0001, "step": 55144 }, { "epoch": 51.44, "learning_rate": 4.356949626865672e-05, "loss": 0.0001, "step": 55148 }, { "epoch": 51.45, "learning_rate": 4.356902985074627e-05, "loss": 0.0001, "step": 55152 }, { "epoch": 51.45, "learning_rate": 4.356856343283582e-05, "loss": 0.0039, "step": 55156 }, { "epoch": 51.46, "learning_rate": 4.356809701492538e-05, "loss": 0.0003, "step": 55160 }, { "epoch": 51.46, "learning_rate": 4.356763059701493e-05, "loss": 0.0, "step": 55164 }, { "epoch": 51.46, "learning_rate": 4.356716417910448e-05, "loss": 0.0, "step": 55168 }, { "epoch": 51.47, "learning_rate": 4.356669776119403e-05, "loss": 0.0, "step": 55172 }, { "epoch": 51.47, "learning_rate": 4.356623134328359e-05, "loss": 0.0, "step": 55176 }, { "epoch": 51.47, "learning_rate": 4.356576492537314e-05, "loss": 0.0004, "step": 55180 }, { "epoch": 51.48, "learning_rate": 4.3565298507462686e-05, "loss": 0.0001, "step": 55184 }, { "epoch": 51.48, "learning_rate": 4.356483208955224e-05, "loss": 0.0, "step": 55188 }, { "epoch": 51.49, "learning_rate": 4.3564365671641796e-05, "loss": 0.0001, "step": 55192 }, { "epoch": 51.49, "learning_rate": 4.3563899253731344e-05, "loss": 0.0001, "step": 55196 }, { "epoch": 51.49, "learning_rate": 4.35634328358209e-05, "loss": 0.0, "step": 55200 }, { "epoch": 51.5, "learning_rate": 4.356296641791045e-05, "loss": 0.0, "step": 55204 }, { "epoch": 51.5, "learning_rate": 4.35625e-05, "loss": 0.0002, "step": 55208 }, { "epoch": 51.5, "learning_rate": 4.356203358208956e-05, "loss": 0.0, "step": 55212 }, { "epoch": 51.51, "learning_rate": 4.3561567164179105e-05, "loss": 0.0001, "step": 55216 }, { "epoch": 51.51, "learning_rate": 4.356110074626866e-05, "loss": 0.0, "step": 55220 }, { "epoch": 51.51, "learning_rate": 4.3560634328358215e-05, "loss": 0.0, "step": 55224 }, { "epoch": 51.52, "learning_rate": 4.356016791044776e-05, "loss": 0.0001, "step": 55228 }, { "epoch": 51.52, "learning_rate": 4.355970149253731e-05, "loss": 0.0017, "step": 55232 }, { "epoch": 51.53, "learning_rate": 4.355923507462687e-05, "loss": 0.0003, "step": 55236 }, { "epoch": 51.53, "learning_rate": 4.355876865671642e-05, "loss": 0.0, "step": 55240 }, { "epoch": 51.53, "learning_rate": 4.355830223880597e-05, "loss": 0.0, "step": 55244 }, { "epoch": 51.54, "learning_rate": 4.3557835820895524e-05, "loss": 0.0, "step": 55248 }, { "epoch": 51.54, "learning_rate": 4.355736940298508e-05, "loss": 0.0, "step": 55252 }, { "epoch": 51.54, "learning_rate": 4.355690298507463e-05, "loss": 0.0005, "step": 55256 }, { "epoch": 51.55, "learning_rate": 4.355643656716418e-05, "loss": 0.0, "step": 55260 }, { "epoch": 51.55, "learning_rate": 4.355597014925373e-05, "loss": 0.0003, "step": 55264 }, { "epoch": 51.56, "learning_rate": 4.3555503731343285e-05, "loss": 0.0001, "step": 55268 }, { "epoch": 51.56, "learning_rate": 4.355503731343284e-05, "loss": 0.0, "step": 55272 }, { "epoch": 51.56, "learning_rate": 4.355457089552239e-05, "loss": 0.0, "step": 55276 }, { "epoch": 51.57, "learning_rate": 4.355410447761194e-05, "loss": 0.0, "step": 55280 }, { "epoch": 51.57, "learning_rate": 4.35536380597015e-05, "loss": 0.0009, "step": 55284 }, { "epoch": 51.57, "learning_rate": 4.3553171641791046e-05, "loss": 0.0, "step": 55288 }, { "epoch": 51.58, "learning_rate": 4.3552705223880594e-05, "loss": 0.0, "step": 55292 }, { "epoch": 51.58, "learning_rate": 4.3552238805970156e-05, "loss": 0.0, "step": 55296 }, { "epoch": 51.59, "learning_rate": 4.3551772388059704e-05, "loss": 0.0, "step": 55300 }, { "epoch": 51.59, "learning_rate": 4.355130597014925e-05, "loss": 0.004, "step": 55304 }, { "epoch": 51.59, "learning_rate": 4.355083955223881e-05, "loss": 0.0, "step": 55308 }, { "epoch": 51.6, "learning_rate": 4.355037313432836e-05, "loss": 0.0, "step": 55312 }, { "epoch": 51.6, "learning_rate": 4.3549906716417917e-05, "loss": 0.004, "step": 55316 }, { "epoch": 51.6, "learning_rate": 4.3549440298507465e-05, "loss": 0.0, "step": 55320 }, { "epoch": 51.61, "learning_rate": 4.354897388059701e-05, "loss": 0.0002, "step": 55324 }, { "epoch": 51.61, "learning_rate": 4.3548507462686574e-05, "loss": 0.0, "step": 55328 }, { "epoch": 51.62, "learning_rate": 4.354804104477612e-05, "loss": 0.0001, "step": 55332 }, { "epoch": 51.62, "learning_rate": 4.354757462686567e-05, "loss": 0.0017, "step": 55336 }, { "epoch": 51.62, "learning_rate": 4.3547108208955226e-05, "loss": 0.0, "step": 55340 }, { "epoch": 51.63, "learning_rate": 4.354664179104478e-05, "loss": 0.0002, "step": 55344 }, { "epoch": 51.63, "learning_rate": 4.354617537313433e-05, "loss": 0.0, "step": 55348 }, { "epoch": 51.63, "learning_rate": 4.3545708955223884e-05, "loss": 0.0, "step": 55352 }, { "epoch": 51.64, "learning_rate": 4.354524253731344e-05, "loss": 0.0001, "step": 55356 }, { "epoch": 51.64, "learning_rate": 4.354477611940299e-05, "loss": 0.0027, "step": 55360 }, { "epoch": 51.65, "learning_rate": 4.354430970149254e-05, "loss": 0.0, "step": 55364 }, { "epoch": 51.65, "learning_rate": 4.354384328358209e-05, "loss": 0.0, "step": 55368 }, { "epoch": 51.65, "learning_rate": 4.3543376865671645e-05, "loss": 0.0043, "step": 55372 }, { "epoch": 51.66, "learning_rate": 4.35429104477612e-05, "loss": 0.0015, "step": 55376 }, { "epoch": 51.66, "learning_rate": 4.354244402985075e-05, "loss": 0.0, "step": 55380 }, { "epoch": 51.66, "learning_rate": 4.3541977611940296e-05, "loss": 0.0005, "step": 55384 }, { "epoch": 51.67, "learning_rate": 4.354151119402986e-05, "loss": 0.0001, "step": 55388 }, { "epoch": 51.67, "learning_rate": 4.3541044776119406e-05, "loss": 0.0, "step": 55392 }, { "epoch": 51.68, "learning_rate": 4.3540578358208954e-05, "loss": 0.0, "step": 55396 }, { "epoch": 51.68, "learning_rate": 4.354011194029851e-05, "loss": 0.0001, "step": 55400 }, { "epoch": 51.68, "learning_rate": 4.3539645522388063e-05, "loss": 0.0, "step": 55404 }, { "epoch": 51.69, "learning_rate": 4.353917910447761e-05, "loss": 0.0, "step": 55408 }, { "epoch": 51.69, "learning_rate": 4.3538712686567167e-05, "loss": 0.0001, "step": 55412 }, { "epoch": 51.69, "learning_rate": 4.353824626865672e-05, "loss": 0.0001, "step": 55416 }, { "epoch": 51.7, "learning_rate": 4.353777985074627e-05, "loss": 0.0029, "step": 55420 }, { "epoch": 51.7, "learning_rate": 4.3537313432835824e-05, "loss": 0.0116, "step": 55424 }, { "epoch": 51.71, "learning_rate": 4.353684701492537e-05, "loss": 0.0006, "step": 55428 }, { "epoch": 51.71, "learning_rate": 4.353638059701493e-05, "loss": 0.0003, "step": 55432 }, { "epoch": 51.71, "learning_rate": 4.353591417910448e-05, "loss": 0.0, "step": 55436 }, { "epoch": 51.72, "learning_rate": 4.353544776119403e-05, "loss": 0.0006, "step": 55440 }, { "epoch": 51.72, "learning_rate": 4.353498134328358e-05, "loss": 0.0, "step": 55444 }, { "epoch": 51.72, "learning_rate": 4.353451492537314e-05, "loss": 0.0004, "step": 55448 }, { "epoch": 51.73, "learning_rate": 4.353404850746269e-05, "loss": 0.001, "step": 55452 }, { "epoch": 51.73, "learning_rate": 4.353358208955224e-05, "loss": 0.0003, "step": 55456 }, { "epoch": 51.73, "learning_rate": 4.353311567164179e-05, "loss": 0.001, "step": 55460 }, { "epoch": 51.74, "learning_rate": 4.3532649253731346e-05, "loss": 0.0, "step": 55464 }, { "epoch": 51.74, "learning_rate": 4.3532182835820895e-05, "loss": 0.0005, "step": 55468 }, { "epoch": 51.75, "learning_rate": 4.353171641791045e-05, "loss": 0.0009, "step": 55472 }, { "epoch": 51.75, "learning_rate": 4.3531250000000004e-05, "loss": 0.0001, "step": 55476 }, { "epoch": 51.75, "learning_rate": 4.353078358208956e-05, "loss": 0.0, "step": 55480 }, { "epoch": 51.76, "learning_rate": 4.353031716417911e-05, "loss": 0.0028, "step": 55484 }, { "epoch": 51.76, "learning_rate": 4.3529850746268656e-05, "loss": 0.0, "step": 55488 }, { "epoch": 51.76, "learning_rate": 4.352938432835821e-05, "loss": 0.0, "step": 55492 }, { "epoch": 51.77, "learning_rate": 4.3528917910447765e-05, "loss": 0.0, "step": 55496 }, { "epoch": 51.77, "learning_rate": 4.3528451492537314e-05, "loss": 0.0, "step": 55500 }, { "epoch": 51.77, "eval_exact_match": 0.741779497098646, "eval_exec": 0.758220502901354, "eval_loss": 0.46916627883911133, "eval_runtime": 1122.7652, "eval_samples_per_second": 0.921, "step": 55500 }, { "epoch": 51.78, "learning_rate": 4.352798507462687e-05, "loss": 0.0, "step": 55504 }, { "epoch": 51.78, "learning_rate": 4.352751865671642e-05, "loss": 0.0, "step": 55508 }, { "epoch": 51.78, "learning_rate": 4.352705223880597e-05, "loss": 0.0034, "step": 55512 }, { "epoch": 51.79, "learning_rate": 4.3526585820895526e-05, "loss": 0.0009, "step": 55516 }, { "epoch": 51.79, "learning_rate": 4.3526119402985075e-05, "loss": 0.002, "step": 55520 }, { "epoch": 51.79, "learning_rate": 4.352565298507463e-05, "loss": 0.0001, "step": 55524 }, { "epoch": 51.8, "learning_rate": 4.3525186567164184e-05, "loss": 0.0008, "step": 55528 }, { "epoch": 51.8, "learning_rate": 4.352472014925373e-05, "loss": 0.0002, "step": 55532 }, { "epoch": 51.81, "learning_rate": 4.352425373134329e-05, "loss": 0.0, "step": 55536 }, { "epoch": 51.81, "learning_rate": 4.352378731343284e-05, "loss": 0.0005, "step": 55540 }, { "epoch": 51.81, "learning_rate": 4.352332089552239e-05, "loss": 0.0001, "step": 55544 }, { "epoch": 51.82, "learning_rate": 4.352285447761194e-05, "loss": 0.0, "step": 55548 }, { "epoch": 51.82, "learning_rate": 4.3522388059701493e-05, "loss": 0.0001, "step": 55552 }, { "epoch": 51.82, "learning_rate": 4.352192164179105e-05, "loss": 0.0, "step": 55556 }, { "epoch": 51.83, "learning_rate": 4.3521455223880596e-05, "loss": 0.0, "step": 55560 }, { "epoch": 51.83, "learning_rate": 4.352098880597015e-05, "loss": 0.0004, "step": 55564 }, { "epoch": 51.84, "learning_rate": 4.3520522388059706e-05, "loss": 0.0001, "step": 55568 }, { "epoch": 51.84, "learning_rate": 4.3520055970149254e-05, "loss": 0.0, "step": 55572 }, { "epoch": 51.84, "learning_rate": 4.351958955223881e-05, "loss": 0.0005, "step": 55576 }, { "epoch": 51.85, "learning_rate": 4.351912313432836e-05, "loss": 0.0001, "step": 55580 }, { "epoch": 51.85, "learning_rate": 4.351865671641791e-05, "loss": 0.0, "step": 55584 }, { "epoch": 51.85, "learning_rate": 4.351819029850747e-05, "loss": 0.0, "step": 55588 }, { "epoch": 51.86, "learning_rate": 4.3517723880597015e-05, "loss": 0.0, "step": 55592 }, { "epoch": 51.86, "learning_rate": 4.351725746268657e-05, "loss": 0.0001, "step": 55596 }, { "epoch": 51.87, "learning_rate": 4.3516791044776125e-05, "loss": 0.0001, "step": 55600 }, { "epoch": 51.87, "learning_rate": 4.351632462686567e-05, "loss": 0.0001, "step": 55604 }, { "epoch": 51.87, "learning_rate": 4.351585820895522e-05, "loss": 0.0, "step": 55608 }, { "epoch": 51.88, "learning_rate": 4.3515391791044776e-05, "loss": 0.0, "step": 55612 }, { "epoch": 51.88, "learning_rate": 4.351492537313433e-05, "loss": 0.0, "step": 55616 }, { "epoch": 51.88, "learning_rate": 4.351445895522388e-05, "loss": 0.0021, "step": 55620 }, { "epoch": 51.89, "learning_rate": 4.3513992537313434e-05, "loss": 0.0, "step": 55624 }, { "epoch": 51.89, "learning_rate": 4.351352611940299e-05, "loss": 0.0003, "step": 55628 }, { "epoch": 51.9, "learning_rate": 4.351305970149254e-05, "loss": 0.0013, "step": 55632 }, { "epoch": 51.9, "learning_rate": 4.351259328358209e-05, "loss": 0.0, "step": 55636 }, { "epoch": 51.9, "learning_rate": 4.351212686567164e-05, "loss": 0.0, "step": 55640 }, { "epoch": 51.91, "learning_rate": 4.35116604477612e-05, "loss": 0.0, "step": 55644 }, { "epoch": 51.91, "learning_rate": 4.351119402985075e-05, "loss": 0.0001, "step": 55648 }, { "epoch": 51.91, "learning_rate": 4.35107276119403e-05, "loss": 0.0001, "step": 55652 }, { "epoch": 51.92, "learning_rate": 4.351026119402985e-05, "loss": 0.0002, "step": 55656 }, { "epoch": 51.92, "learning_rate": 4.350979477611941e-05, "loss": 0.0008, "step": 55660 }, { "epoch": 51.93, "learning_rate": 4.3509328358208956e-05, "loss": 0.0, "step": 55664 }, { "epoch": 51.93, "learning_rate": 4.350886194029851e-05, "loss": 0.0, "step": 55668 }, { "epoch": 51.93, "learning_rate": 4.350839552238806e-05, "loss": 0.0, "step": 55672 }, { "epoch": 51.94, "learning_rate": 4.3507929104477614e-05, "loss": 0.0001, "step": 55676 }, { "epoch": 51.94, "learning_rate": 4.350746268656717e-05, "loss": 0.0, "step": 55680 }, { "epoch": 51.94, "learning_rate": 4.350699626865672e-05, "loss": 0.0, "step": 55684 }, { "epoch": 51.95, "learning_rate": 4.350652985074627e-05, "loss": 0.0004, "step": 55688 }, { "epoch": 51.95, "learning_rate": 4.350606343283583e-05, "loss": 0.0, "step": 55692 }, { "epoch": 51.96, "learning_rate": 4.3505597014925375e-05, "loss": 0.0001, "step": 55696 }, { "epoch": 51.96, "learning_rate": 4.350513059701492e-05, "loss": 0.0, "step": 55700 }, { "epoch": 51.96, "learning_rate": 4.3504664179104485e-05, "loss": 0.0001, "step": 55704 }, { "epoch": 51.97, "learning_rate": 4.350419776119403e-05, "loss": 0.0001, "step": 55708 }, { "epoch": 51.97, "learning_rate": 4.350373134328358e-05, "loss": 0.0003, "step": 55712 }, { "epoch": 51.97, "learning_rate": 4.3503264925373136e-05, "loss": 0.0, "step": 55716 }, { "epoch": 51.98, "learning_rate": 4.350279850746269e-05, "loss": 0.0027, "step": 55720 }, { "epoch": 51.98, "learning_rate": 4.350233208955224e-05, "loss": 0.0, "step": 55724 }, { "epoch": 51.98, "learning_rate": 4.3501865671641794e-05, "loss": 0.0002, "step": 55728 }, { "epoch": 51.99, "learning_rate": 4.350139925373134e-05, "loss": 0.0, "step": 55732 }, { "epoch": 51.99, "learning_rate": 4.35009328358209e-05, "loss": 0.0001, "step": 55736 }, { "epoch": 52.0, "learning_rate": 4.350046641791045e-05, "loss": 0.0003, "step": 55740 }, { "epoch": 52.0, "learning_rate": 4.35e-05, "loss": 0.0, "step": 55744 }, { "epoch": 52.0, "learning_rate": 4.3499533582089555e-05, "loss": 0.0, "step": 55748 }, { "epoch": 52.01, "learning_rate": 4.349906716417911e-05, "loss": 0.0, "step": 55752 }, { "epoch": 52.01, "learning_rate": 4.349860074626866e-05, "loss": 0.0, "step": 55756 }, { "epoch": 52.01, "learning_rate": 4.3498134328358206e-05, "loss": 0.0, "step": 55760 }, { "epoch": 52.02, "learning_rate": 4.349766791044777e-05, "loss": 0.0, "step": 55764 }, { "epoch": 52.02, "learning_rate": 4.3497201492537316e-05, "loss": 0.0, "step": 55768 }, { "epoch": 52.03, "learning_rate": 4.3496735074626864e-05, "loss": 0.0001, "step": 55772 }, { "epoch": 52.03, "learning_rate": 4.349626865671642e-05, "loss": 0.0, "step": 55776 }, { "epoch": 52.03, "learning_rate": 4.3495802238805974e-05, "loss": 0.0006, "step": 55780 }, { "epoch": 52.04, "learning_rate": 4.349533582089552e-05, "loss": 0.0044, "step": 55784 }, { "epoch": 52.04, "learning_rate": 4.349486940298508e-05, "loss": 0.0, "step": 55788 }, { "epoch": 52.04, "learning_rate": 4.3494402985074625e-05, "loss": 0.0, "step": 55792 }, { "epoch": 52.05, "learning_rate": 4.349393656716418e-05, "loss": 0.0, "step": 55796 }, { "epoch": 52.05, "learning_rate": 4.3493470149253735e-05, "loss": 0.0005, "step": 55800 }, { "epoch": 52.06, "learning_rate": 4.349300373134328e-05, "loss": 0.0, "step": 55804 }, { "epoch": 52.06, "learning_rate": 4.349253731343284e-05, "loss": 0.0, "step": 55808 }, { "epoch": 52.06, "learning_rate": 4.349207089552239e-05, "loss": 0.0001, "step": 55812 }, { "epoch": 52.07, "learning_rate": 4.349160447761194e-05, "loss": 0.0024, "step": 55816 }, { "epoch": 52.07, "learning_rate": 4.3491138059701496e-05, "loss": 0.0015, "step": 55820 }, { "epoch": 52.07, "learning_rate": 4.3490671641791044e-05, "loss": 0.0001, "step": 55824 }, { "epoch": 52.08, "learning_rate": 4.34902052238806e-05, "loss": 0.0, "step": 55828 }, { "epoch": 52.08, "learning_rate": 4.3489738805970154e-05, "loss": 0.0, "step": 55832 }, { "epoch": 52.09, "learning_rate": 4.34892723880597e-05, "loss": 0.003, "step": 55836 }, { "epoch": 52.09, "learning_rate": 4.348880597014926e-05, "loss": 0.0, "step": 55840 }, { "epoch": 52.09, "learning_rate": 4.348833955223881e-05, "loss": 0.0, "step": 55844 }, { "epoch": 52.1, "learning_rate": 4.348787313432836e-05, "loss": 0.0008, "step": 55848 }, { "epoch": 52.1, "learning_rate": 4.348740671641791e-05, "loss": 0.0, "step": 55852 }, { "epoch": 52.1, "learning_rate": 4.348694029850747e-05, "loss": 0.0002, "step": 55856 }, { "epoch": 52.11, "learning_rate": 4.348647388059702e-05, "loss": 0.0001, "step": 55860 }, { "epoch": 52.11, "learning_rate": 4.3486007462686566e-05, "loss": 0.0001, "step": 55864 }, { "epoch": 52.12, "learning_rate": 4.348554104477612e-05, "loss": 0.0, "step": 55868 }, { "epoch": 52.12, "learning_rate": 4.3485074626865676e-05, "loss": 0.0004, "step": 55872 }, { "epoch": 52.12, "learning_rate": 4.3484608208955224e-05, "loss": 0.0, "step": 55876 }, { "epoch": 52.13, "learning_rate": 4.348414179104478e-05, "loss": 0.0001, "step": 55880 }, { "epoch": 52.13, "learning_rate": 4.348367537313433e-05, "loss": 0.0, "step": 55884 }, { "epoch": 52.13, "learning_rate": 4.348320895522388e-05, "loss": 0.0, "step": 55888 }, { "epoch": 52.14, "learning_rate": 4.348274253731344e-05, "loss": 0.0, "step": 55892 }, { "epoch": 52.14, "learning_rate": 4.3482276119402985e-05, "loss": 0.0, "step": 55896 }, { "epoch": 52.15, "learning_rate": 4.348180970149254e-05, "loss": 0.0, "step": 55900 }, { "epoch": 52.15, "learning_rate": 4.3481343283582095e-05, "loss": 0.0, "step": 55904 }, { "epoch": 52.15, "learning_rate": 4.348087686567164e-05, "loss": 0.0001, "step": 55908 }, { "epoch": 52.16, "learning_rate": 4.348041044776119e-05, "loss": 0.0003, "step": 55912 }, { "epoch": 52.16, "learning_rate": 4.347994402985075e-05, "loss": 0.0, "step": 55916 }, { "epoch": 52.16, "learning_rate": 4.34794776119403e-05, "loss": 0.0014, "step": 55920 }, { "epoch": 52.17, "learning_rate": 4.347901119402985e-05, "loss": 0.0005, "step": 55924 }, { "epoch": 52.17, "learning_rate": 4.3478544776119404e-05, "loss": 0.0, "step": 55928 }, { "epoch": 52.18, "learning_rate": 4.347807835820896e-05, "loss": 0.0, "step": 55932 }, { "epoch": 52.18, "learning_rate": 4.347761194029851e-05, "loss": 0.0, "step": 55936 }, { "epoch": 52.18, "learning_rate": 4.347714552238806e-05, "loss": 0.0, "step": 55940 }, { "epoch": 52.19, "learning_rate": 4.347667910447761e-05, "loss": 0.0, "step": 55944 }, { "epoch": 52.19, "learning_rate": 4.3476212686567165e-05, "loss": 0.0, "step": 55948 }, { "epoch": 52.19, "learning_rate": 4.347574626865672e-05, "loss": 0.0, "step": 55952 }, { "epoch": 52.2, "learning_rate": 4.347527985074627e-05, "loss": 0.0001, "step": 55956 }, { "epoch": 52.2, "learning_rate": 4.347481343283582e-05, "loss": 0.0, "step": 55960 }, { "epoch": 52.21, "learning_rate": 4.347434701492538e-05, "loss": 0.0, "step": 55964 }, { "epoch": 52.21, "learning_rate": 4.3473880597014926e-05, "loss": 0.0, "step": 55968 }, { "epoch": 52.21, "learning_rate": 4.347341417910448e-05, "loss": 0.0, "step": 55972 }, { "epoch": 52.22, "learning_rate": 4.3472947761194036e-05, "loss": 0.0001, "step": 55976 }, { "epoch": 52.22, "learning_rate": 4.3472481343283584e-05, "loss": 0.0, "step": 55980 }, { "epoch": 52.22, "learning_rate": 4.347201492537314e-05, "loss": 0.0, "step": 55984 }, { "epoch": 52.23, "learning_rate": 4.347154850746269e-05, "loss": 0.0005, "step": 55988 }, { "epoch": 52.23, "learning_rate": 4.347108208955224e-05, "loss": 0.0, "step": 55992 }, { "epoch": 52.24, "learning_rate": 4.34706156716418e-05, "loss": 0.0, "step": 55996 }, { "epoch": 52.24, "learning_rate": 4.3470149253731345e-05, "loss": 0.0, "step": 56000 }, { "epoch": 52.24, "eval_exact_match": 0.7398452611218569, "eval_exec": 0.7630560928433269, "eval_loss": 0.48453760147094727, "eval_runtime": 1139.6139, "eval_samples_per_second": 0.907, "step": 56000 }, { "epoch": 52.24, "learning_rate": 4.346968283582089e-05, "loss": 0.0, "step": 56004 }, { "epoch": 52.25, "learning_rate": 4.3469216417910455e-05, "loss": 0.0, "step": 56008 }, { "epoch": 52.25, "learning_rate": 4.346875e-05, "loss": 0.0, "step": 56012 }, { "epoch": 52.25, "learning_rate": 4.346828358208955e-05, "loss": 0.0, "step": 56016 }, { "epoch": 52.26, "learning_rate": 4.3467817164179106e-05, "loss": 0.0, "step": 56020 }, { "epoch": 52.26, "learning_rate": 4.346735074626866e-05, "loss": 0.0, "step": 56024 }, { "epoch": 52.26, "learning_rate": 4.346688432835821e-05, "loss": 0.0, "step": 56028 }, { "epoch": 52.27, "learning_rate": 4.3466417910447764e-05, "loss": 0.0, "step": 56032 }, { "epoch": 52.27, "learning_rate": 4.346595149253732e-05, "loss": 0.0001, "step": 56036 }, { "epoch": 52.28, "learning_rate": 4.346548507462687e-05, "loss": 0.0, "step": 56040 }, { "epoch": 52.28, "learning_rate": 4.346501865671642e-05, "loss": 0.0, "step": 56044 }, { "epoch": 52.28, "learning_rate": 4.346455223880597e-05, "loss": 0.0032, "step": 56048 }, { "epoch": 52.29, "learning_rate": 4.3464085820895525e-05, "loss": 0.0, "step": 56052 }, { "epoch": 52.29, "learning_rate": 4.346361940298508e-05, "loss": 0.0, "step": 56056 }, { "epoch": 52.29, "learning_rate": 4.346315298507463e-05, "loss": 0.0021, "step": 56060 }, { "epoch": 52.3, "learning_rate": 4.3462686567164176e-05, "loss": 0.0, "step": 56064 }, { "epoch": 52.3, "learning_rate": 4.346222014925374e-05, "loss": 0.0, "step": 56068 }, { "epoch": 52.31, "learning_rate": 4.3461753731343286e-05, "loss": 0.0, "step": 56072 }, { "epoch": 52.31, "learning_rate": 4.3461287313432834e-05, "loss": 0.0, "step": 56076 }, { "epoch": 52.31, "learning_rate": 4.346082089552239e-05, "loss": 0.0, "step": 56080 }, { "epoch": 52.32, "learning_rate": 4.3460354477611944e-05, "loss": 0.0, "step": 56084 }, { "epoch": 52.32, "learning_rate": 4.345988805970149e-05, "loss": 0.0, "step": 56088 }, { "epoch": 52.32, "learning_rate": 4.345942164179105e-05, "loss": 0.0001, "step": 56092 }, { "epoch": 52.33, "learning_rate": 4.34589552238806e-05, "loss": 0.0, "step": 56096 }, { "epoch": 52.33, "learning_rate": 4.345848880597015e-05, "loss": 0.0003, "step": 56100 }, { "epoch": 52.34, "learning_rate": 4.3458022388059705e-05, "loss": 0.0, "step": 56104 }, { "epoch": 52.34, "learning_rate": 4.345755597014925e-05, "loss": 0.0, "step": 56108 }, { "epoch": 52.34, "learning_rate": 4.345708955223881e-05, "loss": 0.0002, "step": 56112 }, { "epoch": 52.35, "learning_rate": 4.345662313432836e-05, "loss": 0.001, "step": 56116 }, { "epoch": 52.35, "learning_rate": 4.345615671641791e-05, "loss": 0.0005, "step": 56120 }, { "epoch": 52.35, "learning_rate": 4.345569029850746e-05, "loss": 0.0041, "step": 56124 }, { "epoch": 52.36, "learning_rate": 4.345522388059702e-05, "loss": 0.0, "step": 56128 }, { "epoch": 52.36, "learning_rate": 4.345475746268657e-05, "loss": 0.0, "step": 56132 }, { "epoch": 52.37, "learning_rate": 4.3454291044776124e-05, "loss": 0.0, "step": 56136 }, { "epoch": 52.37, "learning_rate": 4.345382462686567e-05, "loss": 0.0, "step": 56140 }, { "epoch": 52.37, "learning_rate": 4.345335820895523e-05, "loss": 0.0006, "step": 56144 }, { "epoch": 52.38, "learning_rate": 4.345289179104478e-05, "loss": 0.0001, "step": 56148 }, { "epoch": 52.38, "learning_rate": 4.345242537313433e-05, "loss": 0.0, "step": 56152 }, { "epoch": 52.38, "learning_rate": 4.3451958955223885e-05, "loss": 0.0, "step": 56156 }, { "epoch": 52.39, "learning_rate": 4.345149253731344e-05, "loss": 0.0, "step": 56160 }, { "epoch": 52.39, "learning_rate": 4.345102611940299e-05, "loss": 0.0001, "step": 56164 }, { "epoch": 52.4, "learning_rate": 4.3450559701492536e-05, "loss": 0.0, "step": 56168 }, { "epoch": 52.4, "learning_rate": 4.345009328358209e-05, "loss": 0.0, "step": 56172 }, { "epoch": 52.4, "learning_rate": 4.3449626865671646e-05, "loss": 0.0, "step": 56176 }, { "epoch": 52.41, "learning_rate": 4.3449160447761194e-05, "loss": 0.0, "step": 56180 }, { "epoch": 52.41, "learning_rate": 4.344869402985075e-05, "loss": 0.0, "step": 56184 }, { "epoch": 52.41, "learning_rate": 4.3448227611940304e-05, "loss": 0.0, "step": 56188 }, { "epoch": 52.42, "learning_rate": 4.344776119402985e-05, "loss": 0.0, "step": 56192 }, { "epoch": 52.42, "learning_rate": 4.344729477611941e-05, "loss": 0.0, "step": 56196 }, { "epoch": 52.43, "learning_rate": 4.3446828358208955e-05, "loss": 0.0001, "step": 56200 }, { "epoch": 52.43, "learning_rate": 4.344636194029851e-05, "loss": 0.0002, "step": 56204 }, { "epoch": 52.43, "learning_rate": 4.3445895522388065e-05, "loss": 0.0, "step": 56208 }, { "epoch": 52.44, "learning_rate": 4.344542910447761e-05, "loss": 0.0, "step": 56212 }, { "epoch": 52.44, "learning_rate": 4.344496268656717e-05, "loss": 0.0, "step": 56216 }, { "epoch": 52.44, "learning_rate": 4.344449626865672e-05, "loss": 0.0, "step": 56220 }, { "epoch": 52.45, "learning_rate": 4.344402985074627e-05, "loss": 0.0009, "step": 56224 }, { "epoch": 52.45, "learning_rate": 4.344356343283582e-05, "loss": 0.0008, "step": 56228 }, { "epoch": 52.46, "learning_rate": 4.3443097014925374e-05, "loss": 0.0, "step": 56232 }, { "epoch": 52.46, "learning_rate": 4.344263059701493e-05, "loss": 0.0014, "step": 56236 }, { "epoch": 52.46, "learning_rate": 4.344216417910448e-05, "loss": 0.0, "step": 56240 }, { "epoch": 52.47, "learning_rate": 4.344169776119403e-05, "loss": 0.0, "step": 56244 }, { "epoch": 52.47, "learning_rate": 4.3441231343283587e-05, "loss": 0.0002, "step": 56248 }, { "epoch": 52.47, "learning_rate": 4.3440764925373135e-05, "loss": 0.0, "step": 56252 }, { "epoch": 52.48, "learning_rate": 4.344029850746269e-05, "loss": 0.0, "step": 56256 }, { "epoch": 52.48, "learning_rate": 4.343983208955224e-05, "loss": 0.0002, "step": 56260 }, { "epoch": 52.49, "learning_rate": 4.343936567164179e-05, "loss": 0.0, "step": 56264 }, { "epoch": 52.49, "learning_rate": 4.343889925373135e-05, "loss": 0.0004, "step": 56268 }, { "epoch": 52.49, "learning_rate": 4.3438432835820896e-05, "loss": 0.0, "step": 56272 }, { "epoch": 52.5, "learning_rate": 4.343796641791045e-05, "loss": 0.0, "step": 56276 }, { "epoch": 52.5, "learning_rate": 4.3437500000000006e-05, "loss": 0.0, "step": 56280 }, { "epoch": 52.5, "learning_rate": 4.3437033582089554e-05, "loss": 0.0, "step": 56284 }, { "epoch": 52.51, "learning_rate": 4.34365671641791e-05, "loss": 0.0, "step": 56288 }, { "epoch": 52.51, "learning_rate": 4.343610074626866e-05, "loss": 0.0, "step": 56292 }, { "epoch": 52.51, "learning_rate": 4.343563432835821e-05, "loss": 0.0, "step": 56296 }, { "epoch": 52.52, "learning_rate": 4.3435167910447766e-05, "loss": 0.0, "step": 56300 }, { "epoch": 52.52, "learning_rate": 4.3434701492537315e-05, "loss": 0.0, "step": 56304 }, { "epoch": 52.53, "learning_rate": 4.343423507462687e-05, "loss": 0.0, "step": 56308 }, { "epoch": 52.53, "learning_rate": 4.3433768656716424e-05, "loss": 0.0, "step": 56312 }, { "epoch": 52.53, "learning_rate": 4.343330223880597e-05, "loss": 0.0, "step": 56316 }, { "epoch": 52.54, "learning_rate": 4.343283582089552e-05, "loss": 0.0002, "step": 56320 }, { "epoch": 52.54, "learning_rate": 4.343236940298508e-05, "loss": 0.0, "step": 56324 }, { "epoch": 52.54, "learning_rate": 4.343190298507463e-05, "loss": 0.0004, "step": 56328 }, { "epoch": 52.55, "learning_rate": 4.343143656716418e-05, "loss": 0.0, "step": 56332 }, { "epoch": 52.55, "learning_rate": 4.3430970149253734e-05, "loss": 0.0, "step": 56336 }, { "epoch": 52.56, "learning_rate": 4.343050373134329e-05, "loss": 0.0, "step": 56340 }, { "epoch": 52.56, "learning_rate": 4.3430037313432837e-05, "loss": 0.0, "step": 56344 }, { "epoch": 52.56, "learning_rate": 4.342957089552239e-05, "loss": 0.0, "step": 56348 }, { "epoch": 52.57, "learning_rate": 4.342910447761194e-05, "loss": 0.0, "step": 56352 }, { "epoch": 52.57, "learning_rate": 4.3428638059701495e-05, "loss": 0.0, "step": 56356 }, { "epoch": 52.57, "learning_rate": 4.342817164179105e-05, "loss": 0.0, "step": 56360 }, { "epoch": 52.58, "learning_rate": 4.34277052238806e-05, "loss": 0.0, "step": 56364 }, { "epoch": 52.58, "learning_rate": 4.342723880597015e-05, "loss": 0.0, "step": 56368 }, { "epoch": 52.59, "learning_rate": 4.342677238805971e-05, "loss": 0.0, "step": 56372 }, { "epoch": 52.59, "learning_rate": 4.3426305970149256e-05, "loss": 0.0, "step": 56376 }, { "epoch": 52.59, "learning_rate": 4.3425839552238804e-05, "loss": 0.0, "step": 56380 }, { "epoch": 52.6, "learning_rate": 4.3425373134328365e-05, "loss": 0.0004, "step": 56384 }, { "epoch": 52.6, "learning_rate": 4.3424906716417913e-05, "loss": 0.0, "step": 56388 }, { "epoch": 52.6, "learning_rate": 4.342444029850746e-05, "loss": 0.0001, "step": 56392 }, { "epoch": 52.61, "learning_rate": 4.3423973880597017e-05, "loss": 0.0, "step": 56396 }, { "epoch": 52.61, "learning_rate": 4.342350746268657e-05, "loss": 0.0002, "step": 56400 }, { "epoch": 52.62, "learning_rate": 4.342304104477612e-05, "loss": 0.0004, "step": 56404 }, { "epoch": 52.62, "learning_rate": 4.3422574626865674e-05, "loss": 0.0007, "step": 56408 }, { "epoch": 52.62, "learning_rate": 4.342210820895522e-05, "loss": 0.0, "step": 56412 }, { "epoch": 52.63, "learning_rate": 4.342164179104478e-05, "loss": 0.0, "step": 56416 }, { "epoch": 52.63, "learning_rate": 4.342117537313433e-05, "loss": 0.0, "step": 56420 }, { "epoch": 52.63, "learning_rate": 4.342070895522388e-05, "loss": 0.0, "step": 56424 }, { "epoch": 52.64, "learning_rate": 4.3420242537313435e-05, "loss": 0.0016, "step": 56428 }, { "epoch": 52.64, "learning_rate": 4.341977611940299e-05, "loss": 0.0, "step": 56432 }, { "epoch": 52.65, "learning_rate": 4.341930970149254e-05, "loss": 0.0, "step": 56436 }, { "epoch": 52.65, "learning_rate": 4.341884328358209e-05, "loss": 0.0006, "step": 56440 }, { "epoch": 52.65, "learning_rate": 4.341837686567165e-05, "loss": 0.0004, "step": 56444 }, { "epoch": 52.66, "learning_rate": 4.3417910447761196e-05, "loss": 0.0, "step": 56448 }, { "epoch": 52.66, "learning_rate": 4.3417444029850745e-05, "loss": 0.0003, "step": 56452 }, { "epoch": 52.66, "learning_rate": 4.34169776119403e-05, "loss": 0.0, "step": 56456 }, { "epoch": 52.67, "learning_rate": 4.3416511194029854e-05, "loss": 0.0, "step": 56460 }, { "epoch": 52.67, "learning_rate": 4.341604477611941e-05, "loss": 0.0, "step": 56464 }, { "epoch": 52.68, "learning_rate": 4.341557835820896e-05, "loss": 0.0001, "step": 56468 }, { "epoch": 52.68, "learning_rate": 4.3415111940298506e-05, "loss": 0.0, "step": 56472 }, { "epoch": 52.68, "learning_rate": 4.341464552238807e-05, "loss": 0.0, "step": 56476 }, { "epoch": 52.69, "learning_rate": 4.3414179104477615e-05, "loss": 0.0, "step": 56480 }, { "epoch": 52.69, "learning_rate": 4.3413712686567163e-05, "loss": 0.0, "step": 56484 }, { "epoch": 52.69, "learning_rate": 4.341324626865672e-05, "loss": 0.0, "step": 56488 }, { "epoch": 52.7, "learning_rate": 4.341277985074627e-05, "loss": 0.0007, "step": 56492 }, { "epoch": 52.7, "learning_rate": 4.341231343283582e-05, "loss": 0.0, "step": 56496 }, { "epoch": 52.71, "learning_rate": 4.3411847014925376e-05, "loss": 0.0, "step": 56500 }, { "epoch": 52.71, "eval_exact_match": 0.7398452611218569, "eval_exec": 0.7688588007736944, "eval_loss": 0.48737451434135437, "eval_runtime": 1152.9321, "eval_samples_per_second": 0.897, "step": 56500 }, { "epoch": 52.71, "learning_rate": 4.341138059701493e-05, "loss": 0.0008, "step": 56504 }, { "epoch": 52.71, "learning_rate": 4.341091417910448e-05, "loss": 0.0, "step": 56508 }, { "epoch": 52.72, "learning_rate": 4.3410447761194034e-05, "loss": 0.0, "step": 56512 }, { "epoch": 52.72, "learning_rate": 4.340998134328358e-05, "loss": 0.0, "step": 56516 }, { "epoch": 52.72, "learning_rate": 4.340951492537314e-05, "loss": 0.0002, "step": 56520 }, { "epoch": 52.73, "learning_rate": 4.340904850746269e-05, "loss": 0.0, "step": 56524 }, { "epoch": 52.73, "learning_rate": 4.340858208955224e-05, "loss": 0.0, "step": 56528 }, { "epoch": 52.73, "learning_rate": 4.340811567164179e-05, "loss": 0.0, "step": 56532 }, { "epoch": 52.74, "learning_rate": 4.340764925373135e-05, "loss": 0.0022, "step": 56536 }, { "epoch": 52.74, "learning_rate": 4.34071828358209e-05, "loss": 0.0025, "step": 56540 }, { "epoch": 52.75, "learning_rate": 4.3406716417910446e-05, "loss": 0.0, "step": 56544 }, { "epoch": 52.75, "learning_rate": 4.340625e-05, "loss": 0.0001, "step": 56548 }, { "epoch": 52.75, "learning_rate": 4.3405783582089556e-05, "loss": 0.0001, "step": 56552 }, { "epoch": 52.76, "learning_rate": 4.3405317164179104e-05, "loss": 0.0001, "step": 56556 }, { "epoch": 52.76, "learning_rate": 4.340485074626866e-05, "loss": 0.0003, "step": 56560 }, { "epoch": 52.76, "learning_rate": 4.340438432835821e-05, "loss": 0.0004, "step": 56564 }, { "epoch": 52.77, "learning_rate": 4.340391791044776e-05, "loss": 0.0, "step": 56568 }, { "epoch": 52.77, "learning_rate": 4.340345149253732e-05, "loss": 0.0, "step": 56572 }, { "epoch": 52.78, "learning_rate": 4.3402985074626865e-05, "loss": 0.0004, "step": 56576 }, { "epoch": 52.78, "learning_rate": 4.340251865671642e-05, "loss": 0.0, "step": 56580 }, { "epoch": 52.78, "learning_rate": 4.3402052238805975e-05, "loss": 0.0, "step": 56584 }, { "epoch": 52.79, "learning_rate": 4.340158582089552e-05, "loss": 0.0001, "step": 56588 }, { "epoch": 52.79, "learning_rate": 4.340111940298507e-05, "loss": 0.0, "step": 56592 }, { "epoch": 52.79, "learning_rate": 4.340065298507463e-05, "loss": 0.0, "step": 56596 }, { "epoch": 52.8, "learning_rate": 4.340018656716418e-05, "loss": 0.0, "step": 56600 }, { "epoch": 52.8, "learning_rate": 4.339972014925373e-05, "loss": 0.0003, "step": 56604 }, { "epoch": 52.81, "learning_rate": 4.3399253731343284e-05, "loss": 0.0002, "step": 56608 }, { "epoch": 52.81, "learning_rate": 4.339878731343284e-05, "loss": 0.0009, "step": 56612 }, { "epoch": 52.81, "learning_rate": 4.339832089552239e-05, "loss": 0.0001, "step": 56616 }, { "epoch": 52.82, "learning_rate": 4.339785447761194e-05, "loss": 0.0009, "step": 56620 }, { "epoch": 52.82, "learning_rate": 4.339738805970149e-05, "loss": 0.0001, "step": 56624 }, { "epoch": 52.82, "learning_rate": 4.339692164179105e-05, "loss": 0.0, "step": 56628 }, { "epoch": 52.83, "learning_rate": 4.33964552238806e-05, "loss": 0.0001, "step": 56632 }, { "epoch": 52.83, "learning_rate": 4.339598880597015e-05, "loss": 0.0005, "step": 56636 }, { "epoch": 52.84, "learning_rate": 4.33955223880597e-05, "loss": 0.0, "step": 56640 }, { "epoch": 52.84, "learning_rate": 4.339505597014926e-05, "loss": 0.0, "step": 56644 }, { "epoch": 52.84, "learning_rate": 4.3394589552238806e-05, "loss": 0.0001, "step": 56648 }, { "epoch": 52.85, "learning_rate": 4.339412313432836e-05, "loss": 0.0001, "step": 56652 }, { "epoch": 52.85, "learning_rate": 4.3393656716417916e-05, "loss": 0.0003, "step": 56656 }, { "epoch": 52.85, "learning_rate": 4.3393190298507464e-05, "loss": 0.0084, "step": 56660 }, { "epoch": 52.86, "learning_rate": 4.339272388059702e-05, "loss": 0.0001, "step": 56664 }, { "epoch": 52.86, "learning_rate": 4.339225746268657e-05, "loss": 0.0, "step": 56668 }, { "epoch": 52.87, "learning_rate": 4.339179104477612e-05, "loss": 0.0, "step": 56672 }, { "epoch": 52.87, "learning_rate": 4.339132462686568e-05, "loss": 0.0, "step": 56676 }, { "epoch": 52.87, "learning_rate": 4.3390858208955225e-05, "loss": 0.0, "step": 56680 }, { "epoch": 52.88, "learning_rate": 4.339039179104477e-05, "loss": 0.0, "step": 56684 }, { "epoch": 52.88, "learning_rate": 4.3389925373134335e-05, "loss": 0.0, "step": 56688 }, { "epoch": 52.88, "learning_rate": 4.338945895522388e-05, "loss": 0.0, "step": 56692 }, { "epoch": 52.89, "learning_rate": 4.338899253731343e-05, "loss": 0.0035, "step": 56696 }, { "epoch": 52.89, "learning_rate": 4.3388526119402986e-05, "loss": 0.0, "step": 56700 }, { "epoch": 52.9, "learning_rate": 4.338805970149254e-05, "loss": 0.0001, "step": 56704 }, { "epoch": 52.9, "learning_rate": 4.338759328358209e-05, "loss": 0.0, "step": 56708 }, { "epoch": 52.9, "learning_rate": 4.3387126865671644e-05, "loss": 0.0, "step": 56712 }, { "epoch": 52.91, "learning_rate": 4.33866604477612e-05, "loss": 0.0005, "step": 56716 }, { "epoch": 52.91, "learning_rate": 4.338619402985075e-05, "loss": 0.0, "step": 56720 }, { "epoch": 52.91, "learning_rate": 4.33857276119403e-05, "loss": 0.0, "step": 56724 }, { "epoch": 52.92, "learning_rate": 4.338526119402985e-05, "loss": 0.0, "step": 56728 }, { "epoch": 52.92, "learning_rate": 4.3384794776119405e-05, "loss": 0.0001, "step": 56732 }, { "epoch": 52.93, "learning_rate": 4.338432835820896e-05, "loss": 0.0007, "step": 56736 }, { "epoch": 52.93, "learning_rate": 4.338386194029851e-05, "loss": 0.0, "step": 56740 }, { "epoch": 52.93, "learning_rate": 4.3383395522388056e-05, "loss": 0.0088, "step": 56744 }, { "epoch": 52.94, "learning_rate": 4.338292910447762e-05, "loss": 0.0, "step": 56748 }, { "epoch": 52.94, "learning_rate": 4.3382462686567166e-05, "loss": 0.0001, "step": 56752 }, { "epoch": 52.94, "learning_rate": 4.3381996268656714e-05, "loss": 0.0002, "step": 56756 }, { "epoch": 52.95, "learning_rate": 4.338152985074627e-05, "loss": 0.0001, "step": 56760 }, { "epoch": 52.95, "learning_rate": 4.3381063432835824e-05, "loss": 0.0003, "step": 56764 }, { "epoch": 52.96, "learning_rate": 4.338059701492537e-05, "loss": 0.0, "step": 56768 }, { "epoch": 52.96, "learning_rate": 4.338013059701493e-05, "loss": 0.0, "step": 56772 }, { "epoch": 52.96, "learning_rate": 4.337966417910448e-05, "loss": 0.0, "step": 56776 }, { "epoch": 52.97, "learning_rate": 4.337919776119403e-05, "loss": 0.0056, "step": 56780 }, { "epoch": 52.97, "learning_rate": 4.3378731343283585e-05, "loss": 0.0001, "step": 56784 }, { "epoch": 52.97, "learning_rate": 4.337826492537313e-05, "loss": 0.0, "step": 56788 }, { "epoch": 52.98, "learning_rate": 4.337779850746269e-05, "loss": 0.0008, "step": 56792 }, { "epoch": 52.98, "learning_rate": 4.337733208955224e-05, "loss": 0.0007, "step": 56796 }, { "epoch": 52.98, "learning_rate": 4.337686567164179e-05, "loss": 0.0002, "step": 56800 }, { "epoch": 52.99, "learning_rate": 4.3376399253731346e-05, "loss": 0.0001, "step": 56804 }, { "epoch": 52.99, "learning_rate": 4.33759328358209e-05, "loss": 0.0, "step": 56808 }, { "epoch": 53.0, "learning_rate": 4.337546641791045e-05, "loss": 0.0, "step": 56812 }, { "epoch": 53.0, "learning_rate": 4.3375000000000004e-05, "loss": 0.0005, "step": 56816 }, { "epoch": 53.0, "learning_rate": 4.337453358208955e-05, "loss": 0.0018, "step": 56820 }, { "epoch": 53.01, "learning_rate": 4.337406716417911e-05, "loss": 0.0017, "step": 56824 }, { "epoch": 53.01, "learning_rate": 4.337360074626866e-05, "loss": 0.0, "step": 56828 }, { "epoch": 53.01, "learning_rate": 4.337313432835821e-05, "loss": 0.0003, "step": 56832 }, { "epoch": 53.02, "learning_rate": 4.3372667910447765e-05, "loss": 0.0, "step": 56836 }, { "epoch": 53.02, "learning_rate": 4.337220149253732e-05, "loss": 0.0007, "step": 56840 }, { "epoch": 53.03, "learning_rate": 4.337173507462687e-05, "loss": 0.0, "step": 56844 }, { "epoch": 53.03, "learning_rate": 4.3371268656716416e-05, "loss": 0.0001, "step": 56848 }, { "epoch": 53.03, "learning_rate": 4.337080223880597e-05, "loss": 0.0, "step": 56852 }, { "epoch": 53.04, "learning_rate": 4.3370335820895526e-05, "loss": 0.0, "step": 56856 }, { "epoch": 53.04, "learning_rate": 4.3369869402985074e-05, "loss": 0.0003, "step": 56860 }, { "epoch": 53.04, "learning_rate": 4.336940298507463e-05, "loss": 0.0, "step": 56864 }, { "epoch": 53.05, "learning_rate": 4.3368936567164184e-05, "loss": 0.0, "step": 56868 }, { "epoch": 53.05, "learning_rate": 4.336847014925373e-05, "loss": 0.0, "step": 56872 }, { "epoch": 53.06, "learning_rate": 4.336800373134329e-05, "loss": 0.0, "step": 56876 }, { "epoch": 53.06, "learning_rate": 4.3367537313432835e-05, "loss": 0.0, "step": 56880 }, { "epoch": 53.06, "learning_rate": 4.336707089552239e-05, "loss": 0.0, "step": 56884 }, { "epoch": 53.07, "learning_rate": 4.3366604477611945e-05, "loss": 0.0, "step": 56888 }, { "epoch": 53.07, "learning_rate": 4.336613805970149e-05, "loss": 0.0, "step": 56892 }, { "epoch": 53.07, "learning_rate": 4.336567164179105e-05, "loss": 0.0, "step": 56896 }, { "epoch": 53.08, "learning_rate": 4.33652052238806e-05, "loss": 0.0, "step": 56900 }, { "epoch": 53.08, "learning_rate": 4.336473880597015e-05, "loss": 0.0008, "step": 56904 }, { "epoch": 53.09, "learning_rate": 4.33642723880597e-05, "loss": 0.0002, "step": 56908 }, { "epoch": 53.09, "learning_rate": 4.3363805970149254e-05, "loss": 0.0, "step": 56912 }, { "epoch": 53.09, "learning_rate": 4.336333955223881e-05, "loss": 0.0, "step": 56916 }, { "epoch": 53.1, "learning_rate": 4.336287313432836e-05, "loss": 0.0034, "step": 56920 }, { "epoch": 53.1, "learning_rate": 4.336240671641791e-05, "loss": 0.0001, "step": 56924 }, { "epoch": 53.1, "learning_rate": 4.336194029850747e-05, "loss": 0.0, "step": 56928 }, { "epoch": 53.11, "learning_rate": 4.3361473880597015e-05, "loss": 0.0, "step": 56932 }, { "epoch": 53.11, "learning_rate": 4.336100746268657e-05, "loss": 0.0001, "step": 56936 }, { "epoch": 53.12, "learning_rate": 4.336054104477612e-05, "loss": 0.0013, "step": 56940 }, { "epoch": 53.12, "learning_rate": 4.336007462686567e-05, "loss": 0.0, "step": 56944 }, { "epoch": 53.12, "learning_rate": 4.335960820895523e-05, "loss": 0.0, "step": 56948 }, { "epoch": 53.13, "learning_rate": 4.3359141791044776e-05, "loss": 0.0006, "step": 56952 }, { "epoch": 53.13, "learning_rate": 4.335867537313433e-05, "loss": 0.0, "step": 56956 }, { "epoch": 53.13, "learning_rate": 4.3358208955223886e-05, "loss": 0.0, "step": 56960 }, { "epoch": 53.14, "learning_rate": 4.3357742537313434e-05, "loss": 0.0, "step": 56964 }, { "epoch": 53.14, "learning_rate": 4.335727611940299e-05, "loss": 0.0, "step": 56968 }, { "epoch": 53.15, "learning_rate": 4.335680970149254e-05, "loss": 0.0, "step": 56972 }, { "epoch": 53.15, "learning_rate": 4.335634328358209e-05, "loss": 0.0, "step": 56976 }, { "epoch": 53.15, "learning_rate": 4.335587686567165e-05, "loss": 0.0003, "step": 56980 }, { "epoch": 53.16, "learning_rate": 4.3355410447761195e-05, "loss": 0.0, "step": 56984 }, { "epoch": 53.16, "learning_rate": 4.335494402985075e-05, "loss": 0.0003, "step": 56988 }, { "epoch": 53.16, "learning_rate": 4.3354477611940305e-05, "loss": 0.0, "step": 56992 }, { "epoch": 53.17, "learning_rate": 4.335401119402985e-05, "loss": 0.0, "step": 56996 }, { "epoch": 53.17, "learning_rate": 4.33535447761194e-05, "loss": 0.0, "step": 57000 }, { "epoch": 53.17, "eval_exact_match": 0.7311411992263056, "eval_exec": 0.7756286266924565, "eval_loss": 0.4817250370979309, "eval_runtime": 1118.231, "eval_samples_per_second": 0.925, "step": 57000 }, { "epoch": 53.18, "learning_rate": 4.335307835820896e-05, "loss": 0.0001, "step": 57004 }, { "epoch": 53.18, "learning_rate": 4.335261194029851e-05, "loss": 0.0, "step": 57008 }, { "epoch": 53.18, "learning_rate": 4.335214552238806e-05, "loss": 0.0, "step": 57012 }, { "epoch": 53.19, "learning_rate": 4.3351679104477614e-05, "loss": 0.0, "step": 57016 }, { "epoch": 53.19, "learning_rate": 4.335121268656717e-05, "loss": 0.0, "step": 57020 }, { "epoch": 53.19, "learning_rate": 4.335074626865672e-05, "loss": 0.0001, "step": 57024 }, { "epoch": 53.2, "learning_rate": 4.335027985074627e-05, "loss": 0.0001, "step": 57028 }, { "epoch": 53.2, "learning_rate": 4.334981343283582e-05, "loss": 0.0, "step": 57032 }, { "epoch": 53.21, "learning_rate": 4.3349347014925375e-05, "loss": 0.0, "step": 57036 }, { "epoch": 53.21, "learning_rate": 4.334888059701493e-05, "loss": 0.0, "step": 57040 }, { "epoch": 53.21, "learning_rate": 4.334841417910448e-05, "loss": 0.0, "step": 57044 }, { "epoch": 53.22, "learning_rate": 4.334794776119403e-05, "loss": 0.0, "step": 57048 }, { "epoch": 53.22, "learning_rate": 4.334748134328359e-05, "loss": 0.0031, "step": 57052 }, { "epoch": 53.22, "learning_rate": 4.3347014925373136e-05, "loss": 0.0001, "step": 57056 }, { "epoch": 53.23, "learning_rate": 4.3346548507462684e-05, "loss": 0.0, "step": 57060 }, { "epoch": 53.23, "learning_rate": 4.3346082089552246e-05, "loss": 0.0117, "step": 57064 }, { "epoch": 53.24, "learning_rate": 4.3345615671641794e-05, "loss": 0.0, "step": 57068 }, { "epoch": 53.24, "learning_rate": 4.334514925373134e-05, "loss": 0.0, "step": 57072 }, { "epoch": 53.24, "learning_rate": 4.33446828358209e-05, "loss": 0.0, "step": 57076 }, { "epoch": 53.25, "learning_rate": 4.334421641791045e-05, "loss": 0.0, "step": 57080 }, { "epoch": 53.25, "learning_rate": 4.334375e-05, "loss": 0.0, "step": 57084 }, { "epoch": 53.25, "learning_rate": 4.3343283582089555e-05, "loss": 0.0006, "step": 57088 }, { "epoch": 53.26, "learning_rate": 4.33428171641791e-05, "loss": 0.0, "step": 57092 }, { "epoch": 53.26, "learning_rate": 4.334235074626866e-05, "loss": 0.0, "step": 57096 }, { "epoch": 53.26, "learning_rate": 4.334188432835821e-05, "loss": 0.0007, "step": 57100 }, { "epoch": 53.27, "learning_rate": 4.334141791044776e-05, "loss": 0.0014, "step": 57104 }, { "epoch": 53.27, "learning_rate": 4.3340951492537316e-05, "loss": 0.0, "step": 57108 }, { "epoch": 53.28, "learning_rate": 4.334048507462687e-05, "loss": 0.0001, "step": 57112 }, { "epoch": 53.28, "learning_rate": 4.334001865671642e-05, "loss": 0.0, "step": 57116 }, { "epoch": 53.28, "learning_rate": 4.3339552238805974e-05, "loss": 0.0065, "step": 57120 }, { "epoch": 53.29, "learning_rate": 4.333908582089553e-05, "loss": 0.0, "step": 57124 }, { "epoch": 53.29, "learning_rate": 4.333861940298508e-05, "loss": 0.0, "step": 57128 }, { "epoch": 53.29, "learning_rate": 4.333815298507463e-05, "loss": 0.0, "step": 57132 }, { "epoch": 53.3, "learning_rate": 4.333768656716418e-05, "loss": 0.0, "step": 57136 }, { "epoch": 53.3, "learning_rate": 4.3337220149253735e-05, "loss": 0.0, "step": 57140 }, { "epoch": 53.31, "learning_rate": 4.333675373134329e-05, "loss": 0.0, "step": 57144 }, { "epoch": 53.31, "learning_rate": 4.333628731343284e-05, "loss": 0.0001, "step": 57148 }, { "epoch": 53.31, "learning_rate": 4.3335820895522386e-05, "loss": 0.0, "step": 57152 }, { "epoch": 53.32, "learning_rate": 4.333535447761195e-05, "loss": 0.0005, "step": 57156 }, { "epoch": 53.32, "learning_rate": 4.3334888059701496e-05, "loss": 0.0, "step": 57160 }, { "epoch": 53.32, "learning_rate": 4.3334421641791044e-05, "loss": 0.0, "step": 57164 }, { "epoch": 53.33, "learning_rate": 4.33339552238806e-05, "loss": 0.0234, "step": 57168 }, { "epoch": 53.33, "learning_rate": 4.3333488805970154e-05, "loss": 0.0, "step": 57172 }, { "epoch": 53.34, "learning_rate": 4.33330223880597e-05, "loss": 0.0, "step": 57176 }, { "epoch": 53.34, "learning_rate": 4.333255597014926e-05, "loss": 0.0001, "step": 57180 }, { "epoch": 53.34, "learning_rate": 4.333208955223881e-05, "loss": 0.0041, "step": 57184 }, { "epoch": 53.35, "learning_rate": 4.333162313432836e-05, "loss": 0.0, "step": 57188 }, { "epoch": 53.35, "learning_rate": 4.3331156716417915e-05, "loss": 0.0, "step": 57192 }, { "epoch": 53.35, "learning_rate": 4.333069029850746e-05, "loss": 0.0, "step": 57196 }, { "epoch": 53.36, "learning_rate": 4.333022388059702e-05, "loss": 0.0, "step": 57200 }, { "epoch": 53.36, "learning_rate": 4.332975746268657e-05, "loss": 0.0001, "step": 57204 }, { "epoch": 53.37, "learning_rate": 4.332929104477612e-05, "loss": 0.0, "step": 57208 }, { "epoch": 53.37, "learning_rate": 4.332882462686567e-05, "loss": 0.0001, "step": 57212 }, { "epoch": 53.37, "learning_rate": 4.332835820895523e-05, "loss": 0.0, "step": 57216 }, { "epoch": 53.38, "learning_rate": 4.332789179104478e-05, "loss": 0.0005, "step": 57220 }, { "epoch": 53.38, "learning_rate": 4.332742537313433e-05, "loss": 0.0, "step": 57224 }, { "epoch": 53.38, "learning_rate": 4.332695895522388e-05, "loss": 0.0, "step": 57228 }, { "epoch": 53.39, "learning_rate": 4.3326492537313437e-05, "loss": 0.0003, "step": 57232 }, { "epoch": 53.39, "learning_rate": 4.3326026119402985e-05, "loss": 0.0, "step": 57236 }, { "epoch": 53.4, "learning_rate": 4.332555970149254e-05, "loss": 0.0, "step": 57240 }, { "epoch": 53.4, "learning_rate": 4.332509328358209e-05, "loss": 0.0, "step": 57244 }, { "epoch": 53.4, "learning_rate": 4.332462686567164e-05, "loss": 0.0, "step": 57248 }, { "epoch": 53.41, "learning_rate": 4.33241604477612e-05, "loss": 0.0, "step": 57252 }, { "epoch": 53.41, "learning_rate": 4.3323694029850746e-05, "loss": 0.0, "step": 57256 }, { "epoch": 53.41, "learning_rate": 4.33232276119403e-05, "loss": 0.0, "step": 57260 }, { "epoch": 53.42, "learning_rate": 4.3322761194029855e-05, "loss": 0.0, "step": 57264 }, { "epoch": 53.42, "learning_rate": 4.3322294776119404e-05, "loss": 0.0, "step": 57268 }, { "epoch": 53.43, "learning_rate": 4.332182835820895e-05, "loss": 0.0, "step": 57272 }, { "epoch": 53.43, "learning_rate": 4.3321361940298513e-05, "loss": 0.0, "step": 57276 }, { "epoch": 53.43, "learning_rate": 4.332089552238806e-05, "loss": 0.0, "step": 57280 }, { "epoch": 53.44, "learning_rate": 4.3320429104477616e-05, "loss": 0.0, "step": 57284 }, { "epoch": 53.44, "learning_rate": 4.3319962686567165e-05, "loss": 0.0, "step": 57288 }, { "epoch": 53.44, "learning_rate": 4.331949626865672e-05, "loss": 0.0, "step": 57292 }, { "epoch": 53.45, "learning_rate": 4.3319029850746274e-05, "loss": 0.0, "step": 57296 }, { "epoch": 53.45, "learning_rate": 4.331856343283582e-05, "loss": 0.0, "step": 57300 }, { "epoch": 53.46, "learning_rate": 4.331809701492537e-05, "loss": 0.0, "step": 57304 }, { "epoch": 53.46, "learning_rate": 4.331763059701493e-05, "loss": 0.0023, "step": 57308 }, { "epoch": 53.46, "learning_rate": 4.331716417910448e-05, "loss": 0.0001, "step": 57312 }, { "epoch": 53.47, "learning_rate": 4.331669776119403e-05, "loss": 0.0, "step": 57316 }, { "epoch": 53.47, "learning_rate": 4.3316231343283584e-05, "loss": 0.0006, "step": 57320 }, { "epoch": 53.47, "learning_rate": 4.331576492537314e-05, "loss": 0.0091, "step": 57324 }, { "epoch": 53.48, "learning_rate": 4.3315298507462687e-05, "loss": 0.0, "step": 57328 }, { "epoch": 53.48, "learning_rate": 4.331483208955224e-05, "loss": 0.0, "step": 57332 }, { "epoch": 53.49, "learning_rate": 4.3314365671641796e-05, "loss": 0.0, "step": 57336 }, { "epoch": 53.49, "learning_rate": 4.3313899253731345e-05, "loss": 0.0, "step": 57340 }, { "epoch": 53.49, "learning_rate": 4.33134328358209e-05, "loss": 0.0002, "step": 57344 }, { "epoch": 53.5, "learning_rate": 4.331296641791045e-05, "loss": 0.0, "step": 57348 }, { "epoch": 53.5, "learning_rate": 4.33125e-05, "loss": 0.0, "step": 57352 }, { "epoch": 53.5, "learning_rate": 4.331203358208956e-05, "loss": 0.0022, "step": 57356 }, { "epoch": 53.51, "learning_rate": 4.3311567164179106e-05, "loss": 0.0, "step": 57360 }, { "epoch": 53.51, "learning_rate": 4.3311100746268654e-05, "loss": 0.0, "step": 57364 }, { "epoch": 53.51, "learning_rate": 4.3310634328358215e-05, "loss": 0.0, "step": 57368 }, { "epoch": 53.52, "learning_rate": 4.3310167910447763e-05, "loss": 0.0001, "step": 57372 }, { "epoch": 53.52, "learning_rate": 4.330970149253731e-05, "loss": 0.0, "step": 57376 }, { "epoch": 53.53, "learning_rate": 4.3309235074626866e-05, "loss": 0.0, "step": 57380 }, { "epoch": 53.53, "learning_rate": 4.330876865671642e-05, "loss": 0.0, "step": 57384 }, { "epoch": 53.53, "learning_rate": 4.330830223880597e-05, "loss": 0.002, "step": 57388 }, { "epoch": 53.54, "learning_rate": 4.3307835820895524e-05, "loss": 0.0001, "step": 57392 }, { "epoch": 53.54, "learning_rate": 4.330736940298508e-05, "loss": 0.0001, "step": 57396 }, { "epoch": 53.54, "learning_rate": 4.330690298507463e-05, "loss": 0.0, "step": 57400 }, { "epoch": 53.55, "learning_rate": 4.330643656716418e-05, "loss": 0.0, "step": 57404 }, { "epoch": 53.55, "learning_rate": 4.330597014925373e-05, "loss": 0.0, "step": 57408 }, { "epoch": 53.56, "learning_rate": 4.3305503731343285e-05, "loss": 0.0021, "step": 57412 }, { "epoch": 53.56, "learning_rate": 4.330503731343284e-05, "loss": 0.0, "step": 57416 }, { "epoch": 53.56, "learning_rate": 4.330457089552239e-05, "loss": 0.0008, "step": 57420 }, { "epoch": 53.57, "learning_rate": 4.3304104477611937e-05, "loss": 0.0001, "step": 57424 }, { "epoch": 53.57, "learning_rate": 4.33036380597015e-05, "loss": 0.0, "step": 57428 }, { "epoch": 53.57, "learning_rate": 4.3303171641791046e-05, "loss": 0.0, "step": 57432 }, { "epoch": 53.58, "learning_rate": 4.3302705223880595e-05, "loss": 0.0005, "step": 57436 }, { "epoch": 53.58, "learning_rate": 4.330223880597015e-05, "loss": 0.0, "step": 57440 }, { "epoch": 53.59, "learning_rate": 4.3301772388059704e-05, "loss": 0.0, "step": 57444 }, { "epoch": 53.59, "learning_rate": 4.330130597014926e-05, "loss": 0.0, "step": 57448 }, { "epoch": 53.59, "learning_rate": 4.330083955223881e-05, "loss": 0.0, "step": 57452 }, { "epoch": 53.6, "learning_rate": 4.330037313432836e-05, "loss": 0.0, "step": 57456 }, { "epoch": 53.6, "learning_rate": 4.329990671641792e-05, "loss": 0.0001, "step": 57460 }, { "epoch": 53.6, "learning_rate": 4.3299440298507465e-05, "loss": 0.0, "step": 57464 }, { "epoch": 53.61, "learning_rate": 4.3298973880597013e-05, "loss": 0.0014, "step": 57468 }, { "epoch": 53.61, "learning_rate": 4.329850746268657e-05, "loss": 0.0, "step": 57472 }, { "epoch": 53.62, "learning_rate": 4.329804104477612e-05, "loss": 0.0, "step": 57476 }, { "epoch": 53.62, "learning_rate": 4.329757462686567e-05, "loss": 0.0076, "step": 57480 }, { "epoch": 53.62, "learning_rate": 4.3297108208955226e-05, "loss": 0.0, "step": 57484 }, { "epoch": 53.63, "learning_rate": 4.329664179104478e-05, "loss": 0.0, "step": 57488 }, { "epoch": 53.63, "learning_rate": 4.329617537313433e-05, "loss": 0.0, "step": 57492 }, { "epoch": 53.63, "learning_rate": 4.3295708955223884e-05, "loss": 0.0, "step": 57496 }, { "epoch": 53.64, "learning_rate": 4.329524253731343e-05, "loss": 0.001, "step": 57500 }, { "epoch": 53.64, "eval_exact_match": 0.7446808510638298, "eval_exec": 0.7823984526112185, "eval_loss": 0.4641968011856079, "eval_runtime": 1155.653, "eval_samples_per_second": 0.895, "step": 57500 }, { "epoch": 53.64, "learning_rate": 4.329477611940299e-05, "loss": 0.0, "step": 57504 }, { "epoch": 53.65, "learning_rate": 4.329430970149254e-05, "loss": 0.0, "step": 57508 }, { "epoch": 53.65, "learning_rate": 4.329384328358209e-05, "loss": 0.0005, "step": 57512 }, { "epoch": 53.65, "learning_rate": 4.3293376865671645e-05, "loss": 0.0, "step": 57516 }, { "epoch": 53.66, "learning_rate": 4.32929104477612e-05, "loss": 0.0051, "step": 57520 }, { "epoch": 53.66, "learning_rate": 4.329244402985075e-05, "loss": 0.0001, "step": 57524 }, { "epoch": 53.66, "learning_rate": 4.3291977611940296e-05, "loss": 0.0, "step": 57528 }, { "epoch": 53.67, "learning_rate": 4.329151119402985e-05, "loss": 0.0, "step": 57532 }, { "epoch": 53.67, "learning_rate": 4.3291044776119406e-05, "loss": 0.0, "step": 57536 }, { "epoch": 53.68, "learning_rate": 4.3290578358208954e-05, "loss": 0.0, "step": 57540 }, { "epoch": 53.68, "learning_rate": 4.329011194029851e-05, "loss": 0.0, "step": 57544 }, { "epoch": 53.68, "learning_rate": 4.3289645522388064e-05, "loss": 0.0, "step": 57548 }, { "epoch": 53.69, "learning_rate": 4.328917910447761e-05, "loss": 0.0008, "step": 57552 }, { "epoch": 53.69, "learning_rate": 4.328871268656717e-05, "loss": 0.0002, "step": 57556 }, { "epoch": 53.69, "learning_rate": 4.3288246268656715e-05, "loss": 0.0001, "step": 57560 }, { "epoch": 53.7, "learning_rate": 4.328777985074627e-05, "loss": 0.0, "step": 57564 }, { "epoch": 53.7, "learning_rate": 4.3287313432835825e-05, "loss": 0.0, "step": 57568 }, { "epoch": 53.71, "learning_rate": 4.328684701492537e-05, "loss": 0.0, "step": 57572 }, { "epoch": 53.71, "learning_rate": 4.328638059701493e-05, "loss": 0.0, "step": 57576 }, { "epoch": 53.71, "learning_rate": 4.328591417910448e-05, "loss": 0.0015, "step": 57580 }, { "epoch": 53.72, "learning_rate": 4.328544776119403e-05, "loss": 0.0, "step": 57584 }, { "epoch": 53.72, "learning_rate": 4.328498134328358e-05, "loss": 0.0013, "step": 57588 }, { "epoch": 53.72, "learning_rate": 4.3284514925373134e-05, "loss": 0.0, "step": 57592 }, { "epoch": 53.73, "learning_rate": 4.328404850746269e-05, "loss": 0.0049, "step": 57596 }, { "epoch": 53.73, "learning_rate": 4.328358208955224e-05, "loss": 0.0, "step": 57600 }, { "epoch": 53.73, "learning_rate": 4.328311567164179e-05, "loss": 0.0, "step": 57604 }, { "epoch": 53.74, "learning_rate": 4.328264925373135e-05, "loss": 0.0, "step": 57608 }, { "epoch": 53.74, "learning_rate": 4.32821828358209e-05, "loss": 0.0, "step": 57612 }, { "epoch": 53.75, "learning_rate": 4.328171641791045e-05, "loss": 0.0001, "step": 57616 }, { "epoch": 53.75, "learning_rate": 4.328125e-05, "loss": 0.0006, "step": 57620 }, { "epoch": 53.75, "learning_rate": 4.328078358208956e-05, "loss": 0.0, "step": 57624 }, { "epoch": 53.76, "learning_rate": 4.328031716417911e-05, "loss": 0.0, "step": 57628 }, { "epoch": 53.76, "learning_rate": 4.3279850746268656e-05, "loss": 0.0004, "step": 57632 }, { "epoch": 53.76, "learning_rate": 4.327938432835821e-05, "loss": 0.0, "step": 57636 }, { "epoch": 53.77, "learning_rate": 4.3278917910447766e-05, "loss": 0.0, "step": 57640 }, { "epoch": 53.77, "learning_rate": 4.3278451492537314e-05, "loss": 0.0, "step": 57644 }, { "epoch": 53.78, "learning_rate": 4.327798507462687e-05, "loss": 0.0, "step": 57648 }, { "epoch": 53.78, "learning_rate": 4.327751865671642e-05, "loss": 0.0, "step": 57652 }, { "epoch": 53.78, "learning_rate": 4.327705223880597e-05, "loss": 0.0, "step": 57656 }, { "epoch": 53.79, "learning_rate": 4.327658582089553e-05, "loss": 0.0, "step": 57660 }, { "epoch": 53.79, "learning_rate": 4.3276119402985075e-05, "loss": 0.0001, "step": 57664 }, { "epoch": 53.79, "learning_rate": 4.327565298507463e-05, "loss": 0.0001, "step": 57668 }, { "epoch": 53.8, "learning_rate": 4.3275186567164185e-05, "loss": 0.0001, "step": 57672 }, { "epoch": 53.8, "learning_rate": 4.327472014925373e-05, "loss": 0.0, "step": 57676 }, { "epoch": 53.81, "learning_rate": 4.327425373134328e-05, "loss": 0.0, "step": 57680 }, { "epoch": 53.81, "learning_rate": 4.327378731343284e-05, "loss": 0.0, "step": 57684 }, { "epoch": 53.81, "learning_rate": 4.327332089552239e-05, "loss": 0.0, "step": 57688 }, { "epoch": 53.82, "learning_rate": 4.327285447761194e-05, "loss": 0.0, "step": 57692 }, { "epoch": 53.82, "learning_rate": 4.3272388059701494e-05, "loss": 0.0002, "step": 57696 }, { "epoch": 53.82, "learning_rate": 4.327192164179105e-05, "loss": 0.0, "step": 57700 }, { "epoch": 53.83, "learning_rate": 4.32714552238806e-05, "loss": 0.0, "step": 57704 }, { "epoch": 53.83, "learning_rate": 4.327098880597015e-05, "loss": 0.0, "step": 57708 }, { "epoch": 53.84, "learning_rate": 4.32705223880597e-05, "loss": 0.0, "step": 57712 }, { "epoch": 53.84, "learning_rate": 4.3270055970149255e-05, "loss": 0.0, "step": 57716 }, { "epoch": 53.84, "learning_rate": 4.326958955223881e-05, "loss": 0.0, "step": 57720 }, { "epoch": 53.85, "learning_rate": 4.326912313432836e-05, "loss": 0.0, "step": 57724 }, { "epoch": 53.85, "learning_rate": 4.326865671641791e-05, "loss": 0.0001, "step": 57728 }, { "epoch": 53.85, "learning_rate": 4.326819029850747e-05, "loss": 0.003, "step": 57732 }, { "epoch": 53.86, "learning_rate": 4.3267723880597016e-05, "loss": 0.0, "step": 57736 }, { "epoch": 53.86, "learning_rate": 4.3267257462686564e-05, "loss": 0.0003, "step": 57740 }, { "epoch": 53.87, "learning_rate": 4.3266791044776126e-05, "loss": 0.0, "step": 57744 }, { "epoch": 53.87, "learning_rate": 4.3266324626865674e-05, "loss": 0.0, "step": 57748 }, { "epoch": 53.87, "learning_rate": 4.326585820895522e-05, "loss": 0.0001, "step": 57752 }, { "epoch": 53.88, "learning_rate": 4.326539179104478e-05, "loss": 0.0, "step": 57756 }, { "epoch": 53.88, "learning_rate": 4.326492537313433e-05, "loss": 0.0001, "step": 57760 }, { "epoch": 53.88, "learning_rate": 4.326445895522388e-05, "loss": 0.0002, "step": 57764 }, { "epoch": 53.89, "learning_rate": 4.3263992537313435e-05, "loss": 0.0003, "step": 57768 }, { "epoch": 53.89, "learning_rate": 4.326352611940298e-05, "loss": 0.0, "step": 57772 }, { "epoch": 53.9, "learning_rate": 4.3263059701492545e-05, "loss": 0.0, "step": 57776 }, { "epoch": 53.9, "learning_rate": 4.326259328358209e-05, "loss": 0.0, "step": 57780 }, { "epoch": 53.9, "learning_rate": 4.326212686567164e-05, "loss": 0.0001, "step": 57784 }, { "epoch": 53.91, "learning_rate": 4.3261660447761196e-05, "loss": 0.0, "step": 57788 }, { "epoch": 53.91, "learning_rate": 4.326119402985075e-05, "loss": 0.0, "step": 57792 }, { "epoch": 53.91, "learning_rate": 4.32607276119403e-05, "loss": 0.0, "step": 57796 }, { "epoch": 53.92, "learning_rate": 4.3260261194029854e-05, "loss": 0.0, "step": 57800 }, { "epoch": 53.92, "learning_rate": 4.325979477611941e-05, "loss": 0.0, "step": 57804 }, { "epoch": 53.93, "learning_rate": 4.325932835820896e-05, "loss": 0.0, "step": 57808 }, { "epoch": 53.93, "learning_rate": 4.325886194029851e-05, "loss": 0.0, "step": 57812 }, { "epoch": 53.93, "learning_rate": 4.325839552238806e-05, "loss": 0.0, "step": 57816 }, { "epoch": 53.94, "learning_rate": 4.3257929104477615e-05, "loss": 0.0, "step": 57820 }, { "epoch": 53.94, "learning_rate": 4.325746268656717e-05, "loss": 0.0, "step": 57824 }, { "epoch": 53.94, "learning_rate": 4.325699626865672e-05, "loss": 0.0001, "step": 57828 }, { "epoch": 53.95, "learning_rate": 4.3256529850746266e-05, "loss": 0.0, "step": 57832 }, { "epoch": 53.95, "learning_rate": 4.325606343283583e-05, "loss": 0.0, "step": 57836 }, { "epoch": 53.96, "learning_rate": 4.3255597014925376e-05, "loss": 0.0, "step": 57840 }, { "epoch": 53.96, "learning_rate": 4.3255130597014924e-05, "loss": 0.0, "step": 57844 }, { "epoch": 53.96, "learning_rate": 4.325466417910448e-05, "loss": 0.0, "step": 57848 }, { "epoch": 53.97, "learning_rate": 4.3254197761194034e-05, "loss": 0.002, "step": 57852 }, { "epoch": 53.97, "learning_rate": 4.325373134328358e-05, "loss": 0.0, "step": 57856 }, { "epoch": 53.97, "learning_rate": 4.325326492537314e-05, "loss": 0.0, "step": 57860 }, { "epoch": 53.98, "learning_rate": 4.325279850746269e-05, "loss": 0.0, "step": 57864 }, { "epoch": 53.98, "learning_rate": 4.325233208955224e-05, "loss": 0.0, "step": 57868 }, { "epoch": 53.98, "learning_rate": 4.3251865671641795e-05, "loss": 0.0003, "step": 57872 }, { "epoch": 53.99, "learning_rate": 4.325139925373134e-05, "loss": 0.0004, "step": 57876 }, { "epoch": 53.99, "learning_rate": 4.32509328358209e-05, "loss": 0.0002, "step": 57880 }, { "epoch": 54.0, "learning_rate": 4.325046641791045e-05, "loss": 0.0, "step": 57884 }, { "epoch": 54.0, "learning_rate": 4.325e-05, "loss": 0.0001, "step": 57888 }, { "epoch": 54.0, "learning_rate": 4.324953358208955e-05, "loss": 0.0, "step": 57892 }, { "epoch": 54.01, "learning_rate": 4.324906716417911e-05, "loss": 0.0, "step": 57896 }, { "epoch": 54.01, "learning_rate": 4.324860074626866e-05, "loss": 0.0, "step": 57900 }, { "epoch": 54.01, "learning_rate": 4.324813432835821e-05, "loss": 0.0, "step": 57904 }, { "epoch": 54.02, "learning_rate": 4.324766791044776e-05, "loss": 0.0, "step": 57908 }, { "epoch": 54.02, "learning_rate": 4.324720149253732e-05, "loss": 0.0, "step": 57912 }, { "epoch": 54.03, "learning_rate": 4.3246735074626865e-05, "loss": 0.0001, "step": 57916 }, { "epoch": 54.03, "learning_rate": 4.324626865671642e-05, "loss": 0.0, "step": 57920 }, { "epoch": 54.03, "learning_rate": 4.324580223880597e-05, "loss": 0.0004, "step": 57924 }, { "epoch": 54.04, "learning_rate": 4.324533582089552e-05, "loss": 0.0, "step": 57928 }, { "epoch": 54.04, "learning_rate": 4.324486940298508e-05, "loss": 0.0, "step": 57932 }, { "epoch": 54.04, "learning_rate": 4.3244402985074626e-05, "loss": 0.0, "step": 57936 }, { "epoch": 54.05, "learning_rate": 4.324393656716418e-05, "loss": 0.0, "step": 57940 }, { "epoch": 54.05, "learning_rate": 4.3243470149253736e-05, "loss": 0.0, "step": 57944 }, { "epoch": 54.06, "learning_rate": 4.3243003731343284e-05, "loss": 0.0001, "step": 57948 }, { "epoch": 54.06, "learning_rate": 4.324253731343284e-05, "loss": 0.0013, "step": 57952 }, { "epoch": 54.06, "learning_rate": 4.3242070895522394e-05, "loss": 0.0, "step": 57956 }, { "epoch": 54.07, "learning_rate": 4.324160447761194e-05, "loss": 0.0001, "step": 57960 }, { "epoch": 54.07, "learning_rate": 4.32411380597015e-05, "loss": 0.0001, "step": 57964 }, { "epoch": 54.07, "learning_rate": 4.3240671641791045e-05, "loss": 0.0, "step": 57968 }, { "epoch": 54.08, "learning_rate": 4.32402052238806e-05, "loss": 0.0, "step": 57972 }, { "epoch": 54.08, "learning_rate": 4.3239738805970155e-05, "loss": 0.0, "step": 57976 }, { "epoch": 54.09, "learning_rate": 4.32392723880597e-05, "loss": 0.0001, "step": 57980 }, { "epoch": 54.09, "learning_rate": 4.323880597014925e-05, "loss": 0.0001, "step": 57984 }, { "epoch": 54.09, "learning_rate": 4.323833955223881e-05, "loss": 0.0, "step": 57988 }, { "epoch": 54.1, "learning_rate": 4.323787313432836e-05, "loss": 0.0, "step": 57992 }, { "epoch": 54.1, "learning_rate": 4.323740671641791e-05, "loss": 0.0002, "step": 57996 }, { "epoch": 54.1, "learning_rate": 4.3236940298507464e-05, "loss": 0.0001, "step": 58000 }, { "epoch": 54.1, "eval_exact_match": 0.7427466150870407, "eval_exec": 0.7785299806576402, "eval_loss": 0.49178797006607056, "eval_runtime": 1102.7711, "eval_samples_per_second": 0.938, "step": 58000 }, { "epoch": 54.11, "learning_rate": 4.323647388059702e-05, "loss": 0.0, "step": 58004 }, { "epoch": 54.11, "learning_rate": 4.323600746268657e-05, "loss": 0.0001, "step": 58008 }, { "epoch": 54.12, "learning_rate": 4.323554104477612e-05, "loss": 0.0, "step": 58012 }, { "epoch": 54.12, "learning_rate": 4.323507462686568e-05, "loss": 0.0008, "step": 58016 }, { "epoch": 54.12, "learning_rate": 4.3234608208955225e-05, "loss": 0.0, "step": 58020 }, { "epoch": 54.13, "learning_rate": 4.323414179104478e-05, "loss": 0.0, "step": 58024 }, { "epoch": 54.13, "learning_rate": 4.323367537313433e-05, "loss": 0.0, "step": 58028 }, { "epoch": 54.13, "learning_rate": 4.323320895522388e-05, "loss": 0.0, "step": 58032 }, { "epoch": 54.14, "learning_rate": 4.323274253731344e-05, "loss": 0.0, "step": 58036 }, { "epoch": 54.14, "learning_rate": 4.3232276119402986e-05, "loss": 0.0, "step": 58040 }, { "epoch": 54.15, "learning_rate": 4.3231809701492534e-05, "loss": 0.0, "step": 58044 }, { "epoch": 54.15, "learning_rate": 4.3231343283582096e-05, "loss": 0.0005, "step": 58048 }, { "epoch": 54.15, "learning_rate": 4.3230876865671644e-05, "loss": 0.0, "step": 58052 }, { "epoch": 54.16, "learning_rate": 4.323041044776119e-05, "loss": 0.0002, "step": 58056 }, { "epoch": 54.16, "learning_rate": 4.322994402985075e-05, "loss": 0.0, "step": 58060 }, { "epoch": 54.16, "learning_rate": 4.32294776119403e-05, "loss": 0.0003, "step": 58064 }, { "epoch": 54.17, "learning_rate": 4.322901119402985e-05, "loss": 0.0, "step": 58068 }, { "epoch": 54.17, "learning_rate": 4.3228544776119405e-05, "loss": 0.0, "step": 58072 }, { "epoch": 54.18, "learning_rate": 4.322807835820896e-05, "loss": 0.0, "step": 58076 }, { "epoch": 54.18, "learning_rate": 4.322761194029851e-05, "loss": 0.0043, "step": 58080 }, { "epoch": 54.18, "learning_rate": 4.322714552238806e-05, "loss": 0.0, "step": 58084 }, { "epoch": 54.19, "learning_rate": 4.322667910447761e-05, "loss": 0.0, "step": 58088 }, { "epoch": 54.19, "learning_rate": 4.3226212686567166e-05, "loss": 0.0, "step": 58092 }, { "epoch": 54.19, "learning_rate": 4.322574626865672e-05, "loss": 0.0, "step": 58096 }, { "epoch": 54.2, "learning_rate": 4.322527985074627e-05, "loss": 0.0, "step": 58100 }, { "epoch": 54.2, "learning_rate": 4.3224813432835824e-05, "loss": 0.0, "step": 58104 }, { "epoch": 54.21, "learning_rate": 4.322434701492538e-05, "loss": 0.0, "step": 58108 }, { "epoch": 54.21, "learning_rate": 4.322388059701493e-05, "loss": 0.0, "step": 58112 }, { "epoch": 54.21, "learning_rate": 4.322341417910448e-05, "loss": 0.0003, "step": 58116 }, { "epoch": 54.22, "learning_rate": 4.322294776119403e-05, "loss": 0.0082, "step": 58120 }, { "epoch": 54.22, "learning_rate": 4.3222481343283585e-05, "loss": 0.0, "step": 58124 }, { "epoch": 54.22, "learning_rate": 4.322201492537314e-05, "loss": 0.0003, "step": 58128 }, { "epoch": 54.23, "learning_rate": 4.322154850746269e-05, "loss": 0.0063, "step": 58132 }, { "epoch": 54.23, "learning_rate": 4.322108208955224e-05, "loss": 0.0, "step": 58136 }, { "epoch": 54.24, "learning_rate": 4.32206156716418e-05, "loss": 0.0001, "step": 58140 }, { "epoch": 54.24, "learning_rate": 4.3220149253731346e-05, "loss": 0.0002, "step": 58144 }, { "epoch": 54.24, "learning_rate": 4.3219682835820894e-05, "loss": 0.0, "step": 58148 }, { "epoch": 54.25, "learning_rate": 4.321921641791045e-05, "loss": 0.0004, "step": 58152 }, { "epoch": 54.25, "learning_rate": 4.3218750000000004e-05, "loss": 0.0001, "step": 58156 }, { "epoch": 54.25, "learning_rate": 4.321828358208955e-05, "loss": 0.0, "step": 58160 }, { "epoch": 54.26, "learning_rate": 4.321781716417911e-05, "loss": 0.0, "step": 58164 }, { "epoch": 54.26, "learning_rate": 4.321735074626866e-05, "loss": 0.0, "step": 58168 }, { "epoch": 54.26, "learning_rate": 4.321688432835821e-05, "loss": 0.0, "step": 58172 }, { "epoch": 54.27, "learning_rate": 4.3216417910447765e-05, "loss": 0.0, "step": 58176 }, { "epoch": 54.27, "learning_rate": 4.321595149253731e-05, "loss": 0.0, "step": 58180 }, { "epoch": 54.28, "learning_rate": 4.321548507462687e-05, "loss": 0.0, "step": 58184 }, { "epoch": 54.28, "learning_rate": 4.321501865671642e-05, "loss": 0.0001, "step": 58188 }, { "epoch": 54.28, "learning_rate": 4.321455223880597e-05, "loss": 0.0, "step": 58192 }, { "epoch": 54.29, "learning_rate": 4.3214085820895526e-05, "loss": 0.001, "step": 58196 }, { "epoch": 54.29, "learning_rate": 4.321361940298508e-05, "loss": 0.0019, "step": 58200 }, { "epoch": 54.29, "learning_rate": 4.321315298507463e-05, "loss": 0.0, "step": 58204 }, { "epoch": 54.3, "learning_rate": 4.321268656716418e-05, "loss": 0.0, "step": 58208 }, { "epoch": 54.3, "learning_rate": 4.321222014925373e-05, "loss": 0.0, "step": 58212 }, { "epoch": 54.31, "learning_rate": 4.3211753731343287e-05, "loss": 0.0026, "step": 58216 }, { "epoch": 54.31, "learning_rate": 4.3211287313432835e-05, "loss": 0.0001, "step": 58220 }, { "epoch": 54.31, "learning_rate": 4.321082089552239e-05, "loss": 0.0003, "step": 58224 }, { "epoch": 54.32, "learning_rate": 4.3210354477611944e-05, "loss": 0.0, "step": 58228 }, { "epoch": 54.32, "learning_rate": 4.320988805970149e-05, "loss": 0.0001, "step": 58232 }, { "epoch": 54.32, "learning_rate": 4.320942164179105e-05, "loss": 0.0054, "step": 58236 }, { "epoch": 54.33, "learning_rate": 4.3208955223880596e-05, "loss": 0.0, "step": 58240 }, { "epoch": 54.33, "learning_rate": 4.320848880597015e-05, "loss": 0.0, "step": 58244 }, { "epoch": 54.34, "learning_rate": 4.3208022388059705e-05, "loss": 0.0, "step": 58248 }, { "epoch": 54.34, "learning_rate": 4.3207555970149254e-05, "loss": 0.0005, "step": 58252 }, { "epoch": 54.34, "learning_rate": 4.320708955223881e-05, "loss": 0.0003, "step": 58256 }, { "epoch": 54.35, "learning_rate": 4.3206623134328363e-05, "loss": 0.0, "step": 58260 }, { "epoch": 54.35, "learning_rate": 4.320615671641791e-05, "loss": 0.0, "step": 58264 }, { "epoch": 54.35, "learning_rate": 4.3205690298507466e-05, "loss": 0.0, "step": 58268 }, { "epoch": 54.36, "learning_rate": 4.3205223880597015e-05, "loss": 0.0, "step": 58272 }, { "epoch": 54.36, "learning_rate": 4.320475746268657e-05, "loss": 0.0, "step": 58276 }, { "epoch": 54.37, "learning_rate": 4.3204291044776124e-05, "loss": 0.0, "step": 58280 }, { "epoch": 54.37, "learning_rate": 4.320382462686567e-05, "loss": 0.0, "step": 58284 }, { "epoch": 54.37, "learning_rate": 4.320335820895523e-05, "loss": 0.0, "step": 58288 }, { "epoch": 54.38, "learning_rate": 4.320289179104478e-05, "loss": 0.0004, "step": 58292 }, { "epoch": 54.38, "learning_rate": 4.320242537313433e-05, "loss": 0.0, "step": 58296 }, { "epoch": 54.38, "learning_rate": 4.320195895522388e-05, "loss": 0.0, "step": 58300 }, { "epoch": 54.39, "learning_rate": 4.320149253731344e-05, "loss": 0.011, "step": 58304 }, { "epoch": 54.39, "learning_rate": 4.320102611940299e-05, "loss": 0.0052, "step": 58308 }, { "epoch": 54.4, "learning_rate": 4.3200559701492537e-05, "loss": 0.0001, "step": 58312 }, { "epoch": 54.4, "learning_rate": 4.320009328358209e-05, "loss": 0.0, "step": 58316 }, { "epoch": 54.4, "learning_rate": 4.3199626865671646e-05, "loss": 0.0, "step": 58320 }, { "epoch": 54.41, "learning_rate": 4.3199160447761195e-05, "loss": 0.0001, "step": 58324 }, { "epoch": 54.41, "learning_rate": 4.319869402985075e-05, "loss": 0.0, "step": 58328 }, { "epoch": 54.41, "learning_rate": 4.31982276119403e-05, "loss": 0.0003, "step": 58332 }, { "epoch": 54.42, "learning_rate": 4.319776119402985e-05, "loss": 0.0001, "step": 58336 }, { "epoch": 54.42, "learning_rate": 4.319729477611941e-05, "loss": 0.001, "step": 58340 }, { "epoch": 54.43, "learning_rate": 4.3196828358208955e-05, "loss": 0.0021, "step": 58344 }, { "epoch": 54.43, "learning_rate": 4.319636194029851e-05, "loss": 0.0003, "step": 58348 }, { "epoch": 54.43, "learning_rate": 4.3195895522388065e-05, "loss": 0.0, "step": 58352 }, { "epoch": 54.44, "learning_rate": 4.3195429104477613e-05, "loss": 0.0001, "step": 58356 }, { "epoch": 54.44, "learning_rate": 4.319496268656716e-05, "loss": 0.0001, "step": 58360 }, { "epoch": 54.44, "learning_rate": 4.319449626865672e-05, "loss": 0.0, "step": 58364 }, { "epoch": 54.45, "learning_rate": 4.319402985074627e-05, "loss": 0.0, "step": 58368 }, { "epoch": 54.45, "learning_rate": 4.319356343283582e-05, "loss": 0.0, "step": 58372 }, { "epoch": 54.46, "learning_rate": 4.3193097014925374e-05, "loss": 0.0016, "step": 58376 }, { "epoch": 54.46, "learning_rate": 4.319263059701493e-05, "loss": 0.0, "step": 58380 }, { "epoch": 54.46, "learning_rate": 4.319216417910448e-05, "loss": 0.0001, "step": 58384 }, { "epoch": 54.47, "learning_rate": 4.319169776119403e-05, "loss": 0.0, "step": 58388 }, { "epoch": 54.47, "learning_rate": 4.319123134328358e-05, "loss": 0.0, "step": 58392 }, { "epoch": 54.47, "learning_rate": 4.3190764925373135e-05, "loss": 0.0, "step": 58396 }, { "epoch": 54.48, "learning_rate": 4.319029850746269e-05, "loss": 0.0001, "step": 58400 }, { "epoch": 54.48, "learning_rate": 4.318983208955224e-05, "loss": 0.0, "step": 58404 }, { "epoch": 54.49, "learning_rate": 4.318936567164179e-05, "loss": 0.0001, "step": 58408 }, { "epoch": 54.49, "learning_rate": 4.318889925373135e-05, "loss": 0.0004, "step": 58412 }, { "epoch": 54.49, "learning_rate": 4.3188432835820896e-05, "loss": 0.0004, "step": 58416 }, { "epoch": 54.5, "learning_rate": 4.3187966417910445e-05, "loss": 0.0, "step": 58420 }, { "epoch": 54.5, "learning_rate": 4.3187500000000006e-05, "loss": 0.0001, "step": 58424 }, { "epoch": 54.5, "learning_rate": 4.3187033582089554e-05, "loss": 0.0, "step": 58428 }, { "epoch": 54.51, "learning_rate": 4.318656716417911e-05, "loss": 0.0004, "step": 58432 }, { "epoch": 54.51, "learning_rate": 4.318610074626866e-05, "loss": 0.0001, "step": 58436 }, { "epoch": 54.51, "learning_rate": 4.318563432835821e-05, "loss": 0.0, "step": 58440 }, { "epoch": 54.52, "learning_rate": 4.318516791044777e-05, "loss": 0.0, "step": 58444 }, { "epoch": 54.52, "learning_rate": 4.3184701492537315e-05, "loss": 0.0008, "step": 58448 }, { "epoch": 54.53, "learning_rate": 4.3184235074626863e-05, "loss": 0.0001, "step": 58452 }, { "epoch": 54.53, "learning_rate": 4.3183768656716425e-05, "loss": 0.0, "step": 58456 }, { "epoch": 54.53, "learning_rate": 4.318330223880597e-05, "loss": 0.0, "step": 58460 }, { "epoch": 54.54, "learning_rate": 4.318283582089552e-05, "loss": 0.0017, "step": 58464 }, { "epoch": 54.54, "learning_rate": 4.3182369402985076e-05, "loss": 0.0006, "step": 58468 }, { "epoch": 54.54, "learning_rate": 4.318190298507463e-05, "loss": 0.0001, "step": 58472 }, { "epoch": 54.55, "learning_rate": 4.318143656716418e-05, "loss": 0.0, "step": 58476 }, { "epoch": 54.55, "learning_rate": 4.3180970149253734e-05, "loss": 0.0, "step": 58480 }, { "epoch": 54.56, "learning_rate": 4.318050373134329e-05, "loss": 0.0, "step": 58484 }, { "epoch": 54.56, "learning_rate": 4.318003731343284e-05, "loss": 0.0029, "step": 58488 }, { "epoch": 54.56, "learning_rate": 4.317957089552239e-05, "loss": 0.0, "step": 58492 }, { "epoch": 54.57, "learning_rate": 4.317910447761194e-05, "loss": 0.0005, "step": 58496 }, { "epoch": 54.57, "learning_rate": 4.3178638059701495e-05, "loss": 0.0001, "step": 58500 }, { "epoch": 54.57, "eval_exact_match": 0.7350096711798839, "eval_exec": 0.7717601547388782, "eval_loss": 0.4618161916732788, "eval_runtime": 1164.5608, "eval_samples_per_second": 0.888, "step": 58500 }, { "epoch": 54.57, "learning_rate": 4.317817164179105e-05, "loss": 0.0, "step": 58504 }, { "epoch": 54.58, "learning_rate": 4.31777052238806e-05, "loss": 0.0, "step": 58508 }, { "epoch": 54.58, "learning_rate": 4.3177238805970146e-05, "loss": 0.0, "step": 58512 }, { "epoch": 54.59, "learning_rate": 4.317677238805971e-05, "loss": 0.0, "step": 58516 }, { "epoch": 54.59, "learning_rate": 4.3176305970149256e-05, "loss": 0.0, "step": 58520 }, { "epoch": 54.59, "learning_rate": 4.3175839552238804e-05, "loss": 0.0, "step": 58524 }, { "epoch": 54.6, "learning_rate": 4.317537313432836e-05, "loss": 0.0, "step": 58528 }, { "epoch": 54.6, "learning_rate": 4.3174906716417914e-05, "loss": 0.0001, "step": 58532 }, { "epoch": 54.6, "learning_rate": 4.317444029850746e-05, "loss": 0.0, "step": 58536 }, { "epoch": 54.61, "learning_rate": 4.317397388059702e-05, "loss": 0.0, "step": 58540 }, { "epoch": 54.61, "learning_rate": 4.317350746268657e-05, "loss": 0.0, "step": 58544 }, { "epoch": 54.62, "learning_rate": 4.317304104477612e-05, "loss": 0.0, "step": 58548 }, { "epoch": 54.62, "learning_rate": 4.3172574626865675e-05, "loss": 0.0, "step": 58552 }, { "epoch": 54.62, "learning_rate": 4.317210820895522e-05, "loss": 0.0, "step": 58556 }, { "epoch": 54.63, "learning_rate": 4.317164179104478e-05, "loss": 0.0, "step": 58560 }, { "epoch": 54.63, "learning_rate": 4.317117537313433e-05, "loss": 0.0, "step": 58564 }, { "epoch": 54.63, "learning_rate": 4.317070895522388e-05, "loss": 0.0, "step": 58568 }, { "epoch": 54.64, "learning_rate": 4.317024253731343e-05, "loss": 0.004, "step": 58572 }, { "epoch": 54.64, "learning_rate": 4.316977611940299e-05, "loss": 0.0, "step": 58576 }, { "epoch": 54.65, "learning_rate": 4.316930970149254e-05, "loss": 0.0, "step": 58580 }, { "epoch": 54.65, "learning_rate": 4.316884328358209e-05, "loss": 0.0, "step": 58584 }, { "epoch": 54.65, "learning_rate": 4.316837686567164e-05, "loss": 0.0, "step": 58588 }, { "epoch": 54.66, "learning_rate": 4.31679104477612e-05, "loss": 0.0, "step": 58592 }, { "epoch": 54.66, "learning_rate": 4.316744402985075e-05, "loss": 0.0, "step": 58596 }, { "epoch": 54.66, "learning_rate": 4.31669776119403e-05, "loss": 0.0, "step": 58600 }, { "epoch": 54.67, "learning_rate": 4.316651119402985e-05, "loss": 0.0, "step": 58604 }, { "epoch": 54.67, "learning_rate": 4.316604477611941e-05, "loss": 0.0, "step": 58608 }, { "epoch": 54.68, "learning_rate": 4.316557835820896e-05, "loss": 0.0, "step": 58612 }, { "epoch": 54.68, "learning_rate": 4.3165111940298506e-05, "loss": 0.0, "step": 58616 }, { "epoch": 54.68, "learning_rate": 4.316464552238806e-05, "loss": 0.0, "step": 58620 }, { "epoch": 54.69, "learning_rate": 4.3164179104477616e-05, "loss": 0.0, "step": 58624 }, { "epoch": 54.69, "learning_rate": 4.3163712686567164e-05, "loss": 0.0, "step": 58628 }, { "epoch": 54.69, "learning_rate": 4.316324626865672e-05, "loss": 0.0001, "step": 58632 }, { "epoch": 54.7, "learning_rate": 4.3162779850746274e-05, "loss": 0.0, "step": 58636 }, { "epoch": 54.7, "learning_rate": 4.316231343283582e-05, "loss": 0.0, "step": 58640 }, { "epoch": 54.71, "learning_rate": 4.316184701492538e-05, "loss": 0.0, "step": 58644 }, { "epoch": 54.71, "learning_rate": 4.3161380597014925e-05, "loss": 0.0, "step": 58648 }, { "epoch": 54.71, "learning_rate": 4.316091417910448e-05, "loss": 0.0, "step": 58652 }, { "epoch": 54.72, "learning_rate": 4.3160447761194035e-05, "loss": 0.0, "step": 58656 }, { "epoch": 54.72, "learning_rate": 4.315998134328358e-05, "loss": 0.0, "step": 58660 }, { "epoch": 54.72, "learning_rate": 4.315951492537313e-05, "loss": 0.0001, "step": 58664 }, { "epoch": 54.73, "learning_rate": 4.315904850746269e-05, "loss": 0.0, "step": 58668 }, { "epoch": 54.73, "learning_rate": 4.315858208955224e-05, "loss": 0.0, "step": 58672 }, { "epoch": 54.73, "learning_rate": 4.315811567164179e-05, "loss": 0.0, "step": 58676 }, { "epoch": 54.74, "learning_rate": 4.3157649253731344e-05, "loss": 0.0004, "step": 58680 }, { "epoch": 54.74, "learning_rate": 4.31571828358209e-05, "loss": 0.0, "step": 58684 }, { "epoch": 54.75, "learning_rate": 4.315671641791045e-05, "loss": 0.0, "step": 58688 }, { "epoch": 54.75, "learning_rate": 4.315625e-05, "loss": 0.0, "step": 58692 }, { "epoch": 54.75, "learning_rate": 4.315578358208956e-05, "loss": 0.0, "step": 58696 }, { "epoch": 54.76, "learning_rate": 4.3155317164179105e-05, "loss": 0.0023, "step": 58700 }, { "epoch": 54.76, "learning_rate": 4.315485074626866e-05, "loss": 0.0039, "step": 58704 }, { "epoch": 54.76, "learning_rate": 4.315438432835821e-05, "loss": 0.0, "step": 58708 }, { "epoch": 54.77, "learning_rate": 4.315391791044776e-05, "loss": 0.0001, "step": 58712 }, { "epoch": 54.77, "learning_rate": 4.315345149253732e-05, "loss": 0.0, "step": 58716 }, { "epoch": 54.78, "learning_rate": 4.3152985074626866e-05, "loss": 0.0002, "step": 58720 }, { "epoch": 54.78, "learning_rate": 4.3152518656716414e-05, "loss": 0.0, "step": 58724 }, { "epoch": 54.78, "learning_rate": 4.3152052238805976e-05, "loss": 0.0, "step": 58728 }, { "epoch": 54.79, "learning_rate": 4.3151585820895524e-05, "loss": 0.0127, "step": 58732 }, { "epoch": 54.79, "learning_rate": 4.315111940298507e-05, "loss": 0.0, "step": 58736 }, { "epoch": 54.79, "learning_rate": 4.315065298507463e-05, "loss": 0.0006, "step": 58740 }, { "epoch": 54.8, "learning_rate": 4.315018656716418e-05, "loss": 0.0, "step": 58744 }, { "epoch": 54.8, "learning_rate": 4.314972014925374e-05, "loss": 0.0, "step": 58748 }, { "epoch": 54.81, "learning_rate": 4.3149253731343285e-05, "loss": 0.0009, "step": 58752 }, { "epoch": 54.81, "learning_rate": 4.314878731343284e-05, "loss": 0.0003, "step": 58756 }, { "epoch": 54.81, "learning_rate": 4.3148320895522395e-05, "loss": 0.0002, "step": 58760 }, { "epoch": 54.82, "learning_rate": 4.314785447761194e-05, "loss": 0.0001, "step": 58764 }, { "epoch": 54.82, "learning_rate": 4.314738805970149e-05, "loss": 0.0011, "step": 58768 }, { "epoch": 54.82, "learning_rate": 4.3146921641791046e-05, "loss": 0.0, "step": 58772 }, { "epoch": 54.83, "learning_rate": 4.31464552238806e-05, "loss": 0.0, "step": 58776 }, { "epoch": 54.83, "learning_rate": 4.314598880597015e-05, "loss": 0.0, "step": 58780 }, { "epoch": 54.84, "learning_rate": 4.3145522388059704e-05, "loss": 0.0, "step": 58784 }, { "epoch": 54.84, "learning_rate": 4.314505597014926e-05, "loss": 0.0003, "step": 58788 }, { "epoch": 54.84, "learning_rate": 4.314458955223881e-05, "loss": 0.0003, "step": 58792 }, { "epoch": 54.85, "learning_rate": 4.314412313432836e-05, "loss": 0.0005, "step": 58796 }, { "epoch": 54.85, "learning_rate": 4.314365671641791e-05, "loss": 0.0005, "step": 58800 }, { "epoch": 54.85, "learning_rate": 4.3143190298507465e-05, "loss": 0.0, "step": 58804 }, { "epoch": 54.86, "learning_rate": 4.314272388059702e-05, "loss": 0.0001, "step": 58808 }, { "epoch": 54.86, "learning_rate": 4.314225746268657e-05, "loss": 0.0, "step": 58812 }, { "epoch": 54.87, "learning_rate": 4.314179104477612e-05, "loss": 0.0, "step": 58816 }, { "epoch": 54.87, "learning_rate": 4.314132462686568e-05, "loss": 0.0002, "step": 58820 }, { "epoch": 54.87, "learning_rate": 4.3140858208955226e-05, "loss": 0.0, "step": 58824 }, { "epoch": 54.88, "learning_rate": 4.3140391791044774e-05, "loss": 0.0011, "step": 58828 }, { "epoch": 54.88, "learning_rate": 4.313992537313433e-05, "loss": 0.0, "step": 58832 }, { "epoch": 54.88, "learning_rate": 4.3139458955223884e-05, "loss": 0.0, "step": 58836 }, { "epoch": 54.89, "learning_rate": 4.313899253731343e-05, "loss": 0.0, "step": 58840 }, { "epoch": 54.89, "learning_rate": 4.313852611940299e-05, "loss": 0.0, "step": 58844 }, { "epoch": 54.9, "learning_rate": 4.313805970149254e-05, "loss": 0.0001, "step": 58848 }, { "epoch": 54.9, "learning_rate": 4.313759328358209e-05, "loss": 0.0, "step": 58852 }, { "epoch": 54.9, "learning_rate": 4.3137126865671645e-05, "loss": 0.0, "step": 58856 }, { "epoch": 54.91, "learning_rate": 4.313666044776119e-05, "loss": 0.0001, "step": 58860 }, { "epoch": 54.91, "learning_rate": 4.313619402985075e-05, "loss": 0.0001, "step": 58864 }, { "epoch": 54.91, "learning_rate": 4.31357276119403e-05, "loss": 0.0005, "step": 58868 }, { "epoch": 54.92, "learning_rate": 4.313526119402985e-05, "loss": 0.0, "step": 58872 }, { "epoch": 54.92, "learning_rate": 4.3134794776119406e-05, "loss": 0.0002, "step": 58876 }, { "epoch": 54.93, "learning_rate": 4.313432835820896e-05, "loss": 0.0, "step": 58880 }, { "epoch": 54.93, "learning_rate": 4.313386194029851e-05, "loss": 0.0006, "step": 58884 }, { "epoch": 54.93, "learning_rate": 4.313339552238806e-05, "loss": 0.0, "step": 58888 }, { "epoch": 54.94, "learning_rate": 4.313292910447761e-05, "loss": 0.0, "step": 58892 }, { "epoch": 54.94, "learning_rate": 4.313246268656717e-05, "loss": 0.0003, "step": 58896 }, { "epoch": 54.94, "learning_rate": 4.3131996268656715e-05, "loss": 0.0002, "step": 58900 }, { "epoch": 54.95, "learning_rate": 4.313152985074627e-05, "loss": 0.0, "step": 58904 }, { "epoch": 54.95, "learning_rate": 4.3131063432835825e-05, "loss": 0.0, "step": 58908 }, { "epoch": 54.96, "learning_rate": 4.313059701492538e-05, "loss": 0.0, "step": 58912 }, { "epoch": 54.96, "learning_rate": 4.313013059701493e-05, "loss": 0.0, "step": 58916 }, { "epoch": 54.96, "learning_rate": 4.3129664179104476e-05, "loss": 0.0001, "step": 58920 }, { "epoch": 54.97, "learning_rate": 4.312919776119404e-05, "loss": 0.0, "step": 58924 }, { "epoch": 54.97, "learning_rate": 4.3128731343283586e-05, "loss": 0.0, "step": 58928 }, { "epoch": 54.97, "learning_rate": 4.3128264925373134e-05, "loss": 0.0001, "step": 58932 }, { "epoch": 54.98, "learning_rate": 4.312779850746269e-05, "loss": 0.0, "step": 58936 }, { "epoch": 54.98, "learning_rate": 4.3127332089552244e-05, "loss": 0.0001, "step": 58940 }, { "epoch": 54.98, "learning_rate": 4.312686567164179e-05, "loss": 0.0003, "step": 58944 }, { "epoch": 54.99, "learning_rate": 4.312639925373135e-05, "loss": 0.0, "step": 58948 }, { "epoch": 54.99, "learning_rate": 4.3125932835820895e-05, "loss": 0.0, "step": 58952 }, { "epoch": 55.0, "learning_rate": 4.312546641791045e-05, "loss": 0.0008, "step": 58956 }, { "epoch": 55.0, "learning_rate": 4.3125000000000005e-05, "loss": 0.0004, "step": 58960 }, { "epoch": 55.0, "learning_rate": 4.312453358208955e-05, "loss": 0.0001, "step": 58964 }, { "epoch": 55.01, "learning_rate": 4.312406716417911e-05, "loss": 0.0, "step": 58968 }, { "epoch": 55.01, "learning_rate": 4.312360074626866e-05, "loss": 0.0, "step": 58972 }, { "epoch": 55.01, "learning_rate": 4.312313432835821e-05, "loss": 0.0, "step": 58976 }, { "epoch": 55.02, "learning_rate": 4.312266791044776e-05, "loss": 0.0, "step": 58980 }, { "epoch": 55.02, "learning_rate": 4.312220149253732e-05, "loss": 0.0, "step": 58984 }, { "epoch": 55.03, "learning_rate": 4.312173507462687e-05, "loss": 0.0002, "step": 58988 }, { "epoch": 55.03, "learning_rate": 4.312126865671642e-05, "loss": 0.0001, "step": 58992 }, { "epoch": 55.03, "learning_rate": 4.312080223880597e-05, "loss": 0.0, "step": 58996 }, { "epoch": 55.04, "learning_rate": 4.312033582089553e-05, "loss": 0.0, "step": 59000 }, { "epoch": 55.04, "eval_exact_match": 0.7446808510638298, "eval_exec": 0.781431334622824, "eval_loss": 0.4688223898410797, "eval_runtime": 1134.1914, "eval_samples_per_second": 0.912, "step": 59000 }, { "epoch": 55.04, "learning_rate": 4.3119869402985075e-05, "loss": 0.0026, "step": 59004 }, { "epoch": 55.04, "learning_rate": 4.311940298507463e-05, "loss": 0.0, "step": 59008 }, { "epoch": 55.05, "learning_rate": 4.311893656716418e-05, "loss": 0.0, "step": 59012 }, { "epoch": 55.05, "learning_rate": 4.311847014925373e-05, "loss": 0.0, "step": 59016 }, { "epoch": 55.06, "learning_rate": 4.311800373134329e-05, "loss": 0.0095, "step": 59020 }, { "epoch": 55.06, "learning_rate": 4.3117537313432836e-05, "loss": 0.0001, "step": 59024 }, { "epoch": 55.06, "learning_rate": 4.311707089552239e-05, "loss": 0.0, "step": 59028 }, { "epoch": 55.07, "learning_rate": 4.3116604477611946e-05, "loss": 0.0001, "step": 59032 }, { "epoch": 55.07, "learning_rate": 4.3116138059701494e-05, "loss": 0.0, "step": 59036 }, { "epoch": 55.07, "learning_rate": 4.311567164179104e-05, "loss": 0.0, "step": 59040 }, { "epoch": 55.08, "learning_rate": 4.3115205223880604e-05, "loss": 0.0, "step": 59044 }, { "epoch": 55.08, "learning_rate": 4.311473880597015e-05, "loss": 0.0002, "step": 59048 }, { "epoch": 55.09, "learning_rate": 4.31142723880597e-05, "loss": 0.0, "step": 59052 }, { "epoch": 55.09, "learning_rate": 4.3113805970149255e-05, "loss": 0.0, "step": 59056 }, { "epoch": 55.09, "learning_rate": 4.311333955223881e-05, "loss": 0.0, "step": 59060 }, { "epoch": 55.1, "learning_rate": 4.311287313432836e-05, "loss": 0.0004, "step": 59064 }, { "epoch": 55.1, "learning_rate": 4.311240671641791e-05, "loss": 0.0002, "step": 59068 }, { "epoch": 55.1, "learning_rate": 4.311194029850746e-05, "loss": 0.0, "step": 59072 }, { "epoch": 55.11, "learning_rate": 4.311147388059702e-05, "loss": 0.0008, "step": 59076 }, { "epoch": 55.11, "learning_rate": 4.311100746268657e-05, "loss": 0.0, "step": 59080 }, { "epoch": 55.12, "learning_rate": 4.311054104477612e-05, "loss": 0.0003, "step": 59084 }, { "epoch": 55.12, "learning_rate": 4.3110074626865674e-05, "loss": 0.0, "step": 59088 }, { "epoch": 55.12, "learning_rate": 4.310960820895523e-05, "loss": 0.0023, "step": 59092 }, { "epoch": 55.13, "learning_rate": 4.310914179104478e-05, "loss": 0.001, "step": 59096 }, { "epoch": 55.13, "learning_rate": 4.310867537313433e-05, "loss": 0.0, "step": 59100 }, { "epoch": 55.13, "learning_rate": 4.3108208955223886e-05, "loss": 0.0, "step": 59104 }, { "epoch": 55.14, "learning_rate": 4.3107742537313435e-05, "loss": 0.0007, "step": 59108 }, { "epoch": 55.14, "learning_rate": 4.310727611940299e-05, "loss": 0.0, "step": 59112 }, { "epoch": 55.15, "learning_rate": 4.310680970149254e-05, "loss": 0.0, "step": 59116 }, { "epoch": 55.15, "learning_rate": 4.310634328358209e-05, "loss": 0.0045, "step": 59120 }, { "epoch": 55.15, "learning_rate": 4.310587686567165e-05, "loss": 0.0012, "step": 59124 }, { "epoch": 55.16, "learning_rate": 4.3105410447761196e-05, "loss": 0.0028, "step": 59128 }, { "epoch": 55.16, "learning_rate": 4.3104944029850744e-05, "loss": 0.0, "step": 59132 }, { "epoch": 55.16, "learning_rate": 4.3104477611940305e-05, "loss": 0.0, "step": 59136 }, { "epoch": 55.17, "learning_rate": 4.3104011194029854e-05, "loss": 0.0, "step": 59140 }, { "epoch": 55.17, "learning_rate": 4.31035447761194e-05, "loss": 0.0, "step": 59144 }, { "epoch": 55.18, "learning_rate": 4.3103078358208957e-05, "loss": 0.0, "step": 59148 }, { "epoch": 55.18, "learning_rate": 4.310261194029851e-05, "loss": 0.0011, "step": 59152 }, { "epoch": 55.18, "learning_rate": 4.310214552238806e-05, "loss": 0.0, "step": 59156 }, { "epoch": 55.19, "learning_rate": 4.3101679104477615e-05, "loss": 0.0, "step": 59160 }, { "epoch": 55.19, "learning_rate": 4.310121268656717e-05, "loss": 0.0, "step": 59164 }, { "epoch": 55.19, "learning_rate": 4.310074626865672e-05, "loss": 0.0, "step": 59168 }, { "epoch": 55.2, "learning_rate": 4.310027985074627e-05, "loss": 0.0, "step": 59172 }, { "epoch": 55.2, "learning_rate": 4.309981343283582e-05, "loss": 0.0, "step": 59176 }, { "epoch": 55.21, "learning_rate": 4.3099347014925376e-05, "loss": 0.0, "step": 59180 }, { "epoch": 55.21, "learning_rate": 4.309888059701493e-05, "loss": 0.0001, "step": 59184 }, { "epoch": 55.21, "learning_rate": 4.309841417910448e-05, "loss": 0.0, "step": 59188 }, { "epoch": 55.22, "learning_rate": 4.309794776119403e-05, "loss": 0.0001, "step": 59192 }, { "epoch": 55.22, "learning_rate": 4.309748134328359e-05, "loss": 0.0001, "step": 59196 }, { "epoch": 55.22, "learning_rate": 4.3097014925373137e-05, "loss": 0.0, "step": 59200 }, { "epoch": 55.23, "learning_rate": 4.3096548507462685e-05, "loss": 0.0001, "step": 59204 }, { "epoch": 55.23, "learning_rate": 4.309608208955224e-05, "loss": 0.0, "step": 59208 }, { "epoch": 55.24, "learning_rate": 4.3095615671641794e-05, "loss": 0.0, "step": 59212 }, { "epoch": 55.24, "learning_rate": 4.309514925373134e-05, "loss": 0.0044, "step": 59216 }, { "epoch": 55.24, "learning_rate": 4.30946828358209e-05, "loss": 0.0009, "step": 59220 }, { "epoch": 55.25, "learning_rate": 4.309421641791045e-05, "loss": 0.0001, "step": 59224 }, { "epoch": 55.25, "learning_rate": 4.309375e-05, "loss": 0.0, "step": 59228 }, { "epoch": 55.25, "learning_rate": 4.3093283582089555e-05, "loss": 0.0001, "step": 59232 }, { "epoch": 55.26, "learning_rate": 4.3092817164179104e-05, "loss": 0.0, "step": 59236 }, { "epoch": 55.26, "learning_rate": 4.309235074626866e-05, "loss": 0.0001, "step": 59240 }, { "epoch": 55.26, "learning_rate": 4.3091884328358213e-05, "loss": 0.0, "step": 59244 }, { "epoch": 55.27, "learning_rate": 4.309141791044776e-05, "loss": 0.0011, "step": 59248 }, { "epoch": 55.27, "learning_rate": 4.3090951492537316e-05, "loss": 0.0, "step": 59252 }, { "epoch": 55.28, "learning_rate": 4.309048507462687e-05, "loss": 0.0, "step": 59256 }, { "epoch": 55.28, "learning_rate": 4.309001865671642e-05, "loss": 0.0, "step": 59260 }, { "epoch": 55.28, "learning_rate": 4.3089552238805974e-05, "loss": 0.0, "step": 59264 }, { "epoch": 55.29, "learning_rate": 4.308908582089552e-05, "loss": 0.0, "step": 59268 }, { "epoch": 55.29, "learning_rate": 4.308861940298508e-05, "loss": 0.0001, "step": 59272 }, { "epoch": 55.29, "learning_rate": 4.308815298507463e-05, "loss": 0.0044, "step": 59276 }, { "epoch": 55.3, "learning_rate": 4.308768656716418e-05, "loss": 0.0002, "step": 59280 }, { "epoch": 55.3, "learning_rate": 4.308722014925373e-05, "loss": 0.0, "step": 59284 }, { "epoch": 55.31, "learning_rate": 4.308675373134329e-05, "loss": 0.0, "step": 59288 }, { "epoch": 55.31, "learning_rate": 4.308628731343284e-05, "loss": 0.0, "step": 59292 }, { "epoch": 55.31, "learning_rate": 4.3085820895522387e-05, "loss": 0.0, "step": 59296 }, { "epoch": 55.32, "learning_rate": 4.308535447761194e-05, "loss": 0.0, "step": 59300 }, { "epoch": 55.32, "learning_rate": 4.3084888059701496e-05, "loss": 0.0, "step": 59304 }, { "epoch": 55.32, "learning_rate": 4.3084421641791044e-05, "loss": 0.0, "step": 59308 }, { "epoch": 55.33, "learning_rate": 4.30839552238806e-05, "loss": 0.0001, "step": 59312 }, { "epoch": 55.33, "learning_rate": 4.3083488805970154e-05, "loss": 0.0, "step": 59316 }, { "epoch": 55.34, "learning_rate": 4.30830223880597e-05, "loss": 0.0, "step": 59320 }, { "epoch": 55.34, "learning_rate": 4.308255597014926e-05, "loss": 0.0, "step": 59324 }, { "epoch": 55.34, "learning_rate": 4.3082089552238805e-05, "loss": 0.0001, "step": 59328 }, { "epoch": 55.35, "learning_rate": 4.308162313432836e-05, "loss": 0.0, "step": 59332 }, { "epoch": 55.35, "learning_rate": 4.3081156716417915e-05, "loss": 0.0, "step": 59336 }, { "epoch": 55.35, "learning_rate": 4.3080690298507463e-05, "loss": 0.0, "step": 59340 }, { "epoch": 55.36, "learning_rate": 4.308022388059701e-05, "loss": 0.0, "step": 59344 }, { "epoch": 55.36, "learning_rate": 4.307975746268657e-05, "loss": 0.0, "step": 59348 }, { "epoch": 55.37, "learning_rate": 4.307929104477612e-05, "loss": 0.0, "step": 59352 }, { "epoch": 55.37, "learning_rate": 4.307882462686567e-05, "loss": 0.0, "step": 59356 }, { "epoch": 55.37, "learning_rate": 4.3078358208955224e-05, "loss": 0.0, "step": 59360 }, { "epoch": 55.38, "learning_rate": 4.307789179104478e-05, "loss": 0.0, "step": 59364 }, { "epoch": 55.38, "learning_rate": 4.307742537313433e-05, "loss": 0.0004, "step": 59368 }, { "epoch": 55.38, "learning_rate": 4.307695895522388e-05, "loss": 0.0, "step": 59372 }, { "epoch": 55.39, "learning_rate": 4.307649253731344e-05, "loss": 0.0, "step": 59376 }, { "epoch": 55.39, "learning_rate": 4.3076026119402985e-05, "loss": 0.0, "step": 59380 }, { "epoch": 55.4, "learning_rate": 4.307555970149254e-05, "loss": 0.0045, "step": 59384 }, { "epoch": 55.4, "learning_rate": 4.307509328358209e-05, "loss": 0.0, "step": 59388 }, { "epoch": 55.4, "learning_rate": 4.307462686567164e-05, "loss": 0.0, "step": 59392 }, { "epoch": 55.41, "learning_rate": 4.30741604477612e-05, "loss": 0.0, "step": 59396 }, { "epoch": 55.41, "learning_rate": 4.3073694029850746e-05, "loss": 0.0001, "step": 59400 }, { "epoch": 55.41, "learning_rate": 4.30732276119403e-05, "loss": 0.0001, "step": 59404 }, { "epoch": 55.42, "learning_rate": 4.3072761194029856e-05, "loss": 0.0, "step": 59408 }, { "epoch": 55.42, "learning_rate": 4.3072294776119404e-05, "loss": 0.0, "step": 59412 }, { "epoch": 55.43, "learning_rate": 4.307182835820896e-05, "loss": 0.0001, "step": 59416 }, { "epoch": 55.43, "learning_rate": 4.307136194029851e-05, "loss": 0.0, "step": 59420 }, { "epoch": 55.43, "learning_rate": 4.307089552238806e-05, "loss": 0.0002, "step": 59424 }, { "epoch": 55.44, "learning_rate": 4.307042910447762e-05, "loss": 0.0025, "step": 59428 }, { "epoch": 55.44, "learning_rate": 4.3069962686567165e-05, "loss": 0.0, "step": 59432 }, { "epoch": 55.44, "learning_rate": 4.306949626865672e-05, "loss": 0.0, "step": 59436 }, { "epoch": 55.45, "learning_rate": 4.3069029850746275e-05, "loss": 0.0, "step": 59440 }, { "epoch": 55.45, "learning_rate": 4.306856343283582e-05, "loss": 0.0001, "step": 59444 }, { "epoch": 55.46, "learning_rate": 4.306809701492537e-05, "loss": 0.0, "step": 59448 }, { "epoch": 55.46, "learning_rate": 4.3067630597014926e-05, "loss": 0.0, "step": 59452 }, { "epoch": 55.46, "learning_rate": 4.306716417910448e-05, "loss": 0.0, "step": 59456 }, { "epoch": 55.47, "learning_rate": 4.306669776119403e-05, "loss": 0.0, "step": 59460 }, { "epoch": 55.47, "learning_rate": 4.3066231343283584e-05, "loss": 0.0, "step": 59464 }, { "epoch": 55.47, "learning_rate": 4.306576492537314e-05, "loss": 0.0, "step": 59468 }, { "epoch": 55.48, "learning_rate": 4.306529850746269e-05, "loss": 0.0003, "step": 59472 }, { "epoch": 55.48, "learning_rate": 4.306483208955224e-05, "loss": 0.0003, "step": 59476 }, { "epoch": 55.49, "learning_rate": 4.306436567164179e-05, "loss": 0.0002, "step": 59480 }, { "epoch": 55.49, "learning_rate": 4.3063899253731345e-05, "loss": 0.0, "step": 59484 }, { "epoch": 55.49, "learning_rate": 4.30634328358209e-05, "loss": 0.0006, "step": 59488 }, { "epoch": 55.5, "learning_rate": 4.306296641791045e-05, "loss": 0.0007, "step": 59492 }, { "epoch": 55.5, "learning_rate": 4.30625e-05, "loss": 0.0002, "step": 59496 }, { "epoch": 55.5, "learning_rate": 4.306203358208956e-05, "loss": 0.0003, "step": 59500 }, { "epoch": 55.5, "eval_exact_match": 0.7446808510638298, "eval_exec": 0.7756286266924565, "eval_loss": 0.48064661026000977, "eval_runtime": 1173.6666, "eval_samples_per_second": 0.881, "step": 59500 }, { "epoch": 55.51, "learning_rate": 4.3061567164179106e-05, "loss": 0.0001, "step": 59504 }, { "epoch": 55.51, "learning_rate": 4.3061100746268654e-05, "loss": 0.0, "step": 59508 }, { "epoch": 55.51, "learning_rate": 4.306063432835821e-05, "loss": 0.0001, "step": 59512 }, { "epoch": 55.52, "learning_rate": 4.3060167910447764e-05, "loss": 0.0001, "step": 59516 }, { "epoch": 55.52, "learning_rate": 4.305970149253731e-05, "loss": 0.0, "step": 59520 }, { "epoch": 55.53, "learning_rate": 4.305923507462687e-05, "loss": 0.001, "step": 59524 }, { "epoch": 55.53, "learning_rate": 4.305876865671642e-05, "loss": 0.0004, "step": 59528 }, { "epoch": 55.53, "learning_rate": 4.305830223880597e-05, "loss": 0.0002, "step": 59532 }, { "epoch": 55.54, "learning_rate": 4.3057835820895525e-05, "loss": 0.0, "step": 59536 }, { "epoch": 55.54, "learning_rate": 4.305736940298507e-05, "loss": 0.0, "step": 59540 }, { "epoch": 55.54, "learning_rate": 4.305690298507463e-05, "loss": 0.0, "step": 59544 }, { "epoch": 55.55, "learning_rate": 4.305643656716418e-05, "loss": 0.0001, "step": 59548 }, { "epoch": 55.55, "learning_rate": 4.305597014925373e-05, "loss": 0.0, "step": 59552 }, { "epoch": 55.56, "learning_rate": 4.3055503731343286e-05, "loss": 0.0003, "step": 59556 }, { "epoch": 55.56, "learning_rate": 4.305503731343284e-05, "loss": 0.0019, "step": 59560 }, { "epoch": 55.56, "learning_rate": 4.305457089552239e-05, "loss": 0.0, "step": 59564 }, { "epoch": 55.57, "learning_rate": 4.3054104477611944e-05, "loss": 0.0, "step": 59568 }, { "epoch": 55.57, "learning_rate": 4.305363805970149e-05, "loss": 0.0012, "step": 59572 }, { "epoch": 55.57, "learning_rate": 4.305317164179105e-05, "loss": 0.0, "step": 59576 }, { "epoch": 55.58, "learning_rate": 4.30527052238806e-05, "loss": 0.0, "step": 59580 }, { "epoch": 55.58, "learning_rate": 4.305223880597015e-05, "loss": 0.0, "step": 59584 }, { "epoch": 55.59, "learning_rate": 4.3051772388059705e-05, "loss": 0.0, "step": 59588 }, { "epoch": 55.59, "learning_rate": 4.305130597014926e-05, "loss": 0.0, "step": 59592 }, { "epoch": 55.59, "learning_rate": 4.305083955223881e-05, "loss": 0.0, "step": 59596 }, { "epoch": 55.6, "learning_rate": 4.3050373134328356e-05, "loss": 0.0003, "step": 59600 }, { "epoch": 55.6, "learning_rate": 4.304990671641792e-05, "loss": 0.0084, "step": 59604 }, { "epoch": 55.6, "learning_rate": 4.3049440298507466e-05, "loss": 0.0001, "step": 59608 }, { "epoch": 55.61, "learning_rate": 4.3048973880597014e-05, "loss": 0.0, "step": 59612 }, { "epoch": 55.61, "learning_rate": 4.304850746268657e-05, "loss": 0.0, "step": 59616 }, { "epoch": 55.62, "learning_rate": 4.3048041044776124e-05, "loss": 0.0, "step": 59620 }, { "epoch": 55.62, "learning_rate": 4.304757462686567e-05, "loss": 0.0, "step": 59624 }, { "epoch": 55.62, "learning_rate": 4.304710820895523e-05, "loss": 0.0, "step": 59628 }, { "epoch": 55.63, "learning_rate": 4.3046641791044775e-05, "loss": 0.0, "step": 59632 }, { "epoch": 55.63, "learning_rate": 4.304617537313433e-05, "loss": 0.0, "step": 59636 }, { "epoch": 55.63, "learning_rate": 4.3045708955223885e-05, "loss": 0.0, "step": 59640 }, { "epoch": 55.64, "learning_rate": 4.304524253731343e-05, "loss": 0.0, "step": 59644 }, { "epoch": 55.64, "learning_rate": 4.304477611940299e-05, "loss": 0.0, "step": 59648 }, { "epoch": 55.65, "learning_rate": 4.304430970149254e-05, "loss": 0.0, "step": 59652 }, { "epoch": 55.65, "learning_rate": 4.304384328358209e-05, "loss": 0.0001, "step": 59656 }, { "epoch": 55.65, "learning_rate": 4.304337686567164e-05, "loss": 0.0, "step": 59660 }, { "epoch": 55.66, "learning_rate": 4.30429104477612e-05, "loss": 0.0, "step": 59664 }, { "epoch": 55.66, "learning_rate": 4.304244402985075e-05, "loss": 0.0, "step": 59668 }, { "epoch": 55.66, "learning_rate": 4.30419776119403e-05, "loss": 0.0, "step": 59672 }, { "epoch": 55.67, "learning_rate": 4.304151119402985e-05, "loss": 0.0, "step": 59676 }, { "epoch": 55.67, "learning_rate": 4.304104477611941e-05, "loss": 0.0, "step": 59680 }, { "epoch": 55.68, "learning_rate": 4.3040578358208955e-05, "loss": 0.0, "step": 59684 }, { "epoch": 55.68, "learning_rate": 4.304011194029851e-05, "loss": 0.0, "step": 59688 }, { "epoch": 55.68, "learning_rate": 4.303964552238806e-05, "loss": 0.0001, "step": 59692 }, { "epoch": 55.69, "learning_rate": 4.303917910447761e-05, "loss": 0.0, "step": 59696 }, { "epoch": 55.69, "learning_rate": 4.303871268656717e-05, "loss": 0.0, "step": 59700 }, { "epoch": 55.69, "learning_rate": 4.3038246268656716e-05, "loss": 0.0, "step": 59704 }, { "epoch": 55.7, "learning_rate": 4.303777985074627e-05, "loss": 0.0, "step": 59708 }, { "epoch": 55.7, "learning_rate": 4.3037313432835826e-05, "loss": 0.0001, "step": 59712 }, { "epoch": 55.71, "learning_rate": 4.3036847014925374e-05, "loss": 0.0, "step": 59716 }, { "epoch": 55.71, "learning_rate": 4.303638059701492e-05, "loss": 0.0, "step": 59720 }, { "epoch": 55.71, "learning_rate": 4.3035914179104484e-05, "loss": 0.0, "step": 59724 }, { "epoch": 55.72, "learning_rate": 4.303544776119403e-05, "loss": 0.0037, "step": 59728 }, { "epoch": 55.72, "learning_rate": 4.303498134328359e-05, "loss": 0.0, "step": 59732 }, { "epoch": 55.72, "learning_rate": 4.3034514925373135e-05, "loss": 0.0, "step": 59736 }, { "epoch": 55.73, "learning_rate": 4.303404850746269e-05, "loss": 0.0, "step": 59740 }, { "epoch": 55.73, "learning_rate": 4.3033582089552245e-05, "loss": 0.0, "step": 59744 }, { "epoch": 55.73, "learning_rate": 4.303311567164179e-05, "loss": 0.0002, "step": 59748 }, { "epoch": 55.74, "learning_rate": 4.303264925373134e-05, "loss": 0.0, "step": 59752 }, { "epoch": 55.74, "learning_rate": 4.30321828358209e-05, "loss": 0.0, "step": 59756 }, { "epoch": 55.75, "learning_rate": 4.303171641791045e-05, "loss": 0.0, "step": 59760 }, { "epoch": 55.75, "learning_rate": 4.303125e-05, "loss": 0.0, "step": 59764 }, { "epoch": 55.75, "learning_rate": 4.3030783582089554e-05, "loss": 0.0, "step": 59768 }, { "epoch": 55.76, "learning_rate": 4.303031716417911e-05, "loss": 0.0001, "step": 59772 }, { "epoch": 55.76, "learning_rate": 4.302985074626866e-05, "loss": 0.0, "step": 59776 }, { "epoch": 55.76, "learning_rate": 4.302938432835821e-05, "loss": 0.0, "step": 59780 }, { "epoch": 55.77, "learning_rate": 4.302891791044777e-05, "loss": 0.0, "step": 59784 }, { "epoch": 55.77, "learning_rate": 4.3028451492537315e-05, "loss": 0.0, "step": 59788 }, { "epoch": 55.78, "learning_rate": 4.302798507462687e-05, "loss": 0.0, "step": 59792 }, { "epoch": 55.78, "learning_rate": 4.302751865671642e-05, "loss": 0.0001, "step": 59796 }, { "epoch": 55.78, "learning_rate": 4.302705223880597e-05, "loss": 0.0, "step": 59800 }, { "epoch": 55.79, "learning_rate": 4.302658582089553e-05, "loss": 0.0, "step": 59804 }, { "epoch": 55.79, "learning_rate": 4.3026119402985076e-05, "loss": 0.0001, "step": 59808 }, { "epoch": 55.79, "learning_rate": 4.3025652985074624e-05, "loss": 0.0, "step": 59812 }, { "epoch": 55.8, "learning_rate": 4.3025186567164186e-05, "loss": 0.0007, "step": 59816 }, { "epoch": 55.8, "learning_rate": 4.3024720149253734e-05, "loss": 0.0, "step": 59820 }, { "epoch": 55.81, "learning_rate": 4.302425373134328e-05, "loss": 0.0, "step": 59824 }, { "epoch": 55.81, "learning_rate": 4.302378731343284e-05, "loss": 0.0003, "step": 59828 }, { "epoch": 55.81, "learning_rate": 4.302332089552239e-05, "loss": 0.0, "step": 59832 }, { "epoch": 55.82, "learning_rate": 4.302285447761194e-05, "loss": 0.0, "step": 59836 }, { "epoch": 55.82, "learning_rate": 4.3022388059701495e-05, "loss": 0.0004, "step": 59840 }, { "epoch": 55.82, "learning_rate": 4.302192164179105e-05, "loss": 0.0001, "step": 59844 }, { "epoch": 55.83, "learning_rate": 4.30214552238806e-05, "loss": 0.0001, "step": 59848 }, { "epoch": 55.83, "learning_rate": 4.302098880597015e-05, "loss": 0.0, "step": 59852 }, { "epoch": 55.84, "learning_rate": 4.30205223880597e-05, "loss": 0.0, "step": 59856 }, { "epoch": 55.84, "learning_rate": 4.3020055970149256e-05, "loss": 0.0, "step": 59860 }, { "epoch": 55.84, "learning_rate": 4.301958955223881e-05, "loss": 0.0, "step": 59864 }, { "epoch": 55.85, "learning_rate": 4.301912313432836e-05, "loss": 0.0015, "step": 59868 }, { "epoch": 55.85, "learning_rate": 4.301865671641791e-05, "loss": 0.0, "step": 59872 }, { "epoch": 55.85, "learning_rate": 4.301819029850747e-05, "loss": 0.0, "step": 59876 }, { "epoch": 55.86, "learning_rate": 4.301772388059702e-05, "loss": 0.0, "step": 59880 }, { "epoch": 55.86, "learning_rate": 4.3017257462686565e-05, "loss": 0.0001, "step": 59884 }, { "epoch": 55.87, "learning_rate": 4.301679104477612e-05, "loss": 0.0001, "step": 59888 }, { "epoch": 55.87, "learning_rate": 4.3016324626865675e-05, "loss": 0.0, "step": 59892 }, { "epoch": 55.87, "learning_rate": 4.301585820895523e-05, "loss": 0.0, "step": 59896 }, { "epoch": 55.88, "learning_rate": 4.301539179104478e-05, "loss": 0.0001, "step": 59900 }, { "epoch": 55.88, "learning_rate": 4.301492537313433e-05, "loss": 0.0, "step": 59904 }, { "epoch": 55.88, "learning_rate": 4.301445895522389e-05, "loss": 0.0, "step": 59908 }, { "epoch": 55.89, "learning_rate": 4.3013992537313436e-05, "loss": 0.0009, "step": 59912 }, { "epoch": 55.89, "learning_rate": 4.3013526119402984e-05, "loss": 0.0, "step": 59916 }, { "epoch": 55.9, "learning_rate": 4.301305970149254e-05, "loss": 0.0, "step": 59920 }, { "epoch": 55.9, "learning_rate": 4.3012593283582094e-05, "loss": 0.0, "step": 59924 }, { "epoch": 55.9, "learning_rate": 4.301212686567164e-05, "loss": 0.0, "step": 59928 }, { "epoch": 55.91, "learning_rate": 4.30116604477612e-05, "loss": 0.0, "step": 59932 }, { "epoch": 55.91, "learning_rate": 4.301119402985075e-05, "loss": 0.0, "step": 59936 }, { "epoch": 55.91, "learning_rate": 4.30107276119403e-05, "loss": 0.0, "step": 59940 }, { "epoch": 55.92, "learning_rate": 4.3010261194029855e-05, "loss": 0.0, "step": 59944 }, { "epoch": 55.92, "learning_rate": 4.30097947761194e-05, "loss": 0.0, "step": 59948 }, { "epoch": 55.93, "learning_rate": 4.300932835820896e-05, "loss": 0.0, "step": 59952 }, { "epoch": 55.93, "learning_rate": 4.300886194029851e-05, "loss": 0.0, "step": 59956 }, { "epoch": 55.93, "learning_rate": 4.300839552238806e-05, "loss": 0.0, "step": 59960 }, { "epoch": 55.94, "learning_rate": 4.300792910447761e-05, "loss": 0.0, "step": 59964 }, { "epoch": 55.94, "learning_rate": 4.300746268656717e-05, "loss": 0.0008, "step": 59968 }, { "epoch": 55.94, "learning_rate": 4.300699626865672e-05, "loss": 0.0001, "step": 59972 }, { "epoch": 55.95, "learning_rate": 4.300652985074627e-05, "loss": 0.0, "step": 59976 }, { "epoch": 55.95, "learning_rate": 4.300606343283582e-05, "loss": 0.0, "step": 59980 }, { "epoch": 55.96, "learning_rate": 4.300559701492538e-05, "loss": 0.0, "step": 59984 }, { "epoch": 55.96, "learning_rate": 4.3005130597014925e-05, "loss": 0.0004, "step": 59988 }, { "epoch": 55.96, "learning_rate": 4.300466417910448e-05, "loss": 0.0, "step": 59992 }, { "epoch": 55.97, "learning_rate": 4.3004197761194035e-05, "loss": 0.0, "step": 59996 }, { "epoch": 55.97, "learning_rate": 4.300373134328358e-05, "loss": 0.0, "step": 60000 }, { "epoch": 55.97, "eval_exact_match": 0.7514506769825918, "eval_exec": 0.7843326885880078, "eval_loss": 0.5125755667686462, "eval_runtime": 1138.056, "eval_samples_per_second": 0.909, "step": 60000 }, { "epoch": 55.97, "learning_rate": 4.300326492537314e-05, "loss": 0.0, "step": 60004 }, { "epoch": 55.98, "learning_rate": 4.3002798507462686e-05, "loss": 0.0, "step": 60008 }, { "epoch": 55.98, "learning_rate": 4.300233208955224e-05, "loss": 0.0001, "step": 60012 }, { "epoch": 55.98, "learning_rate": 4.3001865671641796e-05, "loss": 0.0, "step": 60016 }, { "epoch": 55.99, "learning_rate": 4.3001399253731344e-05, "loss": 0.0, "step": 60020 }, { "epoch": 55.99, "learning_rate": 4.300093283582089e-05, "loss": 0.0, "step": 60024 }, { "epoch": 56.0, "learning_rate": 4.3000466417910454e-05, "loss": 0.0, "step": 60028 }, { "epoch": 56.0, "learning_rate": 4.3e-05, "loss": 0.0, "step": 60032 }, { "epoch": 56.0, "learning_rate": 4.299953358208955e-05, "loss": 0.0, "step": 60036 }, { "epoch": 56.01, "learning_rate": 4.2999067164179105e-05, "loss": 0.0001, "step": 60040 }, { "epoch": 56.01, "learning_rate": 4.299860074626866e-05, "loss": 0.0001, "step": 60044 }, { "epoch": 56.01, "learning_rate": 4.299813432835821e-05, "loss": 0.0014, "step": 60048 }, { "epoch": 56.02, "learning_rate": 4.299766791044776e-05, "loss": 0.0, "step": 60052 }, { "epoch": 56.02, "learning_rate": 4.299720149253732e-05, "loss": 0.0, "step": 60056 }, { "epoch": 56.03, "learning_rate": 4.299673507462687e-05, "loss": 0.0, "step": 60060 }, { "epoch": 56.03, "learning_rate": 4.299626865671642e-05, "loss": 0.0011, "step": 60064 }, { "epoch": 56.03, "learning_rate": 4.299580223880597e-05, "loss": 0.0005, "step": 60068 }, { "epoch": 56.04, "learning_rate": 4.299533582089553e-05, "loss": 0.0, "step": 60072 }, { "epoch": 56.04, "learning_rate": 4.299486940298508e-05, "loss": 0.0, "step": 60076 }, { "epoch": 56.04, "learning_rate": 4.299440298507463e-05, "loss": 0.0, "step": 60080 }, { "epoch": 56.05, "learning_rate": 4.299393656716418e-05, "loss": 0.0, "step": 60084 }, { "epoch": 56.05, "learning_rate": 4.2993470149253736e-05, "loss": 0.0, "step": 60088 }, { "epoch": 56.06, "learning_rate": 4.2993003731343285e-05, "loss": 0.0, "step": 60092 }, { "epoch": 56.06, "learning_rate": 4.299253731343284e-05, "loss": 0.0, "step": 60096 }, { "epoch": 56.06, "learning_rate": 4.299207089552239e-05, "loss": 0.0, "step": 60100 }, { "epoch": 56.07, "learning_rate": 4.299160447761194e-05, "loss": 0.0, "step": 60104 }, { "epoch": 56.07, "learning_rate": 4.29911380597015e-05, "loss": 0.0, "step": 60108 }, { "epoch": 56.07, "learning_rate": 4.2990671641791046e-05, "loss": 0.0005, "step": 60112 }, { "epoch": 56.08, "learning_rate": 4.29902052238806e-05, "loss": 0.0001, "step": 60116 }, { "epoch": 56.08, "learning_rate": 4.2989738805970155e-05, "loss": 0.0, "step": 60120 }, { "epoch": 56.09, "learning_rate": 4.2989272388059704e-05, "loss": 0.0, "step": 60124 }, { "epoch": 56.09, "learning_rate": 4.298880597014925e-05, "loss": 0.0001, "step": 60128 }, { "epoch": 56.09, "learning_rate": 4.2988339552238807e-05, "loss": 0.0, "step": 60132 }, { "epoch": 56.1, "learning_rate": 4.298787313432836e-05, "loss": 0.0008, "step": 60136 }, { "epoch": 56.1, "learning_rate": 4.298740671641791e-05, "loss": 0.0001, "step": 60140 }, { "epoch": 56.1, "learning_rate": 4.2986940298507465e-05, "loss": 0.0009, "step": 60144 }, { "epoch": 56.11, "learning_rate": 4.298647388059702e-05, "loss": 0.0, "step": 60148 }, { "epoch": 56.11, "learning_rate": 4.298600746268657e-05, "loss": 0.0021, "step": 60152 }, { "epoch": 56.12, "learning_rate": 4.298554104477612e-05, "loss": 0.0009, "step": 60156 }, { "epoch": 56.12, "learning_rate": 4.298507462686567e-05, "loss": 0.0, "step": 60160 }, { "epoch": 56.12, "learning_rate": 4.2984608208955226e-05, "loss": 0.0001, "step": 60164 }, { "epoch": 56.13, "learning_rate": 4.298414179104478e-05, "loss": 0.0008, "step": 60168 }, { "epoch": 56.13, "learning_rate": 4.298367537313433e-05, "loss": 0.0, "step": 60172 }, { "epoch": 56.13, "learning_rate": 4.2983208955223883e-05, "loss": 0.0, "step": 60176 }, { "epoch": 56.14, "learning_rate": 4.298274253731344e-05, "loss": 0.0, "step": 60180 }, { "epoch": 56.14, "learning_rate": 4.2982276119402987e-05, "loss": 0.0, "step": 60184 }, { "epoch": 56.15, "learning_rate": 4.2981809701492535e-05, "loss": 0.0, "step": 60188 }, { "epoch": 56.15, "learning_rate": 4.298134328358209e-05, "loss": 0.0, "step": 60192 }, { "epoch": 56.15, "learning_rate": 4.2980876865671644e-05, "loss": 0.0, "step": 60196 }, { "epoch": 56.16, "learning_rate": 4.298041044776119e-05, "loss": 0.0001, "step": 60200 }, { "epoch": 56.16, "learning_rate": 4.297994402985075e-05, "loss": 0.001, "step": 60204 }, { "epoch": 56.16, "learning_rate": 4.29794776119403e-05, "loss": 0.0, "step": 60208 }, { "epoch": 56.17, "learning_rate": 4.297901119402985e-05, "loss": 0.0, "step": 60212 }, { "epoch": 56.17, "learning_rate": 4.2978544776119405e-05, "loss": 0.0, "step": 60216 }, { "epoch": 56.18, "learning_rate": 4.2978078358208954e-05, "loss": 0.0, "step": 60220 }, { "epoch": 56.18, "learning_rate": 4.2977611940298515e-05, "loss": 0.0, "step": 60224 }, { "epoch": 56.18, "learning_rate": 4.297714552238806e-05, "loss": 0.0019, "step": 60228 }, { "epoch": 56.19, "learning_rate": 4.297667910447761e-05, "loss": 0.0, "step": 60232 }, { "epoch": 56.19, "learning_rate": 4.2976212686567166e-05, "loss": 0.0, "step": 60236 }, { "epoch": 56.19, "learning_rate": 4.297574626865672e-05, "loss": 0.0001, "step": 60240 }, { "epoch": 56.2, "learning_rate": 4.297527985074627e-05, "loss": 0.0001, "step": 60244 }, { "epoch": 56.2, "learning_rate": 4.2974813432835824e-05, "loss": 0.0, "step": 60248 }, { "epoch": 56.21, "learning_rate": 4.297434701492537e-05, "loss": 0.0, "step": 60252 }, { "epoch": 56.21, "learning_rate": 4.297388059701493e-05, "loss": 0.0056, "step": 60256 }, { "epoch": 56.21, "learning_rate": 4.297341417910448e-05, "loss": 0.0, "step": 60260 }, { "epoch": 56.22, "learning_rate": 4.297294776119403e-05, "loss": 0.0, "step": 60264 }, { "epoch": 56.22, "learning_rate": 4.2972481343283585e-05, "loss": 0.0, "step": 60268 }, { "epoch": 56.22, "learning_rate": 4.297201492537314e-05, "loss": 0.0, "step": 60272 }, { "epoch": 56.23, "learning_rate": 4.297154850746269e-05, "loss": 0.0, "step": 60276 }, { "epoch": 56.23, "learning_rate": 4.2971082089552237e-05, "loss": 0.0, "step": 60280 }, { "epoch": 56.24, "learning_rate": 4.29706156716418e-05, "loss": 0.0002, "step": 60284 }, { "epoch": 56.24, "learning_rate": 4.2970149253731346e-05, "loss": 0.0, "step": 60288 }, { "epoch": 56.24, "learning_rate": 4.2969682835820894e-05, "loss": 0.0, "step": 60292 }, { "epoch": 56.25, "learning_rate": 4.296921641791045e-05, "loss": 0.0, "step": 60296 }, { "epoch": 56.25, "learning_rate": 4.2968750000000004e-05, "loss": 0.0, "step": 60300 }, { "epoch": 56.25, "learning_rate": 4.296828358208955e-05, "loss": 0.0, "step": 60304 }, { "epoch": 56.26, "learning_rate": 4.296781716417911e-05, "loss": 0.0, "step": 60308 }, { "epoch": 56.26, "learning_rate": 4.2967350746268655e-05, "loss": 0.0, "step": 60312 }, { "epoch": 56.26, "learning_rate": 4.296688432835821e-05, "loss": 0.0, "step": 60316 }, { "epoch": 56.27, "learning_rate": 4.2966417910447765e-05, "loss": 0.0, "step": 60320 }, { "epoch": 56.27, "learning_rate": 4.2965951492537313e-05, "loss": 0.0, "step": 60324 }, { "epoch": 56.28, "learning_rate": 4.296548507462687e-05, "loss": 0.0, "step": 60328 }, { "epoch": 56.28, "learning_rate": 4.296501865671642e-05, "loss": 0.0, "step": 60332 }, { "epoch": 56.28, "learning_rate": 4.296455223880597e-05, "loss": 0.0005, "step": 60336 }, { "epoch": 56.29, "learning_rate": 4.296408582089552e-05, "loss": 0.0, "step": 60340 }, { "epoch": 56.29, "learning_rate": 4.296361940298508e-05, "loss": 0.0, "step": 60344 }, { "epoch": 56.29, "learning_rate": 4.296315298507463e-05, "loss": 0.0, "step": 60348 }, { "epoch": 56.3, "learning_rate": 4.296268656716418e-05, "loss": 0.0, "step": 60352 }, { "epoch": 56.3, "learning_rate": 4.296222014925373e-05, "loss": 0.0, "step": 60356 }, { "epoch": 56.31, "learning_rate": 4.296175373134329e-05, "loss": 0.0004, "step": 60360 }, { "epoch": 56.31, "learning_rate": 4.2961287313432835e-05, "loss": 0.0, "step": 60364 }, { "epoch": 56.31, "learning_rate": 4.296082089552239e-05, "loss": 0.0, "step": 60368 }, { "epoch": 56.32, "learning_rate": 4.296035447761194e-05, "loss": 0.0, "step": 60372 }, { "epoch": 56.32, "learning_rate": 4.295988805970149e-05, "loss": 0.0, "step": 60376 }, { "epoch": 56.32, "learning_rate": 4.295942164179105e-05, "loss": 0.0, "step": 60380 }, { "epoch": 56.33, "learning_rate": 4.2958955223880596e-05, "loss": 0.0, "step": 60384 }, { "epoch": 56.33, "learning_rate": 4.295848880597015e-05, "loss": 0.001, "step": 60388 }, { "epoch": 56.34, "learning_rate": 4.2958022388059706e-05, "loss": 0.0, "step": 60392 }, { "epoch": 56.34, "learning_rate": 4.2957555970149254e-05, "loss": 0.0, "step": 60396 }, { "epoch": 56.34, "learning_rate": 4.295708955223881e-05, "loss": 0.0032, "step": 60400 }, { "epoch": 56.35, "learning_rate": 4.2956623134328364e-05, "loss": 0.0, "step": 60404 }, { "epoch": 56.35, "learning_rate": 4.295615671641791e-05, "loss": 0.0, "step": 60408 }, { "epoch": 56.35, "learning_rate": 4.295569029850747e-05, "loss": 0.0, "step": 60412 }, { "epoch": 56.36, "learning_rate": 4.2955223880597015e-05, "loss": 0.0001, "step": 60416 }, { "epoch": 56.36, "learning_rate": 4.295475746268657e-05, "loss": 0.0008, "step": 60420 }, { "epoch": 56.37, "learning_rate": 4.2954291044776125e-05, "loss": 0.0, "step": 60424 }, { "epoch": 56.37, "learning_rate": 4.295382462686567e-05, "loss": 0.0, "step": 60428 }, { "epoch": 56.37, "learning_rate": 4.295335820895522e-05, "loss": 0.0, "step": 60432 }, { "epoch": 56.38, "learning_rate": 4.295289179104478e-05, "loss": 0.001, "step": 60436 }, { "epoch": 56.38, "learning_rate": 4.295242537313433e-05, "loss": 0.0001, "step": 60440 }, { "epoch": 56.38, "learning_rate": 4.295195895522388e-05, "loss": 0.0003, "step": 60444 }, { "epoch": 56.39, "learning_rate": 4.2951492537313434e-05, "loss": 0.0, "step": 60448 }, { "epoch": 56.39, "learning_rate": 4.295102611940299e-05, "loss": 0.0, "step": 60452 }, { "epoch": 56.4, "learning_rate": 4.295055970149254e-05, "loss": 0.0, "step": 60456 }, { "epoch": 56.4, "learning_rate": 4.295009328358209e-05, "loss": 0.0, "step": 60460 }, { "epoch": 56.4, "learning_rate": 4.294962686567165e-05, "loss": 0.0, "step": 60464 }, { "epoch": 56.41, "learning_rate": 4.2949160447761195e-05, "loss": 0.0, "step": 60468 }, { "epoch": 56.41, "learning_rate": 4.294869402985075e-05, "loss": 0.0, "step": 60472 }, { "epoch": 56.41, "learning_rate": 4.29482276119403e-05, "loss": 0.0, "step": 60476 }, { "epoch": 56.42, "learning_rate": 4.294776119402985e-05, "loss": 0.0004, "step": 60480 }, { "epoch": 56.42, "learning_rate": 4.294729477611941e-05, "loss": 0.0088, "step": 60484 }, { "epoch": 56.43, "learning_rate": 4.2946828358208956e-05, "loss": 0.0, "step": 60488 }, { "epoch": 56.43, "learning_rate": 4.2946361940298504e-05, "loss": 0.0, "step": 60492 }, { "epoch": 56.43, "learning_rate": 4.2945895522388066e-05, "loss": 0.0009, "step": 60496 }, { "epoch": 56.44, "learning_rate": 4.2945429104477614e-05, "loss": 0.0002, "step": 60500 }, { "epoch": 56.44, "eval_exact_match": 0.7524177949709865, "eval_exec": 0.7833655705996132, "eval_loss": 0.480731725692749, "eval_runtime": 1132.1694, "eval_samples_per_second": 0.913, "step": 60500 }, { "epoch": 56.44, "learning_rate": 4.294496268656716e-05, "loss": 0.0, "step": 60504 }, { "epoch": 56.44, "learning_rate": 4.294449626865672e-05, "loss": 0.0, "step": 60508 }, { "epoch": 56.45, "learning_rate": 4.294402985074627e-05, "loss": 0.0, "step": 60512 }, { "epoch": 56.45, "learning_rate": 4.294356343283582e-05, "loss": 0.0, "step": 60516 }, { "epoch": 56.46, "learning_rate": 4.2943097014925375e-05, "loss": 0.0, "step": 60520 }, { "epoch": 56.46, "learning_rate": 4.294263059701493e-05, "loss": 0.0, "step": 60524 }, { "epoch": 56.46, "learning_rate": 4.294216417910448e-05, "loss": 0.0, "step": 60528 }, { "epoch": 56.47, "learning_rate": 4.294169776119403e-05, "loss": 0.0, "step": 60532 }, { "epoch": 56.47, "learning_rate": 4.294123134328358e-05, "loss": 0.0, "step": 60536 }, { "epoch": 56.47, "learning_rate": 4.2940764925373136e-05, "loss": 0.0, "step": 60540 }, { "epoch": 56.48, "learning_rate": 4.294029850746269e-05, "loss": 0.0001, "step": 60544 }, { "epoch": 56.48, "learning_rate": 4.293983208955224e-05, "loss": 0.0, "step": 60548 }, { "epoch": 56.49, "learning_rate": 4.2939365671641794e-05, "loss": 0.0, "step": 60552 }, { "epoch": 56.49, "learning_rate": 4.293889925373135e-05, "loss": 0.0013, "step": 60556 }, { "epoch": 56.49, "learning_rate": 4.29384328358209e-05, "loss": 0.0, "step": 60560 }, { "epoch": 56.5, "learning_rate": 4.293796641791045e-05, "loss": 0.0, "step": 60564 }, { "epoch": 56.5, "learning_rate": 4.29375e-05, "loss": 0.0, "step": 60568 }, { "epoch": 56.5, "learning_rate": 4.2937033582089555e-05, "loss": 0.0, "step": 60572 }, { "epoch": 56.51, "learning_rate": 4.293656716417911e-05, "loss": 0.0, "step": 60576 }, { "epoch": 56.51, "learning_rate": 4.293610074626866e-05, "loss": 0.0, "step": 60580 }, { "epoch": 56.51, "learning_rate": 4.293563432835821e-05, "loss": 0.0, "step": 60584 }, { "epoch": 56.52, "learning_rate": 4.293516791044777e-05, "loss": 0.0, "step": 60588 }, { "epoch": 56.52, "learning_rate": 4.2934701492537316e-05, "loss": 0.0103, "step": 60592 }, { "epoch": 56.53, "learning_rate": 4.2934235074626864e-05, "loss": 0.0007, "step": 60596 }, { "epoch": 56.53, "learning_rate": 4.293376865671642e-05, "loss": 0.0001, "step": 60600 }, { "epoch": 56.53, "learning_rate": 4.2933302238805974e-05, "loss": 0.0, "step": 60604 }, { "epoch": 56.54, "learning_rate": 4.293283582089552e-05, "loss": 0.0, "step": 60608 }, { "epoch": 56.54, "learning_rate": 4.293236940298508e-05, "loss": 0.0007, "step": 60612 }, { "epoch": 56.54, "learning_rate": 4.293190298507463e-05, "loss": 0.0, "step": 60616 }, { "epoch": 56.55, "learning_rate": 4.293143656716418e-05, "loss": 0.0002, "step": 60620 }, { "epoch": 56.55, "learning_rate": 4.2930970149253735e-05, "loss": 0.0011, "step": 60624 }, { "epoch": 56.56, "learning_rate": 4.293050373134328e-05, "loss": 0.0001, "step": 60628 }, { "epoch": 56.56, "learning_rate": 4.293003731343284e-05, "loss": 0.0, "step": 60632 }, { "epoch": 56.56, "learning_rate": 4.292957089552239e-05, "loss": 0.0, "step": 60636 }, { "epoch": 56.57, "learning_rate": 4.292910447761194e-05, "loss": 0.0005, "step": 60640 }, { "epoch": 56.57, "learning_rate": 4.292863805970149e-05, "loss": 0.0, "step": 60644 }, { "epoch": 56.57, "learning_rate": 4.292817164179105e-05, "loss": 0.0, "step": 60648 }, { "epoch": 56.58, "learning_rate": 4.29277052238806e-05, "loss": 0.0, "step": 60652 }, { "epoch": 56.58, "learning_rate": 4.292723880597015e-05, "loss": 0.0, "step": 60656 }, { "epoch": 56.59, "learning_rate": 4.29267723880597e-05, "loss": 0.0002, "step": 60660 }, { "epoch": 56.59, "learning_rate": 4.292630597014926e-05, "loss": 0.0, "step": 60664 }, { "epoch": 56.59, "learning_rate": 4.2925839552238805e-05, "loss": 0.0001, "step": 60668 }, { "epoch": 56.6, "learning_rate": 4.292537313432836e-05, "loss": 0.0, "step": 60672 }, { "epoch": 56.6, "learning_rate": 4.2924906716417915e-05, "loss": 0.0001, "step": 60676 }, { "epoch": 56.6, "learning_rate": 4.292444029850746e-05, "loss": 0.0, "step": 60680 }, { "epoch": 56.61, "learning_rate": 4.292397388059702e-05, "loss": 0.0, "step": 60684 }, { "epoch": 56.61, "learning_rate": 4.2923507462686566e-05, "loss": 0.0, "step": 60688 }, { "epoch": 56.62, "learning_rate": 4.292304104477612e-05, "loss": 0.0, "step": 60692 }, { "epoch": 56.62, "learning_rate": 4.2922574626865676e-05, "loss": 0.0, "step": 60696 }, { "epoch": 56.62, "learning_rate": 4.2922108208955224e-05, "loss": 0.0001, "step": 60700 }, { "epoch": 56.63, "learning_rate": 4.292164179104477e-05, "loss": 0.0, "step": 60704 }, { "epoch": 56.63, "learning_rate": 4.2921175373134334e-05, "loss": 0.0, "step": 60708 }, { "epoch": 56.63, "learning_rate": 4.292070895522388e-05, "loss": 0.0, "step": 60712 }, { "epoch": 56.64, "learning_rate": 4.292024253731344e-05, "loss": 0.0, "step": 60716 }, { "epoch": 56.64, "learning_rate": 4.2919776119402985e-05, "loss": 0.0, "step": 60720 }, { "epoch": 56.65, "learning_rate": 4.291930970149254e-05, "loss": 0.0003, "step": 60724 }, { "epoch": 56.65, "learning_rate": 4.2918843283582095e-05, "loss": 0.0016, "step": 60728 }, { "epoch": 56.65, "learning_rate": 4.291837686567164e-05, "loss": 0.0197, "step": 60732 }, { "epoch": 56.66, "learning_rate": 4.29179104477612e-05, "loss": 0.0, "step": 60736 }, { "epoch": 56.66, "learning_rate": 4.291744402985075e-05, "loss": 0.0001, "step": 60740 }, { "epoch": 56.66, "learning_rate": 4.29169776119403e-05, "loss": 0.0, "step": 60744 }, { "epoch": 56.67, "learning_rate": 4.291651119402985e-05, "loss": 0.0002, "step": 60748 }, { "epoch": 56.67, "learning_rate": 4.291604477611941e-05, "loss": 0.0, "step": 60752 }, { "epoch": 56.68, "learning_rate": 4.291557835820896e-05, "loss": 0.0, "step": 60756 }, { "epoch": 56.68, "learning_rate": 4.291511194029851e-05, "loss": 0.0, "step": 60760 }, { "epoch": 56.68, "learning_rate": 4.291464552238806e-05, "loss": 0.0, "step": 60764 }, { "epoch": 56.69, "learning_rate": 4.291417910447762e-05, "loss": 0.0, "step": 60768 }, { "epoch": 56.69, "learning_rate": 4.2913712686567165e-05, "loss": 0.0, "step": 60772 }, { "epoch": 56.69, "learning_rate": 4.291324626865672e-05, "loss": 0.0, "step": 60776 }, { "epoch": 56.7, "learning_rate": 4.291277985074627e-05, "loss": 0.0001, "step": 60780 }, { "epoch": 56.7, "learning_rate": 4.291231343283582e-05, "loss": 0.0, "step": 60784 }, { "epoch": 56.71, "learning_rate": 4.291184701492538e-05, "loss": 0.0002, "step": 60788 }, { "epoch": 56.71, "learning_rate": 4.2911380597014926e-05, "loss": 0.0, "step": 60792 }, { "epoch": 56.71, "learning_rate": 4.291091417910448e-05, "loss": 0.0, "step": 60796 }, { "epoch": 56.72, "learning_rate": 4.2910447761194036e-05, "loss": 0.0001, "step": 60800 }, { "epoch": 56.72, "learning_rate": 4.2909981343283584e-05, "loss": 0.0001, "step": 60804 }, { "epoch": 56.72, "learning_rate": 4.290951492537313e-05, "loss": 0.0006, "step": 60808 }, { "epoch": 56.73, "learning_rate": 4.290904850746269e-05, "loss": 0.0, "step": 60812 }, { "epoch": 56.73, "learning_rate": 4.290858208955224e-05, "loss": 0.0006, "step": 60816 }, { "epoch": 56.73, "learning_rate": 4.290811567164179e-05, "loss": 0.0, "step": 60820 }, { "epoch": 56.74, "learning_rate": 4.2907649253731345e-05, "loss": 0.0, "step": 60824 }, { "epoch": 56.74, "learning_rate": 4.29071828358209e-05, "loss": 0.0, "step": 60828 }, { "epoch": 56.75, "learning_rate": 4.290671641791045e-05, "loss": 0.0011, "step": 60832 }, { "epoch": 56.75, "learning_rate": 4.290625e-05, "loss": 0.0, "step": 60836 }, { "epoch": 56.75, "learning_rate": 4.290578358208955e-05, "loss": 0.0003, "step": 60840 }, { "epoch": 56.76, "learning_rate": 4.2905317164179106e-05, "loss": 0.0002, "step": 60844 }, { "epoch": 56.76, "learning_rate": 4.290485074626866e-05, "loss": 0.0001, "step": 60848 }, { "epoch": 56.76, "learning_rate": 4.290438432835821e-05, "loss": 0.0, "step": 60852 }, { "epoch": 56.77, "learning_rate": 4.2903917910447764e-05, "loss": 0.0001, "step": 60856 }, { "epoch": 56.77, "learning_rate": 4.290345149253732e-05, "loss": 0.0, "step": 60860 }, { "epoch": 56.78, "learning_rate": 4.290298507462687e-05, "loss": 0.0, "step": 60864 }, { "epoch": 56.78, "learning_rate": 4.2902518656716415e-05, "loss": 0.0, "step": 60868 }, { "epoch": 56.78, "learning_rate": 4.290205223880597e-05, "loss": 0.001, "step": 60872 }, { "epoch": 56.79, "learning_rate": 4.2901585820895525e-05, "loss": 0.0, "step": 60876 }, { "epoch": 56.79, "learning_rate": 4.290111940298508e-05, "loss": 0.0, "step": 60880 }, { "epoch": 56.79, "learning_rate": 4.290065298507463e-05, "loss": 0.0, "step": 60884 }, { "epoch": 56.8, "learning_rate": 4.290018656716418e-05, "loss": 0.0, "step": 60888 }, { "epoch": 56.8, "learning_rate": 4.289972014925374e-05, "loss": 0.0, "step": 60892 }, { "epoch": 56.81, "learning_rate": 4.2899253731343286e-05, "loss": 0.0004, "step": 60896 }, { "epoch": 56.81, "learning_rate": 4.2898787313432834e-05, "loss": 0.0001, "step": 60900 }, { "epoch": 56.81, "learning_rate": 4.2898320895522396e-05, "loss": 0.0, "step": 60904 }, { "epoch": 56.82, "learning_rate": 4.2897854477611944e-05, "loss": 0.0, "step": 60908 }, { "epoch": 56.82, "learning_rate": 4.289738805970149e-05, "loss": 0.0002, "step": 60912 }, { "epoch": 56.82, "learning_rate": 4.289692164179105e-05, "loss": 0.0, "step": 60916 }, { "epoch": 56.83, "learning_rate": 4.28964552238806e-05, "loss": 0.0001, "step": 60920 }, { "epoch": 56.83, "learning_rate": 4.289598880597015e-05, "loss": 0.0, "step": 60924 }, { "epoch": 56.84, "learning_rate": 4.2895522388059705e-05, "loss": 0.0, "step": 60928 }, { "epoch": 56.84, "learning_rate": 4.289505597014925e-05, "loss": 0.0, "step": 60932 }, { "epoch": 56.84, "learning_rate": 4.289458955223881e-05, "loss": 0.0001, "step": 60936 }, { "epoch": 56.85, "learning_rate": 4.289412313432836e-05, "loss": 0.0, "step": 60940 }, { "epoch": 56.85, "learning_rate": 4.289365671641791e-05, "loss": 0.0001, "step": 60944 }, { "epoch": 56.85, "learning_rate": 4.2893190298507466e-05, "loss": 0.0, "step": 60948 }, { "epoch": 56.86, "learning_rate": 4.289272388059702e-05, "loss": 0.0, "step": 60952 }, { "epoch": 56.86, "learning_rate": 4.289225746268657e-05, "loss": 0.0, "step": 60956 }, { "epoch": 56.87, "learning_rate": 4.289179104477612e-05, "loss": 0.0, "step": 60960 }, { "epoch": 56.87, "learning_rate": 4.289132462686568e-05, "loss": 0.0, "step": 60964 }, { "epoch": 56.87, "learning_rate": 4.289085820895523e-05, "loss": 0.0, "step": 60968 }, { "epoch": 56.88, "learning_rate": 4.2890391791044775e-05, "loss": 0.0, "step": 60972 }, { "epoch": 56.88, "learning_rate": 4.288992537313433e-05, "loss": 0.0, "step": 60976 }, { "epoch": 56.88, "learning_rate": 4.2889458955223885e-05, "loss": 0.0001, "step": 60980 }, { "epoch": 56.89, "learning_rate": 4.288899253731343e-05, "loss": 0.0001, "step": 60984 }, { "epoch": 56.89, "learning_rate": 4.288852611940299e-05, "loss": 0.0, "step": 60988 }, { "epoch": 56.9, "learning_rate": 4.2888059701492536e-05, "loss": 0.0, "step": 60992 }, { "epoch": 56.9, "learning_rate": 4.288759328358209e-05, "loss": 0.0024, "step": 60996 }, { "epoch": 56.9, "learning_rate": 4.2887126865671646e-05, "loss": 0.0, "step": 61000 }, { "epoch": 56.9, "eval_exact_match": 0.7495164410058027, "eval_exec": 0.7775628626692457, "eval_loss": 0.4850376546382904, "eval_runtime": 1127.2294, "eval_samples_per_second": 0.917, "step": 61000 }, { "epoch": 56.91, "learning_rate": 4.2886660447761194e-05, "loss": 0.0001, "step": 61004 }, { "epoch": 56.91, "learning_rate": 4.288619402985075e-05, "loss": 0.0, "step": 61008 }, { "epoch": 56.91, "learning_rate": 4.2885727611940304e-05, "loss": 0.0, "step": 61012 }, { "epoch": 56.92, "learning_rate": 4.288526119402985e-05, "loss": 0.0, "step": 61016 }, { "epoch": 56.92, "learning_rate": 4.28847947761194e-05, "loss": 0.0, "step": 61020 }, { "epoch": 56.93, "learning_rate": 4.288432835820896e-05, "loss": 0.0, "step": 61024 }, { "epoch": 56.93, "learning_rate": 4.288386194029851e-05, "loss": 0.0, "step": 61028 }, { "epoch": 56.93, "learning_rate": 4.288339552238806e-05, "loss": 0.0023, "step": 61032 }, { "epoch": 56.94, "learning_rate": 4.288292910447761e-05, "loss": 0.0001, "step": 61036 }, { "epoch": 56.94, "learning_rate": 4.288246268656717e-05, "loss": 0.0, "step": 61040 }, { "epoch": 56.94, "learning_rate": 4.288199626865672e-05, "loss": 0.0, "step": 61044 }, { "epoch": 56.95, "learning_rate": 4.288152985074627e-05, "loss": 0.0, "step": 61048 }, { "epoch": 56.95, "learning_rate": 4.288106343283582e-05, "loss": 0.0, "step": 61052 }, { "epoch": 56.96, "learning_rate": 4.288059701492538e-05, "loss": 0.0, "step": 61056 }, { "epoch": 56.96, "learning_rate": 4.288013059701493e-05, "loss": 0.0, "step": 61060 }, { "epoch": 56.96, "learning_rate": 4.287966417910448e-05, "loss": 0.0, "step": 61064 }, { "epoch": 56.97, "learning_rate": 4.287919776119403e-05, "loss": 0.0008, "step": 61068 }, { "epoch": 56.97, "learning_rate": 4.2878731343283586e-05, "loss": 0.0, "step": 61072 }, { "epoch": 56.97, "learning_rate": 4.2878264925373135e-05, "loss": 0.0, "step": 61076 }, { "epoch": 56.98, "learning_rate": 4.287779850746269e-05, "loss": 0.0, "step": 61080 }, { "epoch": 56.98, "learning_rate": 4.2877332089552244e-05, "loss": 0.0, "step": 61084 }, { "epoch": 56.98, "learning_rate": 4.287686567164179e-05, "loss": 0.0, "step": 61088 }, { "epoch": 56.99, "learning_rate": 4.287639925373135e-05, "loss": 0.0, "step": 61092 }, { "epoch": 56.99, "learning_rate": 4.2875932835820896e-05, "loss": 0.0001, "step": 61096 }, { "epoch": 57.0, "learning_rate": 4.287546641791045e-05, "loss": 0.0, "step": 61100 }, { "epoch": 57.0, "learning_rate": 4.2875000000000005e-05, "loss": 0.0, "step": 61104 }, { "epoch": 57.0, "learning_rate": 4.2874533582089554e-05, "loss": 0.0003, "step": 61108 }, { "epoch": 57.01, "learning_rate": 4.28740671641791e-05, "loss": 0.0, "step": 61112 }, { "epoch": 57.01, "learning_rate": 4.287360074626866e-05, "loss": 0.0178, "step": 61116 }, { "epoch": 57.01, "learning_rate": 4.287313432835821e-05, "loss": 0.0, "step": 61120 }, { "epoch": 57.02, "learning_rate": 4.287266791044776e-05, "loss": 0.0, "step": 61124 }, { "epoch": 57.02, "learning_rate": 4.2872201492537315e-05, "loss": 0.0, "step": 61128 }, { "epoch": 57.03, "learning_rate": 4.287173507462687e-05, "loss": 0.0001, "step": 61132 }, { "epoch": 57.03, "learning_rate": 4.287126865671642e-05, "loss": 0.0103, "step": 61136 }, { "epoch": 57.03, "learning_rate": 4.287080223880597e-05, "loss": 0.0001, "step": 61140 }, { "epoch": 57.04, "learning_rate": 4.287033582089553e-05, "loss": 0.0, "step": 61144 }, { "epoch": 57.04, "learning_rate": 4.2869869402985075e-05, "loss": 0.0, "step": 61148 }, { "epoch": 57.04, "learning_rate": 4.286940298507463e-05, "loss": 0.0, "step": 61152 }, { "epoch": 57.05, "learning_rate": 4.286893656716418e-05, "loss": 0.0, "step": 61156 }, { "epoch": 57.05, "learning_rate": 4.2868470149253733e-05, "loss": 0.0006, "step": 61160 }, { "epoch": 57.06, "learning_rate": 4.286800373134329e-05, "loss": 0.0, "step": 61164 }, { "epoch": 57.06, "learning_rate": 4.2867537313432836e-05, "loss": 0.0, "step": 61168 }, { "epoch": 57.06, "learning_rate": 4.2867070895522385e-05, "loss": 0.0, "step": 61172 }, { "epoch": 57.07, "learning_rate": 4.2866604477611946e-05, "loss": 0.0, "step": 61176 }, { "epoch": 57.07, "learning_rate": 4.2866138059701494e-05, "loss": 0.0, "step": 61180 }, { "epoch": 57.07, "learning_rate": 4.286567164179104e-05, "loss": 0.0001, "step": 61184 }, { "epoch": 57.08, "learning_rate": 4.28652052238806e-05, "loss": 0.0, "step": 61188 }, { "epoch": 57.08, "learning_rate": 4.286473880597015e-05, "loss": 0.0, "step": 61192 }, { "epoch": 57.09, "learning_rate": 4.28642723880597e-05, "loss": 0.0, "step": 61196 }, { "epoch": 57.09, "learning_rate": 4.2863805970149255e-05, "loss": 0.0, "step": 61200 }, { "epoch": 57.09, "learning_rate": 4.286333955223881e-05, "loss": 0.0011, "step": 61204 }, { "epoch": 57.1, "learning_rate": 4.2862873134328365e-05, "loss": 0.0, "step": 61208 }, { "epoch": 57.1, "learning_rate": 4.286240671641791e-05, "loss": 0.0, "step": 61212 }, { "epoch": 57.1, "learning_rate": 4.286194029850746e-05, "loss": 0.0009, "step": 61216 }, { "epoch": 57.11, "learning_rate": 4.2861473880597016e-05, "loss": 0.0, "step": 61220 }, { "epoch": 57.11, "learning_rate": 4.286100746268657e-05, "loss": 0.0, "step": 61224 }, { "epoch": 57.12, "learning_rate": 4.286054104477612e-05, "loss": 0.0044, "step": 61228 }, { "epoch": 57.12, "learning_rate": 4.2860074626865674e-05, "loss": 0.0, "step": 61232 }, { "epoch": 57.12, "learning_rate": 4.285960820895523e-05, "loss": 0.0, "step": 61236 }, { "epoch": 57.13, "learning_rate": 4.285914179104478e-05, "loss": 0.0, "step": 61240 }, { "epoch": 57.13, "learning_rate": 4.285867537313433e-05, "loss": 0.0001, "step": 61244 }, { "epoch": 57.13, "learning_rate": 4.285820895522388e-05, "loss": 0.0, "step": 61248 }, { "epoch": 57.14, "learning_rate": 4.2857742537313435e-05, "loss": 0.0, "step": 61252 }, { "epoch": 57.14, "learning_rate": 4.285727611940299e-05, "loss": 0.0001, "step": 61256 }, { "epoch": 57.15, "learning_rate": 4.285680970149254e-05, "loss": 0.0, "step": 61260 }, { "epoch": 57.15, "learning_rate": 4.285634328358209e-05, "loss": 0.0, "step": 61264 }, { "epoch": 57.15, "learning_rate": 4.285587686567165e-05, "loss": 0.0, "step": 61268 }, { "epoch": 57.16, "learning_rate": 4.2855410447761196e-05, "loss": 0.0, "step": 61272 }, { "epoch": 57.16, "learning_rate": 4.2854944029850744e-05, "loss": 0.0, "step": 61276 }, { "epoch": 57.16, "learning_rate": 4.28544776119403e-05, "loss": 0.0, "step": 61280 }, { "epoch": 57.17, "learning_rate": 4.2854011194029854e-05, "loss": 0.0, "step": 61284 }, { "epoch": 57.17, "learning_rate": 4.28535447761194e-05, "loss": 0.0, "step": 61288 }, { "epoch": 57.18, "learning_rate": 4.285307835820896e-05, "loss": 0.0, "step": 61292 }, { "epoch": 57.18, "learning_rate": 4.285261194029851e-05, "loss": 0.0, "step": 61296 }, { "epoch": 57.18, "learning_rate": 4.285214552238806e-05, "loss": 0.0, "step": 61300 }, { "epoch": 57.19, "learning_rate": 4.2851679104477615e-05, "loss": 0.0, "step": 61304 }, { "epoch": 57.19, "learning_rate": 4.285121268656716e-05, "loss": 0.0, "step": 61308 }, { "epoch": 57.19, "learning_rate": 4.285074626865672e-05, "loss": 0.0007, "step": 61312 }, { "epoch": 57.2, "learning_rate": 4.285027985074627e-05, "loss": 0.0, "step": 61316 }, { "epoch": 57.2, "learning_rate": 4.284981343283582e-05, "loss": 0.0, "step": 61320 }, { "epoch": 57.21, "learning_rate": 4.284934701492537e-05, "loss": 0.0001, "step": 61324 }, { "epoch": 57.21, "learning_rate": 4.284888059701493e-05, "loss": 0.0, "step": 61328 }, { "epoch": 57.21, "learning_rate": 4.284841417910448e-05, "loss": 0.0008, "step": 61332 }, { "epoch": 57.22, "learning_rate": 4.284794776119403e-05, "loss": 0.0001, "step": 61336 }, { "epoch": 57.22, "learning_rate": 4.284748134328358e-05, "loss": 0.0001, "step": 61340 }, { "epoch": 57.22, "learning_rate": 4.284701492537314e-05, "loss": 0.0003, "step": 61344 }, { "epoch": 57.23, "learning_rate": 4.2846548507462685e-05, "loss": 0.0, "step": 61348 }, { "epoch": 57.23, "learning_rate": 4.284608208955224e-05, "loss": 0.0, "step": 61352 }, { "epoch": 57.24, "learning_rate": 4.2845615671641795e-05, "loss": 0.0, "step": 61356 }, { "epoch": 57.24, "learning_rate": 4.284514925373134e-05, "loss": 0.0001, "step": 61360 }, { "epoch": 57.24, "learning_rate": 4.28446828358209e-05, "loss": 0.0001, "step": 61364 }, { "epoch": 57.25, "learning_rate": 4.2844216417910446e-05, "loss": 0.0, "step": 61368 }, { "epoch": 57.25, "learning_rate": 4.284375000000001e-05, "loss": 0.0, "step": 61372 }, { "epoch": 57.25, "learning_rate": 4.2843283582089556e-05, "loss": 0.0, "step": 61376 }, { "epoch": 57.26, "learning_rate": 4.2842817164179104e-05, "loss": 0.0, "step": 61380 }, { "epoch": 57.26, "learning_rate": 4.284235074626866e-05, "loss": 0.0, "step": 61384 }, { "epoch": 57.26, "learning_rate": 4.2841884328358214e-05, "loss": 0.0, "step": 61388 }, { "epoch": 57.27, "learning_rate": 4.284141791044776e-05, "loss": 0.0, "step": 61392 }, { "epoch": 57.27, "learning_rate": 4.284095149253732e-05, "loss": 0.0001, "step": 61396 }, { "epoch": 57.28, "learning_rate": 4.2840485074626865e-05, "loss": 0.0001, "step": 61400 }, { "epoch": 57.28, "learning_rate": 4.284001865671642e-05, "loss": 0.0, "step": 61404 }, { "epoch": 57.28, "learning_rate": 4.2839552238805975e-05, "loss": 0.0, "step": 61408 }, { "epoch": 57.29, "learning_rate": 4.283908582089552e-05, "loss": 0.0, "step": 61412 }, { "epoch": 57.29, "learning_rate": 4.283861940298508e-05, "loss": 0.0, "step": 61416 }, { "epoch": 57.29, "learning_rate": 4.283815298507463e-05, "loss": 0.0, "step": 61420 }, { "epoch": 57.3, "learning_rate": 4.283768656716418e-05, "loss": 0.0, "step": 61424 }, { "epoch": 57.3, "learning_rate": 4.283722014925373e-05, "loss": 0.0, "step": 61428 }, { "epoch": 57.31, "learning_rate": 4.283675373134329e-05, "loss": 0.0004, "step": 61432 }, { "epoch": 57.31, "learning_rate": 4.283628731343284e-05, "loss": 0.0001, "step": 61436 }, { "epoch": 57.31, "learning_rate": 4.283582089552239e-05, "loss": 0.0, "step": 61440 }, { "epoch": 57.32, "learning_rate": 4.283535447761194e-05, "loss": 0.0004, "step": 61444 }, { "epoch": 57.32, "learning_rate": 4.28348880597015e-05, "loss": 0.0, "step": 61448 }, { "epoch": 57.32, "learning_rate": 4.2834421641791045e-05, "loss": 0.0, "step": 61452 }, { "epoch": 57.33, "learning_rate": 4.28339552238806e-05, "loss": 0.0025, "step": 61456 }, { "epoch": 57.33, "learning_rate": 4.283348880597015e-05, "loss": 0.0002, "step": 61460 }, { "epoch": 57.34, "learning_rate": 4.28330223880597e-05, "loss": 0.0, "step": 61464 }, { "epoch": 57.34, "learning_rate": 4.283255597014926e-05, "loss": 0.0, "step": 61468 }, { "epoch": 57.34, "learning_rate": 4.2832089552238806e-05, "loss": 0.0, "step": 61472 }, { "epoch": 57.35, "learning_rate": 4.283162313432836e-05, "loss": 0.0, "step": 61476 }, { "epoch": 57.35, "learning_rate": 4.2831156716417916e-05, "loss": 0.0, "step": 61480 }, { "epoch": 57.35, "learning_rate": 4.2830690298507464e-05, "loss": 0.0, "step": 61484 }, { "epoch": 57.36, "learning_rate": 4.283022388059701e-05, "loss": 0.0, "step": 61488 }, { "epoch": 57.36, "learning_rate": 4.282975746268657e-05, "loss": 0.0009, "step": 61492 }, { "epoch": 57.37, "learning_rate": 4.282929104477612e-05, "loss": 0.0001, "step": 61496 }, { "epoch": 57.37, "learning_rate": 4.282882462686567e-05, "loss": 0.0, "step": 61500 }, { "epoch": 57.37, "eval_exact_match": 0.7514506769825918, "eval_exec": 0.7843326885880078, "eval_loss": 0.4877242147922516, "eval_runtime": 1153.3063, "eval_samples_per_second": 0.897, "step": 61500 }, { "epoch": 57.37, "learning_rate": 4.2828358208955225e-05, "loss": 0.0, "step": 61504 }, { "epoch": 57.38, "learning_rate": 4.282789179104478e-05, "loss": 0.0, "step": 61508 }, { "epoch": 57.38, "learning_rate": 4.282742537313433e-05, "loss": 0.0001, "step": 61512 }, { "epoch": 57.38, "learning_rate": 4.282695895522388e-05, "loss": 0.0, "step": 61516 }, { "epoch": 57.39, "learning_rate": 4.282649253731343e-05, "loss": 0.0, "step": 61520 }, { "epoch": 57.39, "learning_rate": 4.2826026119402986e-05, "loss": 0.0038, "step": 61524 }, { "epoch": 57.4, "learning_rate": 4.282555970149254e-05, "loss": 0.0001, "step": 61528 }, { "epoch": 57.4, "learning_rate": 4.282509328358209e-05, "loss": 0.0, "step": 61532 }, { "epoch": 57.4, "learning_rate": 4.2824626865671644e-05, "loss": 0.0, "step": 61536 }, { "epoch": 57.41, "learning_rate": 4.28241604477612e-05, "loss": 0.0, "step": 61540 }, { "epoch": 57.41, "learning_rate": 4.282369402985075e-05, "loss": 0.0, "step": 61544 }, { "epoch": 57.41, "learning_rate": 4.28232276119403e-05, "loss": 0.0, "step": 61548 }, { "epoch": 57.42, "learning_rate": 4.282276119402985e-05, "loss": 0.0, "step": 61552 }, { "epoch": 57.42, "learning_rate": 4.2822294776119405e-05, "loss": 0.0, "step": 61556 }, { "epoch": 57.43, "learning_rate": 4.282182835820896e-05, "loss": 0.0, "step": 61560 }, { "epoch": 57.43, "learning_rate": 4.282136194029851e-05, "loss": 0.0, "step": 61564 }, { "epoch": 57.43, "learning_rate": 4.282089552238806e-05, "loss": 0.0, "step": 61568 }, { "epoch": 57.44, "learning_rate": 4.282042910447762e-05, "loss": 0.0, "step": 61572 }, { "epoch": 57.44, "learning_rate": 4.2819962686567166e-05, "loss": 0.0, "step": 61576 }, { "epoch": 57.44, "learning_rate": 4.2819496268656714e-05, "loss": 0.0002, "step": 61580 }, { "epoch": 57.45, "learning_rate": 4.2819029850746276e-05, "loss": 0.0007, "step": 61584 }, { "epoch": 57.45, "learning_rate": 4.2818563432835824e-05, "loss": 0.0, "step": 61588 }, { "epoch": 57.46, "learning_rate": 4.281809701492537e-05, "loss": 0.0, "step": 61592 }, { "epoch": 57.46, "learning_rate": 4.281763059701493e-05, "loss": 0.0, "step": 61596 }, { "epoch": 57.46, "learning_rate": 4.281716417910448e-05, "loss": 0.0, "step": 61600 }, { "epoch": 57.47, "learning_rate": 4.281669776119403e-05, "loss": 0.0001, "step": 61604 }, { "epoch": 57.47, "learning_rate": 4.2816231343283585e-05, "loss": 0.0, "step": 61608 }, { "epoch": 57.47, "learning_rate": 4.281576492537313e-05, "loss": 0.0, "step": 61612 }, { "epoch": 57.48, "learning_rate": 4.281529850746269e-05, "loss": 0.0, "step": 61616 }, { "epoch": 57.48, "learning_rate": 4.281483208955224e-05, "loss": 0.0, "step": 61620 }, { "epoch": 57.49, "learning_rate": 4.281436567164179e-05, "loss": 0.0003, "step": 61624 }, { "epoch": 57.49, "learning_rate": 4.2813899253731346e-05, "loss": 0.0, "step": 61628 }, { "epoch": 57.49, "learning_rate": 4.28134328358209e-05, "loss": 0.0, "step": 61632 }, { "epoch": 57.5, "learning_rate": 4.281296641791045e-05, "loss": 0.0, "step": 61636 }, { "epoch": 57.5, "learning_rate": 4.28125e-05, "loss": 0.0, "step": 61640 }, { "epoch": 57.5, "learning_rate": 4.281203358208956e-05, "loss": 0.0, "step": 61644 }, { "epoch": 57.51, "learning_rate": 4.281156716417911e-05, "loss": 0.0035, "step": 61648 }, { "epoch": 57.51, "learning_rate": 4.2811100746268655e-05, "loss": 0.0, "step": 61652 }, { "epoch": 57.51, "learning_rate": 4.281063432835821e-05, "loss": 0.0002, "step": 61656 }, { "epoch": 57.52, "learning_rate": 4.2810167910447765e-05, "loss": 0.0, "step": 61660 }, { "epoch": 57.52, "learning_rate": 4.280970149253731e-05, "loss": 0.0, "step": 61664 }, { "epoch": 57.53, "learning_rate": 4.280923507462687e-05, "loss": 0.0, "step": 61668 }, { "epoch": 57.53, "learning_rate": 4.2808768656716416e-05, "loss": 0.0, "step": 61672 }, { "epoch": 57.53, "learning_rate": 4.280830223880597e-05, "loss": 0.0001, "step": 61676 }, { "epoch": 57.54, "learning_rate": 4.2807835820895526e-05, "loss": 0.0, "step": 61680 }, { "epoch": 57.54, "learning_rate": 4.2807369402985074e-05, "loss": 0.0, "step": 61684 }, { "epoch": 57.54, "learning_rate": 4.280690298507463e-05, "loss": 0.0, "step": 61688 }, { "epoch": 57.55, "learning_rate": 4.2806436567164184e-05, "loss": 0.0, "step": 61692 }, { "epoch": 57.55, "learning_rate": 4.280597014925373e-05, "loss": 0.0, "step": 61696 }, { "epoch": 57.56, "learning_rate": 4.280550373134329e-05, "loss": 0.0, "step": 61700 }, { "epoch": 57.56, "learning_rate": 4.280503731343284e-05, "loss": 0.0, "step": 61704 }, { "epoch": 57.56, "learning_rate": 4.280457089552239e-05, "loss": 0.0, "step": 61708 }, { "epoch": 57.57, "learning_rate": 4.2804104477611945e-05, "loss": 0.0, "step": 61712 }, { "epoch": 57.57, "learning_rate": 4.280363805970149e-05, "loss": 0.0, "step": 61716 }, { "epoch": 57.57, "learning_rate": 4.280317164179105e-05, "loss": 0.0, "step": 61720 }, { "epoch": 57.58, "learning_rate": 4.28027052238806e-05, "loss": 0.0001, "step": 61724 }, { "epoch": 57.58, "learning_rate": 4.280223880597015e-05, "loss": 0.0, "step": 61728 }, { "epoch": 57.59, "learning_rate": 4.28017723880597e-05, "loss": 0.0, "step": 61732 }, { "epoch": 57.59, "learning_rate": 4.280130597014926e-05, "loss": 0.0, "step": 61736 }, { "epoch": 57.59, "learning_rate": 4.280083955223881e-05, "loss": 0.0, "step": 61740 }, { "epoch": 57.6, "learning_rate": 4.280037313432836e-05, "loss": 0.0142, "step": 61744 }, { "epoch": 57.6, "learning_rate": 4.279990671641791e-05, "loss": 0.0, "step": 61748 }, { "epoch": 57.6, "learning_rate": 4.279944029850747e-05, "loss": 0.0003, "step": 61752 }, { "epoch": 57.61, "learning_rate": 4.2798973880597015e-05, "loss": 0.0, "step": 61756 }, { "epoch": 57.61, "learning_rate": 4.279850746268657e-05, "loss": 0.0003, "step": 61760 }, { "epoch": 57.62, "learning_rate": 4.2798041044776125e-05, "loss": 0.0, "step": 61764 }, { "epoch": 57.62, "learning_rate": 4.279757462686567e-05, "loss": 0.0, "step": 61768 }, { "epoch": 57.62, "learning_rate": 4.279710820895523e-05, "loss": 0.0, "step": 61772 }, { "epoch": 57.63, "learning_rate": 4.2796641791044776e-05, "loss": 0.0002, "step": 61776 }, { "epoch": 57.63, "learning_rate": 4.279617537313433e-05, "loss": 0.0009, "step": 61780 }, { "epoch": 57.63, "learning_rate": 4.2795708955223886e-05, "loss": 0.0, "step": 61784 }, { "epoch": 57.64, "learning_rate": 4.2795242537313434e-05, "loss": 0.0, "step": 61788 }, { "epoch": 57.64, "learning_rate": 4.279477611940298e-05, "loss": 0.0003, "step": 61792 }, { "epoch": 57.65, "learning_rate": 4.2794309701492544e-05, "loss": 0.0002, "step": 61796 }, { "epoch": 57.65, "learning_rate": 4.279384328358209e-05, "loss": 0.0, "step": 61800 }, { "epoch": 57.65, "learning_rate": 4.279337686567164e-05, "loss": 0.0, "step": 61804 }, { "epoch": 57.66, "learning_rate": 4.2792910447761195e-05, "loss": 0.0, "step": 61808 }, { "epoch": 57.66, "learning_rate": 4.279244402985075e-05, "loss": 0.0, "step": 61812 }, { "epoch": 57.66, "learning_rate": 4.27919776119403e-05, "loss": 0.0, "step": 61816 }, { "epoch": 57.67, "learning_rate": 4.279151119402985e-05, "loss": 0.0, "step": 61820 }, { "epoch": 57.67, "learning_rate": 4.279104477611941e-05, "loss": 0.0003, "step": 61824 }, { "epoch": 57.68, "learning_rate": 4.2790578358208956e-05, "loss": 0.0, "step": 61828 }, { "epoch": 57.68, "learning_rate": 4.279011194029851e-05, "loss": 0.0001, "step": 61832 }, { "epoch": 57.68, "learning_rate": 4.278964552238806e-05, "loss": 0.0, "step": 61836 }, { "epoch": 57.69, "learning_rate": 4.2789179104477614e-05, "loss": 0.0, "step": 61840 }, { "epoch": 57.69, "learning_rate": 4.278871268656717e-05, "loss": 0.0011, "step": 61844 }, { "epoch": 57.69, "learning_rate": 4.278824626865672e-05, "loss": 0.0, "step": 61848 }, { "epoch": 57.7, "learning_rate": 4.2787779850746265e-05, "loss": 0.0, "step": 61852 }, { "epoch": 57.7, "learning_rate": 4.2787313432835827e-05, "loss": 0.0005, "step": 61856 }, { "epoch": 57.71, "learning_rate": 4.2786847014925375e-05, "loss": 0.0, "step": 61860 }, { "epoch": 57.71, "learning_rate": 4.278638059701493e-05, "loss": 0.0001, "step": 61864 }, { "epoch": 57.71, "learning_rate": 4.278591417910448e-05, "loss": 0.0, "step": 61868 }, { "epoch": 57.72, "learning_rate": 4.278544776119403e-05, "loss": 0.0003, "step": 61872 }, { "epoch": 57.72, "learning_rate": 4.278498134328359e-05, "loss": 0.0014, "step": 61876 }, { "epoch": 57.72, "learning_rate": 4.2784514925373136e-05, "loss": 0.0, "step": 61880 }, { "epoch": 57.73, "learning_rate": 4.278404850746269e-05, "loss": 0.0, "step": 61884 }, { "epoch": 57.73, "learning_rate": 4.2783582089552246e-05, "loss": 0.0, "step": 61888 }, { "epoch": 57.73, "learning_rate": 4.2783115671641794e-05, "loss": 0.0, "step": 61892 }, { "epoch": 57.74, "learning_rate": 4.278264925373134e-05, "loss": 0.0, "step": 61896 }, { "epoch": 57.74, "learning_rate": 4.27821828358209e-05, "loss": 0.0, "step": 61900 }, { "epoch": 57.75, "learning_rate": 4.278171641791045e-05, "loss": 0.0, "step": 61904 }, { "epoch": 57.75, "learning_rate": 4.278125e-05, "loss": 0.0, "step": 61908 }, { "epoch": 57.75, "learning_rate": 4.2780783582089555e-05, "loss": 0.0, "step": 61912 }, { "epoch": 57.76, "learning_rate": 4.278031716417911e-05, "loss": 0.0, "step": 61916 }, { "epoch": 57.76, "learning_rate": 4.277985074626866e-05, "loss": 0.0, "step": 61920 }, { "epoch": 57.76, "learning_rate": 4.277938432835821e-05, "loss": 0.0, "step": 61924 }, { "epoch": 57.77, "learning_rate": 4.277891791044776e-05, "loss": 0.0024, "step": 61928 }, { "epoch": 57.77, "learning_rate": 4.2778451492537316e-05, "loss": 0.0, "step": 61932 }, { "epoch": 57.78, "learning_rate": 4.277798507462687e-05, "loss": 0.0019, "step": 61936 }, { "epoch": 57.78, "learning_rate": 4.277751865671642e-05, "loss": 0.0, "step": 61940 }, { "epoch": 57.78, "learning_rate": 4.2777052238805974e-05, "loss": 0.0001, "step": 61944 }, { "epoch": 57.79, "learning_rate": 4.277658582089553e-05, "loss": 0.0, "step": 61948 }, { "epoch": 57.79, "learning_rate": 4.2776119402985077e-05, "loss": 0.0, "step": 61952 }, { "epoch": 57.79, "learning_rate": 4.2775652985074625e-05, "loss": 0.0, "step": 61956 }, { "epoch": 57.8, "learning_rate": 4.277518656716418e-05, "loss": 0.0, "step": 61960 }, { "epoch": 57.8, "learning_rate": 4.2774720149253735e-05, "loss": 0.0, "step": 61964 }, { "epoch": 57.81, "learning_rate": 4.277425373134328e-05, "loss": 0.0, "step": 61968 }, { "epoch": 57.81, "learning_rate": 4.277378731343284e-05, "loss": 0.0, "step": 61972 }, { "epoch": 57.81, "learning_rate": 4.277332089552239e-05, "loss": 0.0, "step": 61976 }, { "epoch": 57.82, "learning_rate": 4.277285447761194e-05, "loss": 0.0, "step": 61980 }, { "epoch": 57.82, "learning_rate": 4.2772388059701496e-05, "loss": 0.0, "step": 61984 }, { "epoch": 57.82, "learning_rate": 4.2771921641791044e-05, "loss": 0.0, "step": 61988 }, { "epoch": 57.83, "learning_rate": 4.27714552238806e-05, "loss": 0.0, "step": 61992 }, { "epoch": 57.83, "learning_rate": 4.2770988805970153e-05, "loss": 0.0, "step": 61996 }, { "epoch": 57.84, "learning_rate": 4.27705223880597e-05, "loss": 0.0, "step": 62000 }, { "epoch": 57.84, "eval_exact_match": 0.7514506769825918, "eval_exec": 0.7872340425531915, "eval_loss": 0.4987480640411377, "eval_runtime": 1158.3298, "eval_samples_per_second": 0.893, "step": 62000 }, { "epoch": 57.84, "learning_rate": 4.2770055970149257e-05, "loss": 0.0, "step": 62004 }, { "epoch": 57.84, "learning_rate": 4.276958955223881e-05, "loss": 0.0, "step": 62008 }, { "epoch": 57.85, "learning_rate": 4.276912313432836e-05, "loss": 0.0, "step": 62012 }, { "epoch": 57.85, "learning_rate": 4.2768656716417914e-05, "loss": 0.0003, "step": 62016 }, { "epoch": 57.85, "learning_rate": 4.276819029850746e-05, "loss": 0.0, "step": 62020 }, { "epoch": 57.86, "learning_rate": 4.276772388059702e-05, "loss": 0.0035, "step": 62024 }, { "epoch": 57.86, "learning_rate": 4.276725746268657e-05, "loss": 0.0, "step": 62028 }, { "epoch": 57.87, "learning_rate": 4.276679104477612e-05, "loss": 0.0, "step": 62032 }, { "epoch": 57.87, "learning_rate": 4.2766324626865675e-05, "loss": 0.0, "step": 62036 }, { "epoch": 57.87, "learning_rate": 4.276585820895523e-05, "loss": 0.0, "step": 62040 }, { "epoch": 57.88, "learning_rate": 4.276539179104478e-05, "loss": 0.0, "step": 62044 }, { "epoch": 57.88, "learning_rate": 4.276492537313433e-05, "loss": 0.0, "step": 62048 }, { "epoch": 57.88, "learning_rate": 4.276445895522389e-05, "loss": 0.0, "step": 62052 }, { "epoch": 57.89, "learning_rate": 4.2763992537313436e-05, "loss": 0.0, "step": 62056 }, { "epoch": 57.89, "learning_rate": 4.2763526119402985e-05, "loss": 0.0, "step": 62060 }, { "epoch": 57.9, "learning_rate": 4.276305970149254e-05, "loss": 0.0, "step": 62064 }, { "epoch": 57.9, "learning_rate": 4.2762593283582094e-05, "loss": 0.0, "step": 62068 }, { "epoch": 57.9, "learning_rate": 4.276212686567164e-05, "loss": 0.0, "step": 62072 }, { "epoch": 57.91, "learning_rate": 4.27616604477612e-05, "loss": 0.0, "step": 62076 }, { "epoch": 57.91, "learning_rate": 4.2761194029850746e-05, "loss": 0.0, "step": 62080 }, { "epoch": 57.91, "learning_rate": 4.27607276119403e-05, "loss": 0.0001, "step": 62084 }, { "epoch": 57.92, "learning_rate": 4.2760261194029855e-05, "loss": 0.0001, "step": 62088 }, { "epoch": 57.92, "learning_rate": 4.2759794776119404e-05, "loss": 0.0, "step": 62092 }, { "epoch": 57.93, "learning_rate": 4.275932835820896e-05, "loss": 0.0, "step": 62096 }, { "epoch": 57.93, "learning_rate": 4.275886194029851e-05, "loss": 0.0, "step": 62100 }, { "epoch": 57.93, "learning_rate": 4.275839552238806e-05, "loss": 0.0027, "step": 62104 }, { "epoch": 57.94, "learning_rate": 4.275792910447761e-05, "loss": 0.0, "step": 62108 }, { "epoch": 57.94, "learning_rate": 4.275746268656717e-05, "loss": 0.0, "step": 62112 }, { "epoch": 57.94, "learning_rate": 4.275699626865672e-05, "loss": 0.0001, "step": 62116 }, { "epoch": 57.95, "learning_rate": 4.275652985074627e-05, "loss": 0.0, "step": 62120 }, { "epoch": 57.95, "learning_rate": 4.275606343283582e-05, "loss": 0.0, "step": 62124 }, { "epoch": 57.96, "learning_rate": 4.275559701492538e-05, "loss": 0.0, "step": 62128 }, { "epoch": 57.96, "learning_rate": 4.2755130597014925e-05, "loss": 0.0001, "step": 62132 }, { "epoch": 57.96, "learning_rate": 4.275466417910448e-05, "loss": 0.0001, "step": 62136 }, { "epoch": 57.97, "learning_rate": 4.275419776119403e-05, "loss": 0.0, "step": 62140 }, { "epoch": 57.97, "learning_rate": 4.2753731343283583e-05, "loss": 0.0, "step": 62144 }, { "epoch": 57.97, "learning_rate": 4.275326492537314e-05, "loss": 0.0014, "step": 62148 }, { "epoch": 57.98, "learning_rate": 4.2752798507462686e-05, "loss": 0.0, "step": 62152 }, { "epoch": 57.98, "learning_rate": 4.275233208955224e-05, "loss": 0.0, "step": 62156 }, { "epoch": 57.98, "learning_rate": 4.2751865671641796e-05, "loss": 0.0001, "step": 62160 }, { "epoch": 57.99, "learning_rate": 4.2751399253731344e-05, "loss": 0.0025, "step": 62164 }, { "epoch": 57.99, "learning_rate": 4.275093283582089e-05, "loss": 0.0, "step": 62168 }, { "epoch": 58.0, "learning_rate": 4.275046641791045e-05, "loss": 0.003, "step": 62172 }, { "epoch": 58.0, "learning_rate": 4.275e-05, "loss": 0.0001, "step": 62176 }, { "epoch": 58.0, "learning_rate": 4.274953358208956e-05, "loss": 0.0, "step": 62180 }, { "epoch": 58.01, "learning_rate": 4.2749067164179105e-05, "loss": 0.0007, "step": 62184 }, { "epoch": 58.01, "learning_rate": 4.274860074626866e-05, "loss": 0.0, "step": 62188 }, { "epoch": 58.01, "learning_rate": 4.2748134328358215e-05, "loss": 0.0001, "step": 62192 }, { "epoch": 58.02, "learning_rate": 4.274766791044776e-05, "loss": 0.0, "step": 62196 }, { "epoch": 58.02, "learning_rate": 4.274720149253731e-05, "loss": 0.0002, "step": 62200 }, { "epoch": 58.03, "learning_rate": 4.274673507462687e-05, "loss": 0.0, "step": 62204 }, { "epoch": 58.03, "learning_rate": 4.274626865671642e-05, "loss": 0.0, "step": 62208 }, { "epoch": 58.03, "learning_rate": 4.274580223880597e-05, "loss": 0.0, "step": 62212 }, { "epoch": 58.04, "learning_rate": 4.2745335820895524e-05, "loss": 0.0, "step": 62216 }, { "epoch": 58.04, "learning_rate": 4.274486940298508e-05, "loss": 0.0, "step": 62220 }, { "epoch": 58.04, "learning_rate": 4.274440298507463e-05, "loss": 0.0, "step": 62224 }, { "epoch": 58.05, "learning_rate": 4.274393656716418e-05, "loss": 0.0, "step": 62228 }, { "epoch": 58.05, "learning_rate": 4.274347014925373e-05, "loss": 0.0, "step": 62232 }, { "epoch": 58.06, "learning_rate": 4.2743003731343285e-05, "loss": 0.0, "step": 62236 }, { "epoch": 58.06, "learning_rate": 4.274253731343284e-05, "loss": 0.0, "step": 62240 }, { "epoch": 58.06, "learning_rate": 4.274207089552239e-05, "loss": 0.0, "step": 62244 }, { "epoch": 58.07, "learning_rate": 4.274160447761194e-05, "loss": 0.0, "step": 62248 }, { "epoch": 58.07, "learning_rate": 4.27411380597015e-05, "loss": 0.0, "step": 62252 }, { "epoch": 58.07, "learning_rate": 4.2740671641791046e-05, "loss": 0.0, "step": 62256 }, { "epoch": 58.08, "learning_rate": 4.2740205223880594e-05, "loss": 0.0, "step": 62260 }, { "epoch": 58.08, "learning_rate": 4.2739738805970156e-05, "loss": 0.0066, "step": 62264 }, { "epoch": 58.09, "learning_rate": 4.2739272388059704e-05, "loss": 0.0, "step": 62268 }, { "epoch": 58.09, "learning_rate": 4.273880597014925e-05, "loss": 0.0, "step": 62272 }, { "epoch": 58.09, "learning_rate": 4.273833955223881e-05, "loss": 0.0009, "step": 62276 }, { "epoch": 58.1, "learning_rate": 4.273787313432836e-05, "loss": 0.0, "step": 62280 }, { "epoch": 58.1, "learning_rate": 4.273740671641791e-05, "loss": 0.0, "step": 62284 }, { "epoch": 58.1, "learning_rate": 4.2736940298507465e-05, "loss": 0.0015, "step": 62288 }, { "epoch": 58.11, "learning_rate": 4.273647388059701e-05, "loss": 0.0, "step": 62292 }, { "epoch": 58.11, "learning_rate": 4.273600746268657e-05, "loss": 0.0005, "step": 62296 }, { "epoch": 58.12, "learning_rate": 4.273554104477612e-05, "loss": 0.002, "step": 62300 }, { "epoch": 58.12, "learning_rate": 4.273507462686567e-05, "loss": 0.0, "step": 62304 }, { "epoch": 58.12, "learning_rate": 4.2734608208955226e-05, "loss": 0.0006, "step": 62308 }, { "epoch": 58.13, "learning_rate": 4.273414179104478e-05, "loss": 0.0007, "step": 62312 }, { "epoch": 58.13, "learning_rate": 4.273367537313433e-05, "loss": 0.0, "step": 62316 }, { "epoch": 58.13, "learning_rate": 4.273320895522388e-05, "loss": 0.0, "step": 62320 }, { "epoch": 58.14, "learning_rate": 4.273274253731344e-05, "loss": 0.0, "step": 62324 }, { "epoch": 58.14, "learning_rate": 4.273227611940299e-05, "loss": 0.0, "step": 62328 }, { "epoch": 58.15, "learning_rate": 4.2731809701492535e-05, "loss": 0.0, "step": 62332 }, { "epoch": 58.15, "learning_rate": 4.273134328358209e-05, "loss": 0.0, "step": 62336 }, { "epoch": 58.15, "learning_rate": 4.2730876865671645e-05, "loss": 0.0009, "step": 62340 }, { "epoch": 58.16, "learning_rate": 4.27304104477612e-05, "loss": 0.0021, "step": 62344 }, { "epoch": 58.16, "learning_rate": 4.272994402985075e-05, "loss": 0.0, "step": 62348 }, { "epoch": 58.16, "learning_rate": 4.2729477611940296e-05, "loss": 0.0, "step": 62352 }, { "epoch": 58.17, "learning_rate": 4.272901119402986e-05, "loss": 0.0, "step": 62356 }, { "epoch": 58.17, "learning_rate": 4.2728544776119406e-05, "loss": 0.0006, "step": 62360 }, { "epoch": 58.18, "learning_rate": 4.2728078358208954e-05, "loss": 0.0, "step": 62364 }, { "epoch": 58.18, "learning_rate": 4.272761194029851e-05, "loss": 0.0, "step": 62368 }, { "epoch": 58.18, "learning_rate": 4.2727145522388064e-05, "loss": 0.0, "step": 62372 }, { "epoch": 58.19, "learning_rate": 4.272667910447761e-05, "loss": 0.0, "step": 62376 }, { "epoch": 58.19, "learning_rate": 4.272621268656717e-05, "loss": 0.0109, "step": 62380 }, { "epoch": 58.19, "learning_rate": 4.272574626865672e-05, "loss": 0.0007, "step": 62384 }, { "epoch": 58.2, "learning_rate": 4.272527985074627e-05, "loss": 0.0, "step": 62388 }, { "epoch": 58.2, "learning_rate": 4.2724813432835825e-05, "loss": 0.0, "step": 62392 }, { "epoch": 58.21, "learning_rate": 4.272434701492537e-05, "loss": 0.0, "step": 62396 }, { "epoch": 58.21, "learning_rate": 4.272388059701493e-05, "loss": 0.0001, "step": 62400 }, { "epoch": 58.21, "learning_rate": 4.272341417910448e-05, "loss": 0.0, "step": 62404 }, { "epoch": 58.22, "learning_rate": 4.272294776119403e-05, "loss": 0.002, "step": 62408 }, { "epoch": 58.22, "learning_rate": 4.272248134328358e-05, "loss": 0.0001, "step": 62412 }, { "epoch": 58.22, "learning_rate": 4.272201492537314e-05, "loss": 0.0003, "step": 62416 }, { "epoch": 58.23, "learning_rate": 4.272154850746269e-05, "loss": 0.0, "step": 62420 }, { "epoch": 58.23, "learning_rate": 4.272108208955224e-05, "loss": 0.0, "step": 62424 }, { "epoch": 58.24, "learning_rate": 4.272061567164179e-05, "loss": 0.0, "step": 62428 }, { "epoch": 58.24, "learning_rate": 4.272014925373135e-05, "loss": 0.0, "step": 62432 }, { "epoch": 58.24, "learning_rate": 4.2719682835820895e-05, "loss": 0.0, "step": 62436 }, { "epoch": 58.25, "learning_rate": 4.271921641791045e-05, "loss": 0.0, "step": 62440 }, { "epoch": 58.25, "learning_rate": 4.2718750000000005e-05, "loss": 0.0, "step": 62444 }, { "epoch": 58.25, "learning_rate": 4.271828358208955e-05, "loss": 0.0, "step": 62448 }, { "epoch": 58.26, "learning_rate": 4.271781716417911e-05, "loss": 0.0002, "step": 62452 }, { "epoch": 58.26, "learning_rate": 4.2717350746268656e-05, "loss": 0.0002, "step": 62456 }, { "epoch": 58.26, "learning_rate": 4.271688432835821e-05, "loss": 0.0, "step": 62460 }, { "epoch": 58.27, "learning_rate": 4.2716417910447766e-05, "loss": 0.0002, "step": 62464 }, { "epoch": 58.27, "learning_rate": 4.2715951492537314e-05, "loss": 0.0, "step": 62468 }, { "epoch": 58.28, "learning_rate": 4.271548507462686e-05, "loss": 0.0, "step": 62472 }, { "epoch": 58.28, "learning_rate": 4.2715018656716424e-05, "loss": 0.0, "step": 62476 }, { "epoch": 58.28, "learning_rate": 4.271455223880597e-05, "loss": 0.0, "step": 62480 }, { "epoch": 58.29, "learning_rate": 4.271408582089552e-05, "loss": 0.0, "step": 62484 }, { "epoch": 58.29, "learning_rate": 4.2713619402985075e-05, "loss": 0.0, "step": 62488 }, { "epoch": 58.29, "learning_rate": 4.271315298507463e-05, "loss": 0.0026, "step": 62492 }, { "epoch": 58.3, "learning_rate": 4.271268656716418e-05, "loss": 0.0002, "step": 62496 }, { "epoch": 58.3, "learning_rate": 4.271222014925373e-05, "loss": 0.0005, "step": 62500 }, { "epoch": 58.3, "eval_exact_match": 0.7446808510638298, "eval_exec": 0.7823984526112185, "eval_loss": 0.46804332733154297, "eval_runtime": 1167.1187, "eval_samples_per_second": 0.886, "step": 62500 }, { "epoch": 58.31, "learning_rate": 4.271175373134329e-05, "loss": 0.0018, "step": 62504 }, { "epoch": 58.31, "learning_rate": 4.271128731343284e-05, "loss": 0.0, "step": 62508 }, { "epoch": 58.31, "learning_rate": 4.271082089552239e-05, "loss": 0.0, "step": 62512 }, { "epoch": 58.32, "learning_rate": 4.271035447761194e-05, "loss": 0.0, "step": 62516 }, { "epoch": 58.32, "learning_rate": 4.2709888059701494e-05, "loss": 0.0, "step": 62520 }, { "epoch": 58.32, "learning_rate": 4.270942164179105e-05, "loss": 0.0, "step": 62524 }, { "epoch": 58.33, "learning_rate": 4.27089552238806e-05, "loss": 0.0027, "step": 62528 }, { "epoch": 58.33, "learning_rate": 4.270848880597015e-05, "loss": 0.0, "step": 62532 }, { "epoch": 58.34, "learning_rate": 4.270802238805971e-05, "loss": 0.0003, "step": 62536 }, { "epoch": 58.34, "learning_rate": 4.2707555970149255e-05, "loss": 0.0, "step": 62540 }, { "epoch": 58.34, "learning_rate": 4.270708955223881e-05, "loss": 0.0, "step": 62544 }, { "epoch": 58.35, "learning_rate": 4.270662313432836e-05, "loss": 0.0, "step": 62548 }, { "epoch": 58.35, "learning_rate": 4.270615671641791e-05, "loss": 0.0, "step": 62552 }, { "epoch": 58.35, "learning_rate": 4.270569029850747e-05, "loss": 0.0003, "step": 62556 }, { "epoch": 58.36, "learning_rate": 4.2705223880597016e-05, "loss": 0.0, "step": 62560 }, { "epoch": 58.36, "learning_rate": 4.270475746268657e-05, "loss": 0.0003, "step": 62564 }, { "epoch": 58.37, "learning_rate": 4.2704291044776126e-05, "loss": 0.0, "step": 62568 }, { "epoch": 58.37, "learning_rate": 4.2703824626865674e-05, "loss": 0.0, "step": 62572 }, { "epoch": 58.37, "learning_rate": 4.270335820895522e-05, "loss": 0.0, "step": 62576 }, { "epoch": 58.38, "learning_rate": 4.270289179104478e-05, "loss": 0.0002, "step": 62580 }, { "epoch": 58.38, "learning_rate": 4.270242537313433e-05, "loss": 0.0, "step": 62584 }, { "epoch": 58.38, "learning_rate": 4.270195895522388e-05, "loss": 0.0, "step": 62588 }, { "epoch": 58.39, "learning_rate": 4.2701492537313435e-05, "loss": 0.0012, "step": 62592 }, { "epoch": 58.39, "learning_rate": 4.270102611940299e-05, "loss": 0.0004, "step": 62596 }, { "epoch": 58.4, "learning_rate": 4.270055970149254e-05, "loss": 0.0, "step": 62600 }, { "epoch": 58.4, "learning_rate": 4.270009328358209e-05, "loss": 0.0, "step": 62604 }, { "epoch": 58.4, "learning_rate": 4.269962686567164e-05, "loss": 0.0007, "step": 62608 }, { "epoch": 58.41, "learning_rate": 4.2699160447761196e-05, "loss": 0.0, "step": 62612 }, { "epoch": 58.41, "learning_rate": 4.269869402985075e-05, "loss": 0.0, "step": 62616 }, { "epoch": 58.41, "learning_rate": 4.26982276119403e-05, "loss": 0.0, "step": 62620 }, { "epoch": 58.42, "learning_rate": 4.2697761194029854e-05, "loss": 0.0, "step": 62624 }, { "epoch": 58.42, "learning_rate": 4.269729477611941e-05, "loss": 0.0, "step": 62628 }, { "epoch": 58.43, "learning_rate": 4.269682835820896e-05, "loss": 0.0059, "step": 62632 }, { "epoch": 58.43, "learning_rate": 4.2696361940298505e-05, "loss": 0.0003, "step": 62636 }, { "epoch": 58.43, "learning_rate": 4.269589552238806e-05, "loss": 0.0, "step": 62640 }, { "epoch": 58.44, "learning_rate": 4.2695429104477615e-05, "loss": 0.0, "step": 62644 }, { "epoch": 58.44, "learning_rate": 4.269496268656716e-05, "loss": 0.0, "step": 62648 }, { "epoch": 58.44, "learning_rate": 4.269449626865672e-05, "loss": 0.0009, "step": 62652 }, { "epoch": 58.45, "learning_rate": 4.269402985074627e-05, "loss": 0.0, "step": 62656 }, { "epoch": 58.45, "learning_rate": 4.269356343283582e-05, "loss": 0.0, "step": 62660 }, { "epoch": 58.46, "learning_rate": 4.2693097014925376e-05, "loss": 0.0, "step": 62664 }, { "epoch": 58.46, "learning_rate": 4.2692630597014924e-05, "loss": 0.0001, "step": 62668 }, { "epoch": 58.46, "learning_rate": 4.2692164179104486e-05, "loss": 0.0001, "step": 62672 }, { "epoch": 58.47, "learning_rate": 4.2691697761194034e-05, "loss": 0.0, "step": 62676 }, { "epoch": 58.47, "learning_rate": 4.269123134328358e-05, "loss": 0.0002, "step": 62680 }, { "epoch": 58.47, "learning_rate": 4.269076492537314e-05, "loss": 0.0, "step": 62684 }, { "epoch": 58.48, "learning_rate": 4.269029850746269e-05, "loss": 0.0002, "step": 62688 }, { "epoch": 58.48, "learning_rate": 4.268983208955224e-05, "loss": 0.0, "step": 62692 }, { "epoch": 58.49, "learning_rate": 4.2689365671641795e-05, "loss": 0.0001, "step": 62696 }, { "epoch": 58.49, "learning_rate": 4.268889925373134e-05, "loss": 0.0001, "step": 62700 }, { "epoch": 58.49, "learning_rate": 4.26884328358209e-05, "loss": 0.0, "step": 62704 }, { "epoch": 58.5, "learning_rate": 4.268796641791045e-05, "loss": 0.0, "step": 62708 }, { "epoch": 58.5, "learning_rate": 4.26875e-05, "loss": 0.0029, "step": 62712 }, { "epoch": 58.5, "learning_rate": 4.2687033582089556e-05, "loss": 0.0001, "step": 62716 }, { "epoch": 58.51, "learning_rate": 4.268656716417911e-05, "loss": 0.0004, "step": 62720 }, { "epoch": 58.51, "learning_rate": 4.268610074626866e-05, "loss": 0.0001, "step": 62724 }, { "epoch": 58.51, "learning_rate": 4.268563432835821e-05, "loss": 0.0, "step": 62728 }, { "epoch": 58.52, "learning_rate": 4.268516791044777e-05, "loss": 0.0, "step": 62732 }, { "epoch": 58.52, "learning_rate": 4.268470149253732e-05, "loss": 0.0, "step": 62736 }, { "epoch": 58.53, "learning_rate": 4.2684235074626865e-05, "loss": 0.0002, "step": 62740 }, { "epoch": 58.53, "learning_rate": 4.268376865671642e-05, "loss": 0.0, "step": 62744 }, { "epoch": 58.53, "learning_rate": 4.2683302238805975e-05, "loss": 0.002, "step": 62748 }, { "epoch": 58.54, "learning_rate": 4.268283582089552e-05, "loss": 0.0001, "step": 62752 }, { "epoch": 58.54, "learning_rate": 4.268236940298508e-05, "loss": 0.0, "step": 62756 }, { "epoch": 58.54, "learning_rate": 4.2681902985074626e-05, "loss": 0.0139, "step": 62760 }, { "epoch": 58.55, "learning_rate": 4.268143656716418e-05, "loss": 0.0, "step": 62764 }, { "epoch": 58.55, "learning_rate": 4.2680970149253736e-05, "loss": 0.0, "step": 62768 }, { "epoch": 58.56, "learning_rate": 4.2680503731343284e-05, "loss": 0.0, "step": 62772 }, { "epoch": 58.56, "learning_rate": 4.268003731343284e-05, "loss": 0.0001, "step": 62776 }, { "epoch": 58.56, "learning_rate": 4.2679570895522394e-05, "loss": 0.0, "step": 62780 }, { "epoch": 58.57, "learning_rate": 4.267910447761194e-05, "loss": 0.0, "step": 62784 }, { "epoch": 58.57, "learning_rate": 4.267863805970149e-05, "loss": 0.0, "step": 62788 }, { "epoch": 58.57, "learning_rate": 4.267817164179105e-05, "loss": 0.0, "step": 62792 }, { "epoch": 58.58, "learning_rate": 4.26777052238806e-05, "loss": 0.0, "step": 62796 }, { "epoch": 58.58, "learning_rate": 4.267723880597015e-05, "loss": 0.0002, "step": 62800 }, { "epoch": 58.59, "learning_rate": 4.26767723880597e-05, "loss": 0.0, "step": 62804 }, { "epoch": 58.59, "learning_rate": 4.267630597014926e-05, "loss": 0.0, "step": 62808 }, { "epoch": 58.59, "learning_rate": 4.2675839552238806e-05, "loss": 0.0, "step": 62812 }, { "epoch": 58.6, "learning_rate": 4.267537313432836e-05, "loss": 0.0002, "step": 62816 }, { "epoch": 58.6, "learning_rate": 4.267490671641791e-05, "loss": 0.0, "step": 62820 }, { "epoch": 58.6, "learning_rate": 4.2674440298507464e-05, "loss": 0.0, "step": 62824 }, { "epoch": 58.61, "learning_rate": 4.267397388059702e-05, "loss": 0.0001, "step": 62828 }, { "epoch": 58.61, "learning_rate": 4.267350746268657e-05, "loss": 0.0, "step": 62832 }, { "epoch": 58.62, "learning_rate": 4.267304104477612e-05, "loss": 0.0, "step": 62836 }, { "epoch": 58.62, "learning_rate": 4.2672574626865677e-05, "loss": 0.0, "step": 62840 }, { "epoch": 58.62, "learning_rate": 4.2672108208955225e-05, "loss": 0.0, "step": 62844 }, { "epoch": 58.63, "learning_rate": 4.267164179104478e-05, "loss": 0.0001, "step": 62848 }, { "epoch": 58.63, "learning_rate": 4.267117537313433e-05, "loss": 0.0003, "step": 62852 }, { "epoch": 58.63, "learning_rate": 4.267070895522388e-05, "loss": 0.0, "step": 62856 }, { "epoch": 58.64, "learning_rate": 4.267024253731344e-05, "loss": 0.0, "step": 62860 }, { "epoch": 58.64, "learning_rate": 4.2669776119402986e-05, "loss": 0.0, "step": 62864 }, { "epoch": 58.65, "learning_rate": 4.266930970149254e-05, "loss": 0.0, "step": 62868 }, { "epoch": 58.65, "learning_rate": 4.2668843283582095e-05, "loss": 0.0004, "step": 62872 }, { "epoch": 58.65, "learning_rate": 4.2668376865671644e-05, "loss": 0.0, "step": 62876 }, { "epoch": 58.66, "learning_rate": 4.266791044776119e-05, "loss": 0.0, "step": 62880 }, { "epoch": 58.66, "learning_rate": 4.2667444029850753e-05, "loss": 0.0001, "step": 62884 }, { "epoch": 58.66, "learning_rate": 4.26669776119403e-05, "loss": 0.0, "step": 62888 }, { "epoch": 58.67, "learning_rate": 4.266651119402985e-05, "loss": 0.0, "step": 62892 }, { "epoch": 58.67, "learning_rate": 4.2666044776119405e-05, "loss": 0.0, "step": 62896 }, { "epoch": 58.68, "learning_rate": 4.266557835820896e-05, "loss": 0.0002, "step": 62900 }, { "epoch": 58.68, "learning_rate": 4.266511194029851e-05, "loss": 0.0, "step": 62904 }, { "epoch": 58.68, "learning_rate": 4.266464552238806e-05, "loss": 0.0009, "step": 62908 }, { "epoch": 58.69, "learning_rate": 4.266417910447761e-05, "loss": 0.0, "step": 62912 }, { "epoch": 58.69, "learning_rate": 4.2663712686567166e-05, "loss": 0.0, "step": 62916 }, { "epoch": 58.69, "learning_rate": 4.266324626865672e-05, "loss": 0.0, "step": 62920 }, { "epoch": 58.7, "learning_rate": 4.266277985074627e-05, "loss": 0.0, "step": 62924 }, { "epoch": 58.7, "learning_rate": 4.2662313432835824e-05, "loss": 0.0006, "step": 62928 }, { "epoch": 58.71, "learning_rate": 4.266184701492538e-05, "loss": 0.0001, "step": 62932 }, { "epoch": 58.71, "learning_rate": 4.2661380597014927e-05, "loss": 0.0, "step": 62936 }, { "epoch": 58.71, "learning_rate": 4.2660914179104475e-05, "loss": 0.0, "step": 62940 }, { "epoch": 58.72, "learning_rate": 4.2660447761194036e-05, "loss": 0.0, "step": 62944 }, { "epoch": 58.72, "learning_rate": 4.2659981343283585e-05, "loss": 0.0003, "step": 62948 }, { "epoch": 58.72, "learning_rate": 4.265951492537313e-05, "loss": 0.0003, "step": 62952 }, { "epoch": 58.73, "learning_rate": 4.265904850746269e-05, "loss": 0.0, "step": 62956 }, { "epoch": 58.73, "learning_rate": 4.265858208955224e-05, "loss": 0.0031, "step": 62960 }, { "epoch": 58.73, "learning_rate": 4.265811567164179e-05, "loss": 0.0, "step": 62964 }, { "epoch": 58.74, "learning_rate": 4.2657649253731346e-05, "loss": 0.0, "step": 62968 }, { "epoch": 58.74, "learning_rate": 4.2657182835820894e-05, "loss": 0.0002, "step": 62972 }, { "epoch": 58.75, "learning_rate": 4.265671641791045e-05, "loss": 0.0, "step": 62976 }, { "epoch": 58.75, "learning_rate": 4.2656250000000003e-05, "loss": 0.0001, "step": 62980 }, { "epoch": 58.75, "learning_rate": 4.265578358208955e-05, "loss": 0.0, "step": 62984 }, { "epoch": 58.76, "learning_rate": 4.2655317164179107e-05, "loss": 0.0004, "step": 62988 }, { "epoch": 58.76, "learning_rate": 4.265485074626866e-05, "loss": 0.0001, "step": 62992 }, { "epoch": 58.76, "learning_rate": 4.265438432835821e-05, "loss": 0.0, "step": 62996 }, { "epoch": 58.77, "learning_rate": 4.2653917910447764e-05, "loss": 0.0, "step": 63000 }, { "epoch": 58.77, "eval_exact_match": 0.741779497098646, "eval_exec": 0.7872340425531915, "eval_loss": 0.4804081916809082, "eval_runtime": 1136.1578, "eval_samples_per_second": 0.91, "step": 63000 }, { "epoch": 58.77, "learning_rate": 4.265345149253732e-05, "loss": 0.0, "step": 63004 }, { "epoch": 58.78, "learning_rate": 4.265298507462687e-05, "loss": 0.0, "step": 63008 }, { "epoch": 58.78, "learning_rate": 4.265251865671642e-05, "loss": 0.0003, "step": 63012 }, { "epoch": 58.78, "learning_rate": 4.265205223880597e-05, "loss": 0.0001, "step": 63016 }, { "epoch": 58.79, "learning_rate": 4.2651585820895525e-05, "loss": 0.0001, "step": 63020 }, { "epoch": 58.79, "learning_rate": 4.265111940298508e-05, "loss": 0.0, "step": 63024 }, { "epoch": 58.79, "learning_rate": 4.265065298507463e-05, "loss": 0.0, "step": 63028 }, { "epoch": 58.8, "learning_rate": 4.2650186567164177e-05, "loss": 0.0, "step": 63032 }, { "epoch": 58.8, "learning_rate": 4.264972014925374e-05, "loss": 0.0075, "step": 63036 }, { "epoch": 58.81, "learning_rate": 4.2649253731343286e-05, "loss": 0.0001, "step": 63040 }, { "epoch": 58.81, "learning_rate": 4.2648787313432835e-05, "loss": 0.0001, "step": 63044 }, { "epoch": 58.81, "learning_rate": 4.264832089552239e-05, "loss": 0.0, "step": 63048 }, { "epoch": 58.82, "learning_rate": 4.2647854477611944e-05, "loss": 0.0, "step": 63052 }, { "epoch": 58.82, "learning_rate": 4.264738805970149e-05, "loss": 0.0002, "step": 63056 }, { "epoch": 58.82, "learning_rate": 4.264692164179105e-05, "loss": 0.0, "step": 63060 }, { "epoch": 58.83, "learning_rate": 4.26464552238806e-05, "loss": 0.0, "step": 63064 }, { "epoch": 58.83, "learning_rate": 4.264598880597015e-05, "loss": 0.0001, "step": 63068 }, { "epoch": 58.84, "learning_rate": 4.2645522388059705e-05, "loss": 0.0001, "step": 63072 }, { "epoch": 58.84, "learning_rate": 4.2645055970149253e-05, "loss": 0.0, "step": 63076 }, { "epoch": 58.84, "learning_rate": 4.264458955223881e-05, "loss": 0.0, "step": 63080 }, { "epoch": 58.85, "learning_rate": 4.264412313432836e-05, "loss": 0.0, "step": 63084 }, { "epoch": 58.85, "learning_rate": 4.264365671641791e-05, "loss": 0.0, "step": 63088 }, { "epoch": 58.85, "learning_rate": 4.264319029850746e-05, "loss": 0.0, "step": 63092 }, { "epoch": 58.86, "learning_rate": 4.264272388059702e-05, "loss": 0.0, "step": 63096 }, { "epoch": 58.86, "learning_rate": 4.264225746268657e-05, "loss": 0.0, "step": 63100 }, { "epoch": 58.87, "learning_rate": 4.264179104477612e-05, "loss": 0.0, "step": 63104 }, { "epoch": 58.87, "learning_rate": 4.264132462686567e-05, "loss": 0.0019, "step": 63108 }, { "epoch": 58.87, "learning_rate": 4.264085820895523e-05, "loss": 0.0, "step": 63112 }, { "epoch": 58.88, "learning_rate": 4.2640391791044775e-05, "loss": 0.0, "step": 63116 }, { "epoch": 58.88, "learning_rate": 4.263992537313433e-05, "loss": 0.0, "step": 63120 }, { "epoch": 58.88, "learning_rate": 4.2639458955223885e-05, "loss": 0.0, "step": 63124 }, { "epoch": 58.89, "learning_rate": 4.2638992537313433e-05, "loss": 0.0001, "step": 63128 }, { "epoch": 58.89, "learning_rate": 4.263852611940299e-05, "loss": 0.0, "step": 63132 }, { "epoch": 58.9, "learning_rate": 4.2638059701492536e-05, "loss": 0.0001, "step": 63136 }, { "epoch": 58.9, "learning_rate": 4.263759328358209e-05, "loss": 0.0012, "step": 63140 }, { "epoch": 58.9, "learning_rate": 4.2637126865671646e-05, "loss": 0.0004, "step": 63144 }, { "epoch": 58.91, "learning_rate": 4.2636660447761194e-05, "loss": 0.0002, "step": 63148 }, { "epoch": 58.91, "learning_rate": 4.263619402985074e-05, "loss": 0.0, "step": 63152 }, { "epoch": 58.91, "learning_rate": 4.2635727611940304e-05, "loss": 0.0, "step": 63156 }, { "epoch": 58.92, "learning_rate": 4.263526119402985e-05, "loss": 0.0, "step": 63160 }, { "epoch": 58.92, "learning_rate": 4.263479477611941e-05, "loss": 0.0, "step": 63164 }, { "epoch": 58.93, "learning_rate": 4.2634328358208955e-05, "loss": 0.0002, "step": 63168 }, { "epoch": 58.93, "learning_rate": 4.263386194029851e-05, "loss": 0.0004, "step": 63172 }, { "epoch": 58.93, "learning_rate": 4.2633395522388065e-05, "loss": 0.0, "step": 63176 }, { "epoch": 58.94, "learning_rate": 4.263292910447761e-05, "loss": 0.0, "step": 63180 }, { "epoch": 58.94, "learning_rate": 4.263246268656717e-05, "loss": 0.0, "step": 63184 }, { "epoch": 58.94, "learning_rate": 4.263199626865672e-05, "loss": 0.0, "step": 63188 }, { "epoch": 58.95, "learning_rate": 4.263152985074627e-05, "loss": 0.0, "step": 63192 }, { "epoch": 58.95, "learning_rate": 4.263106343283582e-05, "loss": 0.0, "step": 63196 }, { "epoch": 58.96, "learning_rate": 4.2630597014925374e-05, "loss": 0.0, "step": 63200 }, { "epoch": 58.96, "learning_rate": 4.263013059701493e-05, "loss": 0.0038, "step": 63204 }, { "epoch": 58.96, "learning_rate": 4.262966417910448e-05, "loss": 0.0, "step": 63208 }, { "epoch": 58.97, "learning_rate": 4.262919776119403e-05, "loss": 0.0001, "step": 63212 }, { "epoch": 58.97, "learning_rate": 4.262873134328359e-05, "loss": 0.0156, "step": 63216 }, { "epoch": 58.97, "learning_rate": 4.2628264925373135e-05, "loss": 0.0, "step": 63220 }, { "epoch": 58.98, "learning_rate": 4.262779850746269e-05, "loss": 0.0001, "step": 63224 }, { "epoch": 58.98, "learning_rate": 4.262733208955224e-05, "loss": 0.0003, "step": 63228 }, { "epoch": 58.98, "learning_rate": 4.262686567164179e-05, "loss": 0.0, "step": 63232 }, { "epoch": 58.99, "learning_rate": 4.262639925373135e-05, "loss": 0.0, "step": 63236 }, { "epoch": 58.99, "learning_rate": 4.2625932835820896e-05, "loss": 0.0, "step": 63240 }, { "epoch": 59.0, "learning_rate": 4.262546641791045e-05, "loss": 0.0, "step": 63244 }, { "epoch": 59.0, "learning_rate": 4.2625000000000006e-05, "loss": 0.0, "step": 63248 }, { "epoch": 59.0, "learning_rate": 4.2624533582089554e-05, "loss": 0.0, "step": 63252 }, { "epoch": 59.01, "learning_rate": 4.26240671641791e-05, "loss": 0.0, "step": 63256 }, { "epoch": 59.01, "learning_rate": 4.262360074626866e-05, "loss": 0.0001, "step": 63260 }, { "epoch": 59.01, "learning_rate": 4.262313432835821e-05, "loss": 0.0, "step": 63264 }, { "epoch": 59.02, "learning_rate": 4.262266791044776e-05, "loss": 0.0, "step": 63268 }, { "epoch": 59.02, "learning_rate": 4.2622201492537315e-05, "loss": 0.0, "step": 63272 }, { "epoch": 59.03, "learning_rate": 4.262173507462687e-05, "loss": 0.0018, "step": 63276 }, { "epoch": 59.03, "learning_rate": 4.262126865671642e-05, "loss": 0.0002, "step": 63280 }, { "epoch": 59.03, "learning_rate": 4.262080223880597e-05, "loss": 0.0, "step": 63284 }, { "epoch": 59.04, "learning_rate": 4.262033582089552e-05, "loss": 0.0, "step": 63288 }, { "epoch": 59.04, "learning_rate": 4.2619869402985076e-05, "loss": 0.0002, "step": 63292 }, { "epoch": 59.04, "learning_rate": 4.261940298507463e-05, "loss": 0.0002, "step": 63296 }, { "epoch": 59.05, "learning_rate": 4.261893656716418e-05, "loss": 0.0, "step": 63300 }, { "epoch": 59.05, "learning_rate": 4.2618470149253734e-05, "loss": 0.0, "step": 63304 }, { "epoch": 59.06, "learning_rate": 4.261800373134329e-05, "loss": 0.0001, "step": 63308 }, { "epoch": 59.06, "learning_rate": 4.261753731343284e-05, "loss": 0.0, "step": 63312 }, { "epoch": 59.06, "learning_rate": 4.2617070895522385e-05, "loss": 0.0054, "step": 63316 }, { "epoch": 59.07, "learning_rate": 4.261660447761194e-05, "loss": 0.0, "step": 63320 }, { "epoch": 59.07, "learning_rate": 4.2616138059701495e-05, "loss": 0.0, "step": 63324 }, { "epoch": 59.07, "learning_rate": 4.261567164179105e-05, "loss": 0.0, "step": 63328 }, { "epoch": 59.08, "learning_rate": 4.26152052238806e-05, "loss": 0.0, "step": 63332 }, { "epoch": 59.08, "learning_rate": 4.261473880597015e-05, "loss": 0.0, "step": 63336 }, { "epoch": 59.09, "learning_rate": 4.261427238805971e-05, "loss": 0.0, "step": 63340 }, { "epoch": 59.09, "learning_rate": 4.2613805970149256e-05, "loss": 0.0007, "step": 63344 }, { "epoch": 59.09, "learning_rate": 4.2613339552238804e-05, "loss": 0.0, "step": 63348 }, { "epoch": 59.1, "learning_rate": 4.2612873134328366e-05, "loss": 0.0, "step": 63352 }, { "epoch": 59.1, "learning_rate": 4.2612406716417914e-05, "loss": 0.0, "step": 63356 }, { "epoch": 59.1, "learning_rate": 4.261194029850746e-05, "loss": 0.0, "step": 63360 }, { "epoch": 59.11, "learning_rate": 4.261147388059702e-05, "loss": 0.0, "step": 63364 }, { "epoch": 59.11, "learning_rate": 4.261100746268657e-05, "loss": 0.0, "step": 63368 }, { "epoch": 59.12, "learning_rate": 4.261054104477612e-05, "loss": 0.0001, "step": 63372 }, { "epoch": 59.12, "learning_rate": 4.2610074626865675e-05, "loss": 0.0, "step": 63376 }, { "epoch": 59.12, "learning_rate": 4.260960820895522e-05, "loss": 0.0, "step": 63380 }, { "epoch": 59.13, "learning_rate": 4.260914179104478e-05, "loss": 0.0, "step": 63384 }, { "epoch": 59.13, "learning_rate": 4.260867537313433e-05, "loss": 0.0007, "step": 63388 }, { "epoch": 59.13, "learning_rate": 4.260820895522388e-05, "loss": 0.0001, "step": 63392 }, { "epoch": 59.14, "learning_rate": 4.2607742537313436e-05, "loss": 0.0, "step": 63396 }, { "epoch": 59.14, "learning_rate": 4.260727611940299e-05, "loss": 0.0002, "step": 63400 }, { "epoch": 59.15, "learning_rate": 4.260680970149254e-05, "loss": 0.0, "step": 63404 }, { "epoch": 59.15, "learning_rate": 4.260634328358209e-05, "loss": 0.0, "step": 63408 }, { "epoch": 59.15, "learning_rate": 4.260587686567165e-05, "loss": 0.0019, "step": 63412 }, { "epoch": 59.16, "learning_rate": 4.26054104477612e-05, "loss": 0.0, "step": 63416 }, { "epoch": 59.16, "learning_rate": 4.2604944029850745e-05, "loss": 0.006, "step": 63420 }, { "epoch": 59.16, "learning_rate": 4.26044776119403e-05, "loss": 0.0, "step": 63424 }, { "epoch": 59.17, "learning_rate": 4.2604011194029855e-05, "loss": 0.0001, "step": 63428 }, { "epoch": 59.17, "learning_rate": 4.26035447761194e-05, "loss": 0.0008, "step": 63432 }, { "epoch": 59.18, "learning_rate": 4.260307835820896e-05, "loss": 0.0, "step": 63436 }, { "epoch": 59.18, "learning_rate": 4.2602611940298506e-05, "loss": 0.0032, "step": 63440 }, { "epoch": 59.18, "learning_rate": 4.260214552238806e-05, "loss": 0.0004, "step": 63444 }, { "epoch": 59.19, "learning_rate": 4.2601679104477616e-05, "loss": 0.0, "step": 63448 }, { "epoch": 59.19, "learning_rate": 4.2601212686567164e-05, "loss": 0.0, "step": 63452 }, { "epoch": 59.19, "learning_rate": 4.260074626865672e-05, "loss": 0.0, "step": 63456 }, { "epoch": 59.2, "learning_rate": 4.2600279850746274e-05, "loss": 0.0, "step": 63460 }, { "epoch": 59.2, "learning_rate": 4.259981343283582e-05, "loss": 0.0, "step": 63464 }, { "epoch": 59.21, "learning_rate": 4.259934701492537e-05, "loss": 0.0001, "step": 63468 }, { "epoch": 59.21, "learning_rate": 4.259888059701493e-05, "loss": 0.0001, "step": 63472 }, { "epoch": 59.21, "learning_rate": 4.259841417910448e-05, "loss": 0.0, "step": 63476 }, { "epoch": 59.22, "learning_rate": 4.259794776119403e-05, "loss": 0.0, "step": 63480 }, { "epoch": 59.22, "learning_rate": 4.259748134328358e-05, "loss": 0.0, "step": 63484 }, { "epoch": 59.22, "learning_rate": 4.259701492537314e-05, "loss": 0.0, "step": 63488 }, { "epoch": 59.23, "learning_rate": 4.259654850746269e-05, "loss": 0.0, "step": 63492 }, { "epoch": 59.23, "learning_rate": 4.259608208955224e-05, "loss": 0.0001, "step": 63496 }, { "epoch": 59.24, "learning_rate": 4.259561567164179e-05, "loss": 0.0002, "step": 63500 }, { "epoch": 59.24, "eval_exact_match": 0.746615087040619, "eval_exec": 0.7872340425531915, "eval_loss": 0.48403027653694153, "eval_runtime": 1101.428, "eval_samples_per_second": 0.939, "step": 63500 }, { "epoch": 59.24, "learning_rate": 4.259514925373135e-05, "loss": 0.0, "step": 63504 }, { "epoch": 59.24, "learning_rate": 4.25946828358209e-05, "loss": 0.0001, "step": 63508 }, { "epoch": 59.25, "learning_rate": 4.259421641791045e-05, "loss": 0.0101, "step": 63512 }, { "epoch": 59.25, "learning_rate": 4.259375e-05, "loss": 0.0, "step": 63516 }, { "epoch": 59.25, "learning_rate": 4.259328358208956e-05, "loss": 0.0, "step": 63520 }, { "epoch": 59.26, "learning_rate": 4.2592817164179105e-05, "loss": 0.0, "step": 63524 }, { "epoch": 59.26, "learning_rate": 4.259235074626866e-05, "loss": 0.0, "step": 63528 }, { "epoch": 59.26, "learning_rate": 4.259188432835821e-05, "loss": 0.0, "step": 63532 }, { "epoch": 59.27, "learning_rate": 4.259141791044776e-05, "loss": 0.0, "step": 63536 }, { "epoch": 59.27, "learning_rate": 4.259095149253732e-05, "loss": 0.0, "step": 63540 }, { "epoch": 59.28, "learning_rate": 4.2590485074626866e-05, "loss": 0.0, "step": 63544 }, { "epoch": 59.28, "learning_rate": 4.259001865671642e-05, "loss": 0.0, "step": 63548 }, { "epoch": 59.28, "learning_rate": 4.2589552238805976e-05, "loss": 0.0, "step": 63552 }, { "epoch": 59.29, "learning_rate": 4.2589085820895524e-05, "loss": 0.0, "step": 63556 }, { "epoch": 59.29, "learning_rate": 4.258861940298507e-05, "loss": 0.0, "step": 63560 }, { "epoch": 59.29, "learning_rate": 4.2588152985074634e-05, "loss": 0.0, "step": 63564 }, { "epoch": 59.3, "learning_rate": 4.258768656716418e-05, "loss": 0.0011, "step": 63568 }, { "epoch": 59.3, "learning_rate": 4.258722014925373e-05, "loss": 0.0001, "step": 63572 }, { "epoch": 59.31, "learning_rate": 4.2586753731343285e-05, "loss": 0.0, "step": 63576 }, { "epoch": 59.31, "learning_rate": 4.258628731343284e-05, "loss": 0.0002, "step": 63580 }, { "epoch": 59.31, "learning_rate": 4.258582089552239e-05, "loss": 0.0, "step": 63584 }, { "epoch": 59.32, "learning_rate": 4.258535447761194e-05, "loss": 0.0, "step": 63588 }, { "epoch": 59.32, "learning_rate": 4.258488805970149e-05, "loss": 0.0, "step": 63592 }, { "epoch": 59.32, "learning_rate": 4.2584421641791046e-05, "loss": 0.0, "step": 63596 }, { "epoch": 59.33, "learning_rate": 4.25839552238806e-05, "loss": 0.0, "step": 63600 }, { "epoch": 59.33, "learning_rate": 4.258348880597015e-05, "loss": 0.0001, "step": 63604 }, { "epoch": 59.34, "learning_rate": 4.2583022388059704e-05, "loss": 0.0, "step": 63608 }, { "epoch": 59.34, "learning_rate": 4.258255597014926e-05, "loss": 0.0, "step": 63612 }, { "epoch": 59.34, "learning_rate": 4.258208955223881e-05, "loss": 0.0006, "step": 63616 }, { "epoch": 59.35, "learning_rate": 4.2581623134328355e-05, "loss": 0.0001, "step": 63620 }, { "epoch": 59.35, "learning_rate": 4.258115671641792e-05, "loss": 0.0, "step": 63624 }, { "epoch": 59.35, "learning_rate": 4.2580690298507465e-05, "loss": 0.0001, "step": 63628 }, { "epoch": 59.36, "learning_rate": 4.258022388059701e-05, "loss": 0.0, "step": 63632 }, { "epoch": 59.36, "learning_rate": 4.257975746268657e-05, "loss": 0.0, "step": 63636 }, { "epoch": 59.37, "learning_rate": 4.257929104477612e-05, "loss": 0.0001, "step": 63640 }, { "epoch": 59.37, "learning_rate": 4.257882462686567e-05, "loss": 0.0, "step": 63644 }, { "epoch": 59.37, "learning_rate": 4.2578358208955226e-05, "loss": 0.0006, "step": 63648 }, { "epoch": 59.38, "learning_rate": 4.2577891791044774e-05, "loss": 0.0, "step": 63652 }, { "epoch": 59.38, "learning_rate": 4.2577425373134336e-05, "loss": 0.0011, "step": 63656 }, { "epoch": 59.38, "learning_rate": 4.2576958955223884e-05, "loss": 0.0, "step": 63660 }, { "epoch": 59.39, "learning_rate": 4.257649253731343e-05, "loss": 0.0001, "step": 63664 }, { "epoch": 59.39, "learning_rate": 4.257602611940299e-05, "loss": 0.0, "step": 63668 }, { "epoch": 59.4, "learning_rate": 4.257555970149254e-05, "loss": 0.0, "step": 63672 }, { "epoch": 59.4, "learning_rate": 4.257509328358209e-05, "loss": 0.0, "step": 63676 }, { "epoch": 59.4, "learning_rate": 4.2574626865671645e-05, "loss": 0.0, "step": 63680 }, { "epoch": 59.41, "learning_rate": 4.25741604477612e-05, "loss": 0.0, "step": 63684 }, { "epoch": 59.41, "learning_rate": 4.257369402985075e-05, "loss": 0.0, "step": 63688 }, { "epoch": 59.41, "learning_rate": 4.25732276119403e-05, "loss": 0.0004, "step": 63692 }, { "epoch": 59.42, "learning_rate": 4.257276119402985e-05, "loss": 0.0, "step": 63696 }, { "epoch": 59.42, "learning_rate": 4.2572294776119406e-05, "loss": 0.0, "step": 63700 }, { "epoch": 59.43, "learning_rate": 4.257182835820896e-05, "loss": 0.0001, "step": 63704 }, { "epoch": 59.43, "learning_rate": 4.257136194029851e-05, "loss": 0.0001, "step": 63708 }, { "epoch": 59.43, "learning_rate": 4.257089552238806e-05, "loss": 0.0001, "step": 63712 }, { "epoch": 59.44, "learning_rate": 4.257042910447762e-05, "loss": 0.0, "step": 63716 }, { "epoch": 59.44, "learning_rate": 4.256996268656717e-05, "loss": 0.0, "step": 63720 }, { "epoch": 59.44, "learning_rate": 4.2569496268656715e-05, "loss": 0.0, "step": 63724 }, { "epoch": 59.45, "learning_rate": 4.256902985074627e-05, "loss": 0.0, "step": 63728 }, { "epoch": 59.45, "learning_rate": 4.2568563432835825e-05, "loss": 0.0, "step": 63732 }, { "epoch": 59.46, "learning_rate": 4.256809701492537e-05, "loss": 0.0, "step": 63736 }, { "epoch": 59.46, "learning_rate": 4.256763059701493e-05, "loss": 0.0001, "step": 63740 }, { "epoch": 59.46, "learning_rate": 4.256716417910448e-05, "loss": 0.0, "step": 63744 }, { "epoch": 59.47, "learning_rate": 4.256669776119403e-05, "loss": 0.0, "step": 63748 }, { "epoch": 59.47, "learning_rate": 4.2566231343283586e-05, "loss": 0.0001, "step": 63752 }, { "epoch": 59.47, "learning_rate": 4.2565764925373134e-05, "loss": 0.0, "step": 63756 }, { "epoch": 59.48, "learning_rate": 4.256529850746269e-05, "loss": 0.001, "step": 63760 }, { "epoch": 59.48, "learning_rate": 4.2564832089552244e-05, "loss": 0.0, "step": 63764 }, { "epoch": 59.49, "learning_rate": 4.256436567164179e-05, "loss": 0.0001, "step": 63768 }, { "epoch": 59.49, "learning_rate": 4.256389925373134e-05, "loss": 0.0001, "step": 63772 }, { "epoch": 59.49, "learning_rate": 4.25634328358209e-05, "loss": 0.0, "step": 63776 }, { "epoch": 59.5, "learning_rate": 4.256296641791045e-05, "loss": 0.0006, "step": 63780 }, { "epoch": 59.5, "learning_rate": 4.25625e-05, "loss": 0.0, "step": 63784 }, { "epoch": 59.5, "learning_rate": 4.256203358208955e-05, "loss": 0.0, "step": 63788 }, { "epoch": 59.51, "learning_rate": 4.256156716417911e-05, "loss": 0.0002, "step": 63792 }, { "epoch": 59.51, "learning_rate": 4.2561100746268656e-05, "loss": 0.0001, "step": 63796 }, { "epoch": 59.51, "learning_rate": 4.256063432835821e-05, "loss": 0.0, "step": 63800 }, { "epoch": 59.52, "learning_rate": 4.2560167910447766e-05, "loss": 0.0, "step": 63804 }, { "epoch": 59.52, "learning_rate": 4.2559701492537314e-05, "loss": 0.0, "step": 63808 }, { "epoch": 59.53, "learning_rate": 4.255923507462687e-05, "loss": 0.0, "step": 63812 }, { "epoch": 59.53, "learning_rate": 4.255876865671642e-05, "loss": 0.0, "step": 63816 }, { "epoch": 59.53, "learning_rate": 4.255830223880597e-05, "loss": 0.0, "step": 63820 }, { "epoch": 59.54, "learning_rate": 4.2557835820895527e-05, "loss": 0.0, "step": 63824 }, { "epoch": 59.54, "learning_rate": 4.2557369402985075e-05, "loss": 0.0, "step": 63828 }, { "epoch": 59.54, "learning_rate": 4.255690298507463e-05, "loss": 0.0, "step": 63832 }, { "epoch": 59.55, "learning_rate": 4.2556436567164184e-05, "loss": 0.0, "step": 63836 }, { "epoch": 59.55, "learning_rate": 4.255597014925373e-05, "loss": 0.0, "step": 63840 }, { "epoch": 59.56, "learning_rate": 4.255550373134329e-05, "loss": 0.0001, "step": 63844 }, { "epoch": 59.56, "learning_rate": 4.2555037313432836e-05, "loss": 0.0, "step": 63848 }, { "epoch": 59.56, "learning_rate": 4.255457089552239e-05, "loss": 0.0, "step": 63852 }, { "epoch": 59.57, "learning_rate": 4.2554104477611945e-05, "loss": 0.0, "step": 63856 }, { "epoch": 59.57, "learning_rate": 4.2553638059701494e-05, "loss": 0.0, "step": 63860 }, { "epoch": 59.57, "learning_rate": 4.255317164179105e-05, "loss": 0.0, "step": 63864 }, { "epoch": 59.58, "learning_rate": 4.2552705223880603e-05, "loss": 0.0, "step": 63868 }, { "epoch": 59.58, "learning_rate": 4.255223880597015e-05, "loss": 0.0, "step": 63872 }, { "epoch": 59.59, "learning_rate": 4.25517723880597e-05, "loss": 0.0, "step": 63876 }, { "epoch": 59.59, "learning_rate": 4.2551305970149255e-05, "loss": 0.0, "step": 63880 }, { "epoch": 59.59, "learning_rate": 4.255083955223881e-05, "loss": 0.0, "step": 63884 }, { "epoch": 59.6, "learning_rate": 4.255037313432836e-05, "loss": 0.0004, "step": 63888 }, { "epoch": 59.6, "learning_rate": 4.254990671641791e-05, "loss": 0.0004, "step": 63892 }, { "epoch": 59.6, "learning_rate": 4.254944029850747e-05, "loss": 0.0003, "step": 63896 }, { "epoch": 59.61, "learning_rate": 4.2548973880597016e-05, "loss": 0.0, "step": 63900 }, { "epoch": 59.61, "learning_rate": 4.254850746268657e-05, "loss": 0.0, "step": 63904 }, { "epoch": 59.62, "learning_rate": 4.254804104477612e-05, "loss": 0.0, "step": 63908 }, { "epoch": 59.62, "learning_rate": 4.2547574626865674e-05, "loss": 0.0, "step": 63912 }, { "epoch": 59.62, "learning_rate": 4.254710820895523e-05, "loss": 0.0006, "step": 63916 }, { "epoch": 59.63, "learning_rate": 4.2546641791044777e-05, "loss": 0.0, "step": 63920 }, { "epoch": 59.63, "learning_rate": 4.254617537313433e-05, "loss": 0.0, "step": 63924 }, { "epoch": 59.63, "learning_rate": 4.2545708955223886e-05, "loss": 0.0, "step": 63928 }, { "epoch": 59.64, "learning_rate": 4.2545242537313435e-05, "loss": 0.0, "step": 63932 }, { "epoch": 59.64, "learning_rate": 4.254477611940298e-05, "loss": 0.0002, "step": 63936 }, { "epoch": 59.65, "learning_rate": 4.254430970149254e-05, "loss": 0.0, "step": 63940 }, { "epoch": 59.65, "learning_rate": 4.254384328358209e-05, "loss": 0.0, "step": 63944 }, { "epoch": 59.65, "learning_rate": 4.254337686567164e-05, "loss": 0.0, "step": 63948 }, { "epoch": 59.66, "learning_rate": 4.2542910447761195e-05, "loss": 0.0, "step": 63952 }, { "epoch": 59.66, "learning_rate": 4.254244402985075e-05, "loss": 0.0, "step": 63956 }, { "epoch": 59.66, "learning_rate": 4.25419776119403e-05, "loss": 0.0, "step": 63960 }, { "epoch": 59.67, "learning_rate": 4.2541511194029853e-05, "loss": 0.0, "step": 63964 }, { "epoch": 59.67, "learning_rate": 4.25410447761194e-05, "loss": 0.0002, "step": 63968 }, { "epoch": 59.68, "learning_rate": 4.2540578358208956e-05, "loss": 0.0, "step": 63972 }, { "epoch": 59.68, "learning_rate": 4.254011194029851e-05, "loss": 0.0, "step": 63976 }, { "epoch": 59.68, "learning_rate": 4.253964552238806e-05, "loss": 0.0001, "step": 63980 }, { "epoch": 59.69, "learning_rate": 4.2539179104477614e-05, "loss": 0.0001, "step": 63984 }, { "epoch": 59.69, "learning_rate": 4.253871268656717e-05, "loss": 0.0, "step": 63988 }, { "epoch": 59.69, "learning_rate": 4.253824626865672e-05, "loss": 0.0, "step": 63992 }, { "epoch": 59.7, "learning_rate": 4.253777985074627e-05, "loss": 0.0, "step": 63996 }, { "epoch": 59.7, "learning_rate": 4.253731343283582e-05, "loss": 0.0014, "step": 64000 }, { "epoch": 59.7, "eval_exact_match": 0.7437137330754352, "eval_exec": 0.7891682785299806, "eval_loss": 0.5204067826271057, "eval_runtime": 1133.3553, "eval_samples_per_second": 0.912, "step": 64000 }, { "epoch": 59.71, "learning_rate": 4.2536847014925375e-05, "loss": 0.0001, "step": 64004 }, { "epoch": 59.71, "learning_rate": 4.253638059701493e-05, "loss": 0.0, "step": 64008 }, { "epoch": 59.71, "learning_rate": 4.253591417910448e-05, "loss": 0.0, "step": 64012 }, { "epoch": 59.72, "learning_rate": 4.253544776119403e-05, "loss": 0.0, "step": 64016 }, { "epoch": 59.72, "learning_rate": 4.253498134328359e-05, "loss": 0.0, "step": 64020 }, { "epoch": 59.72, "learning_rate": 4.2534514925373136e-05, "loss": 0.003, "step": 64024 }, { "epoch": 59.73, "learning_rate": 4.2534048507462685e-05, "loss": 0.0, "step": 64028 }, { "epoch": 59.73, "learning_rate": 4.2533582089552246e-05, "loss": 0.0016, "step": 64032 }, { "epoch": 59.73, "learning_rate": 4.2533115671641794e-05, "loss": 0.0001, "step": 64036 }, { "epoch": 59.74, "learning_rate": 4.253264925373134e-05, "loss": 0.0, "step": 64040 }, { "epoch": 59.74, "learning_rate": 4.25321828358209e-05, "loss": 0.0, "step": 64044 }, { "epoch": 59.75, "learning_rate": 4.253171641791045e-05, "loss": 0.0014, "step": 64048 }, { "epoch": 59.75, "learning_rate": 4.253125e-05, "loss": 0.0, "step": 64052 }, { "epoch": 59.75, "learning_rate": 4.2530783582089555e-05, "loss": 0.0, "step": 64056 }, { "epoch": 59.76, "learning_rate": 4.2530317164179103e-05, "loss": 0.0, "step": 64060 }, { "epoch": 59.76, "learning_rate": 4.252985074626866e-05, "loss": 0.0, "step": 64064 }, { "epoch": 59.76, "learning_rate": 4.252938432835821e-05, "loss": 0.0, "step": 64068 }, { "epoch": 59.77, "learning_rate": 4.252891791044776e-05, "loss": 0.0, "step": 64072 }, { "epoch": 59.77, "learning_rate": 4.2528451492537316e-05, "loss": 0.0, "step": 64076 }, { "epoch": 59.78, "learning_rate": 4.252798507462687e-05, "loss": 0.0, "step": 64080 }, { "epoch": 59.78, "learning_rate": 4.252751865671642e-05, "loss": 0.0002, "step": 64084 }, { "epoch": 59.78, "learning_rate": 4.252705223880597e-05, "loss": 0.0, "step": 64088 }, { "epoch": 59.79, "learning_rate": 4.252658582089553e-05, "loss": 0.0, "step": 64092 }, { "epoch": 59.79, "learning_rate": 4.252611940298508e-05, "loss": 0.0, "step": 64096 }, { "epoch": 59.79, "learning_rate": 4.2525652985074625e-05, "loss": 0.0, "step": 64100 }, { "epoch": 59.8, "learning_rate": 4.252518656716418e-05, "loss": 0.0, "step": 64104 }, { "epoch": 59.8, "learning_rate": 4.2524720149253735e-05, "loss": 0.0, "step": 64108 }, { "epoch": 59.81, "learning_rate": 4.252425373134328e-05, "loss": 0.0, "step": 64112 }, { "epoch": 59.81, "learning_rate": 4.252378731343284e-05, "loss": 0.0, "step": 64116 }, { "epoch": 59.81, "learning_rate": 4.2523320895522386e-05, "loss": 0.0, "step": 64120 }, { "epoch": 59.82, "learning_rate": 4.252285447761194e-05, "loss": 0.0, "step": 64124 }, { "epoch": 59.82, "learning_rate": 4.2522388059701496e-05, "loss": 0.0, "step": 64128 }, { "epoch": 59.82, "learning_rate": 4.2521921641791044e-05, "loss": 0.0, "step": 64132 }, { "epoch": 59.83, "learning_rate": 4.25214552238806e-05, "loss": 0.0001, "step": 64136 }, { "epoch": 59.83, "learning_rate": 4.2520988805970154e-05, "loss": 0.0, "step": 64140 }, { "epoch": 59.84, "learning_rate": 4.25205223880597e-05, "loss": 0.0, "step": 64144 }, { "epoch": 59.84, "learning_rate": 4.252005597014926e-05, "loss": 0.0, "step": 64148 }, { "epoch": 59.84, "learning_rate": 4.251958955223881e-05, "loss": 0.0, "step": 64152 }, { "epoch": 59.85, "learning_rate": 4.251912313432836e-05, "loss": 0.0, "step": 64156 }, { "epoch": 59.85, "learning_rate": 4.2518656716417915e-05, "loss": 0.0, "step": 64160 }, { "epoch": 59.85, "learning_rate": 4.251819029850746e-05, "loss": 0.001, "step": 64164 }, { "epoch": 59.86, "learning_rate": 4.251772388059702e-05, "loss": 0.0001, "step": 64168 }, { "epoch": 59.86, "learning_rate": 4.251725746268657e-05, "loss": 0.0, "step": 64172 }, { "epoch": 59.87, "learning_rate": 4.251679104477612e-05, "loss": 0.0, "step": 64176 }, { "epoch": 59.87, "learning_rate": 4.251632462686567e-05, "loss": 0.0069, "step": 64180 }, { "epoch": 59.87, "learning_rate": 4.251585820895523e-05, "loss": 0.0, "step": 64184 }, { "epoch": 59.88, "learning_rate": 4.251539179104478e-05, "loss": 0.0, "step": 64188 }, { "epoch": 59.88, "learning_rate": 4.251492537313433e-05, "loss": 0.0004, "step": 64192 }, { "epoch": 59.88, "learning_rate": 4.251445895522388e-05, "loss": 0.0, "step": 64196 }, { "epoch": 59.89, "learning_rate": 4.251399253731344e-05, "loss": 0.0, "step": 64200 }, { "epoch": 59.89, "learning_rate": 4.2513526119402985e-05, "loss": 0.0, "step": 64204 }, { "epoch": 59.9, "learning_rate": 4.251305970149254e-05, "loss": 0.0, "step": 64208 }, { "epoch": 59.9, "learning_rate": 4.2512593283582095e-05, "loss": 0.0006, "step": 64212 }, { "epoch": 59.9, "learning_rate": 4.251212686567164e-05, "loss": 0.0, "step": 64216 }, { "epoch": 59.91, "learning_rate": 4.25116604477612e-05, "loss": 0.0, "step": 64220 }, { "epoch": 59.91, "learning_rate": 4.2511194029850746e-05, "loss": 0.0, "step": 64224 }, { "epoch": 59.91, "learning_rate": 4.25107276119403e-05, "loss": 0.0, "step": 64228 }, { "epoch": 59.92, "learning_rate": 4.2510261194029856e-05, "loss": 0.0001, "step": 64232 }, { "epoch": 59.92, "learning_rate": 4.2509794776119404e-05, "loss": 0.0, "step": 64236 }, { "epoch": 59.93, "learning_rate": 4.250932835820895e-05, "loss": 0.0, "step": 64240 }, { "epoch": 59.93, "learning_rate": 4.2508861940298514e-05, "loss": 0.0, "step": 64244 }, { "epoch": 59.93, "learning_rate": 4.250839552238806e-05, "loss": 0.0004, "step": 64248 }, { "epoch": 59.94, "learning_rate": 4.250792910447761e-05, "loss": 0.0001, "step": 64252 }, { "epoch": 59.94, "learning_rate": 4.2507462686567165e-05, "loss": 0.0, "step": 64256 }, { "epoch": 59.94, "learning_rate": 4.250699626865672e-05, "loss": 0.0, "step": 64260 }, { "epoch": 59.95, "learning_rate": 4.250652985074627e-05, "loss": 0.0, "step": 64264 }, { "epoch": 59.95, "learning_rate": 4.250606343283582e-05, "loss": 0.0, "step": 64268 }, { "epoch": 59.96, "learning_rate": 4.250559701492537e-05, "loss": 0.0003, "step": 64272 }, { "epoch": 59.96, "learning_rate": 4.2505130597014926e-05, "loss": 0.0, "step": 64276 }, { "epoch": 59.96, "learning_rate": 4.250466417910448e-05, "loss": 0.0, "step": 64280 }, { "epoch": 59.97, "learning_rate": 4.250419776119403e-05, "loss": 0.0, "step": 64284 }, { "epoch": 59.97, "learning_rate": 4.2503731343283584e-05, "loss": 0.0, "step": 64288 }, { "epoch": 59.97, "learning_rate": 4.250326492537314e-05, "loss": 0.0, "step": 64292 }, { "epoch": 59.98, "learning_rate": 4.250279850746269e-05, "loss": 0.0001, "step": 64296 }, { "epoch": 59.98, "learning_rate": 4.2502332089552235e-05, "loss": 0.0, "step": 64300 }, { "epoch": 59.98, "learning_rate": 4.25018656716418e-05, "loss": 0.0, "step": 64304 }, { "epoch": 59.99, "learning_rate": 4.2501399253731345e-05, "loss": 0.0, "step": 64308 }, { "epoch": 59.99, "learning_rate": 4.25009328358209e-05, "loss": 0.0, "step": 64312 }, { "epoch": 60.0, "learning_rate": 4.250046641791045e-05, "loss": 0.0, "step": 64316 }, { "epoch": 60.0, "learning_rate": 4.25e-05, "loss": 0.0, "step": 64320 }, { "epoch": 60.0, "learning_rate": 4.249953358208956e-05, "loss": 0.0, "step": 64324 }, { "epoch": 60.01, "learning_rate": 4.2499067164179106e-05, "loss": 0.0001, "step": 64328 }, { "epoch": 60.01, "learning_rate": 4.2498600746268654e-05, "loss": 0.0, "step": 64332 }, { "epoch": 60.01, "learning_rate": 4.2498134328358216e-05, "loss": 0.0, "step": 64336 }, { "epoch": 60.02, "learning_rate": 4.2497667910447764e-05, "loss": 0.0, "step": 64340 }, { "epoch": 60.02, "learning_rate": 4.249720149253731e-05, "loss": 0.0019, "step": 64344 }, { "epoch": 60.03, "learning_rate": 4.249673507462687e-05, "loss": 0.0013, "step": 64348 }, { "epoch": 60.03, "learning_rate": 4.249626865671642e-05, "loss": 0.0, "step": 64352 }, { "epoch": 60.03, "learning_rate": 4.249580223880597e-05, "loss": 0.0, "step": 64356 }, { "epoch": 60.04, "learning_rate": 4.2495335820895525e-05, "loss": 0.0, "step": 64360 }, { "epoch": 60.04, "learning_rate": 4.249486940298508e-05, "loss": 0.0, "step": 64364 }, { "epoch": 60.04, "learning_rate": 4.249440298507463e-05, "loss": 0.0, "step": 64368 }, { "epoch": 60.05, "learning_rate": 4.249393656716418e-05, "loss": 0.0, "step": 64372 }, { "epoch": 60.05, "learning_rate": 4.249347014925373e-05, "loss": 0.0, "step": 64376 }, { "epoch": 60.06, "learning_rate": 4.2493003731343286e-05, "loss": 0.0, "step": 64380 }, { "epoch": 60.06, "learning_rate": 4.249253731343284e-05, "loss": 0.0, "step": 64384 }, { "epoch": 60.06, "learning_rate": 4.249207089552239e-05, "loss": 0.0, "step": 64388 }, { "epoch": 60.07, "learning_rate": 4.249160447761194e-05, "loss": 0.0, "step": 64392 }, { "epoch": 60.07, "learning_rate": 4.24911380597015e-05, "loss": 0.0, "step": 64396 }, { "epoch": 60.07, "learning_rate": 4.249067164179105e-05, "loss": 0.0014, "step": 64400 }, { "epoch": 60.08, "learning_rate": 4.2490205223880595e-05, "loss": 0.0, "step": 64404 }, { "epoch": 60.08, "learning_rate": 4.248973880597015e-05, "loss": 0.0, "step": 64408 }, { "epoch": 60.09, "learning_rate": 4.2489272388059705e-05, "loss": 0.0, "step": 64412 }, { "epoch": 60.09, "learning_rate": 4.248880597014925e-05, "loss": 0.0, "step": 64416 }, { "epoch": 60.09, "learning_rate": 4.248833955223881e-05, "loss": 0.0, "step": 64420 }, { "epoch": 60.1, "learning_rate": 4.248787313432836e-05, "loss": 0.0001, "step": 64424 }, { "epoch": 60.1, "learning_rate": 4.248740671641791e-05, "loss": 0.0, "step": 64428 }, { "epoch": 60.1, "learning_rate": 4.2486940298507466e-05, "loss": 0.0001, "step": 64432 }, { "epoch": 60.11, "learning_rate": 4.2486473880597014e-05, "loss": 0.0, "step": 64436 }, { "epoch": 60.11, "learning_rate": 4.248600746268657e-05, "loss": 0.0, "step": 64440 }, { "epoch": 60.12, "learning_rate": 4.2485541044776124e-05, "loss": 0.0, "step": 64444 }, { "epoch": 60.12, "learning_rate": 4.248507462686567e-05, "loss": 0.0, "step": 64448 }, { "epoch": 60.12, "learning_rate": 4.248460820895522e-05, "loss": 0.0, "step": 64452 }, { "epoch": 60.13, "learning_rate": 4.248414179104478e-05, "loss": 0.0, "step": 64456 }, { "epoch": 60.13, "learning_rate": 4.248367537313433e-05, "loss": 0.0, "step": 64460 }, { "epoch": 60.13, "learning_rate": 4.248320895522388e-05, "loss": 0.0, "step": 64464 }, { "epoch": 60.14, "learning_rate": 4.248274253731343e-05, "loss": 0.0, "step": 64468 }, { "epoch": 60.14, "learning_rate": 4.248227611940299e-05, "loss": 0.0, "step": 64472 }, { "epoch": 60.15, "learning_rate": 4.248180970149254e-05, "loss": 0.0, "step": 64476 }, { "epoch": 60.15, "learning_rate": 4.248134328358209e-05, "loss": 0.0, "step": 64480 }, { "epoch": 60.15, "learning_rate": 4.2480876865671646e-05, "loss": 0.0, "step": 64484 }, { "epoch": 60.16, "learning_rate": 4.24804104477612e-05, "loss": 0.0, "step": 64488 }, { "epoch": 60.16, "learning_rate": 4.247994402985075e-05, "loss": 0.0, "step": 64492 }, { "epoch": 60.16, "learning_rate": 4.24794776119403e-05, "loss": 0.0002, "step": 64496 }, { "epoch": 60.17, "learning_rate": 4.247901119402985e-05, "loss": 0.0, "step": 64500 }, { "epoch": 60.17, "eval_exact_match": 0.7485493230174082, "eval_exec": 0.7911025145067698, "eval_loss": 0.5219013690948486, "eval_runtime": 1099.0283, "eval_samples_per_second": 0.941, "step": 64500 }, { "epoch": 60.17, "learning_rate": 4.247854477611941e-05, "loss": 0.0, "step": 64504 }, { "epoch": 60.18, "learning_rate": 4.2478078358208955e-05, "loss": 0.0, "step": 64508 }, { "epoch": 60.18, "learning_rate": 4.247761194029851e-05, "loss": 0.0005, "step": 64512 }, { "epoch": 60.18, "learning_rate": 4.2477145522388065e-05, "loss": 0.0, "step": 64516 }, { "epoch": 60.19, "learning_rate": 4.247667910447761e-05, "loss": 0.0007, "step": 64520 }, { "epoch": 60.19, "learning_rate": 4.247621268656717e-05, "loss": 0.0, "step": 64524 }, { "epoch": 60.19, "learning_rate": 4.2475746268656716e-05, "loss": 0.0, "step": 64528 }, { "epoch": 60.2, "learning_rate": 4.247527985074627e-05, "loss": 0.0046, "step": 64532 }, { "epoch": 60.2, "learning_rate": 4.2474813432835826e-05, "loss": 0.0001, "step": 64536 }, { "epoch": 60.21, "learning_rate": 4.2474347014925374e-05, "loss": 0.0, "step": 64540 }, { "epoch": 60.21, "learning_rate": 4.247388059701493e-05, "loss": 0.0003, "step": 64544 }, { "epoch": 60.21, "learning_rate": 4.2473414179104484e-05, "loss": 0.0, "step": 64548 }, { "epoch": 60.22, "learning_rate": 4.247294776119403e-05, "loss": 0.0, "step": 64552 }, { "epoch": 60.22, "learning_rate": 4.247248134328358e-05, "loss": 0.0, "step": 64556 }, { "epoch": 60.22, "learning_rate": 4.2472014925373135e-05, "loss": 0.0001, "step": 64560 }, { "epoch": 60.23, "learning_rate": 4.247154850746269e-05, "loss": 0.0, "step": 64564 }, { "epoch": 60.23, "learning_rate": 4.247108208955224e-05, "loss": 0.0, "step": 64568 }, { "epoch": 60.24, "learning_rate": 4.247061567164179e-05, "loss": 0.0, "step": 64572 }, { "epoch": 60.24, "learning_rate": 4.247014925373135e-05, "loss": 0.0, "step": 64576 }, { "epoch": 60.24, "learning_rate": 4.2469682835820896e-05, "loss": 0.0, "step": 64580 }, { "epoch": 60.25, "learning_rate": 4.246921641791045e-05, "loss": 0.0004, "step": 64584 }, { "epoch": 60.25, "learning_rate": 4.246875e-05, "loss": 0.0009, "step": 64588 }, { "epoch": 60.25, "learning_rate": 4.2468283582089554e-05, "loss": 0.0002, "step": 64592 }, { "epoch": 60.26, "learning_rate": 4.246781716417911e-05, "loss": 0.0, "step": 64596 }, { "epoch": 60.26, "learning_rate": 4.246735074626866e-05, "loss": 0.0, "step": 64600 }, { "epoch": 60.26, "learning_rate": 4.246688432835821e-05, "loss": 0.0, "step": 64604 }, { "epoch": 60.27, "learning_rate": 4.246641791044777e-05, "loss": 0.0008, "step": 64608 }, { "epoch": 60.27, "learning_rate": 4.2465951492537315e-05, "loss": 0.0, "step": 64612 }, { "epoch": 60.28, "learning_rate": 4.246548507462686e-05, "loss": 0.0037, "step": 64616 }, { "epoch": 60.28, "learning_rate": 4.246501865671642e-05, "loss": 0.0, "step": 64620 }, { "epoch": 60.28, "learning_rate": 4.246455223880597e-05, "loss": 0.0, "step": 64624 }, { "epoch": 60.29, "learning_rate": 4.246408582089552e-05, "loss": 0.0, "step": 64628 }, { "epoch": 60.29, "learning_rate": 4.2463619402985076e-05, "loss": 0.0, "step": 64632 }, { "epoch": 60.29, "learning_rate": 4.246315298507463e-05, "loss": 0.0007, "step": 64636 }, { "epoch": 60.3, "learning_rate": 4.2462686567164186e-05, "loss": 0.0002, "step": 64640 }, { "epoch": 60.3, "learning_rate": 4.2462220149253734e-05, "loss": 0.0046, "step": 64644 }, { "epoch": 60.31, "learning_rate": 4.246175373134328e-05, "loss": 0.0, "step": 64648 }, { "epoch": 60.31, "learning_rate": 4.2461287313432844e-05, "loss": 0.0001, "step": 64652 }, { "epoch": 60.31, "learning_rate": 4.246082089552239e-05, "loss": 0.0, "step": 64656 }, { "epoch": 60.32, "learning_rate": 4.246035447761194e-05, "loss": 0.0, "step": 64660 }, { "epoch": 60.32, "learning_rate": 4.2459888059701495e-05, "loss": 0.0, "step": 64664 }, { "epoch": 60.32, "learning_rate": 4.245942164179105e-05, "loss": 0.0, "step": 64668 }, { "epoch": 60.33, "learning_rate": 4.24589552238806e-05, "loss": 0.0, "step": 64672 }, { "epoch": 60.33, "learning_rate": 4.245848880597015e-05, "loss": 0.0, "step": 64676 }, { "epoch": 60.34, "learning_rate": 4.24580223880597e-05, "loss": 0.0, "step": 64680 }, { "epoch": 60.34, "learning_rate": 4.2457555970149256e-05, "loss": 0.0, "step": 64684 }, { "epoch": 60.34, "learning_rate": 4.245708955223881e-05, "loss": 0.0, "step": 64688 }, { "epoch": 60.35, "learning_rate": 4.245662313432836e-05, "loss": 0.0, "step": 64692 }, { "epoch": 60.35, "learning_rate": 4.2456156716417914e-05, "loss": 0.0, "step": 64696 }, { "epoch": 60.35, "learning_rate": 4.245569029850747e-05, "loss": 0.0, "step": 64700 }, { "epoch": 60.36, "learning_rate": 4.245522388059702e-05, "loss": 0.0, "step": 64704 }, { "epoch": 60.36, "learning_rate": 4.2454757462686565e-05, "loss": 0.0001, "step": 64708 }, { "epoch": 60.37, "learning_rate": 4.2454291044776127e-05, "loss": 0.0021, "step": 64712 }, { "epoch": 60.37, "learning_rate": 4.2453824626865675e-05, "loss": 0.0004, "step": 64716 }, { "epoch": 60.37, "learning_rate": 4.245335820895522e-05, "loss": 0.0, "step": 64720 }, { "epoch": 60.38, "learning_rate": 4.245289179104478e-05, "loss": 0.0001, "step": 64724 }, { "epoch": 60.38, "learning_rate": 4.245242537313433e-05, "loss": 0.0, "step": 64728 }, { "epoch": 60.38, "learning_rate": 4.245195895522388e-05, "loss": 0.0, "step": 64732 }, { "epoch": 60.39, "learning_rate": 4.2451492537313436e-05, "loss": 0.0, "step": 64736 }, { "epoch": 60.39, "learning_rate": 4.2451026119402984e-05, "loss": 0.0, "step": 64740 }, { "epoch": 60.4, "learning_rate": 4.245055970149254e-05, "loss": 0.0011, "step": 64744 }, { "epoch": 60.4, "learning_rate": 4.2450093283582094e-05, "loss": 0.0, "step": 64748 }, { "epoch": 60.4, "learning_rate": 4.244962686567164e-05, "loss": 0.0001, "step": 64752 }, { "epoch": 60.41, "learning_rate": 4.2449160447761197e-05, "loss": 0.0, "step": 64756 }, { "epoch": 60.41, "learning_rate": 4.244869402985075e-05, "loss": 0.0001, "step": 64760 }, { "epoch": 60.41, "learning_rate": 4.24482276119403e-05, "loss": 0.0, "step": 64764 }, { "epoch": 60.42, "learning_rate": 4.244776119402985e-05, "loss": 0.0, "step": 64768 }, { "epoch": 60.42, "learning_rate": 4.244729477611941e-05, "loss": 0.0, "step": 64772 }, { "epoch": 60.43, "learning_rate": 4.244682835820896e-05, "loss": 0.0, "step": 64776 }, { "epoch": 60.43, "learning_rate": 4.2446361940298506e-05, "loss": 0.0, "step": 64780 }, { "epoch": 60.43, "learning_rate": 4.244589552238806e-05, "loss": 0.0001, "step": 64784 }, { "epoch": 60.44, "learning_rate": 4.2445429104477616e-05, "loss": 0.0, "step": 64788 }, { "epoch": 60.44, "learning_rate": 4.2444962686567164e-05, "loss": 0.0007, "step": 64792 }, { "epoch": 60.44, "learning_rate": 4.244449626865672e-05, "loss": 0.0086, "step": 64796 }, { "epoch": 60.45, "learning_rate": 4.244402985074627e-05, "loss": 0.0002, "step": 64800 }, { "epoch": 60.45, "learning_rate": 4.244356343283583e-05, "loss": 0.0, "step": 64804 }, { "epoch": 60.46, "learning_rate": 4.2443097014925377e-05, "loss": 0.0, "step": 64808 }, { "epoch": 60.46, "learning_rate": 4.2442630597014925e-05, "loss": 0.0, "step": 64812 }, { "epoch": 60.46, "learning_rate": 4.244216417910448e-05, "loss": 0.0, "step": 64816 }, { "epoch": 60.47, "learning_rate": 4.2441697761194034e-05, "loss": 0.0, "step": 64820 }, { "epoch": 60.47, "learning_rate": 4.244123134328358e-05, "loss": 0.0, "step": 64824 }, { "epoch": 60.47, "learning_rate": 4.244076492537314e-05, "loss": 0.0, "step": 64828 }, { "epoch": 60.48, "learning_rate": 4.244029850746269e-05, "loss": 0.0, "step": 64832 }, { "epoch": 60.48, "learning_rate": 4.243983208955224e-05, "loss": 0.0001, "step": 64836 }, { "epoch": 60.49, "learning_rate": 4.2439365671641795e-05, "loss": 0.0, "step": 64840 }, { "epoch": 60.49, "learning_rate": 4.2438899253731344e-05, "loss": 0.0, "step": 64844 }, { "epoch": 60.49, "learning_rate": 4.24384328358209e-05, "loss": 0.0, "step": 64848 }, { "epoch": 60.5, "learning_rate": 4.2437966417910453e-05, "loss": 0.0002, "step": 64852 }, { "epoch": 60.5, "learning_rate": 4.24375e-05, "loss": 0.0002, "step": 64856 }, { "epoch": 60.5, "learning_rate": 4.243703358208955e-05, "loss": 0.0, "step": 64860 }, { "epoch": 60.51, "learning_rate": 4.243656716417911e-05, "loss": 0.0001, "step": 64864 }, { "epoch": 60.51, "learning_rate": 4.243610074626866e-05, "loss": 0.0, "step": 64868 }, { "epoch": 60.51, "learning_rate": 4.243563432835821e-05, "loss": 0.0, "step": 64872 }, { "epoch": 60.52, "learning_rate": 4.243516791044776e-05, "loss": 0.0, "step": 64876 }, { "epoch": 60.52, "learning_rate": 4.243470149253732e-05, "loss": 0.0, "step": 64880 }, { "epoch": 60.53, "learning_rate": 4.2434235074626866e-05, "loss": 0.0001, "step": 64884 }, { "epoch": 60.53, "learning_rate": 4.243376865671642e-05, "loss": 0.0, "step": 64888 }, { "epoch": 60.53, "learning_rate": 4.2433302238805975e-05, "loss": 0.0001, "step": 64892 }, { "epoch": 60.54, "learning_rate": 4.2432835820895524e-05, "loss": 0.0, "step": 64896 }, { "epoch": 60.54, "learning_rate": 4.243236940298508e-05, "loss": 0.0003, "step": 64900 }, { "epoch": 60.54, "learning_rate": 4.2431902985074627e-05, "loss": 0.0009, "step": 64904 }, { "epoch": 60.55, "learning_rate": 4.243143656716418e-05, "loss": 0.0, "step": 64908 }, { "epoch": 60.55, "learning_rate": 4.2430970149253736e-05, "loss": 0.0, "step": 64912 }, { "epoch": 60.56, "learning_rate": 4.2430503731343284e-05, "loss": 0.0002, "step": 64916 }, { "epoch": 60.56, "learning_rate": 4.243003731343283e-05, "loss": 0.0004, "step": 64920 }, { "epoch": 60.56, "learning_rate": 4.2429570895522394e-05, "loss": 0.0, "step": 64924 }, { "epoch": 60.57, "learning_rate": 4.242910447761194e-05, "loss": 0.0, "step": 64928 }, { "epoch": 60.57, "learning_rate": 4.242863805970149e-05, "loss": 0.0, "step": 64932 }, { "epoch": 60.57, "learning_rate": 4.2428171641791045e-05, "loss": 0.0, "step": 64936 }, { "epoch": 60.58, "learning_rate": 4.24277052238806e-05, "loss": 0.0, "step": 64940 }, { "epoch": 60.58, "learning_rate": 4.242723880597015e-05, "loss": 0.0001, "step": 64944 }, { "epoch": 60.59, "learning_rate": 4.2426772388059703e-05, "loss": 0.0001, "step": 64948 }, { "epoch": 60.59, "learning_rate": 4.242630597014925e-05, "loss": 0.0001, "step": 64952 }, { "epoch": 60.59, "learning_rate": 4.2425839552238806e-05, "loss": 0.0001, "step": 64956 }, { "epoch": 60.6, "learning_rate": 4.242537313432836e-05, "loss": 0.0, "step": 64960 }, { "epoch": 60.6, "learning_rate": 4.242490671641791e-05, "loss": 0.0003, "step": 64964 }, { "epoch": 60.6, "learning_rate": 4.2424440298507464e-05, "loss": 0.0, "step": 64968 }, { "epoch": 60.61, "learning_rate": 4.242397388059702e-05, "loss": 0.0, "step": 64972 }, { "epoch": 60.61, "learning_rate": 4.242350746268657e-05, "loss": 0.0002, "step": 64976 }, { "epoch": 60.62, "learning_rate": 4.242304104477612e-05, "loss": 0.0, "step": 64980 }, { "epoch": 60.62, "learning_rate": 4.242257462686568e-05, "loss": 0.0, "step": 64984 }, { "epoch": 60.62, "learning_rate": 4.2422108208955225e-05, "loss": 0.0, "step": 64988 }, { "epoch": 60.63, "learning_rate": 4.242164179104478e-05, "loss": 0.0, "step": 64992 }, { "epoch": 60.63, "learning_rate": 4.242117537313433e-05, "loss": 0.0, "step": 64996 }, { "epoch": 60.63, "learning_rate": 4.242070895522388e-05, "loss": 0.0, "step": 65000 }, { "epoch": 60.63, "eval_exact_match": 0.741779497098646, "eval_exec": 0.7852998065764023, "eval_loss": 0.5173532366752625, "eval_runtime": 1115.6158, "eval_samples_per_second": 0.927, "step": 65000 }, { "epoch": 60.64, "learning_rate": 4.242024253731344e-05, "loss": 0.0, "step": 65004 }, { "epoch": 60.64, "learning_rate": 4.2419776119402986e-05, "loss": 0.0, "step": 65008 }, { "epoch": 60.65, "learning_rate": 4.2419309701492535e-05, "loss": 0.0, "step": 65012 }, { "epoch": 60.65, "learning_rate": 4.2418843283582096e-05, "loss": 0.0, "step": 65016 }, { "epoch": 60.65, "learning_rate": 4.2418376865671644e-05, "loss": 0.0002, "step": 65020 }, { "epoch": 60.66, "learning_rate": 4.241791044776119e-05, "loss": 0.0001, "step": 65024 }, { "epoch": 60.66, "learning_rate": 4.241744402985075e-05, "loss": 0.0015, "step": 65028 }, { "epoch": 60.66, "learning_rate": 4.24169776119403e-05, "loss": 0.0004, "step": 65032 }, { "epoch": 60.67, "learning_rate": 4.241651119402985e-05, "loss": 0.0008, "step": 65036 }, { "epoch": 60.67, "learning_rate": 4.2416044776119405e-05, "loss": 0.0, "step": 65040 }, { "epoch": 60.68, "learning_rate": 4.241557835820896e-05, "loss": 0.0, "step": 65044 }, { "epoch": 60.68, "learning_rate": 4.241511194029851e-05, "loss": 0.0, "step": 65048 }, { "epoch": 60.68, "learning_rate": 4.241464552238806e-05, "loss": 0.0, "step": 65052 }, { "epoch": 60.69, "learning_rate": 4.241417910447761e-05, "loss": 0.0, "step": 65056 }, { "epoch": 60.69, "learning_rate": 4.2413712686567166e-05, "loss": 0.0001, "step": 65060 }, { "epoch": 60.69, "learning_rate": 4.241324626865672e-05, "loss": 0.0017, "step": 65064 }, { "epoch": 60.7, "learning_rate": 4.241277985074627e-05, "loss": 0.0, "step": 65068 }, { "epoch": 60.7, "learning_rate": 4.241231343283582e-05, "loss": 0.0, "step": 65072 }, { "epoch": 60.71, "learning_rate": 4.241184701492538e-05, "loss": 0.0, "step": 65076 }, { "epoch": 60.71, "learning_rate": 4.241138059701493e-05, "loss": 0.0002, "step": 65080 }, { "epoch": 60.71, "learning_rate": 4.2410914179104475e-05, "loss": 0.0, "step": 65084 }, { "epoch": 60.72, "learning_rate": 4.241044776119403e-05, "loss": 0.0, "step": 65088 }, { "epoch": 60.72, "learning_rate": 4.2409981343283585e-05, "loss": 0.0, "step": 65092 }, { "epoch": 60.72, "learning_rate": 4.240951492537313e-05, "loss": 0.0, "step": 65096 }, { "epoch": 60.73, "learning_rate": 4.240904850746269e-05, "loss": 0.0, "step": 65100 }, { "epoch": 60.73, "learning_rate": 4.240858208955224e-05, "loss": 0.0, "step": 65104 }, { "epoch": 60.73, "learning_rate": 4.240811567164179e-05, "loss": 0.0, "step": 65108 }, { "epoch": 60.74, "learning_rate": 4.2407649253731346e-05, "loss": 0.0042, "step": 65112 }, { "epoch": 60.74, "learning_rate": 4.2407182835820894e-05, "loss": 0.0, "step": 65116 }, { "epoch": 60.75, "learning_rate": 4.240671641791045e-05, "loss": 0.0, "step": 65120 }, { "epoch": 60.75, "learning_rate": 4.2406250000000004e-05, "loss": 0.0, "step": 65124 }, { "epoch": 60.75, "learning_rate": 4.240578358208955e-05, "loss": 0.0, "step": 65128 }, { "epoch": 60.76, "learning_rate": 4.240531716417911e-05, "loss": 0.0, "step": 65132 }, { "epoch": 60.76, "learning_rate": 4.240485074626866e-05, "loss": 0.0, "step": 65136 }, { "epoch": 60.76, "learning_rate": 4.240438432835821e-05, "loss": 0.0, "step": 65140 }, { "epoch": 60.77, "learning_rate": 4.2403917910447765e-05, "loss": 0.0001, "step": 65144 }, { "epoch": 60.77, "learning_rate": 4.240345149253731e-05, "loss": 0.0, "step": 65148 }, { "epoch": 60.78, "learning_rate": 4.240298507462687e-05, "loss": 0.0, "step": 65152 }, { "epoch": 60.78, "learning_rate": 4.240251865671642e-05, "loss": 0.0, "step": 65156 }, { "epoch": 60.78, "learning_rate": 4.240205223880597e-05, "loss": 0.0001, "step": 65160 }, { "epoch": 60.79, "learning_rate": 4.2401585820895526e-05, "loss": 0.0001, "step": 65164 }, { "epoch": 60.79, "learning_rate": 4.240111940298508e-05, "loss": 0.0, "step": 65168 }, { "epoch": 60.79, "learning_rate": 4.240065298507463e-05, "loss": 0.0001, "step": 65172 }, { "epoch": 60.8, "learning_rate": 4.240018656716418e-05, "loss": 0.0001, "step": 65176 }, { "epoch": 60.8, "learning_rate": 4.239972014925373e-05, "loss": 0.0, "step": 65180 }, { "epoch": 60.81, "learning_rate": 4.239925373134329e-05, "loss": 0.0001, "step": 65184 }, { "epoch": 60.81, "learning_rate": 4.2398787313432835e-05, "loss": 0.0, "step": 65188 }, { "epoch": 60.81, "learning_rate": 4.239832089552239e-05, "loss": 0.0, "step": 65192 }, { "epoch": 60.82, "learning_rate": 4.2397854477611945e-05, "loss": 0.0, "step": 65196 }, { "epoch": 60.82, "learning_rate": 4.239738805970149e-05, "loss": 0.0001, "step": 65200 }, { "epoch": 60.82, "learning_rate": 4.239692164179105e-05, "loss": 0.0, "step": 65204 }, { "epoch": 60.83, "learning_rate": 4.2396455223880596e-05, "loss": 0.0001, "step": 65208 }, { "epoch": 60.83, "learning_rate": 4.239598880597015e-05, "loss": 0.0, "step": 65212 }, { "epoch": 60.84, "learning_rate": 4.2395522388059706e-05, "loss": 0.0, "step": 65216 }, { "epoch": 60.84, "learning_rate": 4.2395055970149254e-05, "loss": 0.0, "step": 65220 }, { "epoch": 60.84, "learning_rate": 4.239458955223881e-05, "loss": 0.0098, "step": 65224 }, { "epoch": 60.85, "learning_rate": 4.2394123134328364e-05, "loss": 0.0, "step": 65228 }, { "epoch": 60.85, "learning_rate": 4.239365671641791e-05, "loss": 0.0, "step": 65232 }, { "epoch": 60.85, "learning_rate": 4.239319029850746e-05, "loss": 0.0, "step": 65236 }, { "epoch": 60.86, "learning_rate": 4.2392723880597015e-05, "loss": 0.0, "step": 65240 }, { "epoch": 60.86, "learning_rate": 4.239225746268657e-05, "loss": 0.0, "step": 65244 }, { "epoch": 60.87, "learning_rate": 4.239179104477612e-05, "loss": 0.0, "step": 65248 }, { "epoch": 60.87, "learning_rate": 4.239132462686567e-05, "loss": 0.0, "step": 65252 }, { "epoch": 60.87, "learning_rate": 4.239085820895523e-05, "loss": 0.0, "step": 65256 }, { "epoch": 60.88, "learning_rate": 4.2390391791044776e-05, "loss": 0.0, "step": 65260 }, { "epoch": 60.88, "learning_rate": 4.238992537313433e-05, "loss": 0.0, "step": 65264 }, { "epoch": 60.88, "learning_rate": 4.238945895522388e-05, "loss": 0.0028, "step": 65268 }, { "epoch": 60.89, "learning_rate": 4.2388992537313434e-05, "loss": 0.0, "step": 65272 }, { "epoch": 60.89, "learning_rate": 4.238852611940299e-05, "loss": 0.0002, "step": 65276 }, { "epoch": 60.9, "learning_rate": 4.238805970149254e-05, "loss": 0.0, "step": 65280 }, { "epoch": 60.9, "learning_rate": 4.238759328358209e-05, "loss": 0.0, "step": 65284 }, { "epoch": 60.9, "learning_rate": 4.238712686567165e-05, "loss": 0.0, "step": 65288 }, { "epoch": 60.91, "learning_rate": 4.2386660447761195e-05, "loss": 0.0, "step": 65292 }, { "epoch": 60.91, "learning_rate": 4.238619402985075e-05, "loss": 0.0, "step": 65296 }, { "epoch": 60.91, "learning_rate": 4.23857276119403e-05, "loss": 0.0, "step": 65300 }, { "epoch": 60.92, "learning_rate": 4.238526119402985e-05, "loss": 0.0011, "step": 65304 }, { "epoch": 60.92, "learning_rate": 4.238479477611941e-05, "loss": 0.0003, "step": 65308 }, { "epoch": 60.93, "learning_rate": 4.2384328358208956e-05, "loss": 0.001, "step": 65312 }, { "epoch": 60.93, "learning_rate": 4.238386194029851e-05, "loss": 0.0, "step": 65316 }, { "epoch": 60.93, "learning_rate": 4.2383395522388066e-05, "loss": 0.0, "step": 65320 }, { "epoch": 60.94, "learning_rate": 4.2382929104477614e-05, "loss": 0.0, "step": 65324 }, { "epoch": 60.94, "learning_rate": 4.238246268656716e-05, "loss": 0.0, "step": 65328 }, { "epoch": 60.94, "learning_rate": 4.2381996268656724e-05, "loss": 0.0, "step": 65332 }, { "epoch": 60.95, "learning_rate": 4.238152985074627e-05, "loss": 0.0, "step": 65336 }, { "epoch": 60.95, "learning_rate": 4.238106343283582e-05, "loss": 0.0, "step": 65340 }, { "epoch": 60.96, "learning_rate": 4.2380597014925375e-05, "loss": 0.0, "step": 65344 }, { "epoch": 60.96, "learning_rate": 4.238013059701493e-05, "loss": 0.0, "step": 65348 }, { "epoch": 60.96, "learning_rate": 4.237966417910448e-05, "loss": 0.0001, "step": 65352 }, { "epoch": 60.97, "learning_rate": 4.237919776119403e-05, "loss": 0.0019, "step": 65356 }, { "epoch": 60.97, "learning_rate": 4.237873134328358e-05, "loss": 0.0001, "step": 65360 }, { "epoch": 60.97, "learning_rate": 4.2378264925373136e-05, "loss": 0.0, "step": 65364 }, { "epoch": 60.98, "learning_rate": 4.237779850746269e-05, "loss": 0.0, "step": 65368 }, { "epoch": 60.98, "learning_rate": 4.237733208955224e-05, "loss": 0.0005, "step": 65372 }, { "epoch": 60.98, "learning_rate": 4.2376865671641794e-05, "loss": 0.0042, "step": 65376 }, { "epoch": 60.99, "learning_rate": 4.237639925373135e-05, "loss": 0.0, "step": 65380 }, { "epoch": 60.99, "learning_rate": 4.23759328358209e-05, "loss": 0.0, "step": 65384 }, { "epoch": 61.0, "learning_rate": 4.2375466417910445e-05, "loss": 0.0, "step": 65388 }, { "epoch": 61.0, "learning_rate": 4.237500000000001e-05, "loss": 0.0, "step": 65392 }, { "epoch": 61.0, "learning_rate": 4.2374533582089555e-05, "loss": 0.0, "step": 65396 }, { "epoch": 61.01, "learning_rate": 4.23740671641791e-05, "loss": 0.0, "step": 65400 }, { "epoch": 61.01, "learning_rate": 4.237360074626866e-05, "loss": 0.0, "step": 65404 }, { "epoch": 61.01, "learning_rate": 4.237313432835821e-05, "loss": 0.0, "step": 65408 }, { "epoch": 61.02, "learning_rate": 4.237266791044776e-05, "loss": 0.0, "step": 65412 }, { "epoch": 61.02, "learning_rate": 4.2372201492537316e-05, "loss": 0.0, "step": 65416 }, { "epoch": 61.03, "learning_rate": 4.2371735074626864e-05, "loss": 0.0, "step": 65420 }, { "epoch": 61.03, "learning_rate": 4.237126865671642e-05, "loss": 0.0126, "step": 65424 }, { "epoch": 61.03, "learning_rate": 4.2370802238805974e-05, "loss": 0.0, "step": 65428 }, { "epoch": 61.04, "learning_rate": 4.237033582089552e-05, "loss": 0.0, "step": 65432 }, { "epoch": 61.04, "learning_rate": 4.236986940298508e-05, "loss": 0.0, "step": 65436 }, { "epoch": 61.04, "learning_rate": 4.236940298507463e-05, "loss": 0.0, "step": 65440 }, { "epoch": 61.05, "learning_rate": 4.236893656716418e-05, "loss": 0.0, "step": 65444 }, { "epoch": 61.05, "learning_rate": 4.2368470149253735e-05, "loss": 0.0, "step": 65448 }, { "epoch": 61.06, "learning_rate": 4.236800373134329e-05, "loss": 0.0, "step": 65452 }, { "epoch": 61.06, "learning_rate": 4.236753731343284e-05, "loss": 0.0, "step": 65456 }, { "epoch": 61.06, "learning_rate": 4.236707089552239e-05, "loss": 0.0, "step": 65460 }, { "epoch": 61.07, "learning_rate": 4.236660447761194e-05, "loss": 0.0015, "step": 65464 }, { "epoch": 61.07, "learning_rate": 4.2366138059701496e-05, "loss": 0.0, "step": 65468 }, { "epoch": 61.07, "learning_rate": 4.236567164179105e-05, "loss": 0.0002, "step": 65472 }, { "epoch": 61.08, "learning_rate": 4.23652052238806e-05, "loss": 0.0, "step": 65476 }, { "epoch": 61.08, "learning_rate": 4.236473880597015e-05, "loss": 0.0, "step": 65480 }, { "epoch": 61.09, "learning_rate": 4.236427238805971e-05, "loss": 0.0001, "step": 65484 }, { "epoch": 61.09, "learning_rate": 4.236380597014926e-05, "loss": 0.0017, "step": 65488 }, { "epoch": 61.09, "learning_rate": 4.2363339552238805e-05, "loss": 0.0001, "step": 65492 }, { "epoch": 61.1, "learning_rate": 4.236287313432836e-05, "loss": 0.0, "step": 65496 }, { "epoch": 61.1, "learning_rate": 4.2362406716417915e-05, "loss": 0.0, "step": 65500 }, { "epoch": 61.1, "eval_exact_match": 0.7475822050290135, "eval_exec": 0.7833655705996132, "eval_loss": 0.49484971165657043, "eval_runtime": 1108.9884, "eval_samples_per_second": 0.932, "step": 65500 }, { "epoch": 61.1, "learning_rate": 4.236194029850746e-05, "loss": 0.0, "step": 65504 }, { "epoch": 61.11, "learning_rate": 4.236147388059702e-05, "loss": 0.0014, "step": 65508 }, { "epoch": 61.11, "learning_rate": 4.236100746268657e-05, "loss": 0.0011, "step": 65512 }, { "epoch": 61.12, "learning_rate": 4.236054104477612e-05, "loss": 0.0, "step": 65516 }, { "epoch": 61.12, "learning_rate": 4.2360074626865676e-05, "loss": 0.0, "step": 65520 }, { "epoch": 61.12, "learning_rate": 4.2359608208955224e-05, "loss": 0.0001, "step": 65524 }, { "epoch": 61.13, "learning_rate": 4.235914179104478e-05, "loss": 0.0001, "step": 65528 }, { "epoch": 61.13, "learning_rate": 4.2358675373134334e-05, "loss": 0.0, "step": 65532 }, { "epoch": 61.13, "learning_rate": 4.235820895522388e-05, "loss": 0.0, "step": 65536 }, { "epoch": 61.14, "learning_rate": 4.235774253731343e-05, "loss": 0.0, "step": 65540 }, { "epoch": 61.14, "learning_rate": 4.235727611940299e-05, "loss": 0.0001, "step": 65544 }, { "epoch": 61.15, "learning_rate": 4.235680970149254e-05, "loss": 0.0, "step": 65548 }, { "epoch": 61.15, "learning_rate": 4.235634328358209e-05, "loss": 0.0, "step": 65552 }, { "epoch": 61.15, "learning_rate": 4.235587686567164e-05, "loss": 0.0, "step": 65556 }, { "epoch": 61.16, "learning_rate": 4.23554104477612e-05, "loss": 0.0, "step": 65560 }, { "epoch": 61.16, "learning_rate": 4.2354944029850746e-05, "loss": 0.0001, "step": 65564 }, { "epoch": 61.16, "learning_rate": 4.23544776119403e-05, "loss": 0.0026, "step": 65568 }, { "epoch": 61.17, "learning_rate": 4.2354011194029856e-05, "loss": 0.0001, "step": 65572 }, { "epoch": 61.17, "learning_rate": 4.2353544776119404e-05, "loss": 0.0001, "step": 65576 }, { "epoch": 61.18, "learning_rate": 4.235307835820896e-05, "loss": 0.0, "step": 65580 }, { "epoch": 61.18, "learning_rate": 4.235261194029851e-05, "loss": 0.0, "step": 65584 }, { "epoch": 61.18, "learning_rate": 4.235214552238806e-05, "loss": 0.0053, "step": 65588 }, { "epoch": 61.19, "learning_rate": 4.235167910447762e-05, "loss": 0.0003, "step": 65592 }, { "epoch": 61.19, "learning_rate": 4.2351212686567165e-05, "loss": 0.0006, "step": 65596 }, { "epoch": 61.19, "learning_rate": 4.235074626865671e-05, "loss": 0.0012, "step": 65600 }, { "epoch": 61.2, "learning_rate": 4.2350279850746275e-05, "loss": 0.0, "step": 65604 }, { "epoch": 61.2, "learning_rate": 4.234981343283582e-05, "loss": 0.0, "step": 65608 }, { "epoch": 61.21, "learning_rate": 4.234934701492538e-05, "loss": 0.0, "step": 65612 }, { "epoch": 61.21, "learning_rate": 4.2348880597014926e-05, "loss": 0.0, "step": 65616 }, { "epoch": 61.21, "learning_rate": 4.234841417910448e-05, "loss": 0.0004, "step": 65620 }, { "epoch": 61.22, "learning_rate": 4.2347947761194036e-05, "loss": 0.0, "step": 65624 }, { "epoch": 61.22, "learning_rate": 4.2347481343283584e-05, "loss": 0.0001, "step": 65628 }, { "epoch": 61.22, "learning_rate": 4.234701492537313e-05, "loss": 0.0, "step": 65632 }, { "epoch": 61.23, "learning_rate": 4.2346548507462694e-05, "loss": 0.0, "step": 65636 }, { "epoch": 61.23, "learning_rate": 4.234608208955224e-05, "loss": 0.0052, "step": 65640 }, { "epoch": 61.24, "learning_rate": 4.234561567164179e-05, "loss": 0.0, "step": 65644 }, { "epoch": 61.24, "learning_rate": 4.2345149253731345e-05, "loss": 0.0, "step": 65648 }, { "epoch": 61.24, "learning_rate": 4.23446828358209e-05, "loss": 0.0001, "step": 65652 }, { "epoch": 61.25, "learning_rate": 4.234421641791045e-05, "loss": 0.0001, "step": 65656 }, { "epoch": 61.25, "learning_rate": 4.234375e-05, "loss": 0.0, "step": 65660 }, { "epoch": 61.25, "learning_rate": 4.234328358208956e-05, "loss": 0.0, "step": 65664 }, { "epoch": 61.26, "learning_rate": 4.2342817164179106e-05, "loss": 0.0, "step": 65668 }, { "epoch": 61.26, "learning_rate": 4.234235074626866e-05, "loss": 0.0, "step": 65672 }, { "epoch": 61.26, "learning_rate": 4.234188432835821e-05, "loss": 0.0, "step": 65676 }, { "epoch": 61.27, "learning_rate": 4.2341417910447764e-05, "loss": 0.0, "step": 65680 }, { "epoch": 61.27, "learning_rate": 4.234095149253732e-05, "loss": 0.0004, "step": 65684 }, { "epoch": 61.28, "learning_rate": 4.234048507462687e-05, "loss": 0.0002, "step": 65688 }, { "epoch": 61.28, "learning_rate": 4.2340018656716415e-05, "loss": 0.0, "step": 65692 }, { "epoch": 61.28, "learning_rate": 4.2339552238805976e-05, "loss": 0.0003, "step": 65696 }, { "epoch": 61.29, "learning_rate": 4.2339085820895525e-05, "loss": 0.0021, "step": 65700 }, { "epoch": 61.29, "learning_rate": 4.233861940298507e-05, "loss": 0.0, "step": 65704 }, { "epoch": 61.29, "learning_rate": 4.233815298507463e-05, "loss": 0.0009, "step": 65708 }, { "epoch": 61.3, "learning_rate": 4.233768656716418e-05, "loss": 0.0008, "step": 65712 }, { "epoch": 61.3, "learning_rate": 4.233722014925373e-05, "loss": 0.0, "step": 65716 }, { "epoch": 61.31, "learning_rate": 4.2336753731343286e-05, "loss": 0.0, "step": 65720 }, { "epoch": 61.31, "learning_rate": 4.233628731343284e-05, "loss": 0.0, "step": 65724 }, { "epoch": 61.31, "learning_rate": 4.233582089552239e-05, "loss": 0.0, "step": 65728 }, { "epoch": 61.32, "learning_rate": 4.2335354477611944e-05, "loss": 0.0003, "step": 65732 }, { "epoch": 61.32, "learning_rate": 4.233488805970149e-05, "loss": 0.0015, "step": 65736 }, { "epoch": 61.32, "learning_rate": 4.2334421641791047e-05, "loss": 0.0, "step": 65740 }, { "epoch": 61.33, "learning_rate": 4.23339552238806e-05, "loss": 0.002, "step": 65744 }, { "epoch": 61.33, "learning_rate": 4.233348880597015e-05, "loss": 0.0, "step": 65748 }, { "epoch": 61.34, "learning_rate": 4.23330223880597e-05, "loss": 0.0051, "step": 65752 }, { "epoch": 61.34, "learning_rate": 4.233255597014926e-05, "loss": 0.0007, "step": 65756 }, { "epoch": 61.34, "learning_rate": 4.233208955223881e-05, "loss": 0.0, "step": 65760 }, { "epoch": 61.35, "learning_rate": 4.2331623134328356e-05, "loss": 0.0, "step": 65764 }, { "epoch": 61.35, "learning_rate": 4.233115671641791e-05, "loss": 0.0, "step": 65768 }, { "epoch": 61.35, "learning_rate": 4.2330690298507466e-05, "loss": 0.0, "step": 65772 }, { "epoch": 61.36, "learning_rate": 4.233022388059702e-05, "loss": 0.0, "step": 65776 }, { "epoch": 61.36, "learning_rate": 4.232975746268657e-05, "loss": 0.0, "step": 65780 }, { "epoch": 61.37, "learning_rate": 4.2329291044776123e-05, "loss": 0.0002, "step": 65784 }, { "epoch": 61.37, "learning_rate": 4.232882462686568e-05, "loss": 0.0001, "step": 65788 }, { "epoch": 61.37, "learning_rate": 4.2328358208955227e-05, "loss": 0.0089, "step": 65792 }, { "epoch": 61.38, "learning_rate": 4.2327891791044775e-05, "loss": 0.0, "step": 65796 }, { "epoch": 61.38, "learning_rate": 4.232742537313433e-05, "loss": 0.0, "step": 65800 }, { "epoch": 61.38, "learning_rate": 4.2326958955223884e-05, "loss": 0.0, "step": 65804 }, { "epoch": 61.39, "learning_rate": 4.232649253731343e-05, "loss": 0.0003, "step": 65808 }, { "epoch": 61.39, "learning_rate": 4.232602611940299e-05, "loss": 0.0, "step": 65812 }, { "epoch": 61.4, "learning_rate": 4.232555970149254e-05, "loss": 0.0, "step": 65816 }, { "epoch": 61.4, "learning_rate": 4.232509328358209e-05, "loss": 0.0, "step": 65820 }, { "epoch": 61.4, "learning_rate": 4.2324626865671645e-05, "loss": 0.0001, "step": 65824 }, { "epoch": 61.41, "learning_rate": 4.2324160447761194e-05, "loss": 0.0, "step": 65828 }, { "epoch": 61.41, "learning_rate": 4.232369402985075e-05, "loss": 0.0, "step": 65832 }, { "epoch": 61.41, "learning_rate": 4.23232276119403e-05, "loss": 0.0011, "step": 65836 }, { "epoch": 61.42, "learning_rate": 4.232276119402985e-05, "loss": 0.0, "step": 65840 }, { "epoch": 61.42, "learning_rate": 4.2322294776119406e-05, "loss": 0.0, "step": 65844 }, { "epoch": 61.43, "learning_rate": 4.232182835820896e-05, "loss": 0.0, "step": 65848 }, { "epoch": 61.43, "learning_rate": 4.232136194029851e-05, "loss": 0.0, "step": 65852 }, { "epoch": 61.43, "learning_rate": 4.232089552238806e-05, "loss": 0.0001, "step": 65856 }, { "epoch": 61.44, "learning_rate": 4.232042910447761e-05, "loss": 0.0, "step": 65860 }, { "epoch": 61.44, "learning_rate": 4.231996268656717e-05, "loss": 0.0, "step": 65864 }, { "epoch": 61.44, "learning_rate": 4.2319496268656716e-05, "loss": 0.0, "step": 65868 }, { "epoch": 61.45, "learning_rate": 4.231902985074627e-05, "loss": 0.0, "step": 65872 }, { "epoch": 61.45, "learning_rate": 4.2318563432835825e-05, "loss": 0.0, "step": 65876 }, { "epoch": 61.46, "learning_rate": 4.2318097014925373e-05, "loss": 0.0, "step": 65880 }, { "epoch": 61.46, "learning_rate": 4.231763059701493e-05, "loss": 0.0001, "step": 65884 }, { "epoch": 61.46, "learning_rate": 4.2317164179104477e-05, "loss": 0.0, "step": 65888 }, { "epoch": 61.47, "learning_rate": 4.231669776119403e-05, "loss": 0.0, "step": 65892 }, { "epoch": 61.47, "learning_rate": 4.2316231343283586e-05, "loss": 0.0, "step": 65896 }, { "epoch": 61.47, "learning_rate": 4.2315764925373134e-05, "loss": 0.0, "step": 65900 }, { "epoch": 61.48, "learning_rate": 4.231529850746269e-05, "loss": 0.0, "step": 65904 }, { "epoch": 61.48, "learning_rate": 4.2314832089552244e-05, "loss": 0.0, "step": 65908 }, { "epoch": 61.49, "learning_rate": 4.231436567164179e-05, "loss": 0.0, "step": 65912 }, { "epoch": 61.49, "learning_rate": 4.231389925373134e-05, "loss": 0.0, "step": 65916 }, { "epoch": 61.49, "learning_rate": 4.2313432835820895e-05, "loss": 0.0, "step": 65920 }, { "epoch": 61.5, "learning_rate": 4.231296641791045e-05, "loss": 0.0, "step": 65924 }, { "epoch": 61.5, "learning_rate": 4.23125e-05, "loss": 0.0, "step": 65928 }, { "epoch": 61.5, "learning_rate": 4.2312033582089553e-05, "loss": 0.0, "step": 65932 }, { "epoch": 61.51, "learning_rate": 4.231156716417911e-05, "loss": 0.0089, "step": 65936 }, { "epoch": 61.51, "learning_rate": 4.231110074626866e-05, "loss": 0.0, "step": 65940 }, { "epoch": 61.51, "learning_rate": 4.231063432835821e-05, "loss": 0.0, "step": 65944 }, { "epoch": 61.52, "learning_rate": 4.231016791044776e-05, "loss": 0.0005, "step": 65948 }, { "epoch": 61.52, "learning_rate": 4.230970149253732e-05, "loss": 0.0, "step": 65952 }, { "epoch": 61.53, "learning_rate": 4.230923507462687e-05, "loss": 0.0, "step": 65956 }, { "epoch": 61.53, "learning_rate": 4.230876865671642e-05, "loss": 0.0, "step": 65960 }, { "epoch": 61.53, "learning_rate": 4.230830223880597e-05, "loss": 0.0, "step": 65964 }, { "epoch": 61.54, "learning_rate": 4.230783582089553e-05, "loss": 0.0, "step": 65968 }, { "epoch": 61.54, "learning_rate": 4.2307369402985075e-05, "loss": 0.0, "step": 65972 }, { "epoch": 61.54, "learning_rate": 4.230690298507463e-05, "loss": 0.0, "step": 65976 }, { "epoch": 61.55, "learning_rate": 4.230643656716418e-05, "loss": 0.0, "step": 65980 }, { "epoch": 61.55, "learning_rate": 4.230597014925373e-05, "loss": 0.001, "step": 65984 }, { "epoch": 61.56, "learning_rate": 4.230550373134329e-05, "loss": 0.0, "step": 65988 }, { "epoch": 61.56, "learning_rate": 4.2305037313432836e-05, "loss": 0.0, "step": 65992 }, { "epoch": 61.56, "learning_rate": 4.230457089552239e-05, "loss": 0.0001, "step": 65996 }, { "epoch": 61.57, "learning_rate": 4.2304104477611946e-05, "loss": 0.0, "step": 66000 }, { "epoch": 61.57, "eval_exact_match": 0.7446808510638298, "eval_exec": 0.7775628626692457, "eval_loss": 0.4943001866340637, "eval_runtime": 1085.5598, "eval_samples_per_second": 0.953, "step": 66000 }, { "epoch": 61.57, "learning_rate": 4.2303638059701494e-05, "loss": 0.0, "step": 66004 }, { "epoch": 61.57, "learning_rate": 4.230317164179104e-05, "loss": 0.0, "step": 66008 }, { "epoch": 61.58, "learning_rate": 4.2302705223880604e-05, "loss": 0.0, "step": 66012 }, { "epoch": 61.58, "learning_rate": 4.230223880597015e-05, "loss": 0.0, "step": 66016 }, { "epoch": 61.59, "learning_rate": 4.23017723880597e-05, "loss": 0.002, "step": 66020 }, { "epoch": 61.59, "learning_rate": 4.2301305970149255e-05, "loss": 0.0001, "step": 66024 }, { "epoch": 61.59, "learning_rate": 4.230083955223881e-05, "loss": 0.0, "step": 66028 }, { "epoch": 61.6, "learning_rate": 4.230037313432836e-05, "loss": 0.0, "step": 66032 }, { "epoch": 61.6, "learning_rate": 4.229990671641791e-05, "loss": 0.0005, "step": 66036 }, { "epoch": 61.6, "learning_rate": 4.229944029850746e-05, "loss": 0.0, "step": 66040 }, { "epoch": 61.61, "learning_rate": 4.2298973880597016e-05, "loss": 0.0, "step": 66044 }, { "epoch": 61.61, "learning_rate": 4.229850746268657e-05, "loss": 0.0, "step": 66048 }, { "epoch": 61.62, "learning_rate": 4.229804104477612e-05, "loss": 0.0, "step": 66052 }, { "epoch": 61.62, "learning_rate": 4.2297574626865674e-05, "loss": 0.0, "step": 66056 }, { "epoch": 61.62, "learning_rate": 4.229710820895523e-05, "loss": 0.0, "step": 66060 }, { "epoch": 61.63, "learning_rate": 4.229664179104478e-05, "loss": 0.0002, "step": 66064 }, { "epoch": 61.63, "learning_rate": 4.2296175373134325e-05, "loss": 0.0, "step": 66068 }, { "epoch": 61.63, "learning_rate": 4.229570895522389e-05, "loss": 0.0, "step": 66072 }, { "epoch": 61.64, "learning_rate": 4.2295242537313435e-05, "loss": 0.0, "step": 66076 }, { "epoch": 61.64, "learning_rate": 4.229477611940298e-05, "loss": 0.0, "step": 66080 }, { "epoch": 61.65, "learning_rate": 4.229430970149254e-05, "loss": 0.0, "step": 66084 }, { "epoch": 61.65, "learning_rate": 4.229384328358209e-05, "loss": 0.0, "step": 66088 }, { "epoch": 61.65, "learning_rate": 4.229337686567164e-05, "loss": 0.0, "step": 66092 }, { "epoch": 61.66, "learning_rate": 4.2292910447761196e-05, "loss": 0.0, "step": 66096 }, { "epoch": 61.66, "learning_rate": 4.2292444029850744e-05, "loss": 0.0, "step": 66100 }, { "epoch": 61.66, "learning_rate": 4.2291977611940306e-05, "loss": 0.005, "step": 66104 }, { "epoch": 61.67, "learning_rate": 4.2291511194029854e-05, "loss": 0.0, "step": 66108 }, { "epoch": 61.67, "learning_rate": 4.22910447761194e-05, "loss": 0.0, "step": 66112 }, { "epoch": 61.68, "learning_rate": 4.229057835820896e-05, "loss": 0.0, "step": 66116 }, { "epoch": 61.68, "learning_rate": 4.229011194029851e-05, "loss": 0.0, "step": 66120 }, { "epoch": 61.68, "learning_rate": 4.228964552238806e-05, "loss": 0.0, "step": 66124 }, { "epoch": 61.69, "learning_rate": 4.2289179104477615e-05, "loss": 0.0, "step": 66128 }, { "epoch": 61.69, "learning_rate": 4.228871268656717e-05, "loss": 0.0, "step": 66132 }, { "epoch": 61.69, "learning_rate": 4.228824626865672e-05, "loss": 0.0309, "step": 66136 }, { "epoch": 61.7, "learning_rate": 4.228777985074627e-05, "loss": 0.0, "step": 66140 }, { "epoch": 61.7, "learning_rate": 4.228731343283582e-05, "loss": 0.0, "step": 66144 }, { "epoch": 61.71, "learning_rate": 4.2286847014925376e-05, "loss": 0.0, "step": 66148 }, { "epoch": 61.71, "learning_rate": 4.228638059701493e-05, "loss": 0.0, "step": 66152 }, { "epoch": 61.71, "learning_rate": 4.228591417910448e-05, "loss": 0.0, "step": 66156 }, { "epoch": 61.72, "learning_rate": 4.228544776119403e-05, "loss": 0.0, "step": 66160 }, { "epoch": 61.72, "learning_rate": 4.228498134328359e-05, "loss": 0.0, "step": 66164 }, { "epoch": 61.72, "learning_rate": 4.228451492537314e-05, "loss": 0.0, "step": 66168 }, { "epoch": 61.73, "learning_rate": 4.2284048507462685e-05, "loss": 0.0, "step": 66172 }, { "epoch": 61.73, "learning_rate": 4.228358208955224e-05, "loss": 0.0, "step": 66176 }, { "epoch": 61.73, "learning_rate": 4.2283115671641795e-05, "loss": 0.0, "step": 66180 }, { "epoch": 61.74, "learning_rate": 4.228264925373134e-05, "loss": 0.0, "step": 66184 }, { "epoch": 61.74, "learning_rate": 4.22821828358209e-05, "loss": 0.0, "step": 66188 }, { "epoch": 61.75, "learning_rate": 4.228171641791045e-05, "loss": 0.0, "step": 66192 }, { "epoch": 61.75, "learning_rate": 4.228125e-05, "loss": 0.0001, "step": 66196 }, { "epoch": 61.75, "learning_rate": 4.2280783582089556e-05, "loss": 0.0, "step": 66200 }, { "epoch": 61.76, "learning_rate": 4.2280317164179104e-05, "loss": 0.0, "step": 66204 }, { "epoch": 61.76, "learning_rate": 4.227985074626866e-05, "loss": 0.0, "step": 66208 }, { "epoch": 61.76, "learning_rate": 4.2279384328358214e-05, "loss": 0.0, "step": 66212 }, { "epoch": 61.77, "learning_rate": 4.227891791044776e-05, "loss": 0.0, "step": 66216 }, { "epoch": 61.77, "learning_rate": 4.227845149253731e-05, "loss": 0.0, "step": 66220 }, { "epoch": 61.78, "learning_rate": 4.227798507462687e-05, "loss": 0.0, "step": 66224 }, { "epoch": 61.78, "learning_rate": 4.227751865671642e-05, "loss": 0.0, "step": 66228 }, { "epoch": 61.78, "learning_rate": 4.227705223880597e-05, "loss": 0.0, "step": 66232 }, { "epoch": 61.79, "learning_rate": 4.227658582089552e-05, "loss": 0.0, "step": 66236 }, { "epoch": 61.79, "learning_rate": 4.227611940298508e-05, "loss": 0.0, "step": 66240 }, { "epoch": 61.79, "learning_rate": 4.2275652985074626e-05, "loss": 0.0, "step": 66244 }, { "epoch": 61.8, "learning_rate": 4.227518656716418e-05, "loss": 0.0011, "step": 66248 }, { "epoch": 61.8, "learning_rate": 4.2274720149253736e-05, "loss": 0.0, "step": 66252 }, { "epoch": 61.81, "learning_rate": 4.2274253731343284e-05, "loss": 0.0, "step": 66256 }, { "epoch": 61.81, "learning_rate": 4.227378731343284e-05, "loss": 0.0001, "step": 66260 }, { "epoch": 61.81, "learning_rate": 4.227332089552239e-05, "loss": 0.0, "step": 66264 }, { "epoch": 61.82, "learning_rate": 4.227285447761194e-05, "loss": 0.0, "step": 66268 }, { "epoch": 61.82, "learning_rate": 4.22723880597015e-05, "loss": 0.0, "step": 66272 }, { "epoch": 61.82, "learning_rate": 4.2271921641791045e-05, "loss": 0.0007, "step": 66276 }, { "epoch": 61.83, "learning_rate": 4.22714552238806e-05, "loss": 0.0, "step": 66280 }, { "epoch": 61.83, "learning_rate": 4.2270988805970155e-05, "loss": 0.0111, "step": 66284 }, { "epoch": 61.84, "learning_rate": 4.22705223880597e-05, "loss": 0.0006, "step": 66288 }, { "epoch": 61.84, "learning_rate": 4.227005597014926e-05, "loss": 0.0, "step": 66292 }, { "epoch": 61.84, "learning_rate": 4.2269589552238806e-05, "loss": 0.0, "step": 66296 }, { "epoch": 61.85, "learning_rate": 4.226912313432836e-05, "loss": 0.0, "step": 66300 }, { "epoch": 61.85, "learning_rate": 4.2268656716417916e-05, "loss": 0.0, "step": 66304 }, { "epoch": 61.85, "learning_rate": 4.2268190298507464e-05, "loss": 0.0001, "step": 66308 }, { "epoch": 61.86, "learning_rate": 4.226772388059701e-05, "loss": 0.0029, "step": 66312 }, { "epoch": 61.86, "learning_rate": 4.2267257462686574e-05, "loss": 0.0003, "step": 66316 }, { "epoch": 61.87, "learning_rate": 4.226679104477612e-05, "loss": 0.0002, "step": 66320 }, { "epoch": 61.87, "learning_rate": 4.226632462686567e-05, "loss": 0.0002, "step": 66324 }, { "epoch": 61.87, "learning_rate": 4.2265858208955225e-05, "loss": 0.0001, "step": 66328 }, { "epoch": 61.88, "learning_rate": 4.226539179104478e-05, "loss": 0.0, "step": 66332 }, { "epoch": 61.88, "learning_rate": 4.226492537313433e-05, "loss": 0.0, "step": 66336 }, { "epoch": 61.88, "learning_rate": 4.226445895522388e-05, "loss": 0.0, "step": 66340 }, { "epoch": 61.89, "learning_rate": 4.226399253731344e-05, "loss": 0.0, "step": 66344 }, { "epoch": 61.89, "learning_rate": 4.2263526119402986e-05, "loss": 0.0002, "step": 66348 }, { "epoch": 61.9, "learning_rate": 4.226305970149254e-05, "loss": 0.0, "step": 66352 }, { "epoch": 61.9, "learning_rate": 4.226259328358209e-05, "loss": 0.0, "step": 66356 }, { "epoch": 61.9, "learning_rate": 4.2262126865671644e-05, "loss": 0.0009, "step": 66360 }, { "epoch": 61.91, "learning_rate": 4.22616604477612e-05, "loss": 0.0, "step": 66364 }, { "epoch": 61.91, "learning_rate": 4.226119402985075e-05, "loss": 0.0, "step": 66368 }, { "epoch": 61.91, "learning_rate": 4.2260727611940295e-05, "loss": 0.0, "step": 66372 }, { "epoch": 61.92, "learning_rate": 4.226026119402986e-05, "loss": 0.0, "step": 66376 }, { "epoch": 61.92, "learning_rate": 4.2259794776119405e-05, "loss": 0.0, "step": 66380 }, { "epoch": 61.93, "learning_rate": 4.225932835820895e-05, "loss": 0.0012, "step": 66384 }, { "epoch": 61.93, "learning_rate": 4.225886194029851e-05, "loss": 0.0, "step": 66388 }, { "epoch": 61.93, "learning_rate": 4.225839552238806e-05, "loss": 0.0, "step": 66392 }, { "epoch": 61.94, "learning_rate": 4.225792910447761e-05, "loss": 0.0, "step": 66396 }, { "epoch": 61.94, "learning_rate": 4.2257462686567166e-05, "loss": 0.0, "step": 66400 }, { "epoch": 61.94, "learning_rate": 4.225699626865672e-05, "loss": 0.0, "step": 66404 }, { "epoch": 61.95, "learning_rate": 4.225652985074627e-05, "loss": 0.0, "step": 66408 }, { "epoch": 61.95, "learning_rate": 4.2256063432835824e-05, "loss": 0.0005, "step": 66412 }, { "epoch": 61.96, "learning_rate": 4.225559701492537e-05, "loss": 0.0, "step": 66416 }, { "epoch": 61.96, "learning_rate": 4.225513059701493e-05, "loss": 0.0, "step": 66420 }, { "epoch": 61.96, "learning_rate": 4.225466417910448e-05, "loss": 0.0, "step": 66424 }, { "epoch": 61.97, "learning_rate": 4.225419776119403e-05, "loss": 0.0, "step": 66428 }, { "epoch": 61.97, "learning_rate": 4.2253731343283585e-05, "loss": 0.0, "step": 66432 }, { "epoch": 61.97, "learning_rate": 4.225326492537314e-05, "loss": 0.0, "step": 66436 }, { "epoch": 61.98, "learning_rate": 4.225279850746269e-05, "loss": 0.0, "step": 66440 }, { "epoch": 61.98, "learning_rate": 4.225233208955224e-05, "loss": 0.0, "step": 66444 }, { "epoch": 61.98, "learning_rate": 4.225186567164179e-05, "loss": 0.0, "step": 66448 }, { "epoch": 61.99, "learning_rate": 4.2251399253731346e-05, "loss": 0.0004, "step": 66452 }, { "epoch": 61.99, "learning_rate": 4.22509328358209e-05, "loss": 0.0001, "step": 66456 }, { "epoch": 62.0, "learning_rate": 4.225046641791045e-05, "loss": 0.0, "step": 66460 }, { "epoch": 62.0, "learning_rate": 4.2250000000000004e-05, "loss": 0.0, "step": 66464 }, { "epoch": 62.0, "learning_rate": 4.224953358208956e-05, "loss": 0.0, "step": 66468 }, { "epoch": 62.01, "learning_rate": 4.224906716417911e-05, "loss": 0.0, "step": 66472 }, { "epoch": 62.01, "learning_rate": 4.2248600746268655e-05, "loss": 0.0001, "step": 66476 }, { "epoch": 62.01, "learning_rate": 4.224813432835821e-05, "loss": 0.0, "step": 66480 }, { "epoch": 62.02, "learning_rate": 4.2247667910447765e-05, "loss": 0.0019, "step": 66484 }, { "epoch": 62.02, "learning_rate": 4.224720149253731e-05, "loss": 0.0, "step": 66488 }, { "epoch": 62.03, "learning_rate": 4.224673507462687e-05, "loss": 0.0, "step": 66492 }, { "epoch": 62.03, "learning_rate": 4.224626865671642e-05, "loss": 0.0, "step": 66496 }, { "epoch": 62.03, "learning_rate": 4.224580223880597e-05, "loss": 0.0, "step": 66500 }, { "epoch": 62.03, "eval_exact_match": 0.7562862669245648, "eval_exec": 0.7823984526112185, "eval_loss": 0.4995794892311096, "eval_runtime": 1097.5777, "eval_samples_per_second": 0.942, "step": 66500 } ], "max_steps": 428800, "num_train_epochs": 400, "total_flos": 2.2981425097872384e+18, "trial_name": null, "trial_params": null }