diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,100969 @@ +{ + "best_metric": 0.7562862669245648, + "best_model_checkpoint": "/train_db_id/checkpoint-66500", + "epoch": 62.03357817418678, + "global_step": 66500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999988339552239e-05, + "loss": 3.3987, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953358208955e-05, + "loss": 2.8913, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999067164179105e-05, + "loss": 1.6031, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 4.999860074626866e-05, + "loss": 1.5674, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 4.999813432835821e-05, + "loss": 1.075, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 4.999766791044776e-05, + "loss": 1.0344, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 4.999720149253732e-05, + "loss": 0.839, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 4.9996735074626866e-05, + "loss": 0.6872, + "step": 28 + }, + { + "epoch": 0.03, + "learning_rate": 4.999626865671642e-05, + "loss": 0.6713, + "step": 32 + }, + { + "epoch": 0.03, + "learning_rate": 4.999580223880597e-05, + "loss": 0.707, + "step": 36 + }, + { + "epoch": 0.04, + "learning_rate": 4.9995335820895524e-05, + "loss": 0.6456, + "step": 40 + }, + { + "epoch": 0.04, + "learning_rate": 4.999486940298508e-05, + "loss": 0.6796, + "step": 44 + }, + { + "epoch": 0.04, + "learning_rate": 4.999440298507463e-05, + "loss": 0.5273, + "step": 48 + }, + { + "epoch": 0.05, + "learning_rate": 4.999393656716418e-05, + "loss": 0.9018, + "step": 52 + }, + { + "epoch": 0.05, + "learning_rate": 4.999347014925374e-05, + "loss": 0.6358, + "step": 56 + }, + { + "epoch": 0.06, + "learning_rate": 4.9993003731343285e-05, + "loss": 0.462, + "step": 60 + }, + { + "epoch": 0.06, + "learning_rate": 4.9992537313432833e-05, + "loss": 0.5077, + "step": 64 + }, + { + "epoch": 0.06, + "learning_rate": 4.999207089552239e-05, + "loss": 0.5189, + "step": 68 + }, + { + "epoch": 0.07, + "learning_rate": 4.999160447761194e-05, + "loss": 0.5189, + "step": 72 + }, + { + "epoch": 0.07, + "learning_rate": 4.999113805970149e-05, + "loss": 0.4512, + "step": 76 + }, + { + "epoch": 0.07, + "learning_rate": 4.9990671641791046e-05, + "loss": 0.4906, + "step": 80 + }, + { + "epoch": 0.08, + "learning_rate": 4.99902052238806e-05, + "loss": 0.4084, + "step": 84 + }, + { + "epoch": 0.08, + "learning_rate": 4.9989738805970156e-05, + "loss": 0.3692, + "step": 88 + }, + { + "epoch": 0.09, + "learning_rate": 4.9989272388059704e-05, + "loss": 0.459, + "step": 92 + }, + { + "epoch": 0.09, + "learning_rate": 4.998880597014925e-05, + "loss": 0.4347, + "step": 96 + }, + { + "epoch": 0.09, + "learning_rate": 4.9988339552238814e-05, + "loss": 0.39, + "step": 100 + }, + { + "epoch": 0.1, + "learning_rate": 4.998787313432836e-05, + "loss": 0.4953, + "step": 104 + }, + { + "epoch": 0.1, + "learning_rate": 4.998740671641791e-05, + "loss": 0.4506, + "step": 108 + }, + { + "epoch": 0.1, + "learning_rate": 4.9986940298507465e-05, + "loss": 0.407, + "step": 112 + }, + { + "epoch": 0.11, + "learning_rate": 4.998647388059702e-05, + "loss": 0.3425, + "step": 116 + }, + { + "epoch": 0.11, + "learning_rate": 4.998600746268657e-05, + "loss": 0.4349, + "step": 120 + }, + { + "epoch": 0.12, + "learning_rate": 4.998554104477612e-05, + "loss": 0.3413, + "step": 124 + }, + { + "epoch": 0.12, + "learning_rate": 4.998507462686567e-05, + "loss": 0.3923, + "step": 128 + }, + { + "epoch": 0.12, + "learning_rate": 4.9984608208955226e-05, + "loss": 0.4086, + "step": 132 + }, + { + "epoch": 0.13, + "learning_rate": 4.998414179104478e-05, + "loss": 0.3972, + "step": 136 + }, + { + "epoch": 0.13, + "learning_rate": 4.998367537313433e-05, + "loss": 0.3259, + "step": 140 + }, + { + "epoch": 0.13, + "learning_rate": 4.9983208955223884e-05, + "loss": 0.3852, + "step": 144 + }, + { + "epoch": 0.14, + "learning_rate": 4.998274253731344e-05, + "loss": 0.3536, + "step": 148 + }, + { + "epoch": 0.14, + "learning_rate": 4.998227611940299e-05, + "loss": 0.3187, + "step": 152 + }, + { + "epoch": 0.15, + "learning_rate": 4.9981809701492535e-05, + "loss": 0.2841, + "step": 156 + }, + { + "epoch": 0.15, + "learning_rate": 4.99813432835821e-05, + "loss": 0.4, + "step": 160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9980876865671645e-05, + "loss": 0.319, + "step": 164 + }, + { + "epoch": 0.16, + "learning_rate": 4.998041044776119e-05, + "loss": 0.285, + "step": 168 + }, + { + "epoch": 0.16, + "learning_rate": 4.997994402985075e-05, + "loss": 0.3721, + "step": 172 + }, + { + "epoch": 0.16, + "learning_rate": 4.99794776119403e-05, + "loss": 0.2752, + "step": 176 + }, + { + "epoch": 0.17, + "learning_rate": 4.997901119402985e-05, + "loss": 0.2895, + "step": 180 + }, + { + "epoch": 0.17, + "learning_rate": 4.9978544776119406e-05, + "loss": 0.2652, + "step": 184 + }, + { + "epoch": 0.18, + "learning_rate": 4.9978078358208954e-05, + "loss": 0.4065, + "step": 188 + }, + { + "epoch": 0.18, + "learning_rate": 4.997761194029851e-05, + "loss": 0.2846, + "step": 192 + }, + { + "epoch": 0.18, + "learning_rate": 4.9977145522388064e-05, + "loss": 0.3481, + "step": 196 + }, + { + "epoch": 0.19, + "learning_rate": 4.997667910447761e-05, + "loss": 0.33, + "step": 200 + }, + { + "epoch": 0.19, + "learning_rate": 4.997621268656717e-05, + "loss": 0.3122, + "step": 204 + }, + { + "epoch": 0.19, + "learning_rate": 4.997574626865672e-05, + "loss": 0.2949, + "step": 208 + }, + { + "epoch": 0.2, + "learning_rate": 4.997527985074627e-05, + "loss": 0.2282, + "step": 212 + }, + { + "epoch": 0.2, + "learning_rate": 4.997481343283582e-05, + "loss": 0.2633, + "step": 216 + }, + { + "epoch": 0.21, + "learning_rate": 4.997434701492538e-05, + "loss": 0.2847, + "step": 220 + }, + { + "epoch": 0.21, + "learning_rate": 4.997388059701493e-05, + "loss": 0.252, + "step": 224 + }, + { + "epoch": 0.21, + "learning_rate": 4.9973414179104476e-05, + "loss": 0.2747, + "step": 228 + }, + { + "epoch": 0.22, + "learning_rate": 4.997294776119403e-05, + "loss": 0.2968, + "step": 232 + }, + { + "epoch": 0.22, + "learning_rate": 4.9972481343283586e-05, + "loss": 0.3299, + "step": 236 + }, + { + "epoch": 0.22, + "learning_rate": 4.9972014925373134e-05, + "loss": 0.3038, + "step": 240 + }, + { + "epoch": 0.23, + "learning_rate": 4.997154850746269e-05, + "loss": 0.214, + "step": 244 + }, + { + "epoch": 0.23, + "learning_rate": 4.997108208955224e-05, + "loss": 0.2138, + "step": 248 + }, + { + "epoch": 0.24, + "learning_rate": 4.99706156716418e-05, + "loss": 0.2308, + "step": 252 + }, + { + "epoch": 0.24, + "learning_rate": 4.997014925373135e-05, + "loss": 0.3071, + "step": 256 + }, + { + "epoch": 0.24, + "learning_rate": 4.9969682835820895e-05, + "loss": 0.3456, + "step": 260 + }, + { + "epoch": 0.25, + "learning_rate": 4.996921641791045e-05, + "loss": 0.2254, + "step": 264 + }, + { + "epoch": 0.25, + "learning_rate": 4.9968750000000005e-05, + "loss": 0.2141, + "step": 268 + }, + { + "epoch": 0.25, + "learning_rate": 4.996828358208955e-05, + "loss": 0.1988, + "step": 272 + }, + { + "epoch": 0.26, + "learning_rate": 4.996781716417911e-05, + "loss": 0.2797, + "step": 276 + }, + { + "epoch": 0.26, + "learning_rate": 4.996735074626866e-05, + "loss": 0.265, + "step": 280 + }, + { + "epoch": 0.26, + "learning_rate": 4.996688432835821e-05, + "loss": 0.3751, + "step": 284 + }, + { + "epoch": 0.27, + "learning_rate": 4.9966417910447766e-05, + "loss": 0.2993, + "step": 288 + }, + { + "epoch": 0.27, + "learning_rate": 4.9965951492537314e-05, + "loss": 0.2626, + "step": 292 + }, + { + "epoch": 0.28, + "learning_rate": 4.996548507462687e-05, + "loss": 0.2083, + "step": 296 + }, + { + "epoch": 0.28, + "learning_rate": 4.9965018656716424e-05, + "loss": 0.1907, + "step": 300 + }, + { + "epoch": 0.28, + "learning_rate": 4.996455223880597e-05, + "loss": 0.2737, + "step": 304 + }, + { + "epoch": 0.29, + "learning_rate": 4.996408582089552e-05, + "loss": 0.3005, + "step": 308 + }, + { + "epoch": 0.29, + "learning_rate": 4.996361940298508e-05, + "loss": 0.2399, + "step": 312 + }, + { + "epoch": 0.29, + "learning_rate": 4.996315298507463e-05, + "loss": 0.2554, + "step": 316 + }, + { + "epoch": 0.3, + "learning_rate": 4.996268656716418e-05, + "loss": 0.2626, + "step": 320 + }, + { + "epoch": 0.3, + "learning_rate": 4.996222014925373e-05, + "loss": 0.2391, + "step": 324 + }, + { + "epoch": 0.31, + "learning_rate": 4.996175373134329e-05, + "loss": 0.2105, + "step": 328 + }, + { + "epoch": 0.31, + "learning_rate": 4.9961287313432836e-05, + "loss": 0.2333, + "step": 332 + }, + { + "epoch": 0.31, + "learning_rate": 4.996082089552239e-05, + "loss": 0.2182, + "step": 336 + }, + { + "epoch": 0.32, + "learning_rate": 4.9960354477611946e-05, + "loss": 0.2743, + "step": 340 + }, + { + "epoch": 0.32, + "learning_rate": 4.9959888059701494e-05, + "loss": 0.1997, + "step": 344 + }, + { + "epoch": 0.32, + "learning_rate": 4.995942164179105e-05, + "loss": 0.3057, + "step": 348 + }, + { + "epoch": 0.33, + "learning_rate": 4.99589552238806e-05, + "loss": 0.1665, + "step": 352 + }, + { + "epoch": 0.33, + "learning_rate": 4.995848880597015e-05, + "loss": 0.2077, + "step": 356 + }, + { + "epoch": 0.34, + "learning_rate": 4.995802238805971e-05, + "loss": 0.229, + "step": 360 + }, + { + "epoch": 0.34, + "learning_rate": 4.9957555970149255e-05, + "loss": 0.1626, + "step": 364 + }, + { + "epoch": 0.34, + "learning_rate": 4.99570895522388e-05, + "loss": 0.2124, + "step": 368 + }, + { + "epoch": 0.35, + "learning_rate": 4.9956623134328365e-05, + "loss": 0.1981, + "step": 372 + }, + { + "epoch": 0.35, + "learning_rate": 4.995615671641791e-05, + "loss": 0.1941, + "step": 376 + }, + { + "epoch": 0.35, + "learning_rate": 4.995569029850746e-05, + "loss": 0.1985, + "step": 380 + }, + { + "epoch": 0.36, + "learning_rate": 4.9955223880597016e-05, + "loss": 0.2346, + "step": 384 + }, + { + "epoch": 0.36, + "learning_rate": 4.995475746268657e-05, + "loss": 0.1867, + "step": 388 + }, + { + "epoch": 0.37, + "learning_rate": 4.995429104477612e-05, + "loss": 0.1955, + "step": 392 + }, + { + "epoch": 0.37, + "learning_rate": 4.9953824626865674e-05, + "loss": 0.2611, + "step": 396 + }, + { + "epoch": 0.37, + "learning_rate": 4.995335820895522e-05, + "loss": 0.2636, + "step": 400 + }, + { + "epoch": 0.38, + "learning_rate": 4.995289179104478e-05, + "loss": 0.2358, + "step": 404 + }, + { + "epoch": 0.38, + "learning_rate": 4.995242537313433e-05, + "loss": 0.2215, + "step": 408 + }, + { + "epoch": 0.38, + "learning_rate": 4.995195895522388e-05, + "loss": 0.1965, + "step": 412 + }, + { + "epoch": 0.39, + "learning_rate": 4.9951492537313435e-05, + "loss": 0.2427, + "step": 416 + }, + { + "epoch": 0.39, + "learning_rate": 4.995102611940299e-05, + "loss": 0.1671, + "step": 420 + }, + { + "epoch": 0.4, + "learning_rate": 4.995055970149254e-05, + "loss": 0.1296, + "step": 424 + }, + { + "epoch": 0.4, + "learning_rate": 4.995009328358209e-05, + "loss": 0.2114, + "step": 428 + }, + { + "epoch": 0.4, + "learning_rate": 4.994962686567165e-05, + "loss": 0.1675, + "step": 432 + }, + { + "epoch": 0.41, + "learning_rate": 4.9949160447761196e-05, + "loss": 0.162, + "step": 436 + }, + { + "epoch": 0.41, + "learning_rate": 4.994869402985075e-05, + "loss": 0.1415, + "step": 440 + }, + { + "epoch": 0.41, + "learning_rate": 4.99482276119403e-05, + "loss": 0.148, + "step": 444 + }, + { + "epoch": 0.42, + "learning_rate": 4.9947761194029854e-05, + "loss": 0.1717, + "step": 448 + }, + { + "epoch": 0.42, + "learning_rate": 4.994729477611941e-05, + "loss": 0.157, + "step": 452 + }, + { + "epoch": 0.43, + "learning_rate": 4.994682835820896e-05, + "loss": 0.226, + "step": 456 + }, + { + "epoch": 0.43, + "learning_rate": 4.9946361940298505e-05, + "loss": 0.2185, + "step": 460 + }, + { + "epoch": 0.43, + "learning_rate": 4.994589552238807e-05, + "loss": 0.2423, + "step": 464 + }, + { + "epoch": 0.44, + "learning_rate": 4.9945429104477615e-05, + "loss": 0.2652, + "step": 468 + }, + { + "epoch": 0.44, + "learning_rate": 4.994496268656716e-05, + "loss": 0.1864, + "step": 472 + }, + { + "epoch": 0.44, + "learning_rate": 4.994449626865672e-05, + "loss": 0.1418, + "step": 476 + }, + { + "epoch": 0.45, + "learning_rate": 4.994402985074627e-05, + "loss": 0.1907, + "step": 480 + }, + { + "epoch": 0.45, + "learning_rate": 4.994356343283582e-05, + "loss": 0.1826, + "step": 484 + }, + { + "epoch": 0.46, + "learning_rate": 4.9943097014925376e-05, + "loss": 0.1699, + "step": 488 + }, + { + "epoch": 0.46, + "learning_rate": 4.994263059701493e-05, + "loss": 0.1956, + "step": 492 + }, + { + "epoch": 0.46, + "learning_rate": 4.994216417910448e-05, + "loss": 0.1386, + "step": 496 + }, + { + "epoch": 0.47, + "learning_rate": 4.9941697761194034e-05, + "loss": 0.1573, + "step": 500 + }, + { + "epoch": 0.47, + "eval_exact_match": 0.4410058027079304, + "eval_exec": 0.534816247582205, + "eval_loss": 0.1741107553243637, + "eval_runtime": 975.177, + "eval_samples_per_second": 1.06, + "step": 500 + }, + { + "epoch": 0.47, + "learning_rate": 4.994123134328358e-05, + "loss": 0.165, + "step": 504 + }, + { + "epoch": 0.47, + "learning_rate": 4.994076492537314e-05, + "loss": 0.1845, + "step": 508 + }, + { + "epoch": 0.48, + "learning_rate": 4.994029850746269e-05, + "loss": 0.1452, + "step": 512 + }, + { + "epoch": 0.48, + "learning_rate": 4.993983208955224e-05, + "loss": 0.1927, + "step": 516 + }, + { + "epoch": 0.49, + "learning_rate": 4.993936567164179e-05, + "loss": 0.1153, + "step": 520 + }, + { + "epoch": 0.49, + "learning_rate": 4.993889925373135e-05, + "loss": 0.1446, + "step": 524 + }, + { + "epoch": 0.49, + "learning_rate": 4.99384328358209e-05, + "loss": 0.1632, + "step": 528 + }, + { + "epoch": 0.5, + "learning_rate": 4.9937966417910446e-05, + "loss": 0.1256, + "step": 532 + }, + { + "epoch": 0.5, + "learning_rate": 4.99375e-05, + "loss": 0.2019, + "step": 536 + }, + { + "epoch": 0.5, + "learning_rate": 4.9937033582089556e-05, + "loss": 0.1862, + "step": 540 + }, + { + "epoch": 0.51, + "learning_rate": 4.9936567164179104e-05, + "loss": 0.1931, + "step": 544 + }, + { + "epoch": 0.51, + "learning_rate": 4.993610074626866e-05, + "loss": 0.16, + "step": 548 + }, + { + "epoch": 0.51, + "learning_rate": 4.9935634328358214e-05, + "loss": 0.2193, + "step": 552 + }, + { + "epoch": 0.52, + "learning_rate": 4.993516791044776e-05, + "loss": 0.1208, + "step": 556 + }, + { + "epoch": 0.52, + "learning_rate": 4.993470149253732e-05, + "loss": 0.1524, + "step": 560 + }, + { + "epoch": 0.53, + "learning_rate": 4.9934235074626865e-05, + "loss": 0.1348, + "step": 564 + }, + { + "epoch": 0.53, + "learning_rate": 4.993376865671642e-05, + "loss": 0.1796, + "step": 568 + }, + { + "epoch": 0.53, + "learning_rate": 4.9933302238805975e-05, + "loss": 0.1142, + "step": 572 + }, + { + "epoch": 0.54, + "learning_rate": 4.993283582089552e-05, + "loss": 0.1417, + "step": 576 + }, + { + "epoch": 0.54, + "learning_rate": 4.993236940298508e-05, + "loss": 0.1696, + "step": 580 + }, + { + "epoch": 0.54, + "learning_rate": 4.993190298507463e-05, + "loss": 0.1412, + "step": 584 + }, + { + "epoch": 0.55, + "learning_rate": 4.993143656716418e-05, + "loss": 0.1188, + "step": 588 + }, + { + "epoch": 0.55, + "learning_rate": 4.9930970149253736e-05, + "loss": 0.1944, + "step": 592 + }, + { + "epoch": 0.56, + "learning_rate": 4.9930503731343284e-05, + "loss": 0.0786, + "step": 596 + }, + { + "epoch": 0.56, + "learning_rate": 4.993003731343284e-05, + "loss": 0.1258, + "step": 600 + }, + { + "epoch": 0.56, + "learning_rate": 4.9929570895522394e-05, + "loss": 0.1359, + "step": 604 + }, + { + "epoch": 0.57, + "learning_rate": 4.992910447761194e-05, + "loss": 0.1297, + "step": 608 + }, + { + "epoch": 0.57, + "learning_rate": 4.99286380597015e-05, + "loss": 0.2223, + "step": 612 + }, + { + "epoch": 0.57, + "learning_rate": 4.992817164179105e-05, + "loss": 0.1363, + "step": 616 + }, + { + "epoch": 0.58, + "learning_rate": 4.99277052238806e-05, + "loss": 0.1362, + "step": 620 + }, + { + "epoch": 0.58, + "learning_rate": 4.992723880597015e-05, + "loss": 0.1977, + "step": 624 + }, + { + "epoch": 0.59, + "learning_rate": 4.99267723880597e-05, + "loss": 0.1176, + "step": 628 + }, + { + "epoch": 0.59, + "learning_rate": 4.992630597014926e-05, + "loss": 0.1254, + "step": 632 + }, + { + "epoch": 0.59, + "learning_rate": 4.9925839552238806e-05, + "loss": 0.1825, + "step": 636 + }, + { + "epoch": 0.6, + "learning_rate": 4.992537313432836e-05, + "loss": 0.1363, + "step": 640 + }, + { + "epoch": 0.6, + "learning_rate": 4.9924906716417916e-05, + "loss": 0.1388, + "step": 644 + }, + { + "epoch": 0.6, + "learning_rate": 4.9924440298507464e-05, + "loss": 0.1409, + "step": 648 + }, + { + "epoch": 0.61, + "learning_rate": 4.992397388059702e-05, + "loss": 0.1481, + "step": 652 + }, + { + "epoch": 0.61, + "learning_rate": 4.992350746268657e-05, + "loss": 0.183, + "step": 656 + }, + { + "epoch": 0.62, + "learning_rate": 4.992304104477612e-05, + "loss": 0.1344, + "step": 660 + }, + { + "epoch": 0.62, + "learning_rate": 4.9922574626865677e-05, + "loss": 0.1149, + "step": 664 + }, + { + "epoch": 0.62, + "learning_rate": 4.9922108208955225e-05, + "loss": 0.1584, + "step": 668 + }, + { + "epoch": 0.63, + "learning_rate": 4.992164179104478e-05, + "loss": 0.1784, + "step": 672 + }, + { + "epoch": 0.63, + "learning_rate": 4.9921175373134335e-05, + "loss": 0.066, + "step": 676 + }, + { + "epoch": 0.63, + "learning_rate": 4.992070895522388e-05, + "loss": 0.1293, + "step": 680 + }, + { + "epoch": 0.64, + "learning_rate": 4.992024253731343e-05, + "loss": 0.1601, + "step": 684 + }, + { + "epoch": 0.64, + "learning_rate": 4.9919776119402986e-05, + "loss": 0.1549, + "step": 688 + }, + { + "epoch": 0.65, + "learning_rate": 4.991930970149254e-05, + "loss": 0.102, + "step": 692 + }, + { + "epoch": 0.65, + "learning_rate": 4.991884328358209e-05, + "loss": 0.1538, + "step": 696 + }, + { + "epoch": 0.65, + "learning_rate": 4.9918376865671644e-05, + "loss": 0.1375, + "step": 700 + }, + { + "epoch": 0.66, + "learning_rate": 4.99179104477612e-05, + "loss": 0.1162, + "step": 704 + }, + { + "epoch": 0.66, + "learning_rate": 4.991744402985075e-05, + "loss": 0.1154, + "step": 708 + }, + { + "epoch": 0.66, + "learning_rate": 4.99169776119403e-05, + "loss": 0.2463, + "step": 712 + }, + { + "epoch": 0.67, + "learning_rate": 4.991651119402985e-05, + "loss": 0.1519, + "step": 716 + }, + { + "epoch": 0.67, + "learning_rate": 4.9916044776119405e-05, + "loss": 0.0805, + "step": 720 + }, + { + "epoch": 0.68, + "learning_rate": 4.991557835820896e-05, + "loss": 0.1146, + "step": 724 + }, + { + "epoch": 0.68, + "learning_rate": 4.991511194029851e-05, + "loss": 0.1365, + "step": 728 + }, + { + "epoch": 0.68, + "learning_rate": 4.991464552238806e-05, + "loss": 0.1944, + "step": 732 + }, + { + "epoch": 0.69, + "learning_rate": 4.991417910447762e-05, + "loss": 0.1529, + "step": 736 + }, + { + "epoch": 0.69, + "learning_rate": 4.9913712686567166e-05, + "loss": 0.1301, + "step": 740 + }, + { + "epoch": 0.69, + "learning_rate": 4.991324626865672e-05, + "loss": 0.1387, + "step": 744 + }, + { + "epoch": 0.7, + "learning_rate": 4.991277985074627e-05, + "loss": 0.1241, + "step": 748 + }, + { + "epoch": 0.7, + "learning_rate": 4.9912313432835824e-05, + "loss": 0.1165, + "step": 752 + }, + { + "epoch": 0.71, + "learning_rate": 4.991184701492538e-05, + "loss": 0.1206, + "step": 756 + }, + { + "epoch": 0.71, + "learning_rate": 4.991138059701493e-05, + "loss": 0.1771, + "step": 760 + }, + { + "epoch": 0.71, + "learning_rate": 4.991091417910448e-05, + "loss": 0.1229, + "step": 764 + }, + { + "epoch": 0.72, + "learning_rate": 4.9910447761194036e-05, + "loss": 0.1462, + "step": 768 + }, + { + "epoch": 0.72, + "learning_rate": 4.9909981343283585e-05, + "loss": 0.119, + "step": 772 + }, + { + "epoch": 0.72, + "learning_rate": 4.990951492537313e-05, + "loss": 0.1318, + "step": 776 + }, + { + "epoch": 0.73, + "learning_rate": 4.9909048507462694e-05, + "loss": 0.1514, + "step": 780 + }, + { + "epoch": 0.73, + "learning_rate": 4.990858208955224e-05, + "loss": 0.1043, + "step": 784 + }, + { + "epoch": 0.73, + "learning_rate": 4.990811567164179e-05, + "loss": 0.1466, + "step": 788 + }, + { + "epoch": 0.74, + "learning_rate": 4.9907649253731346e-05, + "loss": 0.1052, + "step": 792 + }, + { + "epoch": 0.74, + "learning_rate": 4.99071828358209e-05, + "loss": 0.1098, + "step": 796 + }, + { + "epoch": 0.75, + "learning_rate": 4.990671641791045e-05, + "loss": 0.2, + "step": 800 + }, + { + "epoch": 0.75, + "learning_rate": 4.9906250000000004e-05, + "loss": 0.1416, + "step": 804 + }, + { + "epoch": 0.75, + "learning_rate": 4.990578358208955e-05, + "loss": 0.1137, + "step": 808 + }, + { + "epoch": 0.76, + "learning_rate": 4.9905317164179107e-05, + "loss": 0.14, + "step": 812 + }, + { + "epoch": 0.76, + "learning_rate": 4.990485074626866e-05, + "loss": 0.1023, + "step": 816 + }, + { + "epoch": 0.76, + "learning_rate": 4.990438432835821e-05, + "loss": 0.1276, + "step": 820 + }, + { + "epoch": 0.77, + "learning_rate": 4.9903917910447764e-05, + "loss": 0.1445, + "step": 824 + }, + { + "epoch": 0.77, + "learning_rate": 4.990345149253732e-05, + "loss": 0.1165, + "step": 828 + }, + { + "epoch": 0.78, + "learning_rate": 4.990298507462687e-05, + "loss": 0.1369, + "step": 832 + }, + { + "epoch": 0.78, + "learning_rate": 4.9902518656716416e-05, + "loss": 0.1656, + "step": 836 + }, + { + "epoch": 0.78, + "learning_rate": 4.990205223880598e-05, + "loss": 0.1383, + "step": 840 + }, + { + "epoch": 0.79, + "learning_rate": 4.9901585820895525e-05, + "loss": 0.1065, + "step": 844 + }, + { + "epoch": 0.79, + "learning_rate": 4.9901119402985074e-05, + "loss": 0.1279, + "step": 848 + }, + { + "epoch": 0.79, + "learning_rate": 4.990065298507463e-05, + "loss": 0.111, + "step": 852 + }, + { + "epoch": 0.8, + "learning_rate": 4.9900186567164183e-05, + "loss": 0.1729, + "step": 856 + }, + { + "epoch": 0.8, + "learning_rate": 4.989972014925373e-05, + "loss": 0.1522, + "step": 860 + }, + { + "epoch": 0.81, + "learning_rate": 4.9899253731343286e-05, + "loss": 0.1157, + "step": 864 + }, + { + "epoch": 0.81, + "learning_rate": 4.9898787313432835e-05, + "loss": 0.0954, + "step": 868 + }, + { + "epoch": 0.81, + "learning_rate": 4.989832089552239e-05, + "loss": 0.1079, + "step": 872 + }, + { + "epoch": 0.82, + "learning_rate": 4.9897854477611944e-05, + "loss": 0.094, + "step": 876 + }, + { + "epoch": 0.82, + "learning_rate": 4.989738805970149e-05, + "loss": 0.1084, + "step": 880 + }, + { + "epoch": 0.82, + "learning_rate": 4.989692164179105e-05, + "loss": 0.1579, + "step": 884 + }, + { + "epoch": 0.83, + "learning_rate": 4.98964552238806e-05, + "loss": 0.1245, + "step": 888 + }, + { + "epoch": 0.83, + "learning_rate": 4.989598880597015e-05, + "loss": 0.1674, + "step": 892 + }, + { + "epoch": 0.84, + "learning_rate": 4.98955223880597e-05, + "loss": 0.0896, + "step": 896 + }, + { + "epoch": 0.84, + "learning_rate": 4.989505597014926e-05, + "loss": 0.1532, + "step": 900 + }, + { + "epoch": 0.84, + "learning_rate": 4.989458955223881e-05, + "loss": 0.1221, + "step": 904 + }, + { + "epoch": 0.85, + "learning_rate": 4.989412313432836e-05, + "loss": 0.1003, + "step": 908 + }, + { + "epoch": 0.85, + "learning_rate": 4.989365671641791e-05, + "loss": 0.149, + "step": 912 + }, + { + "epoch": 0.85, + "learning_rate": 4.9893190298507466e-05, + "loss": 0.1192, + "step": 916 + }, + { + "epoch": 0.86, + "learning_rate": 4.989272388059702e-05, + "loss": 0.122, + "step": 920 + }, + { + "epoch": 0.86, + "learning_rate": 4.989225746268657e-05, + "loss": 0.0945, + "step": 924 + }, + { + "epoch": 0.87, + "learning_rate": 4.989179104477612e-05, + "loss": 0.1248, + "step": 928 + }, + { + "epoch": 0.87, + "learning_rate": 4.989132462686568e-05, + "loss": 0.0974, + "step": 932 + }, + { + "epoch": 0.87, + "learning_rate": 4.989085820895523e-05, + "loss": 0.0956, + "step": 936 + }, + { + "epoch": 0.88, + "learning_rate": 4.9890391791044775e-05, + "loss": 0.1214, + "step": 940 + }, + { + "epoch": 0.88, + "learning_rate": 4.988992537313433e-05, + "loss": 0.1591, + "step": 944 + }, + { + "epoch": 0.88, + "learning_rate": 4.9889458955223885e-05, + "loss": 0.1303, + "step": 948 + }, + { + "epoch": 0.89, + "learning_rate": 4.9888992537313433e-05, + "loss": 0.1071, + "step": 952 + }, + { + "epoch": 0.89, + "learning_rate": 4.988852611940299e-05, + "loss": 0.1266, + "step": 956 + }, + { + "epoch": 0.9, + "learning_rate": 4.988805970149254e-05, + "loss": 0.1142, + "step": 960 + }, + { + "epoch": 0.9, + "learning_rate": 4.988759328358209e-05, + "loss": 0.0919, + "step": 964 + }, + { + "epoch": 0.9, + "learning_rate": 4.9887126865671646e-05, + "loss": 0.1704, + "step": 968 + }, + { + "epoch": 0.91, + "learning_rate": 4.9886660447761194e-05, + "loss": 0.1074, + "step": 972 + }, + { + "epoch": 0.91, + "learning_rate": 4.988619402985075e-05, + "loss": 0.1104, + "step": 976 + }, + { + "epoch": 0.91, + "learning_rate": 4.9885727611940304e-05, + "loss": 0.1298, + "step": 980 + }, + { + "epoch": 0.92, + "learning_rate": 4.988526119402985e-05, + "loss": 0.1069, + "step": 984 + }, + { + "epoch": 0.92, + "learning_rate": 4.98847947761194e-05, + "loss": 0.0986, + "step": 988 + }, + { + "epoch": 0.93, + "learning_rate": 4.988432835820896e-05, + "loss": 0.1057, + "step": 992 + }, + { + "epoch": 0.93, + "learning_rate": 4.988386194029851e-05, + "loss": 0.1513, + "step": 996 + }, + { + "epoch": 0.93, + "learning_rate": 4.988339552238806e-05, + "loss": 0.16, + "step": 1000 + }, + { + "epoch": 0.93, + "eval_exact_match": 0.597678916827853, + "eval_exec": 0.6266924564796905, + "eval_loss": 0.14075149595737457, + "eval_runtime": 1596.4093, + "eval_samples_per_second": 0.648, + "step": 1000 + }, + { + "epoch": 0.94, + "learning_rate": 4.988292910447761e-05, + "loss": 0.1095, + "step": 1004 + }, + { + "epoch": 0.94, + "learning_rate": 4.988246268656717e-05, + "loss": 0.0819, + "step": 1008 + }, + { + "epoch": 0.94, + "learning_rate": 4.9881996268656716e-05, + "loss": 0.1231, + "step": 1012 + }, + { + "epoch": 0.95, + "learning_rate": 4.988152985074627e-05, + "loss": 0.0878, + "step": 1016 + }, + { + "epoch": 0.95, + "learning_rate": 4.9881063432835826e-05, + "loss": 0.1158, + "step": 1020 + }, + { + "epoch": 0.96, + "learning_rate": 4.9880597014925374e-05, + "loss": 0.1312, + "step": 1024 + }, + { + "epoch": 0.96, + "learning_rate": 4.988013059701493e-05, + "loss": 0.1238, + "step": 1028 + }, + { + "epoch": 0.96, + "learning_rate": 4.987966417910448e-05, + "loss": 0.1167, + "step": 1032 + }, + { + "epoch": 0.97, + "learning_rate": 4.987919776119403e-05, + "loss": 0.0814, + "step": 1036 + }, + { + "epoch": 0.97, + "learning_rate": 4.987873134328359e-05, + "loss": 0.0817, + "step": 1040 + }, + { + "epoch": 0.97, + "learning_rate": 4.9878264925373135e-05, + "loss": 0.1221, + "step": 1044 + }, + { + "epoch": 0.98, + "learning_rate": 4.9877798507462683e-05, + "loss": 0.131, + "step": 1048 + }, + { + "epoch": 0.98, + "learning_rate": 4.9877332089552245e-05, + "loss": 0.1129, + "step": 1052 + }, + { + "epoch": 0.98, + "learning_rate": 4.987686567164179e-05, + "loss": 0.1051, + "step": 1056 + }, + { + "epoch": 0.99, + "learning_rate": 4.987639925373134e-05, + "loss": 0.0943, + "step": 1060 + }, + { + "epoch": 0.99, + "learning_rate": 4.9875932835820896e-05, + "loss": 0.0652, + "step": 1064 + }, + { + "epoch": 1.0, + "learning_rate": 4.987546641791045e-05, + "loss": 0.109, + "step": 1068 + }, + { + "epoch": 1.0, + "learning_rate": 4.9875000000000006e-05, + "loss": 0.1217, + "step": 1072 + }, + { + "epoch": 1.0, + "learning_rate": 4.9874533582089554e-05, + "loss": 0.0951, + "step": 1076 + }, + { + "epoch": 1.01, + "learning_rate": 4.98740671641791e-05, + "loss": 0.0788, + "step": 1080 + }, + { + "epoch": 1.01, + "learning_rate": 4.9873600746268664e-05, + "loss": 0.0627, + "step": 1084 + }, + { + "epoch": 1.01, + "learning_rate": 4.987313432835821e-05, + "loss": 0.1035, + "step": 1088 + }, + { + "epoch": 1.02, + "learning_rate": 4.987266791044776e-05, + "loss": 0.0611, + "step": 1092 + }, + { + "epoch": 1.02, + "learning_rate": 4.9872201492537315e-05, + "loss": 0.0593, + "step": 1096 + }, + { + "epoch": 1.03, + "learning_rate": 4.987173507462687e-05, + "loss": 0.0997, + "step": 1100 + }, + { + "epoch": 1.03, + "learning_rate": 4.987126865671642e-05, + "loss": 0.0991, + "step": 1104 + }, + { + "epoch": 1.03, + "learning_rate": 4.987080223880597e-05, + "loss": 0.0917, + "step": 1108 + }, + { + "epoch": 1.04, + "learning_rate": 4.987033582089553e-05, + "loss": 0.0519, + "step": 1112 + }, + { + "epoch": 1.04, + "learning_rate": 4.9869869402985076e-05, + "loss": 0.0923, + "step": 1116 + }, + { + "epoch": 1.04, + "learning_rate": 4.986940298507463e-05, + "loss": 0.071, + "step": 1120 + }, + { + "epoch": 1.05, + "learning_rate": 4.986893656716418e-05, + "loss": 0.0654, + "step": 1124 + }, + { + "epoch": 1.05, + "learning_rate": 4.9868470149253734e-05, + "loss": 0.0823, + "step": 1128 + }, + { + "epoch": 1.06, + "learning_rate": 4.986800373134329e-05, + "loss": 0.0674, + "step": 1132 + }, + { + "epoch": 1.06, + "learning_rate": 4.986753731343284e-05, + "loss": 0.0838, + "step": 1136 + }, + { + "epoch": 1.06, + "learning_rate": 4.9867070895522385e-05, + "loss": 0.115, + "step": 1140 + }, + { + "epoch": 1.07, + "learning_rate": 4.986660447761195e-05, + "loss": 0.0886, + "step": 1144 + }, + { + "epoch": 1.07, + "learning_rate": 4.9866138059701495e-05, + "loss": 0.086, + "step": 1148 + }, + { + "epoch": 1.07, + "learning_rate": 4.986567164179104e-05, + "loss": 0.0902, + "step": 1152 + }, + { + "epoch": 1.08, + "learning_rate": 4.98652052238806e-05, + "loss": 0.0724, + "step": 1156 + }, + { + "epoch": 1.08, + "learning_rate": 4.986473880597015e-05, + "loss": 0.0832, + "step": 1160 + }, + { + "epoch": 1.09, + "learning_rate": 4.98642723880597e-05, + "loss": 0.0507, + "step": 1164 + }, + { + "epoch": 1.09, + "learning_rate": 4.9863805970149256e-05, + "loss": 0.0754, + "step": 1168 + }, + { + "epoch": 1.09, + "learning_rate": 4.986333955223881e-05, + "loss": 0.0738, + "step": 1172 + }, + { + "epoch": 1.1, + "learning_rate": 4.986287313432836e-05, + "loss": 0.0834, + "step": 1176 + }, + { + "epoch": 1.1, + "learning_rate": 4.9862406716417914e-05, + "loss": 0.0867, + "step": 1180 + }, + { + "epoch": 1.1, + "learning_rate": 4.986194029850746e-05, + "loss": 0.0967, + "step": 1184 + }, + { + "epoch": 1.11, + "learning_rate": 4.986147388059702e-05, + "loss": 0.0759, + "step": 1188 + }, + { + "epoch": 1.11, + "learning_rate": 4.986100746268657e-05, + "loss": 0.0858, + "step": 1192 + }, + { + "epoch": 1.12, + "learning_rate": 4.986054104477612e-05, + "loss": 0.143, + "step": 1196 + }, + { + "epoch": 1.12, + "learning_rate": 4.986007462686567e-05, + "loss": 0.0648, + "step": 1200 + }, + { + "epoch": 1.12, + "learning_rate": 4.985960820895523e-05, + "loss": 0.0706, + "step": 1204 + }, + { + "epoch": 1.13, + "learning_rate": 4.985914179104478e-05, + "loss": 0.0687, + "step": 1208 + }, + { + "epoch": 1.13, + "learning_rate": 4.9858675373134326e-05, + "loss": 0.098, + "step": 1212 + }, + { + "epoch": 1.13, + "learning_rate": 4.985820895522388e-05, + "loss": 0.0809, + "step": 1216 + }, + { + "epoch": 1.14, + "learning_rate": 4.9857742537313436e-05, + "loss": 0.0634, + "step": 1220 + }, + { + "epoch": 1.14, + "learning_rate": 4.9857276119402984e-05, + "loss": 0.0899, + "step": 1224 + }, + { + "epoch": 1.15, + "learning_rate": 4.985680970149254e-05, + "loss": 0.0905, + "step": 1228 + }, + { + "epoch": 1.15, + "learning_rate": 4.9856343283582094e-05, + "loss": 0.0669, + "step": 1232 + }, + { + "epoch": 1.15, + "learning_rate": 4.985587686567165e-05, + "loss": 0.0898, + "step": 1236 + }, + { + "epoch": 1.16, + "learning_rate": 4.98554104477612e-05, + "loss": 0.1037, + "step": 1240 + }, + { + "epoch": 1.16, + "learning_rate": 4.9854944029850745e-05, + "loss": 0.1286, + "step": 1244 + }, + { + "epoch": 1.16, + "learning_rate": 4.98544776119403e-05, + "loss": 0.0882, + "step": 1248 + }, + { + "epoch": 1.17, + "learning_rate": 4.9854011194029855e-05, + "loss": 0.0847, + "step": 1252 + }, + { + "epoch": 1.17, + "learning_rate": 4.98535447761194e-05, + "loss": 0.0579, + "step": 1256 + }, + { + "epoch": 1.18, + "learning_rate": 4.985307835820896e-05, + "loss": 0.0864, + "step": 1260 + }, + { + "epoch": 1.18, + "learning_rate": 4.985261194029851e-05, + "loss": 0.0831, + "step": 1264 + }, + { + "epoch": 1.18, + "learning_rate": 4.985214552238806e-05, + "loss": 0.1051, + "step": 1268 + }, + { + "epoch": 1.19, + "learning_rate": 4.9851679104477616e-05, + "loss": 0.0406, + "step": 1272 + }, + { + "epoch": 1.19, + "learning_rate": 4.9851212686567164e-05, + "loss": 0.1047, + "step": 1276 + }, + { + "epoch": 1.19, + "learning_rate": 4.985074626865672e-05, + "loss": 0.0855, + "step": 1280 + }, + { + "epoch": 1.2, + "learning_rate": 4.9850279850746274e-05, + "loss": 0.1054, + "step": 1284 + }, + { + "epoch": 1.2, + "learning_rate": 4.984981343283582e-05, + "loss": 0.102, + "step": 1288 + }, + { + "epoch": 1.21, + "learning_rate": 4.984934701492538e-05, + "loss": 0.0599, + "step": 1292 + }, + { + "epoch": 1.21, + "learning_rate": 4.984888059701493e-05, + "loss": 0.054, + "step": 1296 + }, + { + "epoch": 1.21, + "learning_rate": 4.984841417910448e-05, + "loss": 0.0712, + "step": 1300 + }, + { + "epoch": 1.22, + "learning_rate": 4.984794776119403e-05, + "loss": 0.0768, + "step": 1304 + }, + { + "epoch": 1.22, + "learning_rate": 4.984748134328358e-05, + "loss": 0.0646, + "step": 1308 + }, + { + "epoch": 1.22, + "learning_rate": 4.984701492537314e-05, + "loss": 0.0775, + "step": 1312 + }, + { + "epoch": 1.23, + "learning_rate": 4.9846548507462686e-05, + "loss": 0.0643, + "step": 1316 + }, + { + "epoch": 1.23, + "learning_rate": 4.984608208955224e-05, + "loss": 0.0972, + "step": 1320 + }, + { + "epoch": 1.24, + "learning_rate": 4.9845615671641796e-05, + "loss": 0.0801, + "step": 1324 + }, + { + "epoch": 1.24, + "learning_rate": 4.9845149253731344e-05, + "loss": 0.114, + "step": 1328 + }, + { + "epoch": 1.24, + "learning_rate": 4.98446828358209e-05, + "loss": 0.0623, + "step": 1332 + }, + { + "epoch": 1.25, + "learning_rate": 4.984421641791045e-05, + "loss": 0.07, + "step": 1336 + }, + { + "epoch": 1.25, + "learning_rate": 4.984375e-05, + "loss": 0.0768, + "step": 1340 + }, + { + "epoch": 1.25, + "learning_rate": 4.984328358208956e-05, + "loss": 0.0684, + "step": 1344 + }, + { + "epoch": 1.26, + "learning_rate": 4.9842817164179105e-05, + "loss": 0.08, + "step": 1348 + }, + { + "epoch": 1.26, + "learning_rate": 4.984235074626866e-05, + "loss": 0.062, + "step": 1352 + }, + { + "epoch": 1.26, + "learning_rate": 4.9841884328358215e-05, + "loss": 0.1145, + "step": 1356 + }, + { + "epoch": 1.27, + "learning_rate": 4.984141791044776e-05, + "loss": 0.088, + "step": 1360 + }, + { + "epoch": 1.27, + "learning_rate": 4.984095149253731e-05, + "loss": 0.0713, + "step": 1364 + }, + { + "epoch": 1.28, + "learning_rate": 4.9840485074626866e-05, + "loss": 0.0372, + "step": 1368 + }, + { + "epoch": 1.28, + "learning_rate": 4.984001865671642e-05, + "loss": 0.0618, + "step": 1372 + }, + { + "epoch": 1.28, + "learning_rate": 4.983955223880597e-05, + "loss": 0.119, + "step": 1376 + }, + { + "epoch": 1.29, + "learning_rate": 4.9839085820895524e-05, + "loss": 0.0979, + "step": 1380 + }, + { + "epoch": 1.29, + "learning_rate": 4.983861940298508e-05, + "loss": 0.056, + "step": 1384 + }, + { + "epoch": 1.29, + "learning_rate": 4.983815298507463e-05, + "loss": 0.0805, + "step": 1388 + }, + { + "epoch": 1.3, + "learning_rate": 4.983768656716418e-05, + "loss": 0.0768, + "step": 1392 + }, + { + "epoch": 1.3, + "learning_rate": 4.983722014925373e-05, + "loss": 0.0488, + "step": 1396 + }, + { + "epoch": 1.31, + "learning_rate": 4.983675373134329e-05, + "loss": 0.0914, + "step": 1400 + }, + { + "epoch": 1.31, + "learning_rate": 4.983628731343284e-05, + "loss": 0.0765, + "step": 1404 + }, + { + "epoch": 1.31, + "learning_rate": 4.983582089552239e-05, + "loss": 0.0862, + "step": 1408 + }, + { + "epoch": 1.32, + "learning_rate": 4.983535447761194e-05, + "loss": 0.0692, + "step": 1412 + }, + { + "epoch": 1.32, + "learning_rate": 4.98348880597015e-05, + "loss": 0.0611, + "step": 1416 + }, + { + "epoch": 1.32, + "learning_rate": 4.9834421641791046e-05, + "loss": 0.0947, + "step": 1420 + }, + { + "epoch": 1.33, + "learning_rate": 4.98339552238806e-05, + "loss": 0.0615, + "step": 1424 + }, + { + "epoch": 1.33, + "learning_rate": 4.983348880597015e-05, + "loss": 0.0774, + "step": 1428 + }, + { + "epoch": 1.34, + "learning_rate": 4.9833022388059704e-05, + "loss": 0.0725, + "step": 1432 + }, + { + "epoch": 1.34, + "learning_rate": 4.983255597014926e-05, + "loss": 0.0784, + "step": 1436 + }, + { + "epoch": 1.34, + "learning_rate": 4.983208955223881e-05, + "loss": 0.061, + "step": 1440 + }, + { + "epoch": 1.35, + "learning_rate": 4.983162313432836e-05, + "loss": 0.1046, + "step": 1444 + }, + { + "epoch": 1.35, + "learning_rate": 4.983115671641792e-05, + "loss": 0.0768, + "step": 1448 + }, + { + "epoch": 1.35, + "learning_rate": 4.9830690298507465e-05, + "loss": 0.0608, + "step": 1452 + }, + { + "epoch": 1.36, + "learning_rate": 4.983022388059701e-05, + "loss": 0.0959, + "step": 1456 + }, + { + "epoch": 1.36, + "learning_rate": 4.9829757462686575e-05, + "loss": 0.0602, + "step": 1460 + }, + { + "epoch": 1.37, + "learning_rate": 4.982929104477612e-05, + "loss": 0.0655, + "step": 1464 + }, + { + "epoch": 1.37, + "learning_rate": 4.982882462686567e-05, + "loss": 0.1009, + "step": 1468 + }, + { + "epoch": 1.37, + "learning_rate": 4.9828358208955226e-05, + "loss": 0.0563, + "step": 1472 + }, + { + "epoch": 1.38, + "learning_rate": 4.982789179104478e-05, + "loss": 0.0763, + "step": 1476 + }, + { + "epoch": 1.38, + "learning_rate": 4.982742537313433e-05, + "loss": 0.0651, + "step": 1480 + }, + { + "epoch": 1.38, + "learning_rate": 4.9826958955223884e-05, + "loss": 0.0346, + "step": 1484 + }, + { + "epoch": 1.39, + "learning_rate": 4.982649253731343e-05, + "loss": 0.0437, + "step": 1488 + }, + { + "epoch": 1.39, + "learning_rate": 4.982602611940299e-05, + "loss": 0.0944, + "step": 1492 + }, + { + "epoch": 1.4, + "learning_rate": 4.982555970149254e-05, + "loss": 0.0718, + "step": 1496 + }, + { + "epoch": 1.4, + "learning_rate": 4.982509328358209e-05, + "loss": 0.098, + "step": 1500 + }, + { + "epoch": 1.4, + "eval_exact_match": 0.660541586073501, + "eval_exec": 0.6818181818181818, + "eval_loss": 0.1437511444091797, + "eval_runtime": 1123.3017, + "eval_samples_per_second": 0.921, + "step": 1500 + }, + { + "epoch": 1.4, + "learning_rate": 4.9824626865671645e-05, + "loss": 0.087, + "step": 1504 + }, + { + "epoch": 1.41, + "learning_rate": 4.98241604477612e-05, + "loss": 0.0929, + "step": 1508 + }, + { + "epoch": 1.41, + "learning_rate": 4.982369402985075e-05, + "loss": 0.1199, + "step": 1512 + }, + { + "epoch": 1.41, + "learning_rate": 4.9823227611940296e-05, + "loss": 0.0896, + "step": 1516 + }, + { + "epoch": 1.42, + "learning_rate": 4.982276119402986e-05, + "loss": 0.0901, + "step": 1520 + }, + { + "epoch": 1.42, + "learning_rate": 4.9822294776119406e-05, + "loss": 0.0724, + "step": 1524 + }, + { + "epoch": 1.43, + "learning_rate": 4.9821828358208954e-05, + "loss": 0.067, + "step": 1528 + }, + { + "epoch": 1.43, + "learning_rate": 4.982136194029851e-05, + "loss": 0.0706, + "step": 1532 + }, + { + "epoch": 1.43, + "learning_rate": 4.9820895522388064e-05, + "loss": 0.0507, + "step": 1536 + }, + { + "epoch": 1.44, + "learning_rate": 4.982042910447761e-05, + "loss": 0.0632, + "step": 1540 + }, + { + "epoch": 1.44, + "learning_rate": 4.981996268656717e-05, + "loss": 0.0833, + "step": 1544 + }, + { + "epoch": 1.44, + "learning_rate": 4.9819496268656715e-05, + "loss": 0.1091, + "step": 1548 + }, + { + "epoch": 1.45, + "learning_rate": 4.981902985074627e-05, + "loss": 0.0629, + "step": 1552 + }, + { + "epoch": 1.45, + "learning_rate": 4.9818563432835825e-05, + "loss": 0.1016, + "step": 1556 + }, + { + "epoch": 1.46, + "learning_rate": 4.981809701492537e-05, + "loss": 0.0879, + "step": 1560 + }, + { + "epoch": 1.46, + "learning_rate": 4.981763059701493e-05, + "loss": 0.0938, + "step": 1564 + }, + { + "epoch": 1.46, + "learning_rate": 4.981716417910448e-05, + "loss": 0.0596, + "step": 1568 + }, + { + "epoch": 1.47, + "learning_rate": 4.981669776119403e-05, + "loss": 0.1145, + "step": 1572 + }, + { + "epoch": 1.47, + "learning_rate": 4.9816231343283586e-05, + "loss": 0.0723, + "step": 1576 + }, + { + "epoch": 1.47, + "learning_rate": 4.981576492537314e-05, + "loss": 0.0335, + "step": 1580 + }, + { + "epoch": 1.48, + "learning_rate": 4.981529850746269e-05, + "loss": 0.0567, + "step": 1584 + }, + { + "epoch": 1.48, + "learning_rate": 4.9814832089552244e-05, + "loss": 0.0653, + "step": 1588 + }, + { + "epoch": 1.49, + "learning_rate": 4.981436567164179e-05, + "loss": 0.0643, + "step": 1592 + }, + { + "epoch": 1.49, + "learning_rate": 4.981389925373135e-05, + "loss": 0.0719, + "step": 1596 + }, + { + "epoch": 1.49, + "learning_rate": 4.98134328358209e-05, + "loss": 0.0477, + "step": 1600 + }, + { + "epoch": 1.5, + "learning_rate": 4.981296641791045e-05, + "loss": 0.0702, + "step": 1604 + }, + { + "epoch": 1.5, + "learning_rate": 4.98125e-05, + "loss": 0.0685, + "step": 1608 + }, + { + "epoch": 1.5, + "learning_rate": 4.981203358208956e-05, + "loss": 0.0908, + "step": 1612 + }, + { + "epoch": 1.51, + "learning_rate": 4.981156716417911e-05, + "loss": 0.1234, + "step": 1616 + }, + { + "epoch": 1.51, + "learning_rate": 4.9811100746268656e-05, + "loss": 0.0749, + "step": 1620 + }, + { + "epoch": 1.51, + "learning_rate": 4.981063432835821e-05, + "loss": 0.0627, + "step": 1624 + }, + { + "epoch": 1.52, + "learning_rate": 4.9810167910447766e-05, + "loss": 0.0642, + "step": 1628 + }, + { + "epoch": 1.52, + "learning_rate": 4.9809701492537314e-05, + "loss": 0.0632, + "step": 1632 + }, + { + "epoch": 1.53, + "learning_rate": 4.980923507462687e-05, + "loss": 0.0874, + "step": 1636 + }, + { + "epoch": 1.53, + "learning_rate": 4.9808768656716424e-05, + "loss": 0.1122, + "step": 1640 + }, + { + "epoch": 1.53, + "learning_rate": 4.980830223880597e-05, + "loss": 0.0583, + "step": 1644 + }, + { + "epoch": 1.54, + "learning_rate": 4.9807835820895527e-05, + "loss": 0.0584, + "step": 1648 + }, + { + "epoch": 1.54, + "learning_rate": 4.9807369402985075e-05, + "loss": 0.1076, + "step": 1652 + }, + { + "epoch": 1.54, + "learning_rate": 4.980690298507463e-05, + "loss": 0.0854, + "step": 1656 + }, + { + "epoch": 1.55, + "learning_rate": 4.9806436567164185e-05, + "loss": 0.0862, + "step": 1660 + }, + { + "epoch": 1.55, + "learning_rate": 4.980597014925373e-05, + "loss": 0.0705, + "step": 1664 + }, + { + "epoch": 1.56, + "learning_rate": 4.980550373134328e-05, + "loss": 0.0803, + "step": 1668 + }, + { + "epoch": 1.56, + "learning_rate": 4.980503731343284e-05, + "loss": 0.0387, + "step": 1672 + }, + { + "epoch": 1.56, + "learning_rate": 4.980457089552239e-05, + "loss": 0.0556, + "step": 1676 + }, + { + "epoch": 1.57, + "learning_rate": 4.980410447761194e-05, + "loss": 0.0559, + "step": 1680 + }, + { + "epoch": 1.57, + "learning_rate": 4.9803638059701494e-05, + "loss": 0.0766, + "step": 1684 + }, + { + "epoch": 1.57, + "learning_rate": 4.980317164179105e-05, + "loss": 0.0778, + "step": 1688 + }, + { + "epoch": 1.58, + "learning_rate": 4.98027052238806e-05, + "loss": 0.0976, + "step": 1692 + }, + { + "epoch": 1.58, + "learning_rate": 4.980223880597015e-05, + "loss": 0.0818, + "step": 1696 + }, + { + "epoch": 1.59, + "learning_rate": 4.9801772388059707e-05, + "loss": 0.0597, + "step": 1700 + }, + { + "epoch": 1.59, + "learning_rate": 4.9801305970149255e-05, + "loss": 0.0626, + "step": 1704 + }, + { + "epoch": 1.59, + "learning_rate": 4.980083955223881e-05, + "loss": 0.117, + "step": 1708 + }, + { + "epoch": 1.6, + "learning_rate": 4.980037313432836e-05, + "loss": 0.0725, + "step": 1712 + }, + { + "epoch": 1.6, + "learning_rate": 4.979990671641791e-05, + "loss": 0.0788, + "step": 1716 + }, + { + "epoch": 1.6, + "learning_rate": 4.979944029850747e-05, + "loss": 0.0874, + "step": 1720 + }, + { + "epoch": 1.61, + "learning_rate": 4.9798973880597016e-05, + "loss": 0.0781, + "step": 1724 + }, + { + "epoch": 1.61, + "learning_rate": 4.979850746268657e-05, + "loss": 0.0971, + "step": 1728 + }, + { + "epoch": 1.62, + "learning_rate": 4.9798041044776125e-05, + "loss": 0.1099, + "step": 1732 + }, + { + "epoch": 1.62, + "learning_rate": 4.9797574626865674e-05, + "loss": 0.0748, + "step": 1736 + }, + { + "epoch": 1.62, + "learning_rate": 4.979710820895523e-05, + "loss": 0.073, + "step": 1740 + }, + { + "epoch": 1.63, + "learning_rate": 4.9796641791044777e-05, + "loss": 0.0584, + "step": 1744 + }, + { + "epoch": 1.63, + "learning_rate": 4.979617537313433e-05, + "loss": 0.0749, + "step": 1748 + }, + { + "epoch": 1.63, + "learning_rate": 4.9795708955223886e-05, + "loss": 0.0712, + "step": 1752 + }, + { + "epoch": 1.64, + "learning_rate": 4.9795242537313435e-05, + "loss": 0.082, + "step": 1756 + }, + { + "epoch": 1.64, + "learning_rate": 4.979477611940298e-05, + "loss": 0.0701, + "step": 1760 + }, + { + "epoch": 1.65, + "learning_rate": 4.9794309701492544e-05, + "loss": 0.0424, + "step": 1764 + }, + { + "epoch": 1.65, + "learning_rate": 4.979384328358209e-05, + "loss": 0.1098, + "step": 1768 + }, + { + "epoch": 1.65, + "learning_rate": 4.979337686567164e-05, + "loss": 0.0737, + "step": 1772 + }, + { + "epoch": 1.66, + "learning_rate": 4.9792910447761196e-05, + "loss": 0.0571, + "step": 1776 + }, + { + "epoch": 1.66, + "learning_rate": 4.979244402985075e-05, + "loss": 0.0605, + "step": 1780 + }, + { + "epoch": 1.66, + "learning_rate": 4.97919776119403e-05, + "loss": 0.0686, + "step": 1784 + }, + { + "epoch": 1.67, + "learning_rate": 4.9791511194029853e-05, + "loss": 0.0497, + "step": 1788 + }, + { + "epoch": 1.67, + "learning_rate": 4.979104477611941e-05, + "loss": 0.0528, + "step": 1792 + }, + { + "epoch": 1.68, + "learning_rate": 4.9790578358208957e-05, + "loss": 0.0618, + "step": 1796 + }, + { + "epoch": 1.68, + "learning_rate": 4.979011194029851e-05, + "loss": 0.0575, + "step": 1800 + }, + { + "epoch": 1.68, + "learning_rate": 4.978964552238806e-05, + "loss": 0.0448, + "step": 1804 + }, + { + "epoch": 1.69, + "learning_rate": 4.9789179104477614e-05, + "loss": 0.0505, + "step": 1808 + }, + { + "epoch": 1.69, + "learning_rate": 4.978871268656717e-05, + "loss": 0.0813, + "step": 1812 + }, + { + "epoch": 1.69, + "learning_rate": 4.978824626865672e-05, + "loss": 0.0487, + "step": 1816 + }, + { + "epoch": 1.7, + "learning_rate": 4.9787779850746266e-05, + "loss": 0.0912, + "step": 1820 + }, + { + "epoch": 1.7, + "learning_rate": 4.978731343283583e-05, + "loss": 0.0839, + "step": 1824 + }, + { + "epoch": 1.71, + "learning_rate": 4.9786847014925375e-05, + "loss": 0.0734, + "step": 1828 + }, + { + "epoch": 1.71, + "learning_rate": 4.9786380597014924e-05, + "loss": 0.0664, + "step": 1832 + }, + { + "epoch": 1.71, + "learning_rate": 4.978591417910448e-05, + "loss": 0.0435, + "step": 1836 + }, + { + "epoch": 1.72, + "learning_rate": 4.9785447761194033e-05, + "loss": 0.0893, + "step": 1840 + }, + { + "epoch": 1.72, + "learning_rate": 4.978498134328358e-05, + "loss": 0.0807, + "step": 1844 + }, + { + "epoch": 1.72, + "learning_rate": 4.9784514925373136e-05, + "loss": 0.0587, + "step": 1848 + }, + { + "epoch": 1.73, + "learning_rate": 4.978404850746269e-05, + "loss": 0.072, + "step": 1852 + }, + { + "epoch": 1.73, + "learning_rate": 4.978358208955224e-05, + "loss": 0.0707, + "step": 1856 + }, + { + "epoch": 1.73, + "learning_rate": 4.9783115671641794e-05, + "loss": 0.0658, + "step": 1860 + }, + { + "epoch": 1.74, + "learning_rate": 4.978264925373134e-05, + "loss": 0.0782, + "step": 1864 + }, + { + "epoch": 1.74, + "learning_rate": 4.97821828358209e-05, + "loss": 0.124, + "step": 1868 + }, + { + "epoch": 1.75, + "learning_rate": 4.978171641791045e-05, + "loss": 0.0764, + "step": 1872 + }, + { + "epoch": 1.75, + "learning_rate": 4.978125e-05, + "loss": 0.0901, + "step": 1876 + }, + { + "epoch": 1.75, + "learning_rate": 4.9780783582089555e-05, + "loss": 0.0531, + "step": 1880 + }, + { + "epoch": 1.76, + "learning_rate": 4.978031716417911e-05, + "loss": 0.0964, + "step": 1884 + }, + { + "epoch": 1.76, + "learning_rate": 4.977985074626866e-05, + "loss": 0.0847, + "step": 1888 + }, + { + "epoch": 1.76, + "learning_rate": 4.977938432835821e-05, + "loss": 0.0643, + "step": 1892 + }, + { + "epoch": 1.77, + "learning_rate": 4.977891791044776e-05, + "loss": 0.0743, + "step": 1896 + }, + { + "epoch": 1.77, + "learning_rate": 4.9778451492537316e-05, + "loss": 0.0533, + "step": 1900 + }, + { + "epoch": 1.78, + "learning_rate": 4.977798507462687e-05, + "loss": 0.0736, + "step": 1904 + }, + { + "epoch": 1.78, + "learning_rate": 4.977751865671642e-05, + "loss": 0.0535, + "step": 1908 + }, + { + "epoch": 1.78, + "learning_rate": 4.9777052238805974e-05, + "loss": 0.0636, + "step": 1912 + }, + { + "epoch": 1.79, + "learning_rate": 4.977658582089553e-05, + "loss": 0.0381, + "step": 1916 + }, + { + "epoch": 1.79, + "learning_rate": 4.977611940298508e-05, + "loss": 0.0298, + "step": 1920 + }, + { + "epoch": 1.79, + "learning_rate": 4.9775652985074625e-05, + "loss": 0.0838, + "step": 1924 + }, + { + "epoch": 1.8, + "learning_rate": 4.977518656716418e-05, + "loss": 0.1048, + "step": 1928 + }, + { + "epoch": 1.8, + "learning_rate": 4.9774720149253735e-05, + "loss": 0.031, + "step": 1932 + }, + { + "epoch": 1.81, + "learning_rate": 4.9774253731343283e-05, + "loss": 0.0617, + "step": 1936 + }, + { + "epoch": 1.81, + "learning_rate": 4.977378731343284e-05, + "loss": 0.1009, + "step": 1940 + }, + { + "epoch": 1.81, + "learning_rate": 4.977332089552239e-05, + "loss": 0.057, + "step": 1944 + }, + { + "epoch": 1.82, + "learning_rate": 4.977285447761194e-05, + "loss": 0.0519, + "step": 1948 + }, + { + "epoch": 1.82, + "learning_rate": 4.9772388059701496e-05, + "loss": 0.061, + "step": 1952 + }, + { + "epoch": 1.82, + "learning_rate": 4.9771921641791044e-05, + "loss": 0.1017, + "step": 1956 + }, + { + "epoch": 1.83, + "learning_rate": 4.97714552238806e-05, + "loss": 0.0552, + "step": 1960 + }, + { + "epoch": 1.83, + "learning_rate": 4.9770988805970154e-05, + "loss": 0.0943, + "step": 1964 + }, + { + "epoch": 1.84, + "learning_rate": 4.97705223880597e-05, + "loss": 0.0545, + "step": 1968 + }, + { + "epoch": 1.84, + "learning_rate": 4.977005597014926e-05, + "loss": 0.0931, + "step": 1972 + }, + { + "epoch": 1.84, + "learning_rate": 4.976958955223881e-05, + "loss": 0.0426, + "step": 1976 + }, + { + "epoch": 1.85, + "learning_rate": 4.976912313432836e-05, + "loss": 0.0939, + "step": 1980 + }, + { + "epoch": 1.85, + "learning_rate": 4.976865671641791e-05, + "loss": 0.086, + "step": 1984 + }, + { + "epoch": 1.85, + "learning_rate": 4.976819029850746e-05, + "loss": 0.0391, + "step": 1988 + }, + { + "epoch": 1.86, + "learning_rate": 4.976772388059702e-05, + "loss": 0.0389, + "step": 1992 + }, + { + "epoch": 1.86, + "learning_rate": 4.9767257462686566e-05, + "loss": 0.0725, + "step": 1996 + }, + { + "epoch": 1.87, + "learning_rate": 4.976679104477612e-05, + "loss": 0.074, + "step": 2000 + }, + { + "epoch": 1.87, + "eval_exact_match": 0.6721470019342359, + "eval_exec": 0.7147001934235977, + "eval_loss": 0.14614403247833252, + "eval_runtime": 1069.1464, + "eval_samples_per_second": 0.967, + "step": 2000 + }, + { + "epoch": 1.87, + "learning_rate": 4.9766324626865676e-05, + "loss": 0.0657, + "step": 2004 + }, + { + "epoch": 1.87, + "learning_rate": 4.9765858208955224e-05, + "loss": 0.0452, + "step": 2008 + }, + { + "epoch": 1.88, + "learning_rate": 4.976539179104478e-05, + "loss": 0.0343, + "step": 2012 + }, + { + "epoch": 1.88, + "learning_rate": 4.976492537313433e-05, + "loss": 0.0896, + "step": 2016 + }, + { + "epoch": 1.88, + "learning_rate": 4.976445895522388e-05, + "loss": 0.043, + "step": 2020 + }, + { + "epoch": 1.89, + "learning_rate": 4.976399253731344e-05, + "loss": 0.0543, + "step": 2024 + }, + { + "epoch": 1.89, + "learning_rate": 4.9763526119402985e-05, + "loss": 0.0976, + "step": 2028 + }, + { + "epoch": 1.9, + "learning_rate": 4.976305970149254e-05, + "loss": 0.0623, + "step": 2032 + }, + { + "epoch": 1.9, + "learning_rate": 4.9762593283582095e-05, + "loss": 0.0817, + "step": 2036 + }, + { + "epoch": 1.9, + "learning_rate": 4.976212686567164e-05, + "loss": 0.0442, + "step": 2040 + }, + { + "epoch": 1.91, + "learning_rate": 4.97616604477612e-05, + "loss": 0.0688, + "step": 2044 + }, + { + "epoch": 1.91, + "learning_rate": 4.9761194029850746e-05, + "loss": 0.0729, + "step": 2048 + }, + { + "epoch": 1.91, + "learning_rate": 4.97607276119403e-05, + "loss": 0.055, + "step": 2052 + }, + { + "epoch": 1.92, + "learning_rate": 4.9760261194029856e-05, + "loss": 0.055, + "step": 2056 + }, + { + "epoch": 1.92, + "learning_rate": 4.9759794776119404e-05, + "loss": 0.0533, + "step": 2060 + }, + { + "epoch": 1.93, + "learning_rate": 4.975932835820896e-05, + "loss": 0.0656, + "step": 2064 + }, + { + "epoch": 1.93, + "learning_rate": 4.9758861940298514e-05, + "loss": 0.0534, + "step": 2068 + }, + { + "epoch": 1.93, + "learning_rate": 4.975839552238806e-05, + "loss": 0.0565, + "step": 2072 + }, + { + "epoch": 1.94, + "learning_rate": 4.975792910447761e-05, + "loss": 0.0608, + "step": 2076 + }, + { + "epoch": 1.94, + "learning_rate": 4.975746268656717e-05, + "loss": 0.0383, + "step": 2080 + }, + { + "epoch": 1.94, + "learning_rate": 4.975699626865672e-05, + "loss": 0.1284, + "step": 2084 + }, + { + "epoch": 1.95, + "learning_rate": 4.975652985074627e-05, + "loss": 0.0556, + "step": 2088 + }, + { + "epoch": 1.95, + "learning_rate": 4.975606343283582e-05, + "loss": 0.0666, + "step": 2092 + }, + { + "epoch": 1.96, + "learning_rate": 4.975559701492538e-05, + "loss": 0.0705, + "step": 2096 + }, + { + "epoch": 1.96, + "learning_rate": 4.9755130597014926e-05, + "loss": 0.0421, + "step": 2100 + }, + { + "epoch": 1.96, + "learning_rate": 4.975466417910448e-05, + "loss": 0.0666, + "step": 2104 + }, + { + "epoch": 1.97, + "learning_rate": 4.975419776119403e-05, + "loss": 0.0333, + "step": 2108 + }, + { + "epoch": 1.97, + "learning_rate": 4.9753731343283584e-05, + "loss": 0.05, + "step": 2112 + }, + { + "epoch": 1.97, + "learning_rate": 4.975326492537314e-05, + "loss": 0.059, + "step": 2116 + }, + { + "epoch": 1.98, + "learning_rate": 4.975279850746269e-05, + "loss": 0.0567, + "step": 2120 + }, + { + "epoch": 1.98, + "learning_rate": 4.975233208955224e-05, + "loss": 0.0447, + "step": 2124 + }, + { + "epoch": 1.98, + "learning_rate": 4.97518656716418e-05, + "loss": 0.0653, + "step": 2128 + }, + { + "epoch": 1.99, + "learning_rate": 4.9751399253731345e-05, + "loss": 0.0653, + "step": 2132 + }, + { + "epoch": 1.99, + "learning_rate": 4.975093283582089e-05, + "loss": 0.0908, + "step": 2136 + }, + { + "epoch": 2.0, + "learning_rate": 4.9750466417910455e-05, + "loss": 0.086, + "step": 2140 + }, + { + "epoch": 2.0, + "learning_rate": 4.975e-05, + "loss": 0.0909, + "step": 2144 + }, + { + "epoch": 2.0, + "learning_rate": 4.974953358208955e-05, + "loss": 0.0282, + "step": 2148 + }, + { + "epoch": 2.01, + "learning_rate": 4.9749067164179106e-05, + "loss": 0.0412, + "step": 2152 + }, + { + "epoch": 2.01, + "learning_rate": 4.974860074626866e-05, + "loss": 0.0324, + "step": 2156 + }, + { + "epoch": 2.01, + "learning_rate": 4.974813432835821e-05, + "loss": 0.0381, + "step": 2160 + }, + { + "epoch": 2.02, + "learning_rate": 4.9747667910447764e-05, + "loss": 0.0609, + "step": 2164 + }, + { + "epoch": 2.02, + "learning_rate": 4.974720149253731e-05, + "loss": 0.0252, + "step": 2168 + }, + { + "epoch": 2.03, + "learning_rate": 4.974673507462687e-05, + "loss": 0.0282, + "step": 2172 + }, + { + "epoch": 2.03, + "learning_rate": 4.974626865671642e-05, + "loss": 0.0329, + "step": 2176 + }, + { + "epoch": 2.03, + "learning_rate": 4.974580223880597e-05, + "loss": 0.0479, + "step": 2180 + }, + { + "epoch": 2.04, + "learning_rate": 4.9745335820895525e-05, + "loss": 0.0323, + "step": 2184 + }, + { + "epoch": 2.04, + "learning_rate": 4.974486940298508e-05, + "loss": 0.0309, + "step": 2188 + }, + { + "epoch": 2.04, + "learning_rate": 4.974440298507463e-05, + "loss": 0.0482, + "step": 2192 + }, + { + "epoch": 2.05, + "learning_rate": 4.9743936567164176e-05, + "loss": 0.0378, + "step": 2196 + }, + { + "epoch": 2.05, + "learning_rate": 4.974347014925374e-05, + "loss": 0.057, + "step": 2200 + }, + { + "epoch": 2.06, + "learning_rate": 4.9743003731343286e-05, + "loss": 0.0485, + "step": 2204 + }, + { + "epoch": 2.06, + "learning_rate": 4.974253731343284e-05, + "loss": 0.0409, + "step": 2208 + }, + { + "epoch": 2.06, + "learning_rate": 4.974207089552239e-05, + "loss": 0.0497, + "step": 2212 + }, + { + "epoch": 2.07, + "learning_rate": 4.9741604477611944e-05, + "loss": 0.0286, + "step": 2216 + }, + { + "epoch": 2.07, + "learning_rate": 4.97411380597015e-05, + "loss": 0.0323, + "step": 2220 + }, + { + "epoch": 2.07, + "learning_rate": 4.974067164179105e-05, + "loss": 0.0414, + "step": 2224 + }, + { + "epoch": 2.08, + "learning_rate": 4.9740205223880595e-05, + "loss": 0.0469, + "step": 2228 + }, + { + "epoch": 2.08, + "learning_rate": 4.973973880597016e-05, + "loss": 0.0267, + "step": 2232 + }, + { + "epoch": 2.09, + "learning_rate": 4.9739272388059705e-05, + "loss": 0.0438, + "step": 2236 + }, + { + "epoch": 2.09, + "learning_rate": 4.973880597014925e-05, + "loss": 0.0423, + "step": 2240 + }, + { + "epoch": 2.09, + "learning_rate": 4.973833955223881e-05, + "loss": 0.0424, + "step": 2244 + }, + { + "epoch": 2.1, + "learning_rate": 4.973787313432836e-05, + "loss": 0.0562, + "step": 2248 + }, + { + "epoch": 2.1, + "learning_rate": 4.973740671641791e-05, + "loss": 0.0538, + "step": 2252 + }, + { + "epoch": 2.1, + "learning_rate": 4.9736940298507466e-05, + "loss": 0.0554, + "step": 2256 + }, + { + "epoch": 2.11, + "learning_rate": 4.973647388059702e-05, + "loss": 0.0595, + "step": 2260 + }, + { + "epoch": 2.11, + "learning_rate": 4.973600746268657e-05, + "loss": 0.0652, + "step": 2264 + }, + { + "epoch": 2.12, + "learning_rate": 4.9735541044776124e-05, + "loss": 0.05, + "step": 2268 + }, + { + "epoch": 2.12, + "learning_rate": 4.973507462686567e-05, + "loss": 0.0699, + "step": 2272 + }, + { + "epoch": 2.12, + "learning_rate": 4.973460820895523e-05, + "loss": 0.0484, + "step": 2276 + }, + { + "epoch": 2.13, + "learning_rate": 4.973414179104478e-05, + "loss": 0.0502, + "step": 2280 + }, + { + "epoch": 2.13, + "learning_rate": 4.973367537313433e-05, + "loss": 0.0437, + "step": 2284 + }, + { + "epoch": 2.13, + "learning_rate": 4.973320895522388e-05, + "loss": 0.072, + "step": 2288 + }, + { + "epoch": 2.14, + "learning_rate": 4.973274253731344e-05, + "loss": 0.0548, + "step": 2292 + }, + { + "epoch": 2.14, + "learning_rate": 4.973227611940299e-05, + "loss": 0.0734, + "step": 2296 + }, + { + "epoch": 2.15, + "learning_rate": 4.9731809701492536e-05, + "loss": 0.0451, + "step": 2300 + }, + { + "epoch": 2.15, + "learning_rate": 4.973134328358209e-05, + "loss": 0.0202, + "step": 2304 + }, + { + "epoch": 2.15, + "learning_rate": 4.9730876865671646e-05, + "loss": 0.0281, + "step": 2308 + }, + { + "epoch": 2.16, + "learning_rate": 4.9730410447761194e-05, + "loss": 0.0335, + "step": 2312 + }, + { + "epoch": 2.16, + "learning_rate": 4.972994402985075e-05, + "loss": 0.042, + "step": 2316 + }, + { + "epoch": 2.16, + "learning_rate": 4.9729477611940304e-05, + "loss": 0.0429, + "step": 2320 + }, + { + "epoch": 2.17, + "learning_rate": 4.972901119402985e-05, + "loss": 0.0548, + "step": 2324 + }, + { + "epoch": 2.17, + "learning_rate": 4.972854477611941e-05, + "loss": 0.0355, + "step": 2328 + }, + { + "epoch": 2.18, + "learning_rate": 4.9728078358208955e-05, + "loss": 0.0399, + "step": 2332 + }, + { + "epoch": 2.18, + "learning_rate": 4.972761194029851e-05, + "loss": 0.0273, + "step": 2336 + }, + { + "epoch": 2.18, + "learning_rate": 4.9727145522388065e-05, + "loss": 0.0439, + "step": 2340 + }, + { + "epoch": 2.19, + "learning_rate": 4.972667910447761e-05, + "loss": 0.0303, + "step": 2344 + }, + { + "epoch": 2.19, + "learning_rate": 4.972621268656716e-05, + "loss": 0.0439, + "step": 2348 + }, + { + "epoch": 2.19, + "learning_rate": 4.972574626865672e-05, + "loss": 0.0417, + "step": 2352 + }, + { + "epoch": 2.2, + "learning_rate": 4.972527985074627e-05, + "loss": 0.0286, + "step": 2356 + }, + { + "epoch": 2.2, + "learning_rate": 4.972481343283582e-05, + "loss": 0.0292, + "step": 2360 + }, + { + "epoch": 2.21, + "learning_rate": 4.9724347014925374e-05, + "loss": 0.0515, + "step": 2364 + }, + { + "epoch": 2.21, + "learning_rate": 4.972388059701493e-05, + "loss": 0.0473, + "step": 2368 + }, + { + "epoch": 2.21, + "learning_rate": 4.9723414179104484e-05, + "loss": 0.0406, + "step": 2372 + }, + { + "epoch": 2.22, + "learning_rate": 4.972294776119403e-05, + "loss": 0.0629, + "step": 2376 + }, + { + "epoch": 2.22, + "learning_rate": 4.972248134328359e-05, + "loss": 0.0504, + "step": 2380 + }, + { + "epoch": 2.22, + "learning_rate": 4.972201492537314e-05, + "loss": 0.0267, + "step": 2384 + }, + { + "epoch": 2.23, + "learning_rate": 4.972154850746269e-05, + "loss": 0.0302, + "step": 2388 + }, + { + "epoch": 2.23, + "learning_rate": 4.972108208955224e-05, + "loss": 0.0223, + "step": 2392 + }, + { + "epoch": 2.24, + "learning_rate": 4.972061567164179e-05, + "loss": 0.0636, + "step": 2396 + }, + { + "epoch": 2.24, + "learning_rate": 4.972014925373135e-05, + "loss": 0.0347, + "step": 2400 + }, + { + "epoch": 2.24, + "learning_rate": 4.9719682835820896e-05, + "loss": 0.0412, + "step": 2404 + }, + { + "epoch": 2.25, + "learning_rate": 4.971921641791045e-05, + "loss": 0.0416, + "step": 2408 + }, + { + "epoch": 2.25, + "learning_rate": 4.9718750000000006e-05, + "loss": 0.0442, + "step": 2412 + }, + { + "epoch": 2.25, + "learning_rate": 4.9718283582089554e-05, + "loss": 0.0268, + "step": 2416 + }, + { + "epoch": 2.26, + "learning_rate": 4.971781716417911e-05, + "loss": 0.043, + "step": 2420 + }, + { + "epoch": 2.26, + "learning_rate": 4.971735074626866e-05, + "loss": 0.0315, + "step": 2424 + }, + { + "epoch": 2.26, + "learning_rate": 4.971688432835821e-05, + "loss": 0.0238, + "step": 2428 + }, + { + "epoch": 2.27, + "learning_rate": 4.971641791044777e-05, + "loss": 0.0405, + "step": 2432 + }, + { + "epoch": 2.27, + "learning_rate": 4.9715951492537315e-05, + "loss": 0.0445, + "step": 2436 + }, + { + "epoch": 2.28, + "learning_rate": 4.971548507462686e-05, + "loss": 0.0537, + "step": 2440 + }, + { + "epoch": 2.28, + "learning_rate": 4.9715018656716425e-05, + "loss": 0.0656, + "step": 2444 + }, + { + "epoch": 2.28, + "learning_rate": 4.971455223880597e-05, + "loss": 0.0381, + "step": 2448 + }, + { + "epoch": 2.29, + "learning_rate": 4.971408582089552e-05, + "loss": 0.0455, + "step": 2452 + }, + { + "epoch": 2.29, + "learning_rate": 4.9713619402985076e-05, + "loss": 0.0489, + "step": 2456 + }, + { + "epoch": 2.29, + "learning_rate": 4.971315298507463e-05, + "loss": 0.0505, + "step": 2460 + }, + { + "epoch": 2.3, + "learning_rate": 4.971268656716418e-05, + "loss": 0.0454, + "step": 2464 + }, + { + "epoch": 2.3, + "learning_rate": 4.9712220149253734e-05, + "loss": 0.0409, + "step": 2468 + }, + { + "epoch": 2.31, + "learning_rate": 4.971175373134329e-05, + "loss": 0.0429, + "step": 2472 + }, + { + "epoch": 2.31, + "learning_rate": 4.971128731343284e-05, + "loss": 0.0383, + "step": 2476 + }, + { + "epoch": 2.31, + "learning_rate": 4.971082089552239e-05, + "loss": 0.0349, + "step": 2480 + }, + { + "epoch": 2.32, + "learning_rate": 4.971035447761194e-05, + "loss": 0.0369, + "step": 2484 + }, + { + "epoch": 2.32, + "learning_rate": 4.9709888059701495e-05, + "loss": 0.052, + "step": 2488 + }, + { + "epoch": 2.32, + "learning_rate": 4.970942164179105e-05, + "loss": 0.0564, + "step": 2492 + }, + { + "epoch": 2.33, + "learning_rate": 4.97089552238806e-05, + "loss": 0.0585, + "step": 2496 + }, + { + "epoch": 2.33, + "learning_rate": 4.9708488805970146e-05, + "loss": 0.0382, + "step": 2500 + }, + { + "epoch": 2.33, + "eval_exact_match": 0.6847195357833655, + "eval_exec": 0.7408123791102514, + "eval_loss": 0.16533711552619934, + "eval_runtime": 1046.3454, + "eval_samples_per_second": 0.988, + "step": 2500 + }, + { + "epoch": 2.34, + "learning_rate": 4.970802238805971e-05, + "loss": 0.0608, + "step": 2504 + }, + { + "epoch": 2.34, + "learning_rate": 4.9707555970149256e-05, + "loss": 0.0526, + "step": 2508 + }, + { + "epoch": 2.34, + "learning_rate": 4.9707089552238804e-05, + "loss": 0.0666, + "step": 2512 + }, + { + "epoch": 2.35, + "learning_rate": 4.970662313432836e-05, + "loss": 0.019, + "step": 2516 + }, + { + "epoch": 2.35, + "learning_rate": 4.9706156716417914e-05, + "loss": 0.0384, + "step": 2520 + }, + { + "epoch": 2.35, + "learning_rate": 4.970569029850746e-05, + "loss": 0.0553, + "step": 2524 + }, + { + "epoch": 2.36, + "learning_rate": 4.970522388059702e-05, + "loss": 0.0517, + "step": 2528 + }, + { + "epoch": 2.36, + "learning_rate": 4.970475746268657e-05, + "loss": 0.086, + "step": 2532 + }, + { + "epoch": 2.37, + "learning_rate": 4.9704291044776127e-05, + "loss": 0.0685, + "step": 2536 + }, + { + "epoch": 2.37, + "learning_rate": 4.9703824626865675e-05, + "loss": 0.0479, + "step": 2540 + }, + { + "epoch": 2.37, + "learning_rate": 4.970335820895522e-05, + "loss": 0.0378, + "step": 2544 + }, + { + "epoch": 2.38, + "learning_rate": 4.9702891791044784e-05, + "loss": 0.046, + "step": 2548 + }, + { + "epoch": 2.38, + "learning_rate": 4.970242537313433e-05, + "loss": 0.056, + "step": 2552 + }, + { + "epoch": 2.38, + "learning_rate": 4.970195895522388e-05, + "loss": 0.0294, + "step": 2556 + }, + { + "epoch": 2.39, + "learning_rate": 4.9701492537313436e-05, + "loss": 0.0497, + "step": 2560 + }, + { + "epoch": 2.39, + "learning_rate": 4.970102611940299e-05, + "loss": 0.039, + "step": 2564 + }, + { + "epoch": 2.4, + "learning_rate": 4.970055970149254e-05, + "loss": 0.0395, + "step": 2568 + }, + { + "epoch": 2.4, + "learning_rate": 4.9700093283582094e-05, + "loss": 0.0542, + "step": 2572 + }, + { + "epoch": 2.4, + "learning_rate": 4.969962686567164e-05, + "loss": 0.0596, + "step": 2576 + }, + { + "epoch": 2.41, + "learning_rate": 4.96991604477612e-05, + "loss": 0.0358, + "step": 2580 + }, + { + "epoch": 2.41, + "learning_rate": 4.969869402985075e-05, + "loss": 0.0467, + "step": 2584 + }, + { + "epoch": 2.41, + "learning_rate": 4.96982276119403e-05, + "loss": 0.0431, + "step": 2588 + }, + { + "epoch": 2.42, + "learning_rate": 4.9697761194029855e-05, + "loss": 0.0355, + "step": 2592 + }, + { + "epoch": 2.42, + "learning_rate": 4.969729477611941e-05, + "loss": 0.0251, + "step": 2596 + }, + { + "epoch": 2.43, + "learning_rate": 4.969682835820896e-05, + "loss": 0.0417, + "step": 2600 + }, + { + "epoch": 2.43, + "learning_rate": 4.9696361940298506e-05, + "loss": 0.063, + "step": 2604 + }, + { + "epoch": 2.43, + "learning_rate": 4.969589552238806e-05, + "loss": 0.0454, + "step": 2608 + }, + { + "epoch": 2.44, + "learning_rate": 4.9695429104477616e-05, + "loss": 0.0351, + "step": 2612 + }, + { + "epoch": 2.44, + "learning_rate": 4.9694962686567164e-05, + "loss": 0.0543, + "step": 2616 + }, + { + "epoch": 2.44, + "learning_rate": 4.969449626865672e-05, + "loss": 0.0422, + "step": 2620 + }, + { + "epoch": 2.45, + "learning_rate": 4.9694029850746274e-05, + "loss": 0.0427, + "step": 2624 + }, + { + "epoch": 2.45, + "learning_rate": 4.969356343283582e-05, + "loss": 0.0595, + "step": 2628 + }, + { + "epoch": 2.46, + "learning_rate": 4.9693097014925377e-05, + "loss": 0.0437, + "step": 2632 + }, + { + "epoch": 2.46, + "learning_rate": 4.9692630597014925e-05, + "loss": 0.0358, + "step": 2636 + }, + { + "epoch": 2.46, + "learning_rate": 4.969216417910448e-05, + "loss": 0.041, + "step": 2640 + }, + { + "epoch": 2.47, + "learning_rate": 4.9691697761194035e-05, + "loss": 0.0497, + "step": 2644 + }, + { + "epoch": 2.47, + "learning_rate": 4.969123134328358e-05, + "loss": 0.0494, + "step": 2648 + }, + { + "epoch": 2.47, + "learning_rate": 4.969076492537314e-05, + "loss": 0.0541, + "step": 2652 + }, + { + "epoch": 2.48, + "learning_rate": 4.969029850746269e-05, + "loss": 0.0668, + "step": 2656 + }, + { + "epoch": 2.48, + "learning_rate": 4.968983208955224e-05, + "loss": 0.0314, + "step": 2660 + }, + { + "epoch": 2.49, + "learning_rate": 4.968936567164179e-05, + "loss": 0.0576, + "step": 2664 + }, + { + "epoch": 2.49, + "learning_rate": 4.9688899253731344e-05, + "loss": 0.0514, + "step": 2668 + }, + { + "epoch": 2.49, + "learning_rate": 4.96884328358209e-05, + "loss": 0.0398, + "step": 2672 + }, + { + "epoch": 2.5, + "learning_rate": 4.968796641791045e-05, + "loss": 0.0481, + "step": 2676 + }, + { + "epoch": 2.5, + "learning_rate": 4.96875e-05, + "loss": 0.0431, + "step": 2680 + }, + { + "epoch": 2.5, + "learning_rate": 4.9687033582089556e-05, + "loss": 0.0419, + "step": 2684 + }, + { + "epoch": 2.51, + "learning_rate": 4.9686567164179105e-05, + "loss": 0.0343, + "step": 2688 + }, + { + "epoch": 2.51, + "learning_rate": 4.968610074626866e-05, + "loss": 0.0421, + "step": 2692 + }, + { + "epoch": 2.51, + "learning_rate": 4.968563432835821e-05, + "loss": 0.0585, + "step": 2696 + }, + { + "epoch": 2.52, + "learning_rate": 4.968516791044777e-05, + "loss": 0.0442, + "step": 2700 + }, + { + "epoch": 2.52, + "learning_rate": 4.968470149253732e-05, + "loss": 0.0315, + "step": 2704 + }, + { + "epoch": 2.53, + "learning_rate": 4.9684235074626866e-05, + "loss": 0.0563, + "step": 2708 + }, + { + "epoch": 2.53, + "learning_rate": 4.968376865671642e-05, + "loss": 0.05, + "step": 2712 + }, + { + "epoch": 2.53, + "learning_rate": 4.9683302238805975e-05, + "loss": 0.0356, + "step": 2716 + }, + { + "epoch": 2.54, + "learning_rate": 4.9682835820895524e-05, + "loss": 0.0399, + "step": 2720 + }, + { + "epoch": 2.54, + "learning_rate": 4.968236940298508e-05, + "loss": 0.035, + "step": 2724 + }, + { + "epoch": 2.54, + "learning_rate": 4.9681902985074627e-05, + "loss": 0.0222, + "step": 2728 + }, + { + "epoch": 2.55, + "learning_rate": 4.968143656716418e-05, + "loss": 0.0527, + "step": 2732 + }, + { + "epoch": 2.55, + "learning_rate": 4.9680970149253736e-05, + "loss": 0.0451, + "step": 2736 + }, + { + "epoch": 2.56, + "learning_rate": 4.9680503731343285e-05, + "loss": 0.0488, + "step": 2740 + }, + { + "epoch": 2.56, + "learning_rate": 4.968003731343284e-05, + "loss": 0.0794, + "step": 2744 + }, + { + "epoch": 2.56, + "learning_rate": 4.9679570895522394e-05, + "loss": 0.0571, + "step": 2748 + }, + { + "epoch": 2.57, + "learning_rate": 4.967910447761194e-05, + "loss": 0.0391, + "step": 2752 + }, + { + "epoch": 2.57, + "learning_rate": 4.967863805970149e-05, + "loss": 0.048, + "step": 2756 + }, + { + "epoch": 2.57, + "learning_rate": 4.967817164179105e-05, + "loss": 0.051, + "step": 2760 + }, + { + "epoch": 2.58, + "learning_rate": 4.96777052238806e-05, + "loss": 0.0402, + "step": 2764 + }, + { + "epoch": 2.58, + "learning_rate": 4.967723880597015e-05, + "loss": 0.0392, + "step": 2768 + }, + { + "epoch": 2.59, + "learning_rate": 4.9676772388059703e-05, + "loss": 0.0518, + "step": 2772 + }, + { + "epoch": 2.59, + "learning_rate": 4.967630597014926e-05, + "loss": 0.0494, + "step": 2776 + }, + { + "epoch": 2.59, + "learning_rate": 4.9675839552238807e-05, + "loss": 0.0324, + "step": 2780 + }, + { + "epoch": 2.6, + "learning_rate": 4.967537313432836e-05, + "loss": 0.0357, + "step": 2784 + }, + { + "epoch": 2.6, + "learning_rate": 4.967490671641791e-05, + "loss": 0.0458, + "step": 2788 + }, + { + "epoch": 2.6, + "learning_rate": 4.9674440298507464e-05, + "loss": 0.0593, + "step": 2792 + }, + { + "epoch": 2.61, + "learning_rate": 4.967397388059702e-05, + "loss": 0.0454, + "step": 2796 + }, + { + "epoch": 2.61, + "learning_rate": 4.967350746268657e-05, + "loss": 0.0629, + "step": 2800 + }, + { + "epoch": 2.62, + "learning_rate": 4.967304104477612e-05, + "loss": 0.0287, + "step": 2804 + }, + { + "epoch": 2.62, + "learning_rate": 4.967257462686568e-05, + "loss": 0.0413, + "step": 2808 + }, + { + "epoch": 2.62, + "learning_rate": 4.9672108208955225e-05, + "loss": 0.0231, + "step": 2812 + }, + { + "epoch": 2.63, + "learning_rate": 4.9671641791044774e-05, + "loss": 0.0362, + "step": 2816 + }, + { + "epoch": 2.63, + "learning_rate": 4.9671175373134335e-05, + "loss": 0.055, + "step": 2820 + }, + { + "epoch": 2.63, + "learning_rate": 4.967070895522388e-05, + "loss": 0.0448, + "step": 2824 + }, + { + "epoch": 2.64, + "learning_rate": 4.967024253731343e-05, + "loss": 0.0337, + "step": 2828 + }, + { + "epoch": 2.64, + "learning_rate": 4.9669776119402986e-05, + "loss": 0.048, + "step": 2832 + }, + { + "epoch": 2.65, + "learning_rate": 4.966930970149254e-05, + "loss": 0.0534, + "step": 2836 + }, + { + "epoch": 2.65, + "learning_rate": 4.966884328358209e-05, + "loss": 0.0296, + "step": 2840 + }, + { + "epoch": 2.65, + "learning_rate": 4.9668376865671644e-05, + "loss": 0.0418, + "step": 2844 + }, + { + "epoch": 2.66, + "learning_rate": 4.966791044776119e-05, + "loss": 0.0556, + "step": 2848 + }, + { + "epoch": 2.66, + "learning_rate": 4.966744402985075e-05, + "loss": 0.0262, + "step": 2852 + }, + { + "epoch": 2.66, + "learning_rate": 4.96669776119403e-05, + "loss": 0.0494, + "step": 2856 + }, + { + "epoch": 2.67, + "learning_rate": 4.966651119402985e-05, + "loss": 0.0361, + "step": 2860 + }, + { + "epoch": 2.67, + "learning_rate": 4.9666044776119405e-05, + "loss": 0.0495, + "step": 2864 + }, + { + "epoch": 2.68, + "learning_rate": 4.966557835820896e-05, + "loss": 0.0491, + "step": 2868 + }, + { + "epoch": 2.68, + "learning_rate": 4.966511194029851e-05, + "loss": 0.023, + "step": 2872 + }, + { + "epoch": 2.68, + "learning_rate": 4.966464552238806e-05, + "loss": 0.0381, + "step": 2876 + }, + { + "epoch": 2.69, + "learning_rate": 4.966417910447762e-05, + "loss": 0.0618, + "step": 2880 + }, + { + "epoch": 2.69, + "learning_rate": 4.9663712686567166e-05, + "loss": 0.0352, + "step": 2884 + }, + { + "epoch": 2.69, + "learning_rate": 4.966324626865672e-05, + "loss": 0.0534, + "step": 2888 + }, + { + "epoch": 2.7, + "learning_rate": 4.966277985074627e-05, + "loss": 0.1242, + "step": 2892 + }, + { + "epoch": 2.7, + "learning_rate": 4.9662313432835824e-05, + "loss": 0.0236, + "step": 2896 + }, + { + "epoch": 2.71, + "learning_rate": 4.966184701492538e-05, + "loss": 0.0396, + "step": 2900 + }, + { + "epoch": 2.71, + "learning_rate": 4.966138059701493e-05, + "loss": 0.0516, + "step": 2904 + }, + { + "epoch": 2.71, + "learning_rate": 4.9660914179104475e-05, + "loss": 0.0378, + "step": 2908 + }, + { + "epoch": 2.72, + "learning_rate": 4.966044776119404e-05, + "loss": 0.049, + "step": 2912 + }, + { + "epoch": 2.72, + "learning_rate": 4.9659981343283585e-05, + "loss": 0.0445, + "step": 2916 + }, + { + "epoch": 2.72, + "learning_rate": 4.9659514925373133e-05, + "loss": 0.0205, + "step": 2920 + }, + { + "epoch": 2.73, + "learning_rate": 4.965904850746269e-05, + "loss": 0.046, + "step": 2924 + }, + { + "epoch": 2.73, + "learning_rate": 4.965858208955224e-05, + "loss": 0.0493, + "step": 2928 + }, + { + "epoch": 2.73, + "learning_rate": 4.965811567164179e-05, + "loss": 0.054, + "step": 2932 + }, + { + "epoch": 2.74, + "learning_rate": 4.9657649253731346e-05, + "loss": 0.0409, + "step": 2936 + }, + { + "epoch": 2.74, + "learning_rate": 4.96571828358209e-05, + "loss": 0.0396, + "step": 2940 + }, + { + "epoch": 2.75, + "learning_rate": 4.965671641791045e-05, + "loss": 0.0362, + "step": 2944 + }, + { + "epoch": 2.75, + "learning_rate": 4.9656250000000004e-05, + "loss": 0.0487, + "step": 2948 + }, + { + "epoch": 2.75, + "learning_rate": 4.965578358208955e-05, + "loss": 0.0336, + "step": 2952 + }, + { + "epoch": 2.76, + "learning_rate": 4.965531716417911e-05, + "loss": 0.041, + "step": 2956 + }, + { + "epoch": 2.76, + "learning_rate": 4.965485074626866e-05, + "loss": 0.0395, + "step": 2960 + }, + { + "epoch": 2.76, + "learning_rate": 4.965438432835821e-05, + "loss": 0.032, + "step": 2964 + }, + { + "epoch": 2.77, + "learning_rate": 4.965391791044776e-05, + "loss": 0.0231, + "step": 2968 + }, + { + "epoch": 2.77, + "learning_rate": 4.965345149253732e-05, + "loss": 0.0594, + "step": 2972 + }, + { + "epoch": 2.78, + "learning_rate": 4.965298507462687e-05, + "loss": 0.0319, + "step": 2976 + }, + { + "epoch": 2.78, + "learning_rate": 4.9652518656716416e-05, + "loss": 0.031, + "step": 2980 + }, + { + "epoch": 2.78, + "learning_rate": 4.965205223880597e-05, + "loss": 0.0608, + "step": 2984 + }, + { + "epoch": 2.79, + "learning_rate": 4.9651585820895526e-05, + "loss": 0.0273, + "step": 2988 + }, + { + "epoch": 2.79, + "learning_rate": 4.9651119402985074e-05, + "loss": 0.0461, + "step": 2992 + }, + { + "epoch": 2.79, + "learning_rate": 4.965065298507463e-05, + "loss": 0.0546, + "step": 2996 + }, + { + "epoch": 2.8, + "learning_rate": 4.9650186567164184e-05, + "loss": 0.0209, + "step": 3000 + }, + { + "epoch": 2.8, + "eval_exact_match": 0.6721470019342359, + "eval_exec": 0.7224371373307543, + "eval_loss": 0.16579200327396393, + "eval_runtime": 1120.3474, + "eval_samples_per_second": 0.923, + "step": 3000 + }, + { + "epoch": 2.8, + "learning_rate": 4.964972014925373e-05, + "loss": 0.0316, + "step": 3004 + }, + { + "epoch": 2.81, + "learning_rate": 4.964925373134329e-05, + "loss": 0.0333, + "step": 3008 + }, + { + "epoch": 2.81, + "learning_rate": 4.9648787313432835e-05, + "loss": 0.0722, + "step": 3012 + }, + { + "epoch": 2.81, + "learning_rate": 4.964832089552239e-05, + "loss": 0.0709, + "step": 3016 + }, + { + "epoch": 2.82, + "learning_rate": 4.9647854477611945e-05, + "loss": 0.0526, + "step": 3020 + }, + { + "epoch": 2.82, + "learning_rate": 4.964738805970149e-05, + "loss": 0.0346, + "step": 3024 + }, + { + "epoch": 2.82, + "learning_rate": 4.964692164179105e-05, + "loss": 0.0917, + "step": 3028 + }, + { + "epoch": 2.83, + "learning_rate": 4.96464552238806e-05, + "loss": 0.0397, + "step": 3032 + }, + { + "epoch": 2.83, + "learning_rate": 4.964598880597015e-05, + "loss": 0.0284, + "step": 3036 + }, + { + "epoch": 2.84, + "learning_rate": 4.9645522388059706e-05, + "loss": 0.0392, + "step": 3040 + }, + { + "epoch": 2.84, + "learning_rate": 4.9645055970149254e-05, + "loss": 0.0406, + "step": 3044 + }, + { + "epoch": 2.84, + "learning_rate": 4.964458955223881e-05, + "loss": 0.0423, + "step": 3048 + }, + { + "epoch": 2.85, + "learning_rate": 4.9644123134328364e-05, + "loss": 0.0459, + "step": 3052 + }, + { + "epoch": 2.85, + "learning_rate": 4.964365671641791e-05, + "loss": 0.0497, + "step": 3056 + }, + { + "epoch": 2.85, + "learning_rate": 4.964319029850747e-05, + "loss": 0.0248, + "step": 3060 + }, + { + "epoch": 2.86, + "learning_rate": 4.964272388059702e-05, + "loss": 0.0581, + "step": 3064 + }, + { + "epoch": 2.86, + "learning_rate": 4.964225746268657e-05, + "loss": 0.0254, + "step": 3068 + }, + { + "epoch": 2.87, + "learning_rate": 4.964179104477612e-05, + "loss": 0.029, + "step": 3072 + }, + { + "epoch": 2.87, + "learning_rate": 4.964132462686567e-05, + "loss": 0.0354, + "step": 3076 + }, + { + "epoch": 2.87, + "learning_rate": 4.964085820895523e-05, + "loss": 0.0398, + "step": 3080 + }, + { + "epoch": 2.88, + "learning_rate": 4.9640391791044776e-05, + "loss": 0.0325, + "step": 3084 + }, + { + "epoch": 2.88, + "learning_rate": 4.963992537313433e-05, + "loss": 0.0627, + "step": 3088 + }, + { + "epoch": 2.88, + "learning_rate": 4.9639458955223886e-05, + "loss": 0.0465, + "step": 3092 + }, + { + "epoch": 2.89, + "learning_rate": 4.9638992537313434e-05, + "loss": 0.0338, + "step": 3096 + }, + { + "epoch": 2.89, + "learning_rate": 4.963852611940299e-05, + "loss": 0.0313, + "step": 3100 + }, + { + "epoch": 2.9, + "learning_rate": 4.963805970149254e-05, + "loss": 0.0247, + "step": 3104 + }, + { + "epoch": 2.9, + "learning_rate": 4.963759328358209e-05, + "loss": 0.029, + "step": 3108 + }, + { + "epoch": 2.9, + "learning_rate": 4.963712686567165e-05, + "loss": 0.0319, + "step": 3112 + }, + { + "epoch": 2.91, + "learning_rate": 4.9636660447761195e-05, + "loss": 0.0358, + "step": 3116 + }, + { + "epoch": 2.91, + "learning_rate": 4.963619402985074e-05, + "loss": 0.0499, + "step": 3120 + }, + { + "epoch": 2.91, + "learning_rate": 4.9635727611940305e-05, + "loss": 0.0388, + "step": 3124 + }, + { + "epoch": 2.92, + "learning_rate": 4.963526119402985e-05, + "loss": 0.0304, + "step": 3128 + }, + { + "epoch": 2.92, + "learning_rate": 4.96347947761194e-05, + "loss": 0.02, + "step": 3132 + }, + { + "epoch": 2.93, + "learning_rate": 4.9634328358208956e-05, + "loss": 0.0344, + "step": 3136 + }, + { + "epoch": 2.93, + "learning_rate": 4.963386194029851e-05, + "loss": 0.0321, + "step": 3140 + }, + { + "epoch": 2.93, + "learning_rate": 4.963339552238806e-05, + "loss": 0.0496, + "step": 3144 + }, + { + "epoch": 2.94, + "learning_rate": 4.9632929104477614e-05, + "loss": 0.0624, + "step": 3148 + }, + { + "epoch": 2.94, + "learning_rate": 4.963246268656717e-05, + "loss": 0.039, + "step": 3152 + }, + { + "epoch": 2.94, + "learning_rate": 4.963199626865672e-05, + "loss": 0.0444, + "step": 3156 + }, + { + "epoch": 2.95, + "learning_rate": 4.963152985074627e-05, + "loss": 0.0342, + "step": 3160 + }, + { + "epoch": 2.95, + "learning_rate": 4.963106343283582e-05, + "loss": 0.0329, + "step": 3164 + }, + { + "epoch": 2.96, + "learning_rate": 4.9630597014925375e-05, + "loss": 0.0226, + "step": 3168 + }, + { + "epoch": 2.96, + "learning_rate": 4.963013059701493e-05, + "loss": 0.0301, + "step": 3172 + }, + { + "epoch": 2.96, + "learning_rate": 4.962966417910448e-05, + "loss": 0.019, + "step": 3176 + }, + { + "epoch": 2.97, + "learning_rate": 4.9629197761194026e-05, + "loss": 0.0331, + "step": 3180 + }, + { + "epoch": 2.97, + "learning_rate": 4.962873134328359e-05, + "loss": 0.0497, + "step": 3184 + }, + { + "epoch": 2.97, + "learning_rate": 4.9628264925373136e-05, + "loss": 0.0405, + "step": 3188 + }, + { + "epoch": 2.98, + "learning_rate": 4.962779850746269e-05, + "loss": 0.0312, + "step": 3192 + }, + { + "epoch": 2.98, + "learning_rate": 4.962733208955224e-05, + "loss": 0.0289, + "step": 3196 + }, + { + "epoch": 2.98, + "learning_rate": 4.9626865671641794e-05, + "loss": 0.0514, + "step": 3200 + }, + { + "epoch": 2.99, + "learning_rate": 4.962639925373135e-05, + "loss": 0.0698, + "step": 3204 + }, + { + "epoch": 2.99, + "learning_rate": 4.96259328358209e-05, + "loss": 0.0424, + "step": 3208 + }, + { + "epoch": 3.0, + "learning_rate": 4.962546641791045e-05, + "loss": 0.0357, + "step": 3212 + }, + { + "epoch": 3.0, + "learning_rate": 4.962500000000001e-05, + "loss": 0.0193, + "step": 3216 + }, + { + "epoch": 3.0, + "learning_rate": 4.9624533582089555e-05, + "loss": 0.0471, + "step": 3220 + }, + { + "epoch": 3.01, + "learning_rate": 4.96240671641791e-05, + "loss": 0.0428, + "step": 3224 + }, + { + "epoch": 3.01, + "learning_rate": 4.9623600746268665e-05, + "loss": 0.0268, + "step": 3228 + }, + { + "epoch": 3.01, + "learning_rate": 4.962313432835821e-05, + "loss": 0.0359, + "step": 3232 + }, + { + "epoch": 3.02, + "learning_rate": 4.962266791044776e-05, + "loss": 0.0468, + "step": 3236 + }, + { + "epoch": 3.02, + "learning_rate": 4.9622201492537316e-05, + "loss": 0.0176, + "step": 3240 + }, + { + "epoch": 3.03, + "learning_rate": 4.962173507462687e-05, + "loss": 0.0279, + "step": 3244 + }, + { + "epoch": 3.03, + "learning_rate": 4.962126865671642e-05, + "loss": 0.0193, + "step": 3248 + }, + { + "epoch": 3.03, + "learning_rate": 4.9620802238805974e-05, + "loss": 0.0468, + "step": 3252 + }, + { + "epoch": 3.04, + "learning_rate": 4.962033582089552e-05, + "loss": 0.0284, + "step": 3256 + }, + { + "epoch": 3.04, + "learning_rate": 4.961986940298508e-05, + "loss": 0.0166, + "step": 3260 + }, + { + "epoch": 3.04, + "learning_rate": 4.961940298507463e-05, + "loss": 0.0197, + "step": 3264 + }, + { + "epoch": 3.05, + "learning_rate": 4.961893656716418e-05, + "loss": 0.0305, + "step": 3268 + }, + { + "epoch": 3.05, + "learning_rate": 4.9618470149253735e-05, + "loss": 0.026, + "step": 3272 + }, + { + "epoch": 3.06, + "learning_rate": 4.961800373134329e-05, + "loss": 0.0427, + "step": 3276 + }, + { + "epoch": 3.06, + "learning_rate": 4.961753731343284e-05, + "loss": 0.0206, + "step": 3280 + }, + { + "epoch": 3.06, + "learning_rate": 4.9617070895522386e-05, + "loss": 0.017, + "step": 3284 + }, + { + "epoch": 3.07, + "learning_rate": 4.961660447761194e-05, + "loss": 0.0299, + "step": 3288 + }, + { + "epoch": 3.07, + "learning_rate": 4.9616138059701496e-05, + "loss": 0.0169, + "step": 3292 + }, + { + "epoch": 3.07, + "learning_rate": 4.9615671641791044e-05, + "loss": 0.0345, + "step": 3296 + }, + { + "epoch": 3.08, + "learning_rate": 4.96152052238806e-05, + "loss": 0.0367, + "step": 3300 + }, + { + "epoch": 3.08, + "learning_rate": 4.9614738805970154e-05, + "loss": 0.0288, + "step": 3304 + }, + { + "epoch": 3.09, + "learning_rate": 4.96142723880597e-05, + "loss": 0.0272, + "step": 3308 + }, + { + "epoch": 3.09, + "learning_rate": 4.961380597014926e-05, + "loss": 0.0117, + "step": 3312 + }, + { + "epoch": 3.09, + "learning_rate": 4.9613339552238805e-05, + "loss": 0.0219, + "step": 3316 + }, + { + "epoch": 3.1, + "learning_rate": 4.961287313432836e-05, + "loss": 0.0071, + "step": 3320 + }, + { + "epoch": 3.1, + "learning_rate": 4.9612406716417915e-05, + "loss": 0.0183, + "step": 3324 + }, + { + "epoch": 3.1, + "learning_rate": 4.961194029850746e-05, + "loss": 0.0314, + "step": 3328 + }, + { + "epoch": 3.11, + "learning_rate": 4.961147388059702e-05, + "loss": 0.0123, + "step": 3332 + }, + { + "epoch": 3.11, + "learning_rate": 4.961100746268657e-05, + "loss": 0.0308, + "step": 3336 + }, + { + "epoch": 3.12, + "learning_rate": 4.961054104477612e-05, + "loss": 0.0281, + "step": 3340 + }, + { + "epoch": 3.12, + "learning_rate": 4.961007462686567e-05, + "loss": 0.0215, + "step": 3344 + }, + { + "epoch": 3.12, + "learning_rate": 4.9609608208955224e-05, + "loss": 0.017, + "step": 3348 + }, + { + "epoch": 3.13, + "learning_rate": 4.960914179104478e-05, + "loss": 0.0152, + "step": 3352 + }, + { + "epoch": 3.13, + "learning_rate": 4.9608675373134334e-05, + "loss": 0.044, + "step": 3356 + }, + { + "epoch": 3.13, + "learning_rate": 4.960820895522388e-05, + "loss": 0.0241, + "step": 3360 + }, + { + "epoch": 3.14, + "learning_rate": 4.960774253731344e-05, + "loss": 0.0159, + "step": 3364 + }, + { + "epoch": 3.14, + "learning_rate": 4.960727611940299e-05, + "loss": 0.0282, + "step": 3368 + }, + { + "epoch": 3.15, + "learning_rate": 4.960680970149254e-05, + "loss": 0.0336, + "step": 3372 + }, + { + "epoch": 3.15, + "learning_rate": 4.960634328358209e-05, + "loss": 0.038, + "step": 3376 + }, + { + "epoch": 3.15, + "learning_rate": 4.960587686567165e-05, + "loss": 0.0369, + "step": 3380 + }, + { + "epoch": 3.16, + "learning_rate": 4.96054104477612e-05, + "loss": 0.0246, + "step": 3384 + }, + { + "epoch": 3.16, + "learning_rate": 4.9604944029850746e-05, + "loss": 0.0163, + "step": 3388 + }, + { + "epoch": 3.16, + "learning_rate": 4.96044776119403e-05, + "loss": 0.0241, + "step": 3392 + }, + { + "epoch": 3.17, + "learning_rate": 4.9604011194029856e-05, + "loss": 0.0232, + "step": 3396 + }, + { + "epoch": 3.17, + "learning_rate": 4.9603544776119404e-05, + "loss": 0.0355, + "step": 3400 + }, + { + "epoch": 3.18, + "learning_rate": 4.960307835820896e-05, + "loss": 0.0187, + "step": 3404 + }, + { + "epoch": 3.18, + "learning_rate": 4.960261194029851e-05, + "loss": 0.0208, + "step": 3408 + }, + { + "epoch": 3.18, + "learning_rate": 4.960214552238806e-05, + "loss": 0.0233, + "step": 3412 + }, + { + "epoch": 3.19, + "learning_rate": 4.960167910447762e-05, + "loss": 0.0418, + "step": 3416 + }, + { + "epoch": 3.19, + "learning_rate": 4.9601212686567165e-05, + "loss": 0.0465, + "step": 3420 + }, + { + "epoch": 3.19, + "learning_rate": 4.960074626865672e-05, + "loss": 0.028, + "step": 3424 + }, + { + "epoch": 3.2, + "learning_rate": 4.9600279850746275e-05, + "loss": 0.0257, + "step": 3428 + }, + { + "epoch": 3.2, + "learning_rate": 4.959981343283582e-05, + "loss": 0.0357, + "step": 3432 + }, + { + "epoch": 3.21, + "learning_rate": 4.959934701492537e-05, + "loss": 0.017, + "step": 3436 + }, + { + "epoch": 3.21, + "learning_rate": 4.959888059701493e-05, + "loss": 0.0296, + "step": 3440 + }, + { + "epoch": 3.21, + "learning_rate": 4.959841417910448e-05, + "loss": 0.0336, + "step": 3444 + }, + { + "epoch": 3.22, + "learning_rate": 4.959794776119403e-05, + "loss": 0.0194, + "step": 3448 + }, + { + "epoch": 3.22, + "learning_rate": 4.9597481343283584e-05, + "loss": 0.0231, + "step": 3452 + }, + { + "epoch": 3.22, + "learning_rate": 4.959701492537314e-05, + "loss": 0.0186, + "step": 3456 + }, + { + "epoch": 3.23, + "learning_rate": 4.959654850746269e-05, + "loss": 0.0477, + "step": 3460 + }, + { + "epoch": 3.23, + "learning_rate": 4.959608208955224e-05, + "loss": 0.041, + "step": 3464 + }, + { + "epoch": 3.24, + "learning_rate": 4.959561567164179e-05, + "loss": 0.0304, + "step": 3468 + }, + { + "epoch": 3.24, + "learning_rate": 4.9595149253731345e-05, + "loss": 0.0318, + "step": 3472 + }, + { + "epoch": 3.24, + "learning_rate": 4.95946828358209e-05, + "loss": 0.0789, + "step": 3476 + }, + { + "epoch": 3.25, + "learning_rate": 4.959421641791045e-05, + "loss": 0.0251, + "step": 3480 + }, + { + "epoch": 3.25, + "learning_rate": 4.959375e-05, + "loss": 0.0254, + "step": 3484 + }, + { + "epoch": 3.25, + "learning_rate": 4.959328358208956e-05, + "loss": 0.0178, + "step": 3488 + }, + { + "epoch": 3.26, + "learning_rate": 4.9592817164179106e-05, + "loss": 0.03, + "step": 3492 + }, + { + "epoch": 3.26, + "learning_rate": 4.9592350746268654e-05, + "loss": 0.0202, + "step": 3496 + }, + { + "epoch": 3.26, + "learning_rate": 4.9591884328358216e-05, + "loss": 0.0243, + "step": 3500 + }, + { + "epoch": 3.26, + "eval_exact_match": 0.7108317214700194, + "eval_exec": 0.7543520309477756, + "eval_loss": 0.17954690754413605, + "eval_runtime": 1070.1188, + "eval_samples_per_second": 0.966, + "step": 3500 + }, + { + "epoch": 3.27, + "learning_rate": 4.9591417910447764e-05, + "loss": 0.0259, + "step": 3504 + }, + { + "epoch": 3.27, + "learning_rate": 4.959095149253731e-05, + "loss": 0.0117, + "step": 3508 + }, + { + "epoch": 3.28, + "learning_rate": 4.959048507462687e-05, + "loss": 0.0275, + "step": 3512 + }, + { + "epoch": 3.28, + "learning_rate": 4.959001865671642e-05, + "loss": 0.0219, + "step": 3516 + }, + { + "epoch": 3.28, + "learning_rate": 4.9589552238805977e-05, + "loss": 0.0259, + "step": 3520 + }, + { + "epoch": 3.29, + "learning_rate": 4.9589085820895525e-05, + "loss": 0.0362, + "step": 3524 + }, + { + "epoch": 3.29, + "learning_rate": 4.958861940298507e-05, + "loss": 0.0362, + "step": 3528 + }, + { + "epoch": 3.29, + "learning_rate": 4.9588152985074634e-05, + "loss": 0.0363, + "step": 3532 + }, + { + "epoch": 3.3, + "learning_rate": 4.958768656716418e-05, + "loss": 0.0603, + "step": 3536 + }, + { + "epoch": 3.3, + "learning_rate": 4.958722014925373e-05, + "loss": 0.0303, + "step": 3540 + }, + { + "epoch": 3.31, + "learning_rate": 4.9586753731343286e-05, + "loss": 0.0152, + "step": 3544 + }, + { + "epoch": 3.31, + "learning_rate": 4.958628731343284e-05, + "loss": 0.0177, + "step": 3548 + }, + { + "epoch": 3.31, + "learning_rate": 4.958582089552239e-05, + "loss": 0.03, + "step": 3552 + }, + { + "epoch": 3.32, + "learning_rate": 4.9585354477611944e-05, + "loss": 0.0267, + "step": 3556 + }, + { + "epoch": 3.32, + "learning_rate": 4.95848880597015e-05, + "loss": 0.016, + "step": 3560 + }, + { + "epoch": 3.32, + "learning_rate": 4.958442164179105e-05, + "loss": 0.0345, + "step": 3564 + }, + { + "epoch": 3.33, + "learning_rate": 4.95839552238806e-05, + "loss": 0.0247, + "step": 3568 + }, + { + "epoch": 3.33, + "learning_rate": 4.958348880597015e-05, + "loss": 0.0342, + "step": 3572 + }, + { + "epoch": 3.34, + "learning_rate": 4.9583022388059705e-05, + "loss": 0.0234, + "step": 3576 + }, + { + "epoch": 3.34, + "learning_rate": 4.958255597014926e-05, + "loss": 0.034, + "step": 3580 + }, + { + "epoch": 3.34, + "learning_rate": 4.958208955223881e-05, + "loss": 0.0257, + "step": 3584 + }, + { + "epoch": 3.35, + "learning_rate": 4.9581623134328356e-05, + "loss": 0.0194, + "step": 3588 + }, + { + "epoch": 3.35, + "learning_rate": 4.958115671641792e-05, + "loss": 0.0289, + "step": 3592 + }, + { + "epoch": 3.35, + "learning_rate": 4.9580690298507466e-05, + "loss": 0.0336, + "step": 3596 + }, + { + "epoch": 3.36, + "learning_rate": 4.9580223880597014e-05, + "loss": 0.0295, + "step": 3600 + }, + { + "epoch": 3.36, + "learning_rate": 4.957975746268657e-05, + "loss": 0.0334, + "step": 3604 + }, + { + "epoch": 3.37, + "learning_rate": 4.9579291044776124e-05, + "loss": 0.0198, + "step": 3608 + }, + { + "epoch": 3.37, + "learning_rate": 4.957882462686567e-05, + "loss": 0.0195, + "step": 3612 + }, + { + "epoch": 3.37, + "learning_rate": 4.9578358208955227e-05, + "loss": 0.0281, + "step": 3616 + }, + { + "epoch": 3.38, + "learning_rate": 4.957789179104478e-05, + "loss": 0.0192, + "step": 3620 + }, + { + "epoch": 3.38, + "learning_rate": 4.957742537313433e-05, + "loss": 0.0361, + "step": 3624 + }, + { + "epoch": 3.38, + "learning_rate": 4.9576958955223884e-05, + "loss": 0.0094, + "step": 3628 + }, + { + "epoch": 3.39, + "learning_rate": 4.957649253731343e-05, + "loss": 0.0212, + "step": 3632 + }, + { + "epoch": 3.39, + "learning_rate": 4.957602611940299e-05, + "loss": 0.0237, + "step": 3636 + }, + { + "epoch": 3.4, + "learning_rate": 4.957555970149254e-05, + "loss": 0.0295, + "step": 3640 + }, + { + "epoch": 3.4, + "learning_rate": 4.957509328358209e-05, + "loss": 0.0373, + "step": 3644 + }, + { + "epoch": 3.4, + "learning_rate": 4.957462686567164e-05, + "loss": 0.0255, + "step": 3648 + }, + { + "epoch": 3.41, + "learning_rate": 4.95741604477612e-05, + "loss": 0.0313, + "step": 3652 + }, + { + "epoch": 3.41, + "learning_rate": 4.957369402985075e-05, + "loss": 0.0548, + "step": 3656 + }, + { + "epoch": 3.41, + "learning_rate": 4.95732276119403e-05, + "loss": 0.0417, + "step": 3660 + }, + { + "epoch": 3.42, + "learning_rate": 4.957276119402985e-05, + "loss": 0.0232, + "step": 3664 + }, + { + "epoch": 3.42, + "learning_rate": 4.9572294776119406e-05, + "loss": 0.0203, + "step": 3668 + }, + { + "epoch": 3.43, + "learning_rate": 4.9571828358208955e-05, + "loss": 0.0422, + "step": 3672 + }, + { + "epoch": 3.43, + "learning_rate": 4.957136194029851e-05, + "loss": 0.0225, + "step": 3676 + }, + { + "epoch": 3.43, + "learning_rate": 4.9570895522388064e-05, + "loss": 0.0162, + "step": 3680 + }, + { + "epoch": 3.44, + "learning_rate": 4.957042910447762e-05, + "loss": 0.0359, + "step": 3684 + }, + { + "epoch": 3.44, + "learning_rate": 4.956996268656717e-05, + "loss": 0.0186, + "step": 3688 + }, + { + "epoch": 3.44, + "learning_rate": 4.9569496268656716e-05, + "loss": 0.0165, + "step": 3692 + }, + { + "epoch": 3.45, + "learning_rate": 4.956902985074627e-05, + "loss": 0.0148, + "step": 3696 + }, + { + "epoch": 3.45, + "learning_rate": 4.9568563432835825e-05, + "loss": 0.026, + "step": 3700 + }, + { + "epoch": 3.46, + "learning_rate": 4.9568097014925374e-05, + "loss": 0.0157, + "step": 3704 + }, + { + "epoch": 3.46, + "learning_rate": 4.956763059701493e-05, + "loss": 0.0465, + "step": 3708 + }, + { + "epoch": 3.46, + "learning_rate": 4.956716417910448e-05, + "loss": 0.0174, + "step": 3712 + }, + { + "epoch": 3.47, + "learning_rate": 4.956669776119403e-05, + "loss": 0.0233, + "step": 3716 + }, + { + "epoch": 3.47, + "learning_rate": 4.9566231343283586e-05, + "loss": 0.0347, + "step": 3720 + }, + { + "epoch": 3.47, + "learning_rate": 4.9565764925373135e-05, + "loss": 0.0201, + "step": 3724 + }, + { + "epoch": 3.48, + "learning_rate": 4.956529850746269e-05, + "loss": 0.0182, + "step": 3728 + }, + { + "epoch": 3.48, + "learning_rate": 4.9564832089552244e-05, + "loss": 0.0306, + "step": 3732 + }, + { + "epoch": 3.49, + "learning_rate": 4.956436567164179e-05, + "loss": 0.0205, + "step": 3736 + }, + { + "epoch": 3.49, + "learning_rate": 4.956389925373135e-05, + "loss": 0.0255, + "step": 3740 + }, + { + "epoch": 3.49, + "learning_rate": 4.95634328358209e-05, + "loss": 0.0309, + "step": 3744 + }, + { + "epoch": 3.5, + "learning_rate": 4.956296641791045e-05, + "loss": 0.018, + "step": 3748 + }, + { + "epoch": 3.5, + "learning_rate": 4.95625e-05, + "loss": 0.0175, + "step": 3752 + }, + { + "epoch": 3.5, + "learning_rate": 4.9562033582089553e-05, + "loss": 0.0334, + "step": 3756 + }, + { + "epoch": 3.51, + "learning_rate": 4.956156716417911e-05, + "loss": 0.0213, + "step": 3760 + }, + { + "epoch": 3.51, + "learning_rate": 4.9561100746268656e-05, + "loss": 0.0153, + "step": 3764 + }, + { + "epoch": 3.51, + "learning_rate": 4.956063432835821e-05, + "loss": 0.0223, + "step": 3768 + }, + { + "epoch": 3.52, + "learning_rate": 4.9560167910447766e-05, + "loss": 0.0178, + "step": 3772 + }, + { + "epoch": 3.52, + "learning_rate": 4.9559701492537314e-05, + "loss": 0.0278, + "step": 3776 + }, + { + "epoch": 3.53, + "learning_rate": 4.955923507462687e-05, + "loss": 0.0264, + "step": 3780 + }, + { + "epoch": 3.53, + "learning_rate": 4.955876865671642e-05, + "loss": 0.0204, + "step": 3784 + }, + { + "epoch": 3.53, + "learning_rate": 4.955830223880597e-05, + "loss": 0.0252, + "step": 3788 + }, + { + "epoch": 3.54, + "learning_rate": 4.955783582089553e-05, + "loss": 0.044, + "step": 3792 + }, + { + "epoch": 3.54, + "learning_rate": 4.9557369402985075e-05, + "loss": 0.0266, + "step": 3796 + }, + { + "epoch": 3.54, + "learning_rate": 4.9556902985074624e-05, + "loss": 0.0191, + "step": 3800 + }, + { + "epoch": 3.55, + "learning_rate": 4.9556436567164185e-05, + "loss": 0.0422, + "step": 3804 + }, + { + "epoch": 3.55, + "learning_rate": 4.955597014925373e-05, + "loss": 0.0284, + "step": 3808 + }, + { + "epoch": 3.56, + "learning_rate": 4.955550373134328e-05, + "loss": 0.0344, + "step": 3812 + }, + { + "epoch": 3.56, + "learning_rate": 4.9555037313432836e-05, + "loss": 0.0295, + "step": 3816 + }, + { + "epoch": 3.56, + "learning_rate": 4.955457089552239e-05, + "loss": 0.0331, + "step": 3820 + }, + { + "epoch": 3.57, + "learning_rate": 4.955410447761194e-05, + "loss": 0.0436, + "step": 3824 + }, + { + "epoch": 3.57, + "learning_rate": 4.9553638059701494e-05, + "loss": 0.0292, + "step": 3828 + }, + { + "epoch": 3.57, + "learning_rate": 4.955317164179105e-05, + "loss": 0.0356, + "step": 3832 + }, + { + "epoch": 3.58, + "learning_rate": 4.95527052238806e-05, + "loss": 0.0315, + "step": 3836 + }, + { + "epoch": 3.58, + "learning_rate": 4.955223880597015e-05, + "loss": 0.0295, + "step": 3840 + }, + { + "epoch": 3.59, + "learning_rate": 4.95517723880597e-05, + "loss": 0.0413, + "step": 3844 + }, + { + "epoch": 3.59, + "learning_rate": 4.955130597014926e-05, + "loss": 0.0287, + "step": 3848 + }, + { + "epoch": 3.59, + "learning_rate": 4.955083955223881e-05, + "loss": 0.0223, + "step": 3852 + }, + { + "epoch": 3.6, + "learning_rate": 4.955037313432836e-05, + "loss": 0.0355, + "step": 3856 + }, + { + "epoch": 3.6, + "learning_rate": 4.954990671641791e-05, + "loss": 0.0244, + "step": 3860 + }, + { + "epoch": 3.6, + "learning_rate": 4.954944029850747e-05, + "loss": 0.0149, + "step": 3864 + }, + { + "epoch": 3.61, + "learning_rate": 4.9548973880597016e-05, + "loss": 0.0225, + "step": 3868 + }, + { + "epoch": 3.61, + "learning_rate": 4.954850746268657e-05, + "loss": 0.0261, + "step": 3872 + }, + { + "epoch": 3.62, + "learning_rate": 4.954804104477612e-05, + "loss": 0.0264, + "step": 3876 + }, + { + "epoch": 3.62, + "learning_rate": 4.9547574626865674e-05, + "loss": 0.0282, + "step": 3880 + }, + { + "epoch": 3.62, + "learning_rate": 4.954710820895523e-05, + "loss": 0.0511, + "step": 3884 + }, + { + "epoch": 3.63, + "learning_rate": 4.954664179104478e-05, + "loss": 0.0487, + "step": 3888 + }, + { + "epoch": 3.63, + "learning_rate": 4.954617537313433e-05, + "loss": 0.0169, + "step": 3892 + }, + { + "epoch": 3.63, + "learning_rate": 4.954570895522389e-05, + "loss": 0.0243, + "step": 3896 + }, + { + "epoch": 3.64, + "learning_rate": 4.9545242537313435e-05, + "loss": 0.0247, + "step": 3900 + }, + { + "epoch": 3.64, + "learning_rate": 4.954477611940298e-05, + "loss": 0.0258, + "step": 3904 + }, + { + "epoch": 3.65, + "learning_rate": 4.9544309701492545e-05, + "loss": 0.0328, + "step": 3908 + }, + { + "epoch": 3.65, + "learning_rate": 4.954384328358209e-05, + "loss": 0.0328, + "step": 3912 + }, + { + "epoch": 3.65, + "learning_rate": 4.954337686567164e-05, + "loss": 0.0265, + "step": 3916 + }, + { + "epoch": 3.66, + "learning_rate": 4.9542910447761196e-05, + "loss": 0.0349, + "step": 3920 + }, + { + "epoch": 3.66, + "learning_rate": 4.954244402985075e-05, + "loss": 0.0307, + "step": 3924 + }, + { + "epoch": 3.66, + "learning_rate": 4.95419776119403e-05, + "loss": 0.0256, + "step": 3928 + }, + { + "epoch": 3.67, + "learning_rate": 4.9541511194029854e-05, + "loss": 0.0246, + "step": 3932 + }, + { + "epoch": 3.67, + "learning_rate": 4.95410447761194e-05, + "loss": 0.0212, + "step": 3936 + }, + { + "epoch": 3.68, + "learning_rate": 4.954057835820896e-05, + "loss": 0.0548, + "step": 3940 + }, + { + "epoch": 3.68, + "learning_rate": 4.954011194029851e-05, + "loss": 0.05, + "step": 3944 + }, + { + "epoch": 3.68, + "learning_rate": 4.953964552238806e-05, + "loss": 0.036, + "step": 3948 + }, + { + "epoch": 3.69, + "learning_rate": 4.9539179104477615e-05, + "loss": 0.0122, + "step": 3952 + }, + { + "epoch": 3.69, + "learning_rate": 4.953871268656717e-05, + "loss": 0.0352, + "step": 3956 + }, + { + "epoch": 3.69, + "learning_rate": 4.953824626865672e-05, + "loss": 0.0316, + "step": 3960 + }, + { + "epoch": 3.7, + "learning_rate": 4.9537779850746266e-05, + "loss": 0.0243, + "step": 3964 + }, + { + "epoch": 3.7, + "learning_rate": 4.953731343283582e-05, + "loss": 0.0224, + "step": 3968 + }, + { + "epoch": 3.71, + "learning_rate": 4.9536847014925376e-05, + "loss": 0.0249, + "step": 3972 + }, + { + "epoch": 3.71, + "learning_rate": 4.9536380597014924e-05, + "loss": 0.0368, + "step": 3976 + }, + { + "epoch": 3.71, + "learning_rate": 4.953591417910448e-05, + "loss": 0.0412, + "step": 3980 + }, + { + "epoch": 3.72, + "learning_rate": 4.9535447761194034e-05, + "loss": 0.0248, + "step": 3984 + }, + { + "epoch": 3.72, + "learning_rate": 4.953498134328358e-05, + "loss": 0.0399, + "step": 3988 + }, + { + "epoch": 3.72, + "learning_rate": 4.953451492537314e-05, + "loss": 0.0156, + "step": 3992 + }, + { + "epoch": 3.73, + "learning_rate": 4.9534048507462685e-05, + "loss": 0.0109, + "step": 3996 + }, + { + "epoch": 3.73, + "learning_rate": 4.953358208955224e-05, + "loss": 0.0412, + "step": 4000 + }, + { + "epoch": 3.73, + "eval_exact_match": 0.7030947775628626, + "eval_exec": 0.7369439071566731, + "eval_loss": 0.189029723405838, + "eval_runtime": 1025.0796, + "eval_samples_per_second": 1.009, + "step": 4000 + }, + { + "epoch": 3.73, + "learning_rate": 4.9533115671641795e-05, + "loss": 0.0135, + "step": 4004 + }, + { + "epoch": 3.74, + "learning_rate": 4.953264925373134e-05, + "loss": 0.0238, + "step": 4008 + }, + { + "epoch": 3.74, + "learning_rate": 4.95321828358209e-05, + "loss": 0.0143, + "step": 4012 + }, + { + "epoch": 3.75, + "learning_rate": 4.953171641791045e-05, + "loss": 0.0691, + "step": 4016 + }, + { + "epoch": 3.75, + "learning_rate": 4.953125e-05, + "loss": 0.0458, + "step": 4020 + }, + { + "epoch": 3.75, + "learning_rate": 4.9530783582089556e-05, + "loss": 0.0638, + "step": 4024 + }, + { + "epoch": 3.76, + "learning_rate": 4.9530317164179104e-05, + "loss": 0.022, + "step": 4028 + }, + { + "epoch": 3.76, + "learning_rate": 4.952985074626866e-05, + "loss": 0.0205, + "step": 4032 + }, + { + "epoch": 3.76, + "learning_rate": 4.9529384328358214e-05, + "loss": 0.0268, + "step": 4036 + }, + { + "epoch": 3.77, + "learning_rate": 4.952891791044776e-05, + "loss": 0.0292, + "step": 4040 + }, + { + "epoch": 3.77, + "learning_rate": 4.952845149253732e-05, + "loss": 0.0362, + "step": 4044 + }, + { + "epoch": 3.78, + "learning_rate": 4.952798507462687e-05, + "loss": 0.0272, + "step": 4048 + }, + { + "epoch": 3.78, + "learning_rate": 4.952751865671642e-05, + "loss": 0.0357, + "step": 4052 + }, + { + "epoch": 3.78, + "learning_rate": 4.952705223880597e-05, + "loss": 0.0465, + "step": 4056 + }, + { + "epoch": 3.79, + "learning_rate": 4.952658582089553e-05, + "loss": 0.0329, + "step": 4060 + }, + { + "epoch": 3.79, + "learning_rate": 4.952611940298508e-05, + "loss": 0.0191, + "step": 4064 + }, + { + "epoch": 3.79, + "learning_rate": 4.9525652985074626e-05, + "loss": 0.0204, + "step": 4068 + }, + { + "epoch": 3.8, + "learning_rate": 4.952518656716418e-05, + "loss": 0.0362, + "step": 4072 + }, + { + "epoch": 3.8, + "learning_rate": 4.9524720149253736e-05, + "loss": 0.0175, + "step": 4076 + }, + { + "epoch": 3.81, + "learning_rate": 4.9524253731343284e-05, + "loss": 0.0609, + "step": 4080 + }, + { + "epoch": 3.81, + "learning_rate": 4.952378731343284e-05, + "loss": 0.0194, + "step": 4084 + }, + { + "epoch": 3.81, + "learning_rate": 4.952332089552239e-05, + "loss": 0.0331, + "step": 4088 + }, + { + "epoch": 3.82, + "learning_rate": 4.952285447761194e-05, + "loss": 0.0464, + "step": 4092 + }, + { + "epoch": 3.82, + "learning_rate": 4.95223880597015e-05, + "loss": 0.0154, + "step": 4096 + }, + { + "epoch": 3.82, + "learning_rate": 4.9521921641791045e-05, + "loss": 0.0402, + "step": 4100 + }, + { + "epoch": 3.83, + "learning_rate": 4.95214552238806e-05, + "loss": 0.0201, + "step": 4104 + }, + { + "epoch": 3.83, + "learning_rate": 4.9520988805970155e-05, + "loss": 0.0329, + "step": 4108 + }, + { + "epoch": 3.84, + "learning_rate": 4.95205223880597e-05, + "loss": 0.0357, + "step": 4112 + }, + { + "epoch": 3.84, + "learning_rate": 4.952005597014925e-05, + "loss": 0.0322, + "step": 4116 + }, + { + "epoch": 3.84, + "learning_rate": 4.951958955223881e-05, + "loss": 0.0286, + "step": 4120 + }, + { + "epoch": 3.85, + "learning_rate": 4.951912313432836e-05, + "loss": 0.0334, + "step": 4124 + }, + { + "epoch": 3.85, + "learning_rate": 4.951865671641791e-05, + "loss": 0.0324, + "step": 4128 + }, + { + "epoch": 3.85, + "learning_rate": 4.9518190298507464e-05, + "loss": 0.0235, + "step": 4132 + }, + { + "epoch": 3.86, + "learning_rate": 4.951772388059702e-05, + "loss": 0.0456, + "step": 4136 + }, + { + "epoch": 3.86, + "learning_rate": 4.951725746268657e-05, + "loss": 0.029, + "step": 4140 + }, + { + "epoch": 3.87, + "learning_rate": 4.951679104477612e-05, + "loss": 0.039, + "step": 4144 + }, + { + "epoch": 3.87, + "learning_rate": 4.951632462686567e-05, + "loss": 0.0576, + "step": 4148 + }, + { + "epoch": 3.87, + "learning_rate": 4.9515858208955225e-05, + "loss": 0.0086, + "step": 4152 + }, + { + "epoch": 3.88, + "learning_rate": 4.951539179104478e-05, + "loss": 0.0461, + "step": 4156 + }, + { + "epoch": 3.88, + "learning_rate": 4.951492537313433e-05, + "loss": 0.0401, + "step": 4160 + }, + { + "epoch": 3.88, + "learning_rate": 4.951445895522388e-05, + "loss": 0.0242, + "step": 4164 + }, + { + "epoch": 3.89, + "learning_rate": 4.951399253731344e-05, + "loss": 0.0193, + "step": 4168 + }, + { + "epoch": 3.89, + "learning_rate": 4.9513526119402986e-05, + "loss": 0.018, + "step": 4172 + }, + { + "epoch": 3.9, + "learning_rate": 4.951305970149254e-05, + "loss": 0.0306, + "step": 4176 + }, + { + "epoch": 3.9, + "learning_rate": 4.9512593283582096e-05, + "loss": 0.0333, + "step": 4180 + }, + { + "epoch": 3.9, + "learning_rate": 4.9512126865671644e-05, + "loss": 0.0168, + "step": 4184 + }, + { + "epoch": 3.91, + "learning_rate": 4.95116604477612e-05, + "loss": 0.0275, + "step": 4188 + }, + { + "epoch": 3.91, + "learning_rate": 4.951119402985075e-05, + "loss": 0.0228, + "step": 4192 + }, + { + "epoch": 3.91, + "learning_rate": 4.95107276119403e-05, + "loss": 0.0351, + "step": 4196 + }, + { + "epoch": 3.92, + "learning_rate": 4.951026119402986e-05, + "loss": 0.0217, + "step": 4200 + }, + { + "epoch": 3.92, + "learning_rate": 4.9509794776119405e-05, + "loss": 0.0157, + "step": 4204 + }, + { + "epoch": 3.93, + "learning_rate": 4.950932835820895e-05, + "loss": 0.031, + "step": 4208 + }, + { + "epoch": 3.93, + "learning_rate": 4.9508861940298515e-05, + "loss": 0.0352, + "step": 4212 + }, + { + "epoch": 3.93, + "learning_rate": 4.950839552238806e-05, + "loss": 0.0201, + "step": 4216 + }, + { + "epoch": 3.94, + "learning_rate": 4.950792910447761e-05, + "loss": 0.03, + "step": 4220 + }, + { + "epoch": 3.94, + "learning_rate": 4.9507462686567166e-05, + "loss": 0.0345, + "step": 4224 + }, + { + "epoch": 3.94, + "learning_rate": 4.950699626865672e-05, + "loss": 0.043, + "step": 4228 + }, + { + "epoch": 3.95, + "learning_rate": 4.950652985074627e-05, + "loss": 0.0389, + "step": 4232 + }, + { + "epoch": 3.95, + "learning_rate": 4.9506063432835824e-05, + "loss": 0.0203, + "step": 4236 + }, + { + "epoch": 3.96, + "learning_rate": 4.950559701492538e-05, + "loss": 0.0464, + "step": 4240 + }, + { + "epoch": 3.96, + "learning_rate": 4.950513059701493e-05, + "loss": 0.0171, + "step": 4244 + }, + { + "epoch": 3.96, + "learning_rate": 4.950466417910448e-05, + "loss": 0.0478, + "step": 4248 + }, + { + "epoch": 3.97, + "learning_rate": 4.950419776119403e-05, + "loss": 0.0148, + "step": 4252 + }, + { + "epoch": 3.97, + "learning_rate": 4.9503731343283585e-05, + "loss": 0.0182, + "step": 4256 + }, + { + "epoch": 3.97, + "learning_rate": 4.950326492537314e-05, + "loss": 0.0165, + "step": 4260 + }, + { + "epoch": 3.98, + "learning_rate": 4.950279850746269e-05, + "loss": 0.0097, + "step": 4264 + }, + { + "epoch": 3.98, + "learning_rate": 4.9502332089552236e-05, + "loss": 0.0343, + "step": 4268 + }, + { + "epoch": 3.98, + "learning_rate": 4.95018656716418e-05, + "loss": 0.0079, + "step": 4272 + }, + { + "epoch": 3.99, + "learning_rate": 4.9501399253731346e-05, + "loss": 0.0332, + "step": 4276 + }, + { + "epoch": 3.99, + "learning_rate": 4.9500932835820894e-05, + "loss": 0.0205, + "step": 4280 + }, + { + "epoch": 4.0, + "learning_rate": 4.950046641791045e-05, + "loss": 0.0333, + "step": 4284 + }, + { + "epoch": 4.0, + "learning_rate": 4.9500000000000004e-05, + "loss": 0.0344, + "step": 4288 + }, + { + "epoch": 4.0, + "learning_rate": 4.949953358208955e-05, + "loss": 0.016, + "step": 4292 + }, + { + "epoch": 4.01, + "learning_rate": 4.949906716417911e-05, + "loss": 0.0103, + "step": 4296 + }, + { + "epoch": 4.01, + "learning_rate": 4.949860074626866e-05, + "loss": 0.0192, + "step": 4300 + }, + { + "epoch": 4.01, + "learning_rate": 4.949813432835821e-05, + "loss": 0.0157, + "step": 4304 + }, + { + "epoch": 4.02, + "learning_rate": 4.9497667910447765e-05, + "loss": 0.0178, + "step": 4308 + }, + { + "epoch": 4.02, + "learning_rate": 4.949720149253731e-05, + "loss": 0.0073, + "step": 4312 + }, + { + "epoch": 4.03, + "learning_rate": 4.949673507462687e-05, + "loss": 0.0299, + "step": 4316 + }, + { + "epoch": 4.03, + "learning_rate": 4.949626865671642e-05, + "loss": 0.0182, + "step": 4320 + }, + { + "epoch": 4.03, + "learning_rate": 4.949580223880597e-05, + "loss": 0.0246, + "step": 4324 + }, + { + "epoch": 4.04, + "learning_rate": 4.949533582089552e-05, + "loss": 0.0322, + "step": 4328 + }, + { + "epoch": 4.04, + "learning_rate": 4.949486940298508e-05, + "loss": 0.0391, + "step": 4332 + }, + { + "epoch": 4.04, + "learning_rate": 4.949440298507463e-05, + "loss": 0.0207, + "step": 4336 + }, + { + "epoch": 4.05, + "learning_rate": 4.9493936567164184e-05, + "loss": 0.0201, + "step": 4340 + }, + { + "epoch": 4.05, + "learning_rate": 4.949347014925373e-05, + "loss": 0.0194, + "step": 4344 + }, + { + "epoch": 4.06, + "learning_rate": 4.949300373134329e-05, + "loss": 0.0162, + "step": 4348 + }, + { + "epoch": 4.06, + "learning_rate": 4.949253731343284e-05, + "loss": 0.0393, + "step": 4352 + }, + { + "epoch": 4.06, + "learning_rate": 4.949207089552239e-05, + "loss": 0.0156, + "step": 4356 + }, + { + "epoch": 4.07, + "learning_rate": 4.9491604477611945e-05, + "loss": 0.0182, + "step": 4360 + }, + { + "epoch": 4.07, + "learning_rate": 4.94911380597015e-05, + "loss": 0.0119, + "step": 4364 + }, + { + "epoch": 4.07, + "learning_rate": 4.949067164179105e-05, + "loss": 0.0116, + "step": 4368 + }, + { + "epoch": 4.08, + "learning_rate": 4.9490205223880596e-05, + "loss": 0.0179, + "step": 4372 + }, + { + "epoch": 4.08, + "learning_rate": 4.948973880597015e-05, + "loss": 0.0153, + "step": 4376 + }, + { + "epoch": 4.09, + "learning_rate": 4.9489272388059706e-05, + "loss": 0.0362, + "step": 4380 + }, + { + "epoch": 4.09, + "learning_rate": 4.9488805970149254e-05, + "loss": 0.0082, + "step": 4384 + }, + { + "epoch": 4.09, + "learning_rate": 4.948833955223881e-05, + "loss": 0.0287, + "step": 4388 + }, + { + "epoch": 4.1, + "learning_rate": 4.9487873134328364e-05, + "loss": 0.0125, + "step": 4392 + }, + { + "epoch": 4.1, + "learning_rate": 4.948740671641791e-05, + "loss": 0.0094, + "step": 4396 + }, + { + "epoch": 4.1, + "learning_rate": 4.948694029850747e-05, + "loss": 0.0187, + "step": 4400 + }, + { + "epoch": 4.11, + "learning_rate": 4.9486473880597015e-05, + "loss": 0.0197, + "step": 4404 + }, + { + "epoch": 4.11, + "learning_rate": 4.948600746268657e-05, + "loss": 0.0104, + "step": 4408 + }, + { + "epoch": 4.12, + "learning_rate": 4.9485541044776125e-05, + "loss": 0.0424, + "step": 4412 + }, + { + "epoch": 4.12, + "learning_rate": 4.948507462686567e-05, + "loss": 0.0181, + "step": 4416 + }, + { + "epoch": 4.12, + "learning_rate": 4.948460820895523e-05, + "loss": 0.0187, + "step": 4420 + }, + { + "epoch": 4.13, + "learning_rate": 4.948414179104478e-05, + "loss": 0.0154, + "step": 4424 + }, + { + "epoch": 4.13, + "learning_rate": 4.948367537313433e-05, + "loss": 0.0218, + "step": 4428 + }, + { + "epoch": 4.13, + "learning_rate": 4.948320895522388e-05, + "loss": 0.0119, + "step": 4432 + }, + { + "epoch": 4.14, + "learning_rate": 4.9482742537313434e-05, + "loss": 0.0215, + "step": 4436 + }, + { + "epoch": 4.14, + "learning_rate": 4.948227611940299e-05, + "loss": 0.0112, + "step": 4440 + }, + { + "epoch": 4.15, + "learning_rate": 4.948180970149254e-05, + "loss": 0.0187, + "step": 4444 + }, + { + "epoch": 4.15, + "learning_rate": 4.948134328358209e-05, + "loss": 0.0083, + "step": 4448 + }, + { + "epoch": 4.15, + "learning_rate": 4.9480876865671647e-05, + "loss": 0.0153, + "step": 4452 + }, + { + "epoch": 4.16, + "learning_rate": 4.9480410447761195e-05, + "loss": 0.0342, + "step": 4456 + }, + { + "epoch": 4.16, + "learning_rate": 4.947994402985075e-05, + "loss": 0.0264, + "step": 4460 + }, + { + "epoch": 4.16, + "learning_rate": 4.94794776119403e-05, + "loss": 0.0149, + "step": 4464 + }, + { + "epoch": 4.17, + "learning_rate": 4.947901119402985e-05, + "loss": 0.0193, + "step": 4468 + }, + { + "epoch": 4.17, + "learning_rate": 4.947854477611941e-05, + "loss": 0.011, + "step": 4472 + }, + { + "epoch": 4.18, + "learning_rate": 4.9478078358208956e-05, + "loss": 0.0224, + "step": 4476 + }, + { + "epoch": 4.18, + "learning_rate": 4.9477611940298504e-05, + "loss": 0.0225, + "step": 4480 + }, + { + "epoch": 4.18, + "learning_rate": 4.9477145522388066e-05, + "loss": 0.0176, + "step": 4484 + }, + { + "epoch": 4.19, + "learning_rate": 4.9476679104477614e-05, + "loss": 0.0262, + "step": 4488 + }, + { + "epoch": 4.19, + "learning_rate": 4.947621268656716e-05, + "loss": 0.0165, + "step": 4492 + }, + { + "epoch": 4.19, + "learning_rate": 4.947574626865672e-05, + "loss": 0.0167, + "step": 4496 + }, + { + "epoch": 4.2, + "learning_rate": 4.947527985074627e-05, + "loss": 0.0262, + "step": 4500 + }, + { + "epoch": 4.2, + "eval_exact_match": 0.6992263056092843, + "eval_exec": 0.7437137330754352, + "eval_loss": 0.18752895295619965, + "eval_runtime": 1059.8867, + "eval_samples_per_second": 0.976, + "step": 4500 + }, + { + "epoch": 4.2, + "learning_rate": 4.9474813432835827e-05, + "loss": 0.0147, + "step": 4504 + }, + { + "epoch": 4.21, + "learning_rate": 4.9474347014925375e-05, + "loss": 0.0179, + "step": 4508 + }, + { + "epoch": 4.21, + "learning_rate": 4.947388059701493e-05, + "loss": 0.0218, + "step": 4512 + }, + { + "epoch": 4.21, + "learning_rate": 4.9473414179104484e-05, + "loss": 0.0346, + "step": 4516 + }, + { + "epoch": 4.22, + "learning_rate": 4.947294776119403e-05, + "loss": 0.0181, + "step": 4520 + }, + { + "epoch": 4.22, + "learning_rate": 4.947248134328358e-05, + "loss": 0.0138, + "step": 4524 + }, + { + "epoch": 4.22, + "learning_rate": 4.947201492537314e-05, + "loss": 0.0184, + "step": 4528 + }, + { + "epoch": 4.23, + "learning_rate": 4.947154850746269e-05, + "loss": 0.0319, + "step": 4532 + }, + { + "epoch": 4.23, + "learning_rate": 4.947108208955224e-05, + "loss": 0.0194, + "step": 4536 + }, + { + "epoch": 4.24, + "learning_rate": 4.9470615671641794e-05, + "loss": 0.0148, + "step": 4540 + }, + { + "epoch": 4.24, + "learning_rate": 4.947014925373135e-05, + "loss": 0.0233, + "step": 4544 + }, + { + "epoch": 4.24, + "learning_rate": 4.9469682835820897e-05, + "loss": 0.0273, + "step": 4548 + }, + { + "epoch": 4.25, + "learning_rate": 4.946921641791045e-05, + "loss": 0.0139, + "step": 4552 + }, + { + "epoch": 4.25, + "learning_rate": 4.946875e-05, + "loss": 0.0158, + "step": 4556 + }, + { + "epoch": 4.25, + "learning_rate": 4.9468283582089555e-05, + "loss": 0.0123, + "step": 4560 + }, + { + "epoch": 4.26, + "learning_rate": 4.946781716417911e-05, + "loss": 0.0205, + "step": 4564 + }, + { + "epoch": 4.26, + "learning_rate": 4.946735074626866e-05, + "loss": 0.0181, + "step": 4568 + }, + { + "epoch": 4.26, + "learning_rate": 4.946688432835821e-05, + "loss": 0.0365, + "step": 4572 + }, + { + "epoch": 4.27, + "learning_rate": 4.946641791044777e-05, + "loss": 0.0174, + "step": 4576 + }, + { + "epoch": 4.27, + "learning_rate": 4.9465951492537316e-05, + "loss": 0.0239, + "step": 4580 + }, + { + "epoch": 4.28, + "learning_rate": 4.9465485074626864e-05, + "loss": 0.0113, + "step": 4584 + }, + { + "epoch": 4.28, + "learning_rate": 4.9465018656716425e-05, + "loss": 0.0309, + "step": 4588 + }, + { + "epoch": 4.28, + "learning_rate": 4.9464552238805973e-05, + "loss": 0.0245, + "step": 4592 + }, + { + "epoch": 4.29, + "learning_rate": 4.946408582089552e-05, + "loss": 0.0252, + "step": 4596 + }, + { + "epoch": 4.29, + "learning_rate": 4.9463619402985077e-05, + "loss": 0.0233, + "step": 4600 + }, + { + "epoch": 4.29, + "learning_rate": 4.946315298507463e-05, + "loss": 0.0214, + "step": 4604 + }, + { + "epoch": 4.3, + "learning_rate": 4.946268656716418e-05, + "loss": 0.0286, + "step": 4608 + }, + { + "epoch": 4.3, + "learning_rate": 4.9462220149253734e-05, + "loss": 0.0199, + "step": 4612 + }, + { + "epoch": 4.31, + "learning_rate": 4.946175373134328e-05, + "loss": 0.0373, + "step": 4616 + }, + { + "epoch": 4.31, + "learning_rate": 4.946128731343284e-05, + "loss": 0.0142, + "step": 4620 + }, + { + "epoch": 4.31, + "learning_rate": 4.946082089552239e-05, + "loss": 0.0189, + "step": 4624 + }, + { + "epoch": 4.32, + "learning_rate": 4.946035447761194e-05, + "loss": 0.0208, + "step": 4628 + }, + { + "epoch": 4.32, + "learning_rate": 4.9459888059701495e-05, + "loss": 0.0197, + "step": 4632 + }, + { + "epoch": 4.32, + "learning_rate": 4.945942164179105e-05, + "loss": 0.0124, + "step": 4636 + }, + { + "epoch": 4.33, + "learning_rate": 4.94589552238806e-05, + "loss": 0.0308, + "step": 4640 + }, + { + "epoch": 4.33, + "learning_rate": 4.945848880597015e-05, + "loss": 0.0192, + "step": 4644 + }, + { + "epoch": 4.34, + "learning_rate": 4.94580223880597e-05, + "loss": 0.0239, + "step": 4648 + }, + { + "epoch": 4.34, + "learning_rate": 4.9457555970149256e-05, + "loss": 0.0087, + "step": 4652 + }, + { + "epoch": 4.34, + "learning_rate": 4.9457089552238805e-05, + "loss": 0.0256, + "step": 4656 + }, + { + "epoch": 4.35, + "learning_rate": 4.945662313432836e-05, + "loss": 0.0396, + "step": 4660 + }, + { + "epoch": 4.35, + "learning_rate": 4.9456156716417914e-05, + "loss": 0.0162, + "step": 4664 + }, + { + "epoch": 4.35, + "learning_rate": 4.945569029850747e-05, + "loss": 0.0233, + "step": 4668 + }, + { + "epoch": 4.36, + "learning_rate": 4.945522388059702e-05, + "loss": 0.0246, + "step": 4672 + }, + { + "epoch": 4.36, + "learning_rate": 4.9454757462686566e-05, + "loss": 0.0165, + "step": 4676 + }, + { + "epoch": 4.37, + "learning_rate": 4.945429104477613e-05, + "loss": 0.027, + "step": 4680 + }, + { + "epoch": 4.37, + "learning_rate": 4.9453824626865675e-05, + "loss": 0.0126, + "step": 4684 + }, + { + "epoch": 4.37, + "learning_rate": 4.9453358208955224e-05, + "loss": 0.0182, + "step": 4688 + }, + { + "epoch": 4.38, + "learning_rate": 4.945289179104478e-05, + "loss": 0.0226, + "step": 4692 + }, + { + "epoch": 4.38, + "learning_rate": 4.945242537313433e-05, + "loss": 0.0264, + "step": 4696 + }, + { + "epoch": 4.38, + "learning_rate": 4.945195895522388e-05, + "loss": 0.0378, + "step": 4700 + }, + { + "epoch": 4.39, + "learning_rate": 4.9451492537313436e-05, + "loss": 0.0161, + "step": 4704 + }, + { + "epoch": 4.39, + "learning_rate": 4.9451026119402984e-05, + "loss": 0.012, + "step": 4708 + }, + { + "epoch": 4.4, + "learning_rate": 4.945055970149254e-05, + "loss": 0.0132, + "step": 4712 + }, + { + "epoch": 4.4, + "learning_rate": 4.9450093283582094e-05, + "loss": 0.0185, + "step": 4716 + }, + { + "epoch": 4.4, + "learning_rate": 4.944962686567164e-05, + "loss": 0.0214, + "step": 4720 + }, + { + "epoch": 4.41, + "learning_rate": 4.94491604477612e-05, + "loss": 0.0057, + "step": 4724 + }, + { + "epoch": 4.41, + "learning_rate": 4.944869402985075e-05, + "loss": 0.0158, + "step": 4728 + }, + { + "epoch": 4.41, + "learning_rate": 4.94482276119403e-05, + "loss": 0.0063, + "step": 4732 + }, + { + "epoch": 4.42, + "learning_rate": 4.944776119402985e-05, + "loss": 0.0059, + "step": 4736 + }, + { + "epoch": 4.42, + "learning_rate": 4.944729477611941e-05, + "loss": 0.03, + "step": 4740 + }, + { + "epoch": 4.43, + "learning_rate": 4.944682835820896e-05, + "loss": 0.0185, + "step": 4744 + }, + { + "epoch": 4.43, + "learning_rate": 4.9446361940298506e-05, + "loss": 0.0241, + "step": 4748 + }, + { + "epoch": 4.43, + "learning_rate": 4.944589552238806e-05, + "loss": 0.0162, + "step": 4752 + }, + { + "epoch": 4.44, + "learning_rate": 4.9445429104477616e-05, + "loss": 0.0138, + "step": 4756 + }, + { + "epoch": 4.44, + "learning_rate": 4.9444962686567164e-05, + "loss": 0.0128, + "step": 4760 + }, + { + "epoch": 4.44, + "learning_rate": 4.944449626865672e-05, + "loss": 0.0107, + "step": 4764 + }, + { + "epoch": 4.45, + "learning_rate": 4.944402985074627e-05, + "loss": 0.0171, + "step": 4768 + }, + { + "epoch": 4.45, + "learning_rate": 4.944356343283582e-05, + "loss": 0.0238, + "step": 4772 + }, + { + "epoch": 4.46, + "learning_rate": 4.944309701492538e-05, + "loss": 0.0075, + "step": 4776 + }, + { + "epoch": 4.46, + "learning_rate": 4.9442630597014925e-05, + "loss": 0.0178, + "step": 4780 + }, + { + "epoch": 4.46, + "learning_rate": 4.944216417910448e-05, + "loss": 0.0115, + "step": 4784 + }, + { + "epoch": 4.47, + "learning_rate": 4.9441697761194035e-05, + "loss": 0.0189, + "step": 4788 + }, + { + "epoch": 4.47, + "learning_rate": 4.944123134328358e-05, + "loss": 0.026, + "step": 4792 + }, + { + "epoch": 4.47, + "learning_rate": 4.944076492537313e-05, + "loss": 0.0286, + "step": 4796 + }, + { + "epoch": 4.48, + "learning_rate": 4.944029850746269e-05, + "loss": 0.0529, + "step": 4800 + }, + { + "epoch": 4.48, + "learning_rate": 4.943983208955224e-05, + "loss": 0.0191, + "step": 4804 + }, + { + "epoch": 4.49, + "learning_rate": 4.943936567164179e-05, + "loss": 0.0323, + "step": 4808 + }, + { + "epoch": 4.49, + "learning_rate": 4.9438899253731344e-05, + "loss": 0.0208, + "step": 4812 + }, + { + "epoch": 4.49, + "learning_rate": 4.94384328358209e-05, + "loss": 0.0217, + "step": 4816 + }, + { + "epoch": 4.5, + "learning_rate": 4.943796641791045e-05, + "loss": 0.0251, + "step": 4820 + }, + { + "epoch": 4.5, + "learning_rate": 4.94375e-05, + "loss": 0.0133, + "step": 4824 + }, + { + "epoch": 4.5, + "learning_rate": 4.943703358208955e-05, + "loss": 0.02, + "step": 4828 + }, + { + "epoch": 4.51, + "learning_rate": 4.943656716417911e-05, + "loss": 0.0258, + "step": 4832 + }, + { + "epoch": 4.51, + "learning_rate": 4.943610074626866e-05, + "loss": 0.0172, + "step": 4836 + }, + { + "epoch": 4.51, + "learning_rate": 4.943563432835821e-05, + "loss": 0.0382, + "step": 4840 + }, + { + "epoch": 4.52, + "learning_rate": 4.943516791044776e-05, + "loss": 0.014, + "step": 4844 + }, + { + "epoch": 4.52, + "learning_rate": 4.943470149253732e-05, + "loss": 0.0078, + "step": 4848 + }, + { + "epoch": 4.53, + "learning_rate": 4.9434235074626866e-05, + "loss": 0.0191, + "step": 4852 + }, + { + "epoch": 4.53, + "learning_rate": 4.943376865671642e-05, + "loss": 0.025, + "step": 4856 + }, + { + "epoch": 4.53, + "learning_rate": 4.9433302238805976e-05, + "loss": 0.0247, + "step": 4860 + }, + { + "epoch": 4.54, + "learning_rate": 4.9432835820895524e-05, + "loss": 0.0205, + "step": 4864 + }, + { + "epoch": 4.54, + "learning_rate": 4.943236940298508e-05, + "loss": 0.0087, + "step": 4868 + }, + { + "epoch": 4.54, + "learning_rate": 4.943190298507463e-05, + "loss": 0.0133, + "step": 4872 + }, + { + "epoch": 4.55, + "learning_rate": 4.943143656716418e-05, + "loss": 0.0116, + "step": 4876 + }, + { + "epoch": 4.55, + "learning_rate": 4.943097014925374e-05, + "loss": 0.0378, + "step": 4880 + }, + { + "epoch": 4.56, + "learning_rate": 4.9430503731343285e-05, + "loss": 0.0138, + "step": 4884 + }, + { + "epoch": 4.56, + "learning_rate": 4.943003731343283e-05, + "loss": 0.0109, + "step": 4888 + }, + { + "epoch": 4.56, + "learning_rate": 4.9429570895522395e-05, + "loss": 0.0393, + "step": 4892 + }, + { + "epoch": 4.57, + "learning_rate": 4.942910447761194e-05, + "loss": 0.0111, + "step": 4896 + }, + { + "epoch": 4.57, + "learning_rate": 4.942863805970149e-05, + "loss": 0.0159, + "step": 4900 + }, + { + "epoch": 4.57, + "learning_rate": 4.9428171641791046e-05, + "loss": 0.0169, + "step": 4904 + }, + { + "epoch": 4.58, + "learning_rate": 4.94277052238806e-05, + "loss": 0.0271, + "step": 4908 + }, + { + "epoch": 4.58, + "learning_rate": 4.942723880597015e-05, + "loss": 0.0064, + "step": 4912 + }, + { + "epoch": 4.59, + "learning_rate": 4.9426772388059704e-05, + "loss": 0.016, + "step": 4916 + }, + { + "epoch": 4.59, + "learning_rate": 4.942630597014926e-05, + "loss": 0.0194, + "step": 4920 + }, + { + "epoch": 4.59, + "learning_rate": 4.942583955223881e-05, + "loss": 0.0241, + "step": 4924 + }, + { + "epoch": 4.6, + "learning_rate": 4.942537313432836e-05, + "loss": 0.0217, + "step": 4928 + }, + { + "epoch": 4.6, + "learning_rate": 4.942490671641791e-05, + "loss": 0.0054, + "step": 4932 + }, + { + "epoch": 4.6, + "learning_rate": 4.9424440298507465e-05, + "loss": 0.0156, + "step": 4936 + }, + { + "epoch": 4.61, + "learning_rate": 4.942397388059702e-05, + "loss": 0.016, + "step": 4940 + }, + { + "epoch": 4.61, + "learning_rate": 4.942350746268657e-05, + "loss": 0.0283, + "step": 4944 + }, + { + "epoch": 4.62, + "learning_rate": 4.9423041044776116e-05, + "loss": 0.0189, + "step": 4948 + }, + { + "epoch": 4.62, + "learning_rate": 4.942257462686568e-05, + "loss": 0.0236, + "step": 4952 + }, + { + "epoch": 4.62, + "learning_rate": 4.9422108208955226e-05, + "loss": 0.0101, + "step": 4956 + }, + { + "epoch": 4.63, + "learning_rate": 4.9421641791044774e-05, + "loss": 0.0299, + "step": 4960 + }, + { + "epoch": 4.63, + "learning_rate": 4.942117537313433e-05, + "loss": 0.0215, + "step": 4964 + }, + { + "epoch": 4.63, + "learning_rate": 4.9420708955223884e-05, + "loss": 0.0109, + "step": 4968 + }, + { + "epoch": 4.64, + "learning_rate": 4.942024253731343e-05, + "loss": 0.0157, + "step": 4972 + }, + { + "epoch": 4.64, + "learning_rate": 4.941977611940299e-05, + "loss": 0.0241, + "step": 4976 + }, + { + "epoch": 4.65, + "learning_rate": 4.941930970149254e-05, + "loss": 0.0279, + "step": 4980 + }, + { + "epoch": 4.65, + "learning_rate": 4.941884328358209e-05, + "loss": 0.0318, + "step": 4984 + }, + { + "epoch": 4.65, + "learning_rate": 4.9418376865671645e-05, + "loss": 0.022, + "step": 4988 + }, + { + "epoch": 4.66, + "learning_rate": 4.941791044776119e-05, + "loss": 0.0137, + "step": 4992 + }, + { + "epoch": 4.66, + "learning_rate": 4.941744402985075e-05, + "loss": 0.0226, + "step": 4996 + }, + { + "epoch": 4.66, + "learning_rate": 4.94169776119403e-05, + "loss": 0.009, + "step": 5000 + }, + { + "epoch": 4.66, + "eval_exact_match": 0.7030947775628626, + "eval_exec": 0.746615087040619, + "eval_loss": 0.18529821932315826, + "eval_runtime": 1152.0827, + "eval_samples_per_second": 0.898, + "step": 5000 + }, + { + "epoch": 4.67, + "learning_rate": 4.941651119402985e-05, + "loss": 0.0145, + "step": 5004 + }, + { + "epoch": 4.67, + "learning_rate": 4.9416044776119406e-05, + "loss": 0.0094, + "step": 5008 + }, + { + "epoch": 4.68, + "learning_rate": 4.941557835820896e-05, + "loss": 0.0242, + "step": 5012 + }, + { + "epoch": 4.68, + "learning_rate": 4.941511194029851e-05, + "loss": 0.0222, + "step": 5016 + }, + { + "epoch": 4.68, + "learning_rate": 4.9414645522388064e-05, + "loss": 0.0191, + "step": 5020 + }, + { + "epoch": 4.69, + "learning_rate": 4.941417910447761e-05, + "loss": 0.0174, + "step": 5024 + }, + { + "epoch": 4.69, + "learning_rate": 4.941371268656717e-05, + "loss": 0.0283, + "step": 5028 + }, + { + "epoch": 4.69, + "learning_rate": 4.941324626865672e-05, + "loss": 0.0104, + "step": 5032 + }, + { + "epoch": 4.7, + "learning_rate": 4.941277985074627e-05, + "loss": 0.0104, + "step": 5036 + }, + { + "epoch": 4.7, + "learning_rate": 4.9412313432835825e-05, + "loss": 0.0192, + "step": 5040 + }, + { + "epoch": 4.71, + "learning_rate": 4.941184701492538e-05, + "loss": 0.0129, + "step": 5044 + }, + { + "epoch": 4.71, + "learning_rate": 4.941138059701493e-05, + "loss": 0.0177, + "step": 5048 + }, + { + "epoch": 4.71, + "learning_rate": 4.9410914179104476e-05, + "loss": 0.0157, + "step": 5052 + }, + { + "epoch": 4.72, + "learning_rate": 4.941044776119403e-05, + "loss": 0.0169, + "step": 5056 + }, + { + "epoch": 4.72, + "learning_rate": 4.9409981343283586e-05, + "loss": 0.0124, + "step": 5060 + }, + { + "epoch": 4.72, + "learning_rate": 4.9409514925373134e-05, + "loss": 0.0167, + "step": 5064 + }, + { + "epoch": 4.73, + "learning_rate": 4.940904850746269e-05, + "loss": 0.0112, + "step": 5068 + }, + { + "epoch": 4.73, + "learning_rate": 4.9408582089552244e-05, + "loss": 0.0228, + "step": 5072 + }, + { + "epoch": 4.73, + "learning_rate": 4.940811567164179e-05, + "loss": 0.0211, + "step": 5076 + }, + { + "epoch": 4.74, + "learning_rate": 4.940764925373135e-05, + "loss": 0.0306, + "step": 5080 + }, + { + "epoch": 4.74, + "learning_rate": 4.9407182835820895e-05, + "loss": 0.0173, + "step": 5084 + }, + { + "epoch": 4.75, + "learning_rate": 4.940671641791045e-05, + "loss": 0.0061, + "step": 5088 + }, + { + "epoch": 4.75, + "learning_rate": 4.9406250000000005e-05, + "loss": 0.0432, + "step": 5092 + }, + { + "epoch": 4.75, + "learning_rate": 4.940578358208955e-05, + "loss": 0.0349, + "step": 5096 + }, + { + "epoch": 4.76, + "learning_rate": 4.940531716417911e-05, + "loss": 0.0483, + "step": 5100 + }, + { + "epoch": 4.76, + "learning_rate": 4.940485074626866e-05, + "loss": 0.0176, + "step": 5104 + }, + { + "epoch": 4.76, + "learning_rate": 4.940438432835821e-05, + "loss": 0.0256, + "step": 5108 + }, + { + "epoch": 4.77, + "learning_rate": 4.940391791044776e-05, + "loss": 0.0236, + "step": 5112 + }, + { + "epoch": 4.77, + "learning_rate": 4.9403451492537314e-05, + "loss": 0.0167, + "step": 5116 + }, + { + "epoch": 4.78, + "learning_rate": 4.940298507462687e-05, + "loss": 0.0294, + "step": 5120 + }, + { + "epoch": 4.78, + "learning_rate": 4.940251865671642e-05, + "loss": 0.0245, + "step": 5124 + }, + { + "epoch": 4.78, + "learning_rate": 4.940205223880597e-05, + "loss": 0.0169, + "step": 5128 + }, + { + "epoch": 4.79, + "learning_rate": 4.940158582089553e-05, + "loss": 0.0202, + "step": 5132 + }, + { + "epoch": 4.79, + "learning_rate": 4.9401119402985075e-05, + "loss": 0.0286, + "step": 5136 + }, + { + "epoch": 4.79, + "learning_rate": 4.940065298507463e-05, + "loss": 0.0245, + "step": 5140 + }, + { + "epoch": 4.8, + "learning_rate": 4.940018656716418e-05, + "loss": 0.0287, + "step": 5144 + }, + { + "epoch": 4.8, + "learning_rate": 4.939972014925374e-05, + "loss": 0.0185, + "step": 5148 + }, + { + "epoch": 4.81, + "learning_rate": 4.939925373134329e-05, + "loss": 0.0274, + "step": 5152 + }, + { + "epoch": 4.81, + "learning_rate": 4.9398787313432836e-05, + "loss": 0.0052, + "step": 5156 + }, + { + "epoch": 4.81, + "learning_rate": 4.939832089552239e-05, + "loss": 0.0217, + "step": 5160 + }, + { + "epoch": 4.82, + "learning_rate": 4.9397854477611946e-05, + "loss": 0.0214, + "step": 5164 + }, + { + "epoch": 4.82, + "learning_rate": 4.9397388059701494e-05, + "loss": 0.0243, + "step": 5168 + }, + { + "epoch": 4.82, + "learning_rate": 4.939692164179105e-05, + "loss": 0.0253, + "step": 5172 + }, + { + "epoch": 4.83, + "learning_rate": 4.93964552238806e-05, + "loss": 0.013, + "step": 5176 + }, + { + "epoch": 4.83, + "learning_rate": 4.939598880597015e-05, + "loss": 0.0193, + "step": 5180 + }, + { + "epoch": 4.84, + "learning_rate": 4.939552238805971e-05, + "loss": 0.0098, + "step": 5184 + }, + { + "epoch": 4.84, + "learning_rate": 4.9395055970149255e-05, + "loss": 0.0193, + "step": 5188 + }, + { + "epoch": 4.84, + "learning_rate": 4.939458955223881e-05, + "loss": 0.0238, + "step": 5192 + }, + { + "epoch": 4.85, + "learning_rate": 4.9394123134328365e-05, + "loss": 0.0204, + "step": 5196 + }, + { + "epoch": 4.85, + "learning_rate": 4.939365671641791e-05, + "loss": 0.0424, + "step": 5200 + }, + { + "epoch": 4.85, + "learning_rate": 4.939319029850746e-05, + "loss": 0.012, + "step": 5204 + }, + { + "epoch": 4.86, + "learning_rate": 4.939272388059702e-05, + "loss": 0.0168, + "step": 5208 + }, + { + "epoch": 4.86, + "learning_rate": 4.939225746268657e-05, + "loss": 0.0328, + "step": 5212 + }, + { + "epoch": 4.87, + "learning_rate": 4.939179104477612e-05, + "loss": 0.0151, + "step": 5216 + }, + { + "epoch": 4.87, + "learning_rate": 4.9391324626865674e-05, + "loss": 0.0222, + "step": 5220 + }, + { + "epoch": 4.87, + "learning_rate": 4.939085820895523e-05, + "loss": 0.0315, + "step": 5224 + }, + { + "epoch": 4.88, + "learning_rate": 4.939039179104478e-05, + "loss": 0.0255, + "step": 5228 + }, + { + "epoch": 4.88, + "learning_rate": 4.938992537313433e-05, + "loss": 0.0187, + "step": 5232 + }, + { + "epoch": 4.88, + "learning_rate": 4.938945895522388e-05, + "loss": 0.0132, + "step": 5236 + }, + { + "epoch": 4.89, + "learning_rate": 4.9388992537313435e-05, + "loss": 0.0296, + "step": 5240 + }, + { + "epoch": 4.89, + "learning_rate": 4.938852611940299e-05, + "loss": 0.0223, + "step": 5244 + }, + { + "epoch": 4.9, + "learning_rate": 4.938805970149254e-05, + "loss": 0.0189, + "step": 5248 + }, + { + "epoch": 4.9, + "learning_rate": 4.938759328358209e-05, + "loss": 0.0232, + "step": 5252 + }, + { + "epoch": 4.9, + "learning_rate": 4.938712686567165e-05, + "loss": 0.0243, + "step": 5256 + }, + { + "epoch": 4.91, + "learning_rate": 4.9386660447761196e-05, + "loss": 0.0218, + "step": 5260 + }, + { + "epoch": 4.91, + "learning_rate": 4.9386194029850744e-05, + "loss": 0.0371, + "step": 5264 + }, + { + "epoch": 4.91, + "learning_rate": 4.9385727611940306e-05, + "loss": 0.0128, + "step": 5268 + }, + { + "epoch": 4.92, + "learning_rate": 4.9385261194029854e-05, + "loss": 0.0266, + "step": 5272 + }, + { + "epoch": 4.92, + "learning_rate": 4.93847947761194e-05, + "loss": 0.0152, + "step": 5276 + }, + { + "epoch": 4.93, + "learning_rate": 4.938432835820896e-05, + "loss": 0.019, + "step": 5280 + }, + { + "epoch": 4.93, + "learning_rate": 4.938386194029851e-05, + "loss": 0.0055, + "step": 5284 + }, + { + "epoch": 4.93, + "learning_rate": 4.938339552238806e-05, + "loss": 0.023, + "step": 5288 + }, + { + "epoch": 4.94, + "learning_rate": 4.9382929104477615e-05, + "loss": 0.011, + "step": 5292 + }, + { + "epoch": 4.94, + "learning_rate": 4.938246268656716e-05, + "loss": 0.0152, + "step": 5296 + }, + { + "epoch": 4.94, + "learning_rate": 4.938199626865672e-05, + "loss": 0.0356, + "step": 5300 + }, + { + "epoch": 4.95, + "learning_rate": 4.938152985074627e-05, + "loss": 0.0317, + "step": 5304 + }, + { + "epoch": 4.95, + "learning_rate": 4.938106343283582e-05, + "loss": 0.0347, + "step": 5308 + }, + { + "epoch": 4.96, + "learning_rate": 4.9380597014925376e-05, + "loss": 0.0067, + "step": 5312 + }, + { + "epoch": 4.96, + "learning_rate": 4.938013059701493e-05, + "loss": 0.0202, + "step": 5316 + }, + { + "epoch": 4.96, + "learning_rate": 4.937966417910448e-05, + "loss": 0.0275, + "step": 5320 + }, + { + "epoch": 4.97, + "learning_rate": 4.9379197761194034e-05, + "loss": 0.0165, + "step": 5324 + }, + { + "epoch": 4.97, + "learning_rate": 4.937873134328358e-05, + "loss": 0.019, + "step": 5328 + }, + { + "epoch": 4.97, + "learning_rate": 4.937826492537314e-05, + "loss": 0.0157, + "step": 5332 + }, + { + "epoch": 4.98, + "learning_rate": 4.937779850746269e-05, + "loss": 0.0303, + "step": 5336 + }, + { + "epoch": 4.98, + "learning_rate": 4.937733208955224e-05, + "loss": 0.0178, + "step": 5340 + }, + { + "epoch": 4.98, + "learning_rate": 4.9376865671641795e-05, + "loss": 0.0162, + "step": 5344 + }, + { + "epoch": 4.99, + "learning_rate": 4.937639925373135e-05, + "loss": 0.0235, + "step": 5348 + }, + { + "epoch": 4.99, + "learning_rate": 4.93759328358209e-05, + "loss": 0.0154, + "step": 5352 + }, + { + "epoch": 5.0, + "learning_rate": 4.9375466417910446e-05, + "loss": 0.0167, + "step": 5356 + }, + { + "epoch": 5.0, + "learning_rate": 4.937500000000001e-05, + "loss": 0.0238, + "step": 5360 + }, + { + "epoch": 5.0, + "learning_rate": 4.9374533582089556e-05, + "loss": 0.0116, + "step": 5364 + }, + { + "epoch": 5.01, + "learning_rate": 4.9374067164179104e-05, + "loss": 0.0073, + "step": 5368 + }, + { + "epoch": 5.01, + "learning_rate": 4.937360074626866e-05, + "loss": 0.0178, + "step": 5372 + }, + { + "epoch": 5.01, + "learning_rate": 4.9373134328358214e-05, + "loss": 0.0102, + "step": 5376 + }, + { + "epoch": 5.02, + "learning_rate": 4.937266791044776e-05, + "loss": 0.0109, + "step": 5380 + }, + { + "epoch": 5.02, + "learning_rate": 4.937220149253732e-05, + "loss": 0.0288, + "step": 5384 + }, + { + "epoch": 5.03, + "learning_rate": 4.9371735074626865e-05, + "loss": 0.0073, + "step": 5388 + }, + { + "epoch": 5.03, + "learning_rate": 4.937126865671642e-05, + "loss": 0.0181, + "step": 5392 + }, + { + "epoch": 5.03, + "learning_rate": 4.9370802238805975e-05, + "loss": 0.0159, + "step": 5396 + }, + { + "epoch": 5.04, + "learning_rate": 4.937033582089552e-05, + "loss": 0.0234, + "step": 5400 + }, + { + "epoch": 5.04, + "learning_rate": 4.936986940298508e-05, + "loss": 0.0203, + "step": 5404 + }, + { + "epoch": 5.04, + "learning_rate": 4.936940298507463e-05, + "loss": 0.0132, + "step": 5408 + }, + { + "epoch": 5.05, + "learning_rate": 4.936893656716418e-05, + "loss": 0.0069, + "step": 5412 + }, + { + "epoch": 5.05, + "learning_rate": 4.936847014925373e-05, + "loss": 0.0182, + "step": 5416 + }, + { + "epoch": 5.06, + "learning_rate": 4.936800373134329e-05, + "loss": 0.0064, + "step": 5420 + }, + { + "epoch": 5.06, + "learning_rate": 4.936753731343284e-05, + "loss": 0.0057, + "step": 5424 + }, + { + "epoch": 5.06, + "learning_rate": 4.936707089552239e-05, + "loss": 0.0206, + "step": 5428 + }, + { + "epoch": 5.07, + "learning_rate": 4.936660447761194e-05, + "loss": 0.0148, + "step": 5432 + }, + { + "epoch": 5.07, + "learning_rate": 4.9366138059701497e-05, + "loss": 0.0233, + "step": 5436 + }, + { + "epoch": 5.07, + "learning_rate": 4.9365671641791045e-05, + "loss": 0.0297, + "step": 5440 + }, + { + "epoch": 5.08, + "learning_rate": 4.93652052238806e-05, + "loss": 0.0138, + "step": 5444 + }, + { + "epoch": 5.08, + "learning_rate": 4.936473880597015e-05, + "loss": 0.0223, + "step": 5448 + }, + { + "epoch": 5.09, + "learning_rate": 4.93642723880597e-05, + "loss": 0.0104, + "step": 5452 + }, + { + "epoch": 5.09, + "learning_rate": 4.936380597014926e-05, + "loss": 0.0088, + "step": 5456 + }, + { + "epoch": 5.09, + "learning_rate": 4.9363339552238806e-05, + "loss": 0.0276, + "step": 5460 + }, + { + "epoch": 5.1, + "learning_rate": 4.936287313432836e-05, + "loss": 0.0533, + "step": 5464 + }, + { + "epoch": 5.1, + "learning_rate": 4.9362406716417916e-05, + "loss": 0.0108, + "step": 5468 + }, + { + "epoch": 5.1, + "learning_rate": 4.9361940298507464e-05, + "loss": 0.0145, + "step": 5472 + }, + { + "epoch": 5.11, + "learning_rate": 4.936147388059702e-05, + "loss": 0.0045, + "step": 5476 + }, + { + "epoch": 5.11, + "learning_rate": 4.9361007462686573e-05, + "loss": 0.0249, + "step": 5480 + }, + { + "epoch": 5.12, + "learning_rate": 4.936054104477612e-05, + "loss": 0.0071, + "step": 5484 + }, + { + "epoch": 5.12, + "learning_rate": 4.9360074626865676e-05, + "loss": 0.0101, + "step": 5488 + }, + { + "epoch": 5.12, + "learning_rate": 4.9359608208955225e-05, + "loss": 0.0113, + "step": 5492 + }, + { + "epoch": 5.13, + "learning_rate": 4.935914179104478e-05, + "loss": 0.0135, + "step": 5496 + }, + { + "epoch": 5.13, + "learning_rate": 4.9358675373134334e-05, + "loss": 0.0129, + "step": 5500 + }, + { + "epoch": 5.13, + "eval_exact_match": 0.723404255319149, + "eval_exec": 0.7562862669245648, + "eval_loss": 0.20738151669502258, + "eval_runtime": 1207.3594, + "eval_samples_per_second": 0.856, + "step": 5500 + }, + { + "epoch": 5.13, + "learning_rate": 4.935820895522388e-05, + "loss": 0.0183, + "step": 5504 + }, + { + "epoch": 5.14, + "learning_rate": 4.935774253731343e-05, + "loss": 0.0204, + "step": 5508 + }, + { + "epoch": 5.14, + "learning_rate": 4.935727611940299e-05, + "loss": 0.0059, + "step": 5512 + }, + { + "epoch": 5.15, + "learning_rate": 4.935680970149254e-05, + "loss": 0.017, + "step": 5516 + }, + { + "epoch": 5.15, + "learning_rate": 4.935634328358209e-05, + "loss": 0.0125, + "step": 5520 + }, + { + "epoch": 5.15, + "learning_rate": 4.9355876865671644e-05, + "loss": 0.022, + "step": 5524 + }, + { + "epoch": 5.16, + "learning_rate": 4.93554104477612e-05, + "loss": 0.0079, + "step": 5528 + }, + { + "epoch": 5.16, + "learning_rate": 4.9354944029850747e-05, + "loss": 0.0082, + "step": 5532 + }, + { + "epoch": 5.16, + "learning_rate": 4.93544776119403e-05, + "loss": 0.0078, + "step": 5536 + }, + { + "epoch": 5.17, + "learning_rate": 4.9354011194029856e-05, + "loss": 0.0172, + "step": 5540 + }, + { + "epoch": 5.17, + "learning_rate": 4.9353544776119405e-05, + "loss": 0.0133, + "step": 5544 + }, + { + "epoch": 5.18, + "learning_rate": 4.935307835820896e-05, + "loss": 0.0171, + "step": 5548 + }, + { + "epoch": 5.18, + "learning_rate": 4.935261194029851e-05, + "loss": 0.0192, + "step": 5552 + }, + { + "epoch": 5.18, + "learning_rate": 4.935214552238806e-05, + "loss": 0.0066, + "step": 5556 + }, + { + "epoch": 5.19, + "learning_rate": 4.935167910447762e-05, + "loss": 0.019, + "step": 5560 + }, + { + "epoch": 5.19, + "learning_rate": 4.9351212686567166e-05, + "loss": 0.0155, + "step": 5564 + }, + { + "epoch": 5.19, + "learning_rate": 4.9350746268656714e-05, + "loss": 0.0426, + "step": 5568 + }, + { + "epoch": 5.2, + "learning_rate": 4.9350279850746275e-05, + "loss": 0.0177, + "step": 5572 + }, + { + "epoch": 5.2, + "learning_rate": 4.9349813432835823e-05, + "loss": 0.0092, + "step": 5576 + }, + { + "epoch": 5.21, + "learning_rate": 4.934934701492537e-05, + "loss": 0.0128, + "step": 5580 + }, + { + "epoch": 5.21, + "learning_rate": 4.9348880597014927e-05, + "loss": 0.0208, + "step": 5584 + }, + { + "epoch": 5.21, + "learning_rate": 4.934841417910448e-05, + "loss": 0.0207, + "step": 5588 + }, + { + "epoch": 5.22, + "learning_rate": 4.934794776119403e-05, + "loss": 0.0251, + "step": 5592 + }, + { + "epoch": 5.22, + "learning_rate": 4.9347481343283584e-05, + "loss": 0.0099, + "step": 5596 + }, + { + "epoch": 5.22, + "learning_rate": 4.934701492537314e-05, + "loss": 0.0138, + "step": 5600 + }, + { + "epoch": 5.23, + "learning_rate": 4.934654850746269e-05, + "loss": 0.0153, + "step": 5604 + }, + { + "epoch": 5.23, + "learning_rate": 4.934608208955224e-05, + "loss": 0.0268, + "step": 5608 + }, + { + "epoch": 5.24, + "learning_rate": 4.934561567164179e-05, + "loss": 0.0149, + "step": 5612 + }, + { + "epoch": 5.24, + "learning_rate": 4.9345149253731345e-05, + "loss": 0.0148, + "step": 5616 + }, + { + "epoch": 5.24, + "learning_rate": 4.93446828358209e-05, + "loss": 0.0114, + "step": 5620 + }, + { + "epoch": 5.25, + "learning_rate": 4.934421641791045e-05, + "loss": 0.0064, + "step": 5624 + }, + { + "epoch": 5.25, + "learning_rate": 4.9343749999999997e-05, + "loss": 0.0176, + "step": 5628 + }, + { + "epoch": 5.25, + "learning_rate": 4.934328358208956e-05, + "loss": 0.0175, + "step": 5632 + }, + { + "epoch": 5.26, + "learning_rate": 4.9342817164179106e-05, + "loss": 0.028, + "step": 5636 + }, + { + "epoch": 5.26, + "learning_rate": 4.934235074626866e-05, + "loss": 0.0124, + "step": 5640 + }, + { + "epoch": 5.26, + "learning_rate": 4.934188432835821e-05, + "loss": 0.0196, + "step": 5644 + }, + { + "epoch": 5.27, + "learning_rate": 4.9341417910447764e-05, + "loss": 0.0129, + "step": 5648 + }, + { + "epoch": 5.27, + "learning_rate": 4.934095149253732e-05, + "loss": 0.0173, + "step": 5652 + }, + { + "epoch": 5.28, + "learning_rate": 4.934048507462687e-05, + "loss": 0.042, + "step": 5656 + }, + { + "epoch": 5.28, + "learning_rate": 4.934001865671642e-05, + "loss": 0.0103, + "step": 5660 + }, + { + "epoch": 5.28, + "learning_rate": 4.933955223880598e-05, + "loss": 0.0108, + "step": 5664 + }, + { + "epoch": 5.29, + "learning_rate": 4.9339085820895525e-05, + "loss": 0.0123, + "step": 5668 + }, + { + "epoch": 5.29, + "learning_rate": 4.9338619402985073e-05, + "loss": 0.0061, + "step": 5672 + }, + { + "epoch": 5.29, + "learning_rate": 4.933815298507463e-05, + "loss": 0.0136, + "step": 5676 + }, + { + "epoch": 5.3, + "learning_rate": 4.933768656716418e-05, + "loss": 0.0152, + "step": 5680 + }, + { + "epoch": 5.3, + "learning_rate": 4.933722014925373e-05, + "loss": 0.0209, + "step": 5684 + }, + { + "epoch": 5.31, + "learning_rate": 4.9336753731343286e-05, + "loss": 0.0259, + "step": 5688 + }, + { + "epoch": 5.31, + "learning_rate": 4.933628731343284e-05, + "loss": 0.0155, + "step": 5692 + }, + { + "epoch": 5.31, + "learning_rate": 4.933582089552239e-05, + "loss": 0.0257, + "step": 5696 + }, + { + "epoch": 5.32, + "learning_rate": 4.9335354477611944e-05, + "loss": 0.0359, + "step": 5700 + }, + { + "epoch": 5.32, + "learning_rate": 4.933488805970149e-05, + "loss": 0.0091, + "step": 5704 + }, + { + "epoch": 5.32, + "learning_rate": 4.933442164179105e-05, + "loss": 0.021, + "step": 5708 + }, + { + "epoch": 5.33, + "learning_rate": 4.93339552238806e-05, + "loss": 0.0175, + "step": 5712 + }, + { + "epoch": 5.33, + "learning_rate": 4.933348880597015e-05, + "loss": 0.0134, + "step": 5716 + }, + { + "epoch": 5.34, + "learning_rate": 4.9333022388059705e-05, + "loss": 0.0152, + "step": 5720 + }, + { + "epoch": 5.34, + "learning_rate": 4.933255597014926e-05, + "loss": 0.014, + "step": 5724 + }, + { + "epoch": 5.34, + "learning_rate": 4.933208955223881e-05, + "loss": 0.0205, + "step": 5728 + }, + { + "epoch": 5.35, + "learning_rate": 4.9331623134328356e-05, + "loss": 0.0145, + "step": 5732 + }, + { + "epoch": 5.35, + "learning_rate": 4.933115671641791e-05, + "loss": 0.0054, + "step": 5736 + }, + { + "epoch": 5.35, + "learning_rate": 4.9330690298507466e-05, + "loss": 0.0197, + "step": 5740 + }, + { + "epoch": 5.36, + "learning_rate": 4.9330223880597014e-05, + "loss": 0.0167, + "step": 5744 + }, + { + "epoch": 5.36, + "learning_rate": 4.932975746268657e-05, + "loss": 0.0326, + "step": 5748 + }, + { + "epoch": 5.37, + "learning_rate": 4.9329291044776124e-05, + "loss": 0.0252, + "step": 5752 + }, + { + "epoch": 5.37, + "learning_rate": 4.932882462686567e-05, + "loss": 0.0169, + "step": 5756 + }, + { + "epoch": 5.37, + "learning_rate": 4.932835820895523e-05, + "loss": 0.0097, + "step": 5760 + }, + { + "epoch": 5.38, + "learning_rate": 4.9327891791044775e-05, + "loss": 0.0059, + "step": 5764 + }, + { + "epoch": 5.38, + "learning_rate": 4.932742537313433e-05, + "loss": 0.0128, + "step": 5768 + }, + { + "epoch": 5.38, + "learning_rate": 4.9326958955223885e-05, + "loss": 0.012, + "step": 5772 + }, + { + "epoch": 5.39, + "learning_rate": 4.932649253731343e-05, + "loss": 0.0202, + "step": 5776 + }, + { + "epoch": 5.39, + "learning_rate": 4.932602611940299e-05, + "loss": 0.0112, + "step": 5780 + }, + { + "epoch": 5.4, + "learning_rate": 4.932555970149254e-05, + "loss": 0.0114, + "step": 5784 + }, + { + "epoch": 5.4, + "learning_rate": 4.932509328358209e-05, + "loss": 0.0085, + "step": 5788 + }, + { + "epoch": 5.4, + "learning_rate": 4.932462686567164e-05, + "loss": 0.0115, + "step": 5792 + }, + { + "epoch": 5.41, + "learning_rate": 4.9324160447761194e-05, + "loss": 0.0318, + "step": 5796 + }, + { + "epoch": 5.41, + "learning_rate": 4.932369402985075e-05, + "loss": 0.032, + "step": 5800 + }, + { + "epoch": 5.41, + "learning_rate": 4.9323227611940304e-05, + "loss": 0.015, + "step": 5804 + }, + { + "epoch": 5.42, + "learning_rate": 4.932276119402985e-05, + "loss": 0.0194, + "step": 5808 + }, + { + "epoch": 5.42, + "learning_rate": 4.932229477611941e-05, + "loss": 0.017, + "step": 5812 + }, + { + "epoch": 5.43, + "learning_rate": 4.932182835820896e-05, + "loss": 0.0081, + "step": 5816 + }, + { + "epoch": 5.43, + "learning_rate": 4.932136194029851e-05, + "loss": 0.0084, + "step": 5820 + }, + { + "epoch": 5.43, + "learning_rate": 4.932089552238806e-05, + "loss": 0.0069, + "step": 5824 + }, + { + "epoch": 5.44, + "learning_rate": 4.932042910447762e-05, + "loss": 0.0175, + "step": 5828 + }, + { + "epoch": 5.44, + "learning_rate": 4.931996268656717e-05, + "loss": 0.021, + "step": 5832 + }, + { + "epoch": 5.44, + "learning_rate": 4.9319496268656716e-05, + "loss": 0.0136, + "step": 5836 + }, + { + "epoch": 5.45, + "learning_rate": 4.931902985074627e-05, + "loss": 0.012, + "step": 5840 + }, + { + "epoch": 5.45, + "learning_rate": 4.9318563432835826e-05, + "loss": 0.0068, + "step": 5844 + }, + { + "epoch": 5.46, + "learning_rate": 4.9318097014925374e-05, + "loss": 0.0151, + "step": 5848 + }, + { + "epoch": 5.46, + "learning_rate": 4.931763059701493e-05, + "loss": 0.0189, + "step": 5852 + }, + { + "epoch": 5.46, + "learning_rate": 4.931716417910448e-05, + "loss": 0.0198, + "step": 5856 + }, + { + "epoch": 5.47, + "learning_rate": 4.931669776119403e-05, + "loss": 0.0086, + "step": 5860 + }, + { + "epoch": 5.47, + "learning_rate": 4.931623134328359e-05, + "loss": 0.0126, + "step": 5864 + }, + { + "epoch": 5.47, + "learning_rate": 4.9315764925373135e-05, + "loss": 0.0093, + "step": 5868 + }, + { + "epoch": 5.48, + "learning_rate": 4.931529850746269e-05, + "loss": 0.0232, + "step": 5872 + }, + { + "epoch": 5.48, + "learning_rate": 4.9314832089552245e-05, + "loss": 0.0138, + "step": 5876 + }, + { + "epoch": 5.49, + "learning_rate": 4.931436567164179e-05, + "loss": 0.0332, + "step": 5880 + }, + { + "epoch": 5.49, + "learning_rate": 4.931389925373134e-05, + "loss": 0.0102, + "step": 5884 + }, + { + "epoch": 5.49, + "learning_rate": 4.93134328358209e-05, + "loss": 0.015, + "step": 5888 + }, + { + "epoch": 5.5, + "learning_rate": 4.931296641791045e-05, + "loss": 0.0099, + "step": 5892 + }, + { + "epoch": 5.5, + "learning_rate": 4.93125e-05, + "loss": 0.0109, + "step": 5896 + }, + { + "epoch": 5.5, + "learning_rate": 4.9312033582089554e-05, + "loss": 0.0084, + "step": 5900 + }, + { + "epoch": 5.51, + "learning_rate": 4.931156716417911e-05, + "loss": 0.0145, + "step": 5904 + }, + { + "epoch": 5.51, + "learning_rate": 4.931110074626866e-05, + "loss": 0.0085, + "step": 5908 + }, + { + "epoch": 5.51, + "learning_rate": 4.931063432835821e-05, + "loss": 0.0101, + "step": 5912 + }, + { + "epoch": 5.52, + "learning_rate": 4.931016791044776e-05, + "loss": 0.0131, + "step": 5916 + }, + { + "epoch": 5.52, + "learning_rate": 4.9309701492537315e-05, + "loss": 0.021, + "step": 5920 + }, + { + "epoch": 5.53, + "learning_rate": 4.930923507462687e-05, + "loss": 0.0113, + "step": 5924 + }, + { + "epoch": 5.53, + "learning_rate": 4.930876865671642e-05, + "loss": 0.0208, + "step": 5928 + }, + { + "epoch": 5.53, + "learning_rate": 4.930830223880597e-05, + "loss": 0.0187, + "step": 5932 + }, + { + "epoch": 5.54, + "learning_rate": 4.930783582089553e-05, + "loss": 0.0216, + "step": 5936 + }, + { + "epoch": 5.54, + "learning_rate": 4.9307369402985076e-05, + "loss": 0.0101, + "step": 5940 + }, + { + "epoch": 5.54, + "learning_rate": 4.9306902985074624e-05, + "loss": 0.0108, + "step": 5944 + }, + { + "epoch": 5.55, + "learning_rate": 4.9306436567164186e-05, + "loss": 0.0206, + "step": 5948 + }, + { + "epoch": 5.55, + "learning_rate": 4.9305970149253734e-05, + "loss": 0.0134, + "step": 5952 + }, + { + "epoch": 5.56, + "learning_rate": 4.930550373134328e-05, + "loss": 0.0075, + "step": 5956 + }, + { + "epoch": 5.56, + "learning_rate": 4.930503731343284e-05, + "loss": 0.0154, + "step": 5960 + }, + { + "epoch": 5.56, + "learning_rate": 4.930457089552239e-05, + "loss": 0.0136, + "step": 5964 + }, + { + "epoch": 5.57, + "learning_rate": 4.930410447761195e-05, + "loss": 0.011, + "step": 5968 + }, + { + "epoch": 5.57, + "learning_rate": 4.9303638059701495e-05, + "loss": 0.0114, + "step": 5972 + }, + { + "epoch": 5.57, + "learning_rate": 4.930317164179104e-05, + "loss": 0.0133, + "step": 5976 + }, + { + "epoch": 5.58, + "learning_rate": 4.9302705223880605e-05, + "loss": 0.0109, + "step": 5980 + }, + { + "epoch": 5.58, + "learning_rate": 4.930223880597015e-05, + "loss": 0.0084, + "step": 5984 + }, + { + "epoch": 5.59, + "learning_rate": 4.93017723880597e-05, + "loss": 0.0147, + "step": 5988 + }, + { + "epoch": 5.59, + "learning_rate": 4.9301305970149256e-05, + "loss": 0.0139, + "step": 5992 + }, + { + "epoch": 5.59, + "learning_rate": 4.930083955223881e-05, + "loss": 0.009, + "step": 5996 + }, + { + "epoch": 5.6, + "learning_rate": 4.930037313432836e-05, + "loss": 0.0127, + "step": 6000 + }, + { + "epoch": 5.6, + "eval_exact_match": 0.7224371373307543, + "eval_exec": 0.7446808510638298, + "eval_loss": 0.20770786702632904, + "eval_runtime": 1400.4625, + "eval_samples_per_second": 0.738, + "step": 6000 + }, + { + "epoch": 5.6, + "learning_rate": 4.9299906716417914e-05, + "loss": 0.0203, + "step": 6004 + }, + { + "epoch": 5.6, + "learning_rate": 4.929944029850746e-05, + "loss": 0.0078, + "step": 6008 + }, + { + "epoch": 5.61, + "learning_rate": 4.929897388059702e-05, + "loss": 0.0127, + "step": 6012 + }, + { + "epoch": 5.61, + "learning_rate": 4.929850746268657e-05, + "loss": 0.0099, + "step": 6016 + }, + { + "epoch": 5.62, + "learning_rate": 4.929804104477612e-05, + "loss": 0.0149, + "step": 6020 + }, + { + "epoch": 5.62, + "learning_rate": 4.9297574626865675e-05, + "loss": 0.0142, + "step": 6024 + }, + { + "epoch": 5.62, + "learning_rate": 4.929710820895523e-05, + "loss": 0.0129, + "step": 6028 + }, + { + "epoch": 5.63, + "learning_rate": 4.929664179104478e-05, + "loss": 0.015, + "step": 6032 + }, + { + "epoch": 5.63, + "learning_rate": 4.9296175373134326e-05, + "loss": 0.0056, + "step": 6036 + }, + { + "epoch": 5.63, + "learning_rate": 4.929570895522389e-05, + "loss": 0.0085, + "step": 6040 + }, + { + "epoch": 5.64, + "learning_rate": 4.9295242537313436e-05, + "loss": 0.0157, + "step": 6044 + }, + { + "epoch": 5.64, + "learning_rate": 4.9294776119402984e-05, + "loss": 0.0248, + "step": 6048 + }, + { + "epoch": 5.65, + "learning_rate": 4.929430970149254e-05, + "loss": 0.0234, + "step": 6052 + }, + { + "epoch": 5.65, + "learning_rate": 4.9293843283582094e-05, + "loss": 0.0103, + "step": 6056 + }, + { + "epoch": 5.65, + "learning_rate": 4.929337686567164e-05, + "loss": 0.0106, + "step": 6060 + }, + { + "epoch": 5.66, + "learning_rate": 4.92929104477612e-05, + "loss": 0.0075, + "step": 6064 + }, + { + "epoch": 5.66, + "learning_rate": 4.9292444029850745e-05, + "loss": 0.0082, + "step": 6068 + }, + { + "epoch": 5.66, + "learning_rate": 4.92919776119403e-05, + "loss": 0.0289, + "step": 6072 + }, + { + "epoch": 5.67, + "learning_rate": 4.9291511194029855e-05, + "loss": 0.0079, + "step": 6076 + }, + { + "epoch": 5.67, + "learning_rate": 4.92910447761194e-05, + "loss": 0.0148, + "step": 6080 + }, + { + "epoch": 5.68, + "learning_rate": 4.929057835820896e-05, + "loss": 0.0072, + "step": 6084 + }, + { + "epoch": 5.68, + "learning_rate": 4.929011194029851e-05, + "loss": 0.0087, + "step": 6088 + }, + { + "epoch": 5.68, + "learning_rate": 4.928964552238806e-05, + "loss": 0.0075, + "step": 6092 + }, + { + "epoch": 5.69, + "learning_rate": 4.928917910447761e-05, + "loss": 0.0202, + "step": 6096 + }, + { + "epoch": 5.69, + "learning_rate": 4.928871268656717e-05, + "loss": 0.0076, + "step": 6100 + }, + { + "epoch": 5.69, + "learning_rate": 4.928824626865672e-05, + "loss": 0.0187, + "step": 6104 + }, + { + "epoch": 5.7, + "learning_rate": 4.928777985074627e-05, + "loss": 0.0158, + "step": 6108 + }, + { + "epoch": 5.7, + "learning_rate": 4.928731343283582e-05, + "loss": 0.0142, + "step": 6112 + }, + { + "epoch": 5.71, + "learning_rate": 4.928684701492538e-05, + "loss": 0.0116, + "step": 6116 + }, + { + "epoch": 5.71, + "learning_rate": 4.9286380597014925e-05, + "loss": 0.0218, + "step": 6120 + }, + { + "epoch": 5.71, + "learning_rate": 4.928591417910448e-05, + "loss": 0.0132, + "step": 6124 + }, + { + "epoch": 5.72, + "learning_rate": 4.928544776119403e-05, + "loss": 0.0084, + "step": 6128 + }, + { + "epoch": 5.72, + "learning_rate": 4.928498134328359e-05, + "loss": 0.0063, + "step": 6132 + }, + { + "epoch": 5.72, + "learning_rate": 4.928451492537314e-05, + "loss": 0.008, + "step": 6136 + }, + { + "epoch": 5.73, + "learning_rate": 4.9284048507462686e-05, + "loss": 0.0157, + "step": 6140 + }, + { + "epoch": 5.73, + "learning_rate": 4.928358208955224e-05, + "loss": 0.0041, + "step": 6144 + }, + { + "epoch": 5.73, + "learning_rate": 4.9283115671641796e-05, + "loss": 0.0168, + "step": 6148 + }, + { + "epoch": 5.74, + "learning_rate": 4.9282649253731344e-05, + "loss": 0.039, + "step": 6152 + }, + { + "epoch": 5.74, + "learning_rate": 4.92821828358209e-05, + "loss": 0.0105, + "step": 6156 + }, + { + "epoch": 5.75, + "learning_rate": 4.9281716417910454e-05, + "loss": 0.0158, + "step": 6160 + }, + { + "epoch": 5.75, + "learning_rate": 4.928125e-05, + "loss": 0.0184, + "step": 6164 + }, + { + "epoch": 5.75, + "learning_rate": 4.928078358208956e-05, + "loss": 0.01, + "step": 6168 + }, + { + "epoch": 5.76, + "learning_rate": 4.9280317164179105e-05, + "loss": 0.0229, + "step": 6172 + }, + { + "epoch": 5.76, + "learning_rate": 4.927985074626866e-05, + "loss": 0.0083, + "step": 6176 + }, + { + "epoch": 5.76, + "learning_rate": 4.9279384328358215e-05, + "loss": 0.016, + "step": 6180 + }, + { + "epoch": 5.77, + "learning_rate": 4.927891791044776e-05, + "loss": 0.0097, + "step": 6184 + }, + { + "epoch": 5.77, + "learning_rate": 4.927845149253731e-05, + "loss": 0.0119, + "step": 6188 + }, + { + "epoch": 5.78, + "learning_rate": 4.927798507462687e-05, + "loss": 0.0089, + "step": 6192 + }, + { + "epoch": 5.78, + "learning_rate": 4.927751865671642e-05, + "loss": 0.0472, + "step": 6196 + }, + { + "epoch": 5.78, + "learning_rate": 4.927705223880597e-05, + "loss": 0.0154, + "step": 6200 + }, + { + "epoch": 5.79, + "learning_rate": 4.9276585820895524e-05, + "loss": 0.0054, + "step": 6204 + }, + { + "epoch": 5.79, + "learning_rate": 4.927611940298508e-05, + "loss": 0.0133, + "step": 6208 + }, + { + "epoch": 5.79, + "learning_rate": 4.927565298507463e-05, + "loss": 0.0163, + "step": 6212 + }, + { + "epoch": 5.8, + "learning_rate": 4.927518656716418e-05, + "loss": 0.0296, + "step": 6216 + }, + { + "epoch": 5.8, + "learning_rate": 4.927472014925374e-05, + "loss": 0.0075, + "step": 6220 + }, + { + "epoch": 5.81, + "learning_rate": 4.9274253731343285e-05, + "loss": 0.0128, + "step": 6224 + }, + { + "epoch": 5.81, + "learning_rate": 4.927378731343284e-05, + "loss": 0.0127, + "step": 6228 + }, + { + "epoch": 5.81, + "learning_rate": 4.927332089552239e-05, + "loss": 0.0246, + "step": 6232 + }, + { + "epoch": 5.82, + "learning_rate": 4.927285447761194e-05, + "loss": 0.0223, + "step": 6236 + }, + { + "epoch": 5.82, + "learning_rate": 4.92723880597015e-05, + "loss": 0.0312, + "step": 6240 + }, + { + "epoch": 5.82, + "learning_rate": 4.9271921641791046e-05, + "loss": 0.0177, + "step": 6244 + }, + { + "epoch": 5.83, + "learning_rate": 4.9271455223880594e-05, + "loss": 0.0184, + "step": 6248 + }, + { + "epoch": 5.83, + "learning_rate": 4.9270988805970156e-05, + "loss": 0.0151, + "step": 6252 + }, + { + "epoch": 5.84, + "learning_rate": 4.9270522388059704e-05, + "loss": 0.0263, + "step": 6256 + }, + { + "epoch": 5.84, + "learning_rate": 4.927005597014925e-05, + "loss": 0.0185, + "step": 6260 + }, + { + "epoch": 5.84, + "learning_rate": 4.926958955223881e-05, + "loss": 0.0235, + "step": 6264 + }, + { + "epoch": 5.85, + "learning_rate": 4.926912313432836e-05, + "loss": 0.0132, + "step": 6268 + }, + { + "epoch": 5.85, + "learning_rate": 4.926865671641791e-05, + "loss": 0.0167, + "step": 6272 + }, + { + "epoch": 5.85, + "learning_rate": 4.9268190298507465e-05, + "loss": 0.0097, + "step": 6276 + }, + { + "epoch": 5.86, + "learning_rate": 4.926772388059702e-05, + "loss": 0.0157, + "step": 6280 + }, + { + "epoch": 5.86, + "learning_rate": 4.926725746268657e-05, + "loss": 0.0161, + "step": 6284 + }, + { + "epoch": 5.87, + "learning_rate": 4.926679104477612e-05, + "loss": 0.0129, + "step": 6288 + }, + { + "epoch": 5.87, + "learning_rate": 4.926632462686567e-05, + "loss": 0.0307, + "step": 6292 + }, + { + "epoch": 5.87, + "learning_rate": 4.9265858208955226e-05, + "loss": 0.0091, + "step": 6296 + }, + { + "epoch": 5.88, + "learning_rate": 4.926539179104478e-05, + "loss": 0.0108, + "step": 6300 + }, + { + "epoch": 5.88, + "learning_rate": 4.926492537313433e-05, + "loss": 0.0164, + "step": 6304 + }, + { + "epoch": 5.88, + "learning_rate": 4.9264458955223884e-05, + "loss": 0.0067, + "step": 6308 + }, + { + "epoch": 5.89, + "learning_rate": 4.926399253731344e-05, + "loss": 0.0078, + "step": 6312 + }, + { + "epoch": 5.89, + "learning_rate": 4.926352611940299e-05, + "loss": 0.0118, + "step": 6316 + }, + { + "epoch": 5.9, + "learning_rate": 4.926305970149254e-05, + "loss": 0.0174, + "step": 6320 + }, + { + "epoch": 5.9, + "learning_rate": 4.926259328358209e-05, + "loss": 0.0226, + "step": 6324 + }, + { + "epoch": 5.9, + "learning_rate": 4.9262126865671645e-05, + "loss": 0.013, + "step": 6328 + }, + { + "epoch": 5.91, + "learning_rate": 4.92616604477612e-05, + "loss": 0.0241, + "step": 6332 + }, + { + "epoch": 5.91, + "learning_rate": 4.926119402985075e-05, + "loss": 0.0154, + "step": 6336 + }, + { + "epoch": 5.91, + "learning_rate": 4.92607276119403e-05, + "loss": 0.0339, + "step": 6340 + }, + { + "epoch": 5.92, + "learning_rate": 4.926026119402986e-05, + "loss": 0.0147, + "step": 6344 + }, + { + "epoch": 5.92, + "learning_rate": 4.9259794776119406e-05, + "loss": 0.0203, + "step": 6348 + }, + { + "epoch": 5.93, + "learning_rate": 4.9259328358208954e-05, + "loss": 0.008, + "step": 6352 + }, + { + "epoch": 5.93, + "learning_rate": 4.925886194029851e-05, + "loss": 0.0371, + "step": 6356 + }, + { + "epoch": 5.93, + "learning_rate": 4.9258395522388064e-05, + "loss": 0.0161, + "step": 6360 + }, + { + "epoch": 5.94, + "learning_rate": 4.925792910447761e-05, + "loss": 0.02, + "step": 6364 + }, + { + "epoch": 5.94, + "learning_rate": 4.925746268656717e-05, + "loss": 0.0117, + "step": 6368 + }, + { + "epoch": 5.94, + "learning_rate": 4.925699626865672e-05, + "loss": 0.0057, + "step": 6372 + }, + { + "epoch": 5.95, + "learning_rate": 4.925652985074627e-05, + "loss": 0.0157, + "step": 6376 + }, + { + "epoch": 5.95, + "learning_rate": 4.9256063432835825e-05, + "loss": 0.0115, + "step": 6380 + }, + { + "epoch": 5.96, + "learning_rate": 4.925559701492537e-05, + "loss": 0.0082, + "step": 6384 + }, + { + "epoch": 5.96, + "learning_rate": 4.925513059701493e-05, + "loss": 0.009, + "step": 6388 + }, + { + "epoch": 5.96, + "learning_rate": 4.925466417910448e-05, + "loss": 0.0169, + "step": 6392 + }, + { + "epoch": 5.97, + "learning_rate": 4.925419776119403e-05, + "loss": 0.0204, + "step": 6396 + }, + { + "epoch": 5.97, + "learning_rate": 4.9253731343283586e-05, + "loss": 0.0356, + "step": 6400 + }, + { + "epoch": 5.97, + "learning_rate": 4.925326492537314e-05, + "loss": 0.0079, + "step": 6404 + }, + { + "epoch": 5.98, + "learning_rate": 4.925279850746269e-05, + "loss": 0.008, + "step": 6408 + }, + { + "epoch": 5.98, + "learning_rate": 4.925233208955224e-05, + "loss": 0.01, + "step": 6412 + }, + { + "epoch": 5.98, + "learning_rate": 4.925186567164179e-05, + "loss": 0.0309, + "step": 6416 + }, + { + "epoch": 5.99, + "learning_rate": 4.9251399253731347e-05, + "loss": 0.023, + "step": 6420 + }, + { + "epoch": 5.99, + "learning_rate": 4.9250932835820895e-05, + "loss": 0.0079, + "step": 6424 + }, + { + "epoch": 6.0, + "learning_rate": 4.925046641791045e-05, + "loss": 0.0145, + "step": 6428 + }, + { + "epoch": 6.0, + "learning_rate": 4.9250000000000004e-05, + "loss": 0.0435, + "step": 6432 + }, + { + "epoch": 6.0, + "learning_rate": 4.924953358208955e-05, + "loss": 0.0114, + "step": 6436 + }, + { + "epoch": 6.01, + "learning_rate": 4.924906716417911e-05, + "loss": 0.0078, + "step": 6440 + }, + { + "epoch": 6.01, + "learning_rate": 4.9248600746268656e-05, + "loss": 0.0081, + "step": 6444 + }, + { + "epoch": 6.01, + "learning_rate": 4.924813432835821e-05, + "loss": 0.013, + "step": 6448 + }, + { + "epoch": 6.02, + "learning_rate": 4.9247667910447765e-05, + "loss": 0.0108, + "step": 6452 + }, + { + "epoch": 6.02, + "learning_rate": 4.9247201492537314e-05, + "loss": 0.0088, + "step": 6456 + }, + { + "epoch": 6.03, + "learning_rate": 4.924673507462687e-05, + "loss": 0.0064, + "step": 6460 + }, + { + "epoch": 6.03, + "learning_rate": 4.9246268656716423e-05, + "loss": 0.0124, + "step": 6464 + }, + { + "epoch": 6.03, + "learning_rate": 4.924580223880597e-05, + "loss": 0.0052, + "step": 6468 + }, + { + "epoch": 6.04, + "learning_rate": 4.9245335820895526e-05, + "loss": 0.0062, + "step": 6472 + }, + { + "epoch": 6.04, + "learning_rate": 4.9244869402985075e-05, + "loss": 0.0173, + "step": 6476 + }, + { + "epoch": 6.04, + "learning_rate": 4.924440298507463e-05, + "loss": 0.014, + "step": 6480 + }, + { + "epoch": 6.05, + "learning_rate": 4.9243936567164184e-05, + "loss": 0.0144, + "step": 6484 + }, + { + "epoch": 6.05, + "learning_rate": 4.924347014925373e-05, + "loss": 0.0037, + "step": 6488 + }, + { + "epoch": 6.06, + "learning_rate": 4.924300373134329e-05, + "loss": 0.0062, + "step": 6492 + }, + { + "epoch": 6.06, + "learning_rate": 4.924253731343284e-05, + "loss": 0.0095, + "step": 6496 + }, + { + "epoch": 6.06, + "learning_rate": 4.924207089552239e-05, + "loss": 0.0078, + "step": 6500 + }, + { + "epoch": 6.06, + "eval_exact_match": 0.7195357833655706, + "eval_exec": 0.741779497098646, + "eval_loss": 0.2259366363286972, + "eval_runtime": 1072.2544, + "eval_samples_per_second": 0.964, + "step": 6500 + }, + { + "epoch": 6.07, + "learning_rate": 4.924160447761194e-05, + "loss": 0.0215, + "step": 6504 + }, + { + "epoch": 6.07, + "learning_rate": 4.92411380597015e-05, + "loss": 0.005, + "step": 6508 + }, + { + "epoch": 6.07, + "learning_rate": 4.924067164179105e-05, + "loss": 0.0141, + "step": 6512 + }, + { + "epoch": 6.08, + "learning_rate": 4.9240205223880597e-05, + "loss": 0.0077, + "step": 6516 + }, + { + "epoch": 6.08, + "learning_rate": 4.923973880597015e-05, + "loss": 0.0067, + "step": 6520 + }, + { + "epoch": 6.09, + "learning_rate": 4.9239272388059706e-05, + "loss": 0.0072, + "step": 6524 + }, + { + "epoch": 6.09, + "learning_rate": 4.9238805970149255e-05, + "loss": 0.0193, + "step": 6528 + }, + { + "epoch": 6.09, + "learning_rate": 4.923833955223881e-05, + "loss": 0.0103, + "step": 6532 + }, + { + "epoch": 6.1, + "learning_rate": 4.923787313432836e-05, + "loss": 0.0157, + "step": 6536 + }, + { + "epoch": 6.1, + "learning_rate": 4.923740671641791e-05, + "loss": 0.0103, + "step": 6540 + }, + { + "epoch": 6.1, + "learning_rate": 4.923694029850747e-05, + "loss": 0.0052, + "step": 6544 + }, + { + "epoch": 6.11, + "learning_rate": 4.9236473880597016e-05, + "loss": 0.0028, + "step": 6548 + }, + { + "epoch": 6.11, + "learning_rate": 4.923600746268657e-05, + "loss": 0.0065, + "step": 6552 + }, + { + "epoch": 6.12, + "learning_rate": 4.9235541044776125e-05, + "loss": 0.0114, + "step": 6556 + }, + { + "epoch": 6.12, + "learning_rate": 4.9235074626865673e-05, + "loss": 0.0147, + "step": 6560 + }, + { + "epoch": 6.12, + "learning_rate": 4.923460820895522e-05, + "loss": 0.0049, + "step": 6564 + }, + { + "epoch": 6.13, + "learning_rate": 4.923414179104478e-05, + "loss": 0.0079, + "step": 6568 + }, + { + "epoch": 6.13, + "learning_rate": 4.923367537313433e-05, + "loss": 0.0099, + "step": 6572 + }, + { + "epoch": 6.13, + "learning_rate": 4.923320895522388e-05, + "loss": 0.0079, + "step": 6576 + }, + { + "epoch": 6.14, + "learning_rate": 4.9232742537313434e-05, + "loss": 0.0042, + "step": 6580 + }, + { + "epoch": 6.14, + "learning_rate": 4.923227611940299e-05, + "loss": 0.0112, + "step": 6584 + }, + { + "epoch": 6.15, + "learning_rate": 4.923180970149254e-05, + "loss": 0.006, + "step": 6588 + }, + { + "epoch": 6.15, + "learning_rate": 4.923134328358209e-05, + "loss": 0.0107, + "step": 6592 + }, + { + "epoch": 6.15, + "learning_rate": 4.923087686567164e-05, + "loss": 0.0112, + "step": 6596 + }, + { + "epoch": 6.16, + "learning_rate": 4.9230410447761195e-05, + "loss": 0.0251, + "step": 6600 + }, + { + "epoch": 6.16, + "learning_rate": 4.922994402985075e-05, + "loss": 0.023, + "step": 6604 + }, + { + "epoch": 6.16, + "learning_rate": 4.92294776119403e-05, + "loss": 0.016, + "step": 6608 + }, + { + "epoch": 6.17, + "learning_rate": 4.922901119402985e-05, + "loss": 0.0044, + "step": 6612 + }, + { + "epoch": 6.17, + "learning_rate": 4.922854477611941e-05, + "loss": 0.018, + "step": 6616 + }, + { + "epoch": 6.18, + "learning_rate": 4.9228078358208956e-05, + "loss": 0.0172, + "step": 6620 + }, + { + "epoch": 6.18, + "learning_rate": 4.922761194029851e-05, + "loss": 0.0419, + "step": 6624 + }, + { + "epoch": 6.18, + "learning_rate": 4.9227145522388066e-05, + "loss": 0.0044, + "step": 6628 + }, + { + "epoch": 6.19, + "learning_rate": 4.9226679104477614e-05, + "loss": 0.0193, + "step": 6632 + }, + { + "epoch": 6.19, + "learning_rate": 4.922621268656717e-05, + "loss": 0.0122, + "step": 6636 + }, + { + "epoch": 6.19, + "learning_rate": 4.922574626865672e-05, + "loss": 0.009, + "step": 6640 + }, + { + "epoch": 6.2, + "learning_rate": 4.922527985074627e-05, + "loss": 0.0184, + "step": 6644 + }, + { + "epoch": 6.2, + "learning_rate": 4.922481343283583e-05, + "loss": 0.0084, + "step": 6648 + }, + { + "epoch": 6.21, + "learning_rate": 4.9224347014925375e-05, + "loss": 0.0035, + "step": 6652 + }, + { + "epoch": 6.21, + "learning_rate": 4.9223880597014923e-05, + "loss": 0.0089, + "step": 6656 + }, + { + "epoch": 6.21, + "learning_rate": 4.9223414179104485e-05, + "loss": 0.0073, + "step": 6660 + }, + { + "epoch": 6.22, + "learning_rate": 4.922294776119403e-05, + "loss": 0.0126, + "step": 6664 + }, + { + "epoch": 6.22, + "learning_rate": 4.922248134328358e-05, + "loss": 0.0028, + "step": 6668 + }, + { + "epoch": 6.22, + "learning_rate": 4.9222014925373136e-05, + "loss": 0.0066, + "step": 6672 + }, + { + "epoch": 6.23, + "learning_rate": 4.922154850746269e-05, + "loss": 0.0041, + "step": 6676 + }, + { + "epoch": 6.23, + "learning_rate": 4.922108208955224e-05, + "loss": 0.0064, + "step": 6680 + }, + { + "epoch": 6.24, + "learning_rate": 4.9220615671641794e-05, + "loss": 0.0171, + "step": 6684 + }, + { + "epoch": 6.24, + "learning_rate": 4.922014925373134e-05, + "loss": 0.0186, + "step": 6688 + }, + { + "epoch": 6.24, + "learning_rate": 4.92196828358209e-05, + "loss": 0.0028, + "step": 6692 + }, + { + "epoch": 6.25, + "learning_rate": 4.921921641791045e-05, + "loss": 0.0133, + "step": 6696 + }, + { + "epoch": 6.25, + "learning_rate": 4.921875e-05, + "loss": 0.0054, + "step": 6700 + }, + { + "epoch": 6.25, + "learning_rate": 4.9218283582089555e-05, + "loss": 0.0071, + "step": 6704 + }, + { + "epoch": 6.26, + "learning_rate": 4.921781716417911e-05, + "loss": 0.0216, + "step": 6708 + }, + { + "epoch": 6.26, + "learning_rate": 4.921735074626866e-05, + "loss": 0.0213, + "step": 6712 + }, + { + "epoch": 6.26, + "learning_rate": 4.9216884328358206e-05, + "loss": 0.0134, + "step": 6716 + }, + { + "epoch": 6.27, + "learning_rate": 4.921641791044777e-05, + "loss": 0.0077, + "step": 6720 + }, + { + "epoch": 6.27, + "learning_rate": 4.9215951492537316e-05, + "loss": 0.0022, + "step": 6724 + }, + { + "epoch": 6.28, + "learning_rate": 4.9215485074626864e-05, + "loss": 0.0227, + "step": 6728 + }, + { + "epoch": 6.28, + "learning_rate": 4.921501865671642e-05, + "loss": 0.0049, + "step": 6732 + }, + { + "epoch": 6.28, + "learning_rate": 4.9214552238805974e-05, + "loss": 0.0083, + "step": 6736 + }, + { + "epoch": 6.29, + "learning_rate": 4.921408582089552e-05, + "loss": 0.0151, + "step": 6740 + }, + { + "epoch": 6.29, + "learning_rate": 4.921361940298508e-05, + "loss": 0.0093, + "step": 6744 + }, + { + "epoch": 6.29, + "learning_rate": 4.9213152985074625e-05, + "loss": 0.0131, + "step": 6748 + }, + { + "epoch": 6.3, + "learning_rate": 4.921268656716418e-05, + "loss": 0.0173, + "step": 6752 + }, + { + "epoch": 6.3, + "learning_rate": 4.9212220149253735e-05, + "loss": 0.0198, + "step": 6756 + }, + { + "epoch": 6.31, + "learning_rate": 4.921175373134328e-05, + "loss": 0.014, + "step": 6760 + }, + { + "epoch": 6.31, + "learning_rate": 4.921128731343284e-05, + "loss": 0.0136, + "step": 6764 + }, + { + "epoch": 6.31, + "learning_rate": 4.921082089552239e-05, + "loss": 0.011, + "step": 6768 + }, + { + "epoch": 6.32, + "learning_rate": 4.921035447761194e-05, + "loss": 0.0175, + "step": 6772 + }, + { + "epoch": 6.32, + "learning_rate": 4.920988805970149e-05, + "loss": 0.0123, + "step": 6776 + }, + { + "epoch": 6.32, + "learning_rate": 4.920942164179105e-05, + "loss": 0.0137, + "step": 6780 + }, + { + "epoch": 6.33, + "learning_rate": 4.92089552238806e-05, + "loss": 0.0129, + "step": 6784 + }, + { + "epoch": 6.33, + "learning_rate": 4.9208488805970154e-05, + "loss": 0.0116, + "step": 6788 + }, + { + "epoch": 6.34, + "learning_rate": 4.92080223880597e-05, + "loss": 0.0142, + "step": 6792 + }, + { + "epoch": 6.34, + "learning_rate": 4.920755597014926e-05, + "loss": 0.0161, + "step": 6796 + }, + { + "epoch": 6.34, + "learning_rate": 4.920708955223881e-05, + "loss": 0.0078, + "step": 6800 + }, + { + "epoch": 6.35, + "learning_rate": 4.920662313432836e-05, + "loss": 0.0183, + "step": 6804 + }, + { + "epoch": 6.35, + "learning_rate": 4.920615671641791e-05, + "loss": 0.0138, + "step": 6808 + }, + { + "epoch": 6.35, + "learning_rate": 4.920569029850747e-05, + "loss": 0.0128, + "step": 6812 + }, + { + "epoch": 6.36, + "learning_rate": 4.920522388059702e-05, + "loss": 0.0105, + "step": 6816 + }, + { + "epoch": 6.36, + "learning_rate": 4.9204757462686566e-05, + "loss": 0.0246, + "step": 6820 + }, + { + "epoch": 6.37, + "learning_rate": 4.920429104477612e-05, + "loss": 0.0076, + "step": 6824 + }, + { + "epoch": 6.37, + "learning_rate": 4.9203824626865676e-05, + "loss": 0.0074, + "step": 6828 + }, + { + "epoch": 6.37, + "learning_rate": 4.9203358208955224e-05, + "loss": 0.0065, + "step": 6832 + }, + { + "epoch": 6.38, + "learning_rate": 4.920289179104478e-05, + "loss": 0.0156, + "step": 6836 + }, + { + "epoch": 6.38, + "learning_rate": 4.9202425373134334e-05, + "loss": 0.0128, + "step": 6840 + }, + { + "epoch": 6.38, + "learning_rate": 4.920195895522388e-05, + "loss": 0.015, + "step": 6844 + }, + { + "epoch": 6.39, + "learning_rate": 4.920149253731344e-05, + "loss": 0.0054, + "step": 6848 + }, + { + "epoch": 6.39, + "learning_rate": 4.9201026119402985e-05, + "loss": 0.0092, + "step": 6852 + }, + { + "epoch": 6.4, + "learning_rate": 4.920055970149254e-05, + "loss": 0.0159, + "step": 6856 + }, + { + "epoch": 6.4, + "learning_rate": 4.9200093283582095e-05, + "loss": 0.0132, + "step": 6860 + }, + { + "epoch": 6.4, + "learning_rate": 4.919962686567164e-05, + "loss": 0.0106, + "step": 6864 + }, + { + "epoch": 6.41, + "learning_rate": 4.919916044776119e-05, + "loss": 0.0067, + "step": 6868 + }, + { + "epoch": 6.41, + "learning_rate": 4.919869402985075e-05, + "loss": 0.0088, + "step": 6872 + }, + { + "epoch": 6.41, + "learning_rate": 4.91982276119403e-05, + "loss": 0.0098, + "step": 6876 + }, + { + "epoch": 6.42, + "learning_rate": 4.919776119402985e-05, + "loss": 0.0177, + "step": 6880 + }, + { + "epoch": 6.42, + "learning_rate": 4.9197294776119404e-05, + "loss": 0.0097, + "step": 6884 + }, + { + "epoch": 6.43, + "learning_rate": 4.919682835820896e-05, + "loss": 0.0176, + "step": 6888 + }, + { + "epoch": 6.43, + "learning_rate": 4.919636194029851e-05, + "loss": 0.0085, + "step": 6892 + }, + { + "epoch": 6.43, + "learning_rate": 4.919589552238806e-05, + "loss": 0.0129, + "step": 6896 + }, + { + "epoch": 6.44, + "learning_rate": 4.919542910447762e-05, + "loss": 0.0131, + "step": 6900 + }, + { + "epoch": 6.44, + "learning_rate": 4.9194962686567165e-05, + "loss": 0.0155, + "step": 6904 + }, + { + "epoch": 6.44, + "learning_rate": 4.919449626865672e-05, + "loss": 0.009, + "step": 6908 + }, + { + "epoch": 6.45, + "learning_rate": 4.919402985074627e-05, + "loss": 0.0105, + "step": 6912 + }, + { + "epoch": 6.45, + "learning_rate": 4.919356343283582e-05, + "loss": 0.013, + "step": 6916 + }, + { + "epoch": 6.46, + "learning_rate": 4.919309701492538e-05, + "loss": 0.0181, + "step": 6920 + }, + { + "epoch": 6.46, + "learning_rate": 4.9192630597014926e-05, + "loss": 0.0118, + "step": 6924 + }, + { + "epoch": 6.46, + "learning_rate": 4.9192164179104474e-05, + "loss": 0.0109, + "step": 6928 + }, + { + "epoch": 6.47, + "learning_rate": 4.9191697761194036e-05, + "loss": 0.011, + "step": 6932 + }, + { + "epoch": 6.47, + "learning_rate": 4.9191231343283584e-05, + "loss": 0.0091, + "step": 6936 + }, + { + "epoch": 6.47, + "learning_rate": 4.919076492537313e-05, + "loss": 0.0233, + "step": 6940 + }, + { + "epoch": 6.48, + "learning_rate": 4.919029850746269e-05, + "loss": 0.0207, + "step": 6944 + }, + { + "epoch": 6.48, + "learning_rate": 4.918983208955224e-05, + "loss": 0.0153, + "step": 6948 + }, + { + "epoch": 6.49, + "learning_rate": 4.91893656716418e-05, + "loss": 0.0155, + "step": 6952 + }, + { + "epoch": 6.49, + "learning_rate": 4.9188899253731345e-05, + "loss": 0.015, + "step": 6956 + }, + { + "epoch": 6.49, + "learning_rate": 4.91884328358209e-05, + "loss": 0.0072, + "step": 6960 + }, + { + "epoch": 6.5, + "learning_rate": 4.9187966417910455e-05, + "loss": 0.0077, + "step": 6964 + }, + { + "epoch": 6.5, + "learning_rate": 4.91875e-05, + "loss": 0.017, + "step": 6968 + }, + { + "epoch": 6.5, + "learning_rate": 4.918703358208955e-05, + "loss": 0.0154, + "step": 6972 + }, + { + "epoch": 6.51, + "learning_rate": 4.9186567164179106e-05, + "loss": 0.0102, + "step": 6976 + }, + { + "epoch": 6.51, + "learning_rate": 4.918610074626866e-05, + "loss": 0.0213, + "step": 6980 + }, + { + "epoch": 6.51, + "learning_rate": 4.918563432835821e-05, + "loss": 0.0095, + "step": 6984 + }, + { + "epoch": 6.52, + "learning_rate": 4.9185167910447764e-05, + "loss": 0.0187, + "step": 6988 + }, + { + "epoch": 6.52, + "learning_rate": 4.918470149253732e-05, + "loss": 0.0093, + "step": 6992 + }, + { + "epoch": 6.53, + "learning_rate": 4.918423507462687e-05, + "loss": 0.0095, + "step": 6996 + }, + { + "epoch": 6.53, + "learning_rate": 4.918376865671642e-05, + "loss": 0.0084, + "step": 7000 + }, + { + "epoch": 6.53, + "eval_exact_match": 0.7030947775628626, + "eval_exec": 0.7504835589941973, + "eval_loss": 0.20910172164440155, + "eval_runtime": 1063.5178, + "eval_samples_per_second": 0.972, + "step": 7000 + }, + { + "epoch": 6.53, + "learning_rate": 4.918330223880597e-05, + "loss": 0.0181, + "step": 7004 + }, + { + "epoch": 6.54, + "learning_rate": 4.9182835820895525e-05, + "loss": 0.011, + "step": 7008 + }, + { + "epoch": 6.54, + "learning_rate": 4.918236940298508e-05, + "loss": 0.0015, + "step": 7012 + }, + { + "epoch": 6.54, + "learning_rate": 4.918190298507463e-05, + "loss": 0.0112, + "step": 7016 + }, + { + "epoch": 6.55, + "learning_rate": 4.918143656716418e-05, + "loss": 0.0163, + "step": 7020 + }, + { + "epoch": 6.55, + "learning_rate": 4.918097014925374e-05, + "loss": 0.0113, + "step": 7024 + }, + { + "epoch": 6.56, + "learning_rate": 4.9180503731343286e-05, + "loss": 0.0139, + "step": 7028 + }, + { + "epoch": 6.56, + "learning_rate": 4.9180037313432834e-05, + "loss": 0.019, + "step": 7032 + }, + { + "epoch": 6.56, + "learning_rate": 4.917957089552239e-05, + "loss": 0.0094, + "step": 7036 + }, + { + "epoch": 6.57, + "learning_rate": 4.9179104477611944e-05, + "loss": 0.0109, + "step": 7040 + }, + { + "epoch": 6.57, + "learning_rate": 4.917863805970149e-05, + "loss": 0.0132, + "step": 7044 + }, + { + "epoch": 6.57, + "learning_rate": 4.917817164179105e-05, + "loss": 0.0169, + "step": 7048 + }, + { + "epoch": 6.58, + "learning_rate": 4.91777052238806e-05, + "loss": 0.0235, + "step": 7052 + }, + { + "epoch": 6.58, + "learning_rate": 4.917723880597015e-05, + "loss": 0.0129, + "step": 7056 + }, + { + "epoch": 6.59, + "learning_rate": 4.9176772388059705e-05, + "loss": 0.0266, + "step": 7060 + }, + { + "epoch": 6.59, + "learning_rate": 4.917630597014925e-05, + "loss": 0.0084, + "step": 7064 + }, + { + "epoch": 6.59, + "learning_rate": 4.917583955223881e-05, + "loss": 0.0167, + "step": 7068 + }, + { + "epoch": 6.6, + "learning_rate": 4.917537313432836e-05, + "loss": 0.0045, + "step": 7072 + }, + { + "epoch": 6.6, + "learning_rate": 4.917490671641791e-05, + "loss": 0.0207, + "step": 7076 + }, + { + "epoch": 6.6, + "learning_rate": 4.9174440298507466e-05, + "loss": 0.0144, + "step": 7080 + }, + { + "epoch": 6.61, + "learning_rate": 4.917397388059702e-05, + "loss": 0.0044, + "step": 7084 + }, + { + "epoch": 6.61, + "learning_rate": 4.917350746268657e-05, + "loss": 0.0197, + "step": 7088 + }, + { + "epoch": 6.62, + "learning_rate": 4.917304104477612e-05, + "loss": 0.0137, + "step": 7092 + }, + { + "epoch": 6.62, + "learning_rate": 4.917257462686567e-05, + "loss": 0.0139, + "step": 7096 + }, + { + "epoch": 6.62, + "learning_rate": 4.917210820895523e-05, + "loss": 0.0188, + "step": 7100 + }, + { + "epoch": 6.63, + "learning_rate": 4.9171641791044775e-05, + "loss": 0.0072, + "step": 7104 + }, + { + "epoch": 6.63, + "learning_rate": 4.917117537313433e-05, + "loss": 0.01, + "step": 7108 + }, + { + "epoch": 6.63, + "learning_rate": 4.9170708955223885e-05, + "loss": 0.0185, + "step": 7112 + }, + { + "epoch": 6.64, + "learning_rate": 4.917024253731344e-05, + "loss": 0.005, + "step": 7116 + }, + { + "epoch": 6.64, + "learning_rate": 4.916977611940299e-05, + "loss": 0.0143, + "step": 7120 + }, + { + "epoch": 6.65, + "learning_rate": 4.9169309701492536e-05, + "loss": 0.0176, + "step": 7124 + }, + { + "epoch": 6.65, + "learning_rate": 4.91688432835821e-05, + "loss": 0.0129, + "step": 7128 + }, + { + "epoch": 6.65, + "learning_rate": 4.9168376865671646e-05, + "loss": 0.0038, + "step": 7132 + }, + { + "epoch": 6.66, + "learning_rate": 4.9167910447761194e-05, + "loss": 0.0117, + "step": 7136 + }, + { + "epoch": 6.66, + "learning_rate": 4.916744402985075e-05, + "loss": 0.0063, + "step": 7140 + }, + { + "epoch": 6.66, + "learning_rate": 4.9166977611940304e-05, + "loss": 0.0198, + "step": 7144 + }, + { + "epoch": 6.67, + "learning_rate": 4.916651119402985e-05, + "loss": 0.0205, + "step": 7148 + }, + { + "epoch": 6.67, + "learning_rate": 4.916604477611941e-05, + "loss": 0.0097, + "step": 7152 + }, + { + "epoch": 6.68, + "learning_rate": 4.9165578358208955e-05, + "loss": 0.0118, + "step": 7156 + }, + { + "epoch": 6.68, + "learning_rate": 4.916511194029851e-05, + "loss": 0.0137, + "step": 7160 + }, + { + "epoch": 6.68, + "learning_rate": 4.9164645522388065e-05, + "loss": 0.0047, + "step": 7164 + }, + { + "epoch": 6.69, + "learning_rate": 4.916417910447761e-05, + "loss": 0.0149, + "step": 7168 + }, + { + "epoch": 6.69, + "learning_rate": 4.916371268656717e-05, + "loss": 0.0069, + "step": 7172 + }, + { + "epoch": 6.69, + "learning_rate": 4.916324626865672e-05, + "loss": 0.0114, + "step": 7176 + }, + { + "epoch": 6.7, + "learning_rate": 4.916277985074627e-05, + "loss": 0.0087, + "step": 7180 + }, + { + "epoch": 6.7, + "learning_rate": 4.916231343283582e-05, + "loss": 0.0021, + "step": 7184 + }, + { + "epoch": 6.71, + "learning_rate": 4.916184701492538e-05, + "loss": 0.0121, + "step": 7188 + }, + { + "epoch": 6.71, + "learning_rate": 4.916138059701493e-05, + "loss": 0.0042, + "step": 7192 + }, + { + "epoch": 6.71, + "learning_rate": 4.916091417910448e-05, + "loss": 0.0052, + "step": 7196 + }, + { + "epoch": 6.72, + "learning_rate": 4.916044776119403e-05, + "loss": 0.0108, + "step": 7200 + }, + { + "epoch": 6.72, + "learning_rate": 4.915998134328359e-05, + "loss": 0.0175, + "step": 7204 + }, + { + "epoch": 6.72, + "learning_rate": 4.9159514925373135e-05, + "loss": 0.0045, + "step": 7208 + }, + { + "epoch": 6.73, + "learning_rate": 4.915904850746269e-05, + "loss": 0.0068, + "step": 7212 + }, + { + "epoch": 6.73, + "learning_rate": 4.915858208955224e-05, + "loss": 0.0096, + "step": 7216 + }, + { + "epoch": 6.73, + "learning_rate": 4.915811567164179e-05, + "loss": 0.0232, + "step": 7220 + }, + { + "epoch": 6.74, + "learning_rate": 4.915764925373135e-05, + "loss": 0.0145, + "step": 7224 + }, + { + "epoch": 6.74, + "learning_rate": 4.9157182835820896e-05, + "loss": 0.0063, + "step": 7228 + }, + { + "epoch": 6.75, + "learning_rate": 4.915671641791045e-05, + "loss": 0.0228, + "step": 7232 + }, + { + "epoch": 6.75, + "learning_rate": 4.9156250000000006e-05, + "loss": 0.0146, + "step": 7236 + }, + { + "epoch": 6.75, + "learning_rate": 4.9155783582089554e-05, + "loss": 0.0097, + "step": 7240 + }, + { + "epoch": 6.76, + "learning_rate": 4.91553171641791e-05, + "loss": 0.0089, + "step": 7244 + }, + { + "epoch": 6.76, + "learning_rate": 4.9154850746268664e-05, + "loss": 0.0091, + "step": 7248 + }, + { + "epoch": 6.76, + "learning_rate": 4.915438432835821e-05, + "loss": 0.0064, + "step": 7252 + }, + { + "epoch": 6.77, + "learning_rate": 4.915391791044776e-05, + "loss": 0.0121, + "step": 7256 + }, + { + "epoch": 6.77, + "learning_rate": 4.9153451492537315e-05, + "loss": 0.0026, + "step": 7260 + }, + { + "epoch": 6.78, + "learning_rate": 4.915298507462687e-05, + "loss": 0.0141, + "step": 7264 + }, + { + "epoch": 6.78, + "learning_rate": 4.915251865671642e-05, + "loss": 0.004, + "step": 7268 + }, + { + "epoch": 6.78, + "learning_rate": 4.915205223880597e-05, + "loss": 0.0124, + "step": 7272 + }, + { + "epoch": 6.79, + "learning_rate": 4.915158582089552e-05, + "loss": 0.0115, + "step": 7276 + }, + { + "epoch": 6.79, + "learning_rate": 4.915111940298508e-05, + "loss": 0.0193, + "step": 7280 + }, + { + "epoch": 6.79, + "learning_rate": 4.915065298507463e-05, + "loss": 0.0088, + "step": 7284 + }, + { + "epoch": 6.8, + "learning_rate": 4.915018656716418e-05, + "loss": 0.0076, + "step": 7288 + }, + { + "epoch": 6.8, + "learning_rate": 4.9149720149253734e-05, + "loss": 0.0137, + "step": 7292 + }, + { + "epoch": 6.81, + "learning_rate": 4.914925373134329e-05, + "loss": 0.0108, + "step": 7296 + }, + { + "epoch": 6.81, + "learning_rate": 4.914878731343284e-05, + "loss": 0.008, + "step": 7300 + }, + { + "epoch": 6.81, + "learning_rate": 4.914832089552239e-05, + "loss": 0.0146, + "step": 7304 + }, + { + "epoch": 6.82, + "learning_rate": 4.9147854477611947e-05, + "loss": 0.0134, + "step": 7308 + }, + { + "epoch": 6.82, + "learning_rate": 4.9147388059701495e-05, + "loss": 0.0099, + "step": 7312 + }, + { + "epoch": 6.82, + "learning_rate": 4.914692164179105e-05, + "loss": 0.0079, + "step": 7316 + }, + { + "epoch": 6.83, + "learning_rate": 4.91464552238806e-05, + "loss": 0.0115, + "step": 7320 + }, + { + "epoch": 6.83, + "learning_rate": 4.914598880597015e-05, + "loss": 0.0088, + "step": 7324 + }, + { + "epoch": 6.84, + "learning_rate": 4.914552238805971e-05, + "loss": 0.0175, + "step": 7328 + }, + { + "epoch": 6.84, + "learning_rate": 4.9145055970149256e-05, + "loss": 0.0159, + "step": 7332 + }, + { + "epoch": 6.84, + "learning_rate": 4.9144589552238804e-05, + "loss": 0.0211, + "step": 7336 + }, + { + "epoch": 6.85, + "learning_rate": 4.9144123134328365e-05, + "loss": 0.02, + "step": 7340 + }, + { + "epoch": 6.85, + "learning_rate": 4.9143656716417914e-05, + "loss": 0.0059, + "step": 7344 + }, + { + "epoch": 6.85, + "learning_rate": 4.914319029850746e-05, + "loss": 0.0227, + "step": 7348 + }, + { + "epoch": 6.86, + "learning_rate": 4.9142723880597017e-05, + "loss": 0.0125, + "step": 7352 + }, + { + "epoch": 6.86, + "learning_rate": 4.914225746268657e-05, + "loss": 0.0047, + "step": 7356 + }, + { + "epoch": 6.87, + "learning_rate": 4.914179104477612e-05, + "loss": 0.0119, + "step": 7360 + }, + { + "epoch": 6.87, + "learning_rate": 4.9141324626865675e-05, + "loss": 0.0123, + "step": 7364 + }, + { + "epoch": 6.87, + "learning_rate": 4.914085820895522e-05, + "loss": 0.0082, + "step": 7368 + }, + { + "epoch": 6.88, + "learning_rate": 4.914039179104478e-05, + "loss": 0.0152, + "step": 7372 + }, + { + "epoch": 6.88, + "learning_rate": 4.913992537313433e-05, + "loss": 0.0106, + "step": 7376 + }, + { + "epoch": 6.88, + "learning_rate": 4.913945895522388e-05, + "loss": 0.0141, + "step": 7380 + }, + { + "epoch": 6.89, + "learning_rate": 4.9138992537313436e-05, + "loss": 0.0142, + "step": 7384 + }, + { + "epoch": 6.89, + "learning_rate": 4.913852611940299e-05, + "loss": 0.0166, + "step": 7388 + }, + { + "epoch": 6.9, + "learning_rate": 4.913805970149254e-05, + "loss": 0.0135, + "step": 7392 + }, + { + "epoch": 6.9, + "learning_rate": 4.913759328358209e-05, + "loss": 0.0169, + "step": 7396 + }, + { + "epoch": 6.9, + "learning_rate": 4.913712686567165e-05, + "loss": 0.011, + "step": 7400 + }, + { + "epoch": 6.91, + "learning_rate": 4.9136660447761197e-05, + "loss": 0.0163, + "step": 7404 + }, + { + "epoch": 6.91, + "learning_rate": 4.9136194029850745e-05, + "loss": 0.0102, + "step": 7408 + }, + { + "epoch": 6.91, + "learning_rate": 4.91357276119403e-05, + "loss": 0.0066, + "step": 7412 + }, + { + "epoch": 6.92, + "learning_rate": 4.9135261194029854e-05, + "loss": 0.0132, + "step": 7416 + }, + { + "epoch": 6.92, + "learning_rate": 4.91347947761194e-05, + "loss": 0.0091, + "step": 7420 + }, + { + "epoch": 6.93, + "learning_rate": 4.913432835820896e-05, + "loss": 0.0095, + "step": 7424 + }, + { + "epoch": 6.93, + "learning_rate": 4.9133861940298506e-05, + "loss": 0.0222, + "step": 7428 + }, + { + "epoch": 6.93, + "learning_rate": 4.913339552238806e-05, + "loss": 0.0078, + "step": 7432 + }, + { + "epoch": 6.94, + "learning_rate": 4.9132929104477615e-05, + "loss": 0.0157, + "step": 7436 + }, + { + "epoch": 6.94, + "learning_rate": 4.9132462686567164e-05, + "loss": 0.0103, + "step": 7440 + }, + { + "epoch": 6.94, + "learning_rate": 4.913199626865672e-05, + "loss": 0.0178, + "step": 7444 + }, + { + "epoch": 6.95, + "learning_rate": 4.9131529850746273e-05, + "loss": 0.0115, + "step": 7448 + }, + { + "epoch": 6.95, + "learning_rate": 4.913106343283582e-05, + "loss": 0.0083, + "step": 7452 + }, + { + "epoch": 6.96, + "learning_rate": 4.9130597014925376e-05, + "loss": 0.0242, + "step": 7456 + }, + { + "epoch": 6.96, + "learning_rate": 4.913013059701493e-05, + "loss": 0.0086, + "step": 7460 + }, + { + "epoch": 6.96, + "learning_rate": 4.912966417910448e-05, + "loss": 0.0056, + "step": 7464 + }, + { + "epoch": 6.97, + "learning_rate": 4.9129197761194034e-05, + "loss": 0.0148, + "step": 7468 + }, + { + "epoch": 6.97, + "learning_rate": 4.912873134328358e-05, + "loss": 0.0127, + "step": 7472 + }, + { + "epoch": 6.97, + "learning_rate": 4.912826492537314e-05, + "loss": 0.0049, + "step": 7476 + }, + { + "epoch": 6.98, + "learning_rate": 4.912779850746269e-05, + "loss": 0.0044, + "step": 7480 + }, + { + "epoch": 6.98, + "learning_rate": 4.912733208955224e-05, + "loss": 0.0056, + "step": 7484 + }, + { + "epoch": 6.98, + "learning_rate": 4.912686567164179e-05, + "loss": 0.0335, + "step": 7488 + }, + { + "epoch": 6.99, + "learning_rate": 4.912639925373135e-05, + "loss": 0.0055, + "step": 7492 + }, + { + "epoch": 6.99, + "learning_rate": 4.91259328358209e-05, + "loss": 0.0129, + "step": 7496 + }, + { + "epoch": 7.0, + "learning_rate": 4.9125466417910447e-05, + "loss": 0.0122, + "step": 7500 + }, + { + "epoch": 7.0, + "eval_exact_match": 0.730174081237911, + "eval_exec": 0.7591876208897486, + "eval_loss": 0.21238785982131958, + "eval_runtime": 1160.0553, + "eval_samples_per_second": 0.891, + "step": 7500 + }, + { + "epoch": 7.0, + "learning_rate": 4.9125e-05, + "loss": 0.0162, + "step": 7504 + }, + { + "epoch": 7.0, + "learning_rate": 4.9124533582089556e-05, + "loss": 0.0051, + "step": 7508 + }, + { + "epoch": 7.01, + "learning_rate": 4.9124067164179104e-05, + "loss": 0.0151, + "step": 7512 + }, + { + "epoch": 7.01, + "learning_rate": 4.912360074626866e-05, + "loss": 0.0096, + "step": 7516 + }, + { + "epoch": 7.01, + "learning_rate": 4.9123134328358214e-05, + "loss": 0.0107, + "step": 7520 + }, + { + "epoch": 7.02, + "learning_rate": 4.912266791044776e-05, + "loss": 0.0088, + "step": 7524 + }, + { + "epoch": 7.02, + "learning_rate": 4.912220149253732e-05, + "loss": 0.0022, + "step": 7528 + }, + { + "epoch": 7.03, + "learning_rate": 4.9121735074626865e-05, + "loss": 0.0129, + "step": 7532 + }, + { + "epoch": 7.03, + "learning_rate": 4.912126865671642e-05, + "loss": 0.0049, + "step": 7536 + }, + { + "epoch": 7.03, + "learning_rate": 4.9120802238805975e-05, + "loss": 0.0068, + "step": 7540 + }, + { + "epoch": 7.04, + "learning_rate": 4.9120335820895523e-05, + "loss": 0.0109, + "step": 7544 + }, + { + "epoch": 7.04, + "learning_rate": 4.911986940298507e-05, + "loss": 0.0266, + "step": 7548 + }, + { + "epoch": 7.04, + "learning_rate": 4.911940298507463e-05, + "loss": 0.0092, + "step": 7552 + }, + { + "epoch": 7.05, + "learning_rate": 4.911893656716418e-05, + "loss": 0.0166, + "step": 7556 + }, + { + "epoch": 7.05, + "learning_rate": 4.911847014925373e-05, + "loss": 0.007, + "step": 7560 + }, + { + "epoch": 7.06, + "learning_rate": 4.9118003731343284e-05, + "loss": 0.0054, + "step": 7564 + }, + { + "epoch": 7.06, + "learning_rate": 4.911753731343284e-05, + "loss": 0.0135, + "step": 7568 + }, + { + "epoch": 7.06, + "learning_rate": 4.911707089552239e-05, + "loss": 0.0149, + "step": 7572 + }, + { + "epoch": 7.07, + "learning_rate": 4.911660447761194e-05, + "loss": 0.0119, + "step": 7576 + }, + { + "epoch": 7.07, + "learning_rate": 4.91161380597015e-05, + "loss": 0.0054, + "step": 7580 + }, + { + "epoch": 7.07, + "learning_rate": 4.9115671641791045e-05, + "loss": 0.0073, + "step": 7584 + }, + { + "epoch": 7.08, + "learning_rate": 4.91152052238806e-05, + "loss": 0.0045, + "step": 7588 + }, + { + "epoch": 7.08, + "learning_rate": 4.911473880597015e-05, + "loss": 0.0114, + "step": 7592 + }, + { + "epoch": 7.09, + "learning_rate": 4.91142723880597e-05, + "loss": 0.0094, + "step": 7596 + }, + { + "epoch": 7.09, + "learning_rate": 4.911380597014926e-05, + "loss": 0.0177, + "step": 7600 + }, + { + "epoch": 7.09, + "learning_rate": 4.9113339552238806e-05, + "loss": 0.0134, + "step": 7604 + }, + { + "epoch": 7.1, + "learning_rate": 4.911287313432836e-05, + "loss": 0.0129, + "step": 7608 + }, + { + "epoch": 7.1, + "learning_rate": 4.9112406716417916e-05, + "loss": 0.0205, + "step": 7612 + }, + { + "epoch": 7.1, + "learning_rate": 4.9111940298507464e-05, + "loss": 0.0096, + "step": 7616 + }, + { + "epoch": 7.11, + "learning_rate": 4.911147388059702e-05, + "loss": 0.0108, + "step": 7620 + }, + { + "epoch": 7.11, + "learning_rate": 4.911100746268657e-05, + "loss": 0.0313, + "step": 7624 + }, + { + "epoch": 7.12, + "learning_rate": 4.911054104477612e-05, + "loss": 0.0134, + "step": 7628 + }, + { + "epoch": 7.12, + "learning_rate": 4.911007462686568e-05, + "loss": 0.005, + "step": 7632 + }, + { + "epoch": 7.12, + "learning_rate": 4.9109608208955225e-05, + "loss": 0.0154, + "step": 7636 + }, + { + "epoch": 7.13, + "learning_rate": 4.910914179104478e-05, + "loss": 0.0044, + "step": 7640 + }, + { + "epoch": 7.13, + "learning_rate": 4.9108675373134335e-05, + "loss": 0.0115, + "step": 7644 + }, + { + "epoch": 7.13, + "learning_rate": 4.910820895522388e-05, + "loss": 0.0098, + "step": 7648 + }, + { + "epoch": 7.14, + "learning_rate": 4.910774253731343e-05, + "loss": 0.003, + "step": 7652 + }, + { + "epoch": 7.14, + "learning_rate": 4.9107276119402986e-05, + "loss": 0.0067, + "step": 7656 + }, + { + "epoch": 7.15, + "learning_rate": 4.910680970149254e-05, + "loss": 0.0123, + "step": 7660 + }, + { + "epoch": 7.15, + "learning_rate": 4.910634328358209e-05, + "loss": 0.0124, + "step": 7664 + }, + { + "epoch": 7.15, + "learning_rate": 4.9105876865671644e-05, + "loss": 0.0063, + "step": 7668 + }, + { + "epoch": 7.16, + "learning_rate": 4.91054104477612e-05, + "loss": 0.0181, + "step": 7672 + }, + { + "epoch": 7.16, + "learning_rate": 4.910494402985075e-05, + "loss": 0.005, + "step": 7676 + }, + { + "epoch": 7.16, + "learning_rate": 4.91044776119403e-05, + "loss": 0.0052, + "step": 7680 + }, + { + "epoch": 7.17, + "learning_rate": 4.910401119402985e-05, + "loss": 0.0135, + "step": 7684 + }, + { + "epoch": 7.17, + "learning_rate": 4.9103544776119405e-05, + "loss": 0.0053, + "step": 7688 + }, + { + "epoch": 7.18, + "learning_rate": 4.910307835820896e-05, + "loss": 0.0116, + "step": 7692 + }, + { + "epoch": 7.18, + "learning_rate": 4.910261194029851e-05, + "loss": 0.0078, + "step": 7696 + }, + { + "epoch": 7.18, + "learning_rate": 4.910214552238806e-05, + "loss": 0.0033, + "step": 7700 + }, + { + "epoch": 7.19, + "learning_rate": 4.910167910447762e-05, + "loss": 0.0107, + "step": 7704 + }, + { + "epoch": 7.19, + "learning_rate": 4.9101212686567166e-05, + "loss": 0.0045, + "step": 7708 + }, + { + "epoch": 7.19, + "learning_rate": 4.9100746268656714e-05, + "loss": 0.0127, + "step": 7712 + }, + { + "epoch": 7.2, + "learning_rate": 4.910027985074627e-05, + "loss": 0.0106, + "step": 7716 + }, + { + "epoch": 7.2, + "learning_rate": 4.9099813432835824e-05, + "loss": 0.0105, + "step": 7720 + }, + { + "epoch": 7.21, + "learning_rate": 4.909934701492537e-05, + "loss": 0.0073, + "step": 7724 + }, + { + "epoch": 7.21, + "learning_rate": 4.909888059701493e-05, + "loss": 0.0075, + "step": 7728 + }, + { + "epoch": 7.21, + "learning_rate": 4.909841417910448e-05, + "loss": 0.013, + "step": 7732 + }, + { + "epoch": 7.22, + "learning_rate": 4.909794776119403e-05, + "loss": 0.0041, + "step": 7736 + }, + { + "epoch": 7.22, + "learning_rate": 4.9097481343283585e-05, + "loss": 0.0102, + "step": 7740 + }, + { + "epoch": 7.22, + "learning_rate": 4.909701492537313e-05, + "loss": 0.0078, + "step": 7744 + }, + { + "epoch": 7.23, + "learning_rate": 4.909654850746269e-05, + "loss": 0.0059, + "step": 7748 + }, + { + "epoch": 7.23, + "learning_rate": 4.909608208955224e-05, + "loss": 0.0029, + "step": 7752 + }, + { + "epoch": 7.24, + "learning_rate": 4.909561567164179e-05, + "loss": 0.0101, + "step": 7756 + }, + { + "epoch": 7.24, + "learning_rate": 4.9095149253731346e-05, + "loss": 0.0113, + "step": 7760 + }, + { + "epoch": 7.24, + "learning_rate": 4.90946828358209e-05, + "loss": 0.0097, + "step": 7764 + }, + { + "epoch": 7.25, + "learning_rate": 4.909421641791045e-05, + "loss": 0.003, + "step": 7768 + }, + { + "epoch": 7.25, + "learning_rate": 4.9093750000000004e-05, + "loss": 0.0047, + "step": 7772 + }, + { + "epoch": 7.25, + "learning_rate": 4.909328358208955e-05, + "loss": 0.0113, + "step": 7776 + }, + { + "epoch": 7.26, + "learning_rate": 4.909281716417911e-05, + "loss": 0.0074, + "step": 7780 + }, + { + "epoch": 7.26, + "learning_rate": 4.909235074626866e-05, + "loss": 0.0122, + "step": 7784 + }, + { + "epoch": 7.26, + "learning_rate": 4.909188432835821e-05, + "loss": 0.0312, + "step": 7788 + }, + { + "epoch": 7.27, + "learning_rate": 4.9091417910447765e-05, + "loss": 0.0104, + "step": 7792 + }, + { + "epoch": 7.27, + "learning_rate": 4.909095149253732e-05, + "loss": 0.0124, + "step": 7796 + }, + { + "epoch": 7.28, + "learning_rate": 4.909048507462687e-05, + "loss": 0.0217, + "step": 7800 + }, + { + "epoch": 7.28, + "learning_rate": 4.9090018656716416e-05, + "loss": 0.0164, + "step": 7804 + }, + { + "epoch": 7.28, + "learning_rate": 4.908955223880598e-05, + "loss": 0.0105, + "step": 7808 + }, + { + "epoch": 7.29, + "learning_rate": 4.9089085820895526e-05, + "loss": 0.01, + "step": 7812 + }, + { + "epoch": 7.29, + "learning_rate": 4.9088619402985074e-05, + "loss": 0.0267, + "step": 7816 + }, + { + "epoch": 7.29, + "learning_rate": 4.908815298507463e-05, + "loss": 0.0062, + "step": 7820 + }, + { + "epoch": 7.3, + "learning_rate": 4.9087686567164184e-05, + "loss": 0.0099, + "step": 7824 + }, + { + "epoch": 7.3, + "learning_rate": 4.908722014925373e-05, + "loss": 0.0089, + "step": 7828 + }, + { + "epoch": 7.31, + "learning_rate": 4.908675373134329e-05, + "loss": 0.0067, + "step": 7832 + }, + { + "epoch": 7.31, + "learning_rate": 4.9086287313432835e-05, + "loss": 0.0028, + "step": 7836 + }, + { + "epoch": 7.31, + "learning_rate": 4.908582089552239e-05, + "loss": 0.0052, + "step": 7840 + }, + { + "epoch": 7.32, + "learning_rate": 4.9085354477611945e-05, + "loss": 0.0033, + "step": 7844 + }, + { + "epoch": 7.32, + "learning_rate": 4.908488805970149e-05, + "loss": 0.0082, + "step": 7848 + }, + { + "epoch": 7.32, + "learning_rate": 4.908442164179105e-05, + "loss": 0.0134, + "step": 7852 + }, + { + "epoch": 7.33, + "learning_rate": 4.90839552238806e-05, + "loss": 0.0053, + "step": 7856 + }, + { + "epoch": 7.33, + "learning_rate": 4.908348880597015e-05, + "loss": 0.0142, + "step": 7860 + }, + { + "epoch": 7.34, + "learning_rate": 4.90830223880597e-05, + "loss": 0.0097, + "step": 7864 + }, + { + "epoch": 7.34, + "learning_rate": 4.908255597014926e-05, + "loss": 0.0063, + "step": 7868 + }, + { + "epoch": 7.34, + "learning_rate": 4.908208955223881e-05, + "loss": 0.0141, + "step": 7872 + }, + { + "epoch": 7.35, + "learning_rate": 4.908162313432836e-05, + "loss": 0.0046, + "step": 7876 + }, + { + "epoch": 7.35, + "learning_rate": 4.908115671641791e-05, + "loss": 0.0068, + "step": 7880 + }, + { + "epoch": 7.35, + "learning_rate": 4.908069029850747e-05, + "loss": 0.006, + "step": 7884 + }, + { + "epoch": 7.36, + "learning_rate": 4.9080223880597015e-05, + "loss": 0.0082, + "step": 7888 + }, + { + "epoch": 7.36, + "learning_rate": 4.907975746268657e-05, + "loss": 0.0071, + "step": 7892 + }, + { + "epoch": 7.37, + "learning_rate": 4.907929104477612e-05, + "loss": 0.0034, + "step": 7896 + }, + { + "epoch": 7.37, + "learning_rate": 4.907882462686567e-05, + "loss": 0.0028, + "step": 7900 + }, + { + "epoch": 7.37, + "learning_rate": 4.907835820895523e-05, + "loss": 0.004, + "step": 7904 + }, + { + "epoch": 7.38, + "learning_rate": 4.9077891791044776e-05, + "loss": 0.0083, + "step": 7908 + }, + { + "epoch": 7.38, + "learning_rate": 4.907742537313433e-05, + "loss": 0.0091, + "step": 7912 + }, + { + "epoch": 7.38, + "learning_rate": 4.9076958955223886e-05, + "loss": 0.0068, + "step": 7916 + }, + { + "epoch": 7.39, + "learning_rate": 4.9076492537313434e-05, + "loss": 0.0053, + "step": 7920 + }, + { + "epoch": 7.39, + "learning_rate": 4.907602611940298e-05, + "loss": 0.0092, + "step": 7924 + }, + { + "epoch": 7.4, + "learning_rate": 4.9075559701492544e-05, + "loss": 0.002, + "step": 7928 + }, + { + "epoch": 7.4, + "learning_rate": 4.907509328358209e-05, + "loss": 0.0121, + "step": 7932 + }, + { + "epoch": 7.4, + "learning_rate": 4.907462686567165e-05, + "loss": 0.0158, + "step": 7936 + }, + { + "epoch": 7.41, + "learning_rate": 4.9074160447761195e-05, + "loss": 0.0135, + "step": 7940 + }, + { + "epoch": 7.41, + "learning_rate": 4.907369402985075e-05, + "loss": 0.0088, + "step": 7944 + }, + { + "epoch": 7.41, + "learning_rate": 4.9073227611940305e-05, + "loss": 0.0071, + "step": 7948 + }, + { + "epoch": 7.42, + "learning_rate": 4.907276119402985e-05, + "loss": 0.0072, + "step": 7952 + }, + { + "epoch": 7.42, + "learning_rate": 4.90722947761194e-05, + "loss": 0.0344, + "step": 7956 + }, + { + "epoch": 7.43, + "learning_rate": 4.907182835820896e-05, + "loss": 0.0059, + "step": 7960 + }, + { + "epoch": 7.43, + "learning_rate": 4.907136194029851e-05, + "loss": 0.0101, + "step": 7964 + }, + { + "epoch": 7.43, + "learning_rate": 4.907089552238806e-05, + "loss": 0.013, + "step": 7968 + }, + { + "epoch": 7.44, + "learning_rate": 4.9070429104477614e-05, + "loss": 0.0268, + "step": 7972 + }, + { + "epoch": 7.44, + "learning_rate": 4.906996268656717e-05, + "loss": 0.0088, + "step": 7976 + }, + { + "epoch": 7.44, + "learning_rate": 4.906949626865672e-05, + "loss": 0.01, + "step": 7980 + }, + { + "epoch": 7.45, + "learning_rate": 4.906902985074627e-05, + "loss": 0.0091, + "step": 7984 + }, + { + "epoch": 7.45, + "learning_rate": 4.906856343283583e-05, + "loss": 0.0176, + "step": 7988 + }, + { + "epoch": 7.46, + "learning_rate": 4.9068097014925375e-05, + "loss": 0.0144, + "step": 7992 + }, + { + "epoch": 7.46, + "learning_rate": 4.906763059701493e-05, + "loss": 0.016, + "step": 7996 + }, + { + "epoch": 7.46, + "learning_rate": 4.906716417910448e-05, + "loss": 0.0184, + "step": 8000 + }, + { + "epoch": 7.46, + "eval_exact_match": 0.7059961315280464, + "eval_exec": 0.7427466150870407, + "eval_loss": 0.19857120513916016, + "eval_runtime": 1295.3702, + "eval_samples_per_second": 0.798, + "step": 8000 + }, + { + "epoch": 7.47, + "learning_rate": 4.906669776119403e-05, + "loss": 0.0074, + "step": 8004 + }, + { + "epoch": 7.47, + "learning_rate": 4.906623134328359e-05, + "loss": 0.0099, + "step": 8008 + }, + { + "epoch": 7.47, + "learning_rate": 4.9065764925373136e-05, + "loss": 0.0031, + "step": 8012 + }, + { + "epoch": 7.48, + "learning_rate": 4.9065298507462684e-05, + "loss": 0.0014, + "step": 8016 + }, + { + "epoch": 7.48, + "learning_rate": 4.9064832089552246e-05, + "loss": 0.0187, + "step": 8020 + }, + { + "epoch": 7.49, + "learning_rate": 4.9064365671641794e-05, + "loss": 0.0078, + "step": 8024 + }, + { + "epoch": 7.49, + "learning_rate": 4.906389925373134e-05, + "loss": 0.0109, + "step": 8028 + }, + { + "epoch": 7.49, + "learning_rate": 4.90634328358209e-05, + "loss": 0.0088, + "step": 8032 + }, + { + "epoch": 7.5, + "learning_rate": 4.906296641791045e-05, + "loss": 0.0195, + "step": 8036 + }, + { + "epoch": 7.5, + "learning_rate": 4.90625e-05, + "loss": 0.0075, + "step": 8040 + }, + { + "epoch": 7.5, + "learning_rate": 4.9062033582089555e-05, + "loss": 0.0075, + "step": 8044 + }, + { + "epoch": 7.51, + "learning_rate": 4.906156716417911e-05, + "loss": 0.0111, + "step": 8048 + }, + { + "epoch": 7.51, + "learning_rate": 4.906110074626866e-05, + "loss": 0.0068, + "step": 8052 + }, + { + "epoch": 7.51, + "learning_rate": 4.906063432835821e-05, + "loss": 0.0266, + "step": 8056 + }, + { + "epoch": 7.52, + "learning_rate": 4.906016791044776e-05, + "loss": 0.0099, + "step": 8060 + }, + { + "epoch": 7.52, + "learning_rate": 4.9059701492537316e-05, + "loss": 0.0185, + "step": 8064 + }, + { + "epoch": 7.53, + "learning_rate": 4.905923507462687e-05, + "loss": 0.0554, + "step": 8068 + }, + { + "epoch": 7.53, + "learning_rate": 4.905876865671642e-05, + "loss": 0.0089, + "step": 8072 + }, + { + "epoch": 7.53, + "learning_rate": 4.905830223880597e-05, + "loss": 0.0142, + "step": 8076 + }, + { + "epoch": 7.54, + "learning_rate": 4.905783582089553e-05, + "loss": 0.0093, + "step": 8080 + }, + { + "epoch": 7.54, + "learning_rate": 4.905736940298508e-05, + "loss": 0.004, + "step": 8084 + }, + { + "epoch": 7.54, + "learning_rate": 4.9056902985074625e-05, + "loss": 0.0191, + "step": 8088 + }, + { + "epoch": 7.55, + "learning_rate": 4.905643656716418e-05, + "loss": 0.0087, + "step": 8092 + }, + { + "epoch": 7.55, + "learning_rate": 4.9055970149253735e-05, + "loss": 0.0024, + "step": 8096 + }, + { + "epoch": 7.56, + "learning_rate": 4.905550373134329e-05, + "loss": 0.0119, + "step": 8100 + }, + { + "epoch": 7.56, + "learning_rate": 4.905503731343284e-05, + "loss": 0.0183, + "step": 8104 + }, + { + "epoch": 7.56, + "learning_rate": 4.9054570895522386e-05, + "loss": 0.0335, + "step": 8108 + }, + { + "epoch": 7.57, + "learning_rate": 4.905410447761195e-05, + "loss": 0.0109, + "step": 8112 + }, + { + "epoch": 7.57, + "learning_rate": 4.9053638059701496e-05, + "loss": 0.0087, + "step": 8116 + }, + { + "epoch": 7.57, + "learning_rate": 4.9053171641791044e-05, + "loss": 0.0147, + "step": 8120 + }, + { + "epoch": 7.58, + "learning_rate": 4.90527052238806e-05, + "loss": 0.0057, + "step": 8124 + }, + { + "epoch": 7.58, + "learning_rate": 4.9052238805970154e-05, + "loss": 0.0076, + "step": 8128 + }, + { + "epoch": 7.59, + "learning_rate": 4.90517723880597e-05, + "loss": 0.0141, + "step": 8132 + }, + { + "epoch": 7.59, + "learning_rate": 4.905130597014926e-05, + "loss": 0.011, + "step": 8136 + }, + { + "epoch": 7.59, + "learning_rate": 4.905083955223881e-05, + "loss": 0.0131, + "step": 8140 + }, + { + "epoch": 7.6, + "learning_rate": 4.905037313432836e-05, + "loss": 0.0042, + "step": 8144 + }, + { + "epoch": 7.6, + "learning_rate": 4.9049906716417915e-05, + "loss": 0.0029, + "step": 8148 + }, + { + "epoch": 7.6, + "learning_rate": 4.904944029850746e-05, + "loss": 0.0093, + "step": 8152 + }, + { + "epoch": 7.61, + "learning_rate": 4.904897388059702e-05, + "loss": 0.0169, + "step": 8156 + }, + { + "epoch": 7.61, + "learning_rate": 4.904850746268657e-05, + "loss": 0.0051, + "step": 8160 + }, + { + "epoch": 7.62, + "learning_rate": 4.904804104477612e-05, + "loss": 0.0164, + "step": 8164 + }, + { + "epoch": 7.62, + "learning_rate": 4.904757462686567e-05, + "loss": 0.0187, + "step": 8168 + }, + { + "epoch": 7.62, + "learning_rate": 4.904710820895523e-05, + "loss": 0.0233, + "step": 8172 + }, + { + "epoch": 7.63, + "learning_rate": 4.904664179104478e-05, + "loss": 0.0178, + "step": 8176 + }, + { + "epoch": 7.63, + "learning_rate": 4.904617537313433e-05, + "loss": 0.0074, + "step": 8180 + }, + { + "epoch": 7.63, + "learning_rate": 4.904570895522388e-05, + "loss": 0.0159, + "step": 8184 + }, + { + "epoch": 7.64, + "learning_rate": 4.904524253731344e-05, + "loss": 0.0035, + "step": 8188 + }, + { + "epoch": 7.64, + "learning_rate": 4.9044776119402985e-05, + "loss": 0.0147, + "step": 8192 + }, + { + "epoch": 7.65, + "learning_rate": 4.904430970149254e-05, + "loss": 0.0221, + "step": 8196 + }, + { + "epoch": 7.65, + "learning_rate": 4.9043843283582095e-05, + "loss": 0.0231, + "step": 8200 + }, + { + "epoch": 7.65, + "learning_rate": 4.904337686567164e-05, + "loss": 0.0097, + "step": 8204 + }, + { + "epoch": 7.66, + "learning_rate": 4.90429104477612e-05, + "loss": 0.0051, + "step": 8208 + }, + { + "epoch": 7.66, + "learning_rate": 4.9042444029850746e-05, + "loss": 0.0112, + "step": 8212 + }, + { + "epoch": 7.66, + "learning_rate": 4.90419776119403e-05, + "loss": 0.0071, + "step": 8216 + }, + { + "epoch": 7.67, + "learning_rate": 4.9041511194029856e-05, + "loss": 0.0031, + "step": 8220 + }, + { + "epoch": 7.67, + "learning_rate": 4.9041044776119404e-05, + "loss": 0.0922, + "step": 8224 + }, + { + "epoch": 7.68, + "learning_rate": 4.904057835820895e-05, + "loss": 0.0062, + "step": 8228 + }, + { + "epoch": 7.68, + "learning_rate": 4.9040111940298514e-05, + "loss": 0.0073, + "step": 8232 + }, + { + "epoch": 7.68, + "learning_rate": 4.903964552238806e-05, + "loss": 0.008, + "step": 8236 + }, + { + "epoch": 7.69, + "learning_rate": 4.903917910447761e-05, + "loss": 0.0109, + "step": 8240 + }, + { + "epoch": 7.69, + "learning_rate": 4.9038712686567165e-05, + "loss": 0.0079, + "step": 8244 + }, + { + "epoch": 7.69, + "learning_rate": 4.903824626865672e-05, + "loss": 0.0109, + "step": 8248 + }, + { + "epoch": 7.7, + "learning_rate": 4.903777985074627e-05, + "loss": 0.0054, + "step": 8252 + }, + { + "epoch": 7.7, + "learning_rate": 4.903731343283582e-05, + "loss": 0.005, + "step": 8256 + }, + { + "epoch": 7.71, + "learning_rate": 4.903684701492538e-05, + "loss": 0.0063, + "step": 8260 + }, + { + "epoch": 7.71, + "learning_rate": 4.903638059701493e-05, + "loss": 0.009, + "step": 8264 + }, + { + "epoch": 7.71, + "learning_rate": 4.903591417910448e-05, + "loss": 0.0047, + "step": 8268 + }, + { + "epoch": 7.72, + "learning_rate": 4.903544776119403e-05, + "loss": 0.0079, + "step": 8272 + }, + { + "epoch": 7.72, + "learning_rate": 4.9034981343283584e-05, + "loss": 0.0158, + "step": 8276 + }, + { + "epoch": 7.72, + "learning_rate": 4.903451492537314e-05, + "loss": 0.0092, + "step": 8280 + }, + { + "epoch": 7.73, + "learning_rate": 4.903404850746269e-05, + "loss": 0.013, + "step": 8284 + }, + { + "epoch": 7.73, + "learning_rate": 4.903358208955224e-05, + "loss": 0.0055, + "step": 8288 + }, + { + "epoch": 7.73, + "learning_rate": 4.9033115671641796e-05, + "loss": 0.0108, + "step": 8292 + }, + { + "epoch": 7.74, + "learning_rate": 4.9032649253731345e-05, + "loss": 0.0144, + "step": 8296 + }, + { + "epoch": 7.74, + "learning_rate": 4.90321828358209e-05, + "loss": 0.0122, + "step": 8300 + }, + { + "epoch": 7.75, + "learning_rate": 4.903171641791045e-05, + "loss": 0.0042, + "step": 8304 + }, + { + "epoch": 7.75, + "learning_rate": 4.903125e-05, + "loss": 0.0077, + "step": 8308 + }, + { + "epoch": 7.75, + "learning_rate": 4.903078358208956e-05, + "loss": 0.0076, + "step": 8312 + }, + { + "epoch": 7.76, + "learning_rate": 4.9030317164179106e-05, + "loss": 0.0096, + "step": 8316 + }, + { + "epoch": 7.76, + "learning_rate": 4.902985074626866e-05, + "loss": 0.0126, + "step": 8320 + }, + { + "epoch": 7.76, + "learning_rate": 4.9029384328358215e-05, + "loss": 0.0178, + "step": 8324 + }, + { + "epoch": 7.77, + "learning_rate": 4.9028917910447764e-05, + "loss": 0.0056, + "step": 8328 + }, + { + "epoch": 7.77, + "learning_rate": 4.902845149253731e-05, + "loss": 0.0135, + "step": 8332 + }, + { + "epoch": 7.78, + "learning_rate": 4.9027985074626867e-05, + "loss": 0.0065, + "step": 8336 + }, + { + "epoch": 7.78, + "learning_rate": 4.902751865671642e-05, + "loss": 0.0056, + "step": 8340 + }, + { + "epoch": 7.78, + "learning_rate": 4.902705223880597e-05, + "loss": 0.006, + "step": 8344 + }, + { + "epoch": 7.79, + "learning_rate": 4.9026585820895525e-05, + "loss": 0.0108, + "step": 8348 + }, + { + "epoch": 7.79, + "learning_rate": 4.902611940298508e-05, + "loss": 0.0076, + "step": 8352 + }, + { + "epoch": 7.79, + "learning_rate": 4.902565298507463e-05, + "loss": 0.0112, + "step": 8356 + }, + { + "epoch": 7.8, + "learning_rate": 4.902518656716418e-05, + "loss": 0.0044, + "step": 8360 + }, + { + "epoch": 7.8, + "learning_rate": 4.902472014925373e-05, + "loss": 0.0029, + "step": 8364 + }, + { + "epoch": 7.81, + "learning_rate": 4.9024253731343286e-05, + "loss": 0.0062, + "step": 8368 + }, + { + "epoch": 7.81, + "learning_rate": 4.902378731343284e-05, + "loss": 0.0114, + "step": 8372 + }, + { + "epoch": 7.81, + "learning_rate": 4.902332089552239e-05, + "loss": 0.0344, + "step": 8376 + }, + { + "epoch": 7.82, + "learning_rate": 4.9022854477611943e-05, + "loss": 0.0113, + "step": 8380 + }, + { + "epoch": 7.82, + "learning_rate": 4.90223880597015e-05, + "loss": 0.0098, + "step": 8384 + }, + { + "epoch": 7.82, + "learning_rate": 4.9021921641791047e-05, + "loss": 0.011, + "step": 8388 + }, + { + "epoch": 7.83, + "learning_rate": 4.9021455223880595e-05, + "loss": 0.0147, + "step": 8392 + }, + { + "epoch": 7.83, + "learning_rate": 4.902098880597015e-05, + "loss": 0.0191, + "step": 8396 + }, + { + "epoch": 7.84, + "learning_rate": 4.9020522388059704e-05, + "loss": 0.0161, + "step": 8400 + }, + { + "epoch": 7.84, + "learning_rate": 4.902005597014925e-05, + "loss": 0.0097, + "step": 8404 + }, + { + "epoch": 7.84, + "learning_rate": 4.901958955223881e-05, + "loss": 0.0107, + "step": 8408 + }, + { + "epoch": 7.85, + "learning_rate": 4.901912313432836e-05, + "loss": 0.0112, + "step": 8412 + }, + { + "epoch": 7.85, + "learning_rate": 4.901865671641792e-05, + "loss": 0.0135, + "step": 8416 + }, + { + "epoch": 7.85, + "learning_rate": 4.9018190298507465e-05, + "loss": 0.0098, + "step": 8420 + }, + { + "epoch": 7.86, + "learning_rate": 4.9017723880597014e-05, + "loss": 0.0128, + "step": 8424 + }, + { + "epoch": 7.86, + "learning_rate": 4.9017257462686575e-05, + "loss": 0.0111, + "step": 8428 + }, + { + "epoch": 7.87, + "learning_rate": 4.901679104477612e-05, + "loss": 0.0231, + "step": 8432 + }, + { + "epoch": 7.87, + "learning_rate": 4.901632462686567e-05, + "loss": 0.0146, + "step": 8436 + }, + { + "epoch": 7.87, + "learning_rate": 4.9015858208955226e-05, + "loss": 0.0016, + "step": 8440 + }, + { + "epoch": 7.88, + "learning_rate": 4.901539179104478e-05, + "loss": 0.0031, + "step": 8444 + }, + { + "epoch": 7.88, + "learning_rate": 4.901492537313433e-05, + "loss": 0.0071, + "step": 8448 + }, + { + "epoch": 7.88, + "learning_rate": 4.9014458955223884e-05, + "loss": 0.0152, + "step": 8452 + }, + { + "epoch": 7.89, + "learning_rate": 4.901399253731343e-05, + "loss": 0.0076, + "step": 8456 + }, + { + "epoch": 7.89, + "learning_rate": 4.901352611940299e-05, + "loss": 0.0204, + "step": 8460 + }, + { + "epoch": 7.9, + "learning_rate": 4.901305970149254e-05, + "loss": 0.0027, + "step": 8464 + }, + { + "epoch": 7.9, + "learning_rate": 4.901259328358209e-05, + "loss": 0.012, + "step": 8468 + }, + { + "epoch": 7.9, + "learning_rate": 4.9012126865671645e-05, + "loss": 0.0056, + "step": 8472 + }, + { + "epoch": 7.91, + "learning_rate": 4.90116604477612e-05, + "loss": 0.0064, + "step": 8476 + }, + { + "epoch": 7.91, + "learning_rate": 4.901119402985075e-05, + "loss": 0.0082, + "step": 8480 + }, + { + "epoch": 7.91, + "learning_rate": 4.9010727611940297e-05, + "loss": 0.0071, + "step": 8484 + }, + { + "epoch": 7.92, + "learning_rate": 4.901026119402986e-05, + "loss": 0.0152, + "step": 8488 + }, + { + "epoch": 7.92, + "learning_rate": 4.9009794776119406e-05, + "loss": 0.008, + "step": 8492 + }, + { + "epoch": 7.93, + "learning_rate": 4.9009328358208954e-05, + "loss": 0.0148, + "step": 8496 + }, + { + "epoch": 7.93, + "learning_rate": 4.900886194029851e-05, + "loss": 0.0038, + "step": 8500 + }, + { + "epoch": 7.93, + "eval_exact_match": 0.7127659574468085, + "eval_exec": 0.7504835589941973, + "eval_loss": 0.23233897984027863, + "eval_runtime": 1425.0133, + "eval_samples_per_second": 0.726, + "step": 8500 + }, + { + "epoch": 7.93, + "learning_rate": 4.9008395522388064e-05, + "loss": 0.0051, + "step": 8504 + }, + { + "epoch": 7.94, + "learning_rate": 4.900792910447761e-05, + "loss": 0.0202, + "step": 8508 + }, + { + "epoch": 7.94, + "learning_rate": 4.900746268656717e-05, + "loss": 0.0163, + "step": 8512 + }, + { + "epoch": 7.94, + "learning_rate": 4.9006996268656715e-05, + "loss": 0.0055, + "step": 8516 + }, + { + "epoch": 7.95, + "learning_rate": 4.900652985074627e-05, + "loss": 0.0053, + "step": 8520 + }, + { + "epoch": 7.95, + "learning_rate": 4.9006063432835825e-05, + "loss": 0.0126, + "step": 8524 + }, + { + "epoch": 7.96, + "learning_rate": 4.9005597014925373e-05, + "loss": 0.0098, + "step": 8528 + }, + { + "epoch": 7.96, + "learning_rate": 4.900513059701493e-05, + "loss": 0.0072, + "step": 8532 + }, + { + "epoch": 7.96, + "learning_rate": 4.900466417910448e-05, + "loss": 0.0078, + "step": 8536 + }, + { + "epoch": 7.97, + "learning_rate": 4.900419776119403e-05, + "loss": 0.0104, + "step": 8540 + }, + { + "epoch": 7.97, + "learning_rate": 4.900373134328358e-05, + "loss": 0.0062, + "step": 8544 + }, + { + "epoch": 7.97, + "learning_rate": 4.900326492537314e-05, + "loss": 0.0079, + "step": 8548 + }, + { + "epoch": 7.98, + "learning_rate": 4.900279850746269e-05, + "loss": 0.0096, + "step": 8552 + }, + { + "epoch": 7.98, + "learning_rate": 4.900233208955224e-05, + "loss": 0.0116, + "step": 8556 + }, + { + "epoch": 7.98, + "learning_rate": 4.900186567164179e-05, + "loss": 0.01, + "step": 8560 + }, + { + "epoch": 7.99, + "learning_rate": 4.900139925373135e-05, + "loss": 0.0133, + "step": 8564 + }, + { + "epoch": 7.99, + "learning_rate": 4.9000932835820895e-05, + "loss": 0.0058, + "step": 8568 + }, + { + "epoch": 8.0, + "learning_rate": 4.900046641791045e-05, + "loss": 0.0039, + "step": 8572 + }, + { + "epoch": 8.0, + "learning_rate": 4.9e-05, + "loss": 0.0034, + "step": 8576 + }, + { + "epoch": 8.0, + "learning_rate": 4.899953358208956e-05, + "loss": 0.0095, + "step": 8580 + }, + { + "epoch": 8.01, + "learning_rate": 4.899906716417911e-05, + "loss": 0.0008, + "step": 8584 + }, + { + "epoch": 8.01, + "learning_rate": 4.8998600746268656e-05, + "loss": 0.0054, + "step": 8588 + }, + { + "epoch": 8.01, + "learning_rate": 4.899813432835821e-05, + "loss": 0.0044, + "step": 8592 + }, + { + "epoch": 8.02, + "learning_rate": 4.8997667910447766e-05, + "loss": 0.0024, + "step": 8596 + }, + { + "epoch": 8.02, + "learning_rate": 4.8997201492537314e-05, + "loss": 0.0031, + "step": 8600 + }, + { + "epoch": 8.03, + "learning_rate": 4.899673507462687e-05, + "loss": 0.0033, + "step": 8604 + }, + { + "epoch": 8.03, + "learning_rate": 4.8996268656716424e-05, + "loss": 0.0056, + "step": 8608 + }, + { + "epoch": 8.03, + "learning_rate": 4.899580223880597e-05, + "loss": 0.0025, + "step": 8612 + }, + { + "epoch": 8.04, + "learning_rate": 4.899533582089553e-05, + "loss": 0.0038, + "step": 8616 + }, + { + "epoch": 8.04, + "learning_rate": 4.8994869402985075e-05, + "loss": 0.0088, + "step": 8620 + }, + { + "epoch": 8.04, + "learning_rate": 4.899440298507463e-05, + "loss": 0.0041, + "step": 8624 + }, + { + "epoch": 8.05, + "learning_rate": 4.8993936567164185e-05, + "loss": 0.0068, + "step": 8628 + }, + { + "epoch": 8.05, + "learning_rate": 4.899347014925373e-05, + "loss": 0.011, + "step": 8632 + }, + { + "epoch": 8.06, + "learning_rate": 4.899300373134328e-05, + "loss": 0.0037, + "step": 8636 + }, + { + "epoch": 8.06, + "learning_rate": 4.899253731343284e-05, + "loss": 0.0043, + "step": 8640 + }, + { + "epoch": 8.06, + "learning_rate": 4.899207089552239e-05, + "loss": 0.0058, + "step": 8644 + }, + { + "epoch": 8.07, + "learning_rate": 4.899160447761194e-05, + "loss": 0.005, + "step": 8648 + }, + { + "epoch": 8.07, + "learning_rate": 4.8991138059701494e-05, + "loss": 0.0084, + "step": 8652 + }, + { + "epoch": 8.07, + "learning_rate": 4.899067164179105e-05, + "loss": 0.0252, + "step": 8656 + }, + { + "epoch": 8.08, + "learning_rate": 4.89902052238806e-05, + "loss": 0.0063, + "step": 8660 + }, + { + "epoch": 8.08, + "learning_rate": 4.898973880597015e-05, + "loss": 0.0119, + "step": 8664 + }, + { + "epoch": 8.09, + "learning_rate": 4.898927238805971e-05, + "loss": 0.0016, + "step": 8668 + }, + { + "epoch": 8.09, + "learning_rate": 4.8988805970149255e-05, + "loss": 0.0067, + "step": 8672 + }, + { + "epoch": 8.09, + "learning_rate": 4.898833955223881e-05, + "loss": 0.0108, + "step": 8676 + }, + { + "epoch": 8.1, + "learning_rate": 4.898787313432836e-05, + "loss": 0.0061, + "step": 8680 + }, + { + "epoch": 8.1, + "learning_rate": 4.898740671641791e-05, + "loss": 0.0072, + "step": 8684 + }, + { + "epoch": 8.1, + "learning_rate": 4.898694029850747e-05, + "loss": 0.0017, + "step": 8688 + }, + { + "epoch": 8.11, + "learning_rate": 4.8986473880597016e-05, + "loss": 0.0038, + "step": 8692 + }, + { + "epoch": 8.11, + "learning_rate": 4.8986007462686564e-05, + "loss": 0.0169, + "step": 8696 + }, + { + "epoch": 8.12, + "learning_rate": 4.8985541044776126e-05, + "loss": 0.0112, + "step": 8700 + }, + { + "epoch": 8.12, + "learning_rate": 4.8985074626865674e-05, + "loss": 0.0025, + "step": 8704 + }, + { + "epoch": 8.12, + "learning_rate": 4.898460820895522e-05, + "loss": 0.0019, + "step": 8708 + }, + { + "epoch": 8.13, + "learning_rate": 4.898414179104478e-05, + "loss": 0.0063, + "step": 8712 + }, + { + "epoch": 8.13, + "learning_rate": 4.898367537313433e-05, + "loss": 0.0054, + "step": 8716 + }, + { + "epoch": 8.13, + "learning_rate": 4.898320895522388e-05, + "loss": 0.004, + "step": 8720 + }, + { + "epoch": 8.14, + "learning_rate": 4.8982742537313435e-05, + "loss": 0.0049, + "step": 8724 + }, + { + "epoch": 8.14, + "learning_rate": 4.898227611940299e-05, + "loss": 0.0072, + "step": 8728 + }, + { + "epoch": 8.15, + "learning_rate": 4.898180970149254e-05, + "loss": 0.0028, + "step": 8732 + }, + { + "epoch": 8.15, + "learning_rate": 4.898134328358209e-05, + "loss": 0.0143, + "step": 8736 + }, + { + "epoch": 8.15, + "learning_rate": 4.898087686567164e-05, + "loss": 0.0043, + "step": 8740 + }, + { + "epoch": 8.16, + "learning_rate": 4.8980410447761196e-05, + "loss": 0.0136, + "step": 8744 + }, + { + "epoch": 8.16, + "learning_rate": 4.897994402985075e-05, + "loss": 0.0029, + "step": 8748 + }, + { + "epoch": 8.16, + "learning_rate": 4.89794776119403e-05, + "loss": 0.0028, + "step": 8752 + }, + { + "epoch": 8.17, + "learning_rate": 4.8979011194029854e-05, + "loss": 0.0205, + "step": 8756 + }, + { + "epoch": 8.17, + "learning_rate": 4.897854477611941e-05, + "loss": 0.005, + "step": 8760 + }, + { + "epoch": 8.18, + "learning_rate": 4.897807835820896e-05, + "loss": 0.0037, + "step": 8764 + }, + { + "epoch": 8.18, + "learning_rate": 4.897761194029851e-05, + "loss": 0.0069, + "step": 8768 + }, + { + "epoch": 8.18, + "learning_rate": 4.897714552238806e-05, + "loss": 0.0075, + "step": 8772 + }, + { + "epoch": 8.19, + "learning_rate": 4.8976679104477615e-05, + "loss": 0.0102, + "step": 8776 + }, + { + "epoch": 8.19, + "learning_rate": 4.897621268656717e-05, + "loss": 0.0185, + "step": 8780 + }, + { + "epoch": 8.19, + "learning_rate": 4.897574626865672e-05, + "loss": 0.0029, + "step": 8784 + }, + { + "epoch": 8.2, + "learning_rate": 4.8975279850746266e-05, + "loss": 0.003, + "step": 8788 + }, + { + "epoch": 8.2, + "learning_rate": 4.897481343283583e-05, + "loss": 0.0142, + "step": 8792 + }, + { + "epoch": 8.21, + "learning_rate": 4.8974347014925376e-05, + "loss": 0.0086, + "step": 8796 + }, + { + "epoch": 8.21, + "learning_rate": 4.8973880597014924e-05, + "loss": 0.0037, + "step": 8800 + }, + { + "epoch": 8.21, + "learning_rate": 4.897341417910448e-05, + "loss": 0.0189, + "step": 8804 + }, + { + "epoch": 8.22, + "learning_rate": 4.8972947761194034e-05, + "loss": 0.0031, + "step": 8808 + }, + { + "epoch": 8.22, + "learning_rate": 4.897248134328358e-05, + "loss": 0.0061, + "step": 8812 + }, + { + "epoch": 8.22, + "learning_rate": 4.897201492537314e-05, + "loss": 0.0047, + "step": 8816 + }, + { + "epoch": 8.23, + "learning_rate": 4.897154850746269e-05, + "loss": 0.0041, + "step": 8820 + }, + { + "epoch": 8.23, + "learning_rate": 4.897108208955224e-05, + "loss": 0.0055, + "step": 8824 + }, + { + "epoch": 8.24, + "learning_rate": 4.8970615671641795e-05, + "loss": 0.0222, + "step": 8828 + }, + { + "epoch": 8.24, + "learning_rate": 4.897014925373134e-05, + "loss": 0.0059, + "step": 8832 + }, + { + "epoch": 8.24, + "learning_rate": 4.89696828358209e-05, + "loss": 0.016, + "step": 8836 + }, + { + "epoch": 8.25, + "learning_rate": 4.896921641791045e-05, + "loss": 0.0095, + "step": 8840 + }, + { + "epoch": 8.25, + "learning_rate": 4.896875e-05, + "loss": 0.0062, + "step": 8844 + }, + { + "epoch": 8.25, + "learning_rate": 4.896828358208955e-05, + "loss": 0.0012, + "step": 8848 + }, + { + "epoch": 8.26, + "learning_rate": 4.896781716417911e-05, + "loss": 0.0032, + "step": 8852 + }, + { + "epoch": 8.26, + "learning_rate": 4.896735074626866e-05, + "loss": 0.0146, + "step": 8856 + }, + { + "epoch": 8.26, + "learning_rate": 4.896688432835821e-05, + "loss": 0.0123, + "step": 8860 + }, + { + "epoch": 8.27, + "learning_rate": 4.896641791044776e-05, + "loss": 0.0066, + "step": 8864 + }, + { + "epoch": 8.27, + "learning_rate": 4.896595149253732e-05, + "loss": 0.0026, + "step": 8868 + }, + { + "epoch": 8.28, + "learning_rate": 4.8965485074626865e-05, + "loss": 0.0051, + "step": 8872 + }, + { + "epoch": 8.28, + "learning_rate": 4.896501865671642e-05, + "loss": 0.0045, + "step": 8876 + }, + { + "epoch": 8.28, + "learning_rate": 4.8964552238805975e-05, + "loss": 0.004, + "step": 8880 + }, + { + "epoch": 8.29, + "learning_rate": 4.896408582089552e-05, + "loss": 0.0113, + "step": 8884 + }, + { + "epoch": 8.29, + "learning_rate": 4.896361940298508e-05, + "loss": 0.005, + "step": 8888 + }, + { + "epoch": 8.29, + "learning_rate": 4.8963152985074626e-05, + "loss": 0.0062, + "step": 8892 + }, + { + "epoch": 8.3, + "learning_rate": 4.896268656716418e-05, + "loss": 0.0053, + "step": 8896 + }, + { + "epoch": 8.3, + "learning_rate": 4.8962220149253736e-05, + "loss": 0.0049, + "step": 8900 + }, + { + "epoch": 8.31, + "learning_rate": 4.8961753731343284e-05, + "loss": 0.0037, + "step": 8904 + }, + { + "epoch": 8.31, + "learning_rate": 4.896128731343284e-05, + "loss": 0.0359, + "step": 8908 + }, + { + "epoch": 8.31, + "learning_rate": 4.8960820895522394e-05, + "loss": 0.0034, + "step": 8912 + }, + { + "epoch": 8.32, + "learning_rate": 4.896035447761194e-05, + "loss": 0.0019, + "step": 8916 + }, + { + "epoch": 8.32, + "learning_rate": 4.89598880597015e-05, + "loss": 0.0039, + "step": 8920 + }, + { + "epoch": 8.32, + "learning_rate": 4.8959421641791045e-05, + "loss": 0.0091, + "step": 8924 + }, + { + "epoch": 8.33, + "learning_rate": 4.89589552238806e-05, + "loss": 0.0103, + "step": 8928 + }, + { + "epoch": 8.33, + "learning_rate": 4.8958488805970155e-05, + "loss": 0.0076, + "step": 8932 + }, + { + "epoch": 8.34, + "learning_rate": 4.89580223880597e-05, + "loss": 0.0653, + "step": 8936 + }, + { + "epoch": 8.34, + "learning_rate": 4.895755597014926e-05, + "loss": 0.0164, + "step": 8940 + }, + { + "epoch": 8.34, + "learning_rate": 4.895708955223881e-05, + "loss": 0.0133, + "step": 8944 + }, + { + "epoch": 8.35, + "learning_rate": 4.895662313432836e-05, + "loss": 0.0063, + "step": 8948 + }, + { + "epoch": 8.35, + "learning_rate": 4.895615671641791e-05, + "loss": 0.0046, + "step": 8952 + }, + { + "epoch": 8.35, + "learning_rate": 4.8955690298507464e-05, + "loss": 0.002, + "step": 8956 + }, + { + "epoch": 8.36, + "learning_rate": 4.895522388059702e-05, + "loss": 0.0034, + "step": 8960 + }, + { + "epoch": 8.36, + "learning_rate": 4.895475746268657e-05, + "loss": 0.003, + "step": 8964 + }, + { + "epoch": 8.37, + "learning_rate": 4.895429104477612e-05, + "loss": 0.0112, + "step": 8968 + }, + { + "epoch": 8.37, + "learning_rate": 4.895382462686568e-05, + "loss": 0.0156, + "step": 8972 + }, + { + "epoch": 8.37, + "learning_rate": 4.8953358208955225e-05, + "loss": 0.0045, + "step": 8976 + }, + { + "epoch": 8.38, + "learning_rate": 4.895289179104478e-05, + "loss": 0.003, + "step": 8980 + }, + { + "epoch": 8.38, + "learning_rate": 4.895242537313433e-05, + "loss": 0.0061, + "step": 8984 + }, + { + "epoch": 8.38, + "learning_rate": 4.895195895522388e-05, + "loss": 0.0125, + "step": 8988 + }, + { + "epoch": 8.39, + "learning_rate": 4.895149253731344e-05, + "loss": 0.0068, + "step": 8992 + }, + { + "epoch": 8.39, + "learning_rate": 4.8951026119402986e-05, + "loss": 0.0072, + "step": 8996 + }, + { + "epoch": 8.4, + "learning_rate": 4.895055970149254e-05, + "loss": 0.0054, + "step": 9000 + }, + { + "epoch": 8.4, + "eval_exact_match": 0.7321083172147002, + "eval_exec": 0.7572533849129593, + "eval_loss": 0.24979576468467712, + "eval_runtime": 1232.0481, + "eval_samples_per_second": 0.839, + "step": 9000 + }, + { + "epoch": 8.4, + "learning_rate": 4.8950093283582096e-05, + "loss": 0.0038, + "step": 9004 + }, + { + "epoch": 8.4, + "learning_rate": 4.8949626865671644e-05, + "loss": 0.007, + "step": 9008 + }, + { + "epoch": 8.41, + "learning_rate": 4.894916044776119e-05, + "loss": 0.0187, + "step": 9012 + }, + { + "epoch": 8.41, + "learning_rate": 4.894869402985075e-05, + "loss": 0.0081, + "step": 9016 + }, + { + "epoch": 8.41, + "learning_rate": 4.89482276119403e-05, + "loss": 0.0047, + "step": 9020 + }, + { + "epoch": 8.42, + "learning_rate": 4.894776119402985e-05, + "loss": 0.0043, + "step": 9024 + }, + { + "epoch": 8.42, + "learning_rate": 4.8947294776119405e-05, + "loss": 0.028, + "step": 9028 + }, + { + "epoch": 8.43, + "learning_rate": 4.894682835820896e-05, + "loss": 0.0063, + "step": 9032 + }, + { + "epoch": 8.43, + "learning_rate": 4.894636194029851e-05, + "loss": 0.0072, + "step": 9036 + }, + { + "epoch": 8.43, + "learning_rate": 4.894589552238806e-05, + "loss": 0.0169, + "step": 9040 + }, + { + "epoch": 8.44, + "learning_rate": 4.894542910447761e-05, + "loss": 0.0325, + "step": 9044 + }, + { + "epoch": 8.44, + "learning_rate": 4.8944962686567166e-05, + "loss": 0.0085, + "step": 9048 + }, + { + "epoch": 8.44, + "learning_rate": 4.894449626865672e-05, + "loss": 0.0022, + "step": 9052 + }, + { + "epoch": 8.45, + "learning_rate": 4.894402985074627e-05, + "loss": 0.0097, + "step": 9056 + }, + { + "epoch": 8.45, + "learning_rate": 4.8943563432835824e-05, + "loss": 0.0049, + "step": 9060 + }, + { + "epoch": 8.46, + "learning_rate": 4.894309701492538e-05, + "loss": 0.0099, + "step": 9064 + }, + { + "epoch": 8.46, + "learning_rate": 4.894263059701493e-05, + "loss": 0.003, + "step": 9068 + }, + { + "epoch": 8.46, + "learning_rate": 4.894216417910448e-05, + "loss": 0.0065, + "step": 9072 + }, + { + "epoch": 8.47, + "learning_rate": 4.894169776119403e-05, + "loss": 0.0048, + "step": 9076 + }, + { + "epoch": 8.47, + "learning_rate": 4.8941231343283585e-05, + "loss": 0.0018, + "step": 9080 + }, + { + "epoch": 8.47, + "learning_rate": 4.894076492537314e-05, + "loss": 0.0039, + "step": 9084 + }, + { + "epoch": 8.48, + "learning_rate": 4.894029850746269e-05, + "loss": 0.0058, + "step": 9088 + }, + { + "epoch": 8.48, + "learning_rate": 4.893983208955224e-05, + "loss": 0.0059, + "step": 9092 + }, + { + "epoch": 8.49, + "learning_rate": 4.89393656716418e-05, + "loss": 0.0059, + "step": 9096 + }, + { + "epoch": 8.49, + "learning_rate": 4.8938899253731346e-05, + "loss": 0.0076, + "step": 9100 + }, + { + "epoch": 8.49, + "learning_rate": 4.8938432835820894e-05, + "loss": 0.0044, + "step": 9104 + }, + { + "epoch": 8.5, + "learning_rate": 4.8937966417910456e-05, + "loss": 0.0052, + "step": 9108 + }, + { + "epoch": 8.5, + "learning_rate": 4.8937500000000004e-05, + "loss": 0.0091, + "step": 9112 + }, + { + "epoch": 8.5, + "learning_rate": 4.893703358208955e-05, + "loss": 0.007, + "step": 9116 + }, + { + "epoch": 8.51, + "learning_rate": 4.893656716417911e-05, + "loss": 0.0019, + "step": 9120 + }, + { + "epoch": 8.51, + "learning_rate": 4.893610074626866e-05, + "loss": 0.0059, + "step": 9124 + }, + { + "epoch": 8.51, + "learning_rate": 4.893563432835821e-05, + "loss": 0.0049, + "step": 9128 + }, + { + "epoch": 8.52, + "learning_rate": 4.8935167910447765e-05, + "loss": 0.0033, + "step": 9132 + }, + { + "epoch": 8.52, + "learning_rate": 4.893470149253731e-05, + "loss": 0.0168, + "step": 9136 + }, + { + "epoch": 8.53, + "learning_rate": 4.893423507462687e-05, + "loss": 0.0114, + "step": 9140 + }, + { + "epoch": 8.53, + "learning_rate": 4.893376865671642e-05, + "loss": 0.0085, + "step": 9144 + }, + { + "epoch": 8.53, + "learning_rate": 4.893330223880597e-05, + "loss": 0.0111, + "step": 9148 + }, + { + "epoch": 8.54, + "learning_rate": 4.8932835820895526e-05, + "loss": 0.0053, + "step": 9152 + }, + { + "epoch": 8.54, + "learning_rate": 4.893236940298508e-05, + "loss": 0.0014, + "step": 9156 + }, + { + "epoch": 8.54, + "learning_rate": 4.893190298507463e-05, + "loss": 0.008, + "step": 9160 + }, + { + "epoch": 8.55, + "learning_rate": 4.893143656716418e-05, + "loss": 0.0079, + "step": 9164 + }, + { + "epoch": 8.55, + "learning_rate": 4.893097014925374e-05, + "loss": 0.0069, + "step": 9168 + }, + { + "epoch": 8.56, + "learning_rate": 4.893050373134329e-05, + "loss": 0.0094, + "step": 9172 + }, + { + "epoch": 8.56, + "learning_rate": 4.8930037313432835e-05, + "loss": 0.0086, + "step": 9176 + }, + { + "epoch": 8.56, + "learning_rate": 4.892957089552239e-05, + "loss": 0.0151, + "step": 9180 + }, + { + "epoch": 8.57, + "learning_rate": 4.8929104477611945e-05, + "loss": 0.0061, + "step": 9184 + }, + { + "epoch": 8.57, + "learning_rate": 4.892863805970149e-05, + "loss": 0.0031, + "step": 9188 + }, + { + "epoch": 8.57, + "learning_rate": 4.892817164179105e-05, + "loss": 0.013, + "step": 9192 + }, + { + "epoch": 8.58, + "learning_rate": 4.8927705223880596e-05, + "loss": 0.0115, + "step": 9196 + }, + { + "epoch": 8.58, + "learning_rate": 4.892723880597015e-05, + "loss": 0.0132, + "step": 9200 + }, + { + "epoch": 8.59, + "learning_rate": 4.8926772388059706e-05, + "loss": 0.016, + "step": 9204 + }, + { + "epoch": 8.59, + "learning_rate": 4.8926305970149254e-05, + "loss": 0.0091, + "step": 9208 + }, + { + "epoch": 8.59, + "learning_rate": 4.892583955223881e-05, + "loss": 0.0022, + "step": 9212 + }, + { + "epoch": 8.6, + "learning_rate": 4.8925373134328364e-05, + "loss": 0.006, + "step": 9216 + }, + { + "epoch": 8.6, + "learning_rate": 4.892490671641791e-05, + "loss": 0.01, + "step": 9220 + }, + { + "epoch": 8.6, + "learning_rate": 4.892444029850746e-05, + "loss": 0.0023, + "step": 9224 + }, + { + "epoch": 8.61, + "learning_rate": 4.892397388059702e-05, + "loss": 0.0046, + "step": 9228 + }, + { + "epoch": 8.61, + "learning_rate": 4.892350746268657e-05, + "loss": 0.0106, + "step": 9232 + }, + { + "epoch": 8.62, + "learning_rate": 4.8923041044776124e-05, + "loss": 0.0152, + "step": 9236 + }, + { + "epoch": 8.62, + "learning_rate": 4.892257462686567e-05, + "loss": 0.0108, + "step": 9240 + }, + { + "epoch": 8.62, + "learning_rate": 4.892210820895523e-05, + "loss": 0.0146, + "step": 9244 + }, + { + "epoch": 8.63, + "learning_rate": 4.892164179104478e-05, + "loss": 0.0046, + "step": 9248 + }, + { + "epoch": 8.63, + "learning_rate": 4.892117537313433e-05, + "loss": 0.0106, + "step": 9252 + }, + { + "epoch": 8.63, + "learning_rate": 4.892070895522388e-05, + "loss": 0.002, + "step": 9256 + }, + { + "epoch": 8.64, + "learning_rate": 4.892024253731344e-05, + "loss": 0.0041, + "step": 9260 + }, + { + "epoch": 8.64, + "learning_rate": 4.891977611940299e-05, + "loss": 0.0086, + "step": 9264 + }, + { + "epoch": 8.65, + "learning_rate": 4.891930970149254e-05, + "loss": 0.0109, + "step": 9268 + }, + { + "epoch": 8.65, + "learning_rate": 4.891884328358209e-05, + "loss": 0.0073, + "step": 9272 + }, + { + "epoch": 8.65, + "learning_rate": 4.8918376865671646e-05, + "loss": 0.0168, + "step": 9276 + }, + { + "epoch": 8.66, + "learning_rate": 4.8917910447761195e-05, + "loss": 0.0122, + "step": 9280 + }, + { + "epoch": 8.66, + "learning_rate": 4.891744402985075e-05, + "loss": 0.0049, + "step": 9284 + }, + { + "epoch": 8.66, + "learning_rate": 4.8916977611940304e-05, + "loss": 0.008, + "step": 9288 + }, + { + "epoch": 8.67, + "learning_rate": 4.891651119402985e-05, + "loss": 0.0071, + "step": 9292 + }, + { + "epoch": 8.67, + "learning_rate": 4.891604477611941e-05, + "loss": 0.0187, + "step": 9296 + }, + { + "epoch": 8.68, + "learning_rate": 4.8915578358208956e-05, + "loss": 0.006, + "step": 9300 + }, + { + "epoch": 8.68, + "learning_rate": 4.891511194029851e-05, + "loss": 0.0052, + "step": 9304 + }, + { + "epoch": 8.68, + "learning_rate": 4.8914645522388065e-05, + "loss": 0.0079, + "step": 9308 + }, + { + "epoch": 8.69, + "learning_rate": 4.8914179104477614e-05, + "loss": 0.0029, + "step": 9312 + }, + { + "epoch": 8.69, + "learning_rate": 4.891371268656716e-05, + "loss": 0.0057, + "step": 9316 + }, + { + "epoch": 8.69, + "learning_rate": 4.891324626865672e-05, + "loss": 0.0256, + "step": 9320 + }, + { + "epoch": 8.7, + "learning_rate": 4.891277985074627e-05, + "loss": 0.0075, + "step": 9324 + }, + { + "epoch": 8.7, + "learning_rate": 4.891231343283582e-05, + "loss": 0.0223, + "step": 9328 + }, + { + "epoch": 8.71, + "learning_rate": 4.8911847014925375e-05, + "loss": 0.013, + "step": 9332 + }, + { + "epoch": 8.71, + "learning_rate": 4.891138059701493e-05, + "loss": 0.0038, + "step": 9336 + }, + { + "epoch": 8.71, + "learning_rate": 4.891091417910448e-05, + "loss": 0.014, + "step": 9340 + }, + { + "epoch": 8.72, + "learning_rate": 4.891044776119403e-05, + "loss": 0.004, + "step": 9344 + }, + { + "epoch": 8.72, + "learning_rate": 4.890998134328359e-05, + "loss": 0.0035, + "step": 9348 + }, + { + "epoch": 8.72, + "learning_rate": 4.8909514925373136e-05, + "loss": 0.0139, + "step": 9352 + }, + { + "epoch": 8.73, + "learning_rate": 4.890904850746269e-05, + "loss": 0.0054, + "step": 9356 + }, + { + "epoch": 8.73, + "learning_rate": 4.890858208955224e-05, + "loss": 0.007, + "step": 9360 + }, + { + "epoch": 8.73, + "learning_rate": 4.8908115671641793e-05, + "loss": 0.0136, + "step": 9364 + }, + { + "epoch": 8.74, + "learning_rate": 4.890764925373135e-05, + "loss": 0.0025, + "step": 9368 + }, + { + "epoch": 8.74, + "learning_rate": 4.8907182835820896e-05, + "loss": 0.0116, + "step": 9372 + }, + { + "epoch": 8.75, + "learning_rate": 4.8906716417910445e-05, + "loss": 0.013, + "step": 9376 + }, + { + "epoch": 8.75, + "learning_rate": 4.8906250000000006e-05, + "loss": 0.0061, + "step": 9380 + }, + { + "epoch": 8.75, + "learning_rate": 4.8905783582089554e-05, + "loss": 0.0027, + "step": 9384 + }, + { + "epoch": 8.76, + "learning_rate": 4.89053171641791e-05, + "loss": 0.0024, + "step": 9388 + }, + { + "epoch": 8.76, + "learning_rate": 4.890485074626866e-05, + "loss": 0.0027, + "step": 9392 + }, + { + "epoch": 8.76, + "learning_rate": 4.890438432835821e-05, + "loss": 0.005, + "step": 9396 + }, + { + "epoch": 8.77, + "learning_rate": 4.890391791044777e-05, + "loss": 0.0056, + "step": 9400 + }, + { + "epoch": 8.77, + "learning_rate": 4.8903451492537315e-05, + "loss": 0.0092, + "step": 9404 + }, + { + "epoch": 8.78, + "learning_rate": 4.890298507462687e-05, + "loss": 0.0028, + "step": 9408 + }, + { + "epoch": 8.78, + "learning_rate": 4.8902518656716425e-05, + "loss": 0.0263, + "step": 9412 + }, + { + "epoch": 8.78, + "learning_rate": 4.890205223880597e-05, + "loss": 0.0037, + "step": 9416 + }, + { + "epoch": 8.79, + "learning_rate": 4.890158582089552e-05, + "loss": 0.0013, + "step": 9420 + }, + { + "epoch": 8.79, + "learning_rate": 4.8901119402985076e-05, + "loss": 0.0043, + "step": 9424 + }, + { + "epoch": 8.79, + "learning_rate": 4.890065298507463e-05, + "loss": 0.016, + "step": 9428 + }, + { + "epoch": 8.8, + "learning_rate": 4.890018656716418e-05, + "loss": 0.0099, + "step": 9432 + }, + { + "epoch": 8.8, + "learning_rate": 4.8899720149253734e-05, + "loss": 0.0034, + "step": 9436 + }, + { + "epoch": 8.81, + "learning_rate": 4.889925373134329e-05, + "loss": 0.0022, + "step": 9440 + }, + { + "epoch": 8.81, + "learning_rate": 4.889878731343284e-05, + "loss": 0.0035, + "step": 9444 + }, + { + "epoch": 8.81, + "learning_rate": 4.889832089552239e-05, + "loss": 0.0063, + "step": 9448 + }, + { + "epoch": 8.82, + "learning_rate": 4.889785447761194e-05, + "loss": 0.0102, + "step": 9452 + }, + { + "epoch": 8.82, + "learning_rate": 4.8897388059701495e-05, + "loss": 0.0085, + "step": 9456 + }, + { + "epoch": 8.82, + "learning_rate": 4.889692164179105e-05, + "loss": 0.0024, + "step": 9460 + }, + { + "epoch": 8.83, + "learning_rate": 4.88964552238806e-05, + "loss": 0.0029, + "step": 9464 + }, + { + "epoch": 8.83, + "learning_rate": 4.8895988805970147e-05, + "loss": 0.0111, + "step": 9468 + }, + { + "epoch": 8.84, + "learning_rate": 4.889552238805971e-05, + "loss": 0.0018, + "step": 9472 + }, + { + "epoch": 8.84, + "learning_rate": 4.8895055970149256e-05, + "loss": 0.005, + "step": 9476 + }, + { + "epoch": 8.84, + "learning_rate": 4.8894589552238804e-05, + "loss": 0.011, + "step": 9480 + }, + { + "epoch": 8.85, + "learning_rate": 4.889412313432836e-05, + "loss": 0.0065, + "step": 9484 + }, + { + "epoch": 8.85, + "learning_rate": 4.8893656716417914e-05, + "loss": 0.0071, + "step": 9488 + }, + { + "epoch": 8.85, + "learning_rate": 4.889319029850746e-05, + "loss": 0.0172, + "step": 9492 + }, + { + "epoch": 8.86, + "learning_rate": 4.889272388059702e-05, + "loss": 0.0057, + "step": 9496 + }, + { + "epoch": 8.86, + "learning_rate": 4.889225746268657e-05, + "loss": 0.0052, + "step": 9500 + }, + { + "epoch": 8.86, + "eval_exact_match": 0.718568665377176, + "eval_exec": 0.7475822050290135, + "eval_loss": 0.24757526814937592, + "eval_runtime": 1186.3447, + "eval_samples_per_second": 0.872, + "step": 9500 + }, + { + "epoch": 8.87, + "learning_rate": 4.889179104477612e-05, + "loss": 0.0078, + "step": 9504 + }, + { + "epoch": 8.87, + "learning_rate": 4.8891324626865675e-05, + "loss": 0.0044, + "step": 9508 + }, + { + "epoch": 8.87, + "learning_rate": 4.889085820895522e-05, + "loss": 0.0049, + "step": 9512 + }, + { + "epoch": 8.88, + "learning_rate": 4.889039179104478e-05, + "loss": 0.0046, + "step": 9516 + }, + { + "epoch": 8.88, + "learning_rate": 4.888992537313433e-05, + "loss": 0.0027, + "step": 9520 + }, + { + "epoch": 8.88, + "learning_rate": 4.888945895522388e-05, + "loss": 0.0052, + "step": 9524 + }, + { + "epoch": 8.89, + "learning_rate": 4.888899253731343e-05, + "loss": 0.0024, + "step": 9528 + }, + { + "epoch": 8.89, + "learning_rate": 4.888852611940299e-05, + "loss": 0.0125, + "step": 9532 + }, + { + "epoch": 8.9, + "learning_rate": 4.888805970149254e-05, + "loss": 0.0113, + "step": 9536 + }, + { + "epoch": 8.9, + "learning_rate": 4.888759328358209e-05, + "loss": 0.0031, + "step": 9540 + }, + { + "epoch": 8.9, + "learning_rate": 4.888712686567164e-05, + "loss": 0.0017, + "step": 9544 + }, + { + "epoch": 8.91, + "learning_rate": 4.88866604477612e-05, + "loss": 0.0101, + "step": 9548 + }, + { + "epoch": 8.91, + "learning_rate": 4.8886194029850745e-05, + "loss": 0.0128, + "step": 9552 + }, + { + "epoch": 8.91, + "learning_rate": 4.88857276119403e-05, + "loss": 0.002, + "step": 9556 + }, + { + "epoch": 8.92, + "learning_rate": 4.8885261194029855e-05, + "loss": 0.0032, + "step": 9560 + }, + { + "epoch": 8.92, + "learning_rate": 4.888479477611941e-05, + "loss": 0.0099, + "step": 9564 + }, + { + "epoch": 8.93, + "learning_rate": 4.888432835820896e-05, + "loss": 0.0095, + "step": 9568 + }, + { + "epoch": 8.93, + "learning_rate": 4.8883861940298506e-05, + "loss": 0.0131, + "step": 9572 + }, + { + "epoch": 8.93, + "learning_rate": 4.888339552238806e-05, + "loss": 0.004, + "step": 9576 + }, + { + "epoch": 8.94, + "learning_rate": 4.8882929104477616e-05, + "loss": 0.0112, + "step": 9580 + }, + { + "epoch": 8.94, + "learning_rate": 4.8882462686567164e-05, + "loss": 0.0112, + "step": 9584 + }, + { + "epoch": 8.94, + "learning_rate": 4.888199626865672e-05, + "loss": 0.0062, + "step": 9588 + }, + { + "epoch": 8.95, + "learning_rate": 4.8881529850746274e-05, + "loss": 0.0102, + "step": 9592 + }, + { + "epoch": 8.95, + "learning_rate": 4.888106343283582e-05, + "loss": 0.0127, + "step": 9596 + }, + { + "epoch": 8.96, + "learning_rate": 4.888059701492538e-05, + "loss": 0.0036, + "step": 9600 + }, + { + "epoch": 8.96, + "learning_rate": 4.8880130597014925e-05, + "loss": 0.0038, + "step": 9604 + }, + { + "epoch": 8.96, + "learning_rate": 4.887966417910448e-05, + "loss": 0.0052, + "step": 9608 + }, + { + "epoch": 8.97, + "learning_rate": 4.8879197761194035e-05, + "loss": 0.0101, + "step": 9612 + }, + { + "epoch": 8.97, + "learning_rate": 4.887873134328358e-05, + "loss": 0.0144, + "step": 9616 + }, + { + "epoch": 8.97, + "learning_rate": 4.887826492537314e-05, + "loss": 0.0155, + "step": 9620 + }, + { + "epoch": 8.98, + "learning_rate": 4.887779850746269e-05, + "loss": 0.0092, + "step": 9624 + }, + { + "epoch": 8.98, + "learning_rate": 4.887733208955224e-05, + "loss": 0.0083, + "step": 9628 + }, + { + "epoch": 8.98, + "learning_rate": 4.887686567164179e-05, + "loss": 0.0097, + "step": 9632 + }, + { + "epoch": 8.99, + "learning_rate": 4.8876399253731344e-05, + "loss": 0.0148, + "step": 9636 + }, + { + "epoch": 8.99, + "learning_rate": 4.88759328358209e-05, + "loss": 0.0022, + "step": 9640 + }, + { + "epoch": 9.0, + "learning_rate": 4.887546641791045e-05, + "loss": 0.0131, + "step": 9644 + }, + { + "epoch": 9.0, + "learning_rate": 4.8875e-05, + "loss": 0.0049, + "step": 9648 + }, + { + "epoch": 9.0, + "learning_rate": 4.887453358208956e-05, + "loss": 0.004, + "step": 9652 + }, + { + "epoch": 9.01, + "learning_rate": 4.8874067164179105e-05, + "loss": 0.0092, + "step": 9656 + }, + { + "epoch": 9.01, + "learning_rate": 4.887360074626866e-05, + "loss": 0.0087, + "step": 9660 + }, + { + "epoch": 9.01, + "learning_rate": 4.887313432835821e-05, + "loss": 0.0054, + "step": 9664 + }, + { + "epoch": 9.02, + "learning_rate": 4.887266791044776e-05, + "loss": 0.0041, + "step": 9668 + }, + { + "epoch": 9.02, + "learning_rate": 4.887220149253732e-05, + "loss": 0.0093, + "step": 9672 + }, + { + "epoch": 9.03, + "learning_rate": 4.8871735074626866e-05, + "loss": 0.005, + "step": 9676 + }, + { + "epoch": 9.03, + "learning_rate": 4.887126865671642e-05, + "loss": 0.0079, + "step": 9680 + }, + { + "epoch": 9.03, + "learning_rate": 4.8870802238805976e-05, + "loss": 0.0252, + "step": 9684 + }, + { + "epoch": 9.04, + "learning_rate": 4.8870335820895524e-05, + "loss": 0.0045, + "step": 9688 + }, + { + "epoch": 9.04, + "learning_rate": 4.886986940298507e-05, + "loss": 0.0063, + "step": 9692 + }, + { + "epoch": 9.04, + "learning_rate": 4.886940298507463e-05, + "loss": 0.0023, + "step": 9696 + }, + { + "epoch": 9.05, + "learning_rate": 4.886893656716418e-05, + "loss": 0.0042, + "step": 9700 + }, + { + "epoch": 9.05, + "learning_rate": 4.886847014925373e-05, + "loss": 0.0088, + "step": 9704 + }, + { + "epoch": 9.06, + "learning_rate": 4.8868003731343285e-05, + "loss": 0.0078, + "step": 9708 + }, + { + "epoch": 9.06, + "learning_rate": 4.886753731343284e-05, + "loss": 0.0094, + "step": 9712 + }, + { + "epoch": 9.06, + "learning_rate": 4.886707089552239e-05, + "loss": 0.0031, + "step": 9716 + }, + { + "epoch": 9.07, + "learning_rate": 4.886660447761194e-05, + "loss": 0.008, + "step": 9720 + }, + { + "epoch": 9.07, + "learning_rate": 4.886613805970149e-05, + "loss": 0.0017, + "step": 9724 + }, + { + "epoch": 9.07, + "learning_rate": 4.886567164179105e-05, + "loss": 0.007, + "step": 9728 + }, + { + "epoch": 9.08, + "learning_rate": 4.88652052238806e-05, + "loss": 0.0045, + "step": 9732 + }, + { + "epoch": 9.08, + "learning_rate": 4.886473880597015e-05, + "loss": 0.0098, + "step": 9736 + }, + { + "epoch": 9.09, + "learning_rate": 4.8864272388059704e-05, + "loss": 0.0131, + "step": 9740 + }, + { + "epoch": 9.09, + "learning_rate": 4.886380597014926e-05, + "loss": 0.0022, + "step": 9744 + }, + { + "epoch": 9.09, + "learning_rate": 4.886333955223881e-05, + "loss": 0.0044, + "step": 9748 + }, + { + "epoch": 9.1, + "learning_rate": 4.886287313432836e-05, + "loss": 0.0078, + "step": 9752 + }, + { + "epoch": 9.1, + "learning_rate": 4.886240671641791e-05, + "loss": 0.0047, + "step": 9756 + }, + { + "epoch": 9.1, + "learning_rate": 4.8861940298507465e-05, + "loss": 0.008, + "step": 9760 + }, + { + "epoch": 9.11, + "learning_rate": 4.886147388059702e-05, + "loss": 0.0065, + "step": 9764 + }, + { + "epoch": 9.11, + "learning_rate": 4.886100746268657e-05, + "loss": 0.0024, + "step": 9768 + }, + { + "epoch": 9.12, + "learning_rate": 4.886054104477612e-05, + "loss": 0.0123, + "step": 9772 + }, + { + "epoch": 9.12, + "learning_rate": 4.886007462686568e-05, + "loss": 0.0016, + "step": 9776 + }, + { + "epoch": 9.12, + "learning_rate": 4.8859608208955226e-05, + "loss": 0.0018, + "step": 9780 + }, + { + "epoch": 9.13, + "learning_rate": 4.8859141791044774e-05, + "loss": 0.0096, + "step": 9784 + }, + { + "epoch": 9.13, + "learning_rate": 4.8858675373134336e-05, + "loss": 0.0037, + "step": 9788 + }, + { + "epoch": 9.13, + "learning_rate": 4.8858208955223884e-05, + "loss": 0.0041, + "step": 9792 + }, + { + "epoch": 9.14, + "learning_rate": 4.885774253731343e-05, + "loss": 0.0031, + "step": 9796 + }, + { + "epoch": 9.14, + "learning_rate": 4.885727611940299e-05, + "loss": 0.0084, + "step": 9800 + }, + { + "epoch": 9.15, + "learning_rate": 4.885680970149254e-05, + "loss": 0.0042, + "step": 9804 + }, + { + "epoch": 9.15, + "learning_rate": 4.885634328358209e-05, + "loss": 0.0035, + "step": 9808 + }, + { + "epoch": 9.15, + "learning_rate": 4.8855876865671645e-05, + "loss": 0.008, + "step": 9812 + }, + { + "epoch": 9.16, + "learning_rate": 4.885541044776119e-05, + "loss": 0.0092, + "step": 9816 + }, + { + "epoch": 9.16, + "learning_rate": 4.885494402985075e-05, + "loss": 0.0018, + "step": 9820 + }, + { + "epoch": 9.16, + "learning_rate": 4.88544776119403e-05, + "loss": 0.0074, + "step": 9824 + }, + { + "epoch": 9.17, + "learning_rate": 4.885401119402985e-05, + "loss": 0.0033, + "step": 9828 + }, + { + "epoch": 9.17, + "learning_rate": 4.8853544776119406e-05, + "loss": 0.0062, + "step": 9832 + }, + { + "epoch": 9.18, + "learning_rate": 4.885307835820896e-05, + "loss": 0.0108, + "step": 9836 + }, + { + "epoch": 9.18, + "learning_rate": 4.885261194029851e-05, + "loss": 0.0013, + "step": 9840 + }, + { + "epoch": 9.18, + "learning_rate": 4.885214552238806e-05, + "loss": 0.0041, + "step": 9844 + }, + { + "epoch": 9.19, + "learning_rate": 4.885167910447762e-05, + "loss": 0.0033, + "step": 9848 + }, + { + "epoch": 9.19, + "learning_rate": 4.885121268656717e-05, + "loss": 0.0097, + "step": 9852 + }, + { + "epoch": 9.19, + "learning_rate": 4.8850746268656715e-05, + "loss": 0.0038, + "step": 9856 + }, + { + "epoch": 9.2, + "learning_rate": 4.885027985074627e-05, + "loss": 0.0038, + "step": 9860 + }, + { + "epoch": 9.2, + "learning_rate": 4.8849813432835825e-05, + "loss": 0.0096, + "step": 9864 + }, + { + "epoch": 9.21, + "learning_rate": 4.884934701492537e-05, + "loss": 0.0028, + "step": 9868 + }, + { + "epoch": 9.21, + "learning_rate": 4.884888059701493e-05, + "loss": 0.0026, + "step": 9872 + }, + { + "epoch": 9.21, + "learning_rate": 4.8848414179104476e-05, + "loss": 0.0077, + "step": 9876 + }, + { + "epoch": 9.22, + "learning_rate": 4.884794776119403e-05, + "loss": 0.0042, + "step": 9880 + }, + { + "epoch": 9.22, + "learning_rate": 4.8847481343283586e-05, + "loss": 0.0126, + "step": 9884 + }, + { + "epoch": 9.22, + "learning_rate": 4.8847014925373134e-05, + "loss": 0.0051, + "step": 9888 + }, + { + "epoch": 9.23, + "learning_rate": 4.884654850746269e-05, + "loss": 0.0021, + "step": 9892 + }, + { + "epoch": 9.23, + "learning_rate": 4.8846082089552244e-05, + "loss": 0.0047, + "step": 9896 + }, + { + "epoch": 9.24, + "learning_rate": 4.884561567164179e-05, + "loss": 0.0127, + "step": 9900 + }, + { + "epoch": 9.24, + "learning_rate": 4.884514925373135e-05, + "loss": 0.0092, + "step": 9904 + }, + { + "epoch": 9.24, + "learning_rate": 4.88446828358209e-05, + "loss": 0.0014, + "step": 9908 + }, + { + "epoch": 9.25, + "learning_rate": 4.884421641791045e-05, + "loss": 0.0029, + "step": 9912 + }, + { + "epoch": 9.25, + "learning_rate": 4.8843750000000005e-05, + "loss": 0.0095, + "step": 9916 + }, + { + "epoch": 9.25, + "learning_rate": 4.884328358208955e-05, + "loss": 0.0046, + "step": 9920 + }, + { + "epoch": 9.26, + "learning_rate": 4.884281716417911e-05, + "loss": 0.0055, + "step": 9924 + }, + { + "epoch": 9.26, + "learning_rate": 4.884235074626866e-05, + "loss": 0.0018, + "step": 9928 + }, + { + "epoch": 9.26, + "learning_rate": 4.884188432835821e-05, + "loss": 0.0035, + "step": 9932 + }, + { + "epoch": 9.27, + "learning_rate": 4.884141791044776e-05, + "loss": 0.0005, + "step": 9936 + }, + { + "epoch": 9.27, + "learning_rate": 4.884095149253732e-05, + "loss": 0.0007, + "step": 9940 + }, + { + "epoch": 9.28, + "learning_rate": 4.884048507462687e-05, + "loss": 0.0067, + "step": 9944 + }, + { + "epoch": 9.28, + "learning_rate": 4.884001865671642e-05, + "loss": 0.0027, + "step": 9948 + }, + { + "epoch": 9.28, + "learning_rate": 4.883955223880597e-05, + "loss": 0.0144, + "step": 9952 + }, + { + "epoch": 9.29, + "learning_rate": 4.883908582089553e-05, + "loss": 0.0074, + "step": 9956 + }, + { + "epoch": 9.29, + "learning_rate": 4.8838619402985075e-05, + "loss": 0.0047, + "step": 9960 + }, + { + "epoch": 9.29, + "learning_rate": 4.883815298507463e-05, + "loss": 0.0044, + "step": 9964 + }, + { + "epoch": 9.3, + "learning_rate": 4.8837686567164185e-05, + "loss": 0.0054, + "step": 9968 + }, + { + "epoch": 9.3, + "learning_rate": 4.883722014925373e-05, + "loss": 0.0024, + "step": 9972 + }, + { + "epoch": 9.31, + "learning_rate": 4.883675373134329e-05, + "loss": 0.0095, + "step": 9976 + }, + { + "epoch": 9.31, + "learning_rate": 4.8836287313432836e-05, + "loss": 0.0029, + "step": 9980 + }, + { + "epoch": 9.31, + "learning_rate": 4.883582089552239e-05, + "loss": 0.0036, + "step": 9984 + }, + { + "epoch": 9.32, + "learning_rate": 4.8835354477611946e-05, + "loss": 0.0026, + "step": 9988 + }, + { + "epoch": 9.32, + "learning_rate": 4.8834888059701494e-05, + "loss": 0.0053, + "step": 9992 + }, + { + "epoch": 9.32, + "learning_rate": 4.883442164179104e-05, + "loss": 0.0005, + "step": 9996 + }, + { + "epoch": 9.33, + "learning_rate": 4.8833955223880604e-05, + "loss": 0.005, + "step": 10000 + }, + { + "epoch": 9.33, + "eval_exact_match": 0.7166344294003868, + "eval_exec": 0.7543520309477756, + "eval_loss": 0.2650771737098694, + "eval_runtime": 1372.3967, + "eval_samples_per_second": 0.753, + "step": 10000 + }, + { + "epoch": 9.33, + "learning_rate": 4.883348880597015e-05, + "loss": 0.0067, + "step": 10004 + }, + { + "epoch": 9.34, + "learning_rate": 4.88330223880597e-05, + "loss": 0.0106, + "step": 10008 + }, + { + "epoch": 9.34, + "learning_rate": 4.8832555970149255e-05, + "loss": 0.003, + "step": 10012 + }, + { + "epoch": 9.34, + "learning_rate": 4.883208955223881e-05, + "loss": 0.0035, + "step": 10016 + }, + { + "epoch": 9.35, + "learning_rate": 4.883162313432836e-05, + "loss": 0.0091, + "step": 10020 + }, + { + "epoch": 9.35, + "learning_rate": 4.883115671641791e-05, + "loss": 0.0061, + "step": 10024 + }, + { + "epoch": 9.35, + "learning_rate": 4.883069029850747e-05, + "loss": 0.0104, + "step": 10028 + }, + { + "epoch": 9.36, + "learning_rate": 4.8830223880597016e-05, + "loss": 0.0064, + "step": 10032 + }, + { + "epoch": 9.36, + "learning_rate": 4.882975746268657e-05, + "loss": 0.0049, + "step": 10036 + }, + { + "epoch": 9.37, + "learning_rate": 4.882929104477612e-05, + "loss": 0.0095, + "step": 10040 + }, + { + "epoch": 9.37, + "learning_rate": 4.8828824626865674e-05, + "loss": 0.0055, + "step": 10044 + }, + { + "epoch": 9.37, + "learning_rate": 4.882835820895523e-05, + "loss": 0.0069, + "step": 10048 + }, + { + "epoch": 9.38, + "learning_rate": 4.882789179104478e-05, + "loss": 0.0087, + "step": 10052 + }, + { + "epoch": 9.38, + "learning_rate": 4.882742537313433e-05, + "loss": 0.006, + "step": 10056 + }, + { + "epoch": 9.38, + "learning_rate": 4.8826958955223887e-05, + "loss": 0.0081, + "step": 10060 + }, + { + "epoch": 9.39, + "learning_rate": 4.8826492537313435e-05, + "loss": 0.0057, + "step": 10064 + }, + { + "epoch": 9.39, + "learning_rate": 4.882602611940299e-05, + "loss": 0.0143, + "step": 10068 + }, + { + "epoch": 9.4, + "learning_rate": 4.882555970149254e-05, + "loss": 0.0006, + "step": 10072 + }, + { + "epoch": 9.4, + "learning_rate": 4.882509328358209e-05, + "loss": 0.0026, + "step": 10076 + }, + { + "epoch": 9.4, + "learning_rate": 4.882462686567165e-05, + "loss": 0.0029, + "step": 10080 + }, + { + "epoch": 9.41, + "learning_rate": 4.8824160447761196e-05, + "loss": 0.0095, + "step": 10084 + }, + { + "epoch": 9.41, + "learning_rate": 4.882369402985075e-05, + "loss": 0.0035, + "step": 10088 + }, + { + "epoch": 9.41, + "learning_rate": 4.8823227611940306e-05, + "loss": 0.005, + "step": 10092 + }, + { + "epoch": 9.42, + "learning_rate": 4.8822761194029854e-05, + "loss": 0.0078, + "step": 10096 + }, + { + "epoch": 9.42, + "learning_rate": 4.88222947761194e-05, + "loss": 0.0091, + "step": 10100 + }, + { + "epoch": 9.43, + "learning_rate": 4.882182835820896e-05, + "loss": 0.0016, + "step": 10104 + }, + { + "epoch": 9.43, + "learning_rate": 4.882136194029851e-05, + "loss": 0.0048, + "step": 10108 + }, + { + "epoch": 9.43, + "learning_rate": 4.882089552238806e-05, + "loss": 0.0057, + "step": 10112 + }, + { + "epoch": 9.44, + "learning_rate": 4.8820429104477615e-05, + "loss": 0.0101, + "step": 10116 + }, + { + "epoch": 9.44, + "learning_rate": 4.881996268656717e-05, + "loss": 0.0057, + "step": 10120 + }, + { + "epoch": 9.44, + "learning_rate": 4.881949626865672e-05, + "loss": 0.0196, + "step": 10124 + }, + { + "epoch": 9.45, + "learning_rate": 4.881902985074627e-05, + "loss": 0.0033, + "step": 10128 + }, + { + "epoch": 9.45, + "learning_rate": 4.881856343283582e-05, + "loss": 0.0071, + "step": 10132 + }, + { + "epoch": 9.46, + "learning_rate": 4.8818097014925376e-05, + "loss": 0.007, + "step": 10136 + }, + { + "epoch": 9.46, + "learning_rate": 4.881763059701493e-05, + "loss": 0.0107, + "step": 10140 + }, + { + "epoch": 9.46, + "learning_rate": 4.881716417910448e-05, + "loss": 0.0095, + "step": 10144 + }, + { + "epoch": 9.47, + "learning_rate": 4.881669776119403e-05, + "loss": 0.0059, + "step": 10148 + }, + { + "epoch": 9.47, + "learning_rate": 4.881623134328359e-05, + "loss": 0.0046, + "step": 10152 + }, + { + "epoch": 9.47, + "learning_rate": 4.881576492537314e-05, + "loss": 0.01, + "step": 10156 + }, + { + "epoch": 9.48, + "learning_rate": 4.8815298507462685e-05, + "loss": 0.0111, + "step": 10160 + }, + { + "epoch": 9.48, + "learning_rate": 4.881483208955224e-05, + "loss": 0.0019, + "step": 10164 + }, + { + "epoch": 9.49, + "learning_rate": 4.8814365671641795e-05, + "loss": 0.0036, + "step": 10168 + }, + { + "epoch": 9.49, + "learning_rate": 4.881389925373134e-05, + "loss": 0.0077, + "step": 10172 + }, + { + "epoch": 9.49, + "learning_rate": 4.88134328358209e-05, + "loss": 0.0048, + "step": 10176 + }, + { + "epoch": 9.5, + "learning_rate": 4.881296641791045e-05, + "loss": 0.0053, + "step": 10180 + }, + { + "epoch": 9.5, + "learning_rate": 4.88125e-05, + "loss": 0.009, + "step": 10184 + }, + { + "epoch": 9.5, + "learning_rate": 4.8812033582089556e-05, + "loss": 0.0019, + "step": 10188 + }, + { + "epoch": 9.51, + "learning_rate": 4.8811567164179104e-05, + "loss": 0.0117, + "step": 10192 + }, + { + "epoch": 9.51, + "learning_rate": 4.881110074626866e-05, + "loss": 0.0055, + "step": 10196 + }, + { + "epoch": 9.51, + "learning_rate": 4.8810634328358213e-05, + "loss": 0.0021, + "step": 10200 + }, + { + "epoch": 9.52, + "learning_rate": 4.881016791044776e-05, + "loss": 0.0033, + "step": 10204 + }, + { + "epoch": 9.52, + "learning_rate": 4.880970149253731e-05, + "loss": 0.0086, + "step": 10208 + }, + { + "epoch": 9.53, + "learning_rate": 4.880923507462687e-05, + "loss": 0.002, + "step": 10212 + }, + { + "epoch": 9.53, + "learning_rate": 4.880876865671642e-05, + "loss": 0.0155, + "step": 10216 + }, + { + "epoch": 9.53, + "learning_rate": 4.8808302238805974e-05, + "loss": 0.0059, + "step": 10220 + }, + { + "epoch": 9.54, + "learning_rate": 4.880783582089552e-05, + "loss": 0.0176, + "step": 10224 + }, + { + "epoch": 9.54, + "learning_rate": 4.880736940298508e-05, + "loss": 0.0054, + "step": 10228 + }, + { + "epoch": 9.54, + "learning_rate": 4.880690298507463e-05, + "loss": 0.0152, + "step": 10232 + }, + { + "epoch": 9.55, + "learning_rate": 4.880643656716418e-05, + "loss": 0.0048, + "step": 10236 + }, + { + "epoch": 9.55, + "learning_rate": 4.8805970149253735e-05, + "loss": 0.0032, + "step": 10240 + }, + { + "epoch": 9.56, + "learning_rate": 4.880550373134329e-05, + "loss": 0.0011, + "step": 10244 + }, + { + "epoch": 9.56, + "learning_rate": 4.880503731343284e-05, + "loss": 0.0076, + "step": 10248 + }, + { + "epoch": 9.56, + "learning_rate": 4.880457089552239e-05, + "loss": 0.0022, + "step": 10252 + }, + { + "epoch": 9.57, + "learning_rate": 4.880410447761195e-05, + "loss": 0.0075, + "step": 10256 + }, + { + "epoch": 9.57, + "learning_rate": 4.8803638059701496e-05, + "loss": 0.0046, + "step": 10260 + }, + { + "epoch": 9.57, + "learning_rate": 4.8803171641791045e-05, + "loss": 0.0122, + "step": 10264 + }, + { + "epoch": 9.58, + "learning_rate": 4.88027052238806e-05, + "loss": 0.0022, + "step": 10268 + }, + { + "epoch": 9.58, + "learning_rate": 4.8802238805970154e-05, + "loss": 0.0089, + "step": 10272 + }, + { + "epoch": 9.59, + "learning_rate": 4.88017723880597e-05, + "loss": 0.0036, + "step": 10276 + }, + { + "epoch": 9.59, + "learning_rate": 4.880130597014926e-05, + "loss": 0.0034, + "step": 10280 + }, + { + "epoch": 9.59, + "learning_rate": 4.8800839552238806e-05, + "loss": 0.0018, + "step": 10284 + }, + { + "epoch": 9.6, + "learning_rate": 4.880037313432836e-05, + "loss": 0.013, + "step": 10288 + }, + { + "epoch": 9.6, + "learning_rate": 4.8799906716417915e-05, + "loss": 0.0064, + "step": 10292 + }, + { + "epoch": 9.6, + "learning_rate": 4.8799440298507464e-05, + "loss": 0.0188, + "step": 10296 + }, + { + "epoch": 9.61, + "learning_rate": 4.879897388059702e-05, + "loss": 0.0066, + "step": 10300 + }, + { + "epoch": 9.61, + "learning_rate": 4.879850746268657e-05, + "loss": 0.002, + "step": 10304 + }, + { + "epoch": 9.62, + "learning_rate": 4.879804104477612e-05, + "loss": 0.0185, + "step": 10308 + }, + { + "epoch": 9.62, + "learning_rate": 4.879757462686567e-05, + "loss": 0.0018, + "step": 10312 + }, + { + "epoch": 9.62, + "learning_rate": 4.8797108208955224e-05, + "loss": 0.0054, + "step": 10316 + }, + { + "epoch": 9.63, + "learning_rate": 4.879664179104478e-05, + "loss": 0.0053, + "step": 10320 + }, + { + "epoch": 9.63, + "learning_rate": 4.879617537313433e-05, + "loss": 0.0041, + "step": 10324 + }, + { + "epoch": 9.63, + "learning_rate": 4.879570895522388e-05, + "loss": 0.0044, + "step": 10328 + }, + { + "epoch": 9.64, + "learning_rate": 4.879524253731344e-05, + "loss": 0.0053, + "step": 10332 + }, + { + "epoch": 9.64, + "learning_rate": 4.8794776119402985e-05, + "loss": 0.0015, + "step": 10336 + }, + { + "epoch": 9.65, + "learning_rate": 4.879430970149254e-05, + "loss": 0.0031, + "step": 10340 + }, + { + "epoch": 9.65, + "learning_rate": 4.879384328358209e-05, + "loss": 0.0079, + "step": 10344 + }, + { + "epoch": 9.65, + "learning_rate": 4.8793376865671643e-05, + "loss": 0.0049, + "step": 10348 + }, + { + "epoch": 9.66, + "learning_rate": 4.87929104477612e-05, + "loss": 0.0065, + "step": 10352 + }, + { + "epoch": 9.66, + "learning_rate": 4.8792444029850746e-05, + "loss": 0.0036, + "step": 10356 + }, + { + "epoch": 9.66, + "learning_rate": 4.87919776119403e-05, + "loss": 0.004, + "step": 10360 + }, + { + "epoch": 9.67, + "learning_rate": 4.8791511194029856e-05, + "loss": 0.0037, + "step": 10364 + }, + { + "epoch": 9.67, + "learning_rate": 4.8791044776119404e-05, + "loss": 0.007, + "step": 10368 + }, + { + "epoch": 9.68, + "learning_rate": 4.879057835820895e-05, + "loss": 0.0059, + "step": 10372 + }, + { + "epoch": 9.68, + "learning_rate": 4.879011194029851e-05, + "loss": 0.0114, + "step": 10376 + }, + { + "epoch": 9.68, + "learning_rate": 4.878964552238806e-05, + "loss": 0.0061, + "step": 10380 + }, + { + "epoch": 9.69, + "learning_rate": 4.878917910447762e-05, + "loss": 0.0068, + "step": 10384 + }, + { + "epoch": 9.69, + "learning_rate": 4.8788712686567165e-05, + "loss": 0.0023, + "step": 10388 + }, + { + "epoch": 9.69, + "learning_rate": 4.878824626865672e-05, + "loss": 0.0013, + "step": 10392 + }, + { + "epoch": 9.7, + "learning_rate": 4.8787779850746275e-05, + "loss": 0.018, + "step": 10396 + }, + { + "epoch": 9.7, + "learning_rate": 4.878731343283582e-05, + "loss": 0.0046, + "step": 10400 + }, + { + "epoch": 9.71, + "learning_rate": 4.878684701492537e-05, + "loss": 0.0023, + "step": 10404 + }, + { + "epoch": 9.71, + "learning_rate": 4.878638059701493e-05, + "loss": 0.0012, + "step": 10408 + }, + { + "epoch": 9.71, + "learning_rate": 4.878591417910448e-05, + "loss": 0.0094, + "step": 10412 + }, + { + "epoch": 9.72, + "learning_rate": 4.878544776119403e-05, + "loss": 0.0019, + "step": 10416 + }, + { + "epoch": 9.72, + "learning_rate": 4.8784981343283584e-05, + "loss": 0.0021, + "step": 10420 + }, + { + "epoch": 9.72, + "learning_rate": 4.878451492537314e-05, + "loss": 0.0051, + "step": 10424 + }, + { + "epoch": 9.73, + "learning_rate": 4.878404850746269e-05, + "loss": 0.0066, + "step": 10428 + }, + { + "epoch": 9.73, + "learning_rate": 4.878358208955224e-05, + "loss": 0.0152, + "step": 10432 + }, + { + "epoch": 9.73, + "learning_rate": 4.878311567164179e-05, + "loss": 0.0071, + "step": 10436 + }, + { + "epoch": 9.74, + "learning_rate": 4.8782649253731345e-05, + "loss": 0.0122, + "step": 10440 + }, + { + "epoch": 9.74, + "learning_rate": 4.87821828358209e-05, + "loss": 0.0071, + "step": 10444 + }, + { + "epoch": 9.75, + "learning_rate": 4.878171641791045e-05, + "loss": 0.0062, + "step": 10448 + }, + { + "epoch": 9.75, + "learning_rate": 4.878125e-05, + "loss": 0.0091, + "step": 10452 + }, + { + "epoch": 9.75, + "learning_rate": 4.878078358208956e-05, + "loss": 0.0056, + "step": 10456 + }, + { + "epoch": 9.76, + "learning_rate": 4.8780317164179106e-05, + "loss": 0.0059, + "step": 10460 + }, + { + "epoch": 9.76, + "learning_rate": 4.8779850746268654e-05, + "loss": 0.0024, + "step": 10464 + }, + { + "epoch": 9.76, + "learning_rate": 4.8779384328358216e-05, + "loss": 0.0057, + "step": 10468 + }, + { + "epoch": 9.77, + "learning_rate": 4.8778917910447764e-05, + "loss": 0.0105, + "step": 10472 + }, + { + "epoch": 9.77, + "learning_rate": 4.877845149253731e-05, + "loss": 0.0117, + "step": 10476 + }, + { + "epoch": 9.78, + "learning_rate": 4.877798507462687e-05, + "loss": 0.0049, + "step": 10480 + }, + { + "epoch": 9.78, + "learning_rate": 4.877751865671642e-05, + "loss": 0.0039, + "step": 10484 + }, + { + "epoch": 9.78, + "learning_rate": 4.877705223880597e-05, + "loss": 0.0077, + "step": 10488 + }, + { + "epoch": 9.79, + "learning_rate": 4.8776585820895525e-05, + "loss": 0.0077, + "step": 10492 + }, + { + "epoch": 9.79, + "learning_rate": 4.877611940298507e-05, + "loss": 0.0067, + "step": 10496 + }, + { + "epoch": 9.79, + "learning_rate": 4.877565298507463e-05, + "loss": 0.0076, + "step": 10500 + }, + { + "epoch": 9.79, + "eval_exact_match": 0.7263056092843327, + "eval_exec": 0.746615087040619, + "eval_loss": 0.25435617566108704, + "eval_runtime": 1163.3507, + "eval_samples_per_second": 0.889, + "step": 10500 + }, + { + "epoch": 9.8, + "learning_rate": 4.877518656716418e-05, + "loss": 0.0073, + "step": 10504 + }, + { + "epoch": 9.8, + "learning_rate": 4.877472014925373e-05, + "loss": 0.0052, + "step": 10508 + }, + { + "epoch": 9.81, + "learning_rate": 4.8774253731343286e-05, + "loss": 0.0037, + "step": 10512 + }, + { + "epoch": 9.81, + "learning_rate": 4.877378731343284e-05, + "loss": 0.0038, + "step": 10516 + }, + { + "epoch": 9.81, + "learning_rate": 4.877332089552239e-05, + "loss": 0.0067, + "step": 10520 + }, + { + "epoch": 9.82, + "learning_rate": 4.877285447761194e-05, + "loss": 0.0172, + "step": 10524 + }, + { + "epoch": 9.82, + "learning_rate": 4.87723880597015e-05, + "loss": 0.0064, + "step": 10528 + }, + { + "epoch": 9.82, + "learning_rate": 4.877192164179105e-05, + "loss": 0.0025, + "step": 10532 + }, + { + "epoch": 9.83, + "learning_rate": 4.8771455223880595e-05, + "loss": 0.0071, + "step": 10536 + }, + { + "epoch": 9.83, + "learning_rate": 4.877098880597015e-05, + "loss": 0.0036, + "step": 10540 + }, + { + "epoch": 9.84, + "learning_rate": 4.8770522388059705e-05, + "loss": 0.002, + "step": 10544 + }, + { + "epoch": 9.84, + "learning_rate": 4.877005597014926e-05, + "loss": 0.0026, + "step": 10548 + }, + { + "epoch": 9.84, + "learning_rate": 4.876958955223881e-05, + "loss": 0.0103, + "step": 10552 + }, + { + "epoch": 9.85, + "learning_rate": 4.8769123134328356e-05, + "loss": 0.022, + "step": 10556 + }, + { + "epoch": 9.85, + "learning_rate": 4.876865671641792e-05, + "loss": 0.0156, + "step": 10560 + }, + { + "epoch": 9.85, + "learning_rate": 4.8768190298507466e-05, + "loss": 0.0061, + "step": 10564 + }, + { + "epoch": 9.86, + "learning_rate": 4.8767723880597014e-05, + "loss": 0.0035, + "step": 10568 + }, + { + "epoch": 9.86, + "learning_rate": 4.876725746268657e-05, + "loss": 0.0074, + "step": 10572 + }, + { + "epoch": 9.87, + "learning_rate": 4.8766791044776124e-05, + "loss": 0.0039, + "step": 10576 + }, + { + "epoch": 9.87, + "learning_rate": 4.876632462686567e-05, + "loss": 0.0038, + "step": 10580 + }, + { + "epoch": 9.87, + "learning_rate": 4.876585820895523e-05, + "loss": 0.0179, + "step": 10584 + }, + { + "epoch": 9.88, + "learning_rate": 4.876539179104478e-05, + "loss": 0.0066, + "step": 10588 + }, + { + "epoch": 9.88, + "learning_rate": 4.876492537313433e-05, + "loss": 0.0219, + "step": 10592 + }, + { + "epoch": 9.88, + "learning_rate": 4.8764458955223885e-05, + "loss": 0.0091, + "step": 10596 + }, + { + "epoch": 9.89, + "learning_rate": 4.876399253731343e-05, + "loss": 0.0163, + "step": 10600 + }, + { + "epoch": 9.89, + "learning_rate": 4.876352611940299e-05, + "loss": 0.0063, + "step": 10604 + }, + { + "epoch": 9.9, + "learning_rate": 4.876305970149254e-05, + "loss": 0.0016, + "step": 10608 + }, + { + "epoch": 9.9, + "learning_rate": 4.876259328358209e-05, + "loss": 0.0107, + "step": 10612 + }, + { + "epoch": 9.9, + "learning_rate": 4.876212686567164e-05, + "loss": 0.0038, + "step": 10616 + }, + { + "epoch": 9.91, + "learning_rate": 4.87616604477612e-05, + "loss": 0.0051, + "step": 10620 + }, + { + "epoch": 9.91, + "learning_rate": 4.876119402985075e-05, + "loss": 0.0046, + "step": 10624 + }, + { + "epoch": 9.91, + "learning_rate": 4.87607276119403e-05, + "loss": 0.0087, + "step": 10628 + }, + { + "epoch": 9.92, + "learning_rate": 4.876026119402985e-05, + "loss": 0.0055, + "step": 10632 + }, + { + "epoch": 9.92, + "learning_rate": 4.875979477611941e-05, + "loss": 0.0079, + "step": 10636 + }, + { + "epoch": 9.93, + "learning_rate": 4.8759328358208955e-05, + "loss": 0.0017, + "step": 10640 + }, + { + "epoch": 9.93, + "learning_rate": 4.875886194029851e-05, + "loss": 0.0048, + "step": 10644 + }, + { + "epoch": 9.93, + "learning_rate": 4.8758395522388065e-05, + "loss": 0.0052, + "step": 10648 + }, + { + "epoch": 9.94, + "learning_rate": 4.875792910447761e-05, + "loss": 0.0089, + "step": 10652 + }, + { + "epoch": 9.94, + "learning_rate": 4.875746268656717e-05, + "loss": 0.0057, + "step": 10656 + }, + { + "epoch": 9.94, + "learning_rate": 4.8756996268656716e-05, + "loss": 0.0079, + "step": 10660 + }, + { + "epoch": 9.95, + "learning_rate": 4.875652985074627e-05, + "loss": 0.0027, + "step": 10664 + }, + { + "epoch": 9.95, + "learning_rate": 4.8756063432835826e-05, + "loss": 0.0058, + "step": 10668 + }, + { + "epoch": 9.96, + "learning_rate": 4.8755597014925374e-05, + "loss": 0.0024, + "step": 10672 + }, + { + "epoch": 9.96, + "learning_rate": 4.875513059701492e-05, + "loss": 0.0095, + "step": 10676 + }, + { + "epoch": 9.96, + "learning_rate": 4.8754664179104484e-05, + "loss": 0.0044, + "step": 10680 + }, + { + "epoch": 9.97, + "learning_rate": 4.875419776119403e-05, + "loss": 0.0091, + "step": 10684 + }, + { + "epoch": 9.97, + "learning_rate": 4.875373134328358e-05, + "loss": 0.0044, + "step": 10688 + }, + { + "epoch": 9.97, + "learning_rate": 4.8753264925373135e-05, + "loss": 0.015, + "step": 10692 + }, + { + "epoch": 9.98, + "learning_rate": 4.875279850746269e-05, + "loss": 0.0052, + "step": 10696 + }, + { + "epoch": 9.98, + "learning_rate": 4.875233208955224e-05, + "loss": 0.0128, + "step": 10700 + }, + { + "epoch": 9.98, + "learning_rate": 4.875186567164179e-05, + "loss": 0.0034, + "step": 10704 + }, + { + "epoch": 9.99, + "learning_rate": 4.875139925373135e-05, + "loss": 0.0081, + "step": 10708 + }, + { + "epoch": 9.99, + "learning_rate": 4.87509328358209e-05, + "loss": 0.0177, + "step": 10712 + }, + { + "epoch": 10.0, + "learning_rate": 4.875046641791045e-05, + "loss": 0.0081, + "step": 10716 + }, + { + "epoch": 10.0, + "learning_rate": 4.875e-05, + "loss": 0.0112, + "step": 10720 + }, + { + "epoch": 10.0, + "learning_rate": 4.8749533582089554e-05, + "loss": 0.0034, + "step": 10724 + }, + { + "epoch": 10.01, + "learning_rate": 4.874906716417911e-05, + "loss": 0.0058, + "step": 10728 + }, + { + "epoch": 10.01, + "learning_rate": 4.874860074626866e-05, + "loss": 0.0068, + "step": 10732 + }, + { + "epoch": 10.01, + "learning_rate": 4.874813432835821e-05, + "loss": 0.003, + "step": 10736 + }, + { + "epoch": 10.02, + "learning_rate": 4.874766791044777e-05, + "loss": 0.0021, + "step": 10740 + }, + { + "epoch": 10.02, + "learning_rate": 4.8747201492537315e-05, + "loss": 0.0023, + "step": 10744 + }, + { + "epoch": 10.03, + "learning_rate": 4.874673507462687e-05, + "loss": 0.0125, + "step": 10748 + }, + { + "epoch": 10.03, + "learning_rate": 4.874626865671642e-05, + "loss": 0.0033, + "step": 10752 + }, + { + "epoch": 10.03, + "learning_rate": 4.874580223880597e-05, + "loss": 0.0033, + "step": 10756 + }, + { + "epoch": 10.04, + "learning_rate": 4.874533582089553e-05, + "loss": 0.004, + "step": 10760 + }, + { + "epoch": 10.04, + "learning_rate": 4.8744869402985076e-05, + "loss": 0.0077, + "step": 10764 + }, + { + "epoch": 10.04, + "learning_rate": 4.874440298507463e-05, + "loss": 0.0011, + "step": 10768 + }, + { + "epoch": 10.05, + "learning_rate": 4.8743936567164186e-05, + "loss": 0.018, + "step": 10772 + }, + { + "epoch": 10.05, + "learning_rate": 4.8743470149253734e-05, + "loss": 0.0127, + "step": 10776 + }, + { + "epoch": 10.06, + "learning_rate": 4.874300373134328e-05, + "loss": 0.0034, + "step": 10780 + }, + { + "epoch": 10.06, + "learning_rate": 4.874253731343284e-05, + "loss": 0.0034, + "step": 10784 + }, + { + "epoch": 10.06, + "learning_rate": 4.874207089552239e-05, + "loss": 0.0028, + "step": 10788 + }, + { + "epoch": 10.07, + "learning_rate": 4.874160447761194e-05, + "loss": 0.0033, + "step": 10792 + }, + { + "epoch": 10.07, + "learning_rate": 4.8741138059701495e-05, + "loss": 0.0063, + "step": 10796 + }, + { + "epoch": 10.07, + "learning_rate": 4.874067164179105e-05, + "loss": 0.0076, + "step": 10800 + }, + { + "epoch": 10.08, + "learning_rate": 4.87402052238806e-05, + "loss": 0.0073, + "step": 10804 + }, + { + "epoch": 10.08, + "learning_rate": 4.873973880597015e-05, + "loss": 0.0182, + "step": 10808 + }, + { + "epoch": 10.09, + "learning_rate": 4.87392723880597e-05, + "loss": 0.0039, + "step": 10812 + }, + { + "epoch": 10.09, + "learning_rate": 4.8738805970149256e-05, + "loss": 0.0044, + "step": 10816 + }, + { + "epoch": 10.09, + "learning_rate": 4.873833955223881e-05, + "loss": 0.0031, + "step": 10820 + }, + { + "epoch": 10.1, + "learning_rate": 4.873787313432836e-05, + "loss": 0.004, + "step": 10824 + }, + { + "epoch": 10.1, + "learning_rate": 4.873740671641791e-05, + "loss": 0.0014, + "step": 10828 + }, + { + "epoch": 10.1, + "learning_rate": 4.873694029850747e-05, + "loss": 0.0011, + "step": 10832 + }, + { + "epoch": 10.11, + "learning_rate": 4.873647388059702e-05, + "loss": 0.0087, + "step": 10836 + }, + { + "epoch": 10.11, + "learning_rate": 4.8736007462686565e-05, + "loss": 0.0035, + "step": 10840 + }, + { + "epoch": 10.12, + "learning_rate": 4.873554104477612e-05, + "loss": 0.0023, + "step": 10844 + }, + { + "epoch": 10.12, + "learning_rate": 4.8735074626865675e-05, + "loss": 0.0029, + "step": 10848 + }, + { + "epoch": 10.12, + "learning_rate": 4.873460820895522e-05, + "loss": 0.0066, + "step": 10852 + }, + { + "epoch": 10.13, + "learning_rate": 4.873414179104478e-05, + "loss": 0.0055, + "step": 10856 + }, + { + "epoch": 10.13, + "learning_rate": 4.873367537313433e-05, + "loss": 0.0012, + "step": 10860 + }, + { + "epoch": 10.13, + "learning_rate": 4.873320895522388e-05, + "loss": 0.0036, + "step": 10864 + }, + { + "epoch": 10.14, + "learning_rate": 4.8732742537313436e-05, + "loss": 0.0055, + "step": 10868 + }, + { + "epoch": 10.14, + "learning_rate": 4.8732276119402984e-05, + "loss": 0.006, + "step": 10872 + }, + { + "epoch": 10.15, + "learning_rate": 4.8731809701492546e-05, + "loss": 0.0038, + "step": 10876 + }, + { + "epoch": 10.15, + "learning_rate": 4.8731343283582094e-05, + "loss": 0.0035, + "step": 10880 + }, + { + "epoch": 10.15, + "learning_rate": 4.873087686567164e-05, + "loss": 0.0044, + "step": 10884 + }, + { + "epoch": 10.16, + "learning_rate": 4.87304104477612e-05, + "loss": 0.0012, + "step": 10888 + }, + { + "epoch": 10.16, + "learning_rate": 4.872994402985075e-05, + "loss": 0.0067, + "step": 10892 + }, + { + "epoch": 10.16, + "learning_rate": 4.87294776119403e-05, + "loss": 0.0057, + "step": 10896 + }, + { + "epoch": 10.17, + "learning_rate": 4.8729011194029855e-05, + "loss": 0.0012, + "step": 10900 + }, + { + "epoch": 10.17, + "learning_rate": 4.87285447761194e-05, + "loss": 0.0095, + "step": 10904 + }, + { + "epoch": 10.18, + "learning_rate": 4.872807835820896e-05, + "loss": 0.003, + "step": 10908 + }, + { + "epoch": 10.18, + "learning_rate": 4.872761194029851e-05, + "loss": 0.0048, + "step": 10912 + }, + { + "epoch": 10.18, + "learning_rate": 4.872714552238806e-05, + "loss": 0.0052, + "step": 10916 + }, + { + "epoch": 10.19, + "learning_rate": 4.8726679104477616e-05, + "loss": 0.0047, + "step": 10920 + }, + { + "epoch": 10.19, + "learning_rate": 4.872621268656717e-05, + "loss": 0.0015, + "step": 10924 + }, + { + "epoch": 10.19, + "learning_rate": 4.872574626865672e-05, + "loss": 0.0007, + "step": 10928 + }, + { + "epoch": 10.2, + "learning_rate": 4.872527985074627e-05, + "loss": 0.0024, + "step": 10932 + }, + { + "epoch": 10.2, + "learning_rate": 4.872481343283583e-05, + "loss": 0.003, + "step": 10936 + }, + { + "epoch": 10.21, + "learning_rate": 4.872434701492538e-05, + "loss": 0.0018, + "step": 10940 + }, + { + "epoch": 10.21, + "learning_rate": 4.8723880597014925e-05, + "loss": 0.0084, + "step": 10944 + }, + { + "epoch": 10.21, + "learning_rate": 4.872341417910448e-05, + "loss": 0.0087, + "step": 10948 + }, + { + "epoch": 10.22, + "learning_rate": 4.8722947761194035e-05, + "loss": 0.0147, + "step": 10952 + }, + { + "epoch": 10.22, + "learning_rate": 4.872248134328358e-05, + "loss": 0.0021, + "step": 10956 + }, + { + "epoch": 10.22, + "learning_rate": 4.872201492537314e-05, + "loss": 0.0037, + "step": 10960 + }, + { + "epoch": 10.23, + "learning_rate": 4.8721548507462686e-05, + "loss": 0.0021, + "step": 10964 + }, + { + "epoch": 10.23, + "learning_rate": 4.872108208955224e-05, + "loss": 0.0016, + "step": 10968 + }, + { + "epoch": 10.24, + "learning_rate": 4.8720615671641796e-05, + "loss": 0.003, + "step": 10972 + }, + { + "epoch": 10.24, + "learning_rate": 4.8720149253731344e-05, + "loss": 0.0034, + "step": 10976 + }, + { + "epoch": 10.24, + "learning_rate": 4.87196828358209e-05, + "loss": 0.0032, + "step": 10980 + }, + { + "epoch": 10.25, + "learning_rate": 4.8719216417910454e-05, + "loss": 0.0095, + "step": 10984 + }, + { + "epoch": 10.25, + "learning_rate": 4.871875e-05, + "loss": 0.0083, + "step": 10988 + }, + { + "epoch": 10.25, + "learning_rate": 4.871828358208955e-05, + "loss": 0.0156, + "step": 10992 + }, + { + "epoch": 10.26, + "learning_rate": 4.8717817164179105e-05, + "loss": 0.0089, + "step": 10996 + }, + { + "epoch": 10.26, + "learning_rate": 4.871735074626866e-05, + "loss": 0.0049, + "step": 11000 + }, + { + "epoch": 10.26, + "eval_exact_match": 0.7388781431334622, + "eval_exec": 0.7514506769825918, + "eval_loss": 0.25768712162971497, + "eval_runtime": 1364.5681, + "eval_samples_per_second": 0.758, + "step": 11000 + }, + { + "epoch": 10.26, + "learning_rate": 4.871688432835821e-05, + "loss": 0.0159, + "step": 11004 + }, + { + "epoch": 10.27, + "learning_rate": 4.871641791044776e-05, + "loss": 0.0124, + "step": 11008 + }, + { + "epoch": 10.27, + "learning_rate": 4.871595149253732e-05, + "loss": 0.0136, + "step": 11012 + }, + { + "epoch": 10.28, + "learning_rate": 4.8715485074626866e-05, + "loss": 0.0046, + "step": 11016 + }, + { + "epoch": 10.28, + "learning_rate": 4.871501865671642e-05, + "loss": 0.0018, + "step": 11020 + }, + { + "epoch": 10.28, + "learning_rate": 4.871455223880597e-05, + "loss": 0.0029, + "step": 11024 + }, + { + "epoch": 10.29, + "learning_rate": 4.8714085820895524e-05, + "loss": 0.002, + "step": 11028 + }, + { + "epoch": 10.29, + "learning_rate": 4.871361940298508e-05, + "loss": 0.0022, + "step": 11032 + }, + { + "epoch": 10.29, + "learning_rate": 4.871315298507463e-05, + "loss": 0.006, + "step": 11036 + }, + { + "epoch": 10.3, + "learning_rate": 4.871268656716418e-05, + "loss": 0.005, + "step": 11040 + }, + { + "epoch": 10.3, + "learning_rate": 4.8712220149253737e-05, + "loss": 0.0033, + "step": 11044 + }, + { + "epoch": 10.31, + "learning_rate": 4.8711753731343285e-05, + "loss": 0.0016, + "step": 11048 + }, + { + "epoch": 10.31, + "learning_rate": 4.871128731343284e-05, + "loss": 0.0011, + "step": 11052 + }, + { + "epoch": 10.31, + "learning_rate": 4.871082089552239e-05, + "loss": 0.0031, + "step": 11056 + }, + { + "epoch": 10.32, + "learning_rate": 4.871035447761194e-05, + "loss": 0.0041, + "step": 11060 + }, + { + "epoch": 10.32, + "learning_rate": 4.87098880597015e-05, + "loss": 0.0029, + "step": 11064 + }, + { + "epoch": 10.32, + "learning_rate": 4.8709421641791046e-05, + "loss": 0.0194, + "step": 11068 + }, + { + "epoch": 10.33, + "learning_rate": 4.87089552238806e-05, + "loss": 0.0036, + "step": 11072 + }, + { + "epoch": 10.33, + "learning_rate": 4.8708488805970156e-05, + "loss": 0.0092, + "step": 11076 + }, + { + "epoch": 10.34, + "learning_rate": 4.8708022388059704e-05, + "loss": 0.0026, + "step": 11080 + }, + { + "epoch": 10.34, + "learning_rate": 4.870755597014925e-05, + "loss": 0.0023, + "step": 11084 + }, + { + "epoch": 10.34, + "learning_rate": 4.8707089552238813e-05, + "loss": 0.0055, + "step": 11088 + }, + { + "epoch": 10.35, + "learning_rate": 4.870662313432836e-05, + "loss": 0.0009, + "step": 11092 + }, + { + "epoch": 10.35, + "learning_rate": 4.870615671641791e-05, + "loss": 0.0041, + "step": 11096 + }, + { + "epoch": 10.35, + "learning_rate": 4.8705690298507465e-05, + "loss": 0.0043, + "step": 11100 + }, + { + "epoch": 10.36, + "learning_rate": 4.870522388059702e-05, + "loss": 0.0057, + "step": 11104 + }, + { + "epoch": 10.36, + "learning_rate": 4.870475746268657e-05, + "loss": 0.0052, + "step": 11108 + }, + { + "epoch": 10.37, + "learning_rate": 4.870429104477612e-05, + "loss": 0.003, + "step": 11112 + }, + { + "epoch": 10.37, + "learning_rate": 4.870382462686567e-05, + "loss": 0.0026, + "step": 11116 + }, + { + "epoch": 10.37, + "learning_rate": 4.8703358208955226e-05, + "loss": 0.0012, + "step": 11120 + }, + { + "epoch": 10.38, + "learning_rate": 4.870289179104478e-05, + "loss": 0.0201, + "step": 11124 + }, + { + "epoch": 10.38, + "learning_rate": 4.870242537313433e-05, + "loss": 0.0014, + "step": 11128 + }, + { + "epoch": 10.38, + "learning_rate": 4.8701958955223884e-05, + "loss": 0.0032, + "step": 11132 + }, + { + "epoch": 10.39, + "learning_rate": 4.870149253731344e-05, + "loss": 0.0047, + "step": 11136 + }, + { + "epoch": 10.39, + "learning_rate": 4.8701026119402987e-05, + "loss": 0.0053, + "step": 11140 + }, + { + "epoch": 10.4, + "learning_rate": 4.8700559701492535e-05, + "loss": 0.009, + "step": 11144 + }, + { + "epoch": 10.4, + "learning_rate": 4.8700093283582096e-05, + "loss": 0.0018, + "step": 11148 + }, + { + "epoch": 10.4, + "learning_rate": 4.8699626865671645e-05, + "loss": 0.0007, + "step": 11152 + }, + { + "epoch": 10.41, + "learning_rate": 4.869916044776119e-05, + "loss": 0.005, + "step": 11156 + }, + { + "epoch": 10.41, + "learning_rate": 4.869869402985075e-05, + "loss": 0.0029, + "step": 11160 + }, + { + "epoch": 10.41, + "learning_rate": 4.86982276119403e-05, + "loss": 0.002, + "step": 11164 + }, + { + "epoch": 10.42, + "learning_rate": 4.869776119402985e-05, + "loss": 0.0049, + "step": 11168 + }, + { + "epoch": 10.42, + "learning_rate": 4.8697294776119406e-05, + "loss": 0.0034, + "step": 11172 + }, + { + "epoch": 10.43, + "learning_rate": 4.8696828358208954e-05, + "loss": 0.0078, + "step": 11176 + }, + { + "epoch": 10.43, + "learning_rate": 4.869636194029851e-05, + "loss": 0.0016, + "step": 11180 + }, + { + "epoch": 10.43, + "learning_rate": 4.8695895522388063e-05, + "loss": 0.0114, + "step": 11184 + }, + { + "epoch": 10.44, + "learning_rate": 4.869542910447761e-05, + "loss": 0.0186, + "step": 11188 + }, + { + "epoch": 10.44, + "learning_rate": 4.8694962686567167e-05, + "loss": 0.0076, + "step": 11192 + }, + { + "epoch": 10.44, + "learning_rate": 4.869449626865672e-05, + "loss": 0.0042, + "step": 11196 + }, + { + "epoch": 10.45, + "learning_rate": 4.869402985074627e-05, + "loss": 0.019, + "step": 11200 + }, + { + "epoch": 10.45, + "learning_rate": 4.8693563432835824e-05, + "loss": 0.0029, + "step": 11204 + }, + { + "epoch": 10.46, + "learning_rate": 4.869309701492538e-05, + "loss": 0.0135, + "step": 11208 + }, + { + "epoch": 10.46, + "learning_rate": 4.869263059701493e-05, + "loss": 0.002, + "step": 11212 + }, + { + "epoch": 10.46, + "learning_rate": 4.869216417910448e-05, + "loss": 0.0021, + "step": 11216 + }, + { + "epoch": 10.47, + "learning_rate": 4.869169776119403e-05, + "loss": 0.003, + "step": 11220 + }, + { + "epoch": 10.47, + "learning_rate": 4.8691231343283585e-05, + "loss": 0.0094, + "step": 11224 + }, + { + "epoch": 10.47, + "learning_rate": 4.869076492537314e-05, + "loss": 0.0035, + "step": 11228 + }, + { + "epoch": 10.48, + "learning_rate": 4.869029850746269e-05, + "loss": 0.0061, + "step": 11232 + }, + { + "epoch": 10.48, + "learning_rate": 4.868983208955224e-05, + "loss": 0.0027, + "step": 11236 + }, + { + "epoch": 10.49, + "learning_rate": 4.86893656716418e-05, + "loss": 0.003, + "step": 11240 + }, + { + "epoch": 10.49, + "learning_rate": 4.8688899253731346e-05, + "loss": 0.0033, + "step": 11244 + }, + { + "epoch": 10.49, + "learning_rate": 4.8688432835820895e-05, + "loss": 0.004, + "step": 11248 + }, + { + "epoch": 10.5, + "learning_rate": 4.868796641791045e-05, + "loss": 0.0024, + "step": 11252 + }, + { + "epoch": 10.5, + "learning_rate": 4.8687500000000004e-05, + "loss": 0.0114, + "step": 11256 + }, + { + "epoch": 10.5, + "learning_rate": 4.868703358208955e-05, + "loss": 0.0017, + "step": 11260 + }, + { + "epoch": 10.51, + "learning_rate": 4.868656716417911e-05, + "loss": 0.0173, + "step": 11264 + }, + { + "epoch": 10.51, + "learning_rate": 4.868610074626866e-05, + "loss": 0.0101, + "step": 11268 + }, + { + "epoch": 10.51, + "learning_rate": 4.868563432835821e-05, + "loss": 0.0054, + "step": 11272 + }, + { + "epoch": 10.52, + "learning_rate": 4.8685167910447765e-05, + "loss": 0.0031, + "step": 11276 + }, + { + "epoch": 10.52, + "learning_rate": 4.8684701492537313e-05, + "loss": 0.0042, + "step": 11280 + }, + { + "epoch": 10.53, + "learning_rate": 4.868423507462687e-05, + "loss": 0.01, + "step": 11284 + }, + { + "epoch": 10.53, + "learning_rate": 4.868376865671642e-05, + "loss": 0.0073, + "step": 11288 + }, + { + "epoch": 10.53, + "learning_rate": 4.868330223880597e-05, + "loss": 0.002, + "step": 11292 + }, + { + "epoch": 10.54, + "learning_rate": 4.868283582089552e-05, + "loss": 0.0045, + "step": 11296 + }, + { + "epoch": 10.54, + "learning_rate": 4.868236940298508e-05, + "loss": 0.0062, + "step": 11300 + }, + { + "epoch": 10.54, + "learning_rate": 4.868190298507463e-05, + "loss": 0.0057, + "step": 11304 + }, + { + "epoch": 10.55, + "learning_rate": 4.868143656716418e-05, + "loss": 0.0037, + "step": 11308 + }, + { + "epoch": 10.55, + "learning_rate": 4.868097014925373e-05, + "loss": 0.0071, + "step": 11312 + }, + { + "epoch": 10.56, + "learning_rate": 4.868050373134329e-05, + "loss": 0.0073, + "step": 11316 + }, + { + "epoch": 10.56, + "learning_rate": 4.8680037313432835e-05, + "loss": 0.0121, + "step": 11320 + }, + { + "epoch": 10.56, + "learning_rate": 4.867957089552239e-05, + "loss": 0.0091, + "step": 11324 + }, + { + "epoch": 10.57, + "learning_rate": 4.8679104477611945e-05, + "loss": 0.0058, + "step": 11328 + }, + { + "epoch": 10.57, + "learning_rate": 4.8678638059701493e-05, + "loss": 0.0047, + "step": 11332 + }, + { + "epoch": 10.57, + "learning_rate": 4.867817164179105e-05, + "loss": 0.002, + "step": 11336 + }, + { + "epoch": 10.58, + "learning_rate": 4.8677705223880596e-05, + "loss": 0.0051, + "step": 11340 + }, + { + "epoch": 10.58, + "learning_rate": 4.867723880597015e-05, + "loss": 0.0096, + "step": 11344 + }, + { + "epoch": 10.59, + "learning_rate": 4.8676772388059706e-05, + "loss": 0.0019, + "step": 11348 + }, + { + "epoch": 10.59, + "learning_rate": 4.8676305970149254e-05, + "loss": 0.006, + "step": 11352 + }, + { + "epoch": 10.59, + "learning_rate": 4.86758395522388e-05, + "loss": 0.0018, + "step": 11356 + }, + { + "epoch": 10.6, + "learning_rate": 4.8675373134328364e-05, + "loss": 0.0004, + "step": 11360 + }, + { + "epoch": 10.6, + "learning_rate": 4.867490671641791e-05, + "loss": 0.0032, + "step": 11364 + }, + { + "epoch": 10.6, + "learning_rate": 4.867444029850747e-05, + "loss": 0.0023, + "step": 11368 + }, + { + "epoch": 10.61, + "learning_rate": 4.8673973880597015e-05, + "loss": 0.0312, + "step": 11372 + }, + { + "epoch": 10.61, + "learning_rate": 4.867350746268657e-05, + "loss": 0.0034, + "step": 11376 + }, + { + "epoch": 10.62, + "learning_rate": 4.8673041044776125e-05, + "loss": 0.0044, + "step": 11380 + }, + { + "epoch": 10.62, + "learning_rate": 4.867257462686567e-05, + "loss": 0.0065, + "step": 11384 + }, + { + "epoch": 10.62, + "learning_rate": 4.867210820895523e-05, + "loss": 0.0032, + "step": 11388 + }, + { + "epoch": 10.63, + "learning_rate": 4.867164179104478e-05, + "loss": 0.0054, + "step": 11392 + }, + { + "epoch": 10.63, + "learning_rate": 4.867117537313433e-05, + "loss": 0.0187, + "step": 11396 + }, + { + "epoch": 10.63, + "learning_rate": 4.867070895522388e-05, + "loss": 0.0059, + "step": 11400 + }, + { + "epoch": 10.64, + "learning_rate": 4.8670242537313434e-05, + "loss": 0.0104, + "step": 11404 + }, + { + "epoch": 10.64, + "learning_rate": 4.866977611940299e-05, + "loss": 0.0055, + "step": 11408 + }, + { + "epoch": 10.65, + "learning_rate": 4.866930970149254e-05, + "loss": 0.0007, + "step": 11412 + }, + { + "epoch": 10.65, + "learning_rate": 4.866884328358209e-05, + "loss": 0.0154, + "step": 11416 + }, + { + "epoch": 10.65, + "learning_rate": 4.866837686567165e-05, + "loss": 0.0075, + "step": 11420 + }, + { + "epoch": 10.66, + "learning_rate": 4.8667910447761195e-05, + "loss": 0.0022, + "step": 11424 + }, + { + "epoch": 10.66, + "learning_rate": 4.866744402985075e-05, + "loss": 0.0005, + "step": 11428 + }, + { + "epoch": 10.66, + "learning_rate": 4.86669776119403e-05, + "loss": 0.0053, + "step": 11432 + }, + { + "epoch": 10.67, + "learning_rate": 4.866651119402985e-05, + "loss": 0.0014, + "step": 11436 + }, + { + "epoch": 10.67, + "learning_rate": 4.866604477611941e-05, + "loss": 0.0055, + "step": 11440 + }, + { + "epoch": 10.68, + "learning_rate": 4.8665578358208956e-05, + "loss": 0.0081, + "step": 11444 + }, + { + "epoch": 10.68, + "learning_rate": 4.866511194029851e-05, + "loss": 0.0083, + "step": 11448 + }, + { + "epoch": 10.68, + "learning_rate": 4.8664645522388066e-05, + "loss": 0.0017, + "step": 11452 + }, + { + "epoch": 10.69, + "learning_rate": 4.8664179104477614e-05, + "loss": 0.0027, + "step": 11456 + }, + { + "epoch": 10.69, + "learning_rate": 4.866371268656716e-05, + "loss": 0.0168, + "step": 11460 + }, + { + "epoch": 10.69, + "learning_rate": 4.866324626865672e-05, + "loss": 0.0013, + "step": 11464 + }, + { + "epoch": 10.7, + "learning_rate": 4.866277985074627e-05, + "loss": 0.0057, + "step": 11468 + }, + { + "epoch": 10.7, + "learning_rate": 4.866231343283582e-05, + "loss": 0.0095, + "step": 11472 + }, + { + "epoch": 10.71, + "learning_rate": 4.8661847014925375e-05, + "loss": 0.0029, + "step": 11476 + }, + { + "epoch": 10.71, + "learning_rate": 4.866138059701493e-05, + "loss": 0.0151, + "step": 11480 + }, + { + "epoch": 10.71, + "learning_rate": 4.866091417910448e-05, + "loss": 0.0043, + "step": 11484 + }, + { + "epoch": 10.72, + "learning_rate": 4.866044776119403e-05, + "loss": 0.001, + "step": 11488 + }, + { + "epoch": 10.72, + "learning_rate": 4.865998134328358e-05, + "loss": 0.0074, + "step": 11492 + }, + { + "epoch": 10.72, + "learning_rate": 4.8659514925373136e-05, + "loss": 0.0067, + "step": 11496 + }, + { + "epoch": 10.73, + "learning_rate": 4.865904850746269e-05, + "loss": 0.0019, + "step": 11500 + }, + { + "epoch": 10.73, + "eval_exact_match": 0.7321083172147002, + "eval_exec": 0.7620889748549323, + "eval_loss": 0.27294236421585083, + "eval_runtime": 4988.8833, + "eval_samples_per_second": 0.207, + "step": 11500 + }, + { + "epoch": 10.73, + "learning_rate": 4.865858208955224e-05, + "loss": 0.0054, + "step": 11504 + }, + { + "epoch": 10.73, + "learning_rate": 4.865811567164179e-05, + "loss": 0.0097, + "step": 11508 + }, + { + "epoch": 10.74, + "learning_rate": 4.865764925373135e-05, + "loss": 0.0018, + "step": 11512 + }, + { + "epoch": 10.74, + "learning_rate": 4.86571828358209e-05, + "loss": 0.0024, + "step": 11516 + }, + { + "epoch": 10.75, + "learning_rate": 4.8656716417910445e-05, + "loss": 0.0051, + "step": 11520 + }, + { + "epoch": 10.75, + "learning_rate": 4.865625e-05, + "loss": 0.0032, + "step": 11524 + }, + { + "epoch": 10.75, + "learning_rate": 4.8655783582089555e-05, + "loss": 0.0028, + "step": 11528 + }, + { + "epoch": 10.76, + "learning_rate": 4.865531716417911e-05, + "loss": 0.0004, + "step": 11532 + }, + { + "epoch": 10.76, + "learning_rate": 4.865485074626866e-05, + "loss": 0.0465, + "step": 11536 + }, + { + "epoch": 10.76, + "learning_rate": 4.865438432835821e-05, + "loss": 0.0039, + "step": 11540 + }, + { + "epoch": 10.77, + "learning_rate": 4.865391791044777e-05, + "loss": 0.0051, + "step": 11544 + }, + { + "epoch": 10.77, + "learning_rate": 4.8653451492537316e-05, + "loss": 0.0056, + "step": 11548 + }, + { + "epoch": 10.78, + "learning_rate": 4.8652985074626864e-05, + "loss": 0.0043, + "step": 11552 + }, + { + "epoch": 10.78, + "learning_rate": 4.8652518656716426e-05, + "loss": 0.0012, + "step": 11556 + }, + { + "epoch": 10.78, + "learning_rate": 4.8652052238805974e-05, + "loss": 0.0004, + "step": 11560 + }, + { + "epoch": 10.79, + "learning_rate": 4.865158582089552e-05, + "loss": 0.0015, + "step": 11564 + }, + { + "epoch": 10.79, + "learning_rate": 4.865111940298508e-05, + "loss": 0.0031, + "step": 11568 + }, + { + "epoch": 10.79, + "learning_rate": 4.865065298507463e-05, + "loss": 0.0018, + "step": 11572 + }, + { + "epoch": 10.8, + "learning_rate": 4.865018656716418e-05, + "loss": 0.0042, + "step": 11576 + }, + { + "epoch": 10.8, + "learning_rate": 4.8649720149253735e-05, + "loss": 0.0076, + "step": 11580 + }, + { + "epoch": 10.81, + "learning_rate": 4.864925373134328e-05, + "loss": 0.0032, + "step": 11584 + }, + { + "epoch": 10.81, + "learning_rate": 4.864878731343284e-05, + "loss": 0.0011, + "step": 11588 + }, + { + "epoch": 10.81, + "learning_rate": 4.864832089552239e-05, + "loss": 0.0061, + "step": 11592 + }, + { + "epoch": 10.82, + "learning_rate": 4.864785447761194e-05, + "loss": 0.0019, + "step": 11596 + }, + { + "epoch": 10.82, + "learning_rate": 4.8647388059701496e-05, + "loss": 0.0035, + "step": 11600 + }, + { + "epoch": 10.82, + "learning_rate": 4.864692164179105e-05, + "loss": 0.0057, + "step": 11604 + }, + { + "epoch": 10.83, + "learning_rate": 4.86464552238806e-05, + "loss": 0.0037, + "step": 11608 + }, + { + "epoch": 10.83, + "learning_rate": 4.864598880597015e-05, + "loss": 0.0068, + "step": 11612 + }, + { + "epoch": 10.84, + "learning_rate": 4.864552238805971e-05, + "loss": 0.0087, + "step": 11616 + }, + { + "epoch": 10.84, + "learning_rate": 4.864505597014926e-05, + "loss": 0.0074, + "step": 11620 + }, + { + "epoch": 10.84, + "learning_rate": 4.8644589552238805e-05, + "loss": 0.0016, + "step": 11624 + }, + { + "epoch": 10.85, + "learning_rate": 4.864412313432836e-05, + "loss": 0.0041, + "step": 11628 + }, + { + "epoch": 10.85, + "learning_rate": 4.8643656716417915e-05, + "loss": 0.0054, + "step": 11632 + }, + { + "epoch": 10.85, + "learning_rate": 4.864319029850746e-05, + "loss": 0.0204, + "step": 11636 + }, + { + "epoch": 10.86, + "learning_rate": 4.864272388059702e-05, + "loss": 0.003, + "step": 11640 + }, + { + "epoch": 10.86, + "learning_rate": 4.8642257462686566e-05, + "loss": 0.0014, + "step": 11644 + }, + { + "epoch": 10.87, + "learning_rate": 4.864179104477612e-05, + "loss": 0.0107, + "step": 11648 + }, + { + "epoch": 10.87, + "learning_rate": 4.8641324626865676e-05, + "loss": 0.0018, + "step": 11652 + }, + { + "epoch": 10.87, + "learning_rate": 4.8640858208955224e-05, + "loss": 0.0025, + "step": 11656 + }, + { + "epoch": 10.88, + "learning_rate": 4.864039179104478e-05, + "loss": 0.0021, + "step": 11660 + }, + { + "epoch": 10.88, + "learning_rate": 4.8639925373134334e-05, + "loss": 0.0039, + "step": 11664 + }, + { + "epoch": 10.88, + "learning_rate": 4.863945895522388e-05, + "loss": 0.0034, + "step": 11668 + }, + { + "epoch": 10.89, + "learning_rate": 4.863899253731343e-05, + "loss": 0.0057, + "step": 11672 + }, + { + "epoch": 10.89, + "learning_rate": 4.8638526119402985e-05, + "loss": 0.0024, + "step": 11676 + }, + { + "epoch": 10.9, + "learning_rate": 4.863805970149254e-05, + "loss": 0.0056, + "step": 11680 + }, + { + "epoch": 10.9, + "learning_rate": 4.863759328358209e-05, + "loss": 0.0083, + "step": 11684 + }, + { + "epoch": 10.9, + "learning_rate": 4.863712686567164e-05, + "loss": 0.0061, + "step": 11688 + }, + { + "epoch": 10.91, + "learning_rate": 4.86366604477612e-05, + "loss": 0.0049, + "step": 11692 + }, + { + "epoch": 10.91, + "learning_rate": 4.863619402985075e-05, + "loss": 0.0035, + "step": 11696 + }, + { + "epoch": 10.91, + "learning_rate": 4.86357276119403e-05, + "loss": 0.0211, + "step": 11700 + }, + { + "epoch": 10.92, + "learning_rate": 4.863526119402985e-05, + "loss": 0.012, + "step": 11704 + }, + { + "epoch": 10.92, + "learning_rate": 4.863479477611941e-05, + "loss": 0.0067, + "step": 11708 + }, + { + "epoch": 10.93, + "learning_rate": 4.863432835820896e-05, + "loss": 0.0035, + "step": 11712 + }, + { + "epoch": 10.93, + "learning_rate": 4.863386194029851e-05, + "loss": 0.0078, + "step": 11716 + }, + { + "epoch": 10.93, + "learning_rate": 4.863339552238806e-05, + "loss": 0.0042, + "step": 11720 + }, + { + "epoch": 10.94, + "learning_rate": 4.863292910447762e-05, + "loss": 0.0036, + "step": 11724 + }, + { + "epoch": 10.94, + "learning_rate": 4.8632462686567165e-05, + "loss": 0.0036, + "step": 11728 + }, + { + "epoch": 10.94, + "learning_rate": 4.863199626865672e-05, + "loss": 0.0044, + "step": 11732 + }, + { + "epoch": 10.95, + "learning_rate": 4.863152985074627e-05, + "loss": 0.0137, + "step": 11736 + }, + { + "epoch": 10.95, + "learning_rate": 4.863106343283582e-05, + "loss": 0.0015, + "step": 11740 + }, + { + "epoch": 10.96, + "learning_rate": 4.863059701492538e-05, + "loss": 0.0024, + "step": 11744 + }, + { + "epoch": 10.96, + "learning_rate": 4.8630130597014926e-05, + "loss": 0.0024, + "step": 11748 + }, + { + "epoch": 10.96, + "learning_rate": 4.862966417910448e-05, + "loss": 0.0022, + "step": 11752 + }, + { + "epoch": 10.97, + "learning_rate": 4.8629197761194036e-05, + "loss": 0.0009, + "step": 11756 + }, + { + "epoch": 10.97, + "learning_rate": 4.8628731343283584e-05, + "loss": 0.0066, + "step": 11760 + }, + { + "epoch": 10.97, + "learning_rate": 4.862826492537313e-05, + "loss": 0.0165, + "step": 11764 + }, + { + "epoch": 10.98, + "learning_rate": 4.8627798507462694e-05, + "loss": 0.0085, + "step": 11768 + }, + { + "epoch": 10.98, + "learning_rate": 4.862733208955224e-05, + "loss": 0.0094, + "step": 11772 + }, + { + "epoch": 10.98, + "learning_rate": 4.862686567164179e-05, + "loss": 0.0009, + "step": 11776 + }, + { + "epoch": 10.99, + "learning_rate": 4.8626399253731345e-05, + "loss": 0.0039, + "step": 11780 + }, + { + "epoch": 10.99, + "learning_rate": 4.86259328358209e-05, + "loss": 0.0113, + "step": 11784 + }, + { + "epoch": 11.0, + "learning_rate": 4.862546641791045e-05, + "loss": 0.0044, + "step": 11788 + }, + { + "epoch": 11.0, + "learning_rate": 4.8625e-05, + "loss": 0.0027, + "step": 11792 + }, + { + "epoch": 11.0, + "learning_rate": 4.862453358208955e-05, + "loss": 0.0014, + "step": 11796 + }, + { + "epoch": 11.01, + "learning_rate": 4.8624067164179106e-05, + "loss": 0.0013, + "step": 11800 + }, + { + "epoch": 11.01, + "learning_rate": 4.862360074626866e-05, + "loss": 0.0007, + "step": 11804 + }, + { + "epoch": 11.01, + "learning_rate": 4.862313432835821e-05, + "loss": 0.004, + "step": 11808 + }, + { + "epoch": 11.02, + "learning_rate": 4.8622667910447764e-05, + "loss": 0.0021, + "step": 11812 + }, + { + "epoch": 11.02, + "learning_rate": 4.862220149253732e-05, + "loss": 0.0034, + "step": 11816 + }, + { + "epoch": 11.03, + "learning_rate": 4.862173507462687e-05, + "loss": 0.0059, + "step": 11820 + }, + { + "epoch": 11.03, + "learning_rate": 4.8621268656716415e-05, + "loss": 0.0025, + "step": 11824 + }, + { + "epoch": 11.03, + "learning_rate": 4.862080223880598e-05, + "loss": 0.0009, + "step": 11828 + }, + { + "epoch": 11.04, + "learning_rate": 4.8620335820895525e-05, + "loss": 0.0025, + "step": 11832 + }, + { + "epoch": 11.04, + "learning_rate": 4.861986940298507e-05, + "loss": 0.0071, + "step": 11836 + }, + { + "epoch": 11.04, + "learning_rate": 4.861940298507463e-05, + "loss": 0.0012, + "step": 11840 + }, + { + "epoch": 11.05, + "learning_rate": 4.861893656716418e-05, + "loss": 0.001, + "step": 11844 + }, + { + "epoch": 11.05, + "learning_rate": 4.861847014925374e-05, + "loss": 0.0013, + "step": 11848 + }, + { + "epoch": 11.06, + "learning_rate": 4.8618003731343286e-05, + "loss": 0.0019, + "step": 11852 + }, + { + "epoch": 11.06, + "learning_rate": 4.8617537313432834e-05, + "loss": 0.0056, + "step": 11856 + }, + { + "epoch": 11.06, + "learning_rate": 4.8617070895522396e-05, + "loss": 0.0038, + "step": 11860 + }, + { + "epoch": 11.07, + "learning_rate": 4.8616604477611944e-05, + "loss": 0.0036, + "step": 11864 + }, + { + "epoch": 11.07, + "learning_rate": 4.861613805970149e-05, + "loss": 0.0014, + "step": 11868 + }, + { + "epoch": 11.07, + "learning_rate": 4.861567164179105e-05, + "loss": 0.0019, + "step": 11872 + }, + { + "epoch": 11.08, + "learning_rate": 4.86152052238806e-05, + "loss": 0.0009, + "step": 11876 + }, + { + "epoch": 11.08, + "learning_rate": 4.861473880597015e-05, + "loss": 0.0039, + "step": 11880 + }, + { + "epoch": 11.09, + "learning_rate": 4.8614272388059705e-05, + "loss": 0.0012, + "step": 11884 + }, + { + "epoch": 11.09, + "learning_rate": 4.861380597014926e-05, + "loss": 0.0022, + "step": 11888 + }, + { + "epoch": 11.09, + "learning_rate": 4.861333955223881e-05, + "loss": 0.0032, + "step": 11892 + }, + { + "epoch": 11.1, + "learning_rate": 4.861287313432836e-05, + "loss": 0.002, + "step": 11896 + }, + { + "epoch": 11.1, + "learning_rate": 4.861240671641791e-05, + "loss": 0.0079, + "step": 11900 + }, + { + "epoch": 11.1, + "learning_rate": 4.8611940298507466e-05, + "loss": 0.0097, + "step": 11904 + }, + { + "epoch": 11.11, + "learning_rate": 4.861147388059702e-05, + "loss": 0.0023, + "step": 11908 + }, + { + "epoch": 11.11, + "learning_rate": 4.861100746268657e-05, + "loss": 0.0062, + "step": 11912 + }, + { + "epoch": 11.12, + "learning_rate": 4.861054104477612e-05, + "loss": 0.0034, + "step": 11916 + }, + { + "epoch": 11.12, + "learning_rate": 4.861007462686568e-05, + "loss": 0.001, + "step": 11920 + }, + { + "epoch": 11.12, + "learning_rate": 4.860960820895523e-05, + "loss": 0.0027, + "step": 11924 + }, + { + "epoch": 11.13, + "learning_rate": 4.8609141791044775e-05, + "loss": 0.0022, + "step": 11928 + }, + { + "epoch": 11.13, + "learning_rate": 4.860867537313433e-05, + "loss": 0.0078, + "step": 11932 + }, + { + "epoch": 11.13, + "learning_rate": 4.8608208955223885e-05, + "loss": 0.0028, + "step": 11936 + }, + { + "epoch": 11.14, + "learning_rate": 4.860774253731343e-05, + "loss": 0.0079, + "step": 11940 + }, + { + "epoch": 11.14, + "learning_rate": 4.860727611940299e-05, + "loss": 0.0014, + "step": 11944 + }, + { + "epoch": 11.15, + "learning_rate": 4.860680970149254e-05, + "loss": 0.0053, + "step": 11948 + }, + { + "epoch": 11.15, + "learning_rate": 4.860634328358209e-05, + "loss": 0.0048, + "step": 11952 + }, + { + "epoch": 11.15, + "learning_rate": 4.8605876865671646e-05, + "loss": 0.0078, + "step": 11956 + }, + { + "epoch": 11.16, + "learning_rate": 4.8605410447761194e-05, + "loss": 0.0035, + "step": 11960 + }, + { + "epoch": 11.16, + "learning_rate": 4.860494402985075e-05, + "loss": 0.0044, + "step": 11964 + }, + { + "epoch": 11.16, + "learning_rate": 4.8604477611940304e-05, + "loss": 0.0121, + "step": 11968 + }, + { + "epoch": 11.17, + "learning_rate": 4.860401119402985e-05, + "loss": 0.0056, + "step": 11972 + }, + { + "epoch": 11.17, + "learning_rate": 4.86035447761194e-05, + "loss": 0.0039, + "step": 11976 + }, + { + "epoch": 11.18, + "learning_rate": 4.860307835820896e-05, + "loss": 0.0053, + "step": 11980 + }, + { + "epoch": 11.18, + "learning_rate": 4.860261194029851e-05, + "loss": 0.0029, + "step": 11984 + }, + { + "epoch": 11.18, + "learning_rate": 4.860214552238806e-05, + "loss": 0.0017, + "step": 11988 + }, + { + "epoch": 11.19, + "learning_rate": 4.860167910447761e-05, + "loss": 0.0027, + "step": 11992 + }, + { + "epoch": 11.19, + "learning_rate": 4.860121268656717e-05, + "loss": 0.0033, + "step": 11996 + }, + { + "epoch": 11.19, + "learning_rate": 4.8600746268656716e-05, + "loss": 0.0026, + "step": 12000 + }, + { + "epoch": 11.19, + "eval_exact_match": 0.7292069632495164, + "eval_exec": 0.7553191489361702, + "eval_loss": 0.28799450397491455, + "eval_runtime": 1479.0438, + "eval_samples_per_second": 0.699, + "step": 12000 + }, + { + "epoch": 11.2, + "learning_rate": 4.860027985074627e-05, + "loss": 0.0073, + "step": 12004 + }, + { + "epoch": 11.2, + "learning_rate": 4.8599813432835826e-05, + "loss": 0.0067, + "step": 12008 + }, + { + "epoch": 11.21, + "learning_rate": 4.859934701492538e-05, + "loss": 0.0064, + "step": 12012 + }, + { + "epoch": 11.21, + "learning_rate": 4.859888059701493e-05, + "loss": 0.041, + "step": 12016 + }, + { + "epoch": 11.21, + "learning_rate": 4.859841417910448e-05, + "loss": 0.0005, + "step": 12020 + }, + { + "epoch": 11.22, + "learning_rate": 4.859794776119403e-05, + "loss": 0.004, + "step": 12024 + }, + { + "epoch": 11.22, + "learning_rate": 4.8597481343283587e-05, + "loss": 0.0102, + "step": 12028 + }, + { + "epoch": 11.22, + "learning_rate": 4.8597014925373135e-05, + "loss": 0.0062, + "step": 12032 + }, + { + "epoch": 11.23, + "learning_rate": 4.859654850746269e-05, + "loss": 0.0041, + "step": 12036 + }, + { + "epoch": 11.23, + "learning_rate": 4.8596082089552244e-05, + "loss": 0.0034, + "step": 12040 + }, + { + "epoch": 11.24, + "learning_rate": 4.859561567164179e-05, + "loss": 0.0085, + "step": 12044 + }, + { + "epoch": 11.24, + "learning_rate": 4.859514925373135e-05, + "loss": 0.0106, + "step": 12048 + }, + { + "epoch": 11.24, + "learning_rate": 4.8594682835820896e-05, + "loss": 0.0103, + "step": 12052 + }, + { + "epoch": 11.25, + "learning_rate": 4.859421641791045e-05, + "loss": 0.006, + "step": 12056 + }, + { + "epoch": 11.25, + "learning_rate": 4.8593750000000005e-05, + "loss": 0.0047, + "step": 12060 + }, + { + "epoch": 11.25, + "learning_rate": 4.8593283582089554e-05, + "loss": 0.0104, + "step": 12064 + }, + { + "epoch": 11.26, + "learning_rate": 4.859281716417911e-05, + "loss": 0.0043, + "step": 12068 + }, + { + "epoch": 11.26, + "learning_rate": 4.8592350746268663e-05, + "loss": 0.0063, + "step": 12072 + }, + { + "epoch": 11.26, + "learning_rate": 4.859188432835821e-05, + "loss": 0.0032, + "step": 12076 + }, + { + "epoch": 11.27, + "learning_rate": 4.859141791044776e-05, + "loss": 0.0088, + "step": 12080 + }, + { + "epoch": 11.27, + "learning_rate": 4.8590951492537315e-05, + "loss": 0.0046, + "step": 12084 + }, + { + "epoch": 11.28, + "learning_rate": 4.859048507462687e-05, + "loss": 0.0052, + "step": 12088 + }, + { + "epoch": 11.28, + "learning_rate": 4.859001865671642e-05, + "loss": 0.0082, + "step": 12092 + }, + { + "epoch": 11.28, + "learning_rate": 4.858955223880597e-05, + "loss": 0.002, + "step": 12096 + }, + { + "epoch": 11.29, + "learning_rate": 4.858908582089553e-05, + "loss": 0.0014, + "step": 12100 + }, + { + "epoch": 11.29, + "learning_rate": 4.8588619402985076e-05, + "loss": 0.0029, + "step": 12104 + }, + { + "epoch": 11.29, + "learning_rate": 4.858815298507463e-05, + "loss": 0.0057, + "step": 12108 + }, + { + "epoch": 11.3, + "learning_rate": 4.858768656716418e-05, + "loss": 0.0019, + "step": 12112 + }, + { + "epoch": 11.3, + "learning_rate": 4.8587220149253734e-05, + "loss": 0.011, + "step": 12116 + }, + { + "epoch": 11.31, + "learning_rate": 4.858675373134329e-05, + "loss": 0.0029, + "step": 12120 + }, + { + "epoch": 11.31, + "learning_rate": 4.8586287313432837e-05, + "loss": 0.0103, + "step": 12124 + }, + { + "epoch": 11.31, + "learning_rate": 4.858582089552239e-05, + "loss": 0.0034, + "step": 12128 + }, + { + "epoch": 11.32, + "learning_rate": 4.8585354477611946e-05, + "loss": 0.0027, + "step": 12132 + }, + { + "epoch": 11.32, + "learning_rate": 4.8584888059701495e-05, + "loss": 0.0065, + "step": 12136 + }, + { + "epoch": 11.32, + "learning_rate": 4.858442164179104e-05, + "loss": 0.0038, + "step": 12140 + }, + { + "epoch": 11.33, + "learning_rate": 4.85839552238806e-05, + "loss": 0.0018, + "step": 12144 + }, + { + "epoch": 11.33, + "learning_rate": 4.858348880597015e-05, + "loss": 0.003, + "step": 12148 + }, + { + "epoch": 11.34, + "learning_rate": 4.85830223880597e-05, + "loss": 0.0033, + "step": 12152 + }, + { + "epoch": 11.34, + "learning_rate": 4.8582555970149256e-05, + "loss": 0.0043, + "step": 12156 + }, + { + "epoch": 11.34, + "learning_rate": 4.858208955223881e-05, + "loss": 0.0072, + "step": 12160 + }, + { + "epoch": 11.35, + "learning_rate": 4.858162313432836e-05, + "loss": 0.0037, + "step": 12164 + }, + { + "epoch": 11.35, + "learning_rate": 4.8581156716417913e-05, + "loss": 0.0056, + "step": 12168 + }, + { + "epoch": 11.35, + "learning_rate": 4.858069029850746e-05, + "loss": 0.0006, + "step": 12172 + }, + { + "epoch": 11.36, + "learning_rate": 4.858022388059702e-05, + "loss": 0.0046, + "step": 12176 + }, + { + "epoch": 11.36, + "learning_rate": 4.857975746268657e-05, + "loss": 0.003, + "step": 12180 + }, + { + "epoch": 11.37, + "learning_rate": 4.857929104477612e-05, + "loss": 0.0136, + "step": 12184 + }, + { + "epoch": 11.37, + "learning_rate": 4.8578824626865674e-05, + "loss": 0.0166, + "step": 12188 + }, + { + "epoch": 11.37, + "learning_rate": 4.857835820895523e-05, + "loss": 0.0101, + "step": 12192 + }, + { + "epoch": 11.38, + "learning_rate": 4.857789179104478e-05, + "loss": 0.0007, + "step": 12196 + }, + { + "epoch": 11.38, + "learning_rate": 4.857742537313433e-05, + "loss": 0.0022, + "step": 12200 + }, + { + "epoch": 11.38, + "learning_rate": 4.857695895522388e-05, + "loss": 0.0046, + "step": 12204 + }, + { + "epoch": 11.39, + "learning_rate": 4.8576492537313435e-05, + "loss": 0.0103, + "step": 12208 + }, + { + "epoch": 11.39, + "learning_rate": 4.857602611940299e-05, + "loss": 0.0057, + "step": 12212 + }, + { + "epoch": 11.4, + "learning_rate": 4.857555970149254e-05, + "loss": 0.001, + "step": 12216 + }, + { + "epoch": 11.4, + "learning_rate": 4.857509328358209e-05, + "loss": 0.0023, + "step": 12220 + }, + { + "epoch": 11.4, + "learning_rate": 4.857462686567165e-05, + "loss": 0.0158, + "step": 12224 + }, + { + "epoch": 11.41, + "learning_rate": 4.8574160447761196e-05, + "loss": 0.0056, + "step": 12228 + }, + { + "epoch": 11.41, + "learning_rate": 4.8573694029850745e-05, + "loss": 0.0043, + "step": 12232 + }, + { + "epoch": 11.41, + "learning_rate": 4.8573227611940306e-05, + "loss": 0.007, + "step": 12236 + }, + { + "epoch": 11.42, + "learning_rate": 4.8572761194029854e-05, + "loss": 0.01, + "step": 12240 + }, + { + "epoch": 11.42, + "learning_rate": 4.85722947761194e-05, + "loss": 0.0065, + "step": 12244 + }, + { + "epoch": 11.43, + "learning_rate": 4.857182835820896e-05, + "loss": 0.0031, + "step": 12248 + }, + { + "epoch": 11.43, + "learning_rate": 4.857136194029851e-05, + "loss": 0.0028, + "step": 12252 + }, + { + "epoch": 11.43, + "learning_rate": 4.857089552238806e-05, + "loss": 0.0023, + "step": 12256 + }, + { + "epoch": 11.44, + "learning_rate": 4.8570429104477615e-05, + "loss": 0.0022, + "step": 12260 + }, + { + "epoch": 11.44, + "learning_rate": 4.8569962686567163e-05, + "loss": 0.0092, + "step": 12264 + }, + { + "epoch": 11.44, + "learning_rate": 4.856949626865672e-05, + "loss": 0.0047, + "step": 12268 + }, + { + "epoch": 11.45, + "learning_rate": 4.856902985074627e-05, + "loss": 0.0081, + "step": 12272 + }, + { + "epoch": 11.45, + "learning_rate": 4.856856343283582e-05, + "loss": 0.002, + "step": 12276 + }, + { + "epoch": 11.46, + "learning_rate": 4.8568097014925376e-05, + "loss": 0.0027, + "step": 12280 + }, + { + "epoch": 11.46, + "learning_rate": 4.856763059701493e-05, + "loss": 0.0015, + "step": 12284 + }, + { + "epoch": 11.46, + "learning_rate": 4.856716417910448e-05, + "loss": 0.003, + "step": 12288 + }, + { + "epoch": 11.47, + "learning_rate": 4.856669776119403e-05, + "loss": 0.0054, + "step": 12292 + }, + { + "epoch": 11.47, + "learning_rate": 4.856623134328359e-05, + "loss": 0.0237, + "step": 12296 + }, + { + "epoch": 11.47, + "learning_rate": 4.856576492537314e-05, + "loss": 0.006, + "step": 12300 + }, + { + "epoch": 11.48, + "learning_rate": 4.8565298507462685e-05, + "loss": 0.0008, + "step": 12304 + }, + { + "epoch": 11.48, + "learning_rate": 4.856483208955224e-05, + "loss": 0.0012, + "step": 12308 + }, + { + "epoch": 11.49, + "learning_rate": 4.8564365671641795e-05, + "loss": 0.0025, + "step": 12312 + }, + { + "epoch": 11.49, + "learning_rate": 4.8563899253731343e-05, + "loss": 0.0073, + "step": 12316 + }, + { + "epoch": 11.49, + "learning_rate": 4.85634328358209e-05, + "loss": 0.0037, + "step": 12320 + }, + { + "epoch": 11.5, + "learning_rate": 4.8562966417910446e-05, + "loss": 0.0057, + "step": 12324 + }, + { + "epoch": 11.5, + "learning_rate": 4.85625e-05, + "loss": 0.006, + "step": 12328 + }, + { + "epoch": 11.5, + "learning_rate": 4.8562033582089556e-05, + "loss": 0.0032, + "step": 12332 + }, + { + "epoch": 11.51, + "learning_rate": 4.8561567164179104e-05, + "loss": 0.0137, + "step": 12336 + }, + { + "epoch": 11.51, + "learning_rate": 4.856110074626866e-05, + "loss": 0.0022, + "step": 12340 + }, + { + "epoch": 11.51, + "learning_rate": 4.8560634328358214e-05, + "loss": 0.0046, + "step": 12344 + }, + { + "epoch": 11.52, + "learning_rate": 4.856016791044776e-05, + "loss": 0.0016, + "step": 12348 + }, + { + "epoch": 11.52, + "learning_rate": 4.855970149253732e-05, + "loss": 0.0054, + "step": 12352 + }, + { + "epoch": 11.53, + "learning_rate": 4.8559235074626865e-05, + "loss": 0.015, + "step": 12356 + }, + { + "epoch": 11.53, + "learning_rate": 4.855876865671642e-05, + "loss": 0.0028, + "step": 12360 + }, + { + "epoch": 11.53, + "learning_rate": 4.8558302238805975e-05, + "loss": 0.004, + "step": 12364 + }, + { + "epoch": 11.54, + "learning_rate": 4.855783582089552e-05, + "loss": 0.0046, + "step": 12368 + }, + { + "epoch": 11.54, + "learning_rate": 4.855736940298508e-05, + "loss": 0.0031, + "step": 12372 + }, + { + "epoch": 11.54, + "learning_rate": 4.855690298507463e-05, + "loss": 0.0008, + "step": 12376 + }, + { + "epoch": 11.55, + "learning_rate": 4.855643656716418e-05, + "loss": 0.0011, + "step": 12380 + }, + { + "epoch": 11.55, + "learning_rate": 4.855597014925373e-05, + "loss": 0.0039, + "step": 12384 + }, + { + "epoch": 11.56, + "learning_rate": 4.855550373134329e-05, + "loss": 0.0028, + "step": 12388 + }, + { + "epoch": 11.56, + "learning_rate": 4.855503731343284e-05, + "loss": 0.0044, + "step": 12392 + }, + { + "epoch": 11.56, + "learning_rate": 4.855457089552239e-05, + "loss": 0.0022, + "step": 12396 + }, + { + "epoch": 11.57, + "learning_rate": 4.855410447761194e-05, + "loss": 0.013, + "step": 12400 + }, + { + "epoch": 11.57, + "learning_rate": 4.85536380597015e-05, + "loss": 0.0035, + "step": 12404 + }, + { + "epoch": 11.57, + "learning_rate": 4.8553171641791045e-05, + "loss": 0.0038, + "step": 12408 + }, + { + "epoch": 11.58, + "learning_rate": 4.85527052238806e-05, + "loss": 0.0004, + "step": 12412 + }, + { + "epoch": 11.58, + "learning_rate": 4.855223880597015e-05, + "loss": 0.0019, + "step": 12416 + }, + { + "epoch": 11.59, + "learning_rate": 4.85517723880597e-05, + "loss": 0.0123, + "step": 12420 + }, + { + "epoch": 11.59, + "learning_rate": 4.855130597014926e-05, + "loss": 0.0087, + "step": 12424 + }, + { + "epoch": 11.59, + "learning_rate": 4.8550839552238806e-05, + "loss": 0.0008, + "step": 12428 + }, + { + "epoch": 11.6, + "learning_rate": 4.855037313432836e-05, + "loss": 0.0046, + "step": 12432 + }, + { + "epoch": 11.6, + "learning_rate": 4.8549906716417916e-05, + "loss": 0.0057, + "step": 12436 + }, + { + "epoch": 11.6, + "learning_rate": 4.8549440298507464e-05, + "loss": 0.0011, + "step": 12440 + }, + { + "epoch": 11.61, + "learning_rate": 4.854897388059701e-05, + "loss": 0.0024, + "step": 12444 + }, + { + "epoch": 11.61, + "learning_rate": 4.8548507462686574e-05, + "loss": 0.0038, + "step": 12448 + }, + { + "epoch": 11.62, + "learning_rate": 4.854804104477612e-05, + "loss": 0.0114, + "step": 12452 + }, + { + "epoch": 11.62, + "learning_rate": 4.854757462686567e-05, + "loss": 0.007, + "step": 12456 + }, + { + "epoch": 11.62, + "learning_rate": 4.8547108208955225e-05, + "loss": 0.0028, + "step": 12460 + }, + { + "epoch": 11.63, + "learning_rate": 4.854664179104478e-05, + "loss": 0.0039, + "step": 12464 + }, + { + "epoch": 11.63, + "learning_rate": 4.854617537313433e-05, + "loss": 0.0057, + "step": 12468 + }, + { + "epoch": 11.63, + "learning_rate": 4.854570895522388e-05, + "loss": 0.0133, + "step": 12472 + }, + { + "epoch": 11.64, + "learning_rate": 4.854524253731343e-05, + "loss": 0.0005, + "step": 12476 + }, + { + "epoch": 11.64, + "learning_rate": 4.8544776119402986e-05, + "loss": 0.0009, + "step": 12480 + }, + { + "epoch": 11.65, + "learning_rate": 4.854430970149254e-05, + "loss": 0.0055, + "step": 12484 + }, + { + "epoch": 11.65, + "learning_rate": 4.854384328358209e-05, + "loss": 0.0012, + "step": 12488 + }, + { + "epoch": 11.65, + "learning_rate": 4.8543376865671644e-05, + "loss": 0.0037, + "step": 12492 + }, + { + "epoch": 11.66, + "learning_rate": 4.85429104477612e-05, + "loss": 0.0054, + "step": 12496 + }, + { + "epoch": 11.66, + "learning_rate": 4.854244402985075e-05, + "loss": 0.0035, + "step": 12500 + }, + { + "epoch": 11.66, + "eval_exact_match": 0.7340425531914894, + "eval_exec": 0.7611218568665378, + "eval_loss": 0.2715718448162079, + "eval_runtime": 1831.9341, + "eval_samples_per_second": 0.564, + "step": 12500 + }, + { + "epoch": 11.66, + "learning_rate": 4.85419776119403e-05, + "loss": 0.004, + "step": 12504 + }, + { + "epoch": 11.67, + "learning_rate": 4.854151119402986e-05, + "loss": 0.0061, + "step": 12508 + }, + { + "epoch": 11.67, + "learning_rate": 4.8541044776119405e-05, + "loss": 0.0124, + "step": 12512 + }, + { + "epoch": 11.68, + "learning_rate": 4.854057835820896e-05, + "loss": 0.004, + "step": 12516 + }, + { + "epoch": 11.68, + "learning_rate": 4.854011194029851e-05, + "loss": 0.0018, + "step": 12520 + }, + { + "epoch": 11.68, + "learning_rate": 4.853964552238806e-05, + "loss": 0.0019, + "step": 12524 + }, + { + "epoch": 11.69, + "learning_rate": 4.853917910447762e-05, + "loss": 0.0036, + "step": 12528 + }, + { + "epoch": 11.69, + "learning_rate": 4.8538712686567166e-05, + "loss": 0.0024, + "step": 12532 + }, + { + "epoch": 11.69, + "learning_rate": 4.8538246268656714e-05, + "loss": 0.0188, + "step": 12536 + }, + { + "epoch": 11.7, + "learning_rate": 4.8537779850746276e-05, + "loss": 0.0057, + "step": 12540 + }, + { + "epoch": 11.7, + "learning_rate": 4.8537313432835824e-05, + "loss": 0.0071, + "step": 12544 + }, + { + "epoch": 11.71, + "learning_rate": 4.853684701492537e-05, + "loss": 0.004, + "step": 12548 + }, + { + "epoch": 11.71, + "learning_rate": 4.853638059701493e-05, + "loss": 0.0049, + "step": 12552 + }, + { + "epoch": 11.71, + "learning_rate": 4.853591417910448e-05, + "loss": 0.0012, + "step": 12556 + }, + { + "epoch": 11.72, + "learning_rate": 4.853544776119403e-05, + "loss": 0.0031, + "step": 12560 + }, + { + "epoch": 11.72, + "learning_rate": 4.8534981343283585e-05, + "loss": 0.0016, + "step": 12564 + }, + { + "epoch": 11.72, + "learning_rate": 4.853451492537314e-05, + "loss": 0.0092, + "step": 12568 + }, + { + "epoch": 11.73, + "learning_rate": 4.853404850746269e-05, + "loss": 0.0102, + "step": 12572 + }, + { + "epoch": 11.73, + "learning_rate": 4.853358208955224e-05, + "loss": 0.0059, + "step": 12576 + }, + { + "epoch": 11.73, + "learning_rate": 4.853311567164179e-05, + "loss": 0.0085, + "step": 12580 + }, + { + "epoch": 11.74, + "learning_rate": 4.8532649253731346e-05, + "loss": 0.0012, + "step": 12584 + }, + { + "epoch": 11.74, + "learning_rate": 4.85321828358209e-05, + "loss": 0.0033, + "step": 12588 + }, + { + "epoch": 11.75, + "learning_rate": 4.853171641791045e-05, + "loss": 0.0124, + "step": 12592 + }, + { + "epoch": 11.75, + "learning_rate": 4.853125e-05, + "loss": 0.0054, + "step": 12596 + }, + { + "epoch": 11.75, + "learning_rate": 4.853078358208956e-05, + "loss": 0.0055, + "step": 12600 + }, + { + "epoch": 11.76, + "learning_rate": 4.853031716417911e-05, + "loss": 0.0073, + "step": 12604 + }, + { + "epoch": 11.76, + "learning_rate": 4.8529850746268655e-05, + "loss": 0.0036, + "step": 12608 + }, + { + "epoch": 11.76, + "learning_rate": 4.852938432835821e-05, + "loss": 0.01, + "step": 12612 + }, + { + "epoch": 11.77, + "learning_rate": 4.8528917910447765e-05, + "loss": 0.0048, + "step": 12616 + }, + { + "epoch": 11.77, + "learning_rate": 4.852845149253731e-05, + "loss": 0.0093, + "step": 12620 + }, + { + "epoch": 11.78, + "learning_rate": 4.852798507462687e-05, + "loss": 0.0096, + "step": 12624 + }, + { + "epoch": 11.78, + "learning_rate": 4.852751865671642e-05, + "loss": 0.0075, + "step": 12628 + }, + { + "epoch": 11.78, + "learning_rate": 4.852705223880597e-05, + "loss": 0.0062, + "step": 12632 + }, + { + "epoch": 11.79, + "learning_rate": 4.8526585820895526e-05, + "loss": 0.0034, + "step": 12636 + }, + { + "epoch": 11.79, + "learning_rate": 4.8526119402985074e-05, + "loss": 0.0076, + "step": 12640 + }, + { + "epoch": 11.79, + "learning_rate": 4.852565298507463e-05, + "loss": 0.0033, + "step": 12644 + }, + { + "epoch": 11.8, + "learning_rate": 4.8525186567164184e-05, + "loss": 0.0045, + "step": 12648 + }, + { + "epoch": 11.8, + "learning_rate": 4.852472014925373e-05, + "loss": 0.0009, + "step": 12652 + }, + { + "epoch": 11.81, + "learning_rate": 4.852425373134328e-05, + "loss": 0.0036, + "step": 12656 + }, + { + "epoch": 11.81, + "learning_rate": 4.852378731343284e-05, + "loss": 0.0031, + "step": 12660 + }, + { + "epoch": 11.81, + "learning_rate": 4.852332089552239e-05, + "loss": 0.0046, + "step": 12664 + }, + { + "epoch": 11.82, + "learning_rate": 4.8522854477611945e-05, + "loss": 0.0096, + "step": 12668 + }, + { + "epoch": 11.82, + "learning_rate": 4.852238805970149e-05, + "loss": 0.0056, + "step": 12672 + }, + { + "epoch": 11.82, + "learning_rate": 4.852192164179105e-05, + "loss": 0.001, + "step": 12676 + }, + { + "epoch": 11.83, + "learning_rate": 4.85214552238806e-05, + "loss": 0.0037, + "step": 12680 + }, + { + "epoch": 11.83, + "learning_rate": 4.852098880597015e-05, + "loss": 0.0024, + "step": 12684 + }, + { + "epoch": 11.84, + "learning_rate": 4.8520522388059706e-05, + "loss": 0.0048, + "step": 12688 + }, + { + "epoch": 11.84, + "learning_rate": 4.852005597014926e-05, + "loss": 0.0019, + "step": 12692 + }, + { + "epoch": 11.84, + "learning_rate": 4.851958955223881e-05, + "loss": 0.0137, + "step": 12696 + }, + { + "epoch": 11.85, + "learning_rate": 4.851912313432836e-05, + "loss": 0.0009, + "step": 12700 + }, + { + "epoch": 11.85, + "learning_rate": 4.851865671641791e-05, + "loss": 0.0021, + "step": 12704 + }, + { + "epoch": 11.85, + "learning_rate": 4.851819029850747e-05, + "loss": 0.0109, + "step": 12708 + }, + { + "epoch": 11.86, + "learning_rate": 4.8517723880597015e-05, + "loss": 0.0013, + "step": 12712 + }, + { + "epoch": 11.86, + "learning_rate": 4.851725746268657e-05, + "loss": 0.0026, + "step": 12716 + }, + { + "epoch": 11.87, + "learning_rate": 4.8516791044776125e-05, + "loss": 0.0037, + "step": 12720 + }, + { + "epoch": 11.87, + "learning_rate": 4.851632462686567e-05, + "loss": 0.007, + "step": 12724 + }, + { + "epoch": 11.87, + "learning_rate": 4.851585820895523e-05, + "loss": 0.0014, + "step": 12728 + }, + { + "epoch": 11.88, + "learning_rate": 4.8515391791044776e-05, + "loss": 0.0035, + "step": 12732 + }, + { + "epoch": 11.88, + "learning_rate": 4.851492537313433e-05, + "loss": 0.0008, + "step": 12736 + }, + { + "epoch": 11.88, + "learning_rate": 4.8514458955223886e-05, + "loss": 0.0022, + "step": 12740 + }, + { + "epoch": 11.89, + "learning_rate": 4.8513992537313434e-05, + "loss": 0.0071, + "step": 12744 + }, + { + "epoch": 11.89, + "learning_rate": 4.851352611940299e-05, + "loss": 0.0099, + "step": 12748 + }, + { + "epoch": 11.9, + "learning_rate": 4.8513059701492544e-05, + "loss": 0.0049, + "step": 12752 + }, + { + "epoch": 11.9, + "learning_rate": 4.851259328358209e-05, + "loss": 0.0013, + "step": 12756 + }, + { + "epoch": 11.9, + "learning_rate": 4.851212686567164e-05, + "loss": 0.0058, + "step": 12760 + }, + { + "epoch": 11.91, + "learning_rate": 4.8511660447761195e-05, + "loss": 0.0041, + "step": 12764 + }, + { + "epoch": 11.91, + "learning_rate": 4.851119402985075e-05, + "loss": 0.0051, + "step": 12768 + }, + { + "epoch": 11.91, + "learning_rate": 4.85107276119403e-05, + "loss": 0.0015, + "step": 12772 + }, + { + "epoch": 11.92, + "learning_rate": 4.851026119402985e-05, + "loss": 0.0047, + "step": 12776 + }, + { + "epoch": 11.92, + "learning_rate": 4.850979477611941e-05, + "loss": 0.0118, + "step": 12780 + }, + { + "epoch": 11.93, + "learning_rate": 4.8509328358208956e-05, + "loss": 0.0017, + "step": 12784 + }, + { + "epoch": 11.93, + "learning_rate": 4.850886194029851e-05, + "loss": 0.0031, + "step": 12788 + }, + { + "epoch": 11.93, + "learning_rate": 4.850839552238806e-05, + "loss": 0.001, + "step": 12792 + }, + { + "epoch": 11.94, + "learning_rate": 4.8507929104477614e-05, + "loss": 0.0053, + "step": 12796 + }, + { + "epoch": 11.94, + "learning_rate": 4.850746268656717e-05, + "loss": 0.0016, + "step": 12800 + }, + { + "epoch": 11.94, + "learning_rate": 4.850699626865672e-05, + "loss": 0.0067, + "step": 12804 + }, + { + "epoch": 11.95, + "learning_rate": 4.850652985074627e-05, + "loss": 0.0027, + "step": 12808 + }, + { + "epoch": 11.95, + "learning_rate": 4.850606343283583e-05, + "loss": 0.0021, + "step": 12812 + }, + { + "epoch": 11.96, + "learning_rate": 4.8505597014925375e-05, + "loss": 0.0151, + "step": 12816 + }, + { + "epoch": 11.96, + "learning_rate": 4.850513059701492e-05, + "loss": 0.0073, + "step": 12820 + }, + { + "epoch": 11.96, + "learning_rate": 4.850466417910448e-05, + "loss": 0.0051, + "step": 12824 + }, + { + "epoch": 11.97, + "learning_rate": 4.850419776119403e-05, + "loss": 0.0044, + "step": 12828 + }, + { + "epoch": 11.97, + "learning_rate": 4.850373134328359e-05, + "loss": 0.0032, + "step": 12832 + }, + { + "epoch": 11.97, + "learning_rate": 4.8503264925373136e-05, + "loss": 0.0065, + "step": 12836 + }, + { + "epoch": 11.98, + "learning_rate": 4.850279850746269e-05, + "loss": 0.0048, + "step": 12840 + }, + { + "epoch": 11.98, + "learning_rate": 4.8502332089552246e-05, + "loss": 0.0049, + "step": 12844 + }, + { + "epoch": 11.98, + "learning_rate": 4.8501865671641794e-05, + "loss": 0.0006, + "step": 12848 + }, + { + "epoch": 11.99, + "learning_rate": 4.850139925373134e-05, + "loss": 0.0011, + "step": 12852 + }, + { + "epoch": 11.99, + "learning_rate": 4.8500932835820904e-05, + "loss": 0.0028, + "step": 12856 + }, + { + "epoch": 12.0, + "learning_rate": 4.850046641791045e-05, + "loss": 0.0016, + "step": 12860 + }, + { + "epoch": 12.0, + "learning_rate": 4.85e-05, + "loss": 0.004, + "step": 12864 + }, + { + "epoch": 12.0, + "learning_rate": 4.8499533582089555e-05, + "loss": 0.0025, + "step": 12868 + }, + { + "epoch": 12.01, + "learning_rate": 4.849906716417911e-05, + "loss": 0.0103, + "step": 12872 + }, + { + "epoch": 12.01, + "learning_rate": 4.849860074626866e-05, + "loss": 0.0008, + "step": 12876 + }, + { + "epoch": 12.01, + "learning_rate": 4.849813432835821e-05, + "loss": 0.0046, + "step": 12880 + }, + { + "epoch": 12.02, + "learning_rate": 4.849766791044776e-05, + "loss": 0.0017, + "step": 12884 + }, + { + "epoch": 12.02, + "learning_rate": 4.8497201492537316e-05, + "loss": 0.002, + "step": 12888 + }, + { + "epoch": 12.03, + "learning_rate": 4.849673507462687e-05, + "loss": 0.0037, + "step": 12892 + }, + { + "epoch": 12.03, + "learning_rate": 4.849626865671642e-05, + "loss": 0.0072, + "step": 12896 + }, + { + "epoch": 12.03, + "learning_rate": 4.8495802238805974e-05, + "loss": 0.0018, + "step": 12900 + }, + { + "epoch": 12.04, + "learning_rate": 4.849533582089553e-05, + "loss": 0.0022, + "step": 12904 + }, + { + "epoch": 12.04, + "learning_rate": 4.849486940298508e-05, + "loss": 0.0014, + "step": 12908 + }, + { + "epoch": 12.04, + "learning_rate": 4.8494402985074625e-05, + "loss": 0.0023, + "step": 12912 + }, + { + "epoch": 12.05, + "learning_rate": 4.8493936567164187e-05, + "loss": 0.0034, + "step": 12916 + }, + { + "epoch": 12.05, + "learning_rate": 4.8493470149253735e-05, + "loss": 0.0044, + "step": 12920 + }, + { + "epoch": 12.06, + "learning_rate": 4.849300373134328e-05, + "loss": 0.0052, + "step": 12924 + }, + { + "epoch": 12.06, + "learning_rate": 4.849253731343284e-05, + "loss": 0.0061, + "step": 12928 + }, + { + "epoch": 12.06, + "learning_rate": 4.849207089552239e-05, + "loss": 0.0022, + "step": 12932 + }, + { + "epoch": 12.07, + "learning_rate": 4.849160447761194e-05, + "loss": 0.0068, + "step": 12936 + }, + { + "epoch": 12.07, + "learning_rate": 4.8491138059701496e-05, + "loss": 0.005, + "step": 12940 + }, + { + "epoch": 12.07, + "learning_rate": 4.8490671641791044e-05, + "loss": 0.015, + "step": 12944 + }, + { + "epoch": 12.08, + "learning_rate": 4.84902052238806e-05, + "loss": 0.0009, + "step": 12948 + }, + { + "epoch": 12.08, + "learning_rate": 4.8489738805970154e-05, + "loss": 0.002, + "step": 12952 + }, + { + "epoch": 12.09, + "learning_rate": 4.84892723880597e-05, + "loss": 0.0019, + "step": 12956 + }, + { + "epoch": 12.09, + "learning_rate": 4.848880597014926e-05, + "loss": 0.0029, + "step": 12960 + }, + { + "epoch": 12.09, + "learning_rate": 4.848833955223881e-05, + "loss": 0.0012, + "step": 12964 + }, + { + "epoch": 12.1, + "learning_rate": 4.848787313432836e-05, + "loss": 0.0198, + "step": 12968 + }, + { + "epoch": 12.1, + "learning_rate": 4.848740671641791e-05, + "loss": 0.0013, + "step": 12972 + }, + { + "epoch": 12.1, + "learning_rate": 4.848694029850747e-05, + "loss": 0.0032, + "step": 12976 + }, + { + "epoch": 12.11, + "learning_rate": 4.848647388059702e-05, + "loss": 0.0141, + "step": 12980 + }, + { + "epoch": 12.11, + "learning_rate": 4.8486007462686566e-05, + "loss": 0.0038, + "step": 12984 + }, + { + "epoch": 12.12, + "learning_rate": 4.848554104477612e-05, + "loss": 0.0037, + "step": 12988 + }, + { + "epoch": 12.12, + "learning_rate": 4.8485074626865676e-05, + "loss": 0.0028, + "step": 12992 + }, + { + "epoch": 12.12, + "learning_rate": 4.848460820895523e-05, + "loss": 0.0139, + "step": 12996 + }, + { + "epoch": 12.13, + "learning_rate": 4.848414179104478e-05, + "loss": 0.0063, + "step": 13000 + }, + { + "epoch": 12.13, + "eval_exact_match": 0.730174081237911, + "eval_exec": 0.7572533849129593, + "eval_loss": 0.28556960821151733, + "eval_runtime": 1682.081, + "eval_samples_per_second": 0.615, + "step": 13000 + }, + { + "epoch": 12.13, + "learning_rate": 4.848367537313433e-05, + "loss": 0.0037, + "step": 13004 + }, + { + "epoch": 12.13, + "learning_rate": 4.848320895522389e-05, + "loss": 0.0027, + "step": 13008 + }, + { + "epoch": 12.14, + "learning_rate": 4.8482742537313437e-05, + "loss": 0.004, + "step": 13012 + }, + { + "epoch": 12.14, + "learning_rate": 4.8482276119402985e-05, + "loss": 0.0039, + "step": 13016 + }, + { + "epoch": 12.15, + "learning_rate": 4.848180970149254e-05, + "loss": 0.0056, + "step": 13020 + }, + { + "epoch": 12.15, + "learning_rate": 4.8481343283582094e-05, + "loss": 0.0067, + "step": 13024 + }, + { + "epoch": 12.15, + "learning_rate": 4.848087686567164e-05, + "loss": 0.0021, + "step": 13028 + }, + { + "epoch": 12.16, + "learning_rate": 4.84804104477612e-05, + "loss": 0.0053, + "step": 13032 + }, + { + "epoch": 12.16, + "learning_rate": 4.8479944029850746e-05, + "loss": 0.0016, + "step": 13036 + }, + { + "epoch": 12.16, + "learning_rate": 4.84794776119403e-05, + "loss": 0.0004, + "step": 13040 + }, + { + "epoch": 12.17, + "learning_rate": 4.8479011194029855e-05, + "loss": 0.0014, + "step": 13044 + }, + { + "epoch": 12.17, + "learning_rate": 4.8478544776119404e-05, + "loss": 0.0163, + "step": 13048 + }, + { + "epoch": 12.18, + "learning_rate": 4.847807835820896e-05, + "loss": 0.0108, + "step": 13052 + }, + { + "epoch": 12.18, + "learning_rate": 4.8477611940298513e-05, + "loss": 0.006, + "step": 13056 + }, + { + "epoch": 12.18, + "learning_rate": 4.847714552238806e-05, + "loss": 0.0039, + "step": 13060 + }, + { + "epoch": 12.19, + "learning_rate": 4.847667910447761e-05, + "loss": 0.002, + "step": 13064 + }, + { + "epoch": 12.19, + "learning_rate": 4.847621268656717e-05, + "loss": 0.0018, + "step": 13068 + }, + { + "epoch": 12.19, + "learning_rate": 4.847574626865672e-05, + "loss": 0.0013, + "step": 13072 + }, + { + "epoch": 12.2, + "learning_rate": 4.847527985074627e-05, + "loss": 0.0022, + "step": 13076 + }, + { + "epoch": 12.2, + "learning_rate": 4.847481343283582e-05, + "loss": 0.0071, + "step": 13080 + }, + { + "epoch": 12.21, + "learning_rate": 4.847434701492538e-05, + "loss": 0.0003, + "step": 13084 + }, + { + "epoch": 12.21, + "learning_rate": 4.8473880597014926e-05, + "loss": 0.0025, + "step": 13088 + }, + { + "epoch": 12.21, + "learning_rate": 4.847341417910448e-05, + "loss": 0.0033, + "step": 13092 + }, + { + "epoch": 12.22, + "learning_rate": 4.847294776119403e-05, + "loss": 0.001, + "step": 13096 + }, + { + "epoch": 12.22, + "learning_rate": 4.8472481343283584e-05, + "loss": 0.0064, + "step": 13100 + }, + { + "epoch": 12.22, + "learning_rate": 4.847201492537314e-05, + "loss": 0.0025, + "step": 13104 + }, + { + "epoch": 12.23, + "learning_rate": 4.8471548507462687e-05, + "loss": 0.0011, + "step": 13108 + }, + { + "epoch": 12.23, + "learning_rate": 4.847108208955224e-05, + "loss": 0.0005, + "step": 13112 + }, + { + "epoch": 12.24, + "learning_rate": 4.8470615671641796e-05, + "loss": 0.0029, + "step": 13116 + }, + { + "epoch": 12.24, + "learning_rate": 4.8470149253731344e-05, + "loss": 0.0021, + "step": 13120 + }, + { + "epoch": 12.24, + "learning_rate": 4.846968283582089e-05, + "loss": 0.0043, + "step": 13124 + }, + { + "epoch": 12.25, + "learning_rate": 4.8469216417910454e-05, + "loss": 0.0226, + "step": 13128 + }, + { + "epoch": 12.25, + "learning_rate": 4.846875e-05, + "loss": 0.0006, + "step": 13132 + }, + { + "epoch": 12.25, + "learning_rate": 4.846828358208955e-05, + "loss": 0.0005, + "step": 13136 + }, + { + "epoch": 12.26, + "learning_rate": 4.8467817164179105e-05, + "loss": 0.0057, + "step": 13140 + }, + { + "epoch": 12.26, + "learning_rate": 4.846735074626866e-05, + "loss": 0.006, + "step": 13144 + }, + { + "epoch": 12.26, + "learning_rate": 4.846688432835821e-05, + "loss": 0.0037, + "step": 13148 + }, + { + "epoch": 12.27, + "learning_rate": 4.8466417910447763e-05, + "loss": 0.0027, + "step": 13152 + }, + { + "epoch": 12.27, + "learning_rate": 4.846595149253731e-05, + "loss": 0.0009, + "step": 13156 + }, + { + "epoch": 12.28, + "learning_rate": 4.846548507462687e-05, + "loss": 0.0017, + "step": 13160 + }, + { + "epoch": 12.28, + "learning_rate": 4.846501865671642e-05, + "loss": 0.004, + "step": 13164 + }, + { + "epoch": 12.28, + "learning_rate": 4.846455223880597e-05, + "loss": 0.0093, + "step": 13168 + }, + { + "epoch": 12.29, + "learning_rate": 4.8464085820895524e-05, + "loss": 0.0036, + "step": 13172 + }, + { + "epoch": 12.29, + "learning_rate": 4.846361940298508e-05, + "loss": 0.0018, + "step": 13176 + }, + { + "epoch": 12.29, + "learning_rate": 4.846315298507463e-05, + "loss": 0.0036, + "step": 13180 + }, + { + "epoch": 12.3, + "learning_rate": 4.846268656716418e-05, + "loss": 0.0029, + "step": 13184 + }, + { + "epoch": 12.3, + "learning_rate": 4.846222014925374e-05, + "loss": 0.0133, + "step": 13188 + }, + { + "epoch": 12.31, + "learning_rate": 4.8461753731343285e-05, + "loss": 0.0097, + "step": 13192 + }, + { + "epoch": 12.31, + "learning_rate": 4.846128731343284e-05, + "loss": 0.0028, + "step": 13196 + }, + { + "epoch": 12.31, + "learning_rate": 4.846082089552239e-05, + "loss": 0.0009, + "step": 13200 + }, + { + "epoch": 12.32, + "learning_rate": 4.846035447761194e-05, + "loss": 0.0009, + "step": 13204 + }, + { + "epoch": 12.32, + "learning_rate": 4.84598880597015e-05, + "loss": 0.0101, + "step": 13208 + }, + { + "epoch": 12.32, + "learning_rate": 4.8459421641791046e-05, + "loss": 0.0021, + "step": 13212 + }, + { + "epoch": 12.33, + "learning_rate": 4.8458955223880595e-05, + "loss": 0.0066, + "step": 13216 + }, + { + "epoch": 12.33, + "learning_rate": 4.8458488805970156e-05, + "loss": 0.0027, + "step": 13220 + }, + { + "epoch": 12.34, + "learning_rate": 4.8458022388059704e-05, + "loss": 0.0007, + "step": 13224 + }, + { + "epoch": 12.34, + "learning_rate": 4.845755597014925e-05, + "loss": 0.0025, + "step": 13228 + }, + { + "epoch": 12.34, + "learning_rate": 4.845708955223881e-05, + "loss": 0.0007, + "step": 13232 + }, + { + "epoch": 12.35, + "learning_rate": 4.845662313432836e-05, + "loss": 0.003, + "step": 13236 + }, + { + "epoch": 12.35, + "learning_rate": 4.845615671641791e-05, + "loss": 0.0023, + "step": 13240 + }, + { + "epoch": 12.35, + "learning_rate": 4.8455690298507465e-05, + "loss": 0.0033, + "step": 13244 + }, + { + "epoch": 12.36, + "learning_rate": 4.845522388059702e-05, + "loss": 0.0045, + "step": 13248 + }, + { + "epoch": 12.36, + "learning_rate": 4.845475746268657e-05, + "loss": 0.0027, + "step": 13252 + }, + { + "epoch": 12.37, + "learning_rate": 4.845429104477612e-05, + "loss": 0.0039, + "step": 13256 + }, + { + "epoch": 12.37, + "learning_rate": 4.845382462686567e-05, + "loss": 0.0004, + "step": 13260 + }, + { + "epoch": 12.37, + "learning_rate": 4.8453358208955226e-05, + "loss": 0.0049, + "step": 13264 + }, + { + "epoch": 12.38, + "learning_rate": 4.845289179104478e-05, + "loss": 0.0062, + "step": 13268 + }, + { + "epoch": 12.38, + "learning_rate": 4.845242537313433e-05, + "loss": 0.0009, + "step": 13272 + }, + { + "epoch": 12.38, + "learning_rate": 4.845195895522388e-05, + "loss": 0.005, + "step": 13276 + }, + { + "epoch": 12.39, + "learning_rate": 4.845149253731344e-05, + "loss": 0.0025, + "step": 13280 + }, + { + "epoch": 12.39, + "learning_rate": 4.845102611940299e-05, + "loss": 0.0037, + "step": 13284 + }, + { + "epoch": 12.4, + "learning_rate": 4.8450559701492535e-05, + "loss": 0.0039, + "step": 13288 + }, + { + "epoch": 12.4, + "learning_rate": 4.845009328358209e-05, + "loss": 0.002, + "step": 13292 + }, + { + "epoch": 12.4, + "learning_rate": 4.8449626865671645e-05, + "loss": 0.0055, + "step": 13296 + }, + { + "epoch": 12.41, + "learning_rate": 4.844916044776119e-05, + "loss": 0.001, + "step": 13300 + }, + { + "epoch": 12.41, + "learning_rate": 4.844869402985075e-05, + "loss": 0.0008, + "step": 13304 + }, + { + "epoch": 12.41, + "learning_rate": 4.84482276119403e-05, + "loss": 0.0092, + "step": 13308 + }, + { + "epoch": 12.42, + "learning_rate": 4.844776119402985e-05, + "loss": 0.0028, + "step": 13312 + }, + { + "epoch": 12.42, + "learning_rate": 4.8447294776119406e-05, + "loss": 0.0009, + "step": 13316 + }, + { + "epoch": 12.43, + "learning_rate": 4.8446828358208954e-05, + "loss": 0.0088, + "step": 13320 + }, + { + "epoch": 12.43, + "learning_rate": 4.844636194029851e-05, + "loss": 0.0014, + "step": 13324 + }, + { + "epoch": 12.43, + "learning_rate": 4.8445895522388064e-05, + "loss": 0.0012, + "step": 13328 + }, + { + "epoch": 12.44, + "learning_rate": 4.844542910447761e-05, + "loss": 0.002, + "step": 13332 + }, + { + "epoch": 12.44, + "learning_rate": 4.844496268656717e-05, + "loss": 0.0031, + "step": 13336 + }, + { + "epoch": 12.44, + "learning_rate": 4.844449626865672e-05, + "loss": 0.0013, + "step": 13340 + }, + { + "epoch": 12.45, + "learning_rate": 4.844402985074627e-05, + "loss": 0.0059, + "step": 13344 + }, + { + "epoch": 12.45, + "learning_rate": 4.8443563432835825e-05, + "loss": 0.0025, + "step": 13348 + }, + { + "epoch": 12.46, + "learning_rate": 4.844309701492537e-05, + "loss": 0.0002, + "step": 13352 + }, + { + "epoch": 12.46, + "learning_rate": 4.844263059701493e-05, + "loss": 0.002, + "step": 13356 + }, + { + "epoch": 12.46, + "learning_rate": 4.844216417910448e-05, + "loss": 0.0007, + "step": 13360 + }, + { + "epoch": 12.47, + "learning_rate": 4.844169776119403e-05, + "loss": 0.009, + "step": 13364 + }, + { + "epoch": 12.47, + "learning_rate": 4.8441231343283586e-05, + "loss": 0.0065, + "step": 13368 + }, + { + "epoch": 12.47, + "learning_rate": 4.844076492537314e-05, + "loss": 0.0114, + "step": 13372 + }, + { + "epoch": 12.48, + "learning_rate": 4.844029850746269e-05, + "loss": 0.0021, + "step": 13376 + }, + { + "epoch": 12.48, + "learning_rate": 4.843983208955224e-05, + "loss": 0.0036, + "step": 13380 + }, + { + "epoch": 12.49, + "learning_rate": 4.843936567164179e-05, + "loss": 0.0027, + "step": 13384 + }, + { + "epoch": 12.49, + "learning_rate": 4.843889925373135e-05, + "loss": 0.0042, + "step": 13388 + }, + { + "epoch": 12.49, + "learning_rate": 4.8438432835820895e-05, + "loss": 0.0013, + "step": 13392 + }, + { + "epoch": 12.5, + "learning_rate": 4.843796641791045e-05, + "loss": 0.0008, + "step": 13396 + }, + { + "epoch": 12.5, + "learning_rate": 4.8437500000000005e-05, + "loss": 0.0047, + "step": 13400 + }, + { + "epoch": 12.5, + "learning_rate": 4.843703358208955e-05, + "loss": 0.0012, + "step": 13404 + }, + { + "epoch": 12.51, + "learning_rate": 4.843656716417911e-05, + "loss": 0.0024, + "step": 13408 + }, + { + "epoch": 12.51, + "learning_rate": 4.8436100746268656e-05, + "loss": 0.0128, + "step": 13412 + }, + { + "epoch": 12.51, + "learning_rate": 4.843563432835821e-05, + "loss": 0.0007, + "step": 13416 + }, + { + "epoch": 12.52, + "learning_rate": 4.8435167910447766e-05, + "loss": 0.0055, + "step": 13420 + }, + { + "epoch": 12.52, + "learning_rate": 4.8434701492537314e-05, + "loss": 0.0122, + "step": 13424 + }, + { + "epoch": 12.53, + "learning_rate": 4.843423507462687e-05, + "loss": 0.0029, + "step": 13428 + }, + { + "epoch": 12.53, + "learning_rate": 4.8433768656716424e-05, + "loss": 0.0019, + "step": 13432 + }, + { + "epoch": 12.53, + "learning_rate": 4.843330223880597e-05, + "loss": 0.0009, + "step": 13436 + }, + { + "epoch": 12.54, + "learning_rate": 4.843283582089552e-05, + "loss": 0.0006, + "step": 13440 + }, + { + "epoch": 12.54, + "learning_rate": 4.8432369402985075e-05, + "loss": 0.004, + "step": 13444 + }, + { + "epoch": 12.54, + "learning_rate": 4.843190298507463e-05, + "loss": 0.0022, + "step": 13448 + }, + { + "epoch": 12.55, + "learning_rate": 4.843143656716418e-05, + "loss": 0.005, + "step": 13452 + }, + { + "epoch": 12.55, + "learning_rate": 4.843097014925373e-05, + "loss": 0.0061, + "step": 13456 + }, + { + "epoch": 12.56, + "learning_rate": 4.843050373134329e-05, + "loss": 0.0062, + "step": 13460 + }, + { + "epoch": 12.56, + "learning_rate": 4.8430037313432836e-05, + "loss": 0.0023, + "step": 13464 + }, + { + "epoch": 12.56, + "learning_rate": 4.842957089552239e-05, + "loss": 0.0005, + "step": 13468 + }, + { + "epoch": 12.57, + "learning_rate": 4.842910447761194e-05, + "loss": 0.008, + "step": 13472 + }, + { + "epoch": 12.57, + "learning_rate": 4.8428638059701494e-05, + "loss": 0.0039, + "step": 13476 + }, + { + "epoch": 12.57, + "learning_rate": 4.842817164179105e-05, + "loss": 0.0017, + "step": 13480 + }, + { + "epoch": 12.58, + "learning_rate": 4.84277052238806e-05, + "loss": 0.0037, + "step": 13484 + }, + { + "epoch": 12.58, + "learning_rate": 4.842723880597015e-05, + "loss": 0.0091, + "step": 13488 + }, + { + "epoch": 12.59, + "learning_rate": 4.842677238805971e-05, + "loss": 0.0006, + "step": 13492 + }, + { + "epoch": 12.59, + "learning_rate": 4.8426305970149255e-05, + "loss": 0.0026, + "step": 13496 + }, + { + "epoch": 12.59, + "learning_rate": 4.842583955223881e-05, + "loss": 0.0058, + "step": 13500 + }, + { + "epoch": 12.59, + "eval_exact_match": 0.7156673114119922, + "eval_exec": 0.7562862669245648, + "eval_loss": 0.295368492603302, + "eval_runtime": 1541.5565, + "eval_samples_per_second": 0.671, + "step": 13500 + }, + { + "epoch": 12.6, + "learning_rate": 4.842537313432836e-05, + "loss": 0.0075, + "step": 13504 + }, + { + "epoch": 12.6, + "learning_rate": 4.842490671641791e-05, + "loss": 0.002, + "step": 13508 + }, + { + "epoch": 12.6, + "learning_rate": 4.842444029850747e-05, + "loss": 0.0156, + "step": 13512 + }, + { + "epoch": 12.61, + "learning_rate": 4.8423973880597016e-05, + "loss": 0.0082, + "step": 13516 + }, + { + "epoch": 12.61, + "learning_rate": 4.842350746268657e-05, + "loss": 0.0057, + "step": 13520 + }, + { + "epoch": 12.62, + "learning_rate": 4.8423041044776126e-05, + "loss": 0.0022, + "step": 13524 + }, + { + "epoch": 12.62, + "learning_rate": 4.8422574626865674e-05, + "loss": 0.0029, + "step": 13528 + }, + { + "epoch": 12.62, + "learning_rate": 4.842210820895522e-05, + "loss": 0.0014, + "step": 13532 + }, + { + "epoch": 12.63, + "learning_rate": 4.8421641791044784e-05, + "loss": 0.0013, + "step": 13536 + }, + { + "epoch": 12.63, + "learning_rate": 4.842117537313433e-05, + "loss": 0.002, + "step": 13540 + }, + { + "epoch": 12.63, + "learning_rate": 4.842070895522388e-05, + "loss": 0.0019, + "step": 13544 + }, + { + "epoch": 12.64, + "learning_rate": 4.8420242537313435e-05, + "loss": 0.0006, + "step": 13548 + }, + { + "epoch": 12.64, + "learning_rate": 4.841977611940299e-05, + "loss": 0.0011, + "step": 13552 + }, + { + "epoch": 12.65, + "learning_rate": 4.841930970149254e-05, + "loss": 0.0063, + "step": 13556 + }, + { + "epoch": 12.65, + "learning_rate": 4.841884328358209e-05, + "loss": 0.0033, + "step": 13560 + }, + { + "epoch": 12.65, + "learning_rate": 4.841837686567164e-05, + "loss": 0.0013, + "step": 13564 + }, + { + "epoch": 12.66, + "learning_rate": 4.8417910447761196e-05, + "loss": 0.0014, + "step": 13568 + }, + { + "epoch": 12.66, + "learning_rate": 4.841744402985075e-05, + "loss": 0.0036, + "step": 13572 + }, + { + "epoch": 12.66, + "learning_rate": 4.84169776119403e-05, + "loss": 0.0034, + "step": 13576 + }, + { + "epoch": 12.67, + "learning_rate": 4.8416511194029854e-05, + "loss": 0.0062, + "step": 13580 + }, + { + "epoch": 12.67, + "learning_rate": 4.841604477611941e-05, + "loss": 0.0032, + "step": 13584 + }, + { + "epoch": 12.68, + "learning_rate": 4.841557835820896e-05, + "loss": 0.005, + "step": 13588 + }, + { + "epoch": 12.68, + "learning_rate": 4.8415111940298505e-05, + "loss": 0.008, + "step": 13592 + }, + { + "epoch": 12.68, + "learning_rate": 4.841464552238807e-05, + "loss": 0.0038, + "step": 13596 + }, + { + "epoch": 12.69, + "learning_rate": 4.8414179104477615e-05, + "loss": 0.0033, + "step": 13600 + }, + { + "epoch": 12.69, + "learning_rate": 4.841371268656716e-05, + "loss": 0.0008, + "step": 13604 + }, + { + "epoch": 12.69, + "learning_rate": 4.841324626865672e-05, + "loss": 0.0041, + "step": 13608 + }, + { + "epoch": 12.7, + "learning_rate": 4.841277985074627e-05, + "loss": 0.006, + "step": 13612 + }, + { + "epoch": 12.7, + "learning_rate": 4.841231343283582e-05, + "loss": 0.0007, + "step": 13616 + }, + { + "epoch": 12.71, + "learning_rate": 4.8411847014925376e-05, + "loss": 0.0035, + "step": 13620 + }, + { + "epoch": 12.71, + "learning_rate": 4.8411380597014924e-05, + "loss": 0.0067, + "step": 13624 + }, + { + "epoch": 12.71, + "learning_rate": 4.841091417910448e-05, + "loss": 0.0056, + "step": 13628 + }, + { + "epoch": 12.72, + "learning_rate": 4.8410447761194034e-05, + "loss": 0.0396, + "step": 13632 + }, + { + "epoch": 12.72, + "learning_rate": 4.840998134328358e-05, + "loss": 0.0055, + "step": 13636 + }, + { + "epoch": 12.72, + "learning_rate": 4.840951492537314e-05, + "loss": 0.0048, + "step": 13640 + }, + { + "epoch": 12.73, + "learning_rate": 4.840904850746269e-05, + "loss": 0.0046, + "step": 13644 + }, + { + "epoch": 12.73, + "learning_rate": 4.840858208955224e-05, + "loss": 0.0037, + "step": 13648 + }, + { + "epoch": 12.73, + "learning_rate": 4.8408115671641795e-05, + "loss": 0.0061, + "step": 13652 + }, + { + "epoch": 12.74, + "learning_rate": 4.840764925373135e-05, + "loss": 0.0053, + "step": 13656 + }, + { + "epoch": 12.74, + "learning_rate": 4.84071828358209e-05, + "loss": 0.0048, + "step": 13660 + }, + { + "epoch": 12.75, + "learning_rate": 4.840671641791045e-05, + "loss": 0.0038, + "step": 13664 + }, + { + "epoch": 12.75, + "learning_rate": 4.840625e-05, + "loss": 0.0069, + "step": 13668 + }, + { + "epoch": 12.75, + "learning_rate": 4.8405783582089556e-05, + "loss": 0.0086, + "step": 13672 + }, + { + "epoch": 12.76, + "learning_rate": 4.840531716417911e-05, + "loss": 0.0017, + "step": 13676 + }, + { + "epoch": 12.76, + "learning_rate": 4.840485074626866e-05, + "loss": 0.0021, + "step": 13680 + }, + { + "epoch": 12.76, + "learning_rate": 4.840438432835821e-05, + "loss": 0.004, + "step": 13684 + }, + { + "epoch": 12.77, + "learning_rate": 4.840391791044777e-05, + "loss": 0.0007, + "step": 13688 + }, + { + "epoch": 12.77, + "learning_rate": 4.840345149253732e-05, + "loss": 0.0118, + "step": 13692 + }, + { + "epoch": 12.78, + "learning_rate": 4.8402985074626865e-05, + "loss": 0.0022, + "step": 13696 + }, + { + "epoch": 12.78, + "learning_rate": 4.840251865671642e-05, + "loss": 0.0033, + "step": 13700 + }, + { + "epoch": 12.78, + "learning_rate": 4.8402052238805975e-05, + "loss": 0.0048, + "step": 13704 + }, + { + "epoch": 12.79, + "learning_rate": 4.840158582089552e-05, + "loss": 0.004, + "step": 13708 + }, + { + "epoch": 12.79, + "learning_rate": 4.840111940298508e-05, + "loss": 0.0018, + "step": 13712 + }, + { + "epoch": 12.79, + "learning_rate": 4.8400652985074626e-05, + "loss": 0.0021, + "step": 13716 + }, + { + "epoch": 12.8, + "learning_rate": 4.840018656716418e-05, + "loss": 0.007, + "step": 13720 + }, + { + "epoch": 12.8, + "learning_rate": 4.8399720149253736e-05, + "loss": 0.0012, + "step": 13724 + }, + { + "epoch": 12.81, + "learning_rate": 4.8399253731343284e-05, + "loss": 0.0032, + "step": 13728 + }, + { + "epoch": 12.81, + "learning_rate": 4.839878731343284e-05, + "loss": 0.0031, + "step": 13732 + }, + { + "epoch": 12.81, + "learning_rate": 4.8398320895522394e-05, + "loss": 0.0064, + "step": 13736 + }, + { + "epoch": 12.82, + "learning_rate": 4.839785447761194e-05, + "loss": 0.0105, + "step": 13740 + }, + { + "epoch": 12.82, + "learning_rate": 4.839738805970149e-05, + "loss": 0.004, + "step": 13744 + }, + { + "epoch": 12.82, + "learning_rate": 4.839692164179105e-05, + "loss": 0.0012, + "step": 13748 + }, + { + "epoch": 12.83, + "learning_rate": 4.83964552238806e-05, + "loss": 0.0024, + "step": 13752 + }, + { + "epoch": 12.83, + "learning_rate": 4.839598880597015e-05, + "loss": 0.0046, + "step": 13756 + }, + { + "epoch": 12.84, + "learning_rate": 4.83955223880597e-05, + "loss": 0.003, + "step": 13760 + }, + { + "epoch": 12.84, + "learning_rate": 4.839505597014926e-05, + "loss": 0.0049, + "step": 13764 + }, + { + "epoch": 12.84, + "learning_rate": 4.8394589552238806e-05, + "loss": 0.0031, + "step": 13768 + }, + { + "epoch": 12.85, + "learning_rate": 4.839412313432836e-05, + "loss": 0.0045, + "step": 13772 + }, + { + "epoch": 12.85, + "learning_rate": 4.839365671641791e-05, + "loss": 0.0035, + "step": 13776 + }, + { + "epoch": 12.85, + "learning_rate": 4.8393190298507464e-05, + "loss": 0.0107, + "step": 13780 + }, + { + "epoch": 12.86, + "learning_rate": 4.839272388059702e-05, + "loss": 0.0014, + "step": 13784 + }, + { + "epoch": 12.86, + "learning_rate": 4.839225746268657e-05, + "loss": 0.0063, + "step": 13788 + }, + { + "epoch": 12.87, + "learning_rate": 4.839179104477612e-05, + "loss": 0.001, + "step": 13792 + }, + { + "epoch": 12.87, + "learning_rate": 4.839132462686568e-05, + "loss": 0.0022, + "step": 13796 + }, + { + "epoch": 12.87, + "learning_rate": 4.8390858208955225e-05, + "loss": 0.0021, + "step": 13800 + }, + { + "epoch": 12.88, + "learning_rate": 4.839039179104477e-05, + "loss": 0.0031, + "step": 13804 + }, + { + "epoch": 12.88, + "learning_rate": 4.8389925373134335e-05, + "loss": 0.0099, + "step": 13808 + }, + { + "epoch": 12.88, + "learning_rate": 4.838945895522388e-05, + "loss": 0.0008, + "step": 13812 + }, + { + "epoch": 12.89, + "learning_rate": 4.838899253731344e-05, + "loss": 0.009, + "step": 13816 + }, + { + "epoch": 12.89, + "learning_rate": 4.8388526119402986e-05, + "loss": 0.01, + "step": 13820 + }, + { + "epoch": 12.9, + "learning_rate": 4.838805970149254e-05, + "loss": 0.0051, + "step": 13824 + }, + { + "epoch": 12.9, + "learning_rate": 4.8387593283582096e-05, + "loss": 0.0097, + "step": 13828 + }, + { + "epoch": 12.9, + "learning_rate": 4.8387126865671644e-05, + "loss": 0.0004, + "step": 13832 + }, + { + "epoch": 12.91, + "learning_rate": 4.838666044776119e-05, + "loss": 0.001, + "step": 13836 + }, + { + "epoch": 12.91, + "learning_rate": 4.8386194029850754e-05, + "loss": 0.0049, + "step": 13840 + }, + { + "epoch": 12.91, + "learning_rate": 4.83857276119403e-05, + "loss": 0.0015, + "step": 13844 + }, + { + "epoch": 12.92, + "learning_rate": 4.838526119402985e-05, + "loss": 0.0044, + "step": 13848 + }, + { + "epoch": 12.92, + "learning_rate": 4.8384794776119405e-05, + "loss": 0.002, + "step": 13852 + }, + { + "epoch": 12.93, + "learning_rate": 4.838432835820896e-05, + "loss": 0.0018, + "step": 13856 + }, + { + "epoch": 12.93, + "learning_rate": 4.838386194029851e-05, + "loss": 0.0216, + "step": 13860 + }, + { + "epoch": 12.93, + "learning_rate": 4.838339552238806e-05, + "loss": 0.0043, + "step": 13864 + }, + { + "epoch": 12.94, + "learning_rate": 4.838292910447762e-05, + "loss": 0.0007, + "step": 13868 + }, + { + "epoch": 12.94, + "learning_rate": 4.8382462686567166e-05, + "loss": 0.0054, + "step": 13872 + }, + { + "epoch": 12.94, + "learning_rate": 4.838199626865672e-05, + "loss": 0.0009, + "step": 13876 + }, + { + "epoch": 12.95, + "learning_rate": 4.838152985074627e-05, + "loss": 0.0131, + "step": 13880 + }, + { + "epoch": 12.95, + "learning_rate": 4.8381063432835824e-05, + "loss": 0.0049, + "step": 13884 + }, + { + "epoch": 12.96, + "learning_rate": 4.838059701492538e-05, + "loss": 0.0013, + "step": 13888 + }, + { + "epoch": 12.96, + "learning_rate": 4.838013059701493e-05, + "loss": 0.0049, + "step": 13892 + }, + { + "epoch": 12.96, + "learning_rate": 4.8379664179104475e-05, + "loss": 0.004, + "step": 13896 + }, + { + "epoch": 12.97, + "learning_rate": 4.8379197761194036e-05, + "loss": 0.0028, + "step": 13900 + }, + { + "epoch": 12.97, + "learning_rate": 4.8378731343283585e-05, + "loss": 0.0072, + "step": 13904 + }, + { + "epoch": 12.97, + "learning_rate": 4.837826492537313e-05, + "loss": 0.0047, + "step": 13908 + }, + { + "epoch": 12.98, + "learning_rate": 4.837779850746269e-05, + "loss": 0.0005, + "step": 13912 + }, + { + "epoch": 12.98, + "learning_rate": 4.837733208955224e-05, + "loss": 0.0068, + "step": 13916 + }, + { + "epoch": 12.98, + "learning_rate": 4.837686567164179e-05, + "loss": 0.0106, + "step": 13920 + }, + { + "epoch": 12.99, + "learning_rate": 4.8376399253731346e-05, + "loss": 0.0077, + "step": 13924 + }, + { + "epoch": 12.99, + "learning_rate": 4.83759328358209e-05, + "loss": 0.003, + "step": 13928 + }, + { + "epoch": 13.0, + "learning_rate": 4.837546641791045e-05, + "loss": 0.0073, + "step": 13932 + }, + { + "epoch": 13.0, + "learning_rate": 4.8375000000000004e-05, + "loss": 0.0009, + "step": 13936 + }, + { + "epoch": 13.0, + "learning_rate": 4.837453358208955e-05, + "loss": 0.0026, + "step": 13940 + }, + { + "epoch": 13.01, + "learning_rate": 4.8374067164179107e-05, + "loss": 0.0009, + "step": 13944 + }, + { + "epoch": 13.01, + "learning_rate": 4.837360074626866e-05, + "loss": 0.0012, + "step": 13948 + }, + { + "epoch": 13.01, + "learning_rate": 4.837313432835821e-05, + "loss": 0.0035, + "step": 13952 + }, + { + "epoch": 13.02, + "learning_rate": 4.837266791044776e-05, + "loss": 0.0041, + "step": 13956 + }, + { + "epoch": 13.02, + "learning_rate": 4.837220149253732e-05, + "loss": 0.0033, + "step": 13960 + }, + { + "epoch": 13.03, + "learning_rate": 4.837173507462687e-05, + "loss": 0.0029, + "step": 13964 + }, + { + "epoch": 13.03, + "learning_rate": 4.8371268656716416e-05, + "loss": 0.0017, + "step": 13968 + }, + { + "epoch": 13.03, + "learning_rate": 4.837080223880597e-05, + "loss": 0.0047, + "step": 13972 + }, + { + "epoch": 13.04, + "learning_rate": 4.8370335820895526e-05, + "loss": 0.0019, + "step": 13976 + }, + { + "epoch": 13.04, + "learning_rate": 4.836986940298508e-05, + "loss": 0.0022, + "step": 13980 + }, + { + "epoch": 13.04, + "learning_rate": 4.836940298507463e-05, + "loss": 0.0007, + "step": 13984 + }, + { + "epoch": 13.05, + "learning_rate": 4.8368936567164183e-05, + "loss": 0.0045, + "step": 13988 + }, + { + "epoch": 13.05, + "learning_rate": 4.836847014925374e-05, + "loss": 0.0013, + "step": 13992 + }, + { + "epoch": 13.06, + "learning_rate": 4.8368003731343287e-05, + "loss": 0.0051, + "step": 13996 + }, + { + "epoch": 13.06, + "learning_rate": 4.8367537313432835e-05, + "loss": 0.0061, + "step": 14000 + }, + { + "epoch": 13.06, + "eval_exact_match": 0.7108317214700194, + "eval_exec": 0.7456479690522244, + "eval_loss": 0.29429176449775696, + "eval_runtime": 1353.2304, + "eval_samples_per_second": 0.764, + "step": 14000 + }, + { + "epoch": 13.06, + "learning_rate": 4.836707089552239e-05, + "loss": 0.0029, + "step": 14004 + }, + { + "epoch": 13.07, + "learning_rate": 4.8366604477611944e-05, + "loss": 0.0007, + "step": 14008 + }, + { + "epoch": 13.07, + "learning_rate": 4.836613805970149e-05, + "loss": 0.0118, + "step": 14012 + }, + { + "epoch": 13.07, + "learning_rate": 4.836567164179105e-05, + "loss": 0.0014, + "step": 14016 + }, + { + "epoch": 13.08, + "learning_rate": 4.83652052238806e-05, + "loss": 0.0061, + "step": 14020 + }, + { + "epoch": 13.08, + "learning_rate": 4.836473880597015e-05, + "loss": 0.0007, + "step": 14024 + }, + { + "epoch": 13.09, + "learning_rate": 4.8364272388059705e-05, + "loss": 0.0039, + "step": 14028 + }, + { + "epoch": 13.09, + "learning_rate": 4.8363805970149254e-05, + "loss": 0.0063, + "step": 14032 + }, + { + "epoch": 13.09, + "learning_rate": 4.836333955223881e-05, + "loss": 0.0004, + "step": 14036 + }, + { + "epoch": 13.1, + "learning_rate": 4.8362873134328363e-05, + "loss": 0.0054, + "step": 14040 + }, + { + "epoch": 13.1, + "learning_rate": 4.836240671641791e-05, + "loss": 0.0014, + "step": 14044 + }, + { + "epoch": 13.1, + "learning_rate": 4.8361940298507466e-05, + "loss": 0.002, + "step": 14048 + }, + { + "epoch": 13.11, + "learning_rate": 4.836147388059702e-05, + "loss": 0.006, + "step": 14052 + }, + { + "epoch": 13.11, + "learning_rate": 4.836100746268657e-05, + "loss": 0.0012, + "step": 14056 + }, + { + "epoch": 13.12, + "learning_rate": 4.836054104477612e-05, + "loss": 0.0007, + "step": 14060 + }, + { + "epoch": 13.12, + "learning_rate": 4.836007462686567e-05, + "loss": 0.0023, + "step": 14064 + }, + { + "epoch": 13.12, + "learning_rate": 4.835960820895523e-05, + "loss": 0.001, + "step": 14068 + }, + { + "epoch": 13.13, + "learning_rate": 4.8359141791044776e-05, + "loss": 0.0075, + "step": 14072 + }, + { + "epoch": 13.13, + "learning_rate": 4.835867537313433e-05, + "loss": 0.0018, + "step": 14076 + }, + { + "epoch": 13.13, + "learning_rate": 4.8358208955223885e-05, + "loss": 0.0024, + "step": 14080 + }, + { + "epoch": 13.14, + "learning_rate": 4.8357742537313433e-05, + "loss": 0.0012, + "step": 14084 + }, + { + "epoch": 13.14, + "learning_rate": 4.835727611940299e-05, + "loss": 0.002, + "step": 14088 + }, + { + "epoch": 13.15, + "learning_rate": 4.8356809701492537e-05, + "loss": 0.0019, + "step": 14092 + }, + { + "epoch": 13.15, + "learning_rate": 4.835634328358209e-05, + "loss": 0.0041, + "step": 14096 + }, + { + "epoch": 13.15, + "learning_rate": 4.8355876865671646e-05, + "loss": 0.006, + "step": 14100 + }, + { + "epoch": 13.16, + "learning_rate": 4.8355410447761194e-05, + "loss": 0.0048, + "step": 14104 + }, + { + "epoch": 13.16, + "learning_rate": 4.835494402985075e-05, + "loss": 0.0118, + "step": 14108 + }, + { + "epoch": 13.16, + "learning_rate": 4.8354477611940304e-05, + "loss": 0.0067, + "step": 14112 + }, + { + "epoch": 13.17, + "learning_rate": 4.835401119402985e-05, + "loss": 0.0075, + "step": 14116 + }, + { + "epoch": 13.17, + "learning_rate": 4.83535447761194e-05, + "loss": 0.0042, + "step": 14120 + }, + { + "epoch": 13.18, + "learning_rate": 4.8353078358208955e-05, + "loss": 0.0119, + "step": 14124 + }, + { + "epoch": 13.18, + "learning_rate": 4.835261194029851e-05, + "loss": 0.0032, + "step": 14128 + }, + { + "epoch": 13.18, + "learning_rate": 4.835214552238806e-05, + "loss": 0.0019, + "step": 14132 + }, + { + "epoch": 13.19, + "learning_rate": 4.8351679104477613e-05, + "loss": 0.0052, + "step": 14136 + }, + { + "epoch": 13.19, + "learning_rate": 4.835121268656717e-05, + "loss": 0.0137, + "step": 14140 + }, + { + "epoch": 13.19, + "learning_rate": 4.835074626865672e-05, + "loss": 0.0016, + "step": 14144 + }, + { + "epoch": 13.2, + "learning_rate": 4.835027985074627e-05, + "loss": 0.0104, + "step": 14148 + }, + { + "epoch": 13.2, + "learning_rate": 4.834981343283582e-05, + "loss": 0.0018, + "step": 14152 + }, + { + "epoch": 13.21, + "learning_rate": 4.834934701492538e-05, + "loss": 0.0009, + "step": 14156 + }, + { + "epoch": 13.21, + "learning_rate": 4.834888059701493e-05, + "loss": 0.0051, + "step": 14160 + }, + { + "epoch": 13.21, + "learning_rate": 4.834841417910448e-05, + "loss": 0.0082, + "step": 14164 + }, + { + "epoch": 13.22, + "learning_rate": 4.834794776119403e-05, + "loss": 0.0052, + "step": 14168 + }, + { + "epoch": 13.22, + "learning_rate": 4.834748134328359e-05, + "loss": 0.0013, + "step": 14172 + }, + { + "epoch": 13.22, + "learning_rate": 4.8347014925373135e-05, + "loss": 0.0037, + "step": 14176 + }, + { + "epoch": 13.23, + "learning_rate": 4.834654850746269e-05, + "loss": 0.0014, + "step": 14180 + }, + { + "epoch": 13.23, + "learning_rate": 4.834608208955224e-05, + "loss": 0.0066, + "step": 14184 + }, + { + "epoch": 13.24, + "learning_rate": 4.834561567164179e-05, + "loss": 0.0038, + "step": 14188 + }, + { + "epoch": 13.24, + "learning_rate": 4.834514925373135e-05, + "loss": 0.0055, + "step": 14192 + }, + { + "epoch": 13.24, + "learning_rate": 4.8344682835820896e-05, + "loss": 0.0034, + "step": 14196 + }, + { + "epoch": 13.25, + "learning_rate": 4.834421641791045e-05, + "loss": 0.0021, + "step": 14200 + }, + { + "epoch": 13.25, + "learning_rate": 4.8343750000000006e-05, + "loss": 0.006, + "step": 14204 + }, + { + "epoch": 13.25, + "learning_rate": 4.8343283582089554e-05, + "loss": 0.001, + "step": 14208 + }, + { + "epoch": 13.26, + "learning_rate": 4.83428171641791e-05, + "loss": 0.0006, + "step": 14212 + }, + { + "epoch": 13.26, + "learning_rate": 4.8342350746268664e-05, + "loss": 0.0009, + "step": 14216 + }, + { + "epoch": 13.26, + "learning_rate": 4.834188432835821e-05, + "loss": 0.0015, + "step": 14220 + }, + { + "epoch": 13.27, + "learning_rate": 4.834141791044776e-05, + "loss": 0.0008, + "step": 14224 + }, + { + "epoch": 13.27, + "learning_rate": 4.8340951492537315e-05, + "loss": 0.0043, + "step": 14228 + }, + { + "epoch": 13.28, + "learning_rate": 4.834048507462687e-05, + "loss": 0.0044, + "step": 14232 + }, + { + "epoch": 13.28, + "learning_rate": 4.834001865671642e-05, + "loss": 0.002, + "step": 14236 + }, + { + "epoch": 13.28, + "learning_rate": 4.833955223880597e-05, + "loss": 0.0025, + "step": 14240 + }, + { + "epoch": 13.29, + "learning_rate": 4.833908582089552e-05, + "loss": 0.007, + "step": 14244 + }, + { + "epoch": 13.29, + "learning_rate": 4.8338619402985076e-05, + "loss": 0.0061, + "step": 14248 + }, + { + "epoch": 13.29, + "learning_rate": 4.833815298507463e-05, + "loss": 0.0038, + "step": 14252 + }, + { + "epoch": 13.3, + "learning_rate": 4.833768656716418e-05, + "loss": 0.0027, + "step": 14256 + }, + { + "epoch": 13.3, + "learning_rate": 4.8337220149253734e-05, + "loss": 0.0008, + "step": 14260 + }, + { + "epoch": 13.31, + "learning_rate": 4.833675373134329e-05, + "loss": 0.0024, + "step": 14264 + }, + { + "epoch": 13.31, + "learning_rate": 4.833628731343284e-05, + "loss": 0.0032, + "step": 14268 + }, + { + "epoch": 13.31, + "learning_rate": 4.8335820895522385e-05, + "loss": 0.0079, + "step": 14272 + }, + { + "epoch": 13.32, + "learning_rate": 4.833535447761195e-05, + "loss": 0.0065, + "step": 14276 + }, + { + "epoch": 13.32, + "learning_rate": 4.8334888059701495e-05, + "loss": 0.0045, + "step": 14280 + }, + { + "epoch": 13.32, + "learning_rate": 4.833442164179104e-05, + "loss": 0.0006, + "step": 14284 + }, + { + "epoch": 13.33, + "learning_rate": 4.83339552238806e-05, + "loss": 0.0027, + "step": 14288 + }, + { + "epoch": 13.33, + "learning_rate": 4.833348880597015e-05, + "loss": 0.0089, + "step": 14292 + }, + { + "epoch": 13.34, + "learning_rate": 4.83330223880597e-05, + "loss": 0.0009, + "step": 14296 + }, + { + "epoch": 13.34, + "learning_rate": 4.8332555970149256e-05, + "loss": 0.0052, + "step": 14300 + }, + { + "epoch": 13.34, + "learning_rate": 4.8332089552238804e-05, + "loss": 0.0019, + "step": 14304 + }, + { + "epoch": 13.35, + "learning_rate": 4.8331623134328366e-05, + "loss": 0.0033, + "step": 14308 + }, + { + "epoch": 13.35, + "learning_rate": 4.8331156716417914e-05, + "loss": 0.0011, + "step": 14312 + }, + { + "epoch": 13.35, + "learning_rate": 4.833069029850746e-05, + "loss": 0.0017, + "step": 14316 + }, + { + "epoch": 13.36, + "learning_rate": 4.833022388059702e-05, + "loss": 0.0008, + "step": 14320 + }, + { + "epoch": 13.36, + "learning_rate": 4.832975746268657e-05, + "loss": 0.001, + "step": 14324 + }, + { + "epoch": 13.37, + "learning_rate": 4.832929104477612e-05, + "loss": 0.0044, + "step": 14328 + }, + { + "epoch": 13.37, + "learning_rate": 4.8328824626865675e-05, + "loss": 0.0029, + "step": 14332 + }, + { + "epoch": 13.37, + "learning_rate": 4.832835820895523e-05, + "loss": 0.0106, + "step": 14336 + }, + { + "epoch": 13.38, + "learning_rate": 4.832789179104478e-05, + "loss": 0.016, + "step": 14340 + }, + { + "epoch": 13.38, + "learning_rate": 4.832742537313433e-05, + "loss": 0.0078, + "step": 14344 + }, + { + "epoch": 13.38, + "learning_rate": 4.832695895522388e-05, + "loss": 0.0097, + "step": 14348 + }, + { + "epoch": 13.39, + "learning_rate": 4.8326492537313436e-05, + "loss": 0.0039, + "step": 14352 + }, + { + "epoch": 13.39, + "learning_rate": 4.832602611940299e-05, + "loss": 0.0068, + "step": 14356 + }, + { + "epoch": 13.4, + "learning_rate": 4.832555970149254e-05, + "loss": 0.0163, + "step": 14360 + }, + { + "epoch": 13.4, + "learning_rate": 4.832509328358209e-05, + "loss": 0.0011, + "step": 14364 + }, + { + "epoch": 13.4, + "learning_rate": 4.832462686567165e-05, + "loss": 0.0113, + "step": 14368 + }, + { + "epoch": 13.41, + "learning_rate": 4.83241604477612e-05, + "loss": 0.0027, + "step": 14372 + }, + { + "epoch": 13.41, + "learning_rate": 4.8323694029850745e-05, + "loss": 0.0008, + "step": 14376 + }, + { + "epoch": 13.41, + "learning_rate": 4.83232276119403e-05, + "loss": 0.003, + "step": 14380 + }, + { + "epoch": 13.42, + "learning_rate": 4.8322761194029855e-05, + "loss": 0.0014, + "step": 14384 + }, + { + "epoch": 13.42, + "learning_rate": 4.83222947761194e-05, + "loss": 0.0042, + "step": 14388 + }, + { + "epoch": 13.43, + "learning_rate": 4.832182835820896e-05, + "loss": 0.0017, + "step": 14392 + }, + { + "epoch": 13.43, + "learning_rate": 4.8321361940298506e-05, + "loss": 0.0041, + "step": 14396 + }, + { + "epoch": 13.43, + "learning_rate": 4.832089552238806e-05, + "loss": 0.0012, + "step": 14400 + }, + { + "epoch": 13.44, + "learning_rate": 4.8320429104477616e-05, + "loss": 0.0005, + "step": 14404 + }, + { + "epoch": 13.44, + "learning_rate": 4.8319962686567164e-05, + "loss": 0.0005, + "step": 14408 + }, + { + "epoch": 13.44, + "learning_rate": 4.831949626865672e-05, + "loss": 0.0031, + "step": 14412 + }, + { + "epoch": 13.45, + "learning_rate": 4.8319029850746274e-05, + "loss": 0.007, + "step": 14416 + }, + { + "epoch": 13.45, + "learning_rate": 4.831856343283582e-05, + "loss": 0.0006, + "step": 14420 + }, + { + "epoch": 13.46, + "learning_rate": 4.831809701492537e-05, + "loss": 0.0038, + "step": 14424 + }, + { + "epoch": 13.46, + "learning_rate": 4.831763059701493e-05, + "loss": 0.0033, + "step": 14428 + }, + { + "epoch": 13.46, + "learning_rate": 4.831716417910448e-05, + "loss": 0.0012, + "step": 14432 + }, + { + "epoch": 13.47, + "learning_rate": 4.831669776119403e-05, + "loss": 0.005, + "step": 14436 + }, + { + "epoch": 13.47, + "learning_rate": 4.831623134328358e-05, + "loss": 0.01, + "step": 14440 + }, + { + "epoch": 13.47, + "learning_rate": 4.831576492537314e-05, + "loss": 0.0034, + "step": 14444 + }, + { + "epoch": 13.48, + "learning_rate": 4.8315298507462686e-05, + "loss": 0.0139, + "step": 14448 + }, + { + "epoch": 13.48, + "learning_rate": 4.831483208955224e-05, + "loss": 0.0069, + "step": 14452 + }, + { + "epoch": 13.49, + "learning_rate": 4.831436567164179e-05, + "loss": 0.0012, + "step": 14456 + }, + { + "epoch": 13.49, + "learning_rate": 4.8313899253731344e-05, + "loss": 0.004, + "step": 14460 + }, + { + "epoch": 13.49, + "learning_rate": 4.83134328358209e-05, + "loss": 0.0009, + "step": 14464 + }, + { + "epoch": 13.5, + "learning_rate": 4.831296641791045e-05, + "loss": 0.0009, + "step": 14468 + }, + { + "epoch": 13.5, + "learning_rate": 4.83125e-05, + "loss": 0.0009, + "step": 14472 + }, + { + "epoch": 13.5, + "learning_rate": 4.831203358208956e-05, + "loss": 0.0011, + "step": 14476 + }, + { + "epoch": 13.51, + "learning_rate": 4.8311567164179105e-05, + "loss": 0.0003, + "step": 14480 + }, + { + "epoch": 13.51, + "learning_rate": 4.831110074626866e-05, + "loss": 0.0001, + "step": 14484 + }, + { + "epoch": 13.51, + "learning_rate": 4.8310634328358215e-05, + "loss": 0.0037, + "step": 14488 + }, + { + "epoch": 13.52, + "learning_rate": 4.831016791044776e-05, + "loss": 0.0035, + "step": 14492 + }, + { + "epoch": 13.52, + "learning_rate": 4.830970149253732e-05, + "loss": 0.0006, + "step": 14496 + }, + { + "epoch": 13.53, + "learning_rate": 4.8309235074626866e-05, + "loss": 0.0156, + "step": 14500 + }, + { + "epoch": 13.53, + "eval_exact_match": 0.7224371373307543, + "eval_exec": 0.7591876208897486, + "eval_loss": 0.2937206029891968, + "eval_runtime": 1346.8122, + "eval_samples_per_second": 0.768, + "step": 14500 + }, + { + "epoch": 13.53, + "learning_rate": 4.830876865671642e-05, + "loss": 0.0009, + "step": 14504 + }, + { + "epoch": 13.53, + "learning_rate": 4.8308302238805976e-05, + "loss": 0.0014, + "step": 14508 + }, + { + "epoch": 13.54, + "learning_rate": 4.8307835820895524e-05, + "loss": 0.0018, + "step": 14512 + }, + { + "epoch": 13.54, + "learning_rate": 4.830736940298507e-05, + "loss": 0.0028, + "step": 14516 + }, + { + "epoch": 13.54, + "learning_rate": 4.8306902985074634e-05, + "loss": 0.0014, + "step": 14520 + }, + { + "epoch": 13.55, + "learning_rate": 4.830643656716418e-05, + "loss": 0.0018, + "step": 14524 + }, + { + "epoch": 13.55, + "learning_rate": 4.830597014925373e-05, + "loss": 0.0011, + "step": 14528 + }, + { + "epoch": 13.56, + "learning_rate": 4.8305503731343285e-05, + "loss": 0.0041, + "step": 14532 + }, + { + "epoch": 13.56, + "learning_rate": 4.830503731343284e-05, + "loss": 0.0035, + "step": 14536 + }, + { + "epoch": 13.56, + "learning_rate": 4.830457089552239e-05, + "loss": 0.0096, + "step": 14540 + }, + { + "epoch": 13.57, + "learning_rate": 4.830410447761194e-05, + "loss": 0.0062, + "step": 14544 + }, + { + "epoch": 13.57, + "learning_rate": 4.83036380597015e-05, + "loss": 0.0106, + "step": 14548 + }, + { + "epoch": 13.57, + "learning_rate": 4.8303171641791046e-05, + "loss": 0.0049, + "step": 14552 + }, + { + "epoch": 13.58, + "learning_rate": 4.83027052238806e-05, + "loss": 0.0069, + "step": 14556 + }, + { + "epoch": 13.58, + "learning_rate": 4.830223880597015e-05, + "loss": 0.0088, + "step": 14560 + }, + { + "epoch": 13.59, + "learning_rate": 4.8301772388059704e-05, + "loss": 0.0012, + "step": 14564 + }, + { + "epoch": 13.59, + "learning_rate": 4.830130597014926e-05, + "loss": 0.0029, + "step": 14568 + }, + { + "epoch": 13.59, + "learning_rate": 4.830083955223881e-05, + "loss": 0.0005, + "step": 14572 + }, + { + "epoch": 13.6, + "learning_rate": 4.8300373134328355e-05, + "loss": 0.001, + "step": 14576 + }, + { + "epoch": 13.6, + "learning_rate": 4.829990671641792e-05, + "loss": 0.0041, + "step": 14580 + }, + { + "epoch": 13.6, + "learning_rate": 4.8299440298507465e-05, + "loss": 0.0037, + "step": 14584 + }, + { + "epoch": 13.61, + "learning_rate": 4.829897388059701e-05, + "loss": 0.0035, + "step": 14588 + }, + { + "epoch": 13.61, + "learning_rate": 4.829850746268657e-05, + "loss": 0.0068, + "step": 14592 + }, + { + "epoch": 13.62, + "learning_rate": 4.829804104477612e-05, + "loss": 0.0018, + "step": 14596 + }, + { + "epoch": 13.62, + "learning_rate": 4.829757462686567e-05, + "loss": 0.0029, + "step": 14600 + }, + { + "epoch": 13.62, + "learning_rate": 4.8297108208955226e-05, + "loss": 0.0013, + "step": 14604 + }, + { + "epoch": 13.63, + "learning_rate": 4.829664179104478e-05, + "loss": 0.0051, + "step": 14608 + }, + { + "epoch": 13.63, + "learning_rate": 4.829617537313433e-05, + "loss": 0.0012, + "step": 14612 + }, + { + "epoch": 13.63, + "learning_rate": 4.8295708955223884e-05, + "loss": 0.0021, + "step": 14616 + }, + { + "epoch": 13.64, + "learning_rate": 4.829524253731343e-05, + "loss": 0.0019, + "step": 14620 + }, + { + "epoch": 13.64, + "learning_rate": 4.829477611940299e-05, + "loss": 0.0023, + "step": 14624 + }, + { + "epoch": 13.65, + "learning_rate": 4.829430970149254e-05, + "loss": 0.0015, + "step": 14628 + }, + { + "epoch": 13.65, + "learning_rate": 4.829384328358209e-05, + "loss": 0.002, + "step": 14632 + }, + { + "epoch": 13.65, + "learning_rate": 4.8293376865671645e-05, + "loss": 0.0095, + "step": 14636 + }, + { + "epoch": 13.66, + "learning_rate": 4.82929104477612e-05, + "loss": 0.006, + "step": 14640 + }, + { + "epoch": 13.66, + "learning_rate": 4.829244402985075e-05, + "loss": 0.0024, + "step": 14644 + }, + { + "epoch": 13.66, + "learning_rate": 4.82919776119403e-05, + "loss": 0.0022, + "step": 14648 + }, + { + "epoch": 13.67, + "learning_rate": 4.829151119402985e-05, + "loss": 0.0016, + "step": 14652 + }, + { + "epoch": 13.67, + "learning_rate": 4.8291044776119406e-05, + "loss": 0.0011, + "step": 14656 + }, + { + "epoch": 13.68, + "learning_rate": 4.829057835820896e-05, + "loss": 0.001, + "step": 14660 + }, + { + "epoch": 13.68, + "learning_rate": 4.829011194029851e-05, + "loss": 0.0068, + "step": 14664 + }, + { + "epoch": 13.68, + "learning_rate": 4.8289645522388064e-05, + "loss": 0.0007, + "step": 14668 + }, + { + "epoch": 13.69, + "learning_rate": 4.828917910447762e-05, + "loss": 0.0044, + "step": 14672 + }, + { + "epoch": 13.69, + "learning_rate": 4.828871268656717e-05, + "loss": 0.0097, + "step": 14676 + }, + { + "epoch": 13.69, + "learning_rate": 4.8288246268656715e-05, + "loss": 0.0004, + "step": 14680 + }, + { + "epoch": 13.7, + "learning_rate": 4.828777985074627e-05, + "loss": 0.0077, + "step": 14684 + }, + { + "epoch": 13.7, + "learning_rate": 4.8287313432835825e-05, + "loss": 0.0011, + "step": 14688 + }, + { + "epoch": 13.71, + "learning_rate": 4.828684701492537e-05, + "loss": 0.0008, + "step": 14692 + }, + { + "epoch": 13.71, + "learning_rate": 4.828638059701493e-05, + "loss": 0.0111, + "step": 14696 + }, + { + "epoch": 13.71, + "learning_rate": 4.828591417910448e-05, + "loss": 0.0005, + "step": 14700 + }, + { + "epoch": 13.72, + "learning_rate": 4.828544776119403e-05, + "loss": 0.0037, + "step": 14704 + }, + { + "epoch": 13.72, + "learning_rate": 4.8284981343283586e-05, + "loss": 0.0024, + "step": 14708 + }, + { + "epoch": 13.72, + "learning_rate": 4.8284514925373134e-05, + "loss": 0.0024, + "step": 14712 + }, + { + "epoch": 13.73, + "learning_rate": 4.828404850746269e-05, + "loss": 0.0052, + "step": 14716 + }, + { + "epoch": 13.73, + "learning_rate": 4.8283582089552244e-05, + "loss": 0.0005, + "step": 14720 + }, + { + "epoch": 13.73, + "learning_rate": 4.828311567164179e-05, + "loss": 0.006, + "step": 14724 + }, + { + "epoch": 13.74, + "learning_rate": 4.828264925373135e-05, + "loss": 0.0023, + "step": 14728 + }, + { + "epoch": 13.74, + "learning_rate": 4.82821828358209e-05, + "loss": 0.0033, + "step": 14732 + }, + { + "epoch": 13.75, + "learning_rate": 4.828171641791045e-05, + "loss": 0.0012, + "step": 14736 + }, + { + "epoch": 13.75, + "learning_rate": 4.828125e-05, + "loss": 0.0076, + "step": 14740 + }, + { + "epoch": 13.75, + "learning_rate": 4.828078358208955e-05, + "loss": 0.0063, + "step": 14744 + }, + { + "epoch": 13.76, + "learning_rate": 4.828031716417911e-05, + "loss": 0.0086, + "step": 14748 + }, + { + "epoch": 13.76, + "learning_rate": 4.8279850746268656e-05, + "loss": 0.0102, + "step": 14752 + }, + { + "epoch": 13.76, + "learning_rate": 4.827938432835821e-05, + "loss": 0.0011, + "step": 14756 + }, + { + "epoch": 13.77, + "learning_rate": 4.8278917910447766e-05, + "loss": 0.0056, + "step": 14760 + }, + { + "epoch": 13.77, + "learning_rate": 4.8278451492537314e-05, + "loss": 0.0025, + "step": 14764 + }, + { + "epoch": 13.78, + "learning_rate": 4.827798507462687e-05, + "loss": 0.0076, + "step": 14768 + }, + { + "epoch": 13.78, + "learning_rate": 4.827751865671642e-05, + "loss": 0.0081, + "step": 14772 + }, + { + "epoch": 13.78, + "learning_rate": 4.827705223880597e-05, + "loss": 0.0039, + "step": 14776 + }, + { + "epoch": 13.79, + "learning_rate": 4.827658582089553e-05, + "loss": 0.0013, + "step": 14780 + }, + { + "epoch": 13.79, + "learning_rate": 4.8276119402985075e-05, + "loss": 0.0019, + "step": 14784 + }, + { + "epoch": 13.79, + "learning_rate": 4.827565298507463e-05, + "loss": 0.0004, + "step": 14788 + }, + { + "epoch": 13.8, + "learning_rate": 4.8275186567164185e-05, + "loss": 0.0035, + "step": 14792 + }, + { + "epoch": 13.8, + "learning_rate": 4.827472014925373e-05, + "loss": 0.0006, + "step": 14796 + }, + { + "epoch": 13.81, + "learning_rate": 4.827425373134329e-05, + "loss": 0.0058, + "step": 14800 + }, + { + "epoch": 13.81, + "learning_rate": 4.8273787313432836e-05, + "loss": 0.0014, + "step": 14804 + }, + { + "epoch": 13.81, + "learning_rate": 4.827332089552239e-05, + "loss": 0.0023, + "step": 14808 + }, + { + "epoch": 13.82, + "learning_rate": 4.8272854477611946e-05, + "loss": 0.0006, + "step": 14812 + }, + { + "epoch": 13.82, + "learning_rate": 4.8272388059701494e-05, + "loss": 0.0057, + "step": 14816 + }, + { + "epoch": 13.82, + "learning_rate": 4.827192164179105e-05, + "loss": 0.0039, + "step": 14820 + }, + { + "epoch": 13.83, + "learning_rate": 4.8271455223880604e-05, + "loss": 0.0086, + "step": 14824 + }, + { + "epoch": 13.83, + "learning_rate": 4.827098880597015e-05, + "loss": 0.0012, + "step": 14828 + }, + { + "epoch": 13.84, + "learning_rate": 4.82705223880597e-05, + "loss": 0.0007, + "step": 14832 + }, + { + "epoch": 13.84, + "learning_rate": 4.827005597014926e-05, + "loss": 0.0006, + "step": 14836 + }, + { + "epoch": 13.84, + "learning_rate": 4.826958955223881e-05, + "loss": 0.0049, + "step": 14840 + }, + { + "epoch": 13.85, + "learning_rate": 4.826912313432836e-05, + "loss": 0.0008, + "step": 14844 + }, + { + "epoch": 13.85, + "learning_rate": 4.826865671641791e-05, + "loss": 0.001, + "step": 14848 + }, + { + "epoch": 13.85, + "learning_rate": 4.826819029850747e-05, + "loss": 0.0003, + "step": 14852 + }, + { + "epoch": 13.86, + "learning_rate": 4.8267723880597016e-05, + "loss": 0.0022, + "step": 14856 + }, + { + "epoch": 13.86, + "learning_rate": 4.826725746268657e-05, + "loss": 0.0009, + "step": 14860 + }, + { + "epoch": 13.87, + "learning_rate": 4.826679104477612e-05, + "loss": 0.0005, + "step": 14864 + }, + { + "epoch": 13.87, + "learning_rate": 4.8266324626865674e-05, + "loss": 0.0038, + "step": 14868 + }, + { + "epoch": 13.87, + "learning_rate": 4.826585820895523e-05, + "loss": 0.002, + "step": 14872 + }, + { + "epoch": 13.88, + "learning_rate": 4.826539179104478e-05, + "loss": 0.01, + "step": 14876 + }, + { + "epoch": 13.88, + "learning_rate": 4.826492537313433e-05, + "loss": 0.006, + "step": 14880 + }, + { + "epoch": 13.88, + "learning_rate": 4.8264458955223886e-05, + "loss": 0.011, + "step": 14884 + }, + { + "epoch": 13.89, + "learning_rate": 4.8263992537313435e-05, + "loss": 0.0027, + "step": 14888 + }, + { + "epoch": 13.89, + "learning_rate": 4.826352611940298e-05, + "loss": 0.0025, + "step": 14892 + }, + { + "epoch": 13.9, + "learning_rate": 4.8263059701492544e-05, + "loss": 0.0008, + "step": 14896 + }, + { + "epoch": 13.9, + "learning_rate": 4.826259328358209e-05, + "loss": 0.0017, + "step": 14900 + }, + { + "epoch": 13.9, + "learning_rate": 4.826212686567164e-05, + "loss": 0.001, + "step": 14904 + }, + { + "epoch": 13.91, + "learning_rate": 4.8261660447761196e-05, + "loss": 0.0004, + "step": 14908 + }, + { + "epoch": 13.91, + "learning_rate": 4.826119402985075e-05, + "loss": 0.0025, + "step": 14912 + }, + { + "epoch": 13.91, + "learning_rate": 4.82607276119403e-05, + "loss": 0.0011, + "step": 14916 + }, + { + "epoch": 13.92, + "learning_rate": 4.8260261194029854e-05, + "loss": 0.0007, + "step": 14920 + }, + { + "epoch": 13.92, + "learning_rate": 4.82597947761194e-05, + "loss": 0.0015, + "step": 14924 + }, + { + "epoch": 13.93, + "learning_rate": 4.8259328358208957e-05, + "loss": 0.0101, + "step": 14928 + }, + { + "epoch": 13.93, + "learning_rate": 4.825886194029851e-05, + "loss": 0.0003, + "step": 14932 + }, + { + "epoch": 13.93, + "learning_rate": 4.825839552238806e-05, + "loss": 0.0052, + "step": 14936 + }, + { + "epoch": 13.94, + "learning_rate": 4.8257929104477615e-05, + "loss": 0.0044, + "step": 14940 + }, + { + "epoch": 13.94, + "learning_rate": 4.825746268656717e-05, + "loss": 0.0005, + "step": 14944 + }, + { + "epoch": 13.94, + "learning_rate": 4.825699626865672e-05, + "loss": 0.0054, + "step": 14948 + }, + { + "epoch": 13.95, + "learning_rate": 4.8256529850746266e-05, + "loss": 0.0016, + "step": 14952 + }, + { + "epoch": 13.95, + "learning_rate": 4.825606343283583e-05, + "loss": 0.0017, + "step": 14956 + }, + { + "epoch": 13.96, + "learning_rate": 4.8255597014925376e-05, + "loss": 0.0065, + "step": 14960 + }, + { + "epoch": 13.96, + "learning_rate": 4.825513059701493e-05, + "loss": 0.001, + "step": 14964 + }, + { + "epoch": 13.96, + "learning_rate": 4.825466417910448e-05, + "loss": 0.012, + "step": 14968 + }, + { + "epoch": 13.97, + "learning_rate": 4.8254197761194033e-05, + "loss": 0.0023, + "step": 14972 + }, + { + "epoch": 13.97, + "learning_rate": 4.825373134328359e-05, + "loss": 0.0053, + "step": 14976 + }, + { + "epoch": 13.97, + "learning_rate": 4.8253264925373136e-05, + "loss": 0.0021, + "step": 14980 + }, + { + "epoch": 13.98, + "learning_rate": 4.8252798507462685e-05, + "loss": 0.0019, + "step": 14984 + }, + { + "epoch": 13.98, + "learning_rate": 4.8252332089552246e-05, + "loss": 0.002, + "step": 14988 + }, + { + "epoch": 13.98, + "learning_rate": 4.8251865671641794e-05, + "loss": 0.0069, + "step": 14992 + }, + { + "epoch": 13.99, + "learning_rate": 4.825139925373134e-05, + "loss": 0.0035, + "step": 14996 + }, + { + "epoch": 13.99, + "learning_rate": 4.82509328358209e-05, + "loss": 0.0026, + "step": 15000 + }, + { + "epoch": 13.99, + "eval_exact_match": 0.7156673114119922, + "eval_exec": 0.7524177949709865, + "eval_loss": 0.31172001361846924, + "eval_runtime": 1126.7509, + "eval_samples_per_second": 0.918, + "step": 15000 + }, + { + "epoch": 14.0, + "learning_rate": 4.825046641791045e-05, + "loss": 0.0005, + "step": 15004 + }, + { + "epoch": 14.0, + "learning_rate": 4.825e-05, + "loss": 0.0089, + "step": 15008 + }, + { + "epoch": 14.0, + "learning_rate": 4.8249533582089555e-05, + "loss": 0.0092, + "step": 15012 + }, + { + "epoch": 14.01, + "learning_rate": 4.824906716417911e-05, + "loss": 0.001, + "step": 15016 + }, + { + "epoch": 14.01, + "learning_rate": 4.824860074626866e-05, + "loss": 0.0052, + "step": 15020 + }, + { + "epoch": 14.01, + "learning_rate": 4.824813432835821e-05, + "loss": 0.0005, + "step": 15024 + }, + { + "epoch": 14.02, + "learning_rate": 4.824766791044776e-05, + "loss": 0.0014, + "step": 15028 + }, + { + "epoch": 14.02, + "learning_rate": 4.8247201492537316e-05, + "loss": 0.0028, + "step": 15032 + }, + { + "epoch": 14.03, + "learning_rate": 4.824673507462687e-05, + "loss": 0.0008, + "step": 15036 + }, + { + "epoch": 14.03, + "learning_rate": 4.824626865671642e-05, + "loss": 0.0031, + "step": 15040 + }, + { + "epoch": 14.03, + "learning_rate": 4.824580223880597e-05, + "loss": 0.0055, + "step": 15044 + }, + { + "epoch": 14.04, + "learning_rate": 4.824533582089553e-05, + "loss": 0.0049, + "step": 15048 + }, + { + "epoch": 14.04, + "learning_rate": 4.824486940298508e-05, + "loss": 0.0045, + "step": 15052 + }, + { + "epoch": 14.04, + "learning_rate": 4.8244402985074626e-05, + "loss": 0.0033, + "step": 15056 + }, + { + "epoch": 14.05, + "learning_rate": 4.824393656716418e-05, + "loss": 0.0015, + "step": 15060 + }, + { + "epoch": 14.05, + "learning_rate": 4.8243470149253735e-05, + "loss": 0.0027, + "step": 15064 + }, + { + "epoch": 14.06, + "learning_rate": 4.8243003731343283e-05, + "loss": 0.0007, + "step": 15068 + }, + { + "epoch": 14.06, + "learning_rate": 4.824253731343284e-05, + "loss": 0.001, + "step": 15072 + }, + { + "epoch": 14.06, + "learning_rate": 4.8242070895522387e-05, + "loss": 0.001, + "step": 15076 + }, + { + "epoch": 14.07, + "learning_rate": 4.824160447761194e-05, + "loss": 0.0022, + "step": 15080 + }, + { + "epoch": 14.07, + "learning_rate": 4.8241138059701496e-05, + "loss": 0.0005, + "step": 15084 + }, + { + "epoch": 14.07, + "learning_rate": 4.8240671641791044e-05, + "loss": 0.0004, + "step": 15088 + }, + { + "epoch": 14.08, + "learning_rate": 4.82402052238806e-05, + "loss": 0.0027, + "step": 15092 + }, + { + "epoch": 14.08, + "learning_rate": 4.8239738805970154e-05, + "loss": 0.0034, + "step": 15096 + }, + { + "epoch": 14.09, + "learning_rate": 4.82392723880597e-05, + "loss": 0.0063, + "step": 15100 + }, + { + "epoch": 14.09, + "learning_rate": 4.823880597014925e-05, + "loss": 0.0004, + "step": 15104 + }, + { + "epoch": 14.09, + "learning_rate": 4.823833955223881e-05, + "loss": 0.0002, + "step": 15108 + }, + { + "epoch": 14.1, + "learning_rate": 4.823787313432836e-05, + "loss": 0.0029, + "step": 15112 + }, + { + "epoch": 14.1, + "learning_rate": 4.8237406716417915e-05, + "loss": 0.0011, + "step": 15116 + }, + { + "epoch": 14.1, + "learning_rate": 4.8236940298507463e-05, + "loss": 0.0043, + "step": 15120 + }, + { + "epoch": 14.11, + "learning_rate": 4.823647388059702e-05, + "loss": 0.0084, + "step": 15124 + }, + { + "epoch": 14.11, + "learning_rate": 4.823600746268657e-05, + "loss": 0.0062, + "step": 15128 + }, + { + "epoch": 14.12, + "learning_rate": 4.823554104477612e-05, + "loss": 0.0088, + "step": 15132 + }, + { + "epoch": 14.12, + "learning_rate": 4.823507462686567e-05, + "loss": 0.0005, + "step": 15136 + }, + { + "epoch": 14.12, + "learning_rate": 4.823460820895523e-05, + "loss": 0.0078, + "step": 15140 + }, + { + "epoch": 14.13, + "learning_rate": 4.823414179104478e-05, + "loss": 0.0007, + "step": 15144 + }, + { + "epoch": 14.13, + "learning_rate": 4.823367537313433e-05, + "loss": 0.0056, + "step": 15148 + }, + { + "epoch": 14.13, + "learning_rate": 4.823320895522388e-05, + "loss": 0.0005, + "step": 15152 + }, + { + "epoch": 14.14, + "learning_rate": 4.823274253731344e-05, + "loss": 0.0117, + "step": 15156 + }, + { + "epoch": 14.14, + "learning_rate": 4.8232276119402985e-05, + "loss": 0.0032, + "step": 15160 + }, + { + "epoch": 14.15, + "learning_rate": 4.823180970149254e-05, + "loss": 0.0032, + "step": 15164 + }, + { + "epoch": 14.15, + "learning_rate": 4.8231343283582095e-05, + "loss": 0.0061, + "step": 15168 + }, + { + "epoch": 14.15, + "learning_rate": 4.823087686567164e-05, + "loss": 0.0018, + "step": 15172 + }, + { + "epoch": 14.16, + "learning_rate": 4.82304104477612e-05, + "loss": 0.0101, + "step": 15176 + }, + { + "epoch": 14.16, + "learning_rate": 4.8229944029850746e-05, + "loss": 0.0014, + "step": 15180 + }, + { + "epoch": 14.16, + "learning_rate": 4.82294776119403e-05, + "loss": 0.0058, + "step": 15184 + }, + { + "epoch": 14.17, + "learning_rate": 4.8229011194029856e-05, + "loss": 0.0036, + "step": 15188 + }, + { + "epoch": 14.17, + "learning_rate": 4.8228544776119404e-05, + "loss": 0.0013, + "step": 15192 + }, + { + "epoch": 14.18, + "learning_rate": 4.822807835820895e-05, + "loss": 0.0029, + "step": 15196 + }, + { + "epoch": 14.18, + "learning_rate": 4.8227611940298514e-05, + "loss": 0.0063, + "step": 15200 + }, + { + "epoch": 14.18, + "learning_rate": 4.822714552238806e-05, + "loss": 0.0028, + "step": 15204 + }, + { + "epoch": 14.19, + "learning_rate": 4.822667910447761e-05, + "loss": 0.0006, + "step": 15208 + }, + { + "epoch": 14.19, + "learning_rate": 4.8226212686567165e-05, + "loss": 0.0014, + "step": 15212 + }, + { + "epoch": 14.19, + "learning_rate": 4.822574626865672e-05, + "loss": 0.0005, + "step": 15216 + }, + { + "epoch": 14.2, + "learning_rate": 4.822527985074627e-05, + "loss": 0.0019, + "step": 15220 + }, + { + "epoch": 14.2, + "learning_rate": 4.822481343283582e-05, + "loss": 0.0003, + "step": 15224 + }, + { + "epoch": 14.21, + "learning_rate": 4.822434701492538e-05, + "loss": 0.001, + "step": 15228 + }, + { + "epoch": 14.21, + "learning_rate": 4.8223880597014926e-05, + "loss": 0.0025, + "step": 15232 + }, + { + "epoch": 14.21, + "learning_rate": 4.822341417910448e-05, + "loss": 0.0007, + "step": 15236 + }, + { + "epoch": 14.22, + "learning_rate": 4.822294776119403e-05, + "loss": 0.0043, + "step": 15240 + }, + { + "epoch": 14.22, + "learning_rate": 4.8222481343283584e-05, + "loss": 0.0014, + "step": 15244 + }, + { + "epoch": 14.22, + "learning_rate": 4.822201492537314e-05, + "loss": 0.0025, + "step": 15248 + }, + { + "epoch": 14.23, + "learning_rate": 4.822154850746269e-05, + "loss": 0.0002, + "step": 15252 + }, + { + "epoch": 14.23, + "learning_rate": 4.8221082089552235e-05, + "loss": 0.0012, + "step": 15256 + }, + { + "epoch": 14.24, + "learning_rate": 4.82206156716418e-05, + "loss": 0.0026, + "step": 15260 + }, + { + "epoch": 14.24, + "learning_rate": 4.8220149253731345e-05, + "loss": 0.0005, + "step": 15264 + }, + { + "epoch": 14.24, + "learning_rate": 4.821968283582089e-05, + "loss": 0.0004, + "step": 15268 + }, + { + "epoch": 14.25, + "learning_rate": 4.821921641791045e-05, + "loss": 0.0002, + "step": 15272 + }, + { + "epoch": 14.25, + "learning_rate": 4.821875e-05, + "loss": 0.0069, + "step": 15276 + }, + { + "epoch": 14.25, + "learning_rate": 4.821828358208956e-05, + "loss": 0.0019, + "step": 15280 + }, + { + "epoch": 14.26, + "learning_rate": 4.8217817164179106e-05, + "loss": 0.0076, + "step": 15284 + }, + { + "epoch": 14.26, + "learning_rate": 4.821735074626866e-05, + "loss": 0.0015, + "step": 15288 + }, + { + "epoch": 14.26, + "learning_rate": 4.8216884328358216e-05, + "loss": 0.0013, + "step": 15292 + }, + { + "epoch": 14.27, + "learning_rate": 4.8216417910447764e-05, + "loss": 0.0014, + "step": 15296 + }, + { + "epoch": 14.27, + "learning_rate": 4.821595149253731e-05, + "loss": 0.0048, + "step": 15300 + }, + { + "epoch": 14.28, + "learning_rate": 4.821548507462687e-05, + "loss": 0.002, + "step": 15304 + }, + { + "epoch": 14.28, + "learning_rate": 4.821501865671642e-05, + "loss": 0.0023, + "step": 15308 + }, + { + "epoch": 14.28, + "learning_rate": 4.821455223880597e-05, + "loss": 0.0034, + "step": 15312 + }, + { + "epoch": 14.29, + "learning_rate": 4.8214085820895525e-05, + "loss": 0.0047, + "step": 15316 + }, + { + "epoch": 14.29, + "learning_rate": 4.821361940298508e-05, + "loss": 0.0092, + "step": 15320 + }, + { + "epoch": 14.29, + "learning_rate": 4.821315298507463e-05, + "loss": 0.0144, + "step": 15324 + }, + { + "epoch": 14.3, + "learning_rate": 4.821268656716418e-05, + "loss": 0.0068, + "step": 15328 + }, + { + "epoch": 14.3, + "learning_rate": 4.821222014925373e-05, + "loss": 0.0042, + "step": 15332 + }, + { + "epoch": 14.31, + "learning_rate": 4.8211753731343286e-05, + "loss": 0.0021, + "step": 15336 + }, + { + "epoch": 14.31, + "learning_rate": 4.821128731343284e-05, + "loss": 0.004, + "step": 15340 + }, + { + "epoch": 14.31, + "learning_rate": 4.821082089552239e-05, + "loss": 0.0018, + "step": 15344 + }, + { + "epoch": 14.32, + "learning_rate": 4.8210354477611944e-05, + "loss": 0.0032, + "step": 15348 + }, + { + "epoch": 14.32, + "learning_rate": 4.82098880597015e-05, + "loss": 0.0047, + "step": 15352 + }, + { + "epoch": 14.32, + "learning_rate": 4.820942164179105e-05, + "loss": 0.0024, + "step": 15356 + }, + { + "epoch": 14.33, + "learning_rate": 4.8208955223880595e-05, + "loss": 0.0012, + "step": 15360 + }, + { + "epoch": 14.33, + "learning_rate": 4.820848880597015e-05, + "loss": 0.0011, + "step": 15364 + }, + { + "epoch": 14.34, + "learning_rate": 4.8208022388059705e-05, + "loss": 0.0041, + "step": 15368 + }, + { + "epoch": 14.34, + "learning_rate": 4.820755597014925e-05, + "loss": 0.0026, + "step": 15372 + }, + { + "epoch": 14.34, + "learning_rate": 4.820708955223881e-05, + "loss": 0.0012, + "step": 15376 + }, + { + "epoch": 14.35, + "learning_rate": 4.820662313432836e-05, + "loss": 0.0038, + "step": 15380 + }, + { + "epoch": 14.35, + "learning_rate": 4.820615671641791e-05, + "loss": 0.0011, + "step": 15384 + }, + { + "epoch": 14.35, + "learning_rate": 4.8205690298507466e-05, + "loss": 0.0007, + "step": 15388 + }, + { + "epoch": 14.36, + "learning_rate": 4.8205223880597014e-05, + "loss": 0.0003, + "step": 15392 + }, + { + "epoch": 14.36, + "learning_rate": 4.820475746268657e-05, + "loss": 0.0002, + "step": 15396 + }, + { + "epoch": 14.37, + "learning_rate": 4.8204291044776124e-05, + "loss": 0.0008, + "step": 15400 + }, + { + "epoch": 14.37, + "learning_rate": 4.820382462686567e-05, + "loss": 0.0011, + "step": 15404 + }, + { + "epoch": 14.37, + "learning_rate": 4.820335820895523e-05, + "loss": 0.0026, + "step": 15408 + }, + { + "epoch": 14.38, + "learning_rate": 4.820289179104478e-05, + "loss": 0.0019, + "step": 15412 + }, + { + "epoch": 14.38, + "learning_rate": 4.820242537313433e-05, + "loss": 0.0036, + "step": 15416 + }, + { + "epoch": 14.38, + "learning_rate": 4.820195895522388e-05, + "loss": 0.003, + "step": 15420 + }, + { + "epoch": 14.39, + "learning_rate": 4.820149253731343e-05, + "loss": 0.0109, + "step": 15424 + }, + { + "epoch": 14.39, + "learning_rate": 4.820102611940299e-05, + "loss": 0.0034, + "step": 15428 + }, + { + "epoch": 14.4, + "learning_rate": 4.8200559701492536e-05, + "loss": 0.0005, + "step": 15432 + }, + { + "epoch": 14.4, + "learning_rate": 4.820009328358209e-05, + "loss": 0.0084, + "step": 15436 + }, + { + "epoch": 14.4, + "learning_rate": 4.8199626865671646e-05, + "loss": 0.0008, + "step": 15440 + }, + { + "epoch": 14.41, + "learning_rate": 4.81991604477612e-05, + "loss": 0.009, + "step": 15444 + }, + { + "epoch": 14.41, + "learning_rate": 4.819869402985075e-05, + "loss": 0.0021, + "step": 15448 + }, + { + "epoch": 14.41, + "learning_rate": 4.81982276119403e-05, + "loss": 0.0029, + "step": 15452 + }, + { + "epoch": 14.42, + "learning_rate": 4.819776119402986e-05, + "loss": 0.0013, + "step": 15456 + }, + { + "epoch": 14.42, + "learning_rate": 4.819729477611941e-05, + "loss": 0.0014, + "step": 15460 + }, + { + "epoch": 14.43, + "learning_rate": 4.8196828358208955e-05, + "loss": 0.0073, + "step": 15464 + }, + { + "epoch": 14.43, + "learning_rate": 4.819636194029851e-05, + "loss": 0.0027, + "step": 15468 + }, + { + "epoch": 14.43, + "learning_rate": 4.8195895522388065e-05, + "loss": 0.0027, + "step": 15472 + }, + { + "epoch": 14.44, + "learning_rate": 4.819542910447761e-05, + "loss": 0.0005, + "step": 15476 + }, + { + "epoch": 14.44, + "learning_rate": 4.819496268656717e-05, + "loss": 0.0028, + "step": 15480 + }, + { + "epoch": 14.44, + "learning_rate": 4.8194496268656716e-05, + "loss": 0.0005, + "step": 15484 + }, + { + "epoch": 14.45, + "learning_rate": 4.819402985074627e-05, + "loss": 0.0063, + "step": 15488 + }, + { + "epoch": 14.45, + "learning_rate": 4.8193563432835826e-05, + "loss": 0.0037, + "step": 15492 + }, + { + "epoch": 14.46, + "learning_rate": 4.8193097014925374e-05, + "loss": 0.0025, + "step": 15496 + }, + { + "epoch": 14.46, + "learning_rate": 4.819263059701493e-05, + "loss": 0.0044, + "step": 15500 + }, + { + "epoch": 14.46, + "eval_exact_match": 0.7340425531914894, + "eval_exec": 0.7727272727272727, + "eval_loss": 0.30817216634750366, + "eval_runtime": 1067.8388, + "eval_samples_per_second": 0.968, + "step": 15500 + }, + { + "epoch": 14.46, + "learning_rate": 4.8192164179104484e-05, + "loss": 0.0109, + "step": 15504 + }, + { + "epoch": 14.47, + "learning_rate": 4.819169776119403e-05, + "loss": 0.0041, + "step": 15508 + }, + { + "epoch": 14.47, + "learning_rate": 4.819123134328358e-05, + "loss": 0.0005, + "step": 15512 + }, + { + "epoch": 14.47, + "learning_rate": 4.819076492537314e-05, + "loss": 0.0005, + "step": 15516 + }, + { + "epoch": 14.48, + "learning_rate": 4.819029850746269e-05, + "loss": 0.0044, + "step": 15520 + }, + { + "epoch": 14.48, + "learning_rate": 4.818983208955224e-05, + "loss": 0.0005, + "step": 15524 + }, + { + "epoch": 14.49, + "learning_rate": 4.818936567164179e-05, + "loss": 0.0006, + "step": 15528 + }, + { + "epoch": 14.49, + "learning_rate": 4.818889925373135e-05, + "loss": 0.0026, + "step": 15532 + }, + { + "epoch": 14.49, + "learning_rate": 4.8188432835820896e-05, + "loss": 0.0042, + "step": 15536 + }, + { + "epoch": 14.5, + "learning_rate": 4.818796641791045e-05, + "loss": 0.0017, + "step": 15540 + }, + { + "epoch": 14.5, + "learning_rate": 4.81875e-05, + "loss": 0.0013, + "step": 15544 + }, + { + "epoch": 14.5, + "learning_rate": 4.8187033582089554e-05, + "loss": 0.0041, + "step": 15548 + }, + { + "epoch": 14.51, + "learning_rate": 4.818656716417911e-05, + "loss": 0.0098, + "step": 15552 + }, + { + "epoch": 14.51, + "learning_rate": 4.818610074626866e-05, + "loss": 0.0025, + "step": 15556 + }, + { + "epoch": 14.51, + "learning_rate": 4.818563432835821e-05, + "loss": 0.0019, + "step": 15560 + }, + { + "epoch": 14.52, + "learning_rate": 4.818516791044777e-05, + "loss": 0.0021, + "step": 15564 + }, + { + "epoch": 14.52, + "learning_rate": 4.8184701492537315e-05, + "loss": 0.0054, + "step": 15568 + }, + { + "epoch": 14.53, + "learning_rate": 4.818423507462686e-05, + "loss": 0.0031, + "step": 15572 + }, + { + "epoch": 14.53, + "learning_rate": 4.8183768656716425e-05, + "loss": 0.0079, + "step": 15576 + }, + { + "epoch": 14.53, + "learning_rate": 4.818330223880597e-05, + "loss": 0.0037, + "step": 15580 + }, + { + "epoch": 14.54, + "learning_rate": 4.818283582089552e-05, + "loss": 0.0011, + "step": 15584 + }, + { + "epoch": 14.54, + "learning_rate": 4.8182369402985076e-05, + "loss": 0.0073, + "step": 15588 + }, + { + "epoch": 14.54, + "learning_rate": 4.818190298507463e-05, + "loss": 0.0069, + "step": 15592 + }, + { + "epoch": 14.55, + "learning_rate": 4.818143656716418e-05, + "loss": 0.006, + "step": 15596 + }, + { + "epoch": 14.55, + "learning_rate": 4.8180970149253734e-05, + "loss": 0.0053, + "step": 15600 + }, + { + "epoch": 14.56, + "learning_rate": 4.818050373134328e-05, + "loss": 0.0039, + "step": 15604 + }, + { + "epoch": 14.56, + "learning_rate": 4.8180037313432844e-05, + "loss": 0.0132, + "step": 15608 + }, + { + "epoch": 14.56, + "learning_rate": 4.817957089552239e-05, + "loss": 0.0041, + "step": 15612 + }, + { + "epoch": 14.57, + "learning_rate": 4.817910447761194e-05, + "loss": 0.0063, + "step": 15616 + }, + { + "epoch": 14.57, + "learning_rate": 4.8178638059701495e-05, + "loss": 0.0051, + "step": 15620 + }, + { + "epoch": 14.57, + "learning_rate": 4.817817164179105e-05, + "loss": 0.0036, + "step": 15624 + }, + { + "epoch": 14.58, + "learning_rate": 4.81777052238806e-05, + "loss": 0.0027, + "step": 15628 + }, + { + "epoch": 14.58, + "learning_rate": 4.817723880597015e-05, + "loss": 0.0041, + "step": 15632 + }, + { + "epoch": 14.59, + "learning_rate": 4.817677238805971e-05, + "loss": 0.0017, + "step": 15636 + }, + { + "epoch": 14.59, + "learning_rate": 4.8176305970149256e-05, + "loss": 0.0036, + "step": 15640 + }, + { + "epoch": 14.59, + "learning_rate": 4.817583955223881e-05, + "loss": 0.0112, + "step": 15644 + }, + { + "epoch": 14.6, + "learning_rate": 4.817537313432836e-05, + "loss": 0.0043, + "step": 15648 + }, + { + "epoch": 14.6, + "learning_rate": 4.8174906716417914e-05, + "loss": 0.0023, + "step": 15652 + }, + { + "epoch": 14.6, + "learning_rate": 4.817444029850747e-05, + "loss": 0.0029, + "step": 15656 + }, + { + "epoch": 14.61, + "learning_rate": 4.817397388059702e-05, + "loss": 0.001, + "step": 15660 + }, + { + "epoch": 14.61, + "learning_rate": 4.8173507462686565e-05, + "loss": 0.001, + "step": 15664 + }, + { + "epoch": 14.62, + "learning_rate": 4.8173041044776127e-05, + "loss": 0.0011, + "step": 15668 + }, + { + "epoch": 14.62, + "learning_rate": 4.8172574626865675e-05, + "loss": 0.009, + "step": 15672 + }, + { + "epoch": 14.62, + "learning_rate": 4.817210820895522e-05, + "loss": 0.0054, + "step": 15676 + }, + { + "epoch": 14.63, + "learning_rate": 4.817164179104478e-05, + "loss": 0.0043, + "step": 15680 + }, + { + "epoch": 14.63, + "learning_rate": 4.817117537313433e-05, + "loss": 0.0026, + "step": 15684 + }, + { + "epoch": 14.63, + "learning_rate": 4.817070895522388e-05, + "loss": 0.0013, + "step": 15688 + }, + { + "epoch": 14.64, + "learning_rate": 4.8170242537313436e-05, + "loss": 0.0018, + "step": 15692 + }, + { + "epoch": 14.64, + "learning_rate": 4.816977611940299e-05, + "loss": 0.0052, + "step": 15696 + }, + { + "epoch": 14.65, + "learning_rate": 4.816930970149254e-05, + "loss": 0.0049, + "step": 15700 + }, + { + "epoch": 14.65, + "learning_rate": 4.8168843283582094e-05, + "loss": 0.0006, + "step": 15704 + }, + { + "epoch": 14.65, + "learning_rate": 4.816837686567164e-05, + "loss": 0.0119, + "step": 15708 + }, + { + "epoch": 14.66, + "learning_rate": 4.81679104477612e-05, + "loss": 0.0032, + "step": 15712 + }, + { + "epoch": 14.66, + "learning_rate": 4.816744402985075e-05, + "loss": 0.0048, + "step": 15716 + }, + { + "epoch": 14.66, + "learning_rate": 4.81669776119403e-05, + "loss": 0.0005, + "step": 15720 + }, + { + "epoch": 14.67, + "learning_rate": 4.816651119402985e-05, + "loss": 0.001, + "step": 15724 + }, + { + "epoch": 14.67, + "learning_rate": 4.816604477611941e-05, + "loss": 0.0078, + "step": 15728 + }, + { + "epoch": 14.68, + "learning_rate": 4.816557835820896e-05, + "loss": 0.0037, + "step": 15732 + }, + { + "epoch": 14.68, + "learning_rate": 4.8165111940298506e-05, + "loss": 0.0027, + "step": 15736 + }, + { + "epoch": 14.68, + "learning_rate": 4.816464552238806e-05, + "loss": 0.0025, + "step": 15740 + }, + { + "epoch": 14.69, + "learning_rate": 4.8164179104477616e-05, + "loss": 0.0017, + "step": 15744 + }, + { + "epoch": 14.69, + "learning_rate": 4.8163712686567164e-05, + "loss": 0.0008, + "step": 15748 + }, + { + "epoch": 14.69, + "learning_rate": 4.816324626865672e-05, + "loss": 0.0007, + "step": 15752 + }, + { + "epoch": 14.7, + "learning_rate": 4.8162779850746274e-05, + "loss": 0.0105, + "step": 15756 + }, + { + "epoch": 14.7, + "learning_rate": 4.816231343283582e-05, + "loss": 0.0056, + "step": 15760 + }, + { + "epoch": 14.71, + "learning_rate": 4.816184701492538e-05, + "loss": 0.0056, + "step": 15764 + }, + { + "epoch": 14.71, + "learning_rate": 4.8161380597014925e-05, + "loss": 0.009, + "step": 15768 + }, + { + "epoch": 14.71, + "learning_rate": 4.816091417910448e-05, + "loss": 0.0036, + "step": 15772 + }, + { + "epoch": 14.72, + "learning_rate": 4.8160447761194035e-05, + "loss": 0.0007, + "step": 15776 + }, + { + "epoch": 14.72, + "learning_rate": 4.815998134328358e-05, + "loss": 0.002, + "step": 15780 + }, + { + "epoch": 14.72, + "learning_rate": 4.815951492537314e-05, + "loss": 0.0027, + "step": 15784 + }, + { + "epoch": 14.73, + "learning_rate": 4.815904850746269e-05, + "loss": 0.002, + "step": 15788 + }, + { + "epoch": 14.73, + "learning_rate": 4.815858208955224e-05, + "loss": 0.0004, + "step": 15792 + }, + { + "epoch": 14.73, + "learning_rate": 4.8158115671641796e-05, + "loss": 0.0035, + "step": 15796 + }, + { + "epoch": 14.74, + "learning_rate": 4.8157649253731344e-05, + "loss": 0.0032, + "step": 15800 + }, + { + "epoch": 14.74, + "learning_rate": 4.81571828358209e-05, + "loss": 0.0004, + "step": 15804 + }, + { + "epoch": 14.75, + "learning_rate": 4.8156716417910453e-05, + "loss": 0.0007, + "step": 15808 + }, + { + "epoch": 14.75, + "learning_rate": 4.815625e-05, + "loss": 0.002, + "step": 15812 + }, + { + "epoch": 14.75, + "learning_rate": 4.815578358208955e-05, + "loss": 0.0007, + "step": 15816 + }, + { + "epoch": 14.76, + "learning_rate": 4.815531716417911e-05, + "loss": 0.0001, + "step": 15820 + }, + { + "epoch": 14.76, + "learning_rate": 4.815485074626866e-05, + "loss": 0.0011, + "step": 15824 + }, + { + "epoch": 14.76, + "learning_rate": 4.815438432835821e-05, + "loss": 0.0041, + "step": 15828 + }, + { + "epoch": 14.77, + "learning_rate": 4.815391791044776e-05, + "loss": 0.0061, + "step": 15832 + }, + { + "epoch": 14.77, + "learning_rate": 4.815345149253732e-05, + "loss": 0.0028, + "step": 15836 + }, + { + "epoch": 14.78, + "learning_rate": 4.8152985074626866e-05, + "loss": 0.0028, + "step": 15840 + }, + { + "epoch": 14.78, + "learning_rate": 4.815251865671642e-05, + "loss": 0.0051, + "step": 15844 + }, + { + "epoch": 14.78, + "learning_rate": 4.8152052238805975e-05, + "loss": 0.0054, + "step": 15848 + }, + { + "epoch": 14.79, + "learning_rate": 4.8151585820895524e-05, + "loss": 0.0016, + "step": 15852 + }, + { + "epoch": 14.79, + "learning_rate": 4.815111940298508e-05, + "loss": 0.0024, + "step": 15856 + }, + { + "epoch": 14.79, + "learning_rate": 4.815065298507463e-05, + "loss": 0.0041, + "step": 15860 + }, + { + "epoch": 14.8, + "learning_rate": 4.815018656716418e-05, + "loss": 0.0025, + "step": 15864 + }, + { + "epoch": 14.8, + "learning_rate": 4.8149720149253736e-05, + "loss": 0.003, + "step": 15868 + }, + { + "epoch": 14.81, + "learning_rate": 4.8149253731343285e-05, + "loss": 0.0043, + "step": 15872 + }, + { + "epoch": 14.81, + "learning_rate": 4.814878731343283e-05, + "loss": 0.014, + "step": 15876 + }, + { + "epoch": 14.81, + "learning_rate": 4.8148320895522394e-05, + "loss": 0.0046, + "step": 15880 + }, + { + "epoch": 14.82, + "learning_rate": 4.814785447761194e-05, + "loss": 0.0089, + "step": 15884 + }, + { + "epoch": 14.82, + "learning_rate": 4.814738805970149e-05, + "loss": 0.0005, + "step": 15888 + }, + { + "epoch": 14.82, + "learning_rate": 4.8146921641791046e-05, + "loss": 0.0025, + "step": 15892 + }, + { + "epoch": 14.83, + "learning_rate": 4.81464552238806e-05, + "loss": 0.0027, + "step": 15896 + }, + { + "epoch": 14.83, + "learning_rate": 4.814598880597015e-05, + "loss": 0.0082, + "step": 15900 + }, + { + "epoch": 14.84, + "learning_rate": 4.8145522388059704e-05, + "loss": 0.0016, + "step": 15904 + }, + { + "epoch": 14.84, + "learning_rate": 4.814505597014926e-05, + "loss": 0.0556, + "step": 15908 + }, + { + "epoch": 14.84, + "learning_rate": 4.8144589552238807e-05, + "loss": 0.0032, + "step": 15912 + }, + { + "epoch": 14.85, + "learning_rate": 4.814412313432836e-05, + "loss": 0.0014, + "step": 15916 + }, + { + "epoch": 14.85, + "learning_rate": 4.814365671641791e-05, + "loss": 0.0013, + "step": 15920 + }, + { + "epoch": 14.85, + "learning_rate": 4.8143190298507465e-05, + "loss": 0.0037, + "step": 15924 + }, + { + "epoch": 14.86, + "learning_rate": 4.814272388059702e-05, + "loss": 0.002, + "step": 15928 + }, + { + "epoch": 14.86, + "learning_rate": 4.814225746268657e-05, + "loss": 0.0032, + "step": 15932 + }, + { + "epoch": 14.87, + "learning_rate": 4.814179104477612e-05, + "loss": 0.0133, + "step": 15936 + }, + { + "epoch": 14.87, + "learning_rate": 4.814132462686568e-05, + "loss": 0.0004, + "step": 15940 + }, + { + "epoch": 14.87, + "learning_rate": 4.8140858208955225e-05, + "loss": 0.0026, + "step": 15944 + }, + { + "epoch": 14.88, + "learning_rate": 4.814039179104478e-05, + "loss": 0.0004, + "step": 15948 + }, + { + "epoch": 14.88, + "learning_rate": 4.813992537313433e-05, + "loss": 0.0043, + "step": 15952 + }, + { + "epoch": 14.88, + "learning_rate": 4.8139458955223883e-05, + "loss": 0.0121, + "step": 15956 + }, + { + "epoch": 14.89, + "learning_rate": 4.813899253731344e-05, + "loss": 0.0007, + "step": 15960 + }, + { + "epoch": 14.89, + "learning_rate": 4.8138526119402986e-05, + "loss": 0.0046, + "step": 15964 + }, + { + "epoch": 14.9, + "learning_rate": 4.813805970149254e-05, + "loss": 0.0008, + "step": 15968 + }, + { + "epoch": 14.9, + "learning_rate": 4.8137593283582096e-05, + "loss": 0.0106, + "step": 15972 + }, + { + "epoch": 14.9, + "learning_rate": 4.8137126865671644e-05, + "loss": 0.0034, + "step": 15976 + }, + { + "epoch": 14.91, + "learning_rate": 4.813666044776119e-05, + "loss": 0.0062, + "step": 15980 + }, + { + "epoch": 14.91, + "learning_rate": 4.813619402985075e-05, + "loss": 0.0059, + "step": 15984 + }, + { + "epoch": 14.91, + "learning_rate": 4.81357276119403e-05, + "loss": 0.0028, + "step": 15988 + }, + { + "epoch": 14.92, + "learning_rate": 4.813526119402985e-05, + "loss": 0.0056, + "step": 15992 + }, + { + "epoch": 14.92, + "learning_rate": 4.8134794776119405e-05, + "loss": 0.0029, + "step": 15996 + }, + { + "epoch": 14.93, + "learning_rate": 4.813432835820896e-05, + "loss": 0.0031, + "step": 16000 + }, + { + "epoch": 14.93, + "eval_exact_match": 0.7166344294003868, + "eval_exec": 0.7553191489361702, + "eval_loss": 0.2952513098716736, + "eval_runtime": 1078.2591, + "eval_samples_per_second": 0.959, + "step": 16000 + }, + { + "epoch": 14.93, + "learning_rate": 4.813386194029851e-05, + "loss": 0.005, + "step": 16004 + }, + { + "epoch": 14.93, + "learning_rate": 4.813339552238806e-05, + "loss": 0.0055, + "step": 16008 + }, + { + "epoch": 14.94, + "learning_rate": 4.813292910447761e-05, + "loss": 0.0085, + "step": 16012 + }, + { + "epoch": 14.94, + "learning_rate": 4.8132462686567166e-05, + "loss": 0.0029, + "step": 16016 + }, + { + "epoch": 14.94, + "learning_rate": 4.813199626865672e-05, + "loss": 0.0035, + "step": 16020 + }, + { + "epoch": 14.95, + "learning_rate": 4.813152985074627e-05, + "loss": 0.0051, + "step": 16024 + }, + { + "epoch": 14.95, + "learning_rate": 4.8131063432835824e-05, + "loss": 0.0015, + "step": 16028 + }, + { + "epoch": 14.96, + "learning_rate": 4.813059701492538e-05, + "loss": 0.005, + "step": 16032 + }, + { + "epoch": 14.96, + "learning_rate": 4.813013059701493e-05, + "loss": 0.0011, + "step": 16036 + }, + { + "epoch": 14.96, + "learning_rate": 4.8129664179104476e-05, + "loss": 0.002, + "step": 16040 + }, + { + "epoch": 14.97, + "learning_rate": 4.812919776119403e-05, + "loss": 0.0034, + "step": 16044 + }, + { + "epoch": 14.97, + "learning_rate": 4.8128731343283585e-05, + "loss": 0.0025, + "step": 16048 + }, + { + "epoch": 14.97, + "learning_rate": 4.8128264925373133e-05, + "loss": 0.002, + "step": 16052 + }, + { + "epoch": 14.98, + "learning_rate": 4.812779850746269e-05, + "loss": 0.0081, + "step": 16056 + }, + { + "epoch": 14.98, + "learning_rate": 4.812733208955224e-05, + "loss": 0.0041, + "step": 16060 + }, + { + "epoch": 14.98, + "learning_rate": 4.812686567164179e-05, + "loss": 0.0021, + "step": 16064 + }, + { + "epoch": 14.99, + "learning_rate": 4.8126399253731346e-05, + "loss": 0.0031, + "step": 16068 + }, + { + "epoch": 14.99, + "learning_rate": 4.8125932835820894e-05, + "loss": 0.0066, + "step": 16072 + }, + { + "epoch": 15.0, + "learning_rate": 4.812546641791045e-05, + "loss": 0.0015, + "step": 16076 + }, + { + "epoch": 15.0, + "learning_rate": 4.8125000000000004e-05, + "loss": 0.0011, + "step": 16080 + }, + { + "epoch": 15.0, + "learning_rate": 4.812453358208955e-05, + "loss": 0.0049, + "step": 16084 + }, + { + "epoch": 15.01, + "learning_rate": 4.812406716417911e-05, + "loss": 0.0013, + "step": 16088 + }, + { + "epoch": 15.01, + "learning_rate": 4.812360074626866e-05, + "loss": 0.0014, + "step": 16092 + }, + { + "epoch": 15.01, + "learning_rate": 4.812313432835821e-05, + "loss": 0.0009, + "step": 16096 + }, + { + "epoch": 15.02, + "learning_rate": 4.8122667910447765e-05, + "loss": 0.0009, + "step": 16100 + }, + { + "epoch": 15.02, + "learning_rate": 4.812220149253731e-05, + "loss": 0.0004, + "step": 16104 + }, + { + "epoch": 15.03, + "learning_rate": 4.812173507462687e-05, + "loss": 0.015, + "step": 16108 + }, + { + "epoch": 15.03, + "learning_rate": 4.812126865671642e-05, + "loss": 0.0018, + "step": 16112 + }, + { + "epoch": 15.03, + "learning_rate": 4.812080223880597e-05, + "loss": 0.002, + "step": 16116 + }, + { + "epoch": 15.04, + "learning_rate": 4.8120335820895526e-05, + "loss": 0.0027, + "step": 16120 + }, + { + "epoch": 15.04, + "learning_rate": 4.811986940298508e-05, + "loss": 0.0018, + "step": 16124 + }, + { + "epoch": 15.04, + "learning_rate": 4.811940298507463e-05, + "loss": 0.0019, + "step": 16128 + }, + { + "epoch": 15.05, + "learning_rate": 4.811893656716418e-05, + "loss": 0.0064, + "step": 16132 + }, + { + "epoch": 15.05, + "learning_rate": 4.811847014925374e-05, + "loss": 0.0063, + "step": 16136 + }, + { + "epoch": 15.06, + "learning_rate": 4.811800373134329e-05, + "loss": 0.0016, + "step": 16140 + }, + { + "epoch": 15.06, + "learning_rate": 4.8117537313432835e-05, + "loss": 0.0042, + "step": 16144 + }, + { + "epoch": 15.06, + "learning_rate": 4.811707089552239e-05, + "loss": 0.0011, + "step": 16148 + }, + { + "epoch": 15.07, + "learning_rate": 4.8116604477611945e-05, + "loss": 0.0019, + "step": 16152 + }, + { + "epoch": 15.07, + "learning_rate": 4.811613805970149e-05, + "loss": 0.0003, + "step": 16156 + }, + { + "epoch": 15.07, + "learning_rate": 4.811567164179105e-05, + "loss": 0.0019, + "step": 16160 + }, + { + "epoch": 15.08, + "learning_rate": 4.8115205223880596e-05, + "loss": 0.0049, + "step": 16164 + }, + { + "epoch": 15.08, + "learning_rate": 4.811473880597015e-05, + "loss": 0.0036, + "step": 16168 + }, + { + "epoch": 15.09, + "learning_rate": 4.8114272388059706e-05, + "loss": 0.0012, + "step": 16172 + }, + { + "epoch": 15.09, + "learning_rate": 4.8113805970149254e-05, + "loss": 0.0004, + "step": 16176 + }, + { + "epoch": 15.09, + "learning_rate": 4.811333955223881e-05, + "loss": 0.0018, + "step": 16180 + }, + { + "epoch": 15.1, + "learning_rate": 4.8112873134328364e-05, + "loss": 0.0003, + "step": 16184 + }, + { + "epoch": 15.1, + "learning_rate": 4.811240671641791e-05, + "loss": 0.003, + "step": 16188 + }, + { + "epoch": 15.1, + "learning_rate": 4.811194029850746e-05, + "loss": 0.0029, + "step": 16192 + }, + { + "epoch": 15.11, + "learning_rate": 4.811147388059702e-05, + "loss": 0.0081, + "step": 16196 + }, + { + "epoch": 15.11, + "learning_rate": 4.811100746268657e-05, + "loss": 0.0094, + "step": 16200 + }, + { + "epoch": 15.12, + "learning_rate": 4.811054104477612e-05, + "loss": 0.0017, + "step": 16204 + }, + { + "epoch": 15.12, + "learning_rate": 4.811007462686567e-05, + "loss": 0.0049, + "step": 16208 + }, + { + "epoch": 15.12, + "learning_rate": 4.810960820895523e-05, + "loss": 0.0044, + "step": 16212 + }, + { + "epoch": 15.13, + "learning_rate": 4.8109141791044776e-05, + "loss": 0.0012, + "step": 16216 + }, + { + "epoch": 15.13, + "learning_rate": 4.810867537313433e-05, + "loss": 0.0013, + "step": 16220 + }, + { + "epoch": 15.13, + "learning_rate": 4.810820895522388e-05, + "loss": 0.0024, + "step": 16224 + }, + { + "epoch": 15.14, + "learning_rate": 4.8107742537313434e-05, + "loss": 0.0004, + "step": 16228 + }, + { + "epoch": 15.14, + "learning_rate": 4.810727611940299e-05, + "loss": 0.0072, + "step": 16232 + }, + { + "epoch": 15.15, + "learning_rate": 4.810680970149254e-05, + "loss": 0.0043, + "step": 16236 + }, + { + "epoch": 15.15, + "learning_rate": 4.810634328358209e-05, + "loss": 0.0004, + "step": 16240 + }, + { + "epoch": 15.15, + "learning_rate": 4.810587686567165e-05, + "loss": 0.0024, + "step": 16244 + }, + { + "epoch": 15.16, + "learning_rate": 4.8105410447761195e-05, + "loss": 0.0038, + "step": 16248 + }, + { + "epoch": 15.16, + "learning_rate": 4.810494402985074e-05, + "loss": 0.0016, + "step": 16252 + }, + { + "epoch": 15.16, + "learning_rate": 4.8104477611940305e-05, + "loss": 0.0045, + "step": 16256 + }, + { + "epoch": 15.17, + "learning_rate": 4.810401119402985e-05, + "loss": 0.0002, + "step": 16260 + }, + { + "epoch": 15.17, + "learning_rate": 4.810354477611941e-05, + "loss": 0.0052, + "step": 16264 + }, + { + "epoch": 15.18, + "learning_rate": 4.8103078358208956e-05, + "loss": 0.0047, + "step": 16268 + }, + { + "epoch": 15.18, + "learning_rate": 4.810261194029851e-05, + "loss": 0.0022, + "step": 16272 + }, + { + "epoch": 15.18, + "learning_rate": 4.8102145522388066e-05, + "loss": 0.0205, + "step": 16276 + }, + { + "epoch": 15.19, + "learning_rate": 4.8101679104477614e-05, + "loss": 0.0053, + "step": 16280 + }, + { + "epoch": 15.19, + "learning_rate": 4.810121268656716e-05, + "loss": 0.0004, + "step": 16284 + }, + { + "epoch": 15.19, + "learning_rate": 4.8100746268656724e-05, + "loss": 0.0042, + "step": 16288 + }, + { + "epoch": 15.2, + "learning_rate": 4.810027985074627e-05, + "loss": 0.0027, + "step": 16292 + }, + { + "epoch": 15.2, + "learning_rate": 4.809981343283582e-05, + "loss": 0.0143, + "step": 16296 + }, + { + "epoch": 15.21, + "learning_rate": 4.8099347014925375e-05, + "loss": 0.0028, + "step": 16300 + }, + { + "epoch": 15.21, + "learning_rate": 4.809888059701493e-05, + "loss": 0.0093, + "step": 16304 + }, + { + "epoch": 15.21, + "learning_rate": 4.809841417910448e-05, + "loss": 0.0039, + "step": 16308 + }, + { + "epoch": 15.22, + "learning_rate": 4.809794776119403e-05, + "loss": 0.0009, + "step": 16312 + }, + { + "epoch": 15.22, + "learning_rate": 4.809748134328359e-05, + "loss": 0.0022, + "step": 16316 + }, + { + "epoch": 15.22, + "learning_rate": 4.8097014925373136e-05, + "loss": 0.0019, + "step": 16320 + }, + { + "epoch": 15.23, + "learning_rate": 4.809654850746269e-05, + "loss": 0.0058, + "step": 16324 + }, + { + "epoch": 15.23, + "learning_rate": 4.809608208955224e-05, + "loss": 0.0013, + "step": 16328 + }, + { + "epoch": 15.24, + "learning_rate": 4.8095615671641794e-05, + "loss": 0.0025, + "step": 16332 + }, + { + "epoch": 15.24, + "learning_rate": 4.809514925373135e-05, + "loss": 0.0045, + "step": 16336 + }, + { + "epoch": 15.24, + "learning_rate": 4.80946828358209e-05, + "loss": 0.0026, + "step": 16340 + }, + { + "epoch": 15.25, + "learning_rate": 4.8094216417910445e-05, + "loss": 0.0062, + "step": 16344 + }, + { + "epoch": 15.25, + "learning_rate": 4.809375000000001e-05, + "loss": 0.0044, + "step": 16348 + }, + { + "epoch": 15.25, + "learning_rate": 4.8093283582089555e-05, + "loss": 0.0026, + "step": 16352 + }, + { + "epoch": 15.26, + "learning_rate": 4.80928171641791e-05, + "loss": 0.003, + "step": 16356 + }, + { + "epoch": 15.26, + "learning_rate": 4.809235074626866e-05, + "loss": 0.0039, + "step": 16360 + }, + { + "epoch": 15.26, + "learning_rate": 4.809188432835821e-05, + "loss": 0.012, + "step": 16364 + }, + { + "epoch": 15.27, + "learning_rate": 4.809141791044776e-05, + "loss": 0.0023, + "step": 16368 + }, + { + "epoch": 15.27, + "learning_rate": 4.8090951492537316e-05, + "loss": 0.0057, + "step": 16372 + }, + { + "epoch": 15.28, + "learning_rate": 4.809048507462687e-05, + "loss": 0.0011, + "step": 16376 + }, + { + "epoch": 15.28, + "learning_rate": 4.809001865671642e-05, + "loss": 0.009, + "step": 16380 + }, + { + "epoch": 15.28, + "learning_rate": 4.8089552238805974e-05, + "loss": 0.0026, + "step": 16384 + }, + { + "epoch": 15.29, + "learning_rate": 4.808908582089552e-05, + "loss": 0.0005, + "step": 16388 + }, + { + "epoch": 15.29, + "learning_rate": 4.808861940298508e-05, + "loss": 0.0054, + "step": 16392 + }, + { + "epoch": 15.29, + "learning_rate": 4.808815298507463e-05, + "loss": 0.0035, + "step": 16396 + }, + { + "epoch": 15.3, + "learning_rate": 4.808768656716418e-05, + "loss": 0.0049, + "step": 16400 + }, + { + "epoch": 15.3, + "learning_rate": 4.808722014925373e-05, + "loss": 0.0028, + "step": 16404 + }, + { + "epoch": 15.31, + "learning_rate": 4.808675373134329e-05, + "loss": 0.0002, + "step": 16408 + }, + { + "epoch": 15.31, + "learning_rate": 4.808628731343284e-05, + "loss": 0.0003, + "step": 16412 + }, + { + "epoch": 15.31, + "learning_rate": 4.8085820895522386e-05, + "loss": 0.0014, + "step": 16416 + }, + { + "epoch": 15.32, + "learning_rate": 4.808535447761194e-05, + "loss": 0.009, + "step": 16420 + }, + { + "epoch": 15.32, + "learning_rate": 4.8084888059701496e-05, + "loss": 0.0002, + "step": 16424 + }, + { + "epoch": 15.32, + "learning_rate": 4.808442164179105e-05, + "loss": 0.0004, + "step": 16428 + }, + { + "epoch": 15.33, + "learning_rate": 4.80839552238806e-05, + "loss": 0.0043, + "step": 16432 + }, + { + "epoch": 15.33, + "learning_rate": 4.8083488805970154e-05, + "loss": 0.0091, + "step": 16436 + }, + { + "epoch": 15.34, + "learning_rate": 4.808302238805971e-05, + "loss": 0.0007, + "step": 16440 + }, + { + "epoch": 15.34, + "learning_rate": 4.808255597014926e-05, + "loss": 0.0021, + "step": 16444 + }, + { + "epoch": 15.34, + "learning_rate": 4.8082089552238805e-05, + "loss": 0.0036, + "step": 16448 + }, + { + "epoch": 15.35, + "learning_rate": 4.808162313432836e-05, + "loss": 0.0018, + "step": 16452 + }, + { + "epoch": 15.35, + "learning_rate": 4.8081156716417915e-05, + "loss": 0.0222, + "step": 16456 + }, + { + "epoch": 15.35, + "learning_rate": 4.808069029850746e-05, + "loss": 0.0022, + "step": 16460 + }, + { + "epoch": 15.36, + "learning_rate": 4.808022388059702e-05, + "loss": 0.0023, + "step": 16464 + }, + { + "epoch": 15.36, + "learning_rate": 4.807975746268657e-05, + "loss": 0.0022, + "step": 16468 + }, + { + "epoch": 15.37, + "learning_rate": 4.807929104477612e-05, + "loss": 0.0056, + "step": 16472 + }, + { + "epoch": 15.37, + "learning_rate": 4.8078824626865676e-05, + "loss": 0.0005, + "step": 16476 + }, + { + "epoch": 15.37, + "learning_rate": 4.8078358208955224e-05, + "loss": 0.0017, + "step": 16480 + }, + { + "epoch": 15.38, + "learning_rate": 4.807789179104478e-05, + "loss": 0.0005, + "step": 16484 + }, + { + "epoch": 15.38, + "learning_rate": 4.8077425373134334e-05, + "loss": 0.0085, + "step": 16488 + }, + { + "epoch": 15.38, + "learning_rate": 4.807695895522388e-05, + "loss": 0.0005, + "step": 16492 + }, + { + "epoch": 15.39, + "learning_rate": 4.807649253731343e-05, + "loss": 0.0032, + "step": 16496 + }, + { + "epoch": 15.39, + "learning_rate": 4.807602611940299e-05, + "loss": 0.0007, + "step": 16500 + }, + { + "epoch": 15.39, + "eval_exact_match": 0.7272727272727273, + "eval_exec": 0.7669245647969052, + "eval_loss": 0.3177631199359894, + "eval_runtime": 1071.5811, + "eval_samples_per_second": 0.965, + "step": 16500 + }, + { + "epoch": 15.4, + "learning_rate": 4.807555970149254e-05, + "loss": 0.0019, + "step": 16504 + }, + { + "epoch": 15.4, + "learning_rate": 4.807509328358209e-05, + "loss": 0.0072, + "step": 16508 + }, + { + "epoch": 15.4, + "learning_rate": 4.807462686567164e-05, + "loss": 0.0004, + "step": 16512 + }, + { + "epoch": 15.41, + "learning_rate": 4.80741604477612e-05, + "loss": 0.0006, + "step": 16516 + }, + { + "epoch": 15.41, + "learning_rate": 4.8073694029850746e-05, + "loss": 0.0023, + "step": 16520 + }, + { + "epoch": 15.41, + "learning_rate": 4.80732276119403e-05, + "loss": 0.0017, + "step": 16524 + }, + { + "epoch": 15.42, + "learning_rate": 4.8072761194029856e-05, + "loss": 0.0038, + "step": 16528 + }, + { + "epoch": 15.42, + "learning_rate": 4.8072294776119404e-05, + "loss": 0.0014, + "step": 16532 + }, + { + "epoch": 15.43, + "learning_rate": 4.807182835820896e-05, + "loss": 0.0032, + "step": 16536 + }, + { + "epoch": 15.43, + "learning_rate": 4.807136194029851e-05, + "loss": 0.0007, + "step": 16540 + }, + { + "epoch": 15.43, + "learning_rate": 4.807089552238806e-05, + "loss": 0.0019, + "step": 16544 + }, + { + "epoch": 15.44, + "learning_rate": 4.807042910447762e-05, + "loss": 0.0026, + "step": 16548 + }, + { + "epoch": 15.44, + "learning_rate": 4.8069962686567165e-05, + "loss": 0.004, + "step": 16552 + }, + { + "epoch": 15.44, + "learning_rate": 4.806949626865671e-05, + "loss": 0.0006, + "step": 16556 + }, + { + "epoch": 15.45, + "learning_rate": 4.8069029850746275e-05, + "loss": 0.0016, + "step": 16560 + }, + { + "epoch": 15.45, + "learning_rate": 4.806856343283582e-05, + "loss": 0.0025, + "step": 16564 + }, + { + "epoch": 15.46, + "learning_rate": 4.806809701492537e-05, + "loss": 0.0006, + "step": 16568 + }, + { + "epoch": 15.46, + "learning_rate": 4.8067630597014926e-05, + "loss": 0.0008, + "step": 16572 + }, + { + "epoch": 15.46, + "learning_rate": 4.806716417910448e-05, + "loss": 0.0043, + "step": 16576 + }, + { + "epoch": 15.47, + "learning_rate": 4.806669776119403e-05, + "loss": 0.0013, + "step": 16580 + }, + { + "epoch": 15.47, + "learning_rate": 4.8066231343283584e-05, + "loss": 0.0018, + "step": 16584 + }, + { + "epoch": 15.47, + "learning_rate": 4.806576492537314e-05, + "loss": 0.0001, + "step": 16588 + }, + { + "epoch": 15.48, + "learning_rate": 4.8065298507462694e-05, + "loss": 0.0008, + "step": 16592 + }, + { + "epoch": 15.48, + "learning_rate": 4.806483208955224e-05, + "loss": 0.0017, + "step": 16596 + }, + { + "epoch": 15.49, + "learning_rate": 4.806436567164179e-05, + "loss": 0.0005, + "step": 16600 + }, + { + "epoch": 15.49, + "learning_rate": 4.8063899253731345e-05, + "loss": 0.0004, + "step": 16604 + }, + { + "epoch": 15.49, + "learning_rate": 4.80634328358209e-05, + "loss": 0.0003, + "step": 16608 + }, + { + "epoch": 15.5, + "learning_rate": 4.806296641791045e-05, + "loss": 0.0063, + "step": 16612 + }, + { + "epoch": 15.5, + "learning_rate": 4.80625e-05, + "loss": 0.0138, + "step": 16616 + }, + { + "epoch": 15.5, + "learning_rate": 4.806203358208956e-05, + "loss": 0.0003, + "step": 16620 + }, + { + "epoch": 15.51, + "learning_rate": 4.8061567164179106e-05, + "loss": 0.0004, + "step": 16624 + }, + { + "epoch": 15.51, + "learning_rate": 4.806110074626866e-05, + "loss": 0.0017, + "step": 16628 + }, + { + "epoch": 15.51, + "learning_rate": 4.806063432835821e-05, + "loss": 0.0007, + "step": 16632 + }, + { + "epoch": 15.52, + "learning_rate": 4.8060167910447764e-05, + "loss": 0.0026, + "step": 16636 + }, + { + "epoch": 15.52, + "learning_rate": 4.805970149253732e-05, + "loss": 0.0024, + "step": 16640 + }, + { + "epoch": 15.53, + "learning_rate": 4.805923507462687e-05, + "loss": 0.0096, + "step": 16644 + }, + { + "epoch": 15.53, + "learning_rate": 4.805876865671642e-05, + "loss": 0.0074, + "step": 16648 + }, + { + "epoch": 15.53, + "learning_rate": 4.8058302238805977e-05, + "loss": 0.0007, + "step": 16652 + }, + { + "epoch": 15.54, + "learning_rate": 4.8057835820895525e-05, + "loss": 0.0049, + "step": 16656 + }, + { + "epoch": 15.54, + "learning_rate": 4.805736940298507e-05, + "loss": 0.001, + "step": 16660 + }, + { + "epoch": 15.54, + "learning_rate": 4.805690298507463e-05, + "loss": 0.007, + "step": 16664 + }, + { + "epoch": 15.55, + "learning_rate": 4.805643656716418e-05, + "loss": 0.0027, + "step": 16668 + }, + { + "epoch": 15.55, + "learning_rate": 4.805597014925373e-05, + "loss": 0.0018, + "step": 16672 + }, + { + "epoch": 15.56, + "learning_rate": 4.8055503731343286e-05, + "loss": 0.0017, + "step": 16676 + }, + { + "epoch": 15.56, + "learning_rate": 4.805503731343284e-05, + "loss": 0.0009, + "step": 16680 + }, + { + "epoch": 15.56, + "learning_rate": 4.805457089552239e-05, + "loss": 0.005, + "step": 16684 + }, + { + "epoch": 15.57, + "learning_rate": 4.8054104477611944e-05, + "loss": 0.0016, + "step": 16688 + }, + { + "epoch": 15.57, + "learning_rate": 4.805363805970149e-05, + "loss": 0.0005, + "step": 16692 + }, + { + "epoch": 15.57, + "learning_rate": 4.805317164179105e-05, + "loss": 0.0041, + "step": 16696 + }, + { + "epoch": 15.58, + "learning_rate": 4.80527052238806e-05, + "loss": 0.0019, + "step": 16700 + }, + { + "epoch": 15.58, + "learning_rate": 4.805223880597015e-05, + "loss": 0.0024, + "step": 16704 + }, + { + "epoch": 15.59, + "learning_rate": 4.8051772388059705e-05, + "loss": 0.0122, + "step": 16708 + }, + { + "epoch": 15.59, + "learning_rate": 4.805130597014926e-05, + "loss": 0.0028, + "step": 16712 + }, + { + "epoch": 15.59, + "learning_rate": 4.805083955223881e-05, + "loss": 0.003, + "step": 16716 + }, + { + "epoch": 15.6, + "learning_rate": 4.8050373134328356e-05, + "loss": 0.0032, + "step": 16720 + }, + { + "epoch": 15.6, + "learning_rate": 4.804990671641791e-05, + "loss": 0.0014, + "step": 16724 + }, + { + "epoch": 15.6, + "learning_rate": 4.8049440298507466e-05, + "loss": 0.0014, + "step": 16728 + }, + { + "epoch": 15.61, + "learning_rate": 4.8048973880597014e-05, + "loss": 0.001, + "step": 16732 + }, + { + "epoch": 15.61, + "learning_rate": 4.804850746268657e-05, + "loss": 0.0025, + "step": 16736 + }, + { + "epoch": 15.62, + "learning_rate": 4.8048041044776124e-05, + "loss": 0.0017, + "step": 16740 + }, + { + "epoch": 15.62, + "learning_rate": 4.804757462686567e-05, + "loss": 0.0124, + "step": 16744 + }, + { + "epoch": 15.62, + "learning_rate": 4.8047108208955227e-05, + "loss": 0.0022, + "step": 16748 + }, + { + "epoch": 15.63, + "learning_rate": 4.8046641791044775e-05, + "loss": 0.0093, + "step": 16752 + }, + { + "epoch": 15.63, + "learning_rate": 4.8046175373134336e-05, + "loss": 0.0019, + "step": 16756 + }, + { + "epoch": 15.63, + "learning_rate": 4.8045708955223885e-05, + "loss": 0.003, + "step": 16760 + }, + { + "epoch": 15.64, + "learning_rate": 4.804524253731343e-05, + "loss": 0.0019, + "step": 16764 + }, + { + "epoch": 15.64, + "learning_rate": 4.804477611940299e-05, + "loss": 0.0017, + "step": 16768 + }, + { + "epoch": 15.65, + "learning_rate": 4.804430970149254e-05, + "loss": 0.0004, + "step": 16772 + }, + { + "epoch": 15.65, + "learning_rate": 4.804384328358209e-05, + "loss": 0.0017, + "step": 16776 + }, + { + "epoch": 15.65, + "learning_rate": 4.8043376865671646e-05, + "loss": 0.0031, + "step": 16780 + }, + { + "epoch": 15.66, + "learning_rate": 4.8042910447761194e-05, + "loss": 0.0042, + "step": 16784 + }, + { + "epoch": 15.66, + "learning_rate": 4.804244402985075e-05, + "loss": 0.002, + "step": 16788 + }, + { + "epoch": 15.66, + "learning_rate": 4.8041977611940303e-05, + "loss": 0.0007, + "step": 16792 + }, + { + "epoch": 15.67, + "learning_rate": 4.804151119402985e-05, + "loss": 0.0008, + "step": 16796 + }, + { + "epoch": 15.67, + "learning_rate": 4.8041044776119407e-05, + "loss": 0.0008, + "step": 16800 + }, + { + "epoch": 15.68, + "learning_rate": 4.804057835820896e-05, + "loss": 0.0017, + "step": 16804 + }, + { + "epoch": 15.68, + "learning_rate": 4.804011194029851e-05, + "loss": 0.0055, + "step": 16808 + }, + { + "epoch": 15.68, + "learning_rate": 4.803964552238806e-05, + "loss": 0.0021, + "step": 16812 + }, + { + "epoch": 15.69, + "learning_rate": 4.803917910447762e-05, + "loss": 0.0046, + "step": 16816 + }, + { + "epoch": 15.69, + "learning_rate": 4.803871268656717e-05, + "loss": 0.0013, + "step": 16820 + }, + { + "epoch": 15.69, + "learning_rate": 4.8038246268656716e-05, + "loss": 0.0049, + "step": 16824 + }, + { + "epoch": 15.7, + "learning_rate": 4.803777985074627e-05, + "loss": 0.0023, + "step": 16828 + }, + { + "epoch": 15.7, + "learning_rate": 4.8037313432835825e-05, + "loss": 0.0031, + "step": 16832 + }, + { + "epoch": 15.71, + "learning_rate": 4.8036847014925374e-05, + "loss": 0.0127, + "step": 16836 + }, + { + "epoch": 15.71, + "learning_rate": 4.803638059701493e-05, + "loss": 0.0041, + "step": 16840 + }, + { + "epoch": 15.71, + "learning_rate": 4.803591417910448e-05, + "loss": 0.0102, + "step": 16844 + }, + { + "epoch": 15.72, + "learning_rate": 4.803544776119403e-05, + "loss": 0.0031, + "step": 16848 + }, + { + "epoch": 15.72, + "learning_rate": 4.8034981343283586e-05, + "loss": 0.0038, + "step": 16852 + }, + { + "epoch": 15.72, + "learning_rate": 4.8034514925373135e-05, + "loss": 0.0079, + "step": 16856 + }, + { + "epoch": 15.73, + "learning_rate": 4.803404850746269e-05, + "loss": 0.0113, + "step": 16860 + }, + { + "epoch": 15.73, + "learning_rate": 4.8033582089552244e-05, + "loss": 0.0009, + "step": 16864 + }, + { + "epoch": 15.73, + "learning_rate": 4.803311567164179e-05, + "loss": 0.0019, + "step": 16868 + }, + { + "epoch": 15.74, + "learning_rate": 4.803264925373134e-05, + "loss": 0.0053, + "step": 16872 + }, + { + "epoch": 15.74, + "learning_rate": 4.80321828358209e-05, + "loss": 0.0009, + "step": 16876 + }, + { + "epoch": 15.75, + "learning_rate": 4.803171641791045e-05, + "loss": 0.001, + "step": 16880 + }, + { + "epoch": 15.75, + "learning_rate": 4.803125e-05, + "loss": 0.0125, + "step": 16884 + }, + { + "epoch": 15.75, + "learning_rate": 4.8030783582089553e-05, + "loss": 0.0018, + "step": 16888 + }, + { + "epoch": 15.76, + "learning_rate": 4.803031716417911e-05, + "loss": 0.0032, + "step": 16892 + }, + { + "epoch": 15.76, + "learning_rate": 4.8029850746268657e-05, + "loss": 0.0013, + "step": 16896 + }, + { + "epoch": 15.76, + "learning_rate": 4.802938432835821e-05, + "loss": 0.0017, + "step": 16900 + }, + { + "epoch": 15.77, + "learning_rate": 4.802891791044776e-05, + "loss": 0.0011, + "step": 16904 + }, + { + "epoch": 15.77, + "learning_rate": 4.8028451492537314e-05, + "loss": 0.0013, + "step": 16908 + }, + { + "epoch": 15.78, + "learning_rate": 4.802798507462687e-05, + "loss": 0.0009, + "step": 16912 + }, + { + "epoch": 15.78, + "learning_rate": 4.802751865671642e-05, + "loss": 0.0007, + "step": 16916 + }, + { + "epoch": 15.78, + "learning_rate": 4.802705223880597e-05, + "loss": 0.001, + "step": 16920 + }, + { + "epoch": 15.79, + "learning_rate": 4.802658582089553e-05, + "loss": 0.0007, + "step": 16924 + }, + { + "epoch": 15.79, + "learning_rate": 4.8026119402985075e-05, + "loss": 0.0075, + "step": 16928 + }, + { + "epoch": 15.79, + "learning_rate": 4.802565298507463e-05, + "loss": 0.0008, + "step": 16932 + }, + { + "epoch": 15.8, + "learning_rate": 4.8025186567164185e-05, + "loss": 0.0015, + "step": 16936 + }, + { + "epoch": 15.8, + "learning_rate": 4.8024720149253733e-05, + "loss": 0.0034, + "step": 16940 + }, + { + "epoch": 15.81, + "learning_rate": 4.802425373134329e-05, + "loss": 0.0054, + "step": 16944 + }, + { + "epoch": 15.81, + "learning_rate": 4.8023787313432836e-05, + "loss": 0.0013, + "step": 16948 + }, + { + "epoch": 15.81, + "learning_rate": 4.802332089552239e-05, + "loss": 0.001, + "step": 16952 + }, + { + "epoch": 15.82, + "learning_rate": 4.8022854477611946e-05, + "loss": 0.0023, + "step": 16956 + }, + { + "epoch": 15.82, + "learning_rate": 4.8022388059701494e-05, + "loss": 0.0005, + "step": 16960 + }, + { + "epoch": 15.82, + "learning_rate": 4.802192164179104e-05, + "loss": 0.0018, + "step": 16964 + }, + { + "epoch": 15.83, + "learning_rate": 4.8021455223880604e-05, + "loss": 0.0038, + "step": 16968 + }, + { + "epoch": 15.83, + "learning_rate": 4.802098880597015e-05, + "loss": 0.0024, + "step": 16972 + }, + { + "epoch": 15.84, + "learning_rate": 4.80205223880597e-05, + "loss": 0.0078, + "step": 16976 + }, + { + "epoch": 15.84, + "learning_rate": 4.8020055970149255e-05, + "loss": 0.0027, + "step": 16980 + }, + { + "epoch": 15.84, + "learning_rate": 4.801958955223881e-05, + "loss": 0.0014, + "step": 16984 + }, + { + "epoch": 15.85, + "learning_rate": 4.801912313432836e-05, + "loss": 0.0037, + "step": 16988 + }, + { + "epoch": 15.85, + "learning_rate": 4.801865671641791e-05, + "loss": 0.0042, + "step": 16992 + }, + { + "epoch": 15.85, + "learning_rate": 4.801819029850747e-05, + "loss": 0.0025, + "step": 16996 + }, + { + "epoch": 15.86, + "learning_rate": 4.8017723880597016e-05, + "loss": 0.0119, + "step": 17000 + }, + { + "epoch": 15.86, + "eval_exact_match": 0.7321083172147002, + "eval_exec": 0.7611218568665378, + "eval_loss": 0.2985568642616272, + "eval_runtime": 1073.3502, + "eval_samples_per_second": 0.963, + "step": 17000 + }, + { + "epoch": 15.86, + "learning_rate": 4.801725746268657e-05, + "loss": 0.0016, + "step": 17004 + }, + { + "epoch": 15.87, + "learning_rate": 4.801679104477612e-05, + "loss": 0.0039, + "step": 17008 + }, + { + "epoch": 15.87, + "learning_rate": 4.8016324626865674e-05, + "loss": 0.0053, + "step": 17012 + }, + { + "epoch": 15.87, + "learning_rate": 4.801585820895523e-05, + "loss": 0.0005, + "step": 17016 + }, + { + "epoch": 15.88, + "learning_rate": 4.801539179104478e-05, + "loss": 0.0023, + "step": 17020 + }, + { + "epoch": 15.88, + "learning_rate": 4.8014925373134325e-05, + "loss": 0.0009, + "step": 17024 + }, + { + "epoch": 15.88, + "learning_rate": 4.801445895522389e-05, + "loss": 0.0106, + "step": 17028 + }, + { + "epoch": 15.89, + "learning_rate": 4.8013992537313435e-05, + "loss": 0.0086, + "step": 17032 + }, + { + "epoch": 15.89, + "learning_rate": 4.8013526119402983e-05, + "loss": 0.0005, + "step": 17036 + }, + { + "epoch": 15.9, + "learning_rate": 4.801305970149254e-05, + "loss": 0.0011, + "step": 17040 + }, + { + "epoch": 15.9, + "learning_rate": 4.801259328358209e-05, + "loss": 0.0035, + "step": 17044 + }, + { + "epoch": 15.9, + "learning_rate": 4.801212686567164e-05, + "loss": 0.0006, + "step": 17048 + }, + { + "epoch": 15.91, + "learning_rate": 4.8011660447761196e-05, + "loss": 0.0013, + "step": 17052 + }, + { + "epoch": 15.91, + "learning_rate": 4.801119402985075e-05, + "loss": 0.005, + "step": 17056 + }, + { + "epoch": 15.91, + "learning_rate": 4.80107276119403e-05, + "loss": 0.0029, + "step": 17060 + }, + { + "epoch": 15.92, + "learning_rate": 4.8010261194029854e-05, + "loss": 0.0006, + "step": 17064 + }, + { + "epoch": 15.92, + "learning_rate": 4.80097947761194e-05, + "loss": 0.0028, + "step": 17068 + }, + { + "epoch": 15.93, + "learning_rate": 4.800932835820896e-05, + "loss": 0.0086, + "step": 17072 + }, + { + "epoch": 15.93, + "learning_rate": 4.800886194029851e-05, + "loss": 0.0046, + "step": 17076 + }, + { + "epoch": 15.93, + "learning_rate": 4.800839552238806e-05, + "loss": 0.0011, + "step": 17080 + }, + { + "epoch": 15.94, + "learning_rate": 4.8007929104477615e-05, + "loss": 0.0025, + "step": 17084 + }, + { + "epoch": 15.94, + "learning_rate": 4.800746268656717e-05, + "loss": 0.0012, + "step": 17088 + }, + { + "epoch": 15.94, + "learning_rate": 4.800699626865672e-05, + "loss": 0.006, + "step": 17092 + }, + { + "epoch": 15.95, + "learning_rate": 4.800652985074627e-05, + "loss": 0.0031, + "step": 17096 + }, + { + "epoch": 15.95, + "learning_rate": 4.800606343283582e-05, + "loss": 0.0008, + "step": 17100 + }, + { + "epoch": 15.96, + "learning_rate": 4.8005597014925376e-05, + "loss": 0.0068, + "step": 17104 + }, + { + "epoch": 15.96, + "learning_rate": 4.800513059701493e-05, + "loss": 0.0005, + "step": 17108 + }, + { + "epoch": 15.96, + "learning_rate": 4.800466417910448e-05, + "loss": 0.0005, + "step": 17112 + }, + { + "epoch": 15.97, + "learning_rate": 4.8004197761194034e-05, + "loss": 0.0024, + "step": 17116 + }, + { + "epoch": 15.97, + "learning_rate": 4.800373134328359e-05, + "loss": 0.0016, + "step": 17120 + }, + { + "epoch": 15.97, + "learning_rate": 4.800326492537314e-05, + "loss": 0.0004, + "step": 17124 + }, + { + "epoch": 15.98, + "learning_rate": 4.8002798507462685e-05, + "loss": 0.0037, + "step": 17128 + }, + { + "epoch": 15.98, + "learning_rate": 4.800233208955224e-05, + "loss": 0.0018, + "step": 17132 + }, + { + "epoch": 15.98, + "learning_rate": 4.8001865671641795e-05, + "loss": 0.0022, + "step": 17136 + }, + { + "epoch": 15.99, + "learning_rate": 4.800139925373134e-05, + "loss": 0.0007, + "step": 17140 + }, + { + "epoch": 15.99, + "learning_rate": 4.80009328358209e-05, + "loss": 0.0013, + "step": 17144 + }, + { + "epoch": 16.0, + "learning_rate": 4.800046641791045e-05, + "loss": 0.0039, + "step": 17148 + }, + { + "epoch": 16.0, + "learning_rate": 4.8e-05, + "loss": 0.0049, + "step": 17152 + }, + { + "epoch": 16.0, + "learning_rate": 4.7999533582089556e-05, + "loss": 0.0026, + "step": 17156 + }, + { + "epoch": 16.01, + "learning_rate": 4.7999067164179104e-05, + "loss": 0.0088, + "step": 17160 + }, + { + "epoch": 16.01, + "learning_rate": 4.799860074626866e-05, + "loss": 0.0007, + "step": 17164 + }, + { + "epoch": 16.01, + "learning_rate": 4.7998134328358214e-05, + "loss": 0.0012, + "step": 17168 + }, + { + "epoch": 16.02, + "learning_rate": 4.799766791044776e-05, + "loss": 0.0009, + "step": 17172 + }, + { + "epoch": 16.02, + "learning_rate": 4.799720149253731e-05, + "loss": 0.0035, + "step": 17176 + }, + { + "epoch": 16.03, + "learning_rate": 4.799673507462687e-05, + "loss": 0.0006, + "step": 17180 + }, + { + "epoch": 16.03, + "learning_rate": 4.799626865671642e-05, + "loss": 0.0043, + "step": 17184 + }, + { + "epoch": 16.03, + "learning_rate": 4.799580223880597e-05, + "loss": 0.0002, + "step": 17188 + }, + { + "epoch": 16.04, + "learning_rate": 4.799533582089552e-05, + "loss": 0.0001, + "step": 17192 + }, + { + "epoch": 16.04, + "learning_rate": 4.799486940298508e-05, + "loss": 0.0037, + "step": 17196 + }, + { + "epoch": 16.04, + "learning_rate": 4.7994402985074626e-05, + "loss": 0.0033, + "step": 17200 + }, + { + "epoch": 16.05, + "learning_rate": 4.799393656716418e-05, + "loss": 0.0002, + "step": 17204 + }, + { + "epoch": 16.05, + "learning_rate": 4.7993470149253736e-05, + "loss": 0.0011, + "step": 17208 + }, + { + "epoch": 16.06, + "learning_rate": 4.7993003731343284e-05, + "loss": 0.0001, + "step": 17212 + }, + { + "epoch": 16.06, + "learning_rate": 4.799253731343284e-05, + "loss": 0.0005, + "step": 17216 + }, + { + "epoch": 16.06, + "learning_rate": 4.799207089552239e-05, + "loss": 0.0023, + "step": 17220 + }, + { + "epoch": 16.07, + "learning_rate": 4.799160447761194e-05, + "loss": 0.0112, + "step": 17224 + }, + { + "epoch": 16.07, + "learning_rate": 4.79911380597015e-05, + "loss": 0.0062, + "step": 17228 + }, + { + "epoch": 16.07, + "learning_rate": 4.7990671641791045e-05, + "loss": 0.001, + "step": 17232 + }, + { + "epoch": 16.08, + "learning_rate": 4.799020522388059e-05, + "loss": 0.0012, + "step": 17236 + }, + { + "epoch": 16.08, + "learning_rate": 4.7989738805970155e-05, + "loss": 0.0004, + "step": 17240 + }, + { + "epoch": 16.09, + "learning_rate": 4.79892723880597e-05, + "loss": 0.0001, + "step": 17244 + }, + { + "epoch": 16.09, + "learning_rate": 4.798880597014926e-05, + "loss": 0.0014, + "step": 17248 + }, + { + "epoch": 16.09, + "learning_rate": 4.7988339552238806e-05, + "loss": 0.003, + "step": 17252 + }, + { + "epoch": 16.1, + "learning_rate": 4.798787313432836e-05, + "loss": 0.0062, + "step": 17256 + }, + { + "epoch": 16.1, + "learning_rate": 4.7987406716417916e-05, + "loss": 0.0018, + "step": 17260 + }, + { + "epoch": 16.1, + "learning_rate": 4.7986940298507464e-05, + "loss": 0.001, + "step": 17264 + }, + { + "epoch": 16.11, + "learning_rate": 4.798647388059702e-05, + "loss": 0.0036, + "step": 17268 + }, + { + "epoch": 16.11, + "learning_rate": 4.7986007462686574e-05, + "loss": 0.0008, + "step": 17272 + }, + { + "epoch": 16.12, + "learning_rate": 4.798554104477612e-05, + "loss": 0.0004, + "step": 17276 + }, + { + "epoch": 16.12, + "learning_rate": 4.798507462686567e-05, + "loss": 0.0041, + "step": 17280 + }, + { + "epoch": 16.12, + "learning_rate": 4.7984608208955225e-05, + "loss": 0.0003, + "step": 17284 + }, + { + "epoch": 16.13, + "learning_rate": 4.798414179104478e-05, + "loss": 0.0012, + "step": 17288 + }, + { + "epoch": 16.13, + "learning_rate": 4.798367537313433e-05, + "loss": 0.0057, + "step": 17292 + }, + { + "epoch": 16.13, + "learning_rate": 4.798320895522388e-05, + "loss": 0.0005, + "step": 17296 + }, + { + "epoch": 16.14, + "learning_rate": 4.798274253731344e-05, + "loss": 0.0002, + "step": 17300 + }, + { + "epoch": 16.14, + "learning_rate": 4.7982276119402986e-05, + "loss": 0.0036, + "step": 17304 + }, + { + "epoch": 16.15, + "learning_rate": 4.798180970149254e-05, + "loss": 0.0022, + "step": 17308 + }, + { + "epoch": 16.15, + "learning_rate": 4.798134328358209e-05, + "loss": 0.0042, + "step": 17312 + }, + { + "epoch": 16.15, + "learning_rate": 4.7980876865671644e-05, + "loss": 0.0023, + "step": 17316 + }, + { + "epoch": 16.16, + "learning_rate": 4.79804104477612e-05, + "loss": 0.0008, + "step": 17320 + }, + { + "epoch": 16.16, + "learning_rate": 4.797994402985075e-05, + "loss": 0.0009, + "step": 17324 + }, + { + "epoch": 16.16, + "learning_rate": 4.79794776119403e-05, + "loss": 0.0075, + "step": 17328 + }, + { + "epoch": 16.17, + "learning_rate": 4.797901119402986e-05, + "loss": 0.0036, + "step": 17332 + }, + { + "epoch": 16.17, + "learning_rate": 4.7978544776119405e-05, + "loss": 0.0057, + "step": 17336 + }, + { + "epoch": 16.18, + "learning_rate": 4.797807835820895e-05, + "loss": 0.0024, + "step": 17340 + }, + { + "epoch": 16.18, + "learning_rate": 4.797761194029851e-05, + "loss": 0.0023, + "step": 17344 + }, + { + "epoch": 16.18, + "learning_rate": 4.797714552238806e-05, + "loss": 0.0398, + "step": 17348 + }, + { + "epoch": 16.19, + "learning_rate": 4.797667910447761e-05, + "loss": 0.0049, + "step": 17352 + }, + { + "epoch": 16.19, + "learning_rate": 4.7976212686567166e-05, + "loss": 0.0004, + "step": 17356 + }, + { + "epoch": 16.19, + "learning_rate": 4.797574626865672e-05, + "loss": 0.0033, + "step": 17360 + }, + { + "epoch": 16.2, + "learning_rate": 4.797527985074627e-05, + "loss": 0.0007, + "step": 17364 + }, + { + "epoch": 16.2, + "learning_rate": 4.7974813432835824e-05, + "loss": 0.0017, + "step": 17368 + }, + { + "epoch": 16.21, + "learning_rate": 4.797434701492537e-05, + "loss": 0.0024, + "step": 17372 + }, + { + "epoch": 16.21, + "learning_rate": 4.797388059701493e-05, + "loss": 0.0026, + "step": 17376 + }, + { + "epoch": 16.21, + "learning_rate": 4.797341417910448e-05, + "loss": 0.0003, + "step": 17380 + }, + { + "epoch": 16.22, + "learning_rate": 4.797294776119403e-05, + "loss": 0.0032, + "step": 17384 + }, + { + "epoch": 16.22, + "learning_rate": 4.7972481343283585e-05, + "loss": 0.0009, + "step": 17388 + }, + { + "epoch": 16.22, + "learning_rate": 4.797201492537314e-05, + "loss": 0.002, + "step": 17392 + }, + { + "epoch": 16.23, + "learning_rate": 4.797154850746269e-05, + "loss": 0.001, + "step": 17396 + }, + { + "epoch": 16.23, + "learning_rate": 4.7971082089552236e-05, + "loss": 0.0016, + "step": 17400 + }, + { + "epoch": 16.24, + "learning_rate": 4.797061567164179e-05, + "loss": 0.0039, + "step": 17404 + }, + { + "epoch": 16.24, + "learning_rate": 4.7970149253731346e-05, + "loss": 0.002, + "step": 17408 + }, + { + "epoch": 16.24, + "learning_rate": 4.79696828358209e-05, + "loss": 0.0006, + "step": 17412 + }, + { + "epoch": 16.25, + "learning_rate": 4.796921641791045e-05, + "loss": 0.0004, + "step": 17416 + }, + { + "epoch": 16.25, + "learning_rate": 4.7968750000000004e-05, + "loss": 0.0054, + "step": 17420 + }, + { + "epoch": 16.25, + "learning_rate": 4.796828358208956e-05, + "loss": 0.0098, + "step": 17424 + }, + { + "epoch": 16.26, + "learning_rate": 4.796781716417911e-05, + "loss": 0.0011, + "step": 17428 + }, + { + "epoch": 16.26, + "learning_rate": 4.7967350746268655e-05, + "loss": 0.0047, + "step": 17432 + }, + { + "epoch": 16.26, + "learning_rate": 4.796688432835822e-05, + "loss": 0.007, + "step": 17436 + }, + { + "epoch": 16.27, + "learning_rate": 4.7966417910447765e-05, + "loss": 0.0004, + "step": 17440 + }, + { + "epoch": 16.27, + "learning_rate": 4.796595149253731e-05, + "loss": 0.0062, + "step": 17444 + }, + { + "epoch": 16.28, + "learning_rate": 4.796548507462687e-05, + "loss": 0.0034, + "step": 17448 + }, + { + "epoch": 16.28, + "learning_rate": 4.796501865671642e-05, + "loss": 0.0049, + "step": 17452 + }, + { + "epoch": 16.28, + "learning_rate": 4.796455223880597e-05, + "loss": 0.0079, + "step": 17456 + }, + { + "epoch": 16.29, + "learning_rate": 4.7964085820895526e-05, + "loss": 0.0007, + "step": 17460 + }, + { + "epoch": 16.29, + "learning_rate": 4.7963619402985074e-05, + "loss": 0.0051, + "step": 17464 + }, + { + "epoch": 16.29, + "learning_rate": 4.796315298507463e-05, + "loss": 0.0125, + "step": 17468 + }, + { + "epoch": 16.3, + "learning_rate": 4.7962686567164184e-05, + "loss": 0.0018, + "step": 17472 + }, + { + "epoch": 16.3, + "learning_rate": 4.796222014925373e-05, + "loss": 0.0008, + "step": 17476 + }, + { + "epoch": 16.31, + "learning_rate": 4.796175373134329e-05, + "loss": 0.0007, + "step": 17480 + }, + { + "epoch": 16.31, + "learning_rate": 4.796128731343284e-05, + "loss": 0.0022, + "step": 17484 + }, + { + "epoch": 16.31, + "learning_rate": 4.796082089552239e-05, + "loss": 0.0008, + "step": 17488 + }, + { + "epoch": 16.32, + "learning_rate": 4.796035447761194e-05, + "loss": 0.0038, + "step": 17492 + }, + { + "epoch": 16.32, + "learning_rate": 4.79598880597015e-05, + "loss": 0.0036, + "step": 17496 + }, + { + "epoch": 16.32, + "learning_rate": 4.795942164179105e-05, + "loss": 0.0002, + "step": 17500 + }, + { + "epoch": 16.32, + "eval_exact_match": 0.7330754352030948, + "eval_exec": 0.7475822050290135, + "eval_loss": 0.3184237778186798, + "eval_runtime": 1056.9157, + "eval_samples_per_second": 0.978, + "step": 17500 + }, + { + "epoch": 16.33, + "learning_rate": 4.7958955223880596e-05, + "loss": 0.0139, + "step": 17504 + }, + { + "epoch": 16.33, + "learning_rate": 4.795848880597015e-05, + "loss": 0.0024, + "step": 17508 + }, + { + "epoch": 16.34, + "learning_rate": 4.7958022388059706e-05, + "loss": 0.005, + "step": 17512 + }, + { + "epoch": 16.34, + "learning_rate": 4.7957555970149254e-05, + "loss": 0.001, + "step": 17516 + }, + { + "epoch": 16.34, + "learning_rate": 4.795708955223881e-05, + "loss": 0.0053, + "step": 17520 + }, + { + "epoch": 16.35, + "learning_rate": 4.795662313432836e-05, + "loss": 0.0021, + "step": 17524 + }, + { + "epoch": 16.35, + "learning_rate": 4.795615671641791e-05, + "loss": 0.0059, + "step": 17528 + }, + { + "epoch": 16.35, + "learning_rate": 4.795569029850747e-05, + "loss": 0.0008, + "step": 17532 + }, + { + "epoch": 16.36, + "learning_rate": 4.7955223880597015e-05, + "loss": 0.0151, + "step": 17536 + }, + { + "epoch": 16.36, + "learning_rate": 4.795475746268657e-05, + "loss": 0.0023, + "step": 17540 + }, + { + "epoch": 16.37, + "learning_rate": 4.7954291044776125e-05, + "loss": 0.0022, + "step": 17544 + }, + { + "epoch": 16.37, + "learning_rate": 4.795382462686567e-05, + "loss": 0.0004, + "step": 17548 + }, + { + "epoch": 16.37, + "learning_rate": 4.795335820895522e-05, + "loss": 0.0005, + "step": 17552 + }, + { + "epoch": 16.38, + "learning_rate": 4.795289179104478e-05, + "loss": 0.0019, + "step": 17556 + }, + { + "epoch": 16.38, + "learning_rate": 4.795242537313433e-05, + "loss": 0.0057, + "step": 17560 + }, + { + "epoch": 16.38, + "learning_rate": 4.795195895522388e-05, + "loss": 0.0046, + "step": 17564 + }, + { + "epoch": 16.39, + "learning_rate": 4.7951492537313434e-05, + "loss": 0.0009, + "step": 17568 + }, + { + "epoch": 16.39, + "learning_rate": 4.795102611940299e-05, + "loss": 0.0035, + "step": 17572 + }, + { + "epoch": 16.4, + "learning_rate": 4.7950559701492544e-05, + "loss": 0.0062, + "step": 17576 + }, + { + "epoch": 16.4, + "learning_rate": 4.795009328358209e-05, + "loss": 0.0036, + "step": 17580 + }, + { + "epoch": 16.4, + "learning_rate": 4.794962686567164e-05, + "loss": 0.0003, + "step": 17584 + }, + { + "epoch": 16.41, + "learning_rate": 4.79491604477612e-05, + "loss": 0.011, + "step": 17588 + }, + { + "epoch": 16.41, + "learning_rate": 4.794869402985075e-05, + "loss": 0.0021, + "step": 17592 + }, + { + "epoch": 16.41, + "learning_rate": 4.79482276119403e-05, + "loss": 0.0015, + "step": 17596 + }, + { + "epoch": 16.42, + "learning_rate": 4.794776119402985e-05, + "loss": 0.0022, + "step": 17600 + }, + { + "epoch": 16.42, + "learning_rate": 4.794729477611941e-05, + "loss": 0.0033, + "step": 17604 + }, + { + "epoch": 16.43, + "learning_rate": 4.7946828358208956e-05, + "loss": 0.0012, + "step": 17608 + }, + { + "epoch": 16.43, + "learning_rate": 4.794636194029851e-05, + "loss": 0.0006, + "step": 17612 + }, + { + "epoch": 16.43, + "learning_rate": 4.7945895522388066e-05, + "loss": 0.0052, + "step": 17616 + }, + { + "epoch": 16.44, + "learning_rate": 4.7945429104477614e-05, + "loss": 0.0002, + "step": 17620 + }, + { + "epoch": 16.44, + "learning_rate": 4.794496268656717e-05, + "loss": 0.0016, + "step": 17624 + }, + { + "epoch": 16.44, + "learning_rate": 4.794449626865672e-05, + "loss": 0.0009, + "step": 17628 + }, + { + "epoch": 16.45, + "learning_rate": 4.794402985074627e-05, + "loss": 0.0031, + "step": 17632 + }, + { + "epoch": 16.45, + "learning_rate": 4.7943563432835827e-05, + "loss": 0.0077, + "step": 17636 + }, + { + "epoch": 16.46, + "learning_rate": 4.7943097014925375e-05, + "loss": 0.0037, + "step": 17640 + }, + { + "epoch": 16.46, + "learning_rate": 4.794263059701492e-05, + "loss": 0.0023, + "step": 17644 + }, + { + "epoch": 16.46, + "learning_rate": 4.7942164179104485e-05, + "loss": 0.0008, + "step": 17648 + }, + { + "epoch": 16.47, + "learning_rate": 4.794169776119403e-05, + "loss": 0.0013, + "step": 17652 + }, + { + "epoch": 16.47, + "learning_rate": 4.794123134328358e-05, + "loss": 0.0015, + "step": 17656 + }, + { + "epoch": 16.47, + "learning_rate": 4.7940764925373136e-05, + "loss": 0.0071, + "step": 17660 + }, + { + "epoch": 16.48, + "learning_rate": 4.794029850746269e-05, + "loss": 0.0004, + "step": 17664 + }, + { + "epoch": 16.48, + "learning_rate": 4.793983208955224e-05, + "loss": 0.0041, + "step": 17668 + }, + { + "epoch": 16.49, + "learning_rate": 4.7939365671641794e-05, + "loss": 0.0022, + "step": 17672 + }, + { + "epoch": 16.49, + "learning_rate": 4.793889925373135e-05, + "loss": 0.0052, + "step": 17676 + }, + { + "epoch": 16.49, + "learning_rate": 4.79384328358209e-05, + "loss": 0.0029, + "step": 17680 + }, + { + "epoch": 16.5, + "learning_rate": 4.793796641791045e-05, + "loss": 0.0017, + "step": 17684 + }, + { + "epoch": 16.5, + "learning_rate": 4.79375e-05, + "loss": 0.0033, + "step": 17688 + }, + { + "epoch": 16.5, + "learning_rate": 4.7937033582089555e-05, + "loss": 0.0054, + "step": 17692 + }, + { + "epoch": 16.51, + "learning_rate": 4.793656716417911e-05, + "loss": 0.0005, + "step": 17696 + }, + { + "epoch": 16.51, + "learning_rate": 4.793610074626866e-05, + "loss": 0.0047, + "step": 17700 + }, + { + "epoch": 16.51, + "learning_rate": 4.7935634328358206e-05, + "loss": 0.0014, + "step": 17704 + }, + { + "epoch": 16.52, + "learning_rate": 4.793516791044777e-05, + "loss": 0.0014, + "step": 17708 + }, + { + "epoch": 16.52, + "learning_rate": 4.7934701492537316e-05, + "loss": 0.0051, + "step": 17712 + }, + { + "epoch": 16.53, + "learning_rate": 4.7934235074626864e-05, + "loss": 0.0008, + "step": 17716 + }, + { + "epoch": 16.53, + "learning_rate": 4.793376865671642e-05, + "loss": 0.0046, + "step": 17720 + }, + { + "epoch": 16.53, + "learning_rate": 4.7933302238805974e-05, + "loss": 0.0013, + "step": 17724 + }, + { + "epoch": 16.54, + "learning_rate": 4.793283582089552e-05, + "loss": 0.0003, + "step": 17728 + }, + { + "epoch": 16.54, + "learning_rate": 4.7932369402985077e-05, + "loss": 0.0048, + "step": 17732 + }, + { + "epoch": 16.54, + "learning_rate": 4.793190298507463e-05, + "loss": 0.0008, + "step": 17736 + }, + { + "epoch": 16.55, + "learning_rate": 4.7931436567164186e-05, + "loss": 0.0007, + "step": 17740 + }, + { + "epoch": 16.55, + "learning_rate": 4.7930970149253735e-05, + "loss": 0.001, + "step": 17744 + }, + { + "epoch": 16.56, + "learning_rate": 4.793050373134328e-05, + "loss": 0.005, + "step": 17748 + }, + { + "epoch": 16.56, + "learning_rate": 4.793003731343284e-05, + "loss": 0.0033, + "step": 17752 + }, + { + "epoch": 16.56, + "learning_rate": 4.792957089552239e-05, + "loss": 0.0031, + "step": 17756 + }, + { + "epoch": 16.57, + "learning_rate": 4.792910447761194e-05, + "loss": 0.0004, + "step": 17760 + }, + { + "epoch": 16.57, + "learning_rate": 4.7928638059701496e-05, + "loss": 0.0024, + "step": 17764 + }, + { + "epoch": 16.57, + "learning_rate": 4.792817164179105e-05, + "loss": 0.0059, + "step": 17768 + }, + { + "epoch": 16.58, + "learning_rate": 4.79277052238806e-05, + "loss": 0.0031, + "step": 17772 + }, + { + "epoch": 16.58, + "learning_rate": 4.7927238805970153e-05, + "loss": 0.0009, + "step": 17776 + }, + { + "epoch": 16.59, + "learning_rate": 4.79267723880597e-05, + "loss": 0.0044, + "step": 17780 + }, + { + "epoch": 16.59, + "learning_rate": 4.7926305970149256e-05, + "loss": 0.0076, + "step": 17784 + }, + { + "epoch": 16.59, + "learning_rate": 4.792583955223881e-05, + "loss": 0.0002, + "step": 17788 + }, + { + "epoch": 16.6, + "learning_rate": 4.792537313432836e-05, + "loss": 0.0005, + "step": 17792 + }, + { + "epoch": 16.6, + "learning_rate": 4.7924906716417914e-05, + "loss": 0.0018, + "step": 17796 + }, + { + "epoch": 16.6, + "learning_rate": 4.792444029850747e-05, + "loss": 0.0041, + "step": 17800 + }, + { + "epoch": 16.61, + "learning_rate": 4.792397388059702e-05, + "loss": 0.0115, + "step": 17804 + }, + { + "epoch": 16.61, + "learning_rate": 4.7923507462686566e-05, + "loss": 0.0022, + "step": 17808 + }, + { + "epoch": 16.62, + "learning_rate": 4.792304104477612e-05, + "loss": 0.0012, + "step": 17812 + }, + { + "epoch": 16.62, + "learning_rate": 4.7922574626865675e-05, + "loss": 0.0002, + "step": 17816 + }, + { + "epoch": 16.62, + "learning_rate": 4.7922108208955224e-05, + "loss": 0.0014, + "step": 17820 + }, + { + "epoch": 16.63, + "learning_rate": 4.792164179104478e-05, + "loss": 0.0007, + "step": 17824 + }, + { + "epoch": 16.63, + "learning_rate": 4.792117537313433e-05, + "loss": 0.0051, + "step": 17828 + }, + { + "epoch": 16.63, + "learning_rate": 4.792070895522388e-05, + "loss": 0.0007, + "step": 17832 + }, + { + "epoch": 16.64, + "learning_rate": 4.7920242537313436e-05, + "loss": 0.0012, + "step": 17836 + }, + { + "epoch": 16.64, + "learning_rate": 4.7919776119402985e-05, + "loss": 0.0037, + "step": 17840 + }, + { + "epoch": 16.65, + "learning_rate": 4.791930970149254e-05, + "loss": 0.0002, + "step": 17844 + }, + { + "epoch": 16.65, + "learning_rate": 4.7918843283582094e-05, + "loss": 0.0069, + "step": 17848 + }, + { + "epoch": 16.65, + "learning_rate": 4.791837686567164e-05, + "loss": 0.0016, + "step": 17852 + }, + { + "epoch": 16.66, + "learning_rate": 4.791791044776119e-05, + "loss": 0.0029, + "step": 17856 + }, + { + "epoch": 16.66, + "learning_rate": 4.791744402985075e-05, + "loss": 0.005, + "step": 17860 + }, + { + "epoch": 16.66, + "learning_rate": 4.79169776119403e-05, + "loss": 0.0005, + "step": 17864 + }, + { + "epoch": 16.67, + "learning_rate": 4.791651119402985e-05, + "loss": 0.0014, + "step": 17868 + }, + { + "epoch": 16.67, + "learning_rate": 4.7916044776119403e-05, + "loss": 0.0003, + "step": 17872 + }, + { + "epoch": 16.68, + "learning_rate": 4.791557835820896e-05, + "loss": 0.0012, + "step": 17876 + }, + { + "epoch": 16.68, + "learning_rate": 4.7915111940298507e-05, + "loss": 0.0041, + "step": 17880 + }, + { + "epoch": 16.68, + "learning_rate": 4.791464552238806e-05, + "loss": 0.0016, + "step": 17884 + }, + { + "epoch": 16.69, + "learning_rate": 4.7914179104477616e-05, + "loss": 0.0005, + "step": 17888 + }, + { + "epoch": 16.69, + "learning_rate": 4.7913712686567164e-05, + "loss": 0.0002, + "step": 17892 + }, + { + "epoch": 16.69, + "learning_rate": 4.791324626865672e-05, + "loss": 0.0017, + "step": 17896 + }, + { + "epoch": 16.7, + "learning_rate": 4.791277985074627e-05, + "loss": 0.0069, + "step": 17900 + }, + { + "epoch": 16.7, + "learning_rate": 4.791231343283583e-05, + "loss": 0.0039, + "step": 17904 + }, + { + "epoch": 16.71, + "learning_rate": 4.791184701492538e-05, + "loss": 0.0006, + "step": 17908 + }, + { + "epoch": 16.71, + "learning_rate": 4.7911380597014925e-05, + "loss": 0.0, + "step": 17912 + }, + { + "epoch": 16.71, + "learning_rate": 4.791091417910448e-05, + "loss": 0.0031, + "step": 17916 + }, + { + "epoch": 16.72, + "learning_rate": 4.7910447761194035e-05, + "loss": 0.0014, + "step": 17920 + }, + { + "epoch": 16.72, + "learning_rate": 4.7909981343283583e-05, + "loss": 0.0024, + "step": 17924 + }, + { + "epoch": 16.72, + "learning_rate": 4.790951492537314e-05, + "loss": 0.0006, + "step": 17928 + }, + { + "epoch": 16.73, + "learning_rate": 4.7909048507462686e-05, + "loss": 0.0009, + "step": 17932 + }, + { + "epoch": 16.73, + "learning_rate": 4.790858208955224e-05, + "loss": 0.0012, + "step": 17936 + }, + { + "epoch": 16.73, + "learning_rate": 4.7908115671641796e-05, + "loss": 0.0021, + "step": 17940 + }, + { + "epoch": 16.74, + "learning_rate": 4.7907649253731344e-05, + "loss": 0.0082, + "step": 17944 + }, + { + "epoch": 16.74, + "learning_rate": 4.79071828358209e-05, + "loss": 0.0014, + "step": 17948 + }, + { + "epoch": 16.75, + "learning_rate": 4.7906716417910454e-05, + "loss": 0.0013, + "step": 17952 + }, + { + "epoch": 16.75, + "learning_rate": 4.790625e-05, + "loss": 0.0008, + "step": 17956 + }, + { + "epoch": 16.75, + "learning_rate": 4.790578358208955e-05, + "loss": 0.0026, + "step": 17960 + }, + { + "epoch": 16.76, + "learning_rate": 4.790531716417911e-05, + "loss": 0.0003, + "step": 17964 + }, + { + "epoch": 16.76, + "learning_rate": 4.790485074626866e-05, + "loss": 0.0055, + "step": 17968 + }, + { + "epoch": 16.76, + "learning_rate": 4.790438432835821e-05, + "loss": 0.0009, + "step": 17972 + }, + { + "epoch": 16.77, + "learning_rate": 4.790391791044776e-05, + "loss": 0.0008, + "step": 17976 + }, + { + "epoch": 16.77, + "learning_rate": 4.790345149253732e-05, + "loss": 0.0027, + "step": 17980 + }, + { + "epoch": 16.78, + "learning_rate": 4.7902985074626866e-05, + "loss": 0.0003, + "step": 17984 + }, + { + "epoch": 16.78, + "learning_rate": 4.790251865671642e-05, + "loss": 0.0023, + "step": 17988 + }, + { + "epoch": 16.78, + "learning_rate": 4.790205223880597e-05, + "loss": 0.0005, + "step": 17992 + }, + { + "epoch": 16.79, + "learning_rate": 4.7901585820895524e-05, + "loss": 0.0024, + "step": 17996 + }, + { + "epoch": 16.79, + "learning_rate": 4.790111940298508e-05, + "loss": 0.0005, + "step": 18000 + }, + { + "epoch": 16.79, + "eval_exact_match": 0.730174081237911, + "eval_exec": 0.7553191489361702, + "eval_loss": 0.3473448157310486, + "eval_runtime": 1359.5008, + "eval_samples_per_second": 0.761, + "step": 18000 + }, + { + "epoch": 16.79, + "learning_rate": 4.790065298507463e-05, + "loss": 0.0041, + "step": 18004 + }, + { + "epoch": 16.8, + "learning_rate": 4.790018656716418e-05, + "loss": 0.0034, + "step": 18008 + }, + { + "epoch": 16.8, + "learning_rate": 4.789972014925374e-05, + "loss": 0.0004, + "step": 18012 + }, + { + "epoch": 16.81, + "learning_rate": 4.7899253731343285e-05, + "loss": 0.0046, + "step": 18016 + }, + { + "epoch": 16.81, + "learning_rate": 4.7898787313432833e-05, + "loss": 0.001, + "step": 18020 + }, + { + "epoch": 16.81, + "learning_rate": 4.789832089552239e-05, + "loss": 0.0069, + "step": 18024 + }, + { + "epoch": 16.82, + "learning_rate": 4.789785447761194e-05, + "loss": 0.0081, + "step": 18028 + }, + { + "epoch": 16.82, + "learning_rate": 4.789738805970149e-05, + "loss": 0.0012, + "step": 18032 + }, + { + "epoch": 16.82, + "learning_rate": 4.7896921641791046e-05, + "loss": 0.0023, + "step": 18036 + }, + { + "epoch": 16.83, + "learning_rate": 4.78964552238806e-05, + "loss": 0.0049, + "step": 18040 + }, + { + "epoch": 16.83, + "learning_rate": 4.789598880597015e-05, + "loss": 0.0011, + "step": 18044 + }, + { + "epoch": 16.84, + "learning_rate": 4.7895522388059704e-05, + "loss": 0.0028, + "step": 18048 + }, + { + "epoch": 16.84, + "learning_rate": 4.789505597014925e-05, + "loss": 0.0016, + "step": 18052 + }, + { + "epoch": 16.84, + "learning_rate": 4.789458955223881e-05, + "loss": 0.0007, + "step": 18056 + }, + { + "epoch": 16.85, + "learning_rate": 4.789412313432836e-05, + "loss": 0.0008, + "step": 18060 + }, + { + "epoch": 16.85, + "learning_rate": 4.789365671641791e-05, + "loss": 0.0013, + "step": 18064 + }, + { + "epoch": 16.85, + "learning_rate": 4.7893190298507465e-05, + "loss": 0.0038, + "step": 18068 + }, + { + "epoch": 16.86, + "learning_rate": 4.789272388059702e-05, + "loss": 0.0002, + "step": 18072 + }, + { + "epoch": 16.86, + "learning_rate": 4.789225746268657e-05, + "loss": 0.0016, + "step": 18076 + }, + { + "epoch": 16.87, + "learning_rate": 4.789179104477612e-05, + "loss": 0.0003, + "step": 18080 + }, + { + "epoch": 16.87, + "learning_rate": 4.789132462686567e-05, + "loss": 0.0013, + "step": 18084 + }, + { + "epoch": 16.87, + "learning_rate": 4.7890858208955226e-05, + "loss": 0.0124, + "step": 18088 + }, + { + "epoch": 16.88, + "learning_rate": 4.789039179104478e-05, + "loss": 0.0009, + "step": 18092 + }, + { + "epoch": 16.88, + "learning_rate": 4.788992537313433e-05, + "loss": 0.0034, + "step": 18096 + }, + { + "epoch": 16.88, + "learning_rate": 4.7889458955223884e-05, + "loss": 0.0118, + "step": 18100 + }, + { + "epoch": 16.89, + "learning_rate": 4.788899253731344e-05, + "loss": 0.003, + "step": 18104 + }, + { + "epoch": 16.89, + "learning_rate": 4.788852611940299e-05, + "loss": 0.0029, + "step": 18108 + }, + { + "epoch": 16.9, + "learning_rate": 4.7888059701492535e-05, + "loss": 0.0013, + "step": 18112 + }, + { + "epoch": 16.9, + "learning_rate": 4.78875932835821e-05, + "loss": 0.0057, + "step": 18116 + }, + { + "epoch": 16.9, + "learning_rate": 4.7887126865671645e-05, + "loss": 0.0001, + "step": 18120 + }, + { + "epoch": 16.91, + "learning_rate": 4.788666044776119e-05, + "loss": 0.001, + "step": 18124 + }, + { + "epoch": 16.91, + "learning_rate": 4.788619402985075e-05, + "loss": 0.011, + "step": 18128 + }, + { + "epoch": 16.91, + "learning_rate": 4.78857276119403e-05, + "loss": 0.0044, + "step": 18132 + }, + { + "epoch": 16.92, + "learning_rate": 4.788526119402985e-05, + "loss": 0.0017, + "step": 18136 + }, + { + "epoch": 16.92, + "learning_rate": 4.7884794776119406e-05, + "loss": 0.0047, + "step": 18140 + }, + { + "epoch": 16.93, + "learning_rate": 4.7884328358208954e-05, + "loss": 0.0044, + "step": 18144 + }, + { + "epoch": 16.93, + "learning_rate": 4.788386194029851e-05, + "loss": 0.0012, + "step": 18148 + }, + { + "epoch": 16.93, + "learning_rate": 4.7883395522388064e-05, + "loss": 0.0003, + "step": 18152 + }, + { + "epoch": 16.94, + "learning_rate": 4.788292910447761e-05, + "loss": 0.0006, + "step": 18156 + }, + { + "epoch": 16.94, + "learning_rate": 4.788246268656717e-05, + "loss": 0.0041, + "step": 18160 + }, + { + "epoch": 16.94, + "learning_rate": 4.788199626865672e-05, + "loss": 0.0014, + "step": 18164 + }, + { + "epoch": 16.95, + "learning_rate": 4.788152985074627e-05, + "loss": 0.0039, + "step": 18168 + }, + { + "epoch": 16.95, + "learning_rate": 4.788106343283582e-05, + "loss": 0.0021, + "step": 18172 + }, + { + "epoch": 16.96, + "learning_rate": 4.788059701492538e-05, + "loss": 0.0052, + "step": 18176 + }, + { + "epoch": 16.96, + "learning_rate": 4.788013059701493e-05, + "loss": 0.0009, + "step": 18180 + }, + { + "epoch": 16.96, + "learning_rate": 4.7879664179104476e-05, + "loss": 0.0008, + "step": 18184 + }, + { + "epoch": 16.97, + "learning_rate": 4.787919776119403e-05, + "loss": 0.0023, + "step": 18188 + }, + { + "epoch": 16.97, + "learning_rate": 4.7878731343283586e-05, + "loss": 0.0028, + "step": 18192 + }, + { + "epoch": 16.97, + "learning_rate": 4.7878264925373134e-05, + "loss": 0.0007, + "step": 18196 + }, + { + "epoch": 16.98, + "learning_rate": 4.787779850746269e-05, + "loss": 0.0004, + "step": 18200 + }, + { + "epoch": 16.98, + "learning_rate": 4.787733208955224e-05, + "loss": 0.0063, + "step": 18204 + }, + { + "epoch": 16.98, + "learning_rate": 4.787686567164179e-05, + "loss": 0.0018, + "step": 18208 + }, + { + "epoch": 16.99, + "learning_rate": 4.787639925373135e-05, + "loss": 0.0034, + "step": 18212 + }, + { + "epoch": 16.99, + "learning_rate": 4.7875932835820895e-05, + "loss": 0.0019, + "step": 18216 + }, + { + "epoch": 17.0, + "learning_rate": 4.787546641791045e-05, + "loss": 0.0065, + "step": 18220 + }, + { + "epoch": 17.0, + "learning_rate": 4.7875000000000005e-05, + "loss": 0.0032, + "step": 18224 + }, + { + "epoch": 17.0, + "learning_rate": 4.787453358208955e-05, + "loss": 0.0003, + "step": 18228 + }, + { + "epoch": 17.01, + "learning_rate": 4.787406716417911e-05, + "loss": 0.0036, + "step": 18232 + }, + { + "epoch": 17.01, + "learning_rate": 4.787360074626866e-05, + "loss": 0.0004, + "step": 18236 + }, + { + "epoch": 17.01, + "learning_rate": 4.787313432835821e-05, + "loss": 0.0008, + "step": 18240 + }, + { + "epoch": 17.02, + "learning_rate": 4.7872667910447766e-05, + "loss": 0.0037, + "step": 18244 + }, + { + "epoch": 17.02, + "learning_rate": 4.7872201492537314e-05, + "loss": 0.0003, + "step": 18248 + }, + { + "epoch": 17.03, + "learning_rate": 4.787173507462687e-05, + "loss": 0.0202, + "step": 18252 + }, + { + "epoch": 17.03, + "learning_rate": 4.7871268656716424e-05, + "loss": 0.0002, + "step": 18256 + }, + { + "epoch": 17.03, + "learning_rate": 4.787080223880597e-05, + "loss": 0.0032, + "step": 18260 + }, + { + "epoch": 17.04, + "learning_rate": 4.787033582089552e-05, + "loss": 0.0015, + "step": 18264 + }, + { + "epoch": 17.04, + "learning_rate": 4.786986940298508e-05, + "loss": 0.0018, + "step": 18268 + }, + { + "epoch": 17.04, + "learning_rate": 4.786940298507463e-05, + "loss": 0.0034, + "step": 18272 + }, + { + "epoch": 17.05, + "learning_rate": 4.786893656716418e-05, + "loss": 0.0008, + "step": 18276 + }, + { + "epoch": 17.05, + "learning_rate": 4.786847014925373e-05, + "loss": 0.0004, + "step": 18280 + }, + { + "epoch": 17.06, + "learning_rate": 4.786800373134329e-05, + "loss": 0.0002, + "step": 18284 + }, + { + "epoch": 17.06, + "learning_rate": 4.7867537313432836e-05, + "loss": 0.0006, + "step": 18288 + }, + { + "epoch": 17.06, + "learning_rate": 4.786707089552239e-05, + "loss": 0.0016, + "step": 18292 + }, + { + "epoch": 17.07, + "learning_rate": 4.7866604477611946e-05, + "loss": 0.0019, + "step": 18296 + }, + { + "epoch": 17.07, + "learning_rate": 4.7866138059701494e-05, + "loss": 0.003, + "step": 18300 + }, + { + "epoch": 17.07, + "learning_rate": 4.786567164179105e-05, + "loss": 0.0014, + "step": 18304 + }, + { + "epoch": 17.08, + "learning_rate": 4.78652052238806e-05, + "loss": 0.0056, + "step": 18308 + }, + { + "epoch": 17.08, + "learning_rate": 4.786473880597015e-05, + "loss": 0.0004, + "step": 18312 + }, + { + "epoch": 17.09, + "learning_rate": 4.786427238805971e-05, + "loss": 0.0002, + "step": 18316 + }, + { + "epoch": 17.09, + "learning_rate": 4.7863805970149255e-05, + "loss": 0.0025, + "step": 18320 + }, + { + "epoch": 17.09, + "learning_rate": 4.78633395522388e-05, + "loss": 0.0019, + "step": 18324 + }, + { + "epoch": 17.1, + "learning_rate": 4.7862873134328365e-05, + "loss": 0.0006, + "step": 18328 + }, + { + "epoch": 17.1, + "learning_rate": 4.786240671641791e-05, + "loss": 0.0006, + "step": 18332 + }, + { + "epoch": 17.1, + "learning_rate": 4.786194029850746e-05, + "loss": 0.0004, + "step": 18336 + }, + { + "epoch": 17.11, + "learning_rate": 4.7861473880597016e-05, + "loss": 0.0008, + "step": 18340 + }, + { + "epoch": 17.11, + "learning_rate": 4.786100746268657e-05, + "loss": 0.0019, + "step": 18344 + }, + { + "epoch": 17.12, + "learning_rate": 4.786054104477612e-05, + "loss": 0.0002, + "step": 18348 + }, + { + "epoch": 17.12, + "learning_rate": 4.7860074626865674e-05, + "loss": 0.0008, + "step": 18352 + }, + { + "epoch": 17.12, + "learning_rate": 4.785960820895523e-05, + "loss": 0.0015, + "step": 18356 + }, + { + "epoch": 17.13, + "learning_rate": 4.785914179104478e-05, + "loss": 0.0041, + "step": 18360 + }, + { + "epoch": 17.13, + "learning_rate": 4.785867537313433e-05, + "loss": 0.0029, + "step": 18364 + }, + { + "epoch": 17.13, + "learning_rate": 4.785820895522388e-05, + "loss": 0.0002, + "step": 18368 + }, + { + "epoch": 17.14, + "learning_rate": 4.7857742537313435e-05, + "loss": 0.0004, + "step": 18372 + }, + { + "epoch": 17.14, + "learning_rate": 4.785727611940299e-05, + "loss": 0.0019, + "step": 18376 + }, + { + "epoch": 17.15, + "learning_rate": 4.785680970149254e-05, + "loss": 0.0011, + "step": 18380 + }, + { + "epoch": 17.15, + "learning_rate": 4.7856343283582086e-05, + "loss": 0.0027, + "step": 18384 + }, + { + "epoch": 17.15, + "learning_rate": 4.785587686567165e-05, + "loss": 0.0055, + "step": 18388 + }, + { + "epoch": 17.16, + "learning_rate": 4.7855410447761196e-05, + "loss": 0.0001, + "step": 18392 + }, + { + "epoch": 17.16, + "learning_rate": 4.785494402985075e-05, + "loss": 0.0004, + "step": 18396 + }, + { + "epoch": 17.16, + "learning_rate": 4.78544776119403e-05, + "loss": 0.0002, + "step": 18400 + }, + { + "epoch": 17.17, + "learning_rate": 4.7854011194029854e-05, + "loss": 0.006, + "step": 18404 + }, + { + "epoch": 17.17, + "learning_rate": 4.785354477611941e-05, + "loss": 0.0001, + "step": 18408 + }, + { + "epoch": 17.18, + "learning_rate": 4.785307835820896e-05, + "loss": 0.0029, + "step": 18412 + }, + { + "epoch": 17.18, + "learning_rate": 4.785261194029851e-05, + "loss": 0.0004, + "step": 18416 + }, + { + "epoch": 17.18, + "learning_rate": 4.785214552238807e-05, + "loss": 0.0008, + "step": 18420 + }, + { + "epoch": 17.19, + "learning_rate": 4.7851679104477615e-05, + "loss": 0.0004, + "step": 18424 + }, + { + "epoch": 17.19, + "learning_rate": 4.785121268656716e-05, + "loss": 0.0022, + "step": 18428 + }, + { + "epoch": 17.19, + "learning_rate": 4.785074626865672e-05, + "loss": 0.0009, + "step": 18432 + }, + { + "epoch": 17.2, + "learning_rate": 4.785027985074627e-05, + "loss": 0.0009, + "step": 18436 + }, + { + "epoch": 17.2, + "learning_rate": 4.784981343283582e-05, + "loss": 0.0053, + "step": 18440 + }, + { + "epoch": 17.21, + "learning_rate": 4.7849347014925376e-05, + "loss": 0.0019, + "step": 18444 + }, + { + "epoch": 17.21, + "learning_rate": 4.784888059701493e-05, + "loss": 0.0028, + "step": 18448 + }, + { + "epoch": 17.21, + "learning_rate": 4.784841417910448e-05, + "loss": 0.0016, + "step": 18452 + }, + { + "epoch": 17.22, + "learning_rate": 4.7847947761194034e-05, + "loss": 0.0012, + "step": 18456 + }, + { + "epoch": 17.22, + "learning_rate": 4.784748134328358e-05, + "loss": 0.0004, + "step": 18460 + }, + { + "epoch": 17.22, + "learning_rate": 4.784701492537314e-05, + "loss": 0.0002, + "step": 18464 + }, + { + "epoch": 17.23, + "learning_rate": 4.784654850746269e-05, + "loss": 0.0049, + "step": 18468 + }, + { + "epoch": 17.23, + "learning_rate": 4.784608208955224e-05, + "loss": 0.0037, + "step": 18472 + }, + { + "epoch": 17.24, + "learning_rate": 4.7845615671641795e-05, + "loss": 0.0102, + "step": 18476 + }, + { + "epoch": 17.24, + "learning_rate": 4.784514925373135e-05, + "loss": 0.0021, + "step": 18480 + }, + { + "epoch": 17.24, + "learning_rate": 4.78446828358209e-05, + "loss": 0.0025, + "step": 18484 + }, + { + "epoch": 17.25, + "learning_rate": 4.7844216417910446e-05, + "loss": 0.0012, + "step": 18488 + }, + { + "epoch": 17.25, + "learning_rate": 4.784375e-05, + "loss": 0.0019, + "step": 18492 + }, + { + "epoch": 17.25, + "learning_rate": 4.7843283582089556e-05, + "loss": 0.0006, + "step": 18496 + }, + { + "epoch": 17.26, + "learning_rate": 4.7842817164179104e-05, + "loss": 0.007, + "step": 18500 + }, + { + "epoch": 17.26, + "eval_exact_match": 0.7292069632495164, + "eval_exec": 0.7562862669245648, + "eval_loss": 0.35152459144592285, + "eval_runtime": 1127.9193, + "eval_samples_per_second": 0.917, + "step": 18500 + }, + { + "epoch": 17.26, + "learning_rate": 4.784235074626866e-05, + "loss": 0.0043, + "step": 18504 + }, + { + "epoch": 17.26, + "learning_rate": 4.7841884328358214e-05, + "loss": 0.0025, + "step": 18508 + }, + { + "epoch": 17.27, + "learning_rate": 4.784141791044776e-05, + "loss": 0.0137, + "step": 18512 + }, + { + "epoch": 17.27, + "learning_rate": 4.784095149253732e-05, + "loss": 0.002, + "step": 18516 + }, + { + "epoch": 17.28, + "learning_rate": 4.7840485074626865e-05, + "loss": 0.0014, + "step": 18520 + }, + { + "epoch": 17.28, + "learning_rate": 4.784001865671642e-05, + "loss": 0.0019, + "step": 18524 + }, + { + "epoch": 17.28, + "learning_rate": 4.7839552238805975e-05, + "loss": 0.0002, + "step": 18528 + }, + { + "epoch": 17.29, + "learning_rate": 4.783908582089552e-05, + "loss": 0.0006, + "step": 18532 + }, + { + "epoch": 17.29, + "learning_rate": 4.783861940298507e-05, + "loss": 0.0048, + "step": 18536 + }, + { + "epoch": 17.29, + "learning_rate": 4.783815298507463e-05, + "loss": 0.0017, + "step": 18540 + }, + { + "epoch": 17.3, + "learning_rate": 4.783768656716418e-05, + "loss": 0.0053, + "step": 18544 + }, + { + "epoch": 17.3, + "learning_rate": 4.7837220149253736e-05, + "loss": 0.0008, + "step": 18548 + }, + { + "epoch": 17.31, + "learning_rate": 4.7836753731343284e-05, + "loss": 0.0023, + "step": 18552 + }, + { + "epoch": 17.31, + "learning_rate": 4.783628731343284e-05, + "loss": 0.0049, + "step": 18556 + }, + { + "epoch": 17.31, + "learning_rate": 4.7835820895522394e-05, + "loss": 0.0028, + "step": 18560 + }, + { + "epoch": 17.32, + "learning_rate": 4.783535447761194e-05, + "loss": 0.0002, + "step": 18564 + }, + { + "epoch": 17.32, + "learning_rate": 4.78348880597015e-05, + "loss": 0.001, + "step": 18568 + }, + { + "epoch": 17.32, + "learning_rate": 4.783442164179105e-05, + "loss": 0.0032, + "step": 18572 + }, + { + "epoch": 17.33, + "learning_rate": 4.78339552238806e-05, + "loss": 0.0009, + "step": 18576 + }, + { + "epoch": 17.33, + "learning_rate": 4.783348880597015e-05, + "loss": 0.0019, + "step": 18580 + }, + { + "epoch": 17.34, + "learning_rate": 4.783302238805971e-05, + "loss": 0.0008, + "step": 18584 + }, + { + "epoch": 17.34, + "learning_rate": 4.783255597014926e-05, + "loss": 0.0029, + "step": 18588 + }, + { + "epoch": 17.34, + "learning_rate": 4.7832089552238806e-05, + "loss": 0.0024, + "step": 18592 + }, + { + "epoch": 17.35, + "learning_rate": 4.783162313432836e-05, + "loss": 0.0064, + "step": 18596 + }, + { + "epoch": 17.35, + "learning_rate": 4.7831156716417916e-05, + "loss": 0.0005, + "step": 18600 + }, + { + "epoch": 17.35, + "learning_rate": 4.7830690298507464e-05, + "loss": 0.0026, + "step": 18604 + }, + { + "epoch": 17.36, + "learning_rate": 4.783022388059702e-05, + "loss": 0.0002, + "step": 18608 + }, + { + "epoch": 17.36, + "learning_rate": 4.782975746268657e-05, + "loss": 0.0007, + "step": 18612 + }, + { + "epoch": 17.37, + "learning_rate": 4.782929104477612e-05, + "loss": 0.0005, + "step": 18616 + }, + { + "epoch": 17.37, + "learning_rate": 4.7828824626865677e-05, + "loss": 0.0015, + "step": 18620 + }, + { + "epoch": 17.37, + "learning_rate": 4.7828358208955225e-05, + "loss": 0.002, + "step": 18624 + }, + { + "epoch": 17.38, + "learning_rate": 4.782789179104478e-05, + "loss": 0.0133, + "step": 18628 + }, + { + "epoch": 17.38, + "learning_rate": 4.7827425373134334e-05, + "loss": 0.0004, + "step": 18632 + }, + { + "epoch": 17.38, + "learning_rate": 4.782695895522388e-05, + "loss": 0.0002, + "step": 18636 + }, + { + "epoch": 17.39, + "learning_rate": 4.782649253731343e-05, + "loss": 0.0052, + "step": 18640 + }, + { + "epoch": 17.39, + "learning_rate": 4.782602611940299e-05, + "loss": 0.0012, + "step": 18644 + }, + { + "epoch": 17.4, + "learning_rate": 4.782555970149254e-05, + "loss": 0.0045, + "step": 18648 + }, + { + "epoch": 17.4, + "learning_rate": 4.782509328358209e-05, + "loss": 0.0024, + "step": 18652 + }, + { + "epoch": 17.4, + "learning_rate": 4.7824626865671644e-05, + "loss": 0.0025, + "step": 18656 + }, + { + "epoch": 17.41, + "learning_rate": 4.78241604477612e-05, + "loss": 0.004, + "step": 18660 + }, + { + "epoch": 17.41, + "learning_rate": 4.782369402985075e-05, + "loss": 0.0038, + "step": 18664 + }, + { + "epoch": 17.41, + "learning_rate": 4.78232276119403e-05, + "loss": 0.0013, + "step": 18668 + }, + { + "epoch": 17.42, + "learning_rate": 4.782276119402985e-05, + "loss": 0.002, + "step": 18672 + }, + { + "epoch": 17.42, + "learning_rate": 4.7822294776119405e-05, + "loss": 0.0007, + "step": 18676 + }, + { + "epoch": 17.43, + "learning_rate": 4.782182835820896e-05, + "loss": 0.0028, + "step": 18680 + }, + { + "epoch": 17.43, + "learning_rate": 4.782136194029851e-05, + "loss": 0.0001, + "step": 18684 + }, + { + "epoch": 17.43, + "learning_rate": 4.782089552238806e-05, + "loss": 0.0058, + "step": 18688 + }, + { + "epoch": 17.44, + "learning_rate": 4.782042910447762e-05, + "loss": 0.0038, + "step": 18692 + }, + { + "epoch": 17.44, + "learning_rate": 4.7819962686567166e-05, + "loss": 0.0014, + "step": 18696 + }, + { + "epoch": 17.44, + "learning_rate": 4.7819496268656714e-05, + "loss": 0.0005, + "step": 18700 + }, + { + "epoch": 17.45, + "learning_rate": 4.781902985074627e-05, + "loss": 0.0137, + "step": 18704 + }, + { + "epoch": 17.45, + "learning_rate": 4.7818563432835824e-05, + "loss": 0.0038, + "step": 18708 + }, + { + "epoch": 17.46, + "learning_rate": 4.781809701492538e-05, + "loss": 0.0009, + "step": 18712 + }, + { + "epoch": 17.46, + "learning_rate": 4.7817630597014927e-05, + "loss": 0.0006, + "step": 18716 + }, + { + "epoch": 17.46, + "learning_rate": 4.781716417910448e-05, + "loss": 0.0013, + "step": 18720 + }, + { + "epoch": 17.47, + "learning_rate": 4.7816697761194036e-05, + "loss": 0.0049, + "step": 18724 + }, + { + "epoch": 17.47, + "learning_rate": 4.7816231343283585e-05, + "loss": 0.0006, + "step": 18728 + }, + { + "epoch": 17.47, + "learning_rate": 4.781576492537313e-05, + "loss": 0.004, + "step": 18732 + }, + { + "epoch": 17.48, + "learning_rate": 4.7815298507462694e-05, + "loss": 0.0003, + "step": 18736 + }, + { + "epoch": 17.48, + "learning_rate": 4.781483208955224e-05, + "loss": 0.0011, + "step": 18740 + }, + { + "epoch": 17.49, + "learning_rate": 4.781436567164179e-05, + "loss": 0.0006, + "step": 18744 + }, + { + "epoch": 17.49, + "learning_rate": 4.7813899253731345e-05, + "loss": 0.0025, + "step": 18748 + }, + { + "epoch": 17.49, + "learning_rate": 4.78134328358209e-05, + "loss": 0.0009, + "step": 18752 + }, + { + "epoch": 17.5, + "learning_rate": 4.781296641791045e-05, + "loss": 0.0027, + "step": 18756 + }, + { + "epoch": 17.5, + "learning_rate": 4.7812500000000003e-05, + "loss": 0.0025, + "step": 18760 + }, + { + "epoch": 17.5, + "learning_rate": 4.781203358208955e-05, + "loss": 0.0008, + "step": 18764 + }, + { + "epoch": 17.51, + "learning_rate": 4.7811567164179106e-05, + "loss": 0.0051, + "step": 18768 + }, + { + "epoch": 17.51, + "learning_rate": 4.781110074626866e-05, + "loss": 0.0012, + "step": 18772 + }, + { + "epoch": 17.51, + "learning_rate": 4.781063432835821e-05, + "loss": 0.0003, + "step": 18776 + }, + { + "epoch": 17.52, + "learning_rate": 4.7810167910447764e-05, + "loss": 0.0024, + "step": 18780 + }, + { + "epoch": 17.52, + "learning_rate": 4.780970149253732e-05, + "loss": 0.0002, + "step": 18784 + }, + { + "epoch": 17.53, + "learning_rate": 4.780923507462687e-05, + "loss": 0.0074, + "step": 18788 + }, + { + "epoch": 17.53, + "learning_rate": 4.7808768656716416e-05, + "loss": 0.0023, + "step": 18792 + }, + { + "epoch": 17.53, + "learning_rate": 4.780830223880598e-05, + "loss": 0.0009, + "step": 18796 + }, + { + "epoch": 17.54, + "learning_rate": 4.7807835820895525e-05, + "loss": 0.0157, + "step": 18800 + }, + { + "epoch": 17.54, + "learning_rate": 4.7807369402985074e-05, + "loss": 0.0032, + "step": 18804 + }, + { + "epoch": 17.54, + "learning_rate": 4.780690298507463e-05, + "loss": 0.0005, + "step": 18808 + }, + { + "epoch": 17.55, + "learning_rate": 4.780643656716418e-05, + "loss": 0.0012, + "step": 18812 + }, + { + "epoch": 17.55, + "learning_rate": 4.780597014925373e-05, + "loss": 0.0014, + "step": 18816 + }, + { + "epoch": 17.56, + "learning_rate": 4.7805503731343286e-05, + "loss": 0.0055, + "step": 18820 + }, + { + "epoch": 17.56, + "learning_rate": 4.7805037313432835e-05, + "loss": 0.0026, + "step": 18824 + }, + { + "epoch": 17.56, + "learning_rate": 4.780457089552239e-05, + "loss": 0.0003, + "step": 18828 + }, + { + "epoch": 17.57, + "learning_rate": 4.7804104477611944e-05, + "loss": 0.0013, + "step": 18832 + }, + { + "epoch": 17.57, + "learning_rate": 4.780363805970149e-05, + "loss": 0.0062, + "step": 18836 + }, + { + "epoch": 17.57, + "learning_rate": 4.780317164179105e-05, + "loss": 0.0007, + "step": 18840 + }, + { + "epoch": 17.58, + "learning_rate": 4.78027052238806e-05, + "loss": 0.001, + "step": 18844 + }, + { + "epoch": 17.58, + "learning_rate": 4.780223880597015e-05, + "loss": 0.0005, + "step": 18848 + }, + { + "epoch": 17.59, + "learning_rate": 4.78017723880597e-05, + "loss": 0.0013, + "step": 18852 + }, + { + "epoch": 17.59, + "learning_rate": 4.780130597014926e-05, + "loss": 0.0033, + "step": 18856 + }, + { + "epoch": 17.59, + "learning_rate": 4.780083955223881e-05, + "loss": 0.0028, + "step": 18860 + }, + { + "epoch": 17.6, + "learning_rate": 4.7800373134328356e-05, + "loss": 0.0045, + "step": 18864 + }, + { + "epoch": 17.6, + "learning_rate": 4.779990671641791e-05, + "loss": 0.001, + "step": 18868 + }, + { + "epoch": 17.6, + "learning_rate": 4.7799440298507466e-05, + "loss": 0.0095, + "step": 18872 + }, + { + "epoch": 17.61, + "learning_rate": 4.779897388059702e-05, + "loss": 0.0001, + "step": 18876 + }, + { + "epoch": 17.61, + "learning_rate": 4.779850746268657e-05, + "loss": 0.0029, + "step": 18880 + }, + { + "epoch": 17.62, + "learning_rate": 4.779804104477612e-05, + "loss": 0.0007, + "step": 18884 + }, + { + "epoch": 17.62, + "learning_rate": 4.779757462686568e-05, + "loss": 0.0042, + "step": 18888 + }, + { + "epoch": 17.62, + "learning_rate": 4.779710820895523e-05, + "loss": 0.0029, + "step": 18892 + }, + { + "epoch": 17.63, + "learning_rate": 4.7796641791044775e-05, + "loss": 0.0007, + "step": 18896 + }, + { + "epoch": 17.63, + "learning_rate": 4.779617537313433e-05, + "loss": 0.001, + "step": 18900 + }, + { + "epoch": 17.63, + "learning_rate": 4.7795708955223885e-05, + "loss": 0.003, + "step": 18904 + }, + { + "epoch": 17.64, + "learning_rate": 4.779524253731343e-05, + "loss": 0.0074, + "step": 18908 + }, + { + "epoch": 17.64, + "learning_rate": 4.779477611940299e-05, + "loss": 0.0013, + "step": 18912 + }, + { + "epoch": 17.65, + "learning_rate": 4.779430970149254e-05, + "loss": 0.0011, + "step": 18916 + }, + { + "epoch": 17.65, + "learning_rate": 4.779384328358209e-05, + "loss": 0.004, + "step": 18920 + }, + { + "epoch": 17.65, + "learning_rate": 4.7793376865671646e-05, + "loss": 0.0014, + "step": 18924 + }, + { + "epoch": 17.66, + "learning_rate": 4.7792910447761194e-05, + "loss": 0.0015, + "step": 18928 + }, + { + "epoch": 17.66, + "learning_rate": 4.779244402985075e-05, + "loss": 0.0012, + "step": 18932 + }, + { + "epoch": 17.66, + "learning_rate": 4.7791977611940304e-05, + "loss": 0.0003, + "step": 18936 + }, + { + "epoch": 17.67, + "learning_rate": 4.779151119402985e-05, + "loss": 0.0001, + "step": 18940 + }, + { + "epoch": 17.67, + "learning_rate": 4.77910447761194e-05, + "loss": 0.0025, + "step": 18944 + }, + { + "epoch": 17.68, + "learning_rate": 4.779057835820896e-05, + "loss": 0.0009, + "step": 18948 + }, + { + "epoch": 17.68, + "learning_rate": 4.779011194029851e-05, + "loss": 0.0003, + "step": 18952 + }, + { + "epoch": 17.68, + "learning_rate": 4.778964552238806e-05, + "loss": 0.0022, + "step": 18956 + }, + { + "epoch": 17.69, + "learning_rate": 4.778917910447761e-05, + "loss": 0.0035, + "step": 18960 + }, + { + "epoch": 17.69, + "learning_rate": 4.778871268656717e-05, + "loss": 0.0004, + "step": 18964 + }, + { + "epoch": 17.69, + "learning_rate": 4.7788246268656716e-05, + "loss": 0.0001, + "step": 18968 + }, + { + "epoch": 17.7, + "learning_rate": 4.778777985074627e-05, + "loss": 0.0002, + "step": 18972 + }, + { + "epoch": 17.7, + "learning_rate": 4.7787313432835826e-05, + "loss": 0.0026, + "step": 18976 + }, + { + "epoch": 17.71, + "learning_rate": 4.7786847014925374e-05, + "loss": 0.0026, + "step": 18980 + }, + { + "epoch": 17.71, + "learning_rate": 4.778638059701493e-05, + "loss": 0.0017, + "step": 18984 + }, + { + "epoch": 17.71, + "learning_rate": 4.778591417910448e-05, + "loss": 0.0053, + "step": 18988 + }, + { + "epoch": 17.72, + "learning_rate": 4.778544776119403e-05, + "loss": 0.0042, + "step": 18992 + }, + { + "epoch": 17.72, + "learning_rate": 4.778498134328359e-05, + "loss": 0.0018, + "step": 18996 + }, + { + "epoch": 17.72, + "learning_rate": 4.7784514925373135e-05, + "loss": 0.0001, + "step": 19000 + }, + { + "epoch": 17.72, + "eval_exact_match": 0.7282398452611218, + "eval_exec": 0.7601547388781431, + "eval_loss": 0.3401549458503723, + "eval_runtime": 1055.5133, + "eval_samples_per_second": 0.98, + "step": 19000 + }, + { + "epoch": 17.73, + "learning_rate": 4.7784048507462683e-05, + "loss": 0.0016, + "step": 19004 + }, + { + "epoch": 17.73, + "learning_rate": 4.7783582089552245e-05, + "loss": 0.0027, + "step": 19008 + }, + { + "epoch": 17.73, + "learning_rate": 4.778311567164179e-05, + "loss": 0.0041, + "step": 19012 + }, + { + "epoch": 17.74, + "learning_rate": 4.778264925373134e-05, + "loss": 0.0009, + "step": 19016 + }, + { + "epoch": 17.74, + "learning_rate": 4.7782182835820896e-05, + "loss": 0.0073, + "step": 19020 + }, + { + "epoch": 17.75, + "learning_rate": 4.778171641791045e-05, + "loss": 0.0015, + "step": 19024 + }, + { + "epoch": 17.75, + "learning_rate": 4.778125e-05, + "loss": 0.0036, + "step": 19028 + }, + { + "epoch": 17.75, + "learning_rate": 4.7780783582089554e-05, + "loss": 0.0074, + "step": 19032 + }, + { + "epoch": 17.76, + "learning_rate": 4.778031716417911e-05, + "loss": 0.0003, + "step": 19036 + }, + { + "epoch": 17.76, + "learning_rate": 4.7779850746268664e-05, + "loss": 0.0006, + "step": 19040 + }, + { + "epoch": 17.76, + "learning_rate": 4.777938432835821e-05, + "loss": 0.001, + "step": 19044 + }, + { + "epoch": 17.77, + "learning_rate": 4.777891791044776e-05, + "loss": 0.0008, + "step": 19048 + }, + { + "epoch": 17.77, + "learning_rate": 4.7778451492537315e-05, + "loss": 0.0005, + "step": 19052 + }, + { + "epoch": 17.78, + "learning_rate": 4.777798507462687e-05, + "loss": 0.0007, + "step": 19056 + }, + { + "epoch": 17.78, + "learning_rate": 4.777751865671642e-05, + "loss": 0.0002, + "step": 19060 + }, + { + "epoch": 17.78, + "learning_rate": 4.777705223880597e-05, + "loss": 0.0003, + "step": 19064 + }, + { + "epoch": 17.79, + "learning_rate": 4.777658582089553e-05, + "loss": 0.001, + "step": 19068 + }, + { + "epoch": 17.79, + "learning_rate": 4.7776119402985076e-05, + "loss": 0.0006, + "step": 19072 + }, + { + "epoch": 17.79, + "learning_rate": 4.777565298507463e-05, + "loss": 0.0043, + "step": 19076 + }, + { + "epoch": 17.8, + "learning_rate": 4.777518656716418e-05, + "loss": 0.002, + "step": 19080 + }, + { + "epoch": 17.8, + "learning_rate": 4.7774720149253734e-05, + "loss": 0.0013, + "step": 19084 + }, + { + "epoch": 17.81, + "learning_rate": 4.777425373134329e-05, + "loss": 0.0001, + "step": 19088 + }, + { + "epoch": 17.81, + "learning_rate": 4.777378731343284e-05, + "loss": 0.004, + "step": 19092 + }, + { + "epoch": 17.81, + "learning_rate": 4.777332089552239e-05, + "loss": 0.0023, + "step": 19096 + }, + { + "epoch": 17.82, + "learning_rate": 4.777285447761195e-05, + "loss": 0.0003, + "step": 19100 + }, + { + "epoch": 17.82, + "learning_rate": 4.7772388059701495e-05, + "loss": 0.001, + "step": 19104 + }, + { + "epoch": 17.82, + "learning_rate": 4.777192164179104e-05, + "loss": 0.0035, + "step": 19108 + }, + { + "epoch": 17.83, + "learning_rate": 4.77714552238806e-05, + "loss": 0.0021, + "step": 19112 + }, + { + "epoch": 17.83, + "learning_rate": 4.777098880597015e-05, + "loss": 0.0005, + "step": 19116 + }, + { + "epoch": 17.84, + "learning_rate": 4.77705223880597e-05, + "loss": 0.0005, + "step": 19120 + }, + { + "epoch": 17.84, + "learning_rate": 4.7770055970149256e-05, + "loss": 0.0004, + "step": 19124 + }, + { + "epoch": 17.84, + "learning_rate": 4.776958955223881e-05, + "loss": 0.0001, + "step": 19128 + }, + { + "epoch": 17.85, + "learning_rate": 4.776912313432836e-05, + "loss": 0.0002, + "step": 19132 + }, + { + "epoch": 17.85, + "learning_rate": 4.7768656716417914e-05, + "loss": 0.0117, + "step": 19136 + }, + { + "epoch": 17.85, + "learning_rate": 4.776819029850746e-05, + "loss": 0.0008, + "step": 19140 + }, + { + "epoch": 17.86, + "learning_rate": 4.776772388059702e-05, + "loss": 0.0037, + "step": 19144 + }, + { + "epoch": 17.86, + "learning_rate": 4.776725746268657e-05, + "loss": 0.0002, + "step": 19148 + }, + { + "epoch": 17.87, + "learning_rate": 4.776679104477612e-05, + "loss": 0.0005, + "step": 19152 + }, + { + "epoch": 17.87, + "learning_rate": 4.7766324626865675e-05, + "loss": 0.0026, + "step": 19156 + }, + { + "epoch": 17.87, + "learning_rate": 4.776585820895523e-05, + "loss": 0.0021, + "step": 19160 + }, + { + "epoch": 17.88, + "learning_rate": 4.776539179104478e-05, + "loss": 0.0062, + "step": 19164 + }, + { + "epoch": 17.88, + "learning_rate": 4.7764925373134326e-05, + "loss": 0.002, + "step": 19168 + }, + { + "epoch": 17.88, + "learning_rate": 4.776445895522388e-05, + "loss": 0.0011, + "step": 19172 + }, + { + "epoch": 17.89, + "learning_rate": 4.7763992537313436e-05, + "loss": 0.0026, + "step": 19176 + }, + { + "epoch": 17.89, + "learning_rate": 4.7763526119402984e-05, + "loss": 0.0024, + "step": 19180 + }, + { + "epoch": 17.9, + "learning_rate": 4.776305970149254e-05, + "loss": 0.0022, + "step": 19184 + }, + { + "epoch": 17.9, + "learning_rate": 4.7762593283582094e-05, + "loss": 0.0018, + "step": 19188 + }, + { + "epoch": 17.9, + "learning_rate": 4.776212686567164e-05, + "loss": 0.0019, + "step": 19192 + }, + { + "epoch": 17.91, + "learning_rate": 4.77616604477612e-05, + "loss": 0.0001, + "step": 19196 + }, + { + "epoch": 17.91, + "learning_rate": 4.7761194029850745e-05, + "loss": 0.0007, + "step": 19200 + }, + { + "epoch": 17.91, + "learning_rate": 4.776072761194031e-05, + "loss": 0.0002, + "step": 19204 + }, + { + "epoch": 17.92, + "learning_rate": 4.7760261194029855e-05, + "loss": 0.0001, + "step": 19208 + }, + { + "epoch": 17.92, + "learning_rate": 4.77597947761194e-05, + "loss": 0.0004, + "step": 19212 + }, + { + "epoch": 17.93, + "learning_rate": 4.775932835820896e-05, + "loss": 0.0008, + "step": 19216 + }, + { + "epoch": 17.93, + "learning_rate": 4.775886194029851e-05, + "loss": 0.001, + "step": 19220 + }, + { + "epoch": 17.93, + "learning_rate": 4.775839552238806e-05, + "loss": 0.001, + "step": 19224 + }, + { + "epoch": 17.94, + "learning_rate": 4.7757929104477616e-05, + "loss": 0.0003, + "step": 19228 + }, + { + "epoch": 17.94, + "learning_rate": 4.7757462686567164e-05, + "loss": 0.0016, + "step": 19232 + }, + { + "epoch": 17.94, + "learning_rate": 4.775699626865672e-05, + "loss": 0.002, + "step": 19236 + }, + { + "epoch": 17.95, + "learning_rate": 4.7756529850746274e-05, + "loss": 0.0033, + "step": 19240 + }, + { + "epoch": 17.95, + "learning_rate": 4.775606343283582e-05, + "loss": 0.0007, + "step": 19244 + }, + { + "epoch": 17.96, + "learning_rate": 4.775559701492538e-05, + "loss": 0.0015, + "step": 19248 + }, + { + "epoch": 17.96, + "learning_rate": 4.775513059701493e-05, + "loss": 0.0029, + "step": 19252 + }, + { + "epoch": 17.96, + "learning_rate": 4.775466417910448e-05, + "loss": 0.0053, + "step": 19256 + }, + { + "epoch": 17.97, + "learning_rate": 4.775419776119403e-05, + "loss": 0.0033, + "step": 19260 + }, + { + "epoch": 17.97, + "learning_rate": 4.775373134328359e-05, + "loss": 0.0094, + "step": 19264 + }, + { + "epoch": 17.97, + "learning_rate": 4.775326492537314e-05, + "loss": 0.0032, + "step": 19268 + }, + { + "epoch": 17.98, + "learning_rate": 4.7752798507462686e-05, + "loss": 0.0007, + "step": 19272 + }, + { + "epoch": 17.98, + "learning_rate": 4.775233208955224e-05, + "loss": 0.0167, + "step": 19276 + }, + { + "epoch": 17.98, + "learning_rate": 4.7751865671641796e-05, + "loss": 0.0062, + "step": 19280 + }, + { + "epoch": 17.99, + "learning_rate": 4.7751399253731344e-05, + "loss": 0.005, + "step": 19284 + }, + { + "epoch": 17.99, + "learning_rate": 4.77509328358209e-05, + "loss": 0.0014, + "step": 19288 + }, + { + "epoch": 18.0, + "learning_rate": 4.775046641791045e-05, + "loss": 0.0047, + "step": 19292 + }, + { + "epoch": 18.0, + "learning_rate": 4.775e-05, + "loss": 0.0049, + "step": 19296 + }, + { + "epoch": 18.0, + "learning_rate": 4.774953358208956e-05, + "loss": 0.0003, + "step": 19300 + }, + { + "epoch": 18.01, + "learning_rate": 4.7749067164179105e-05, + "loss": 0.0048, + "step": 19304 + }, + { + "epoch": 18.01, + "learning_rate": 4.774860074626866e-05, + "loss": 0.001, + "step": 19308 + }, + { + "epoch": 18.01, + "learning_rate": 4.7748134328358215e-05, + "loss": 0.0018, + "step": 19312 + }, + { + "epoch": 18.02, + "learning_rate": 4.774766791044776e-05, + "loss": 0.0004, + "step": 19316 + }, + { + "epoch": 18.02, + "learning_rate": 4.774720149253731e-05, + "loss": 0.0014, + "step": 19320 + }, + { + "epoch": 18.03, + "learning_rate": 4.774673507462687e-05, + "loss": 0.0046, + "step": 19324 + }, + { + "epoch": 18.03, + "learning_rate": 4.774626865671642e-05, + "loss": 0.0002, + "step": 19328 + }, + { + "epoch": 18.03, + "learning_rate": 4.774580223880597e-05, + "loss": 0.0082, + "step": 19332 + }, + { + "epoch": 18.04, + "learning_rate": 4.7745335820895524e-05, + "loss": 0.0046, + "step": 19336 + }, + { + "epoch": 18.04, + "learning_rate": 4.774486940298508e-05, + "loss": 0.0008, + "step": 19340 + }, + { + "epoch": 18.04, + "learning_rate": 4.774440298507463e-05, + "loss": 0.0003, + "step": 19344 + }, + { + "epoch": 18.05, + "learning_rate": 4.774393656716418e-05, + "loss": 0.001, + "step": 19348 + }, + { + "epoch": 18.05, + "learning_rate": 4.774347014925373e-05, + "loss": 0.002, + "step": 19352 + }, + { + "epoch": 18.06, + "learning_rate": 4.7743003731343285e-05, + "loss": 0.0033, + "step": 19356 + }, + { + "epoch": 18.06, + "learning_rate": 4.774253731343284e-05, + "loss": 0.0005, + "step": 19360 + }, + { + "epoch": 18.06, + "learning_rate": 4.774207089552239e-05, + "loss": 0.0007, + "step": 19364 + }, + { + "epoch": 18.07, + "learning_rate": 4.774160447761194e-05, + "loss": 0.0035, + "step": 19368 + }, + { + "epoch": 18.07, + "learning_rate": 4.77411380597015e-05, + "loss": 0.0001, + "step": 19372 + }, + { + "epoch": 18.07, + "learning_rate": 4.7740671641791046e-05, + "loss": 0.0002, + "step": 19376 + }, + { + "epoch": 18.08, + "learning_rate": 4.77402052238806e-05, + "loss": 0.0058, + "step": 19380 + }, + { + "epoch": 18.08, + "learning_rate": 4.773973880597015e-05, + "loss": 0.0005, + "step": 19384 + }, + { + "epoch": 18.09, + "learning_rate": 4.7739272388059704e-05, + "loss": 0.002, + "step": 19388 + }, + { + "epoch": 18.09, + "learning_rate": 4.773880597014926e-05, + "loss": 0.0095, + "step": 19392 + }, + { + "epoch": 18.09, + "learning_rate": 4.773833955223881e-05, + "loss": 0.0022, + "step": 19396 + }, + { + "epoch": 18.1, + "learning_rate": 4.773787313432836e-05, + "loss": 0.0041, + "step": 19400 + }, + { + "epoch": 18.1, + "learning_rate": 4.773740671641792e-05, + "loss": 0.0015, + "step": 19404 + }, + { + "epoch": 18.1, + "learning_rate": 4.7736940298507465e-05, + "loss": 0.0038, + "step": 19408 + }, + { + "epoch": 18.11, + "learning_rate": 4.773647388059701e-05, + "loss": 0.0002, + "step": 19412 + }, + { + "epoch": 18.11, + "learning_rate": 4.7736007462686575e-05, + "loss": 0.0033, + "step": 19416 + }, + { + "epoch": 18.12, + "learning_rate": 4.773554104477612e-05, + "loss": 0.0003, + "step": 19420 + }, + { + "epoch": 18.12, + "learning_rate": 4.773507462686567e-05, + "loss": 0.0005, + "step": 19424 + }, + { + "epoch": 18.12, + "learning_rate": 4.7734608208955226e-05, + "loss": 0.0003, + "step": 19428 + }, + { + "epoch": 18.13, + "learning_rate": 4.773414179104478e-05, + "loss": 0.0009, + "step": 19432 + }, + { + "epoch": 18.13, + "learning_rate": 4.773367537313433e-05, + "loss": 0.0189, + "step": 19436 + }, + { + "epoch": 18.13, + "learning_rate": 4.7733208955223884e-05, + "loss": 0.0036, + "step": 19440 + }, + { + "epoch": 18.14, + "learning_rate": 4.773274253731343e-05, + "loss": 0.0008, + "step": 19444 + }, + { + "epoch": 18.14, + "learning_rate": 4.773227611940299e-05, + "loss": 0.0014, + "step": 19448 + }, + { + "epoch": 18.15, + "learning_rate": 4.773180970149254e-05, + "loss": 0.001, + "step": 19452 + }, + { + "epoch": 18.15, + "learning_rate": 4.773134328358209e-05, + "loss": 0.0032, + "step": 19456 + }, + { + "epoch": 18.15, + "learning_rate": 4.7730876865671645e-05, + "loss": 0.0025, + "step": 19460 + }, + { + "epoch": 18.16, + "learning_rate": 4.77304104477612e-05, + "loss": 0.0018, + "step": 19464 + }, + { + "epoch": 18.16, + "learning_rate": 4.772994402985075e-05, + "loss": 0.0002, + "step": 19468 + }, + { + "epoch": 18.16, + "learning_rate": 4.7729477611940296e-05, + "loss": 0.0008, + "step": 19472 + }, + { + "epoch": 18.17, + "learning_rate": 4.772901119402986e-05, + "loss": 0.001, + "step": 19476 + }, + { + "epoch": 18.17, + "learning_rate": 4.7728544776119406e-05, + "loss": 0.0002, + "step": 19480 + }, + { + "epoch": 18.18, + "learning_rate": 4.7728078358208954e-05, + "loss": 0.0, + "step": 19484 + }, + { + "epoch": 18.18, + "learning_rate": 4.772761194029851e-05, + "loss": 0.0005, + "step": 19488 + }, + { + "epoch": 18.18, + "learning_rate": 4.7727145522388064e-05, + "loss": 0.0026, + "step": 19492 + }, + { + "epoch": 18.19, + "learning_rate": 4.772667910447761e-05, + "loss": 0.001, + "step": 19496 + }, + { + "epoch": 18.19, + "learning_rate": 4.772621268656717e-05, + "loss": 0.0037, + "step": 19500 + }, + { + "epoch": 18.19, + "eval_exact_match": 0.7292069632495164, + "eval_exec": 0.7562862669245648, + "eval_loss": 0.34727153182029724, + "eval_runtime": 1047.0187, + "eval_samples_per_second": 0.988, + "step": 19500 + }, + { + "epoch": 18.19, + "learning_rate": 4.7725746268656715e-05, + "loss": 0.0025, + "step": 19504 + }, + { + "epoch": 18.2, + "learning_rate": 4.772527985074627e-05, + "loss": 0.0047, + "step": 19508 + }, + { + "epoch": 18.2, + "learning_rate": 4.7724813432835825e-05, + "loss": 0.0004, + "step": 19512 + }, + { + "epoch": 18.21, + "learning_rate": 4.772434701492537e-05, + "loss": 0.0015, + "step": 19516 + }, + { + "epoch": 18.21, + "learning_rate": 4.772388059701493e-05, + "loss": 0.0003, + "step": 19520 + }, + { + "epoch": 18.21, + "learning_rate": 4.772341417910448e-05, + "loss": 0.0003, + "step": 19524 + }, + { + "epoch": 18.22, + "learning_rate": 4.772294776119403e-05, + "loss": 0.001, + "step": 19528 + }, + { + "epoch": 18.22, + "learning_rate": 4.7722481343283586e-05, + "loss": 0.003, + "step": 19532 + }, + { + "epoch": 18.22, + "learning_rate": 4.772201492537314e-05, + "loss": 0.0019, + "step": 19536 + }, + { + "epoch": 18.23, + "learning_rate": 4.772154850746269e-05, + "loss": 0.0001, + "step": 19540 + }, + { + "epoch": 18.23, + "learning_rate": 4.7721082089552244e-05, + "loss": 0.0022, + "step": 19544 + }, + { + "epoch": 18.24, + "learning_rate": 4.772061567164179e-05, + "loss": 0.0008, + "step": 19548 + }, + { + "epoch": 18.24, + "learning_rate": 4.7720149253731347e-05, + "loss": 0.001, + "step": 19552 + }, + { + "epoch": 18.24, + "learning_rate": 4.77196828358209e-05, + "loss": 0.0003, + "step": 19556 + }, + { + "epoch": 18.25, + "learning_rate": 4.771921641791045e-05, + "loss": 0.0001, + "step": 19560 + }, + { + "epoch": 18.25, + "learning_rate": 4.771875e-05, + "loss": 0.0001, + "step": 19564 + }, + { + "epoch": 18.25, + "learning_rate": 4.771828358208956e-05, + "loss": 0.0018, + "step": 19568 + }, + { + "epoch": 18.26, + "learning_rate": 4.771781716417911e-05, + "loss": 0.0018, + "step": 19572 + }, + { + "epoch": 18.26, + "learning_rate": 4.7717350746268656e-05, + "loss": 0.001, + "step": 19576 + }, + { + "epoch": 18.26, + "learning_rate": 4.771688432835821e-05, + "loss": 0.0007, + "step": 19580 + }, + { + "epoch": 18.27, + "learning_rate": 4.7716417910447766e-05, + "loss": 0.0019, + "step": 19584 + }, + { + "epoch": 18.27, + "learning_rate": 4.7715951492537314e-05, + "loss": 0.0018, + "step": 19588 + }, + { + "epoch": 18.28, + "learning_rate": 4.771548507462687e-05, + "loss": 0.0076, + "step": 19592 + }, + { + "epoch": 18.28, + "learning_rate": 4.7715018656716423e-05, + "loss": 0.0002, + "step": 19596 + }, + { + "epoch": 18.28, + "learning_rate": 4.771455223880597e-05, + "loss": 0.0102, + "step": 19600 + }, + { + "epoch": 18.29, + "learning_rate": 4.7714085820895527e-05, + "loss": 0.0018, + "step": 19604 + }, + { + "epoch": 18.29, + "learning_rate": 4.7713619402985075e-05, + "loss": 0.0008, + "step": 19608 + }, + { + "epoch": 18.29, + "learning_rate": 4.771315298507463e-05, + "loss": 0.0012, + "step": 19612 + }, + { + "epoch": 18.3, + "learning_rate": 4.7712686567164184e-05, + "loss": 0.002, + "step": 19616 + }, + { + "epoch": 18.3, + "learning_rate": 4.771222014925373e-05, + "loss": 0.0046, + "step": 19620 + }, + { + "epoch": 18.31, + "learning_rate": 4.771175373134328e-05, + "loss": 0.0061, + "step": 19624 + }, + { + "epoch": 18.31, + "learning_rate": 4.771128731343284e-05, + "loss": 0.001, + "step": 19628 + }, + { + "epoch": 18.31, + "learning_rate": 4.771082089552239e-05, + "loss": 0.002, + "step": 19632 + }, + { + "epoch": 18.32, + "learning_rate": 4.771035447761194e-05, + "loss": 0.0012, + "step": 19636 + }, + { + "epoch": 18.32, + "learning_rate": 4.7709888059701494e-05, + "loss": 0.0, + "step": 19640 + }, + { + "epoch": 18.32, + "learning_rate": 4.770942164179105e-05, + "loss": 0.0019, + "step": 19644 + }, + { + "epoch": 18.33, + "learning_rate": 4.77089552238806e-05, + "loss": 0.0005, + "step": 19648 + }, + { + "epoch": 18.33, + "learning_rate": 4.770848880597015e-05, + "loss": 0.0058, + "step": 19652 + }, + { + "epoch": 18.34, + "learning_rate": 4.7708022388059706e-05, + "loss": 0.0007, + "step": 19656 + }, + { + "epoch": 18.34, + "learning_rate": 4.7707555970149255e-05, + "loss": 0.001, + "step": 19660 + }, + { + "epoch": 18.34, + "learning_rate": 4.770708955223881e-05, + "loss": 0.0023, + "step": 19664 + }, + { + "epoch": 18.35, + "learning_rate": 4.770662313432836e-05, + "loss": 0.0056, + "step": 19668 + }, + { + "epoch": 18.35, + "learning_rate": 4.770615671641791e-05, + "loss": 0.0024, + "step": 19672 + }, + { + "epoch": 18.35, + "learning_rate": 4.770569029850747e-05, + "loss": 0.0051, + "step": 19676 + }, + { + "epoch": 18.36, + "learning_rate": 4.7705223880597016e-05, + "loss": 0.0016, + "step": 19680 + }, + { + "epoch": 18.36, + "learning_rate": 4.7704757462686564e-05, + "loss": 0.0104, + "step": 19684 + }, + { + "epoch": 18.37, + "learning_rate": 4.7704291044776125e-05, + "loss": 0.0003, + "step": 19688 + }, + { + "epoch": 18.37, + "learning_rate": 4.7703824626865673e-05, + "loss": 0.0033, + "step": 19692 + }, + { + "epoch": 18.37, + "learning_rate": 4.770335820895523e-05, + "loss": 0.0005, + "step": 19696 + }, + { + "epoch": 18.38, + "learning_rate": 4.7702891791044777e-05, + "loss": 0.0015, + "step": 19700 + }, + { + "epoch": 18.38, + "learning_rate": 4.770242537313433e-05, + "loss": 0.0002, + "step": 19704 + }, + { + "epoch": 18.38, + "learning_rate": 4.7701958955223886e-05, + "loss": 0.0003, + "step": 19708 + }, + { + "epoch": 18.39, + "learning_rate": 4.7701492537313434e-05, + "loss": 0.0054, + "step": 19712 + }, + { + "epoch": 18.39, + "learning_rate": 4.770102611940299e-05, + "loss": 0.0025, + "step": 19716 + }, + { + "epoch": 18.4, + "learning_rate": 4.7700559701492544e-05, + "loss": 0.0014, + "step": 19720 + }, + { + "epoch": 18.4, + "learning_rate": 4.770009328358209e-05, + "loss": 0.0006, + "step": 19724 + }, + { + "epoch": 18.4, + "learning_rate": 4.769962686567164e-05, + "loss": 0.0008, + "step": 19728 + }, + { + "epoch": 18.41, + "learning_rate": 4.7699160447761195e-05, + "loss": 0.0006, + "step": 19732 + }, + { + "epoch": 18.41, + "learning_rate": 4.769869402985075e-05, + "loss": 0.0002, + "step": 19736 + }, + { + "epoch": 18.41, + "learning_rate": 4.76982276119403e-05, + "loss": 0.0044, + "step": 19740 + }, + { + "epoch": 18.42, + "learning_rate": 4.7697761194029853e-05, + "loss": 0.001, + "step": 19744 + }, + { + "epoch": 18.42, + "learning_rate": 4.769729477611941e-05, + "loss": 0.0011, + "step": 19748 + }, + { + "epoch": 18.43, + "learning_rate": 4.7696828358208956e-05, + "loss": 0.0009, + "step": 19752 + }, + { + "epoch": 18.43, + "learning_rate": 4.769636194029851e-05, + "loss": 0.0032, + "step": 19756 + }, + { + "epoch": 18.43, + "learning_rate": 4.769589552238806e-05, + "loss": 0.001, + "step": 19760 + }, + { + "epoch": 18.44, + "learning_rate": 4.7695429104477614e-05, + "loss": 0.0008, + "step": 19764 + }, + { + "epoch": 18.44, + "learning_rate": 4.769496268656717e-05, + "loss": 0.0022, + "step": 19768 + }, + { + "epoch": 18.44, + "learning_rate": 4.769449626865672e-05, + "loss": 0.0001, + "step": 19772 + }, + { + "epoch": 18.45, + "learning_rate": 4.769402985074627e-05, + "loss": 0.0003, + "step": 19776 + }, + { + "epoch": 18.45, + "learning_rate": 4.769356343283583e-05, + "loss": 0.0043, + "step": 19780 + }, + { + "epoch": 18.46, + "learning_rate": 4.7693097014925375e-05, + "loss": 0.0003, + "step": 19784 + }, + { + "epoch": 18.46, + "learning_rate": 4.7692630597014924e-05, + "loss": 0.0007, + "step": 19788 + }, + { + "epoch": 18.46, + "learning_rate": 4.769216417910448e-05, + "loss": 0.0013, + "step": 19792 + }, + { + "epoch": 18.47, + "learning_rate": 4.769169776119403e-05, + "loss": 0.0001, + "step": 19796 + }, + { + "epoch": 18.47, + "learning_rate": 4.769123134328358e-05, + "loss": 0.0005, + "step": 19800 + }, + { + "epoch": 18.47, + "learning_rate": 4.7690764925373136e-05, + "loss": 0.0015, + "step": 19804 + }, + { + "epoch": 18.48, + "learning_rate": 4.769029850746269e-05, + "loss": 0.0018, + "step": 19808 + }, + { + "epoch": 18.48, + "learning_rate": 4.768983208955224e-05, + "loss": 0.0104, + "step": 19812 + }, + { + "epoch": 18.49, + "learning_rate": 4.7689365671641794e-05, + "loss": 0.0005, + "step": 19816 + }, + { + "epoch": 18.49, + "learning_rate": 4.768889925373134e-05, + "loss": 0.0006, + "step": 19820 + }, + { + "epoch": 18.49, + "learning_rate": 4.76884328358209e-05, + "loss": 0.0001, + "step": 19824 + }, + { + "epoch": 18.5, + "learning_rate": 4.768796641791045e-05, + "loss": 0.0131, + "step": 19828 + }, + { + "epoch": 18.5, + "learning_rate": 4.76875e-05, + "loss": 0.0006, + "step": 19832 + }, + { + "epoch": 18.5, + "learning_rate": 4.7687033582089555e-05, + "loss": 0.0004, + "step": 19836 + }, + { + "epoch": 18.51, + "learning_rate": 4.768656716417911e-05, + "loss": 0.0004, + "step": 19840 + }, + { + "epoch": 18.51, + "learning_rate": 4.768610074626866e-05, + "loss": 0.0006, + "step": 19844 + }, + { + "epoch": 18.51, + "learning_rate": 4.7685634328358206e-05, + "loss": 0.0004, + "step": 19848 + }, + { + "epoch": 18.52, + "learning_rate": 4.768516791044776e-05, + "loss": 0.003, + "step": 19852 + }, + { + "epoch": 18.52, + "learning_rate": 4.7684701492537316e-05, + "loss": 0.0117, + "step": 19856 + }, + { + "epoch": 18.53, + "learning_rate": 4.768423507462687e-05, + "loss": 0.0017, + "step": 19860 + }, + { + "epoch": 18.53, + "learning_rate": 4.768376865671642e-05, + "loss": 0.003, + "step": 19864 + }, + { + "epoch": 18.53, + "learning_rate": 4.7683302238805974e-05, + "loss": 0.0009, + "step": 19868 + }, + { + "epoch": 18.54, + "learning_rate": 4.768283582089553e-05, + "loss": 0.0057, + "step": 19872 + }, + { + "epoch": 18.54, + "learning_rate": 4.768236940298508e-05, + "loss": 0.0018, + "step": 19876 + }, + { + "epoch": 18.54, + "learning_rate": 4.7681902985074625e-05, + "loss": 0.0009, + "step": 19880 + }, + { + "epoch": 18.55, + "learning_rate": 4.768143656716419e-05, + "loss": 0.0014, + "step": 19884 + }, + { + "epoch": 18.55, + "learning_rate": 4.7680970149253735e-05, + "loss": 0.0004, + "step": 19888 + }, + { + "epoch": 18.56, + "learning_rate": 4.768050373134328e-05, + "loss": 0.0036, + "step": 19892 + }, + { + "epoch": 18.56, + "learning_rate": 4.768003731343284e-05, + "loss": 0.0022, + "step": 19896 + }, + { + "epoch": 18.56, + "learning_rate": 4.767957089552239e-05, + "loss": 0.0007, + "step": 19900 + }, + { + "epoch": 18.57, + "learning_rate": 4.767910447761194e-05, + "loss": 0.0018, + "step": 19904 + }, + { + "epoch": 18.57, + "learning_rate": 4.7678638059701496e-05, + "loss": 0.0004, + "step": 19908 + }, + { + "epoch": 18.57, + "learning_rate": 4.7678171641791044e-05, + "loss": 0.0002, + "step": 19912 + }, + { + "epoch": 18.58, + "learning_rate": 4.76777052238806e-05, + "loss": 0.0021, + "step": 19916 + }, + { + "epoch": 18.58, + "learning_rate": 4.7677238805970154e-05, + "loss": 0.0014, + "step": 19920 + }, + { + "epoch": 18.59, + "learning_rate": 4.76767723880597e-05, + "loss": 0.014, + "step": 19924 + }, + { + "epoch": 18.59, + "learning_rate": 4.767630597014926e-05, + "loss": 0.0028, + "step": 19928 + }, + { + "epoch": 18.59, + "learning_rate": 4.767583955223881e-05, + "loss": 0.0004, + "step": 19932 + }, + { + "epoch": 18.6, + "learning_rate": 4.767537313432836e-05, + "loss": 0.0009, + "step": 19936 + }, + { + "epoch": 18.6, + "learning_rate": 4.767490671641791e-05, + "loss": 0.0011, + "step": 19940 + }, + { + "epoch": 18.6, + "learning_rate": 4.767444029850747e-05, + "loss": 0.0016, + "step": 19944 + }, + { + "epoch": 18.61, + "learning_rate": 4.767397388059702e-05, + "loss": 0.0238, + "step": 19948 + }, + { + "epoch": 18.61, + "learning_rate": 4.7673507462686566e-05, + "loss": 0.0036, + "step": 19952 + }, + { + "epoch": 18.62, + "learning_rate": 4.767304104477612e-05, + "loss": 0.0089, + "step": 19956 + }, + { + "epoch": 18.62, + "learning_rate": 4.7672574626865676e-05, + "loss": 0.0041, + "step": 19960 + }, + { + "epoch": 18.62, + "learning_rate": 4.7672108208955224e-05, + "loss": 0.0005, + "step": 19964 + }, + { + "epoch": 18.63, + "learning_rate": 4.767164179104478e-05, + "loss": 0.0002, + "step": 19968 + }, + { + "epoch": 18.63, + "learning_rate": 4.767117537313433e-05, + "loss": 0.0013, + "step": 19972 + }, + { + "epoch": 18.63, + "learning_rate": 4.767070895522388e-05, + "loss": 0.0007, + "step": 19976 + }, + { + "epoch": 18.64, + "learning_rate": 4.767024253731344e-05, + "loss": 0.0004, + "step": 19980 + }, + { + "epoch": 18.64, + "learning_rate": 4.7669776119402985e-05, + "loss": 0.001, + "step": 19984 + }, + { + "epoch": 18.65, + "learning_rate": 4.766930970149254e-05, + "loss": 0.0006, + "step": 19988 + }, + { + "epoch": 18.65, + "learning_rate": 4.7668843283582095e-05, + "loss": 0.0007, + "step": 19992 + }, + { + "epoch": 18.65, + "learning_rate": 4.766837686567164e-05, + "loss": 0.0006, + "step": 19996 + }, + { + "epoch": 18.66, + "learning_rate": 4.766791044776119e-05, + "loss": 0.0025, + "step": 20000 + }, + { + "epoch": 18.66, + "eval_exact_match": 0.7263056092843327, + "eval_exec": 0.7543520309477756, + "eval_loss": 0.3301263451576233, + "eval_runtime": 1520.1707, + "eval_samples_per_second": 0.68, + "step": 20000 + }, + { + "epoch": 18.66, + "learning_rate": 4.766744402985075e-05, + "loss": 0.0024, + "step": 20004 + }, + { + "epoch": 18.66, + "learning_rate": 4.76669776119403e-05, + "loss": 0.0036, + "step": 20008 + }, + { + "epoch": 18.67, + "learning_rate": 4.766651119402985e-05, + "loss": 0.0009, + "step": 20012 + }, + { + "epoch": 18.67, + "learning_rate": 4.7666044776119404e-05, + "loss": 0.001, + "step": 20016 + }, + { + "epoch": 18.68, + "learning_rate": 4.766557835820896e-05, + "loss": 0.0146, + "step": 20020 + }, + { + "epoch": 18.68, + "learning_rate": 4.7665111940298514e-05, + "loss": 0.0006, + "step": 20024 + }, + { + "epoch": 18.68, + "learning_rate": 4.766464552238806e-05, + "loss": 0.0068, + "step": 20028 + }, + { + "epoch": 18.69, + "learning_rate": 4.766417910447761e-05, + "loss": 0.0021, + "step": 20032 + }, + { + "epoch": 18.69, + "learning_rate": 4.766371268656717e-05, + "loss": 0.0053, + "step": 20036 + }, + { + "epoch": 18.69, + "learning_rate": 4.766324626865672e-05, + "loss": 0.0006, + "step": 20040 + }, + { + "epoch": 18.7, + "learning_rate": 4.766277985074627e-05, + "loss": 0.0008, + "step": 20044 + }, + { + "epoch": 18.7, + "learning_rate": 4.766231343283582e-05, + "loss": 0.0032, + "step": 20048 + }, + { + "epoch": 18.71, + "learning_rate": 4.766184701492538e-05, + "loss": 0.0026, + "step": 20052 + }, + { + "epoch": 18.71, + "learning_rate": 4.7661380597014926e-05, + "loss": 0.0032, + "step": 20056 + }, + { + "epoch": 18.71, + "learning_rate": 4.766091417910448e-05, + "loss": 0.0131, + "step": 20060 + }, + { + "epoch": 18.72, + "learning_rate": 4.766044776119403e-05, + "loss": 0.0026, + "step": 20064 + }, + { + "epoch": 18.72, + "learning_rate": 4.7659981343283584e-05, + "loss": 0.0009, + "step": 20068 + }, + { + "epoch": 18.72, + "learning_rate": 4.765951492537314e-05, + "loss": 0.0003, + "step": 20072 + }, + { + "epoch": 18.73, + "learning_rate": 4.765904850746269e-05, + "loss": 0.0007, + "step": 20076 + }, + { + "epoch": 18.73, + "learning_rate": 4.765858208955224e-05, + "loss": 0.0048, + "step": 20080 + }, + { + "epoch": 18.73, + "learning_rate": 4.76581156716418e-05, + "loss": 0.0032, + "step": 20084 + }, + { + "epoch": 18.74, + "learning_rate": 4.7657649253731345e-05, + "loss": 0.0023, + "step": 20088 + }, + { + "epoch": 18.74, + "learning_rate": 4.765718283582089e-05, + "loss": 0.0005, + "step": 20092 + }, + { + "epoch": 18.75, + "learning_rate": 4.7656716417910455e-05, + "loss": 0.0006, + "step": 20096 + }, + { + "epoch": 18.75, + "learning_rate": 4.765625e-05, + "loss": 0.0026, + "step": 20100 + }, + { + "epoch": 18.75, + "learning_rate": 4.765578358208955e-05, + "loss": 0.0002, + "step": 20104 + }, + { + "epoch": 18.76, + "learning_rate": 4.7655317164179106e-05, + "loss": 0.0031, + "step": 20108 + }, + { + "epoch": 18.76, + "learning_rate": 4.765485074626866e-05, + "loss": 0.0003, + "step": 20112 + }, + { + "epoch": 18.76, + "learning_rate": 4.765438432835821e-05, + "loss": 0.0017, + "step": 20116 + }, + { + "epoch": 18.77, + "learning_rate": 4.7653917910447764e-05, + "loss": 0.0003, + "step": 20120 + }, + { + "epoch": 18.77, + "learning_rate": 4.765345149253731e-05, + "loss": 0.0011, + "step": 20124 + }, + { + "epoch": 18.78, + "learning_rate": 4.765298507462687e-05, + "loss": 0.0013, + "step": 20128 + }, + { + "epoch": 18.78, + "learning_rate": 4.765251865671642e-05, + "loss": 0.0071, + "step": 20132 + }, + { + "epoch": 18.78, + "learning_rate": 4.765205223880597e-05, + "loss": 0.0006, + "step": 20136 + }, + { + "epoch": 18.79, + "learning_rate": 4.7651585820895525e-05, + "loss": 0.0009, + "step": 20140 + }, + { + "epoch": 18.79, + "learning_rate": 4.765111940298508e-05, + "loss": 0.0018, + "step": 20144 + }, + { + "epoch": 18.79, + "learning_rate": 4.765065298507463e-05, + "loss": 0.0144, + "step": 20148 + }, + { + "epoch": 18.8, + "learning_rate": 4.7650186567164176e-05, + "loss": 0.0011, + "step": 20152 + }, + { + "epoch": 18.8, + "learning_rate": 4.764972014925374e-05, + "loss": 0.0033, + "step": 20156 + }, + { + "epoch": 18.81, + "learning_rate": 4.7649253731343286e-05, + "loss": 0.0064, + "step": 20160 + }, + { + "epoch": 18.81, + "learning_rate": 4.7648787313432834e-05, + "loss": 0.0016, + "step": 20164 + }, + { + "epoch": 18.81, + "learning_rate": 4.764832089552239e-05, + "loss": 0.0014, + "step": 20168 + }, + { + "epoch": 18.82, + "learning_rate": 4.7647854477611944e-05, + "loss": 0.0002, + "step": 20172 + }, + { + "epoch": 18.82, + "learning_rate": 4.764738805970149e-05, + "loss": 0.0019, + "step": 20176 + }, + { + "epoch": 18.82, + "learning_rate": 4.764692164179105e-05, + "loss": 0.0024, + "step": 20180 + }, + { + "epoch": 18.83, + "learning_rate": 4.7646455223880595e-05, + "loss": 0.005, + "step": 20184 + }, + { + "epoch": 18.83, + "learning_rate": 4.764598880597016e-05, + "loss": 0.0004, + "step": 20188 + }, + { + "epoch": 18.84, + "learning_rate": 4.7645522388059705e-05, + "loss": 0.0003, + "step": 20192 + }, + { + "epoch": 18.84, + "learning_rate": 4.764505597014925e-05, + "loss": 0.0008, + "step": 20196 + }, + { + "epoch": 18.84, + "learning_rate": 4.764458955223881e-05, + "loss": 0.0009, + "step": 20200 + }, + { + "epoch": 18.85, + "learning_rate": 4.764412313432836e-05, + "loss": 0.0004, + "step": 20204 + }, + { + "epoch": 18.85, + "learning_rate": 4.764365671641791e-05, + "loss": 0.0062, + "step": 20208 + }, + { + "epoch": 18.85, + "learning_rate": 4.7643190298507466e-05, + "loss": 0.0042, + "step": 20212 + }, + { + "epoch": 18.86, + "learning_rate": 4.764272388059702e-05, + "loss": 0.0016, + "step": 20216 + }, + { + "epoch": 18.86, + "learning_rate": 4.764225746268657e-05, + "loss": 0.0016, + "step": 20220 + }, + { + "epoch": 18.87, + "learning_rate": 4.7641791044776124e-05, + "loss": 0.0009, + "step": 20224 + }, + { + "epoch": 18.87, + "learning_rate": 4.764132462686567e-05, + "loss": 0.0018, + "step": 20228 + }, + { + "epoch": 18.87, + "learning_rate": 4.764085820895523e-05, + "loss": 0.0009, + "step": 20232 + }, + { + "epoch": 18.88, + "learning_rate": 4.764039179104478e-05, + "loss": 0.0021, + "step": 20236 + }, + { + "epoch": 18.88, + "learning_rate": 4.763992537313433e-05, + "loss": 0.0036, + "step": 20240 + }, + { + "epoch": 18.88, + "learning_rate": 4.763945895522388e-05, + "loss": 0.0006, + "step": 20244 + }, + { + "epoch": 18.89, + "learning_rate": 4.763899253731344e-05, + "loss": 0.0013, + "step": 20248 + }, + { + "epoch": 18.89, + "learning_rate": 4.763852611940299e-05, + "loss": 0.0019, + "step": 20252 + }, + { + "epoch": 18.9, + "learning_rate": 4.7638059701492536e-05, + "loss": 0.0003, + "step": 20256 + }, + { + "epoch": 18.9, + "learning_rate": 4.763759328358209e-05, + "loss": 0.0237, + "step": 20260 + }, + { + "epoch": 18.9, + "learning_rate": 4.7637126865671646e-05, + "loss": 0.0001, + "step": 20264 + }, + { + "epoch": 18.91, + "learning_rate": 4.7636660447761194e-05, + "loss": 0.0005, + "step": 20268 + }, + { + "epoch": 18.91, + "learning_rate": 4.763619402985075e-05, + "loss": 0.0016, + "step": 20272 + }, + { + "epoch": 18.91, + "learning_rate": 4.7635727611940304e-05, + "loss": 0.0026, + "step": 20276 + }, + { + "epoch": 18.92, + "learning_rate": 4.763526119402985e-05, + "loss": 0.0013, + "step": 20280 + }, + { + "epoch": 18.92, + "learning_rate": 4.763479477611941e-05, + "loss": 0.0007, + "step": 20284 + }, + { + "epoch": 18.93, + "learning_rate": 4.7634328358208955e-05, + "loss": 0.0037, + "step": 20288 + }, + { + "epoch": 18.93, + "learning_rate": 4.763386194029851e-05, + "loss": 0.0016, + "step": 20292 + }, + { + "epoch": 18.93, + "learning_rate": 4.7633395522388065e-05, + "loss": 0.001, + "step": 20296 + }, + { + "epoch": 18.94, + "learning_rate": 4.763292910447761e-05, + "loss": 0.0004, + "step": 20300 + }, + { + "epoch": 18.94, + "learning_rate": 4.763246268656716e-05, + "loss": 0.0015, + "step": 20304 + }, + { + "epoch": 18.94, + "learning_rate": 4.763199626865672e-05, + "loss": 0.0057, + "step": 20308 + }, + { + "epoch": 18.95, + "learning_rate": 4.763152985074627e-05, + "loss": 0.0001, + "step": 20312 + }, + { + "epoch": 18.95, + "learning_rate": 4.763106343283582e-05, + "loss": 0.0113, + "step": 20316 + }, + { + "epoch": 18.96, + "learning_rate": 4.7630597014925374e-05, + "loss": 0.0017, + "step": 20320 + }, + { + "epoch": 18.96, + "learning_rate": 4.763013059701493e-05, + "loss": 0.0024, + "step": 20324 + }, + { + "epoch": 18.96, + "learning_rate": 4.762966417910448e-05, + "loss": 0.0005, + "step": 20328 + }, + { + "epoch": 18.97, + "learning_rate": 4.762919776119403e-05, + "loss": 0.0051, + "step": 20332 + }, + { + "epoch": 18.97, + "learning_rate": 4.762873134328359e-05, + "loss": 0.0017, + "step": 20336 + }, + { + "epoch": 18.97, + "learning_rate": 4.7628264925373135e-05, + "loss": 0.0002, + "step": 20340 + }, + { + "epoch": 18.98, + "learning_rate": 4.762779850746269e-05, + "loss": 0.0024, + "step": 20344 + }, + { + "epoch": 18.98, + "learning_rate": 4.762733208955224e-05, + "loss": 0.007, + "step": 20348 + }, + { + "epoch": 18.98, + "learning_rate": 4.762686567164179e-05, + "loss": 0.0002, + "step": 20352 + }, + { + "epoch": 18.99, + "learning_rate": 4.762639925373135e-05, + "loss": 0.0005, + "step": 20356 + }, + { + "epoch": 18.99, + "learning_rate": 4.7625932835820896e-05, + "loss": 0.0012, + "step": 20360 + }, + { + "epoch": 19.0, + "learning_rate": 4.762546641791045e-05, + "loss": 0.0012, + "step": 20364 + }, + { + "epoch": 19.0, + "learning_rate": 4.7625000000000006e-05, + "loss": 0.0002, + "step": 20368 + }, + { + "epoch": 19.0, + "learning_rate": 4.7624533582089554e-05, + "loss": 0.0021, + "step": 20372 + }, + { + "epoch": 19.01, + "learning_rate": 4.762406716417911e-05, + "loss": 0.0027, + "step": 20376 + }, + { + "epoch": 19.01, + "learning_rate": 4.762360074626866e-05, + "loss": 0.0007, + "step": 20380 + }, + { + "epoch": 19.01, + "learning_rate": 4.762313432835821e-05, + "loss": 0.004, + "step": 20384 + }, + { + "epoch": 19.02, + "learning_rate": 4.762266791044777e-05, + "loss": 0.0004, + "step": 20388 + }, + { + "epoch": 19.02, + "learning_rate": 4.7622201492537315e-05, + "loss": 0.0001, + "step": 20392 + }, + { + "epoch": 19.03, + "learning_rate": 4.762173507462687e-05, + "loss": 0.0009, + "step": 20396 + }, + { + "epoch": 19.03, + "learning_rate": 4.7621268656716425e-05, + "loss": 0.0089, + "step": 20400 + }, + { + "epoch": 19.03, + "learning_rate": 4.762080223880597e-05, + "loss": 0.0002, + "step": 20404 + }, + { + "epoch": 19.04, + "learning_rate": 4.762033582089552e-05, + "loss": 0.0019, + "step": 20408 + }, + { + "epoch": 19.04, + "learning_rate": 4.7619869402985076e-05, + "loss": 0.0003, + "step": 20412 + }, + { + "epoch": 19.04, + "learning_rate": 4.761940298507463e-05, + "loss": 0.0013, + "step": 20416 + }, + { + "epoch": 19.05, + "learning_rate": 4.761893656716418e-05, + "loss": 0.0034, + "step": 20420 + }, + { + "epoch": 19.05, + "learning_rate": 4.7618470149253734e-05, + "loss": 0.0009, + "step": 20424 + }, + { + "epoch": 19.06, + "learning_rate": 4.761800373134329e-05, + "loss": 0.0014, + "step": 20428 + }, + { + "epoch": 19.06, + "learning_rate": 4.761753731343284e-05, + "loss": 0.0013, + "step": 20432 + }, + { + "epoch": 19.06, + "learning_rate": 4.761707089552239e-05, + "loss": 0.0007, + "step": 20436 + }, + { + "epoch": 19.07, + "learning_rate": 4.761660447761194e-05, + "loss": 0.0006, + "step": 20440 + }, + { + "epoch": 19.07, + "learning_rate": 4.7616138059701495e-05, + "loss": 0.0049, + "step": 20444 + }, + { + "epoch": 19.07, + "learning_rate": 4.761567164179105e-05, + "loss": 0.0066, + "step": 20448 + }, + { + "epoch": 19.08, + "learning_rate": 4.76152052238806e-05, + "loss": 0.0008, + "step": 20452 + }, + { + "epoch": 19.08, + "learning_rate": 4.761473880597015e-05, + "loss": 0.0011, + "step": 20456 + }, + { + "epoch": 19.09, + "learning_rate": 4.761427238805971e-05, + "loss": 0.0002, + "step": 20460 + }, + { + "epoch": 19.09, + "learning_rate": 4.7613805970149256e-05, + "loss": 0.0002, + "step": 20464 + }, + { + "epoch": 19.09, + "learning_rate": 4.7613339552238804e-05, + "loss": 0.0, + "step": 20468 + }, + { + "epoch": 19.1, + "learning_rate": 4.761287313432836e-05, + "loss": 0.0014, + "step": 20472 + }, + { + "epoch": 19.1, + "learning_rate": 4.7612406716417914e-05, + "loss": 0.0011, + "step": 20476 + }, + { + "epoch": 19.1, + "learning_rate": 4.761194029850746e-05, + "loss": 0.0005, + "step": 20480 + }, + { + "epoch": 19.11, + "learning_rate": 4.761147388059702e-05, + "loss": 0.0002, + "step": 20484 + }, + { + "epoch": 19.11, + "learning_rate": 4.761100746268657e-05, + "loss": 0.0027, + "step": 20488 + }, + { + "epoch": 19.12, + "learning_rate": 4.761054104477612e-05, + "loss": 0.0006, + "step": 20492 + }, + { + "epoch": 19.12, + "learning_rate": 4.7610074626865675e-05, + "loss": 0.0019, + "step": 20496 + }, + { + "epoch": 19.12, + "learning_rate": 4.760960820895522e-05, + "loss": 0.0001, + "step": 20500 + }, + { + "epoch": 19.12, + "eval_exact_match": 0.7340425531914894, + "eval_exec": 0.7601547388781431, + "eval_loss": 0.35038140416145325, + "eval_runtime": 1096.2977, + "eval_samples_per_second": 0.943, + "step": 20500 + }, + { + "epoch": 19.13, + "learning_rate": 4.760914179104478e-05, + "loss": 0.001, + "step": 20504 + }, + { + "epoch": 19.13, + "learning_rate": 4.760867537313433e-05, + "loss": 0.0015, + "step": 20508 + }, + { + "epoch": 19.13, + "learning_rate": 4.760820895522388e-05, + "loss": 0.0236, + "step": 20512 + }, + { + "epoch": 19.14, + "learning_rate": 4.7607742537313436e-05, + "loss": 0.0016, + "step": 20516 + }, + { + "epoch": 19.14, + "learning_rate": 4.760727611940299e-05, + "loss": 0.0006, + "step": 20520 + }, + { + "epoch": 19.15, + "learning_rate": 4.760680970149254e-05, + "loss": 0.0004, + "step": 20524 + }, + { + "epoch": 19.15, + "learning_rate": 4.7606343283582094e-05, + "loss": 0.0001, + "step": 20528 + }, + { + "epoch": 19.15, + "learning_rate": 4.760587686567164e-05, + "loss": 0.0006, + "step": 20532 + }, + { + "epoch": 19.16, + "learning_rate": 4.7605410447761197e-05, + "loss": 0.0021, + "step": 20536 + }, + { + "epoch": 19.16, + "learning_rate": 4.760494402985075e-05, + "loss": 0.0039, + "step": 20540 + }, + { + "epoch": 19.16, + "learning_rate": 4.76044776119403e-05, + "loss": 0.0032, + "step": 20544 + }, + { + "epoch": 19.17, + "learning_rate": 4.7604011194029855e-05, + "loss": 0.0026, + "step": 20548 + }, + { + "epoch": 19.17, + "learning_rate": 4.760354477611941e-05, + "loss": 0.0011, + "step": 20552 + }, + { + "epoch": 19.18, + "learning_rate": 4.760307835820896e-05, + "loss": 0.0015, + "step": 20556 + }, + { + "epoch": 19.18, + "learning_rate": 4.7602611940298506e-05, + "loss": 0.0033, + "step": 20560 + }, + { + "epoch": 19.18, + "learning_rate": 4.760214552238807e-05, + "loss": 0.0002, + "step": 20564 + }, + { + "epoch": 19.19, + "learning_rate": 4.7601679104477616e-05, + "loss": 0.002, + "step": 20568 + }, + { + "epoch": 19.19, + "learning_rate": 4.7601212686567164e-05, + "loss": 0.009, + "step": 20572 + }, + { + "epoch": 19.19, + "learning_rate": 4.760074626865672e-05, + "loss": 0.0006, + "step": 20576 + }, + { + "epoch": 19.2, + "learning_rate": 4.7600279850746273e-05, + "loss": 0.0002, + "step": 20580 + }, + { + "epoch": 19.2, + "learning_rate": 4.759981343283582e-05, + "loss": 0.0012, + "step": 20584 + }, + { + "epoch": 19.21, + "learning_rate": 4.7599347014925376e-05, + "loss": 0.0019, + "step": 20588 + }, + { + "epoch": 19.21, + "learning_rate": 4.7598880597014925e-05, + "loss": 0.0005, + "step": 20592 + }, + { + "epoch": 19.21, + "learning_rate": 4.759841417910448e-05, + "loss": 0.0133, + "step": 20596 + }, + { + "epoch": 19.22, + "learning_rate": 4.7597947761194034e-05, + "loss": 0.0001, + "step": 20600 + }, + { + "epoch": 19.22, + "learning_rate": 4.759748134328358e-05, + "loss": 0.0013, + "step": 20604 + }, + { + "epoch": 19.22, + "learning_rate": 4.759701492537314e-05, + "loss": 0.0002, + "step": 20608 + }, + { + "epoch": 19.23, + "learning_rate": 4.759654850746269e-05, + "loss": 0.0044, + "step": 20612 + }, + { + "epoch": 19.23, + "learning_rate": 4.759608208955224e-05, + "loss": 0.0002, + "step": 20616 + }, + { + "epoch": 19.24, + "learning_rate": 4.759561567164179e-05, + "loss": 0.0007, + "step": 20620 + }, + { + "epoch": 19.24, + "learning_rate": 4.759514925373135e-05, + "loss": 0.0018, + "step": 20624 + }, + { + "epoch": 19.24, + "learning_rate": 4.75946828358209e-05, + "loss": 0.0061, + "step": 20628 + }, + { + "epoch": 19.25, + "learning_rate": 4.7594216417910447e-05, + "loss": 0.0014, + "step": 20632 + }, + { + "epoch": 19.25, + "learning_rate": 4.759375e-05, + "loss": 0.0011, + "step": 20636 + }, + { + "epoch": 19.25, + "learning_rate": 4.7593283582089556e-05, + "loss": 0.0002, + "step": 20640 + }, + { + "epoch": 19.26, + "learning_rate": 4.7592817164179105e-05, + "loss": 0.0028, + "step": 20644 + }, + { + "epoch": 19.26, + "learning_rate": 4.759235074626866e-05, + "loss": 0.0021, + "step": 20648 + }, + { + "epoch": 19.26, + "learning_rate": 4.759188432835821e-05, + "loss": 0.0002, + "step": 20652 + }, + { + "epoch": 19.27, + "learning_rate": 4.759141791044776e-05, + "loss": 0.0008, + "step": 20656 + }, + { + "epoch": 19.27, + "learning_rate": 4.759095149253732e-05, + "loss": 0.0021, + "step": 20660 + }, + { + "epoch": 19.28, + "learning_rate": 4.7590485074626866e-05, + "loss": 0.0035, + "step": 20664 + }, + { + "epoch": 19.28, + "learning_rate": 4.759001865671642e-05, + "loss": 0.0002, + "step": 20668 + }, + { + "epoch": 19.28, + "learning_rate": 4.7589552238805975e-05, + "loss": 0.0004, + "step": 20672 + }, + { + "epoch": 19.29, + "learning_rate": 4.7589085820895523e-05, + "loss": 0.0007, + "step": 20676 + }, + { + "epoch": 19.29, + "learning_rate": 4.758861940298508e-05, + "loss": 0.0005, + "step": 20680 + }, + { + "epoch": 19.29, + "learning_rate": 4.758815298507463e-05, + "loss": 0.0002, + "step": 20684 + }, + { + "epoch": 19.3, + "learning_rate": 4.758768656716418e-05, + "loss": 0.0005, + "step": 20688 + }, + { + "epoch": 19.3, + "learning_rate": 4.7587220149253736e-05, + "loss": 0.0001, + "step": 20692 + }, + { + "epoch": 19.31, + "learning_rate": 4.7586753731343284e-05, + "loss": 0.0017, + "step": 20696 + }, + { + "epoch": 19.31, + "learning_rate": 4.758628731343284e-05, + "loss": 0.0006, + "step": 20700 + }, + { + "epoch": 19.31, + "learning_rate": 4.7585820895522394e-05, + "loss": 0.0003, + "step": 20704 + }, + { + "epoch": 19.32, + "learning_rate": 4.758535447761194e-05, + "loss": 0.0007, + "step": 20708 + }, + { + "epoch": 19.32, + "learning_rate": 4.758488805970149e-05, + "loss": 0.0018, + "step": 20712 + }, + { + "epoch": 19.32, + "learning_rate": 4.758442164179105e-05, + "loss": 0.0003, + "step": 20716 + }, + { + "epoch": 19.33, + "learning_rate": 4.75839552238806e-05, + "loss": 0.0039, + "step": 20720 + }, + { + "epoch": 19.33, + "learning_rate": 4.758348880597015e-05, + "loss": 0.002, + "step": 20724 + }, + { + "epoch": 19.34, + "learning_rate": 4.7583022388059703e-05, + "loss": 0.0006, + "step": 20728 + }, + { + "epoch": 19.34, + "learning_rate": 4.758255597014926e-05, + "loss": 0.0009, + "step": 20732 + }, + { + "epoch": 19.34, + "learning_rate": 4.7582089552238806e-05, + "loss": 0.0011, + "step": 20736 + }, + { + "epoch": 19.35, + "learning_rate": 4.758162313432836e-05, + "loss": 0.0001, + "step": 20740 + }, + { + "epoch": 19.35, + "learning_rate": 4.758115671641791e-05, + "loss": 0.0014, + "step": 20744 + }, + { + "epoch": 19.35, + "learning_rate": 4.7580690298507464e-05, + "loss": 0.0003, + "step": 20748 + }, + { + "epoch": 19.36, + "learning_rate": 4.758022388059702e-05, + "loss": 0.0003, + "step": 20752 + }, + { + "epoch": 19.36, + "learning_rate": 4.757975746268657e-05, + "loss": 0.0028, + "step": 20756 + }, + { + "epoch": 19.37, + "learning_rate": 4.757929104477612e-05, + "loss": 0.0005, + "step": 20760 + }, + { + "epoch": 19.37, + "learning_rate": 4.757882462686568e-05, + "loss": 0.0004, + "step": 20764 + }, + { + "epoch": 19.37, + "learning_rate": 4.7578358208955225e-05, + "loss": 0.0011, + "step": 20768 + }, + { + "epoch": 19.38, + "learning_rate": 4.7577891791044773e-05, + "loss": 0.0003, + "step": 20772 + }, + { + "epoch": 19.38, + "learning_rate": 4.7577425373134335e-05, + "loss": 0.0002, + "step": 20776 + }, + { + "epoch": 19.38, + "learning_rate": 4.757695895522388e-05, + "loss": 0.0001, + "step": 20780 + }, + { + "epoch": 19.39, + "learning_rate": 4.757649253731343e-05, + "loss": 0.0002, + "step": 20784 + }, + { + "epoch": 19.39, + "learning_rate": 4.7576026119402986e-05, + "loss": 0.003, + "step": 20788 + }, + { + "epoch": 19.4, + "learning_rate": 4.757555970149254e-05, + "loss": 0.0002, + "step": 20792 + }, + { + "epoch": 19.4, + "learning_rate": 4.757509328358209e-05, + "loss": 0.0017, + "step": 20796 + }, + { + "epoch": 19.4, + "learning_rate": 4.7574626865671644e-05, + "loss": 0.0032, + "step": 20800 + }, + { + "epoch": 19.41, + "learning_rate": 4.757416044776119e-05, + "loss": 0.0023, + "step": 20804 + }, + { + "epoch": 19.41, + "learning_rate": 4.757369402985075e-05, + "loss": 0.0003, + "step": 20808 + }, + { + "epoch": 19.41, + "learning_rate": 4.75732276119403e-05, + "loss": 0.0037, + "step": 20812 + }, + { + "epoch": 19.42, + "learning_rate": 4.757276119402985e-05, + "loss": 0.0008, + "step": 20816 + }, + { + "epoch": 19.42, + "learning_rate": 4.7572294776119405e-05, + "loss": 0.0021, + "step": 20820 + }, + { + "epoch": 19.43, + "learning_rate": 4.757182835820896e-05, + "loss": 0.0005, + "step": 20824 + }, + { + "epoch": 19.43, + "learning_rate": 4.757136194029851e-05, + "loss": 0.0001, + "step": 20828 + }, + { + "epoch": 19.43, + "learning_rate": 4.7570895522388056e-05, + "loss": 0.0001, + "step": 20832 + }, + { + "epoch": 19.44, + "learning_rate": 4.757042910447762e-05, + "loss": 0.0005, + "step": 20836 + }, + { + "epoch": 19.44, + "learning_rate": 4.7569962686567166e-05, + "loss": 0.002, + "step": 20840 + }, + { + "epoch": 19.44, + "learning_rate": 4.756949626865672e-05, + "loss": 0.0021, + "step": 20844 + }, + { + "epoch": 19.45, + "learning_rate": 4.756902985074627e-05, + "loss": 0.0001, + "step": 20848 + }, + { + "epoch": 19.45, + "learning_rate": 4.7568563432835824e-05, + "loss": 0.0032, + "step": 20852 + }, + { + "epoch": 19.46, + "learning_rate": 4.756809701492538e-05, + "loss": 0.0003, + "step": 20856 + }, + { + "epoch": 19.46, + "learning_rate": 4.756763059701493e-05, + "loss": 0.0038, + "step": 20860 + }, + { + "epoch": 19.46, + "learning_rate": 4.7567164179104475e-05, + "loss": 0.0087, + "step": 20864 + }, + { + "epoch": 19.47, + "learning_rate": 4.756669776119404e-05, + "loss": 0.0025, + "step": 20868 + }, + { + "epoch": 19.47, + "learning_rate": 4.7566231343283585e-05, + "loss": 0.0016, + "step": 20872 + }, + { + "epoch": 19.47, + "learning_rate": 4.756576492537313e-05, + "loss": 0.0013, + "step": 20876 + }, + { + "epoch": 19.48, + "learning_rate": 4.756529850746269e-05, + "loss": 0.0016, + "step": 20880 + }, + { + "epoch": 19.48, + "learning_rate": 4.756483208955224e-05, + "loss": 0.0001, + "step": 20884 + }, + { + "epoch": 19.49, + "learning_rate": 4.756436567164179e-05, + "loss": 0.0016, + "step": 20888 + }, + { + "epoch": 19.49, + "learning_rate": 4.7563899253731346e-05, + "loss": 0.0056, + "step": 20892 + }, + { + "epoch": 19.49, + "learning_rate": 4.75634328358209e-05, + "loss": 0.0002, + "step": 20896 + }, + { + "epoch": 19.5, + "learning_rate": 4.756296641791045e-05, + "loss": 0.0001, + "step": 20900 + }, + { + "epoch": 19.5, + "learning_rate": 4.7562500000000004e-05, + "loss": 0.0047, + "step": 20904 + }, + { + "epoch": 19.5, + "learning_rate": 4.756203358208955e-05, + "loss": 0.0059, + "step": 20908 + }, + { + "epoch": 19.51, + "learning_rate": 4.756156716417911e-05, + "loss": 0.0003, + "step": 20912 + }, + { + "epoch": 19.51, + "learning_rate": 4.756110074626866e-05, + "loss": 0.0013, + "step": 20916 + }, + { + "epoch": 19.51, + "learning_rate": 4.756063432835821e-05, + "loss": 0.0007, + "step": 20920 + }, + { + "epoch": 19.52, + "learning_rate": 4.756016791044776e-05, + "loss": 0.0058, + "step": 20924 + }, + { + "epoch": 19.52, + "learning_rate": 4.755970149253732e-05, + "loss": 0.0018, + "step": 20928 + }, + { + "epoch": 19.53, + "learning_rate": 4.755923507462687e-05, + "loss": 0.0001, + "step": 20932 + }, + { + "epoch": 19.53, + "learning_rate": 4.7558768656716416e-05, + "loss": 0.0082, + "step": 20936 + }, + { + "epoch": 19.53, + "learning_rate": 4.755830223880597e-05, + "loss": 0.0066, + "step": 20940 + }, + { + "epoch": 19.54, + "learning_rate": 4.7557835820895526e-05, + "loss": 0.0064, + "step": 20944 + }, + { + "epoch": 19.54, + "learning_rate": 4.7557369402985074e-05, + "loss": 0.0015, + "step": 20948 + }, + { + "epoch": 19.54, + "learning_rate": 4.755690298507463e-05, + "loss": 0.0042, + "step": 20952 + }, + { + "epoch": 19.55, + "learning_rate": 4.7556436567164184e-05, + "loss": 0.0006, + "step": 20956 + }, + { + "epoch": 19.55, + "learning_rate": 4.755597014925373e-05, + "loss": 0.0001, + "step": 20960 + }, + { + "epoch": 19.56, + "learning_rate": 4.755550373134329e-05, + "loss": 0.0042, + "step": 20964 + }, + { + "epoch": 19.56, + "learning_rate": 4.7555037313432835e-05, + "loss": 0.0001, + "step": 20968 + }, + { + "epoch": 19.56, + "learning_rate": 4.755457089552239e-05, + "loss": 0.0044, + "step": 20972 + }, + { + "epoch": 19.57, + "learning_rate": 4.7554104477611945e-05, + "loss": 0.0087, + "step": 20976 + }, + { + "epoch": 19.57, + "learning_rate": 4.755363805970149e-05, + "loss": 0.0023, + "step": 20980 + }, + { + "epoch": 19.57, + "learning_rate": 4.755317164179104e-05, + "loss": 0.0004, + "step": 20984 + }, + { + "epoch": 19.58, + "learning_rate": 4.75527052238806e-05, + "loss": 0.0004, + "step": 20988 + }, + { + "epoch": 19.58, + "learning_rate": 4.755223880597015e-05, + "loss": 0.0007, + "step": 20992 + }, + { + "epoch": 19.59, + "learning_rate": 4.75517723880597e-05, + "loss": 0.0063, + "step": 20996 + }, + { + "epoch": 19.59, + "learning_rate": 4.7551305970149254e-05, + "loss": 0.0003, + "step": 21000 + }, + { + "epoch": 19.59, + "eval_exact_match": 0.7253384912959381, + "eval_exec": 0.7553191489361702, + "eval_loss": 0.33756983280181885, + "eval_runtime": 1072.0397, + "eval_samples_per_second": 0.965, + "step": 21000 + }, + { + "epoch": 19.59, + "learning_rate": 4.755083955223881e-05, + "loss": 0.0037, + "step": 21004 + }, + { + "epoch": 19.6, + "learning_rate": 4.7550373134328364e-05, + "loss": 0.0005, + "step": 21008 + }, + { + "epoch": 19.6, + "learning_rate": 4.754990671641791e-05, + "loss": 0.0002, + "step": 21012 + }, + { + "epoch": 19.6, + "learning_rate": 4.754944029850747e-05, + "loss": 0.0008, + "step": 21016 + }, + { + "epoch": 19.61, + "learning_rate": 4.754897388059702e-05, + "loss": 0.0061, + "step": 21020 + }, + { + "epoch": 19.61, + "learning_rate": 4.754850746268657e-05, + "loss": 0.0014, + "step": 21024 + }, + { + "epoch": 19.62, + "learning_rate": 4.754804104477612e-05, + "loss": 0.0056, + "step": 21028 + }, + { + "epoch": 19.62, + "learning_rate": 4.754757462686567e-05, + "loss": 0.0009, + "step": 21032 + }, + { + "epoch": 19.62, + "learning_rate": 4.754710820895523e-05, + "loss": 0.0099, + "step": 21036 + }, + { + "epoch": 19.63, + "learning_rate": 4.7546641791044776e-05, + "loss": 0.0005, + "step": 21040 + }, + { + "epoch": 19.63, + "learning_rate": 4.754617537313433e-05, + "loss": 0.0065, + "step": 21044 + }, + { + "epoch": 19.63, + "learning_rate": 4.7545708955223886e-05, + "loss": 0.0012, + "step": 21048 + }, + { + "epoch": 19.64, + "learning_rate": 4.7545242537313434e-05, + "loss": 0.0002, + "step": 21052 + }, + { + "epoch": 19.64, + "learning_rate": 4.754477611940299e-05, + "loss": 0.0021, + "step": 21056 + }, + { + "epoch": 19.65, + "learning_rate": 4.754430970149254e-05, + "loss": 0.0005, + "step": 21060 + }, + { + "epoch": 19.65, + "learning_rate": 4.754384328358209e-05, + "loss": 0.0007, + "step": 21064 + }, + { + "epoch": 19.65, + "learning_rate": 4.754337686567165e-05, + "loss": 0.0002, + "step": 21068 + }, + { + "epoch": 19.66, + "learning_rate": 4.7542910447761195e-05, + "loss": 0.0032, + "step": 21072 + }, + { + "epoch": 19.66, + "learning_rate": 4.754244402985075e-05, + "loss": 0.0073, + "step": 21076 + }, + { + "epoch": 19.66, + "learning_rate": 4.7541977611940305e-05, + "loss": 0.0034, + "step": 21080 + }, + { + "epoch": 19.67, + "learning_rate": 4.754151119402985e-05, + "loss": 0.0011, + "step": 21084 + }, + { + "epoch": 19.67, + "learning_rate": 4.75410447761194e-05, + "loss": 0.0085, + "step": 21088 + }, + { + "epoch": 19.68, + "learning_rate": 4.7540578358208956e-05, + "loss": 0.0041, + "step": 21092 + }, + { + "epoch": 19.68, + "learning_rate": 4.754011194029851e-05, + "loss": 0.0002, + "step": 21096 + }, + { + "epoch": 19.68, + "learning_rate": 4.753964552238806e-05, + "loss": 0.0033, + "step": 21100 + }, + { + "epoch": 19.69, + "learning_rate": 4.7539179104477614e-05, + "loss": 0.0009, + "step": 21104 + }, + { + "epoch": 19.69, + "learning_rate": 4.753871268656717e-05, + "loss": 0.0004, + "step": 21108 + }, + { + "epoch": 19.69, + "learning_rate": 4.753824626865672e-05, + "loss": 0.0017, + "step": 21112 + }, + { + "epoch": 19.7, + "learning_rate": 4.753777985074627e-05, + "loss": 0.0024, + "step": 21116 + }, + { + "epoch": 19.7, + "learning_rate": 4.753731343283582e-05, + "loss": 0.0003, + "step": 21120 + }, + { + "epoch": 19.71, + "learning_rate": 4.7536847014925375e-05, + "loss": 0.0007, + "step": 21124 + }, + { + "epoch": 19.71, + "learning_rate": 4.753638059701493e-05, + "loss": 0.0053, + "step": 21128 + }, + { + "epoch": 19.71, + "learning_rate": 4.753591417910448e-05, + "loss": 0.0009, + "step": 21132 + }, + { + "epoch": 19.72, + "learning_rate": 4.753544776119403e-05, + "loss": 0.0008, + "step": 21136 + }, + { + "epoch": 19.72, + "learning_rate": 4.753498134328359e-05, + "loss": 0.0035, + "step": 21140 + }, + { + "epoch": 19.72, + "learning_rate": 4.7534514925373136e-05, + "loss": 0.0008, + "step": 21144 + }, + { + "epoch": 19.73, + "learning_rate": 4.7534048507462684e-05, + "loss": 0.0029, + "step": 21148 + }, + { + "epoch": 19.73, + "learning_rate": 4.753358208955224e-05, + "loss": 0.0089, + "step": 21152 + }, + { + "epoch": 19.73, + "learning_rate": 4.7533115671641794e-05, + "loss": 0.0034, + "step": 21156 + }, + { + "epoch": 19.74, + "learning_rate": 4.753264925373134e-05, + "loss": 0.0016, + "step": 21160 + }, + { + "epoch": 19.74, + "learning_rate": 4.75321828358209e-05, + "loss": 0.0006, + "step": 21164 + }, + { + "epoch": 19.75, + "learning_rate": 4.753171641791045e-05, + "loss": 0.0003, + "step": 21168 + }, + { + "epoch": 19.75, + "learning_rate": 4.753125000000001e-05, + "loss": 0.0036, + "step": 21172 + }, + { + "epoch": 19.75, + "learning_rate": 4.7530783582089555e-05, + "loss": 0.0001, + "step": 21176 + }, + { + "epoch": 19.76, + "learning_rate": 4.75303171641791e-05, + "loss": 0.0003, + "step": 21180 + }, + { + "epoch": 19.76, + "learning_rate": 4.7529850746268665e-05, + "loss": 0.001, + "step": 21184 + }, + { + "epoch": 19.76, + "learning_rate": 4.752938432835821e-05, + "loss": 0.0008, + "step": 21188 + }, + { + "epoch": 19.77, + "learning_rate": 4.752891791044776e-05, + "loss": 0.0002, + "step": 21192 + }, + { + "epoch": 19.77, + "learning_rate": 4.7528451492537316e-05, + "loss": 0.0001, + "step": 21196 + }, + { + "epoch": 19.78, + "learning_rate": 4.752798507462687e-05, + "loss": 0.0001, + "step": 21200 + }, + { + "epoch": 19.78, + "learning_rate": 4.752751865671642e-05, + "loss": 0.0011, + "step": 21204 + }, + { + "epoch": 19.78, + "learning_rate": 4.7527052238805974e-05, + "loss": 0.0002, + "step": 21208 + }, + { + "epoch": 19.79, + "learning_rate": 4.752658582089552e-05, + "loss": 0.0024, + "step": 21212 + }, + { + "epoch": 19.79, + "learning_rate": 4.752611940298508e-05, + "loss": 0.0023, + "step": 21216 + }, + { + "epoch": 19.79, + "learning_rate": 4.752565298507463e-05, + "loss": 0.0044, + "step": 21220 + }, + { + "epoch": 19.8, + "learning_rate": 4.752518656716418e-05, + "loss": 0.001, + "step": 21224 + }, + { + "epoch": 19.8, + "learning_rate": 4.7524720149253735e-05, + "loss": 0.001, + "step": 21228 + }, + { + "epoch": 19.81, + "learning_rate": 4.752425373134329e-05, + "loss": 0.0075, + "step": 21232 + }, + { + "epoch": 19.81, + "learning_rate": 4.752378731343284e-05, + "loss": 0.0005, + "step": 21236 + }, + { + "epoch": 19.81, + "learning_rate": 4.7523320895522386e-05, + "loss": 0.0036, + "step": 21240 + }, + { + "epoch": 19.82, + "learning_rate": 4.752285447761195e-05, + "loss": 0.0015, + "step": 21244 + }, + { + "epoch": 19.82, + "learning_rate": 4.7522388059701496e-05, + "loss": 0.0062, + "step": 21248 + }, + { + "epoch": 19.82, + "learning_rate": 4.7521921641791044e-05, + "loss": 0.0002, + "step": 21252 + }, + { + "epoch": 19.83, + "learning_rate": 4.75214552238806e-05, + "loss": 0.0007, + "step": 21256 + }, + { + "epoch": 19.83, + "learning_rate": 4.7520988805970154e-05, + "loss": 0.0002, + "step": 21260 + }, + { + "epoch": 19.84, + "learning_rate": 4.75205223880597e-05, + "loss": 0.0016, + "step": 21264 + }, + { + "epoch": 19.84, + "learning_rate": 4.752005597014926e-05, + "loss": 0.0025, + "step": 21268 + }, + { + "epoch": 19.84, + "learning_rate": 4.7519589552238805e-05, + "loss": 0.0002, + "step": 21272 + }, + { + "epoch": 19.85, + "learning_rate": 4.751912313432836e-05, + "loss": 0.0004, + "step": 21276 + }, + { + "epoch": 19.85, + "learning_rate": 4.7518656716417915e-05, + "loss": 0.0004, + "step": 21280 + }, + { + "epoch": 19.85, + "learning_rate": 4.751819029850746e-05, + "loss": 0.0002, + "step": 21284 + }, + { + "epoch": 19.86, + "learning_rate": 4.751772388059702e-05, + "loss": 0.0125, + "step": 21288 + }, + { + "epoch": 19.86, + "learning_rate": 4.751725746268657e-05, + "loss": 0.0006, + "step": 21292 + }, + { + "epoch": 19.87, + "learning_rate": 4.751679104477612e-05, + "loss": 0.0034, + "step": 21296 + }, + { + "epoch": 19.87, + "learning_rate": 4.751632462686567e-05, + "loss": 0.011, + "step": 21300 + }, + { + "epoch": 19.87, + "learning_rate": 4.751585820895523e-05, + "loss": 0.0026, + "step": 21304 + }, + { + "epoch": 19.88, + "learning_rate": 4.751539179104478e-05, + "loss": 0.0015, + "step": 21308 + }, + { + "epoch": 19.88, + "learning_rate": 4.751492537313433e-05, + "loss": 0.0013, + "step": 21312 + }, + { + "epoch": 19.88, + "learning_rate": 4.751445895522388e-05, + "loss": 0.0002, + "step": 21316 + }, + { + "epoch": 19.89, + "learning_rate": 4.751399253731344e-05, + "loss": 0.0001, + "step": 21320 + }, + { + "epoch": 19.89, + "learning_rate": 4.7513526119402985e-05, + "loss": 0.0036, + "step": 21324 + }, + { + "epoch": 19.9, + "learning_rate": 4.751305970149254e-05, + "loss": 0.0013, + "step": 21328 + }, + { + "epoch": 19.9, + "learning_rate": 4.751259328358209e-05, + "loss": 0.0009, + "step": 21332 + }, + { + "epoch": 19.9, + "learning_rate": 4.751212686567165e-05, + "loss": 0.0024, + "step": 21336 + }, + { + "epoch": 19.91, + "learning_rate": 4.75116604477612e-05, + "loss": 0.008, + "step": 21340 + }, + { + "epoch": 19.91, + "learning_rate": 4.7511194029850746e-05, + "loss": 0.0018, + "step": 21344 + }, + { + "epoch": 19.91, + "learning_rate": 4.75107276119403e-05, + "loss": 0.0031, + "step": 21348 + }, + { + "epoch": 19.92, + "learning_rate": 4.7510261194029856e-05, + "loss": 0.0002, + "step": 21352 + }, + { + "epoch": 19.92, + "learning_rate": 4.7509794776119404e-05, + "loss": 0.0017, + "step": 21356 + }, + { + "epoch": 19.93, + "learning_rate": 4.750932835820896e-05, + "loss": 0.0004, + "step": 21360 + }, + { + "epoch": 19.93, + "learning_rate": 4.7508861940298514e-05, + "loss": 0.0022, + "step": 21364 + }, + { + "epoch": 19.93, + "learning_rate": 4.750839552238806e-05, + "loss": 0.0005, + "step": 21368 + }, + { + "epoch": 19.94, + "learning_rate": 4.750792910447762e-05, + "loss": 0.0028, + "step": 21372 + }, + { + "epoch": 19.94, + "learning_rate": 4.7507462686567165e-05, + "loss": 0.0007, + "step": 21376 + }, + { + "epoch": 19.94, + "learning_rate": 4.750699626865672e-05, + "loss": 0.0053, + "step": 21380 + }, + { + "epoch": 19.95, + "learning_rate": 4.7506529850746275e-05, + "loss": 0.0003, + "step": 21384 + }, + { + "epoch": 19.95, + "learning_rate": 4.750606343283582e-05, + "loss": 0.0004, + "step": 21388 + }, + { + "epoch": 19.96, + "learning_rate": 4.750559701492537e-05, + "loss": 0.0046, + "step": 21392 + }, + { + "epoch": 19.96, + "learning_rate": 4.750513059701493e-05, + "loss": 0.0003, + "step": 21396 + }, + { + "epoch": 19.96, + "learning_rate": 4.750466417910448e-05, + "loss": 0.0001, + "step": 21400 + }, + { + "epoch": 19.97, + "learning_rate": 4.750419776119403e-05, + "loss": 0.0012, + "step": 21404 + }, + { + "epoch": 19.97, + "learning_rate": 4.7503731343283584e-05, + "loss": 0.0019, + "step": 21408 + }, + { + "epoch": 19.97, + "learning_rate": 4.750326492537314e-05, + "loss": 0.0003, + "step": 21412 + }, + { + "epoch": 19.98, + "learning_rate": 4.750279850746269e-05, + "loss": 0.0007, + "step": 21416 + }, + { + "epoch": 19.98, + "learning_rate": 4.750233208955224e-05, + "loss": 0.0089, + "step": 21420 + }, + { + "epoch": 19.98, + "learning_rate": 4.750186567164179e-05, + "loss": 0.0011, + "step": 21424 + }, + { + "epoch": 19.99, + "learning_rate": 4.7501399253731345e-05, + "loss": 0.0011, + "step": 21428 + }, + { + "epoch": 19.99, + "learning_rate": 4.75009328358209e-05, + "loss": 0.001, + "step": 21432 + }, + { + "epoch": 20.0, + "learning_rate": 4.750046641791045e-05, + "loss": 0.0058, + "step": 21436 + }, + { + "epoch": 20.0, + "learning_rate": 4.75e-05, + "loss": 0.0003, + "step": 21440 + }, + { + "epoch": 20.0, + "learning_rate": 4.749953358208956e-05, + "loss": 0.0001, + "step": 21444 + }, + { + "epoch": 20.01, + "learning_rate": 4.7499067164179106e-05, + "loss": 0.0005, + "step": 21448 + }, + { + "epoch": 20.01, + "learning_rate": 4.7498600746268654e-05, + "loss": 0.0018, + "step": 21452 + }, + { + "epoch": 20.01, + "learning_rate": 4.7498134328358215e-05, + "loss": 0.0005, + "step": 21456 + }, + { + "epoch": 20.02, + "learning_rate": 4.7497667910447764e-05, + "loss": 0.0027, + "step": 21460 + }, + { + "epoch": 20.02, + "learning_rate": 4.749720149253731e-05, + "loss": 0.0005, + "step": 21464 + }, + { + "epoch": 20.03, + "learning_rate": 4.749673507462687e-05, + "loss": 0.0005, + "step": 21468 + }, + { + "epoch": 20.03, + "learning_rate": 4.749626865671642e-05, + "loss": 0.0014, + "step": 21472 + }, + { + "epoch": 20.03, + "learning_rate": 4.749580223880597e-05, + "loss": 0.001, + "step": 21476 + }, + { + "epoch": 20.04, + "learning_rate": 4.7495335820895525e-05, + "loss": 0.0003, + "step": 21480 + }, + { + "epoch": 20.04, + "learning_rate": 4.749486940298507e-05, + "loss": 0.0005, + "step": 21484 + }, + { + "epoch": 20.04, + "learning_rate": 4.749440298507463e-05, + "loss": 0.0003, + "step": 21488 + }, + { + "epoch": 20.05, + "learning_rate": 4.749393656716418e-05, + "loss": 0.0037, + "step": 21492 + }, + { + "epoch": 20.05, + "learning_rate": 4.749347014925373e-05, + "loss": 0.0005, + "step": 21496 + }, + { + "epoch": 20.06, + "learning_rate": 4.7493003731343286e-05, + "loss": 0.0023, + "step": 21500 + }, + { + "epoch": 20.06, + "eval_exact_match": 0.7224371373307543, + "eval_exec": 0.7562862669245648, + "eval_loss": 0.34417060017585754, + "eval_runtime": 1073.2583, + "eval_samples_per_second": 0.963, + "step": 21500 + }, + { + "epoch": 20.06, + "learning_rate": 4.749253731343284e-05, + "loss": 0.0007, + "step": 21504 + }, + { + "epoch": 20.06, + "learning_rate": 4.749207089552239e-05, + "loss": 0.0026, + "step": 21508 + }, + { + "epoch": 20.07, + "learning_rate": 4.7491604477611944e-05, + "loss": 0.0012, + "step": 21512 + }, + { + "epoch": 20.07, + "learning_rate": 4.74911380597015e-05, + "loss": 0.0002, + "step": 21516 + }, + { + "epoch": 20.07, + "learning_rate": 4.7490671641791047e-05, + "loss": 0.0004, + "step": 21520 + }, + { + "epoch": 20.08, + "learning_rate": 4.74902052238806e-05, + "loss": 0.009, + "step": 21524 + }, + { + "epoch": 20.08, + "learning_rate": 4.748973880597015e-05, + "loss": 0.0002, + "step": 21528 + }, + { + "epoch": 20.09, + "learning_rate": 4.7489272388059705e-05, + "loss": 0.0004, + "step": 21532 + }, + { + "epoch": 20.09, + "learning_rate": 4.748880597014926e-05, + "loss": 0.0026, + "step": 21536 + }, + { + "epoch": 20.09, + "learning_rate": 4.748833955223881e-05, + "loss": 0.0007, + "step": 21540 + }, + { + "epoch": 20.1, + "learning_rate": 4.7487873134328356e-05, + "loss": 0.0021, + "step": 21544 + }, + { + "epoch": 20.1, + "learning_rate": 4.748740671641792e-05, + "loss": 0.0003, + "step": 21548 + }, + { + "epoch": 20.1, + "learning_rate": 4.7486940298507465e-05, + "loss": 0.0002, + "step": 21552 + }, + { + "epoch": 20.11, + "learning_rate": 4.7486473880597014e-05, + "loss": 0.0034, + "step": 21556 + }, + { + "epoch": 20.11, + "learning_rate": 4.748600746268657e-05, + "loss": 0.0003, + "step": 21560 + }, + { + "epoch": 20.12, + "learning_rate": 4.7485541044776123e-05, + "loss": 0.0003, + "step": 21564 + }, + { + "epoch": 20.12, + "learning_rate": 4.748507462686567e-05, + "loss": 0.0048, + "step": 21568 + }, + { + "epoch": 20.12, + "learning_rate": 4.7484608208955226e-05, + "loss": 0.0008, + "step": 21572 + }, + { + "epoch": 20.13, + "learning_rate": 4.748414179104478e-05, + "loss": 0.002, + "step": 21576 + }, + { + "epoch": 20.13, + "learning_rate": 4.748367537313433e-05, + "loss": 0.0005, + "step": 21580 + }, + { + "epoch": 20.13, + "learning_rate": 4.7483208955223884e-05, + "loss": 0.0014, + "step": 21584 + }, + { + "epoch": 20.14, + "learning_rate": 4.748274253731343e-05, + "loss": 0.0035, + "step": 21588 + }, + { + "epoch": 20.14, + "learning_rate": 4.748227611940299e-05, + "loss": 0.001, + "step": 21592 + }, + { + "epoch": 20.15, + "learning_rate": 4.748180970149254e-05, + "loss": 0.0008, + "step": 21596 + }, + { + "epoch": 20.15, + "learning_rate": 4.748134328358209e-05, + "loss": 0.0169, + "step": 21600 + }, + { + "epoch": 20.15, + "learning_rate": 4.748087686567164e-05, + "loss": 0.0008, + "step": 21604 + }, + { + "epoch": 20.16, + "learning_rate": 4.74804104477612e-05, + "loss": 0.0003, + "step": 21608 + }, + { + "epoch": 20.16, + "learning_rate": 4.747994402985075e-05, + "loss": 0.0003, + "step": 21612 + }, + { + "epoch": 20.16, + "learning_rate": 4.7479477611940297e-05, + "loss": 0.0005, + "step": 21616 + }, + { + "epoch": 20.17, + "learning_rate": 4.747901119402985e-05, + "loss": 0.0, + "step": 21620 + }, + { + "epoch": 20.17, + "learning_rate": 4.7478544776119406e-05, + "loss": 0.0018, + "step": 21624 + }, + { + "epoch": 20.18, + "learning_rate": 4.7478078358208955e-05, + "loss": 0.0007, + "step": 21628 + }, + { + "epoch": 20.18, + "learning_rate": 4.747761194029851e-05, + "loss": 0.0013, + "step": 21632 + }, + { + "epoch": 20.18, + "learning_rate": 4.7477145522388064e-05, + "loss": 0.0001, + "step": 21636 + }, + { + "epoch": 20.19, + "learning_rate": 4.747667910447761e-05, + "loss": 0.0017, + "step": 21640 + }, + { + "epoch": 20.19, + "learning_rate": 4.747621268656717e-05, + "loss": 0.002, + "step": 21644 + }, + { + "epoch": 20.19, + "learning_rate": 4.7475746268656716e-05, + "loss": 0.0002, + "step": 21648 + }, + { + "epoch": 20.2, + "learning_rate": 4.747527985074627e-05, + "loss": 0.0025, + "step": 21652 + }, + { + "epoch": 20.2, + "learning_rate": 4.7474813432835825e-05, + "loss": 0.0003, + "step": 21656 + }, + { + "epoch": 20.21, + "learning_rate": 4.7474347014925373e-05, + "loss": 0.0018, + "step": 21660 + }, + { + "epoch": 20.21, + "learning_rate": 4.747388059701493e-05, + "loss": 0.0071, + "step": 21664 + }, + { + "epoch": 20.21, + "learning_rate": 4.747341417910448e-05, + "loss": 0.0013, + "step": 21668 + }, + { + "epoch": 20.22, + "learning_rate": 4.747294776119403e-05, + "loss": 0.0014, + "step": 21672 + }, + { + "epoch": 20.22, + "learning_rate": 4.7472481343283586e-05, + "loss": 0.0003, + "step": 21676 + }, + { + "epoch": 20.22, + "learning_rate": 4.7472014925373134e-05, + "loss": 0.0003, + "step": 21680 + }, + { + "epoch": 20.23, + "learning_rate": 4.747154850746269e-05, + "loss": 0.0034, + "step": 21684 + }, + { + "epoch": 20.23, + "learning_rate": 4.7471082089552244e-05, + "loss": 0.0001, + "step": 21688 + }, + { + "epoch": 20.24, + "learning_rate": 4.747061567164179e-05, + "loss": 0.0003, + "step": 21692 + }, + { + "epoch": 20.24, + "learning_rate": 4.747014925373135e-05, + "loss": 0.0041, + "step": 21696 + }, + { + "epoch": 20.24, + "learning_rate": 4.74696828358209e-05, + "loss": 0.0008, + "step": 21700 + }, + { + "epoch": 20.25, + "learning_rate": 4.746921641791045e-05, + "loss": 0.0004, + "step": 21704 + }, + { + "epoch": 20.25, + "learning_rate": 4.746875e-05, + "loss": 0.0005, + "step": 21708 + }, + { + "epoch": 20.25, + "learning_rate": 4.746828358208955e-05, + "loss": 0.0003, + "step": 21712 + }, + { + "epoch": 20.26, + "learning_rate": 4.746781716417911e-05, + "loss": 0.0005, + "step": 21716 + }, + { + "epoch": 20.26, + "learning_rate": 4.7467350746268656e-05, + "loss": 0.0001, + "step": 21720 + }, + { + "epoch": 20.26, + "learning_rate": 4.746688432835821e-05, + "loss": 0.0039, + "step": 21724 + }, + { + "epoch": 20.27, + "learning_rate": 4.7466417910447766e-05, + "loss": 0.0005, + "step": 21728 + }, + { + "epoch": 20.27, + "learning_rate": 4.7465951492537314e-05, + "loss": 0.0046, + "step": 21732 + }, + { + "epoch": 20.28, + "learning_rate": 4.746548507462687e-05, + "loss": 0.0003, + "step": 21736 + }, + { + "epoch": 20.28, + "learning_rate": 4.746501865671642e-05, + "loss": 0.0025, + "step": 21740 + }, + { + "epoch": 20.28, + "learning_rate": 4.746455223880597e-05, + "loss": 0.0016, + "step": 21744 + }, + { + "epoch": 20.29, + "learning_rate": 4.746408582089553e-05, + "loss": 0.0019, + "step": 21748 + }, + { + "epoch": 20.29, + "learning_rate": 4.7463619402985075e-05, + "loss": 0.0016, + "step": 21752 + }, + { + "epoch": 20.29, + "learning_rate": 4.746315298507463e-05, + "loss": 0.0004, + "step": 21756 + }, + { + "epoch": 20.3, + "learning_rate": 4.7462686567164185e-05, + "loss": 0.0004, + "step": 21760 + }, + { + "epoch": 20.3, + "learning_rate": 4.746222014925373e-05, + "loss": 0.0007, + "step": 21764 + }, + { + "epoch": 20.31, + "learning_rate": 4.746175373134328e-05, + "loss": 0.0002, + "step": 21768 + }, + { + "epoch": 20.31, + "learning_rate": 4.7461287313432836e-05, + "loss": 0.0006, + "step": 21772 + }, + { + "epoch": 20.31, + "learning_rate": 4.746082089552239e-05, + "loss": 0.0003, + "step": 21776 + }, + { + "epoch": 20.32, + "learning_rate": 4.746035447761194e-05, + "loss": 0.0005, + "step": 21780 + }, + { + "epoch": 20.32, + "learning_rate": 4.7459888059701494e-05, + "loss": 0.0007, + "step": 21784 + }, + { + "epoch": 20.32, + "learning_rate": 4.745942164179105e-05, + "loss": 0.0005, + "step": 21788 + }, + { + "epoch": 20.33, + "learning_rate": 4.74589552238806e-05, + "loss": 0.0005, + "step": 21792 + }, + { + "epoch": 20.33, + "learning_rate": 4.745848880597015e-05, + "loss": 0.0004, + "step": 21796 + }, + { + "epoch": 20.34, + "learning_rate": 4.74580223880597e-05, + "loss": 0.0003, + "step": 21800 + }, + { + "epoch": 20.34, + "learning_rate": 4.7457555970149255e-05, + "loss": 0.0032, + "step": 21804 + }, + { + "epoch": 20.34, + "learning_rate": 4.745708955223881e-05, + "loss": 0.0004, + "step": 21808 + }, + { + "epoch": 20.35, + "learning_rate": 4.745662313432836e-05, + "loss": 0.0013, + "step": 21812 + }, + { + "epoch": 20.35, + "learning_rate": 4.745615671641791e-05, + "loss": 0.0008, + "step": 21816 + }, + { + "epoch": 20.35, + "learning_rate": 4.745569029850747e-05, + "loss": 0.0047, + "step": 21820 + }, + { + "epoch": 20.36, + "learning_rate": 4.7455223880597016e-05, + "loss": 0.0001, + "step": 21824 + }, + { + "epoch": 20.36, + "learning_rate": 4.745475746268657e-05, + "loss": 0.0012, + "step": 21828 + }, + { + "epoch": 20.37, + "learning_rate": 4.745429104477612e-05, + "loss": 0.0035, + "step": 21832 + }, + { + "epoch": 20.37, + "learning_rate": 4.7453824626865674e-05, + "loss": 0.0052, + "step": 21836 + }, + { + "epoch": 20.37, + "learning_rate": 4.745335820895523e-05, + "loss": 0.0006, + "step": 21840 + }, + { + "epoch": 20.38, + "learning_rate": 4.745289179104478e-05, + "loss": 0.002, + "step": 21844 + }, + { + "epoch": 20.38, + "learning_rate": 4.745242537313433e-05, + "loss": 0.0063, + "step": 21848 + }, + { + "epoch": 20.38, + "learning_rate": 4.745195895522389e-05, + "loss": 0.0005, + "step": 21852 + }, + { + "epoch": 20.39, + "learning_rate": 4.7451492537313435e-05, + "loss": 0.0214, + "step": 21856 + }, + { + "epoch": 20.39, + "learning_rate": 4.745102611940298e-05, + "loss": 0.0038, + "step": 21860 + }, + { + "epoch": 20.4, + "learning_rate": 4.7450559701492545e-05, + "loss": 0.0043, + "step": 21864 + }, + { + "epoch": 20.4, + "learning_rate": 4.745009328358209e-05, + "loss": 0.0023, + "step": 21868 + }, + { + "epoch": 20.4, + "learning_rate": 4.744962686567164e-05, + "loss": 0.0001, + "step": 21872 + }, + { + "epoch": 20.41, + "learning_rate": 4.7449160447761196e-05, + "loss": 0.0002, + "step": 21876 + }, + { + "epoch": 20.41, + "learning_rate": 4.744869402985075e-05, + "loss": 0.0021, + "step": 21880 + }, + { + "epoch": 20.41, + "learning_rate": 4.74482276119403e-05, + "loss": 0.0018, + "step": 21884 + }, + { + "epoch": 20.42, + "learning_rate": 4.7447761194029854e-05, + "loss": 0.0012, + "step": 21888 + }, + { + "epoch": 20.42, + "learning_rate": 4.74472947761194e-05, + "loss": 0.0003, + "step": 21892 + }, + { + "epoch": 20.43, + "learning_rate": 4.744682835820896e-05, + "loss": 0.0016, + "step": 21896 + }, + { + "epoch": 20.43, + "learning_rate": 4.744636194029851e-05, + "loss": 0.0029, + "step": 21900 + }, + { + "epoch": 20.43, + "learning_rate": 4.744589552238806e-05, + "loss": 0.0005, + "step": 21904 + }, + { + "epoch": 20.44, + "learning_rate": 4.7445429104477615e-05, + "loss": 0.0028, + "step": 21908 + }, + { + "epoch": 20.44, + "learning_rate": 4.744496268656717e-05, + "loss": 0.0006, + "step": 21912 + }, + { + "epoch": 20.44, + "learning_rate": 4.744449626865672e-05, + "loss": 0.0002, + "step": 21916 + }, + { + "epoch": 20.45, + "learning_rate": 4.7444029850746266e-05, + "loss": 0.0009, + "step": 21920 + }, + { + "epoch": 20.45, + "learning_rate": 4.744356343283583e-05, + "loss": 0.0019, + "step": 21924 + }, + { + "epoch": 20.46, + "learning_rate": 4.7443097014925376e-05, + "loss": 0.0005, + "step": 21928 + }, + { + "epoch": 20.46, + "learning_rate": 4.7442630597014924e-05, + "loss": 0.0018, + "step": 21932 + }, + { + "epoch": 20.46, + "learning_rate": 4.744216417910448e-05, + "loss": 0.0024, + "step": 21936 + }, + { + "epoch": 20.47, + "learning_rate": 4.7441697761194034e-05, + "loss": 0.0007, + "step": 21940 + }, + { + "epoch": 20.47, + "learning_rate": 4.744123134328358e-05, + "loss": 0.0129, + "step": 21944 + }, + { + "epoch": 20.47, + "learning_rate": 4.744076492537314e-05, + "loss": 0.005, + "step": 21948 + }, + { + "epoch": 20.48, + "learning_rate": 4.7440298507462685e-05, + "loss": 0.0038, + "step": 21952 + }, + { + "epoch": 20.48, + "learning_rate": 4.743983208955224e-05, + "loss": 0.0001, + "step": 21956 + }, + { + "epoch": 20.49, + "learning_rate": 4.7439365671641795e-05, + "loss": 0.0003, + "step": 21960 + }, + { + "epoch": 20.49, + "learning_rate": 4.743889925373134e-05, + "loss": 0.0015, + "step": 21964 + }, + { + "epoch": 20.49, + "learning_rate": 4.74384328358209e-05, + "loss": 0.0002, + "step": 21968 + }, + { + "epoch": 20.5, + "learning_rate": 4.743796641791045e-05, + "loss": 0.0009, + "step": 21972 + }, + { + "epoch": 20.5, + "learning_rate": 4.74375e-05, + "loss": 0.0006, + "step": 21976 + }, + { + "epoch": 20.5, + "learning_rate": 4.7437033582089556e-05, + "loss": 0.0001, + "step": 21980 + }, + { + "epoch": 20.51, + "learning_rate": 4.743656716417911e-05, + "loss": 0.0005, + "step": 21984 + }, + { + "epoch": 20.51, + "learning_rate": 4.743610074626866e-05, + "loss": 0.0021, + "step": 21988 + }, + { + "epoch": 20.51, + "learning_rate": 4.7435634328358214e-05, + "loss": 0.0008, + "step": 21992 + }, + { + "epoch": 20.52, + "learning_rate": 4.743516791044776e-05, + "loss": 0.0022, + "step": 21996 + }, + { + "epoch": 20.52, + "learning_rate": 4.743470149253732e-05, + "loss": 0.009, + "step": 22000 + }, + { + "epoch": 20.52, + "eval_exact_match": 0.718568665377176, + "eval_exec": 0.746615087040619, + "eval_loss": 0.36730751395225525, + "eval_runtime": 1232.9452, + "eval_samples_per_second": 0.839, + "step": 22000 + }, + { + "epoch": 20.53, + "learning_rate": 4.743423507462687e-05, + "loss": 0.0004, + "step": 22004 + }, + { + "epoch": 20.53, + "learning_rate": 4.743376865671642e-05, + "loss": 0.0028, + "step": 22008 + }, + { + "epoch": 20.53, + "learning_rate": 4.743330223880597e-05, + "loss": 0.0001, + "step": 22012 + }, + { + "epoch": 20.54, + "learning_rate": 4.743283582089553e-05, + "loss": 0.0002, + "step": 22016 + }, + { + "epoch": 20.54, + "learning_rate": 4.743236940298508e-05, + "loss": 0.0049, + "step": 22020 + }, + { + "epoch": 20.54, + "learning_rate": 4.7431902985074626e-05, + "loss": 0.0001, + "step": 22024 + }, + { + "epoch": 20.55, + "learning_rate": 4.743143656716418e-05, + "loss": 0.0003, + "step": 22028 + }, + { + "epoch": 20.55, + "learning_rate": 4.7430970149253736e-05, + "loss": 0.0003, + "step": 22032 + }, + { + "epoch": 20.56, + "learning_rate": 4.7430503731343284e-05, + "loss": 0.0009, + "step": 22036 + }, + { + "epoch": 20.56, + "learning_rate": 4.743003731343284e-05, + "loss": 0.0002, + "step": 22040 + }, + { + "epoch": 20.56, + "learning_rate": 4.7429570895522394e-05, + "loss": 0.0001, + "step": 22044 + }, + { + "epoch": 20.57, + "learning_rate": 4.742910447761194e-05, + "loss": 0.0001, + "step": 22048 + }, + { + "epoch": 20.57, + "learning_rate": 4.74286380597015e-05, + "loss": 0.0337, + "step": 22052 + }, + { + "epoch": 20.57, + "learning_rate": 4.7428171641791045e-05, + "loss": 0.0002, + "step": 22056 + }, + { + "epoch": 20.58, + "learning_rate": 4.74277052238806e-05, + "loss": 0.0001, + "step": 22060 + }, + { + "epoch": 20.58, + "learning_rate": 4.7427238805970155e-05, + "loss": 0.0001, + "step": 22064 + }, + { + "epoch": 20.59, + "learning_rate": 4.74267723880597e-05, + "loss": 0.0031, + "step": 22068 + }, + { + "epoch": 20.59, + "learning_rate": 4.742630597014925e-05, + "loss": 0.0008, + "step": 22072 + }, + { + "epoch": 20.59, + "learning_rate": 4.742583955223881e-05, + "loss": 0.0003, + "step": 22076 + }, + { + "epoch": 20.6, + "learning_rate": 4.742537313432836e-05, + "loss": 0.0011, + "step": 22080 + }, + { + "epoch": 20.6, + "learning_rate": 4.742490671641791e-05, + "loss": 0.0006, + "step": 22084 + }, + { + "epoch": 20.6, + "learning_rate": 4.7424440298507464e-05, + "loss": 0.0001, + "step": 22088 + }, + { + "epoch": 20.61, + "learning_rate": 4.742397388059702e-05, + "loss": 0.0025, + "step": 22092 + }, + { + "epoch": 20.61, + "learning_rate": 4.742350746268657e-05, + "loss": 0.0034, + "step": 22096 + }, + { + "epoch": 20.62, + "learning_rate": 4.742304104477612e-05, + "loss": 0.0001, + "step": 22100 + }, + { + "epoch": 20.62, + "learning_rate": 4.742257462686567e-05, + "loss": 0.0001, + "step": 22104 + }, + { + "epoch": 20.62, + "learning_rate": 4.7422108208955225e-05, + "loss": 0.0015, + "step": 22108 + }, + { + "epoch": 20.63, + "learning_rate": 4.742164179104478e-05, + "loss": 0.0001, + "step": 22112 + }, + { + "epoch": 20.63, + "learning_rate": 4.742117537313433e-05, + "loss": 0.0002, + "step": 22116 + }, + { + "epoch": 20.63, + "learning_rate": 4.742070895522388e-05, + "loss": 0.0075, + "step": 22120 + }, + { + "epoch": 20.64, + "learning_rate": 4.742024253731344e-05, + "loss": 0.0001, + "step": 22124 + }, + { + "epoch": 20.64, + "learning_rate": 4.7419776119402986e-05, + "loss": 0.0042, + "step": 22128 + }, + { + "epoch": 20.65, + "learning_rate": 4.7419309701492534e-05, + "loss": 0.0004, + "step": 22132 + }, + { + "epoch": 20.65, + "learning_rate": 4.7418843283582096e-05, + "loss": 0.0016, + "step": 22136 + }, + { + "epoch": 20.65, + "learning_rate": 4.7418376865671644e-05, + "loss": 0.0008, + "step": 22140 + }, + { + "epoch": 20.66, + "learning_rate": 4.74179104477612e-05, + "loss": 0.0014, + "step": 22144 + }, + { + "epoch": 20.66, + "learning_rate": 4.741744402985075e-05, + "loss": 0.0001, + "step": 22148 + }, + { + "epoch": 20.66, + "learning_rate": 4.74169776119403e-05, + "loss": 0.0036, + "step": 22152 + }, + { + "epoch": 20.67, + "learning_rate": 4.741651119402986e-05, + "loss": 0.0004, + "step": 22156 + }, + { + "epoch": 20.67, + "learning_rate": 4.7416044776119405e-05, + "loss": 0.0022, + "step": 22160 + }, + { + "epoch": 20.68, + "learning_rate": 4.741557835820895e-05, + "loss": 0.0009, + "step": 22164 + }, + { + "epoch": 20.68, + "learning_rate": 4.7415111940298515e-05, + "loss": 0.0004, + "step": 22168 + }, + { + "epoch": 20.68, + "learning_rate": 4.741464552238806e-05, + "loss": 0.0057, + "step": 22172 + }, + { + "epoch": 20.69, + "learning_rate": 4.741417910447761e-05, + "loss": 0.0005, + "step": 22176 + }, + { + "epoch": 20.69, + "learning_rate": 4.7413712686567166e-05, + "loss": 0.0015, + "step": 22180 + }, + { + "epoch": 20.69, + "learning_rate": 4.741324626865672e-05, + "loss": 0.0002, + "step": 22184 + }, + { + "epoch": 20.7, + "learning_rate": 4.741277985074627e-05, + "loss": 0.0002, + "step": 22188 + }, + { + "epoch": 20.7, + "learning_rate": 4.7412313432835824e-05, + "loss": 0.0003, + "step": 22192 + }, + { + "epoch": 20.71, + "learning_rate": 4.741184701492538e-05, + "loss": 0.0003, + "step": 22196 + }, + { + "epoch": 20.71, + "learning_rate": 4.741138059701493e-05, + "loss": 0.0023, + "step": 22200 + }, + { + "epoch": 20.71, + "learning_rate": 4.741091417910448e-05, + "loss": 0.0016, + "step": 22204 + }, + { + "epoch": 20.72, + "learning_rate": 4.741044776119403e-05, + "loss": 0.0012, + "step": 22208 + }, + { + "epoch": 20.72, + "learning_rate": 4.7409981343283585e-05, + "loss": 0.0025, + "step": 22212 + }, + { + "epoch": 20.72, + "learning_rate": 4.740951492537314e-05, + "loss": 0.0003, + "step": 22216 + }, + { + "epoch": 20.73, + "learning_rate": 4.740904850746269e-05, + "loss": 0.0009, + "step": 22220 + }, + { + "epoch": 20.73, + "learning_rate": 4.7408582089552236e-05, + "loss": 0.0008, + "step": 22224 + }, + { + "epoch": 20.73, + "learning_rate": 4.74081156716418e-05, + "loss": 0.004, + "step": 22228 + }, + { + "epoch": 20.74, + "learning_rate": 4.7407649253731346e-05, + "loss": 0.0004, + "step": 22232 + }, + { + "epoch": 20.74, + "learning_rate": 4.7407182835820894e-05, + "loss": 0.0026, + "step": 22236 + }, + { + "epoch": 20.75, + "learning_rate": 4.740671641791045e-05, + "loss": 0.0016, + "step": 22240 + }, + { + "epoch": 20.75, + "learning_rate": 4.7406250000000004e-05, + "loss": 0.0011, + "step": 22244 + }, + { + "epoch": 20.75, + "learning_rate": 4.740578358208955e-05, + "loss": 0.0045, + "step": 22248 + }, + { + "epoch": 20.76, + "learning_rate": 4.740531716417911e-05, + "loss": 0.0007, + "step": 22252 + }, + { + "epoch": 20.76, + "learning_rate": 4.740485074626866e-05, + "loss": 0.0013, + "step": 22256 + }, + { + "epoch": 20.76, + "learning_rate": 4.740438432835821e-05, + "loss": 0.0009, + "step": 22260 + }, + { + "epoch": 20.77, + "learning_rate": 4.7403917910447765e-05, + "loss": 0.0041, + "step": 22264 + }, + { + "epoch": 20.77, + "learning_rate": 4.740345149253731e-05, + "loss": 0.0016, + "step": 22268 + }, + { + "epoch": 20.78, + "learning_rate": 4.740298507462687e-05, + "loss": 0.0008, + "step": 22272 + }, + { + "epoch": 20.78, + "learning_rate": 4.740251865671642e-05, + "loss": 0.0174, + "step": 22276 + }, + { + "epoch": 20.78, + "learning_rate": 4.740205223880597e-05, + "loss": 0.0067, + "step": 22280 + }, + { + "epoch": 20.79, + "learning_rate": 4.740158582089552e-05, + "loss": 0.0009, + "step": 22284 + }, + { + "epoch": 20.79, + "learning_rate": 4.740111940298508e-05, + "loss": 0.0, + "step": 22288 + }, + { + "epoch": 20.79, + "learning_rate": 4.740065298507463e-05, + "loss": 0.0009, + "step": 22292 + }, + { + "epoch": 20.8, + "learning_rate": 4.740018656716418e-05, + "loss": 0.0014, + "step": 22296 + }, + { + "epoch": 20.8, + "learning_rate": 4.739972014925373e-05, + "loss": 0.0009, + "step": 22300 + }, + { + "epoch": 20.81, + "learning_rate": 4.739925373134329e-05, + "loss": 0.0001, + "step": 22304 + }, + { + "epoch": 20.81, + "learning_rate": 4.739878731343284e-05, + "loss": 0.0046, + "step": 22308 + }, + { + "epoch": 20.81, + "learning_rate": 4.739832089552239e-05, + "loss": 0.0004, + "step": 22312 + }, + { + "epoch": 20.82, + "learning_rate": 4.7397854477611945e-05, + "loss": 0.0076, + "step": 22316 + }, + { + "epoch": 20.82, + "learning_rate": 4.73973880597015e-05, + "loss": 0.0117, + "step": 22320 + }, + { + "epoch": 20.82, + "learning_rate": 4.739692164179105e-05, + "loss": 0.0002, + "step": 22324 + }, + { + "epoch": 20.83, + "learning_rate": 4.7396455223880596e-05, + "loss": 0.0009, + "step": 22328 + }, + { + "epoch": 20.83, + "learning_rate": 4.739598880597015e-05, + "loss": 0.0028, + "step": 22332 + }, + { + "epoch": 20.84, + "learning_rate": 4.7395522388059706e-05, + "loss": 0.0009, + "step": 22336 + }, + { + "epoch": 20.84, + "learning_rate": 4.7395055970149254e-05, + "loss": 0.0007, + "step": 22340 + }, + { + "epoch": 20.84, + "learning_rate": 4.739458955223881e-05, + "loss": 0.001, + "step": 22344 + }, + { + "epoch": 20.85, + "learning_rate": 4.7394123134328364e-05, + "loss": 0.0058, + "step": 22348 + }, + { + "epoch": 20.85, + "learning_rate": 4.739365671641791e-05, + "loss": 0.0018, + "step": 22352 + }, + { + "epoch": 20.85, + "learning_rate": 4.7393190298507467e-05, + "loss": 0.0022, + "step": 22356 + }, + { + "epoch": 20.86, + "learning_rate": 4.7392723880597015e-05, + "loss": 0.0006, + "step": 22360 + }, + { + "epoch": 20.86, + "learning_rate": 4.739225746268657e-05, + "loss": 0.0006, + "step": 22364 + }, + { + "epoch": 20.87, + "learning_rate": 4.7391791044776125e-05, + "loss": 0.0003, + "step": 22368 + }, + { + "epoch": 20.87, + "learning_rate": 4.739132462686567e-05, + "loss": 0.0001, + "step": 22372 + }, + { + "epoch": 20.87, + "learning_rate": 4.739085820895523e-05, + "loss": 0.0002, + "step": 22376 + }, + { + "epoch": 20.88, + "learning_rate": 4.739039179104478e-05, + "loss": 0.0007, + "step": 22380 + }, + { + "epoch": 20.88, + "learning_rate": 4.738992537313433e-05, + "loss": 0.0015, + "step": 22384 + }, + { + "epoch": 20.88, + "learning_rate": 4.738945895522388e-05, + "loss": 0.0007, + "step": 22388 + }, + { + "epoch": 20.89, + "learning_rate": 4.7388992537313434e-05, + "loss": 0.0003, + "step": 22392 + }, + { + "epoch": 20.89, + "learning_rate": 4.738852611940299e-05, + "loss": 0.0024, + "step": 22396 + }, + { + "epoch": 20.9, + "learning_rate": 4.738805970149254e-05, + "loss": 0.0006, + "step": 22400 + }, + { + "epoch": 20.9, + "learning_rate": 4.738759328358209e-05, + "loss": 0.0005, + "step": 22404 + }, + { + "epoch": 20.9, + "learning_rate": 4.7387126865671647e-05, + "loss": 0.0026, + "step": 22408 + }, + { + "epoch": 20.91, + "learning_rate": 4.7386660447761195e-05, + "loss": 0.0015, + "step": 22412 + }, + { + "epoch": 20.91, + "learning_rate": 4.738619402985075e-05, + "loss": 0.0008, + "step": 22416 + }, + { + "epoch": 20.91, + "learning_rate": 4.73857276119403e-05, + "loss": 0.0009, + "step": 22420 + }, + { + "epoch": 20.92, + "learning_rate": 4.738526119402985e-05, + "loss": 0.0006, + "step": 22424 + }, + { + "epoch": 20.92, + "learning_rate": 4.738479477611941e-05, + "loss": 0.0038, + "step": 22428 + }, + { + "epoch": 20.93, + "learning_rate": 4.7384328358208956e-05, + "loss": 0.0001, + "step": 22432 + }, + { + "epoch": 20.93, + "learning_rate": 4.738386194029851e-05, + "loss": 0.0005, + "step": 22436 + }, + { + "epoch": 20.93, + "learning_rate": 4.7383395522388065e-05, + "loss": 0.0003, + "step": 22440 + }, + { + "epoch": 20.94, + "learning_rate": 4.7382929104477614e-05, + "loss": 0.0004, + "step": 22444 + }, + { + "epoch": 20.94, + "learning_rate": 4.738246268656716e-05, + "loss": 0.0036, + "step": 22448 + }, + { + "epoch": 20.94, + "learning_rate": 4.738199626865672e-05, + "loss": 0.0002, + "step": 22452 + }, + { + "epoch": 20.95, + "learning_rate": 4.738152985074627e-05, + "loss": 0.0025, + "step": 22456 + }, + { + "epoch": 20.95, + "learning_rate": 4.738106343283582e-05, + "loss": 0.0004, + "step": 22460 + }, + { + "epoch": 20.96, + "learning_rate": 4.7380597014925375e-05, + "loss": 0.0019, + "step": 22464 + }, + { + "epoch": 20.96, + "learning_rate": 4.738013059701493e-05, + "loss": 0.005, + "step": 22468 + }, + { + "epoch": 20.96, + "learning_rate": 4.7379664179104484e-05, + "loss": 0.0005, + "step": 22472 + }, + { + "epoch": 20.97, + "learning_rate": 4.737919776119403e-05, + "loss": 0.0002, + "step": 22476 + }, + { + "epoch": 20.97, + "learning_rate": 4.737873134328358e-05, + "loss": 0.0046, + "step": 22480 + }, + { + "epoch": 20.97, + "learning_rate": 4.737826492537314e-05, + "loss": 0.0005, + "step": 22484 + }, + { + "epoch": 20.98, + "learning_rate": 4.737779850746269e-05, + "loss": 0.0011, + "step": 22488 + }, + { + "epoch": 20.98, + "learning_rate": 4.737733208955224e-05, + "loss": 0.0024, + "step": 22492 + }, + { + "epoch": 20.98, + "learning_rate": 4.7376865671641793e-05, + "loss": 0.001, + "step": 22496 + }, + { + "epoch": 20.99, + "learning_rate": 4.737639925373135e-05, + "loss": 0.0001, + "step": 22500 + }, + { + "epoch": 20.99, + "eval_exact_match": 0.7321083172147002, + "eval_exec": 0.7669245647969052, + "eval_loss": 0.3726440668106079, + "eval_runtime": 1238.1908, + "eval_samples_per_second": 0.835, + "step": 22500 + }, + { + "epoch": 20.99, + "learning_rate": 4.7375932835820897e-05, + "loss": 0.0001, + "step": 22504 + }, + { + "epoch": 21.0, + "learning_rate": 4.737546641791045e-05, + "loss": 0.0035, + "step": 22508 + }, + { + "epoch": 21.0, + "learning_rate": 4.7375e-05, + "loss": 0.0003, + "step": 22512 + }, + { + "epoch": 21.0, + "learning_rate": 4.7374533582089554e-05, + "loss": 0.0002, + "step": 22516 + }, + { + "epoch": 21.01, + "learning_rate": 4.737406716417911e-05, + "loss": 0.0005, + "step": 22520 + }, + { + "epoch": 21.01, + "learning_rate": 4.737360074626866e-05, + "loss": 0.006, + "step": 22524 + }, + { + "epoch": 21.01, + "learning_rate": 4.737313432835821e-05, + "loss": 0.0019, + "step": 22528 + }, + { + "epoch": 21.02, + "learning_rate": 4.737266791044777e-05, + "loss": 0.0008, + "step": 22532 + }, + { + "epoch": 21.02, + "learning_rate": 4.7372201492537315e-05, + "loss": 0.002, + "step": 22536 + }, + { + "epoch": 21.03, + "learning_rate": 4.7371735074626864e-05, + "loss": 0.0002, + "step": 22540 + }, + { + "epoch": 21.03, + "learning_rate": 4.7371268656716425e-05, + "loss": 0.0007, + "step": 22544 + }, + { + "epoch": 21.03, + "learning_rate": 4.7370802238805973e-05, + "loss": 0.0004, + "step": 22548 + }, + { + "epoch": 21.04, + "learning_rate": 4.737033582089552e-05, + "loss": 0.0002, + "step": 22552 + }, + { + "epoch": 21.04, + "learning_rate": 4.7369869402985076e-05, + "loss": 0.0005, + "step": 22556 + }, + { + "epoch": 21.04, + "learning_rate": 4.736940298507463e-05, + "loss": 0.0001, + "step": 22560 + }, + { + "epoch": 21.05, + "learning_rate": 4.736893656716418e-05, + "loss": 0.0003, + "step": 22564 + }, + { + "epoch": 21.05, + "learning_rate": 4.7368470149253734e-05, + "loss": 0.0008, + "step": 22568 + }, + { + "epoch": 21.06, + "learning_rate": 4.736800373134328e-05, + "loss": 0.0027, + "step": 22572 + }, + { + "epoch": 21.06, + "learning_rate": 4.736753731343284e-05, + "loss": 0.0021, + "step": 22576 + }, + { + "epoch": 21.06, + "learning_rate": 4.736707089552239e-05, + "loss": 0.0071, + "step": 22580 + }, + { + "epoch": 21.07, + "learning_rate": 4.736660447761194e-05, + "loss": 0.001, + "step": 22584 + }, + { + "epoch": 21.07, + "learning_rate": 4.7366138059701495e-05, + "loss": 0.0019, + "step": 22588 + }, + { + "epoch": 21.07, + "learning_rate": 4.736567164179105e-05, + "loss": 0.0006, + "step": 22592 + }, + { + "epoch": 21.08, + "learning_rate": 4.73652052238806e-05, + "loss": 0.0009, + "step": 22596 + }, + { + "epoch": 21.08, + "learning_rate": 4.7364738805970147e-05, + "loss": 0.0016, + "step": 22600 + }, + { + "epoch": 21.09, + "learning_rate": 4.736427238805971e-05, + "loss": 0.0001, + "step": 22604 + }, + { + "epoch": 21.09, + "learning_rate": 4.7363805970149256e-05, + "loss": 0.0004, + "step": 22608 + }, + { + "epoch": 21.09, + "learning_rate": 4.7363339552238805e-05, + "loss": 0.0074, + "step": 22612 + }, + { + "epoch": 21.1, + "learning_rate": 4.736287313432836e-05, + "loss": 0.0042, + "step": 22616 + }, + { + "epoch": 21.1, + "learning_rate": 4.7362406716417914e-05, + "loss": 0.0001, + "step": 22620 + }, + { + "epoch": 21.1, + "learning_rate": 4.736194029850746e-05, + "loss": 0.0036, + "step": 22624 + }, + { + "epoch": 21.11, + "learning_rate": 4.736147388059702e-05, + "loss": 0.0009, + "step": 22628 + }, + { + "epoch": 21.11, + "learning_rate": 4.7361007462686565e-05, + "loss": 0.0002, + "step": 22632 + }, + { + "epoch": 21.12, + "learning_rate": 4.736054104477613e-05, + "loss": 0.0, + "step": 22636 + }, + { + "epoch": 21.12, + "learning_rate": 4.7360074626865675e-05, + "loss": 0.0076, + "step": 22640 + }, + { + "epoch": 21.12, + "learning_rate": 4.7359608208955223e-05, + "loss": 0.0015, + "step": 22644 + }, + { + "epoch": 21.13, + "learning_rate": 4.735914179104478e-05, + "loss": 0.003, + "step": 22648 + }, + { + "epoch": 21.13, + "learning_rate": 4.735867537313433e-05, + "loss": 0.0014, + "step": 22652 + }, + { + "epoch": 21.13, + "learning_rate": 4.735820895522388e-05, + "loss": 0.0033, + "step": 22656 + }, + { + "epoch": 21.14, + "learning_rate": 4.7357742537313436e-05, + "loss": 0.0001, + "step": 22660 + }, + { + "epoch": 21.14, + "learning_rate": 4.735727611940299e-05, + "loss": 0.0011, + "step": 22664 + }, + { + "epoch": 21.15, + "learning_rate": 4.735680970149254e-05, + "loss": 0.0007, + "step": 22668 + }, + { + "epoch": 21.15, + "learning_rate": 4.7356343283582094e-05, + "loss": 0.0007, + "step": 22672 + }, + { + "epoch": 21.15, + "learning_rate": 4.735587686567164e-05, + "loss": 0.0041, + "step": 22676 + }, + { + "epoch": 21.16, + "learning_rate": 4.73554104477612e-05, + "loss": 0.0011, + "step": 22680 + }, + { + "epoch": 21.16, + "learning_rate": 4.735494402985075e-05, + "loss": 0.0036, + "step": 22684 + }, + { + "epoch": 21.16, + "learning_rate": 4.73544776119403e-05, + "loss": 0.0049, + "step": 22688 + }, + { + "epoch": 21.17, + "learning_rate": 4.735401119402985e-05, + "loss": 0.0011, + "step": 22692 + }, + { + "epoch": 21.17, + "learning_rate": 4.735354477611941e-05, + "loss": 0.0011, + "step": 22696 + }, + { + "epoch": 21.18, + "learning_rate": 4.735307835820896e-05, + "loss": 0.0031, + "step": 22700 + }, + { + "epoch": 21.18, + "learning_rate": 4.7352611940298506e-05, + "loss": 0.0002, + "step": 22704 + }, + { + "epoch": 21.18, + "learning_rate": 4.735214552238806e-05, + "loss": 0.0002, + "step": 22708 + }, + { + "epoch": 21.19, + "learning_rate": 4.7351679104477616e-05, + "loss": 0.0005, + "step": 22712 + }, + { + "epoch": 21.19, + "learning_rate": 4.7351212686567164e-05, + "loss": 0.0044, + "step": 22716 + }, + { + "epoch": 21.19, + "learning_rate": 4.735074626865672e-05, + "loss": 0.0009, + "step": 22720 + }, + { + "epoch": 21.2, + "learning_rate": 4.7350279850746274e-05, + "loss": 0.0002, + "step": 22724 + }, + { + "epoch": 21.2, + "learning_rate": 4.734981343283582e-05, + "loss": 0.0003, + "step": 22728 + }, + { + "epoch": 21.21, + "learning_rate": 4.734934701492538e-05, + "loss": 0.0001, + "step": 22732 + }, + { + "epoch": 21.21, + "learning_rate": 4.7348880597014925e-05, + "loss": 0.0013, + "step": 22736 + }, + { + "epoch": 21.21, + "learning_rate": 4.734841417910448e-05, + "loss": 0.0047, + "step": 22740 + }, + { + "epoch": 21.22, + "learning_rate": 4.7347947761194035e-05, + "loss": 0.0031, + "step": 22744 + }, + { + "epoch": 21.22, + "learning_rate": 4.734748134328358e-05, + "loss": 0.0044, + "step": 22748 + }, + { + "epoch": 21.22, + "learning_rate": 4.734701492537313e-05, + "loss": 0.0001, + "step": 22752 + }, + { + "epoch": 21.23, + "learning_rate": 4.734654850746269e-05, + "loss": 0.0006, + "step": 22756 + }, + { + "epoch": 21.23, + "learning_rate": 4.734608208955224e-05, + "loss": 0.0004, + "step": 22760 + }, + { + "epoch": 21.24, + "learning_rate": 4.734561567164179e-05, + "loss": 0.0019, + "step": 22764 + }, + { + "epoch": 21.24, + "learning_rate": 4.7345149253731344e-05, + "loss": 0.0003, + "step": 22768 + }, + { + "epoch": 21.24, + "learning_rate": 4.73446828358209e-05, + "loss": 0.0009, + "step": 22772 + }, + { + "epoch": 21.25, + "learning_rate": 4.734421641791045e-05, + "loss": 0.0006, + "step": 22776 + }, + { + "epoch": 21.25, + "learning_rate": 4.734375e-05, + "loss": 0.0015, + "step": 22780 + }, + { + "epoch": 21.25, + "learning_rate": 4.734328358208955e-05, + "loss": 0.0011, + "step": 22784 + }, + { + "epoch": 21.26, + "learning_rate": 4.7342817164179105e-05, + "loss": 0.0038, + "step": 22788 + }, + { + "epoch": 21.26, + "learning_rate": 4.734235074626866e-05, + "loss": 0.0001, + "step": 22792 + }, + { + "epoch": 21.26, + "learning_rate": 4.734188432835821e-05, + "loss": 0.0001, + "step": 22796 + }, + { + "epoch": 21.27, + "learning_rate": 4.734141791044776e-05, + "loss": 0.0037, + "step": 22800 + }, + { + "epoch": 21.27, + "learning_rate": 4.734095149253732e-05, + "loss": 0.0003, + "step": 22804 + }, + { + "epoch": 21.28, + "learning_rate": 4.7340485074626866e-05, + "loss": 0.0006, + "step": 22808 + }, + { + "epoch": 21.28, + "learning_rate": 4.734001865671642e-05, + "loss": 0.0003, + "step": 22812 + }, + { + "epoch": 21.28, + "learning_rate": 4.7339552238805976e-05, + "loss": 0.0018, + "step": 22816 + }, + { + "epoch": 21.29, + "learning_rate": 4.7339085820895524e-05, + "loss": 0.0003, + "step": 22820 + }, + { + "epoch": 21.29, + "learning_rate": 4.733861940298508e-05, + "loss": 0.0003, + "step": 22824 + }, + { + "epoch": 21.29, + "learning_rate": 4.733815298507463e-05, + "loss": 0.004, + "step": 22828 + }, + { + "epoch": 21.3, + "learning_rate": 4.733768656716418e-05, + "loss": 0.0005, + "step": 22832 + }, + { + "epoch": 21.3, + "learning_rate": 4.733722014925374e-05, + "loss": 0.0001, + "step": 22836 + }, + { + "epoch": 21.31, + "learning_rate": 4.7336753731343285e-05, + "loss": 0.0006, + "step": 22840 + }, + { + "epoch": 21.31, + "learning_rate": 4.733628731343283e-05, + "loss": 0.0018, + "step": 22844 + }, + { + "epoch": 21.31, + "learning_rate": 4.7335820895522395e-05, + "loss": 0.0, + "step": 22848 + }, + { + "epoch": 21.32, + "learning_rate": 4.733535447761194e-05, + "loss": 0.0008, + "step": 22852 + }, + { + "epoch": 21.32, + "learning_rate": 4.733488805970149e-05, + "loss": 0.0045, + "step": 22856 + }, + { + "epoch": 21.32, + "learning_rate": 4.7334421641791046e-05, + "loss": 0.0006, + "step": 22860 + }, + { + "epoch": 21.33, + "learning_rate": 4.73339552238806e-05, + "loss": 0.0014, + "step": 22864 + }, + { + "epoch": 21.33, + "learning_rate": 4.733348880597015e-05, + "loss": 0.0005, + "step": 22868 + }, + { + "epoch": 21.34, + "learning_rate": 4.7333022388059704e-05, + "loss": 0.0001, + "step": 22872 + }, + { + "epoch": 21.34, + "learning_rate": 4.733255597014926e-05, + "loss": 0.0032, + "step": 22876 + }, + { + "epoch": 21.34, + "learning_rate": 4.733208955223881e-05, + "loss": 0.0015, + "step": 22880 + }, + { + "epoch": 21.35, + "learning_rate": 4.733162313432836e-05, + "loss": 0.0006, + "step": 22884 + }, + { + "epoch": 21.35, + "learning_rate": 4.733115671641791e-05, + "loss": 0.0001, + "step": 22888 + }, + { + "epoch": 21.35, + "learning_rate": 4.7330690298507465e-05, + "loss": 0.0023, + "step": 22892 + }, + { + "epoch": 21.36, + "learning_rate": 4.733022388059702e-05, + "loss": 0.0013, + "step": 22896 + }, + { + "epoch": 21.36, + "learning_rate": 4.732975746268657e-05, + "loss": 0.0022, + "step": 22900 + }, + { + "epoch": 21.37, + "learning_rate": 4.7329291044776116e-05, + "loss": 0.0071, + "step": 22904 + }, + { + "epoch": 21.37, + "learning_rate": 4.732882462686568e-05, + "loss": 0.0038, + "step": 22908 + }, + { + "epoch": 21.37, + "learning_rate": 4.7328358208955226e-05, + "loss": 0.0001, + "step": 22912 + }, + { + "epoch": 21.38, + "learning_rate": 4.7327891791044774e-05, + "loss": 0.0005, + "step": 22916 + }, + { + "epoch": 21.38, + "learning_rate": 4.732742537313433e-05, + "loss": 0.0001, + "step": 22920 + }, + { + "epoch": 21.38, + "learning_rate": 4.7326958955223884e-05, + "loss": 0.0085, + "step": 22924 + }, + { + "epoch": 21.39, + "learning_rate": 4.732649253731343e-05, + "loss": 0.0053, + "step": 22928 + }, + { + "epoch": 21.39, + "learning_rate": 4.732602611940299e-05, + "loss": 0.0007, + "step": 22932 + }, + { + "epoch": 21.4, + "learning_rate": 4.732555970149254e-05, + "loss": 0.0013, + "step": 22936 + }, + { + "epoch": 21.4, + "learning_rate": 4.732509328358209e-05, + "loss": 0.0002, + "step": 22940 + }, + { + "epoch": 21.4, + "learning_rate": 4.7324626865671645e-05, + "loss": 0.0027, + "step": 22944 + }, + { + "epoch": 21.41, + "learning_rate": 4.732416044776119e-05, + "loss": 0.0005, + "step": 22948 + }, + { + "epoch": 21.41, + "learning_rate": 4.732369402985075e-05, + "loss": 0.0004, + "step": 22952 + }, + { + "epoch": 21.41, + "learning_rate": 4.73232276119403e-05, + "loss": 0.0006, + "step": 22956 + }, + { + "epoch": 21.42, + "learning_rate": 4.732276119402985e-05, + "loss": 0.0047, + "step": 22960 + }, + { + "epoch": 21.42, + "learning_rate": 4.7322294776119406e-05, + "loss": 0.0008, + "step": 22964 + }, + { + "epoch": 21.43, + "learning_rate": 4.732182835820896e-05, + "loss": 0.0019, + "step": 22968 + }, + { + "epoch": 21.43, + "learning_rate": 4.732136194029851e-05, + "loss": 0.0008, + "step": 22972 + }, + { + "epoch": 21.43, + "learning_rate": 4.7320895522388064e-05, + "loss": 0.0001, + "step": 22976 + }, + { + "epoch": 21.44, + "learning_rate": 4.732042910447761e-05, + "loss": 0.0002, + "step": 22980 + }, + { + "epoch": 21.44, + "learning_rate": 4.731996268656717e-05, + "loss": 0.0008, + "step": 22984 + }, + { + "epoch": 21.44, + "learning_rate": 4.731949626865672e-05, + "loss": 0.0007, + "step": 22988 + }, + { + "epoch": 21.45, + "learning_rate": 4.731902985074627e-05, + "loss": 0.0002, + "step": 22992 + }, + { + "epoch": 21.45, + "learning_rate": 4.7318563432835825e-05, + "loss": 0.0017, + "step": 22996 + }, + { + "epoch": 21.46, + "learning_rate": 4.731809701492538e-05, + "loss": 0.0074, + "step": 23000 + }, + { + "epoch": 21.46, + "eval_exact_match": 0.7263056092843327, + "eval_exec": 0.7630560928433269, + "eval_loss": 0.3736368417739868, + "eval_runtime": 1044.2285, + "eval_samples_per_second": 0.99, + "step": 23000 + }, + { + "epoch": 21.46, + "learning_rate": 4.731763059701493e-05, + "loss": 0.0002, + "step": 23004 + }, + { + "epoch": 21.46, + "learning_rate": 4.7317164179104476e-05, + "loss": 0.0008, + "step": 23008 + }, + { + "epoch": 21.47, + "learning_rate": 4.731669776119403e-05, + "loss": 0.0008, + "step": 23012 + }, + { + "epoch": 21.47, + "learning_rate": 4.7316231343283586e-05, + "loss": 0.0004, + "step": 23016 + }, + { + "epoch": 21.47, + "learning_rate": 4.7315764925373134e-05, + "loss": 0.0002, + "step": 23020 + }, + { + "epoch": 21.48, + "learning_rate": 4.731529850746269e-05, + "loss": 0.0021, + "step": 23024 + }, + { + "epoch": 21.48, + "learning_rate": 4.7314832089552244e-05, + "loss": 0.0001, + "step": 23028 + }, + { + "epoch": 21.49, + "learning_rate": 4.731436567164179e-05, + "loss": 0.0001, + "step": 23032 + }, + { + "epoch": 21.49, + "learning_rate": 4.731389925373135e-05, + "loss": 0.0063, + "step": 23036 + }, + { + "epoch": 21.49, + "learning_rate": 4.7313432835820895e-05, + "loss": 0.0068, + "step": 23040 + }, + { + "epoch": 21.5, + "learning_rate": 4.731296641791045e-05, + "loss": 0.0021, + "step": 23044 + }, + { + "epoch": 21.5, + "learning_rate": 4.7312500000000005e-05, + "loss": 0.0038, + "step": 23048 + }, + { + "epoch": 21.5, + "learning_rate": 4.731203358208955e-05, + "loss": 0.0005, + "step": 23052 + }, + { + "epoch": 21.51, + "learning_rate": 4.731156716417911e-05, + "loss": 0.0001, + "step": 23056 + }, + { + "epoch": 21.51, + "learning_rate": 4.731110074626866e-05, + "loss": 0.0007, + "step": 23060 + }, + { + "epoch": 21.51, + "learning_rate": 4.731063432835821e-05, + "loss": 0.0004, + "step": 23064 + }, + { + "epoch": 21.52, + "learning_rate": 4.731016791044776e-05, + "loss": 0.0005, + "step": 23068 + }, + { + "epoch": 21.52, + "learning_rate": 4.7309701492537314e-05, + "loss": 0.0006, + "step": 23072 + }, + { + "epoch": 21.53, + "learning_rate": 4.730923507462687e-05, + "loss": 0.0016, + "step": 23076 + }, + { + "epoch": 21.53, + "learning_rate": 4.730876865671642e-05, + "loss": 0.0008, + "step": 23080 + }, + { + "epoch": 21.53, + "learning_rate": 4.730830223880597e-05, + "loss": 0.0009, + "step": 23084 + }, + { + "epoch": 21.54, + "learning_rate": 4.730783582089553e-05, + "loss": 0.0008, + "step": 23088 + }, + { + "epoch": 21.54, + "learning_rate": 4.7307369402985075e-05, + "loss": 0.0016, + "step": 23092 + }, + { + "epoch": 21.54, + "learning_rate": 4.730690298507463e-05, + "loss": 0.0001, + "step": 23096 + }, + { + "epoch": 21.55, + "learning_rate": 4.730643656716418e-05, + "loss": 0.0047, + "step": 23100 + }, + { + "epoch": 21.55, + "learning_rate": 4.730597014925373e-05, + "loss": 0.0004, + "step": 23104 + }, + { + "epoch": 21.56, + "learning_rate": 4.730550373134329e-05, + "loss": 0.0008, + "step": 23108 + }, + { + "epoch": 21.56, + "learning_rate": 4.7305037313432836e-05, + "loss": 0.0011, + "step": 23112 + }, + { + "epoch": 21.56, + "learning_rate": 4.730457089552239e-05, + "loss": 0.0015, + "step": 23116 + }, + { + "epoch": 21.57, + "learning_rate": 4.7304104477611946e-05, + "loss": 0.0034, + "step": 23120 + }, + { + "epoch": 21.57, + "learning_rate": 4.7303638059701494e-05, + "loss": 0.0004, + "step": 23124 + }, + { + "epoch": 21.57, + "learning_rate": 4.730317164179105e-05, + "loss": 0.0009, + "step": 23128 + }, + { + "epoch": 21.58, + "learning_rate": 4.73027052238806e-05, + "loss": 0.0052, + "step": 23132 + }, + { + "epoch": 21.58, + "learning_rate": 4.730223880597015e-05, + "loss": 0.0007, + "step": 23136 + }, + { + "epoch": 21.59, + "learning_rate": 4.730177238805971e-05, + "loss": 0.0061, + "step": 23140 + }, + { + "epoch": 21.59, + "learning_rate": 4.7301305970149255e-05, + "loss": 0.0036, + "step": 23144 + }, + { + "epoch": 21.59, + "learning_rate": 4.730083955223881e-05, + "loss": 0.0002, + "step": 23148 + }, + { + "epoch": 21.6, + "learning_rate": 4.7300373134328365e-05, + "loss": 0.0001, + "step": 23152 + }, + { + "epoch": 21.6, + "learning_rate": 4.729990671641791e-05, + "loss": 0.0001, + "step": 23156 + }, + { + "epoch": 21.6, + "learning_rate": 4.729944029850746e-05, + "loss": 0.0013, + "step": 23160 + }, + { + "epoch": 21.61, + "learning_rate": 4.729897388059702e-05, + "loss": 0.0001, + "step": 23164 + }, + { + "epoch": 21.61, + "learning_rate": 4.729850746268657e-05, + "loss": 0.003, + "step": 23168 + }, + { + "epoch": 21.62, + "learning_rate": 4.729804104477612e-05, + "loss": 0.0053, + "step": 23172 + }, + { + "epoch": 21.62, + "learning_rate": 4.7297574626865674e-05, + "loss": 0.0004, + "step": 23176 + }, + { + "epoch": 21.62, + "learning_rate": 4.729710820895523e-05, + "loss": 0.0028, + "step": 23180 + }, + { + "epoch": 21.63, + "learning_rate": 4.729664179104478e-05, + "loss": 0.0004, + "step": 23184 + }, + { + "epoch": 21.63, + "learning_rate": 4.729617537313433e-05, + "loss": 0.0004, + "step": 23188 + }, + { + "epoch": 21.63, + "learning_rate": 4.729570895522388e-05, + "loss": 0.0018, + "step": 23192 + }, + { + "epoch": 21.64, + "learning_rate": 4.7295242537313435e-05, + "loss": 0.0004, + "step": 23196 + }, + { + "epoch": 21.64, + "learning_rate": 4.729477611940299e-05, + "loss": 0.001, + "step": 23200 + }, + { + "epoch": 21.65, + "learning_rate": 4.729430970149254e-05, + "loss": 0.0012, + "step": 23204 + }, + { + "epoch": 21.65, + "learning_rate": 4.729384328358209e-05, + "loss": 0.001, + "step": 23208 + }, + { + "epoch": 21.65, + "learning_rate": 4.729337686567165e-05, + "loss": 0.005, + "step": 23212 + }, + { + "epoch": 21.66, + "learning_rate": 4.7292910447761196e-05, + "loss": 0.0017, + "step": 23216 + }, + { + "epoch": 21.66, + "learning_rate": 4.7292444029850744e-05, + "loss": 0.0002, + "step": 23220 + }, + { + "epoch": 21.66, + "learning_rate": 4.7291977611940306e-05, + "loss": 0.0004, + "step": 23224 + }, + { + "epoch": 21.67, + "learning_rate": 4.7291511194029854e-05, + "loss": 0.0004, + "step": 23228 + }, + { + "epoch": 21.67, + "learning_rate": 4.72910447761194e-05, + "loss": 0.001, + "step": 23232 + }, + { + "epoch": 21.68, + "learning_rate": 4.729057835820896e-05, + "loss": 0.0002, + "step": 23236 + }, + { + "epoch": 21.68, + "learning_rate": 4.729011194029851e-05, + "loss": 0.0063, + "step": 23240 + }, + { + "epoch": 21.68, + "learning_rate": 4.728964552238806e-05, + "loss": 0.0015, + "step": 23244 + }, + { + "epoch": 21.69, + "learning_rate": 4.7289179104477615e-05, + "loss": 0.0066, + "step": 23248 + }, + { + "epoch": 21.69, + "learning_rate": 4.728871268656716e-05, + "loss": 0.0004, + "step": 23252 + }, + { + "epoch": 21.69, + "learning_rate": 4.728824626865672e-05, + "loss": 0.0006, + "step": 23256 + }, + { + "epoch": 21.7, + "learning_rate": 4.728777985074627e-05, + "loss": 0.0007, + "step": 23260 + }, + { + "epoch": 21.7, + "learning_rate": 4.728731343283582e-05, + "loss": 0.0007, + "step": 23264 + }, + { + "epoch": 21.71, + "learning_rate": 4.7286847014925376e-05, + "loss": 0.0005, + "step": 23268 + }, + { + "epoch": 21.71, + "learning_rate": 4.728638059701493e-05, + "loss": 0.0004, + "step": 23272 + }, + { + "epoch": 21.71, + "learning_rate": 4.728591417910448e-05, + "loss": 0.0004, + "step": 23276 + }, + { + "epoch": 21.72, + "learning_rate": 4.728544776119403e-05, + "loss": 0.0002, + "step": 23280 + }, + { + "epoch": 21.72, + "learning_rate": 4.728498134328359e-05, + "loss": 0.0005, + "step": 23284 + }, + { + "epoch": 21.72, + "learning_rate": 4.728451492537314e-05, + "loss": 0.0003, + "step": 23288 + }, + { + "epoch": 21.73, + "learning_rate": 4.728404850746269e-05, + "loss": 0.0016, + "step": 23292 + }, + { + "epoch": 21.73, + "learning_rate": 4.728358208955224e-05, + "loss": 0.0005, + "step": 23296 + }, + { + "epoch": 21.73, + "learning_rate": 4.7283115671641795e-05, + "loss": 0.0026, + "step": 23300 + }, + { + "epoch": 21.74, + "learning_rate": 4.728264925373135e-05, + "loss": 0.0004, + "step": 23304 + }, + { + "epoch": 21.74, + "learning_rate": 4.72821828358209e-05, + "loss": 0.0041, + "step": 23308 + }, + { + "epoch": 21.75, + "learning_rate": 4.7281716417910446e-05, + "loss": 0.0022, + "step": 23312 + }, + { + "epoch": 21.75, + "learning_rate": 4.728125000000001e-05, + "loss": 0.0001, + "step": 23316 + }, + { + "epoch": 21.75, + "learning_rate": 4.7280783582089556e-05, + "loss": 0.0013, + "step": 23320 + }, + { + "epoch": 21.76, + "learning_rate": 4.7280317164179104e-05, + "loss": 0.0005, + "step": 23324 + }, + { + "epoch": 21.76, + "learning_rate": 4.727985074626866e-05, + "loss": 0.0039, + "step": 23328 + }, + { + "epoch": 21.76, + "learning_rate": 4.7279384328358214e-05, + "loss": 0.0002, + "step": 23332 + }, + { + "epoch": 21.77, + "learning_rate": 4.727891791044776e-05, + "loss": 0.0032, + "step": 23336 + }, + { + "epoch": 21.77, + "learning_rate": 4.7278451492537317e-05, + "loss": 0.003, + "step": 23340 + }, + { + "epoch": 21.78, + "learning_rate": 4.727798507462687e-05, + "loss": 0.0012, + "step": 23344 + }, + { + "epoch": 21.78, + "learning_rate": 4.727751865671642e-05, + "loss": 0.0001, + "step": 23348 + }, + { + "epoch": 21.78, + "learning_rate": 4.7277052238805975e-05, + "loss": 0.0053, + "step": 23352 + }, + { + "epoch": 21.79, + "learning_rate": 4.727658582089552e-05, + "loss": 0.0026, + "step": 23356 + }, + { + "epoch": 21.79, + "learning_rate": 4.727611940298508e-05, + "loss": 0.0003, + "step": 23360 + }, + { + "epoch": 21.79, + "learning_rate": 4.727565298507463e-05, + "loss": 0.0002, + "step": 23364 + }, + { + "epoch": 21.8, + "learning_rate": 4.727518656716418e-05, + "loss": 0.0061, + "step": 23368 + }, + { + "epoch": 21.8, + "learning_rate": 4.727472014925373e-05, + "loss": 0.0089, + "step": 23372 + }, + { + "epoch": 21.81, + "learning_rate": 4.727425373134329e-05, + "loss": 0.0052, + "step": 23376 + }, + { + "epoch": 21.81, + "learning_rate": 4.727378731343284e-05, + "loss": 0.0004, + "step": 23380 + }, + { + "epoch": 21.81, + "learning_rate": 4.727332089552239e-05, + "loss": 0.0003, + "step": 23384 + }, + { + "epoch": 21.82, + "learning_rate": 4.727285447761194e-05, + "loss": 0.0017, + "step": 23388 + }, + { + "epoch": 21.82, + "learning_rate": 4.7272388059701496e-05, + "loss": 0.0068, + "step": 23392 + }, + { + "epoch": 21.82, + "learning_rate": 4.7271921641791045e-05, + "loss": 0.0007, + "step": 23396 + }, + { + "epoch": 21.83, + "learning_rate": 4.72714552238806e-05, + "loss": 0.0001, + "step": 23400 + }, + { + "epoch": 21.83, + "learning_rate": 4.7270988805970154e-05, + "loss": 0.0002, + "step": 23404 + }, + { + "epoch": 21.84, + "learning_rate": 4.72705223880597e-05, + "loss": 0.0011, + "step": 23408 + }, + { + "epoch": 21.84, + "learning_rate": 4.727005597014926e-05, + "loss": 0.0083, + "step": 23412 + }, + { + "epoch": 21.84, + "learning_rate": 4.7269589552238806e-05, + "loss": 0.0012, + "step": 23416 + }, + { + "epoch": 21.85, + "learning_rate": 4.726912313432836e-05, + "loss": 0.0002, + "step": 23420 + }, + { + "epoch": 21.85, + "learning_rate": 4.7268656716417915e-05, + "loss": 0.0012, + "step": 23424 + }, + { + "epoch": 21.85, + "learning_rate": 4.7268190298507464e-05, + "loss": 0.0065, + "step": 23428 + }, + { + "epoch": 21.86, + "learning_rate": 4.726772388059701e-05, + "loss": 0.0005, + "step": 23432 + }, + { + "epoch": 21.86, + "learning_rate": 4.726725746268657e-05, + "loss": 0.0027, + "step": 23436 + }, + { + "epoch": 21.87, + "learning_rate": 4.726679104477612e-05, + "loss": 0.0001, + "step": 23440 + }, + { + "epoch": 21.87, + "learning_rate": 4.726632462686567e-05, + "loss": 0.0003, + "step": 23444 + }, + { + "epoch": 21.87, + "learning_rate": 4.7265858208955225e-05, + "loss": 0.0003, + "step": 23448 + }, + { + "epoch": 21.88, + "learning_rate": 4.726539179104478e-05, + "loss": 0.0001, + "step": 23452 + }, + { + "epoch": 21.88, + "learning_rate": 4.7264925373134334e-05, + "loss": 0.0002, + "step": 23456 + }, + { + "epoch": 21.88, + "learning_rate": 4.726445895522388e-05, + "loss": 0.0014, + "step": 23460 + }, + { + "epoch": 21.89, + "learning_rate": 4.726399253731344e-05, + "loss": 0.0018, + "step": 23464 + }, + { + "epoch": 21.89, + "learning_rate": 4.726352611940299e-05, + "loss": 0.0012, + "step": 23468 + }, + { + "epoch": 21.9, + "learning_rate": 4.726305970149254e-05, + "loss": 0.0036, + "step": 23472 + }, + { + "epoch": 21.9, + "learning_rate": 4.726259328358209e-05, + "loss": 0.0006, + "step": 23476 + }, + { + "epoch": 21.9, + "learning_rate": 4.7262126865671643e-05, + "loss": 0.0003, + "step": 23480 + }, + { + "epoch": 21.91, + "learning_rate": 4.72616604477612e-05, + "loss": 0.0003, + "step": 23484 + }, + { + "epoch": 21.91, + "learning_rate": 4.7261194029850747e-05, + "loss": 0.0009, + "step": 23488 + }, + { + "epoch": 21.91, + "learning_rate": 4.72607276119403e-05, + "loss": 0.0003, + "step": 23492 + }, + { + "epoch": 21.92, + "learning_rate": 4.7260261194029856e-05, + "loss": 0.0002, + "step": 23496 + }, + { + "epoch": 21.92, + "learning_rate": 4.7259794776119404e-05, + "loss": 0.0011, + "step": 23500 + }, + { + "epoch": 21.92, + "eval_exact_match": 0.7272727272727273, + "eval_exec": 0.7630560928433269, + "eval_loss": 0.36809346079826355, + "eval_runtime": 1056.1475, + "eval_samples_per_second": 0.979, + "step": 23500 + }, + { + "epoch": 21.93, + "learning_rate": 4.725932835820896e-05, + "loss": 0.0008, + "step": 23504 + }, + { + "epoch": 21.93, + "learning_rate": 4.725886194029851e-05, + "loss": 0.0004, + "step": 23508 + }, + { + "epoch": 21.93, + "learning_rate": 4.725839552238806e-05, + "loss": 0.0012, + "step": 23512 + }, + { + "epoch": 21.94, + "learning_rate": 4.725792910447762e-05, + "loss": 0.0006, + "step": 23516 + }, + { + "epoch": 21.94, + "learning_rate": 4.7257462686567165e-05, + "loss": 0.0019, + "step": 23520 + }, + { + "epoch": 21.94, + "learning_rate": 4.7256996268656714e-05, + "loss": 0.0013, + "step": 23524 + }, + { + "epoch": 21.95, + "learning_rate": 4.7256529850746275e-05, + "loss": 0.0033, + "step": 23528 + }, + { + "epoch": 21.95, + "learning_rate": 4.7256063432835823e-05, + "loss": 0.0003, + "step": 23532 + }, + { + "epoch": 21.96, + "learning_rate": 4.725559701492537e-05, + "loss": 0.0004, + "step": 23536 + }, + { + "epoch": 21.96, + "learning_rate": 4.7255130597014926e-05, + "loss": 0.001, + "step": 23540 + }, + { + "epoch": 21.96, + "learning_rate": 4.725466417910448e-05, + "loss": 0.0015, + "step": 23544 + }, + { + "epoch": 21.97, + "learning_rate": 4.725419776119403e-05, + "loss": 0.0011, + "step": 23548 + }, + { + "epoch": 21.97, + "learning_rate": 4.7253731343283584e-05, + "loss": 0.0004, + "step": 23552 + }, + { + "epoch": 21.97, + "learning_rate": 4.725326492537314e-05, + "loss": 0.0005, + "step": 23556 + }, + { + "epoch": 21.98, + "learning_rate": 4.725279850746269e-05, + "loss": 0.0012, + "step": 23560 + }, + { + "epoch": 21.98, + "learning_rate": 4.725233208955224e-05, + "loss": 0.003, + "step": 23564 + }, + { + "epoch": 21.98, + "learning_rate": 4.725186567164179e-05, + "loss": 0.0087, + "step": 23568 + }, + { + "epoch": 21.99, + "learning_rate": 4.7251399253731345e-05, + "loss": 0.0005, + "step": 23572 + }, + { + "epoch": 21.99, + "learning_rate": 4.72509328358209e-05, + "loss": 0.0026, + "step": 23576 + }, + { + "epoch": 22.0, + "learning_rate": 4.725046641791045e-05, + "loss": 0.0004, + "step": 23580 + }, + { + "epoch": 22.0, + "learning_rate": 4.7249999999999997e-05, + "loss": 0.0038, + "step": 23584 + }, + { + "epoch": 22.0, + "learning_rate": 4.724953358208956e-05, + "loss": 0.0006, + "step": 23588 + }, + { + "epoch": 22.01, + "learning_rate": 4.7249067164179106e-05, + "loss": 0.0012, + "step": 23592 + }, + { + "epoch": 22.01, + "learning_rate": 4.7248600746268654e-05, + "loss": 0.0009, + "step": 23596 + }, + { + "epoch": 22.01, + "learning_rate": 4.724813432835821e-05, + "loss": 0.0002, + "step": 23600 + }, + { + "epoch": 22.02, + "learning_rate": 4.7247667910447764e-05, + "loss": 0.0009, + "step": 23604 + }, + { + "epoch": 22.02, + "learning_rate": 4.724720149253731e-05, + "loss": 0.0002, + "step": 23608 + }, + { + "epoch": 22.03, + "learning_rate": 4.724673507462687e-05, + "loss": 0.0005, + "step": 23612 + }, + { + "epoch": 22.03, + "learning_rate": 4.724626865671642e-05, + "loss": 0.0022, + "step": 23616 + }, + { + "epoch": 22.03, + "learning_rate": 4.724580223880598e-05, + "loss": 0.0006, + "step": 23620 + }, + { + "epoch": 22.04, + "learning_rate": 4.7245335820895525e-05, + "loss": 0.0001, + "step": 23624 + }, + { + "epoch": 22.04, + "learning_rate": 4.7244869402985073e-05, + "loss": 0.0013, + "step": 23628 + }, + { + "epoch": 22.04, + "learning_rate": 4.724440298507463e-05, + "loss": 0.0006, + "step": 23632 + }, + { + "epoch": 22.05, + "learning_rate": 4.724393656716418e-05, + "loss": 0.0001, + "step": 23636 + }, + { + "epoch": 22.05, + "learning_rate": 4.724347014925373e-05, + "loss": 0.0021, + "step": 23640 + }, + { + "epoch": 22.06, + "learning_rate": 4.7243003731343286e-05, + "loss": 0.001, + "step": 23644 + }, + { + "epoch": 22.06, + "learning_rate": 4.724253731343284e-05, + "loss": 0.002, + "step": 23648 + }, + { + "epoch": 22.06, + "learning_rate": 4.724207089552239e-05, + "loss": 0.0002, + "step": 23652 + }, + { + "epoch": 22.07, + "learning_rate": 4.7241604477611944e-05, + "loss": 0.0001, + "step": 23656 + }, + { + "epoch": 22.07, + "learning_rate": 4.724113805970149e-05, + "loss": 0.0002, + "step": 23660 + }, + { + "epoch": 22.07, + "learning_rate": 4.724067164179105e-05, + "loss": 0.0004, + "step": 23664 + }, + { + "epoch": 22.08, + "learning_rate": 4.72402052238806e-05, + "loss": 0.0013, + "step": 23668 + }, + { + "epoch": 22.08, + "learning_rate": 4.723973880597015e-05, + "loss": 0.0005, + "step": 23672 + }, + { + "epoch": 22.09, + "learning_rate": 4.7239272388059705e-05, + "loss": 0.0027, + "step": 23676 + }, + { + "epoch": 22.09, + "learning_rate": 4.723880597014926e-05, + "loss": 0.0012, + "step": 23680 + }, + { + "epoch": 22.09, + "learning_rate": 4.723833955223881e-05, + "loss": 0.0015, + "step": 23684 + }, + { + "epoch": 22.1, + "learning_rate": 4.7237873134328356e-05, + "loss": 0.003, + "step": 23688 + }, + { + "epoch": 22.1, + "learning_rate": 4.723740671641791e-05, + "loss": 0.0004, + "step": 23692 + }, + { + "epoch": 22.1, + "learning_rate": 4.7236940298507466e-05, + "loss": 0.0002, + "step": 23696 + }, + { + "epoch": 22.11, + "learning_rate": 4.7236473880597014e-05, + "loss": 0.0003, + "step": 23700 + }, + { + "epoch": 22.11, + "learning_rate": 4.723600746268657e-05, + "loss": 0.0019, + "step": 23704 + }, + { + "epoch": 22.12, + "learning_rate": 4.7235541044776124e-05, + "loss": 0.0005, + "step": 23708 + }, + { + "epoch": 22.12, + "learning_rate": 4.723507462686567e-05, + "loss": 0.0004, + "step": 23712 + }, + { + "epoch": 22.12, + "learning_rate": 4.723460820895523e-05, + "loss": 0.0002, + "step": 23716 + }, + { + "epoch": 22.13, + "learning_rate": 4.7234141791044775e-05, + "loss": 0.0002, + "step": 23720 + }, + { + "epoch": 22.13, + "learning_rate": 4.723367537313433e-05, + "loss": 0.0002, + "step": 23724 + }, + { + "epoch": 22.13, + "learning_rate": 4.7233208955223885e-05, + "loss": 0.0082, + "step": 23728 + }, + { + "epoch": 22.14, + "learning_rate": 4.723274253731343e-05, + "loss": 0.0003, + "step": 23732 + }, + { + "epoch": 22.14, + "learning_rate": 4.723227611940299e-05, + "loss": 0.0001, + "step": 23736 + }, + { + "epoch": 22.15, + "learning_rate": 4.723180970149254e-05, + "loss": 0.0039, + "step": 23740 + }, + { + "epoch": 22.15, + "learning_rate": 4.723134328358209e-05, + "loss": 0.0001, + "step": 23744 + }, + { + "epoch": 22.15, + "learning_rate": 4.723087686567164e-05, + "loss": 0.0098, + "step": 23748 + }, + { + "epoch": 22.16, + "learning_rate": 4.7230410447761194e-05, + "loss": 0.0001, + "step": 23752 + }, + { + "epoch": 22.16, + "learning_rate": 4.722994402985075e-05, + "loss": 0.0003, + "step": 23756 + }, + { + "epoch": 22.16, + "learning_rate": 4.72294776119403e-05, + "loss": 0.001, + "step": 23760 + }, + { + "epoch": 22.17, + "learning_rate": 4.722901119402985e-05, + "loss": 0.0, + "step": 23764 + }, + { + "epoch": 22.17, + "learning_rate": 4.722854477611941e-05, + "loss": 0.0005, + "step": 23768 + }, + { + "epoch": 22.18, + "learning_rate": 4.7228078358208955e-05, + "loss": 0.0013, + "step": 23772 + }, + { + "epoch": 22.18, + "learning_rate": 4.722761194029851e-05, + "loss": 0.0003, + "step": 23776 + }, + { + "epoch": 22.18, + "learning_rate": 4.722714552238806e-05, + "loss": 0.0004, + "step": 23780 + }, + { + "epoch": 22.19, + "learning_rate": 4.722667910447762e-05, + "loss": 0.0005, + "step": 23784 + }, + { + "epoch": 22.19, + "learning_rate": 4.722621268656717e-05, + "loss": 0.0001, + "step": 23788 + }, + { + "epoch": 22.19, + "learning_rate": 4.7225746268656716e-05, + "loss": 0.0005, + "step": 23792 + }, + { + "epoch": 22.2, + "learning_rate": 4.722527985074627e-05, + "loss": 0.0002, + "step": 23796 + }, + { + "epoch": 22.2, + "learning_rate": 4.7224813432835826e-05, + "loss": 0.0021, + "step": 23800 + }, + { + "epoch": 22.21, + "learning_rate": 4.7224347014925374e-05, + "loss": 0.0011, + "step": 23804 + }, + { + "epoch": 22.21, + "learning_rate": 4.722388059701493e-05, + "loss": 0.001, + "step": 23808 + }, + { + "epoch": 22.21, + "learning_rate": 4.722341417910448e-05, + "loss": 0.0004, + "step": 23812 + }, + { + "epoch": 22.22, + "learning_rate": 4.722294776119403e-05, + "loss": 0.0082, + "step": 23816 + }, + { + "epoch": 22.22, + "learning_rate": 4.722248134328359e-05, + "loss": 0.0006, + "step": 23820 + }, + { + "epoch": 22.22, + "learning_rate": 4.7222014925373135e-05, + "loss": 0.0021, + "step": 23824 + }, + { + "epoch": 22.23, + "learning_rate": 4.722154850746269e-05, + "loss": 0.0002, + "step": 23828 + }, + { + "epoch": 22.23, + "learning_rate": 4.7221082089552245e-05, + "loss": 0.0003, + "step": 23832 + }, + { + "epoch": 22.24, + "learning_rate": 4.722061567164179e-05, + "loss": 0.0034, + "step": 23836 + }, + { + "epoch": 22.24, + "learning_rate": 4.722014925373134e-05, + "loss": 0.0007, + "step": 23840 + }, + { + "epoch": 22.24, + "learning_rate": 4.72196828358209e-05, + "loss": 0.0041, + "step": 23844 + }, + { + "epoch": 22.25, + "learning_rate": 4.721921641791045e-05, + "loss": 0.0001, + "step": 23848 + }, + { + "epoch": 22.25, + "learning_rate": 4.721875e-05, + "loss": 0.001, + "step": 23852 + }, + { + "epoch": 22.25, + "learning_rate": 4.7218283582089554e-05, + "loss": 0.0001, + "step": 23856 + }, + { + "epoch": 22.26, + "learning_rate": 4.721781716417911e-05, + "loss": 0.001, + "step": 23860 + }, + { + "epoch": 22.26, + "learning_rate": 4.721735074626866e-05, + "loss": 0.0025, + "step": 23864 + }, + { + "epoch": 22.26, + "learning_rate": 4.721688432835821e-05, + "loss": 0.0001, + "step": 23868 + }, + { + "epoch": 22.27, + "learning_rate": 4.721641791044776e-05, + "loss": 0.0023, + "step": 23872 + }, + { + "epoch": 22.27, + "learning_rate": 4.7215951492537315e-05, + "loss": 0.0054, + "step": 23876 + }, + { + "epoch": 22.28, + "learning_rate": 4.721548507462687e-05, + "loss": 0.0035, + "step": 23880 + }, + { + "epoch": 22.28, + "learning_rate": 4.721501865671642e-05, + "loss": 0.0015, + "step": 23884 + }, + { + "epoch": 22.28, + "learning_rate": 4.721455223880597e-05, + "loss": 0.0002, + "step": 23888 + }, + { + "epoch": 22.29, + "learning_rate": 4.721408582089553e-05, + "loss": 0.0007, + "step": 23892 + }, + { + "epoch": 22.29, + "learning_rate": 4.7213619402985076e-05, + "loss": 0.0007, + "step": 23896 + }, + { + "epoch": 22.29, + "learning_rate": 4.7213152985074624e-05, + "loss": 0.0122, + "step": 23900 + }, + { + "epoch": 22.3, + "learning_rate": 4.7212686567164186e-05, + "loss": 0.0012, + "step": 23904 + }, + { + "epoch": 22.3, + "learning_rate": 4.7212220149253734e-05, + "loss": 0.0023, + "step": 23908 + }, + { + "epoch": 22.31, + "learning_rate": 4.721175373134328e-05, + "loss": 0.0001, + "step": 23912 + }, + { + "epoch": 22.31, + "learning_rate": 4.721128731343284e-05, + "loss": 0.0003, + "step": 23916 + }, + { + "epoch": 22.31, + "learning_rate": 4.721082089552239e-05, + "loss": 0.0008, + "step": 23920 + }, + { + "epoch": 22.32, + "learning_rate": 4.721035447761194e-05, + "loss": 0.0004, + "step": 23924 + }, + { + "epoch": 22.32, + "learning_rate": 4.7209888059701495e-05, + "loss": 0.0005, + "step": 23928 + }, + { + "epoch": 22.32, + "learning_rate": 4.720942164179104e-05, + "loss": 0.0058, + "step": 23932 + }, + { + "epoch": 22.33, + "learning_rate": 4.72089552238806e-05, + "loss": 0.0006, + "step": 23936 + }, + { + "epoch": 22.33, + "learning_rate": 4.720848880597015e-05, + "loss": 0.0009, + "step": 23940 + }, + { + "epoch": 22.34, + "learning_rate": 4.72080223880597e-05, + "loss": 0.0004, + "step": 23944 + }, + { + "epoch": 22.34, + "learning_rate": 4.7207555970149256e-05, + "loss": 0.0005, + "step": 23948 + }, + { + "epoch": 22.34, + "learning_rate": 4.720708955223881e-05, + "loss": 0.0012, + "step": 23952 + }, + { + "epoch": 22.35, + "learning_rate": 4.720662313432836e-05, + "loss": 0.0026, + "step": 23956 + }, + { + "epoch": 22.35, + "learning_rate": 4.7206156716417914e-05, + "loss": 0.0011, + "step": 23960 + }, + { + "epoch": 22.35, + "learning_rate": 4.720569029850747e-05, + "loss": 0.0041, + "step": 23964 + }, + { + "epoch": 22.36, + "learning_rate": 4.720522388059702e-05, + "loss": 0.0008, + "step": 23968 + }, + { + "epoch": 22.36, + "learning_rate": 4.720475746268657e-05, + "loss": 0.0008, + "step": 23972 + }, + { + "epoch": 22.37, + "learning_rate": 4.720429104477612e-05, + "loss": 0.0001, + "step": 23976 + }, + { + "epoch": 22.37, + "learning_rate": 4.7203824626865675e-05, + "loss": 0.007, + "step": 23980 + }, + { + "epoch": 22.37, + "learning_rate": 4.720335820895523e-05, + "loss": 0.0003, + "step": 23984 + }, + { + "epoch": 22.38, + "learning_rate": 4.720289179104478e-05, + "loss": 0.0006, + "step": 23988 + }, + { + "epoch": 22.38, + "learning_rate": 4.7202425373134326e-05, + "loss": 0.0001, + "step": 23992 + }, + { + "epoch": 22.38, + "learning_rate": 4.720195895522389e-05, + "loss": 0.0005, + "step": 23996 + }, + { + "epoch": 22.39, + "learning_rate": 4.7201492537313436e-05, + "loss": 0.0002, + "step": 24000 + }, + { + "epoch": 22.39, + "eval_exact_match": 0.7214700193423598, + "eval_exec": 0.7601547388781431, + "eval_loss": 0.3911060094833374, + "eval_runtime": 1226.8058, + "eval_samples_per_second": 0.843, + "step": 24000 + }, + { + "epoch": 22.39, + "learning_rate": 4.7201026119402984e-05, + "loss": 0.0042, + "step": 24004 + }, + { + "epoch": 22.4, + "learning_rate": 4.720055970149254e-05, + "loss": 0.0012, + "step": 24008 + }, + { + "epoch": 22.4, + "learning_rate": 4.7200093283582094e-05, + "loss": 0.0001, + "step": 24012 + }, + { + "epoch": 22.4, + "learning_rate": 4.719962686567164e-05, + "loss": 0.0004, + "step": 24016 + }, + { + "epoch": 22.41, + "learning_rate": 4.71991604477612e-05, + "loss": 0.0045, + "step": 24020 + }, + { + "epoch": 22.41, + "learning_rate": 4.719869402985075e-05, + "loss": 0.0001, + "step": 24024 + }, + { + "epoch": 22.41, + "learning_rate": 4.71982276119403e-05, + "loss": 0.0062, + "step": 24028 + }, + { + "epoch": 22.42, + "learning_rate": 4.7197761194029855e-05, + "loss": 0.0037, + "step": 24032 + }, + { + "epoch": 22.42, + "learning_rate": 4.71972947761194e-05, + "loss": 0.0016, + "step": 24036 + }, + { + "epoch": 22.43, + "learning_rate": 4.719682835820896e-05, + "loss": 0.0004, + "step": 24040 + }, + { + "epoch": 22.43, + "learning_rate": 4.719636194029851e-05, + "loss": 0.001, + "step": 24044 + }, + { + "epoch": 22.43, + "learning_rate": 4.719589552238806e-05, + "loss": 0.0024, + "step": 24048 + }, + { + "epoch": 22.44, + "learning_rate": 4.719542910447761e-05, + "loss": 0.0011, + "step": 24052 + }, + { + "epoch": 22.44, + "learning_rate": 4.719496268656717e-05, + "loss": 0.0002, + "step": 24056 + }, + { + "epoch": 22.44, + "learning_rate": 4.719449626865672e-05, + "loss": 0.0004, + "step": 24060 + }, + { + "epoch": 22.45, + "learning_rate": 4.719402985074627e-05, + "loss": 0.0029, + "step": 24064 + }, + { + "epoch": 22.45, + "learning_rate": 4.719356343283582e-05, + "loss": 0.0002, + "step": 24068 + }, + { + "epoch": 22.46, + "learning_rate": 4.719309701492538e-05, + "loss": 0.0043, + "step": 24072 + }, + { + "epoch": 22.46, + "learning_rate": 4.7192630597014925e-05, + "loss": 0.0003, + "step": 24076 + }, + { + "epoch": 22.46, + "learning_rate": 4.719216417910448e-05, + "loss": 0.0002, + "step": 24080 + }, + { + "epoch": 22.47, + "learning_rate": 4.7191697761194035e-05, + "loss": 0.0014, + "step": 24084 + }, + { + "epoch": 22.47, + "learning_rate": 4.719123134328358e-05, + "loss": 0.0008, + "step": 24088 + }, + { + "epoch": 22.47, + "learning_rate": 4.719076492537314e-05, + "loss": 0.0002, + "step": 24092 + }, + { + "epoch": 22.48, + "learning_rate": 4.7190298507462686e-05, + "loss": 0.002, + "step": 24096 + }, + { + "epoch": 22.48, + "learning_rate": 4.718983208955224e-05, + "loss": 0.0002, + "step": 24100 + }, + { + "epoch": 22.49, + "learning_rate": 4.7189365671641796e-05, + "loss": 0.0012, + "step": 24104 + }, + { + "epoch": 22.49, + "learning_rate": 4.7188899253731344e-05, + "loss": 0.002, + "step": 24108 + }, + { + "epoch": 22.49, + "learning_rate": 4.71884328358209e-05, + "loss": 0.0002, + "step": 24112 + }, + { + "epoch": 22.5, + "learning_rate": 4.7187966417910454e-05, + "loss": 0.0003, + "step": 24116 + }, + { + "epoch": 22.5, + "learning_rate": 4.71875e-05, + "loss": 0.0018, + "step": 24120 + }, + { + "epoch": 22.5, + "learning_rate": 4.718703358208956e-05, + "loss": 0.0011, + "step": 24124 + }, + { + "epoch": 22.51, + "learning_rate": 4.7186567164179105e-05, + "loss": 0.0014, + "step": 24128 + }, + { + "epoch": 22.51, + "learning_rate": 4.718610074626866e-05, + "loss": 0.0001, + "step": 24132 + }, + { + "epoch": 22.51, + "learning_rate": 4.7185634328358215e-05, + "loss": 0.0001, + "step": 24136 + }, + { + "epoch": 22.52, + "learning_rate": 4.718516791044776e-05, + "loss": 0.0007, + "step": 24140 + }, + { + "epoch": 22.52, + "learning_rate": 4.718470149253732e-05, + "loss": 0.002, + "step": 24144 + }, + { + "epoch": 22.53, + "learning_rate": 4.718423507462687e-05, + "loss": 0.0022, + "step": 24148 + }, + { + "epoch": 22.53, + "learning_rate": 4.718376865671642e-05, + "loss": 0.0014, + "step": 24152 + }, + { + "epoch": 22.53, + "learning_rate": 4.718330223880597e-05, + "loss": 0.0003, + "step": 24156 + }, + { + "epoch": 22.54, + "learning_rate": 4.7182835820895524e-05, + "loss": 0.0009, + "step": 24160 + }, + { + "epoch": 22.54, + "learning_rate": 4.718236940298508e-05, + "loss": 0.0016, + "step": 24164 + }, + { + "epoch": 22.54, + "learning_rate": 4.718190298507463e-05, + "loss": 0.0, + "step": 24168 + }, + { + "epoch": 22.55, + "learning_rate": 4.718143656716418e-05, + "loss": 0.0005, + "step": 24172 + }, + { + "epoch": 22.55, + "learning_rate": 4.718097014925374e-05, + "loss": 0.0004, + "step": 24176 + }, + { + "epoch": 22.56, + "learning_rate": 4.7180503731343285e-05, + "loss": 0.0014, + "step": 24180 + }, + { + "epoch": 22.56, + "learning_rate": 4.718003731343284e-05, + "loss": 0.0001, + "step": 24184 + }, + { + "epoch": 22.56, + "learning_rate": 4.717957089552239e-05, + "loss": 0.001, + "step": 24188 + }, + { + "epoch": 22.57, + "learning_rate": 4.717910447761194e-05, + "loss": 0.0002, + "step": 24192 + }, + { + "epoch": 22.57, + "learning_rate": 4.71786380597015e-05, + "loss": 0.0005, + "step": 24196 + }, + { + "epoch": 22.57, + "learning_rate": 4.7178171641791046e-05, + "loss": 0.0005, + "step": 24200 + }, + { + "epoch": 22.58, + "learning_rate": 4.7177705223880594e-05, + "loss": 0.0023, + "step": 24204 + }, + { + "epoch": 22.58, + "learning_rate": 4.7177238805970156e-05, + "loss": 0.0051, + "step": 24208 + }, + { + "epoch": 22.59, + "learning_rate": 4.7176772388059704e-05, + "loss": 0.0009, + "step": 24212 + }, + { + "epoch": 22.59, + "learning_rate": 4.717630597014925e-05, + "loss": 0.0091, + "step": 24216 + }, + { + "epoch": 22.59, + "learning_rate": 4.717583955223881e-05, + "loss": 0.0002, + "step": 24220 + }, + { + "epoch": 22.6, + "learning_rate": 4.717537313432836e-05, + "loss": 0.0014, + "step": 24224 + }, + { + "epoch": 22.6, + "learning_rate": 4.717490671641791e-05, + "loss": 0.0001, + "step": 24228 + }, + { + "epoch": 22.6, + "learning_rate": 4.7174440298507465e-05, + "loss": 0.0034, + "step": 24232 + }, + { + "epoch": 22.61, + "learning_rate": 4.717397388059702e-05, + "loss": 0.0047, + "step": 24236 + }, + { + "epoch": 22.61, + "learning_rate": 4.717350746268657e-05, + "loss": 0.0017, + "step": 24240 + }, + { + "epoch": 22.62, + "learning_rate": 4.717304104477612e-05, + "loss": 0.0013, + "step": 24244 + }, + { + "epoch": 22.62, + "learning_rate": 4.717257462686567e-05, + "loss": 0.0008, + "step": 24248 + }, + { + "epoch": 22.62, + "learning_rate": 4.7172108208955226e-05, + "loss": 0.0029, + "step": 24252 + }, + { + "epoch": 22.63, + "learning_rate": 4.717164179104478e-05, + "loss": 0.0023, + "step": 24256 + }, + { + "epoch": 22.63, + "learning_rate": 4.717117537313433e-05, + "loss": 0.0011, + "step": 24260 + }, + { + "epoch": 22.63, + "learning_rate": 4.717070895522388e-05, + "loss": 0.0121, + "step": 24264 + }, + { + "epoch": 22.64, + "learning_rate": 4.717024253731344e-05, + "loss": 0.0003, + "step": 24268 + }, + { + "epoch": 22.64, + "learning_rate": 4.716977611940299e-05, + "loss": 0.0011, + "step": 24272 + }, + { + "epoch": 22.65, + "learning_rate": 4.716930970149254e-05, + "loss": 0.0001, + "step": 24276 + }, + { + "epoch": 22.65, + "learning_rate": 4.716884328358209e-05, + "loss": 0.0002, + "step": 24280 + }, + { + "epoch": 22.65, + "learning_rate": 4.7168376865671645e-05, + "loss": 0.0005, + "step": 24284 + }, + { + "epoch": 22.66, + "learning_rate": 4.71679104477612e-05, + "loss": 0.0006, + "step": 24288 + }, + { + "epoch": 22.66, + "learning_rate": 4.716744402985075e-05, + "loss": 0.0003, + "step": 24292 + }, + { + "epoch": 22.66, + "learning_rate": 4.71669776119403e-05, + "loss": 0.0103, + "step": 24296 + }, + { + "epoch": 22.67, + "learning_rate": 4.716651119402986e-05, + "loss": 0.0001, + "step": 24300 + }, + { + "epoch": 22.67, + "learning_rate": 4.7166044776119406e-05, + "loss": 0.0017, + "step": 24304 + }, + { + "epoch": 22.68, + "learning_rate": 4.7165578358208954e-05, + "loss": 0.0011, + "step": 24308 + }, + { + "epoch": 22.68, + "learning_rate": 4.716511194029851e-05, + "loss": 0.0001, + "step": 24312 + }, + { + "epoch": 22.68, + "learning_rate": 4.7164645522388064e-05, + "loss": 0.0017, + "step": 24316 + }, + { + "epoch": 22.69, + "learning_rate": 4.716417910447761e-05, + "loss": 0.0002, + "step": 24320 + }, + { + "epoch": 22.69, + "learning_rate": 4.7163712686567167e-05, + "loss": 0.0004, + "step": 24324 + }, + { + "epoch": 22.69, + "learning_rate": 4.716324626865672e-05, + "loss": 0.0014, + "step": 24328 + }, + { + "epoch": 22.7, + "learning_rate": 4.716277985074627e-05, + "loss": 0.0087, + "step": 24332 + }, + { + "epoch": 22.7, + "learning_rate": 4.7162313432835825e-05, + "loss": 0.0003, + "step": 24336 + }, + { + "epoch": 22.71, + "learning_rate": 4.716184701492537e-05, + "loss": 0.0002, + "step": 24340 + }, + { + "epoch": 22.71, + "learning_rate": 4.716138059701493e-05, + "loss": 0.0077, + "step": 24344 + }, + { + "epoch": 22.71, + "learning_rate": 4.716091417910448e-05, + "loss": 0.0058, + "step": 24348 + }, + { + "epoch": 22.72, + "learning_rate": 4.716044776119403e-05, + "loss": 0.0052, + "step": 24352 + }, + { + "epoch": 22.72, + "learning_rate": 4.7159981343283585e-05, + "loss": 0.0028, + "step": 24356 + }, + { + "epoch": 22.72, + "learning_rate": 4.715951492537314e-05, + "loss": 0.0002, + "step": 24360 + }, + { + "epoch": 22.73, + "learning_rate": 4.715904850746269e-05, + "loss": 0.0023, + "step": 24364 + }, + { + "epoch": 22.73, + "learning_rate": 4.715858208955224e-05, + "loss": 0.0002, + "step": 24368 + }, + { + "epoch": 22.73, + "learning_rate": 4.715811567164179e-05, + "loss": 0.0004, + "step": 24372 + }, + { + "epoch": 22.74, + "learning_rate": 4.7157649253731346e-05, + "loss": 0.0006, + "step": 24376 + }, + { + "epoch": 22.74, + "learning_rate": 4.7157182835820895e-05, + "loss": 0.0004, + "step": 24380 + }, + { + "epoch": 22.75, + "learning_rate": 4.715671641791045e-05, + "loss": 0.0004, + "step": 24384 + }, + { + "epoch": 22.75, + "learning_rate": 4.7156250000000004e-05, + "loss": 0.0012, + "step": 24388 + }, + { + "epoch": 22.75, + "learning_rate": 4.715578358208955e-05, + "loss": 0.0012, + "step": 24392 + }, + { + "epoch": 22.76, + "learning_rate": 4.715531716417911e-05, + "loss": 0.0009, + "step": 24396 + }, + { + "epoch": 22.76, + "learning_rate": 4.7154850746268656e-05, + "loss": 0.0009, + "step": 24400 + }, + { + "epoch": 22.76, + "learning_rate": 4.715438432835821e-05, + "loss": 0.0004, + "step": 24404 + }, + { + "epoch": 22.77, + "learning_rate": 4.7153917910447765e-05, + "loss": 0.0004, + "step": 24408 + }, + { + "epoch": 22.77, + "learning_rate": 4.7153451492537314e-05, + "loss": 0.001, + "step": 24412 + }, + { + "epoch": 22.78, + "learning_rate": 4.715298507462687e-05, + "loss": 0.0005, + "step": 24416 + }, + { + "epoch": 22.78, + "learning_rate": 4.715251865671642e-05, + "loss": 0.0003, + "step": 24420 + }, + { + "epoch": 22.78, + "learning_rate": 4.715205223880597e-05, + "loss": 0.0019, + "step": 24424 + }, + { + "epoch": 22.79, + "learning_rate": 4.715158582089552e-05, + "loss": 0.0008, + "step": 24428 + }, + { + "epoch": 22.79, + "learning_rate": 4.7151119402985075e-05, + "loss": 0.0007, + "step": 24432 + }, + { + "epoch": 22.79, + "learning_rate": 4.715065298507463e-05, + "loss": 0.0027, + "step": 24436 + }, + { + "epoch": 22.8, + "learning_rate": 4.7150186567164184e-05, + "loss": 0.0009, + "step": 24440 + }, + { + "epoch": 22.8, + "learning_rate": 4.714972014925373e-05, + "loss": 0.0034, + "step": 24444 + }, + { + "epoch": 22.81, + "learning_rate": 4.714925373134329e-05, + "loss": 0.0026, + "step": 24448 + }, + { + "epoch": 22.81, + "learning_rate": 4.714878731343284e-05, + "loss": 0.001, + "step": 24452 + }, + { + "epoch": 22.81, + "learning_rate": 4.714832089552239e-05, + "loss": 0.0003, + "step": 24456 + }, + { + "epoch": 22.82, + "learning_rate": 4.714785447761194e-05, + "loss": 0.0001, + "step": 24460 + }, + { + "epoch": 22.82, + "learning_rate": 4.71473880597015e-05, + "loss": 0.0009, + "step": 24464 + }, + { + "epoch": 22.82, + "learning_rate": 4.714692164179105e-05, + "loss": 0.0019, + "step": 24468 + }, + { + "epoch": 22.83, + "learning_rate": 4.7146455223880596e-05, + "loss": 0.0003, + "step": 24472 + }, + { + "epoch": 22.83, + "learning_rate": 4.714598880597015e-05, + "loss": 0.0001, + "step": 24476 + }, + { + "epoch": 22.84, + "learning_rate": 4.7145522388059706e-05, + "loss": 0.0003, + "step": 24480 + }, + { + "epoch": 22.84, + "learning_rate": 4.7145055970149254e-05, + "loss": 0.0013, + "step": 24484 + }, + { + "epoch": 22.84, + "learning_rate": 4.714458955223881e-05, + "loss": 0.0095, + "step": 24488 + }, + { + "epoch": 22.85, + "learning_rate": 4.714412313432836e-05, + "loss": 0.0002, + "step": 24492 + }, + { + "epoch": 22.85, + "learning_rate": 4.714365671641791e-05, + "loss": 0.0009, + "step": 24496 + }, + { + "epoch": 22.85, + "learning_rate": 4.714319029850747e-05, + "loss": 0.0002, + "step": 24500 + }, + { + "epoch": 22.85, + "eval_exact_match": 0.7292069632495164, + "eval_exec": 0.7572533849129593, + "eval_loss": 0.35489827394485474, + "eval_runtime": 1237.9317, + "eval_samples_per_second": 0.835, + "step": 24500 + }, + { + "epoch": 22.86, + "learning_rate": 4.7142723880597015e-05, + "loss": 0.001, + "step": 24504 + }, + { + "epoch": 22.86, + "learning_rate": 4.714225746268657e-05, + "loss": 0.0014, + "step": 24508 + }, + { + "epoch": 22.87, + "learning_rate": 4.7141791044776125e-05, + "loss": 0.0068, + "step": 24512 + }, + { + "epoch": 22.87, + "learning_rate": 4.714132462686567e-05, + "loss": 0.0008, + "step": 24516 + }, + { + "epoch": 22.87, + "learning_rate": 4.714085820895522e-05, + "loss": 0.0002, + "step": 24520 + }, + { + "epoch": 22.88, + "learning_rate": 4.714039179104478e-05, + "loss": 0.0031, + "step": 24524 + }, + { + "epoch": 22.88, + "learning_rate": 4.713992537313433e-05, + "loss": 0.0003, + "step": 24528 + }, + { + "epoch": 22.88, + "learning_rate": 4.713945895522388e-05, + "loss": 0.0001, + "step": 24532 + }, + { + "epoch": 22.89, + "learning_rate": 4.7138992537313434e-05, + "loss": 0.0041, + "step": 24536 + }, + { + "epoch": 22.89, + "learning_rate": 4.713852611940299e-05, + "loss": 0.0003, + "step": 24540 + }, + { + "epoch": 22.9, + "learning_rate": 4.713805970149254e-05, + "loss": 0.0001, + "step": 24544 + }, + { + "epoch": 22.9, + "learning_rate": 4.713759328358209e-05, + "loss": 0.0058, + "step": 24548 + }, + { + "epoch": 22.9, + "learning_rate": 4.713712686567164e-05, + "loss": 0.0006, + "step": 24552 + }, + { + "epoch": 22.91, + "learning_rate": 4.7136660447761195e-05, + "loss": 0.0001, + "step": 24556 + }, + { + "epoch": 22.91, + "learning_rate": 4.713619402985075e-05, + "loss": 0.0011, + "step": 24560 + }, + { + "epoch": 22.91, + "learning_rate": 4.71357276119403e-05, + "loss": 0.0019, + "step": 24564 + }, + { + "epoch": 22.92, + "learning_rate": 4.713526119402985e-05, + "loss": 0.0006, + "step": 24568 + }, + { + "epoch": 22.92, + "learning_rate": 4.713479477611941e-05, + "loss": 0.0015, + "step": 24572 + }, + { + "epoch": 22.93, + "learning_rate": 4.7134328358208956e-05, + "loss": 0.0004, + "step": 24576 + }, + { + "epoch": 22.93, + "learning_rate": 4.7133861940298504e-05, + "loss": 0.0049, + "step": 24580 + }, + { + "epoch": 22.93, + "learning_rate": 4.7133395522388066e-05, + "loss": 0.0001, + "step": 24584 + }, + { + "epoch": 22.94, + "learning_rate": 4.7132929104477614e-05, + "loss": 0.0026, + "step": 24588 + }, + { + "epoch": 22.94, + "learning_rate": 4.713246268656716e-05, + "loss": 0.0007, + "step": 24592 + }, + { + "epoch": 22.94, + "learning_rate": 4.713199626865672e-05, + "loss": 0.0002, + "step": 24596 + }, + { + "epoch": 22.95, + "learning_rate": 4.713152985074627e-05, + "loss": 0.0001, + "step": 24600 + }, + { + "epoch": 22.95, + "learning_rate": 4.713106343283583e-05, + "loss": 0.0002, + "step": 24604 + }, + { + "epoch": 22.96, + "learning_rate": 4.7130597014925375e-05, + "loss": 0.0204, + "step": 24608 + }, + { + "epoch": 22.96, + "learning_rate": 4.7130130597014923e-05, + "loss": 0.0018, + "step": 24612 + }, + { + "epoch": 22.96, + "learning_rate": 4.7129664179104485e-05, + "loss": 0.0037, + "step": 24616 + }, + { + "epoch": 22.97, + "learning_rate": 4.712919776119403e-05, + "loss": 0.0002, + "step": 24620 + }, + { + "epoch": 22.97, + "learning_rate": 4.712873134328358e-05, + "loss": 0.0006, + "step": 24624 + }, + { + "epoch": 22.97, + "learning_rate": 4.7128264925373136e-05, + "loss": 0.0034, + "step": 24628 + }, + { + "epoch": 22.98, + "learning_rate": 4.712779850746269e-05, + "loss": 0.0018, + "step": 24632 + }, + { + "epoch": 22.98, + "learning_rate": 4.712733208955224e-05, + "loss": 0.0031, + "step": 24636 + }, + { + "epoch": 22.98, + "learning_rate": 4.7126865671641794e-05, + "loss": 0.0002, + "step": 24640 + }, + { + "epoch": 22.99, + "learning_rate": 4.712639925373135e-05, + "loss": 0.002, + "step": 24644 + }, + { + "epoch": 22.99, + "learning_rate": 4.71259328358209e-05, + "loss": 0.0027, + "step": 24648 + }, + { + "epoch": 23.0, + "learning_rate": 4.712546641791045e-05, + "loss": 0.0018, + "step": 24652 + }, + { + "epoch": 23.0, + "learning_rate": 4.7125e-05, + "loss": 0.0015, + "step": 24656 + }, + { + "epoch": 23.0, + "learning_rate": 4.7124533582089555e-05, + "loss": 0.0004, + "step": 24660 + }, + { + "epoch": 23.01, + "learning_rate": 4.712406716417911e-05, + "loss": 0.0035, + "step": 24664 + }, + { + "epoch": 23.01, + "learning_rate": 4.712360074626866e-05, + "loss": 0.0002, + "step": 24668 + }, + { + "epoch": 23.01, + "learning_rate": 4.7123134328358206e-05, + "loss": 0.0024, + "step": 24672 + }, + { + "epoch": 23.02, + "learning_rate": 4.712266791044777e-05, + "loss": 0.0001, + "step": 24676 + }, + { + "epoch": 23.02, + "learning_rate": 4.7122201492537316e-05, + "loss": 0.0006, + "step": 24680 + }, + { + "epoch": 23.03, + "learning_rate": 4.7121735074626864e-05, + "loss": 0.0048, + "step": 24684 + }, + { + "epoch": 23.03, + "learning_rate": 4.712126865671642e-05, + "loss": 0.0005, + "step": 24688 + }, + { + "epoch": 23.03, + "learning_rate": 4.7120802238805974e-05, + "loss": 0.0001, + "step": 24692 + }, + { + "epoch": 23.04, + "learning_rate": 4.712033582089552e-05, + "loss": 0.0008, + "step": 24696 + }, + { + "epoch": 23.04, + "learning_rate": 4.711986940298508e-05, + "loss": 0.0006, + "step": 24700 + }, + { + "epoch": 23.04, + "learning_rate": 4.711940298507463e-05, + "loss": 0.0002, + "step": 24704 + }, + { + "epoch": 23.05, + "learning_rate": 4.711893656716418e-05, + "loss": 0.0008, + "step": 24708 + }, + { + "epoch": 23.05, + "learning_rate": 4.7118470149253735e-05, + "loss": 0.0002, + "step": 24712 + }, + { + "epoch": 23.06, + "learning_rate": 4.711800373134328e-05, + "loss": 0.0002, + "step": 24716 + }, + { + "epoch": 23.06, + "learning_rate": 4.711753731343284e-05, + "loss": 0.0002, + "step": 24720 + }, + { + "epoch": 23.06, + "learning_rate": 4.711707089552239e-05, + "loss": 0.0005, + "step": 24724 + }, + { + "epoch": 23.07, + "learning_rate": 4.711660447761194e-05, + "loss": 0.0009, + "step": 24728 + }, + { + "epoch": 23.07, + "learning_rate": 4.711613805970149e-05, + "loss": 0.0001, + "step": 24732 + }, + { + "epoch": 23.07, + "learning_rate": 4.711567164179105e-05, + "loss": 0.0002, + "step": 24736 + }, + { + "epoch": 23.08, + "learning_rate": 4.71152052238806e-05, + "loss": 0.0012, + "step": 24740 + }, + { + "epoch": 23.08, + "learning_rate": 4.711473880597015e-05, + "loss": 0.0002, + "step": 24744 + }, + { + "epoch": 23.09, + "learning_rate": 4.71142723880597e-05, + "loss": 0.0017, + "step": 24748 + }, + { + "epoch": 23.09, + "learning_rate": 4.711380597014926e-05, + "loss": 0.0016, + "step": 24752 + }, + { + "epoch": 23.09, + "learning_rate": 4.7113339552238805e-05, + "loss": 0.0005, + "step": 24756 + }, + { + "epoch": 23.1, + "learning_rate": 4.711287313432836e-05, + "loss": 0.0002, + "step": 24760 + }, + { + "epoch": 23.1, + "learning_rate": 4.7112406716417915e-05, + "loss": 0.0002, + "step": 24764 + }, + { + "epoch": 23.1, + "learning_rate": 4.711194029850747e-05, + "loss": 0.0092, + "step": 24768 + }, + { + "epoch": 23.11, + "learning_rate": 4.711147388059702e-05, + "loss": 0.0022, + "step": 24772 + }, + { + "epoch": 23.11, + "learning_rate": 4.7111007462686566e-05, + "loss": 0.0004, + "step": 24776 + }, + { + "epoch": 23.12, + "learning_rate": 4.711054104477612e-05, + "loss": 0.0006, + "step": 24780 + }, + { + "epoch": 23.12, + "learning_rate": 4.7110074626865676e-05, + "loss": 0.0004, + "step": 24784 + }, + { + "epoch": 23.12, + "learning_rate": 4.7109608208955224e-05, + "loss": 0.001, + "step": 24788 + }, + { + "epoch": 23.13, + "learning_rate": 4.710914179104478e-05, + "loss": 0.0002, + "step": 24792 + }, + { + "epoch": 23.13, + "learning_rate": 4.7108675373134334e-05, + "loss": 0.0017, + "step": 24796 + }, + { + "epoch": 23.13, + "learning_rate": 4.710820895522388e-05, + "loss": 0.0001, + "step": 24800 + }, + { + "epoch": 23.14, + "learning_rate": 4.710774253731344e-05, + "loss": 0.0017, + "step": 24804 + }, + { + "epoch": 23.14, + "learning_rate": 4.7107276119402985e-05, + "loss": 0.0002, + "step": 24808 + }, + { + "epoch": 23.15, + "learning_rate": 4.710680970149254e-05, + "loss": 0.0002, + "step": 24812 + }, + { + "epoch": 23.15, + "learning_rate": 4.7106343283582095e-05, + "loss": 0.0, + "step": 24816 + }, + { + "epoch": 23.15, + "learning_rate": 4.710587686567164e-05, + "loss": 0.0, + "step": 24820 + }, + { + "epoch": 23.16, + "learning_rate": 4.71054104477612e-05, + "loss": 0.0001, + "step": 24824 + }, + { + "epoch": 23.16, + "learning_rate": 4.710494402985075e-05, + "loss": 0.0001, + "step": 24828 + }, + { + "epoch": 23.16, + "learning_rate": 4.71044776119403e-05, + "loss": 0.0011, + "step": 24832 + }, + { + "epoch": 23.17, + "learning_rate": 4.710401119402985e-05, + "loss": 0.0004, + "step": 24836 + }, + { + "epoch": 23.17, + "learning_rate": 4.7103544776119404e-05, + "loss": 0.0014, + "step": 24840 + }, + { + "epoch": 23.18, + "learning_rate": 4.710307835820896e-05, + "loss": 0.0002, + "step": 24844 + }, + { + "epoch": 23.18, + "learning_rate": 4.710261194029851e-05, + "loss": 0.001, + "step": 24848 + }, + { + "epoch": 23.18, + "learning_rate": 4.710214552238806e-05, + "loss": 0.0003, + "step": 24852 + }, + { + "epoch": 23.19, + "learning_rate": 4.710167910447762e-05, + "loss": 0.0003, + "step": 24856 + }, + { + "epoch": 23.19, + "learning_rate": 4.7101212686567165e-05, + "loss": 0.0049, + "step": 24860 + }, + { + "epoch": 23.19, + "learning_rate": 4.710074626865672e-05, + "loss": 0.0021, + "step": 24864 + }, + { + "epoch": 23.2, + "learning_rate": 4.710027985074627e-05, + "loss": 0.0002, + "step": 24868 + }, + { + "epoch": 23.2, + "learning_rate": 4.709981343283582e-05, + "loss": 0.0063, + "step": 24872 + }, + { + "epoch": 23.21, + "learning_rate": 4.709934701492538e-05, + "loss": 0.0003, + "step": 24876 + }, + { + "epoch": 23.21, + "learning_rate": 4.7098880597014926e-05, + "loss": 0.0009, + "step": 24880 + }, + { + "epoch": 23.21, + "learning_rate": 4.7098414179104474e-05, + "loss": 0.0044, + "step": 24884 + }, + { + "epoch": 23.22, + "learning_rate": 4.7097947761194036e-05, + "loss": 0.0003, + "step": 24888 + }, + { + "epoch": 23.22, + "learning_rate": 4.7097481343283584e-05, + "loss": 0.0002, + "step": 24892 + }, + { + "epoch": 23.22, + "learning_rate": 4.709701492537313e-05, + "loss": 0.0008, + "step": 24896 + }, + { + "epoch": 23.23, + "learning_rate": 4.709654850746269e-05, + "loss": 0.0021, + "step": 24900 + }, + { + "epoch": 23.23, + "learning_rate": 4.709608208955224e-05, + "loss": 0.0001, + "step": 24904 + }, + { + "epoch": 23.24, + "learning_rate": 4.709561567164179e-05, + "loss": 0.0002, + "step": 24908 + }, + { + "epoch": 23.24, + "learning_rate": 4.7095149253731345e-05, + "loss": 0.0008, + "step": 24912 + }, + { + "epoch": 23.24, + "learning_rate": 4.70946828358209e-05, + "loss": 0.0011, + "step": 24916 + }, + { + "epoch": 23.25, + "learning_rate": 4.709421641791045e-05, + "loss": 0.0003, + "step": 24920 + }, + { + "epoch": 23.25, + "learning_rate": 4.709375e-05, + "loss": 0.0007, + "step": 24924 + }, + { + "epoch": 23.25, + "learning_rate": 4.709328358208955e-05, + "loss": 0.0031, + "step": 24928 + }, + { + "epoch": 23.26, + "learning_rate": 4.709281716417911e-05, + "loss": 0.008, + "step": 24932 + }, + { + "epoch": 23.26, + "learning_rate": 4.709235074626866e-05, + "loss": 0.0002, + "step": 24936 + }, + { + "epoch": 23.26, + "learning_rate": 4.709188432835821e-05, + "loss": 0.0032, + "step": 24940 + }, + { + "epoch": 23.27, + "learning_rate": 4.7091417910447764e-05, + "loss": 0.0001, + "step": 24944 + }, + { + "epoch": 23.27, + "learning_rate": 4.709095149253732e-05, + "loss": 0.0024, + "step": 24948 + }, + { + "epoch": 23.28, + "learning_rate": 4.709048507462687e-05, + "loss": 0.0003, + "step": 24952 + }, + { + "epoch": 23.28, + "learning_rate": 4.709001865671642e-05, + "loss": 0.0001, + "step": 24956 + }, + { + "epoch": 23.28, + "learning_rate": 4.708955223880597e-05, + "loss": 0.0003, + "step": 24960 + }, + { + "epoch": 23.29, + "learning_rate": 4.7089085820895525e-05, + "loss": 0.0003, + "step": 24964 + }, + { + "epoch": 23.29, + "learning_rate": 4.708861940298508e-05, + "loss": 0.0076, + "step": 24968 + }, + { + "epoch": 23.29, + "learning_rate": 4.708815298507463e-05, + "loss": 0.0044, + "step": 24972 + }, + { + "epoch": 23.3, + "learning_rate": 4.708768656716418e-05, + "loss": 0.0026, + "step": 24976 + }, + { + "epoch": 23.3, + "learning_rate": 4.708722014925374e-05, + "loss": 0.0001, + "step": 24980 + }, + { + "epoch": 23.31, + "learning_rate": 4.7086753731343286e-05, + "loss": 0.0015, + "step": 24984 + }, + { + "epoch": 23.31, + "learning_rate": 4.7086287313432834e-05, + "loss": 0.0001, + "step": 24988 + }, + { + "epoch": 23.31, + "learning_rate": 4.708582089552239e-05, + "loss": 0.0101, + "step": 24992 + }, + { + "epoch": 23.32, + "learning_rate": 4.7085354477611944e-05, + "loss": 0.0002, + "step": 24996 + }, + { + "epoch": 23.32, + "learning_rate": 4.708488805970149e-05, + "loss": 0.0001, + "step": 25000 + }, + { + "epoch": 23.32, + "eval_exact_match": 0.7263056092843327, + "eval_exec": 0.7620889748549323, + "eval_loss": 0.3771933615207672, + "eval_runtime": 1213.1376, + "eval_samples_per_second": 0.852, + "step": 25000 + }, + { + "epoch": 23.32, + "learning_rate": 4.708442164179105e-05, + "loss": 0.0006, + "step": 25004 + }, + { + "epoch": 23.33, + "learning_rate": 4.70839552238806e-05, + "loss": 0.0035, + "step": 25008 + }, + { + "epoch": 23.33, + "learning_rate": 4.708348880597015e-05, + "loss": 0.0, + "step": 25012 + }, + { + "epoch": 23.34, + "learning_rate": 4.7083022388059705e-05, + "loss": 0.0009, + "step": 25016 + }, + { + "epoch": 23.34, + "learning_rate": 4.708255597014925e-05, + "loss": 0.0004, + "step": 25020 + }, + { + "epoch": 23.34, + "learning_rate": 4.708208955223881e-05, + "loss": 0.0027, + "step": 25024 + }, + { + "epoch": 23.35, + "learning_rate": 4.708162313432836e-05, + "loss": 0.0013, + "step": 25028 + }, + { + "epoch": 23.35, + "learning_rate": 4.708115671641791e-05, + "loss": 0.0016, + "step": 25032 + }, + { + "epoch": 23.35, + "learning_rate": 4.7080690298507466e-05, + "loss": 0.0002, + "step": 25036 + }, + { + "epoch": 23.36, + "learning_rate": 4.708022388059702e-05, + "loss": 0.0001, + "step": 25040 + }, + { + "epoch": 23.36, + "learning_rate": 4.707975746268657e-05, + "loss": 0.0003, + "step": 25044 + }, + { + "epoch": 23.37, + "learning_rate": 4.707929104477612e-05, + "loss": 0.0005, + "step": 25048 + }, + { + "epoch": 23.37, + "learning_rate": 4.707882462686567e-05, + "loss": 0.0006, + "step": 25052 + }, + { + "epoch": 23.37, + "learning_rate": 4.707835820895523e-05, + "loss": 0.008, + "step": 25056 + }, + { + "epoch": 23.38, + "learning_rate": 4.7077891791044775e-05, + "loss": 0.0058, + "step": 25060 + }, + { + "epoch": 23.38, + "learning_rate": 4.707742537313433e-05, + "loss": 0.0025, + "step": 25064 + }, + { + "epoch": 23.38, + "learning_rate": 4.7076958955223885e-05, + "loss": 0.0001, + "step": 25068 + }, + { + "epoch": 23.39, + "learning_rate": 4.707649253731343e-05, + "loss": 0.0002, + "step": 25072 + }, + { + "epoch": 23.39, + "learning_rate": 4.707602611940299e-05, + "loss": 0.0004, + "step": 25076 + }, + { + "epoch": 23.4, + "learning_rate": 4.7075559701492536e-05, + "loss": 0.0101, + "step": 25080 + }, + { + "epoch": 23.4, + "learning_rate": 4.707509328358209e-05, + "loss": 0.0012, + "step": 25084 + }, + { + "epoch": 23.4, + "learning_rate": 4.7074626865671646e-05, + "loss": 0.0007, + "step": 25088 + }, + { + "epoch": 23.41, + "learning_rate": 4.7074160447761194e-05, + "loss": 0.0014, + "step": 25092 + }, + { + "epoch": 23.41, + "learning_rate": 4.707369402985075e-05, + "loss": 0.0009, + "step": 25096 + }, + { + "epoch": 23.41, + "learning_rate": 4.7073227611940304e-05, + "loss": 0.0004, + "step": 25100 + }, + { + "epoch": 23.42, + "learning_rate": 4.707276119402985e-05, + "loss": 0.0006, + "step": 25104 + }, + { + "epoch": 23.42, + "learning_rate": 4.707229477611941e-05, + "loss": 0.0011, + "step": 25108 + }, + { + "epoch": 23.43, + "learning_rate": 4.7071828358208955e-05, + "loss": 0.0, + "step": 25112 + }, + { + "epoch": 23.43, + "learning_rate": 4.707136194029851e-05, + "loss": 0.0004, + "step": 25116 + }, + { + "epoch": 23.43, + "learning_rate": 4.7070895522388065e-05, + "loss": 0.0003, + "step": 25120 + }, + { + "epoch": 23.44, + "learning_rate": 4.707042910447761e-05, + "loss": 0.0005, + "step": 25124 + }, + { + "epoch": 23.44, + "learning_rate": 4.706996268656717e-05, + "loss": 0.0012, + "step": 25128 + }, + { + "epoch": 23.44, + "learning_rate": 4.706949626865672e-05, + "loss": 0.0, + "step": 25132 + }, + { + "epoch": 23.45, + "learning_rate": 4.706902985074627e-05, + "loss": 0.0012, + "step": 25136 + }, + { + "epoch": 23.45, + "learning_rate": 4.706856343283582e-05, + "loss": 0.0002, + "step": 25140 + }, + { + "epoch": 23.46, + "learning_rate": 4.706809701492538e-05, + "loss": 0.0078, + "step": 25144 + }, + { + "epoch": 23.46, + "learning_rate": 4.706763059701493e-05, + "loss": 0.0002, + "step": 25148 + }, + { + "epoch": 23.46, + "learning_rate": 4.706716417910448e-05, + "loss": 0.0062, + "step": 25152 + }, + { + "epoch": 23.47, + "learning_rate": 4.706669776119403e-05, + "loss": 0.0002, + "step": 25156 + }, + { + "epoch": 23.47, + "learning_rate": 4.7066231343283587e-05, + "loss": 0.0002, + "step": 25160 + }, + { + "epoch": 23.47, + "learning_rate": 4.7065764925373135e-05, + "loss": 0.003, + "step": 25164 + }, + { + "epoch": 23.48, + "learning_rate": 4.706529850746269e-05, + "loss": 0.0006, + "step": 25168 + }, + { + "epoch": 23.48, + "learning_rate": 4.706483208955224e-05, + "loss": 0.0003, + "step": 25172 + }, + { + "epoch": 23.49, + "learning_rate": 4.706436567164179e-05, + "loss": 0.0007, + "step": 25176 + }, + { + "epoch": 23.49, + "learning_rate": 4.706389925373135e-05, + "loss": 0.0002, + "step": 25180 + }, + { + "epoch": 23.49, + "learning_rate": 4.7063432835820896e-05, + "loss": 0.0005, + "step": 25184 + }, + { + "epoch": 23.5, + "learning_rate": 4.706296641791045e-05, + "loss": 0.0004, + "step": 25188 + }, + { + "epoch": 23.5, + "learning_rate": 4.7062500000000006e-05, + "loss": 0.0025, + "step": 25192 + }, + { + "epoch": 23.5, + "learning_rate": 4.7062033582089554e-05, + "loss": 0.0001, + "step": 25196 + }, + { + "epoch": 23.51, + "learning_rate": 4.70615671641791e-05, + "loss": 0.0006, + "step": 25200 + }, + { + "epoch": 23.51, + "learning_rate": 4.7061100746268663e-05, + "loss": 0.0012, + "step": 25204 + }, + { + "epoch": 23.51, + "learning_rate": 4.706063432835821e-05, + "loss": 0.0002, + "step": 25208 + }, + { + "epoch": 23.52, + "learning_rate": 4.706016791044776e-05, + "loss": 0.0003, + "step": 25212 + }, + { + "epoch": 23.52, + "learning_rate": 4.7059701492537315e-05, + "loss": 0.0005, + "step": 25216 + }, + { + "epoch": 23.53, + "learning_rate": 4.705923507462687e-05, + "loss": 0.0005, + "step": 25220 + }, + { + "epoch": 23.53, + "learning_rate": 4.705876865671642e-05, + "loss": 0.0001, + "step": 25224 + }, + { + "epoch": 23.53, + "learning_rate": 4.705830223880597e-05, + "loss": 0.0005, + "step": 25228 + }, + { + "epoch": 23.54, + "learning_rate": 4.705783582089552e-05, + "loss": 0.0128, + "step": 25232 + }, + { + "epoch": 23.54, + "learning_rate": 4.7057369402985076e-05, + "loss": 0.0123, + "step": 25236 + }, + { + "epoch": 23.54, + "learning_rate": 4.705690298507463e-05, + "loss": 0.0012, + "step": 25240 + }, + { + "epoch": 23.55, + "learning_rate": 4.705643656716418e-05, + "loss": 0.0005, + "step": 25244 + }, + { + "epoch": 23.55, + "learning_rate": 4.7055970149253734e-05, + "loss": 0.0001, + "step": 25248 + }, + { + "epoch": 23.56, + "learning_rate": 4.705550373134329e-05, + "loss": 0.0008, + "step": 25252 + }, + { + "epoch": 23.56, + "learning_rate": 4.705503731343284e-05, + "loss": 0.0002, + "step": 25256 + }, + { + "epoch": 23.56, + "learning_rate": 4.705457089552239e-05, + "loss": 0.0005, + "step": 25260 + }, + { + "epoch": 23.57, + "learning_rate": 4.7054104477611946e-05, + "loss": 0.0, + "step": 25264 + }, + { + "epoch": 23.57, + "learning_rate": 4.7053638059701495e-05, + "loss": 0.0, + "step": 25268 + }, + { + "epoch": 23.57, + "learning_rate": 4.705317164179105e-05, + "loss": 0.0008, + "step": 25272 + }, + { + "epoch": 23.58, + "learning_rate": 4.70527052238806e-05, + "loss": 0.0023, + "step": 25276 + }, + { + "epoch": 23.58, + "learning_rate": 4.705223880597015e-05, + "loss": 0.0004, + "step": 25280 + }, + { + "epoch": 23.59, + "learning_rate": 4.705177238805971e-05, + "loss": 0.0054, + "step": 25284 + }, + { + "epoch": 23.59, + "learning_rate": 4.7051305970149256e-05, + "loss": 0.0, + "step": 25288 + }, + { + "epoch": 23.59, + "learning_rate": 4.7050839552238804e-05, + "loss": 0.0004, + "step": 25292 + }, + { + "epoch": 23.6, + "learning_rate": 4.7050373134328365e-05, + "loss": 0.0001, + "step": 25296 + }, + { + "epoch": 23.6, + "learning_rate": 4.7049906716417914e-05, + "loss": 0.0001, + "step": 25300 + }, + { + "epoch": 23.6, + "learning_rate": 4.704944029850746e-05, + "loss": 0.001, + "step": 25304 + }, + { + "epoch": 23.61, + "learning_rate": 4.7048973880597017e-05, + "loss": 0.0003, + "step": 25308 + }, + { + "epoch": 23.61, + "learning_rate": 4.704850746268657e-05, + "loss": 0.0029, + "step": 25312 + }, + { + "epoch": 23.62, + "learning_rate": 4.704804104477612e-05, + "loss": 0.0015, + "step": 25316 + }, + { + "epoch": 23.62, + "learning_rate": 4.7047574626865674e-05, + "loss": 0.0019, + "step": 25320 + }, + { + "epoch": 23.62, + "learning_rate": 4.704710820895523e-05, + "loss": 0.0018, + "step": 25324 + }, + { + "epoch": 23.63, + "learning_rate": 4.704664179104478e-05, + "loss": 0.0, + "step": 25328 + }, + { + "epoch": 23.63, + "learning_rate": 4.704617537313433e-05, + "loss": 0.0007, + "step": 25332 + }, + { + "epoch": 23.63, + "learning_rate": 4.704570895522388e-05, + "loss": 0.004, + "step": 25336 + }, + { + "epoch": 23.64, + "learning_rate": 4.7045242537313435e-05, + "loss": 0.0004, + "step": 25340 + }, + { + "epoch": 23.64, + "learning_rate": 4.704477611940299e-05, + "loss": 0.0002, + "step": 25344 + }, + { + "epoch": 23.65, + "learning_rate": 4.704430970149254e-05, + "loss": 0.0033, + "step": 25348 + }, + { + "epoch": 23.65, + "learning_rate": 4.704384328358209e-05, + "loss": 0.0004, + "step": 25352 + }, + { + "epoch": 23.65, + "learning_rate": 4.704337686567165e-05, + "loss": 0.0073, + "step": 25356 + }, + { + "epoch": 23.66, + "learning_rate": 4.7042910447761196e-05, + "loss": 0.0004, + "step": 25360 + }, + { + "epoch": 23.66, + "learning_rate": 4.7042444029850745e-05, + "loss": 0.0017, + "step": 25364 + }, + { + "epoch": 23.66, + "learning_rate": 4.70419776119403e-05, + "loss": 0.0078, + "step": 25368 + }, + { + "epoch": 23.67, + "learning_rate": 4.7041511194029854e-05, + "loss": 0.0002, + "step": 25372 + }, + { + "epoch": 23.67, + "learning_rate": 4.70410447761194e-05, + "loss": 0.0006, + "step": 25376 + }, + { + "epoch": 23.68, + "learning_rate": 4.704057835820896e-05, + "loss": 0.0027, + "step": 25380 + }, + { + "epoch": 23.68, + "learning_rate": 4.704011194029851e-05, + "loss": 0.0021, + "step": 25384 + }, + { + "epoch": 23.68, + "learning_rate": 4.703964552238806e-05, + "loss": 0.0059, + "step": 25388 + }, + { + "epoch": 23.69, + "learning_rate": 4.7039179104477615e-05, + "loss": 0.0002, + "step": 25392 + }, + { + "epoch": 23.69, + "learning_rate": 4.7038712686567164e-05, + "loss": 0.0048, + "step": 25396 + }, + { + "epoch": 23.69, + "learning_rate": 4.703824626865672e-05, + "loss": 0.0018, + "step": 25400 + }, + { + "epoch": 23.7, + "learning_rate": 4.703777985074627e-05, + "loss": 0.0001, + "step": 25404 + }, + { + "epoch": 23.7, + "learning_rate": 4.703731343283582e-05, + "loss": 0.0035, + "step": 25408 + }, + { + "epoch": 23.71, + "learning_rate": 4.7036847014925376e-05, + "loss": 0.0006, + "step": 25412 + }, + { + "epoch": 23.71, + "learning_rate": 4.703638059701493e-05, + "loss": 0.0004, + "step": 25416 + }, + { + "epoch": 23.71, + "learning_rate": 4.703591417910448e-05, + "loss": 0.0, + "step": 25420 + }, + { + "epoch": 23.72, + "learning_rate": 4.7035447761194034e-05, + "loss": 0.0002, + "step": 25424 + }, + { + "epoch": 23.72, + "learning_rate": 4.703498134328358e-05, + "loss": 0.0019, + "step": 25428 + }, + { + "epoch": 23.72, + "learning_rate": 4.703451492537314e-05, + "loss": 0.0001, + "step": 25432 + }, + { + "epoch": 23.73, + "learning_rate": 4.703404850746269e-05, + "loss": 0.0087, + "step": 25436 + }, + { + "epoch": 23.73, + "learning_rate": 4.703358208955224e-05, + "loss": 0.0002, + "step": 25440 + }, + { + "epoch": 23.73, + "learning_rate": 4.7033115671641795e-05, + "loss": 0.0046, + "step": 25444 + }, + { + "epoch": 23.74, + "learning_rate": 4.703264925373135e-05, + "loss": 0.0002, + "step": 25448 + }, + { + "epoch": 23.74, + "learning_rate": 4.70321828358209e-05, + "loss": 0.0007, + "step": 25452 + }, + { + "epoch": 23.75, + "learning_rate": 4.7031716417910446e-05, + "loss": 0.0033, + "step": 25456 + }, + { + "epoch": 23.75, + "learning_rate": 4.703125e-05, + "loss": 0.0002, + "step": 25460 + }, + { + "epoch": 23.75, + "learning_rate": 4.7030783582089556e-05, + "loss": 0.0004, + "step": 25464 + }, + { + "epoch": 23.76, + "learning_rate": 4.7030317164179104e-05, + "loss": 0.0019, + "step": 25468 + }, + { + "epoch": 23.76, + "learning_rate": 4.702985074626866e-05, + "loss": 0.0056, + "step": 25472 + }, + { + "epoch": 23.76, + "learning_rate": 4.7029384328358214e-05, + "loss": 0.0001, + "step": 25476 + }, + { + "epoch": 23.77, + "learning_rate": 4.702891791044776e-05, + "loss": 0.0001, + "step": 25480 + }, + { + "epoch": 23.77, + "learning_rate": 4.702845149253732e-05, + "loss": 0.0007, + "step": 25484 + }, + { + "epoch": 23.78, + "learning_rate": 4.7027985074626865e-05, + "loss": 0.0028, + "step": 25488 + }, + { + "epoch": 23.78, + "learning_rate": 4.702751865671642e-05, + "loss": 0.0002, + "step": 25492 + }, + { + "epoch": 23.78, + "learning_rate": 4.7027052238805975e-05, + "loss": 0.0005, + "step": 25496 + }, + { + "epoch": 23.79, + "learning_rate": 4.702658582089552e-05, + "loss": 0.0029, + "step": 25500 + }, + { + "epoch": 23.79, + "eval_exact_match": 0.7369439071566731, + "eval_exec": 0.7611218568665378, + "eval_loss": 0.3535132110118866, + "eval_runtime": 1197.1055, + "eval_samples_per_second": 0.864, + "step": 25500 + }, + { + "epoch": 23.79, + "learning_rate": 4.702611940298508e-05, + "loss": 0.0041, + "step": 25504 + }, + { + "epoch": 23.79, + "learning_rate": 4.702565298507463e-05, + "loss": 0.0005, + "step": 25508 + }, + { + "epoch": 23.8, + "learning_rate": 4.702518656716418e-05, + "loss": 0.0014, + "step": 25512 + }, + { + "epoch": 23.8, + "learning_rate": 4.702472014925373e-05, + "loss": 0.0017, + "step": 25516 + }, + { + "epoch": 23.81, + "learning_rate": 4.7024253731343284e-05, + "loss": 0.0065, + "step": 25520 + }, + { + "epoch": 23.81, + "learning_rate": 4.702378731343284e-05, + "loss": 0.0002, + "step": 25524 + }, + { + "epoch": 23.81, + "learning_rate": 4.702332089552239e-05, + "loss": 0.0034, + "step": 25528 + }, + { + "epoch": 23.82, + "learning_rate": 4.702285447761194e-05, + "loss": 0.0004, + "step": 25532 + }, + { + "epoch": 23.82, + "learning_rate": 4.70223880597015e-05, + "loss": 0.0005, + "step": 25536 + }, + { + "epoch": 23.82, + "learning_rate": 4.7021921641791045e-05, + "loss": 0.004, + "step": 25540 + }, + { + "epoch": 23.83, + "learning_rate": 4.70214552238806e-05, + "loss": 0.002, + "step": 25544 + }, + { + "epoch": 23.83, + "learning_rate": 4.702098880597015e-05, + "loss": 0.0006, + "step": 25548 + }, + { + "epoch": 23.84, + "learning_rate": 4.70205223880597e-05, + "loss": 0.0009, + "step": 25552 + }, + { + "epoch": 23.84, + "learning_rate": 4.702005597014926e-05, + "loss": 0.0043, + "step": 25556 + }, + { + "epoch": 23.84, + "learning_rate": 4.7019589552238806e-05, + "loss": 0.0052, + "step": 25560 + }, + { + "epoch": 23.85, + "learning_rate": 4.7019123134328354e-05, + "loss": 0.0047, + "step": 25564 + }, + { + "epoch": 23.85, + "learning_rate": 4.7018656716417916e-05, + "loss": 0.0028, + "step": 25568 + }, + { + "epoch": 23.85, + "learning_rate": 4.7018190298507464e-05, + "loss": 0.0003, + "step": 25572 + }, + { + "epoch": 23.86, + "learning_rate": 4.701772388059702e-05, + "loss": 0.0004, + "step": 25576 + }, + { + "epoch": 23.86, + "learning_rate": 4.701725746268657e-05, + "loss": 0.0006, + "step": 25580 + }, + { + "epoch": 23.87, + "learning_rate": 4.701679104477612e-05, + "loss": 0.0011, + "step": 25584 + }, + { + "epoch": 23.87, + "learning_rate": 4.701632462686568e-05, + "loss": 0.0001, + "step": 25588 + }, + { + "epoch": 23.87, + "learning_rate": 4.7015858208955225e-05, + "loss": 0.0003, + "step": 25592 + }, + { + "epoch": 23.88, + "learning_rate": 4.701539179104478e-05, + "loss": 0.0046, + "step": 25596 + }, + { + "epoch": 23.88, + "learning_rate": 4.7014925373134335e-05, + "loss": 0.0064, + "step": 25600 + }, + { + "epoch": 23.88, + "learning_rate": 4.701445895522388e-05, + "loss": 0.0011, + "step": 25604 + }, + { + "epoch": 23.89, + "learning_rate": 4.701399253731343e-05, + "loss": 0.0002, + "step": 25608 + }, + { + "epoch": 23.89, + "learning_rate": 4.701352611940299e-05, + "loss": 0.0006, + "step": 25612 + }, + { + "epoch": 23.9, + "learning_rate": 4.701305970149254e-05, + "loss": 0.0022, + "step": 25616 + }, + { + "epoch": 23.9, + "learning_rate": 4.701259328358209e-05, + "loss": 0.0024, + "step": 25620 + }, + { + "epoch": 23.9, + "learning_rate": 4.7012126865671644e-05, + "loss": 0.0019, + "step": 25624 + }, + { + "epoch": 23.91, + "learning_rate": 4.70116604477612e-05, + "loss": 0.0004, + "step": 25628 + }, + { + "epoch": 23.91, + "learning_rate": 4.701119402985075e-05, + "loss": 0.0001, + "step": 25632 + }, + { + "epoch": 23.91, + "learning_rate": 4.70107276119403e-05, + "loss": 0.0006, + "step": 25636 + }, + { + "epoch": 23.92, + "learning_rate": 4.701026119402985e-05, + "loss": 0.0044, + "step": 25640 + }, + { + "epoch": 23.92, + "learning_rate": 4.7009794776119405e-05, + "loss": 0.0003, + "step": 25644 + }, + { + "epoch": 23.93, + "learning_rate": 4.700932835820896e-05, + "loss": 0.0001, + "step": 25648 + }, + { + "epoch": 23.93, + "learning_rate": 4.700886194029851e-05, + "loss": 0.0002, + "step": 25652 + }, + { + "epoch": 23.93, + "learning_rate": 4.700839552238806e-05, + "loss": 0.0016, + "step": 25656 + }, + { + "epoch": 23.94, + "learning_rate": 4.700792910447762e-05, + "loss": 0.0002, + "step": 25660 + }, + { + "epoch": 23.94, + "learning_rate": 4.7007462686567166e-05, + "loss": 0.0004, + "step": 25664 + }, + { + "epoch": 23.94, + "learning_rate": 4.7006996268656714e-05, + "loss": 0.0001, + "step": 25668 + }, + { + "epoch": 23.95, + "learning_rate": 4.7006529850746276e-05, + "loss": 0.0001, + "step": 25672 + }, + { + "epoch": 23.95, + "learning_rate": 4.7006063432835824e-05, + "loss": 0.0003, + "step": 25676 + }, + { + "epoch": 23.96, + "learning_rate": 4.700559701492537e-05, + "loss": 0.0012, + "step": 25680 + }, + { + "epoch": 23.96, + "learning_rate": 4.700513059701493e-05, + "loss": 0.0006, + "step": 25684 + }, + { + "epoch": 23.96, + "learning_rate": 4.700466417910448e-05, + "loss": 0.0028, + "step": 25688 + }, + { + "epoch": 23.97, + "learning_rate": 4.700419776119403e-05, + "loss": 0.0014, + "step": 25692 + }, + { + "epoch": 23.97, + "learning_rate": 4.7003731343283585e-05, + "loss": 0.0002, + "step": 25696 + }, + { + "epoch": 23.97, + "learning_rate": 4.700326492537313e-05, + "loss": 0.0001, + "step": 25700 + }, + { + "epoch": 23.98, + "learning_rate": 4.700279850746269e-05, + "loss": 0.0004, + "step": 25704 + }, + { + "epoch": 23.98, + "learning_rate": 4.700233208955224e-05, + "loss": 0.0058, + "step": 25708 + }, + { + "epoch": 23.98, + "learning_rate": 4.700186567164179e-05, + "loss": 0.0001, + "step": 25712 + }, + { + "epoch": 23.99, + "learning_rate": 4.7001399253731346e-05, + "loss": 0.005, + "step": 25716 + }, + { + "epoch": 23.99, + "learning_rate": 4.70009328358209e-05, + "loss": 0.0002, + "step": 25720 + }, + { + "epoch": 24.0, + "learning_rate": 4.700046641791045e-05, + "loss": 0.0001, + "step": 25724 + }, + { + "epoch": 24.0, + "learning_rate": 4.7e-05, + "loss": 0.0021, + "step": 25728 + }, + { + "epoch": 24.0, + "learning_rate": 4.699953358208955e-05, + "loss": 0.0013, + "step": 25732 + }, + { + "epoch": 24.01, + "learning_rate": 4.699906716417911e-05, + "loss": 0.0007, + "step": 25736 + }, + { + "epoch": 24.01, + "learning_rate": 4.699860074626866e-05, + "loss": 0.0076, + "step": 25740 + }, + { + "epoch": 24.01, + "learning_rate": 4.699813432835821e-05, + "loss": 0.0079, + "step": 25744 + }, + { + "epoch": 24.02, + "learning_rate": 4.6997667910447765e-05, + "loss": 0.0001, + "step": 25748 + }, + { + "epoch": 24.02, + "learning_rate": 4.699720149253732e-05, + "loss": 0.0084, + "step": 25752 + }, + { + "epoch": 24.03, + "learning_rate": 4.699673507462687e-05, + "loss": 0.0021, + "step": 25756 + }, + { + "epoch": 24.03, + "learning_rate": 4.6996268656716416e-05, + "loss": 0.0029, + "step": 25760 + }, + { + "epoch": 24.03, + "learning_rate": 4.699580223880598e-05, + "loss": 0.0023, + "step": 25764 + }, + { + "epoch": 24.04, + "learning_rate": 4.6995335820895526e-05, + "loss": 0.0001, + "step": 25768 + }, + { + "epoch": 24.04, + "learning_rate": 4.6994869402985074e-05, + "loss": 0.0003, + "step": 25772 + }, + { + "epoch": 24.04, + "learning_rate": 4.699440298507463e-05, + "loss": 0.0002, + "step": 25776 + }, + { + "epoch": 24.05, + "learning_rate": 4.6993936567164184e-05, + "loss": 0.001, + "step": 25780 + }, + { + "epoch": 24.05, + "learning_rate": 4.699347014925373e-05, + "loss": 0.0001, + "step": 25784 + }, + { + "epoch": 24.06, + "learning_rate": 4.699300373134329e-05, + "loss": 0.0008, + "step": 25788 + }, + { + "epoch": 24.06, + "learning_rate": 4.6992537313432835e-05, + "loss": 0.0007, + "step": 25792 + }, + { + "epoch": 24.06, + "learning_rate": 4.699207089552239e-05, + "loss": 0.0001, + "step": 25796 + }, + { + "epoch": 24.07, + "learning_rate": 4.6991604477611945e-05, + "loss": 0.0021, + "step": 25800 + }, + { + "epoch": 24.07, + "learning_rate": 4.699113805970149e-05, + "loss": 0.0002, + "step": 25804 + }, + { + "epoch": 24.07, + "learning_rate": 4.699067164179105e-05, + "loss": 0.0013, + "step": 25808 + }, + { + "epoch": 24.08, + "learning_rate": 4.69902052238806e-05, + "loss": 0.0077, + "step": 25812 + }, + { + "epoch": 24.08, + "learning_rate": 4.698973880597015e-05, + "loss": 0.0002, + "step": 25816 + }, + { + "epoch": 24.09, + "learning_rate": 4.69892723880597e-05, + "loss": 0.0005, + "step": 25820 + }, + { + "epoch": 24.09, + "learning_rate": 4.698880597014926e-05, + "loss": 0.0043, + "step": 25824 + }, + { + "epoch": 24.09, + "learning_rate": 4.698833955223881e-05, + "loss": 0.0001, + "step": 25828 + }, + { + "epoch": 24.1, + "learning_rate": 4.698787313432836e-05, + "loss": 0.0002, + "step": 25832 + }, + { + "epoch": 24.1, + "learning_rate": 4.698740671641791e-05, + "loss": 0.0002, + "step": 25836 + }, + { + "epoch": 24.1, + "learning_rate": 4.698694029850747e-05, + "loss": 0.0008, + "step": 25840 + }, + { + "epoch": 24.11, + "learning_rate": 4.6986473880597015e-05, + "loss": 0.0008, + "step": 25844 + }, + { + "epoch": 24.11, + "learning_rate": 4.698600746268657e-05, + "loss": 0.0014, + "step": 25848 + }, + { + "epoch": 24.12, + "learning_rate": 4.698554104477612e-05, + "loss": 0.0018, + "step": 25852 + }, + { + "epoch": 24.12, + "learning_rate": 4.698507462686567e-05, + "loss": 0.0002, + "step": 25856 + }, + { + "epoch": 24.12, + "learning_rate": 4.698460820895523e-05, + "loss": 0.0042, + "step": 25860 + }, + { + "epoch": 24.13, + "learning_rate": 4.6984141791044776e-05, + "loss": 0.0026, + "step": 25864 + }, + { + "epoch": 24.13, + "learning_rate": 4.698367537313433e-05, + "loss": 0.002, + "step": 25868 + }, + { + "epoch": 24.13, + "learning_rate": 4.6983208955223886e-05, + "loss": 0.001, + "step": 25872 + }, + { + "epoch": 24.14, + "learning_rate": 4.6982742537313434e-05, + "loss": 0.0002, + "step": 25876 + }, + { + "epoch": 24.14, + "learning_rate": 4.698227611940298e-05, + "loss": 0.0003, + "step": 25880 + }, + { + "epoch": 24.15, + "learning_rate": 4.6981809701492544e-05, + "loss": 0.0003, + "step": 25884 + }, + { + "epoch": 24.15, + "learning_rate": 4.698134328358209e-05, + "loss": 0.0002, + "step": 25888 + }, + { + "epoch": 24.15, + "learning_rate": 4.698087686567164e-05, + "loss": 0.0003, + "step": 25892 + }, + { + "epoch": 24.16, + "learning_rate": 4.6980410447761195e-05, + "loss": 0.0001, + "step": 25896 + }, + { + "epoch": 24.16, + "learning_rate": 4.697994402985075e-05, + "loss": 0.0001, + "step": 25900 + }, + { + "epoch": 24.16, + "learning_rate": 4.6979477611940305e-05, + "loss": 0.0001, + "step": 25904 + }, + { + "epoch": 24.17, + "learning_rate": 4.697901119402985e-05, + "loss": 0.0002, + "step": 25908 + }, + { + "epoch": 24.17, + "learning_rate": 4.69785447761194e-05, + "loss": 0.0003, + "step": 25912 + }, + { + "epoch": 24.18, + "learning_rate": 4.697807835820896e-05, + "loss": 0.0002, + "step": 25916 + }, + { + "epoch": 24.18, + "learning_rate": 4.697761194029851e-05, + "loss": 0.0002, + "step": 25920 + }, + { + "epoch": 24.18, + "learning_rate": 4.697714552238806e-05, + "loss": 0.0034, + "step": 25924 + }, + { + "epoch": 24.19, + "learning_rate": 4.6976679104477614e-05, + "loss": 0.0027, + "step": 25928 + }, + { + "epoch": 24.19, + "learning_rate": 4.697621268656717e-05, + "loss": 0.001, + "step": 25932 + }, + { + "epoch": 24.19, + "learning_rate": 4.697574626865672e-05, + "loss": 0.0009, + "step": 25936 + }, + { + "epoch": 24.2, + "learning_rate": 4.697527985074627e-05, + "loss": 0.0001, + "step": 25940 + }, + { + "epoch": 24.2, + "learning_rate": 4.697481343283583e-05, + "loss": 0.0003, + "step": 25944 + }, + { + "epoch": 24.21, + "learning_rate": 4.6974347014925375e-05, + "loss": 0.0002, + "step": 25948 + }, + { + "epoch": 24.21, + "learning_rate": 4.697388059701493e-05, + "loss": 0.0006, + "step": 25952 + }, + { + "epoch": 24.21, + "learning_rate": 4.697341417910448e-05, + "loss": 0.0006, + "step": 25956 + }, + { + "epoch": 24.22, + "learning_rate": 4.697294776119403e-05, + "loss": 0.0022, + "step": 25960 + }, + { + "epoch": 24.22, + "learning_rate": 4.697248134328359e-05, + "loss": 0.0067, + "step": 25964 + }, + { + "epoch": 24.22, + "learning_rate": 4.6972014925373136e-05, + "loss": 0.0022, + "step": 25968 + }, + { + "epoch": 24.23, + "learning_rate": 4.6971548507462684e-05, + "loss": 0.0004, + "step": 25972 + }, + { + "epoch": 24.23, + "learning_rate": 4.6971082089552246e-05, + "loss": 0.0001, + "step": 25976 + }, + { + "epoch": 24.24, + "learning_rate": 4.6970615671641794e-05, + "loss": 0.0108, + "step": 25980 + }, + { + "epoch": 24.24, + "learning_rate": 4.697014925373134e-05, + "loss": 0.0007, + "step": 25984 + }, + { + "epoch": 24.24, + "learning_rate": 4.69696828358209e-05, + "loss": 0.0003, + "step": 25988 + }, + { + "epoch": 24.25, + "learning_rate": 4.696921641791045e-05, + "loss": 0.0008, + "step": 25992 + }, + { + "epoch": 24.25, + "learning_rate": 4.696875e-05, + "loss": 0.0006, + "step": 25996 + }, + { + "epoch": 24.25, + "learning_rate": 4.6968283582089555e-05, + "loss": 0.0008, + "step": 26000 + }, + { + "epoch": 24.25, + "eval_exact_match": 0.7330754352030948, + "eval_exec": 0.7678916827852998, + "eval_loss": 0.36795204877853394, + "eval_runtime": 1555.2766, + "eval_samples_per_second": 0.665, + "step": 26000 + }, + { + "epoch": 24.26, + "learning_rate": 4.696781716417911e-05, + "loss": 0.0003, + "step": 26004 + }, + { + "epoch": 24.26, + "learning_rate": 4.696735074626866e-05, + "loss": 0.0002, + "step": 26008 + }, + { + "epoch": 24.26, + "learning_rate": 4.696688432835821e-05, + "loss": 0.0018, + "step": 26012 + }, + { + "epoch": 24.27, + "learning_rate": 4.696641791044776e-05, + "loss": 0.0021, + "step": 26016 + }, + { + "epoch": 24.27, + "learning_rate": 4.6965951492537316e-05, + "loss": 0.0, + "step": 26020 + }, + { + "epoch": 24.28, + "learning_rate": 4.696548507462687e-05, + "loss": 0.0002, + "step": 26024 + }, + { + "epoch": 24.28, + "learning_rate": 4.696501865671642e-05, + "loss": 0.0002, + "step": 26028 + }, + { + "epoch": 24.28, + "learning_rate": 4.696455223880597e-05, + "loss": 0.0048, + "step": 26032 + }, + { + "epoch": 24.29, + "learning_rate": 4.696408582089553e-05, + "loss": 0.0005, + "step": 26036 + }, + { + "epoch": 24.29, + "learning_rate": 4.696361940298508e-05, + "loss": 0.0003, + "step": 26040 + }, + { + "epoch": 24.29, + "learning_rate": 4.6963152985074625e-05, + "loss": 0.0004, + "step": 26044 + }, + { + "epoch": 24.3, + "learning_rate": 4.696268656716418e-05, + "loss": 0.0064, + "step": 26048 + }, + { + "epoch": 24.3, + "learning_rate": 4.6962220149253735e-05, + "loss": 0.0003, + "step": 26052 + }, + { + "epoch": 24.31, + "learning_rate": 4.696175373134328e-05, + "loss": 0.0032, + "step": 26056 + }, + { + "epoch": 24.31, + "learning_rate": 4.696128731343284e-05, + "loss": 0.001, + "step": 26060 + }, + { + "epoch": 24.31, + "learning_rate": 4.696082089552239e-05, + "loss": 0.0002, + "step": 26064 + }, + { + "epoch": 24.32, + "learning_rate": 4.696035447761195e-05, + "loss": 0.0021, + "step": 26068 + }, + { + "epoch": 24.32, + "learning_rate": 4.6959888059701496e-05, + "loss": 0.0005, + "step": 26072 + }, + { + "epoch": 24.32, + "learning_rate": 4.6959421641791044e-05, + "loss": 0.0007, + "step": 26076 + }, + { + "epoch": 24.33, + "learning_rate": 4.69589552238806e-05, + "loss": 0.0006, + "step": 26080 + }, + { + "epoch": 24.33, + "learning_rate": 4.6958488805970154e-05, + "loss": 0.0029, + "step": 26084 + }, + { + "epoch": 24.34, + "learning_rate": 4.69580223880597e-05, + "loss": 0.0002, + "step": 26088 + }, + { + "epoch": 24.34, + "learning_rate": 4.695755597014926e-05, + "loss": 0.0014, + "step": 26092 + }, + { + "epoch": 24.34, + "learning_rate": 4.695708955223881e-05, + "loss": 0.0, + "step": 26096 + }, + { + "epoch": 24.35, + "learning_rate": 4.695662313432836e-05, + "loss": 0.0015, + "step": 26100 + }, + { + "epoch": 24.35, + "learning_rate": 4.6956156716417915e-05, + "loss": 0.0001, + "step": 26104 + }, + { + "epoch": 24.35, + "learning_rate": 4.695569029850746e-05, + "loss": 0.0002, + "step": 26108 + }, + { + "epoch": 24.36, + "learning_rate": 4.695522388059702e-05, + "loss": 0.0002, + "step": 26112 + }, + { + "epoch": 24.36, + "learning_rate": 4.695475746268657e-05, + "loss": 0.0002, + "step": 26116 + }, + { + "epoch": 24.37, + "learning_rate": 4.695429104477612e-05, + "loss": 0.0002, + "step": 26120 + }, + { + "epoch": 24.37, + "learning_rate": 4.6953824626865676e-05, + "loss": 0.0012, + "step": 26124 + }, + { + "epoch": 24.37, + "learning_rate": 4.695335820895523e-05, + "loss": 0.0001, + "step": 26128 + }, + { + "epoch": 24.38, + "learning_rate": 4.695289179104478e-05, + "loss": 0.0002, + "step": 26132 + }, + { + "epoch": 24.38, + "learning_rate": 4.695242537313433e-05, + "loss": 0.0003, + "step": 26136 + }, + { + "epoch": 24.38, + "learning_rate": 4.695195895522388e-05, + "loss": 0.0046, + "step": 26140 + }, + { + "epoch": 24.39, + "learning_rate": 4.6951492537313437e-05, + "loss": 0.0003, + "step": 26144 + }, + { + "epoch": 24.39, + "learning_rate": 4.6951026119402985e-05, + "loss": 0.0003, + "step": 26148 + }, + { + "epoch": 24.4, + "learning_rate": 4.695055970149254e-05, + "loss": 0.0003, + "step": 26152 + }, + { + "epoch": 24.4, + "learning_rate": 4.6950093283582095e-05, + "loss": 0.0004, + "step": 26156 + }, + { + "epoch": 24.4, + "learning_rate": 4.694962686567164e-05, + "loss": 0.0003, + "step": 26160 + }, + { + "epoch": 24.41, + "learning_rate": 4.69491604477612e-05, + "loss": 0.0001, + "step": 26164 + }, + { + "epoch": 24.41, + "learning_rate": 4.6948694029850746e-05, + "loss": 0.001, + "step": 26168 + }, + { + "epoch": 24.41, + "learning_rate": 4.69482276119403e-05, + "loss": 0.0015, + "step": 26172 + }, + { + "epoch": 24.42, + "learning_rate": 4.6947761194029856e-05, + "loss": 0.0002, + "step": 26176 + }, + { + "epoch": 24.42, + "learning_rate": 4.6947294776119404e-05, + "loss": 0.0003, + "step": 26180 + }, + { + "epoch": 24.43, + "learning_rate": 4.694682835820896e-05, + "loss": 0.0017, + "step": 26184 + }, + { + "epoch": 24.43, + "learning_rate": 4.6946361940298513e-05, + "loss": 0.0001, + "step": 26188 + }, + { + "epoch": 24.43, + "learning_rate": 4.694589552238806e-05, + "loss": 0.0001, + "step": 26192 + }, + { + "epoch": 24.44, + "learning_rate": 4.694542910447761e-05, + "loss": 0.001, + "step": 26196 + }, + { + "epoch": 24.44, + "learning_rate": 4.6944962686567165e-05, + "loss": 0.0061, + "step": 26200 + }, + { + "epoch": 24.44, + "learning_rate": 4.694449626865672e-05, + "loss": 0.0005, + "step": 26204 + }, + { + "epoch": 24.45, + "learning_rate": 4.694402985074627e-05, + "loss": 0.0012, + "step": 26208 + }, + { + "epoch": 24.45, + "learning_rate": 4.694356343283582e-05, + "loss": 0.0001, + "step": 26212 + }, + { + "epoch": 24.46, + "learning_rate": 4.694309701492538e-05, + "loss": 0.0, + "step": 26216 + }, + { + "epoch": 24.46, + "learning_rate": 4.6942630597014926e-05, + "loss": 0.0, + "step": 26220 + }, + { + "epoch": 24.46, + "learning_rate": 4.694216417910448e-05, + "loss": 0.0006, + "step": 26224 + }, + { + "epoch": 24.47, + "learning_rate": 4.694169776119403e-05, + "loss": 0.0, + "step": 26228 + }, + { + "epoch": 24.47, + "learning_rate": 4.694123134328359e-05, + "loss": 0.0011, + "step": 26232 + }, + { + "epoch": 24.47, + "learning_rate": 4.694076492537314e-05, + "loss": 0.0003, + "step": 26236 + }, + { + "epoch": 24.48, + "learning_rate": 4.6940298507462687e-05, + "loss": 0.0007, + "step": 26240 + }, + { + "epoch": 24.48, + "learning_rate": 4.693983208955224e-05, + "loss": 0.0036, + "step": 26244 + }, + { + "epoch": 24.49, + "learning_rate": 4.6939365671641796e-05, + "loss": 0.0004, + "step": 26248 + }, + { + "epoch": 24.49, + "learning_rate": 4.6938899253731345e-05, + "loss": 0.0017, + "step": 26252 + }, + { + "epoch": 24.49, + "learning_rate": 4.69384328358209e-05, + "loss": 0.0001, + "step": 26256 + }, + { + "epoch": 24.5, + "learning_rate": 4.693796641791045e-05, + "loss": 0.0001, + "step": 26260 + }, + { + "epoch": 24.5, + "learning_rate": 4.69375e-05, + "loss": 0.0002, + "step": 26264 + }, + { + "epoch": 24.5, + "learning_rate": 4.693703358208956e-05, + "loss": 0.0016, + "step": 26268 + }, + { + "epoch": 24.51, + "learning_rate": 4.6936567164179106e-05, + "loss": 0.0008, + "step": 26272 + }, + { + "epoch": 24.51, + "learning_rate": 4.693610074626866e-05, + "loss": 0.0031, + "step": 26276 + }, + { + "epoch": 24.51, + "learning_rate": 4.6935634328358215e-05, + "loss": 0.0004, + "step": 26280 + }, + { + "epoch": 24.52, + "learning_rate": 4.6935167910447763e-05, + "loss": 0.0013, + "step": 26284 + }, + { + "epoch": 24.52, + "learning_rate": 4.693470149253731e-05, + "loss": 0.0001, + "step": 26288 + }, + { + "epoch": 24.53, + "learning_rate": 4.693423507462687e-05, + "loss": 0.0001, + "step": 26292 + }, + { + "epoch": 24.53, + "learning_rate": 4.693376865671642e-05, + "loss": 0.0015, + "step": 26296 + }, + { + "epoch": 24.53, + "learning_rate": 4.693330223880597e-05, + "loss": 0.0008, + "step": 26300 + }, + { + "epoch": 24.54, + "learning_rate": 4.6932835820895524e-05, + "loss": 0.0001, + "step": 26304 + }, + { + "epoch": 24.54, + "learning_rate": 4.693236940298508e-05, + "loss": 0.0003, + "step": 26308 + }, + { + "epoch": 24.54, + "learning_rate": 4.693190298507463e-05, + "loss": 0.0002, + "step": 26312 + }, + { + "epoch": 24.55, + "learning_rate": 4.693143656716418e-05, + "loss": 0.0005, + "step": 26316 + }, + { + "epoch": 24.55, + "learning_rate": 4.693097014925373e-05, + "loss": 0.0002, + "step": 26320 + }, + { + "epoch": 24.56, + "learning_rate": 4.6930503731343285e-05, + "loss": 0.0001, + "step": 26324 + }, + { + "epoch": 24.56, + "learning_rate": 4.693003731343284e-05, + "loss": 0.0004, + "step": 26328 + }, + { + "epoch": 24.56, + "learning_rate": 4.692957089552239e-05, + "loss": 0.002, + "step": 26332 + }, + { + "epoch": 24.57, + "learning_rate": 4.6929104477611943e-05, + "loss": 0.0001, + "step": 26336 + }, + { + "epoch": 24.57, + "learning_rate": 4.69286380597015e-05, + "loss": 0.0005, + "step": 26340 + }, + { + "epoch": 24.57, + "learning_rate": 4.6928171641791046e-05, + "loss": 0.0063, + "step": 26344 + }, + { + "epoch": 24.58, + "learning_rate": 4.6927705223880595e-05, + "loss": 0.0003, + "step": 26348 + }, + { + "epoch": 24.58, + "learning_rate": 4.6927238805970156e-05, + "loss": 0.0011, + "step": 26352 + }, + { + "epoch": 24.59, + "learning_rate": 4.6926772388059704e-05, + "loss": 0.0027, + "step": 26356 + }, + { + "epoch": 24.59, + "learning_rate": 4.692630597014925e-05, + "loss": 0.0006, + "step": 26360 + }, + { + "epoch": 24.59, + "learning_rate": 4.692583955223881e-05, + "loss": 0.0011, + "step": 26364 + }, + { + "epoch": 24.6, + "learning_rate": 4.692537313432836e-05, + "loss": 0.0004, + "step": 26368 + }, + { + "epoch": 24.6, + "learning_rate": 4.692490671641791e-05, + "loss": 0.0005, + "step": 26372 + }, + { + "epoch": 24.6, + "learning_rate": 4.6924440298507465e-05, + "loss": 0.0002, + "step": 26376 + }, + { + "epoch": 24.61, + "learning_rate": 4.6923973880597014e-05, + "loss": 0.0004, + "step": 26380 + }, + { + "epoch": 24.61, + "learning_rate": 4.692350746268657e-05, + "loss": 0.0011, + "step": 26384 + }, + { + "epoch": 24.62, + "learning_rate": 4.692304104477612e-05, + "loss": 0.0001, + "step": 26388 + }, + { + "epoch": 24.62, + "learning_rate": 4.692257462686567e-05, + "loss": 0.0, + "step": 26392 + }, + { + "epoch": 24.62, + "learning_rate": 4.6922108208955226e-05, + "loss": 0.0006, + "step": 26396 + }, + { + "epoch": 24.63, + "learning_rate": 4.692164179104478e-05, + "loss": 0.0, + "step": 26400 + }, + { + "epoch": 24.63, + "learning_rate": 4.692117537313433e-05, + "loss": 0.0031, + "step": 26404 + }, + { + "epoch": 24.63, + "learning_rate": 4.6920708955223884e-05, + "loss": 0.0009, + "step": 26408 + }, + { + "epoch": 24.64, + "learning_rate": 4.692024253731343e-05, + "loss": 0.0048, + "step": 26412 + }, + { + "epoch": 24.64, + "learning_rate": 4.691977611940299e-05, + "loss": 0.0003, + "step": 26416 + }, + { + "epoch": 24.65, + "learning_rate": 4.691930970149254e-05, + "loss": 0.0, + "step": 26420 + }, + { + "epoch": 24.65, + "learning_rate": 4.691884328358209e-05, + "loss": 0.0005, + "step": 26424 + }, + { + "epoch": 24.65, + "learning_rate": 4.6918376865671645e-05, + "loss": 0.0022, + "step": 26428 + }, + { + "epoch": 24.66, + "learning_rate": 4.69179104477612e-05, + "loss": 0.0002, + "step": 26432 + }, + { + "epoch": 24.66, + "learning_rate": 4.691744402985075e-05, + "loss": 0.0001, + "step": 26436 + }, + { + "epoch": 24.66, + "learning_rate": 4.6916977611940296e-05, + "loss": 0.0015, + "step": 26440 + }, + { + "epoch": 24.67, + "learning_rate": 4.691651119402986e-05, + "loss": 0.0003, + "step": 26444 + }, + { + "epoch": 24.67, + "learning_rate": 4.6916044776119406e-05, + "loss": 0.0035, + "step": 26448 + }, + { + "epoch": 24.68, + "learning_rate": 4.6915578358208954e-05, + "loss": 0.0034, + "step": 26452 + }, + { + "epoch": 24.68, + "learning_rate": 4.691511194029851e-05, + "loss": 0.0004, + "step": 26456 + }, + { + "epoch": 24.68, + "learning_rate": 4.6914645522388064e-05, + "loss": 0.0001, + "step": 26460 + }, + { + "epoch": 24.69, + "learning_rate": 4.691417910447761e-05, + "loss": 0.0001, + "step": 26464 + }, + { + "epoch": 24.69, + "learning_rate": 4.691371268656717e-05, + "loss": 0.001, + "step": 26468 + }, + { + "epoch": 24.69, + "learning_rate": 4.6913246268656715e-05, + "loss": 0.0022, + "step": 26472 + }, + { + "epoch": 24.7, + "learning_rate": 4.691277985074627e-05, + "loss": 0.0001, + "step": 26476 + }, + { + "epoch": 24.7, + "learning_rate": 4.6912313432835825e-05, + "loss": 0.0, + "step": 26480 + }, + { + "epoch": 24.71, + "learning_rate": 4.691184701492537e-05, + "loss": 0.0005, + "step": 26484 + }, + { + "epoch": 24.71, + "learning_rate": 4.691138059701493e-05, + "loss": 0.0031, + "step": 26488 + }, + { + "epoch": 24.71, + "learning_rate": 4.691091417910448e-05, + "loss": 0.0005, + "step": 26492 + }, + { + "epoch": 24.72, + "learning_rate": 4.691044776119403e-05, + "loss": 0.0001, + "step": 26496 + }, + { + "epoch": 24.72, + "learning_rate": 4.690998134328358e-05, + "loss": 0.0001, + "step": 26500 + }, + { + "epoch": 24.72, + "eval_exact_match": 0.7446808510638298, + "eval_exec": 0.7736943907156673, + "eval_loss": 0.403182715177536, + "eval_runtime": 1851.9773, + "eval_samples_per_second": 0.558, + "step": 26500 + }, + { + "epoch": 24.72, + "learning_rate": 4.690951492537314e-05, + "loss": 0.0009, + "step": 26504 + }, + { + "epoch": 24.73, + "learning_rate": 4.690904850746269e-05, + "loss": 0.0003, + "step": 26508 + }, + { + "epoch": 24.73, + "learning_rate": 4.690858208955224e-05, + "loss": 0.0001, + "step": 26512 + }, + { + "epoch": 24.73, + "learning_rate": 4.690811567164179e-05, + "loss": 0.0015, + "step": 26516 + }, + { + "epoch": 24.74, + "learning_rate": 4.690764925373135e-05, + "loss": 0.0003, + "step": 26520 + }, + { + "epoch": 24.74, + "learning_rate": 4.6907182835820895e-05, + "loss": 0.0021, + "step": 26524 + }, + { + "epoch": 24.75, + "learning_rate": 4.690671641791045e-05, + "loss": 0.0006, + "step": 26528 + }, + { + "epoch": 24.75, + "learning_rate": 4.690625e-05, + "loss": 0.0009, + "step": 26532 + }, + { + "epoch": 24.75, + "learning_rate": 4.690578358208955e-05, + "loss": 0.0, + "step": 26536 + }, + { + "epoch": 24.76, + "learning_rate": 4.690531716417911e-05, + "loss": 0.0002, + "step": 26540 + }, + { + "epoch": 24.76, + "learning_rate": 4.6904850746268656e-05, + "loss": 0.0004, + "step": 26544 + }, + { + "epoch": 24.76, + "learning_rate": 4.690438432835821e-05, + "loss": 0.0109, + "step": 26548 + }, + { + "epoch": 24.77, + "learning_rate": 4.6903917910447766e-05, + "loss": 0.0008, + "step": 26552 + }, + { + "epoch": 24.77, + "learning_rate": 4.6903451492537314e-05, + "loss": 0.001, + "step": 26556 + }, + { + "epoch": 24.78, + "learning_rate": 4.690298507462687e-05, + "loss": 0.0027, + "step": 26560 + }, + { + "epoch": 24.78, + "learning_rate": 4.6902518656716424e-05, + "loss": 0.0001, + "step": 26564 + }, + { + "epoch": 24.78, + "learning_rate": 4.690205223880597e-05, + "loss": 0.0004, + "step": 26568 + }, + { + "epoch": 24.79, + "learning_rate": 4.690158582089553e-05, + "loss": 0.0099, + "step": 26572 + }, + { + "epoch": 24.79, + "learning_rate": 4.6901119402985075e-05, + "loss": 0.0017, + "step": 26576 + }, + { + "epoch": 24.79, + "learning_rate": 4.690065298507463e-05, + "loss": 0.0046, + "step": 26580 + }, + { + "epoch": 24.8, + "learning_rate": 4.6900186567164185e-05, + "loss": 0.0015, + "step": 26584 + }, + { + "epoch": 24.8, + "learning_rate": 4.689972014925373e-05, + "loss": 0.001, + "step": 26588 + }, + { + "epoch": 24.81, + "learning_rate": 4.689925373134328e-05, + "loss": 0.001, + "step": 26592 + }, + { + "epoch": 24.81, + "learning_rate": 4.689878731343284e-05, + "loss": 0.0003, + "step": 26596 + }, + { + "epoch": 24.81, + "learning_rate": 4.689832089552239e-05, + "loss": 0.0036, + "step": 26600 + }, + { + "epoch": 24.82, + "learning_rate": 4.689785447761194e-05, + "loss": 0.0021, + "step": 26604 + }, + { + "epoch": 24.82, + "learning_rate": 4.6897388059701494e-05, + "loss": 0.0002, + "step": 26608 + }, + { + "epoch": 24.82, + "learning_rate": 4.689692164179105e-05, + "loss": 0.0001, + "step": 26612 + }, + { + "epoch": 24.83, + "learning_rate": 4.68964552238806e-05, + "loss": 0.0015, + "step": 26616 + }, + { + "epoch": 24.83, + "learning_rate": 4.689598880597015e-05, + "loss": 0.0015, + "step": 26620 + }, + { + "epoch": 24.84, + "learning_rate": 4.689552238805971e-05, + "loss": 0.0032, + "step": 26624 + }, + { + "epoch": 24.84, + "learning_rate": 4.6895055970149255e-05, + "loss": 0.0009, + "step": 26628 + }, + { + "epoch": 24.84, + "learning_rate": 4.689458955223881e-05, + "loss": 0.0013, + "step": 26632 + }, + { + "epoch": 24.85, + "learning_rate": 4.689412313432836e-05, + "loss": 0.0004, + "step": 26636 + }, + { + "epoch": 24.85, + "learning_rate": 4.689365671641791e-05, + "loss": 0.0029, + "step": 26640 + }, + { + "epoch": 24.85, + "learning_rate": 4.689319029850747e-05, + "loss": 0.0001, + "step": 26644 + }, + { + "epoch": 24.86, + "learning_rate": 4.6892723880597016e-05, + "loss": 0.0026, + "step": 26648 + }, + { + "epoch": 24.86, + "learning_rate": 4.6892257462686564e-05, + "loss": 0.0009, + "step": 26652 + }, + { + "epoch": 24.87, + "learning_rate": 4.6891791044776126e-05, + "loss": 0.0006, + "step": 26656 + }, + { + "epoch": 24.87, + "learning_rate": 4.6891324626865674e-05, + "loss": 0.0002, + "step": 26660 + }, + { + "epoch": 24.87, + "learning_rate": 4.689085820895522e-05, + "loss": 0.0002, + "step": 26664 + }, + { + "epoch": 24.88, + "learning_rate": 4.689039179104478e-05, + "loss": 0.0037, + "step": 26668 + }, + { + "epoch": 24.88, + "learning_rate": 4.688992537313433e-05, + "loss": 0.0004, + "step": 26672 + }, + { + "epoch": 24.88, + "learning_rate": 4.688945895522388e-05, + "loss": 0.0035, + "step": 26676 + }, + { + "epoch": 24.89, + "learning_rate": 4.6888992537313435e-05, + "loss": 0.0003, + "step": 26680 + }, + { + "epoch": 24.89, + "learning_rate": 4.688852611940299e-05, + "loss": 0.0004, + "step": 26684 + }, + { + "epoch": 24.9, + "learning_rate": 4.688805970149254e-05, + "loss": 0.0002, + "step": 26688 + }, + { + "epoch": 24.9, + "learning_rate": 4.688759328358209e-05, + "loss": 0.0, + "step": 26692 + }, + { + "epoch": 24.9, + "learning_rate": 4.688712686567164e-05, + "loss": 0.003, + "step": 26696 + }, + { + "epoch": 24.91, + "learning_rate": 4.6886660447761196e-05, + "loss": 0.0013, + "step": 26700 + }, + { + "epoch": 24.91, + "learning_rate": 4.688619402985075e-05, + "loss": 0.0, + "step": 26704 + }, + { + "epoch": 24.91, + "learning_rate": 4.68857276119403e-05, + "loss": 0.0002, + "step": 26708 + }, + { + "epoch": 24.92, + "learning_rate": 4.688526119402985e-05, + "loss": 0.0008, + "step": 26712 + }, + { + "epoch": 24.92, + "learning_rate": 4.688479477611941e-05, + "loss": 0.0005, + "step": 26716 + }, + { + "epoch": 24.93, + "learning_rate": 4.688432835820896e-05, + "loss": 0.0007, + "step": 26720 + }, + { + "epoch": 24.93, + "learning_rate": 4.688386194029851e-05, + "loss": 0.0, + "step": 26724 + }, + { + "epoch": 24.93, + "learning_rate": 4.688339552238806e-05, + "loss": 0.0001, + "step": 26728 + }, + { + "epoch": 24.94, + "learning_rate": 4.6882929104477615e-05, + "loss": 0.0001, + "step": 26732 + }, + { + "epoch": 24.94, + "learning_rate": 4.688246268656717e-05, + "loss": 0.0008, + "step": 26736 + }, + { + "epoch": 24.94, + "learning_rate": 4.688199626865672e-05, + "loss": 0.0001, + "step": 26740 + }, + { + "epoch": 24.95, + "learning_rate": 4.688152985074627e-05, + "loss": 0.0001, + "step": 26744 + }, + { + "epoch": 24.95, + "learning_rate": 4.688106343283583e-05, + "loss": 0.0013, + "step": 26748 + }, + { + "epoch": 24.96, + "learning_rate": 4.6880597014925376e-05, + "loss": 0.0001, + "step": 26752 + }, + { + "epoch": 24.96, + "learning_rate": 4.6880130597014924e-05, + "loss": 0.0005, + "step": 26756 + }, + { + "epoch": 24.96, + "learning_rate": 4.687966417910448e-05, + "loss": 0.0015, + "step": 26760 + }, + { + "epoch": 24.97, + "learning_rate": 4.6879197761194034e-05, + "loss": 0.0004, + "step": 26764 + }, + { + "epoch": 24.97, + "learning_rate": 4.687873134328358e-05, + "loss": 0.0023, + "step": 26768 + }, + { + "epoch": 24.97, + "learning_rate": 4.687826492537314e-05, + "loss": 0.0001, + "step": 26772 + }, + { + "epoch": 24.98, + "learning_rate": 4.687779850746269e-05, + "loss": 0.0001, + "step": 26776 + }, + { + "epoch": 24.98, + "learning_rate": 4.687733208955224e-05, + "loss": 0.0015, + "step": 26780 + }, + { + "epoch": 24.98, + "learning_rate": 4.6876865671641795e-05, + "loss": 0.0002, + "step": 26784 + }, + { + "epoch": 24.99, + "learning_rate": 4.687639925373134e-05, + "loss": 0.0001, + "step": 26788 + }, + { + "epoch": 24.99, + "learning_rate": 4.68759328358209e-05, + "loss": 0.0003, + "step": 26792 + }, + { + "epoch": 25.0, + "learning_rate": 4.687546641791045e-05, + "loss": 0.0088, + "step": 26796 + }, + { + "epoch": 25.0, + "learning_rate": 4.6875e-05, + "loss": 0.0008, + "step": 26800 + }, + { + "epoch": 25.0, + "learning_rate": 4.6874533582089556e-05, + "loss": 0.0018, + "step": 26804 + }, + { + "epoch": 25.01, + "learning_rate": 4.687406716417911e-05, + "loss": 0.0, + "step": 26808 + }, + { + "epoch": 25.01, + "learning_rate": 4.687360074626866e-05, + "loss": 0.0, + "step": 26812 + }, + { + "epoch": 25.01, + "learning_rate": 4.687313432835821e-05, + "loss": 0.0, + "step": 26816 + }, + { + "epoch": 25.02, + "learning_rate": 4.687266791044776e-05, + "loss": 0.0002, + "step": 26820 + }, + { + "epoch": 25.02, + "learning_rate": 4.687220149253732e-05, + "loss": 0.0018, + "step": 26824 + }, + { + "epoch": 25.03, + "learning_rate": 4.6871735074626865e-05, + "loss": 0.0002, + "step": 26828 + }, + { + "epoch": 25.03, + "learning_rate": 4.687126865671642e-05, + "loss": 0.0037, + "step": 26832 + }, + { + "epoch": 25.03, + "learning_rate": 4.6870802238805975e-05, + "loss": 0.0019, + "step": 26836 + }, + { + "epoch": 25.04, + "learning_rate": 4.687033582089552e-05, + "loss": 0.0003, + "step": 26840 + }, + { + "epoch": 25.04, + "learning_rate": 4.686986940298508e-05, + "loss": 0.0004, + "step": 26844 + }, + { + "epoch": 25.04, + "learning_rate": 4.6869402985074626e-05, + "loss": 0.0001, + "step": 26848 + }, + { + "epoch": 25.05, + "learning_rate": 4.686893656716418e-05, + "loss": 0.0002, + "step": 26852 + }, + { + "epoch": 25.05, + "learning_rate": 4.6868470149253736e-05, + "loss": 0.0001, + "step": 26856 + }, + { + "epoch": 25.06, + "learning_rate": 4.6868003731343284e-05, + "loss": 0.0017, + "step": 26860 + }, + { + "epoch": 25.06, + "learning_rate": 4.686753731343284e-05, + "loss": 0.0001, + "step": 26864 + }, + { + "epoch": 25.06, + "learning_rate": 4.6867070895522394e-05, + "loss": 0.0009, + "step": 26868 + }, + { + "epoch": 25.07, + "learning_rate": 4.686660447761194e-05, + "loss": 0.0085, + "step": 26872 + }, + { + "epoch": 25.07, + "learning_rate": 4.686613805970149e-05, + "loss": 0.0021, + "step": 26876 + }, + { + "epoch": 25.07, + "learning_rate": 4.6865671641791045e-05, + "loss": 0.0007, + "step": 26880 + }, + { + "epoch": 25.08, + "learning_rate": 4.68652052238806e-05, + "loss": 0.0021, + "step": 26884 + }, + { + "epoch": 25.08, + "learning_rate": 4.6864738805970155e-05, + "loss": 0.0012, + "step": 26888 + }, + { + "epoch": 25.09, + "learning_rate": 4.68642723880597e-05, + "loss": 0.0, + "step": 26892 + }, + { + "epoch": 25.09, + "learning_rate": 4.686380597014926e-05, + "loss": 0.0011, + "step": 26896 + }, + { + "epoch": 25.09, + "learning_rate": 4.686333955223881e-05, + "loss": 0.001, + "step": 26900 + }, + { + "epoch": 25.1, + "learning_rate": 4.686287313432836e-05, + "loss": 0.0006, + "step": 26904 + }, + { + "epoch": 25.1, + "learning_rate": 4.686240671641791e-05, + "loss": 0.0019, + "step": 26908 + }, + { + "epoch": 25.1, + "learning_rate": 4.686194029850747e-05, + "loss": 0.0002, + "step": 26912 + }, + { + "epoch": 25.11, + "learning_rate": 4.686147388059702e-05, + "loss": 0.0002, + "step": 26916 + }, + { + "epoch": 25.11, + "learning_rate": 4.686100746268657e-05, + "loss": 0.0014, + "step": 26920 + }, + { + "epoch": 25.12, + "learning_rate": 4.686054104477612e-05, + "loss": 0.0001, + "step": 26924 + }, + { + "epoch": 25.12, + "learning_rate": 4.686007462686568e-05, + "loss": 0.0036, + "step": 26928 + }, + { + "epoch": 25.12, + "learning_rate": 4.6859608208955225e-05, + "loss": 0.0018, + "step": 26932 + }, + { + "epoch": 25.13, + "learning_rate": 4.685914179104478e-05, + "loss": 0.0045, + "step": 26936 + }, + { + "epoch": 25.13, + "learning_rate": 4.685867537313433e-05, + "loss": 0.0017, + "step": 26940 + }, + { + "epoch": 25.13, + "learning_rate": 4.685820895522388e-05, + "loss": 0.0011, + "step": 26944 + }, + { + "epoch": 25.14, + "learning_rate": 4.685774253731344e-05, + "loss": 0.0015, + "step": 26948 + }, + { + "epoch": 25.14, + "learning_rate": 4.6857276119402986e-05, + "loss": 0.0408, + "step": 26952 + }, + { + "epoch": 25.15, + "learning_rate": 4.685680970149254e-05, + "loss": 0.008, + "step": 26956 + }, + { + "epoch": 25.15, + "learning_rate": 4.6856343283582096e-05, + "loss": 0.0, + "step": 26960 + }, + { + "epoch": 25.15, + "learning_rate": 4.6855876865671644e-05, + "loss": 0.0036, + "step": 26964 + }, + { + "epoch": 25.16, + "learning_rate": 4.685541044776119e-05, + "loss": 0.0021, + "step": 26968 + }, + { + "epoch": 25.16, + "learning_rate": 4.6854944029850754e-05, + "loss": 0.001, + "step": 26972 + }, + { + "epoch": 25.16, + "learning_rate": 4.68544776119403e-05, + "loss": 0.0101, + "step": 26976 + }, + { + "epoch": 25.17, + "learning_rate": 4.685401119402985e-05, + "loss": 0.0008, + "step": 26980 + }, + { + "epoch": 25.17, + "learning_rate": 4.6853544776119405e-05, + "loss": 0.0, + "step": 26984 + }, + { + "epoch": 25.18, + "learning_rate": 4.685307835820896e-05, + "loss": 0.001, + "step": 26988 + }, + { + "epoch": 25.18, + "learning_rate": 4.685261194029851e-05, + "loss": 0.0004, + "step": 26992 + }, + { + "epoch": 25.18, + "learning_rate": 4.685214552238806e-05, + "loss": 0.0001, + "step": 26996 + }, + { + "epoch": 25.19, + "learning_rate": 4.685167910447761e-05, + "loss": 0.0002, + "step": 27000 + }, + { + "epoch": 25.19, + "eval_exact_match": 0.7427466150870407, + "eval_exec": 0.7794970986460348, + "eval_loss": 0.3887414336204529, + "eval_runtime": 1208.0912, + "eval_samples_per_second": 0.856, + "step": 27000 + }, + { + "epoch": 25.19, + "learning_rate": 4.6851212686567166e-05, + "loss": 0.0008, + "step": 27004 + }, + { + "epoch": 25.19, + "learning_rate": 4.685074626865672e-05, + "loss": 0.0002, + "step": 27008 + }, + { + "epoch": 25.2, + "learning_rate": 4.685027985074627e-05, + "loss": 0.0004, + "step": 27012 + }, + { + "epoch": 25.2, + "learning_rate": 4.6849813432835824e-05, + "loss": 0.0028, + "step": 27016 + }, + { + "epoch": 25.21, + "learning_rate": 4.684934701492538e-05, + "loss": 0.0006, + "step": 27020 + }, + { + "epoch": 25.21, + "learning_rate": 4.684888059701493e-05, + "loss": 0.0005, + "step": 27024 + }, + { + "epoch": 25.21, + "learning_rate": 4.6848414179104475e-05, + "loss": 0.0001, + "step": 27028 + }, + { + "epoch": 25.22, + "learning_rate": 4.6847947761194037e-05, + "loss": 0.0013, + "step": 27032 + }, + { + "epoch": 25.22, + "learning_rate": 4.6847481343283585e-05, + "loss": 0.0019, + "step": 27036 + }, + { + "epoch": 25.22, + "learning_rate": 4.684701492537313e-05, + "loss": 0.0004, + "step": 27040 + }, + { + "epoch": 25.23, + "learning_rate": 4.684654850746269e-05, + "loss": 0.0012, + "step": 27044 + }, + { + "epoch": 25.23, + "learning_rate": 4.684608208955224e-05, + "loss": 0.0008, + "step": 27048 + }, + { + "epoch": 25.24, + "learning_rate": 4.68456156716418e-05, + "loss": 0.0028, + "step": 27052 + }, + { + "epoch": 25.24, + "learning_rate": 4.6845149253731346e-05, + "loss": 0.0005, + "step": 27056 + }, + { + "epoch": 25.24, + "learning_rate": 4.6844682835820894e-05, + "loss": 0.0012, + "step": 27060 + }, + { + "epoch": 25.25, + "learning_rate": 4.6844216417910455e-05, + "loss": 0.0002, + "step": 27064 + }, + { + "epoch": 25.25, + "learning_rate": 4.6843750000000004e-05, + "loss": 0.0007, + "step": 27068 + }, + { + "epoch": 25.25, + "learning_rate": 4.684328358208955e-05, + "loss": 0.0023, + "step": 27072 + }, + { + "epoch": 25.26, + "learning_rate": 4.684281716417911e-05, + "loss": 0.0008, + "step": 27076 + }, + { + "epoch": 25.26, + "learning_rate": 4.684235074626866e-05, + "loss": 0.0001, + "step": 27080 + }, + { + "epoch": 25.26, + "learning_rate": 4.684188432835821e-05, + "loss": 0.0008, + "step": 27084 + }, + { + "epoch": 25.27, + "learning_rate": 4.6841417910447765e-05, + "loss": 0.004, + "step": 27088 + }, + { + "epoch": 25.27, + "learning_rate": 4.684095149253731e-05, + "loss": 0.0075, + "step": 27092 + }, + { + "epoch": 25.28, + "learning_rate": 4.684048507462687e-05, + "loss": 0.0006, + "step": 27096 + }, + { + "epoch": 25.28, + "learning_rate": 4.684001865671642e-05, + "loss": 0.0002, + "step": 27100 + }, + { + "epoch": 25.28, + "learning_rate": 4.683955223880597e-05, + "loss": 0.0002, + "step": 27104 + }, + { + "epoch": 25.29, + "learning_rate": 4.6839085820895526e-05, + "loss": 0.0052, + "step": 27108 + }, + { + "epoch": 25.29, + "learning_rate": 4.683861940298508e-05, + "loss": 0.0016, + "step": 27112 + }, + { + "epoch": 25.29, + "learning_rate": 4.683815298507463e-05, + "loss": 0.0021, + "step": 27116 + }, + { + "epoch": 25.3, + "learning_rate": 4.683768656716418e-05, + "loss": 0.0004, + "step": 27120 + }, + { + "epoch": 25.3, + "learning_rate": 4.683722014925374e-05, + "loss": 0.0011, + "step": 27124 + }, + { + "epoch": 25.31, + "learning_rate": 4.6836753731343287e-05, + "loss": 0.0, + "step": 27128 + }, + { + "epoch": 25.31, + "learning_rate": 4.6836287313432835e-05, + "loss": 0.0018, + "step": 27132 + }, + { + "epoch": 25.31, + "learning_rate": 4.683582089552239e-05, + "loss": 0.001, + "step": 27136 + }, + { + "epoch": 25.32, + "learning_rate": 4.6835354477611945e-05, + "loss": 0.0001, + "step": 27140 + }, + { + "epoch": 25.32, + "learning_rate": 4.683488805970149e-05, + "loss": 0.0094, + "step": 27144 + }, + { + "epoch": 25.32, + "learning_rate": 4.683442164179105e-05, + "loss": 0.0019, + "step": 27148 + }, + { + "epoch": 25.33, + "learning_rate": 4.6833955223880596e-05, + "loss": 0.0067, + "step": 27152 + }, + { + "epoch": 25.33, + "learning_rate": 4.683348880597015e-05, + "loss": 0.0001, + "step": 27156 + }, + { + "epoch": 25.34, + "learning_rate": 4.6833022388059705e-05, + "loss": 0.0008, + "step": 27160 + }, + { + "epoch": 25.34, + "learning_rate": 4.6832555970149254e-05, + "loss": 0.0013, + "step": 27164 + }, + { + "epoch": 25.34, + "learning_rate": 4.683208955223881e-05, + "loss": 0.0013, + "step": 27168 + }, + { + "epoch": 25.35, + "learning_rate": 4.6831623134328363e-05, + "loss": 0.0001, + "step": 27172 + }, + { + "epoch": 25.35, + "learning_rate": 4.683115671641791e-05, + "loss": 0.0001, + "step": 27176 + }, + { + "epoch": 25.35, + "learning_rate": 4.683069029850746e-05, + "loss": 0.0005, + "step": 27180 + }, + { + "epoch": 25.36, + "learning_rate": 4.683022388059702e-05, + "loss": 0.0006, + "step": 27184 + }, + { + "epoch": 25.36, + "learning_rate": 4.682975746268657e-05, + "loss": 0.0001, + "step": 27188 + }, + { + "epoch": 25.37, + "learning_rate": 4.682929104477612e-05, + "loss": 0.0001, + "step": 27192 + }, + { + "epoch": 25.37, + "learning_rate": 4.682882462686567e-05, + "loss": 0.0012, + "step": 27196 + }, + { + "epoch": 25.37, + "learning_rate": 4.682835820895523e-05, + "loss": 0.005, + "step": 27200 + }, + { + "epoch": 25.38, + "learning_rate": 4.6827891791044776e-05, + "loss": 0.0013, + "step": 27204 + }, + { + "epoch": 25.38, + "learning_rate": 4.682742537313433e-05, + "loss": 0.0078, + "step": 27208 + }, + { + "epoch": 25.38, + "learning_rate": 4.682695895522388e-05, + "loss": 0.0002, + "step": 27212 + }, + { + "epoch": 25.39, + "learning_rate": 4.682649253731344e-05, + "loss": 0.0006, + "step": 27216 + }, + { + "epoch": 25.39, + "learning_rate": 4.682602611940299e-05, + "loss": 0.0002, + "step": 27220 + }, + { + "epoch": 25.4, + "learning_rate": 4.6825559701492537e-05, + "loss": 0.0005, + "step": 27224 + }, + { + "epoch": 25.4, + "learning_rate": 4.682509328358209e-05, + "loss": 0.004, + "step": 27228 + }, + { + "epoch": 25.4, + "learning_rate": 4.6824626865671646e-05, + "loss": 0.0108, + "step": 27232 + }, + { + "epoch": 25.41, + "learning_rate": 4.6824160447761195e-05, + "loss": 0.0004, + "step": 27236 + }, + { + "epoch": 25.41, + "learning_rate": 4.682369402985075e-05, + "loss": 0.0009, + "step": 27240 + }, + { + "epoch": 25.41, + "learning_rate": 4.6823227611940304e-05, + "loss": 0.0001, + "step": 27244 + }, + { + "epoch": 25.42, + "learning_rate": 4.682276119402985e-05, + "loss": 0.0001, + "step": 27248 + }, + { + "epoch": 25.42, + "learning_rate": 4.682229477611941e-05, + "loss": 0.0002, + "step": 27252 + }, + { + "epoch": 25.43, + "learning_rate": 4.6821828358208956e-05, + "loss": 0.0003, + "step": 27256 + }, + { + "epoch": 25.43, + "learning_rate": 4.682136194029851e-05, + "loss": 0.0007, + "step": 27260 + }, + { + "epoch": 25.43, + "learning_rate": 4.6820895522388065e-05, + "loss": 0.0006, + "step": 27264 + }, + { + "epoch": 25.44, + "learning_rate": 4.6820429104477613e-05, + "loss": 0.0002, + "step": 27268 + }, + { + "epoch": 25.44, + "learning_rate": 4.681996268656716e-05, + "loss": 0.0004, + "step": 27272 + }, + { + "epoch": 25.44, + "learning_rate": 4.681949626865672e-05, + "loss": 0.0001, + "step": 27276 + }, + { + "epoch": 25.45, + "learning_rate": 4.681902985074627e-05, + "loss": 0.005, + "step": 27280 + }, + { + "epoch": 25.45, + "learning_rate": 4.681856343283582e-05, + "loss": 0.0094, + "step": 27284 + }, + { + "epoch": 25.46, + "learning_rate": 4.6818097014925374e-05, + "loss": 0.0005, + "step": 27288 + }, + { + "epoch": 25.46, + "learning_rate": 4.681763059701493e-05, + "loss": 0.0038, + "step": 27292 + }, + { + "epoch": 25.46, + "learning_rate": 4.681716417910448e-05, + "loss": 0.0005, + "step": 27296 + }, + { + "epoch": 25.47, + "learning_rate": 4.681669776119403e-05, + "loss": 0.0001, + "step": 27300 + }, + { + "epoch": 25.47, + "learning_rate": 4.681623134328359e-05, + "loss": 0.0018, + "step": 27304 + }, + { + "epoch": 25.47, + "learning_rate": 4.6815764925373135e-05, + "loss": 0.0006, + "step": 27308 + }, + { + "epoch": 25.48, + "learning_rate": 4.681529850746269e-05, + "loss": 0.0001, + "step": 27312 + }, + { + "epoch": 25.48, + "learning_rate": 4.681483208955224e-05, + "loss": 0.0006, + "step": 27316 + }, + { + "epoch": 25.49, + "learning_rate": 4.681436567164179e-05, + "loss": 0.0018, + "step": 27320 + }, + { + "epoch": 25.49, + "learning_rate": 4.681389925373135e-05, + "loss": 0.0086, + "step": 27324 + }, + { + "epoch": 25.49, + "learning_rate": 4.6813432835820896e-05, + "loss": 0.0001, + "step": 27328 + }, + { + "epoch": 25.5, + "learning_rate": 4.6812966417910445e-05, + "loss": 0.0012, + "step": 27332 + }, + { + "epoch": 25.5, + "learning_rate": 4.6812500000000006e-05, + "loss": 0.0001, + "step": 27336 + }, + { + "epoch": 25.5, + "learning_rate": 4.6812033582089554e-05, + "loss": 0.0015, + "step": 27340 + }, + { + "epoch": 25.51, + "learning_rate": 4.68115671641791e-05, + "loss": 0.0023, + "step": 27344 + }, + { + "epoch": 25.51, + "learning_rate": 4.681110074626866e-05, + "loss": 0.0007, + "step": 27348 + }, + { + "epoch": 25.51, + "learning_rate": 4.681063432835821e-05, + "loss": 0.0034, + "step": 27352 + }, + { + "epoch": 25.52, + "learning_rate": 4.681016791044776e-05, + "loss": 0.0001, + "step": 27356 + }, + { + "epoch": 25.52, + "learning_rate": 4.6809701492537315e-05, + "loss": 0.0001, + "step": 27360 + }, + { + "epoch": 25.53, + "learning_rate": 4.680923507462687e-05, + "loss": 0.0019, + "step": 27364 + }, + { + "epoch": 25.53, + "learning_rate": 4.680876865671642e-05, + "loss": 0.0001, + "step": 27368 + }, + { + "epoch": 25.53, + "learning_rate": 4.680830223880597e-05, + "loss": 0.0042, + "step": 27372 + }, + { + "epoch": 25.54, + "learning_rate": 4.680783582089552e-05, + "loss": 0.0001, + "step": 27376 + }, + { + "epoch": 25.54, + "learning_rate": 4.6807369402985076e-05, + "loss": 0.0011, + "step": 27380 + }, + { + "epoch": 25.54, + "learning_rate": 4.680690298507463e-05, + "loss": 0.0004, + "step": 27384 + }, + { + "epoch": 25.55, + "learning_rate": 4.680643656716418e-05, + "loss": 0.0008, + "step": 27388 + }, + { + "epoch": 25.55, + "learning_rate": 4.6805970149253734e-05, + "loss": 0.0003, + "step": 27392 + }, + { + "epoch": 25.56, + "learning_rate": 4.680550373134329e-05, + "loss": 0.0003, + "step": 27396 + }, + { + "epoch": 25.56, + "learning_rate": 4.680503731343284e-05, + "loss": 0.0021, + "step": 27400 + }, + { + "epoch": 25.56, + "learning_rate": 4.680457089552239e-05, + "loss": 0.0001, + "step": 27404 + }, + { + "epoch": 25.57, + "learning_rate": 4.680410447761194e-05, + "loss": 0.0024, + "step": 27408 + }, + { + "epoch": 25.57, + "learning_rate": 4.6803638059701495e-05, + "loss": 0.0041, + "step": 27412 + }, + { + "epoch": 25.57, + "learning_rate": 4.680317164179105e-05, + "loss": 0.001, + "step": 27416 + }, + { + "epoch": 25.58, + "learning_rate": 4.68027052238806e-05, + "loss": 0.0008, + "step": 27420 + }, + { + "epoch": 25.58, + "learning_rate": 4.680223880597015e-05, + "loss": 0.0029, + "step": 27424 + }, + { + "epoch": 25.59, + "learning_rate": 4.680177238805971e-05, + "loss": 0.0012, + "step": 27428 + }, + { + "epoch": 25.59, + "learning_rate": 4.6801305970149256e-05, + "loss": 0.0008, + "step": 27432 + }, + { + "epoch": 25.59, + "learning_rate": 4.6800839552238804e-05, + "loss": 0.0002, + "step": 27436 + }, + { + "epoch": 25.6, + "learning_rate": 4.680037313432836e-05, + "loss": 0.0016, + "step": 27440 + }, + { + "epoch": 25.6, + "learning_rate": 4.6799906716417914e-05, + "loss": 0.0009, + "step": 27444 + }, + { + "epoch": 25.6, + "learning_rate": 4.679944029850746e-05, + "loss": 0.0008, + "step": 27448 + }, + { + "epoch": 25.61, + "learning_rate": 4.679897388059702e-05, + "loss": 0.0012, + "step": 27452 + }, + { + "epoch": 25.61, + "learning_rate": 4.679850746268657e-05, + "loss": 0.0012, + "step": 27456 + }, + { + "epoch": 25.62, + "learning_rate": 4.679804104477612e-05, + "loss": 0.0018, + "step": 27460 + }, + { + "epoch": 25.62, + "learning_rate": 4.6797574626865675e-05, + "loss": 0.0002, + "step": 27464 + }, + { + "epoch": 25.62, + "learning_rate": 4.679710820895522e-05, + "loss": 0.0001, + "step": 27468 + }, + { + "epoch": 25.63, + "learning_rate": 4.679664179104478e-05, + "loss": 0.0017, + "step": 27472 + }, + { + "epoch": 25.63, + "learning_rate": 4.679617537313433e-05, + "loss": 0.0011, + "step": 27476 + }, + { + "epoch": 25.63, + "learning_rate": 4.679570895522388e-05, + "loss": 0.0003, + "step": 27480 + }, + { + "epoch": 25.64, + "learning_rate": 4.6795242537313436e-05, + "loss": 0.01, + "step": 27484 + }, + { + "epoch": 25.64, + "learning_rate": 4.679477611940299e-05, + "loss": 0.0016, + "step": 27488 + }, + { + "epoch": 25.65, + "learning_rate": 4.679430970149254e-05, + "loss": 0.0048, + "step": 27492 + }, + { + "epoch": 25.65, + "learning_rate": 4.679384328358209e-05, + "loss": 0.0038, + "step": 27496 + }, + { + "epoch": 25.65, + "learning_rate": 4.679337686567164e-05, + "loss": 0.0004, + "step": 27500 + }, + { + "epoch": 25.65, + "eval_exact_match": 0.7427466150870407, + "eval_exec": 0.7833655705996132, + "eval_loss": 0.3488633632659912, + "eval_runtime": 1170.5529, + "eval_samples_per_second": 0.883, + "step": 27500 + }, + { + "epoch": 25.66, + "learning_rate": 4.67929104477612e-05, + "loss": 0.0005, + "step": 27504 + }, + { + "epoch": 25.66, + "learning_rate": 4.6792444029850745e-05, + "loss": 0.0003, + "step": 27508 + }, + { + "epoch": 25.66, + "learning_rate": 4.67919776119403e-05, + "loss": 0.0001, + "step": 27512 + }, + { + "epoch": 25.67, + "learning_rate": 4.6791511194029855e-05, + "loss": 0.0045, + "step": 27516 + }, + { + "epoch": 25.67, + "learning_rate": 4.67910447761194e-05, + "loss": 0.0001, + "step": 27520 + }, + { + "epoch": 25.68, + "learning_rate": 4.679057835820896e-05, + "loss": 0.0014, + "step": 27524 + }, + { + "epoch": 25.68, + "learning_rate": 4.6790111940298506e-05, + "loss": 0.0004, + "step": 27528 + }, + { + "epoch": 25.68, + "learning_rate": 4.678964552238806e-05, + "loss": 0.0047, + "step": 27532 + }, + { + "epoch": 25.69, + "learning_rate": 4.6789179104477616e-05, + "loss": 0.0002, + "step": 27536 + }, + { + "epoch": 25.69, + "learning_rate": 4.6788712686567164e-05, + "loss": 0.0001, + "step": 27540 + }, + { + "epoch": 25.69, + "learning_rate": 4.678824626865672e-05, + "loss": 0.0021, + "step": 27544 + }, + { + "epoch": 25.7, + "learning_rate": 4.6787779850746274e-05, + "loss": 0.0018, + "step": 27548 + }, + { + "epoch": 25.7, + "learning_rate": 4.678731343283582e-05, + "loss": 0.0036, + "step": 27552 + }, + { + "epoch": 25.71, + "learning_rate": 4.678684701492538e-05, + "loss": 0.0001, + "step": 27556 + }, + { + "epoch": 25.71, + "learning_rate": 4.6786380597014925e-05, + "loss": 0.0041, + "step": 27560 + }, + { + "epoch": 25.71, + "learning_rate": 4.678591417910448e-05, + "loss": 0.0001, + "step": 27564 + }, + { + "epoch": 25.72, + "learning_rate": 4.6785447761194035e-05, + "loss": 0.0009, + "step": 27568 + }, + { + "epoch": 25.72, + "learning_rate": 4.678498134328358e-05, + "loss": 0.0035, + "step": 27572 + }, + { + "epoch": 25.72, + "learning_rate": 4.678451492537314e-05, + "loss": 0.002, + "step": 27576 + }, + { + "epoch": 25.73, + "learning_rate": 4.678404850746269e-05, + "loss": 0.0001, + "step": 27580 + }, + { + "epoch": 25.73, + "learning_rate": 4.678358208955224e-05, + "loss": 0.0027, + "step": 27584 + }, + { + "epoch": 25.73, + "learning_rate": 4.678311567164179e-05, + "loss": 0.0073, + "step": 27588 + }, + { + "epoch": 25.74, + "learning_rate": 4.678264925373135e-05, + "loss": 0.0004, + "step": 27592 + }, + { + "epoch": 25.74, + "learning_rate": 4.67821828358209e-05, + "loss": 0.0008, + "step": 27596 + }, + { + "epoch": 25.75, + "learning_rate": 4.678171641791045e-05, + "loss": 0.0004, + "step": 27600 + }, + { + "epoch": 25.75, + "learning_rate": 4.678125e-05, + "loss": 0.0095, + "step": 27604 + }, + { + "epoch": 25.75, + "learning_rate": 4.678078358208956e-05, + "loss": 0.0004, + "step": 27608 + }, + { + "epoch": 25.76, + "learning_rate": 4.6780317164179105e-05, + "loss": 0.0162, + "step": 27612 + }, + { + "epoch": 25.76, + "learning_rate": 4.677985074626866e-05, + "loss": 0.0008, + "step": 27616 + }, + { + "epoch": 25.76, + "learning_rate": 4.677938432835821e-05, + "loss": 0.0013, + "step": 27620 + }, + { + "epoch": 25.77, + "learning_rate": 4.677891791044776e-05, + "loss": 0.0006, + "step": 27624 + }, + { + "epoch": 25.77, + "learning_rate": 4.677845149253732e-05, + "loss": 0.0004, + "step": 27628 + }, + { + "epoch": 25.78, + "learning_rate": 4.6777985074626866e-05, + "loss": 0.0016, + "step": 27632 + }, + { + "epoch": 25.78, + "learning_rate": 4.677751865671642e-05, + "loss": 0.0001, + "step": 27636 + }, + { + "epoch": 25.78, + "learning_rate": 4.6777052238805976e-05, + "loss": 0.0001, + "step": 27640 + }, + { + "epoch": 25.79, + "learning_rate": 4.6776585820895524e-05, + "loss": 0.0002, + "step": 27644 + }, + { + "epoch": 25.79, + "learning_rate": 4.677611940298507e-05, + "loss": 0.0003, + "step": 27648 + }, + { + "epoch": 25.79, + "learning_rate": 4.6775652985074634e-05, + "loss": 0.0002, + "step": 27652 + }, + { + "epoch": 25.8, + "learning_rate": 4.677518656716418e-05, + "loss": 0.0, + "step": 27656 + }, + { + "epoch": 25.8, + "learning_rate": 4.677472014925373e-05, + "loss": 0.0011, + "step": 27660 + }, + { + "epoch": 25.81, + "learning_rate": 4.6774253731343285e-05, + "loss": 0.0002, + "step": 27664 + }, + { + "epoch": 25.81, + "learning_rate": 4.677378731343284e-05, + "loss": 0.0009, + "step": 27668 + }, + { + "epoch": 25.81, + "learning_rate": 4.677332089552239e-05, + "loss": 0.0003, + "step": 27672 + }, + { + "epoch": 25.82, + "learning_rate": 4.677285447761194e-05, + "loss": 0.0011, + "step": 27676 + }, + { + "epoch": 25.82, + "learning_rate": 4.677238805970149e-05, + "loss": 0.0007, + "step": 27680 + }, + { + "epoch": 25.82, + "learning_rate": 4.6771921641791046e-05, + "loss": 0.0129, + "step": 27684 + }, + { + "epoch": 25.83, + "learning_rate": 4.67714552238806e-05, + "loss": 0.0053, + "step": 27688 + }, + { + "epoch": 25.83, + "learning_rate": 4.677098880597015e-05, + "loss": 0.0096, + "step": 27692 + }, + { + "epoch": 25.84, + "learning_rate": 4.6770522388059704e-05, + "loss": 0.0001, + "step": 27696 + }, + { + "epoch": 25.84, + "learning_rate": 4.677005597014926e-05, + "loss": 0.0019, + "step": 27700 + }, + { + "epoch": 25.84, + "learning_rate": 4.676958955223881e-05, + "loss": 0.0001, + "step": 27704 + }, + { + "epoch": 25.85, + "learning_rate": 4.676912313432836e-05, + "loss": 0.0017, + "step": 27708 + }, + { + "epoch": 25.85, + "learning_rate": 4.676865671641792e-05, + "loss": 0.0008, + "step": 27712 + }, + { + "epoch": 25.85, + "learning_rate": 4.6768190298507465e-05, + "loss": 0.0004, + "step": 27716 + }, + { + "epoch": 25.86, + "learning_rate": 4.676772388059702e-05, + "loss": 0.0052, + "step": 27720 + }, + { + "epoch": 25.86, + "learning_rate": 4.676725746268657e-05, + "loss": 0.0012, + "step": 27724 + }, + { + "epoch": 25.87, + "learning_rate": 4.676679104477612e-05, + "loss": 0.001, + "step": 27728 + }, + { + "epoch": 25.87, + "learning_rate": 4.676632462686568e-05, + "loss": 0.0001, + "step": 27732 + }, + { + "epoch": 25.87, + "learning_rate": 4.6765858208955226e-05, + "loss": 0.0006, + "step": 27736 + }, + { + "epoch": 25.88, + "learning_rate": 4.6765391791044774e-05, + "loss": 0.0004, + "step": 27740 + }, + { + "epoch": 25.88, + "learning_rate": 4.6764925373134336e-05, + "loss": 0.0002, + "step": 27744 + }, + { + "epoch": 25.88, + "learning_rate": 4.6764458955223884e-05, + "loss": 0.0022, + "step": 27748 + }, + { + "epoch": 25.89, + "learning_rate": 4.676399253731343e-05, + "loss": 0.001, + "step": 27752 + }, + { + "epoch": 25.89, + "learning_rate": 4.676352611940299e-05, + "loss": 0.0005, + "step": 27756 + }, + { + "epoch": 25.9, + "learning_rate": 4.676305970149254e-05, + "loss": 0.0013, + "step": 27760 + }, + { + "epoch": 25.9, + "learning_rate": 4.676259328358209e-05, + "loss": 0.0015, + "step": 27764 + }, + { + "epoch": 25.9, + "learning_rate": 4.6762126865671645e-05, + "loss": 0.0012, + "step": 27768 + }, + { + "epoch": 25.91, + "learning_rate": 4.676166044776119e-05, + "loss": 0.0005, + "step": 27772 + }, + { + "epoch": 25.91, + "learning_rate": 4.676119402985075e-05, + "loss": 0.0005, + "step": 27776 + }, + { + "epoch": 25.91, + "learning_rate": 4.67607276119403e-05, + "loss": 0.0037, + "step": 27780 + }, + { + "epoch": 25.92, + "learning_rate": 4.676026119402985e-05, + "loss": 0.0003, + "step": 27784 + }, + { + "epoch": 25.92, + "learning_rate": 4.6759794776119406e-05, + "loss": 0.0002, + "step": 27788 + }, + { + "epoch": 25.93, + "learning_rate": 4.675932835820896e-05, + "loss": 0.0004, + "step": 27792 + }, + { + "epoch": 25.93, + "learning_rate": 4.675886194029851e-05, + "loss": 0.0013, + "step": 27796 + }, + { + "epoch": 25.93, + "learning_rate": 4.675839552238806e-05, + "loss": 0.0001, + "step": 27800 + }, + { + "epoch": 25.94, + "learning_rate": 4.675792910447762e-05, + "loss": 0.0, + "step": 27804 + }, + { + "epoch": 25.94, + "learning_rate": 4.675746268656717e-05, + "loss": 0.0001, + "step": 27808 + }, + { + "epoch": 25.94, + "learning_rate": 4.6756996268656715e-05, + "loss": 0.0005, + "step": 27812 + }, + { + "epoch": 25.95, + "learning_rate": 4.675652985074627e-05, + "loss": 0.0002, + "step": 27816 + }, + { + "epoch": 25.95, + "learning_rate": 4.6756063432835825e-05, + "loss": 0.0018, + "step": 27820 + }, + { + "epoch": 25.96, + "learning_rate": 4.675559701492537e-05, + "loss": 0.0099, + "step": 27824 + }, + { + "epoch": 25.96, + "learning_rate": 4.675513059701493e-05, + "loss": 0.004, + "step": 27828 + }, + { + "epoch": 25.96, + "learning_rate": 4.6754664179104476e-05, + "loss": 0.0025, + "step": 27832 + }, + { + "epoch": 25.97, + "learning_rate": 4.675419776119403e-05, + "loss": 0.0001, + "step": 27836 + }, + { + "epoch": 25.97, + "learning_rate": 4.6753731343283586e-05, + "loss": 0.0002, + "step": 27840 + }, + { + "epoch": 25.97, + "learning_rate": 4.6753264925373134e-05, + "loss": 0.0, + "step": 27844 + }, + { + "epoch": 25.98, + "learning_rate": 4.675279850746269e-05, + "loss": 0.0001, + "step": 27848 + }, + { + "epoch": 25.98, + "learning_rate": 4.6752332089552244e-05, + "loss": 0.0013, + "step": 27852 + }, + { + "epoch": 25.98, + "learning_rate": 4.675186567164179e-05, + "loss": 0.0002, + "step": 27856 + }, + { + "epoch": 25.99, + "learning_rate": 4.675139925373134e-05, + "loss": 0.0009, + "step": 27860 + }, + { + "epoch": 25.99, + "learning_rate": 4.67509328358209e-05, + "loss": 0.001, + "step": 27864 + }, + { + "epoch": 26.0, + "learning_rate": 4.675046641791045e-05, + "loss": 0.0001, + "step": 27868 + }, + { + "epoch": 26.0, + "learning_rate": 4.6750000000000005e-05, + "loss": 0.005, + "step": 27872 + }, + { + "epoch": 26.0, + "learning_rate": 4.674953358208955e-05, + "loss": 0.001, + "step": 27876 + }, + { + "epoch": 26.01, + "learning_rate": 4.674906716417911e-05, + "loss": 0.0177, + "step": 27880 + }, + { + "epoch": 26.01, + "learning_rate": 4.674860074626866e-05, + "loss": 0.0028, + "step": 27884 + }, + { + "epoch": 26.01, + "learning_rate": 4.674813432835821e-05, + "loss": 0.001, + "step": 27888 + }, + { + "epoch": 26.02, + "learning_rate": 4.674766791044776e-05, + "loss": 0.0001, + "step": 27892 + }, + { + "epoch": 26.02, + "learning_rate": 4.674720149253732e-05, + "loss": 0.001, + "step": 27896 + }, + { + "epoch": 26.03, + "learning_rate": 4.674673507462687e-05, + "loss": 0.0004, + "step": 27900 + }, + { + "epoch": 26.03, + "learning_rate": 4.674626865671642e-05, + "loss": 0.0003, + "step": 27904 + }, + { + "epoch": 26.03, + "learning_rate": 4.674580223880597e-05, + "loss": 0.0005, + "step": 27908 + }, + { + "epoch": 26.04, + "learning_rate": 4.674533582089553e-05, + "loss": 0.0001, + "step": 27912 + }, + { + "epoch": 26.04, + "learning_rate": 4.6744869402985075e-05, + "loss": 0.0012, + "step": 27916 + }, + { + "epoch": 26.04, + "learning_rate": 4.674440298507463e-05, + "loss": 0.0008, + "step": 27920 + }, + { + "epoch": 26.05, + "learning_rate": 4.6743936567164185e-05, + "loss": 0.0008, + "step": 27924 + }, + { + "epoch": 26.05, + "learning_rate": 4.674347014925373e-05, + "loss": 0.0003, + "step": 27928 + }, + { + "epoch": 26.06, + "learning_rate": 4.674300373134329e-05, + "loss": 0.0002, + "step": 27932 + }, + { + "epoch": 26.06, + "learning_rate": 4.6742537313432836e-05, + "loss": 0.0047, + "step": 27936 + }, + { + "epoch": 26.06, + "learning_rate": 4.674207089552239e-05, + "loss": 0.0001, + "step": 27940 + }, + { + "epoch": 26.07, + "learning_rate": 4.6741604477611946e-05, + "loss": 0.0004, + "step": 27944 + }, + { + "epoch": 26.07, + "learning_rate": 4.6741138059701494e-05, + "loss": 0.0001, + "step": 27948 + }, + { + "epoch": 26.07, + "learning_rate": 4.674067164179104e-05, + "loss": 0.0002, + "step": 27952 + }, + { + "epoch": 26.08, + "learning_rate": 4.6740205223880604e-05, + "loss": 0.0034, + "step": 27956 + }, + { + "epoch": 26.08, + "learning_rate": 4.673973880597015e-05, + "loss": 0.0018, + "step": 27960 + }, + { + "epoch": 26.09, + "learning_rate": 4.67392723880597e-05, + "loss": 0.0017, + "step": 27964 + }, + { + "epoch": 26.09, + "learning_rate": 4.6738805970149255e-05, + "loss": 0.0006, + "step": 27968 + }, + { + "epoch": 26.09, + "learning_rate": 4.673833955223881e-05, + "loss": 0.0081, + "step": 27972 + }, + { + "epoch": 26.1, + "learning_rate": 4.673787313432836e-05, + "loss": 0.0001, + "step": 27976 + }, + { + "epoch": 26.1, + "learning_rate": 4.673740671641791e-05, + "loss": 0.0002, + "step": 27980 + }, + { + "epoch": 26.1, + "learning_rate": 4.673694029850747e-05, + "loss": 0.0002, + "step": 27984 + }, + { + "epoch": 26.11, + "learning_rate": 4.6736473880597016e-05, + "loss": 0.0027, + "step": 27988 + }, + { + "epoch": 26.11, + "learning_rate": 4.673600746268657e-05, + "loss": 0.0018, + "step": 27992 + }, + { + "epoch": 26.12, + "learning_rate": 4.673554104477612e-05, + "loss": 0.0001, + "step": 27996 + }, + { + "epoch": 26.12, + "learning_rate": 4.6735074626865674e-05, + "loss": 0.0, + "step": 28000 + }, + { + "epoch": 26.12, + "eval_exact_match": 0.7437137330754352, + "eval_exec": 0.7785299806576402, + "eval_loss": 0.38033804297447205, + "eval_runtime": 1184.2263, + "eval_samples_per_second": 0.873, + "step": 28000 + }, + { + "epoch": 26.12, + "learning_rate": 4.673460820895523e-05, + "loss": 0.0016, + "step": 28004 + }, + { + "epoch": 26.13, + "learning_rate": 4.673414179104478e-05, + "loss": 0.0084, + "step": 28008 + }, + { + "epoch": 26.13, + "learning_rate": 4.6733675373134325e-05, + "loss": 0.0003, + "step": 28012 + }, + { + "epoch": 26.13, + "learning_rate": 4.6733208955223887e-05, + "loss": 0.0001, + "step": 28016 + }, + { + "epoch": 26.14, + "learning_rate": 4.6732742537313435e-05, + "loss": 0.0006, + "step": 28020 + }, + { + "epoch": 26.14, + "learning_rate": 4.673227611940298e-05, + "loss": 0.0018, + "step": 28024 + }, + { + "epoch": 26.15, + "learning_rate": 4.673180970149254e-05, + "loss": 0.0001, + "step": 28028 + }, + { + "epoch": 26.15, + "learning_rate": 4.673134328358209e-05, + "loss": 0.0002, + "step": 28032 + }, + { + "epoch": 26.15, + "learning_rate": 4.673087686567165e-05, + "loss": 0.0003, + "step": 28036 + }, + { + "epoch": 26.16, + "learning_rate": 4.6730410447761196e-05, + "loss": 0.0005, + "step": 28040 + }, + { + "epoch": 26.16, + "learning_rate": 4.672994402985075e-05, + "loss": 0.0001, + "step": 28044 + }, + { + "epoch": 26.16, + "learning_rate": 4.6729477611940305e-05, + "loss": 0.0034, + "step": 28048 + }, + { + "epoch": 26.17, + "learning_rate": 4.6729011194029854e-05, + "loss": 0.0013, + "step": 28052 + }, + { + "epoch": 26.17, + "learning_rate": 4.67285447761194e-05, + "loss": 0.0001, + "step": 28056 + }, + { + "epoch": 26.18, + "learning_rate": 4.672807835820896e-05, + "loss": 0.0014, + "step": 28060 + }, + { + "epoch": 26.18, + "learning_rate": 4.672761194029851e-05, + "loss": 0.0001, + "step": 28064 + }, + { + "epoch": 26.18, + "learning_rate": 4.672714552238806e-05, + "loss": 0.0106, + "step": 28068 + }, + { + "epoch": 26.19, + "learning_rate": 4.6726679104477615e-05, + "loss": 0.0022, + "step": 28072 + }, + { + "epoch": 26.19, + "learning_rate": 4.672621268656717e-05, + "loss": 0.0007, + "step": 28076 + }, + { + "epoch": 26.19, + "learning_rate": 4.672574626865672e-05, + "loss": 0.0005, + "step": 28080 + }, + { + "epoch": 26.2, + "learning_rate": 4.672527985074627e-05, + "loss": 0.0035, + "step": 28084 + }, + { + "epoch": 26.2, + "learning_rate": 4.672481343283582e-05, + "loss": 0.0045, + "step": 28088 + }, + { + "epoch": 26.21, + "learning_rate": 4.6724347014925376e-05, + "loss": 0.0002, + "step": 28092 + }, + { + "epoch": 26.21, + "learning_rate": 4.672388059701493e-05, + "loss": 0.0033, + "step": 28096 + }, + { + "epoch": 26.21, + "learning_rate": 4.672341417910448e-05, + "loss": 0.001, + "step": 28100 + }, + { + "epoch": 26.22, + "learning_rate": 4.6722947761194034e-05, + "loss": 0.0002, + "step": 28104 + }, + { + "epoch": 26.22, + "learning_rate": 4.672248134328359e-05, + "loss": 0.0014, + "step": 28108 + }, + { + "epoch": 26.22, + "learning_rate": 4.6722014925373137e-05, + "loss": 0.0095, + "step": 28112 + }, + { + "epoch": 26.23, + "learning_rate": 4.6721548507462685e-05, + "loss": 0.0016, + "step": 28116 + }, + { + "epoch": 26.23, + "learning_rate": 4.672108208955224e-05, + "loss": 0.0088, + "step": 28120 + }, + { + "epoch": 26.24, + "learning_rate": 4.6720615671641794e-05, + "loss": 0.0013, + "step": 28124 + }, + { + "epoch": 26.24, + "learning_rate": 4.672014925373134e-05, + "loss": 0.0004, + "step": 28128 + }, + { + "epoch": 26.24, + "learning_rate": 4.67196828358209e-05, + "loss": 0.0004, + "step": 28132 + }, + { + "epoch": 26.25, + "learning_rate": 4.671921641791045e-05, + "loss": 0.0002, + "step": 28136 + }, + { + "epoch": 26.25, + "learning_rate": 4.671875e-05, + "loss": 0.0006, + "step": 28140 + }, + { + "epoch": 26.25, + "learning_rate": 4.6718283582089555e-05, + "loss": 0.0003, + "step": 28144 + }, + { + "epoch": 26.26, + "learning_rate": 4.6717817164179104e-05, + "loss": 0.0004, + "step": 28148 + }, + { + "epoch": 26.26, + "learning_rate": 4.671735074626866e-05, + "loss": 0.0003, + "step": 28152 + }, + { + "epoch": 26.26, + "learning_rate": 4.6716884328358213e-05, + "loss": 0.0008, + "step": 28156 + }, + { + "epoch": 26.27, + "learning_rate": 4.671641791044776e-05, + "loss": 0.0, + "step": 28160 + }, + { + "epoch": 26.27, + "learning_rate": 4.6715951492537316e-05, + "loss": 0.0065, + "step": 28164 + }, + { + "epoch": 26.28, + "learning_rate": 4.671548507462687e-05, + "loss": 0.0005, + "step": 28168 + }, + { + "epoch": 26.28, + "learning_rate": 4.671501865671642e-05, + "loss": 0.0013, + "step": 28172 + }, + { + "epoch": 26.28, + "learning_rate": 4.671455223880597e-05, + "loss": 0.001, + "step": 28176 + }, + { + "epoch": 26.29, + "learning_rate": 4.671408582089552e-05, + "loss": 0.0159, + "step": 28180 + }, + { + "epoch": 26.29, + "learning_rate": 4.671361940298508e-05, + "loss": 0.0004, + "step": 28184 + }, + { + "epoch": 26.29, + "learning_rate": 4.6713152985074626e-05, + "loss": 0.0047, + "step": 28188 + }, + { + "epoch": 26.3, + "learning_rate": 4.671268656716418e-05, + "loss": 0.0001, + "step": 28192 + }, + { + "epoch": 26.3, + "learning_rate": 4.6712220149253735e-05, + "loss": 0.0007, + "step": 28196 + }, + { + "epoch": 26.31, + "learning_rate": 4.671175373134329e-05, + "loss": 0.0003, + "step": 28200 + }, + { + "epoch": 26.31, + "learning_rate": 4.671128731343284e-05, + "loss": 0.0004, + "step": 28204 + }, + { + "epoch": 26.31, + "learning_rate": 4.6710820895522387e-05, + "loss": 0.0002, + "step": 28208 + }, + { + "epoch": 26.32, + "learning_rate": 4.671035447761195e-05, + "loss": 0.0001, + "step": 28212 + }, + { + "epoch": 26.32, + "learning_rate": 4.6709888059701496e-05, + "loss": 0.0028, + "step": 28216 + }, + { + "epoch": 26.32, + "learning_rate": 4.6709421641791045e-05, + "loss": 0.0002, + "step": 28220 + }, + { + "epoch": 26.33, + "learning_rate": 4.67089552238806e-05, + "loss": 0.0002, + "step": 28224 + }, + { + "epoch": 26.33, + "learning_rate": 4.6708488805970154e-05, + "loss": 0.001, + "step": 28228 + }, + { + "epoch": 26.34, + "learning_rate": 4.67080223880597e-05, + "loss": 0.0003, + "step": 28232 + }, + { + "epoch": 26.34, + "learning_rate": 4.670755597014926e-05, + "loss": 0.0005, + "step": 28236 + }, + { + "epoch": 26.34, + "learning_rate": 4.6707089552238805e-05, + "loss": 0.0007, + "step": 28240 + }, + { + "epoch": 26.35, + "learning_rate": 4.670662313432836e-05, + "loss": 0.0002, + "step": 28244 + }, + { + "epoch": 26.35, + "learning_rate": 4.6706156716417915e-05, + "loss": 0.0001, + "step": 28248 + }, + { + "epoch": 26.35, + "learning_rate": 4.6705690298507463e-05, + "loss": 0.0002, + "step": 28252 + }, + { + "epoch": 26.36, + "learning_rate": 4.670522388059702e-05, + "loss": 0.0017, + "step": 28256 + }, + { + "epoch": 26.36, + "learning_rate": 4.670475746268657e-05, + "loss": 0.0005, + "step": 28260 + }, + { + "epoch": 26.37, + "learning_rate": 4.670429104477612e-05, + "loss": 0.0003, + "step": 28264 + }, + { + "epoch": 26.37, + "learning_rate": 4.670382462686567e-05, + "loss": 0.0, + "step": 28268 + }, + { + "epoch": 26.37, + "learning_rate": 4.670335820895523e-05, + "loss": 0.0008, + "step": 28272 + }, + { + "epoch": 26.38, + "learning_rate": 4.670289179104478e-05, + "loss": 0.0004, + "step": 28276 + }, + { + "epoch": 26.38, + "learning_rate": 4.670242537313433e-05, + "loss": 0.0003, + "step": 28280 + }, + { + "epoch": 26.38, + "learning_rate": 4.670195895522388e-05, + "loss": 0.0003, + "step": 28284 + }, + { + "epoch": 26.39, + "learning_rate": 4.670149253731344e-05, + "loss": 0.0025, + "step": 28288 + }, + { + "epoch": 26.39, + "learning_rate": 4.6701026119402985e-05, + "loss": 0.0014, + "step": 28292 + }, + { + "epoch": 26.4, + "learning_rate": 4.670055970149254e-05, + "loss": 0.0002, + "step": 28296 + }, + { + "epoch": 26.4, + "learning_rate": 4.670009328358209e-05, + "loss": 0.0002, + "step": 28300 + }, + { + "epoch": 26.4, + "learning_rate": 4.669962686567164e-05, + "loss": 0.0004, + "step": 28304 + }, + { + "epoch": 26.41, + "learning_rate": 4.66991604477612e-05, + "loss": 0.0002, + "step": 28308 + }, + { + "epoch": 26.41, + "learning_rate": 4.6698694029850746e-05, + "loss": 0.0004, + "step": 28312 + }, + { + "epoch": 26.41, + "learning_rate": 4.66982276119403e-05, + "loss": 0.0001, + "step": 28316 + }, + { + "epoch": 26.42, + "learning_rate": 4.6697761194029856e-05, + "loss": 0.0004, + "step": 28320 + }, + { + "epoch": 26.42, + "learning_rate": 4.6697294776119404e-05, + "loss": 0.0008, + "step": 28324 + }, + { + "epoch": 26.43, + "learning_rate": 4.669682835820895e-05, + "loss": 0.0024, + "step": 28328 + }, + { + "epoch": 26.43, + "learning_rate": 4.6696361940298514e-05, + "loss": 0.0005, + "step": 28332 + }, + { + "epoch": 26.43, + "learning_rate": 4.669589552238806e-05, + "loss": 0.006, + "step": 28336 + }, + { + "epoch": 26.44, + "learning_rate": 4.669542910447761e-05, + "loss": 0.0046, + "step": 28340 + }, + { + "epoch": 26.44, + "learning_rate": 4.6694962686567165e-05, + "loss": 0.0001, + "step": 28344 + }, + { + "epoch": 26.44, + "learning_rate": 4.669449626865672e-05, + "loss": 0.0009, + "step": 28348 + }, + { + "epoch": 26.45, + "learning_rate": 4.669402985074627e-05, + "loss": 0.0006, + "step": 28352 + }, + { + "epoch": 26.45, + "learning_rate": 4.669356343283582e-05, + "loss": 0.0001, + "step": 28356 + }, + { + "epoch": 26.46, + "learning_rate": 4.669309701492537e-05, + "loss": 0.0001, + "step": 28360 + }, + { + "epoch": 26.46, + "learning_rate": 4.669263059701493e-05, + "loss": 0.0004, + "step": 28364 + }, + { + "epoch": 26.46, + "learning_rate": 4.669216417910448e-05, + "loss": 0.0003, + "step": 28368 + }, + { + "epoch": 26.47, + "learning_rate": 4.669169776119403e-05, + "loss": 0.0002, + "step": 28372 + }, + { + "epoch": 26.47, + "learning_rate": 4.6691231343283584e-05, + "loss": 0.0001, + "step": 28376 + }, + { + "epoch": 26.47, + "learning_rate": 4.669076492537314e-05, + "loss": 0.0043, + "step": 28380 + }, + { + "epoch": 26.48, + "learning_rate": 4.669029850746269e-05, + "loss": 0.0052, + "step": 28384 + }, + { + "epoch": 26.48, + "learning_rate": 4.668983208955224e-05, + "loss": 0.0012, + "step": 28388 + }, + { + "epoch": 26.49, + "learning_rate": 4.66893656716418e-05, + "loss": 0.0005, + "step": 28392 + }, + { + "epoch": 26.49, + "learning_rate": 4.6688899253731345e-05, + "loss": 0.001, + "step": 28396 + }, + { + "epoch": 26.49, + "learning_rate": 4.66884328358209e-05, + "loss": 0.0001, + "step": 28400 + }, + { + "epoch": 26.5, + "learning_rate": 4.668796641791045e-05, + "loss": 0.0012, + "step": 28404 + }, + { + "epoch": 26.5, + "learning_rate": 4.66875e-05, + "loss": 0.0002, + "step": 28408 + }, + { + "epoch": 26.5, + "learning_rate": 4.668703358208956e-05, + "loss": 0.0002, + "step": 28412 + }, + { + "epoch": 26.51, + "learning_rate": 4.6686567164179106e-05, + "loss": 0.0001, + "step": 28416 + }, + { + "epoch": 26.51, + "learning_rate": 4.6686100746268654e-05, + "loss": 0.0001, + "step": 28420 + }, + { + "epoch": 26.51, + "learning_rate": 4.6685634328358216e-05, + "loss": 0.0009, + "step": 28424 + }, + { + "epoch": 26.52, + "learning_rate": 4.6685167910447764e-05, + "loss": 0.0003, + "step": 28428 + }, + { + "epoch": 26.52, + "learning_rate": 4.668470149253731e-05, + "loss": 0.0001, + "step": 28432 + }, + { + "epoch": 26.53, + "learning_rate": 4.668423507462687e-05, + "loss": 0.0021, + "step": 28436 + }, + { + "epoch": 26.53, + "learning_rate": 4.668376865671642e-05, + "loss": 0.0024, + "step": 28440 + }, + { + "epoch": 26.53, + "learning_rate": 4.668330223880597e-05, + "loss": 0.001, + "step": 28444 + }, + { + "epoch": 26.54, + "learning_rate": 4.6682835820895525e-05, + "loss": 0.0006, + "step": 28448 + }, + { + "epoch": 26.54, + "learning_rate": 4.668236940298507e-05, + "loss": 0.0001, + "step": 28452 + }, + { + "epoch": 26.54, + "learning_rate": 4.668190298507463e-05, + "loss": 0.0005, + "step": 28456 + }, + { + "epoch": 26.55, + "learning_rate": 4.668143656716418e-05, + "loss": 0.0041, + "step": 28460 + }, + { + "epoch": 26.55, + "learning_rate": 4.668097014925373e-05, + "loss": 0.0049, + "step": 28464 + }, + { + "epoch": 26.56, + "learning_rate": 4.6680503731343286e-05, + "loss": 0.0023, + "step": 28468 + }, + { + "epoch": 26.56, + "learning_rate": 4.668003731343284e-05, + "loss": 0.0006, + "step": 28472 + }, + { + "epoch": 26.56, + "learning_rate": 4.667957089552239e-05, + "loss": 0.0001, + "step": 28476 + }, + { + "epoch": 26.57, + "learning_rate": 4.667910447761194e-05, + "loss": 0.0009, + "step": 28480 + }, + { + "epoch": 26.57, + "learning_rate": 4.66786380597015e-05, + "loss": 0.0008, + "step": 28484 + }, + { + "epoch": 26.57, + "learning_rate": 4.667817164179105e-05, + "loss": 0.0014, + "step": 28488 + }, + { + "epoch": 26.58, + "learning_rate": 4.6677705223880595e-05, + "loss": 0.0002, + "step": 28492 + }, + { + "epoch": 26.58, + "learning_rate": 4.667723880597015e-05, + "loss": 0.001, + "step": 28496 + }, + { + "epoch": 26.59, + "learning_rate": 4.6676772388059705e-05, + "loss": 0.0014, + "step": 28500 + }, + { + "epoch": 26.59, + "eval_exact_match": 0.7321083172147002, + "eval_exec": 0.7688588007736944, + "eval_loss": 0.3977905809879303, + "eval_runtime": 1155.6229, + "eval_samples_per_second": 0.895, + "step": 28500 + }, + { + "epoch": 26.59, + "learning_rate": 4.667630597014925e-05, + "loss": 0.0002, + "step": 28504 + }, + { + "epoch": 26.59, + "learning_rate": 4.667583955223881e-05, + "loss": 0.0001, + "step": 28508 + }, + { + "epoch": 26.6, + "learning_rate": 4.6675373134328356e-05, + "loss": 0.0006, + "step": 28512 + }, + { + "epoch": 26.6, + "learning_rate": 4.667490671641792e-05, + "loss": 0.0018, + "step": 28516 + }, + { + "epoch": 26.6, + "learning_rate": 4.6674440298507466e-05, + "loss": 0.0003, + "step": 28520 + }, + { + "epoch": 26.61, + "learning_rate": 4.6673973880597014e-05, + "loss": 0.0005, + "step": 28524 + }, + { + "epoch": 26.61, + "learning_rate": 4.667350746268657e-05, + "loss": 0.0001, + "step": 28528 + }, + { + "epoch": 26.62, + "learning_rate": 4.6673041044776124e-05, + "loss": 0.0022, + "step": 28532 + }, + { + "epoch": 26.62, + "learning_rate": 4.667257462686567e-05, + "loss": 0.0006, + "step": 28536 + }, + { + "epoch": 26.62, + "learning_rate": 4.667210820895523e-05, + "loss": 0.0002, + "step": 28540 + }, + { + "epoch": 26.63, + "learning_rate": 4.667164179104478e-05, + "loss": 0.0001, + "step": 28544 + }, + { + "epoch": 26.63, + "learning_rate": 4.667117537313433e-05, + "loss": 0.0001, + "step": 28548 + }, + { + "epoch": 26.63, + "learning_rate": 4.6670708955223885e-05, + "loss": 0.0036, + "step": 28552 + }, + { + "epoch": 26.64, + "learning_rate": 4.667024253731343e-05, + "loss": 0.0002, + "step": 28556 + }, + { + "epoch": 26.64, + "learning_rate": 4.666977611940299e-05, + "loss": 0.0001, + "step": 28560 + }, + { + "epoch": 26.65, + "learning_rate": 4.666930970149254e-05, + "loss": 0.0002, + "step": 28564 + }, + { + "epoch": 26.65, + "learning_rate": 4.666884328358209e-05, + "loss": 0.001, + "step": 28568 + }, + { + "epoch": 26.65, + "learning_rate": 4.666837686567164e-05, + "loss": 0.0032, + "step": 28572 + }, + { + "epoch": 26.66, + "learning_rate": 4.66679104477612e-05, + "loss": 0.0002, + "step": 28576 + }, + { + "epoch": 26.66, + "learning_rate": 4.666744402985075e-05, + "loss": 0.0015, + "step": 28580 + }, + { + "epoch": 26.66, + "learning_rate": 4.66669776119403e-05, + "loss": 0.0034, + "step": 28584 + }, + { + "epoch": 26.67, + "learning_rate": 4.666651119402985e-05, + "loss": 0.0009, + "step": 28588 + }, + { + "epoch": 26.67, + "learning_rate": 4.666604477611941e-05, + "loss": 0.0019, + "step": 28592 + }, + { + "epoch": 26.68, + "learning_rate": 4.6665578358208955e-05, + "loss": 0.0029, + "step": 28596 + }, + { + "epoch": 26.68, + "learning_rate": 4.666511194029851e-05, + "loss": 0.0, + "step": 28600 + }, + { + "epoch": 26.68, + "learning_rate": 4.6664645522388065e-05, + "loss": 0.0006, + "step": 28604 + }, + { + "epoch": 26.69, + "learning_rate": 4.666417910447761e-05, + "loss": 0.001, + "step": 28608 + }, + { + "epoch": 26.69, + "learning_rate": 4.666371268656717e-05, + "loss": 0.0, + "step": 28612 + }, + { + "epoch": 26.69, + "learning_rate": 4.6663246268656716e-05, + "loss": 0.0001, + "step": 28616 + }, + { + "epoch": 26.7, + "learning_rate": 4.666277985074627e-05, + "loss": 0.003, + "step": 28620 + }, + { + "epoch": 26.7, + "learning_rate": 4.6662313432835826e-05, + "loss": 0.0009, + "step": 28624 + }, + { + "epoch": 26.71, + "learning_rate": 4.6661847014925374e-05, + "loss": 0.0001, + "step": 28628 + }, + { + "epoch": 26.71, + "learning_rate": 4.666138059701492e-05, + "loss": 0.0007, + "step": 28632 + }, + { + "epoch": 26.71, + "learning_rate": 4.6660914179104484e-05, + "loss": 0.0061, + "step": 28636 + }, + { + "epoch": 26.72, + "learning_rate": 4.666044776119403e-05, + "loss": 0.0041, + "step": 28640 + }, + { + "epoch": 26.72, + "learning_rate": 4.665998134328358e-05, + "loss": 0.0067, + "step": 28644 + }, + { + "epoch": 26.72, + "learning_rate": 4.6659514925373135e-05, + "loss": 0.0009, + "step": 28648 + }, + { + "epoch": 26.73, + "learning_rate": 4.665904850746269e-05, + "loss": 0.0003, + "step": 28652 + }, + { + "epoch": 26.73, + "learning_rate": 4.665858208955224e-05, + "loss": 0.0005, + "step": 28656 + }, + { + "epoch": 26.73, + "learning_rate": 4.665811567164179e-05, + "loss": 0.0002, + "step": 28660 + }, + { + "epoch": 26.74, + "learning_rate": 4.665764925373135e-05, + "loss": 0.0002, + "step": 28664 + }, + { + "epoch": 26.74, + "learning_rate": 4.6657182835820896e-05, + "loss": 0.0076, + "step": 28668 + }, + { + "epoch": 26.75, + "learning_rate": 4.665671641791045e-05, + "loss": 0.0014, + "step": 28672 + }, + { + "epoch": 26.75, + "learning_rate": 4.665625e-05, + "loss": 0.0002, + "step": 28676 + }, + { + "epoch": 26.75, + "learning_rate": 4.6655783582089554e-05, + "loss": 0.0002, + "step": 28680 + }, + { + "epoch": 26.76, + "learning_rate": 4.665531716417911e-05, + "loss": 0.0006, + "step": 28684 + }, + { + "epoch": 26.76, + "learning_rate": 4.665485074626866e-05, + "loss": 0.0048, + "step": 28688 + }, + { + "epoch": 26.76, + "learning_rate": 4.665438432835821e-05, + "loss": 0.0002, + "step": 28692 + }, + { + "epoch": 26.77, + "learning_rate": 4.665391791044777e-05, + "loss": 0.0012, + "step": 28696 + }, + { + "epoch": 26.77, + "learning_rate": 4.6653451492537315e-05, + "loss": 0.0002, + "step": 28700 + }, + { + "epoch": 26.78, + "learning_rate": 4.665298507462687e-05, + "loss": 0.0004, + "step": 28704 + }, + { + "epoch": 26.78, + "learning_rate": 4.665251865671642e-05, + "loss": 0.0001, + "step": 28708 + }, + { + "epoch": 26.78, + "learning_rate": 4.665205223880597e-05, + "loss": 0.0001, + "step": 28712 + }, + { + "epoch": 26.79, + "learning_rate": 4.665158582089553e-05, + "loss": 0.0023, + "step": 28716 + }, + { + "epoch": 26.79, + "learning_rate": 4.6651119402985076e-05, + "loss": 0.0012, + "step": 28720 + }, + { + "epoch": 26.79, + "learning_rate": 4.665065298507463e-05, + "loss": 0.0002, + "step": 28724 + }, + { + "epoch": 26.8, + "learning_rate": 4.6650186567164186e-05, + "loss": 0.0015, + "step": 28728 + }, + { + "epoch": 26.8, + "learning_rate": 4.6649720149253734e-05, + "loss": 0.0008, + "step": 28732 + }, + { + "epoch": 26.81, + "learning_rate": 4.664925373134328e-05, + "loss": 0.0019, + "step": 28736 + }, + { + "epoch": 26.81, + "learning_rate": 4.664878731343284e-05, + "loss": 0.0001, + "step": 28740 + }, + { + "epoch": 26.81, + "learning_rate": 4.664832089552239e-05, + "loss": 0.0007, + "step": 28744 + }, + { + "epoch": 26.82, + "learning_rate": 4.664785447761194e-05, + "loss": 0.0013, + "step": 28748 + }, + { + "epoch": 26.82, + "learning_rate": 4.6647388059701495e-05, + "loss": 0.0018, + "step": 28752 + }, + { + "epoch": 26.82, + "learning_rate": 4.664692164179105e-05, + "loss": 0.0005, + "step": 28756 + }, + { + "epoch": 26.83, + "learning_rate": 4.66464552238806e-05, + "loss": 0.0042, + "step": 28760 + }, + { + "epoch": 26.83, + "learning_rate": 4.664598880597015e-05, + "loss": 0.0186, + "step": 28764 + }, + { + "epoch": 26.84, + "learning_rate": 4.66455223880597e-05, + "loss": 0.0004, + "step": 28768 + }, + { + "epoch": 26.84, + "learning_rate": 4.6645055970149256e-05, + "loss": 0.0026, + "step": 28772 + }, + { + "epoch": 26.84, + "learning_rate": 4.664458955223881e-05, + "loss": 0.0029, + "step": 28776 + }, + { + "epoch": 26.85, + "learning_rate": 4.664412313432836e-05, + "loss": 0.0073, + "step": 28780 + }, + { + "epoch": 26.85, + "learning_rate": 4.6643656716417914e-05, + "loss": 0.0005, + "step": 28784 + }, + { + "epoch": 26.85, + "learning_rate": 4.664319029850747e-05, + "loss": 0.0006, + "step": 28788 + }, + { + "epoch": 26.86, + "learning_rate": 4.664272388059702e-05, + "loss": 0.0002, + "step": 28792 + }, + { + "epoch": 26.86, + "learning_rate": 4.6642257462686565e-05, + "loss": 0.0004, + "step": 28796 + }, + { + "epoch": 26.87, + "learning_rate": 4.664179104477612e-05, + "loss": 0.0002, + "step": 28800 + }, + { + "epoch": 26.87, + "learning_rate": 4.6641324626865675e-05, + "loss": 0.0021, + "step": 28804 + }, + { + "epoch": 26.87, + "learning_rate": 4.664085820895522e-05, + "loss": 0.0002, + "step": 28808 + }, + { + "epoch": 26.88, + "learning_rate": 4.664039179104478e-05, + "loss": 0.0009, + "step": 28812 + }, + { + "epoch": 26.88, + "learning_rate": 4.663992537313433e-05, + "loss": 0.0011, + "step": 28816 + }, + { + "epoch": 26.88, + "learning_rate": 4.663945895522388e-05, + "loss": 0.0003, + "step": 28820 + }, + { + "epoch": 26.89, + "learning_rate": 4.6638992537313436e-05, + "loss": 0.0001, + "step": 28824 + }, + { + "epoch": 26.89, + "learning_rate": 4.6638526119402984e-05, + "loss": 0.0001, + "step": 28828 + }, + { + "epoch": 26.9, + "learning_rate": 4.663805970149254e-05, + "loss": 0.0011, + "step": 28832 + }, + { + "epoch": 26.9, + "learning_rate": 4.6637593283582094e-05, + "loss": 0.0003, + "step": 28836 + }, + { + "epoch": 26.9, + "learning_rate": 4.663712686567164e-05, + "loss": 0.0001, + "step": 28840 + }, + { + "epoch": 26.91, + "learning_rate": 4.66366604477612e-05, + "loss": 0.0015, + "step": 28844 + }, + { + "epoch": 26.91, + "learning_rate": 4.663619402985075e-05, + "loss": 0.0002, + "step": 28848 + }, + { + "epoch": 26.91, + "learning_rate": 4.66357276119403e-05, + "loss": 0.0007, + "step": 28852 + }, + { + "epoch": 26.92, + "learning_rate": 4.6635261194029855e-05, + "loss": 0.001, + "step": 28856 + }, + { + "epoch": 26.92, + "learning_rate": 4.66347947761194e-05, + "loss": 0.0003, + "step": 28860 + }, + { + "epoch": 26.93, + "learning_rate": 4.663432835820896e-05, + "loss": 0.0026, + "step": 28864 + }, + { + "epoch": 26.93, + "learning_rate": 4.663386194029851e-05, + "loss": 0.0001, + "step": 28868 + }, + { + "epoch": 26.93, + "learning_rate": 4.663339552238806e-05, + "loss": 0.0007, + "step": 28872 + }, + { + "epoch": 26.94, + "learning_rate": 4.6632929104477616e-05, + "loss": 0.0001, + "step": 28876 + }, + { + "epoch": 26.94, + "learning_rate": 4.663246268656717e-05, + "loss": 0.0009, + "step": 28880 + }, + { + "epoch": 26.94, + "learning_rate": 4.663199626865672e-05, + "loss": 0.0006, + "step": 28884 + }, + { + "epoch": 26.95, + "learning_rate": 4.663152985074627e-05, + "loss": 0.0013, + "step": 28888 + }, + { + "epoch": 26.95, + "learning_rate": 4.663106343283583e-05, + "loss": 0.001, + "step": 28892 + }, + { + "epoch": 26.96, + "learning_rate": 4.663059701492538e-05, + "loss": 0.0058, + "step": 28896 + }, + { + "epoch": 26.96, + "learning_rate": 4.6630130597014925e-05, + "loss": 0.0191, + "step": 28900 + }, + { + "epoch": 26.96, + "learning_rate": 4.662966417910448e-05, + "loss": 0.0006, + "step": 28904 + }, + { + "epoch": 26.97, + "learning_rate": 4.6629197761194035e-05, + "loss": 0.0017, + "step": 28908 + }, + { + "epoch": 26.97, + "learning_rate": 4.662873134328358e-05, + "loss": 0.0013, + "step": 28912 + }, + { + "epoch": 26.97, + "learning_rate": 4.662826492537314e-05, + "loss": 0.0019, + "step": 28916 + }, + { + "epoch": 26.98, + "learning_rate": 4.6627798507462686e-05, + "loss": 0.0083, + "step": 28920 + }, + { + "epoch": 26.98, + "learning_rate": 4.662733208955224e-05, + "loss": 0.0022, + "step": 28924 + }, + { + "epoch": 26.98, + "learning_rate": 4.6626865671641796e-05, + "loss": 0.0003, + "step": 28928 + }, + { + "epoch": 26.99, + "learning_rate": 4.6626399253731344e-05, + "loss": 0.0031, + "step": 28932 + }, + { + "epoch": 26.99, + "learning_rate": 4.66259328358209e-05, + "loss": 0.0046, + "step": 28936 + }, + { + "epoch": 27.0, + "learning_rate": 4.6625466417910454e-05, + "loss": 0.0008, + "step": 28940 + }, + { + "epoch": 27.0, + "learning_rate": 4.6625e-05, + "loss": 0.0012, + "step": 28944 + }, + { + "epoch": 27.0, + "learning_rate": 4.662453358208955e-05, + "loss": 0.0001, + "step": 28948 + }, + { + "epoch": 27.01, + "learning_rate": 4.662406716417911e-05, + "loss": 0.0021, + "step": 28952 + }, + { + "epoch": 27.01, + "learning_rate": 4.662360074626866e-05, + "loss": 0.0017, + "step": 28956 + }, + { + "epoch": 27.01, + "learning_rate": 4.662313432835821e-05, + "loss": 0.0023, + "step": 28960 + }, + { + "epoch": 27.02, + "learning_rate": 4.662266791044776e-05, + "loss": 0.0001, + "step": 28964 + }, + { + "epoch": 27.02, + "learning_rate": 4.662220149253732e-05, + "loss": 0.001, + "step": 28968 + }, + { + "epoch": 27.03, + "learning_rate": 4.6621735074626866e-05, + "loss": 0.0003, + "step": 28972 + }, + { + "epoch": 27.03, + "learning_rate": 4.662126865671642e-05, + "loss": 0.0002, + "step": 28976 + }, + { + "epoch": 27.03, + "learning_rate": 4.662080223880597e-05, + "loss": 0.0003, + "step": 28980 + }, + { + "epoch": 27.04, + "learning_rate": 4.6620335820895524e-05, + "loss": 0.0007, + "step": 28984 + }, + { + "epoch": 27.04, + "learning_rate": 4.661986940298508e-05, + "loss": 0.0008, + "step": 28988 + }, + { + "epoch": 27.04, + "learning_rate": 4.661940298507463e-05, + "loss": 0.0025, + "step": 28992 + }, + { + "epoch": 27.05, + "learning_rate": 4.661893656716418e-05, + "loss": 0.0004, + "step": 28996 + }, + { + "epoch": 27.05, + "learning_rate": 4.6618470149253737e-05, + "loss": 0.0005, + "step": 29000 + }, + { + "epoch": 27.05, + "eval_exact_match": 0.7330754352030948, + "eval_exec": 0.7736943907156673, + "eval_loss": 0.374284952878952, + "eval_runtime": 1172.5719, + "eval_samples_per_second": 0.882, + "step": 29000 + }, + { + "epoch": 27.06, + "learning_rate": 4.6618003731343285e-05, + "loss": 0.0004, + "step": 29004 + }, + { + "epoch": 27.06, + "learning_rate": 4.661753731343284e-05, + "loss": 0.0003, + "step": 29008 + }, + { + "epoch": 27.06, + "learning_rate": 4.6617070895522394e-05, + "loss": 0.0004, + "step": 29012 + }, + { + "epoch": 27.07, + "learning_rate": 4.661660447761194e-05, + "loss": 0.0001, + "step": 29016 + }, + { + "epoch": 27.07, + "learning_rate": 4.66161380597015e-05, + "loss": 0.0004, + "step": 29020 + }, + { + "epoch": 27.07, + "learning_rate": 4.6615671641791046e-05, + "loss": 0.0005, + "step": 29024 + }, + { + "epoch": 27.08, + "learning_rate": 4.66152052238806e-05, + "loss": 0.0149, + "step": 29028 + }, + { + "epoch": 27.08, + "learning_rate": 4.6614738805970155e-05, + "loss": 0.0012, + "step": 29032 + }, + { + "epoch": 27.09, + "learning_rate": 4.6614272388059704e-05, + "loss": 0.0, + "step": 29036 + }, + { + "epoch": 27.09, + "learning_rate": 4.661380597014925e-05, + "loss": 0.0001, + "step": 29040 + }, + { + "epoch": 27.09, + "learning_rate": 4.661333955223881e-05, + "loss": 0.0014, + "step": 29044 + }, + { + "epoch": 27.1, + "learning_rate": 4.661287313432836e-05, + "loss": 0.0009, + "step": 29048 + }, + { + "epoch": 27.1, + "learning_rate": 4.661240671641791e-05, + "loss": 0.003, + "step": 29052 + }, + { + "epoch": 27.1, + "learning_rate": 4.6611940298507465e-05, + "loss": 0.0019, + "step": 29056 + }, + { + "epoch": 27.11, + "learning_rate": 4.661147388059702e-05, + "loss": 0.0001, + "step": 29060 + }, + { + "epoch": 27.11, + "learning_rate": 4.661100746268657e-05, + "loss": 0.0001, + "step": 29064 + }, + { + "epoch": 27.12, + "learning_rate": 4.661054104477612e-05, + "loss": 0.0005, + "step": 29068 + }, + { + "epoch": 27.12, + "learning_rate": 4.661007462686568e-05, + "loss": 0.0, + "step": 29072 + }, + { + "epoch": 27.12, + "learning_rate": 4.6609608208955226e-05, + "loss": 0.0, + "step": 29076 + }, + { + "epoch": 27.13, + "learning_rate": 4.660914179104478e-05, + "loss": 0.0008, + "step": 29080 + }, + { + "epoch": 27.13, + "learning_rate": 4.660867537313433e-05, + "loss": 0.0003, + "step": 29084 + }, + { + "epoch": 27.13, + "learning_rate": 4.6608208955223883e-05, + "loss": 0.0001, + "step": 29088 + }, + { + "epoch": 27.14, + "learning_rate": 4.660774253731344e-05, + "loss": 0.001, + "step": 29092 + }, + { + "epoch": 27.14, + "learning_rate": 4.6607276119402987e-05, + "loss": 0.0016, + "step": 29096 + }, + { + "epoch": 27.15, + "learning_rate": 4.6606809701492535e-05, + "loss": 0.0016, + "step": 29100 + }, + { + "epoch": 27.15, + "learning_rate": 4.6606343283582096e-05, + "loss": 0.0003, + "step": 29104 + }, + { + "epoch": 27.15, + "learning_rate": 4.6605876865671644e-05, + "loss": 0.0002, + "step": 29108 + }, + { + "epoch": 27.16, + "learning_rate": 4.660541044776119e-05, + "loss": 0.0002, + "step": 29112 + }, + { + "epoch": 27.16, + "learning_rate": 4.660494402985075e-05, + "loss": 0.0001, + "step": 29116 + }, + { + "epoch": 27.16, + "learning_rate": 4.66044776119403e-05, + "loss": 0.0, + "step": 29120 + }, + { + "epoch": 27.17, + "learning_rate": 4.660401119402985e-05, + "loss": 0.0002, + "step": 29124 + }, + { + "epoch": 27.17, + "learning_rate": 4.6603544776119405e-05, + "loss": 0.0005, + "step": 29128 + }, + { + "epoch": 27.18, + "learning_rate": 4.6603078358208954e-05, + "loss": 0.0001, + "step": 29132 + }, + { + "epoch": 27.18, + "learning_rate": 4.660261194029851e-05, + "loss": 0.0, + "step": 29136 + }, + { + "epoch": 27.18, + "learning_rate": 4.6602145522388063e-05, + "loss": 0.0, + "step": 29140 + }, + { + "epoch": 27.19, + "learning_rate": 4.660167910447761e-05, + "loss": 0.0002, + "step": 29144 + }, + { + "epoch": 27.19, + "learning_rate": 4.6601212686567166e-05, + "loss": 0.0005, + "step": 29148 + }, + { + "epoch": 27.19, + "learning_rate": 4.660074626865672e-05, + "loss": 0.0007, + "step": 29152 + }, + { + "epoch": 27.2, + "learning_rate": 4.660027985074627e-05, + "loss": 0.0002, + "step": 29156 + }, + { + "epoch": 27.2, + "learning_rate": 4.659981343283582e-05, + "loss": 0.0, + "step": 29160 + }, + { + "epoch": 27.21, + "learning_rate": 4.659934701492538e-05, + "loss": 0.0026, + "step": 29164 + }, + { + "epoch": 27.21, + "learning_rate": 4.659888059701493e-05, + "loss": 0.0001, + "step": 29168 + }, + { + "epoch": 27.21, + "learning_rate": 4.659841417910448e-05, + "loss": 0.0012, + "step": 29172 + }, + { + "epoch": 27.22, + "learning_rate": 4.659794776119403e-05, + "loss": 0.0158, + "step": 29176 + }, + { + "epoch": 27.22, + "learning_rate": 4.6597481343283585e-05, + "loss": 0.0003, + "step": 29180 + }, + { + "epoch": 27.22, + "learning_rate": 4.659701492537314e-05, + "loss": 0.0022, + "step": 29184 + }, + { + "epoch": 27.23, + "learning_rate": 4.659654850746269e-05, + "loss": 0.0002, + "step": 29188 + }, + { + "epoch": 27.23, + "learning_rate": 4.6596082089552237e-05, + "loss": 0.0001, + "step": 29192 + }, + { + "epoch": 27.24, + "learning_rate": 4.65956156716418e-05, + "loss": 0.0028, + "step": 29196 + }, + { + "epoch": 27.24, + "learning_rate": 4.6595149253731346e-05, + "loss": 0.0001, + "step": 29200 + }, + { + "epoch": 27.24, + "learning_rate": 4.6594682835820894e-05, + "loss": 0.0009, + "step": 29204 + }, + { + "epoch": 27.25, + "learning_rate": 4.659421641791045e-05, + "loss": 0.0, + "step": 29208 + }, + { + "epoch": 27.25, + "learning_rate": 4.6593750000000004e-05, + "loss": 0.0012, + "step": 29212 + }, + { + "epoch": 27.25, + "learning_rate": 4.659328358208955e-05, + "loss": 0.0034, + "step": 29216 + }, + { + "epoch": 27.26, + "learning_rate": 4.659281716417911e-05, + "loss": 0.0001, + "step": 29220 + }, + { + "epoch": 27.26, + "learning_rate": 4.659235074626866e-05, + "loss": 0.0, + "step": 29224 + }, + { + "epoch": 27.26, + "learning_rate": 4.659188432835821e-05, + "loss": 0.0002, + "step": 29228 + }, + { + "epoch": 27.27, + "learning_rate": 4.6591417910447765e-05, + "loss": 0.0052, + "step": 29232 + }, + { + "epoch": 27.27, + "learning_rate": 4.6590951492537313e-05, + "loss": 0.0002, + "step": 29236 + }, + { + "epoch": 27.28, + "learning_rate": 4.659048507462687e-05, + "loss": 0.0, + "step": 29240 + }, + { + "epoch": 27.28, + "learning_rate": 4.659001865671642e-05, + "loss": 0.0001, + "step": 29244 + }, + { + "epoch": 27.28, + "learning_rate": 4.658955223880597e-05, + "loss": 0.0018, + "step": 29248 + }, + { + "epoch": 27.29, + "learning_rate": 4.658908582089552e-05, + "loss": 0.0002, + "step": 29252 + }, + { + "epoch": 27.29, + "learning_rate": 4.658861940298508e-05, + "loss": 0.0019, + "step": 29256 + }, + { + "epoch": 27.29, + "learning_rate": 4.658815298507463e-05, + "loss": 0.0004, + "step": 29260 + }, + { + "epoch": 27.3, + "learning_rate": 4.658768656716418e-05, + "loss": 0.0008, + "step": 29264 + }, + { + "epoch": 27.3, + "learning_rate": 4.658722014925373e-05, + "loss": 0.0001, + "step": 29268 + }, + { + "epoch": 27.31, + "learning_rate": 4.658675373134329e-05, + "loss": 0.0013, + "step": 29272 + }, + { + "epoch": 27.31, + "learning_rate": 4.6586287313432835e-05, + "loss": 0.0, + "step": 29276 + }, + { + "epoch": 27.31, + "learning_rate": 4.658582089552239e-05, + "loss": 0.0001, + "step": 29280 + }, + { + "epoch": 27.32, + "learning_rate": 4.6585354477611945e-05, + "loss": 0.0021, + "step": 29284 + }, + { + "epoch": 27.32, + "learning_rate": 4.658488805970149e-05, + "loss": 0.0001, + "step": 29288 + }, + { + "epoch": 27.32, + "learning_rate": 4.658442164179105e-05, + "loss": 0.0001, + "step": 29292 + }, + { + "epoch": 27.33, + "learning_rate": 4.6583955223880596e-05, + "loss": 0.0003, + "step": 29296 + }, + { + "epoch": 27.33, + "learning_rate": 4.658348880597015e-05, + "loss": 0.0001, + "step": 29300 + }, + { + "epoch": 27.34, + "learning_rate": 4.6583022388059706e-05, + "loss": 0.0001, + "step": 29304 + }, + { + "epoch": 27.34, + "learning_rate": 4.6582555970149254e-05, + "loss": 0.0, + "step": 29308 + }, + { + "epoch": 27.34, + "learning_rate": 4.65820895522388e-05, + "loss": 0.0001, + "step": 29312 + }, + { + "epoch": 27.35, + "learning_rate": 4.6581623134328364e-05, + "loss": 0.0003, + "step": 29316 + }, + { + "epoch": 27.35, + "learning_rate": 4.658115671641791e-05, + "loss": 0.0, + "step": 29320 + }, + { + "epoch": 27.35, + "learning_rate": 4.658069029850746e-05, + "loss": 0.0, + "step": 29324 + }, + { + "epoch": 27.36, + "learning_rate": 4.6580223880597015e-05, + "loss": 0.0042, + "step": 29328 + }, + { + "epoch": 27.36, + "learning_rate": 4.657975746268657e-05, + "loss": 0.0003, + "step": 29332 + }, + { + "epoch": 27.37, + "learning_rate": 4.6579291044776125e-05, + "loss": 0.0, + "step": 29336 + }, + { + "epoch": 27.37, + "learning_rate": 4.657882462686567e-05, + "loss": 0.0093, + "step": 29340 + }, + { + "epoch": 27.37, + "learning_rate": 4.657835820895523e-05, + "loss": 0.0035, + "step": 29344 + }, + { + "epoch": 27.38, + "learning_rate": 4.657789179104478e-05, + "loss": 0.004, + "step": 29348 + }, + { + "epoch": 27.38, + "learning_rate": 4.657742537313433e-05, + "loss": 0.0006, + "step": 29352 + }, + { + "epoch": 27.38, + "learning_rate": 4.657695895522388e-05, + "loss": 0.0009, + "step": 29356 + }, + { + "epoch": 27.39, + "learning_rate": 4.6576492537313434e-05, + "loss": 0.0006, + "step": 29360 + }, + { + "epoch": 27.39, + "learning_rate": 4.657602611940299e-05, + "loss": 0.0001, + "step": 29364 + }, + { + "epoch": 27.4, + "learning_rate": 4.657555970149254e-05, + "loss": 0.0001, + "step": 29368 + }, + { + "epoch": 27.4, + "learning_rate": 4.657509328358209e-05, + "loss": 0.0066, + "step": 29372 + }, + { + "epoch": 27.4, + "learning_rate": 4.657462686567165e-05, + "loss": 0.0003, + "step": 29376 + }, + { + "epoch": 27.41, + "learning_rate": 4.6574160447761195e-05, + "loss": 0.0004, + "step": 29380 + }, + { + "epoch": 27.41, + "learning_rate": 4.657369402985075e-05, + "loss": 0.003, + "step": 29384 + }, + { + "epoch": 27.41, + "learning_rate": 4.65732276119403e-05, + "loss": 0.0017, + "step": 29388 + }, + { + "epoch": 27.42, + "learning_rate": 4.657276119402985e-05, + "loss": 0.002, + "step": 29392 + }, + { + "epoch": 27.42, + "learning_rate": 4.657229477611941e-05, + "loss": 0.0001, + "step": 29396 + }, + { + "epoch": 27.43, + "learning_rate": 4.6571828358208956e-05, + "loss": 0.0082, + "step": 29400 + }, + { + "epoch": 27.43, + "learning_rate": 4.657136194029851e-05, + "loss": 0.001, + "step": 29404 + }, + { + "epoch": 27.43, + "learning_rate": 4.6570895522388066e-05, + "loss": 0.0002, + "step": 29408 + }, + { + "epoch": 27.44, + "learning_rate": 4.6570429104477614e-05, + "loss": 0.0013, + "step": 29412 + }, + { + "epoch": 27.44, + "learning_rate": 4.656996268656716e-05, + "loss": 0.0003, + "step": 29416 + }, + { + "epoch": 27.44, + "learning_rate": 4.656949626865672e-05, + "loss": 0.0004, + "step": 29420 + }, + { + "epoch": 27.45, + "learning_rate": 4.656902985074627e-05, + "loss": 0.0001, + "step": 29424 + }, + { + "epoch": 27.45, + "learning_rate": 4.656856343283582e-05, + "loss": 0.0029, + "step": 29428 + }, + { + "epoch": 27.46, + "learning_rate": 4.6568097014925375e-05, + "loss": 0.0001, + "step": 29432 + }, + { + "epoch": 27.46, + "learning_rate": 4.656763059701493e-05, + "loss": 0.0055, + "step": 29436 + }, + { + "epoch": 27.46, + "learning_rate": 4.656716417910448e-05, + "loss": 0.0005, + "step": 29440 + }, + { + "epoch": 27.47, + "learning_rate": 4.656669776119403e-05, + "loss": 0.0002, + "step": 29444 + }, + { + "epoch": 27.47, + "learning_rate": 4.656623134328358e-05, + "loss": 0.0033, + "step": 29448 + }, + { + "epoch": 27.47, + "learning_rate": 4.6565764925373136e-05, + "loss": 0.0005, + "step": 29452 + }, + { + "epoch": 27.48, + "learning_rate": 4.656529850746269e-05, + "loss": 0.0002, + "step": 29456 + }, + { + "epoch": 27.48, + "learning_rate": 4.656483208955224e-05, + "loss": 0.0003, + "step": 29460 + }, + { + "epoch": 27.49, + "learning_rate": 4.6564365671641794e-05, + "loss": 0.0001, + "step": 29464 + }, + { + "epoch": 27.49, + "learning_rate": 4.656389925373135e-05, + "loss": 0.001, + "step": 29468 + }, + { + "epoch": 27.49, + "learning_rate": 4.65634328358209e-05, + "loss": 0.0001, + "step": 29472 + }, + { + "epoch": 27.5, + "learning_rate": 4.6562966417910445e-05, + "loss": 0.0062, + "step": 29476 + }, + { + "epoch": 27.5, + "learning_rate": 4.65625e-05, + "loss": 0.0055, + "step": 29480 + }, + { + "epoch": 27.5, + "learning_rate": 4.6562033582089555e-05, + "loss": 0.0001, + "step": 29484 + }, + { + "epoch": 27.51, + "learning_rate": 4.65615671641791e-05, + "loss": 0.0002, + "step": 29488 + }, + { + "epoch": 27.51, + "learning_rate": 4.656110074626866e-05, + "loss": 0.0049, + "step": 29492 + }, + { + "epoch": 27.51, + "learning_rate": 4.656063432835821e-05, + "loss": 0.0002, + "step": 29496 + }, + { + "epoch": 27.52, + "learning_rate": 4.656016791044777e-05, + "loss": 0.0001, + "step": 29500 + }, + { + "epoch": 27.52, + "eval_exact_match": 0.741779497098646, + "eval_exec": 0.769825918762089, + "eval_loss": 0.3933384418487549, + "eval_runtime": 1168.2038, + "eval_samples_per_second": 0.885, + "step": 29500 + }, + { + "epoch": 27.52, + "learning_rate": 4.6559701492537316e-05, + "loss": 0.0001, + "step": 29504 + }, + { + "epoch": 27.53, + "learning_rate": 4.6559235074626864e-05, + "loss": 0.0025, + "step": 29508 + }, + { + "epoch": 27.53, + "learning_rate": 4.6558768656716426e-05, + "loss": 0.0003, + "step": 29512 + }, + { + "epoch": 27.53, + "learning_rate": 4.6558302238805974e-05, + "loss": 0.0003, + "step": 29516 + }, + { + "epoch": 27.54, + "learning_rate": 4.655783582089552e-05, + "loss": 0.0016, + "step": 29520 + }, + { + "epoch": 27.54, + "learning_rate": 4.655736940298508e-05, + "loss": 0.0001, + "step": 29524 + }, + { + "epoch": 27.54, + "learning_rate": 4.655690298507463e-05, + "loss": 0.0007, + "step": 29528 + }, + { + "epoch": 27.55, + "learning_rate": 4.655643656716418e-05, + "loss": 0.0013, + "step": 29532 + }, + { + "epoch": 27.55, + "learning_rate": 4.6555970149253735e-05, + "loss": 0.0001, + "step": 29536 + }, + { + "epoch": 27.56, + "learning_rate": 4.655550373134328e-05, + "loss": 0.0009, + "step": 29540 + }, + { + "epoch": 27.56, + "learning_rate": 4.655503731343284e-05, + "loss": 0.0015, + "step": 29544 + }, + { + "epoch": 27.56, + "learning_rate": 4.655457089552239e-05, + "loss": 0.0002, + "step": 29548 + }, + { + "epoch": 27.57, + "learning_rate": 4.655410447761194e-05, + "loss": 0.0001, + "step": 29552 + }, + { + "epoch": 27.57, + "learning_rate": 4.6553638059701496e-05, + "loss": 0.001, + "step": 29556 + }, + { + "epoch": 27.57, + "learning_rate": 4.655317164179105e-05, + "loss": 0.0001, + "step": 29560 + }, + { + "epoch": 27.58, + "learning_rate": 4.65527052238806e-05, + "loss": 0.0001, + "step": 29564 + }, + { + "epoch": 27.58, + "learning_rate": 4.655223880597015e-05, + "loss": 0.0067, + "step": 29568 + }, + { + "epoch": 27.59, + "learning_rate": 4.655177238805971e-05, + "loss": 0.0029, + "step": 29572 + }, + { + "epoch": 27.59, + "learning_rate": 4.655130597014926e-05, + "loss": 0.0007, + "step": 29576 + }, + { + "epoch": 27.59, + "learning_rate": 4.6550839552238805e-05, + "loss": 0.0004, + "step": 29580 + }, + { + "epoch": 27.6, + "learning_rate": 4.655037313432836e-05, + "loss": 0.0001, + "step": 29584 + }, + { + "epoch": 27.6, + "learning_rate": 4.6549906716417915e-05, + "loss": 0.0002, + "step": 29588 + }, + { + "epoch": 27.6, + "learning_rate": 4.654944029850746e-05, + "loss": 0.0001, + "step": 29592 + }, + { + "epoch": 27.61, + "learning_rate": 4.654897388059702e-05, + "loss": 0.0003, + "step": 29596 + }, + { + "epoch": 27.61, + "learning_rate": 4.6548507462686566e-05, + "loss": 0.0017, + "step": 29600 + }, + { + "epoch": 27.62, + "learning_rate": 4.654804104477612e-05, + "loss": 0.0031, + "step": 29604 + }, + { + "epoch": 27.62, + "learning_rate": 4.6547574626865676e-05, + "loss": 0.0004, + "step": 29608 + }, + { + "epoch": 27.62, + "learning_rate": 4.6547108208955224e-05, + "loss": 0.0003, + "step": 29612 + }, + { + "epoch": 27.63, + "learning_rate": 4.654664179104478e-05, + "loss": 0.0002, + "step": 29616 + }, + { + "epoch": 27.63, + "learning_rate": 4.6546175373134334e-05, + "loss": 0.0034, + "step": 29620 + }, + { + "epoch": 27.63, + "learning_rate": 4.654570895522388e-05, + "loss": 0.0008, + "step": 29624 + }, + { + "epoch": 27.64, + "learning_rate": 4.654524253731343e-05, + "loss": 0.0001, + "step": 29628 + }, + { + "epoch": 27.64, + "learning_rate": 4.654477611940299e-05, + "loss": 0.0001, + "step": 29632 + }, + { + "epoch": 27.65, + "learning_rate": 4.654430970149254e-05, + "loss": 0.0002, + "step": 29636 + }, + { + "epoch": 27.65, + "learning_rate": 4.654384328358209e-05, + "loss": 0.0035, + "step": 29640 + }, + { + "epoch": 27.65, + "learning_rate": 4.654337686567164e-05, + "loss": 0.0001, + "step": 29644 + }, + { + "epoch": 27.66, + "learning_rate": 4.65429104477612e-05, + "loss": 0.0001, + "step": 29648 + }, + { + "epoch": 27.66, + "learning_rate": 4.6542444029850746e-05, + "loss": 0.0001, + "step": 29652 + }, + { + "epoch": 27.66, + "learning_rate": 4.65419776119403e-05, + "loss": 0.0007, + "step": 29656 + }, + { + "epoch": 27.67, + "learning_rate": 4.654151119402985e-05, + "loss": 0.0005, + "step": 29660 + }, + { + "epoch": 27.67, + "learning_rate": 4.654104477611941e-05, + "loss": 0.0015, + "step": 29664 + }, + { + "epoch": 27.68, + "learning_rate": 4.654057835820896e-05, + "loss": 0.0003, + "step": 29668 + }, + { + "epoch": 27.68, + "learning_rate": 4.654011194029851e-05, + "loss": 0.0001, + "step": 29672 + }, + { + "epoch": 27.68, + "learning_rate": 4.653964552238806e-05, + "loss": 0.0027, + "step": 29676 + }, + { + "epoch": 27.69, + "learning_rate": 4.653917910447762e-05, + "loss": 0.0002, + "step": 29680 + }, + { + "epoch": 27.69, + "learning_rate": 4.6538712686567165e-05, + "loss": 0.0016, + "step": 29684 + }, + { + "epoch": 27.69, + "learning_rate": 4.653824626865672e-05, + "loss": 0.0001, + "step": 29688 + }, + { + "epoch": 27.7, + "learning_rate": 4.6537779850746275e-05, + "loss": 0.0004, + "step": 29692 + }, + { + "epoch": 27.7, + "learning_rate": 4.653731343283582e-05, + "loss": 0.0001, + "step": 29696 + }, + { + "epoch": 27.71, + "learning_rate": 4.653684701492538e-05, + "loss": 0.0019, + "step": 29700 + }, + { + "epoch": 27.71, + "learning_rate": 4.6536380597014926e-05, + "loss": 0.0015, + "step": 29704 + }, + { + "epoch": 27.71, + "learning_rate": 4.653591417910448e-05, + "loss": 0.0001, + "step": 29708 + }, + { + "epoch": 27.72, + "learning_rate": 4.6535447761194036e-05, + "loss": 0.0005, + "step": 29712 + }, + { + "epoch": 27.72, + "learning_rate": 4.6534981343283584e-05, + "loss": 0.0011, + "step": 29716 + }, + { + "epoch": 27.72, + "learning_rate": 4.653451492537313e-05, + "loss": 0.0086, + "step": 29720 + }, + { + "epoch": 27.73, + "learning_rate": 4.6534048507462694e-05, + "loss": 0.0006, + "step": 29724 + }, + { + "epoch": 27.73, + "learning_rate": 4.653358208955224e-05, + "loss": 0.0001, + "step": 29728 + }, + { + "epoch": 27.73, + "learning_rate": 4.653311567164179e-05, + "loss": 0.001, + "step": 29732 + }, + { + "epoch": 27.74, + "learning_rate": 4.6532649253731345e-05, + "loss": 0.0023, + "step": 29736 + }, + { + "epoch": 27.74, + "learning_rate": 4.65321828358209e-05, + "loss": 0.0094, + "step": 29740 + }, + { + "epoch": 27.75, + "learning_rate": 4.653171641791045e-05, + "loss": 0.0033, + "step": 29744 + }, + { + "epoch": 27.75, + "learning_rate": 4.653125e-05, + "loss": 0.0016, + "step": 29748 + }, + { + "epoch": 27.75, + "learning_rate": 4.653078358208956e-05, + "loss": 0.0007, + "step": 29752 + }, + { + "epoch": 27.76, + "learning_rate": 4.6530317164179106e-05, + "loss": 0.0053, + "step": 29756 + }, + { + "epoch": 27.76, + "learning_rate": 4.652985074626866e-05, + "loss": 0.0008, + "step": 29760 + }, + { + "epoch": 27.76, + "learning_rate": 4.652938432835821e-05, + "loss": 0.0001, + "step": 29764 + }, + { + "epoch": 27.77, + "learning_rate": 4.6528917910447764e-05, + "loss": 0.0009, + "step": 29768 + }, + { + "epoch": 27.77, + "learning_rate": 4.652845149253732e-05, + "loss": 0.0, + "step": 29772 + }, + { + "epoch": 27.78, + "learning_rate": 4.652798507462687e-05, + "loss": 0.0078, + "step": 29776 + }, + { + "epoch": 27.78, + "learning_rate": 4.6527518656716415e-05, + "loss": 0.0015, + "step": 29780 + }, + { + "epoch": 27.78, + "learning_rate": 4.652705223880598e-05, + "loss": 0.0003, + "step": 29784 + }, + { + "epoch": 27.79, + "learning_rate": 4.6526585820895525e-05, + "loss": 0.0029, + "step": 29788 + }, + { + "epoch": 27.79, + "learning_rate": 4.652611940298507e-05, + "loss": 0.0012, + "step": 29792 + }, + { + "epoch": 27.79, + "learning_rate": 4.652565298507463e-05, + "loss": 0.0003, + "step": 29796 + }, + { + "epoch": 27.8, + "learning_rate": 4.652518656716418e-05, + "loss": 0.0, + "step": 29800 + }, + { + "epoch": 27.8, + "learning_rate": 4.652472014925373e-05, + "loss": 0.0024, + "step": 29804 + }, + { + "epoch": 27.81, + "learning_rate": 4.6524253731343286e-05, + "loss": 0.0003, + "step": 29808 + }, + { + "epoch": 27.81, + "learning_rate": 4.6523787313432834e-05, + "loss": 0.0006, + "step": 29812 + }, + { + "epoch": 27.81, + "learning_rate": 4.652332089552239e-05, + "loss": 0.0004, + "step": 29816 + }, + { + "epoch": 27.82, + "learning_rate": 4.6522854477611944e-05, + "loss": 0.0001, + "step": 29820 + }, + { + "epoch": 27.82, + "learning_rate": 4.652238805970149e-05, + "loss": 0.0, + "step": 29824 + }, + { + "epoch": 27.82, + "learning_rate": 4.652192164179105e-05, + "loss": 0.0001, + "step": 29828 + }, + { + "epoch": 27.83, + "learning_rate": 4.65214552238806e-05, + "loss": 0.0023, + "step": 29832 + }, + { + "epoch": 27.83, + "learning_rate": 4.652098880597015e-05, + "loss": 0.0027, + "step": 29836 + }, + { + "epoch": 27.84, + "learning_rate": 4.6520522388059705e-05, + "loss": 0.0025, + "step": 29840 + }, + { + "epoch": 27.84, + "learning_rate": 4.652005597014926e-05, + "loss": 0.0049, + "step": 29844 + }, + { + "epoch": 27.84, + "learning_rate": 4.651958955223881e-05, + "loss": 0.0001, + "step": 29848 + }, + { + "epoch": 27.85, + "learning_rate": 4.651912313432836e-05, + "loss": 0.0002, + "step": 29852 + }, + { + "epoch": 27.85, + "learning_rate": 4.651865671641791e-05, + "loss": 0.0004, + "step": 29856 + }, + { + "epoch": 27.85, + "learning_rate": 4.6518190298507466e-05, + "loss": 0.0039, + "step": 29860 + }, + { + "epoch": 27.86, + "learning_rate": 4.651772388059702e-05, + "loss": 0.0001, + "step": 29864 + }, + { + "epoch": 27.86, + "learning_rate": 4.651725746268657e-05, + "loss": 0.0002, + "step": 29868 + }, + { + "epoch": 27.87, + "learning_rate": 4.651679104477612e-05, + "loss": 0.0003, + "step": 29872 + }, + { + "epoch": 27.87, + "learning_rate": 4.651632462686568e-05, + "loss": 0.0078, + "step": 29876 + }, + { + "epoch": 27.87, + "learning_rate": 4.651585820895523e-05, + "loss": 0.0001, + "step": 29880 + }, + { + "epoch": 27.88, + "learning_rate": 4.6515391791044775e-05, + "loss": 0.0015, + "step": 29884 + }, + { + "epoch": 27.88, + "learning_rate": 4.651492537313433e-05, + "loss": 0.0007, + "step": 29888 + }, + { + "epoch": 27.88, + "learning_rate": 4.6514458955223885e-05, + "loss": 0.0002, + "step": 29892 + }, + { + "epoch": 27.89, + "learning_rate": 4.651399253731343e-05, + "loss": 0.0013, + "step": 29896 + }, + { + "epoch": 27.89, + "learning_rate": 4.651352611940299e-05, + "loss": 0.0008, + "step": 29900 + }, + { + "epoch": 27.9, + "learning_rate": 4.651305970149254e-05, + "loss": 0.0056, + "step": 29904 + }, + { + "epoch": 27.9, + "learning_rate": 4.651259328358209e-05, + "loss": 0.0073, + "step": 29908 + }, + { + "epoch": 27.9, + "learning_rate": 4.6512126865671646e-05, + "loss": 0.0015, + "step": 29912 + }, + { + "epoch": 27.91, + "learning_rate": 4.6511660447761194e-05, + "loss": 0.0005, + "step": 29916 + }, + { + "epoch": 27.91, + "learning_rate": 4.651119402985075e-05, + "loss": 0.0001, + "step": 29920 + }, + { + "epoch": 27.91, + "learning_rate": 4.6510727611940304e-05, + "loss": 0.0006, + "step": 29924 + }, + { + "epoch": 27.92, + "learning_rate": 4.651026119402985e-05, + "loss": 0.0004, + "step": 29928 + }, + { + "epoch": 27.92, + "learning_rate": 4.65097947761194e-05, + "loss": 0.0015, + "step": 29932 + }, + { + "epoch": 27.93, + "learning_rate": 4.650932835820896e-05, + "loss": 0.002, + "step": 29936 + }, + { + "epoch": 27.93, + "learning_rate": 4.650886194029851e-05, + "loss": 0.0002, + "step": 29940 + }, + { + "epoch": 27.93, + "learning_rate": 4.650839552238806e-05, + "loss": 0.0006, + "step": 29944 + }, + { + "epoch": 27.94, + "learning_rate": 4.650792910447761e-05, + "loss": 0.0062, + "step": 29948 + }, + { + "epoch": 27.94, + "learning_rate": 4.650746268656717e-05, + "loss": 0.0003, + "step": 29952 + }, + { + "epoch": 27.94, + "learning_rate": 4.6506996268656716e-05, + "loss": 0.0018, + "step": 29956 + }, + { + "epoch": 27.95, + "learning_rate": 4.650652985074627e-05, + "loss": 0.0003, + "step": 29960 + }, + { + "epoch": 27.95, + "learning_rate": 4.6506063432835825e-05, + "loss": 0.0001, + "step": 29964 + }, + { + "epoch": 27.96, + "learning_rate": 4.6505597014925374e-05, + "loss": 0.0017, + "step": 29968 + }, + { + "epoch": 27.96, + "learning_rate": 4.650513059701493e-05, + "loss": 0.0019, + "step": 29972 + }, + { + "epoch": 27.96, + "learning_rate": 4.650466417910448e-05, + "loss": 0.0003, + "step": 29976 + }, + { + "epoch": 27.97, + "learning_rate": 4.650419776119403e-05, + "loss": 0.0001, + "step": 29980 + }, + { + "epoch": 27.97, + "learning_rate": 4.6503731343283586e-05, + "loss": 0.0011, + "step": 29984 + }, + { + "epoch": 27.97, + "learning_rate": 4.6503264925373135e-05, + "loss": 0.009, + "step": 29988 + }, + { + "epoch": 27.98, + "learning_rate": 4.650279850746269e-05, + "loss": 0.0013, + "step": 29992 + }, + { + "epoch": 27.98, + "learning_rate": 4.6502332089552244e-05, + "loss": 0.0, + "step": 29996 + }, + { + "epoch": 27.98, + "learning_rate": 4.650186567164179e-05, + "loss": 0.0007, + "step": 30000 + }, + { + "epoch": 27.98, + "eval_exact_match": 0.7350096711798839, + "eval_exec": 0.769825918762089, + "eval_loss": 0.3727231025695801, + "eval_runtime": 1201.9878, + "eval_samples_per_second": 0.86, + "step": 30000 + }, + { + "epoch": 27.99, + "learning_rate": 4.650139925373135e-05, + "loss": 0.0001, + "step": 30004 + }, + { + "epoch": 27.99, + "learning_rate": 4.6500932835820896e-05, + "loss": 0.0002, + "step": 30008 + }, + { + "epoch": 28.0, + "learning_rate": 4.650046641791045e-05, + "loss": 0.0004, + "step": 30012 + }, + { + "epoch": 28.0, + "learning_rate": 4.6500000000000005e-05, + "loss": 0.0001, + "step": 30016 + }, + { + "epoch": 28.0, + "learning_rate": 4.6499533582089554e-05, + "loss": 0.0026, + "step": 30020 + }, + { + "epoch": 28.01, + "learning_rate": 4.649906716417911e-05, + "loss": 0.0003, + "step": 30024 + }, + { + "epoch": 28.01, + "learning_rate": 4.649860074626866e-05, + "loss": 0.0002, + "step": 30028 + }, + { + "epoch": 28.01, + "learning_rate": 4.649813432835821e-05, + "loss": 0.0043, + "step": 30032 + }, + { + "epoch": 28.02, + "learning_rate": 4.649766791044776e-05, + "loss": 0.0, + "step": 30036 + }, + { + "epoch": 28.02, + "learning_rate": 4.6497201492537315e-05, + "loss": 0.0001, + "step": 30040 + }, + { + "epoch": 28.03, + "learning_rate": 4.649673507462687e-05, + "loss": 0.0041, + "step": 30044 + }, + { + "epoch": 28.03, + "learning_rate": 4.649626865671642e-05, + "loss": 0.0003, + "step": 30048 + }, + { + "epoch": 28.03, + "learning_rate": 4.649580223880597e-05, + "loss": 0.0, + "step": 30052 + }, + { + "epoch": 28.04, + "learning_rate": 4.649533582089553e-05, + "loss": 0.0002, + "step": 30056 + }, + { + "epoch": 28.04, + "learning_rate": 4.6494869402985076e-05, + "loss": 0.0005, + "step": 30060 + }, + { + "epoch": 28.04, + "learning_rate": 4.649440298507463e-05, + "loss": 0.002, + "step": 30064 + }, + { + "epoch": 28.05, + "learning_rate": 4.649393656716418e-05, + "loss": 0.0003, + "step": 30068 + }, + { + "epoch": 28.05, + "learning_rate": 4.6493470149253733e-05, + "loss": 0.0008, + "step": 30072 + }, + { + "epoch": 28.06, + "learning_rate": 4.649300373134329e-05, + "loss": 0.0001, + "step": 30076 + }, + { + "epoch": 28.06, + "learning_rate": 4.6492537313432837e-05, + "loss": 0.0001, + "step": 30080 + }, + { + "epoch": 28.06, + "learning_rate": 4.649207089552239e-05, + "loss": 0.0001, + "step": 30084 + }, + { + "epoch": 28.07, + "learning_rate": 4.6491604477611946e-05, + "loss": 0.0005, + "step": 30088 + }, + { + "epoch": 28.07, + "learning_rate": 4.6491138059701494e-05, + "loss": 0.0009, + "step": 30092 + }, + { + "epoch": 28.07, + "learning_rate": 4.649067164179104e-05, + "loss": 0.0015, + "step": 30096 + }, + { + "epoch": 28.08, + "learning_rate": 4.64902052238806e-05, + "loss": 0.0009, + "step": 30100 + }, + { + "epoch": 28.08, + "learning_rate": 4.648973880597015e-05, + "loss": 0.0013, + "step": 30104 + }, + { + "epoch": 28.09, + "learning_rate": 4.64892723880597e-05, + "loss": 0.0003, + "step": 30108 + }, + { + "epoch": 28.09, + "learning_rate": 4.6488805970149255e-05, + "loss": 0.0005, + "step": 30112 + }, + { + "epoch": 28.09, + "learning_rate": 4.648833955223881e-05, + "loss": 0.0006, + "step": 30116 + }, + { + "epoch": 28.1, + "learning_rate": 4.648787313432836e-05, + "loss": 0.0001, + "step": 30120 + }, + { + "epoch": 28.1, + "learning_rate": 4.648740671641791e-05, + "loss": 0.0002, + "step": 30124 + }, + { + "epoch": 28.1, + "learning_rate": 4.648694029850746e-05, + "loss": 0.0013, + "step": 30128 + }, + { + "epoch": 28.11, + "learning_rate": 4.6486473880597016e-05, + "loss": 0.0001, + "step": 30132 + }, + { + "epoch": 28.11, + "learning_rate": 4.648600746268657e-05, + "loss": 0.0031, + "step": 30136 + }, + { + "epoch": 28.12, + "learning_rate": 4.648554104477612e-05, + "loss": 0.0004, + "step": 30140 + }, + { + "epoch": 28.12, + "learning_rate": 4.6485074626865674e-05, + "loss": 0.0002, + "step": 30144 + }, + { + "epoch": 28.12, + "learning_rate": 4.648460820895523e-05, + "loss": 0.0003, + "step": 30148 + }, + { + "epoch": 28.13, + "learning_rate": 4.648414179104478e-05, + "loss": 0.0041, + "step": 30152 + }, + { + "epoch": 28.13, + "learning_rate": 4.648367537313433e-05, + "loss": 0.0002, + "step": 30156 + }, + { + "epoch": 28.13, + "learning_rate": 4.648320895522388e-05, + "loss": 0.0002, + "step": 30160 + }, + { + "epoch": 28.14, + "learning_rate": 4.6482742537313435e-05, + "loss": 0.0001, + "step": 30164 + }, + { + "epoch": 28.14, + "learning_rate": 4.648227611940299e-05, + "loss": 0.0004, + "step": 30168 + }, + { + "epoch": 28.15, + "learning_rate": 4.648180970149254e-05, + "loss": 0.0033, + "step": 30172 + }, + { + "epoch": 28.15, + "learning_rate": 4.648134328358209e-05, + "loss": 0.0013, + "step": 30176 + }, + { + "epoch": 28.15, + "learning_rate": 4.648087686567165e-05, + "loss": 0.0021, + "step": 30180 + }, + { + "epoch": 28.16, + "learning_rate": 4.6480410447761196e-05, + "loss": 0.0003, + "step": 30184 + }, + { + "epoch": 28.16, + "learning_rate": 4.6479944029850744e-05, + "loss": 0.0004, + "step": 30188 + }, + { + "epoch": 28.16, + "learning_rate": 4.6479477611940306e-05, + "loss": 0.0002, + "step": 30192 + }, + { + "epoch": 28.17, + "learning_rate": 4.6479011194029854e-05, + "loss": 0.004, + "step": 30196 + }, + { + "epoch": 28.17, + "learning_rate": 4.64785447761194e-05, + "loss": 0.0082, + "step": 30200 + }, + { + "epoch": 28.18, + "learning_rate": 4.647807835820896e-05, + "loss": 0.0004, + "step": 30204 + }, + { + "epoch": 28.18, + "learning_rate": 4.647761194029851e-05, + "loss": 0.0002, + "step": 30208 + }, + { + "epoch": 28.18, + "learning_rate": 4.647714552238806e-05, + "loss": 0.0001, + "step": 30212 + }, + { + "epoch": 28.19, + "learning_rate": 4.6476679104477615e-05, + "loss": 0.0004, + "step": 30216 + }, + { + "epoch": 28.19, + "learning_rate": 4.6476212686567163e-05, + "loss": 0.0022, + "step": 30220 + }, + { + "epoch": 28.19, + "learning_rate": 4.647574626865672e-05, + "loss": 0.0004, + "step": 30224 + }, + { + "epoch": 28.2, + "learning_rate": 4.647527985074627e-05, + "loss": 0.0001, + "step": 30228 + }, + { + "epoch": 28.2, + "learning_rate": 4.647481343283582e-05, + "loss": 0.0005, + "step": 30232 + }, + { + "epoch": 28.21, + "learning_rate": 4.6474347014925376e-05, + "loss": 0.0001, + "step": 30236 + }, + { + "epoch": 28.21, + "learning_rate": 4.647388059701493e-05, + "loss": 0.003, + "step": 30240 + }, + { + "epoch": 28.21, + "learning_rate": 4.647341417910448e-05, + "loss": 0.0, + "step": 30244 + }, + { + "epoch": 28.22, + "learning_rate": 4.647294776119403e-05, + "loss": 0.0043, + "step": 30248 + }, + { + "epoch": 28.22, + "learning_rate": 4.647248134328359e-05, + "loss": 0.0021, + "step": 30252 + }, + { + "epoch": 28.22, + "learning_rate": 4.647201492537314e-05, + "loss": 0.0038, + "step": 30256 + }, + { + "epoch": 28.23, + "learning_rate": 4.6471548507462685e-05, + "loss": 0.0008, + "step": 30260 + }, + { + "epoch": 28.23, + "learning_rate": 4.647108208955224e-05, + "loss": 0.0005, + "step": 30264 + }, + { + "epoch": 28.24, + "learning_rate": 4.6470615671641795e-05, + "loss": 0.0001, + "step": 30268 + }, + { + "epoch": 28.24, + "learning_rate": 4.647014925373134e-05, + "loss": 0.0038, + "step": 30272 + }, + { + "epoch": 28.24, + "learning_rate": 4.64696828358209e-05, + "loss": 0.0013, + "step": 30276 + }, + { + "epoch": 28.25, + "learning_rate": 4.6469216417910446e-05, + "loss": 0.0002, + "step": 30280 + }, + { + "epoch": 28.25, + "learning_rate": 4.646875e-05, + "loss": 0.0029, + "step": 30284 + }, + { + "epoch": 28.25, + "learning_rate": 4.6468283582089556e-05, + "loss": 0.0001, + "step": 30288 + }, + { + "epoch": 28.26, + "learning_rate": 4.6467817164179104e-05, + "loss": 0.0001, + "step": 30292 + }, + { + "epoch": 28.26, + "learning_rate": 4.646735074626866e-05, + "loss": 0.0012, + "step": 30296 + }, + { + "epoch": 28.26, + "learning_rate": 4.6466884328358214e-05, + "loss": 0.0002, + "step": 30300 + }, + { + "epoch": 28.27, + "learning_rate": 4.646641791044776e-05, + "loss": 0.0018, + "step": 30304 + }, + { + "epoch": 28.27, + "learning_rate": 4.646595149253731e-05, + "loss": 0.0, + "step": 30308 + }, + { + "epoch": 28.28, + "learning_rate": 4.646548507462687e-05, + "loss": 0.0003, + "step": 30312 + }, + { + "epoch": 28.28, + "learning_rate": 4.646501865671642e-05, + "loss": 0.0001, + "step": 30316 + }, + { + "epoch": 28.28, + "learning_rate": 4.6464552238805975e-05, + "loss": 0.0003, + "step": 30320 + }, + { + "epoch": 28.29, + "learning_rate": 4.646408582089552e-05, + "loss": 0.0025, + "step": 30324 + }, + { + "epoch": 28.29, + "learning_rate": 4.646361940298508e-05, + "loss": 0.0001, + "step": 30328 + }, + { + "epoch": 28.29, + "learning_rate": 4.646315298507463e-05, + "loss": 0.0018, + "step": 30332 + }, + { + "epoch": 28.3, + "learning_rate": 4.646268656716418e-05, + "loss": 0.0011, + "step": 30336 + }, + { + "epoch": 28.3, + "learning_rate": 4.646222014925373e-05, + "loss": 0.0006, + "step": 30340 + }, + { + "epoch": 28.31, + "learning_rate": 4.646175373134329e-05, + "loss": 0.0001, + "step": 30344 + }, + { + "epoch": 28.31, + "learning_rate": 4.646128731343284e-05, + "loss": 0.0004, + "step": 30348 + }, + { + "epoch": 28.31, + "learning_rate": 4.646082089552239e-05, + "loss": 0.0002, + "step": 30352 + }, + { + "epoch": 28.32, + "learning_rate": 4.646035447761194e-05, + "loss": 0.0016, + "step": 30356 + }, + { + "epoch": 28.32, + "learning_rate": 4.64598880597015e-05, + "loss": 0.0006, + "step": 30360 + }, + { + "epoch": 28.32, + "learning_rate": 4.6459421641791045e-05, + "loss": 0.004, + "step": 30364 + }, + { + "epoch": 28.33, + "learning_rate": 4.64589552238806e-05, + "loss": 0.0001, + "step": 30368 + }, + { + "epoch": 28.33, + "learning_rate": 4.6458488805970155e-05, + "loss": 0.0001, + "step": 30372 + }, + { + "epoch": 28.34, + "learning_rate": 4.64580223880597e-05, + "loss": 0.0029, + "step": 30376 + }, + { + "epoch": 28.34, + "learning_rate": 4.645755597014926e-05, + "loss": 0.0078, + "step": 30380 + }, + { + "epoch": 28.34, + "learning_rate": 4.6457089552238806e-05, + "loss": 0.0001, + "step": 30384 + }, + { + "epoch": 28.35, + "learning_rate": 4.645662313432836e-05, + "loss": 0.001, + "step": 30388 + }, + { + "epoch": 28.35, + "learning_rate": 4.6456156716417916e-05, + "loss": 0.0031, + "step": 30392 + }, + { + "epoch": 28.35, + "learning_rate": 4.6455690298507464e-05, + "loss": 0.0004, + "step": 30396 + }, + { + "epoch": 28.36, + "learning_rate": 4.645522388059701e-05, + "loss": 0.0001, + "step": 30400 + }, + { + "epoch": 28.36, + "learning_rate": 4.6454757462686574e-05, + "loss": 0.0002, + "step": 30404 + }, + { + "epoch": 28.37, + "learning_rate": 4.645429104477612e-05, + "loss": 0.0005, + "step": 30408 + }, + { + "epoch": 28.37, + "learning_rate": 4.645382462686567e-05, + "loss": 0.0001, + "step": 30412 + }, + { + "epoch": 28.37, + "learning_rate": 4.6453358208955225e-05, + "loss": 0.0001, + "step": 30416 + }, + { + "epoch": 28.38, + "learning_rate": 4.645289179104478e-05, + "loss": 0.0012, + "step": 30420 + }, + { + "epoch": 28.38, + "learning_rate": 4.645242537313433e-05, + "loss": 0.0074, + "step": 30424 + }, + { + "epoch": 28.38, + "learning_rate": 4.645195895522388e-05, + "loss": 0.002, + "step": 30428 + }, + { + "epoch": 28.39, + "learning_rate": 4.645149253731344e-05, + "loss": 0.0008, + "step": 30432 + }, + { + "epoch": 28.39, + "learning_rate": 4.6451026119402986e-05, + "loss": 0.0001, + "step": 30436 + }, + { + "epoch": 28.4, + "learning_rate": 4.645055970149254e-05, + "loss": 0.004, + "step": 30440 + }, + { + "epoch": 28.4, + "learning_rate": 4.645009328358209e-05, + "loss": 0.0011, + "step": 30444 + }, + { + "epoch": 28.4, + "learning_rate": 4.6449626865671644e-05, + "loss": 0.0064, + "step": 30448 + }, + { + "epoch": 28.41, + "learning_rate": 4.64491604477612e-05, + "loss": 0.0002, + "step": 30452 + }, + { + "epoch": 28.41, + "learning_rate": 4.644869402985075e-05, + "loss": 0.0027, + "step": 30456 + }, + { + "epoch": 28.41, + "learning_rate": 4.6448227611940295e-05, + "loss": 0.0004, + "step": 30460 + }, + { + "epoch": 28.42, + "learning_rate": 4.644776119402986e-05, + "loss": 0.0001, + "step": 30464 + }, + { + "epoch": 28.42, + "learning_rate": 4.6447294776119405e-05, + "loss": 0.0006, + "step": 30468 + }, + { + "epoch": 28.43, + "learning_rate": 4.644682835820895e-05, + "loss": 0.0003, + "step": 30472 + }, + { + "epoch": 28.43, + "learning_rate": 4.644636194029851e-05, + "loss": 0.0026, + "step": 30476 + }, + { + "epoch": 28.43, + "learning_rate": 4.644589552238806e-05, + "loss": 0.0056, + "step": 30480 + }, + { + "epoch": 28.44, + "learning_rate": 4.644542910447762e-05, + "loss": 0.0002, + "step": 30484 + }, + { + "epoch": 28.44, + "learning_rate": 4.6444962686567166e-05, + "loss": 0.0002, + "step": 30488 + }, + { + "epoch": 28.44, + "learning_rate": 4.6444496268656714e-05, + "loss": 0.0001, + "step": 30492 + }, + { + "epoch": 28.45, + "learning_rate": 4.6444029850746276e-05, + "loss": 0.0002, + "step": 30496 + }, + { + "epoch": 28.45, + "learning_rate": 4.6443563432835824e-05, + "loss": 0.0003, + "step": 30500 + }, + { + "epoch": 28.45, + "eval_exact_match": 0.718568665377176, + "eval_exec": 0.7514506769825918, + "eval_loss": 0.3722231984138489, + "eval_runtime": 1165.7612, + "eval_samples_per_second": 0.887, + "step": 30500 + }, + { + "epoch": 28.46, + "learning_rate": 4.644309701492537e-05, + "loss": 0.0001, + "step": 30504 + }, + { + "epoch": 28.46, + "learning_rate": 4.644263059701493e-05, + "loss": 0.0008, + "step": 30508 + }, + { + "epoch": 28.46, + "learning_rate": 4.644216417910448e-05, + "loss": 0.0006, + "step": 30512 + }, + { + "epoch": 28.47, + "learning_rate": 4.644169776119403e-05, + "loss": 0.0001, + "step": 30516 + }, + { + "epoch": 28.47, + "learning_rate": 4.6441231343283585e-05, + "loss": 0.0001, + "step": 30520 + }, + { + "epoch": 28.47, + "learning_rate": 4.644076492537314e-05, + "loss": 0.0, + "step": 30524 + }, + { + "epoch": 28.48, + "learning_rate": 4.644029850746269e-05, + "loss": 0.0001, + "step": 30528 + }, + { + "epoch": 28.48, + "learning_rate": 4.643983208955224e-05, + "loss": 0.0025, + "step": 30532 + }, + { + "epoch": 28.49, + "learning_rate": 4.643936567164179e-05, + "loss": 0.0004, + "step": 30536 + }, + { + "epoch": 28.49, + "learning_rate": 4.6438899253731346e-05, + "loss": 0.0, + "step": 30540 + }, + { + "epoch": 28.49, + "learning_rate": 4.64384328358209e-05, + "loss": 0.0002, + "step": 30544 + }, + { + "epoch": 28.5, + "learning_rate": 4.643796641791045e-05, + "loss": 0.0001, + "step": 30548 + }, + { + "epoch": 28.5, + "learning_rate": 4.64375e-05, + "loss": 0.0001, + "step": 30552 + }, + { + "epoch": 28.5, + "learning_rate": 4.643703358208956e-05, + "loss": 0.0002, + "step": 30556 + }, + { + "epoch": 28.51, + "learning_rate": 4.643656716417911e-05, + "loss": 0.0001, + "step": 30560 + }, + { + "epoch": 28.51, + "learning_rate": 4.6436100746268655e-05, + "loss": 0.0009, + "step": 30564 + }, + { + "epoch": 28.51, + "learning_rate": 4.643563432835821e-05, + "loss": 0.0066, + "step": 30568 + }, + { + "epoch": 28.52, + "learning_rate": 4.6435167910447765e-05, + "loss": 0.0007, + "step": 30572 + }, + { + "epoch": 28.52, + "learning_rate": 4.643470149253731e-05, + "loss": 0.0004, + "step": 30576 + }, + { + "epoch": 28.53, + "learning_rate": 4.643423507462687e-05, + "loss": 0.0007, + "step": 30580 + }, + { + "epoch": 28.53, + "learning_rate": 4.643376865671642e-05, + "loss": 0.0012, + "step": 30584 + }, + { + "epoch": 28.53, + "learning_rate": 4.643330223880597e-05, + "loss": 0.0001, + "step": 30588 + }, + { + "epoch": 28.54, + "learning_rate": 4.6432835820895526e-05, + "loss": 0.0001, + "step": 30592 + }, + { + "epoch": 28.54, + "learning_rate": 4.6432369402985074e-05, + "loss": 0.004, + "step": 30596 + }, + { + "epoch": 28.54, + "learning_rate": 4.643190298507463e-05, + "loss": 0.0001, + "step": 30600 + }, + { + "epoch": 28.55, + "learning_rate": 4.6431436567164184e-05, + "loss": 0.0005, + "step": 30604 + }, + { + "epoch": 28.55, + "learning_rate": 4.643097014925373e-05, + "loss": 0.0011, + "step": 30608 + }, + { + "epoch": 28.56, + "learning_rate": 4.643050373134328e-05, + "loss": 0.0001, + "step": 30612 + }, + { + "epoch": 28.56, + "learning_rate": 4.643003731343284e-05, + "loss": 0.0005, + "step": 30616 + }, + { + "epoch": 28.56, + "learning_rate": 4.642957089552239e-05, + "loss": 0.0004, + "step": 30620 + }, + { + "epoch": 28.57, + "learning_rate": 4.642910447761194e-05, + "loss": 0.0, + "step": 30624 + }, + { + "epoch": 28.57, + "learning_rate": 4.642863805970149e-05, + "loss": 0.0001, + "step": 30628 + }, + { + "epoch": 28.57, + "learning_rate": 4.642817164179105e-05, + "loss": 0.0033, + "step": 30632 + }, + { + "epoch": 28.58, + "learning_rate": 4.6427705223880596e-05, + "loss": 0.0002, + "step": 30636 + }, + { + "epoch": 28.58, + "learning_rate": 4.642723880597015e-05, + "loss": 0.0046, + "step": 30640 + }, + { + "epoch": 28.59, + "learning_rate": 4.6426772388059706e-05, + "loss": 0.0022, + "step": 30644 + }, + { + "epoch": 28.59, + "learning_rate": 4.642630597014926e-05, + "loss": 0.0011, + "step": 30648 + }, + { + "epoch": 28.59, + "learning_rate": 4.642583955223881e-05, + "loss": 0.0047, + "step": 30652 + }, + { + "epoch": 28.6, + "learning_rate": 4.642537313432836e-05, + "loss": 0.0001, + "step": 30656 + }, + { + "epoch": 28.6, + "learning_rate": 4.642490671641791e-05, + "loss": 0.0002, + "step": 30660 + }, + { + "epoch": 28.6, + "learning_rate": 4.642444029850747e-05, + "loss": 0.0005, + "step": 30664 + }, + { + "epoch": 28.61, + "learning_rate": 4.6423973880597015e-05, + "loss": 0.0007, + "step": 30668 + }, + { + "epoch": 28.61, + "learning_rate": 4.642350746268657e-05, + "loss": 0.0027, + "step": 30672 + }, + { + "epoch": 28.62, + "learning_rate": 4.6423041044776125e-05, + "loss": 0.0001, + "step": 30676 + }, + { + "epoch": 28.62, + "learning_rate": 4.642257462686567e-05, + "loss": 0.0003, + "step": 30680 + }, + { + "epoch": 28.62, + "learning_rate": 4.642210820895523e-05, + "loss": 0.0053, + "step": 30684 + }, + { + "epoch": 28.63, + "learning_rate": 4.6421641791044776e-05, + "loss": 0.0001, + "step": 30688 + }, + { + "epoch": 28.63, + "learning_rate": 4.642117537313433e-05, + "loss": 0.0002, + "step": 30692 + }, + { + "epoch": 28.63, + "learning_rate": 4.6420708955223886e-05, + "loss": 0.0011, + "step": 30696 + }, + { + "epoch": 28.64, + "learning_rate": 4.6420242537313434e-05, + "loss": 0.0009, + "step": 30700 + }, + { + "epoch": 28.64, + "learning_rate": 4.641977611940299e-05, + "loss": 0.0007, + "step": 30704 + }, + { + "epoch": 28.65, + "learning_rate": 4.6419309701492544e-05, + "loss": 0.0011, + "step": 30708 + }, + { + "epoch": 28.65, + "learning_rate": 4.641884328358209e-05, + "loss": 0.0003, + "step": 30712 + }, + { + "epoch": 28.65, + "learning_rate": 4.641837686567164e-05, + "loss": 0.0016, + "step": 30716 + }, + { + "epoch": 28.66, + "learning_rate": 4.6417910447761195e-05, + "loss": 0.0002, + "step": 30720 + }, + { + "epoch": 28.66, + "learning_rate": 4.641744402985075e-05, + "loss": 0.0001, + "step": 30724 + }, + { + "epoch": 28.66, + "learning_rate": 4.64169776119403e-05, + "loss": 0.0003, + "step": 30728 + }, + { + "epoch": 28.67, + "learning_rate": 4.641651119402985e-05, + "loss": 0.0002, + "step": 30732 + }, + { + "epoch": 28.67, + "learning_rate": 4.641604477611941e-05, + "loss": 0.0003, + "step": 30736 + }, + { + "epoch": 28.68, + "learning_rate": 4.6415578358208956e-05, + "loss": 0.0001, + "step": 30740 + }, + { + "epoch": 28.68, + "learning_rate": 4.641511194029851e-05, + "loss": 0.0001, + "step": 30744 + }, + { + "epoch": 28.68, + "learning_rate": 4.641464552238806e-05, + "loss": 0.0, + "step": 30748 + }, + { + "epoch": 28.69, + "learning_rate": 4.6414179104477614e-05, + "loss": 0.0014, + "step": 30752 + }, + { + "epoch": 28.69, + "learning_rate": 4.641371268656717e-05, + "loss": 0.0002, + "step": 30756 + }, + { + "epoch": 28.69, + "learning_rate": 4.641324626865672e-05, + "loss": 0.0, + "step": 30760 + }, + { + "epoch": 28.7, + "learning_rate": 4.641277985074627e-05, + "loss": 0.0006, + "step": 30764 + }, + { + "epoch": 28.7, + "learning_rate": 4.6412313432835827e-05, + "loss": 0.0004, + "step": 30768 + }, + { + "epoch": 28.71, + "learning_rate": 4.6411847014925375e-05, + "loss": 0.0, + "step": 30772 + }, + { + "epoch": 28.71, + "learning_rate": 4.641138059701492e-05, + "loss": 0.0, + "step": 30776 + }, + { + "epoch": 28.71, + "learning_rate": 4.641091417910448e-05, + "loss": 0.0002, + "step": 30780 + }, + { + "epoch": 28.72, + "learning_rate": 4.641044776119403e-05, + "loss": 0.0029, + "step": 30784 + }, + { + "epoch": 28.72, + "learning_rate": 4.640998134328358e-05, + "loss": 0.0002, + "step": 30788 + }, + { + "epoch": 28.72, + "learning_rate": 4.6409514925373136e-05, + "loss": 0.0094, + "step": 30792 + }, + { + "epoch": 28.73, + "learning_rate": 4.640904850746269e-05, + "loss": 0.0011, + "step": 30796 + }, + { + "epoch": 28.73, + "learning_rate": 4.640858208955224e-05, + "loss": 0.0027, + "step": 30800 + }, + { + "epoch": 28.73, + "learning_rate": 4.6408115671641794e-05, + "loss": 0.0009, + "step": 30804 + }, + { + "epoch": 28.74, + "learning_rate": 4.640764925373134e-05, + "loss": 0.0007, + "step": 30808 + }, + { + "epoch": 28.74, + "learning_rate": 4.6407182835820903e-05, + "loss": 0.0001, + "step": 30812 + }, + { + "epoch": 28.75, + "learning_rate": 4.640671641791045e-05, + "loss": 0.0018, + "step": 30816 + }, + { + "epoch": 28.75, + "learning_rate": 4.640625e-05, + "loss": 0.0005, + "step": 30820 + }, + { + "epoch": 28.75, + "learning_rate": 4.6405783582089555e-05, + "loss": 0.0003, + "step": 30824 + }, + { + "epoch": 28.76, + "learning_rate": 4.640531716417911e-05, + "loss": 0.0003, + "step": 30828 + }, + { + "epoch": 28.76, + "learning_rate": 4.640485074626866e-05, + "loss": 0.0011, + "step": 30832 + }, + { + "epoch": 28.76, + "learning_rate": 4.640438432835821e-05, + "loss": 0.0047, + "step": 30836 + }, + { + "epoch": 28.77, + "learning_rate": 4.640391791044776e-05, + "loss": 0.0002, + "step": 30840 + }, + { + "epoch": 28.77, + "learning_rate": 4.6403451492537316e-05, + "loss": 0.0002, + "step": 30844 + }, + { + "epoch": 28.78, + "learning_rate": 4.640298507462687e-05, + "loss": 0.0, + "step": 30848 + }, + { + "epoch": 28.78, + "learning_rate": 4.640251865671642e-05, + "loss": 0.0, + "step": 30852 + }, + { + "epoch": 28.78, + "learning_rate": 4.6402052238805974e-05, + "loss": 0.0, + "step": 30856 + }, + { + "epoch": 28.79, + "learning_rate": 4.640158582089553e-05, + "loss": 0.0002, + "step": 30860 + }, + { + "epoch": 28.79, + "learning_rate": 4.640111940298508e-05, + "loss": 0.0031, + "step": 30864 + }, + { + "epoch": 28.79, + "learning_rate": 4.6400652985074625e-05, + "loss": 0.0001, + "step": 30868 + }, + { + "epoch": 28.8, + "learning_rate": 4.6400186567164186e-05, + "loss": 0.0001, + "step": 30872 + }, + { + "epoch": 28.8, + "learning_rate": 4.6399720149253735e-05, + "loss": 0.0007, + "step": 30876 + }, + { + "epoch": 28.81, + "learning_rate": 4.639925373134328e-05, + "loss": 0.0001, + "step": 30880 + }, + { + "epoch": 28.81, + "learning_rate": 4.639878731343284e-05, + "loss": 0.0004, + "step": 30884 + }, + { + "epoch": 28.81, + "learning_rate": 4.639832089552239e-05, + "loss": 0.0003, + "step": 30888 + }, + { + "epoch": 28.82, + "learning_rate": 4.639785447761194e-05, + "loss": 0.0042, + "step": 30892 + }, + { + "epoch": 28.82, + "learning_rate": 4.6397388059701496e-05, + "loss": 0.0001, + "step": 30896 + }, + { + "epoch": 28.82, + "learning_rate": 4.6396921641791044e-05, + "loss": 0.0, + "step": 30900 + }, + { + "epoch": 28.83, + "learning_rate": 4.63964552238806e-05, + "loss": 0.0001, + "step": 30904 + }, + { + "epoch": 28.83, + "learning_rate": 4.6395988805970154e-05, + "loss": 0.0, + "step": 30908 + }, + { + "epoch": 28.84, + "learning_rate": 4.63955223880597e-05, + "loss": 0.0007, + "step": 30912 + }, + { + "epoch": 28.84, + "learning_rate": 4.6395055970149257e-05, + "loss": 0.0009, + "step": 30916 + }, + { + "epoch": 28.84, + "learning_rate": 4.639458955223881e-05, + "loss": 0.0066, + "step": 30920 + }, + { + "epoch": 28.85, + "learning_rate": 4.639412313432836e-05, + "loss": 0.0005, + "step": 30924 + }, + { + "epoch": 28.85, + "learning_rate": 4.639365671641791e-05, + "loss": 0.0012, + "step": 30928 + }, + { + "epoch": 28.85, + "learning_rate": 4.639319029850747e-05, + "loss": 0.0002, + "step": 30932 + }, + { + "epoch": 28.86, + "learning_rate": 4.639272388059702e-05, + "loss": 0.0, + "step": 30936 + }, + { + "epoch": 28.86, + "learning_rate": 4.6392257462686566e-05, + "loss": 0.0003, + "step": 30940 + }, + { + "epoch": 28.87, + "learning_rate": 4.639179104477612e-05, + "loss": 0.0007, + "step": 30944 + }, + { + "epoch": 28.87, + "learning_rate": 4.6391324626865675e-05, + "loss": 0.0001, + "step": 30948 + }, + { + "epoch": 28.87, + "learning_rate": 4.6390858208955224e-05, + "loss": 0.0001, + "step": 30952 + }, + { + "epoch": 28.88, + "learning_rate": 4.639039179104478e-05, + "loss": 0.0002, + "step": 30956 + }, + { + "epoch": 28.88, + "learning_rate": 4.638992537313433e-05, + "loss": 0.0007, + "step": 30960 + }, + { + "epoch": 28.88, + "learning_rate": 4.638945895522388e-05, + "loss": 0.0002, + "step": 30964 + }, + { + "epoch": 28.89, + "learning_rate": 4.6388992537313436e-05, + "loss": 0.0039, + "step": 30968 + }, + { + "epoch": 28.89, + "learning_rate": 4.6388526119402985e-05, + "loss": 0.0, + "step": 30972 + }, + { + "epoch": 28.9, + "learning_rate": 4.638805970149254e-05, + "loss": 0.0006, + "step": 30976 + }, + { + "epoch": 28.9, + "learning_rate": 4.6387593283582094e-05, + "loss": 0.0, + "step": 30980 + }, + { + "epoch": 28.9, + "learning_rate": 4.638712686567164e-05, + "loss": 0.0018, + "step": 30984 + }, + { + "epoch": 28.91, + "learning_rate": 4.63866604477612e-05, + "loss": 0.0024, + "step": 30988 + }, + { + "epoch": 28.91, + "learning_rate": 4.638619402985075e-05, + "loss": 0.0023, + "step": 30992 + }, + { + "epoch": 28.91, + "learning_rate": 4.63857276119403e-05, + "loss": 0.001, + "step": 30996 + }, + { + "epoch": 28.92, + "learning_rate": 4.6385261194029855e-05, + "loss": 0.0003, + "step": 31000 + }, + { + "epoch": 28.92, + "eval_exact_match": 0.7263056092843327, + "eval_exec": 0.7649903288201161, + "eval_loss": 0.41026368737220764, + "eval_runtime": 1150.5617, + "eval_samples_per_second": 0.899, + "step": 31000 + }, + { + "epoch": 28.92, + "learning_rate": 4.6384794776119404e-05, + "loss": 0.0009, + "step": 31004 + }, + { + "epoch": 28.93, + "learning_rate": 4.638432835820896e-05, + "loss": 0.0017, + "step": 31008 + }, + { + "epoch": 28.93, + "learning_rate": 4.638386194029851e-05, + "loss": 0.0015, + "step": 31012 + }, + { + "epoch": 28.93, + "learning_rate": 4.638339552238806e-05, + "loss": 0.0, + "step": 31016 + }, + { + "epoch": 28.94, + "learning_rate": 4.638292910447761e-05, + "loss": 0.0016, + "step": 31020 + }, + { + "epoch": 28.94, + "learning_rate": 4.638246268656717e-05, + "loss": 0.0142, + "step": 31024 + }, + { + "epoch": 28.94, + "learning_rate": 4.638199626865672e-05, + "loss": 0.0027, + "step": 31028 + }, + { + "epoch": 28.95, + "learning_rate": 4.638152985074627e-05, + "loss": 0.0, + "step": 31032 + }, + { + "epoch": 28.95, + "learning_rate": 4.638106343283582e-05, + "loss": 0.0025, + "step": 31036 + }, + { + "epoch": 28.96, + "learning_rate": 4.638059701492538e-05, + "loss": 0.0004, + "step": 31040 + }, + { + "epoch": 28.96, + "learning_rate": 4.6380130597014925e-05, + "loss": 0.0007, + "step": 31044 + }, + { + "epoch": 28.96, + "learning_rate": 4.637966417910448e-05, + "loss": 0.0001, + "step": 31048 + }, + { + "epoch": 28.97, + "learning_rate": 4.6379197761194035e-05, + "loss": 0.0002, + "step": 31052 + }, + { + "epoch": 28.97, + "learning_rate": 4.6378731343283583e-05, + "loss": 0.0045, + "step": 31056 + }, + { + "epoch": 28.97, + "learning_rate": 4.637826492537314e-05, + "loss": 0.0011, + "step": 31060 + }, + { + "epoch": 28.98, + "learning_rate": 4.6377798507462686e-05, + "loss": 0.0001, + "step": 31064 + }, + { + "epoch": 28.98, + "learning_rate": 4.637733208955224e-05, + "loss": 0.0005, + "step": 31068 + }, + { + "epoch": 28.98, + "learning_rate": 4.6376865671641796e-05, + "loss": 0.0009, + "step": 31072 + }, + { + "epoch": 28.99, + "learning_rate": 4.6376399253731344e-05, + "loss": 0.0003, + "step": 31076 + }, + { + "epoch": 28.99, + "learning_rate": 4.637593283582089e-05, + "loss": 0.0017, + "step": 31080 + }, + { + "epoch": 29.0, + "learning_rate": 4.6375466417910454e-05, + "loss": 0.0004, + "step": 31084 + }, + { + "epoch": 29.0, + "learning_rate": 4.6375e-05, + "loss": 0.0047, + "step": 31088 + }, + { + "epoch": 29.0, + "learning_rate": 4.637453358208955e-05, + "loss": 0.0004, + "step": 31092 + }, + { + "epoch": 29.01, + "learning_rate": 4.6374067164179105e-05, + "loss": 0.0001, + "step": 31096 + }, + { + "epoch": 29.01, + "learning_rate": 4.637360074626866e-05, + "loss": 0.0001, + "step": 31100 + }, + { + "epoch": 29.01, + "learning_rate": 4.637313432835821e-05, + "loss": 0.0003, + "step": 31104 + }, + { + "epoch": 29.02, + "learning_rate": 4.637266791044776e-05, + "loss": 0.0004, + "step": 31108 + }, + { + "epoch": 29.02, + "learning_rate": 4.637220149253732e-05, + "loss": 0.0036, + "step": 31112 + }, + { + "epoch": 29.03, + "learning_rate": 4.6371735074626866e-05, + "loss": 0.0001, + "step": 31116 + }, + { + "epoch": 29.03, + "learning_rate": 4.637126865671642e-05, + "loss": 0.0001, + "step": 31120 + }, + { + "epoch": 29.03, + "learning_rate": 4.637080223880597e-05, + "loss": 0.0019, + "step": 31124 + }, + { + "epoch": 29.04, + "learning_rate": 4.6370335820895524e-05, + "loss": 0.0003, + "step": 31128 + }, + { + "epoch": 29.04, + "learning_rate": 4.636986940298508e-05, + "loss": 0.0018, + "step": 31132 + }, + { + "epoch": 29.04, + "learning_rate": 4.636940298507463e-05, + "loss": 0.0001, + "step": 31136 + }, + { + "epoch": 29.05, + "learning_rate": 4.636893656716418e-05, + "loss": 0.0005, + "step": 31140 + }, + { + "epoch": 29.05, + "learning_rate": 4.636847014925374e-05, + "loss": 0.0001, + "step": 31144 + }, + { + "epoch": 29.06, + "learning_rate": 4.6368003731343285e-05, + "loss": 0.0001, + "step": 31148 + }, + { + "epoch": 29.06, + "learning_rate": 4.636753731343284e-05, + "loss": 0.0, + "step": 31152 + }, + { + "epoch": 29.06, + "learning_rate": 4.636707089552239e-05, + "loss": 0.0, + "step": 31156 + }, + { + "epoch": 29.07, + "learning_rate": 4.636660447761194e-05, + "loss": 0.0001, + "step": 31160 + }, + { + "epoch": 29.07, + "learning_rate": 4.63661380597015e-05, + "loss": 0.0001, + "step": 31164 + }, + { + "epoch": 29.07, + "learning_rate": 4.6365671641791046e-05, + "loss": 0.0, + "step": 31168 + }, + { + "epoch": 29.08, + "learning_rate": 4.63652052238806e-05, + "loss": 0.0006, + "step": 31172 + }, + { + "epoch": 29.08, + "learning_rate": 4.6364738805970156e-05, + "loss": 0.0001, + "step": 31176 + }, + { + "epoch": 29.09, + "learning_rate": 4.6364272388059704e-05, + "loss": 0.0007, + "step": 31180 + }, + { + "epoch": 29.09, + "learning_rate": 4.636380597014925e-05, + "loss": 0.0053, + "step": 31184 + }, + { + "epoch": 29.09, + "learning_rate": 4.636333955223881e-05, + "loss": 0.0003, + "step": 31188 + }, + { + "epoch": 29.1, + "learning_rate": 4.636287313432836e-05, + "loss": 0.0001, + "step": 31192 + }, + { + "epoch": 29.1, + "learning_rate": 4.636240671641791e-05, + "loss": 0.0003, + "step": 31196 + }, + { + "epoch": 29.1, + "learning_rate": 4.6361940298507465e-05, + "loss": 0.0001, + "step": 31200 + }, + { + "epoch": 29.11, + "learning_rate": 4.636147388059702e-05, + "loss": 0.0007, + "step": 31204 + }, + { + "epoch": 29.11, + "learning_rate": 4.636100746268657e-05, + "loss": 0.0002, + "step": 31208 + }, + { + "epoch": 29.12, + "learning_rate": 4.636054104477612e-05, + "loss": 0.0013, + "step": 31212 + }, + { + "epoch": 29.12, + "learning_rate": 4.636007462686567e-05, + "loss": 0.0014, + "step": 31216 + }, + { + "epoch": 29.12, + "learning_rate": 4.6359608208955226e-05, + "loss": 0.0, + "step": 31220 + }, + { + "epoch": 29.13, + "learning_rate": 4.635914179104478e-05, + "loss": 0.0011, + "step": 31224 + }, + { + "epoch": 29.13, + "learning_rate": 4.635867537313433e-05, + "loss": 0.0001, + "step": 31228 + }, + { + "epoch": 29.13, + "learning_rate": 4.635820895522388e-05, + "loss": 0.0006, + "step": 31232 + }, + { + "epoch": 29.14, + "learning_rate": 4.635774253731344e-05, + "loss": 0.0071, + "step": 31236 + }, + { + "epoch": 29.14, + "learning_rate": 4.635727611940299e-05, + "loss": 0.0001, + "step": 31240 + }, + { + "epoch": 29.15, + "learning_rate": 4.6356809701492535e-05, + "loss": 0.0003, + "step": 31244 + }, + { + "epoch": 29.15, + "learning_rate": 4.635634328358209e-05, + "loss": 0.0029, + "step": 31248 + }, + { + "epoch": 29.15, + "learning_rate": 4.6355876865671645e-05, + "loss": 0.0001, + "step": 31252 + }, + { + "epoch": 29.16, + "learning_rate": 4.635541044776119e-05, + "loss": 0.0001, + "step": 31256 + }, + { + "epoch": 29.16, + "learning_rate": 4.635494402985075e-05, + "loss": 0.0004, + "step": 31260 + }, + { + "epoch": 29.16, + "learning_rate": 4.63544776119403e-05, + "loss": 0.0, + "step": 31264 + }, + { + "epoch": 29.17, + "learning_rate": 4.635401119402985e-05, + "loss": 0.0007, + "step": 31268 + }, + { + "epoch": 29.17, + "learning_rate": 4.6353544776119406e-05, + "loss": 0.0059, + "step": 31272 + }, + { + "epoch": 29.18, + "learning_rate": 4.6353078358208954e-05, + "loss": 0.0023, + "step": 31276 + }, + { + "epoch": 29.18, + "learning_rate": 4.635261194029851e-05, + "loss": 0.0004, + "step": 31280 + }, + { + "epoch": 29.18, + "learning_rate": 4.6352145522388064e-05, + "loss": 0.0004, + "step": 31284 + }, + { + "epoch": 29.19, + "learning_rate": 4.635167910447761e-05, + "loss": 0.0006, + "step": 31288 + }, + { + "epoch": 29.19, + "learning_rate": 4.635121268656716e-05, + "loss": 0.0001, + "step": 31292 + }, + { + "epoch": 29.19, + "learning_rate": 4.635074626865672e-05, + "loss": 0.0001, + "step": 31296 + }, + { + "epoch": 29.2, + "learning_rate": 4.635027985074627e-05, + "loss": 0.0003, + "step": 31300 + }, + { + "epoch": 29.2, + "learning_rate": 4.6349813432835825e-05, + "loss": 0.0, + "step": 31304 + }, + { + "epoch": 29.21, + "learning_rate": 4.634934701492537e-05, + "loss": 0.0007, + "step": 31308 + }, + { + "epoch": 29.21, + "learning_rate": 4.634888059701493e-05, + "loss": 0.0015, + "step": 31312 + }, + { + "epoch": 29.21, + "learning_rate": 4.634841417910448e-05, + "loss": 0.0005, + "step": 31316 + }, + { + "epoch": 29.22, + "learning_rate": 4.634794776119403e-05, + "loss": 0.0006, + "step": 31320 + }, + { + "epoch": 29.22, + "learning_rate": 4.6347481343283586e-05, + "loss": 0.0005, + "step": 31324 + }, + { + "epoch": 29.22, + "learning_rate": 4.634701492537314e-05, + "loss": 0.0001, + "step": 31328 + }, + { + "epoch": 29.23, + "learning_rate": 4.634654850746269e-05, + "loss": 0.0018, + "step": 31332 + }, + { + "epoch": 29.23, + "learning_rate": 4.634608208955224e-05, + "loss": 0.0002, + "step": 31336 + }, + { + "epoch": 29.24, + "learning_rate": 4.634561567164179e-05, + "loss": 0.0, + "step": 31340 + }, + { + "epoch": 29.24, + "learning_rate": 4.634514925373135e-05, + "loss": 0.0001, + "step": 31344 + }, + { + "epoch": 29.24, + "learning_rate": 4.6344682835820895e-05, + "loss": 0.0, + "step": 31348 + }, + { + "epoch": 29.25, + "learning_rate": 4.634421641791045e-05, + "loss": 0.0, + "step": 31352 + }, + { + "epoch": 29.25, + "learning_rate": 4.6343750000000005e-05, + "loss": 0.0006, + "step": 31356 + }, + { + "epoch": 29.25, + "learning_rate": 4.634328358208955e-05, + "loss": 0.0002, + "step": 31360 + }, + { + "epoch": 29.26, + "learning_rate": 4.634281716417911e-05, + "loss": 0.0002, + "step": 31364 + }, + { + "epoch": 29.26, + "learning_rate": 4.6342350746268656e-05, + "loss": 0.0097, + "step": 31368 + }, + { + "epoch": 29.26, + "learning_rate": 4.634188432835821e-05, + "loss": 0.0, + "step": 31372 + }, + { + "epoch": 29.27, + "learning_rate": 4.6341417910447766e-05, + "loss": 0.0008, + "step": 31376 + }, + { + "epoch": 29.27, + "learning_rate": 4.6340951492537314e-05, + "loss": 0.0001, + "step": 31380 + }, + { + "epoch": 29.28, + "learning_rate": 4.634048507462687e-05, + "loss": 0.0015, + "step": 31384 + }, + { + "epoch": 29.28, + "learning_rate": 4.6340018656716424e-05, + "loss": 0.0018, + "step": 31388 + }, + { + "epoch": 29.28, + "learning_rate": 4.633955223880597e-05, + "loss": 0.0008, + "step": 31392 + }, + { + "epoch": 29.29, + "learning_rate": 4.633908582089552e-05, + "loss": 0.0001, + "step": 31396 + }, + { + "epoch": 29.29, + "learning_rate": 4.6338619402985075e-05, + "loss": 0.0008, + "step": 31400 + }, + { + "epoch": 29.29, + "learning_rate": 4.633815298507463e-05, + "loss": 0.0, + "step": 31404 + }, + { + "epoch": 29.3, + "learning_rate": 4.633768656716418e-05, + "loss": 0.0063, + "step": 31408 + }, + { + "epoch": 29.3, + "learning_rate": 4.633722014925373e-05, + "loss": 0.0001, + "step": 31412 + }, + { + "epoch": 29.31, + "learning_rate": 4.633675373134329e-05, + "loss": 0.0002, + "step": 31416 + }, + { + "epoch": 29.31, + "learning_rate": 4.6336287313432836e-05, + "loss": 0.0, + "step": 31420 + }, + { + "epoch": 29.31, + "learning_rate": 4.633582089552239e-05, + "loss": 0.0003, + "step": 31424 + }, + { + "epoch": 29.32, + "learning_rate": 4.633535447761194e-05, + "loss": 0.0007, + "step": 31428 + }, + { + "epoch": 29.32, + "learning_rate": 4.6334888059701494e-05, + "loss": 0.0002, + "step": 31432 + }, + { + "epoch": 29.32, + "learning_rate": 4.633442164179105e-05, + "loss": 0.0011, + "step": 31436 + }, + { + "epoch": 29.33, + "learning_rate": 4.63339552238806e-05, + "loss": 0.0022, + "step": 31440 + }, + { + "epoch": 29.33, + "learning_rate": 4.633348880597015e-05, + "loss": 0.0, + "step": 31444 + }, + { + "epoch": 29.34, + "learning_rate": 4.633302238805971e-05, + "loss": 0.0002, + "step": 31448 + }, + { + "epoch": 29.34, + "learning_rate": 4.6332555970149255e-05, + "loss": 0.0003, + "step": 31452 + }, + { + "epoch": 29.34, + "learning_rate": 4.63320895522388e-05, + "loss": 0.0002, + "step": 31456 + }, + { + "epoch": 29.35, + "learning_rate": 4.633162313432836e-05, + "loss": 0.0002, + "step": 31460 + }, + { + "epoch": 29.35, + "learning_rate": 4.633115671641791e-05, + "loss": 0.0003, + "step": 31464 + }, + { + "epoch": 29.35, + "learning_rate": 4.633069029850747e-05, + "loss": 0.0003, + "step": 31468 + }, + { + "epoch": 29.36, + "learning_rate": 4.6330223880597016e-05, + "loss": 0.0035, + "step": 31472 + }, + { + "epoch": 29.36, + "learning_rate": 4.632975746268657e-05, + "loss": 0.0011, + "step": 31476 + }, + { + "epoch": 29.37, + "learning_rate": 4.6329291044776126e-05, + "loss": 0.0001, + "step": 31480 + }, + { + "epoch": 29.37, + "learning_rate": 4.6328824626865674e-05, + "loss": 0.0002, + "step": 31484 + }, + { + "epoch": 29.37, + "learning_rate": 4.632835820895522e-05, + "loss": 0.0005, + "step": 31488 + }, + { + "epoch": 29.38, + "learning_rate": 4.6327891791044784e-05, + "loss": 0.0004, + "step": 31492 + }, + { + "epoch": 29.38, + "learning_rate": 4.632742537313433e-05, + "loss": 0.0008, + "step": 31496 + }, + { + "epoch": 29.38, + "learning_rate": 4.632695895522388e-05, + "loss": 0.0002, + "step": 31500 + }, + { + "epoch": 29.38, + "eval_exact_match": 0.741779497098646, + "eval_exec": 0.7746615087040619, + "eval_loss": 0.42285799980163574, + "eval_runtime": 1077.7296, + "eval_samples_per_second": 0.959, + "step": 31500 + }, + { + "epoch": 29.39, + "learning_rate": 4.6326492537313435e-05, + "loss": 0.0001, + "step": 31504 + }, + { + "epoch": 29.39, + "learning_rate": 4.632602611940299e-05, + "loss": 0.0, + "step": 31508 + }, + { + "epoch": 29.4, + "learning_rate": 4.632555970149254e-05, + "loss": 0.0004, + "step": 31512 + }, + { + "epoch": 29.4, + "learning_rate": 4.632509328358209e-05, + "loss": 0.0, + "step": 31516 + }, + { + "epoch": 29.4, + "learning_rate": 4.632462686567164e-05, + "loss": 0.0014, + "step": 31520 + }, + { + "epoch": 29.41, + "learning_rate": 4.6324160447761196e-05, + "loss": 0.037, + "step": 31524 + }, + { + "epoch": 29.41, + "learning_rate": 4.632369402985075e-05, + "loss": 0.004, + "step": 31528 + }, + { + "epoch": 29.41, + "learning_rate": 4.63232276119403e-05, + "loss": 0.0, + "step": 31532 + }, + { + "epoch": 29.42, + "learning_rate": 4.6322761194029854e-05, + "loss": 0.0001, + "step": 31536 + }, + { + "epoch": 29.42, + "learning_rate": 4.632229477611941e-05, + "loss": 0.0005, + "step": 31540 + }, + { + "epoch": 29.43, + "learning_rate": 4.632182835820896e-05, + "loss": 0.0022, + "step": 31544 + }, + { + "epoch": 29.43, + "learning_rate": 4.6321361940298505e-05, + "loss": 0.0001, + "step": 31548 + }, + { + "epoch": 29.43, + "learning_rate": 4.632089552238807e-05, + "loss": 0.001, + "step": 31552 + }, + { + "epoch": 29.44, + "learning_rate": 4.6320429104477615e-05, + "loss": 0.001, + "step": 31556 + }, + { + "epoch": 29.44, + "learning_rate": 4.631996268656716e-05, + "loss": 0.0, + "step": 31560 + }, + { + "epoch": 29.44, + "learning_rate": 4.631949626865672e-05, + "loss": 0.0012, + "step": 31564 + }, + { + "epoch": 29.45, + "learning_rate": 4.631902985074627e-05, + "loss": 0.0011, + "step": 31568 + }, + { + "epoch": 29.45, + "learning_rate": 4.631856343283582e-05, + "loss": 0.0005, + "step": 31572 + }, + { + "epoch": 29.46, + "learning_rate": 4.6318097014925376e-05, + "loss": 0.0018, + "step": 31576 + }, + { + "epoch": 29.46, + "learning_rate": 4.6317630597014924e-05, + "loss": 0.0, + "step": 31580 + }, + { + "epoch": 29.46, + "learning_rate": 4.631716417910448e-05, + "loss": 0.0001, + "step": 31584 + }, + { + "epoch": 29.47, + "learning_rate": 4.6316697761194034e-05, + "loss": 0.002, + "step": 31588 + }, + { + "epoch": 29.47, + "learning_rate": 4.631623134328358e-05, + "loss": 0.0004, + "step": 31592 + }, + { + "epoch": 29.47, + "learning_rate": 4.631576492537314e-05, + "loss": 0.0172, + "step": 31596 + }, + { + "epoch": 29.48, + "learning_rate": 4.631529850746269e-05, + "loss": 0.0001, + "step": 31600 + }, + { + "epoch": 29.48, + "learning_rate": 4.631483208955224e-05, + "loss": 0.0021, + "step": 31604 + }, + { + "epoch": 29.49, + "learning_rate": 4.631436567164179e-05, + "loss": 0.0003, + "step": 31608 + }, + { + "epoch": 29.49, + "learning_rate": 4.631389925373135e-05, + "loss": 0.0005, + "step": 31612 + }, + { + "epoch": 29.49, + "learning_rate": 4.63134328358209e-05, + "loss": 0.0004, + "step": 31616 + }, + { + "epoch": 29.5, + "learning_rate": 4.6312966417910446e-05, + "loss": 0.0002, + "step": 31620 + }, + { + "epoch": 29.5, + "learning_rate": 4.63125e-05, + "loss": 0.0, + "step": 31624 + }, + { + "epoch": 29.5, + "learning_rate": 4.6312033582089556e-05, + "loss": 0.0007, + "step": 31628 + }, + { + "epoch": 29.51, + "learning_rate": 4.631156716417911e-05, + "loss": 0.0001, + "step": 31632 + }, + { + "epoch": 29.51, + "learning_rate": 4.631110074626866e-05, + "loss": 0.0015, + "step": 31636 + }, + { + "epoch": 29.51, + "learning_rate": 4.631063432835821e-05, + "loss": 0.0001, + "step": 31640 + }, + { + "epoch": 29.52, + "learning_rate": 4.631016791044777e-05, + "loss": 0.0019, + "step": 31644 + }, + { + "epoch": 29.52, + "learning_rate": 4.630970149253732e-05, + "loss": 0.0003, + "step": 31648 + }, + { + "epoch": 29.53, + "learning_rate": 4.6309235074626865e-05, + "loss": 0.0, + "step": 31652 + }, + { + "epoch": 29.53, + "learning_rate": 4.630876865671642e-05, + "loss": 0.0235, + "step": 31656 + }, + { + "epoch": 29.53, + "learning_rate": 4.6308302238805975e-05, + "loss": 0.0001, + "step": 31660 + }, + { + "epoch": 29.54, + "learning_rate": 4.630783582089552e-05, + "loss": 0.0015, + "step": 31664 + }, + { + "epoch": 29.54, + "learning_rate": 4.630736940298508e-05, + "loss": 0.0002, + "step": 31668 + }, + { + "epoch": 29.54, + "learning_rate": 4.630690298507463e-05, + "loss": 0.0001, + "step": 31672 + }, + { + "epoch": 29.55, + "learning_rate": 4.630643656716418e-05, + "loss": 0.0002, + "step": 31676 + }, + { + "epoch": 29.55, + "learning_rate": 4.6305970149253736e-05, + "loss": 0.0, + "step": 31680 + }, + { + "epoch": 29.56, + "learning_rate": 4.6305503731343284e-05, + "loss": 0.0001, + "step": 31684 + }, + { + "epoch": 29.56, + "learning_rate": 4.630503731343284e-05, + "loss": 0.0003, + "step": 31688 + }, + { + "epoch": 29.56, + "learning_rate": 4.6304570895522394e-05, + "loss": 0.0, + "step": 31692 + }, + { + "epoch": 29.57, + "learning_rate": 4.630410447761194e-05, + "loss": 0.0001, + "step": 31696 + }, + { + "epoch": 29.57, + "learning_rate": 4.630363805970149e-05, + "loss": 0.0002, + "step": 31700 + }, + { + "epoch": 29.57, + "learning_rate": 4.630317164179105e-05, + "loss": 0.0009, + "step": 31704 + }, + { + "epoch": 29.58, + "learning_rate": 4.63027052238806e-05, + "loss": 0.0001, + "step": 31708 + }, + { + "epoch": 29.58, + "learning_rate": 4.630223880597015e-05, + "loss": 0.0001, + "step": 31712 + }, + { + "epoch": 29.59, + "learning_rate": 4.63017723880597e-05, + "loss": 0.0, + "step": 31716 + }, + { + "epoch": 29.59, + "learning_rate": 4.630130597014926e-05, + "loss": 0.0001, + "step": 31720 + }, + { + "epoch": 29.59, + "learning_rate": 4.6300839552238806e-05, + "loss": 0.0023, + "step": 31724 + }, + { + "epoch": 29.6, + "learning_rate": 4.630037313432836e-05, + "loss": 0.001, + "step": 31728 + }, + { + "epoch": 29.6, + "learning_rate": 4.6299906716417916e-05, + "loss": 0.0001, + "step": 31732 + }, + { + "epoch": 29.6, + "learning_rate": 4.6299440298507464e-05, + "loss": 0.0034, + "step": 31736 + }, + { + "epoch": 29.61, + "learning_rate": 4.629897388059702e-05, + "loss": 0.0007, + "step": 31740 + }, + { + "epoch": 29.61, + "learning_rate": 4.629850746268657e-05, + "loss": 0.0002, + "step": 31744 + }, + { + "epoch": 29.62, + "learning_rate": 4.629804104477612e-05, + "loss": 0.0001, + "step": 31748 + }, + { + "epoch": 29.62, + "learning_rate": 4.6297574626865677e-05, + "loss": 0.0007, + "step": 31752 + }, + { + "epoch": 29.62, + "learning_rate": 4.6297108208955225e-05, + "loss": 0.0001, + "step": 31756 + }, + { + "epoch": 29.63, + "learning_rate": 4.629664179104477e-05, + "loss": 0.0028, + "step": 31760 + }, + { + "epoch": 29.63, + "learning_rate": 4.6296175373134335e-05, + "loss": 0.0001, + "step": 31764 + }, + { + "epoch": 29.63, + "learning_rate": 4.629570895522388e-05, + "loss": 0.0111, + "step": 31768 + }, + { + "epoch": 29.64, + "learning_rate": 4.629524253731343e-05, + "loss": 0.0001, + "step": 31772 + }, + { + "epoch": 29.64, + "learning_rate": 4.6294776119402986e-05, + "loss": 0.0001, + "step": 31776 + }, + { + "epoch": 29.65, + "learning_rate": 4.629430970149254e-05, + "loss": 0.0013, + "step": 31780 + }, + { + "epoch": 29.65, + "learning_rate": 4.629384328358209e-05, + "loss": 0.0004, + "step": 31784 + }, + { + "epoch": 29.65, + "learning_rate": 4.6293376865671644e-05, + "loss": 0.0011, + "step": 31788 + }, + { + "epoch": 29.66, + "learning_rate": 4.62929104477612e-05, + "loss": 0.0001, + "step": 31792 + }, + { + "epoch": 29.66, + "learning_rate": 4.6292444029850753e-05, + "loss": 0.0015, + "step": 31796 + }, + { + "epoch": 29.66, + "learning_rate": 4.62919776119403e-05, + "loss": 0.001, + "step": 31800 + }, + { + "epoch": 29.67, + "learning_rate": 4.629151119402985e-05, + "loss": 0.0008, + "step": 31804 + }, + { + "epoch": 29.67, + "learning_rate": 4.6291044776119405e-05, + "loss": 0.0002, + "step": 31808 + }, + { + "epoch": 29.68, + "learning_rate": 4.629057835820896e-05, + "loss": 0.0002, + "step": 31812 + }, + { + "epoch": 29.68, + "learning_rate": 4.629011194029851e-05, + "loss": 0.0117, + "step": 31816 + }, + { + "epoch": 29.68, + "learning_rate": 4.628964552238806e-05, + "loss": 0.0005, + "step": 31820 + }, + { + "epoch": 29.69, + "learning_rate": 4.628917910447762e-05, + "loss": 0.0025, + "step": 31824 + }, + { + "epoch": 29.69, + "learning_rate": 4.6288712686567166e-05, + "loss": 0.0005, + "step": 31828 + }, + { + "epoch": 29.69, + "learning_rate": 4.628824626865672e-05, + "loss": 0.003, + "step": 31832 + }, + { + "epoch": 29.7, + "learning_rate": 4.628777985074627e-05, + "loss": 0.0, + "step": 31836 + }, + { + "epoch": 29.7, + "learning_rate": 4.6287313432835824e-05, + "loss": 0.0002, + "step": 31840 + }, + { + "epoch": 29.71, + "learning_rate": 4.628684701492538e-05, + "loss": 0.0016, + "step": 31844 + }, + { + "epoch": 29.71, + "learning_rate": 4.6286380597014927e-05, + "loss": 0.0002, + "step": 31848 + }, + { + "epoch": 29.71, + "learning_rate": 4.628591417910448e-05, + "loss": 0.0002, + "step": 31852 + }, + { + "epoch": 29.72, + "learning_rate": 4.6285447761194036e-05, + "loss": 0.0002, + "step": 31856 + }, + { + "epoch": 29.72, + "learning_rate": 4.6284981343283585e-05, + "loss": 0.0003, + "step": 31860 + }, + { + "epoch": 29.72, + "learning_rate": 4.628451492537313e-05, + "loss": 0.002, + "step": 31864 + }, + { + "epoch": 29.73, + "learning_rate": 4.628404850746269e-05, + "loss": 0.0025, + "step": 31868 + }, + { + "epoch": 29.73, + "learning_rate": 4.628358208955224e-05, + "loss": 0.0001, + "step": 31872 + }, + { + "epoch": 29.73, + "learning_rate": 4.628311567164179e-05, + "loss": 0.0001, + "step": 31876 + }, + { + "epoch": 29.74, + "learning_rate": 4.6282649253731346e-05, + "loss": 0.0001, + "step": 31880 + }, + { + "epoch": 29.74, + "learning_rate": 4.62821828358209e-05, + "loss": 0.0002, + "step": 31884 + }, + { + "epoch": 29.75, + "learning_rate": 4.628171641791045e-05, + "loss": 0.0001, + "step": 31888 + }, + { + "epoch": 29.75, + "learning_rate": 4.6281250000000003e-05, + "loss": 0.0004, + "step": 31892 + }, + { + "epoch": 29.75, + "learning_rate": 4.628078358208955e-05, + "loss": 0.0013, + "step": 31896 + }, + { + "epoch": 29.76, + "learning_rate": 4.6280317164179107e-05, + "loss": 0.0084, + "step": 31900 + }, + { + "epoch": 29.76, + "learning_rate": 4.627985074626866e-05, + "loss": 0.002, + "step": 31904 + }, + { + "epoch": 29.76, + "learning_rate": 4.627938432835821e-05, + "loss": 0.0001, + "step": 31908 + }, + { + "epoch": 29.77, + "learning_rate": 4.627891791044776e-05, + "loss": 0.0001, + "step": 31912 + }, + { + "epoch": 29.77, + "learning_rate": 4.627845149253732e-05, + "loss": 0.0007, + "step": 31916 + }, + { + "epoch": 29.78, + "learning_rate": 4.627798507462687e-05, + "loss": 0.0002, + "step": 31920 + }, + { + "epoch": 29.78, + "learning_rate": 4.6277518656716416e-05, + "loss": 0.0034, + "step": 31924 + }, + { + "epoch": 29.78, + "learning_rate": 4.627705223880597e-05, + "loss": 0.0002, + "step": 31928 + }, + { + "epoch": 29.79, + "learning_rate": 4.6276585820895525e-05, + "loss": 0.0013, + "step": 31932 + }, + { + "epoch": 29.79, + "learning_rate": 4.6276119402985074e-05, + "loss": 0.0002, + "step": 31936 + }, + { + "epoch": 29.79, + "learning_rate": 4.627565298507463e-05, + "loss": 0.0, + "step": 31940 + }, + { + "epoch": 29.8, + "learning_rate": 4.6275186567164183e-05, + "loss": 0.0003, + "step": 31944 + }, + { + "epoch": 29.8, + "learning_rate": 4.627472014925374e-05, + "loss": 0.0001, + "step": 31948 + }, + { + "epoch": 29.81, + "learning_rate": 4.6274253731343286e-05, + "loss": 0.0002, + "step": 31952 + }, + { + "epoch": 29.81, + "learning_rate": 4.6273787313432835e-05, + "loss": 0.0003, + "step": 31956 + }, + { + "epoch": 29.81, + "learning_rate": 4.6273320895522396e-05, + "loss": 0.001, + "step": 31960 + }, + { + "epoch": 29.82, + "learning_rate": 4.6272854477611944e-05, + "loss": 0.0, + "step": 31964 + }, + { + "epoch": 29.82, + "learning_rate": 4.627238805970149e-05, + "loss": 0.0015, + "step": 31968 + }, + { + "epoch": 29.82, + "learning_rate": 4.627192164179105e-05, + "loss": 0.001, + "step": 31972 + }, + { + "epoch": 29.83, + "learning_rate": 4.62714552238806e-05, + "loss": 0.0001, + "step": 31976 + }, + { + "epoch": 29.83, + "learning_rate": 4.627098880597015e-05, + "loss": 0.0011, + "step": 31980 + }, + { + "epoch": 29.84, + "learning_rate": 4.6270522388059705e-05, + "loss": 0.0006, + "step": 31984 + }, + { + "epoch": 29.84, + "learning_rate": 4.6270055970149254e-05, + "loss": 0.0002, + "step": 31988 + }, + { + "epoch": 29.84, + "learning_rate": 4.626958955223881e-05, + "loss": 0.0014, + "step": 31992 + }, + { + "epoch": 29.85, + "learning_rate": 4.626912313432836e-05, + "loss": 0.0007, + "step": 31996 + }, + { + "epoch": 29.85, + "learning_rate": 4.626865671641791e-05, + "loss": 0.0037, + "step": 32000 + }, + { + "epoch": 29.85, + "eval_exact_match": 0.7437137330754352, + "eval_exec": 0.7717601547388782, + "eval_loss": 0.40884315967559814, + "eval_runtime": 1094.761, + "eval_samples_per_second": 0.944, + "step": 32000 + }, + { + "epoch": 29.85, + "learning_rate": 4.6268190298507466e-05, + "loss": 0.0, + "step": 32004 + }, + { + "epoch": 29.86, + "learning_rate": 4.626772388059702e-05, + "loss": 0.0149, + "step": 32008 + }, + { + "epoch": 29.86, + "learning_rate": 4.626725746268657e-05, + "loss": 0.0004, + "step": 32012 + }, + { + "epoch": 29.87, + "learning_rate": 4.626679104477612e-05, + "loss": 0.0004, + "step": 32016 + }, + { + "epoch": 29.87, + "learning_rate": 4.626632462686567e-05, + "loss": 0.0001, + "step": 32020 + }, + { + "epoch": 29.87, + "learning_rate": 4.626585820895523e-05, + "loss": 0.0001, + "step": 32024 + }, + { + "epoch": 29.88, + "learning_rate": 4.6265391791044775e-05, + "loss": 0.0, + "step": 32028 + }, + { + "epoch": 29.88, + "learning_rate": 4.626492537313433e-05, + "loss": 0.0046, + "step": 32032 + }, + { + "epoch": 29.88, + "learning_rate": 4.6264458955223885e-05, + "loss": 0.0023, + "step": 32036 + }, + { + "epoch": 29.89, + "learning_rate": 4.6263992537313433e-05, + "loss": 0.0013, + "step": 32040 + }, + { + "epoch": 29.89, + "learning_rate": 4.626352611940299e-05, + "loss": 0.0003, + "step": 32044 + }, + { + "epoch": 29.9, + "learning_rate": 4.6263059701492536e-05, + "loss": 0.0, + "step": 32048 + }, + { + "epoch": 29.9, + "learning_rate": 4.626259328358209e-05, + "loss": 0.0001, + "step": 32052 + }, + { + "epoch": 29.9, + "learning_rate": 4.6262126865671646e-05, + "loss": 0.0001, + "step": 32056 + }, + { + "epoch": 29.91, + "learning_rate": 4.6261660447761194e-05, + "loss": 0.0001, + "step": 32060 + }, + { + "epoch": 29.91, + "learning_rate": 4.626119402985075e-05, + "loss": 0.0023, + "step": 32064 + }, + { + "epoch": 29.91, + "learning_rate": 4.6260727611940304e-05, + "loss": 0.0001, + "step": 32068 + }, + { + "epoch": 29.92, + "learning_rate": 4.626026119402985e-05, + "loss": 0.0004, + "step": 32072 + }, + { + "epoch": 29.92, + "learning_rate": 4.62597947761194e-05, + "loss": 0.0009, + "step": 32076 + }, + { + "epoch": 29.93, + "learning_rate": 4.6259328358208955e-05, + "loss": 0.0062, + "step": 32080 + }, + { + "epoch": 29.93, + "learning_rate": 4.625886194029851e-05, + "loss": 0.002, + "step": 32084 + }, + { + "epoch": 29.93, + "learning_rate": 4.625839552238806e-05, + "loss": 0.0001, + "step": 32088 + }, + { + "epoch": 29.94, + "learning_rate": 4.625792910447761e-05, + "loss": 0.0055, + "step": 32092 + }, + { + "epoch": 29.94, + "learning_rate": 4.625746268656717e-05, + "loss": 0.0002, + "step": 32096 + }, + { + "epoch": 29.94, + "learning_rate": 4.6256996268656716e-05, + "loss": 0.0003, + "step": 32100 + }, + { + "epoch": 29.95, + "learning_rate": 4.625652985074627e-05, + "loss": 0.0006, + "step": 32104 + }, + { + "epoch": 29.95, + "learning_rate": 4.625606343283582e-05, + "loss": 0.0005, + "step": 32108 + }, + { + "epoch": 29.96, + "learning_rate": 4.625559701492538e-05, + "loss": 0.0001, + "step": 32112 + }, + { + "epoch": 29.96, + "learning_rate": 4.625513059701493e-05, + "loss": 0.0001, + "step": 32116 + }, + { + "epoch": 29.96, + "learning_rate": 4.625466417910448e-05, + "loss": 0.0003, + "step": 32120 + }, + { + "epoch": 29.97, + "learning_rate": 4.625419776119403e-05, + "loss": 0.0014, + "step": 32124 + }, + { + "epoch": 29.97, + "learning_rate": 4.625373134328359e-05, + "loss": 0.0, + "step": 32128 + }, + { + "epoch": 29.97, + "learning_rate": 4.6253264925373135e-05, + "loss": 0.0021, + "step": 32132 + }, + { + "epoch": 29.98, + "learning_rate": 4.625279850746269e-05, + "loss": 0.0006, + "step": 32136 + }, + { + "epoch": 29.98, + "learning_rate": 4.625233208955224e-05, + "loss": 0.0003, + "step": 32140 + }, + { + "epoch": 29.98, + "learning_rate": 4.625186567164179e-05, + "loss": 0.0011, + "step": 32144 + }, + { + "epoch": 29.99, + "learning_rate": 4.625139925373135e-05, + "loss": 0.0005, + "step": 32148 + }, + { + "epoch": 29.99, + "learning_rate": 4.6250932835820896e-05, + "loss": 0.0001, + "step": 32152 + }, + { + "epoch": 30.0, + "learning_rate": 4.625046641791045e-05, + "loss": 0.0002, + "step": 32156 + }, + { + "epoch": 30.0, + "learning_rate": 4.6250000000000006e-05, + "loss": 0.0002, + "step": 32160 + }, + { + "epoch": 30.0, + "learning_rate": 4.6249533582089554e-05, + "loss": 0.0002, + "step": 32164 + }, + { + "epoch": 30.01, + "learning_rate": 4.62490671641791e-05, + "loss": 0.0005, + "step": 32168 + }, + { + "epoch": 30.01, + "learning_rate": 4.6248600746268664e-05, + "loss": 0.0006, + "step": 32172 + }, + { + "epoch": 30.01, + "learning_rate": 4.624813432835821e-05, + "loss": 0.0003, + "step": 32176 + }, + { + "epoch": 30.02, + "learning_rate": 4.624766791044776e-05, + "loss": 0.0139, + "step": 32180 + }, + { + "epoch": 30.02, + "learning_rate": 4.6247201492537315e-05, + "loss": 0.0009, + "step": 32184 + }, + { + "epoch": 30.03, + "learning_rate": 4.624673507462687e-05, + "loss": 0.0002, + "step": 32188 + }, + { + "epoch": 30.03, + "learning_rate": 4.624626865671642e-05, + "loss": 0.0001, + "step": 32192 + }, + { + "epoch": 30.03, + "learning_rate": 4.624580223880597e-05, + "loss": 0.0001, + "step": 32196 + }, + { + "epoch": 30.04, + "learning_rate": 4.624533582089552e-05, + "loss": 0.0001, + "step": 32200 + }, + { + "epoch": 30.04, + "learning_rate": 4.6244869402985076e-05, + "loss": 0.0008, + "step": 32204 + }, + { + "epoch": 30.04, + "learning_rate": 4.624440298507463e-05, + "loss": 0.0071, + "step": 32208 + }, + { + "epoch": 30.05, + "learning_rate": 4.624393656716418e-05, + "loss": 0.0001, + "step": 32212 + }, + { + "epoch": 30.05, + "learning_rate": 4.6243470149253734e-05, + "loss": 0.0001, + "step": 32216 + }, + { + "epoch": 30.06, + "learning_rate": 4.624300373134329e-05, + "loss": 0.0002, + "step": 32220 + }, + { + "epoch": 30.06, + "learning_rate": 4.624253731343284e-05, + "loss": 0.0, + "step": 32224 + }, + { + "epoch": 30.06, + "learning_rate": 4.6242070895522385e-05, + "loss": 0.0001, + "step": 32228 + }, + { + "epoch": 30.07, + "learning_rate": 4.624160447761195e-05, + "loss": 0.0008, + "step": 32232 + }, + { + "epoch": 30.07, + "learning_rate": 4.6241138059701495e-05, + "loss": 0.0003, + "step": 32236 + }, + { + "epoch": 30.07, + "learning_rate": 4.624067164179104e-05, + "loss": 0.0002, + "step": 32240 + }, + { + "epoch": 30.08, + "learning_rate": 4.62402052238806e-05, + "loss": 0.0013, + "step": 32244 + }, + { + "epoch": 30.08, + "learning_rate": 4.623973880597015e-05, + "loss": 0.0013, + "step": 32248 + }, + { + "epoch": 30.09, + "learning_rate": 4.62392723880597e-05, + "loss": 0.0003, + "step": 32252 + }, + { + "epoch": 30.09, + "learning_rate": 4.6238805970149256e-05, + "loss": 0.0002, + "step": 32256 + }, + { + "epoch": 30.09, + "learning_rate": 4.6238339552238804e-05, + "loss": 0.0004, + "step": 32260 + }, + { + "epoch": 30.1, + "learning_rate": 4.623787313432836e-05, + "loss": 0.0005, + "step": 32264 + }, + { + "epoch": 30.1, + "learning_rate": 4.6237406716417914e-05, + "loss": 0.0001, + "step": 32268 + }, + { + "epoch": 30.1, + "learning_rate": 4.623694029850746e-05, + "loss": 0.0003, + "step": 32272 + }, + { + "epoch": 30.11, + "learning_rate": 4.623647388059702e-05, + "loss": 0.0003, + "step": 32276 + }, + { + "epoch": 30.11, + "learning_rate": 4.623600746268657e-05, + "loss": 0.0002, + "step": 32280 + }, + { + "epoch": 30.12, + "learning_rate": 4.623554104477612e-05, + "loss": 0.0001, + "step": 32284 + }, + { + "epoch": 30.12, + "learning_rate": 4.6235074626865675e-05, + "loss": 0.0, + "step": 32288 + }, + { + "epoch": 30.12, + "learning_rate": 4.623460820895523e-05, + "loss": 0.0004, + "step": 32292 + }, + { + "epoch": 30.13, + "learning_rate": 4.623414179104478e-05, + "loss": 0.0, + "step": 32296 + }, + { + "epoch": 30.13, + "learning_rate": 4.623367537313433e-05, + "loss": 0.0006, + "step": 32300 + }, + { + "epoch": 30.13, + "learning_rate": 4.623320895522388e-05, + "loss": 0.001, + "step": 32304 + }, + { + "epoch": 30.14, + "learning_rate": 4.6232742537313436e-05, + "loss": 0.005, + "step": 32308 + }, + { + "epoch": 30.14, + "learning_rate": 4.623227611940299e-05, + "loss": 0.0001, + "step": 32312 + }, + { + "epoch": 30.15, + "learning_rate": 4.623180970149254e-05, + "loss": 0.001, + "step": 32316 + }, + { + "epoch": 30.15, + "learning_rate": 4.623134328358209e-05, + "loss": 0.0, + "step": 32320 + }, + { + "epoch": 30.15, + "learning_rate": 4.623087686567165e-05, + "loss": 0.0001, + "step": 32324 + }, + { + "epoch": 30.16, + "learning_rate": 4.62304104477612e-05, + "loss": 0.0008, + "step": 32328 + }, + { + "epoch": 30.16, + "learning_rate": 4.6229944029850745e-05, + "loss": 0.0002, + "step": 32332 + }, + { + "epoch": 30.16, + "learning_rate": 4.62294776119403e-05, + "loss": 0.0001, + "step": 32336 + }, + { + "epoch": 30.17, + "learning_rate": 4.6229011194029855e-05, + "loss": 0.0008, + "step": 32340 + }, + { + "epoch": 30.17, + "learning_rate": 4.62285447761194e-05, + "loss": 0.003, + "step": 32344 + }, + { + "epoch": 30.18, + "learning_rate": 4.622807835820896e-05, + "loss": 0.0002, + "step": 32348 + }, + { + "epoch": 30.18, + "learning_rate": 4.622761194029851e-05, + "loss": 0.0005, + "step": 32352 + }, + { + "epoch": 30.18, + "learning_rate": 4.622714552238806e-05, + "loss": 0.0007, + "step": 32356 + }, + { + "epoch": 30.19, + "learning_rate": 4.6226679104477616e-05, + "loss": 0.0001, + "step": 32360 + }, + { + "epoch": 30.19, + "learning_rate": 4.6226212686567164e-05, + "loss": 0.0001, + "step": 32364 + }, + { + "epoch": 30.19, + "learning_rate": 4.622574626865672e-05, + "loss": 0.0001, + "step": 32368 + }, + { + "epoch": 30.2, + "learning_rate": 4.6225279850746274e-05, + "loss": 0.0004, + "step": 32372 + }, + { + "epoch": 30.2, + "learning_rate": 4.622481343283582e-05, + "loss": 0.0002, + "step": 32376 + }, + { + "epoch": 30.21, + "learning_rate": 4.622434701492537e-05, + "loss": 0.0, + "step": 32380 + }, + { + "epoch": 30.21, + "learning_rate": 4.622388059701493e-05, + "loss": 0.0017, + "step": 32384 + }, + { + "epoch": 30.21, + "learning_rate": 4.622341417910448e-05, + "loss": 0.0007, + "step": 32388 + }, + { + "epoch": 30.22, + "learning_rate": 4.622294776119403e-05, + "loss": 0.0014, + "step": 32392 + }, + { + "epoch": 30.22, + "learning_rate": 4.622248134328358e-05, + "loss": 0.0003, + "step": 32396 + }, + { + "epoch": 30.22, + "learning_rate": 4.622201492537314e-05, + "loss": 0.0131, + "step": 32400 + }, + { + "epoch": 30.23, + "learning_rate": 4.6221548507462686e-05, + "loss": 0.0001, + "step": 32404 + }, + { + "epoch": 30.23, + "learning_rate": 4.622108208955224e-05, + "loss": 0.0023, + "step": 32408 + }, + { + "epoch": 30.24, + "learning_rate": 4.6220615671641796e-05, + "loss": 0.0002, + "step": 32412 + }, + { + "epoch": 30.24, + "learning_rate": 4.6220149253731344e-05, + "loss": 0.0004, + "step": 32416 + }, + { + "epoch": 30.24, + "learning_rate": 4.62196828358209e-05, + "loss": 0.0001, + "step": 32420 + }, + { + "epoch": 30.25, + "learning_rate": 4.621921641791045e-05, + "loss": 0.001, + "step": 32424 + }, + { + "epoch": 30.25, + "learning_rate": 4.621875e-05, + "loss": 0.0001, + "step": 32428 + }, + { + "epoch": 30.25, + "learning_rate": 4.621828358208956e-05, + "loss": 0.0006, + "step": 32432 + }, + { + "epoch": 30.26, + "learning_rate": 4.6217817164179105e-05, + "loss": 0.0003, + "step": 32436 + }, + { + "epoch": 30.26, + "learning_rate": 4.621735074626866e-05, + "loss": 0.0002, + "step": 32440 + }, + { + "epoch": 30.26, + "learning_rate": 4.6216884328358215e-05, + "loss": 0.0003, + "step": 32444 + }, + { + "epoch": 30.27, + "learning_rate": 4.621641791044776e-05, + "loss": 0.0001, + "step": 32448 + }, + { + "epoch": 30.27, + "learning_rate": 4.621595149253732e-05, + "loss": 0.0006, + "step": 32452 + }, + { + "epoch": 30.28, + "learning_rate": 4.6215485074626866e-05, + "loss": 0.0, + "step": 32456 + }, + { + "epoch": 30.28, + "learning_rate": 4.621501865671642e-05, + "loss": 0.0002, + "step": 32460 + }, + { + "epoch": 30.28, + "learning_rate": 4.6214552238805976e-05, + "loss": 0.0003, + "step": 32464 + }, + { + "epoch": 30.29, + "learning_rate": 4.6214085820895524e-05, + "loss": 0.0016, + "step": 32468 + }, + { + "epoch": 30.29, + "learning_rate": 4.621361940298508e-05, + "loss": 0.0013, + "step": 32472 + }, + { + "epoch": 30.29, + "learning_rate": 4.6213152985074634e-05, + "loss": 0.0, + "step": 32476 + }, + { + "epoch": 30.3, + "learning_rate": 4.621268656716418e-05, + "loss": 0.0, + "step": 32480 + }, + { + "epoch": 30.3, + "learning_rate": 4.621222014925373e-05, + "loss": 0.0001, + "step": 32484 + }, + { + "epoch": 30.31, + "learning_rate": 4.6211753731343285e-05, + "loss": 0.0008, + "step": 32488 + }, + { + "epoch": 30.31, + "learning_rate": 4.621128731343284e-05, + "loss": 0.0003, + "step": 32492 + }, + { + "epoch": 30.31, + "learning_rate": 4.621082089552239e-05, + "loss": 0.0001, + "step": 32496 + }, + { + "epoch": 30.32, + "learning_rate": 4.621035447761194e-05, + "loss": 0.0, + "step": 32500 + }, + { + "epoch": 30.32, + "eval_exact_match": 0.7388781431334622, + "eval_exec": 0.7678916827852998, + "eval_loss": 0.4083205461502075, + "eval_runtime": 1184.6901, + "eval_samples_per_second": 0.873, + "step": 32500 + }, + { + "epoch": 30.32, + "learning_rate": 4.62098880597015e-05, + "loss": 0.0, + "step": 32504 + }, + { + "epoch": 30.32, + "learning_rate": 4.6209421641791046e-05, + "loss": 0.0046, + "step": 32508 + }, + { + "epoch": 30.33, + "learning_rate": 4.62089552238806e-05, + "loss": 0.0001, + "step": 32512 + }, + { + "epoch": 30.33, + "learning_rate": 4.620848880597015e-05, + "loss": 0.0001, + "step": 32516 + }, + { + "epoch": 30.34, + "learning_rate": 4.6208022388059704e-05, + "loss": 0.0014, + "step": 32520 + }, + { + "epoch": 30.34, + "learning_rate": 4.620755597014926e-05, + "loss": 0.0004, + "step": 32524 + }, + { + "epoch": 30.34, + "learning_rate": 4.620708955223881e-05, + "loss": 0.0001, + "step": 32528 + }, + { + "epoch": 30.35, + "learning_rate": 4.620662313432836e-05, + "loss": 0.0028, + "step": 32532 + }, + { + "epoch": 30.35, + "learning_rate": 4.620615671641792e-05, + "loss": 0.0, + "step": 32536 + }, + { + "epoch": 30.35, + "learning_rate": 4.6205690298507465e-05, + "loss": 0.0001, + "step": 32540 + }, + { + "epoch": 30.36, + "learning_rate": 4.620522388059701e-05, + "loss": 0.0002, + "step": 32544 + }, + { + "epoch": 30.36, + "learning_rate": 4.620475746268657e-05, + "loss": 0.0001, + "step": 32548 + }, + { + "epoch": 30.37, + "learning_rate": 4.620429104477612e-05, + "loss": 0.001, + "step": 32552 + }, + { + "epoch": 30.37, + "learning_rate": 4.620382462686567e-05, + "loss": 0.0002, + "step": 32556 + }, + { + "epoch": 30.37, + "learning_rate": 4.6203358208955226e-05, + "loss": 0.0167, + "step": 32560 + }, + { + "epoch": 30.38, + "learning_rate": 4.620289179104478e-05, + "loss": 0.0051, + "step": 32564 + }, + { + "epoch": 30.38, + "learning_rate": 4.620242537313433e-05, + "loss": 0.0002, + "step": 32568 + }, + { + "epoch": 30.38, + "learning_rate": 4.6201958955223884e-05, + "loss": 0.0, + "step": 32572 + }, + { + "epoch": 30.39, + "learning_rate": 4.620149253731343e-05, + "loss": 0.0027, + "step": 32576 + }, + { + "epoch": 30.39, + "learning_rate": 4.620102611940299e-05, + "loss": 0.007, + "step": 32580 + }, + { + "epoch": 30.4, + "learning_rate": 4.620055970149254e-05, + "loss": 0.0001, + "step": 32584 + }, + { + "epoch": 30.4, + "learning_rate": 4.620009328358209e-05, + "loss": 0.0037, + "step": 32588 + }, + { + "epoch": 30.4, + "learning_rate": 4.619962686567164e-05, + "loss": 0.0001, + "step": 32592 + }, + { + "epoch": 30.41, + "learning_rate": 4.61991604477612e-05, + "loss": 0.0018, + "step": 32596 + }, + { + "epoch": 30.41, + "learning_rate": 4.619869402985075e-05, + "loss": 0.0007, + "step": 32600 + }, + { + "epoch": 30.41, + "learning_rate": 4.61982276119403e-05, + "loss": 0.0001, + "step": 32604 + }, + { + "epoch": 30.42, + "learning_rate": 4.619776119402985e-05, + "loss": 0.0002, + "step": 32608 + }, + { + "epoch": 30.42, + "learning_rate": 4.6197294776119406e-05, + "loss": 0.0001, + "step": 32612 + }, + { + "epoch": 30.43, + "learning_rate": 4.619682835820896e-05, + "loss": 0.0016, + "step": 32616 + }, + { + "epoch": 30.43, + "learning_rate": 4.619636194029851e-05, + "loss": 0.0, + "step": 32620 + }, + { + "epoch": 30.43, + "learning_rate": 4.6195895522388064e-05, + "loss": 0.0002, + "step": 32624 + }, + { + "epoch": 30.44, + "learning_rate": 4.619542910447762e-05, + "loss": 0.0003, + "step": 32628 + }, + { + "epoch": 30.44, + "learning_rate": 4.619496268656717e-05, + "loss": 0.0002, + "step": 32632 + }, + { + "epoch": 30.44, + "learning_rate": 4.6194496268656715e-05, + "loss": 0.0001, + "step": 32636 + }, + { + "epoch": 30.45, + "learning_rate": 4.6194029850746277e-05, + "loss": 0.0002, + "step": 32640 + }, + { + "epoch": 30.45, + "learning_rate": 4.6193563432835825e-05, + "loss": 0.0001, + "step": 32644 + }, + { + "epoch": 30.46, + "learning_rate": 4.619309701492537e-05, + "loss": 0.0003, + "step": 32648 + }, + { + "epoch": 30.46, + "learning_rate": 4.619263059701493e-05, + "loss": 0.0014, + "step": 32652 + }, + { + "epoch": 30.46, + "learning_rate": 4.619216417910448e-05, + "loss": 0.0, + "step": 32656 + }, + { + "epoch": 30.47, + "learning_rate": 4.619169776119403e-05, + "loss": 0.0085, + "step": 32660 + }, + { + "epoch": 30.47, + "learning_rate": 4.6191231343283586e-05, + "loss": 0.0002, + "step": 32664 + }, + { + "epoch": 30.47, + "learning_rate": 4.6190764925373134e-05, + "loss": 0.0008, + "step": 32668 + }, + { + "epoch": 30.48, + "learning_rate": 4.619029850746269e-05, + "loss": 0.0007, + "step": 32672 + }, + { + "epoch": 30.48, + "learning_rate": 4.6189832089552244e-05, + "loss": 0.0025, + "step": 32676 + }, + { + "epoch": 30.49, + "learning_rate": 4.618936567164179e-05, + "loss": 0.0002, + "step": 32680 + }, + { + "epoch": 30.49, + "learning_rate": 4.618889925373135e-05, + "loss": 0.0009, + "step": 32684 + }, + { + "epoch": 30.49, + "learning_rate": 4.61884328358209e-05, + "loss": 0.0041, + "step": 32688 + }, + { + "epoch": 30.5, + "learning_rate": 4.618796641791045e-05, + "loss": 0.0002, + "step": 32692 + }, + { + "epoch": 30.5, + "learning_rate": 4.61875e-05, + "loss": 0.0012, + "step": 32696 + }, + { + "epoch": 30.5, + "learning_rate": 4.618703358208955e-05, + "loss": 0.0005, + "step": 32700 + }, + { + "epoch": 30.51, + "learning_rate": 4.618656716417911e-05, + "loss": 0.0004, + "step": 32704 + }, + { + "epoch": 30.51, + "learning_rate": 4.6186100746268656e-05, + "loss": 0.0019, + "step": 32708 + }, + { + "epoch": 30.51, + "learning_rate": 4.618563432835821e-05, + "loss": 0.001, + "step": 32712 + }, + { + "epoch": 30.52, + "learning_rate": 4.6185167910447766e-05, + "loss": 0.0, + "step": 32716 + }, + { + "epoch": 30.52, + "learning_rate": 4.6184701492537314e-05, + "loss": 0.001, + "step": 32720 + }, + { + "epoch": 30.53, + "learning_rate": 4.618423507462687e-05, + "loss": 0.0001, + "step": 32724 + }, + { + "epoch": 30.53, + "learning_rate": 4.618376865671642e-05, + "loss": 0.0002, + "step": 32728 + }, + { + "epoch": 30.53, + "learning_rate": 4.618330223880597e-05, + "loss": 0.0001, + "step": 32732 + }, + { + "epoch": 30.54, + "learning_rate": 4.6182835820895527e-05, + "loss": 0.0002, + "step": 32736 + }, + { + "epoch": 30.54, + "learning_rate": 4.6182369402985075e-05, + "loss": 0.0002, + "step": 32740 + }, + { + "epoch": 30.54, + "learning_rate": 4.618190298507463e-05, + "loss": 0.0, + "step": 32744 + }, + { + "epoch": 30.55, + "learning_rate": 4.6181436567164185e-05, + "loss": 0.0012, + "step": 32748 + }, + { + "epoch": 30.55, + "learning_rate": 4.618097014925373e-05, + "loss": 0.0002, + "step": 32752 + }, + { + "epoch": 30.56, + "learning_rate": 4.618050373134328e-05, + "loss": 0.0014, + "step": 32756 + }, + { + "epoch": 30.56, + "learning_rate": 4.6180037313432836e-05, + "loss": 0.0005, + "step": 32760 + }, + { + "epoch": 30.56, + "learning_rate": 4.617957089552239e-05, + "loss": 0.0001, + "step": 32764 + }, + { + "epoch": 30.57, + "learning_rate": 4.6179104477611945e-05, + "loss": 0.0012, + "step": 32768 + }, + { + "epoch": 30.57, + "learning_rate": 4.6178638059701494e-05, + "loss": 0.0006, + "step": 32772 + }, + { + "epoch": 30.57, + "learning_rate": 4.617817164179105e-05, + "loss": 0.0002, + "step": 32776 + }, + { + "epoch": 30.58, + "learning_rate": 4.6177705223880603e-05, + "loss": 0.0023, + "step": 32780 + }, + { + "epoch": 30.58, + "learning_rate": 4.617723880597015e-05, + "loss": 0.0011, + "step": 32784 + }, + { + "epoch": 30.59, + "learning_rate": 4.61767723880597e-05, + "loss": 0.0008, + "step": 32788 + }, + { + "epoch": 30.59, + "learning_rate": 4.617630597014926e-05, + "loss": 0.0011, + "step": 32792 + }, + { + "epoch": 30.59, + "learning_rate": 4.617583955223881e-05, + "loss": 0.0, + "step": 32796 + }, + { + "epoch": 30.6, + "learning_rate": 4.617537313432836e-05, + "loss": 0.0021, + "step": 32800 + }, + { + "epoch": 30.6, + "learning_rate": 4.617490671641791e-05, + "loss": 0.001, + "step": 32804 + }, + { + "epoch": 30.6, + "learning_rate": 4.617444029850747e-05, + "loss": 0.0003, + "step": 32808 + }, + { + "epoch": 30.61, + "learning_rate": 4.6173973880597016e-05, + "loss": 0.0001, + "step": 32812 + }, + { + "epoch": 30.61, + "learning_rate": 4.617350746268657e-05, + "loss": 0.0076, + "step": 32816 + }, + { + "epoch": 30.62, + "learning_rate": 4.617304104477612e-05, + "loss": 0.0001, + "step": 32820 + }, + { + "epoch": 30.62, + "learning_rate": 4.6172574626865674e-05, + "loss": 0.0002, + "step": 32824 + }, + { + "epoch": 30.62, + "learning_rate": 4.617210820895523e-05, + "loss": 0.0001, + "step": 32828 + }, + { + "epoch": 30.63, + "learning_rate": 4.6171641791044777e-05, + "loss": 0.002, + "step": 32832 + }, + { + "epoch": 30.63, + "learning_rate": 4.617117537313433e-05, + "loss": 0.0037, + "step": 32836 + }, + { + "epoch": 30.63, + "learning_rate": 4.6170708955223886e-05, + "loss": 0.0004, + "step": 32840 + }, + { + "epoch": 30.64, + "learning_rate": 4.6170242537313435e-05, + "loss": 0.001, + "step": 32844 + }, + { + "epoch": 30.64, + "learning_rate": 4.616977611940298e-05, + "loss": 0.0001, + "step": 32848 + }, + { + "epoch": 30.65, + "learning_rate": 4.6169309701492544e-05, + "loss": 0.0001, + "step": 32852 + }, + { + "epoch": 30.65, + "learning_rate": 4.616884328358209e-05, + "loss": 0.0, + "step": 32856 + }, + { + "epoch": 30.65, + "learning_rate": 4.616837686567164e-05, + "loss": 0.0011, + "step": 32860 + }, + { + "epoch": 30.66, + "learning_rate": 4.6167910447761196e-05, + "loss": 0.0023, + "step": 32864 + }, + { + "epoch": 30.66, + "learning_rate": 4.616744402985075e-05, + "loss": 0.0, + "step": 32868 + }, + { + "epoch": 30.66, + "learning_rate": 4.61669776119403e-05, + "loss": 0.0001, + "step": 32872 + }, + { + "epoch": 30.67, + "learning_rate": 4.6166511194029853e-05, + "loss": 0.0013, + "step": 32876 + }, + { + "epoch": 30.67, + "learning_rate": 4.61660447761194e-05, + "loss": 0.0, + "step": 32880 + }, + { + "epoch": 30.68, + "learning_rate": 4.6165578358208957e-05, + "loss": 0.0006, + "step": 32884 + }, + { + "epoch": 30.68, + "learning_rate": 4.616511194029851e-05, + "loss": 0.0001, + "step": 32888 + }, + { + "epoch": 30.68, + "learning_rate": 4.616464552238806e-05, + "loss": 0.0001, + "step": 32892 + }, + { + "epoch": 30.69, + "learning_rate": 4.6164179104477614e-05, + "loss": 0.0009, + "step": 32896 + }, + { + "epoch": 30.69, + "learning_rate": 4.616371268656717e-05, + "loss": 0.0, + "step": 32900 + }, + { + "epoch": 30.69, + "learning_rate": 4.616324626865672e-05, + "loss": 0.002, + "step": 32904 + }, + { + "epoch": 30.7, + "learning_rate": 4.6162779850746266e-05, + "loss": 0.0001, + "step": 32908 + }, + { + "epoch": 30.7, + "learning_rate": 4.616231343283583e-05, + "loss": 0.0, + "step": 32912 + }, + { + "epoch": 30.71, + "learning_rate": 4.6161847014925375e-05, + "loss": 0.0041, + "step": 32916 + }, + { + "epoch": 30.71, + "learning_rate": 4.6161380597014924e-05, + "loss": 0.0001, + "step": 32920 + }, + { + "epoch": 30.71, + "learning_rate": 4.616091417910448e-05, + "loss": 0.0002, + "step": 32924 + }, + { + "epoch": 30.72, + "learning_rate": 4.616044776119403e-05, + "loss": 0.0001, + "step": 32928 + }, + { + "epoch": 30.72, + "learning_rate": 4.615998134328359e-05, + "loss": 0.0005, + "step": 32932 + }, + { + "epoch": 30.72, + "learning_rate": 4.6159514925373136e-05, + "loss": 0.0006, + "step": 32936 + }, + { + "epoch": 30.73, + "learning_rate": 4.6159048507462685e-05, + "loss": 0.0002, + "step": 32940 + }, + { + "epoch": 30.73, + "learning_rate": 4.6158582089552246e-05, + "loss": 0.0004, + "step": 32944 + }, + { + "epoch": 30.73, + "learning_rate": 4.6158115671641794e-05, + "loss": 0.0068, + "step": 32948 + }, + { + "epoch": 30.74, + "learning_rate": 4.615764925373134e-05, + "loss": 0.0001, + "step": 32952 + }, + { + "epoch": 30.74, + "learning_rate": 4.61571828358209e-05, + "loss": 0.0001, + "step": 32956 + }, + { + "epoch": 30.75, + "learning_rate": 4.615671641791045e-05, + "loss": 0.0022, + "step": 32960 + }, + { + "epoch": 30.75, + "learning_rate": 4.615625e-05, + "loss": 0.0028, + "step": 32964 + }, + { + "epoch": 30.75, + "learning_rate": 4.6155783582089555e-05, + "loss": 0.0011, + "step": 32968 + }, + { + "epoch": 30.76, + "learning_rate": 4.615531716417911e-05, + "loss": 0.0004, + "step": 32972 + }, + { + "epoch": 30.76, + "learning_rate": 4.615485074626866e-05, + "loss": 0.0, + "step": 32976 + }, + { + "epoch": 30.76, + "learning_rate": 4.615438432835821e-05, + "loss": 0.0038, + "step": 32980 + }, + { + "epoch": 30.77, + "learning_rate": 4.615391791044776e-05, + "loss": 0.0007, + "step": 32984 + }, + { + "epoch": 30.77, + "learning_rate": 4.6153451492537316e-05, + "loss": 0.0, + "step": 32988 + }, + { + "epoch": 30.78, + "learning_rate": 4.615298507462687e-05, + "loss": 0.0004, + "step": 32992 + }, + { + "epoch": 30.78, + "learning_rate": 4.615251865671642e-05, + "loss": 0.0, + "step": 32996 + }, + { + "epoch": 30.78, + "learning_rate": 4.615205223880597e-05, + "loss": 0.0, + "step": 33000 + }, + { + "epoch": 30.78, + "eval_exact_match": 0.7427466150870407, + "eval_exec": 0.7707930367504836, + "eval_loss": 0.3826291561126709, + "eval_runtime": 1590.6112, + "eval_samples_per_second": 0.65, + "step": 33000 + }, + { + "epoch": 30.79, + "learning_rate": 4.615158582089553e-05, + "loss": 0.0001, + "step": 33004 + }, + { + "epoch": 30.79, + "learning_rate": 4.615111940298508e-05, + "loss": 0.0, + "step": 33008 + }, + { + "epoch": 30.79, + "learning_rate": 4.6150652985074625e-05, + "loss": 0.0005, + "step": 33012 + }, + { + "epoch": 30.8, + "learning_rate": 4.615018656716418e-05, + "loss": 0.0007, + "step": 33016 + }, + { + "epoch": 30.8, + "learning_rate": 4.6149720149253735e-05, + "loss": 0.002, + "step": 33020 + }, + { + "epoch": 30.81, + "learning_rate": 4.6149253731343283e-05, + "loss": 0.0002, + "step": 33024 + }, + { + "epoch": 30.81, + "learning_rate": 4.614878731343284e-05, + "loss": 0.0003, + "step": 33028 + }, + { + "epoch": 30.81, + "learning_rate": 4.614832089552239e-05, + "loss": 0.0012, + "step": 33032 + }, + { + "epoch": 30.82, + "learning_rate": 4.614785447761194e-05, + "loss": 0.0001, + "step": 33036 + }, + { + "epoch": 30.82, + "learning_rate": 4.6147388059701496e-05, + "loss": 0.0003, + "step": 33040 + }, + { + "epoch": 30.82, + "learning_rate": 4.6146921641791044e-05, + "loss": 0.0001, + "step": 33044 + }, + { + "epoch": 30.83, + "learning_rate": 4.61464552238806e-05, + "loss": 0.0, + "step": 33048 + }, + { + "epoch": 30.83, + "learning_rate": 4.6145988805970154e-05, + "loss": 0.0001, + "step": 33052 + }, + { + "epoch": 30.84, + "learning_rate": 4.61455223880597e-05, + "loss": 0.0002, + "step": 33056 + }, + { + "epoch": 30.84, + "learning_rate": 4.614505597014925e-05, + "loss": 0.0001, + "step": 33060 + }, + { + "epoch": 30.84, + "learning_rate": 4.614458955223881e-05, + "loss": 0.0002, + "step": 33064 + }, + { + "epoch": 30.85, + "learning_rate": 4.614412313432836e-05, + "loss": 0.0001, + "step": 33068 + }, + { + "epoch": 30.85, + "learning_rate": 4.614365671641791e-05, + "loss": 0.001, + "step": 33072 + }, + { + "epoch": 30.85, + "learning_rate": 4.614319029850746e-05, + "loss": 0.0001, + "step": 33076 + }, + { + "epoch": 30.86, + "learning_rate": 4.614272388059702e-05, + "loss": 0.0031, + "step": 33080 + }, + { + "epoch": 30.86, + "learning_rate": 4.6142257462686566e-05, + "loss": 0.0001, + "step": 33084 + }, + { + "epoch": 30.87, + "learning_rate": 4.614179104477612e-05, + "loss": 0.001, + "step": 33088 + }, + { + "epoch": 30.87, + "learning_rate": 4.6141324626865676e-05, + "loss": 0.0007, + "step": 33092 + }, + { + "epoch": 30.87, + "learning_rate": 4.614085820895523e-05, + "loss": 0.0015, + "step": 33096 + }, + { + "epoch": 30.88, + "learning_rate": 4.614039179104478e-05, + "loss": 0.0001, + "step": 33100 + }, + { + "epoch": 30.88, + "learning_rate": 4.613992537313433e-05, + "loss": 0.0, + "step": 33104 + }, + { + "epoch": 30.88, + "learning_rate": 4.613945895522388e-05, + "loss": 0.0024, + "step": 33108 + }, + { + "epoch": 30.89, + "learning_rate": 4.613899253731344e-05, + "loss": 0.0012, + "step": 33112 + }, + { + "epoch": 30.89, + "learning_rate": 4.6138526119402985e-05, + "loss": 0.0003, + "step": 33116 + }, + { + "epoch": 30.9, + "learning_rate": 4.613805970149254e-05, + "loss": 0.0001, + "step": 33120 + }, + { + "epoch": 30.9, + "learning_rate": 4.6137593283582095e-05, + "loss": 0.0001, + "step": 33124 + }, + { + "epoch": 30.9, + "learning_rate": 4.613712686567164e-05, + "loss": 0.0004, + "step": 33128 + }, + { + "epoch": 30.91, + "learning_rate": 4.61366604477612e-05, + "loss": 0.0007, + "step": 33132 + }, + { + "epoch": 30.91, + "learning_rate": 4.6136194029850746e-05, + "loss": 0.0001, + "step": 33136 + }, + { + "epoch": 30.91, + "learning_rate": 4.61357276119403e-05, + "loss": 0.0002, + "step": 33140 + }, + { + "epoch": 30.92, + "learning_rate": 4.6135261194029856e-05, + "loss": 0.0002, + "step": 33144 + }, + { + "epoch": 30.92, + "learning_rate": 4.6134794776119404e-05, + "loss": 0.0, + "step": 33148 + }, + { + "epoch": 30.93, + "learning_rate": 4.613432835820896e-05, + "loss": 0.0001, + "step": 33152 + }, + { + "epoch": 30.93, + "learning_rate": 4.6133861940298514e-05, + "loss": 0.0001, + "step": 33156 + }, + { + "epoch": 30.93, + "learning_rate": 4.613339552238806e-05, + "loss": 0.0002, + "step": 33160 + }, + { + "epoch": 30.94, + "learning_rate": 4.613292910447761e-05, + "loss": 0.0013, + "step": 33164 + }, + { + "epoch": 30.94, + "learning_rate": 4.6132462686567165e-05, + "loss": 0.0011, + "step": 33168 + }, + { + "epoch": 30.94, + "learning_rate": 4.613199626865672e-05, + "loss": 0.0, + "step": 33172 + }, + { + "epoch": 30.95, + "learning_rate": 4.613152985074627e-05, + "loss": 0.0, + "step": 33176 + }, + { + "epoch": 30.95, + "learning_rate": 4.613106343283582e-05, + "loss": 0.0003, + "step": 33180 + }, + { + "epoch": 30.96, + "learning_rate": 4.613059701492538e-05, + "loss": 0.0002, + "step": 33184 + }, + { + "epoch": 30.96, + "learning_rate": 4.6130130597014926e-05, + "loss": 0.005, + "step": 33188 + }, + { + "epoch": 30.96, + "learning_rate": 4.612966417910448e-05, + "loss": 0.0004, + "step": 33192 + }, + { + "epoch": 30.97, + "learning_rate": 4.612919776119403e-05, + "loss": 0.0026, + "step": 33196 + }, + { + "epoch": 30.97, + "learning_rate": 4.6128731343283584e-05, + "loss": 0.0001, + "step": 33200 + }, + { + "epoch": 30.97, + "learning_rate": 4.612826492537314e-05, + "loss": 0.0016, + "step": 33204 + }, + { + "epoch": 30.98, + "learning_rate": 4.612779850746269e-05, + "loss": 0.0008, + "step": 33208 + }, + { + "epoch": 30.98, + "learning_rate": 4.612733208955224e-05, + "loss": 0.0031, + "step": 33212 + }, + { + "epoch": 30.98, + "learning_rate": 4.61268656716418e-05, + "loss": 0.0001, + "step": 33216 + }, + { + "epoch": 30.99, + "learning_rate": 4.6126399253731345e-05, + "loss": 0.0002, + "step": 33220 + }, + { + "epoch": 30.99, + "learning_rate": 4.612593283582089e-05, + "loss": 0.0014, + "step": 33224 + }, + { + "epoch": 31.0, + "learning_rate": 4.612546641791045e-05, + "loss": 0.0001, + "step": 33228 + }, + { + "epoch": 31.0, + "learning_rate": 4.6125e-05, + "loss": 0.003, + "step": 33232 + }, + { + "epoch": 31.0, + "learning_rate": 4.612453358208955e-05, + "loss": 0.0009, + "step": 33236 + }, + { + "epoch": 31.01, + "learning_rate": 4.6124067164179106e-05, + "loss": 0.001, + "step": 33240 + }, + { + "epoch": 31.01, + "learning_rate": 4.612360074626866e-05, + "loss": 0.003, + "step": 33244 + }, + { + "epoch": 31.01, + "learning_rate": 4.612313432835821e-05, + "loss": 0.0005, + "step": 33248 + }, + { + "epoch": 31.02, + "learning_rate": 4.6122667910447764e-05, + "loss": 0.0011, + "step": 33252 + }, + { + "epoch": 31.02, + "learning_rate": 4.612220149253731e-05, + "loss": 0.0022, + "step": 33256 + }, + { + "epoch": 31.03, + "learning_rate": 4.6121735074626874e-05, + "loss": 0.0002, + "step": 33260 + }, + { + "epoch": 31.03, + "learning_rate": 4.612126865671642e-05, + "loss": 0.0028, + "step": 33264 + }, + { + "epoch": 31.03, + "learning_rate": 4.612080223880597e-05, + "loss": 0.0013, + "step": 33268 + }, + { + "epoch": 31.04, + "learning_rate": 4.6120335820895525e-05, + "loss": 0.001, + "step": 33272 + }, + { + "epoch": 31.04, + "learning_rate": 4.611986940298508e-05, + "loss": 0.0004, + "step": 33276 + }, + { + "epoch": 31.04, + "learning_rate": 4.611940298507463e-05, + "loss": 0.0002, + "step": 33280 + }, + { + "epoch": 31.05, + "learning_rate": 4.611893656716418e-05, + "loss": 0.0001, + "step": 33284 + }, + { + "epoch": 31.05, + "learning_rate": 4.611847014925373e-05, + "loss": 0.005, + "step": 33288 + }, + { + "epoch": 31.06, + "learning_rate": 4.6118003731343286e-05, + "loss": 0.0001, + "step": 33292 + }, + { + "epoch": 31.06, + "learning_rate": 4.611753731343284e-05, + "loss": 0.0, + "step": 33296 + }, + { + "epoch": 31.06, + "learning_rate": 4.611707089552239e-05, + "loss": 0.0003, + "step": 33300 + }, + { + "epoch": 31.07, + "learning_rate": 4.6116604477611944e-05, + "loss": 0.0015, + "step": 33304 + }, + { + "epoch": 31.07, + "learning_rate": 4.61161380597015e-05, + "loss": 0.0002, + "step": 33308 + }, + { + "epoch": 31.07, + "learning_rate": 4.611567164179105e-05, + "loss": 0.0001, + "step": 33312 + }, + { + "epoch": 31.08, + "learning_rate": 4.6115205223880595e-05, + "loss": 0.0001, + "step": 33316 + }, + { + "epoch": 31.08, + "learning_rate": 4.611473880597016e-05, + "loss": 0.0, + "step": 33320 + }, + { + "epoch": 31.09, + "learning_rate": 4.6114272388059705e-05, + "loss": 0.0, + "step": 33324 + }, + { + "epoch": 31.09, + "learning_rate": 4.611380597014925e-05, + "loss": 0.0006, + "step": 33328 + }, + { + "epoch": 31.09, + "learning_rate": 4.611333955223881e-05, + "loss": 0.0002, + "step": 33332 + }, + { + "epoch": 31.1, + "learning_rate": 4.611287313432836e-05, + "loss": 0.0004, + "step": 33336 + }, + { + "epoch": 31.1, + "learning_rate": 4.611240671641791e-05, + "loss": 0.0, + "step": 33340 + }, + { + "epoch": 31.1, + "learning_rate": 4.6111940298507466e-05, + "loss": 0.0001, + "step": 33344 + }, + { + "epoch": 31.11, + "learning_rate": 4.6111473880597014e-05, + "loss": 0.0, + "step": 33348 + }, + { + "epoch": 31.11, + "learning_rate": 4.611100746268657e-05, + "loss": 0.0006, + "step": 33352 + }, + { + "epoch": 31.12, + "learning_rate": 4.6110541044776124e-05, + "loss": 0.0013, + "step": 33356 + }, + { + "epoch": 31.12, + "learning_rate": 4.611007462686567e-05, + "loss": 0.0001, + "step": 33360 + }, + { + "epoch": 31.12, + "learning_rate": 4.610960820895523e-05, + "loss": 0.0038, + "step": 33364 + }, + { + "epoch": 31.13, + "learning_rate": 4.610914179104478e-05, + "loss": 0.0016, + "step": 33368 + }, + { + "epoch": 31.13, + "learning_rate": 4.610867537313433e-05, + "loss": 0.0002, + "step": 33372 + }, + { + "epoch": 31.13, + "learning_rate": 4.610820895522388e-05, + "loss": 0.0001, + "step": 33376 + }, + { + "epoch": 31.14, + "learning_rate": 4.610774253731344e-05, + "loss": 0.0003, + "step": 33380 + }, + { + "epoch": 31.14, + "learning_rate": 4.610727611940299e-05, + "loss": 0.0107, + "step": 33384 + }, + { + "epoch": 31.15, + "learning_rate": 4.6106809701492536e-05, + "loss": 0.0001, + "step": 33388 + }, + { + "epoch": 31.15, + "learning_rate": 4.610634328358209e-05, + "loss": 0.0005, + "step": 33392 + }, + { + "epoch": 31.15, + "learning_rate": 4.6105876865671646e-05, + "loss": 0.0004, + "step": 33396 + }, + { + "epoch": 31.16, + "learning_rate": 4.6105410447761194e-05, + "loss": 0.0072, + "step": 33400 + }, + { + "epoch": 31.16, + "learning_rate": 4.610494402985075e-05, + "loss": 0.0, + "step": 33404 + }, + { + "epoch": 31.16, + "learning_rate": 4.61044776119403e-05, + "loss": 0.0003, + "step": 33408 + }, + { + "epoch": 31.17, + "learning_rate": 4.610401119402985e-05, + "loss": 0.0001, + "step": 33412 + }, + { + "epoch": 31.17, + "learning_rate": 4.610354477611941e-05, + "loss": 0.0004, + "step": 33416 + }, + { + "epoch": 31.18, + "learning_rate": 4.6103078358208955e-05, + "loss": 0.0, + "step": 33420 + }, + { + "epoch": 31.18, + "learning_rate": 4.610261194029851e-05, + "loss": 0.0001, + "step": 33424 + }, + { + "epoch": 31.18, + "learning_rate": 4.6102145522388065e-05, + "loss": 0.0001, + "step": 33428 + }, + { + "epoch": 31.19, + "learning_rate": 4.610167910447761e-05, + "loss": 0.0002, + "step": 33432 + }, + { + "epoch": 31.19, + "learning_rate": 4.610121268656717e-05, + "loss": 0.0032, + "step": 33436 + }, + { + "epoch": 31.19, + "learning_rate": 4.6100746268656716e-05, + "loss": 0.0002, + "step": 33440 + }, + { + "epoch": 31.2, + "learning_rate": 4.610027985074627e-05, + "loss": 0.0001, + "step": 33444 + }, + { + "epoch": 31.2, + "learning_rate": 4.6099813432835826e-05, + "loss": 0.0005, + "step": 33448 + }, + { + "epoch": 31.21, + "learning_rate": 4.6099347014925374e-05, + "loss": 0.0001, + "step": 33452 + }, + { + "epoch": 31.21, + "learning_rate": 4.609888059701493e-05, + "loss": 0.0001, + "step": 33456 + }, + { + "epoch": 31.21, + "learning_rate": 4.6098414179104484e-05, + "loss": 0.0, + "step": 33460 + }, + { + "epoch": 31.22, + "learning_rate": 4.609794776119403e-05, + "loss": 0.0001, + "step": 33464 + }, + { + "epoch": 31.22, + "learning_rate": 4.609748134328358e-05, + "loss": 0.0014, + "step": 33468 + }, + { + "epoch": 31.22, + "learning_rate": 4.609701492537314e-05, + "loss": 0.0008, + "step": 33472 + }, + { + "epoch": 31.23, + "learning_rate": 4.609654850746269e-05, + "loss": 0.0004, + "step": 33476 + }, + { + "epoch": 31.23, + "learning_rate": 4.609608208955224e-05, + "loss": 0.0002, + "step": 33480 + }, + { + "epoch": 31.24, + "learning_rate": 4.609561567164179e-05, + "loss": 0.0001, + "step": 33484 + }, + { + "epoch": 31.24, + "learning_rate": 4.609514925373135e-05, + "loss": 0.0001, + "step": 33488 + }, + { + "epoch": 31.24, + "learning_rate": 4.6094682835820896e-05, + "loss": 0.0036, + "step": 33492 + }, + { + "epoch": 31.25, + "learning_rate": 4.609421641791045e-05, + "loss": 0.0031, + "step": 33496 + }, + { + "epoch": 31.25, + "learning_rate": 4.609375e-05, + "loss": 0.005, + "step": 33500 + }, + { + "epoch": 31.25, + "eval_exact_match": 0.7456479690522244, + "eval_exec": 0.7659574468085106, + "eval_loss": 0.4037356376647949, + "eval_runtime": 1057.7845, + "eval_samples_per_second": 0.978, + "step": 33500 + }, + { + "epoch": 31.25, + "learning_rate": 4.6093283582089554e-05, + "loss": 0.0006, + "step": 33504 + }, + { + "epoch": 31.26, + "learning_rate": 4.609281716417911e-05, + "loss": 0.0002, + "step": 33508 + }, + { + "epoch": 31.26, + "learning_rate": 4.609235074626866e-05, + "loss": 0.0019, + "step": 33512 + }, + { + "epoch": 31.26, + "learning_rate": 4.609188432835821e-05, + "loss": 0.0002, + "step": 33516 + }, + { + "epoch": 31.27, + "learning_rate": 4.609141791044777e-05, + "loss": 0.0003, + "step": 33520 + }, + { + "epoch": 31.27, + "learning_rate": 4.6090951492537315e-05, + "loss": 0.0, + "step": 33524 + }, + { + "epoch": 31.28, + "learning_rate": 4.609048507462686e-05, + "loss": 0.0033, + "step": 33528 + }, + { + "epoch": 31.28, + "learning_rate": 4.6090018656716425e-05, + "loss": 0.0, + "step": 33532 + }, + { + "epoch": 31.28, + "learning_rate": 4.608955223880597e-05, + "loss": 0.0001, + "step": 33536 + }, + { + "epoch": 31.29, + "learning_rate": 4.608908582089552e-05, + "loss": 0.0003, + "step": 33540 + }, + { + "epoch": 31.29, + "learning_rate": 4.6088619402985076e-05, + "loss": 0.0002, + "step": 33544 + }, + { + "epoch": 31.29, + "learning_rate": 4.608815298507463e-05, + "loss": 0.0035, + "step": 33548 + }, + { + "epoch": 31.3, + "learning_rate": 4.608768656716418e-05, + "loss": 0.0, + "step": 33552 + }, + { + "epoch": 31.3, + "learning_rate": 4.6087220149253734e-05, + "loss": 0.0004, + "step": 33556 + }, + { + "epoch": 31.31, + "learning_rate": 4.608675373134328e-05, + "loss": 0.0001, + "step": 33560 + }, + { + "epoch": 31.31, + "learning_rate": 4.608628731343284e-05, + "loss": 0.0001, + "step": 33564 + }, + { + "epoch": 31.31, + "learning_rate": 4.608582089552239e-05, + "loss": 0.0002, + "step": 33568 + }, + { + "epoch": 31.32, + "learning_rate": 4.608535447761194e-05, + "loss": 0.0005, + "step": 33572 + }, + { + "epoch": 31.32, + "learning_rate": 4.6084888059701495e-05, + "loss": 0.0007, + "step": 33576 + }, + { + "epoch": 31.32, + "learning_rate": 4.608442164179105e-05, + "loss": 0.0002, + "step": 33580 + }, + { + "epoch": 31.33, + "learning_rate": 4.60839552238806e-05, + "loss": 0.0003, + "step": 33584 + }, + { + "epoch": 31.33, + "learning_rate": 4.608348880597015e-05, + "loss": 0.0001, + "step": 33588 + }, + { + "epoch": 31.34, + "learning_rate": 4.608302238805971e-05, + "loss": 0.0, + "step": 33592 + }, + { + "epoch": 31.34, + "learning_rate": 4.6082555970149256e-05, + "loss": 0.0001, + "step": 33596 + }, + { + "epoch": 31.34, + "learning_rate": 4.608208955223881e-05, + "loss": 0.0, + "step": 33600 + }, + { + "epoch": 31.35, + "learning_rate": 4.608162313432836e-05, + "loss": 0.0013, + "step": 33604 + }, + { + "epoch": 31.35, + "learning_rate": 4.6081156716417914e-05, + "loss": 0.0, + "step": 33608 + }, + { + "epoch": 31.35, + "learning_rate": 4.608069029850747e-05, + "loss": 0.0002, + "step": 33612 + }, + { + "epoch": 31.36, + "learning_rate": 4.608022388059702e-05, + "loss": 0.002, + "step": 33616 + }, + { + "epoch": 31.36, + "learning_rate": 4.6079757462686565e-05, + "loss": 0.0, + "step": 33620 + }, + { + "epoch": 31.37, + "learning_rate": 4.6079291044776127e-05, + "loss": 0.0001, + "step": 33624 + }, + { + "epoch": 31.37, + "learning_rate": 4.6078824626865675e-05, + "loss": 0.0053, + "step": 33628 + }, + { + "epoch": 31.37, + "learning_rate": 4.607835820895522e-05, + "loss": 0.0003, + "step": 33632 + }, + { + "epoch": 31.38, + "learning_rate": 4.607789179104478e-05, + "loss": 0.0008, + "step": 33636 + }, + { + "epoch": 31.38, + "learning_rate": 4.607742537313433e-05, + "loss": 0.0, + "step": 33640 + }, + { + "epoch": 31.38, + "learning_rate": 4.607695895522388e-05, + "loss": 0.0028, + "step": 33644 + }, + { + "epoch": 31.39, + "learning_rate": 4.6076492537313436e-05, + "loss": 0.0001, + "step": 33648 + }, + { + "epoch": 31.39, + "learning_rate": 4.607602611940299e-05, + "loss": 0.001, + "step": 33652 + }, + { + "epoch": 31.4, + "learning_rate": 4.607555970149254e-05, + "loss": 0.0003, + "step": 33656 + }, + { + "epoch": 31.4, + "learning_rate": 4.6075093283582094e-05, + "loss": 0.0077, + "step": 33660 + }, + { + "epoch": 31.4, + "learning_rate": 4.607462686567164e-05, + "loss": 0.0003, + "step": 33664 + }, + { + "epoch": 31.41, + "learning_rate": 4.60741604477612e-05, + "loss": 0.0001, + "step": 33668 + }, + { + "epoch": 31.41, + "learning_rate": 4.607369402985075e-05, + "loss": 0.0001, + "step": 33672 + }, + { + "epoch": 31.41, + "learning_rate": 4.60732276119403e-05, + "loss": 0.0019, + "step": 33676 + }, + { + "epoch": 31.42, + "learning_rate": 4.607276119402985e-05, + "loss": 0.0003, + "step": 33680 + }, + { + "epoch": 31.42, + "learning_rate": 4.607229477611941e-05, + "loss": 0.0001, + "step": 33684 + }, + { + "epoch": 31.43, + "learning_rate": 4.607182835820896e-05, + "loss": 0.0036, + "step": 33688 + }, + { + "epoch": 31.43, + "learning_rate": 4.6071361940298506e-05, + "loss": 0.0, + "step": 33692 + }, + { + "epoch": 31.43, + "learning_rate": 4.607089552238806e-05, + "loss": 0.0002, + "step": 33696 + }, + { + "epoch": 31.44, + "learning_rate": 4.6070429104477616e-05, + "loss": 0.0004, + "step": 33700 + }, + { + "epoch": 31.44, + "learning_rate": 4.6069962686567164e-05, + "loss": 0.0, + "step": 33704 + }, + { + "epoch": 31.44, + "learning_rate": 4.606949626865672e-05, + "loss": 0.0001, + "step": 33708 + }, + { + "epoch": 31.45, + "learning_rate": 4.6069029850746274e-05, + "loss": 0.0001, + "step": 33712 + }, + { + "epoch": 31.45, + "learning_rate": 4.606856343283582e-05, + "loss": 0.0005, + "step": 33716 + }, + { + "epoch": 31.46, + "learning_rate": 4.6068097014925377e-05, + "loss": 0.0005, + "step": 33720 + }, + { + "epoch": 31.46, + "learning_rate": 4.6067630597014925e-05, + "loss": 0.001, + "step": 33724 + }, + { + "epoch": 31.46, + "learning_rate": 4.606716417910448e-05, + "loss": 0.0002, + "step": 33728 + }, + { + "epoch": 31.47, + "learning_rate": 4.6066697761194034e-05, + "loss": 0.0002, + "step": 33732 + }, + { + "epoch": 31.47, + "learning_rate": 4.606623134328358e-05, + "loss": 0.0006, + "step": 33736 + }, + { + "epoch": 31.47, + "learning_rate": 4.606576492537313e-05, + "loss": 0.0002, + "step": 33740 + }, + { + "epoch": 31.48, + "learning_rate": 4.606529850746269e-05, + "loss": 0.0, + "step": 33744 + }, + { + "epoch": 31.48, + "learning_rate": 4.606483208955224e-05, + "loss": 0.0001, + "step": 33748 + }, + { + "epoch": 31.49, + "learning_rate": 4.6064365671641795e-05, + "loss": 0.0, + "step": 33752 + }, + { + "epoch": 31.49, + "learning_rate": 4.6063899253731344e-05, + "loss": 0.0014, + "step": 33756 + }, + { + "epoch": 31.49, + "learning_rate": 4.60634328358209e-05, + "loss": 0.0001, + "step": 33760 + }, + { + "epoch": 31.5, + "learning_rate": 4.6062966417910453e-05, + "loss": 0.0001, + "step": 33764 + }, + { + "epoch": 31.5, + "learning_rate": 4.60625e-05, + "loss": 0.0038, + "step": 33768 + }, + { + "epoch": 31.5, + "learning_rate": 4.6062033582089556e-05, + "loss": 0.0001, + "step": 33772 + }, + { + "epoch": 31.51, + "learning_rate": 4.606156716417911e-05, + "loss": 0.0, + "step": 33776 + }, + { + "epoch": 31.51, + "learning_rate": 4.606110074626866e-05, + "loss": 0.0001, + "step": 33780 + }, + { + "epoch": 31.51, + "learning_rate": 4.606063432835821e-05, + "loss": 0.0028, + "step": 33784 + }, + { + "epoch": 31.52, + "learning_rate": 4.606016791044776e-05, + "loss": 0.0003, + "step": 33788 + }, + { + "epoch": 31.52, + "learning_rate": 4.605970149253732e-05, + "loss": 0.0, + "step": 33792 + }, + { + "epoch": 31.53, + "learning_rate": 4.6059235074626866e-05, + "loss": 0.0, + "step": 33796 + }, + { + "epoch": 31.53, + "learning_rate": 4.605876865671642e-05, + "loss": 0.0, + "step": 33800 + }, + { + "epoch": 31.53, + "learning_rate": 4.6058302238805975e-05, + "loss": 0.0001, + "step": 33804 + }, + { + "epoch": 31.54, + "learning_rate": 4.6057835820895524e-05, + "loss": 0.0001, + "step": 33808 + }, + { + "epoch": 31.54, + "learning_rate": 4.605736940298508e-05, + "loss": 0.0105, + "step": 33812 + }, + { + "epoch": 31.54, + "learning_rate": 4.6056902985074627e-05, + "loss": 0.0003, + "step": 33816 + }, + { + "epoch": 31.55, + "learning_rate": 4.605643656716418e-05, + "loss": 0.0014, + "step": 33820 + }, + { + "epoch": 31.55, + "learning_rate": 4.6055970149253736e-05, + "loss": 0.0003, + "step": 33824 + }, + { + "epoch": 31.56, + "learning_rate": 4.6055503731343285e-05, + "loss": 0.0152, + "step": 33828 + }, + { + "epoch": 31.56, + "learning_rate": 4.605503731343284e-05, + "loss": 0.0, + "step": 33832 + }, + { + "epoch": 31.56, + "learning_rate": 4.6054570895522394e-05, + "loss": 0.0018, + "step": 33836 + }, + { + "epoch": 31.57, + "learning_rate": 4.605410447761194e-05, + "loss": 0.0004, + "step": 33840 + }, + { + "epoch": 31.57, + "learning_rate": 4.605363805970149e-05, + "loss": 0.0031, + "step": 33844 + }, + { + "epoch": 31.57, + "learning_rate": 4.6053171641791045e-05, + "loss": 0.0, + "step": 33848 + }, + { + "epoch": 31.58, + "learning_rate": 4.60527052238806e-05, + "loss": 0.0001, + "step": 33852 + }, + { + "epoch": 31.58, + "learning_rate": 4.605223880597015e-05, + "loss": 0.0002, + "step": 33856 + }, + { + "epoch": 31.59, + "learning_rate": 4.6051772388059703e-05, + "loss": 0.0066, + "step": 33860 + }, + { + "epoch": 31.59, + "learning_rate": 4.605130597014926e-05, + "loss": 0.0006, + "step": 33864 + }, + { + "epoch": 31.59, + "learning_rate": 4.6050839552238806e-05, + "loss": 0.0001, + "step": 33868 + }, + { + "epoch": 31.6, + "learning_rate": 4.605037313432836e-05, + "loss": 0.0, + "step": 33872 + }, + { + "epoch": 31.6, + "learning_rate": 4.604990671641791e-05, + "loss": 0.0017, + "step": 33876 + }, + { + "epoch": 31.6, + "learning_rate": 4.6049440298507464e-05, + "loss": 0.0001, + "step": 33880 + }, + { + "epoch": 31.61, + "learning_rate": 4.604897388059702e-05, + "loss": 0.0003, + "step": 33884 + }, + { + "epoch": 31.61, + "learning_rate": 4.604850746268657e-05, + "loss": 0.0007, + "step": 33888 + }, + { + "epoch": 31.62, + "learning_rate": 4.604804104477612e-05, + "loss": 0.0003, + "step": 33892 + }, + { + "epoch": 31.62, + "learning_rate": 4.604757462686568e-05, + "loss": 0.001, + "step": 33896 + }, + { + "epoch": 31.62, + "learning_rate": 4.6047108208955225e-05, + "loss": 0.0001, + "step": 33900 + }, + { + "epoch": 31.63, + "learning_rate": 4.6046641791044774e-05, + "loss": 0.0033, + "step": 33904 + }, + { + "epoch": 31.63, + "learning_rate": 4.604617537313433e-05, + "loss": 0.0002, + "step": 33908 + }, + { + "epoch": 31.63, + "learning_rate": 4.604570895522388e-05, + "loss": 0.0012, + "step": 33912 + }, + { + "epoch": 31.64, + "learning_rate": 4.604524253731344e-05, + "loss": 0.0001, + "step": 33916 + }, + { + "epoch": 31.64, + "learning_rate": 4.6044776119402986e-05, + "loss": 0.0001, + "step": 33920 + }, + { + "epoch": 31.65, + "learning_rate": 4.604430970149254e-05, + "loss": 0.0001, + "step": 33924 + }, + { + "epoch": 31.65, + "learning_rate": 4.6043843283582096e-05, + "loss": 0.0002, + "step": 33928 + }, + { + "epoch": 31.65, + "learning_rate": 4.6043376865671644e-05, + "loss": 0.0003, + "step": 33932 + }, + { + "epoch": 31.66, + "learning_rate": 4.604291044776119e-05, + "loss": 0.0003, + "step": 33936 + }, + { + "epoch": 31.66, + "learning_rate": 4.6042444029850754e-05, + "loss": 0.0039, + "step": 33940 + }, + { + "epoch": 31.66, + "learning_rate": 4.60419776119403e-05, + "loss": 0.0004, + "step": 33944 + }, + { + "epoch": 31.67, + "learning_rate": 4.604151119402985e-05, + "loss": 0.0002, + "step": 33948 + }, + { + "epoch": 31.67, + "learning_rate": 4.6041044776119405e-05, + "loss": 0.0, + "step": 33952 + }, + { + "epoch": 31.68, + "learning_rate": 4.604057835820896e-05, + "loss": 0.0001, + "step": 33956 + }, + { + "epoch": 31.68, + "learning_rate": 4.604011194029851e-05, + "loss": 0.0003, + "step": 33960 + }, + { + "epoch": 31.68, + "learning_rate": 4.603964552238806e-05, + "loss": 0.0001, + "step": 33964 + }, + { + "epoch": 31.69, + "learning_rate": 4.603917910447761e-05, + "loss": 0.0018, + "step": 33968 + }, + { + "epoch": 31.69, + "learning_rate": 4.6038712686567166e-05, + "loss": 0.0001, + "step": 33972 + }, + { + "epoch": 31.69, + "learning_rate": 4.603824626865672e-05, + "loss": 0.0002, + "step": 33976 + }, + { + "epoch": 31.7, + "learning_rate": 4.603777985074627e-05, + "loss": 0.0014, + "step": 33980 + }, + { + "epoch": 31.7, + "learning_rate": 4.6037313432835824e-05, + "loss": 0.0003, + "step": 33984 + }, + { + "epoch": 31.71, + "learning_rate": 4.603684701492538e-05, + "loss": 0.0001, + "step": 33988 + }, + { + "epoch": 31.71, + "learning_rate": 4.603638059701493e-05, + "loss": 0.0005, + "step": 33992 + }, + { + "epoch": 31.71, + "learning_rate": 4.6035914179104475e-05, + "loss": 0.0028, + "step": 33996 + }, + { + "epoch": 31.72, + "learning_rate": 4.603544776119404e-05, + "loss": 0.0, + "step": 34000 + }, + { + "epoch": 31.72, + "eval_exact_match": 0.7379110251450677, + "eval_exec": 0.7572533849129593, + "eval_loss": 0.42316770553588867, + "eval_runtime": 1045.9822, + "eval_samples_per_second": 0.989, + "step": 34000 + }, + { + "epoch": 31.72, + "learning_rate": 4.6034981343283585e-05, + "loss": 0.0001, + "step": 34004 + }, + { + "epoch": 31.72, + "learning_rate": 4.603451492537313e-05, + "loss": 0.0, + "step": 34008 + }, + { + "epoch": 31.73, + "learning_rate": 4.603404850746269e-05, + "loss": 0.0, + "step": 34012 + }, + { + "epoch": 31.73, + "learning_rate": 4.603358208955224e-05, + "loss": 0.0002, + "step": 34016 + }, + { + "epoch": 31.73, + "learning_rate": 4.603311567164179e-05, + "loss": 0.0004, + "step": 34020 + }, + { + "epoch": 31.74, + "learning_rate": 4.6032649253731346e-05, + "loss": 0.0029, + "step": 34024 + }, + { + "epoch": 31.74, + "learning_rate": 4.6032182835820894e-05, + "loss": 0.0001, + "step": 34028 + }, + { + "epoch": 31.75, + "learning_rate": 4.603171641791045e-05, + "loss": 0.0004, + "step": 34032 + }, + { + "epoch": 31.75, + "learning_rate": 4.6031250000000004e-05, + "loss": 0.0001, + "step": 34036 + }, + { + "epoch": 31.75, + "learning_rate": 4.603078358208955e-05, + "loss": 0.0, + "step": 34040 + }, + { + "epoch": 31.76, + "learning_rate": 4.603031716417911e-05, + "loss": 0.0021, + "step": 34044 + }, + { + "epoch": 31.76, + "learning_rate": 4.602985074626866e-05, + "loss": 0.0, + "step": 34048 + }, + { + "epoch": 31.76, + "learning_rate": 4.602938432835821e-05, + "loss": 0.001, + "step": 34052 + }, + { + "epoch": 31.77, + "learning_rate": 4.602891791044776e-05, + "loss": 0.0001, + "step": 34056 + }, + { + "epoch": 31.77, + "learning_rate": 4.602845149253732e-05, + "loss": 0.0001, + "step": 34060 + }, + { + "epoch": 31.78, + "learning_rate": 4.602798507462687e-05, + "loss": 0.0004, + "step": 34064 + }, + { + "epoch": 31.78, + "learning_rate": 4.6027518656716416e-05, + "loss": 0.0, + "step": 34068 + }, + { + "epoch": 31.78, + "learning_rate": 4.602705223880597e-05, + "loss": 0.0, + "step": 34072 + }, + { + "epoch": 31.79, + "learning_rate": 4.6026585820895526e-05, + "loss": 0.0002, + "step": 34076 + }, + { + "epoch": 31.79, + "learning_rate": 4.602611940298508e-05, + "loss": 0.0005, + "step": 34080 + }, + { + "epoch": 31.79, + "learning_rate": 4.602565298507463e-05, + "loss": 0.0015, + "step": 34084 + }, + { + "epoch": 31.8, + "learning_rate": 4.602518656716418e-05, + "loss": 0.0002, + "step": 34088 + }, + { + "epoch": 31.8, + "learning_rate": 4.602472014925374e-05, + "loss": 0.0008, + "step": 34092 + }, + { + "epoch": 31.81, + "learning_rate": 4.602425373134329e-05, + "loss": 0.0009, + "step": 34096 + }, + { + "epoch": 31.81, + "learning_rate": 4.6023787313432835e-05, + "loss": 0.0005, + "step": 34100 + }, + { + "epoch": 31.81, + "learning_rate": 4.602332089552239e-05, + "loss": 0.0059, + "step": 34104 + }, + { + "epoch": 31.82, + "learning_rate": 4.6022854477611945e-05, + "loss": 0.0012, + "step": 34108 + }, + { + "epoch": 31.82, + "learning_rate": 4.602238805970149e-05, + "loss": 0.0017, + "step": 34112 + }, + { + "epoch": 31.82, + "learning_rate": 4.602192164179105e-05, + "loss": 0.0003, + "step": 34116 + }, + { + "epoch": 31.83, + "learning_rate": 4.6021455223880596e-05, + "loss": 0.0021, + "step": 34120 + }, + { + "epoch": 31.83, + "learning_rate": 4.602098880597015e-05, + "loss": 0.0008, + "step": 34124 + }, + { + "epoch": 31.84, + "learning_rate": 4.6020522388059706e-05, + "loss": 0.0001, + "step": 34128 + }, + { + "epoch": 31.84, + "learning_rate": 4.6020055970149254e-05, + "loss": 0.0001, + "step": 34132 + }, + { + "epoch": 31.84, + "learning_rate": 4.601958955223881e-05, + "loss": 0.0, + "step": 34136 + }, + { + "epoch": 31.85, + "learning_rate": 4.6019123134328364e-05, + "loss": 0.0001, + "step": 34140 + }, + { + "epoch": 31.85, + "learning_rate": 4.601865671641791e-05, + "loss": 0.0032, + "step": 34144 + }, + { + "epoch": 31.85, + "learning_rate": 4.601819029850746e-05, + "loss": 0.0001, + "step": 34148 + }, + { + "epoch": 31.86, + "learning_rate": 4.601772388059702e-05, + "loss": 0.0007, + "step": 34152 + }, + { + "epoch": 31.86, + "learning_rate": 4.601725746268657e-05, + "loss": 0.0001, + "step": 34156 + }, + { + "epoch": 31.87, + "learning_rate": 4.601679104477612e-05, + "loss": 0.003, + "step": 34160 + }, + { + "epoch": 31.87, + "learning_rate": 4.601632462686567e-05, + "loss": 0.0003, + "step": 34164 + }, + { + "epoch": 31.87, + "learning_rate": 4.601585820895523e-05, + "loss": 0.0003, + "step": 34168 + }, + { + "epoch": 31.88, + "learning_rate": 4.6015391791044776e-05, + "loss": 0.0027, + "step": 34172 + }, + { + "epoch": 31.88, + "learning_rate": 4.601492537313433e-05, + "loss": 0.0, + "step": 34176 + }, + { + "epoch": 31.88, + "learning_rate": 4.601445895522388e-05, + "loss": 0.0005, + "step": 34180 + }, + { + "epoch": 31.89, + "learning_rate": 4.6013992537313434e-05, + "loss": 0.0002, + "step": 34184 + }, + { + "epoch": 31.89, + "learning_rate": 4.601352611940299e-05, + "loss": 0.0001, + "step": 34188 + }, + { + "epoch": 31.9, + "learning_rate": 4.601305970149254e-05, + "loss": 0.0, + "step": 34192 + }, + { + "epoch": 31.9, + "learning_rate": 4.601259328358209e-05, + "loss": 0.0004, + "step": 34196 + }, + { + "epoch": 31.9, + "learning_rate": 4.601212686567165e-05, + "loss": 0.0, + "step": 34200 + }, + { + "epoch": 31.91, + "learning_rate": 4.6011660447761195e-05, + "loss": 0.0008, + "step": 34204 + }, + { + "epoch": 31.91, + "learning_rate": 4.601119402985074e-05, + "loss": 0.0017, + "step": 34208 + }, + { + "epoch": 31.91, + "learning_rate": 4.6010727611940305e-05, + "loss": 0.0002, + "step": 34212 + }, + { + "epoch": 31.92, + "learning_rate": 4.601026119402985e-05, + "loss": 0.0001, + "step": 34216 + }, + { + "epoch": 31.92, + "learning_rate": 4.60097947761194e-05, + "loss": 0.0001, + "step": 34220 + }, + { + "epoch": 31.93, + "learning_rate": 4.6009328358208956e-05, + "loss": 0.0009, + "step": 34224 + }, + { + "epoch": 31.93, + "learning_rate": 4.600886194029851e-05, + "loss": 0.001, + "step": 34228 + }, + { + "epoch": 31.93, + "learning_rate": 4.600839552238806e-05, + "loss": 0.0001, + "step": 34232 + }, + { + "epoch": 31.94, + "learning_rate": 4.6007929104477614e-05, + "loss": 0.001, + "step": 34236 + }, + { + "epoch": 31.94, + "learning_rate": 4.600746268656716e-05, + "loss": 0.0001, + "step": 34240 + }, + { + "epoch": 31.94, + "learning_rate": 4.6006996268656724e-05, + "loss": 0.0, + "step": 34244 + }, + { + "epoch": 31.95, + "learning_rate": 4.600652985074627e-05, + "loss": 0.0044, + "step": 34248 + }, + { + "epoch": 31.95, + "learning_rate": 4.600606343283582e-05, + "loss": 0.0002, + "step": 34252 + }, + { + "epoch": 31.96, + "learning_rate": 4.6005597014925375e-05, + "loss": 0.0004, + "step": 34256 + }, + { + "epoch": 31.96, + "learning_rate": 4.600513059701493e-05, + "loss": 0.0011, + "step": 34260 + }, + { + "epoch": 31.96, + "learning_rate": 4.600466417910448e-05, + "loss": 0.0005, + "step": 34264 + }, + { + "epoch": 31.97, + "learning_rate": 4.600419776119403e-05, + "loss": 0.0033, + "step": 34268 + }, + { + "epoch": 31.97, + "learning_rate": 4.600373134328359e-05, + "loss": 0.0001, + "step": 34272 + }, + { + "epoch": 31.97, + "learning_rate": 4.6003264925373136e-05, + "loss": 0.0001, + "step": 34276 + }, + { + "epoch": 31.98, + "learning_rate": 4.600279850746269e-05, + "loss": 0.0001, + "step": 34280 + }, + { + "epoch": 31.98, + "learning_rate": 4.600233208955224e-05, + "loss": 0.0, + "step": 34284 + }, + { + "epoch": 31.98, + "learning_rate": 4.6001865671641794e-05, + "loss": 0.0001, + "step": 34288 + }, + { + "epoch": 31.99, + "learning_rate": 4.600139925373135e-05, + "loss": 0.0015, + "step": 34292 + }, + { + "epoch": 31.99, + "learning_rate": 4.60009328358209e-05, + "loss": 0.0002, + "step": 34296 + }, + { + "epoch": 32.0, + "learning_rate": 4.6000466417910445e-05, + "loss": 0.0002, + "step": 34300 + }, + { + "epoch": 32.0, + "learning_rate": 4.600000000000001e-05, + "loss": 0.001, + "step": 34304 + }, + { + "epoch": 32.0, + "learning_rate": 4.5999533582089555e-05, + "loss": 0.0001, + "step": 34308 + }, + { + "epoch": 32.01, + "learning_rate": 4.59990671641791e-05, + "loss": 0.0013, + "step": 34312 + }, + { + "epoch": 32.01, + "learning_rate": 4.599860074626866e-05, + "loss": 0.0017, + "step": 34316 + }, + { + "epoch": 32.01, + "learning_rate": 4.599813432835821e-05, + "loss": 0.0014, + "step": 34320 + }, + { + "epoch": 32.02, + "learning_rate": 4.599766791044776e-05, + "loss": 0.0001, + "step": 34324 + }, + { + "epoch": 32.02, + "learning_rate": 4.5997201492537316e-05, + "loss": 0.0007, + "step": 34328 + }, + { + "epoch": 32.03, + "learning_rate": 4.599673507462687e-05, + "loss": 0.0001, + "step": 34332 + }, + { + "epoch": 32.03, + "learning_rate": 4.599626865671642e-05, + "loss": 0.0001, + "step": 34336 + }, + { + "epoch": 32.03, + "learning_rate": 4.5995802238805974e-05, + "loss": 0.0001, + "step": 34340 + }, + { + "epoch": 32.04, + "learning_rate": 4.599533582089552e-05, + "loss": 0.0001, + "step": 34344 + }, + { + "epoch": 32.04, + "learning_rate": 4.599486940298508e-05, + "loss": 0.0, + "step": 34348 + }, + { + "epoch": 32.04, + "learning_rate": 4.599440298507463e-05, + "loss": 0.0, + "step": 34352 + }, + { + "epoch": 32.05, + "learning_rate": 4.599393656716418e-05, + "loss": 0.0, + "step": 34356 + }, + { + "epoch": 32.05, + "learning_rate": 4.599347014925373e-05, + "loss": 0.0003, + "step": 34360 + }, + { + "epoch": 32.06, + "learning_rate": 4.599300373134329e-05, + "loss": 0.0, + "step": 34364 + }, + { + "epoch": 32.06, + "learning_rate": 4.599253731343284e-05, + "loss": 0.0004, + "step": 34368 + }, + { + "epoch": 32.06, + "learning_rate": 4.5992070895522386e-05, + "loss": 0.0, + "step": 34372 + }, + { + "epoch": 32.07, + "learning_rate": 4.599160447761194e-05, + "loss": 0.0001, + "step": 34376 + }, + { + "epoch": 32.07, + "learning_rate": 4.5991138059701496e-05, + "loss": 0.0001, + "step": 34380 + }, + { + "epoch": 32.07, + "learning_rate": 4.5990671641791044e-05, + "loss": 0.0128, + "step": 34384 + }, + { + "epoch": 32.08, + "learning_rate": 4.59902052238806e-05, + "loss": 0.0002, + "step": 34388 + }, + { + "epoch": 32.08, + "learning_rate": 4.5989738805970154e-05, + "loss": 0.0038, + "step": 34392 + }, + { + "epoch": 32.09, + "learning_rate": 4.59892723880597e-05, + "loss": 0.0015, + "step": 34396 + }, + { + "epoch": 32.09, + "learning_rate": 4.598880597014926e-05, + "loss": 0.0, + "step": 34400 + }, + { + "epoch": 32.09, + "learning_rate": 4.5988339552238805e-05, + "loss": 0.0067, + "step": 34404 + }, + { + "epoch": 32.1, + "learning_rate": 4.598787313432836e-05, + "loss": 0.0001, + "step": 34408 + }, + { + "epoch": 32.1, + "learning_rate": 4.5987406716417915e-05, + "loss": 0.0, + "step": 34412 + }, + { + "epoch": 32.1, + "learning_rate": 4.598694029850746e-05, + "loss": 0.0002, + "step": 34416 + }, + { + "epoch": 32.11, + "learning_rate": 4.598647388059702e-05, + "loss": 0.0, + "step": 34420 + }, + { + "epoch": 32.11, + "learning_rate": 4.598600746268657e-05, + "loss": 0.0002, + "step": 34424 + }, + { + "epoch": 32.12, + "learning_rate": 4.598554104477612e-05, + "loss": 0.0001, + "step": 34428 + }, + { + "epoch": 32.12, + "learning_rate": 4.5985074626865676e-05, + "loss": 0.0, + "step": 34432 + }, + { + "epoch": 32.12, + "learning_rate": 4.5984608208955224e-05, + "loss": 0.0, + "step": 34436 + }, + { + "epoch": 32.13, + "learning_rate": 4.598414179104478e-05, + "loss": 0.0003, + "step": 34440 + }, + { + "epoch": 32.13, + "learning_rate": 4.5983675373134334e-05, + "loss": 0.0, + "step": 34444 + }, + { + "epoch": 32.13, + "learning_rate": 4.598320895522388e-05, + "loss": 0.0, + "step": 34448 + }, + { + "epoch": 32.14, + "learning_rate": 4.598274253731344e-05, + "loss": 0.0001, + "step": 34452 + }, + { + "epoch": 32.14, + "learning_rate": 4.598227611940299e-05, + "loss": 0.0005, + "step": 34456 + }, + { + "epoch": 32.15, + "learning_rate": 4.598180970149254e-05, + "loss": 0.0066, + "step": 34460 + }, + { + "epoch": 32.15, + "learning_rate": 4.598134328358209e-05, + "loss": 0.0007, + "step": 34464 + }, + { + "epoch": 32.15, + "learning_rate": 4.598087686567164e-05, + "loss": 0.0022, + "step": 34468 + }, + { + "epoch": 32.16, + "learning_rate": 4.59804104477612e-05, + "loss": 0.0, + "step": 34472 + }, + { + "epoch": 32.16, + "learning_rate": 4.5979944029850746e-05, + "loss": 0.001, + "step": 34476 + }, + { + "epoch": 32.16, + "learning_rate": 4.59794776119403e-05, + "loss": 0.0, + "step": 34480 + }, + { + "epoch": 32.17, + "learning_rate": 4.5979011194029856e-05, + "loss": 0.0001, + "step": 34484 + }, + { + "epoch": 32.17, + "learning_rate": 4.5978544776119404e-05, + "loss": 0.0052, + "step": 34488 + }, + { + "epoch": 32.18, + "learning_rate": 4.597807835820896e-05, + "loss": 0.0, + "step": 34492 + }, + { + "epoch": 32.18, + "learning_rate": 4.597761194029851e-05, + "loss": 0.0, + "step": 34496 + }, + { + "epoch": 32.18, + "learning_rate": 4.597714552238806e-05, + "loss": 0.0005, + "step": 34500 + }, + { + "epoch": 32.18, + "eval_exact_match": 0.7282398452611218, + "eval_exec": 0.7495164410058027, + "eval_loss": 0.42082998156547546, + "eval_runtime": 1063.8322, + "eval_samples_per_second": 0.972, + "step": 34500 + }, + { + "epoch": 32.19, + "learning_rate": 4.597667910447762e-05, + "loss": 0.0003, + "step": 34504 + }, + { + "epoch": 32.19, + "learning_rate": 4.5976212686567165e-05, + "loss": 0.0007, + "step": 34508 + }, + { + "epoch": 32.19, + "learning_rate": 4.597574626865672e-05, + "loss": 0.0003, + "step": 34512 + }, + { + "epoch": 32.2, + "learning_rate": 4.5975279850746275e-05, + "loss": 0.0014, + "step": 34516 + }, + { + "epoch": 32.2, + "learning_rate": 4.597481343283582e-05, + "loss": 0.0015, + "step": 34520 + }, + { + "epoch": 32.21, + "learning_rate": 4.597434701492537e-05, + "loss": 0.0, + "step": 34524 + }, + { + "epoch": 32.21, + "learning_rate": 4.5973880597014926e-05, + "loss": 0.0003, + "step": 34528 + }, + { + "epoch": 32.21, + "learning_rate": 4.597341417910448e-05, + "loss": 0.0005, + "step": 34532 + }, + { + "epoch": 32.22, + "learning_rate": 4.597294776119403e-05, + "loss": 0.0, + "step": 34536 + }, + { + "epoch": 32.22, + "learning_rate": 4.5972481343283584e-05, + "loss": 0.0002, + "step": 34540 + }, + { + "epoch": 32.22, + "learning_rate": 4.597201492537314e-05, + "loss": 0.0, + "step": 34544 + }, + { + "epoch": 32.23, + "learning_rate": 4.597154850746269e-05, + "loss": 0.0001, + "step": 34548 + }, + { + "epoch": 32.23, + "learning_rate": 4.597108208955224e-05, + "loss": 0.0026, + "step": 34552 + }, + { + "epoch": 32.24, + "learning_rate": 4.597061567164179e-05, + "loss": 0.0001, + "step": 34556 + }, + { + "epoch": 32.24, + "learning_rate": 4.5970149253731345e-05, + "loss": 0.0009, + "step": 34560 + }, + { + "epoch": 32.24, + "learning_rate": 4.59696828358209e-05, + "loss": 0.0001, + "step": 34564 + }, + { + "epoch": 32.25, + "learning_rate": 4.596921641791045e-05, + "loss": 0.008, + "step": 34568 + }, + { + "epoch": 32.25, + "learning_rate": 4.596875e-05, + "loss": 0.0003, + "step": 34572 + }, + { + "epoch": 32.25, + "learning_rate": 4.596828358208956e-05, + "loss": 0.0008, + "step": 34576 + }, + { + "epoch": 32.26, + "learning_rate": 4.5967817164179106e-05, + "loss": 0.0063, + "step": 34580 + }, + { + "epoch": 32.26, + "learning_rate": 4.596735074626866e-05, + "loss": 0.0002, + "step": 34584 + }, + { + "epoch": 32.26, + "learning_rate": 4.596688432835821e-05, + "loss": 0.0002, + "step": 34588 + }, + { + "epoch": 32.27, + "learning_rate": 4.5966417910447764e-05, + "loss": 0.0001, + "step": 34592 + }, + { + "epoch": 32.27, + "learning_rate": 4.596595149253732e-05, + "loss": 0.0008, + "step": 34596 + }, + { + "epoch": 32.28, + "learning_rate": 4.596548507462687e-05, + "loss": 0.0, + "step": 34600 + }, + { + "epoch": 32.28, + "learning_rate": 4.596501865671642e-05, + "loss": 0.0031, + "step": 34604 + }, + { + "epoch": 32.28, + "learning_rate": 4.5964552238805977e-05, + "loss": 0.0001, + "step": 34608 + }, + { + "epoch": 32.29, + "learning_rate": 4.5964085820895525e-05, + "loss": 0.0, + "step": 34612 + }, + { + "epoch": 32.29, + "learning_rate": 4.596361940298507e-05, + "loss": 0.0001, + "step": 34616 + }, + { + "epoch": 32.29, + "learning_rate": 4.5963152985074634e-05, + "loss": 0.0011, + "step": 34620 + }, + { + "epoch": 32.3, + "learning_rate": 4.596268656716418e-05, + "loss": 0.0011, + "step": 34624 + }, + { + "epoch": 32.3, + "learning_rate": 4.596222014925373e-05, + "loss": 0.0004, + "step": 34628 + }, + { + "epoch": 32.31, + "learning_rate": 4.5961753731343286e-05, + "loss": 0.0001, + "step": 34632 + }, + { + "epoch": 32.31, + "learning_rate": 4.596128731343284e-05, + "loss": 0.0006, + "step": 34636 + }, + { + "epoch": 32.31, + "learning_rate": 4.596082089552239e-05, + "loss": 0.0007, + "step": 34640 + }, + { + "epoch": 32.32, + "learning_rate": 4.5960354477611944e-05, + "loss": 0.0005, + "step": 34644 + }, + { + "epoch": 32.32, + "learning_rate": 4.595988805970149e-05, + "loss": 0.0015, + "step": 34648 + }, + { + "epoch": 32.32, + "learning_rate": 4.5959421641791047e-05, + "loss": 0.0017, + "step": 34652 + }, + { + "epoch": 32.33, + "learning_rate": 4.59589552238806e-05, + "loss": 0.0, + "step": 34656 + }, + { + "epoch": 32.33, + "learning_rate": 4.595848880597015e-05, + "loss": 0.0, + "step": 34660 + }, + { + "epoch": 32.34, + "learning_rate": 4.5958022388059705e-05, + "loss": 0.0, + "step": 34664 + }, + { + "epoch": 32.34, + "learning_rate": 4.595755597014926e-05, + "loss": 0.0001, + "step": 34668 + }, + { + "epoch": 32.34, + "learning_rate": 4.595708955223881e-05, + "loss": 0.0005, + "step": 34672 + }, + { + "epoch": 32.35, + "learning_rate": 4.5956623134328356e-05, + "loss": 0.0001, + "step": 34676 + }, + { + "epoch": 32.35, + "learning_rate": 4.595615671641792e-05, + "loss": 0.0001, + "step": 34680 + }, + { + "epoch": 32.35, + "learning_rate": 4.5955690298507466e-05, + "loss": 0.0005, + "step": 34684 + }, + { + "epoch": 32.36, + "learning_rate": 4.5955223880597014e-05, + "loss": 0.0002, + "step": 34688 + }, + { + "epoch": 32.36, + "learning_rate": 4.595475746268657e-05, + "loss": 0.0002, + "step": 34692 + }, + { + "epoch": 32.37, + "learning_rate": 4.5954291044776123e-05, + "loss": 0.0003, + "step": 34696 + }, + { + "epoch": 32.37, + "learning_rate": 4.595382462686567e-05, + "loss": 0.0009, + "step": 34700 + }, + { + "epoch": 32.37, + "learning_rate": 4.5953358208955227e-05, + "loss": 0.0001, + "step": 34704 + }, + { + "epoch": 32.38, + "learning_rate": 4.5952891791044775e-05, + "loss": 0.0001, + "step": 34708 + }, + { + "epoch": 32.38, + "learning_rate": 4.595242537313433e-05, + "loss": 0.0015, + "step": 34712 + }, + { + "epoch": 32.38, + "learning_rate": 4.5951958955223884e-05, + "loss": 0.0001, + "step": 34716 + }, + { + "epoch": 32.39, + "learning_rate": 4.595149253731343e-05, + "loss": 0.0001, + "step": 34720 + }, + { + "epoch": 32.39, + "learning_rate": 4.595102611940299e-05, + "loss": 0.0005, + "step": 34724 + }, + { + "epoch": 32.4, + "learning_rate": 4.595055970149254e-05, + "loss": 0.0, + "step": 34728 + }, + { + "epoch": 32.4, + "learning_rate": 4.595009328358209e-05, + "loss": 0.0002, + "step": 34732 + }, + { + "epoch": 32.4, + "learning_rate": 4.5949626865671645e-05, + "loss": 0.0, + "step": 34736 + }, + { + "epoch": 32.41, + "learning_rate": 4.59491604477612e-05, + "loss": 0.0, + "step": 34740 + }, + { + "epoch": 32.41, + "learning_rate": 4.594869402985075e-05, + "loss": 0.0, + "step": 34744 + }, + { + "epoch": 32.41, + "learning_rate": 4.5948227611940303e-05, + "loss": 0.0006, + "step": 34748 + }, + { + "epoch": 32.42, + "learning_rate": 4.594776119402985e-05, + "loss": 0.0009, + "step": 34752 + }, + { + "epoch": 32.42, + "learning_rate": 4.5947294776119406e-05, + "loss": 0.0001, + "step": 34756 + }, + { + "epoch": 32.43, + "learning_rate": 4.594682835820896e-05, + "loss": 0.0004, + "step": 34760 + }, + { + "epoch": 32.43, + "learning_rate": 4.594636194029851e-05, + "loss": 0.002, + "step": 34764 + }, + { + "epoch": 32.43, + "learning_rate": 4.594589552238806e-05, + "loss": 0.0014, + "step": 34768 + }, + { + "epoch": 32.44, + "learning_rate": 4.594542910447762e-05, + "loss": 0.0021, + "step": 34772 + }, + { + "epoch": 32.44, + "learning_rate": 4.594496268656717e-05, + "loss": 0.0005, + "step": 34776 + }, + { + "epoch": 32.44, + "learning_rate": 4.5944496268656716e-05, + "loss": 0.0, + "step": 34780 + }, + { + "epoch": 32.45, + "learning_rate": 4.594402985074627e-05, + "loss": 0.0006, + "step": 34784 + }, + { + "epoch": 32.45, + "learning_rate": 4.5943563432835825e-05, + "loss": 0.0, + "step": 34788 + }, + { + "epoch": 32.46, + "learning_rate": 4.5943097014925374e-05, + "loss": 0.0002, + "step": 34792 + }, + { + "epoch": 32.46, + "learning_rate": 4.594263059701493e-05, + "loss": 0.0047, + "step": 34796 + }, + { + "epoch": 32.46, + "learning_rate": 4.5942164179104477e-05, + "loss": 0.0005, + "step": 34800 + }, + { + "epoch": 32.47, + "learning_rate": 4.594169776119403e-05, + "loss": 0.0003, + "step": 34804 + }, + { + "epoch": 32.47, + "learning_rate": 4.5941231343283586e-05, + "loss": 0.0183, + "step": 34808 + }, + { + "epoch": 32.47, + "learning_rate": 4.5940764925373134e-05, + "loss": 0.0, + "step": 34812 + }, + { + "epoch": 32.48, + "learning_rate": 4.594029850746269e-05, + "loss": 0.0005, + "step": 34816 + }, + { + "epoch": 32.48, + "learning_rate": 4.5939832089552244e-05, + "loss": 0.0013, + "step": 34820 + }, + { + "epoch": 32.49, + "learning_rate": 4.593936567164179e-05, + "loss": 0.0042, + "step": 34824 + }, + { + "epoch": 32.49, + "learning_rate": 4.593889925373134e-05, + "loss": 0.0003, + "step": 34828 + }, + { + "epoch": 32.49, + "learning_rate": 4.59384328358209e-05, + "loss": 0.0, + "step": 34832 + }, + { + "epoch": 32.5, + "learning_rate": 4.593796641791045e-05, + "loss": 0.0001, + "step": 34836 + }, + { + "epoch": 32.5, + "learning_rate": 4.59375e-05, + "loss": 0.0001, + "step": 34840 + }, + { + "epoch": 32.5, + "learning_rate": 4.5937033582089553e-05, + "loss": 0.0001, + "step": 34844 + }, + { + "epoch": 32.51, + "learning_rate": 4.593656716417911e-05, + "loss": 0.0002, + "step": 34848 + }, + { + "epoch": 32.51, + "learning_rate": 4.5936100746268656e-05, + "loss": 0.0001, + "step": 34852 + }, + { + "epoch": 32.51, + "learning_rate": 4.593563432835821e-05, + "loss": 0.0, + "step": 34856 + }, + { + "epoch": 32.52, + "learning_rate": 4.593516791044776e-05, + "loss": 0.0035, + "step": 34860 + }, + { + "epoch": 32.52, + "learning_rate": 4.5934701492537314e-05, + "loss": 0.0043, + "step": 34864 + }, + { + "epoch": 32.53, + "learning_rate": 4.593423507462687e-05, + "loss": 0.0, + "step": 34868 + }, + { + "epoch": 32.53, + "learning_rate": 4.593376865671642e-05, + "loss": 0.0001, + "step": 34872 + }, + { + "epoch": 32.53, + "learning_rate": 4.593330223880597e-05, + "loss": 0.0007, + "step": 34876 + }, + { + "epoch": 32.54, + "learning_rate": 4.593283582089553e-05, + "loss": 0.0002, + "step": 34880 + }, + { + "epoch": 32.54, + "learning_rate": 4.5932369402985075e-05, + "loss": 0.0004, + "step": 34884 + }, + { + "epoch": 32.54, + "learning_rate": 4.5931902985074624e-05, + "loss": 0.0, + "step": 34888 + }, + { + "epoch": 32.55, + "learning_rate": 4.5931436567164185e-05, + "loss": 0.0021, + "step": 34892 + }, + { + "epoch": 32.55, + "learning_rate": 4.593097014925373e-05, + "loss": 0.0, + "step": 34896 + }, + { + "epoch": 32.56, + "learning_rate": 4.593050373134329e-05, + "loss": 0.0002, + "step": 34900 + }, + { + "epoch": 32.56, + "learning_rate": 4.5930037313432836e-05, + "loss": 0.0002, + "step": 34904 + }, + { + "epoch": 32.56, + "learning_rate": 4.592957089552239e-05, + "loss": 0.0001, + "step": 34908 + }, + { + "epoch": 32.57, + "learning_rate": 4.5929104477611946e-05, + "loss": 0.0036, + "step": 34912 + }, + { + "epoch": 32.57, + "learning_rate": 4.5928638059701494e-05, + "loss": 0.0019, + "step": 34916 + }, + { + "epoch": 32.57, + "learning_rate": 4.592817164179104e-05, + "loss": 0.0009, + "step": 34920 + }, + { + "epoch": 32.58, + "learning_rate": 4.5927705223880604e-05, + "loss": 0.0015, + "step": 34924 + }, + { + "epoch": 32.58, + "learning_rate": 4.592723880597015e-05, + "loss": 0.0002, + "step": 34928 + }, + { + "epoch": 32.59, + "learning_rate": 4.59267723880597e-05, + "loss": 0.0, + "step": 34932 + }, + { + "epoch": 32.59, + "learning_rate": 4.5926305970149255e-05, + "loss": 0.0001, + "step": 34936 + }, + { + "epoch": 32.59, + "learning_rate": 4.592583955223881e-05, + "loss": 0.0014, + "step": 34940 + }, + { + "epoch": 32.6, + "learning_rate": 4.592537313432836e-05, + "loss": 0.0001, + "step": 34944 + }, + { + "epoch": 32.6, + "learning_rate": 4.592490671641791e-05, + "loss": 0.0001, + "step": 34948 + }, + { + "epoch": 32.6, + "learning_rate": 4.592444029850747e-05, + "loss": 0.0002, + "step": 34952 + }, + { + "epoch": 32.61, + "learning_rate": 4.5923973880597016e-05, + "loss": 0.0001, + "step": 34956 + }, + { + "epoch": 32.61, + "learning_rate": 4.592350746268657e-05, + "loss": 0.001, + "step": 34960 + }, + { + "epoch": 32.62, + "learning_rate": 4.592304104477612e-05, + "loss": 0.0002, + "step": 34964 + }, + { + "epoch": 32.62, + "learning_rate": 4.5922574626865674e-05, + "loss": 0.0014, + "step": 34968 + }, + { + "epoch": 32.62, + "learning_rate": 4.592210820895523e-05, + "loss": 0.0, + "step": 34972 + }, + { + "epoch": 32.63, + "learning_rate": 4.592164179104478e-05, + "loss": 0.0013, + "step": 34976 + }, + { + "epoch": 32.63, + "learning_rate": 4.5921175373134325e-05, + "loss": 0.0001, + "step": 34980 + }, + { + "epoch": 32.63, + "learning_rate": 4.592070895522389e-05, + "loss": 0.0029, + "step": 34984 + }, + { + "epoch": 32.64, + "learning_rate": 4.5920242537313435e-05, + "loss": 0.0, + "step": 34988 + }, + { + "epoch": 32.64, + "learning_rate": 4.591977611940298e-05, + "loss": 0.0009, + "step": 34992 + }, + { + "epoch": 32.65, + "learning_rate": 4.591930970149254e-05, + "loss": 0.0, + "step": 34996 + }, + { + "epoch": 32.65, + "learning_rate": 4.591884328358209e-05, + "loss": 0.0, + "step": 35000 + }, + { + "epoch": 32.65, + "eval_exact_match": 0.7350096711798839, + "eval_exec": 0.7543520309477756, + "eval_loss": 0.41416093707084656, + "eval_runtime": 1061.3828, + "eval_samples_per_second": 0.974, + "step": 35000 + }, + { + "epoch": 32.65, + "learning_rate": 4.591837686567164e-05, + "loss": 0.0063, + "step": 35004 + }, + { + "epoch": 32.66, + "learning_rate": 4.5917910447761196e-05, + "loss": 0.0001, + "step": 35008 + }, + { + "epoch": 32.66, + "learning_rate": 4.591744402985075e-05, + "loss": 0.0001, + "step": 35012 + }, + { + "epoch": 32.66, + "learning_rate": 4.59169776119403e-05, + "loss": 0.0014, + "step": 35016 + }, + { + "epoch": 32.67, + "learning_rate": 4.5916511194029854e-05, + "loss": 0.0, + "step": 35020 + }, + { + "epoch": 32.67, + "learning_rate": 4.59160447761194e-05, + "loss": 0.0001, + "step": 35024 + }, + { + "epoch": 32.68, + "learning_rate": 4.591557835820896e-05, + "loss": 0.0005, + "step": 35028 + }, + { + "epoch": 32.68, + "learning_rate": 4.591511194029851e-05, + "loss": 0.0, + "step": 35032 + }, + { + "epoch": 32.68, + "learning_rate": 4.591464552238806e-05, + "loss": 0.0, + "step": 35036 + }, + { + "epoch": 32.69, + "learning_rate": 4.591417910447761e-05, + "loss": 0.0001, + "step": 35040 + }, + { + "epoch": 32.69, + "learning_rate": 4.591371268656717e-05, + "loss": 0.0002, + "step": 35044 + }, + { + "epoch": 32.69, + "learning_rate": 4.591324626865672e-05, + "loss": 0.0001, + "step": 35048 + }, + { + "epoch": 32.7, + "learning_rate": 4.5912779850746266e-05, + "loss": 0.0016, + "step": 35052 + }, + { + "epoch": 32.7, + "learning_rate": 4.591231343283582e-05, + "loss": 0.0025, + "step": 35056 + }, + { + "epoch": 32.71, + "learning_rate": 4.5911847014925376e-05, + "loss": 0.0, + "step": 35060 + }, + { + "epoch": 32.71, + "learning_rate": 4.591138059701493e-05, + "loss": 0.0001, + "step": 35064 + }, + { + "epoch": 32.71, + "learning_rate": 4.591091417910448e-05, + "loss": 0.0001, + "step": 35068 + }, + { + "epoch": 32.72, + "learning_rate": 4.5910447761194034e-05, + "loss": 0.0002, + "step": 35072 + }, + { + "epoch": 32.72, + "learning_rate": 4.590998134328359e-05, + "loss": 0.0001, + "step": 35076 + }, + { + "epoch": 32.72, + "learning_rate": 4.590951492537314e-05, + "loss": 0.0014, + "step": 35080 + }, + { + "epoch": 32.73, + "learning_rate": 4.5909048507462685e-05, + "loss": 0.0007, + "step": 35084 + }, + { + "epoch": 32.73, + "learning_rate": 4.590858208955224e-05, + "loss": 0.0001, + "step": 35088 + }, + { + "epoch": 32.73, + "learning_rate": 4.5908115671641795e-05, + "loss": 0.0001, + "step": 35092 + }, + { + "epoch": 32.74, + "learning_rate": 4.590764925373134e-05, + "loss": 0.0001, + "step": 35096 + }, + { + "epoch": 32.74, + "learning_rate": 4.59071828358209e-05, + "loss": 0.0, + "step": 35100 + }, + { + "epoch": 32.75, + "learning_rate": 4.590671641791045e-05, + "loss": 0.0, + "step": 35104 + }, + { + "epoch": 32.75, + "learning_rate": 4.590625e-05, + "loss": 0.0, + "step": 35108 + }, + { + "epoch": 32.75, + "learning_rate": 4.5905783582089556e-05, + "loss": 0.0004, + "step": 35112 + }, + { + "epoch": 32.76, + "learning_rate": 4.5905317164179104e-05, + "loss": 0.0, + "step": 35116 + }, + { + "epoch": 32.76, + "learning_rate": 4.590485074626866e-05, + "loss": 0.0, + "step": 35120 + }, + { + "epoch": 32.76, + "learning_rate": 4.5904384328358214e-05, + "loss": 0.0003, + "step": 35124 + }, + { + "epoch": 32.77, + "learning_rate": 4.590391791044776e-05, + "loss": 0.0003, + "step": 35128 + }, + { + "epoch": 32.77, + "learning_rate": 4.590345149253732e-05, + "loss": 0.0002, + "step": 35132 + }, + { + "epoch": 32.78, + "learning_rate": 4.590298507462687e-05, + "loss": 0.0001, + "step": 35136 + }, + { + "epoch": 32.78, + "learning_rate": 4.590251865671642e-05, + "loss": 0.0014, + "step": 35140 + }, + { + "epoch": 32.78, + "learning_rate": 4.590205223880597e-05, + "loss": 0.0, + "step": 35144 + }, + { + "epoch": 32.79, + "learning_rate": 4.590158582089552e-05, + "loss": 0.0, + "step": 35148 + }, + { + "epoch": 32.79, + "learning_rate": 4.590111940298508e-05, + "loss": 0.0005, + "step": 35152 + }, + { + "epoch": 32.79, + "learning_rate": 4.5900652985074626e-05, + "loss": 0.0022, + "step": 35156 + }, + { + "epoch": 32.8, + "learning_rate": 4.590018656716418e-05, + "loss": 0.0002, + "step": 35160 + }, + { + "epoch": 32.8, + "learning_rate": 4.5899720149253736e-05, + "loss": 0.0002, + "step": 35164 + }, + { + "epoch": 32.81, + "learning_rate": 4.5899253731343284e-05, + "loss": 0.0079, + "step": 35168 + }, + { + "epoch": 32.81, + "learning_rate": 4.589878731343284e-05, + "loss": 0.0002, + "step": 35172 + }, + { + "epoch": 32.81, + "learning_rate": 4.589832089552239e-05, + "loss": 0.0001, + "step": 35176 + }, + { + "epoch": 32.82, + "learning_rate": 4.589785447761194e-05, + "loss": 0.0002, + "step": 35180 + }, + { + "epoch": 32.82, + "learning_rate": 4.58973880597015e-05, + "loss": 0.0001, + "step": 35184 + }, + { + "epoch": 32.82, + "learning_rate": 4.5896921641791045e-05, + "loss": 0.0001, + "step": 35188 + }, + { + "epoch": 32.83, + "learning_rate": 4.58964552238806e-05, + "loss": 0.0, + "step": 35192 + }, + { + "epoch": 32.83, + "learning_rate": 4.5895988805970155e-05, + "loss": 0.0001, + "step": 35196 + }, + { + "epoch": 32.84, + "learning_rate": 4.58955223880597e-05, + "loss": 0.0, + "step": 35200 + }, + { + "epoch": 32.84, + "learning_rate": 4.589505597014925e-05, + "loss": 0.0008, + "step": 35204 + }, + { + "epoch": 32.84, + "learning_rate": 4.5894589552238806e-05, + "loss": 0.0003, + "step": 35208 + }, + { + "epoch": 32.85, + "learning_rate": 4.589412313432836e-05, + "loss": 0.0003, + "step": 35212 + }, + { + "epoch": 32.85, + "learning_rate": 4.5893656716417916e-05, + "loss": 0.0001, + "step": 35216 + }, + { + "epoch": 32.85, + "learning_rate": 4.5893190298507464e-05, + "loss": 0.0, + "step": 35220 + }, + { + "epoch": 32.86, + "learning_rate": 4.589272388059702e-05, + "loss": 0.0, + "step": 35224 + }, + { + "epoch": 32.86, + "learning_rate": 4.5892257462686574e-05, + "loss": 0.0002, + "step": 35228 + }, + { + "epoch": 32.87, + "learning_rate": 4.589179104477612e-05, + "loss": 0.0002, + "step": 35232 + }, + { + "epoch": 32.87, + "learning_rate": 4.589132462686567e-05, + "loss": 0.0, + "step": 35236 + }, + { + "epoch": 32.87, + "learning_rate": 4.589085820895523e-05, + "loss": 0.0014, + "step": 35240 + }, + { + "epoch": 32.88, + "learning_rate": 4.589039179104478e-05, + "loss": 0.0002, + "step": 35244 + }, + { + "epoch": 32.88, + "learning_rate": 4.588992537313433e-05, + "loss": 0.0006, + "step": 35248 + }, + { + "epoch": 32.88, + "learning_rate": 4.588945895522388e-05, + "loss": 0.0001, + "step": 35252 + }, + { + "epoch": 32.89, + "learning_rate": 4.588899253731344e-05, + "loss": 0.0019, + "step": 35256 + }, + { + "epoch": 32.89, + "learning_rate": 4.5888526119402986e-05, + "loss": 0.0007, + "step": 35260 + }, + { + "epoch": 32.9, + "learning_rate": 4.588805970149254e-05, + "loss": 0.0, + "step": 35264 + }, + { + "epoch": 32.9, + "learning_rate": 4.588759328358209e-05, + "loss": 0.0004, + "step": 35268 + }, + { + "epoch": 32.9, + "learning_rate": 4.5887126865671644e-05, + "loss": 0.0, + "step": 35272 + }, + { + "epoch": 32.91, + "learning_rate": 4.58866604477612e-05, + "loss": 0.0009, + "step": 35276 + }, + { + "epoch": 32.91, + "learning_rate": 4.588619402985075e-05, + "loss": 0.0001, + "step": 35280 + }, + { + "epoch": 32.91, + "learning_rate": 4.58857276119403e-05, + "loss": 0.0014, + "step": 35284 + }, + { + "epoch": 32.92, + "learning_rate": 4.588526119402986e-05, + "loss": 0.0025, + "step": 35288 + }, + { + "epoch": 32.92, + "learning_rate": 4.5884794776119405e-05, + "loss": 0.0001, + "step": 35292 + }, + { + "epoch": 32.93, + "learning_rate": 4.588432835820895e-05, + "loss": 0.0064, + "step": 35296 + }, + { + "epoch": 32.93, + "learning_rate": 4.5883861940298515e-05, + "loss": 0.0001, + "step": 35300 + }, + { + "epoch": 32.93, + "learning_rate": 4.588339552238806e-05, + "loss": 0.0006, + "step": 35304 + }, + { + "epoch": 32.94, + "learning_rate": 4.588292910447761e-05, + "loss": 0.0004, + "step": 35308 + }, + { + "epoch": 32.94, + "learning_rate": 4.5882462686567166e-05, + "loss": 0.0076, + "step": 35312 + }, + { + "epoch": 32.94, + "learning_rate": 4.588199626865672e-05, + "loss": 0.0001, + "step": 35316 + }, + { + "epoch": 32.95, + "learning_rate": 4.588152985074627e-05, + "loss": 0.0001, + "step": 35320 + }, + { + "epoch": 32.95, + "learning_rate": 4.5881063432835824e-05, + "loss": 0.0001, + "step": 35324 + }, + { + "epoch": 32.96, + "learning_rate": 4.588059701492537e-05, + "loss": 0.0, + "step": 35328 + }, + { + "epoch": 32.96, + "learning_rate": 4.588013059701493e-05, + "loss": 0.001, + "step": 35332 + }, + { + "epoch": 32.96, + "learning_rate": 4.587966417910448e-05, + "loss": 0.0001, + "step": 35336 + }, + { + "epoch": 32.97, + "learning_rate": 4.587919776119403e-05, + "loss": 0.0001, + "step": 35340 + }, + { + "epoch": 32.97, + "learning_rate": 4.5878731343283585e-05, + "loss": 0.0001, + "step": 35344 + }, + { + "epoch": 32.97, + "learning_rate": 4.587826492537314e-05, + "loss": 0.0001, + "step": 35348 + }, + { + "epoch": 32.98, + "learning_rate": 4.587779850746269e-05, + "loss": 0.0, + "step": 35352 + }, + { + "epoch": 32.98, + "learning_rate": 4.5877332089552236e-05, + "loss": 0.0001, + "step": 35356 + }, + { + "epoch": 32.98, + "learning_rate": 4.58768656716418e-05, + "loss": 0.0001, + "step": 35360 + }, + { + "epoch": 32.99, + "learning_rate": 4.5876399253731346e-05, + "loss": 0.0, + "step": 35364 + }, + { + "epoch": 32.99, + "learning_rate": 4.5875932835820894e-05, + "loss": 0.0003, + "step": 35368 + }, + { + "epoch": 33.0, + "learning_rate": 4.587546641791045e-05, + "loss": 0.0008, + "step": 35372 + }, + { + "epoch": 33.0, + "learning_rate": 4.5875000000000004e-05, + "loss": 0.0001, + "step": 35376 + }, + { + "epoch": 33.0, + "learning_rate": 4.587453358208956e-05, + "loss": 0.0008, + "step": 35380 + }, + { + "epoch": 33.01, + "learning_rate": 4.587406716417911e-05, + "loss": 0.0006, + "step": 35384 + }, + { + "epoch": 33.01, + "learning_rate": 4.5873600746268655e-05, + "loss": 0.0, + "step": 35388 + }, + { + "epoch": 33.01, + "learning_rate": 4.587313432835822e-05, + "loss": 0.0007, + "step": 35392 + }, + { + "epoch": 33.02, + "learning_rate": 4.5872667910447765e-05, + "loss": 0.0001, + "step": 35396 + }, + { + "epoch": 33.02, + "learning_rate": 4.587220149253731e-05, + "loss": 0.0, + "step": 35400 + }, + { + "epoch": 33.03, + "learning_rate": 4.587173507462687e-05, + "loss": 0.0001, + "step": 35404 + }, + { + "epoch": 33.03, + "learning_rate": 4.587126865671642e-05, + "loss": 0.001, + "step": 35408 + }, + { + "epoch": 33.03, + "learning_rate": 4.587080223880597e-05, + "loss": 0.0012, + "step": 35412 + }, + { + "epoch": 33.04, + "learning_rate": 4.5870335820895526e-05, + "loss": 0.0008, + "step": 35416 + }, + { + "epoch": 33.04, + "learning_rate": 4.586986940298508e-05, + "loss": 0.0001, + "step": 35420 + }, + { + "epoch": 33.04, + "learning_rate": 4.586940298507463e-05, + "loss": 0.0, + "step": 35424 + }, + { + "epoch": 33.05, + "learning_rate": 4.5868936567164184e-05, + "loss": 0.0005, + "step": 35428 + }, + { + "epoch": 33.05, + "learning_rate": 4.586847014925373e-05, + "loss": 0.0, + "step": 35432 + }, + { + "epoch": 33.06, + "learning_rate": 4.586800373134329e-05, + "loss": 0.0001, + "step": 35436 + }, + { + "epoch": 33.06, + "learning_rate": 4.586753731343284e-05, + "loss": 0.0009, + "step": 35440 + }, + { + "epoch": 33.06, + "learning_rate": 4.586707089552239e-05, + "loss": 0.0002, + "step": 35444 + }, + { + "epoch": 33.07, + "learning_rate": 4.586660447761194e-05, + "loss": 0.0005, + "step": 35448 + }, + { + "epoch": 33.07, + "learning_rate": 4.58661380597015e-05, + "loss": 0.0005, + "step": 35452 + }, + { + "epoch": 33.07, + "learning_rate": 4.586567164179105e-05, + "loss": 0.0042, + "step": 35456 + }, + { + "epoch": 33.08, + "learning_rate": 4.5865205223880596e-05, + "loss": 0.0, + "step": 35460 + }, + { + "epoch": 33.08, + "learning_rate": 4.586473880597015e-05, + "loss": 0.0001, + "step": 35464 + }, + { + "epoch": 33.09, + "learning_rate": 4.5864272388059706e-05, + "loss": 0.0064, + "step": 35468 + }, + { + "epoch": 33.09, + "learning_rate": 4.5863805970149254e-05, + "loss": 0.0004, + "step": 35472 + }, + { + "epoch": 33.09, + "learning_rate": 4.586333955223881e-05, + "loss": 0.0001, + "step": 35476 + }, + { + "epoch": 33.1, + "learning_rate": 4.586287313432836e-05, + "loss": 0.0, + "step": 35480 + }, + { + "epoch": 33.1, + "learning_rate": 4.586240671641791e-05, + "loss": 0.0005, + "step": 35484 + }, + { + "epoch": 33.1, + "learning_rate": 4.586194029850747e-05, + "loss": 0.0002, + "step": 35488 + }, + { + "epoch": 33.11, + "learning_rate": 4.5861473880597015e-05, + "loss": 0.0002, + "step": 35492 + }, + { + "epoch": 33.11, + "learning_rate": 4.586100746268657e-05, + "loss": 0.0002, + "step": 35496 + }, + { + "epoch": 33.12, + "learning_rate": 4.5860541044776125e-05, + "loss": 0.0068, + "step": 35500 + }, + { + "epoch": 33.12, + "eval_exact_match": 0.7379110251450677, + "eval_exec": 0.758220502901354, + "eval_loss": 0.4318758547306061, + "eval_runtime": 1370.5932, + "eval_samples_per_second": 0.754, + "step": 35500 + }, + { + "epoch": 33.12, + "learning_rate": 4.586007462686567e-05, + "loss": 0.0, + "step": 35504 + }, + { + "epoch": 33.12, + "learning_rate": 4.585960820895522e-05, + "loss": 0.0004, + "step": 35508 + }, + { + "epoch": 33.13, + "learning_rate": 4.585914179104478e-05, + "loss": 0.0012, + "step": 35512 + }, + { + "epoch": 33.13, + "learning_rate": 4.585867537313433e-05, + "loss": 0.0001, + "step": 35516 + }, + { + "epoch": 33.13, + "learning_rate": 4.585820895522388e-05, + "loss": 0.0006, + "step": 35520 + }, + { + "epoch": 33.14, + "learning_rate": 4.5857742537313434e-05, + "loss": 0.0001, + "step": 35524 + }, + { + "epoch": 33.14, + "learning_rate": 4.585727611940299e-05, + "loss": 0.0, + "step": 35528 + }, + { + "epoch": 33.15, + "learning_rate": 4.585680970149254e-05, + "loss": 0.0002, + "step": 35532 + }, + { + "epoch": 33.15, + "learning_rate": 4.585634328358209e-05, + "loss": 0.0001, + "step": 35536 + }, + { + "epoch": 33.15, + "learning_rate": 4.585587686567164e-05, + "loss": 0.0, + "step": 35540 + }, + { + "epoch": 33.16, + "learning_rate": 4.58554104477612e-05, + "loss": 0.0011, + "step": 35544 + }, + { + "epoch": 33.16, + "learning_rate": 4.585494402985075e-05, + "loss": 0.0001, + "step": 35548 + }, + { + "epoch": 33.16, + "learning_rate": 4.58544776119403e-05, + "loss": 0.0, + "step": 35552 + }, + { + "epoch": 33.17, + "learning_rate": 4.585401119402985e-05, + "loss": 0.0001, + "step": 35556 + }, + { + "epoch": 33.17, + "learning_rate": 4.585354477611941e-05, + "loss": 0.0, + "step": 35560 + }, + { + "epoch": 33.18, + "learning_rate": 4.5853078358208956e-05, + "loss": 0.0, + "step": 35564 + }, + { + "epoch": 33.18, + "learning_rate": 4.585261194029851e-05, + "loss": 0.0001, + "step": 35568 + }, + { + "epoch": 33.18, + "learning_rate": 4.5852145522388065e-05, + "loss": 0.0003, + "step": 35572 + }, + { + "epoch": 33.19, + "learning_rate": 4.5851679104477614e-05, + "loss": 0.0001, + "step": 35576 + }, + { + "epoch": 33.19, + "learning_rate": 4.585121268656717e-05, + "loss": 0.0, + "step": 35580 + }, + { + "epoch": 33.19, + "learning_rate": 4.585074626865672e-05, + "loss": 0.0004, + "step": 35584 + }, + { + "epoch": 33.2, + "learning_rate": 4.585027985074627e-05, + "loss": 0.0002, + "step": 35588 + }, + { + "epoch": 33.2, + "learning_rate": 4.5849813432835826e-05, + "loss": 0.0006, + "step": 35592 + }, + { + "epoch": 33.21, + "learning_rate": 4.5849347014925375e-05, + "loss": 0.0, + "step": 35596 + }, + { + "epoch": 33.21, + "learning_rate": 4.584888059701492e-05, + "loss": 0.0001, + "step": 35600 + }, + { + "epoch": 33.21, + "learning_rate": 4.5848414179104484e-05, + "loss": 0.0003, + "step": 35604 + }, + { + "epoch": 33.22, + "learning_rate": 4.584794776119403e-05, + "loss": 0.0001, + "step": 35608 + }, + { + "epoch": 33.22, + "learning_rate": 4.584748134328358e-05, + "loss": 0.001, + "step": 35612 + }, + { + "epoch": 33.22, + "learning_rate": 4.5847014925373136e-05, + "loss": 0.0, + "step": 35616 + }, + { + "epoch": 33.23, + "learning_rate": 4.584654850746269e-05, + "loss": 0.0016, + "step": 35620 + }, + { + "epoch": 33.23, + "learning_rate": 4.584608208955224e-05, + "loss": 0.0, + "step": 35624 + }, + { + "epoch": 33.24, + "learning_rate": 4.5845615671641794e-05, + "loss": 0.0, + "step": 35628 + }, + { + "epoch": 33.24, + "learning_rate": 4.584514925373135e-05, + "loss": 0.0001, + "step": 35632 + }, + { + "epoch": 33.24, + "learning_rate": 4.5844682835820897e-05, + "loss": 0.0, + "step": 35636 + }, + { + "epoch": 33.25, + "learning_rate": 4.584421641791045e-05, + "loss": 0.0, + "step": 35640 + }, + { + "epoch": 33.25, + "learning_rate": 4.584375e-05, + "loss": 0.0, + "step": 35644 + }, + { + "epoch": 33.25, + "learning_rate": 4.5843283582089555e-05, + "loss": 0.0017, + "step": 35648 + }, + { + "epoch": 33.26, + "learning_rate": 4.584281716417911e-05, + "loss": 0.0, + "step": 35652 + }, + { + "epoch": 33.26, + "learning_rate": 4.584235074626866e-05, + "loss": 0.0001, + "step": 35656 + }, + { + "epoch": 33.26, + "learning_rate": 4.5841884328358206e-05, + "loss": 0.0, + "step": 35660 + }, + { + "epoch": 33.27, + "learning_rate": 4.584141791044777e-05, + "loss": 0.0, + "step": 35664 + }, + { + "epoch": 33.27, + "learning_rate": 4.5840951492537316e-05, + "loss": 0.0, + "step": 35668 + }, + { + "epoch": 33.28, + "learning_rate": 4.5840485074626864e-05, + "loss": 0.0, + "step": 35672 + }, + { + "epoch": 33.28, + "learning_rate": 4.584001865671642e-05, + "loss": 0.0001, + "step": 35676 + }, + { + "epoch": 33.28, + "learning_rate": 4.5839552238805973e-05, + "loss": 0.0001, + "step": 35680 + }, + { + "epoch": 33.29, + "learning_rate": 4.583908582089552e-05, + "loss": 0.0, + "step": 35684 + }, + { + "epoch": 33.29, + "learning_rate": 4.5838619402985077e-05, + "loss": 0.0, + "step": 35688 + }, + { + "epoch": 33.29, + "learning_rate": 4.583815298507463e-05, + "loss": 0.0027, + "step": 35692 + }, + { + "epoch": 33.3, + "learning_rate": 4.583768656716418e-05, + "loss": 0.0003, + "step": 35696 + }, + { + "epoch": 33.3, + "learning_rate": 4.5837220149253734e-05, + "loss": 0.0032, + "step": 35700 + }, + { + "epoch": 33.31, + "learning_rate": 4.583675373134328e-05, + "loss": 0.0007, + "step": 35704 + }, + { + "epoch": 33.31, + "learning_rate": 4.583628731343284e-05, + "loss": 0.0005, + "step": 35708 + }, + { + "epoch": 33.31, + "learning_rate": 4.583582089552239e-05, + "loss": 0.0032, + "step": 35712 + }, + { + "epoch": 33.32, + "learning_rate": 4.583535447761194e-05, + "loss": 0.0002, + "step": 35716 + }, + { + "epoch": 33.32, + "learning_rate": 4.5834888059701495e-05, + "loss": 0.0027, + "step": 35720 + }, + { + "epoch": 33.32, + "learning_rate": 4.583442164179105e-05, + "loss": 0.0004, + "step": 35724 + }, + { + "epoch": 33.33, + "learning_rate": 4.58339552238806e-05, + "loss": 0.0, + "step": 35728 + }, + { + "epoch": 33.33, + "learning_rate": 4.583348880597015e-05, + "loss": 0.0, + "step": 35732 + }, + { + "epoch": 33.34, + "learning_rate": 4.58330223880597e-05, + "loss": 0.0001, + "step": 35736 + }, + { + "epoch": 33.34, + "learning_rate": 4.5832555970149256e-05, + "loss": 0.0, + "step": 35740 + }, + { + "epoch": 33.34, + "learning_rate": 4.583208955223881e-05, + "loss": 0.0023, + "step": 35744 + }, + { + "epoch": 33.35, + "learning_rate": 4.583162313432836e-05, + "loss": 0.0001, + "step": 35748 + }, + { + "epoch": 33.35, + "learning_rate": 4.5831156716417914e-05, + "loss": 0.0145, + "step": 35752 + }, + { + "epoch": 33.35, + "learning_rate": 4.583069029850747e-05, + "loss": 0.0028, + "step": 35756 + }, + { + "epoch": 33.36, + "learning_rate": 4.583022388059702e-05, + "loss": 0.0007, + "step": 35760 + }, + { + "epoch": 33.36, + "learning_rate": 4.5829757462686566e-05, + "loss": 0.0, + "step": 35764 + }, + { + "epoch": 33.37, + "learning_rate": 4.582929104477612e-05, + "loss": 0.0007, + "step": 35768 + }, + { + "epoch": 33.37, + "learning_rate": 4.5828824626865675e-05, + "loss": 0.0005, + "step": 35772 + }, + { + "epoch": 33.37, + "learning_rate": 4.5828358208955223e-05, + "loss": 0.0, + "step": 35776 + }, + { + "epoch": 33.38, + "learning_rate": 4.582789179104478e-05, + "loss": 0.001, + "step": 35780 + }, + { + "epoch": 33.38, + "learning_rate": 4.582742537313433e-05, + "loss": 0.0002, + "step": 35784 + }, + { + "epoch": 33.38, + "learning_rate": 4.582695895522388e-05, + "loss": 0.0001, + "step": 35788 + }, + { + "epoch": 33.39, + "learning_rate": 4.5826492537313436e-05, + "loss": 0.0011, + "step": 35792 + }, + { + "epoch": 33.39, + "learning_rate": 4.5826026119402984e-05, + "loss": 0.0091, + "step": 35796 + }, + { + "epoch": 33.4, + "learning_rate": 4.582555970149254e-05, + "loss": 0.0017, + "step": 35800 + }, + { + "epoch": 33.4, + "learning_rate": 4.5825093283582094e-05, + "loss": 0.0003, + "step": 35804 + }, + { + "epoch": 33.4, + "learning_rate": 4.582462686567164e-05, + "loss": 0.002, + "step": 35808 + }, + { + "epoch": 33.41, + "learning_rate": 4.58241604477612e-05, + "loss": 0.0002, + "step": 35812 + }, + { + "epoch": 33.41, + "learning_rate": 4.582369402985075e-05, + "loss": 0.0001, + "step": 35816 + }, + { + "epoch": 33.41, + "learning_rate": 4.58232276119403e-05, + "loss": 0.0098, + "step": 35820 + }, + { + "epoch": 33.42, + "learning_rate": 4.582276119402985e-05, + "loss": 0.0001, + "step": 35824 + }, + { + "epoch": 33.42, + "learning_rate": 4.5822294776119403e-05, + "loss": 0.0004, + "step": 35828 + }, + { + "epoch": 33.43, + "learning_rate": 4.582182835820896e-05, + "loss": 0.0001, + "step": 35832 + }, + { + "epoch": 33.43, + "learning_rate": 4.5821361940298506e-05, + "loss": 0.0001, + "step": 35836 + }, + { + "epoch": 33.43, + "learning_rate": 4.582089552238806e-05, + "loss": 0.0001, + "step": 35840 + }, + { + "epoch": 33.44, + "learning_rate": 4.5820429104477616e-05, + "loss": 0.0, + "step": 35844 + }, + { + "epoch": 33.44, + "learning_rate": 4.5819962686567164e-05, + "loss": 0.0018, + "step": 35848 + }, + { + "epoch": 33.44, + "learning_rate": 4.581949626865672e-05, + "loss": 0.0005, + "step": 35852 + }, + { + "epoch": 33.45, + "learning_rate": 4.581902985074627e-05, + "loss": 0.0002, + "step": 35856 + }, + { + "epoch": 33.45, + "learning_rate": 4.581856343283582e-05, + "loss": 0.0006, + "step": 35860 + }, + { + "epoch": 33.46, + "learning_rate": 4.581809701492538e-05, + "loss": 0.0001, + "step": 35864 + }, + { + "epoch": 33.46, + "learning_rate": 4.5817630597014925e-05, + "loss": 0.0001, + "step": 35868 + }, + { + "epoch": 33.46, + "learning_rate": 4.581716417910448e-05, + "loss": 0.0001, + "step": 35872 + }, + { + "epoch": 33.47, + "learning_rate": 4.5816697761194035e-05, + "loss": 0.0001, + "step": 35876 + }, + { + "epoch": 33.47, + "learning_rate": 4.581623134328358e-05, + "loss": 0.0007, + "step": 35880 + }, + { + "epoch": 33.47, + "learning_rate": 4.581576492537314e-05, + "loss": 0.0047, + "step": 35884 + }, + { + "epoch": 33.48, + "learning_rate": 4.5815298507462686e-05, + "loss": 0.0004, + "step": 35888 + }, + { + "epoch": 33.48, + "learning_rate": 4.581483208955224e-05, + "loss": 0.0001, + "step": 35892 + }, + { + "epoch": 33.49, + "learning_rate": 4.5814365671641796e-05, + "loss": 0.0003, + "step": 35896 + }, + { + "epoch": 33.49, + "learning_rate": 4.5813899253731344e-05, + "loss": 0.0047, + "step": 35900 + }, + { + "epoch": 33.49, + "learning_rate": 4.58134328358209e-05, + "loss": 0.0012, + "step": 35904 + }, + { + "epoch": 33.5, + "learning_rate": 4.5812966417910454e-05, + "loss": 0.0042, + "step": 35908 + }, + { + "epoch": 33.5, + "learning_rate": 4.58125e-05, + "loss": 0.0, + "step": 35912 + }, + { + "epoch": 33.5, + "learning_rate": 4.581203358208955e-05, + "loss": 0.0256, + "step": 35916 + }, + { + "epoch": 33.51, + "learning_rate": 4.581156716417911e-05, + "loss": 0.0033, + "step": 35920 + }, + { + "epoch": 33.51, + "learning_rate": 4.581110074626866e-05, + "loss": 0.0001, + "step": 35924 + }, + { + "epoch": 33.51, + "learning_rate": 4.581063432835821e-05, + "loss": 0.0015, + "step": 35928 + }, + { + "epoch": 33.52, + "learning_rate": 4.581016791044776e-05, + "loss": 0.0003, + "step": 35932 + }, + { + "epoch": 33.52, + "learning_rate": 4.580970149253732e-05, + "loss": 0.0001, + "step": 35936 + }, + { + "epoch": 33.53, + "learning_rate": 4.5809235074626866e-05, + "loss": 0.0003, + "step": 35940 + }, + { + "epoch": 33.53, + "learning_rate": 4.580876865671642e-05, + "loss": 0.0, + "step": 35944 + }, + { + "epoch": 33.53, + "learning_rate": 4.580830223880597e-05, + "loss": 0.0015, + "step": 35948 + }, + { + "epoch": 33.54, + "learning_rate": 4.5807835820895524e-05, + "loss": 0.0002, + "step": 35952 + }, + { + "epoch": 33.54, + "learning_rate": 4.580736940298508e-05, + "loss": 0.0005, + "step": 35956 + }, + { + "epoch": 33.54, + "learning_rate": 4.580690298507463e-05, + "loss": 0.0001, + "step": 35960 + }, + { + "epoch": 33.55, + "learning_rate": 4.580643656716418e-05, + "loss": 0.0033, + "step": 35964 + }, + { + "epoch": 33.55, + "learning_rate": 4.580597014925374e-05, + "loss": 0.0027, + "step": 35968 + }, + { + "epoch": 33.56, + "learning_rate": 4.5805503731343285e-05, + "loss": 0.0024, + "step": 35972 + }, + { + "epoch": 33.56, + "learning_rate": 4.580503731343283e-05, + "loss": 0.0041, + "step": 35976 + }, + { + "epoch": 33.56, + "learning_rate": 4.5804570895522395e-05, + "loss": 0.0017, + "step": 35980 + }, + { + "epoch": 33.57, + "learning_rate": 4.580410447761194e-05, + "loss": 0.0017, + "step": 35984 + }, + { + "epoch": 33.57, + "learning_rate": 4.580363805970149e-05, + "loss": 0.0001, + "step": 35988 + }, + { + "epoch": 33.57, + "learning_rate": 4.5803171641791046e-05, + "loss": 0.0015, + "step": 35992 + }, + { + "epoch": 33.58, + "learning_rate": 4.58027052238806e-05, + "loss": 0.0001, + "step": 35996 + }, + { + "epoch": 33.58, + "learning_rate": 4.580223880597015e-05, + "loss": 0.0006, + "step": 36000 + }, + { + "epoch": 33.58, + "eval_exact_match": 0.723404255319149, + "eval_exec": 0.7630560928433269, + "eval_loss": 0.4088174104690552, + "eval_runtime": 1192.806, + "eval_samples_per_second": 0.867, + "step": 36000 + }, + { + "epoch": 33.59, + "learning_rate": 4.5801772388059704e-05, + "loss": 0.0, + "step": 36004 + }, + { + "epoch": 33.59, + "learning_rate": 4.580130597014925e-05, + "loss": 0.0005, + "step": 36008 + }, + { + "epoch": 33.59, + "learning_rate": 4.580083955223881e-05, + "loss": 0.0001, + "step": 36012 + }, + { + "epoch": 33.6, + "learning_rate": 4.580037313432836e-05, + "loss": 0.0003, + "step": 36016 + }, + { + "epoch": 33.6, + "learning_rate": 4.579990671641791e-05, + "loss": 0.0, + "step": 36020 + }, + { + "epoch": 33.6, + "learning_rate": 4.5799440298507465e-05, + "loss": 0.0, + "step": 36024 + }, + { + "epoch": 33.61, + "learning_rate": 4.579897388059702e-05, + "loss": 0.0002, + "step": 36028 + }, + { + "epoch": 33.61, + "learning_rate": 4.579850746268657e-05, + "loss": 0.0064, + "step": 36032 + }, + { + "epoch": 33.62, + "learning_rate": 4.579804104477612e-05, + "loss": 0.0014, + "step": 36036 + }, + { + "epoch": 33.62, + "learning_rate": 4.579757462686568e-05, + "loss": 0.0011, + "step": 36040 + }, + { + "epoch": 33.62, + "learning_rate": 4.5797108208955226e-05, + "loss": 0.001, + "step": 36044 + }, + { + "epoch": 33.63, + "learning_rate": 4.579664179104478e-05, + "loss": 0.0271, + "step": 36048 + }, + { + "epoch": 33.63, + "learning_rate": 4.579617537313433e-05, + "loss": 0.0015, + "step": 36052 + }, + { + "epoch": 33.63, + "learning_rate": 4.5795708955223884e-05, + "loss": 0.0014, + "step": 36056 + }, + { + "epoch": 33.64, + "learning_rate": 4.579524253731344e-05, + "loss": 0.0008, + "step": 36060 + }, + { + "epoch": 33.64, + "learning_rate": 4.579477611940299e-05, + "loss": 0.0, + "step": 36064 + }, + { + "epoch": 33.65, + "learning_rate": 4.5794309701492535e-05, + "loss": 0.0001, + "step": 36068 + }, + { + "epoch": 33.65, + "learning_rate": 4.57938432835821e-05, + "loss": 0.0007, + "step": 36072 + }, + { + "epoch": 33.65, + "learning_rate": 4.5793376865671645e-05, + "loss": 0.0004, + "step": 36076 + }, + { + "epoch": 33.66, + "learning_rate": 4.579291044776119e-05, + "loss": 0.0001, + "step": 36080 + }, + { + "epoch": 33.66, + "learning_rate": 4.579244402985075e-05, + "loss": 0.0001, + "step": 36084 + }, + { + "epoch": 33.66, + "learning_rate": 4.57919776119403e-05, + "loss": 0.0002, + "step": 36088 + }, + { + "epoch": 33.67, + "learning_rate": 4.579151119402985e-05, + "loss": 0.0001, + "step": 36092 + }, + { + "epoch": 33.67, + "learning_rate": 4.5791044776119406e-05, + "loss": 0.0001, + "step": 36096 + }, + { + "epoch": 33.68, + "learning_rate": 4.579057835820896e-05, + "loss": 0.0032, + "step": 36100 + }, + { + "epoch": 33.68, + "learning_rate": 4.579011194029851e-05, + "loss": 0.0002, + "step": 36104 + }, + { + "epoch": 33.68, + "learning_rate": 4.5789645522388064e-05, + "loss": 0.0025, + "step": 36108 + }, + { + "epoch": 33.69, + "learning_rate": 4.578917910447761e-05, + "loss": 0.0007, + "step": 36112 + }, + { + "epoch": 33.69, + "learning_rate": 4.578871268656717e-05, + "loss": 0.0001, + "step": 36116 + }, + { + "epoch": 33.69, + "learning_rate": 4.578824626865672e-05, + "loss": 0.0003, + "step": 36120 + }, + { + "epoch": 33.7, + "learning_rate": 4.578777985074627e-05, + "loss": 0.001, + "step": 36124 + }, + { + "epoch": 33.7, + "learning_rate": 4.578731343283582e-05, + "loss": 0.0001, + "step": 36128 + }, + { + "epoch": 33.71, + "learning_rate": 4.578684701492538e-05, + "loss": 0.0, + "step": 36132 + }, + { + "epoch": 33.71, + "learning_rate": 4.578638059701493e-05, + "loss": 0.0002, + "step": 36136 + }, + { + "epoch": 33.71, + "learning_rate": 4.5785914179104476e-05, + "loss": 0.0029, + "step": 36140 + }, + { + "epoch": 33.72, + "learning_rate": 4.578544776119403e-05, + "loss": 0.0002, + "step": 36144 + }, + { + "epoch": 33.72, + "learning_rate": 4.5784981343283586e-05, + "loss": 0.0, + "step": 36148 + }, + { + "epoch": 33.72, + "learning_rate": 4.5784514925373134e-05, + "loss": 0.0, + "step": 36152 + }, + { + "epoch": 33.73, + "learning_rate": 4.578404850746269e-05, + "loss": 0.0066, + "step": 36156 + }, + { + "epoch": 33.73, + "learning_rate": 4.578358208955224e-05, + "loss": 0.0017, + "step": 36160 + }, + { + "epoch": 33.73, + "learning_rate": 4.578311567164179e-05, + "loss": 0.0028, + "step": 36164 + }, + { + "epoch": 33.74, + "learning_rate": 4.578264925373135e-05, + "loss": 0.0022, + "step": 36168 + }, + { + "epoch": 33.74, + "learning_rate": 4.5782182835820895e-05, + "loss": 0.0001, + "step": 36172 + }, + { + "epoch": 33.75, + "learning_rate": 4.578171641791045e-05, + "loss": 0.0001, + "step": 36176 + }, + { + "epoch": 33.75, + "learning_rate": 4.5781250000000005e-05, + "loss": 0.0001, + "step": 36180 + }, + { + "epoch": 33.75, + "learning_rate": 4.578078358208955e-05, + "loss": 0.0003, + "step": 36184 + }, + { + "epoch": 33.76, + "learning_rate": 4.57803171641791e-05, + "loss": 0.0, + "step": 36188 + }, + { + "epoch": 33.76, + "learning_rate": 4.577985074626866e-05, + "loss": 0.0001, + "step": 36192 + }, + { + "epoch": 33.76, + "learning_rate": 4.577938432835821e-05, + "loss": 0.0005, + "step": 36196 + }, + { + "epoch": 33.77, + "learning_rate": 4.5778917910447766e-05, + "loss": 0.0019, + "step": 36200 + }, + { + "epoch": 33.77, + "learning_rate": 4.5778451492537314e-05, + "loss": 0.0, + "step": 36204 + }, + { + "epoch": 33.78, + "learning_rate": 4.577798507462687e-05, + "loss": 0.0004, + "step": 36208 + }, + { + "epoch": 33.78, + "learning_rate": 4.5777518656716424e-05, + "loss": 0.0003, + "step": 36212 + }, + { + "epoch": 33.78, + "learning_rate": 4.577705223880597e-05, + "loss": 0.0003, + "step": 36216 + }, + { + "epoch": 33.79, + "learning_rate": 4.577658582089552e-05, + "loss": 0.0013, + "step": 36220 + }, + { + "epoch": 33.79, + "learning_rate": 4.577611940298508e-05, + "loss": 0.0005, + "step": 36224 + }, + { + "epoch": 33.79, + "learning_rate": 4.577565298507463e-05, + "loss": 0.0002, + "step": 36228 + }, + { + "epoch": 33.8, + "learning_rate": 4.577518656716418e-05, + "loss": 0.0006, + "step": 36232 + }, + { + "epoch": 33.8, + "learning_rate": 4.577472014925373e-05, + "loss": 0.0065, + "step": 36236 + }, + { + "epoch": 33.81, + "learning_rate": 4.577425373134329e-05, + "loss": 0.0, + "step": 36240 + }, + { + "epoch": 33.81, + "learning_rate": 4.5773787313432836e-05, + "loss": 0.0014, + "step": 36244 + }, + { + "epoch": 33.81, + "learning_rate": 4.577332089552239e-05, + "loss": 0.0, + "step": 36248 + }, + { + "epoch": 33.82, + "learning_rate": 4.5772854477611946e-05, + "loss": 0.0003, + "step": 36252 + }, + { + "epoch": 33.82, + "learning_rate": 4.5772388059701494e-05, + "loss": 0.0, + "step": 36256 + }, + { + "epoch": 33.82, + "learning_rate": 4.577192164179105e-05, + "loss": 0.0006, + "step": 36260 + }, + { + "epoch": 33.83, + "learning_rate": 4.57714552238806e-05, + "loss": 0.0001, + "step": 36264 + }, + { + "epoch": 33.83, + "learning_rate": 4.577098880597015e-05, + "loss": 0.0007, + "step": 36268 + }, + { + "epoch": 33.84, + "learning_rate": 4.577052238805971e-05, + "loss": 0.0001, + "step": 36272 + }, + { + "epoch": 33.84, + "learning_rate": 4.5770055970149255e-05, + "loss": 0.0029, + "step": 36276 + }, + { + "epoch": 33.84, + "learning_rate": 4.57695895522388e-05, + "loss": 0.0002, + "step": 36280 + }, + { + "epoch": 33.85, + "learning_rate": 4.5769123134328365e-05, + "loss": 0.0001, + "step": 36284 + }, + { + "epoch": 33.85, + "learning_rate": 4.576865671641791e-05, + "loss": 0.0002, + "step": 36288 + }, + { + "epoch": 33.85, + "learning_rate": 4.576819029850746e-05, + "loss": 0.0014, + "step": 36292 + }, + { + "epoch": 33.86, + "learning_rate": 4.5767723880597016e-05, + "loss": 0.0, + "step": 36296 + }, + { + "epoch": 33.86, + "learning_rate": 4.576725746268657e-05, + "loss": 0.0003, + "step": 36300 + }, + { + "epoch": 33.87, + "learning_rate": 4.576679104477612e-05, + "loss": 0.0002, + "step": 36304 + }, + { + "epoch": 33.87, + "learning_rate": 4.5766324626865674e-05, + "loss": 0.0005, + "step": 36308 + }, + { + "epoch": 33.87, + "learning_rate": 4.576585820895523e-05, + "loss": 0.0, + "step": 36312 + }, + { + "epoch": 33.88, + "learning_rate": 4.576539179104478e-05, + "loss": 0.0, + "step": 36316 + }, + { + "epoch": 33.88, + "learning_rate": 4.576492537313433e-05, + "loss": 0.0026, + "step": 36320 + }, + { + "epoch": 33.88, + "learning_rate": 4.576445895522388e-05, + "loss": 0.0037, + "step": 36324 + }, + { + "epoch": 33.89, + "learning_rate": 4.5763992537313435e-05, + "loss": 0.0001, + "step": 36328 + }, + { + "epoch": 33.89, + "learning_rate": 4.576352611940299e-05, + "loss": 0.0001, + "step": 36332 + }, + { + "epoch": 33.9, + "learning_rate": 4.576305970149254e-05, + "loss": 0.0008, + "step": 36336 + }, + { + "epoch": 33.9, + "learning_rate": 4.5762593283582086e-05, + "loss": 0.0, + "step": 36340 + }, + { + "epoch": 33.9, + "learning_rate": 4.576212686567165e-05, + "loss": 0.0, + "step": 36344 + }, + { + "epoch": 33.91, + "learning_rate": 4.5761660447761196e-05, + "loss": 0.0, + "step": 36348 + }, + { + "epoch": 33.91, + "learning_rate": 4.5761194029850744e-05, + "loss": 0.0001, + "step": 36352 + }, + { + "epoch": 33.91, + "learning_rate": 4.57607276119403e-05, + "loss": 0.0002, + "step": 36356 + }, + { + "epoch": 33.92, + "learning_rate": 4.5760261194029854e-05, + "loss": 0.0018, + "step": 36360 + }, + { + "epoch": 33.92, + "learning_rate": 4.575979477611941e-05, + "loss": 0.0018, + "step": 36364 + }, + { + "epoch": 33.93, + "learning_rate": 4.575932835820896e-05, + "loss": 0.0017, + "step": 36368 + }, + { + "epoch": 33.93, + "learning_rate": 4.575886194029851e-05, + "loss": 0.0, + "step": 36372 + }, + { + "epoch": 33.93, + "learning_rate": 4.5758395522388067e-05, + "loss": 0.0001, + "step": 36376 + }, + { + "epoch": 33.94, + "learning_rate": 4.5757929104477615e-05, + "loss": 0.0009, + "step": 36380 + }, + { + "epoch": 33.94, + "learning_rate": 4.575746268656716e-05, + "loss": 0.0002, + "step": 36384 + }, + { + "epoch": 33.94, + "learning_rate": 4.575699626865672e-05, + "loss": 0.0, + "step": 36388 + }, + { + "epoch": 33.95, + "learning_rate": 4.575652985074627e-05, + "loss": 0.0055, + "step": 36392 + }, + { + "epoch": 33.95, + "learning_rate": 4.575606343283582e-05, + "loss": 0.0003, + "step": 36396 + }, + { + "epoch": 33.96, + "learning_rate": 4.5755597014925376e-05, + "loss": 0.0001, + "step": 36400 + }, + { + "epoch": 33.96, + "learning_rate": 4.575513059701493e-05, + "loss": 0.0001, + "step": 36404 + }, + { + "epoch": 33.96, + "learning_rate": 4.575466417910448e-05, + "loss": 0.001, + "step": 36408 + }, + { + "epoch": 33.97, + "learning_rate": 4.5754197761194034e-05, + "loss": 0.0001, + "step": 36412 + }, + { + "epoch": 33.97, + "learning_rate": 4.575373134328358e-05, + "loss": 0.0, + "step": 36416 + }, + { + "epoch": 33.97, + "learning_rate": 4.575326492537314e-05, + "loss": 0.0, + "step": 36420 + }, + { + "epoch": 33.98, + "learning_rate": 4.575279850746269e-05, + "loss": 0.0009, + "step": 36424 + }, + { + "epoch": 33.98, + "learning_rate": 4.575233208955224e-05, + "loss": 0.0045, + "step": 36428 + }, + { + "epoch": 33.98, + "learning_rate": 4.5751865671641795e-05, + "loss": 0.0003, + "step": 36432 + }, + { + "epoch": 33.99, + "learning_rate": 4.575139925373135e-05, + "loss": 0.0, + "step": 36436 + }, + { + "epoch": 33.99, + "learning_rate": 4.57509328358209e-05, + "loss": 0.0, + "step": 36440 + }, + { + "epoch": 34.0, + "learning_rate": 4.5750466417910446e-05, + "loss": 0.0079, + "step": 36444 + }, + { + "epoch": 34.0, + "learning_rate": 4.575e-05, + "loss": 0.0003, + "step": 36448 + }, + { + "epoch": 34.0, + "learning_rate": 4.5749533582089556e-05, + "loss": 0.0001, + "step": 36452 + }, + { + "epoch": 34.01, + "learning_rate": 4.5749067164179104e-05, + "loss": 0.0001, + "step": 36456 + }, + { + "epoch": 34.01, + "learning_rate": 4.574860074626866e-05, + "loss": 0.0002, + "step": 36460 + }, + { + "epoch": 34.01, + "learning_rate": 4.5748134328358214e-05, + "loss": 0.0031, + "step": 36464 + }, + { + "epoch": 34.02, + "learning_rate": 4.574766791044776e-05, + "loss": 0.0002, + "step": 36468 + }, + { + "epoch": 34.02, + "learning_rate": 4.574720149253732e-05, + "loss": 0.0013, + "step": 36472 + }, + { + "epoch": 34.03, + "learning_rate": 4.5746735074626865e-05, + "loss": 0.0006, + "step": 36476 + }, + { + "epoch": 34.03, + "learning_rate": 4.574626865671642e-05, + "loss": 0.0052, + "step": 36480 + }, + { + "epoch": 34.03, + "learning_rate": 4.5745802238805975e-05, + "loss": 0.001, + "step": 36484 + }, + { + "epoch": 34.04, + "learning_rate": 4.574533582089552e-05, + "loss": 0.0001, + "step": 36488 + }, + { + "epoch": 34.04, + "learning_rate": 4.574486940298508e-05, + "loss": 0.0011, + "step": 36492 + }, + { + "epoch": 34.04, + "learning_rate": 4.574440298507463e-05, + "loss": 0.0001, + "step": 36496 + }, + { + "epoch": 34.05, + "learning_rate": 4.574393656716418e-05, + "loss": 0.0, + "step": 36500 + }, + { + "epoch": 34.05, + "eval_exact_match": 0.730174081237911, + "eval_exec": 0.7659574468085106, + "eval_loss": 0.4040814936161041, + "eval_runtime": 1222.2246, + "eval_samples_per_second": 0.846, + "step": 36500 + }, + { + "epoch": 34.05, + "learning_rate": 4.574347014925373e-05, + "loss": 0.0004, + "step": 36504 + }, + { + "epoch": 34.06, + "learning_rate": 4.5743003731343284e-05, + "loss": 0.0036, + "step": 36508 + }, + { + "epoch": 34.06, + "learning_rate": 4.574253731343284e-05, + "loss": 0.0008, + "step": 36512 + }, + { + "epoch": 34.06, + "learning_rate": 4.574207089552239e-05, + "loss": 0.0002, + "step": 36516 + }, + { + "epoch": 34.07, + "learning_rate": 4.574160447761194e-05, + "loss": 0.0006, + "step": 36520 + }, + { + "epoch": 34.07, + "learning_rate": 4.5741138059701497e-05, + "loss": 0.0, + "step": 36524 + }, + { + "epoch": 34.07, + "learning_rate": 4.574067164179105e-05, + "loss": 0.0017, + "step": 36528 + }, + { + "epoch": 34.08, + "learning_rate": 4.57402052238806e-05, + "loss": 0.0001, + "step": 36532 + }, + { + "epoch": 34.08, + "learning_rate": 4.573973880597015e-05, + "loss": 0.0001, + "step": 36536 + }, + { + "epoch": 34.09, + "learning_rate": 4.573927238805971e-05, + "loss": 0.0003, + "step": 36540 + }, + { + "epoch": 34.09, + "learning_rate": 4.573880597014926e-05, + "loss": 0.0013, + "step": 36544 + }, + { + "epoch": 34.09, + "learning_rate": 4.5738339552238806e-05, + "loss": 0.0003, + "step": 36548 + }, + { + "epoch": 34.1, + "learning_rate": 4.573787313432836e-05, + "loss": 0.0004, + "step": 36552 + }, + { + "epoch": 34.1, + "learning_rate": 4.5737406716417915e-05, + "loss": 0.0003, + "step": 36556 + }, + { + "epoch": 34.1, + "learning_rate": 4.5736940298507464e-05, + "loss": 0.0016, + "step": 36560 + }, + { + "epoch": 34.11, + "learning_rate": 4.573647388059702e-05, + "loss": 0.0024, + "step": 36564 + }, + { + "epoch": 34.11, + "learning_rate": 4.573600746268657e-05, + "loss": 0.0003, + "step": 36568 + }, + { + "epoch": 34.12, + "learning_rate": 4.573554104477612e-05, + "loss": 0.0001, + "step": 36572 + }, + { + "epoch": 34.12, + "learning_rate": 4.5735074626865676e-05, + "loss": 0.0005, + "step": 36576 + }, + { + "epoch": 34.12, + "learning_rate": 4.5734608208955225e-05, + "loss": 0.0008, + "step": 36580 + }, + { + "epoch": 34.13, + "learning_rate": 4.573414179104478e-05, + "loss": 0.0001, + "step": 36584 + }, + { + "epoch": 34.13, + "learning_rate": 4.5733675373134334e-05, + "loss": 0.013, + "step": 36588 + }, + { + "epoch": 34.13, + "learning_rate": 4.573320895522388e-05, + "loss": 0.0001, + "step": 36592 + }, + { + "epoch": 34.14, + "learning_rate": 4.573274253731343e-05, + "loss": 0.0001, + "step": 36596 + }, + { + "epoch": 34.14, + "learning_rate": 4.573227611940299e-05, + "loss": 0.0003, + "step": 36600 + }, + { + "epoch": 34.15, + "learning_rate": 4.573180970149254e-05, + "loss": 0.0001, + "step": 36604 + }, + { + "epoch": 34.15, + "learning_rate": 4.573134328358209e-05, + "loss": 0.0, + "step": 36608 + }, + { + "epoch": 34.15, + "learning_rate": 4.5730876865671644e-05, + "loss": 0.0001, + "step": 36612 + }, + { + "epoch": 34.16, + "learning_rate": 4.57304104477612e-05, + "loss": 0.002, + "step": 36616 + }, + { + "epoch": 34.16, + "learning_rate": 4.5729944029850747e-05, + "loss": 0.0002, + "step": 36620 + }, + { + "epoch": 34.16, + "learning_rate": 4.57294776119403e-05, + "loss": 0.0003, + "step": 36624 + }, + { + "epoch": 34.17, + "learning_rate": 4.572901119402985e-05, + "loss": 0.0003, + "step": 36628 + }, + { + "epoch": 34.17, + "learning_rate": 4.5728544776119405e-05, + "loss": 0.0001, + "step": 36632 + }, + { + "epoch": 34.18, + "learning_rate": 4.572807835820896e-05, + "loss": 0.0002, + "step": 36636 + }, + { + "epoch": 34.18, + "learning_rate": 4.572761194029851e-05, + "loss": 0.0001, + "step": 36640 + }, + { + "epoch": 34.18, + "learning_rate": 4.572714552238806e-05, + "loss": 0.0015, + "step": 36644 + }, + { + "epoch": 34.19, + "learning_rate": 4.572667910447762e-05, + "loss": 0.0003, + "step": 36648 + }, + { + "epoch": 34.19, + "learning_rate": 4.5726212686567165e-05, + "loss": 0.0016, + "step": 36652 + }, + { + "epoch": 34.19, + "learning_rate": 4.5725746268656714e-05, + "loss": 0.0003, + "step": 36656 + }, + { + "epoch": 34.2, + "learning_rate": 4.5725279850746275e-05, + "loss": 0.0002, + "step": 36660 + }, + { + "epoch": 34.2, + "learning_rate": 4.5724813432835823e-05, + "loss": 0.0, + "step": 36664 + }, + { + "epoch": 34.21, + "learning_rate": 4.572434701492537e-05, + "loss": 0.0001, + "step": 36668 + }, + { + "epoch": 34.21, + "learning_rate": 4.5723880597014926e-05, + "loss": 0.0001, + "step": 36672 + }, + { + "epoch": 34.21, + "learning_rate": 4.572341417910448e-05, + "loss": 0.0003, + "step": 36676 + }, + { + "epoch": 34.22, + "learning_rate": 4.572294776119403e-05, + "loss": 0.0001, + "step": 36680 + }, + { + "epoch": 34.22, + "learning_rate": 4.5722481343283584e-05, + "loss": 0.0002, + "step": 36684 + }, + { + "epoch": 34.22, + "learning_rate": 4.572201492537313e-05, + "loss": 0.0016, + "step": 36688 + }, + { + "epoch": 34.23, + "learning_rate": 4.5721548507462694e-05, + "loss": 0.0011, + "step": 36692 + }, + { + "epoch": 34.23, + "learning_rate": 4.572108208955224e-05, + "loss": 0.0, + "step": 36696 + }, + { + "epoch": 34.24, + "learning_rate": 4.572061567164179e-05, + "loss": 0.0001, + "step": 36700 + }, + { + "epoch": 34.24, + "learning_rate": 4.5720149253731345e-05, + "loss": 0.0, + "step": 36704 + }, + { + "epoch": 34.24, + "learning_rate": 4.57196828358209e-05, + "loss": 0.0, + "step": 36708 + }, + { + "epoch": 34.25, + "learning_rate": 4.571921641791045e-05, + "loss": 0.0, + "step": 36712 + }, + { + "epoch": 34.25, + "learning_rate": 4.571875e-05, + "loss": 0.0022, + "step": 36716 + }, + { + "epoch": 34.25, + "learning_rate": 4.571828358208956e-05, + "loss": 0.0002, + "step": 36720 + }, + { + "epoch": 34.26, + "learning_rate": 4.5717817164179106e-05, + "loss": 0.0001, + "step": 36724 + }, + { + "epoch": 34.26, + "learning_rate": 4.571735074626866e-05, + "loss": 0.0001, + "step": 36728 + }, + { + "epoch": 34.26, + "learning_rate": 4.571688432835821e-05, + "loss": 0.0064, + "step": 36732 + }, + { + "epoch": 34.27, + "learning_rate": 4.5716417910447764e-05, + "loss": 0.0001, + "step": 36736 + }, + { + "epoch": 34.27, + "learning_rate": 4.571595149253732e-05, + "loss": 0.0001, + "step": 36740 + }, + { + "epoch": 34.28, + "learning_rate": 4.571548507462687e-05, + "loss": 0.0001, + "step": 36744 + }, + { + "epoch": 34.28, + "learning_rate": 4.5715018656716416e-05, + "loss": 0.0001, + "step": 36748 + }, + { + "epoch": 34.28, + "learning_rate": 4.571455223880598e-05, + "loss": 0.0, + "step": 36752 + }, + { + "epoch": 34.29, + "learning_rate": 4.5714085820895525e-05, + "loss": 0.012, + "step": 36756 + }, + { + "epoch": 34.29, + "learning_rate": 4.5713619402985073e-05, + "loss": 0.0025, + "step": 36760 + }, + { + "epoch": 34.29, + "learning_rate": 4.571315298507463e-05, + "loss": 0.0001, + "step": 36764 + }, + { + "epoch": 34.3, + "learning_rate": 4.571268656716418e-05, + "loss": 0.0016, + "step": 36768 + }, + { + "epoch": 34.3, + "learning_rate": 4.571222014925373e-05, + "loss": 0.0001, + "step": 36772 + }, + { + "epoch": 34.31, + "learning_rate": 4.5711753731343286e-05, + "loss": 0.0007, + "step": 36776 + }, + { + "epoch": 34.31, + "learning_rate": 4.571128731343284e-05, + "loss": 0.0011, + "step": 36780 + }, + { + "epoch": 34.31, + "learning_rate": 4.571082089552239e-05, + "loss": 0.0, + "step": 36784 + }, + { + "epoch": 34.32, + "learning_rate": 4.5710354477611944e-05, + "loss": 0.0008, + "step": 36788 + }, + { + "epoch": 34.32, + "learning_rate": 4.570988805970149e-05, + "loss": 0.002, + "step": 36792 + }, + { + "epoch": 34.32, + "learning_rate": 4.570942164179105e-05, + "loss": 0.0001, + "step": 36796 + }, + { + "epoch": 34.33, + "learning_rate": 4.57089552238806e-05, + "loss": 0.0, + "step": 36800 + }, + { + "epoch": 34.33, + "learning_rate": 4.570848880597015e-05, + "loss": 0.0001, + "step": 36804 + }, + { + "epoch": 34.34, + "learning_rate": 4.57080223880597e-05, + "loss": 0.0005, + "step": 36808 + }, + { + "epoch": 34.34, + "learning_rate": 4.570755597014926e-05, + "loss": 0.0002, + "step": 36812 + }, + { + "epoch": 34.34, + "learning_rate": 4.570708955223881e-05, + "loss": 0.0001, + "step": 36816 + }, + { + "epoch": 34.35, + "learning_rate": 4.5706623134328356e-05, + "loss": 0.0001, + "step": 36820 + }, + { + "epoch": 34.35, + "learning_rate": 4.570615671641791e-05, + "loss": 0.0008, + "step": 36824 + }, + { + "epoch": 34.35, + "learning_rate": 4.5705690298507466e-05, + "loss": 0.0006, + "step": 36828 + }, + { + "epoch": 34.36, + "learning_rate": 4.5705223880597014e-05, + "loss": 0.0, + "step": 36832 + }, + { + "epoch": 34.36, + "learning_rate": 4.570475746268657e-05, + "loss": 0.0003, + "step": 36836 + }, + { + "epoch": 34.37, + "learning_rate": 4.570429104477612e-05, + "loss": 0.0036, + "step": 36840 + }, + { + "epoch": 34.37, + "learning_rate": 4.570382462686567e-05, + "loss": 0.0, + "step": 36844 + }, + { + "epoch": 34.37, + "learning_rate": 4.570335820895523e-05, + "loss": 0.0, + "step": 36848 + }, + { + "epoch": 34.38, + "learning_rate": 4.5702891791044775e-05, + "loss": 0.0, + "step": 36852 + }, + { + "epoch": 34.38, + "learning_rate": 4.570242537313433e-05, + "loss": 0.0, + "step": 36856 + }, + { + "epoch": 34.38, + "learning_rate": 4.5701958955223885e-05, + "loss": 0.001, + "step": 36860 + }, + { + "epoch": 34.39, + "learning_rate": 4.570149253731343e-05, + "loss": 0.0139, + "step": 36864 + }, + { + "epoch": 34.39, + "learning_rate": 4.570102611940299e-05, + "loss": 0.0001, + "step": 36868 + }, + { + "epoch": 34.4, + "learning_rate": 4.570055970149254e-05, + "loss": 0.0005, + "step": 36872 + }, + { + "epoch": 34.4, + "learning_rate": 4.570009328358209e-05, + "loss": 0.0002, + "step": 36876 + }, + { + "epoch": 34.4, + "learning_rate": 4.5699626865671646e-05, + "loss": 0.0008, + "step": 36880 + }, + { + "epoch": 34.41, + "learning_rate": 4.5699160447761194e-05, + "loss": 0.0003, + "step": 36884 + }, + { + "epoch": 34.41, + "learning_rate": 4.569869402985075e-05, + "loss": 0.0, + "step": 36888 + }, + { + "epoch": 34.41, + "learning_rate": 4.5698227611940304e-05, + "loss": 0.0003, + "step": 36892 + }, + { + "epoch": 34.42, + "learning_rate": 4.569776119402985e-05, + "loss": 0.0001, + "step": 36896 + }, + { + "epoch": 34.42, + "learning_rate": 4.56972947761194e-05, + "loss": 0.0002, + "step": 36900 + }, + { + "epoch": 34.43, + "learning_rate": 4.569682835820896e-05, + "loss": 0.0035, + "step": 36904 + }, + { + "epoch": 34.43, + "learning_rate": 4.569636194029851e-05, + "loss": 0.0023, + "step": 36908 + }, + { + "epoch": 34.43, + "learning_rate": 4.569589552238806e-05, + "loss": 0.0009, + "step": 36912 + }, + { + "epoch": 34.44, + "learning_rate": 4.569542910447761e-05, + "loss": 0.0073, + "step": 36916 + }, + { + "epoch": 34.44, + "learning_rate": 4.569496268656717e-05, + "loss": 0.0006, + "step": 36920 + }, + { + "epoch": 34.44, + "learning_rate": 4.5694496268656716e-05, + "loss": 0.0004, + "step": 36924 + }, + { + "epoch": 34.45, + "learning_rate": 4.569402985074627e-05, + "loss": 0.0003, + "step": 36928 + }, + { + "epoch": 34.45, + "learning_rate": 4.5693563432835826e-05, + "loss": 0.0001, + "step": 36932 + }, + { + "epoch": 34.46, + "learning_rate": 4.5693097014925374e-05, + "loss": 0.0001, + "step": 36936 + }, + { + "epoch": 34.46, + "learning_rate": 4.569263059701493e-05, + "loss": 0.0029, + "step": 36940 + }, + { + "epoch": 34.46, + "learning_rate": 4.569216417910448e-05, + "loss": 0.0003, + "step": 36944 + }, + { + "epoch": 34.47, + "learning_rate": 4.569169776119403e-05, + "loss": 0.0005, + "step": 36948 + }, + { + "epoch": 34.47, + "learning_rate": 4.569123134328359e-05, + "loss": 0.001, + "step": 36952 + }, + { + "epoch": 34.47, + "learning_rate": 4.5690764925373135e-05, + "loss": 0.0001, + "step": 36956 + }, + { + "epoch": 34.48, + "learning_rate": 4.569029850746268e-05, + "loss": 0.0004, + "step": 36960 + }, + { + "epoch": 34.48, + "learning_rate": 4.5689832089552245e-05, + "loss": 0.0003, + "step": 36964 + }, + { + "epoch": 34.49, + "learning_rate": 4.568936567164179e-05, + "loss": 0.0001, + "step": 36968 + }, + { + "epoch": 34.49, + "learning_rate": 4.568889925373134e-05, + "loss": 0.0025, + "step": 36972 + }, + { + "epoch": 34.49, + "learning_rate": 4.5688432835820896e-05, + "loss": 0.0008, + "step": 36976 + }, + { + "epoch": 34.5, + "learning_rate": 4.568796641791045e-05, + "loss": 0.0018, + "step": 36980 + }, + { + "epoch": 34.5, + "learning_rate": 4.56875e-05, + "loss": 0.0023, + "step": 36984 + }, + { + "epoch": 34.5, + "learning_rate": 4.5687033582089554e-05, + "loss": 0.0002, + "step": 36988 + }, + { + "epoch": 34.51, + "learning_rate": 4.568656716417911e-05, + "loss": 0.0002, + "step": 36992 + }, + { + "epoch": 34.51, + "learning_rate": 4.568610074626866e-05, + "loss": 0.0044, + "step": 36996 + }, + { + "epoch": 34.51, + "learning_rate": 4.568563432835821e-05, + "loss": 0.0001, + "step": 37000 + }, + { + "epoch": 34.51, + "eval_exact_match": 0.7504835589941973, + "eval_exec": 0.7794970986460348, + "eval_loss": 0.40531110763549805, + "eval_runtime": 1164.9382, + "eval_samples_per_second": 0.888, + "step": 37000 + }, + { + "epoch": 34.52, + "learning_rate": 4.568516791044776e-05, + "loss": 0.0, + "step": 37004 + }, + { + "epoch": 34.52, + "learning_rate": 4.5684701492537315e-05, + "loss": 0.0019, + "step": 37008 + }, + { + "epoch": 34.53, + "learning_rate": 4.568423507462687e-05, + "loss": 0.0003, + "step": 37012 + }, + { + "epoch": 34.53, + "learning_rate": 4.568376865671642e-05, + "loss": 0.0016, + "step": 37016 + }, + { + "epoch": 34.53, + "learning_rate": 4.568330223880597e-05, + "loss": 0.0001, + "step": 37020 + }, + { + "epoch": 34.54, + "learning_rate": 4.568283582089553e-05, + "loss": 0.0, + "step": 37024 + }, + { + "epoch": 34.54, + "learning_rate": 4.5682369402985076e-05, + "loss": 0.0001, + "step": 37028 + }, + { + "epoch": 34.54, + "learning_rate": 4.568190298507463e-05, + "loss": 0.0001, + "step": 37032 + }, + { + "epoch": 34.55, + "learning_rate": 4.568143656716418e-05, + "loss": 0.0, + "step": 37036 + }, + { + "epoch": 34.55, + "learning_rate": 4.5680970149253734e-05, + "loss": 0.0003, + "step": 37040 + }, + { + "epoch": 34.56, + "learning_rate": 4.568050373134329e-05, + "loss": 0.0035, + "step": 37044 + }, + { + "epoch": 34.56, + "learning_rate": 4.568003731343284e-05, + "loss": 0.0, + "step": 37048 + }, + { + "epoch": 34.56, + "learning_rate": 4.567957089552239e-05, + "loss": 0.0002, + "step": 37052 + }, + { + "epoch": 34.57, + "learning_rate": 4.567910447761195e-05, + "loss": 0.0077, + "step": 37056 + }, + { + "epoch": 34.57, + "learning_rate": 4.5678638059701495e-05, + "loss": 0.0022, + "step": 37060 + }, + { + "epoch": 34.57, + "learning_rate": 4.567817164179104e-05, + "loss": 0.0003, + "step": 37064 + }, + { + "epoch": 34.58, + "learning_rate": 4.56777052238806e-05, + "loss": 0.0, + "step": 37068 + }, + { + "epoch": 34.58, + "learning_rate": 4.567723880597015e-05, + "loss": 0.0, + "step": 37072 + }, + { + "epoch": 34.59, + "learning_rate": 4.56767723880597e-05, + "loss": 0.0001, + "step": 37076 + }, + { + "epoch": 34.59, + "learning_rate": 4.5676305970149256e-05, + "loss": 0.004, + "step": 37080 + }, + { + "epoch": 34.59, + "learning_rate": 4.567583955223881e-05, + "loss": 0.0001, + "step": 37084 + }, + { + "epoch": 34.6, + "learning_rate": 4.567537313432836e-05, + "loss": 0.0003, + "step": 37088 + }, + { + "epoch": 34.6, + "learning_rate": 4.5674906716417914e-05, + "loss": 0.0006, + "step": 37092 + }, + { + "epoch": 34.6, + "learning_rate": 4.567444029850746e-05, + "loss": 0.0001, + "step": 37096 + }, + { + "epoch": 34.61, + "learning_rate": 4.567397388059702e-05, + "loss": 0.0001, + "step": 37100 + }, + { + "epoch": 34.61, + "learning_rate": 4.567350746268657e-05, + "loss": 0.0, + "step": 37104 + }, + { + "epoch": 34.62, + "learning_rate": 4.567304104477612e-05, + "loss": 0.0009, + "step": 37108 + }, + { + "epoch": 34.62, + "learning_rate": 4.5672574626865675e-05, + "loss": 0.0002, + "step": 37112 + }, + { + "epoch": 34.62, + "learning_rate": 4.567210820895523e-05, + "loss": 0.0011, + "step": 37116 + }, + { + "epoch": 34.63, + "learning_rate": 4.567164179104478e-05, + "loss": 0.0, + "step": 37120 + }, + { + "epoch": 34.63, + "learning_rate": 4.5671175373134326e-05, + "loss": 0.0011, + "step": 37124 + }, + { + "epoch": 34.63, + "learning_rate": 4.567070895522388e-05, + "loss": 0.0004, + "step": 37128 + }, + { + "epoch": 34.64, + "learning_rate": 4.5670242537313436e-05, + "loss": 0.0008, + "step": 37132 + }, + { + "epoch": 34.64, + "learning_rate": 4.5669776119402984e-05, + "loss": 0.0001, + "step": 37136 + }, + { + "epoch": 34.65, + "learning_rate": 4.566930970149254e-05, + "loss": 0.0002, + "step": 37140 + }, + { + "epoch": 34.65, + "learning_rate": 4.5668843283582094e-05, + "loss": 0.0001, + "step": 37144 + }, + { + "epoch": 34.65, + "learning_rate": 4.566837686567164e-05, + "loss": 0.0009, + "step": 37148 + }, + { + "epoch": 34.66, + "learning_rate": 4.56679104477612e-05, + "loss": 0.0001, + "step": 37152 + }, + { + "epoch": 34.66, + "learning_rate": 4.5667444029850745e-05, + "loss": 0.0003, + "step": 37156 + }, + { + "epoch": 34.66, + "learning_rate": 4.56669776119403e-05, + "loss": 0.0029, + "step": 37160 + }, + { + "epoch": 34.67, + "learning_rate": 4.5666511194029855e-05, + "loss": 0.0001, + "step": 37164 + }, + { + "epoch": 34.67, + "learning_rate": 4.56660447761194e-05, + "loss": 0.0136, + "step": 37168 + }, + { + "epoch": 34.68, + "learning_rate": 4.566557835820896e-05, + "loss": 0.0019, + "step": 37172 + }, + { + "epoch": 34.68, + "learning_rate": 4.566511194029851e-05, + "loss": 0.0005, + "step": 37176 + }, + { + "epoch": 34.68, + "learning_rate": 4.566464552238806e-05, + "loss": 0.0005, + "step": 37180 + }, + { + "epoch": 34.69, + "learning_rate": 4.5664179104477616e-05, + "loss": 0.0008, + "step": 37184 + }, + { + "epoch": 34.69, + "learning_rate": 4.5663712686567164e-05, + "loss": 0.0, + "step": 37188 + }, + { + "epoch": 34.69, + "learning_rate": 4.566324626865672e-05, + "loss": 0.0, + "step": 37192 + }, + { + "epoch": 34.7, + "learning_rate": 4.5662779850746274e-05, + "loss": 0.0022, + "step": 37196 + }, + { + "epoch": 34.7, + "learning_rate": 4.566231343283582e-05, + "loss": 0.0001, + "step": 37200 + }, + { + "epoch": 34.71, + "learning_rate": 4.566184701492538e-05, + "loss": 0.0001, + "step": 37204 + }, + { + "epoch": 34.71, + "learning_rate": 4.566138059701493e-05, + "loss": 0.003, + "step": 37208 + }, + { + "epoch": 34.71, + "learning_rate": 4.566091417910448e-05, + "loss": 0.0002, + "step": 37212 + }, + { + "epoch": 34.72, + "learning_rate": 4.566044776119403e-05, + "loss": 0.0001, + "step": 37216 + }, + { + "epoch": 34.72, + "learning_rate": 4.565998134328359e-05, + "loss": 0.0001, + "step": 37220 + }, + { + "epoch": 34.72, + "learning_rate": 4.565951492537314e-05, + "loss": 0.005, + "step": 37224 + }, + { + "epoch": 34.73, + "learning_rate": 4.5659048507462686e-05, + "loss": 0.0066, + "step": 37228 + }, + { + "epoch": 34.73, + "learning_rate": 4.565858208955224e-05, + "loss": 0.0001, + "step": 37232 + }, + { + "epoch": 34.73, + "learning_rate": 4.5658115671641796e-05, + "loss": 0.0005, + "step": 37236 + }, + { + "epoch": 34.74, + "learning_rate": 4.5657649253731344e-05, + "loss": 0.0005, + "step": 37240 + }, + { + "epoch": 34.74, + "learning_rate": 4.56571828358209e-05, + "loss": 0.0, + "step": 37244 + }, + { + "epoch": 34.75, + "learning_rate": 4.565671641791045e-05, + "loss": 0.0042, + "step": 37248 + }, + { + "epoch": 34.75, + "learning_rate": 4.565625e-05, + "loss": 0.0, + "step": 37252 + }, + { + "epoch": 34.75, + "learning_rate": 4.565578358208956e-05, + "loss": 0.0, + "step": 37256 + }, + { + "epoch": 34.76, + "learning_rate": 4.5655317164179105e-05, + "loss": 0.0009, + "step": 37260 + }, + { + "epoch": 34.76, + "learning_rate": 4.565485074626866e-05, + "loss": 0.0001, + "step": 37264 + }, + { + "epoch": 34.76, + "learning_rate": 4.5654384328358215e-05, + "loss": 0.0011, + "step": 37268 + }, + { + "epoch": 34.77, + "learning_rate": 4.565391791044776e-05, + "loss": 0.0009, + "step": 37272 + }, + { + "epoch": 34.77, + "learning_rate": 4.565345149253731e-05, + "loss": 0.0, + "step": 37276 + }, + { + "epoch": 34.78, + "learning_rate": 4.565298507462687e-05, + "loss": 0.0012, + "step": 37280 + }, + { + "epoch": 34.78, + "learning_rate": 4.565251865671642e-05, + "loss": 0.0041, + "step": 37284 + }, + { + "epoch": 34.78, + "learning_rate": 4.565205223880597e-05, + "loss": 0.0, + "step": 37288 + }, + { + "epoch": 34.79, + "learning_rate": 4.5651585820895524e-05, + "loss": 0.0002, + "step": 37292 + }, + { + "epoch": 34.79, + "learning_rate": 4.565111940298508e-05, + "loss": 0.0001, + "step": 37296 + }, + { + "epoch": 34.79, + "learning_rate": 4.565065298507463e-05, + "loss": 0.0002, + "step": 37300 + }, + { + "epoch": 34.8, + "learning_rate": 4.565018656716418e-05, + "loss": 0.0, + "step": 37304 + }, + { + "epoch": 34.8, + "learning_rate": 4.564972014925373e-05, + "loss": 0.0, + "step": 37308 + }, + { + "epoch": 34.81, + "learning_rate": 4.5649253731343285e-05, + "loss": 0.0013, + "step": 37312 + }, + { + "epoch": 34.81, + "learning_rate": 4.564878731343284e-05, + "loss": 0.0001, + "step": 37316 + }, + { + "epoch": 34.81, + "learning_rate": 4.564832089552239e-05, + "loss": 0.0008, + "step": 37320 + }, + { + "epoch": 34.82, + "learning_rate": 4.564785447761194e-05, + "loss": 0.0007, + "step": 37324 + }, + { + "epoch": 34.82, + "learning_rate": 4.56473880597015e-05, + "loss": 0.0, + "step": 37328 + }, + { + "epoch": 34.82, + "learning_rate": 4.5646921641791046e-05, + "loss": 0.0019, + "step": 37332 + }, + { + "epoch": 34.83, + "learning_rate": 4.5646455223880594e-05, + "loss": 0.0001, + "step": 37336 + }, + { + "epoch": 34.83, + "learning_rate": 4.5645988805970156e-05, + "loss": 0.0046, + "step": 37340 + }, + { + "epoch": 34.84, + "learning_rate": 4.5645522388059704e-05, + "loss": 0.001, + "step": 37344 + }, + { + "epoch": 34.84, + "learning_rate": 4.564505597014926e-05, + "loss": 0.0003, + "step": 37348 + }, + { + "epoch": 34.84, + "learning_rate": 4.564458955223881e-05, + "loss": 0.0002, + "step": 37352 + }, + { + "epoch": 34.85, + "learning_rate": 4.564412313432836e-05, + "loss": 0.0004, + "step": 37356 + }, + { + "epoch": 34.85, + "learning_rate": 4.5643656716417917e-05, + "loss": 0.0009, + "step": 37360 + }, + { + "epoch": 34.85, + "learning_rate": 4.5643190298507465e-05, + "loss": 0.0, + "step": 37364 + }, + { + "epoch": 34.86, + "learning_rate": 4.564272388059701e-05, + "loss": 0.0001, + "step": 37368 + }, + { + "epoch": 34.86, + "learning_rate": 4.5642257462686575e-05, + "loss": 0.0054, + "step": 37372 + }, + { + "epoch": 34.87, + "learning_rate": 4.564179104477612e-05, + "loss": 0.0017, + "step": 37376 + }, + { + "epoch": 34.87, + "learning_rate": 4.564132462686567e-05, + "loss": 0.0002, + "step": 37380 + }, + { + "epoch": 34.87, + "learning_rate": 4.5640858208955226e-05, + "loss": 0.0001, + "step": 37384 + }, + { + "epoch": 34.88, + "learning_rate": 4.564039179104478e-05, + "loss": 0.0002, + "step": 37388 + }, + { + "epoch": 34.88, + "learning_rate": 4.563992537313433e-05, + "loss": 0.0001, + "step": 37392 + }, + { + "epoch": 34.88, + "learning_rate": 4.5639458955223884e-05, + "loss": 0.0001, + "step": 37396 + }, + { + "epoch": 34.89, + "learning_rate": 4.563899253731344e-05, + "loss": 0.0001, + "step": 37400 + }, + { + "epoch": 34.89, + "learning_rate": 4.563852611940299e-05, + "loss": 0.0001, + "step": 37404 + }, + { + "epoch": 34.9, + "learning_rate": 4.563805970149254e-05, + "loss": 0.0001, + "step": 37408 + }, + { + "epoch": 34.9, + "learning_rate": 4.563759328358209e-05, + "loss": 0.0003, + "step": 37412 + }, + { + "epoch": 34.9, + "learning_rate": 4.5637126865671645e-05, + "loss": 0.0, + "step": 37416 + }, + { + "epoch": 34.91, + "learning_rate": 4.56366604477612e-05, + "loss": 0.0, + "step": 37420 + }, + { + "epoch": 34.91, + "learning_rate": 4.563619402985075e-05, + "loss": 0.0013, + "step": 37424 + }, + { + "epoch": 34.91, + "learning_rate": 4.5635727611940296e-05, + "loss": 0.0, + "step": 37428 + }, + { + "epoch": 34.92, + "learning_rate": 4.563526119402986e-05, + "loss": 0.0, + "step": 37432 + }, + { + "epoch": 34.92, + "learning_rate": 4.5634794776119406e-05, + "loss": 0.0028, + "step": 37436 + }, + { + "epoch": 34.93, + "learning_rate": 4.5634328358208954e-05, + "loss": 0.0, + "step": 37440 + }, + { + "epoch": 34.93, + "learning_rate": 4.563386194029851e-05, + "loss": 0.0, + "step": 37444 + }, + { + "epoch": 34.93, + "learning_rate": 4.5633395522388064e-05, + "loss": 0.0005, + "step": 37448 + }, + { + "epoch": 34.94, + "learning_rate": 4.563292910447761e-05, + "loss": 0.0001, + "step": 37452 + }, + { + "epoch": 34.94, + "learning_rate": 4.5632462686567167e-05, + "loss": 0.0021, + "step": 37456 + }, + { + "epoch": 34.94, + "learning_rate": 4.563199626865672e-05, + "loss": 0.0, + "step": 37460 + }, + { + "epoch": 34.95, + "learning_rate": 4.563152985074627e-05, + "loss": 0.0005, + "step": 37464 + }, + { + "epoch": 34.95, + "learning_rate": 4.5631063432835825e-05, + "loss": 0.0005, + "step": 37468 + }, + { + "epoch": 34.96, + "learning_rate": 4.563059701492537e-05, + "loss": 0.0001, + "step": 37472 + }, + { + "epoch": 34.96, + "learning_rate": 4.563013059701493e-05, + "loss": 0.0101, + "step": 37476 + }, + { + "epoch": 34.96, + "learning_rate": 4.562966417910448e-05, + "loss": 0.0, + "step": 37480 + }, + { + "epoch": 34.97, + "learning_rate": 4.562919776119403e-05, + "loss": 0.0001, + "step": 37484 + }, + { + "epoch": 34.97, + "learning_rate": 4.562873134328358e-05, + "loss": 0.0044, + "step": 37488 + }, + { + "epoch": 34.97, + "learning_rate": 4.562826492537314e-05, + "loss": 0.0001, + "step": 37492 + }, + { + "epoch": 34.98, + "learning_rate": 4.562779850746269e-05, + "loss": 0.0002, + "step": 37496 + }, + { + "epoch": 34.98, + "learning_rate": 4.562733208955224e-05, + "loss": 0.0012, + "step": 37500 + }, + { + "epoch": 34.98, + "eval_exact_match": 0.7282398452611218, + "eval_exec": 0.7630560928433269, + "eval_loss": 0.4141908288002014, + "eval_runtime": 1965.9572, + "eval_samples_per_second": 0.526, + "step": 37500 + }, + { + "epoch": 34.98, + "learning_rate": 4.562686567164179e-05, + "loss": 0.0023, + "step": 37504 + }, + { + "epoch": 34.99, + "learning_rate": 4.5626399253731347e-05, + "loss": 0.0028, + "step": 37508 + }, + { + "epoch": 34.99, + "learning_rate": 4.56259328358209e-05, + "loss": 0.0001, + "step": 37512 + }, + { + "epoch": 35.0, + "learning_rate": 4.562546641791045e-05, + "loss": 0.0001, + "step": 37516 + }, + { + "epoch": 35.0, + "learning_rate": 4.5625e-05, + "loss": 0.0002, + "step": 37520 + }, + { + "epoch": 35.0, + "learning_rate": 4.562453358208956e-05, + "loss": 0.0001, + "step": 37524 + }, + { + "epoch": 35.01, + "learning_rate": 4.562406716417911e-05, + "loss": 0.0069, + "step": 37528 + }, + { + "epoch": 35.01, + "learning_rate": 4.5623600746268656e-05, + "loss": 0.0007, + "step": 37532 + }, + { + "epoch": 35.01, + "learning_rate": 4.562313432835821e-05, + "loss": 0.0001, + "step": 37536 + }, + { + "epoch": 35.02, + "learning_rate": 4.5622667910447765e-05, + "loss": 0.0001, + "step": 37540 + }, + { + "epoch": 35.02, + "learning_rate": 4.5622201492537314e-05, + "loss": 0.0001, + "step": 37544 + }, + { + "epoch": 35.03, + "learning_rate": 4.562173507462687e-05, + "loss": 0.0001, + "step": 37548 + }, + { + "epoch": 35.03, + "learning_rate": 4.5621268656716423e-05, + "loss": 0.0, + "step": 37552 + }, + { + "epoch": 35.03, + "learning_rate": 4.562080223880597e-05, + "loss": 0.0, + "step": 37556 + }, + { + "epoch": 35.04, + "learning_rate": 4.5620335820895526e-05, + "loss": 0.0, + "step": 37560 + }, + { + "epoch": 35.04, + "learning_rate": 4.5619869402985075e-05, + "loss": 0.0001, + "step": 37564 + }, + { + "epoch": 35.04, + "learning_rate": 4.561940298507463e-05, + "loss": 0.0009, + "step": 37568 + }, + { + "epoch": 35.05, + "learning_rate": 4.5618936567164184e-05, + "loss": 0.0, + "step": 37572 + }, + { + "epoch": 35.05, + "learning_rate": 4.561847014925373e-05, + "loss": 0.0001, + "step": 37576 + }, + { + "epoch": 35.06, + "learning_rate": 4.561800373134328e-05, + "loss": 0.0008, + "step": 37580 + }, + { + "epoch": 35.06, + "learning_rate": 4.561753731343284e-05, + "loss": 0.0001, + "step": 37584 + }, + { + "epoch": 35.06, + "learning_rate": 4.561707089552239e-05, + "loss": 0.0007, + "step": 37588 + }, + { + "epoch": 35.07, + "learning_rate": 4.561660447761194e-05, + "loss": 0.0001, + "step": 37592 + }, + { + "epoch": 35.07, + "learning_rate": 4.5616138059701494e-05, + "loss": 0.0002, + "step": 37596 + }, + { + "epoch": 35.07, + "learning_rate": 4.561567164179105e-05, + "loss": 0.0003, + "step": 37600 + }, + { + "epoch": 35.08, + "learning_rate": 4.5615205223880597e-05, + "loss": 0.0, + "step": 37604 + }, + { + "epoch": 35.08, + "learning_rate": 4.561473880597015e-05, + "loss": 0.0, + "step": 37608 + }, + { + "epoch": 35.09, + "learning_rate": 4.5614272388059706e-05, + "loss": 0.0, + "step": 37612 + }, + { + "epoch": 35.09, + "learning_rate": 4.5613805970149254e-05, + "loss": 0.0, + "step": 37616 + }, + { + "epoch": 35.09, + "learning_rate": 4.561333955223881e-05, + "loss": 0.0, + "step": 37620 + }, + { + "epoch": 35.1, + "learning_rate": 4.561287313432836e-05, + "loss": 0.0002, + "step": 37624 + }, + { + "epoch": 35.1, + "learning_rate": 4.561240671641791e-05, + "loss": 0.0, + "step": 37628 + }, + { + "epoch": 35.1, + "learning_rate": 4.561194029850747e-05, + "loss": 0.0, + "step": 37632 + }, + { + "epoch": 35.11, + "learning_rate": 4.5611473880597015e-05, + "loss": 0.0001, + "step": 37636 + }, + { + "epoch": 35.11, + "learning_rate": 4.5611007462686564e-05, + "loss": 0.0, + "step": 37640 + }, + { + "epoch": 35.12, + "learning_rate": 4.5610541044776125e-05, + "loss": 0.0, + "step": 37644 + }, + { + "epoch": 35.12, + "learning_rate": 4.5610074626865673e-05, + "loss": 0.0019, + "step": 37648 + }, + { + "epoch": 35.12, + "learning_rate": 4.560960820895522e-05, + "loss": 0.0007, + "step": 37652 + }, + { + "epoch": 35.13, + "learning_rate": 4.5609141791044776e-05, + "loss": 0.0001, + "step": 37656 + }, + { + "epoch": 35.13, + "learning_rate": 4.560867537313433e-05, + "loss": 0.0001, + "step": 37660 + }, + { + "epoch": 35.13, + "learning_rate": 4.560820895522388e-05, + "loss": 0.0, + "step": 37664 + }, + { + "epoch": 35.14, + "learning_rate": 4.5607742537313434e-05, + "loss": 0.0001, + "step": 37668 + }, + { + "epoch": 35.14, + "learning_rate": 4.560727611940299e-05, + "loss": 0.0004, + "step": 37672 + }, + { + "epoch": 35.15, + "learning_rate": 4.5606809701492544e-05, + "loss": 0.0002, + "step": 37676 + }, + { + "epoch": 35.15, + "learning_rate": 4.560634328358209e-05, + "loss": 0.0, + "step": 37680 + }, + { + "epoch": 35.15, + "learning_rate": 4.560587686567164e-05, + "loss": 0.0038, + "step": 37684 + }, + { + "epoch": 35.16, + "learning_rate": 4.5605410447761195e-05, + "loss": 0.0, + "step": 37688 + }, + { + "epoch": 35.16, + "learning_rate": 4.560494402985075e-05, + "loss": 0.0001, + "step": 37692 + }, + { + "epoch": 35.16, + "learning_rate": 4.56044776119403e-05, + "loss": 0.0, + "step": 37696 + }, + { + "epoch": 35.17, + "learning_rate": 4.560401119402985e-05, + "loss": 0.0001, + "step": 37700 + }, + { + "epoch": 35.17, + "learning_rate": 4.560354477611941e-05, + "loss": 0.0006, + "step": 37704 + }, + { + "epoch": 35.18, + "learning_rate": 4.5603078358208956e-05, + "loss": 0.0, + "step": 37708 + }, + { + "epoch": 35.18, + "learning_rate": 4.560261194029851e-05, + "loss": 0.0003, + "step": 37712 + }, + { + "epoch": 35.18, + "learning_rate": 4.560214552238806e-05, + "loss": 0.0001, + "step": 37716 + }, + { + "epoch": 35.19, + "learning_rate": 4.5601679104477614e-05, + "loss": 0.0001, + "step": 37720 + }, + { + "epoch": 35.19, + "learning_rate": 4.560121268656717e-05, + "loss": 0.0031, + "step": 37724 + }, + { + "epoch": 35.19, + "learning_rate": 4.560074626865672e-05, + "loss": 0.0024, + "step": 37728 + }, + { + "epoch": 35.2, + "learning_rate": 4.560027985074627e-05, + "loss": 0.0009, + "step": 37732 + }, + { + "epoch": 35.2, + "learning_rate": 4.559981343283583e-05, + "loss": 0.0, + "step": 37736 + }, + { + "epoch": 35.21, + "learning_rate": 4.5599347014925375e-05, + "loss": 0.0002, + "step": 37740 + }, + { + "epoch": 35.21, + "learning_rate": 4.5598880597014923e-05, + "loss": 0.0001, + "step": 37744 + }, + { + "epoch": 35.21, + "learning_rate": 4.559841417910448e-05, + "loss": 0.0006, + "step": 37748 + }, + { + "epoch": 35.22, + "learning_rate": 4.559794776119403e-05, + "loss": 0.0, + "step": 37752 + }, + { + "epoch": 35.22, + "learning_rate": 4.559748134328358e-05, + "loss": 0.0005, + "step": 37756 + }, + { + "epoch": 35.22, + "learning_rate": 4.5597014925373136e-05, + "loss": 0.0035, + "step": 37760 + }, + { + "epoch": 35.23, + "learning_rate": 4.559654850746269e-05, + "loss": 0.0001, + "step": 37764 + }, + { + "epoch": 35.23, + "learning_rate": 4.559608208955224e-05, + "loss": 0.0012, + "step": 37768 + }, + { + "epoch": 35.24, + "learning_rate": 4.5595615671641794e-05, + "loss": 0.0001, + "step": 37772 + }, + { + "epoch": 35.24, + "learning_rate": 4.559514925373134e-05, + "loss": 0.0022, + "step": 37776 + }, + { + "epoch": 35.24, + "learning_rate": 4.55946828358209e-05, + "loss": 0.0003, + "step": 37780 + }, + { + "epoch": 35.25, + "learning_rate": 4.559421641791045e-05, + "loss": 0.0002, + "step": 37784 + }, + { + "epoch": 35.25, + "learning_rate": 4.559375e-05, + "loss": 0.0003, + "step": 37788 + }, + { + "epoch": 35.25, + "learning_rate": 4.5593283582089555e-05, + "loss": 0.0008, + "step": 37792 + }, + { + "epoch": 35.26, + "learning_rate": 4.559281716417911e-05, + "loss": 0.0, + "step": 37796 + }, + { + "epoch": 35.26, + "learning_rate": 4.559235074626866e-05, + "loss": 0.0, + "step": 37800 + }, + { + "epoch": 35.26, + "learning_rate": 4.5591884328358206e-05, + "loss": 0.0, + "step": 37804 + }, + { + "epoch": 35.27, + "learning_rate": 4.559141791044776e-05, + "loss": 0.0002, + "step": 37808 + }, + { + "epoch": 35.27, + "learning_rate": 4.5590951492537316e-05, + "loss": 0.0001, + "step": 37812 + }, + { + "epoch": 35.28, + "learning_rate": 4.5590485074626864e-05, + "loss": 0.0009, + "step": 37816 + }, + { + "epoch": 35.28, + "learning_rate": 4.559001865671642e-05, + "loss": 0.0006, + "step": 37820 + }, + { + "epoch": 35.28, + "learning_rate": 4.5589552238805974e-05, + "loss": 0.0, + "step": 37824 + }, + { + "epoch": 35.29, + "learning_rate": 4.558908582089552e-05, + "loss": 0.0002, + "step": 37828 + }, + { + "epoch": 35.29, + "learning_rate": 4.558861940298508e-05, + "loss": 0.0002, + "step": 37832 + }, + { + "epoch": 35.29, + "learning_rate": 4.5588152985074625e-05, + "loss": 0.0002, + "step": 37836 + }, + { + "epoch": 35.3, + "learning_rate": 4.558768656716419e-05, + "loss": 0.0001, + "step": 37840 + }, + { + "epoch": 35.3, + "learning_rate": 4.5587220149253735e-05, + "loss": 0.0, + "step": 37844 + }, + { + "epoch": 35.31, + "learning_rate": 4.558675373134328e-05, + "loss": 0.0001, + "step": 37848 + }, + { + "epoch": 35.31, + "learning_rate": 4.558628731343284e-05, + "loss": 0.0001, + "step": 37852 + }, + { + "epoch": 35.31, + "learning_rate": 4.558582089552239e-05, + "loss": 0.0, + "step": 37856 + }, + { + "epoch": 35.32, + "learning_rate": 4.558535447761194e-05, + "loss": 0.0, + "step": 37860 + }, + { + "epoch": 35.32, + "learning_rate": 4.5584888059701496e-05, + "loss": 0.0001, + "step": 37864 + }, + { + "epoch": 35.32, + "learning_rate": 4.5584421641791044e-05, + "loss": 0.0, + "step": 37868 + }, + { + "epoch": 35.33, + "learning_rate": 4.55839552238806e-05, + "loss": 0.0004, + "step": 37872 + }, + { + "epoch": 35.33, + "learning_rate": 4.5583488805970154e-05, + "loss": 0.0001, + "step": 37876 + }, + { + "epoch": 35.34, + "learning_rate": 4.55830223880597e-05, + "loss": 0.0, + "step": 37880 + }, + { + "epoch": 35.34, + "learning_rate": 4.558255597014926e-05, + "loss": 0.0002, + "step": 37884 + }, + { + "epoch": 35.34, + "learning_rate": 4.558208955223881e-05, + "loss": 0.0, + "step": 37888 + }, + { + "epoch": 35.35, + "learning_rate": 4.558162313432836e-05, + "loss": 0.0, + "step": 37892 + }, + { + "epoch": 35.35, + "learning_rate": 4.558115671641791e-05, + "loss": 0.0001, + "step": 37896 + }, + { + "epoch": 35.35, + "learning_rate": 4.558069029850747e-05, + "loss": 0.0, + "step": 37900 + }, + { + "epoch": 35.36, + "learning_rate": 4.558022388059702e-05, + "loss": 0.0001, + "step": 37904 + }, + { + "epoch": 35.36, + "learning_rate": 4.5579757462686566e-05, + "loss": 0.0, + "step": 37908 + }, + { + "epoch": 35.37, + "learning_rate": 4.557929104477612e-05, + "loss": 0.0, + "step": 37912 + }, + { + "epoch": 35.37, + "learning_rate": 4.5578824626865676e-05, + "loss": 0.0004, + "step": 37916 + }, + { + "epoch": 35.37, + "learning_rate": 4.5578358208955224e-05, + "loss": 0.0003, + "step": 37920 + }, + { + "epoch": 35.38, + "learning_rate": 4.557789179104478e-05, + "loss": 0.0082, + "step": 37924 + }, + { + "epoch": 35.38, + "learning_rate": 4.557742537313433e-05, + "loss": 0.0001, + "step": 37928 + }, + { + "epoch": 35.38, + "learning_rate": 4.557695895522388e-05, + "loss": 0.0007, + "step": 37932 + }, + { + "epoch": 35.39, + "learning_rate": 4.557649253731344e-05, + "loss": 0.0001, + "step": 37936 + }, + { + "epoch": 35.39, + "learning_rate": 4.5576026119402985e-05, + "loss": 0.0013, + "step": 37940 + }, + { + "epoch": 35.4, + "learning_rate": 4.557555970149254e-05, + "loss": 0.0005, + "step": 37944 + }, + { + "epoch": 35.4, + "learning_rate": 4.5575093283582095e-05, + "loss": 0.0002, + "step": 37948 + }, + { + "epoch": 35.4, + "learning_rate": 4.557462686567164e-05, + "loss": 0.0001, + "step": 37952 + }, + { + "epoch": 35.41, + "learning_rate": 4.557416044776119e-05, + "loss": 0.0, + "step": 37956 + }, + { + "epoch": 35.41, + "learning_rate": 4.557369402985075e-05, + "loss": 0.0006, + "step": 37960 + }, + { + "epoch": 35.41, + "learning_rate": 4.55732276119403e-05, + "loss": 0.0, + "step": 37964 + }, + { + "epoch": 35.42, + "learning_rate": 4.557276119402985e-05, + "loss": 0.0001, + "step": 37968 + }, + { + "epoch": 35.42, + "learning_rate": 4.5572294776119404e-05, + "loss": 0.0, + "step": 37972 + }, + { + "epoch": 35.43, + "learning_rate": 4.557182835820896e-05, + "loss": 0.0014, + "step": 37976 + }, + { + "epoch": 35.43, + "learning_rate": 4.557136194029851e-05, + "loss": 0.0, + "step": 37980 + }, + { + "epoch": 35.43, + "learning_rate": 4.557089552238806e-05, + "loss": 0.0019, + "step": 37984 + }, + { + "epoch": 35.44, + "learning_rate": 4.557042910447761e-05, + "loss": 0.0021, + "step": 37988 + }, + { + "epoch": 35.44, + "learning_rate": 4.5569962686567165e-05, + "loss": 0.0, + "step": 37992 + }, + { + "epoch": 35.44, + "learning_rate": 4.556949626865672e-05, + "loss": 0.0, + "step": 37996 + }, + { + "epoch": 35.45, + "learning_rate": 4.556902985074627e-05, + "loss": 0.0001, + "step": 38000 + }, + { + "epoch": 35.45, + "eval_exact_match": 0.7427466150870407, + "eval_exec": 0.7717601547388782, + "eval_loss": 0.43170931935310364, + "eval_runtime": 1115.1121, + "eval_samples_per_second": 0.927, + "step": 38000 + }, + { + "epoch": 35.45, + "learning_rate": 4.556856343283582e-05, + "loss": 0.001, + "step": 38004 + }, + { + "epoch": 35.46, + "learning_rate": 4.556809701492538e-05, + "loss": 0.0019, + "step": 38008 + }, + { + "epoch": 35.46, + "learning_rate": 4.5567630597014926e-05, + "loss": 0.001, + "step": 38012 + }, + { + "epoch": 35.46, + "learning_rate": 4.556716417910448e-05, + "loss": 0.0006, + "step": 38016 + }, + { + "epoch": 35.47, + "learning_rate": 4.5566697761194036e-05, + "loss": 0.0, + "step": 38020 + }, + { + "epoch": 35.47, + "learning_rate": 4.5566231343283584e-05, + "loss": 0.0018, + "step": 38024 + }, + { + "epoch": 35.47, + "learning_rate": 4.556576492537314e-05, + "loss": 0.0001, + "step": 38028 + }, + { + "epoch": 35.48, + "learning_rate": 4.556529850746269e-05, + "loss": 0.0003, + "step": 38032 + }, + { + "epoch": 35.48, + "learning_rate": 4.556483208955224e-05, + "loss": 0.0003, + "step": 38036 + }, + { + "epoch": 35.49, + "learning_rate": 4.55643656716418e-05, + "loss": 0.0, + "step": 38040 + }, + { + "epoch": 35.49, + "learning_rate": 4.5563899253731345e-05, + "loss": 0.0, + "step": 38044 + }, + { + "epoch": 35.49, + "learning_rate": 4.556343283582089e-05, + "loss": 0.0, + "step": 38048 + }, + { + "epoch": 35.5, + "learning_rate": 4.5562966417910455e-05, + "loss": 0.0004, + "step": 38052 + }, + { + "epoch": 35.5, + "learning_rate": 4.55625e-05, + "loss": 0.0002, + "step": 38056 + }, + { + "epoch": 35.5, + "learning_rate": 4.556203358208955e-05, + "loss": 0.0001, + "step": 38060 + }, + { + "epoch": 35.51, + "learning_rate": 4.5561567164179106e-05, + "loss": 0.0, + "step": 38064 + }, + { + "epoch": 35.51, + "learning_rate": 4.556110074626866e-05, + "loss": 0.0002, + "step": 38068 + }, + { + "epoch": 35.51, + "learning_rate": 4.556063432835821e-05, + "loss": 0.0, + "step": 38072 + }, + { + "epoch": 35.52, + "learning_rate": 4.5560167910447764e-05, + "loss": 0.0011, + "step": 38076 + }, + { + "epoch": 35.52, + "learning_rate": 4.555970149253732e-05, + "loss": 0.0, + "step": 38080 + }, + { + "epoch": 35.53, + "learning_rate": 4.555923507462687e-05, + "loss": 0.0002, + "step": 38084 + }, + { + "epoch": 35.53, + "learning_rate": 4.555876865671642e-05, + "loss": 0.0001, + "step": 38088 + }, + { + "epoch": 35.53, + "learning_rate": 4.555830223880597e-05, + "loss": 0.0004, + "step": 38092 + }, + { + "epoch": 35.54, + "learning_rate": 4.5557835820895525e-05, + "loss": 0.0011, + "step": 38096 + }, + { + "epoch": 35.54, + "learning_rate": 4.555736940298508e-05, + "loss": 0.0, + "step": 38100 + }, + { + "epoch": 35.54, + "learning_rate": 4.555690298507463e-05, + "loss": 0.0001, + "step": 38104 + }, + { + "epoch": 35.55, + "learning_rate": 4.5556436567164176e-05, + "loss": 0.0011, + "step": 38108 + }, + { + "epoch": 35.55, + "learning_rate": 4.555597014925374e-05, + "loss": 0.0001, + "step": 38112 + }, + { + "epoch": 35.56, + "learning_rate": 4.5555503731343286e-05, + "loss": 0.0, + "step": 38116 + }, + { + "epoch": 35.56, + "learning_rate": 4.5555037313432834e-05, + "loss": 0.0002, + "step": 38120 + }, + { + "epoch": 35.56, + "learning_rate": 4.555457089552239e-05, + "loss": 0.0009, + "step": 38124 + }, + { + "epoch": 35.57, + "learning_rate": 4.5554104477611944e-05, + "loss": 0.0007, + "step": 38128 + }, + { + "epoch": 35.57, + "learning_rate": 4.555363805970149e-05, + "loss": 0.0001, + "step": 38132 + }, + { + "epoch": 35.57, + "learning_rate": 4.555317164179105e-05, + "loss": 0.0, + "step": 38136 + }, + { + "epoch": 35.58, + "learning_rate": 4.55527052238806e-05, + "loss": 0.0003, + "step": 38140 + }, + { + "epoch": 35.58, + "learning_rate": 4.555223880597015e-05, + "loss": 0.0007, + "step": 38144 + }, + { + "epoch": 35.59, + "learning_rate": 4.5551772388059705e-05, + "loss": 0.0008, + "step": 38148 + }, + { + "epoch": 35.59, + "learning_rate": 4.555130597014925e-05, + "loss": 0.0, + "step": 38152 + }, + { + "epoch": 35.59, + "learning_rate": 4.555083955223881e-05, + "loss": 0.0138, + "step": 38156 + }, + { + "epoch": 35.6, + "learning_rate": 4.555037313432836e-05, + "loss": 0.0019, + "step": 38160 + }, + { + "epoch": 35.6, + "learning_rate": 4.554990671641791e-05, + "loss": 0.0, + "step": 38164 + }, + { + "epoch": 35.6, + "learning_rate": 4.5549440298507466e-05, + "loss": 0.0026, + "step": 38168 + }, + { + "epoch": 35.61, + "learning_rate": 4.554897388059702e-05, + "loss": 0.0, + "step": 38172 + }, + { + "epoch": 35.61, + "learning_rate": 4.554850746268657e-05, + "loss": 0.0, + "step": 38176 + }, + { + "epoch": 35.62, + "learning_rate": 4.5548041044776124e-05, + "loss": 0.0003, + "step": 38180 + }, + { + "epoch": 35.62, + "learning_rate": 4.554757462686567e-05, + "loss": 0.0006, + "step": 38184 + }, + { + "epoch": 35.62, + "learning_rate": 4.554710820895523e-05, + "loss": 0.0007, + "step": 38188 + }, + { + "epoch": 35.63, + "learning_rate": 4.554664179104478e-05, + "loss": 0.001, + "step": 38192 + }, + { + "epoch": 35.63, + "learning_rate": 4.554617537313433e-05, + "loss": 0.0048, + "step": 38196 + }, + { + "epoch": 35.63, + "learning_rate": 4.554570895522388e-05, + "loss": 0.0004, + "step": 38200 + }, + { + "epoch": 35.64, + "learning_rate": 4.554524253731344e-05, + "loss": 0.0, + "step": 38204 + }, + { + "epoch": 35.64, + "learning_rate": 4.554477611940299e-05, + "loss": 0.0025, + "step": 38208 + }, + { + "epoch": 35.65, + "learning_rate": 4.5544309701492536e-05, + "loss": 0.0042, + "step": 38212 + }, + { + "epoch": 35.65, + "learning_rate": 4.554384328358209e-05, + "loss": 0.0001, + "step": 38216 + }, + { + "epoch": 35.65, + "learning_rate": 4.5543376865671646e-05, + "loss": 0.0001, + "step": 38220 + }, + { + "epoch": 35.66, + "learning_rate": 4.5542910447761194e-05, + "loss": 0.0016, + "step": 38224 + }, + { + "epoch": 35.66, + "learning_rate": 4.554244402985075e-05, + "loss": 0.0008, + "step": 38228 + }, + { + "epoch": 35.66, + "learning_rate": 4.5541977611940304e-05, + "loss": 0.0003, + "step": 38232 + }, + { + "epoch": 35.67, + "learning_rate": 4.554151119402985e-05, + "loss": 0.0009, + "step": 38236 + }, + { + "epoch": 35.67, + "learning_rate": 4.554104477611941e-05, + "loss": 0.0, + "step": 38240 + }, + { + "epoch": 35.68, + "learning_rate": 4.5540578358208955e-05, + "loss": 0.0, + "step": 38244 + }, + { + "epoch": 35.68, + "learning_rate": 4.554011194029851e-05, + "loss": 0.0001, + "step": 38248 + }, + { + "epoch": 35.68, + "learning_rate": 4.5539645522388065e-05, + "loss": 0.0003, + "step": 38252 + }, + { + "epoch": 35.69, + "learning_rate": 4.553917910447761e-05, + "loss": 0.0004, + "step": 38256 + }, + { + "epoch": 35.69, + "learning_rate": 4.553871268656716e-05, + "loss": 0.0001, + "step": 38260 + }, + { + "epoch": 35.69, + "learning_rate": 4.553824626865672e-05, + "loss": 0.0, + "step": 38264 + }, + { + "epoch": 35.7, + "learning_rate": 4.553777985074627e-05, + "loss": 0.0001, + "step": 38268 + }, + { + "epoch": 35.7, + "learning_rate": 4.553731343283582e-05, + "loss": 0.0001, + "step": 38272 + }, + { + "epoch": 35.71, + "learning_rate": 4.5536847014925374e-05, + "loss": 0.0, + "step": 38276 + }, + { + "epoch": 35.71, + "learning_rate": 4.553638059701493e-05, + "loss": 0.0005, + "step": 38280 + }, + { + "epoch": 35.71, + "learning_rate": 4.553591417910448e-05, + "loss": 0.0001, + "step": 38284 + }, + { + "epoch": 35.72, + "learning_rate": 4.553544776119403e-05, + "loss": 0.0013, + "step": 38288 + }, + { + "epoch": 35.72, + "learning_rate": 4.553498134328359e-05, + "loss": 0.0002, + "step": 38292 + }, + { + "epoch": 35.72, + "learning_rate": 4.5534514925373135e-05, + "loss": 0.0001, + "step": 38296 + }, + { + "epoch": 35.73, + "learning_rate": 4.553404850746269e-05, + "loss": 0.0001, + "step": 38300 + }, + { + "epoch": 35.73, + "learning_rate": 4.553358208955224e-05, + "loss": 0.0, + "step": 38304 + }, + { + "epoch": 35.73, + "learning_rate": 4.553311567164179e-05, + "loss": 0.0002, + "step": 38308 + }, + { + "epoch": 35.74, + "learning_rate": 4.553264925373135e-05, + "loss": 0.0036, + "step": 38312 + }, + { + "epoch": 35.74, + "learning_rate": 4.5532182835820896e-05, + "loss": 0.0001, + "step": 38316 + }, + { + "epoch": 35.75, + "learning_rate": 4.5531716417910444e-05, + "loss": 0.0002, + "step": 38320 + }, + { + "epoch": 35.75, + "learning_rate": 4.5531250000000006e-05, + "loss": 0.0003, + "step": 38324 + }, + { + "epoch": 35.75, + "learning_rate": 4.5530783582089554e-05, + "loss": 0.0, + "step": 38328 + }, + { + "epoch": 35.76, + "learning_rate": 4.553031716417911e-05, + "loss": 0.0006, + "step": 38332 + }, + { + "epoch": 35.76, + "learning_rate": 4.552985074626866e-05, + "loss": 0.0, + "step": 38336 + }, + { + "epoch": 35.76, + "learning_rate": 4.552938432835821e-05, + "loss": 0.0001, + "step": 38340 + }, + { + "epoch": 35.77, + "learning_rate": 4.5528917910447767e-05, + "loss": 0.0, + "step": 38344 + }, + { + "epoch": 35.77, + "learning_rate": 4.5528451492537315e-05, + "loss": 0.0028, + "step": 38348 + }, + { + "epoch": 35.78, + "learning_rate": 4.552798507462687e-05, + "loss": 0.0001, + "step": 38352 + }, + { + "epoch": 35.78, + "learning_rate": 4.5527518656716425e-05, + "loss": 0.0001, + "step": 38356 + }, + { + "epoch": 35.78, + "learning_rate": 4.552705223880597e-05, + "loss": 0.0007, + "step": 38360 + }, + { + "epoch": 35.79, + "learning_rate": 4.552658582089552e-05, + "loss": 0.0001, + "step": 38364 + }, + { + "epoch": 35.79, + "learning_rate": 4.5526119402985076e-05, + "loss": 0.0001, + "step": 38368 + }, + { + "epoch": 35.79, + "learning_rate": 4.552565298507463e-05, + "loss": 0.0001, + "step": 38372 + }, + { + "epoch": 35.8, + "learning_rate": 4.552518656716418e-05, + "loss": 0.0002, + "step": 38376 + }, + { + "epoch": 35.8, + "learning_rate": 4.5524720149253734e-05, + "loss": 0.0034, + "step": 38380 + }, + { + "epoch": 35.81, + "learning_rate": 4.552425373134329e-05, + "loss": 0.0, + "step": 38384 + }, + { + "epoch": 35.81, + "learning_rate": 4.552378731343284e-05, + "loss": 0.0005, + "step": 38388 + }, + { + "epoch": 35.81, + "learning_rate": 4.552332089552239e-05, + "loss": 0.0, + "step": 38392 + }, + { + "epoch": 35.82, + "learning_rate": 4.552285447761194e-05, + "loss": 0.0001, + "step": 38396 + }, + { + "epoch": 35.82, + "learning_rate": 4.5522388059701495e-05, + "loss": 0.0, + "step": 38400 + }, + { + "epoch": 35.82, + "learning_rate": 4.552192164179105e-05, + "loss": 0.0, + "step": 38404 + }, + { + "epoch": 35.83, + "learning_rate": 4.55214552238806e-05, + "loss": 0.0, + "step": 38408 + }, + { + "epoch": 35.83, + "learning_rate": 4.552098880597015e-05, + "loss": 0.0026, + "step": 38412 + }, + { + "epoch": 35.84, + "learning_rate": 4.552052238805971e-05, + "loss": 0.0008, + "step": 38416 + }, + { + "epoch": 35.84, + "learning_rate": 4.5520055970149256e-05, + "loss": 0.0, + "step": 38420 + }, + { + "epoch": 35.84, + "learning_rate": 4.5519589552238804e-05, + "loss": 0.0, + "step": 38424 + }, + { + "epoch": 35.85, + "learning_rate": 4.551912313432836e-05, + "loss": 0.0, + "step": 38428 + }, + { + "epoch": 35.85, + "learning_rate": 4.5518656716417914e-05, + "loss": 0.0002, + "step": 38432 + }, + { + "epoch": 35.85, + "learning_rate": 4.551819029850746e-05, + "loss": 0.0, + "step": 38436 + }, + { + "epoch": 35.86, + "learning_rate": 4.5517723880597017e-05, + "loss": 0.0011, + "step": 38440 + }, + { + "epoch": 35.86, + "learning_rate": 4.551725746268657e-05, + "loss": 0.0001, + "step": 38444 + }, + { + "epoch": 35.87, + "learning_rate": 4.551679104477612e-05, + "loss": 0.0, + "step": 38448 + }, + { + "epoch": 35.87, + "learning_rate": 4.5516324626865675e-05, + "loss": 0.0001, + "step": 38452 + }, + { + "epoch": 35.87, + "learning_rate": 4.551585820895522e-05, + "loss": 0.0003, + "step": 38456 + }, + { + "epoch": 35.88, + "learning_rate": 4.551539179104478e-05, + "loss": 0.0002, + "step": 38460 + }, + { + "epoch": 35.88, + "learning_rate": 4.551492537313433e-05, + "loss": 0.0004, + "step": 38464 + }, + { + "epoch": 35.88, + "learning_rate": 4.551445895522388e-05, + "loss": 0.0, + "step": 38468 + }, + { + "epoch": 35.89, + "learning_rate": 4.5513992537313436e-05, + "loss": 0.0001, + "step": 38472 + }, + { + "epoch": 35.89, + "learning_rate": 4.551352611940299e-05, + "loss": 0.0, + "step": 38476 + }, + { + "epoch": 35.9, + "learning_rate": 4.551305970149254e-05, + "loss": 0.0, + "step": 38480 + }, + { + "epoch": 35.9, + "learning_rate": 4.551259328358209e-05, + "loss": 0.0001, + "step": 38484 + }, + { + "epoch": 35.9, + "learning_rate": 4.551212686567164e-05, + "loss": 0.0001, + "step": 38488 + }, + { + "epoch": 35.91, + "learning_rate": 4.5511660447761197e-05, + "loss": 0.0011, + "step": 38492 + }, + { + "epoch": 35.91, + "learning_rate": 4.551119402985075e-05, + "loss": 0.0003, + "step": 38496 + }, + { + "epoch": 35.91, + "learning_rate": 4.55107276119403e-05, + "loss": 0.0001, + "step": 38500 + }, + { + "epoch": 35.91, + "eval_exact_match": 0.7543520309477756, + "eval_exec": 0.7794970986460348, + "eval_loss": 0.45771145820617676, + "eval_runtime": 1150.8415, + "eval_samples_per_second": 0.898, + "step": 38500 + }, + { + "epoch": 35.92, + "learning_rate": 4.5510261194029854e-05, + "loss": 0.0, + "step": 38504 + }, + { + "epoch": 35.92, + "learning_rate": 4.550979477611941e-05, + "loss": 0.0, + "step": 38508 + }, + { + "epoch": 35.93, + "learning_rate": 4.550932835820896e-05, + "loss": 0.0005, + "step": 38512 + }, + { + "epoch": 35.93, + "learning_rate": 4.5508861940298506e-05, + "loss": 0.0, + "step": 38516 + }, + { + "epoch": 35.93, + "learning_rate": 4.550839552238807e-05, + "loss": 0.0001, + "step": 38520 + }, + { + "epoch": 35.94, + "learning_rate": 4.5507929104477615e-05, + "loss": 0.0026, + "step": 38524 + }, + { + "epoch": 35.94, + "learning_rate": 4.5507462686567164e-05, + "loss": 0.0004, + "step": 38528 + }, + { + "epoch": 35.94, + "learning_rate": 4.550699626865672e-05, + "loss": 0.0002, + "step": 38532 + }, + { + "epoch": 35.95, + "learning_rate": 4.550652985074627e-05, + "loss": 0.0, + "step": 38536 + }, + { + "epoch": 35.95, + "learning_rate": 4.550606343283582e-05, + "loss": 0.0, + "step": 38540 + }, + { + "epoch": 35.96, + "learning_rate": 4.5505597014925376e-05, + "loss": 0.0015, + "step": 38544 + }, + { + "epoch": 35.96, + "learning_rate": 4.5505130597014925e-05, + "loss": 0.0002, + "step": 38548 + }, + { + "epoch": 35.96, + "learning_rate": 4.550466417910448e-05, + "loss": 0.0001, + "step": 38552 + }, + { + "epoch": 35.97, + "learning_rate": 4.5504197761194034e-05, + "loss": 0.0001, + "step": 38556 + }, + { + "epoch": 35.97, + "learning_rate": 4.550373134328358e-05, + "loss": 0.0, + "step": 38560 + }, + { + "epoch": 35.97, + "learning_rate": 4.550326492537314e-05, + "loss": 0.0, + "step": 38564 + }, + { + "epoch": 35.98, + "learning_rate": 4.550279850746269e-05, + "loss": 0.0002, + "step": 38568 + }, + { + "epoch": 35.98, + "learning_rate": 4.550233208955224e-05, + "loss": 0.0, + "step": 38572 + }, + { + "epoch": 35.98, + "learning_rate": 4.550186567164179e-05, + "loss": 0.0, + "step": 38576 + }, + { + "epoch": 35.99, + "learning_rate": 4.550139925373135e-05, + "loss": 0.0055, + "step": 38580 + }, + { + "epoch": 35.99, + "learning_rate": 4.55009328358209e-05, + "loss": 0.0001, + "step": 38584 + }, + { + "epoch": 36.0, + "learning_rate": 4.5500466417910447e-05, + "loss": 0.0002, + "step": 38588 + }, + { + "epoch": 36.0, + "learning_rate": 4.55e-05, + "loss": 0.0001, + "step": 38592 + }, + { + "epoch": 36.0, + "learning_rate": 4.5499533582089556e-05, + "loss": 0.0011, + "step": 38596 + }, + { + "epoch": 36.01, + "learning_rate": 4.5499067164179104e-05, + "loss": 0.0008, + "step": 38600 + }, + { + "epoch": 36.01, + "learning_rate": 4.549860074626866e-05, + "loss": 0.0, + "step": 38604 + }, + { + "epoch": 36.01, + "learning_rate": 4.549813432835821e-05, + "loss": 0.0, + "step": 38608 + }, + { + "epoch": 36.02, + "learning_rate": 4.549766791044776e-05, + "loss": 0.0, + "step": 38612 + }, + { + "epoch": 36.02, + "learning_rate": 4.549720149253732e-05, + "loss": 0.0001, + "step": 38616 + }, + { + "epoch": 36.03, + "learning_rate": 4.5496735074626865e-05, + "loss": 0.0002, + "step": 38620 + }, + { + "epoch": 36.03, + "learning_rate": 4.549626865671642e-05, + "loss": 0.0, + "step": 38624 + }, + { + "epoch": 36.03, + "learning_rate": 4.5495802238805975e-05, + "loss": 0.0008, + "step": 38628 + }, + { + "epoch": 36.04, + "learning_rate": 4.5495335820895523e-05, + "loss": 0.0006, + "step": 38632 + }, + { + "epoch": 36.04, + "learning_rate": 4.549486940298507e-05, + "loss": 0.0002, + "step": 38636 + }, + { + "epoch": 36.04, + "learning_rate": 4.549440298507463e-05, + "loss": 0.0, + "step": 38640 + }, + { + "epoch": 36.05, + "learning_rate": 4.549393656716418e-05, + "loss": 0.0, + "step": 38644 + }, + { + "epoch": 36.05, + "learning_rate": 4.5493470149253736e-05, + "loss": 0.0001, + "step": 38648 + }, + { + "epoch": 36.06, + "learning_rate": 4.5493003731343284e-05, + "loss": 0.0, + "step": 38652 + }, + { + "epoch": 36.06, + "learning_rate": 4.549253731343284e-05, + "loss": 0.0, + "step": 38656 + }, + { + "epoch": 36.06, + "learning_rate": 4.5492070895522394e-05, + "loss": 0.0, + "step": 38660 + }, + { + "epoch": 36.07, + "learning_rate": 4.549160447761194e-05, + "loss": 0.0006, + "step": 38664 + }, + { + "epoch": 36.07, + "learning_rate": 4.549113805970149e-05, + "loss": 0.0, + "step": 38668 + }, + { + "epoch": 36.07, + "learning_rate": 4.549067164179105e-05, + "loss": 0.0003, + "step": 38672 + }, + { + "epoch": 36.08, + "learning_rate": 4.54902052238806e-05, + "loss": 0.0006, + "step": 38676 + }, + { + "epoch": 36.08, + "learning_rate": 4.548973880597015e-05, + "loss": 0.0, + "step": 38680 + }, + { + "epoch": 36.09, + "learning_rate": 4.54892723880597e-05, + "loss": 0.0, + "step": 38684 + }, + { + "epoch": 36.09, + "learning_rate": 4.548880597014926e-05, + "loss": 0.001, + "step": 38688 + }, + { + "epoch": 36.09, + "learning_rate": 4.5488339552238806e-05, + "loss": 0.0001, + "step": 38692 + }, + { + "epoch": 36.1, + "learning_rate": 4.548787313432836e-05, + "loss": 0.0, + "step": 38696 + }, + { + "epoch": 36.1, + "learning_rate": 4.5487406716417916e-05, + "loss": 0.0014, + "step": 38700 + }, + { + "epoch": 36.1, + "learning_rate": 4.5486940298507464e-05, + "loss": 0.0001, + "step": 38704 + }, + { + "epoch": 36.11, + "learning_rate": 4.548647388059702e-05, + "loss": 0.0002, + "step": 38708 + }, + { + "epoch": 36.11, + "learning_rate": 4.548600746268657e-05, + "loss": 0.0002, + "step": 38712 + }, + { + "epoch": 36.12, + "learning_rate": 4.548554104477612e-05, + "loss": 0.0001, + "step": 38716 + }, + { + "epoch": 36.12, + "learning_rate": 4.548507462686568e-05, + "loss": 0.0017, + "step": 38720 + }, + { + "epoch": 36.12, + "learning_rate": 4.5484608208955225e-05, + "loss": 0.0003, + "step": 38724 + }, + { + "epoch": 36.13, + "learning_rate": 4.5484141791044773e-05, + "loss": 0.0176, + "step": 38728 + }, + { + "epoch": 36.13, + "learning_rate": 4.5483675373134335e-05, + "loss": 0.0012, + "step": 38732 + }, + { + "epoch": 36.13, + "learning_rate": 4.548320895522388e-05, + "loss": 0.0001, + "step": 38736 + }, + { + "epoch": 36.14, + "learning_rate": 4.548274253731343e-05, + "loss": 0.0043, + "step": 38740 + }, + { + "epoch": 36.14, + "learning_rate": 4.5482276119402986e-05, + "loss": 0.0004, + "step": 38744 + }, + { + "epoch": 36.15, + "learning_rate": 4.548180970149254e-05, + "loss": 0.0002, + "step": 38748 + }, + { + "epoch": 36.15, + "learning_rate": 4.548134328358209e-05, + "loss": 0.0001, + "step": 38752 + }, + { + "epoch": 36.15, + "learning_rate": 4.5480876865671644e-05, + "loss": 0.0, + "step": 38756 + }, + { + "epoch": 36.16, + "learning_rate": 4.54804104477612e-05, + "loss": 0.0, + "step": 38760 + }, + { + "epoch": 36.16, + "learning_rate": 4.547994402985075e-05, + "loss": 0.0, + "step": 38764 + }, + { + "epoch": 36.16, + "learning_rate": 4.54794776119403e-05, + "loss": 0.0251, + "step": 38768 + }, + { + "epoch": 36.17, + "learning_rate": 4.547901119402985e-05, + "loss": 0.001, + "step": 38772 + }, + { + "epoch": 36.17, + "learning_rate": 4.5478544776119405e-05, + "loss": 0.0011, + "step": 38776 + }, + { + "epoch": 36.18, + "learning_rate": 4.547807835820896e-05, + "loss": 0.0, + "step": 38780 + }, + { + "epoch": 36.18, + "learning_rate": 4.547761194029851e-05, + "loss": 0.0, + "step": 38784 + }, + { + "epoch": 36.18, + "learning_rate": 4.5477145522388056e-05, + "loss": 0.0001, + "step": 38788 + }, + { + "epoch": 36.19, + "learning_rate": 4.547667910447762e-05, + "loss": 0.0, + "step": 38792 + }, + { + "epoch": 36.19, + "learning_rate": 4.5476212686567166e-05, + "loss": 0.0001, + "step": 38796 + }, + { + "epoch": 36.19, + "learning_rate": 4.5475746268656714e-05, + "loss": 0.0001, + "step": 38800 + }, + { + "epoch": 36.2, + "learning_rate": 4.547527985074627e-05, + "loss": 0.0001, + "step": 38804 + }, + { + "epoch": 36.2, + "learning_rate": 4.5474813432835824e-05, + "loss": 0.0001, + "step": 38808 + }, + { + "epoch": 36.21, + "learning_rate": 4.547434701492538e-05, + "loss": 0.0001, + "step": 38812 + }, + { + "epoch": 36.21, + "learning_rate": 4.547388059701493e-05, + "loss": 0.0001, + "step": 38816 + }, + { + "epoch": 36.21, + "learning_rate": 4.547341417910448e-05, + "loss": 0.0004, + "step": 38820 + }, + { + "epoch": 36.22, + "learning_rate": 4.547294776119404e-05, + "loss": 0.001, + "step": 38824 + }, + { + "epoch": 36.22, + "learning_rate": 4.5472481343283585e-05, + "loss": 0.0011, + "step": 38828 + }, + { + "epoch": 36.22, + "learning_rate": 4.547201492537313e-05, + "loss": 0.001, + "step": 38832 + }, + { + "epoch": 36.23, + "learning_rate": 4.547154850746269e-05, + "loss": 0.0, + "step": 38836 + }, + { + "epoch": 36.23, + "learning_rate": 4.547108208955224e-05, + "loss": 0.0002, + "step": 38840 + }, + { + "epoch": 36.24, + "learning_rate": 4.547061567164179e-05, + "loss": 0.0001, + "step": 38844 + }, + { + "epoch": 36.24, + "learning_rate": 4.5470149253731346e-05, + "loss": 0.0001, + "step": 38848 + }, + { + "epoch": 36.24, + "learning_rate": 4.54696828358209e-05, + "loss": 0.0035, + "step": 38852 + }, + { + "epoch": 36.25, + "learning_rate": 4.546921641791045e-05, + "loss": 0.0001, + "step": 38856 + }, + { + "epoch": 36.25, + "learning_rate": 4.5468750000000004e-05, + "loss": 0.0008, + "step": 38860 + }, + { + "epoch": 36.25, + "learning_rate": 4.546828358208955e-05, + "loss": 0.0001, + "step": 38864 + }, + { + "epoch": 36.26, + "learning_rate": 4.546781716417911e-05, + "loss": 0.0001, + "step": 38868 + }, + { + "epoch": 36.26, + "learning_rate": 4.546735074626866e-05, + "loss": 0.0006, + "step": 38872 + }, + { + "epoch": 36.26, + "learning_rate": 4.546688432835821e-05, + "loss": 0.0001, + "step": 38876 + }, + { + "epoch": 36.27, + "learning_rate": 4.5466417910447765e-05, + "loss": 0.0, + "step": 38880 + }, + { + "epoch": 36.27, + "learning_rate": 4.546595149253732e-05, + "loss": 0.0008, + "step": 38884 + }, + { + "epoch": 36.28, + "learning_rate": 4.546548507462687e-05, + "loss": 0.0001, + "step": 38888 + }, + { + "epoch": 36.28, + "learning_rate": 4.5465018656716416e-05, + "loss": 0.0017, + "step": 38892 + }, + { + "epoch": 36.28, + "learning_rate": 4.546455223880597e-05, + "loss": 0.0, + "step": 38896 + }, + { + "epoch": 36.29, + "learning_rate": 4.5464085820895526e-05, + "loss": 0.0043, + "step": 38900 + }, + { + "epoch": 36.29, + "learning_rate": 4.5463619402985074e-05, + "loss": 0.0006, + "step": 38904 + }, + { + "epoch": 36.29, + "learning_rate": 4.546315298507463e-05, + "loss": 0.0, + "step": 38908 + }, + { + "epoch": 36.3, + "learning_rate": 4.5462686567164184e-05, + "loss": 0.0001, + "step": 38912 + }, + { + "epoch": 36.3, + "learning_rate": 4.546222014925373e-05, + "loss": 0.0014, + "step": 38916 + }, + { + "epoch": 36.31, + "learning_rate": 4.546175373134329e-05, + "loss": 0.0001, + "step": 38920 + }, + { + "epoch": 36.31, + "learning_rate": 4.5461287313432835e-05, + "loss": 0.0006, + "step": 38924 + }, + { + "epoch": 36.31, + "learning_rate": 4.546082089552239e-05, + "loss": 0.0004, + "step": 38928 + }, + { + "epoch": 36.32, + "learning_rate": 4.5460354477611945e-05, + "loss": 0.0022, + "step": 38932 + }, + { + "epoch": 36.32, + "learning_rate": 4.545988805970149e-05, + "loss": 0.0, + "step": 38936 + }, + { + "epoch": 36.32, + "learning_rate": 4.545942164179104e-05, + "loss": 0.0, + "step": 38940 + }, + { + "epoch": 36.33, + "learning_rate": 4.54589552238806e-05, + "loss": 0.0007, + "step": 38944 + }, + { + "epoch": 36.33, + "learning_rate": 4.545848880597015e-05, + "loss": 0.0001, + "step": 38948 + }, + { + "epoch": 36.34, + "learning_rate": 4.54580223880597e-05, + "loss": 0.0027, + "step": 38952 + }, + { + "epoch": 36.34, + "learning_rate": 4.5457555970149254e-05, + "loss": 0.0004, + "step": 38956 + }, + { + "epoch": 36.34, + "learning_rate": 4.545708955223881e-05, + "loss": 0.0001, + "step": 38960 + }, + { + "epoch": 36.35, + "learning_rate": 4.545662313432836e-05, + "loss": 0.0001, + "step": 38964 + }, + { + "epoch": 36.35, + "learning_rate": 4.545615671641791e-05, + "loss": 0.0035, + "step": 38968 + }, + { + "epoch": 36.35, + "learning_rate": 4.545569029850747e-05, + "loss": 0.0002, + "step": 38972 + }, + { + "epoch": 36.36, + "learning_rate": 4.545522388059702e-05, + "loss": 0.0072, + "step": 38976 + }, + { + "epoch": 36.36, + "learning_rate": 4.545475746268657e-05, + "loss": 0.0, + "step": 38980 + }, + { + "epoch": 36.37, + "learning_rate": 4.545429104477612e-05, + "loss": 0.0, + "step": 38984 + }, + { + "epoch": 36.37, + "learning_rate": 4.545382462686568e-05, + "loss": 0.0001, + "step": 38988 + }, + { + "epoch": 36.37, + "learning_rate": 4.545335820895523e-05, + "loss": 0.0007, + "step": 38992 + }, + { + "epoch": 36.38, + "learning_rate": 4.5452891791044776e-05, + "loss": 0.0015, + "step": 38996 + }, + { + "epoch": 36.38, + "learning_rate": 4.545242537313433e-05, + "loss": 0.0001, + "step": 39000 + }, + { + "epoch": 36.38, + "eval_exact_match": 0.7456479690522244, + "eval_exec": 0.769825918762089, + "eval_loss": 0.4195860028266907, + "eval_runtime": 1174.4998, + "eval_samples_per_second": 0.88, + "step": 39000 + }, + { + "epoch": 36.38, + "learning_rate": 4.5451958955223886e-05, + "loss": 0.0005, + "step": 39004 + }, + { + "epoch": 36.39, + "learning_rate": 4.5451492537313434e-05, + "loss": 0.0, + "step": 39008 + }, + { + "epoch": 36.39, + "learning_rate": 4.545102611940299e-05, + "loss": 0.0001, + "step": 39012 + }, + { + "epoch": 36.4, + "learning_rate": 4.545055970149254e-05, + "loss": 0.0, + "step": 39016 + }, + { + "epoch": 36.4, + "learning_rate": 4.545009328358209e-05, + "loss": 0.0003, + "step": 39020 + }, + { + "epoch": 36.4, + "learning_rate": 4.544962686567165e-05, + "loss": 0.0, + "step": 39024 + }, + { + "epoch": 36.41, + "learning_rate": 4.5449160447761195e-05, + "loss": 0.0001, + "step": 39028 + }, + { + "epoch": 36.41, + "learning_rate": 4.544869402985075e-05, + "loss": 0.0001, + "step": 39032 + }, + { + "epoch": 36.41, + "learning_rate": 4.5448227611940305e-05, + "loss": 0.0033, + "step": 39036 + }, + { + "epoch": 36.42, + "learning_rate": 4.544776119402985e-05, + "loss": 0.0002, + "step": 39040 + }, + { + "epoch": 36.42, + "learning_rate": 4.54472947761194e-05, + "loss": 0.0001, + "step": 39044 + }, + { + "epoch": 36.43, + "learning_rate": 4.5446828358208956e-05, + "loss": 0.0, + "step": 39048 + }, + { + "epoch": 36.43, + "learning_rate": 4.544636194029851e-05, + "loss": 0.0002, + "step": 39052 + }, + { + "epoch": 36.43, + "learning_rate": 4.544589552238806e-05, + "loss": 0.0, + "step": 39056 + }, + { + "epoch": 36.44, + "learning_rate": 4.5445429104477614e-05, + "loss": 0.0002, + "step": 39060 + }, + { + "epoch": 36.44, + "learning_rate": 4.544496268656717e-05, + "loss": 0.0009, + "step": 39064 + }, + { + "epoch": 36.44, + "learning_rate": 4.544449626865672e-05, + "loss": 0.0001, + "step": 39068 + }, + { + "epoch": 36.45, + "learning_rate": 4.544402985074627e-05, + "loss": 0.0, + "step": 39072 + }, + { + "epoch": 36.45, + "learning_rate": 4.544356343283582e-05, + "loss": 0.0001, + "step": 39076 + }, + { + "epoch": 36.46, + "learning_rate": 4.5443097014925375e-05, + "loss": 0.0, + "step": 39080 + }, + { + "epoch": 36.46, + "learning_rate": 4.544263059701493e-05, + "loss": 0.0, + "step": 39084 + }, + { + "epoch": 36.46, + "learning_rate": 4.544216417910448e-05, + "loss": 0.0002, + "step": 39088 + }, + { + "epoch": 36.47, + "learning_rate": 4.544169776119403e-05, + "loss": 0.003, + "step": 39092 + }, + { + "epoch": 36.47, + "learning_rate": 4.544123134328359e-05, + "loss": 0.0001, + "step": 39096 + }, + { + "epoch": 36.47, + "learning_rate": 4.5440764925373136e-05, + "loss": 0.0002, + "step": 39100 + }, + { + "epoch": 36.48, + "learning_rate": 4.5440298507462684e-05, + "loss": 0.0001, + "step": 39104 + }, + { + "epoch": 36.48, + "learning_rate": 4.543983208955224e-05, + "loss": 0.0004, + "step": 39108 + }, + { + "epoch": 36.49, + "learning_rate": 4.5439365671641794e-05, + "loss": 0.0192, + "step": 39112 + }, + { + "epoch": 36.49, + "learning_rate": 4.543889925373134e-05, + "loss": 0.0006, + "step": 39116 + }, + { + "epoch": 36.49, + "learning_rate": 4.54384328358209e-05, + "loss": 0.0017, + "step": 39120 + }, + { + "epoch": 36.5, + "learning_rate": 4.543796641791045e-05, + "loss": 0.0049, + "step": 39124 + }, + { + "epoch": 36.5, + "learning_rate": 4.54375e-05, + "loss": 0.0001, + "step": 39128 + }, + { + "epoch": 36.5, + "learning_rate": 4.5437033582089555e-05, + "loss": 0.0003, + "step": 39132 + }, + { + "epoch": 36.51, + "learning_rate": 4.54365671641791e-05, + "loss": 0.0, + "step": 39136 + }, + { + "epoch": 36.51, + "learning_rate": 4.5436100746268665e-05, + "loss": 0.0001, + "step": 39140 + }, + { + "epoch": 36.51, + "learning_rate": 4.543563432835821e-05, + "loss": 0.0001, + "step": 39144 + }, + { + "epoch": 36.52, + "learning_rate": 4.543516791044776e-05, + "loss": 0.0001, + "step": 39148 + }, + { + "epoch": 36.52, + "learning_rate": 4.5434701492537316e-05, + "loss": 0.0001, + "step": 39152 + }, + { + "epoch": 36.53, + "learning_rate": 4.543423507462687e-05, + "loss": 0.0001, + "step": 39156 + }, + { + "epoch": 36.53, + "learning_rate": 4.543376865671642e-05, + "loss": 0.0001, + "step": 39160 + }, + { + "epoch": 36.53, + "learning_rate": 4.5433302238805974e-05, + "loss": 0.0, + "step": 39164 + }, + { + "epoch": 36.54, + "learning_rate": 4.543283582089552e-05, + "loss": 0.0002, + "step": 39168 + }, + { + "epoch": 36.54, + "learning_rate": 4.543236940298508e-05, + "loss": 0.0, + "step": 39172 + }, + { + "epoch": 36.54, + "learning_rate": 4.543190298507463e-05, + "loss": 0.0012, + "step": 39176 + }, + { + "epoch": 36.55, + "learning_rate": 4.543143656716418e-05, + "loss": 0.0001, + "step": 39180 + }, + { + "epoch": 36.55, + "learning_rate": 4.5430970149253735e-05, + "loss": 0.0009, + "step": 39184 + }, + { + "epoch": 36.56, + "learning_rate": 4.543050373134329e-05, + "loss": 0.0001, + "step": 39188 + }, + { + "epoch": 36.56, + "learning_rate": 4.543003731343284e-05, + "loss": 0.0006, + "step": 39192 + }, + { + "epoch": 36.56, + "learning_rate": 4.5429570895522386e-05, + "loss": 0.0001, + "step": 39196 + }, + { + "epoch": 36.57, + "learning_rate": 4.542910447761195e-05, + "loss": 0.0, + "step": 39200 + }, + { + "epoch": 36.57, + "learning_rate": 4.5428638059701496e-05, + "loss": 0.0002, + "step": 39204 + }, + { + "epoch": 36.57, + "learning_rate": 4.5428171641791044e-05, + "loss": 0.0, + "step": 39208 + }, + { + "epoch": 36.58, + "learning_rate": 4.54277052238806e-05, + "loss": 0.0001, + "step": 39212 + }, + { + "epoch": 36.58, + "learning_rate": 4.5427238805970154e-05, + "loss": 0.0, + "step": 39216 + }, + { + "epoch": 36.59, + "learning_rate": 4.54267723880597e-05, + "loss": 0.0001, + "step": 39220 + }, + { + "epoch": 36.59, + "learning_rate": 4.542630597014926e-05, + "loss": 0.0001, + "step": 39224 + }, + { + "epoch": 36.59, + "learning_rate": 4.5425839552238805e-05, + "loss": 0.0, + "step": 39228 + }, + { + "epoch": 36.6, + "learning_rate": 4.542537313432836e-05, + "loss": 0.0, + "step": 39232 + }, + { + "epoch": 36.6, + "learning_rate": 4.5424906716417915e-05, + "loss": 0.0001, + "step": 39236 + }, + { + "epoch": 36.6, + "learning_rate": 4.542444029850746e-05, + "loss": 0.0025, + "step": 39240 + }, + { + "epoch": 36.61, + "learning_rate": 4.542397388059702e-05, + "loss": 0.0007, + "step": 39244 + }, + { + "epoch": 36.61, + "learning_rate": 4.542350746268657e-05, + "loss": 0.0016, + "step": 39248 + }, + { + "epoch": 36.62, + "learning_rate": 4.542304104477612e-05, + "loss": 0.0003, + "step": 39252 + }, + { + "epoch": 36.62, + "learning_rate": 4.542257462686567e-05, + "loss": 0.0, + "step": 39256 + }, + { + "epoch": 36.62, + "learning_rate": 4.542210820895523e-05, + "loss": 0.0, + "step": 39260 + }, + { + "epoch": 36.63, + "learning_rate": 4.542164179104478e-05, + "loss": 0.0004, + "step": 39264 + }, + { + "epoch": 36.63, + "learning_rate": 4.542117537313433e-05, + "loss": 0.0013, + "step": 39268 + }, + { + "epoch": 36.63, + "learning_rate": 4.542070895522388e-05, + "loss": 0.0, + "step": 39272 + }, + { + "epoch": 36.64, + "learning_rate": 4.542024253731344e-05, + "loss": 0.0, + "step": 39276 + }, + { + "epoch": 36.64, + "learning_rate": 4.5419776119402985e-05, + "loss": 0.0001, + "step": 39280 + }, + { + "epoch": 36.65, + "learning_rate": 4.541930970149254e-05, + "loss": 0.0001, + "step": 39284 + }, + { + "epoch": 36.65, + "learning_rate": 4.541884328358209e-05, + "loss": 0.0, + "step": 39288 + }, + { + "epoch": 36.65, + "learning_rate": 4.541837686567164e-05, + "loss": 0.0, + "step": 39292 + }, + { + "epoch": 36.66, + "learning_rate": 4.54179104477612e-05, + "loss": 0.0, + "step": 39296 + }, + { + "epoch": 36.66, + "learning_rate": 4.5417444029850746e-05, + "loss": 0.0001, + "step": 39300 + }, + { + "epoch": 36.66, + "learning_rate": 4.54169776119403e-05, + "loss": 0.0014, + "step": 39304 + }, + { + "epoch": 36.67, + "learning_rate": 4.5416511194029856e-05, + "loss": 0.0, + "step": 39308 + }, + { + "epoch": 36.67, + "learning_rate": 4.5416044776119404e-05, + "loss": 0.0003, + "step": 39312 + }, + { + "epoch": 36.68, + "learning_rate": 4.541557835820896e-05, + "loss": 0.0, + "step": 39316 + }, + { + "epoch": 36.68, + "learning_rate": 4.5415111940298514e-05, + "loss": 0.0046, + "step": 39320 + }, + { + "epoch": 36.68, + "learning_rate": 4.541464552238806e-05, + "loss": 0.0013, + "step": 39324 + }, + { + "epoch": 36.69, + "learning_rate": 4.5414179104477617e-05, + "loss": 0.0006, + "step": 39328 + }, + { + "epoch": 36.69, + "learning_rate": 4.5413712686567165e-05, + "loss": 0.0003, + "step": 39332 + }, + { + "epoch": 36.69, + "learning_rate": 4.541324626865672e-05, + "loss": 0.0001, + "step": 39336 + }, + { + "epoch": 36.7, + "learning_rate": 4.5412779850746274e-05, + "loss": 0.0006, + "step": 39340 + }, + { + "epoch": 36.7, + "learning_rate": 4.541231343283582e-05, + "loss": 0.0, + "step": 39344 + }, + { + "epoch": 36.71, + "learning_rate": 4.541184701492537e-05, + "loss": 0.0007, + "step": 39348 + }, + { + "epoch": 36.71, + "learning_rate": 4.541138059701493e-05, + "loss": 0.0008, + "step": 39352 + }, + { + "epoch": 36.71, + "learning_rate": 4.541091417910448e-05, + "loss": 0.0, + "step": 39356 + }, + { + "epoch": 36.72, + "learning_rate": 4.541044776119403e-05, + "loss": 0.0, + "step": 39360 + }, + { + "epoch": 36.72, + "learning_rate": 4.5409981343283584e-05, + "loss": 0.0, + "step": 39364 + }, + { + "epoch": 36.72, + "learning_rate": 4.540951492537314e-05, + "loss": 0.0002, + "step": 39368 + }, + { + "epoch": 36.73, + "learning_rate": 4.540904850746269e-05, + "loss": 0.0005, + "step": 39372 + }, + { + "epoch": 36.73, + "learning_rate": 4.540858208955224e-05, + "loss": 0.0001, + "step": 39376 + }, + { + "epoch": 36.73, + "learning_rate": 4.5408115671641796e-05, + "loss": 0.0001, + "step": 39380 + }, + { + "epoch": 36.74, + "learning_rate": 4.5407649253731345e-05, + "loss": 0.0, + "step": 39384 + }, + { + "epoch": 36.74, + "learning_rate": 4.54071828358209e-05, + "loss": 0.0, + "step": 39388 + }, + { + "epoch": 36.75, + "learning_rate": 4.540671641791045e-05, + "loss": 0.0, + "step": 39392 + }, + { + "epoch": 36.75, + "learning_rate": 4.540625e-05, + "loss": 0.0005, + "step": 39396 + }, + { + "epoch": 36.75, + "learning_rate": 4.540578358208956e-05, + "loss": 0.0004, + "step": 39400 + }, + { + "epoch": 36.76, + "learning_rate": 4.5405317164179106e-05, + "loss": 0.0001, + "step": 39404 + }, + { + "epoch": 36.76, + "learning_rate": 4.5404850746268654e-05, + "loss": 0.0, + "step": 39408 + }, + { + "epoch": 36.76, + "learning_rate": 4.5404384328358215e-05, + "loss": 0.0, + "step": 39412 + }, + { + "epoch": 36.77, + "learning_rate": 4.5403917910447764e-05, + "loss": 0.0001, + "step": 39416 + }, + { + "epoch": 36.77, + "learning_rate": 4.540345149253731e-05, + "loss": 0.0, + "step": 39420 + }, + { + "epoch": 36.78, + "learning_rate": 4.5402985074626867e-05, + "loss": 0.0001, + "step": 39424 + }, + { + "epoch": 36.78, + "learning_rate": 4.540251865671642e-05, + "loss": 0.0, + "step": 39428 + }, + { + "epoch": 36.78, + "learning_rate": 4.540205223880597e-05, + "loss": 0.0009, + "step": 39432 + }, + { + "epoch": 36.79, + "learning_rate": 4.5401585820895525e-05, + "loss": 0.0, + "step": 39436 + }, + { + "epoch": 36.79, + "learning_rate": 4.540111940298508e-05, + "loss": 0.0, + "step": 39440 + }, + { + "epoch": 36.79, + "learning_rate": 4.540065298507463e-05, + "loss": 0.0, + "step": 39444 + }, + { + "epoch": 36.8, + "learning_rate": 4.540018656716418e-05, + "loss": 0.0001, + "step": 39448 + }, + { + "epoch": 36.8, + "learning_rate": 4.539972014925373e-05, + "loss": 0.0027, + "step": 39452 + }, + { + "epoch": 36.81, + "learning_rate": 4.5399253731343286e-05, + "loss": 0.0, + "step": 39456 + }, + { + "epoch": 36.81, + "learning_rate": 4.539878731343284e-05, + "loss": 0.0002, + "step": 39460 + }, + { + "epoch": 36.81, + "learning_rate": 4.539832089552239e-05, + "loss": 0.0001, + "step": 39464 + }, + { + "epoch": 36.82, + "learning_rate": 4.5397854477611943e-05, + "loss": 0.0, + "step": 39468 + }, + { + "epoch": 36.82, + "learning_rate": 4.53973880597015e-05, + "loss": 0.0009, + "step": 39472 + }, + { + "epoch": 36.82, + "learning_rate": 4.5396921641791046e-05, + "loss": 0.0038, + "step": 39476 + }, + { + "epoch": 36.83, + "learning_rate": 4.53964552238806e-05, + "loss": 0.0022, + "step": 39480 + }, + { + "epoch": 36.83, + "learning_rate": 4.539598880597015e-05, + "loss": 0.0005, + "step": 39484 + }, + { + "epoch": 36.84, + "learning_rate": 4.5395522388059704e-05, + "loss": 0.0, + "step": 39488 + }, + { + "epoch": 36.84, + "learning_rate": 4.539505597014926e-05, + "loss": 0.0001, + "step": 39492 + }, + { + "epoch": 36.84, + "learning_rate": 4.539458955223881e-05, + "loss": 0.0, + "step": 39496 + }, + { + "epoch": 36.85, + "learning_rate": 4.539412313432836e-05, + "loss": 0.0001, + "step": 39500 + }, + { + "epoch": 36.85, + "eval_exact_match": 0.7495164410058027, + "eval_exec": 0.7852998065764023, + "eval_loss": 0.4335377514362335, + "eval_runtime": 1568.2028, + "eval_samples_per_second": 0.659, + "step": 39500 + }, + { + "epoch": 36.85, + "learning_rate": 4.539365671641792e-05, + "loss": 0.0002, + "step": 39504 + }, + { + "epoch": 36.85, + "learning_rate": 4.5393190298507465e-05, + "loss": 0.0, + "step": 39508 + }, + { + "epoch": 36.86, + "learning_rate": 4.5392723880597014e-05, + "loss": 0.0001, + "step": 39512 + }, + { + "epoch": 36.86, + "learning_rate": 4.539225746268657e-05, + "loss": 0.0041, + "step": 39516 + }, + { + "epoch": 36.87, + "learning_rate": 4.539179104477612e-05, + "loss": 0.0001, + "step": 39520 + }, + { + "epoch": 36.87, + "learning_rate": 4.539132462686567e-05, + "loss": 0.0001, + "step": 39524 + }, + { + "epoch": 36.87, + "learning_rate": 4.5390858208955226e-05, + "loss": 0.0, + "step": 39528 + }, + { + "epoch": 36.88, + "learning_rate": 4.539039179104478e-05, + "loss": 0.0055, + "step": 39532 + }, + { + "epoch": 36.88, + "learning_rate": 4.538992537313433e-05, + "loss": 0.0002, + "step": 39536 + }, + { + "epoch": 36.88, + "learning_rate": 4.5389458955223884e-05, + "loss": 0.0001, + "step": 39540 + }, + { + "epoch": 36.89, + "learning_rate": 4.538899253731343e-05, + "loss": 0.0002, + "step": 39544 + }, + { + "epoch": 36.89, + "learning_rate": 4.538852611940299e-05, + "loss": 0.0, + "step": 39548 + }, + { + "epoch": 36.9, + "learning_rate": 4.538805970149254e-05, + "loss": 0.0, + "step": 39552 + }, + { + "epoch": 36.9, + "learning_rate": 4.538759328358209e-05, + "loss": 0.0, + "step": 39556 + }, + { + "epoch": 36.9, + "learning_rate": 4.5387126865671645e-05, + "loss": 0.0015, + "step": 39560 + }, + { + "epoch": 36.91, + "learning_rate": 4.53866604477612e-05, + "loss": 0.0001, + "step": 39564 + }, + { + "epoch": 36.91, + "learning_rate": 4.538619402985075e-05, + "loss": 0.0001, + "step": 39568 + }, + { + "epoch": 36.91, + "learning_rate": 4.5385727611940297e-05, + "loss": 0.0041, + "step": 39572 + }, + { + "epoch": 36.92, + "learning_rate": 4.538526119402985e-05, + "loss": 0.0001, + "step": 39576 + }, + { + "epoch": 36.92, + "learning_rate": 4.5384794776119406e-05, + "loss": 0.0001, + "step": 39580 + }, + { + "epoch": 36.93, + "learning_rate": 4.5384328358208954e-05, + "loss": 0.0004, + "step": 39584 + }, + { + "epoch": 36.93, + "learning_rate": 4.538386194029851e-05, + "loss": 0.0002, + "step": 39588 + }, + { + "epoch": 36.93, + "learning_rate": 4.5383395522388064e-05, + "loss": 0.0001, + "step": 39592 + }, + { + "epoch": 36.94, + "learning_rate": 4.538292910447761e-05, + "loss": 0.0, + "step": 39596 + }, + { + "epoch": 36.94, + "learning_rate": 4.538246268656717e-05, + "loss": 0.0011, + "step": 39600 + }, + { + "epoch": 36.94, + "learning_rate": 4.5381996268656715e-05, + "loss": 0.0004, + "step": 39604 + }, + { + "epoch": 36.95, + "learning_rate": 4.538152985074627e-05, + "loss": 0.0002, + "step": 39608 + }, + { + "epoch": 36.95, + "learning_rate": 4.5381063432835825e-05, + "loss": 0.0101, + "step": 39612 + }, + { + "epoch": 36.96, + "learning_rate": 4.538059701492537e-05, + "loss": 0.0006, + "step": 39616 + }, + { + "epoch": 36.96, + "learning_rate": 4.538013059701492e-05, + "loss": 0.0001, + "step": 39620 + }, + { + "epoch": 36.96, + "learning_rate": 4.537966417910448e-05, + "loss": 0.0042, + "step": 39624 + }, + { + "epoch": 36.97, + "learning_rate": 4.537919776119403e-05, + "loss": 0.0004, + "step": 39628 + }, + { + "epoch": 36.97, + "learning_rate": 4.5378731343283586e-05, + "loss": 0.0002, + "step": 39632 + }, + { + "epoch": 36.97, + "learning_rate": 4.5378264925373134e-05, + "loss": 0.001, + "step": 39636 + }, + { + "epoch": 36.98, + "learning_rate": 4.537779850746269e-05, + "loss": 0.0011, + "step": 39640 + }, + { + "epoch": 36.98, + "learning_rate": 4.5377332089552244e-05, + "loss": 0.0018, + "step": 39644 + }, + { + "epoch": 36.98, + "learning_rate": 4.537686567164179e-05, + "loss": 0.0001, + "step": 39648 + }, + { + "epoch": 36.99, + "learning_rate": 4.537639925373135e-05, + "loss": 0.0, + "step": 39652 + }, + { + "epoch": 36.99, + "learning_rate": 4.53759328358209e-05, + "loss": 0.0001, + "step": 39656 + }, + { + "epoch": 37.0, + "learning_rate": 4.537546641791045e-05, + "loss": 0.0, + "step": 39660 + }, + { + "epoch": 37.0, + "learning_rate": 4.5375e-05, + "loss": 0.0, + "step": 39664 + }, + { + "epoch": 37.0, + "learning_rate": 4.537453358208956e-05, + "loss": 0.0002, + "step": 39668 + }, + { + "epoch": 37.01, + "learning_rate": 4.537406716417911e-05, + "loss": 0.0, + "step": 39672 + }, + { + "epoch": 37.01, + "learning_rate": 4.5373600746268656e-05, + "loss": 0.0, + "step": 39676 + }, + { + "epoch": 37.01, + "learning_rate": 4.537313432835821e-05, + "loss": 0.0, + "step": 39680 + }, + { + "epoch": 37.02, + "learning_rate": 4.5372667910447766e-05, + "loss": 0.0, + "step": 39684 + }, + { + "epoch": 37.02, + "learning_rate": 4.5372201492537314e-05, + "loss": 0.0001, + "step": 39688 + }, + { + "epoch": 37.03, + "learning_rate": 4.537173507462687e-05, + "loss": 0.0005, + "step": 39692 + }, + { + "epoch": 37.03, + "learning_rate": 4.537126865671642e-05, + "loss": 0.0, + "step": 39696 + }, + { + "epoch": 37.03, + "learning_rate": 4.537080223880597e-05, + "loss": 0.0, + "step": 39700 + }, + { + "epoch": 37.04, + "learning_rate": 4.537033582089553e-05, + "loss": 0.0011, + "step": 39704 + }, + { + "epoch": 37.04, + "learning_rate": 4.5369869402985075e-05, + "loss": 0.0004, + "step": 39708 + }, + { + "epoch": 37.04, + "learning_rate": 4.536940298507463e-05, + "loss": 0.0002, + "step": 39712 + }, + { + "epoch": 37.05, + "learning_rate": 4.5368936567164185e-05, + "loss": 0.0001, + "step": 39716 + }, + { + "epoch": 37.05, + "learning_rate": 4.536847014925373e-05, + "loss": 0.0, + "step": 39720 + }, + { + "epoch": 37.06, + "learning_rate": 4.536800373134328e-05, + "loss": 0.0, + "step": 39724 + }, + { + "epoch": 37.06, + "learning_rate": 4.5367537313432836e-05, + "loss": 0.0006, + "step": 39728 + }, + { + "epoch": 37.06, + "learning_rate": 4.536707089552239e-05, + "loss": 0.0001, + "step": 39732 + }, + { + "epoch": 37.07, + "learning_rate": 4.536660447761194e-05, + "loss": 0.0005, + "step": 39736 + }, + { + "epoch": 37.07, + "learning_rate": 4.5366138059701494e-05, + "loss": 0.0002, + "step": 39740 + }, + { + "epoch": 37.07, + "learning_rate": 4.536567164179105e-05, + "loss": 0.0, + "step": 39744 + }, + { + "epoch": 37.08, + "learning_rate": 4.53652052238806e-05, + "loss": 0.0088, + "step": 39748 + }, + { + "epoch": 37.08, + "learning_rate": 4.536473880597015e-05, + "loss": 0.0005, + "step": 39752 + }, + { + "epoch": 37.09, + "learning_rate": 4.53642723880597e-05, + "loss": 0.0001, + "step": 39756 + }, + { + "epoch": 37.09, + "learning_rate": 4.5363805970149255e-05, + "loss": 0.0, + "step": 39760 + }, + { + "epoch": 37.09, + "learning_rate": 4.536333955223881e-05, + "loss": 0.0002, + "step": 39764 + }, + { + "epoch": 37.1, + "learning_rate": 4.536287313432836e-05, + "loss": 0.0011, + "step": 39768 + }, + { + "epoch": 37.1, + "learning_rate": 4.536240671641791e-05, + "loss": 0.0, + "step": 39772 + }, + { + "epoch": 37.1, + "learning_rate": 4.536194029850747e-05, + "loss": 0.0, + "step": 39776 + }, + { + "epoch": 37.11, + "learning_rate": 4.5361473880597016e-05, + "loss": 0.0001, + "step": 39780 + }, + { + "epoch": 37.11, + "learning_rate": 4.5361007462686564e-05, + "loss": 0.0004, + "step": 39784 + }, + { + "epoch": 37.12, + "learning_rate": 4.536054104477612e-05, + "loss": 0.0015, + "step": 39788 + }, + { + "epoch": 37.12, + "learning_rate": 4.5360074626865674e-05, + "loss": 0.0009, + "step": 39792 + }, + { + "epoch": 37.12, + "learning_rate": 4.535960820895523e-05, + "loss": 0.0, + "step": 39796 + }, + { + "epoch": 37.13, + "learning_rate": 4.535914179104478e-05, + "loss": 0.0, + "step": 39800 + }, + { + "epoch": 37.13, + "learning_rate": 4.535867537313433e-05, + "loss": 0.0018, + "step": 39804 + }, + { + "epoch": 37.13, + "learning_rate": 4.535820895522389e-05, + "loss": 0.0001, + "step": 39808 + }, + { + "epoch": 37.14, + "learning_rate": 4.5357742537313435e-05, + "loss": 0.0003, + "step": 39812 + }, + { + "epoch": 37.14, + "learning_rate": 4.535727611940298e-05, + "loss": 0.002, + "step": 39816 + }, + { + "epoch": 37.15, + "learning_rate": 4.5356809701492545e-05, + "loss": 0.0006, + "step": 39820 + }, + { + "epoch": 37.15, + "learning_rate": 4.535634328358209e-05, + "loss": 0.001, + "step": 39824 + }, + { + "epoch": 37.15, + "learning_rate": 4.535587686567164e-05, + "loss": 0.0003, + "step": 39828 + }, + { + "epoch": 37.16, + "learning_rate": 4.5355410447761196e-05, + "loss": 0.0011, + "step": 39832 + }, + { + "epoch": 37.16, + "learning_rate": 4.535494402985075e-05, + "loss": 0.0001, + "step": 39836 + }, + { + "epoch": 37.16, + "learning_rate": 4.53544776119403e-05, + "loss": 0.0002, + "step": 39840 + }, + { + "epoch": 37.17, + "learning_rate": 4.5354011194029854e-05, + "loss": 0.0001, + "step": 39844 + }, + { + "epoch": 37.17, + "learning_rate": 4.53535447761194e-05, + "loss": 0.0, + "step": 39848 + }, + { + "epoch": 37.18, + "learning_rate": 4.535307835820896e-05, + "loss": 0.0, + "step": 39852 + }, + { + "epoch": 37.18, + "learning_rate": 4.535261194029851e-05, + "loss": 0.0001, + "step": 39856 + }, + { + "epoch": 37.18, + "learning_rate": 4.535214552238806e-05, + "loss": 0.0, + "step": 39860 + }, + { + "epoch": 37.19, + "learning_rate": 4.5351679104477615e-05, + "loss": 0.0002, + "step": 39864 + }, + { + "epoch": 37.19, + "learning_rate": 4.535121268656717e-05, + "loss": 0.001, + "step": 39868 + }, + { + "epoch": 37.19, + "learning_rate": 4.535074626865672e-05, + "loss": 0.0001, + "step": 39872 + }, + { + "epoch": 37.2, + "learning_rate": 4.5350279850746266e-05, + "loss": 0.0003, + "step": 39876 + }, + { + "epoch": 37.2, + "learning_rate": 4.534981343283583e-05, + "loss": 0.0, + "step": 39880 + }, + { + "epoch": 37.21, + "learning_rate": 4.5349347014925376e-05, + "loss": 0.0006, + "step": 39884 + }, + { + "epoch": 37.21, + "learning_rate": 4.5348880597014924e-05, + "loss": 0.0006, + "step": 39888 + }, + { + "epoch": 37.21, + "learning_rate": 4.534841417910448e-05, + "loss": 0.0009, + "step": 39892 + }, + { + "epoch": 37.22, + "learning_rate": 4.5347947761194034e-05, + "loss": 0.0, + "step": 39896 + }, + { + "epoch": 37.22, + "learning_rate": 4.534748134328358e-05, + "loss": 0.0002, + "step": 39900 + }, + { + "epoch": 37.22, + "learning_rate": 4.534701492537314e-05, + "loss": 0.0, + "step": 39904 + }, + { + "epoch": 37.23, + "learning_rate": 4.5346548507462685e-05, + "loss": 0.0004, + "step": 39908 + }, + { + "epoch": 37.23, + "learning_rate": 4.534608208955224e-05, + "loss": 0.0, + "step": 39912 + }, + { + "epoch": 37.24, + "learning_rate": 4.5345615671641795e-05, + "loss": 0.0, + "step": 39916 + }, + { + "epoch": 37.24, + "learning_rate": 4.534514925373134e-05, + "loss": 0.0002, + "step": 39920 + }, + { + "epoch": 37.24, + "learning_rate": 4.53446828358209e-05, + "loss": 0.0001, + "step": 39924 + }, + { + "epoch": 37.25, + "learning_rate": 4.534421641791045e-05, + "loss": 0.0015, + "step": 39928 + }, + { + "epoch": 37.25, + "learning_rate": 4.534375e-05, + "loss": 0.0, + "step": 39932 + }, + { + "epoch": 37.25, + "learning_rate": 4.534328358208955e-05, + "loss": 0.0, + "step": 39936 + }, + { + "epoch": 37.26, + "learning_rate": 4.534281716417911e-05, + "loss": 0.0, + "step": 39940 + }, + { + "epoch": 37.26, + "learning_rate": 4.534235074626866e-05, + "loss": 0.0, + "step": 39944 + }, + { + "epoch": 37.26, + "learning_rate": 4.534188432835821e-05, + "loss": 0.0003, + "step": 39948 + }, + { + "epoch": 37.27, + "learning_rate": 4.534141791044776e-05, + "loss": 0.0, + "step": 39952 + }, + { + "epoch": 37.27, + "learning_rate": 4.534095149253732e-05, + "loss": 0.0006, + "step": 39956 + }, + { + "epoch": 37.28, + "learning_rate": 4.534048507462687e-05, + "loss": 0.0017, + "step": 39960 + }, + { + "epoch": 37.28, + "learning_rate": 4.534001865671642e-05, + "loss": 0.0028, + "step": 39964 + }, + { + "epoch": 37.28, + "learning_rate": 4.533955223880597e-05, + "loss": 0.0019, + "step": 39968 + }, + { + "epoch": 37.29, + "learning_rate": 4.533908582089553e-05, + "loss": 0.0001, + "step": 39972 + }, + { + "epoch": 37.29, + "learning_rate": 4.533861940298508e-05, + "loss": 0.0009, + "step": 39976 + }, + { + "epoch": 37.29, + "learning_rate": 4.5338152985074626e-05, + "loss": 0.0059, + "step": 39980 + }, + { + "epoch": 37.3, + "learning_rate": 4.533768656716418e-05, + "loss": 0.0, + "step": 39984 + }, + { + "epoch": 37.3, + "learning_rate": 4.5337220149253736e-05, + "loss": 0.0034, + "step": 39988 + }, + { + "epoch": 37.31, + "learning_rate": 4.5336753731343284e-05, + "loss": 0.0, + "step": 39992 + }, + { + "epoch": 37.31, + "learning_rate": 4.533628731343284e-05, + "loss": 0.0012, + "step": 39996 + }, + { + "epoch": 37.31, + "learning_rate": 4.5335820895522394e-05, + "loss": 0.0001, + "step": 40000 + }, + { + "epoch": 37.31, + "eval_exact_match": 0.7282398452611218, + "eval_exec": 0.7514506769825918, + "eval_loss": 0.42848145961761475, + "eval_runtime": 1107.5429, + "eval_samples_per_second": 0.934, + "step": 40000 + }, + { + "epoch": 37.32, + "learning_rate": 4.533535447761194e-05, + "loss": 0.0, + "step": 40004 + }, + { + "epoch": 37.32, + "learning_rate": 4.53348880597015e-05, + "loss": 0.0039, + "step": 40008 + }, + { + "epoch": 37.32, + "learning_rate": 4.5334421641791045e-05, + "loss": 0.0006, + "step": 40012 + }, + { + "epoch": 37.33, + "learning_rate": 4.53339552238806e-05, + "loss": 0.0014, + "step": 40016 + }, + { + "epoch": 37.33, + "learning_rate": 4.5333488805970155e-05, + "loss": 0.0025, + "step": 40020 + }, + { + "epoch": 37.34, + "learning_rate": 4.53330223880597e-05, + "loss": 0.0002, + "step": 40024 + }, + { + "epoch": 37.34, + "learning_rate": 4.533255597014925e-05, + "loss": 0.0001, + "step": 40028 + }, + { + "epoch": 37.34, + "learning_rate": 4.533208955223881e-05, + "loss": 0.0017, + "step": 40032 + }, + { + "epoch": 37.35, + "learning_rate": 4.533162313432836e-05, + "loss": 0.0, + "step": 40036 + }, + { + "epoch": 37.35, + "learning_rate": 4.533115671641791e-05, + "loss": 0.0001, + "step": 40040 + }, + { + "epoch": 37.35, + "learning_rate": 4.5330690298507464e-05, + "loss": 0.0001, + "step": 40044 + }, + { + "epoch": 37.36, + "learning_rate": 4.533022388059702e-05, + "loss": 0.0, + "step": 40048 + }, + { + "epoch": 37.36, + "learning_rate": 4.532975746268657e-05, + "loss": 0.0002, + "step": 40052 + }, + { + "epoch": 37.37, + "learning_rate": 4.532929104477612e-05, + "loss": 0.0016, + "step": 40056 + }, + { + "epoch": 37.37, + "learning_rate": 4.532882462686568e-05, + "loss": 0.0, + "step": 40060 + }, + { + "epoch": 37.37, + "learning_rate": 4.5328358208955225e-05, + "loss": 0.0003, + "step": 40064 + }, + { + "epoch": 37.38, + "learning_rate": 4.532789179104478e-05, + "loss": 0.0003, + "step": 40068 + }, + { + "epoch": 37.38, + "learning_rate": 4.532742537313433e-05, + "loss": 0.0012, + "step": 40072 + }, + { + "epoch": 37.38, + "learning_rate": 4.532695895522388e-05, + "loss": 0.0, + "step": 40076 + }, + { + "epoch": 37.39, + "learning_rate": 4.532649253731344e-05, + "loss": 0.0, + "step": 40080 + }, + { + "epoch": 37.39, + "learning_rate": 4.5326026119402986e-05, + "loss": 0.0002, + "step": 40084 + }, + { + "epoch": 37.4, + "learning_rate": 4.5325559701492534e-05, + "loss": 0.0001, + "step": 40088 + }, + { + "epoch": 37.4, + "learning_rate": 4.5325093283582096e-05, + "loss": 0.0004, + "step": 40092 + }, + { + "epoch": 37.4, + "learning_rate": 4.5324626865671644e-05, + "loss": 0.0038, + "step": 40096 + }, + { + "epoch": 37.41, + "learning_rate": 4.532416044776119e-05, + "loss": 0.0, + "step": 40100 + }, + { + "epoch": 37.41, + "learning_rate": 4.532369402985075e-05, + "loss": 0.001, + "step": 40104 + }, + { + "epoch": 37.41, + "learning_rate": 4.53232276119403e-05, + "loss": 0.0088, + "step": 40108 + }, + { + "epoch": 37.42, + "learning_rate": 4.532276119402985e-05, + "loss": 0.0, + "step": 40112 + }, + { + "epoch": 37.42, + "learning_rate": 4.5322294776119405e-05, + "loss": 0.001, + "step": 40116 + }, + { + "epoch": 37.43, + "learning_rate": 4.532182835820896e-05, + "loss": 0.0009, + "step": 40120 + }, + { + "epoch": 37.43, + "learning_rate": 4.5321361940298515e-05, + "loss": 0.0002, + "step": 40124 + }, + { + "epoch": 37.43, + "learning_rate": 4.532089552238806e-05, + "loss": 0.0002, + "step": 40128 + }, + { + "epoch": 37.44, + "learning_rate": 4.532042910447761e-05, + "loss": 0.0, + "step": 40132 + }, + { + "epoch": 37.44, + "learning_rate": 4.5319962686567166e-05, + "loss": 0.0004, + "step": 40136 + }, + { + "epoch": 37.44, + "learning_rate": 4.531949626865672e-05, + "loss": 0.0, + "step": 40140 + }, + { + "epoch": 37.45, + "learning_rate": 4.531902985074627e-05, + "loss": 0.0, + "step": 40144 + }, + { + "epoch": 37.45, + "learning_rate": 4.5318563432835824e-05, + "loss": 0.0, + "step": 40148 + }, + { + "epoch": 37.46, + "learning_rate": 4.531809701492538e-05, + "loss": 0.0001, + "step": 40152 + }, + { + "epoch": 37.46, + "learning_rate": 4.531763059701493e-05, + "loss": 0.0, + "step": 40156 + }, + { + "epoch": 37.46, + "learning_rate": 4.531716417910448e-05, + "loss": 0.0, + "step": 40160 + }, + { + "epoch": 37.47, + "learning_rate": 4.531669776119403e-05, + "loss": 0.0004, + "step": 40164 + }, + { + "epoch": 37.47, + "learning_rate": 4.5316231343283585e-05, + "loss": 0.0003, + "step": 40168 + }, + { + "epoch": 37.47, + "learning_rate": 4.531576492537314e-05, + "loss": 0.002, + "step": 40172 + }, + { + "epoch": 37.48, + "learning_rate": 4.531529850746269e-05, + "loss": 0.0, + "step": 40176 + }, + { + "epoch": 37.48, + "learning_rate": 4.531483208955224e-05, + "loss": 0.0005, + "step": 40180 + }, + { + "epoch": 37.49, + "learning_rate": 4.53143656716418e-05, + "loss": 0.0011, + "step": 40184 + }, + { + "epoch": 37.49, + "learning_rate": 4.5313899253731346e-05, + "loss": 0.0002, + "step": 40188 + }, + { + "epoch": 37.49, + "learning_rate": 4.5313432835820894e-05, + "loss": 0.0003, + "step": 40192 + }, + { + "epoch": 37.5, + "learning_rate": 4.531296641791045e-05, + "loss": 0.0002, + "step": 40196 + }, + { + "epoch": 37.5, + "learning_rate": 4.5312500000000004e-05, + "loss": 0.0, + "step": 40200 + }, + { + "epoch": 37.5, + "learning_rate": 4.531203358208955e-05, + "loss": 0.0, + "step": 40204 + }, + { + "epoch": 37.51, + "learning_rate": 4.531156716417911e-05, + "loss": 0.0011, + "step": 40208 + }, + { + "epoch": 37.51, + "learning_rate": 4.531110074626866e-05, + "loss": 0.0003, + "step": 40212 + }, + { + "epoch": 37.51, + "learning_rate": 4.531063432835821e-05, + "loss": 0.0001, + "step": 40216 + }, + { + "epoch": 37.52, + "learning_rate": 4.5310167910447765e-05, + "loss": 0.0002, + "step": 40220 + }, + { + "epoch": 37.52, + "learning_rate": 4.530970149253731e-05, + "loss": 0.0004, + "step": 40224 + }, + { + "epoch": 37.53, + "learning_rate": 4.530923507462687e-05, + "loss": 0.0001, + "step": 40228 + }, + { + "epoch": 37.53, + "learning_rate": 4.530876865671642e-05, + "loss": 0.0, + "step": 40232 + }, + { + "epoch": 37.53, + "learning_rate": 4.530830223880597e-05, + "loss": 0.0, + "step": 40236 + }, + { + "epoch": 37.54, + "learning_rate": 4.5307835820895526e-05, + "loss": 0.0, + "step": 40240 + }, + { + "epoch": 37.54, + "learning_rate": 4.530736940298508e-05, + "loss": 0.0001, + "step": 40244 + }, + { + "epoch": 37.54, + "learning_rate": 4.530690298507463e-05, + "loss": 0.0001, + "step": 40248 + }, + { + "epoch": 37.55, + "learning_rate": 4.530643656716418e-05, + "loss": 0.0005, + "step": 40252 + }, + { + "epoch": 37.55, + "learning_rate": 4.530597014925373e-05, + "loss": 0.0003, + "step": 40256 + }, + { + "epoch": 37.56, + "learning_rate": 4.5305503731343287e-05, + "loss": 0.0009, + "step": 40260 + }, + { + "epoch": 37.56, + "learning_rate": 4.5305037313432835e-05, + "loss": 0.0, + "step": 40264 + }, + { + "epoch": 37.56, + "learning_rate": 4.530457089552239e-05, + "loss": 0.0017, + "step": 40268 + }, + { + "epoch": 37.57, + "learning_rate": 4.5304104477611945e-05, + "loss": 0.0001, + "step": 40272 + }, + { + "epoch": 37.57, + "learning_rate": 4.530363805970149e-05, + "loss": 0.0, + "step": 40276 + }, + { + "epoch": 37.57, + "learning_rate": 4.530317164179105e-05, + "loss": 0.0001, + "step": 40280 + }, + { + "epoch": 37.58, + "learning_rate": 4.5302705223880596e-05, + "loss": 0.0002, + "step": 40284 + }, + { + "epoch": 37.58, + "learning_rate": 4.530223880597016e-05, + "loss": 0.0019, + "step": 40288 + }, + { + "epoch": 37.59, + "learning_rate": 4.5301772388059706e-05, + "loss": 0.0, + "step": 40292 + }, + { + "epoch": 37.59, + "learning_rate": 4.5301305970149254e-05, + "loss": 0.0, + "step": 40296 + }, + { + "epoch": 37.59, + "learning_rate": 4.530083955223881e-05, + "loss": 0.0057, + "step": 40300 + }, + { + "epoch": 37.6, + "learning_rate": 4.5300373134328363e-05, + "loss": 0.0, + "step": 40304 + }, + { + "epoch": 37.6, + "learning_rate": 4.529990671641791e-05, + "loss": 0.0, + "step": 40308 + }, + { + "epoch": 37.6, + "learning_rate": 4.5299440298507467e-05, + "loss": 0.0036, + "step": 40312 + }, + { + "epoch": 37.61, + "learning_rate": 4.5298973880597015e-05, + "loss": 0.0002, + "step": 40316 + }, + { + "epoch": 37.61, + "learning_rate": 4.529850746268657e-05, + "loss": 0.0001, + "step": 40320 + }, + { + "epoch": 37.62, + "learning_rate": 4.5298041044776124e-05, + "loss": 0.0006, + "step": 40324 + }, + { + "epoch": 37.62, + "learning_rate": 4.529757462686567e-05, + "loss": 0.0001, + "step": 40328 + }, + { + "epoch": 37.62, + "learning_rate": 4.529710820895523e-05, + "loss": 0.0, + "step": 40332 + }, + { + "epoch": 37.63, + "learning_rate": 4.529664179104478e-05, + "loss": 0.0013, + "step": 40336 + }, + { + "epoch": 37.63, + "learning_rate": 4.529617537313433e-05, + "loss": 0.0, + "step": 40340 + }, + { + "epoch": 37.63, + "learning_rate": 4.529570895522388e-05, + "loss": 0.0001, + "step": 40344 + }, + { + "epoch": 37.64, + "learning_rate": 4.529524253731344e-05, + "loss": 0.0, + "step": 40348 + }, + { + "epoch": 37.64, + "learning_rate": 4.529477611940299e-05, + "loss": 0.0002, + "step": 40352 + }, + { + "epoch": 37.65, + "learning_rate": 4.529430970149254e-05, + "loss": 0.0017, + "step": 40356 + }, + { + "epoch": 37.65, + "learning_rate": 4.529384328358209e-05, + "loss": 0.0001, + "step": 40360 + }, + { + "epoch": 37.65, + "learning_rate": 4.5293376865671646e-05, + "loss": 0.0, + "step": 40364 + }, + { + "epoch": 37.66, + "learning_rate": 4.5292910447761195e-05, + "loss": 0.0007, + "step": 40368 + }, + { + "epoch": 37.66, + "learning_rate": 4.529244402985075e-05, + "loss": 0.0, + "step": 40372 + }, + { + "epoch": 37.66, + "learning_rate": 4.52919776119403e-05, + "loss": 0.0, + "step": 40376 + }, + { + "epoch": 37.67, + "learning_rate": 4.529151119402985e-05, + "loss": 0.0, + "step": 40380 + }, + { + "epoch": 37.67, + "learning_rate": 4.529104477611941e-05, + "loss": 0.0001, + "step": 40384 + }, + { + "epoch": 37.68, + "learning_rate": 4.5290578358208956e-05, + "loss": 0.0013, + "step": 40388 + }, + { + "epoch": 37.68, + "learning_rate": 4.529011194029851e-05, + "loss": 0.02, + "step": 40392 + }, + { + "epoch": 37.68, + "learning_rate": 4.5289645522388065e-05, + "loss": 0.0, + "step": 40396 + }, + { + "epoch": 37.69, + "learning_rate": 4.5289179104477614e-05, + "loss": 0.0, + "step": 40400 + }, + { + "epoch": 37.69, + "learning_rate": 4.528871268656716e-05, + "loss": 0.0001, + "step": 40404 + }, + { + "epoch": 37.69, + "learning_rate": 4.5288246268656717e-05, + "loss": 0.0001, + "step": 40408 + }, + { + "epoch": 37.7, + "learning_rate": 4.528777985074627e-05, + "loss": 0.0001, + "step": 40412 + }, + { + "epoch": 37.7, + "learning_rate": 4.528731343283582e-05, + "loss": 0.0001, + "step": 40416 + }, + { + "epoch": 37.71, + "learning_rate": 4.5286847014925374e-05, + "loss": 0.0, + "step": 40420 + }, + { + "epoch": 37.71, + "learning_rate": 4.528638059701493e-05, + "loss": 0.0002, + "step": 40424 + }, + { + "epoch": 37.71, + "learning_rate": 4.528591417910448e-05, + "loss": 0.0071, + "step": 40428 + }, + { + "epoch": 37.72, + "learning_rate": 4.528544776119403e-05, + "loss": 0.0001, + "step": 40432 + }, + { + "epoch": 37.72, + "learning_rate": 4.528498134328358e-05, + "loss": 0.0001, + "step": 40436 + }, + { + "epoch": 37.72, + "learning_rate": 4.5284514925373135e-05, + "loss": 0.0004, + "step": 40440 + }, + { + "epoch": 37.73, + "learning_rate": 4.528404850746269e-05, + "loss": 0.0006, + "step": 40444 + }, + { + "epoch": 37.73, + "learning_rate": 4.528358208955224e-05, + "loss": 0.0, + "step": 40448 + }, + { + "epoch": 37.73, + "learning_rate": 4.5283115671641793e-05, + "loss": 0.0, + "step": 40452 + }, + { + "epoch": 37.74, + "learning_rate": 4.528264925373135e-05, + "loss": 0.0001, + "step": 40456 + }, + { + "epoch": 37.74, + "learning_rate": 4.5282182835820896e-05, + "loss": 0.0001, + "step": 40460 + }, + { + "epoch": 37.75, + "learning_rate": 4.528171641791045e-05, + "loss": 0.0003, + "step": 40464 + }, + { + "epoch": 37.75, + "learning_rate": 4.528125e-05, + "loss": 0.0015, + "step": 40468 + }, + { + "epoch": 37.75, + "learning_rate": 4.5280783582089554e-05, + "loss": 0.0029, + "step": 40472 + }, + { + "epoch": 37.76, + "learning_rate": 4.528031716417911e-05, + "loss": 0.0001, + "step": 40476 + }, + { + "epoch": 37.76, + "learning_rate": 4.527985074626866e-05, + "loss": 0.0, + "step": 40480 + }, + { + "epoch": 37.76, + "learning_rate": 4.527938432835821e-05, + "loss": 0.0002, + "step": 40484 + }, + { + "epoch": 37.77, + "learning_rate": 4.527891791044777e-05, + "loss": 0.0014, + "step": 40488 + }, + { + "epoch": 37.77, + "learning_rate": 4.5278451492537315e-05, + "loss": 0.0001, + "step": 40492 + }, + { + "epoch": 37.78, + "learning_rate": 4.5277985074626864e-05, + "loss": 0.0, + "step": 40496 + }, + { + "epoch": 37.78, + "learning_rate": 4.5277518656716425e-05, + "loss": 0.012, + "step": 40500 + }, + { + "epoch": 37.78, + "eval_exact_match": 0.7379110251450677, + "eval_exec": 0.769825918762089, + "eval_loss": 0.41693997383117676, + "eval_runtime": 1159.3081, + "eval_samples_per_second": 0.892, + "step": 40500 + }, + { + "epoch": 37.78, + "learning_rate": 4.527705223880597e-05, + "loss": 0.0001, + "step": 40504 + }, + { + "epoch": 37.79, + "learning_rate": 4.527658582089552e-05, + "loss": 0.0, + "step": 40508 + }, + { + "epoch": 37.79, + "learning_rate": 4.5276119402985076e-05, + "loss": 0.0003, + "step": 40512 + }, + { + "epoch": 37.79, + "learning_rate": 4.527565298507463e-05, + "loss": 0.0001, + "step": 40516 + }, + { + "epoch": 37.8, + "learning_rate": 4.527518656716418e-05, + "loss": 0.0007, + "step": 40520 + }, + { + "epoch": 37.8, + "learning_rate": 4.5274720149253734e-05, + "loss": 0.0003, + "step": 40524 + }, + { + "epoch": 37.81, + "learning_rate": 4.527425373134328e-05, + "loss": 0.0, + "step": 40528 + }, + { + "epoch": 37.81, + "learning_rate": 4.527378731343284e-05, + "loss": 0.0003, + "step": 40532 + }, + { + "epoch": 37.81, + "learning_rate": 4.527332089552239e-05, + "loss": 0.0007, + "step": 40536 + }, + { + "epoch": 37.82, + "learning_rate": 4.527285447761194e-05, + "loss": 0.0001, + "step": 40540 + }, + { + "epoch": 37.82, + "learning_rate": 4.5272388059701495e-05, + "loss": 0.0029, + "step": 40544 + }, + { + "epoch": 37.82, + "learning_rate": 4.527192164179105e-05, + "loss": 0.0, + "step": 40548 + }, + { + "epoch": 37.83, + "learning_rate": 4.52714552238806e-05, + "loss": 0.0005, + "step": 40552 + }, + { + "epoch": 37.83, + "learning_rate": 4.5270988805970146e-05, + "loss": 0.0001, + "step": 40556 + }, + { + "epoch": 37.84, + "learning_rate": 4.527052238805971e-05, + "loss": 0.0002, + "step": 40560 + }, + { + "epoch": 37.84, + "learning_rate": 4.5270055970149256e-05, + "loss": 0.0, + "step": 40564 + }, + { + "epoch": 37.84, + "learning_rate": 4.5269589552238804e-05, + "loss": 0.0, + "step": 40568 + }, + { + "epoch": 37.85, + "learning_rate": 4.526912313432836e-05, + "loss": 0.0019, + "step": 40572 + }, + { + "epoch": 37.85, + "learning_rate": 4.5268656716417914e-05, + "loss": 0.0003, + "step": 40576 + }, + { + "epoch": 37.85, + "learning_rate": 4.526819029850746e-05, + "loss": 0.0002, + "step": 40580 + }, + { + "epoch": 37.86, + "learning_rate": 4.526772388059702e-05, + "loss": 0.0005, + "step": 40584 + }, + { + "epoch": 37.86, + "learning_rate": 4.5267257462686565e-05, + "loss": 0.0, + "step": 40588 + }, + { + "epoch": 37.87, + "learning_rate": 4.526679104477612e-05, + "loss": 0.0014, + "step": 40592 + }, + { + "epoch": 37.87, + "learning_rate": 4.5266324626865675e-05, + "loss": 0.0, + "step": 40596 + }, + { + "epoch": 37.87, + "learning_rate": 4.526585820895522e-05, + "loss": 0.0076, + "step": 40600 + }, + { + "epoch": 37.88, + "learning_rate": 4.526539179104478e-05, + "loss": 0.0003, + "step": 40604 + }, + { + "epoch": 37.88, + "learning_rate": 4.526492537313433e-05, + "loss": 0.0031, + "step": 40608 + }, + { + "epoch": 37.88, + "learning_rate": 4.526445895522388e-05, + "loss": 0.0001, + "step": 40612 + }, + { + "epoch": 37.89, + "learning_rate": 4.5263992537313436e-05, + "loss": 0.0002, + "step": 40616 + }, + { + "epoch": 37.89, + "learning_rate": 4.526352611940299e-05, + "loss": 0.0002, + "step": 40620 + }, + { + "epoch": 37.9, + "learning_rate": 4.526305970149254e-05, + "loss": 0.0004, + "step": 40624 + }, + { + "epoch": 37.9, + "learning_rate": 4.5262593283582094e-05, + "loss": 0.0, + "step": 40628 + }, + { + "epoch": 37.9, + "learning_rate": 4.526212686567164e-05, + "loss": 0.0001, + "step": 40632 + }, + { + "epoch": 37.91, + "learning_rate": 4.52616604477612e-05, + "loss": 0.0016, + "step": 40636 + }, + { + "epoch": 37.91, + "learning_rate": 4.526119402985075e-05, + "loss": 0.0, + "step": 40640 + }, + { + "epoch": 37.91, + "learning_rate": 4.52607276119403e-05, + "loss": 0.0002, + "step": 40644 + }, + { + "epoch": 37.92, + "learning_rate": 4.526026119402985e-05, + "loss": 0.0, + "step": 40648 + }, + { + "epoch": 37.92, + "learning_rate": 4.525979477611941e-05, + "loss": 0.0011, + "step": 40652 + }, + { + "epoch": 37.93, + "learning_rate": 4.525932835820896e-05, + "loss": 0.0, + "step": 40656 + }, + { + "epoch": 37.93, + "learning_rate": 4.5258861940298506e-05, + "loss": 0.0002, + "step": 40660 + }, + { + "epoch": 37.93, + "learning_rate": 4.525839552238806e-05, + "loss": 0.0008, + "step": 40664 + }, + { + "epoch": 37.94, + "learning_rate": 4.5257929104477616e-05, + "loss": 0.0001, + "step": 40668 + }, + { + "epoch": 37.94, + "learning_rate": 4.5257462686567164e-05, + "loss": 0.0021, + "step": 40672 + }, + { + "epoch": 37.94, + "learning_rate": 4.525699626865672e-05, + "loss": 0.0029, + "step": 40676 + }, + { + "epoch": 37.95, + "learning_rate": 4.5256529850746274e-05, + "loss": 0.0002, + "step": 40680 + }, + { + "epoch": 37.95, + "learning_rate": 4.525606343283582e-05, + "loss": 0.0024, + "step": 40684 + }, + { + "epoch": 37.96, + "learning_rate": 4.525559701492538e-05, + "loss": 0.0222, + "step": 40688 + }, + { + "epoch": 37.96, + "learning_rate": 4.5255130597014925e-05, + "loss": 0.0001, + "step": 40692 + }, + { + "epoch": 37.96, + "learning_rate": 4.525466417910448e-05, + "loss": 0.0001, + "step": 40696 + }, + { + "epoch": 37.97, + "learning_rate": 4.5254197761194035e-05, + "loss": 0.0037, + "step": 40700 + }, + { + "epoch": 37.97, + "learning_rate": 4.525373134328358e-05, + "loss": 0.0008, + "step": 40704 + }, + { + "epoch": 37.97, + "learning_rate": 4.525326492537313e-05, + "loss": 0.0002, + "step": 40708 + }, + { + "epoch": 37.98, + "learning_rate": 4.525279850746269e-05, + "loss": 0.0007, + "step": 40712 + }, + { + "epoch": 37.98, + "learning_rate": 4.525233208955224e-05, + "loss": 0.0, + "step": 40716 + }, + { + "epoch": 37.98, + "learning_rate": 4.525186567164179e-05, + "loss": 0.0, + "step": 40720 + }, + { + "epoch": 37.99, + "learning_rate": 4.5251399253731344e-05, + "loss": 0.0, + "step": 40724 + }, + { + "epoch": 37.99, + "learning_rate": 4.52509328358209e-05, + "loss": 0.0001, + "step": 40728 + }, + { + "epoch": 38.0, + "learning_rate": 4.525046641791045e-05, + "loss": 0.001, + "step": 40732 + }, + { + "epoch": 38.0, + "learning_rate": 4.525e-05, + "loss": 0.0001, + "step": 40736 + }, + { + "epoch": 38.0, + "learning_rate": 4.524953358208956e-05, + "loss": 0.0, + "step": 40740 + }, + { + "epoch": 38.01, + "learning_rate": 4.5249067164179105e-05, + "loss": 0.0, + "step": 40744 + }, + { + "epoch": 38.01, + "learning_rate": 4.524860074626866e-05, + "loss": 0.0, + "step": 40748 + }, + { + "epoch": 38.01, + "learning_rate": 4.524813432835821e-05, + "loss": 0.0, + "step": 40752 + }, + { + "epoch": 38.02, + "learning_rate": 4.524766791044776e-05, + "loss": 0.0001, + "step": 40756 + }, + { + "epoch": 38.02, + "learning_rate": 4.524720149253732e-05, + "loss": 0.0, + "step": 40760 + }, + { + "epoch": 38.03, + "learning_rate": 4.5246735074626866e-05, + "loss": 0.0001, + "step": 40764 + }, + { + "epoch": 38.03, + "learning_rate": 4.5246268656716414e-05, + "loss": 0.0001, + "step": 40768 + }, + { + "epoch": 38.03, + "learning_rate": 4.5245802238805976e-05, + "loss": 0.0001, + "step": 40772 + }, + { + "epoch": 38.04, + "learning_rate": 4.5245335820895524e-05, + "loss": 0.0012, + "step": 40776 + }, + { + "epoch": 38.04, + "learning_rate": 4.524486940298508e-05, + "loss": 0.0003, + "step": 40780 + }, + { + "epoch": 38.04, + "learning_rate": 4.524440298507463e-05, + "loss": 0.0003, + "step": 40784 + }, + { + "epoch": 38.05, + "learning_rate": 4.524393656716418e-05, + "loss": 0.0001, + "step": 40788 + }, + { + "epoch": 38.05, + "learning_rate": 4.524347014925374e-05, + "loss": 0.0014, + "step": 40792 + }, + { + "epoch": 38.06, + "learning_rate": 4.5243003731343285e-05, + "loss": 0.0007, + "step": 40796 + }, + { + "epoch": 38.06, + "learning_rate": 4.524253731343284e-05, + "loss": 0.0045, + "step": 40800 + }, + { + "epoch": 38.06, + "learning_rate": 4.5242070895522395e-05, + "loss": 0.0002, + "step": 40804 + }, + { + "epoch": 38.07, + "learning_rate": 4.524160447761194e-05, + "loss": 0.0011, + "step": 40808 + }, + { + "epoch": 38.07, + "learning_rate": 4.524113805970149e-05, + "loss": 0.0001, + "step": 40812 + }, + { + "epoch": 38.07, + "learning_rate": 4.5240671641791046e-05, + "loss": 0.0, + "step": 40816 + }, + { + "epoch": 38.08, + "learning_rate": 4.52402052238806e-05, + "loss": 0.0006, + "step": 40820 + }, + { + "epoch": 38.08, + "learning_rate": 4.523973880597015e-05, + "loss": 0.001, + "step": 40824 + }, + { + "epoch": 38.09, + "learning_rate": 4.5239272388059704e-05, + "loss": 0.0, + "step": 40828 + }, + { + "epoch": 38.09, + "learning_rate": 4.523880597014926e-05, + "loss": 0.0, + "step": 40832 + }, + { + "epoch": 38.09, + "learning_rate": 4.523833955223881e-05, + "loss": 0.0007, + "step": 40836 + }, + { + "epoch": 38.1, + "learning_rate": 4.523787313432836e-05, + "loss": 0.0, + "step": 40840 + }, + { + "epoch": 38.1, + "learning_rate": 4.523740671641791e-05, + "loss": 0.0003, + "step": 40844 + }, + { + "epoch": 38.1, + "learning_rate": 4.5236940298507465e-05, + "loss": 0.0009, + "step": 40848 + }, + { + "epoch": 38.11, + "learning_rate": 4.523647388059702e-05, + "loss": 0.0041, + "step": 40852 + }, + { + "epoch": 38.11, + "learning_rate": 4.523600746268657e-05, + "loss": 0.0011, + "step": 40856 + }, + { + "epoch": 38.12, + "learning_rate": 4.523554104477612e-05, + "loss": 0.0, + "step": 40860 + }, + { + "epoch": 38.12, + "learning_rate": 4.523507462686568e-05, + "loss": 0.0002, + "step": 40864 + }, + { + "epoch": 38.12, + "learning_rate": 4.5234608208955226e-05, + "loss": 0.0001, + "step": 40868 + }, + { + "epoch": 38.13, + "learning_rate": 4.5234141791044774e-05, + "loss": 0.0, + "step": 40872 + }, + { + "epoch": 38.13, + "learning_rate": 4.523367537313433e-05, + "loss": 0.0001, + "step": 40876 + }, + { + "epoch": 38.13, + "learning_rate": 4.5233208955223884e-05, + "loss": 0.0, + "step": 40880 + }, + { + "epoch": 38.14, + "learning_rate": 4.523274253731343e-05, + "loss": 0.0003, + "step": 40884 + }, + { + "epoch": 38.14, + "learning_rate": 4.523227611940299e-05, + "loss": 0.0001, + "step": 40888 + }, + { + "epoch": 38.15, + "learning_rate": 4.523180970149254e-05, + "loss": 0.0015, + "step": 40892 + }, + { + "epoch": 38.15, + "learning_rate": 4.523134328358209e-05, + "loss": 0.0003, + "step": 40896 + }, + { + "epoch": 38.15, + "learning_rate": 4.5230876865671645e-05, + "loss": 0.001, + "step": 40900 + }, + { + "epoch": 38.16, + "learning_rate": 4.523041044776119e-05, + "loss": 0.0016, + "step": 40904 + }, + { + "epoch": 38.16, + "learning_rate": 4.522994402985075e-05, + "loss": 0.0023, + "step": 40908 + }, + { + "epoch": 38.16, + "learning_rate": 4.52294776119403e-05, + "loss": 0.0, + "step": 40912 + }, + { + "epoch": 38.17, + "learning_rate": 4.522901119402985e-05, + "loss": 0.0004, + "step": 40916 + }, + { + "epoch": 38.17, + "learning_rate": 4.5228544776119406e-05, + "loss": 0.0001, + "step": 40920 + }, + { + "epoch": 38.18, + "learning_rate": 4.522807835820896e-05, + "loss": 0.0, + "step": 40924 + }, + { + "epoch": 38.18, + "learning_rate": 4.522761194029851e-05, + "loss": 0.0017, + "step": 40928 + }, + { + "epoch": 38.18, + "learning_rate": 4.522714552238806e-05, + "loss": 0.0002, + "step": 40932 + }, + { + "epoch": 38.19, + "learning_rate": 4.522667910447761e-05, + "loss": 0.0008, + "step": 40936 + }, + { + "epoch": 38.19, + "learning_rate": 4.522621268656717e-05, + "loss": 0.0008, + "step": 40940 + }, + { + "epoch": 38.19, + "learning_rate": 4.522574626865672e-05, + "loss": 0.0, + "step": 40944 + }, + { + "epoch": 38.2, + "learning_rate": 4.522527985074627e-05, + "loss": 0.0019, + "step": 40948 + }, + { + "epoch": 38.2, + "learning_rate": 4.5224813432835825e-05, + "loss": 0.0011, + "step": 40952 + }, + { + "epoch": 38.21, + "learning_rate": 4.522434701492538e-05, + "loss": 0.0005, + "step": 40956 + }, + { + "epoch": 38.21, + "learning_rate": 4.522388059701493e-05, + "loss": 0.0, + "step": 40960 + }, + { + "epoch": 38.21, + "learning_rate": 4.5223414179104476e-05, + "loss": 0.0003, + "step": 40964 + }, + { + "epoch": 38.22, + "learning_rate": 4.522294776119404e-05, + "loss": 0.0, + "step": 40968 + }, + { + "epoch": 38.22, + "learning_rate": 4.5222481343283586e-05, + "loss": 0.0027, + "step": 40972 + }, + { + "epoch": 38.22, + "learning_rate": 4.5222014925373134e-05, + "loss": 0.0, + "step": 40976 + }, + { + "epoch": 38.23, + "learning_rate": 4.522154850746269e-05, + "loss": 0.0003, + "step": 40980 + }, + { + "epoch": 38.23, + "learning_rate": 4.5221082089552244e-05, + "loss": 0.0002, + "step": 40984 + }, + { + "epoch": 38.24, + "learning_rate": 4.522061567164179e-05, + "loss": 0.0001, + "step": 40988 + }, + { + "epoch": 38.24, + "learning_rate": 4.522014925373135e-05, + "loss": 0.0, + "step": 40992 + }, + { + "epoch": 38.24, + "learning_rate": 4.5219682835820895e-05, + "loss": 0.0001, + "step": 40996 + }, + { + "epoch": 38.25, + "learning_rate": 4.521921641791045e-05, + "loss": 0.0, + "step": 41000 + }, + { + "epoch": 38.25, + "eval_exact_match": 0.7359767891682786, + "eval_exec": 0.7649903288201161, + "eval_loss": 0.43792104721069336, + "eval_runtime": 1152.442, + "eval_samples_per_second": 0.897, + "step": 41000 + }, + { + "epoch": 38.25, + "learning_rate": 4.5218750000000005e-05, + "loss": 0.0001, + "step": 41004 + }, + { + "epoch": 38.25, + "learning_rate": 4.521828358208955e-05, + "loss": 0.0003, + "step": 41008 + }, + { + "epoch": 38.26, + "learning_rate": 4.521781716417911e-05, + "loss": 0.0, + "step": 41012 + }, + { + "epoch": 38.26, + "learning_rate": 4.521735074626866e-05, + "loss": 0.0003, + "step": 41016 + }, + { + "epoch": 38.26, + "learning_rate": 4.521688432835821e-05, + "loss": 0.0002, + "step": 41020 + }, + { + "epoch": 38.27, + "learning_rate": 4.521641791044776e-05, + "loss": 0.0002, + "step": 41024 + }, + { + "epoch": 38.27, + "learning_rate": 4.521595149253732e-05, + "loss": 0.0, + "step": 41028 + }, + { + "epoch": 38.28, + "learning_rate": 4.521548507462687e-05, + "loss": 0.0004, + "step": 41032 + }, + { + "epoch": 38.28, + "learning_rate": 4.521501865671642e-05, + "loss": 0.0, + "step": 41036 + }, + { + "epoch": 38.28, + "learning_rate": 4.521455223880597e-05, + "loss": 0.0008, + "step": 41040 + }, + { + "epoch": 38.29, + "learning_rate": 4.521408582089553e-05, + "loss": 0.0001, + "step": 41044 + }, + { + "epoch": 38.29, + "learning_rate": 4.5213619402985075e-05, + "loss": 0.0, + "step": 41048 + }, + { + "epoch": 38.29, + "learning_rate": 4.521315298507463e-05, + "loss": 0.0, + "step": 41052 + }, + { + "epoch": 38.3, + "learning_rate": 4.521268656716418e-05, + "loss": 0.0, + "step": 41056 + }, + { + "epoch": 38.3, + "learning_rate": 4.521222014925373e-05, + "loss": 0.0003, + "step": 41060 + }, + { + "epoch": 38.31, + "learning_rate": 4.521175373134329e-05, + "loss": 0.0001, + "step": 41064 + }, + { + "epoch": 38.31, + "learning_rate": 4.5211287313432836e-05, + "loss": 0.0003, + "step": 41068 + }, + { + "epoch": 38.31, + "learning_rate": 4.521082089552239e-05, + "loss": 0.0001, + "step": 41072 + }, + { + "epoch": 38.32, + "learning_rate": 4.5210354477611946e-05, + "loss": 0.0004, + "step": 41076 + }, + { + "epoch": 38.32, + "learning_rate": 4.5209888059701494e-05, + "loss": 0.0073, + "step": 41080 + }, + { + "epoch": 38.32, + "learning_rate": 4.520942164179104e-05, + "loss": 0.0, + "step": 41084 + }, + { + "epoch": 38.33, + "learning_rate": 4.5208955223880604e-05, + "loss": 0.0018, + "step": 41088 + }, + { + "epoch": 38.33, + "learning_rate": 4.520848880597015e-05, + "loss": 0.0053, + "step": 41092 + }, + { + "epoch": 38.34, + "learning_rate": 4.52080223880597e-05, + "loss": 0.0001, + "step": 41096 + }, + { + "epoch": 38.34, + "learning_rate": 4.5207555970149255e-05, + "loss": 0.0004, + "step": 41100 + }, + { + "epoch": 38.34, + "learning_rate": 4.520708955223881e-05, + "loss": 0.0002, + "step": 41104 + }, + { + "epoch": 38.35, + "learning_rate": 4.5206623134328365e-05, + "loss": 0.0, + "step": 41108 + }, + { + "epoch": 38.35, + "learning_rate": 4.520615671641791e-05, + "loss": 0.0, + "step": 41112 + }, + { + "epoch": 38.35, + "learning_rate": 4.520569029850746e-05, + "loss": 0.0016, + "step": 41116 + }, + { + "epoch": 38.36, + "learning_rate": 4.520522388059702e-05, + "loss": 0.0, + "step": 41120 + }, + { + "epoch": 38.36, + "learning_rate": 4.520475746268657e-05, + "loss": 0.0, + "step": 41124 + }, + { + "epoch": 38.37, + "learning_rate": 4.520429104477612e-05, + "loss": 0.0001, + "step": 41128 + }, + { + "epoch": 38.37, + "learning_rate": 4.5203824626865674e-05, + "loss": 0.0, + "step": 41132 + }, + { + "epoch": 38.37, + "learning_rate": 4.520335820895523e-05, + "loss": 0.0, + "step": 41136 + }, + { + "epoch": 38.38, + "learning_rate": 4.520289179104478e-05, + "loss": 0.0, + "step": 41140 + }, + { + "epoch": 38.38, + "learning_rate": 4.520242537313433e-05, + "loss": 0.0, + "step": 41144 + }, + { + "epoch": 38.38, + "learning_rate": 4.520195895522388e-05, + "loss": 0.0002, + "step": 41148 + }, + { + "epoch": 38.39, + "learning_rate": 4.5201492537313435e-05, + "loss": 0.0001, + "step": 41152 + }, + { + "epoch": 38.39, + "learning_rate": 4.520102611940299e-05, + "loss": 0.0018, + "step": 41156 + }, + { + "epoch": 38.4, + "learning_rate": 4.520055970149254e-05, + "loss": 0.0, + "step": 41160 + }, + { + "epoch": 38.4, + "learning_rate": 4.520009328358209e-05, + "loss": 0.0, + "step": 41164 + }, + { + "epoch": 38.4, + "learning_rate": 4.519962686567165e-05, + "loss": 0.0032, + "step": 41168 + }, + { + "epoch": 38.41, + "learning_rate": 4.5199160447761196e-05, + "loss": 0.0019, + "step": 41172 + }, + { + "epoch": 38.41, + "learning_rate": 4.5198694029850744e-05, + "loss": 0.0002, + "step": 41176 + }, + { + "epoch": 38.41, + "learning_rate": 4.5198227611940306e-05, + "loss": 0.0, + "step": 41180 + }, + { + "epoch": 38.42, + "learning_rate": 4.5197761194029854e-05, + "loss": 0.0001, + "step": 41184 + }, + { + "epoch": 38.42, + "learning_rate": 4.51972947761194e-05, + "loss": 0.0002, + "step": 41188 + }, + { + "epoch": 38.43, + "learning_rate": 4.519682835820896e-05, + "loss": 0.0034, + "step": 41192 + }, + { + "epoch": 38.43, + "learning_rate": 4.519636194029851e-05, + "loss": 0.0011, + "step": 41196 + }, + { + "epoch": 38.43, + "learning_rate": 4.519589552238806e-05, + "loss": 0.0003, + "step": 41200 + }, + { + "epoch": 38.44, + "learning_rate": 4.5195429104477615e-05, + "loss": 0.0002, + "step": 41204 + }, + { + "epoch": 38.44, + "learning_rate": 4.519496268656716e-05, + "loss": 0.0008, + "step": 41208 + }, + { + "epoch": 38.44, + "learning_rate": 4.519449626865672e-05, + "loss": 0.0, + "step": 41212 + }, + { + "epoch": 38.45, + "learning_rate": 4.519402985074627e-05, + "loss": 0.0001, + "step": 41216 + }, + { + "epoch": 38.45, + "learning_rate": 4.519356343283582e-05, + "loss": 0.0002, + "step": 41220 + }, + { + "epoch": 38.46, + "learning_rate": 4.5193097014925376e-05, + "loss": 0.0, + "step": 41224 + }, + { + "epoch": 38.46, + "learning_rate": 4.519263059701493e-05, + "loss": 0.0001, + "step": 41228 + }, + { + "epoch": 38.46, + "learning_rate": 4.519216417910448e-05, + "loss": 0.0005, + "step": 41232 + }, + { + "epoch": 38.47, + "learning_rate": 4.519169776119403e-05, + "loss": 0.0, + "step": 41236 + }, + { + "epoch": 38.47, + "learning_rate": 4.519123134328359e-05, + "loss": 0.0, + "step": 41240 + }, + { + "epoch": 38.47, + "learning_rate": 4.5190764925373137e-05, + "loss": 0.0, + "step": 41244 + }, + { + "epoch": 38.48, + "learning_rate": 4.5190298507462685e-05, + "loss": 0.0001, + "step": 41248 + }, + { + "epoch": 38.48, + "learning_rate": 4.518983208955224e-05, + "loss": 0.0003, + "step": 41252 + }, + { + "epoch": 38.49, + "learning_rate": 4.5189365671641795e-05, + "loss": 0.0, + "step": 41256 + }, + { + "epoch": 38.49, + "learning_rate": 4.518889925373134e-05, + "loss": 0.0, + "step": 41260 + }, + { + "epoch": 38.49, + "learning_rate": 4.51884328358209e-05, + "loss": 0.0, + "step": 41264 + }, + { + "epoch": 38.5, + "learning_rate": 4.5187966417910446e-05, + "loss": 0.0, + "step": 41268 + }, + { + "epoch": 38.5, + "learning_rate": 4.518750000000001e-05, + "loss": 0.0001, + "step": 41272 + }, + { + "epoch": 38.5, + "learning_rate": 4.5187033582089556e-05, + "loss": 0.0044, + "step": 41276 + }, + { + "epoch": 38.51, + "learning_rate": 4.5186567164179104e-05, + "loss": 0.0001, + "step": 41280 + }, + { + "epoch": 38.51, + "learning_rate": 4.518610074626866e-05, + "loss": 0.0, + "step": 41284 + }, + { + "epoch": 38.51, + "learning_rate": 4.5185634328358213e-05, + "loss": 0.0, + "step": 41288 + }, + { + "epoch": 38.52, + "learning_rate": 4.518516791044776e-05, + "loss": 0.0022, + "step": 41292 + }, + { + "epoch": 38.52, + "learning_rate": 4.5184701492537317e-05, + "loss": 0.0013, + "step": 41296 + }, + { + "epoch": 38.53, + "learning_rate": 4.518423507462687e-05, + "loss": 0.0003, + "step": 41300 + }, + { + "epoch": 38.53, + "learning_rate": 4.518376865671642e-05, + "loss": 0.0, + "step": 41304 + }, + { + "epoch": 38.53, + "learning_rate": 4.5183302238805974e-05, + "loss": 0.0, + "step": 41308 + }, + { + "epoch": 38.54, + "learning_rate": 4.518283582089552e-05, + "loss": 0.0001, + "step": 41312 + }, + { + "epoch": 38.54, + "learning_rate": 4.518236940298508e-05, + "loss": 0.0003, + "step": 41316 + }, + { + "epoch": 38.54, + "learning_rate": 4.518190298507463e-05, + "loss": 0.0, + "step": 41320 + }, + { + "epoch": 38.55, + "learning_rate": 4.518143656716418e-05, + "loss": 0.0, + "step": 41324 + }, + { + "epoch": 38.55, + "learning_rate": 4.518097014925373e-05, + "loss": 0.0002, + "step": 41328 + }, + { + "epoch": 38.56, + "learning_rate": 4.518050373134329e-05, + "loss": 0.0066, + "step": 41332 + }, + { + "epoch": 38.56, + "learning_rate": 4.518003731343284e-05, + "loss": 0.0001, + "step": 41336 + }, + { + "epoch": 38.56, + "learning_rate": 4.5179570895522387e-05, + "loss": 0.0005, + "step": 41340 + }, + { + "epoch": 38.57, + "learning_rate": 4.517910447761194e-05, + "loss": 0.0003, + "step": 41344 + }, + { + "epoch": 38.57, + "learning_rate": 4.5178638059701496e-05, + "loss": 0.0009, + "step": 41348 + }, + { + "epoch": 38.57, + "learning_rate": 4.5178171641791045e-05, + "loss": 0.0, + "step": 41352 + }, + { + "epoch": 38.58, + "learning_rate": 4.51777052238806e-05, + "loss": 0.0, + "step": 41356 + }, + { + "epoch": 38.58, + "learning_rate": 4.5177238805970154e-05, + "loss": 0.0002, + "step": 41360 + }, + { + "epoch": 38.59, + "learning_rate": 4.51767723880597e-05, + "loss": 0.0, + "step": 41364 + }, + { + "epoch": 38.59, + "learning_rate": 4.517630597014926e-05, + "loss": 0.0001, + "step": 41368 + }, + { + "epoch": 38.59, + "learning_rate": 4.5175839552238806e-05, + "loss": 0.0, + "step": 41372 + }, + { + "epoch": 38.6, + "learning_rate": 4.517537313432836e-05, + "loss": 0.0, + "step": 41376 + }, + { + "epoch": 38.6, + "learning_rate": 4.5174906716417915e-05, + "loss": 0.0, + "step": 41380 + }, + { + "epoch": 38.6, + "learning_rate": 4.5174440298507463e-05, + "loss": 0.0012, + "step": 41384 + }, + { + "epoch": 38.61, + "learning_rate": 4.517397388059701e-05, + "loss": 0.0001, + "step": 41388 + }, + { + "epoch": 38.61, + "learning_rate": 4.517350746268657e-05, + "loss": 0.0, + "step": 41392 + }, + { + "epoch": 38.62, + "learning_rate": 4.517304104477612e-05, + "loss": 0.0001, + "step": 41396 + }, + { + "epoch": 38.62, + "learning_rate": 4.517257462686567e-05, + "loss": 0.0001, + "step": 41400 + }, + { + "epoch": 38.62, + "learning_rate": 4.5172108208955224e-05, + "loss": 0.0016, + "step": 41404 + }, + { + "epoch": 38.63, + "learning_rate": 4.517164179104478e-05, + "loss": 0.0024, + "step": 41408 + }, + { + "epoch": 38.63, + "learning_rate": 4.517117537313433e-05, + "loss": 0.0001, + "step": 41412 + }, + { + "epoch": 38.63, + "learning_rate": 4.517070895522388e-05, + "loss": 0.0, + "step": 41416 + }, + { + "epoch": 38.64, + "learning_rate": 4.517024253731344e-05, + "loss": 0.0019, + "step": 41420 + }, + { + "epoch": 38.64, + "learning_rate": 4.5169776119402985e-05, + "loss": 0.0, + "step": 41424 + }, + { + "epoch": 38.65, + "learning_rate": 4.516930970149254e-05, + "loss": 0.0032, + "step": 41428 + }, + { + "epoch": 38.65, + "learning_rate": 4.516884328358209e-05, + "loss": 0.0009, + "step": 41432 + }, + { + "epoch": 38.65, + "learning_rate": 4.5168376865671643e-05, + "loss": 0.0049, + "step": 41436 + }, + { + "epoch": 38.66, + "learning_rate": 4.51679104477612e-05, + "loss": 0.0, + "step": 41440 + }, + { + "epoch": 38.66, + "learning_rate": 4.5167444029850746e-05, + "loss": 0.0004, + "step": 41444 + }, + { + "epoch": 38.66, + "learning_rate": 4.51669776119403e-05, + "loss": 0.0015, + "step": 41448 + }, + { + "epoch": 38.67, + "learning_rate": 4.5166511194029856e-05, + "loss": 0.0, + "step": 41452 + }, + { + "epoch": 38.67, + "learning_rate": 4.5166044776119404e-05, + "loss": 0.0008, + "step": 41456 + }, + { + "epoch": 38.68, + "learning_rate": 4.516557835820896e-05, + "loss": 0.0, + "step": 41460 + }, + { + "epoch": 38.68, + "learning_rate": 4.516511194029851e-05, + "loss": 0.0, + "step": 41464 + }, + { + "epoch": 38.68, + "learning_rate": 4.516464552238806e-05, + "loss": 0.0, + "step": 41468 + }, + { + "epoch": 38.69, + "learning_rate": 4.516417910447762e-05, + "loss": 0.0008, + "step": 41472 + }, + { + "epoch": 38.69, + "learning_rate": 4.5163712686567165e-05, + "loss": 0.0001, + "step": 41476 + }, + { + "epoch": 38.69, + "learning_rate": 4.516324626865672e-05, + "loss": 0.0002, + "step": 41480 + }, + { + "epoch": 38.7, + "learning_rate": 4.5162779850746275e-05, + "loss": 0.0001, + "step": 41484 + }, + { + "epoch": 38.7, + "learning_rate": 4.516231343283582e-05, + "loss": 0.0001, + "step": 41488 + }, + { + "epoch": 38.71, + "learning_rate": 4.516184701492537e-05, + "loss": 0.0001, + "step": 41492 + }, + { + "epoch": 38.71, + "learning_rate": 4.5161380597014926e-05, + "loss": 0.0, + "step": 41496 + }, + { + "epoch": 38.71, + "learning_rate": 4.516091417910448e-05, + "loss": 0.0035, + "step": 41500 + }, + { + "epoch": 38.71, + "eval_exact_match": 0.7263056092843327, + "eval_exec": 0.7562862669245648, + "eval_loss": 0.440681517124176, + "eval_runtime": 1097.6911, + "eval_samples_per_second": 0.942, + "step": 41500 + }, + { + "epoch": 38.72, + "learning_rate": 4.516044776119403e-05, + "loss": 0.0, + "step": 41504 + }, + { + "epoch": 38.72, + "learning_rate": 4.5159981343283584e-05, + "loss": 0.0001, + "step": 41508 + }, + { + "epoch": 38.72, + "learning_rate": 4.515951492537314e-05, + "loss": 0.0014, + "step": 41512 + }, + { + "epoch": 38.73, + "learning_rate": 4.515904850746269e-05, + "loss": 0.0014, + "step": 41516 + }, + { + "epoch": 38.73, + "learning_rate": 4.515858208955224e-05, + "loss": 0.0, + "step": 41520 + }, + { + "epoch": 38.73, + "learning_rate": 4.515811567164179e-05, + "loss": 0.0002, + "step": 41524 + }, + { + "epoch": 38.74, + "learning_rate": 4.5157649253731345e-05, + "loss": 0.002, + "step": 41528 + }, + { + "epoch": 38.74, + "learning_rate": 4.51571828358209e-05, + "loss": 0.0015, + "step": 41532 + }, + { + "epoch": 38.75, + "learning_rate": 4.515671641791045e-05, + "loss": 0.0, + "step": 41536 + }, + { + "epoch": 38.75, + "learning_rate": 4.515625e-05, + "loss": 0.0, + "step": 41540 + }, + { + "epoch": 38.75, + "learning_rate": 4.515578358208956e-05, + "loss": 0.0013, + "step": 41544 + }, + { + "epoch": 38.76, + "learning_rate": 4.5155317164179106e-05, + "loss": 0.0, + "step": 41548 + }, + { + "epoch": 38.76, + "learning_rate": 4.5154850746268654e-05, + "loss": 0.0001, + "step": 41552 + }, + { + "epoch": 38.76, + "learning_rate": 4.515438432835821e-05, + "loss": 0.0022, + "step": 41556 + }, + { + "epoch": 38.77, + "learning_rate": 4.5153917910447764e-05, + "loss": 0.0011, + "step": 41560 + }, + { + "epoch": 38.77, + "learning_rate": 4.515345149253731e-05, + "loss": 0.0001, + "step": 41564 + }, + { + "epoch": 38.78, + "learning_rate": 4.515298507462687e-05, + "loss": 0.0001, + "step": 41568 + }, + { + "epoch": 38.78, + "learning_rate": 4.515251865671642e-05, + "loss": 0.0004, + "step": 41572 + }, + { + "epoch": 38.78, + "learning_rate": 4.515205223880597e-05, + "loss": 0.0, + "step": 41576 + }, + { + "epoch": 38.79, + "learning_rate": 4.5151585820895525e-05, + "loss": 0.0, + "step": 41580 + }, + { + "epoch": 38.79, + "learning_rate": 4.515111940298507e-05, + "loss": 0.0006, + "step": 41584 + }, + { + "epoch": 38.79, + "learning_rate": 4.515065298507463e-05, + "loss": 0.0001, + "step": 41588 + }, + { + "epoch": 38.8, + "learning_rate": 4.515018656716418e-05, + "loss": 0.0016, + "step": 41592 + }, + { + "epoch": 38.8, + "learning_rate": 4.514972014925373e-05, + "loss": 0.0007, + "step": 41596 + }, + { + "epoch": 38.81, + "learning_rate": 4.5149253731343286e-05, + "loss": 0.0, + "step": 41600 + }, + { + "epoch": 38.81, + "learning_rate": 4.514878731343284e-05, + "loss": 0.0, + "step": 41604 + }, + { + "epoch": 38.81, + "learning_rate": 4.514832089552239e-05, + "loss": 0.0014, + "step": 41608 + }, + { + "epoch": 38.82, + "learning_rate": 4.5147854477611944e-05, + "loss": 0.0007, + "step": 41612 + }, + { + "epoch": 38.82, + "learning_rate": 4.514738805970149e-05, + "loss": 0.0, + "step": 41616 + }, + { + "epoch": 38.82, + "learning_rate": 4.514692164179105e-05, + "loss": 0.0002, + "step": 41620 + }, + { + "epoch": 38.83, + "learning_rate": 4.51464552238806e-05, + "loss": 0.0001, + "step": 41624 + }, + { + "epoch": 38.83, + "learning_rate": 4.514598880597015e-05, + "loss": 0.0015, + "step": 41628 + }, + { + "epoch": 38.84, + "learning_rate": 4.5145522388059705e-05, + "loss": 0.0, + "step": 41632 + }, + { + "epoch": 38.84, + "learning_rate": 4.514505597014926e-05, + "loss": 0.0023, + "step": 41636 + }, + { + "epoch": 38.84, + "learning_rate": 4.514458955223881e-05, + "loss": 0.0008, + "step": 41640 + }, + { + "epoch": 38.85, + "learning_rate": 4.5144123134328356e-05, + "loss": 0.0001, + "step": 41644 + }, + { + "epoch": 38.85, + "learning_rate": 4.514365671641792e-05, + "loss": 0.0004, + "step": 41648 + }, + { + "epoch": 38.85, + "learning_rate": 4.5143190298507466e-05, + "loss": 0.0014, + "step": 41652 + }, + { + "epoch": 38.86, + "learning_rate": 4.5142723880597014e-05, + "loss": 0.0, + "step": 41656 + }, + { + "epoch": 38.86, + "learning_rate": 4.514225746268657e-05, + "loss": 0.0041, + "step": 41660 + }, + { + "epoch": 38.87, + "learning_rate": 4.5141791044776124e-05, + "loss": 0.0014, + "step": 41664 + }, + { + "epoch": 38.87, + "learning_rate": 4.514132462686567e-05, + "loss": 0.0019, + "step": 41668 + }, + { + "epoch": 38.87, + "learning_rate": 4.514085820895523e-05, + "loss": 0.0, + "step": 41672 + }, + { + "epoch": 38.88, + "learning_rate": 4.5140391791044775e-05, + "loss": 0.0001, + "step": 41676 + }, + { + "epoch": 38.88, + "learning_rate": 4.513992537313433e-05, + "loss": 0.0018, + "step": 41680 + }, + { + "epoch": 38.88, + "learning_rate": 4.5139458955223885e-05, + "loss": 0.0001, + "step": 41684 + }, + { + "epoch": 38.89, + "learning_rate": 4.513899253731343e-05, + "loss": 0.0001, + "step": 41688 + }, + { + "epoch": 38.89, + "learning_rate": 4.513852611940299e-05, + "loss": 0.0001, + "step": 41692 + }, + { + "epoch": 38.9, + "learning_rate": 4.513805970149254e-05, + "loss": 0.0003, + "step": 41696 + }, + { + "epoch": 38.9, + "learning_rate": 4.513759328358209e-05, + "loss": 0.0003, + "step": 41700 + }, + { + "epoch": 38.9, + "learning_rate": 4.513712686567164e-05, + "loss": 0.0004, + "step": 41704 + }, + { + "epoch": 38.91, + "learning_rate": 4.51366604477612e-05, + "loss": 0.0001, + "step": 41708 + }, + { + "epoch": 38.91, + "learning_rate": 4.513619402985075e-05, + "loss": 0.0031, + "step": 41712 + }, + { + "epoch": 38.91, + "learning_rate": 4.51357276119403e-05, + "loss": 0.0006, + "step": 41716 + }, + { + "epoch": 38.92, + "learning_rate": 4.513526119402985e-05, + "loss": 0.0009, + "step": 41720 + }, + { + "epoch": 38.92, + "learning_rate": 4.513479477611941e-05, + "loss": 0.0001, + "step": 41724 + }, + { + "epoch": 38.93, + "learning_rate": 4.5134328358208955e-05, + "loss": 0.0, + "step": 41728 + }, + { + "epoch": 38.93, + "learning_rate": 4.513386194029851e-05, + "loss": 0.0048, + "step": 41732 + }, + { + "epoch": 38.93, + "learning_rate": 4.513339552238806e-05, + "loss": 0.0001, + "step": 41736 + }, + { + "epoch": 38.94, + "learning_rate": 4.513292910447761e-05, + "loss": 0.0022, + "step": 41740 + }, + { + "epoch": 38.94, + "learning_rate": 4.513246268656717e-05, + "loss": 0.0, + "step": 41744 + }, + { + "epoch": 38.94, + "learning_rate": 4.5131996268656716e-05, + "loss": 0.0003, + "step": 41748 + }, + { + "epoch": 38.95, + "learning_rate": 4.513152985074627e-05, + "loss": 0.0146, + "step": 41752 + }, + { + "epoch": 38.95, + "learning_rate": 4.5131063432835826e-05, + "loss": 0.0001, + "step": 41756 + }, + { + "epoch": 38.96, + "learning_rate": 4.5130597014925374e-05, + "loss": 0.0, + "step": 41760 + }, + { + "epoch": 38.96, + "learning_rate": 4.513013059701493e-05, + "loss": 0.0001, + "step": 41764 + }, + { + "epoch": 38.96, + "learning_rate": 4.5129664179104484e-05, + "loss": 0.0, + "step": 41768 + }, + { + "epoch": 38.97, + "learning_rate": 4.512919776119403e-05, + "loss": 0.0001, + "step": 41772 + }, + { + "epoch": 38.97, + "learning_rate": 4.512873134328359e-05, + "loss": 0.0052, + "step": 41776 + }, + { + "epoch": 38.97, + "learning_rate": 4.5128264925373135e-05, + "loss": 0.0008, + "step": 41780 + }, + { + "epoch": 38.98, + "learning_rate": 4.512779850746269e-05, + "loss": 0.0001, + "step": 41784 + }, + { + "epoch": 38.98, + "learning_rate": 4.5127332089552245e-05, + "loss": 0.0001, + "step": 41788 + }, + { + "epoch": 38.98, + "learning_rate": 4.512686567164179e-05, + "loss": 0.0, + "step": 41792 + }, + { + "epoch": 38.99, + "learning_rate": 4.512639925373134e-05, + "loss": 0.0007, + "step": 41796 + }, + { + "epoch": 38.99, + "learning_rate": 4.51259328358209e-05, + "loss": 0.0001, + "step": 41800 + }, + { + "epoch": 39.0, + "learning_rate": 4.512546641791045e-05, + "loss": 0.0001, + "step": 41804 + }, + { + "epoch": 39.0, + "learning_rate": 4.5125e-05, + "loss": 0.0, + "step": 41808 + }, + { + "epoch": 39.0, + "learning_rate": 4.5124533582089554e-05, + "loss": 0.0, + "step": 41812 + }, + { + "epoch": 39.01, + "learning_rate": 4.512406716417911e-05, + "loss": 0.0001, + "step": 41816 + }, + { + "epoch": 39.01, + "learning_rate": 4.512360074626866e-05, + "loss": 0.0003, + "step": 41820 + }, + { + "epoch": 39.01, + "learning_rate": 4.512313432835821e-05, + "loss": 0.0001, + "step": 41824 + }, + { + "epoch": 39.02, + "learning_rate": 4.512266791044776e-05, + "loss": 0.0002, + "step": 41828 + }, + { + "epoch": 39.02, + "learning_rate": 4.5122201492537315e-05, + "loss": 0.0001, + "step": 41832 + }, + { + "epoch": 39.03, + "learning_rate": 4.512173507462687e-05, + "loss": 0.0, + "step": 41836 + }, + { + "epoch": 39.03, + "learning_rate": 4.512126865671642e-05, + "loss": 0.0009, + "step": 41840 + }, + { + "epoch": 39.03, + "learning_rate": 4.512080223880597e-05, + "loss": 0.0002, + "step": 41844 + }, + { + "epoch": 39.04, + "learning_rate": 4.512033582089553e-05, + "loss": 0.0073, + "step": 41848 + }, + { + "epoch": 39.04, + "learning_rate": 4.5119869402985076e-05, + "loss": 0.0, + "step": 41852 + }, + { + "epoch": 39.04, + "learning_rate": 4.5119402985074624e-05, + "loss": 0.0, + "step": 41856 + }, + { + "epoch": 39.05, + "learning_rate": 4.5118936567164186e-05, + "loss": 0.0001, + "step": 41860 + }, + { + "epoch": 39.05, + "learning_rate": 4.5118470149253734e-05, + "loss": 0.0, + "step": 41864 + }, + { + "epoch": 39.06, + "learning_rate": 4.511800373134328e-05, + "loss": 0.0001, + "step": 41868 + }, + { + "epoch": 39.06, + "learning_rate": 4.511753731343284e-05, + "loss": 0.0, + "step": 41872 + }, + { + "epoch": 39.06, + "learning_rate": 4.511707089552239e-05, + "loss": 0.0, + "step": 41876 + }, + { + "epoch": 39.07, + "learning_rate": 4.511660447761194e-05, + "loss": 0.0087, + "step": 41880 + }, + { + "epoch": 39.07, + "learning_rate": 4.5116138059701495e-05, + "loss": 0.0001, + "step": 41884 + }, + { + "epoch": 39.07, + "learning_rate": 4.511567164179104e-05, + "loss": 0.0001, + "step": 41888 + }, + { + "epoch": 39.08, + "learning_rate": 4.51152052238806e-05, + "loss": 0.0, + "step": 41892 + }, + { + "epoch": 39.08, + "learning_rate": 4.511473880597015e-05, + "loss": 0.0004, + "step": 41896 + }, + { + "epoch": 39.09, + "learning_rate": 4.51142723880597e-05, + "loss": 0.0007, + "step": 41900 + }, + { + "epoch": 39.09, + "learning_rate": 4.5113805970149256e-05, + "loss": 0.0012, + "step": 41904 + }, + { + "epoch": 39.09, + "learning_rate": 4.511333955223881e-05, + "loss": 0.0003, + "step": 41908 + }, + { + "epoch": 39.1, + "learning_rate": 4.511287313432836e-05, + "loss": 0.0005, + "step": 41912 + }, + { + "epoch": 39.1, + "learning_rate": 4.5112406716417914e-05, + "loss": 0.0007, + "step": 41916 + }, + { + "epoch": 39.1, + "learning_rate": 4.511194029850747e-05, + "loss": 0.0, + "step": 41920 + }, + { + "epoch": 39.11, + "learning_rate": 4.511147388059702e-05, + "loss": 0.0, + "step": 41924 + }, + { + "epoch": 39.11, + "learning_rate": 4.511100746268657e-05, + "loss": 0.0003, + "step": 41928 + }, + { + "epoch": 39.12, + "learning_rate": 4.511054104477612e-05, + "loss": 0.0015, + "step": 41932 + }, + { + "epoch": 39.12, + "learning_rate": 4.5110074626865675e-05, + "loss": 0.0, + "step": 41936 + }, + { + "epoch": 39.12, + "learning_rate": 4.510960820895523e-05, + "loss": 0.0, + "step": 41940 + }, + { + "epoch": 39.13, + "learning_rate": 4.510914179104478e-05, + "loss": 0.0004, + "step": 41944 + }, + { + "epoch": 39.13, + "learning_rate": 4.5108675373134326e-05, + "loss": 0.0, + "step": 41948 + }, + { + "epoch": 39.13, + "learning_rate": 4.510820895522389e-05, + "loss": 0.0001, + "step": 41952 + }, + { + "epoch": 39.14, + "learning_rate": 4.5107742537313436e-05, + "loss": 0.0001, + "step": 41956 + }, + { + "epoch": 39.14, + "learning_rate": 4.5107276119402984e-05, + "loss": 0.0001, + "step": 41960 + }, + { + "epoch": 39.15, + "learning_rate": 4.510680970149254e-05, + "loss": 0.0001, + "step": 41964 + }, + { + "epoch": 39.15, + "learning_rate": 4.5106343283582094e-05, + "loss": 0.005, + "step": 41968 + }, + { + "epoch": 39.15, + "learning_rate": 4.510587686567164e-05, + "loss": 0.0011, + "step": 41972 + }, + { + "epoch": 39.16, + "learning_rate": 4.51054104477612e-05, + "loss": 0.0019, + "step": 41976 + }, + { + "epoch": 39.16, + "learning_rate": 4.510494402985075e-05, + "loss": 0.0013, + "step": 41980 + }, + { + "epoch": 39.16, + "learning_rate": 4.51044776119403e-05, + "loss": 0.0001, + "step": 41984 + }, + { + "epoch": 39.17, + "learning_rate": 4.5104011194029855e-05, + "loss": 0.0, + "step": 41988 + }, + { + "epoch": 39.17, + "learning_rate": 4.51035447761194e-05, + "loss": 0.0001, + "step": 41992 + }, + { + "epoch": 39.18, + "learning_rate": 4.510307835820896e-05, + "loss": 0.0001, + "step": 41996 + }, + { + "epoch": 39.18, + "learning_rate": 4.510261194029851e-05, + "loss": 0.0001, + "step": 42000 + }, + { + "epoch": 39.18, + "eval_exact_match": 0.7350096711798839, + "eval_exec": 0.7630560928433269, + "eval_loss": 0.40719056129455566, + "eval_runtime": 1122.0735, + "eval_samples_per_second": 0.922, + "step": 42000 + }, + { + "epoch": 39.18, + "learning_rate": 4.510214552238806e-05, + "loss": 0.0, + "step": 42004 + }, + { + "epoch": 39.19, + "learning_rate": 4.510167910447761e-05, + "loss": 0.0002, + "step": 42008 + }, + { + "epoch": 39.19, + "learning_rate": 4.510121268656717e-05, + "loss": 0.0005, + "step": 42012 + }, + { + "epoch": 39.19, + "learning_rate": 4.510074626865672e-05, + "loss": 0.0002, + "step": 42016 + }, + { + "epoch": 39.2, + "learning_rate": 4.510027985074627e-05, + "loss": 0.0, + "step": 42020 + }, + { + "epoch": 39.2, + "learning_rate": 4.509981343283582e-05, + "loss": 0.0, + "step": 42024 + }, + { + "epoch": 39.21, + "learning_rate": 4.509934701492538e-05, + "loss": 0.0, + "step": 42028 + }, + { + "epoch": 39.21, + "learning_rate": 4.5098880597014925e-05, + "loss": 0.0, + "step": 42032 + }, + { + "epoch": 39.21, + "learning_rate": 4.509841417910448e-05, + "loss": 0.0064, + "step": 42036 + }, + { + "epoch": 39.22, + "learning_rate": 4.5097947761194035e-05, + "loss": 0.0003, + "step": 42040 + }, + { + "epoch": 39.22, + "learning_rate": 4.509748134328358e-05, + "loss": 0.0094, + "step": 42044 + }, + { + "epoch": 39.22, + "learning_rate": 4.509701492537314e-05, + "loss": 0.0001, + "step": 42048 + }, + { + "epoch": 39.23, + "learning_rate": 4.5096548507462686e-05, + "loss": 0.0002, + "step": 42052 + }, + { + "epoch": 39.23, + "learning_rate": 4.509608208955224e-05, + "loss": 0.0006, + "step": 42056 + }, + { + "epoch": 39.24, + "learning_rate": 4.5095615671641796e-05, + "loss": 0.0011, + "step": 42060 + }, + { + "epoch": 39.24, + "learning_rate": 4.5095149253731344e-05, + "loss": 0.0003, + "step": 42064 + }, + { + "epoch": 39.24, + "learning_rate": 4.509468283582089e-05, + "loss": 0.0008, + "step": 42068 + }, + { + "epoch": 39.25, + "learning_rate": 4.5094216417910454e-05, + "loss": 0.0, + "step": 42072 + }, + { + "epoch": 39.25, + "learning_rate": 4.509375e-05, + "loss": 0.0001, + "step": 42076 + }, + { + "epoch": 39.25, + "learning_rate": 4.509328358208956e-05, + "loss": 0.0001, + "step": 42080 + }, + { + "epoch": 39.26, + "learning_rate": 4.5092817164179105e-05, + "loss": 0.0, + "step": 42084 + }, + { + "epoch": 39.26, + "learning_rate": 4.509235074626866e-05, + "loss": 0.0, + "step": 42088 + }, + { + "epoch": 39.26, + "learning_rate": 4.5091884328358215e-05, + "loss": 0.0019, + "step": 42092 + }, + { + "epoch": 39.27, + "learning_rate": 4.509141791044776e-05, + "loss": 0.0, + "step": 42096 + }, + { + "epoch": 39.27, + "learning_rate": 4.509095149253732e-05, + "loss": 0.0014, + "step": 42100 + }, + { + "epoch": 39.28, + "learning_rate": 4.509048507462687e-05, + "loss": 0.0001, + "step": 42104 + }, + { + "epoch": 39.28, + "learning_rate": 4.509001865671642e-05, + "loss": 0.0002, + "step": 42108 + }, + { + "epoch": 39.28, + "learning_rate": 4.508955223880597e-05, + "loss": 0.0001, + "step": 42112 + }, + { + "epoch": 39.29, + "learning_rate": 4.5089085820895524e-05, + "loss": 0.0046, + "step": 42116 + }, + { + "epoch": 39.29, + "learning_rate": 4.508861940298508e-05, + "loss": 0.0, + "step": 42120 + }, + { + "epoch": 39.29, + "learning_rate": 4.508815298507463e-05, + "loss": 0.0, + "step": 42124 + }, + { + "epoch": 39.3, + "learning_rate": 4.508768656716418e-05, + "loss": 0.0001, + "step": 42128 + }, + { + "epoch": 39.3, + "learning_rate": 4.5087220149253737e-05, + "loss": 0.0006, + "step": 42132 + }, + { + "epoch": 39.31, + "learning_rate": 4.5086753731343285e-05, + "loss": 0.0001, + "step": 42136 + }, + { + "epoch": 39.31, + "learning_rate": 4.508628731343284e-05, + "loss": 0.0, + "step": 42140 + }, + { + "epoch": 39.31, + "learning_rate": 4.508582089552239e-05, + "loss": 0.0008, + "step": 42144 + }, + { + "epoch": 39.32, + "learning_rate": 4.508535447761194e-05, + "loss": 0.0, + "step": 42148 + }, + { + "epoch": 39.32, + "learning_rate": 4.50848880597015e-05, + "loss": 0.0, + "step": 42152 + }, + { + "epoch": 39.32, + "learning_rate": 4.5084421641791046e-05, + "loss": 0.002, + "step": 42156 + }, + { + "epoch": 39.33, + "learning_rate": 4.50839552238806e-05, + "loss": 0.0001, + "step": 42160 + }, + { + "epoch": 39.33, + "learning_rate": 4.5083488805970155e-05, + "loss": 0.0001, + "step": 42164 + }, + { + "epoch": 39.34, + "learning_rate": 4.5083022388059704e-05, + "loss": 0.0012, + "step": 42168 + }, + { + "epoch": 39.34, + "learning_rate": 4.508255597014925e-05, + "loss": 0.0014, + "step": 42172 + }, + { + "epoch": 39.34, + "learning_rate": 4.508208955223881e-05, + "loss": 0.0, + "step": 42176 + }, + { + "epoch": 39.35, + "learning_rate": 4.508162313432836e-05, + "loss": 0.0024, + "step": 42180 + }, + { + "epoch": 39.35, + "learning_rate": 4.508115671641791e-05, + "loss": 0.0, + "step": 42184 + }, + { + "epoch": 39.35, + "learning_rate": 4.5080690298507465e-05, + "loss": 0.0, + "step": 42188 + }, + { + "epoch": 39.36, + "learning_rate": 4.508022388059702e-05, + "loss": 0.0, + "step": 42192 + }, + { + "epoch": 39.36, + "learning_rate": 4.507975746268657e-05, + "loss": 0.0001, + "step": 42196 + }, + { + "epoch": 39.37, + "learning_rate": 4.507929104477612e-05, + "loss": 0.0, + "step": 42200 + }, + { + "epoch": 39.37, + "learning_rate": 4.507882462686567e-05, + "loss": 0.0002, + "step": 42204 + }, + { + "epoch": 39.37, + "learning_rate": 4.5078358208955226e-05, + "loss": 0.0, + "step": 42208 + }, + { + "epoch": 39.38, + "learning_rate": 4.507789179104478e-05, + "loss": 0.0001, + "step": 42212 + }, + { + "epoch": 39.38, + "learning_rate": 4.507742537313433e-05, + "loss": 0.0, + "step": 42216 + }, + { + "epoch": 39.38, + "learning_rate": 4.5076958955223884e-05, + "loss": 0.0, + "step": 42220 + }, + { + "epoch": 39.39, + "learning_rate": 4.507649253731344e-05, + "loss": 0.0002, + "step": 42224 + }, + { + "epoch": 39.39, + "learning_rate": 4.5076026119402987e-05, + "loss": 0.0001, + "step": 42228 + }, + { + "epoch": 39.4, + "learning_rate": 4.5075559701492535e-05, + "loss": 0.0001, + "step": 42232 + }, + { + "epoch": 39.4, + "learning_rate": 4.507509328358209e-05, + "loss": 0.0, + "step": 42236 + }, + { + "epoch": 39.4, + "learning_rate": 4.5074626865671645e-05, + "loss": 0.0, + "step": 42240 + }, + { + "epoch": 39.41, + "learning_rate": 4.50741604477612e-05, + "loss": 0.0002, + "step": 42244 + }, + { + "epoch": 39.41, + "learning_rate": 4.507369402985075e-05, + "loss": 0.0, + "step": 42248 + }, + { + "epoch": 39.41, + "learning_rate": 4.50732276119403e-05, + "loss": 0.0, + "step": 42252 + }, + { + "epoch": 39.42, + "learning_rate": 4.507276119402986e-05, + "loss": 0.0021, + "step": 42256 + }, + { + "epoch": 39.42, + "learning_rate": 4.5072294776119406e-05, + "loss": 0.0, + "step": 42260 + }, + { + "epoch": 39.43, + "learning_rate": 4.5071828358208954e-05, + "loss": 0.0011, + "step": 42264 + }, + { + "epoch": 39.43, + "learning_rate": 4.5071361940298515e-05, + "loss": 0.0001, + "step": 42268 + }, + { + "epoch": 39.43, + "learning_rate": 4.5070895522388063e-05, + "loss": 0.0, + "step": 42272 + }, + { + "epoch": 39.44, + "learning_rate": 4.507042910447761e-05, + "loss": 0.0, + "step": 42276 + }, + { + "epoch": 39.44, + "learning_rate": 4.5069962686567166e-05, + "loss": 0.0004, + "step": 42280 + }, + { + "epoch": 39.44, + "learning_rate": 4.506949626865672e-05, + "loss": 0.003, + "step": 42284 + }, + { + "epoch": 39.45, + "learning_rate": 4.506902985074627e-05, + "loss": 0.0007, + "step": 42288 + }, + { + "epoch": 39.45, + "learning_rate": 4.5068563432835824e-05, + "loss": 0.0001, + "step": 42292 + }, + { + "epoch": 39.46, + "learning_rate": 4.506809701492537e-05, + "loss": 0.0, + "step": 42296 + }, + { + "epoch": 39.46, + "learning_rate": 4.506763059701493e-05, + "loss": 0.0012, + "step": 42300 + }, + { + "epoch": 39.46, + "learning_rate": 4.506716417910448e-05, + "loss": 0.0109, + "step": 42304 + }, + { + "epoch": 39.47, + "learning_rate": 4.506669776119403e-05, + "loss": 0.0, + "step": 42308 + }, + { + "epoch": 39.47, + "learning_rate": 4.5066231343283585e-05, + "loss": 0.0053, + "step": 42312 + }, + { + "epoch": 39.47, + "learning_rate": 4.506576492537314e-05, + "loss": 0.0001, + "step": 42316 + }, + { + "epoch": 39.48, + "learning_rate": 4.506529850746269e-05, + "loss": 0.0001, + "step": 42320 + }, + { + "epoch": 39.48, + "learning_rate": 4.5064832089552237e-05, + "loss": 0.0001, + "step": 42324 + }, + { + "epoch": 39.49, + "learning_rate": 4.50643656716418e-05, + "loss": 0.0002, + "step": 42328 + }, + { + "epoch": 39.49, + "learning_rate": 4.5063899253731346e-05, + "loss": 0.0001, + "step": 42332 + }, + { + "epoch": 39.49, + "learning_rate": 4.5063432835820895e-05, + "loss": 0.0, + "step": 42336 + }, + { + "epoch": 39.5, + "learning_rate": 4.506296641791045e-05, + "loss": 0.0001, + "step": 42340 + }, + { + "epoch": 39.5, + "learning_rate": 4.5062500000000004e-05, + "loss": 0.0037, + "step": 42344 + }, + { + "epoch": 39.5, + "learning_rate": 4.506203358208955e-05, + "loss": 0.0001, + "step": 42348 + }, + { + "epoch": 39.51, + "learning_rate": 4.506156716417911e-05, + "loss": 0.0001, + "step": 42352 + }, + { + "epoch": 39.51, + "learning_rate": 4.5061100746268656e-05, + "loss": 0.0, + "step": 42356 + }, + { + "epoch": 39.51, + "learning_rate": 4.506063432835821e-05, + "loss": 0.0, + "step": 42360 + }, + { + "epoch": 39.52, + "learning_rate": 4.5060167910447765e-05, + "loss": 0.0, + "step": 42364 + }, + { + "epoch": 39.52, + "learning_rate": 4.5059701492537313e-05, + "loss": 0.0001, + "step": 42368 + }, + { + "epoch": 39.53, + "learning_rate": 4.505923507462687e-05, + "loss": 0.0, + "step": 42372 + }, + { + "epoch": 39.53, + "learning_rate": 4.505876865671642e-05, + "loss": 0.0001, + "step": 42376 + }, + { + "epoch": 39.53, + "learning_rate": 4.505830223880597e-05, + "loss": 0.0, + "step": 42380 + }, + { + "epoch": 39.54, + "learning_rate": 4.505783582089552e-05, + "loss": 0.0001, + "step": 42384 + }, + { + "epoch": 39.54, + "learning_rate": 4.505736940298508e-05, + "loss": 0.0001, + "step": 42388 + }, + { + "epoch": 39.54, + "learning_rate": 4.505690298507463e-05, + "loss": 0.0004, + "step": 42392 + }, + { + "epoch": 39.55, + "learning_rate": 4.505643656716418e-05, + "loss": 0.0005, + "step": 42396 + }, + { + "epoch": 39.55, + "learning_rate": 4.505597014925373e-05, + "loss": 0.0, + "step": 42400 + }, + { + "epoch": 39.56, + "learning_rate": 4.505550373134329e-05, + "loss": 0.0008, + "step": 42404 + }, + { + "epoch": 39.56, + "learning_rate": 4.505503731343284e-05, + "loss": 0.0, + "step": 42408 + }, + { + "epoch": 39.56, + "learning_rate": 4.505457089552239e-05, + "loss": 0.0002, + "step": 42412 + }, + { + "epoch": 39.57, + "learning_rate": 4.505410447761194e-05, + "loss": 0.0011, + "step": 42416 + }, + { + "epoch": 39.57, + "learning_rate": 4.50536380597015e-05, + "loss": 0.0007, + "step": 42420 + }, + { + "epoch": 39.57, + "learning_rate": 4.505317164179105e-05, + "loss": 0.0, + "step": 42424 + }, + { + "epoch": 39.58, + "learning_rate": 4.5052705223880596e-05, + "loss": 0.0, + "step": 42428 + }, + { + "epoch": 39.58, + "learning_rate": 4.505223880597015e-05, + "loss": 0.0001, + "step": 42432 + }, + { + "epoch": 39.59, + "learning_rate": 4.5051772388059706e-05, + "loss": 0.0001, + "step": 42436 + }, + { + "epoch": 39.59, + "learning_rate": 4.5051305970149254e-05, + "loss": 0.0003, + "step": 42440 + }, + { + "epoch": 39.59, + "learning_rate": 4.505083955223881e-05, + "loss": 0.0003, + "step": 42444 + }, + { + "epoch": 39.6, + "learning_rate": 4.5050373134328364e-05, + "loss": 0.0001, + "step": 42448 + }, + { + "epoch": 39.6, + "learning_rate": 4.504990671641791e-05, + "loss": 0.0006, + "step": 42452 + }, + { + "epoch": 39.6, + "learning_rate": 4.504944029850747e-05, + "loss": 0.0, + "step": 42456 + }, + { + "epoch": 39.61, + "learning_rate": 4.5048973880597015e-05, + "loss": 0.0004, + "step": 42460 + }, + { + "epoch": 39.61, + "learning_rate": 4.504850746268657e-05, + "loss": 0.0001, + "step": 42464 + }, + { + "epoch": 39.62, + "learning_rate": 4.5048041044776125e-05, + "loss": 0.0, + "step": 42468 + }, + { + "epoch": 39.62, + "learning_rate": 4.504757462686567e-05, + "loss": 0.0, + "step": 42472 + }, + { + "epoch": 39.62, + "learning_rate": 4.504710820895522e-05, + "loss": 0.0008, + "step": 42476 + }, + { + "epoch": 39.63, + "learning_rate": 4.504664179104478e-05, + "loss": 0.0, + "step": 42480 + }, + { + "epoch": 39.63, + "learning_rate": 4.504617537313433e-05, + "loss": 0.0015, + "step": 42484 + }, + { + "epoch": 39.63, + "learning_rate": 4.504570895522388e-05, + "loss": 0.0004, + "step": 42488 + }, + { + "epoch": 39.64, + "learning_rate": 4.5045242537313434e-05, + "loss": 0.0001, + "step": 42492 + }, + { + "epoch": 39.64, + "learning_rate": 4.504477611940299e-05, + "loss": 0.0002, + "step": 42496 + }, + { + "epoch": 39.65, + "learning_rate": 4.504430970149254e-05, + "loss": 0.0002, + "step": 42500 + }, + { + "epoch": 39.65, + "eval_exact_match": 0.7253384912959381, + "eval_exec": 0.758220502901354, + "eval_loss": 0.42694738507270813, + "eval_runtime": 1134.6223, + "eval_samples_per_second": 0.911, + "step": 42500 + }, + { + "epoch": 39.65, + "learning_rate": 4.504384328358209e-05, + "loss": 0.0001, + "step": 42504 + }, + { + "epoch": 39.65, + "learning_rate": 4.504337686567164e-05, + "loss": 0.0, + "step": 42508 + }, + { + "epoch": 39.66, + "learning_rate": 4.5042910447761195e-05, + "loss": 0.0, + "step": 42512 + }, + { + "epoch": 39.66, + "learning_rate": 4.504244402985075e-05, + "loss": 0.0001, + "step": 42516 + }, + { + "epoch": 39.66, + "learning_rate": 4.50419776119403e-05, + "loss": 0.0, + "step": 42520 + }, + { + "epoch": 39.67, + "learning_rate": 4.504151119402985e-05, + "loss": 0.0001, + "step": 42524 + }, + { + "epoch": 39.67, + "learning_rate": 4.504104477611941e-05, + "loss": 0.0, + "step": 42528 + }, + { + "epoch": 39.68, + "learning_rate": 4.5040578358208956e-05, + "loss": 0.0, + "step": 42532 + }, + { + "epoch": 39.68, + "learning_rate": 4.5040111940298504e-05, + "loss": 0.0016, + "step": 42536 + }, + { + "epoch": 39.68, + "learning_rate": 4.5039645522388066e-05, + "loss": 0.0002, + "step": 42540 + }, + { + "epoch": 39.69, + "learning_rate": 4.5039179104477614e-05, + "loss": 0.0003, + "step": 42544 + }, + { + "epoch": 39.69, + "learning_rate": 4.503871268656716e-05, + "loss": 0.0, + "step": 42548 + }, + { + "epoch": 39.69, + "learning_rate": 4.503824626865672e-05, + "loss": 0.0002, + "step": 42552 + }, + { + "epoch": 39.7, + "learning_rate": 4.503777985074627e-05, + "loss": 0.0, + "step": 42556 + }, + { + "epoch": 39.7, + "learning_rate": 4.503731343283582e-05, + "loss": 0.0, + "step": 42560 + }, + { + "epoch": 39.71, + "learning_rate": 4.5036847014925375e-05, + "loss": 0.0002, + "step": 42564 + }, + { + "epoch": 39.71, + "learning_rate": 4.503638059701492e-05, + "loss": 0.0001, + "step": 42568 + }, + { + "epoch": 39.71, + "learning_rate": 4.5035914179104485e-05, + "loss": 0.0001, + "step": 42572 + }, + { + "epoch": 39.72, + "learning_rate": 4.503544776119403e-05, + "loss": 0.0008, + "step": 42576 + }, + { + "epoch": 39.72, + "learning_rate": 4.503498134328358e-05, + "loss": 0.0001, + "step": 42580 + }, + { + "epoch": 39.72, + "learning_rate": 4.5034514925373136e-05, + "loss": 0.0006, + "step": 42584 + }, + { + "epoch": 39.73, + "learning_rate": 4.503404850746269e-05, + "loss": 0.0005, + "step": 42588 + }, + { + "epoch": 39.73, + "learning_rate": 4.503358208955224e-05, + "loss": 0.0, + "step": 42592 + }, + { + "epoch": 39.73, + "learning_rate": 4.5033115671641794e-05, + "loss": 0.0, + "step": 42596 + }, + { + "epoch": 39.74, + "learning_rate": 4.503264925373135e-05, + "loss": 0.0001, + "step": 42600 + }, + { + "epoch": 39.74, + "learning_rate": 4.50321828358209e-05, + "loss": 0.0, + "step": 42604 + }, + { + "epoch": 39.75, + "learning_rate": 4.503171641791045e-05, + "loss": 0.0004, + "step": 42608 + }, + { + "epoch": 39.75, + "learning_rate": 4.503125e-05, + "loss": 0.0001, + "step": 42612 + }, + { + "epoch": 39.75, + "learning_rate": 4.5030783582089555e-05, + "loss": 0.0002, + "step": 42616 + }, + { + "epoch": 39.76, + "learning_rate": 4.503031716417911e-05, + "loss": 0.0, + "step": 42620 + }, + { + "epoch": 39.76, + "learning_rate": 4.502985074626866e-05, + "loss": 0.0, + "step": 42624 + }, + { + "epoch": 39.76, + "learning_rate": 4.5029384328358206e-05, + "loss": 0.0, + "step": 42628 + }, + { + "epoch": 39.77, + "learning_rate": 4.502891791044777e-05, + "loss": 0.0, + "step": 42632 + }, + { + "epoch": 39.77, + "learning_rate": 4.5028451492537316e-05, + "loss": 0.0, + "step": 42636 + }, + { + "epoch": 39.78, + "learning_rate": 4.5027985074626864e-05, + "loss": 0.0001, + "step": 42640 + }, + { + "epoch": 39.78, + "learning_rate": 4.502751865671642e-05, + "loss": 0.0004, + "step": 42644 + }, + { + "epoch": 39.78, + "learning_rate": 4.5027052238805974e-05, + "loss": 0.0, + "step": 42648 + }, + { + "epoch": 39.79, + "learning_rate": 4.502658582089552e-05, + "loss": 0.0, + "step": 42652 + }, + { + "epoch": 39.79, + "learning_rate": 4.502611940298508e-05, + "loss": 0.0, + "step": 42656 + }, + { + "epoch": 39.79, + "learning_rate": 4.502565298507463e-05, + "loss": 0.0002, + "step": 42660 + }, + { + "epoch": 39.8, + "learning_rate": 4.502518656716418e-05, + "loss": 0.0, + "step": 42664 + }, + { + "epoch": 39.8, + "learning_rate": 4.5024720149253735e-05, + "loss": 0.0007, + "step": 42668 + }, + { + "epoch": 39.81, + "learning_rate": 4.502425373134328e-05, + "loss": 0.0002, + "step": 42672 + }, + { + "epoch": 39.81, + "learning_rate": 4.502378731343284e-05, + "loss": 0.0, + "step": 42676 + }, + { + "epoch": 39.81, + "learning_rate": 4.502332089552239e-05, + "loss": 0.0, + "step": 42680 + }, + { + "epoch": 39.82, + "learning_rate": 4.502285447761194e-05, + "loss": 0.0008, + "step": 42684 + }, + { + "epoch": 39.82, + "learning_rate": 4.502238805970149e-05, + "loss": 0.0002, + "step": 42688 + }, + { + "epoch": 39.82, + "learning_rate": 4.502192164179105e-05, + "loss": 0.0004, + "step": 42692 + }, + { + "epoch": 39.83, + "learning_rate": 4.50214552238806e-05, + "loss": 0.0031, + "step": 42696 + }, + { + "epoch": 39.83, + "learning_rate": 4.502098880597015e-05, + "loss": 0.0, + "step": 42700 + }, + { + "epoch": 39.84, + "learning_rate": 4.50205223880597e-05, + "loss": 0.0003, + "step": 42704 + }, + { + "epoch": 39.84, + "learning_rate": 4.502005597014926e-05, + "loss": 0.0, + "step": 42708 + }, + { + "epoch": 39.84, + "learning_rate": 4.5019589552238805e-05, + "loss": 0.001, + "step": 42712 + }, + { + "epoch": 39.85, + "learning_rate": 4.501912313432836e-05, + "loss": 0.0, + "step": 42716 + }, + { + "epoch": 39.85, + "learning_rate": 4.5018656716417915e-05, + "loss": 0.0, + "step": 42720 + }, + { + "epoch": 39.85, + "learning_rate": 4.501819029850746e-05, + "loss": 0.0, + "step": 42724 + }, + { + "epoch": 39.86, + "learning_rate": 4.501772388059702e-05, + "loss": 0.0001, + "step": 42728 + }, + { + "epoch": 39.86, + "learning_rate": 4.5017257462686566e-05, + "loss": 0.0026, + "step": 42732 + }, + { + "epoch": 39.87, + "learning_rate": 4.501679104477612e-05, + "loss": 0.0008, + "step": 42736 + }, + { + "epoch": 39.87, + "learning_rate": 4.5016324626865676e-05, + "loss": 0.0, + "step": 42740 + }, + { + "epoch": 39.87, + "learning_rate": 4.5015858208955224e-05, + "loss": 0.0, + "step": 42744 + }, + { + "epoch": 39.88, + "learning_rate": 4.501539179104478e-05, + "loss": 0.0001, + "step": 42748 + }, + { + "epoch": 39.88, + "learning_rate": 4.5014925373134334e-05, + "loss": 0.0, + "step": 42752 + }, + { + "epoch": 39.88, + "learning_rate": 4.501445895522388e-05, + "loss": 0.0, + "step": 42756 + }, + { + "epoch": 39.89, + "learning_rate": 4.501399253731344e-05, + "loss": 0.0001, + "step": 42760 + }, + { + "epoch": 39.89, + "learning_rate": 4.5013526119402985e-05, + "loss": 0.0055, + "step": 42764 + }, + { + "epoch": 39.9, + "learning_rate": 4.501305970149254e-05, + "loss": 0.0, + "step": 42768 + }, + { + "epoch": 39.9, + "learning_rate": 4.5012593283582095e-05, + "loss": 0.0006, + "step": 42772 + }, + { + "epoch": 39.9, + "learning_rate": 4.501212686567164e-05, + "loss": 0.0, + "step": 42776 + }, + { + "epoch": 39.91, + "learning_rate": 4.50116604477612e-05, + "loss": 0.0004, + "step": 42780 + }, + { + "epoch": 39.91, + "learning_rate": 4.501119402985075e-05, + "loss": 0.0001, + "step": 42784 + }, + { + "epoch": 39.91, + "learning_rate": 4.50107276119403e-05, + "loss": 0.0001, + "step": 42788 + }, + { + "epoch": 39.92, + "learning_rate": 4.501026119402985e-05, + "loss": 0.0006, + "step": 42792 + }, + { + "epoch": 39.92, + "learning_rate": 4.5009794776119404e-05, + "loss": 0.0028, + "step": 42796 + }, + { + "epoch": 39.93, + "learning_rate": 4.500932835820896e-05, + "loss": 0.0006, + "step": 42800 + }, + { + "epoch": 39.93, + "learning_rate": 4.500886194029851e-05, + "loss": 0.0002, + "step": 42804 + }, + { + "epoch": 39.93, + "learning_rate": 4.500839552238806e-05, + "loss": 0.0001, + "step": 42808 + }, + { + "epoch": 39.94, + "learning_rate": 4.500792910447762e-05, + "loss": 0.0001, + "step": 42812 + }, + { + "epoch": 39.94, + "learning_rate": 4.5007462686567165e-05, + "loss": 0.0001, + "step": 42816 + }, + { + "epoch": 39.94, + "learning_rate": 4.500699626865672e-05, + "loss": 0.0, + "step": 42820 + }, + { + "epoch": 39.95, + "learning_rate": 4.500652985074627e-05, + "loss": 0.0002, + "step": 42824 + }, + { + "epoch": 39.95, + "learning_rate": 4.500606343283582e-05, + "loss": 0.0001, + "step": 42828 + }, + { + "epoch": 39.96, + "learning_rate": 4.500559701492538e-05, + "loss": 0.0004, + "step": 42832 + }, + { + "epoch": 39.96, + "learning_rate": 4.5005130597014926e-05, + "loss": 0.0, + "step": 42836 + }, + { + "epoch": 39.96, + "learning_rate": 4.500466417910448e-05, + "loss": 0.0006, + "step": 42840 + }, + { + "epoch": 39.97, + "learning_rate": 4.5004197761194036e-05, + "loss": 0.0023, + "step": 42844 + }, + { + "epoch": 39.97, + "learning_rate": 4.5003731343283584e-05, + "loss": 0.0, + "step": 42848 + }, + { + "epoch": 39.97, + "learning_rate": 4.500326492537313e-05, + "loss": 0.0045, + "step": 42852 + }, + { + "epoch": 39.98, + "learning_rate": 4.500279850746269e-05, + "loss": 0.0001, + "step": 42856 + }, + { + "epoch": 39.98, + "learning_rate": 4.500233208955224e-05, + "loss": 0.0001, + "step": 42860 + }, + { + "epoch": 39.98, + "learning_rate": 4.500186567164179e-05, + "loss": 0.0188, + "step": 42864 + }, + { + "epoch": 39.99, + "learning_rate": 4.5001399253731345e-05, + "loss": 0.0, + "step": 42868 + }, + { + "epoch": 39.99, + "learning_rate": 4.50009328358209e-05, + "loss": 0.0, + "step": 42872 + }, + { + "epoch": 40.0, + "learning_rate": 4.500046641791045e-05, + "loss": 0.0005, + "step": 42876 + }, + { + "epoch": 40.0, + "learning_rate": 4.5e-05, + "loss": 0.0057, + "step": 42880 + }, + { + "epoch": 40.0, + "learning_rate": 4.499953358208955e-05, + "loss": 0.0001, + "step": 42884 + }, + { + "epoch": 40.01, + "learning_rate": 4.4999067164179106e-05, + "loss": 0.0001, + "step": 42888 + }, + { + "epoch": 40.01, + "learning_rate": 4.499860074626866e-05, + "loss": 0.0, + "step": 42892 + }, + { + "epoch": 40.01, + "learning_rate": 4.499813432835821e-05, + "loss": 0.0, + "step": 42896 + }, + { + "epoch": 40.02, + "learning_rate": 4.4997667910447764e-05, + "loss": 0.0, + "step": 42900 + }, + { + "epoch": 40.02, + "learning_rate": 4.499720149253732e-05, + "loss": 0.0, + "step": 42904 + }, + { + "epoch": 40.03, + "learning_rate": 4.499673507462687e-05, + "loss": 0.0001, + "step": 42908 + }, + { + "epoch": 40.03, + "learning_rate": 4.499626865671642e-05, + "loss": 0.0015, + "step": 42912 + }, + { + "epoch": 40.03, + "learning_rate": 4.499580223880597e-05, + "loss": 0.001, + "step": 42916 + }, + { + "epoch": 40.04, + "learning_rate": 4.4995335820895525e-05, + "loss": 0.0001, + "step": 42920 + }, + { + "epoch": 40.04, + "learning_rate": 4.499486940298508e-05, + "loss": 0.0004, + "step": 42924 + }, + { + "epoch": 40.04, + "learning_rate": 4.499440298507463e-05, + "loss": 0.0, + "step": 42928 + }, + { + "epoch": 40.05, + "learning_rate": 4.499393656716418e-05, + "loss": 0.0004, + "step": 42932 + }, + { + "epoch": 40.05, + "learning_rate": 4.499347014925374e-05, + "loss": 0.0017, + "step": 42936 + }, + { + "epoch": 40.06, + "learning_rate": 4.4993003731343286e-05, + "loss": 0.0006, + "step": 42940 + }, + { + "epoch": 40.06, + "learning_rate": 4.4992537313432834e-05, + "loss": 0.0, + "step": 42944 + }, + { + "epoch": 40.06, + "learning_rate": 4.4992070895522396e-05, + "loss": 0.0004, + "step": 42948 + }, + { + "epoch": 40.07, + "learning_rate": 4.4991604477611944e-05, + "loss": 0.0, + "step": 42952 + }, + { + "epoch": 40.07, + "learning_rate": 4.499113805970149e-05, + "loss": 0.0001, + "step": 42956 + }, + { + "epoch": 40.07, + "learning_rate": 4.499067164179105e-05, + "loss": 0.0, + "step": 42960 + }, + { + "epoch": 40.08, + "learning_rate": 4.49902052238806e-05, + "loss": 0.0, + "step": 42964 + }, + { + "epoch": 40.08, + "learning_rate": 4.498973880597015e-05, + "loss": 0.0002, + "step": 42968 + }, + { + "epoch": 40.09, + "learning_rate": 4.4989272388059705e-05, + "loss": 0.0001, + "step": 42972 + }, + { + "epoch": 40.09, + "learning_rate": 4.498880597014925e-05, + "loss": 0.0001, + "step": 42976 + }, + { + "epoch": 40.09, + "learning_rate": 4.498833955223881e-05, + "loss": 0.0, + "step": 42980 + }, + { + "epoch": 40.1, + "learning_rate": 4.498787313432836e-05, + "loss": 0.0001, + "step": 42984 + }, + { + "epoch": 40.1, + "learning_rate": 4.498740671641791e-05, + "loss": 0.0, + "step": 42988 + }, + { + "epoch": 40.1, + "learning_rate": 4.4986940298507466e-05, + "loss": 0.0, + "step": 42992 + }, + { + "epoch": 40.11, + "learning_rate": 4.498647388059702e-05, + "loss": 0.0001, + "step": 42996 + }, + { + "epoch": 40.11, + "learning_rate": 4.498600746268657e-05, + "loss": 0.0, + "step": 43000 + }, + { + "epoch": 40.11, + "eval_exact_match": 0.7475822050290135, + "eval_exec": 0.7736943907156673, + "eval_loss": 0.4348256289958954, + "eval_runtime": 1267.9199, + "eval_samples_per_second": 0.816, + "step": 43000 + }, + { + "epoch": 40.12, + "learning_rate": 4.498554104477612e-05, + "loss": 0.0, + "step": 43004 + }, + { + "epoch": 40.12, + "learning_rate": 4.498507462686568e-05, + "loss": 0.0, + "step": 43008 + }, + { + "epoch": 40.12, + "learning_rate": 4.498460820895523e-05, + "loss": 0.0, + "step": 43012 + }, + { + "epoch": 40.13, + "learning_rate": 4.4984141791044775e-05, + "loss": 0.0013, + "step": 43016 + }, + { + "epoch": 40.13, + "learning_rate": 4.498367537313433e-05, + "loss": 0.0, + "step": 43020 + }, + { + "epoch": 40.13, + "learning_rate": 4.4983208955223885e-05, + "loss": 0.0, + "step": 43024 + }, + { + "epoch": 40.14, + "learning_rate": 4.498274253731343e-05, + "loss": 0.0024, + "step": 43028 + }, + { + "epoch": 40.14, + "learning_rate": 4.498227611940299e-05, + "loss": 0.0, + "step": 43032 + }, + { + "epoch": 40.15, + "learning_rate": 4.4981809701492536e-05, + "loss": 0.0001, + "step": 43036 + }, + { + "epoch": 40.15, + "learning_rate": 4.498134328358209e-05, + "loss": 0.0, + "step": 43040 + }, + { + "epoch": 40.15, + "learning_rate": 4.4980876865671646e-05, + "loss": 0.0001, + "step": 43044 + }, + { + "epoch": 40.16, + "learning_rate": 4.4980410447761194e-05, + "loss": 0.0001, + "step": 43048 + }, + { + "epoch": 40.16, + "learning_rate": 4.497994402985075e-05, + "loss": 0.0, + "step": 43052 + }, + { + "epoch": 40.16, + "learning_rate": 4.4979477611940304e-05, + "loss": 0.0, + "step": 43056 + }, + { + "epoch": 40.17, + "learning_rate": 4.497901119402985e-05, + "loss": 0.0, + "step": 43060 + }, + { + "epoch": 40.17, + "learning_rate": 4.497854477611941e-05, + "loss": 0.0, + "step": 43064 + }, + { + "epoch": 40.18, + "learning_rate": 4.497807835820896e-05, + "loss": 0.0012, + "step": 43068 + }, + { + "epoch": 40.18, + "learning_rate": 4.497761194029851e-05, + "loss": 0.0, + "step": 43072 + }, + { + "epoch": 40.18, + "learning_rate": 4.4977145522388065e-05, + "loss": 0.0006, + "step": 43076 + }, + { + "epoch": 40.19, + "learning_rate": 4.497667910447761e-05, + "loss": 0.0001, + "step": 43080 + }, + { + "epoch": 40.19, + "learning_rate": 4.497621268656717e-05, + "loss": 0.0, + "step": 43084 + }, + { + "epoch": 40.19, + "learning_rate": 4.497574626865672e-05, + "loss": 0.0, + "step": 43088 + }, + { + "epoch": 40.2, + "learning_rate": 4.497527985074627e-05, + "loss": 0.0001, + "step": 43092 + }, + { + "epoch": 40.2, + "learning_rate": 4.497481343283582e-05, + "loss": 0.0, + "step": 43096 + }, + { + "epoch": 40.21, + "learning_rate": 4.497434701492538e-05, + "loss": 0.0, + "step": 43100 + }, + { + "epoch": 40.21, + "learning_rate": 4.497388059701493e-05, + "loss": 0.0004, + "step": 43104 + }, + { + "epoch": 40.21, + "learning_rate": 4.497341417910448e-05, + "loss": 0.0, + "step": 43108 + }, + { + "epoch": 40.22, + "learning_rate": 4.497294776119403e-05, + "loss": 0.0, + "step": 43112 + }, + { + "epoch": 40.22, + "learning_rate": 4.4972481343283587e-05, + "loss": 0.0, + "step": 43116 + }, + { + "epoch": 40.22, + "learning_rate": 4.4972014925373135e-05, + "loss": 0.0003, + "step": 43120 + }, + { + "epoch": 40.23, + "learning_rate": 4.497154850746269e-05, + "loss": 0.0002, + "step": 43124 + }, + { + "epoch": 40.23, + "learning_rate": 4.4971082089552244e-05, + "loss": 0.0001, + "step": 43128 + }, + { + "epoch": 40.24, + "learning_rate": 4.497061567164179e-05, + "loss": 0.0003, + "step": 43132 + }, + { + "epoch": 40.24, + "learning_rate": 4.497014925373135e-05, + "loss": 0.0, + "step": 43136 + }, + { + "epoch": 40.24, + "learning_rate": 4.4969682835820896e-05, + "loss": 0.0018, + "step": 43140 + }, + { + "epoch": 40.25, + "learning_rate": 4.496921641791045e-05, + "loss": 0.0003, + "step": 43144 + }, + { + "epoch": 40.25, + "learning_rate": 4.4968750000000005e-05, + "loss": 0.0001, + "step": 43148 + }, + { + "epoch": 40.25, + "learning_rate": 4.4968283582089554e-05, + "loss": 0.0001, + "step": 43152 + }, + { + "epoch": 40.26, + "learning_rate": 4.49678171641791e-05, + "loss": 0.0, + "step": 43156 + }, + { + "epoch": 40.26, + "learning_rate": 4.4967350746268663e-05, + "loss": 0.0, + "step": 43160 + }, + { + "epoch": 40.26, + "learning_rate": 4.496688432835821e-05, + "loss": 0.0, + "step": 43164 + }, + { + "epoch": 40.27, + "learning_rate": 4.496641791044776e-05, + "loss": 0.0013, + "step": 43168 + }, + { + "epoch": 40.27, + "learning_rate": 4.4965951492537315e-05, + "loss": 0.0022, + "step": 43172 + }, + { + "epoch": 40.28, + "learning_rate": 4.496548507462687e-05, + "loss": 0.0003, + "step": 43176 + }, + { + "epoch": 40.28, + "learning_rate": 4.496501865671642e-05, + "loss": 0.0008, + "step": 43180 + }, + { + "epoch": 40.28, + "learning_rate": 4.496455223880597e-05, + "loss": 0.0, + "step": 43184 + }, + { + "epoch": 40.29, + "learning_rate": 4.496408582089552e-05, + "loss": 0.0001, + "step": 43188 + }, + { + "epoch": 40.29, + "learning_rate": 4.4963619402985076e-05, + "loss": 0.0, + "step": 43192 + }, + { + "epoch": 40.29, + "learning_rate": 4.496315298507463e-05, + "loss": 0.0001, + "step": 43196 + }, + { + "epoch": 40.3, + "learning_rate": 4.496268656716418e-05, + "loss": 0.0, + "step": 43200 + }, + { + "epoch": 40.3, + "learning_rate": 4.4962220149253734e-05, + "loss": 0.0001, + "step": 43204 + }, + { + "epoch": 40.31, + "learning_rate": 4.496175373134329e-05, + "loss": 0.0001, + "step": 43208 + }, + { + "epoch": 40.31, + "learning_rate": 4.4961287313432837e-05, + "loss": 0.0, + "step": 43212 + }, + { + "epoch": 40.31, + "learning_rate": 4.4960820895522385e-05, + "loss": 0.0008, + "step": 43216 + }, + { + "epoch": 40.32, + "learning_rate": 4.4960354477611946e-05, + "loss": 0.0002, + "step": 43220 + }, + { + "epoch": 40.32, + "learning_rate": 4.4959888059701494e-05, + "loss": 0.0, + "step": 43224 + }, + { + "epoch": 40.32, + "learning_rate": 4.495942164179105e-05, + "loss": 0.0025, + "step": 43228 + }, + { + "epoch": 40.33, + "learning_rate": 4.49589552238806e-05, + "loss": 0.0001, + "step": 43232 + }, + { + "epoch": 40.33, + "learning_rate": 4.495848880597015e-05, + "loss": 0.0, + "step": 43236 + }, + { + "epoch": 40.34, + "learning_rate": 4.495802238805971e-05, + "loss": 0.0032, + "step": 43240 + }, + { + "epoch": 40.34, + "learning_rate": 4.4957555970149255e-05, + "loss": 0.0, + "step": 43244 + }, + { + "epoch": 40.34, + "learning_rate": 4.4957089552238804e-05, + "loss": 0.0, + "step": 43248 + }, + { + "epoch": 40.35, + "learning_rate": 4.4956623134328365e-05, + "loss": 0.0097, + "step": 43252 + }, + { + "epoch": 40.35, + "learning_rate": 4.4956156716417913e-05, + "loss": 0.0077, + "step": 43256 + }, + { + "epoch": 40.35, + "learning_rate": 4.495569029850746e-05, + "loss": 0.0002, + "step": 43260 + }, + { + "epoch": 40.36, + "learning_rate": 4.4955223880597016e-05, + "loss": 0.0002, + "step": 43264 + }, + { + "epoch": 40.36, + "learning_rate": 4.495475746268657e-05, + "loss": 0.0, + "step": 43268 + }, + { + "epoch": 40.37, + "learning_rate": 4.495429104477612e-05, + "loss": 0.0004, + "step": 43272 + }, + { + "epoch": 40.37, + "learning_rate": 4.4953824626865674e-05, + "loss": 0.0, + "step": 43276 + }, + { + "epoch": 40.37, + "learning_rate": 4.495335820895523e-05, + "loss": 0.0011, + "step": 43280 + }, + { + "epoch": 40.38, + "learning_rate": 4.495289179104478e-05, + "loss": 0.0002, + "step": 43284 + }, + { + "epoch": 40.38, + "learning_rate": 4.495242537313433e-05, + "loss": 0.0, + "step": 43288 + }, + { + "epoch": 40.38, + "learning_rate": 4.495195895522388e-05, + "loss": 0.0, + "step": 43292 + }, + { + "epoch": 40.39, + "learning_rate": 4.4951492537313435e-05, + "loss": 0.0039, + "step": 43296 + }, + { + "epoch": 40.39, + "learning_rate": 4.495102611940299e-05, + "loss": 0.0004, + "step": 43300 + }, + { + "epoch": 40.4, + "learning_rate": 4.495055970149254e-05, + "loss": 0.0003, + "step": 43304 + }, + { + "epoch": 40.4, + "learning_rate": 4.4950093283582087e-05, + "loss": 0.0001, + "step": 43308 + }, + { + "epoch": 40.4, + "learning_rate": 4.494962686567165e-05, + "loss": 0.0001, + "step": 43312 + }, + { + "epoch": 40.41, + "learning_rate": 4.4949160447761196e-05, + "loss": 0.0006, + "step": 43316 + }, + { + "epoch": 40.41, + "learning_rate": 4.4948694029850745e-05, + "loss": 0.0, + "step": 43320 + }, + { + "epoch": 40.41, + "learning_rate": 4.49482276119403e-05, + "loss": 0.0, + "step": 43324 + }, + { + "epoch": 40.42, + "learning_rate": 4.4947761194029854e-05, + "loss": 0.0001, + "step": 43328 + }, + { + "epoch": 40.42, + "learning_rate": 4.49472947761194e-05, + "loss": 0.0, + "step": 43332 + }, + { + "epoch": 40.43, + "learning_rate": 4.494682835820896e-05, + "loss": 0.0058, + "step": 43336 + }, + { + "epoch": 40.43, + "learning_rate": 4.494636194029851e-05, + "loss": 0.0001, + "step": 43340 + }, + { + "epoch": 40.43, + "learning_rate": 4.494589552238806e-05, + "loss": 0.0001, + "step": 43344 + }, + { + "epoch": 40.44, + "learning_rate": 4.4945429104477615e-05, + "loss": 0.0006, + "step": 43348 + }, + { + "epoch": 40.44, + "learning_rate": 4.4944962686567163e-05, + "loss": 0.0006, + "step": 43352 + }, + { + "epoch": 40.44, + "learning_rate": 4.494449626865672e-05, + "loss": 0.0001, + "step": 43356 + }, + { + "epoch": 40.45, + "learning_rate": 4.494402985074627e-05, + "loss": 0.0, + "step": 43360 + }, + { + "epoch": 40.45, + "learning_rate": 4.494356343283582e-05, + "loss": 0.0013, + "step": 43364 + }, + { + "epoch": 40.46, + "learning_rate": 4.494309701492537e-05, + "loss": 0.0009, + "step": 43368 + }, + { + "epoch": 40.46, + "learning_rate": 4.494263059701493e-05, + "loss": 0.0001, + "step": 43372 + }, + { + "epoch": 40.46, + "learning_rate": 4.494216417910448e-05, + "loss": 0.0, + "step": 43376 + }, + { + "epoch": 40.47, + "learning_rate": 4.494169776119403e-05, + "loss": 0.0, + "step": 43380 + }, + { + "epoch": 40.47, + "learning_rate": 4.494123134328358e-05, + "loss": 0.0001, + "step": 43384 + }, + { + "epoch": 40.47, + "learning_rate": 4.494076492537314e-05, + "loss": 0.0, + "step": 43388 + }, + { + "epoch": 40.48, + "learning_rate": 4.494029850746269e-05, + "loss": 0.0, + "step": 43392 + }, + { + "epoch": 40.48, + "learning_rate": 4.493983208955224e-05, + "loss": 0.001, + "step": 43396 + }, + { + "epoch": 40.49, + "learning_rate": 4.4939365671641795e-05, + "loss": 0.0, + "step": 43400 + }, + { + "epoch": 40.49, + "learning_rate": 4.493889925373135e-05, + "loss": 0.0, + "step": 43404 + }, + { + "epoch": 40.49, + "learning_rate": 4.49384328358209e-05, + "loss": 0.0, + "step": 43408 + }, + { + "epoch": 40.5, + "learning_rate": 4.4937966417910446e-05, + "loss": 0.0002, + "step": 43412 + }, + { + "epoch": 40.5, + "learning_rate": 4.49375e-05, + "loss": 0.0018, + "step": 43416 + }, + { + "epoch": 40.5, + "learning_rate": 4.4937033582089556e-05, + "loss": 0.0, + "step": 43420 + }, + { + "epoch": 40.51, + "learning_rate": 4.4936567164179104e-05, + "loss": 0.003, + "step": 43424 + }, + { + "epoch": 40.51, + "learning_rate": 4.493610074626866e-05, + "loss": 0.0008, + "step": 43428 + }, + { + "epoch": 40.51, + "learning_rate": 4.4935634328358214e-05, + "loss": 0.0, + "step": 43432 + }, + { + "epoch": 40.52, + "learning_rate": 4.493516791044776e-05, + "loss": 0.009, + "step": 43436 + }, + { + "epoch": 40.52, + "learning_rate": 4.493470149253732e-05, + "loss": 0.0, + "step": 43440 + }, + { + "epoch": 40.53, + "learning_rate": 4.4934235074626865e-05, + "loss": 0.0, + "step": 43444 + }, + { + "epoch": 40.53, + "learning_rate": 4.493376865671642e-05, + "loss": 0.0, + "step": 43448 + }, + { + "epoch": 40.53, + "learning_rate": 4.4933302238805975e-05, + "loss": 0.0, + "step": 43452 + }, + { + "epoch": 40.54, + "learning_rate": 4.493283582089552e-05, + "loss": 0.0004, + "step": 43456 + }, + { + "epoch": 40.54, + "learning_rate": 4.493236940298508e-05, + "loss": 0.0004, + "step": 43460 + }, + { + "epoch": 40.54, + "learning_rate": 4.493190298507463e-05, + "loss": 0.0, + "step": 43464 + }, + { + "epoch": 40.55, + "learning_rate": 4.493143656716418e-05, + "loss": 0.0, + "step": 43468 + }, + { + "epoch": 40.55, + "learning_rate": 4.493097014925373e-05, + "loss": 0.0, + "step": 43472 + }, + { + "epoch": 40.56, + "learning_rate": 4.4930503731343284e-05, + "loss": 0.0001, + "step": 43476 + }, + { + "epoch": 40.56, + "learning_rate": 4.493003731343284e-05, + "loss": 0.0008, + "step": 43480 + }, + { + "epoch": 40.56, + "learning_rate": 4.492957089552239e-05, + "loss": 0.0001, + "step": 43484 + }, + { + "epoch": 40.57, + "learning_rate": 4.492910447761194e-05, + "loss": 0.0, + "step": 43488 + }, + { + "epoch": 40.57, + "learning_rate": 4.49286380597015e-05, + "loss": 0.0012, + "step": 43492 + }, + { + "epoch": 40.57, + "learning_rate": 4.4928171641791045e-05, + "loss": 0.0, + "step": 43496 + }, + { + "epoch": 40.58, + "learning_rate": 4.49277052238806e-05, + "loss": 0.0013, + "step": 43500 + }, + { + "epoch": 40.58, + "eval_exact_match": 0.7292069632495164, + "eval_exec": 0.758220502901354, + "eval_loss": 0.4297082722187042, + "eval_runtime": 1140.8475, + "eval_samples_per_second": 0.906, + "step": 43500 + }, + { + "epoch": 40.58, + "learning_rate": 4.492723880597015e-05, + "loss": 0.0, + "step": 43504 + }, + { + "epoch": 40.59, + "learning_rate": 4.49267723880597e-05, + "loss": 0.0001, + "step": 43508 + }, + { + "epoch": 40.59, + "learning_rate": 4.492630597014926e-05, + "loss": 0.0003, + "step": 43512 + }, + { + "epoch": 40.59, + "learning_rate": 4.4925839552238806e-05, + "loss": 0.0001, + "step": 43516 + }, + { + "epoch": 40.6, + "learning_rate": 4.492537313432836e-05, + "loss": 0.0108, + "step": 43520 + }, + { + "epoch": 40.6, + "learning_rate": 4.4924906716417916e-05, + "loss": 0.0, + "step": 43524 + }, + { + "epoch": 40.6, + "learning_rate": 4.4924440298507464e-05, + "loss": 0.0001, + "step": 43528 + }, + { + "epoch": 40.61, + "learning_rate": 4.492397388059701e-05, + "loss": 0.0, + "step": 43532 + }, + { + "epoch": 40.61, + "learning_rate": 4.492350746268657e-05, + "loss": 0.0001, + "step": 43536 + }, + { + "epoch": 40.62, + "learning_rate": 4.492304104477612e-05, + "loss": 0.0, + "step": 43540 + }, + { + "epoch": 40.62, + "learning_rate": 4.492257462686567e-05, + "loss": 0.005, + "step": 43544 + }, + { + "epoch": 40.62, + "learning_rate": 4.4922108208955225e-05, + "loss": 0.0, + "step": 43548 + }, + { + "epoch": 40.63, + "learning_rate": 4.492164179104478e-05, + "loss": 0.0001, + "step": 43552 + }, + { + "epoch": 40.63, + "learning_rate": 4.4921175373134335e-05, + "loss": 0.0003, + "step": 43556 + }, + { + "epoch": 40.63, + "learning_rate": 4.492070895522388e-05, + "loss": 0.0024, + "step": 43560 + }, + { + "epoch": 40.64, + "learning_rate": 4.492024253731343e-05, + "loss": 0.0004, + "step": 43564 + }, + { + "epoch": 40.64, + "learning_rate": 4.491977611940299e-05, + "loss": 0.0016, + "step": 43568 + }, + { + "epoch": 40.65, + "learning_rate": 4.491930970149254e-05, + "loss": 0.0014, + "step": 43572 + }, + { + "epoch": 40.65, + "learning_rate": 4.491884328358209e-05, + "loss": 0.0, + "step": 43576 + }, + { + "epoch": 40.65, + "learning_rate": 4.4918376865671644e-05, + "loss": 0.0018, + "step": 43580 + }, + { + "epoch": 40.66, + "learning_rate": 4.49179104477612e-05, + "loss": 0.0004, + "step": 43584 + }, + { + "epoch": 40.66, + "learning_rate": 4.491744402985075e-05, + "loss": 0.0006, + "step": 43588 + }, + { + "epoch": 40.66, + "learning_rate": 4.49169776119403e-05, + "loss": 0.0, + "step": 43592 + }, + { + "epoch": 40.67, + "learning_rate": 4.491651119402985e-05, + "loss": 0.0, + "step": 43596 + }, + { + "epoch": 40.67, + "learning_rate": 4.4916044776119405e-05, + "loss": 0.0014, + "step": 43600 + }, + { + "epoch": 40.68, + "learning_rate": 4.491557835820896e-05, + "loss": 0.0001, + "step": 43604 + }, + { + "epoch": 40.68, + "learning_rate": 4.491511194029851e-05, + "loss": 0.0, + "step": 43608 + }, + { + "epoch": 40.68, + "learning_rate": 4.491464552238806e-05, + "loss": 0.0, + "step": 43612 + }, + { + "epoch": 40.69, + "learning_rate": 4.491417910447762e-05, + "loss": 0.0, + "step": 43616 + }, + { + "epoch": 40.69, + "learning_rate": 4.4913712686567166e-05, + "loss": 0.0028, + "step": 43620 + }, + { + "epoch": 40.69, + "learning_rate": 4.4913246268656714e-05, + "loss": 0.0, + "step": 43624 + }, + { + "epoch": 40.7, + "learning_rate": 4.4912779850746276e-05, + "loss": 0.0, + "step": 43628 + }, + { + "epoch": 40.7, + "learning_rate": 4.4912313432835824e-05, + "loss": 0.0001, + "step": 43632 + }, + { + "epoch": 40.71, + "learning_rate": 4.491184701492537e-05, + "loss": 0.0001, + "step": 43636 + }, + { + "epoch": 40.71, + "learning_rate": 4.491138059701493e-05, + "loss": 0.0, + "step": 43640 + }, + { + "epoch": 40.71, + "learning_rate": 4.491091417910448e-05, + "loss": 0.0, + "step": 43644 + }, + { + "epoch": 40.72, + "learning_rate": 4.491044776119403e-05, + "loss": 0.0, + "step": 43648 + }, + { + "epoch": 40.72, + "learning_rate": 4.4909981343283585e-05, + "loss": 0.0002, + "step": 43652 + }, + { + "epoch": 40.72, + "learning_rate": 4.490951492537313e-05, + "loss": 0.0, + "step": 43656 + }, + { + "epoch": 40.73, + "learning_rate": 4.490904850746269e-05, + "loss": 0.0001, + "step": 43660 + }, + { + "epoch": 40.73, + "learning_rate": 4.490858208955224e-05, + "loss": 0.0001, + "step": 43664 + }, + { + "epoch": 40.73, + "learning_rate": 4.490811567164179e-05, + "loss": 0.0, + "step": 43668 + }, + { + "epoch": 40.74, + "learning_rate": 4.4907649253731346e-05, + "loss": 0.0, + "step": 43672 + }, + { + "epoch": 40.74, + "learning_rate": 4.49071828358209e-05, + "loss": 0.0042, + "step": 43676 + }, + { + "epoch": 40.75, + "learning_rate": 4.490671641791045e-05, + "loss": 0.0, + "step": 43680 + }, + { + "epoch": 40.75, + "learning_rate": 4.490625e-05, + "loss": 0.0001, + "step": 43684 + }, + { + "epoch": 40.75, + "learning_rate": 4.490578358208956e-05, + "loss": 0.0001, + "step": 43688 + }, + { + "epoch": 40.76, + "learning_rate": 4.490531716417911e-05, + "loss": 0.0002, + "step": 43692 + }, + { + "epoch": 40.76, + "learning_rate": 4.4904850746268655e-05, + "loss": 0.0001, + "step": 43696 + }, + { + "epoch": 40.76, + "learning_rate": 4.490438432835821e-05, + "loss": 0.0003, + "step": 43700 + }, + { + "epoch": 40.77, + "learning_rate": 4.4903917910447765e-05, + "loss": 0.0, + "step": 43704 + }, + { + "epoch": 40.77, + "learning_rate": 4.490345149253731e-05, + "loss": 0.0001, + "step": 43708 + }, + { + "epoch": 40.78, + "learning_rate": 4.490298507462687e-05, + "loss": 0.0, + "step": 43712 + }, + { + "epoch": 40.78, + "learning_rate": 4.4902518656716416e-05, + "loss": 0.0007, + "step": 43716 + }, + { + "epoch": 40.78, + "learning_rate": 4.490205223880598e-05, + "loss": 0.0001, + "step": 43720 + }, + { + "epoch": 40.79, + "learning_rate": 4.4901585820895526e-05, + "loss": 0.0003, + "step": 43724 + }, + { + "epoch": 40.79, + "learning_rate": 4.4901119402985074e-05, + "loss": 0.0, + "step": 43728 + }, + { + "epoch": 40.79, + "learning_rate": 4.490065298507463e-05, + "loss": 0.0001, + "step": 43732 + }, + { + "epoch": 40.8, + "learning_rate": 4.4900186567164184e-05, + "loss": 0.0014, + "step": 43736 + }, + { + "epoch": 40.8, + "learning_rate": 4.489972014925373e-05, + "loss": 0.0, + "step": 43740 + }, + { + "epoch": 40.81, + "learning_rate": 4.489925373134329e-05, + "loss": 0.0016, + "step": 43744 + }, + { + "epoch": 40.81, + "learning_rate": 4.489878731343284e-05, + "loss": 0.0, + "step": 43748 + }, + { + "epoch": 40.81, + "learning_rate": 4.489832089552239e-05, + "loss": 0.0, + "step": 43752 + }, + { + "epoch": 40.82, + "learning_rate": 4.4897854477611945e-05, + "loss": 0.0, + "step": 43756 + }, + { + "epoch": 40.82, + "learning_rate": 4.489738805970149e-05, + "loss": 0.0, + "step": 43760 + }, + { + "epoch": 40.82, + "learning_rate": 4.489692164179105e-05, + "loss": 0.0001, + "step": 43764 + }, + { + "epoch": 40.83, + "learning_rate": 4.48964552238806e-05, + "loss": 0.0001, + "step": 43768 + }, + { + "epoch": 40.83, + "learning_rate": 4.489598880597015e-05, + "loss": 0.0, + "step": 43772 + }, + { + "epoch": 40.84, + "learning_rate": 4.48955223880597e-05, + "loss": 0.0, + "step": 43776 + }, + { + "epoch": 40.84, + "learning_rate": 4.489505597014926e-05, + "loss": 0.0, + "step": 43780 + }, + { + "epoch": 40.84, + "learning_rate": 4.489458955223881e-05, + "loss": 0.0, + "step": 43784 + }, + { + "epoch": 40.85, + "learning_rate": 4.489412313432836e-05, + "loss": 0.0, + "step": 43788 + }, + { + "epoch": 40.85, + "learning_rate": 4.489365671641791e-05, + "loss": 0.0009, + "step": 43792 + }, + { + "epoch": 40.85, + "learning_rate": 4.489319029850747e-05, + "loss": 0.0001, + "step": 43796 + }, + { + "epoch": 40.86, + "learning_rate": 4.4892723880597015e-05, + "loss": 0.0002, + "step": 43800 + }, + { + "epoch": 40.86, + "learning_rate": 4.489225746268657e-05, + "loss": 0.0, + "step": 43804 + }, + { + "epoch": 40.87, + "learning_rate": 4.4891791044776125e-05, + "loss": 0.0022, + "step": 43808 + }, + { + "epoch": 40.87, + "learning_rate": 4.489132462686567e-05, + "loss": 0.0, + "step": 43812 + }, + { + "epoch": 40.87, + "learning_rate": 4.489085820895523e-05, + "loss": 0.0, + "step": 43816 + }, + { + "epoch": 40.88, + "learning_rate": 4.4890391791044776e-05, + "loss": 0.0017, + "step": 43820 + }, + { + "epoch": 40.88, + "learning_rate": 4.488992537313433e-05, + "loss": 0.0, + "step": 43824 + }, + { + "epoch": 40.88, + "learning_rate": 4.4889458955223886e-05, + "loss": 0.0002, + "step": 43828 + }, + { + "epoch": 40.89, + "learning_rate": 4.4888992537313434e-05, + "loss": 0.0004, + "step": 43832 + }, + { + "epoch": 40.89, + "learning_rate": 4.488852611940298e-05, + "loss": 0.0003, + "step": 43836 + }, + { + "epoch": 40.9, + "learning_rate": 4.4888059701492544e-05, + "loss": 0.0, + "step": 43840 + }, + { + "epoch": 40.9, + "learning_rate": 4.488759328358209e-05, + "loss": 0.0001, + "step": 43844 + }, + { + "epoch": 40.9, + "learning_rate": 4.488712686567164e-05, + "loss": 0.0009, + "step": 43848 + }, + { + "epoch": 40.91, + "learning_rate": 4.4886660447761195e-05, + "loss": 0.0002, + "step": 43852 + }, + { + "epoch": 40.91, + "learning_rate": 4.488619402985075e-05, + "loss": 0.0005, + "step": 43856 + }, + { + "epoch": 40.91, + "learning_rate": 4.48857276119403e-05, + "loss": 0.0003, + "step": 43860 + }, + { + "epoch": 40.92, + "learning_rate": 4.488526119402985e-05, + "loss": 0.0, + "step": 43864 + }, + { + "epoch": 40.92, + "learning_rate": 4.48847947761194e-05, + "loss": 0.003, + "step": 43868 + }, + { + "epoch": 40.93, + "learning_rate": 4.4884328358208956e-05, + "loss": 0.0, + "step": 43872 + }, + { + "epoch": 40.93, + "learning_rate": 4.488386194029851e-05, + "loss": 0.0001, + "step": 43876 + }, + { + "epoch": 40.93, + "learning_rate": 4.488339552238806e-05, + "loss": 0.0018, + "step": 43880 + }, + { + "epoch": 40.94, + "learning_rate": 4.4882929104477614e-05, + "loss": 0.0003, + "step": 43884 + }, + { + "epoch": 40.94, + "learning_rate": 4.488246268656717e-05, + "loss": 0.0004, + "step": 43888 + }, + { + "epoch": 40.94, + "learning_rate": 4.488199626865672e-05, + "loss": 0.0, + "step": 43892 + }, + { + "epoch": 40.95, + "learning_rate": 4.488152985074627e-05, + "loss": 0.0, + "step": 43896 + }, + { + "epoch": 40.95, + "learning_rate": 4.488106343283583e-05, + "loss": 0.0001, + "step": 43900 + }, + { + "epoch": 40.96, + "learning_rate": 4.4880597014925375e-05, + "loss": 0.0001, + "step": 43904 + }, + { + "epoch": 40.96, + "learning_rate": 4.488013059701493e-05, + "loss": 0.0, + "step": 43908 + }, + { + "epoch": 40.96, + "learning_rate": 4.487966417910448e-05, + "loss": 0.0, + "step": 43912 + }, + { + "epoch": 40.97, + "learning_rate": 4.487919776119403e-05, + "loss": 0.0001, + "step": 43916 + }, + { + "epoch": 40.97, + "learning_rate": 4.487873134328359e-05, + "loss": 0.0001, + "step": 43920 + }, + { + "epoch": 40.97, + "learning_rate": 4.4878264925373136e-05, + "loss": 0.0001, + "step": 43924 + }, + { + "epoch": 40.98, + "learning_rate": 4.4877798507462684e-05, + "loss": 0.0003, + "step": 43928 + }, + { + "epoch": 40.98, + "learning_rate": 4.4877332089552246e-05, + "loss": 0.0009, + "step": 43932 + }, + { + "epoch": 40.98, + "learning_rate": 4.4876865671641794e-05, + "loss": 0.0008, + "step": 43936 + }, + { + "epoch": 40.99, + "learning_rate": 4.487639925373134e-05, + "loss": 0.0021, + "step": 43940 + }, + { + "epoch": 40.99, + "learning_rate": 4.48759328358209e-05, + "loss": 0.0, + "step": 43944 + }, + { + "epoch": 41.0, + "learning_rate": 4.487546641791045e-05, + "loss": 0.0003, + "step": 43948 + }, + { + "epoch": 41.0, + "learning_rate": 4.4875e-05, + "loss": 0.0001, + "step": 43952 + }, + { + "epoch": 41.0, + "learning_rate": 4.4874533582089555e-05, + "loss": 0.0001, + "step": 43956 + }, + { + "epoch": 41.01, + "learning_rate": 4.487406716417911e-05, + "loss": 0.0003, + "step": 43960 + }, + { + "epoch": 41.01, + "learning_rate": 4.487360074626866e-05, + "loss": 0.0, + "step": 43964 + }, + { + "epoch": 41.01, + "learning_rate": 4.487313432835821e-05, + "loss": 0.0003, + "step": 43968 + }, + { + "epoch": 41.02, + "learning_rate": 4.487266791044776e-05, + "loss": 0.0003, + "step": 43972 + }, + { + "epoch": 41.02, + "learning_rate": 4.4872201492537316e-05, + "loss": 0.0, + "step": 43976 + }, + { + "epoch": 41.03, + "learning_rate": 4.487173507462687e-05, + "loss": 0.0017, + "step": 43980 + }, + { + "epoch": 41.03, + "learning_rate": 4.487126865671642e-05, + "loss": 0.0002, + "step": 43984 + }, + { + "epoch": 41.03, + "learning_rate": 4.487080223880597e-05, + "loss": 0.0002, + "step": 43988 + }, + { + "epoch": 41.04, + "learning_rate": 4.487033582089553e-05, + "loss": 0.0, + "step": 43992 + }, + { + "epoch": 41.04, + "learning_rate": 4.486986940298508e-05, + "loss": 0.0002, + "step": 43996 + }, + { + "epoch": 41.04, + "learning_rate": 4.4869402985074625e-05, + "loss": 0.0001, + "step": 44000 + }, + { + "epoch": 41.04, + "eval_exact_match": 0.7379110251450677, + "eval_exec": 0.7543520309477756, + "eval_loss": 0.43217331171035767, + "eval_runtime": 1129.1199, + "eval_samples_per_second": 0.916, + "step": 44000 + }, + { + "epoch": 41.05, + "learning_rate": 4.486893656716418e-05, + "loss": 0.0008, + "step": 44004 + }, + { + "epoch": 41.05, + "learning_rate": 4.4868470149253735e-05, + "loss": 0.0, + "step": 44008 + }, + { + "epoch": 41.06, + "learning_rate": 4.486800373134328e-05, + "loss": 0.0001, + "step": 44012 + }, + { + "epoch": 41.06, + "learning_rate": 4.486753731343284e-05, + "loss": 0.0, + "step": 44016 + }, + { + "epoch": 41.06, + "learning_rate": 4.486707089552239e-05, + "loss": 0.0, + "step": 44020 + }, + { + "epoch": 41.07, + "learning_rate": 4.486660447761194e-05, + "loss": 0.0, + "step": 44024 + }, + { + "epoch": 41.07, + "learning_rate": 4.4866138059701496e-05, + "loss": 0.0001, + "step": 44028 + }, + { + "epoch": 41.07, + "learning_rate": 4.4865671641791044e-05, + "loss": 0.0, + "step": 44032 + }, + { + "epoch": 41.08, + "learning_rate": 4.48652052238806e-05, + "loss": 0.0, + "step": 44036 + }, + { + "epoch": 41.08, + "learning_rate": 4.4864738805970154e-05, + "loss": 0.0, + "step": 44040 + }, + { + "epoch": 41.09, + "learning_rate": 4.48642723880597e-05, + "loss": 0.0001, + "step": 44044 + }, + { + "epoch": 41.09, + "learning_rate": 4.4863805970149257e-05, + "loss": 0.0, + "step": 44048 + }, + { + "epoch": 41.09, + "learning_rate": 4.486333955223881e-05, + "loss": 0.0011, + "step": 44052 + }, + { + "epoch": 41.1, + "learning_rate": 4.486287313432836e-05, + "loss": 0.0001, + "step": 44056 + }, + { + "epoch": 41.1, + "learning_rate": 4.4862406716417915e-05, + "loss": 0.0003, + "step": 44060 + }, + { + "epoch": 41.1, + "learning_rate": 4.486194029850746e-05, + "loss": 0.0003, + "step": 44064 + }, + { + "epoch": 41.11, + "learning_rate": 4.486147388059702e-05, + "loss": 0.0001, + "step": 44068 + }, + { + "epoch": 41.11, + "learning_rate": 4.486100746268657e-05, + "loss": 0.0, + "step": 44072 + }, + { + "epoch": 41.12, + "learning_rate": 4.486054104477612e-05, + "loss": 0.0, + "step": 44076 + }, + { + "epoch": 41.12, + "learning_rate": 4.4860074626865676e-05, + "loss": 0.0, + "step": 44080 + }, + { + "epoch": 41.12, + "learning_rate": 4.485960820895523e-05, + "loss": 0.0, + "step": 44084 + }, + { + "epoch": 41.13, + "learning_rate": 4.485914179104478e-05, + "loss": 0.0001, + "step": 44088 + }, + { + "epoch": 41.13, + "learning_rate": 4.485867537313433e-05, + "loss": 0.0, + "step": 44092 + }, + { + "epoch": 41.13, + "learning_rate": 4.485820895522388e-05, + "loss": 0.0001, + "step": 44096 + }, + { + "epoch": 41.14, + "learning_rate": 4.4857742537313437e-05, + "loss": 0.0, + "step": 44100 + }, + { + "epoch": 41.14, + "learning_rate": 4.4857276119402985e-05, + "loss": 0.0001, + "step": 44104 + }, + { + "epoch": 41.15, + "learning_rate": 4.485680970149254e-05, + "loss": 0.0, + "step": 44108 + }, + { + "epoch": 41.15, + "learning_rate": 4.4856343283582094e-05, + "loss": 0.0001, + "step": 44112 + }, + { + "epoch": 41.15, + "learning_rate": 4.485587686567164e-05, + "loss": 0.0, + "step": 44116 + }, + { + "epoch": 41.16, + "learning_rate": 4.48554104477612e-05, + "loss": 0.0, + "step": 44120 + }, + { + "epoch": 41.16, + "learning_rate": 4.4854944029850746e-05, + "loss": 0.0, + "step": 44124 + }, + { + "epoch": 41.16, + "learning_rate": 4.48544776119403e-05, + "loss": 0.0002, + "step": 44128 + }, + { + "epoch": 41.17, + "learning_rate": 4.4854011194029855e-05, + "loss": 0.0001, + "step": 44132 + }, + { + "epoch": 41.17, + "learning_rate": 4.4853544776119404e-05, + "loss": 0.0, + "step": 44136 + }, + { + "epoch": 41.18, + "learning_rate": 4.485307835820896e-05, + "loss": 0.0009, + "step": 44140 + }, + { + "epoch": 41.18, + "learning_rate": 4.4852611940298513e-05, + "loss": 0.0006, + "step": 44144 + }, + { + "epoch": 41.18, + "learning_rate": 4.485214552238806e-05, + "loss": 0.0, + "step": 44148 + }, + { + "epoch": 41.19, + "learning_rate": 4.485167910447761e-05, + "loss": 0.0001, + "step": 44152 + }, + { + "epoch": 41.19, + "learning_rate": 4.4851212686567165e-05, + "loss": 0.0001, + "step": 44156 + }, + { + "epoch": 41.19, + "learning_rate": 4.485074626865672e-05, + "loss": 0.0, + "step": 44160 + }, + { + "epoch": 41.2, + "learning_rate": 4.485027985074627e-05, + "loss": 0.0, + "step": 44164 + }, + { + "epoch": 41.2, + "learning_rate": 4.484981343283582e-05, + "loss": 0.0, + "step": 44168 + }, + { + "epoch": 41.21, + "learning_rate": 4.484934701492538e-05, + "loss": 0.0, + "step": 44172 + }, + { + "epoch": 41.21, + "learning_rate": 4.4848880597014926e-05, + "loss": 0.0, + "step": 44176 + }, + { + "epoch": 41.21, + "learning_rate": 4.484841417910448e-05, + "loss": 0.0, + "step": 44180 + }, + { + "epoch": 41.22, + "learning_rate": 4.484794776119403e-05, + "loss": 0.0023, + "step": 44184 + }, + { + "epoch": 41.22, + "learning_rate": 4.4847481343283583e-05, + "loss": 0.0003, + "step": 44188 + }, + { + "epoch": 41.22, + "learning_rate": 4.484701492537314e-05, + "loss": 0.0, + "step": 44192 + }, + { + "epoch": 41.23, + "learning_rate": 4.4846548507462687e-05, + "loss": 0.0001, + "step": 44196 + }, + { + "epoch": 41.23, + "learning_rate": 4.484608208955224e-05, + "loss": 0.0001, + "step": 44200 + }, + { + "epoch": 41.24, + "learning_rate": 4.4845615671641796e-05, + "loss": 0.0002, + "step": 44204 + }, + { + "epoch": 41.24, + "learning_rate": 4.4845149253731344e-05, + "loss": 0.0, + "step": 44208 + }, + { + "epoch": 41.24, + "learning_rate": 4.48446828358209e-05, + "loss": 0.0, + "step": 44212 + }, + { + "epoch": 41.25, + "learning_rate": 4.484421641791045e-05, + "loss": 0.0, + "step": 44216 + }, + { + "epoch": 41.25, + "learning_rate": 4.484375e-05, + "loss": 0.0, + "step": 44220 + }, + { + "epoch": 41.25, + "learning_rate": 4.484328358208956e-05, + "loss": 0.0004, + "step": 44224 + }, + { + "epoch": 41.26, + "learning_rate": 4.4842817164179105e-05, + "loss": 0.0, + "step": 44228 + }, + { + "epoch": 41.26, + "learning_rate": 4.484235074626866e-05, + "loss": 0.0015, + "step": 44232 + }, + { + "epoch": 41.26, + "learning_rate": 4.4841884328358215e-05, + "loss": 0.0, + "step": 44236 + }, + { + "epoch": 41.27, + "learning_rate": 4.4841417910447763e-05, + "loss": 0.0005, + "step": 44240 + }, + { + "epoch": 41.27, + "learning_rate": 4.484095149253731e-05, + "loss": 0.0, + "step": 44244 + }, + { + "epoch": 41.28, + "learning_rate": 4.484048507462687e-05, + "loss": 0.0, + "step": 44248 + }, + { + "epoch": 41.28, + "learning_rate": 4.484001865671642e-05, + "loss": 0.0001, + "step": 44252 + }, + { + "epoch": 41.28, + "learning_rate": 4.483955223880597e-05, + "loss": 0.0, + "step": 44256 + }, + { + "epoch": 41.29, + "learning_rate": 4.4839085820895524e-05, + "loss": 0.0001, + "step": 44260 + }, + { + "epoch": 41.29, + "learning_rate": 4.483861940298508e-05, + "loss": 0.0003, + "step": 44264 + }, + { + "epoch": 41.29, + "learning_rate": 4.483815298507463e-05, + "loss": 0.0, + "step": 44268 + }, + { + "epoch": 41.3, + "learning_rate": 4.483768656716418e-05, + "loss": 0.0, + "step": 44272 + }, + { + "epoch": 41.3, + "learning_rate": 4.483722014925373e-05, + "loss": 0.0001, + "step": 44276 + }, + { + "epoch": 41.31, + "learning_rate": 4.4836753731343285e-05, + "loss": 0.0006, + "step": 44280 + }, + { + "epoch": 41.31, + "learning_rate": 4.483628731343284e-05, + "loss": 0.0, + "step": 44284 + }, + { + "epoch": 41.31, + "learning_rate": 4.483582089552239e-05, + "loss": 0.0, + "step": 44288 + }, + { + "epoch": 41.32, + "learning_rate": 4.483535447761194e-05, + "loss": 0.0001, + "step": 44292 + }, + { + "epoch": 41.32, + "learning_rate": 4.48348880597015e-05, + "loss": 0.0001, + "step": 44296 + }, + { + "epoch": 41.32, + "learning_rate": 4.4834421641791046e-05, + "loss": 0.0, + "step": 44300 + }, + { + "epoch": 41.33, + "learning_rate": 4.4833955223880594e-05, + "loss": 0.002, + "step": 44304 + }, + { + "epoch": 41.33, + "learning_rate": 4.4833488805970156e-05, + "loss": 0.0003, + "step": 44308 + }, + { + "epoch": 41.34, + "learning_rate": 4.4833022388059704e-05, + "loss": 0.0, + "step": 44312 + }, + { + "epoch": 41.34, + "learning_rate": 4.483255597014925e-05, + "loss": 0.0, + "step": 44316 + }, + { + "epoch": 41.34, + "learning_rate": 4.483208955223881e-05, + "loss": 0.0039, + "step": 44320 + }, + { + "epoch": 41.35, + "learning_rate": 4.483162313432836e-05, + "loss": 0.0029, + "step": 44324 + }, + { + "epoch": 41.35, + "learning_rate": 4.483115671641791e-05, + "loss": 0.0016, + "step": 44328 + }, + { + "epoch": 41.35, + "learning_rate": 4.4830690298507465e-05, + "loss": 0.0003, + "step": 44332 + }, + { + "epoch": 41.36, + "learning_rate": 4.4830223880597013e-05, + "loss": 0.0001, + "step": 44336 + }, + { + "epoch": 41.36, + "learning_rate": 4.482975746268657e-05, + "loss": 0.0, + "step": 44340 + }, + { + "epoch": 41.37, + "learning_rate": 4.482929104477612e-05, + "loss": 0.0001, + "step": 44344 + }, + { + "epoch": 41.37, + "learning_rate": 4.482882462686567e-05, + "loss": 0.0034, + "step": 44348 + }, + { + "epoch": 41.37, + "learning_rate": 4.4828358208955226e-05, + "loss": 0.0032, + "step": 44352 + }, + { + "epoch": 41.38, + "learning_rate": 4.482789179104478e-05, + "loss": 0.0004, + "step": 44356 + }, + { + "epoch": 41.38, + "learning_rate": 4.482742537313433e-05, + "loss": 0.0, + "step": 44360 + }, + { + "epoch": 41.38, + "learning_rate": 4.482695895522388e-05, + "loss": 0.0013, + "step": 44364 + }, + { + "epoch": 41.39, + "learning_rate": 4.482649253731344e-05, + "loss": 0.0055, + "step": 44368 + }, + { + "epoch": 41.39, + "learning_rate": 4.482602611940299e-05, + "loss": 0.0, + "step": 44372 + }, + { + "epoch": 41.4, + "learning_rate": 4.482555970149254e-05, + "loss": 0.0, + "step": 44376 + }, + { + "epoch": 41.4, + "learning_rate": 4.482509328358209e-05, + "loss": 0.0028, + "step": 44380 + }, + { + "epoch": 41.4, + "learning_rate": 4.4824626865671645e-05, + "loss": 0.0, + "step": 44384 + }, + { + "epoch": 41.41, + "learning_rate": 4.48241604477612e-05, + "loss": 0.0, + "step": 44388 + }, + { + "epoch": 41.41, + "learning_rate": 4.482369402985075e-05, + "loss": 0.0001, + "step": 44392 + }, + { + "epoch": 41.41, + "learning_rate": 4.4823227611940296e-05, + "loss": 0.0, + "step": 44396 + }, + { + "epoch": 41.42, + "learning_rate": 4.482276119402986e-05, + "loss": 0.0, + "step": 44400 + }, + { + "epoch": 41.42, + "learning_rate": 4.4822294776119406e-05, + "loss": 0.0, + "step": 44404 + }, + { + "epoch": 41.43, + "learning_rate": 4.4821828358208954e-05, + "loss": 0.0, + "step": 44408 + }, + { + "epoch": 41.43, + "learning_rate": 4.482136194029851e-05, + "loss": 0.0004, + "step": 44412 + }, + { + "epoch": 41.43, + "learning_rate": 4.4820895522388064e-05, + "loss": 0.0, + "step": 44416 + }, + { + "epoch": 41.44, + "learning_rate": 4.482042910447761e-05, + "loss": 0.0004, + "step": 44420 + }, + { + "epoch": 41.44, + "learning_rate": 4.481996268656717e-05, + "loss": 0.0, + "step": 44424 + }, + { + "epoch": 41.44, + "learning_rate": 4.481949626865672e-05, + "loss": 0.0, + "step": 44428 + }, + { + "epoch": 41.45, + "learning_rate": 4.481902985074627e-05, + "loss": 0.0029, + "step": 44432 + }, + { + "epoch": 41.45, + "learning_rate": 4.4818563432835825e-05, + "loss": 0.0, + "step": 44436 + }, + { + "epoch": 41.46, + "learning_rate": 4.481809701492537e-05, + "loss": 0.0001, + "step": 44440 + }, + { + "epoch": 41.46, + "learning_rate": 4.481763059701493e-05, + "loss": 0.0004, + "step": 44444 + }, + { + "epoch": 41.46, + "learning_rate": 4.481716417910448e-05, + "loss": 0.0, + "step": 44448 + }, + { + "epoch": 41.47, + "learning_rate": 4.481669776119403e-05, + "loss": 0.0, + "step": 44452 + }, + { + "epoch": 41.47, + "learning_rate": 4.481623134328358e-05, + "loss": 0.0001, + "step": 44456 + }, + { + "epoch": 41.47, + "learning_rate": 4.481576492537314e-05, + "loss": 0.0001, + "step": 44460 + }, + { + "epoch": 41.48, + "learning_rate": 4.481529850746269e-05, + "loss": 0.0, + "step": 44464 + }, + { + "epoch": 41.48, + "learning_rate": 4.481483208955224e-05, + "loss": 0.0, + "step": 44468 + }, + { + "epoch": 41.49, + "learning_rate": 4.481436567164179e-05, + "loss": 0.0009, + "step": 44472 + }, + { + "epoch": 41.49, + "learning_rate": 4.481389925373135e-05, + "loss": 0.0001, + "step": 44476 + }, + { + "epoch": 41.49, + "learning_rate": 4.4813432835820895e-05, + "loss": 0.0, + "step": 44480 + }, + { + "epoch": 41.5, + "learning_rate": 4.481296641791045e-05, + "loss": 0.0, + "step": 44484 + }, + { + "epoch": 41.5, + "learning_rate": 4.4812500000000005e-05, + "loss": 0.0, + "step": 44488 + }, + { + "epoch": 41.5, + "learning_rate": 4.481203358208955e-05, + "loss": 0.0001, + "step": 44492 + }, + { + "epoch": 41.51, + "learning_rate": 4.481156716417911e-05, + "loss": 0.0027, + "step": 44496 + }, + { + "epoch": 41.51, + "learning_rate": 4.4811100746268656e-05, + "loss": 0.0001, + "step": 44500 + }, + { + "epoch": 41.51, + "eval_exact_match": 0.7350096711798839, + "eval_exec": 0.7543520309477756, + "eval_loss": 0.42939507961273193, + "eval_runtime": 1157.4066, + "eval_samples_per_second": 0.893, + "step": 44500 + }, + { + "epoch": 41.51, + "learning_rate": 4.481063432835821e-05, + "loss": 0.0, + "step": 44504 + }, + { + "epoch": 41.52, + "learning_rate": 4.4810167910447766e-05, + "loss": 0.0, + "step": 44508 + }, + { + "epoch": 41.52, + "learning_rate": 4.4809701492537314e-05, + "loss": 0.0, + "step": 44512 + }, + { + "epoch": 41.53, + "learning_rate": 4.480923507462686e-05, + "loss": 0.0003, + "step": 44516 + }, + { + "epoch": 41.53, + "learning_rate": 4.4808768656716424e-05, + "loss": 0.0, + "step": 44520 + }, + { + "epoch": 41.53, + "learning_rate": 4.480830223880597e-05, + "loss": 0.0011, + "step": 44524 + }, + { + "epoch": 41.54, + "learning_rate": 4.480783582089552e-05, + "loss": 0.0003, + "step": 44528 + }, + { + "epoch": 41.54, + "learning_rate": 4.4807369402985075e-05, + "loss": 0.0, + "step": 44532 + }, + { + "epoch": 41.54, + "learning_rate": 4.480690298507463e-05, + "loss": 0.0002, + "step": 44536 + }, + { + "epoch": 41.55, + "learning_rate": 4.4806436567164185e-05, + "loss": 0.0012, + "step": 44540 + }, + { + "epoch": 41.55, + "learning_rate": 4.480597014925373e-05, + "loss": 0.0001, + "step": 44544 + }, + { + "epoch": 41.56, + "learning_rate": 4.480550373134328e-05, + "loss": 0.0, + "step": 44548 + }, + { + "epoch": 41.56, + "learning_rate": 4.480503731343284e-05, + "loss": 0.0, + "step": 44552 + }, + { + "epoch": 41.56, + "learning_rate": 4.480457089552239e-05, + "loss": 0.0, + "step": 44556 + }, + { + "epoch": 41.57, + "learning_rate": 4.480410447761194e-05, + "loss": 0.0, + "step": 44560 + }, + { + "epoch": 41.57, + "learning_rate": 4.4803638059701494e-05, + "loss": 0.0001, + "step": 44564 + }, + { + "epoch": 41.57, + "learning_rate": 4.480317164179105e-05, + "loss": 0.0, + "step": 44568 + }, + { + "epoch": 41.58, + "learning_rate": 4.48027052238806e-05, + "loss": 0.0, + "step": 44572 + }, + { + "epoch": 41.58, + "learning_rate": 4.480223880597015e-05, + "loss": 0.0001, + "step": 44576 + }, + { + "epoch": 41.59, + "learning_rate": 4.480177238805971e-05, + "loss": 0.0003, + "step": 44580 + }, + { + "epoch": 41.59, + "learning_rate": 4.4801305970149255e-05, + "loss": 0.0, + "step": 44584 + }, + { + "epoch": 41.59, + "learning_rate": 4.480083955223881e-05, + "loss": 0.0013, + "step": 44588 + }, + { + "epoch": 41.6, + "learning_rate": 4.480037313432836e-05, + "loss": 0.0021, + "step": 44592 + }, + { + "epoch": 41.6, + "learning_rate": 4.479990671641791e-05, + "loss": 0.0, + "step": 44596 + }, + { + "epoch": 41.6, + "learning_rate": 4.479944029850747e-05, + "loss": 0.0001, + "step": 44600 + }, + { + "epoch": 41.61, + "learning_rate": 4.4798973880597016e-05, + "loss": 0.0043, + "step": 44604 + }, + { + "epoch": 41.61, + "learning_rate": 4.4798507462686564e-05, + "loss": 0.0014, + "step": 44608 + }, + { + "epoch": 41.62, + "learning_rate": 4.4798041044776126e-05, + "loss": 0.0001, + "step": 44612 + }, + { + "epoch": 41.62, + "learning_rate": 4.4797574626865674e-05, + "loss": 0.0002, + "step": 44616 + }, + { + "epoch": 41.62, + "learning_rate": 4.479710820895522e-05, + "loss": 0.0001, + "step": 44620 + }, + { + "epoch": 41.63, + "learning_rate": 4.479664179104478e-05, + "loss": 0.0001, + "step": 44624 + }, + { + "epoch": 41.63, + "learning_rate": 4.479617537313433e-05, + "loss": 0.0007, + "step": 44628 + }, + { + "epoch": 41.63, + "learning_rate": 4.479570895522388e-05, + "loss": 0.0006, + "step": 44632 + }, + { + "epoch": 41.64, + "learning_rate": 4.4795242537313435e-05, + "loss": 0.0002, + "step": 44636 + }, + { + "epoch": 41.64, + "learning_rate": 4.479477611940299e-05, + "loss": 0.0001, + "step": 44640 + }, + { + "epoch": 41.65, + "learning_rate": 4.479430970149254e-05, + "loss": 0.0, + "step": 44644 + }, + { + "epoch": 41.65, + "learning_rate": 4.479384328358209e-05, + "loss": 0.0001, + "step": 44648 + }, + { + "epoch": 41.65, + "learning_rate": 4.479337686567164e-05, + "loss": 0.0, + "step": 44652 + }, + { + "epoch": 41.66, + "learning_rate": 4.4792910447761196e-05, + "loss": 0.0, + "step": 44656 + }, + { + "epoch": 41.66, + "learning_rate": 4.479244402985075e-05, + "loss": 0.0016, + "step": 44660 + }, + { + "epoch": 41.66, + "learning_rate": 4.47919776119403e-05, + "loss": 0.0019, + "step": 44664 + }, + { + "epoch": 41.67, + "learning_rate": 4.479151119402985e-05, + "loss": 0.0009, + "step": 44668 + }, + { + "epoch": 41.67, + "learning_rate": 4.479104477611941e-05, + "loss": 0.0006, + "step": 44672 + }, + { + "epoch": 41.68, + "learning_rate": 4.479057835820896e-05, + "loss": 0.0, + "step": 44676 + }, + { + "epoch": 41.68, + "learning_rate": 4.4790111940298505e-05, + "loss": 0.0, + "step": 44680 + }, + { + "epoch": 41.68, + "learning_rate": 4.478964552238806e-05, + "loss": 0.0001, + "step": 44684 + }, + { + "epoch": 41.69, + "learning_rate": 4.4789179104477615e-05, + "loss": 0.0008, + "step": 44688 + }, + { + "epoch": 41.69, + "learning_rate": 4.478871268656716e-05, + "loss": 0.0019, + "step": 44692 + }, + { + "epoch": 41.69, + "learning_rate": 4.478824626865672e-05, + "loss": 0.0001, + "step": 44696 + }, + { + "epoch": 41.7, + "learning_rate": 4.478777985074627e-05, + "loss": 0.0, + "step": 44700 + }, + { + "epoch": 41.7, + "learning_rate": 4.478731343283583e-05, + "loss": 0.0, + "step": 44704 + }, + { + "epoch": 41.71, + "learning_rate": 4.4786847014925376e-05, + "loss": 0.0001, + "step": 44708 + }, + { + "epoch": 41.71, + "learning_rate": 4.4786380597014924e-05, + "loss": 0.0004, + "step": 44712 + }, + { + "epoch": 41.71, + "learning_rate": 4.478591417910448e-05, + "loss": 0.0005, + "step": 44716 + }, + { + "epoch": 41.72, + "learning_rate": 4.4785447761194034e-05, + "loss": 0.0002, + "step": 44720 + }, + { + "epoch": 41.72, + "learning_rate": 4.478498134328358e-05, + "loss": 0.0002, + "step": 44724 + }, + { + "epoch": 41.72, + "learning_rate": 4.478451492537314e-05, + "loss": 0.0001, + "step": 44728 + }, + { + "epoch": 41.73, + "learning_rate": 4.478404850746269e-05, + "loss": 0.0, + "step": 44732 + }, + { + "epoch": 41.73, + "learning_rate": 4.478358208955224e-05, + "loss": 0.0019, + "step": 44736 + }, + { + "epoch": 41.73, + "learning_rate": 4.4783115671641795e-05, + "loss": 0.0001, + "step": 44740 + }, + { + "epoch": 41.74, + "learning_rate": 4.478264925373134e-05, + "loss": 0.0006, + "step": 44744 + }, + { + "epoch": 41.74, + "learning_rate": 4.47821828358209e-05, + "loss": 0.0004, + "step": 44748 + }, + { + "epoch": 41.75, + "learning_rate": 4.478171641791045e-05, + "loss": 0.0001, + "step": 44752 + }, + { + "epoch": 41.75, + "learning_rate": 4.478125e-05, + "loss": 0.0, + "step": 44756 + }, + { + "epoch": 41.75, + "learning_rate": 4.4780783582089556e-05, + "loss": 0.0001, + "step": 44760 + }, + { + "epoch": 41.76, + "learning_rate": 4.478031716417911e-05, + "loss": 0.0, + "step": 44764 + }, + { + "epoch": 41.76, + "learning_rate": 4.477985074626866e-05, + "loss": 0.0, + "step": 44768 + }, + { + "epoch": 41.76, + "learning_rate": 4.477938432835821e-05, + "loss": 0.0, + "step": 44772 + }, + { + "epoch": 41.77, + "learning_rate": 4.477891791044776e-05, + "loss": 0.002, + "step": 44776 + }, + { + "epoch": 41.77, + "learning_rate": 4.477845149253732e-05, + "loss": 0.0001, + "step": 44780 + }, + { + "epoch": 41.78, + "learning_rate": 4.4777985074626865e-05, + "loss": 0.0, + "step": 44784 + }, + { + "epoch": 41.78, + "learning_rate": 4.477751865671642e-05, + "loss": 0.0, + "step": 44788 + }, + { + "epoch": 41.78, + "learning_rate": 4.4777052238805975e-05, + "loss": 0.0, + "step": 44792 + }, + { + "epoch": 41.79, + "learning_rate": 4.477658582089552e-05, + "loss": 0.0029, + "step": 44796 + }, + { + "epoch": 41.79, + "learning_rate": 4.477611940298508e-05, + "loss": 0.0, + "step": 44800 + }, + { + "epoch": 41.79, + "learning_rate": 4.4775652985074626e-05, + "loss": 0.0002, + "step": 44804 + }, + { + "epoch": 41.8, + "learning_rate": 4.477518656716418e-05, + "loss": 0.0, + "step": 44808 + }, + { + "epoch": 41.8, + "learning_rate": 4.4774720149253736e-05, + "loss": 0.0001, + "step": 44812 + }, + { + "epoch": 41.81, + "learning_rate": 4.4774253731343284e-05, + "loss": 0.0, + "step": 44816 + }, + { + "epoch": 41.81, + "learning_rate": 4.477378731343284e-05, + "loss": 0.0, + "step": 44820 + }, + { + "epoch": 41.81, + "learning_rate": 4.4773320895522394e-05, + "loss": 0.0006, + "step": 44824 + }, + { + "epoch": 41.82, + "learning_rate": 4.477285447761194e-05, + "loss": 0.0006, + "step": 44828 + }, + { + "epoch": 41.82, + "learning_rate": 4.477238805970149e-05, + "loss": 0.0013, + "step": 44832 + }, + { + "epoch": 41.82, + "learning_rate": 4.4771921641791045e-05, + "loss": 0.0014, + "step": 44836 + }, + { + "epoch": 41.83, + "learning_rate": 4.47714552238806e-05, + "loss": 0.0001, + "step": 44840 + }, + { + "epoch": 41.83, + "learning_rate": 4.477098880597015e-05, + "loss": 0.0001, + "step": 44844 + }, + { + "epoch": 41.84, + "learning_rate": 4.47705223880597e-05, + "loss": 0.0, + "step": 44848 + }, + { + "epoch": 41.84, + "learning_rate": 4.477005597014926e-05, + "loss": 0.0001, + "step": 44852 + }, + { + "epoch": 41.84, + "learning_rate": 4.4769589552238806e-05, + "loss": 0.003, + "step": 44856 + }, + { + "epoch": 41.85, + "learning_rate": 4.476912313432836e-05, + "loss": 0.0001, + "step": 44860 + }, + { + "epoch": 41.85, + "learning_rate": 4.476865671641791e-05, + "loss": 0.0, + "step": 44864 + }, + { + "epoch": 41.85, + "learning_rate": 4.476819029850747e-05, + "loss": 0.0001, + "step": 44868 + }, + { + "epoch": 41.86, + "learning_rate": 4.476772388059702e-05, + "loss": 0.0054, + "step": 44872 + }, + { + "epoch": 41.86, + "learning_rate": 4.476725746268657e-05, + "loss": 0.0003, + "step": 44876 + }, + { + "epoch": 41.87, + "learning_rate": 4.476679104477612e-05, + "loss": 0.0001, + "step": 44880 + }, + { + "epoch": 41.87, + "learning_rate": 4.476632462686568e-05, + "loss": 0.0001, + "step": 44884 + }, + { + "epoch": 41.87, + "learning_rate": 4.4765858208955225e-05, + "loss": 0.0, + "step": 44888 + }, + { + "epoch": 41.88, + "learning_rate": 4.476539179104478e-05, + "loss": 0.0001, + "step": 44892 + }, + { + "epoch": 41.88, + "learning_rate": 4.476492537313433e-05, + "loss": 0.0, + "step": 44896 + }, + { + "epoch": 41.88, + "learning_rate": 4.476445895522388e-05, + "loss": 0.0, + "step": 44900 + }, + { + "epoch": 41.89, + "learning_rate": 4.476399253731344e-05, + "loss": 0.0001, + "step": 44904 + }, + { + "epoch": 41.89, + "learning_rate": 4.4763526119402986e-05, + "loss": 0.0, + "step": 44908 + }, + { + "epoch": 41.9, + "learning_rate": 4.476305970149254e-05, + "loss": 0.0026, + "step": 44912 + }, + { + "epoch": 41.9, + "learning_rate": 4.4762593283582096e-05, + "loss": 0.0, + "step": 44916 + }, + { + "epoch": 41.9, + "learning_rate": 4.4762126865671644e-05, + "loss": 0.0, + "step": 44920 + }, + { + "epoch": 41.91, + "learning_rate": 4.476166044776119e-05, + "loss": 0.0001, + "step": 44924 + }, + { + "epoch": 41.91, + "learning_rate": 4.4761194029850754e-05, + "loss": 0.0001, + "step": 44928 + }, + { + "epoch": 41.91, + "learning_rate": 4.47607276119403e-05, + "loss": 0.0, + "step": 44932 + }, + { + "epoch": 41.92, + "learning_rate": 4.476026119402985e-05, + "loss": 0.0, + "step": 44936 + }, + { + "epoch": 41.92, + "learning_rate": 4.4759794776119405e-05, + "loss": 0.0032, + "step": 44940 + }, + { + "epoch": 41.93, + "learning_rate": 4.475932835820896e-05, + "loss": 0.001, + "step": 44944 + }, + { + "epoch": 41.93, + "learning_rate": 4.475886194029851e-05, + "loss": 0.0, + "step": 44948 + }, + { + "epoch": 41.93, + "learning_rate": 4.475839552238806e-05, + "loss": 0.0, + "step": 44952 + }, + { + "epoch": 41.94, + "learning_rate": 4.475792910447761e-05, + "loss": 0.0, + "step": 44956 + }, + { + "epoch": 41.94, + "learning_rate": 4.4757462686567166e-05, + "loss": 0.0, + "step": 44960 + }, + { + "epoch": 41.94, + "learning_rate": 4.475699626865672e-05, + "loss": 0.0045, + "step": 44964 + }, + { + "epoch": 41.95, + "learning_rate": 4.475652985074627e-05, + "loss": 0.0001, + "step": 44968 + }, + { + "epoch": 41.95, + "learning_rate": 4.4756063432835824e-05, + "loss": 0.0, + "step": 44972 + }, + { + "epoch": 41.96, + "learning_rate": 4.475559701492538e-05, + "loss": 0.0, + "step": 44976 + }, + { + "epoch": 41.96, + "learning_rate": 4.475513059701493e-05, + "loss": 0.0002, + "step": 44980 + }, + { + "epoch": 41.96, + "learning_rate": 4.4754664179104475e-05, + "loss": 0.0, + "step": 44984 + }, + { + "epoch": 41.97, + "learning_rate": 4.4754197761194036e-05, + "loss": 0.0015, + "step": 44988 + }, + { + "epoch": 41.97, + "learning_rate": 4.4753731343283585e-05, + "loss": 0.0, + "step": 44992 + }, + { + "epoch": 41.97, + "learning_rate": 4.475326492537313e-05, + "loss": 0.0, + "step": 44996 + }, + { + "epoch": 41.98, + "learning_rate": 4.475279850746269e-05, + "loss": 0.0, + "step": 45000 + }, + { + "epoch": 41.98, + "eval_exact_match": 0.7340425531914894, + "eval_exec": 0.758220502901354, + "eval_loss": 0.4176974892616272, + "eval_runtime": 1170.0858, + "eval_samples_per_second": 0.884, + "step": 45000 + }, + { + "epoch": 41.98, + "learning_rate": 4.475233208955224e-05, + "loss": 0.0081, + "step": 45004 + }, + { + "epoch": 41.98, + "learning_rate": 4.475186567164179e-05, + "loss": 0.0001, + "step": 45008 + }, + { + "epoch": 41.99, + "learning_rate": 4.4751399253731346e-05, + "loss": 0.0001, + "step": 45012 + }, + { + "epoch": 41.99, + "learning_rate": 4.4750932835820894e-05, + "loss": 0.0002, + "step": 45016 + }, + { + "epoch": 42.0, + "learning_rate": 4.475046641791045e-05, + "loss": 0.0002, + "step": 45020 + }, + { + "epoch": 42.0, + "learning_rate": 4.4750000000000004e-05, + "loss": 0.0002, + "step": 45024 + }, + { + "epoch": 42.0, + "learning_rate": 4.474953358208955e-05, + "loss": 0.0011, + "step": 45028 + }, + { + "epoch": 42.01, + "learning_rate": 4.4749067164179107e-05, + "loss": 0.0001, + "step": 45032 + }, + { + "epoch": 42.01, + "learning_rate": 4.474860074626866e-05, + "loss": 0.0, + "step": 45036 + }, + { + "epoch": 42.01, + "learning_rate": 4.474813432835821e-05, + "loss": 0.0003, + "step": 45040 + }, + { + "epoch": 42.02, + "learning_rate": 4.4747667910447765e-05, + "loss": 0.0004, + "step": 45044 + }, + { + "epoch": 42.02, + "learning_rate": 4.474720149253732e-05, + "loss": 0.0, + "step": 45048 + }, + { + "epoch": 42.03, + "learning_rate": 4.474673507462687e-05, + "loss": 0.0, + "step": 45052 + }, + { + "epoch": 42.03, + "learning_rate": 4.474626865671642e-05, + "loss": 0.0, + "step": 45056 + }, + { + "epoch": 42.03, + "learning_rate": 4.474580223880597e-05, + "loss": 0.0, + "step": 45060 + }, + { + "epoch": 42.04, + "learning_rate": 4.4745335820895526e-05, + "loss": 0.0, + "step": 45064 + }, + { + "epoch": 42.04, + "learning_rate": 4.474486940298508e-05, + "loss": 0.0001, + "step": 45068 + }, + { + "epoch": 42.04, + "learning_rate": 4.474440298507463e-05, + "loss": 0.002, + "step": 45072 + }, + { + "epoch": 42.05, + "learning_rate": 4.474393656716418e-05, + "loss": 0.0003, + "step": 45076 + }, + { + "epoch": 42.05, + "learning_rate": 4.474347014925374e-05, + "loss": 0.0001, + "step": 45080 + }, + { + "epoch": 42.06, + "learning_rate": 4.4743003731343286e-05, + "loss": 0.0003, + "step": 45084 + }, + { + "epoch": 42.06, + "learning_rate": 4.4742537313432835e-05, + "loss": 0.0027, + "step": 45088 + }, + { + "epoch": 42.06, + "learning_rate": 4.474207089552239e-05, + "loss": 0.0005, + "step": 45092 + }, + { + "epoch": 42.07, + "learning_rate": 4.4741604477611944e-05, + "loss": 0.0, + "step": 45096 + }, + { + "epoch": 42.07, + "learning_rate": 4.474113805970149e-05, + "loss": 0.0014, + "step": 45100 + }, + { + "epoch": 42.07, + "learning_rate": 4.474067164179105e-05, + "loss": 0.0001, + "step": 45104 + }, + { + "epoch": 42.08, + "learning_rate": 4.47402052238806e-05, + "loss": 0.0013, + "step": 45108 + }, + { + "epoch": 42.08, + "learning_rate": 4.473973880597015e-05, + "loss": 0.0, + "step": 45112 + }, + { + "epoch": 42.09, + "learning_rate": 4.4739272388059705e-05, + "loss": 0.0, + "step": 45116 + }, + { + "epoch": 42.09, + "learning_rate": 4.4738805970149254e-05, + "loss": 0.0001, + "step": 45120 + }, + { + "epoch": 42.09, + "learning_rate": 4.473833955223881e-05, + "loss": 0.0002, + "step": 45124 + }, + { + "epoch": 42.1, + "learning_rate": 4.473787313432836e-05, + "loss": 0.0008, + "step": 45128 + }, + { + "epoch": 42.1, + "learning_rate": 4.473740671641791e-05, + "loss": 0.0001, + "step": 45132 + }, + { + "epoch": 42.1, + "learning_rate": 4.473694029850746e-05, + "loss": 0.0, + "step": 45136 + }, + { + "epoch": 42.11, + "learning_rate": 4.473647388059702e-05, + "loss": 0.0, + "step": 45140 + }, + { + "epoch": 42.11, + "learning_rate": 4.473600746268657e-05, + "loss": 0.0014, + "step": 45144 + }, + { + "epoch": 42.12, + "learning_rate": 4.473554104477612e-05, + "loss": 0.0001, + "step": 45148 + }, + { + "epoch": 42.12, + "learning_rate": 4.473507462686567e-05, + "loss": 0.0015, + "step": 45152 + }, + { + "epoch": 42.12, + "learning_rate": 4.473460820895523e-05, + "loss": 0.0, + "step": 45156 + }, + { + "epoch": 42.13, + "learning_rate": 4.4734141791044776e-05, + "loss": 0.0055, + "step": 45160 + }, + { + "epoch": 42.13, + "learning_rate": 4.473367537313433e-05, + "loss": 0.0001, + "step": 45164 + }, + { + "epoch": 42.13, + "learning_rate": 4.4733208955223885e-05, + "loss": 0.0002, + "step": 45168 + }, + { + "epoch": 42.14, + "learning_rate": 4.4732742537313433e-05, + "loss": 0.0001, + "step": 45172 + }, + { + "epoch": 42.14, + "learning_rate": 4.473227611940299e-05, + "loss": 0.0001, + "step": 45176 + }, + { + "epoch": 42.15, + "learning_rate": 4.4731809701492537e-05, + "loss": 0.0, + "step": 45180 + }, + { + "epoch": 42.15, + "learning_rate": 4.473134328358209e-05, + "loss": 0.0, + "step": 45184 + }, + { + "epoch": 42.15, + "learning_rate": 4.4730876865671646e-05, + "loss": 0.0, + "step": 45188 + }, + { + "epoch": 42.16, + "learning_rate": 4.4730410447761194e-05, + "loss": 0.0003, + "step": 45192 + }, + { + "epoch": 42.16, + "learning_rate": 4.472994402985075e-05, + "loss": 0.0033, + "step": 45196 + }, + { + "epoch": 42.16, + "learning_rate": 4.4729477611940304e-05, + "loss": 0.0011, + "step": 45200 + }, + { + "epoch": 42.17, + "learning_rate": 4.472901119402985e-05, + "loss": 0.0, + "step": 45204 + }, + { + "epoch": 42.17, + "learning_rate": 4.472854477611941e-05, + "loss": 0.0017, + "step": 45208 + }, + { + "epoch": 42.18, + "learning_rate": 4.4728078358208955e-05, + "loss": 0.0001, + "step": 45212 + }, + { + "epoch": 42.18, + "learning_rate": 4.472761194029851e-05, + "loss": 0.0001, + "step": 45216 + }, + { + "epoch": 42.18, + "learning_rate": 4.4727145522388065e-05, + "loss": 0.0, + "step": 45220 + }, + { + "epoch": 42.19, + "learning_rate": 4.4726679104477613e-05, + "loss": 0.0, + "step": 45224 + }, + { + "epoch": 42.19, + "learning_rate": 4.472621268656716e-05, + "loss": 0.0, + "step": 45228 + }, + { + "epoch": 42.19, + "learning_rate": 4.472574626865672e-05, + "loss": 0.0005, + "step": 45232 + }, + { + "epoch": 42.2, + "learning_rate": 4.472527985074627e-05, + "loss": 0.0001, + "step": 45236 + }, + { + "epoch": 42.2, + "learning_rate": 4.472481343283582e-05, + "loss": 0.0001, + "step": 45240 + }, + { + "epoch": 42.21, + "learning_rate": 4.4724347014925374e-05, + "loss": 0.001, + "step": 45244 + }, + { + "epoch": 42.21, + "learning_rate": 4.472388059701493e-05, + "loss": 0.0001, + "step": 45248 + }, + { + "epoch": 42.21, + "learning_rate": 4.472341417910448e-05, + "loss": 0.0, + "step": 45252 + }, + { + "epoch": 42.22, + "learning_rate": 4.472294776119403e-05, + "loss": 0.0018, + "step": 45256 + }, + { + "epoch": 42.22, + "learning_rate": 4.472248134328359e-05, + "loss": 0.0006, + "step": 45260 + }, + { + "epoch": 42.22, + "learning_rate": 4.4722014925373135e-05, + "loss": 0.0, + "step": 45264 + }, + { + "epoch": 42.23, + "learning_rate": 4.472154850746269e-05, + "loss": 0.0001, + "step": 45268 + }, + { + "epoch": 42.23, + "learning_rate": 4.472108208955224e-05, + "loss": 0.0004, + "step": 45272 + }, + { + "epoch": 42.24, + "learning_rate": 4.472061567164179e-05, + "loss": 0.0028, + "step": 45276 + }, + { + "epoch": 42.24, + "learning_rate": 4.472014925373135e-05, + "loss": 0.0, + "step": 45280 + }, + { + "epoch": 42.24, + "learning_rate": 4.4719682835820896e-05, + "loss": 0.0001, + "step": 45284 + }, + { + "epoch": 42.25, + "learning_rate": 4.4719216417910444e-05, + "loss": 0.0, + "step": 45288 + }, + { + "epoch": 42.25, + "learning_rate": 4.4718750000000006e-05, + "loss": 0.0003, + "step": 45292 + }, + { + "epoch": 42.25, + "learning_rate": 4.4718283582089554e-05, + "loss": 0.0, + "step": 45296 + }, + { + "epoch": 42.26, + "learning_rate": 4.47178171641791e-05, + "loss": 0.004, + "step": 45300 + }, + { + "epoch": 42.26, + "learning_rate": 4.471735074626866e-05, + "loss": 0.0, + "step": 45304 + }, + { + "epoch": 42.26, + "learning_rate": 4.471688432835821e-05, + "loss": 0.0, + "step": 45308 + }, + { + "epoch": 42.27, + "learning_rate": 4.471641791044776e-05, + "loss": 0.0014, + "step": 45312 + }, + { + "epoch": 42.27, + "learning_rate": 4.4715951492537315e-05, + "loss": 0.0, + "step": 45316 + }, + { + "epoch": 42.28, + "learning_rate": 4.471548507462687e-05, + "loss": 0.0, + "step": 45320 + }, + { + "epoch": 42.28, + "learning_rate": 4.471501865671642e-05, + "loss": 0.0001, + "step": 45324 + }, + { + "epoch": 42.28, + "learning_rate": 4.471455223880597e-05, + "loss": 0.0005, + "step": 45328 + }, + { + "epoch": 42.29, + "learning_rate": 4.471408582089552e-05, + "loss": 0.0, + "step": 45332 + }, + { + "epoch": 42.29, + "learning_rate": 4.4713619402985076e-05, + "loss": 0.0002, + "step": 45336 + }, + { + "epoch": 42.29, + "learning_rate": 4.471315298507463e-05, + "loss": 0.0, + "step": 45340 + }, + { + "epoch": 42.3, + "learning_rate": 4.471268656716418e-05, + "loss": 0.0031, + "step": 45344 + }, + { + "epoch": 42.3, + "learning_rate": 4.4712220149253734e-05, + "loss": 0.0, + "step": 45348 + }, + { + "epoch": 42.31, + "learning_rate": 4.471175373134329e-05, + "loss": 0.0, + "step": 45352 + }, + { + "epoch": 42.31, + "learning_rate": 4.471128731343284e-05, + "loss": 0.0001, + "step": 45356 + }, + { + "epoch": 42.31, + "learning_rate": 4.471082089552239e-05, + "loss": 0.0, + "step": 45360 + }, + { + "epoch": 42.32, + "learning_rate": 4.471035447761194e-05, + "loss": 0.0, + "step": 45364 + }, + { + "epoch": 42.32, + "learning_rate": 4.4709888059701495e-05, + "loss": 0.0001, + "step": 45368 + }, + { + "epoch": 42.32, + "learning_rate": 4.470942164179105e-05, + "loss": 0.0006, + "step": 45372 + }, + { + "epoch": 42.33, + "learning_rate": 4.47089552238806e-05, + "loss": 0.0, + "step": 45376 + }, + { + "epoch": 42.33, + "learning_rate": 4.470848880597015e-05, + "loss": 0.0045, + "step": 45380 + }, + { + "epoch": 42.34, + "learning_rate": 4.470802238805971e-05, + "loss": 0.0004, + "step": 45384 + }, + { + "epoch": 42.34, + "learning_rate": 4.4707555970149256e-05, + "loss": 0.0, + "step": 45388 + }, + { + "epoch": 42.34, + "learning_rate": 4.4707089552238804e-05, + "loss": 0.0006, + "step": 45392 + }, + { + "epoch": 42.35, + "learning_rate": 4.470662313432836e-05, + "loss": 0.0, + "step": 45396 + }, + { + "epoch": 42.35, + "learning_rate": 4.4706156716417914e-05, + "loss": 0.0001, + "step": 45400 + }, + { + "epoch": 42.35, + "learning_rate": 4.470569029850746e-05, + "loss": 0.0, + "step": 45404 + }, + { + "epoch": 42.36, + "learning_rate": 4.470522388059702e-05, + "loss": 0.0005, + "step": 45408 + }, + { + "epoch": 42.36, + "learning_rate": 4.470475746268657e-05, + "loss": 0.0001, + "step": 45412 + }, + { + "epoch": 42.37, + "learning_rate": 4.470429104477612e-05, + "loss": 0.0004, + "step": 45416 + }, + { + "epoch": 42.37, + "learning_rate": 4.4703824626865675e-05, + "loss": 0.0001, + "step": 45420 + }, + { + "epoch": 42.37, + "learning_rate": 4.470335820895522e-05, + "loss": 0.0008, + "step": 45424 + }, + { + "epoch": 42.38, + "learning_rate": 4.470289179104478e-05, + "loss": 0.0, + "step": 45428 + }, + { + "epoch": 42.38, + "learning_rate": 4.470242537313433e-05, + "loss": 0.0001, + "step": 45432 + }, + { + "epoch": 42.38, + "learning_rate": 4.470195895522388e-05, + "loss": 0.0001, + "step": 45436 + }, + { + "epoch": 42.39, + "learning_rate": 4.4701492537313436e-05, + "loss": 0.0002, + "step": 45440 + }, + { + "epoch": 42.39, + "learning_rate": 4.470102611940299e-05, + "loss": 0.0, + "step": 45444 + }, + { + "epoch": 42.4, + "learning_rate": 4.470055970149254e-05, + "loss": 0.0, + "step": 45448 + }, + { + "epoch": 42.4, + "learning_rate": 4.470009328358209e-05, + "loss": 0.0, + "step": 45452 + }, + { + "epoch": 42.4, + "learning_rate": 4.469962686567164e-05, + "loss": 0.0, + "step": 45456 + }, + { + "epoch": 42.41, + "learning_rate": 4.46991604477612e-05, + "loss": 0.0, + "step": 45460 + }, + { + "epoch": 42.41, + "learning_rate": 4.4698694029850745e-05, + "loss": 0.0, + "step": 45464 + }, + { + "epoch": 42.41, + "learning_rate": 4.46982276119403e-05, + "loss": 0.0, + "step": 45468 + }, + { + "epoch": 42.42, + "learning_rate": 4.4697761194029855e-05, + "loss": 0.0001, + "step": 45472 + }, + { + "epoch": 42.42, + "learning_rate": 4.46972947761194e-05, + "loss": 0.0001, + "step": 45476 + }, + { + "epoch": 42.43, + "learning_rate": 4.469682835820896e-05, + "loss": 0.0, + "step": 45480 + }, + { + "epoch": 42.43, + "learning_rate": 4.4696361940298506e-05, + "loss": 0.001, + "step": 45484 + }, + { + "epoch": 42.43, + "learning_rate": 4.469589552238806e-05, + "loss": 0.0001, + "step": 45488 + }, + { + "epoch": 42.44, + "learning_rate": 4.4695429104477616e-05, + "loss": 0.0, + "step": 45492 + }, + { + "epoch": 42.44, + "learning_rate": 4.4694962686567164e-05, + "loss": 0.0, + "step": 45496 + }, + { + "epoch": 42.44, + "learning_rate": 4.469449626865672e-05, + "loss": 0.0, + "step": 45500 + }, + { + "epoch": 42.44, + "eval_exact_match": 0.7485493230174082, + "eval_exec": 0.7669245647969052, + "eval_loss": 0.42073753476142883, + "eval_runtime": 1143.3015, + "eval_samples_per_second": 0.904, + "step": 45500 + }, + { + "epoch": 42.45, + "learning_rate": 4.4694029850746274e-05, + "loss": 0.0001, + "step": 45504 + }, + { + "epoch": 42.45, + "learning_rate": 4.469356343283582e-05, + "loss": 0.0002, + "step": 45508 + }, + { + "epoch": 42.46, + "learning_rate": 4.469309701492538e-05, + "loss": 0.0001, + "step": 45512 + }, + { + "epoch": 42.46, + "learning_rate": 4.4692630597014925e-05, + "loss": 0.0, + "step": 45516 + }, + { + "epoch": 42.46, + "learning_rate": 4.469216417910448e-05, + "loss": 0.0001, + "step": 45520 + }, + { + "epoch": 42.47, + "learning_rate": 4.4691697761194035e-05, + "loss": 0.0001, + "step": 45524 + }, + { + "epoch": 42.47, + "learning_rate": 4.469123134328358e-05, + "loss": 0.0003, + "step": 45528 + }, + { + "epoch": 42.47, + "learning_rate": 4.469076492537314e-05, + "loss": 0.0, + "step": 45532 + }, + { + "epoch": 42.48, + "learning_rate": 4.469029850746269e-05, + "loss": 0.001, + "step": 45536 + }, + { + "epoch": 42.48, + "learning_rate": 4.468983208955224e-05, + "loss": 0.0009, + "step": 45540 + }, + { + "epoch": 42.49, + "learning_rate": 4.468936567164179e-05, + "loss": 0.0001, + "step": 45544 + }, + { + "epoch": 42.49, + "learning_rate": 4.468889925373135e-05, + "loss": 0.0, + "step": 45548 + }, + { + "epoch": 42.49, + "learning_rate": 4.46884328358209e-05, + "loss": 0.0004, + "step": 45552 + }, + { + "epoch": 42.5, + "learning_rate": 4.468796641791045e-05, + "loss": 0.0, + "step": 45556 + }, + { + "epoch": 42.5, + "learning_rate": 4.46875e-05, + "loss": 0.0, + "step": 45560 + }, + { + "epoch": 42.5, + "learning_rate": 4.468703358208956e-05, + "loss": 0.0068, + "step": 45564 + }, + { + "epoch": 42.51, + "learning_rate": 4.4686567164179105e-05, + "loss": 0.0, + "step": 45568 + }, + { + "epoch": 42.51, + "learning_rate": 4.468610074626866e-05, + "loss": 0.0, + "step": 45572 + }, + { + "epoch": 42.51, + "learning_rate": 4.468563432835821e-05, + "loss": 0.0, + "step": 45576 + }, + { + "epoch": 42.52, + "learning_rate": 4.468516791044776e-05, + "loss": 0.0, + "step": 45580 + }, + { + "epoch": 42.52, + "learning_rate": 4.468470149253732e-05, + "loss": 0.0, + "step": 45584 + }, + { + "epoch": 42.53, + "learning_rate": 4.4684235074626866e-05, + "loss": 0.0003, + "step": 45588 + }, + { + "epoch": 42.53, + "learning_rate": 4.468376865671642e-05, + "loss": 0.0, + "step": 45592 + }, + { + "epoch": 42.53, + "learning_rate": 4.4683302238805976e-05, + "loss": 0.0002, + "step": 45596 + }, + { + "epoch": 42.54, + "learning_rate": 4.4682835820895524e-05, + "loss": 0.0001, + "step": 45600 + }, + { + "epoch": 42.54, + "learning_rate": 4.468236940298507e-05, + "loss": 0.0002, + "step": 45604 + }, + { + "epoch": 42.54, + "learning_rate": 4.4681902985074634e-05, + "loss": 0.0, + "step": 45608 + }, + { + "epoch": 42.55, + "learning_rate": 4.468143656716418e-05, + "loss": 0.002, + "step": 45612 + }, + { + "epoch": 42.55, + "learning_rate": 4.468097014925373e-05, + "loss": 0.0003, + "step": 45616 + }, + { + "epoch": 42.56, + "learning_rate": 4.4680503731343285e-05, + "loss": 0.0, + "step": 45620 + }, + { + "epoch": 42.56, + "learning_rate": 4.468003731343284e-05, + "loss": 0.0, + "step": 45624 + }, + { + "epoch": 42.56, + "learning_rate": 4.467957089552239e-05, + "loss": 0.0001, + "step": 45628 + }, + { + "epoch": 42.57, + "learning_rate": 4.467910447761194e-05, + "loss": 0.0001, + "step": 45632 + }, + { + "epoch": 42.57, + "learning_rate": 4.467863805970149e-05, + "loss": 0.0, + "step": 45636 + }, + { + "epoch": 42.57, + "learning_rate": 4.4678171641791046e-05, + "loss": 0.0, + "step": 45640 + }, + { + "epoch": 42.58, + "learning_rate": 4.46777052238806e-05, + "loss": 0.0, + "step": 45644 + }, + { + "epoch": 42.58, + "learning_rate": 4.467723880597015e-05, + "loss": 0.0, + "step": 45648 + }, + { + "epoch": 42.59, + "learning_rate": 4.4676772388059704e-05, + "loss": 0.0, + "step": 45652 + }, + { + "epoch": 42.59, + "learning_rate": 4.467630597014926e-05, + "loss": 0.0031, + "step": 45656 + }, + { + "epoch": 42.59, + "learning_rate": 4.467583955223881e-05, + "loss": 0.0, + "step": 45660 + }, + { + "epoch": 42.6, + "learning_rate": 4.4675373134328355e-05, + "loss": 0.0002, + "step": 45664 + }, + { + "epoch": 42.6, + "learning_rate": 4.467490671641792e-05, + "loss": 0.0, + "step": 45668 + }, + { + "epoch": 42.6, + "learning_rate": 4.4674440298507465e-05, + "loss": 0.0006, + "step": 45672 + }, + { + "epoch": 42.61, + "learning_rate": 4.467397388059702e-05, + "loss": 0.0003, + "step": 45676 + }, + { + "epoch": 42.61, + "learning_rate": 4.467350746268657e-05, + "loss": 0.0002, + "step": 45680 + }, + { + "epoch": 42.62, + "learning_rate": 4.467304104477612e-05, + "loss": 0.0, + "step": 45684 + }, + { + "epoch": 42.62, + "learning_rate": 4.467257462686568e-05, + "loss": 0.0, + "step": 45688 + }, + { + "epoch": 42.62, + "learning_rate": 4.4672108208955226e-05, + "loss": 0.0, + "step": 45692 + }, + { + "epoch": 42.63, + "learning_rate": 4.4671641791044774e-05, + "loss": 0.0, + "step": 45696 + }, + { + "epoch": 42.63, + "learning_rate": 4.4671175373134336e-05, + "loss": 0.0002, + "step": 45700 + }, + { + "epoch": 42.63, + "learning_rate": 4.4670708955223884e-05, + "loss": 0.0, + "step": 45704 + }, + { + "epoch": 42.64, + "learning_rate": 4.467024253731343e-05, + "loss": 0.0001, + "step": 45708 + }, + { + "epoch": 42.64, + "learning_rate": 4.466977611940299e-05, + "loss": 0.0, + "step": 45712 + }, + { + "epoch": 42.65, + "learning_rate": 4.466930970149254e-05, + "loss": 0.0, + "step": 45716 + }, + { + "epoch": 42.65, + "learning_rate": 4.466884328358209e-05, + "loss": 0.0002, + "step": 45720 + }, + { + "epoch": 42.65, + "learning_rate": 4.4668376865671645e-05, + "loss": 0.0002, + "step": 45724 + }, + { + "epoch": 42.66, + "learning_rate": 4.46679104477612e-05, + "loss": 0.0031, + "step": 45728 + }, + { + "epoch": 42.66, + "learning_rate": 4.466744402985075e-05, + "loss": 0.0003, + "step": 45732 + }, + { + "epoch": 42.66, + "learning_rate": 4.46669776119403e-05, + "loss": 0.0001, + "step": 45736 + }, + { + "epoch": 42.67, + "learning_rate": 4.466651119402985e-05, + "loss": 0.0, + "step": 45740 + }, + { + "epoch": 42.67, + "learning_rate": 4.4666044776119406e-05, + "loss": 0.0, + "step": 45744 + }, + { + "epoch": 42.68, + "learning_rate": 4.466557835820896e-05, + "loss": 0.0001, + "step": 45748 + }, + { + "epoch": 42.68, + "learning_rate": 4.466511194029851e-05, + "loss": 0.0001, + "step": 45752 + }, + { + "epoch": 42.68, + "learning_rate": 4.466464552238806e-05, + "loss": 0.0, + "step": 45756 + }, + { + "epoch": 42.69, + "learning_rate": 4.466417910447762e-05, + "loss": 0.0, + "step": 45760 + }, + { + "epoch": 42.69, + "learning_rate": 4.466371268656717e-05, + "loss": 0.0004, + "step": 45764 + }, + { + "epoch": 42.69, + "learning_rate": 4.4663246268656715e-05, + "loss": 0.0001, + "step": 45768 + }, + { + "epoch": 42.7, + "learning_rate": 4.466277985074627e-05, + "loss": 0.0004, + "step": 45772 + }, + { + "epoch": 42.7, + "learning_rate": 4.4662313432835825e-05, + "loss": 0.0003, + "step": 45776 + }, + { + "epoch": 42.71, + "learning_rate": 4.466184701492537e-05, + "loss": 0.0, + "step": 45780 + }, + { + "epoch": 42.71, + "learning_rate": 4.466138059701493e-05, + "loss": 0.0, + "step": 45784 + }, + { + "epoch": 42.71, + "learning_rate": 4.466091417910448e-05, + "loss": 0.0001, + "step": 45788 + }, + { + "epoch": 42.72, + "learning_rate": 4.466044776119403e-05, + "loss": 0.0, + "step": 45792 + }, + { + "epoch": 42.72, + "learning_rate": 4.4659981343283586e-05, + "loss": 0.0001, + "step": 45796 + }, + { + "epoch": 42.72, + "learning_rate": 4.4659514925373134e-05, + "loss": 0.0002, + "step": 45800 + }, + { + "epoch": 42.73, + "learning_rate": 4.465904850746269e-05, + "loss": 0.0003, + "step": 45804 + }, + { + "epoch": 42.73, + "learning_rate": 4.4658582089552244e-05, + "loss": 0.0, + "step": 45808 + }, + { + "epoch": 42.73, + "learning_rate": 4.465811567164179e-05, + "loss": 0.0002, + "step": 45812 + }, + { + "epoch": 42.74, + "learning_rate": 4.465764925373134e-05, + "loss": 0.0, + "step": 45816 + }, + { + "epoch": 42.74, + "learning_rate": 4.46571828358209e-05, + "loss": 0.0016, + "step": 45820 + }, + { + "epoch": 42.75, + "learning_rate": 4.465671641791045e-05, + "loss": 0.0005, + "step": 45824 + }, + { + "epoch": 42.75, + "learning_rate": 4.465625e-05, + "loss": 0.0003, + "step": 45828 + }, + { + "epoch": 42.75, + "learning_rate": 4.465578358208955e-05, + "loss": 0.0, + "step": 45832 + }, + { + "epoch": 42.76, + "learning_rate": 4.465531716417911e-05, + "loss": 0.0033, + "step": 45836 + }, + { + "epoch": 42.76, + "learning_rate": 4.465485074626866e-05, + "loss": 0.0, + "step": 45840 + }, + { + "epoch": 42.76, + "learning_rate": 4.465438432835821e-05, + "loss": 0.0001, + "step": 45844 + }, + { + "epoch": 42.77, + "learning_rate": 4.4653917910447766e-05, + "loss": 0.0, + "step": 45848 + }, + { + "epoch": 42.77, + "learning_rate": 4.465345149253732e-05, + "loss": 0.0007, + "step": 45852 + }, + { + "epoch": 42.78, + "learning_rate": 4.465298507462687e-05, + "loss": 0.0, + "step": 45856 + }, + { + "epoch": 42.78, + "learning_rate": 4.465251865671642e-05, + "loss": 0.0, + "step": 45860 + }, + { + "epoch": 42.78, + "learning_rate": 4.465205223880597e-05, + "loss": 0.0, + "step": 45864 + }, + { + "epoch": 42.79, + "learning_rate": 4.465158582089553e-05, + "loss": 0.0, + "step": 45868 + }, + { + "epoch": 42.79, + "learning_rate": 4.4651119402985075e-05, + "loss": 0.0001, + "step": 45872 + }, + { + "epoch": 42.79, + "learning_rate": 4.465065298507463e-05, + "loss": 0.0013, + "step": 45876 + }, + { + "epoch": 42.8, + "learning_rate": 4.4650186567164185e-05, + "loss": 0.0, + "step": 45880 + }, + { + "epoch": 42.8, + "learning_rate": 4.464972014925373e-05, + "loss": 0.0, + "step": 45884 + }, + { + "epoch": 42.81, + "learning_rate": 4.464925373134329e-05, + "loss": 0.0, + "step": 45888 + }, + { + "epoch": 42.81, + "learning_rate": 4.4648787313432836e-05, + "loss": 0.0005, + "step": 45892 + }, + { + "epoch": 42.81, + "learning_rate": 4.464832089552239e-05, + "loss": 0.0, + "step": 45896 + }, + { + "epoch": 42.82, + "learning_rate": 4.4647854477611946e-05, + "loss": 0.0001, + "step": 45900 + }, + { + "epoch": 42.82, + "learning_rate": 4.4647388059701494e-05, + "loss": 0.0, + "step": 45904 + }, + { + "epoch": 42.82, + "learning_rate": 4.464692164179104e-05, + "loss": 0.0105, + "step": 45908 + }, + { + "epoch": 42.83, + "learning_rate": 4.4646455223880603e-05, + "loss": 0.0007, + "step": 45912 + }, + { + "epoch": 42.83, + "learning_rate": 4.464598880597015e-05, + "loss": 0.0002, + "step": 45916 + }, + { + "epoch": 42.84, + "learning_rate": 4.46455223880597e-05, + "loss": 0.0, + "step": 45920 + }, + { + "epoch": 42.84, + "learning_rate": 4.4645055970149255e-05, + "loss": 0.0043, + "step": 45924 + }, + { + "epoch": 42.84, + "learning_rate": 4.464458955223881e-05, + "loss": 0.0, + "step": 45928 + }, + { + "epoch": 42.85, + "learning_rate": 4.464412313432836e-05, + "loss": 0.0002, + "step": 45932 + }, + { + "epoch": 42.85, + "learning_rate": 4.464365671641791e-05, + "loss": 0.0033, + "step": 45936 + }, + { + "epoch": 42.85, + "learning_rate": 4.464319029850747e-05, + "loss": 0.0001, + "step": 45940 + }, + { + "epoch": 42.86, + "learning_rate": 4.4642723880597016e-05, + "loss": 0.0001, + "step": 45944 + }, + { + "epoch": 42.86, + "learning_rate": 4.464225746268657e-05, + "loss": 0.0, + "step": 45948 + }, + { + "epoch": 42.87, + "learning_rate": 4.464179104477612e-05, + "loss": 0.0024, + "step": 45952 + }, + { + "epoch": 42.87, + "learning_rate": 4.4641324626865674e-05, + "loss": 0.0008, + "step": 45956 + }, + { + "epoch": 42.87, + "learning_rate": 4.464085820895523e-05, + "loss": 0.0011, + "step": 45960 + }, + { + "epoch": 42.88, + "learning_rate": 4.464039179104478e-05, + "loss": 0.0015, + "step": 45964 + }, + { + "epoch": 42.88, + "learning_rate": 4.4639925373134325e-05, + "loss": 0.0, + "step": 45968 + }, + { + "epoch": 42.88, + "learning_rate": 4.4639458955223886e-05, + "loss": 0.0001, + "step": 45972 + }, + { + "epoch": 42.89, + "learning_rate": 4.4638992537313435e-05, + "loss": 0.0009, + "step": 45976 + }, + { + "epoch": 42.89, + "learning_rate": 4.463852611940298e-05, + "loss": 0.0063, + "step": 45980 + }, + { + "epoch": 42.9, + "learning_rate": 4.463805970149254e-05, + "loss": 0.0, + "step": 45984 + }, + { + "epoch": 42.9, + "learning_rate": 4.463759328358209e-05, + "loss": 0.0006, + "step": 45988 + }, + { + "epoch": 42.9, + "learning_rate": 4.463712686567164e-05, + "loss": 0.0001, + "step": 45992 + }, + { + "epoch": 42.91, + "learning_rate": 4.4636660447761196e-05, + "loss": 0.0009, + "step": 45996 + }, + { + "epoch": 42.91, + "learning_rate": 4.463619402985075e-05, + "loss": 0.0002, + "step": 46000 + }, + { + "epoch": 42.91, + "eval_exact_match": 0.7340425531914894, + "eval_exec": 0.7678916827852998, + "eval_loss": 0.4125958979129791, + "eval_runtime": 1108.5153, + "eval_samples_per_second": 0.933, + "step": 46000 + }, + { + "epoch": 42.91, + "learning_rate": 4.4635727611940305e-05, + "loss": 0.0128, + "step": 46004 + }, + { + "epoch": 42.92, + "learning_rate": 4.4635261194029854e-05, + "loss": 0.0002, + "step": 46008 + }, + { + "epoch": 42.92, + "learning_rate": 4.46347947761194e-05, + "loss": 0.0003, + "step": 46012 + }, + { + "epoch": 42.93, + "learning_rate": 4.463432835820896e-05, + "loss": 0.0, + "step": 46016 + }, + { + "epoch": 42.93, + "learning_rate": 4.463386194029851e-05, + "loss": 0.0, + "step": 46020 + }, + { + "epoch": 42.93, + "learning_rate": 4.463339552238806e-05, + "loss": 0.0002, + "step": 46024 + }, + { + "epoch": 42.94, + "learning_rate": 4.4632929104477614e-05, + "loss": 0.0, + "step": 46028 + }, + { + "epoch": 42.94, + "learning_rate": 4.463246268656717e-05, + "loss": 0.0, + "step": 46032 + }, + { + "epoch": 42.94, + "learning_rate": 4.463199626865672e-05, + "loss": 0.0019, + "step": 46036 + }, + { + "epoch": 42.95, + "learning_rate": 4.463152985074627e-05, + "loss": 0.0003, + "step": 46040 + }, + { + "epoch": 42.95, + "learning_rate": 4.463106343283582e-05, + "loss": 0.0, + "step": 46044 + }, + { + "epoch": 42.96, + "learning_rate": 4.4630597014925375e-05, + "loss": 0.0, + "step": 46048 + }, + { + "epoch": 42.96, + "learning_rate": 4.463013059701493e-05, + "loss": 0.0001, + "step": 46052 + }, + { + "epoch": 42.96, + "learning_rate": 4.462966417910448e-05, + "loss": 0.0005, + "step": 46056 + }, + { + "epoch": 42.97, + "learning_rate": 4.4629197761194033e-05, + "loss": 0.0, + "step": 46060 + }, + { + "epoch": 42.97, + "learning_rate": 4.462873134328359e-05, + "loss": 0.0011, + "step": 46064 + }, + { + "epoch": 42.97, + "learning_rate": 4.4628264925373136e-05, + "loss": 0.0, + "step": 46068 + }, + { + "epoch": 42.98, + "learning_rate": 4.4627798507462685e-05, + "loss": 0.0, + "step": 46072 + }, + { + "epoch": 42.98, + "learning_rate": 4.462733208955224e-05, + "loss": 0.0008, + "step": 46076 + }, + { + "epoch": 42.98, + "learning_rate": 4.4626865671641794e-05, + "loss": 0.0051, + "step": 46080 + }, + { + "epoch": 42.99, + "learning_rate": 4.462639925373134e-05, + "loss": 0.0002, + "step": 46084 + }, + { + "epoch": 42.99, + "learning_rate": 4.46259328358209e-05, + "loss": 0.0, + "step": 46088 + }, + { + "epoch": 43.0, + "learning_rate": 4.462546641791045e-05, + "loss": 0.0, + "step": 46092 + }, + { + "epoch": 43.0, + "learning_rate": 4.4625e-05, + "loss": 0.0002, + "step": 46096 + }, + { + "epoch": 43.0, + "learning_rate": 4.4624533582089555e-05, + "loss": 0.0, + "step": 46100 + }, + { + "epoch": 43.01, + "learning_rate": 4.4624067164179104e-05, + "loss": 0.007, + "step": 46104 + }, + { + "epoch": 43.01, + "learning_rate": 4.462360074626866e-05, + "loss": 0.0003, + "step": 46108 + }, + { + "epoch": 43.01, + "learning_rate": 4.462313432835821e-05, + "loss": 0.0023, + "step": 46112 + }, + { + "epoch": 43.02, + "learning_rate": 4.462266791044776e-05, + "loss": 0.0001, + "step": 46116 + }, + { + "epoch": 43.02, + "learning_rate": 4.4622201492537316e-05, + "loss": 0.0003, + "step": 46120 + }, + { + "epoch": 43.03, + "learning_rate": 4.462173507462687e-05, + "loss": 0.0001, + "step": 46124 + }, + { + "epoch": 43.03, + "learning_rate": 4.462126865671642e-05, + "loss": 0.0001, + "step": 46128 + }, + { + "epoch": 43.03, + "learning_rate": 4.462080223880597e-05, + "loss": 0.0006, + "step": 46132 + }, + { + "epoch": 43.04, + "learning_rate": 4.462033582089552e-05, + "loss": 0.0, + "step": 46136 + }, + { + "epoch": 43.04, + "learning_rate": 4.461986940298508e-05, + "loss": 0.0001, + "step": 46140 + }, + { + "epoch": 43.04, + "learning_rate": 4.4619402985074626e-05, + "loss": 0.0, + "step": 46144 + }, + { + "epoch": 43.05, + "learning_rate": 4.461893656716418e-05, + "loss": 0.0, + "step": 46148 + }, + { + "epoch": 43.05, + "learning_rate": 4.4618470149253735e-05, + "loss": 0.0001, + "step": 46152 + }, + { + "epoch": 43.06, + "learning_rate": 4.4618003731343283e-05, + "loss": 0.0, + "step": 46156 + }, + { + "epoch": 43.06, + "learning_rate": 4.461753731343284e-05, + "loss": 0.0, + "step": 46160 + }, + { + "epoch": 43.06, + "learning_rate": 4.4617070895522386e-05, + "loss": 0.0, + "step": 46164 + }, + { + "epoch": 43.07, + "learning_rate": 4.461660447761195e-05, + "loss": 0.0, + "step": 46168 + }, + { + "epoch": 43.07, + "learning_rate": 4.4616138059701496e-05, + "loss": 0.0, + "step": 46172 + }, + { + "epoch": 43.07, + "learning_rate": 4.4615671641791044e-05, + "loss": 0.0001, + "step": 46176 + }, + { + "epoch": 43.08, + "learning_rate": 4.46152052238806e-05, + "loss": 0.0003, + "step": 46180 + }, + { + "epoch": 43.08, + "learning_rate": 4.4614738805970154e-05, + "loss": 0.0001, + "step": 46184 + }, + { + "epoch": 43.09, + "learning_rate": 4.46142723880597e-05, + "loss": 0.0, + "step": 46188 + }, + { + "epoch": 43.09, + "learning_rate": 4.461380597014926e-05, + "loss": 0.0, + "step": 46192 + }, + { + "epoch": 43.09, + "learning_rate": 4.4613339552238805e-05, + "loss": 0.0001, + "step": 46196 + }, + { + "epoch": 43.1, + "learning_rate": 4.461287313432836e-05, + "loss": 0.0, + "step": 46200 + }, + { + "epoch": 43.1, + "learning_rate": 4.4612406716417915e-05, + "loss": 0.0, + "step": 46204 + }, + { + "epoch": 43.1, + "learning_rate": 4.461194029850746e-05, + "loss": 0.0002, + "step": 46208 + }, + { + "epoch": 43.11, + "learning_rate": 4.461147388059702e-05, + "loss": 0.0, + "step": 46212 + }, + { + "epoch": 43.11, + "learning_rate": 4.461100746268657e-05, + "loss": 0.0012, + "step": 46216 + }, + { + "epoch": 43.12, + "learning_rate": 4.461054104477612e-05, + "loss": 0.0009, + "step": 46220 + }, + { + "epoch": 43.12, + "learning_rate": 4.461007462686567e-05, + "loss": 0.0003, + "step": 46224 + }, + { + "epoch": 43.12, + "learning_rate": 4.460960820895523e-05, + "loss": 0.0, + "step": 46228 + }, + { + "epoch": 43.13, + "learning_rate": 4.460914179104478e-05, + "loss": 0.0, + "step": 46232 + }, + { + "epoch": 43.13, + "learning_rate": 4.460867537313433e-05, + "loss": 0.0001, + "step": 46236 + }, + { + "epoch": 43.13, + "learning_rate": 4.460820895522388e-05, + "loss": 0.0, + "step": 46240 + }, + { + "epoch": 43.14, + "learning_rate": 4.460774253731344e-05, + "loss": 0.0, + "step": 46244 + }, + { + "epoch": 43.14, + "learning_rate": 4.4607276119402985e-05, + "loss": 0.0, + "step": 46248 + }, + { + "epoch": 43.15, + "learning_rate": 4.460680970149254e-05, + "loss": 0.0, + "step": 46252 + }, + { + "epoch": 43.15, + "learning_rate": 4.460634328358209e-05, + "loss": 0.0, + "step": 46256 + }, + { + "epoch": 43.15, + "learning_rate": 4.460587686567164e-05, + "loss": 0.0, + "step": 46260 + }, + { + "epoch": 43.16, + "learning_rate": 4.46054104477612e-05, + "loss": 0.0001, + "step": 46264 + }, + { + "epoch": 43.16, + "learning_rate": 4.4604944029850746e-05, + "loss": 0.0, + "step": 46268 + }, + { + "epoch": 43.16, + "learning_rate": 4.46044776119403e-05, + "loss": 0.0077, + "step": 46272 + }, + { + "epoch": 43.17, + "learning_rate": 4.4604011194029856e-05, + "loss": 0.0, + "step": 46276 + }, + { + "epoch": 43.17, + "learning_rate": 4.4603544776119404e-05, + "loss": 0.0, + "step": 46280 + }, + { + "epoch": 43.18, + "learning_rate": 4.460307835820895e-05, + "loss": 0.0, + "step": 46284 + }, + { + "epoch": 43.18, + "learning_rate": 4.4602611940298514e-05, + "loss": 0.0001, + "step": 46288 + }, + { + "epoch": 43.18, + "learning_rate": 4.460214552238806e-05, + "loss": 0.0015, + "step": 46292 + }, + { + "epoch": 43.19, + "learning_rate": 4.460167910447761e-05, + "loss": 0.0, + "step": 46296 + }, + { + "epoch": 43.19, + "learning_rate": 4.4601212686567165e-05, + "loss": 0.0, + "step": 46300 + }, + { + "epoch": 43.19, + "learning_rate": 4.460074626865672e-05, + "loss": 0.0015, + "step": 46304 + }, + { + "epoch": 43.2, + "learning_rate": 4.460027985074627e-05, + "loss": 0.0, + "step": 46308 + }, + { + "epoch": 43.2, + "learning_rate": 4.459981343283582e-05, + "loss": 0.0, + "step": 46312 + }, + { + "epoch": 43.21, + "learning_rate": 4.459934701492537e-05, + "loss": 0.0002, + "step": 46316 + }, + { + "epoch": 43.21, + "learning_rate": 4.4598880597014926e-05, + "loss": 0.0, + "step": 46320 + }, + { + "epoch": 43.21, + "learning_rate": 4.459841417910448e-05, + "loss": 0.0046, + "step": 46324 + }, + { + "epoch": 43.22, + "learning_rate": 4.459794776119403e-05, + "loss": 0.0, + "step": 46328 + }, + { + "epoch": 43.22, + "learning_rate": 4.4597481343283584e-05, + "loss": 0.0005, + "step": 46332 + }, + { + "epoch": 43.22, + "learning_rate": 4.459701492537314e-05, + "loss": 0.0001, + "step": 46336 + }, + { + "epoch": 43.23, + "learning_rate": 4.459654850746269e-05, + "loss": 0.0002, + "step": 46340 + }, + { + "epoch": 43.23, + "learning_rate": 4.459608208955224e-05, + "loss": 0.0006, + "step": 46344 + }, + { + "epoch": 43.24, + "learning_rate": 4.45956156716418e-05, + "loss": 0.0006, + "step": 46348 + }, + { + "epoch": 43.24, + "learning_rate": 4.4595149253731345e-05, + "loss": 0.0003, + "step": 46352 + }, + { + "epoch": 43.24, + "learning_rate": 4.45946828358209e-05, + "loss": 0.0, + "step": 46356 + }, + { + "epoch": 43.25, + "learning_rate": 4.459421641791045e-05, + "loss": 0.0003, + "step": 46360 + }, + { + "epoch": 43.25, + "learning_rate": 4.459375e-05, + "loss": 0.0005, + "step": 46364 + }, + { + "epoch": 43.25, + "learning_rate": 4.459328358208956e-05, + "loss": 0.0, + "step": 46368 + }, + { + "epoch": 43.26, + "learning_rate": 4.4592817164179106e-05, + "loss": 0.0005, + "step": 46372 + }, + { + "epoch": 43.26, + "learning_rate": 4.4592350746268654e-05, + "loss": 0.0, + "step": 46376 + }, + { + "epoch": 43.26, + "learning_rate": 4.4591884328358216e-05, + "loss": 0.0, + "step": 46380 + }, + { + "epoch": 43.27, + "learning_rate": 4.4591417910447764e-05, + "loss": 0.0, + "step": 46384 + }, + { + "epoch": 43.27, + "learning_rate": 4.459095149253731e-05, + "loss": 0.0001, + "step": 46388 + }, + { + "epoch": 43.28, + "learning_rate": 4.459048507462687e-05, + "loss": 0.0, + "step": 46392 + }, + { + "epoch": 43.28, + "learning_rate": 4.459001865671642e-05, + "loss": 0.001, + "step": 46396 + }, + { + "epoch": 43.28, + "learning_rate": 4.458955223880597e-05, + "loss": 0.0014, + "step": 46400 + }, + { + "epoch": 43.29, + "learning_rate": 4.4589085820895525e-05, + "loss": 0.0008, + "step": 46404 + }, + { + "epoch": 43.29, + "learning_rate": 4.458861940298508e-05, + "loss": 0.0, + "step": 46408 + }, + { + "epoch": 43.29, + "learning_rate": 4.458815298507463e-05, + "loss": 0.0, + "step": 46412 + }, + { + "epoch": 43.3, + "learning_rate": 4.458768656716418e-05, + "loss": 0.0011, + "step": 46416 + }, + { + "epoch": 43.3, + "learning_rate": 4.458722014925373e-05, + "loss": 0.0, + "step": 46420 + }, + { + "epoch": 43.31, + "learning_rate": 4.4586753731343286e-05, + "loss": 0.0, + "step": 46424 + }, + { + "epoch": 43.31, + "learning_rate": 4.458628731343284e-05, + "loss": 0.0001, + "step": 46428 + }, + { + "epoch": 43.31, + "learning_rate": 4.458582089552239e-05, + "loss": 0.0, + "step": 46432 + }, + { + "epoch": 43.32, + "learning_rate": 4.458535447761194e-05, + "loss": 0.0, + "step": 46436 + }, + { + "epoch": 43.32, + "learning_rate": 4.45848880597015e-05, + "loss": 0.0009, + "step": 46440 + }, + { + "epoch": 43.32, + "learning_rate": 4.458442164179105e-05, + "loss": 0.0002, + "step": 46444 + }, + { + "epoch": 43.33, + "learning_rate": 4.4583955223880595e-05, + "loss": 0.0, + "step": 46448 + }, + { + "epoch": 43.33, + "learning_rate": 4.458348880597015e-05, + "loss": 0.0021, + "step": 46452 + }, + { + "epoch": 43.34, + "learning_rate": 4.4583022388059705e-05, + "loss": 0.0, + "step": 46456 + }, + { + "epoch": 43.34, + "learning_rate": 4.458255597014925e-05, + "loss": 0.0012, + "step": 46460 + }, + { + "epoch": 43.34, + "learning_rate": 4.458208955223881e-05, + "loss": 0.0027, + "step": 46464 + }, + { + "epoch": 43.35, + "learning_rate": 4.458162313432836e-05, + "loss": 0.0, + "step": 46468 + }, + { + "epoch": 43.35, + "learning_rate": 4.458115671641791e-05, + "loss": 0.0001, + "step": 46472 + }, + { + "epoch": 43.35, + "learning_rate": 4.4580690298507466e-05, + "loss": 0.0002, + "step": 46476 + }, + { + "epoch": 43.36, + "learning_rate": 4.4580223880597014e-05, + "loss": 0.0002, + "step": 46480 + }, + { + "epoch": 43.36, + "learning_rate": 4.457975746268657e-05, + "loss": 0.0007, + "step": 46484 + }, + { + "epoch": 43.37, + "learning_rate": 4.4579291044776124e-05, + "loss": 0.0002, + "step": 46488 + }, + { + "epoch": 43.37, + "learning_rate": 4.457882462686567e-05, + "loss": 0.0009, + "step": 46492 + }, + { + "epoch": 43.37, + "learning_rate": 4.457835820895523e-05, + "loss": 0.0063, + "step": 46496 + }, + { + "epoch": 43.38, + "learning_rate": 4.457789179104478e-05, + "loss": 0.0004, + "step": 46500 + }, + { + "epoch": 43.38, + "eval_exact_match": 0.7263056092843327, + "eval_exec": 0.758220502901354, + "eval_loss": 0.437593549489975, + "eval_runtime": 1140.2178, + "eval_samples_per_second": 0.907, + "step": 46500 + }, + { + "epoch": 43.38, + "learning_rate": 4.457742537313433e-05, + "loss": 0.0002, + "step": 46504 + }, + { + "epoch": 43.38, + "learning_rate": 4.4576958955223885e-05, + "loss": 0.0, + "step": 46508 + }, + { + "epoch": 43.39, + "learning_rate": 4.457649253731343e-05, + "loss": 0.0, + "step": 46512 + }, + { + "epoch": 43.39, + "learning_rate": 4.457602611940299e-05, + "loss": 0.0004, + "step": 46516 + }, + { + "epoch": 43.4, + "learning_rate": 4.457555970149254e-05, + "loss": 0.0001, + "step": 46520 + }, + { + "epoch": 43.4, + "learning_rate": 4.457509328358209e-05, + "loss": 0.0053, + "step": 46524 + }, + { + "epoch": 43.4, + "learning_rate": 4.4574626865671646e-05, + "loss": 0.0002, + "step": 46528 + }, + { + "epoch": 43.41, + "learning_rate": 4.45741604477612e-05, + "loss": 0.0, + "step": 46532 + }, + { + "epoch": 43.41, + "learning_rate": 4.457369402985075e-05, + "loss": 0.0007, + "step": 46536 + }, + { + "epoch": 43.41, + "learning_rate": 4.45732276119403e-05, + "loss": 0.0001, + "step": 46540 + }, + { + "epoch": 43.42, + "learning_rate": 4.457276119402985e-05, + "loss": 0.0001, + "step": 46544 + }, + { + "epoch": 43.42, + "learning_rate": 4.457229477611941e-05, + "loss": 0.0, + "step": 46548 + }, + { + "epoch": 43.43, + "learning_rate": 4.4571828358208955e-05, + "loss": 0.0, + "step": 46552 + }, + { + "epoch": 43.43, + "learning_rate": 4.457136194029851e-05, + "loss": 0.0051, + "step": 46556 + }, + { + "epoch": 43.43, + "learning_rate": 4.4570895522388065e-05, + "loss": 0.0003, + "step": 46560 + }, + { + "epoch": 43.44, + "learning_rate": 4.457042910447761e-05, + "loss": 0.0001, + "step": 46564 + }, + { + "epoch": 43.44, + "learning_rate": 4.456996268656717e-05, + "loss": 0.0003, + "step": 46568 + }, + { + "epoch": 43.44, + "learning_rate": 4.4569496268656716e-05, + "loss": 0.0001, + "step": 46572 + }, + { + "epoch": 43.45, + "learning_rate": 4.456902985074627e-05, + "loss": 0.0, + "step": 46576 + }, + { + "epoch": 43.45, + "learning_rate": 4.4568563432835826e-05, + "loss": 0.0, + "step": 46580 + }, + { + "epoch": 43.46, + "learning_rate": 4.4568097014925374e-05, + "loss": 0.0002, + "step": 46584 + }, + { + "epoch": 43.46, + "learning_rate": 4.456763059701493e-05, + "loss": 0.0, + "step": 46588 + }, + { + "epoch": 43.46, + "learning_rate": 4.4567164179104484e-05, + "loss": 0.0, + "step": 46592 + }, + { + "epoch": 43.47, + "learning_rate": 4.456669776119403e-05, + "loss": 0.0, + "step": 46596 + }, + { + "epoch": 43.47, + "learning_rate": 4.456623134328358e-05, + "loss": 0.0, + "step": 46600 + }, + { + "epoch": 43.47, + "learning_rate": 4.4565764925373135e-05, + "loss": 0.0, + "step": 46604 + }, + { + "epoch": 43.48, + "learning_rate": 4.456529850746269e-05, + "loss": 0.0005, + "step": 46608 + }, + { + "epoch": 43.48, + "learning_rate": 4.456483208955224e-05, + "loss": 0.0, + "step": 46612 + }, + { + "epoch": 43.49, + "learning_rate": 4.456436567164179e-05, + "loss": 0.0, + "step": 46616 + }, + { + "epoch": 43.49, + "learning_rate": 4.456389925373135e-05, + "loss": 0.0001, + "step": 46620 + }, + { + "epoch": 43.49, + "learning_rate": 4.4563432835820896e-05, + "loss": 0.0, + "step": 46624 + }, + { + "epoch": 43.5, + "learning_rate": 4.456296641791045e-05, + "loss": 0.0015, + "step": 46628 + }, + { + "epoch": 43.5, + "learning_rate": 4.45625e-05, + "loss": 0.0, + "step": 46632 + }, + { + "epoch": 43.5, + "learning_rate": 4.4562033582089554e-05, + "loss": 0.0, + "step": 46636 + }, + { + "epoch": 43.51, + "learning_rate": 4.456156716417911e-05, + "loss": 0.0002, + "step": 46640 + }, + { + "epoch": 43.51, + "learning_rate": 4.456110074626866e-05, + "loss": 0.0, + "step": 46644 + }, + { + "epoch": 43.51, + "learning_rate": 4.4560634328358205e-05, + "loss": 0.0002, + "step": 46648 + }, + { + "epoch": 43.52, + "learning_rate": 4.456016791044777e-05, + "loss": 0.0001, + "step": 46652 + }, + { + "epoch": 43.52, + "learning_rate": 4.4559701492537315e-05, + "loss": 0.0001, + "step": 46656 + }, + { + "epoch": 43.53, + "learning_rate": 4.455923507462687e-05, + "loss": 0.0, + "step": 46660 + }, + { + "epoch": 43.53, + "learning_rate": 4.455876865671642e-05, + "loss": 0.0002, + "step": 46664 + }, + { + "epoch": 43.53, + "learning_rate": 4.455830223880597e-05, + "loss": 0.0, + "step": 46668 + }, + { + "epoch": 43.54, + "learning_rate": 4.455783582089553e-05, + "loss": 0.0003, + "step": 46672 + }, + { + "epoch": 43.54, + "learning_rate": 4.4557369402985076e-05, + "loss": 0.0001, + "step": 46676 + }, + { + "epoch": 43.54, + "learning_rate": 4.455690298507463e-05, + "loss": 0.0001, + "step": 46680 + }, + { + "epoch": 43.55, + "learning_rate": 4.4556436567164186e-05, + "loss": 0.0, + "step": 46684 + }, + { + "epoch": 43.55, + "learning_rate": 4.4555970149253734e-05, + "loss": 0.0, + "step": 46688 + }, + { + "epoch": 43.56, + "learning_rate": 4.455550373134328e-05, + "loss": 0.0007, + "step": 46692 + }, + { + "epoch": 43.56, + "learning_rate": 4.4555037313432844e-05, + "loss": 0.0042, + "step": 46696 + }, + { + "epoch": 43.56, + "learning_rate": 4.455457089552239e-05, + "loss": 0.0004, + "step": 46700 + }, + { + "epoch": 43.57, + "learning_rate": 4.455410447761194e-05, + "loss": 0.0001, + "step": 46704 + }, + { + "epoch": 43.57, + "learning_rate": 4.4553638059701495e-05, + "loss": 0.0, + "step": 46708 + }, + { + "epoch": 43.57, + "learning_rate": 4.455317164179105e-05, + "loss": 0.0002, + "step": 46712 + }, + { + "epoch": 43.58, + "learning_rate": 4.45527052238806e-05, + "loss": 0.0013, + "step": 46716 + }, + { + "epoch": 43.58, + "learning_rate": 4.455223880597015e-05, + "loss": 0.0001, + "step": 46720 + }, + { + "epoch": 43.59, + "learning_rate": 4.45517723880597e-05, + "loss": 0.0, + "step": 46724 + }, + { + "epoch": 43.59, + "learning_rate": 4.4551305970149256e-05, + "loss": 0.0, + "step": 46728 + }, + { + "epoch": 43.59, + "learning_rate": 4.455083955223881e-05, + "loss": 0.0, + "step": 46732 + }, + { + "epoch": 43.6, + "learning_rate": 4.455037313432836e-05, + "loss": 0.0001, + "step": 46736 + }, + { + "epoch": 43.6, + "learning_rate": 4.4549906716417914e-05, + "loss": 0.0001, + "step": 46740 + }, + { + "epoch": 43.6, + "learning_rate": 4.454944029850747e-05, + "loss": 0.0, + "step": 46744 + }, + { + "epoch": 43.61, + "learning_rate": 4.454897388059702e-05, + "loss": 0.0, + "step": 46748 + }, + { + "epoch": 43.61, + "learning_rate": 4.4548507462686565e-05, + "loss": 0.0, + "step": 46752 + }, + { + "epoch": 43.62, + "learning_rate": 4.454804104477612e-05, + "loss": 0.0001, + "step": 46756 + }, + { + "epoch": 43.62, + "learning_rate": 4.4547574626865675e-05, + "loss": 0.0, + "step": 46760 + }, + { + "epoch": 43.62, + "learning_rate": 4.454710820895522e-05, + "loss": 0.0, + "step": 46764 + }, + { + "epoch": 43.63, + "learning_rate": 4.454664179104478e-05, + "loss": 0.0, + "step": 46768 + }, + { + "epoch": 43.63, + "learning_rate": 4.454617537313433e-05, + "loss": 0.0, + "step": 46772 + }, + { + "epoch": 43.63, + "learning_rate": 4.454570895522388e-05, + "loss": 0.0004, + "step": 46776 + }, + { + "epoch": 43.64, + "learning_rate": 4.4545242537313436e-05, + "loss": 0.0009, + "step": 46780 + }, + { + "epoch": 43.64, + "learning_rate": 4.4544776119402984e-05, + "loss": 0.0, + "step": 46784 + }, + { + "epoch": 43.65, + "learning_rate": 4.454430970149254e-05, + "loss": 0.0, + "step": 46788 + }, + { + "epoch": 43.65, + "learning_rate": 4.4543843283582094e-05, + "loss": 0.0025, + "step": 46792 + }, + { + "epoch": 43.65, + "learning_rate": 4.454337686567164e-05, + "loss": 0.0, + "step": 46796 + }, + { + "epoch": 43.66, + "learning_rate": 4.45429104477612e-05, + "loss": 0.002, + "step": 46800 + }, + { + "epoch": 43.66, + "learning_rate": 4.454244402985075e-05, + "loss": 0.0, + "step": 46804 + }, + { + "epoch": 43.66, + "learning_rate": 4.45419776119403e-05, + "loss": 0.0002, + "step": 46808 + }, + { + "epoch": 43.67, + "learning_rate": 4.454151119402985e-05, + "loss": 0.0, + "step": 46812 + }, + { + "epoch": 43.67, + "learning_rate": 4.45410447761194e-05, + "loss": 0.0014, + "step": 46816 + }, + { + "epoch": 43.68, + "learning_rate": 4.454057835820896e-05, + "loss": 0.0003, + "step": 46820 + }, + { + "epoch": 43.68, + "learning_rate": 4.454011194029851e-05, + "loss": 0.0001, + "step": 46824 + }, + { + "epoch": 43.68, + "learning_rate": 4.453964552238806e-05, + "loss": 0.0, + "step": 46828 + }, + { + "epoch": 43.69, + "learning_rate": 4.4539179104477616e-05, + "loss": 0.0008, + "step": 46832 + }, + { + "epoch": 43.69, + "learning_rate": 4.453871268656717e-05, + "loss": 0.0144, + "step": 46836 + }, + { + "epoch": 43.69, + "learning_rate": 4.453824626865672e-05, + "loss": 0.0022, + "step": 46840 + }, + { + "epoch": 43.7, + "learning_rate": 4.453777985074627e-05, + "loss": 0.0, + "step": 46844 + }, + { + "epoch": 43.7, + "learning_rate": 4.453731343283583e-05, + "loss": 0.0001, + "step": 46848 + }, + { + "epoch": 43.71, + "learning_rate": 4.4536847014925377e-05, + "loss": 0.0, + "step": 46852 + }, + { + "epoch": 43.71, + "learning_rate": 4.4536380597014925e-05, + "loss": 0.0012, + "step": 46856 + }, + { + "epoch": 43.71, + "learning_rate": 4.453591417910448e-05, + "loss": 0.0, + "step": 46860 + }, + { + "epoch": 43.72, + "learning_rate": 4.4535447761194035e-05, + "loss": 0.0001, + "step": 46864 + }, + { + "epoch": 43.72, + "learning_rate": 4.453498134328358e-05, + "loss": 0.0003, + "step": 46868 + }, + { + "epoch": 43.72, + "learning_rate": 4.453451492537314e-05, + "loss": 0.0, + "step": 46872 + }, + { + "epoch": 43.73, + "learning_rate": 4.4534048507462686e-05, + "loss": 0.0012, + "step": 46876 + }, + { + "epoch": 43.73, + "learning_rate": 4.453358208955224e-05, + "loss": 0.0, + "step": 46880 + }, + { + "epoch": 43.73, + "learning_rate": 4.4533115671641796e-05, + "loss": 0.0, + "step": 46884 + }, + { + "epoch": 43.74, + "learning_rate": 4.4532649253731344e-05, + "loss": 0.0, + "step": 46888 + }, + { + "epoch": 43.74, + "learning_rate": 4.45321828358209e-05, + "loss": 0.0001, + "step": 46892 + }, + { + "epoch": 43.75, + "learning_rate": 4.4531716417910453e-05, + "loss": 0.0, + "step": 46896 + }, + { + "epoch": 43.75, + "learning_rate": 4.453125e-05, + "loss": 0.0226, + "step": 46900 + }, + { + "epoch": 43.75, + "learning_rate": 4.453078358208955e-05, + "loss": 0.0001, + "step": 46904 + }, + { + "epoch": 43.76, + "learning_rate": 4.453031716417911e-05, + "loss": 0.0018, + "step": 46908 + }, + { + "epoch": 43.76, + "learning_rate": 4.452985074626866e-05, + "loss": 0.0001, + "step": 46912 + }, + { + "epoch": 43.76, + "learning_rate": 4.452938432835821e-05, + "loss": 0.0017, + "step": 46916 + }, + { + "epoch": 43.77, + "learning_rate": 4.452891791044776e-05, + "loss": 0.0, + "step": 46920 + }, + { + "epoch": 43.77, + "learning_rate": 4.452845149253732e-05, + "loss": 0.0002, + "step": 46924 + }, + { + "epoch": 43.78, + "learning_rate": 4.4527985074626866e-05, + "loss": 0.0, + "step": 46928 + }, + { + "epoch": 43.78, + "learning_rate": 4.452751865671642e-05, + "loss": 0.0, + "step": 46932 + }, + { + "epoch": 43.78, + "learning_rate": 4.452705223880597e-05, + "loss": 0.0019, + "step": 46936 + }, + { + "epoch": 43.79, + "learning_rate": 4.4526585820895524e-05, + "loss": 0.0, + "step": 46940 + }, + { + "epoch": 43.79, + "learning_rate": 4.452611940298508e-05, + "loss": 0.0003, + "step": 46944 + }, + { + "epoch": 43.79, + "learning_rate": 4.452565298507463e-05, + "loss": 0.0004, + "step": 46948 + }, + { + "epoch": 43.8, + "learning_rate": 4.452518656716418e-05, + "loss": 0.0, + "step": 46952 + }, + { + "epoch": 43.8, + "learning_rate": 4.4524720149253736e-05, + "loss": 0.0001, + "step": 46956 + }, + { + "epoch": 43.81, + "learning_rate": 4.4524253731343285e-05, + "loss": 0.0, + "step": 46960 + }, + { + "epoch": 43.81, + "learning_rate": 4.452378731343283e-05, + "loss": 0.0004, + "step": 46964 + }, + { + "epoch": 43.81, + "learning_rate": 4.4523320895522394e-05, + "loss": 0.0001, + "step": 46968 + }, + { + "epoch": 43.82, + "learning_rate": 4.452285447761194e-05, + "loss": 0.0, + "step": 46972 + }, + { + "epoch": 43.82, + "learning_rate": 4.452238805970149e-05, + "loss": 0.0, + "step": 46976 + }, + { + "epoch": 43.82, + "learning_rate": 4.4521921641791046e-05, + "loss": 0.0001, + "step": 46980 + }, + { + "epoch": 43.83, + "learning_rate": 4.45214552238806e-05, + "loss": 0.0084, + "step": 46984 + }, + { + "epoch": 43.83, + "learning_rate": 4.4520988805970155e-05, + "loss": 0.0007, + "step": 46988 + }, + { + "epoch": 43.84, + "learning_rate": 4.4520522388059703e-05, + "loss": 0.0, + "step": 46992 + }, + { + "epoch": 43.84, + "learning_rate": 4.452005597014925e-05, + "loss": 0.0, + "step": 46996 + }, + { + "epoch": 43.84, + "learning_rate": 4.451958955223881e-05, + "loss": 0.0, + "step": 47000 + }, + { + "epoch": 43.84, + "eval_exact_match": 0.7321083172147002, + "eval_exec": 0.7688588007736944, + "eval_loss": 0.4131454825401306, + "eval_runtime": 1115.6019, + "eval_samples_per_second": 0.927, + "step": 47000 + }, + { + "epoch": 43.85, + "learning_rate": 4.451912313432836e-05, + "loss": 0.0, + "step": 47004 + }, + { + "epoch": 43.85, + "learning_rate": 4.451865671641791e-05, + "loss": 0.0021, + "step": 47008 + }, + { + "epoch": 43.85, + "learning_rate": 4.4518190298507464e-05, + "loss": 0.002, + "step": 47012 + }, + { + "epoch": 43.86, + "learning_rate": 4.451772388059702e-05, + "loss": 0.0084, + "step": 47016 + }, + { + "epoch": 43.86, + "learning_rate": 4.451725746268657e-05, + "loss": 0.0002, + "step": 47020 + }, + { + "epoch": 43.87, + "learning_rate": 4.451679104477612e-05, + "loss": 0.0007, + "step": 47024 + }, + { + "epoch": 43.87, + "learning_rate": 4.451632462686568e-05, + "loss": 0.0007, + "step": 47028 + }, + { + "epoch": 43.87, + "learning_rate": 4.4515858208955225e-05, + "loss": 0.0001, + "step": 47032 + }, + { + "epoch": 43.88, + "learning_rate": 4.451539179104478e-05, + "loss": 0.0001, + "step": 47036 + }, + { + "epoch": 43.88, + "learning_rate": 4.451492537313433e-05, + "loss": 0.0, + "step": 47040 + }, + { + "epoch": 43.88, + "learning_rate": 4.4514458955223883e-05, + "loss": 0.0002, + "step": 47044 + }, + { + "epoch": 43.89, + "learning_rate": 4.451399253731344e-05, + "loss": 0.0051, + "step": 47048 + }, + { + "epoch": 43.89, + "learning_rate": 4.4513526119402986e-05, + "loss": 0.0004, + "step": 47052 + }, + { + "epoch": 43.9, + "learning_rate": 4.4513059701492535e-05, + "loss": 0.0001, + "step": 47056 + }, + { + "epoch": 43.9, + "learning_rate": 4.4512593283582096e-05, + "loss": 0.0, + "step": 47060 + }, + { + "epoch": 43.9, + "learning_rate": 4.4512126865671644e-05, + "loss": 0.0005, + "step": 47064 + }, + { + "epoch": 43.91, + "learning_rate": 4.451166044776119e-05, + "loss": 0.0, + "step": 47068 + }, + { + "epoch": 43.91, + "learning_rate": 4.451119402985075e-05, + "loss": 0.0033, + "step": 47072 + }, + { + "epoch": 43.91, + "learning_rate": 4.45107276119403e-05, + "loss": 0.0003, + "step": 47076 + }, + { + "epoch": 43.92, + "learning_rate": 4.451026119402985e-05, + "loss": 0.0016, + "step": 47080 + }, + { + "epoch": 43.92, + "learning_rate": 4.4509794776119405e-05, + "loss": 0.0, + "step": 47084 + }, + { + "epoch": 43.93, + "learning_rate": 4.450932835820896e-05, + "loss": 0.001, + "step": 47088 + }, + { + "epoch": 43.93, + "learning_rate": 4.450886194029851e-05, + "loss": 0.0, + "step": 47092 + }, + { + "epoch": 43.93, + "learning_rate": 4.450839552238806e-05, + "loss": 0.0015, + "step": 47096 + }, + { + "epoch": 43.94, + "learning_rate": 4.450792910447761e-05, + "loss": 0.0, + "step": 47100 + }, + { + "epoch": 43.94, + "learning_rate": 4.4507462686567166e-05, + "loss": 0.001, + "step": 47104 + }, + { + "epoch": 43.94, + "learning_rate": 4.450699626865672e-05, + "loss": 0.0, + "step": 47108 + }, + { + "epoch": 43.95, + "learning_rate": 4.450652985074627e-05, + "loss": 0.0012, + "step": 47112 + }, + { + "epoch": 43.95, + "learning_rate": 4.450606343283582e-05, + "loss": 0.0, + "step": 47116 + }, + { + "epoch": 43.96, + "learning_rate": 4.450559701492538e-05, + "loss": 0.0, + "step": 47120 + }, + { + "epoch": 43.96, + "learning_rate": 4.450513059701493e-05, + "loss": 0.0018, + "step": 47124 + }, + { + "epoch": 43.96, + "learning_rate": 4.4504664179104475e-05, + "loss": 0.0002, + "step": 47128 + }, + { + "epoch": 43.97, + "learning_rate": 4.450419776119403e-05, + "loss": 0.0, + "step": 47132 + }, + { + "epoch": 43.97, + "learning_rate": 4.4503731343283585e-05, + "loss": 0.001, + "step": 47136 + }, + { + "epoch": 43.97, + "learning_rate": 4.4503264925373133e-05, + "loss": 0.0003, + "step": 47140 + }, + { + "epoch": 43.98, + "learning_rate": 4.450279850746269e-05, + "loss": 0.0081, + "step": 47144 + }, + { + "epoch": 43.98, + "learning_rate": 4.450233208955224e-05, + "loss": 0.0001, + "step": 47148 + }, + { + "epoch": 43.98, + "learning_rate": 4.45018656716418e-05, + "loss": 0.0001, + "step": 47152 + }, + { + "epoch": 43.99, + "learning_rate": 4.4501399253731346e-05, + "loss": 0.0, + "step": 47156 + }, + { + "epoch": 43.99, + "learning_rate": 4.4500932835820894e-05, + "loss": 0.0002, + "step": 47160 + }, + { + "epoch": 44.0, + "learning_rate": 4.450046641791045e-05, + "loss": 0.0, + "step": 47164 + }, + { + "epoch": 44.0, + "learning_rate": 4.4500000000000004e-05, + "loss": 0.0001, + "step": 47168 + }, + { + "epoch": 44.0, + "learning_rate": 4.449953358208955e-05, + "loss": 0.0001, + "step": 47172 + }, + { + "epoch": 44.01, + "learning_rate": 4.449906716417911e-05, + "loss": 0.0, + "step": 47176 + }, + { + "epoch": 44.01, + "learning_rate": 4.449860074626866e-05, + "loss": 0.0001, + "step": 47180 + }, + { + "epoch": 44.01, + "learning_rate": 4.449813432835821e-05, + "loss": 0.0017, + "step": 47184 + }, + { + "epoch": 44.02, + "learning_rate": 4.4497667910447765e-05, + "loss": 0.0003, + "step": 47188 + }, + { + "epoch": 44.02, + "learning_rate": 4.449720149253731e-05, + "loss": 0.0002, + "step": 47192 + }, + { + "epoch": 44.03, + "learning_rate": 4.449673507462687e-05, + "loss": 0.0, + "step": 47196 + }, + { + "epoch": 44.03, + "learning_rate": 4.449626865671642e-05, + "loss": 0.0003, + "step": 47200 + }, + { + "epoch": 44.03, + "learning_rate": 4.449580223880597e-05, + "loss": 0.0058, + "step": 47204 + }, + { + "epoch": 44.04, + "learning_rate": 4.4495335820895526e-05, + "loss": 0.0, + "step": 47208 + }, + { + "epoch": 44.04, + "learning_rate": 4.449486940298508e-05, + "loss": 0.0086, + "step": 47212 + }, + { + "epoch": 44.04, + "learning_rate": 4.449440298507463e-05, + "loss": 0.001, + "step": 47216 + }, + { + "epoch": 44.05, + "learning_rate": 4.449393656716418e-05, + "loss": 0.0016, + "step": 47220 + }, + { + "epoch": 44.05, + "learning_rate": 4.449347014925373e-05, + "loss": 0.0015, + "step": 47224 + }, + { + "epoch": 44.06, + "learning_rate": 4.449300373134329e-05, + "loss": 0.0006, + "step": 47228 + }, + { + "epoch": 44.06, + "learning_rate": 4.4492537313432835e-05, + "loss": 0.0002, + "step": 47232 + }, + { + "epoch": 44.06, + "learning_rate": 4.449207089552239e-05, + "loss": 0.0002, + "step": 47236 + }, + { + "epoch": 44.07, + "learning_rate": 4.4491604477611945e-05, + "loss": 0.0002, + "step": 47240 + }, + { + "epoch": 44.07, + "learning_rate": 4.449113805970149e-05, + "loss": 0.0001, + "step": 47244 + }, + { + "epoch": 44.07, + "learning_rate": 4.449067164179105e-05, + "loss": 0.0001, + "step": 47248 + }, + { + "epoch": 44.08, + "learning_rate": 4.4490205223880596e-05, + "loss": 0.002, + "step": 47252 + }, + { + "epoch": 44.08, + "learning_rate": 4.448973880597015e-05, + "loss": 0.0001, + "step": 47256 + }, + { + "epoch": 44.09, + "learning_rate": 4.4489272388059706e-05, + "loss": 0.0001, + "step": 47260 + }, + { + "epoch": 44.09, + "learning_rate": 4.4488805970149254e-05, + "loss": 0.0001, + "step": 47264 + }, + { + "epoch": 44.09, + "learning_rate": 4.448833955223881e-05, + "loss": 0.0, + "step": 47268 + }, + { + "epoch": 44.1, + "learning_rate": 4.4487873134328364e-05, + "loss": 0.0, + "step": 47272 + }, + { + "epoch": 44.1, + "learning_rate": 4.448740671641791e-05, + "loss": 0.0034, + "step": 47276 + }, + { + "epoch": 44.1, + "learning_rate": 4.448694029850746e-05, + "loss": 0.0004, + "step": 47280 + }, + { + "epoch": 44.11, + "learning_rate": 4.4486473880597015e-05, + "loss": 0.0, + "step": 47284 + }, + { + "epoch": 44.11, + "learning_rate": 4.448600746268657e-05, + "loss": 0.0001, + "step": 47288 + }, + { + "epoch": 44.12, + "learning_rate": 4.448554104477612e-05, + "loss": 0.0002, + "step": 47292 + }, + { + "epoch": 44.12, + "learning_rate": 4.448507462686567e-05, + "loss": 0.001, + "step": 47296 + }, + { + "epoch": 44.12, + "learning_rate": 4.448460820895523e-05, + "loss": 0.0006, + "step": 47300 + }, + { + "epoch": 44.13, + "learning_rate": 4.4484141791044776e-05, + "loss": 0.0003, + "step": 47304 + }, + { + "epoch": 44.13, + "learning_rate": 4.448367537313433e-05, + "loss": 0.0, + "step": 47308 + }, + { + "epoch": 44.13, + "learning_rate": 4.448320895522388e-05, + "loss": 0.0082, + "step": 47312 + }, + { + "epoch": 44.14, + "learning_rate": 4.448274253731344e-05, + "loss": 0.0, + "step": 47316 + }, + { + "epoch": 44.14, + "learning_rate": 4.448227611940299e-05, + "loss": 0.0001, + "step": 47320 + }, + { + "epoch": 44.15, + "learning_rate": 4.448180970149254e-05, + "loss": 0.0001, + "step": 47324 + }, + { + "epoch": 44.15, + "learning_rate": 4.448134328358209e-05, + "loss": 0.0, + "step": 47328 + }, + { + "epoch": 44.15, + "learning_rate": 4.448087686567165e-05, + "loss": 0.0004, + "step": 47332 + }, + { + "epoch": 44.16, + "learning_rate": 4.4480410447761195e-05, + "loss": 0.0002, + "step": 47336 + }, + { + "epoch": 44.16, + "learning_rate": 4.447994402985075e-05, + "loss": 0.0002, + "step": 47340 + }, + { + "epoch": 44.16, + "learning_rate": 4.44794776119403e-05, + "loss": 0.0, + "step": 47344 + }, + { + "epoch": 44.17, + "learning_rate": 4.447901119402985e-05, + "loss": 0.0, + "step": 47348 + }, + { + "epoch": 44.17, + "learning_rate": 4.447854477611941e-05, + "loss": 0.0047, + "step": 47352 + }, + { + "epoch": 44.18, + "learning_rate": 4.4478078358208956e-05, + "loss": 0.0003, + "step": 47356 + }, + { + "epoch": 44.18, + "learning_rate": 4.447761194029851e-05, + "loss": 0.0, + "step": 47360 + }, + { + "epoch": 44.18, + "learning_rate": 4.4477145522388066e-05, + "loss": 0.0006, + "step": 47364 + }, + { + "epoch": 44.19, + "learning_rate": 4.4476679104477614e-05, + "loss": 0.0, + "step": 47368 + }, + { + "epoch": 44.19, + "learning_rate": 4.447621268656716e-05, + "loss": 0.0005, + "step": 47372 + }, + { + "epoch": 44.19, + "learning_rate": 4.4475746268656724e-05, + "loss": 0.002, + "step": 47376 + }, + { + "epoch": 44.2, + "learning_rate": 4.447527985074627e-05, + "loss": 0.0, + "step": 47380 + }, + { + "epoch": 44.2, + "learning_rate": 4.447481343283582e-05, + "loss": 0.0141, + "step": 47384 + }, + { + "epoch": 44.21, + "learning_rate": 4.4474347014925375e-05, + "loss": 0.0001, + "step": 47388 + }, + { + "epoch": 44.21, + "learning_rate": 4.447388059701493e-05, + "loss": 0.0002, + "step": 47392 + }, + { + "epoch": 44.21, + "learning_rate": 4.447341417910448e-05, + "loss": 0.0001, + "step": 47396 + }, + { + "epoch": 44.22, + "learning_rate": 4.447294776119403e-05, + "loss": 0.0, + "step": 47400 + }, + { + "epoch": 44.22, + "learning_rate": 4.447248134328358e-05, + "loss": 0.0027, + "step": 47404 + }, + { + "epoch": 44.22, + "learning_rate": 4.4472014925373136e-05, + "loss": 0.0, + "step": 47408 + }, + { + "epoch": 44.23, + "learning_rate": 4.447154850746269e-05, + "loss": 0.0005, + "step": 47412 + }, + { + "epoch": 44.23, + "learning_rate": 4.447108208955224e-05, + "loss": 0.0, + "step": 47416 + }, + { + "epoch": 44.24, + "learning_rate": 4.4470615671641794e-05, + "loss": 0.0002, + "step": 47420 + }, + { + "epoch": 44.24, + "learning_rate": 4.447014925373135e-05, + "loss": 0.0, + "step": 47424 + }, + { + "epoch": 44.24, + "learning_rate": 4.44696828358209e-05, + "loss": 0.0004, + "step": 47428 + }, + { + "epoch": 44.25, + "learning_rate": 4.4469216417910445e-05, + "loss": 0.0, + "step": 47432 + }, + { + "epoch": 44.25, + "learning_rate": 4.446875e-05, + "loss": 0.001, + "step": 47436 + }, + { + "epoch": 44.25, + "learning_rate": 4.4468283582089555e-05, + "loss": 0.0, + "step": 47440 + }, + { + "epoch": 44.26, + "learning_rate": 4.44678171641791e-05, + "loss": 0.0, + "step": 47444 + }, + { + "epoch": 44.26, + "learning_rate": 4.446735074626866e-05, + "loss": 0.0, + "step": 47448 + }, + { + "epoch": 44.26, + "learning_rate": 4.446688432835821e-05, + "loss": 0.0012, + "step": 47452 + }, + { + "epoch": 44.27, + "learning_rate": 4.446641791044776e-05, + "loss": 0.0001, + "step": 47456 + }, + { + "epoch": 44.27, + "learning_rate": 4.4465951492537316e-05, + "loss": 0.0, + "step": 47460 + }, + { + "epoch": 44.28, + "learning_rate": 4.4465485074626864e-05, + "loss": 0.0, + "step": 47464 + }, + { + "epoch": 44.28, + "learning_rate": 4.446501865671642e-05, + "loss": 0.0001, + "step": 47468 + }, + { + "epoch": 44.28, + "learning_rate": 4.4464552238805974e-05, + "loss": 0.0012, + "step": 47472 + }, + { + "epoch": 44.29, + "learning_rate": 4.446408582089552e-05, + "loss": 0.0035, + "step": 47476 + }, + { + "epoch": 44.29, + "learning_rate": 4.446361940298508e-05, + "loss": 0.007, + "step": 47480 + }, + { + "epoch": 44.29, + "learning_rate": 4.446315298507463e-05, + "loss": 0.0001, + "step": 47484 + }, + { + "epoch": 44.3, + "learning_rate": 4.446268656716418e-05, + "loss": 0.0, + "step": 47488 + }, + { + "epoch": 44.3, + "learning_rate": 4.4462220149253735e-05, + "loss": 0.0, + "step": 47492 + }, + { + "epoch": 44.31, + "learning_rate": 4.446175373134328e-05, + "loss": 0.0, + "step": 47496 + }, + { + "epoch": 44.31, + "learning_rate": 4.446128731343284e-05, + "loss": 0.0004, + "step": 47500 + }, + { + "epoch": 44.31, + "eval_exact_match": 0.7330754352030948, + "eval_exec": 0.7707930367504836, + "eval_loss": 0.42296141386032104, + "eval_runtime": 1217.964, + "eval_samples_per_second": 0.849, + "step": 47500 + }, + { + "epoch": 44.31, + "learning_rate": 4.446082089552239e-05, + "loss": 0.0009, + "step": 47504 + }, + { + "epoch": 44.32, + "learning_rate": 4.446035447761194e-05, + "loss": 0.0001, + "step": 47508 + }, + { + "epoch": 44.32, + "learning_rate": 4.4459888059701496e-05, + "loss": 0.0001, + "step": 47512 + }, + { + "epoch": 44.32, + "learning_rate": 4.445942164179105e-05, + "loss": 0.0003, + "step": 47516 + }, + { + "epoch": 44.33, + "learning_rate": 4.44589552238806e-05, + "loss": 0.0, + "step": 47520 + }, + { + "epoch": 44.33, + "learning_rate": 4.445848880597015e-05, + "loss": 0.0, + "step": 47524 + }, + { + "epoch": 44.34, + "learning_rate": 4.445802238805971e-05, + "loss": 0.0, + "step": 47528 + }, + { + "epoch": 44.34, + "learning_rate": 4.445755597014926e-05, + "loss": 0.0001, + "step": 47532 + }, + { + "epoch": 44.34, + "learning_rate": 4.4457089552238805e-05, + "loss": 0.0001, + "step": 47536 + }, + { + "epoch": 44.35, + "learning_rate": 4.445662313432836e-05, + "loss": 0.0001, + "step": 47540 + }, + { + "epoch": 44.35, + "learning_rate": 4.4456156716417915e-05, + "loss": 0.0, + "step": 47544 + }, + { + "epoch": 44.35, + "learning_rate": 4.445569029850746e-05, + "loss": 0.0001, + "step": 47548 + }, + { + "epoch": 44.36, + "learning_rate": 4.445522388059702e-05, + "loss": 0.0, + "step": 47552 + }, + { + "epoch": 44.36, + "learning_rate": 4.4454757462686566e-05, + "loss": 0.0001, + "step": 47556 + }, + { + "epoch": 44.37, + "learning_rate": 4.445429104477612e-05, + "loss": 0.0001, + "step": 47560 + }, + { + "epoch": 44.37, + "learning_rate": 4.4453824626865676e-05, + "loss": 0.0001, + "step": 47564 + }, + { + "epoch": 44.37, + "learning_rate": 4.4453358208955224e-05, + "loss": 0.0001, + "step": 47568 + }, + { + "epoch": 44.38, + "learning_rate": 4.445289179104478e-05, + "loss": 0.0013, + "step": 47572 + }, + { + "epoch": 44.38, + "learning_rate": 4.4452425373134334e-05, + "loss": 0.0, + "step": 47576 + }, + { + "epoch": 44.38, + "learning_rate": 4.445195895522388e-05, + "loss": 0.0, + "step": 47580 + }, + { + "epoch": 44.39, + "learning_rate": 4.445149253731343e-05, + "loss": 0.0, + "step": 47584 + }, + { + "epoch": 44.39, + "learning_rate": 4.445102611940299e-05, + "loss": 0.0, + "step": 47588 + }, + { + "epoch": 44.4, + "learning_rate": 4.445055970149254e-05, + "loss": 0.0015, + "step": 47592 + }, + { + "epoch": 44.4, + "learning_rate": 4.445009328358209e-05, + "loss": 0.0003, + "step": 47596 + }, + { + "epoch": 44.4, + "learning_rate": 4.444962686567164e-05, + "loss": 0.0001, + "step": 47600 + }, + { + "epoch": 44.41, + "learning_rate": 4.44491604477612e-05, + "loss": 0.0001, + "step": 47604 + }, + { + "epoch": 44.41, + "learning_rate": 4.4448694029850746e-05, + "loss": 0.0, + "step": 47608 + }, + { + "epoch": 44.41, + "learning_rate": 4.44482276119403e-05, + "loss": 0.0, + "step": 47612 + }, + { + "epoch": 44.42, + "learning_rate": 4.444776119402985e-05, + "loss": 0.0001, + "step": 47616 + }, + { + "epoch": 44.42, + "learning_rate": 4.4447294776119404e-05, + "loss": 0.0001, + "step": 47620 + }, + { + "epoch": 44.43, + "learning_rate": 4.444682835820896e-05, + "loss": 0.0068, + "step": 47624 + }, + { + "epoch": 44.43, + "learning_rate": 4.444636194029851e-05, + "loss": 0.0001, + "step": 47628 + }, + { + "epoch": 44.43, + "learning_rate": 4.444589552238806e-05, + "loss": 0.0024, + "step": 47632 + }, + { + "epoch": 44.44, + "learning_rate": 4.444542910447762e-05, + "loss": 0.0, + "step": 47636 + }, + { + "epoch": 44.44, + "learning_rate": 4.4444962686567165e-05, + "loss": 0.0, + "step": 47640 + }, + { + "epoch": 44.44, + "learning_rate": 4.444449626865672e-05, + "loss": 0.0049, + "step": 47644 + }, + { + "epoch": 44.45, + "learning_rate": 4.4444029850746275e-05, + "loss": 0.0, + "step": 47648 + }, + { + "epoch": 44.45, + "learning_rate": 4.444356343283582e-05, + "loss": 0.0, + "step": 47652 + }, + { + "epoch": 44.46, + "learning_rate": 4.444309701492538e-05, + "loss": 0.0, + "step": 47656 + }, + { + "epoch": 44.46, + "learning_rate": 4.4442630597014926e-05, + "loss": 0.0002, + "step": 47660 + }, + { + "epoch": 44.46, + "learning_rate": 4.444216417910448e-05, + "loss": 0.0009, + "step": 47664 + }, + { + "epoch": 44.47, + "learning_rate": 4.4441697761194036e-05, + "loss": 0.0001, + "step": 47668 + }, + { + "epoch": 44.47, + "learning_rate": 4.4441231343283584e-05, + "loss": 0.0001, + "step": 47672 + }, + { + "epoch": 44.47, + "learning_rate": 4.444076492537313e-05, + "loss": 0.0007, + "step": 47676 + }, + { + "epoch": 44.48, + "learning_rate": 4.4440298507462694e-05, + "loss": 0.0, + "step": 47680 + }, + { + "epoch": 44.48, + "learning_rate": 4.443983208955224e-05, + "loss": 0.0, + "step": 47684 + }, + { + "epoch": 44.49, + "learning_rate": 4.443936567164179e-05, + "loss": 0.0003, + "step": 47688 + }, + { + "epoch": 44.49, + "learning_rate": 4.4438899253731345e-05, + "loss": 0.0021, + "step": 47692 + }, + { + "epoch": 44.49, + "learning_rate": 4.44384328358209e-05, + "loss": 0.0001, + "step": 47696 + }, + { + "epoch": 44.5, + "learning_rate": 4.443796641791045e-05, + "loss": 0.0, + "step": 47700 + }, + { + "epoch": 44.5, + "learning_rate": 4.44375e-05, + "loss": 0.0, + "step": 47704 + }, + { + "epoch": 44.5, + "learning_rate": 4.443703358208956e-05, + "loss": 0.0001, + "step": 47708 + }, + { + "epoch": 44.51, + "learning_rate": 4.4436567164179106e-05, + "loss": 0.0002, + "step": 47712 + }, + { + "epoch": 44.51, + "learning_rate": 4.443610074626866e-05, + "loss": 0.0, + "step": 47716 + }, + { + "epoch": 44.51, + "learning_rate": 4.443563432835821e-05, + "loss": 0.0001, + "step": 47720 + }, + { + "epoch": 44.52, + "learning_rate": 4.4435167910447764e-05, + "loss": 0.0001, + "step": 47724 + }, + { + "epoch": 44.52, + "learning_rate": 4.443470149253732e-05, + "loss": 0.0, + "step": 47728 + }, + { + "epoch": 44.53, + "learning_rate": 4.443423507462687e-05, + "loss": 0.0, + "step": 47732 + }, + { + "epoch": 44.53, + "learning_rate": 4.4433768656716415e-05, + "loss": 0.0, + "step": 47736 + }, + { + "epoch": 44.53, + "learning_rate": 4.4433302238805977e-05, + "loss": 0.0035, + "step": 47740 + }, + { + "epoch": 44.54, + "learning_rate": 4.4432835820895525e-05, + "loss": 0.0025, + "step": 47744 + }, + { + "epoch": 44.54, + "learning_rate": 4.443236940298507e-05, + "loss": 0.0, + "step": 47748 + }, + { + "epoch": 44.54, + "learning_rate": 4.443190298507463e-05, + "loss": 0.0, + "step": 47752 + }, + { + "epoch": 44.55, + "learning_rate": 4.443143656716418e-05, + "loss": 0.0031, + "step": 47756 + }, + { + "epoch": 44.55, + "learning_rate": 4.443097014925373e-05, + "loss": 0.0, + "step": 47760 + }, + { + "epoch": 44.56, + "learning_rate": 4.4430503731343286e-05, + "loss": 0.0004, + "step": 47764 + }, + { + "epoch": 44.56, + "learning_rate": 4.443003731343284e-05, + "loss": 0.0003, + "step": 47768 + }, + { + "epoch": 44.56, + "learning_rate": 4.442957089552239e-05, + "loss": 0.0, + "step": 47772 + }, + { + "epoch": 44.57, + "learning_rate": 4.4429104477611944e-05, + "loss": 0.0, + "step": 47776 + }, + { + "epoch": 44.57, + "learning_rate": 4.442863805970149e-05, + "loss": 0.0739, + "step": 47780 + }, + { + "epoch": 44.57, + "learning_rate": 4.442817164179105e-05, + "loss": 0.0, + "step": 47784 + }, + { + "epoch": 44.58, + "learning_rate": 4.44277052238806e-05, + "loss": 0.0, + "step": 47788 + }, + { + "epoch": 44.58, + "learning_rate": 4.442723880597015e-05, + "loss": 0.0, + "step": 47792 + }, + { + "epoch": 44.59, + "learning_rate": 4.44267723880597e-05, + "loss": 0.0002, + "step": 47796 + }, + { + "epoch": 44.59, + "learning_rate": 4.442630597014926e-05, + "loss": 0.0038, + "step": 47800 + }, + { + "epoch": 44.59, + "learning_rate": 4.442583955223881e-05, + "loss": 0.0001, + "step": 47804 + }, + { + "epoch": 44.6, + "learning_rate": 4.442537313432836e-05, + "loss": 0.0005, + "step": 47808 + }, + { + "epoch": 44.6, + "learning_rate": 4.442490671641791e-05, + "loss": 0.0001, + "step": 47812 + }, + { + "epoch": 44.6, + "learning_rate": 4.4424440298507466e-05, + "loss": 0.0005, + "step": 47816 + }, + { + "epoch": 44.61, + "learning_rate": 4.442397388059702e-05, + "loss": 0.0001, + "step": 47820 + }, + { + "epoch": 44.61, + "learning_rate": 4.442350746268657e-05, + "loss": 0.0047, + "step": 47824 + }, + { + "epoch": 44.62, + "learning_rate": 4.4423041044776124e-05, + "loss": 0.0, + "step": 47828 + }, + { + "epoch": 44.62, + "learning_rate": 4.442257462686568e-05, + "loss": 0.0, + "step": 47832 + }, + { + "epoch": 44.62, + "learning_rate": 4.4422108208955227e-05, + "loss": 0.0001, + "step": 47836 + }, + { + "epoch": 44.63, + "learning_rate": 4.4421641791044775e-05, + "loss": 0.0006, + "step": 47840 + }, + { + "epoch": 44.63, + "learning_rate": 4.442117537313433e-05, + "loss": 0.0001, + "step": 47844 + }, + { + "epoch": 44.63, + "learning_rate": 4.4420708955223885e-05, + "loss": 0.0003, + "step": 47848 + }, + { + "epoch": 44.64, + "learning_rate": 4.442024253731343e-05, + "loss": 0.0001, + "step": 47852 + }, + { + "epoch": 44.64, + "learning_rate": 4.441977611940299e-05, + "loss": 0.0005, + "step": 47856 + }, + { + "epoch": 44.65, + "learning_rate": 4.441930970149254e-05, + "loss": 0.0, + "step": 47860 + }, + { + "epoch": 44.65, + "learning_rate": 4.441884328358209e-05, + "loss": 0.0, + "step": 47864 + }, + { + "epoch": 44.65, + "learning_rate": 4.4418376865671646e-05, + "loss": 0.0, + "step": 47868 + }, + { + "epoch": 44.66, + "learning_rate": 4.4417910447761194e-05, + "loss": 0.0, + "step": 47872 + }, + { + "epoch": 44.66, + "learning_rate": 4.441744402985075e-05, + "loss": 0.0003, + "step": 47876 + }, + { + "epoch": 44.66, + "learning_rate": 4.4416977611940303e-05, + "loss": 0.0191, + "step": 47880 + }, + { + "epoch": 44.67, + "learning_rate": 4.441651119402985e-05, + "loss": 0.0, + "step": 47884 + }, + { + "epoch": 44.67, + "learning_rate": 4.4416044776119406e-05, + "loss": 0.0, + "step": 47888 + }, + { + "epoch": 44.68, + "learning_rate": 4.441557835820896e-05, + "loss": 0.0, + "step": 47892 + }, + { + "epoch": 44.68, + "learning_rate": 4.441511194029851e-05, + "loss": 0.0002, + "step": 47896 + }, + { + "epoch": 44.68, + "learning_rate": 4.441464552238806e-05, + "loss": 0.0, + "step": 47900 + }, + { + "epoch": 44.69, + "learning_rate": 4.441417910447761e-05, + "loss": 0.0, + "step": 47904 + }, + { + "epoch": 44.69, + "learning_rate": 4.441371268656717e-05, + "loss": 0.0, + "step": 47908 + }, + { + "epoch": 44.69, + "learning_rate": 4.4413246268656716e-05, + "loss": 0.0, + "step": 47912 + }, + { + "epoch": 44.7, + "learning_rate": 4.441277985074627e-05, + "loss": 0.0001, + "step": 47916 + }, + { + "epoch": 44.7, + "learning_rate": 4.4412313432835825e-05, + "loss": 0.0, + "step": 47920 + }, + { + "epoch": 44.71, + "learning_rate": 4.4411847014925374e-05, + "loss": 0.0049, + "step": 47924 + }, + { + "epoch": 44.71, + "learning_rate": 4.441138059701493e-05, + "loss": 0.0, + "step": 47928 + }, + { + "epoch": 44.71, + "learning_rate": 4.4410914179104477e-05, + "loss": 0.0, + "step": 47932 + }, + { + "epoch": 44.72, + "learning_rate": 4.441044776119403e-05, + "loss": 0.0001, + "step": 47936 + }, + { + "epoch": 44.72, + "learning_rate": 4.4409981343283586e-05, + "loss": 0.0, + "step": 47940 + }, + { + "epoch": 44.72, + "learning_rate": 4.4409514925373135e-05, + "loss": 0.0, + "step": 47944 + }, + { + "epoch": 44.73, + "learning_rate": 4.440904850746269e-05, + "loss": 0.0001, + "step": 47948 + }, + { + "epoch": 44.73, + "learning_rate": 4.4408582089552244e-05, + "loss": 0.0002, + "step": 47952 + }, + { + "epoch": 44.73, + "learning_rate": 4.440811567164179e-05, + "loss": 0.0001, + "step": 47956 + }, + { + "epoch": 44.74, + "learning_rate": 4.440764925373134e-05, + "loss": 0.0, + "step": 47960 + }, + { + "epoch": 44.74, + "learning_rate": 4.4407182835820896e-05, + "loss": 0.0001, + "step": 47964 + }, + { + "epoch": 44.75, + "learning_rate": 4.440671641791045e-05, + "loss": 0.0, + "step": 47968 + }, + { + "epoch": 44.75, + "learning_rate": 4.4406250000000005e-05, + "loss": 0.0001, + "step": 47972 + }, + { + "epoch": 44.75, + "learning_rate": 4.4405783582089553e-05, + "loss": 0.0003, + "step": 47976 + }, + { + "epoch": 44.76, + "learning_rate": 4.440531716417911e-05, + "loss": 0.0, + "step": 47980 + }, + { + "epoch": 44.76, + "learning_rate": 4.440485074626866e-05, + "loss": 0.0, + "step": 47984 + }, + { + "epoch": 44.76, + "learning_rate": 4.440438432835821e-05, + "loss": 0.0001, + "step": 47988 + }, + { + "epoch": 44.77, + "learning_rate": 4.440391791044776e-05, + "loss": 0.0, + "step": 47992 + }, + { + "epoch": 44.77, + "learning_rate": 4.440345149253732e-05, + "loss": 0.0, + "step": 47996 + }, + { + "epoch": 44.78, + "learning_rate": 4.440298507462687e-05, + "loss": 0.0, + "step": 48000 + }, + { + "epoch": 44.78, + "eval_exact_match": 0.7263056092843327, + "eval_exec": 0.7678916827852998, + "eval_loss": 0.43220871686935425, + "eval_runtime": 1142.6437, + "eval_samples_per_second": 0.905, + "step": 48000 + }, + { + "epoch": 44.78, + "learning_rate": 4.440251865671642e-05, + "loss": 0.0002, + "step": 48004 + }, + { + "epoch": 44.78, + "learning_rate": 4.440205223880597e-05, + "loss": 0.0, + "step": 48008 + }, + { + "epoch": 44.79, + "learning_rate": 4.440158582089553e-05, + "loss": 0.0, + "step": 48012 + }, + { + "epoch": 44.79, + "learning_rate": 4.4401119402985075e-05, + "loss": 0.0001, + "step": 48016 + }, + { + "epoch": 44.79, + "learning_rate": 4.440065298507463e-05, + "loss": 0.0003, + "step": 48020 + }, + { + "epoch": 44.8, + "learning_rate": 4.440018656716418e-05, + "loss": 0.0, + "step": 48024 + }, + { + "epoch": 44.8, + "learning_rate": 4.4399720149253733e-05, + "loss": 0.0001, + "step": 48028 + }, + { + "epoch": 44.81, + "learning_rate": 4.439925373134329e-05, + "loss": 0.0, + "step": 48032 + }, + { + "epoch": 44.81, + "learning_rate": 4.4398787313432836e-05, + "loss": 0.0, + "step": 48036 + }, + { + "epoch": 44.81, + "learning_rate": 4.439832089552239e-05, + "loss": 0.0001, + "step": 48040 + }, + { + "epoch": 44.82, + "learning_rate": 4.4397854477611946e-05, + "loss": 0.0, + "step": 48044 + }, + { + "epoch": 44.82, + "learning_rate": 4.4397388059701494e-05, + "loss": 0.0, + "step": 48048 + }, + { + "epoch": 44.82, + "learning_rate": 4.439692164179104e-05, + "loss": 0.0, + "step": 48052 + }, + { + "epoch": 44.83, + "learning_rate": 4.4396455223880604e-05, + "loss": 0.0, + "step": 48056 + }, + { + "epoch": 44.83, + "learning_rate": 4.439598880597015e-05, + "loss": 0.0043, + "step": 48060 + }, + { + "epoch": 44.84, + "learning_rate": 4.43955223880597e-05, + "loss": 0.0002, + "step": 48064 + }, + { + "epoch": 44.84, + "learning_rate": 4.4395055970149255e-05, + "loss": 0.0003, + "step": 48068 + }, + { + "epoch": 44.84, + "learning_rate": 4.439458955223881e-05, + "loss": 0.0001, + "step": 48072 + }, + { + "epoch": 44.85, + "learning_rate": 4.439412313432836e-05, + "loss": 0.0, + "step": 48076 + }, + { + "epoch": 44.85, + "learning_rate": 4.439365671641791e-05, + "loss": 0.0002, + "step": 48080 + }, + { + "epoch": 44.85, + "learning_rate": 4.439319029850746e-05, + "loss": 0.0075, + "step": 48084 + }, + { + "epoch": 44.86, + "learning_rate": 4.4392723880597016e-05, + "loss": 0.0, + "step": 48088 + }, + { + "epoch": 44.86, + "learning_rate": 4.439225746268657e-05, + "loss": 0.0001, + "step": 48092 + }, + { + "epoch": 44.87, + "learning_rate": 4.439179104477612e-05, + "loss": 0.0, + "step": 48096 + }, + { + "epoch": 44.87, + "learning_rate": 4.4391324626865674e-05, + "loss": 0.0, + "step": 48100 + }, + { + "epoch": 44.87, + "learning_rate": 4.439085820895523e-05, + "loss": 0.0038, + "step": 48104 + }, + { + "epoch": 44.88, + "learning_rate": 4.439039179104478e-05, + "loss": 0.0002, + "step": 48108 + }, + { + "epoch": 44.88, + "learning_rate": 4.4389925373134325e-05, + "loss": 0.0, + "step": 48112 + }, + { + "epoch": 44.88, + "learning_rate": 4.438945895522388e-05, + "loss": 0.0, + "step": 48116 + }, + { + "epoch": 44.89, + "learning_rate": 4.4388992537313435e-05, + "loss": 0.0055, + "step": 48120 + }, + { + "epoch": 44.89, + "learning_rate": 4.4388526119402983e-05, + "loss": 0.0001, + "step": 48124 + }, + { + "epoch": 44.9, + "learning_rate": 4.438805970149254e-05, + "loss": 0.0001, + "step": 48128 + }, + { + "epoch": 44.9, + "learning_rate": 4.438759328358209e-05, + "loss": 0.0003, + "step": 48132 + }, + { + "epoch": 44.9, + "learning_rate": 4.438712686567165e-05, + "loss": 0.0001, + "step": 48136 + }, + { + "epoch": 44.91, + "learning_rate": 4.4386660447761196e-05, + "loss": 0.0013, + "step": 48140 + }, + { + "epoch": 44.91, + "learning_rate": 4.4386194029850744e-05, + "loss": 0.0001, + "step": 48144 + }, + { + "epoch": 44.91, + "learning_rate": 4.4385727611940306e-05, + "loss": 0.0, + "step": 48148 + }, + { + "epoch": 44.92, + "learning_rate": 4.4385261194029854e-05, + "loss": 0.0, + "step": 48152 + }, + { + "epoch": 44.92, + "learning_rate": 4.43847947761194e-05, + "loss": 0.0, + "step": 48156 + }, + { + "epoch": 44.93, + "learning_rate": 4.438432835820896e-05, + "loss": 0.0001, + "step": 48160 + }, + { + "epoch": 44.93, + "learning_rate": 4.438386194029851e-05, + "loss": 0.0, + "step": 48164 + }, + { + "epoch": 44.93, + "learning_rate": 4.438339552238806e-05, + "loss": 0.0001, + "step": 48168 + }, + { + "epoch": 44.94, + "learning_rate": 4.4382929104477615e-05, + "loss": 0.0, + "step": 48172 + }, + { + "epoch": 44.94, + "learning_rate": 4.438246268656716e-05, + "loss": 0.0009, + "step": 48176 + }, + { + "epoch": 44.94, + "learning_rate": 4.438199626865672e-05, + "loss": 0.0, + "step": 48180 + }, + { + "epoch": 44.95, + "learning_rate": 4.438152985074627e-05, + "loss": 0.0001, + "step": 48184 + }, + { + "epoch": 44.95, + "learning_rate": 4.438106343283582e-05, + "loss": 0.0001, + "step": 48188 + }, + { + "epoch": 44.96, + "learning_rate": 4.4380597014925376e-05, + "loss": 0.0003, + "step": 48192 + }, + { + "epoch": 44.96, + "learning_rate": 4.438013059701493e-05, + "loss": 0.0, + "step": 48196 + }, + { + "epoch": 44.96, + "learning_rate": 4.437966417910448e-05, + "loss": 0.0, + "step": 48200 + }, + { + "epoch": 44.97, + "learning_rate": 4.437919776119403e-05, + "loss": 0.0, + "step": 48204 + }, + { + "epoch": 44.97, + "learning_rate": 4.437873134328359e-05, + "loss": 0.0, + "step": 48208 + }, + { + "epoch": 44.97, + "learning_rate": 4.437826492537314e-05, + "loss": 0.0002, + "step": 48212 + }, + { + "epoch": 44.98, + "learning_rate": 4.4377798507462685e-05, + "loss": 0.0, + "step": 48216 + }, + { + "epoch": 44.98, + "learning_rate": 4.437733208955224e-05, + "loss": 0.0, + "step": 48220 + }, + { + "epoch": 44.98, + "learning_rate": 4.4376865671641795e-05, + "loss": 0.0, + "step": 48224 + }, + { + "epoch": 44.99, + "learning_rate": 4.437639925373134e-05, + "loss": 0.0072, + "step": 48228 + }, + { + "epoch": 44.99, + "learning_rate": 4.43759328358209e-05, + "loss": 0.0, + "step": 48232 + }, + { + "epoch": 45.0, + "learning_rate": 4.4375466417910446e-05, + "loss": 0.0013, + "step": 48236 + }, + { + "epoch": 45.0, + "learning_rate": 4.4375e-05, + "loss": 0.0002, + "step": 48240 + }, + { + "epoch": 45.0, + "learning_rate": 4.4374533582089556e-05, + "loss": 0.0003, + "step": 48244 + }, + { + "epoch": 45.01, + "learning_rate": 4.4374067164179104e-05, + "loss": 0.0002, + "step": 48248 + }, + { + "epoch": 45.01, + "learning_rate": 4.437360074626866e-05, + "loss": 0.0009, + "step": 48252 + }, + { + "epoch": 45.01, + "learning_rate": 4.4373134328358214e-05, + "loss": 0.0, + "step": 48256 + }, + { + "epoch": 45.02, + "learning_rate": 4.437266791044776e-05, + "loss": 0.0001, + "step": 48260 + }, + { + "epoch": 45.02, + "learning_rate": 4.437220149253731e-05, + "loss": 0.0, + "step": 48264 + }, + { + "epoch": 45.03, + "learning_rate": 4.437173507462687e-05, + "loss": 0.0, + "step": 48268 + }, + { + "epoch": 45.03, + "learning_rate": 4.437126865671642e-05, + "loss": 0.0, + "step": 48272 + }, + { + "epoch": 45.03, + "learning_rate": 4.437080223880597e-05, + "loss": 0.0, + "step": 48276 + }, + { + "epoch": 45.04, + "learning_rate": 4.437033582089552e-05, + "loss": 0.0007, + "step": 48280 + }, + { + "epoch": 45.04, + "learning_rate": 4.436986940298508e-05, + "loss": 0.0005, + "step": 48284 + }, + { + "epoch": 45.04, + "learning_rate": 4.4369402985074626e-05, + "loss": 0.0, + "step": 48288 + }, + { + "epoch": 45.05, + "learning_rate": 4.436893656716418e-05, + "loss": 0.0002, + "step": 48292 + }, + { + "epoch": 45.05, + "learning_rate": 4.436847014925373e-05, + "loss": 0.0004, + "step": 48296 + }, + { + "epoch": 45.06, + "learning_rate": 4.436800373134329e-05, + "loss": 0.0005, + "step": 48300 + }, + { + "epoch": 45.06, + "learning_rate": 4.436753731343284e-05, + "loss": 0.0, + "step": 48304 + }, + { + "epoch": 45.06, + "learning_rate": 4.436707089552239e-05, + "loss": 0.0007, + "step": 48308 + }, + { + "epoch": 45.07, + "learning_rate": 4.436660447761194e-05, + "loss": 0.0, + "step": 48312 + }, + { + "epoch": 45.07, + "learning_rate": 4.43661380597015e-05, + "loss": 0.0, + "step": 48316 + }, + { + "epoch": 45.07, + "learning_rate": 4.4365671641791045e-05, + "loss": 0.0, + "step": 48320 + }, + { + "epoch": 45.08, + "learning_rate": 4.43652052238806e-05, + "loss": 0.001, + "step": 48324 + }, + { + "epoch": 45.08, + "learning_rate": 4.4364738805970155e-05, + "loss": 0.0001, + "step": 48328 + }, + { + "epoch": 45.09, + "learning_rate": 4.43642723880597e-05, + "loss": 0.001, + "step": 48332 + }, + { + "epoch": 45.09, + "learning_rate": 4.436380597014926e-05, + "loss": 0.0, + "step": 48336 + }, + { + "epoch": 45.09, + "learning_rate": 4.4363339552238806e-05, + "loss": 0.0, + "step": 48340 + }, + { + "epoch": 45.1, + "learning_rate": 4.436287313432836e-05, + "loss": 0.0044, + "step": 48344 + }, + { + "epoch": 45.1, + "learning_rate": 4.4362406716417916e-05, + "loss": 0.0, + "step": 48348 + }, + { + "epoch": 45.1, + "learning_rate": 4.4361940298507464e-05, + "loss": 0.0006, + "step": 48352 + }, + { + "epoch": 45.11, + "learning_rate": 4.436147388059701e-05, + "loss": 0.0, + "step": 48356 + }, + { + "epoch": 45.11, + "learning_rate": 4.4361007462686574e-05, + "loss": 0.0002, + "step": 48360 + }, + { + "epoch": 45.12, + "learning_rate": 4.436054104477612e-05, + "loss": 0.0002, + "step": 48364 + }, + { + "epoch": 45.12, + "learning_rate": 4.436007462686567e-05, + "loss": 0.0, + "step": 48368 + }, + { + "epoch": 45.12, + "learning_rate": 4.4359608208955225e-05, + "loss": 0.0002, + "step": 48372 + }, + { + "epoch": 45.13, + "learning_rate": 4.435914179104478e-05, + "loss": 0.0, + "step": 48376 + }, + { + "epoch": 45.13, + "learning_rate": 4.435867537313433e-05, + "loss": 0.0001, + "step": 48380 + }, + { + "epoch": 45.13, + "learning_rate": 4.435820895522388e-05, + "loss": 0.0, + "step": 48384 + }, + { + "epoch": 45.14, + "learning_rate": 4.435774253731344e-05, + "loss": 0.0, + "step": 48388 + }, + { + "epoch": 45.14, + "learning_rate": 4.4357276119402986e-05, + "loss": 0.0, + "step": 48392 + }, + { + "epoch": 45.15, + "learning_rate": 4.435680970149254e-05, + "loss": 0.0, + "step": 48396 + }, + { + "epoch": 45.15, + "learning_rate": 4.435634328358209e-05, + "loss": 0.0015, + "step": 48400 + }, + { + "epoch": 45.15, + "learning_rate": 4.4355876865671644e-05, + "loss": 0.0062, + "step": 48404 + }, + { + "epoch": 45.16, + "learning_rate": 4.43554104477612e-05, + "loss": 0.0, + "step": 48408 + }, + { + "epoch": 45.16, + "learning_rate": 4.435494402985075e-05, + "loss": 0.0, + "step": 48412 + }, + { + "epoch": 45.16, + "learning_rate": 4.4354477611940295e-05, + "loss": 0.0003, + "step": 48416 + }, + { + "epoch": 45.17, + "learning_rate": 4.435401119402986e-05, + "loss": 0.0006, + "step": 48420 + }, + { + "epoch": 45.17, + "learning_rate": 4.4353544776119405e-05, + "loss": 0.0, + "step": 48424 + }, + { + "epoch": 45.18, + "learning_rate": 4.435307835820895e-05, + "loss": 0.0, + "step": 48428 + }, + { + "epoch": 45.18, + "learning_rate": 4.435261194029851e-05, + "loss": 0.0, + "step": 48432 + }, + { + "epoch": 45.18, + "learning_rate": 4.435214552238806e-05, + "loss": 0.001, + "step": 48436 + }, + { + "epoch": 45.19, + "learning_rate": 4.435167910447761e-05, + "loss": 0.0001, + "step": 48440 + }, + { + "epoch": 45.19, + "learning_rate": 4.4351212686567166e-05, + "loss": 0.0, + "step": 48444 + }, + { + "epoch": 45.19, + "learning_rate": 4.435074626865672e-05, + "loss": 0.0001, + "step": 48448 + }, + { + "epoch": 45.2, + "learning_rate": 4.435027985074627e-05, + "loss": 0.0003, + "step": 48452 + }, + { + "epoch": 45.2, + "learning_rate": 4.4349813432835824e-05, + "loss": 0.0, + "step": 48456 + }, + { + "epoch": 45.21, + "learning_rate": 4.434934701492537e-05, + "loss": 0.0001, + "step": 48460 + }, + { + "epoch": 45.21, + "learning_rate": 4.434888059701493e-05, + "loss": 0.0004, + "step": 48464 + }, + { + "epoch": 45.21, + "learning_rate": 4.434841417910448e-05, + "loss": 0.0001, + "step": 48468 + }, + { + "epoch": 45.22, + "learning_rate": 4.434794776119403e-05, + "loss": 0.0, + "step": 48472 + }, + { + "epoch": 45.22, + "learning_rate": 4.4347481343283585e-05, + "loss": 0.0, + "step": 48476 + }, + { + "epoch": 45.22, + "learning_rate": 4.434701492537314e-05, + "loss": 0.0001, + "step": 48480 + }, + { + "epoch": 45.23, + "learning_rate": 4.434654850746269e-05, + "loss": 0.0009, + "step": 48484 + }, + { + "epoch": 45.23, + "learning_rate": 4.434608208955224e-05, + "loss": 0.0001, + "step": 48488 + }, + { + "epoch": 45.24, + "learning_rate": 4.434561567164179e-05, + "loss": 0.0, + "step": 48492 + }, + { + "epoch": 45.24, + "learning_rate": 4.4345149253731346e-05, + "loss": 0.0, + "step": 48496 + }, + { + "epoch": 45.24, + "learning_rate": 4.43446828358209e-05, + "loss": 0.0, + "step": 48500 + }, + { + "epoch": 45.24, + "eval_exact_match": 0.7272727272727273, + "eval_exec": 0.7659574468085106, + "eval_loss": 0.433497816324234, + "eval_runtime": 1151.4161, + "eval_samples_per_second": 0.898, + "step": 48500 + }, + { + "epoch": 45.25, + "learning_rate": 4.434421641791045e-05, + "loss": 0.0001, + "step": 48504 + }, + { + "epoch": 45.25, + "learning_rate": 4.4343750000000004e-05, + "loss": 0.0, + "step": 48508 + }, + { + "epoch": 45.25, + "learning_rate": 4.434328358208956e-05, + "loss": 0.0, + "step": 48512 + }, + { + "epoch": 45.26, + "learning_rate": 4.434281716417911e-05, + "loss": 0.0, + "step": 48516 + }, + { + "epoch": 45.26, + "learning_rate": 4.4342350746268655e-05, + "loss": 0.0, + "step": 48520 + }, + { + "epoch": 45.26, + "learning_rate": 4.434188432835821e-05, + "loss": 0.0, + "step": 48524 + }, + { + "epoch": 45.27, + "learning_rate": 4.4341417910447765e-05, + "loss": 0.0001, + "step": 48528 + }, + { + "epoch": 45.27, + "learning_rate": 4.434095149253731e-05, + "loss": 0.0, + "step": 48532 + }, + { + "epoch": 45.28, + "learning_rate": 4.434048507462687e-05, + "loss": 0.004, + "step": 48536 + }, + { + "epoch": 45.28, + "learning_rate": 4.434001865671642e-05, + "loss": 0.0, + "step": 48540 + }, + { + "epoch": 45.28, + "learning_rate": 4.433955223880597e-05, + "loss": 0.0, + "step": 48544 + }, + { + "epoch": 45.29, + "learning_rate": 4.4339085820895526e-05, + "loss": 0.0, + "step": 48548 + }, + { + "epoch": 45.29, + "learning_rate": 4.4338619402985074e-05, + "loss": 0.0, + "step": 48552 + }, + { + "epoch": 45.29, + "learning_rate": 4.433815298507463e-05, + "loss": 0.0, + "step": 48556 + }, + { + "epoch": 45.3, + "learning_rate": 4.4337686567164184e-05, + "loss": 0.0008, + "step": 48560 + }, + { + "epoch": 45.3, + "learning_rate": 4.433722014925373e-05, + "loss": 0.0, + "step": 48564 + }, + { + "epoch": 45.31, + "learning_rate": 4.433675373134329e-05, + "loss": 0.0003, + "step": 48568 + }, + { + "epoch": 45.31, + "learning_rate": 4.433628731343284e-05, + "loss": 0.0, + "step": 48572 + }, + { + "epoch": 45.31, + "learning_rate": 4.433582089552239e-05, + "loss": 0.0, + "step": 48576 + }, + { + "epoch": 45.32, + "learning_rate": 4.433535447761194e-05, + "loss": 0.0, + "step": 48580 + }, + { + "epoch": 45.32, + "learning_rate": 4.433488805970149e-05, + "loss": 0.0, + "step": 48584 + }, + { + "epoch": 45.32, + "learning_rate": 4.433442164179105e-05, + "loss": 0.0, + "step": 48588 + }, + { + "epoch": 45.33, + "learning_rate": 4.4333955223880596e-05, + "loss": 0.0001, + "step": 48592 + }, + { + "epoch": 45.33, + "learning_rate": 4.433348880597015e-05, + "loss": 0.0, + "step": 48596 + }, + { + "epoch": 45.34, + "learning_rate": 4.4333022388059706e-05, + "loss": 0.0, + "step": 48600 + }, + { + "epoch": 45.34, + "learning_rate": 4.4332555970149254e-05, + "loss": 0.0, + "step": 48604 + }, + { + "epoch": 45.34, + "learning_rate": 4.433208955223881e-05, + "loss": 0.0002, + "step": 48608 + }, + { + "epoch": 45.35, + "learning_rate": 4.433162313432836e-05, + "loss": 0.0, + "step": 48612 + }, + { + "epoch": 45.35, + "learning_rate": 4.433115671641792e-05, + "loss": 0.0001, + "step": 48616 + }, + { + "epoch": 45.35, + "learning_rate": 4.433069029850747e-05, + "loss": 0.0, + "step": 48620 + }, + { + "epoch": 45.36, + "learning_rate": 4.4330223880597015e-05, + "loss": 0.0027, + "step": 48624 + }, + { + "epoch": 45.36, + "learning_rate": 4.432975746268657e-05, + "loss": 0.0, + "step": 48628 + }, + { + "epoch": 45.37, + "learning_rate": 4.4329291044776125e-05, + "loss": 0.0002, + "step": 48632 + }, + { + "epoch": 45.37, + "learning_rate": 4.432882462686567e-05, + "loss": 0.0001, + "step": 48636 + }, + { + "epoch": 45.37, + "learning_rate": 4.432835820895523e-05, + "loss": 0.0, + "step": 48640 + }, + { + "epoch": 45.38, + "learning_rate": 4.4327891791044776e-05, + "loss": 0.0001, + "step": 48644 + }, + { + "epoch": 45.38, + "learning_rate": 4.432742537313433e-05, + "loss": 0.0001, + "step": 48648 + }, + { + "epoch": 45.38, + "learning_rate": 4.4326958955223886e-05, + "loss": 0.0, + "step": 48652 + }, + { + "epoch": 45.39, + "learning_rate": 4.4326492537313434e-05, + "loss": 0.0001, + "step": 48656 + }, + { + "epoch": 45.39, + "learning_rate": 4.432602611940299e-05, + "loss": 0.0, + "step": 48660 + }, + { + "epoch": 45.4, + "learning_rate": 4.4325559701492544e-05, + "loss": 0.0, + "step": 48664 + }, + { + "epoch": 45.4, + "learning_rate": 4.432509328358209e-05, + "loss": 0.0, + "step": 48668 + }, + { + "epoch": 45.4, + "learning_rate": 4.432462686567164e-05, + "loss": 0.0, + "step": 48672 + }, + { + "epoch": 45.41, + "learning_rate": 4.43241604477612e-05, + "loss": 0.0, + "step": 48676 + }, + { + "epoch": 45.41, + "learning_rate": 4.432369402985075e-05, + "loss": 0.0001, + "step": 48680 + }, + { + "epoch": 45.41, + "learning_rate": 4.43232276119403e-05, + "loss": 0.0, + "step": 48684 + }, + { + "epoch": 45.42, + "learning_rate": 4.432276119402985e-05, + "loss": 0.0006, + "step": 48688 + }, + { + "epoch": 45.42, + "learning_rate": 4.432229477611941e-05, + "loss": 0.0, + "step": 48692 + }, + { + "epoch": 45.43, + "learning_rate": 4.4321828358208956e-05, + "loss": 0.0005, + "step": 48696 + }, + { + "epoch": 45.43, + "learning_rate": 4.432136194029851e-05, + "loss": 0.0001, + "step": 48700 + }, + { + "epoch": 45.43, + "learning_rate": 4.432089552238806e-05, + "loss": 0.0, + "step": 48704 + }, + { + "epoch": 45.44, + "learning_rate": 4.4320429104477614e-05, + "loss": 0.0, + "step": 48708 + }, + { + "epoch": 45.44, + "learning_rate": 4.431996268656717e-05, + "loss": 0.0005, + "step": 48712 + }, + { + "epoch": 45.44, + "learning_rate": 4.431949626865672e-05, + "loss": 0.0, + "step": 48716 + }, + { + "epoch": 45.45, + "learning_rate": 4.431902985074627e-05, + "loss": 0.0, + "step": 48720 + }, + { + "epoch": 45.45, + "learning_rate": 4.4318563432835827e-05, + "loss": 0.0, + "step": 48724 + }, + { + "epoch": 45.46, + "learning_rate": 4.4318097014925375e-05, + "loss": 0.0, + "step": 48728 + }, + { + "epoch": 45.46, + "learning_rate": 4.431763059701492e-05, + "loss": 0.0001, + "step": 48732 + }, + { + "epoch": 45.46, + "learning_rate": 4.4317164179104484e-05, + "loss": 0.0, + "step": 48736 + }, + { + "epoch": 45.47, + "learning_rate": 4.431669776119403e-05, + "loss": 0.0, + "step": 48740 + }, + { + "epoch": 45.47, + "learning_rate": 4.431623134328358e-05, + "loss": 0.0001, + "step": 48744 + }, + { + "epoch": 45.47, + "learning_rate": 4.4315764925373136e-05, + "loss": 0.0, + "step": 48748 + }, + { + "epoch": 45.48, + "learning_rate": 4.431529850746269e-05, + "loss": 0.0, + "step": 48752 + }, + { + "epoch": 45.48, + "learning_rate": 4.431483208955224e-05, + "loss": 0.0, + "step": 48756 + }, + { + "epoch": 45.49, + "learning_rate": 4.4314365671641794e-05, + "loss": 0.0, + "step": 48760 + }, + { + "epoch": 45.49, + "learning_rate": 4.431389925373134e-05, + "loss": 0.0001, + "step": 48764 + }, + { + "epoch": 45.49, + "learning_rate": 4.43134328358209e-05, + "loss": 0.0, + "step": 48768 + }, + { + "epoch": 45.5, + "learning_rate": 4.431296641791045e-05, + "loss": 0.0, + "step": 48772 + }, + { + "epoch": 45.5, + "learning_rate": 4.43125e-05, + "loss": 0.0003, + "step": 48776 + }, + { + "epoch": 45.5, + "learning_rate": 4.4312033582089555e-05, + "loss": 0.0, + "step": 48780 + }, + { + "epoch": 45.51, + "learning_rate": 4.431156716417911e-05, + "loss": 0.0, + "step": 48784 + }, + { + "epoch": 45.51, + "learning_rate": 4.431110074626866e-05, + "loss": 0.0022, + "step": 48788 + }, + { + "epoch": 45.51, + "learning_rate": 4.431063432835821e-05, + "loss": 0.0019, + "step": 48792 + }, + { + "epoch": 45.52, + "learning_rate": 4.431016791044777e-05, + "loss": 0.0002, + "step": 48796 + }, + { + "epoch": 45.52, + "learning_rate": 4.4309701492537316e-05, + "loss": 0.0, + "step": 48800 + }, + { + "epoch": 45.53, + "learning_rate": 4.430923507462687e-05, + "loss": 0.0001, + "step": 48804 + }, + { + "epoch": 45.53, + "learning_rate": 4.430876865671642e-05, + "loss": 0.0, + "step": 48808 + }, + { + "epoch": 45.53, + "learning_rate": 4.4308302238805974e-05, + "loss": 0.0002, + "step": 48812 + }, + { + "epoch": 45.54, + "learning_rate": 4.430783582089553e-05, + "loss": 0.0, + "step": 48816 + }, + { + "epoch": 45.54, + "learning_rate": 4.4307369402985077e-05, + "loss": 0.0, + "step": 48820 + }, + { + "epoch": 45.54, + "learning_rate": 4.4306902985074625e-05, + "loss": 0.0, + "step": 48824 + }, + { + "epoch": 45.55, + "learning_rate": 4.4306436567164186e-05, + "loss": 0.0001, + "step": 48828 + }, + { + "epoch": 45.55, + "learning_rate": 4.4305970149253735e-05, + "loss": 0.005, + "step": 48832 + }, + { + "epoch": 45.56, + "learning_rate": 4.430550373134328e-05, + "loss": 0.0, + "step": 48836 + }, + { + "epoch": 45.56, + "learning_rate": 4.430503731343284e-05, + "loss": 0.0, + "step": 48840 + }, + { + "epoch": 45.56, + "learning_rate": 4.430457089552239e-05, + "loss": 0.0001, + "step": 48844 + }, + { + "epoch": 45.57, + "learning_rate": 4.430410447761194e-05, + "loss": 0.0, + "step": 48848 + }, + { + "epoch": 45.57, + "learning_rate": 4.4303638059701495e-05, + "loss": 0.0, + "step": 48852 + }, + { + "epoch": 45.57, + "learning_rate": 4.4303171641791044e-05, + "loss": 0.0, + "step": 48856 + }, + { + "epoch": 45.58, + "learning_rate": 4.43027052238806e-05, + "loss": 0.0, + "step": 48860 + }, + { + "epoch": 45.58, + "learning_rate": 4.4302238805970153e-05, + "loss": 0.0, + "step": 48864 + }, + { + "epoch": 45.59, + "learning_rate": 4.43017723880597e-05, + "loss": 0.0, + "step": 48868 + }, + { + "epoch": 45.59, + "learning_rate": 4.4301305970149256e-05, + "loss": 0.0001, + "step": 48872 + }, + { + "epoch": 45.59, + "learning_rate": 4.430083955223881e-05, + "loss": 0.0, + "step": 48876 + }, + { + "epoch": 45.6, + "learning_rate": 4.430037313432836e-05, + "loss": 0.0, + "step": 48880 + }, + { + "epoch": 45.6, + "learning_rate": 4.429990671641791e-05, + "loss": 0.0, + "step": 48884 + }, + { + "epoch": 45.6, + "learning_rate": 4.429944029850747e-05, + "loss": 0.0, + "step": 48888 + }, + { + "epoch": 45.61, + "learning_rate": 4.429897388059702e-05, + "loss": 0.0, + "step": 48892 + }, + { + "epoch": 45.61, + "learning_rate": 4.4298507462686566e-05, + "loss": 0.0002, + "step": 48896 + }, + { + "epoch": 45.62, + "learning_rate": 4.429804104477612e-05, + "loss": 0.0, + "step": 48900 + }, + { + "epoch": 45.62, + "learning_rate": 4.4297574626865675e-05, + "loss": 0.0, + "step": 48904 + }, + { + "epoch": 45.62, + "learning_rate": 4.4297108208955224e-05, + "loss": 0.0, + "step": 48908 + }, + { + "epoch": 45.63, + "learning_rate": 4.429664179104478e-05, + "loss": 0.0, + "step": 48912 + }, + { + "epoch": 45.63, + "learning_rate": 4.4296175373134327e-05, + "loss": 0.0057, + "step": 48916 + }, + { + "epoch": 45.63, + "learning_rate": 4.429570895522388e-05, + "loss": 0.0, + "step": 48920 + }, + { + "epoch": 45.64, + "learning_rate": 4.4295242537313436e-05, + "loss": 0.0002, + "step": 48924 + }, + { + "epoch": 45.64, + "learning_rate": 4.4294776119402985e-05, + "loss": 0.0003, + "step": 48928 + }, + { + "epoch": 45.65, + "learning_rate": 4.429430970149254e-05, + "loss": 0.0, + "step": 48932 + }, + { + "epoch": 45.65, + "learning_rate": 4.4293843283582094e-05, + "loss": 0.0003, + "step": 48936 + }, + { + "epoch": 45.65, + "learning_rate": 4.429337686567164e-05, + "loss": 0.0, + "step": 48940 + }, + { + "epoch": 45.66, + "learning_rate": 4.42929104477612e-05, + "loss": 0.0, + "step": 48944 + }, + { + "epoch": 45.66, + "learning_rate": 4.429244402985075e-05, + "loss": 0.0, + "step": 48948 + }, + { + "epoch": 45.66, + "learning_rate": 4.42919776119403e-05, + "loss": 0.0, + "step": 48952 + }, + { + "epoch": 45.67, + "learning_rate": 4.4291511194029855e-05, + "loss": 0.0, + "step": 48956 + }, + { + "epoch": 45.67, + "learning_rate": 4.4291044776119403e-05, + "loss": 0.0001, + "step": 48960 + }, + { + "epoch": 45.68, + "learning_rate": 4.429057835820896e-05, + "loss": 0.0, + "step": 48964 + }, + { + "epoch": 45.68, + "learning_rate": 4.429011194029851e-05, + "loss": 0.0, + "step": 48968 + }, + { + "epoch": 45.68, + "learning_rate": 4.428964552238806e-05, + "loss": 0.0004, + "step": 48972 + }, + { + "epoch": 45.69, + "learning_rate": 4.428917910447761e-05, + "loss": 0.0, + "step": 48976 + }, + { + "epoch": 45.69, + "learning_rate": 4.428871268656717e-05, + "loss": 0.0, + "step": 48980 + }, + { + "epoch": 45.69, + "learning_rate": 4.428824626865672e-05, + "loss": 0.0, + "step": 48984 + }, + { + "epoch": 45.7, + "learning_rate": 4.428777985074627e-05, + "loss": 0.0, + "step": 48988 + }, + { + "epoch": 45.7, + "learning_rate": 4.428731343283582e-05, + "loss": 0.0, + "step": 48992 + }, + { + "epoch": 45.71, + "learning_rate": 4.428684701492538e-05, + "loss": 0.0, + "step": 48996 + }, + { + "epoch": 45.71, + "learning_rate": 4.4286380597014925e-05, + "loss": 0.001, + "step": 49000 + }, + { + "epoch": 45.71, + "eval_exact_match": 0.7321083172147002, + "eval_exec": 0.7707930367504836, + "eval_loss": 0.4455174505710602, + "eval_runtime": 1081.8406, + "eval_samples_per_second": 0.956, + "step": 49000 + }, + { + "epoch": 45.71, + "learning_rate": 4.428591417910448e-05, + "loss": 0.0, + "step": 49004 + }, + { + "epoch": 45.72, + "learning_rate": 4.4285447761194035e-05, + "loss": 0.0001, + "step": 49008 + }, + { + "epoch": 45.72, + "learning_rate": 4.428498134328358e-05, + "loss": 0.0005, + "step": 49012 + }, + { + "epoch": 45.72, + "learning_rate": 4.428451492537314e-05, + "loss": 0.0, + "step": 49016 + }, + { + "epoch": 45.73, + "learning_rate": 4.4284048507462686e-05, + "loss": 0.0001, + "step": 49020 + }, + { + "epoch": 45.73, + "learning_rate": 4.428358208955224e-05, + "loss": 0.0, + "step": 49024 + }, + { + "epoch": 45.73, + "learning_rate": 4.4283115671641796e-05, + "loss": 0.0, + "step": 49028 + }, + { + "epoch": 45.74, + "learning_rate": 4.4282649253731344e-05, + "loss": 0.0002, + "step": 49032 + }, + { + "epoch": 45.74, + "learning_rate": 4.428218283582089e-05, + "loss": 0.0002, + "step": 49036 + }, + { + "epoch": 45.75, + "learning_rate": 4.4281716417910454e-05, + "loss": 0.0, + "step": 49040 + }, + { + "epoch": 45.75, + "learning_rate": 4.428125e-05, + "loss": 0.0, + "step": 49044 + }, + { + "epoch": 45.75, + "learning_rate": 4.428078358208955e-05, + "loss": 0.0, + "step": 49048 + }, + { + "epoch": 45.76, + "learning_rate": 4.4280317164179105e-05, + "loss": 0.0, + "step": 49052 + }, + { + "epoch": 45.76, + "learning_rate": 4.427985074626866e-05, + "loss": 0.0001, + "step": 49056 + }, + { + "epoch": 45.76, + "learning_rate": 4.427938432835821e-05, + "loss": 0.0003, + "step": 49060 + }, + { + "epoch": 45.77, + "learning_rate": 4.427891791044776e-05, + "loss": 0.0, + "step": 49064 + }, + { + "epoch": 45.77, + "learning_rate": 4.427845149253732e-05, + "loss": 0.0, + "step": 49068 + }, + { + "epoch": 45.78, + "learning_rate": 4.4277985074626866e-05, + "loss": 0.0001, + "step": 49072 + }, + { + "epoch": 45.78, + "learning_rate": 4.427751865671642e-05, + "loss": 0.0001, + "step": 49076 + }, + { + "epoch": 45.78, + "learning_rate": 4.427705223880597e-05, + "loss": 0.0, + "step": 49080 + }, + { + "epoch": 45.79, + "learning_rate": 4.4276585820895524e-05, + "loss": 0.0, + "step": 49084 + }, + { + "epoch": 45.79, + "learning_rate": 4.427611940298508e-05, + "loss": 0.0001, + "step": 49088 + }, + { + "epoch": 45.79, + "learning_rate": 4.427565298507463e-05, + "loss": 0.0011, + "step": 49092 + }, + { + "epoch": 45.8, + "learning_rate": 4.4275186567164175e-05, + "loss": 0.0001, + "step": 49096 + }, + { + "epoch": 45.8, + "learning_rate": 4.427472014925374e-05, + "loss": 0.0, + "step": 49100 + }, + { + "epoch": 45.81, + "learning_rate": 4.4274253731343285e-05, + "loss": 0.0, + "step": 49104 + }, + { + "epoch": 45.81, + "learning_rate": 4.427378731343284e-05, + "loss": 0.0, + "step": 49108 + }, + { + "epoch": 45.81, + "learning_rate": 4.427332089552239e-05, + "loss": 0.0, + "step": 49112 + }, + { + "epoch": 45.82, + "learning_rate": 4.427285447761194e-05, + "loss": 0.0001, + "step": 49116 + }, + { + "epoch": 45.82, + "learning_rate": 4.42723880597015e-05, + "loss": 0.0, + "step": 49120 + }, + { + "epoch": 45.82, + "learning_rate": 4.4271921641791046e-05, + "loss": 0.0005, + "step": 49124 + }, + { + "epoch": 45.83, + "learning_rate": 4.42714552238806e-05, + "loss": 0.0001, + "step": 49128 + }, + { + "epoch": 45.83, + "learning_rate": 4.4270988805970156e-05, + "loss": 0.0, + "step": 49132 + }, + { + "epoch": 45.84, + "learning_rate": 4.4270522388059704e-05, + "loss": 0.0, + "step": 49136 + }, + { + "epoch": 45.84, + "learning_rate": 4.427005597014925e-05, + "loss": 0.0, + "step": 49140 + }, + { + "epoch": 45.84, + "learning_rate": 4.426958955223881e-05, + "loss": 0.0005, + "step": 49144 + }, + { + "epoch": 45.85, + "learning_rate": 4.426912313432836e-05, + "loss": 0.0, + "step": 49148 + }, + { + "epoch": 45.85, + "learning_rate": 4.426865671641791e-05, + "loss": 0.0001, + "step": 49152 + }, + { + "epoch": 45.85, + "learning_rate": 4.4268190298507465e-05, + "loss": 0.0, + "step": 49156 + }, + { + "epoch": 45.86, + "learning_rate": 4.426772388059702e-05, + "loss": 0.0, + "step": 49160 + }, + { + "epoch": 45.86, + "learning_rate": 4.426725746268657e-05, + "loss": 0.0, + "step": 49164 + }, + { + "epoch": 45.87, + "learning_rate": 4.426679104477612e-05, + "loss": 0.0, + "step": 49168 + }, + { + "epoch": 45.87, + "learning_rate": 4.426632462686567e-05, + "loss": 0.0001, + "step": 49172 + }, + { + "epoch": 45.87, + "learning_rate": 4.4265858208955226e-05, + "loss": 0.0, + "step": 49176 + }, + { + "epoch": 45.88, + "learning_rate": 4.426539179104478e-05, + "loss": 0.0014, + "step": 49180 + }, + { + "epoch": 45.88, + "learning_rate": 4.426492537313433e-05, + "loss": 0.0, + "step": 49184 + }, + { + "epoch": 45.88, + "learning_rate": 4.4264458955223884e-05, + "loss": 0.0, + "step": 49188 + }, + { + "epoch": 45.89, + "learning_rate": 4.426399253731344e-05, + "loss": 0.0, + "step": 49192 + }, + { + "epoch": 45.89, + "learning_rate": 4.426352611940299e-05, + "loss": 0.0012, + "step": 49196 + }, + { + "epoch": 45.9, + "learning_rate": 4.4263059701492535e-05, + "loss": 0.0, + "step": 49200 + }, + { + "epoch": 45.9, + "learning_rate": 4.426259328358209e-05, + "loss": 0.0, + "step": 49204 + }, + { + "epoch": 45.9, + "learning_rate": 4.4262126865671645e-05, + "loss": 0.0, + "step": 49208 + }, + { + "epoch": 45.91, + "learning_rate": 4.426166044776119e-05, + "loss": 0.0, + "step": 49212 + }, + { + "epoch": 45.91, + "learning_rate": 4.426119402985075e-05, + "loss": 0.0, + "step": 49216 + }, + { + "epoch": 45.91, + "learning_rate": 4.42607276119403e-05, + "loss": 0.0003, + "step": 49220 + }, + { + "epoch": 45.92, + "learning_rate": 4.426026119402985e-05, + "loss": 0.0004, + "step": 49224 + }, + { + "epoch": 45.92, + "learning_rate": 4.4259794776119406e-05, + "loss": 0.0, + "step": 49228 + }, + { + "epoch": 45.93, + "learning_rate": 4.4259328358208954e-05, + "loss": 0.0, + "step": 49232 + }, + { + "epoch": 45.93, + "learning_rate": 4.425886194029851e-05, + "loss": 0.0021, + "step": 49236 + }, + { + "epoch": 45.93, + "learning_rate": 4.4258395522388064e-05, + "loss": 0.0017, + "step": 49240 + }, + { + "epoch": 45.94, + "learning_rate": 4.425792910447761e-05, + "loss": 0.0001, + "step": 49244 + }, + { + "epoch": 45.94, + "learning_rate": 4.425746268656717e-05, + "loss": 0.0, + "step": 49248 + }, + { + "epoch": 45.94, + "learning_rate": 4.425699626865672e-05, + "loss": 0.0017, + "step": 49252 + }, + { + "epoch": 45.95, + "learning_rate": 4.425652985074627e-05, + "loss": 0.0, + "step": 49256 + }, + { + "epoch": 45.95, + "learning_rate": 4.425606343283582e-05, + "loss": 0.0, + "step": 49260 + }, + { + "epoch": 45.96, + "learning_rate": 4.425559701492537e-05, + "loss": 0.0001, + "step": 49264 + }, + { + "epoch": 45.96, + "learning_rate": 4.425513059701493e-05, + "loss": 0.0, + "step": 49268 + }, + { + "epoch": 45.96, + "learning_rate": 4.425466417910448e-05, + "loss": 0.0034, + "step": 49272 + }, + { + "epoch": 45.97, + "learning_rate": 4.425419776119403e-05, + "loss": 0.0, + "step": 49276 + }, + { + "epoch": 45.97, + "learning_rate": 4.4253731343283586e-05, + "loss": 0.0001, + "step": 49280 + }, + { + "epoch": 45.97, + "learning_rate": 4.425326492537314e-05, + "loss": 0.0, + "step": 49284 + }, + { + "epoch": 45.98, + "learning_rate": 4.425279850746269e-05, + "loss": 0.0, + "step": 49288 + }, + { + "epoch": 45.98, + "learning_rate": 4.425233208955224e-05, + "loss": 0.0004, + "step": 49292 + }, + { + "epoch": 45.98, + "learning_rate": 4.42518656716418e-05, + "loss": 0.0001, + "step": 49296 + }, + { + "epoch": 45.99, + "learning_rate": 4.425139925373135e-05, + "loss": 0.0, + "step": 49300 + }, + { + "epoch": 45.99, + "learning_rate": 4.4250932835820895e-05, + "loss": 0.0, + "step": 49304 + }, + { + "epoch": 46.0, + "learning_rate": 4.425046641791045e-05, + "loss": 0.0039, + "step": 49308 + }, + { + "epoch": 46.0, + "learning_rate": 4.4250000000000005e-05, + "loss": 0.0, + "step": 49312 + }, + { + "epoch": 46.0, + "learning_rate": 4.424953358208955e-05, + "loss": 0.0001, + "step": 49316 + }, + { + "epoch": 46.01, + "learning_rate": 4.424906716417911e-05, + "loss": 0.0008, + "step": 49320 + }, + { + "epoch": 46.01, + "learning_rate": 4.4248600746268656e-05, + "loss": 0.0, + "step": 49324 + }, + { + "epoch": 46.01, + "learning_rate": 4.424813432835821e-05, + "loss": 0.0004, + "step": 49328 + }, + { + "epoch": 46.02, + "learning_rate": 4.4247667910447766e-05, + "loss": 0.0, + "step": 49332 + }, + { + "epoch": 46.02, + "learning_rate": 4.4247201492537314e-05, + "loss": 0.0, + "step": 49336 + }, + { + "epoch": 46.03, + "learning_rate": 4.424673507462687e-05, + "loss": 0.0, + "step": 49340 + }, + { + "epoch": 46.03, + "learning_rate": 4.4246268656716424e-05, + "loss": 0.0, + "step": 49344 + }, + { + "epoch": 46.03, + "learning_rate": 4.424580223880597e-05, + "loss": 0.0, + "step": 49348 + }, + { + "epoch": 46.04, + "learning_rate": 4.424533582089552e-05, + "loss": 0.0, + "step": 49352 + }, + { + "epoch": 46.04, + "learning_rate": 4.424486940298508e-05, + "loss": 0.0, + "step": 49356 + }, + { + "epoch": 46.04, + "learning_rate": 4.424440298507463e-05, + "loss": 0.0, + "step": 49360 + }, + { + "epoch": 46.05, + "learning_rate": 4.424393656716418e-05, + "loss": 0.0, + "step": 49364 + }, + { + "epoch": 46.05, + "learning_rate": 4.424347014925373e-05, + "loss": 0.0065, + "step": 49368 + }, + { + "epoch": 46.06, + "learning_rate": 4.424300373134329e-05, + "loss": 0.0, + "step": 49372 + }, + { + "epoch": 46.06, + "learning_rate": 4.4242537313432836e-05, + "loss": 0.0, + "step": 49376 + }, + { + "epoch": 46.06, + "learning_rate": 4.424207089552239e-05, + "loss": 0.0, + "step": 49380 + }, + { + "epoch": 46.07, + "learning_rate": 4.424160447761194e-05, + "loss": 0.0014, + "step": 49384 + }, + { + "epoch": 46.07, + "learning_rate": 4.4241138059701494e-05, + "loss": 0.0, + "step": 49388 + }, + { + "epoch": 46.07, + "learning_rate": 4.424067164179105e-05, + "loss": 0.0, + "step": 49392 + }, + { + "epoch": 46.08, + "learning_rate": 4.42402052238806e-05, + "loss": 0.0001, + "step": 49396 + }, + { + "epoch": 46.08, + "learning_rate": 4.423973880597015e-05, + "loss": 0.0001, + "step": 49400 + }, + { + "epoch": 46.09, + "learning_rate": 4.423927238805971e-05, + "loss": 0.0, + "step": 49404 + }, + { + "epoch": 46.09, + "learning_rate": 4.4238805970149255e-05, + "loss": 0.0, + "step": 49408 + }, + { + "epoch": 46.09, + "learning_rate": 4.42383395522388e-05, + "loss": 0.0, + "step": 49412 + }, + { + "epoch": 46.1, + "learning_rate": 4.4237873134328365e-05, + "loss": 0.0, + "step": 49416 + }, + { + "epoch": 46.1, + "learning_rate": 4.423740671641791e-05, + "loss": 0.0, + "step": 49420 + }, + { + "epoch": 46.1, + "learning_rate": 4.423694029850746e-05, + "loss": 0.0001, + "step": 49424 + }, + { + "epoch": 46.11, + "learning_rate": 4.4236473880597016e-05, + "loss": 0.0, + "step": 49428 + }, + { + "epoch": 46.11, + "learning_rate": 4.423600746268657e-05, + "loss": 0.0, + "step": 49432 + }, + { + "epoch": 46.12, + "learning_rate": 4.4235541044776126e-05, + "loss": 0.0, + "step": 49436 + }, + { + "epoch": 46.12, + "learning_rate": 4.4235074626865674e-05, + "loss": 0.0, + "step": 49440 + }, + { + "epoch": 46.12, + "learning_rate": 4.423460820895522e-05, + "loss": 0.0, + "step": 49444 + }, + { + "epoch": 46.13, + "learning_rate": 4.4234141791044784e-05, + "loss": 0.0001, + "step": 49448 + }, + { + "epoch": 46.13, + "learning_rate": 4.423367537313433e-05, + "loss": 0.0002, + "step": 49452 + }, + { + "epoch": 46.13, + "learning_rate": 4.423320895522388e-05, + "loss": 0.0, + "step": 49456 + }, + { + "epoch": 46.14, + "learning_rate": 4.4232742537313435e-05, + "loss": 0.0001, + "step": 49460 + }, + { + "epoch": 46.14, + "learning_rate": 4.423227611940299e-05, + "loss": 0.0061, + "step": 49464 + }, + { + "epoch": 46.15, + "learning_rate": 4.423180970149254e-05, + "loss": 0.0004, + "step": 49468 + }, + { + "epoch": 46.15, + "learning_rate": 4.423134328358209e-05, + "loss": 0.0, + "step": 49472 + }, + { + "epoch": 46.15, + "learning_rate": 4.423087686567165e-05, + "loss": 0.0001, + "step": 49476 + }, + { + "epoch": 46.16, + "learning_rate": 4.4230410447761196e-05, + "loss": 0.0, + "step": 49480 + }, + { + "epoch": 46.16, + "learning_rate": 4.422994402985075e-05, + "loss": 0.0, + "step": 49484 + }, + { + "epoch": 46.16, + "learning_rate": 4.42294776119403e-05, + "loss": 0.0002, + "step": 49488 + }, + { + "epoch": 46.17, + "learning_rate": 4.4229011194029854e-05, + "loss": 0.0, + "step": 49492 + }, + { + "epoch": 46.17, + "learning_rate": 4.422854477611941e-05, + "loss": 0.0, + "step": 49496 + }, + { + "epoch": 46.18, + "learning_rate": 4.422807835820896e-05, + "loss": 0.0006, + "step": 49500 + }, + { + "epoch": 46.18, + "eval_exact_match": 0.7214700193423598, + "eval_exec": 0.7572533849129593, + "eval_loss": 0.45067957043647766, + "eval_runtime": 1072.9162, + "eval_samples_per_second": 0.964, + "step": 49500 + }, + { + "epoch": 46.18, + "learning_rate": 4.4227611940298505e-05, + "loss": 0.0001, + "step": 49504 + }, + { + "epoch": 46.18, + "learning_rate": 4.422714552238807e-05, + "loss": 0.0, + "step": 49508 + }, + { + "epoch": 46.19, + "learning_rate": 4.4226679104477615e-05, + "loss": 0.0001, + "step": 49512 + }, + { + "epoch": 46.19, + "learning_rate": 4.422621268656716e-05, + "loss": 0.0, + "step": 49516 + }, + { + "epoch": 46.19, + "learning_rate": 4.422574626865672e-05, + "loss": 0.0001, + "step": 49520 + }, + { + "epoch": 46.2, + "learning_rate": 4.422527985074627e-05, + "loss": 0.0009, + "step": 49524 + }, + { + "epoch": 46.2, + "learning_rate": 4.422481343283582e-05, + "loss": 0.0, + "step": 49528 + }, + { + "epoch": 46.21, + "learning_rate": 4.4224347014925376e-05, + "loss": 0.0, + "step": 49532 + }, + { + "epoch": 46.21, + "learning_rate": 4.4223880597014924e-05, + "loss": 0.0, + "step": 49536 + }, + { + "epoch": 46.21, + "learning_rate": 4.422341417910448e-05, + "loss": 0.0001, + "step": 49540 + }, + { + "epoch": 46.22, + "learning_rate": 4.4222947761194034e-05, + "loss": 0.0, + "step": 49544 + }, + { + "epoch": 46.22, + "learning_rate": 4.422248134328358e-05, + "loss": 0.0009, + "step": 49548 + }, + { + "epoch": 46.22, + "learning_rate": 4.422201492537314e-05, + "loss": 0.0003, + "step": 49552 + }, + { + "epoch": 46.23, + "learning_rate": 4.422154850746269e-05, + "loss": 0.0022, + "step": 49556 + }, + { + "epoch": 46.23, + "learning_rate": 4.422108208955224e-05, + "loss": 0.0003, + "step": 49560 + }, + { + "epoch": 46.24, + "learning_rate": 4.422061567164179e-05, + "loss": 0.0001, + "step": 49564 + }, + { + "epoch": 46.24, + "learning_rate": 4.422014925373135e-05, + "loss": 0.0, + "step": 49568 + }, + { + "epoch": 46.24, + "learning_rate": 4.42196828358209e-05, + "loss": 0.0003, + "step": 49572 + }, + { + "epoch": 46.25, + "learning_rate": 4.4219216417910446e-05, + "loss": 0.0, + "step": 49576 + }, + { + "epoch": 46.25, + "learning_rate": 4.421875e-05, + "loss": 0.0, + "step": 49580 + }, + { + "epoch": 46.25, + "learning_rate": 4.4218283582089556e-05, + "loss": 0.0, + "step": 49584 + }, + { + "epoch": 46.26, + "learning_rate": 4.4217817164179104e-05, + "loss": 0.0004, + "step": 49588 + }, + { + "epoch": 46.26, + "learning_rate": 4.421735074626866e-05, + "loss": 0.0, + "step": 49592 + }, + { + "epoch": 46.26, + "learning_rate": 4.421688432835821e-05, + "loss": 0.0, + "step": 49596 + }, + { + "epoch": 46.27, + "learning_rate": 4.421641791044777e-05, + "loss": 0.0, + "step": 49600 + }, + { + "epoch": 46.27, + "learning_rate": 4.421595149253732e-05, + "loss": 0.0006, + "step": 49604 + }, + { + "epoch": 46.28, + "learning_rate": 4.4215485074626865e-05, + "loss": 0.0, + "step": 49608 + }, + { + "epoch": 46.28, + "learning_rate": 4.421501865671642e-05, + "loss": 0.0, + "step": 49612 + }, + { + "epoch": 46.28, + "learning_rate": 4.4214552238805975e-05, + "loss": 0.0008, + "step": 49616 + }, + { + "epoch": 46.29, + "learning_rate": 4.421408582089552e-05, + "loss": 0.0, + "step": 49620 + }, + { + "epoch": 46.29, + "learning_rate": 4.421361940298508e-05, + "loss": 0.0, + "step": 49624 + }, + { + "epoch": 46.29, + "learning_rate": 4.421315298507463e-05, + "loss": 0.0, + "step": 49628 + }, + { + "epoch": 46.3, + "learning_rate": 4.421268656716418e-05, + "loss": 0.0032, + "step": 49632 + }, + { + "epoch": 46.3, + "learning_rate": 4.4212220149253736e-05, + "loss": 0.0, + "step": 49636 + }, + { + "epoch": 46.31, + "learning_rate": 4.4211753731343284e-05, + "loss": 0.0001, + "step": 49640 + }, + { + "epoch": 46.31, + "learning_rate": 4.421128731343284e-05, + "loss": 0.0003, + "step": 49644 + }, + { + "epoch": 46.31, + "learning_rate": 4.4210820895522394e-05, + "loss": 0.0011, + "step": 49648 + }, + { + "epoch": 46.32, + "learning_rate": 4.421035447761194e-05, + "loss": 0.0, + "step": 49652 + }, + { + "epoch": 46.32, + "learning_rate": 4.420988805970149e-05, + "loss": 0.0, + "step": 49656 + }, + { + "epoch": 46.32, + "learning_rate": 4.420942164179105e-05, + "loss": 0.0, + "step": 49660 + }, + { + "epoch": 46.33, + "learning_rate": 4.42089552238806e-05, + "loss": 0.0, + "step": 49664 + }, + { + "epoch": 46.33, + "learning_rate": 4.420848880597015e-05, + "loss": 0.0004, + "step": 49668 + }, + { + "epoch": 46.34, + "learning_rate": 4.42080223880597e-05, + "loss": 0.0, + "step": 49672 + }, + { + "epoch": 46.34, + "learning_rate": 4.420755597014926e-05, + "loss": 0.0007, + "step": 49676 + }, + { + "epoch": 46.34, + "learning_rate": 4.4207089552238806e-05, + "loss": 0.0, + "step": 49680 + }, + { + "epoch": 46.35, + "learning_rate": 4.420662313432836e-05, + "loss": 0.0001, + "step": 49684 + }, + { + "epoch": 46.35, + "learning_rate": 4.4206156716417916e-05, + "loss": 0.0, + "step": 49688 + }, + { + "epoch": 46.35, + "learning_rate": 4.4205690298507464e-05, + "loss": 0.0006, + "step": 49692 + }, + { + "epoch": 46.36, + "learning_rate": 4.420522388059702e-05, + "loss": 0.0027, + "step": 49696 + }, + { + "epoch": 46.36, + "learning_rate": 4.420475746268657e-05, + "loss": 0.007, + "step": 49700 + }, + { + "epoch": 46.37, + "learning_rate": 4.420429104477612e-05, + "loss": 0.0, + "step": 49704 + }, + { + "epoch": 46.37, + "learning_rate": 4.4203824626865677e-05, + "loss": 0.0, + "step": 49708 + }, + { + "epoch": 46.37, + "learning_rate": 4.4203358208955225e-05, + "loss": 0.0001, + "step": 49712 + }, + { + "epoch": 46.38, + "learning_rate": 4.420289179104477e-05, + "loss": 0.0, + "step": 49716 + }, + { + "epoch": 46.38, + "learning_rate": 4.4202425373134334e-05, + "loss": 0.0003, + "step": 49720 + }, + { + "epoch": 46.38, + "learning_rate": 4.420195895522388e-05, + "loss": 0.0004, + "step": 49724 + }, + { + "epoch": 46.39, + "learning_rate": 4.420149253731343e-05, + "loss": 0.0, + "step": 49728 + }, + { + "epoch": 46.39, + "learning_rate": 4.4201026119402986e-05, + "loss": 0.0, + "step": 49732 + }, + { + "epoch": 46.4, + "learning_rate": 4.420055970149254e-05, + "loss": 0.0, + "step": 49736 + }, + { + "epoch": 46.4, + "learning_rate": 4.420009328358209e-05, + "loss": 0.0, + "step": 49740 + }, + { + "epoch": 46.4, + "learning_rate": 4.4199626865671644e-05, + "loss": 0.0, + "step": 49744 + }, + { + "epoch": 46.41, + "learning_rate": 4.41991604477612e-05, + "loss": 0.0, + "step": 49748 + }, + { + "epoch": 46.41, + "learning_rate": 4.419869402985075e-05, + "loss": 0.0, + "step": 49752 + }, + { + "epoch": 46.41, + "learning_rate": 4.41982276119403e-05, + "loss": 0.0, + "step": 49756 + }, + { + "epoch": 46.42, + "learning_rate": 4.419776119402985e-05, + "loss": 0.0004, + "step": 49760 + }, + { + "epoch": 46.42, + "learning_rate": 4.4197294776119405e-05, + "loss": 0.0001, + "step": 49764 + }, + { + "epoch": 46.43, + "learning_rate": 4.419682835820896e-05, + "loss": 0.0, + "step": 49768 + }, + { + "epoch": 46.43, + "learning_rate": 4.419636194029851e-05, + "loss": 0.0, + "step": 49772 + }, + { + "epoch": 46.43, + "learning_rate": 4.419589552238806e-05, + "loss": 0.0, + "step": 49776 + }, + { + "epoch": 46.44, + "learning_rate": 4.419542910447762e-05, + "loss": 0.0001, + "step": 49780 + }, + { + "epoch": 46.44, + "learning_rate": 4.4194962686567166e-05, + "loss": 0.0, + "step": 49784 + }, + { + "epoch": 46.44, + "learning_rate": 4.419449626865672e-05, + "loss": 0.0, + "step": 49788 + }, + { + "epoch": 46.45, + "learning_rate": 4.419402985074627e-05, + "loss": 0.0001, + "step": 49792 + }, + { + "epoch": 46.45, + "learning_rate": 4.4193563432835823e-05, + "loss": 0.0, + "step": 49796 + }, + { + "epoch": 46.46, + "learning_rate": 4.419309701492538e-05, + "loss": 0.0, + "step": 49800 + }, + { + "epoch": 46.46, + "learning_rate": 4.4192630597014927e-05, + "loss": 0.0, + "step": 49804 + }, + { + "epoch": 46.46, + "learning_rate": 4.419216417910448e-05, + "loss": 0.0, + "step": 49808 + }, + { + "epoch": 46.47, + "learning_rate": 4.4191697761194036e-05, + "loss": 0.0, + "step": 49812 + }, + { + "epoch": 46.47, + "learning_rate": 4.4191231343283584e-05, + "loss": 0.0, + "step": 49816 + }, + { + "epoch": 46.47, + "learning_rate": 4.419076492537313e-05, + "loss": 0.001, + "step": 49820 + }, + { + "epoch": 46.48, + "learning_rate": 4.419029850746269e-05, + "loss": 0.0, + "step": 49824 + }, + { + "epoch": 46.48, + "learning_rate": 4.418983208955224e-05, + "loss": 0.0006, + "step": 49828 + }, + { + "epoch": 46.49, + "learning_rate": 4.418936567164179e-05, + "loss": 0.0, + "step": 49832 + }, + { + "epoch": 46.49, + "learning_rate": 4.4188899253731345e-05, + "loss": 0.001, + "step": 49836 + }, + { + "epoch": 46.49, + "learning_rate": 4.41884328358209e-05, + "loss": 0.0, + "step": 49840 + }, + { + "epoch": 46.5, + "learning_rate": 4.418796641791045e-05, + "loss": 0.0, + "step": 49844 + }, + { + "epoch": 46.5, + "learning_rate": 4.4187500000000003e-05, + "loss": 0.0, + "step": 49848 + }, + { + "epoch": 46.5, + "learning_rate": 4.418703358208955e-05, + "loss": 0.0013, + "step": 49852 + }, + { + "epoch": 46.51, + "learning_rate": 4.4186567164179106e-05, + "loss": 0.0, + "step": 49856 + }, + { + "epoch": 46.51, + "learning_rate": 4.418610074626866e-05, + "loss": 0.0, + "step": 49860 + }, + { + "epoch": 46.51, + "learning_rate": 4.418563432835821e-05, + "loss": 0.0004, + "step": 49864 + }, + { + "epoch": 46.52, + "learning_rate": 4.4185167910447764e-05, + "loss": 0.0, + "step": 49868 + }, + { + "epoch": 46.52, + "learning_rate": 4.418470149253732e-05, + "loss": 0.0006, + "step": 49872 + }, + { + "epoch": 46.53, + "learning_rate": 4.418423507462687e-05, + "loss": 0.0, + "step": 49876 + }, + { + "epoch": 46.53, + "learning_rate": 4.4183768656716416e-05, + "loss": 0.0, + "step": 49880 + }, + { + "epoch": 46.53, + "learning_rate": 4.418330223880597e-05, + "loss": 0.0001, + "step": 49884 + }, + { + "epoch": 46.54, + "learning_rate": 4.4182835820895525e-05, + "loss": 0.0, + "step": 49888 + }, + { + "epoch": 46.54, + "learning_rate": 4.4182369402985074e-05, + "loss": 0.0, + "step": 49892 + }, + { + "epoch": 46.54, + "learning_rate": 4.418190298507463e-05, + "loss": 0.0013, + "step": 49896 + }, + { + "epoch": 46.55, + "learning_rate": 4.418143656716418e-05, + "loss": 0.0, + "step": 49900 + }, + { + "epoch": 46.55, + "learning_rate": 4.418097014925373e-05, + "loss": 0.0, + "step": 49904 + }, + { + "epoch": 46.56, + "learning_rate": 4.4180503731343286e-05, + "loss": 0.0012, + "step": 49908 + }, + { + "epoch": 46.56, + "learning_rate": 4.4180037313432835e-05, + "loss": 0.0005, + "step": 49912 + }, + { + "epoch": 46.56, + "learning_rate": 4.417957089552239e-05, + "loss": 0.0, + "step": 49916 + }, + { + "epoch": 46.57, + "learning_rate": 4.4179104477611944e-05, + "loss": 0.0027, + "step": 49920 + }, + { + "epoch": 46.57, + "learning_rate": 4.417863805970149e-05, + "loss": 0.0001, + "step": 49924 + }, + { + "epoch": 46.57, + "learning_rate": 4.417817164179105e-05, + "loss": 0.0, + "step": 49928 + }, + { + "epoch": 46.58, + "learning_rate": 4.41777052238806e-05, + "loss": 0.0, + "step": 49932 + }, + { + "epoch": 46.58, + "learning_rate": 4.417723880597015e-05, + "loss": 0.0002, + "step": 49936 + }, + { + "epoch": 46.59, + "learning_rate": 4.4176772388059705e-05, + "loss": 0.0, + "step": 49940 + }, + { + "epoch": 46.59, + "learning_rate": 4.4176305970149253e-05, + "loss": 0.0, + "step": 49944 + }, + { + "epoch": 46.59, + "learning_rate": 4.417583955223881e-05, + "loss": 0.0028, + "step": 49948 + }, + { + "epoch": 46.6, + "learning_rate": 4.417537313432836e-05, + "loss": 0.0, + "step": 49952 + }, + { + "epoch": 46.6, + "learning_rate": 4.417490671641791e-05, + "loss": 0.0002, + "step": 49956 + }, + { + "epoch": 46.6, + "learning_rate": 4.4174440298507466e-05, + "loss": 0.0001, + "step": 49960 + }, + { + "epoch": 46.61, + "learning_rate": 4.417397388059702e-05, + "loss": 0.0, + "step": 49964 + }, + { + "epoch": 46.61, + "learning_rate": 4.417350746268657e-05, + "loss": 0.0, + "step": 49968 + }, + { + "epoch": 46.62, + "learning_rate": 4.417304104477612e-05, + "loss": 0.0, + "step": 49972 + }, + { + "epoch": 46.62, + "learning_rate": 4.417257462686568e-05, + "loss": 0.0, + "step": 49976 + }, + { + "epoch": 46.62, + "learning_rate": 4.417210820895523e-05, + "loss": 0.0004, + "step": 49980 + }, + { + "epoch": 46.63, + "learning_rate": 4.4171641791044775e-05, + "loss": 0.0001, + "step": 49984 + }, + { + "epoch": 46.63, + "learning_rate": 4.417117537313433e-05, + "loss": 0.0002, + "step": 49988 + }, + { + "epoch": 46.63, + "learning_rate": 4.4170708955223885e-05, + "loss": 0.0, + "step": 49992 + }, + { + "epoch": 46.64, + "learning_rate": 4.417024253731343e-05, + "loss": 0.0, + "step": 49996 + }, + { + "epoch": 46.64, + "learning_rate": 4.416977611940299e-05, + "loss": 0.0001, + "step": 50000 + }, + { + "epoch": 46.64, + "eval_exact_match": 0.7156673114119922, + "eval_exec": 0.7485493230174082, + "eval_loss": 0.4576376974582672, + "eval_runtime": 1189.333, + "eval_samples_per_second": 0.869, + "step": 50000 + }, + { + "epoch": 46.65, + "learning_rate": 4.4169309701492536e-05, + "loss": 0.0001, + "step": 50004 + }, + { + "epoch": 46.65, + "learning_rate": 4.416884328358209e-05, + "loss": 0.0004, + "step": 50008 + }, + { + "epoch": 46.65, + "learning_rate": 4.4168376865671646e-05, + "loss": 0.0, + "step": 50012 + }, + { + "epoch": 46.66, + "learning_rate": 4.4167910447761194e-05, + "loss": 0.0, + "step": 50016 + }, + { + "epoch": 46.66, + "learning_rate": 4.416744402985075e-05, + "loss": 0.0001, + "step": 50020 + }, + { + "epoch": 46.66, + "learning_rate": 4.4166977611940304e-05, + "loss": 0.0, + "step": 50024 + }, + { + "epoch": 46.67, + "learning_rate": 4.416651119402985e-05, + "loss": 0.0, + "step": 50028 + }, + { + "epoch": 46.67, + "learning_rate": 4.41660447761194e-05, + "loss": 0.0, + "step": 50032 + }, + { + "epoch": 46.68, + "learning_rate": 4.416557835820896e-05, + "loss": 0.0002, + "step": 50036 + }, + { + "epoch": 46.68, + "learning_rate": 4.416511194029851e-05, + "loss": 0.0, + "step": 50040 + }, + { + "epoch": 46.68, + "learning_rate": 4.416464552238806e-05, + "loss": 0.0, + "step": 50044 + }, + { + "epoch": 46.69, + "learning_rate": 4.416417910447761e-05, + "loss": 0.0041, + "step": 50048 + }, + { + "epoch": 46.69, + "learning_rate": 4.416371268656717e-05, + "loss": 0.0001, + "step": 50052 + }, + { + "epoch": 46.69, + "learning_rate": 4.4163246268656716e-05, + "loss": 0.0001, + "step": 50056 + }, + { + "epoch": 46.7, + "learning_rate": 4.416277985074627e-05, + "loss": 0.0019, + "step": 50060 + }, + { + "epoch": 46.7, + "learning_rate": 4.416231343283582e-05, + "loss": 0.0, + "step": 50064 + }, + { + "epoch": 46.71, + "learning_rate": 4.4161847014925374e-05, + "loss": 0.0, + "step": 50068 + }, + { + "epoch": 46.71, + "learning_rate": 4.416138059701493e-05, + "loss": 0.0003, + "step": 50072 + }, + { + "epoch": 46.71, + "learning_rate": 4.416091417910448e-05, + "loss": 0.0, + "step": 50076 + }, + { + "epoch": 46.72, + "learning_rate": 4.416044776119403e-05, + "loss": 0.0, + "step": 50080 + }, + { + "epoch": 46.72, + "learning_rate": 4.415998134328359e-05, + "loss": 0.0, + "step": 50084 + }, + { + "epoch": 46.72, + "learning_rate": 4.4159514925373135e-05, + "loss": 0.0026, + "step": 50088 + }, + { + "epoch": 46.73, + "learning_rate": 4.415904850746269e-05, + "loss": 0.0001, + "step": 50092 + }, + { + "epoch": 46.73, + "learning_rate": 4.4158582089552245e-05, + "loss": 0.0, + "step": 50096 + }, + { + "epoch": 46.73, + "learning_rate": 4.415811567164179e-05, + "loss": 0.0, + "step": 50100 + }, + { + "epoch": 46.74, + "learning_rate": 4.415764925373135e-05, + "loss": 0.0, + "step": 50104 + }, + { + "epoch": 46.74, + "learning_rate": 4.4157182835820896e-05, + "loss": 0.0, + "step": 50108 + }, + { + "epoch": 46.75, + "learning_rate": 4.415671641791045e-05, + "loss": 0.0, + "step": 50112 + }, + { + "epoch": 46.75, + "learning_rate": 4.4156250000000006e-05, + "loss": 0.0001, + "step": 50116 + }, + { + "epoch": 46.75, + "learning_rate": 4.4155783582089554e-05, + "loss": 0.0018, + "step": 50120 + }, + { + "epoch": 46.76, + "learning_rate": 4.41553171641791e-05, + "loss": 0.0001, + "step": 50124 + }, + { + "epoch": 46.76, + "learning_rate": 4.4154850746268664e-05, + "loss": 0.0, + "step": 50128 + }, + { + "epoch": 46.76, + "learning_rate": 4.415438432835821e-05, + "loss": 0.0, + "step": 50132 + }, + { + "epoch": 46.77, + "learning_rate": 4.415391791044776e-05, + "loss": 0.0001, + "step": 50136 + }, + { + "epoch": 46.77, + "learning_rate": 4.4153451492537315e-05, + "loss": 0.0014, + "step": 50140 + }, + { + "epoch": 46.78, + "learning_rate": 4.415298507462687e-05, + "loss": 0.0, + "step": 50144 + }, + { + "epoch": 46.78, + "learning_rate": 4.415251865671642e-05, + "loss": 0.0001, + "step": 50148 + }, + { + "epoch": 46.78, + "learning_rate": 4.415205223880597e-05, + "loss": 0.0, + "step": 50152 + }, + { + "epoch": 46.79, + "learning_rate": 4.415158582089553e-05, + "loss": 0.0056, + "step": 50156 + }, + { + "epoch": 46.79, + "learning_rate": 4.4151119402985076e-05, + "loss": 0.0, + "step": 50160 + }, + { + "epoch": 46.79, + "learning_rate": 4.415065298507463e-05, + "loss": 0.0, + "step": 50164 + }, + { + "epoch": 46.8, + "learning_rate": 4.415018656716418e-05, + "loss": 0.0, + "step": 50168 + }, + { + "epoch": 46.8, + "learning_rate": 4.4149720149253734e-05, + "loss": 0.0001, + "step": 50172 + }, + { + "epoch": 46.81, + "learning_rate": 4.414925373134329e-05, + "loss": 0.0, + "step": 50176 + }, + { + "epoch": 46.81, + "learning_rate": 4.414878731343284e-05, + "loss": 0.0001, + "step": 50180 + }, + { + "epoch": 46.81, + "learning_rate": 4.4148320895522385e-05, + "loss": 0.0, + "step": 50184 + }, + { + "epoch": 46.82, + "learning_rate": 4.414785447761195e-05, + "loss": 0.0001, + "step": 50188 + }, + { + "epoch": 46.82, + "learning_rate": 4.4147388059701495e-05, + "loss": 0.0001, + "step": 50192 + }, + { + "epoch": 46.82, + "learning_rate": 4.414692164179104e-05, + "loss": 0.0, + "step": 50196 + }, + { + "epoch": 46.83, + "learning_rate": 4.41464552238806e-05, + "loss": 0.0, + "step": 50200 + }, + { + "epoch": 46.83, + "learning_rate": 4.414598880597015e-05, + "loss": 0.0, + "step": 50204 + }, + { + "epoch": 46.84, + "learning_rate": 4.41455223880597e-05, + "loss": 0.0, + "step": 50208 + }, + { + "epoch": 46.84, + "learning_rate": 4.4145055970149256e-05, + "loss": 0.0001, + "step": 50212 + }, + { + "epoch": 46.84, + "learning_rate": 4.4144589552238804e-05, + "loss": 0.0, + "step": 50216 + }, + { + "epoch": 46.85, + "learning_rate": 4.414412313432836e-05, + "loss": 0.0, + "step": 50220 + }, + { + "epoch": 46.85, + "learning_rate": 4.4143656716417914e-05, + "loss": 0.0001, + "step": 50224 + }, + { + "epoch": 46.85, + "learning_rate": 4.414319029850746e-05, + "loss": 0.0002, + "step": 50228 + }, + { + "epoch": 46.86, + "learning_rate": 4.414272388059702e-05, + "loss": 0.0, + "step": 50232 + }, + { + "epoch": 46.86, + "learning_rate": 4.414225746268657e-05, + "loss": 0.0001, + "step": 50236 + }, + { + "epoch": 46.87, + "learning_rate": 4.414179104477612e-05, + "loss": 0.0, + "step": 50240 + }, + { + "epoch": 46.87, + "learning_rate": 4.414132462686567e-05, + "loss": 0.0003, + "step": 50244 + }, + { + "epoch": 46.87, + "learning_rate": 4.414085820895523e-05, + "loss": 0.0, + "step": 50248 + }, + { + "epoch": 46.88, + "learning_rate": 4.414039179104478e-05, + "loss": 0.0001, + "step": 50252 + }, + { + "epoch": 46.88, + "learning_rate": 4.413992537313433e-05, + "loss": 0.0, + "step": 50256 + }, + { + "epoch": 46.88, + "learning_rate": 4.413945895522388e-05, + "loss": 0.0, + "step": 50260 + }, + { + "epoch": 46.89, + "learning_rate": 4.4138992537313436e-05, + "loss": 0.0, + "step": 50264 + }, + { + "epoch": 46.89, + "learning_rate": 4.413852611940299e-05, + "loss": 0.0001, + "step": 50268 + }, + { + "epoch": 46.9, + "learning_rate": 4.413805970149254e-05, + "loss": 0.0019, + "step": 50272 + }, + { + "epoch": 46.9, + "learning_rate": 4.413759328358209e-05, + "loss": 0.005, + "step": 50276 + }, + { + "epoch": 46.9, + "learning_rate": 4.413712686567165e-05, + "loss": 0.0, + "step": 50280 + }, + { + "epoch": 46.91, + "learning_rate": 4.41366604477612e-05, + "loss": 0.0008, + "step": 50284 + }, + { + "epoch": 46.91, + "learning_rate": 4.4136194029850745e-05, + "loss": 0.0, + "step": 50288 + }, + { + "epoch": 46.91, + "learning_rate": 4.41357276119403e-05, + "loss": 0.0001, + "step": 50292 + }, + { + "epoch": 46.92, + "learning_rate": 4.4135261194029855e-05, + "loss": 0.0001, + "step": 50296 + }, + { + "epoch": 46.92, + "learning_rate": 4.41347947761194e-05, + "loss": 0.0002, + "step": 50300 + }, + { + "epoch": 46.93, + "learning_rate": 4.413432835820896e-05, + "loss": 0.0, + "step": 50304 + }, + { + "epoch": 46.93, + "learning_rate": 4.413386194029851e-05, + "loss": 0.0022, + "step": 50308 + }, + { + "epoch": 46.93, + "learning_rate": 4.413339552238806e-05, + "loss": 0.0002, + "step": 50312 + }, + { + "epoch": 46.94, + "learning_rate": 4.4132929104477616e-05, + "loss": 0.0001, + "step": 50316 + }, + { + "epoch": 46.94, + "learning_rate": 4.4132462686567164e-05, + "loss": 0.009, + "step": 50320 + }, + { + "epoch": 46.94, + "learning_rate": 4.413199626865672e-05, + "loss": 0.0, + "step": 50324 + }, + { + "epoch": 46.95, + "learning_rate": 4.4131529850746274e-05, + "loss": 0.0002, + "step": 50328 + }, + { + "epoch": 46.95, + "learning_rate": 4.413106343283582e-05, + "loss": 0.0, + "step": 50332 + }, + { + "epoch": 46.96, + "learning_rate": 4.413059701492537e-05, + "loss": 0.0001, + "step": 50336 + }, + { + "epoch": 46.96, + "learning_rate": 4.413013059701493e-05, + "loss": 0.0023, + "step": 50340 + }, + { + "epoch": 46.96, + "learning_rate": 4.412966417910448e-05, + "loss": 0.0002, + "step": 50344 + }, + { + "epoch": 46.97, + "learning_rate": 4.412919776119403e-05, + "loss": 0.003, + "step": 50348 + }, + { + "epoch": 46.97, + "learning_rate": 4.412873134328358e-05, + "loss": 0.0, + "step": 50352 + }, + { + "epoch": 46.97, + "learning_rate": 4.412826492537314e-05, + "loss": 0.0, + "step": 50356 + }, + { + "epoch": 46.98, + "learning_rate": 4.4127798507462686e-05, + "loss": 0.0, + "step": 50360 + }, + { + "epoch": 46.98, + "learning_rate": 4.412733208955224e-05, + "loss": 0.0025, + "step": 50364 + }, + { + "epoch": 46.98, + "learning_rate": 4.4126865671641796e-05, + "loss": 0.0001, + "step": 50368 + }, + { + "epoch": 46.99, + "learning_rate": 4.4126399253731344e-05, + "loss": 0.0, + "step": 50372 + }, + { + "epoch": 46.99, + "learning_rate": 4.41259328358209e-05, + "loss": 0.0001, + "step": 50376 + }, + { + "epoch": 47.0, + "learning_rate": 4.412546641791045e-05, + "loss": 0.0002, + "step": 50380 + }, + { + "epoch": 47.0, + "learning_rate": 4.4125e-05, + "loss": 0.0, + "step": 50384 + }, + { + "epoch": 47.0, + "learning_rate": 4.412453358208956e-05, + "loss": 0.0026, + "step": 50388 + }, + { + "epoch": 47.01, + "learning_rate": 4.4124067164179105e-05, + "loss": 0.0, + "step": 50392 + }, + { + "epoch": 47.01, + "learning_rate": 4.412360074626865e-05, + "loss": 0.0049, + "step": 50396 + }, + { + "epoch": 47.01, + "learning_rate": 4.4123134328358215e-05, + "loss": 0.0, + "step": 50400 + }, + { + "epoch": 47.02, + "learning_rate": 4.412266791044776e-05, + "loss": 0.0, + "step": 50404 + }, + { + "epoch": 47.02, + "learning_rate": 4.412220149253731e-05, + "loss": 0.0013, + "step": 50408 + }, + { + "epoch": 47.03, + "learning_rate": 4.4121735074626866e-05, + "loss": 0.0011, + "step": 50412 + }, + { + "epoch": 47.03, + "learning_rate": 4.412126865671642e-05, + "loss": 0.0001, + "step": 50416 + }, + { + "epoch": 47.03, + "learning_rate": 4.4120802238805976e-05, + "loss": 0.0005, + "step": 50420 + }, + { + "epoch": 47.04, + "learning_rate": 4.4120335820895524e-05, + "loss": 0.0, + "step": 50424 + }, + { + "epoch": 47.04, + "learning_rate": 4.411986940298508e-05, + "loss": 0.0, + "step": 50428 + }, + { + "epoch": 47.04, + "learning_rate": 4.4119402985074634e-05, + "loss": 0.0, + "step": 50432 + }, + { + "epoch": 47.05, + "learning_rate": 4.411893656716418e-05, + "loss": 0.0, + "step": 50436 + }, + { + "epoch": 47.05, + "learning_rate": 4.411847014925373e-05, + "loss": 0.0, + "step": 50440 + }, + { + "epoch": 47.06, + "learning_rate": 4.4118003731343285e-05, + "loss": 0.0, + "step": 50444 + }, + { + "epoch": 47.06, + "learning_rate": 4.411753731343284e-05, + "loss": 0.0002, + "step": 50448 + }, + { + "epoch": 47.06, + "learning_rate": 4.411707089552239e-05, + "loss": 0.0, + "step": 50452 + }, + { + "epoch": 47.07, + "learning_rate": 4.411660447761194e-05, + "loss": 0.0, + "step": 50456 + }, + { + "epoch": 47.07, + "learning_rate": 4.41161380597015e-05, + "loss": 0.0, + "step": 50460 + }, + { + "epoch": 47.07, + "learning_rate": 4.4115671641791046e-05, + "loss": 0.0, + "step": 50464 + }, + { + "epoch": 47.08, + "learning_rate": 4.41152052238806e-05, + "loss": 0.0001, + "step": 50468 + }, + { + "epoch": 47.08, + "learning_rate": 4.411473880597015e-05, + "loss": 0.0002, + "step": 50472 + }, + { + "epoch": 47.09, + "learning_rate": 4.4114272388059704e-05, + "loss": 0.0018, + "step": 50476 + }, + { + "epoch": 47.09, + "learning_rate": 4.411380597014926e-05, + "loss": 0.0001, + "step": 50480 + }, + { + "epoch": 47.09, + "learning_rate": 4.411333955223881e-05, + "loss": 0.0001, + "step": 50484 + }, + { + "epoch": 47.1, + "learning_rate": 4.411287313432836e-05, + "loss": 0.0, + "step": 50488 + }, + { + "epoch": 47.1, + "learning_rate": 4.411240671641792e-05, + "loss": 0.0, + "step": 50492 + }, + { + "epoch": 47.1, + "learning_rate": 4.4111940298507465e-05, + "loss": 0.0, + "step": 50496 + }, + { + "epoch": 47.11, + "learning_rate": 4.411147388059701e-05, + "loss": 0.0001, + "step": 50500 + }, + { + "epoch": 47.11, + "eval_exact_match": 0.7321083172147002, + "eval_exec": 0.7649903288201161, + "eval_loss": 0.4581345319747925, + "eval_runtime": 1124.8565, + "eval_samples_per_second": 0.919, + "step": 50500 + }, + { + "epoch": 47.11, + "learning_rate": 4.411100746268657e-05, + "loss": 0.0001, + "step": 50504 + }, + { + "epoch": 47.12, + "learning_rate": 4.411054104477612e-05, + "loss": 0.0, + "step": 50508 + }, + { + "epoch": 47.12, + "learning_rate": 4.411007462686567e-05, + "loss": 0.0, + "step": 50512 + }, + { + "epoch": 47.12, + "learning_rate": 4.4109608208955226e-05, + "loss": 0.0, + "step": 50516 + }, + { + "epoch": 47.13, + "learning_rate": 4.410914179104478e-05, + "loss": 0.0001, + "step": 50520 + }, + { + "epoch": 47.13, + "learning_rate": 4.410867537313433e-05, + "loss": 0.0001, + "step": 50524 + }, + { + "epoch": 47.13, + "learning_rate": 4.4108208955223884e-05, + "loss": 0.0002, + "step": 50528 + }, + { + "epoch": 47.14, + "learning_rate": 4.410774253731343e-05, + "loss": 0.0001, + "step": 50532 + }, + { + "epoch": 47.14, + "learning_rate": 4.410727611940299e-05, + "loss": 0.0, + "step": 50536 + }, + { + "epoch": 47.15, + "learning_rate": 4.410680970149254e-05, + "loss": 0.0006, + "step": 50540 + }, + { + "epoch": 47.15, + "learning_rate": 4.410634328358209e-05, + "loss": 0.0, + "step": 50544 + }, + { + "epoch": 47.15, + "learning_rate": 4.4105876865671645e-05, + "loss": 0.0, + "step": 50548 + }, + { + "epoch": 47.16, + "learning_rate": 4.41054104477612e-05, + "loss": 0.0001, + "step": 50552 + }, + { + "epoch": 47.16, + "learning_rate": 4.410494402985075e-05, + "loss": 0.0, + "step": 50556 + }, + { + "epoch": 47.16, + "learning_rate": 4.4104477611940296e-05, + "loss": 0.0, + "step": 50560 + }, + { + "epoch": 47.17, + "learning_rate": 4.410401119402985e-05, + "loss": 0.0001, + "step": 50564 + }, + { + "epoch": 47.17, + "learning_rate": 4.4103544776119406e-05, + "loss": 0.0, + "step": 50568 + }, + { + "epoch": 47.18, + "learning_rate": 4.4103078358208954e-05, + "loss": 0.0, + "step": 50572 + }, + { + "epoch": 47.18, + "learning_rate": 4.410261194029851e-05, + "loss": 0.0001, + "step": 50576 + }, + { + "epoch": 47.18, + "learning_rate": 4.4102145522388064e-05, + "loss": 0.0, + "step": 50580 + }, + { + "epoch": 47.19, + "learning_rate": 4.410167910447762e-05, + "loss": 0.0, + "step": 50584 + }, + { + "epoch": 47.19, + "learning_rate": 4.410121268656717e-05, + "loss": 0.0001, + "step": 50588 + }, + { + "epoch": 47.19, + "learning_rate": 4.4100746268656715e-05, + "loss": 0.0, + "step": 50592 + }, + { + "epoch": 47.2, + "learning_rate": 4.4100279850746276e-05, + "loss": 0.0, + "step": 50596 + }, + { + "epoch": 47.2, + "learning_rate": 4.4099813432835825e-05, + "loss": 0.0, + "step": 50600 + }, + { + "epoch": 47.21, + "learning_rate": 4.409934701492537e-05, + "loss": 0.0008, + "step": 50604 + }, + { + "epoch": 47.21, + "learning_rate": 4.409888059701493e-05, + "loss": 0.0, + "step": 50608 + }, + { + "epoch": 47.21, + "learning_rate": 4.409841417910448e-05, + "loss": 0.0, + "step": 50612 + }, + { + "epoch": 47.22, + "learning_rate": 4.409794776119403e-05, + "loss": 0.0, + "step": 50616 + }, + { + "epoch": 47.22, + "learning_rate": 4.4097481343283586e-05, + "loss": 0.0, + "step": 50620 + }, + { + "epoch": 47.22, + "learning_rate": 4.4097014925373134e-05, + "loss": 0.0, + "step": 50624 + }, + { + "epoch": 47.23, + "learning_rate": 4.409654850746269e-05, + "loss": 0.0001, + "step": 50628 + }, + { + "epoch": 47.23, + "learning_rate": 4.4096082089552244e-05, + "loss": 0.0011, + "step": 50632 + }, + { + "epoch": 47.24, + "learning_rate": 4.409561567164179e-05, + "loss": 0.0024, + "step": 50636 + }, + { + "epoch": 47.24, + "learning_rate": 4.4095149253731347e-05, + "loss": 0.0, + "step": 50640 + }, + { + "epoch": 47.24, + "learning_rate": 4.40946828358209e-05, + "loss": 0.0002, + "step": 50644 + }, + { + "epoch": 47.25, + "learning_rate": 4.409421641791045e-05, + "loss": 0.0, + "step": 50648 + }, + { + "epoch": 47.25, + "learning_rate": 4.409375e-05, + "loss": 0.0, + "step": 50652 + }, + { + "epoch": 47.25, + "learning_rate": 4.409328358208956e-05, + "loss": 0.0, + "step": 50656 + }, + { + "epoch": 47.26, + "learning_rate": 4.409281716417911e-05, + "loss": 0.0, + "step": 50660 + }, + { + "epoch": 47.26, + "learning_rate": 4.4092350746268656e-05, + "loss": 0.0, + "step": 50664 + }, + { + "epoch": 47.26, + "learning_rate": 4.409188432835821e-05, + "loss": 0.0, + "step": 50668 + }, + { + "epoch": 47.27, + "learning_rate": 4.4091417910447766e-05, + "loss": 0.0, + "step": 50672 + }, + { + "epoch": 47.27, + "learning_rate": 4.4090951492537314e-05, + "loss": 0.0, + "step": 50676 + }, + { + "epoch": 47.28, + "learning_rate": 4.409048507462687e-05, + "loss": 0.0001, + "step": 50680 + }, + { + "epoch": 47.28, + "learning_rate": 4.409001865671642e-05, + "loss": 0.0002, + "step": 50684 + }, + { + "epoch": 47.28, + "learning_rate": 4.408955223880597e-05, + "loss": 0.0001, + "step": 50688 + }, + { + "epoch": 47.29, + "learning_rate": 4.4089085820895526e-05, + "loss": 0.0, + "step": 50692 + }, + { + "epoch": 47.29, + "learning_rate": 4.4088619402985075e-05, + "loss": 0.0003, + "step": 50696 + }, + { + "epoch": 47.29, + "learning_rate": 4.408815298507463e-05, + "loss": 0.0, + "step": 50700 + }, + { + "epoch": 47.3, + "learning_rate": 4.4087686567164184e-05, + "loss": 0.0, + "step": 50704 + }, + { + "epoch": 47.3, + "learning_rate": 4.408722014925373e-05, + "loss": 0.0, + "step": 50708 + }, + { + "epoch": 47.31, + "learning_rate": 4.408675373134328e-05, + "loss": 0.0, + "step": 50712 + }, + { + "epoch": 47.31, + "learning_rate": 4.408628731343284e-05, + "loss": 0.0003, + "step": 50716 + }, + { + "epoch": 47.31, + "learning_rate": 4.408582089552239e-05, + "loss": 0.0, + "step": 50720 + }, + { + "epoch": 47.32, + "learning_rate": 4.408535447761194e-05, + "loss": 0.0, + "step": 50724 + }, + { + "epoch": 47.32, + "learning_rate": 4.4084888059701494e-05, + "loss": 0.0, + "step": 50728 + }, + { + "epoch": 47.32, + "learning_rate": 4.408442164179105e-05, + "loss": 0.0, + "step": 50732 + }, + { + "epoch": 47.33, + "learning_rate": 4.4083955223880597e-05, + "loss": 0.0, + "step": 50736 + }, + { + "epoch": 47.33, + "learning_rate": 4.408348880597015e-05, + "loss": 0.0, + "step": 50740 + }, + { + "epoch": 47.34, + "learning_rate": 4.40830223880597e-05, + "loss": 0.0, + "step": 50744 + }, + { + "epoch": 47.34, + "learning_rate": 4.408255597014926e-05, + "loss": 0.0, + "step": 50748 + }, + { + "epoch": 47.34, + "learning_rate": 4.408208955223881e-05, + "loss": 0.0001, + "step": 50752 + }, + { + "epoch": 47.35, + "learning_rate": 4.408162313432836e-05, + "loss": 0.0, + "step": 50756 + }, + { + "epoch": 47.35, + "learning_rate": 4.408115671641791e-05, + "loss": 0.0, + "step": 50760 + }, + { + "epoch": 47.35, + "learning_rate": 4.408069029850747e-05, + "loss": 0.0, + "step": 50764 + }, + { + "epoch": 47.36, + "learning_rate": 4.4080223880597016e-05, + "loss": 0.0001, + "step": 50768 + }, + { + "epoch": 47.36, + "learning_rate": 4.407975746268657e-05, + "loss": 0.0, + "step": 50772 + }, + { + "epoch": 47.37, + "learning_rate": 4.4079291044776125e-05, + "loss": 0.0, + "step": 50776 + }, + { + "epoch": 47.37, + "learning_rate": 4.4078824626865673e-05, + "loss": 0.0, + "step": 50780 + }, + { + "epoch": 47.37, + "learning_rate": 4.407835820895523e-05, + "loss": 0.0004, + "step": 50784 + }, + { + "epoch": 47.38, + "learning_rate": 4.4077891791044777e-05, + "loss": 0.0, + "step": 50788 + }, + { + "epoch": 47.38, + "learning_rate": 4.407742537313433e-05, + "loss": 0.0, + "step": 50792 + }, + { + "epoch": 47.38, + "learning_rate": 4.4076958955223886e-05, + "loss": 0.0, + "step": 50796 + }, + { + "epoch": 47.39, + "learning_rate": 4.4076492537313434e-05, + "loss": 0.0, + "step": 50800 + }, + { + "epoch": 47.39, + "learning_rate": 4.407602611940298e-05, + "loss": 0.0002, + "step": 50804 + }, + { + "epoch": 47.4, + "learning_rate": 4.4075559701492544e-05, + "loss": 0.0, + "step": 50808 + }, + { + "epoch": 47.4, + "learning_rate": 4.407509328358209e-05, + "loss": 0.0012, + "step": 50812 + }, + { + "epoch": 47.4, + "learning_rate": 4.407462686567164e-05, + "loss": 0.0003, + "step": 50816 + }, + { + "epoch": 47.41, + "learning_rate": 4.4074160447761195e-05, + "loss": 0.0, + "step": 50820 + }, + { + "epoch": 47.41, + "learning_rate": 4.407369402985075e-05, + "loss": 0.0004, + "step": 50824 + }, + { + "epoch": 47.41, + "learning_rate": 4.40732276119403e-05, + "loss": 0.0024, + "step": 50828 + }, + { + "epoch": 47.42, + "learning_rate": 4.4072761194029853e-05, + "loss": 0.0017, + "step": 50832 + }, + { + "epoch": 47.42, + "learning_rate": 4.407229477611941e-05, + "loss": 0.0005, + "step": 50836 + }, + { + "epoch": 47.43, + "learning_rate": 4.4071828358208956e-05, + "loss": 0.0, + "step": 50840 + }, + { + "epoch": 47.43, + "learning_rate": 4.407136194029851e-05, + "loss": 0.0, + "step": 50844 + }, + { + "epoch": 47.43, + "learning_rate": 4.407089552238806e-05, + "loss": 0.0, + "step": 50848 + }, + { + "epoch": 47.44, + "learning_rate": 4.4070429104477614e-05, + "loss": 0.0003, + "step": 50852 + }, + { + "epoch": 47.44, + "learning_rate": 4.406996268656717e-05, + "loss": 0.0, + "step": 50856 + }, + { + "epoch": 47.44, + "learning_rate": 4.406949626865672e-05, + "loss": 0.0, + "step": 50860 + }, + { + "epoch": 47.45, + "learning_rate": 4.4069029850746266e-05, + "loss": 0.0001, + "step": 50864 + }, + { + "epoch": 47.45, + "learning_rate": 4.406856343283583e-05, + "loss": 0.0, + "step": 50868 + }, + { + "epoch": 47.46, + "learning_rate": 4.4068097014925375e-05, + "loss": 0.0009, + "step": 50872 + }, + { + "epoch": 47.46, + "learning_rate": 4.4067630597014923e-05, + "loss": 0.0001, + "step": 50876 + }, + { + "epoch": 47.46, + "learning_rate": 4.406716417910448e-05, + "loss": 0.0, + "step": 50880 + }, + { + "epoch": 47.47, + "learning_rate": 4.406669776119403e-05, + "loss": 0.0003, + "step": 50884 + }, + { + "epoch": 47.47, + "learning_rate": 4.406623134328358e-05, + "loss": 0.0002, + "step": 50888 + }, + { + "epoch": 47.47, + "learning_rate": 4.4065764925373136e-05, + "loss": 0.0, + "step": 50892 + }, + { + "epoch": 47.48, + "learning_rate": 4.4065298507462684e-05, + "loss": 0.0001, + "step": 50896 + }, + { + "epoch": 47.48, + "learning_rate": 4.406483208955224e-05, + "loss": 0.0, + "step": 50900 + }, + { + "epoch": 47.49, + "learning_rate": 4.4064365671641794e-05, + "loss": 0.0, + "step": 50904 + }, + { + "epoch": 47.49, + "learning_rate": 4.406389925373134e-05, + "loss": 0.0015, + "step": 50908 + }, + { + "epoch": 47.49, + "learning_rate": 4.40634328358209e-05, + "loss": 0.0, + "step": 50912 + }, + { + "epoch": 47.5, + "learning_rate": 4.406296641791045e-05, + "loss": 0.0011, + "step": 50916 + }, + { + "epoch": 47.5, + "learning_rate": 4.40625e-05, + "loss": 0.0001, + "step": 50920 + }, + { + "epoch": 47.5, + "learning_rate": 4.4062033582089555e-05, + "loss": 0.0, + "step": 50924 + }, + { + "epoch": 47.51, + "learning_rate": 4.406156716417911e-05, + "loss": 0.0006, + "step": 50928 + }, + { + "epoch": 47.51, + "learning_rate": 4.406110074626866e-05, + "loss": 0.0, + "step": 50932 + }, + { + "epoch": 47.51, + "learning_rate": 4.406063432835821e-05, + "loss": 0.0, + "step": 50936 + }, + { + "epoch": 47.52, + "learning_rate": 4.406016791044776e-05, + "loss": 0.0003, + "step": 50940 + }, + { + "epoch": 47.52, + "learning_rate": 4.4059701492537316e-05, + "loss": 0.0, + "step": 50944 + }, + { + "epoch": 47.53, + "learning_rate": 4.405923507462687e-05, + "loss": 0.0001, + "step": 50948 + }, + { + "epoch": 47.53, + "learning_rate": 4.405876865671642e-05, + "loss": 0.0, + "step": 50952 + }, + { + "epoch": 47.53, + "learning_rate": 4.405830223880597e-05, + "loss": 0.0001, + "step": 50956 + }, + { + "epoch": 47.54, + "learning_rate": 4.405783582089553e-05, + "loss": 0.0, + "step": 50960 + }, + { + "epoch": 47.54, + "learning_rate": 4.405736940298508e-05, + "loss": 0.0, + "step": 50964 + }, + { + "epoch": 47.54, + "learning_rate": 4.4056902985074625e-05, + "loss": 0.0, + "step": 50968 + }, + { + "epoch": 47.55, + "learning_rate": 4.405643656716418e-05, + "loss": 0.0001, + "step": 50972 + }, + { + "epoch": 47.55, + "learning_rate": 4.4055970149253735e-05, + "loss": 0.0, + "step": 50976 + }, + { + "epoch": 47.56, + "learning_rate": 4.405550373134328e-05, + "loss": 0.0016, + "step": 50980 + }, + { + "epoch": 47.56, + "learning_rate": 4.405503731343284e-05, + "loss": 0.0, + "step": 50984 + }, + { + "epoch": 47.56, + "learning_rate": 4.405457089552239e-05, + "loss": 0.0, + "step": 50988 + }, + { + "epoch": 47.57, + "learning_rate": 4.405410447761194e-05, + "loss": 0.0022, + "step": 50992 + }, + { + "epoch": 47.57, + "learning_rate": 4.4053638059701496e-05, + "loss": 0.0, + "step": 50996 + }, + { + "epoch": 47.57, + "learning_rate": 4.4053171641791044e-05, + "loss": 0.0, + "step": 51000 + }, + { + "epoch": 47.57, + "eval_exact_match": 0.7369439071566731, + "eval_exec": 0.7649903288201161, + "eval_loss": 0.47057628631591797, + "eval_runtime": 1132.9681, + "eval_samples_per_second": 0.913, + "step": 51000 + }, + { + "epoch": 47.58, + "learning_rate": 4.40527052238806e-05, + "loss": 0.0, + "step": 51004 + }, + { + "epoch": 47.58, + "learning_rate": 4.4052238805970154e-05, + "loss": 0.0002, + "step": 51008 + }, + { + "epoch": 47.59, + "learning_rate": 4.40517723880597e-05, + "loss": 0.0029, + "step": 51012 + }, + { + "epoch": 47.59, + "learning_rate": 4.405130597014925e-05, + "loss": 0.0, + "step": 51016 + }, + { + "epoch": 47.59, + "learning_rate": 4.405083955223881e-05, + "loss": 0.0055, + "step": 51020 + }, + { + "epoch": 47.6, + "learning_rate": 4.405037313432836e-05, + "loss": 0.0, + "step": 51024 + }, + { + "epoch": 47.6, + "learning_rate": 4.404990671641791e-05, + "loss": 0.0, + "step": 51028 + }, + { + "epoch": 47.6, + "learning_rate": 4.404944029850746e-05, + "loss": 0.0, + "step": 51032 + }, + { + "epoch": 47.61, + "learning_rate": 4.404897388059702e-05, + "loss": 0.0, + "step": 51036 + }, + { + "epoch": 47.61, + "learning_rate": 4.4048507462686566e-05, + "loss": 0.0, + "step": 51040 + }, + { + "epoch": 47.62, + "learning_rate": 4.404804104477612e-05, + "loss": 0.0, + "step": 51044 + }, + { + "epoch": 47.62, + "learning_rate": 4.4047574626865676e-05, + "loss": 0.0007, + "step": 51048 + }, + { + "epoch": 47.62, + "learning_rate": 4.4047108208955224e-05, + "loss": 0.0, + "step": 51052 + }, + { + "epoch": 47.63, + "learning_rate": 4.404664179104478e-05, + "loss": 0.0, + "step": 51056 + }, + { + "epoch": 47.63, + "learning_rate": 4.404617537313433e-05, + "loss": 0.0001, + "step": 51060 + }, + { + "epoch": 47.63, + "learning_rate": 4.404570895522388e-05, + "loss": 0.0011, + "step": 51064 + }, + { + "epoch": 47.64, + "learning_rate": 4.404524253731344e-05, + "loss": 0.0, + "step": 51068 + }, + { + "epoch": 47.64, + "learning_rate": 4.4044776119402985e-05, + "loss": 0.0027, + "step": 51072 + }, + { + "epoch": 47.65, + "learning_rate": 4.404430970149254e-05, + "loss": 0.0003, + "step": 51076 + }, + { + "epoch": 47.65, + "learning_rate": 4.4043843283582095e-05, + "loss": 0.0, + "step": 51080 + }, + { + "epoch": 47.65, + "learning_rate": 4.404337686567164e-05, + "loss": 0.0002, + "step": 51084 + }, + { + "epoch": 47.66, + "learning_rate": 4.40429104477612e-05, + "loss": 0.0011, + "step": 51088 + }, + { + "epoch": 47.66, + "learning_rate": 4.4042444029850746e-05, + "loss": 0.0, + "step": 51092 + }, + { + "epoch": 47.66, + "learning_rate": 4.40419776119403e-05, + "loss": 0.0048, + "step": 51096 + }, + { + "epoch": 47.67, + "learning_rate": 4.4041511194029856e-05, + "loss": 0.0, + "step": 51100 + }, + { + "epoch": 47.67, + "learning_rate": 4.4041044776119404e-05, + "loss": 0.0, + "step": 51104 + }, + { + "epoch": 47.68, + "learning_rate": 4.404057835820896e-05, + "loss": 0.0, + "step": 51108 + }, + { + "epoch": 47.68, + "learning_rate": 4.4040111940298514e-05, + "loss": 0.0, + "step": 51112 + }, + { + "epoch": 47.68, + "learning_rate": 4.403964552238806e-05, + "loss": 0.0026, + "step": 51116 + }, + { + "epoch": 47.69, + "learning_rate": 4.403917910447761e-05, + "loss": 0.0002, + "step": 51120 + }, + { + "epoch": 47.69, + "learning_rate": 4.4038712686567165e-05, + "loss": 0.0, + "step": 51124 + }, + { + "epoch": 47.69, + "learning_rate": 4.403824626865672e-05, + "loss": 0.0021, + "step": 51128 + }, + { + "epoch": 47.7, + "learning_rate": 4.403777985074627e-05, + "loss": 0.0008, + "step": 51132 + }, + { + "epoch": 47.7, + "learning_rate": 4.403731343283582e-05, + "loss": 0.0, + "step": 51136 + }, + { + "epoch": 47.71, + "learning_rate": 4.403684701492538e-05, + "loss": 0.0001, + "step": 51140 + }, + { + "epoch": 47.71, + "learning_rate": 4.4036380597014926e-05, + "loss": 0.0002, + "step": 51144 + }, + { + "epoch": 47.71, + "learning_rate": 4.403591417910448e-05, + "loss": 0.0, + "step": 51148 + }, + { + "epoch": 47.72, + "learning_rate": 4.403544776119403e-05, + "loss": 0.0, + "step": 51152 + }, + { + "epoch": 47.72, + "learning_rate": 4.4034981343283584e-05, + "loss": 0.0, + "step": 51156 + }, + { + "epoch": 47.72, + "learning_rate": 4.403451492537314e-05, + "loss": 0.0003, + "step": 51160 + }, + { + "epoch": 47.73, + "learning_rate": 4.403404850746269e-05, + "loss": 0.0004, + "step": 51164 + }, + { + "epoch": 47.73, + "learning_rate": 4.403358208955224e-05, + "loss": 0.0, + "step": 51168 + }, + { + "epoch": 47.73, + "learning_rate": 4.40331156716418e-05, + "loss": 0.0, + "step": 51172 + }, + { + "epoch": 47.74, + "learning_rate": 4.4032649253731345e-05, + "loss": 0.0021, + "step": 51176 + }, + { + "epoch": 47.74, + "learning_rate": 4.403218283582089e-05, + "loss": 0.0, + "step": 51180 + }, + { + "epoch": 47.75, + "learning_rate": 4.403171641791045e-05, + "loss": 0.0, + "step": 51184 + }, + { + "epoch": 47.75, + "learning_rate": 4.403125e-05, + "loss": 0.0001, + "step": 51188 + }, + { + "epoch": 47.75, + "learning_rate": 4.403078358208955e-05, + "loss": 0.0001, + "step": 51192 + }, + { + "epoch": 47.76, + "learning_rate": 4.4030317164179106e-05, + "loss": 0.0041, + "step": 51196 + }, + { + "epoch": 47.76, + "learning_rate": 4.402985074626866e-05, + "loss": 0.0, + "step": 51200 + }, + { + "epoch": 47.76, + "learning_rate": 4.402938432835821e-05, + "loss": 0.0, + "step": 51204 + }, + { + "epoch": 47.77, + "learning_rate": 4.4028917910447764e-05, + "loss": 0.0, + "step": 51208 + }, + { + "epoch": 47.77, + "learning_rate": 4.402845149253731e-05, + "loss": 0.0, + "step": 51212 + }, + { + "epoch": 47.78, + "learning_rate": 4.402798507462687e-05, + "loss": 0.0, + "step": 51216 + }, + { + "epoch": 47.78, + "learning_rate": 4.402751865671642e-05, + "loss": 0.0018, + "step": 51220 + }, + { + "epoch": 47.78, + "learning_rate": 4.402705223880597e-05, + "loss": 0.0, + "step": 51224 + }, + { + "epoch": 47.79, + "learning_rate": 4.4026585820895525e-05, + "loss": 0.0, + "step": 51228 + }, + { + "epoch": 47.79, + "learning_rate": 4.402611940298508e-05, + "loss": 0.0001, + "step": 51232 + }, + { + "epoch": 47.79, + "learning_rate": 4.402565298507463e-05, + "loss": 0.0, + "step": 51236 + }, + { + "epoch": 47.8, + "learning_rate": 4.402518656716418e-05, + "loss": 0.0001, + "step": 51240 + }, + { + "epoch": 47.8, + "learning_rate": 4.402472014925373e-05, + "loss": 0.0001, + "step": 51244 + }, + { + "epoch": 47.81, + "learning_rate": 4.4024253731343286e-05, + "loss": 0.0, + "step": 51248 + }, + { + "epoch": 47.81, + "learning_rate": 4.402378731343284e-05, + "loss": 0.0005, + "step": 51252 + }, + { + "epoch": 47.81, + "learning_rate": 4.402332089552239e-05, + "loss": 0.0004, + "step": 51256 + }, + { + "epoch": 47.82, + "learning_rate": 4.4022854477611944e-05, + "loss": 0.0, + "step": 51260 + }, + { + "epoch": 47.82, + "learning_rate": 4.40223880597015e-05, + "loss": 0.0, + "step": 51264 + }, + { + "epoch": 47.82, + "learning_rate": 4.402192164179105e-05, + "loss": 0.0, + "step": 51268 + }, + { + "epoch": 47.83, + "learning_rate": 4.4021455223880595e-05, + "loss": 0.0, + "step": 51272 + }, + { + "epoch": 47.83, + "learning_rate": 4.402098880597016e-05, + "loss": 0.0008, + "step": 51276 + }, + { + "epoch": 47.84, + "learning_rate": 4.4020522388059705e-05, + "loss": 0.0001, + "step": 51280 + }, + { + "epoch": 47.84, + "learning_rate": 4.402005597014925e-05, + "loss": 0.0001, + "step": 51284 + }, + { + "epoch": 47.84, + "learning_rate": 4.401958955223881e-05, + "loss": 0.0, + "step": 51288 + }, + { + "epoch": 47.85, + "learning_rate": 4.401912313432836e-05, + "loss": 0.0, + "step": 51292 + }, + { + "epoch": 47.85, + "learning_rate": 4.401865671641791e-05, + "loss": 0.0, + "step": 51296 + }, + { + "epoch": 47.85, + "learning_rate": 4.4018190298507466e-05, + "loss": 0.0001, + "step": 51300 + }, + { + "epoch": 47.86, + "learning_rate": 4.4017723880597014e-05, + "loss": 0.0, + "step": 51304 + }, + { + "epoch": 47.86, + "learning_rate": 4.401725746268657e-05, + "loss": 0.0001, + "step": 51308 + }, + { + "epoch": 47.87, + "learning_rate": 4.4016791044776124e-05, + "loss": 0.0, + "step": 51312 + }, + { + "epoch": 47.87, + "learning_rate": 4.401632462686567e-05, + "loss": 0.0, + "step": 51316 + }, + { + "epoch": 47.87, + "learning_rate": 4.401585820895523e-05, + "loss": 0.0, + "step": 51320 + }, + { + "epoch": 47.88, + "learning_rate": 4.401539179104478e-05, + "loss": 0.0, + "step": 51324 + }, + { + "epoch": 47.88, + "learning_rate": 4.401492537313433e-05, + "loss": 0.0, + "step": 51328 + }, + { + "epoch": 47.88, + "learning_rate": 4.401445895522388e-05, + "loss": 0.0, + "step": 51332 + }, + { + "epoch": 47.89, + "learning_rate": 4.401399253731344e-05, + "loss": 0.0, + "step": 51336 + }, + { + "epoch": 47.89, + "learning_rate": 4.401352611940299e-05, + "loss": 0.0, + "step": 51340 + }, + { + "epoch": 47.9, + "learning_rate": 4.4013059701492536e-05, + "loss": 0.0001, + "step": 51344 + }, + { + "epoch": 47.9, + "learning_rate": 4.401259328358209e-05, + "loss": 0.0, + "step": 51348 + }, + { + "epoch": 47.9, + "learning_rate": 4.4012126865671646e-05, + "loss": 0.0, + "step": 51352 + }, + { + "epoch": 47.91, + "learning_rate": 4.4011660447761194e-05, + "loss": 0.0048, + "step": 51356 + }, + { + "epoch": 47.91, + "learning_rate": 4.401119402985075e-05, + "loss": 0.0, + "step": 51360 + }, + { + "epoch": 47.91, + "learning_rate": 4.40107276119403e-05, + "loss": 0.0, + "step": 51364 + }, + { + "epoch": 47.92, + "learning_rate": 4.401026119402985e-05, + "loss": 0.0, + "step": 51368 + }, + { + "epoch": 47.92, + "learning_rate": 4.400979477611941e-05, + "loss": 0.0014, + "step": 51372 + }, + { + "epoch": 47.93, + "learning_rate": 4.4009328358208955e-05, + "loss": 0.0012, + "step": 51376 + }, + { + "epoch": 47.93, + "learning_rate": 4.400886194029851e-05, + "loss": 0.0, + "step": 51380 + }, + { + "epoch": 47.93, + "learning_rate": 4.4008395522388065e-05, + "loss": 0.0, + "step": 51384 + }, + { + "epoch": 47.94, + "learning_rate": 4.400792910447761e-05, + "loss": 0.0, + "step": 51388 + }, + { + "epoch": 47.94, + "learning_rate": 4.400746268656716e-05, + "loss": 0.0, + "step": 51392 + }, + { + "epoch": 47.94, + "learning_rate": 4.400699626865672e-05, + "loss": 0.0, + "step": 51396 + }, + { + "epoch": 47.95, + "learning_rate": 4.400652985074627e-05, + "loss": 0.0002, + "step": 51400 + }, + { + "epoch": 47.95, + "learning_rate": 4.4006063432835826e-05, + "loss": 0.0001, + "step": 51404 + }, + { + "epoch": 47.96, + "learning_rate": 4.4005597014925374e-05, + "loss": 0.0003, + "step": 51408 + }, + { + "epoch": 47.96, + "learning_rate": 4.400513059701493e-05, + "loss": 0.0002, + "step": 51412 + }, + { + "epoch": 47.96, + "learning_rate": 4.4004664179104484e-05, + "loss": 0.0001, + "step": 51416 + }, + { + "epoch": 47.97, + "learning_rate": 4.400419776119403e-05, + "loss": 0.0, + "step": 51420 + }, + { + "epoch": 47.97, + "learning_rate": 4.400373134328358e-05, + "loss": 0.0, + "step": 51424 + }, + { + "epoch": 47.97, + "learning_rate": 4.400326492537314e-05, + "loss": 0.0, + "step": 51428 + }, + { + "epoch": 47.98, + "learning_rate": 4.400279850746269e-05, + "loss": 0.0, + "step": 51432 + }, + { + "epoch": 47.98, + "learning_rate": 4.400233208955224e-05, + "loss": 0.0, + "step": 51436 + }, + { + "epoch": 47.98, + "learning_rate": 4.400186567164179e-05, + "loss": 0.0, + "step": 51440 + }, + { + "epoch": 47.99, + "learning_rate": 4.400139925373135e-05, + "loss": 0.0, + "step": 51444 + }, + { + "epoch": 47.99, + "learning_rate": 4.4000932835820896e-05, + "loss": 0.0001, + "step": 51448 + }, + { + "epoch": 48.0, + "learning_rate": 4.400046641791045e-05, + "loss": 0.0008, + "step": 51452 + }, + { + "epoch": 48.0, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.0, + "step": 51456 + }, + { + "epoch": 48.0, + "learning_rate": 4.3999533582089554e-05, + "loss": 0.0001, + "step": 51460 + }, + { + "epoch": 48.01, + "learning_rate": 4.399906716417911e-05, + "loss": 0.0, + "step": 51464 + }, + { + "epoch": 48.01, + "learning_rate": 4.399860074626866e-05, + "loss": 0.0, + "step": 51468 + }, + { + "epoch": 48.01, + "learning_rate": 4.399813432835821e-05, + "loss": 0.0, + "step": 51472 + }, + { + "epoch": 48.02, + "learning_rate": 4.399766791044777e-05, + "loss": 0.0, + "step": 51476 + }, + { + "epoch": 48.02, + "learning_rate": 4.3997201492537315e-05, + "loss": 0.0, + "step": 51480 + }, + { + "epoch": 48.03, + "learning_rate": 4.399673507462686e-05, + "loss": 0.0005, + "step": 51484 + }, + { + "epoch": 48.03, + "learning_rate": 4.3996268656716425e-05, + "loss": 0.0012, + "step": 51488 + }, + { + "epoch": 48.03, + "learning_rate": 4.399580223880597e-05, + "loss": 0.0146, + "step": 51492 + }, + { + "epoch": 48.04, + "learning_rate": 4.399533582089552e-05, + "loss": 0.0004, + "step": 51496 + }, + { + "epoch": 48.04, + "learning_rate": 4.3994869402985076e-05, + "loss": 0.0, + "step": 51500 + }, + { + "epoch": 48.04, + "eval_exact_match": 0.741779497098646, + "eval_exec": 0.776595744680851, + "eval_loss": 0.44945940375328064, + "eval_runtime": 1251.2204, + "eval_samples_per_second": 0.826, + "step": 51500 + }, + { + "epoch": 48.04, + "learning_rate": 4.399440298507463e-05, + "loss": 0.0, + "step": 51504 + }, + { + "epoch": 48.05, + "learning_rate": 4.399393656716418e-05, + "loss": 0.0001, + "step": 51508 + }, + { + "epoch": 48.05, + "learning_rate": 4.3993470149253734e-05, + "loss": 0.0, + "step": 51512 + }, + { + "epoch": 48.06, + "learning_rate": 4.399300373134329e-05, + "loss": 0.0, + "step": 51516 + }, + { + "epoch": 48.06, + "learning_rate": 4.399253731343284e-05, + "loss": 0.0, + "step": 51520 + }, + { + "epoch": 48.06, + "learning_rate": 4.399207089552239e-05, + "loss": 0.0, + "step": 51524 + }, + { + "epoch": 48.07, + "learning_rate": 4.399160447761194e-05, + "loss": 0.0004, + "step": 51528 + }, + { + "epoch": 48.07, + "learning_rate": 4.3991138059701495e-05, + "loss": 0.0, + "step": 51532 + }, + { + "epoch": 48.07, + "learning_rate": 4.399067164179105e-05, + "loss": 0.0001, + "step": 51536 + }, + { + "epoch": 48.08, + "learning_rate": 4.39902052238806e-05, + "loss": 0.0022, + "step": 51540 + }, + { + "epoch": 48.08, + "learning_rate": 4.3989738805970146e-05, + "loss": 0.0001, + "step": 51544 + }, + { + "epoch": 48.09, + "learning_rate": 4.398927238805971e-05, + "loss": 0.0, + "step": 51548 + }, + { + "epoch": 48.09, + "learning_rate": 4.3988805970149256e-05, + "loss": 0.0, + "step": 51552 + }, + { + "epoch": 48.09, + "learning_rate": 4.3988339552238804e-05, + "loss": 0.0001, + "step": 51556 + }, + { + "epoch": 48.1, + "learning_rate": 4.398787313432836e-05, + "loss": 0.0, + "step": 51560 + }, + { + "epoch": 48.1, + "learning_rate": 4.3987406716417914e-05, + "loss": 0.0003, + "step": 51564 + }, + { + "epoch": 48.1, + "learning_rate": 4.398694029850747e-05, + "loss": 0.0, + "step": 51568 + }, + { + "epoch": 48.11, + "learning_rate": 4.398647388059702e-05, + "loss": 0.0001, + "step": 51572 + }, + { + "epoch": 48.11, + "learning_rate": 4.3986007462686565e-05, + "loss": 0.0, + "step": 51576 + }, + { + "epoch": 48.12, + "learning_rate": 4.3985541044776126e-05, + "loss": 0.0, + "step": 51580 + }, + { + "epoch": 48.12, + "learning_rate": 4.3985074626865675e-05, + "loss": 0.0002, + "step": 51584 + }, + { + "epoch": 48.12, + "learning_rate": 4.398460820895522e-05, + "loss": 0.0009, + "step": 51588 + }, + { + "epoch": 48.13, + "learning_rate": 4.398414179104478e-05, + "loss": 0.0, + "step": 51592 + }, + { + "epoch": 48.13, + "learning_rate": 4.398367537313433e-05, + "loss": 0.0001, + "step": 51596 + }, + { + "epoch": 48.13, + "learning_rate": 4.398320895522388e-05, + "loss": 0.0, + "step": 51600 + }, + { + "epoch": 48.14, + "learning_rate": 4.3982742537313436e-05, + "loss": 0.0, + "step": 51604 + }, + { + "epoch": 48.14, + "learning_rate": 4.398227611940299e-05, + "loss": 0.0, + "step": 51608 + }, + { + "epoch": 48.15, + "learning_rate": 4.398180970149254e-05, + "loss": 0.0007, + "step": 51612 + }, + { + "epoch": 48.15, + "learning_rate": 4.3981343283582094e-05, + "loss": 0.0, + "step": 51616 + }, + { + "epoch": 48.15, + "learning_rate": 4.398087686567164e-05, + "loss": 0.0001, + "step": 51620 + }, + { + "epoch": 48.16, + "learning_rate": 4.3980410447761197e-05, + "loss": 0.0, + "step": 51624 + }, + { + "epoch": 48.16, + "learning_rate": 4.397994402985075e-05, + "loss": 0.0, + "step": 51628 + }, + { + "epoch": 48.16, + "learning_rate": 4.39794776119403e-05, + "loss": 0.0082, + "step": 51632 + }, + { + "epoch": 48.17, + "learning_rate": 4.397901119402985e-05, + "loss": 0.0001, + "step": 51636 + }, + { + "epoch": 48.17, + "learning_rate": 4.397854477611941e-05, + "loss": 0.0, + "step": 51640 + }, + { + "epoch": 48.18, + "learning_rate": 4.397807835820896e-05, + "loss": 0.0001, + "step": 51644 + }, + { + "epoch": 48.18, + "learning_rate": 4.3977611940298506e-05, + "loss": 0.0, + "step": 51648 + }, + { + "epoch": 48.18, + "learning_rate": 4.397714552238806e-05, + "loss": 0.0, + "step": 51652 + }, + { + "epoch": 48.19, + "learning_rate": 4.3976679104477615e-05, + "loss": 0.0001, + "step": 51656 + }, + { + "epoch": 48.19, + "learning_rate": 4.3976212686567164e-05, + "loss": 0.0, + "step": 51660 + }, + { + "epoch": 48.19, + "learning_rate": 4.397574626865672e-05, + "loss": 0.0139, + "step": 51664 + }, + { + "epoch": 48.2, + "learning_rate": 4.3975279850746273e-05, + "loss": 0.0, + "step": 51668 + }, + { + "epoch": 48.2, + "learning_rate": 4.397481343283582e-05, + "loss": 0.0007, + "step": 51672 + }, + { + "epoch": 48.21, + "learning_rate": 4.3974347014925376e-05, + "loss": 0.0, + "step": 51676 + }, + { + "epoch": 48.21, + "learning_rate": 4.3973880597014925e-05, + "loss": 0.0, + "step": 51680 + }, + { + "epoch": 48.21, + "learning_rate": 4.397341417910448e-05, + "loss": 0.0, + "step": 51684 + }, + { + "epoch": 48.22, + "learning_rate": 4.3972947761194034e-05, + "loss": 0.0001, + "step": 51688 + }, + { + "epoch": 48.22, + "learning_rate": 4.397248134328358e-05, + "loss": 0.0, + "step": 51692 + }, + { + "epoch": 48.22, + "learning_rate": 4.397201492537313e-05, + "loss": 0.0, + "step": 51696 + }, + { + "epoch": 48.23, + "learning_rate": 4.397154850746269e-05, + "loss": 0.001, + "step": 51700 + }, + { + "epoch": 48.23, + "learning_rate": 4.397108208955224e-05, + "loss": 0.001, + "step": 51704 + }, + { + "epoch": 48.24, + "learning_rate": 4.397061567164179e-05, + "loss": 0.0, + "step": 51708 + }, + { + "epoch": 48.24, + "learning_rate": 4.3970149253731344e-05, + "loss": 0.0001, + "step": 51712 + }, + { + "epoch": 48.24, + "learning_rate": 4.39696828358209e-05, + "loss": 0.0001, + "step": 51716 + }, + { + "epoch": 48.25, + "learning_rate": 4.3969216417910447e-05, + "loss": 0.0027, + "step": 51720 + }, + { + "epoch": 48.25, + "learning_rate": 4.396875e-05, + "loss": 0.0, + "step": 51724 + }, + { + "epoch": 48.25, + "learning_rate": 4.3968283582089556e-05, + "loss": 0.0, + "step": 51728 + }, + { + "epoch": 48.26, + "learning_rate": 4.396781716417911e-05, + "loss": 0.0004, + "step": 51732 + }, + { + "epoch": 48.26, + "learning_rate": 4.396735074626866e-05, + "loss": 0.0001, + "step": 51736 + }, + { + "epoch": 48.26, + "learning_rate": 4.396688432835821e-05, + "loss": 0.0, + "step": 51740 + }, + { + "epoch": 48.27, + "learning_rate": 4.396641791044776e-05, + "loss": 0.0, + "step": 51744 + }, + { + "epoch": 48.27, + "learning_rate": 4.396595149253732e-05, + "loss": 0.0, + "step": 51748 + }, + { + "epoch": 48.28, + "learning_rate": 4.3965485074626866e-05, + "loss": 0.0001, + "step": 51752 + }, + { + "epoch": 48.28, + "learning_rate": 4.396501865671642e-05, + "loss": 0.0, + "step": 51756 + }, + { + "epoch": 48.28, + "learning_rate": 4.3964552238805975e-05, + "loss": 0.0001, + "step": 51760 + }, + { + "epoch": 48.29, + "learning_rate": 4.3964085820895523e-05, + "loss": 0.0001, + "step": 51764 + }, + { + "epoch": 48.29, + "learning_rate": 4.396361940298508e-05, + "loss": 0.0, + "step": 51768 + }, + { + "epoch": 48.29, + "learning_rate": 4.3963152985074626e-05, + "loss": 0.0001, + "step": 51772 + }, + { + "epoch": 48.3, + "learning_rate": 4.396268656716418e-05, + "loss": 0.0004, + "step": 51776 + }, + { + "epoch": 48.3, + "learning_rate": 4.3962220149253736e-05, + "loss": 0.0, + "step": 51780 + }, + { + "epoch": 48.31, + "learning_rate": 4.3961753731343284e-05, + "loss": 0.0, + "step": 51784 + }, + { + "epoch": 48.31, + "learning_rate": 4.396128731343284e-05, + "loss": 0.0022, + "step": 51788 + }, + { + "epoch": 48.31, + "learning_rate": 4.3960820895522394e-05, + "loss": 0.0, + "step": 51792 + }, + { + "epoch": 48.32, + "learning_rate": 4.396035447761194e-05, + "loss": 0.0007, + "step": 51796 + }, + { + "epoch": 48.32, + "learning_rate": 4.395988805970149e-05, + "loss": 0.0, + "step": 51800 + }, + { + "epoch": 48.32, + "learning_rate": 4.3959421641791045e-05, + "loss": 0.0, + "step": 51804 + }, + { + "epoch": 48.33, + "learning_rate": 4.39589552238806e-05, + "loss": 0.0, + "step": 51808 + }, + { + "epoch": 48.33, + "learning_rate": 4.395848880597015e-05, + "loss": 0.0, + "step": 51812 + }, + { + "epoch": 48.34, + "learning_rate": 4.39580223880597e-05, + "loss": 0.0, + "step": 51816 + }, + { + "epoch": 48.34, + "learning_rate": 4.395755597014926e-05, + "loss": 0.0, + "step": 51820 + }, + { + "epoch": 48.34, + "learning_rate": 4.3957089552238806e-05, + "loss": 0.0, + "step": 51824 + }, + { + "epoch": 48.35, + "learning_rate": 4.395662313432836e-05, + "loss": 0.0, + "step": 51828 + }, + { + "epoch": 48.35, + "learning_rate": 4.395615671641791e-05, + "loss": 0.0001, + "step": 51832 + }, + { + "epoch": 48.35, + "learning_rate": 4.3955690298507464e-05, + "loss": 0.0001, + "step": 51836 + }, + { + "epoch": 48.36, + "learning_rate": 4.395522388059702e-05, + "loss": 0.0001, + "step": 51840 + }, + { + "epoch": 48.36, + "learning_rate": 4.395475746268657e-05, + "loss": 0.0, + "step": 51844 + }, + { + "epoch": 48.37, + "learning_rate": 4.395429104477612e-05, + "loss": 0.0028, + "step": 51848 + }, + { + "epoch": 48.37, + "learning_rate": 4.395382462686568e-05, + "loss": 0.0024, + "step": 51852 + }, + { + "epoch": 48.37, + "learning_rate": 4.3953358208955225e-05, + "loss": 0.0002, + "step": 51856 + }, + { + "epoch": 48.38, + "learning_rate": 4.3952891791044773e-05, + "loss": 0.0001, + "step": 51860 + }, + { + "epoch": 48.38, + "learning_rate": 4.395242537313433e-05, + "loss": 0.0001, + "step": 51864 + }, + { + "epoch": 48.38, + "learning_rate": 4.395195895522388e-05, + "loss": 0.0, + "step": 51868 + }, + { + "epoch": 48.39, + "learning_rate": 4.395149253731343e-05, + "loss": 0.0002, + "step": 51872 + }, + { + "epoch": 48.39, + "learning_rate": 4.3951026119402986e-05, + "loss": 0.0, + "step": 51876 + }, + { + "epoch": 48.4, + "learning_rate": 4.395055970149254e-05, + "loss": 0.0, + "step": 51880 + }, + { + "epoch": 48.4, + "learning_rate": 4.395009328358209e-05, + "loss": 0.0, + "step": 51884 + }, + { + "epoch": 48.4, + "learning_rate": 4.3949626865671644e-05, + "loss": 0.0, + "step": 51888 + }, + { + "epoch": 48.41, + "learning_rate": 4.394916044776119e-05, + "loss": 0.0, + "step": 51892 + }, + { + "epoch": 48.41, + "learning_rate": 4.3948694029850754e-05, + "loss": 0.0, + "step": 51896 + }, + { + "epoch": 48.41, + "learning_rate": 4.39482276119403e-05, + "loss": 0.0, + "step": 51900 + }, + { + "epoch": 48.42, + "learning_rate": 4.394776119402985e-05, + "loss": 0.0, + "step": 51904 + }, + { + "epoch": 48.42, + "learning_rate": 4.3947294776119405e-05, + "loss": 0.0, + "step": 51908 + }, + { + "epoch": 48.43, + "learning_rate": 4.394682835820896e-05, + "loss": 0.0003, + "step": 51912 + }, + { + "epoch": 48.43, + "learning_rate": 4.394636194029851e-05, + "loss": 0.0024, + "step": 51916 + }, + { + "epoch": 48.43, + "learning_rate": 4.394589552238806e-05, + "loss": 0.0, + "step": 51920 + }, + { + "epoch": 48.44, + "learning_rate": 4.394542910447761e-05, + "loss": 0.0001, + "step": 51924 + }, + { + "epoch": 48.44, + "learning_rate": 4.3944962686567166e-05, + "loss": 0.0, + "step": 51928 + }, + { + "epoch": 48.44, + "learning_rate": 4.394449626865672e-05, + "loss": 0.0, + "step": 51932 + }, + { + "epoch": 48.45, + "learning_rate": 4.394402985074627e-05, + "loss": 0.0, + "step": 51936 + }, + { + "epoch": 48.45, + "learning_rate": 4.3943563432835824e-05, + "loss": 0.0001, + "step": 51940 + }, + { + "epoch": 48.46, + "learning_rate": 4.394309701492538e-05, + "loss": 0.0, + "step": 51944 + }, + { + "epoch": 48.46, + "learning_rate": 4.394263059701493e-05, + "loss": 0.0029, + "step": 51948 + }, + { + "epoch": 48.46, + "learning_rate": 4.3942164179104475e-05, + "loss": 0.0, + "step": 51952 + }, + { + "epoch": 48.47, + "learning_rate": 4.394169776119404e-05, + "loss": 0.0, + "step": 51956 + }, + { + "epoch": 48.47, + "learning_rate": 4.3941231343283585e-05, + "loss": 0.0, + "step": 51960 + }, + { + "epoch": 48.47, + "learning_rate": 4.394076492537313e-05, + "loss": 0.0002, + "step": 51964 + }, + { + "epoch": 48.48, + "learning_rate": 4.394029850746269e-05, + "loss": 0.0, + "step": 51968 + }, + { + "epoch": 48.48, + "learning_rate": 4.393983208955224e-05, + "loss": 0.0, + "step": 51972 + }, + { + "epoch": 48.49, + "learning_rate": 4.393936567164179e-05, + "loss": 0.002, + "step": 51976 + }, + { + "epoch": 48.49, + "learning_rate": 4.3938899253731346e-05, + "loss": 0.0007, + "step": 51980 + }, + { + "epoch": 48.49, + "learning_rate": 4.3938432835820894e-05, + "loss": 0.0, + "step": 51984 + }, + { + "epoch": 48.5, + "learning_rate": 4.393796641791045e-05, + "loss": 0.001, + "step": 51988 + }, + { + "epoch": 48.5, + "learning_rate": 4.3937500000000004e-05, + "loss": 0.0, + "step": 51992 + }, + { + "epoch": 48.5, + "learning_rate": 4.393703358208955e-05, + "loss": 0.0, + "step": 51996 + }, + { + "epoch": 48.51, + "learning_rate": 4.393656716417911e-05, + "loss": 0.0018, + "step": 52000 + }, + { + "epoch": 48.51, + "eval_exact_match": 0.7446808510638298, + "eval_exec": 0.7746615087040619, + "eval_loss": 0.44412875175476074, + "eval_runtime": 1559.1098, + "eval_samples_per_second": 0.663, + "step": 52000 + }, + { + "epoch": 48.51, + "learning_rate": 4.393610074626866e-05, + "loss": 0.0, + "step": 52004 + }, + { + "epoch": 48.51, + "learning_rate": 4.393563432835821e-05, + "loss": 0.0, + "step": 52008 + }, + { + "epoch": 48.52, + "learning_rate": 4.393516791044776e-05, + "loss": 0.0, + "step": 52012 + }, + { + "epoch": 48.52, + "learning_rate": 4.393470149253732e-05, + "loss": 0.0, + "step": 52016 + }, + { + "epoch": 48.53, + "learning_rate": 4.393423507462687e-05, + "loss": 0.0, + "step": 52020 + }, + { + "epoch": 48.53, + "learning_rate": 4.3933768656716416e-05, + "loss": 0.0, + "step": 52024 + }, + { + "epoch": 48.53, + "learning_rate": 4.393330223880597e-05, + "loss": 0.0, + "step": 52028 + }, + { + "epoch": 48.54, + "learning_rate": 4.3932835820895526e-05, + "loss": 0.0003, + "step": 52032 + }, + { + "epoch": 48.54, + "learning_rate": 4.3932369402985074e-05, + "loss": 0.0018, + "step": 52036 + }, + { + "epoch": 48.54, + "learning_rate": 4.393190298507463e-05, + "loss": 0.0, + "step": 52040 + }, + { + "epoch": 48.55, + "learning_rate": 4.393143656716418e-05, + "loss": 0.0, + "step": 52044 + }, + { + "epoch": 48.55, + "learning_rate": 4.393097014925374e-05, + "loss": 0.0, + "step": 52048 + }, + { + "epoch": 48.56, + "learning_rate": 4.393050373134329e-05, + "loss": 0.0, + "step": 52052 + }, + { + "epoch": 48.56, + "learning_rate": 4.3930037313432835e-05, + "loss": 0.0, + "step": 52056 + }, + { + "epoch": 48.56, + "learning_rate": 4.392957089552239e-05, + "loss": 0.0022, + "step": 52060 + }, + { + "epoch": 48.57, + "learning_rate": 4.3929104477611945e-05, + "loss": 0.0001, + "step": 52064 + }, + { + "epoch": 48.57, + "learning_rate": 4.392863805970149e-05, + "loss": 0.0056, + "step": 52068 + }, + { + "epoch": 48.57, + "learning_rate": 4.392817164179105e-05, + "loss": 0.0, + "step": 52072 + }, + { + "epoch": 48.58, + "learning_rate": 4.39277052238806e-05, + "loss": 0.0, + "step": 52076 + }, + { + "epoch": 48.58, + "learning_rate": 4.392723880597015e-05, + "loss": 0.0, + "step": 52080 + }, + { + "epoch": 48.59, + "learning_rate": 4.3926772388059706e-05, + "loss": 0.0, + "step": 52084 + }, + { + "epoch": 48.59, + "learning_rate": 4.3926305970149254e-05, + "loss": 0.0, + "step": 52088 + }, + { + "epoch": 48.59, + "learning_rate": 4.392583955223881e-05, + "loss": 0.0, + "step": 52092 + }, + { + "epoch": 48.6, + "learning_rate": 4.3925373134328364e-05, + "loss": 0.0, + "step": 52096 + }, + { + "epoch": 48.6, + "learning_rate": 4.392490671641791e-05, + "loss": 0.0003, + "step": 52100 + }, + { + "epoch": 48.6, + "learning_rate": 4.392444029850746e-05, + "loss": 0.0, + "step": 52104 + }, + { + "epoch": 48.61, + "learning_rate": 4.392397388059702e-05, + "loss": 0.0, + "step": 52108 + }, + { + "epoch": 48.61, + "learning_rate": 4.392350746268657e-05, + "loss": 0.0, + "step": 52112 + }, + { + "epoch": 48.62, + "learning_rate": 4.392304104477612e-05, + "loss": 0.0019, + "step": 52116 + }, + { + "epoch": 48.62, + "learning_rate": 4.392257462686567e-05, + "loss": 0.0, + "step": 52120 + }, + { + "epoch": 48.62, + "learning_rate": 4.392210820895523e-05, + "loss": 0.0, + "step": 52124 + }, + { + "epoch": 48.63, + "learning_rate": 4.3921641791044776e-05, + "loss": 0.0, + "step": 52128 + }, + { + "epoch": 48.63, + "learning_rate": 4.392117537313433e-05, + "loss": 0.0005, + "step": 52132 + }, + { + "epoch": 48.63, + "learning_rate": 4.3920708955223886e-05, + "loss": 0.0001, + "step": 52136 + }, + { + "epoch": 48.64, + "learning_rate": 4.3920242537313434e-05, + "loss": 0.0001, + "step": 52140 + }, + { + "epoch": 48.64, + "learning_rate": 4.391977611940299e-05, + "loss": 0.0, + "step": 52144 + }, + { + "epoch": 48.65, + "learning_rate": 4.391930970149254e-05, + "loss": 0.0002, + "step": 52148 + }, + { + "epoch": 48.65, + "learning_rate": 4.391884328358209e-05, + "loss": 0.0001, + "step": 52152 + }, + { + "epoch": 48.65, + "learning_rate": 4.391837686567165e-05, + "loss": 0.0, + "step": 52156 + }, + { + "epoch": 48.66, + "learning_rate": 4.3917910447761195e-05, + "loss": 0.0, + "step": 52160 + }, + { + "epoch": 48.66, + "learning_rate": 4.391744402985074e-05, + "loss": 0.0, + "step": 52164 + }, + { + "epoch": 48.66, + "learning_rate": 4.3916977611940305e-05, + "loss": 0.0, + "step": 52168 + }, + { + "epoch": 48.67, + "learning_rate": 4.391651119402985e-05, + "loss": 0.0, + "step": 52172 + }, + { + "epoch": 48.67, + "learning_rate": 4.39160447761194e-05, + "loss": 0.0, + "step": 52176 + }, + { + "epoch": 48.68, + "learning_rate": 4.3915578358208956e-05, + "loss": 0.0001, + "step": 52180 + }, + { + "epoch": 48.68, + "learning_rate": 4.391511194029851e-05, + "loss": 0.0024, + "step": 52184 + }, + { + "epoch": 48.68, + "learning_rate": 4.391464552238806e-05, + "loss": 0.0001, + "step": 52188 + }, + { + "epoch": 48.69, + "learning_rate": 4.3914179104477614e-05, + "loss": 0.0, + "step": 52192 + }, + { + "epoch": 48.69, + "learning_rate": 4.391371268656717e-05, + "loss": 0.0, + "step": 52196 + }, + { + "epoch": 48.69, + "learning_rate": 4.391324626865672e-05, + "loss": 0.0, + "step": 52200 + }, + { + "epoch": 48.7, + "learning_rate": 4.391277985074627e-05, + "loss": 0.0, + "step": 52204 + }, + { + "epoch": 48.7, + "learning_rate": 4.391231343283582e-05, + "loss": 0.0001, + "step": 52208 + }, + { + "epoch": 48.71, + "learning_rate": 4.3911847014925375e-05, + "loss": 0.0, + "step": 52212 + }, + { + "epoch": 48.71, + "learning_rate": 4.391138059701493e-05, + "loss": 0.0, + "step": 52216 + }, + { + "epoch": 48.71, + "learning_rate": 4.391091417910448e-05, + "loss": 0.0016, + "step": 52220 + }, + { + "epoch": 48.72, + "learning_rate": 4.391044776119403e-05, + "loss": 0.0001, + "step": 52224 + }, + { + "epoch": 48.72, + "learning_rate": 4.390998134328359e-05, + "loss": 0.0, + "step": 52228 + }, + { + "epoch": 48.72, + "learning_rate": 4.3909514925373136e-05, + "loss": 0.0009, + "step": 52232 + }, + { + "epoch": 48.73, + "learning_rate": 4.390904850746269e-05, + "loss": 0.0, + "step": 52236 + }, + { + "epoch": 48.73, + "learning_rate": 4.390858208955224e-05, + "loss": 0.0, + "step": 52240 + }, + { + "epoch": 48.73, + "learning_rate": 4.3908115671641794e-05, + "loss": 0.0001, + "step": 52244 + }, + { + "epoch": 48.74, + "learning_rate": 4.390764925373135e-05, + "loss": 0.0, + "step": 52248 + }, + { + "epoch": 48.74, + "learning_rate": 4.39071828358209e-05, + "loss": 0.0005, + "step": 52252 + }, + { + "epoch": 48.75, + "learning_rate": 4.3906716417910445e-05, + "loss": 0.0001, + "step": 52256 + }, + { + "epoch": 48.75, + "learning_rate": 4.390625000000001e-05, + "loss": 0.0, + "step": 52260 + }, + { + "epoch": 48.75, + "learning_rate": 4.3905783582089555e-05, + "loss": 0.0, + "step": 52264 + }, + { + "epoch": 48.76, + "learning_rate": 4.39053171641791e-05, + "loss": 0.0, + "step": 52268 + }, + { + "epoch": 48.76, + "learning_rate": 4.390485074626866e-05, + "loss": 0.0, + "step": 52272 + }, + { + "epoch": 48.76, + "learning_rate": 4.390438432835821e-05, + "loss": 0.0, + "step": 52276 + }, + { + "epoch": 48.77, + "learning_rate": 4.390391791044776e-05, + "loss": 0.0, + "step": 52280 + }, + { + "epoch": 48.77, + "learning_rate": 4.3903451492537316e-05, + "loss": 0.0001, + "step": 52284 + }, + { + "epoch": 48.78, + "learning_rate": 4.390298507462687e-05, + "loss": 0.0, + "step": 52288 + }, + { + "epoch": 48.78, + "learning_rate": 4.390251865671642e-05, + "loss": 0.0, + "step": 52292 + }, + { + "epoch": 48.78, + "learning_rate": 4.3902052238805974e-05, + "loss": 0.0, + "step": 52296 + }, + { + "epoch": 48.79, + "learning_rate": 4.390158582089552e-05, + "loss": 0.0, + "step": 52300 + }, + { + "epoch": 48.79, + "learning_rate": 4.390111940298508e-05, + "loss": 0.0113, + "step": 52304 + }, + { + "epoch": 48.79, + "learning_rate": 4.390065298507463e-05, + "loss": 0.0003, + "step": 52308 + }, + { + "epoch": 48.8, + "learning_rate": 4.390018656716418e-05, + "loss": 0.0004, + "step": 52312 + }, + { + "epoch": 48.8, + "learning_rate": 4.389972014925373e-05, + "loss": 0.0001, + "step": 52316 + }, + { + "epoch": 48.81, + "learning_rate": 4.389925373134329e-05, + "loss": 0.0, + "step": 52320 + }, + { + "epoch": 48.81, + "learning_rate": 4.389878731343284e-05, + "loss": 0.0, + "step": 52324 + }, + { + "epoch": 48.81, + "learning_rate": 4.3898320895522386e-05, + "loss": 0.0001, + "step": 52328 + }, + { + "epoch": 48.82, + "learning_rate": 4.389785447761194e-05, + "loss": 0.0, + "step": 52332 + }, + { + "epoch": 48.82, + "learning_rate": 4.3897388059701496e-05, + "loss": 0.0, + "step": 52336 + }, + { + "epoch": 48.82, + "learning_rate": 4.3896921641791044e-05, + "loss": 0.0, + "step": 52340 + }, + { + "epoch": 48.83, + "learning_rate": 4.38964552238806e-05, + "loss": 0.0, + "step": 52344 + }, + { + "epoch": 48.83, + "learning_rate": 4.3895988805970154e-05, + "loss": 0.0, + "step": 52348 + }, + { + "epoch": 48.84, + "learning_rate": 4.38955223880597e-05, + "loss": 0.0, + "step": 52352 + }, + { + "epoch": 48.84, + "learning_rate": 4.389505597014926e-05, + "loss": 0.0008, + "step": 52356 + }, + { + "epoch": 48.84, + "learning_rate": 4.3894589552238805e-05, + "loss": 0.0001, + "step": 52360 + }, + { + "epoch": 48.85, + "learning_rate": 4.389412313432836e-05, + "loss": 0.0, + "step": 52364 + }, + { + "epoch": 48.85, + "learning_rate": 4.3893656716417915e-05, + "loss": 0.0, + "step": 52368 + }, + { + "epoch": 48.85, + "learning_rate": 4.389319029850746e-05, + "loss": 0.0001, + "step": 52372 + }, + { + "epoch": 48.86, + "learning_rate": 4.389272388059702e-05, + "loss": 0.0, + "step": 52376 + }, + { + "epoch": 48.86, + "learning_rate": 4.389225746268657e-05, + "loss": 0.0, + "step": 52380 + }, + { + "epoch": 48.87, + "learning_rate": 4.389179104477612e-05, + "loss": 0.0, + "step": 52384 + }, + { + "epoch": 48.87, + "learning_rate": 4.3891324626865676e-05, + "loss": 0.0003, + "step": 52388 + }, + { + "epoch": 48.87, + "learning_rate": 4.3890858208955224e-05, + "loss": 0.0, + "step": 52392 + }, + { + "epoch": 48.88, + "learning_rate": 4.389039179104478e-05, + "loss": 0.0, + "step": 52396 + }, + { + "epoch": 48.88, + "learning_rate": 4.3889925373134334e-05, + "loss": 0.0, + "step": 52400 + }, + { + "epoch": 48.88, + "learning_rate": 4.388945895522388e-05, + "loss": 0.0, + "step": 52404 + }, + { + "epoch": 48.89, + "learning_rate": 4.388899253731344e-05, + "loss": 0.0, + "step": 52408 + }, + { + "epoch": 48.89, + "learning_rate": 4.388852611940299e-05, + "loss": 0.0003, + "step": 52412 + }, + { + "epoch": 48.9, + "learning_rate": 4.388805970149254e-05, + "loss": 0.0, + "step": 52416 + }, + { + "epoch": 48.9, + "learning_rate": 4.388759328358209e-05, + "loss": 0.0, + "step": 52420 + }, + { + "epoch": 48.9, + "learning_rate": 4.388712686567164e-05, + "loss": 0.0, + "step": 52424 + }, + { + "epoch": 48.91, + "learning_rate": 4.38866604477612e-05, + "loss": 0.0, + "step": 52428 + }, + { + "epoch": 48.91, + "learning_rate": 4.3886194029850746e-05, + "loss": 0.0011, + "step": 52432 + }, + { + "epoch": 48.91, + "learning_rate": 4.38857276119403e-05, + "loss": 0.0, + "step": 52436 + }, + { + "epoch": 48.92, + "learning_rate": 4.3885261194029856e-05, + "loss": 0.0003, + "step": 52440 + }, + { + "epoch": 48.92, + "learning_rate": 4.3884794776119404e-05, + "loss": 0.0, + "step": 52444 + }, + { + "epoch": 48.93, + "learning_rate": 4.388432835820896e-05, + "loss": 0.0, + "step": 52448 + }, + { + "epoch": 48.93, + "learning_rate": 4.388386194029851e-05, + "loss": 0.0, + "step": 52452 + }, + { + "epoch": 48.93, + "learning_rate": 4.388339552238806e-05, + "loss": 0.0001, + "step": 52456 + }, + { + "epoch": 48.94, + "learning_rate": 4.3882929104477617e-05, + "loss": 0.0, + "step": 52460 + }, + { + "epoch": 48.94, + "learning_rate": 4.3882462686567165e-05, + "loss": 0.0, + "step": 52464 + }, + { + "epoch": 48.94, + "learning_rate": 4.388199626865672e-05, + "loss": 0.0, + "step": 52468 + }, + { + "epoch": 48.95, + "learning_rate": 4.3881529850746275e-05, + "loss": 0.0005, + "step": 52472 + }, + { + "epoch": 48.95, + "learning_rate": 4.388106343283582e-05, + "loss": 0.003, + "step": 52476 + }, + { + "epoch": 48.96, + "learning_rate": 4.388059701492537e-05, + "loss": 0.0, + "step": 52480 + }, + { + "epoch": 48.96, + "learning_rate": 4.3880130597014926e-05, + "loss": 0.0029, + "step": 52484 + }, + { + "epoch": 48.96, + "learning_rate": 4.387966417910448e-05, + "loss": 0.002, + "step": 52488 + }, + { + "epoch": 48.97, + "learning_rate": 4.387919776119403e-05, + "loss": 0.0, + "step": 52492 + }, + { + "epoch": 48.97, + "learning_rate": 4.3878731343283584e-05, + "loss": 0.0, + "step": 52496 + }, + { + "epoch": 48.97, + "learning_rate": 4.387826492537314e-05, + "loss": 0.0016, + "step": 52500 + }, + { + "epoch": 48.97, + "eval_exact_match": 0.741779497098646, + "eval_exec": 0.769825918762089, + "eval_loss": 0.4436146914958954, + "eval_runtime": 1145.0497, + "eval_samples_per_second": 0.903, + "step": 52500 + }, + { + "epoch": 48.98, + "learning_rate": 4.387779850746269e-05, + "loss": 0.0001, + "step": 52504 + }, + { + "epoch": 48.98, + "learning_rate": 4.387733208955224e-05, + "loss": 0.0001, + "step": 52508 + }, + { + "epoch": 48.98, + "learning_rate": 4.387686567164179e-05, + "loss": 0.0, + "step": 52512 + }, + { + "epoch": 48.99, + "learning_rate": 4.3876399253731345e-05, + "loss": 0.0019, + "step": 52516 + }, + { + "epoch": 48.99, + "learning_rate": 4.38759328358209e-05, + "loss": 0.0001, + "step": 52520 + }, + { + "epoch": 49.0, + "learning_rate": 4.387546641791045e-05, + "loss": 0.0, + "step": 52524 + }, + { + "epoch": 49.0, + "learning_rate": 4.3875e-05, + "loss": 0.0, + "step": 52528 + }, + { + "epoch": 49.0, + "learning_rate": 4.387453358208956e-05, + "loss": 0.0, + "step": 52532 + }, + { + "epoch": 49.01, + "learning_rate": 4.3874067164179106e-05, + "loss": 0.0, + "step": 52536 + }, + { + "epoch": 49.01, + "learning_rate": 4.387360074626866e-05, + "loss": 0.001, + "step": 52540 + }, + { + "epoch": 49.01, + "learning_rate": 4.387313432835821e-05, + "loss": 0.0, + "step": 52544 + }, + { + "epoch": 49.02, + "learning_rate": 4.3872667910447764e-05, + "loss": 0.0, + "step": 52548 + }, + { + "epoch": 49.02, + "learning_rate": 4.387220149253732e-05, + "loss": 0.0006, + "step": 52552 + }, + { + "epoch": 49.03, + "learning_rate": 4.387173507462687e-05, + "loss": 0.0, + "step": 52556 + }, + { + "epoch": 49.03, + "learning_rate": 4.387126865671642e-05, + "loss": 0.0029, + "step": 52560 + }, + { + "epoch": 49.03, + "learning_rate": 4.3870802238805976e-05, + "loss": 0.0, + "step": 52564 + }, + { + "epoch": 49.04, + "learning_rate": 4.3870335820895525e-05, + "loss": 0.0, + "step": 52568 + }, + { + "epoch": 49.04, + "learning_rate": 4.386986940298507e-05, + "loss": 0.0001, + "step": 52572 + }, + { + "epoch": 49.04, + "learning_rate": 4.3869402985074634e-05, + "loss": 0.0005, + "step": 52576 + }, + { + "epoch": 49.05, + "learning_rate": 4.386893656716418e-05, + "loss": 0.0, + "step": 52580 + }, + { + "epoch": 49.05, + "learning_rate": 4.386847014925373e-05, + "loss": 0.0, + "step": 52584 + }, + { + "epoch": 49.06, + "learning_rate": 4.3868003731343286e-05, + "loss": 0.0001, + "step": 52588 + }, + { + "epoch": 49.06, + "learning_rate": 4.386753731343284e-05, + "loss": 0.0, + "step": 52592 + }, + { + "epoch": 49.06, + "learning_rate": 4.386707089552239e-05, + "loss": 0.0001, + "step": 52596 + }, + { + "epoch": 49.07, + "learning_rate": 4.3866604477611943e-05, + "loss": 0.0009, + "step": 52600 + }, + { + "epoch": 49.07, + "learning_rate": 4.386613805970149e-05, + "loss": 0.0, + "step": 52604 + }, + { + "epoch": 49.07, + "learning_rate": 4.3865671641791047e-05, + "loss": 0.0001, + "step": 52608 + }, + { + "epoch": 49.08, + "learning_rate": 4.38652052238806e-05, + "loss": 0.0, + "step": 52612 + }, + { + "epoch": 49.08, + "learning_rate": 4.386473880597015e-05, + "loss": 0.0001, + "step": 52616 + }, + { + "epoch": 49.09, + "learning_rate": 4.3864272388059704e-05, + "loss": 0.0, + "step": 52620 + }, + { + "epoch": 49.09, + "learning_rate": 4.386380597014926e-05, + "loss": 0.0006, + "step": 52624 + }, + { + "epoch": 49.09, + "learning_rate": 4.386333955223881e-05, + "loss": 0.0008, + "step": 52628 + }, + { + "epoch": 49.1, + "learning_rate": 4.3862873134328356e-05, + "loss": 0.0001, + "step": 52632 + }, + { + "epoch": 49.1, + "learning_rate": 4.386240671641792e-05, + "loss": 0.0001, + "step": 52636 + }, + { + "epoch": 49.1, + "learning_rate": 4.3861940298507465e-05, + "loss": 0.0, + "step": 52640 + }, + { + "epoch": 49.11, + "learning_rate": 4.3861473880597014e-05, + "loss": 0.0003, + "step": 52644 + }, + { + "epoch": 49.11, + "learning_rate": 4.386100746268657e-05, + "loss": 0.0111, + "step": 52648 + }, + { + "epoch": 49.12, + "learning_rate": 4.3860541044776123e-05, + "loss": 0.0, + "step": 52652 + }, + { + "epoch": 49.12, + "learning_rate": 4.386007462686567e-05, + "loss": 0.0003, + "step": 52656 + }, + { + "epoch": 49.12, + "learning_rate": 4.3859608208955226e-05, + "loss": 0.0016, + "step": 52660 + }, + { + "epoch": 49.13, + "learning_rate": 4.3859141791044775e-05, + "loss": 0.0, + "step": 52664 + }, + { + "epoch": 49.13, + "learning_rate": 4.385867537313433e-05, + "loss": 0.0, + "step": 52668 + }, + { + "epoch": 49.13, + "learning_rate": 4.3858208955223884e-05, + "loss": 0.0, + "step": 52672 + }, + { + "epoch": 49.14, + "learning_rate": 4.385774253731343e-05, + "loss": 0.0005, + "step": 52676 + }, + { + "epoch": 49.14, + "learning_rate": 4.385727611940299e-05, + "loss": 0.0, + "step": 52680 + }, + { + "epoch": 49.15, + "learning_rate": 4.385680970149254e-05, + "loss": 0.0, + "step": 52684 + }, + { + "epoch": 49.15, + "learning_rate": 4.385634328358209e-05, + "loss": 0.0001, + "step": 52688 + }, + { + "epoch": 49.15, + "learning_rate": 4.385587686567164e-05, + "loss": 0.0, + "step": 52692 + }, + { + "epoch": 49.16, + "learning_rate": 4.38554104477612e-05, + "loss": 0.0001, + "step": 52696 + }, + { + "epoch": 49.16, + "learning_rate": 4.385494402985075e-05, + "loss": 0.0, + "step": 52700 + }, + { + "epoch": 49.16, + "learning_rate": 4.38544776119403e-05, + "loss": 0.0, + "step": 52704 + }, + { + "epoch": 49.17, + "learning_rate": 4.385401119402985e-05, + "loss": 0.0, + "step": 52708 + }, + { + "epoch": 49.17, + "learning_rate": 4.3853544776119406e-05, + "loss": 0.0003, + "step": 52712 + }, + { + "epoch": 49.18, + "learning_rate": 4.385307835820896e-05, + "loss": 0.0009, + "step": 52716 + }, + { + "epoch": 49.18, + "learning_rate": 4.385261194029851e-05, + "loss": 0.0001, + "step": 52720 + }, + { + "epoch": 49.18, + "learning_rate": 4.385214552238806e-05, + "loss": 0.0008, + "step": 52724 + }, + { + "epoch": 49.19, + "learning_rate": 4.385167910447762e-05, + "loss": 0.005, + "step": 52728 + }, + { + "epoch": 49.19, + "learning_rate": 4.385121268656717e-05, + "loss": 0.0, + "step": 52732 + }, + { + "epoch": 49.19, + "learning_rate": 4.3850746268656715e-05, + "loss": 0.0001, + "step": 52736 + }, + { + "epoch": 49.2, + "learning_rate": 4.385027985074627e-05, + "loss": 0.0, + "step": 52740 + }, + { + "epoch": 49.2, + "learning_rate": 4.3849813432835825e-05, + "loss": 0.0, + "step": 52744 + }, + { + "epoch": 49.21, + "learning_rate": 4.3849347014925373e-05, + "loss": 0.0, + "step": 52748 + }, + { + "epoch": 49.21, + "learning_rate": 4.384888059701493e-05, + "loss": 0.0002, + "step": 52752 + }, + { + "epoch": 49.21, + "learning_rate": 4.384841417910448e-05, + "loss": 0.0001, + "step": 52756 + }, + { + "epoch": 49.22, + "learning_rate": 4.384794776119403e-05, + "loss": 0.0, + "step": 52760 + }, + { + "epoch": 49.22, + "learning_rate": 4.3847481343283586e-05, + "loss": 0.0006, + "step": 52764 + }, + { + "epoch": 49.22, + "learning_rate": 4.3847014925373134e-05, + "loss": 0.0009, + "step": 52768 + }, + { + "epoch": 49.23, + "learning_rate": 4.384654850746269e-05, + "loss": 0.0, + "step": 52772 + }, + { + "epoch": 49.23, + "learning_rate": 4.3846082089552244e-05, + "loss": 0.0003, + "step": 52776 + }, + { + "epoch": 49.24, + "learning_rate": 4.384561567164179e-05, + "loss": 0.0001, + "step": 52780 + }, + { + "epoch": 49.24, + "learning_rate": 4.384514925373134e-05, + "loss": 0.0, + "step": 52784 + }, + { + "epoch": 49.24, + "learning_rate": 4.38446828358209e-05, + "loss": 0.0, + "step": 52788 + }, + { + "epoch": 49.25, + "learning_rate": 4.384421641791045e-05, + "loss": 0.0, + "step": 52792 + }, + { + "epoch": 49.25, + "learning_rate": 4.384375e-05, + "loss": 0.0008, + "step": 52796 + }, + { + "epoch": 49.25, + "learning_rate": 4.384328358208955e-05, + "loss": 0.0001, + "step": 52800 + }, + { + "epoch": 49.26, + "learning_rate": 4.384281716417911e-05, + "loss": 0.0, + "step": 52804 + }, + { + "epoch": 49.26, + "learning_rate": 4.3842350746268656e-05, + "loss": 0.0, + "step": 52808 + }, + { + "epoch": 49.26, + "learning_rate": 4.384188432835821e-05, + "loss": 0.0127, + "step": 52812 + }, + { + "epoch": 49.27, + "learning_rate": 4.3841417910447766e-05, + "loss": 0.0, + "step": 52816 + }, + { + "epoch": 49.27, + "learning_rate": 4.3840951492537314e-05, + "loss": 0.0001, + "step": 52820 + }, + { + "epoch": 49.28, + "learning_rate": 4.384048507462687e-05, + "loss": 0.0, + "step": 52824 + }, + { + "epoch": 49.28, + "learning_rate": 4.384001865671642e-05, + "loss": 0.0, + "step": 52828 + }, + { + "epoch": 49.28, + "learning_rate": 4.383955223880597e-05, + "loss": 0.0002, + "step": 52832 + }, + { + "epoch": 49.29, + "learning_rate": 4.383908582089553e-05, + "loss": 0.0, + "step": 52836 + }, + { + "epoch": 49.29, + "learning_rate": 4.3838619402985075e-05, + "loss": 0.0007, + "step": 52840 + }, + { + "epoch": 49.29, + "learning_rate": 4.3838152985074623e-05, + "loss": 0.0006, + "step": 52844 + }, + { + "epoch": 49.3, + "learning_rate": 4.3837686567164185e-05, + "loss": 0.0001, + "step": 52848 + }, + { + "epoch": 49.3, + "learning_rate": 4.383722014925373e-05, + "loss": 0.0, + "step": 52852 + }, + { + "epoch": 49.31, + "learning_rate": 4.383675373134328e-05, + "loss": 0.0001, + "step": 52856 + }, + { + "epoch": 49.31, + "learning_rate": 4.3836287313432836e-05, + "loss": 0.0004, + "step": 52860 + }, + { + "epoch": 49.31, + "learning_rate": 4.383582089552239e-05, + "loss": 0.0001, + "step": 52864 + }, + { + "epoch": 49.32, + "learning_rate": 4.3835354477611946e-05, + "loss": 0.0017, + "step": 52868 + }, + { + "epoch": 49.32, + "learning_rate": 4.3834888059701494e-05, + "loss": 0.0015, + "step": 52872 + }, + { + "epoch": 49.32, + "learning_rate": 4.383442164179105e-05, + "loss": 0.0, + "step": 52876 + }, + { + "epoch": 49.33, + "learning_rate": 4.3833955223880604e-05, + "loss": 0.0, + "step": 52880 + }, + { + "epoch": 49.33, + "learning_rate": 4.383348880597015e-05, + "loss": 0.0, + "step": 52884 + }, + { + "epoch": 49.34, + "learning_rate": 4.38330223880597e-05, + "loss": 0.0006, + "step": 52888 + }, + { + "epoch": 49.34, + "learning_rate": 4.3832555970149255e-05, + "loss": 0.0, + "step": 52892 + }, + { + "epoch": 49.34, + "learning_rate": 4.383208955223881e-05, + "loss": 0.0, + "step": 52896 + }, + { + "epoch": 49.35, + "learning_rate": 4.383162313432836e-05, + "loss": 0.0005, + "step": 52900 + }, + { + "epoch": 49.35, + "learning_rate": 4.383115671641791e-05, + "loss": 0.0, + "step": 52904 + }, + { + "epoch": 49.35, + "learning_rate": 4.383069029850747e-05, + "loss": 0.0005, + "step": 52908 + }, + { + "epoch": 49.36, + "learning_rate": 4.3830223880597016e-05, + "loss": 0.0, + "step": 52912 + }, + { + "epoch": 49.36, + "learning_rate": 4.382975746268657e-05, + "loss": 0.0001, + "step": 52916 + }, + { + "epoch": 49.37, + "learning_rate": 4.382929104477612e-05, + "loss": 0.0003, + "step": 52920 + }, + { + "epoch": 49.37, + "learning_rate": 4.3828824626865674e-05, + "loss": 0.0, + "step": 52924 + }, + { + "epoch": 49.37, + "learning_rate": 4.382835820895523e-05, + "loss": 0.0, + "step": 52928 + }, + { + "epoch": 49.38, + "learning_rate": 4.382789179104478e-05, + "loss": 0.0, + "step": 52932 + }, + { + "epoch": 49.38, + "learning_rate": 4.3827425373134325e-05, + "loss": 0.0002, + "step": 52936 + }, + { + "epoch": 49.38, + "learning_rate": 4.382695895522389e-05, + "loss": 0.0002, + "step": 52940 + }, + { + "epoch": 49.39, + "learning_rate": 4.3826492537313435e-05, + "loss": 0.0, + "step": 52944 + }, + { + "epoch": 49.39, + "learning_rate": 4.382602611940298e-05, + "loss": 0.0, + "step": 52948 + }, + { + "epoch": 49.4, + "learning_rate": 4.382555970149254e-05, + "loss": 0.0007, + "step": 52952 + }, + { + "epoch": 49.4, + "learning_rate": 4.382509328358209e-05, + "loss": 0.0001, + "step": 52956 + }, + { + "epoch": 49.4, + "learning_rate": 4.382462686567164e-05, + "loss": 0.0, + "step": 52960 + }, + { + "epoch": 49.41, + "learning_rate": 4.3824160447761196e-05, + "loss": 0.0001, + "step": 52964 + }, + { + "epoch": 49.41, + "learning_rate": 4.382369402985075e-05, + "loss": 0.0003, + "step": 52968 + }, + { + "epoch": 49.41, + "learning_rate": 4.38232276119403e-05, + "loss": 0.0006, + "step": 52972 + }, + { + "epoch": 49.42, + "learning_rate": 4.3822761194029854e-05, + "loss": 0.0, + "step": 52976 + }, + { + "epoch": 49.42, + "learning_rate": 4.38222947761194e-05, + "loss": 0.0, + "step": 52980 + }, + { + "epoch": 49.43, + "learning_rate": 4.382182835820896e-05, + "loss": 0.0001, + "step": 52984 + }, + { + "epoch": 49.43, + "learning_rate": 4.382136194029851e-05, + "loss": 0.0, + "step": 52988 + }, + { + "epoch": 49.43, + "learning_rate": 4.382089552238806e-05, + "loss": 0.0, + "step": 52992 + }, + { + "epoch": 49.44, + "learning_rate": 4.382042910447761e-05, + "loss": 0.0, + "step": 52996 + }, + { + "epoch": 49.44, + "learning_rate": 4.381996268656717e-05, + "loss": 0.0, + "step": 53000 + }, + { + "epoch": 49.44, + "eval_exact_match": 0.7485493230174082, + "eval_exec": 0.7804642166344294, + "eval_loss": 0.4629165828227997, + "eval_runtime": 1125.3708, + "eval_samples_per_second": 0.919, + "step": 53000 + }, + { + "epoch": 49.44, + "learning_rate": 4.381949626865672e-05, + "loss": 0.0061, + "step": 53004 + }, + { + "epoch": 49.45, + "learning_rate": 4.3819029850746266e-05, + "loss": 0.0, + "step": 53008 + }, + { + "epoch": 49.45, + "learning_rate": 4.381856343283582e-05, + "loss": 0.0001, + "step": 53012 + }, + { + "epoch": 49.46, + "learning_rate": 4.3818097014925376e-05, + "loss": 0.0, + "step": 53016 + }, + { + "epoch": 49.46, + "learning_rate": 4.3817630597014924e-05, + "loss": 0.0, + "step": 53020 + }, + { + "epoch": 49.46, + "learning_rate": 4.381716417910448e-05, + "loss": 0.0, + "step": 53024 + }, + { + "epoch": 49.47, + "learning_rate": 4.3816697761194034e-05, + "loss": 0.0013, + "step": 53028 + }, + { + "epoch": 49.47, + "learning_rate": 4.381623134328359e-05, + "loss": 0.0001, + "step": 53032 + }, + { + "epoch": 49.47, + "learning_rate": 4.381576492537314e-05, + "loss": 0.0, + "step": 53036 + }, + { + "epoch": 49.48, + "learning_rate": 4.3815298507462685e-05, + "loss": 0.0, + "step": 53040 + }, + { + "epoch": 49.48, + "learning_rate": 4.381483208955225e-05, + "loss": 0.0003, + "step": 53044 + }, + { + "epoch": 49.49, + "learning_rate": 4.3814365671641795e-05, + "loss": 0.0003, + "step": 53048 + }, + { + "epoch": 49.49, + "learning_rate": 4.381389925373134e-05, + "loss": 0.0, + "step": 53052 + }, + { + "epoch": 49.49, + "learning_rate": 4.38134328358209e-05, + "loss": 0.0, + "step": 53056 + }, + { + "epoch": 49.5, + "learning_rate": 4.381296641791045e-05, + "loss": 0.0001, + "step": 53060 + }, + { + "epoch": 49.5, + "learning_rate": 4.38125e-05, + "loss": 0.0, + "step": 53064 + }, + { + "epoch": 49.5, + "learning_rate": 4.3812033582089556e-05, + "loss": 0.0, + "step": 53068 + }, + { + "epoch": 49.51, + "learning_rate": 4.3811567164179104e-05, + "loss": 0.0002, + "step": 53072 + }, + { + "epoch": 49.51, + "learning_rate": 4.381110074626866e-05, + "loss": 0.0001, + "step": 53076 + }, + { + "epoch": 49.51, + "learning_rate": 4.3810634328358214e-05, + "loss": 0.0, + "step": 53080 + }, + { + "epoch": 49.52, + "learning_rate": 4.381016791044776e-05, + "loss": 0.0001, + "step": 53084 + }, + { + "epoch": 49.52, + "learning_rate": 4.380970149253732e-05, + "loss": 0.0, + "step": 53088 + }, + { + "epoch": 49.53, + "learning_rate": 4.380923507462687e-05, + "loss": 0.0006, + "step": 53092 + }, + { + "epoch": 49.53, + "learning_rate": 4.380876865671642e-05, + "loss": 0.0001, + "step": 53096 + }, + { + "epoch": 49.53, + "learning_rate": 4.380830223880597e-05, + "loss": 0.0, + "step": 53100 + }, + { + "epoch": 49.54, + "learning_rate": 4.380783582089552e-05, + "loss": 0.0028, + "step": 53104 + }, + { + "epoch": 49.54, + "learning_rate": 4.380736940298508e-05, + "loss": 0.0, + "step": 53108 + }, + { + "epoch": 49.54, + "learning_rate": 4.3806902985074626e-05, + "loss": 0.0001, + "step": 53112 + }, + { + "epoch": 49.55, + "learning_rate": 4.380643656716418e-05, + "loss": 0.0, + "step": 53116 + }, + { + "epoch": 49.55, + "learning_rate": 4.3805970149253736e-05, + "loss": 0.0, + "step": 53120 + }, + { + "epoch": 49.56, + "learning_rate": 4.3805503731343284e-05, + "loss": 0.0001, + "step": 53124 + }, + { + "epoch": 49.56, + "learning_rate": 4.380503731343284e-05, + "loss": 0.0, + "step": 53128 + }, + { + "epoch": 49.56, + "learning_rate": 4.380457089552239e-05, + "loss": 0.0, + "step": 53132 + }, + { + "epoch": 49.57, + "learning_rate": 4.380410447761194e-05, + "loss": 0.0, + "step": 53136 + }, + { + "epoch": 49.57, + "learning_rate": 4.38036380597015e-05, + "loss": 0.0001, + "step": 53140 + }, + { + "epoch": 49.57, + "learning_rate": 4.3803171641791045e-05, + "loss": 0.0001, + "step": 53144 + }, + { + "epoch": 49.58, + "learning_rate": 4.38027052238806e-05, + "loss": 0.0001, + "step": 53148 + }, + { + "epoch": 49.58, + "learning_rate": 4.3802238805970155e-05, + "loss": 0.0, + "step": 53152 + }, + { + "epoch": 49.59, + "learning_rate": 4.38017723880597e-05, + "loss": 0.0, + "step": 53156 + }, + { + "epoch": 49.59, + "learning_rate": 4.380130597014925e-05, + "loss": 0.0, + "step": 53160 + }, + { + "epoch": 49.59, + "learning_rate": 4.3800839552238806e-05, + "loss": 0.0001, + "step": 53164 + }, + { + "epoch": 49.6, + "learning_rate": 4.380037313432836e-05, + "loss": 0.0, + "step": 53168 + }, + { + "epoch": 49.6, + "learning_rate": 4.379990671641791e-05, + "loss": 0.0001, + "step": 53172 + }, + { + "epoch": 49.6, + "learning_rate": 4.3799440298507464e-05, + "loss": 0.0015, + "step": 53176 + }, + { + "epoch": 49.61, + "learning_rate": 4.379897388059702e-05, + "loss": 0.0, + "step": 53180 + }, + { + "epoch": 49.61, + "learning_rate": 4.379850746268657e-05, + "loss": 0.0, + "step": 53184 + }, + { + "epoch": 49.62, + "learning_rate": 4.379804104477612e-05, + "loss": 0.0, + "step": 53188 + }, + { + "epoch": 49.62, + "learning_rate": 4.379757462686567e-05, + "loss": 0.0007, + "step": 53192 + }, + { + "epoch": 49.62, + "learning_rate": 4.379710820895523e-05, + "loss": 0.0007, + "step": 53196 + }, + { + "epoch": 49.63, + "learning_rate": 4.379664179104478e-05, + "loss": 0.0, + "step": 53200 + }, + { + "epoch": 49.63, + "learning_rate": 4.379617537313433e-05, + "loss": 0.0002, + "step": 53204 + }, + { + "epoch": 49.63, + "learning_rate": 4.379570895522388e-05, + "loss": 0.0001, + "step": 53208 + }, + { + "epoch": 49.64, + "learning_rate": 4.379524253731344e-05, + "loss": 0.0003, + "step": 53212 + }, + { + "epoch": 49.64, + "learning_rate": 4.3794776119402986e-05, + "loss": 0.0, + "step": 53216 + }, + { + "epoch": 49.65, + "learning_rate": 4.379430970149254e-05, + "loss": 0.0, + "step": 53220 + }, + { + "epoch": 49.65, + "learning_rate": 4.379384328358209e-05, + "loss": 0.0, + "step": 53224 + }, + { + "epoch": 49.65, + "learning_rate": 4.3793376865671644e-05, + "loss": 0.0, + "step": 53228 + }, + { + "epoch": 49.66, + "learning_rate": 4.37929104477612e-05, + "loss": 0.0, + "step": 53232 + }, + { + "epoch": 49.66, + "learning_rate": 4.379244402985075e-05, + "loss": 0.0001, + "step": 53236 + }, + { + "epoch": 49.66, + "learning_rate": 4.37919776119403e-05, + "loss": 0.001, + "step": 53240 + }, + { + "epoch": 49.67, + "learning_rate": 4.379151119402986e-05, + "loss": 0.0, + "step": 53244 + }, + { + "epoch": 49.67, + "learning_rate": 4.3791044776119405e-05, + "loss": 0.0, + "step": 53248 + }, + { + "epoch": 49.68, + "learning_rate": 4.379057835820895e-05, + "loss": 0.0005, + "step": 53252 + }, + { + "epoch": 49.68, + "learning_rate": 4.3790111940298515e-05, + "loss": 0.0, + "step": 53256 + }, + { + "epoch": 49.68, + "learning_rate": 4.378964552238806e-05, + "loss": 0.0001, + "step": 53260 + }, + { + "epoch": 49.69, + "learning_rate": 4.378917910447761e-05, + "loss": 0.0, + "step": 53264 + }, + { + "epoch": 49.69, + "learning_rate": 4.3788712686567166e-05, + "loss": 0.0, + "step": 53268 + }, + { + "epoch": 49.69, + "learning_rate": 4.378824626865672e-05, + "loss": 0.0, + "step": 53272 + }, + { + "epoch": 49.7, + "learning_rate": 4.378777985074627e-05, + "loss": 0.0021, + "step": 53276 + }, + { + "epoch": 49.7, + "learning_rate": 4.3787313432835824e-05, + "loss": 0.0, + "step": 53280 + }, + { + "epoch": 49.71, + "learning_rate": 4.378684701492537e-05, + "loss": 0.0, + "step": 53284 + }, + { + "epoch": 49.71, + "learning_rate": 4.378638059701493e-05, + "loss": 0.0001, + "step": 53288 + }, + { + "epoch": 49.71, + "learning_rate": 4.378591417910448e-05, + "loss": 0.0, + "step": 53292 + }, + { + "epoch": 49.72, + "learning_rate": 4.378544776119403e-05, + "loss": 0.0, + "step": 53296 + }, + { + "epoch": 49.72, + "learning_rate": 4.3784981343283585e-05, + "loss": 0.0, + "step": 53300 + }, + { + "epoch": 49.72, + "learning_rate": 4.378451492537314e-05, + "loss": 0.0, + "step": 53304 + }, + { + "epoch": 49.73, + "learning_rate": 4.378404850746269e-05, + "loss": 0.0, + "step": 53308 + }, + { + "epoch": 49.73, + "learning_rate": 4.3783582089552236e-05, + "loss": 0.0, + "step": 53312 + }, + { + "epoch": 49.73, + "learning_rate": 4.37831156716418e-05, + "loss": 0.0, + "step": 53316 + }, + { + "epoch": 49.74, + "learning_rate": 4.3782649253731346e-05, + "loss": 0.0015, + "step": 53320 + }, + { + "epoch": 49.74, + "learning_rate": 4.3782182835820894e-05, + "loss": 0.0, + "step": 53324 + }, + { + "epoch": 49.75, + "learning_rate": 4.378171641791045e-05, + "loss": 0.0002, + "step": 53328 + }, + { + "epoch": 49.75, + "learning_rate": 4.3781250000000004e-05, + "loss": 0.0, + "step": 53332 + }, + { + "epoch": 49.75, + "learning_rate": 4.378078358208955e-05, + "loss": 0.001, + "step": 53336 + }, + { + "epoch": 49.76, + "learning_rate": 4.378031716417911e-05, + "loss": 0.0, + "step": 53340 + }, + { + "epoch": 49.76, + "learning_rate": 4.3779850746268655e-05, + "loss": 0.0007, + "step": 53344 + }, + { + "epoch": 49.76, + "learning_rate": 4.377938432835821e-05, + "loss": 0.0, + "step": 53348 + }, + { + "epoch": 49.77, + "learning_rate": 4.3778917910447765e-05, + "loss": 0.0, + "step": 53352 + }, + { + "epoch": 49.77, + "learning_rate": 4.377845149253731e-05, + "loss": 0.013, + "step": 53356 + }, + { + "epoch": 49.78, + "learning_rate": 4.377798507462687e-05, + "loss": 0.0007, + "step": 53360 + }, + { + "epoch": 49.78, + "learning_rate": 4.377751865671642e-05, + "loss": 0.0, + "step": 53364 + }, + { + "epoch": 49.78, + "learning_rate": 4.377705223880597e-05, + "loss": 0.0, + "step": 53368 + }, + { + "epoch": 49.79, + "learning_rate": 4.3776585820895526e-05, + "loss": 0.0, + "step": 53372 + }, + { + "epoch": 49.79, + "learning_rate": 4.377611940298508e-05, + "loss": 0.0, + "step": 53376 + }, + { + "epoch": 49.79, + "learning_rate": 4.377565298507463e-05, + "loss": 0.0, + "step": 53380 + }, + { + "epoch": 49.8, + "learning_rate": 4.3775186567164184e-05, + "loss": 0.0, + "step": 53384 + }, + { + "epoch": 49.8, + "learning_rate": 4.377472014925373e-05, + "loss": 0.0006, + "step": 53388 + }, + { + "epoch": 49.81, + "learning_rate": 4.377425373134329e-05, + "loss": 0.0032, + "step": 53392 + }, + { + "epoch": 49.81, + "learning_rate": 4.377378731343284e-05, + "loss": 0.0001, + "step": 53396 + }, + { + "epoch": 49.81, + "learning_rate": 4.377332089552239e-05, + "loss": 0.0, + "step": 53400 + }, + { + "epoch": 49.82, + "learning_rate": 4.377285447761194e-05, + "loss": 0.0, + "step": 53404 + }, + { + "epoch": 49.82, + "learning_rate": 4.37723880597015e-05, + "loss": 0.0, + "step": 53408 + }, + { + "epoch": 49.82, + "learning_rate": 4.377192164179105e-05, + "loss": 0.0001, + "step": 53412 + }, + { + "epoch": 49.83, + "learning_rate": 4.3771455223880596e-05, + "loss": 0.0, + "step": 53416 + }, + { + "epoch": 49.83, + "learning_rate": 4.377098880597015e-05, + "loss": 0.0, + "step": 53420 + }, + { + "epoch": 49.84, + "learning_rate": 4.3770522388059706e-05, + "loss": 0.0, + "step": 53424 + }, + { + "epoch": 49.84, + "learning_rate": 4.3770055970149254e-05, + "loss": 0.0, + "step": 53428 + }, + { + "epoch": 49.84, + "learning_rate": 4.376958955223881e-05, + "loss": 0.0, + "step": 53432 + }, + { + "epoch": 49.85, + "learning_rate": 4.3769123134328364e-05, + "loss": 0.0, + "step": 53436 + }, + { + "epoch": 49.85, + "learning_rate": 4.376865671641791e-05, + "loss": 0.0, + "step": 53440 + }, + { + "epoch": 49.85, + "learning_rate": 4.3768190298507467e-05, + "loss": 0.0021, + "step": 53444 + }, + { + "epoch": 49.86, + "learning_rate": 4.3767723880597015e-05, + "loss": 0.0, + "step": 53448 + }, + { + "epoch": 49.86, + "learning_rate": 4.376725746268657e-05, + "loss": 0.0002, + "step": 53452 + }, + { + "epoch": 49.87, + "learning_rate": 4.3766791044776125e-05, + "loss": 0.0021, + "step": 53456 + }, + { + "epoch": 49.87, + "learning_rate": 4.376632462686567e-05, + "loss": 0.0, + "step": 53460 + }, + { + "epoch": 49.87, + "learning_rate": 4.376585820895522e-05, + "loss": 0.0, + "step": 53464 + }, + { + "epoch": 49.88, + "learning_rate": 4.376539179104478e-05, + "loss": 0.0001, + "step": 53468 + }, + { + "epoch": 49.88, + "learning_rate": 4.376492537313433e-05, + "loss": 0.0, + "step": 53472 + }, + { + "epoch": 49.88, + "learning_rate": 4.376445895522388e-05, + "loss": 0.0, + "step": 53476 + }, + { + "epoch": 49.89, + "learning_rate": 4.3763992537313434e-05, + "loss": 0.0, + "step": 53480 + }, + { + "epoch": 49.89, + "learning_rate": 4.376352611940299e-05, + "loss": 0.0005, + "step": 53484 + }, + { + "epoch": 49.9, + "learning_rate": 4.376305970149254e-05, + "loss": 0.0, + "step": 53488 + }, + { + "epoch": 49.9, + "learning_rate": 4.376259328358209e-05, + "loss": 0.0, + "step": 53492 + }, + { + "epoch": 49.9, + "learning_rate": 4.3762126865671646e-05, + "loss": 0.0, + "step": 53496 + }, + { + "epoch": 49.91, + "learning_rate": 4.3761660447761195e-05, + "loss": 0.0, + "step": 53500 + }, + { + "epoch": 49.91, + "eval_exact_match": 0.7495164410058027, + "eval_exec": 0.7833655705996132, + "eval_loss": 0.4703017771244049, + "eval_runtime": 1159.4214, + "eval_samples_per_second": 0.892, + "step": 53500 + }, + { + "epoch": 49.91, + "learning_rate": 4.376119402985075e-05, + "loss": 0.001, + "step": 53504 + }, + { + "epoch": 49.91, + "learning_rate": 4.37607276119403e-05, + "loss": 0.0, + "step": 53508 + }, + { + "epoch": 49.92, + "learning_rate": 4.376026119402985e-05, + "loss": 0.0, + "step": 53512 + }, + { + "epoch": 49.92, + "learning_rate": 4.375979477611941e-05, + "loss": 0.0004, + "step": 53516 + }, + { + "epoch": 49.93, + "learning_rate": 4.3759328358208956e-05, + "loss": 0.0, + "step": 53520 + }, + { + "epoch": 49.93, + "learning_rate": 4.375886194029851e-05, + "loss": 0.0, + "step": 53524 + }, + { + "epoch": 49.93, + "learning_rate": 4.3758395522388065e-05, + "loss": 0.0, + "step": 53528 + }, + { + "epoch": 49.94, + "learning_rate": 4.3757929104477614e-05, + "loss": 0.0, + "step": 53532 + }, + { + "epoch": 49.94, + "learning_rate": 4.375746268656717e-05, + "loss": 0.0, + "step": 53536 + }, + { + "epoch": 49.94, + "learning_rate": 4.3756996268656717e-05, + "loss": 0.0023, + "step": 53540 + }, + { + "epoch": 49.95, + "learning_rate": 4.375652985074627e-05, + "loss": 0.0001, + "step": 53544 + }, + { + "epoch": 49.95, + "learning_rate": 4.3756063432835826e-05, + "loss": 0.0, + "step": 53548 + }, + { + "epoch": 49.96, + "learning_rate": 4.3755597014925375e-05, + "loss": 0.0, + "step": 53552 + }, + { + "epoch": 49.96, + "learning_rate": 4.375513059701493e-05, + "loss": 0.0, + "step": 53556 + }, + { + "epoch": 49.96, + "learning_rate": 4.3754664179104484e-05, + "loss": 0.0001, + "step": 53560 + }, + { + "epoch": 49.97, + "learning_rate": 4.375419776119403e-05, + "loss": 0.0001, + "step": 53564 + }, + { + "epoch": 49.97, + "learning_rate": 4.375373134328358e-05, + "loss": 0.0, + "step": 53568 + }, + { + "epoch": 49.97, + "learning_rate": 4.3753264925373136e-05, + "loss": 0.0001, + "step": 53572 + }, + { + "epoch": 49.98, + "learning_rate": 4.375279850746269e-05, + "loss": 0.0002, + "step": 53576 + }, + { + "epoch": 49.98, + "learning_rate": 4.375233208955224e-05, + "loss": 0.0003, + "step": 53580 + }, + { + "epoch": 49.98, + "learning_rate": 4.3751865671641793e-05, + "loss": 0.0001, + "step": 53584 + }, + { + "epoch": 49.99, + "learning_rate": 4.375139925373135e-05, + "loss": 0.0024, + "step": 53588 + }, + { + "epoch": 49.99, + "learning_rate": 4.3750932835820897e-05, + "loss": 0.0, + "step": 53592 + }, + { + "epoch": 50.0, + "learning_rate": 4.375046641791045e-05, + "loss": 0.0, + "step": 53596 + }, + { + "epoch": 50.0, + "learning_rate": 4.375e-05, + "loss": 0.0, + "step": 53600 + }, + { + "epoch": 50.0, + "learning_rate": 4.3749533582089554e-05, + "loss": 0.0, + "step": 53604 + }, + { + "epoch": 50.01, + "learning_rate": 4.374906716417911e-05, + "loss": 0.0, + "step": 53608 + }, + { + "epoch": 50.01, + "learning_rate": 4.374860074626866e-05, + "loss": 0.0008, + "step": 53612 + }, + { + "epoch": 50.01, + "learning_rate": 4.3748134328358206e-05, + "loss": 0.0, + "step": 53616 + }, + { + "epoch": 50.02, + "learning_rate": 4.374766791044777e-05, + "loss": 0.0003, + "step": 53620 + }, + { + "epoch": 50.02, + "learning_rate": 4.3747201492537315e-05, + "loss": 0.0001, + "step": 53624 + }, + { + "epoch": 50.03, + "learning_rate": 4.3746735074626864e-05, + "loss": 0.003, + "step": 53628 + }, + { + "epoch": 50.03, + "learning_rate": 4.374626865671642e-05, + "loss": 0.0002, + "step": 53632 + }, + { + "epoch": 50.03, + "learning_rate": 4.3745802238805973e-05, + "loss": 0.0001, + "step": 53636 + }, + { + "epoch": 50.04, + "learning_rate": 4.374533582089552e-05, + "loss": 0.0, + "step": 53640 + }, + { + "epoch": 50.04, + "learning_rate": 4.3744869402985076e-05, + "loss": 0.0003, + "step": 53644 + }, + { + "epoch": 50.04, + "learning_rate": 4.374440298507463e-05, + "loss": 0.0, + "step": 53648 + }, + { + "epoch": 50.05, + "learning_rate": 4.374393656716418e-05, + "loss": 0.0, + "step": 53652 + }, + { + "epoch": 50.05, + "learning_rate": 4.3743470149253734e-05, + "loss": 0.0, + "step": 53656 + }, + { + "epoch": 50.06, + "learning_rate": 4.374300373134328e-05, + "loss": 0.0002, + "step": 53660 + }, + { + "epoch": 50.06, + "learning_rate": 4.374253731343284e-05, + "loss": 0.0001, + "step": 53664 + }, + { + "epoch": 50.06, + "learning_rate": 4.374207089552239e-05, + "loss": 0.0005, + "step": 53668 + }, + { + "epoch": 50.07, + "learning_rate": 4.374160447761194e-05, + "loss": 0.0001, + "step": 53672 + }, + { + "epoch": 50.07, + "learning_rate": 4.374113805970149e-05, + "loss": 0.0001, + "step": 53676 + }, + { + "epoch": 50.07, + "learning_rate": 4.374067164179105e-05, + "loss": 0.0, + "step": 53680 + }, + { + "epoch": 50.08, + "learning_rate": 4.37402052238806e-05, + "loss": 0.006, + "step": 53684 + }, + { + "epoch": 50.08, + "learning_rate": 4.373973880597015e-05, + "loss": 0.0, + "step": 53688 + }, + { + "epoch": 50.09, + "learning_rate": 4.37392723880597e-05, + "loss": 0.0, + "step": 53692 + }, + { + "epoch": 50.09, + "learning_rate": 4.3738805970149256e-05, + "loss": 0.0001, + "step": 53696 + }, + { + "epoch": 50.09, + "learning_rate": 4.373833955223881e-05, + "loss": 0.0004, + "step": 53700 + }, + { + "epoch": 50.1, + "learning_rate": 4.373787313432836e-05, + "loss": 0.0, + "step": 53704 + }, + { + "epoch": 50.1, + "learning_rate": 4.3737406716417914e-05, + "loss": 0.0, + "step": 53708 + }, + { + "epoch": 50.1, + "learning_rate": 4.373694029850747e-05, + "loss": 0.0, + "step": 53712 + }, + { + "epoch": 50.11, + "learning_rate": 4.373647388059702e-05, + "loss": 0.0001, + "step": 53716 + }, + { + "epoch": 50.11, + "learning_rate": 4.3736007462686565e-05, + "loss": 0.0, + "step": 53720 + }, + { + "epoch": 50.12, + "learning_rate": 4.373554104477613e-05, + "loss": 0.0, + "step": 53724 + }, + { + "epoch": 50.12, + "learning_rate": 4.3735074626865675e-05, + "loss": 0.0, + "step": 53728 + }, + { + "epoch": 50.12, + "learning_rate": 4.3734608208955223e-05, + "loss": 0.0001, + "step": 53732 + }, + { + "epoch": 50.13, + "learning_rate": 4.373414179104478e-05, + "loss": 0.0022, + "step": 53736 + }, + { + "epoch": 50.13, + "learning_rate": 4.373367537313433e-05, + "loss": 0.0, + "step": 53740 + }, + { + "epoch": 50.13, + "learning_rate": 4.373320895522388e-05, + "loss": 0.0, + "step": 53744 + }, + { + "epoch": 50.14, + "learning_rate": 4.3732742537313436e-05, + "loss": 0.0, + "step": 53748 + }, + { + "epoch": 50.14, + "learning_rate": 4.3732276119402984e-05, + "loss": 0.0001, + "step": 53752 + }, + { + "epoch": 50.15, + "learning_rate": 4.373180970149254e-05, + "loss": 0.0, + "step": 53756 + }, + { + "epoch": 50.15, + "learning_rate": 4.3731343283582094e-05, + "loss": 0.0001, + "step": 53760 + }, + { + "epoch": 50.15, + "learning_rate": 4.373087686567164e-05, + "loss": 0.0001, + "step": 53764 + }, + { + "epoch": 50.16, + "learning_rate": 4.37304104477612e-05, + "loss": 0.0, + "step": 53768 + }, + { + "epoch": 50.16, + "learning_rate": 4.372994402985075e-05, + "loss": 0.0002, + "step": 53772 + }, + { + "epoch": 50.16, + "learning_rate": 4.37294776119403e-05, + "loss": 0.0, + "step": 53776 + }, + { + "epoch": 50.17, + "learning_rate": 4.372901119402985e-05, + "loss": 0.0, + "step": 53780 + }, + { + "epoch": 50.17, + "learning_rate": 4.37285447761194e-05, + "loss": 0.0, + "step": 53784 + }, + { + "epoch": 50.18, + "learning_rate": 4.372807835820896e-05, + "loss": 0.0, + "step": 53788 + }, + { + "epoch": 50.18, + "learning_rate": 4.3727611940298506e-05, + "loss": 0.0001, + "step": 53792 + }, + { + "epoch": 50.18, + "learning_rate": 4.372714552238806e-05, + "loss": 0.0002, + "step": 53796 + }, + { + "epoch": 50.19, + "learning_rate": 4.3726679104477616e-05, + "loss": 0.0003, + "step": 53800 + }, + { + "epoch": 50.19, + "learning_rate": 4.3726212686567164e-05, + "loss": 0.0, + "step": 53804 + }, + { + "epoch": 50.19, + "learning_rate": 4.372574626865672e-05, + "loss": 0.0006, + "step": 53808 + }, + { + "epoch": 50.2, + "learning_rate": 4.372527985074627e-05, + "loss": 0.0, + "step": 53812 + }, + { + "epoch": 50.2, + "learning_rate": 4.372481343283582e-05, + "loss": 0.0, + "step": 53816 + }, + { + "epoch": 50.21, + "learning_rate": 4.372434701492538e-05, + "loss": 0.0003, + "step": 53820 + }, + { + "epoch": 50.21, + "learning_rate": 4.3723880597014925e-05, + "loss": 0.0001, + "step": 53824 + }, + { + "epoch": 50.21, + "learning_rate": 4.372341417910448e-05, + "loss": 0.0001, + "step": 53828 + }, + { + "epoch": 50.22, + "learning_rate": 4.3722947761194035e-05, + "loss": 0.0, + "step": 53832 + }, + { + "epoch": 50.22, + "learning_rate": 4.372248134328358e-05, + "loss": 0.0003, + "step": 53836 + }, + { + "epoch": 50.22, + "learning_rate": 4.372201492537313e-05, + "loss": 0.0001, + "step": 53840 + }, + { + "epoch": 50.23, + "learning_rate": 4.3721548507462686e-05, + "loss": 0.0, + "step": 53844 + }, + { + "epoch": 50.23, + "learning_rate": 4.372108208955224e-05, + "loss": 0.0026, + "step": 53848 + }, + { + "epoch": 50.24, + "learning_rate": 4.3720615671641796e-05, + "loss": 0.0, + "step": 53852 + }, + { + "epoch": 50.24, + "learning_rate": 4.3720149253731344e-05, + "loss": 0.0017, + "step": 53856 + }, + { + "epoch": 50.24, + "learning_rate": 4.37196828358209e-05, + "loss": 0.0, + "step": 53860 + }, + { + "epoch": 50.25, + "learning_rate": 4.3719216417910454e-05, + "loss": 0.0, + "step": 53864 + }, + { + "epoch": 50.25, + "learning_rate": 4.371875e-05, + "loss": 0.0001, + "step": 53868 + }, + { + "epoch": 50.25, + "learning_rate": 4.371828358208955e-05, + "loss": 0.0, + "step": 53872 + }, + { + "epoch": 50.26, + "learning_rate": 4.371781716417911e-05, + "loss": 0.0003, + "step": 53876 + }, + { + "epoch": 50.26, + "learning_rate": 4.371735074626866e-05, + "loss": 0.0, + "step": 53880 + }, + { + "epoch": 50.26, + "learning_rate": 4.371688432835821e-05, + "loss": 0.0001, + "step": 53884 + }, + { + "epoch": 50.27, + "learning_rate": 4.371641791044776e-05, + "loss": 0.0, + "step": 53888 + }, + { + "epoch": 50.27, + "learning_rate": 4.371595149253732e-05, + "loss": 0.0, + "step": 53892 + }, + { + "epoch": 50.28, + "learning_rate": 4.3715485074626866e-05, + "loss": 0.0001, + "step": 53896 + }, + { + "epoch": 50.28, + "learning_rate": 4.371501865671642e-05, + "loss": 0.0001, + "step": 53900 + }, + { + "epoch": 50.28, + "learning_rate": 4.371455223880597e-05, + "loss": 0.0001, + "step": 53904 + }, + { + "epoch": 50.29, + "learning_rate": 4.3714085820895524e-05, + "loss": 0.0001, + "step": 53908 + }, + { + "epoch": 50.29, + "learning_rate": 4.371361940298508e-05, + "loss": 0.0, + "step": 53912 + }, + { + "epoch": 50.29, + "learning_rate": 4.371315298507463e-05, + "loss": 0.0006, + "step": 53916 + }, + { + "epoch": 50.3, + "learning_rate": 4.371268656716418e-05, + "loss": 0.0, + "step": 53920 + }, + { + "epoch": 50.3, + "learning_rate": 4.371222014925374e-05, + "loss": 0.0003, + "step": 53924 + }, + { + "epoch": 50.31, + "learning_rate": 4.3711753731343285e-05, + "loss": 0.0, + "step": 53928 + }, + { + "epoch": 50.31, + "learning_rate": 4.371128731343283e-05, + "loss": 0.0, + "step": 53932 + }, + { + "epoch": 50.31, + "learning_rate": 4.3710820895522395e-05, + "loss": 0.0071, + "step": 53936 + }, + { + "epoch": 50.32, + "learning_rate": 4.371035447761194e-05, + "loss": 0.0003, + "step": 53940 + }, + { + "epoch": 50.32, + "learning_rate": 4.370988805970149e-05, + "loss": 0.0, + "step": 53944 + }, + { + "epoch": 50.32, + "learning_rate": 4.3709421641791046e-05, + "loss": 0.0, + "step": 53948 + }, + { + "epoch": 50.33, + "learning_rate": 4.37089552238806e-05, + "loss": 0.0017, + "step": 53952 + }, + { + "epoch": 50.33, + "learning_rate": 4.370848880597015e-05, + "loss": 0.0029, + "step": 53956 + }, + { + "epoch": 50.34, + "learning_rate": 4.3708022388059704e-05, + "loss": 0.0, + "step": 53960 + }, + { + "epoch": 50.34, + "learning_rate": 4.370755597014925e-05, + "loss": 0.0, + "step": 53964 + }, + { + "epoch": 50.34, + "learning_rate": 4.370708955223881e-05, + "loss": 0.0, + "step": 53968 + }, + { + "epoch": 50.35, + "learning_rate": 4.370662313432836e-05, + "loss": 0.0, + "step": 53972 + }, + { + "epoch": 50.35, + "learning_rate": 4.370615671641791e-05, + "loss": 0.0, + "step": 53976 + }, + { + "epoch": 50.35, + "learning_rate": 4.3705690298507465e-05, + "loss": 0.0004, + "step": 53980 + }, + { + "epoch": 50.36, + "learning_rate": 4.370522388059702e-05, + "loss": 0.0, + "step": 53984 + }, + { + "epoch": 50.36, + "learning_rate": 4.370475746268657e-05, + "loss": 0.001, + "step": 53988 + }, + { + "epoch": 50.37, + "learning_rate": 4.3704291044776116e-05, + "loss": 0.0, + "step": 53992 + }, + { + "epoch": 50.37, + "learning_rate": 4.370382462686568e-05, + "loss": 0.0, + "step": 53996 + }, + { + "epoch": 50.37, + "learning_rate": 4.3703358208955226e-05, + "loss": 0.0, + "step": 54000 + }, + { + "epoch": 50.37, + "eval_exact_match": 0.746615087040619, + "eval_exec": 0.7669245647969052, + "eval_loss": 0.46396273374557495, + "eval_runtime": 1103.7037, + "eval_samples_per_second": 0.937, + "step": 54000 + }, + { + "epoch": 50.38, + "learning_rate": 4.3702891791044774e-05, + "loss": 0.0, + "step": 54004 + }, + { + "epoch": 50.38, + "learning_rate": 4.370242537313433e-05, + "loss": 0.0, + "step": 54008 + }, + { + "epoch": 50.38, + "learning_rate": 4.3701958955223884e-05, + "loss": 0.0002, + "step": 54012 + }, + { + "epoch": 50.39, + "learning_rate": 4.370149253731344e-05, + "loss": 0.0, + "step": 54016 + }, + { + "epoch": 50.39, + "learning_rate": 4.370102611940299e-05, + "loss": 0.0011, + "step": 54020 + }, + { + "epoch": 50.4, + "learning_rate": 4.3700559701492535e-05, + "loss": 0.0, + "step": 54024 + }, + { + "epoch": 50.4, + "learning_rate": 4.37000932835821e-05, + "loss": 0.0, + "step": 54028 + }, + { + "epoch": 50.4, + "learning_rate": 4.3699626865671645e-05, + "loss": 0.0, + "step": 54032 + }, + { + "epoch": 50.41, + "learning_rate": 4.369916044776119e-05, + "loss": 0.0, + "step": 54036 + }, + { + "epoch": 50.41, + "learning_rate": 4.369869402985075e-05, + "loss": 0.0, + "step": 54040 + }, + { + "epoch": 50.41, + "learning_rate": 4.36982276119403e-05, + "loss": 0.0001, + "step": 54044 + }, + { + "epoch": 50.42, + "learning_rate": 4.369776119402985e-05, + "loss": 0.0, + "step": 54048 + }, + { + "epoch": 50.42, + "learning_rate": 4.3697294776119406e-05, + "loss": 0.0, + "step": 54052 + }, + { + "epoch": 50.43, + "learning_rate": 4.369682835820896e-05, + "loss": 0.0, + "step": 54056 + }, + { + "epoch": 50.43, + "learning_rate": 4.369636194029851e-05, + "loss": 0.0004, + "step": 54060 + }, + { + "epoch": 50.43, + "learning_rate": 4.3695895522388064e-05, + "loss": 0.0, + "step": 54064 + }, + { + "epoch": 50.44, + "learning_rate": 4.369542910447761e-05, + "loss": 0.0, + "step": 54068 + }, + { + "epoch": 50.44, + "learning_rate": 4.369496268656717e-05, + "loss": 0.0004, + "step": 54072 + }, + { + "epoch": 50.44, + "learning_rate": 4.369449626865672e-05, + "loss": 0.0001, + "step": 54076 + }, + { + "epoch": 50.45, + "learning_rate": 4.369402985074627e-05, + "loss": 0.0, + "step": 54080 + }, + { + "epoch": 50.45, + "learning_rate": 4.369356343283582e-05, + "loss": 0.0, + "step": 54084 + }, + { + "epoch": 50.46, + "learning_rate": 4.369309701492538e-05, + "loss": 0.0, + "step": 54088 + }, + { + "epoch": 50.46, + "learning_rate": 4.369263059701493e-05, + "loss": 0.0, + "step": 54092 + }, + { + "epoch": 50.46, + "learning_rate": 4.3692164179104476e-05, + "loss": 0.0001, + "step": 54096 + }, + { + "epoch": 50.47, + "learning_rate": 4.369169776119403e-05, + "loss": 0.0, + "step": 54100 + }, + { + "epoch": 50.47, + "learning_rate": 4.3691231343283586e-05, + "loss": 0.0, + "step": 54104 + }, + { + "epoch": 50.47, + "learning_rate": 4.3690764925373134e-05, + "loss": 0.0, + "step": 54108 + }, + { + "epoch": 50.48, + "learning_rate": 4.369029850746269e-05, + "loss": 0.0003, + "step": 54112 + }, + { + "epoch": 50.48, + "learning_rate": 4.3689832089552244e-05, + "loss": 0.0, + "step": 54116 + }, + { + "epoch": 50.49, + "learning_rate": 4.368936567164179e-05, + "loss": 0.0, + "step": 54120 + }, + { + "epoch": 50.49, + "learning_rate": 4.368889925373135e-05, + "loss": 0.0009, + "step": 54124 + }, + { + "epoch": 50.49, + "learning_rate": 4.3688432835820895e-05, + "loss": 0.0, + "step": 54128 + }, + { + "epoch": 50.5, + "learning_rate": 4.368796641791045e-05, + "loss": 0.0001, + "step": 54132 + }, + { + "epoch": 50.5, + "learning_rate": 4.3687500000000005e-05, + "loss": 0.0, + "step": 54136 + }, + { + "epoch": 50.5, + "learning_rate": 4.368703358208955e-05, + "loss": 0.0003, + "step": 54140 + }, + { + "epoch": 50.51, + "learning_rate": 4.36865671641791e-05, + "loss": 0.0001, + "step": 54144 + }, + { + "epoch": 50.51, + "learning_rate": 4.368610074626866e-05, + "loss": 0.0003, + "step": 54148 + }, + { + "epoch": 50.51, + "learning_rate": 4.368563432835821e-05, + "loss": 0.0005, + "step": 54152 + }, + { + "epoch": 50.52, + "learning_rate": 4.368516791044776e-05, + "loss": 0.0, + "step": 54156 + }, + { + "epoch": 50.52, + "learning_rate": 4.3684701492537314e-05, + "loss": 0.0, + "step": 54160 + }, + { + "epoch": 50.53, + "learning_rate": 4.368423507462687e-05, + "loss": 0.0, + "step": 54164 + }, + { + "epoch": 50.53, + "learning_rate": 4.368376865671642e-05, + "loss": 0.0004, + "step": 54168 + }, + { + "epoch": 50.53, + "learning_rate": 4.368330223880597e-05, + "loss": 0.0002, + "step": 54172 + }, + { + "epoch": 50.54, + "learning_rate": 4.368283582089553e-05, + "loss": 0.0, + "step": 54176 + }, + { + "epoch": 50.54, + "learning_rate": 4.368236940298508e-05, + "loss": 0.0003, + "step": 54180 + }, + { + "epoch": 50.54, + "learning_rate": 4.368190298507463e-05, + "loss": 0.0, + "step": 54184 + }, + { + "epoch": 50.55, + "learning_rate": 4.368143656716418e-05, + "loss": 0.0, + "step": 54188 + }, + { + "epoch": 50.55, + "learning_rate": 4.368097014925373e-05, + "loss": 0.0, + "step": 54192 + }, + { + "epoch": 50.56, + "learning_rate": 4.368050373134329e-05, + "loss": 0.0, + "step": 54196 + }, + { + "epoch": 50.56, + "learning_rate": 4.3680037313432836e-05, + "loss": 0.0, + "step": 54200 + }, + { + "epoch": 50.56, + "learning_rate": 4.367957089552239e-05, + "loss": 0.0141, + "step": 54204 + }, + { + "epoch": 50.57, + "learning_rate": 4.3679104477611946e-05, + "loss": 0.0001, + "step": 54208 + }, + { + "epoch": 50.57, + "learning_rate": 4.3678638059701494e-05, + "loss": 0.0006, + "step": 54212 + }, + { + "epoch": 50.57, + "learning_rate": 4.367817164179105e-05, + "loss": 0.0, + "step": 54216 + }, + { + "epoch": 50.58, + "learning_rate": 4.36777052238806e-05, + "loss": 0.0, + "step": 54220 + }, + { + "epoch": 50.58, + "learning_rate": 4.367723880597015e-05, + "loss": 0.0, + "step": 54224 + }, + { + "epoch": 50.59, + "learning_rate": 4.367677238805971e-05, + "loss": 0.0, + "step": 54228 + }, + { + "epoch": 50.59, + "learning_rate": 4.3676305970149255e-05, + "loss": 0.0, + "step": 54232 + }, + { + "epoch": 50.59, + "learning_rate": 4.367583955223881e-05, + "loss": 0.0, + "step": 54236 + }, + { + "epoch": 50.6, + "learning_rate": 4.3675373134328365e-05, + "loss": 0.0006, + "step": 54240 + }, + { + "epoch": 50.6, + "learning_rate": 4.367490671641791e-05, + "loss": 0.0, + "step": 54244 + }, + { + "epoch": 50.6, + "learning_rate": 4.367444029850746e-05, + "loss": 0.0001, + "step": 54248 + }, + { + "epoch": 50.61, + "learning_rate": 4.3673973880597016e-05, + "loss": 0.0, + "step": 54252 + }, + { + "epoch": 50.61, + "learning_rate": 4.367350746268657e-05, + "loss": 0.0002, + "step": 54256 + }, + { + "epoch": 50.62, + "learning_rate": 4.367304104477612e-05, + "loss": 0.0, + "step": 54260 + }, + { + "epoch": 50.62, + "learning_rate": 4.3672574626865674e-05, + "loss": 0.0, + "step": 54264 + }, + { + "epoch": 50.62, + "learning_rate": 4.367210820895523e-05, + "loss": 0.0, + "step": 54268 + }, + { + "epoch": 50.63, + "learning_rate": 4.367164179104478e-05, + "loss": 0.0001, + "step": 54272 + }, + { + "epoch": 50.63, + "learning_rate": 4.367117537313433e-05, + "loss": 0.0057, + "step": 54276 + }, + { + "epoch": 50.63, + "learning_rate": 4.367070895522388e-05, + "loss": 0.0004, + "step": 54280 + }, + { + "epoch": 50.64, + "learning_rate": 4.3670242537313435e-05, + "loss": 0.0, + "step": 54284 + }, + { + "epoch": 50.64, + "learning_rate": 4.366977611940299e-05, + "loss": 0.0, + "step": 54288 + }, + { + "epoch": 50.65, + "learning_rate": 4.366930970149254e-05, + "loss": 0.0006, + "step": 54292 + }, + { + "epoch": 50.65, + "learning_rate": 4.366884328358209e-05, + "loss": 0.0, + "step": 54296 + }, + { + "epoch": 50.65, + "learning_rate": 4.366837686567165e-05, + "loss": 0.0, + "step": 54300 + }, + { + "epoch": 50.66, + "learning_rate": 4.3667910447761196e-05, + "loss": 0.0, + "step": 54304 + }, + { + "epoch": 50.66, + "learning_rate": 4.3667444029850744e-05, + "loss": 0.0001, + "step": 54308 + }, + { + "epoch": 50.66, + "learning_rate": 4.36669776119403e-05, + "loss": 0.0012, + "step": 54312 + }, + { + "epoch": 50.67, + "learning_rate": 4.3666511194029854e-05, + "loss": 0.0, + "step": 54316 + }, + { + "epoch": 50.67, + "learning_rate": 4.36660447761194e-05, + "loss": 0.0, + "step": 54320 + }, + { + "epoch": 50.68, + "learning_rate": 4.366557835820896e-05, + "loss": 0.0009, + "step": 54324 + }, + { + "epoch": 50.68, + "learning_rate": 4.366511194029851e-05, + "loss": 0.0001, + "step": 54328 + }, + { + "epoch": 50.68, + "learning_rate": 4.366464552238806e-05, + "loss": 0.0, + "step": 54332 + }, + { + "epoch": 50.69, + "learning_rate": 4.3664179104477615e-05, + "loss": 0.0, + "step": 54336 + }, + { + "epoch": 50.69, + "learning_rate": 4.366371268656716e-05, + "loss": 0.0, + "step": 54340 + }, + { + "epoch": 50.69, + "learning_rate": 4.3663246268656724e-05, + "loss": 0.0, + "step": 54344 + }, + { + "epoch": 50.7, + "learning_rate": 4.366277985074627e-05, + "loss": 0.0, + "step": 54348 + }, + { + "epoch": 50.7, + "learning_rate": 4.366231343283582e-05, + "loss": 0.0002, + "step": 54352 + }, + { + "epoch": 50.71, + "learning_rate": 4.3661847014925376e-05, + "loss": 0.0, + "step": 54356 + }, + { + "epoch": 50.71, + "learning_rate": 4.366138059701493e-05, + "loss": 0.0, + "step": 54360 + }, + { + "epoch": 50.71, + "learning_rate": 4.366091417910448e-05, + "loss": 0.0, + "step": 54364 + }, + { + "epoch": 50.72, + "learning_rate": 4.3660447761194034e-05, + "loss": 0.0, + "step": 54368 + }, + { + "epoch": 50.72, + "learning_rate": 4.365998134328358e-05, + "loss": 0.0, + "step": 54372 + }, + { + "epoch": 50.72, + "learning_rate": 4.365951492537314e-05, + "loss": 0.0, + "step": 54376 + }, + { + "epoch": 50.73, + "learning_rate": 4.365904850746269e-05, + "loss": 0.0001, + "step": 54380 + }, + { + "epoch": 50.73, + "learning_rate": 4.365858208955224e-05, + "loss": 0.0034, + "step": 54384 + }, + { + "epoch": 50.73, + "learning_rate": 4.3658115671641795e-05, + "loss": 0.0045, + "step": 54388 + }, + { + "epoch": 50.74, + "learning_rate": 4.365764925373135e-05, + "loss": 0.0, + "step": 54392 + }, + { + "epoch": 50.74, + "learning_rate": 4.36571828358209e-05, + "loss": 0.0, + "step": 54396 + }, + { + "epoch": 50.75, + "learning_rate": 4.3656716417910446e-05, + "loss": 0.0016, + "step": 54400 + }, + { + "epoch": 50.75, + "learning_rate": 4.365625000000001e-05, + "loss": 0.0011, + "step": 54404 + }, + { + "epoch": 50.75, + "learning_rate": 4.3655783582089556e-05, + "loss": 0.0026, + "step": 54408 + }, + { + "epoch": 50.76, + "learning_rate": 4.3655317164179104e-05, + "loss": 0.0, + "step": 54412 + }, + { + "epoch": 50.76, + "learning_rate": 4.365485074626866e-05, + "loss": 0.0, + "step": 54416 + }, + { + "epoch": 50.76, + "learning_rate": 4.3654384328358214e-05, + "loss": 0.0004, + "step": 54420 + }, + { + "epoch": 50.77, + "learning_rate": 4.365391791044776e-05, + "loss": 0.0006, + "step": 54424 + }, + { + "epoch": 50.77, + "learning_rate": 4.3653451492537317e-05, + "loss": 0.0, + "step": 54428 + }, + { + "epoch": 50.78, + "learning_rate": 4.3652985074626865e-05, + "loss": 0.0001, + "step": 54432 + }, + { + "epoch": 50.78, + "learning_rate": 4.365251865671642e-05, + "loss": 0.0, + "step": 54436 + }, + { + "epoch": 50.78, + "learning_rate": 4.3652052238805975e-05, + "loss": 0.0, + "step": 54440 + }, + { + "epoch": 50.79, + "learning_rate": 4.365158582089552e-05, + "loss": 0.0, + "step": 54444 + }, + { + "epoch": 50.79, + "learning_rate": 4.365111940298508e-05, + "loss": 0.0013, + "step": 54448 + }, + { + "epoch": 50.79, + "learning_rate": 4.365065298507463e-05, + "loss": 0.0, + "step": 54452 + }, + { + "epoch": 50.8, + "learning_rate": 4.365018656716418e-05, + "loss": 0.0, + "step": 54456 + }, + { + "epoch": 50.8, + "learning_rate": 4.364972014925373e-05, + "loss": 0.0, + "step": 54460 + }, + { + "epoch": 50.81, + "learning_rate": 4.3649253731343284e-05, + "loss": 0.0, + "step": 54464 + }, + { + "epoch": 50.81, + "learning_rate": 4.364878731343284e-05, + "loss": 0.0014, + "step": 54468 + }, + { + "epoch": 50.81, + "learning_rate": 4.364832089552239e-05, + "loss": 0.0028, + "step": 54472 + }, + { + "epoch": 50.82, + "learning_rate": 4.364785447761194e-05, + "loss": 0.0, + "step": 54476 + }, + { + "epoch": 50.82, + "learning_rate": 4.3647388059701496e-05, + "loss": 0.0, + "step": 54480 + }, + { + "epoch": 50.82, + "learning_rate": 4.3646921641791045e-05, + "loss": 0.0002, + "step": 54484 + }, + { + "epoch": 50.83, + "learning_rate": 4.36464552238806e-05, + "loss": 0.0, + "step": 54488 + }, + { + "epoch": 50.83, + "learning_rate": 4.364598880597015e-05, + "loss": 0.0, + "step": 54492 + }, + { + "epoch": 50.84, + "learning_rate": 4.36455223880597e-05, + "loss": 0.0017, + "step": 54496 + }, + { + "epoch": 50.84, + "learning_rate": 4.364505597014926e-05, + "loss": 0.0037, + "step": 54500 + }, + { + "epoch": 50.84, + "eval_exact_match": 0.7446808510638298, + "eval_exec": 0.7756286266924565, + "eval_loss": 0.45397448539733887, + "eval_runtime": 1155.302, + "eval_samples_per_second": 0.895, + "step": 54500 + }, + { + "epoch": 50.84, + "learning_rate": 4.3644589552238806e-05, + "loss": 0.0, + "step": 54504 + }, + { + "epoch": 50.85, + "learning_rate": 4.364412313432836e-05, + "loss": 0.0, + "step": 54508 + }, + { + "epoch": 50.85, + "learning_rate": 4.3643656716417915e-05, + "loss": 0.0, + "step": 54512 + }, + { + "epoch": 50.85, + "learning_rate": 4.3643190298507464e-05, + "loss": 0.0, + "step": 54516 + }, + { + "epoch": 50.86, + "learning_rate": 4.364272388059702e-05, + "loss": 0.0, + "step": 54520 + }, + { + "epoch": 50.86, + "learning_rate": 4.3642257462686567e-05, + "loss": 0.0, + "step": 54524 + }, + { + "epoch": 50.87, + "learning_rate": 4.364179104477612e-05, + "loss": 0.0001, + "step": 54528 + }, + { + "epoch": 50.87, + "learning_rate": 4.3641324626865676e-05, + "loss": 0.0, + "step": 54532 + }, + { + "epoch": 50.87, + "learning_rate": 4.3640858208955225e-05, + "loss": 0.0, + "step": 54536 + }, + { + "epoch": 50.88, + "learning_rate": 4.364039179104478e-05, + "loss": 0.0023, + "step": 54540 + }, + { + "epoch": 50.88, + "learning_rate": 4.3639925373134334e-05, + "loss": 0.0, + "step": 54544 + }, + { + "epoch": 50.88, + "learning_rate": 4.363945895522388e-05, + "loss": 0.0, + "step": 54548 + }, + { + "epoch": 50.89, + "learning_rate": 4.363899253731343e-05, + "loss": 0.0, + "step": 54552 + }, + { + "epoch": 50.89, + "learning_rate": 4.363852611940299e-05, + "loss": 0.0, + "step": 54556 + }, + { + "epoch": 50.9, + "learning_rate": 4.363805970149254e-05, + "loss": 0.0066, + "step": 54560 + }, + { + "epoch": 50.9, + "learning_rate": 4.363759328358209e-05, + "loss": 0.0, + "step": 54564 + }, + { + "epoch": 50.9, + "learning_rate": 4.3637126865671643e-05, + "loss": 0.0, + "step": 54568 + }, + { + "epoch": 50.91, + "learning_rate": 4.36366604477612e-05, + "loss": 0.0029, + "step": 54572 + }, + { + "epoch": 50.91, + "learning_rate": 4.3636194029850746e-05, + "loss": 0.0005, + "step": 54576 + }, + { + "epoch": 50.91, + "learning_rate": 4.36357276119403e-05, + "loss": 0.0, + "step": 54580 + }, + { + "epoch": 50.92, + "learning_rate": 4.363526119402985e-05, + "loss": 0.0, + "step": 54584 + }, + { + "epoch": 50.92, + "learning_rate": 4.3634794776119404e-05, + "loss": 0.0003, + "step": 54588 + }, + { + "epoch": 50.93, + "learning_rate": 4.363432835820896e-05, + "loss": 0.0001, + "step": 54592 + }, + { + "epoch": 50.93, + "learning_rate": 4.363386194029851e-05, + "loss": 0.0, + "step": 54596 + }, + { + "epoch": 50.93, + "learning_rate": 4.363339552238806e-05, + "loss": 0.0, + "step": 54600 + }, + { + "epoch": 50.94, + "learning_rate": 4.363292910447762e-05, + "loss": 0.0, + "step": 54604 + }, + { + "epoch": 50.94, + "learning_rate": 4.3632462686567165e-05, + "loss": 0.0032, + "step": 54608 + }, + { + "epoch": 50.94, + "learning_rate": 4.3631996268656714e-05, + "loss": 0.0, + "step": 54612 + }, + { + "epoch": 50.95, + "learning_rate": 4.3631529850746275e-05, + "loss": 0.0, + "step": 54616 + }, + { + "epoch": 50.95, + "learning_rate": 4.363106343283582e-05, + "loss": 0.0, + "step": 54620 + }, + { + "epoch": 50.96, + "learning_rate": 4.363059701492537e-05, + "loss": 0.0057, + "step": 54624 + }, + { + "epoch": 50.96, + "learning_rate": 4.3630130597014926e-05, + "loss": 0.0001, + "step": 54628 + }, + { + "epoch": 50.96, + "learning_rate": 4.362966417910448e-05, + "loss": 0.0, + "step": 54632 + }, + { + "epoch": 50.97, + "learning_rate": 4.362919776119403e-05, + "loss": 0.0001, + "step": 54636 + }, + { + "epoch": 50.97, + "learning_rate": 4.3628731343283584e-05, + "loss": 0.0, + "step": 54640 + }, + { + "epoch": 50.97, + "learning_rate": 4.362826492537313e-05, + "loss": 0.0, + "step": 54644 + }, + { + "epoch": 50.98, + "learning_rate": 4.362779850746269e-05, + "loss": 0.0001, + "step": 54648 + }, + { + "epoch": 50.98, + "learning_rate": 4.362733208955224e-05, + "loss": 0.0001, + "step": 54652 + }, + { + "epoch": 50.98, + "learning_rate": 4.362686567164179e-05, + "loss": 0.0, + "step": 54656 + }, + { + "epoch": 50.99, + "learning_rate": 4.3626399253731345e-05, + "loss": 0.0003, + "step": 54660 + }, + { + "epoch": 50.99, + "learning_rate": 4.36259328358209e-05, + "loss": 0.0, + "step": 54664 + }, + { + "epoch": 51.0, + "learning_rate": 4.362546641791045e-05, + "loss": 0.0, + "step": 54668 + }, + { + "epoch": 51.0, + "learning_rate": 4.3625e-05, + "loss": 0.0013, + "step": 54672 + }, + { + "epoch": 51.0, + "learning_rate": 4.362453358208956e-05, + "loss": 0.0006, + "step": 54676 + }, + { + "epoch": 51.01, + "learning_rate": 4.3624067164179106e-05, + "loss": 0.0006, + "step": 54680 + }, + { + "epoch": 51.01, + "learning_rate": 4.362360074626866e-05, + "loss": 0.0001, + "step": 54684 + }, + { + "epoch": 51.01, + "learning_rate": 4.362313432835821e-05, + "loss": 0.0001, + "step": 54688 + }, + { + "epoch": 51.02, + "learning_rate": 4.3622667910447764e-05, + "loss": 0.0001, + "step": 54692 + }, + { + "epoch": 51.02, + "learning_rate": 4.362220149253732e-05, + "loss": 0.0001, + "step": 54696 + }, + { + "epoch": 51.03, + "learning_rate": 4.362173507462687e-05, + "loss": 0.0, + "step": 54700 + }, + { + "epoch": 51.03, + "learning_rate": 4.3621268656716415e-05, + "loss": 0.0, + "step": 54704 + }, + { + "epoch": 51.03, + "learning_rate": 4.362080223880598e-05, + "loss": 0.0, + "step": 54708 + }, + { + "epoch": 51.04, + "learning_rate": 4.3620335820895525e-05, + "loss": 0.0001, + "step": 54712 + }, + { + "epoch": 51.04, + "learning_rate": 4.3619869402985073e-05, + "loss": 0.0023, + "step": 54716 + }, + { + "epoch": 51.04, + "learning_rate": 4.361940298507463e-05, + "loss": 0.0, + "step": 54720 + }, + { + "epoch": 51.05, + "learning_rate": 4.361893656716418e-05, + "loss": 0.0, + "step": 54724 + }, + { + "epoch": 51.05, + "learning_rate": 4.361847014925373e-05, + "loss": 0.0, + "step": 54728 + }, + { + "epoch": 51.06, + "learning_rate": 4.3618003731343286e-05, + "loss": 0.0, + "step": 54732 + }, + { + "epoch": 51.06, + "learning_rate": 4.361753731343284e-05, + "loss": 0.0, + "step": 54736 + }, + { + "epoch": 51.06, + "learning_rate": 4.361707089552239e-05, + "loss": 0.0002, + "step": 54740 + }, + { + "epoch": 51.07, + "learning_rate": 4.3616604477611944e-05, + "loss": 0.0007, + "step": 54744 + }, + { + "epoch": 51.07, + "learning_rate": 4.361613805970149e-05, + "loss": 0.0002, + "step": 54748 + }, + { + "epoch": 51.07, + "learning_rate": 4.361567164179105e-05, + "loss": 0.0005, + "step": 54752 + }, + { + "epoch": 51.08, + "learning_rate": 4.36152052238806e-05, + "loss": 0.0, + "step": 54756 + }, + { + "epoch": 51.08, + "learning_rate": 4.361473880597015e-05, + "loss": 0.0, + "step": 54760 + }, + { + "epoch": 51.09, + "learning_rate": 4.36142723880597e-05, + "loss": 0.0, + "step": 54764 + }, + { + "epoch": 51.09, + "learning_rate": 4.361380597014926e-05, + "loss": 0.0, + "step": 54768 + }, + { + "epoch": 51.09, + "learning_rate": 4.361333955223881e-05, + "loss": 0.0092, + "step": 54772 + }, + { + "epoch": 51.1, + "learning_rate": 4.3612873134328356e-05, + "loss": 0.0002, + "step": 54776 + }, + { + "epoch": 51.1, + "learning_rate": 4.361240671641791e-05, + "loss": 0.0003, + "step": 54780 + }, + { + "epoch": 51.1, + "learning_rate": 4.3611940298507466e-05, + "loss": 0.0001, + "step": 54784 + }, + { + "epoch": 51.11, + "learning_rate": 4.3611473880597014e-05, + "loss": 0.0, + "step": 54788 + }, + { + "epoch": 51.11, + "learning_rate": 4.361100746268657e-05, + "loss": 0.0, + "step": 54792 + }, + { + "epoch": 51.12, + "learning_rate": 4.3610541044776124e-05, + "loss": 0.0007, + "step": 54796 + }, + { + "epoch": 51.12, + "learning_rate": 4.361007462686567e-05, + "loss": 0.0, + "step": 54800 + }, + { + "epoch": 51.12, + "learning_rate": 4.360960820895523e-05, + "loss": 0.0001, + "step": 54804 + }, + { + "epoch": 51.13, + "learning_rate": 4.3609141791044775e-05, + "loss": 0.0, + "step": 54808 + }, + { + "epoch": 51.13, + "learning_rate": 4.360867537313433e-05, + "loss": 0.0, + "step": 54812 + }, + { + "epoch": 51.13, + "learning_rate": 4.3608208955223885e-05, + "loss": 0.0, + "step": 54816 + }, + { + "epoch": 51.14, + "learning_rate": 4.360774253731343e-05, + "loss": 0.0, + "step": 54820 + }, + { + "epoch": 51.14, + "learning_rate": 4.360727611940298e-05, + "loss": 0.0, + "step": 54824 + }, + { + "epoch": 51.15, + "learning_rate": 4.360680970149254e-05, + "loss": 0.0, + "step": 54828 + }, + { + "epoch": 51.15, + "learning_rate": 4.360634328358209e-05, + "loss": 0.0, + "step": 54832 + }, + { + "epoch": 51.15, + "learning_rate": 4.3605876865671646e-05, + "loss": 0.0012, + "step": 54836 + }, + { + "epoch": 51.16, + "learning_rate": 4.3605410447761194e-05, + "loss": 0.0001, + "step": 54840 + }, + { + "epoch": 51.16, + "learning_rate": 4.360494402985075e-05, + "loss": 0.0, + "step": 54844 + }, + { + "epoch": 51.16, + "learning_rate": 4.3604477611940304e-05, + "loss": 0.0, + "step": 54848 + }, + { + "epoch": 51.17, + "learning_rate": 4.360401119402985e-05, + "loss": 0.0, + "step": 54852 + }, + { + "epoch": 51.17, + "learning_rate": 4.360354477611941e-05, + "loss": 0.0, + "step": 54856 + }, + { + "epoch": 51.18, + "learning_rate": 4.360307835820896e-05, + "loss": 0.0, + "step": 54860 + }, + { + "epoch": 51.18, + "learning_rate": 4.360261194029851e-05, + "loss": 0.0003, + "step": 54864 + }, + { + "epoch": 51.18, + "learning_rate": 4.360214552238806e-05, + "loss": 0.0001, + "step": 54868 + }, + { + "epoch": 51.19, + "learning_rate": 4.360167910447761e-05, + "loss": 0.0, + "step": 54872 + }, + { + "epoch": 51.19, + "learning_rate": 4.360121268656717e-05, + "loss": 0.0001, + "step": 54876 + }, + { + "epoch": 51.19, + "learning_rate": 4.3600746268656716e-05, + "loss": 0.0009, + "step": 54880 + }, + { + "epoch": 51.2, + "learning_rate": 4.360027985074627e-05, + "loss": 0.0003, + "step": 54884 + }, + { + "epoch": 51.2, + "learning_rate": 4.3599813432835826e-05, + "loss": 0.0, + "step": 54888 + }, + { + "epoch": 51.21, + "learning_rate": 4.3599347014925374e-05, + "loss": 0.0001, + "step": 54892 + }, + { + "epoch": 51.21, + "learning_rate": 4.359888059701493e-05, + "loss": 0.0, + "step": 54896 + }, + { + "epoch": 51.21, + "learning_rate": 4.359841417910448e-05, + "loss": 0.0002, + "step": 54900 + }, + { + "epoch": 51.22, + "learning_rate": 4.359794776119403e-05, + "loss": 0.0, + "step": 54904 + }, + { + "epoch": 51.22, + "learning_rate": 4.359748134328359e-05, + "loss": 0.0, + "step": 54908 + }, + { + "epoch": 51.22, + "learning_rate": 4.3597014925373135e-05, + "loss": 0.0, + "step": 54912 + }, + { + "epoch": 51.23, + "learning_rate": 4.359654850746269e-05, + "loss": 0.0, + "step": 54916 + }, + { + "epoch": 51.23, + "learning_rate": 4.3596082089552245e-05, + "loss": 0.0, + "step": 54920 + }, + { + "epoch": 51.24, + "learning_rate": 4.359561567164179e-05, + "loss": 0.004, + "step": 54924 + }, + { + "epoch": 51.24, + "learning_rate": 4.359514925373134e-05, + "loss": 0.0, + "step": 54928 + }, + { + "epoch": 51.24, + "learning_rate": 4.3594682835820896e-05, + "loss": 0.0001, + "step": 54932 + }, + { + "epoch": 51.25, + "learning_rate": 4.359421641791045e-05, + "loss": 0.0001, + "step": 54936 + }, + { + "epoch": 51.25, + "learning_rate": 4.359375e-05, + "loss": 0.0, + "step": 54940 + }, + { + "epoch": 51.25, + "learning_rate": 4.3593283582089554e-05, + "loss": 0.0, + "step": 54944 + }, + { + "epoch": 51.26, + "learning_rate": 4.359281716417911e-05, + "loss": 0.0, + "step": 54948 + }, + { + "epoch": 51.26, + "learning_rate": 4.359235074626866e-05, + "loss": 0.0, + "step": 54952 + }, + { + "epoch": 51.26, + "learning_rate": 4.359188432835821e-05, + "loss": 0.0, + "step": 54956 + }, + { + "epoch": 51.27, + "learning_rate": 4.359141791044776e-05, + "loss": 0.0, + "step": 54960 + }, + { + "epoch": 51.27, + "learning_rate": 4.3590951492537315e-05, + "loss": 0.0, + "step": 54964 + }, + { + "epoch": 51.28, + "learning_rate": 4.359048507462687e-05, + "loss": 0.0001, + "step": 54968 + }, + { + "epoch": 51.28, + "learning_rate": 4.359001865671642e-05, + "loss": 0.0, + "step": 54972 + }, + { + "epoch": 51.28, + "learning_rate": 4.358955223880597e-05, + "loss": 0.0006, + "step": 54976 + }, + { + "epoch": 51.29, + "learning_rate": 4.358908582089553e-05, + "loss": 0.0003, + "step": 54980 + }, + { + "epoch": 51.29, + "learning_rate": 4.3588619402985076e-05, + "loss": 0.0, + "step": 54984 + }, + { + "epoch": 51.29, + "learning_rate": 4.3588152985074624e-05, + "loss": 0.0, + "step": 54988 + }, + { + "epoch": 51.3, + "learning_rate": 4.358768656716418e-05, + "loss": 0.0001, + "step": 54992 + }, + { + "epoch": 51.3, + "learning_rate": 4.3587220149253734e-05, + "loss": 0.0, + "step": 54996 + }, + { + "epoch": 51.31, + "learning_rate": 4.358675373134329e-05, + "loss": 0.0, + "step": 55000 + }, + { + "epoch": 51.31, + "eval_exact_match": 0.7379110251450677, + "eval_exec": 0.758220502901354, + "eval_loss": 0.4803534746170044, + "eval_runtime": 1126.6002, + "eval_samples_per_second": 0.918, + "step": 55000 + }, + { + "epoch": 51.31, + "learning_rate": 4.358628731343284e-05, + "loss": 0.0, + "step": 55004 + }, + { + "epoch": 51.31, + "learning_rate": 4.358582089552239e-05, + "loss": 0.0, + "step": 55008 + }, + { + "epoch": 51.32, + "learning_rate": 4.358535447761195e-05, + "loss": 0.0004, + "step": 55012 + }, + { + "epoch": 51.32, + "learning_rate": 4.3584888059701495e-05, + "loss": 0.001, + "step": 55016 + }, + { + "epoch": 51.32, + "learning_rate": 4.358442164179104e-05, + "loss": 0.0, + "step": 55020 + }, + { + "epoch": 51.33, + "learning_rate": 4.3583955223880605e-05, + "loss": 0.0, + "step": 55024 + }, + { + "epoch": 51.33, + "learning_rate": 4.358348880597015e-05, + "loss": 0.001, + "step": 55028 + }, + { + "epoch": 51.34, + "learning_rate": 4.35830223880597e-05, + "loss": 0.0003, + "step": 55032 + }, + { + "epoch": 51.34, + "learning_rate": 4.3582555970149256e-05, + "loss": 0.0001, + "step": 55036 + }, + { + "epoch": 51.34, + "learning_rate": 4.358208955223881e-05, + "loss": 0.0013, + "step": 55040 + }, + { + "epoch": 51.35, + "learning_rate": 4.358162313432836e-05, + "loss": 0.0, + "step": 55044 + }, + { + "epoch": 51.35, + "learning_rate": 4.3581156716417914e-05, + "loss": 0.0, + "step": 55048 + }, + { + "epoch": 51.35, + "learning_rate": 4.358069029850746e-05, + "loss": 0.0, + "step": 55052 + }, + { + "epoch": 51.36, + "learning_rate": 4.358022388059702e-05, + "loss": 0.0, + "step": 55056 + }, + { + "epoch": 51.36, + "learning_rate": 4.357975746268657e-05, + "loss": 0.0, + "step": 55060 + }, + { + "epoch": 51.37, + "learning_rate": 4.357929104477612e-05, + "loss": 0.0, + "step": 55064 + }, + { + "epoch": 51.37, + "learning_rate": 4.3578824626865675e-05, + "loss": 0.0, + "step": 55068 + }, + { + "epoch": 51.37, + "learning_rate": 4.357835820895523e-05, + "loss": 0.0053, + "step": 55072 + }, + { + "epoch": 51.38, + "learning_rate": 4.357789179104478e-05, + "loss": 0.0002, + "step": 55076 + }, + { + "epoch": 51.38, + "learning_rate": 4.3577425373134326e-05, + "loss": 0.0, + "step": 55080 + }, + { + "epoch": 51.38, + "learning_rate": 4.357695895522389e-05, + "loss": 0.0, + "step": 55084 + }, + { + "epoch": 51.39, + "learning_rate": 4.3576492537313436e-05, + "loss": 0.0, + "step": 55088 + }, + { + "epoch": 51.39, + "learning_rate": 4.3576026119402984e-05, + "loss": 0.0001, + "step": 55092 + }, + { + "epoch": 51.4, + "learning_rate": 4.357555970149254e-05, + "loss": 0.0, + "step": 55096 + }, + { + "epoch": 51.4, + "learning_rate": 4.3575093283582094e-05, + "loss": 0.0, + "step": 55100 + }, + { + "epoch": 51.4, + "learning_rate": 4.357462686567164e-05, + "loss": 0.0, + "step": 55104 + }, + { + "epoch": 51.41, + "learning_rate": 4.35741604477612e-05, + "loss": 0.0, + "step": 55108 + }, + { + "epoch": 51.41, + "learning_rate": 4.3573694029850745e-05, + "loss": 0.0002, + "step": 55112 + }, + { + "epoch": 51.41, + "learning_rate": 4.35732276119403e-05, + "loss": 0.0002, + "step": 55116 + }, + { + "epoch": 51.42, + "learning_rate": 4.3572761194029855e-05, + "loss": 0.0, + "step": 55120 + }, + { + "epoch": 51.42, + "learning_rate": 4.35722947761194e-05, + "loss": 0.0, + "step": 55124 + }, + { + "epoch": 51.43, + "learning_rate": 4.357182835820896e-05, + "loss": 0.0, + "step": 55128 + }, + { + "epoch": 51.43, + "learning_rate": 4.357136194029851e-05, + "loss": 0.0, + "step": 55132 + }, + { + "epoch": 51.43, + "learning_rate": 4.357089552238806e-05, + "loss": 0.0, + "step": 55136 + }, + { + "epoch": 51.44, + "learning_rate": 4.357042910447761e-05, + "loss": 0.0001, + "step": 55140 + }, + { + "epoch": 51.44, + "learning_rate": 4.3569962686567164e-05, + "loss": 0.0001, + "step": 55144 + }, + { + "epoch": 51.44, + "learning_rate": 4.356949626865672e-05, + "loss": 0.0001, + "step": 55148 + }, + { + "epoch": 51.45, + "learning_rate": 4.356902985074627e-05, + "loss": 0.0001, + "step": 55152 + }, + { + "epoch": 51.45, + "learning_rate": 4.356856343283582e-05, + "loss": 0.0039, + "step": 55156 + }, + { + "epoch": 51.46, + "learning_rate": 4.356809701492538e-05, + "loss": 0.0003, + "step": 55160 + }, + { + "epoch": 51.46, + "learning_rate": 4.356763059701493e-05, + "loss": 0.0, + "step": 55164 + }, + { + "epoch": 51.46, + "learning_rate": 4.356716417910448e-05, + "loss": 0.0, + "step": 55168 + }, + { + "epoch": 51.47, + "learning_rate": 4.356669776119403e-05, + "loss": 0.0, + "step": 55172 + }, + { + "epoch": 51.47, + "learning_rate": 4.356623134328359e-05, + "loss": 0.0, + "step": 55176 + }, + { + "epoch": 51.47, + "learning_rate": 4.356576492537314e-05, + "loss": 0.0004, + "step": 55180 + }, + { + "epoch": 51.48, + "learning_rate": 4.3565298507462686e-05, + "loss": 0.0001, + "step": 55184 + }, + { + "epoch": 51.48, + "learning_rate": 4.356483208955224e-05, + "loss": 0.0, + "step": 55188 + }, + { + "epoch": 51.49, + "learning_rate": 4.3564365671641796e-05, + "loss": 0.0001, + "step": 55192 + }, + { + "epoch": 51.49, + "learning_rate": 4.3563899253731344e-05, + "loss": 0.0001, + "step": 55196 + }, + { + "epoch": 51.49, + "learning_rate": 4.35634328358209e-05, + "loss": 0.0, + "step": 55200 + }, + { + "epoch": 51.5, + "learning_rate": 4.356296641791045e-05, + "loss": 0.0, + "step": 55204 + }, + { + "epoch": 51.5, + "learning_rate": 4.35625e-05, + "loss": 0.0002, + "step": 55208 + }, + { + "epoch": 51.5, + "learning_rate": 4.356203358208956e-05, + "loss": 0.0, + "step": 55212 + }, + { + "epoch": 51.51, + "learning_rate": 4.3561567164179105e-05, + "loss": 0.0001, + "step": 55216 + }, + { + "epoch": 51.51, + "learning_rate": 4.356110074626866e-05, + "loss": 0.0, + "step": 55220 + }, + { + "epoch": 51.51, + "learning_rate": 4.3560634328358215e-05, + "loss": 0.0, + "step": 55224 + }, + { + "epoch": 51.52, + "learning_rate": 4.356016791044776e-05, + "loss": 0.0001, + "step": 55228 + }, + { + "epoch": 51.52, + "learning_rate": 4.355970149253731e-05, + "loss": 0.0017, + "step": 55232 + }, + { + "epoch": 51.53, + "learning_rate": 4.355923507462687e-05, + "loss": 0.0003, + "step": 55236 + }, + { + "epoch": 51.53, + "learning_rate": 4.355876865671642e-05, + "loss": 0.0, + "step": 55240 + }, + { + "epoch": 51.53, + "learning_rate": 4.355830223880597e-05, + "loss": 0.0, + "step": 55244 + }, + { + "epoch": 51.54, + "learning_rate": 4.3557835820895524e-05, + "loss": 0.0, + "step": 55248 + }, + { + "epoch": 51.54, + "learning_rate": 4.355736940298508e-05, + "loss": 0.0, + "step": 55252 + }, + { + "epoch": 51.54, + "learning_rate": 4.355690298507463e-05, + "loss": 0.0005, + "step": 55256 + }, + { + "epoch": 51.55, + "learning_rate": 4.355643656716418e-05, + "loss": 0.0, + "step": 55260 + }, + { + "epoch": 51.55, + "learning_rate": 4.355597014925373e-05, + "loss": 0.0003, + "step": 55264 + }, + { + "epoch": 51.56, + "learning_rate": 4.3555503731343285e-05, + "loss": 0.0001, + "step": 55268 + }, + { + "epoch": 51.56, + "learning_rate": 4.355503731343284e-05, + "loss": 0.0, + "step": 55272 + }, + { + "epoch": 51.56, + "learning_rate": 4.355457089552239e-05, + "loss": 0.0, + "step": 55276 + }, + { + "epoch": 51.57, + "learning_rate": 4.355410447761194e-05, + "loss": 0.0, + "step": 55280 + }, + { + "epoch": 51.57, + "learning_rate": 4.35536380597015e-05, + "loss": 0.0009, + "step": 55284 + }, + { + "epoch": 51.57, + "learning_rate": 4.3553171641791046e-05, + "loss": 0.0, + "step": 55288 + }, + { + "epoch": 51.58, + "learning_rate": 4.3552705223880594e-05, + "loss": 0.0, + "step": 55292 + }, + { + "epoch": 51.58, + "learning_rate": 4.3552238805970156e-05, + "loss": 0.0, + "step": 55296 + }, + { + "epoch": 51.59, + "learning_rate": 4.3551772388059704e-05, + "loss": 0.0, + "step": 55300 + }, + { + "epoch": 51.59, + "learning_rate": 4.355130597014925e-05, + "loss": 0.004, + "step": 55304 + }, + { + "epoch": 51.59, + "learning_rate": 4.355083955223881e-05, + "loss": 0.0, + "step": 55308 + }, + { + "epoch": 51.6, + "learning_rate": 4.355037313432836e-05, + "loss": 0.0, + "step": 55312 + }, + { + "epoch": 51.6, + "learning_rate": 4.3549906716417917e-05, + "loss": 0.004, + "step": 55316 + }, + { + "epoch": 51.6, + "learning_rate": 4.3549440298507465e-05, + "loss": 0.0, + "step": 55320 + }, + { + "epoch": 51.61, + "learning_rate": 4.354897388059701e-05, + "loss": 0.0002, + "step": 55324 + }, + { + "epoch": 51.61, + "learning_rate": 4.3548507462686574e-05, + "loss": 0.0, + "step": 55328 + }, + { + "epoch": 51.62, + "learning_rate": 4.354804104477612e-05, + "loss": 0.0001, + "step": 55332 + }, + { + "epoch": 51.62, + "learning_rate": 4.354757462686567e-05, + "loss": 0.0017, + "step": 55336 + }, + { + "epoch": 51.62, + "learning_rate": 4.3547108208955226e-05, + "loss": 0.0, + "step": 55340 + }, + { + "epoch": 51.63, + "learning_rate": 4.354664179104478e-05, + "loss": 0.0002, + "step": 55344 + }, + { + "epoch": 51.63, + "learning_rate": 4.354617537313433e-05, + "loss": 0.0, + "step": 55348 + }, + { + "epoch": 51.63, + "learning_rate": 4.3545708955223884e-05, + "loss": 0.0, + "step": 55352 + }, + { + "epoch": 51.64, + "learning_rate": 4.354524253731344e-05, + "loss": 0.0001, + "step": 55356 + }, + { + "epoch": 51.64, + "learning_rate": 4.354477611940299e-05, + "loss": 0.0027, + "step": 55360 + }, + { + "epoch": 51.65, + "learning_rate": 4.354430970149254e-05, + "loss": 0.0, + "step": 55364 + }, + { + "epoch": 51.65, + "learning_rate": 4.354384328358209e-05, + "loss": 0.0, + "step": 55368 + }, + { + "epoch": 51.65, + "learning_rate": 4.3543376865671645e-05, + "loss": 0.0043, + "step": 55372 + }, + { + "epoch": 51.66, + "learning_rate": 4.35429104477612e-05, + "loss": 0.0015, + "step": 55376 + }, + { + "epoch": 51.66, + "learning_rate": 4.354244402985075e-05, + "loss": 0.0, + "step": 55380 + }, + { + "epoch": 51.66, + "learning_rate": 4.3541977611940296e-05, + "loss": 0.0005, + "step": 55384 + }, + { + "epoch": 51.67, + "learning_rate": 4.354151119402986e-05, + "loss": 0.0001, + "step": 55388 + }, + { + "epoch": 51.67, + "learning_rate": 4.3541044776119406e-05, + "loss": 0.0, + "step": 55392 + }, + { + "epoch": 51.68, + "learning_rate": 4.3540578358208954e-05, + "loss": 0.0, + "step": 55396 + }, + { + "epoch": 51.68, + "learning_rate": 4.354011194029851e-05, + "loss": 0.0001, + "step": 55400 + }, + { + "epoch": 51.68, + "learning_rate": 4.3539645522388063e-05, + "loss": 0.0, + "step": 55404 + }, + { + "epoch": 51.69, + "learning_rate": 4.353917910447761e-05, + "loss": 0.0, + "step": 55408 + }, + { + "epoch": 51.69, + "learning_rate": 4.3538712686567167e-05, + "loss": 0.0001, + "step": 55412 + }, + { + "epoch": 51.69, + "learning_rate": 4.353824626865672e-05, + "loss": 0.0001, + "step": 55416 + }, + { + "epoch": 51.7, + "learning_rate": 4.353777985074627e-05, + "loss": 0.0029, + "step": 55420 + }, + { + "epoch": 51.7, + "learning_rate": 4.3537313432835824e-05, + "loss": 0.0116, + "step": 55424 + }, + { + "epoch": 51.71, + "learning_rate": 4.353684701492537e-05, + "loss": 0.0006, + "step": 55428 + }, + { + "epoch": 51.71, + "learning_rate": 4.353638059701493e-05, + "loss": 0.0003, + "step": 55432 + }, + { + "epoch": 51.71, + "learning_rate": 4.353591417910448e-05, + "loss": 0.0, + "step": 55436 + }, + { + "epoch": 51.72, + "learning_rate": 4.353544776119403e-05, + "loss": 0.0006, + "step": 55440 + }, + { + "epoch": 51.72, + "learning_rate": 4.353498134328358e-05, + "loss": 0.0, + "step": 55444 + }, + { + "epoch": 51.72, + "learning_rate": 4.353451492537314e-05, + "loss": 0.0004, + "step": 55448 + }, + { + "epoch": 51.73, + "learning_rate": 4.353404850746269e-05, + "loss": 0.001, + "step": 55452 + }, + { + "epoch": 51.73, + "learning_rate": 4.353358208955224e-05, + "loss": 0.0003, + "step": 55456 + }, + { + "epoch": 51.73, + "learning_rate": 4.353311567164179e-05, + "loss": 0.001, + "step": 55460 + }, + { + "epoch": 51.74, + "learning_rate": 4.3532649253731346e-05, + "loss": 0.0, + "step": 55464 + }, + { + "epoch": 51.74, + "learning_rate": 4.3532182835820895e-05, + "loss": 0.0005, + "step": 55468 + }, + { + "epoch": 51.75, + "learning_rate": 4.353171641791045e-05, + "loss": 0.0009, + "step": 55472 + }, + { + "epoch": 51.75, + "learning_rate": 4.3531250000000004e-05, + "loss": 0.0001, + "step": 55476 + }, + { + "epoch": 51.75, + "learning_rate": 4.353078358208956e-05, + "loss": 0.0, + "step": 55480 + }, + { + "epoch": 51.76, + "learning_rate": 4.353031716417911e-05, + "loss": 0.0028, + "step": 55484 + }, + { + "epoch": 51.76, + "learning_rate": 4.3529850746268656e-05, + "loss": 0.0, + "step": 55488 + }, + { + "epoch": 51.76, + "learning_rate": 4.352938432835821e-05, + "loss": 0.0, + "step": 55492 + }, + { + "epoch": 51.77, + "learning_rate": 4.3528917910447765e-05, + "loss": 0.0, + "step": 55496 + }, + { + "epoch": 51.77, + "learning_rate": 4.3528451492537314e-05, + "loss": 0.0, + "step": 55500 + }, + { + "epoch": 51.77, + "eval_exact_match": 0.741779497098646, + "eval_exec": 0.758220502901354, + "eval_loss": 0.46916627883911133, + "eval_runtime": 1122.7652, + "eval_samples_per_second": 0.921, + "step": 55500 + }, + { + "epoch": 51.78, + "learning_rate": 4.352798507462687e-05, + "loss": 0.0, + "step": 55504 + }, + { + "epoch": 51.78, + "learning_rate": 4.352751865671642e-05, + "loss": 0.0, + "step": 55508 + }, + { + "epoch": 51.78, + "learning_rate": 4.352705223880597e-05, + "loss": 0.0034, + "step": 55512 + }, + { + "epoch": 51.79, + "learning_rate": 4.3526585820895526e-05, + "loss": 0.0009, + "step": 55516 + }, + { + "epoch": 51.79, + "learning_rate": 4.3526119402985075e-05, + "loss": 0.002, + "step": 55520 + }, + { + "epoch": 51.79, + "learning_rate": 4.352565298507463e-05, + "loss": 0.0001, + "step": 55524 + }, + { + "epoch": 51.8, + "learning_rate": 4.3525186567164184e-05, + "loss": 0.0008, + "step": 55528 + }, + { + "epoch": 51.8, + "learning_rate": 4.352472014925373e-05, + "loss": 0.0002, + "step": 55532 + }, + { + "epoch": 51.81, + "learning_rate": 4.352425373134329e-05, + "loss": 0.0, + "step": 55536 + }, + { + "epoch": 51.81, + "learning_rate": 4.352378731343284e-05, + "loss": 0.0005, + "step": 55540 + }, + { + "epoch": 51.81, + "learning_rate": 4.352332089552239e-05, + "loss": 0.0001, + "step": 55544 + }, + { + "epoch": 51.82, + "learning_rate": 4.352285447761194e-05, + "loss": 0.0, + "step": 55548 + }, + { + "epoch": 51.82, + "learning_rate": 4.3522388059701493e-05, + "loss": 0.0001, + "step": 55552 + }, + { + "epoch": 51.82, + "learning_rate": 4.352192164179105e-05, + "loss": 0.0, + "step": 55556 + }, + { + "epoch": 51.83, + "learning_rate": 4.3521455223880596e-05, + "loss": 0.0, + "step": 55560 + }, + { + "epoch": 51.83, + "learning_rate": 4.352098880597015e-05, + "loss": 0.0004, + "step": 55564 + }, + { + "epoch": 51.84, + "learning_rate": 4.3520522388059706e-05, + "loss": 0.0001, + "step": 55568 + }, + { + "epoch": 51.84, + "learning_rate": 4.3520055970149254e-05, + "loss": 0.0, + "step": 55572 + }, + { + "epoch": 51.84, + "learning_rate": 4.351958955223881e-05, + "loss": 0.0005, + "step": 55576 + }, + { + "epoch": 51.85, + "learning_rate": 4.351912313432836e-05, + "loss": 0.0001, + "step": 55580 + }, + { + "epoch": 51.85, + "learning_rate": 4.351865671641791e-05, + "loss": 0.0, + "step": 55584 + }, + { + "epoch": 51.85, + "learning_rate": 4.351819029850747e-05, + "loss": 0.0, + "step": 55588 + }, + { + "epoch": 51.86, + "learning_rate": 4.3517723880597015e-05, + "loss": 0.0, + "step": 55592 + }, + { + "epoch": 51.86, + "learning_rate": 4.351725746268657e-05, + "loss": 0.0001, + "step": 55596 + }, + { + "epoch": 51.87, + "learning_rate": 4.3516791044776125e-05, + "loss": 0.0001, + "step": 55600 + }, + { + "epoch": 51.87, + "learning_rate": 4.351632462686567e-05, + "loss": 0.0001, + "step": 55604 + }, + { + "epoch": 51.87, + "learning_rate": 4.351585820895522e-05, + "loss": 0.0, + "step": 55608 + }, + { + "epoch": 51.88, + "learning_rate": 4.3515391791044776e-05, + "loss": 0.0, + "step": 55612 + }, + { + "epoch": 51.88, + "learning_rate": 4.351492537313433e-05, + "loss": 0.0, + "step": 55616 + }, + { + "epoch": 51.88, + "learning_rate": 4.351445895522388e-05, + "loss": 0.0021, + "step": 55620 + }, + { + "epoch": 51.89, + "learning_rate": 4.3513992537313434e-05, + "loss": 0.0, + "step": 55624 + }, + { + "epoch": 51.89, + "learning_rate": 4.351352611940299e-05, + "loss": 0.0003, + "step": 55628 + }, + { + "epoch": 51.9, + "learning_rate": 4.351305970149254e-05, + "loss": 0.0013, + "step": 55632 + }, + { + "epoch": 51.9, + "learning_rate": 4.351259328358209e-05, + "loss": 0.0, + "step": 55636 + }, + { + "epoch": 51.9, + "learning_rate": 4.351212686567164e-05, + "loss": 0.0, + "step": 55640 + }, + { + "epoch": 51.91, + "learning_rate": 4.35116604477612e-05, + "loss": 0.0, + "step": 55644 + }, + { + "epoch": 51.91, + "learning_rate": 4.351119402985075e-05, + "loss": 0.0001, + "step": 55648 + }, + { + "epoch": 51.91, + "learning_rate": 4.35107276119403e-05, + "loss": 0.0001, + "step": 55652 + }, + { + "epoch": 51.92, + "learning_rate": 4.351026119402985e-05, + "loss": 0.0002, + "step": 55656 + }, + { + "epoch": 51.92, + "learning_rate": 4.350979477611941e-05, + "loss": 0.0008, + "step": 55660 + }, + { + "epoch": 51.93, + "learning_rate": 4.3509328358208956e-05, + "loss": 0.0, + "step": 55664 + }, + { + "epoch": 51.93, + "learning_rate": 4.350886194029851e-05, + "loss": 0.0, + "step": 55668 + }, + { + "epoch": 51.93, + "learning_rate": 4.350839552238806e-05, + "loss": 0.0, + "step": 55672 + }, + { + "epoch": 51.94, + "learning_rate": 4.3507929104477614e-05, + "loss": 0.0001, + "step": 55676 + }, + { + "epoch": 51.94, + "learning_rate": 4.350746268656717e-05, + "loss": 0.0, + "step": 55680 + }, + { + "epoch": 51.94, + "learning_rate": 4.350699626865672e-05, + "loss": 0.0, + "step": 55684 + }, + { + "epoch": 51.95, + "learning_rate": 4.350652985074627e-05, + "loss": 0.0004, + "step": 55688 + }, + { + "epoch": 51.95, + "learning_rate": 4.350606343283583e-05, + "loss": 0.0, + "step": 55692 + }, + { + "epoch": 51.96, + "learning_rate": 4.3505597014925375e-05, + "loss": 0.0001, + "step": 55696 + }, + { + "epoch": 51.96, + "learning_rate": 4.350513059701492e-05, + "loss": 0.0, + "step": 55700 + }, + { + "epoch": 51.96, + "learning_rate": 4.3504664179104485e-05, + "loss": 0.0001, + "step": 55704 + }, + { + "epoch": 51.97, + "learning_rate": 4.350419776119403e-05, + "loss": 0.0001, + "step": 55708 + }, + { + "epoch": 51.97, + "learning_rate": 4.350373134328358e-05, + "loss": 0.0003, + "step": 55712 + }, + { + "epoch": 51.97, + "learning_rate": 4.3503264925373136e-05, + "loss": 0.0, + "step": 55716 + }, + { + "epoch": 51.98, + "learning_rate": 4.350279850746269e-05, + "loss": 0.0027, + "step": 55720 + }, + { + "epoch": 51.98, + "learning_rate": 4.350233208955224e-05, + "loss": 0.0, + "step": 55724 + }, + { + "epoch": 51.98, + "learning_rate": 4.3501865671641794e-05, + "loss": 0.0002, + "step": 55728 + }, + { + "epoch": 51.99, + "learning_rate": 4.350139925373134e-05, + "loss": 0.0, + "step": 55732 + }, + { + "epoch": 51.99, + "learning_rate": 4.35009328358209e-05, + "loss": 0.0001, + "step": 55736 + }, + { + "epoch": 52.0, + "learning_rate": 4.350046641791045e-05, + "loss": 0.0003, + "step": 55740 + }, + { + "epoch": 52.0, + "learning_rate": 4.35e-05, + "loss": 0.0, + "step": 55744 + }, + { + "epoch": 52.0, + "learning_rate": 4.3499533582089555e-05, + "loss": 0.0, + "step": 55748 + }, + { + "epoch": 52.01, + "learning_rate": 4.349906716417911e-05, + "loss": 0.0, + "step": 55752 + }, + { + "epoch": 52.01, + "learning_rate": 4.349860074626866e-05, + "loss": 0.0, + "step": 55756 + }, + { + "epoch": 52.01, + "learning_rate": 4.3498134328358206e-05, + "loss": 0.0, + "step": 55760 + }, + { + "epoch": 52.02, + "learning_rate": 4.349766791044777e-05, + "loss": 0.0, + "step": 55764 + }, + { + "epoch": 52.02, + "learning_rate": 4.3497201492537316e-05, + "loss": 0.0, + "step": 55768 + }, + { + "epoch": 52.03, + "learning_rate": 4.3496735074626864e-05, + "loss": 0.0001, + "step": 55772 + }, + { + "epoch": 52.03, + "learning_rate": 4.349626865671642e-05, + "loss": 0.0, + "step": 55776 + }, + { + "epoch": 52.03, + "learning_rate": 4.3495802238805974e-05, + "loss": 0.0006, + "step": 55780 + }, + { + "epoch": 52.04, + "learning_rate": 4.349533582089552e-05, + "loss": 0.0044, + "step": 55784 + }, + { + "epoch": 52.04, + "learning_rate": 4.349486940298508e-05, + "loss": 0.0, + "step": 55788 + }, + { + "epoch": 52.04, + "learning_rate": 4.3494402985074625e-05, + "loss": 0.0, + "step": 55792 + }, + { + "epoch": 52.05, + "learning_rate": 4.349393656716418e-05, + "loss": 0.0, + "step": 55796 + }, + { + "epoch": 52.05, + "learning_rate": 4.3493470149253735e-05, + "loss": 0.0005, + "step": 55800 + }, + { + "epoch": 52.06, + "learning_rate": 4.349300373134328e-05, + "loss": 0.0, + "step": 55804 + }, + { + "epoch": 52.06, + "learning_rate": 4.349253731343284e-05, + "loss": 0.0, + "step": 55808 + }, + { + "epoch": 52.06, + "learning_rate": 4.349207089552239e-05, + "loss": 0.0001, + "step": 55812 + }, + { + "epoch": 52.07, + "learning_rate": 4.349160447761194e-05, + "loss": 0.0024, + "step": 55816 + }, + { + "epoch": 52.07, + "learning_rate": 4.3491138059701496e-05, + "loss": 0.0015, + "step": 55820 + }, + { + "epoch": 52.07, + "learning_rate": 4.3490671641791044e-05, + "loss": 0.0001, + "step": 55824 + }, + { + "epoch": 52.08, + "learning_rate": 4.34902052238806e-05, + "loss": 0.0, + "step": 55828 + }, + { + "epoch": 52.08, + "learning_rate": 4.3489738805970154e-05, + "loss": 0.0, + "step": 55832 + }, + { + "epoch": 52.09, + "learning_rate": 4.34892723880597e-05, + "loss": 0.003, + "step": 55836 + }, + { + "epoch": 52.09, + "learning_rate": 4.348880597014926e-05, + "loss": 0.0, + "step": 55840 + }, + { + "epoch": 52.09, + "learning_rate": 4.348833955223881e-05, + "loss": 0.0, + "step": 55844 + }, + { + "epoch": 52.1, + "learning_rate": 4.348787313432836e-05, + "loss": 0.0008, + "step": 55848 + }, + { + "epoch": 52.1, + "learning_rate": 4.348740671641791e-05, + "loss": 0.0, + "step": 55852 + }, + { + "epoch": 52.1, + "learning_rate": 4.348694029850747e-05, + "loss": 0.0002, + "step": 55856 + }, + { + "epoch": 52.11, + "learning_rate": 4.348647388059702e-05, + "loss": 0.0001, + "step": 55860 + }, + { + "epoch": 52.11, + "learning_rate": 4.3486007462686566e-05, + "loss": 0.0001, + "step": 55864 + }, + { + "epoch": 52.12, + "learning_rate": 4.348554104477612e-05, + "loss": 0.0, + "step": 55868 + }, + { + "epoch": 52.12, + "learning_rate": 4.3485074626865676e-05, + "loss": 0.0004, + "step": 55872 + }, + { + "epoch": 52.12, + "learning_rate": 4.3484608208955224e-05, + "loss": 0.0, + "step": 55876 + }, + { + "epoch": 52.13, + "learning_rate": 4.348414179104478e-05, + "loss": 0.0001, + "step": 55880 + }, + { + "epoch": 52.13, + "learning_rate": 4.348367537313433e-05, + "loss": 0.0, + "step": 55884 + }, + { + "epoch": 52.13, + "learning_rate": 4.348320895522388e-05, + "loss": 0.0, + "step": 55888 + }, + { + "epoch": 52.14, + "learning_rate": 4.348274253731344e-05, + "loss": 0.0, + "step": 55892 + }, + { + "epoch": 52.14, + "learning_rate": 4.3482276119402985e-05, + "loss": 0.0, + "step": 55896 + }, + { + "epoch": 52.15, + "learning_rate": 4.348180970149254e-05, + "loss": 0.0, + "step": 55900 + }, + { + "epoch": 52.15, + "learning_rate": 4.3481343283582095e-05, + "loss": 0.0, + "step": 55904 + }, + { + "epoch": 52.15, + "learning_rate": 4.348087686567164e-05, + "loss": 0.0001, + "step": 55908 + }, + { + "epoch": 52.16, + "learning_rate": 4.348041044776119e-05, + "loss": 0.0003, + "step": 55912 + }, + { + "epoch": 52.16, + "learning_rate": 4.347994402985075e-05, + "loss": 0.0, + "step": 55916 + }, + { + "epoch": 52.16, + "learning_rate": 4.34794776119403e-05, + "loss": 0.0014, + "step": 55920 + }, + { + "epoch": 52.17, + "learning_rate": 4.347901119402985e-05, + "loss": 0.0005, + "step": 55924 + }, + { + "epoch": 52.17, + "learning_rate": 4.3478544776119404e-05, + "loss": 0.0, + "step": 55928 + }, + { + "epoch": 52.18, + "learning_rate": 4.347807835820896e-05, + "loss": 0.0, + "step": 55932 + }, + { + "epoch": 52.18, + "learning_rate": 4.347761194029851e-05, + "loss": 0.0, + "step": 55936 + }, + { + "epoch": 52.18, + "learning_rate": 4.347714552238806e-05, + "loss": 0.0, + "step": 55940 + }, + { + "epoch": 52.19, + "learning_rate": 4.347667910447761e-05, + "loss": 0.0, + "step": 55944 + }, + { + "epoch": 52.19, + "learning_rate": 4.3476212686567165e-05, + "loss": 0.0, + "step": 55948 + }, + { + "epoch": 52.19, + "learning_rate": 4.347574626865672e-05, + "loss": 0.0, + "step": 55952 + }, + { + "epoch": 52.2, + "learning_rate": 4.347527985074627e-05, + "loss": 0.0001, + "step": 55956 + }, + { + "epoch": 52.2, + "learning_rate": 4.347481343283582e-05, + "loss": 0.0, + "step": 55960 + }, + { + "epoch": 52.21, + "learning_rate": 4.347434701492538e-05, + "loss": 0.0, + "step": 55964 + }, + { + "epoch": 52.21, + "learning_rate": 4.3473880597014926e-05, + "loss": 0.0, + "step": 55968 + }, + { + "epoch": 52.21, + "learning_rate": 4.347341417910448e-05, + "loss": 0.0, + "step": 55972 + }, + { + "epoch": 52.22, + "learning_rate": 4.3472947761194036e-05, + "loss": 0.0001, + "step": 55976 + }, + { + "epoch": 52.22, + "learning_rate": 4.3472481343283584e-05, + "loss": 0.0, + "step": 55980 + }, + { + "epoch": 52.22, + "learning_rate": 4.347201492537314e-05, + "loss": 0.0, + "step": 55984 + }, + { + "epoch": 52.23, + "learning_rate": 4.347154850746269e-05, + "loss": 0.0005, + "step": 55988 + }, + { + "epoch": 52.23, + "learning_rate": 4.347108208955224e-05, + "loss": 0.0, + "step": 55992 + }, + { + "epoch": 52.24, + "learning_rate": 4.34706156716418e-05, + "loss": 0.0, + "step": 55996 + }, + { + "epoch": 52.24, + "learning_rate": 4.3470149253731345e-05, + "loss": 0.0, + "step": 56000 + }, + { + "epoch": 52.24, + "eval_exact_match": 0.7398452611218569, + "eval_exec": 0.7630560928433269, + "eval_loss": 0.48453760147094727, + "eval_runtime": 1139.6139, + "eval_samples_per_second": 0.907, + "step": 56000 + }, + { + "epoch": 52.24, + "learning_rate": 4.346968283582089e-05, + "loss": 0.0, + "step": 56004 + }, + { + "epoch": 52.25, + "learning_rate": 4.3469216417910455e-05, + "loss": 0.0, + "step": 56008 + }, + { + "epoch": 52.25, + "learning_rate": 4.346875e-05, + "loss": 0.0, + "step": 56012 + }, + { + "epoch": 52.25, + "learning_rate": 4.346828358208955e-05, + "loss": 0.0, + "step": 56016 + }, + { + "epoch": 52.26, + "learning_rate": 4.3467817164179106e-05, + "loss": 0.0, + "step": 56020 + }, + { + "epoch": 52.26, + "learning_rate": 4.346735074626866e-05, + "loss": 0.0, + "step": 56024 + }, + { + "epoch": 52.26, + "learning_rate": 4.346688432835821e-05, + "loss": 0.0, + "step": 56028 + }, + { + "epoch": 52.27, + "learning_rate": 4.3466417910447764e-05, + "loss": 0.0, + "step": 56032 + }, + { + "epoch": 52.27, + "learning_rate": 4.346595149253732e-05, + "loss": 0.0001, + "step": 56036 + }, + { + "epoch": 52.28, + "learning_rate": 4.346548507462687e-05, + "loss": 0.0, + "step": 56040 + }, + { + "epoch": 52.28, + "learning_rate": 4.346501865671642e-05, + "loss": 0.0, + "step": 56044 + }, + { + "epoch": 52.28, + "learning_rate": 4.346455223880597e-05, + "loss": 0.0032, + "step": 56048 + }, + { + "epoch": 52.29, + "learning_rate": 4.3464085820895525e-05, + "loss": 0.0, + "step": 56052 + }, + { + "epoch": 52.29, + "learning_rate": 4.346361940298508e-05, + "loss": 0.0, + "step": 56056 + }, + { + "epoch": 52.29, + "learning_rate": 4.346315298507463e-05, + "loss": 0.0021, + "step": 56060 + }, + { + "epoch": 52.3, + "learning_rate": 4.3462686567164176e-05, + "loss": 0.0, + "step": 56064 + }, + { + "epoch": 52.3, + "learning_rate": 4.346222014925374e-05, + "loss": 0.0, + "step": 56068 + }, + { + "epoch": 52.31, + "learning_rate": 4.3461753731343286e-05, + "loss": 0.0, + "step": 56072 + }, + { + "epoch": 52.31, + "learning_rate": 4.3461287313432834e-05, + "loss": 0.0, + "step": 56076 + }, + { + "epoch": 52.31, + "learning_rate": 4.346082089552239e-05, + "loss": 0.0, + "step": 56080 + }, + { + "epoch": 52.32, + "learning_rate": 4.3460354477611944e-05, + "loss": 0.0, + "step": 56084 + }, + { + "epoch": 52.32, + "learning_rate": 4.345988805970149e-05, + "loss": 0.0, + "step": 56088 + }, + { + "epoch": 52.32, + "learning_rate": 4.345942164179105e-05, + "loss": 0.0001, + "step": 56092 + }, + { + "epoch": 52.33, + "learning_rate": 4.34589552238806e-05, + "loss": 0.0, + "step": 56096 + }, + { + "epoch": 52.33, + "learning_rate": 4.345848880597015e-05, + "loss": 0.0003, + "step": 56100 + }, + { + "epoch": 52.34, + "learning_rate": 4.3458022388059705e-05, + "loss": 0.0, + "step": 56104 + }, + { + "epoch": 52.34, + "learning_rate": 4.345755597014925e-05, + "loss": 0.0, + "step": 56108 + }, + { + "epoch": 52.34, + "learning_rate": 4.345708955223881e-05, + "loss": 0.0002, + "step": 56112 + }, + { + "epoch": 52.35, + "learning_rate": 4.345662313432836e-05, + "loss": 0.001, + "step": 56116 + }, + { + "epoch": 52.35, + "learning_rate": 4.345615671641791e-05, + "loss": 0.0005, + "step": 56120 + }, + { + "epoch": 52.35, + "learning_rate": 4.345569029850746e-05, + "loss": 0.0041, + "step": 56124 + }, + { + "epoch": 52.36, + "learning_rate": 4.345522388059702e-05, + "loss": 0.0, + "step": 56128 + }, + { + "epoch": 52.36, + "learning_rate": 4.345475746268657e-05, + "loss": 0.0, + "step": 56132 + }, + { + "epoch": 52.37, + "learning_rate": 4.3454291044776124e-05, + "loss": 0.0, + "step": 56136 + }, + { + "epoch": 52.37, + "learning_rate": 4.345382462686567e-05, + "loss": 0.0, + "step": 56140 + }, + { + "epoch": 52.37, + "learning_rate": 4.345335820895523e-05, + "loss": 0.0006, + "step": 56144 + }, + { + "epoch": 52.38, + "learning_rate": 4.345289179104478e-05, + "loss": 0.0001, + "step": 56148 + }, + { + "epoch": 52.38, + "learning_rate": 4.345242537313433e-05, + "loss": 0.0, + "step": 56152 + }, + { + "epoch": 52.38, + "learning_rate": 4.3451958955223885e-05, + "loss": 0.0, + "step": 56156 + }, + { + "epoch": 52.39, + "learning_rate": 4.345149253731344e-05, + "loss": 0.0, + "step": 56160 + }, + { + "epoch": 52.39, + "learning_rate": 4.345102611940299e-05, + "loss": 0.0001, + "step": 56164 + }, + { + "epoch": 52.4, + "learning_rate": 4.3450559701492536e-05, + "loss": 0.0, + "step": 56168 + }, + { + "epoch": 52.4, + "learning_rate": 4.345009328358209e-05, + "loss": 0.0, + "step": 56172 + }, + { + "epoch": 52.4, + "learning_rate": 4.3449626865671646e-05, + "loss": 0.0, + "step": 56176 + }, + { + "epoch": 52.41, + "learning_rate": 4.3449160447761194e-05, + "loss": 0.0, + "step": 56180 + }, + { + "epoch": 52.41, + "learning_rate": 4.344869402985075e-05, + "loss": 0.0, + "step": 56184 + }, + { + "epoch": 52.41, + "learning_rate": 4.3448227611940304e-05, + "loss": 0.0, + "step": 56188 + }, + { + "epoch": 52.42, + "learning_rate": 4.344776119402985e-05, + "loss": 0.0, + "step": 56192 + }, + { + "epoch": 52.42, + "learning_rate": 4.344729477611941e-05, + "loss": 0.0, + "step": 56196 + }, + { + "epoch": 52.43, + "learning_rate": 4.3446828358208955e-05, + "loss": 0.0001, + "step": 56200 + }, + { + "epoch": 52.43, + "learning_rate": 4.344636194029851e-05, + "loss": 0.0002, + "step": 56204 + }, + { + "epoch": 52.43, + "learning_rate": 4.3445895522388065e-05, + "loss": 0.0, + "step": 56208 + }, + { + "epoch": 52.44, + "learning_rate": 4.344542910447761e-05, + "loss": 0.0, + "step": 56212 + }, + { + "epoch": 52.44, + "learning_rate": 4.344496268656717e-05, + "loss": 0.0, + "step": 56216 + }, + { + "epoch": 52.44, + "learning_rate": 4.344449626865672e-05, + "loss": 0.0, + "step": 56220 + }, + { + "epoch": 52.45, + "learning_rate": 4.344402985074627e-05, + "loss": 0.0009, + "step": 56224 + }, + { + "epoch": 52.45, + "learning_rate": 4.344356343283582e-05, + "loss": 0.0008, + "step": 56228 + }, + { + "epoch": 52.46, + "learning_rate": 4.3443097014925374e-05, + "loss": 0.0, + "step": 56232 + }, + { + "epoch": 52.46, + "learning_rate": 4.344263059701493e-05, + "loss": 0.0014, + "step": 56236 + }, + { + "epoch": 52.46, + "learning_rate": 4.344216417910448e-05, + "loss": 0.0, + "step": 56240 + }, + { + "epoch": 52.47, + "learning_rate": 4.344169776119403e-05, + "loss": 0.0, + "step": 56244 + }, + { + "epoch": 52.47, + "learning_rate": 4.3441231343283587e-05, + "loss": 0.0002, + "step": 56248 + }, + { + "epoch": 52.47, + "learning_rate": 4.3440764925373135e-05, + "loss": 0.0, + "step": 56252 + }, + { + "epoch": 52.48, + "learning_rate": 4.344029850746269e-05, + "loss": 0.0, + "step": 56256 + }, + { + "epoch": 52.48, + "learning_rate": 4.343983208955224e-05, + "loss": 0.0002, + "step": 56260 + }, + { + "epoch": 52.49, + "learning_rate": 4.343936567164179e-05, + "loss": 0.0, + "step": 56264 + }, + { + "epoch": 52.49, + "learning_rate": 4.343889925373135e-05, + "loss": 0.0004, + "step": 56268 + }, + { + "epoch": 52.49, + "learning_rate": 4.3438432835820896e-05, + "loss": 0.0, + "step": 56272 + }, + { + "epoch": 52.5, + "learning_rate": 4.343796641791045e-05, + "loss": 0.0, + "step": 56276 + }, + { + "epoch": 52.5, + "learning_rate": 4.3437500000000006e-05, + "loss": 0.0, + "step": 56280 + }, + { + "epoch": 52.5, + "learning_rate": 4.3437033582089554e-05, + "loss": 0.0, + "step": 56284 + }, + { + "epoch": 52.51, + "learning_rate": 4.34365671641791e-05, + "loss": 0.0, + "step": 56288 + }, + { + "epoch": 52.51, + "learning_rate": 4.343610074626866e-05, + "loss": 0.0, + "step": 56292 + }, + { + "epoch": 52.51, + "learning_rate": 4.343563432835821e-05, + "loss": 0.0, + "step": 56296 + }, + { + "epoch": 52.52, + "learning_rate": 4.3435167910447766e-05, + "loss": 0.0, + "step": 56300 + }, + { + "epoch": 52.52, + "learning_rate": 4.3434701492537315e-05, + "loss": 0.0, + "step": 56304 + }, + { + "epoch": 52.53, + "learning_rate": 4.343423507462687e-05, + "loss": 0.0, + "step": 56308 + }, + { + "epoch": 52.53, + "learning_rate": 4.3433768656716424e-05, + "loss": 0.0, + "step": 56312 + }, + { + "epoch": 52.53, + "learning_rate": 4.343330223880597e-05, + "loss": 0.0, + "step": 56316 + }, + { + "epoch": 52.54, + "learning_rate": 4.343283582089552e-05, + "loss": 0.0002, + "step": 56320 + }, + { + "epoch": 52.54, + "learning_rate": 4.343236940298508e-05, + "loss": 0.0, + "step": 56324 + }, + { + "epoch": 52.54, + "learning_rate": 4.343190298507463e-05, + "loss": 0.0004, + "step": 56328 + }, + { + "epoch": 52.55, + "learning_rate": 4.343143656716418e-05, + "loss": 0.0, + "step": 56332 + }, + { + "epoch": 52.55, + "learning_rate": 4.3430970149253734e-05, + "loss": 0.0, + "step": 56336 + }, + { + "epoch": 52.56, + "learning_rate": 4.343050373134329e-05, + "loss": 0.0, + "step": 56340 + }, + { + "epoch": 52.56, + "learning_rate": 4.3430037313432837e-05, + "loss": 0.0, + "step": 56344 + }, + { + "epoch": 52.56, + "learning_rate": 4.342957089552239e-05, + "loss": 0.0, + "step": 56348 + }, + { + "epoch": 52.57, + "learning_rate": 4.342910447761194e-05, + "loss": 0.0, + "step": 56352 + }, + { + "epoch": 52.57, + "learning_rate": 4.3428638059701495e-05, + "loss": 0.0, + "step": 56356 + }, + { + "epoch": 52.57, + "learning_rate": 4.342817164179105e-05, + "loss": 0.0, + "step": 56360 + }, + { + "epoch": 52.58, + "learning_rate": 4.34277052238806e-05, + "loss": 0.0, + "step": 56364 + }, + { + "epoch": 52.58, + "learning_rate": 4.342723880597015e-05, + "loss": 0.0, + "step": 56368 + }, + { + "epoch": 52.59, + "learning_rate": 4.342677238805971e-05, + "loss": 0.0, + "step": 56372 + }, + { + "epoch": 52.59, + "learning_rate": 4.3426305970149256e-05, + "loss": 0.0, + "step": 56376 + }, + { + "epoch": 52.59, + "learning_rate": 4.3425839552238804e-05, + "loss": 0.0, + "step": 56380 + }, + { + "epoch": 52.6, + "learning_rate": 4.3425373134328365e-05, + "loss": 0.0004, + "step": 56384 + }, + { + "epoch": 52.6, + "learning_rate": 4.3424906716417913e-05, + "loss": 0.0, + "step": 56388 + }, + { + "epoch": 52.6, + "learning_rate": 4.342444029850746e-05, + "loss": 0.0001, + "step": 56392 + }, + { + "epoch": 52.61, + "learning_rate": 4.3423973880597017e-05, + "loss": 0.0, + "step": 56396 + }, + { + "epoch": 52.61, + "learning_rate": 4.342350746268657e-05, + "loss": 0.0002, + "step": 56400 + }, + { + "epoch": 52.62, + "learning_rate": 4.342304104477612e-05, + "loss": 0.0004, + "step": 56404 + }, + { + "epoch": 52.62, + "learning_rate": 4.3422574626865674e-05, + "loss": 0.0007, + "step": 56408 + }, + { + "epoch": 52.62, + "learning_rate": 4.342210820895522e-05, + "loss": 0.0, + "step": 56412 + }, + { + "epoch": 52.63, + "learning_rate": 4.342164179104478e-05, + "loss": 0.0, + "step": 56416 + }, + { + "epoch": 52.63, + "learning_rate": 4.342117537313433e-05, + "loss": 0.0, + "step": 56420 + }, + { + "epoch": 52.63, + "learning_rate": 4.342070895522388e-05, + "loss": 0.0, + "step": 56424 + }, + { + "epoch": 52.64, + "learning_rate": 4.3420242537313435e-05, + "loss": 0.0016, + "step": 56428 + }, + { + "epoch": 52.64, + "learning_rate": 4.341977611940299e-05, + "loss": 0.0, + "step": 56432 + }, + { + "epoch": 52.65, + "learning_rate": 4.341930970149254e-05, + "loss": 0.0, + "step": 56436 + }, + { + "epoch": 52.65, + "learning_rate": 4.341884328358209e-05, + "loss": 0.0006, + "step": 56440 + }, + { + "epoch": 52.65, + "learning_rate": 4.341837686567165e-05, + "loss": 0.0004, + "step": 56444 + }, + { + "epoch": 52.66, + "learning_rate": 4.3417910447761196e-05, + "loss": 0.0, + "step": 56448 + }, + { + "epoch": 52.66, + "learning_rate": 4.3417444029850745e-05, + "loss": 0.0003, + "step": 56452 + }, + { + "epoch": 52.66, + "learning_rate": 4.34169776119403e-05, + "loss": 0.0, + "step": 56456 + }, + { + "epoch": 52.67, + "learning_rate": 4.3416511194029854e-05, + "loss": 0.0, + "step": 56460 + }, + { + "epoch": 52.67, + "learning_rate": 4.341604477611941e-05, + "loss": 0.0, + "step": 56464 + }, + { + "epoch": 52.68, + "learning_rate": 4.341557835820896e-05, + "loss": 0.0001, + "step": 56468 + }, + { + "epoch": 52.68, + "learning_rate": 4.3415111940298506e-05, + "loss": 0.0, + "step": 56472 + }, + { + "epoch": 52.68, + "learning_rate": 4.341464552238807e-05, + "loss": 0.0, + "step": 56476 + }, + { + "epoch": 52.69, + "learning_rate": 4.3414179104477615e-05, + "loss": 0.0, + "step": 56480 + }, + { + "epoch": 52.69, + "learning_rate": 4.3413712686567163e-05, + "loss": 0.0, + "step": 56484 + }, + { + "epoch": 52.69, + "learning_rate": 4.341324626865672e-05, + "loss": 0.0, + "step": 56488 + }, + { + "epoch": 52.7, + "learning_rate": 4.341277985074627e-05, + "loss": 0.0007, + "step": 56492 + }, + { + "epoch": 52.7, + "learning_rate": 4.341231343283582e-05, + "loss": 0.0, + "step": 56496 + }, + { + "epoch": 52.71, + "learning_rate": 4.3411847014925376e-05, + "loss": 0.0, + "step": 56500 + }, + { + "epoch": 52.71, + "eval_exact_match": 0.7398452611218569, + "eval_exec": 0.7688588007736944, + "eval_loss": 0.48737451434135437, + "eval_runtime": 1152.9321, + "eval_samples_per_second": 0.897, + "step": 56500 + }, + { + "epoch": 52.71, + "learning_rate": 4.341138059701493e-05, + "loss": 0.0008, + "step": 56504 + }, + { + "epoch": 52.71, + "learning_rate": 4.341091417910448e-05, + "loss": 0.0, + "step": 56508 + }, + { + "epoch": 52.72, + "learning_rate": 4.3410447761194034e-05, + "loss": 0.0, + "step": 56512 + }, + { + "epoch": 52.72, + "learning_rate": 4.340998134328358e-05, + "loss": 0.0, + "step": 56516 + }, + { + "epoch": 52.72, + "learning_rate": 4.340951492537314e-05, + "loss": 0.0002, + "step": 56520 + }, + { + "epoch": 52.73, + "learning_rate": 4.340904850746269e-05, + "loss": 0.0, + "step": 56524 + }, + { + "epoch": 52.73, + "learning_rate": 4.340858208955224e-05, + "loss": 0.0, + "step": 56528 + }, + { + "epoch": 52.73, + "learning_rate": 4.340811567164179e-05, + "loss": 0.0, + "step": 56532 + }, + { + "epoch": 52.74, + "learning_rate": 4.340764925373135e-05, + "loss": 0.0022, + "step": 56536 + }, + { + "epoch": 52.74, + "learning_rate": 4.34071828358209e-05, + "loss": 0.0025, + "step": 56540 + }, + { + "epoch": 52.75, + "learning_rate": 4.3406716417910446e-05, + "loss": 0.0, + "step": 56544 + }, + { + "epoch": 52.75, + "learning_rate": 4.340625e-05, + "loss": 0.0001, + "step": 56548 + }, + { + "epoch": 52.75, + "learning_rate": 4.3405783582089556e-05, + "loss": 0.0001, + "step": 56552 + }, + { + "epoch": 52.76, + "learning_rate": 4.3405317164179104e-05, + "loss": 0.0001, + "step": 56556 + }, + { + "epoch": 52.76, + "learning_rate": 4.340485074626866e-05, + "loss": 0.0003, + "step": 56560 + }, + { + "epoch": 52.76, + "learning_rate": 4.340438432835821e-05, + "loss": 0.0004, + "step": 56564 + }, + { + "epoch": 52.77, + "learning_rate": 4.340391791044776e-05, + "loss": 0.0, + "step": 56568 + }, + { + "epoch": 52.77, + "learning_rate": 4.340345149253732e-05, + "loss": 0.0, + "step": 56572 + }, + { + "epoch": 52.78, + "learning_rate": 4.3402985074626865e-05, + "loss": 0.0004, + "step": 56576 + }, + { + "epoch": 52.78, + "learning_rate": 4.340251865671642e-05, + "loss": 0.0, + "step": 56580 + }, + { + "epoch": 52.78, + "learning_rate": 4.3402052238805975e-05, + "loss": 0.0, + "step": 56584 + }, + { + "epoch": 52.79, + "learning_rate": 4.340158582089552e-05, + "loss": 0.0001, + "step": 56588 + }, + { + "epoch": 52.79, + "learning_rate": 4.340111940298507e-05, + "loss": 0.0, + "step": 56592 + }, + { + "epoch": 52.79, + "learning_rate": 4.340065298507463e-05, + "loss": 0.0, + "step": 56596 + }, + { + "epoch": 52.8, + "learning_rate": 4.340018656716418e-05, + "loss": 0.0, + "step": 56600 + }, + { + "epoch": 52.8, + "learning_rate": 4.339972014925373e-05, + "loss": 0.0003, + "step": 56604 + }, + { + "epoch": 52.81, + "learning_rate": 4.3399253731343284e-05, + "loss": 0.0002, + "step": 56608 + }, + { + "epoch": 52.81, + "learning_rate": 4.339878731343284e-05, + "loss": 0.0009, + "step": 56612 + }, + { + "epoch": 52.81, + "learning_rate": 4.339832089552239e-05, + "loss": 0.0001, + "step": 56616 + }, + { + "epoch": 52.82, + "learning_rate": 4.339785447761194e-05, + "loss": 0.0009, + "step": 56620 + }, + { + "epoch": 52.82, + "learning_rate": 4.339738805970149e-05, + "loss": 0.0001, + "step": 56624 + }, + { + "epoch": 52.82, + "learning_rate": 4.339692164179105e-05, + "loss": 0.0, + "step": 56628 + }, + { + "epoch": 52.83, + "learning_rate": 4.33964552238806e-05, + "loss": 0.0001, + "step": 56632 + }, + { + "epoch": 52.83, + "learning_rate": 4.339598880597015e-05, + "loss": 0.0005, + "step": 56636 + }, + { + "epoch": 52.84, + "learning_rate": 4.33955223880597e-05, + "loss": 0.0, + "step": 56640 + }, + { + "epoch": 52.84, + "learning_rate": 4.339505597014926e-05, + "loss": 0.0, + "step": 56644 + }, + { + "epoch": 52.84, + "learning_rate": 4.3394589552238806e-05, + "loss": 0.0001, + "step": 56648 + }, + { + "epoch": 52.85, + "learning_rate": 4.339412313432836e-05, + "loss": 0.0001, + "step": 56652 + }, + { + "epoch": 52.85, + "learning_rate": 4.3393656716417916e-05, + "loss": 0.0003, + "step": 56656 + }, + { + "epoch": 52.85, + "learning_rate": 4.3393190298507464e-05, + "loss": 0.0084, + "step": 56660 + }, + { + "epoch": 52.86, + "learning_rate": 4.339272388059702e-05, + "loss": 0.0001, + "step": 56664 + }, + { + "epoch": 52.86, + "learning_rate": 4.339225746268657e-05, + "loss": 0.0, + "step": 56668 + }, + { + "epoch": 52.87, + "learning_rate": 4.339179104477612e-05, + "loss": 0.0, + "step": 56672 + }, + { + "epoch": 52.87, + "learning_rate": 4.339132462686568e-05, + "loss": 0.0, + "step": 56676 + }, + { + "epoch": 52.87, + "learning_rate": 4.3390858208955225e-05, + "loss": 0.0, + "step": 56680 + }, + { + "epoch": 52.88, + "learning_rate": 4.339039179104477e-05, + "loss": 0.0, + "step": 56684 + }, + { + "epoch": 52.88, + "learning_rate": 4.3389925373134335e-05, + "loss": 0.0, + "step": 56688 + }, + { + "epoch": 52.88, + "learning_rate": 4.338945895522388e-05, + "loss": 0.0, + "step": 56692 + }, + { + "epoch": 52.89, + "learning_rate": 4.338899253731343e-05, + "loss": 0.0035, + "step": 56696 + }, + { + "epoch": 52.89, + "learning_rate": 4.3388526119402986e-05, + "loss": 0.0, + "step": 56700 + }, + { + "epoch": 52.9, + "learning_rate": 4.338805970149254e-05, + "loss": 0.0001, + "step": 56704 + }, + { + "epoch": 52.9, + "learning_rate": 4.338759328358209e-05, + "loss": 0.0, + "step": 56708 + }, + { + "epoch": 52.9, + "learning_rate": 4.3387126865671644e-05, + "loss": 0.0, + "step": 56712 + }, + { + "epoch": 52.91, + "learning_rate": 4.33866604477612e-05, + "loss": 0.0005, + "step": 56716 + }, + { + "epoch": 52.91, + "learning_rate": 4.338619402985075e-05, + "loss": 0.0, + "step": 56720 + }, + { + "epoch": 52.91, + "learning_rate": 4.33857276119403e-05, + "loss": 0.0, + "step": 56724 + }, + { + "epoch": 52.92, + "learning_rate": 4.338526119402985e-05, + "loss": 0.0, + "step": 56728 + }, + { + "epoch": 52.92, + "learning_rate": 4.3384794776119405e-05, + "loss": 0.0001, + "step": 56732 + }, + { + "epoch": 52.93, + "learning_rate": 4.338432835820896e-05, + "loss": 0.0007, + "step": 56736 + }, + { + "epoch": 52.93, + "learning_rate": 4.338386194029851e-05, + "loss": 0.0, + "step": 56740 + }, + { + "epoch": 52.93, + "learning_rate": 4.3383395522388056e-05, + "loss": 0.0088, + "step": 56744 + }, + { + "epoch": 52.94, + "learning_rate": 4.338292910447762e-05, + "loss": 0.0, + "step": 56748 + }, + { + "epoch": 52.94, + "learning_rate": 4.3382462686567166e-05, + "loss": 0.0001, + "step": 56752 + }, + { + "epoch": 52.94, + "learning_rate": 4.3381996268656714e-05, + "loss": 0.0002, + "step": 56756 + }, + { + "epoch": 52.95, + "learning_rate": 4.338152985074627e-05, + "loss": 0.0001, + "step": 56760 + }, + { + "epoch": 52.95, + "learning_rate": 4.3381063432835824e-05, + "loss": 0.0003, + "step": 56764 + }, + { + "epoch": 52.96, + "learning_rate": 4.338059701492537e-05, + "loss": 0.0, + "step": 56768 + }, + { + "epoch": 52.96, + "learning_rate": 4.338013059701493e-05, + "loss": 0.0, + "step": 56772 + }, + { + "epoch": 52.96, + "learning_rate": 4.337966417910448e-05, + "loss": 0.0, + "step": 56776 + }, + { + "epoch": 52.97, + "learning_rate": 4.337919776119403e-05, + "loss": 0.0056, + "step": 56780 + }, + { + "epoch": 52.97, + "learning_rate": 4.3378731343283585e-05, + "loss": 0.0001, + "step": 56784 + }, + { + "epoch": 52.97, + "learning_rate": 4.337826492537313e-05, + "loss": 0.0, + "step": 56788 + }, + { + "epoch": 52.98, + "learning_rate": 4.337779850746269e-05, + "loss": 0.0008, + "step": 56792 + }, + { + "epoch": 52.98, + "learning_rate": 4.337733208955224e-05, + "loss": 0.0007, + "step": 56796 + }, + { + "epoch": 52.98, + "learning_rate": 4.337686567164179e-05, + "loss": 0.0002, + "step": 56800 + }, + { + "epoch": 52.99, + "learning_rate": 4.3376399253731346e-05, + "loss": 0.0001, + "step": 56804 + }, + { + "epoch": 52.99, + "learning_rate": 4.33759328358209e-05, + "loss": 0.0, + "step": 56808 + }, + { + "epoch": 53.0, + "learning_rate": 4.337546641791045e-05, + "loss": 0.0, + "step": 56812 + }, + { + "epoch": 53.0, + "learning_rate": 4.3375000000000004e-05, + "loss": 0.0005, + "step": 56816 + }, + { + "epoch": 53.0, + "learning_rate": 4.337453358208955e-05, + "loss": 0.0018, + "step": 56820 + }, + { + "epoch": 53.01, + "learning_rate": 4.337406716417911e-05, + "loss": 0.0017, + "step": 56824 + }, + { + "epoch": 53.01, + "learning_rate": 4.337360074626866e-05, + "loss": 0.0, + "step": 56828 + }, + { + "epoch": 53.01, + "learning_rate": 4.337313432835821e-05, + "loss": 0.0003, + "step": 56832 + }, + { + "epoch": 53.02, + "learning_rate": 4.3372667910447765e-05, + "loss": 0.0, + "step": 56836 + }, + { + "epoch": 53.02, + "learning_rate": 4.337220149253732e-05, + "loss": 0.0007, + "step": 56840 + }, + { + "epoch": 53.03, + "learning_rate": 4.337173507462687e-05, + "loss": 0.0, + "step": 56844 + }, + { + "epoch": 53.03, + "learning_rate": 4.3371268656716416e-05, + "loss": 0.0001, + "step": 56848 + }, + { + "epoch": 53.03, + "learning_rate": 4.337080223880597e-05, + "loss": 0.0, + "step": 56852 + }, + { + "epoch": 53.04, + "learning_rate": 4.3370335820895526e-05, + "loss": 0.0, + "step": 56856 + }, + { + "epoch": 53.04, + "learning_rate": 4.3369869402985074e-05, + "loss": 0.0003, + "step": 56860 + }, + { + "epoch": 53.04, + "learning_rate": 4.336940298507463e-05, + "loss": 0.0, + "step": 56864 + }, + { + "epoch": 53.05, + "learning_rate": 4.3368936567164184e-05, + "loss": 0.0, + "step": 56868 + }, + { + "epoch": 53.05, + "learning_rate": 4.336847014925373e-05, + "loss": 0.0, + "step": 56872 + }, + { + "epoch": 53.06, + "learning_rate": 4.336800373134329e-05, + "loss": 0.0, + "step": 56876 + }, + { + "epoch": 53.06, + "learning_rate": 4.3367537313432835e-05, + "loss": 0.0, + "step": 56880 + }, + { + "epoch": 53.06, + "learning_rate": 4.336707089552239e-05, + "loss": 0.0, + "step": 56884 + }, + { + "epoch": 53.07, + "learning_rate": 4.3366604477611945e-05, + "loss": 0.0, + "step": 56888 + }, + { + "epoch": 53.07, + "learning_rate": 4.336613805970149e-05, + "loss": 0.0, + "step": 56892 + }, + { + "epoch": 53.07, + "learning_rate": 4.336567164179105e-05, + "loss": 0.0, + "step": 56896 + }, + { + "epoch": 53.08, + "learning_rate": 4.33652052238806e-05, + "loss": 0.0, + "step": 56900 + }, + { + "epoch": 53.08, + "learning_rate": 4.336473880597015e-05, + "loss": 0.0008, + "step": 56904 + }, + { + "epoch": 53.09, + "learning_rate": 4.33642723880597e-05, + "loss": 0.0002, + "step": 56908 + }, + { + "epoch": 53.09, + "learning_rate": 4.3363805970149254e-05, + "loss": 0.0, + "step": 56912 + }, + { + "epoch": 53.09, + "learning_rate": 4.336333955223881e-05, + "loss": 0.0, + "step": 56916 + }, + { + "epoch": 53.1, + "learning_rate": 4.336287313432836e-05, + "loss": 0.0034, + "step": 56920 + }, + { + "epoch": 53.1, + "learning_rate": 4.336240671641791e-05, + "loss": 0.0001, + "step": 56924 + }, + { + "epoch": 53.1, + "learning_rate": 4.336194029850747e-05, + "loss": 0.0, + "step": 56928 + }, + { + "epoch": 53.11, + "learning_rate": 4.3361473880597015e-05, + "loss": 0.0, + "step": 56932 + }, + { + "epoch": 53.11, + "learning_rate": 4.336100746268657e-05, + "loss": 0.0001, + "step": 56936 + }, + { + "epoch": 53.12, + "learning_rate": 4.336054104477612e-05, + "loss": 0.0013, + "step": 56940 + }, + { + "epoch": 53.12, + "learning_rate": 4.336007462686567e-05, + "loss": 0.0, + "step": 56944 + }, + { + "epoch": 53.12, + "learning_rate": 4.335960820895523e-05, + "loss": 0.0, + "step": 56948 + }, + { + "epoch": 53.13, + "learning_rate": 4.3359141791044776e-05, + "loss": 0.0006, + "step": 56952 + }, + { + "epoch": 53.13, + "learning_rate": 4.335867537313433e-05, + "loss": 0.0, + "step": 56956 + }, + { + "epoch": 53.13, + "learning_rate": 4.3358208955223886e-05, + "loss": 0.0, + "step": 56960 + }, + { + "epoch": 53.14, + "learning_rate": 4.3357742537313434e-05, + "loss": 0.0, + "step": 56964 + }, + { + "epoch": 53.14, + "learning_rate": 4.335727611940299e-05, + "loss": 0.0, + "step": 56968 + }, + { + "epoch": 53.15, + "learning_rate": 4.335680970149254e-05, + "loss": 0.0, + "step": 56972 + }, + { + "epoch": 53.15, + "learning_rate": 4.335634328358209e-05, + "loss": 0.0, + "step": 56976 + }, + { + "epoch": 53.15, + "learning_rate": 4.335587686567165e-05, + "loss": 0.0003, + "step": 56980 + }, + { + "epoch": 53.16, + "learning_rate": 4.3355410447761195e-05, + "loss": 0.0, + "step": 56984 + }, + { + "epoch": 53.16, + "learning_rate": 4.335494402985075e-05, + "loss": 0.0003, + "step": 56988 + }, + { + "epoch": 53.16, + "learning_rate": 4.3354477611940305e-05, + "loss": 0.0, + "step": 56992 + }, + { + "epoch": 53.17, + "learning_rate": 4.335401119402985e-05, + "loss": 0.0, + "step": 56996 + }, + { + "epoch": 53.17, + "learning_rate": 4.33535447761194e-05, + "loss": 0.0, + "step": 57000 + }, + { + "epoch": 53.17, + "eval_exact_match": 0.7311411992263056, + "eval_exec": 0.7756286266924565, + "eval_loss": 0.4817250370979309, + "eval_runtime": 1118.231, + "eval_samples_per_second": 0.925, + "step": 57000 + }, + { + "epoch": 53.18, + "learning_rate": 4.335307835820896e-05, + "loss": 0.0001, + "step": 57004 + }, + { + "epoch": 53.18, + "learning_rate": 4.335261194029851e-05, + "loss": 0.0, + "step": 57008 + }, + { + "epoch": 53.18, + "learning_rate": 4.335214552238806e-05, + "loss": 0.0, + "step": 57012 + }, + { + "epoch": 53.19, + "learning_rate": 4.3351679104477614e-05, + "loss": 0.0, + "step": 57016 + }, + { + "epoch": 53.19, + "learning_rate": 4.335121268656717e-05, + "loss": 0.0, + "step": 57020 + }, + { + "epoch": 53.19, + "learning_rate": 4.335074626865672e-05, + "loss": 0.0001, + "step": 57024 + }, + { + "epoch": 53.2, + "learning_rate": 4.335027985074627e-05, + "loss": 0.0001, + "step": 57028 + }, + { + "epoch": 53.2, + "learning_rate": 4.334981343283582e-05, + "loss": 0.0, + "step": 57032 + }, + { + "epoch": 53.21, + "learning_rate": 4.3349347014925375e-05, + "loss": 0.0, + "step": 57036 + }, + { + "epoch": 53.21, + "learning_rate": 4.334888059701493e-05, + "loss": 0.0, + "step": 57040 + }, + { + "epoch": 53.21, + "learning_rate": 4.334841417910448e-05, + "loss": 0.0, + "step": 57044 + }, + { + "epoch": 53.22, + "learning_rate": 4.334794776119403e-05, + "loss": 0.0, + "step": 57048 + }, + { + "epoch": 53.22, + "learning_rate": 4.334748134328359e-05, + "loss": 0.0031, + "step": 57052 + }, + { + "epoch": 53.22, + "learning_rate": 4.3347014925373136e-05, + "loss": 0.0001, + "step": 57056 + }, + { + "epoch": 53.23, + "learning_rate": 4.3346548507462684e-05, + "loss": 0.0, + "step": 57060 + }, + { + "epoch": 53.23, + "learning_rate": 4.3346082089552246e-05, + "loss": 0.0117, + "step": 57064 + }, + { + "epoch": 53.24, + "learning_rate": 4.3345615671641794e-05, + "loss": 0.0, + "step": 57068 + }, + { + "epoch": 53.24, + "learning_rate": 4.334514925373134e-05, + "loss": 0.0, + "step": 57072 + }, + { + "epoch": 53.24, + "learning_rate": 4.33446828358209e-05, + "loss": 0.0, + "step": 57076 + }, + { + "epoch": 53.25, + "learning_rate": 4.334421641791045e-05, + "loss": 0.0, + "step": 57080 + }, + { + "epoch": 53.25, + "learning_rate": 4.334375e-05, + "loss": 0.0, + "step": 57084 + }, + { + "epoch": 53.25, + "learning_rate": 4.3343283582089555e-05, + "loss": 0.0006, + "step": 57088 + }, + { + "epoch": 53.26, + "learning_rate": 4.33428171641791e-05, + "loss": 0.0, + "step": 57092 + }, + { + "epoch": 53.26, + "learning_rate": 4.334235074626866e-05, + "loss": 0.0, + "step": 57096 + }, + { + "epoch": 53.26, + "learning_rate": 4.334188432835821e-05, + "loss": 0.0007, + "step": 57100 + }, + { + "epoch": 53.27, + "learning_rate": 4.334141791044776e-05, + "loss": 0.0014, + "step": 57104 + }, + { + "epoch": 53.27, + "learning_rate": 4.3340951492537316e-05, + "loss": 0.0, + "step": 57108 + }, + { + "epoch": 53.28, + "learning_rate": 4.334048507462687e-05, + "loss": 0.0001, + "step": 57112 + }, + { + "epoch": 53.28, + "learning_rate": 4.334001865671642e-05, + "loss": 0.0, + "step": 57116 + }, + { + "epoch": 53.28, + "learning_rate": 4.3339552238805974e-05, + "loss": 0.0065, + "step": 57120 + }, + { + "epoch": 53.29, + "learning_rate": 4.333908582089553e-05, + "loss": 0.0, + "step": 57124 + }, + { + "epoch": 53.29, + "learning_rate": 4.333861940298508e-05, + "loss": 0.0, + "step": 57128 + }, + { + "epoch": 53.29, + "learning_rate": 4.333815298507463e-05, + "loss": 0.0, + "step": 57132 + }, + { + "epoch": 53.3, + "learning_rate": 4.333768656716418e-05, + "loss": 0.0, + "step": 57136 + }, + { + "epoch": 53.3, + "learning_rate": 4.3337220149253735e-05, + "loss": 0.0, + "step": 57140 + }, + { + "epoch": 53.31, + "learning_rate": 4.333675373134329e-05, + "loss": 0.0, + "step": 57144 + }, + { + "epoch": 53.31, + "learning_rate": 4.333628731343284e-05, + "loss": 0.0001, + "step": 57148 + }, + { + "epoch": 53.31, + "learning_rate": 4.3335820895522386e-05, + "loss": 0.0, + "step": 57152 + }, + { + "epoch": 53.32, + "learning_rate": 4.333535447761195e-05, + "loss": 0.0005, + "step": 57156 + }, + { + "epoch": 53.32, + "learning_rate": 4.3334888059701496e-05, + "loss": 0.0, + "step": 57160 + }, + { + "epoch": 53.32, + "learning_rate": 4.3334421641791044e-05, + "loss": 0.0, + "step": 57164 + }, + { + "epoch": 53.33, + "learning_rate": 4.33339552238806e-05, + "loss": 0.0234, + "step": 57168 + }, + { + "epoch": 53.33, + "learning_rate": 4.3333488805970154e-05, + "loss": 0.0, + "step": 57172 + }, + { + "epoch": 53.34, + "learning_rate": 4.33330223880597e-05, + "loss": 0.0, + "step": 57176 + }, + { + "epoch": 53.34, + "learning_rate": 4.333255597014926e-05, + "loss": 0.0001, + "step": 57180 + }, + { + "epoch": 53.34, + "learning_rate": 4.333208955223881e-05, + "loss": 0.0041, + "step": 57184 + }, + { + "epoch": 53.35, + "learning_rate": 4.333162313432836e-05, + "loss": 0.0, + "step": 57188 + }, + { + "epoch": 53.35, + "learning_rate": 4.3331156716417915e-05, + "loss": 0.0, + "step": 57192 + }, + { + "epoch": 53.35, + "learning_rate": 4.333069029850746e-05, + "loss": 0.0, + "step": 57196 + }, + { + "epoch": 53.36, + "learning_rate": 4.333022388059702e-05, + "loss": 0.0, + "step": 57200 + }, + { + "epoch": 53.36, + "learning_rate": 4.332975746268657e-05, + "loss": 0.0001, + "step": 57204 + }, + { + "epoch": 53.37, + "learning_rate": 4.332929104477612e-05, + "loss": 0.0, + "step": 57208 + }, + { + "epoch": 53.37, + "learning_rate": 4.332882462686567e-05, + "loss": 0.0001, + "step": 57212 + }, + { + "epoch": 53.37, + "learning_rate": 4.332835820895523e-05, + "loss": 0.0, + "step": 57216 + }, + { + "epoch": 53.38, + "learning_rate": 4.332789179104478e-05, + "loss": 0.0005, + "step": 57220 + }, + { + "epoch": 53.38, + "learning_rate": 4.332742537313433e-05, + "loss": 0.0, + "step": 57224 + }, + { + "epoch": 53.38, + "learning_rate": 4.332695895522388e-05, + "loss": 0.0, + "step": 57228 + }, + { + "epoch": 53.39, + "learning_rate": 4.3326492537313437e-05, + "loss": 0.0003, + "step": 57232 + }, + { + "epoch": 53.39, + "learning_rate": 4.3326026119402985e-05, + "loss": 0.0, + "step": 57236 + }, + { + "epoch": 53.4, + "learning_rate": 4.332555970149254e-05, + "loss": 0.0, + "step": 57240 + }, + { + "epoch": 53.4, + "learning_rate": 4.332509328358209e-05, + "loss": 0.0, + "step": 57244 + }, + { + "epoch": 53.4, + "learning_rate": 4.332462686567164e-05, + "loss": 0.0, + "step": 57248 + }, + { + "epoch": 53.41, + "learning_rate": 4.33241604477612e-05, + "loss": 0.0, + "step": 57252 + }, + { + "epoch": 53.41, + "learning_rate": 4.3323694029850746e-05, + "loss": 0.0, + "step": 57256 + }, + { + "epoch": 53.41, + "learning_rate": 4.33232276119403e-05, + "loss": 0.0, + "step": 57260 + }, + { + "epoch": 53.42, + "learning_rate": 4.3322761194029855e-05, + "loss": 0.0, + "step": 57264 + }, + { + "epoch": 53.42, + "learning_rate": 4.3322294776119404e-05, + "loss": 0.0, + "step": 57268 + }, + { + "epoch": 53.43, + "learning_rate": 4.332182835820895e-05, + "loss": 0.0, + "step": 57272 + }, + { + "epoch": 53.43, + "learning_rate": 4.3321361940298513e-05, + "loss": 0.0, + "step": 57276 + }, + { + "epoch": 53.43, + "learning_rate": 4.332089552238806e-05, + "loss": 0.0, + "step": 57280 + }, + { + "epoch": 53.44, + "learning_rate": 4.3320429104477616e-05, + "loss": 0.0, + "step": 57284 + }, + { + "epoch": 53.44, + "learning_rate": 4.3319962686567165e-05, + "loss": 0.0, + "step": 57288 + }, + { + "epoch": 53.44, + "learning_rate": 4.331949626865672e-05, + "loss": 0.0, + "step": 57292 + }, + { + "epoch": 53.45, + "learning_rate": 4.3319029850746274e-05, + "loss": 0.0, + "step": 57296 + }, + { + "epoch": 53.45, + "learning_rate": 4.331856343283582e-05, + "loss": 0.0, + "step": 57300 + }, + { + "epoch": 53.46, + "learning_rate": 4.331809701492537e-05, + "loss": 0.0, + "step": 57304 + }, + { + "epoch": 53.46, + "learning_rate": 4.331763059701493e-05, + "loss": 0.0023, + "step": 57308 + }, + { + "epoch": 53.46, + "learning_rate": 4.331716417910448e-05, + "loss": 0.0001, + "step": 57312 + }, + { + "epoch": 53.47, + "learning_rate": 4.331669776119403e-05, + "loss": 0.0, + "step": 57316 + }, + { + "epoch": 53.47, + "learning_rate": 4.3316231343283584e-05, + "loss": 0.0006, + "step": 57320 + }, + { + "epoch": 53.47, + "learning_rate": 4.331576492537314e-05, + "loss": 0.0091, + "step": 57324 + }, + { + "epoch": 53.48, + "learning_rate": 4.3315298507462687e-05, + "loss": 0.0, + "step": 57328 + }, + { + "epoch": 53.48, + "learning_rate": 4.331483208955224e-05, + "loss": 0.0, + "step": 57332 + }, + { + "epoch": 53.49, + "learning_rate": 4.3314365671641796e-05, + "loss": 0.0, + "step": 57336 + }, + { + "epoch": 53.49, + "learning_rate": 4.3313899253731345e-05, + "loss": 0.0, + "step": 57340 + }, + { + "epoch": 53.49, + "learning_rate": 4.33134328358209e-05, + "loss": 0.0002, + "step": 57344 + }, + { + "epoch": 53.5, + "learning_rate": 4.331296641791045e-05, + "loss": 0.0, + "step": 57348 + }, + { + "epoch": 53.5, + "learning_rate": 4.33125e-05, + "loss": 0.0, + "step": 57352 + }, + { + "epoch": 53.5, + "learning_rate": 4.331203358208956e-05, + "loss": 0.0022, + "step": 57356 + }, + { + "epoch": 53.51, + "learning_rate": 4.3311567164179106e-05, + "loss": 0.0, + "step": 57360 + }, + { + "epoch": 53.51, + "learning_rate": 4.3311100746268654e-05, + "loss": 0.0, + "step": 57364 + }, + { + "epoch": 53.51, + "learning_rate": 4.3310634328358215e-05, + "loss": 0.0, + "step": 57368 + }, + { + "epoch": 53.52, + "learning_rate": 4.3310167910447763e-05, + "loss": 0.0001, + "step": 57372 + }, + { + "epoch": 53.52, + "learning_rate": 4.330970149253731e-05, + "loss": 0.0, + "step": 57376 + }, + { + "epoch": 53.53, + "learning_rate": 4.3309235074626866e-05, + "loss": 0.0, + "step": 57380 + }, + { + "epoch": 53.53, + "learning_rate": 4.330876865671642e-05, + "loss": 0.0, + "step": 57384 + }, + { + "epoch": 53.53, + "learning_rate": 4.330830223880597e-05, + "loss": 0.002, + "step": 57388 + }, + { + "epoch": 53.54, + "learning_rate": 4.3307835820895524e-05, + "loss": 0.0001, + "step": 57392 + }, + { + "epoch": 53.54, + "learning_rate": 4.330736940298508e-05, + "loss": 0.0001, + "step": 57396 + }, + { + "epoch": 53.54, + "learning_rate": 4.330690298507463e-05, + "loss": 0.0, + "step": 57400 + }, + { + "epoch": 53.55, + "learning_rate": 4.330643656716418e-05, + "loss": 0.0, + "step": 57404 + }, + { + "epoch": 53.55, + "learning_rate": 4.330597014925373e-05, + "loss": 0.0, + "step": 57408 + }, + { + "epoch": 53.56, + "learning_rate": 4.3305503731343285e-05, + "loss": 0.0021, + "step": 57412 + }, + { + "epoch": 53.56, + "learning_rate": 4.330503731343284e-05, + "loss": 0.0, + "step": 57416 + }, + { + "epoch": 53.56, + "learning_rate": 4.330457089552239e-05, + "loss": 0.0008, + "step": 57420 + }, + { + "epoch": 53.57, + "learning_rate": 4.3304104477611937e-05, + "loss": 0.0001, + "step": 57424 + }, + { + "epoch": 53.57, + "learning_rate": 4.33036380597015e-05, + "loss": 0.0, + "step": 57428 + }, + { + "epoch": 53.57, + "learning_rate": 4.3303171641791046e-05, + "loss": 0.0, + "step": 57432 + }, + { + "epoch": 53.58, + "learning_rate": 4.3302705223880595e-05, + "loss": 0.0005, + "step": 57436 + }, + { + "epoch": 53.58, + "learning_rate": 4.330223880597015e-05, + "loss": 0.0, + "step": 57440 + }, + { + "epoch": 53.59, + "learning_rate": 4.3301772388059704e-05, + "loss": 0.0, + "step": 57444 + }, + { + "epoch": 53.59, + "learning_rate": 4.330130597014926e-05, + "loss": 0.0, + "step": 57448 + }, + { + "epoch": 53.59, + "learning_rate": 4.330083955223881e-05, + "loss": 0.0, + "step": 57452 + }, + { + "epoch": 53.6, + "learning_rate": 4.330037313432836e-05, + "loss": 0.0, + "step": 57456 + }, + { + "epoch": 53.6, + "learning_rate": 4.329990671641792e-05, + "loss": 0.0001, + "step": 57460 + }, + { + "epoch": 53.6, + "learning_rate": 4.3299440298507465e-05, + "loss": 0.0, + "step": 57464 + }, + { + "epoch": 53.61, + "learning_rate": 4.3298973880597013e-05, + "loss": 0.0014, + "step": 57468 + }, + { + "epoch": 53.61, + "learning_rate": 4.329850746268657e-05, + "loss": 0.0, + "step": 57472 + }, + { + "epoch": 53.62, + "learning_rate": 4.329804104477612e-05, + "loss": 0.0, + "step": 57476 + }, + { + "epoch": 53.62, + "learning_rate": 4.329757462686567e-05, + "loss": 0.0076, + "step": 57480 + }, + { + "epoch": 53.62, + "learning_rate": 4.3297108208955226e-05, + "loss": 0.0, + "step": 57484 + }, + { + "epoch": 53.63, + "learning_rate": 4.329664179104478e-05, + "loss": 0.0, + "step": 57488 + }, + { + "epoch": 53.63, + "learning_rate": 4.329617537313433e-05, + "loss": 0.0, + "step": 57492 + }, + { + "epoch": 53.63, + "learning_rate": 4.3295708955223884e-05, + "loss": 0.0, + "step": 57496 + }, + { + "epoch": 53.64, + "learning_rate": 4.329524253731343e-05, + "loss": 0.001, + "step": 57500 + }, + { + "epoch": 53.64, + "eval_exact_match": 0.7446808510638298, + "eval_exec": 0.7823984526112185, + "eval_loss": 0.4641968011856079, + "eval_runtime": 1155.653, + "eval_samples_per_second": 0.895, + "step": 57500 + }, + { + "epoch": 53.64, + "learning_rate": 4.329477611940299e-05, + "loss": 0.0, + "step": 57504 + }, + { + "epoch": 53.65, + "learning_rate": 4.329430970149254e-05, + "loss": 0.0, + "step": 57508 + }, + { + "epoch": 53.65, + "learning_rate": 4.329384328358209e-05, + "loss": 0.0005, + "step": 57512 + }, + { + "epoch": 53.65, + "learning_rate": 4.3293376865671645e-05, + "loss": 0.0, + "step": 57516 + }, + { + "epoch": 53.66, + "learning_rate": 4.32929104477612e-05, + "loss": 0.0051, + "step": 57520 + }, + { + "epoch": 53.66, + "learning_rate": 4.329244402985075e-05, + "loss": 0.0001, + "step": 57524 + }, + { + "epoch": 53.66, + "learning_rate": 4.3291977611940296e-05, + "loss": 0.0, + "step": 57528 + }, + { + "epoch": 53.67, + "learning_rate": 4.329151119402985e-05, + "loss": 0.0, + "step": 57532 + }, + { + "epoch": 53.67, + "learning_rate": 4.3291044776119406e-05, + "loss": 0.0, + "step": 57536 + }, + { + "epoch": 53.68, + "learning_rate": 4.3290578358208954e-05, + "loss": 0.0, + "step": 57540 + }, + { + "epoch": 53.68, + "learning_rate": 4.329011194029851e-05, + "loss": 0.0, + "step": 57544 + }, + { + "epoch": 53.68, + "learning_rate": 4.3289645522388064e-05, + "loss": 0.0, + "step": 57548 + }, + { + "epoch": 53.69, + "learning_rate": 4.328917910447761e-05, + "loss": 0.0008, + "step": 57552 + }, + { + "epoch": 53.69, + "learning_rate": 4.328871268656717e-05, + "loss": 0.0002, + "step": 57556 + }, + { + "epoch": 53.69, + "learning_rate": 4.3288246268656715e-05, + "loss": 0.0001, + "step": 57560 + }, + { + "epoch": 53.7, + "learning_rate": 4.328777985074627e-05, + "loss": 0.0, + "step": 57564 + }, + { + "epoch": 53.7, + "learning_rate": 4.3287313432835825e-05, + "loss": 0.0, + "step": 57568 + }, + { + "epoch": 53.71, + "learning_rate": 4.328684701492537e-05, + "loss": 0.0, + "step": 57572 + }, + { + "epoch": 53.71, + "learning_rate": 4.328638059701493e-05, + "loss": 0.0, + "step": 57576 + }, + { + "epoch": 53.71, + "learning_rate": 4.328591417910448e-05, + "loss": 0.0015, + "step": 57580 + }, + { + "epoch": 53.72, + "learning_rate": 4.328544776119403e-05, + "loss": 0.0, + "step": 57584 + }, + { + "epoch": 53.72, + "learning_rate": 4.328498134328358e-05, + "loss": 0.0013, + "step": 57588 + }, + { + "epoch": 53.72, + "learning_rate": 4.3284514925373134e-05, + "loss": 0.0, + "step": 57592 + }, + { + "epoch": 53.73, + "learning_rate": 4.328404850746269e-05, + "loss": 0.0049, + "step": 57596 + }, + { + "epoch": 53.73, + "learning_rate": 4.328358208955224e-05, + "loss": 0.0, + "step": 57600 + }, + { + "epoch": 53.73, + "learning_rate": 4.328311567164179e-05, + "loss": 0.0, + "step": 57604 + }, + { + "epoch": 53.74, + "learning_rate": 4.328264925373135e-05, + "loss": 0.0, + "step": 57608 + }, + { + "epoch": 53.74, + "learning_rate": 4.32821828358209e-05, + "loss": 0.0, + "step": 57612 + }, + { + "epoch": 53.75, + "learning_rate": 4.328171641791045e-05, + "loss": 0.0001, + "step": 57616 + }, + { + "epoch": 53.75, + "learning_rate": 4.328125e-05, + "loss": 0.0006, + "step": 57620 + }, + { + "epoch": 53.75, + "learning_rate": 4.328078358208956e-05, + "loss": 0.0, + "step": 57624 + }, + { + "epoch": 53.76, + "learning_rate": 4.328031716417911e-05, + "loss": 0.0, + "step": 57628 + }, + { + "epoch": 53.76, + "learning_rate": 4.3279850746268656e-05, + "loss": 0.0004, + "step": 57632 + }, + { + "epoch": 53.76, + "learning_rate": 4.327938432835821e-05, + "loss": 0.0, + "step": 57636 + }, + { + "epoch": 53.77, + "learning_rate": 4.3278917910447766e-05, + "loss": 0.0, + "step": 57640 + }, + { + "epoch": 53.77, + "learning_rate": 4.3278451492537314e-05, + "loss": 0.0, + "step": 57644 + }, + { + "epoch": 53.78, + "learning_rate": 4.327798507462687e-05, + "loss": 0.0, + "step": 57648 + }, + { + "epoch": 53.78, + "learning_rate": 4.327751865671642e-05, + "loss": 0.0, + "step": 57652 + }, + { + "epoch": 53.78, + "learning_rate": 4.327705223880597e-05, + "loss": 0.0, + "step": 57656 + }, + { + "epoch": 53.79, + "learning_rate": 4.327658582089553e-05, + "loss": 0.0, + "step": 57660 + }, + { + "epoch": 53.79, + "learning_rate": 4.3276119402985075e-05, + "loss": 0.0001, + "step": 57664 + }, + { + "epoch": 53.79, + "learning_rate": 4.327565298507463e-05, + "loss": 0.0001, + "step": 57668 + }, + { + "epoch": 53.8, + "learning_rate": 4.3275186567164185e-05, + "loss": 0.0001, + "step": 57672 + }, + { + "epoch": 53.8, + "learning_rate": 4.327472014925373e-05, + "loss": 0.0, + "step": 57676 + }, + { + "epoch": 53.81, + "learning_rate": 4.327425373134328e-05, + "loss": 0.0, + "step": 57680 + }, + { + "epoch": 53.81, + "learning_rate": 4.327378731343284e-05, + "loss": 0.0, + "step": 57684 + }, + { + "epoch": 53.81, + "learning_rate": 4.327332089552239e-05, + "loss": 0.0, + "step": 57688 + }, + { + "epoch": 53.82, + "learning_rate": 4.327285447761194e-05, + "loss": 0.0, + "step": 57692 + }, + { + "epoch": 53.82, + "learning_rate": 4.3272388059701494e-05, + "loss": 0.0002, + "step": 57696 + }, + { + "epoch": 53.82, + "learning_rate": 4.327192164179105e-05, + "loss": 0.0, + "step": 57700 + }, + { + "epoch": 53.83, + "learning_rate": 4.32714552238806e-05, + "loss": 0.0, + "step": 57704 + }, + { + "epoch": 53.83, + "learning_rate": 4.327098880597015e-05, + "loss": 0.0, + "step": 57708 + }, + { + "epoch": 53.84, + "learning_rate": 4.32705223880597e-05, + "loss": 0.0, + "step": 57712 + }, + { + "epoch": 53.84, + "learning_rate": 4.3270055970149255e-05, + "loss": 0.0, + "step": 57716 + }, + { + "epoch": 53.84, + "learning_rate": 4.326958955223881e-05, + "loss": 0.0, + "step": 57720 + }, + { + "epoch": 53.85, + "learning_rate": 4.326912313432836e-05, + "loss": 0.0, + "step": 57724 + }, + { + "epoch": 53.85, + "learning_rate": 4.326865671641791e-05, + "loss": 0.0001, + "step": 57728 + }, + { + "epoch": 53.85, + "learning_rate": 4.326819029850747e-05, + "loss": 0.003, + "step": 57732 + }, + { + "epoch": 53.86, + "learning_rate": 4.3267723880597016e-05, + "loss": 0.0, + "step": 57736 + }, + { + "epoch": 53.86, + "learning_rate": 4.3267257462686564e-05, + "loss": 0.0003, + "step": 57740 + }, + { + "epoch": 53.87, + "learning_rate": 4.3266791044776126e-05, + "loss": 0.0, + "step": 57744 + }, + { + "epoch": 53.87, + "learning_rate": 4.3266324626865674e-05, + "loss": 0.0, + "step": 57748 + }, + { + "epoch": 53.87, + "learning_rate": 4.326585820895522e-05, + "loss": 0.0001, + "step": 57752 + }, + { + "epoch": 53.88, + "learning_rate": 4.326539179104478e-05, + "loss": 0.0, + "step": 57756 + }, + { + "epoch": 53.88, + "learning_rate": 4.326492537313433e-05, + "loss": 0.0001, + "step": 57760 + }, + { + "epoch": 53.88, + "learning_rate": 4.326445895522388e-05, + "loss": 0.0002, + "step": 57764 + }, + { + "epoch": 53.89, + "learning_rate": 4.3263992537313435e-05, + "loss": 0.0003, + "step": 57768 + }, + { + "epoch": 53.89, + "learning_rate": 4.326352611940298e-05, + "loss": 0.0, + "step": 57772 + }, + { + "epoch": 53.9, + "learning_rate": 4.3263059701492545e-05, + "loss": 0.0, + "step": 57776 + }, + { + "epoch": 53.9, + "learning_rate": 4.326259328358209e-05, + "loss": 0.0, + "step": 57780 + }, + { + "epoch": 53.9, + "learning_rate": 4.326212686567164e-05, + "loss": 0.0001, + "step": 57784 + }, + { + "epoch": 53.91, + "learning_rate": 4.3261660447761196e-05, + "loss": 0.0, + "step": 57788 + }, + { + "epoch": 53.91, + "learning_rate": 4.326119402985075e-05, + "loss": 0.0, + "step": 57792 + }, + { + "epoch": 53.91, + "learning_rate": 4.32607276119403e-05, + "loss": 0.0, + "step": 57796 + }, + { + "epoch": 53.92, + "learning_rate": 4.3260261194029854e-05, + "loss": 0.0, + "step": 57800 + }, + { + "epoch": 53.92, + "learning_rate": 4.325979477611941e-05, + "loss": 0.0, + "step": 57804 + }, + { + "epoch": 53.93, + "learning_rate": 4.325932835820896e-05, + "loss": 0.0, + "step": 57808 + }, + { + "epoch": 53.93, + "learning_rate": 4.325886194029851e-05, + "loss": 0.0, + "step": 57812 + }, + { + "epoch": 53.93, + "learning_rate": 4.325839552238806e-05, + "loss": 0.0, + "step": 57816 + }, + { + "epoch": 53.94, + "learning_rate": 4.3257929104477615e-05, + "loss": 0.0, + "step": 57820 + }, + { + "epoch": 53.94, + "learning_rate": 4.325746268656717e-05, + "loss": 0.0, + "step": 57824 + }, + { + "epoch": 53.94, + "learning_rate": 4.325699626865672e-05, + "loss": 0.0001, + "step": 57828 + }, + { + "epoch": 53.95, + "learning_rate": 4.3256529850746266e-05, + "loss": 0.0, + "step": 57832 + }, + { + "epoch": 53.95, + "learning_rate": 4.325606343283583e-05, + "loss": 0.0, + "step": 57836 + }, + { + "epoch": 53.96, + "learning_rate": 4.3255597014925376e-05, + "loss": 0.0, + "step": 57840 + }, + { + "epoch": 53.96, + "learning_rate": 4.3255130597014924e-05, + "loss": 0.0, + "step": 57844 + }, + { + "epoch": 53.96, + "learning_rate": 4.325466417910448e-05, + "loss": 0.0, + "step": 57848 + }, + { + "epoch": 53.97, + "learning_rate": 4.3254197761194034e-05, + "loss": 0.002, + "step": 57852 + }, + { + "epoch": 53.97, + "learning_rate": 4.325373134328358e-05, + "loss": 0.0, + "step": 57856 + }, + { + "epoch": 53.97, + "learning_rate": 4.325326492537314e-05, + "loss": 0.0, + "step": 57860 + }, + { + "epoch": 53.98, + "learning_rate": 4.325279850746269e-05, + "loss": 0.0, + "step": 57864 + }, + { + "epoch": 53.98, + "learning_rate": 4.325233208955224e-05, + "loss": 0.0, + "step": 57868 + }, + { + "epoch": 53.98, + "learning_rate": 4.3251865671641795e-05, + "loss": 0.0003, + "step": 57872 + }, + { + "epoch": 53.99, + "learning_rate": 4.325139925373134e-05, + "loss": 0.0004, + "step": 57876 + }, + { + "epoch": 53.99, + "learning_rate": 4.32509328358209e-05, + "loss": 0.0002, + "step": 57880 + }, + { + "epoch": 54.0, + "learning_rate": 4.325046641791045e-05, + "loss": 0.0, + "step": 57884 + }, + { + "epoch": 54.0, + "learning_rate": 4.325e-05, + "loss": 0.0001, + "step": 57888 + }, + { + "epoch": 54.0, + "learning_rate": 4.324953358208955e-05, + "loss": 0.0, + "step": 57892 + }, + { + "epoch": 54.01, + "learning_rate": 4.324906716417911e-05, + "loss": 0.0, + "step": 57896 + }, + { + "epoch": 54.01, + "learning_rate": 4.324860074626866e-05, + "loss": 0.0, + "step": 57900 + }, + { + "epoch": 54.01, + "learning_rate": 4.324813432835821e-05, + "loss": 0.0, + "step": 57904 + }, + { + "epoch": 54.02, + "learning_rate": 4.324766791044776e-05, + "loss": 0.0, + "step": 57908 + }, + { + "epoch": 54.02, + "learning_rate": 4.324720149253732e-05, + "loss": 0.0, + "step": 57912 + }, + { + "epoch": 54.03, + "learning_rate": 4.3246735074626865e-05, + "loss": 0.0001, + "step": 57916 + }, + { + "epoch": 54.03, + "learning_rate": 4.324626865671642e-05, + "loss": 0.0, + "step": 57920 + }, + { + "epoch": 54.03, + "learning_rate": 4.324580223880597e-05, + "loss": 0.0004, + "step": 57924 + }, + { + "epoch": 54.04, + "learning_rate": 4.324533582089552e-05, + "loss": 0.0, + "step": 57928 + }, + { + "epoch": 54.04, + "learning_rate": 4.324486940298508e-05, + "loss": 0.0, + "step": 57932 + }, + { + "epoch": 54.04, + "learning_rate": 4.3244402985074626e-05, + "loss": 0.0, + "step": 57936 + }, + { + "epoch": 54.05, + "learning_rate": 4.324393656716418e-05, + "loss": 0.0, + "step": 57940 + }, + { + "epoch": 54.05, + "learning_rate": 4.3243470149253736e-05, + "loss": 0.0, + "step": 57944 + }, + { + "epoch": 54.06, + "learning_rate": 4.3243003731343284e-05, + "loss": 0.0001, + "step": 57948 + }, + { + "epoch": 54.06, + "learning_rate": 4.324253731343284e-05, + "loss": 0.0013, + "step": 57952 + }, + { + "epoch": 54.06, + "learning_rate": 4.3242070895522394e-05, + "loss": 0.0, + "step": 57956 + }, + { + "epoch": 54.07, + "learning_rate": 4.324160447761194e-05, + "loss": 0.0001, + "step": 57960 + }, + { + "epoch": 54.07, + "learning_rate": 4.32411380597015e-05, + "loss": 0.0001, + "step": 57964 + }, + { + "epoch": 54.07, + "learning_rate": 4.3240671641791045e-05, + "loss": 0.0, + "step": 57968 + }, + { + "epoch": 54.08, + "learning_rate": 4.32402052238806e-05, + "loss": 0.0, + "step": 57972 + }, + { + "epoch": 54.08, + "learning_rate": 4.3239738805970155e-05, + "loss": 0.0, + "step": 57976 + }, + { + "epoch": 54.09, + "learning_rate": 4.32392723880597e-05, + "loss": 0.0001, + "step": 57980 + }, + { + "epoch": 54.09, + "learning_rate": 4.323880597014925e-05, + "loss": 0.0001, + "step": 57984 + }, + { + "epoch": 54.09, + "learning_rate": 4.323833955223881e-05, + "loss": 0.0, + "step": 57988 + }, + { + "epoch": 54.1, + "learning_rate": 4.323787313432836e-05, + "loss": 0.0, + "step": 57992 + }, + { + "epoch": 54.1, + "learning_rate": 4.323740671641791e-05, + "loss": 0.0002, + "step": 57996 + }, + { + "epoch": 54.1, + "learning_rate": 4.3236940298507464e-05, + "loss": 0.0001, + "step": 58000 + }, + { + "epoch": 54.1, + "eval_exact_match": 0.7427466150870407, + "eval_exec": 0.7785299806576402, + "eval_loss": 0.49178797006607056, + "eval_runtime": 1102.7711, + "eval_samples_per_second": 0.938, + "step": 58000 + }, + { + "epoch": 54.11, + "learning_rate": 4.323647388059702e-05, + "loss": 0.0, + "step": 58004 + }, + { + "epoch": 54.11, + "learning_rate": 4.323600746268657e-05, + "loss": 0.0001, + "step": 58008 + }, + { + "epoch": 54.12, + "learning_rate": 4.323554104477612e-05, + "loss": 0.0, + "step": 58012 + }, + { + "epoch": 54.12, + "learning_rate": 4.323507462686568e-05, + "loss": 0.0008, + "step": 58016 + }, + { + "epoch": 54.12, + "learning_rate": 4.3234608208955225e-05, + "loss": 0.0, + "step": 58020 + }, + { + "epoch": 54.13, + "learning_rate": 4.323414179104478e-05, + "loss": 0.0, + "step": 58024 + }, + { + "epoch": 54.13, + "learning_rate": 4.323367537313433e-05, + "loss": 0.0, + "step": 58028 + }, + { + "epoch": 54.13, + "learning_rate": 4.323320895522388e-05, + "loss": 0.0, + "step": 58032 + }, + { + "epoch": 54.14, + "learning_rate": 4.323274253731344e-05, + "loss": 0.0, + "step": 58036 + }, + { + "epoch": 54.14, + "learning_rate": 4.3232276119402986e-05, + "loss": 0.0, + "step": 58040 + }, + { + "epoch": 54.15, + "learning_rate": 4.3231809701492534e-05, + "loss": 0.0, + "step": 58044 + }, + { + "epoch": 54.15, + "learning_rate": 4.3231343283582096e-05, + "loss": 0.0005, + "step": 58048 + }, + { + "epoch": 54.15, + "learning_rate": 4.3230876865671644e-05, + "loss": 0.0, + "step": 58052 + }, + { + "epoch": 54.16, + "learning_rate": 4.323041044776119e-05, + "loss": 0.0002, + "step": 58056 + }, + { + "epoch": 54.16, + "learning_rate": 4.322994402985075e-05, + "loss": 0.0, + "step": 58060 + }, + { + "epoch": 54.16, + "learning_rate": 4.32294776119403e-05, + "loss": 0.0003, + "step": 58064 + }, + { + "epoch": 54.17, + "learning_rate": 4.322901119402985e-05, + "loss": 0.0, + "step": 58068 + }, + { + "epoch": 54.17, + "learning_rate": 4.3228544776119405e-05, + "loss": 0.0, + "step": 58072 + }, + { + "epoch": 54.18, + "learning_rate": 4.322807835820896e-05, + "loss": 0.0, + "step": 58076 + }, + { + "epoch": 54.18, + "learning_rate": 4.322761194029851e-05, + "loss": 0.0043, + "step": 58080 + }, + { + "epoch": 54.18, + "learning_rate": 4.322714552238806e-05, + "loss": 0.0, + "step": 58084 + }, + { + "epoch": 54.19, + "learning_rate": 4.322667910447761e-05, + "loss": 0.0, + "step": 58088 + }, + { + "epoch": 54.19, + "learning_rate": 4.3226212686567166e-05, + "loss": 0.0, + "step": 58092 + }, + { + "epoch": 54.19, + "learning_rate": 4.322574626865672e-05, + "loss": 0.0, + "step": 58096 + }, + { + "epoch": 54.2, + "learning_rate": 4.322527985074627e-05, + "loss": 0.0, + "step": 58100 + }, + { + "epoch": 54.2, + "learning_rate": 4.3224813432835824e-05, + "loss": 0.0, + "step": 58104 + }, + { + "epoch": 54.21, + "learning_rate": 4.322434701492538e-05, + "loss": 0.0, + "step": 58108 + }, + { + "epoch": 54.21, + "learning_rate": 4.322388059701493e-05, + "loss": 0.0, + "step": 58112 + }, + { + "epoch": 54.21, + "learning_rate": 4.322341417910448e-05, + "loss": 0.0003, + "step": 58116 + }, + { + "epoch": 54.22, + "learning_rate": 4.322294776119403e-05, + "loss": 0.0082, + "step": 58120 + }, + { + "epoch": 54.22, + "learning_rate": 4.3222481343283585e-05, + "loss": 0.0, + "step": 58124 + }, + { + "epoch": 54.22, + "learning_rate": 4.322201492537314e-05, + "loss": 0.0003, + "step": 58128 + }, + { + "epoch": 54.23, + "learning_rate": 4.322154850746269e-05, + "loss": 0.0063, + "step": 58132 + }, + { + "epoch": 54.23, + "learning_rate": 4.322108208955224e-05, + "loss": 0.0, + "step": 58136 + }, + { + "epoch": 54.24, + "learning_rate": 4.32206156716418e-05, + "loss": 0.0001, + "step": 58140 + }, + { + "epoch": 54.24, + "learning_rate": 4.3220149253731346e-05, + "loss": 0.0002, + "step": 58144 + }, + { + "epoch": 54.24, + "learning_rate": 4.3219682835820894e-05, + "loss": 0.0, + "step": 58148 + }, + { + "epoch": 54.25, + "learning_rate": 4.321921641791045e-05, + "loss": 0.0004, + "step": 58152 + }, + { + "epoch": 54.25, + "learning_rate": 4.3218750000000004e-05, + "loss": 0.0001, + "step": 58156 + }, + { + "epoch": 54.25, + "learning_rate": 4.321828358208955e-05, + "loss": 0.0, + "step": 58160 + }, + { + "epoch": 54.26, + "learning_rate": 4.321781716417911e-05, + "loss": 0.0, + "step": 58164 + }, + { + "epoch": 54.26, + "learning_rate": 4.321735074626866e-05, + "loss": 0.0, + "step": 58168 + }, + { + "epoch": 54.26, + "learning_rate": 4.321688432835821e-05, + "loss": 0.0, + "step": 58172 + }, + { + "epoch": 54.27, + "learning_rate": 4.3216417910447765e-05, + "loss": 0.0, + "step": 58176 + }, + { + "epoch": 54.27, + "learning_rate": 4.321595149253731e-05, + "loss": 0.0, + "step": 58180 + }, + { + "epoch": 54.28, + "learning_rate": 4.321548507462687e-05, + "loss": 0.0, + "step": 58184 + }, + { + "epoch": 54.28, + "learning_rate": 4.321501865671642e-05, + "loss": 0.0001, + "step": 58188 + }, + { + "epoch": 54.28, + "learning_rate": 4.321455223880597e-05, + "loss": 0.0, + "step": 58192 + }, + { + "epoch": 54.29, + "learning_rate": 4.3214085820895526e-05, + "loss": 0.001, + "step": 58196 + }, + { + "epoch": 54.29, + "learning_rate": 4.321361940298508e-05, + "loss": 0.0019, + "step": 58200 + }, + { + "epoch": 54.29, + "learning_rate": 4.321315298507463e-05, + "loss": 0.0, + "step": 58204 + }, + { + "epoch": 54.3, + "learning_rate": 4.321268656716418e-05, + "loss": 0.0, + "step": 58208 + }, + { + "epoch": 54.3, + "learning_rate": 4.321222014925373e-05, + "loss": 0.0, + "step": 58212 + }, + { + "epoch": 54.31, + "learning_rate": 4.3211753731343287e-05, + "loss": 0.0026, + "step": 58216 + }, + { + "epoch": 54.31, + "learning_rate": 4.3211287313432835e-05, + "loss": 0.0001, + "step": 58220 + }, + { + "epoch": 54.31, + "learning_rate": 4.321082089552239e-05, + "loss": 0.0003, + "step": 58224 + }, + { + "epoch": 54.32, + "learning_rate": 4.3210354477611944e-05, + "loss": 0.0, + "step": 58228 + }, + { + "epoch": 54.32, + "learning_rate": 4.320988805970149e-05, + "loss": 0.0001, + "step": 58232 + }, + { + "epoch": 54.32, + "learning_rate": 4.320942164179105e-05, + "loss": 0.0054, + "step": 58236 + }, + { + "epoch": 54.33, + "learning_rate": 4.3208955223880596e-05, + "loss": 0.0, + "step": 58240 + }, + { + "epoch": 54.33, + "learning_rate": 4.320848880597015e-05, + "loss": 0.0, + "step": 58244 + }, + { + "epoch": 54.34, + "learning_rate": 4.3208022388059705e-05, + "loss": 0.0, + "step": 58248 + }, + { + "epoch": 54.34, + "learning_rate": 4.3207555970149254e-05, + "loss": 0.0005, + "step": 58252 + }, + { + "epoch": 54.34, + "learning_rate": 4.320708955223881e-05, + "loss": 0.0003, + "step": 58256 + }, + { + "epoch": 54.35, + "learning_rate": 4.3206623134328363e-05, + "loss": 0.0, + "step": 58260 + }, + { + "epoch": 54.35, + "learning_rate": 4.320615671641791e-05, + "loss": 0.0, + "step": 58264 + }, + { + "epoch": 54.35, + "learning_rate": 4.3205690298507466e-05, + "loss": 0.0, + "step": 58268 + }, + { + "epoch": 54.36, + "learning_rate": 4.3205223880597015e-05, + "loss": 0.0, + "step": 58272 + }, + { + "epoch": 54.36, + "learning_rate": 4.320475746268657e-05, + "loss": 0.0, + "step": 58276 + }, + { + "epoch": 54.37, + "learning_rate": 4.3204291044776124e-05, + "loss": 0.0, + "step": 58280 + }, + { + "epoch": 54.37, + "learning_rate": 4.320382462686567e-05, + "loss": 0.0, + "step": 58284 + }, + { + "epoch": 54.37, + "learning_rate": 4.320335820895523e-05, + "loss": 0.0, + "step": 58288 + }, + { + "epoch": 54.38, + "learning_rate": 4.320289179104478e-05, + "loss": 0.0004, + "step": 58292 + }, + { + "epoch": 54.38, + "learning_rate": 4.320242537313433e-05, + "loss": 0.0, + "step": 58296 + }, + { + "epoch": 54.38, + "learning_rate": 4.320195895522388e-05, + "loss": 0.0, + "step": 58300 + }, + { + "epoch": 54.39, + "learning_rate": 4.320149253731344e-05, + "loss": 0.011, + "step": 58304 + }, + { + "epoch": 54.39, + "learning_rate": 4.320102611940299e-05, + "loss": 0.0052, + "step": 58308 + }, + { + "epoch": 54.4, + "learning_rate": 4.3200559701492537e-05, + "loss": 0.0001, + "step": 58312 + }, + { + "epoch": 54.4, + "learning_rate": 4.320009328358209e-05, + "loss": 0.0, + "step": 58316 + }, + { + "epoch": 54.4, + "learning_rate": 4.3199626865671646e-05, + "loss": 0.0, + "step": 58320 + }, + { + "epoch": 54.41, + "learning_rate": 4.3199160447761195e-05, + "loss": 0.0001, + "step": 58324 + }, + { + "epoch": 54.41, + "learning_rate": 4.319869402985075e-05, + "loss": 0.0, + "step": 58328 + }, + { + "epoch": 54.41, + "learning_rate": 4.31982276119403e-05, + "loss": 0.0003, + "step": 58332 + }, + { + "epoch": 54.42, + "learning_rate": 4.319776119402985e-05, + "loss": 0.0001, + "step": 58336 + }, + { + "epoch": 54.42, + "learning_rate": 4.319729477611941e-05, + "loss": 0.001, + "step": 58340 + }, + { + "epoch": 54.43, + "learning_rate": 4.3196828358208955e-05, + "loss": 0.0021, + "step": 58344 + }, + { + "epoch": 54.43, + "learning_rate": 4.319636194029851e-05, + "loss": 0.0003, + "step": 58348 + }, + { + "epoch": 54.43, + "learning_rate": 4.3195895522388065e-05, + "loss": 0.0, + "step": 58352 + }, + { + "epoch": 54.44, + "learning_rate": 4.3195429104477613e-05, + "loss": 0.0001, + "step": 58356 + }, + { + "epoch": 54.44, + "learning_rate": 4.319496268656716e-05, + "loss": 0.0001, + "step": 58360 + }, + { + "epoch": 54.44, + "learning_rate": 4.319449626865672e-05, + "loss": 0.0, + "step": 58364 + }, + { + "epoch": 54.45, + "learning_rate": 4.319402985074627e-05, + "loss": 0.0, + "step": 58368 + }, + { + "epoch": 54.45, + "learning_rate": 4.319356343283582e-05, + "loss": 0.0, + "step": 58372 + }, + { + "epoch": 54.46, + "learning_rate": 4.3193097014925374e-05, + "loss": 0.0016, + "step": 58376 + }, + { + "epoch": 54.46, + "learning_rate": 4.319263059701493e-05, + "loss": 0.0, + "step": 58380 + }, + { + "epoch": 54.46, + "learning_rate": 4.319216417910448e-05, + "loss": 0.0001, + "step": 58384 + }, + { + "epoch": 54.47, + "learning_rate": 4.319169776119403e-05, + "loss": 0.0, + "step": 58388 + }, + { + "epoch": 54.47, + "learning_rate": 4.319123134328358e-05, + "loss": 0.0, + "step": 58392 + }, + { + "epoch": 54.47, + "learning_rate": 4.3190764925373135e-05, + "loss": 0.0, + "step": 58396 + }, + { + "epoch": 54.48, + "learning_rate": 4.319029850746269e-05, + "loss": 0.0001, + "step": 58400 + }, + { + "epoch": 54.48, + "learning_rate": 4.318983208955224e-05, + "loss": 0.0, + "step": 58404 + }, + { + "epoch": 54.49, + "learning_rate": 4.318936567164179e-05, + "loss": 0.0001, + "step": 58408 + }, + { + "epoch": 54.49, + "learning_rate": 4.318889925373135e-05, + "loss": 0.0004, + "step": 58412 + }, + { + "epoch": 54.49, + "learning_rate": 4.3188432835820896e-05, + "loss": 0.0004, + "step": 58416 + }, + { + "epoch": 54.5, + "learning_rate": 4.3187966417910445e-05, + "loss": 0.0, + "step": 58420 + }, + { + "epoch": 54.5, + "learning_rate": 4.3187500000000006e-05, + "loss": 0.0001, + "step": 58424 + }, + { + "epoch": 54.5, + "learning_rate": 4.3187033582089554e-05, + "loss": 0.0, + "step": 58428 + }, + { + "epoch": 54.51, + "learning_rate": 4.318656716417911e-05, + "loss": 0.0004, + "step": 58432 + }, + { + "epoch": 54.51, + "learning_rate": 4.318610074626866e-05, + "loss": 0.0001, + "step": 58436 + }, + { + "epoch": 54.51, + "learning_rate": 4.318563432835821e-05, + "loss": 0.0, + "step": 58440 + }, + { + "epoch": 54.52, + "learning_rate": 4.318516791044777e-05, + "loss": 0.0, + "step": 58444 + }, + { + "epoch": 54.52, + "learning_rate": 4.3184701492537315e-05, + "loss": 0.0008, + "step": 58448 + }, + { + "epoch": 54.53, + "learning_rate": 4.3184235074626863e-05, + "loss": 0.0001, + "step": 58452 + }, + { + "epoch": 54.53, + "learning_rate": 4.3183768656716425e-05, + "loss": 0.0, + "step": 58456 + }, + { + "epoch": 54.53, + "learning_rate": 4.318330223880597e-05, + "loss": 0.0, + "step": 58460 + }, + { + "epoch": 54.54, + "learning_rate": 4.318283582089552e-05, + "loss": 0.0017, + "step": 58464 + }, + { + "epoch": 54.54, + "learning_rate": 4.3182369402985076e-05, + "loss": 0.0006, + "step": 58468 + }, + { + "epoch": 54.54, + "learning_rate": 4.318190298507463e-05, + "loss": 0.0001, + "step": 58472 + }, + { + "epoch": 54.55, + "learning_rate": 4.318143656716418e-05, + "loss": 0.0, + "step": 58476 + }, + { + "epoch": 54.55, + "learning_rate": 4.3180970149253734e-05, + "loss": 0.0, + "step": 58480 + }, + { + "epoch": 54.56, + "learning_rate": 4.318050373134329e-05, + "loss": 0.0, + "step": 58484 + }, + { + "epoch": 54.56, + "learning_rate": 4.318003731343284e-05, + "loss": 0.0029, + "step": 58488 + }, + { + "epoch": 54.56, + "learning_rate": 4.317957089552239e-05, + "loss": 0.0, + "step": 58492 + }, + { + "epoch": 54.57, + "learning_rate": 4.317910447761194e-05, + "loss": 0.0005, + "step": 58496 + }, + { + "epoch": 54.57, + "learning_rate": 4.3178638059701495e-05, + "loss": 0.0001, + "step": 58500 + }, + { + "epoch": 54.57, + "eval_exact_match": 0.7350096711798839, + "eval_exec": 0.7717601547388782, + "eval_loss": 0.4618161916732788, + "eval_runtime": 1164.5608, + "eval_samples_per_second": 0.888, + "step": 58500 + }, + { + "epoch": 54.57, + "learning_rate": 4.317817164179105e-05, + "loss": 0.0, + "step": 58504 + }, + { + "epoch": 54.58, + "learning_rate": 4.31777052238806e-05, + "loss": 0.0, + "step": 58508 + }, + { + "epoch": 54.58, + "learning_rate": 4.3177238805970146e-05, + "loss": 0.0, + "step": 58512 + }, + { + "epoch": 54.59, + "learning_rate": 4.317677238805971e-05, + "loss": 0.0, + "step": 58516 + }, + { + "epoch": 54.59, + "learning_rate": 4.3176305970149256e-05, + "loss": 0.0, + "step": 58520 + }, + { + "epoch": 54.59, + "learning_rate": 4.3175839552238804e-05, + "loss": 0.0, + "step": 58524 + }, + { + "epoch": 54.6, + "learning_rate": 4.317537313432836e-05, + "loss": 0.0, + "step": 58528 + }, + { + "epoch": 54.6, + "learning_rate": 4.3174906716417914e-05, + "loss": 0.0001, + "step": 58532 + }, + { + "epoch": 54.6, + "learning_rate": 4.317444029850746e-05, + "loss": 0.0, + "step": 58536 + }, + { + "epoch": 54.61, + "learning_rate": 4.317397388059702e-05, + "loss": 0.0, + "step": 58540 + }, + { + "epoch": 54.61, + "learning_rate": 4.317350746268657e-05, + "loss": 0.0, + "step": 58544 + }, + { + "epoch": 54.62, + "learning_rate": 4.317304104477612e-05, + "loss": 0.0, + "step": 58548 + }, + { + "epoch": 54.62, + "learning_rate": 4.3172574626865675e-05, + "loss": 0.0, + "step": 58552 + }, + { + "epoch": 54.62, + "learning_rate": 4.317210820895522e-05, + "loss": 0.0, + "step": 58556 + }, + { + "epoch": 54.63, + "learning_rate": 4.317164179104478e-05, + "loss": 0.0, + "step": 58560 + }, + { + "epoch": 54.63, + "learning_rate": 4.317117537313433e-05, + "loss": 0.0, + "step": 58564 + }, + { + "epoch": 54.63, + "learning_rate": 4.317070895522388e-05, + "loss": 0.0, + "step": 58568 + }, + { + "epoch": 54.64, + "learning_rate": 4.317024253731343e-05, + "loss": 0.004, + "step": 58572 + }, + { + "epoch": 54.64, + "learning_rate": 4.316977611940299e-05, + "loss": 0.0, + "step": 58576 + }, + { + "epoch": 54.65, + "learning_rate": 4.316930970149254e-05, + "loss": 0.0, + "step": 58580 + }, + { + "epoch": 54.65, + "learning_rate": 4.316884328358209e-05, + "loss": 0.0, + "step": 58584 + }, + { + "epoch": 54.65, + "learning_rate": 4.316837686567164e-05, + "loss": 0.0, + "step": 58588 + }, + { + "epoch": 54.66, + "learning_rate": 4.31679104477612e-05, + "loss": 0.0, + "step": 58592 + }, + { + "epoch": 54.66, + "learning_rate": 4.316744402985075e-05, + "loss": 0.0, + "step": 58596 + }, + { + "epoch": 54.66, + "learning_rate": 4.31669776119403e-05, + "loss": 0.0, + "step": 58600 + }, + { + "epoch": 54.67, + "learning_rate": 4.316651119402985e-05, + "loss": 0.0, + "step": 58604 + }, + { + "epoch": 54.67, + "learning_rate": 4.316604477611941e-05, + "loss": 0.0, + "step": 58608 + }, + { + "epoch": 54.68, + "learning_rate": 4.316557835820896e-05, + "loss": 0.0, + "step": 58612 + }, + { + "epoch": 54.68, + "learning_rate": 4.3165111940298506e-05, + "loss": 0.0, + "step": 58616 + }, + { + "epoch": 54.68, + "learning_rate": 4.316464552238806e-05, + "loss": 0.0, + "step": 58620 + }, + { + "epoch": 54.69, + "learning_rate": 4.3164179104477616e-05, + "loss": 0.0, + "step": 58624 + }, + { + "epoch": 54.69, + "learning_rate": 4.3163712686567164e-05, + "loss": 0.0, + "step": 58628 + }, + { + "epoch": 54.69, + "learning_rate": 4.316324626865672e-05, + "loss": 0.0001, + "step": 58632 + }, + { + "epoch": 54.7, + "learning_rate": 4.3162779850746274e-05, + "loss": 0.0, + "step": 58636 + }, + { + "epoch": 54.7, + "learning_rate": 4.316231343283582e-05, + "loss": 0.0, + "step": 58640 + }, + { + "epoch": 54.71, + "learning_rate": 4.316184701492538e-05, + "loss": 0.0, + "step": 58644 + }, + { + "epoch": 54.71, + "learning_rate": 4.3161380597014925e-05, + "loss": 0.0, + "step": 58648 + }, + { + "epoch": 54.71, + "learning_rate": 4.316091417910448e-05, + "loss": 0.0, + "step": 58652 + }, + { + "epoch": 54.72, + "learning_rate": 4.3160447761194035e-05, + "loss": 0.0, + "step": 58656 + }, + { + "epoch": 54.72, + "learning_rate": 4.315998134328358e-05, + "loss": 0.0, + "step": 58660 + }, + { + "epoch": 54.72, + "learning_rate": 4.315951492537313e-05, + "loss": 0.0001, + "step": 58664 + }, + { + "epoch": 54.73, + "learning_rate": 4.315904850746269e-05, + "loss": 0.0, + "step": 58668 + }, + { + "epoch": 54.73, + "learning_rate": 4.315858208955224e-05, + "loss": 0.0, + "step": 58672 + }, + { + "epoch": 54.73, + "learning_rate": 4.315811567164179e-05, + "loss": 0.0, + "step": 58676 + }, + { + "epoch": 54.74, + "learning_rate": 4.3157649253731344e-05, + "loss": 0.0004, + "step": 58680 + }, + { + "epoch": 54.74, + "learning_rate": 4.31571828358209e-05, + "loss": 0.0, + "step": 58684 + }, + { + "epoch": 54.75, + "learning_rate": 4.315671641791045e-05, + "loss": 0.0, + "step": 58688 + }, + { + "epoch": 54.75, + "learning_rate": 4.315625e-05, + "loss": 0.0, + "step": 58692 + }, + { + "epoch": 54.75, + "learning_rate": 4.315578358208956e-05, + "loss": 0.0, + "step": 58696 + }, + { + "epoch": 54.76, + "learning_rate": 4.3155317164179105e-05, + "loss": 0.0023, + "step": 58700 + }, + { + "epoch": 54.76, + "learning_rate": 4.315485074626866e-05, + "loss": 0.0039, + "step": 58704 + }, + { + "epoch": 54.76, + "learning_rate": 4.315438432835821e-05, + "loss": 0.0, + "step": 58708 + }, + { + "epoch": 54.77, + "learning_rate": 4.315391791044776e-05, + "loss": 0.0001, + "step": 58712 + }, + { + "epoch": 54.77, + "learning_rate": 4.315345149253732e-05, + "loss": 0.0, + "step": 58716 + }, + { + "epoch": 54.78, + "learning_rate": 4.3152985074626866e-05, + "loss": 0.0002, + "step": 58720 + }, + { + "epoch": 54.78, + "learning_rate": 4.3152518656716414e-05, + "loss": 0.0, + "step": 58724 + }, + { + "epoch": 54.78, + "learning_rate": 4.3152052238805976e-05, + "loss": 0.0, + "step": 58728 + }, + { + "epoch": 54.79, + "learning_rate": 4.3151585820895524e-05, + "loss": 0.0127, + "step": 58732 + }, + { + "epoch": 54.79, + "learning_rate": 4.315111940298507e-05, + "loss": 0.0, + "step": 58736 + }, + { + "epoch": 54.79, + "learning_rate": 4.315065298507463e-05, + "loss": 0.0006, + "step": 58740 + }, + { + "epoch": 54.8, + "learning_rate": 4.315018656716418e-05, + "loss": 0.0, + "step": 58744 + }, + { + "epoch": 54.8, + "learning_rate": 4.314972014925374e-05, + "loss": 0.0, + "step": 58748 + }, + { + "epoch": 54.81, + "learning_rate": 4.3149253731343285e-05, + "loss": 0.0009, + "step": 58752 + }, + { + "epoch": 54.81, + "learning_rate": 4.314878731343284e-05, + "loss": 0.0003, + "step": 58756 + }, + { + "epoch": 54.81, + "learning_rate": 4.3148320895522395e-05, + "loss": 0.0002, + "step": 58760 + }, + { + "epoch": 54.82, + "learning_rate": 4.314785447761194e-05, + "loss": 0.0001, + "step": 58764 + }, + { + "epoch": 54.82, + "learning_rate": 4.314738805970149e-05, + "loss": 0.0011, + "step": 58768 + }, + { + "epoch": 54.82, + "learning_rate": 4.3146921641791046e-05, + "loss": 0.0, + "step": 58772 + }, + { + "epoch": 54.83, + "learning_rate": 4.31464552238806e-05, + "loss": 0.0, + "step": 58776 + }, + { + "epoch": 54.83, + "learning_rate": 4.314598880597015e-05, + "loss": 0.0, + "step": 58780 + }, + { + "epoch": 54.84, + "learning_rate": 4.3145522388059704e-05, + "loss": 0.0, + "step": 58784 + }, + { + "epoch": 54.84, + "learning_rate": 4.314505597014926e-05, + "loss": 0.0003, + "step": 58788 + }, + { + "epoch": 54.84, + "learning_rate": 4.314458955223881e-05, + "loss": 0.0003, + "step": 58792 + }, + { + "epoch": 54.85, + "learning_rate": 4.314412313432836e-05, + "loss": 0.0005, + "step": 58796 + }, + { + "epoch": 54.85, + "learning_rate": 4.314365671641791e-05, + "loss": 0.0005, + "step": 58800 + }, + { + "epoch": 54.85, + "learning_rate": 4.3143190298507465e-05, + "loss": 0.0, + "step": 58804 + }, + { + "epoch": 54.86, + "learning_rate": 4.314272388059702e-05, + "loss": 0.0001, + "step": 58808 + }, + { + "epoch": 54.86, + "learning_rate": 4.314225746268657e-05, + "loss": 0.0, + "step": 58812 + }, + { + "epoch": 54.87, + "learning_rate": 4.314179104477612e-05, + "loss": 0.0, + "step": 58816 + }, + { + "epoch": 54.87, + "learning_rate": 4.314132462686568e-05, + "loss": 0.0002, + "step": 58820 + }, + { + "epoch": 54.87, + "learning_rate": 4.3140858208955226e-05, + "loss": 0.0, + "step": 58824 + }, + { + "epoch": 54.88, + "learning_rate": 4.3140391791044774e-05, + "loss": 0.0011, + "step": 58828 + }, + { + "epoch": 54.88, + "learning_rate": 4.313992537313433e-05, + "loss": 0.0, + "step": 58832 + }, + { + "epoch": 54.88, + "learning_rate": 4.3139458955223884e-05, + "loss": 0.0, + "step": 58836 + }, + { + "epoch": 54.89, + "learning_rate": 4.313899253731343e-05, + "loss": 0.0, + "step": 58840 + }, + { + "epoch": 54.89, + "learning_rate": 4.313852611940299e-05, + "loss": 0.0, + "step": 58844 + }, + { + "epoch": 54.9, + "learning_rate": 4.313805970149254e-05, + "loss": 0.0001, + "step": 58848 + }, + { + "epoch": 54.9, + "learning_rate": 4.313759328358209e-05, + "loss": 0.0, + "step": 58852 + }, + { + "epoch": 54.9, + "learning_rate": 4.3137126865671645e-05, + "loss": 0.0, + "step": 58856 + }, + { + "epoch": 54.91, + "learning_rate": 4.313666044776119e-05, + "loss": 0.0001, + "step": 58860 + }, + { + "epoch": 54.91, + "learning_rate": 4.313619402985075e-05, + "loss": 0.0001, + "step": 58864 + }, + { + "epoch": 54.91, + "learning_rate": 4.31357276119403e-05, + "loss": 0.0005, + "step": 58868 + }, + { + "epoch": 54.92, + "learning_rate": 4.313526119402985e-05, + "loss": 0.0, + "step": 58872 + }, + { + "epoch": 54.92, + "learning_rate": 4.3134794776119406e-05, + "loss": 0.0002, + "step": 58876 + }, + { + "epoch": 54.93, + "learning_rate": 4.313432835820896e-05, + "loss": 0.0, + "step": 58880 + }, + { + "epoch": 54.93, + "learning_rate": 4.313386194029851e-05, + "loss": 0.0006, + "step": 58884 + }, + { + "epoch": 54.93, + "learning_rate": 4.313339552238806e-05, + "loss": 0.0, + "step": 58888 + }, + { + "epoch": 54.94, + "learning_rate": 4.313292910447761e-05, + "loss": 0.0, + "step": 58892 + }, + { + "epoch": 54.94, + "learning_rate": 4.313246268656717e-05, + "loss": 0.0003, + "step": 58896 + }, + { + "epoch": 54.94, + "learning_rate": 4.3131996268656715e-05, + "loss": 0.0002, + "step": 58900 + }, + { + "epoch": 54.95, + "learning_rate": 4.313152985074627e-05, + "loss": 0.0, + "step": 58904 + }, + { + "epoch": 54.95, + "learning_rate": 4.3131063432835825e-05, + "loss": 0.0, + "step": 58908 + }, + { + "epoch": 54.96, + "learning_rate": 4.313059701492538e-05, + "loss": 0.0, + "step": 58912 + }, + { + "epoch": 54.96, + "learning_rate": 4.313013059701493e-05, + "loss": 0.0, + "step": 58916 + }, + { + "epoch": 54.96, + "learning_rate": 4.3129664179104476e-05, + "loss": 0.0001, + "step": 58920 + }, + { + "epoch": 54.97, + "learning_rate": 4.312919776119404e-05, + "loss": 0.0, + "step": 58924 + }, + { + "epoch": 54.97, + "learning_rate": 4.3128731343283586e-05, + "loss": 0.0, + "step": 58928 + }, + { + "epoch": 54.97, + "learning_rate": 4.3128264925373134e-05, + "loss": 0.0001, + "step": 58932 + }, + { + "epoch": 54.98, + "learning_rate": 4.312779850746269e-05, + "loss": 0.0, + "step": 58936 + }, + { + "epoch": 54.98, + "learning_rate": 4.3127332089552244e-05, + "loss": 0.0001, + "step": 58940 + }, + { + "epoch": 54.98, + "learning_rate": 4.312686567164179e-05, + "loss": 0.0003, + "step": 58944 + }, + { + "epoch": 54.99, + "learning_rate": 4.312639925373135e-05, + "loss": 0.0, + "step": 58948 + }, + { + "epoch": 54.99, + "learning_rate": 4.3125932835820895e-05, + "loss": 0.0, + "step": 58952 + }, + { + "epoch": 55.0, + "learning_rate": 4.312546641791045e-05, + "loss": 0.0008, + "step": 58956 + }, + { + "epoch": 55.0, + "learning_rate": 4.3125000000000005e-05, + "loss": 0.0004, + "step": 58960 + }, + { + "epoch": 55.0, + "learning_rate": 4.312453358208955e-05, + "loss": 0.0001, + "step": 58964 + }, + { + "epoch": 55.01, + "learning_rate": 4.312406716417911e-05, + "loss": 0.0, + "step": 58968 + }, + { + "epoch": 55.01, + "learning_rate": 4.312360074626866e-05, + "loss": 0.0, + "step": 58972 + }, + { + "epoch": 55.01, + "learning_rate": 4.312313432835821e-05, + "loss": 0.0, + "step": 58976 + }, + { + "epoch": 55.02, + "learning_rate": 4.312266791044776e-05, + "loss": 0.0, + "step": 58980 + }, + { + "epoch": 55.02, + "learning_rate": 4.312220149253732e-05, + "loss": 0.0, + "step": 58984 + }, + { + "epoch": 55.03, + "learning_rate": 4.312173507462687e-05, + "loss": 0.0002, + "step": 58988 + }, + { + "epoch": 55.03, + "learning_rate": 4.312126865671642e-05, + "loss": 0.0001, + "step": 58992 + }, + { + "epoch": 55.03, + "learning_rate": 4.312080223880597e-05, + "loss": 0.0, + "step": 58996 + }, + { + "epoch": 55.04, + "learning_rate": 4.312033582089553e-05, + "loss": 0.0, + "step": 59000 + }, + { + "epoch": 55.04, + "eval_exact_match": 0.7446808510638298, + "eval_exec": 0.781431334622824, + "eval_loss": 0.4688223898410797, + "eval_runtime": 1134.1914, + "eval_samples_per_second": 0.912, + "step": 59000 + }, + { + "epoch": 55.04, + "learning_rate": 4.3119869402985075e-05, + "loss": 0.0026, + "step": 59004 + }, + { + "epoch": 55.04, + "learning_rate": 4.311940298507463e-05, + "loss": 0.0, + "step": 59008 + }, + { + "epoch": 55.05, + "learning_rate": 4.311893656716418e-05, + "loss": 0.0, + "step": 59012 + }, + { + "epoch": 55.05, + "learning_rate": 4.311847014925373e-05, + "loss": 0.0, + "step": 59016 + }, + { + "epoch": 55.06, + "learning_rate": 4.311800373134329e-05, + "loss": 0.0095, + "step": 59020 + }, + { + "epoch": 55.06, + "learning_rate": 4.3117537313432836e-05, + "loss": 0.0001, + "step": 59024 + }, + { + "epoch": 55.06, + "learning_rate": 4.311707089552239e-05, + "loss": 0.0, + "step": 59028 + }, + { + "epoch": 55.07, + "learning_rate": 4.3116604477611946e-05, + "loss": 0.0001, + "step": 59032 + }, + { + "epoch": 55.07, + "learning_rate": 4.3116138059701494e-05, + "loss": 0.0, + "step": 59036 + }, + { + "epoch": 55.07, + "learning_rate": 4.311567164179104e-05, + "loss": 0.0, + "step": 59040 + }, + { + "epoch": 55.08, + "learning_rate": 4.3115205223880604e-05, + "loss": 0.0, + "step": 59044 + }, + { + "epoch": 55.08, + "learning_rate": 4.311473880597015e-05, + "loss": 0.0002, + "step": 59048 + }, + { + "epoch": 55.09, + "learning_rate": 4.31142723880597e-05, + "loss": 0.0, + "step": 59052 + }, + { + "epoch": 55.09, + "learning_rate": 4.3113805970149255e-05, + "loss": 0.0, + "step": 59056 + }, + { + "epoch": 55.09, + "learning_rate": 4.311333955223881e-05, + "loss": 0.0, + "step": 59060 + }, + { + "epoch": 55.1, + "learning_rate": 4.311287313432836e-05, + "loss": 0.0004, + "step": 59064 + }, + { + "epoch": 55.1, + "learning_rate": 4.311240671641791e-05, + "loss": 0.0002, + "step": 59068 + }, + { + "epoch": 55.1, + "learning_rate": 4.311194029850746e-05, + "loss": 0.0, + "step": 59072 + }, + { + "epoch": 55.11, + "learning_rate": 4.311147388059702e-05, + "loss": 0.0008, + "step": 59076 + }, + { + "epoch": 55.11, + "learning_rate": 4.311100746268657e-05, + "loss": 0.0, + "step": 59080 + }, + { + "epoch": 55.12, + "learning_rate": 4.311054104477612e-05, + "loss": 0.0003, + "step": 59084 + }, + { + "epoch": 55.12, + "learning_rate": 4.3110074626865674e-05, + "loss": 0.0, + "step": 59088 + }, + { + "epoch": 55.12, + "learning_rate": 4.310960820895523e-05, + "loss": 0.0023, + "step": 59092 + }, + { + "epoch": 55.13, + "learning_rate": 4.310914179104478e-05, + "loss": 0.001, + "step": 59096 + }, + { + "epoch": 55.13, + "learning_rate": 4.310867537313433e-05, + "loss": 0.0, + "step": 59100 + }, + { + "epoch": 55.13, + "learning_rate": 4.3108208955223886e-05, + "loss": 0.0, + "step": 59104 + }, + { + "epoch": 55.14, + "learning_rate": 4.3107742537313435e-05, + "loss": 0.0007, + "step": 59108 + }, + { + "epoch": 55.14, + "learning_rate": 4.310727611940299e-05, + "loss": 0.0, + "step": 59112 + }, + { + "epoch": 55.15, + "learning_rate": 4.310680970149254e-05, + "loss": 0.0, + "step": 59116 + }, + { + "epoch": 55.15, + "learning_rate": 4.310634328358209e-05, + "loss": 0.0045, + "step": 59120 + }, + { + "epoch": 55.15, + "learning_rate": 4.310587686567165e-05, + "loss": 0.0012, + "step": 59124 + }, + { + "epoch": 55.16, + "learning_rate": 4.3105410447761196e-05, + "loss": 0.0028, + "step": 59128 + }, + { + "epoch": 55.16, + "learning_rate": 4.3104944029850744e-05, + "loss": 0.0, + "step": 59132 + }, + { + "epoch": 55.16, + "learning_rate": 4.3104477611940305e-05, + "loss": 0.0, + "step": 59136 + }, + { + "epoch": 55.17, + "learning_rate": 4.3104011194029854e-05, + "loss": 0.0, + "step": 59140 + }, + { + "epoch": 55.17, + "learning_rate": 4.31035447761194e-05, + "loss": 0.0, + "step": 59144 + }, + { + "epoch": 55.18, + "learning_rate": 4.3103078358208957e-05, + "loss": 0.0, + "step": 59148 + }, + { + "epoch": 55.18, + "learning_rate": 4.310261194029851e-05, + "loss": 0.0011, + "step": 59152 + }, + { + "epoch": 55.18, + "learning_rate": 4.310214552238806e-05, + "loss": 0.0, + "step": 59156 + }, + { + "epoch": 55.19, + "learning_rate": 4.3101679104477615e-05, + "loss": 0.0, + "step": 59160 + }, + { + "epoch": 55.19, + "learning_rate": 4.310121268656717e-05, + "loss": 0.0, + "step": 59164 + }, + { + "epoch": 55.19, + "learning_rate": 4.310074626865672e-05, + "loss": 0.0, + "step": 59168 + }, + { + "epoch": 55.2, + "learning_rate": 4.310027985074627e-05, + "loss": 0.0, + "step": 59172 + }, + { + "epoch": 55.2, + "learning_rate": 4.309981343283582e-05, + "loss": 0.0, + "step": 59176 + }, + { + "epoch": 55.21, + "learning_rate": 4.3099347014925376e-05, + "loss": 0.0, + "step": 59180 + }, + { + "epoch": 55.21, + "learning_rate": 4.309888059701493e-05, + "loss": 0.0001, + "step": 59184 + }, + { + "epoch": 55.21, + "learning_rate": 4.309841417910448e-05, + "loss": 0.0, + "step": 59188 + }, + { + "epoch": 55.22, + "learning_rate": 4.309794776119403e-05, + "loss": 0.0001, + "step": 59192 + }, + { + "epoch": 55.22, + "learning_rate": 4.309748134328359e-05, + "loss": 0.0001, + "step": 59196 + }, + { + "epoch": 55.22, + "learning_rate": 4.3097014925373137e-05, + "loss": 0.0, + "step": 59200 + }, + { + "epoch": 55.23, + "learning_rate": 4.3096548507462685e-05, + "loss": 0.0001, + "step": 59204 + }, + { + "epoch": 55.23, + "learning_rate": 4.309608208955224e-05, + "loss": 0.0, + "step": 59208 + }, + { + "epoch": 55.24, + "learning_rate": 4.3095615671641794e-05, + "loss": 0.0, + "step": 59212 + }, + { + "epoch": 55.24, + "learning_rate": 4.309514925373134e-05, + "loss": 0.0044, + "step": 59216 + }, + { + "epoch": 55.24, + "learning_rate": 4.30946828358209e-05, + "loss": 0.0009, + "step": 59220 + }, + { + "epoch": 55.25, + "learning_rate": 4.309421641791045e-05, + "loss": 0.0001, + "step": 59224 + }, + { + "epoch": 55.25, + "learning_rate": 4.309375e-05, + "loss": 0.0, + "step": 59228 + }, + { + "epoch": 55.25, + "learning_rate": 4.3093283582089555e-05, + "loss": 0.0001, + "step": 59232 + }, + { + "epoch": 55.26, + "learning_rate": 4.3092817164179104e-05, + "loss": 0.0, + "step": 59236 + }, + { + "epoch": 55.26, + "learning_rate": 4.309235074626866e-05, + "loss": 0.0001, + "step": 59240 + }, + { + "epoch": 55.26, + "learning_rate": 4.3091884328358213e-05, + "loss": 0.0, + "step": 59244 + }, + { + "epoch": 55.27, + "learning_rate": 4.309141791044776e-05, + "loss": 0.0011, + "step": 59248 + }, + { + "epoch": 55.27, + "learning_rate": 4.3090951492537316e-05, + "loss": 0.0, + "step": 59252 + }, + { + "epoch": 55.28, + "learning_rate": 4.309048507462687e-05, + "loss": 0.0, + "step": 59256 + }, + { + "epoch": 55.28, + "learning_rate": 4.309001865671642e-05, + "loss": 0.0, + "step": 59260 + }, + { + "epoch": 55.28, + "learning_rate": 4.3089552238805974e-05, + "loss": 0.0, + "step": 59264 + }, + { + "epoch": 55.29, + "learning_rate": 4.308908582089552e-05, + "loss": 0.0, + "step": 59268 + }, + { + "epoch": 55.29, + "learning_rate": 4.308861940298508e-05, + "loss": 0.0001, + "step": 59272 + }, + { + "epoch": 55.29, + "learning_rate": 4.308815298507463e-05, + "loss": 0.0044, + "step": 59276 + }, + { + "epoch": 55.3, + "learning_rate": 4.308768656716418e-05, + "loss": 0.0002, + "step": 59280 + }, + { + "epoch": 55.3, + "learning_rate": 4.308722014925373e-05, + "loss": 0.0, + "step": 59284 + }, + { + "epoch": 55.31, + "learning_rate": 4.308675373134329e-05, + "loss": 0.0, + "step": 59288 + }, + { + "epoch": 55.31, + "learning_rate": 4.308628731343284e-05, + "loss": 0.0, + "step": 59292 + }, + { + "epoch": 55.31, + "learning_rate": 4.3085820895522387e-05, + "loss": 0.0, + "step": 59296 + }, + { + "epoch": 55.32, + "learning_rate": 4.308535447761194e-05, + "loss": 0.0, + "step": 59300 + }, + { + "epoch": 55.32, + "learning_rate": 4.3084888059701496e-05, + "loss": 0.0, + "step": 59304 + }, + { + "epoch": 55.32, + "learning_rate": 4.3084421641791044e-05, + "loss": 0.0, + "step": 59308 + }, + { + "epoch": 55.33, + "learning_rate": 4.30839552238806e-05, + "loss": 0.0001, + "step": 59312 + }, + { + "epoch": 55.33, + "learning_rate": 4.3083488805970154e-05, + "loss": 0.0, + "step": 59316 + }, + { + "epoch": 55.34, + "learning_rate": 4.30830223880597e-05, + "loss": 0.0, + "step": 59320 + }, + { + "epoch": 55.34, + "learning_rate": 4.308255597014926e-05, + "loss": 0.0, + "step": 59324 + }, + { + "epoch": 55.34, + "learning_rate": 4.3082089552238805e-05, + "loss": 0.0001, + "step": 59328 + }, + { + "epoch": 55.35, + "learning_rate": 4.308162313432836e-05, + "loss": 0.0, + "step": 59332 + }, + { + "epoch": 55.35, + "learning_rate": 4.3081156716417915e-05, + "loss": 0.0, + "step": 59336 + }, + { + "epoch": 55.35, + "learning_rate": 4.3080690298507463e-05, + "loss": 0.0, + "step": 59340 + }, + { + "epoch": 55.36, + "learning_rate": 4.308022388059701e-05, + "loss": 0.0, + "step": 59344 + }, + { + "epoch": 55.36, + "learning_rate": 4.307975746268657e-05, + "loss": 0.0, + "step": 59348 + }, + { + "epoch": 55.37, + "learning_rate": 4.307929104477612e-05, + "loss": 0.0, + "step": 59352 + }, + { + "epoch": 55.37, + "learning_rate": 4.307882462686567e-05, + "loss": 0.0, + "step": 59356 + }, + { + "epoch": 55.37, + "learning_rate": 4.3078358208955224e-05, + "loss": 0.0, + "step": 59360 + }, + { + "epoch": 55.38, + "learning_rate": 4.307789179104478e-05, + "loss": 0.0, + "step": 59364 + }, + { + "epoch": 55.38, + "learning_rate": 4.307742537313433e-05, + "loss": 0.0004, + "step": 59368 + }, + { + "epoch": 55.38, + "learning_rate": 4.307695895522388e-05, + "loss": 0.0, + "step": 59372 + }, + { + "epoch": 55.39, + "learning_rate": 4.307649253731344e-05, + "loss": 0.0, + "step": 59376 + }, + { + "epoch": 55.39, + "learning_rate": 4.3076026119402985e-05, + "loss": 0.0, + "step": 59380 + }, + { + "epoch": 55.4, + "learning_rate": 4.307555970149254e-05, + "loss": 0.0045, + "step": 59384 + }, + { + "epoch": 55.4, + "learning_rate": 4.307509328358209e-05, + "loss": 0.0, + "step": 59388 + }, + { + "epoch": 55.4, + "learning_rate": 4.307462686567164e-05, + "loss": 0.0, + "step": 59392 + }, + { + "epoch": 55.41, + "learning_rate": 4.30741604477612e-05, + "loss": 0.0, + "step": 59396 + }, + { + "epoch": 55.41, + "learning_rate": 4.3073694029850746e-05, + "loss": 0.0001, + "step": 59400 + }, + { + "epoch": 55.41, + "learning_rate": 4.30732276119403e-05, + "loss": 0.0001, + "step": 59404 + }, + { + "epoch": 55.42, + "learning_rate": 4.3072761194029856e-05, + "loss": 0.0, + "step": 59408 + }, + { + "epoch": 55.42, + "learning_rate": 4.3072294776119404e-05, + "loss": 0.0, + "step": 59412 + }, + { + "epoch": 55.43, + "learning_rate": 4.307182835820896e-05, + "loss": 0.0001, + "step": 59416 + }, + { + "epoch": 55.43, + "learning_rate": 4.307136194029851e-05, + "loss": 0.0, + "step": 59420 + }, + { + "epoch": 55.43, + "learning_rate": 4.307089552238806e-05, + "loss": 0.0002, + "step": 59424 + }, + { + "epoch": 55.44, + "learning_rate": 4.307042910447762e-05, + "loss": 0.0025, + "step": 59428 + }, + { + "epoch": 55.44, + "learning_rate": 4.3069962686567165e-05, + "loss": 0.0, + "step": 59432 + }, + { + "epoch": 55.44, + "learning_rate": 4.306949626865672e-05, + "loss": 0.0, + "step": 59436 + }, + { + "epoch": 55.45, + "learning_rate": 4.3069029850746275e-05, + "loss": 0.0, + "step": 59440 + }, + { + "epoch": 55.45, + "learning_rate": 4.306856343283582e-05, + "loss": 0.0001, + "step": 59444 + }, + { + "epoch": 55.46, + "learning_rate": 4.306809701492537e-05, + "loss": 0.0, + "step": 59448 + }, + { + "epoch": 55.46, + "learning_rate": 4.3067630597014926e-05, + "loss": 0.0, + "step": 59452 + }, + { + "epoch": 55.46, + "learning_rate": 4.306716417910448e-05, + "loss": 0.0, + "step": 59456 + }, + { + "epoch": 55.47, + "learning_rate": 4.306669776119403e-05, + "loss": 0.0, + "step": 59460 + }, + { + "epoch": 55.47, + "learning_rate": 4.3066231343283584e-05, + "loss": 0.0, + "step": 59464 + }, + { + "epoch": 55.47, + "learning_rate": 4.306576492537314e-05, + "loss": 0.0, + "step": 59468 + }, + { + "epoch": 55.48, + "learning_rate": 4.306529850746269e-05, + "loss": 0.0003, + "step": 59472 + }, + { + "epoch": 55.48, + "learning_rate": 4.306483208955224e-05, + "loss": 0.0003, + "step": 59476 + }, + { + "epoch": 55.49, + "learning_rate": 4.306436567164179e-05, + "loss": 0.0002, + "step": 59480 + }, + { + "epoch": 55.49, + "learning_rate": 4.3063899253731345e-05, + "loss": 0.0, + "step": 59484 + }, + { + "epoch": 55.49, + "learning_rate": 4.30634328358209e-05, + "loss": 0.0006, + "step": 59488 + }, + { + "epoch": 55.5, + "learning_rate": 4.306296641791045e-05, + "loss": 0.0007, + "step": 59492 + }, + { + "epoch": 55.5, + "learning_rate": 4.30625e-05, + "loss": 0.0002, + "step": 59496 + }, + { + "epoch": 55.5, + "learning_rate": 4.306203358208956e-05, + "loss": 0.0003, + "step": 59500 + }, + { + "epoch": 55.5, + "eval_exact_match": 0.7446808510638298, + "eval_exec": 0.7756286266924565, + "eval_loss": 0.48064661026000977, + "eval_runtime": 1173.6666, + "eval_samples_per_second": 0.881, + "step": 59500 + }, + { + "epoch": 55.51, + "learning_rate": 4.3061567164179106e-05, + "loss": 0.0001, + "step": 59504 + }, + { + "epoch": 55.51, + "learning_rate": 4.3061100746268654e-05, + "loss": 0.0, + "step": 59508 + }, + { + "epoch": 55.51, + "learning_rate": 4.306063432835821e-05, + "loss": 0.0001, + "step": 59512 + }, + { + "epoch": 55.52, + "learning_rate": 4.3060167910447764e-05, + "loss": 0.0001, + "step": 59516 + }, + { + "epoch": 55.52, + "learning_rate": 4.305970149253731e-05, + "loss": 0.0, + "step": 59520 + }, + { + "epoch": 55.53, + "learning_rate": 4.305923507462687e-05, + "loss": 0.001, + "step": 59524 + }, + { + "epoch": 55.53, + "learning_rate": 4.305876865671642e-05, + "loss": 0.0004, + "step": 59528 + }, + { + "epoch": 55.53, + "learning_rate": 4.305830223880597e-05, + "loss": 0.0002, + "step": 59532 + }, + { + "epoch": 55.54, + "learning_rate": 4.3057835820895525e-05, + "loss": 0.0, + "step": 59536 + }, + { + "epoch": 55.54, + "learning_rate": 4.305736940298507e-05, + "loss": 0.0, + "step": 59540 + }, + { + "epoch": 55.54, + "learning_rate": 4.305690298507463e-05, + "loss": 0.0, + "step": 59544 + }, + { + "epoch": 55.55, + "learning_rate": 4.305643656716418e-05, + "loss": 0.0001, + "step": 59548 + }, + { + "epoch": 55.55, + "learning_rate": 4.305597014925373e-05, + "loss": 0.0, + "step": 59552 + }, + { + "epoch": 55.56, + "learning_rate": 4.3055503731343286e-05, + "loss": 0.0003, + "step": 59556 + }, + { + "epoch": 55.56, + "learning_rate": 4.305503731343284e-05, + "loss": 0.0019, + "step": 59560 + }, + { + "epoch": 55.56, + "learning_rate": 4.305457089552239e-05, + "loss": 0.0, + "step": 59564 + }, + { + "epoch": 55.57, + "learning_rate": 4.3054104477611944e-05, + "loss": 0.0, + "step": 59568 + }, + { + "epoch": 55.57, + "learning_rate": 4.305363805970149e-05, + "loss": 0.0012, + "step": 59572 + }, + { + "epoch": 55.57, + "learning_rate": 4.305317164179105e-05, + "loss": 0.0, + "step": 59576 + }, + { + "epoch": 55.58, + "learning_rate": 4.30527052238806e-05, + "loss": 0.0, + "step": 59580 + }, + { + "epoch": 55.58, + "learning_rate": 4.305223880597015e-05, + "loss": 0.0, + "step": 59584 + }, + { + "epoch": 55.59, + "learning_rate": 4.3051772388059705e-05, + "loss": 0.0, + "step": 59588 + }, + { + "epoch": 55.59, + "learning_rate": 4.305130597014926e-05, + "loss": 0.0, + "step": 59592 + }, + { + "epoch": 55.59, + "learning_rate": 4.305083955223881e-05, + "loss": 0.0, + "step": 59596 + }, + { + "epoch": 55.6, + "learning_rate": 4.3050373134328356e-05, + "loss": 0.0003, + "step": 59600 + }, + { + "epoch": 55.6, + "learning_rate": 4.304990671641792e-05, + "loss": 0.0084, + "step": 59604 + }, + { + "epoch": 55.6, + "learning_rate": 4.3049440298507466e-05, + "loss": 0.0001, + "step": 59608 + }, + { + "epoch": 55.61, + "learning_rate": 4.3048973880597014e-05, + "loss": 0.0, + "step": 59612 + }, + { + "epoch": 55.61, + "learning_rate": 4.304850746268657e-05, + "loss": 0.0, + "step": 59616 + }, + { + "epoch": 55.62, + "learning_rate": 4.3048041044776124e-05, + "loss": 0.0, + "step": 59620 + }, + { + "epoch": 55.62, + "learning_rate": 4.304757462686567e-05, + "loss": 0.0, + "step": 59624 + }, + { + "epoch": 55.62, + "learning_rate": 4.304710820895523e-05, + "loss": 0.0, + "step": 59628 + }, + { + "epoch": 55.63, + "learning_rate": 4.3046641791044775e-05, + "loss": 0.0, + "step": 59632 + }, + { + "epoch": 55.63, + "learning_rate": 4.304617537313433e-05, + "loss": 0.0, + "step": 59636 + }, + { + "epoch": 55.63, + "learning_rate": 4.3045708955223885e-05, + "loss": 0.0, + "step": 59640 + }, + { + "epoch": 55.64, + "learning_rate": 4.304524253731343e-05, + "loss": 0.0, + "step": 59644 + }, + { + "epoch": 55.64, + "learning_rate": 4.304477611940299e-05, + "loss": 0.0, + "step": 59648 + }, + { + "epoch": 55.65, + "learning_rate": 4.304430970149254e-05, + "loss": 0.0, + "step": 59652 + }, + { + "epoch": 55.65, + "learning_rate": 4.304384328358209e-05, + "loss": 0.0001, + "step": 59656 + }, + { + "epoch": 55.65, + "learning_rate": 4.304337686567164e-05, + "loss": 0.0, + "step": 59660 + }, + { + "epoch": 55.66, + "learning_rate": 4.30429104477612e-05, + "loss": 0.0, + "step": 59664 + }, + { + "epoch": 55.66, + "learning_rate": 4.304244402985075e-05, + "loss": 0.0, + "step": 59668 + }, + { + "epoch": 55.66, + "learning_rate": 4.30419776119403e-05, + "loss": 0.0, + "step": 59672 + }, + { + "epoch": 55.67, + "learning_rate": 4.304151119402985e-05, + "loss": 0.0, + "step": 59676 + }, + { + "epoch": 55.67, + "learning_rate": 4.304104477611941e-05, + "loss": 0.0, + "step": 59680 + }, + { + "epoch": 55.68, + "learning_rate": 4.3040578358208955e-05, + "loss": 0.0, + "step": 59684 + }, + { + "epoch": 55.68, + "learning_rate": 4.304011194029851e-05, + "loss": 0.0, + "step": 59688 + }, + { + "epoch": 55.68, + "learning_rate": 4.303964552238806e-05, + "loss": 0.0001, + "step": 59692 + }, + { + "epoch": 55.69, + "learning_rate": 4.303917910447761e-05, + "loss": 0.0, + "step": 59696 + }, + { + "epoch": 55.69, + "learning_rate": 4.303871268656717e-05, + "loss": 0.0, + "step": 59700 + }, + { + "epoch": 55.69, + "learning_rate": 4.3038246268656716e-05, + "loss": 0.0, + "step": 59704 + }, + { + "epoch": 55.7, + "learning_rate": 4.303777985074627e-05, + "loss": 0.0, + "step": 59708 + }, + { + "epoch": 55.7, + "learning_rate": 4.3037313432835826e-05, + "loss": 0.0001, + "step": 59712 + }, + { + "epoch": 55.71, + "learning_rate": 4.3036847014925374e-05, + "loss": 0.0, + "step": 59716 + }, + { + "epoch": 55.71, + "learning_rate": 4.303638059701492e-05, + "loss": 0.0, + "step": 59720 + }, + { + "epoch": 55.71, + "learning_rate": 4.3035914179104484e-05, + "loss": 0.0, + "step": 59724 + }, + { + "epoch": 55.72, + "learning_rate": 4.303544776119403e-05, + "loss": 0.0037, + "step": 59728 + }, + { + "epoch": 55.72, + "learning_rate": 4.303498134328359e-05, + "loss": 0.0, + "step": 59732 + }, + { + "epoch": 55.72, + "learning_rate": 4.3034514925373135e-05, + "loss": 0.0, + "step": 59736 + }, + { + "epoch": 55.73, + "learning_rate": 4.303404850746269e-05, + "loss": 0.0, + "step": 59740 + }, + { + "epoch": 55.73, + "learning_rate": 4.3033582089552245e-05, + "loss": 0.0, + "step": 59744 + }, + { + "epoch": 55.73, + "learning_rate": 4.303311567164179e-05, + "loss": 0.0002, + "step": 59748 + }, + { + "epoch": 55.74, + "learning_rate": 4.303264925373134e-05, + "loss": 0.0, + "step": 59752 + }, + { + "epoch": 55.74, + "learning_rate": 4.30321828358209e-05, + "loss": 0.0, + "step": 59756 + }, + { + "epoch": 55.75, + "learning_rate": 4.303171641791045e-05, + "loss": 0.0, + "step": 59760 + }, + { + "epoch": 55.75, + "learning_rate": 4.303125e-05, + "loss": 0.0, + "step": 59764 + }, + { + "epoch": 55.75, + "learning_rate": 4.3030783582089554e-05, + "loss": 0.0, + "step": 59768 + }, + { + "epoch": 55.76, + "learning_rate": 4.303031716417911e-05, + "loss": 0.0001, + "step": 59772 + }, + { + "epoch": 55.76, + "learning_rate": 4.302985074626866e-05, + "loss": 0.0, + "step": 59776 + }, + { + "epoch": 55.76, + "learning_rate": 4.302938432835821e-05, + "loss": 0.0, + "step": 59780 + }, + { + "epoch": 55.77, + "learning_rate": 4.302891791044777e-05, + "loss": 0.0, + "step": 59784 + }, + { + "epoch": 55.77, + "learning_rate": 4.3028451492537315e-05, + "loss": 0.0, + "step": 59788 + }, + { + "epoch": 55.78, + "learning_rate": 4.302798507462687e-05, + "loss": 0.0, + "step": 59792 + }, + { + "epoch": 55.78, + "learning_rate": 4.302751865671642e-05, + "loss": 0.0001, + "step": 59796 + }, + { + "epoch": 55.78, + "learning_rate": 4.302705223880597e-05, + "loss": 0.0, + "step": 59800 + }, + { + "epoch": 55.79, + "learning_rate": 4.302658582089553e-05, + "loss": 0.0, + "step": 59804 + }, + { + "epoch": 55.79, + "learning_rate": 4.3026119402985076e-05, + "loss": 0.0001, + "step": 59808 + }, + { + "epoch": 55.79, + "learning_rate": 4.3025652985074624e-05, + "loss": 0.0, + "step": 59812 + }, + { + "epoch": 55.8, + "learning_rate": 4.3025186567164186e-05, + "loss": 0.0007, + "step": 59816 + }, + { + "epoch": 55.8, + "learning_rate": 4.3024720149253734e-05, + "loss": 0.0, + "step": 59820 + }, + { + "epoch": 55.81, + "learning_rate": 4.302425373134328e-05, + "loss": 0.0, + "step": 59824 + }, + { + "epoch": 55.81, + "learning_rate": 4.302378731343284e-05, + "loss": 0.0003, + "step": 59828 + }, + { + "epoch": 55.81, + "learning_rate": 4.302332089552239e-05, + "loss": 0.0, + "step": 59832 + }, + { + "epoch": 55.82, + "learning_rate": 4.302285447761194e-05, + "loss": 0.0, + "step": 59836 + }, + { + "epoch": 55.82, + "learning_rate": 4.3022388059701495e-05, + "loss": 0.0004, + "step": 59840 + }, + { + "epoch": 55.82, + "learning_rate": 4.302192164179105e-05, + "loss": 0.0001, + "step": 59844 + }, + { + "epoch": 55.83, + "learning_rate": 4.30214552238806e-05, + "loss": 0.0001, + "step": 59848 + }, + { + "epoch": 55.83, + "learning_rate": 4.302098880597015e-05, + "loss": 0.0, + "step": 59852 + }, + { + "epoch": 55.84, + "learning_rate": 4.30205223880597e-05, + "loss": 0.0, + "step": 59856 + }, + { + "epoch": 55.84, + "learning_rate": 4.3020055970149256e-05, + "loss": 0.0, + "step": 59860 + }, + { + "epoch": 55.84, + "learning_rate": 4.301958955223881e-05, + "loss": 0.0, + "step": 59864 + }, + { + "epoch": 55.85, + "learning_rate": 4.301912313432836e-05, + "loss": 0.0015, + "step": 59868 + }, + { + "epoch": 55.85, + "learning_rate": 4.301865671641791e-05, + "loss": 0.0, + "step": 59872 + }, + { + "epoch": 55.85, + "learning_rate": 4.301819029850747e-05, + "loss": 0.0, + "step": 59876 + }, + { + "epoch": 55.86, + "learning_rate": 4.301772388059702e-05, + "loss": 0.0, + "step": 59880 + }, + { + "epoch": 55.86, + "learning_rate": 4.3017257462686565e-05, + "loss": 0.0001, + "step": 59884 + }, + { + "epoch": 55.87, + "learning_rate": 4.301679104477612e-05, + "loss": 0.0001, + "step": 59888 + }, + { + "epoch": 55.87, + "learning_rate": 4.3016324626865675e-05, + "loss": 0.0, + "step": 59892 + }, + { + "epoch": 55.87, + "learning_rate": 4.301585820895523e-05, + "loss": 0.0, + "step": 59896 + }, + { + "epoch": 55.88, + "learning_rate": 4.301539179104478e-05, + "loss": 0.0001, + "step": 59900 + }, + { + "epoch": 55.88, + "learning_rate": 4.301492537313433e-05, + "loss": 0.0, + "step": 59904 + }, + { + "epoch": 55.88, + "learning_rate": 4.301445895522389e-05, + "loss": 0.0, + "step": 59908 + }, + { + "epoch": 55.89, + "learning_rate": 4.3013992537313436e-05, + "loss": 0.0009, + "step": 59912 + }, + { + "epoch": 55.89, + "learning_rate": 4.3013526119402984e-05, + "loss": 0.0, + "step": 59916 + }, + { + "epoch": 55.9, + "learning_rate": 4.301305970149254e-05, + "loss": 0.0, + "step": 59920 + }, + { + "epoch": 55.9, + "learning_rate": 4.3012593283582094e-05, + "loss": 0.0, + "step": 59924 + }, + { + "epoch": 55.9, + "learning_rate": 4.301212686567164e-05, + "loss": 0.0, + "step": 59928 + }, + { + "epoch": 55.91, + "learning_rate": 4.30116604477612e-05, + "loss": 0.0, + "step": 59932 + }, + { + "epoch": 55.91, + "learning_rate": 4.301119402985075e-05, + "loss": 0.0, + "step": 59936 + }, + { + "epoch": 55.91, + "learning_rate": 4.30107276119403e-05, + "loss": 0.0, + "step": 59940 + }, + { + "epoch": 55.92, + "learning_rate": 4.3010261194029855e-05, + "loss": 0.0, + "step": 59944 + }, + { + "epoch": 55.92, + "learning_rate": 4.30097947761194e-05, + "loss": 0.0, + "step": 59948 + }, + { + "epoch": 55.93, + "learning_rate": 4.300932835820896e-05, + "loss": 0.0, + "step": 59952 + }, + { + "epoch": 55.93, + "learning_rate": 4.300886194029851e-05, + "loss": 0.0, + "step": 59956 + }, + { + "epoch": 55.93, + "learning_rate": 4.300839552238806e-05, + "loss": 0.0, + "step": 59960 + }, + { + "epoch": 55.94, + "learning_rate": 4.300792910447761e-05, + "loss": 0.0, + "step": 59964 + }, + { + "epoch": 55.94, + "learning_rate": 4.300746268656717e-05, + "loss": 0.0008, + "step": 59968 + }, + { + "epoch": 55.94, + "learning_rate": 4.300699626865672e-05, + "loss": 0.0001, + "step": 59972 + }, + { + "epoch": 55.95, + "learning_rate": 4.300652985074627e-05, + "loss": 0.0, + "step": 59976 + }, + { + "epoch": 55.95, + "learning_rate": 4.300606343283582e-05, + "loss": 0.0, + "step": 59980 + }, + { + "epoch": 55.96, + "learning_rate": 4.300559701492538e-05, + "loss": 0.0, + "step": 59984 + }, + { + "epoch": 55.96, + "learning_rate": 4.3005130597014925e-05, + "loss": 0.0004, + "step": 59988 + }, + { + "epoch": 55.96, + "learning_rate": 4.300466417910448e-05, + "loss": 0.0, + "step": 59992 + }, + { + "epoch": 55.97, + "learning_rate": 4.3004197761194035e-05, + "loss": 0.0, + "step": 59996 + }, + { + "epoch": 55.97, + "learning_rate": 4.300373134328358e-05, + "loss": 0.0, + "step": 60000 + }, + { + "epoch": 55.97, + "eval_exact_match": 0.7514506769825918, + "eval_exec": 0.7843326885880078, + "eval_loss": 0.5125755667686462, + "eval_runtime": 1138.056, + "eval_samples_per_second": 0.909, + "step": 60000 + }, + { + "epoch": 55.97, + "learning_rate": 4.300326492537314e-05, + "loss": 0.0, + "step": 60004 + }, + { + "epoch": 55.98, + "learning_rate": 4.3002798507462686e-05, + "loss": 0.0, + "step": 60008 + }, + { + "epoch": 55.98, + "learning_rate": 4.300233208955224e-05, + "loss": 0.0001, + "step": 60012 + }, + { + "epoch": 55.98, + "learning_rate": 4.3001865671641796e-05, + "loss": 0.0, + "step": 60016 + }, + { + "epoch": 55.99, + "learning_rate": 4.3001399253731344e-05, + "loss": 0.0, + "step": 60020 + }, + { + "epoch": 55.99, + "learning_rate": 4.300093283582089e-05, + "loss": 0.0, + "step": 60024 + }, + { + "epoch": 56.0, + "learning_rate": 4.3000466417910454e-05, + "loss": 0.0, + "step": 60028 + }, + { + "epoch": 56.0, + "learning_rate": 4.3e-05, + "loss": 0.0, + "step": 60032 + }, + { + "epoch": 56.0, + "learning_rate": 4.299953358208955e-05, + "loss": 0.0, + "step": 60036 + }, + { + "epoch": 56.01, + "learning_rate": 4.2999067164179105e-05, + "loss": 0.0001, + "step": 60040 + }, + { + "epoch": 56.01, + "learning_rate": 4.299860074626866e-05, + "loss": 0.0001, + "step": 60044 + }, + { + "epoch": 56.01, + "learning_rate": 4.299813432835821e-05, + "loss": 0.0014, + "step": 60048 + }, + { + "epoch": 56.02, + "learning_rate": 4.299766791044776e-05, + "loss": 0.0, + "step": 60052 + }, + { + "epoch": 56.02, + "learning_rate": 4.299720149253732e-05, + "loss": 0.0, + "step": 60056 + }, + { + "epoch": 56.03, + "learning_rate": 4.299673507462687e-05, + "loss": 0.0, + "step": 60060 + }, + { + "epoch": 56.03, + "learning_rate": 4.299626865671642e-05, + "loss": 0.0011, + "step": 60064 + }, + { + "epoch": 56.03, + "learning_rate": 4.299580223880597e-05, + "loss": 0.0005, + "step": 60068 + }, + { + "epoch": 56.04, + "learning_rate": 4.299533582089553e-05, + "loss": 0.0, + "step": 60072 + }, + { + "epoch": 56.04, + "learning_rate": 4.299486940298508e-05, + "loss": 0.0, + "step": 60076 + }, + { + "epoch": 56.04, + "learning_rate": 4.299440298507463e-05, + "loss": 0.0, + "step": 60080 + }, + { + "epoch": 56.05, + "learning_rate": 4.299393656716418e-05, + "loss": 0.0, + "step": 60084 + }, + { + "epoch": 56.05, + "learning_rate": 4.2993470149253736e-05, + "loss": 0.0, + "step": 60088 + }, + { + "epoch": 56.06, + "learning_rate": 4.2993003731343285e-05, + "loss": 0.0, + "step": 60092 + }, + { + "epoch": 56.06, + "learning_rate": 4.299253731343284e-05, + "loss": 0.0, + "step": 60096 + }, + { + "epoch": 56.06, + "learning_rate": 4.299207089552239e-05, + "loss": 0.0, + "step": 60100 + }, + { + "epoch": 56.07, + "learning_rate": 4.299160447761194e-05, + "loss": 0.0, + "step": 60104 + }, + { + "epoch": 56.07, + "learning_rate": 4.29911380597015e-05, + "loss": 0.0, + "step": 60108 + }, + { + "epoch": 56.07, + "learning_rate": 4.2990671641791046e-05, + "loss": 0.0005, + "step": 60112 + }, + { + "epoch": 56.08, + "learning_rate": 4.29902052238806e-05, + "loss": 0.0001, + "step": 60116 + }, + { + "epoch": 56.08, + "learning_rate": 4.2989738805970155e-05, + "loss": 0.0, + "step": 60120 + }, + { + "epoch": 56.09, + "learning_rate": 4.2989272388059704e-05, + "loss": 0.0, + "step": 60124 + }, + { + "epoch": 56.09, + "learning_rate": 4.298880597014925e-05, + "loss": 0.0001, + "step": 60128 + }, + { + "epoch": 56.09, + "learning_rate": 4.2988339552238807e-05, + "loss": 0.0, + "step": 60132 + }, + { + "epoch": 56.1, + "learning_rate": 4.298787313432836e-05, + "loss": 0.0008, + "step": 60136 + }, + { + "epoch": 56.1, + "learning_rate": 4.298740671641791e-05, + "loss": 0.0001, + "step": 60140 + }, + { + "epoch": 56.1, + "learning_rate": 4.2986940298507465e-05, + "loss": 0.0009, + "step": 60144 + }, + { + "epoch": 56.11, + "learning_rate": 4.298647388059702e-05, + "loss": 0.0, + "step": 60148 + }, + { + "epoch": 56.11, + "learning_rate": 4.298600746268657e-05, + "loss": 0.0021, + "step": 60152 + }, + { + "epoch": 56.12, + "learning_rate": 4.298554104477612e-05, + "loss": 0.0009, + "step": 60156 + }, + { + "epoch": 56.12, + "learning_rate": 4.298507462686567e-05, + "loss": 0.0, + "step": 60160 + }, + { + "epoch": 56.12, + "learning_rate": 4.2984608208955226e-05, + "loss": 0.0001, + "step": 60164 + }, + { + "epoch": 56.13, + "learning_rate": 4.298414179104478e-05, + "loss": 0.0008, + "step": 60168 + }, + { + "epoch": 56.13, + "learning_rate": 4.298367537313433e-05, + "loss": 0.0, + "step": 60172 + }, + { + "epoch": 56.13, + "learning_rate": 4.2983208955223883e-05, + "loss": 0.0, + "step": 60176 + }, + { + "epoch": 56.14, + "learning_rate": 4.298274253731344e-05, + "loss": 0.0, + "step": 60180 + }, + { + "epoch": 56.14, + "learning_rate": 4.2982276119402987e-05, + "loss": 0.0, + "step": 60184 + }, + { + "epoch": 56.15, + "learning_rate": 4.2981809701492535e-05, + "loss": 0.0, + "step": 60188 + }, + { + "epoch": 56.15, + "learning_rate": 4.298134328358209e-05, + "loss": 0.0, + "step": 60192 + }, + { + "epoch": 56.15, + "learning_rate": 4.2980876865671644e-05, + "loss": 0.0, + "step": 60196 + }, + { + "epoch": 56.16, + "learning_rate": 4.298041044776119e-05, + "loss": 0.0001, + "step": 60200 + }, + { + "epoch": 56.16, + "learning_rate": 4.297994402985075e-05, + "loss": 0.001, + "step": 60204 + }, + { + "epoch": 56.16, + "learning_rate": 4.29794776119403e-05, + "loss": 0.0, + "step": 60208 + }, + { + "epoch": 56.17, + "learning_rate": 4.297901119402985e-05, + "loss": 0.0, + "step": 60212 + }, + { + "epoch": 56.17, + "learning_rate": 4.2978544776119405e-05, + "loss": 0.0, + "step": 60216 + }, + { + "epoch": 56.18, + "learning_rate": 4.2978078358208954e-05, + "loss": 0.0, + "step": 60220 + }, + { + "epoch": 56.18, + "learning_rate": 4.2977611940298515e-05, + "loss": 0.0, + "step": 60224 + }, + { + "epoch": 56.18, + "learning_rate": 4.297714552238806e-05, + "loss": 0.0019, + "step": 60228 + }, + { + "epoch": 56.19, + "learning_rate": 4.297667910447761e-05, + "loss": 0.0, + "step": 60232 + }, + { + "epoch": 56.19, + "learning_rate": 4.2976212686567166e-05, + "loss": 0.0, + "step": 60236 + }, + { + "epoch": 56.19, + "learning_rate": 4.297574626865672e-05, + "loss": 0.0001, + "step": 60240 + }, + { + "epoch": 56.2, + "learning_rate": 4.297527985074627e-05, + "loss": 0.0001, + "step": 60244 + }, + { + "epoch": 56.2, + "learning_rate": 4.2974813432835824e-05, + "loss": 0.0, + "step": 60248 + }, + { + "epoch": 56.21, + "learning_rate": 4.297434701492537e-05, + "loss": 0.0, + "step": 60252 + }, + { + "epoch": 56.21, + "learning_rate": 4.297388059701493e-05, + "loss": 0.0056, + "step": 60256 + }, + { + "epoch": 56.21, + "learning_rate": 4.297341417910448e-05, + "loss": 0.0, + "step": 60260 + }, + { + "epoch": 56.22, + "learning_rate": 4.297294776119403e-05, + "loss": 0.0, + "step": 60264 + }, + { + "epoch": 56.22, + "learning_rate": 4.2972481343283585e-05, + "loss": 0.0, + "step": 60268 + }, + { + "epoch": 56.22, + "learning_rate": 4.297201492537314e-05, + "loss": 0.0, + "step": 60272 + }, + { + "epoch": 56.23, + "learning_rate": 4.297154850746269e-05, + "loss": 0.0, + "step": 60276 + }, + { + "epoch": 56.23, + "learning_rate": 4.2971082089552237e-05, + "loss": 0.0, + "step": 60280 + }, + { + "epoch": 56.24, + "learning_rate": 4.29706156716418e-05, + "loss": 0.0002, + "step": 60284 + }, + { + "epoch": 56.24, + "learning_rate": 4.2970149253731346e-05, + "loss": 0.0, + "step": 60288 + }, + { + "epoch": 56.24, + "learning_rate": 4.2969682835820894e-05, + "loss": 0.0, + "step": 60292 + }, + { + "epoch": 56.25, + "learning_rate": 4.296921641791045e-05, + "loss": 0.0, + "step": 60296 + }, + { + "epoch": 56.25, + "learning_rate": 4.2968750000000004e-05, + "loss": 0.0, + "step": 60300 + }, + { + "epoch": 56.25, + "learning_rate": 4.296828358208955e-05, + "loss": 0.0, + "step": 60304 + }, + { + "epoch": 56.26, + "learning_rate": 4.296781716417911e-05, + "loss": 0.0, + "step": 60308 + }, + { + "epoch": 56.26, + "learning_rate": 4.2967350746268655e-05, + "loss": 0.0, + "step": 60312 + }, + { + "epoch": 56.26, + "learning_rate": 4.296688432835821e-05, + "loss": 0.0, + "step": 60316 + }, + { + "epoch": 56.27, + "learning_rate": 4.2966417910447765e-05, + "loss": 0.0, + "step": 60320 + }, + { + "epoch": 56.27, + "learning_rate": 4.2965951492537313e-05, + "loss": 0.0, + "step": 60324 + }, + { + "epoch": 56.28, + "learning_rate": 4.296548507462687e-05, + "loss": 0.0, + "step": 60328 + }, + { + "epoch": 56.28, + "learning_rate": 4.296501865671642e-05, + "loss": 0.0, + "step": 60332 + }, + { + "epoch": 56.28, + "learning_rate": 4.296455223880597e-05, + "loss": 0.0005, + "step": 60336 + }, + { + "epoch": 56.29, + "learning_rate": 4.296408582089552e-05, + "loss": 0.0, + "step": 60340 + }, + { + "epoch": 56.29, + "learning_rate": 4.296361940298508e-05, + "loss": 0.0, + "step": 60344 + }, + { + "epoch": 56.29, + "learning_rate": 4.296315298507463e-05, + "loss": 0.0, + "step": 60348 + }, + { + "epoch": 56.3, + "learning_rate": 4.296268656716418e-05, + "loss": 0.0, + "step": 60352 + }, + { + "epoch": 56.3, + "learning_rate": 4.296222014925373e-05, + "loss": 0.0, + "step": 60356 + }, + { + "epoch": 56.31, + "learning_rate": 4.296175373134329e-05, + "loss": 0.0004, + "step": 60360 + }, + { + "epoch": 56.31, + "learning_rate": 4.2961287313432835e-05, + "loss": 0.0, + "step": 60364 + }, + { + "epoch": 56.31, + "learning_rate": 4.296082089552239e-05, + "loss": 0.0, + "step": 60368 + }, + { + "epoch": 56.32, + "learning_rate": 4.296035447761194e-05, + "loss": 0.0, + "step": 60372 + }, + { + "epoch": 56.32, + "learning_rate": 4.295988805970149e-05, + "loss": 0.0, + "step": 60376 + }, + { + "epoch": 56.32, + "learning_rate": 4.295942164179105e-05, + "loss": 0.0, + "step": 60380 + }, + { + "epoch": 56.33, + "learning_rate": 4.2958955223880596e-05, + "loss": 0.0, + "step": 60384 + }, + { + "epoch": 56.33, + "learning_rate": 4.295848880597015e-05, + "loss": 0.001, + "step": 60388 + }, + { + "epoch": 56.34, + "learning_rate": 4.2958022388059706e-05, + "loss": 0.0, + "step": 60392 + }, + { + "epoch": 56.34, + "learning_rate": 4.2957555970149254e-05, + "loss": 0.0, + "step": 60396 + }, + { + "epoch": 56.34, + "learning_rate": 4.295708955223881e-05, + "loss": 0.0032, + "step": 60400 + }, + { + "epoch": 56.35, + "learning_rate": 4.2956623134328364e-05, + "loss": 0.0, + "step": 60404 + }, + { + "epoch": 56.35, + "learning_rate": 4.295615671641791e-05, + "loss": 0.0, + "step": 60408 + }, + { + "epoch": 56.35, + "learning_rate": 4.295569029850747e-05, + "loss": 0.0, + "step": 60412 + }, + { + "epoch": 56.36, + "learning_rate": 4.2955223880597015e-05, + "loss": 0.0001, + "step": 60416 + }, + { + "epoch": 56.36, + "learning_rate": 4.295475746268657e-05, + "loss": 0.0008, + "step": 60420 + }, + { + "epoch": 56.37, + "learning_rate": 4.2954291044776125e-05, + "loss": 0.0, + "step": 60424 + }, + { + "epoch": 56.37, + "learning_rate": 4.295382462686567e-05, + "loss": 0.0, + "step": 60428 + }, + { + "epoch": 56.37, + "learning_rate": 4.295335820895522e-05, + "loss": 0.0, + "step": 60432 + }, + { + "epoch": 56.38, + "learning_rate": 4.295289179104478e-05, + "loss": 0.001, + "step": 60436 + }, + { + "epoch": 56.38, + "learning_rate": 4.295242537313433e-05, + "loss": 0.0001, + "step": 60440 + }, + { + "epoch": 56.38, + "learning_rate": 4.295195895522388e-05, + "loss": 0.0003, + "step": 60444 + }, + { + "epoch": 56.39, + "learning_rate": 4.2951492537313434e-05, + "loss": 0.0, + "step": 60448 + }, + { + "epoch": 56.39, + "learning_rate": 4.295102611940299e-05, + "loss": 0.0, + "step": 60452 + }, + { + "epoch": 56.4, + "learning_rate": 4.295055970149254e-05, + "loss": 0.0, + "step": 60456 + }, + { + "epoch": 56.4, + "learning_rate": 4.295009328358209e-05, + "loss": 0.0, + "step": 60460 + }, + { + "epoch": 56.4, + "learning_rate": 4.294962686567165e-05, + "loss": 0.0, + "step": 60464 + }, + { + "epoch": 56.41, + "learning_rate": 4.2949160447761195e-05, + "loss": 0.0, + "step": 60468 + }, + { + "epoch": 56.41, + "learning_rate": 4.294869402985075e-05, + "loss": 0.0, + "step": 60472 + }, + { + "epoch": 56.41, + "learning_rate": 4.29482276119403e-05, + "loss": 0.0, + "step": 60476 + }, + { + "epoch": 56.42, + "learning_rate": 4.294776119402985e-05, + "loss": 0.0004, + "step": 60480 + }, + { + "epoch": 56.42, + "learning_rate": 4.294729477611941e-05, + "loss": 0.0088, + "step": 60484 + }, + { + "epoch": 56.43, + "learning_rate": 4.2946828358208956e-05, + "loss": 0.0, + "step": 60488 + }, + { + "epoch": 56.43, + "learning_rate": 4.2946361940298504e-05, + "loss": 0.0, + "step": 60492 + }, + { + "epoch": 56.43, + "learning_rate": 4.2945895522388066e-05, + "loss": 0.0009, + "step": 60496 + }, + { + "epoch": 56.44, + "learning_rate": 4.2945429104477614e-05, + "loss": 0.0002, + "step": 60500 + }, + { + "epoch": 56.44, + "eval_exact_match": 0.7524177949709865, + "eval_exec": 0.7833655705996132, + "eval_loss": 0.480731725692749, + "eval_runtime": 1132.1694, + "eval_samples_per_second": 0.913, + "step": 60500 + }, + { + "epoch": 56.44, + "learning_rate": 4.294496268656716e-05, + "loss": 0.0, + "step": 60504 + }, + { + "epoch": 56.44, + "learning_rate": 4.294449626865672e-05, + "loss": 0.0, + "step": 60508 + }, + { + "epoch": 56.45, + "learning_rate": 4.294402985074627e-05, + "loss": 0.0, + "step": 60512 + }, + { + "epoch": 56.45, + "learning_rate": 4.294356343283582e-05, + "loss": 0.0, + "step": 60516 + }, + { + "epoch": 56.46, + "learning_rate": 4.2943097014925375e-05, + "loss": 0.0, + "step": 60520 + }, + { + "epoch": 56.46, + "learning_rate": 4.294263059701493e-05, + "loss": 0.0, + "step": 60524 + }, + { + "epoch": 56.46, + "learning_rate": 4.294216417910448e-05, + "loss": 0.0, + "step": 60528 + }, + { + "epoch": 56.47, + "learning_rate": 4.294169776119403e-05, + "loss": 0.0, + "step": 60532 + }, + { + "epoch": 56.47, + "learning_rate": 4.294123134328358e-05, + "loss": 0.0, + "step": 60536 + }, + { + "epoch": 56.47, + "learning_rate": 4.2940764925373136e-05, + "loss": 0.0, + "step": 60540 + }, + { + "epoch": 56.48, + "learning_rate": 4.294029850746269e-05, + "loss": 0.0001, + "step": 60544 + }, + { + "epoch": 56.48, + "learning_rate": 4.293983208955224e-05, + "loss": 0.0, + "step": 60548 + }, + { + "epoch": 56.49, + "learning_rate": 4.2939365671641794e-05, + "loss": 0.0, + "step": 60552 + }, + { + "epoch": 56.49, + "learning_rate": 4.293889925373135e-05, + "loss": 0.0013, + "step": 60556 + }, + { + "epoch": 56.49, + "learning_rate": 4.29384328358209e-05, + "loss": 0.0, + "step": 60560 + }, + { + "epoch": 56.5, + "learning_rate": 4.293796641791045e-05, + "loss": 0.0, + "step": 60564 + }, + { + "epoch": 56.5, + "learning_rate": 4.29375e-05, + "loss": 0.0, + "step": 60568 + }, + { + "epoch": 56.5, + "learning_rate": 4.2937033582089555e-05, + "loss": 0.0, + "step": 60572 + }, + { + "epoch": 56.51, + "learning_rate": 4.293656716417911e-05, + "loss": 0.0, + "step": 60576 + }, + { + "epoch": 56.51, + "learning_rate": 4.293610074626866e-05, + "loss": 0.0, + "step": 60580 + }, + { + "epoch": 56.51, + "learning_rate": 4.293563432835821e-05, + "loss": 0.0, + "step": 60584 + }, + { + "epoch": 56.52, + "learning_rate": 4.293516791044777e-05, + "loss": 0.0, + "step": 60588 + }, + { + "epoch": 56.52, + "learning_rate": 4.2934701492537316e-05, + "loss": 0.0103, + "step": 60592 + }, + { + "epoch": 56.53, + "learning_rate": 4.2934235074626864e-05, + "loss": 0.0007, + "step": 60596 + }, + { + "epoch": 56.53, + "learning_rate": 4.293376865671642e-05, + "loss": 0.0001, + "step": 60600 + }, + { + "epoch": 56.53, + "learning_rate": 4.2933302238805974e-05, + "loss": 0.0, + "step": 60604 + }, + { + "epoch": 56.54, + "learning_rate": 4.293283582089552e-05, + "loss": 0.0, + "step": 60608 + }, + { + "epoch": 56.54, + "learning_rate": 4.293236940298508e-05, + "loss": 0.0007, + "step": 60612 + }, + { + "epoch": 56.54, + "learning_rate": 4.293190298507463e-05, + "loss": 0.0, + "step": 60616 + }, + { + "epoch": 56.55, + "learning_rate": 4.293143656716418e-05, + "loss": 0.0002, + "step": 60620 + }, + { + "epoch": 56.55, + "learning_rate": 4.2930970149253735e-05, + "loss": 0.0011, + "step": 60624 + }, + { + "epoch": 56.56, + "learning_rate": 4.293050373134328e-05, + "loss": 0.0001, + "step": 60628 + }, + { + "epoch": 56.56, + "learning_rate": 4.293003731343284e-05, + "loss": 0.0, + "step": 60632 + }, + { + "epoch": 56.56, + "learning_rate": 4.292957089552239e-05, + "loss": 0.0, + "step": 60636 + }, + { + "epoch": 56.57, + "learning_rate": 4.292910447761194e-05, + "loss": 0.0005, + "step": 60640 + }, + { + "epoch": 56.57, + "learning_rate": 4.292863805970149e-05, + "loss": 0.0, + "step": 60644 + }, + { + "epoch": 56.57, + "learning_rate": 4.292817164179105e-05, + "loss": 0.0, + "step": 60648 + }, + { + "epoch": 56.58, + "learning_rate": 4.29277052238806e-05, + "loss": 0.0, + "step": 60652 + }, + { + "epoch": 56.58, + "learning_rate": 4.292723880597015e-05, + "loss": 0.0, + "step": 60656 + }, + { + "epoch": 56.59, + "learning_rate": 4.29267723880597e-05, + "loss": 0.0002, + "step": 60660 + }, + { + "epoch": 56.59, + "learning_rate": 4.292630597014926e-05, + "loss": 0.0, + "step": 60664 + }, + { + "epoch": 56.59, + "learning_rate": 4.2925839552238805e-05, + "loss": 0.0001, + "step": 60668 + }, + { + "epoch": 56.6, + "learning_rate": 4.292537313432836e-05, + "loss": 0.0, + "step": 60672 + }, + { + "epoch": 56.6, + "learning_rate": 4.2924906716417915e-05, + "loss": 0.0001, + "step": 60676 + }, + { + "epoch": 56.6, + "learning_rate": 4.292444029850746e-05, + "loss": 0.0, + "step": 60680 + }, + { + "epoch": 56.61, + "learning_rate": 4.292397388059702e-05, + "loss": 0.0, + "step": 60684 + }, + { + "epoch": 56.61, + "learning_rate": 4.2923507462686566e-05, + "loss": 0.0, + "step": 60688 + }, + { + "epoch": 56.62, + "learning_rate": 4.292304104477612e-05, + "loss": 0.0, + "step": 60692 + }, + { + "epoch": 56.62, + "learning_rate": 4.2922574626865676e-05, + "loss": 0.0, + "step": 60696 + }, + { + "epoch": 56.62, + "learning_rate": 4.2922108208955224e-05, + "loss": 0.0001, + "step": 60700 + }, + { + "epoch": 56.63, + "learning_rate": 4.292164179104477e-05, + "loss": 0.0, + "step": 60704 + }, + { + "epoch": 56.63, + "learning_rate": 4.2921175373134334e-05, + "loss": 0.0, + "step": 60708 + }, + { + "epoch": 56.63, + "learning_rate": 4.292070895522388e-05, + "loss": 0.0, + "step": 60712 + }, + { + "epoch": 56.64, + "learning_rate": 4.292024253731344e-05, + "loss": 0.0, + "step": 60716 + }, + { + "epoch": 56.64, + "learning_rate": 4.2919776119402985e-05, + "loss": 0.0, + "step": 60720 + }, + { + "epoch": 56.65, + "learning_rate": 4.291930970149254e-05, + "loss": 0.0003, + "step": 60724 + }, + { + "epoch": 56.65, + "learning_rate": 4.2918843283582095e-05, + "loss": 0.0016, + "step": 60728 + }, + { + "epoch": 56.65, + "learning_rate": 4.291837686567164e-05, + "loss": 0.0197, + "step": 60732 + }, + { + "epoch": 56.66, + "learning_rate": 4.29179104477612e-05, + "loss": 0.0, + "step": 60736 + }, + { + "epoch": 56.66, + "learning_rate": 4.291744402985075e-05, + "loss": 0.0001, + "step": 60740 + }, + { + "epoch": 56.66, + "learning_rate": 4.29169776119403e-05, + "loss": 0.0, + "step": 60744 + }, + { + "epoch": 56.67, + "learning_rate": 4.291651119402985e-05, + "loss": 0.0002, + "step": 60748 + }, + { + "epoch": 56.67, + "learning_rate": 4.291604477611941e-05, + "loss": 0.0, + "step": 60752 + }, + { + "epoch": 56.68, + "learning_rate": 4.291557835820896e-05, + "loss": 0.0, + "step": 60756 + }, + { + "epoch": 56.68, + "learning_rate": 4.291511194029851e-05, + "loss": 0.0, + "step": 60760 + }, + { + "epoch": 56.68, + "learning_rate": 4.291464552238806e-05, + "loss": 0.0, + "step": 60764 + }, + { + "epoch": 56.69, + "learning_rate": 4.291417910447762e-05, + "loss": 0.0, + "step": 60768 + }, + { + "epoch": 56.69, + "learning_rate": 4.2913712686567165e-05, + "loss": 0.0, + "step": 60772 + }, + { + "epoch": 56.69, + "learning_rate": 4.291324626865672e-05, + "loss": 0.0, + "step": 60776 + }, + { + "epoch": 56.7, + "learning_rate": 4.291277985074627e-05, + "loss": 0.0001, + "step": 60780 + }, + { + "epoch": 56.7, + "learning_rate": 4.291231343283582e-05, + "loss": 0.0, + "step": 60784 + }, + { + "epoch": 56.71, + "learning_rate": 4.291184701492538e-05, + "loss": 0.0002, + "step": 60788 + }, + { + "epoch": 56.71, + "learning_rate": 4.2911380597014926e-05, + "loss": 0.0, + "step": 60792 + }, + { + "epoch": 56.71, + "learning_rate": 4.291091417910448e-05, + "loss": 0.0, + "step": 60796 + }, + { + "epoch": 56.72, + "learning_rate": 4.2910447761194036e-05, + "loss": 0.0001, + "step": 60800 + }, + { + "epoch": 56.72, + "learning_rate": 4.2909981343283584e-05, + "loss": 0.0001, + "step": 60804 + }, + { + "epoch": 56.72, + "learning_rate": 4.290951492537313e-05, + "loss": 0.0006, + "step": 60808 + }, + { + "epoch": 56.73, + "learning_rate": 4.290904850746269e-05, + "loss": 0.0, + "step": 60812 + }, + { + "epoch": 56.73, + "learning_rate": 4.290858208955224e-05, + "loss": 0.0006, + "step": 60816 + }, + { + "epoch": 56.73, + "learning_rate": 4.290811567164179e-05, + "loss": 0.0, + "step": 60820 + }, + { + "epoch": 56.74, + "learning_rate": 4.2907649253731345e-05, + "loss": 0.0, + "step": 60824 + }, + { + "epoch": 56.74, + "learning_rate": 4.29071828358209e-05, + "loss": 0.0, + "step": 60828 + }, + { + "epoch": 56.75, + "learning_rate": 4.290671641791045e-05, + "loss": 0.0011, + "step": 60832 + }, + { + "epoch": 56.75, + "learning_rate": 4.290625e-05, + "loss": 0.0, + "step": 60836 + }, + { + "epoch": 56.75, + "learning_rate": 4.290578358208955e-05, + "loss": 0.0003, + "step": 60840 + }, + { + "epoch": 56.76, + "learning_rate": 4.2905317164179106e-05, + "loss": 0.0002, + "step": 60844 + }, + { + "epoch": 56.76, + "learning_rate": 4.290485074626866e-05, + "loss": 0.0001, + "step": 60848 + }, + { + "epoch": 56.76, + "learning_rate": 4.290438432835821e-05, + "loss": 0.0, + "step": 60852 + }, + { + "epoch": 56.77, + "learning_rate": 4.2903917910447764e-05, + "loss": 0.0001, + "step": 60856 + }, + { + "epoch": 56.77, + "learning_rate": 4.290345149253732e-05, + "loss": 0.0, + "step": 60860 + }, + { + "epoch": 56.78, + "learning_rate": 4.290298507462687e-05, + "loss": 0.0, + "step": 60864 + }, + { + "epoch": 56.78, + "learning_rate": 4.2902518656716415e-05, + "loss": 0.0, + "step": 60868 + }, + { + "epoch": 56.78, + "learning_rate": 4.290205223880597e-05, + "loss": 0.001, + "step": 60872 + }, + { + "epoch": 56.79, + "learning_rate": 4.2901585820895525e-05, + "loss": 0.0, + "step": 60876 + }, + { + "epoch": 56.79, + "learning_rate": 4.290111940298508e-05, + "loss": 0.0, + "step": 60880 + }, + { + "epoch": 56.79, + "learning_rate": 4.290065298507463e-05, + "loss": 0.0, + "step": 60884 + }, + { + "epoch": 56.8, + "learning_rate": 4.290018656716418e-05, + "loss": 0.0, + "step": 60888 + }, + { + "epoch": 56.8, + "learning_rate": 4.289972014925374e-05, + "loss": 0.0, + "step": 60892 + }, + { + "epoch": 56.81, + "learning_rate": 4.2899253731343286e-05, + "loss": 0.0004, + "step": 60896 + }, + { + "epoch": 56.81, + "learning_rate": 4.2898787313432834e-05, + "loss": 0.0001, + "step": 60900 + }, + { + "epoch": 56.81, + "learning_rate": 4.2898320895522396e-05, + "loss": 0.0, + "step": 60904 + }, + { + "epoch": 56.82, + "learning_rate": 4.2897854477611944e-05, + "loss": 0.0, + "step": 60908 + }, + { + "epoch": 56.82, + "learning_rate": 4.289738805970149e-05, + "loss": 0.0002, + "step": 60912 + }, + { + "epoch": 56.82, + "learning_rate": 4.289692164179105e-05, + "loss": 0.0, + "step": 60916 + }, + { + "epoch": 56.83, + "learning_rate": 4.28964552238806e-05, + "loss": 0.0001, + "step": 60920 + }, + { + "epoch": 56.83, + "learning_rate": 4.289598880597015e-05, + "loss": 0.0, + "step": 60924 + }, + { + "epoch": 56.84, + "learning_rate": 4.2895522388059705e-05, + "loss": 0.0, + "step": 60928 + }, + { + "epoch": 56.84, + "learning_rate": 4.289505597014925e-05, + "loss": 0.0, + "step": 60932 + }, + { + "epoch": 56.84, + "learning_rate": 4.289458955223881e-05, + "loss": 0.0001, + "step": 60936 + }, + { + "epoch": 56.85, + "learning_rate": 4.289412313432836e-05, + "loss": 0.0, + "step": 60940 + }, + { + "epoch": 56.85, + "learning_rate": 4.289365671641791e-05, + "loss": 0.0001, + "step": 60944 + }, + { + "epoch": 56.85, + "learning_rate": 4.2893190298507466e-05, + "loss": 0.0, + "step": 60948 + }, + { + "epoch": 56.86, + "learning_rate": 4.289272388059702e-05, + "loss": 0.0, + "step": 60952 + }, + { + "epoch": 56.86, + "learning_rate": 4.289225746268657e-05, + "loss": 0.0, + "step": 60956 + }, + { + "epoch": 56.87, + "learning_rate": 4.289179104477612e-05, + "loss": 0.0, + "step": 60960 + }, + { + "epoch": 56.87, + "learning_rate": 4.289132462686568e-05, + "loss": 0.0, + "step": 60964 + }, + { + "epoch": 56.87, + "learning_rate": 4.289085820895523e-05, + "loss": 0.0, + "step": 60968 + }, + { + "epoch": 56.88, + "learning_rate": 4.2890391791044775e-05, + "loss": 0.0, + "step": 60972 + }, + { + "epoch": 56.88, + "learning_rate": 4.288992537313433e-05, + "loss": 0.0, + "step": 60976 + }, + { + "epoch": 56.88, + "learning_rate": 4.2889458955223885e-05, + "loss": 0.0001, + "step": 60980 + }, + { + "epoch": 56.89, + "learning_rate": 4.288899253731343e-05, + "loss": 0.0001, + "step": 60984 + }, + { + "epoch": 56.89, + "learning_rate": 4.288852611940299e-05, + "loss": 0.0, + "step": 60988 + }, + { + "epoch": 56.9, + "learning_rate": 4.2888059701492536e-05, + "loss": 0.0, + "step": 60992 + }, + { + "epoch": 56.9, + "learning_rate": 4.288759328358209e-05, + "loss": 0.0024, + "step": 60996 + }, + { + "epoch": 56.9, + "learning_rate": 4.2887126865671646e-05, + "loss": 0.0, + "step": 61000 + }, + { + "epoch": 56.9, + "eval_exact_match": 0.7495164410058027, + "eval_exec": 0.7775628626692457, + "eval_loss": 0.4850376546382904, + "eval_runtime": 1127.2294, + "eval_samples_per_second": 0.917, + "step": 61000 + }, + { + "epoch": 56.91, + "learning_rate": 4.2886660447761194e-05, + "loss": 0.0001, + "step": 61004 + }, + { + "epoch": 56.91, + "learning_rate": 4.288619402985075e-05, + "loss": 0.0, + "step": 61008 + }, + { + "epoch": 56.91, + "learning_rate": 4.2885727611940304e-05, + "loss": 0.0, + "step": 61012 + }, + { + "epoch": 56.92, + "learning_rate": 4.288526119402985e-05, + "loss": 0.0, + "step": 61016 + }, + { + "epoch": 56.92, + "learning_rate": 4.28847947761194e-05, + "loss": 0.0, + "step": 61020 + }, + { + "epoch": 56.93, + "learning_rate": 4.288432835820896e-05, + "loss": 0.0, + "step": 61024 + }, + { + "epoch": 56.93, + "learning_rate": 4.288386194029851e-05, + "loss": 0.0, + "step": 61028 + }, + { + "epoch": 56.93, + "learning_rate": 4.288339552238806e-05, + "loss": 0.0023, + "step": 61032 + }, + { + "epoch": 56.94, + "learning_rate": 4.288292910447761e-05, + "loss": 0.0001, + "step": 61036 + }, + { + "epoch": 56.94, + "learning_rate": 4.288246268656717e-05, + "loss": 0.0, + "step": 61040 + }, + { + "epoch": 56.94, + "learning_rate": 4.288199626865672e-05, + "loss": 0.0, + "step": 61044 + }, + { + "epoch": 56.95, + "learning_rate": 4.288152985074627e-05, + "loss": 0.0, + "step": 61048 + }, + { + "epoch": 56.95, + "learning_rate": 4.288106343283582e-05, + "loss": 0.0, + "step": 61052 + }, + { + "epoch": 56.96, + "learning_rate": 4.288059701492538e-05, + "loss": 0.0, + "step": 61056 + }, + { + "epoch": 56.96, + "learning_rate": 4.288013059701493e-05, + "loss": 0.0, + "step": 61060 + }, + { + "epoch": 56.96, + "learning_rate": 4.287966417910448e-05, + "loss": 0.0, + "step": 61064 + }, + { + "epoch": 56.97, + "learning_rate": 4.287919776119403e-05, + "loss": 0.0008, + "step": 61068 + }, + { + "epoch": 56.97, + "learning_rate": 4.2878731343283586e-05, + "loss": 0.0, + "step": 61072 + }, + { + "epoch": 56.97, + "learning_rate": 4.2878264925373135e-05, + "loss": 0.0, + "step": 61076 + }, + { + "epoch": 56.98, + "learning_rate": 4.287779850746269e-05, + "loss": 0.0, + "step": 61080 + }, + { + "epoch": 56.98, + "learning_rate": 4.2877332089552244e-05, + "loss": 0.0, + "step": 61084 + }, + { + "epoch": 56.98, + "learning_rate": 4.287686567164179e-05, + "loss": 0.0, + "step": 61088 + }, + { + "epoch": 56.99, + "learning_rate": 4.287639925373135e-05, + "loss": 0.0, + "step": 61092 + }, + { + "epoch": 56.99, + "learning_rate": 4.2875932835820896e-05, + "loss": 0.0001, + "step": 61096 + }, + { + "epoch": 57.0, + "learning_rate": 4.287546641791045e-05, + "loss": 0.0, + "step": 61100 + }, + { + "epoch": 57.0, + "learning_rate": 4.2875000000000005e-05, + "loss": 0.0, + "step": 61104 + }, + { + "epoch": 57.0, + "learning_rate": 4.2874533582089554e-05, + "loss": 0.0003, + "step": 61108 + }, + { + "epoch": 57.01, + "learning_rate": 4.28740671641791e-05, + "loss": 0.0, + "step": 61112 + }, + { + "epoch": 57.01, + "learning_rate": 4.287360074626866e-05, + "loss": 0.0178, + "step": 61116 + }, + { + "epoch": 57.01, + "learning_rate": 4.287313432835821e-05, + "loss": 0.0, + "step": 61120 + }, + { + "epoch": 57.02, + "learning_rate": 4.287266791044776e-05, + "loss": 0.0, + "step": 61124 + }, + { + "epoch": 57.02, + "learning_rate": 4.2872201492537315e-05, + "loss": 0.0, + "step": 61128 + }, + { + "epoch": 57.03, + "learning_rate": 4.287173507462687e-05, + "loss": 0.0001, + "step": 61132 + }, + { + "epoch": 57.03, + "learning_rate": 4.287126865671642e-05, + "loss": 0.0103, + "step": 61136 + }, + { + "epoch": 57.03, + "learning_rate": 4.287080223880597e-05, + "loss": 0.0001, + "step": 61140 + }, + { + "epoch": 57.04, + "learning_rate": 4.287033582089553e-05, + "loss": 0.0, + "step": 61144 + }, + { + "epoch": 57.04, + "learning_rate": 4.2869869402985075e-05, + "loss": 0.0, + "step": 61148 + }, + { + "epoch": 57.04, + "learning_rate": 4.286940298507463e-05, + "loss": 0.0, + "step": 61152 + }, + { + "epoch": 57.05, + "learning_rate": 4.286893656716418e-05, + "loss": 0.0, + "step": 61156 + }, + { + "epoch": 57.05, + "learning_rate": 4.2868470149253733e-05, + "loss": 0.0006, + "step": 61160 + }, + { + "epoch": 57.06, + "learning_rate": 4.286800373134329e-05, + "loss": 0.0, + "step": 61164 + }, + { + "epoch": 57.06, + "learning_rate": 4.2867537313432836e-05, + "loss": 0.0, + "step": 61168 + }, + { + "epoch": 57.06, + "learning_rate": 4.2867070895522385e-05, + "loss": 0.0, + "step": 61172 + }, + { + "epoch": 57.07, + "learning_rate": 4.2866604477611946e-05, + "loss": 0.0, + "step": 61176 + }, + { + "epoch": 57.07, + "learning_rate": 4.2866138059701494e-05, + "loss": 0.0, + "step": 61180 + }, + { + "epoch": 57.07, + "learning_rate": 4.286567164179104e-05, + "loss": 0.0001, + "step": 61184 + }, + { + "epoch": 57.08, + "learning_rate": 4.28652052238806e-05, + "loss": 0.0, + "step": 61188 + }, + { + "epoch": 57.08, + "learning_rate": 4.286473880597015e-05, + "loss": 0.0, + "step": 61192 + }, + { + "epoch": 57.09, + "learning_rate": 4.28642723880597e-05, + "loss": 0.0, + "step": 61196 + }, + { + "epoch": 57.09, + "learning_rate": 4.2863805970149255e-05, + "loss": 0.0, + "step": 61200 + }, + { + "epoch": 57.09, + "learning_rate": 4.286333955223881e-05, + "loss": 0.0011, + "step": 61204 + }, + { + "epoch": 57.1, + "learning_rate": 4.2862873134328365e-05, + "loss": 0.0, + "step": 61208 + }, + { + "epoch": 57.1, + "learning_rate": 4.286240671641791e-05, + "loss": 0.0, + "step": 61212 + }, + { + "epoch": 57.1, + "learning_rate": 4.286194029850746e-05, + "loss": 0.0009, + "step": 61216 + }, + { + "epoch": 57.11, + "learning_rate": 4.2861473880597016e-05, + "loss": 0.0, + "step": 61220 + }, + { + "epoch": 57.11, + "learning_rate": 4.286100746268657e-05, + "loss": 0.0, + "step": 61224 + }, + { + "epoch": 57.12, + "learning_rate": 4.286054104477612e-05, + "loss": 0.0044, + "step": 61228 + }, + { + "epoch": 57.12, + "learning_rate": 4.2860074626865674e-05, + "loss": 0.0, + "step": 61232 + }, + { + "epoch": 57.12, + "learning_rate": 4.285960820895523e-05, + "loss": 0.0, + "step": 61236 + }, + { + "epoch": 57.13, + "learning_rate": 4.285914179104478e-05, + "loss": 0.0, + "step": 61240 + }, + { + "epoch": 57.13, + "learning_rate": 4.285867537313433e-05, + "loss": 0.0001, + "step": 61244 + }, + { + "epoch": 57.13, + "learning_rate": 4.285820895522388e-05, + "loss": 0.0, + "step": 61248 + }, + { + "epoch": 57.14, + "learning_rate": 4.2857742537313435e-05, + "loss": 0.0, + "step": 61252 + }, + { + "epoch": 57.14, + "learning_rate": 4.285727611940299e-05, + "loss": 0.0001, + "step": 61256 + }, + { + "epoch": 57.15, + "learning_rate": 4.285680970149254e-05, + "loss": 0.0, + "step": 61260 + }, + { + "epoch": 57.15, + "learning_rate": 4.285634328358209e-05, + "loss": 0.0, + "step": 61264 + }, + { + "epoch": 57.15, + "learning_rate": 4.285587686567165e-05, + "loss": 0.0, + "step": 61268 + }, + { + "epoch": 57.16, + "learning_rate": 4.2855410447761196e-05, + "loss": 0.0, + "step": 61272 + }, + { + "epoch": 57.16, + "learning_rate": 4.2854944029850744e-05, + "loss": 0.0, + "step": 61276 + }, + { + "epoch": 57.16, + "learning_rate": 4.28544776119403e-05, + "loss": 0.0, + "step": 61280 + }, + { + "epoch": 57.17, + "learning_rate": 4.2854011194029854e-05, + "loss": 0.0, + "step": 61284 + }, + { + "epoch": 57.17, + "learning_rate": 4.28535447761194e-05, + "loss": 0.0, + "step": 61288 + }, + { + "epoch": 57.18, + "learning_rate": 4.285307835820896e-05, + "loss": 0.0, + "step": 61292 + }, + { + "epoch": 57.18, + "learning_rate": 4.285261194029851e-05, + "loss": 0.0, + "step": 61296 + }, + { + "epoch": 57.18, + "learning_rate": 4.285214552238806e-05, + "loss": 0.0, + "step": 61300 + }, + { + "epoch": 57.19, + "learning_rate": 4.2851679104477615e-05, + "loss": 0.0, + "step": 61304 + }, + { + "epoch": 57.19, + "learning_rate": 4.285121268656716e-05, + "loss": 0.0, + "step": 61308 + }, + { + "epoch": 57.19, + "learning_rate": 4.285074626865672e-05, + "loss": 0.0007, + "step": 61312 + }, + { + "epoch": 57.2, + "learning_rate": 4.285027985074627e-05, + "loss": 0.0, + "step": 61316 + }, + { + "epoch": 57.2, + "learning_rate": 4.284981343283582e-05, + "loss": 0.0, + "step": 61320 + }, + { + "epoch": 57.21, + "learning_rate": 4.284934701492537e-05, + "loss": 0.0001, + "step": 61324 + }, + { + "epoch": 57.21, + "learning_rate": 4.284888059701493e-05, + "loss": 0.0, + "step": 61328 + }, + { + "epoch": 57.21, + "learning_rate": 4.284841417910448e-05, + "loss": 0.0008, + "step": 61332 + }, + { + "epoch": 57.22, + "learning_rate": 4.284794776119403e-05, + "loss": 0.0001, + "step": 61336 + }, + { + "epoch": 57.22, + "learning_rate": 4.284748134328358e-05, + "loss": 0.0001, + "step": 61340 + }, + { + "epoch": 57.22, + "learning_rate": 4.284701492537314e-05, + "loss": 0.0003, + "step": 61344 + }, + { + "epoch": 57.23, + "learning_rate": 4.2846548507462685e-05, + "loss": 0.0, + "step": 61348 + }, + { + "epoch": 57.23, + "learning_rate": 4.284608208955224e-05, + "loss": 0.0, + "step": 61352 + }, + { + "epoch": 57.24, + "learning_rate": 4.2845615671641795e-05, + "loss": 0.0, + "step": 61356 + }, + { + "epoch": 57.24, + "learning_rate": 4.284514925373134e-05, + "loss": 0.0001, + "step": 61360 + }, + { + "epoch": 57.24, + "learning_rate": 4.28446828358209e-05, + "loss": 0.0001, + "step": 61364 + }, + { + "epoch": 57.25, + "learning_rate": 4.2844216417910446e-05, + "loss": 0.0, + "step": 61368 + }, + { + "epoch": 57.25, + "learning_rate": 4.284375000000001e-05, + "loss": 0.0, + "step": 61372 + }, + { + "epoch": 57.25, + "learning_rate": 4.2843283582089556e-05, + "loss": 0.0, + "step": 61376 + }, + { + "epoch": 57.26, + "learning_rate": 4.2842817164179104e-05, + "loss": 0.0, + "step": 61380 + }, + { + "epoch": 57.26, + "learning_rate": 4.284235074626866e-05, + "loss": 0.0, + "step": 61384 + }, + { + "epoch": 57.26, + "learning_rate": 4.2841884328358214e-05, + "loss": 0.0, + "step": 61388 + }, + { + "epoch": 57.27, + "learning_rate": 4.284141791044776e-05, + "loss": 0.0, + "step": 61392 + }, + { + "epoch": 57.27, + "learning_rate": 4.284095149253732e-05, + "loss": 0.0001, + "step": 61396 + }, + { + "epoch": 57.28, + "learning_rate": 4.2840485074626865e-05, + "loss": 0.0001, + "step": 61400 + }, + { + "epoch": 57.28, + "learning_rate": 4.284001865671642e-05, + "loss": 0.0, + "step": 61404 + }, + { + "epoch": 57.28, + "learning_rate": 4.2839552238805975e-05, + "loss": 0.0, + "step": 61408 + }, + { + "epoch": 57.29, + "learning_rate": 4.283908582089552e-05, + "loss": 0.0, + "step": 61412 + }, + { + "epoch": 57.29, + "learning_rate": 4.283861940298508e-05, + "loss": 0.0, + "step": 61416 + }, + { + "epoch": 57.29, + "learning_rate": 4.283815298507463e-05, + "loss": 0.0, + "step": 61420 + }, + { + "epoch": 57.3, + "learning_rate": 4.283768656716418e-05, + "loss": 0.0, + "step": 61424 + }, + { + "epoch": 57.3, + "learning_rate": 4.283722014925373e-05, + "loss": 0.0, + "step": 61428 + }, + { + "epoch": 57.31, + "learning_rate": 4.283675373134329e-05, + "loss": 0.0004, + "step": 61432 + }, + { + "epoch": 57.31, + "learning_rate": 4.283628731343284e-05, + "loss": 0.0001, + "step": 61436 + }, + { + "epoch": 57.31, + "learning_rate": 4.283582089552239e-05, + "loss": 0.0, + "step": 61440 + }, + { + "epoch": 57.32, + "learning_rate": 4.283535447761194e-05, + "loss": 0.0004, + "step": 61444 + }, + { + "epoch": 57.32, + "learning_rate": 4.28348880597015e-05, + "loss": 0.0, + "step": 61448 + }, + { + "epoch": 57.32, + "learning_rate": 4.2834421641791045e-05, + "loss": 0.0, + "step": 61452 + }, + { + "epoch": 57.33, + "learning_rate": 4.28339552238806e-05, + "loss": 0.0025, + "step": 61456 + }, + { + "epoch": 57.33, + "learning_rate": 4.283348880597015e-05, + "loss": 0.0002, + "step": 61460 + }, + { + "epoch": 57.34, + "learning_rate": 4.28330223880597e-05, + "loss": 0.0, + "step": 61464 + }, + { + "epoch": 57.34, + "learning_rate": 4.283255597014926e-05, + "loss": 0.0, + "step": 61468 + }, + { + "epoch": 57.34, + "learning_rate": 4.2832089552238806e-05, + "loss": 0.0, + "step": 61472 + }, + { + "epoch": 57.35, + "learning_rate": 4.283162313432836e-05, + "loss": 0.0, + "step": 61476 + }, + { + "epoch": 57.35, + "learning_rate": 4.2831156716417916e-05, + "loss": 0.0, + "step": 61480 + }, + { + "epoch": 57.35, + "learning_rate": 4.2830690298507464e-05, + "loss": 0.0, + "step": 61484 + }, + { + "epoch": 57.36, + "learning_rate": 4.283022388059701e-05, + "loss": 0.0, + "step": 61488 + }, + { + "epoch": 57.36, + "learning_rate": 4.282975746268657e-05, + "loss": 0.0009, + "step": 61492 + }, + { + "epoch": 57.37, + "learning_rate": 4.282929104477612e-05, + "loss": 0.0001, + "step": 61496 + }, + { + "epoch": 57.37, + "learning_rate": 4.282882462686567e-05, + "loss": 0.0, + "step": 61500 + }, + { + "epoch": 57.37, + "eval_exact_match": 0.7514506769825918, + "eval_exec": 0.7843326885880078, + "eval_loss": 0.4877242147922516, + "eval_runtime": 1153.3063, + "eval_samples_per_second": 0.897, + "step": 61500 + }, + { + "epoch": 57.37, + "learning_rate": 4.2828358208955225e-05, + "loss": 0.0, + "step": 61504 + }, + { + "epoch": 57.38, + "learning_rate": 4.282789179104478e-05, + "loss": 0.0, + "step": 61508 + }, + { + "epoch": 57.38, + "learning_rate": 4.282742537313433e-05, + "loss": 0.0001, + "step": 61512 + }, + { + "epoch": 57.38, + "learning_rate": 4.282695895522388e-05, + "loss": 0.0, + "step": 61516 + }, + { + "epoch": 57.39, + "learning_rate": 4.282649253731343e-05, + "loss": 0.0, + "step": 61520 + }, + { + "epoch": 57.39, + "learning_rate": 4.2826026119402986e-05, + "loss": 0.0038, + "step": 61524 + }, + { + "epoch": 57.4, + "learning_rate": 4.282555970149254e-05, + "loss": 0.0001, + "step": 61528 + }, + { + "epoch": 57.4, + "learning_rate": 4.282509328358209e-05, + "loss": 0.0, + "step": 61532 + }, + { + "epoch": 57.4, + "learning_rate": 4.2824626865671644e-05, + "loss": 0.0, + "step": 61536 + }, + { + "epoch": 57.41, + "learning_rate": 4.28241604477612e-05, + "loss": 0.0, + "step": 61540 + }, + { + "epoch": 57.41, + "learning_rate": 4.282369402985075e-05, + "loss": 0.0, + "step": 61544 + }, + { + "epoch": 57.41, + "learning_rate": 4.28232276119403e-05, + "loss": 0.0, + "step": 61548 + }, + { + "epoch": 57.42, + "learning_rate": 4.282276119402985e-05, + "loss": 0.0, + "step": 61552 + }, + { + "epoch": 57.42, + "learning_rate": 4.2822294776119405e-05, + "loss": 0.0, + "step": 61556 + }, + { + "epoch": 57.43, + "learning_rate": 4.282182835820896e-05, + "loss": 0.0, + "step": 61560 + }, + { + "epoch": 57.43, + "learning_rate": 4.282136194029851e-05, + "loss": 0.0, + "step": 61564 + }, + { + "epoch": 57.43, + "learning_rate": 4.282089552238806e-05, + "loss": 0.0, + "step": 61568 + }, + { + "epoch": 57.44, + "learning_rate": 4.282042910447762e-05, + "loss": 0.0, + "step": 61572 + }, + { + "epoch": 57.44, + "learning_rate": 4.2819962686567166e-05, + "loss": 0.0, + "step": 61576 + }, + { + "epoch": 57.44, + "learning_rate": 4.2819496268656714e-05, + "loss": 0.0002, + "step": 61580 + }, + { + "epoch": 57.45, + "learning_rate": 4.2819029850746276e-05, + "loss": 0.0007, + "step": 61584 + }, + { + "epoch": 57.45, + "learning_rate": 4.2818563432835824e-05, + "loss": 0.0, + "step": 61588 + }, + { + "epoch": 57.46, + "learning_rate": 4.281809701492537e-05, + "loss": 0.0, + "step": 61592 + }, + { + "epoch": 57.46, + "learning_rate": 4.281763059701493e-05, + "loss": 0.0, + "step": 61596 + }, + { + "epoch": 57.46, + "learning_rate": 4.281716417910448e-05, + "loss": 0.0, + "step": 61600 + }, + { + "epoch": 57.47, + "learning_rate": 4.281669776119403e-05, + "loss": 0.0001, + "step": 61604 + }, + { + "epoch": 57.47, + "learning_rate": 4.2816231343283585e-05, + "loss": 0.0, + "step": 61608 + }, + { + "epoch": 57.47, + "learning_rate": 4.281576492537313e-05, + "loss": 0.0, + "step": 61612 + }, + { + "epoch": 57.48, + "learning_rate": 4.281529850746269e-05, + "loss": 0.0, + "step": 61616 + }, + { + "epoch": 57.48, + "learning_rate": 4.281483208955224e-05, + "loss": 0.0, + "step": 61620 + }, + { + "epoch": 57.49, + "learning_rate": 4.281436567164179e-05, + "loss": 0.0003, + "step": 61624 + }, + { + "epoch": 57.49, + "learning_rate": 4.2813899253731346e-05, + "loss": 0.0, + "step": 61628 + }, + { + "epoch": 57.49, + "learning_rate": 4.28134328358209e-05, + "loss": 0.0, + "step": 61632 + }, + { + "epoch": 57.5, + "learning_rate": 4.281296641791045e-05, + "loss": 0.0, + "step": 61636 + }, + { + "epoch": 57.5, + "learning_rate": 4.28125e-05, + "loss": 0.0, + "step": 61640 + }, + { + "epoch": 57.5, + "learning_rate": 4.281203358208956e-05, + "loss": 0.0, + "step": 61644 + }, + { + "epoch": 57.51, + "learning_rate": 4.281156716417911e-05, + "loss": 0.0035, + "step": 61648 + }, + { + "epoch": 57.51, + "learning_rate": 4.2811100746268655e-05, + "loss": 0.0, + "step": 61652 + }, + { + "epoch": 57.51, + "learning_rate": 4.281063432835821e-05, + "loss": 0.0002, + "step": 61656 + }, + { + "epoch": 57.52, + "learning_rate": 4.2810167910447765e-05, + "loss": 0.0, + "step": 61660 + }, + { + "epoch": 57.52, + "learning_rate": 4.280970149253731e-05, + "loss": 0.0, + "step": 61664 + }, + { + "epoch": 57.53, + "learning_rate": 4.280923507462687e-05, + "loss": 0.0, + "step": 61668 + }, + { + "epoch": 57.53, + "learning_rate": 4.2808768656716416e-05, + "loss": 0.0, + "step": 61672 + }, + { + "epoch": 57.53, + "learning_rate": 4.280830223880597e-05, + "loss": 0.0001, + "step": 61676 + }, + { + "epoch": 57.54, + "learning_rate": 4.2807835820895526e-05, + "loss": 0.0, + "step": 61680 + }, + { + "epoch": 57.54, + "learning_rate": 4.2807369402985074e-05, + "loss": 0.0, + "step": 61684 + }, + { + "epoch": 57.54, + "learning_rate": 4.280690298507463e-05, + "loss": 0.0, + "step": 61688 + }, + { + "epoch": 57.55, + "learning_rate": 4.2806436567164184e-05, + "loss": 0.0, + "step": 61692 + }, + { + "epoch": 57.55, + "learning_rate": 4.280597014925373e-05, + "loss": 0.0, + "step": 61696 + }, + { + "epoch": 57.56, + "learning_rate": 4.280550373134329e-05, + "loss": 0.0, + "step": 61700 + }, + { + "epoch": 57.56, + "learning_rate": 4.280503731343284e-05, + "loss": 0.0, + "step": 61704 + }, + { + "epoch": 57.56, + "learning_rate": 4.280457089552239e-05, + "loss": 0.0, + "step": 61708 + }, + { + "epoch": 57.57, + "learning_rate": 4.2804104477611945e-05, + "loss": 0.0, + "step": 61712 + }, + { + "epoch": 57.57, + "learning_rate": 4.280363805970149e-05, + "loss": 0.0, + "step": 61716 + }, + { + "epoch": 57.57, + "learning_rate": 4.280317164179105e-05, + "loss": 0.0, + "step": 61720 + }, + { + "epoch": 57.58, + "learning_rate": 4.28027052238806e-05, + "loss": 0.0001, + "step": 61724 + }, + { + "epoch": 57.58, + "learning_rate": 4.280223880597015e-05, + "loss": 0.0, + "step": 61728 + }, + { + "epoch": 57.59, + "learning_rate": 4.28017723880597e-05, + "loss": 0.0, + "step": 61732 + }, + { + "epoch": 57.59, + "learning_rate": 4.280130597014926e-05, + "loss": 0.0, + "step": 61736 + }, + { + "epoch": 57.59, + "learning_rate": 4.280083955223881e-05, + "loss": 0.0, + "step": 61740 + }, + { + "epoch": 57.6, + "learning_rate": 4.280037313432836e-05, + "loss": 0.0142, + "step": 61744 + }, + { + "epoch": 57.6, + "learning_rate": 4.279990671641791e-05, + "loss": 0.0, + "step": 61748 + }, + { + "epoch": 57.6, + "learning_rate": 4.279944029850747e-05, + "loss": 0.0003, + "step": 61752 + }, + { + "epoch": 57.61, + "learning_rate": 4.2798973880597015e-05, + "loss": 0.0, + "step": 61756 + }, + { + "epoch": 57.61, + "learning_rate": 4.279850746268657e-05, + "loss": 0.0003, + "step": 61760 + }, + { + "epoch": 57.62, + "learning_rate": 4.2798041044776125e-05, + "loss": 0.0, + "step": 61764 + }, + { + "epoch": 57.62, + "learning_rate": 4.279757462686567e-05, + "loss": 0.0, + "step": 61768 + }, + { + "epoch": 57.62, + "learning_rate": 4.279710820895523e-05, + "loss": 0.0, + "step": 61772 + }, + { + "epoch": 57.63, + "learning_rate": 4.2796641791044776e-05, + "loss": 0.0002, + "step": 61776 + }, + { + "epoch": 57.63, + "learning_rate": 4.279617537313433e-05, + "loss": 0.0009, + "step": 61780 + }, + { + "epoch": 57.63, + "learning_rate": 4.2795708955223886e-05, + "loss": 0.0, + "step": 61784 + }, + { + "epoch": 57.64, + "learning_rate": 4.2795242537313434e-05, + "loss": 0.0, + "step": 61788 + }, + { + "epoch": 57.64, + "learning_rate": 4.279477611940298e-05, + "loss": 0.0003, + "step": 61792 + }, + { + "epoch": 57.65, + "learning_rate": 4.2794309701492544e-05, + "loss": 0.0002, + "step": 61796 + }, + { + "epoch": 57.65, + "learning_rate": 4.279384328358209e-05, + "loss": 0.0, + "step": 61800 + }, + { + "epoch": 57.65, + "learning_rate": 4.279337686567164e-05, + "loss": 0.0, + "step": 61804 + }, + { + "epoch": 57.66, + "learning_rate": 4.2792910447761195e-05, + "loss": 0.0, + "step": 61808 + }, + { + "epoch": 57.66, + "learning_rate": 4.279244402985075e-05, + "loss": 0.0, + "step": 61812 + }, + { + "epoch": 57.66, + "learning_rate": 4.27919776119403e-05, + "loss": 0.0, + "step": 61816 + }, + { + "epoch": 57.67, + "learning_rate": 4.279151119402985e-05, + "loss": 0.0, + "step": 61820 + }, + { + "epoch": 57.67, + "learning_rate": 4.279104477611941e-05, + "loss": 0.0003, + "step": 61824 + }, + { + "epoch": 57.68, + "learning_rate": 4.2790578358208956e-05, + "loss": 0.0, + "step": 61828 + }, + { + "epoch": 57.68, + "learning_rate": 4.279011194029851e-05, + "loss": 0.0001, + "step": 61832 + }, + { + "epoch": 57.68, + "learning_rate": 4.278964552238806e-05, + "loss": 0.0, + "step": 61836 + }, + { + "epoch": 57.69, + "learning_rate": 4.2789179104477614e-05, + "loss": 0.0, + "step": 61840 + }, + { + "epoch": 57.69, + "learning_rate": 4.278871268656717e-05, + "loss": 0.0011, + "step": 61844 + }, + { + "epoch": 57.69, + "learning_rate": 4.278824626865672e-05, + "loss": 0.0, + "step": 61848 + }, + { + "epoch": 57.7, + "learning_rate": 4.2787779850746265e-05, + "loss": 0.0, + "step": 61852 + }, + { + "epoch": 57.7, + "learning_rate": 4.2787313432835827e-05, + "loss": 0.0005, + "step": 61856 + }, + { + "epoch": 57.71, + "learning_rate": 4.2786847014925375e-05, + "loss": 0.0, + "step": 61860 + }, + { + "epoch": 57.71, + "learning_rate": 4.278638059701493e-05, + "loss": 0.0001, + "step": 61864 + }, + { + "epoch": 57.71, + "learning_rate": 4.278591417910448e-05, + "loss": 0.0, + "step": 61868 + }, + { + "epoch": 57.72, + "learning_rate": 4.278544776119403e-05, + "loss": 0.0003, + "step": 61872 + }, + { + "epoch": 57.72, + "learning_rate": 4.278498134328359e-05, + "loss": 0.0014, + "step": 61876 + }, + { + "epoch": 57.72, + "learning_rate": 4.2784514925373136e-05, + "loss": 0.0, + "step": 61880 + }, + { + "epoch": 57.73, + "learning_rate": 4.278404850746269e-05, + "loss": 0.0, + "step": 61884 + }, + { + "epoch": 57.73, + "learning_rate": 4.2783582089552246e-05, + "loss": 0.0, + "step": 61888 + }, + { + "epoch": 57.73, + "learning_rate": 4.2783115671641794e-05, + "loss": 0.0, + "step": 61892 + }, + { + "epoch": 57.74, + "learning_rate": 4.278264925373134e-05, + "loss": 0.0, + "step": 61896 + }, + { + "epoch": 57.74, + "learning_rate": 4.27821828358209e-05, + "loss": 0.0, + "step": 61900 + }, + { + "epoch": 57.75, + "learning_rate": 4.278171641791045e-05, + "loss": 0.0, + "step": 61904 + }, + { + "epoch": 57.75, + "learning_rate": 4.278125e-05, + "loss": 0.0, + "step": 61908 + }, + { + "epoch": 57.75, + "learning_rate": 4.2780783582089555e-05, + "loss": 0.0, + "step": 61912 + }, + { + "epoch": 57.76, + "learning_rate": 4.278031716417911e-05, + "loss": 0.0, + "step": 61916 + }, + { + "epoch": 57.76, + "learning_rate": 4.277985074626866e-05, + "loss": 0.0, + "step": 61920 + }, + { + "epoch": 57.76, + "learning_rate": 4.277938432835821e-05, + "loss": 0.0, + "step": 61924 + }, + { + "epoch": 57.77, + "learning_rate": 4.277891791044776e-05, + "loss": 0.0024, + "step": 61928 + }, + { + "epoch": 57.77, + "learning_rate": 4.2778451492537316e-05, + "loss": 0.0, + "step": 61932 + }, + { + "epoch": 57.78, + "learning_rate": 4.277798507462687e-05, + "loss": 0.0019, + "step": 61936 + }, + { + "epoch": 57.78, + "learning_rate": 4.277751865671642e-05, + "loss": 0.0, + "step": 61940 + }, + { + "epoch": 57.78, + "learning_rate": 4.2777052238805974e-05, + "loss": 0.0001, + "step": 61944 + }, + { + "epoch": 57.79, + "learning_rate": 4.277658582089553e-05, + "loss": 0.0, + "step": 61948 + }, + { + "epoch": 57.79, + "learning_rate": 4.2776119402985077e-05, + "loss": 0.0, + "step": 61952 + }, + { + "epoch": 57.79, + "learning_rate": 4.2775652985074625e-05, + "loss": 0.0, + "step": 61956 + }, + { + "epoch": 57.8, + "learning_rate": 4.277518656716418e-05, + "loss": 0.0, + "step": 61960 + }, + { + "epoch": 57.8, + "learning_rate": 4.2774720149253735e-05, + "loss": 0.0, + "step": 61964 + }, + { + "epoch": 57.81, + "learning_rate": 4.277425373134328e-05, + "loss": 0.0, + "step": 61968 + }, + { + "epoch": 57.81, + "learning_rate": 4.277378731343284e-05, + "loss": 0.0, + "step": 61972 + }, + { + "epoch": 57.81, + "learning_rate": 4.277332089552239e-05, + "loss": 0.0, + "step": 61976 + }, + { + "epoch": 57.82, + "learning_rate": 4.277285447761194e-05, + "loss": 0.0, + "step": 61980 + }, + { + "epoch": 57.82, + "learning_rate": 4.2772388059701496e-05, + "loss": 0.0, + "step": 61984 + }, + { + "epoch": 57.82, + "learning_rate": 4.2771921641791044e-05, + "loss": 0.0, + "step": 61988 + }, + { + "epoch": 57.83, + "learning_rate": 4.27714552238806e-05, + "loss": 0.0, + "step": 61992 + }, + { + "epoch": 57.83, + "learning_rate": 4.2770988805970153e-05, + "loss": 0.0, + "step": 61996 + }, + { + "epoch": 57.84, + "learning_rate": 4.27705223880597e-05, + "loss": 0.0, + "step": 62000 + }, + { + "epoch": 57.84, + "eval_exact_match": 0.7514506769825918, + "eval_exec": 0.7872340425531915, + "eval_loss": 0.4987480640411377, + "eval_runtime": 1158.3298, + "eval_samples_per_second": 0.893, + "step": 62000 + }, + { + "epoch": 57.84, + "learning_rate": 4.2770055970149257e-05, + "loss": 0.0, + "step": 62004 + }, + { + "epoch": 57.84, + "learning_rate": 4.276958955223881e-05, + "loss": 0.0, + "step": 62008 + }, + { + "epoch": 57.85, + "learning_rate": 4.276912313432836e-05, + "loss": 0.0, + "step": 62012 + }, + { + "epoch": 57.85, + "learning_rate": 4.2768656716417914e-05, + "loss": 0.0003, + "step": 62016 + }, + { + "epoch": 57.85, + "learning_rate": 4.276819029850746e-05, + "loss": 0.0, + "step": 62020 + }, + { + "epoch": 57.86, + "learning_rate": 4.276772388059702e-05, + "loss": 0.0035, + "step": 62024 + }, + { + "epoch": 57.86, + "learning_rate": 4.276725746268657e-05, + "loss": 0.0, + "step": 62028 + }, + { + "epoch": 57.87, + "learning_rate": 4.276679104477612e-05, + "loss": 0.0, + "step": 62032 + }, + { + "epoch": 57.87, + "learning_rate": 4.2766324626865675e-05, + "loss": 0.0, + "step": 62036 + }, + { + "epoch": 57.87, + "learning_rate": 4.276585820895523e-05, + "loss": 0.0, + "step": 62040 + }, + { + "epoch": 57.88, + "learning_rate": 4.276539179104478e-05, + "loss": 0.0, + "step": 62044 + }, + { + "epoch": 57.88, + "learning_rate": 4.276492537313433e-05, + "loss": 0.0, + "step": 62048 + }, + { + "epoch": 57.88, + "learning_rate": 4.276445895522389e-05, + "loss": 0.0, + "step": 62052 + }, + { + "epoch": 57.89, + "learning_rate": 4.2763992537313436e-05, + "loss": 0.0, + "step": 62056 + }, + { + "epoch": 57.89, + "learning_rate": 4.2763526119402985e-05, + "loss": 0.0, + "step": 62060 + }, + { + "epoch": 57.9, + "learning_rate": 4.276305970149254e-05, + "loss": 0.0, + "step": 62064 + }, + { + "epoch": 57.9, + "learning_rate": 4.2762593283582094e-05, + "loss": 0.0, + "step": 62068 + }, + { + "epoch": 57.9, + "learning_rate": 4.276212686567164e-05, + "loss": 0.0, + "step": 62072 + }, + { + "epoch": 57.91, + "learning_rate": 4.27616604477612e-05, + "loss": 0.0, + "step": 62076 + }, + { + "epoch": 57.91, + "learning_rate": 4.2761194029850746e-05, + "loss": 0.0, + "step": 62080 + }, + { + "epoch": 57.91, + "learning_rate": 4.27607276119403e-05, + "loss": 0.0001, + "step": 62084 + }, + { + "epoch": 57.92, + "learning_rate": 4.2760261194029855e-05, + "loss": 0.0001, + "step": 62088 + }, + { + "epoch": 57.92, + "learning_rate": 4.2759794776119404e-05, + "loss": 0.0, + "step": 62092 + }, + { + "epoch": 57.93, + "learning_rate": 4.275932835820896e-05, + "loss": 0.0, + "step": 62096 + }, + { + "epoch": 57.93, + "learning_rate": 4.275886194029851e-05, + "loss": 0.0, + "step": 62100 + }, + { + "epoch": 57.93, + "learning_rate": 4.275839552238806e-05, + "loss": 0.0027, + "step": 62104 + }, + { + "epoch": 57.94, + "learning_rate": 4.275792910447761e-05, + "loss": 0.0, + "step": 62108 + }, + { + "epoch": 57.94, + "learning_rate": 4.275746268656717e-05, + "loss": 0.0, + "step": 62112 + }, + { + "epoch": 57.94, + "learning_rate": 4.275699626865672e-05, + "loss": 0.0001, + "step": 62116 + }, + { + "epoch": 57.95, + "learning_rate": 4.275652985074627e-05, + "loss": 0.0, + "step": 62120 + }, + { + "epoch": 57.95, + "learning_rate": 4.275606343283582e-05, + "loss": 0.0, + "step": 62124 + }, + { + "epoch": 57.96, + "learning_rate": 4.275559701492538e-05, + "loss": 0.0, + "step": 62128 + }, + { + "epoch": 57.96, + "learning_rate": 4.2755130597014925e-05, + "loss": 0.0001, + "step": 62132 + }, + { + "epoch": 57.96, + "learning_rate": 4.275466417910448e-05, + "loss": 0.0001, + "step": 62136 + }, + { + "epoch": 57.97, + "learning_rate": 4.275419776119403e-05, + "loss": 0.0, + "step": 62140 + }, + { + "epoch": 57.97, + "learning_rate": 4.2753731343283583e-05, + "loss": 0.0, + "step": 62144 + }, + { + "epoch": 57.97, + "learning_rate": 4.275326492537314e-05, + "loss": 0.0014, + "step": 62148 + }, + { + "epoch": 57.98, + "learning_rate": 4.2752798507462686e-05, + "loss": 0.0, + "step": 62152 + }, + { + "epoch": 57.98, + "learning_rate": 4.275233208955224e-05, + "loss": 0.0, + "step": 62156 + }, + { + "epoch": 57.98, + "learning_rate": 4.2751865671641796e-05, + "loss": 0.0001, + "step": 62160 + }, + { + "epoch": 57.99, + "learning_rate": 4.2751399253731344e-05, + "loss": 0.0025, + "step": 62164 + }, + { + "epoch": 57.99, + "learning_rate": 4.275093283582089e-05, + "loss": 0.0, + "step": 62168 + }, + { + "epoch": 58.0, + "learning_rate": 4.275046641791045e-05, + "loss": 0.003, + "step": 62172 + }, + { + "epoch": 58.0, + "learning_rate": 4.275e-05, + "loss": 0.0001, + "step": 62176 + }, + { + "epoch": 58.0, + "learning_rate": 4.274953358208956e-05, + "loss": 0.0, + "step": 62180 + }, + { + "epoch": 58.01, + "learning_rate": 4.2749067164179105e-05, + "loss": 0.0007, + "step": 62184 + }, + { + "epoch": 58.01, + "learning_rate": 4.274860074626866e-05, + "loss": 0.0, + "step": 62188 + }, + { + "epoch": 58.01, + "learning_rate": 4.2748134328358215e-05, + "loss": 0.0001, + "step": 62192 + }, + { + "epoch": 58.02, + "learning_rate": 4.274766791044776e-05, + "loss": 0.0, + "step": 62196 + }, + { + "epoch": 58.02, + "learning_rate": 4.274720149253731e-05, + "loss": 0.0002, + "step": 62200 + }, + { + "epoch": 58.03, + "learning_rate": 4.274673507462687e-05, + "loss": 0.0, + "step": 62204 + }, + { + "epoch": 58.03, + "learning_rate": 4.274626865671642e-05, + "loss": 0.0, + "step": 62208 + }, + { + "epoch": 58.03, + "learning_rate": 4.274580223880597e-05, + "loss": 0.0, + "step": 62212 + }, + { + "epoch": 58.04, + "learning_rate": 4.2745335820895524e-05, + "loss": 0.0, + "step": 62216 + }, + { + "epoch": 58.04, + "learning_rate": 4.274486940298508e-05, + "loss": 0.0, + "step": 62220 + }, + { + "epoch": 58.04, + "learning_rate": 4.274440298507463e-05, + "loss": 0.0, + "step": 62224 + }, + { + "epoch": 58.05, + "learning_rate": 4.274393656716418e-05, + "loss": 0.0, + "step": 62228 + }, + { + "epoch": 58.05, + "learning_rate": 4.274347014925373e-05, + "loss": 0.0, + "step": 62232 + }, + { + "epoch": 58.06, + "learning_rate": 4.2743003731343285e-05, + "loss": 0.0, + "step": 62236 + }, + { + "epoch": 58.06, + "learning_rate": 4.274253731343284e-05, + "loss": 0.0, + "step": 62240 + }, + { + "epoch": 58.06, + "learning_rate": 4.274207089552239e-05, + "loss": 0.0, + "step": 62244 + }, + { + "epoch": 58.07, + "learning_rate": 4.274160447761194e-05, + "loss": 0.0, + "step": 62248 + }, + { + "epoch": 58.07, + "learning_rate": 4.27411380597015e-05, + "loss": 0.0, + "step": 62252 + }, + { + "epoch": 58.07, + "learning_rate": 4.2740671641791046e-05, + "loss": 0.0, + "step": 62256 + }, + { + "epoch": 58.08, + "learning_rate": 4.2740205223880594e-05, + "loss": 0.0, + "step": 62260 + }, + { + "epoch": 58.08, + "learning_rate": 4.2739738805970156e-05, + "loss": 0.0066, + "step": 62264 + }, + { + "epoch": 58.09, + "learning_rate": 4.2739272388059704e-05, + "loss": 0.0, + "step": 62268 + }, + { + "epoch": 58.09, + "learning_rate": 4.273880597014925e-05, + "loss": 0.0, + "step": 62272 + }, + { + "epoch": 58.09, + "learning_rate": 4.273833955223881e-05, + "loss": 0.0009, + "step": 62276 + }, + { + "epoch": 58.1, + "learning_rate": 4.273787313432836e-05, + "loss": 0.0, + "step": 62280 + }, + { + "epoch": 58.1, + "learning_rate": 4.273740671641791e-05, + "loss": 0.0, + "step": 62284 + }, + { + "epoch": 58.1, + "learning_rate": 4.2736940298507465e-05, + "loss": 0.0015, + "step": 62288 + }, + { + "epoch": 58.11, + "learning_rate": 4.273647388059701e-05, + "loss": 0.0, + "step": 62292 + }, + { + "epoch": 58.11, + "learning_rate": 4.273600746268657e-05, + "loss": 0.0005, + "step": 62296 + }, + { + "epoch": 58.12, + "learning_rate": 4.273554104477612e-05, + "loss": 0.002, + "step": 62300 + }, + { + "epoch": 58.12, + "learning_rate": 4.273507462686567e-05, + "loss": 0.0, + "step": 62304 + }, + { + "epoch": 58.12, + "learning_rate": 4.2734608208955226e-05, + "loss": 0.0006, + "step": 62308 + }, + { + "epoch": 58.13, + "learning_rate": 4.273414179104478e-05, + "loss": 0.0007, + "step": 62312 + }, + { + "epoch": 58.13, + "learning_rate": 4.273367537313433e-05, + "loss": 0.0, + "step": 62316 + }, + { + "epoch": 58.13, + "learning_rate": 4.273320895522388e-05, + "loss": 0.0, + "step": 62320 + }, + { + "epoch": 58.14, + "learning_rate": 4.273274253731344e-05, + "loss": 0.0, + "step": 62324 + }, + { + "epoch": 58.14, + "learning_rate": 4.273227611940299e-05, + "loss": 0.0, + "step": 62328 + }, + { + "epoch": 58.15, + "learning_rate": 4.2731809701492535e-05, + "loss": 0.0, + "step": 62332 + }, + { + "epoch": 58.15, + "learning_rate": 4.273134328358209e-05, + "loss": 0.0, + "step": 62336 + }, + { + "epoch": 58.15, + "learning_rate": 4.2730876865671645e-05, + "loss": 0.0009, + "step": 62340 + }, + { + "epoch": 58.16, + "learning_rate": 4.27304104477612e-05, + "loss": 0.0021, + "step": 62344 + }, + { + "epoch": 58.16, + "learning_rate": 4.272994402985075e-05, + "loss": 0.0, + "step": 62348 + }, + { + "epoch": 58.16, + "learning_rate": 4.2729477611940296e-05, + "loss": 0.0, + "step": 62352 + }, + { + "epoch": 58.17, + "learning_rate": 4.272901119402986e-05, + "loss": 0.0, + "step": 62356 + }, + { + "epoch": 58.17, + "learning_rate": 4.2728544776119406e-05, + "loss": 0.0006, + "step": 62360 + }, + { + "epoch": 58.18, + "learning_rate": 4.2728078358208954e-05, + "loss": 0.0, + "step": 62364 + }, + { + "epoch": 58.18, + "learning_rate": 4.272761194029851e-05, + "loss": 0.0, + "step": 62368 + }, + { + "epoch": 58.18, + "learning_rate": 4.2727145522388064e-05, + "loss": 0.0, + "step": 62372 + }, + { + "epoch": 58.19, + "learning_rate": 4.272667910447761e-05, + "loss": 0.0, + "step": 62376 + }, + { + "epoch": 58.19, + "learning_rate": 4.272621268656717e-05, + "loss": 0.0109, + "step": 62380 + }, + { + "epoch": 58.19, + "learning_rate": 4.272574626865672e-05, + "loss": 0.0007, + "step": 62384 + }, + { + "epoch": 58.2, + "learning_rate": 4.272527985074627e-05, + "loss": 0.0, + "step": 62388 + }, + { + "epoch": 58.2, + "learning_rate": 4.2724813432835825e-05, + "loss": 0.0, + "step": 62392 + }, + { + "epoch": 58.21, + "learning_rate": 4.272434701492537e-05, + "loss": 0.0, + "step": 62396 + }, + { + "epoch": 58.21, + "learning_rate": 4.272388059701493e-05, + "loss": 0.0001, + "step": 62400 + }, + { + "epoch": 58.21, + "learning_rate": 4.272341417910448e-05, + "loss": 0.0, + "step": 62404 + }, + { + "epoch": 58.22, + "learning_rate": 4.272294776119403e-05, + "loss": 0.002, + "step": 62408 + }, + { + "epoch": 58.22, + "learning_rate": 4.272248134328358e-05, + "loss": 0.0001, + "step": 62412 + }, + { + "epoch": 58.22, + "learning_rate": 4.272201492537314e-05, + "loss": 0.0003, + "step": 62416 + }, + { + "epoch": 58.23, + "learning_rate": 4.272154850746269e-05, + "loss": 0.0, + "step": 62420 + }, + { + "epoch": 58.23, + "learning_rate": 4.272108208955224e-05, + "loss": 0.0, + "step": 62424 + }, + { + "epoch": 58.24, + "learning_rate": 4.272061567164179e-05, + "loss": 0.0, + "step": 62428 + }, + { + "epoch": 58.24, + "learning_rate": 4.272014925373135e-05, + "loss": 0.0, + "step": 62432 + }, + { + "epoch": 58.24, + "learning_rate": 4.2719682835820895e-05, + "loss": 0.0, + "step": 62436 + }, + { + "epoch": 58.25, + "learning_rate": 4.271921641791045e-05, + "loss": 0.0, + "step": 62440 + }, + { + "epoch": 58.25, + "learning_rate": 4.2718750000000005e-05, + "loss": 0.0, + "step": 62444 + }, + { + "epoch": 58.25, + "learning_rate": 4.271828358208955e-05, + "loss": 0.0, + "step": 62448 + }, + { + "epoch": 58.26, + "learning_rate": 4.271781716417911e-05, + "loss": 0.0002, + "step": 62452 + }, + { + "epoch": 58.26, + "learning_rate": 4.2717350746268656e-05, + "loss": 0.0002, + "step": 62456 + }, + { + "epoch": 58.26, + "learning_rate": 4.271688432835821e-05, + "loss": 0.0, + "step": 62460 + }, + { + "epoch": 58.27, + "learning_rate": 4.2716417910447766e-05, + "loss": 0.0002, + "step": 62464 + }, + { + "epoch": 58.27, + "learning_rate": 4.2715951492537314e-05, + "loss": 0.0, + "step": 62468 + }, + { + "epoch": 58.28, + "learning_rate": 4.271548507462686e-05, + "loss": 0.0, + "step": 62472 + }, + { + "epoch": 58.28, + "learning_rate": 4.2715018656716424e-05, + "loss": 0.0, + "step": 62476 + }, + { + "epoch": 58.28, + "learning_rate": 4.271455223880597e-05, + "loss": 0.0, + "step": 62480 + }, + { + "epoch": 58.29, + "learning_rate": 4.271408582089552e-05, + "loss": 0.0, + "step": 62484 + }, + { + "epoch": 58.29, + "learning_rate": 4.2713619402985075e-05, + "loss": 0.0, + "step": 62488 + }, + { + "epoch": 58.29, + "learning_rate": 4.271315298507463e-05, + "loss": 0.0026, + "step": 62492 + }, + { + "epoch": 58.3, + "learning_rate": 4.271268656716418e-05, + "loss": 0.0002, + "step": 62496 + }, + { + "epoch": 58.3, + "learning_rate": 4.271222014925373e-05, + "loss": 0.0005, + "step": 62500 + }, + { + "epoch": 58.3, + "eval_exact_match": 0.7446808510638298, + "eval_exec": 0.7823984526112185, + "eval_loss": 0.46804332733154297, + "eval_runtime": 1167.1187, + "eval_samples_per_second": 0.886, + "step": 62500 + }, + { + "epoch": 58.31, + "learning_rate": 4.271175373134329e-05, + "loss": 0.0018, + "step": 62504 + }, + { + "epoch": 58.31, + "learning_rate": 4.271128731343284e-05, + "loss": 0.0, + "step": 62508 + }, + { + "epoch": 58.31, + "learning_rate": 4.271082089552239e-05, + "loss": 0.0, + "step": 62512 + }, + { + "epoch": 58.32, + "learning_rate": 4.271035447761194e-05, + "loss": 0.0, + "step": 62516 + }, + { + "epoch": 58.32, + "learning_rate": 4.2709888059701494e-05, + "loss": 0.0, + "step": 62520 + }, + { + "epoch": 58.32, + "learning_rate": 4.270942164179105e-05, + "loss": 0.0, + "step": 62524 + }, + { + "epoch": 58.33, + "learning_rate": 4.27089552238806e-05, + "loss": 0.0027, + "step": 62528 + }, + { + "epoch": 58.33, + "learning_rate": 4.270848880597015e-05, + "loss": 0.0, + "step": 62532 + }, + { + "epoch": 58.34, + "learning_rate": 4.270802238805971e-05, + "loss": 0.0003, + "step": 62536 + }, + { + "epoch": 58.34, + "learning_rate": 4.2707555970149255e-05, + "loss": 0.0, + "step": 62540 + }, + { + "epoch": 58.34, + "learning_rate": 4.270708955223881e-05, + "loss": 0.0, + "step": 62544 + }, + { + "epoch": 58.35, + "learning_rate": 4.270662313432836e-05, + "loss": 0.0, + "step": 62548 + }, + { + "epoch": 58.35, + "learning_rate": 4.270615671641791e-05, + "loss": 0.0, + "step": 62552 + }, + { + "epoch": 58.35, + "learning_rate": 4.270569029850747e-05, + "loss": 0.0003, + "step": 62556 + }, + { + "epoch": 58.36, + "learning_rate": 4.2705223880597016e-05, + "loss": 0.0, + "step": 62560 + }, + { + "epoch": 58.36, + "learning_rate": 4.270475746268657e-05, + "loss": 0.0003, + "step": 62564 + }, + { + "epoch": 58.37, + "learning_rate": 4.2704291044776126e-05, + "loss": 0.0, + "step": 62568 + }, + { + "epoch": 58.37, + "learning_rate": 4.2703824626865674e-05, + "loss": 0.0, + "step": 62572 + }, + { + "epoch": 58.37, + "learning_rate": 4.270335820895522e-05, + "loss": 0.0, + "step": 62576 + }, + { + "epoch": 58.38, + "learning_rate": 4.270289179104478e-05, + "loss": 0.0002, + "step": 62580 + }, + { + "epoch": 58.38, + "learning_rate": 4.270242537313433e-05, + "loss": 0.0, + "step": 62584 + }, + { + "epoch": 58.38, + "learning_rate": 4.270195895522388e-05, + "loss": 0.0, + "step": 62588 + }, + { + "epoch": 58.39, + "learning_rate": 4.2701492537313435e-05, + "loss": 0.0012, + "step": 62592 + }, + { + "epoch": 58.39, + "learning_rate": 4.270102611940299e-05, + "loss": 0.0004, + "step": 62596 + }, + { + "epoch": 58.4, + "learning_rate": 4.270055970149254e-05, + "loss": 0.0, + "step": 62600 + }, + { + "epoch": 58.4, + "learning_rate": 4.270009328358209e-05, + "loss": 0.0, + "step": 62604 + }, + { + "epoch": 58.4, + "learning_rate": 4.269962686567164e-05, + "loss": 0.0007, + "step": 62608 + }, + { + "epoch": 58.41, + "learning_rate": 4.2699160447761196e-05, + "loss": 0.0, + "step": 62612 + }, + { + "epoch": 58.41, + "learning_rate": 4.269869402985075e-05, + "loss": 0.0, + "step": 62616 + }, + { + "epoch": 58.41, + "learning_rate": 4.26982276119403e-05, + "loss": 0.0, + "step": 62620 + }, + { + "epoch": 58.42, + "learning_rate": 4.2697761194029854e-05, + "loss": 0.0, + "step": 62624 + }, + { + "epoch": 58.42, + "learning_rate": 4.269729477611941e-05, + "loss": 0.0, + "step": 62628 + }, + { + "epoch": 58.43, + "learning_rate": 4.269682835820896e-05, + "loss": 0.0059, + "step": 62632 + }, + { + "epoch": 58.43, + "learning_rate": 4.2696361940298505e-05, + "loss": 0.0003, + "step": 62636 + }, + { + "epoch": 58.43, + "learning_rate": 4.269589552238806e-05, + "loss": 0.0, + "step": 62640 + }, + { + "epoch": 58.44, + "learning_rate": 4.2695429104477615e-05, + "loss": 0.0, + "step": 62644 + }, + { + "epoch": 58.44, + "learning_rate": 4.269496268656716e-05, + "loss": 0.0, + "step": 62648 + }, + { + "epoch": 58.44, + "learning_rate": 4.269449626865672e-05, + "loss": 0.0009, + "step": 62652 + }, + { + "epoch": 58.45, + "learning_rate": 4.269402985074627e-05, + "loss": 0.0, + "step": 62656 + }, + { + "epoch": 58.45, + "learning_rate": 4.269356343283582e-05, + "loss": 0.0, + "step": 62660 + }, + { + "epoch": 58.46, + "learning_rate": 4.2693097014925376e-05, + "loss": 0.0, + "step": 62664 + }, + { + "epoch": 58.46, + "learning_rate": 4.2692630597014924e-05, + "loss": 0.0001, + "step": 62668 + }, + { + "epoch": 58.46, + "learning_rate": 4.2692164179104486e-05, + "loss": 0.0001, + "step": 62672 + }, + { + "epoch": 58.47, + "learning_rate": 4.2691697761194034e-05, + "loss": 0.0, + "step": 62676 + }, + { + "epoch": 58.47, + "learning_rate": 4.269123134328358e-05, + "loss": 0.0002, + "step": 62680 + }, + { + "epoch": 58.47, + "learning_rate": 4.269076492537314e-05, + "loss": 0.0, + "step": 62684 + }, + { + "epoch": 58.48, + "learning_rate": 4.269029850746269e-05, + "loss": 0.0002, + "step": 62688 + }, + { + "epoch": 58.48, + "learning_rate": 4.268983208955224e-05, + "loss": 0.0, + "step": 62692 + }, + { + "epoch": 58.49, + "learning_rate": 4.2689365671641795e-05, + "loss": 0.0001, + "step": 62696 + }, + { + "epoch": 58.49, + "learning_rate": 4.268889925373134e-05, + "loss": 0.0001, + "step": 62700 + }, + { + "epoch": 58.49, + "learning_rate": 4.26884328358209e-05, + "loss": 0.0, + "step": 62704 + }, + { + "epoch": 58.5, + "learning_rate": 4.268796641791045e-05, + "loss": 0.0, + "step": 62708 + }, + { + "epoch": 58.5, + "learning_rate": 4.26875e-05, + "loss": 0.0029, + "step": 62712 + }, + { + "epoch": 58.5, + "learning_rate": 4.2687033582089556e-05, + "loss": 0.0001, + "step": 62716 + }, + { + "epoch": 58.51, + "learning_rate": 4.268656716417911e-05, + "loss": 0.0004, + "step": 62720 + }, + { + "epoch": 58.51, + "learning_rate": 4.268610074626866e-05, + "loss": 0.0001, + "step": 62724 + }, + { + "epoch": 58.51, + "learning_rate": 4.268563432835821e-05, + "loss": 0.0, + "step": 62728 + }, + { + "epoch": 58.52, + "learning_rate": 4.268516791044777e-05, + "loss": 0.0, + "step": 62732 + }, + { + "epoch": 58.52, + "learning_rate": 4.268470149253732e-05, + "loss": 0.0, + "step": 62736 + }, + { + "epoch": 58.53, + "learning_rate": 4.2684235074626865e-05, + "loss": 0.0002, + "step": 62740 + }, + { + "epoch": 58.53, + "learning_rate": 4.268376865671642e-05, + "loss": 0.0, + "step": 62744 + }, + { + "epoch": 58.53, + "learning_rate": 4.2683302238805975e-05, + "loss": 0.002, + "step": 62748 + }, + { + "epoch": 58.54, + "learning_rate": 4.268283582089552e-05, + "loss": 0.0001, + "step": 62752 + }, + { + "epoch": 58.54, + "learning_rate": 4.268236940298508e-05, + "loss": 0.0, + "step": 62756 + }, + { + "epoch": 58.54, + "learning_rate": 4.2681902985074626e-05, + "loss": 0.0139, + "step": 62760 + }, + { + "epoch": 58.55, + "learning_rate": 4.268143656716418e-05, + "loss": 0.0, + "step": 62764 + }, + { + "epoch": 58.55, + "learning_rate": 4.2680970149253736e-05, + "loss": 0.0, + "step": 62768 + }, + { + "epoch": 58.56, + "learning_rate": 4.2680503731343284e-05, + "loss": 0.0, + "step": 62772 + }, + { + "epoch": 58.56, + "learning_rate": 4.268003731343284e-05, + "loss": 0.0001, + "step": 62776 + }, + { + "epoch": 58.56, + "learning_rate": 4.2679570895522394e-05, + "loss": 0.0, + "step": 62780 + }, + { + "epoch": 58.57, + "learning_rate": 4.267910447761194e-05, + "loss": 0.0, + "step": 62784 + }, + { + "epoch": 58.57, + "learning_rate": 4.267863805970149e-05, + "loss": 0.0, + "step": 62788 + }, + { + "epoch": 58.57, + "learning_rate": 4.267817164179105e-05, + "loss": 0.0, + "step": 62792 + }, + { + "epoch": 58.58, + "learning_rate": 4.26777052238806e-05, + "loss": 0.0, + "step": 62796 + }, + { + "epoch": 58.58, + "learning_rate": 4.267723880597015e-05, + "loss": 0.0002, + "step": 62800 + }, + { + "epoch": 58.59, + "learning_rate": 4.26767723880597e-05, + "loss": 0.0, + "step": 62804 + }, + { + "epoch": 58.59, + "learning_rate": 4.267630597014926e-05, + "loss": 0.0, + "step": 62808 + }, + { + "epoch": 58.59, + "learning_rate": 4.2675839552238806e-05, + "loss": 0.0, + "step": 62812 + }, + { + "epoch": 58.6, + "learning_rate": 4.267537313432836e-05, + "loss": 0.0002, + "step": 62816 + }, + { + "epoch": 58.6, + "learning_rate": 4.267490671641791e-05, + "loss": 0.0, + "step": 62820 + }, + { + "epoch": 58.6, + "learning_rate": 4.2674440298507464e-05, + "loss": 0.0, + "step": 62824 + }, + { + "epoch": 58.61, + "learning_rate": 4.267397388059702e-05, + "loss": 0.0001, + "step": 62828 + }, + { + "epoch": 58.61, + "learning_rate": 4.267350746268657e-05, + "loss": 0.0, + "step": 62832 + }, + { + "epoch": 58.62, + "learning_rate": 4.267304104477612e-05, + "loss": 0.0, + "step": 62836 + }, + { + "epoch": 58.62, + "learning_rate": 4.2672574626865677e-05, + "loss": 0.0, + "step": 62840 + }, + { + "epoch": 58.62, + "learning_rate": 4.2672108208955225e-05, + "loss": 0.0, + "step": 62844 + }, + { + "epoch": 58.63, + "learning_rate": 4.267164179104478e-05, + "loss": 0.0001, + "step": 62848 + }, + { + "epoch": 58.63, + "learning_rate": 4.267117537313433e-05, + "loss": 0.0003, + "step": 62852 + }, + { + "epoch": 58.63, + "learning_rate": 4.267070895522388e-05, + "loss": 0.0, + "step": 62856 + }, + { + "epoch": 58.64, + "learning_rate": 4.267024253731344e-05, + "loss": 0.0, + "step": 62860 + }, + { + "epoch": 58.64, + "learning_rate": 4.2669776119402986e-05, + "loss": 0.0, + "step": 62864 + }, + { + "epoch": 58.65, + "learning_rate": 4.266930970149254e-05, + "loss": 0.0, + "step": 62868 + }, + { + "epoch": 58.65, + "learning_rate": 4.2668843283582095e-05, + "loss": 0.0004, + "step": 62872 + }, + { + "epoch": 58.65, + "learning_rate": 4.2668376865671644e-05, + "loss": 0.0, + "step": 62876 + }, + { + "epoch": 58.66, + "learning_rate": 4.266791044776119e-05, + "loss": 0.0, + "step": 62880 + }, + { + "epoch": 58.66, + "learning_rate": 4.2667444029850753e-05, + "loss": 0.0001, + "step": 62884 + }, + { + "epoch": 58.66, + "learning_rate": 4.26669776119403e-05, + "loss": 0.0, + "step": 62888 + }, + { + "epoch": 58.67, + "learning_rate": 4.266651119402985e-05, + "loss": 0.0, + "step": 62892 + }, + { + "epoch": 58.67, + "learning_rate": 4.2666044776119405e-05, + "loss": 0.0, + "step": 62896 + }, + { + "epoch": 58.68, + "learning_rate": 4.266557835820896e-05, + "loss": 0.0002, + "step": 62900 + }, + { + "epoch": 58.68, + "learning_rate": 4.266511194029851e-05, + "loss": 0.0, + "step": 62904 + }, + { + "epoch": 58.68, + "learning_rate": 4.266464552238806e-05, + "loss": 0.0009, + "step": 62908 + }, + { + "epoch": 58.69, + "learning_rate": 4.266417910447761e-05, + "loss": 0.0, + "step": 62912 + }, + { + "epoch": 58.69, + "learning_rate": 4.2663712686567166e-05, + "loss": 0.0, + "step": 62916 + }, + { + "epoch": 58.69, + "learning_rate": 4.266324626865672e-05, + "loss": 0.0, + "step": 62920 + }, + { + "epoch": 58.7, + "learning_rate": 4.266277985074627e-05, + "loss": 0.0, + "step": 62924 + }, + { + "epoch": 58.7, + "learning_rate": 4.2662313432835824e-05, + "loss": 0.0006, + "step": 62928 + }, + { + "epoch": 58.71, + "learning_rate": 4.266184701492538e-05, + "loss": 0.0001, + "step": 62932 + }, + { + "epoch": 58.71, + "learning_rate": 4.2661380597014927e-05, + "loss": 0.0, + "step": 62936 + }, + { + "epoch": 58.71, + "learning_rate": 4.2660914179104475e-05, + "loss": 0.0, + "step": 62940 + }, + { + "epoch": 58.72, + "learning_rate": 4.2660447761194036e-05, + "loss": 0.0, + "step": 62944 + }, + { + "epoch": 58.72, + "learning_rate": 4.2659981343283585e-05, + "loss": 0.0003, + "step": 62948 + }, + { + "epoch": 58.72, + "learning_rate": 4.265951492537313e-05, + "loss": 0.0003, + "step": 62952 + }, + { + "epoch": 58.73, + "learning_rate": 4.265904850746269e-05, + "loss": 0.0, + "step": 62956 + }, + { + "epoch": 58.73, + "learning_rate": 4.265858208955224e-05, + "loss": 0.0031, + "step": 62960 + }, + { + "epoch": 58.73, + "learning_rate": 4.265811567164179e-05, + "loss": 0.0, + "step": 62964 + }, + { + "epoch": 58.74, + "learning_rate": 4.2657649253731346e-05, + "loss": 0.0, + "step": 62968 + }, + { + "epoch": 58.74, + "learning_rate": 4.2657182835820894e-05, + "loss": 0.0002, + "step": 62972 + }, + { + "epoch": 58.75, + "learning_rate": 4.265671641791045e-05, + "loss": 0.0, + "step": 62976 + }, + { + "epoch": 58.75, + "learning_rate": 4.2656250000000003e-05, + "loss": 0.0001, + "step": 62980 + }, + { + "epoch": 58.75, + "learning_rate": 4.265578358208955e-05, + "loss": 0.0, + "step": 62984 + }, + { + "epoch": 58.76, + "learning_rate": 4.2655317164179107e-05, + "loss": 0.0004, + "step": 62988 + }, + { + "epoch": 58.76, + "learning_rate": 4.265485074626866e-05, + "loss": 0.0001, + "step": 62992 + }, + { + "epoch": 58.76, + "learning_rate": 4.265438432835821e-05, + "loss": 0.0, + "step": 62996 + }, + { + "epoch": 58.77, + "learning_rate": 4.2653917910447764e-05, + "loss": 0.0, + "step": 63000 + }, + { + "epoch": 58.77, + "eval_exact_match": 0.741779497098646, + "eval_exec": 0.7872340425531915, + "eval_loss": 0.4804081916809082, + "eval_runtime": 1136.1578, + "eval_samples_per_second": 0.91, + "step": 63000 + }, + { + "epoch": 58.77, + "learning_rate": 4.265345149253732e-05, + "loss": 0.0, + "step": 63004 + }, + { + "epoch": 58.78, + "learning_rate": 4.265298507462687e-05, + "loss": 0.0, + "step": 63008 + }, + { + "epoch": 58.78, + "learning_rate": 4.265251865671642e-05, + "loss": 0.0003, + "step": 63012 + }, + { + "epoch": 58.78, + "learning_rate": 4.265205223880597e-05, + "loss": 0.0001, + "step": 63016 + }, + { + "epoch": 58.79, + "learning_rate": 4.2651585820895525e-05, + "loss": 0.0001, + "step": 63020 + }, + { + "epoch": 58.79, + "learning_rate": 4.265111940298508e-05, + "loss": 0.0, + "step": 63024 + }, + { + "epoch": 58.79, + "learning_rate": 4.265065298507463e-05, + "loss": 0.0, + "step": 63028 + }, + { + "epoch": 58.8, + "learning_rate": 4.2650186567164177e-05, + "loss": 0.0, + "step": 63032 + }, + { + "epoch": 58.8, + "learning_rate": 4.264972014925374e-05, + "loss": 0.0075, + "step": 63036 + }, + { + "epoch": 58.81, + "learning_rate": 4.2649253731343286e-05, + "loss": 0.0001, + "step": 63040 + }, + { + "epoch": 58.81, + "learning_rate": 4.2648787313432835e-05, + "loss": 0.0001, + "step": 63044 + }, + { + "epoch": 58.81, + "learning_rate": 4.264832089552239e-05, + "loss": 0.0, + "step": 63048 + }, + { + "epoch": 58.82, + "learning_rate": 4.2647854477611944e-05, + "loss": 0.0, + "step": 63052 + }, + { + "epoch": 58.82, + "learning_rate": 4.264738805970149e-05, + "loss": 0.0002, + "step": 63056 + }, + { + "epoch": 58.82, + "learning_rate": 4.264692164179105e-05, + "loss": 0.0, + "step": 63060 + }, + { + "epoch": 58.83, + "learning_rate": 4.26464552238806e-05, + "loss": 0.0, + "step": 63064 + }, + { + "epoch": 58.83, + "learning_rate": 4.264598880597015e-05, + "loss": 0.0001, + "step": 63068 + }, + { + "epoch": 58.84, + "learning_rate": 4.2645522388059705e-05, + "loss": 0.0001, + "step": 63072 + }, + { + "epoch": 58.84, + "learning_rate": 4.2645055970149253e-05, + "loss": 0.0, + "step": 63076 + }, + { + "epoch": 58.84, + "learning_rate": 4.264458955223881e-05, + "loss": 0.0, + "step": 63080 + }, + { + "epoch": 58.85, + "learning_rate": 4.264412313432836e-05, + "loss": 0.0, + "step": 63084 + }, + { + "epoch": 58.85, + "learning_rate": 4.264365671641791e-05, + "loss": 0.0, + "step": 63088 + }, + { + "epoch": 58.85, + "learning_rate": 4.264319029850746e-05, + "loss": 0.0, + "step": 63092 + }, + { + "epoch": 58.86, + "learning_rate": 4.264272388059702e-05, + "loss": 0.0, + "step": 63096 + }, + { + "epoch": 58.86, + "learning_rate": 4.264225746268657e-05, + "loss": 0.0, + "step": 63100 + }, + { + "epoch": 58.87, + "learning_rate": 4.264179104477612e-05, + "loss": 0.0, + "step": 63104 + }, + { + "epoch": 58.87, + "learning_rate": 4.264132462686567e-05, + "loss": 0.0019, + "step": 63108 + }, + { + "epoch": 58.87, + "learning_rate": 4.264085820895523e-05, + "loss": 0.0, + "step": 63112 + }, + { + "epoch": 58.88, + "learning_rate": 4.2640391791044775e-05, + "loss": 0.0, + "step": 63116 + }, + { + "epoch": 58.88, + "learning_rate": 4.263992537313433e-05, + "loss": 0.0, + "step": 63120 + }, + { + "epoch": 58.88, + "learning_rate": 4.2639458955223885e-05, + "loss": 0.0, + "step": 63124 + }, + { + "epoch": 58.89, + "learning_rate": 4.2638992537313433e-05, + "loss": 0.0001, + "step": 63128 + }, + { + "epoch": 58.89, + "learning_rate": 4.263852611940299e-05, + "loss": 0.0, + "step": 63132 + }, + { + "epoch": 58.9, + "learning_rate": 4.2638059701492536e-05, + "loss": 0.0001, + "step": 63136 + }, + { + "epoch": 58.9, + "learning_rate": 4.263759328358209e-05, + "loss": 0.0012, + "step": 63140 + }, + { + "epoch": 58.9, + "learning_rate": 4.2637126865671646e-05, + "loss": 0.0004, + "step": 63144 + }, + { + "epoch": 58.91, + "learning_rate": 4.2636660447761194e-05, + "loss": 0.0002, + "step": 63148 + }, + { + "epoch": 58.91, + "learning_rate": 4.263619402985074e-05, + "loss": 0.0, + "step": 63152 + }, + { + "epoch": 58.91, + "learning_rate": 4.2635727611940304e-05, + "loss": 0.0, + "step": 63156 + }, + { + "epoch": 58.92, + "learning_rate": 4.263526119402985e-05, + "loss": 0.0, + "step": 63160 + }, + { + "epoch": 58.92, + "learning_rate": 4.263479477611941e-05, + "loss": 0.0, + "step": 63164 + }, + { + "epoch": 58.93, + "learning_rate": 4.2634328358208955e-05, + "loss": 0.0002, + "step": 63168 + }, + { + "epoch": 58.93, + "learning_rate": 4.263386194029851e-05, + "loss": 0.0004, + "step": 63172 + }, + { + "epoch": 58.93, + "learning_rate": 4.2633395522388065e-05, + "loss": 0.0, + "step": 63176 + }, + { + "epoch": 58.94, + "learning_rate": 4.263292910447761e-05, + "loss": 0.0, + "step": 63180 + }, + { + "epoch": 58.94, + "learning_rate": 4.263246268656717e-05, + "loss": 0.0, + "step": 63184 + }, + { + "epoch": 58.94, + "learning_rate": 4.263199626865672e-05, + "loss": 0.0, + "step": 63188 + }, + { + "epoch": 58.95, + "learning_rate": 4.263152985074627e-05, + "loss": 0.0, + "step": 63192 + }, + { + "epoch": 58.95, + "learning_rate": 4.263106343283582e-05, + "loss": 0.0, + "step": 63196 + }, + { + "epoch": 58.96, + "learning_rate": 4.2630597014925374e-05, + "loss": 0.0, + "step": 63200 + }, + { + "epoch": 58.96, + "learning_rate": 4.263013059701493e-05, + "loss": 0.0038, + "step": 63204 + }, + { + "epoch": 58.96, + "learning_rate": 4.262966417910448e-05, + "loss": 0.0, + "step": 63208 + }, + { + "epoch": 58.97, + "learning_rate": 4.262919776119403e-05, + "loss": 0.0001, + "step": 63212 + }, + { + "epoch": 58.97, + "learning_rate": 4.262873134328359e-05, + "loss": 0.0156, + "step": 63216 + }, + { + "epoch": 58.97, + "learning_rate": 4.2628264925373135e-05, + "loss": 0.0, + "step": 63220 + }, + { + "epoch": 58.98, + "learning_rate": 4.262779850746269e-05, + "loss": 0.0001, + "step": 63224 + }, + { + "epoch": 58.98, + "learning_rate": 4.262733208955224e-05, + "loss": 0.0003, + "step": 63228 + }, + { + "epoch": 58.98, + "learning_rate": 4.262686567164179e-05, + "loss": 0.0, + "step": 63232 + }, + { + "epoch": 58.99, + "learning_rate": 4.262639925373135e-05, + "loss": 0.0, + "step": 63236 + }, + { + "epoch": 58.99, + "learning_rate": 4.2625932835820896e-05, + "loss": 0.0, + "step": 63240 + }, + { + "epoch": 59.0, + "learning_rate": 4.262546641791045e-05, + "loss": 0.0, + "step": 63244 + }, + { + "epoch": 59.0, + "learning_rate": 4.2625000000000006e-05, + "loss": 0.0, + "step": 63248 + }, + { + "epoch": 59.0, + "learning_rate": 4.2624533582089554e-05, + "loss": 0.0, + "step": 63252 + }, + { + "epoch": 59.01, + "learning_rate": 4.26240671641791e-05, + "loss": 0.0, + "step": 63256 + }, + { + "epoch": 59.01, + "learning_rate": 4.262360074626866e-05, + "loss": 0.0001, + "step": 63260 + }, + { + "epoch": 59.01, + "learning_rate": 4.262313432835821e-05, + "loss": 0.0, + "step": 63264 + }, + { + "epoch": 59.02, + "learning_rate": 4.262266791044776e-05, + "loss": 0.0, + "step": 63268 + }, + { + "epoch": 59.02, + "learning_rate": 4.2622201492537315e-05, + "loss": 0.0, + "step": 63272 + }, + { + "epoch": 59.03, + "learning_rate": 4.262173507462687e-05, + "loss": 0.0018, + "step": 63276 + }, + { + "epoch": 59.03, + "learning_rate": 4.262126865671642e-05, + "loss": 0.0002, + "step": 63280 + }, + { + "epoch": 59.03, + "learning_rate": 4.262080223880597e-05, + "loss": 0.0, + "step": 63284 + }, + { + "epoch": 59.04, + "learning_rate": 4.262033582089552e-05, + "loss": 0.0, + "step": 63288 + }, + { + "epoch": 59.04, + "learning_rate": 4.2619869402985076e-05, + "loss": 0.0002, + "step": 63292 + }, + { + "epoch": 59.04, + "learning_rate": 4.261940298507463e-05, + "loss": 0.0002, + "step": 63296 + }, + { + "epoch": 59.05, + "learning_rate": 4.261893656716418e-05, + "loss": 0.0, + "step": 63300 + }, + { + "epoch": 59.05, + "learning_rate": 4.2618470149253734e-05, + "loss": 0.0, + "step": 63304 + }, + { + "epoch": 59.06, + "learning_rate": 4.261800373134329e-05, + "loss": 0.0001, + "step": 63308 + }, + { + "epoch": 59.06, + "learning_rate": 4.261753731343284e-05, + "loss": 0.0, + "step": 63312 + }, + { + "epoch": 59.06, + "learning_rate": 4.2617070895522385e-05, + "loss": 0.0054, + "step": 63316 + }, + { + "epoch": 59.07, + "learning_rate": 4.261660447761194e-05, + "loss": 0.0, + "step": 63320 + }, + { + "epoch": 59.07, + "learning_rate": 4.2616138059701495e-05, + "loss": 0.0, + "step": 63324 + }, + { + "epoch": 59.07, + "learning_rate": 4.261567164179105e-05, + "loss": 0.0, + "step": 63328 + }, + { + "epoch": 59.08, + "learning_rate": 4.26152052238806e-05, + "loss": 0.0, + "step": 63332 + }, + { + "epoch": 59.08, + "learning_rate": 4.261473880597015e-05, + "loss": 0.0, + "step": 63336 + }, + { + "epoch": 59.09, + "learning_rate": 4.261427238805971e-05, + "loss": 0.0, + "step": 63340 + }, + { + "epoch": 59.09, + "learning_rate": 4.2613805970149256e-05, + "loss": 0.0007, + "step": 63344 + }, + { + "epoch": 59.09, + "learning_rate": 4.2613339552238804e-05, + "loss": 0.0, + "step": 63348 + }, + { + "epoch": 59.1, + "learning_rate": 4.2612873134328366e-05, + "loss": 0.0, + "step": 63352 + }, + { + "epoch": 59.1, + "learning_rate": 4.2612406716417914e-05, + "loss": 0.0, + "step": 63356 + }, + { + "epoch": 59.1, + "learning_rate": 4.261194029850746e-05, + "loss": 0.0, + "step": 63360 + }, + { + "epoch": 59.11, + "learning_rate": 4.261147388059702e-05, + "loss": 0.0, + "step": 63364 + }, + { + "epoch": 59.11, + "learning_rate": 4.261100746268657e-05, + "loss": 0.0, + "step": 63368 + }, + { + "epoch": 59.12, + "learning_rate": 4.261054104477612e-05, + "loss": 0.0001, + "step": 63372 + }, + { + "epoch": 59.12, + "learning_rate": 4.2610074626865675e-05, + "loss": 0.0, + "step": 63376 + }, + { + "epoch": 59.12, + "learning_rate": 4.260960820895522e-05, + "loss": 0.0, + "step": 63380 + }, + { + "epoch": 59.13, + "learning_rate": 4.260914179104478e-05, + "loss": 0.0, + "step": 63384 + }, + { + "epoch": 59.13, + "learning_rate": 4.260867537313433e-05, + "loss": 0.0007, + "step": 63388 + }, + { + "epoch": 59.13, + "learning_rate": 4.260820895522388e-05, + "loss": 0.0001, + "step": 63392 + }, + { + "epoch": 59.14, + "learning_rate": 4.2607742537313436e-05, + "loss": 0.0, + "step": 63396 + }, + { + "epoch": 59.14, + "learning_rate": 4.260727611940299e-05, + "loss": 0.0002, + "step": 63400 + }, + { + "epoch": 59.15, + "learning_rate": 4.260680970149254e-05, + "loss": 0.0, + "step": 63404 + }, + { + "epoch": 59.15, + "learning_rate": 4.260634328358209e-05, + "loss": 0.0, + "step": 63408 + }, + { + "epoch": 59.15, + "learning_rate": 4.260587686567165e-05, + "loss": 0.0019, + "step": 63412 + }, + { + "epoch": 59.16, + "learning_rate": 4.26054104477612e-05, + "loss": 0.0, + "step": 63416 + }, + { + "epoch": 59.16, + "learning_rate": 4.2604944029850745e-05, + "loss": 0.006, + "step": 63420 + }, + { + "epoch": 59.16, + "learning_rate": 4.26044776119403e-05, + "loss": 0.0, + "step": 63424 + }, + { + "epoch": 59.17, + "learning_rate": 4.2604011194029855e-05, + "loss": 0.0001, + "step": 63428 + }, + { + "epoch": 59.17, + "learning_rate": 4.26035447761194e-05, + "loss": 0.0008, + "step": 63432 + }, + { + "epoch": 59.18, + "learning_rate": 4.260307835820896e-05, + "loss": 0.0, + "step": 63436 + }, + { + "epoch": 59.18, + "learning_rate": 4.2602611940298506e-05, + "loss": 0.0032, + "step": 63440 + }, + { + "epoch": 59.18, + "learning_rate": 4.260214552238806e-05, + "loss": 0.0004, + "step": 63444 + }, + { + "epoch": 59.19, + "learning_rate": 4.2601679104477616e-05, + "loss": 0.0, + "step": 63448 + }, + { + "epoch": 59.19, + "learning_rate": 4.2601212686567164e-05, + "loss": 0.0, + "step": 63452 + }, + { + "epoch": 59.19, + "learning_rate": 4.260074626865672e-05, + "loss": 0.0, + "step": 63456 + }, + { + "epoch": 59.2, + "learning_rate": 4.2600279850746274e-05, + "loss": 0.0, + "step": 63460 + }, + { + "epoch": 59.2, + "learning_rate": 4.259981343283582e-05, + "loss": 0.0, + "step": 63464 + }, + { + "epoch": 59.21, + "learning_rate": 4.259934701492537e-05, + "loss": 0.0001, + "step": 63468 + }, + { + "epoch": 59.21, + "learning_rate": 4.259888059701493e-05, + "loss": 0.0001, + "step": 63472 + }, + { + "epoch": 59.21, + "learning_rate": 4.259841417910448e-05, + "loss": 0.0, + "step": 63476 + }, + { + "epoch": 59.22, + "learning_rate": 4.259794776119403e-05, + "loss": 0.0, + "step": 63480 + }, + { + "epoch": 59.22, + "learning_rate": 4.259748134328358e-05, + "loss": 0.0, + "step": 63484 + }, + { + "epoch": 59.22, + "learning_rate": 4.259701492537314e-05, + "loss": 0.0, + "step": 63488 + }, + { + "epoch": 59.23, + "learning_rate": 4.259654850746269e-05, + "loss": 0.0, + "step": 63492 + }, + { + "epoch": 59.23, + "learning_rate": 4.259608208955224e-05, + "loss": 0.0001, + "step": 63496 + }, + { + "epoch": 59.24, + "learning_rate": 4.259561567164179e-05, + "loss": 0.0002, + "step": 63500 + }, + { + "epoch": 59.24, + "eval_exact_match": 0.746615087040619, + "eval_exec": 0.7872340425531915, + "eval_loss": 0.48403027653694153, + "eval_runtime": 1101.428, + "eval_samples_per_second": 0.939, + "step": 63500 + }, + { + "epoch": 59.24, + "learning_rate": 4.259514925373135e-05, + "loss": 0.0, + "step": 63504 + }, + { + "epoch": 59.24, + "learning_rate": 4.25946828358209e-05, + "loss": 0.0001, + "step": 63508 + }, + { + "epoch": 59.25, + "learning_rate": 4.259421641791045e-05, + "loss": 0.0101, + "step": 63512 + }, + { + "epoch": 59.25, + "learning_rate": 4.259375e-05, + "loss": 0.0, + "step": 63516 + }, + { + "epoch": 59.25, + "learning_rate": 4.259328358208956e-05, + "loss": 0.0, + "step": 63520 + }, + { + "epoch": 59.26, + "learning_rate": 4.2592817164179105e-05, + "loss": 0.0, + "step": 63524 + }, + { + "epoch": 59.26, + "learning_rate": 4.259235074626866e-05, + "loss": 0.0, + "step": 63528 + }, + { + "epoch": 59.26, + "learning_rate": 4.259188432835821e-05, + "loss": 0.0, + "step": 63532 + }, + { + "epoch": 59.27, + "learning_rate": 4.259141791044776e-05, + "loss": 0.0, + "step": 63536 + }, + { + "epoch": 59.27, + "learning_rate": 4.259095149253732e-05, + "loss": 0.0, + "step": 63540 + }, + { + "epoch": 59.28, + "learning_rate": 4.2590485074626866e-05, + "loss": 0.0, + "step": 63544 + }, + { + "epoch": 59.28, + "learning_rate": 4.259001865671642e-05, + "loss": 0.0, + "step": 63548 + }, + { + "epoch": 59.28, + "learning_rate": 4.2589552238805976e-05, + "loss": 0.0, + "step": 63552 + }, + { + "epoch": 59.29, + "learning_rate": 4.2589085820895524e-05, + "loss": 0.0, + "step": 63556 + }, + { + "epoch": 59.29, + "learning_rate": 4.258861940298507e-05, + "loss": 0.0, + "step": 63560 + }, + { + "epoch": 59.29, + "learning_rate": 4.2588152985074634e-05, + "loss": 0.0, + "step": 63564 + }, + { + "epoch": 59.3, + "learning_rate": 4.258768656716418e-05, + "loss": 0.0011, + "step": 63568 + }, + { + "epoch": 59.3, + "learning_rate": 4.258722014925373e-05, + "loss": 0.0001, + "step": 63572 + }, + { + "epoch": 59.31, + "learning_rate": 4.2586753731343285e-05, + "loss": 0.0, + "step": 63576 + }, + { + "epoch": 59.31, + "learning_rate": 4.258628731343284e-05, + "loss": 0.0002, + "step": 63580 + }, + { + "epoch": 59.31, + "learning_rate": 4.258582089552239e-05, + "loss": 0.0, + "step": 63584 + }, + { + "epoch": 59.32, + "learning_rate": 4.258535447761194e-05, + "loss": 0.0, + "step": 63588 + }, + { + "epoch": 59.32, + "learning_rate": 4.258488805970149e-05, + "loss": 0.0, + "step": 63592 + }, + { + "epoch": 59.32, + "learning_rate": 4.2584421641791046e-05, + "loss": 0.0, + "step": 63596 + }, + { + "epoch": 59.33, + "learning_rate": 4.25839552238806e-05, + "loss": 0.0, + "step": 63600 + }, + { + "epoch": 59.33, + "learning_rate": 4.258348880597015e-05, + "loss": 0.0001, + "step": 63604 + }, + { + "epoch": 59.34, + "learning_rate": 4.2583022388059704e-05, + "loss": 0.0, + "step": 63608 + }, + { + "epoch": 59.34, + "learning_rate": 4.258255597014926e-05, + "loss": 0.0, + "step": 63612 + }, + { + "epoch": 59.34, + "learning_rate": 4.258208955223881e-05, + "loss": 0.0006, + "step": 63616 + }, + { + "epoch": 59.35, + "learning_rate": 4.2581623134328355e-05, + "loss": 0.0001, + "step": 63620 + }, + { + "epoch": 59.35, + "learning_rate": 4.258115671641792e-05, + "loss": 0.0, + "step": 63624 + }, + { + "epoch": 59.35, + "learning_rate": 4.2580690298507465e-05, + "loss": 0.0001, + "step": 63628 + }, + { + "epoch": 59.36, + "learning_rate": 4.258022388059701e-05, + "loss": 0.0, + "step": 63632 + }, + { + "epoch": 59.36, + "learning_rate": 4.257975746268657e-05, + "loss": 0.0, + "step": 63636 + }, + { + "epoch": 59.37, + "learning_rate": 4.257929104477612e-05, + "loss": 0.0001, + "step": 63640 + }, + { + "epoch": 59.37, + "learning_rate": 4.257882462686567e-05, + "loss": 0.0, + "step": 63644 + }, + { + "epoch": 59.37, + "learning_rate": 4.2578358208955226e-05, + "loss": 0.0006, + "step": 63648 + }, + { + "epoch": 59.38, + "learning_rate": 4.2577891791044774e-05, + "loss": 0.0, + "step": 63652 + }, + { + "epoch": 59.38, + "learning_rate": 4.2577425373134336e-05, + "loss": 0.0011, + "step": 63656 + }, + { + "epoch": 59.38, + "learning_rate": 4.2576958955223884e-05, + "loss": 0.0, + "step": 63660 + }, + { + "epoch": 59.39, + "learning_rate": 4.257649253731343e-05, + "loss": 0.0001, + "step": 63664 + }, + { + "epoch": 59.39, + "learning_rate": 4.257602611940299e-05, + "loss": 0.0, + "step": 63668 + }, + { + "epoch": 59.4, + "learning_rate": 4.257555970149254e-05, + "loss": 0.0, + "step": 63672 + }, + { + "epoch": 59.4, + "learning_rate": 4.257509328358209e-05, + "loss": 0.0, + "step": 63676 + }, + { + "epoch": 59.4, + "learning_rate": 4.2574626865671645e-05, + "loss": 0.0, + "step": 63680 + }, + { + "epoch": 59.41, + "learning_rate": 4.25741604477612e-05, + "loss": 0.0, + "step": 63684 + }, + { + "epoch": 59.41, + "learning_rate": 4.257369402985075e-05, + "loss": 0.0, + "step": 63688 + }, + { + "epoch": 59.41, + "learning_rate": 4.25732276119403e-05, + "loss": 0.0004, + "step": 63692 + }, + { + "epoch": 59.42, + "learning_rate": 4.257276119402985e-05, + "loss": 0.0, + "step": 63696 + }, + { + "epoch": 59.42, + "learning_rate": 4.2572294776119406e-05, + "loss": 0.0, + "step": 63700 + }, + { + "epoch": 59.43, + "learning_rate": 4.257182835820896e-05, + "loss": 0.0001, + "step": 63704 + }, + { + "epoch": 59.43, + "learning_rate": 4.257136194029851e-05, + "loss": 0.0001, + "step": 63708 + }, + { + "epoch": 59.43, + "learning_rate": 4.257089552238806e-05, + "loss": 0.0001, + "step": 63712 + }, + { + "epoch": 59.44, + "learning_rate": 4.257042910447762e-05, + "loss": 0.0, + "step": 63716 + }, + { + "epoch": 59.44, + "learning_rate": 4.256996268656717e-05, + "loss": 0.0, + "step": 63720 + }, + { + "epoch": 59.44, + "learning_rate": 4.2569496268656715e-05, + "loss": 0.0, + "step": 63724 + }, + { + "epoch": 59.45, + "learning_rate": 4.256902985074627e-05, + "loss": 0.0, + "step": 63728 + }, + { + "epoch": 59.45, + "learning_rate": 4.2568563432835825e-05, + "loss": 0.0, + "step": 63732 + }, + { + "epoch": 59.46, + "learning_rate": 4.256809701492537e-05, + "loss": 0.0, + "step": 63736 + }, + { + "epoch": 59.46, + "learning_rate": 4.256763059701493e-05, + "loss": 0.0001, + "step": 63740 + }, + { + "epoch": 59.46, + "learning_rate": 4.256716417910448e-05, + "loss": 0.0, + "step": 63744 + }, + { + "epoch": 59.47, + "learning_rate": 4.256669776119403e-05, + "loss": 0.0, + "step": 63748 + }, + { + "epoch": 59.47, + "learning_rate": 4.2566231343283586e-05, + "loss": 0.0001, + "step": 63752 + }, + { + "epoch": 59.47, + "learning_rate": 4.2565764925373134e-05, + "loss": 0.0, + "step": 63756 + }, + { + "epoch": 59.48, + "learning_rate": 4.256529850746269e-05, + "loss": 0.001, + "step": 63760 + }, + { + "epoch": 59.48, + "learning_rate": 4.2564832089552244e-05, + "loss": 0.0, + "step": 63764 + }, + { + "epoch": 59.49, + "learning_rate": 4.256436567164179e-05, + "loss": 0.0001, + "step": 63768 + }, + { + "epoch": 59.49, + "learning_rate": 4.256389925373134e-05, + "loss": 0.0001, + "step": 63772 + }, + { + "epoch": 59.49, + "learning_rate": 4.25634328358209e-05, + "loss": 0.0, + "step": 63776 + }, + { + "epoch": 59.5, + "learning_rate": 4.256296641791045e-05, + "loss": 0.0006, + "step": 63780 + }, + { + "epoch": 59.5, + "learning_rate": 4.25625e-05, + "loss": 0.0, + "step": 63784 + }, + { + "epoch": 59.5, + "learning_rate": 4.256203358208955e-05, + "loss": 0.0, + "step": 63788 + }, + { + "epoch": 59.51, + "learning_rate": 4.256156716417911e-05, + "loss": 0.0002, + "step": 63792 + }, + { + "epoch": 59.51, + "learning_rate": 4.2561100746268656e-05, + "loss": 0.0001, + "step": 63796 + }, + { + "epoch": 59.51, + "learning_rate": 4.256063432835821e-05, + "loss": 0.0, + "step": 63800 + }, + { + "epoch": 59.52, + "learning_rate": 4.2560167910447766e-05, + "loss": 0.0, + "step": 63804 + }, + { + "epoch": 59.52, + "learning_rate": 4.2559701492537314e-05, + "loss": 0.0, + "step": 63808 + }, + { + "epoch": 59.53, + "learning_rate": 4.255923507462687e-05, + "loss": 0.0, + "step": 63812 + }, + { + "epoch": 59.53, + "learning_rate": 4.255876865671642e-05, + "loss": 0.0, + "step": 63816 + }, + { + "epoch": 59.53, + "learning_rate": 4.255830223880597e-05, + "loss": 0.0, + "step": 63820 + }, + { + "epoch": 59.54, + "learning_rate": 4.2557835820895527e-05, + "loss": 0.0, + "step": 63824 + }, + { + "epoch": 59.54, + "learning_rate": 4.2557369402985075e-05, + "loss": 0.0, + "step": 63828 + }, + { + "epoch": 59.54, + "learning_rate": 4.255690298507463e-05, + "loss": 0.0, + "step": 63832 + }, + { + "epoch": 59.55, + "learning_rate": 4.2556436567164184e-05, + "loss": 0.0, + "step": 63836 + }, + { + "epoch": 59.55, + "learning_rate": 4.255597014925373e-05, + "loss": 0.0, + "step": 63840 + }, + { + "epoch": 59.56, + "learning_rate": 4.255550373134329e-05, + "loss": 0.0001, + "step": 63844 + }, + { + "epoch": 59.56, + "learning_rate": 4.2555037313432836e-05, + "loss": 0.0, + "step": 63848 + }, + { + "epoch": 59.56, + "learning_rate": 4.255457089552239e-05, + "loss": 0.0, + "step": 63852 + }, + { + "epoch": 59.57, + "learning_rate": 4.2554104477611945e-05, + "loss": 0.0, + "step": 63856 + }, + { + "epoch": 59.57, + "learning_rate": 4.2553638059701494e-05, + "loss": 0.0, + "step": 63860 + }, + { + "epoch": 59.57, + "learning_rate": 4.255317164179105e-05, + "loss": 0.0, + "step": 63864 + }, + { + "epoch": 59.58, + "learning_rate": 4.2552705223880603e-05, + "loss": 0.0, + "step": 63868 + }, + { + "epoch": 59.58, + "learning_rate": 4.255223880597015e-05, + "loss": 0.0, + "step": 63872 + }, + { + "epoch": 59.59, + "learning_rate": 4.25517723880597e-05, + "loss": 0.0, + "step": 63876 + }, + { + "epoch": 59.59, + "learning_rate": 4.2551305970149255e-05, + "loss": 0.0, + "step": 63880 + }, + { + "epoch": 59.59, + "learning_rate": 4.255083955223881e-05, + "loss": 0.0, + "step": 63884 + }, + { + "epoch": 59.6, + "learning_rate": 4.255037313432836e-05, + "loss": 0.0004, + "step": 63888 + }, + { + "epoch": 59.6, + "learning_rate": 4.254990671641791e-05, + "loss": 0.0004, + "step": 63892 + }, + { + "epoch": 59.6, + "learning_rate": 4.254944029850747e-05, + "loss": 0.0003, + "step": 63896 + }, + { + "epoch": 59.61, + "learning_rate": 4.2548973880597016e-05, + "loss": 0.0, + "step": 63900 + }, + { + "epoch": 59.61, + "learning_rate": 4.254850746268657e-05, + "loss": 0.0, + "step": 63904 + }, + { + "epoch": 59.62, + "learning_rate": 4.254804104477612e-05, + "loss": 0.0, + "step": 63908 + }, + { + "epoch": 59.62, + "learning_rate": 4.2547574626865674e-05, + "loss": 0.0, + "step": 63912 + }, + { + "epoch": 59.62, + "learning_rate": 4.254710820895523e-05, + "loss": 0.0006, + "step": 63916 + }, + { + "epoch": 59.63, + "learning_rate": 4.2546641791044777e-05, + "loss": 0.0, + "step": 63920 + }, + { + "epoch": 59.63, + "learning_rate": 4.254617537313433e-05, + "loss": 0.0, + "step": 63924 + }, + { + "epoch": 59.63, + "learning_rate": 4.2545708955223886e-05, + "loss": 0.0, + "step": 63928 + }, + { + "epoch": 59.64, + "learning_rate": 4.2545242537313435e-05, + "loss": 0.0, + "step": 63932 + }, + { + "epoch": 59.64, + "learning_rate": 4.254477611940298e-05, + "loss": 0.0002, + "step": 63936 + }, + { + "epoch": 59.65, + "learning_rate": 4.254430970149254e-05, + "loss": 0.0, + "step": 63940 + }, + { + "epoch": 59.65, + "learning_rate": 4.254384328358209e-05, + "loss": 0.0, + "step": 63944 + }, + { + "epoch": 59.65, + "learning_rate": 4.254337686567164e-05, + "loss": 0.0, + "step": 63948 + }, + { + "epoch": 59.66, + "learning_rate": 4.2542910447761195e-05, + "loss": 0.0, + "step": 63952 + }, + { + "epoch": 59.66, + "learning_rate": 4.254244402985075e-05, + "loss": 0.0, + "step": 63956 + }, + { + "epoch": 59.66, + "learning_rate": 4.25419776119403e-05, + "loss": 0.0, + "step": 63960 + }, + { + "epoch": 59.67, + "learning_rate": 4.2541511194029853e-05, + "loss": 0.0, + "step": 63964 + }, + { + "epoch": 59.67, + "learning_rate": 4.25410447761194e-05, + "loss": 0.0002, + "step": 63968 + }, + { + "epoch": 59.68, + "learning_rate": 4.2540578358208956e-05, + "loss": 0.0, + "step": 63972 + }, + { + "epoch": 59.68, + "learning_rate": 4.254011194029851e-05, + "loss": 0.0, + "step": 63976 + }, + { + "epoch": 59.68, + "learning_rate": 4.253964552238806e-05, + "loss": 0.0001, + "step": 63980 + }, + { + "epoch": 59.69, + "learning_rate": 4.2539179104477614e-05, + "loss": 0.0001, + "step": 63984 + }, + { + "epoch": 59.69, + "learning_rate": 4.253871268656717e-05, + "loss": 0.0, + "step": 63988 + }, + { + "epoch": 59.69, + "learning_rate": 4.253824626865672e-05, + "loss": 0.0, + "step": 63992 + }, + { + "epoch": 59.7, + "learning_rate": 4.253777985074627e-05, + "loss": 0.0, + "step": 63996 + }, + { + "epoch": 59.7, + "learning_rate": 4.253731343283582e-05, + "loss": 0.0014, + "step": 64000 + }, + { + "epoch": 59.7, + "eval_exact_match": 0.7437137330754352, + "eval_exec": 0.7891682785299806, + "eval_loss": 0.5204067826271057, + "eval_runtime": 1133.3553, + "eval_samples_per_second": 0.912, + "step": 64000 + }, + { + "epoch": 59.71, + "learning_rate": 4.2536847014925375e-05, + "loss": 0.0001, + "step": 64004 + }, + { + "epoch": 59.71, + "learning_rate": 4.253638059701493e-05, + "loss": 0.0, + "step": 64008 + }, + { + "epoch": 59.71, + "learning_rate": 4.253591417910448e-05, + "loss": 0.0, + "step": 64012 + }, + { + "epoch": 59.72, + "learning_rate": 4.253544776119403e-05, + "loss": 0.0, + "step": 64016 + }, + { + "epoch": 59.72, + "learning_rate": 4.253498134328359e-05, + "loss": 0.0, + "step": 64020 + }, + { + "epoch": 59.72, + "learning_rate": 4.2534514925373136e-05, + "loss": 0.003, + "step": 64024 + }, + { + "epoch": 59.73, + "learning_rate": 4.2534048507462685e-05, + "loss": 0.0, + "step": 64028 + }, + { + "epoch": 59.73, + "learning_rate": 4.2533582089552246e-05, + "loss": 0.0016, + "step": 64032 + }, + { + "epoch": 59.73, + "learning_rate": 4.2533115671641794e-05, + "loss": 0.0001, + "step": 64036 + }, + { + "epoch": 59.74, + "learning_rate": 4.253264925373134e-05, + "loss": 0.0, + "step": 64040 + }, + { + "epoch": 59.74, + "learning_rate": 4.25321828358209e-05, + "loss": 0.0, + "step": 64044 + }, + { + "epoch": 59.75, + "learning_rate": 4.253171641791045e-05, + "loss": 0.0014, + "step": 64048 + }, + { + "epoch": 59.75, + "learning_rate": 4.253125e-05, + "loss": 0.0, + "step": 64052 + }, + { + "epoch": 59.75, + "learning_rate": 4.2530783582089555e-05, + "loss": 0.0, + "step": 64056 + }, + { + "epoch": 59.76, + "learning_rate": 4.2530317164179103e-05, + "loss": 0.0, + "step": 64060 + }, + { + "epoch": 59.76, + "learning_rate": 4.252985074626866e-05, + "loss": 0.0, + "step": 64064 + }, + { + "epoch": 59.76, + "learning_rate": 4.252938432835821e-05, + "loss": 0.0, + "step": 64068 + }, + { + "epoch": 59.77, + "learning_rate": 4.252891791044776e-05, + "loss": 0.0, + "step": 64072 + }, + { + "epoch": 59.77, + "learning_rate": 4.2528451492537316e-05, + "loss": 0.0, + "step": 64076 + }, + { + "epoch": 59.78, + "learning_rate": 4.252798507462687e-05, + "loss": 0.0, + "step": 64080 + }, + { + "epoch": 59.78, + "learning_rate": 4.252751865671642e-05, + "loss": 0.0002, + "step": 64084 + }, + { + "epoch": 59.78, + "learning_rate": 4.252705223880597e-05, + "loss": 0.0, + "step": 64088 + }, + { + "epoch": 59.79, + "learning_rate": 4.252658582089553e-05, + "loss": 0.0, + "step": 64092 + }, + { + "epoch": 59.79, + "learning_rate": 4.252611940298508e-05, + "loss": 0.0, + "step": 64096 + }, + { + "epoch": 59.79, + "learning_rate": 4.2525652985074625e-05, + "loss": 0.0, + "step": 64100 + }, + { + "epoch": 59.8, + "learning_rate": 4.252518656716418e-05, + "loss": 0.0, + "step": 64104 + }, + { + "epoch": 59.8, + "learning_rate": 4.2524720149253735e-05, + "loss": 0.0, + "step": 64108 + }, + { + "epoch": 59.81, + "learning_rate": 4.252425373134328e-05, + "loss": 0.0, + "step": 64112 + }, + { + "epoch": 59.81, + "learning_rate": 4.252378731343284e-05, + "loss": 0.0, + "step": 64116 + }, + { + "epoch": 59.81, + "learning_rate": 4.2523320895522386e-05, + "loss": 0.0, + "step": 64120 + }, + { + "epoch": 59.82, + "learning_rate": 4.252285447761194e-05, + "loss": 0.0, + "step": 64124 + }, + { + "epoch": 59.82, + "learning_rate": 4.2522388059701496e-05, + "loss": 0.0, + "step": 64128 + }, + { + "epoch": 59.82, + "learning_rate": 4.2521921641791044e-05, + "loss": 0.0, + "step": 64132 + }, + { + "epoch": 59.83, + "learning_rate": 4.25214552238806e-05, + "loss": 0.0001, + "step": 64136 + }, + { + "epoch": 59.83, + "learning_rate": 4.2520988805970154e-05, + "loss": 0.0, + "step": 64140 + }, + { + "epoch": 59.84, + "learning_rate": 4.25205223880597e-05, + "loss": 0.0, + "step": 64144 + }, + { + "epoch": 59.84, + "learning_rate": 4.252005597014926e-05, + "loss": 0.0, + "step": 64148 + }, + { + "epoch": 59.84, + "learning_rate": 4.251958955223881e-05, + "loss": 0.0, + "step": 64152 + }, + { + "epoch": 59.85, + "learning_rate": 4.251912313432836e-05, + "loss": 0.0, + "step": 64156 + }, + { + "epoch": 59.85, + "learning_rate": 4.2518656716417915e-05, + "loss": 0.0, + "step": 64160 + }, + { + "epoch": 59.85, + "learning_rate": 4.251819029850746e-05, + "loss": 0.001, + "step": 64164 + }, + { + "epoch": 59.86, + "learning_rate": 4.251772388059702e-05, + "loss": 0.0001, + "step": 64168 + }, + { + "epoch": 59.86, + "learning_rate": 4.251725746268657e-05, + "loss": 0.0, + "step": 64172 + }, + { + "epoch": 59.87, + "learning_rate": 4.251679104477612e-05, + "loss": 0.0, + "step": 64176 + }, + { + "epoch": 59.87, + "learning_rate": 4.251632462686567e-05, + "loss": 0.0069, + "step": 64180 + }, + { + "epoch": 59.87, + "learning_rate": 4.251585820895523e-05, + "loss": 0.0, + "step": 64184 + }, + { + "epoch": 59.88, + "learning_rate": 4.251539179104478e-05, + "loss": 0.0, + "step": 64188 + }, + { + "epoch": 59.88, + "learning_rate": 4.251492537313433e-05, + "loss": 0.0004, + "step": 64192 + }, + { + "epoch": 59.88, + "learning_rate": 4.251445895522388e-05, + "loss": 0.0, + "step": 64196 + }, + { + "epoch": 59.89, + "learning_rate": 4.251399253731344e-05, + "loss": 0.0, + "step": 64200 + }, + { + "epoch": 59.89, + "learning_rate": 4.2513526119402985e-05, + "loss": 0.0, + "step": 64204 + }, + { + "epoch": 59.9, + "learning_rate": 4.251305970149254e-05, + "loss": 0.0, + "step": 64208 + }, + { + "epoch": 59.9, + "learning_rate": 4.2512593283582095e-05, + "loss": 0.0006, + "step": 64212 + }, + { + "epoch": 59.9, + "learning_rate": 4.251212686567164e-05, + "loss": 0.0, + "step": 64216 + }, + { + "epoch": 59.91, + "learning_rate": 4.25116604477612e-05, + "loss": 0.0, + "step": 64220 + }, + { + "epoch": 59.91, + "learning_rate": 4.2511194029850746e-05, + "loss": 0.0, + "step": 64224 + }, + { + "epoch": 59.91, + "learning_rate": 4.25107276119403e-05, + "loss": 0.0, + "step": 64228 + }, + { + "epoch": 59.92, + "learning_rate": 4.2510261194029856e-05, + "loss": 0.0001, + "step": 64232 + }, + { + "epoch": 59.92, + "learning_rate": 4.2509794776119404e-05, + "loss": 0.0, + "step": 64236 + }, + { + "epoch": 59.93, + "learning_rate": 4.250932835820895e-05, + "loss": 0.0, + "step": 64240 + }, + { + "epoch": 59.93, + "learning_rate": 4.2508861940298514e-05, + "loss": 0.0, + "step": 64244 + }, + { + "epoch": 59.93, + "learning_rate": 4.250839552238806e-05, + "loss": 0.0004, + "step": 64248 + }, + { + "epoch": 59.94, + "learning_rate": 4.250792910447761e-05, + "loss": 0.0001, + "step": 64252 + }, + { + "epoch": 59.94, + "learning_rate": 4.2507462686567165e-05, + "loss": 0.0, + "step": 64256 + }, + { + "epoch": 59.94, + "learning_rate": 4.250699626865672e-05, + "loss": 0.0, + "step": 64260 + }, + { + "epoch": 59.95, + "learning_rate": 4.250652985074627e-05, + "loss": 0.0, + "step": 64264 + }, + { + "epoch": 59.95, + "learning_rate": 4.250606343283582e-05, + "loss": 0.0, + "step": 64268 + }, + { + "epoch": 59.96, + "learning_rate": 4.250559701492537e-05, + "loss": 0.0003, + "step": 64272 + }, + { + "epoch": 59.96, + "learning_rate": 4.2505130597014926e-05, + "loss": 0.0, + "step": 64276 + }, + { + "epoch": 59.96, + "learning_rate": 4.250466417910448e-05, + "loss": 0.0, + "step": 64280 + }, + { + "epoch": 59.97, + "learning_rate": 4.250419776119403e-05, + "loss": 0.0, + "step": 64284 + }, + { + "epoch": 59.97, + "learning_rate": 4.2503731343283584e-05, + "loss": 0.0, + "step": 64288 + }, + { + "epoch": 59.97, + "learning_rate": 4.250326492537314e-05, + "loss": 0.0, + "step": 64292 + }, + { + "epoch": 59.98, + "learning_rate": 4.250279850746269e-05, + "loss": 0.0001, + "step": 64296 + }, + { + "epoch": 59.98, + "learning_rate": 4.2502332089552235e-05, + "loss": 0.0, + "step": 64300 + }, + { + "epoch": 59.98, + "learning_rate": 4.25018656716418e-05, + "loss": 0.0, + "step": 64304 + }, + { + "epoch": 59.99, + "learning_rate": 4.2501399253731345e-05, + "loss": 0.0, + "step": 64308 + }, + { + "epoch": 59.99, + "learning_rate": 4.25009328358209e-05, + "loss": 0.0, + "step": 64312 + }, + { + "epoch": 60.0, + "learning_rate": 4.250046641791045e-05, + "loss": 0.0, + "step": 64316 + }, + { + "epoch": 60.0, + "learning_rate": 4.25e-05, + "loss": 0.0, + "step": 64320 + }, + { + "epoch": 60.0, + "learning_rate": 4.249953358208956e-05, + "loss": 0.0, + "step": 64324 + }, + { + "epoch": 60.01, + "learning_rate": 4.2499067164179106e-05, + "loss": 0.0001, + "step": 64328 + }, + { + "epoch": 60.01, + "learning_rate": 4.2498600746268654e-05, + "loss": 0.0, + "step": 64332 + }, + { + "epoch": 60.01, + "learning_rate": 4.2498134328358216e-05, + "loss": 0.0, + "step": 64336 + }, + { + "epoch": 60.02, + "learning_rate": 4.2497667910447764e-05, + "loss": 0.0, + "step": 64340 + }, + { + "epoch": 60.02, + "learning_rate": 4.249720149253731e-05, + "loss": 0.0019, + "step": 64344 + }, + { + "epoch": 60.03, + "learning_rate": 4.249673507462687e-05, + "loss": 0.0013, + "step": 64348 + }, + { + "epoch": 60.03, + "learning_rate": 4.249626865671642e-05, + "loss": 0.0, + "step": 64352 + }, + { + "epoch": 60.03, + "learning_rate": 4.249580223880597e-05, + "loss": 0.0, + "step": 64356 + }, + { + "epoch": 60.04, + "learning_rate": 4.2495335820895525e-05, + "loss": 0.0, + "step": 64360 + }, + { + "epoch": 60.04, + "learning_rate": 4.249486940298508e-05, + "loss": 0.0, + "step": 64364 + }, + { + "epoch": 60.04, + "learning_rate": 4.249440298507463e-05, + "loss": 0.0, + "step": 64368 + }, + { + "epoch": 60.05, + "learning_rate": 4.249393656716418e-05, + "loss": 0.0, + "step": 64372 + }, + { + "epoch": 60.05, + "learning_rate": 4.249347014925373e-05, + "loss": 0.0, + "step": 64376 + }, + { + "epoch": 60.06, + "learning_rate": 4.2493003731343286e-05, + "loss": 0.0, + "step": 64380 + }, + { + "epoch": 60.06, + "learning_rate": 4.249253731343284e-05, + "loss": 0.0, + "step": 64384 + }, + { + "epoch": 60.06, + "learning_rate": 4.249207089552239e-05, + "loss": 0.0, + "step": 64388 + }, + { + "epoch": 60.07, + "learning_rate": 4.249160447761194e-05, + "loss": 0.0, + "step": 64392 + }, + { + "epoch": 60.07, + "learning_rate": 4.24911380597015e-05, + "loss": 0.0, + "step": 64396 + }, + { + "epoch": 60.07, + "learning_rate": 4.249067164179105e-05, + "loss": 0.0014, + "step": 64400 + }, + { + "epoch": 60.08, + "learning_rate": 4.2490205223880595e-05, + "loss": 0.0, + "step": 64404 + }, + { + "epoch": 60.08, + "learning_rate": 4.248973880597015e-05, + "loss": 0.0, + "step": 64408 + }, + { + "epoch": 60.09, + "learning_rate": 4.2489272388059705e-05, + "loss": 0.0, + "step": 64412 + }, + { + "epoch": 60.09, + "learning_rate": 4.248880597014925e-05, + "loss": 0.0, + "step": 64416 + }, + { + "epoch": 60.09, + "learning_rate": 4.248833955223881e-05, + "loss": 0.0, + "step": 64420 + }, + { + "epoch": 60.1, + "learning_rate": 4.248787313432836e-05, + "loss": 0.0001, + "step": 64424 + }, + { + "epoch": 60.1, + "learning_rate": 4.248740671641791e-05, + "loss": 0.0, + "step": 64428 + }, + { + "epoch": 60.1, + "learning_rate": 4.2486940298507466e-05, + "loss": 0.0001, + "step": 64432 + }, + { + "epoch": 60.11, + "learning_rate": 4.2486473880597014e-05, + "loss": 0.0, + "step": 64436 + }, + { + "epoch": 60.11, + "learning_rate": 4.248600746268657e-05, + "loss": 0.0, + "step": 64440 + }, + { + "epoch": 60.12, + "learning_rate": 4.2485541044776124e-05, + "loss": 0.0, + "step": 64444 + }, + { + "epoch": 60.12, + "learning_rate": 4.248507462686567e-05, + "loss": 0.0, + "step": 64448 + }, + { + "epoch": 60.12, + "learning_rate": 4.248460820895522e-05, + "loss": 0.0, + "step": 64452 + }, + { + "epoch": 60.13, + "learning_rate": 4.248414179104478e-05, + "loss": 0.0, + "step": 64456 + }, + { + "epoch": 60.13, + "learning_rate": 4.248367537313433e-05, + "loss": 0.0, + "step": 64460 + }, + { + "epoch": 60.13, + "learning_rate": 4.248320895522388e-05, + "loss": 0.0, + "step": 64464 + }, + { + "epoch": 60.14, + "learning_rate": 4.248274253731343e-05, + "loss": 0.0, + "step": 64468 + }, + { + "epoch": 60.14, + "learning_rate": 4.248227611940299e-05, + "loss": 0.0, + "step": 64472 + }, + { + "epoch": 60.15, + "learning_rate": 4.248180970149254e-05, + "loss": 0.0, + "step": 64476 + }, + { + "epoch": 60.15, + "learning_rate": 4.248134328358209e-05, + "loss": 0.0, + "step": 64480 + }, + { + "epoch": 60.15, + "learning_rate": 4.2480876865671646e-05, + "loss": 0.0, + "step": 64484 + }, + { + "epoch": 60.16, + "learning_rate": 4.24804104477612e-05, + "loss": 0.0, + "step": 64488 + }, + { + "epoch": 60.16, + "learning_rate": 4.247994402985075e-05, + "loss": 0.0, + "step": 64492 + }, + { + "epoch": 60.16, + "learning_rate": 4.24794776119403e-05, + "loss": 0.0002, + "step": 64496 + }, + { + "epoch": 60.17, + "learning_rate": 4.247901119402985e-05, + "loss": 0.0, + "step": 64500 + }, + { + "epoch": 60.17, + "eval_exact_match": 0.7485493230174082, + "eval_exec": 0.7911025145067698, + "eval_loss": 0.5219013690948486, + "eval_runtime": 1099.0283, + "eval_samples_per_second": 0.941, + "step": 64500 + }, + { + "epoch": 60.17, + "learning_rate": 4.247854477611941e-05, + "loss": 0.0, + "step": 64504 + }, + { + "epoch": 60.18, + "learning_rate": 4.2478078358208955e-05, + "loss": 0.0, + "step": 64508 + }, + { + "epoch": 60.18, + "learning_rate": 4.247761194029851e-05, + "loss": 0.0005, + "step": 64512 + }, + { + "epoch": 60.18, + "learning_rate": 4.2477145522388065e-05, + "loss": 0.0, + "step": 64516 + }, + { + "epoch": 60.19, + "learning_rate": 4.247667910447761e-05, + "loss": 0.0007, + "step": 64520 + }, + { + "epoch": 60.19, + "learning_rate": 4.247621268656717e-05, + "loss": 0.0, + "step": 64524 + }, + { + "epoch": 60.19, + "learning_rate": 4.2475746268656716e-05, + "loss": 0.0, + "step": 64528 + }, + { + "epoch": 60.2, + "learning_rate": 4.247527985074627e-05, + "loss": 0.0046, + "step": 64532 + }, + { + "epoch": 60.2, + "learning_rate": 4.2474813432835826e-05, + "loss": 0.0001, + "step": 64536 + }, + { + "epoch": 60.21, + "learning_rate": 4.2474347014925374e-05, + "loss": 0.0, + "step": 64540 + }, + { + "epoch": 60.21, + "learning_rate": 4.247388059701493e-05, + "loss": 0.0003, + "step": 64544 + }, + { + "epoch": 60.21, + "learning_rate": 4.2473414179104484e-05, + "loss": 0.0, + "step": 64548 + }, + { + "epoch": 60.22, + "learning_rate": 4.247294776119403e-05, + "loss": 0.0, + "step": 64552 + }, + { + "epoch": 60.22, + "learning_rate": 4.247248134328358e-05, + "loss": 0.0, + "step": 64556 + }, + { + "epoch": 60.22, + "learning_rate": 4.2472014925373135e-05, + "loss": 0.0001, + "step": 64560 + }, + { + "epoch": 60.23, + "learning_rate": 4.247154850746269e-05, + "loss": 0.0, + "step": 64564 + }, + { + "epoch": 60.23, + "learning_rate": 4.247108208955224e-05, + "loss": 0.0, + "step": 64568 + }, + { + "epoch": 60.24, + "learning_rate": 4.247061567164179e-05, + "loss": 0.0, + "step": 64572 + }, + { + "epoch": 60.24, + "learning_rate": 4.247014925373135e-05, + "loss": 0.0, + "step": 64576 + }, + { + "epoch": 60.24, + "learning_rate": 4.2469682835820896e-05, + "loss": 0.0, + "step": 64580 + }, + { + "epoch": 60.25, + "learning_rate": 4.246921641791045e-05, + "loss": 0.0004, + "step": 64584 + }, + { + "epoch": 60.25, + "learning_rate": 4.246875e-05, + "loss": 0.0009, + "step": 64588 + }, + { + "epoch": 60.25, + "learning_rate": 4.2468283582089554e-05, + "loss": 0.0002, + "step": 64592 + }, + { + "epoch": 60.26, + "learning_rate": 4.246781716417911e-05, + "loss": 0.0, + "step": 64596 + }, + { + "epoch": 60.26, + "learning_rate": 4.246735074626866e-05, + "loss": 0.0, + "step": 64600 + }, + { + "epoch": 60.26, + "learning_rate": 4.246688432835821e-05, + "loss": 0.0, + "step": 64604 + }, + { + "epoch": 60.27, + "learning_rate": 4.246641791044777e-05, + "loss": 0.0008, + "step": 64608 + }, + { + "epoch": 60.27, + "learning_rate": 4.2465951492537315e-05, + "loss": 0.0, + "step": 64612 + }, + { + "epoch": 60.28, + "learning_rate": 4.246548507462686e-05, + "loss": 0.0037, + "step": 64616 + }, + { + "epoch": 60.28, + "learning_rate": 4.246501865671642e-05, + "loss": 0.0, + "step": 64620 + }, + { + "epoch": 60.28, + "learning_rate": 4.246455223880597e-05, + "loss": 0.0, + "step": 64624 + }, + { + "epoch": 60.29, + "learning_rate": 4.246408582089552e-05, + "loss": 0.0, + "step": 64628 + }, + { + "epoch": 60.29, + "learning_rate": 4.2463619402985076e-05, + "loss": 0.0, + "step": 64632 + }, + { + "epoch": 60.29, + "learning_rate": 4.246315298507463e-05, + "loss": 0.0007, + "step": 64636 + }, + { + "epoch": 60.3, + "learning_rate": 4.2462686567164186e-05, + "loss": 0.0002, + "step": 64640 + }, + { + "epoch": 60.3, + "learning_rate": 4.2462220149253734e-05, + "loss": 0.0046, + "step": 64644 + }, + { + "epoch": 60.31, + "learning_rate": 4.246175373134328e-05, + "loss": 0.0, + "step": 64648 + }, + { + "epoch": 60.31, + "learning_rate": 4.2461287313432844e-05, + "loss": 0.0001, + "step": 64652 + }, + { + "epoch": 60.31, + "learning_rate": 4.246082089552239e-05, + "loss": 0.0, + "step": 64656 + }, + { + "epoch": 60.32, + "learning_rate": 4.246035447761194e-05, + "loss": 0.0, + "step": 64660 + }, + { + "epoch": 60.32, + "learning_rate": 4.2459888059701495e-05, + "loss": 0.0, + "step": 64664 + }, + { + "epoch": 60.32, + "learning_rate": 4.245942164179105e-05, + "loss": 0.0, + "step": 64668 + }, + { + "epoch": 60.33, + "learning_rate": 4.24589552238806e-05, + "loss": 0.0, + "step": 64672 + }, + { + "epoch": 60.33, + "learning_rate": 4.245848880597015e-05, + "loss": 0.0, + "step": 64676 + }, + { + "epoch": 60.34, + "learning_rate": 4.24580223880597e-05, + "loss": 0.0, + "step": 64680 + }, + { + "epoch": 60.34, + "learning_rate": 4.2457555970149256e-05, + "loss": 0.0, + "step": 64684 + }, + { + "epoch": 60.34, + "learning_rate": 4.245708955223881e-05, + "loss": 0.0, + "step": 64688 + }, + { + "epoch": 60.35, + "learning_rate": 4.245662313432836e-05, + "loss": 0.0, + "step": 64692 + }, + { + "epoch": 60.35, + "learning_rate": 4.2456156716417914e-05, + "loss": 0.0, + "step": 64696 + }, + { + "epoch": 60.35, + "learning_rate": 4.245569029850747e-05, + "loss": 0.0, + "step": 64700 + }, + { + "epoch": 60.36, + "learning_rate": 4.245522388059702e-05, + "loss": 0.0, + "step": 64704 + }, + { + "epoch": 60.36, + "learning_rate": 4.2454757462686565e-05, + "loss": 0.0001, + "step": 64708 + }, + { + "epoch": 60.37, + "learning_rate": 4.2454291044776127e-05, + "loss": 0.0021, + "step": 64712 + }, + { + "epoch": 60.37, + "learning_rate": 4.2453824626865675e-05, + "loss": 0.0004, + "step": 64716 + }, + { + "epoch": 60.37, + "learning_rate": 4.245335820895522e-05, + "loss": 0.0, + "step": 64720 + }, + { + "epoch": 60.38, + "learning_rate": 4.245289179104478e-05, + "loss": 0.0001, + "step": 64724 + }, + { + "epoch": 60.38, + "learning_rate": 4.245242537313433e-05, + "loss": 0.0, + "step": 64728 + }, + { + "epoch": 60.38, + "learning_rate": 4.245195895522388e-05, + "loss": 0.0, + "step": 64732 + }, + { + "epoch": 60.39, + "learning_rate": 4.2451492537313436e-05, + "loss": 0.0, + "step": 64736 + }, + { + "epoch": 60.39, + "learning_rate": 4.2451026119402984e-05, + "loss": 0.0, + "step": 64740 + }, + { + "epoch": 60.4, + "learning_rate": 4.245055970149254e-05, + "loss": 0.0011, + "step": 64744 + }, + { + "epoch": 60.4, + "learning_rate": 4.2450093283582094e-05, + "loss": 0.0, + "step": 64748 + }, + { + "epoch": 60.4, + "learning_rate": 4.244962686567164e-05, + "loss": 0.0001, + "step": 64752 + }, + { + "epoch": 60.41, + "learning_rate": 4.2449160447761197e-05, + "loss": 0.0, + "step": 64756 + }, + { + "epoch": 60.41, + "learning_rate": 4.244869402985075e-05, + "loss": 0.0001, + "step": 64760 + }, + { + "epoch": 60.41, + "learning_rate": 4.24482276119403e-05, + "loss": 0.0, + "step": 64764 + }, + { + "epoch": 60.42, + "learning_rate": 4.244776119402985e-05, + "loss": 0.0, + "step": 64768 + }, + { + "epoch": 60.42, + "learning_rate": 4.244729477611941e-05, + "loss": 0.0, + "step": 64772 + }, + { + "epoch": 60.43, + "learning_rate": 4.244682835820896e-05, + "loss": 0.0, + "step": 64776 + }, + { + "epoch": 60.43, + "learning_rate": 4.2446361940298506e-05, + "loss": 0.0, + "step": 64780 + }, + { + "epoch": 60.43, + "learning_rate": 4.244589552238806e-05, + "loss": 0.0001, + "step": 64784 + }, + { + "epoch": 60.44, + "learning_rate": 4.2445429104477616e-05, + "loss": 0.0, + "step": 64788 + }, + { + "epoch": 60.44, + "learning_rate": 4.2444962686567164e-05, + "loss": 0.0007, + "step": 64792 + }, + { + "epoch": 60.44, + "learning_rate": 4.244449626865672e-05, + "loss": 0.0086, + "step": 64796 + }, + { + "epoch": 60.45, + "learning_rate": 4.244402985074627e-05, + "loss": 0.0002, + "step": 64800 + }, + { + "epoch": 60.45, + "learning_rate": 4.244356343283583e-05, + "loss": 0.0, + "step": 64804 + }, + { + "epoch": 60.46, + "learning_rate": 4.2443097014925377e-05, + "loss": 0.0, + "step": 64808 + }, + { + "epoch": 60.46, + "learning_rate": 4.2442630597014925e-05, + "loss": 0.0, + "step": 64812 + }, + { + "epoch": 60.46, + "learning_rate": 4.244216417910448e-05, + "loss": 0.0, + "step": 64816 + }, + { + "epoch": 60.47, + "learning_rate": 4.2441697761194034e-05, + "loss": 0.0, + "step": 64820 + }, + { + "epoch": 60.47, + "learning_rate": 4.244123134328358e-05, + "loss": 0.0, + "step": 64824 + }, + { + "epoch": 60.47, + "learning_rate": 4.244076492537314e-05, + "loss": 0.0, + "step": 64828 + }, + { + "epoch": 60.48, + "learning_rate": 4.244029850746269e-05, + "loss": 0.0, + "step": 64832 + }, + { + "epoch": 60.48, + "learning_rate": 4.243983208955224e-05, + "loss": 0.0001, + "step": 64836 + }, + { + "epoch": 60.49, + "learning_rate": 4.2439365671641795e-05, + "loss": 0.0, + "step": 64840 + }, + { + "epoch": 60.49, + "learning_rate": 4.2438899253731344e-05, + "loss": 0.0, + "step": 64844 + }, + { + "epoch": 60.49, + "learning_rate": 4.24384328358209e-05, + "loss": 0.0, + "step": 64848 + }, + { + "epoch": 60.5, + "learning_rate": 4.2437966417910453e-05, + "loss": 0.0002, + "step": 64852 + }, + { + "epoch": 60.5, + "learning_rate": 4.24375e-05, + "loss": 0.0002, + "step": 64856 + }, + { + "epoch": 60.5, + "learning_rate": 4.243703358208955e-05, + "loss": 0.0, + "step": 64860 + }, + { + "epoch": 60.51, + "learning_rate": 4.243656716417911e-05, + "loss": 0.0001, + "step": 64864 + }, + { + "epoch": 60.51, + "learning_rate": 4.243610074626866e-05, + "loss": 0.0, + "step": 64868 + }, + { + "epoch": 60.51, + "learning_rate": 4.243563432835821e-05, + "loss": 0.0, + "step": 64872 + }, + { + "epoch": 60.52, + "learning_rate": 4.243516791044776e-05, + "loss": 0.0, + "step": 64876 + }, + { + "epoch": 60.52, + "learning_rate": 4.243470149253732e-05, + "loss": 0.0, + "step": 64880 + }, + { + "epoch": 60.53, + "learning_rate": 4.2434235074626866e-05, + "loss": 0.0001, + "step": 64884 + }, + { + "epoch": 60.53, + "learning_rate": 4.243376865671642e-05, + "loss": 0.0, + "step": 64888 + }, + { + "epoch": 60.53, + "learning_rate": 4.2433302238805975e-05, + "loss": 0.0001, + "step": 64892 + }, + { + "epoch": 60.54, + "learning_rate": 4.2432835820895524e-05, + "loss": 0.0, + "step": 64896 + }, + { + "epoch": 60.54, + "learning_rate": 4.243236940298508e-05, + "loss": 0.0003, + "step": 64900 + }, + { + "epoch": 60.54, + "learning_rate": 4.2431902985074627e-05, + "loss": 0.0009, + "step": 64904 + }, + { + "epoch": 60.55, + "learning_rate": 4.243143656716418e-05, + "loss": 0.0, + "step": 64908 + }, + { + "epoch": 60.55, + "learning_rate": 4.2430970149253736e-05, + "loss": 0.0, + "step": 64912 + }, + { + "epoch": 60.56, + "learning_rate": 4.2430503731343284e-05, + "loss": 0.0002, + "step": 64916 + }, + { + "epoch": 60.56, + "learning_rate": 4.243003731343283e-05, + "loss": 0.0004, + "step": 64920 + }, + { + "epoch": 60.56, + "learning_rate": 4.2429570895522394e-05, + "loss": 0.0, + "step": 64924 + }, + { + "epoch": 60.57, + "learning_rate": 4.242910447761194e-05, + "loss": 0.0, + "step": 64928 + }, + { + "epoch": 60.57, + "learning_rate": 4.242863805970149e-05, + "loss": 0.0, + "step": 64932 + }, + { + "epoch": 60.57, + "learning_rate": 4.2428171641791045e-05, + "loss": 0.0, + "step": 64936 + }, + { + "epoch": 60.58, + "learning_rate": 4.24277052238806e-05, + "loss": 0.0, + "step": 64940 + }, + { + "epoch": 60.58, + "learning_rate": 4.242723880597015e-05, + "loss": 0.0001, + "step": 64944 + }, + { + "epoch": 60.59, + "learning_rate": 4.2426772388059703e-05, + "loss": 0.0001, + "step": 64948 + }, + { + "epoch": 60.59, + "learning_rate": 4.242630597014925e-05, + "loss": 0.0001, + "step": 64952 + }, + { + "epoch": 60.59, + "learning_rate": 4.2425839552238806e-05, + "loss": 0.0001, + "step": 64956 + }, + { + "epoch": 60.6, + "learning_rate": 4.242537313432836e-05, + "loss": 0.0, + "step": 64960 + }, + { + "epoch": 60.6, + "learning_rate": 4.242490671641791e-05, + "loss": 0.0003, + "step": 64964 + }, + { + "epoch": 60.6, + "learning_rate": 4.2424440298507464e-05, + "loss": 0.0, + "step": 64968 + }, + { + "epoch": 60.61, + "learning_rate": 4.242397388059702e-05, + "loss": 0.0, + "step": 64972 + }, + { + "epoch": 60.61, + "learning_rate": 4.242350746268657e-05, + "loss": 0.0002, + "step": 64976 + }, + { + "epoch": 60.62, + "learning_rate": 4.242304104477612e-05, + "loss": 0.0, + "step": 64980 + }, + { + "epoch": 60.62, + "learning_rate": 4.242257462686568e-05, + "loss": 0.0, + "step": 64984 + }, + { + "epoch": 60.62, + "learning_rate": 4.2422108208955225e-05, + "loss": 0.0, + "step": 64988 + }, + { + "epoch": 60.63, + "learning_rate": 4.242164179104478e-05, + "loss": 0.0, + "step": 64992 + }, + { + "epoch": 60.63, + "learning_rate": 4.242117537313433e-05, + "loss": 0.0, + "step": 64996 + }, + { + "epoch": 60.63, + "learning_rate": 4.242070895522388e-05, + "loss": 0.0, + "step": 65000 + }, + { + "epoch": 60.63, + "eval_exact_match": 0.741779497098646, + "eval_exec": 0.7852998065764023, + "eval_loss": 0.5173532366752625, + "eval_runtime": 1115.6158, + "eval_samples_per_second": 0.927, + "step": 65000 + }, + { + "epoch": 60.64, + "learning_rate": 4.242024253731344e-05, + "loss": 0.0, + "step": 65004 + }, + { + "epoch": 60.64, + "learning_rate": 4.2419776119402986e-05, + "loss": 0.0, + "step": 65008 + }, + { + "epoch": 60.65, + "learning_rate": 4.2419309701492535e-05, + "loss": 0.0, + "step": 65012 + }, + { + "epoch": 60.65, + "learning_rate": 4.2418843283582096e-05, + "loss": 0.0, + "step": 65016 + }, + { + "epoch": 60.65, + "learning_rate": 4.2418376865671644e-05, + "loss": 0.0002, + "step": 65020 + }, + { + "epoch": 60.66, + "learning_rate": 4.241791044776119e-05, + "loss": 0.0001, + "step": 65024 + }, + { + "epoch": 60.66, + "learning_rate": 4.241744402985075e-05, + "loss": 0.0015, + "step": 65028 + }, + { + "epoch": 60.66, + "learning_rate": 4.24169776119403e-05, + "loss": 0.0004, + "step": 65032 + }, + { + "epoch": 60.67, + "learning_rate": 4.241651119402985e-05, + "loss": 0.0008, + "step": 65036 + }, + { + "epoch": 60.67, + "learning_rate": 4.2416044776119405e-05, + "loss": 0.0, + "step": 65040 + }, + { + "epoch": 60.68, + "learning_rate": 4.241557835820896e-05, + "loss": 0.0, + "step": 65044 + }, + { + "epoch": 60.68, + "learning_rate": 4.241511194029851e-05, + "loss": 0.0, + "step": 65048 + }, + { + "epoch": 60.68, + "learning_rate": 4.241464552238806e-05, + "loss": 0.0, + "step": 65052 + }, + { + "epoch": 60.69, + "learning_rate": 4.241417910447761e-05, + "loss": 0.0, + "step": 65056 + }, + { + "epoch": 60.69, + "learning_rate": 4.2413712686567166e-05, + "loss": 0.0001, + "step": 65060 + }, + { + "epoch": 60.69, + "learning_rate": 4.241324626865672e-05, + "loss": 0.0017, + "step": 65064 + }, + { + "epoch": 60.7, + "learning_rate": 4.241277985074627e-05, + "loss": 0.0, + "step": 65068 + }, + { + "epoch": 60.7, + "learning_rate": 4.241231343283582e-05, + "loss": 0.0, + "step": 65072 + }, + { + "epoch": 60.71, + "learning_rate": 4.241184701492538e-05, + "loss": 0.0, + "step": 65076 + }, + { + "epoch": 60.71, + "learning_rate": 4.241138059701493e-05, + "loss": 0.0002, + "step": 65080 + }, + { + "epoch": 60.71, + "learning_rate": 4.2410914179104475e-05, + "loss": 0.0, + "step": 65084 + }, + { + "epoch": 60.72, + "learning_rate": 4.241044776119403e-05, + "loss": 0.0, + "step": 65088 + }, + { + "epoch": 60.72, + "learning_rate": 4.2409981343283585e-05, + "loss": 0.0, + "step": 65092 + }, + { + "epoch": 60.72, + "learning_rate": 4.240951492537313e-05, + "loss": 0.0, + "step": 65096 + }, + { + "epoch": 60.73, + "learning_rate": 4.240904850746269e-05, + "loss": 0.0, + "step": 65100 + }, + { + "epoch": 60.73, + "learning_rate": 4.240858208955224e-05, + "loss": 0.0, + "step": 65104 + }, + { + "epoch": 60.73, + "learning_rate": 4.240811567164179e-05, + "loss": 0.0, + "step": 65108 + }, + { + "epoch": 60.74, + "learning_rate": 4.2407649253731346e-05, + "loss": 0.0042, + "step": 65112 + }, + { + "epoch": 60.74, + "learning_rate": 4.2407182835820894e-05, + "loss": 0.0, + "step": 65116 + }, + { + "epoch": 60.75, + "learning_rate": 4.240671641791045e-05, + "loss": 0.0, + "step": 65120 + }, + { + "epoch": 60.75, + "learning_rate": 4.2406250000000004e-05, + "loss": 0.0, + "step": 65124 + }, + { + "epoch": 60.75, + "learning_rate": 4.240578358208955e-05, + "loss": 0.0, + "step": 65128 + }, + { + "epoch": 60.76, + "learning_rate": 4.240531716417911e-05, + "loss": 0.0, + "step": 65132 + }, + { + "epoch": 60.76, + "learning_rate": 4.240485074626866e-05, + "loss": 0.0, + "step": 65136 + }, + { + "epoch": 60.76, + "learning_rate": 4.240438432835821e-05, + "loss": 0.0, + "step": 65140 + }, + { + "epoch": 60.77, + "learning_rate": 4.2403917910447765e-05, + "loss": 0.0001, + "step": 65144 + }, + { + "epoch": 60.77, + "learning_rate": 4.240345149253731e-05, + "loss": 0.0, + "step": 65148 + }, + { + "epoch": 60.78, + "learning_rate": 4.240298507462687e-05, + "loss": 0.0, + "step": 65152 + }, + { + "epoch": 60.78, + "learning_rate": 4.240251865671642e-05, + "loss": 0.0, + "step": 65156 + }, + { + "epoch": 60.78, + "learning_rate": 4.240205223880597e-05, + "loss": 0.0001, + "step": 65160 + }, + { + "epoch": 60.79, + "learning_rate": 4.2401585820895526e-05, + "loss": 0.0001, + "step": 65164 + }, + { + "epoch": 60.79, + "learning_rate": 4.240111940298508e-05, + "loss": 0.0, + "step": 65168 + }, + { + "epoch": 60.79, + "learning_rate": 4.240065298507463e-05, + "loss": 0.0001, + "step": 65172 + }, + { + "epoch": 60.8, + "learning_rate": 4.240018656716418e-05, + "loss": 0.0001, + "step": 65176 + }, + { + "epoch": 60.8, + "learning_rate": 4.239972014925373e-05, + "loss": 0.0, + "step": 65180 + }, + { + "epoch": 60.81, + "learning_rate": 4.239925373134329e-05, + "loss": 0.0001, + "step": 65184 + }, + { + "epoch": 60.81, + "learning_rate": 4.2398787313432835e-05, + "loss": 0.0, + "step": 65188 + }, + { + "epoch": 60.81, + "learning_rate": 4.239832089552239e-05, + "loss": 0.0, + "step": 65192 + }, + { + "epoch": 60.82, + "learning_rate": 4.2397854477611945e-05, + "loss": 0.0, + "step": 65196 + }, + { + "epoch": 60.82, + "learning_rate": 4.239738805970149e-05, + "loss": 0.0001, + "step": 65200 + }, + { + "epoch": 60.82, + "learning_rate": 4.239692164179105e-05, + "loss": 0.0, + "step": 65204 + }, + { + "epoch": 60.83, + "learning_rate": 4.2396455223880596e-05, + "loss": 0.0001, + "step": 65208 + }, + { + "epoch": 60.83, + "learning_rate": 4.239598880597015e-05, + "loss": 0.0, + "step": 65212 + }, + { + "epoch": 60.84, + "learning_rate": 4.2395522388059706e-05, + "loss": 0.0, + "step": 65216 + }, + { + "epoch": 60.84, + "learning_rate": 4.2395055970149254e-05, + "loss": 0.0, + "step": 65220 + }, + { + "epoch": 60.84, + "learning_rate": 4.239458955223881e-05, + "loss": 0.0098, + "step": 65224 + }, + { + "epoch": 60.85, + "learning_rate": 4.2394123134328364e-05, + "loss": 0.0, + "step": 65228 + }, + { + "epoch": 60.85, + "learning_rate": 4.239365671641791e-05, + "loss": 0.0, + "step": 65232 + }, + { + "epoch": 60.85, + "learning_rate": 4.239319029850746e-05, + "loss": 0.0, + "step": 65236 + }, + { + "epoch": 60.86, + "learning_rate": 4.2392723880597015e-05, + "loss": 0.0, + "step": 65240 + }, + { + "epoch": 60.86, + "learning_rate": 4.239225746268657e-05, + "loss": 0.0, + "step": 65244 + }, + { + "epoch": 60.87, + "learning_rate": 4.239179104477612e-05, + "loss": 0.0, + "step": 65248 + }, + { + "epoch": 60.87, + "learning_rate": 4.239132462686567e-05, + "loss": 0.0, + "step": 65252 + }, + { + "epoch": 60.87, + "learning_rate": 4.239085820895523e-05, + "loss": 0.0, + "step": 65256 + }, + { + "epoch": 60.88, + "learning_rate": 4.2390391791044776e-05, + "loss": 0.0, + "step": 65260 + }, + { + "epoch": 60.88, + "learning_rate": 4.238992537313433e-05, + "loss": 0.0, + "step": 65264 + }, + { + "epoch": 60.88, + "learning_rate": 4.238945895522388e-05, + "loss": 0.0028, + "step": 65268 + }, + { + "epoch": 60.89, + "learning_rate": 4.2388992537313434e-05, + "loss": 0.0, + "step": 65272 + }, + { + "epoch": 60.89, + "learning_rate": 4.238852611940299e-05, + "loss": 0.0002, + "step": 65276 + }, + { + "epoch": 60.9, + "learning_rate": 4.238805970149254e-05, + "loss": 0.0, + "step": 65280 + }, + { + "epoch": 60.9, + "learning_rate": 4.238759328358209e-05, + "loss": 0.0, + "step": 65284 + }, + { + "epoch": 60.9, + "learning_rate": 4.238712686567165e-05, + "loss": 0.0, + "step": 65288 + }, + { + "epoch": 60.91, + "learning_rate": 4.2386660447761195e-05, + "loss": 0.0, + "step": 65292 + }, + { + "epoch": 60.91, + "learning_rate": 4.238619402985075e-05, + "loss": 0.0, + "step": 65296 + }, + { + "epoch": 60.91, + "learning_rate": 4.23857276119403e-05, + "loss": 0.0, + "step": 65300 + }, + { + "epoch": 60.92, + "learning_rate": 4.238526119402985e-05, + "loss": 0.0011, + "step": 65304 + }, + { + "epoch": 60.92, + "learning_rate": 4.238479477611941e-05, + "loss": 0.0003, + "step": 65308 + }, + { + "epoch": 60.93, + "learning_rate": 4.2384328358208956e-05, + "loss": 0.001, + "step": 65312 + }, + { + "epoch": 60.93, + "learning_rate": 4.238386194029851e-05, + "loss": 0.0, + "step": 65316 + }, + { + "epoch": 60.93, + "learning_rate": 4.2383395522388066e-05, + "loss": 0.0, + "step": 65320 + }, + { + "epoch": 60.94, + "learning_rate": 4.2382929104477614e-05, + "loss": 0.0, + "step": 65324 + }, + { + "epoch": 60.94, + "learning_rate": 4.238246268656716e-05, + "loss": 0.0, + "step": 65328 + }, + { + "epoch": 60.94, + "learning_rate": 4.2381996268656724e-05, + "loss": 0.0, + "step": 65332 + }, + { + "epoch": 60.95, + "learning_rate": 4.238152985074627e-05, + "loss": 0.0, + "step": 65336 + }, + { + "epoch": 60.95, + "learning_rate": 4.238106343283582e-05, + "loss": 0.0, + "step": 65340 + }, + { + "epoch": 60.96, + "learning_rate": 4.2380597014925375e-05, + "loss": 0.0, + "step": 65344 + }, + { + "epoch": 60.96, + "learning_rate": 4.238013059701493e-05, + "loss": 0.0, + "step": 65348 + }, + { + "epoch": 60.96, + "learning_rate": 4.237966417910448e-05, + "loss": 0.0001, + "step": 65352 + }, + { + "epoch": 60.97, + "learning_rate": 4.237919776119403e-05, + "loss": 0.0019, + "step": 65356 + }, + { + "epoch": 60.97, + "learning_rate": 4.237873134328358e-05, + "loss": 0.0001, + "step": 65360 + }, + { + "epoch": 60.97, + "learning_rate": 4.2378264925373136e-05, + "loss": 0.0, + "step": 65364 + }, + { + "epoch": 60.98, + "learning_rate": 4.237779850746269e-05, + "loss": 0.0, + "step": 65368 + }, + { + "epoch": 60.98, + "learning_rate": 4.237733208955224e-05, + "loss": 0.0005, + "step": 65372 + }, + { + "epoch": 60.98, + "learning_rate": 4.2376865671641794e-05, + "loss": 0.0042, + "step": 65376 + }, + { + "epoch": 60.99, + "learning_rate": 4.237639925373135e-05, + "loss": 0.0, + "step": 65380 + }, + { + "epoch": 60.99, + "learning_rate": 4.23759328358209e-05, + "loss": 0.0, + "step": 65384 + }, + { + "epoch": 61.0, + "learning_rate": 4.2375466417910445e-05, + "loss": 0.0, + "step": 65388 + }, + { + "epoch": 61.0, + "learning_rate": 4.237500000000001e-05, + "loss": 0.0, + "step": 65392 + }, + { + "epoch": 61.0, + "learning_rate": 4.2374533582089555e-05, + "loss": 0.0, + "step": 65396 + }, + { + "epoch": 61.01, + "learning_rate": 4.23740671641791e-05, + "loss": 0.0, + "step": 65400 + }, + { + "epoch": 61.01, + "learning_rate": 4.237360074626866e-05, + "loss": 0.0, + "step": 65404 + }, + { + "epoch": 61.01, + "learning_rate": 4.237313432835821e-05, + "loss": 0.0, + "step": 65408 + }, + { + "epoch": 61.02, + "learning_rate": 4.237266791044776e-05, + "loss": 0.0, + "step": 65412 + }, + { + "epoch": 61.02, + "learning_rate": 4.2372201492537316e-05, + "loss": 0.0, + "step": 65416 + }, + { + "epoch": 61.03, + "learning_rate": 4.2371735074626864e-05, + "loss": 0.0, + "step": 65420 + }, + { + "epoch": 61.03, + "learning_rate": 4.237126865671642e-05, + "loss": 0.0126, + "step": 65424 + }, + { + "epoch": 61.03, + "learning_rate": 4.2370802238805974e-05, + "loss": 0.0, + "step": 65428 + }, + { + "epoch": 61.04, + "learning_rate": 4.237033582089552e-05, + "loss": 0.0, + "step": 65432 + }, + { + "epoch": 61.04, + "learning_rate": 4.236986940298508e-05, + "loss": 0.0, + "step": 65436 + }, + { + "epoch": 61.04, + "learning_rate": 4.236940298507463e-05, + "loss": 0.0, + "step": 65440 + }, + { + "epoch": 61.05, + "learning_rate": 4.236893656716418e-05, + "loss": 0.0, + "step": 65444 + }, + { + "epoch": 61.05, + "learning_rate": 4.2368470149253735e-05, + "loss": 0.0, + "step": 65448 + }, + { + "epoch": 61.06, + "learning_rate": 4.236800373134329e-05, + "loss": 0.0, + "step": 65452 + }, + { + "epoch": 61.06, + "learning_rate": 4.236753731343284e-05, + "loss": 0.0, + "step": 65456 + }, + { + "epoch": 61.06, + "learning_rate": 4.236707089552239e-05, + "loss": 0.0, + "step": 65460 + }, + { + "epoch": 61.07, + "learning_rate": 4.236660447761194e-05, + "loss": 0.0015, + "step": 65464 + }, + { + "epoch": 61.07, + "learning_rate": 4.2366138059701496e-05, + "loss": 0.0, + "step": 65468 + }, + { + "epoch": 61.07, + "learning_rate": 4.236567164179105e-05, + "loss": 0.0002, + "step": 65472 + }, + { + "epoch": 61.08, + "learning_rate": 4.23652052238806e-05, + "loss": 0.0, + "step": 65476 + }, + { + "epoch": 61.08, + "learning_rate": 4.236473880597015e-05, + "loss": 0.0, + "step": 65480 + }, + { + "epoch": 61.09, + "learning_rate": 4.236427238805971e-05, + "loss": 0.0001, + "step": 65484 + }, + { + "epoch": 61.09, + "learning_rate": 4.236380597014926e-05, + "loss": 0.0017, + "step": 65488 + }, + { + "epoch": 61.09, + "learning_rate": 4.2363339552238805e-05, + "loss": 0.0001, + "step": 65492 + }, + { + "epoch": 61.1, + "learning_rate": 4.236287313432836e-05, + "loss": 0.0, + "step": 65496 + }, + { + "epoch": 61.1, + "learning_rate": 4.2362406716417915e-05, + "loss": 0.0, + "step": 65500 + }, + { + "epoch": 61.1, + "eval_exact_match": 0.7475822050290135, + "eval_exec": 0.7833655705996132, + "eval_loss": 0.49484971165657043, + "eval_runtime": 1108.9884, + "eval_samples_per_second": 0.932, + "step": 65500 + }, + { + "epoch": 61.1, + "learning_rate": 4.236194029850746e-05, + "loss": 0.0, + "step": 65504 + }, + { + "epoch": 61.11, + "learning_rate": 4.236147388059702e-05, + "loss": 0.0014, + "step": 65508 + }, + { + "epoch": 61.11, + "learning_rate": 4.236100746268657e-05, + "loss": 0.0011, + "step": 65512 + }, + { + "epoch": 61.12, + "learning_rate": 4.236054104477612e-05, + "loss": 0.0, + "step": 65516 + }, + { + "epoch": 61.12, + "learning_rate": 4.2360074626865676e-05, + "loss": 0.0, + "step": 65520 + }, + { + "epoch": 61.12, + "learning_rate": 4.2359608208955224e-05, + "loss": 0.0001, + "step": 65524 + }, + { + "epoch": 61.13, + "learning_rate": 4.235914179104478e-05, + "loss": 0.0001, + "step": 65528 + }, + { + "epoch": 61.13, + "learning_rate": 4.2358675373134334e-05, + "loss": 0.0, + "step": 65532 + }, + { + "epoch": 61.13, + "learning_rate": 4.235820895522388e-05, + "loss": 0.0, + "step": 65536 + }, + { + "epoch": 61.14, + "learning_rate": 4.235774253731343e-05, + "loss": 0.0, + "step": 65540 + }, + { + "epoch": 61.14, + "learning_rate": 4.235727611940299e-05, + "loss": 0.0001, + "step": 65544 + }, + { + "epoch": 61.15, + "learning_rate": 4.235680970149254e-05, + "loss": 0.0, + "step": 65548 + }, + { + "epoch": 61.15, + "learning_rate": 4.235634328358209e-05, + "loss": 0.0, + "step": 65552 + }, + { + "epoch": 61.15, + "learning_rate": 4.235587686567164e-05, + "loss": 0.0, + "step": 65556 + }, + { + "epoch": 61.16, + "learning_rate": 4.23554104477612e-05, + "loss": 0.0, + "step": 65560 + }, + { + "epoch": 61.16, + "learning_rate": 4.2354944029850746e-05, + "loss": 0.0001, + "step": 65564 + }, + { + "epoch": 61.16, + "learning_rate": 4.23544776119403e-05, + "loss": 0.0026, + "step": 65568 + }, + { + "epoch": 61.17, + "learning_rate": 4.2354011194029856e-05, + "loss": 0.0001, + "step": 65572 + }, + { + "epoch": 61.17, + "learning_rate": 4.2353544776119404e-05, + "loss": 0.0001, + "step": 65576 + }, + { + "epoch": 61.18, + "learning_rate": 4.235307835820896e-05, + "loss": 0.0, + "step": 65580 + }, + { + "epoch": 61.18, + "learning_rate": 4.235261194029851e-05, + "loss": 0.0, + "step": 65584 + }, + { + "epoch": 61.18, + "learning_rate": 4.235214552238806e-05, + "loss": 0.0053, + "step": 65588 + }, + { + "epoch": 61.19, + "learning_rate": 4.235167910447762e-05, + "loss": 0.0003, + "step": 65592 + }, + { + "epoch": 61.19, + "learning_rate": 4.2351212686567165e-05, + "loss": 0.0006, + "step": 65596 + }, + { + "epoch": 61.19, + "learning_rate": 4.235074626865671e-05, + "loss": 0.0012, + "step": 65600 + }, + { + "epoch": 61.2, + "learning_rate": 4.2350279850746275e-05, + "loss": 0.0, + "step": 65604 + }, + { + "epoch": 61.2, + "learning_rate": 4.234981343283582e-05, + "loss": 0.0, + "step": 65608 + }, + { + "epoch": 61.21, + "learning_rate": 4.234934701492538e-05, + "loss": 0.0, + "step": 65612 + }, + { + "epoch": 61.21, + "learning_rate": 4.2348880597014926e-05, + "loss": 0.0, + "step": 65616 + }, + { + "epoch": 61.21, + "learning_rate": 4.234841417910448e-05, + "loss": 0.0004, + "step": 65620 + }, + { + "epoch": 61.22, + "learning_rate": 4.2347947761194036e-05, + "loss": 0.0, + "step": 65624 + }, + { + "epoch": 61.22, + "learning_rate": 4.2347481343283584e-05, + "loss": 0.0001, + "step": 65628 + }, + { + "epoch": 61.22, + "learning_rate": 4.234701492537313e-05, + "loss": 0.0, + "step": 65632 + }, + { + "epoch": 61.23, + "learning_rate": 4.2346548507462694e-05, + "loss": 0.0, + "step": 65636 + }, + { + "epoch": 61.23, + "learning_rate": 4.234608208955224e-05, + "loss": 0.0052, + "step": 65640 + }, + { + "epoch": 61.24, + "learning_rate": 4.234561567164179e-05, + "loss": 0.0, + "step": 65644 + }, + { + "epoch": 61.24, + "learning_rate": 4.2345149253731345e-05, + "loss": 0.0, + "step": 65648 + }, + { + "epoch": 61.24, + "learning_rate": 4.23446828358209e-05, + "loss": 0.0001, + "step": 65652 + }, + { + "epoch": 61.25, + "learning_rate": 4.234421641791045e-05, + "loss": 0.0001, + "step": 65656 + }, + { + "epoch": 61.25, + "learning_rate": 4.234375e-05, + "loss": 0.0, + "step": 65660 + }, + { + "epoch": 61.25, + "learning_rate": 4.234328358208956e-05, + "loss": 0.0, + "step": 65664 + }, + { + "epoch": 61.26, + "learning_rate": 4.2342817164179106e-05, + "loss": 0.0, + "step": 65668 + }, + { + "epoch": 61.26, + "learning_rate": 4.234235074626866e-05, + "loss": 0.0, + "step": 65672 + }, + { + "epoch": 61.26, + "learning_rate": 4.234188432835821e-05, + "loss": 0.0, + "step": 65676 + }, + { + "epoch": 61.27, + "learning_rate": 4.2341417910447764e-05, + "loss": 0.0, + "step": 65680 + }, + { + "epoch": 61.27, + "learning_rate": 4.234095149253732e-05, + "loss": 0.0004, + "step": 65684 + }, + { + "epoch": 61.28, + "learning_rate": 4.234048507462687e-05, + "loss": 0.0002, + "step": 65688 + }, + { + "epoch": 61.28, + "learning_rate": 4.2340018656716415e-05, + "loss": 0.0, + "step": 65692 + }, + { + "epoch": 61.28, + "learning_rate": 4.2339552238805976e-05, + "loss": 0.0003, + "step": 65696 + }, + { + "epoch": 61.29, + "learning_rate": 4.2339085820895525e-05, + "loss": 0.0021, + "step": 65700 + }, + { + "epoch": 61.29, + "learning_rate": 4.233861940298507e-05, + "loss": 0.0, + "step": 65704 + }, + { + "epoch": 61.29, + "learning_rate": 4.233815298507463e-05, + "loss": 0.0009, + "step": 65708 + }, + { + "epoch": 61.3, + "learning_rate": 4.233768656716418e-05, + "loss": 0.0008, + "step": 65712 + }, + { + "epoch": 61.3, + "learning_rate": 4.233722014925373e-05, + "loss": 0.0, + "step": 65716 + }, + { + "epoch": 61.31, + "learning_rate": 4.2336753731343286e-05, + "loss": 0.0, + "step": 65720 + }, + { + "epoch": 61.31, + "learning_rate": 4.233628731343284e-05, + "loss": 0.0, + "step": 65724 + }, + { + "epoch": 61.31, + "learning_rate": 4.233582089552239e-05, + "loss": 0.0, + "step": 65728 + }, + { + "epoch": 61.32, + "learning_rate": 4.2335354477611944e-05, + "loss": 0.0003, + "step": 65732 + }, + { + "epoch": 61.32, + "learning_rate": 4.233488805970149e-05, + "loss": 0.0015, + "step": 65736 + }, + { + "epoch": 61.32, + "learning_rate": 4.2334421641791047e-05, + "loss": 0.0, + "step": 65740 + }, + { + "epoch": 61.33, + "learning_rate": 4.23339552238806e-05, + "loss": 0.002, + "step": 65744 + }, + { + "epoch": 61.33, + "learning_rate": 4.233348880597015e-05, + "loss": 0.0, + "step": 65748 + }, + { + "epoch": 61.34, + "learning_rate": 4.23330223880597e-05, + "loss": 0.0051, + "step": 65752 + }, + { + "epoch": 61.34, + "learning_rate": 4.233255597014926e-05, + "loss": 0.0007, + "step": 65756 + }, + { + "epoch": 61.34, + "learning_rate": 4.233208955223881e-05, + "loss": 0.0, + "step": 65760 + }, + { + "epoch": 61.35, + "learning_rate": 4.2331623134328356e-05, + "loss": 0.0, + "step": 65764 + }, + { + "epoch": 61.35, + "learning_rate": 4.233115671641791e-05, + "loss": 0.0, + "step": 65768 + }, + { + "epoch": 61.35, + "learning_rate": 4.2330690298507466e-05, + "loss": 0.0, + "step": 65772 + }, + { + "epoch": 61.36, + "learning_rate": 4.233022388059702e-05, + "loss": 0.0, + "step": 65776 + }, + { + "epoch": 61.36, + "learning_rate": 4.232975746268657e-05, + "loss": 0.0, + "step": 65780 + }, + { + "epoch": 61.37, + "learning_rate": 4.2329291044776123e-05, + "loss": 0.0002, + "step": 65784 + }, + { + "epoch": 61.37, + "learning_rate": 4.232882462686568e-05, + "loss": 0.0001, + "step": 65788 + }, + { + "epoch": 61.37, + "learning_rate": 4.2328358208955227e-05, + "loss": 0.0089, + "step": 65792 + }, + { + "epoch": 61.38, + "learning_rate": 4.2327891791044775e-05, + "loss": 0.0, + "step": 65796 + }, + { + "epoch": 61.38, + "learning_rate": 4.232742537313433e-05, + "loss": 0.0, + "step": 65800 + }, + { + "epoch": 61.38, + "learning_rate": 4.2326958955223884e-05, + "loss": 0.0, + "step": 65804 + }, + { + "epoch": 61.39, + "learning_rate": 4.232649253731343e-05, + "loss": 0.0003, + "step": 65808 + }, + { + "epoch": 61.39, + "learning_rate": 4.232602611940299e-05, + "loss": 0.0, + "step": 65812 + }, + { + "epoch": 61.4, + "learning_rate": 4.232555970149254e-05, + "loss": 0.0, + "step": 65816 + }, + { + "epoch": 61.4, + "learning_rate": 4.232509328358209e-05, + "loss": 0.0, + "step": 65820 + }, + { + "epoch": 61.4, + "learning_rate": 4.2324626865671645e-05, + "loss": 0.0001, + "step": 65824 + }, + { + "epoch": 61.41, + "learning_rate": 4.2324160447761194e-05, + "loss": 0.0, + "step": 65828 + }, + { + "epoch": 61.41, + "learning_rate": 4.232369402985075e-05, + "loss": 0.0, + "step": 65832 + }, + { + "epoch": 61.41, + "learning_rate": 4.23232276119403e-05, + "loss": 0.0011, + "step": 65836 + }, + { + "epoch": 61.42, + "learning_rate": 4.232276119402985e-05, + "loss": 0.0, + "step": 65840 + }, + { + "epoch": 61.42, + "learning_rate": 4.2322294776119406e-05, + "loss": 0.0, + "step": 65844 + }, + { + "epoch": 61.43, + "learning_rate": 4.232182835820896e-05, + "loss": 0.0, + "step": 65848 + }, + { + "epoch": 61.43, + "learning_rate": 4.232136194029851e-05, + "loss": 0.0, + "step": 65852 + }, + { + "epoch": 61.43, + "learning_rate": 4.232089552238806e-05, + "loss": 0.0001, + "step": 65856 + }, + { + "epoch": 61.44, + "learning_rate": 4.232042910447761e-05, + "loss": 0.0, + "step": 65860 + }, + { + "epoch": 61.44, + "learning_rate": 4.231996268656717e-05, + "loss": 0.0, + "step": 65864 + }, + { + "epoch": 61.44, + "learning_rate": 4.2319496268656716e-05, + "loss": 0.0, + "step": 65868 + }, + { + "epoch": 61.45, + "learning_rate": 4.231902985074627e-05, + "loss": 0.0, + "step": 65872 + }, + { + "epoch": 61.45, + "learning_rate": 4.2318563432835825e-05, + "loss": 0.0, + "step": 65876 + }, + { + "epoch": 61.46, + "learning_rate": 4.2318097014925373e-05, + "loss": 0.0, + "step": 65880 + }, + { + "epoch": 61.46, + "learning_rate": 4.231763059701493e-05, + "loss": 0.0001, + "step": 65884 + }, + { + "epoch": 61.46, + "learning_rate": 4.2317164179104477e-05, + "loss": 0.0, + "step": 65888 + }, + { + "epoch": 61.47, + "learning_rate": 4.231669776119403e-05, + "loss": 0.0, + "step": 65892 + }, + { + "epoch": 61.47, + "learning_rate": 4.2316231343283586e-05, + "loss": 0.0, + "step": 65896 + }, + { + "epoch": 61.47, + "learning_rate": 4.2315764925373134e-05, + "loss": 0.0, + "step": 65900 + }, + { + "epoch": 61.48, + "learning_rate": 4.231529850746269e-05, + "loss": 0.0, + "step": 65904 + }, + { + "epoch": 61.48, + "learning_rate": 4.2314832089552244e-05, + "loss": 0.0, + "step": 65908 + }, + { + "epoch": 61.49, + "learning_rate": 4.231436567164179e-05, + "loss": 0.0, + "step": 65912 + }, + { + "epoch": 61.49, + "learning_rate": 4.231389925373134e-05, + "loss": 0.0, + "step": 65916 + }, + { + "epoch": 61.49, + "learning_rate": 4.2313432835820895e-05, + "loss": 0.0, + "step": 65920 + }, + { + "epoch": 61.5, + "learning_rate": 4.231296641791045e-05, + "loss": 0.0, + "step": 65924 + }, + { + "epoch": 61.5, + "learning_rate": 4.23125e-05, + "loss": 0.0, + "step": 65928 + }, + { + "epoch": 61.5, + "learning_rate": 4.2312033582089553e-05, + "loss": 0.0, + "step": 65932 + }, + { + "epoch": 61.51, + "learning_rate": 4.231156716417911e-05, + "loss": 0.0089, + "step": 65936 + }, + { + "epoch": 61.51, + "learning_rate": 4.231110074626866e-05, + "loss": 0.0, + "step": 65940 + }, + { + "epoch": 61.51, + "learning_rate": 4.231063432835821e-05, + "loss": 0.0, + "step": 65944 + }, + { + "epoch": 61.52, + "learning_rate": 4.231016791044776e-05, + "loss": 0.0005, + "step": 65948 + }, + { + "epoch": 61.52, + "learning_rate": 4.230970149253732e-05, + "loss": 0.0, + "step": 65952 + }, + { + "epoch": 61.53, + "learning_rate": 4.230923507462687e-05, + "loss": 0.0, + "step": 65956 + }, + { + "epoch": 61.53, + "learning_rate": 4.230876865671642e-05, + "loss": 0.0, + "step": 65960 + }, + { + "epoch": 61.53, + "learning_rate": 4.230830223880597e-05, + "loss": 0.0, + "step": 65964 + }, + { + "epoch": 61.54, + "learning_rate": 4.230783582089553e-05, + "loss": 0.0, + "step": 65968 + }, + { + "epoch": 61.54, + "learning_rate": 4.2307369402985075e-05, + "loss": 0.0, + "step": 65972 + }, + { + "epoch": 61.54, + "learning_rate": 4.230690298507463e-05, + "loss": 0.0, + "step": 65976 + }, + { + "epoch": 61.55, + "learning_rate": 4.230643656716418e-05, + "loss": 0.0, + "step": 65980 + }, + { + "epoch": 61.55, + "learning_rate": 4.230597014925373e-05, + "loss": 0.001, + "step": 65984 + }, + { + "epoch": 61.56, + "learning_rate": 4.230550373134329e-05, + "loss": 0.0, + "step": 65988 + }, + { + "epoch": 61.56, + "learning_rate": 4.2305037313432836e-05, + "loss": 0.0, + "step": 65992 + }, + { + "epoch": 61.56, + "learning_rate": 4.230457089552239e-05, + "loss": 0.0001, + "step": 65996 + }, + { + "epoch": 61.57, + "learning_rate": 4.2304104477611946e-05, + "loss": 0.0, + "step": 66000 + }, + { + "epoch": 61.57, + "eval_exact_match": 0.7446808510638298, + "eval_exec": 0.7775628626692457, + "eval_loss": 0.4943001866340637, + "eval_runtime": 1085.5598, + "eval_samples_per_second": 0.953, + "step": 66000 + }, + { + "epoch": 61.57, + "learning_rate": 4.2303638059701494e-05, + "loss": 0.0, + "step": 66004 + }, + { + "epoch": 61.57, + "learning_rate": 4.230317164179104e-05, + "loss": 0.0, + "step": 66008 + }, + { + "epoch": 61.58, + "learning_rate": 4.2302705223880604e-05, + "loss": 0.0, + "step": 66012 + }, + { + "epoch": 61.58, + "learning_rate": 4.230223880597015e-05, + "loss": 0.0, + "step": 66016 + }, + { + "epoch": 61.59, + "learning_rate": 4.23017723880597e-05, + "loss": 0.002, + "step": 66020 + }, + { + "epoch": 61.59, + "learning_rate": 4.2301305970149255e-05, + "loss": 0.0001, + "step": 66024 + }, + { + "epoch": 61.59, + "learning_rate": 4.230083955223881e-05, + "loss": 0.0, + "step": 66028 + }, + { + "epoch": 61.6, + "learning_rate": 4.230037313432836e-05, + "loss": 0.0, + "step": 66032 + }, + { + "epoch": 61.6, + "learning_rate": 4.229990671641791e-05, + "loss": 0.0005, + "step": 66036 + }, + { + "epoch": 61.6, + "learning_rate": 4.229944029850746e-05, + "loss": 0.0, + "step": 66040 + }, + { + "epoch": 61.61, + "learning_rate": 4.2298973880597016e-05, + "loss": 0.0, + "step": 66044 + }, + { + "epoch": 61.61, + "learning_rate": 4.229850746268657e-05, + "loss": 0.0, + "step": 66048 + }, + { + "epoch": 61.62, + "learning_rate": 4.229804104477612e-05, + "loss": 0.0, + "step": 66052 + }, + { + "epoch": 61.62, + "learning_rate": 4.2297574626865674e-05, + "loss": 0.0, + "step": 66056 + }, + { + "epoch": 61.62, + "learning_rate": 4.229710820895523e-05, + "loss": 0.0, + "step": 66060 + }, + { + "epoch": 61.63, + "learning_rate": 4.229664179104478e-05, + "loss": 0.0002, + "step": 66064 + }, + { + "epoch": 61.63, + "learning_rate": 4.2296175373134325e-05, + "loss": 0.0, + "step": 66068 + }, + { + "epoch": 61.63, + "learning_rate": 4.229570895522389e-05, + "loss": 0.0, + "step": 66072 + }, + { + "epoch": 61.64, + "learning_rate": 4.2295242537313435e-05, + "loss": 0.0, + "step": 66076 + }, + { + "epoch": 61.64, + "learning_rate": 4.229477611940298e-05, + "loss": 0.0, + "step": 66080 + }, + { + "epoch": 61.65, + "learning_rate": 4.229430970149254e-05, + "loss": 0.0, + "step": 66084 + }, + { + "epoch": 61.65, + "learning_rate": 4.229384328358209e-05, + "loss": 0.0, + "step": 66088 + }, + { + "epoch": 61.65, + "learning_rate": 4.229337686567164e-05, + "loss": 0.0, + "step": 66092 + }, + { + "epoch": 61.66, + "learning_rate": 4.2292910447761196e-05, + "loss": 0.0, + "step": 66096 + }, + { + "epoch": 61.66, + "learning_rate": 4.2292444029850744e-05, + "loss": 0.0, + "step": 66100 + }, + { + "epoch": 61.66, + "learning_rate": 4.2291977611940306e-05, + "loss": 0.005, + "step": 66104 + }, + { + "epoch": 61.67, + "learning_rate": 4.2291511194029854e-05, + "loss": 0.0, + "step": 66108 + }, + { + "epoch": 61.67, + "learning_rate": 4.22910447761194e-05, + "loss": 0.0, + "step": 66112 + }, + { + "epoch": 61.68, + "learning_rate": 4.229057835820896e-05, + "loss": 0.0, + "step": 66116 + }, + { + "epoch": 61.68, + "learning_rate": 4.229011194029851e-05, + "loss": 0.0, + "step": 66120 + }, + { + "epoch": 61.68, + "learning_rate": 4.228964552238806e-05, + "loss": 0.0, + "step": 66124 + }, + { + "epoch": 61.69, + "learning_rate": 4.2289179104477615e-05, + "loss": 0.0, + "step": 66128 + }, + { + "epoch": 61.69, + "learning_rate": 4.228871268656717e-05, + "loss": 0.0, + "step": 66132 + }, + { + "epoch": 61.69, + "learning_rate": 4.228824626865672e-05, + "loss": 0.0309, + "step": 66136 + }, + { + "epoch": 61.7, + "learning_rate": 4.228777985074627e-05, + "loss": 0.0, + "step": 66140 + }, + { + "epoch": 61.7, + "learning_rate": 4.228731343283582e-05, + "loss": 0.0, + "step": 66144 + }, + { + "epoch": 61.71, + "learning_rate": 4.2286847014925376e-05, + "loss": 0.0, + "step": 66148 + }, + { + "epoch": 61.71, + "learning_rate": 4.228638059701493e-05, + "loss": 0.0, + "step": 66152 + }, + { + "epoch": 61.71, + "learning_rate": 4.228591417910448e-05, + "loss": 0.0, + "step": 66156 + }, + { + "epoch": 61.72, + "learning_rate": 4.228544776119403e-05, + "loss": 0.0, + "step": 66160 + }, + { + "epoch": 61.72, + "learning_rate": 4.228498134328359e-05, + "loss": 0.0, + "step": 66164 + }, + { + "epoch": 61.72, + "learning_rate": 4.228451492537314e-05, + "loss": 0.0, + "step": 66168 + }, + { + "epoch": 61.73, + "learning_rate": 4.2284048507462685e-05, + "loss": 0.0, + "step": 66172 + }, + { + "epoch": 61.73, + "learning_rate": 4.228358208955224e-05, + "loss": 0.0, + "step": 66176 + }, + { + "epoch": 61.73, + "learning_rate": 4.2283115671641795e-05, + "loss": 0.0, + "step": 66180 + }, + { + "epoch": 61.74, + "learning_rate": 4.228264925373134e-05, + "loss": 0.0, + "step": 66184 + }, + { + "epoch": 61.74, + "learning_rate": 4.22821828358209e-05, + "loss": 0.0, + "step": 66188 + }, + { + "epoch": 61.75, + "learning_rate": 4.228171641791045e-05, + "loss": 0.0, + "step": 66192 + }, + { + "epoch": 61.75, + "learning_rate": 4.228125e-05, + "loss": 0.0001, + "step": 66196 + }, + { + "epoch": 61.75, + "learning_rate": 4.2280783582089556e-05, + "loss": 0.0, + "step": 66200 + }, + { + "epoch": 61.76, + "learning_rate": 4.2280317164179104e-05, + "loss": 0.0, + "step": 66204 + }, + { + "epoch": 61.76, + "learning_rate": 4.227985074626866e-05, + "loss": 0.0, + "step": 66208 + }, + { + "epoch": 61.76, + "learning_rate": 4.2279384328358214e-05, + "loss": 0.0, + "step": 66212 + }, + { + "epoch": 61.77, + "learning_rate": 4.227891791044776e-05, + "loss": 0.0, + "step": 66216 + }, + { + "epoch": 61.77, + "learning_rate": 4.227845149253731e-05, + "loss": 0.0, + "step": 66220 + }, + { + "epoch": 61.78, + "learning_rate": 4.227798507462687e-05, + "loss": 0.0, + "step": 66224 + }, + { + "epoch": 61.78, + "learning_rate": 4.227751865671642e-05, + "loss": 0.0, + "step": 66228 + }, + { + "epoch": 61.78, + "learning_rate": 4.227705223880597e-05, + "loss": 0.0, + "step": 66232 + }, + { + "epoch": 61.79, + "learning_rate": 4.227658582089552e-05, + "loss": 0.0, + "step": 66236 + }, + { + "epoch": 61.79, + "learning_rate": 4.227611940298508e-05, + "loss": 0.0, + "step": 66240 + }, + { + "epoch": 61.79, + "learning_rate": 4.2275652985074626e-05, + "loss": 0.0, + "step": 66244 + }, + { + "epoch": 61.8, + "learning_rate": 4.227518656716418e-05, + "loss": 0.0011, + "step": 66248 + }, + { + "epoch": 61.8, + "learning_rate": 4.2274720149253736e-05, + "loss": 0.0, + "step": 66252 + }, + { + "epoch": 61.81, + "learning_rate": 4.2274253731343284e-05, + "loss": 0.0, + "step": 66256 + }, + { + "epoch": 61.81, + "learning_rate": 4.227378731343284e-05, + "loss": 0.0001, + "step": 66260 + }, + { + "epoch": 61.81, + "learning_rate": 4.227332089552239e-05, + "loss": 0.0, + "step": 66264 + }, + { + "epoch": 61.82, + "learning_rate": 4.227285447761194e-05, + "loss": 0.0, + "step": 66268 + }, + { + "epoch": 61.82, + "learning_rate": 4.22723880597015e-05, + "loss": 0.0, + "step": 66272 + }, + { + "epoch": 61.82, + "learning_rate": 4.2271921641791045e-05, + "loss": 0.0007, + "step": 66276 + }, + { + "epoch": 61.83, + "learning_rate": 4.22714552238806e-05, + "loss": 0.0, + "step": 66280 + }, + { + "epoch": 61.83, + "learning_rate": 4.2270988805970155e-05, + "loss": 0.0111, + "step": 66284 + }, + { + "epoch": 61.84, + "learning_rate": 4.22705223880597e-05, + "loss": 0.0006, + "step": 66288 + }, + { + "epoch": 61.84, + "learning_rate": 4.227005597014926e-05, + "loss": 0.0, + "step": 66292 + }, + { + "epoch": 61.84, + "learning_rate": 4.2269589552238806e-05, + "loss": 0.0, + "step": 66296 + }, + { + "epoch": 61.85, + "learning_rate": 4.226912313432836e-05, + "loss": 0.0, + "step": 66300 + }, + { + "epoch": 61.85, + "learning_rate": 4.2268656716417916e-05, + "loss": 0.0, + "step": 66304 + }, + { + "epoch": 61.85, + "learning_rate": 4.2268190298507464e-05, + "loss": 0.0001, + "step": 66308 + }, + { + "epoch": 61.86, + "learning_rate": 4.226772388059701e-05, + "loss": 0.0029, + "step": 66312 + }, + { + "epoch": 61.86, + "learning_rate": 4.2267257462686574e-05, + "loss": 0.0003, + "step": 66316 + }, + { + "epoch": 61.87, + "learning_rate": 4.226679104477612e-05, + "loss": 0.0002, + "step": 66320 + }, + { + "epoch": 61.87, + "learning_rate": 4.226632462686567e-05, + "loss": 0.0002, + "step": 66324 + }, + { + "epoch": 61.87, + "learning_rate": 4.2265858208955225e-05, + "loss": 0.0001, + "step": 66328 + }, + { + "epoch": 61.88, + "learning_rate": 4.226539179104478e-05, + "loss": 0.0, + "step": 66332 + }, + { + "epoch": 61.88, + "learning_rate": 4.226492537313433e-05, + "loss": 0.0, + "step": 66336 + }, + { + "epoch": 61.88, + "learning_rate": 4.226445895522388e-05, + "loss": 0.0, + "step": 66340 + }, + { + "epoch": 61.89, + "learning_rate": 4.226399253731344e-05, + "loss": 0.0, + "step": 66344 + }, + { + "epoch": 61.89, + "learning_rate": 4.2263526119402986e-05, + "loss": 0.0002, + "step": 66348 + }, + { + "epoch": 61.9, + "learning_rate": 4.226305970149254e-05, + "loss": 0.0, + "step": 66352 + }, + { + "epoch": 61.9, + "learning_rate": 4.226259328358209e-05, + "loss": 0.0, + "step": 66356 + }, + { + "epoch": 61.9, + "learning_rate": 4.2262126865671644e-05, + "loss": 0.0009, + "step": 66360 + }, + { + "epoch": 61.91, + "learning_rate": 4.22616604477612e-05, + "loss": 0.0, + "step": 66364 + }, + { + "epoch": 61.91, + "learning_rate": 4.226119402985075e-05, + "loss": 0.0, + "step": 66368 + }, + { + "epoch": 61.91, + "learning_rate": 4.2260727611940295e-05, + "loss": 0.0, + "step": 66372 + }, + { + "epoch": 61.92, + "learning_rate": 4.226026119402986e-05, + "loss": 0.0, + "step": 66376 + }, + { + "epoch": 61.92, + "learning_rate": 4.2259794776119405e-05, + "loss": 0.0, + "step": 66380 + }, + { + "epoch": 61.93, + "learning_rate": 4.225932835820895e-05, + "loss": 0.0012, + "step": 66384 + }, + { + "epoch": 61.93, + "learning_rate": 4.225886194029851e-05, + "loss": 0.0, + "step": 66388 + }, + { + "epoch": 61.93, + "learning_rate": 4.225839552238806e-05, + "loss": 0.0, + "step": 66392 + }, + { + "epoch": 61.94, + "learning_rate": 4.225792910447761e-05, + "loss": 0.0, + "step": 66396 + }, + { + "epoch": 61.94, + "learning_rate": 4.2257462686567166e-05, + "loss": 0.0, + "step": 66400 + }, + { + "epoch": 61.94, + "learning_rate": 4.225699626865672e-05, + "loss": 0.0, + "step": 66404 + }, + { + "epoch": 61.95, + "learning_rate": 4.225652985074627e-05, + "loss": 0.0, + "step": 66408 + }, + { + "epoch": 61.95, + "learning_rate": 4.2256063432835824e-05, + "loss": 0.0005, + "step": 66412 + }, + { + "epoch": 61.96, + "learning_rate": 4.225559701492537e-05, + "loss": 0.0, + "step": 66416 + }, + { + "epoch": 61.96, + "learning_rate": 4.225513059701493e-05, + "loss": 0.0, + "step": 66420 + }, + { + "epoch": 61.96, + "learning_rate": 4.225466417910448e-05, + "loss": 0.0, + "step": 66424 + }, + { + "epoch": 61.97, + "learning_rate": 4.225419776119403e-05, + "loss": 0.0, + "step": 66428 + }, + { + "epoch": 61.97, + "learning_rate": 4.2253731343283585e-05, + "loss": 0.0, + "step": 66432 + }, + { + "epoch": 61.97, + "learning_rate": 4.225326492537314e-05, + "loss": 0.0, + "step": 66436 + }, + { + "epoch": 61.98, + "learning_rate": 4.225279850746269e-05, + "loss": 0.0, + "step": 66440 + }, + { + "epoch": 61.98, + "learning_rate": 4.225233208955224e-05, + "loss": 0.0, + "step": 66444 + }, + { + "epoch": 61.98, + "learning_rate": 4.225186567164179e-05, + "loss": 0.0, + "step": 66448 + }, + { + "epoch": 61.99, + "learning_rate": 4.2251399253731346e-05, + "loss": 0.0004, + "step": 66452 + }, + { + "epoch": 61.99, + "learning_rate": 4.22509328358209e-05, + "loss": 0.0001, + "step": 66456 + }, + { + "epoch": 62.0, + "learning_rate": 4.225046641791045e-05, + "loss": 0.0, + "step": 66460 + }, + { + "epoch": 62.0, + "learning_rate": 4.2250000000000004e-05, + "loss": 0.0, + "step": 66464 + }, + { + "epoch": 62.0, + "learning_rate": 4.224953358208956e-05, + "loss": 0.0, + "step": 66468 + }, + { + "epoch": 62.01, + "learning_rate": 4.224906716417911e-05, + "loss": 0.0, + "step": 66472 + }, + { + "epoch": 62.01, + "learning_rate": 4.2248600746268655e-05, + "loss": 0.0001, + "step": 66476 + }, + { + "epoch": 62.01, + "learning_rate": 4.224813432835821e-05, + "loss": 0.0, + "step": 66480 + }, + { + "epoch": 62.02, + "learning_rate": 4.2247667910447765e-05, + "loss": 0.0019, + "step": 66484 + }, + { + "epoch": 62.02, + "learning_rate": 4.224720149253731e-05, + "loss": 0.0, + "step": 66488 + }, + { + "epoch": 62.03, + "learning_rate": 4.224673507462687e-05, + "loss": 0.0, + "step": 66492 + }, + { + "epoch": 62.03, + "learning_rate": 4.224626865671642e-05, + "loss": 0.0, + "step": 66496 + }, + { + "epoch": 62.03, + "learning_rate": 4.224580223880597e-05, + "loss": 0.0, + "step": 66500 + }, + { + "epoch": 62.03, + "eval_exact_match": 0.7562862669245648, + "eval_exec": 0.7823984526112185, + "eval_loss": 0.4995794892311096, + "eval_runtime": 1097.5777, + "eval_samples_per_second": 0.942, + "step": 66500 + } + ], + "max_steps": 428800, + "num_train_epochs": 400, + "total_flos": 2.2981425097872384e+18, + "trial_name": null, + "trial_params": null +}