{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 203000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25, "learning_rate": 4.9876847290640394e-05, "loss": 2.3164, "step": 500 }, { "epoch": 0.49, "learning_rate": 4.975369458128079e-05, "loss": 2.0051, "step": 1000 }, { "epoch": 0.74, "learning_rate": 4.9630541871921185e-05, "loss": 1.8659, "step": 1500 }, { "epoch": 0.99, "learning_rate": 4.950738916256158e-05, "loss": 1.7825, "step": 2000 }, { "epoch": 1.23, "learning_rate": 4.938423645320197e-05, "loss": 1.723, "step": 2500 }, { "epoch": 1.48, "learning_rate": 4.926108374384237e-05, "loss": 1.6874, "step": 3000 }, { "epoch": 1.72, "learning_rate": 4.913793103448276e-05, "loss": 1.6599, "step": 3500 }, { "epoch": 1.97, "learning_rate": 4.901477832512316e-05, "loss": 1.631, "step": 4000 }, { "epoch": 2.22, "learning_rate": 4.889162561576355e-05, "loss": 1.6082, "step": 4500 }, { "epoch": 2.46, "learning_rate": 4.876847290640394e-05, "loss": 1.5941, "step": 5000 }, { "epoch": 2.71, "learning_rate": 4.8645320197044334e-05, "loss": 1.5733, "step": 5500 }, { "epoch": 2.96, "learning_rate": 4.852216748768473e-05, "loss": 1.5622, "step": 6000 }, { "epoch": 3.2, "learning_rate": 4.8399014778325125e-05, "loss": 1.5521, "step": 6500 }, { "epoch": 3.45, "learning_rate": 4.827586206896552e-05, "loss": 1.54, "step": 7000 }, { "epoch": 3.69, "learning_rate": 4.8152709359605915e-05, "loss": 1.5182, "step": 7500 }, { "epoch": 3.94, "learning_rate": 4.802955665024631e-05, "loss": 1.5135, "step": 8000 }, { "epoch": 4.19, "learning_rate": 4.79064039408867e-05, "loss": 1.5018, "step": 8500 }, { "epoch": 4.43, "learning_rate": 4.77832512315271e-05, "loss": 1.4859, "step": 9000 }, { "epoch": 4.68, "learning_rate": 4.766009852216749e-05, "loss": 1.4816, "step": 9500 }, { "epoch": 4.93, "learning_rate": 4.753694581280788e-05, "loss": 1.4653, "step": 10000 }, { "epoch": 5.17, "learning_rate": 4.741379310344828e-05, "loss": 1.4564, "step": 10500 }, { "epoch": 5.42, "learning_rate": 4.729064039408867e-05, "loss": 1.4477, "step": 11000 }, { "epoch": 5.67, "learning_rate": 4.7167487684729064e-05, "loss": 1.4335, "step": 11500 }, { "epoch": 5.91, "learning_rate": 4.7044334975369456e-05, "loss": 1.43, "step": 12000 }, { "epoch": 6.16, "learning_rate": 4.6921182266009855e-05, "loss": 1.418, "step": 12500 }, { "epoch": 6.4, "learning_rate": 4.679802955665025e-05, "loss": 1.407, "step": 13000 }, { "epoch": 6.65, "learning_rate": 4.6674876847290645e-05, "loss": 1.4022, "step": 13500 }, { "epoch": 6.9, "learning_rate": 4.655172413793104e-05, "loss": 1.3932, "step": 14000 }, { "epoch": 7.14, "learning_rate": 4.642857142857143e-05, "loss": 1.3844, "step": 14500 }, { "epoch": 7.39, "learning_rate": 4.630541871921182e-05, "loss": 1.3764, "step": 15000 }, { "epoch": 7.64, "learning_rate": 4.618226600985222e-05, "loss": 1.3708, "step": 15500 }, { "epoch": 7.88, "learning_rate": 4.605911330049261e-05, "loss": 1.3614, "step": 16000 }, { "epoch": 8.13, "learning_rate": 4.593596059113301e-05, "loss": 1.3515, "step": 16500 }, { "epoch": 8.37, "learning_rate": 4.58128078817734e-05, "loss": 1.3433, "step": 17000 }, { "epoch": 8.62, "learning_rate": 4.5689655172413794e-05, "loss": 1.3365, "step": 17500 }, { "epoch": 8.87, "learning_rate": 4.5566502463054186e-05, "loss": 1.332, "step": 18000 }, { "epoch": 9.11, "learning_rate": 4.544334975369458e-05, "loss": 1.328, "step": 18500 }, { "epoch": 9.36, "learning_rate": 4.532019704433498e-05, "loss": 1.3269, "step": 19000 }, { "epoch": 9.61, "learning_rate": 4.519704433497537e-05, "loss": 1.3189, "step": 19500 }, { "epoch": 9.85, "learning_rate": 4.507389162561577e-05, "loss": 1.3119, "step": 20000 }, { "epoch": 10.1, "learning_rate": 4.495073891625616e-05, "loss": 1.3072, "step": 20500 }, { "epoch": 10.34, "learning_rate": 4.482758620689655e-05, "loss": 1.3027, "step": 21000 }, { "epoch": 10.59, "learning_rate": 4.4704433497536943e-05, "loss": 1.3003, "step": 21500 }, { "epoch": 10.84, "learning_rate": 4.458128078817734e-05, "loss": 1.2961, "step": 22000 }, { "epoch": 11.08, "learning_rate": 4.4458128078817734e-05, "loss": 1.2894, "step": 22500 }, { "epoch": 11.33, "learning_rate": 4.433497536945813e-05, "loss": 1.291, "step": 23000 }, { "epoch": 11.58, "learning_rate": 4.4211822660098525e-05, "loss": 1.2802, "step": 23500 }, { "epoch": 11.82, "learning_rate": 4.408866995073892e-05, "loss": 1.2777, "step": 24000 }, { "epoch": 12.07, "learning_rate": 4.396551724137931e-05, "loss": 1.2764, "step": 24500 }, { "epoch": 12.32, "learning_rate": 4.384236453201971e-05, "loss": 1.2685, "step": 25000 }, { "epoch": 12.56, "learning_rate": 4.37192118226601e-05, "loss": 1.2678, "step": 25500 }, { "epoch": 12.81, "learning_rate": 4.35960591133005e-05, "loss": 1.2634, "step": 26000 }, { "epoch": 13.05, "learning_rate": 4.347290640394089e-05, "loss": 1.2644, "step": 26500 }, { "epoch": 13.3, "learning_rate": 4.334975369458129e-05, "loss": 1.2507, "step": 27000 }, { "epoch": 13.55, "learning_rate": 4.3226600985221674e-05, "loss": 1.2557, "step": 27500 }, { "epoch": 13.79, "learning_rate": 4.3103448275862066e-05, "loss": 1.248, "step": 28000 }, { "epoch": 14.04, "learning_rate": 4.2980295566502464e-05, "loss": 1.249, "step": 28500 }, { "epoch": 14.29, "learning_rate": 4.2857142857142856e-05, "loss": 1.2395, "step": 29000 }, { "epoch": 14.53, "learning_rate": 4.2733990147783255e-05, "loss": 1.2419, "step": 29500 }, { "epoch": 14.78, "learning_rate": 4.261083743842365e-05, "loss": 1.2405, "step": 30000 }, { "epoch": 15.02, "learning_rate": 4.2487684729064046e-05, "loss": 1.2395, "step": 30500 }, { "epoch": 15.27, "learning_rate": 4.236453201970443e-05, "loss": 1.2332, "step": 31000 }, { "epoch": 15.52, "learning_rate": 4.224137931034483e-05, "loss": 1.229, "step": 31500 }, { "epoch": 15.76, "learning_rate": 4.211822660098522e-05, "loss": 1.2324, "step": 32000 }, { "epoch": 16.01, "learning_rate": 4.199507389162562e-05, "loss": 1.2274, "step": 32500 }, { "epoch": 16.26, "learning_rate": 4.187192118226601e-05, "loss": 1.2168, "step": 33000 }, { "epoch": 16.5, "learning_rate": 4.174876847290641e-05, "loss": 1.2198, "step": 33500 }, { "epoch": 16.75, "learning_rate": 4.1625615763546796e-05, "loss": 1.2205, "step": 34000 }, { "epoch": 17.0, "learning_rate": 4.1502463054187195e-05, "loss": 1.2136, "step": 34500 }, { "epoch": 17.24, "learning_rate": 4.1379310344827587e-05, "loss": 1.2102, "step": 35000 }, { "epoch": 17.49, "learning_rate": 4.1256157635467985e-05, "loss": 1.2102, "step": 35500 }, { "epoch": 17.73, "learning_rate": 4.113300492610838e-05, "loss": 1.2068, "step": 36000 }, { "epoch": 17.98, "learning_rate": 4.1009852216748776e-05, "loss": 1.204, "step": 36500 }, { "epoch": 18.23, "learning_rate": 4.088669950738917e-05, "loss": 1.2014, "step": 37000 }, { "epoch": 18.47, "learning_rate": 4.076354679802955e-05, "loss": 1.1988, "step": 37500 }, { "epoch": 18.72, "learning_rate": 4.064039408866995e-05, "loss": 1.1986, "step": 38000 }, { "epoch": 18.97, "learning_rate": 4.0517241379310344e-05, "loss": 1.1939, "step": 38500 }, { "epoch": 19.21, "learning_rate": 4.039408866995074e-05, "loss": 1.1923, "step": 39000 }, { "epoch": 19.46, "learning_rate": 4.0270935960591134e-05, "loss": 1.1919, "step": 39500 }, { "epoch": 19.7, "learning_rate": 4.014778325123153e-05, "loss": 1.1883, "step": 40000 }, { "epoch": 19.95, "learning_rate": 4.002463054187192e-05, "loss": 1.1878, "step": 40500 }, { "epoch": 20.2, "learning_rate": 3.990147783251232e-05, "loss": 1.1851, "step": 41000 }, { "epoch": 20.44, "learning_rate": 3.977832512315271e-05, "loss": 1.184, "step": 41500 }, { "epoch": 20.69, "learning_rate": 3.965517241379311e-05, "loss": 1.1791, "step": 42000 }, { "epoch": 20.94, "learning_rate": 3.95320197044335e-05, "loss": 1.1833, "step": 42500 }, { "epoch": 21.18, "learning_rate": 3.94088669950739e-05, "loss": 1.1757, "step": 43000 }, { "epoch": 21.43, "learning_rate": 3.928571428571429e-05, "loss": 1.176, "step": 43500 }, { "epoch": 21.67, "learning_rate": 3.916256157635468e-05, "loss": 1.1756, "step": 44000 }, { "epoch": 21.92, "learning_rate": 3.9039408866995074e-05, "loss": 1.1698, "step": 44500 }, { "epoch": 22.17, "learning_rate": 3.891625615763547e-05, "loss": 1.1689, "step": 45000 }, { "epoch": 22.41, "learning_rate": 3.8793103448275865e-05, "loss": 1.1655, "step": 45500 }, { "epoch": 22.66, "learning_rate": 3.866995073891626e-05, "loss": 1.1627, "step": 46000 }, { "epoch": 22.91, "learning_rate": 3.8546798029556655e-05, "loss": 1.1638, "step": 46500 }, { "epoch": 23.15, "learning_rate": 3.842364532019704e-05, "loss": 1.1609, "step": 47000 }, { "epoch": 23.4, "learning_rate": 3.830049261083744e-05, "loss": 1.1555, "step": 47500 }, { "epoch": 23.65, "learning_rate": 3.817733990147783e-05, "loss": 1.1593, "step": 48000 }, { "epoch": 23.89, "learning_rate": 3.805418719211823e-05, "loss": 1.1501, "step": 48500 }, { "epoch": 24.14, "learning_rate": 3.793103448275862e-05, "loss": 1.152, "step": 49000 }, { "epoch": 24.38, "learning_rate": 3.780788177339902e-05, "loss": 1.1482, "step": 49500 }, { "epoch": 24.63, "learning_rate": 3.768472906403941e-05, "loss": 1.1492, "step": 50000 }, { "epoch": 24.88, "learning_rate": 3.7561576354679804e-05, "loss": 1.1495, "step": 50500 }, { "epoch": 25.12, "learning_rate": 3.7438423645320196e-05, "loss": 1.1452, "step": 51000 }, { "epoch": 25.37, "learning_rate": 3.7315270935960595e-05, "loss": 1.1436, "step": 51500 }, { "epoch": 25.62, "learning_rate": 3.719211822660099e-05, "loss": 1.1429, "step": 52000 }, { "epoch": 25.86, "learning_rate": 3.7068965517241385e-05, "loss": 1.1409, "step": 52500 }, { "epoch": 26.11, "learning_rate": 3.694581280788178e-05, "loss": 1.1393, "step": 53000 }, { "epoch": 26.35, "learning_rate": 3.682266009852217e-05, "loss": 1.1333, "step": 53500 }, { "epoch": 26.6, "learning_rate": 3.669950738916256e-05, "loss": 1.1365, "step": 54000 }, { "epoch": 26.85, "learning_rate": 3.657635467980296e-05, "loss": 1.1314, "step": 54500 }, { "epoch": 27.09, "learning_rate": 3.645320197044335e-05, "loss": 1.1284, "step": 55000 }, { "epoch": 27.34, "learning_rate": 3.6330049261083744e-05, "loss": 1.1247, "step": 55500 }, { "epoch": 27.59, "learning_rate": 3.620689655172414e-05, "loss": 1.1283, "step": 56000 }, { "epoch": 27.83, "learning_rate": 3.6083743842364534e-05, "loss": 1.1269, "step": 56500 }, { "epoch": 28.08, "learning_rate": 3.5960591133004926e-05, "loss": 1.1239, "step": 57000 }, { "epoch": 28.33, "learning_rate": 3.583743842364532e-05, "loss": 1.1199, "step": 57500 }, { "epoch": 28.57, "learning_rate": 3.571428571428572e-05, "loss": 1.12, "step": 58000 }, { "epoch": 28.82, "learning_rate": 3.559113300492611e-05, "loss": 1.1196, "step": 58500 }, { "epoch": 29.06, "learning_rate": 3.546798029556651e-05, "loss": 1.1197, "step": 59000 }, { "epoch": 29.31, "learning_rate": 3.53448275862069e-05, "loss": 1.1121, "step": 59500 }, { "epoch": 29.56, "learning_rate": 3.522167487684729e-05, "loss": 1.1151, "step": 60000 }, { "epoch": 29.8, "learning_rate": 3.5098522167487683e-05, "loss": 1.1088, "step": 60500 }, { "epoch": 30.05, "learning_rate": 3.497536945812808e-05, "loss": 1.1123, "step": 61000 }, { "epoch": 30.3, "learning_rate": 3.4852216748768474e-05, "loss": 1.1064, "step": 61500 }, { "epoch": 30.54, "learning_rate": 3.472906403940887e-05, "loss": 1.1125, "step": 62000 }, { "epoch": 30.79, "learning_rate": 3.4605911330049265e-05, "loss": 1.1016, "step": 62500 }, { "epoch": 31.03, "learning_rate": 3.4482758620689657e-05, "loss": 1.1071, "step": 63000 }, { "epoch": 31.28, "learning_rate": 3.435960591133005e-05, "loss": 1.1043, "step": 63500 }, { "epoch": 31.53, "learning_rate": 3.423645320197045e-05, "loss": 1.0966, "step": 64000 }, { "epoch": 31.77, "learning_rate": 3.411330049261084e-05, "loss": 1.0971, "step": 64500 }, { "epoch": 32.02, "learning_rate": 3.399014778325123e-05, "loss": 1.0971, "step": 65000 }, { "epoch": 32.27, "learning_rate": 3.386699507389163e-05, "loss": 1.093, "step": 65500 }, { "epoch": 32.51, "learning_rate": 3.374384236453202e-05, "loss": 1.0926, "step": 66000 }, { "epoch": 32.76, "learning_rate": 3.3620689655172414e-05, "loss": 1.0959, "step": 66500 }, { "epoch": 33.0, "learning_rate": 3.3497536945812806e-05, "loss": 1.0963, "step": 67000 }, { "epoch": 33.25, "learning_rate": 3.3374384236453204e-05, "loss": 1.0886, "step": 67500 }, { "epoch": 33.5, "learning_rate": 3.3251231527093596e-05, "loss": 1.0844, "step": 68000 }, { "epoch": 33.74, "learning_rate": 3.3128078817733995e-05, "loss": 1.0889, "step": 68500 }, { "epoch": 33.99, "learning_rate": 3.300492610837439e-05, "loss": 1.0898, "step": 69000 }, { "epoch": 34.24, "learning_rate": 3.288177339901478e-05, "loss": 1.0824, "step": 69500 }, { "epoch": 34.48, "learning_rate": 3.275862068965517e-05, "loss": 1.0871, "step": 70000 }, { "epoch": 34.73, "learning_rate": 3.263546798029557e-05, "loss": 1.0813, "step": 70500 }, { "epoch": 34.98, "learning_rate": 3.251231527093596e-05, "loss": 1.079, "step": 71000 }, { "epoch": 35.22, "learning_rate": 3.238916256157636e-05, "loss": 1.0745, "step": 71500 }, { "epoch": 35.47, "learning_rate": 3.226600985221675e-05, "loss": 1.0752, "step": 72000 }, { "epoch": 35.71, "learning_rate": 3.2142857142857144e-05, "loss": 1.0769, "step": 72500 }, { "epoch": 35.96, "learning_rate": 3.2019704433497536e-05, "loss": 1.0739, "step": 73000 }, { "epoch": 36.21, "learning_rate": 3.1896551724137935e-05, "loss": 1.071, "step": 73500 }, { "epoch": 36.45, "learning_rate": 3.1773399014778326e-05, "loss": 1.0699, "step": 74000 }, { "epoch": 36.7, "learning_rate": 3.165024630541872e-05, "loss": 1.0685, "step": 74500 }, { "epoch": 36.95, "learning_rate": 3.152709359605912e-05, "loss": 1.0708, "step": 75000 }, { "epoch": 37.19, "learning_rate": 3.140394088669951e-05, "loss": 1.0636, "step": 75500 }, { "epoch": 37.44, "learning_rate": 3.12807881773399e-05, "loss": 1.0616, "step": 76000 }, { "epoch": 37.68, "learning_rate": 3.115763546798029e-05, "loss": 1.0658, "step": 76500 }, { "epoch": 37.93, "learning_rate": 3.103448275862069e-05, "loss": 1.0651, "step": 77000 }, { "epoch": 38.18, "learning_rate": 3.0911330049261084e-05, "loss": 1.0605, "step": 77500 }, { "epoch": 38.42, "learning_rate": 3.078817733990148e-05, "loss": 1.0576, "step": 78000 }, { "epoch": 38.67, "learning_rate": 3.0665024630541874e-05, "loss": 1.0626, "step": 78500 }, { "epoch": 38.92, "learning_rate": 3.0541871921182266e-05, "loss": 1.0574, "step": 79000 }, { "epoch": 39.16, "learning_rate": 3.041871921182266e-05, "loss": 1.0521, "step": 79500 }, { "epoch": 39.41, "learning_rate": 3.0295566502463057e-05, "loss": 1.0514, "step": 80000 }, { "epoch": 39.66, "learning_rate": 3.017241379310345e-05, "loss": 1.0535, "step": 80500 }, { "epoch": 39.9, "learning_rate": 3.0049261083743847e-05, "loss": 1.0526, "step": 81000 }, { "epoch": 40.15, "learning_rate": 2.9926108374384236e-05, "loss": 1.0499, "step": 81500 }, { "epoch": 40.39, "learning_rate": 2.9802955665024635e-05, "loss": 1.0482, "step": 82000 }, { "epoch": 40.64, "learning_rate": 2.9679802955665027e-05, "loss": 1.0472, "step": 82500 }, { "epoch": 40.89, "learning_rate": 2.9556650246305422e-05, "loss": 1.0486, "step": 83000 }, { "epoch": 41.13, "learning_rate": 2.9433497536945814e-05, "loss": 1.0422, "step": 83500 }, { "epoch": 41.38, "learning_rate": 2.9310344827586206e-05, "loss": 1.0435, "step": 84000 }, { "epoch": 41.63, "learning_rate": 2.9187192118226604e-05, "loss": 1.0405, "step": 84500 }, { "epoch": 41.87, "learning_rate": 2.9064039408866993e-05, "loss": 1.0426, "step": 85000 }, { "epoch": 42.12, "learning_rate": 2.894088669950739e-05, "loss": 1.0426, "step": 85500 }, { "epoch": 42.36, "learning_rate": 2.8817733990147784e-05, "loss": 1.0376, "step": 86000 }, { "epoch": 42.61, "learning_rate": 2.869458128078818e-05, "loss": 1.0333, "step": 86500 }, { "epoch": 42.86, "learning_rate": 2.857142857142857e-05, "loss": 1.0337, "step": 87000 }, { "epoch": 43.1, "learning_rate": 2.844827586206897e-05, "loss": 1.0322, "step": 87500 }, { "epoch": 43.35, "learning_rate": 2.8325123152709358e-05, "loss": 1.0307, "step": 88000 }, { "epoch": 43.6, "learning_rate": 2.8201970443349757e-05, "loss": 1.0311, "step": 88500 }, { "epoch": 43.84, "learning_rate": 2.807881773399015e-05, "loss": 1.0299, "step": 89000 }, { "epoch": 44.09, "learning_rate": 2.7955665024630544e-05, "loss": 1.0279, "step": 89500 }, { "epoch": 44.33, "learning_rate": 2.7832512315270936e-05, "loss": 1.0219, "step": 90000 }, { "epoch": 44.58, "learning_rate": 2.7709359605911335e-05, "loss": 1.0234, "step": 90500 }, { "epoch": 44.83, "learning_rate": 2.7586206896551727e-05, "loss": 1.0282, "step": 91000 }, { "epoch": 45.07, "learning_rate": 2.7463054187192122e-05, "loss": 1.0206, "step": 91500 }, { "epoch": 45.32, "learning_rate": 2.7339901477832514e-05, "loss": 1.0204, "step": 92000 }, { "epoch": 45.57, "learning_rate": 2.7216748768472906e-05, "loss": 1.0211, "step": 92500 }, { "epoch": 45.81, "learning_rate": 2.70935960591133e-05, "loss": 1.0212, "step": 93000 }, { "epoch": 46.06, "learning_rate": 2.6970443349753693e-05, "loss": 1.0124, "step": 93500 }, { "epoch": 46.31, "learning_rate": 2.6847290640394092e-05, "loss": 1.0156, "step": 94000 }, { "epoch": 46.55, "learning_rate": 2.672413793103448e-05, "loss": 1.0127, "step": 94500 }, { "epoch": 46.8, "learning_rate": 2.660098522167488e-05, "loss": 1.016, "step": 95000 }, { "epoch": 47.04, "learning_rate": 2.647783251231527e-05, "loss": 1.0183, "step": 95500 }, { "epoch": 47.29, "learning_rate": 2.6354679802955666e-05, "loss": 1.014, "step": 96000 }, { "epoch": 47.54, "learning_rate": 2.6231527093596058e-05, "loss": 1.01, "step": 96500 }, { "epoch": 47.78, "learning_rate": 2.6108374384236457e-05, "loss": 1.0105, "step": 97000 }, { "epoch": 48.03, "learning_rate": 2.598522167487685e-05, "loss": 1.0096, "step": 97500 }, { "epoch": 48.28, "learning_rate": 2.5862068965517244e-05, "loss": 1.0056, "step": 98000 }, { "epoch": 48.52, "learning_rate": 2.5738916256157636e-05, "loss": 1.0064, "step": 98500 }, { "epoch": 48.77, "learning_rate": 2.561576354679803e-05, "loss": 1.0081, "step": 99000 }, { "epoch": 49.01, "learning_rate": 2.5492610837438423e-05, "loss": 1.0014, "step": 99500 }, { "epoch": 49.26, "learning_rate": 2.5369458128078822e-05, "loss": 1.0005, "step": 100000 }, { "epoch": 49.51, "learning_rate": 2.5246305418719214e-05, "loss": 0.9986, "step": 100500 }, { "epoch": 49.75, "learning_rate": 2.512315270935961e-05, "loss": 1.003, "step": 101000 }, { "epoch": 50.0, "learning_rate": 2.5e-05, "loss": 1.0022, "step": 101500 }, { "epoch": 50.25, "learning_rate": 2.4876847290640397e-05, "loss": 0.9967, "step": 102000 }, { "epoch": 50.49, "learning_rate": 2.475369458128079e-05, "loss": 0.9949, "step": 102500 }, { "epoch": 50.74, "learning_rate": 2.4630541871921184e-05, "loss": 0.9946, "step": 103000 }, { "epoch": 50.99, "learning_rate": 2.450738916256158e-05, "loss": 0.9998, "step": 103500 }, { "epoch": 51.23, "learning_rate": 2.438423645320197e-05, "loss": 0.9916, "step": 104000 }, { "epoch": 51.48, "learning_rate": 2.4261083743842366e-05, "loss": 0.9964, "step": 104500 }, { "epoch": 51.72, "learning_rate": 2.413793103448276e-05, "loss": 0.9878, "step": 105000 }, { "epoch": 51.97, "learning_rate": 2.4014778325123154e-05, "loss": 0.9953, "step": 105500 }, { "epoch": 52.22, "learning_rate": 2.389162561576355e-05, "loss": 0.9853, "step": 106000 }, { "epoch": 52.46, "learning_rate": 2.376847290640394e-05, "loss": 0.9878, "step": 106500 }, { "epoch": 52.71, "learning_rate": 2.3645320197044336e-05, "loss": 0.987, "step": 107000 }, { "epoch": 52.96, "learning_rate": 2.3522167487684728e-05, "loss": 0.9866, "step": 107500 }, { "epoch": 53.2, "learning_rate": 2.3399014778325123e-05, "loss": 0.9821, "step": 108000 }, { "epoch": 53.45, "learning_rate": 2.327586206896552e-05, "loss": 0.9826, "step": 108500 }, { "epoch": 53.69, "learning_rate": 2.315270935960591e-05, "loss": 0.9806, "step": 109000 }, { "epoch": 53.94, "learning_rate": 2.3029556650246306e-05, "loss": 0.9836, "step": 109500 }, { "epoch": 54.19, "learning_rate": 2.29064039408867e-05, "loss": 0.9807, "step": 110000 }, { "epoch": 54.43, "learning_rate": 2.2783251231527093e-05, "loss": 0.9754, "step": 110500 }, { "epoch": 54.68, "learning_rate": 2.266009852216749e-05, "loss": 0.9767, "step": 111000 }, { "epoch": 54.93, "learning_rate": 2.2536945812807884e-05, "loss": 0.9777, "step": 111500 }, { "epoch": 55.17, "learning_rate": 2.2413793103448276e-05, "loss": 0.9746, "step": 112000 }, { "epoch": 55.42, "learning_rate": 2.229064039408867e-05, "loss": 0.974, "step": 112500 }, { "epoch": 55.67, "learning_rate": 2.2167487684729066e-05, "loss": 0.9755, "step": 113000 }, { "epoch": 55.91, "learning_rate": 2.204433497536946e-05, "loss": 0.9743, "step": 113500 }, { "epoch": 56.16, "learning_rate": 2.1921182266009854e-05, "loss": 0.9691, "step": 114000 }, { "epoch": 56.4, "learning_rate": 2.179802955665025e-05, "loss": 0.9699, "step": 114500 }, { "epoch": 56.65, "learning_rate": 2.1674876847290644e-05, "loss": 0.9742, "step": 115000 }, { "epoch": 56.9, "learning_rate": 2.1551724137931033e-05, "loss": 0.9672, "step": 115500 }, { "epoch": 57.14, "learning_rate": 2.1428571428571428e-05, "loss": 0.9701, "step": 116000 }, { "epoch": 57.39, "learning_rate": 2.1305418719211823e-05, "loss": 0.971, "step": 116500 }, { "epoch": 57.64, "learning_rate": 2.1182266009852215e-05, "loss": 0.9684, "step": 117000 }, { "epoch": 57.88, "learning_rate": 2.105911330049261e-05, "loss": 0.9674, "step": 117500 }, { "epoch": 58.13, "learning_rate": 2.0935960591133006e-05, "loss": 0.966, "step": 118000 }, { "epoch": 58.37, "learning_rate": 2.0812807881773398e-05, "loss": 0.9612, "step": 118500 }, { "epoch": 58.62, "learning_rate": 2.0689655172413793e-05, "loss": 0.9661, "step": 119000 }, { "epoch": 58.87, "learning_rate": 2.056650246305419e-05, "loss": 0.9628, "step": 119500 }, { "epoch": 59.11, "learning_rate": 2.0443349753694584e-05, "loss": 0.9589, "step": 120000 }, { "epoch": 59.36, "learning_rate": 2.0320197044334976e-05, "loss": 0.9559, "step": 120500 }, { "epoch": 59.61, "learning_rate": 2.019704433497537e-05, "loss": 0.9599, "step": 121000 }, { "epoch": 59.85, "learning_rate": 2.0073891625615766e-05, "loss": 0.96, "step": 121500 }, { "epoch": 60.1, "learning_rate": 1.995073891625616e-05, "loss": 0.9556, "step": 122000 }, { "epoch": 60.34, "learning_rate": 1.9827586206896554e-05, "loss": 0.9537, "step": 122500 }, { "epoch": 60.59, "learning_rate": 1.970443349753695e-05, "loss": 0.9548, "step": 123000 }, { "epoch": 60.84, "learning_rate": 1.958128078817734e-05, "loss": 0.956, "step": 123500 }, { "epoch": 61.08, "learning_rate": 1.9458128078817736e-05, "loss": 0.9535, "step": 124000 }, { "epoch": 61.33, "learning_rate": 1.933497536945813e-05, "loss": 0.9515, "step": 124500 }, { "epoch": 61.58, "learning_rate": 1.921182266009852e-05, "loss": 0.9514, "step": 125000 }, { "epoch": 61.82, "learning_rate": 1.9088669950738915e-05, "loss": 0.954, "step": 125500 }, { "epoch": 62.07, "learning_rate": 1.896551724137931e-05, "loss": 0.9512, "step": 126000 }, { "epoch": 62.32, "learning_rate": 1.8842364532019706e-05, "loss": 0.9458, "step": 126500 }, { "epoch": 62.56, "learning_rate": 1.8719211822660098e-05, "loss": 0.9492, "step": 127000 }, { "epoch": 62.81, "learning_rate": 1.8596059113300493e-05, "loss": 0.9438, "step": 127500 }, { "epoch": 63.05, "learning_rate": 1.847290640394089e-05, "loss": 0.9456, "step": 128000 }, { "epoch": 63.3, "learning_rate": 1.834975369458128e-05, "loss": 0.9424, "step": 128500 }, { "epoch": 63.55, "learning_rate": 1.8226600985221676e-05, "loss": 0.9447, "step": 129000 }, { "epoch": 63.79, "learning_rate": 1.810344827586207e-05, "loss": 0.9459, "step": 129500 }, { "epoch": 64.04, "learning_rate": 1.7980295566502463e-05, "loss": 0.947, "step": 130000 }, { "epoch": 64.29, "learning_rate": 1.785714285714286e-05, "loss": 0.9373, "step": 130500 }, { "epoch": 64.53, "learning_rate": 1.7733990147783254e-05, "loss": 0.9417, "step": 131000 }, { "epoch": 64.78, "learning_rate": 1.7610837438423646e-05, "loss": 0.9481, "step": 131500 }, { "epoch": 65.02, "learning_rate": 1.748768472906404e-05, "loss": 0.9422, "step": 132000 }, { "epoch": 65.27, "learning_rate": 1.7364532019704436e-05, "loss": 0.9333, "step": 132500 }, { "epoch": 65.52, "learning_rate": 1.7241379310344828e-05, "loss": 0.9369, "step": 133000 }, { "epoch": 65.76, "learning_rate": 1.7118226600985224e-05, "loss": 0.9407, "step": 133500 }, { "epoch": 66.01, "learning_rate": 1.6995073891625616e-05, "loss": 0.9374, "step": 134000 }, { "epoch": 66.26, "learning_rate": 1.687192118226601e-05, "loss": 0.9338, "step": 134500 }, { "epoch": 66.5, "learning_rate": 1.6748768472906403e-05, "loss": 0.934, "step": 135000 }, { "epoch": 66.75, "learning_rate": 1.6625615763546798e-05, "loss": 0.9339, "step": 135500 }, { "epoch": 67.0, "learning_rate": 1.6502463054187193e-05, "loss": 0.9358, "step": 136000 }, { "epoch": 67.24, "learning_rate": 1.6379310344827585e-05, "loss": 0.9305, "step": 136500 }, { "epoch": 67.49, "learning_rate": 1.625615763546798e-05, "loss": 0.9288, "step": 137000 }, { "epoch": 67.73, "learning_rate": 1.6133004926108376e-05, "loss": 0.9279, "step": 137500 }, { "epoch": 67.98, "learning_rate": 1.6009852216748768e-05, "loss": 0.9305, "step": 138000 }, { "epoch": 68.23, "learning_rate": 1.5886699507389163e-05, "loss": 0.9269, "step": 138500 }, { "epoch": 68.47, "learning_rate": 1.576354679802956e-05, "loss": 0.9291, "step": 139000 }, { "epoch": 68.72, "learning_rate": 1.564039408866995e-05, "loss": 0.9263, "step": 139500 }, { "epoch": 68.97, "learning_rate": 1.5517241379310346e-05, "loss": 0.9271, "step": 140000 }, { "epoch": 69.21, "learning_rate": 1.539408866995074e-05, "loss": 0.9241, "step": 140500 }, { "epoch": 69.46, "learning_rate": 1.5270935960591133e-05, "loss": 0.9268, "step": 141000 }, { "epoch": 69.7, "learning_rate": 1.5147783251231528e-05, "loss": 0.926, "step": 141500 }, { "epoch": 69.95, "learning_rate": 1.5024630541871924e-05, "loss": 0.9231, "step": 142000 }, { "epoch": 70.2, "learning_rate": 1.4901477832512317e-05, "loss": 0.923, "step": 142500 }, { "epoch": 70.44, "learning_rate": 1.4778325123152711e-05, "loss": 0.9211, "step": 143000 }, { "epoch": 70.69, "learning_rate": 1.4655172413793103e-05, "loss": 0.9243, "step": 143500 }, { "epoch": 70.94, "learning_rate": 1.4532019704433496e-05, "loss": 0.9218, "step": 144000 }, { "epoch": 71.18, "learning_rate": 1.4408866995073892e-05, "loss": 0.9178, "step": 144500 }, { "epoch": 71.43, "learning_rate": 1.4285714285714285e-05, "loss": 0.9153, "step": 145000 }, { "epoch": 71.67, "learning_rate": 1.4162561576354679e-05, "loss": 0.9182, "step": 145500 }, { "epoch": 71.92, "learning_rate": 1.4039408866995074e-05, "loss": 0.9184, "step": 146000 }, { "epoch": 72.17, "learning_rate": 1.3916256157635468e-05, "loss": 0.9186, "step": 146500 }, { "epoch": 72.41, "learning_rate": 1.3793103448275863e-05, "loss": 0.914, "step": 147000 }, { "epoch": 72.66, "learning_rate": 1.3669950738916257e-05, "loss": 0.914, "step": 147500 }, { "epoch": 72.91, "learning_rate": 1.354679802955665e-05, "loss": 0.9161, "step": 148000 }, { "epoch": 73.15, "learning_rate": 1.3423645320197046e-05, "loss": 0.9138, "step": 148500 }, { "epoch": 73.4, "learning_rate": 1.330049261083744e-05, "loss": 0.9106, "step": 149000 }, { "epoch": 73.65, "learning_rate": 1.3177339901477833e-05, "loss": 0.9115, "step": 149500 }, { "epoch": 73.89, "learning_rate": 1.3054187192118228e-05, "loss": 0.9135, "step": 150000 }, { "epoch": 74.14, "learning_rate": 1.2931034482758622e-05, "loss": 0.9116, "step": 150500 }, { "epoch": 74.38, "learning_rate": 1.2807881773399016e-05, "loss": 0.9109, "step": 151000 }, { "epoch": 74.63, "learning_rate": 1.2684729064039411e-05, "loss": 0.9097, "step": 151500 }, { "epoch": 74.88, "learning_rate": 1.2561576354679805e-05, "loss": 0.9099, "step": 152000 }, { "epoch": 75.12, "learning_rate": 1.2438423645320198e-05, "loss": 0.9106, "step": 152500 }, { "epoch": 75.37, "learning_rate": 1.2315270935960592e-05, "loss": 0.9098, "step": 153000 }, { "epoch": 75.62, "learning_rate": 1.2192118226600986e-05, "loss": 0.9098, "step": 153500 }, { "epoch": 75.86, "learning_rate": 1.206896551724138e-05, "loss": 0.9092, "step": 154000 }, { "epoch": 76.11, "learning_rate": 1.1945812807881774e-05, "loss": 0.9071, "step": 154500 }, { "epoch": 76.35, "learning_rate": 1.1822660098522168e-05, "loss": 0.9045, "step": 155000 }, { "epoch": 76.6, "learning_rate": 1.1699507389162562e-05, "loss": 0.9051, "step": 155500 }, { "epoch": 76.85, "learning_rate": 1.1576354679802955e-05, "loss": 0.9061, "step": 156000 }, { "epoch": 77.09, "learning_rate": 1.145320197044335e-05, "loss": 0.9023, "step": 156500 }, { "epoch": 77.34, "learning_rate": 1.1330049261083744e-05, "loss": 0.9014, "step": 157000 }, { "epoch": 77.59, "learning_rate": 1.1206896551724138e-05, "loss": 0.9047, "step": 157500 }, { "epoch": 77.83, "learning_rate": 1.1083743842364533e-05, "loss": 0.9004, "step": 158000 }, { "epoch": 78.08, "learning_rate": 1.0960591133004927e-05, "loss": 0.9053, "step": 158500 }, { "epoch": 78.33, "learning_rate": 1.0837438423645322e-05, "loss": 0.9031, "step": 159000 }, { "epoch": 78.57, "learning_rate": 1.0714285714285714e-05, "loss": 0.9027, "step": 159500 }, { "epoch": 78.82, "learning_rate": 1.0591133004926108e-05, "loss": 0.8996, "step": 160000 }, { "epoch": 79.06, "learning_rate": 1.0467980295566503e-05, "loss": 0.9031, "step": 160500 }, { "epoch": 79.31, "learning_rate": 1.0344827586206897e-05, "loss": 0.9, "step": 161000 }, { "epoch": 79.56, "learning_rate": 1.0221674876847292e-05, "loss": 0.9001, "step": 161500 }, { "epoch": 79.8, "learning_rate": 1.0098522167487686e-05, "loss": 0.8986, "step": 162000 }, { "epoch": 80.05, "learning_rate": 9.97536945812808e-06, "loss": 0.8956, "step": 162500 }, { "epoch": 80.3, "learning_rate": 9.852216748768475e-06, "loss": 0.8961, "step": 163000 }, { "epoch": 80.54, "learning_rate": 9.729064039408868e-06, "loss": 0.8973, "step": 163500 }, { "epoch": 80.79, "learning_rate": 9.60591133004926e-06, "loss": 0.8996, "step": 164000 }, { "epoch": 81.03, "learning_rate": 9.482758620689655e-06, "loss": 0.8954, "step": 164500 }, { "epoch": 81.28, "learning_rate": 9.359605911330049e-06, "loss": 0.8925, "step": 165000 }, { "epoch": 81.53, "learning_rate": 9.236453201970444e-06, "loss": 0.8986, "step": 165500 }, { "epoch": 81.77, "learning_rate": 9.113300492610838e-06, "loss": 0.8943, "step": 166000 }, { "epoch": 82.02, "learning_rate": 8.990147783251232e-06, "loss": 0.8972, "step": 166500 }, { "epoch": 82.27, "learning_rate": 8.866995073891627e-06, "loss": 0.8944, "step": 167000 }, { "epoch": 82.51, "learning_rate": 8.74384236453202e-06, "loss": 0.8898, "step": 167500 }, { "epoch": 82.76, "learning_rate": 8.620689655172414e-06, "loss": 0.8929, "step": 168000 }, { "epoch": 83.0, "learning_rate": 8.497536945812808e-06, "loss": 0.8932, "step": 168500 }, { "epoch": 83.25, "learning_rate": 8.374384236453201e-06, "loss": 0.8929, "step": 169000 }, { "epoch": 83.5, "learning_rate": 8.251231527093597e-06, "loss": 0.8909, "step": 169500 }, { "epoch": 83.74, "learning_rate": 8.12807881773399e-06, "loss": 0.889, "step": 170000 }, { "epoch": 83.99, "learning_rate": 8.004926108374384e-06, "loss": 0.8903, "step": 170500 }, { "epoch": 84.24, "learning_rate": 7.88177339901478e-06, "loss": 0.886, "step": 171000 }, { "epoch": 84.48, "learning_rate": 7.758620689655173e-06, "loss": 0.8935, "step": 171500 }, { "epoch": 84.73, "learning_rate": 7.635467980295567e-06, "loss": 0.8919, "step": 172000 }, { "epoch": 84.98, "learning_rate": 7.512315270935962e-06, "loss": 0.8903, "step": 172500 }, { "epoch": 85.22, "learning_rate": 7.3891625615763555e-06, "loss": 0.8864, "step": 173000 }, { "epoch": 85.47, "learning_rate": 7.266009852216748e-06, "loss": 0.8856, "step": 173500 }, { "epoch": 85.71, "learning_rate": 7.142857142857143e-06, "loss": 0.8862, "step": 174000 }, { "epoch": 85.96, "learning_rate": 7.019704433497537e-06, "loss": 0.8874, "step": 174500 }, { "epoch": 86.21, "learning_rate": 6.896551724137932e-06, "loss": 0.8838, "step": 175000 }, { "epoch": 86.45, "learning_rate": 6.773399014778325e-06, "loss": 0.8864, "step": 175500 }, { "epoch": 86.7, "learning_rate": 6.65024630541872e-06, "loss": 0.8805, "step": 176000 }, { "epoch": 86.95, "learning_rate": 6.527093596059114e-06, "loss": 0.8843, "step": 176500 }, { "epoch": 87.19, "learning_rate": 6.403940886699508e-06, "loss": 0.8831, "step": 177000 }, { "epoch": 87.44, "learning_rate": 6.280788177339902e-06, "loss": 0.8841, "step": 177500 }, { "epoch": 87.68, "learning_rate": 6.157635467980296e-06, "loss": 0.8849, "step": 178000 }, { "epoch": 87.93, "learning_rate": 6.03448275862069e-06, "loss": 0.8809, "step": 178500 }, { "epoch": 88.18, "learning_rate": 5.911330049261084e-06, "loss": 0.882, "step": 179000 }, { "epoch": 88.42, "learning_rate": 5.788177339901478e-06, "loss": 0.8855, "step": 179500 }, { "epoch": 88.67, "learning_rate": 5.665024630541872e-06, "loss": 0.8832, "step": 180000 }, { "epoch": 88.92, "learning_rate": 5.541871921182267e-06, "loss": 0.881, "step": 180500 }, { "epoch": 89.16, "learning_rate": 5.418719211822661e-06, "loss": 0.8789, "step": 181000 }, { "epoch": 89.41, "learning_rate": 5.295566502463054e-06, "loss": 0.8787, "step": 181500 }, { "epoch": 89.66, "learning_rate": 5.172413793103448e-06, "loss": 0.8833, "step": 182000 }, { "epoch": 89.9, "learning_rate": 5.049261083743843e-06, "loss": 0.8793, "step": 182500 }, { "epoch": 90.15, "learning_rate": 4.926108374384237e-06, "loss": 0.8768, "step": 183000 }, { "epoch": 90.39, "learning_rate": 4.80295566502463e-06, "loss": 0.8789, "step": 183500 }, { "epoch": 90.64, "learning_rate": 4.6798029556650245e-06, "loss": 0.8754, "step": 184000 }, { "epoch": 90.89, "learning_rate": 4.556650246305419e-06, "loss": 0.8768, "step": 184500 }, { "epoch": 91.13, "learning_rate": 4.4334975369458135e-06, "loss": 0.8786, "step": 185000 }, { "epoch": 91.38, "learning_rate": 4.310344827586207e-06, "loss": 0.8762, "step": 185500 }, { "epoch": 91.63, "learning_rate": 4.187192118226601e-06, "loss": 0.8744, "step": 186000 }, { "epoch": 91.87, "learning_rate": 4.064039408866995e-06, "loss": 0.8771, "step": 186500 }, { "epoch": 92.12, "learning_rate": 3.94088669950739e-06, "loss": 0.8752, "step": 187000 }, { "epoch": 92.36, "learning_rate": 3.817733990147783e-06, "loss": 0.8762, "step": 187500 }, { "epoch": 92.61, "learning_rate": 3.6945812807881777e-06, "loss": 0.8773, "step": 188000 }, { "epoch": 92.86, "learning_rate": 3.5714285714285714e-06, "loss": 0.8774, "step": 188500 }, { "epoch": 93.1, "learning_rate": 3.448275862068966e-06, "loss": 0.8739, "step": 189000 }, { "epoch": 93.35, "learning_rate": 3.32512315270936e-06, "loss": 0.8756, "step": 189500 }, { "epoch": 93.6, "learning_rate": 3.201970443349754e-06, "loss": 0.8725, "step": 190000 }, { "epoch": 93.84, "learning_rate": 3.078817733990148e-06, "loss": 0.8806, "step": 190500 }, { "epoch": 94.09, "learning_rate": 2.955665024630542e-06, "loss": 0.8739, "step": 191000 }, { "epoch": 94.33, "learning_rate": 2.832512315270936e-06, "loss": 0.869, "step": 191500 }, { "epoch": 94.58, "learning_rate": 2.7093596059113305e-06, "loss": 0.8755, "step": 192000 }, { "epoch": 94.83, "learning_rate": 2.586206896551724e-06, "loss": 0.8733, "step": 192500 }, { "epoch": 95.07, "learning_rate": 2.4630541871921186e-06, "loss": 0.8742, "step": 193000 }, { "epoch": 95.32, "learning_rate": 2.3399014778325123e-06, "loss": 0.8729, "step": 193500 }, { "epoch": 95.57, "learning_rate": 2.2167487684729067e-06, "loss": 0.8736, "step": 194000 }, { "epoch": 95.81, "learning_rate": 2.0935960591133003e-06, "loss": 0.8756, "step": 194500 }, { "epoch": 96.06, "learning_rate": 1.970443349753695e-06, "loss": 0.8696, "step": 195000 }, { "epoch": 96.31, "learning_rate": 1.8472906403940889e-06, "loss": 0.8692, "step": 195500 }, { "epoch": 96.55, "learning_rate": 1.724137931034483e-06, "loss": 0.8707, "step": 196000 }, { "epoch": 96.8, "learning_rate": 1.600985221674877e-06, "loss": 0.8748, "step": 196500 }, { "epoch": 97.04, "learning_rate": 1.477832512315271e-06, "loss": 0.8698, "step": 197000 }, { "epoch": 97.29, "learning_rate": 1.3546798029556653e-06, "loss": 0.8698, "step": 197500 }, { "epoch": 97.54, "learning_rate": 1.2315270935960593e-06, "loss": 0.8697, "step": 198000 }, { "epoch": 97.78, "learning_rate": 1.1083743842364534e-06, "loss": 0.8721, "step": 198500 }, { "epoch": 98.03, "learning_rate": 9.852216748768474e-07, "loss": 0.8695, "step": 199000 }, { "epoch": 98.28, "learning_rate": 8.620689655172415e-07, "loss": 0.8677, "step": 199500 }, { "epoch": 98.52, "learning_rate": 7.389162561576355e-07, "loss": 0.8715, "step": 200000 }, { "epoch": 98.77, "learning_rate": 6.157635467980297e-07, "loss": 0.8714, "step": 200500 }, { "epoch": 99.01, "learning_rate": 4.926108374384237e-07, "loss": 0.8703, "step": 201000 }, { "epoch": 99.26, "learning_rate": 3.6945812807881775e-07, "loss": 0.8681, "step": 201500 }, { "epoch": 99.51, "learning_rate": 2.4630541871921185e-07, "loss": 0.8684, "step": 202000 }, { "epoch": 99.75, "learning_rate": 1.2315270935960593e-07, "loss": 0.8731, "step": 202500 }, { "epoch": 100.0, "learning_rate": 0.0, "loss": 0.8699, "step": 203000 } ], "max_steps": 203000, "num_train_epochs": 100, "total_flos": 3.4424249629999104e+18, "trial_name": null, "trial_params": null }