{ "best_metric": null, "best_model_checkpoint": null, "epoch": 16.402405686167306, "global_step": 120000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 5e-05, "loss": 3.1294, "step": 500 }, { "epoch": 0.14, "learning_rate": 4.999658259859204e-05, "loss": 3.1422, "step": 1000 }, { "epoch": 0.21, "learning_rate": 4.999316519718406e-05, "loss": 3.1479, "step": 1500 }, { "epoch": 0.27, "learning_rate": 4.998974779577609e-05, "loss": 3.1412, "step": 2000 }, { "epoch": 0.34, "learning_rate": 4.998633039436813e-05, "loss": 3.1387, "step": 2500 }, { "epoch": 0.41, "learning_rate": 4.9982912992960154e-05, "loss": 3.1344, "step": 3000 }, { "epoch": 0.48, "learning_rate": 4.997949559155218e-05, "loss": 3.1353, "step": 3500 }, { "epoch": 0.55, "learning_rate": 4.9976078190144217e-05, "loss": 3.1283, "step": 4000 }, { "epoch": 0.62, "learning_rate": 4.9972660788736244e-05, "loss": 3.1291, "step": 4500 }, { "epoch": 0.68, "learning_rate": 4.996924338732828e-05, "loss": 3.1226, "step": 5000 }, { "epoch": 0.75, "learning_rate": 4.9965825985920306e-05, "loss": 3.1173, "step": 5500 }, { "epoch": 0.82, "learning_rate": 4.996240858451234e-05, "loss": 3.1126, "step": 6000 }, { "epoch": 0.89, "learning_rate": 4.995899118310437e-05, "loss": 3.1117, "step": 6500 }, { "epoch": 0.96, "learning_rate": 4.99555737816964e-05, "loss": 3.1037, "step": 7000 }, { "epoch": 1.03, "learning_rate": 4.995215638028843e-05, "loss": 3.0981, "step": 7500 }, { "epoch": 1.09, "learning_rate": 4.994873897888046e-05, "loss": 3.0682, "step": 8000 }, { "epoch": 1.16, "learning_rate": 4.994532157747249e-05, "loss": 3.0697, "step": 8500 }, { "epoch": 1.23, "learning_rate": 4.994190417606453e-05, "loss": 3.0673, "step": 9000 }, { "epoch": 1.3, "learning_rate": 4.993848677465655e-05, "loss": 3.0703, "step": 9500 }, { "epoch": 1.37, "learning_rate": 4.993506937324858e-05, "loss": 3.0624, "step": 10000 }, { "epoch": 1.44, "learning_rate": 4.993165197184062e-05, "loss": 3.0618, "step": 10500 }, { "epoch": 1.5, "learning_rate": 4.9928234570432645e-05, "loss": 3.0612, "step": 11000 }, { "epoch": 1.57, "learning_rate": 4.992481716902467e-05, "loss": 3.0651, "step": 11500 }, { "epoch": 1.64, "learning_rate": 4.992139976761671e-05, "loss": 3.0555, "step": 12000 }, { "epoch": 1.71, "learning_rate": 4.9917982366208735e-05, "loss": 3.0609, "step": 12500 }, { "epoch": 1.78, "learning_rate": 4.991456496480077e-05, "loss": 3.0564, "step": 13000 }, { "epoch": 1.85, "learning_rate": 4.99111475633928e-05, "loss": 3.0581, "step": 13500 }, { "epoch": 1.91, "learning_rate": 4.990773016198483e-05, "loss": 3.054, "step": 14000 }, { "epoch": 1.98, "learning_rate": 4.990431276057686e-05, "loss": 3.0559, "step": 14500 }, { "epoch": 2.05, "learning_rate": 4.990089535916889e-05, "loss": 3.0273, "step": 15000 }, { "epoch": 2.12, "learning_rate": 4.989747795776092e-05, "loss": 3.0221, "step": 15500 }, { "epoch": 2.19, "learning_rate": 4.989406055635295e-05, "loss": 3.0218, "step": 16000 }, { "epoch": 2.26, "learning_rate": 4.989064315494498e-05, "loss": 3.0245, "step": 16500 }, { "epoch": 2.32, "learning_rate": 4.988722575353702e-05, "loss": 3.0237, "step": 17000 }, { "epoch": 2.39, "learning_rate": 4.988380835212904e-05, "loss": 3.0224, "step": 17500 }, { "epoch": 2.46, "learning_rate": 4.988039095072107e-05, "loss": 3.0183, "step": 18000 }, { "epoch": 2.53, "learning_rate": 4.987697354931311e-05, "loss": 3.0113, "step": 18500 }, { "epoch": 2.6, "learning_rate": 4.9873556147905135e-05, "loss": 3.0211, "step": 19000 }, { "epoch": 2.67, "learning_rate": 4.987013874649716e-05, "loss": 3.0253, "step": 19500 }, { "epoch": 2.73, "learning_rate": 4.98667213450892e-05, "loss": 3.0143, "step": 20000 }, { "epoch": 2.8, "learning_rate": 4.9863303943681225e-05, "loss": 3.0192, "step": 20500 }, { "epoch": 2.87, "learning_rate": 4.985988654227326e-05, "loss": 3.012, "step": 21000 }, { "epoch": 2.94, "learning_rate": 4.985646914086529e-05, "loss": 3.016, "step": 21500 }, { "epoch": 3.01, "learning_rate": 4.985305173945732e-05, "loss": 3.0087, "step": 22000 }, { "epoch": 3.08, "learning_rate": 4.984963433804935e-05, "loss": 2.9676, "step": 22500 }, { "epoch": 3.14, "learning_rate": 4.9846216936641384e-05, "loss": 2.9644, "step": 23000 }, { "epoch": 3.21, "learning_rate": 4.984279953523341e-05, "loss": 2.9714, "step": 23500 }, { "epoch": 3.28, "learning_rate": 4.983938213382544e-05, "loss": 2.9816, "step": 24000 }, { "epoch": 3.35, "learning_rate": 4.9835964732417474e-05, "loss": 2.9759, "step": 24500 }, { "epoch": 3.42, "learning_rate": 4.983254733100951e-05, "loss": 2.977, "step": 25000 }, { "epoch": 3.49, "learning_rate": 4.982912992960153e-05, "loss": 2.9841, "step": 25500 }, { "epoch": 3.55, "learning_rate": 4.982571252819356e-05, "loss": 2.9731, "step": 26000 }, { "epoch": 3.62, "learning_rate": 4.98222951267856e-05, "loss": 2.9716, "step": 26500 }, { "epoch": 3.69, "learning_rate": 4.9818877725377625e-05, "loss": 2.9762, "step": 27000 }, { "epoch": 3.76, "learning_rate": 4.981546032396965e-05, "loss": 2.9774, "step": 27500 }, { "epoch": 3.83, "learning_rate": 4.981204292256169e-05, "loss": 2.9743, "step": 28000 }, { "epoch": 3.9, "learning_rate": 4.9808625521153715e-05, "loss": 2.9737, "step": 28500 }, { "epoch": 3.96, "learning_rate": 4.980520811974575e-05, "loss": 2.9701, "step": 29000 }, { "epoch": 4.03, "learning_rate": 4.980179071833778e-05, "loss": 2.9589, "step": 29500 }, { "epoch": 4.1, "learning_rate": 4.9798373316929805e-05, "loss": 2.926, "step": 30000 }, { "epoch": 4.17, "learning_rate": 4.979495591552184e-05, "loss": 2.9327, "step": 30500 }, { "epoch": 4.24, "learning_rate": 4.9791538514113874e-05, "loss": 2.9401, "step": 31000 }, { "epoch": 4.31, "learning_rate": 4.97881211127059e-05, "loss": 2.9345, "step": 31500 }, { "epoch": 4.37, "learning_rate": 4.978470371129793e-05, "loss": 2.9348, "step": 32000 }, { "epoch": 4.44, "learning_rate": 4.9781286309889964e-05, "loss": 2.9386, "step": 32500 }, { "epoch": 4.51, "learning_rate": 4.977786890848199e-05, "loss": 2.9312, "step": 33000 }, { "epoch": 4.58, "learning_rate": 4.977445150707402e-05, "loss": 2.9361, "step": 33500 }, { "epoch": 4.65, "learning_rate": 4.9771034105666054e-05, "loss": 2.9316, "step": 34000 }, { "epoch": 4.72, "learning_rate": 4.976761670425809e-05, "loss": 2.9397, "step": 34500 }, { "epoch": 4.78, "learning_rate": 4.9764199302850116e-05, "loss": 2.9377, "step": 35000 }, { "epoch": 4.85, "learning_rate": 4.9760781901442144e-05, "loss": 2.9378, "step": 35500 }, { "epoch": 4.92, "learning_rate": 4.975736450003418e-05, "loss": 2.9306, "step": 36000 }, { "epoch": 4.99, "learning_rate": 4.9753947098626206e-05, "loss": 2.9339, "step": 36500 }, { "epoch": 5.06, "learning_rate": 4.975052969721824e-05, "loss": 2.8964, "step": 37000 }, { "epoch": 5.13, "learning_rate": 4.974711229581027e-05, "loss": 2.8929, "step": 37500 }, { "epoch": 5.19, "learning_rate": 4.9743694894402296e-05, "loss": 2.901, "step": 38000 }, { "epoch": 5.26, "learning_rate": 4.974027749299433e-05, "loss": 2.8934, "step": 38500 }, { "epoch": 5.33, "learning_rate": 4.9736860091586364e-05, "loss": 2.9019, "step": 39000 }, { "epoch": 5.4, "learning_rate": 4.973344269017839e-05, "loss": 2.9037, "step": 39500 }, { "epoch": 5.47, "learning_rate": 4.973002528877042e-05, "loss": 2.8915, "step": 40000 }, { "epoch": 5.54, "learning_rate": 4.9726607887362454e-05, "loss": 2.899, "step": 40500 }, { "epoch": 5.6, "learning_rate": 4.972319048595448e-05, "loss": 2.9026, "step": 41000 }, { "epoch": 5.67, "learning_rate": 4.971977308454651e-05, "loss": 2.9062, "step": 41500 }, { "epoch": 5.74, "learning_rate": 4.9716355683138544e-05, "loss": 2.8974, "step": 42000 }, { "epoch": 5.81, "learning_rate": 4.971293828173058e-05, "loss": 2.8969, "step": 42500 }, { "epoch": 5.88, "learning_rate": 4.9709520880322606e-05, "loss": 2.8972, "step": 43000 }, { "epoch": 5.95, "learning_rate": 4.9706103478914634e-05, "loss": 2.8994, "step": 43500 }, { "epoch": 6.01, "learning_rate": 4.970268607750667e-05, "loss": 2.8925, "step": 44000 }, { "epoch": 6.08, "learning_rate": 4.9699268676098696e-05, "loss": 2.8637, "step": 44500 }, { "epoch": 6.15, "learning_rate": 4.969585127469073e-05, "loss": 2.859, "step": 45000 }, { "epoch": 6.22, "learning_rate": 4.969243387328276e-05, "loss": 2.8624, "step": 45500 }, { "epoch": 6.29, "learning_rate": 4.9689016471874786e-05, "loss": 2.8595, "step": 46000 }, { "epoch": 6.36, "learning_rate": 4.968559907046682e-05, "loss": 2.8632, "step": 46500 }, { "epoch": 6.42, "learning_rate": 4.9682181669058855e-05, "loss": 2.8609, "step": 47000 }, { "epoch": 6.49, "learning_rate": 4.9678764267650876e-05, "loss": 2.8702, "step": 47500 }, { "epoch": 6.56, "learning_rate": 4.967534686624291e-05, "loss": 2.8714, "step": 48000 }, { "epoch": 6.63, "learning_rate": 4.9671929464834945e-05, "loss": 2.8636, "step": 48500 }, { "epoch": 6.7, "learning_rate": 4.966851206342697e-05, "loss": 2.8666, "step": 49000 }, { "epoch": 6.77, "learning_rate": 4.9665094662019e-05, "loss": 2.8701, "step": 49500 }, { "epoch": 6.83, "learning_rate": 4.9661677260611034e-05, "loss": 2.87, "step": 50000 }, { "epoch": 6.9, "learning_rate": 4.965825985920307e-05, "loss": 2.8608, "step": 50500 }, { "epoch": 6.97, "learning_rate": 4.96548424577951e-05, "loss": 2.867, "step": 51000 }, { "epoch": 7.04, "learning_rate": 4.9651425056387124e-05, "loss": 2.8444, "step": 51500 }, { "epoch": 7.11, "learning_rate": 4.964800765497916e-05, "loss": 2.8337, "step": 52000 }, { "epoch": 7.18, "learning_rate": 4.9644590253571186e-05, "loss": 2.8337, "step": 52500 }, { "epoch": 7.24, "learning_rate": 4.964117285216322e-05, "loss": 2.8319, "step": 53000 }, { "epoch": 7.31, "learning_rate": 4.963775545075525e-05, "loss": 2.8317, "step": 53500 }, { "epoch": 7.38, "learning_rate": 4.9634338049347276e-05, "loss": 2.8342, "step": 54000 }, { "epoch": 7.45, "learning_rate": 4.963092064793931e-05, "loss": 2.8273, "step": 54500 }, { "epoch": 7.52, "learning_rate": 4.962750324653134e-05, "loss": 2.8347, "step": 55000 }, { "epoch": 7.59, "learning_rate": 4.9624085845123366e-05, "loss": 2.8383, "step": 55500 }, { "epoch": 7.65, "learning_rate": 4.96206684437154e-05, "loss": 2.8313, "step": 56000 }, { "epoch": 7.72, "learning_rate": 4.9617251042307435e-05, "loss": 2.8387, "step": 56500 }, { "epoch": 7.79, "learning_rate": 4.961383364089946e-05, "loss": 2.834, "step": 57000 }, { "epoch": 7.86, "learning_rate": 4.961041623949149e-05, "loss": 2.8377, "step": 57500 }, { "epoch": 7.93, "learning_rate": 4.9606998838083525e-05, "loss": 2.8365, "step": 58000 }, { "epoch": 8.0, "learning_rate": 4.960358143667555e-05, "loss": 2.8338, "step": 58500 }, { "epoch": 8.06, "learning_rate": 4.960016403526758e-05, "loss": 2.7898, "step": 59000 }, { "epoch": 8.13, "learning_rate": 4.9596746633859615e-05, "loss": 2.7955, "step": 59500 }, { "epoch": 8.2, "learning_rate": 4.959332923245165e-05, "loss": 2.7901, "step": 60000 }, { "epoch": 8.27, "learning_rate": 4.958991183104368e-05, "loss": 2.7974, "step": 60500 }, { "epoch": 8.34, "learning_rate": 4.9586494429635705e-05, "loss": 2.8055, "step": 61000 }, { "epoch": 8.41, "learning_rate": 4.958307702822774e-05, "loss": 2.8088, "step": 61500 }, { "epoch": 8.47, "learning_rate": 4.957965962681977e-05, "loss": 2.8061, "step": 62000 }, { "epoch": 8.54, "learning_rate": 4.95762422254118e-05, "loss": 2.8005, "step": 62500 }, { "epoch": 8.61, "learning_rate": 4.957282482400383e-05, "loss": 2.8056, "step": 63000 }, { "epoch": 8.68, "learning_rate": 4.9569407422595856e-05, "loss": 2.8096, "step": 63500 }, { "epoch": 8.75, "learning_rate": 4.956599002118789e-05, "loss": 2.8077, "step": 64000 }, { "epoch": 8.82, "learning_rate": 4.9562572619779925e-05, "loss": 2.8107, "step": 64500 }, { "epoch": 8.88, "learning_rate": 4.955915521837195e-05, "loss": 2.8123, "step": 65000 }, { "epoch": 8.95, "learning_rate": 4.955573781696398e-05, "loss": 2.8121, "step": 65500 }, { "epoch": 9.02, "learning_rate": 4.9552320415556015e-05, "loss": 2.7953, "step": 66000 }, { "epoch": 9.09, "learning_rate": 4.954890301414804e-05, "loss": 2.7705, "step": 66500 }, { "epoch": 9.16, "learning_rate": 4.954548561274007e-05, "loss": 2.7682, "step": 67000 }, { "epoch": 9.23, "learning_rate": 4.9542068211332105e-05, "loss": 2.7656, "step": 67500 }, { "epoch": 9.29, "learning_rate": 4.953865080992414e-05, "loss": 2.7742, "step": 68000 }, { "epoch": 9.36, "learning_rate": 4.953523340851617e-05, "loss": 2.7677, "step": 68500 }, { "epoch": 9.43, "learning_rate": 4.9531816007108195e-05, "loss": 2.7772, "step": 69000 }, { "epoch": 9.5, "learning_rate": 4.952839860570023e-05, "loss": 2.7756, "step": 69500 }, { "epoch": 9.57, "learning_rate": 4.952498120429226e-05, "loss": 2.7735, "step": 70000 }, { "epoch": 9.64, "learning_rate": 4.952156380288429e-05, "loss": 2.7794, "step": 70500 }, { "epoch": 9.7, "learning_rate": 4.951814640147632e-05, "loss": 2.7803, "step": 71000 }, { "epoch": 9.77, "learning_rate": 4.951472900006835e-05, "loss": 2.7819, "step": 71500 }, { "epoch": 9.84, "learning_rate": 4.951131159866038e-05, "loss": 2.7798, "step": 72000 }, { "epoch": 9.91, "learning_rate": 4.9507894197252416e-05, "loss": 2.7858, "step": 72500 }, { "epoch": 9.98, "learning_rate": 4.950447679584444e-05, "loss": 2.7831, "step": 73000 }, { "epoch": 10.05, "learning_rate": 4.950105939443647e-05, "loss": 2.7485, "step": 73500 }, { "epoch": 10.11, "learning_rate": 4.9497641993028506e-05, "loss": 2.7361, "step": 74000 }, { "epoch": 10.18, "learning_rate": 4.949422459162053e-05, "loss": 2.739, "step": 74500 }, { "epoch": 10.25, "learning_rate": 4.949080719021256e-05, "loss": 2.7434, "step": 75000 }, { "epoch": 10.32, "learning_rate": 4.9487389788804595e-05, "loss": 2.7427, "step": 75500 }, { "epoch": 10.39, "learning_rate": 4.948397238739663e-05, "loss": 2.7424, "step": 76000 }, { "epoch": 10.46, "learning_rate": 4.948055498598866e-05, "loss": 2.756, "step": 76500 }, { "epoch": 10.52, "learning_rate": 4.9477137584580685e-05, "loss": 2.7434, "step": 77000 }, { "epoch": 10.59, "learning_rate": 4.947372018317272e-05, "loss": 2.7512, "step": 77500 }, { "epoch": 10.66, "learning_rate": 4.947030278176475e-05, "loss": 2.7527, "step": 78000 }, { "epoch": 10.73, "learning_rate": 4.946688538035678e-05, "loss": 2.7605, "step": 78500 }, { "epoch": 10.8, "learning_rate": 4.946346797894881e-05, "loss": 2.7624, "step": 79000 }, { "epoch": 10.87, "learning_rate": 4.946005057754084e-05, "loss": 2.7563, "step": 79500 }, { "epoch": 10.93, "learning_rate": 4.945663317613287e-05, "loss": 2.7531, "step": 80000 }, { "epoch": 11.0, "learning_rate": 4.9453215774724906e-05, "loss": 2.7539, "step": 80500 }, { "epoch": 11.07, "learning_rate": 4.944979837331693e-05, "loss": 2.7114, "step": 81000 }, { "epoch": 11.14, "learning_rate": 4.944638097190896e-05, "loss": 2.7128, "step": 81500 }, { "epoch": 11.21, "learning_rate": 4.9442963570500996e-05, "loss": 2.7177, "step": 82000 }, { "epoch": 11.28, "learning_rate": 4.9439546169093024e-05, "loss": 2.7212, "step": 82500 }, { "epoch": 11.34, "learning_rate": 4.943612876768505e-05, "loss": 2.7224, "step": 83000 }, { "epoch": 11.41, "learning_rate": 4.9432711366277086e-05, "loss": 2.7234, "step": 83500 }, { "epoch": 11.48, "learning_rate": 4.9429293964869113e-05, "loss": 2.7233, "step": 84000 }, { "epoch": 11.55, "learning_rate": 4.942587656346115e-05, "loss": 2.7297, "step": 84500 }, { "epoch": 11.62, "learning_rate": 4.9422459162053176e-05, "loss": 2.7211, "step": 85000 }, { "epoch": 11.69, "learning_rate": 4.941904176064521e-05, "loss": 2.7282, "step": 85500 }, { "epoch": 11.76, "learning_rate": 4.941562435923724e-05, "loss": 2.7272, "step": 86000 }, { "epoch": 11.82, "learning_rate": 4.941220695782927e-05, "loss": 2.7325, "step": 86500 }, { "epoch": 11.89, "learning_rate": 4.94087895564213e-05, "loss": 2.7275, "step": 87000 }, { "epoch": 11.96, "learning_rate": 4.940537215501333e-05, "loss": 2.7303, "step": 87500 }, { "epoch": 12.03, "learning_rate": 4.940195475360536e-05, "loss": 2.7091, "step": 88000 }, { "epoch": 12.1, "learning_rate": 4.9398537352197397e-05, "loss": 2.6863, "step": 88500 }, { "epoch": 12.17, "learning_rate": 4.939511995078942e-05, "loss": 2.6867, "step": 89000 }, { "epoch": 12.23, "learning_rate": 4.939170254938145e-05, "loss": 2.6934, "step": 89500 }, { "epoch": 12.3, "learning_rate": 4.9388285147973486e-05, "loss": 2.6936, "step": 90000 }, { "epoch": 12.37, "learning_rate": 4.9384867746565514e-05, "loss": 2.6953, "step": 90500 }, { "epoch": 12.44, "learning_rate": 4.938145034515754e-05, "loss": 2.6973, "step": 91000 }, { "epoch": 12.51, "learning_rate": 4.9378032943749576e-05, "loss": 2.7022, "step": 91500 }, { "epoch": 12.58, "learning_rate": 4.9374615542341604e-05, "loss": 2.6977, "step": 92000 }, { "epoch": 12.64, "learning_rate": 4.937119814093364e-05, "loss": 2.6996, "step": 92500 }, { "epoch": 12.71, "learning_rate": 4.9367780739525666e-05, "loss": 2.7008, "step": 93000 }, { "epoch": 12.78, "learning_rate": 4.93643633381177e-05, "loss": 2.7091, "step": 93500 }, { "epoch": 12.85, "learning_rate": 4.936094593670973e-05, "loss": 2.7057, "step": 94000 }, { "epoch": 12.92, "learning_rate": 4.935752853530176e-05, "loss": 2.7084, "step": 94500 }, { "epoch": 12.99, "learning_rate": 4.935411113389379e-05, "loss": 2.7114, "step": 95000 }, { "epoch": 13.05, "learning_rate": 4.935069373248582e-05, "loss": 2.6727, "step": 95500 }, { "epoch": 13.12, "learning_rate": 4.934727633107785e-05, "loss": 2.6586, "step": 96000 }, { "epoch": 13.19, "learning_rate": 4.934385892966989e-05, "loss": 2.6631, "step": 96500 }, { "epoch": 13.26, "learning_rate": 4.934044152826191e-05, "loss": 2.6632, "step": 97000 }, { "epoch": 13.33, "learning_rate": 4.933702412685394e-05, "loss": 2.6727, "step": 97500 }, { "epoch": 13.4, "learning_rate": 4.933360672544598e-05, "loss": 2.6671, "step": 98000 }, { "epoch": 13.46, "learning_rate": 4.9330189324038004e-05, "loss": 2.686, "step": 98500 }, { "epoch": 13.53, "learning_rate": 4.932677192263003e-05, "loss": 2.6796, "step": 99000 }, { "epoch": 13.6, "learning_rate": 4.9323354521222067e-05, "loss": 2.6745, "step": 99500 }, { "epoch": 13.67, "learning_rate": 4.9319937119814094e-05, "loss": 2.6825, "step": 100000 }, { "epoch": 13.74, "learning_rate": 4.931651971840613e-05, "loss": 2.6857, "step": 100500 }, { "epoch": 13.81, "learning_rate": 4.9313102316998156e-05, "loss": 2.6812, "step": 101000 }, { "epoch": 13.87, "learning_rate": 4.9309684915590184e-05, "loss": 2.6805, "step": 101500 }, { "epoch": 13.94, "learning_rate": 4.930626751418222e-05, "loss": 2.6857, "step": 102000 }, { "epoch": 14.01, "learning_rate": 4.930285011277425e-05, "loss": 2.6768, "step": 102500 }, { "epoch": 14.08, "learning_rate": 4.929943271136628e-05, "loss": 2.6435, "step": 103000 }, { "epoch": 14.15, "learning_rate": 4.929601530995831e-05, "loss": 2.6508, "step": 103500 }, { "epoch": 14.22, "learning_rate": 4.929259790855034e-05, "loss": 2.6444, "step": 104000 }, { "epoch": 14.28, "learning_rate": 4.928918050714238e-05, "loss": 2.6396, "step": 104500 }, { "epoch": 14.35, "learning_rate": 4.92857631057344e-05, "loss": 2.6452, "step": 105000 }, { "epoch": 14.42, "learning_rate": 4.928234570432643e-05, "loss": 2.6535, "step": 105500 }, { "epoch": 14.49, "learning_rate": 4.927892830291847e-05, "loss": 2.6482, "step": 106000 }, { "epoch": 14.56, "learning_rate": 4.9275510901510495e-05, "loss": 2.6567, "step": 106500 }, { "epoch": 14.63, "learning_rate": 4.927209350010252e-05, "loss": 2.6589, "step": 107000 }, { "epoch": 14.69, "learning_rate": 4.926867609869456e-05, "loss": 2.6564, "step": 107500 }, { "epoch": 14.76, "learning_rate": 4.9265258697286585e-05, "loss": 2.6579, "step": 108000 }, { "epoch": 14.83, "learning_rate": 4.926184129587861e-05, "loss": 2.658, "step": 108500 }, { "epoch": 14.9, "learning_rate": 4.925842389447065e-05, "loss": 2.6658, "step": 109000 }, { "epoch": 14.97, "learning_rate": 4.9255006493062674e-05, "loss": 2.6535, "step": 109500 }, { "epoch": 15.04, "learning_rate": 4.925158909165471e-05, "loss": 2.6404, "step": 110000 }, { "epoch": 15.1, "learning_rate": 4.9248171690246737e-05, "loss": 2.6185, "step": 110500 }, { "epoch": 15.17, "learning_rate": 4.924475428883877e-05, "loss": 2.6158, "step": 111000 }, { "epoch": 15.24, "learning_rate": 4.92413368874308e-05, "loss": 2.621, "step": 111500 }, { "epoch": 15.31, "learning_rate": 4.923791948602283e-05, "loss": 2.6296, "step": 112000 }, { "epoch": 15.38, "learning_rate": 4.923450208461486e-05, "loss": 2.6284, "step": 112500 }, { "epoch": 15.45, "learning_rate": 4.923108468320689e-05, "loss": 2.6252, "step": 113000 }, { "epoch": 15.51, "learning_rate": 4.922766728179892e-05, "loss": 2.6355, "step": 113500 }, { "epoch": 15.58, "learning_rate": 4.922424988039096e-05, "loss": 2.6337, "step": 114000 }, { "epoch": 15.65, "learning_rate": 4.922083247898298e-05, "loss": 2.6337, "step": 114500 }, { "epoch": 15.72, "learning_rate": 4.921741507757501e-05, "loss": 2.6354, "step": 115000 }, { "epoch": 15.79, "learning_rate": 4.921399767616705e-05, "loss": 2.6371, "step": 115500 }, { "epoch": 15.86, "learning_rate": 4.9210580274759075e-05, "loss": 2.6427, "step": 116000 }, { "epoch": 15.92, "learning_rate": 4.92071628733511e-05, "loss": 2.639, "step": 116500 }, { "epoch": 15.99, "learning_rate": 4.920374547194314e-05, "loss": 2.6426, "step": 117000 }, { "epoch": 16.06, "learning_rate": 4.9200328070535165e-05, "loss": 2.5977, "step": 117500 }, { "epoch": 16.13, "learning_rate": 4.91969106691272e-05, "loss": 2.5961, "step": 118000 }, { "epoch": 16.2, "learning_rate": 4.919349326771923e-05, "loss": 2.6007, "step": 118500 }, { "epoch": 16.27, "learning_rate": 4.919007586631126e-05, "loss": 2.6015, "step": 119000 }, { "epoch": 16.33, "learning_rate": 4.918665846490329e-05, "loss": 2.6064, "step": 119500 }, { "epoch": 16.4, "learning_rate": 4.9183241063495324e-05, "loss": 2.6125, "step": 120000 } ], "max_steps": 7316000, "num_train_epochs": 1000, "total_flos": 241553911465377792, "trial_name": null, "trial_params": null }