{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 451623, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.920484962997953e-07, "loss": 10.3537, "step": 500 }, { "epoch": 0.0, "learning_rate": 9.840969925995906e-07, "loss": 3.8759, "step": 1000 }, { "epoch": 0.0, "learning_rate": 1.4761454888993861e-06, "loss": 3.3657, "step": 1500 }, { "epoch": 0.0, "learning_rate": 1.9681939851991812e-06, "loss": 3.2292, "step": 2000 }, { "epoch": 0.0, "learning_rate": 2.4602424814989765e-06, "loss": 3.1654, "step": 2500 }, { "epoch": 0.01, "learning_rate": 2.9522909777987723e-06, "loss": 3.1135, "step": 3000 }, { "epoch": 0.01, "learning_rate": 3.444339474098567e-06, "loss": 3.0643, "step": 3500 }, { "epoch": 0.01, "learning_rate": 3.9363879703983625e-06, "loss": 3.0263, "step": 4000 }, { "epoch": 0.01, "learning_rate": 4.428436466698158e-06, "loss": 2.9939, "step": 4500 }, { "epoch": 0.01, "learning_rate": 4.920484962997953e-06, "loss": 2.9628, "step": 5000 }, { "epoch": 0.01, "learning_rate": 5.412533459297749e-06, "loss": 2.9285, "step": 5500 }, { "epoch": 0.01, "learning_rate": 5.9045819555975445e-06, "loss": 2.897, "step": 6000 }, { "epoch": 0.01, "learning_rate": 6.396630451897339e-06, "loss": 2.8763, "step": 6500 }, { "epoch": 0.01, "learning_rate": 6.888678948197134e-06, "loss": 2.8517, "step": 7000 }, { "epoch": 0.01, "learning_rate": 7.380727444496929e-06, "loss": 2.8288, "step": 7500 }, { "epoch": 0.02, "learning_rate": 7.872775940796725e-06, "loss": 2.8043, "step": 8000 }, { "epoch": 0.02, "learning_rate": 8.36482443709652e-06, "loss": 2.7839, "step": 8500 }, { "epoch": 0.02, "learning_rate": 8.856872933396316e-06, "loss": 2.7614, "step": 9000 }, { "epoch": 0.02, "learning_rate": 9.348921429696111e-06, "loss": 2.7452, "step": 9500 }, { "epoch": 0.02, "learning_rate": 9.840969925995906e-06, "loss": 2.7313, "step": 10000 }, { "epoch": 0.02, "learning_rate": 1.0333018422295703e-05, "loss": 2.7144, "step": 10500 }, { "epoch": 0.02, "learning_rate": 1.0825066918595498e-05, "loss": 2.6997, "step": 11000 }, { "epoch": 0.02, "learning_rate": 1.1317115414895292e-05, "loss": 2.6785, "step": 11500 }, { "epoch": 0.02, "learning_rate": 1.1809163911195089e-05, "loss": 2.6693, "step": 12000 }, { "epoch": 0.02, "learning_rate": 1.2301212407494884e-05, "loss": 2.6494, "step": 12500 }, { "epoch": 0.03, "learning_rate": 1.2793260903794679e-05, "loss": 2.6353, "step": 13000 }, { "epoch": 0.03, "learning_rate": 1.3285309400094472e-05, "loss": 2.6276, "step": 13500 }, { "epoch": 0.03, "learning_rate": 1.3777357896394269e-05, "loss": 2.6187, "step": 14000 }, { "epoch": 0.03, "learning_rate": 1.4269406392694065e-05, "loss": 2.6011, "step": 14500 }, { "epoch": 0.03, "learning_rate": 1.4761454888993858e-05, "loss": 2.5932, "step": 15000 }, { "epoch": 0.03, "learning_rate": 1.5253503385293655e-05, "loss": 2.5815, "step": 15500 }, { "epoch": 0.03, "learning_rate": 1.574555188159345e-05, "loss": 2.5736, "step": 16000 }, { "epoch": 0.03, "learning_rate": 1.6237600377893248e-05, "loss": 2.5592, "step": 16500 }, { "epoch": 0.03, "learning_rate": 1.672964887419304e-05, "loss": 2.5488, "step": 17000 }, { "epoch": 0.03, "learning_rate": 1.7221697370492838e-05, "loss": 2.5394, "step": 17500 }, { "epoch": 0.04, "learning_rate": 1.7713745866792633e-05, "loss": 2.5307, "step": 18000 }, { "epoch": 0.04, "learning_rate": 1.8205794363092428e-05, "loss": 2.5219, "step": 18500 }, { "epoch": 0.04, "learning_rate": 1.8697842859392223e-05, "loss": 2.514, "step": 19000 }, { "epoch": 0.04, "learning_rate": 1.9189891355692017e-05, "loss": 2.5038, "step": 19500 }, { "epoch": 0.04, "learning_rate": 1.9681939851991812e-05, "loss": 2.4932, "step": 20000 }, { "epoch": 0.04, "learning_rate": 2.0173988348291607e-05, "loss": 2.4895, "step": 20500 }, { "epoch": 0.04, "learning_rate": 2.0666036844591405e-05, "loss": 2.4754, "step": 21000 }, { "epoch": 0.04, "learning_rate": 2.1158085340891197e-05, "loss": 2.4703, "step": 21500 }, { "epoch": 0.04, "learning_rate": 2.1650133837190995e-05, "loss": 2.4602, "step": 22000 }, { "epoch": 0.04, "learning_rate": 2.214218233349079e-05, "loss": 2.4609, "step": 22500 }, { "epoch": 0.05, "learning_rate": 2.2634230829790585e-05, "loss": 2.4458, "step": 23000 }, { "epoch": 0.05, "learning_rate": 2.312627932609038e-05, "loss": 2.439, "step": 23500 }, { "epoch": 0.05, "learning_rate": 2.3618327822390178e-05, "loss": 2.4354, "step": 24000 }, { "epoch": 0.05, "learning_rate": 2.411037631868997e-05, "loss": 2.4263, "step": 24500 }, { "epoch": 0.05, "learning_rate": 2.4602424814989768e-05, "loss": 2.4238, "step": 25000 }, { "epoch": 0.05, "learning_rate": 2.5094473311289563e-05, "loss": 2.4135, "step": 25500 }, { "epoch": 0.05, "learning_rate": 2.5586521807589358e-05, "loss": 2.4098, "step": 26000 }, { "epoch": 0.05, "learning_rate": 2.6078570303889156e-05, "loss": 2.4012, "step": 26500 }, { "epoch": 0.05, "learning_rate": 2.6570618800188944e-05, "loss": 2.3966, "step": 27000 }, { "epoch": 0.05, "learning_rate": 2.7062667296488742e-05, "loss": 2.386, "step": 27500 }, { "epoch": 0.06, "learning_rate": 2.7554715792788537e-05, "loss": 2.3838, "step": 28000 }, { "epoch": 0.06, "learning_rate": 2.8046764289088335e-05, "loss": 2.3787, "step": 28500 }, { "epoch": 0.06, "learning_rate": 2.853881278538813e-05, "loss": 2.3711, "step": 29000 }, { "epoch": 0.06, "learning_rate": 2.9030861281687925e-05, "loss": 2.365, "step": 29500 }, { "epoch": 0.06, "learning_rate": 2.9522909777987717e-05, "loss": 2.3625, "step": 30000 }, { "epoch": 0.06, "learning_rate": 3.0014958274287515e-05, "loss": 2.3555, "step": 30500 }, { "epoch": 0.06, "learning_rate": 3.050700677058731e-05, "loss": 2.349, "step": 31000 }, { "epoch": 0.06, "learning_rate": 3.099905526688711e-05, "loss": 2.3439, "step": 31500 }, { "epoch": 0.06, "learning_rate": 3.14911037631869e-05, "loss": 2.3382, "step": 32000 }, { "epoch": 0.06, "learning_rate": 3.19831522594867e-05, "loss": 2.3355, "step": 32500 }, { "epoch": 0.06, "learning_rate": 3.2475200755786496e-05, "loss": 2.3265, "step": 33000 }, { "epoch": 0.07, "learning_rate": 3.296724925208629e-05, "loss": 2.3289, "step": 33500 }, { "epoch": 0.07, "learning_rate": 3.345929774838608e-05, "loss": 2.3275, "step": 34000 }, { "epoch": 0.07, "learning_rate": 3.395134624468588e-05, "loss": 2.3242, "step": 34500 }, { "epoch": 0.07, "learning_rate": 3.4443394740985676e-05, "loss": 2.311, "step": 35000 }, { "epoch": 0.07, "learning_rate": 3.493544323728547e-05, "loss": 2.3148, "step": 35500 }, { "epoch": 0.07, "learning_rate": 3.5427491733585265e-05, "loss": 2.3088, "step": 36000 }, { "epoch": 0.07, "learning_rate": 3.591954022988506e-05, "loss": 2.301, "step": 36500 }, { "epoch": 0.07, "learning_rate": 3.6411588726184855e-05, "loss": 2.2969, "step": 37000 }, { "epoch": 0.07, "learning_rate": 3.690363722248465e-05, "loss": 2.2989, "step": 37500 }, { "epoch": 0.07, "learning_rate": 3.7395685718784445e-05, "loss": 2.2882, "step": 38000 }, { "epoch": 0.08, "learning_rate": 3.788773421508424e-05, "loss": 2.2869, "step": 38500 }, { "epoch": 0.08, "learning_rate": 3.8379782711384035e-05, "loss": 2.279, "step": 39000 }, { "epoch": 0.08, "learning_rate": 3.8871831207683826e-05, "loss": 2.2817, "step": 39500 }, { "epoch": 0.08, "learning_rate": 3.9363879703983625e-05, "loss": 2.2769, "step": 40000 }, { "epoch": 0.08, "learning_rate": 3.985592820028342e-05, "loss": 2.271, "step": 40500 }, { "epoch": 0.08, "learning_rate": 4.0347976696583214e-05, "loss": 2.2703, "step": 41000 }, { "epoch": 0.08, "learning_rate": 4.084002519288301e-05, "loss": 2.2644, "step": 41500 }, { "epoch": 0.08, "learning_rate": 4.133207368918281e-05, "loss": 2.2578, "step": 42000 }, { "epoch": 0.08, "learning_rate": 4.18241221854826e-05, "loss": 2.2568, "step": 42500 }, { "epoch": 0.08, "learning_rate": 4.2316170681782394e-05, "loss": 2.2565, "step": 43000 }, { "epoch": 0.09, "learning_rate": 4.280821917808219e-05, "loss": 2.2539, "step": 43500 }, { "epoch": 0.09, "learning_rate": 4.330026767438199e-05, "loss": 2.2493, "step": 44000 }, { "epoch": 0.09, "learning_rate": 4.379231617068179e-05, "loss": 2.2488, "step": 44500 }, { "epoch": 0.09, "learning_rate": 4.428436466698158e-05, "loss": 2.2388, "step": 45000 }, { "epoch": 0.09, "learning_rate": 4.477641316328138e-05, "loss": 2.2368, "step": 45500 }, { "epoch": 0.09, "learning_rate": 4.526846165958117e-05, "loss": 2.2366, "step": 46000 }, { "epoch": 0.09, "learning_rate": 4.576051015588096e-05, "loss": 2.2323, "step": 46500 }, { "epoch": 0.09, "learning_rate": 4.625255865218076e-05, "loss": 2.2288, "step": 47000 }, { "epoch": 0.09, "learning_rate": 4.674460714848056e-05, "loss": 2.23, "step": 47500 }, { "epoch": 0.09, "learning_rate": 4.7236655644780356e-05, "loss": 2.223, "step": 48000 }, { "epoch": 0.1, "learning_rate": 4.772870414108015e-05, "loss": 2.2232, "step": 48500 }, { "epoch": 0.1, "learning_rate": 4.822075263737994e-05, "loss": 2.2188, "step": 49000 }, { "epoch": 0.1, "learning_rate": 4.871280113367974e-05, "loss": 2.217, "step": 49500 }, { "epoch": 0.1, "learning_rate": 4.9204849629979536e-05, "loss": 2.2118, "step": 50000 }, { "epoch": 0.1, "learning_rate": 4.969689812627933e-05, "loss": 2.2089, "step": 50500 }, { "epoch": 0.1, "learning_rate": 4.997900574717671e-05, "loss": 2.2101, "step": 51000 }, { "epoch": 0.1, "learning_rate": 4.992433321378273e-05, "loss": 2.2048, "step": 51500 }, { "epoch": 0.1, "learning_rate": 4.986966068038875e-05, "loss": 2.2026, "step": 52000 }, { "epoch": 0.1, "learning_rate": 4.981498814699476e-05, "loss": 2.2, "step": 52500 }, { "epoch": 0.1, "learning_rate": 4.976031561360078e-05, "loss": 2.1998, "step": 53000 }, { "epoch": 0.11, "learning_rate": 4.97056430802068e-05, "loss": 2.1905, "step": 53500 }, { "epoch": 0.11, "learning_rate": 4.965097054681281e-05, "loss": 2.1896, "step": 54000 }, { "epoch": 0.11, "learning_rate": 4.959629801341883e-05, "loss": 2.1886, "step": 54500 }, { "epoch": 0.11, "learning_rate": 4.954162548002484e-05, "loss": 2.1825, "step": 55000 }, { "epoch": 0.11, "learning_rate": 4.948695294663086e-05, "loss": 2.1807, "step": 55500 }, { "epoch": 0.11, "learning_rate": 4.943228041323688e-05, "loss": 2.1814, "step": 56000 }, { "epoch": 0.11, "learning_rate": 4.9377607879842896e-05, "loss": 2.1797, "step": 56500 }, { "epoch": 0.11, "learning_rate": 4.9322935346448914e-05, "loss": 2.1714, "step": 57000 }, { "epoch": 0.11, "learning_rate": 4.9268262813054925e-05, "loss": 2.1766, "step": 57500 }, { "epoch": 0.11, "learning_rate": 4.921359027966094e-05, "loss": 2.1725, "step": 58000 }, { "epoch": 0.12, "learning_rate": 4.915891774626696e-05, "loss": 2.1672, "step": 58500 }, { "epoch": 0.12, "learning_rate": 4.910424521287298e-05, "loss": 2.1707, "step": 59000 }, { "epoch": 0.12, "learning_rate": 4.9049572679479e-05, "loss": 2.1636, "step": 59500 }, { "epoch": 0.12, "learning_rate": 4.899490014608501e-05, "loss": 2.1627, "step": 60000 }, { "epoch": 0.12, "learning_rate": 4.894022761269103e-05, "loss": 2.1598, "step": 60500 }, { "epoch": 0.12, "learning_rate": 4.8885555079297044e-05, "loss": 2.1535, "step": 61000 }, { "epoch": 0.12, "learning_rate": 4.883088254590306e-05, "loss": 2.1531, "step": 61500 }, { "epoch": 0.12, "learning_rate": 4.8776210012509074e-05, "loss": 2.1548, "step": 62000 }, { "epoch": 0.12, "learning_rate": 4.872153747911509e-05, "loss": 2.1451, "step": 62500 }, { "epoch": 0.12, "learning_rate": 4.8666864945721116e-05, "loss": 2.1497, "step": 63000 }, { "epoch": 0.12, "learning_rate": 4.861219241232713e-05, "loss": 2.1474, "step": 63500 }, { "epoch": 0.13, "learning_rate": 4.8557519878933146e-05, "loss": 2.1442, "step": 64000 }, { "epoch": 0.13, "learning_rate": 4.850284734553916e-05, "loss": 2.1417, "step": 64500 }, { "epoch": 0.13, "learning_rate": 4.8448174812145175e-05, "loss": 2.1426, "step": 65000 }, { "epoch": 0.13, "learning_rate": 4.839350227875119e-05, "loss": 2.1396, "step": 65500 }, { "epoch": 0.13, "learning_rate": 4.833882974535721e-05, "loss": 2.1371, "step": 66000 }, { "epoch": 0.13, "learning_rate": 4.828415721196323e-05, "loss": 2.1352, "step": 66500 }, { "epoch": 0.13, "learning_rate": 4.822948467856924e-05, "loss": 2.1342, "step": 67000 }, { "epoch": 0.13, "learning_rate": 4.8174812145175265e-05, "loss": 2.1329, "step": 67500 }, { "epoch": 0.13, "learning_rate": 4.8120139611781276e-05, "loss": 2.1272, "step": 68000 }, { "epoch": 0.13, "learning_rate": 4.8065467078387294e-05, "loss": 2.1278, "step": 68500 }, { "epoch": 0.14, "learning_rate": 4.801079454499331e-05, "loss": 2.1251, "step": 69000 }, { "epoch": 0.14, "learning_rate": 4.7956122011599323e-05, "loss": 2.1222, "step": 69500 }, { "epoch": 0.14, "learning_rate": 4.790144947820535e-05, "loss": 2.1157, "step": 70000 }, { "epoch": 0.14, "learning_rate": 4.784677694481136e-05, "loss": 2.1234, "step": 70500 }, { "epoch": 0.14, "learning_rate": 4.779210441141738e-05, "loss": 2.1167, "step": 71000 }, { "epoch": 0.14, "learning_rate": 4.773743187802339e-05, "loss": 2.1182, "step": 71500 }, { "epoch": 0.14, "learning_rate": 4.768275934462941e-05, "loss": 2.121, "step": 72000 }, { "epoch": 0.14, "learning_rate": 4.762808681123543e-05, "loss": 2.1115, "step": 72500 }, { "epoch": 0.14, "learning_rate": 4.757341427784144e-05, "loss": 2.1171, "step": 73000 }, { "epoch": 0.14, "learning_rate": 4.751874174444746e-05, "loss": 2.11, "step": 73500 }, { "epoch": 0.15, "learning_rate": 4.746406921105347e-05, "loss": 2.1108, "step": 74000 }, { "epoch": 0.15, "learning_rate": 4.74093966776595e-05, "loss": 2.1058, "step": 74500 }, { "epoch": 0.15, "learning_rate": 4.735472414426551e-05, "loss": 2.1005, "step": 75000 }, { "epoch": 0.15, "learning_rate": 4.7300051610871526e-05, "loss": 2.1043, "step": 75500 }, { "epoch": 0.15, "learning_rate": 4.7245379077477544e-05, "loss": 2.1027, "step": 76000 }, { "epoch": 0.15, "learning_rate": 4.7190706544083555e-05, "loss": 2.0973, "step": 76500 }, { "epoch": 0.15, "learning_rate": 4.713603401068958e-05, "loss": 2.0947, "step": 77000 }, { "epoch": 0.15, "learning_rate": 4.708136147729559e-05, "loss": 2.1008, "step": 77500 }, { "epoch": 0.15, "learning_rate": 4.702668894390161e-05, "loss": 2.0995, "step": 78000 }, { "epoch": 0.15, "learning_rate": 4.697201641050763e-05, "loss": 2.0932, "step": 78500 }, { "epoch": 0.16, "learning_rate": 4.691734387711364e-05, "loss": 2.0966, "step": 79000 }, { "epoch": 0.16, "learning_rate": 4.686267134371966e-05, "loss": 2.0932, "step": 79500 }, { "epoch": 0.16, "learning_rate": 4.6807998810325674e-05, "loss": 2.0949, "step": 80000 }, { "epoch": 0.16, "learning_rate": 4.675332627693169e-05, "loss": 2.0912, "step": 80500 }, { "epoch": 0.16, "learning_rate": 4.669865374353771e-05, "loss": 2.0885, "step": 81000 }, { "epoch": 0.16, "learning_rate": 4.664398121014373e-05, "loss": 2.0904, "step": 81500 }, { "epoch": 0.16, "learning_rate": 4.6589308676749746e-05, "loss": 2.0858, "step": 82000 }, { "epoch": 0.16, "learning_rate": 4.653463614335576e-05, "loss": 2.0828, "step": 82500 }, { "epoch": 0.16, "learning_rate": 4.6479963609961776e-05, "loss": 2.0827, "step": 83000 }, { "epoch": 0.16, "learning_rate": 4.642529107656779e-05, "loss": 2.0816, "step": 83500 }, { "epoch": 0.17, "learning_rate": 4.637061854317381e-05, "loss": 2.0789, "step": 84000 }, { "epoch": 0.17, "learning_rate": 4.631594600977982e-05, "loss": 2.0789, "step": 84500 }, { "epoch": 0.17, "learning_rate": 4.626127347638584e-05, "loss": 2.0811, "step": 85000 }, { "epoch": 0.17, "learning_rate": 4.620660094299186e-05, "loss": 2.0819, "step": 85500 }, { "epoch": 0.17, "learning_rate": 4.615192840959787e-05, "loss": 2.0768, "step": 86000 }, { "epoch": 0.17, "learning_rate": 4.6097255876203895e-05, "loss": 2.0715, "step": 86500 }, { "epoch": 0.17, "learning_rate": 4.6042583342809906e-05, "loss": 2.0742, "step": 87000 }, { "epoch": 0.17, "learning_rate": 4.5987910809415924e-05, "loss": 2.0699, "step": 87500 }, { "epoch": 0.17, "learning_rate": 4.593323827602194e-05, "loss": 2.0743, "step": 88000 }, { "epoch": 0.17, "learning_rate": 4.587856574262796e-05, "loss": 2.0694, "step": 88500 }, { "epoch": 0.18, "learning_rate": 4.582389320923398e-05, "loss": 2.0675, "step": 89000 }, { "epoch": 0.18, "learning_rate": 4.576922067583999e-05, "loss": 2.0656, "step": 89500 }, { "epoch": 0.18, "learning_rate": 4.571454814244601e-05, "loss": 2.0637, "step": 90000 }, { "epoch": 0.18, "learning_rate": 4.5659875609052025e-05, "loss": 2.0667, "step": 90500 }, { "epoch": 0.18, "learning_rate": 4.560520307565804e-05, "loss": 2.062, "step": 91000 }, { "epoch": 0.18, "learning_rate": 4.555053054226406e-05, "loss": 2.0659, "step": 91500 }, { "epoch": 0.18, "learning_rate": 4.549585800887007e-05, "loss": 2.0597, "step": 92000 }, { "epoch": 0.18, "learning_rate": 4.544118547547609e-05, "loss": 2.067, "step": 92500 }, { "epoch": 0.18, "learning_rate": 4.53865129420821e-05, "loss": 2.0593, "step": 93000 }, { "epoch": 0.18, "learning_rate": 4.5331840408688126e-05, "loss": 2.0562, "step": 93500 }, { "epoch": 0.19, "learning_rate": 4.527716787529414e-05, "loss": 2.0589, "step": 94000 }, { "epoch": 0.19, "learning_rate": 4.5222495341900156e-05, "loss": 2.0566, "step": 94500 }, { "epoch": 0.19, "learning_rate": 4.5167822808506174e-05, "loss": 2.0576, "step": 95000 }, { "epoch": 0.19, "learning_rate": 4.511315027511219e-05, "loss": 2.058, "step": 95500 }, { "epoch": 0.19, "learning_rate": 4.505847774171821e-05, "loss": 2.0528, "step": 96000 }, { "epoch": 0.19, "learning_rate": 4.500380520832422e-05, "loss": 2.0562, "step": 96500 }, { "epoch": 0.19, "learning_rate": 4.494913267493024e-05, "loss": 2.049, "step": 97000 }, { "epoch": 0.19, "learning_rate": 4.489446014153626e-05, "loss": 2.0536, "step": 97500 }, { "epoch": 0.19, "learning_rate": 4.4839787608142275e-05, "loss": 2.0538, "step": 98000 }, { "epoch": 0.19, "learning_rate": 4.478511507474829e-05, "loss": 2.0466, "step": 98500 }, { "epoch": 0.19, "learning_rate": 4.4730442541354304e-05, "loss": 2.049, "step": 99000 }, { "epoch": 0.2, "learning_rate": 4.467577000796032e-05, "loss": 2.0455, "step": 99500 }, { "epoch": 0.2, "learning_rate": 4.462109747456634e-05, "loss": 2.0441, "step": 100000 }, { "epoch": 0.2, "learning_rate": 4.456642494117236e-05, "loss": 2.0477, "step": 100500 }, { "epoch": 0.2, "learning_rate": 4.4511752407778376e-05, "loss": 2.0425, "step": 101000 }, { "epoch": 0.2, "learning_rate": 4.445707987438439e-05, "loss": 2.0475, "step": 101500 }, { "epoch": 0.2, "learning_rate": 4.4402407340990405e-05, "loss": 2.0482, "step": 102000 }, { "epoch": 0.2, "learning_rate": 4.434773480759642e-05, "loss": 2.0483, "step": 102500 }, { "epoch": 0.2, "learning_rate": 4.429306227420244e-05, "loss": 2.0375, "step": 103000 }, { "epoch": 0.2, "learning_rate": 4.423838974080845e-05, "loss": 2.0384, "step": 103500 }, { "epoch": 0.2, "learning_rate": 4.418371720741447e-05, "loss": 2.0383, "step": 104000 }, { "epoch": 0.21, "learning_rate": 4.412904467402049e-05, "loss": 2.037, "step": 104500 }, { "epoch": 0.21, "learning_rate": 4.4074372140626507e-05, "loss": 2.0315, "step": 105000 }, { "epoch": 0.21, "learning_rate": 4.4019699607232525e-05, "loss": 2.0375, "step": 105500 }, { "epoch": 0.21, "learning_rate": 4.3965027073838536e-05, "loss": 2.0414, "step": 106000 }, { "epoch": 0.21, "learning_rate": 4.3910354540444554e-05, "loss": 2.0324, "step": 106500 }, { "epoch": 0.21, "learning_rate": 4.385568200705057e-05, "loss": 2.0316, "step": 107000 }, { "epoch": 0.21, "learning_rate": 4.380100947365659e-05, "loss": 2.0343, "step": 107500 }, { "epoch": 0.21, "learning_rate": 4.374633694026261e-05, "loss": 2.0314, "step": 108000 }, { "epoch": 0.21, "learning_rate": 4.369166440686862e-05, "loss": 2.0347, "step": 108500 }, { "epoch": 0.21, "learning_rate": 4.363699187347464e-05, "loss": 2.0268, "step": 109000 }, { "epoch": 0.22, "learning_rate": 4.3582319340080655e-05, "loss": 2.0323, "step": 109500 }, { "epoch": 0.22, "learning_rate": 4.352764680668667e-05, "loss": 2.03, "step": 110000 }, { "epoch": 0.22, "learning_rate": 4.347297427329269e-05, "loss": 2.0231, "step": 110500 }, { "epoch": 0.22, "learning_rate": 4.34183017398987e-05, "loss": 2.0279, "step": 111000 }, { "epoch": 0.22, "learning_rate": 4.336362920650472e-05, "loss": 2.0298, "step": 111500 }, { "epoch": 0.22, "learning_rate": 4.330895667311074e-05, "loss": 2.0408, "step": 112000 }, { "epoch": 0.22, "learning_rate": 4.3254284139716756e-05, "loss": 2.0199, "step": 112500 }, { "epoch": 0.22, "learning_rate": 4.319961160632277e-05, "loss": 2.0297, "step": 113000 }, { "epoch": 0.22, "learning_rate": 4.3144939072928786e-05, "loss": 2.0287, "step": 113500 }, { "epoch": 0.22, "learning_rate": 4.309026653953481e-05, "loss": 2.0256, "step": 114000 }, { "epoch": 0.23, "learning_rate": 4.303559400614082e-05, "loss": 2.0197, "step": 114500 }, { "epoch": 0.23, "learning_rate": 4.298092147274684e-05, "loss": 2.0234, "step": 115000 }, { "epoch": 0.23, "learning_rate": 4.292624893935285e-05, "loss": 2.0391, "step": 115500 }, { "epoch": 0.23, "learning_rate": 4.287157640595887e-05, "loss": 2.0772, "step": 116000 }, { "epoch": 0.23, "learning_rate": 4.281690387256489e-05, "loss": 2.0586, "step": 116500 }, { "epoch": 0.23, "learning_rate": 4.2762231339170905e-05, "loss": 2.0336, "step": 117000 }, { "epoch": 0.23, "learning_rate": 4.270755880577692e-05, "loss": 2.0207, "step": 117500 }, { "epoch": 0.23, "learning_rate": 4.2652886272382934e-05, "loss": 2.0197, "step": 118000 }, { "epoch": 0.23, "learning_rate": 4.259821373898895e-05, "loss": 2.0184, "step": 118500 }, { "epoch": 0.23, "learning_rate": 4.254354120559497e-05, "loss": 2.0162, "step": 119000 }, { "epoch": 0.24, "learning_rate": 4.248886867220099e-05, "loss": 2.0139, "step": 119500 }, { "epoch": 0.24, "learning_rate": 4.2434196138807006e-05, "loss": 2.0141, "step": 120000 }, { "epoch": 0.24, "learning_rate": 4.237952360541302e-05, "loss": 2.0119, "step": 120500 }, { "epoch": 0.24, "learning_rate": 4.232485107201904e-05, "loss": 2.0109, "step": 121000 }, { "epoch": 0.24, "learning_rate": 4.227017853862505e-05, "loss": 2.0112, "step": 121500 }, { "epoch": 0.24, "learning_rate": 4.221550600523107e-05, "loss": 2.0106, "step": 122000 }, { "epoch": 0.24, "learning_rate": 4.216083347183708e-05, "loss": 2.0115, "step": 122500 }, { "epoch": 0.24, "learning_rate": 4.21061609384431e-05, "loss": 2.0164, "step": 123000 }, { "epoch": 0.24, "learning_rate": 4.2051488405049125e-05, "loss": 2.0055, "step": 123500 }, { "epoch": 0.24, "learning_rate": 4.1996815871655136e-05, "loss": 2.0117, "step": 124000 }, { "epoch": 0.25, "learning_rate": 4.1942143338261154e-05, "loss": 2.008, "step": 124500 }, { "epoch": 0.25, "learning_rate": 4.1887470804867166e-05, "loss": 2.0077, "step": 125000 }, { "epoch": 0.25, "learning_rate": 4.1832798271473184e-05, "loss": 2.009, "step": 125500 }, { "epoch": 0.25, "learning_rate": 4.17781257380792e-05, "loss": 2.0096, "step": 126000 }, { "epoch": 0.25, "learning_rate": 4.172345320468522e-05, "loss": 2.0025, "step": 126500 }, { "epoch": 0.25, "learning_rate": 4.166878067129124e-05, "loss": 2.0013, "step": 127000 }, { "epoch": 0.25, "learning_rate": 4.161410813789725e-05, "loss": 2.0031, "step": 127500 }, { "epoch": 0.25, "learning_rate": 4.1559435604503274e-05, "loss": 2.0039, "step": 128000 }, { "epoch": 0.25, "learning_rate": 4.1504763071109285e-05, "loss": 2.0002, "step": 128500 }, { "epoch": 0.25, "learning_rate": 4.14500905377153e-05, "loss": 2.0007, "step": 129000 }, { "epoch": 0.25, "learning_rate": 4.139541800432132e-05, "loss": 2.0078, "step": 129500 }, { "epoch": 0.26, "learning_rate": 4.134074547092733e-05, "loss": 1.9981, "step": 130000 }, { "epoch": 0.26, "learning_rate": 4.128607293753336e-05, "loss": 1.9989, "step": 130500 }, { "epoch": 0.26, "learning_rate": 4.123140040413937e-05, "loss": 1.997, "step": 131000 }, { "epoch": 0.26, "learning_rate": 4.1176727870745386e-05, "loss": 2.0004, "step": 131500 }, { "epoch": 0.26, "learning_rate": 4.11220553373514e-05, "loss": 1.9983, "step": 132000 }, { "epoch": 0.26, "learning_rate": 4.1067382803957415e-05, "loss": 1.9995, "step": 132500 }, { "epoch": 0.26, "learning_rate": 4.101271027056344e-05, "loss": 1.9994, "step": 133000 }, { "epoch": 0.26, "learning_rate": 4.095803773716945e-05, "loss": 1.9956, "step": 133500 }, { "epoch": 0.26, "learning_rate": 4.090336520377547e-05, "loss": 1.994, "step": 134000 }, { "epoch": 0.26, "learning_rate": 4.084869267038148e-05, "loss": 1.9955, "step": 134500 }, { "epoch": 0.27, "learning_rate": 4.0794020136987505e-05, "loss": 1.9994, "step": 135000 }, { "epoch": 0.27, "learning_rate": 4.0739347603593517e-05, "loss": 1.9897, "step": 135500 }, { "epoch": 0.27, "learning_rate": 4.0684675070199535e-05, "loss": 1.9961, "step": 136000 }, { "epoch": 0.27, "learning_rate": 4.063000253680555e-05, "loss": 1.9946, "step": 136500 }, { "epoch": 0.27, "learning_rate": 4.0575330003411564e-05, "loss": 1.991, "step": 137000 }, { "epoch": 0.27, "learning_rate": 4.052065747001759e-05, "loss": 1.9952, "step": 137500 }, { "epoch": 0.27, "learning_rate": 4.04659849366236e-05, "loss": 1.9848, "step": 138000 }, { "epoch": 0.27, "learning_rate": 4.041131240322962e-05, "loss": 1.9937, "step": 138500 }, { "epoch": 0.27, "learning_rate": 4.0356639869835636e-05, "loss": 1.9893, "step": 139000 }, { "epoch": 0.27, "learning_rate": 4.030196733644165e-05, "loss": 1.9871, "step": 139500 }, { "epoch": 0.28, "learning_rate": 4.024729480304767e-05, "loss": 1.9855, "step": 140000 }, { "epoch": 0.28, "learning_rate": 4.019262226965368e-05, "loss": 1.9887, "step": 140500 }, { "epoch": 0.28, "learning_rate": 4.01379497362597e-05, "loss": 1.9863, "step": 141000 }, { "epoch": 0.28, "learning_rate": 4.008327720286571e-05, "loss": 1.9904, "step": 141500 }, { "epoch": 0.28, "learning_rate": 4.002860466947174e-05, "loss": 1.9886, "step": 142000 }, { "epoch": 0.28, "learning_rate": 3.9973932136077755e-05, "loss": 1.9871, "step": 142500 }, { "epoch": 0.28, "learning_rate": 3.9919259602683766e-05, "loss": 1.9843, "step": 143000 }, { "epoch": 0.28, "learning_rate": 3.9864587069289784e-05, "loss": 1.9891, "step": 143500 }, { "epoch": 0.28, "learning_rate": 3.9809914535895795e-05, "loss": 1.985, "step": 144000 }, { "epoch": 0.28, "learning_rate": 3.975524200250182e-05, "loss": 1.9826, "step": 144500 }, { "epoch": 0.29, "learning_rate": 3.970056946910783e-05, "loss": 1.9839, "step": 145000 }, { "epoch": 0.29, "learning_rate": 3.964589693571385e-05, "loss": 1.9831, "step": 145500 }, { "epoch": 0.29, "learning_rate": 3.959122440231987e-05, "loss": 1.9843, "step": 146000 }, { "epoch": 0.29, "learning_rate": 3.953655186892588e-05, "loss": 1.9825, "step": 146500 }, { "epoch": 0.29, "learning_rate": 3.9481879335531903e-05, "loss": 1.9752, "step": 147000 }, { "epoch": 0.29, "learning_rate": 3.9427206802137915e-05, "loss": 1.9787, "step": 147500 }, { "epoch": 0.29, "learning_rate": 3.937253426874393e-05, "loss": 1.984, "step": 148000 }, { "epoch": 0.29, "learning_rate": 3.931786173534995e-05, "loss": 1.9783, "step": 148500 }, { "epoch": 0.29, "learning_rate": 3.926318920195597e-05, "loss": 1.9785, "step": 149000 }, { "epoch": 0.29, "learning_rate": 3.920851666856199e-05, "loss": 1.9759, "step": 149500 }, { "epoch": 0.3, "learning_rate": 3.9153844135168e-05, "loss": 1.9803, "step": 150000 }, { "epoch": 0.3, "learning_rate": 3.9099171601774016e-05, "loss": 1.9733, "step": 150500 }, { "epoch": 0.3, "learning_rate": 3.904449906838003e-05, "loss": 1.9759, "step": 151000 }, { "epoch": 0.3, "learning_rate": 3.898982653498605e-05, "loss": 1.9767, "step": 151500 }, { "epoch": 0.3, "learning_rate": 3.893515400159207e-05, "loss": 1.9773, "step": 152000 }, { "epoch": 0.3, "learning_rate": 3.888048146819808e-05, "loss": 1.9736, "step": 152500 }, { "epoch": 0.3, "learning_rate": 3.88258089348041e-05, "loss": 1.9729, "step": 153000 }, { "epoch": 0.3, "learning_rate": 3.877113640141012e-05, "loss": 1.9756, "step": 153500 }, { "epoch": 0.3, "learning_rate": 3.8716463868016135e-05, "loss": 1.9747, "step": 154000 }, { "epoch": 0.3, "learning_rate": 3.8661791334622146e-05, "loss": 1.9732, "step": 154500 }, { "epoch": 0.31, "learning_rate": 3.8607118801228164e-05, "loss": 1.9696, "step": 155000 }, { "epoch": 0.31, "learning_rate": 3.855244626783418e-05, "loss": 1.9731, "step": 155500 }, { "epoch": 0.31, "learning_rate": 3.84977737344402e-05, "loss": 1.9741, "step": 156000 }, { "epoch": 0.31, "learning_rate": 3.844310120104622e-05, "loss": 1.9747, "step": 156500 }, { "epoch": 0.31, "learning_rate": 3.838842866765223e-05, "loss": 1.9717, "step": 157000 }, { "epoch": 0.31, "learning_rate": 3.833375613425825e-05, "loss": 1.9724, "step": 157500 }, { "epoch": 0.31, "learning_rate": 3.8279083600864266e-05, "loss": 1.9663, "step": 158000 }, { "epoch": 0.31, "learning_rate": 3.8224411067470284e-05, "loss": 1.9671, "step": 158500 }, { "epoch": 0.31, "learning_rate": 3.81697385340763e-05, "loss": 1.9684, "step": 159000 }, { "epoch": 0.31, "learning_rate": 3.811506600068231e-05, "loss": 1.9683, "step": 159500 }, { "epoch": 0.31, "learning_rate": 3.806039346728833e-05, "loss": 1.9682, "step": 160000 }, { "epoch": 0.32, "learning_rate": 3.800572093389435e-05, "loss": 1.9673, "step": 160500 }, { "epoch": 0.32, "learning_rate": 3.795104840050037e-05, "loss": 1.9639, "step": 161000 }, { "epoch": 0.32, "learning_rate": 3.7896375867106385e-05, "loss": 1.9633, "step": 161500 }, { "epoch": 0.32, "learning_rate": 3.7841703333712396e-05, "loss": 1.964, "step": 162000 }, { "epoch": 0.32, "learning_rate": 3.7787030800318414e-05, "loss": 1.9648, "step": 162500 }, { "epoch": 0.32, "learning_rate": 3.773235826692443e-05, "loss": 1.9636, "step": 163000 }, { "epoch": 0.32, "learning_rate": 3.767768573353045e-05, "loss": 1.9623, "step": 163500 }, { "epoch": 0.32, "learning_rate": 3.762301320013646e-05, "loss": 1.9656, "step": 164000 }, { "epoch": 0.32, "learning_rate": 3.756834066674248e-05, "loss": 1.9636, "step": 164500 }, { "epoch": 0.32, "learning_rate": 3.75136681333485e-05, "loss": 1.9672, "step": 165000 }, { "epoch": 0.33, "learning_rate": 3.7458995599954515e-05, "loss": 1.9639, "step": 165500 }, { "epoch": 0.33, "learning_rate": 3.740432306656053e-05, "loss": 1.9628, "step": 166000 }, { "epoch": 0.33, "learning_rate": 3.7349650533166545e-05, "loss": 1.9647, "step": 166500 }, { "epoch": 0.33, "learning_rate": 3.729497799977256e-05, "loss": 1.9633, "step": 167000 }, { "epoch": 0.33, "learning_rate": 3.724030546637858e-05, "loss": 1.9585, "step": 167500 }, { "epoch": 0.33, "learning_rate": 3.71856329329846e-05, "loss": 1.9599, "step": 168000 }, { "epoch": 0.33, "learning_rate": 3.7130960399590617e-05, "loss": 1.9601, "step": 168500 }, { "epoch": 0.33, "learning_rate": 3.707628786619663e-05, "loss": 1.9617, "step": 169000 }, { "epoch": 0.33, "learning_rate": 3.7021615332802646e-05, "loss": 1.9583, "step": 169500 }, { "epoch": 0.33, "learning_rate": 3.6966942799408664e-05, "loss": 1.9606, "step": 170000 }, { "epoch": 0.34, "learning_rate": 3.691227026601468e-05, "loss": 1.955, "step": 170500 }, { "epoch": 0.34, "learning_rate": 3.68575977326207e-05, "loss": 1.956, "step": 171000 }, { "epoch": 0.34, "learning_rate": 3.680292519922671e-05, "loss": 1.9584, "step": 171500 }, { "epoch": 0.34, "learning_rate": 3.674825266583273e-05, "loss": 1.9575, "step": 172000 }, { "epoch": 0.34, "learning_rate": 3.669358013243875e-05, "loss": 1.9531, "step": 172500 }, { "epoch": 0.34, "learning_rate": 3.6638907599044765e-05, "loss": 1.9603, "step": 173000 }, { "epoch": 0.34, "learning_rate": 3.6584235065650776e-05, "loss": 1.9542, "step": 173500 }, { "epoch": 0.34, "learning_rate": 3.6529562532256794e-05, "loss": 1.9515, "step": 174000 }, { "epoch": 0.34, "learning_rate": 3.647488999886282e-05, "loss": 1.9594, "step": 174500 }, { "epoch": 0.34, "learning_rate": 3.642021746546883e-05, "loss": 1.955, "step": 175000 }, { "epoch": 0.35, "learning_rate": 3.636554493207485e-05, "loss": 1.9521, "step": 175500 }, { "epoch": 0.35, "learning_rate": 3.631087239868086e-05, "loss": 1.9564, "step": 176000 }, { "epoch": 0.35, "learning_rate": 3.625619986528688e-05, "loss": 1.9556, "step": 176500 }, { "epoch": 0.35, "learning_rate": 3.6201527331892895e-05, "loss": 1.9517, "step": 177000 }, { "epoch": 0.35, "learning_rate": 3.6146854798498913e-05, "loss": 1.9569, "step": 177500 }, { "epoch": 0.35, "learning_rate": 3.609218226510493e-05, "loss": 1.9482, "step": 178000 }, { "epoch": 0.35, "learning_rate": 3.603750973171094e-05, "loss": 1.9496, "step": 178500 }, { "epoch": 0.35, "learning_rate": 3.598283719831696e-05, "loss": 1.95, "step": 179000 }, { "epoch": 0.35, "learning_rate": 3.592816466492298e-05, "loss": 1.9519, "step": 179500 }, { "epoch": 0.35, "learning_rate": 3.5873492131529e-05, "loss": 1.9477, "step": 180000 }, { "epoch": 0.36, "learning_rate": 3.5818819598135015e-05, "loss": 1.9482, "step": 180500 }, { "epoch": 0.36, "learning_rate": 3.5764147064741026e-05, "loss": 1.9511, "step": 181000 }, { "epoch": 0.36, "learning_rate": 3.570947453134705e-05, "loss": 1.9464, "step": 181500 }, { "epoch": 0.36, "learning_rate": 3.565480199795306e-05, "loss": 1.9493, "step": 182000 }, { "epoch": 0.36, "learning_rate": 3.560012946455908e-05, "loss": 1.9462, "step": 182500 }, { "epoch": 0.36, "learning_rate": 3.554545693116509e-05, "loss": 1.9493, "step": 183000 }, { "epoch": 0.36, "learning_rate": 3.549078439777111e-05, "loss": 1.9472, "step": 183500 }, { "epoch": 0.36, "learning_rate": 3.5436111864377134e-05, "loss": 1.9475, "step": 184000 }, { "epoch": 0.36, "learning_rate": 3.5381439330983145e-05, "loss": 1.9461, "step": 184500 }, { "epoch": 0.36, "learning_rate": 3.532676679758916e-05, "loss": 1.9409, "step": 185000 }, { "epoch": 0.37, "learning_rate": 3.5272094264195174e-05, "loss": 1.9498, "step": 185500 }, { "epoch": 0.37, "learning_rate": 3.521742173080119e-05, "loss": 1.9475, "step": 186000 }, { "epoch": 0.37, "learning_rate": 3.516274919740721e-05, "loss": 1.9491, "step": 186500 }, { "epoch": 0.37, "learning_rate": 3.510807666401323e-05, "loss": 1.9433, "step": 187000 }, { "epoch": 0.37, "learning_rate": 3.5053404130619246e-05, "loss": 1.9436, "step": 187500 }, { "epoch": 0.37, "learning_rate": 3.499873159722526e-05, "loss": 1.9425, "step": 188000 }, { "epoch": 0.37, "learning_rate": 3.494405906383128e-05, "loss": 1.9384, "step": 188500 }, { "epoch": 0.37, "learning_rate": 3.4889386530437294e-05, "loss": 1.9398, "step": 189000 }, { "epoch": 0.37, "learning_rate": 3.483471399704331e-05, "loss": 1.9428, "step": 189500 }, { "epoch": 0.37, "learning_rate": 3.478004146364933e-05, "loss": 1.9416, "step": 190000 }, { "epoch": 0.37, "learning_rate": 3.472536893025534e-05, "loss": 1.9409, "step": 190500 }, { "epoch": 0.38, "learning_rate": 3.4670696396861366e-05, "loss": 1.9423, "step": 191000 }, { "epoch": 0.38, "learning_rate": 3.461602386346738e-05, "loss": 1.9409, "step": 191500 }, { "epoch": 0.38, "learning_rate": 3.4561351330073395e-05, "loss": 1.9399, "step": 192000 }, { "epoch": 0.38, "learning_rate": 3.4506678796679406e-05, "loss": 1.94, "step": 192500 }, { "epoch": 0.38, "learning_rate": 3.4452006263285424e-05, "loss": 1.9386, "step": 193000 }, { "epoch": 0.38, "learning_rate": 3.439733372989145e-05, "loss": 1.9391, "step": 193500 }, { "epoch": 0.38, "learning_rate": 3.434266119649746e-05, "loss": 1.9412, "step": 194000 }, { "epoch": 0.38, "learning_rate": 3.428798866310348e-05, "loss": 1.9384, "step": 194500 }, { "epoch": 0.38, "learning_rate": 3.423331612970949e-05, "loss": 1.9364, "step": 195000 }, { "epoch": 0.38, "learning_rate": 3.4178643596315514e-05, "loss": 1.9354, "step": 195500 }, { "epoch": 0.39, "learning_rate": 3.4123971062921525e-05, "loss": 1.9374, "step": 196000 }, { "epoch": 0.39, "learning_rate": 3.406929852952754e-05, "loss": 1.9363, "step": 196500 }, { "epoch": 0.39, "learning_rate": 3.401462599613356e-05, "loss": 1.9333, "step": 197000 }, { "epoch": 0.39, "learning_rate": 3.395995346273957e-05, "loss": 1.9375, "step": 197500 }, { "epoch": 0.39, "learning_rate": 3.39052809293456e-05, "loss": 1.9422, "step": 198000 }, { "epoch": 0.39, "learning_rate": 3.385060839595161e-05, "loss": 1.9363, "step": 198500 }, { "epoch": 0.39, "learning_rate": 3.3795935862557626e-05, "loss": 1.9335, "step": 199000 }, { "epoch": 0.39, "learning_rate": 3.3741263329163644e-05, "loss": 1.9378, "step": 199500 }, { "epoch": 0.39, "learning_rate": 3.3686590795769656e-05, "loss": 1.9394, "step": 200000 }, { "epoch": 0.39, "learning_rate": 3.363191826237568e-05, "loss": 1.927, "step": 200500 }, { "epoch": 0.4, "learning_rate": 3.357724572898169e-05, "loss": 1.9303, "step": 201000 }, { "epoch": 0.4, "learning_rate": 3.352257319558771e-05, "loss": 1.9333, "step": 201500 }, { "epoch": 0.4, "learning_rate": 3.346790066219372e-05, "loss": 1.9285, "step": 202000 }, { "epoch": 0.4, "learning_rate": 3.3413228128799746e-05, "loss": 1.9337, "step": 202500 }, { "epoch": 0.4, "learning_rate": 3.3358555595405764e-05, "loss": 1.9339, "step": 203000 }, { "epoch": 0.4, "learning_rate": 3.3303883062011775e-05, "loss": 1.9368, "step": 203500 }, { "epoch": 0.4, "learning_rate": 3.324921052861779e-05, "loss": 1.934, "step": 204000 }, { "epoch": 0.4, "learning_rate": 3.3194537995223804e-05, "loss": 1.9356, "step": 204500 }, { "epoch": 0.4, "learning_rate": 3.313986546182983e-05, "loss": 1.9305, "step": 205000 }, { "epoch": 0.4, "learning_rate": 3.308519292843584e-05, "loss": 1.9291, "step": 205500 }, { "epoch": 0.41, "learning_rate": 3.303052039504186e-05, "loss": 1.9323, "step": 206000 }, { "epoch": 0.41, "learning_rate": 3.2975847861647876e-05, "loss": 1.9343, "step": 206500 }, { "epoch": 0.41, "learning_rate": 3.2921175328253894e-05, "loss": 1.9315, "step": 207000 }, { "epoch": 0.41, "learning_rate": 3.286650279485991e-05, "loss": 1.9278, "step": 207500 }, { "epoch": 0.41, "learning_rate": 3.2811830261465923e-05, "loss": 1.9295, "step": 208000 }, { "epoch": 0.41, "learning_rate": 3.275715772807194e-05, "loss": 1.9323, "step": 208500 }, { "epoch": 0.41, "learning_rate": 3.270248519467796e-05, "loss": 1.9287, "step": 209000 }, { "epoch": 0.41, "learning_rate": 3.264781266128398e-05, "loss": 1.9327, "step": 209500 }, { "epoch": 0.41, "learning_rate": 3.2593140127889995e-05, "loss": 1.9311, "step": 210000 }, { "epoch": 0.41, "learning_rate": 3.253846759449601e-05, "loss": 1.9262, "step": 210500 }, { "epoch": 0.42, "learning_rate": 3.2483795061102025e-05, "loss": 1.9273, "step": 211000 }, { "epoch": 0.42, "learning_rate": 3.2429122527708036e-05, "loss": 1.9276, "step": 211500 }, { "epoch": 0.42, "learning_rate": 3.237444999431406e-05, "loss": 1.9307, "step": 212000 }, { "epoch": 0.42, "learning_rate": 3.231977746092008e-05, "loss": 1.9261, "step": 212500 }, { "epoch": 0.42, "learning_rate": 3.226510492752609e-05, "loss": 1.9286, "step": 213000 }, { "epoch": 0.42, "learning_rate": 3.221043239413211e-05, "loss": 1.9315, "step": 213500 }, { "epoch": 0.42, "learning_rate": 3.2155759860738126e-05, "loss": 1.9297, "step": 214000 }, { "epoch": 0.42, "learning_rate": 3.2101087327344144e-05, "loss": 1.9317, "step": 214500 }, { "epoch": 0.42, "learning_rate": 3.2046414793950155e-05, "loss": 1.927, "step": 215000 }, { "epoch": 0.42, "learning_rate": 3.199174226055617e-05, "loss": 1.9279, "step": 215500 }, { "epoch": 0.43, "learning_rate": 3.193706972716219e-05, "loss": 1.922, "step": 216000 }, { "epoch": 0.43, "learning_rate": 3.188239719376821e-05, "loss": 1.9259, "step": 216500 }, { "epoch": 0.43, "learning_rate": 3.182772466037423e-05, "loss": 1.9234, "step": 217000 }, { "epoch": 0.43, "learning_rate": 3.177305212698024e-05, "loss": 1.9232, "step": 217500 }, { "epoch": 0.43, "learning_rate": 3.1718379593586256e-05, "loss": 1.9226, "step": 218000 }, { "epoch": 0.43, "learning_rate": 3.1663707060192274e-05, "loss": 1.922, "step": 218500 }, { "epoch": 0.43, "learning_rate": 3.160903452679829e-05, "loss": 1.9215, "step": 219000 }, { "epoch": 0.43, "learning_rate": 3.155436199340431e-05, "loss": 1.9207, "step": 219500 }, { "epoch": 0.43, "learning_rate": 3.149968946001032e-05, "loss": 1.9232, "step": 220000 }, { "epoch": 0.43, "learning_rate": 3.144501692661634e-05, "loss": 1.9223, "step": 220500 }, { "epoch": 0.43, "learning_rate": 3.139034439322236e-05, "loss": 1.9224, "step": 221000 }, { "epoch": 0.44, "learning_rate": 3.1335671859828376e-05, "loss": 1.9219, "step": 221500 }, { "epoch": 0.44, "learning_rate": 3.1280999326434394e-05, "loss": 1.9235, "step": 222000 }, { "epoch": 0.44, "learning_rate": 3.1226326793040405e-05, "loss": 1.9225, "step": 222500 }, { "epoch": 0.44, "learning_rate": 3.117165425964642e-05, "loss": 1.92, "step": 223000 }, { "epoch": 0.44, "learning_rate": 3.111698172625244e-05, "loss": 1.9148, "step": 223500 }, { "epoch": 0.44, "learning_rate": 3.106230919285846e-05, "loss": 1.9245, "step": 224000 }, { "epoch": 0.44, "learning_rate": 3.100763665946447e-05, "loss": 1.9184, "step": 224500 }, { "epoch": 0.44, "learning_rate": 3.095296412607049e-05, "loss": 1.9193, "step": 225000 }, { "epoch": 0.44, "learning_rate": 3.0898291592676506e-05, "loss": 1.917, "step": 225500 }, { "epoch": 0.44, "learning_rate": 3.0843619059282524e-05, "loss": 1.9092, "step": 226000 }, { "epoch": 0.45, "learning_rate": 3.078894652588854e-05, "loss": 1.9185, "step": 226500 }, { "epoch": 0.45, "learning_rate": 3.073427399249455e-05, "loss": 1.9165, "step": 227000 }, { "epoch": 0.45, "learning_rate": 3.067960145910057e-05, "loss": 1.9171, "step": 227500 }, { "epoch": 0.45, "learning_rate": 3.062492892570659e-05, "loss": 1.9182, "step": 228000 }, { "epoch": 0.45, "learning_rate": 3.057025639231261e-05, "loss": 1.9182, "step": 228500 }, { "epoch": 0.45, "learning_rate": 3.0515583858918622e-05, "loss": 1.9155, "step": 229000 }, { "epoch": 0.45, "learning_rate": 3.0460911325524636e-05, "loss": 1.9137, "step": 229500 }, { "epoch": 0.45, "learning_rate": 3.0406238792130654e-05, "loss": 1.9162, "step": 230000 }, { "epoch": 0.45, "learning_rate": 3.0351566258736676e-05, "loss": 1.9165, "step": 230500 }, { "epoch": 0.45, "learning_rate": 3.029689372534269e-05, "loss": 1.9184, "step": 231000 }, { "epoch": 0.46, "learning_rate": 3.0242221191948705e-05, "loss": 1.9127, "step": 231500 }, { "epoch": 0.46, "learning_rate": 3.018754865855472e-05, "loss": 1.9197, "step": 232000 }, { "epoch": 0.46, "learning_rate": 3.0132876125160738e-05, "loss": 1.915, "step": 232500 }, { "epoch": 0.46, "learning_rate": 3.0078203591766756e-05, "loss": 1.9189, "step": 233000 }, { "epoch": 0.46, "learning_rate": 3.0023531058372774e-05, "loss": 1.9154, "step": 233500 }, { "epoch": 0.46, "learning_rate": 2.9968858524978788e-05, "loss": 1.9117, "step": 234000 }, { "epoch": 0.46, "learning_rate": 2.9914185991584803e-05, "loss": 1.911, "step": 234500 }, { "epoch": 0.46, "learning_rate": 2.9859513458190824e-05, "loss": 1.9191, "step": 235000 }, { "epoch": 0.46, "learning_rate": 2.980484092479684e-05, "loss": 1.9158, "step": 235500 }, { "epoch": 0.46, "learning_rate": 2.9750168391402854e-05, "loss": 1.9149, "step": 236000 }, { "epoch": 0.47, "learning_rate": 2.969549585800887e-05, "loss": 1.9146, "step": 236500 }, { "epoch": 0.47, "learning_rate": 2.9640823324614886e-05, "loss": 1.9111, "step": 237000 }, { "epoch": 0.47, "learning_rate": 2.9586150791220908e-05, "loss": 1.9105, "step": 237500 }, { "epoch": 0.47, "learning_rate": 2.9531478257826922e-05, "loss": 1.9125, "step": 238000 }, { "epoch": 0.47, "learning_rate": 2.9476805724432937e-05, "loss": 1.9051, "step": 238500 }, { "epoch": 0.47, "learning_rate": 2.942213319103895e-05, "loss": 1.913, "step": 239000 }, { "epoch": 0.47, "learning_rate": 2.936746065764497e-05, "loss": 1.9067, "step": 239500 }, { "epoch": 0.47, "learning_rate": 2.931278812425099e-05, "loss": 1.9093, "step": 240000 }, { "epoch": 0.47, "learning_rate": 2.9258115590857005e-05, "loss": 1.9095, "step": 240500 }, { "epoch": 0.47, "learning_rate": 2.920344305746302e-05, "loss": 1.9129, "step": 241000 }, { "epoch": 0.48, "learning_rate": 2.9148770524069035e-05, "loss": 1.9119, "step": 241500 }, { "epoch": 0.48, "learning_rate": 2.9094097990675056e-05, "loss": 1.911, "step": 242000 }, { "epoch": 0.48, "learning_rate": 2.903942545728107e-05, "loss": 1.9078, "step": 242500 }, { "epoch": 0.48, "learning_rate": 2.898475292388709e-05, "loss": 1.9113, "step": 243000 }, { "epoch": 0.48, "learning_rate": 2.8930080390493103e-05, "loss": 1.9058, "step": 243500 }, { "epoch": 0.48, "learning_rate": 2.8875407857099118e-05, "loss": 1.9114, "step": 244000 }, { "epoch": 0.48, "learning_rate": 2.882073532370514e-05, "loss": 1.9073, "step": 244500 }, { "epoch": 0.48, "learning_rate": 2.8766062790311154e-05, "loss": 1.906, "step": 245000 }, { "epoch": 0.48, "learning_rate": 2.871139025691717e-05, "loss": 1.9072, "step": 245500 }, { "epoch": 0.48, "learning_rate": 2.8656717723523186e-05, "loss": 1.9083, "step": 246000 }, { "epoch": 0.49, "learning_rate": 2.86020451901292e-05, "loss": 1.9047, "step": 246500 }, { "epoch": 0.49, "learning_rate": 2.8547372656735222e-05, "loss": 1.9051, "step": 247000 }, { "epoch": 0.49, "learning_rate": 2.8492700123341237e-05, "loss": 1.9053, "step": 247500 }, { "epoch": 0.49, "learning_rate": 2.843802758994725e-05, "loss": 1.9107, "step": 248000 }, { "epoch": 0.49, "learning_rate": 2.838335505655327e-05, "loss": 1.9094, "step": 248500 }, { "epoch": 0.49, "learning_rate": 2.8328682523159288e-05, "loss": 1.9081, "step": 249000 }, { "epoch": 0.49, "learning_rate": 2.8274009989765306e-05, "loss": 1.9067, "step": 249500 }, { "epoch": 0.49, "learning_rate": 2.821933745637132e-05, "loss": 1.9054, "step": 250000 }, { "epoch": 0.49, "learning_rate": 2.8164664922977335e-05, "loss": 1.9007, "step": 250500 }, { "epoch": 0.49, "learning_rate": 2.810999238958335e-05, "loss": 1.9064, "step": 251000 }, { "epoch": 0.5, "learning_rate": 2.805531985618937e-05, "loss": 1.8992, "step": 251500 }, { "epoch": 0.5, "learning_rate": 2.8000647322795386e-05, "loss": 1.9074, "step": 252000 }, { "epoch": 0.5, "learning_rate": 2.7945974789401404e-05, "loss": 1.9061, "step": 252500 }, { "epoch": 0.5, "learning_rate": 2.7891302256007418e-05, "loss": 1.9017, "step": 253000 }, { "epoch": 0.5, "learning_rate": 2.7836629722613433e-05, "loss": 1.9018, "step": 253500 }, { "epoch": 0.5, "learning_rate": 2.7781957189219454e-05, "loss": 1.9027, "step": 254000 }, { "epoch": 0.5, "learning_rate": 2.772728465582547e-05, "loss": 1.9062, "step": 254500 }, { "epoch": 0.5, "learning_rate": 2.7672612122431483e-05, "loss": 1.9026, "step": 255000 }, { "epoch": 0.5, "learning_rate": 2.76179395890375e-05, "loss": 1.9, "step": 255500 }, { "epoch": 0.5, "learning_rate": 2.7563267055643523e-05, "loss": 1.9043, "step": 256000 }, { "epoch": 0.5, "learning_rate": 2.7508594522249537e-05, "loss": 1.9041, "step": 256500 }, { "epoch": 0.51, "learning_rate": 2.7453921988855552e-05, "loss": 1.9025, "step": 257000 }, { "epoch": 0.51, "learning_rate": 2.7399249455461567e-05, "loss": 1.8992, "step": 257500 }, { "epoch": 0.51, "learning_rate": 2.7344576922067585e-05, "loss": 1.9006, "step": 258000 }, { "epoch": 0.51, "learning_rate": 2.7289904388673603e-05, "loss": 1.8998, "step": 258500 }, { "epoch": 0.51, "learning_rate": 2.723523185527962e-05, "loss": 1.8978, "step": 259000 }, { "epoch": 0.51, "learning_rate": 2.7180559321885635e-05, "loss": 1.8991, "step": 259500 }, { "epoch": 0.51, "learning_rate": 2.712588678849165e-05, "loss": 1.898, "step": 260000 }, { "epoch": 0.51, "learning_rate": 2.707121425509767e-05, "loss": 1.9011, "step": 260500 }, { "epoch": 0.51, "learning_rate": 2.7016541721703686e-05, "loss": 1.8968, "step": 261000 }, { "epoch": 0.51, "learning_rate": 2.69618691883097e-05, "loss": 1.9062, "step": 261500 }, { "epoch": 0.52, "learning_rate": 2.690719665491572e-05, "loss": 1.9018, "step": 262000 }, { "epoch": 0.52, "learning_rate": 2.6852524121521733e-05, "loss": 1.9018, "step": 262500 }, { "epoch": 0.52, "learning_rate": 2.6797851588127754e-05, "loss": 1.8995, "step": 263000 }, { "epoch": 0.52, "learning_rate": 2.674317905473377e-05, "loss": 1.8954, "step": 263500 }, { "epoch": 0.52, "learning_rate": 2.6688506521339784e-05, "loss": 1.8992, "step": 264000 }, { "epoch": 0.52, "learning_rate": 2.6633833987945798e-05, "loss": 1.8957, "step": 264500 }, { "epoch": 0.52, "learning_rate": 2.6579161454551816e-05, "loss": 1.9017, "step": 265000 }, { "epoch": 0.52, "learning_rate": 2.6524488921157838e-05, "loss": 1.8975, "step": 265500 }, { "epoch": 0.52, "learning_rate": 2.6469816387763852e-05, "loss": 1.8987, "step": 266000 }, { "epoch": 0.52, "learning_rate": 2.6415143854369867e-05, "loss": 1.8962, "step": 266500 }, { "epoch": 0.53, "learning_rate": 2.636047132097588e-05, "loss": 1.9013, "step": 267000 }, { "epoch": 0.53, "learning_rate": 2.6305798787581903e-05, "loss": 1.9004, "step": 267500 }, { "epoch": 0.53, "learning_rate": 2.6251126254187917e-05, "loss": 1.8955, "step": 268000 }, { "epoch": 0.53, "learning_rate": 2.6196453720793935e-05, "loss": 1.8956, "step": 268500 }, { "epoch": 0.53, "learning_rate": 2.614178118739995e-05, "loss": 1.8941, "step": 269000 }, { "epoch": 0.53, "learning_rate": 2.6087108654005965e-05, "loss": 1.9004, "step": 269500 }, { "epoch": 0.53, "learning_rate": 2.6032436120611986e-05, "loss": 1.8978, "step": 270000 }, { "epoch": 0.53, "learning_rate": 2.5977763587218e-05, "loss": 1.8954, "step": 270500 }, { "epoch": 0.53, "learning_rate": 2.5923091053824015e-05, "loss": 1.8948, "step": 271000 }, { "epoch": 0.53, "learning_rate": 2.5868418520430033e-05, "loss": 1.8946, "step": 271500 }, { "epoch": 0.54, "learning_rate": 2.5813745987036048e-05, "loss": 1.897, "step": 272000 }, { "epoch": 0.54, "learning_rate": 2.575907345364207e-05, "loss": 1.897, "step": 272500 }, { "epoch": 0.54, "learning_rate": 2.5704400920248084e-05, "loss": 1.8961, "step": 273000 }, { "epoch": 0.54, "learning_rate": 2.56497283868541e-05, "loss": 1.8925, "step": 273500 }, { "epoch": 0.54, "learning_rate": 2.5595055853460113e-05, "loss": 1.8938, "step": 274000 }, { "epoch": 0.54, "learning_rate": 2.5540383320066135e-05, "loss": 1.8928, "step": 274500 }, { "epoch": 0.54, "learning_rate": 2.5485710786672153e-05, "loss": 1.8963, "step": 275000 }, { "epoch": 0.54, "learning_rate": 2.5431038253278167e-05, "loss": 1.8923, "step": 275500 }, { "epoch": 0.54, "learning_rate": 2.5376365719884182e-05, "loss": 1.8882, "step": 276000 }, { "epoch": 0.54, "learning_rate": 2.5321693186490196e-05, "loss": 1.8887, "step": 276500 }, { "epoch": 0.55, "learning_rate": 2.5267020653096218e-05, "loss": 1.8903, "step": 277000 }, { "epoch": 0.55, "learning_rate": 2.5212348119702232e-05, "loss": 1.8941, "step": 277500 }, { "epoch": 0.55, "learning_rate": 2.515767558630825e-05, "loss": 1.8943, "step": 278000 }, { "epoch": 0.55, "learning_rate": 2.5103003052914265e-05, "loss": 1.8924, "step": 278500 }, { "epoch": 0.55, "learning_rate": 2.504833051952028e-05, "loss": 1.8855, "step": 279000 }, { "epoch": 0.55, "learning_rate": 2.4993657986126298e-05, "loss": 1.8918, "step": 279500 }, { "epoch": 0.55, "learning_rate": 2.4938985452732316e-05, "loss": 1.8891, "step": 280000 }, { "epoch": 0.55, "learning_rate": 2.488431291933833e-05, "loss": 1.8874, "step": 280500 }, { "epoch": 0.55, "learning_rate": 2.4829640385944348e-05, "loss": 1.8907, "step": 281000 }, { "epoch": 0.55, "learning_rate": 2.4774967852550366e-05, "loss": 1.8913, "step": 281500 }, { "epoch": 0.56, "learning_rate": 2.472029531915638e-05, "loss": 1.8888, "step": 282000 }, { "epoch": 0.56, "learning_rate": 2.46656227857624e-05, "loss": 1.8924, "step": 282500 }, { "epoch": 0.56, "learning_rate": 2.4610950252368413e-05, "loss": 1.8889, "step": 283000 }, { "epoch": 0.56, "learning_rate": 2.455627771897443e-05, "loss": 1.8866, "step": 283500 }, { "epoch": 0.56, "learning_rate": 2.450160518558045e-05, "loss": 1.8915, "step": 284000 }, { "epoch": 0.56, "learning_rate": 2.4446932652186467e-05, "loss": 1.8896, "step": 284500 }, { "epoch": 0.56, "learning_rate": 2.4392260118792482e-05, "loss": 1.8921, "step": 285000 }, { "epoch": 0.56, "learning_rate": 2.43375875853985e-05, "loss": 1.8854, "step": 285500 }, { "epoch": 0.56, "learning_rate": 2.4282915052004515e-05, "loss": 1.8887, "step": 286000 }, { "epoch": 0.56, "learning_rate": 2.422824251861053e-05, "loss": 1.8892, "step": 286500 }, { "epoch": 0.56, "learning_rate": 2.4173569985216547e-05, "loss": 1.8908, "step": 287000 }, { "epoch": 0.57, "learning_rate": 2.4118897451822565e-05, "loss": 1.8901, "step": 287500 }, { "epoch": 0.57, "learning_rate": 2.4064224918428583e-05, "loss": 1.8906, "step": 288000 }, { "epoch": 0.57, "learning_rate": 2.4009552385034598e-05, "loss": 1.8862, "step": 288500 }, { "epoch": 0.57, "learning_rate": 2.3954879851640616e-05, "loss": 1.8915, "step": 289000 }, { "epoch": 0.57, "learning_rate": 2.390020731824663e-05, "loss": 1.8891, "step": 289500 }, { "epoch": 0.57, "learning_rate": 2.3845534784852645e-05, "loss": 1.8855, "step": 290000 }, { "epoch": 0.57, "learning_rate": 2.3790862251458667e-05, "loss": 1.8828, "step": 290500 }, { "epoch": 0.57, "learning_rate": 2.373618971806468e-05, "loss": 1.8845, "step": 291000 }, { "epoch": 0.57, "learning_rate": 2.36815171846707e-05, "loss": 1.8824, "step": 291500 }, { "epoch": 0.57, "learning_rate": 2.3626844651276714e-05, "loss": 1.8832, "step": 292000 }, { "epoch": 0.58, "learning_rate": 2.3572172117882732e-05, "loss": 1.8862, "step": 292500 }, { "epoch": 0.58, "learning_rate": 2.3517499584488746e-05, "loss": 1.8862, "step": 293000 }, { "epoch": 0.58, "learning_rate": 2.3462827051094764e-05, "loss": 1.8837, "step": 293500 }, { "epoch": 0.58, "learning_rate": 2.3408154517700782e-05, "loss": 1.8864, "step": 294000 }, { "epoch": 0.58, "learning_rate": 2.3353481984306797e-05, "loss": 1.886, "step": 294500 }, { "epoch": 0.58, "learning_rate": 2.3298809450912815e-05, "loss": 1.884, "step": 295000 }, { "epoch": 0.58, "learning_rate": 2.324413691751883e-05, "loss": 1.8834, "step": 295500 }, { "epoch": 0.58, "learning_rate": 2.3189464384124848e-05, "loss": 1.8848, "step": 296000 }, { "epoch": 0.58, "learning_rate": 2.3134791850730862e-05, "loss": 1.8856, "step": 296500 }, { "epoch": 0.58, "learning_rate": 2.308011931733688e-05, "loss": 1.8874, "step": 297000 }, { "epoch": 0.59, "learning_rate": 2.3025446783942898e-05, "loss": 1.8775, "step": 297500 }, { "epoch": 0.59, "learning_rate": 2.2970774250548913e-05, "loss": 1.8859, "step": 298000 }, { "epoch": 0.59, "learning_rate": 2.291610171715493e-05, "loss": 1.8799, "step": 298500 }, { "epoch": 0.59, "learning_rate": 2.2861429183760945e-05, "loss": 1.8833, "step": 299000 }, { "epoch": 0.59, "learning_rate": 2.2806756650366963e-05, "loss": 1.882, "step": 299500 }, { "epoch": 0.59, "learning_rate": 2.275208411697298e-05, "loss": 1.8872, "step": 300000 }, { "epoch": 0.67, "learning_rate": 1.8590144171628205e-05, "loss": 1.8825, "step": 300500 }, { "epoch": 0.67, "learning_rate": 1.852863750430547e-05, "loss": 1.8818, "step": 301000 }, { "epoch": 0.67, "learning_rate": 1.846713083698273e-05, "loss": 1.8859, "step": 301500 }, { "epoch": 0.67, "learning_rate": 1.8405624169659992e-05, "loss": 1.8773, "step": 302000 }, { "epoch": 0.67, "learning_rate": 1.8344117502337256e-05, "loss": 1.8764, "step": 302500 }, { "epoch": 0.67, "learning_rate": 1.8282610835014516e-05, "loss": 1.8826, "step": 303000 }, { "epoch": 0.67, "learning_rate": 1.822110416769178e-05, "loss": 1.8805, "step": 303500 }, { "epoch": 0.67, "learning_rate": 1.815959750036904e-05, "loss": 1.8787, "step": 304000 }, { "epoch": 0.67, "learning_rate": 1.8098090833046304e-05, "loss": 1.8793, "step": 304500 }, { "epoch": 0.68, "learning_rate": 1.8036584165723567e-05, "loss": 1.8797, "step": 305000 }, { "epoch": 0.68, "learning_rate": 1.7975077498400827e-05, "loss": 1.875, "step": 305500 }, { "epoch": 0.68, "learning_rate": 1.7913570831078088e-05, "loss": 1.8747, "step": 306000 }, { "epoch": 0.68, "learning_rate": 1.785206416375535e-05, "loss": 1.8795, "step": 306500 }, { "epoch": 0.68, "learning_rate": 1.7790557496432615e-05, "loss": 1.8761, "step": 307000 }, { "epoch": 0.68, "learning_rate": 1.7729050829109875e-05, "loss": 1.878, "step": 307500 }, { "epoch": 0.68, "learning_rate": 1.766754416178714e-05, "loss": 1.8743, "step": 308000 }, { "epoch": 0.68, "learning_rate": 1.76060374944644e-05, "loss": 1.8762, "step": 308500 }, { "epoch": 0.68, "learning_rate": 1.7544530827141666e-05, "loss": 1.8739, "step": 309000 }, { "epoch": 0.69, "learning_rate": 1.7483024159818926e-05, "loss": 1.8747, "step": 309500 }, { "epoch": 0.69, "learning_rate": 1.7421517492496186e-05, "loss": 1.8703, "step": 310000 }, { "epoch": 0.69, "learning_rate": 1.736001082517345e-05, "loss": 1.8709, "step": 310500 }, { "epoch": 0.69, "learning_rate": 1.729850415785071e-05, "loss": 1.8757, "step": 311000 }, { "epoch": 0.69, "learning_rate": 1.7236997490527974e-05, "loss": 1.8746, "step": 311500 }, { "epoch": 0.69, "learning_rate": 1.7175490823205237e-05, "loss": 1.8744, "step": 312000 }, { "epoch": 0.69, "learning_rate": 1.7113984155882498e-05, "loss": 1.8742, "step": 312500 }, { "epoch": 0.69, "learning_rate": 1.7052477488559758e-05, "loss": 1.8791, "step": 313000 }, { "epoch": 0.69, "learning_rate": 1.6990970821237025e-05, "loss": 1.871, "step": 313500 }, { "epoch": 0.7, "learning_rate": 1.6929464153914285e-05, "loss": 1.8742, "step": 314000 }, { "epoch": 0.7, "learning_rate": 1.686795748659155e-05, "loss": 1.873, "step": 314500 }, { "epoch": 0.7, "learning_rate": 1.680645081926881e-05, "loss": 1.8775, "step": 315000 }, { "epoch": 0.7, "learning_rate": 1.6744944151946072e-05, "loss": 1.8746, "step": 315500 }, { "epoch": 0.7, "learning_rate": 1.6683437484623336e-05, "loss": 1.8686, "step": 316000 }, { "epoch": 0.7, "learning_rate": 1.6621930817300596e-05, "loss": 1.8667, "step": 316500 }, { "epoch": 0.7, "learning_rate": 1.6560424149977856e-05, "loss": 1.8716, "step": 317000 }, { "epoch": 0.7, "learning_rate": 1.649891748265512e-05, "loss": 1.8693, "step": 317500 }, { "epoch": 0.7, "learning_rate": 1.6437410815332384e-05, "loss": 1.8752, "step": 318000 }, { "epoch": 0.71, "learning_rate": 1.6375904148009644e-05, "loss": 1.8677, "step": 318500 }, { "epoch": 0.71, "learning_rate": 1.6314397480686908e-05, "loss": 1.8724, "step": 319000 }, { "epoch": 0.71, "learning_rate": 1.6252890813364168e-05, "loss": 1.8675, "step": 319500 }, { "epoch": 0.71, "learning_rate": 1.619138414604143e-05, "loss": 1.8674, "step": 320000 }, { "epoch": 0.71, "learning_rate": 1.6129877478718695e-05, "loss": 1.8689, "step": 320500 }, { "epoch": 0.71, "learning_rate": 1.6068370811395955e-05, "loss": 1.8717, "step": 321000 }, { "epoch": 0.71, "learning_rate": 1.600686414407322e-05, "loss": 1.8695, "step": 321500 }, { "epoch": 0.71, "learning_rate": 1.594535747675048e-05, "loss": 1.869, "step": 322000 }, { "epoch": 0.71, "learning_rate": 1.5883850809427743e-05, "loss": 1.8737, "step": 322500 }, { "epoch": 0.72, "learning_rate": 1.5822344142105006e-05, "loss": 1.868, "step": 323000 }, { "epoch": 0.72, "learning_rate": 1.5760837474782266e-05, "loss": 1.87, "step": 323500 }, { "epoch": 0.72, "learning_rate": 1.5699330807459527e-05, "loss": 1.8717, "step": 324000 }, { "epoch": 0.72, "learning_rate": 1.5637824140136794e-05, "loss": 1.8675, "step": 324500 }, { "epoch": 0.72, "learning_rate": 1.5576317472814054e-05, "loss": 1.8716, "step": 325000 }, { "epoch": 0.72, "learning_rate": 1.5514810805491317e-05, "loss": 1.8706, "step": 325500 }, { "epoch": 0.72, "learning_rate": 1.5453304138168578e-05, "loss": 1.867, "step": 326000 }, { "epoch": 0.72, "learning_rate": 1.5391797470845838e-05, "loss": 1.8638, "step": 326500 }, { "epoch": 0.72, "learning_rate": 1.5330290803523105e-05, "loss": 1.8642, "step": 327000 }, { "epoch": 0.73, "learning_rate": 1.5268784136200365e-05, "loss": 1.8672, "step": 327500 }, { "epoch": 0.73, "learning_rate": 1.5207277468877625e-05, "loss": 1.8677, "step": 328000 }, { "epoch": 0.73, "learning_rate": 1.514577080155489e-05, "loss": 1.867, "step": 328500 }, { "epoch": 0.73, "learning_rate": 1.508426413423215e-05, "loss": 1.8665, "step": 329000 }, { "epoch": 0.73, "learning_rate": 1.5022757466909413e-05, "loss": 1.8673, "step": 329500 }, { "epoch": 0.73, "learning_rate": 1.4961250799586676e-05, "loss": 1.8719, "step": 330000 }, { "epoch": 0.73, "learning_rate": 1.4899744132263938e-05, "loss": 1.8652, "step": 330500 }, { "epoch": 0.73, "learning_rate": 1.4838237464941202e-05, "loss": 1.865, "step": 331000 }, { "epoch": 0.73, "learning_rate": 1.4776730797618462e-05, "loss": 1.8645, "step": 331500 }, { "epoch": 0.74, "learning_rate": 1.4715224130295724e-05, "loss": 1.8661, "step": 332000 }, { "epoch": 0.74, "learning_rate": 1.4653717462972988e-05, "loss": 1.8658, "step": 332500 }, { "epoch": 0.74, "learning_rate": 1.459221079565025e-05, "loss": 1.8679, "step": 333000 }, { "epoch": 0.74, "learning_rate": 1.453070412832751e-05, "loss": 1.867, "step": 333500 }, { "epoch": 0.74, "learning_rate": 1.4469197461004775e-05, "loss": 1.8648, "step": 334000 }, { "epoch": 0.74, "learning_rate": 1.4407690793682035e-05, "loss": 1.8678, "step": 334500 }, { "epoch": 0.74, "learning_rate": 1.4346184126359297e-05, "loss": 1.8665, "step": 335000 }, { "epoch": 0.74, "learning_rate": 1.428467745903656e-05, "loss": 1.8717, "step": 335500 }, { "epoch": 0.74, "learning_rate": 1.4223170791713821e-05, "loss": 1.8683, "step": 336000 }, { "epoch": 0.75, "learning_rate": 1.4161664124391086e-05, "loss": 1.8657, "step": 336500 }, { "epoch": 0.75, "learning_rate": 1.4100157457068347e-05, "loss": 1.864, "step": 337000 }, { "epoch": 0.75, "learning_rate": 1.4038650789745608e-05, "loss": 1.8622, "step": 337500 }, { "epoch": 0.75, "learning_rate": 1.3977144122422872e-05, "loss": 1.8679, "step": 338000 }, { "epoch": 0.75, "learning_rate": 1.3915637455100134e-05, "loss": 1.8636, "step": 338500 }, { "epoch": 0.75, "learning_rate": 1.3854130787777394e-05, "loss": 1.8688, "step": 339000 }, { "epoch": 0.75, "learning_rate": 1.3792624120454658e-05, "loss": 1.8667, "step": 339500 }, { "epoch": 0.75, "learning_rate": 1.373111745313192e-05, "loss": 1.8619, "step": 340000 }, { "epoch": 0.75, "learning_rate": 1.3669610785809183e-05, "loss": 1.8677, "step": 340500 }, { "epoch": 0.76, "learning_rate": 1.3608104118486445e-05, "loss": 1.8635, "step": 341000 }, { "epoch": 0.76, "learning_rate": 1.3546597451163705e-05, "loss": 1.8602, "step": 341500 }, { "epoch": 0.76, "learning_rate": 1.348509078384097e-05, "loss": 1.8627, "step": 342000 }, { "epoch": 0.76, "learning_rate": 1.3423584116518231e-05, "loss": 1.8631, "step": 342500 }, { "epoch": 0.76, "learning_rate": 1.3362077449195493e-05, "loss": 1.8664, "step": 343000 }, { "epoch": 0.76, "learning_rate": 1.3300570781872756e-05, "loss": 1.8601, "step": 343500 }, { "epoch": 0.76, "learning_rate": 1.3239064114550018e-05, "loss": 1.8602, "step": 344000 }, { "epoch": 0.76, "learning_rate": 1.3177557447227279e-05, "loss": 1.8646, "step": 344500 }, { "epoch": 0.76, "learning_rate": 1.3116050779904542e-05, "loss": 1.8606, "step": 345000 }, { "epoch": 0.77, "learning_rate": 1.3054544112581804e-05, "loss": 1.8605, "step": 345500 }, { "epoch": 0.77, "learning_rate": 1.2993037445259068e-05, "loss": 1.8622, "step": 346000 }, { "epoch": 0.77, "learning_rate": 1.293153077793633e-05, "loss": 1.8609, "step": 346500 }, { "epoch": 0.77, "learning_rate": 1.287002411061359e-05, "loss": 1.8543, "step": 347000 }, { "epoch": 0.77, "learning_rate": 1.2808517443290855e-05, "loss": 1.8598, "step": 347500 }, { "epoch": 0.77, "learning_rate": 1.2747010775968115e-05, "loss": 1.8589, "step": 348000 }, { "epoch": 0.77, "learning_rate": 1.2685504108645377e-05, "loss": 1.8633, "step": 348500 }, { "epoch": 0.77, "learning_rate": 1.2623997441322641e-05, "loss": 1.8633, "step": 349000 }, { "epoch": 0.77, "learning_rate": 1.2562490773999903e-05, "loss": 1.8596, "step": 349500 }, { "epoch": 0.77, "learning_rate": 1.2500984106677163e-05, "loss": 1.8577, "step": 350000 }, { "epoch": 0.78, "learning_rate": 1.2439477439354427e-05, "loss": 1.8595, "step": 350500 }, { "epoch": 0.78, "learning_rate": 1.2377970772031689e-05, "loss": 1.8702, "step": 351000 }, { "epoch": 0.78, "learning_rate": 1.231646410470895e-05, "loss": 1.8531, "step": 351500 }, { "epoch": 0.78, "learning_rate": 1.2254957437386214e-05, "loss": 1.8599, "step": 352000 }, { "epoch": 0.78, "learning_rate": 1.2193450770063474e-05, "loss": 1.862, "step": 352500 }, { "epoch": 0.78, "learning_rate": 1.2131944102740738e-05, "loss": 1.8601, "step": 353000 }, { "epoch": 0.78, "learning_rate": 1.2070437435418e-05, "loss": 1.8608, "step": 353500 }, { "epoch": 0.78, "learning_rate": 1.2008930768095263e-05, "loss": 1.8589, "step": 354000 }, { "epoch": 0.78, "learning_rate": 1.1947424100772524e-05, "loss": 1.8623, "step": 354500 }, { "epoch": 0.79, "learning_rate": 1.1885917433449786e-05, "loss": 1.8616, "step": 355000 }, { "epoch": 0.79, "learning_rate": 1.1824410766127049e-05, "loss": 1.8555, "step": 355500 }, { "epoch": 0.79, "learning_rate": 1.1762904098804311e-05, "loss": 1.8579, "step": 356000 }, { "epoch": 0.79, "learning_rate": 1.1701397431481573e-05, "loss": 1.8634, "step": 356500 }, { "epoch": 0.79, "learning_rate": 1.1639890764158835e-05, "loss": 1.8557, "step": 357000 }, { "epoch": 0.79, "learning_rate": 1.1578384096836098e-05, "loss": 1.8579, "step": 357500 }, { "epoch": 0.79, "learning_rate": 1.1516877429513359e-05, "loss": 1.8614, "step": 358000 }, { "epoch": 0.79, "learning_rate": 1.1455370762190622e-05, "loss": 1.8598, "step": 358500 }, { "epoch": 0.79, "learning_rate": 1.1393864094867884e-05, "loss": 1.8567, "step": 359000 }, { "epoch": 0.8, "learning_rate": 1.1332357427545146e-05, "loss": 1.855, "step": 359500 }, { "epoch": 0.8, "learning_rate": 1.1270850760222408e-05, "loss": 1.8578, "step": 360000 }, { "epoch": 0.8, "learning_rate": 1.120934409289967e-05, "loss": 1.856, "step": 360500 }, { "epoch": 0.8, "learning_rate": 1.1147837425576934e-05, "loss": 1.8532, "step": 361000 }, { "epoch": 0.8, "learning_rate": 1.1086330758254195e-05, "loss": 1.8625, "step": 361500 }, { "epoch": 0.8, "learning_rate": 1.1024824090931457e-05, "loss": 1.8591, "step": 362000 }, { "epoch": 0.8, "learning_rate": 1.096331742360872e-05, "loss": 1.8595, "step": 362500 }, { "epoch": 0.8, "learning_rate": 1.0901810756285983e-05, "loss": 1.8557, "step": 363000 }, { "epoch": 0.8, "learning_rate": 1.0840304088963243e-05, "loss": 1.8576, "step": 363500 }, { "epoch": 0.81, "learning_rate": 1.0778797421640507e-05, "loss": 1.8548, "step": 364000 }, { "epoch": 0.81, "learning_rate": 1.0717290754317769e-05, "loss": 1.8605, "step": 364500 }, { "epoch": 0.81, "learning_rate": 1.065578408699503e-05, "loss": 1.8505, "step": 365000 }, { "epoch": 0.81, "learning_rate": 1.0594277419672292e-05, "loss": 1.8578, "step": 365500 }, { "epoch": 0.81, "learning_rate": 1.0532770752349554e-05, "loss": 1.857, "step": 366000 }, { "epoch": 0.81, "learning_rate": 1.0471264085026818e-05, "loss": 1.8545, "step": 366500 }, { "epoch": 0.81, "learning_rate": 1.040975741770408e-05, "loss": 1.8557, "step": 367000 }, { "epoch": 0.81, "learning_rate": 1.0348250750381342e-05, "loss": 1.8554, "step": 367500 }, { "epoch": 0.81, "learning_rate": 1.0286744083058604e-05, "loss": 1.8548, "step": 368000 }, { "epoch": 0.82, "learning_rate": 1.0225237415735867e-05, "loss": 1.8558, "step": 368500 }, { "epoch": 0.82, "learning_rate": 1.0163730748413128e-05, "loss": 1.8564, "step": 369000 }, { "epoch": 0.82, "learning_rate": 1.0102224081090391e-05, "loss": 1.8581, "step": 369500 }, { "epoch": 0.82, "learning_rate": 1.0040717413767653e-05, "loss": 1.8541, "step": 370000 }, { "epoch": 0.82, "learning_rate": 9.979210746444915e-06, "loss": 1.8522, "step": 370500 }, { "epoch": 0.82, "learning_rate": 9.917704079122177e-06, "loss": 1.8551, "step": 371000 }, { "epoch": 0.82, "learning_rate": 9.856197411799439e-06, "loss": 1.8572, "step": 371500 }, { "epoch": 0.82, "learning_rate": 9.794690744476702e-06, "loss": 1.8544, "step": 372000 }, { "epoch": 0.82, "learning_rate": 9.733184077153964e-06, "loss": 1.8509, "step": 372500 }, { "epoch": 0.83, "learning_rate": 9.671677409831226e-06, "loss": 1.8538, "step": 373000 }, { "epoch": 0.83, "learning_rate": 9.610170742508488e-06, "loss": 1.8561, "step": 373500 }, { "epoch": 0.83, "learning_rate": 9.54866407518575e-06, "loss": 1.8559, "step": 374000 }, { "epoch": 0.83, "learning_rate": 9.487157407863014e-06, "loss": 1.8559, "step": 374500 }, { "epoch": 0.83, "learning_rate": 9.425650740540274e-06, "loss": 1.8507, "step": 375000 }, { "epoch": 0.83, "learning_rate": 9.364144073217537e-06, "loss": 1.8526, "step": 375500 }, { "epoch": 0.83, "learning_rate": 9.3026374058948e-06, "loss": 1.8552, "step": 376000 }, { "epoch": 0.83, "learning_rate": 9.241130738572061e-06, "loss": 1.8526, "step": 376500 }, { "epoch": 0.83, "learning_rate": 9.179624071249323e-06, "loss": 1.8534, "step": 377000 }, { "epoch": 0.84, "learning_rate": 9.118117403926587e-06, "loss": 1.8539, "step": 377500 }, { "epoch": 0.84, "learning_rate": 9.056610736603849e-06, "loss": 1.8558, "step": 378000 }, { "epoch": 0.84, "learning_rate": 8.99510406928111e-06, "loss": 1.8532, "step": 378500 }, { "epoch": 0.84, "learning_rate": 8.933597401958373e-06, "loss": 1.8557, "step": 379000 }, { "epoch": 0.84, "learning_rate": 8.872090734635634e-06, "loss": 1.8528, "step": 379500 }, { "epoch": 0.84, "learning_rate": 8.810584067312898e-06, "loss": 1.8554, "step": 380000 }, { "epoch": 0.84, "learning_rate": 8.749077399990158e-06, "loss": 1.8508, "step": 380500 }, { "epoch": 0.84, "learning_rate": 8.687570732667422e-06, "loss": 1.8505, "step": 381000 }, { "epoch": 0.84, "learning_rate": 8.626064065344684e-06, "loss": 1.8489, "step": 381500 }, { "epoch": 0.85, "learning_rate": 8.564557398021946e-06, "loss": 1.8519, "step": 382000 }, { "epoch": 0.85, "learning_rate": 8.503050730699208e-06, "loss": 1.8565, "step": 382500 }, { "epoch": 0.85, "learning_rate": 8.441544063376471e-06, "loss": 1.852, "step": 383000 }, { "epoch": 0.85, "learning_rate": 8.380037396053733e-06, "loss": 1.8553, "step": 383500 }, { "epoch": 0.85, "learning_rate": 8.318530728730995e-06, "loss": 1.8512, "step": 384000 }, { "epoch": 0.85, "learning_rate": 8.257024061408257e-06, "loss": 1.8521, "step": 384500 }, { "epoch": 0.85, "learning_rate": 8.195517394085519e-06, "loss": 1.8495, "step": 385000 }, { "epoch": 0.85, "learning_rate": 8.134010726762783e-06, "loss": 1.8563, "step": 385500 }, { "epoch": 0.85, "learning_rate": 8.072504059440043e-06, "loss": 1.8524, "step": 386000 }, { "epoch": 0.86, "learning_rate": 8.010997392117306e-06, "loss": 1.8537, "step": 386500 }, { "epoch": 0.86, "learning_rate": 7.949490724794568e-06, "loss": 1.8481, "step": 387000 }, { "epoch": 0.86, "learning_rate": 7.88798405747183e-06, "loss": 1.8521, "step": 387500 }, { "epoch": 0.86, "learning_rate": 7.826477390149092e-06, "loss": 1.8488, "step": 388000 }, { "epoch": 0.86, "learning_rate": 7.764970722826356e-06, "loss": 1.856, "step": 388500 }, { "epoch": 0.86, "learning_rate": 7.703464055503618e-06, "loss": 1.8502, "step": 389000 }, { "epoch": 0.86, "learning_rate": 7.64195738818088e-06, "loss": 1.8534, "step": 389500 }, { "epoch": 0.86, "learning_rate": 7.580450720858141e-06, "loss": 1.8481, "step": 390000 }, { "epoch": 0.86, "learning_rate": 7.518944053535404e-06, "loss": 1.8516, "step": 390500 }, { "epoch": 0.87, "learning_rate": 7.457437386212666e-06, "loss": 1.8508, "step": 391000 }, { "epoch": 0.87, "learning_rate": 7.395930718889928e-06, "loss": 1.8442, "step": 391500 }, { "epoch": 0.87, "learning_rate": 7.33442405156719e-06, "loss": 1.8469, "step": 392000 }, { "epoch": 0.87, "learning_rate": 7.272917384244453e-06, "loss": 1.85, "step": 392500 }, { "epoch": 0.87, "learning_rate": 7.211410716921714e-06, "loss": 1.8454, "step": 393000 }, { "epoch": 0.87, "learning_rate": 7.1499040495989765e-06, "loss": 1.8523, "step": 393500 }, { "epoch": 0.87, "learning_rate": 7.088397382276239e-06, "loss": 1.8479, "step": 394000 }, { "epoch": 0.87, "learning_rate": 7.026890714953502e-06, "loss": 1.8438, "step": 394500 }, { "epoch": 0.87, "learning_rate": 6.965384047630763e-06, "loss": 1.8491, "step": 395000 }, { "epoch": 0.88, "learning_rate": 6.903877380308026e-06, "loss": 1.8492, "step": 395500 }, { "epoch": 0.88, "learning_rate": 6.842370712985289e-06, "loss": 1.8506, "step": 396000 }, { "epoch": 0.88, "learning_rate": 6.7808640456625505e-06, "loss": 1.8511, "step": 396500 }, { "epoch": 0.88, "learning_rate": 6.719357378339812e-06, "loss": 1.8479, "step": 397000 }, { "epoch": 0.88, "learning_rate": 6.657850711017074e-06, "loss": 1.8474, "step": 397500 }, { "epoch": 0.88, "learning_rate": 6.596344043694337e-06, "loss": 1.8472, "step": 398000 }, { "epoch": 0.88, "learning_rate": 6.534837376371598e-06, "loss": 1.8536, "step": 398500 }, { "epoch": 0.88, "learning_rate": 6.473330709048861e-06, "loss": 1.8487, "step": 399000 }, { "epoch": 0.88, "learning_rate": 6.411824041726124e-06, "loss": 1.8507, "step": 399500 }, { "epoch": 0.89, "learning_rate": 6.3503173744033864e-06, "loss": 1.8478, "step": 400000 }, { "epoch": 0.89, "learning_rate": 6.2888107070806475e-06, "loss": 1.8488, "step": 400500 }, { "epoch": 0.89, "learning_rate": 6.22730403975791e-06, "loss": 1.8462, "step": 401000 }, { "epoch": 0.89, "learning_rate": 6.165797372435172e-06, "loss": 1.8501, "step": 401500 }, { "epoch": 0.89, "learning_rate": 6.104290705112434e-06, "loss": 1.8495, "step": 402000 }, { "epoch": 0.89, "learning_rate": 6.042784037789697e-06, "loss": 1.8479, "step": 402500 }, { "epoch": 0.89, "learning_rate": 5.981277370466959e-06, "loss": 1.8474, "step": 403000 }, { "epoch": 0.89, "learning_rate": 5.919770703144221e-06, "loss": 1.851, "step": 403500 }, { "epoch": 0.89, "learning_rate": 5.8582640358214834e-06, "loss": 1.8451, "step": 404000 }, { "epoch": 0.9, "learning_rate": 5.796757368498745e-06, "loss": 1.8458, "step": 404500 }, { "epoch": 0.9, "learning_rate": 5.735250701176008e-06, "loss": 1.8485, "step": 405000 }, { "epoch": 0.9, "learning_rate": 5.67374403385327e-06, "loss": 1.8494, "step": 405500 }, { "epoch": 0.9, "learning_rate": 5.612237366530533e-06, "loss": 1.8437, "step": 406000 }, { "epoch": 0.9, "learning_rate": 5.550730699207794e-06, "loss": 1.8435, "step": 406500 }, { "epoch": 0.9, "learning_rate": 5.489224031885057e-06, "loss": 1.8472, "step": 407000 }, { "epoch": 0.9, "learning_rate": 5.4277173645623185e-06, "loss": 1.8469, "step": 407500 }, { "epoch": 0.9, "learning_rate": 5.366210697239581e-06, "loss": 1.845, "step": 408000 }, { "epoch": 0.9, "learning_rate": 5.304704029916843e-06, "loss": 1.8451, "step": 408500 }, { "epoch": 0.91, "learning_rate": 5.243197362594105e-06, "loss": 1.85, "step": 409000 }, { "epoch": 0.91, "learning_rate": 5.181690695271368e-06, "loss": 1.8436, "step": 409500 }, { "epoch": 0.91, "learning_rate": 5.12018402794863e-06, "loss": 1.8435, "step": 410000 }, { "epoch": 0.91, "learning_rate": 5.0586773606258925e-06, "loss": 1.8447, "step": 410500 }, { "epoch": 0.91, "learning_rate": 4.9971706933031544e-06, "loss": 1.847, "step": 411000 }, { "epoch": 0.91, "learning_rate": 4.935664025980416e-06, "loss": 1.8522, "step": 411500 }, { "epoch": 0.91, "learning_rate": 4.874157358657678e-06, "loss": 1.8474, "step": 412000 }, { "epoch": 0.91, "learning_rate": 4.812650691334941e-06, "loss": 1.8473, "step": 412500 }, { "epoch": 0.91, "learning_rate": 4.751144024012203e-06, "loss": 1.8446, "step": 413000 }, { "epoch": 0.92, "learning_rate": 4.689637356689466e-06, "loss": 1.8471, "step": 413500 }, { "epoch": 0.92, "learning_rate": 4.628130689366728e-06, "loss": 1.8472, "step": 414000 }, { "epoch": 0.92, "learning_rate": 4.5666240220439895e-06, "loss": 1.8456, "step": 414500 }, { "epoch": 0.92, "learning_rate": 4.505117354721252e-06, "loss": 1.8446, "step": 415000 }, { "epoch": 0.92, "learning_rate": 4.443610687398514e-06, "loss": 1.8441, "step": 415500 }, { "epoch": 0.92, "learning_rate": 4.382104020075776e-06, "loss": 1.8466, "step": 416000 }, { "epoch": 0.92, "learning_rate": 4.320597352753038e-06, "loss": 1.8424, "step": 416500 }, { "epoch": 0.92, "learning_rate": 4.259090685430301e-06, "loss": 1.8472, "step": 417000 }, { "epoch": 0.92, "learning_rate": 4.197584018107563e-06, "loss": 1.8423, "step": 417500 }, { "epoch": 0.93, "learning_rate": 4.1360773507848255e-06, "loss": 1.8463, "step": 418000 }, { "epoch": 0.93, "learning_rate": 4.074570683462087e-06, "loss": 1.8438, "step": 418500 }, { "epoch": 0.93, "learning_rate": 4.01306401613935e-06, "loss": 1.8399, "step": 419000 }, { "epoch": 0.93, "learning_rate": 3.951557348816612e-06, "loss": 1.8463, "step": 419500 }, { "epoch": 0.93, "learning_rate": 3.890050681493874e-06, "loss": 1.8404, "step": 420000 }, { "epoch": 0.93, "learning_rate": 3.828544014171137e-06, "loss": 1.8457, "step": 420500 }, { "epoch": 0.93, "learning_rate": 3.767037346848398e-06, "loss": 1.8451, "step": 421000 }, { "epoch": 0.93, "learning_rate": 3.705530679525661e-06, "loss": 1.8447, "step": 421500 }, { "epoch": 0.93, "learning_rate": 3.644024012202923e-06, "loss": 1.8427, "step": 422000 }, { "epoch": 0.94, "learning_rate": 3.582517344880185e-06, "loss": 1.8406, "step": 422500 }, { "epoch": 0.94, "learning_rate": 3.521010677557447e-06, "loss": 1.842, "step": 423000 }, { "epoch": 0.94, "learning_rate": 3.45950401023471e-06, "loss": 1.8426, "step": 423500 }, { "epoch": 0.94, "learning_rate": 3.397997342911972e-06, "loss": 1.8455, "step": 424000 }, { "epoch": 0.94, "learning_rate": 3.336490675589234e-06, "loss": 1.841, "step": 424500 }, { "epoch": 0.94, "learning_rate": 3.274984008266496e-06, "loss": 1.8418, "step": 425000 }, { "epoch": 0.94, "learning_rate": 3.213477340943759e-06, "loss": 1.8415, "step": 425500 }, { "epoch": 0.94, "learning_rate": 3.1519706736210207e-06, "loss": 1.8452, "step": 426000 }, { "epoch": 0.94, "learning_rate": 3.090464006298283e-06, "loss": 1.8472, "step": 426500 }, { "epoch": 0.95, "learning_rate": 3.0289573389755454e-06, "loss": 1.8403, "step": 427000 }, { "epoch": 0.95, "learning_rate": 2.9674506716528073e-06, "loss": 1.8406, "step": 427500 }, { "epoch": 0.95, "learning_rate": 2.9059440043300696e-06, "loss": 1.8425, "step": 428000 }, { "epoch": 0.95, "learning_rate": 2.8444373370073315e-06, "loss": 1.8417, "step": 428500 }, { "epoch": 0.95, "learning_rate": 2.782930669684594e-06, "loss": 1.8402, "step": 429000 }, { "epoch": 0.95, "learning_rate": 2.7214240023618562e-06, "loss": 1.8465, "step": 429500 }, { "epoch": 0.95, "learning_rate": 2.659917335039118e-06, "loss": 1.8454, "step": 430000 }, { "epoch": 0.95, "learning_rate": 2.5984106677163805e-06, "loss": 1.8389, "step": 430500 }, { "epoch": 0.95, "learning_rate": 2.536904000393643e-06, "loss": 1.8439, "step": 431000 }, { "epoch": 0.96, "learning_rate": 2.475397333070905e-06, "loss": 1.8451, "step": 431500 }, { "epoch": 0.96, "learning_rate": 2.413890665748167e-06, "loss": 1.8393, "step": 432000 }, { "epoch": 0.96, "learning_rate": 2.3523839984254294e-06, "loss": 1.8428, "step": 432500 }, { "epoch": 0.96, "learning_rate": 2.2908773311026917e-06, "loss": 1.8397, "step": 433000 }, { "epoch": 0.96, "learning_rate": 2.229370663779954e-06, "loss": 1.8435, "step": 433500 }, { "epoch": 0.96, "learning_rate": 2.1678639964572164e-06, "loss": 1.8411, "step": 434000 }, { "epoch": 0.96, "learning_rate": 2.1063573291344783e-06, "loss": 1.8435, "step": 434500 }, { "epoch": 0.96, "learning_rate": 2.0448506618117402e-06, "loss": 1.8427, "step": 435000 }, { "epoch": 0.96, "learning_rate": 1.9833439944890026e-06, "loss": 1.8371, "step": 435500 }, { "epoch": 0.97, "learning_rate": 1.921837327166265e-06, "loss": 1.8378, "step": 436000 }, { "epoch": 0.97, "learning_rate": 1.860330659843527e-06, "loss": 1.8403, "step": 436500 }, { "epoch": 0.97, "learning_rate": 1.7988239925207894e-06, "loss": 1.8427, "step": 437000 }, { "epoch": 0.97, "learning_rate": 1.7373173251980517e-06, "loss": 1.8414, "step": 437500 }, { "epoch": 0.97, "learning_rate": 1.6758106578753138e-06, "loss": 1.8371, "step": 438000 }, { "epoch": 0.97, "learning_rate": 1.6143039905525761e-06, "loss": 1.8388, "step": 438500 }, { "epoch": 0.97, "learning_rate": 1.552797323229838e-06, "loss": 1.8447, "step": 439000 }, { "epoch": 0.97, "learning_rate": 1.4912906559071004e-06, "loss": 1.8438, "step": 439500 }, { "epoch": 0.97, "learning_rate": 1.4297839885843625e-06, "loss": 1.84, "step": 440000 }, { "epoch": 0.98, "learning_rate": 1.3682773212616249e-06, "loss": 1.844, "step": 440500 }, { "epoch": 0.98, "learning_rate": 1.3067706539388872e-06, "loss": 1.8395, "step": 441000 }, { "epoch": 0.98, "learning_rate": 1.245263986616149e-06, "loss": 1.8408, "step": 441500 }, { "epoch": 0.98, "learning_rate": 1.1837573192934114e-06, "loss": 1.8426, "step": 442000 }, { "epoch": 0.98, "learning_rate": 1.1222506519706736e-06, "loss": 1.8412, "step": 442500 }, { "epoch": 0.98, "learning_rate": 1.060743984647936e-06, "loss": 1.8389, "step": 443000 }, { "epoch": 0.98, "learning_rate": 9.992373173251982e-07, "loss": 1.839, "step": 443500 }, { "epoch": 0.98, "learning_rate": 9.377306500024604e-07, "loss": 1.8411, "step": 444000 }, { "epoch": 0.98, "learning_rate": 8.762239826797225e-07, "loss": 1.843, "step": 444500 }, { "epoch": 0.99, "learning_rate": 8.147173153569847e-07, "loss": 1.84, "step": 445000 }, { "epoch": 0.99, "learning_rate": 7.532106480342469e-07, "loss": 1.847, "step": 445500 }, { "epoch": 0.99, "learning_rate": 6.917039807115092e-07, "loss": 1.8363, "step": 446000 }, { "epoch": 0.99, "learning_rate": 6.301973133887713e-07, "loss": 1.8402, "step": 446500 }, { "epoch": 0.99, "learning_rate": 5.686906460660336e-07, "loss": 1.8397, "step": 447000 }, { "epoch": 0.99, "learning_rate": 5.071839787432959e-07, "loss": 1.8424, "step": 447500 }, { "epoch": 0.99, "learning_rate": 4.45677311420558e-07, "loss": 1.8357, "step": 448000 }, { "epoch": 0.99, "learning_rate": 3.841706440978202e-07, "loss": 1.843, "step": 448500 }, { "epoch": 0.99, "learning_rate": 3.2266397677508245e-07, "loss": 1.8371, "step": 449000 }, { "epoch": 1.0, "learning_rate": 2.6115730945234463e-07, "loss": 1.8429, "step": 449500 }, { "epoch": 1.0, "learning_rate": 1.9965064212960688e-07, "loss": 1.8424, "step": 450000 }, { "epoch": 1.0, "learning_rate": 1.381439748068691e-07, "loss": 1.8414, "step": 450500 }, { "epoch": 1.0, "learning_rate": 7.663730748413129e-08, "loss": 1.8446, "step": 451000 }, { "epoch": 1.0, "learning_rate": 1.5130640161393495e-08, "loss": 1.8381, "step": 451500 }, { "epoch": 1.0, "step": 451623, "total_flos": 1.5894400168611545e+19, "train_loss": 0.6229404193166468, "train_runtime": 298967.217, "train_samples_per_second": 870.111, "train_steps_per_second": 1.511 } ], "max_steps": 451623, "num_train_epochs": 1, "total_flos": 1.5894400168611545e+19, "trial_name": null, "trial_params": null }