doc2txt_model2 / trainer_state.json
danurahul's picture
Initial commit
4c75ca8
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 16.402405686167306,
"global_step": 120000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"learning_rate": 5e-05,
"loss": 3.1294,
"step": 500
},
{
"epoch": 0.14,
"learning_rate": 4.999658259859204e-05,
"loss": 3.1422,
"step": 1000
},
{
"epoch": 0.21,
"learning_rate": 4.999316519718406e-05,
"loss": 3.1479,
"step": 1500
},
{
"epoch": 0.27,
"learning_rate": 4.998974779577609e-05,
"loss": 3.1412,
"step": 2000
},
{
"epoch": 0.34,
"learning_rate": 4.998633039436813e-05,
"loss": 3.1387,
"step": 2500
},
{
"epoch": 0.41,
"learning_rate": 4.9982912992960154e-05,
"loss": 3.1344,
"step": 3000
},
{
"epoch": 0.48,
"learning_rate": 4.997949559155218e-05,
"loss": 3.1353,
"step": 3500
},
{
"epoch": 0.55,
"learning_rate": 4.9976078190144217e-05,
"loss": 3.1283,
"step": 4000
},
{
"epoch": 0.62,
"learning_rate": 4.9972660788736244e-05,
"loss": 3.1291,
"step": 4500
},
{
"epoch": 0.68,
"learning_rate": 4.996924338732828e-05,
"loss": 3.1226,
"step": 5000
},
{
"epoch": 0.75,
"learning_rate": 4.9965825985920306e-05,
"loss": 3.1173,
"step": 5500
},
{
"epoch": 0.82,
"learning_rate": 4.996240858451234e-05,
"loss": 3.1126,
"step": 6000
},
{
"epoch": 0.89,
"learning_rate": 4.995899118310437e-05,
"loss": 3.1117,
"step": 6500
},
{
"epoch": 0.96,
"learning_rate": 4.99555737816964e-05,
"loss": 3.1037,
"step": 7000
},
{
"epoch": 1.03,
"learning_rate": 4.995215638028843e-05,
"loss": 3.0981,
"step": 7500
},
{
"epoch": 1.09,
"learning_rate": 4.994873897888046e-05,
"loss": 3.0682,
"step": 8000
},
{
"epoch": 1.16,
"learning_rate": 4.994532157747249e-05,
"loss": 3.0697,
"step": 8500
},
{
"epoch": 1.23,
"learning_rate": 4.994190417606453e-05,
"loss": 3.0673,
"step": 9000
},
{
"epoch": 1.3,
"learning_rate": 4.993848677465655e-05,
"loss": 3.0703,
"step": 9500
},
{
"epoch": 1.37,
"learning_rate": 4.993506937324858e-05,
"loss": 3.0624,
"step": 10000
},
{
"epoch": 1.44,
"learning_rate": 4.993165197184062e-05,
"loss": 3.0618,
"step": 10500
},
{
"epoch": 1.5,
"learning_rate": 4.9928234570432645e-05,
"loss": 3.0612,
"step": 11000
},
{
"epoch": 1.57,
"learning_rate": 4.992481716902467e-05,
"loss": 3.0651,
"step": 11500
},
{
"epoch": 1.64,
"learning_rate": 4.992139976761671e-05,
"loss": 3.0555,
"step": 12000
},
{
"epoch": 1.71,
"learning_rate": 4.9917982366208735e-05,
"loss": 3.0609,
"step": 12500
},
{
"epoch": 1.78,
"learning_rate": 4.991456496480077e-05,
"loss": 3.0564,
"step": 13000
},
{
"epoch": 1.85,
"learning_rate": 4.99111475633928e-05,
"loss": 3.0581,
"step": 13500
},
{
"epoch": 1.91,
"learning_rate": 4.990773016198483e-05,
"loss": 3.054,
"step": 14000
},
{
"epoch": 1.98,
"learning_rate": 4.990431276057686e-05,
"loss": 3.0559,
"step": 14500
},
{
"epoch": 2.05,
"learning_rate": 4.990089535916889e-05,
"loss": 3.0273,
"step": 15000
},
{
"epoch": 2.12,
"learning_rate": 4.989747795776092e-05,
"loss": 3.0221,
"step": 15500
},
{
"epoch": 2.19,
"learning_rate": 4.989406055635295e-05,
"loss": 3.0218,
"step": 16000
},
{
"epoch": 2.26,
"learning_rate": 4.989064315494498e-05,
"loss": 3.0245,
"step": 16500
},
{
"epoch": 2.32,
"learning_rate": 4.988722575353702e-05,
"loss": 3.0237,
"step": 17000
},
{
"epoch": 2.39,
"learning_rate": 4.988380835212904e-05,
"loss": 3.0224,
"step": 17500
},
{
"epoch": 2.46,
"learning_rate": 4.988039095072107e-05,
"loss": 3.0183,
"step": 18000
},
{
"epoch": 2.53,
"learning_rate": 4.987697354931311e-05,
"loss": 3.0113,
"step": 18500
},
{
"epoch": 2.6,
"learning_rate": 4.9873556147905135e-05,
"loss": 3.0211,
"step": 19000
},
{
"epoch": 2.67,
"learning_rate": 4.987013874649716e-05,
"loss": 3.0253,
"step": 19500
},
{
"epoch": 2.73,
"learning_rate": 4.98667213450892e-05,
"loss": 3.0143,
"step": 20000
},
{
"epoch": 2.8,
"learning_rate": 4.9863303943681225e-05,
"loss": 3.0192,
"step": 20500
},
{
"epoch": 2.87,
"learning_rate": 4.985988654227326e-05,
"loss": 3.012,
"step": 21000
},
{
"epoch": 2.94,
"learning_rate": 4.985646914086529e-05,
"loss": 3.016,
"step": 21500
},
{
"epoch": 3.01,
"learning_rate": 4.985305173945732e-05,
"loss": 3.0087,
"step": 22000
},
{
"epoch": 3.08,
"learning_rate": 4.984963433804935e-05,
"loss": 2.9676,
"step": 22500
},
{
"epoch": 3.14,
"learning_rate": 4.9846216936641384e-05,
"loss": 2.9644,
"step": 23000
},
{
"epoch": 3.21,
"learning_rate": 4.984279953523341e-05,
"loss": 2.9714,
"step": 23500
},
{
"epoch": 3.28,
"learning_rate": 4.983938213382544e-05,
"loss": 2.9816,
"step": 24000
},
{
"epoch": 3.35,
"learning_rate": 4.9835964732417474e-05,
"loss": 2.9759,
"step": 24500
},
{
"epoch": 3.42,
"learning_rate": 4.983254733100951e-05,
"loss": 2.977,
"step": 25000
},
{
"epoch": 3.49,
"learning_rate": 4.982912992960153e-05,
"loss": 2.9841,
"step": 25500
},
{
"epoch": 3.55,
"learning_rate": 4.982571252819356e-05,
"loss": 2.9731,
"step": 26000
},
{
"epoch": 3.62,
"learning_rate": 4.98222951267856e-05,
"loss": 2.9716,
"step": 26500
},
{
"epoch": 3.69,
"learning_rate": 4.9818877725377625e-05,
"loss": 2.9762,
"step": 27000
},
{
"epoch": 3.76,
"learning_rate": 4.981546032396965e-05,
"loss": 2.9774,
"step": 27500
},
{
"epoch": 3.83,
"learning_rate": 4.981204292256169e-05,
"loss": 2.9743,
"step": 28000
},
{
"epoch": 3.9,
"learning_rate": 4.9808625521153715e-05,
"loss": 2.9737,
"step": 28500
},
{
"epoch": 3.96,
"learning_rate": 4.980520811974575e-05,
"loss": 2.9701,
"step": 29000
},
{
"epoch": 4.03,
"learning_rate": 4.980179071833778e-05,
"loss": 2.9589,
"step": 29500
},
{
"epoch": 4.1,
"learning_rate": 4.9798373316929805e-05,
"loss": 2.926,
"step": 30000
},
{
"epoch": 4.17,
"learning_rate": 4.979495591552184e-05,
"loss": 2.9327,
"step": 30500
},
{
"epoch": 4.24,
"learning_rate": 4.9791538514113874e-05,
"loss": 2.9401,
"step": 31000
},
{
"epoch": 4.31,
"learning_rate": 4.97881211127059e-05,
"loss": 2.9345,
"step": 31500
},
{
"epoch": 4.37,
"learning_rate": 4.978470371129793e-05,
"loss": 2.9348,
"step": 32000
},
{
"epoch": 4.44,
"learning_rate": 4.9781286309889964e-05,
"loss": 2.9386,
"step": 32500
},
{
"epoch": 4.51,
"learning_rate": 4.977786890848199e-05,
"loss": 2.9312,
"step": 33000
},
{
"epoch": 4.58,
"learning_rate": 4.977445150707402e-05,
"loss": 2.9361,
"step": 33500
},
{
"epoch": 4.65,
"learning_rate": 4.9771034105666054e-05,
"loss": 2.9316,
"step": 34000
},
{
"epoch": 4.72,
"learning_rate": 4.976761670425809e-05,
"loss": 2.9397,
"step": 34500
},
{
"epoch": 4.78,
"learning_rate": 4.9764199302850116e-05,
"loss": 2.9377,
"step": 35000
},
{
"epoch": 4.85,
"learning_rate": 4.9760781901442144e-05,
"loss": 2.9378,
"step": 35500
},
{
"epoch": 4.92,
"learning_rate": 4.975736450003418e-05,
"loss": 2.9306,
"step": 36000
},
{
"epoch": 4.99,
"learning_rate": 4.9753947098626206e-05,
"loss": 2.9339,
"step": 36500
},
{
"epoch": 5.06,
"learning_rate": 4.975052969721824e-05,
"loss": 2.8964,
"step": 37000
},
{
"epoch": 5.13,
"learning_rate": 4.974711229581027e-05,
"loss": 2.8929,
"step": 37500
},
{
"epoch": 5.19,
"learning_rate": 4.9743694894402296e-05,
"loss": 2.901,
"step": 38000
},
{
"epoch": 5.26,
"learning_rate": 4.974027749299433e-05,
"loss": 2.8934,
"step": 38500
},
{
"epoch": 5.33,
"learning_rate": 4.9736860091586364e-05,
"loss": 2.9019,
"step": 39000
},
{
"epoch": 5.4,
"learning_rate": 4.973344269017839e-05,
"loss": 2.9037,
"step": 39500
},
{
"epoch": 5.47,
"learning_rate": 4.973002528877042e-05,
"loss": 2.8915,
"step": 40000
},
{
"epoch": 5.54,
"learning_rate": 4.9726607887362454e-05,
"loss": 2.899,
"step": 40500
},
{
"epoch": 5.6,
"learning_rate": 4.972319048595448e-05,
"loss": 2.9026,
"step": 41000
},
{
"epoch": 5.67,
"learning_rate": 4.971977308454651e-05,
"loss": 2.9062,
"step": 41500
},
{
"epoch": 5.74,
"learning_rate": 4.9716355683138544e-05,
"loss": 2.8974,
"step": 42000
},
{
"epoch": 5.81,
"learning_rate": 4.971293828173058e-05,
"loss": 2.8969,
"step": 42500
},
{
"epoch": 5.88,
"learning_rate": 4.9709520880322606e-05,
"loss": 2.8972,
"step": 43000
},
{
"epoch": 5.95,
"learning_rate": 4.9706103478914634e-05,
"loss": 2.8994,
"step": 43500
},
{
"epoch": 6.01,
"learning_rate": 4.970268607750667e-05,
"loss": 2.8925,
"step": 44000
},
{
"epoch": 6.08,
"learning_rate": 4.9699268676098696e-05,
"loss": 2.8637,
"step": 44500
},
{
"epoch": 6.15,
"learning_rate": 4.969585127469073e-05,
"loss": 2.859,
"step": 45000
},
{
"epoch": 6.22,
"learning_rate": 4.969243387328276e-05,
"loss": 2.8624,
"step": 45500
},
{
"epoch": 6.29,
"learning_rate": 4.9689016471874786e-05,
"loss": 2.8595,
"step": 46000
},
{
"epoch": 6.36,
"learning_rate": 4.968559907046682e-05,
"loss": 2.8632,
"step": 46500
},
{
"epoch": 6.42,
"learning_rate": 4.9682181669058855e-05,
"loss": 2.8609,
"step": 47000
},
{
"epoch": 6.49,
"learning_rate": 4.9678764267650876e-05,
"loss": 2.8702,
"step": 47500
},
{
"epoch": 6.56,
"learning_rate": 4.967534686624291e-05,
"loss": 2.8714,
"step": 48000
},
{
"epoch": 6.63,
"learning_rate": 4.9671929464834945e-05,
"loss": 2.8636,
"step": 48500
},
{
"epoch": 6.7,
"learning_rate": 4.966851206342697e-05,
"loss": 2.8666,
"step": 49000
},
{
"epoch": 6.77,
"learning_rate": 4.9665094662019e-05,
"loss": 2.8701,
"step": 49500
},
{
"epoch": 6.83,
"learning_rate": 4.9661677260611034e-05,
"loss": 2.87,
"step": 50000
},
{
"epoch": 6.9,
"learning_rate": 4.965825985920307e-05,
"loss": 2.8608,
"step": 50500
},
{
"epoch": 6.97,
"learning_rate": 4.96548424577951e-05,
"loss": 2.867,
"step": 51000
},
{
"epoch": 7.04,
"learning_rate": 4.9651425056387124e-05,
"loss": 2.8444,
"step": 51500
},
{
"epoch": 7.11,
"learning_rate": 4.964800765497916e-05,
"loss": 2.8337,
"step": 52000
},
{
"epoch": 7.18,
"learning_rate": 4.9644590253571186e-05,
"loss": 2.8337,
"step": 52500
},
{
"epoch": 7.24,
"learning_rate": 4.964117285216322e-05,
"loss": 2.8319,
"step": 53000
},
{
"epoch": 7.31,
"learning_rate": 4.963775545075525e-05,
"loss": 2.8317,
"step": 53500
},
{
"epoch": 7.38,
"learning_rate": 4.9634338049347276e-05,
"loss": 2.8342,
"step": 54000
},
{
"epoch": 7.45,
"learning_rate": 4.963092064793931e-05,
"loss": 2.8273,
"step": 54500
},
{
"epoch": 7.52,
"learning_rate": 4.962750324653134e-05,
"loss": 2.8347,
"step": 55000
},
{
"epoch": 7.59,
"learning_rate": 4.9624085845123366e-05,
"loss": 2.8383,
"step": 55500
},
{
"epoch": 7.65,
"learning_rate": 4.96206684437154e-05,
"loss": 2.8313,
"step": 56000
},
{
"epoch": 7.72,
"learning_rate": 4.9617251042307435e-05,
"loss": 2.8387,
"step": 56500
},
{
"epoch": 7.79,
"learning_rate": 4.961383364089946e-05,
"loss": 2.834,
"step": 57000
},
{
"epoch": 7.86,
"learning_rate": 4.961041623949149e-05,
"loss": 2.8377,
"step": 57500
},
{
"epoch": 7.93,
"learning_rate": 4.9606998838083525e-05,
"loss": 2.8365,
"step": 58000
},
{
"epoch": 8.0,
"learning_rate": 4.960358143667555e-05,
"loss": 2.8338,
"step": 58500
},
{
"epoch": 8.06,
"learning_rate": 4.960016403526758e-05,
"loss": 2.7898,
"step": 59000
},
{
"epoch": 8.13,
"learning_rate": 4.9596746633859615e-05,
"loss": 2.7955,
"step": 59500
},
{
"epoch": 8.2,
"learning_rate": 4.959332923245165e-05,
"loss": 2.7901,
"step": 60000
},
{
"epoch": 8.27,
"learning_rate": 4.958991183104368e-05,
"loss": 2.7974,
"step": 60500
},
{
"epoch": 8.34,
"learning_rate": 4.9586494429635705e-05,
"loss": 2.8055,
"step": 61000
},
{
"epoch": 8.41,
"learning_rate": 4.958307702822774e-05,
"loss": 2.8088,
"step": 61500
},
{
"epoch": 8.47,
"learning_rate": 4.957965962681977e-05,
"loss": 2.8061,
"step": 62000
},
{
"epoch": 8.54,
"learning_rate": 4.95762422254118e-05,
"loss": 2.8005,
"step": 62500
},
{
"epoch": 8.61,
"learning_rate": 4.957282482400383e-05,
"loss": 2.8056,
"step": 63000
},
{
"epoch": 8.68,
"learning_rate": 4.9569407422595856e-05,
"loss": 2.8096,
"step": 63500
},
{
"epoch": 8.75,
"learning_rate": 4.956599002118789e-05,
"loss": 2.8077,
"step": 64000
},
{
"epoch": 8.82,
"learning_rate": 4.9562572619779925e-05,
"loss": 2.8107,
"step": 64500
},
{
"epoch": 8.88,
"learning_rate": 4.955915521837195e-05,
"loss": 2.8123,
"step": 65000
},
{
"epoch": 8.95,
"learning_rate": 4.955573781696398e-05,
"loss": 2.8121,
"step": 65500
},
{
"epoch": 9.02,
"learning_rate": 4.9552320415556015e-05,
"loss": 2.7953,
"step": 66000
},
{
"epoch": 9.09,
"learning_rate": 4.954890301414804e-05,
"loss": 2.7705,
"step": 66500
},
{
"epoch": 9.16,
"learning_rate": 4.954548561274007e-05,
"loss": 2.7682,
"step": 67000
},
{
"epoch": 9.23,
"learning_rate": 4.9542068211332105e-05,
"loss": 2.7656,
"step": 67500
},
{
"epoch": 9.29,
"learning_rate": 4.953865080992414e-05,
"loss": 2.7742,
"step": 68000
},
{
"epoch": 9.36,
"learning_rate": 4.953523340851617e-05,
"loss": 2.7677,
"step": 68500
},
{
"epoch": 9.43,
"learning_rate": 4.9531816007108195e-05,
"loss": 2.7772,
"step": 69000
},
{
"epoch": 9.5,
"learning_rate": 4.952839860570023e-05,
"loss": 2.7756,
"step": 69500
},
{
"epoch": 9.57,
"learning_rate": 4.952498120429226e-05,
"loss": 2.7735,
"step": 70000
},
{
"epoch": 9.64,
"learning_rate": 4.952156380288429e-05,
"loss": 2.7794,
"step": 70500
},
{
"epoch": 9.7,
"learning_rate": 4.951814640147632e-05,
"loss": 2.7803,
"step": 71000
},
{
"epoch": 9.77,
"learning_rate": 4.951472900006835e-05,
"loss": 2.7819,
"step": 71500
},
{
"epoch": 9.84,
"learning_rate": 4.951131159866038e-05,
"loss": 2.7798,
"step": 72000
},
{
"epoch": 9.91,
"learning_rate": 4.9507894197252416e-05,
"loss": 2.7858,
"step": 72500
},
{
"epoch": 9.98,
"learning_rate": 4.950447679584444e-05,
"loss": 2.7831,
"step": 73000
},
{
"epoch": 10.05,
"learning_rate": 4.950105939443647e-05,
"loss": 2.7485,
"step": 73500
},
{
"epoch": 10.11,
"learning_rate": 4.9497641993028506e-05,
"loss": 2.7361,
"step": 74000
},
{
"epoch": 10.18,
"learning_rate": 4.949422459162053e-05,
"loss": 2.739,
"step": 74500
},
{
"epoch": 10.25,
"learning_rate": 4.949080719021256e-05,
"loss": 2.7434,
"step": 75000
},
{
"epoch": 10.32,
"learning_rate": 4.9487389788804595e-05,
"loss": 2.7427,
"step": 75500
},
{
"epoch": 10.39,
"learning_rate": 4.948397238739663e-05,
"loss": 2.7424,
"step": 76000
},
{
"epoch": 10.46,
"learning_rate": 4.948055498598866e-05,
"loss": 2.756,
"step": 76500
},
{
"epoch": 10.52,
"learning_rate": 4.9477137584580685e-05,
"loss": 2.7434,
"step": 77000
},
{
"epoch": 10.59,
"learning_rate": 4.947372018317272e-05,
"loss": 2.7512,
"step": 77500
},
{
"epoch": 10.66,
"learning_rate": 4.947030278176475e-05,
"loss": 2.7527,
"step": 78000
},
{
"epoch": 10.73,
"learning_rate": 4.946688538035678e-05,
"loss": 2.7605,
"step": 78500
},
{
"epoch": 10.8,
"learning_rate": 4.946346797894881e-05,
"loss": 2.7624,
"step": 79000
},
{
"epoch": 10.87,
"learning_rate": 4.946005057754084e-05,
"loss": 2.7563,
"step": 79500
},
{
"epoch": 10.93,
"learning_rate": 4.945663317613287e-05,
"loss": 2.7531,
"step": 80000
},
{
"epoch": 11.0,
"learning_rate": 4.9453215774724906e-05,
"loss": 2.7539,
"step": 80500
},
{
"epoch": 11.07,
"learning_rate": 4.944979837331693e-05,
"loss": 2.7114,
"step": 81000
},
{
"epoch": 11.14,
"learning_rate": 4.944638097190896e-05,
"loss": 2.7128,
"step": 81500
},
{
"epoch": 11.21,
"learning_rate": 4.9442963570500996e-05,
"loss": 2.7177,
"step": 82000
},
{
"epoch": 11.28,
"learning_rate": 4.9439546169093024e-05,
"loss": 2.7212,
"step": 82500
},
{
"epoch": 11.34,
"learning_rate": 4.943612876768505e-05,
"loss": 2.7224,
"step": 83000
},
{
"epoch": 11.41,
"learning_rate": 4.9432711366277086e-05,
"loss": 2.7234,
"step": 83500
},
{
"epoch": 11.48,
"learning_rate": 4.9429293964869113e-05,
"loss": 2.7233,
"step": 84000
},
{
"epoch": 11.55,
"learning_rate": 4.942587656346115e-05,
"loss": 2.7297,
"step": 84500
},
{
"epoch": 11.62,
"learning_rate": 4.9422459162053176e-05,
"loss": 2.7211,
"step": 85000
},
{
"epoch": 11.69,
"learning_rate": 4.941904176064521e-05,
"loss": 2.7282,
"step": 85500
},
{
"epoch": 11.76,
"learning_rate": 4.941562435923724e-05,
"loss": 2.7272,
"step": 86000
},
{
"epoch": 11.82,
"learning_rate": 4.941220695782927e-05,
"loss": 2.7325,
"step": 86500
},
{
"epoch": 11.89,
"learning_rate": 4.94087895564213e-05,
"loss": 2.7275,
"step": 87000
},
{
"epoch": 11.96,
"learning_rate": 4.940537215501333e-05,
"loss": 2.7303,
"step": 87500
},
{
"epoch": 12.03,
"learning_rate": 4.940195475360536e-05,
"loss": 2.7091,
"step": 88000
},
{
"epoch": 12.1,
"learning_rate": 4.9398537352197397e-05,
"loss": 2.6863,
"step": 88500
},
{
"epoch": 12.17,
"learning_rate": 4.939511995078942e-05,
"loss": 2.6867,
"step": 89000
},
{
"epoch": 12.23,
"learning_rate": 4.939170254938145e-05,
"loss": 2.6934,
"step": 89500
},
{
"epoch": 12.3,
"learning_rate": 4.9388285147973486e-05,
"loss": 2.6936,
"step": 90000
},
{
"epoch": 12.37,
"learning_rate": 4.9384867746565514e-05,
"loss": 2.6953,
"step": 90500
},
{
"epoch": 12.44,
"learning_rate": 4.938145034515754e-05,
"loss": 2.6973,
"step": 91000
},
{
"epoch": 12.51,
"learning_rate": 4.9378032943749576e-05,
"loss": 2.7022,
"step": 91500
},
{
"epoch": 12.58,
"learning_rate": 4.9374615542341604e-05,
"loss": 2.6977,
"step": 92000
},
{
"epoch": 12.64,
"learning_rate": 4.937119814093364e-05,
"loss": 2.6996,
"step": 92500
},
{
"epoch": 12.71,
"learning_rate": 4.9367780739525666e-05,
"loss": 2.7008,
"step": 93000
},
{
"epoch": 12.78,
"learning_rate": 4.93643633381177e-05,
"loss": 2.7091,
"step": 93500
},
{
"epoch": 12.85,
"learning_rate": 4.936094593670973e-05,
"loss": 2.7057,
"step": 94000
},
{
"epoch": 12.92,
"learning_rate": 4.935752853530176e-05,
"loss": 2.7084,
"step": 94500
},
{
"epoch": 12.99,
"learning_rate": 4.935411113389379e-05,
"loss": 2.7114,
"step": 95000
},
{
"epoch": 13.05,
"learning_rate": 4.935069373248582e-05,
"loss": 2.6727,
"step": 95500
},
{
"epoch": 13.12,
"learning_rate": 4.934727633107785e-05,
"loss": 2.6586,
"step": 96000
},
{
"epoch": 13.19,
"learning_rate": 4.934385892966989e-05,
"loss": 2.6631,
"step": 96500
},
{
"epoch": 13.26,
"learning_rate": 4.934044152826191e-05,
"loss": 2.6632,
"step": 97000
},
{
"epoch": 13.33,
"learning_rate": 4.933702412685394e-05,
"loss": 2.6727,
"step": 97500
},
{
"epoch": 13.4,
"learning_rate": 4.933360672544598e-05,
"loss": 2.6671,
"step": 98000
},
{
"epoch": 13.46,
"learning_rate": 4.9330189324038004e-05,
"loss": 2.686,
"step": 98500
},
{
"epoch": 13.53,
"learning_rate": 4.932677192263003e-05,
"loss": 2.6796,
"step": 99000
},
{
"epoch": 13.6,
"learning_rate": 4.9323354521222067e-05,
"loss": 2.6745,
"step": 99500
},
{
"epoch": 13.67,
"learning_rate": 4.9319937119814094e-05,
"loss": 2.6825,
"step": 100000
},
{
"epoch": 13.74,
"learning_rate": 4.931651971840613e-05,
"loss": 2.6857,
"step": 100500
},
{
"epoch": 13.81,
"learning_rate": 4.9313102316998156e-05,
"loss": 2.6812,
"step": 101000
},
{
"epoch": 13.87,
"learning_rate": 4.9309684915590184e-05,
"loss": 2.6805,
"step": 101500
},
{
"epoch": 13.94,
"learning_rate": 4.930626751418222e-05,
"loss": 2.6857,
"step": 102000
},
{
"epoch": 14.01,
"learning_rate": 4.930285011277425e-05,
"loss": 2.6768,
"step": 102500
},
{
"epoch": 14.08,
"learning_rate": 4.929943271136628e-05,
"loss": 2.6435,
"step": 103000
},
{
"epoch": 14.15,
"learning_rate": 4.929601530995831e-05,
"loss": 2.6508,
"step": 103500
},
{
"epoch": 14.22,
"learning_rate": 4.929259790855034e-05,
"loss": 2.6444,
"step": 104000
},
{
"epoch": 14.28,
"learning_rate": 4.928918050714238e-05,
"loss": 2.6396,
"step": 104500
},
{
"epoch": 14.35,
"learning_rate": 4.92857631057344e-05,
"loss": 2.6452,
"step": 105000
},
{
"epoch": 14.42,
"learning_rate": 4.928234570432643e-05,
"loss": 2.6535,
"step": 105500
},
{
"epoch": 14.49,
"learning_rate": 4.927892830291847e-05,
"loss": 2.6482,
"step": 106000
},
{
"epoch": 14.56,
"learning_rate": 4.9275510901510495e-05,
"loss": 2.6567,
"step": 106500
},
{
"epoch": 14.63,
"learning_rate": 4.927209350010252e-05,
"loss": 2.6589,
"step": 107000
},
{
"epoch": 14.69,
"learning_rate": 4.926867609869456e-05,
"loss": 2.6564,
"step": 107500
},
{
"epoch": 14.76,
"learning_rate": 4.9265258697286585e-05,
"loss": 2.6579,
"step": 108000
},
{
"epoch": 14.83,
"learning_rate": 4.926184129587861e-05,
"loss": 2.658,
"step": 108500
},
{
"epoch": 14.9,
"learning_rate": 4.925842389447065e-05,
"loss": 2.6658,
"step": 109000
},
{
"epoch": 14.97,
"learning_rate": 4.9255006493062674e-05,
"loss": 2.6535,
"step": 109500
},
{
"epoch": 15.04,
"learning_rate": 4.925158909165471e-05,
"loss": 2.6404,
"step": 110000
},
{
"epoch": 15.1,
"learning_rate": 4.9248171690246737e-05,
"loss": 2.6185,
"step": 110500
},
{
"epoch": 15.17,
"learning_rate": 4.924475428883877e-05,
"loss": 2.6158,
"step": 111000
},
{
"epoch": 15.24,
"learning_rate": 4.92413368874308e-05,
"loss": 2.621,
"step": 111500
},
{
"epoch": 15.31,
"learning_rate": 4.923791948602283e-05,
"loss": 2.6296,
"step": 112000
},
{
"epoch": 15.38,
"learning_rate": 4.923450208461486e-05,
"loss": 2.6284,
"step": 112500
},
{
"epoch": 15.45,
"learning_rate": 4.923108468320689e-05,
"loss": 2.6252,
"step": 113000
},
{
"epoch": 15.51,
"learning_rate": 4.922766728179892e-05,
"loss": 2.6355,
"step": 113500
},
{
"epoch": 15.58,
"learning_rate": 4.922424988039096e-05,
"loss": 2.6337,
"step": 114000
},
{
"epoch": 15.65,
"learning_rate": 4.922083247898298e-05,
"loss": 2.6337,
"step": 114500
},
{
"epoch": 15.72,
"learning_rate": 4.921741507757501e-05,
"loss": 2.6354,
"step": 115000
},
{
"epoch": 15.79,
"learning_rate": 4.921399767616705e-05,
"loss": 2.6371,
"step": 115500
},
{
"epoch": 15.86,
"learning_rate": 4.9210580274759075e-05,
"loss": 2.6427,
"step": 116000
},
{
"epoch": 15.92,
"learning_rate": 4.92071628733511e-05,
"loss": 2.639,
"step": 116500
},
{
"epoch": 15.99,
"learning_rate": 4.920374547194314e-05,
"loss": 2.6426,
"step": 117000
},
{
"epoch": 16.06,
"learning_rate": 4.9200328070535165e-05,
"loss": 2.5977,
"step": 117500
},
{
"epoch": 16.13,
"learning_rate": 4.91969106691272e-05,
"loss": 2.5961,
"step": 118000
},
{
"epoch": 16.2,
"learning_rate": 4.919349326771923e-05,
"loss": 2.6007,
"step": 118500
},
{
"epoch": 16.27,
"learning_rate": 4.919007586631126e-05,
"loss": 2.6015,
"step": 119000
},
{
"epoch": 16.33,
"learning_rate": 4.918665846490329e-05,
"loss": 2.6064,
"step": 119500
},
{
"epoch": 16.4,
"learning_rate": 4.9183241063495324e-05,
"loss": 2.6125,
"step": 120000
}
],
"max_steps": 7316000,
"num_train_epochs": 1000,
"total_flos": 241553911465377792,
"trial_name": null,
"trial_params": null
}