chatbotgpt-turkish-latin / trainer_state.json
cenkersisman's picture
Upload 11 files
ab57cca
raw
history blame
No virus
216 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 62.24066390041494,
"global_step": 870000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 4.9982114751752754e-05,
"loss": 4.1215,
"step": 500
},
{
"epoch": 0.07,
"learning_rate": 4.996422950350551e-05,
"loss": 3.3722,
"step": 1000
},
{
"epoch": 0.11,
"learning_rate": 4.9946344255258265e-05,
"loss": 3.0718,
"step": 1500
},
{
"epoch": 0.14,
"learning_rate": 4.9928459007011016e-05,
"loss": 2.944,
"step": 2000
},
{
"epoch": 0.18,
"learning_rate": 4.9910573758763775e-05,
"loss": 2.7645,
"step": 2500
},
{
"epoch": 0.21,
"learning_rate": 4.989268851051653e-05,
"loss": 2.7008,
"step": 3000
},
{
"epoch": 0.25,
"learning_rate": 4.987480326226928e-05,
"loss": 2.6103,
"step": 3500
},
{
"epoch": 0.29,
"learning_rate": 4.985691801402204e-05,
"loss": 2.5739,
"step": 4000
},
{
"epoch": 0.32,
"learning_rate": 4.983903276577479e-05,
"loss": 2.4965,
"step": 4500
},
{
"epoch": 0.36,
"learning_rate": 4.982114751752755e-05,
"loss": 2.4231,
"step": 5000
},
{
"epoch": 0.39,
"learning_rate": 4.98032622692803e-05,
"loss": 2.3954,
"step": 5500
},
{
"epoch": 0.43,
"learning_rate": 4.978537702103305e-05,
"loss": 2.365,
"step": 6000
},
{
"epoch": 0.47,
"learning_rate": 4.976749177278581e-05,
"loss": 2.3183,
"step": 6500
},
{
"epoch": 0.5,
"learning_rate": 4.974960652453856e-05,
"loss": 2.3116,
"step": 7000
},
{
"epoch": 0.54,
"learning_rate": 4.973172127629131e-05,
"loss": 2.2515,
"step": 7500
},
{
"epoch": 0.57,
"learning_rate": 4.971383602804407e-05,
"loss": 2.2368,
"step": 8000
},
{
"epoch": 0.61,
"learning_rate": 4.9695950779796824e-05,
"loss": 2.2348,
"step": 8500
},
{
"epoch": 0.64,
"learning_rate": 4.9678065531549575e-05,
"loss": 2.2251,
"step": 9000
},
{
"epoch": 0.68,
"learning_rate": 4.9660180283302334e-05,
"loss": 2.1608,
"step": 9500
},
{
"epoch": 0.72,
"learning_rate": 4.9642295035055086e-05,
"loss": 2.1046,
"step": 10000
},
{
"epoch": 0.75,
"learning_rate": 4.962440978680784e-05,
"loss": 2.1329,
"step": 10500
},
{
"epoch": 0.79,
"learning_rate": 4.96065245385606e-05,
"loss": 2.103,
"step": 11000
},
{
"epoch": 0.82,
"learning_rate": 4.9588639290313355e-05,
"loss": 2.107,
"step": 11500
},
{
"epoch": 0.86,
"learning_rate": 4.9570754042066106e-05,
"loss": 2.0909,
"step": 12000
},
{
"epoch": 0.89,
"learning_rate": 4.9552868793818865e-05,
"loss": 2.0483,
"step": 12500
},
{
"epoch": 0.93,
"learning_rate": 4.953498354557162e-05,
"loss": 2.0902,
"step": 13000
},
{
"epoch": 0.97,
"learning_rate": 4.951709829732437e-05,
"loss": 2.0132,
"step": 13500
},
{
"epoch": 1.0,
"learning_rate": 4.949921304907713e-05,
"loss": 1.9799,
"step": 14000
},
{
"epoch": 1.04,
"learning_rate": 4.948132780082988e-05,
"loss": 1.8414,
"step": 14500
},
{
"epoch": 1.07,
"learning_rate": 4.946344255258263e-05,
"loss": 1.8561,
"step": 15000
},
{
"epoch": 1.11,
"learning_rate": 4.944555730433539e-05,
"loss": 1.8412,
"step": 15500
},
{
"epoch": 1.14,
"learning_rate": 4.942767205608814e-05,
"loss": 1.8654,
"step": 16000
},
{
"epoch": 1.18,
"learning_rate": 4.94097868078409e-05,
"loss": 1.8863,
"step": 16500
},
{
"epoch": 1.22,
"learning_rate": 4.939190155959365e-05,
"loss": 1.8676,
"step": 17000
},
{
"epoch": 1.25,
"learning_rate": 4.93740163113464e-05,
"loss": 1.8614,
"step": 17500
},
{
"epoch": 1.29,
"learning_rate": 4.935613106309916e-05,
"loss": 1.8483,
"step": 18000
},
{
"epoch": 1.32,
"learning_rate": 4.9338245814851914e-05,
"loss": 1.7865,
"step": 18500
},
{
"epoch": 1.36,
"learning_rate": 4.9320360566604665e-05,
"loss": 1.7937,
"step": 19000
},
{
"epoch": 1.4,
"learning_rate": 4.9302475318357424e-05,
"loss": 1.8259,
"step": 19500
},
{
"epoch": 1.43,
"learning_rate": 4.9284590070110176e-05,
"loss": 1.818,
"step": 20000
},
{
"epoch": 1.47,
"learning_rate": 4.926670482186293e-05,
"loss": 1.8051,
"step": 20500
},
{
"epoch": 1.5,
"learning_rate": 4.9248819573615686e-05,
"loss": 1.8204,
"step": 21000
},
{
"epoch": 1.54,
"learning_rate": 4.923093432536844e-05,
"loss": 1.7976,
"step": 21500
},
{
"epoch": 1.57,
"learning_rate": 4.921304907712119e-05,
"loss": 1.805,
"step": 22000
},
{
"epoch": 1.61,
"learning_rate": 4.919516382887395e-05,
"loss": 1.7588,
"step": 22500
},
{
"epoch": 1.65,
"learning_rate": 4.91772785806267e-05,
"loss": 1.7988,
"step": 23000
},
{
"epoch": 1.68,
"learning_rate": 4.915939333237946e-05,
"loss": 1.7542,
"step": 23500
},
{
"epoch": 1.72,
"learning_rate": 4.914150808413221e-05,
"loss": 1.7743,
"step": 24000
},
{
"epoch": 1.75,
"learning_rate": 4.912362283588496e-05,
"loss": 1.7507,
"step": 24500
},
{
"epoch": 1.79,
"learning_rate": 4.910573758763772e-05,
"loss": 1.8013,
"step": 25000
},
{
"epoch": 1.82,
"learning_rate": 4.908785233939047e-05,
"loss": 1.7607,
"step": 25500
},
{
"epoch": 1.86,
"learning_rate": 4.9069967091143224e-05,
"loss": 1.7696,
"step": 26000
},
{
"epoch": 1.9,
"learning_rate": 4.905208184289598e-05,
"loss": 1.7501,
"step": 26500
},
{
"epoch": 1.93,
"learning_rate": 4.9034196594648735e-05,
"loss": 1.7406,
"step": 27000
},
{
"epoch": 1.97,
"learning_rate": 4.9016311346401487e-05,
"loss": 1.7568,
"step": 27500
},
{
"epoch": 2.0,
"learning_rate": 4.8998426098154245e-05,
"loss": 1.6709,
"step": 28000
},
{
"epoch": 2.04,
"learning_rate": 4.8980540849907e-05,
"loss": 1.5369,
"step": 28500
},
{
"epoch": 2.07,
"learning_rate": 4.896265560165975e-05,
"loss": 1.54,
"step": 29000
},
{
"epoch": 2.11,
"learning_rate": 4.894477035341251e-05,
"loss": 1.5577,
"step": 29500
},
{
"epoch": 2.15,
"learning_rate": 4.892688510516526e-05,
"loss": 1.5302,
"step": 30000
},
{
"epoch": 2.18,
"learning_rate": 4.890899985691801e-05,
"loss": 1.5484,
"step": 30500
},
{
"epoch": 2.22,
"learning_rate": 4.889111460867077e-05,
"loss": 1.5573,
"step": 31000
},
{
"epoch": 2.25,
"learning_rate": 4.887322936042352e-05,
"loss": 1.5223,
"step": 31500
},
{
"epoch": 2.29,
"learning_rate": 4.885534411217628e-05,
"loss": 1.5303,
"step": 32000
},
{
"epoch": 2.33,
"learning_rate": 4.883745886392903e-05,
"loss": 1.5337,
"step": 32500
},
{
"epoch": 2.36,
"learning_rate": 4.881957361568179e-05,
"loss": 1.5726,
"step": 33000
},
{
"epoch": 2.4,
"learning_rate": 4.880168836743454e-05,
"loss": 1.5842,
"step": 33500
},
{
"epoch": 2.43,
"learning_rate": 4.87838031191873e-05,
"loss": 1.5437,
"step": 34000
},
{
"epoch": 2.47,
"learning_rate": 4.876591787094005e-05,
"loss": 1.5371,
"step": 34500
},
{
"epoch": 2.5,
"learning_rate": 4.8748032622692804e-05,
"loss": 1.5382,
"step": 35000
},
{
"epoch": 2.54,
"learning_rate": 4.873014737444556e-05,
"loss": 1.5205,
"step": 35500
},
{
"epoch": 2.58,
"learning_rate": 4.8712262126198314e-05,
"loss": 1.5504,
"step": 36000
},
{
"epoch": 2.61,
"learning_rate": 4.869437687795107e-05,
"loss": 1.5382,
"step": 36500
},
{
"epoch": 2.65,
"learning_rate": 4.8676491629703825e-05,
"loss": 1.5698,
"step": 37000
},
{
"epoch": 2.68,
"learning_rate": 4.865860638145658e-05,
"loss": 1.5631,
"step": 37500
},
{
"epoch": 2.72,
"learning_rate": 4.8640721133209335e-05,
"loss": 1.5449,
"step": 38000
},
{
"epoch": 2.75,
"learning_rate": 4.862283588496209e-05,
"loss": 1.5287,
"step": 38500
},
{
"epoch": 2.79,
"learning_rate": 4.860495063671484e-05,
"loss": 1.5721,
"step": 39000
},
{
"epoch": 2.83,
"learning_rate": 4.85870653884676e-05,
"loss": 1.5536,
"step": 39500
},
{
"epoch": 2.86,
"learning_rate": 4.856918014022035e-05,
"loss": 1.5328,
"step": 40000
},
{
"epoch": 2.9,
"learning_rate": 4.85512948919731e-05,
"loss": 1.557,
"step": 40500
},
{
"epoch": 2.93,
"learning_rate": 4.853340964372586e-05,
"loss": 1.5637,
"step": 41000
},
{
"epoch": 2.97,
"learning_rate": 4.851552439547861e-05,
"loss": 1.552,
"step": 41500
},
{
"epoch": 3.0,
"learning_rate": 4.849763914723136e-05,
"loss": 1.5156,
"step": 42000
},
{
"epoch": 3.04,
"learning_rate": 4.847975389898412e-05,
"loss": 1.2963,
"step": 42500
},
{
"epoch": 3.08,
"learning_rate": 4.8461868650736873e-05,
"loss": 1.3098,
"step": 43000
},
{
"epoch": 3.11,
"learning_rate": 4.844398340248963e-05,
"loss": 1.3217,
"step": 43500
},
{
"epoch": 3.15,
"learning_rate": 4.8426098154242384e-05,
"loss": 1.3361,
"step": 44000
},
{
"epoch": 3.18,
"learning_rate": 4.8408212905995136e-05,
"loss": 1.3243,
"step": 44500
},
{
"epoch": 3.22,
"learning_rate": 4.8390327657747894e-05,
"loss": 1.3336,
"step": 45000
},
{
"epoch": 3.26,
"learning_rate": 4.8372442409500646e-05,
"loss": 1.3447,
"step": 45500
},
{
"epoch": 3.29,
"learning_rate": 4.83545571612534e-05,
"loss": 1.3436,
"step": 46000
},
{
"epoch": 3.33,
"learning_rate": 4.8336671913006156e-05,
"loss": 1.3384,
"step": 46500
},
{
"epoch": 3.36,
"learning_rate": 4.831878666475891e-05,
"loss": 1.3493,
"step": 47000
},
{
"epoch": 3.4,
"learning_rate": 4.830090141651166e-05,
"loss": 1.3428,
"step": 47500
},
{
"epoch": 3.43,
"learning_rate": 4.828301616826442e-05,
"loss": 1.3309,
"step": 48000
},
{
"epoch": 3.47,
"learning_rate": 4.826513092001717e-05,
"loss": 1.3716,
"step": 48500
},
{
"epoch": 3.51,
"learning_rate": 4.824724567176992e-05,
"loss": 1.326,
"step": 49000
},
{
"epoch": 3.54,
"learning_rate": 4.822936042352268e-05,
"loss": 1.3752,
"step": 49500
},
{
"epoch": 3.58,
"learning_rate": 4.821147517527543e-05,
"loss": 1.3721,
"step": 50000
},
{
"epoch": 3.61,
"learning_rate": 4.819358992702819e-05,
"loss": 1.3383,
"step": 50500
},
{
"epoch": 3.65,
"learning_rate": 4.817570467878094e-05,
"loss": 1.3797,
"step": 51000
},
{
"epoch": 3.68,
"learning_rate": 4.8157819430533695e-05,
"loss": 1.3499,
"step": 51500
},
{
"epoch": 3.72,
"learning_rate": 4.813993418228645e-05,
"loss": 1.3518,
"step": 52000
},
{
"epoch": 3.76,
"learning_rate": 4.8122048934039205e-05,
"loss": 1.3707,
"step": 52500
},
{
"epoch": 3.79,
"learning_rate": 4.810416368579196e-05,
"loss": 1.3867,
"step": 53000
},
{
"epoch": 3.83,
"learning_rate": 4.8086278437544715e-05,
"loss": 1.3884,
"step": 53500
},
{
"epoch": 3.86,
"learning_rate": 4.806839318929747e-05,
"loss": 1.3647,
"step": 54000
},
{
"epoch": 3.9,
"learning_rate": 4.8050507941050226e-05,
"loss": 1.4088,
"step": 54500
},
{
"epoch": 3.93,
"learning_rate": 4.8032622692802984e-05,
"loss": 1.375,
"step": 55000
},
{
"epoch": 3.97,
"learning_rate": 4.8014737444555736e-05,
"loss": 1.3769,
"step": 55500
},
{
"epoch": 4.01,
"learning_rate": 4.799685219630849e-05,
"loss": 1.33,
"step": 56000
},
{
"epoch": 4.04,
"learning_rate": 4.7978966948061246e-05,
"loss": 1.0742,
"step": 56500
},
{
"epoch": 4.08,
"learning_rate": 4.7961081699814e-05,
"loss": 1.1,
"step": 57000
},
{
"epoch": 4.11,
"learning_rate": 4.794319645156675e-05,
"loss": 1.1375,
"step": 57500
},
{
"epoch": 4.15,
"learning_rate": 4.792531120331951e-05,
"loss": 1.1721,
"step": 58000
},
{
"epoch": 4.19,
"learning_rate": 4.790742595507226e-05,
"loss": 1.1278,
"step": 58500
},
{
"epoch": 4.22,
"learning_rate": 4.788954070682501e-05,
"loss": 1.1652,
"step": 59000
},
{
"epoch": 4.26,
"learning_rate": 4.787165545857777e-05,
"loss": 1.177,
"step": 59500
},
{
"epoch": 4.29,
"learning_rate": 4.785377021033052e-05,
"loss": 1.1758,
"step": 60000
},
{
"epoch": 4.33,
"learning_rate": 4.7835884962083274e-05,
"loss": 1.208,
"step": 60500
},
{
"epoch": 4.36,
"learning_rate": 4.781799971383603e-05,
"loss": 1.1889,
"step": 61000
},
{
"epoch": 4.4,
"learning_rate": 4.7800114465588785e-05,
"loss": 1.1945,
"step": 61500
},
{
"epoch": 4.44,
"learning_rate": 4.7782229217341536e-05,
"loss": 1.1915,
"step": 62000
},
{
"epoch": 4.47,
"learning_rate": 4.7764343969094295e-05,
"loss": 1.1849,
"step": 62500
},
{
"epoch": 4.51,
"learning_rate": 4.774645872084705e-05,
"loss": 1.1912,
"step": 63000
},
{
"epoch": 4.54,
"learning_rate": 4.7728573472599805e-05,
"loss": 1.1983,
"step": 63500
},
{
"epoch": 4.58,
"learning_rate": 4.771068822435256e-05,
"loss": 1.2211,
"step": 64000
},
{
"epoch": 4.61,
"learning_rate": 4.769280297610531e-05,
"loss": 1.1834,
"step": 64500
},
{
"epoch": 4.65,
"learning_rate": 4.767491772785807e-05,
"loss": 1.2044,
"step": 65000
},
{
"epoch": 4.69,
"learning_rate": 4.765703247961082e-05,
"loss": 1.1973,
"step": 65500
},
{
"epoch": 4.72,
"learning_rate": 4.763914723136357e-05,
"loss": 1.2021,
"step": 66000
},
{
"epoch": 4.76,
"learning_rate": 4.762126198311633e-05,
"loss": 1.2255,
"step": 66500
},
{
"epoch": 4.79,
"learning_rate": 4.760337673486908e-05,
"loss": 1.2106,
"step": 67000
},
{
"epoch": 4.83,
"learning_rate": 4.758549148662183e-05,
"loss": 1.2529,
"step": 67500
},
{
"epoch": 4.86,
"learning_rate": 4.756760623837459e-05,
"loss": 1.2126,
"step": 68000
},
{
"epoch": 4.9,
"learning_rate": 4.7549720990127344e-05,
"loss": 1.2128,
"step": 68500
},
{
"epoch": 4.94,
"learning_rate": 4.7531835741880095e-05,
"loss": 1.2421,
"step": 69000
},
{
"epoch": 4.97,
"learning_rate": 4.7513950493632854e-05,
"loss": 1.2148,
"step": 69500
},
{
"epoch": 5.01,
"learning_rate": 4.7496065245385606e-05,
"loss": 1.1539,
"step": 70000
},
{
"epoch": 5.04,
"learning_rate": 4.7478179997138364e-05,
"loss": 0.9842,
"step": 70500
},
{
"epoch": 5.08,
"learning_rate": 4.7460294748891116e-05,
"loss": 0.9861,
"step": 71000
},
{
"epoch": 5.12,
"learning_rate": 4.744240950064387e-05,
"loss": 1.0112,
"step": 71500
},
{
"epoch": 5.15,
"learning_rate": 4.7424524252396627e-05,
"loss": 1.0102,
"step": 72000
},
{
"epoch": 5.19,
"learning_rate": 4.740663900414938e-05,
"loss": 0.9797,
"step": 72500
},
{
"epoch": 5.22,
"learning_rate": 4.738875375590213e-05,
"loss": 1.0238,
"step": 73000
},
{
"epoch": 5.26,
"learning_rate": 4.737086850765489e-05,
"loss": 0.9947,
"step": 73500
},
{
"epoch": 5.29,
"learning_rate": 4.735298325940764e-05,
"loss": 1.0288,
"step": 74000
},
{
"epoch": 5.33,
"learning_rate": 4.733509801116039e-05,
"loss": 1.0148,
"step": 74500
},
{
"epoch": 5.37,
"learning_rate": 4.731721276291315e-05,
"loss": 1.0149,
"step": 75000
},
{
"epoch": 5.4,
"learning_rate": 4.72993275146659e-05,
"loss": 1.015,
"step": 75500
},
{
"epoch": 5.44,
"learning_rate": 4.728144226641866e-05,
"loss": 1.0409,
"step": 76000
},
{
"epoch": 5.47,
"learning_rate": 4.726355701817142e-05,
"loss": 1.0343,
"step": 76500
},
{
"epoch": 5.51,
"learning_rate": 4.724567176992417e-05,
"loss": 1.039,
"step": 77000
},
{
"epoch": 5.54,
"learning_rate": 4.7227786521676923e-05,
"loss": 1.0662,
"step": 77500
},
{
"epoch": 5.58,
"learning_rate": 4.720990127342968e-05,
"loss": 1.0426,
"step": 78000
},
{
"epoch": 5.62,
"learning_rate": 4.7192016025182434e-05,
"loss": 1.0727,
"step": 78500
},
{
"epoch": 5.65,
"learning_rate": 4.7174130776935186e-05,
"loss": 1.0579,
"step": 79000
},
{
"epoch": 5.69,
"learning_rate": 4.7156245528687944e-05,
"loss": 1.0527,
"step": 79500
},
{
"epoch": 5.72,
"learning_rate": 4.7138360280440696e-05,
"loss": 1.0907,
"step": 80000
},
{
"epoch": 5.76,
"learning_rate": 4.712047503219345e-05,
"loss": 1.0797,
"step": 80500
},
{
"epoch": 5.79,
"learning_rate": 4.7102589783946206e-05,
"loss": 1.0814,
"step": 81000
},
{
"epoch": 5.83,
"learning_rate": 4.708470453569896e-05,
"loss": 1.0784,
"step": 81500
},
{
"epoch": 5.87,
"learning_rate": 4.706681928745172e-05,
"loss": 1.0639,
"step": 82000
},
{
"epoch": 5.9,
"learning_rate": 4.704893403920447e-05,
"loss": 1.0881,
"step": 82500
},
{
"epoch": 5.94,
"learning_rate": 4.703104879095722e-05,
"loss": 1.0744,
"step": 83000
},
{
"epoch": 5.97,
"learning_rate": 4.701316354270998e-05,
"loss": 1.1065,
"step": 83500
},
{
"epoch": 6.01,
"learning_rate": 4.699527829446273e-05,
"loss": 1.037,
"step": 84000
},
{
"epoch": 6.05,
"learning_rate": 4.697739304621548e-05,
"loss": 0.8384,
"step": 84500
},
{
"epoch": 6.08,
"learning_rate": 4.695950779796824e-05,
"loss": 0.8547,
"step": 85000
},
{
"epoch": 6.12,
"learning_rate": 4.694162254972099e-05,
"loss": 0.869,
"step": 85500
},
{
"epoch": 6.15,
"learning_rate": 4.6923737301473745e-05,
"loss": 0.8673,
"step": 86000
},
{
"epoch": 6.19,
"learning_rate": 4.69058520532265e-05,
"loss": 0.8863,
"step": 86500
},
{
"epoch": 6.22,
"learning_rate": 4.6887966804979255e-05,
"loss": 0.867,
"step": 87000
},
{
"epoch": 6.26,
"learning_rate": 4.687008155673201e-05,
"loss": 0.908,
"step": 87500
},
{
"epoch": 6.3,
"learning_rate": 4.6852196308484765e-05,
"loss": 0.9023,
"step": 88000
},
{
"epoch": 6.33,
"learning_rate": 4.683431106023752e-05,
"loss": 0.9064,
"step": 88500
},
{
"epoch": 6.37,
"learning_rate": 4.681642581199027e-05,
"loss": 0.8924,
"step": 89000
},
{
"epoch": 6.4,
"learning_rate": 4.679854056374303e-05,
"loss": 0.8938,
"step": 89500
},
{
"epoch": 6.44,
"learning_rate": 4.678065531549578e-05,
"loss": 0.9082,
"step": 90000
},
{
"epoch": 6.47,
"learning_rate": 4.676277006724854e-05,
"loss": 0.9036,
"step": 90500
},
{
"epoch": 6.51,
"learning_rate": 4.674488481900129e-05,
"loss": 0.9085,
"step": 91000
},
{
"epoch": 6.55,
"learning_rate": 4.672699957075404e-05,
"loss": 0.9263,
"step": 91500
},
{
"epoch": 6.58,
"learning_rate": 4.67091143225068e-05,
"loss": 0.9085,
"step": 92000
},
{
"epoch": 6.62,
"learning_rate": 4.669122907425955e-05,
"loss": 0.9289,
"step": 92500
},
{
"epoch": 6.65,
"learning_rate": 4.6673343826012304e-05,
"loss": 0.9407,
"step": 93000
},
{
"epoch": 6.69,
"learning_rate": 4.665545857776506e-05,
"loss": 0.9326,
"step": 93500
},
{
"epoch": 6.72,
"learning_rate": 4.6637573329517814e-05,
"loss": 0.9551,
"step": 94000
},
{
"epoch": 6.76,
"learning_rate": 4.6619688081270566e-05,
"loss": 0.9407,
"step": 94500
},
{
"epoch": 6.8,
"learning_rate": 4.6601802833023324e-05,
"loss": 0.9694,
"step": 95000
},
{
"epoch": 6.83,
"learning_rate": 4.6583917584776076e-05,
"loss": 0.9743,
"step": 95500
},
{
"epoch": 6.87,
"learning_rate": 4.656603233652883e-05,
"loss": 0.9643,
"step": 96000
},
{
"epoch": 6.9,
"learning_rate": 4.6548147088281586e-05,
"loss": 0.9497,
"step": 96500
},
{
"epoch": 6.94,
"learning_rate": 4.653026184003434e-05,
"loss": 0.9532,
"step": 97000
},
{
"epoch": 6.98,
"learning_rate": 4.65123765917871e-05,
"loss": 0.9539,
"step": 97500
},
{
"epoch": 7.01,
"learning_rate": 4.6494491343539855e-05,
"loss": 0.8958,
"step": 98000
},
{
"epoch": 7.05,
"learning_rate": 4.647660609529261e-05,
"loss": 0.709,
"step": 98500
},
{
"epoch": 7.08,
"learning_rate": 4.645872084704536e-05,
"loss": 0.727,
"step": 99000
},
{
"epoch": 7.12,
"learning_rate": 4.644083559879812e-05,
"loss": 0.7395,
"step": 99500
},
{
"epoch": 7.15,
"learning_rate": 4.642295035055087e-05,
"loss": 0.756,
"step": 100000
},
{
"epoch": 7.19,
"learning_rate": 4.640506510230362e-05,
"loss": 0.7668,
"step": 100500
},
{
"epoch": 7.23,
"learning_rate": 4.638717985405638e-05,
"loss": 0.7806,
"step": 101000
},
{
"epoch": 7.26,
"learning_rate": 4.636929460580913e-05,
"loss": 0.7723,
"step": 101500
},
{
"epoch": 7.3,
"learning_rate": 4.635140935756189e-05,
"loss": 0.776,
"step": 102000
},
{
"epoch": 7.33,
"learning_rate": 4.633352410931464e-05,
"loss": 0.7923,
"step": 102500
},
{
"epoch": 7.37,
"learning_rate": 4.6315638861067394e-05,
"loss": 0.7955,
"step": 103000
},
{
"epoch": 7.4,
"learning_rate": 4.629775361282015e-05,
"loss": 0.8003,
"step": 103500
},
{
"epoch": 7.44,
"learning_rate": 4.6279868364572904e-05,
"loss": 0.8045,
"step": 104000
},
{
"epoch": 7.48,
"learning_rate": 4.6261983116325656e-05,
"loss": 0.8138,
"step": 104500
},
{
"epoch": 7.51,
"learning_rate": 4.6244097868078414e-05,
"loss": 0.8075,
"step": 105000
},
{
"epoch": 7.55,
"learning_rate": 4.6226212619831166e-05,
"loss": 0.8381,
"step": 105500
},
{
"epoch": 7.58,
"learning_rate": 4.620832737158392e-05,
"loss": 0.8036,
"step": 106000
},
{
"epoch": 7.62,
"learning_rate": 4.6190442123336676e-05,
"loss": 0.8151,
"step": 106500
},
{
"epoch": 7.65,
"learning_rate": 4.617255687508943e-05,
"loss": 0.8434,
"step": 107000
},
{
"epoch": 7.69,
"learning_rate": 4.615467162684218e-05,
"loss": 0.8278,
"step": 107500
},
{
"epoch": 7.73,
"learning_rate": 4.613678637859494e-05,
"loss": 0.8461,
"step": 108000
},
{
"epoch": 7.76,
"learning_rate": 4.611890113034769e-05,
"loss": 0.8149,
"step": 108500
},
{
"epoch": 7.8,
"learning_rate": 4.610101588210045e-05,
"loss": 0.8384,
"step": 109000
},
{
"epoch": 7.83,
"learning_rate": 4.60831306338532e-05,
"loss": 0.8548,
"step": 109500
},
{
"epoch": 7.87,
"learning_rate": 4.606524538560595e-05,
"loss": 0.8473,
"step": 110000
},
{
"epoch": 7.91,
"learning_rate": 4.604736013735871e-05,
"loss": 0.8604,
"step": 110500
},
{
"epoch": 7.94,
"learning_rate": 4.602947488911146e-05,
"loss": 0.8566,
"step": 111000
},
{
"epoch": 7.98,
"learning_rate": 4.6011589640864215e-05,
"loss": 0.8816,
"step": 111500
},
{
"epoch": 8.01,
"learning_rate": 4.599370439261697e-05,
"loss": 0.7863,
"step": 112000
},
{
"epoch": 8.05,
"learning_rate": 4.5975819144369725e-05,
"loss": 0.6376,
"step": 112500
},
{
"epoch": 8.08,
"learning_rate": 4.595793389612248e-05,
"loss": 0.6403,
"step": 113000
},
{
"epoch": 8.12,
"learning_rate": 4.5940048647875235e-05,
"loss": 0.6446,
"step": 113500
},
{
"epoch": 8.16,
"learning_rate": 4.592216339962799e-05,
"loss": 0.6589,
"step": 114000
},
{
"epoch": 8.19,
"learning_rate": 4.590427815138074e-05,
"loss": 0.6604,
"step": 114500
},
{
"epoch": 8.23,
"learning_rate": 4.58863929031335e-05,
"loss": 0.6791,
"step": 115000
},
{
"epoch": 8.26,
"learning_rate": 4.586850765488625e-05,
"loss": 0.6944,
"step": 115500
},
{
"epoch": 8.3,
"learning_rate": 4.5850622406639e-05,
"loss": 0.682,
"step": 116000
},
{
"epoch": 8.33,
"learning_rate": 4.583273715839176e-05,
"loss": 0.6984,
"step": 116500
},
{
"epoch": 8.37,
"learning_rate": 4.581485191014451e-05,
"loss": 0.6765,
"step": 117000
},
{
"epoch": 8.41,
"learning_rate": 4.579696666189727e-05,
"loss": 0.7103,
"step": 117500
},
{
"epoch": 8.44,
"learning_rate": 4.577908141365002e-05,
"loss": 0.7166,
"step": 118000
},
{
"epoch": 8.48,
"learning_rate": 4.5761196165402774e-05,
"loss": 0.7078,
"step": 118500
},
{
"epoch": 8.51,
"learning_rate": 4.574331091715553e-05,
"loss": 0.7309,
"step": 119000
},
{
"epoch": 8.55,
"learning_rate": 4.572542566890829e-05,
"loss": 0.7273,
"step": 119500
},
{
"epoch": 8.58,
"learning_rate": 4.570754042066104e-05,
"loss": 0.73,
"step": 120000
},
{
"epoch": 8.62,
"learning_rate": 4.5689655172413794e-05,
"loss": 0.7467,
"step": 120500
},
{
"epoch": 8.66,
"learning_rate": 4.567176992416655e-05,
"loss": 0.7282,
"step": 121000
},
{
"epoch": 8.69,
"learning_rate": 4.5653884675919305e-05,
"loss": 0.7432,
"step": 121500
},
{
"epoch": 8.73,
"learning_rate": 4.5635999427672063e-05,
"loss": 0.7357,
"step": 122000
},
{
"epoch": 8.76,
"learning_rate": 4.5618114179424815e-05,
"loss": 0.7583,
"step": 122500
},
{
"epoch": 8.8,
"learning_rate": 4.560022893117757e-05,
"loss": 0.7458,
"step": 123000
},
{
"epoch": 8.84,
"learning_rate": 4.5582343682930326e-05,
"loss": 0.7393,
"step": 123500
},
{
"epoch": 8.87,
"learning_rate": 4.556445843468308e-05,
"loss": 0.7569,
"step": 124000
},
{
"epoch": 8.91,
"learning_rate": 4.554657318643583e-05,
"loss": 0.7656,
"step": 124500
},
{
"epoch": 8.94,
"learning_rate": 4.552868793818859e-05,
"loss": 0.7781,
"step": 125000
},
{
"epoch": 8.98,
"learning_rate": 4.551080268994134e-05,
"loss": 0.7802,
"step": 125500
},
{
"epoch": 9.01,
"learning_rate": 4.549291744169409e-05,
"loss": 0.6816,
"step": 126000
},
{
"epoch": 9.05,
"learning_rate": 4.547503219344685e-05,
"loss": 0.5608,
"step": 126500
},
{
"epoch": 9.09,
"learning_rate": 4.54571469451996e-05,
"loss": 0.5721,
"step": 127000
},
{
"epoch": 9.12,
"learning_rate": 4.5439261696952353e-05,
"loss": 0.5815,
"step": 127500
},
{
"epoch": 9.16,
"learning_rate": 4.542137644870511e-05,
"loss": 0.5897,
"step": 128000
},
{
"epoch": 9.19,
"learning_rate": 4.5403491200457864e-05,
"loss": 0.5972,
"step": 128500
},
{
"epoch": 9.23,
"learning_rate": 4.538560595221062e-05,
"loss": 0.5935,
"step": 129000
},
{
"epoch": 9.26,
"learning_rate": 4.5367720703963374e-05,
"loss": 0.6069,
"step": 129500
},
{
"epoch": 9.3,
"learning_rate": 4.5349835455716126e-05,
"loss": 0.6047,
"step": 130000
},
{
"epoch": 9.34,
"learning_rate": 4.5331950207468885e-05,
"loss": 0.6132,
"step": 130500
},
{
"epoch": 9.37,
"learning_rate": 4.5314064959221636e-05,
"loss": 0.6186,
"step": 131000
},
{
"epoch": 9.41,
"learning_rate": 4.529617971097439e-05,
"loss": 0.6303,
"step": 131500
},
{
"epoch": 9.44,
"learning_rate": 4.527829446272715e-05,
"loss": 0.6312,
"step": 132000
},
{
"epoch": 9.48,
"learning_rate": 4.52604092144799e-05,
"loss": 0.6482,
"step": 132500
},
{
"epoch": 9.51,
"learning_rate": 4.524252396623265e-05,
"loss": 0.6371,
"step": 133000
},
{
"epoch": 9.55,
"learning_rate": 4.522463871798541e-05,
"loss": 0.6346,
"step": 133500
},
{
"epoch": 9.59,
"learning_rate": 4.520675346973816e-05,
"loss": 0.6522,
"step": 134000
},
{
"epoch": 9.62,
"learning_rate": 4.518886822149091e-05,
"loss": 0.6564,
"step": 134500
},
{
"epoch": 9.66,
"learning_rate": 4.517098297324367e-05,
"loss": 0.668,
"step": 135000
},
{
"epoch": 9.69,
"learning_rate": 4.515309772499642e-05,
"loss": 0.6653,
"step": 135500
},
{
"epoch": 9.73,
"learning_rate": 4.5135212476749175e-05,
"loss": 0.6668,
"step": 136000
},
{
"epoch": 9.77,
"learning_rate": 4.511732722850193e-05,
"loss": 0.6692,
"step": 136500
},
{
"epoch": 9.8,
"learning_rate": 4.5099441980254685e-05,
"loss": 0.678,
"step": 137000
},
{
"epoch": 9.84,
"learning_rate": 4.5081556732007444e-05,
"loss": 0.647,
"step": 137500
},
{
"epoch": 9.87,
"learning_rate": 4.5063671483760195e-05,
"loss": 0.6806,
"step": 138000
},
{
"epoch": 9.91,
"learning_rate": 4.504578623551295e-05,
"loss": 0.6787,
"step": 138500
},
{
"epoch": 9.94,
"learning_rate": 4.5027900987265706e-05,
"loss": 0.6978,
"step": 139000
},
{
"epoch": 9.98,
"learning_rate": 4.501001573901846e-05,
"loss": 0.6912,
"step": 139500
},
{
"epoch": 10.02,
"learning_rate": 4.499213049077121e-05,
"loss": 0.6072,
"step": 140000
},
{
"epoch": 10.05,
"learning_rate": 4.497424524252397e-05,
"loss": 0.5022,
"step": 140500
},
{
"epoch": 10.09,
"learning_rate": 4.495635999427672e-05,
"loss": 0.5045,
"step": 141000
},
{
"epoch": 10.12,
"learning_rate": 4.493847474602948e-05,
"loss": 0.5298,
"step": 141500
},
{
"epoch": 10.16,
"learning_rate": 4.492058949778224e-05,
"loss": 0.5233,
"step": 142000
},
{
"epoch": 10.19,
"learning_rate": 4.490270424953499e-05,
"loss": 0.5314,
"step": 142500
},
{
"epoch": 10.23,
"learning_rate": 4.488481900128774e-05,
"loss": 0.5469,
"step": 143000
},
{
"epoch": 10.27,
"learning_rate": 4.48669337530405e-05,
"loss": 0.5494,
"step": 143500
},
{
"epoch": 10.3,
"learning_rate": 4.484904850479325e-05,
"loss": 0.5448,
"step": 144000
},
{
"epoch": 10.34,
"learning_rate": 4.4831163256546e-05,
"loss": 0.5507,
"step": 144500
},
{
"epoch": 10.37,
"learning_rate": 4.481327800829876e-05,
"loss": 0.5609,
"step": 145000
},
{
"epoch": 10.41,
"learning_rate": 4.479539276005151e-05,
"loss": 0.5691,
"step": 145500
},
{
"epoch": 10.44,
"learning_rate": 4.4777507511804265e-05,
"loss": 0.5656,
"step": 146000
},
{
"epoch": 10.48,
"learning_rate": 4.475962226355702e-05,
"loss": 0.5652,
"step": 146500
},
{
"epoch": 10.52,
"learning_rate": 4.4741737015309775e-05,
"loss": 0.5794,
"step": 147000
},
{
"epoch": 10.55,
"learning_rate": 4.472385176706253e-05,
"loss": 0.5787,
"step": 147500
},
{
"epoch": 10.59,
"learning_rate": 4.4705966518815285e-05,
"loss": 0.5883,
"step": 148000
},
{
"epoch": 10.62,
"learning_rate": 4.468808127056804e-05,
"loss": 0.5821,
"step": 148500
},
{
"epoch": 10.66,
"learning_rate": 4.4670196022320796e-05,
"loss": 0.5918,
"step": 149000
},
{
"epoch": 10.7,
"learning_rate": 4.465231077407355e-05,
"loss": 0.6038,
"step": 149500
},
{
"epoch": 10.73,
"learning_rate": 4.46344255258263e-05,
"loss": 0.6114,
"step": 150000
},
{
"epoch": 10.77,
"learning_rate": 4.461654027757906e-05,
"loss": 0.6023,
"step": 150500
},
{
"epoch": 10.8,
"learning_rate": 4.459865502933181e-05,
"loss": 0.6016,
"step": 151000
},
{
"epoch": 10.84,
"learning_rate": 4.458076978108456e-05,
"loss": 0.6057,
"step": 151500
},
{
"epoch": 10.87,
"learning_rate": 4.456288453283732e-05,
"loss": 0.6218,
"step": 152000
},
{
"epoch": 10.91,
"learning_rate": 4.454499928459007e-05,
"loss": 0.6226,
"step": 152500
},
{
"epoch": 10.95,
"learning_rate": 4.4527114036342824e-05,
"loss": 0.6037,
"step": 153000
},
{
"epoch": 10.98,
"learning_rate": 4.450922878809558e-05,
"loss": 0.6197,
"step": 153500
},
{
"epoch": 11.02,
"learning_rate": 4.4491343539848334e-05,
"loss": 0.539,
"step": 154000
},
{
"epoch": 11.05,
"learning_rate": 4.4473458291601086e-05,
"loss": 0.4517,
"step": 154500
},
{
"epoch": 11.09,
"learning_rate": 4.4455573043353844e-05,
"loss": 0.4682,
"step": 155000
},
{
"epoch": 11.12,
"learning_rate": 4.4437687795106596e-05,
"loss": 0.4675,
"step": 155500
},
{
"epoch": 11.16,
"learning_rate": 4.4419802546859355e-05,
"loss": 0.4814,
"step": 156000
},
{
"epoch": 11.2,
"learning_rate": 4.4401917298612107e-05,
"loss": 0.4774,
"step": 156500
},
{
"epoch": 11.23,
"learning_rate": 4.438403205036486e-05,
"loss": 0.4855,
"step": 157000
},
{
"epoch": 11.27,
"learning_rate": 4.436614680211762e-05,
"loss": 0.494,
"step": 157500
},
{
"epoch": 11.3,
"learning_rate": 4.434826155387037e-05,
"loss": 0.5003,
"step": 158000
},
{
"epoch": 11.34,
"learning_rate": 4.433037630562312e-05,
"loss": 0.5193,
"step": 158500
},
{
"epoch": 11.38,
"learning_rate": 4.431249105737588e-05,
"loss": 0.5116,
"step": 159000
},
{
"epoch": 11.41,
"learning_rate": 4.429460580912863e-05,
"loss": 0.5133,
"step": 159500
},
{
"epoch": 11.45,
"learning_rate": 4.427672056088138e-05,
"loss": 0.531,
"step": 160000
},
{
"epoch": 11.48,
"learning_rate": 4.425883531263414e-05,
"loss": 0.5267,
"step": 160500
},
{
"epoch": 11.52,
"learning_rate": 4.424095006438689e-05,
"loss": 0.5286,
"step": 161000
},
{
"epoch": 11.55,
"learning_rate": 4.4223064816139645e-05,
"loss": 0.5306,
"step": 161500
},
{
"epoch": 11.59,
"learning_rate": 4.4205179567892403e-05,
"loss": 0.5261,
"step": 162000
},
{
"epoch": 11.63,
"learning_rate": 4.4187294319645155e-05,
"loss": 0.5332,
"step": 162500
},
{
"epoch": 11.66,
"learning_rate": 4.4169409071397914e-05,
"loss": 0.5393,
"step": 163000
},
{
"epoch": 11.7,
"learning_rate": 4.415152382315067e-05,
"loss": 0.5249,
"step": 163500
},
{
"epoch": 11.73,
"learning_rate": 4.4133638574903424e-05,
"loss": 0.5566,
"step": 164000
},
{
"epoch": 11.77,
"learning_rate": 4.4115753326656176e-05,
"loss": 0.5509,
"step": 164500
},
{
"epoch": 11.8,
"learning_rate": 4.4097868078408934e-05,
"loss": 0.5398,
"step": 165000
},
{
"epoch": 11.84,
"learning_rate": 4.4079982830161686e-05,
"loss": 0.5641,
"step": 165500
},
{
"epoch": 11.88,
"learning_rate": 4.406209758191444e-05,
"loss": 0.5623,
"step": 166000
},
{
"epoch": 11.91,
"learning_rate": 4.40442123336672e-05,
"loss": 0.5612,
"step": 166500
},
{
"epoch": 11.95,
"learning_rate": 4.402632708541995e-05,
"loss": 0.576,
"step": 167000
},
{
"epoch": 11.98,
"learning_rate": 4.400844183717271e-05,
"loss": 0.5653,
"step": 167500
},
{
"epoch": 12.02,
"learning_rate": 4.399055658892546e-05,
"loss": 0.4821,
"step": 168000
},
{
"epoch": 12.05,
"learning_rate": 4.397267134067821e-05,
"loss": 0.416,
"step": 168500
},
{
"epoch": 12.09,
"learning_rate": 4.395478609243097e-05,
"loss": 0.4239,
"step": 169000
},
{
"epoch": 12.13,
"learning_rate": 4.393690084418372e-05,
"loss": 0.4336,
"step": 169500
},
{
"epoch": 12.16,
"learning_rate": 4.391901559593647e-05,
"loss": 0.4388,
"step": 170000
},
{
"epoch": 12.2,
"learning_rate": 4.390113034768923e-05,
"loss": 0.453,
"step": 170500
},
{
"epoch": 12.23,
"learning_rate": 4.388324509944198e-05,
"loss": 0.4524,
"step": 171000
},
{
"epoch": 12.27,
"learning_rate": 4.3865359851194735e-05,
"loss": 0.4563,
"step": 171500
},
{
"epoch": 12.31,
"learning_rate": 4.3847474602947493e-05,
"loss": 0.4651,
"step": 172000
},
{
"epoch": 12.34,
"learning_rate": 4.3829589354700245e-05,
"loss": 0.4644,
"step": 172500
},
{
"epoch": 12.38,
"learning_rate": 4.3811704106453e-05,
"loss": 0.4749,
"step": 173000
},
{
"epoch": 12.41,
"learning_rate": 4.3793818858205756e-05,
"loss": 0.4855,
"step": 173500
},
{
"epoch": 12.45,
"learning_rate": 4.377593360995851e-05,
"loss": 0.4845,
"step": 174000
},
{
"epoch": 12.48,
"learning_rate": 4.375804836171126e-05,
"loss": 0.4799,
"step": 174500
},
{
"epoch": 12.52,
"learning_rate": 4.374016311346402e-05,
"loss": 0.4844,
"step": 175000
},
{
"epoch": 12.56,
"learning_rate": 4.372227786521677e-05,
"loss": 0.4894,
"step": 175500
},
{
"epoch": 12.59,
"learning_rate": 4.370439261696953e-05,
"loss": 0.4855,
"step": 176000
},
{
"epoch": 12.63,
"learning_rate": 4.368650736872228e-05,
"loss": 0.4963,
"step": 176500
},
{
"epoch": 12.66,
"learning_rate": 4.366862212047503e-05,
"loss": 0.5021,
"step": 177000
},
{
"epoch": 12.7,
"learning_rate": 4.365073687222779e-05,
"loss": 0.5055,
"step": 177500
},
{
"epoch": 12.73,
"learning_rate": 4.363285162398054e-05,
"loss": 0.5034,
"step": 178000
},
{
"epoch": 12.77,
"learning_rate": 4.3614966375733294e-05,
"loss": 0.508,
"step": 178500
},
{
"epoch": 12.81,
"learning_rate": 4.359708112748605e-05,
"loss": 0.5115,
"step": 179000
},
{
"epoch": 12.84,
"learning_rate": 4.3579195879238804e-05,
"loss": 0.5159,
"step": 179500
},
{
"epoch": 12.88,
"learning_rate": 4.3561310630991556e-05,
"loss": 0.5193,
"step": 180000
},
{
"epoch": 12.91,
"learning_rate": 4.3543425382744315e-05,
"loss": 0.5171,
"step": 180500
},
{
"epoch": 12.95,
"learning_rate": 4.3525540134497066e-05,
"loss": 0.5272,
"step": 181000
},
{
"epoch": 12.98,
"learning_rate": 4.350765488624982e-05,
"loss": 0.5217,
"step": 181500
},
{
"epoch": 13.02,
"learning_rate": 4.348976963800258e-05,
"loss": 0.4477,
"step": 182000
},
{
"epoch": 13.06,
"learning_rate": 4.347188438975533e-05,
"loss": 0.3926,
"step": 182500
},
{
"epoch": 13.09,
"learning_rate": 4.345399914150809e-05,
"loss": 0.4036,
"step": 183000
},
{
"epoch": 13.13,
"learning_rate": 4.343611389326084e-05,
"loss": 0.4139,
"step": 183500
},
{
"epoch": 13.16,
"learning_rate": 4.341822864501359e-05,
"loss": 0.4115,
"step": 184000
},
{
"epoch": 13.2,
"learning_rate": 4.340034339676635e-05,
"loss": 0.4216,
"step": 184500
},
{
"epoch": 13.24,
"learning_rate": 4.338245814851911e-05,
"loss": 0.4288,
"step": 185000
},
{
"epoch": 13.27,
"learning_rate": 4.336457290027186e-05,
"loss": 0.4333,
"step": 185500
},
{
"epoch": 13.31,
"learning_rate": 4.334668765202461e-05,
"loss": 0.4295,
"step": 186000
},
{
"epoch": 13.34,
"learning_rate": 4.332880240377737e-05,
"loss": 0.4312,
"step": 186500
},
{
"epoch": 13.38,
"learning_rate": 4.331091715553012e-05,
"loss": 0.4358,
"step": 187000
},
{
"epoch": 13.41,
"learning_rate": 4.329303190728288e-05,
"loss": 0.4436,
"step": 187500
},
{
"epoch": 13.45,
"learning_rate": 4.327514665903563e-05,
"loss": 0.4472,
"step": 188000
},
{
"epoch": 13.49,
"learning_rate": 4.3257261410788384e-05,
"loss": 0.4484,
"step": 188500
},
{
"epoch": 13.52,
"learning_rate": 4.323937616254114e-05,
"loss": 0.455,
"step": 189000
},
{
"epoch": 13.56,
"learning_rate": 4.3221490914293894e-05,
"loss": 0.4617,
"step": 189500
},
{
"epoch": 13.59,
"learning_rate": 4.3203605666046646e-05,
"loss": 0.4521,
"step": 190000
},
{
"epoch": 13.63,
"learning_rate": 4.3185720417799405e-05,
"loss": 0.4618,
"step": 190500
},
{
"epoch": 13.66,
"learning_rate": 4.3167835169552156e-05,
"loss": 0.4668,
"step": 191000
},
{
"epoch": 13.7,
"learning_rate": 4.314994992130491e-05,
"loss": 0.4688,
"step": 191500
},
{
"epoch": 13.74,
"learning_rate": 4.313206467305767e-05,
"loss": 0.4778,
"step": 192000
},
{
"epoch": 13.77,
"learning_rate": 4.311417942481042e-05,
"loss": 0.4733,
"step": 192500
},
{
"epoch": 13.81,
"learning_rate": 4.309629417656317e-05,
"loss": 0.4715,
"step": 193000
},
{
"epoch": 13.84,
"learning_rate": 4.307840892831593e-05,
"loss": 0.477,
"step": 193500
},
{
"epoch": 13.88,
"learning_rate": 4.306052368006868e-05,
"loss": 0.4782,
"step": 194000
},
{
"epoch": 13.91,
"learning_rate": 4.304263843182143e-05,
"loss": 0.4848,
"step": 194500
},
{
"epoch": 13.95,
"learning_rate": 4.302475318357419e-05,
"loss": 0.4871,
"step": 195000
},
{
"epoch": 13.99,
"learning_rate": 4.300686793532694e-05,
"loss": 0.4858,
"step": 195500
},
{
"epoch": 14.02,
"learning_rate": 4.29889826870797e-05,
"loss": 0.4106,
"step": 196000
},
{
"epoch": 14.06,
"learning_rate": 4.297109743883245e-05,
"loss": 0.376,
"step": 196500
},
{
"epoch": 14.09,
"learning_rate": 4.2953212190585205e-05,
"loss": 0.3836,
"step": 197000
},
{
"epoch": 14.13,
"learning_rate": 4.2935326942337964e-05,
"loss": 0.3844,
"step": 197500
},
{
"epoch": 14.17,
"learning_rate": 4.2917441694090715e-05,
"loss": 0.3944,
"step": 198000
},
{
"epoch": 14.2,
"learning_rate": 4.289955644584347e-05,
"loss": 0.3978,
"step": 198500
},
{
"epoch": 14.24,
"learning_rate": 4.2881671197596226e-05,
"loss": 0.4,
"step": 199000
},
{
"epoch": 14.27,
"learning_rate": 4.286378594934898e-05,
"loss": 0.4034,
"step": 199500
},
{
"epoch": 14.31,
"learning_rate": 4.284590070110173e-05,
"loss": 0.4116,
"step": 200000
},
{
"epoch": 14.34,
"learning_rate": 4.282801545285449e-05,
"loss": 0.4154,
"step": 200500
},
{
"epoch": 14.38,
"learning_rate": 4.281013020460724e-05,
"loss": 0.4204,
"step": 201000
},
{
"epoch": 14.42,
"learning_rate": 4.279224495635999e-05,
"loss": 0.4219,
"step": 201500
},
{
"epoch": 14.45,
"learning_rate": 4.277435970811275e-05,
"loss": 0.4204,
"step": 202000
},
{
"epoch": 14.49,
"learning_rate": 4.27564744598655e-05,
"loss": 0.4196,
"step": 202500
},
{
"epoch": 14.52,
"learning_rate": 4.273858921161826e-05,
"loss": 0.4194,
"step": 203000
},
{
"epoch": 14.56,
"learning_rate": 4.272070396337101e-05,
"loss": 0.4289,
"step": 203500
},
{
"epoch": 14.59,
"learning_rate": 4.2702818715123764e-05,
"loss": 0.4321,
"step": 204000
},
{
"epoch": 14.63,
"learning_rate": 4.268493346687652e-05,
"loss": 0.4371,
"step": 204500
},
{
"epoch": 14.67,
"learning_rate": 4.2667048218629274e-05,
"loss": 0.4447,
"step": 205000
},
{
"epoch": 14.7,
"learning_rate": 4.2649162970382026e-05,
"loss": 0.4428,
"step": 205500
},
{
"epoch": 14.74,
"learning_rate": 4.2631277722134785e-05,
"loss": 0.4365,
"step": 206000
},
{
"epoch": 14.77,
"learning_rate": 4.2613392473887543e-05,
"loss": 0.4416,
"step": 206500
},
{
"epoch": 14.81,
"learning_rate": 4.2595507225640295e-05,
"loss": 0.4451,
"step": 207000
},
{
"epoch": 14.84,
"learning_rate": 4.2577621977393054e-05,
"loss": 0.4488,
"step": 207500
},
{
"epoch": 14.88,
"learning_rate": 4.2559736729145806e-05,
"loss": 0.4489,
"step": 208000
},
{
"epoch": 14.92,
"learning_rate": 4.254185148089856e-05,
"loss": 0.4529,
"step": 208500
},
{
"epoch": 14.95,
"learning_rate": 4.2523966232651316e-05,
"loss": 0.45,
"step": 209000
},
{
"epoch": 14.99,
"learning_rate": 4.250608098440407e-05,
"loss": 0.4585,
"step": 209500
},
{
"epoch": 15.02,
"learning_rate": 4.248819573615682e-05,
"loss": 0.387,
"step": 210000
},
{
"epoch": 15.06,
"learning_rate": 4.247031048790958e-05,
"loss": 0.3537,
"step": 210500
},
{
"epoch": 15.1,
"learning_rate": 4.245242523966233e-05,
"loss": 0.362,
"step": 211000
},
{
"epoch": 15.13,
"learning_rate": 4.243453999141508e-05,
"loss": 0.3754,
"step": 211500
},
{
"epoch": 15.17,
"learning_rate": 4.241665474316784e-05,
"loss": 0.3778,
"step": 212000
},
{
"epoch": 15.2,
"learning_rate": 4.239876949492059e-05,
"loss": 0.374,
"step": 212500
},
{
"epoch": 15.24,
"learning_rate": 4.2380884246673344e-05,
"loss": 0.3781,
"step": 213000
},
{
"epoch": 15.27,
"learning_rate": 4.23629989984261e-05,
"loss": 0.391,
"step": 213500
},
{
"epoch": 15.31,
"learning_rate": 4.2345113750178854e-05,
"loss": 0.3932,
"step": 214000
},
{
"epoch": 15.35,
"learning_rate": 4.232722850193161e-05,
"loss": 0.3911,
"step": 214500
},
{
"epoch": 15.38,
"learning_rate": 4.2309343253684365e-05,
"loss": 0.3946,
"step": 215000
},
{
"epoch": 15.42,
"learning_rate": 4.2291458005437116e-05,
"loss": 0.4036,
"step": 215500
},
{
"epoch": 15.45,
"learning_rate": 4.2273572757189875e-05,
"loss": 0.4014,
"step": 216000
},
{
"epoch": 15.49,
"learning_rate": 4.225568750894263e-05,
"loss": 0.3999,
"step": 216500
},
{
"epoch": 15.52,
"learning_rate": 4.223780226069538e-05,
"loss": 0.4079,
"step": 217000
},
{
"epoch": 15.56,
"learning_rate": 4.221991701244814e-05,
"loss": 0.4132,
"step": 217500
},
{
"epoch": 15.6,
"learning_rate": 4.220203176420089e-05,
"loss": 0.4144,
"step": 218000
},
{
"epoch": 15.63,
"learning_rate": 4.218414651595364e-05,
"loss": 0.4104,
"step": 218500
},
{
"epoch": 15.67,
"learning_rate": 4.21662612677064e-05,
"loss": 0.4113,
"step": 219000
},
{
"epoch": 15.7,
"learning_rate": 4.214837601945915e-05,
"loss": 0.4185,
"step": 219500
},
{
"epoch": 15.74,
"learning_rate": 4.21304907712119e-05,
"loss": 0.4216,
"step": 220000
},
{
"epoch": 15.77,
"learning_rate": 4.211260552296466e-05,
"loss": 0.419,
"step": 220500
},
{
"epoch": 15.81,
"learning_rate": 4.209472027471741e-05,
"loss": 0.4235,
"step": 221000
},
{
"epoch": 15.85,
"learning_rate": 4.2076835026470165e-05,
"loss": 0.4206,
"step": 221500
},
{
"epoch": 15.88,
"learning_rate": 4.2058949778222924e-05,
"loss": 0.4276,
"step": 222000
},
{
"epoch": 15.92,
"learning_rate": 4.2041064529975675e-05,
"loss": 0.4292,
"step": 222500
},
{
"epoch": 15.95,
"learning_rate": 4.2023179281728434e-05,
"loss": 0.4214,
"step": 223000
},
{
"epoch": 15.99,
"learning_rate": 4.2005294033481186e-05,
"loss": 0.4331,
"step": 223500
},
{
"epoch": 16.03,
"learning_rate": 4.198740878523394e-05,
"loss": 0.3667,
"step": 224000
},
{
"epoch": 16.06,
"learning_rate": 4.1969523536986696e-05,
"loss": 0.3454,
"step": 224500
},
{
"epoch": 16.1,
"learning_rate": 4.195163828873945e-05,
"loss": 0.3508,
"step": 225000
},
{
"epoch": 16.13,
"learning_rate": 4.19337530404922e-05,
"loss": 0.3603,
"step": 225500
},
{
"epoch": 16.17,
"learning_rate": 4.191586779224496e-05,
"loss": 0.3652,
"step": 226000
},
{
"epoch": 16.2,
"learning_rate": 4.189798254399771e-05,
"loss": 0.3647,
"step": 226500
},
{
"epoch": 16.24,
"learning_rate": 4.188009729575046e-05,
"loss": 0.3712,
"step": 227000
},
{
"epoch": 16.28,
"learning_rate": 4.186221204750322e-05,
"loss": 0.3706,
"step": 227500
},
{
"epoch": 16.31,
"learning_rate": 4.184432679925598e-05,
"loss": 0.371,
"step": 228000
},
{
"epoch": 16.35,
"learning_rate": 4.182644155100873e-05,
"loss": 0.3764,
"step": 228500
},
{
"epoch": 16.38,
"learning_rate": 4.180855630276149e-05,
"loss": 0.3832,
"step": 229000
},
{
"epoch": 16.42,
"learning_rate": 4.179067105451424e-05,
"loss": 0.3803,
"step": 229500
},
{
"epoch": 16.45,
"learning_rate": 4.177278580626699e-05,
"loss": 0.3902,
"step": 230000
},
{
"epoch": 16.49,
"learning_rate": 4.175490055801975e-05,
"loss": 0.3894,
"step": 230500
},
{
"epoch": 16.53,
"learning_rate": 4.17370153097725e-05,
"loss": 0.3893,
"step": 231000
},
{
"epoch": 16.56,
"learning_rate": 4.1719130061525255e-05,
"loss": 0.3876,
"step": 231500
},
{
"epoch": 16.6,
"learning_rate": 4.1701244813278014e-05,
"loss": 0.4002,
"step": 232000
},
{
"epoch": 16.63,
"learning_rate": 4.1683359565030765e-05,
"loss": 0.397,
"step": 232500
},
{
"epoch": 16.67,
"learning_rate": 4.166547431678352e-05,
"loss": 0.3983,
"step": 233000
},
{
"epoch": 16.7,
"learning_rate": 4.1647589068536276e-05,
"loss": 0.3978,
"step": 233500
},
{
"epoch": 16.74,
"learning_rate": 4.162970382028903e-05,
"loss": 0.4041,
"step": 234000
},
{
"epoch": 16.78,
"learning_rate": 4.1611818572041786e-05,
"loss": 0.3996,
"step": 234500
},
{
"epoch": 16.81,
"learning_rate": 4.159393332379454e-05,
"loss": 0.4062,
"step": 235000
},
{
"epoch": 16.85,
"learning_rate": 4.157604807554729e-05,
"loss": 0.4031,
"step": 235500
},
{
"epoch": 16.88,
"learning_rate": 4.155816282730005e-05,
"loss": 0.4089,
"step": 236000
},
{
"epoch": 16.92,
"learning_rate": 4.15402775790528e-05,
"loss": 0.406,
"step": 236500
},
{
"epoch": 16.96,
"learning_rate": 4.152239233080555e-05,
"loss": 0.4121,
"step": 237000
},
{
"epoch": 16.99,
"learning_rate": 4.150450708255831e-05,
"loss": 0.4119,
"step": 237500
},
{
"epoch": 17.03,
"learning_rate": 4.148662183431106e-05,
"loss": 0.3483,
"step": 238000
},
{
"epoch": 17.06,
"learning_rate": 4.1468736586063814e-05,
"loss": 0.3395,
"step": 238500
},
{
"epoch": 17.1,
"learning_rate": 4.145085133781657e-05,
"loss": 0.3376,
"step": 239000
},
{
"epoch": 17.13,
"learning_rate": 4.1432966089569324e-05,
"loss": 0.3422,
"step": 239500
},
{
"epoch": 17.17,
"learning_rate": 4.1415080841322076e-05,
"loss": 0.3459,
"step": 240000
},
{
"epoch": 17.21,
"learning_rate": 4.1397195593074835e-05,
"loss": 0.3527,
"step": 240500
},
{
"epoch": 17.24,
"learning_rate": 4.1379310344827587e-05,
"loss": 0.356,
"step": 241000
},
{
"epoch": 17.28,
"learning_rate": 4.1361425096580345e-05,
"loss": 0.3553,
"step": 241500
},
{
"epoch": 17.31,
"learning_rate": 4.13435398483331e-05,
"loss": 0.359,
"step": 242000
},
{
"epoch": 17.35,
"learning_rate": 4.132565460008585e-05,
"loss": 0.3635,
"step": 242500
},
{
"epoch": 17.38,
"learning_rate": 4.130776935183861e-05,
"loss": 0.3632,
"step": 243000
},
{
"epoch": 17.42,
"learning_rate": 4.128988410359136e-05,
"loss": 0.3686,
"step": 243500
},
{
"epoch": 17.46,
"learning_rate": 4.127199885534411e-05,
"loss": 0.3738,
"step": 244000
},
{
"epoch": 17.49,
"learning_rate": 4.125411360709687e-05,
"loss": 0.372,
"step": 244500
},
{
"epoch": 17.53,
"learning_rate": 4.123622835884962e-05,
"loss": 0.3742,
"step": 245000
},
{
"epoch": 17.56,
"learning_rate": 4.121834311060237e-05,
"loss": 0.3711,
"step": 245500
},
{
"epoch": 17.6,
"learning_rate": 4.120045786235513e-05,
"loss": 0.3755,
"step": 246000
},
{
"epoch": 17.63,
"learning_rate": 4.118257261410788e-05,
"loss": 0.3787,
"step": 246500
},
{
"epoch": 17.67,
"learning_rate": 4.1164687365860635e-05,
"loss": 0.3793,
"step": 247000
},
{
"epoch": 17.71,
"learning_rate": 4.1146802117613394e-05,
"loss": 0.3878,
"step": 247500
},
{
"epoch": 17.74,
"learning_rate": 4.1128916869366146e-05,
"loss": 0.3891,
"step": 248000
},
{
"epoch": 17.78,
"learning_rate": 4.11110316211189e-05,
"loss": 0.3875,
"step": 248500
},
{
"epoch": 17.81,
"learning_rate": 4.1093146372871656e-05,
"loss": 0.3858,
"step": 249000
},
{
"epoch": 17.85,
"learning_rate": 4.107526112462441e-05,
"loss": 0.3906,
"step": 249500
},
{
"epoch": 17.89,
"learning_rate": 4.1057375876377166e-05,
"loss": 0.3943,
"step": 250000
},
{
"epoch": 17.92,
"learning_rate": 4.1039490628129925e-05,
"loss": 0.3953,
"step": 250500
},
{
"epoch": 17.96,
"learning_rate": 4.102160537988268e-05,
"loss": 0.4001,
"step": 251000
},
{
"epoch": 17.99,
"learning_rate": 4.100372013163543e-05,
"loss": 0.3985,
"step": 251500
},
{
"epoch": 18.03,
"learning_rate": 4.098583488338819e-05,
"loss": 0.3321,
"step": 252000
},
{
"epoch": 18.06,
"learning_rate": 4.096794963514094e-05,
"loss": 0.3254,
"step": 252500
},
{
"epoch": 18.1,
"learning_rate": 4.095006438689369e-05,
"loss": 0.3344,
"step": 253000
},
{
"epoch": 18.14,
"learning_rate": 4.093217913864645e-05,
"loss": 0.3348,
"step": 253500
},
{
"epoch": 18.17,
"learning_rate": 4.09142938903992e-05,
"loss": 0.3461,
"step": 254000
},
{
"epoch": 18.21,
"learning_rate": 4.089640864215196e-05,
"loss": 0.3354,
"step": 254500
},
{
"epoch": 18.24,
"learning_rate": 4.087852339390471e-05,
"loss": 0.3484,
"step": 255000
},
{
"epoch": 18.28,
"learning_rate": 4.086063814565746e-05,
"loss": 0.3487,
"step": 255500
},
{
"epoch": 18.31,
"learning_rate": 4.084275289741022e-05,
"loss": 0.3496,
"step": 256000
},
{
"epoch": 18.35,
"learning_rate": 4.0824867649162973e-05,
"loss": 0.3552,
"step": 256500
},
{
"epoch": 18.39,
"learning_rate": 4.0806982400915725e-05,
"loss": 0.3553,
"step": 257000
},
{
"epoch": 18.42,
"learning_rate": 4.0789097152668484e-05,
"loss": 0.3562,
"step": 257500
},
{
"epoch": 18.46,
"learning_rate": 4.0771211904421236e-05,
"loss": 0.3692,
"step": 258000
},
{
"epoch": 18.49,
"learning_rate": 4.075332665617399e-05,
"loss": 0.3625,
"step": 258500
},
{
"epoch": 18.53,
"learning_rate": 4.0735441407926746e-05,
"loss": 0.3625,
"step": 259000
},
{
"epoch": 18.56,
"learning_rate": 4.07175561596795e-05,
"loss": 0.3605,
"step": 259500
},
{
"epoch": 18.6,
"learning_rate": 4.069967091143225e-05,
"loss": 0.3614,
"step": 260000
},
{
"epoch": 18.64,
"learning_rate": 4.068178566318501e-05,
"loss": 0.3658,
"step": 260500
},
{
"epoch": 18.67,
"learning_rate": 4.066390041493776e-05,
"loss": 0.3677,
"step": 261000
},
{
"epoch": 18.71,
"learning_rate": 4.064601516669052e-05,
"loss": 0.3718,
"step": 261500
},
{
"epoch": 18.74,
"learning_rate": 4.062812991844327e-05,
"loss": 0.3752,
"step": 262000
},
{
"epoch": 18.78,
"learning_rate": 4.061024467019602e-05,
"loss": 0.3833,
"step": 262500
},
{
"epoch": 18.82,
"learning_rate": 4.059235942194878e-05,
"loss": 0.3696,
"step": 263000
},
{
"epoch": 18.85,
"learning_rate": 4.057447417370153e-05,
"loss": 0.3756,
"step": 263500
},
{
"epoch": 18.89,
"learning_rate": 4.0556588925454284e-05,
"loss": 0.3803,
"step": 264000
},
{
"epoch": 18.92,
"learning_rate": 4.053870367720704e-05,
"loss": 0.3803,
"step": 264500
},
{
"epoch": 18.96,
"learning_rate": 4.0520818428959795e-05,
"loss": 0.382,
"step": 265000
},
{
"epoch": 18.99,
"learning_rate": 4.0502933180712546e-05,
"loss": 0.3811,
"step": 265500
},
{
"epoch": 19.03,
"learning_rate": 4.0485047932465305e-05,
"loss": 0.3226,
"step": 266000
},
{
"epoch": 19.07,
"learning_rate": 4.046716268421806e-05,
"loss": 0.3189,
"step": 266500
},
{
"epoch": 19.1,
"learning_rate": 4.044927743597081e-05,
"loss": 0.3253,
"step": 267000
},
{
"epoch": 19.14,
"learning_rate": 4.043139218772357e-05,
"loss": 0.3247,
"step": 267500
},
{
"epoch": 19.17,
"learning_rate": 4.041350693947632e-05,
"loss": 0.3269,
"step": 268000
},
{
"epoch": 19.21,
"learning_rate": 4.039562169122908e-05,
"loss": 0.3356,
"step": 268500
},
{
"epoch": 19.24,
"learning_rate": 4.037773644298183e-05,
"loss": 0.3369,
"step": 269000
},
{
"epoch": 19.28,
"learning_rate": 4.035985119473458e-05,
"loss": 0.3366,
"step": 269500
},
{
"epoch": 19.32,
"learning_rate": 4.034196594648734e-05,
"loss": 0.339,
"step": 270000
},
{
"epoch": 19.35,
"learning_rate": 4.032408069824009e-05,
"loss": 0.3418,
"step": 270500
},
{
"epoch": 19.39,
"learning_rate": 4.030619544999284e-05,
"loss": 0.3432,
"step": 271000
},
{
"epoch": 19.42,
"learning_rate": 4.02883102017456e-05,
"loss": 0.3492,
"step": 271500
},
{
"epoch": 19.46,
"learning_rate": 4.027042495349836e-05,
"loss": 0.3447,
"step": 272000
},
{
"epoch": 19.49,
"learning_rate": 4.025253970525111e-05,
"loss": 0.3545,
"step": 272500
},
{
"epoch": 19.53,
"learning_rate": 4.023465445700387e-05,
"loss": 0.3492,
"step": 273000
},
{
"epoch": 19.57,
"learning_rate": 4.021676920875662e-05,
"loss": 0.3497,
"step": 273500
},
{
"epoch": 19.6,
"learning_rate": 4.0198883960509374e-05,
"loss": 0.3532,
"step": 274000
},
{
"epoch": 19.64,
"learning_rate": 4.018099871226213e-05,
"loss": 0.3543,
"step": 274500
},
{
"epoch": 19.67,
"learning_rate": 4.0163113464014885e-05,
"loss": 0.3614,
"step": 275000
},
{
"epoch": 19.71,
"learning_rate": 4.0145228215767636e-05,
"loss": 0.3606,
"step": 275500
},
{
"epoch": 19.75,
"learning_rate": 4.0127342967520395e-05,
"loss": 0.3653,
"step": 276000
},
{
"epoch": 19.78,
"learning_rate": 4.010945771927315e-05,
"loss": 0.3695,
"step": 276500
},
{
"epoch": 19.82,
"learning_rate": 4.00915724710259e-05,
"loss": 0.361,
"step": 277000
},
{
"epoch": 19.85,
"learning_rate": 4.007368722277866e-05,
"loss": 0.3617,
"step": 277500
},
{
"epoch": 19.89,
"learning_rate": 4.005580197453141e-05,
"loss": 0.365,
"step": 278000
},
{
"epoch": 19.92,
"learning_rate": 4.003791672628416e-05,
"loss": 0.3664,
"step": 278500
},
{
"epoch": 19.96,
"learning_rate": 4.002003147803692e-05,
"loss": 0.3689,
"step": 279000
},
{
"epoch": 20.0,
"learning_rate": 4.000214622978967e-05,
"loss": 0.3769,
"step": 279500
},
{
"epoch": 20.03,
"learning_rate": 3.998426098154242e-05,
"loss": 0.3129,
"step": 280000
},
{
"epoch": 20.07,
"learning_rate": 3.996637573329518e-05,
"loss": 0.3107,
"step": 280500
},
{
"epoch": 20.1,
"learning_rate": 3.994849048504793e-05,
"loss": 0.3165,
"step": 281000
},
{
"epoch": 20.14,
"learning_rate": 3.993060523680069e-05,
"loss": 0.319,
"step": 281500
},
{
"epoch": 20.17,
"learning_rate": 3.9912719988553444e-05,
"loss": 0.3204,
"step": 282000
},
{
"epoch": 20.21,
"learning_rate": 3.9894834740306195e-05,
"loss": 0.3273,
"step": 282500
},
{
"epoch": 20.25,
"learning_rate": 3.9876949492058954e-05,
"loss": 0.3274,
"step": 283000
},
{
"epoch": 20.28,
"learning_rate": 3.9859064243811706e-05,
"loss": 0.3288,
"step": 283500
},
{
"epoch": 20.32,
"learning_rate": 3.984117899556446e-05,
"loss": 0.3309,
"step": 284000
},
{
"epoch": 20.35,
"learning_rate": 3.9823293747317216e-05,
"loss": 0.3272,
"step": 284500
},
{
"epoch": 20.39,
"learning_rate": 3.980540849906997e-05,
"loss": 0.336,
"step": 285000
},
{
"epoch": 20.42,
"learning_rate": 3.978752325082272e-05,
"loss": 0.3403,
"step": 285500
},
{
"epoch": 20.46,
"learning_rate": 3.976963800257548e-05,
"loss": 0.3471,
"step": 286000
},
{
"epoch": 20.5,
"learning_rate": 3.975175275432823e-05,
"loss": 0.3415,
"step": 286500
},
{
"epoch": 20.53,
"learning_rate": 3.973386750608098e-05,
"loss": 0.3453,
"step": 287000
},
{
"epoch": 20.57,
"learning_rate": 3.971598225783374e-05,
"loss": 0.3416,
"step": 287500
},
{
"epoch": 20.6,
"learning_rate": 3.969809700958649e-05,
"loss": 0.3482,
"step": 288000
},
{
"epoch": 20.64,
"learning_rate": 3.968021176133925e-05,
"loss": 0.3491,
"step": 288500
},
{
"epoch": 20.68,
"learning_rate": 3.9662326513092e-05,
"loss": 0.3517,
"step": 289000
},
{
"epoch": 20.71,
"learning_rate": 3.9644441264844754e-05,
"loss": 0.3593,
"step": 289500
},
{
"epoch": 20.75,
"learning_rate": 3.962655601659751e-05,
"loss": 0.3568,
"step": 290000
},
{
"epoch": 20.78,
"learning_rate": 3.9608670768350265e-05,
"loss": 0.3547,
"step": 290500
},
{
"epoch": 20.82,
"learning_rate": 3.959078552010302e-05,
"loss": 0.3511,
"step": 291000
},
{
"epoch": 20.85,
"learning_rate": 3.9572900271855775e-05,
"loss": 0.3517,
"step": 291500
},
{
"epoch": 20.89,
"learning_rate": 3.955501502360853e-05,
"loss": 0.3564,
"step": 292000
},
{
"epoch": 20.93,
"learning_rate": 3.953712977536128e-05,
"loss": 0.3566,
"step": 292500
},
{
"epoch": 20.96,
"learning_rate": 3.951924452711404e-05,
"loss": 0.3621,
"step": 293000
},
{
"epoch": 21.0,
"learning_rate": 3.9501359278866796e-05,
"loss": 0.362,
"step": 293500
},
{
"epoch": 21.03,
"learning_rate": 3.948347403061955e-05,
"loss": 0.3025,
"step": 294000
},
{
"epoch": 21.07,
"learning_rate": 3.9465588782372306e-05,
"loss": 0.2993,
"step": 294500
},
{
"epoch": 21.1,
"learning_rate": 3.944770353412506e-05,
"loss": 0.3111,
"step": 295000
},
{
"epoch": 21.14,
"learning_rate": 3.942981828587781e-05,
"loss": 0.3089,
"step": 295500
},
{
"epoch": 21.18,
"learning_rate": 3.941193303763057e-05,
"loss": 0.3137,
"step": 296000
},
{
"epoch": 21.21,
"learning_rate": 3.939404778938332e-05,
"loss": 0.3169,
"step": 296500
},
{
"epoch": 21.25,
"learning_rate": 3.937616254113607e-05,
"loss": 0.3173,
"step": 297000
},
{
"epoch": 21.28,
"learning_rate": 3.935827729288883e-05,
"loss": 0.3182,
"step": 297500
},
{
"epoch": 21.32,
"learning_rate": 3.934039204464158e-05,
"loss": 0.3248,
"step": 298000
},
{
"epoch": 21.35,
"learning_rate": 3.9322506796394334e-05,
"loss": 0.3279,
"step": 298500
},
{
"epoch": 21.39,
"learning_rate": 3.930462154814709e-05,
"loss": 0.3251,
"step": 299000
},
{
"epoch": 21.43,
"learning_rate": 3.9286736299899845e-05,
"loss": 0.325,
"step": 299500
},
{
"epoch": 21.46,
"learning_rate": 3.92688510516526e-05,
"loss": 0.3341,
"step": 300000
},
{
"epoch": 21.5,
"learning_rate": 3.9250965803405355e-05,
"loss": 0.3362,
"step": 300500
},
{
"epoch": 21.53,
"learning_rate": 3.923308055515811e-05,
"loss": 0.3396,
"step": 301000
},
{
"epoch": 21.57,
"learning_rate": 3.9215195306910865e-05,
"loss": 0.3381,
"step": 301500
},
{
"epoch": 21.61,
"learning_rate": 3.919731005866362e-05,
"loss": 0.3352,
"step": 302000
},
{
"epoch": 21.64,
"learning_rate": 3.917942481041637e-05,
"loss": 0.3374,
"step": 302500
},
{
"epoch": 21.68,
"learning_rate": 3.916153956216913e-05,
"loss": 0.3379,
"step": 303000
},
{
"epoch": 21.71,
"learning_rate": 3.914365431392188e-05,
"loss": 0.3381,
"step": 303500
},
{
"epoch": 21.75,
"learning_rate": 3.912576906567463e-05,
"loss": 0.3405,
"step": 304000
},
{
"epoch": 21.78,
"learning_rate": 3.910788381742739e-05,
"loss": 0.3459,
"step": 304500
},
{
"epoch": 21.82,
"learning_rate": 3.908999856918014e-05,
"loss": 0.3475,
"step": 305000
},
{
"epoch": 21.86,
"learning_rate": 3.907211332093289e-05,
"loss": 0.3469,
"step": 305500
},
{
"epoch": 21.89,
"learning_rate": 3.905422807268565e-05,
"loss": 0.3519,
"step": 306000
},
{
"epoch": 21.93,
"learning_rate": 3.9036342824438404e-05,
"loss": 0.352,
"step": 306500
},
{
"epoch": 21.96,
"learning_rate": 3.9018457576191155e-05,
"loss": 0.3554,
"step": 307000
},
{
"epoch": 22.0,
"learning_rate": 3.9000572327943914e-05,
"loss": 0.3475,
"step": 307500
},
{
"epoch": 22.03,
"learning_rate": 3.8982687079696666e-05,
"loss": 0.2957,
"step": 308000
},
{
"epoch": 22.07,
"learning_rate": 3.8964801831449424e-05,
"loss": 0.2972,
"step": 308500
},
{
"epoch": 22.11,
"learning_rate": 3.8946916583202176e-05,
"loss": 0.3036,
"step": 309000
},
{
"epoch": 22.14,
"learning_rate": 3.892903133495493e-05,
"loss": 0.3058,
"step": 309500
},
{
"epoch": 22.18,
"learning_rate": 3.8911146086707686e-05,
"loss": 0.3062,
"step": 310000
},
{
"epoch": 22.21,
"learning_rate": 3.889326083846044e-05,
"loss": 0.3097,
"step": 310500
},
{
"epoch": 22.25,
"learning_rate": 3.887537559021319e-05,
"loss": 0.3151,
"step": 311000
},
{
"epoch": 22.29,
"learning_rate": 3.885749034196595e-05,
"loss": 0.3191,
"step": 311500
},
{
"epoch": 22.32,
"learning_rate": 3.88396050937187e-05,
"loss": 0.313,
"step": 312000
},
{
"epoch": 22.36,
"learning_rate": 3.882171984547145e-05,
"loss": 0.3222,
"step": 312500
},
{
"epoch": 22.39,
"learning_rate": 3.880383459722421e-05,
"loss": 0.3276,
"step": 313000
},
{
"epoch": 22.43,
"learning_rate": 3.878594934897696e-05,
"loss": 0.3218,
"step": 313500
},
{
"epoch": 22.46,
"learning_rate": 3.8768064100729714e-05,
"loss": 0.3268,
"step": 314000
},
{
"epoch": 22.5,
"learning_rate": 3.875017885248247e-05,
"loss": 0.3251,
"step": 314500
},
{
"epoch": 22.54,
"learning_rate": 3.873229360423523e-05,
"loss": 0.3252,
"step": 315000
},
{
"epoch": 22.57,
"learning_rate": 3.871440835598798e-05,
"loss": 0.3233,
"step": 315500
},
{
"epoch": 22.61,
"learning_rate": 3.869652310774074e-05,
"loss": 0.3311,
"step": 316000
},
{
"epoch": 22.64,
"learning_rate": 3.8678637859493494e-05,
"loss": 0.3344,
"step": 316500
},
{
"epoch": 22.68,
"learning_rate": 3.8660752611246245e-05,
"loss": 0.3332,
"step": 317000
},
{
"epoch": 22.71,
"learning_rate": 3.8642867362999004e-05,
"loss": 0.3341,
"step": 317500
},
{
"epoch": 22.75,
"learning_rate": 3.8624982114751756e-05,
"loss": 0.3355,
"step": 318000
},
{
"epoch": 22.79,
"learning_rate": 3.860709686650451e-05,
"loss": 0.3344,
"step": 318500
},
{
"epoch": 22.82,
"learning_rate": 3.8589211618257266e-05,
"loss": 0.3376,
"step": 319000
},
{
"epoch": 22.86,
"learning_rate": 3.857132637001002e-05,
"loss": 0.3361,
"step": 319500
},
{
"epoch": 22.89,
"learning_rate": 3.8553441121762776e-05,
"loss": 0.3411,
"step": 320000
},
{
"epoch": 22.93,
"learning_rate": 3.853555587351553e-05,
"loss": 0.3404,
"step": 320500
},
{
"epoch": 22.96,
"learning_rate": 3.851767062526828e-05,
"loss": 0.346,
"step": 321000
},
{
"epoch": 23.0,
"learning_rate": 3.849978537702104e-05,
"loss": 0.3427,
"step": 321500
},
{
"epoch": 23.04,
"learning_rate": 3.848190012877379e-05,
"loss": 0.2876,
"step": 322000
},
{
"epoch": 23.07,
"learning_rate": 3.846401488052654e-05,
"loss": 0.2948,
"step": 322500
},
{
"epoch": 23.11,
"learning_rate": 3.84461296322793e-05,
"loss": 0.3026,
"step": 323000
},
{
"epoch": 23.14,
"learning_rate": 3.842824438403205e-05,
"loss": 0.2998,
"step": 323500
},
{
"epoch": 23.18,
"learning_rate": 3.8410359135784804e-05,
"loss": 0.3039,
"step": 324000
},
{
"epoch": 23.22,
"learning_rate": 3.839247388753756e-05,
"loss": 0.3022,
"step": 324500
},
{
"epoch": 23.25,
"learning_rate": 3.8374588639290315e-05,
"loss": 0.3082,
"step": 325000
},
{
"epoch": 23.29,
"learning_rate": 3.8356703391043067e-05,
"loss": 0.3123,
"step": 325500
},
{
"epoch": 23.32,
"learning_rate": 3.8338818142795825e-05,
"loss": 0.3179,
"step": 326000
},
{
"epoch": 23.36,
"learning_rate": 3.832093289454858e-05,
"loss": 0.3118,
"step": 326500
},
{
"epoch": 23.39,
"learning_rate": 3.8303047646301335e-05,
"loss": 0.3164,
"step": 327000
},
{
"epoch": 23.43,
"learning_rate": 3.828516239805409e-05,
"loss": 0.3102,
"step": 327500
},
{
"epoch": 23.47,
"learning_rate": 3.826727714980684e-05,
"loss": 0.3231,
"step": 328000
},
{
"epoch": 23.5,
"learning_rate": 3.82493919015596e-05,
"loss": 0.3154,
"step": 328500
},
{
"epoch": 23.54,
"learning_rate": 3.823150665331235e-05,
"loss": 0.319,
"step": 329000
},
{
"epoch": 23.57,
"learning_rate": 3.82136214050651e-05,
"loss": 0.3192,
"step": 329500
},
{
"epoch": 23.61,
"learning_rate": 3.819573615681786e-05,
"loss": 0.3166,
"step": 330000
},
{
"epoch": 23.64,
"learning_rate": 3.817785090857061e-05,
"loss": 0.3197,
"step": 330500
},
{
"epoch": 23.68,
"learning_rate": 3.815996566032336e-05,
"loss": 0.3218,
"step": 331000
},
{
"epoch": 23.72,
"learning_rate": 3.814208041207612e-05,
"loss": 0.3324,
"step": 331500
},
{
"epoch": 23.75,
"learning_rate": 3.8124195163828874e-05,
"loss": 0.3284,
"step": 332000
},
{
"epoch": 23.79,
"learning_rate": 3.8106309915581626e-05,
"loss": 0.3291,
"step": 332500
},
{
"epoch": 23.82,
"learning_rate": 3.8088424667334384e-05,
"loss": 0.3295,
"step": 333000
},
{
"epoch": 23.86,
"learning_rate": 3.8070539419087136e-05,
"loss": 0.332,
"step": 333500
},
{
"epoch": 23.89,
"learning_rate": 3.805265417083989e-05,
"loss": 0.3307,
"step": 334000
},
{
"epoch": 23.93,
"learning_rate": 3.8034768922592646e-05,
"loss": 0.3313,
"step": 334500
},
{
"epoch": 23.97,
"learning_rate": 3.80168836743454e-05,
"loss": 0.3375,
"step": 335000
},
{
"epoch": 24.0,
"learning_rate": 3.799899842609816e-05,
"loss": 0.3292,
"step": 335500
},
{
"epoch": 24.04,
"learning_rate": 3.798111317785091e-05,
"loss": 0.2779,
"step": 336000
},
{
"epoch": 24.07,
"learning_rate": 3.796322792960367e-05,
"loss": 0.2897,
"step": 336500
},
{
"epoch": 24.11,
"learning_rate": 3.794534268135642e-05,
"loss": 0.2903,
"step": 337000
},
{
"epoch": 24.15,
"learning_rate": 3.792745743310918e-05,
"loss": 0.2981,
"step": 337500
},
{
"epoch": 24.18,
"learning_rate": 3.790957218486193e-05,
"loss": 0.2931,
"step": 338000
},
{
"epoch": 24.22,
"learning_rate": 3.789168693661468e-05,
"loss": 0.3005,
"step": 338500
},
{
"epoch": 24.25,
"learning_rate": 3.787380168836744e-05,
"loss": 0.3019,
"step": 339000
},
{
"epoch": 24.29,
"learning_rate": 3.785591644012019e-05,
"loss": 0.3112,
"step": 339500
},
{
"epoch": 24.32,
"learning_rate": 3.783803119187295e-05,
"loss": 0.3052,
"step": 340000
},
{
"epoch": 24.36,
"learning_rate": 3.78201459436257e-05,
"loss": 0.3052,
"step": 340500
},
{
"epoch": 24.4,
"learning_rate": 3.7802260695378453e-05,
"loss": 0.3081,
"step": 341000
},
{
"epoch": 24.43,
"learning_rate": 3.778437544713121e-05,
"loss": 0.3086,
"step": 341500
},
{
"epoch": 24.47,
"learning_rate": 3.7766490198883964e-05,
"loss": 0.3115,
"step": 342000
},
{
"epoch": 24.5,
"learning_rate": 3.7748604950636716e-05,
"loss": 0.3115,
"step": 342500
},
{
"epoch": 24.54,
"learning_rate": 3.7730719702389474e-05,
"loss": 0.3175,
"step": 343000
},
{
"epoch": 24.57,
"learning_rate": 3.7712834454142226e-05,
"loss": 0.3134,
"step": 343500
},
{
"epoch": 24.61,
"learning_rate": 3.769494920589498e-05,
"loss": 0.3181,
"step": 344000
},
{
"epoch": 24.65,
"learning_rate": 3.7677063957647736e-05,
"loss": 0.3161,
"step": 344500
},
{
"epoch": 24.68,
"learning_rate": 3.765917870940049e-05,
"loss": 0.3205,
"step": 345000
},
{
"epoch": 24.72,
"learning_rate": 3.764129346115324e-05,
"loss": 0.3182,
"step": 345500
},
{
"epoch": 24.75,
"learning_rate": 3.7623408212906e-05,
"loss": 0.3232,
"step": 346000
},
{
"epoch": 24.79,
"learning_rate": 3.760552296465875e-05,
"loss": 0.3235,
"step": 346500
},
{
"epoch": 24.82,
"learning_rate": 3.758763771641151e-05,
"loss": 0.3194,
"step": 347000
},
{
"epoch": 24.86,
"learning_rate": 3.756975246816426e-05,
"loss": 0.3172,
"step": 347500
},
{
"epoch": 24.9,
"learning_rate": 3.755186721991701e-05,
"loss": 0.3244,
"step": 348000
},
{
"epoch": 24.93,
"learning_rate": 3.753398197166977e-05,
"loss": 0.3262,
"step": 348500
},
{
"epoch": 24.97,
"learning_rate": 3.751609672342252e-05,
"loss": 0.3262,
"step": 349000
},
{
"epoch": 25.0,
"learning_rate": 3.7498211475175275e-05,
"loss": 0.3218,
"step": 349500
},
{
"epoch": 25.04,
"learning_rate": 3.748032622692803e-05,
"loss": 0.2829,
"step": 350000
},
{
"epoch": 25.08,
"learning_rate": 3.7462440978680785e-05,
"loss": 0.2893,
"step": 350500
},
{
"epoch": 25.11,
"learning_rate": 3.744455573043354e-05,
"loss": 0.2878,
"step": 351000
},
{
"epoch": 25.15,
"learning_rate": 3.7426670482186295e-05,
"loss": 0.2876,
"step": 351500
},
{
"epoch": 25.18,
"learning_rate": 3.740878523393905e-05,
"loss": 0.292,
"step": 352000
},
{
"epoch": 25.22,
"learning_rate": 3.73908999856918e-05,
"loss": 0.292,
"step": 352500
},
{
"epoch": 25.25,
"learning_rate": 3.737301473744456e-05,
"loss": 0.3027,
"step": 353000
},
{
"epoch": 25.29,
"learning_rate": 3.735512948919731e-05,
"loss": 0.299,
"step": 353500
},
{
"epoch": 25.33,
"learning_rate": 3.733724424095006e-05,
"loss": 0.3027,
"step": 354000
},
{
"epoch": 25.36,
"learning_rate": 3.731935899270282e-05,
"loss": 0.302,
"step": 354500
},
{
"epoch": 25.4,
"learning_rate": 3.730147374445557e-05,
"loss": 0.2994,
"step": 355000
},
{
"epoch": 25.43,
"learning_rate": 3.728358849620833e-05,
"loss": 0.3042,
"step": 355500
},
{
"epoch": 25.47,
"learning_rate": 3.726570324796108e-05,
"loss": 0.3068,
"step": 356000
},
{
"epoch": 25.5,
"learning_rate": 3.7247817999713834e-05,
"loss": 0.3123,
"step": 356500
},
{
"epoch": 25.54,
"learning_rate": 3.722993275146659e-05,
"loss": 0.3098,
"step": 357000
},
{
"epoch": 25.58,
"learning_rate": 3.7212047503219344e-05,
"loss": 0.3083,
"step": 357500
},
{
"epoch": 25.61,
"learning_rate": 3.7194162254972096e-05,
"loss": 0.3115,
"step": 358000
},
{
"epoch": 25.65,
"learning_rate": 3.717627700672486e-05,
"loss": 0.3047,
"step": 358500
},
{
"epoch": 25.68,
"learning_rate": 3.715839175847761e-05,
"loss": 0.3141,
"step": 359000
},
{
"epoch": 25.72,
"learning_rate": 3.7140506510230365e-05,
"loss": 0.3159,
"step": 359500
},
{
"epoch": 25.75,
"learning_rate": 3.712262126198312e-05,
"loss": 0.3112,
"step": 360000
},
{
"epoch": 25.79,
"learning_rate": 3.7104736013735875e-05,
"loss": 0.3096,
"step": 360500
},
{
"epoch": 25.83,
"learning_rate": 3.708685076548863e-05,
"loss": 0.3173,
"step": 361000
},
{
"epoch": 25.86,
"learning_rate": 3.7068965517241385e-05,
"loss": 0.3201,
"step": 361500
},
{
"epoch": 25.9,
"learning_rate": 3.705108026899414e-05,
"loss": 0.3185,
"step": 362000
},
{
"epoch": 25.93,
"learning_rate": 3.703319502074689e-05,
"loss": 0.3176,
"step": 362500
},
{
"epoch": 25.97,
"learning_rate": 3.701530977249965e-05,
"loss": 0.3206,
"step": 363000
},
{
"epoch": 26.01,
"learning_rate": 3.69974245242524e-05,
"loss": 0.3193,
"step": 363500
},
{
"epoch": 26.04,
"learning_rate": 3.697953927600515e-05,
"loss": 0.274,
"step": 364000
},
{
"epoch": 26.08,
"learning_rate": 3.696165402775791e-05,
"loss": 0.2791,
"step": 364500
},
{
"epoch": 26.11,
"learning_rate": 3.694376877951066e-05,
"loss": 0.2841,
"step": 365000
},
{
"epoch": 26.15,
"learning_rate": 3.692588353126341e-05,
"loss": 0.279,
"step": 365500
},
{
"epoch": 26.18,
"learning_rate": 3.690799828301617e-05,
"loss": 0.2893,
"step": 366000
},
{
"epoch": 26.22,
"learning_rate": 3.6890113034768924e-05,
"loss": 0.2937,
"step": 366500
},
{
"epoch": 26.26,
"learning_rate": 3.687222778652168e-05,
"loss": 0.2902,
"step": 367000
},
{
"epoch": 26.29,
"learning_rate": 3.6854342538274434e-05,
"loss": 0.2968,
"step": 367500
},
{
"epoch": 26.33,
"learning_rate": 3.6836457290027186e-05,
"loss": 0.295,
"step": 368000
},
{
"epoch": 26.36,
"learning_rate": 3.6818572041779944e-05,
"loss": 0.2923,
"step": 368500
},
{
"epoch": 26.4,
"learning_rate": 3.6800686793532696e-05,
"loss": 0.2989,
"step": 369000
},
{
"epoch": 26.43,
"learning_rate": 3.678280154528545e-05,
"loss": 0.3007,
"step": 369500
},
{
"epoch": 26.47,
"learning_rate": 3.6764916297038207e-05,
"loss": 0.3044,
"step": 370000
},
{
"epoch": 26.51,
"learning_rate": 3.674703104879096e-05,
"loss": 0.2978,
"step": 370500
},
{
"epoch": 26.54,
"learning_rate": 3.672914580054371e-05,
"loss": 0.2971,
"step": 371000
},
{
"epoch": 26.58,
"learning_rate": 3.671126055229647e-05,
"loss": 0.3058,
"step": 371500
},
{
"epoch": 26.61,
"learning_rate": 3.669337530404922e-05,
"loss": 0.3078,
"step": 372000
},
{
"epoch": 26.65,
"learning_rate": 3.667549005580197e-05,
"loss": 0.3016,
"step": 372500
},
{
"epoch": 26.68,
"learning_rate": 3.665760480755473e-05,
"loss": 0.308,
"step": 373000
},
{
"epoch": 26.72,
"learning_rate": 3.663971955930748e-05,
"loss": 0.3086,
"step": 373500
},
{
"epoch": 26.76,
"learning_rate": 3.662183431106024e-05,
"loss": 0.3112,
"step": 374000
},
{
"epoch": 26.79,
"learning_rate": 3.660394906281299e-05,
"loss": 0.3071,
"step": 374500
},
{
"epoch": 26.83,
"learning_rate": 3.6586063814565745e-05,
"loss": 0.313,
"step": 375000
},
{
"epoch": 26.86,
"learning_rate": 3.65681785663185e-05,
"loss": 0.3117,
"step": 375500
},
{
"epoch": 26.9,
"learning_rate": 3.6550293318071255e-05,
"loss": 0.3135,
"step": 376000
},
{
"epoch": 26.94,
"learning_rate": 3.653240806982401e-05,
"loss": 0.3138,
"step": 376500
},
{
"epoch": 26.97,
"learning_rate": 3.6514522821576766e-05,
"loss": 0.3138,
"step": 377000
},
{
"epoch": 27.01,
"learning_rate": 3.649663757332952e-05,
"loss": 0.3043,
"step": 377500
},
{
"epoch": 27.04,
"learning_rate": 3.647875232508227e-05,
"loss": 0.2736,
"step": 378000
},
{
"epoch": 27.08,
"learning_rate": 3.646086707683503e-05,
"loss": 0.2799,
"step": 378500
},
{
"epoch": 27.11,
"learning_rate": 3.644298182858778e-05,
"loss": 0.278,
"step": 379000
},
{
"epoch": 27.15,
"learning_rate": 3.642509658034053e-05,
"loss": 0.2836,
"step": 379500
},
{
"epoch": 27.19,
"learning_rate": 3.64072113320933e-05,
"loss": 0.282,
"step": 380000
},
{
"epoch": 27.22,
"learning_rate": 3.638932608384605e-05,
"loss": 0.2837,
"step": 380500
},
{
"epoch": 27.26,
"learning_rate": 3.63714408355988e-05,
"loss": 0.2837,
"step": 381000
},
{
"epoch": 27.29,
"learning_rate": 3.635355558735156e-05,
"loss": 0.2897,
"step": 381500
},
{
"epoch": 27.33,
"learning_rate": 3.633567033910431e-05,
"loss": 0.2914,
"step": 382000
},
{
"epoch": 27.36,
"learning_rate": 3.631778509085706e-05,
"loss": 0.2928,
"step": 382500
},
{
"epoch": 27.4,
"learning_rate": 3.629989984260982e-05,
"loss": 0.2941,
"step": 383000
},
{
"epoch": 27.44,
"learning_rate": 3.628201459436257e-05,
"loss": 0.293,
"step": 383500
},
{
"epoch": 27.47,
"learning_rate": 3.6264129346115325e-05,
"loss": 0.2942,
"step": 384000
},
{
"epoch": 27.51,
"learning_rate": 3.624624409786808e-05,
"loss": 0.2941,
"step": 384500
},
{
"epoch": 27.54,
"learning_rate": 3.6228358849620835e-05,
"loss": 0.2965,
"step": 385000
},
{
"epoch": 27.58,
"learning_rate": 3.6210473601373593e-05,
"loss": 0.3013,
"step": 385500
},
{
"epoch": 27.61,
"learning_rate": 3.6192588353126345e-05,
"loss": 0.2992,
"step": 386000
},
{
"epoch": 27.65,
"learning_rate": 3.61747031048791e-05,
"loss": 0.2991,
"step": 386500
},
{
"epoch": 27.69,
"learning_rate": 3.6156817856631856e-05,
"loss": 0.2993,
"step": 387000
},
{
"epoch": 27.72,
"learning_rate": 3.613893260838461e-05,
"loss": 0.3044,
"step": 387500
},
{
"epoch": 27.76,
"learning_rate": 3.612104736013736e-05,
"loss": 0.3013,
"step": 388000
},
{
"epoch": 27.79,
"learning_rate": 3.610316211189012e-05,
"loss": 0.3072,
"step": 388500
},
{
"epoch": 27.83,
"learning_rate": 3.608527686364287e-05,
"loss": 0.3065,
"step": 389000
},
{
"epoch": 27.87,
"learning_rate": 3.606739161539562e-05,
"loss": 0.3077,
"step": 389500
},
{
"epoch": 27.9,
"learning_rate": 3.604950636714838e-05,
"loss": 0.3092,
"step": 390000
},
{
"epoch": 27.94,
"learning_rate": 3.603162111890113e-05,
"loss": 0.3065,
"step": 390500
},
{
"epoch": 27.97,
"learning_rate": 3.6013735870653884e-05,
"loss": 0.3074,
"step": 391000
},
{
"epoch": 28.01,
"learning_rate": 3.599585062240664e-05,
"loss": 0.2985,
"step": 391500
},
{
"epoch": 28.04,
"learning_rate": 3.5977965374159394e-05,
"loss": 0.2652,
"step": 392000
},
{
"epoch": 28.08,
"learning_rate": 3.5960080125912146e-05,
"loss": 0.2739,
"step": 392500
},
{
"epoch": 28.12,
"learning_rate": 3.5942194877664904e-05,
"loss": 0.2782,
"step": 393000
},
{
"epoch": 28.15,
"learning_rate": 3.5924309629417656e-05,
"loss": 0.2794,
"step": 393500
},
{
"epoch": 28.19,
"learning_rate": 3.5906424381170415e-05,
"loss": 0.2835,
"step": 394000
},
{
"epoch": 28.22,
"learning_rate": 3.5888539132923166e-05,
"loss": 0.2797,
"step": 394500
},
{
"epoch": 28.26,
"learning_rate": 3.587065388467592e-05,
"loss": 0.2821,
"step": 395000
},
{
"epoch": 28.29,
"learning_rate": 3.585276863642868e-05,
"loss": 0.2846,
"step": 395500
},
{
"epoch": 28.33,
"learning_rate": 3.583488338818143e-05,
"loss": 0.2871,
"step": 396000
},
{
"epoch": 28.37,
"learning_rate": 3.581699813993418e-05,
"loss": 0.2847,
"step": 396500
},
{
"epoch": 28.4,
"learning_rate": 3.579911289168694e-05,
"loss": 0.2892,
"step": 397000
},
{
"epoch": 28.44,
"learning_rate": 3.578122764343969e-05,
"loss": 0.2895,
"step": 397500
},
{
"epoch": 28.47,
"learning_rate": 3.576334239519244e-05,
"loss": 0.2917,
"step": 398000
},
{
"epoch": 28.51,
"learning_rate": 3.57454571469452e-05,
"loss": 0.2965,
"step": 398500
},
{
"epoch": 28.54,
"learning_rate": 3.572757189869795e-05,
"loss": 0.2958,
"step": 399000
},
{
"epoch": 28.58,
"learning_rate": 3.5709686650450705e-05,
"loss": 0.2948,
"step": 399500
},
{
"epoch": 28.62,
"learning_rate": 3.569180140220346e-05,
"loss": 0.3015,
"step": 400000
},
{
"epoch": 28.65,
"learning_rate": 3.5673916153956215e-05,
"loss": 0.2949,
"step": 400500
},
{
"epoch": 28.69,
"learning_rate": 3.5656030905708974e-05,
"loss": 0.3006,
"step": 401000
},
{
"epoch": 28.72,
"learning_rate": 3.5638145657461725e-05,
"loss": 0.3004,
"step": 401500
},
{
"epoch": 28.76,
"learning_rate": 3.5620260409214484e-05,
"loss": 0.301,
"step": 402000
},
{
"epoch": 28.8,
"learning_rate": 3.5602375160967236e-05,
"loss": 0.2974,
"step": 402500
},
{
"epoch": 28.83,
"learning_rate": 3.5584489912719994e-05,
"loss": 0.2978,
"step": 403000
},
{
"epoch": 28.87,
"learning_rate": 3.5566604664472746e-05,
"loss": 0.2994,
"step": 403500
},
{
"epoch": 28.9,
"learning_rate": 3.55487194162255e-05,
"loss": 0.3041,
"step": 404000
},
{
"epoch": 28.94,
"learning_rate": 3.5530834167978256e-05,
"loss": 0.3043,
"step": 404500
},
{
"epoch": 28.97,
"learning_rate": 3.551294891973101e-05,
"loss": 0.3016,
"step": 405000
},
{
"epoch": 29.01,
"learning_rate": 3.549506367148377e-05,
"loss": 0.2955,
"step": 405500
},
{
"epoch": 29.05,
"learning_rate": 3.547717842323652e-05,
"loss": 0.2647,
"step": 406000
},
{
"epoch": 29.08,
"learning_rate": 3.545929317498927e-05,
"loss": 0.2694,
"step": 406500
},
{
"epoch": 29.12,
"learning_rate": 3.544140792674203e-05,
"loss": 0.2691,
"step": 407000
},
{
"epoch": 29.15,
"learning_rate": 3.542352267849478e-05,
"loss": 0.2695,
"step": 407500
},
{
"epoch": 29.19,
"learning_rate": 3.540563743024753e-05,
"loss": 0.2747,
"step": 408000
},
{
"epoch": 29.22,
"learning_rate": 3.538775218200029e-05,
"loss": 0.2794,
"step": 408500
},
{
"epoch": 29.26,
"learning_rate": 3.536986693375304e-05,
"loss": 0.2796,
"step": 409000
},
{
"epoch": 29.3,
"learning_rate": 3.5351981685505795e-05,
"loss": 0.2837,
"step": 409500
},
{
"epoch": 29.33,
"learning_rate": 3.533409643725855e-05,
"loss": 0.2849,
"step": 410000
},
{
"epoch": 29.37,
"learning_rate": 3.5316211189011305e-05,
"loss": 0.2858,
"step": 410500
},
{
"epoch": 29.4,
"learning_rate": 3.529832594076406e-05,
"loss": 0.2839,
"step": 411000
},
{
"epoch": 29.44,
"learning_rate": 3.5280440692516815e-05,
"loss": 0.2864,
"step": 411500
},
{
"epoch": 29.47,
"learning_rate": 3.526255544426957e-05,
"loss": 0.2858,
"step": 412000
},
{
"epoch": 29.51,
"learning_rate": 3.5244670196022326e-05,
"loss": 0.29,
"step": 412500
},
{
"epoch": 29.55,
"learning_rate": 3.522678494777508e-05,
"loss": 0.2869,
"step": 413000
},
{
"epoch": 29.58,
"learning_rate": 3.520889969952783e-05,
"loss": 0.2902,
"step": 413500
},
{
"epoch": 29.62,
"learning_rate": 3.519101445128059e-05,
"loss": 0.2906,
"step": 414000
},
{
"epoch": 29.65,
"learning_rate": 3.517312920303334e-05,
"loss": 0.2997,
"step": 414500
},
{
"epoch": 29.69,
"learning_rate": 3.515524395478609e-05,
"loss": 0.2961,
"step": 415000
},
{
"epoch": 29.73,
"learning_rate": 3.513735870653885e-05,
"loss": 0.2937,
"step": 415500
},
{
"epoch": 29.76,
"learning_rate": 3.51194734582916e-05,
"loss": 0.2919,
"step": 416000
},
{
"epoch": 29.8,
"learning_rate": 3.5101588210044354e-05,
"loss": 0.293,
"step": 416500
},
{
"epoch": 29.83,
"learning_rate": 3.508370296179711e-05,
"loss": 0.2976,
"step": 417000
},
{
"epoch": 29.87,
"learning_rate": 3.5065817713549864e-05,
"loss": 0.2962,
"step": 417500
},
{
"epoch": 29.9,
"learning_rate": 3.5047932465302616e-05,
"loss": 0.299,
"step": 418000
},
{
"epoch": 29.94,
"learning_rate": 3.5030047217055374e-05,
"loss": 0.2975,
"step": 418500
},
{
"epoch": 29.98,
"learning_rate": 3.5012161968808126e-05,
"loss": 0.2969,
"step": 419000
},
{
"epoch": 30.01,
"learning_rate": 3.499427672056088e-05,
"loss": 0.2894,
"step": 419500
},
{
"epoch": 30.05,
"learning_rate": 3.497639147231364e-05,
"loss": 0.2614,
"step": 420000
},
{
"epoch": 30.08,
"learning_rate": 3.495850622406639e-05,
"loss": 0.2653,
"step": 420500
},
{
"epoch": 30.12,
"learning_rate": 3.494062097581915e-05,
"loss": 0.272,
"step": 421000
},
{
"epoch": 30.15,
"learning_rate": 3.49227357275719e-05,
"loss": 0.2693,
"step": 421500
},
{
"epoch": 30.19,
"learning_rate": 3.490485047932465e-05,
"loss": 0.2741,
"step": 422000
},
{
"epoch": 30.23,
"learning_rate": 3.488696523107741e-05,
"loss": 0.2695,
"step": 422500
},
{
"epoch": 30.26,
"learning_rate": 3.486907998283016e-05,
"loss": 0.2766,
"step": 423000
},
{
"epoch": 30.3,
"learning_rate": 3.485119473458292e-05,
"loss": 0.2787,
"step": 423500
},
{
"epoch": 30.33,
"learning_rate": 3.483330948633567e-05,
"loss": 0.2804,
"step": 424000
},
{
"epoch": 30.37,
"learning_rate": 3.481542423808843e-05,
"loss": 0.2745,
"step": 424500
},
{
"epoch": 30.4,
"learning_rate": 3.479753898984118e-05,
"loss": 0.2776,
"step": 425000
},
{
"epoch": 30.44,
"learning_rate": 3.477965374159394e-05,
"loss": 0.2767,
"step": 425500
},
{
"epoch": 30.48,
"learning_rate": 3.476176849334669e-05,
"loss": 0.2842,
"step": 426000
},
{
"epoch": 30.51,
"learning_rate": 3.4743883245099444e-05,
"loss": 0.2832,
"step": 426500
},
{
"epoch": 30.55,
"learning_rate": 3.47259979968522e-05,
"loss": 0.2852,
"step": 427000
},
{
"epoch": 30.58,
"learning_rate": 3.4708112748604954e-05,
"loss": 0.2922,
"step": 427500
},
{
"epoch": 30.62,
"learning_rate": 3.4690227500357706e-05,
"loss": 0.2883,
"step": 428000
},
{
"epoch": 30.66,
"learning_rate": 3.4672342252110465e-05,
"loss": 0.2925,
"step": 428500
},
{
"epoch": 30.69,
"learning_rate": 3.4654457003863216e-05,
"loss": 0.2925,
"step": 429000
},
{
"epoch": 30.73,
"learning_rate": 3.463657175561597e-05,
"loss": 0.2914,
"step": 429500
},
{
"epoch": 30.76,
"learning_rate": 3.461868650736873e-05,
"loss": 0.2897,
"step": 430000
},
{
"epoch": 30.8,
"learning_rate": 3.460080125912148e-05,
"loss": 0.2925,
"step": 430500
},
{
"epoch": 30.83,
"learning_rate": 3.458291601087423e-05,
"loss": 0.2948,
"step": 431000
},
{
"epoch": 30.87,
"learning_rate": 3.456503076262699e-05,
"loss": 0.2928,
"step": 431500
},
{
"epoch": 30.91,
"learning_rate": 3.454714551437974e-05,
"loss": 0.2896,
"step": 432000
},
{
"epoch": 30.94,
"learning_rate": 3.45292602661325e-05,
"loss": 0.2935,
"step": 432500
},
{
"epoch": 30.98,
"learning_rate": 3.451137501788525e-05,
"loss": 0.2906,
"step": 433000
},
{
"epoch": 31.01,
"learning_rate": 3.4493489769638e-05,
"loss": 0.2811,
"step": 433500
},
{
"epoch": 31.05,
"learning_rate": 3.447560452139076e-05,
"loss": 0.2524,
"step": 434000
},
{
"epoch": 31.08,
"learning_rate": 3.445771927314351e-05,
"loss": 0.2664,
"step": 434500
},
{
"epoch": 31.12,
"learning_rate": 3.4439834024896265e-05,
"loss": 0.2655,
"step": 435000
},
{
"epoch": 31.16,
"learning_rate": 3.4421948776649024e-05,
"loss": 0.2686,
"step": 435500
},
{
"epoch": 31.19,
"learning_rate": 3.4404063528401775e-05,
"loss": 0.2724,
"step": 436000
},
{
"epoch": 31.23,
"learning_rate": 3.438617828015453e-05,
"loss": 0.2699,
"step": 436500
},
{
"epoch": 31.26,
"learning_rate": 3.4368293031907286e-05,
"loss": 0.2718,
"step": 437000
},
{
"epoch": 31.3,
"learning_rate": 3.435040778366004e-05,
"loss": 0.2763,
"step": 437500
},
{
"epoch": 31.33,
"learning_rate": 3.433252253541279e-05,
"loss": 0.2759,
"step": 438000
},
{
"epoch": 31.37,
"learning_rate": 3.431463728716555e-05,
"loss": 0.2789,
"step": 438500
},
{
"epoch": 31.41,
"learning_rate": 3.42967520389183e-05,
"loss": 0.2791,
"step": 439000
},
{
"epoch": 31.44,
"learning_rate": 3.427886679067105e-05,
"loss": 0.2783,
"step": 439500
},
{
"epoch": 31.48,
"learning_rate": 3.426098154242381e-05,
"loss": 0.2806,
"step": 440000
},
{
"epoch": 31.51,
"learning_rate": 3.424309629417656e-05,
"loss": 0.2834,
"step": 440500
},
{
"epoch": 31.55,
"learning_rate": 3.422521104592932e-05,
"loss": 0.2781,
"step": 441000
},
{
"epoch": 31.59,
"learning_rate": 3.420732579768207e-05,
"loss": 0.2836,
"step": 441500
},
{
"epoch": 31.62,
"learning_rate": 3.4189440549434824e-05,
"loss": 0.2841,
"step": 442000
},
{
"epoch": 31.66,
"learning_rate": 3.417155530118758e-05,
"loss": 0.2825,
"step": 442500
},
{
"epoch": 31.69,
"learning_rate": 3.4153670052940334e-05,
"loss": 0.286,
"step": 443000
},
{
"epoch": 31.73,
"learning_rate": 3.4135784804693086e-05,
"loss": 0.2892,
"step": 443500
},
{
"epoch": 31.76,
"learning_rate": 3.4117899556445845e-05,
"loss": 0.286,
"step": 444000
},
{
"epoch": 31.8,
"learning_rate": 3.4100014308198596e-05,
"loss": 0.2922,
"step": 444500
},
{
"epoch": 31.84,
"learning_rate": 3.4082129059951355e-05,
"loss": 0.2836,
"step": 445000
},
{
"epoch": 31.87,
"learning_rate": 3.4064243811704114e-05,
"loss": 0.2906,
"step": 445500
},
{
"epoch": 31.91,
"learning_rate": 3.4046358563456865e-05,
"loss": 0.2913,
"step": 446000
},
{
"epoch": 31.94,
"learning_rate": 3.402847331520962e-05,
"loss": 0.2911,
"step": 446500
},
{
"epoch": 31.98,
"learning_rate": 3.4010588066962376e-05,
"loss": 0.2941,
"step": 447000
},
{
"epoch": 32.01,
"learning_rate": 3.399270281871513e-05,
"loss": 0.2761,
"step": 447500
},
{
"epoch": 32.05,
"learning_rate": 3.397481757046788e-05,
"loss": 0.2583,
"step": 448000
},
{
"epoch": 32.09,
"learning_rate": 3.395693232222064e-05,
"loss": 0.2592,
"step": 448500
},
{
"epoch": 32.12,
"learning_rate": 3.393904707397339e-05,
"loss": 0.2603,
"step": 449000
},
{
"epoch": 32.16,
"learning_rate": 3.392116182572614e-05,
"loss": 0.2613,
"step": 449500
},
{
"epoch": 32.19,
"learning_rate": 3.39032765774789e-05,
"loss": 0.2671,
"step": 450000
},
{
"epoch": 32.23,
"learning_rate": 3.388539132923165e-05,
"loss": 0.2727,
"step": 450500
},
{
"epoch": 32.26,
"learning_rate": 3.3867506080984404e-05,
"loss": 0.2692,
"step": 451000
},
{
"epoch": 32.3,
"learning_rate": 3.384962083273716e-05,
"loss": 0.2671,
"step": 451500
},
{
"epoch": 32.34,
"learning_rate": 3.3831735584489914e-05,
"loss": 0.276,
"step": 452000
},
{
"epoch": 32.37,
"learning_rate": 3.381385033624267e-05,
"loss": 0.2731,
"step": 452500
},
{
"epoch": 32.41,
"learning_rate": 3.3795965087995424e-05,
"loss": 0.2734,
"step": 453000
},
{
"epoch": 32.44,
"learning_rate": 3.3778079839748176e-05,
"loss": 0.2755,
"step": 453500
},
{
"epoch": 32.48,
"learning_rate": 3.3760194591500935e-05,
"loss": 0.2786,
"step": 454000
},
{
"epoch": 32.52,
"learning_rate": 3.3742309343253687e-05,
"loss": 0.28,
"step": 454500
},
{
"epoch": 32.55,
"learning_rate": 3.372442409500644e-05,
"loss": 0.2768,
"step": 455000
},
{
"epoch": 32.59,
"learning_rate": 3.37065388467592e-05,
"loss": 0.2833,
"step": 455500
},
{
"epoch": 32.62,
"learning_rate": 3.368865359851195e-05,
"loss": 0.2782,
"step": 456000
},
{
"epoch": 32.66,
"learning_rate": 3.36707683502647e-05,
"loss": 0.2823,
"step": 456500
},
{
"epoch": 32.69,
"learning_rate": 3.365288310201746e-05,
"loss": 0.2795,
"step": 457000
},
{
"epoch": 32.73,
"learning_rate": 3.363499785377021e-05,
"loss": 0.2782,
"step": 457500
},
{
"epoch": 32.77,
"learning_rate": 3.361711260552296e-05,
"loss": 0.285,
"step": 458000
},
{
"epoch": 32.8,
"learning_rate": 3.359922735727572e-05,
"loss": 0.2854,
"step": 458500
},
{
"epoch": 32.84,
"learning_rate": 3.358134210902847e-05,
"loss": 0.2825,
"step": 459000
},
{
"epoch": 32.87,
"learning_rate": 3.356345686078123e-05,
"loss": 0.2834,
"step": 459500
},
{
"epoch": 32.91,
"learning_rate": 3.354557161253398e-05,
"loss": 0.2848,
"step": 460000
},
{
"epoch": 32.94,
"learning_rate": 3.3527686364286735e-05,
"loss": 0.2843,
"step": 460500
},
{
"epoch": 32.98,
"learning_rate": 3.3509801116039494e-05,
"loss": 0.2864,
"step": 461000
},
{
"epoch": 33.02,
"learning_rate": 3.3491915867792246e-05,
"loss": 0.2755,
"step": 461500
},
{
"epoch": 33.05,
"learning_rate": 3.3474030619545e-05,
"loss": 0.2541,
"step": 462000
},
{
"epoch": 33.09,
"learning_rate": 3.3456145371297756e-05,
"loss": 0.2543,
"step": 462500
},
{
"epoch": 33.12,
"learning_rate": 3.343826012305051e-05,
"loss": 0.2596,
"step": 463000
},
{
"epoch": 33.16,
"learning_rate": 3.342037487480326e-05,
"loss": 0.2639,
"step": 463500
},
{
"epoch": 33.2,
"learning_rate": 3.340248962655602e-05,
"loss": 0.2669,
"step": 464000
},
{
"epoch": 33.23,
"learning_rate": 3.338460437830877e-05,
"loss": 0.2641,
"step": 464500
},
{
"epoch": 33.27,
"learning_rate": 3.336671913006152e-05,
"loss": 0.2689,
"step": 465000
},
{
"epoch": 33.3,
"learning_rate": 3.334883388181428e-05,
"loss": 0.262,
"step": 465500
},
{
"epoch": 33.34,
"learning_rate": 3.333094863356703e-05,
"loss": 0.2656,
"step": 466000
},
{
"epoch": 33.37,
"learning_rate": 3.3313063385319784e-05,
"loss": 0.2665,
"step": 466500
},
{
"epoch": 33.41,
"learning_rate": 3.329517813707255e-05,
"loss": 0.2705,
"step": 467000
},
{
"epoch": 33.45,
"learning_rate": 3.32772928888253e-05,
"loss": 0.2744,
"step": 467500
},
{
"epoch": 33.48,
"learning_rate": 3.325940764057805e-05,
"loss": 0.266,
"step": 468000
},
{
"epoch": 33.52,
"learning_rate": 3.324152239233081e-05,
"loss": 0.2723,
"step": 468500
},
{
"epoch": 33.55,
"learning_rate": 3.322363714408356e-05,
"loss": 0.275,
"step": 469000
},
{
"epoch": 33.59,
"learning_rate": 3.3205751895836315e-05,
"loss": 0.2751,
"step": 469500
},
{
"epoch": 33.62,
"learning_rate": 3.3187866647589073e-05,
"loss": 0.2801,
"step": 470000
},
{
"epoch": 33.66,
"learning_rate": 3.3169981399341825e-05,
"loss": 0.2795,
"step": 470500
},
{
"epoch": 33.7,
"learning_rate": 3.3152096151094584e-05,
"loss": 0.2764,
"step": 471000
},
{
"epoch": 33.73,
"learning_rate": 3.3134210902847336e-05,
"loss": 0.2799,
"step": 471500
},
{
"epoch": 33.77,
"learning_rate": 3.311632565460009e-05,
"loss": 0.2812,
"step": 472000
},
{
"epoch": 33.8,
"learning_rate": 3.3098440406352846e-05,
"loss": 0.2851,
"step": 472500
},
{
"epoch": 33.84,
"learning_rate": 3.30805551581056e-05,
"loss": 0.2855,
"step": 473000
},
{
"epoch": 33.87,
"learning_rate": 3.306266990985835e-05,
"loss": 0.2858,
"step": 473500
},
{
"epoch": 33.91,
"learning_rate": 3.304478466161111e-05,
"loss": 0.2815,
"step": 474000
},
{
"epoch": 33.95,
"learning_rate": 3.302689941336386e-05,
"loss": 0.2766,
"step": 474500
},
{
"epoch": 33.98,
"learning_rate": 3.300901416511661e-05,
"loss": 0.2831,
"step": 475000
},
{
"epoch": 34.02,
"learning_rate": 3.299112891686937e-05,
"loss": 0.2673,
"step": 475500
},
{
"epoch": 34.05,
"learning_rate": 3.297324366862212e-05,
"loss": 0.2541,
"step": 476000
},
{
"epoch": 34.09,
"learning_rate": 3.2955358420374874e-05,
"loss": 0.2556,
"step": 476500
},
{
"epoch": 34.13,
"learning_rate": 3.293747317212763e-05,
"loss": 0.2588,
"step": 477000
},
{
"epoch": 34.16,
"learning_rate": 3.2919587923880384e-05,
"loss": 0.2612,
"step": 477500
},
{
"epoch": 34.2,
"learning_rate": 3.2901702675633136e-05,
"loss": 0.262,
"step": 478000
},
{
"epoch": 34.23,
"learning_rate": 3.2883817427385895e-05,
"loss": 0.2604,
"step": 478500
},
{
"epoch": 34.27,
"learning_rate": 3.2865932179138646e-05,
"loss": 0.2644,
"step": 479000
},
{
"epoch": 34.3,
"learning_rate": 3.2848046930891405e-05,
"loss": 0.2628,
"step": 479500
},
{
"epoch": 34.34,
"learning_rate": 3.283016168264416e-05,
"loss": 0.2678,
"step": 480000
},
{
"epoch": 34.38,
"learning_rate": 3.281227643439691e-05,
"loss": 0.2623,
"step": 480500
},
{
"epoch": 34.41,
"learning_rate": 3.279439118614967e-05,
"loss": 0.2665,
"step": 481000
},
{
"epoch": 34.45,
"learning_rate": 3.277650593790242e-05,
"loss": 0.2633,
"step": 481500
},
{
"epoch": 34.48,
"learning_rate": 3.275862068965517e-05,
"loss": 0.2692,
"step": 482000
},
{
"epoch": 34.52,
"learning_rate": 3.274073544140793e-05,
"loss": 0.2678,
"step": 482500
},
{
"epoch": 34.55,
"learning_rate": 3.272285019316068e-05,
"loss": 0.2725,
"step": 483000
},
{
"epoch": 34.59,
"learning_rate": 3.270496494491343e-05,
"loss": 0.2722,
"step": 483500
},
{
"epoch": 34.63,
"learning_rate": 3.268707969666619e-05,
"loss": 0.2747,
"step": 484000
},
{
"epoch": 34.66,
"learning_rate": 3.266919444841894e-05,
"loss": 0.277,
"step": 484500
},
{
"epoch": 34.7,
"learning_rate": 3.2651309200171695e-05,
"loss": 0.2744,
"step": 485000
},
{
"epoch": 34.73,
"learning_rate": 3.2633423951924454e-05,
"loss": 0.273,
"step": 485500
},
{
"epoch": 34.77,
"learning_rate": 3.2615538703677205e-05,
"loss": 0.2784,
"step": 486000
},
{
"epoch": 34.8,
"learning_rate": 3.2597653455429964e-05,
"loss": 0.2808,
"step": 486500
},
{
"epoch": 34.84,
"learning_rate": 3.2579768207182716e-05,
"loss": 0.274,
"step": 487000
},
{
"epoch": 34.88,
"learning_rate": 3.256188295893547e-05,
"loss": 0.2794,
"step": 487500
},
{
"epoch": 34.91,
"learning_rate": 3.2543997710688226e-05,
"loss": 0.2818,
"step": 488000
},
{
"epoch": 34.95,
"learning_rate": 3.2526112462440985e-05,
"loss": 0.2789,
"step": 488500
},
{
"epoch": 34.98,
"learning_rate": 3.2508227214193736e-05,
"loss": 0.2775,
"step": 489000
},
{
"epoch": 35.02,
"learning_rate": 3.249034196594649e-05,
"loss": 0.2633,
"step": 489500
},
{
"epoch": 35.06,
"learning_rate": 3.247245671769925e-05,
"loss": 0.2477,
"step": 490000
},
{
"epoch": 35.09,
"learning_rate": 3.2454571469452e-05,
"loss": 0.25,
"step": 490500
},
{
"epoch": 35.13,
"learning_rate": 3.243668622120476e-05,
"loss": 0.2536,
"step": 491000
},
{
"epoch": 35.16,
"learning_rate": 3.241880097295751e-05,
"loss": 0.254,
"step": 491500
},
{
"epoch": 35.2,
"learning_rate": 3.240091572471026e-05,
"loss": 0.2561,
"step": 492000
},
{
"epoch": 35.23,
"learning_rate": 3.238303047646302e-05,
"loss": 0.2616,
"step": 492500
},
{
"epoch": 35.27,
"learning_rate": 3.236514522821577e-05,
"loss": 0.2619,
"step": 493000
},
{
"epoch": 35.31,
"learning_rate": 3.234725997996852e-05,
"loss": 0.2596,
"step": 493500
},
{
"epoch": 35.34,
"learning_rate": 3.232937473172128e-05,
"loss": 0.2612,
"step": 494000
},
{
"epoch": 35.38,
"learning_rate": 3.231148948347403e-05,
"loss": 0.2629,
"step": 494500
},
{
"epoch": 35.41,
"learning_rate": 3.2293604235226785e-05,
"loss": 0.2649,
"step": 495000
},
{
"epoch": 35.45,
"learning_rate": 3.2275718986979544e-05,
"loss": 0.2672,
"step": 495500
},
{
"epoch": 35.48,
"learning_rate": 3.2257833738732295e-05,
"loss": 0.2701,
"step": 496000
},
{
"epoch": 35.52,
"learning_rate": 3.223994849048505e-05,
"loss": 0.268,
"step": 496500
},
{
"epoch": 35.56,
"learning_rate": 3.2222063242237806e-05,
"loss": 0.2701,
"step": 497000
},
{
"epoch": 35.59,
"learning_rate": 3.220417799399056e-05,
"loss": 0.2734,
"step": 497500
},
{
"epoch": 35.63,
"learning_rate": 3.218629274574331e-05,
"loss": 0.2714,
"step": 498000
},
{
"epoch": 35.66,
"learning_rate": 3.216840749749607e-05,
"loss": 0.2679,
"step": 498500
},
{
"epoch": 35.7,
"learning_rate": 3.215052224924882e-05,
"loss": 0.2712,
"step": 499000
},
{
"epoch": 35.73,
"learning_rate": 3.213263700100158e-05,
"loss": 0.2728,
"step": 499500
},
{
"epoch": 35.77,
"learning_rate": 3.211475175275433e-05,
"loss": 0.2749,
"step": 500000
},
{
"epoch": 35.81,
"learning_rate": 3.209686650450708e-05,
"loss": 0.2746,
"step": 500500
},
{
"epoch": 35.84,
"learning_rate": 3.207898125625984e-05,
"loss": 0.2742,
"step": 501000
},
{
"epoch": 35.88,
"learning_rate": 3.206109600801259e-05,
"loss": 0.2765,
"step": 501500
},
{
"epoch": 35.91,
"learning_rate": 3.2043210759765344e-05,
"loss": 0.2739,
"step": 502000
},
{
"epoch": 35.95,
"learning_rate": 3.20253255115181e-05,
"loss": 0.2808,
"step": 502500
},
{
"epoch": 35.99,
"learning_rate": 3.2007440263270854e-05,
"loss": 0.2783,
"step": 503000
},
{
"epoch": 36.02,
"learning_rate": 3.1989555015023606e-05,
"loss": 0.2615,
"step": 503500
},
{
"epoch": 36.06,
"learning_rate": 3.1971669766776365e-05,
"loss": 0.25,
"step": 504000
},
{
"epoch": 36.09,
"learning_rate": 3.195378451852912e-05,
"loss": 0.2464,
"step": 504500
},
{
"epoch": 36.13,
"learning_rate": 3.193589927028187e-05,
"loss": 0.2554,
"step": 505000
},
{
"epoch": 36.16,
"learning_rate": 3.191801402203463e-05,
"loss": 0.2573,
"step": 505500
},
{
"epoch": 36.2,
"learning_rate": 3.190012877378738e-05,
"loss": 0.258,
"step": 506000
},
{
"epoch": 36.24,
"learning_rate": 3.188224352554014e-05,
"loss": 0.2633,
"step": 506500
},
{
"epoch": 36.27,
"learning_rate": 3.186435827729289e-05,
"loss": 0.262,
"step": 507000
},
{
"epoch": 36.31,
"learning_rate": 3.184647302904564e-05,
"loss": 0.2673,
"step": 507500
},
{
"epoch": 36.34,
"learning_rate": 3.18285877807984e-05,
"loss": 0.2617,
"step": 508000
},
{
"epoch": 36.38,
"learning_rate": 3.181070253255115e-05,
"loss": 0.2615,
"step": 508500
},
{
"epoch": 36.41,
"learning_rate": 3.17928172843039e-05,
"loss": 0.2644,
"step": 509000
},
{
"epoch": 36.45,
"learning_rate": 3.177493203605666e-05,
"loss": 0.2632,
"step": 509500
},
{
"epoch": 36.49,
"learning_rate": 3.1757046787809413e-05,
"loss": 0.2633,
"step": 510000
},
{
"epoch": 36.52,
"learning_rate": 3.173916153956217e-05,
"loss": 0.2657,
"step": 510500
},
{
"epoch": 36.56,
"learning_rate": 3.172127629131493e-05,
"loss": 0.2652,
"step": 511000
},
{
"epoch": 36.59,
"learning_rate": 3.170339104306768e-05,
"loss": 0.2676,
"step": 511500
},
{
"epoch": 36.63,
"learning_rate": 3.1685505794820434e-05,
"loss": 0.2654,
"step": 512000
},
{
"epoch": 36.66,
"learning_rate": 3.166762054657319e-05,
"loss": 0.2714,
"step": 512500
},
{
"epoch": 36.7,
"learning_rate": 3.1649735298325945e-05,
"loss": 0.2712,
"step": 513000
},
{
"epoch": 36.74,
"learning_rate": 3.1631850050078696e-05,
"loss": 0.2687,
"step": 513500
},
{
"epoch": 36.77,
"learning_rate": 3.1613964801831455e-05,
"loss": 0.2659,
"step": 514000
},
{
"epoch": 36.81,
"learning_rate": 3.159607955358421e-05,
"loss": 0.2666,
"step": 514500
},
{
"epoch": 36.84,
"learning_rate": 3.157819430533696e-05,
"loss": 0.2734,
"step": 515000
},
{
"epoch": 36.88,
"learning_rate": 3.156030905708972e-05,
"loss": 0.2696,
"step": 515500
},
{
"epoch": 36.92,
"learning_rate": 3.154242380884247e-05,
"loss": 0.2714,
"step": 516000
},
{
"epoch": 36.95,
"learning_rate": 3.152453856059522e-05,
"loss": 0.2701,
"step": 516500
},
{
"epoch": 36.99,
"learning_rate": 3.150665331234798e-05,
"loss": 0.2733,
"step": 517000
},
{
"epoch": 37.02,
"learning_rate": 3.148876806410073e-05,
"loss": 0.2569,
"step": 517500
},
{
"epoch": 37.06,
"learning_rate": 3.147088281585349e-05,
"loss": 0.2466,
"step": 518000
},
{
"epoch": 37.09,
"learning_rate": 3.145299756760624e-05,
"loss": 0.2461,
"step": 518500
},
{
"epoch": 37.13,
"learning_rate": 3.143511231935899e-05,
"loss": 0.2507,
"step": 519000
},
{
"epoch": 37.17,
"learning_rate": 3.141722707111175e-05,
"loss": 0.2533,
"step": 519500
},
{
"epoch": 37.2,
"learning_rate": 3.1399341822864504e-05,
"loss": 0.256,
"step": 520000
},
{
"epoch": 37.24,
"learning_rate": 3.1381456574617255e-05,
"loss": 0.2479,
"step": 520500
},
{
"epoch": 37.27,
"learning_rate": 3.1363571326370014e-05,
"loss": 0.2547,
"step": 521000
},
{
"epoch": 37.31,
"learning_rate": 3.1345686078122766e-05,
"loss": 0.2567,
"step": 521500
},
{
"epoch": 37.34,
"learning_rate": 3.132780082987552e-05,
"loss": 0.2581,
"step": 522000
},
{
"epoch": 37.38,
"learning_rate": 3.1309915581628276e-05,
"loss": 0.2609,
"step": 522500
},
{
"epoch": 37.42,
"learning_rate": 3.129203033338103e-05,
"loss": 0.2594,
"step": 523000
},
{
"epoch": 37.45,
"learning_rate": 3.127414508513378e-05,
"loss": 0.2607,
"step": 523500
},
{
"epoch": 37.49,
"learning_rate": 3.125625983688654e-05,
"loss": 0.2675,
"step": 524000
},
{
"epoch": 37.52,
"learning_rate": 3.123837458863929e-05,
"loss": 0.2652,
"step": 524500
},
{
"epoch": 37.56,
"learning_rate": 3.122048934039204e-05,
"loss": 0.2616,
"step": 525000
},
{
"epoch": 37.59,
"learning_rate": 3.12026040921448e-05,
"loss": 0.2635,
"step": 525500
},
{
"epoch": 37.63,
"learning_rate": 3.118471884389755e-05,
"loss": 0.2699,
"step": 526000
},
{
"epoch": 37.67,
"learning_rate": 3.116683359565031e-05,
"loss": 0.2641,
"step": 526500
},
{
"epoch": 37.7,
"learning_rate": 3.114894834740306e-05,
"loss": 0.2685,
"step": 527000
},
{
"epoch": 37.74,
"learning_rate": 3.1131063099155814e-05,
"loss": 0.2675,
"step": 527500
},
{
"epoch": 37.77,
"learning_rate": 3.111317785090857e-05,
"loss": 0.2666,
"step": 528000
},
{
"epoch": 37.81,
"learning_rate": 3.1095292602661325e-05,
"loss": 0.2665,
"step": 528500
},
{
"epoch": 37.85,
"learning_rate": 3.1077407354414076e-05,
"loss": 0.2671,
"step": 529000
},
{
"epoch": 37.88,
"learning_rate": 3.1059522106166835e-05,
"loss": 0.2667,
"step": 529500
},
{
"epoch": 37.92,
"learning_rate": 3.104163685791959e-05,
"loss": 0.2695,
"step": 530000
},
{
"epoch": 37.95,
"learning_rate": 3.102375160967234e-05,
"loss": 0.2651,
"step": 530500
},
{
"epoch": 37.99,
"learning_rate": 3.10058663614251e-05,
"loss": 0.2732,
"step": 531000
},
{
"epoch": 38.02,
"learning_rate": 3.098798111317785e-05,
"loss": 0.2519,
"step": 531500
},
{
"epoch": 38.06,
"learning_rate": 3.097009586493061e-05,
"loss": 0.238,
"step": 532000
},
{
"epoch": 38.1,
"learning_rate": 3.0952210616683366e-05,
"loss": 0.2483,
"step": 532500
},
{
"epoch": 38.13,
"learning_rate": 3.093432536843612e-05,
"loss": 0.2463,
"step": 533000
},
{
"epoch": 38.17,
"learning_rate": 3.091644012018887e-05,
"loss": 0.2484,
"step": 533500
},
{
"epoch": 38.2,
"learning_rate": 3.089855487194163e-05,
"loss": 0.2522,
"step": 534000
},
{
"epoch": 38.24,
"learning_rate": 3.088066962369438e-05,
"loss": 0.2464,
"step": 534500
},
{
"epoch": 38.27,
"learning_rate": 3.086278437544713e-05,
"loss": 0.2515,
"step": 535000
},
{
"epoch": 38.31,
"learning_rate": 3.084489912719989e-05,
"loss": 0.261,
"step": 535500
},
{
"epoch": 38.35,
"learning_rate": 3.082701387895264e-05,
"loss": 0.255,
"step": 536000
},
{
"epoch": 38.38,
"learning_rate": 3.0809128630705394e-05,
"loss": 0.2597,
"step": 536500
},
{
"epoch": 38.42,
"learning_rate": 3.079124338245815e-05,
"loss": 0.2584,
"step": 537000
},
{
"epoch": 38.45,
"learning_rate": 3.0773358134210904e-05,
"loss": 0.2596,
"step": 537500
},
{
"epoch": 38.49,
"learning_rate": 3.075547288596366e-05,
"loss": 0.2641,
"step": 538000
},
{
"epoch": 38.52,
"learning_rate": 3.0737587637716415e-05,
"loss": 0.265,
"step": 538500
},
{
"epoch": 38.56,
"learning_rate": 3.0719702389469167e-05,
"loss": 0.2587,
"step": 539000
},
{
"epoch": 38.6,
"learning_rate": 3.0701817141221925e-05,
"loss": 0.2596,
"step": 539500
},
{
"epoch": 38.63,
"learning_rate": 3.068393189297468e-05,
"loss": 0.2658,
"step": 540000
},
{
"epoch": 38.67,
"learning_rate": 3.066604664472743e-05,
"loss": 0.2638,
"step": 540500
},
{
"epoch": 38.7,
"learning_rate": 3.064816139648019e-05,
"loss": 0.2663,
"step": 541000
},
{
"epoch": 38.74,
"learning_rate": 3.063027614823294e-05,
"loss": 0.2677,
"step": 541500
},
{
"epoch": 38.78,
"learning_rate": 3.061239089998569e-05,
"loss": 0.2613,
"step": 542000
},
{
"epoch": 38.81,
"learning_rate": 3.059450565173845e-05,
"loss": 0.2654,
"step": 542500
},
{
"epoch": 38.85,
"learning_rate": 3.05766204034912e-05,
"loss": 0.2592,
"step": 543000
},
{
"epoch": 38.88,
"learning_rate": 3.055873515524395e-05,
"loss": 0.264,
"step": 543500
},
{
"epoch": 38.92,
"learning_rate": 3.054084990699671e-05,
"loss": 0.263,
"step": 544000
},
{
"epoch": 38.95,
"learning_rate": 3.052296465874946e-05,
"loss": 0.2662,
"step": 544500
},
{
"epoch": 38.99,
"learning_rate": 3.050507941050222e-05,
"loss": 0.2636,
"step": 545000
},
{
"epoch": 39.03,
"learning_rate": 3.0487194162254974e-05,
"loss": 0.2439,
"step": 545500
},
{
"epoch": 39.06,
"learning_rate": 3.0469308914007726e-05,
"loss": 0.2415,
"step": 546000
},
{
"epoch": 39.1,
"learning_rate": 3.045142366576048e-05,
"loss": 0.2504,
"step": 546500
},
{
"epoch": 39.13,
"learning_rate": 3.0433538417513236e-05,
"loss": 0.2458,
"step": 547000
},
{
"epoch": 39.17,
"learning_rate": 3.041565316926599e-05,
"loss": 0.251,
"step": 547500
},
{
"epoch": 39.2,
"learning_rate": 3.0397767921018743e-05,
"loss": 0.248,
"step": 548000
},
{
"epoch": 39.24,
"learning_rate": 3.0379882672771498e-05,
"loss": 0.2464,
"step": 548500
},
{
"epoch": 39.28,
"learning_rate": 3.0361997424524253e-05,
"loss": 0.2522,
"step": 549000
},
{
"epoch": 39.31,
"learning_rate": 3.0344112176277005e-05,
"loss": 0.2542,
"step": 549500
},
{
"epoch": 39.35,
"learning_rate": 3.032622692802976e-05,
"loss": 0.2563,
"step": 550000
},
{
"epoch": 39.38,
"learning_rate": 3.0308341679782515e-05,
"loss": 0.2619,
"step": 550500
},
{
"epoch": 39.42,
"learning_rate": 3.029045643153527e-05,
"loss": 0.26,
"step": 551000
},
{
"epoch": 39.45,
"learning_rate": 3.0272571183288022e-05,
"loss": 0.2595,
"step": 551500
},
{
"epoch": 39.49,
"learning_rate": 3.0254685935040778e-05,
"loss": 0.2562,
"step": 552000
},
{
"epoch": 39.53,
"learning_rate": 3.0236800686793533e-05,
"loss": 0.2542,
"step": 552500
},
{
"epoch": 39.56,
"learning_rate": 3.0218915438546285e-05,
"loss": 0.2604,
"step": 553000
},
{
"epoch": 39.6,
"learning_rate": 3.0201030190299046e-05,
"loss": 0.2613,
"step": 553500
},
{
"epoch": 39.63,
"learning_rate": 3.0183144942051798e-05,
"loss": 0.2621,
"step": 554000
},
{
"epoch": 39.67,
"learning_rate": 3.0165259693804553e-05,
"loss": 0.2626,
"step": 554500
},
{
"epoch": 39.71,
"learning_rate": 3.014737444555731e-05,
"loss": 0.2608,
"step": 555000
},
{
"epoch": 39.74,
"learning_rate": 3.0129489197310064e-05,
"loss": 0.2594,
"step": 555500
},
{
"epoch": 39.78,
"learning_rate": 3.0111603949062816e-05,
"loss": 0.2638,
"step": 556000
},
{
"epoch": 39.81,
"learning_rate": 3.009371870081557e-05,
"loss": 0.2637,
"step": 556500
},
{
"epoch": 39.85,
"learning_rate": 3.0075833452568326e-05,
"loss": 0.262,
"step": 557000
},
{
"epoch": 39.88,
"learning_rate": 3.0057948204321078e-05,
"loss": 0.26,
"step": 557500
},
{
"epoch": 39.92,
"learning_rate": 3.0040062956073833e-05,
"loss": 0.2621,
"step": 558000
},
{
"epoch": 39.96,
"learning_rate": 3.0022177707826588e-05,
"loss": 0.2602,
"step": 558500
},
{
"epoch": 39.99,
"learning_rate": 3.0004292459579343e-05,
"loss": 0.2648,
"step": 559000
},
{
"epoch": 40.03,
"learning_rate": 2.9986407211332095e-05,
"loss": 0.2462,
"step": 559500
},
{
"epoch": 40.06,
"learning_rate": 2.996852196308485e-05,
"loss": 0.239,
"step": 560000
},
{
"epoch": 40.1,
"learning_rate": 2.9950636714837605e-05,
"loss": 0.2381,
"step": 560500
},
{
"epoch": 40.13,
"learning_rate": 2.9932751466590357e-05,
"loss": 0.2468,
"step": 561000
},
{
"epoch": 40.17,
"learning_rate": 2.9914866218343112e-05,
"loss": 0.2426,
"step": 561500
},
{
"epoch": 40.21,
"learning_rate": 2.9896980970095868e-05,
"loss": 0.2468,
"step": 562000
},
{
"epoch": 40.24,
"learning_rate": 2.9879095721848623e-05,
"loss": 0.2513,
"step": 562500
},
{
"epoch": 40.28,
"learning_rate": 2.9861210473601375e-05,
"loss": 0.2499,
"step": 563000
},
{
"epoch": 40.31,
"learning_rate": 2.984332522535413e-05,
"loss": 0.2448,
"step": 563500
},
{
"epoch": 40.35,
"learning_rate": 2.9825439977106885e-05,
"loss": 0.2501,
"step": 564000
},
{
"epoch": 40.38,
"learning_rate": 2.9807554728859637e-05,
"loss": 0.2494,
"step": 564500
},
{
"epoch": 40.42,
"learning_rate": 2.9789669480612392e-05,
"loss": 0.2553,
"step": 565000
},
{
"epoch": 40.46,
"learning_rate": 2.9771784232365147e-05,
"loss": 0.2581,
"step": 565500
},
{
"epoch": 40.49,
"learning_rate": 2.97538989841179e-05,
"loss": 0.251,
"step": 566000
},
{
"epoch": 40.53,
"learning_rate": 2.9736013735870654e-05,
"loss": 0.2581,
"step": 566500
},
{
"epoch": 40.56,
"learning_rate": 2.971812848762341e-05,
"loss": 0.2608,
"step": 567000
},
{
"epoch": 40.6,
"learning_rate": 2.9700243239376164e-05,
"loss": 0.258,
"step": 567500
},
{
"epoch": 40.64,
"learning_rate": 2.9682357991128916e-05,
"loss": 0.2545,
"step": 568000
},
{
"epoch": 40.67,
"learning_rate": 2.966447274288167e-05,
"loss": 0.2585,
"step": 568500
},
{
"epoch": 40.71,
"learning_rate": 2.9646587494634427e-05,
"loss": 0.2617,
"step": 569000
},
{
"epoch": 40.74,
"learning_rate": 2.962870224638718e-05,
"loss": 0.2595,
"step": 569500
},
{
"epoch": 40.78,
"learning_rate": 2.9610816998139934e-05,
"loss": 0.2563,
"step": 570000
},
{
"epoch": 40.81,
"learning_rate": 2.959293174989269e-05,
"loss": 0.2641,
"step": 570500
},
{
"epoch": 40.85,
"learning_rate": 2.9575046501645444e-05,
"loss": 0.2613,
"step": 571000
},
{
"epoch": 40.89,
"learning_rate": 2.9557161253398196e-05,
"loss": 0.2599,
"step": 571500
},
{
"epoch": 40.92,
"learning_rate": 2.953927600515095e-05,
"loss": 0.2633,
"step": 572000
},
{
"epoch": 40.96,
"learning_rate": 2.9521390756903706e-05,
"loss": 0.263,
"step": 572500
},
{
"epoch": 40.99,
"learning_rate": 2.9503505508656458e-05,
"loss": 0.266,
"step": 573000
},
{
"epoch": 41.03,
"learning_rate": 2.9485620260409213e-05,
"loss": 0.2452,
"step": 573500
},
{
"epoch": 41.06,
"learning_rate": 2.9467735012161968e-05,
"loss": 0.2366,
"step": 574000
},
{
"epoch": 41.1,
"learning_rate": 2.9449849763914723e-05,
"loss": 0.241,
"step": 574500
},
{
"epoch": 41.14,
"learning_rate": 2.9431964515667475e-05,
"loss": 0.243,
"step": 575000
},
{
"epoch": 41.17,
"learning_rate": 2.9414079267420237e-05,
"loss": 0.2443,
"step": 575500
},
{
"epoch": 41.21,
"learning_rate": 2.939619401917299e-05,
"loss": 0.2498,
"step": 576000
},
{
"epoch": 41.24,
"learning_rate": 2.9378308770925744e-05,
"loss": 0.2469,
"step": 576500
},
{
"epoch": 41.28,
"learning_rate": 2.93604235226785e-05,
"loss": 0.249,
"step": 577000
},
{
"epoch": 41.31,
"learning_rate": 2.934253827443125e-05,
"loss": 0.2507,
"step": 577500
},
{
"epoch": 41.35,
"learning_rate": 2.9324653026184006e-05,
"loss": 0.247,
"step": 578000
},
{
"epoch": 41.39,
"learning_rate": 2.930676777793676e-05,
"loss": 0.2545,
"step": 578500
},
{
"epoch": 41.42,
"learning_rate": 2.9288882529689517e-05,
"loss": 0.2523,
"step": 579000
},
{
"epoch": 41.46,
"learning_rate": 2.927099728144227e-05,
"loss": 0.253,
"step": 579500
},
{
"epoch": 41.49,
"learning_rate": 2.9253112033195024e-05,
"loss": 0.2498,
"step": 580000
},
{
"epoch": 41.53,
"learning_rate": 2.923522678494778e-05,
"loss": 0.2473,
"step": 580500
},
{
"epoch": 41.57,
"learning_rate": 2.921734153670053e-05,
"loss": 0.2541,
"step": 581000
},
{
"epoch": 41.6,
"learning_rate": 2.9199456288453286e-05,
"loss": 0.2509,
"step": 581500
},
{
"epoch": 41.64,
"learning_rate": 2.918157104020604e-05,
"loss": 0.259,
"step": 582000
},
{
"epoch": 41.67,
"learning_rate": 2.9163685791958796e-05,
"loss": 0.2557,
"step": 582500
},
{
"epoch": 41.71,
"learning_rate": 2.9145800543711548e-05,
"loss": 0.2578,
"step": 583000
},
{
"epoch": 41.74,
"learning_rate": 2.9127915295464303e-05,
"loss": 0.2598,
"step": 583500
},
{
"epoch": 41.78,
"learning_rate": 2.911003004721706e-05,
"loss": 0.2557,
"step": 584000
},
{
"epoch": 41.82,
"learning_rate": 2.909214479896981e-05,
"loss": 0.2593,
"step": 584500
},
{
"epoch": 41.85,
"learning_rate": 2.9074259550722565e-05,
"loss": 0.2664,
"step": 585000
},
{
"epoch": 41.89,
"learning_rate": 2.905637430247532e-05,
"loss": 0.2594,
"step": 585500
},
{
"epoch": 41.92,
"learning_rate": 2.9038489054228076e-05,
"loss": 0.2563,
"step": 586000
},
{
"epoch": 41.96,
"learning_rate": 2.9020603805980827e-05,
"loss": 0.2558,
"step": 586500
},
{
"epoch": 41.99,
"learning_rate": 2.9002718557733583e-05,
"loss": 0.2603,
"step": 587000
},
{
"epoch": 42.03,
"learning_rate": 2.8984833309486338e-05,
"loss": 0.2398,
"step": 587500
},
{
"epoch": 42.07,
"learning_rate": 2.896694806123909e-05,
"loss": 0.2339,
"step": 588000
},
{
"epoch": 42.1,
"learning_rate": 2.8949062812991845e-05,
"loss": 0.2406,
"step": 588500
},
{
"epoch": 42.14,
"learning_rate": 2.89311775647446e-05,
"loss": 0.24,
"step": 589000
},
{
"epoch": 42.17,
"learning_rate": 2.8913292316497355e-05,
"loss": 0.2433,
"step": 589500
},
{
"epoch": 42.21,
"learning_rate": 2.8895407068250107e-05,
"loss": 0.2432,
"step": 590000
},
{
"epoch": 42.24,
"learning_rate": 2.8877521820002862e-05,
"loss": 0.2432,
"step": 590500
},
{
"epoch": 42.28,
"learning_rate": 2.8859636571755617e-05,
"loss": 0.2468,
"step": 591000
},
{
"epoch": 42.32,
"learning_rate": 2.884175132350837e-05,
"loss": 0.2476,
"step": 591500
},
{
"epoch": 42.35,
"learning_rate": 2.8823866075261124e-05,
"loss": 0.2459,
"step": 592000
},
{
"epoch": 42.39,
"learning_rate": 2.880598082701388e-05,
"loss": 0.2457,
"step": 592500
},
{
"epoch": 42.42,
"learning_rate": 2.878809557876663e-05,
"loss": 0.2484,
"step": 593000
},
{
"epoch": 42.46,
"learning_rate": 2.8770210330519386e-05,
"loss": 0.2493,
"step": 593500
},
{
"epoch": 42.5,
"learning_rate": 2.875232508227214e-05,
"loss": 0.2507,
"step": 594000
},
{
"epoch": 42.53,
"learning_rate": 2.8734439834024897e-05,
"loss": 0.2537,
"step": 594500
},
{
"epoch": 42.57,
"learning_rate": 2.871655458577765e-05,
"loss": 0.2551,
"step": 595000
},
{
"epoch": 42.6,
"learning_rate": 2.8698669337530404e-05,
"loss": 0.2578,
"step": 595500
},
{
"epoch": 42.64,
"learning_rate": 2.868078408928316e-05,
"loss": 0.2527,
"step": 596000
},
{
"epoch": 42.67,
"learning_rate": 2.866289884103591e-05,
"loss": 0.2514,
"step": 596500
},
{
"epoch": 42.71,
"learning_rate": 2.8645013592788673e-05,
"loss": 0.256,
"step": 597000
},
{
"epoch": 42.75,
"learning_rate": 2.8627128344541428e-05,
"loss": 0.2561,
"step": 597500
},
{
"epoch": 42.78,
"learning_rate": 2.860924309629418e-05,
"loss": 0.2579,
"step": 598000
},
{
"epoch": 42.82,
"learning_rate": 2.8591357848046935e-05,
"loss": 0.2582,
"step": 598500
},
{
"epoch": 42.85,
"learning_rate": 2.857347259979969e-05,
"loss": 0.2539,
"step": 599000
},
{
"epoch": 42.89,
"learning_rate": 2.8555587351552442e-05,
"loss": 0.2577,
"step": 599500
},
{
"epoch": 42.92,
"learning_rate": 2.8537702103305197e-05,
"loss": 0.2621,
"step": 600000
},
{
"epoch": 42.96,
"learning_rate": 2.8519816855057952e-05,
"loss": 0.2553,
"step": 600500
},
{
"epoch": 43.0,
"learning_rate": 2.8501931606810704e-05,
"loss": 0.2562,
"step": 601000
},
{
"epoch": 43.03,
"learning_rate": 2.848404635856346e-05,
"loss": 0.2319,
"step": 601500
},
{
"epoch": 43.07,
"learning_rate": 2.8466161110316214e-05,
"loss": 0.2392,
"step": 602000
},
{
"epoch": 43.1,
"learning_rate": 2.844827586206897e-05,
"loss": 0.241,
"step": 602500
},
{
"epoch": 43.14,
"learning_rate": 2.843039061382172e-05,
"loss": 0.2375,
"step": 603000
},
{
"epoch": 43.17,
"learning_rate": 2.8412505365574477e-05,
"loss": 0.237,
"step": 603500
},
{
"epoch": 43.21,
"learning_rate": 2.8394620117327232e-05,
"loss": 0.2426,
"step": 604000
},
{
"epoch": 43.25,
"learning_rate": 2.8376734869079984e-05,
"loss": 0.2474,
"step": 604500
},
{
"epoch": 43.28,
"learning_rate": 2.835884962083274e-05,
"loss": 0.2445,
"step": 605000
},
{
"epoch": 43.32,
"learning_rate": 2.8340964372585494e-05,
"loss": 0.2448,
"step": 605500
},
{
"epoch": 43.35,
"learning_rate": 2.832307912433825e-05,
"loss": 0.2425,
"step": 606000
},
{
"epoch": 43.39,
"learning_rate": 2.8305193876091e-05,
"loss": 0.2461,
"step": 606500
},
{
"epoch": 43.43,
"learning_rate": 2.8287308627843756e-05,
"loss": 0.243,
"step": 607000
},
{
"epoch": 43.46,
"learning_rate": 2.826942337959651e-05,
"loss": 0.2498,
"step": 607500
},
{
"epoch": 43.5,
"learning_rate": 2.8251538131349263e-05,
"loss": 0.2506,
"step": 608000
},
{
"epoch": 43.53,
"learning_rate": 2.8233652883102018e-05,
"loss": 0.2472,
"step": 608500
},
{
"epoch": 43.57,
"learning_rate": 2.8215767634854773e-05,
"loss": 0.2504,
"step": 609000
},
{
"epoch": 43.6,
"learning_rate": 2.819788238660753e-05,
"loss": 0.2483,
"step": 609500
},
{
"epoch": 43.64,
"learning_rate": 2.817999713836028e-05,
"loss": 0.2513,
"step": 610000
},
{
"epoch": 43.68,
"learning_rate": 2.8162111890113036e-05,
"loss": 0.2544,
"step": 610500
},
{
"epoch": 43.71,
"learning_rate": 2.814422664186579e-05,
"loss": 0.2489,
"step": 611000
},
{
"epoch": 43.75,
"learning_rate": 2.8126341393618543e-05,
"loss": 0.2535,
"step": 611500
},
{
"epoch": 43.78,
"learning_rate": 2.8108456145371298e-05,
"loss": 0.2552,
"step": 612000
},
{
"epoch": 43.82,
"learning_rate": 2.8090570897124053e-05,
"loss": 0.2566,
"step": 612500
},
{
"epoch": 43.85,
"learning_rate": 2.8072685648876808e-05,
"loss": 0.2534,
"step": 613000
},
{
"epoch": 43.89,
"learning_rate": 2.805480040062956e-05,
"loss": 0.2544,
"step": 613500
},
{
"epoch": 43.93,
"learning_rate": 2.8036915152382315e-05,
"loss": 0.2547,
"step": 614000
},
{
"epoch": 43.96,
"learning_rate": 2.801902990413507e-05,
"loss": 0.2552,
"step": 614500
},
{
"epoch": 44.0,
"learning_rate": 2.8001144655887822e-05,
"loss": 0.2602,
"step": 615000
},
{
"epoch": 44.03,
"learning_rate": 2.7983259407640577e-05,
"loss": 0.2329,
"step": 615500
},
{
"epoch": 44.07,
"learning_rate": 2.7965374159393332e-05,
"loss": 0.2327,
"step": 616000
},
{
"epoch": 44.11,
"learning_rate": 2.7947488911146084e-05,
"loss": 0.2426,
"step": 616500
},
{
"epoch": 44.14,
"learning_rate": 2.792960366289884e-05,
"loss": 0.2345,
"step": 617000
},
{
"epoch": 44.18,
"learning_rate": 2.7911718414651595e-05,
"loss": 0.2345,
"step": 617500
},
{
"epoch": 44.21,
"learning_rate": 2.789383316640435e-05,
"loss": 0.2434,
"step": 618000
},
{
"epoch": 44.25,
"learning_rate": 2.78759479181571e-05,
"loss": 0.242,
"step": 618500
},
{
"epoch": 44.28,
"learning_rate": 2.7858062669909863e-05,
"loss": 0.2386,
"step": 619000
},
{
"epoch": 44.32,
"learning_rate": 2.7840177421662615e-05,
"loss": 0.2414,
"step": 619500
},
{
"epoch": 44.36,
"learning_rate": 2.782229217341537e-05,
"loss": 0.2461,
"step": 620000
},
{
"epoch": 44.39,
"learning_rate": 2.7804406925168126e-05,
"loss": 0.244,
"step": 620500
},
{
"epoch": 44.43,
"learning_rate": 2.778652167692088e-05,
"loss": 0.2448,
"step": 621000
},
{
"epoch": 44.46,
"learning_rate": 2.7768636428673633e-05,
"loss": 0.2493,
"step": 621500
},
{
"epoch": 44.5,
"learning_rate": 2.7750751180426388e-05,
"loss": 0.2479,
"step": 622000
},
{
"epoch": 44.53,
"learning_rate": 2.7732865932179143e-05,
"loss": 0.2464,
"step": 622500
},
{
"epoch": 44.57,
"learning_rate": 2.7714980683931895e-05,
"loss": 0.247,
"step": 623000
},
{
"epoch": 44.61,
"learning_rate": 2.769709543568465e-05,
"loss": 0.2493,
"step": 623500
},
{
"epoch": 44.64,
"learning_rate": 2.7679210187437405e-05,
"loss": 0.2492,
"step": 624000
},
{
"epoch": 44.68,
"learning_rate": 2.7661324939190157e-05,
"loss": 0.2452,
"step": 624500
},
{
"epoch": 44.71,
"learning_rate": 2.7643439690942912e-05,
"loss": 0.2474,
"step": 625000
},
{
"epoch": 44.75,
"learning_rate": 2.7625554442695667e-05,
"loss": 0.2558,
"step": 625500
},
{
"epoch": 44.78,
"learning_rate": 2.7607669194448422e-05,
"loss": 0.2558,
"step": 626000
},
{
"epoch": 44.82,
"learning_rate": 2.7589783946201174e-05,
"loss": 0.2494,
"step": 626500
},
{
"epoch": 44.86,
"learning_rate": 2.757189869795393e-05,
"loss": 0.2515,
"step": 627000
},
{
"epoch": 44.89,
"learning_rate": 2.7554013449706685e-05,
"loss": 0.2508,
"step": 627500
},
{
"epoch": 44.93,
"learning_rate": 2.7536128201459436e-05,
"loss": 0.2533,
"step": 628000
},
{
"epoch": 44.96,
"learning_rate": 2.751824295321219e-05,
"loss": 0.2569,
"step": 628500
},
{
"epoch": 45.0,
"learning_rate": 2.7500357704964947e-05,
"loss": 0.2562,
"step": 629000
},
{
"epoch": 45.04,
"learning_rate": 2.7482472456717702e-05,
"loss": 0.2325,
"step": 629500
},
{
"epoch": 45.07,
"learning_rate": 2.7464587208470454e-05,
"loss": 0.2268,
"step": 630000
},
{
"epoch": 45.11,
"learning_rate": 2.744670196022321e-05,
"loss": 0.2312,
"step": 630500
},
{
"epoch": 45.14,
"learning_rate": 2.7428816711975964e-05,
"loss": 0.2372,
"step": 631000
},
{
"epoch": 45.18,
"learning_rate": 2.7410931463728716e-05,
"loss": 0.2392,
"step": 631500
},
{
"epoch": 45.21,
"learning_rate": 2.739304621548147e-05,
"loss": 0.2398,
"step": 632000
},
{
"epoch": 45.25,
"learning_rate": 2.7375160967234226e-05,
"loss": 0.239,
"step": 632500
},
{
"epoch": 45.29,
"learning_rate": 2.735727571898698e-05,
"loss": 0.2435,
"step": 633000
},
{
"epoch": 45.32,
"learning_rate": 2.7339390470739733e-05,
"loss": 0.2401,
"step": 633500
},
{
"epoch": 45.36,
"learning_rate": 2.732150522249249e-05,
"loss": 0.2465,
"step": 634000
},
{
"epoch": 45.39,
"learning_rate": 2.7303619974245244e-05,
"loss": 0.2471,
"step": 634500
},
{
"epoch": 45.43,
"learning_rate": 2.7285734725997995e-05,
"loss": 0.2449,
"step": 635000
},
{
"epoch": 45.46,
"learning_rate": 2.726784947775075e-05,
"loss": 0.2466,
"step": 635500
},
{
"epoch": 45.5,
"learning_rate": 2.7249964229503506e-05,
"loss": 0.2436,
"step": 636000
},
{
"epoch": 45.54,
"learning_rate": 2.723207898125626e-05,
"loss": 0.2419,
"step": 636500
},
{
"epoch": 45.57,
"learning_rate": 2.7214193733009013e-05,
"loss": 0.2493,
"step": 637000
},
{
"epoch": 45.61,
"learning_rate": 2.7196308484761768e-05,
"loss": 0.251,
"step": 637500
},
{
"epoch": 45.64,
"learning_rate": 2.7178423236514523e-05,
"loss": 0.2458,
"step": 638000
},
{
"epoch": 45.68,
"learning_rate": 2.7160537988267275e-05,
"loss": 0.2484,
"step": 638500
},
{
"epoch": 45.71,
"learning_rate": 2.714265274002003e-05,
"loss": 0.2491,
"step": 639000
},
{
"epoch": 45.75,
"learning_rate": 2.7124767491772785e-05,
"loss": 0.2513,
"step": 639500
},
{
"epoch": 45.79,
"learning_rate": 2.710688224352554e-05,
"loss": 0.2521,
"step": 640000
},
{
"epoch": 45.82,
"learning_rate": 2.70889969952783e-05,
"loss": 0.2504,
"step": 640500
},
{
"epoch": 45.86,
"learning_rate": 2.7071111747031054e-05,
"loss": 0.2476,
"step": 641000
},
{
"epoch": 45.89,
"learning_rate": 2.7053226498783806e-05,
"loss": 0.2465,
"step": 641500
},
{
"epoch": 45.93,
"learning_rate": 2.703534125053656e-05,
"loss": 0.2506,
"step": 642000
},
{
"epoch": 45.97,
"learning_rate": 2.7017456002289316e-05,
"loss": 0.2446,
"step": 642500
},
{
"epoch": 46.0,
"learning_rate": 2.6999570754042068e-05,
"loss": 0.2535,
"step": 643000
},
{
"epoch": 46.04,
"learning_rate": 2.6981685505794823e-05,
"loss": 0.2261,
"step": 643500
},
{
"epoch": 46.07,
"learning_rate": 2.696380025754758e-05,
"loss": 0.2312,
"step": 644000
},
{
"epoch": 46.11,
"learning_rate": 2.6945915009300334e-05,
"loss": 0.2356,
"step": 644500
},
{
"epoch": 46.14,
"learning_rate": 2.6928029761053085e-05,
"loss": 0.2361,
"step": 645000
},
{
"epoch": 46.18,
"learning_rate": 2.691014451280584e-05,
"loss": 0.2377,
"step": 645500
},
{
"epoch": 46.22,
"learning_rate": 2.6892259264558596e-05,
"loss": 0.235,
"step": 646000
},
{
"epoch": 46.25,
"learning_rate": 2.6874374016311348e-05,
"loss": 0.2389,
"step": 646500
},
{
"epoch": 46.29,
"learning_rate": 2.6856488768064103e-05,
"loss": 0.2435,
"step": 647000
},
{
"epoch": 46.32,
"learning_rate": 2.6838603519816858e-05,
"loss": 0.2377,
"step": 647500
},
{
"epoch": 46.36,
"learning_rate": 2.6820718271569613e-05,
"loss": 0.2389,
"step": 648000
},
{
"epoch": 46.39,
"learning_rate": 2.6802833023322365e-05,
"loss": 0.2455,
"step": 648500
},
{
"epoch": 46.43,
"learning_rate": 2.678494777507512e-05,
"loss": 0.243,
"step": 649000
},
{
"epoch": 46.47,
"learning_rate": 2.6767062526827875e-05,
"loss": 0.2384,
"step": 649500
},
{
"epoch": 46.5,
"learning_rate": 2.6749177278580627e-05,
"loss": 0.2427,
"step": 650000
},
{
"epoch": 46.54,
"learning_rate": 2.6731292030333382e-05,
"loss": 0.2447,
"step": 650500
},
{
"epoch": 46.57,
"learning_rate": 2.6713406782086137e-05,
"loss": 0.2443,
"step": 651000
},
{
"epoch": 46.61,
"learning_rate": 2.669552153383889e-05,
"loss": 0.2427,
"step": 651500
},
{
"epoch": 46.64,
"learning_rate": 2.6677636285591644e-05,
"loss": 0.25,
"step": 652000
},
{
"epoch": 46.68,
"learning_rate": 2.66597510373444e-05,
"loss": 0.2472,
"step": 652500
},
{
"epoch": 46.72,
"learning_rate": 2.6641865789097155e-05,
"loss": 0.2537,
"step": 653000
},
{
"epoch": 46.75,
"learning_rate": 2.6623980540849907e-05,
"loss": 0.2502,
"step": 653500
},
{
"epoch": 46.79,
"learning_rate": 2.6606095292602662e-05,
"loss": 0.2447,
"step": 654000
},
{
"epoch": 46.82,
"learning_rate": 2.6588210044355417e-05,
"loss": 0.244,
"step": 654500
},
{
"epoch": 46.86,
"learning_rate": 2.657032479610817e-05,
"loss": 0.2479,
"step": 655000
},
{
"epoch": 46.9,
"learning_rate": 2.6552439547860924e-05,
"loss": 0.2476,
"step": 655500
},
{
"epoch": 46.93,
"learning_rate": 2.653455429961368e-05,
"loss": 0.2506,
"step": 656000
},
{
"epoch": 46.97,
"learning_rate": 2.6516669051366434e-05,
"loss": 0.2504,
"step": 656500
},
{
"epoch": 47.0,
"learning_rate": 2.6498783803119186e-05,
"loss": 0.2449,
"step": 657000
},
{
"epoch": 47.04,
"learning_rate": 2.648089855487194e-05,
"loss": 0.2255,
"step": 657500
},
{
"epoch": 47.07,
"learning_rate": 2.6463013306624696e-05,
"loss": 0.2274,
"step": 658000
},
{
"epoch": 47.11,
"learning_rate": 2.6445128058377448e-05,
"loss": 0.2333,
"step": 658500
},
{
"epoch": 47.15,
"learning_rate": 2.6427242810130203e-05,
"loss": 0.2355,
"step": 659000
},
{
"epoch": 47.18,
"learning_rate": 2.640935756188296e-05,
"loss": 0.238,
"step": 659500
},
{
"epoch": 47.22,
"learning_rate": 2.6391472313635714e-05,
"loss": 0.2366,
"step": 660000
},
{
"epoch": 47.25,
"learning_rate": 2.6373587065388466e-05,
"loss": 0.2376,
"step": 660500
},
{
"epoch": 47.29,
"learning_rate": 2.635570181714122e-05,
"loss": 0.236,
"step": 661000
},
{
"epoch": 47.32,
"learning_rate": 2.6337816568893976e-05,
"loss": 0.2394,
"step": 661500
},
{
"epoch": 47.36,
"learning_rate": 2.6319931320646728e-05,
"loss": 0.2385,
"step": 662000
},
{
"epoch": 47.4,
"learning_rate": 2.630204607239949e-05,
"loss": 0.2375,
"step": 662500
},
{
"epoch": 47.43,
"learning_rate": 2.628416082415224e-05,
"loss": 0.2388,
"step": 663000
},
{
"epoch": 47.47,
"learning_rate": 2.6266275575904997e-05,
"loss": 0.238,
"step": 663500
},
{
"epoch": 47.5,
"learning_rate": 2.6248390327657752e-05,
"loss": 0.2389,
"step": 664000
},
{
"epoch": 47.54,
"learning_rate": 2.6230505079410507e-05,
"loss": 0.2468,
"step": 664500
},
{
"epoch": 47.57,
"learning_rate": 2.621261983116326e-05,
"loss": 0.2439,
"step": 665000
},
{
"epoch": 47.61,
"learning_rate": 2.6194734582916014e-05,
"loss": 0.2458,
"step": 665500
},
{
"epoch": 47.65,
"learning_rate": 2.617684933466877e-05,
"loss": 0.2526,
"step": 666000
},
{
"epoch": 47.68,
"learning_rate": 2.615896408642152e-05,
"loss": 0.2443,
"step": 666500
},
{
"epoch": 47.72,
"learning_rate": 2.6141078838174276e-05,
"loss": 0.2496,
"step": 667000
},
{
"epoch": 47.75,
"learning_rate": 2.612319358992703e-05,
"loss": 0.2489,
"step": 667500
},
{
"epoch": 47.79,
"learning_rate": 2.6105308341679787e-05,
"loss": 0.2431,
"step": 668000
},
{
"epoch": 47.83,
"learning_rate": 2.608742309343254e-05,
"loss": 0.2453,
"step": 668500
},
{
"epoch": 47.86,
"learning_rate": 2.6069537845185294e-05,
"loss": 0.2484,
"step": 669000
},
{
"epoch": 47.9,
"learning_rate": 2.605165259693805e-05,
"loss": 0.2515,
"step": 669500
},
{
"epoch": 47.93,
"learning_rate": 2.60337673486908e-05,
"loss": 0.2473,
"step": 670000
},
{
"epoch": 47.97,
"learning_rate": 2.6015882100443556e-05,
"loss": 0.2507,
"step": 670500
},
{
"epoch": 48.0,
"learning_rate": 2.599799685219631e-05,
"loss": 0.2447,
"step": 671000
},
{
"epoch": 48.04,
"learning_rate": 2.5980111603949066e-05,
"loss": 0.2267,
"step": 671500
},
{
"epoch": 48.08,
"learning_rate": 2.5962226355701818e-05,
"loss": 0.2293,
"step": 672000
},
{
"epoch": 48.11,
"learning_rate": 2.5944341107454573e-05,
"loss": 0.2335,
"step": 672500
},
{
"epoch": 48.15,
"learning_rate": 2.5926455859207328e-05,
"loss": 0.2307,
"step": 673000
},
{
"epoch": 48.18,
"learning_rate": 2.590857061096008e-05,
"loss": 0.2302,
"step": 673500
},
{
"epoch": 48.22,
"learning_rate": 2.5890685362712835e-05,
"loss": 0.236,
"step": 674000
},
{
"epoch": 48.25,
"learning_rate": 2.587280011446559e-05,
"loss": 0.2328,
"step": 674500
},
{
"epoch": 48.29,
"learning_rate": 2.5854914866218342e-05,
"loss": 0.2393,
"step": 675000
},
{
"epoch": 48.33,
"learning_rate": 2.5837029617971097e-05,
"loss": 0.2397,
"step": 675500
},
{
"epoch": 48.36,
"learning_rate": 2.5819144369723853e-05,
"loss": 0.2359,
"step": 676000
},
{
"epoch": 48.4,
"learning_rate": 2.5801259121476608e-05,
"loss": 0.2399,
"step": 676500
},
{
"epoch": 48.43,
"learning_rate": 2.578337387322936e-05,
"loss": 0.2389,
"step": 677000
},
{
"epoch": 48.47,
"learning_rate": 2.5765488624982115e-05,
"loss": 0.2423,
"step": 677500
},
{
"epoch": 48.5,
"learning_rate": 2.574760337673487e-05,
"loss": 0.2386,
"step": 678000
},
{
"epoch": 48.54,
"learning_rate": 2.572971812848762e-05,
"loss": 0.2459,
"step": 678500
},
{
"epoch": 48.58,
"learning_rate": 2.5711832880240377e-05,
"loss": 0.2391,
"step": 679000
},
{
"epoch": 48.61,
"learning_rate": 2.5693947631993132e-05,
"loss": 0.2481,
"step": 679500
},
{
"epoch": 48.65,
"learning_rate": 2.5676062383745887e-05,
"loss": 0.2418,
"step": 680000
},
{
"epoch": 48.68,
"learning_rate": 2.565817713549864e-05,
"loss": 0.2395,
"step": 680500
},
{
"epoch": 48.72,
"learning_rate": 2.5640291887251394e-05,
"loss": 0.2446,
"step": 681000
},
{
"epoch": 48.76,
"learning_rate": 2.562240663900415e-05,
"loss": 0.2462,
"step": 681500
},
{
"epoch": 48.79,
"learning_rate": 2.56045213907569e-05,
"loss": 0.244,
"step": 682000
},
{
"epoch": 48.83,
"learning_rate": 2.5586636142509656e-05,
"loss": 0.2481,
"step": 682500
},
{
"epoch": 48.86,
"learning_rate": 2.556875089426241e-05,
"loss": 0.2437,
"step": 683000
},
{
"epoch": 48.9,
"learning_rate": 2.5550865646015167e-05,
"loss": 0.2446,
"step": 683500
},
{
"epoch": 48.93,
"learning_rate": 2.5532980397767925e-05,
"loss": 0.2463,
"step": 684000
},
{
"epoch": 48.97,
"learning_rate": 2.551509514952068e-05,
"loss": 0.2451,
"step": 684500
},
{
"epoch": 49.01,
"learning_rate": 2.5497209901273432e-05,
"loss": 0.2429,
"step": 685000
},
{
"epoch": 49.04,
"learning_rate": 2.5479324653026187e-05,
"loss": 0.2262,
"step": 685500
},
{
"epoch": 49.08,
"learning_rate": 2.5461439404778943e-05,
"loss": 0.2276,
"step": 686000
},
{
"epoch": 49.11,
"learning_rate": 2.5443554156531694e-05,
"loss": 0.2299,
"step": 686500
},
{
"epoch": 49.15,
"learning_rate": 2.542566890828445e-05,
"loss": 0.2301,
"step": 687000
},
{
"epoch": 49.18,
"learning_rate": 2.5407783660037205e-05,
"loss": 0.2305,
"step": 687500
},
{
"epoch": 49.22,
"learning_rate": 2.538989841178996e-05,
"loss": 0.2322,
"step": 688000
},
{
"epoch": 49.26,
"learning_rate": 2.5372013163542712e-05,
"loss": 0.2372,
"step": 688500
},
{
"epoch": 49.29,
"learning_rate": 2.5354127915295467e-05,
"loss": 0.23,
"step": 689000
},
{
"epoch": 49.33,
"learning_rate": 2.5336242667048222e-05,
"loss": 0.2373,
"step": 689500
},
{
"epoch": 49.36,
"learning_rate": 2.5318357418800974e-05,
"loss": 0.2361,
"step": 690000
},
{
"epoch": 49.4,
"learning_rate": 2.530047217055373e-05,
"loss": 0.239,
"step": 690500
},
{
"epoch": 49.43,
"learning_rate": 2.5282586922306484e-05,
"loss": 0.239,
"step": 691000
},
{
"epoch": 49.47,
"learning_rate": 2.526470167405924e-05,
"loss": 0.2447,
"step": 691500
},
{
"epoch": 49.51,
"learning_rate": 2.524681642581199e-05,
"loss": 0.2379,
"step": 692000
},
{
"epoch": 49.54,
"learning_rate": 2.5228931177564746e-05,
"loss": 0.2372,
"step": 692500
},
{
"epoch": 49.58,
"learning_rate": 2.52110459293175e-05,
"loss": 0.2403,
"step": 693000
},
{
"epoch": 49.61,
"learning_rate": 2.5193160681070253e-05,
"loss": 0.2406,
"step": 693500
},
{
"epoch": 49.65,
"learning_rate": 2.517527543282301e-05,
"loss": 0.2407,
"step": 694000
},
{
"epoch": 49.69,
"learning_rate": 2.5157390184575764e-05,
"loss": 0.2424,
"step": 694500
},
{
"epoch": 49.72,
"learning_rate": 2.513950493632852e-05,
"loss": 0.2448,
"step": 695000
},
{
"epoch": 49.76,
"learning_rate": 2.512161968808127e-05,
"loss": 0.2398,
"step": 695500
},
{
"epoch": 49.79,
"learning_rate": 2.5103734439834026e-05,
"loss": 0.2426,
"step": 696000
},
{
"epoch": 49.83,
"learning_rate": 2.508584919158678e-05,
"loss": 0.2468,
"step": 696500
},
{
"epoch": 49.86,
"learning_rate": 2.5067963943339533e-05,
"loss": 0.2444,
"step": 697000
},
{
"epoch": 49.9,
"learning_rate": 2.5050078695092288e-05,
"loss": 0.2428,
"step": 697500
},
{
"epoch": 49.94,
"learning_rate": 2.5032193446845043e-05,
"loss": 0.2418,
"step": 698000
},
{
"epoch": 49.97,
"learning_rate": 2.50143081985978e-05,
"loss": 0.2439,
"step": 698500
},
{
"epoch": 50.01,
"learning_rate": 2.499642295035055e-05,
"loss": 0.2434,
"step": 699000
},
{
"epoch": 50.04,
"learning_rate": 2.4978537702103305e-05,
"loss": 0.2212,
"step": 699500
},
{
"epoch": 50.08,
"learning_rate": 2.496065245385606e-05,
"loss": 0.2255,
"step": 700000
},
{
"epoch": 50.11,
"learning_rate": 2.4942767205608816e-05,
"loss": 0.228,
"step": 700500
},
{
"epoch": 50.15,
"learning_rate": 2.492488195736157e-05,
"loss": 0.2311,
"step": 701000
},
{
"epoch": 50.19,
"learning_rate": 2.4906996709114326e-05,
"loss": 0.2328,
"step": 701500
},
{
"epoch": 50.22,
"learning_rate": 2.4889111460867078e-05,
"loss": 0.2318,
"step": 702000
},
{
"epoch": 50.26,
"learning_rate": 2.4871226212619833e-05,
"loss": 0.2308,
"step": 702500
},
{
"epoch": 50.29,
"learning_rate": 2.4853340964372588e-05,
"loss": 0.2352,
"step": 703000
},
{
"epoch": 50.33,
"learning_rate": 2.483545571612534e-05,
"loss": 0.2306,
"step": 703500
},
{
"epoch": 50.36,
"learning_rate": 2.4817570467878095e-05,
"loss": 0.2304,
"step": 704000
},
{
"epoch": 50.4,
"learning_rate": 2.479968521963085e-05,
"loss": 0.2363,
"step": 704500
},
{
"epoch": 50.44,
"learning_rate": 2.4781799971383606e-05,
"loss": 0.2379,
"step": 705000
},
{
"epoch": 50.47,
"learning_rate": 2.4763914723136357e-05,
"loss": 0.2329,
"step": 705500
},
{
"epoch": 50.51,
"learning_rate": 2.4746029474889113e-05,
"loss": 0.2426,
"step": 706000
},
{
"epoch": 50.54,
"learning_rate": 2.4728144226641868e-05,
"loss": 0.2365,
"step": 706500
},
{
"epoch": 50.58,
"learning_rate": 2.471025897839462e-05,
"loss": 0.2417,
"step": 707000
},
{
"epoch": 50.62,
"learning_rate": 2.4692373730147375e-05,
"loss": 0.2406,
"step": 707500
},
{
"epoch": 50.65,
"learning_rate": 2.467448848190013e-05,
"loss": 0.2399,
"step": 708000
},
{
"epoch": 50.69,
"learning_rate": 2.4656603233652885e-05,
"loss": 0.2408,
"step": 708500
},
{
"epoch": 50.72,
"learning_rate": 2.4638717985405637e-05,
"loss": 0.2423,
"step": 709000
},
{
"epoch": 50.76,
"learning_rate": 2.4620832737158392e-05,
"loss": 0.2411,
"step": 709500
},
{
"epoch": 50.79,
"learning_rate": 2.4602947488911147e-05,
"loss": 0.241,
"step": 710000
},
{
"epoch": 50.83,
"learning_rate": 2.45850622406639e-05,
"loss": 0.2471,
"step": 710500
},
{
"epoch": 50.87,
"learning_rate": 2.4567176992416658e-05,
"loss": 0.2425,
"step": 711000
},
{
"epoch": 50.9,
"learning_rate": 2.4549291744169413e-05,
"loss": 0.243,
"step": 711500
},
{
"epoch": 50.94,
"learning_rate": 2.4531406495922165e-05,
"loss": 0.2454,
"step": 712000
},
{
"epoch": 50.97,
"learning_rate": 2.451352124767492e-05,
"loss": 0.2426,
"step": 712500
},
{
"epoch": 51.01,
"learning_rate": 2.4495635999427675e-05,
"loss": 0.2365,
"step": 713000
},
{
"epoch": 51.04,
"learning_rate": 2.4477750751180427e-05,
"loss": 0.2231,
"step": 713500
},
{
"epoch": 51.08,
"learning_rate": 2.4459865502933182e-05,
"loss": 0.2296,
"step": 714000
},
{
"epoch": 51.12,
"learning_rate": 2.4441980254685937e-05,
"loss": 0.2256,
"step": 714500
},
{
"epoch": 51.15,
"learning_rate": 2.4424095006438692e-05,
"loss": 0.2299,
"step": 715000
},
{
"epoch": 51.19,
"learning_rate": 2.4406209758191444e-05,
"loss": 0.2304,
"step": 715500
},
{
"epoch": 51.22,
"learning_rate": 2.43883245099442e-05,
"loss": 0.2285,
"step": 716000
},
{
"epoch": 51.26,
"learning_rate": 2.4370439261696954e-05,
"loss": 0.2318,
"step": 716500
},
{
"epoch": 51.29,
"learning_rate": 2.4352554013449706e-05,
"loss": 0.2296,
"step": 717000
},
{
"epoch": 51.33,
"learning_rate": 2.433466876520246e-05,
"loss": 0.235,
"step": 717500
},
{
"epoch": 51.37,
"learning_rate": 2.4316783516955217e-05,
"loss": 0.2347,
"step": 718000
},
{
"epoch": 51.4,
"learning_rate": 2.4298898268707972e-05,
"loss": 0.2316,
"step": 718500
},
{
"epoch": 51.44,
"learning_rate": 2.4281013020460724e-05,
"loss": 0.236,
"step": 719000
},
{
"epoch": 51.47,
"learning_rate": 2.426312777221348e-05,
"loss": 0.2393,
"step": 719500
},
{
"epoch": 51.51,
"learning_rate": 2.4245242523966234e-05,
"loss": 0.2388,
"step": 720000
},
{
"epoch": 51.55,
"learning_rate": 2.4227357275718986e-05,
"loss": 0.2338,
"step": 720500
},
{
"epoch": 51.58,
"learning_rate": 2.420947202747174e-05,
"loss": 0.2367,
"step": 721000
},
{
"epoch": 51.62,
"learning_rate": 2.4191586779224496e-05,
"loss": 0.2388,
"step": 721500
},
{
"epoch": 51.65,
"learning_rate": 2.417370153097725e-05,
"loss": 0.2295,
"step": 722000
},
{
"epoch": 51.69,
"learning_rate": 2.4155816282730006e-05,
"loss": 0.2405,
"step": 722500
},
{
"epoch": 51.72,
"learning_rate": 2.413793103448276e-05,
"loss": 0.2402,
"step": 723000
},
{
"epoch": 51.76,
"learning_rate": 2.4120045786235513e-05,
"loss": 0.2407,
"step": 723500
},
{
"epoch": 51.8,
"learning_rate": 2.410216053798827e-05,
"loss": 0.2367,
"step": 724000
},
{
"epoch": 51.83,
"learning_rate": 2.4084275289741024e-05,
"loss": 0.2437,
"step": 724500
},
{
"epoch": 51.87,
"learning_rate": 2.406639004149378e-05,
"loss": 0.2356,
"step": 725000
},
{
"epoch": 51.9,
"learning_rate": 2.404850479324653e-05,
"loss": 0.2409,
"step": 725500
},
{
"epoch": 51.94,
"learning_rate": 2.4030619544999286e-05,
"loss": 0.2458,
"step": 726000
},
{
"epoch": 51.97,
"learning_rate": 2.401273429675204e-05,
"loss": 0.2438,
"step": 726500
},
{
"epoch": 52.01,
"learning_rate": 2.3994849048504793e-05,
"loss": 0.2385,
"step": 727000
},
{
"epoch": 52.05,
"learning_rate": 2.3976963800257548e-05,
"loss": 0.2212,
"step": 727500
},
{
"epoch": 52.08,
"learning_rate": 2.3959078552010303e-05,
"loss": 0.2228,
"step": 728000
},
{
"epoch": 52.12,
"learning_rate": 2.394119330376306e-05,
"loss": 0.2285,
"step": 728500
},
{
"epoch": 52.15,
"learning_rate": 2.392330805551581e-05,
"loss": 0.2295,
"step": 729000
},
{
"epoch": 52.19,
"learning_rate": 2.3905422807268565e-05,
"loss": 0.2284,
"step": 729500
},
{
"epoch": 52.22,
"learning_rate": 2.388753755902132e-05,
"loss": 0.2295,
"step": 730000
},
{
"epoch": 52.26,
"learning_rate": 2.3869652310774072e-05,
"loss": 0.237,
"step": 730500
},
{
"epoch": 52.3,
"learning_rate": 2.3851767062526828e-05,
"loss": 0.2283,
"step": 731000
},
{
"epoch": 52.33,
"learning_rate": 2.3833881814279583e-05,
"loss": 0.2308,
"step": 731500
},
{
"epoch": 52.37,
"learning_rate": 2.3815996566032338e-05,
"loss": 0.2321,
"step": 732000
},
{
"epoch": 52.4,
"learning_rate": 2.379811131778509e-05,
"loss": 0.2337,
"step": 732500
},
{
"epoch": 52.44,
"learning_rate": 2.378022606953785e-05,
"loss": 0.2367,
"step": 733000
},
{
"epoch": 52.48,
"learning_rate": 2.37623408212906e-05,
"loss": 0.2342,
"step": 733500
},
{
"epoch": 52.51,
"learning_rate": 2.3744455573043355e-05,
"loss": 0.2352,
"step": 734000
},
{
"epoch": 52.55,
"learning_rate": 2.372657032479611e-05,
"loss": 0.2311,
"step": 734500
},
{
"epoch": 52.58,
"learning_rate": 2.3708685076548866e-05,
"loss": 0.2338,
"step": 735000
},
{
"epoch": 52.62,
"learning_rate": 2.3690799828301617e-05,
"loss": 0.2335,
"step": 735500
},
{
"epoch": 52.65,
"learning_rate": 2.3672914580054373e-05,
"loss": 0.2377,
"step": 736000
},
{
"epoch": 52.69,
"learning_rate": 2.3655029331807128e-05,
"loss": 0.237,
"step": 736500
},
{
"epoch": 52.73,
"learning_rate": 2.363714408355988e-05,
"loss": 0.2356,
"step": 737000
},
{
"epoch": 52.76,
"learning_rate": 2.3619258835312635e-05,
"loss": 0.2399,
"step": 737500
},
{
"epoch": 52.8,
"learning_rate": 2.360137358706539e-05,
"loss": 0.2417,
"step": 738000
},
{
"epoch": 52.83,
"learning_rate": 2.3583488338818145e-05,
"loss": 0.2382,
"step": 738500
},
{
"epoch": 52.87,
"learning_rate": 2.3565603090570897e-05,
"loss": 0.2356,
"step": 739000
},
{
"epoch": 52.9,
"learning_rate": 2.3547717842323652e-05,
"loss": 0.243,
"step": 739500
},
{
"epoch": 52.94,
"learning_rate": 2.3529832594076407e-05,
"loss": 0.2427,
"step": 740000
},
{
"epoch": 52.98,
"learning_rate": 2.351194734582916e-05,
"loss": 0.2347,
"step": 740500
},
{
"epoch": 53.01,
"learning_rate": 2.3494062097581914e-05,
"loss": 0.2362,
"step": 741000
},
{
"epoch": 53.05,
"learning_rate": 2.347617684933467e-05,
"loss": 0.2206,
"step": 741500
},
{
"epoch": 53.08,
"learning_rate": 2.3458291601087425e-05,
"loss": 0.2206,
"step": 742000
},
{
"epoch": 53.12,
"learning_rate": 2.3440406352840176e-05,
"loss": 0.2229,
"step": 742500
},
{
"epoch": 53.15,
"learning_rate": 2.342252110459293e-05,
"loss": 0.2269,
"step": 743000
},
{
"epoch": 53.19,
"learning_rate": 2.340463585634569e-05,
"loss": 0.2252,
"step": 743500
},
{
"epoch": 53.23,
"learning_rate": 2.3386750608098442e-05,
"loss": 0.2302,
"step": 744000
},
{
"epoch": 53.26,
"learning_rate": 2.3368865359851197e-05,
"loss": 0.2293,
"step": 744500
},
{
"epoch": 53.3,
"learning_rate": 2.3350980111603952e-05,
"loss": 0.2288,
"step": 745000
},
{
"epoch": 53.33,
"learning_rate": 2.3333094863356704e-05,
"loss": 0.2309,
"step": 745500
},
{
"epoch": 53.37,
"learning_rate": 2.331520961510946e-05,
"loss": 0.2347,
"step": 746000
},
{
"epoch": 53.41,
"learning_rate": 2.3297324366862215e-05,
"loss": 0.2333,
"step": 746500
},
{
"epoch": 53.44,
"learning_rate": 2.3279439118614966e-05,
"loss": 0.2318,
"step": 747000
},
{
"epoch": 53.48,
"learning_rate": 2.326155387036772e-05,
"loss": 0.2356,
"step": 747500
},
{
"epoch": 53.51,
"learning_rate": 2.3243668622120477e-05,
"loss": 0.233,
"step": 748000
},
{
"epoch": 53.55,
"learning_rate": 2.3225783373873232e-05,
"loss": 0.2354,
"step": 748500
},
{
"epoch": 53.58,
"learning_rate": 2.3207898125625984e-05,
"loss": 0.2326,
"step": 749000
},
{
"epoch": 53.62,
"learning_rate": 2.319001287737874e-05,
"loss": 0.2323,
"step": 749500
},
{
"epoch": 53.66,
"learning_rate": 2.3172127629131494e-05,
"loss": 0.2354,
"step": 750000
},
{
"epoch": 53.69,
"learning_rate": 2.3154242380884246e-05,
"loss": 0.2332,
"step": 750500
},
{
"epoch": 53.73,
"learning_rate": 2.3136357132637e-05,
"loss": 0.2374,
"step": 751000
},
{
"epoch": 53.76,
"learning_rate": 2.3118471884389756e-05,
"loss": 0.2415,
"step": 751500
},
{
"epoch": 53.8,
"learning_rate": 2.310058663614251e-05,
"loss": 0.2372,
"step": 752000
},
{
"epoch": 53.83,
"learning_rate": 2.3082701387895263e-05,
"loss": 0.2362,
"step": 752500
},
{
"epoch": 53.87,
"learning_rate": 2.306481613964802e-05,
"loss": 0.2349,
"step": 753000
},
{
"epoch": 53.91,
"learning_rate": 2.3046930891400774e-05,
"loss": 0.2444,
"step": 753500
},
{
"epoch": 53.94,
"learning_rate": 2.3029045643153525e-05,
"loss": 0.2417,
"step": 754000
},
{
"epoch": 53.98,
"learning_rate": 2.3011160394906284e-05,
"loss": 0.2428,
"step": 754500
},
{
"epoch": 54.01,
"learning_rate": 2.299327514665904e-05,
"loss": 0.2304,
"step": 755000
},
{
"epoch": 54.05,
"learning_rate": 2.297538989841179e-05,
"loss": 0.2201,
"step": 755500
},
{
"epoch": 54.08,
"learning_rate": 2.2957504650164546e-05,
"loss": 0.2235,
"step": 756000
},
{
"epoch": 54.12,
"learning_rate": 2.29396194019173e-05,
"loss": 0.2288,
"step": 756500
},
{
"epoch": 54.16,
"learning_rate": 2.2921734153670056e-05,
"loss": 0.2296,
"step": 757000
},
{
"epoch": 54.19,
"learning_rate": 2.2903848905422808e-05,
"loss": 0.2258,
"step": 757500
},
{
"epoch": 54.23,
"learning_rate": 2.2885963657175563e-05,
"loss": 0.2265,
"step": 758000
},
{
"epoch": 54.26,
"learning_rate": 2.286807840892832e-05,
"loss": 0.2308,
"step": 758500
},
{
"epoch": 54.3,
"learning_rate": 2.285019316068107e-05,
"loss": 0.2284,
"step": 759000
},
{
"epoch": 54.34,
"learning_rate": 2.2832307912433826e-05,
"loss": 0.2238,
"step": 759500
},
{
"epoch": 54.37,
"learning_rate": 2.281442266418658e-05,
"loss": 0.2287,
"step": 760000
},
{
"epoch": 54.41,
"learning_rate": 2.2796537415939333e-05,
"loss": 0.2337,
"step": 760500
},
{
"epoch": 54.44,
"learning_rate": 2.2778652167692088e-05,
"loss": 0.2303,
"step": 761000
},
{
"epoch": 54.48,
"learning_rate": 2.2760766919444843e-05,
"loss": 0.228,
"step": 761500
},
{
"epoch": 54.51,
"learning_rate": 2.2742881671197598e-05,
"loss": 0.2354,
"step": 762000
},
{
"epoch": 54.55,
"learning_rate": 2.272499642295035e-05,
"loss": 0.2308,
"step": 762500
},
{
"epoch": 54.59,
"learning_rate": 2.2707111174703105e-05,
"loss": 0.2365,
"step": 763000
},
{
"epoch": 54.62,
"learning_rate": 2.268922592645586e-05,
"loss": 0.2382,
"step": 763500
},
{
"epoch": 54.66,
"learning_rate": 2.2671340678208612e-05,
"loss": 0.2351,
"step": 764000
},
{
"epoch": 54.69,
"learning_rate": 2.2653455429961367e-05,
"loss": 0.2302,
"step": 764500
},
{
"epoch": 54.73,
"learning_rate": 2.2635570181714122e-05,
"loss": 0.2303,
"step": 765000
},
{
"epoch": 54.76,
"learning_rate": 2.2617684933466878e-05,
"loss": 0.2356,
"step": 765500
},
{
"epoch": 54.8,
"learning_rate": 2.2599799685219633e-05,
"loss": 0.2352,
"step": 766000
},
{
"epoch": 54.84,
"learning_rate": 2.2581914436972388e-05,
"loss": 0.2344,
"step": 766500
},
{
"epoch": 54.87,
"learning_rate": 2.2564029188725143e-05,
"loss": 0.2404,
"step": 767000
},
{
"epoch": 54.91,
"learning_rate": 2.2546143940477895e-05,
"loss": 0.2363,
"step": 767500
},
{
"epoch": 54.94,
"learning_rate": 2.252825869223065e-05,
"loss": 0.239,
"step": 768000
},
{
"epoch": 54.98,
"learning_rate": 2.2510373443983405e-05,
"loss": 0.2356,
"step": 768500
},
{
"epoch": 55.02,
"learning_rate": 2.2492488195736157e-05,
"loss": 0.2273,
"step": 769000
},
{
"epoch": 55.05,
"learning_rate": 2.2474602947488912e-05,
"loss": 0.2172,
"step": 769500
},
{
"epoch": 55.09,
"learning_rate": 2.2456717699241667e-05,
"loss": 0.2258,
"step": 770000
},
{
"epoch": 55.12,
"learning_rate": 2.243883245099442e-05,
"loss": 0.222,
"step": 770500
},
{
"epoch": 55.16,
"learning_rate": 2.2420947202747174e-05,
"loss": 0.2234,
"step": 771000
},
{
"epoch": 55.19,
"learning_rate": 2.240306195449993e-05,
"loss": 0.2272,
"step": 771500
},
{
"epoch": 55.23,
"learning_rate": 2.2385176706252685e-05,
"loss": 0.2247,
"step": 772000
},
{
"epoch": 55.27,
"learning_rate": 2.2367291458005437e-05,
"loss": 0.226,
"step": 772500
},
{
"epoch": 55.3,
"learning_rate": 2.2349406209758192e-05,
"loss": 0.2293,
"step": 773000
},
{
"epoch": 55.34,
"learning_rate": 2.2331520961510947e-05,
"loss": 0.2277,
"step": 773500
},
{
"epoch": 55.37,
"learning_rate": 2.23136357132637e-05,
"loss": 0.2258,
"step": 774000
},
{
"epoch": 55.41,
"learning_rate": 2.2295750465016454e-05,
"loss": 0.2301,
"step": 774500
},
{
"epoch": 55.44,
"learning_rate": 2.227786521676921e-05,
"loss": 0.2339,
"step": 775000
},
{
"epoch": 55.48,
"learning_rate": 2.2259979968521964e-05,
"loss": 0.2279,
"step": 775500
},
{
"epoch": 55.52,
"learning_rate": 2.224209472027472e-05,
"loss": 0.2362,
"step": 776000
},
{
"epoch": 55.55,
"learning_rate": 2.2224209472027475e-05,
"loss": 0.2362,
"step": 776500
},
{
"epoch": 55.59,
"learning_rate": 2.220632422378023e-05,
"loss": 0.2249,
"step": 777000
},
{
"epoch": 55.62,
"learning_rate": 2.218843897553298e-05,
"loss": 0.238,
"step": 777500
},
{
"epoch": 55.66,
"learning_rate": 2.2170553727285737e-05,
"loss": 0.2312,
"step": 778000
},
{
"epoch": 55.69,
"learning_rate": 2.2152668479038492e-05,
"loss": 0.2401,
"step": 778500
},
{
"epoch": 55.73,
"learning_rate": 2.2134783230791244e-05,
"loss": 0.232,
"step": 779000
},
{
"epoch": 55.77,
"learning_rate": 2.2116897982544e-05,
"loss": 0.2295,
"step": 779500
},
{
"epoch": 55.8,
"learning_rate": 2.2099012734296754e-05,
"loss": 0.2361,
"step": 780000
},
{
"epoch": 55.84,
"learning_rate": 2.208112748604951e-05,
"loss": 0.2346,
"step": 780500
},
{
"epoch": 55.87,
"learning_rate": 2.206324223780226e-05,
"loss": 0.2333,
"step": 781000
},
{
"epoch": 55.91,
"learning_rate": 2.2045356989555016e-05,
"loss": 0.2397,
"step": 781500
},
{
"epoch": 55.95,
"learning_rate": 2.202747174130777e-05,
"loss": 0.2283,
"step": 782000
},
{
"epoch": 55.98,
"learning_rate": 2.2009586493060523e-05,
"loss": 0.2348,
"step": 782500
},
{
"epoch": 56.02,
"learning_rate": 2.199170124481328e-05,
"loss": 0.2286,
"step": 783000
},
{
"epoch": 56.05,
"learning_rate": 2.1973815996566034e-05,
"loss": 0.2158,
"step": 783500
},
{
"epoch": 56.09,
"learning_rate": 2.1955930748318785e-05,
"loss": 0.2168,
"step": 784000
},
{
"epoch": 56.12,
"learning_rate": 2.193804550007154e-05,
"loss": 0.2196,
"step": 784500
},
{
"epoch": 56.16,
"learning_rate": 2.1920160251824296e-05,
"loss": 0.2221,
"step": 785000
},
{
"epoch": 56.2,
"learning_rate": 2.190227500357705e-05,
"loss": 0.2247,
"step": 785500
},
{
"epoch": 56.23,
"learning_rate": 2.1884389755329803e-05,
"loss": 0.2241,
"step": 786000
},
{
"epoch": 56.27,
"learning_rate": 2.1866504507082558e-05,
"loss": 0.2252,
"step": 786500
},
{
"epoch": 56.3,
"learning_rate": 2.1848619258835316e-05,
"loss": 0.2271,
"step": 787000
},
{
"epoch": 56.34,
"learning_rate": 2.1830734010588068e-05,
"loss": 0.2289,
"step": 787500
},
{
"epoch": 56.37,
"learning_rate": 2.1812848762340823e-05,
"loss": 0.2297,
"step": 788000
},
{
"epoch": 56.41,
"learning_rate": 2.179496351409358e-05,
"loss": 0.2346,
"step": 788500
},
{
"epoch": 56.45,
"learning_rate": 2.177707826584633e-05,
"loss": 0.2285,
"step": 789000
},
{
"epoch": 56.48,
"learning_rate": 2.1759193017599086e-05,
"loss": 0.2272,
"step": 789500
},
{
"epoch": 56.52,
"learning_rate": 2.174130776935184e-05,
"loss": 0.2298,
"step": 790000
},
{
"epoch": 56.55,
"learning_rate": 2.1723422521104596e-05,
"loss": 0.2334,
"step": 790500
},
{
"epoch": 56.59,
"learning_rate": 2.1705537272857348e-05,
"loss": 0.2305,
"step": 791000
},
{
"epoch": 56.62,
"learning_rate": 2.1687652024610103e-05,
"loss": 0.2349,
"step": 791500
},
{
"epoch": 56.66,
"learning_rate": 2.1669766776362858e-05,
"loss": 0.2294,
"step": 792000
},
{
"epoch": 56.7,
"learning_rate": 2.165188152811561e-05,
"loss": 0.2373,
"step": 792500
},
{
"epoch": 56.73,
"learning_rate": 2.1633996279868365e-05,
"loss": 0.2319,
"step": 793000
},
{
"epoch": 56.77,
"learning_rate": 2.161611103162112e-05,
"loss": 0.2341,
"step": 793500
},
{
"epoch": 56.8,
"learning_rate": 2.1598225783373875e-05,
"loss": 0.2311,
"step": 794000
},
{
"epoch": 56.84,
"learning_rate": 2.1580340535126627e-05,
"loss": 0.234,
"step": 794500
},
{
"epoch": 56.88,
"learning_rate": 2.1562455286879382e-05,
"loss": 0.235,
"step": 795000
},
{
"epoch": 56.91,
"learning_rate": 2.1544570038632138e-05,
"loss": 0.2416,
"step": 795500
},
{
"epoch": 56.95,
"learning_rate": 2.152668479038489e-05,
"loss": 0.236,
"step": 796000
},
{
"epoch": 56.98,
"learning_rate": 2.1508799542137645e-05,
"loss": 0.2375,
"step": 796500
},
{
"epoch": 57.02,
"learning_rate": 2.14909142938904e-05,
"loss": 0.2239,
"step": 797000
},
{
"epoch": 57.05,
"learning_rate": 2.147302904564315e-05,
"loss": 0.2109,
"step": 797500
},
{
"epoch": 57.09,
"learning_rate": 2.145514379739591e-05,
"loss": 0.2177,
"step": 798000
},
{
"epoch": 57.13,
"learning_rate": 2.1437258549148665e-05,
"loss": 0.2187,
"step": 798500
},
{
"epoch": 57.16,
"learning_rate": 2.1419373300901417e-05,
"loss": 0.2258,
"step": 799000
},
{
"epoch": 57.2,
"learning_rate": 2.1401488052654172e-05,
"loss": 0.2228,
"step": 799500
},
{
"epoch": 57.23,
"learning_rate": 2.1383602804406927e-05,
"loss": 0.2236,
"step": 800000
},
{
"epoch": 57.27,
"learning_rate": 2.1365717556159683e-05,
"loss": 0.2232,
"step": 800500
},
{
"epoch": 57.3,
"learning_rate": 2.1347832307912434e-05,
"loss": 0.2246,
"step": 801000
},
{
"epoch": 57.34,
"learning_rate": 2.132994705966519e-05,
"loss": 0.2273,
"step": 801500
},
{
"epoch": 57.38,
"learning_rate": 2.1312061811417945e-05,
"loss": 0.2282,
"step": 802000
},
{
"epoch": 57.41,
"learning_rate": 2.1294176563170697e-05,
"loss": 0.2269,
"step": 802500
},
{
"epoch": 57.45,
"learning_rate": 2.1276291314923452e-05,
"loss": 0.2298,
"step": 803000
},
{
"epoch": 57.48,
"learning_rate": 2.1258406066676207e-05,
"loss": 0.2284,
"step": 803500
},
{
"epoch": 57.52,
"learning_rate": 2.1240520818428962e-05,
"loss": 0.2275,
"step": 804000
},
{
"epoch": 57.55,
"learning_rate": 2.1222635570181714e-05,
"loss": 0.2373,
"step": 804500
},
{
"epoch": 57.59,
"learning_rate": 2.120475032193447e-05,
"loss": 0.23,
"step": 805000
},
{
"epoch": 57.63,
"learning_rate": 2.1186865073687224e-05,
"loss": 0.2297,
"step": 805500
},
{
"epoch": 57.66,
"learning_rate": 2.1168979825439976e-05,
"loss": 0.2306,
"step": 806000
},
{
"epoch": 57.7,
"learning_rate": 2.115109457719273e-05,
"loss": 0.2331,
"step": 806500
},
{
"epoch": 57.73,
"learning_rate": 2.1133209328945486e-05,
"loss": 0.2343,
"step": 807000
},
{
"epoch": 57.77,
"learning_rate": 2.111532408069824e-05,
"loss": 0.2298,
"step": 807500
},
{
"epoch": 57.81,
"learning_rate": 2.1097438832450993e-05,
"loss": 0.2313,
"step": 808000
},
{
"epoch": 57.84,
"learning_rate": 2.107955358420375e-05,
"loss": 0.2329,
"step": 808500
},
{
"epoch": 57.88,
"learning_rate": 2.1061668335956504e-05,
"loss": 0.232,
"step": 809000
},
{
"epoch": 57.91,
"learning_rate": 2.104378308770926e-05,
"loss": 0.2298,
"step": 809500
},
{
"epoch": 57.95,
"learning_rate": 2.1025897839462014e-05,
"loss": 0.2349,
"step": 810000
},
{
"epoch": 57.98,
"learning_rate": 2.100801259121477e-05,
"loss": 0.2345,
"step": 810500
},
{
"epoch": 58.02,
"learning_rate": 2.099012734296752e-05,
"loss": 0.2235,
"step": 811000
},
{
"epoch": 58.06,
"learning_rate": 2.0972242094720276e-05,
"loss": 0.2161,
"step": 811500
},
{
"epoch": 58.09,
"learning_rate": 2.095435684647303e-05,
"loss": 0.2149,
"step": 812000
},
{
"epoch": 58.13,
"learning_rate": 2.0936471598225783e-05,
"loss": 0.2213,
"step": 812500
},
{
"epoch": 58.16,
"learning_rate": 2.091858634997854e-05,
"loss": 0.2232,
"step": 813000
},
{
"epoch": 58.2,
"learning_rate": 2.0900701101731294e-05,
"loss": 0.221,
"step": 813500
},
{
"epoch": 58.23,
"learning_rate": 2.088281585348405e-05,
"loss": 0.2233,
"step": 814000
},
{
"epoch": 58.27,
"learning_rate": 2.08649306052368e-05,
"loss": 0.2231,
"step": 814500
},
{
"epoch": 58.31,
"learning_rate": 2.0847045356989556e-05,
"loss": 0.2225,
"step": 815000
},
{
"epoch": 58.34,
"learning_rate": 2.082916010874231e-05,
"loss": 0.2299,
"step": 815500
},
{
"epoch": 58.38,
"learning_rate": 2.0811274860495063e-05,
"loss": 0.2217,
"step": 816000
},
{
"epoch": 58.41,
"learning_rate": 2.0793389612247818e-05,
"loss": 0.2294,
"step": 816500
},
{
"epoch": 58.45,
"learning_rate": 2.0775504364000573e-05,
"loss": 0.2272,
"step": 817000
},
{
"epoch": 58.48,
"learning_rate": 2.075761911575333e-05,
"loss": 0.2274,
"step": 817500
},
{
"epoch": 58.52,
"learning_rate": 2.073973386750608e-05,
"loss": 0.2324,
"step": 818000
},
{
"epoch": 58.56,
"learning_rate": 2.0721848619258835e-05,
"loss": 0.2313,
"step": 818500
},
{
"epoch": 58.59,
"learning_rate": 2.070396337101159e-05,
"loss": 0.2285,
"step": 819000
},
{
"epoch": 58.63,
"learning_rate": 2.0686078122764346e-05,
"loss": 0.2287,
"step": 819500
},
{
"epoch": 58.66,
"learning_rate": 2.06681928745171e-05,
"loss": 0.225,
"step": 820000
},
{
"epoch": 58.7,
"learning_rate": 2.0650307626269856e-05,
"loss": 0.233,
"step": 820500
},
{
"epoch": 58.74,
"learning_rate": 2.0632422378022608e-05,
"loss": 0.232,
"step": 821000
},
{
"epoch": 58.77,
"learning_rate": 2.0614537129775363e-05,
"loss": 0.234,
"step": 821500
},
{
"epoch": 58.81,
"learning_rate": 2.0596651881528118e-05,
"loss": 0.232,
"step": 822000
},
{
"epoch": 58.84,
"learning_rate": 2.057876663328087e-05,
"loss": 0.2346,
"step": 822500
},
{
"epoch": 58.88,
"learning_rate": 2.0560881385033625e-05,
"loss": 0.2309,
"step": 823000
},
{
"epoch": 58.91,
"learning_rate": 2.054299613678638e-05,
"loss": 0.2332,
"step": 823500
},
{
"epoch": 58.95,
"learning_rate": 2.0525110888539136e-05,
"loss": 0.2293,
"step": 824000
},
{
"epoch": 58.99,
"learning_rate": 2.0507225640291887e-05,
"loss": 0.2305,
"step": 824500
},
{
"epoch": 59.02,
"learning_rate": 2.0489340392044643e-05,
"loss": 0.2231,
"step": 825000
},
{
"epoch": 59.06,
"learning_rate": 2.0471455143797398e-05,
"loss": 0.216,
"step": 825500
},
{
"epoch": 59.09,
"learning_rate": 2.045356989555015e-05,
"loss": 0.215,
"step": 826000
},
{
"epoch": 59.13,
"learning_rate": 2.0435684647302905e-05,
"loss": 0.2172,
"step": 826500
},
{
"epoch": 59.16,
"learning_rate": 2.041779939905566e-05,
"loss": 0.2196,
"step": 827000
},
{
"epoch": 59.2,
"learning_rate": 2.0399914150808415e-05,
"loss": 0.2188,
"step": 827500
},
{
"epoch": 59.24,
"learning_rate": 2.0382028902561167e-05,
"loss": 0.2201,
"step": 828000
},
{
"epoch": 59.27,
"learning_rate": 2.0364143654313922e-05,
"loss": 0.2238,
"step": 828500
},
{
"epoch": 59.31,
"learning_rate": 2.0346258406066677e-05,
"loss": 0.2251,
"step": 829000
},
{
"epoch": 59.34,
"learning_rate": 2.032837315781943e-05,
"loss": 0.2217,
"step": 829500
},
{
"epoch": 59.38,
"learning_rate": 2.0310487909572184e-05,
"loss": 0.2241,
"step": 830000
},
{
"epoch": 59.41,
"learning_rate": 2.0292602661324943e-05,
"loss": 0.2246,
"step": 830500
},
{
"epoch": 59.45,
"learning_rate": 2.0274717413077695e-05,
"loss": 0.2285,
"step": 831000
},
{
"epoch": 59.49,
"learning_rate": 2.025683216483045e-05,
"loss": 0.2288,
"step": 831500
},
{
"epoch": 59.52,
"learning_rate": 2.0238946916583205e-05,
"loss": 0.2267,
"step": 832000
},
{
"epoch": 59.56,
"learning_rate": 2.0221061668335957e-05,
"loss": 0.2266,
"step": 832500
},
{
"epoch": 59.59,
"learning_rate": 2.0203176420088712e-05,
"loss": 0.2269,
"step": 833000
},
{
"epoch": 59.63,
"learning_rate": 2.0185291171841467e-05,
"loss": 0.2297,
"step": 833500
},
{
"epoch": 59.67,
"learning_rate": 2.0167405923594222e-05,
"loss": 0.2321,
"step": 834000
},
{
"epoch": 59.7,
"learning_rate": 2.0149520675346974e-05,
"loss": 0.2337,
"step": 834500
},
{
"epoch": 59.74,
"learning_rate": 2.013163542709973e-05,
"loss": 0.2355,
"step": 835000
},
{
"epoch": 59.77,
"learning_rate": 2.0113750178852484e-05,
"loss": 0.2271,
"step": 835500
},
{
"epoch": 59.81,
"learning_rate": 2.0095864930605236e-05,
"loss": 0.2323,
"step": 836000
},
{
"epoch": 59.84,
"learning_rate": 2.007797968235799e-05,
"loss": 0.2307,
"step": 836500
},
{
"epoch": 59.88,
"learning_rate": 2.0060094434110747e-05,
"loss": 0.2296,
"step": 837000
},
{
"epoch": 59.92,
"learning_rate": 2.0042209185863502e-05,
"loss": 0.229,
"step": 837500
},
{
"epoch": 59.95,
"learning_rate": 2.0024323937616254e-05,
"loss": 0.2335,
"step": 838000
},
{
"epoch": 59.99,
"learning_rate": 2.000643868936901e-05,
"loss": 0.2288,
"step": 838500
},
{
"epoch": 60.02,
"learning_rate": 1.9988553441121764e-05,
"loss": 0.2223,
"step": 839000
},
{
"epoch": 60.06,
"learning_rate": 1.9970668192874516e-05,
"loss": 0.2112,
"step": 839500
},
{
"epoch": 60.09,
"learning_rate": 1.995278294462727e-05,
"loss": 0.2118,
"step": 840000
},
{
"epoch": 60.13,
"learning_rate": 1.9934897696380026e-05,
"loss": 0.2179,
"step": 840500
},
{
"epoch": 60.17,
"learning_rate": 1.991701244813278e-05,
"loss": 0.2205,
"step": 841000
},
{
"epoch": 60.2,
"learning_rate": 1.9899127199885536e-05,
"loss": 0.2214,
"step": 841500
},
{
"epoch": 60.24,
"learning_rate": 1.988124195163829e-05,
"loss": 0.2216,
"step": 842000
},
{
"epoch": 60.27,
"learning_rate": 1.9863356703391043e-05,
"loss": 0.2264,
"step": 842500
},
{
"epoch": 60.31,
"learning_rate": 1.98454714551438e-05,
"loss": 0.2233,
"step": 843000
},
{
"epoch": 60.34,
"learning_rate": 1.9827586206896554e-05,
"loss": 0.2307,
"step": 843500
},
{
"epoch": 60.38,
"learning_rate": 1.980970095864931e-05,
"loss": 0.2233,
"step": 844000
},
{
"epoch": 60.42,
"learning_rate": 1.979181571040206e-05,
"loss": 0.2245,
"step": 844500
},
{
"epoch": 60.45,
"learning_rate": 1.9773930462154816e-05,
"loss": 0.226,
"step": 845000
},
{
"epoch": 60.49,
"learning_rate": 1.975604521390757e-05,
"loss": 0.2275,
"step": 845500
},
{
"epoch": 60.52,
"learning_rate": 1.9738159965660323e-05,
"loss": 0.2276,
"step": 846000
},
{
"epoch": 60.56,
"learning_rate": 1.9720274717413078e-05,
"loss": 0.2228,
"step": 846500
},
{
"epoch": 60.6,
"learning_rate": 1.9702389469165833e-05,
"loss": 0.226,
"step": 847000
},
{
"epoch": 60.63,
"learning_rate": 1.968450422091859e-05,
"loss": 0.2277,
"step": 847500
},
{
"epoch": 60.67,
"learning_rate": 1.966661897267134e-05,
"loss": 0.2318,
"step": 848000
},
{
"epoch": 60.7,
"learning_rate": 1.9648733724424095e-05,
"loss": 0.2249,
"step": 848500
},
{
"epoch": 60.74,
"learning_rate": 1.963084847617685e-05,
"loss": 0.2243,
"step": 849000
},
{
"epoch": 60.77,
"learning_rate": 1.9612963227929602e-05,
"loss": 0.2293,
"step": 849500
},
{
"epoch": 60.81,
"learning_rate": 1.9595077979682358e-05,
"loss": 0.2277,
"step": 850000
},
{
"epoch": 60.85,
"learning_rate": 1.9577192731435113e-05,
"loss": 0.2305,
"step": 850500
},
{
"epoch": 60.88,
"learning_rate": 1.9559307483187868e-05,
"loss": 0.232,
"step": 851000
},
{
"epoch": 60.92,
"learning_rate": 1.954142223494062e-05,
"loss": 0.2284,
"step": 851500
},
{
"epoch": 60.95,
"learning_rate": 1.9523536986693378e-05,
"loss": 0.2268,
"step": 852000
},
{
"epoch": 60.99,
"learning_rate": 1.9505651738446133e-05,
"loss": 0.2282,
"step": 852500
},
{
"epoch": 61.02,
"learning_rate": 1.9487766490198885e-05,
"loss": 0.2163,
"step": 853000
},
{
"epoch": 61.06,
"learning_rate": 1.946988124195164e-05,
"loss": 0.2177,
"step": 853500
},
{
"epoch": 61.1,
"learning_rate": 1.9451995993704396e-05,
"loss": 0.2179,
"step": 854000
},
{
"epoch": 61.13,
"learning_rate": 1.9434110745457147e-05,
"loss": 0.2188,
"step": 854500
},
{
"epoch": 61.17,
"learning_rate": 1.9416225497209903e-05,
"loss": 0.2208,
"step": 855000
},
{
"epoch": 61.2,
"learning_rate": 1.9398340248962658e-05,
"loss": 0.2166,
"step": 855500
},
{
"epoch": 61.24,
"learning_rate": 1.938045500071541e-05,
"loss": 0.2196,
"step": 856000
},
{
"epoch": 61.27,
"learning_rate": 1.9362569752468165e-05,
"loss": 0.2162,
"step": 856500
},
{
"epoch": 61.31,
"learning_rate": 1.934468450422092e-05,
"loss": 0.2179,
"step": 857000
},
{
"epoch": 61.35,
"learning_rate": 1.9326799255973675e-05,
"loss": 0.2201,
"step": 857500
},
{
"epoch": 61.38,
"learning_rate": 1.9308914007726427e-05,
"loss": 0.2275,
"step": 858000
},
{
"epoch": 61.42,
"learning_rate": 1.9291028759479182e-05,
"loss": 0.2219,
"step": 858500
},
{
"epoch": 61.45,
"learning_rate": 1.9273143511231937e-05,
"loss": 0.223,
"step": 859000
},
{
"epoch": 61.49,
"learning_rate": 1.925525826298469e-05,
"loss": 0.2248,
"step": 859500
},
{
"epoch": 61.53,
"learning_rate": 1.9237373014737444e-05,
"loss": 0.2247,
"step": 860000
},
{
"epoch": 61.56,
"learning_rate": 1.92194877664902e-05,
"loss": 0.2284,
"step": 860500
},
{
"epoch": 61.6,
"learning_rate": 1.9201602518242955e-05,
"loss": 0.2274,
"step": 861000
},
{
"epoch": 61.63,
"learning_rate": 1.9183717269995706e-05,
"loss": 0.2285,
"step": 861500
},
{
"epoch": 61.67,
"learning_rate": 1.916583202174846e-05,
"loss": 0.2239,
"step": 862000
},
{
"epoch": 61.7,
"learning_rate": 1.9147946773501217e-05,
"loss": 0.2276,
"step": 862500
},
{
"epoch": 61.74,
"learning_rate": 1.9130061525253972e-05,
"loss": 0.2247,
"step": 863000
},
{
"epoch": 61.78,
"learning_rate": 1.9112176277006727e-05,
"loss": 0.2242,
"step": 863500
},
{
"epoch": 61.81,
"learning_rate": 1.9094291028759482e-05,
"loss": 0.2299,
"step": 864000
},
{
"epoch": 61.85,
"learning_rate": 1.9076405780512234e-05,
"loss": 0.2314,
"step": 864500
},
{
"epoch": 61.88,
"learning_rate": 1.905852053226499e-05,
"loss": 0.2283,
"step": 865000
},
{
"epoch": 61.92,
"learning_rate": 1.9040635284017744e-05,
"loss": 0.2293,
"step": 865500
},
{
"epoch": 61.95,
"learning_rate": 1.90227500357705e-05,
"loss": 0.2288,
"step": 866000
},
{
"epoch": 61.99,
"learning_rate": 1.900486478752325e-05,
"loss": 0.2341,
"step": 866500
},
{
"epoch": 62.03,
"learning_rate": 1.8986979539276007e-05,
"loss": 0.2178,
"step": 867000
},
{
"epoch": 62.06,
"learning_rate": 1.8969094291028762e-05,
"loss": 0.2113,
"step": 867500
},
{
"epoch": 62.1,
"learning_rate": 1.8951209042781514e-05,
"loss": 0.2171,
"step": 868000
},
{
"epoch": 62.13,
"learning_rate": 1.893332379453427e-05,
"loss": 0.217,
"step": 868500
},
{
"epoch": 62.17,
"learning_rate": 1.8915438546287024e-05,
"loss": 0.2164,
"step": 869000
},
{
"epoch": 62.2,
"learning_rate": 1.8897553298039776e-05,
"loss": 0.2217,
"step": 869500
},
{
"epoch": 62.24,
"learning_rate": 1.887966804979253e-05,
"loss": 0.2215,
"step": 870000
}
],
"max_steps": 1397800,
"num_train_epochs": 100,
"total_flos": 6.2780322161664e+16,
"trial_name": null,
"trial_params": null
}