SentencePieceBPE-CC100-FR / trainer_state.json
qanastek's picture
Upload 41 files
95cd968
raw
history blame contribute delete
No virus
23.7 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 26.0,
"global_step": 95628,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-09,
"loss": 10.5649,
"step": 1
},
{
"epoch": 0.14,
"learning_rate": 2.5e-06,
"loss": 9.6075,
"step": 500
},
{
"epoch": 0.27,
"learning_rate": 5e-06,
"loss": 8.0262,
"step": 1000
},
{
"epoch": 0.41,
"learning_rate": 7.5e-06,
"loss": 6.9785,
"step": 1500
},
{
"epoch": 0.54,
"learning_rate": 1e-05,
"loss": 6.6731,
"step": 2000
},
{
"epoch": 0.68,
"learning_rate": 1.25e-05,
"loss": 6.5046,
"step": 2500
},
{
"epoch": 0.82,
"learning_rate": 1.5e-05,
"loss": 6.3911,
"step": 3000
},
{
"epoch": 0.95,
"learning_rate": 1.75e-05,
"loss": 6.2957,
"step": 3500
},
{
"epoch": 1.09,
"learning_rate": 2e-05,
"loss": 6.2241,
"step": 4000
},
{
"epoch": 1.22,
"learning_rate": 2.25e-05,
"loss": 6.1595,
"step": 4500
},
{
"epoch": 1.36,
"learning_rate": 2.5e-05,
"loss": 6.1063,
"step": 5000
},
{
"epoch": 1.5,
"learning_rate": 2.7500000000000004e-05,
"loss": 6.0549,
"step": 5500
},
{
"epoch": 1.63,
"learning_rate": 3e-05,
"loss": 6.0125,
"step": 6000
},
{
"epoch": 1.77,
"learning_rate": 3.2500000000000004e-05,
"loss": 5.9775,
"step": 6500
},
{
"epoch": 1.9,
"learning_rate": 3.5e-05,
"loss": 5.9469,
"step": 7000
},
{
"epoch": 2.04,
"learning_rate": 3.7500000000000003e-05,
"loss": 5.9122,
"step": 7500
},
{
"epoch": 2.18,
"learning_rate": 4e-05,
"loss": 5.8878,
"step": 8000
},
{
"epoch": 2.31,
"learning_rate": 4.2495e-05,
"loss": 5.8635,
"step": 8500
},
{
"epoch": 2.45,
"learning_rate": 4.4995000000000005e-05,
"loss": 5.838,
"step": 9000
},
{
"epoch": 2.58,
"learning_rate": 4.7495e-05,
"loss": 5.8177,
"step": 9500
},
{
"epoch": 2.72,
"learning_rate": 4.9995000000000005e-05,
"loss": 5.8012,
"step": 10000
},
{
"epoch": 2.85,
"learning_rate": 4.99829592116069e-05,
"loss": 5.7852,
"step": 10500
},
{
"epoch": 2.99,
"learning_rate": 4.9965849986312624e-05,
"loss": 5.768,
"step": 11000
},
{
"epoch": 3.13,
"learning_rate": 4.9948740761018345e-05,
"loss": 5.7555,
"step": 11500
},
{
"epoch": 3.26,
"learning_rate": 4.9931631535724066e-05,
"loss": 5.742,
"step": 12000
},
{
"epoch": 3.4,
"learning_rate": 4.991452231042979e-05,
"loss": 5.728,
"step": 12500
},
{
"epoch": 3.53,
"learning_rate": 4.98974473035861e-05,
"loss": 5.7197,
"step": 13000
},
{
"epoch": 3.67,
"learning_rate": 4.988033807829182e-05,
"loss": 5.7113,
"step": 13500
},
{
"epoch": 3.81,
"learning_rate": 4.986322885299754e-05,
"loss": 5.7034,
"step": 14000
},
{
"epoch": 3.94,
"learning_rate": 4.984611962770326e-05,
"loss": 5.6963,
"step": 14500
},
{
"epoch": 4.08,
"learning_rate": 4.982904462085957e-05,
"loss": 5.6869,
"step": 15000
},
{
"epoch": 4.21,
"learning_rate": 4.981193539556529e-05,
"loss": 5.6802,
"step": 15500
},
{
"epoch": 4.35,
"learning_rate": 4.9794826170271015e-05,
"loss": 5.6739,
"step": 16000
},
{
"epoch": 4.49,
"learning_rate": 4.977771694497673e-05,
"loss": 5.6674,
"step": 16500
},
{
"epoch": 4.62,
"learning_rate": 4.9760641938133046e-05,
"loss": 5.6611,
"step": 17000
},
{
"epoch": 4.76,
"learning_rate": 4.974353271283876e-05,
"loss": 5.6567,
"step": 17500
},
{
"epoch": 4.89,
"learning_rate": 4.972642348754449e-05,
"loss": 5.6491,
"step": 18000
},
{
"epoch": 5.03,
"learning_rate": 4.97093142622502e-05,
"loss": 5.6447,
"step": 18500
},
{
"epoch": 5.17,
"learning_rate": 4.969223925540652e-05,
"loss": 5.6407,
"step": 19000
},
{
"epoch": 5.3,
"learning_rate": 4.9675130030112235e-05,
"loss": 5.6356,
"step": 19500
},
{
"epoch": 5.44,
"learning_rate": 4.965802080481796e-05,
"loss": 5.6318,
"step": 20000
},
{
"epoch": 5.57,
"learning_rate": 4.964091157952368e-05,
"loss": 5.6285,
"step": 20500
},
{
"epoch": 5.71,
"learning_rate": 4.9623802354229405e-05,
"loss": 5.6227,
"step": 21000
},
{
"epoch": 5.85,
"learning_rate": 4.960672734738571e-05,
"loss": 5.6178,
"step": 21500
},
{
"epoch": 5.98,
"learning_rate": 4.958961812209144e-05,
"loss": 5.6137,
"step": 22000
},
{
"epoch": 6.12,
"learning_rate": 4.957250889679715e-05,
"loss": 5.6102,
"step": 22500
},
{
"epoch": 6.25,
"learning_rate": 4.955539967150288e-05,
"loss": 5.6042,
"step": 23000
},
{
"epoch": 6.39,
"learning_rate": 4.953832466465918e-05,
"loss": 5.6023,
"step": 23500
},
{
"epoch": 6.53,
"learning_rate": 4.952121543936491e-05,
"loss": 5.6011,
"step": 24000
},
{
"epoch": 6.66,
"learning_rate": 4.9504106214070626e-05,
"loss": 5.5958,
"step": 24500
},
{
"epoch": 6.8,
"learning_rate": 4.9486996988776353e-05,
"loss": 5.5914,
"step": 25000
},
{
"epoch": 6.93,
"learning_rate": 4.946992198193266e-05,
"loss": 5.5901,
"step": 25500
},
{
"epoch": 7.07,
"learning_rate": 4.9452812756638385e-05,
"loss": 5.5889,
"step": 26000
},
{
"epoch": 7.21,
"learning_rate": 4.94357035313441e-05,
"loss": 5.5859,
"step": 26500
},
{
"epoch": 7.34,
"learning_rate": 4.941862852450042e-05,
"loss": 5.5826,
"step": 27000
},
{
"epoch": 7.48,
"learning_rate": 4.940151929920613e-05,
"loss": 5.5774,
"step": 27500
},
{
"epoch": 7.61,
"learning_rate": 4.938441007391186e-05,
"loss": 5.5748,
"step": 28000
},
{
"epoch": 7.75,
"learning_rate": 4.9367300848617574e-05,
"loss": 5.5737,
"step": 28500
},
{
"epoch": 7.88,
"learning_rate": 4.93501916233233e-05,
"loss": 5.5726,
"step": 29000
},
{
"epoch": 8.02,
"learning_rate": 4.9333082398029016e-05,
"loss": 5.5696,
"step": 29500
},
{
"epoch": 8.16,
"learning_rate": 4.9315973172734744e-05,
"loss": 5.5651,
"step": 30000
},
{
"epoch": 8.29,
"learning_rate": 4.929886394744046e-05,
"loss": 5.562,
"step": 30500
},
{
"epoch": 8.43,
"learning_rate": 4.9281788940596776e-05,
"loss": 5.5569,
"step": 31000
},
{
"epoch": 8.56,
"learning_rate": 4.926467971530249e-05,
"loss": 5.5546,
"step": 31500
},
{
"epoch": 8.7,
"learning_rate": 4.924760470845881e-05,
"loss": 5.5564,
"step": 32000
},
{
"epoch": 8.84,
"learning_rate": 4.923049548316452e-05,
"loss": 5.5569,
"step": 32500
},
{
"epoch": 8.97,
"learning_rate": 4.921338625787025e-05,
"loss": 5.5486,
"step": 33000
},
{
"epoch": 9.11,
"learning_rate": 4.9196277032575965e-05,
"loss": 5.5491,
"step": 33500
},
{
"epoch": 9.24,
"learning_rate": 4.917916780728169e-05,
"loss": 5.5471,
"step": 34000
},
{
"epoch": 9.38,
"learning_rate": 4.916205858198741e-05,
"loss": 5.4798,
"step": 34500
},
{
"epoch": 9.52,
"learning_rate": 4.9144949356693135e-05,
"loss": 5.2155,
"step": 35000
},
{
"epoch": 9.65,
"learning_rate": 4.912784013139885e-05,
"loss": 5.0368,
"step": 35500
},
{
"epoch": 9.79,
"learning_rate": 4.911073090610458e-05,
"loss": 4.8597,
"step": 36000
},
{
"epoch": 9.92,
"learning_rate": 4.909365589926088e-05,
"loss": 4.6933,
"step": 36500
},
{
"epoch": 10.06,
"learning_rate": 4.907654667396661e-05,
"loss": 4.5374,
"step": 37000
},
{
"epoch": 10.2,
"learning_rate": 4.905943744867232e-05,
"loss": 4.3855,
"step": 37500
},
{
"epoch": 10.33,
"learning_rate": 4.904232822337805e-05,
"loss": 4.2293,
"step": 38000
},
{
"epoch": 10.47,
"learning_rate": 4.9025253216534355e-05,
"loss": 4.0771,
"step": 38500
},
{
"epoch": 10.6,
"learning_rate": 4.9008143991240076e-05,
"loss": 3.9247,
"step": 39000
},
{
"epoch": 10.74,
"learning_rate": 4.89910347659458e-05,
"loss": 3.7601,
"step": 39500
},
{
"epoch": 10.88,
"learning_rate": 4.897392554065152e-05,
"loss": 3.3989,
"step": 40000
},
{
"epoch": 11.01,
"learning_rate": 4.895685053380783e-05,
"loss": 2.6924,
"step": 40500
},
{
"epoch": 11.15,
"learning_rate": 4.893974130851355e-05,
"loss": 2.3507,
"step": 41000
},
{
"epoch": 11.28,
"learning_rate": 4.892263208321927e-05,
"loss": 2.1874,
"step": 41500
},
{
"epoch": 11.42,
"learning_rate": 4.890555707637558e-05,
"loss": 2.0827,
"step": 42000
},
{
"epoch": 11.56,
"learning_rate": 4.8888447851081304e-05,
"loss": 2.001,
"step": 42500
},
{
"epoch": 11.69,
"learning_rate": 4.8871338625787025e-05,
"loss": 1.9375,
"step": 43000
},
{
"epoch": 11.83,
"learning_rate": 4.8854229400492746e-05,
"loss": 1.8864,
"step": 43500
},
{
"epoch": 11.96,
"learning_rate": 4.8837154393649057e-05,
"loss": 1.8373,
"step": 44000
},
{
"epoch": 12.1,
"learning_rate": 4.882004516835478e-05,
"loss": 1.7978,
"step": 44500
},
{
"epoch": 12.23,
"learning_rate": 4.88029359430605e-05,
"loss": 1.7603,
"step": 45000
},
{
"epoch": 12.37,
"learning_rate": 4.878582671776622e-05,
"loss": 1.7258,
"step": 45500
},
{
"epoch": 12.51,
"learning_rate": 4.876875171092253e-05,
"loss": 1.6949,
"step": 46000
},
{
"epoch": 12.64,
"learning_rate": 4.875167670407884e-05,
"loss": 1.6671,
"step": 46500
},
{
"epoch": 12.78,
"learning_rate": 4.873456747878456e-05,
"loss": 1.6404,
"step": 47000
},
{
"epoch": 12.91,
"learning_rate": 4.8717458253490284e-05,
"loss": 1.6187,
"step": 47500
},
{
"epoch": 13.05,
"learning_rate": 4.8700349028196005e-05,
"loss": 1.5947,
"step": 48000
},
{
"epoch": 13.19,
"learning_rate": 4.8683239802901726e-05,
"loss": 1.5738,
"step": 48500
},
{
"epoch": 13.32,
"learning_rate": 4.866613057760745e-05,
"loss": 1.5511,
"step": 49000
},
{
"epoch": 13.46,
"learning_rate": 4.864902135231317e-05,
"loss": 1.5372,
"step": 49500
},
{
"epoch": 13.59,
"learning_rate": 4.863191212701889e-05,
"loss": 1.5169,
"step": 50000
},
{
"epoch": 13.73,
"learning_rate": 4.86148371201752e-05,
"loss": 1.5009,
"step": 50500
},
{
"epoch": 13.87,
"learning_rate": 4.859772789488092e-05,
"loss": 1.4868,
"step": 51000
},
{
"epoch": 14.0,
"learning_rate": 4.858061866958664e-05,
"loss": 1.4705,
"step": 51500
},
{
"epoch": 14.14,
"learning_rate": 4.8563509444292364e-05,
"loss": 1.4536,
"step": 52000
},
{
"epoch": 14.27,
"learning_rate": 4.8546400218998085e-05,
"loss": 1.4392,
"step": 52500
},
{
"epoch": 14.41,
"learning_rate": 4.8529359430604985e-05,
"loss": 1.4263,
"step": 53000
},
{
"epoch": 14.55,
"learning_rate": 4.8512250205310706e-05,
"loss": 1.4116,
"step": 53500
},
{
"epoch": 14.68,
"learning_rate": 4.849514098001643e-05,
"loss": 1.3983,
"step": 54000
},
{
"epoch": 14.82,
"learning_rate": 4.847803175472215e-05,
"loss": 1.3863,
"step": 54500
},
{
"epoch": 14.95,
"learning_rate": 4.846092252942787e-05,
"loss": 1.3787,
"step": 55000
},
{
"epoch": 15.09,
"learning_rate": 4.844381330413359e-05,
"loss": 1.3675,
"step": 55500
},
{
"epoch": 15.23,
"learning_rate": 4.84267382972899e-05,
"loss": 1.3571,
"step": 56000
},
{
"epoch": 15.36,
"learning_rate": 4.840962907199562e-05,
"loss": 1.3476,
"step": 56500
},
{
"epoch": 15.5,
"learning_rate": 4.8392519846701344e-05,
"loss": 1.3398,
"step": 57000
},
{
"epoch": 15.63,
"learning_rate": 4.8375410621407065e-05,
"loss": 1.3287,
"step": 57500
},
{
"epoch": 15.77,
"learning_rate": 4.8358301396112786e-05,
"loss": 1.3205,
"step": 58000
},
{
"epoch": 15.91,
"learning_rate": 4.834119217081851e-05,
"loss": 1.3132,
"step": 58500
},
{
"epoch": 16.04,
"learning_rate": 4.832408294552423e-05,
"loss": 1.3066,
"step": 59000
},
{
"epoch": 16.18,
"learning_rate": 4.830697372022995e-05,
"loss": 1.2967,
"step": 59500
},
{
"epoch": 16.31,
"learning_rate": 4.828986449493567e-05,
"loss": 1.2888,
"step": 60000
},
{
"epoch": 16.45,
"learning_rate": 4.827278948809198e-05,
"loss": 1.284,
"step": 60500
},
{
"epoch": 16.59,
"learning_rate": 4.82556802627977e-05,
"loss": 1.2765,
"step": 61000
},
{
"epoch": 16.72,
"learning_rate": 4.8238571037503424e-05,
"loss": 1.2695,
"step": 61500
},
{
"epoch": 16.86,
"learning_rate": 4.8221461812209145e-05,
"loss": 1.2617,
"step": 62000
},
{
"epoch": 16.99,
"learning_rate": 4.8204386805365456e-05,
"loss": 1.2541,
"step": 62500
},
{
"epoch": 17.13,
"learning_rate": 4.818727758007118e-05,
"loss": 1.2493,
"step": 63000
},
{
"epoch": 17.26,
"learning_rate": 4.81701683547769e-05,
"loss": 1.2412,
"step": 63500
},
{
"epoch": 17.4,
"learning_rate": 4.815305912948262e-05,
"loss": 1.2379,
"step": 64000
},
{
"epoch": 17.54,
"learning_rate": 4.813594990418834e-05,
"loss": 1.2318,
"step": 64500
},
{
"epoch": 17.67,
"learning_rate": 4.811887489734465e-05,
"loss": 1.2239,
"step": 65000
},
{
"epoch": 17.81,
"learning_rate": 4.810176567205037e-05,
"loss": 1.2199,
"step": 65500
},
{
"epoch": 17.94,
"learning_rate": 4.808465644675609e-05,
"loss": 1.2146,
"step": 66000
},
{
"epoch": 18.08,
"learning_rate": 4.8067581439912404e-05,
"loss": 1.2083,
"step": 66500
},
{
"epoch": 18.22,
"learning_rate": 4.8050472214618125e-05,
"loss": 1.2046,
"step": 67000
},
{
"epoch": 18.35,
"learning_rate": 4.8033362989323846e-05,
"loss": 1.1977,
"step": 67500
},
{
"epoch": 18.49,
"learning_rate": 4.801625376402957e-05,
"loss": 1.1926,
"step": 68000
},
{
"epoch": 18.62,
"learning_rate": 4.799914453873529e-05,
"loss": 1.1862,
"step": 68500
},
{
"epoch": 18.76,
"learning_rate": 4.798203531344101e-05,
"loss": 1.1821,
"step": 69000
},
{
"epoch": 18.9,
"learning_rate": 4.796496030659732e-05,
"loss": 1.1815,
"step": 69500
},
{
"epoch": 19.03,
"learning_rate": 4.794785108130304e-05,
"loss": 1.1741,
"step": 70000
},
{
"epoch": 19.17,
"learning_rate": 4.793074185600876e-05,
"loss": 1.1687,
"step": 70500
},
{
"epoch": 19.3,
"learning_rate": 4.7913632630714484e-05,
"loss": 1.166,
"step": 71000
},
{
"epoch": 19.44,
"learning_rate": 4.7896557623870795e-05,
"loss": 1.1619,
"step": 71500
},
{
"epoch": 19.58,
"learning_rate": 4.7879448398576516e-05,
"loss": 1.1554,
"step": 72000
},
{
"epoch": 19.71,
"learning_rate": 4.786233917328223e-05,
"loss": 1.1516,
"step": 72500
},
{
"epoch": 19.85,
"learning_rate": 4.784522994798796e-05,
"loss": 1.1487,
"step": 73000
},
{
"epoch": 19.98,
"learning_rate": 4.782815494114426e-05,
"loss": 1.1398,
"step": 73500
},
{
"epoch": 20.12,
"learning_rate": 4.781104571584999e-05,
"loss": 1.1383,
"step": 74000
},
{
"epoch": 20.26,
"learning_rate": 4.7793936490555704e-05,
"loss": 1.1351,
"step": 74500
},
{
"epoch": 20.39,
"learning_rate": 4.777682726526143e-05,
"loss": 1.1313,
"step": 75000
},
{
"epoch": 20.53,
"learning_rate": 4.7759752258417736e-05,
"loss": 1.1274,
"step": 75500
},
{
"epoch": 20.66,
"learning_rate": 4.7742643033123464e-05,
"loss": 1.1229,
"step": 76000
},
{
"epoch": 20.8,
"learning_rate": 4.772553380782918e-05,
"loss": 1.1199,
"step": 76500
},
{
"epoch": 20.94,
"learning_rate": 4.7708424582534906e-05,
"loss": 1.1179,
"step": 77000
},
{
"epoch": 21.07,
"learning_rate": 4.769134957569121e-05,
"loss": 1.1129,
"step": 77500
},
{
"epoch": 21.21,
"learning_rate": 4.767424035039694e-05,
"loss": 1.1104,
"step": 78000
},
{
"epoch": 21.34,
"learning_rate": 4.765713112510265e-05,
"loss": 1.1044,
"step": 78500
},
{
"epoch": 21.48,
"learning_rate": 4.764002189980838e-05,
"loss": 1.1028,
"step": 79000
},
{
"epoch": 21.62,
"learning_rate": 4.76229126745141e-05,
"loss": 1.0974,
"step": 79500
},
{
"epoch": 21.75,
"learning_rate": 4.760580344921982e-05,
"loss": 1.0953,
"step": 80000
},
{
"epoch": 21.89,
"learning_rate": 4.7588694223925544e-05,
"loss": 1.0926,
"step": 80500
},
{
"epoch": 22.02,
"learning_rate": 4.7571584998631265e-05,
"loss": 1.0895,
"step": 81000
},
{
"epoch": 22.16,
"learning_rate": 4.7554475773336986e-05,
"loss": 1.0855,
"step": 81500
},
{
"epoch": 22.29,
"learning_rate": 4.75374007664933e-05,
"loss": 1.0816,
"step": 82000
},
{
"epoch": 22.43,
"learning_rate": 4.752029154119902e-05,
"loss": 1.08,
"step": 82500
},
{
"epoch": 22.57,
"learning_rate": 4.750318231590474e-05,
"loss": 1.0775,
"step": 83000
},
{
"epoch": 22.7,
"learning_rate": 4.748610730906104e-05,
"loss": 1.0736,
"step": 83500
},
{
"epoch": 22.84,
"learning_rate": 4.746899808376677e-05,
"loss": 1.0715,
"step": 84000
},
{
"epoch": 22.97,
"learning_rate": 4.745188885847249e-05,
"loss": 1.0663,
"step": 84500
},
{
"epoch": 23.11,
"learning_rate": 4.7434779633178214e-05,
"loss": 1.0654,
"step": 85000
},
{
"epoch": 23.25,
"learning_rate": 4.741770462633452e-05,
"loss": 1.0603,
"step": 85500
},
{
"epoch": 23.38,
"learning_rate": 4.7400595401040245e-05,
"loss": 1.061,
"step": 86000
},
{
"epoch": 23.52,
"learning_rate": 4.7383486175745967e-05,
"loss": 1.0583,
"step": 86500
},
{
"epoch": 23.65,
"learning_rate": 4.736637695045169e-05,
"loss": 1.0546,
"step": 87000
},
{
"epoch": 23.79,
"learning_rate": 4.734926772515741e-05,
"loss": 1.0525,
"step": 87500
},
{
"epoch": 23.93,
"learning_rate": 4.733215849986313e-05,
"loss": 1.0493,
"step": 88000
},
{
"epoch": 24.06,
"learning_rate": 4.731508349301944e-05,
"loss": 1.0467,
"step": 88500
},
{
"epoch": 24.2,
"learning_rate": 4.729797426772516e-05,
"loss": 1.0433,
"step": 89000
},
{
"epoch": 24.33,
"learning_rate": 4.728086504243088e-05,
"loss": 1.0436,
"step": 89500
},
{
"epoch": 24.47,
"learning_rate": 4.7263790035587194e-05,
"loss": 1.0389,
"step": 90000
},
{
"epoch": 24.61,
"learning_rate": 4.7246680810292915e-05,
"loss": 1.0378,
"step": 90500
},
{
"epoch": 24.74,
"learning_rate": 4.7229571584998636e-05,
"loss": 1.0358,
"step": 91000
},
{
"epoch": 24.88,
"learning_rate": 4.721246235970436e-05,
"loss": 1.0338,
"step": 91500
},
{
"epoch": 25.01,
"learning_rate": 4.719535313441008e-05,
"loss": 1.0303,
"step": 92000
},
{
"epoch": 25.15,
"learning_rate": 4.71782439091158e-05,
"loss": 1.0277,
"step": 92500
},
{
"epoch": 25.29,
"learning_rate": 4.716113468382152e-05,
"loss": 1.0288,
"step": 93000
},
{
"epoch": 25.42,
"learning_rate": 4.714402545852724e-05,
"loss": 1.0229,
"step": 93500
},
{
"epoch": 25.56,
"learning_rate": 4.7126950451683546e-05,
"loss": 1.0254,
"step": 94000
},
{
"epoch": 25.69,
"learning_rate": 4.7109841226389274e-05,
"loss": 1.0194,
"step": 94500
},
{
"epoch": 25.83,
"learning_rate": 4.709273200109499e-05,
"loss": 1.0182,
"step": 95000
},
{
"epoch": 25.97,
"learning_rate": 4.7075622775800716e-05,
"loss": 1.0175,
"step": 95500
}
],
"max_steps": 1471200,
"num_train_epochs": 400,
"total_flos": 2.5771642200931697e+19,
"trial_name": null,
"trial_params": null
}