Bingsu's picture
Training in progress, step 90000
d268d93
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.3867640739149119,
"global_step": 90000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.0004211081201405e-05,
"loss": 3.9197,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 4.0016844250917146e-05,
"loss": 3.9108,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 4.003789928748371e-05,
"loss": 3.8655,
"step": 600
},
{
"epoch": 0.0,
"learning_rate": 4.006737582146571e-05,
"loss": 3.8602,
"step": 800
},
{
"epoch": 0.0,
"learning_rate": 4.010527333566261e-05,
"loss": 3.8478,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 4.0151591165118474e-05,
"loss": 3.8608,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 4.020632849713237e-05,
"loss": 3.8431,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 4.0269484371273996e-05,
"loss": 3.8447,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 4.034105767939918e-05,
"loss": 3.8422,
"step": 1800
},
{
"epoch": 0.01,
"learning_rate": 4.0421047165670686e-05,
"loss": 3.8019,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.050945142657905e-05,
"loss": 3.8109,
"step": 2200
},
{
"epoch": 0.01,
"learning_rate": 4.0606268910968035e-05,
"loss": 3.8192,
"step": 2400
},
{
"epoch": 0.01,
"learning_rate": 4.071149792006154e-05,
"loss": 3.8037,
"step": 2600
},
{
"epoch": 0.01,
"learning_rate": 4.082513660749298e-05,
"loss": 3.8074,
"step": 2800
},
{
"epoch": 0.01,
"learning_rate": 4.0947182979338874e-05,
"loss": 3.801,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 4.1077634894152375e-05,
"loss": 3.786,
"step": 3200
},
{
"epoch": 0.01,
"learning_rate": 4.121649006300163e-05,
"loss": 3.7799,
"step": 3400
},
{
"epoch": 0.02,
"learning_rate": 4.136374604951046e-05,
"loss": 3.7721,
"step": 3600
},
{
"epoch": 0.02,
"learning_rate": 4.1519400269899535e-05,
"loss": 3.7829,
"step": 3800
},
{
"epoch": 0.02,
"learning_rate": 4.1683449993033545e-05,
"loss": 3.7922,
"step": 4000
},
{
"epoch": 0.02,
"learning_rate": 4.185589234046783e-05,
"loss": 3.7686,
"step": 4200
},
{
"epoch": 0.02,
"learning_rate": 4.203672428649923e-05,
"loss": 3.7742,
"step": 4400
},
{
"epoch": 0.02,
"learning_rate": 4.2225942658219505e-05,
"loss": 3.753,
"step": 4600
},
{
"epoch": 0.02,
"learning_rate": 4.242354413557057e-05,
"loss": 3.7663,
"step": 4800
},
{
"epoch": 0.02,
"learning_rate": 4.262952525140289e-05,
"loss": 3.7589,
"step": 5000
},
{
"epoch": 0.02,
"learning_rate": 4.2843882391536666e-05,
"loss": 3.7652,
"step": 5200
},
{
"epoch": 0.02,
"learning_rate": 4.306661179482435e-05,
"loss": 3.7618,
"step": 5400
},
{
"epoch": 0.02,
"learning_rate": 4.329770955321802e-05,
"loss": 3.7441,
"step": 5600
},
{
"epoch": 0.02,
"learning_rate": 4.3537171611836355e-05,
"loss": 3.7364,
"step": 5800
},
{
"epoch": 0.03,
"learning_rate": 4.3784993769037384e-05,
"loss": 3.7501,
"step": 6000
},
{
"epoch": 0.03,
"learning_rate": 4.404117167649071e-05,
"loss": 3.7363,
"step": 6200
},
{
"epoch": 0.03,
"learning_rate": 4.43057008392547e-05,
"loss": 3.7279,
"step": 6400
},
{
"epoch": 0.03,
"learning_rate": 4.457857661585541e-05,
"loss": 3.7158,
"step": 6600
},
{
"epoch": 0.03,
"learning_rate": 4.485979421836779e-05,
"loss": 3.74,
"step": 6800
},
{
"epoch": 0.03,
"learning_rate": 4.514934871249906e-05,
"loss": 3.7441,
"step": 7000
},
{
"epoch": 0.03,
"learning_rate": 4.544723501767687e-05,
"loss": 3.7141,
"step": 7200
},
{
"epoch": 0.03,
"learning_rate": 4.575344790713656e-05,
"loss": 3.7264,
"step": 7400
},
{
"epoch": 0.03,
"learning_rate": 4.6067982008014406e-05,
"loss": 3.7221,
"step": 7600
},
{
"epoch": 0.03,
"learning_rate": 4.639083180144098e-05,
"loss": 3.7363,
"step": 7800
},
{
"epoch": 0.03,
"learning_rate": 4.672199162263839e-05,
"loss": 3.73,
"step": 8000
},
{
"epoch": 0.04,
"learning_rate": 4.706145566101981e-05,
"loss": 3.7205,
"step": 8200
},
{
"epoch": 0.04,
"learning_rate": 4.740921796029065e-05,
"loss": 3.7192,
"step": 8400
},
{
"epoch": 0.04,
"learning_rate": 4.776527241855393e-05,
"loss": 3.7095,
"step": 8600
},
{
"epoch": 0.04,
"learning_rate": 4.812961278841707e-05,
"loss": 3.7311,
"step": 8800
},
{
"epoch": 0.04,
"learning_rate": 4.85022326771014e-05,
"loss": 3.7031,
"step": 9000
},
{
"epoch": 0.04,
"learning_rate": 4.888312554655438e-05,
"loss": 3.6934,
"step": 9200
},
{
"epoch": 0.04,
"learning_rate": 4.92722847135643e-05,
"loss": 3.7218,
"step": 9400
},
{
"epoch": 0.04,
"learning_rate": 4.9669703349877704e-05,
"loss": 3.711,
"step": 9600
},
{
"epoch": 0.04,
"learning_rate": 5.0075374482318863e-05,
"loss": 3.7002,
"step": 9800
},
{
"epoch": 0.04,
"learning_rate": 5.0489290992912625e-05,
"loss": 3.698,
"step": 10000
},
{
"epoch": 0.04,
"learning_rate": 5.0911445619008545e-05,
"loss": 3.7097,
"step": 10200
},
{
"epoch": 0.04,
"learning_rate": 5.1341830953409266e-05,
"loss": 3.7012,
"step": 10400
},
{
"epoch": 0.05,
"learning_rate": 5.1780439444499746e-05,
"loss": 3.6847,
"step": 10600
},
{
"epoch": 0.05,
"learning_rate": 5.2227263396380295e-05,
"loss": 3.7072,
"step": 10800
},
{
"epoch": 0.05,
"learning_rate": 5.2682294969000907e-05,
"loss": 3.7052,
"step": 11000
},
{
"epoch": 0.05,
"learning_rate": 5.31455261782996e-05,
"loss": 3.6917,
"step": 11200
},
{
"epoch": 0.05,
"learning_rate": 5.361694889634207e-05,
"loss": 3.6951,
"step": 11400
},
{
"epoch": 0.05,
"learning_rate": 5.409655485146426e-05,
"loss": 3.6798,
"step": 11600
},
{
"epoch": 0.05,
"learning_rate": 5.458433562841795e-05,
"loss": 3.6888,
"step": 11800
},
{
"epoch": 0.05,
"learning_rate": 5.50802826685176e-05,
"loss": 3.685,
"step": 12000
},
{
"epoch": 0.05,
"learning_rate": 5.558438726979166e-05,
"loss": 3.6653,
"step": 12200
},
{
"epoch": 0.05,
"learning_rate": 5.6096640587133956e-05,
"loss": 3.6933,
"step": 12400
},
{
"epoch": 0.05,
"learning_rate": 5.661703363246001e-05,
"loss": 3.685,
"step": 12600
},
{
"epoch": 0.06,
"learning_rate": 5.714555727486404e-05,
"loss": 3.6799,
"step": 12800
},
{
"epoch": 0.06,
"learning_rate": 5.768220224077955e-05,
"loss": 3.656,
"step": 13000
},
{
"epoch": 0.06,
"learning_rate": 5.822695911414171e-05,
"loss": 3.675,
"step": 13200
},
{
"epoch": 0.06,
"learning_rate": 5.877981833655294e-05,
"loss": 3.6582,
"step": 13400
},
{
"epoch": 0.06,
"learning_rate": 5.934077020745062e-05,
"loss": 3.7009,
"step": 13600
},
{
"epoch": 0.06,
"learning_rate": 5.990980488427666e-05,
"loss": 3.6732,
"step": 13800
},
{
"epoch": 0.06,
"learning_rate": 6.048691238265099e-05,
"loss": 3.6509,
"step": 14000
},
{
"epoch": 0.06,
"learning_rate": 6.107208257654653e-05,
"loss": 3.6353,
"step": 14200
},
{
"epoch": 0.06,
"learning_rate": 6.166530519846635e-05,
"loss": 3.6662,
"step": 14400
},
{
"epoch": 0.06,
"learning_rate": 6.226656983962476e-05,
"loss": 3.6634,
"step": 14600
},
{
"epoch": 0.06,
"learning_rate": 6.287586595012898e-05,
"loss": 3.6607,
"step": 14800
},
{
"epoch": 0.06,
"learning_rate": 6.349318283916478e-05,
"loss": 3.654,
"step": 15000
},
{
"epoch": 0.07,
"learning_rate": 6.411850967518416e-05,
"loss": 3.6465,
"step": 15200
},
{
"epoch": 0.07,
"learning_rate": 6.475183548609517e-05,
"loss": 3.6527,
"step": 15400
},
{
"epoch": 0.07,
"learning_rate": 6.539314915945428e-05,
"loss": 3.6401,
"step": 15600
},
{
"epoch": 0.07,
"learning_rate": 6.604243944266183e-05,
"loss": 3.6453,
"step": 15800
},
{
"epoch": 0.07,
"learning_rate": 6.669969494315882e-05,
"loss": 3.6364,
"step": 16000
},
{
"epoch": 0.07,
"learning_rate": 6.736490412862753e-05,
"loss": 3.6379,
"step": 16200
},
{
"epoch": 0.07,
"learning_rate": 6.803805532719332e-05,
"loss": 3.6245,
"step": 16400
},
{
"epoch": 0.07,
"learning_rate": 6.871913672763005e-05,
"loss": 3.6333,
"step": 16600
},
{
"epoch": 0.07,
"learning_rate": 6.940813637956603e-05,
"loss": 3.6452,
"step": 16800
},
{
"epoch": 0.07,
"learning_rate": 7.010504219369547e-05,
"loss": 3.6487,
"step": 17000
},
{
"epoch": 0.07,
"learning_rate": 7.080984194198902e-05,
"loss": 3.6442,
"step": 17200
},
{
"epoch": 0.07,
"learning_rate": 7.152252325790952e-05,
"loss": 3.6384,
"step": 17400
},
{
"epoch": 0.08,
"learning_rate": 7.22430736366282e-05,
"loss": 3.6199,
"step": 17600
},
{
"epoch": 0.08,
"learning_rate": 7.297148043524434e-05,
"loss": 3.6341,
"step": 17800
},
{
"epoch": 0.08,
"learning_rate": 7.370773087300741e-05,
"loss": 3.6322,
"step": 18000
},
{
"epoch": 0.08,
"learning_rate": 7.445181203154054e-05,
"loss": 3.629,
"step": 18200
},
{
"epoch": 0.08,
"learning_rate": 7.52037108550682e-05,
"loss": 3.6105,
"step": 18400
},
{
"epoch": 0.08,
"learning_rate": 7.596341415064452e-05,
"loss": 3.6412,
"step": 18600
},
{
"epoch": 0.08,
"learning_rate": 7.673090858838487e-05,
"loss": 3.6355,
"step": 18800
},
{
"epoch": 0.08,
"learning_rate": 7.750618070170043e-05,
"loss": 3.6114,
"step": 19000
},
{
"epoch": 0.08,
"learning_rate": 7.828921688753326e-05,
"loss": 3.6142,
"step": 19200
},
{
"epoch": 0.08,
"learning_rate": 7.908000340659633e-05,
"loss": 3.6209,
"step": 19400
},
{
"epoch": 0.08,
"learning_rate": 7.987852638361337e-05,
"loss": 3.5979,
"step": 19600
},
{
"epoch": 0.09,
"learning_rate": 8.06847718075632e-05,
"loss": 3.6031,
"step": 19800
},
{
"epoch": 0.09,
"learning_rate": 8.149872553192528e-05,
"loss": 3.606,
"step": 20000
},
{
"epoch": 0.09,
"learning_rate": 8.232037327492777e-05,
"loss": 3.6232,
"step": 20200
},
{
"epoch": 0.09,
"learning_rate": 8.314970061979832e-05,
"loss": 3.6182,
"step": 20400
},
{
"epoch": 0.09,
"learning_rate": 8.398669301501699e-05,
"loss": 3.5904,
"step": 20600
},
{
"epoch": 0.09,
"learning_rate": 8.483133577457161e-05,
"loss": 3.6162,
"step": 20800
},
{
"epoch": 0.09,
"learning_rate": 8.568361407821506e-05,
"loss": 3.6084,
"step": 21000
},
{
"epoch": 0.09,
"learning_rate": 8.65435129717262e-05,
"loss": 3.598,
"step": 21200
},
{
"epoch": 0.09,
"learning_rate": 8.74110173671712e-05,
"loss": 3.6229,
"step": 21400
},
{
"epoch": 0.09,
"learning_rate": 8.828611204316915e-05,
"loss": 3.6101,
"step": 21600
},
{
"epoch": 0.09,
"learning_rate": 8.916878164515847e-05,
"loss": 3.5859,
"step": 21800
},
{
"epoch": 0.09,
"learning_rate": 9.005901068566706e-05,
"loss": 3.6207,
"step": 22000
},
{
"epoch": 0.1,
"learning_rate": 9.095678354458317e-05,
"loss": 3.6058,
"step": 22200
},
{
"epoch": 0.1,
"learning_rate": 9.18620844694301e-05,
"loss": 3.5935,
"step": 22400
},
{
"epoch": 0.1,
"learning_rate": 9.27748975756424e-05,
"loss": 3.5917,
"step": 22600
},
{
"epoch": 0.1,
"learning_rate": 9.369520684684475e-05,
"loss": 3.5861,
"step": 22800
},
{
"epoch": 0.1,
"learning_rate": 9.462299613513243e-05,
"loss": 3.5976,
"step": 23000
},
{
"epoch": 0.1,
"learning_rate": 9.555824916135554e-05,
"loss": 3.5914,
"step": 23200
},
{
"epoch": 0.1,
"learning_rate": 9.650094951540393e-05,
"loss": 3.5778,
"step": 23400
},
{
"epoch": 0.1,
"learning_rate": 9.745108065649507e-05,
"loss": 3.6049,
"step": 23600
},
{
"epoch": 0.1,
"learning_rate": 9.840862591346507e-05,
"loss": 3.5878,
"step": 23800
},
{
"epoch": 0.1,
"learning_rate": 9.93735684850606e-05,
"loss": 3.5918,
"step": 24000
},
{
"epoch": 0.1,
"learning_rate": 0.00010034589144023332,
"loss": 3.59,
"step": 24200
},
{
"epoch": 0.1,
"learning_rate": 0.00010132557771843796,
"loss": 3.5815,
"step": 24400
},
{
"epoch": 0.11,
"learning_rate": 0.00010231261012993076,
"loss": 3.5806,
"step": 24600
},
{
"epoch": 0.11,
"learning_rate": 0.0001033069713560718,
"loss": 3.5927,
"step": 24800
},
{
"epoch": 0.11,
"learning_rate": 0.00010430864394962811,
"loss": 3.595,
"step": 25000
},
{
"epoch": 0.11,
"learning_rate": 0.00010531761033508039,
"loss": 3.5751,
"step": 25200
},
{
"epoch": 0.11,
"learning_rate": 0.00010633385280893134,
"loss": 3.5741,
"step": 25400
},
{
"epoch": 0.11,
"learning_rate": 0.00010735735354001593,
"loss": 3.5752,
"step": 25600
},
{
"epoch": 0.11,
"learning_rate": 0.00010838809456981471,
"loss": 3.5666,
"step": 25800
},
{
"epoch": 0.11,
"learning_rate": 0.00010942605781276871,
"loss": 3.5594,
"step": 26000
},
{
"epoch": 0.11,
"learning_rate": 0.00011047122505659652,
"loss": 3.5652,
"step": 26200
},
{
"epoch": 0.11,
"learning_rate": 0.00011152357796261427,
"loss": 3.5546,
"step": 26400
},
{
"epoch": 0.11,
"learning_rate": 0.00011258309806605742,
"loss": 3.5427,
"step": 26600
},
{
"epoch": 0.12,
"learning_rate": 0.00011364976677640404,
"loss": 3.5766,
"step": 26800
},
{
"epoch": 0.12,
"learning_rate": 0.00011472356537770196,
"loss": 3.5791,
"step": 27000
},
{
"epoch": 0.12,
"learning_rate": 0.00011580447502889649,
"loss": 3.5722,
"step": 27200
},
{
"epoch": 0.12,
"learning_rate": 0.00011689247676416152,
"loss": 3.5775,
"step": 27400
},
{
"epoch": 0.12,
"learning_rate": 0.00011798755149323179,
"loss": 3.5579,
"step": 27600
},
{
"epoch": 0.12,
"learning_rate": 0.00011908968000173793,
"loss": 3.5618,
"step": 27800
},
{
"epoch": 0.12,
"learning_rate": 0.00012019884295154414,
"loss": 3.5614,
"step": 28000
},
{
"epoch": 0.12,
"learning_rate": 0.00012131502088108667,
"loss": 3.5444,
"step": 28200
},
{
"epoch": 0.12,
"learning_rate": 0.00012243819420571607,
"loss": 3.5463,
"step": 28400
},
{
"epoch": 0.12,
"learning_rate": 0.00012356834321804048,
"loss": 3.5502,
"step": 28600
},
{
"epoch": 0.12,
"learning_rate": 0.00012470544808827115,
"loss": 3.5287,
"step": 28800
},
{
"epoch": 0.12,
"learning_rate": 0.00012584948886457082,
"loss": 3.5414,
"step": 29000
},
{
"epoch": 0.13,
"learning_rate": 0.00012700044547340377,
"loss": 3.5504,
"step": 29200
},
{
"epoch": 0.13,
"learning_rate": 0.00012815829771988744,
"loss": 3.5381,
"step": 29400
},
{
"epoch": 0.13,
"learning_rate": 0.00012932302528814797,
"loss": 3.5551,
"step": 29600
},
{
"epoch": 0.13,
"learning_rate": 0.00013049460774167522,
"loss": 3.5331,
"step": 29800
},
{
"epoch": 0.13,
"learning_rate": 0.00013167302452368236,
"loss": 3.5359,
"step": 30000
},
{
"epoch": 0.13,
"learning_rate": 0.0001328582549574664,
"loss": 3.5252,
"step": 30200
},
{
"epoch": 0.13,
"learning_rate": 0.00013405027824677038,
"loss": 3.5104,
"step": 30400
},
{
"epoch": 0.13,
"learning_rate": 0.00013524907347614926,
"loss": 3.5253,
"step": 30600
},
{
"epoch": 0.13,
"learning_rate": 0.00013645461961133603,
"loss": 3.5249,
"step": 30800
},
{
"epoch": 0.13,
"learning_rate": 0.00013766689549961136,
"loss": 3.5374,
"step": 31000
},
{
"epoch": 0.13,
"learning_rate": 0.00013888587987017427,
"loss": 3.5093,
"step": 31200
},
{
"epoch": 0.13,
"learning_rate": 0.00014011155133451586,
"loss": 3.5465,
"step": 31400
},
{
"epoch": 0.14,
"learning_rate": 0.00014134388838679408,
"loss": 3.5362,
"step": 31600
},
{
"epoch": 0.14,
"learning_rate": 0.00014258286940421164,
"loss": 3.5233,
"step": 31800
},
{
"epoch": 0.14,
"learning_rate": 0.00014382847264739456,
"loss": 3.5425,
"step": 32000
},
{
"epoch": 0.14,
"learning_rate": 0.00014508067626077482,
"loss": 3.5178,
"step": 32200
},
{
"epoch": 0.14,
"learning_rate": 0.00014633945827297273,
"loss": 3.5288,
"step": 32400
},
{
"epoch": 0.14,
"learning_rate": 0.00014760479659718304,
"loss": 3.52,
"step": 32600
},
{
"epoch": 0.14,
"learning_rate": 0.00014887666903156218,
"loss": 3.5121,
"step": 32800
},
{
"epoch": 0.14,
"learning_rate": 0.0001501550532596183,
"loss": 3.5191,
"step": 33000
},
{
"epoch": 0.14,
"learning_rate": 0.00015143992685060208,
"loss": 3.5097,
"step": 33200
},
{
"epoch": 0.14,
"learning_rate": 0.00015273126725990098,
"loss": 3.5034,
"step": 33400
},
{
"epoch": 0.14,
"learning_rate": 0.00015402905182943438,
"loss": 3.484,
"step": 33600
},
{
"epoch": 0.15,
"learning_rate": 0.00015533325778805166,
"loss": 3.5111,
"step": 33800
},
{
"epoch": 0.15,
"learning_rate": 0.00015664386225193092,
"loss": 3.5041,
"step": 34000
},
{
"epoch": 0.15,
"learning_rate": 0.00015796084222498145,
"loss": 3.5126,
"step": 34200
},
{
"epoch": 0.15,
"learning_rate": 0.0001592841745992464,
"loss": 3.4939,
"step": 34400
},
{
"epoch": 0.15,
"learning_rate": 0.00016061383615530893,
"loss": 3.4728,
"step": 34600
},
{
"epoch": 0.15,
"learning_rate": 0.0001619498035626989,
"loss": 3.4671,
"step": 34800
},
{
"epoch": 0.15,
"learning_rate": 0.00016329205338030289,
"loss": 3.5058,
"step": 35000
},
{
"epoch": 0.15,
"learning_rate": 0.00016464056205677522,
"loss": 3.4973,
"step": 35200
},
{
"epoch": 0.15,
"learning_rate": 0.0001659953059309513,
"loss": 3.488,
"step": 35400
},
{
"epoch": 0.15,
"learning_rate": 0.00016735626123226218,
"loss": 3.4953,
"step": 35600
},
{
"epoch": 0.15,
"learning_rate": 0.00016872340408115283,
"loss": 3.4772,
"step": 35800
},
{
"epoch": 0.15,
"learning_rate": 0.00017009671048950003,
"loss": 3.4866,
"step": 36000
},
{
"epoch": 0.16,
"learning_rate": 0.00017147615636103365,
"loss": 3.4752,
"step": 36200
},
{
"epoch": 0.16,
"learning_rate": 0.00017286171749175986,
"loss": 3.4878,
"step": 36400
},
{
"epoch": 0.16,
"learning_rate": 0.0001742533695703849,
"loss": 3.468,
"step": 36600
},
{
"epoch": 0.16,
"learning_rate": 0.0001756510881787427,
"loss": 3.4708,
"step": 36800
},
{
"epoch": 0.16,
"learning_rate": 0.00017705484879222265,
"loss": 3.4879,
"step": 37000
},
{
"epoch": 0.16,
"learning_rate": 0.0001784646267801997,
"loss": 3.4787,
"step": 37200
},
{
"epoch": 0.16,
"learning_rate": 0.00017988039740646764,
"loss": 3.4673,
"step": 37400
},
{
"epoch": 0.16,
"learning_rate": 0.00018130213582967188,
"loss": 3.4794,
"step": 37600
},
{
"epoch": 0.16,
"learning_rate": 0.00018272981710374596,
"loss": 3.4561,
"step": 37800
},
{
"epoch": 0.16,
"learning_rate": 0.00018416341617834915,
"loss": 3.4599,
"step": 38000
},
{
"epoch": 0.16,
"learning_rate": 0.00018560290789930596,
"loss": 3.454,
"step": 38200
},
{
"epoch": 0.17,
"learning_rate": 0.00018704826700904756,
"loss": 3.4628,
"step": 38400
},
{
"epoch": 0.17,
"learning_rate": 0.00018849946814705483,
"loss": 3.4557,
"step": 38600
},
{
"epoch": 0.17,
"learning_rate": 0.0001899564858503036,
"loss": 3.4584,
"step": 38800
},
{
"epoch": 0.17,
"learning_rate": 0.00019141929455371092,
"loss": 3.4492,
"step": 39000
},
{
"epoch": 0.17,
"learning_rate": 0.00019288786859058442,
"loss": 3.4641,
"step": 39200
},
{
"epoch": 0.17,
"learning_rate": 0.00019436218219307173,
"loss": 3.4665,
"step": 39400
},
{
"epoch": 0.17,
"learning_rate": 0.00019584220949261325,
"loss": 3.4503,
"step": 39600
},
{
"epoch": 0.17,
"learning_rate": 0.00019732792452039607,
"loss": 3.4438,
"step": 39800
},
{
"epoch": 0.17,
"learning_rate": 0.00019881930120780906,
"loss": 3.4454,
"step": 40000
},
{
"epoch": 0.17,
"learning_rate": 0.00020031631338690114,
"loss": 3.4357,
"step": 40200
},
{
"epoch": 0.17,
"learning_rate": 0.00020181893479083945,
"loss": 3.4561,
"step": 40400
},
{
"epoch": 0.17,
"learning_rate": 0.00020332713905437056,
"loss": 3.4535,
"step": 40600
},
{
"epoch": 0.18,
"learning_rate": 0.00020484089971428406,
"loss": 3.4523,
"step": 40800
},
{
"epoch": 0.18,
"learning_rate": 0.00020636019020987535,
"loss": 3.4449,
"step": 41000
},
{
"epoch": 0.18,
"learning_rate": 0.00020788498388341244,
"loss": 3.4488,
"step": 41200
},
{
"epoch": 0.18,
"learning_rate": 0.00020941525398060392,
"loss": 3.431,
"step": 41400
},
{
"epoch": 0.18,
"learning_rate": 0.00021095097365106798,
"loss": 3.4441,
"step": 41600
},
{
"epoch": 0.18,
"learning_rate": 0.0002124921159488034,
"loss": 3.4197,
"step": 41800
},
{
"epoch": 0.18,
"learning_rate": 0.0002140386538326632,
"loss": 3.4281,
"step": 42000
},
{
"epoch": 0.18,
"learning_rate": 0.00021559056016682795,
"loss": 3.4229,
"step": 42200
},
{
"epoch": 0.18,
"learning_rate": 0.00021714780772128265,
"loss": 3.4486,
"step": 42400
},
{
"epoch": 0.18,
"learning_rate": 0.00021871036917229462,
"loss": 3.4449,
"step": 42600
},
{
"epoch": 0.18,
"learning_rate": 0.00022027821710289205,
"loss": 3.4099,
"step": 42800
},
{
"epoch": 0.18,
"learning_rate": 0.0002218513240033463,
"loss": 3.4309,
"step": 43000
},
{
"epoch": 0.19,
"learning_rate": 0.00022342966227165346,
"loss": 3.4188,
"step": 43200
},
{
"epoch": 0.19,
"learning_rate": 0.00022501320421401943,
"loss": 3.4238,
"step": 43400
},
{
"epoch": 0.19,
"learning_rate": 0.00022660192204534534,
"loss": 3.4067,
"step": 43600
},
{
"epoch": 0.19,
"learning_rate": 0.00022819578788971543,
"loss": 3.428,
"step": 43800
},
{
"epoch": 0.19,
"learning_rate": 0.000229794773780886,
"loss": 3.4282,
"step": 44000
},
{
"epoch": 0.19,
"learning_rate": 0.00023139885166277606,
"loss": 3.411,
"step": 44200
},
{
"epoch": 0.19,
"learning_rate": 0.00023300799338995962,
"loss": 3.4144,
"step": 44400
},
{
"epoch": 0.19,
"learning_rate": 0.00023462217072816,
"loss": 3.3912,
"step": 44600
},
{
"epoch": 0.19,
"learning_rate": 0.0002362413553547441,
"loss": 3.4082,
"step": 44800
},
{
"epoch": 0.19,
"learning_rate": 0.00023786551885922075,
"loss": 3.4071,
"step": 45000
},
{
"epoch": 0.19,
"learning_rate": 0.0002394946327437383,
"loss": 3.3977,
"step": 45200
},
{
"epoch": 0.2,
"learning_rate": 0.00024112866842358467,
"loss": 3.4155,
"step": 45400
},
{
"epoch": 0.2,
"learning_rate": 0.00024276759722768946,
"loss": 3.4112,
"step": 45600
},
{
"epoch": 0.2,
"learning_rate": 0.0002444113903991265,
"loss": 3.4177,
"step": 45800
},
{
"epoch": 0.2,
"learning_rate": 0.0002460600190956186,
"loss": 3.3751,
"step": 46000
},
{
"epoch": 0.2,
"learning_rate": 0.0002477134543900436,
"loss": 3.4168,
"step": 46200
},
{
"epoch": 0.2,
"learning_rate": 0.00024937166727094203,
"loss": 3.3941,
"step": 46400
},
{
"epoch": 0.2,
"learning_rate": 0.0002510346286430259,
"loss": 3.3995,
"step": 46600
},
{
"epoch": 0.2,
"learning_rate": 0.0002527023093276896,
"loss": 3.401,
"step": 46800
},
{
"epoch": 0.2,
"learning_rate": 0.0002543746800635215,
"loss": 3.3874,
"step": 47000
},
{
"epoch": 0.2,
"learning_rate": 0.0002560517115068175,
"loss": 3.3856,
"step": 47200
},
{
"epoch": 0.2,
"learning_rate": 0.00025773337423209606,
"loss": 3.3732,
"step": 47400
},
{
"epoch": 0.2,
"learning_rate": 0.0002594196387326143,
"loss": 3.3986,
"step": 47600
},
{
"epoch": 0.21,
"learning_rate": 0.00026111047542088587,
"loss": 3.3909,
"step": 47800
},
{
"epoch": 0.21,
"learning_rate": 0.00026280585462919984,
"loss": 3.3851,
"step": 48000
},
{
"epoch": 0.21,
"learning_rate": 0.0002645057466101418,
"loss": 3.3819,
"step": 48200
},
{
"epoch": 0.21,
"learning_rate": 0.000266210121537115,
"loss": 3.3786,
"step": 48400
},
{
"epoch": 0.21,
"learning_rate": 0.0002679189495048644,
"loss": 3.3629,
"step": 48600
},
{
"epoch": 0.21,
"learning_rate": 0.00026963220053000114,
"loss": 3.3762,
"step": 48800
},
{
"epoch": 0.21,
"learning_rate": 0.0002713498445515287,
"loss": 3.3814,
"step": 49000
},
{
"epoch": 0.21,
"learning_rate": 0.00027307185143137023,
"loss": 3.3762,
"step": 49200
},
{
"epoch": 0.21,
"learning_rate": 0.0002747981909548971,
"loss": 3.3616,
"step": 49400
},
{
"epoch": 0.21,
"learning_rate": 0.0002765288328314602,
"loss": 3.3614,
"step": 49600
},
{
"epoch": 0.21,
"learning_rate": 0.0002782637466949195,
"loss": 3.3758,
"step": 49800
},
{
"epoch": 0.21,
"learning_rate": 0.0002800029021041788,
"loss": 3.367,
"step": 50000
},
{
"epoch": 0.22,
"learning_rate": 0.00028174626854371866,
"loss": 3.3536,
"step": 50200
},
{
"epoch": 0.22,
"learning_rate": 0.0002834938154241324,
"loss": 3.3545,
"step": 50400
},
{
"epoch": 0.22,
"learning_rate": 0.00028524551208266224,
"loss": 3.369,
"step": 50600
},
{
"epoch": 0.22,
"learning_rate": 0.0002870013277837379,
"loss": 3.3578,
"step": 50800
},
{
"epoch": 0.22,
"learning_rate": 0.00028876123171951576,
"loss": 3.3439,
"step": 51000
},
{
"epoch": 0.22,
"learning_rate": 0.0002905251930104192,
"loss": 3.3568,
"step": 51200
},
{
"epoch": 0.22,
"learning_rate": 0.00029229318070568056,
"loss": 3.3605,
"step": 51400
},
{
"epoch": 0.22,
"learning_rate": 0.0002940651637838844,
"loss": 3.358,
"step": 51600
},
{
"epoch": 0.22,
"learning_rate": 0.0002958411111535111,
"loss": 3.3603,
"step": 51800
},
{
"epoch": 0.22,
"learning_rate": 0.00029762099165348357,
"loss": 3.3527,
"step": 52000
},
{
"epoch": 0.22,
"learning_rate": 0.0002994047740537128,
"loss": 3.3409,
"step": 52200
},
{
"epoch": 0.23,
"learning_rate": 0.00030119242705564664,
"loss": 3.3495,
"step": 52400
},
{
"epoch": 0.23,
"learning_rate": 0.00030298391929281857,
"loss": 3.328,
"step": 52600
},
{
"epoch": 0.23,
"learning_rate": 0.00030477921933139823,
"loss": 3.3592,
"step": 52800
},
{
"epoch": 0.23,
"learning_rate": 0.00030657829567074305,
"loss": 3.36,
"step": 53000
},
{
"epoch": 0.23,
"learning_rate": 0.0003083811167439507,
"loss": 3.3558,
"step": 53200
},
{
"epoch": 0.23,
"learning_rate": 0.0003101876509184131,
"loss": 3.3206,
"step": 53400
},
{
"epoch": 0.23,
"learning_rate": 0.00031199786649637145,
"loss": 3.3376,
"step": 53600
},
{
"epoch": 0.23,
"learning_rate": 0.0003138117317154723,
"loss": 3.3242,
"step": 53800
},
{
"epoch": 0.23,
"learning_rate": 0.0003156292147493255,
"loss": 3.3368,
"step": 54000
},
{
"epoch": 0.23,
"learning_rate": 0.00031745028370806165,
"loss": 3.3366,
"step": 54200
},
{
"epoch": 0.23,
"learning_rate": 0.00031927490663889203,
"loss": 3.3267,
"step": 54400
},
{
"epoch": 0.23,
"learning_rate": 0.00032110305152666953,
"loss": 3.302,
"step": 54600
},
{
"epoch": 0.24,
"learning_rate": 0.00032293468629445007,
"loss": 3.3237,
"step": 54800
},
{
"epoch": 0.24,
"learning_rate": 0.00032476977880405546,
"loss": 3.3207,
"step": 55000
},
{
"epoch": 0.24,
"learning_rate": 0.00032660829685663773,
"loss": 3.3215,
"step": 55200
},
{
"epoch": 0.24,
"learning_rate": 0.00032845020819324334,
"loss": 3.3107,
"step": 55400
},
{
"epoch": 0.24,
"learning_rate": 0.0003302954804953797,
"loss": 3.3153,
"step": 55600
},
{
"epoch": 0.24,
"learning_rate": 0.00033214408138558256,
"loss": 3.341,
"step": 55800
},
{
"epoch": 0.24,
"learning_rate": 0.0003339959784279831,
"loss": 3.3183,
"step": 56000
},
{
"epoch": 0.24,
"learning_rate": 0.00033585113912887776,
"loss": 3.3295,
"step": 56200
},
{
"epoch": 0.24,
"learning_rate": 0.0003377095309372985,
"loss": 3.3293,
"step": 56400
},
{
"epoch": 0.24,
"learning_rate": 0.0003395711212455839,
"loss": 3.3181,
"step": 56600
},
{
"epoch": 0.24,
"learning_rate": 0.0003414358773899506,
"loss": 3.3075,
"step": 56800
},
{
"epoch": 0.24,
"learning_rate": 0.00034330376665106695,
"loss": 3.3213,
"step": 57000
},
{
"epoch": 0.25,
"learning_rate": 0.0003451747562546278,
"loss": 3.3105,
"step": 57200
},
{
"epoch": 0.25,
"learning_rate": 0.00034704881337192784,
"loss": 3.2828,
"step": 57400
},
{
"epoch": 0.25,
"learning_rate": 0.00034892590512043947,
"loss": 3.3044,
"step": 57600
},
{
"epoch": 0.25,
"learning_rate": 0.00035080599856438877,
"loss": 3.306,
"step": 57800
},
{
"epoch": 0.25,
"learning_rate": 0.00035268906071533304,
"loss": 3.3226,
"step": 58000
},
{
"epoch": 0.25,
"learning_rate": 0.0003545750585327406,
"loss": 3.3227,
"step": 58200
},
{
"epoch": 0.25,
"learning_rate": 0.0003564639589245703,
"loss": 3.2888,
"step": 58400
},
{
"epoch": 0.25,
"learning_rate": 0.0003583557287478512,
"loss": 3.2918,
"step": 58600
},
{
"epoch": 0.25,
"learning_rate": 0.0003602503348092654,
"loss": 3.2857,
"step": 58800
},
{
"epoch": 0.25,
"learning_rate": 0.0003621477438657296,
"loss": 3.324,
"step": 59000
},
{
"epoch": 0.25,
"learning_rate": 0.00036404792262497885,
"loss": 3.2991,
"step": 59200
},
{
"epoch": 0.26,
"learning_rate": 0.00036595083774615053,
"loss": 3.2798,
"step": 59400
},
{
"epoch": 0.26,
"learning_rate": 0.0003678564558403689,
"loss": 3.3164,
"step": 59600
},
{
"epoch": 0.26,
"learning_rate": 0.0003697647434713321,
"loss": 3.2885,
"step": 59800
},
{
"epoch": 0.26,
"learning_rate": 0.0003716756671558975,
"loss": 3.3157,
"step": 60000
},
{
"epoch": 0.26,
"learning_rate": 0.0003735891933646703,
"loss": 3.2991,
"step": 60200
},
{
"epoch": 0.26,
"learning_rate": 0.00037550528852259106,
"loss": 3.2628,
"step": 60400
},
{
"epoch": 0.26,
"learning_rate": 0.00037742391900952516,
"loss": 3.2928,
"step": 60600
},
{
"epoch": 0.26,
"learning_rate": 0.0003793450511608526,
"loss": 3.2828,
"step": 60800
},
{
"epoch": 0.26,
"learning_rate": 0.00038126865126805905,
"loss": 3.2863,
"step": 61000
},
{
"epoch": 0.26,
"learning_rate": 0.0003831946855793267,
"loss": 3.2951,
"step": 61200
},
{
"epoch": 0.26,
"learning_rate": 0.00038512312030012676,
"loss": 3.2583,
"step": 61400
},
{
"epoch": 0.26,
"learning_rate": 0.0003870539215938128,
"loss": 3.2872,
"step": 61600
},
{
"epoch": 0.27,
"learning_rate": 0.00038898705558221367,
"loss": 3.2748,
"step": 61800
},
{
"epoch": 0.27,
"learning_rate": 0.00039092248834622883,
"loss": 3.3026,
"step": 62000
},
{
"epoch": 0.27,
"learning_rate": 0.00039286018592642224,
"loss": 3.2734,
"step": 62200
},
{
"epoch": 0.27,
"learning_rate": 0.00039480011432362007,
"loss": 3.2849,
"step": 62400
},
{
"epoch": 0.27,
"learning_rate": 0.00039674223949950514,
"loss": 3.2889,
"step": 62600
},
{
"epoch": 0.27,
"learning_rate": 0.0003986865273772159,
"loss": 3.2938,
"step": 62800
},
{
"epoch": 0.27,
"learning_rate": 0.00040063294384194367,
"loss": 3.2755,
"step": 63000
},
{
"epoch": 0.27,
"learning_rate": 0.0004025814547415307,
"loss": 3.2774,
"step": 63200
},
{
"epoch": 0.27,
"learning_rate": 0.00040453202588707036,
"loss": 3.2819,
"step": 63400
},
{
"epoch": 0.27,
"learning_rate": 0.0004064846230535067,
"loss": 3.2867,
"step": 63600
},
{
"epoch": 0.27,
"learning_rate": 0.00040843921198023417,
"loss": 3.2856,
"step": 63800
},
{
"epoch": 0.28,
"learning_rate": 0.0004103957583717001,
"loss": 3.2932,
"step": 64000
},
{
"epoch": 0.28,
"learning_rate": 0.0004123542278980058,
"loss": 3.272,
"step": 64200
},
{
"epoch": 0.28,
"learning_rate": 0.00041431458619550874,
"loss": 3.2697,
"step": 64400
},
{
"epoch": 0.28,
"learning_rate": 0.00041627679886742527,
"loss": 3.276,
"step": 64600
},
{
"epoch": 0.28,
"learning_rate": 0.0004182408314844355,
"loss": 3.2732,
"step": 64800
},
{
"epoch": 0.28,
"learning_rate": 0.00042020664958528574,
"loss": 3.2535,
"step": 65000
},
{
"epoch": 0.28,
"learning_rate": 0.0004221742186773941,
"loss": 3.2791,
"step": 65200
},
{
"epoch": 0.28,
"learning_rate": 0.0004241435042374555,
"loss": 3.2718,
"step": 65400
},
{
"epoch": 0.28,
"learning_rate": 0.0004261144717120477,
"loss": 3.2672,
"step": 65600
},
{
"epoch": 0.28,
"learning_rate": 0.00042808708651823654,
"loss": 3.2555,
"step": 65800
},
{
"epoch": 0.28,
"learning_rate": 0.00043006131404418424,
"loss": 3.2717,
"step": 66000
},
{
"epoch": 0.28,
"learning_rate": 0.00043203711964975595,
"loss": 3.2497,
"step": 66200
},
{
"epoch": 0.29,
"learning_rate": 0.00043401446866712684,
"loss": 3.2525,
"step": 66400
},
{
"epoch": 0.29,
"learning_rate": 0.000435993326401392,
"loss": 3.2523,
"step": 66600
},
{
"epoch": 0.29,
"learning_rate": 0.0004379736581311737,
"loss": 3.2431,
"step": 66800
},
{
"epoch": 0.29,
"learning_rate": 0.00043995542910923167,
"loss": 3.2571,
"step": 67000
},
{
"epoch": 0.29,
"learning_rate": 0.0004419386045630716,
"loss": 3.2445,
"step": 67200
},
{
"epoch": 0.29,
"learning_rate": 0.0004439231496955571,
"loss": 3.2508,
"step": 67400
},
{
"epoch": 0.29,
"learning_rate": 0.00044590902968551834,
"loss": 3.2435,
"step": 67600
},
{
"epoch": 0.29,
"learning_rate": 0.00044789620968836404,
"loss": 3.2436,
"step": 67800
},
{
"epoch": 0.29,
"learning_rate": 0.0004498846548366927,
"loss": 3.2533,
"step": 68000
},
{
"epoch": 0.29,
"learning_rate": 0.0004518743302409047,
"loss": 3.2523,
"step": 68200
},
{
"epoch": 0.29,
"learning_rate": 0.00045386520098981335,
"loss": 3.2284,
"step": 68400
},
{
"epoch": 0.29,
"learning_rate": 0.0004558572321512592,
"loss": 3.2285,
"step": 68600
},
{
"epoch": 0.3,
"learning_rate": 0.00045785038877272114,
"loss": 3.2395,
"step": 68800
},
{
"epoch": 0.3,
"learning_rate": 0.00045984463588193104,
"loss": 3.2592,
"step": 69000
},
{
"epoch": 0.3,
"learning_rate": 0.00046183993848748675,
"loss": 3.2478,
"step": 69200
},
{
"epoch": 0.3,
"learning_rate": 0.0004638362615794662,
"loss": 3.2303,
"step": 69400
},
{
"epoch": 0.3,
"learning_rate": 0.00046583357013004194,
"loss": 3.242,
"step": 69600
},
{
"epoch": 0.3,
"learning_rate": 0.00046783182909409496,
"loss": 3.2582,
"step": 69800
},
{
"epoch": 0.3,
"learning_rate": 0.00046983100340983056,
"loss": 3.2461,
"step": 70000
},
{
"epoch": 0.3,
"learning_rate": 0.00047183105799939297,
"loss": 3.2495,
"step": 70200
},
{
"epoch": 0.3,
"learning_rate": 0.0004738319577694809,
"loss": 3.2325,
"step": 70400
},
{
"epoch": 0.3,
"learning_rate": 0.0004758336676119636,
"loss": 3.2335,
"step": 70600
},
{
"epoch": 0.3,
"learning_rate": 0.0004778361524044967,
"loss": 3.2499,
"step": 70800
},
{
"epoch": 0.31,
"learning_rate": 0.00047983937701113794,
"loss": 3.2323,
"step": 71000
},
{
"epoch": 0.31,
"learning_rate": 0.00048184330628296484,
"loss": 3.221,
"step": 71200
},
{
"epoch": 0.31,
"learning_rate": 0.0004838479050586898,
"loss": 3.2344,
"step": 71400
},
{
"epoch": 0.31,
"learning_rate": 0.0004858531381652792,
"loss": 3.241,
"step": 71600
},
{
"epoch": 0.31,
"learning_rate": 0.0004878589704185682,
"loss": 3.2434,
"step": 71800
},
{
"epoch": 0.31,
"learning_rate": 0.00048986536662388,
"loss": 3.2375,
"step": 72000
},
{
"epoch": 0.31,
"learning_rate": 0.0004918722915766426,
"loss": 3.2237,
"step": 72200
},
{
"epoch": 0.31,
"learning_rate": 0.0004938797100630057,
"loss": 3.2665,
"step": 72400
},
{
"epoch": 0.31,
"learning_rate": 0.0004958875868604602,
"loss": 3.218,
"step": 72600
},
{
"epoch": 0.31,
"learning_rate": 0.0004978958867384551,
"loss": 3.2474,
"step": 72800
},
{
"epoch": 0.31,
"learning_rate": 0.000499904574459016,
"loss": 3.2423,
"step": 73000
},
{
"epoch": 0.31,
"learning_rate": 0.000501913614777363,
"loss": 3.2288,
"step": 73200
},
{
"epoch": 0.32,
"learning_rate": 0.0005039229724425303,
"loss": 3.2148,
"step": 73400
},
{
"epoch": 0.32,
"learning_rate": 0.0005059326121979831,
"loss": 3.2383,
"step": 73600
},
{
"epoch": 0.32,
"learning_rate": 0.0005079424987822374,
"loss": 3.236,
"step": 73800
},
{
"epoch": 0.32,
"learning_rate": 0.0005099525969294778,
"loss": 3.2231,
"step": 74000
},
{
"epoch": 0.32,
"learning_rate": 0.0005119628713701773,
"loss": 3.2215,
"step": 74200
},
{
"epoch": 0.32,
"learning_rate": 0.0005139732868317155,
"loss": 3.227,
"step": 74400
},
{
"epoch": 0.32,
"learning_rate": 0.0005159838080389977,
"loss": 3.2006,
"step": 74600
},
{
"epoch": 0.32,
"learning_rate": 0.0005179943997150736,
"loss": 3.2426,
"step": 74800
},
{
"epoch": 0.32,
"learning_rate": 0.0005200050265817561,
"loss": 3.2194,
"step": 75000
},
{
"epoch": 0.32,
"learning_rate": 0.0005220156533602416,
"loss": 3.223,
"step": 75200
},
{
"epoch": 0.32,
"learning_rate": 0.0005240262447717271,
"loss": 3.224,
"step": 75400
},
{
"epoch": 0.32,
"learning_rate": 0.0005260367655380307,
"loss": 3.2138,
"step": 75600
},
{
"epoch": 0.33,
"learning_rate": 0.0005280471803822096,
"loss": 3.2123,
"step": 75800
},
{
"epoch": 0.33,
"learning_rate": 0.0005300574540291803,
"loss": 3.2208,
"step": 76000
},
{
"epoch": 0.33,
"learning_rate": 0.000532067551206336,
"loss": 3.2199,
"step": 76200
},
{
"epoch": 0.33,
"learning_rate": 0.0005340774366441665,
"loss": 3.2367,
"step": 76400
},
{
"epoch": 0.33,
"learning_rate": 0.0005360870750768769,
"loss": 3.21,
"step": 76600
},
{
"epoch": 0.33,
"learning_rate": 0.0005380964312430063,
"loss": 3.2221,
"step": 76800
},
{
"epoch": 0.33,
"learning_rate": 0.0005401054698860466,
"loss": 3.2021,
"step": 77000
},
{
"epoch": 0.33,
"learning_rate": 0.0005421141557550603,
"loss": 3.2352,
"step": 77200
},
{
"epoch": 0.33,
"learning_rate": 0.0005441224536053012,
"loss": 3.1962,
"step": 77400
},
{
"epoch": 0.33,
"learning_rate": 0.0005461303281988298,
"loss": 3.2127,
"step": 77600
},
{
"epoch": 0.33,
"learning_rate": 0.000548137744305134,
"loss": 3.2307,
"step": 77800
},
{
"epoch": 0.34,
"learning_rate": 0.0005501446667017461,
"loss": 3.2053,
"step": 78000
},
{
"epoch": 0.34,
"learning_rate": 0.0005521510601748613,
"loss": 3.1936,
"step": 78200
},
{
"epoch": 0.34,
"learning_rate": 0.0005541568895199552,
"loss": 3.2019,
"step": 78400
},
{
"epoch": 0.34,
"learning_rate": 0.0005561621195424016,
"loss": 3.203,
"step": 78600
},
{
"epoch": 0.34,
"learning_rate": 0.0005581667150580907,
"loss": 3.2125,
"step": 78800
},
{
"epoch": 0.34,
"learning_rate": 0.0005601706408940451,
"loss": 3.2205,
"step": 79000
},
{
"epoch": 0.34,
"learning_rate": 0.0005621738618890382,
"loss": 3.2215,
"step": 79200
},
{
"epoch": 0.34,
"learning_rate": 0.0005641763428942106,
"loss": 3.2052,
"step": 79400
},
{
"epoch": 0.34,
"learning_rate": 0.0005661780487736866,
"loss": 3.2249,
"step": 79600
},
{
"epoch": 0.34,
"learning_rate": 0.0005681789444051913,
"loss": 3.1952,
"step": 79800
},
{
"epoch": 0.34,
"learning_rate": 0.0005701789946806666,
"loss": 3.1995,
"step": 80000
},
{
"epoch": 0.34,
"learning_rate": 0.0005721781645068867,
"loss": 3.2116,
"step": 80200
},
{
"epoch": 0.35,
"learning_rate": 0.000574176418806075,
"loss": 3.2256,
"step": 80400
},
{
"epoch": 0.35,
"learning_rate": 0.0005761737225165182,
"loss": 3.2221,
"step": 80600
},
{
"epoch": 0.35,
"learning_rate": 0.0005781700405931827,
"loss": 3.1956,
"step": 80800
},
{
"epoch": 0.35,
"learning_rate": 0.0005801653380083288,
"loss": 3.2031,
"step": 81000
},
{
"epoch": 0.35,
"learning_rate": 0.0005821595797521253,
"loss": 3.2029,
"step": 81200
},
{
"epoch": 0.35,
"learning_rate": 0.0005841527308332645,
"loss": 3.2065,
"step": 81400
},
{
"epoch": 0.35,
"learning_rate": 0.0005861447562795751,
"loss": 3.1783,
"step": 81600
},
{
"epoch": 0.35,
"learning_rate": 0.0005881356211386371,
"loss": 3.2181,
"step": 81800
},
{
"epoch": 0.35,
"learning_rate": 0.0005901252904783932,
"loss": 3.1991,
"step": 82000
},
{
"epoch": 0.35,
"learning_rate": 0.0005921137293877644,
"loss": 3.2011,
"step": 82200
},
{
"epoch": 0.35,
"learning_rate": 0.0005941009029772594,
"loss": 3.1852,
"step": 82400
},
{
"epoch": 0.35,
"learning_rate": 0.00059608677637959,
"loss": 3.1911,
"step": 82600
},
{
"epoch": 0.36,
"learning_rate": 0.00059807131475028,
"loss": 3.1999,
"step": 82800
},
{
"epoch": 0.36,
"learning_rate": 0.000600054483268279,
"loss": 3.1853,
"step": 83000
},
{
"epoch": 0.36,
"learning_rate": 0.0006020362471365711,
"loss": 3.1928,
"step": 83200
},
{
"epoch": 0.36,
"learning_rate": 0.0006040165715827878,
"loss": 3.1833,
"step": 83400
},
{
"epoch": 0.36,
"learning_rate": 0.0006059954218598161,
"loss": 3.1972,
"step": 83600
},
{
"epoch": 0.36,
"learning_rate": 0.0006079727632464092,
"loss": 3.2052,
"step": 83800
},
{
"epoch": 0.36,
"learning_rate": 0.000609948561047796,
"loss": 3.1881,
"step": 84000
},
{
"epoch": 0.36,
"learning_rate": 0.0006119227805962891,
"loss": 3.174,
"step": 84200
},
{
"epoch": 0.36,
"learning_rate": 0.0006138953872518932,
"loss": 3.1831,
"step": 84400
},
{
"epoch": 0.36,
"learning_rate": 0.0006158663464029133,
"loss": 3.1961,
"step": 84600
},
{
"epoch": 0.36,
"learning_rate": 0.0006178356234665618,
"loss": 3.1759,
"step": 84800
},
{
"epoch": 0.37,
"learning_rate": 0.0006198031838895652,
"loss": 3.1728,
"step": 85000
},
{
"epoch": 0.37,
"learning_rate": 0.0006217689931487707,
"loss": 3.194,
"step": 85200
},
{
"epoch": 0.37,
"learning_rate": 0.0006237330167517514,
"loss": 3.1747,
"step": 85400
},
{
"epoch": 0.37,
"learning_rate": 0.0006256952202374121,
"loss": 3.1805,
"step": 85600
},
{
"epoch": 0.37,
"learning_rate": 0.0006276555691765935,
"loss": 3.1781,
"step": 85800
},
{
"epoch": 0.37,
"learning_rate": 0.0006296140291726768,
"loss": 3.1829,
"step": 86000
},
{
"epoch": 0.37,
"learning_rate": 0.0006315705658621865,
"loss": 3.1832,
"step": 86200
},
{
"epoch": 0.37,
"learning_rate": 0.0006335251449153937,
"loss": 3.1634,
"step": 86400
},
{
"epoch": 0.37,
"learning_rate": 0.0006354777320369192,
"loss": 3.1664,
"step": 86600
},
{
"epoch": 0.37,
"learning_rate": 0.0006374282929663341,
"loss": 3.1576,
"step": 86800
},
{
"epoch": 0.37,
"learning_rate": 0.0006393767934787615,
"loss": 3.1667,
"step": 87000
},
{
"epoch": 0.37,
"learning_rate": 0.0006413231993854767,
"loss": 3.1718,
"step": 87200
},
{
"epoch": 0.38,
"learning_rate": 0.0006432674765345077,
"loss": 3.1645,
"step": 87400
},
{
"epoch": 0.38,
"learning_rate": 0.0006452095908112336,
"loss": 3.2063,
"step": 87600
},
{
"epoch": 0.38,
"learning_rate": 0.0006471495081389843,
"loss": 3.2638,
"step": 87800
},
{
"epoch": 0.38,
"learning_rate": 0.0006490871944796369,
"loss": 3.2685,
"step": 88000
},
{
"epoch": 0.38,
"learning_rate": 0.0006510226158342146,
"loss": 3.2519,
"step": 88200
},
{
"epoch": 0.38,
"learning_rate": 0.0006529557382434818,
"loss": 3.2101,
"step": 88400
},
{
"epoch": 0.38,
"learning_rate": 0.0006548865277885411,
"loss": 3.2109,
"step": 88600
},
{
"epoch": 0.38,
"learning_rate": 0.0006568149505914273,
"loss": 3.2291,
"step": 88800
},
{
"epoch": 0.38,
"learning_rate": 0.0006587409728157031,
"loss": 3.2282,
"step": 89000
},
{
"epoch": 0.38,
"learning_rate": 0.000660664560667052,
"loss": 3.2404,
"step": 89200
},
{
"epoch": 0.38,
"learning_rate": 0.0006625856803938711,
"loss": 3.2334,
"step": 89400
},
{
"epoch": 0.39,
"learning_rate": 0.000664504298287864,
"loss": 3.243,
"step": 89600
},
{
"epoch": 0.39,
"learning_rate": 0.0006664203806846319,
"loss": 3.2475,
"step": 89800
},
{
"epoch": 0.39,
"learning_rate": 0.0006683338939642638,
"loss": 3.3556,
"step": 90000
}
],
"max_steps": 500000,
"num_train_epochs": 3,
"total_flos": 1.4344469839872e+17,
"trial_name": null,
"trial_params": null
}