{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3867640739149119, "global_step": 90000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.0004211081201405e-05, "loss": 3.9197, "step": 200 }, { "epoch": 0.0, "learning_rate": 4.0016844250917146e-05, "loss": 3.9108, "step": 400 }, { "epoch": 0.0, "learning_rate": 4.003789928748371e-05, "loss": 3.8655, "step": 600 }, { "epoch": 0.0, "learning_rate": 4.006737582146571e-05, "loss": 3.8602, "step": 800 }, { "epoch": 0.0, "learning_rate": 4.010527333566261e-05, "loss": 3.8478, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.0151591165118474e-05, "loss": 3.8608, "step": 1200 }, { "epoch": 0.01, "learning_rate": 4.020632849713237e-05, "loss": 3.8431, "step": 1400 }, { "epoch": 0.01, "learning_rate": 4.0269484371273996e-05, "loss": 3.8447, "step": 1600 }, { "epoch": 0.01, "learning_rate": 4.034105767939918e-05, "loss": 3.8422, "step": 1800 }, { "epoch": 0.01, "learning_rate": 4.0421047165670686e-05, "loss": 3.8019, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.050945142657905e-05, "loss": 3.8109, "step": 2200 }, { "epoch": 0.01, "learning_rate": 4.0606268910968035e-05, "loss": 3.8192, "step": 2400 }, { "epoch": 0.01, "learning_rate": 4.071149792006154e-05, "loss": 3.8037, "step": 2600 }, { "epoch": 0.01, "learning_rate": 4.082513660749298e-05, "loss": 3.8074, "step": 2800 }, { "epoch": 0.01, "learning_rate": 4.0947182979338874e-05, "loss": 3.801, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.1077634894152375e-05, "loss": 3.786, "step": 3200 }, { "epoch": 0.01, "learning_rate": 4.121649006300163e-05, "loss": 3.7799, "step": 3400 }, { "epoch": 0.02, "learning_rate": 4.136374604951046e-05, "loss": 3.7721, "step": 3600 }, { "epoch": 0.02, "learning_rate": 4.1519400269899535e-05, "loss": 3.7829, "step": 3800 }, { "epoch": 0.02, "learning_rate": 4.1683449993033545e-05, "loss": 3.7922, "step": 4000 }, { "epoch": 0.02, "learning_rate": 4.185589234046783e-05, "loss": 3.7686, "step": 4200 }, { "epoch": 0.02, "learning_rate": 4.203672428649923e-05, "loss": 3.7742, "step": 4400 }, { "epoch": 0.02, "learning_rate": 4.2225942658219505e-05, "loss": 3.753, "step": 4600 }, { "epoch": 0.02, "learning_rate": 4.242354413557057e-05, "loss": 3.7663, "step": 4800 }, { "epoch": 0.02, "learning_rate": 4.262952525140289e-05, "loss": 3.7589, "step": 5000 }, { "epoch": 0.02, "learning_rate": 4.2843882391536666e-05, "loss": 3.7652, "step": 5200 }, { "epoch": 0.02, "learning_rate": 4.306661179482435e-05, "loss": 3.7618, "step": 5400 }, { "epoch": 0.02, "learning_rate": 4.329770955321802e-05, "loss": 3.7441, "step": 5600 }, { "epoch": 0.02, "learning_rate": 4.3537171611836355e-05, "loss": 3.7364, "step": 5800 }, { "epoch": 0.03, "learning_rate": 4.3784993769037384e-05, "loss": 3.7501, "step": 6000 }, { "epoch": 0.03, "learning_rate": 4.404117167649071e-05, "loss": 3.7363, "step": 6200 }, { "epoch": 0.03, "learning_rate": 4.43057008392547e-05, "loss": 3.7279, "step": 6400 }, { "epoch": 0.03, "learning_rate": 4.457857661585541e-05, "loss": 3.7158, "step": 6600 }, { "epoch": 0.03, "learning_rate": 4.485979421836779e-05, "loss": 3.74, "step": 6800 }, { "epoch": 0.03, "learning_rate": 4.514934871249906e-05, "loss": 3.7441, "step": 7000 }, { "epoch": 0.03, "learning_rate": 4.544723501767687e-05, "loss": 3.7141, "step": 7200 }, { "epoch": 0.03, "learning_rate": 4.575344790713656e-05, "loss": 3.7264, "step": 7400 }, { "epoch": 0.03, "learning_rate": 4.6067982008014406e-05, "loss": 3.7221, "step": 7600 }, { "epoch": 0.03, "learning_rate": 4.639083180144098e-05, "loss": 3.7363, "step": 7800 }, { "epoch": 0.03, "learning_rate": 4.672199162263839e-05, "loss": 3.73, "step": 8000 }, { "epoch": 0.04, "learning_rate": 4.706145566101981e-05, "loss": 3.7205, "step": 8200 }, { "epoch": 0.04, "learning_rate": 4.740921796029065e-05, "loss": 3.7192, "step": 8400 }, { "epoch": 0.04, "learning_rate": 4.776527241855393e-05, "loss": 3.7095, "step": 8600 }, { "epoch": 0.04, "learning_rate": 4.812961278841707e-05, "loss": 3.7311, "step": 8800 }, { "epoch": 0.04, "learning_rate": 4.85022326771014e-05, "loss": 3.7031, "step": 9000 }, { "epoch": 0.04, "learning_rate": 4.888312554655438e-05, "loss": 3.6934, "step": 9200 }, { "epoch": 0.04, "learning_rate": 4.92722847135643e-05, "loss": 3.7218, "step": 9400 }, { "epoch": 0.04, "learning_rate": 4.9669703349877704e-05, "loss": 3.711, "step": 9600 }, { "epoch": 0.04, "learning_rate": 5.0075374482318863e-05, "loss": 3.7002, "step": 9800 }, { "epoch": 0.04, "learning_rate": 5.0489290992912625e-05, "loss": 3.698, "step": 10000 }, { "epoch": 0.04, "learning_rate": 5.0911445619008545e-05, "loss": 3.7097, "step": 10200 }, { "epoch": 0.04, "learning_rate": 5.1341830953409266e-05, "loss": 3.7012, "step": 10400 }, { "epoch": 0.05, "learning_rate": 5.1780439444499746e-05, "loss": 3.6847, "step": 10600 }, { "epoch": 0.05, "learning_rate": 5.2227263396380295e-05, "loss": 3.7072, "step": 10800 }, { "epoch": 0.05, "learning_rate": 5.2682294969000907e-05, "loss": 3.7052, "step": 11000 }, { "epoch": 0.05, "learning_rate": 5.31455261782996e-05, "loss": 3.6917, "step": 11200 }, { "epoch": 0.05, "learning_rate": 5.361694889634207e-05, "loss": 3.6951, "step": 11400 }, { "epoch": 0.05, "learning_rate": 5.409655485146426e-05, "loss": 3.6798, "step": 11600 }, { "epoch": 0.05, "learning_rate": 5.458433562841795e-05, "loss": 3.6888, "step": 11800 }, { "epoch": 0.05, "learning_rate": 5.50802826685176e-05, "loss": 3.685, "step": 12000 }, { "epoch": 0.05, "learning_rate": 5.558438726979166e-05, "loss": 3.6653, "step": 12200 }, { "epoch": 0.05, "learning_rate": 5.6096640587133956e-05, "loss": 3.6933, "step": 12400 }, { "epoch": 0.05, "learning_rate": 5.661703363246001e-05, "loss": 3.685, "step": 12600 }, { "epoch": 0.06, "learning_rate": 5.714555727486404e-05, "loss": 3.6799, "step": 12800 }, { "epoch": 0.06, "learning_rate": 5.768220224077955e-05, "loss": 3.656, "step": 13000 }, { "epoch": 0.06, "learning_rate": 5.822695911414171e-05, "loss": 3.675, "step": 13200 }, { "epoch": 0.06, "learning_rate": 5.877981833655294e-05, "loss": 3.6582, "step": 13400 }, { "epoch": 0.06, "learning_rate": 5.934077020745062e-05, "loss": 3.7009, "step": 13600 }, { "epoch": 0.06, "learning_rate": 5.990980488427666e-05, "loss": 3.6732, "step": 13800 }, { "epoch": 0.06, "learning_rate": 6.048691238265099e-05, "loss": 3.6509, "step": 14000 }, { "epoch": 0.06, "learning_rate": 6.107208257654653e-05, "loss": 3.6353, "step": 14200 }, { "epoch": 0.06, "learning_rate": 6.166530519846635e-05, "loss": 3.6662, "step": 14400 }, { "epoch": 0.06, "learning_rate": 6.226656983962476e-05, "loss": 3.6634, "step": 14600 }, { "epoch": 0.06, "learning_rate": 6.287586595012898e-05, "loss": 3.6607, "step": 14800 }, { "epoch": 0.06, "learning_rate": 6.349318283916478e-05, "loss": 3.654, "step": 15000 }, { "epoch": 0.07, "learning_rate": 6.411850967518416e-05, "loss": 3.6465, "step": 15200 }, { "epoch": 0.07, "learning_rate": 6.475183548609517e-05, "loss": 3.6527, "step": 15400 }, { "epoch": 0.07, "learning_rate": 6.539314915945428e-05, "loss": 3.6401, "step": 15600 }, { "epoch": 0.07, "learning_rate": 6.604243944266183e-05, "loss": 3.6453, "step": 15800 }, { "epoch": 0.07, "learning_rate": 6.669969494315882e-05, "loss": 3.6364, "step": 16000 }, { "epoch": 0.07, "learning_rate": 6.736490412862753e-05, "loss": 3.6379, "step": 16200 }, { "epoch": 0.07, "learning_rate": 6.803805532719332e-05, "loss": 3.6245, "step": 16400 }, { "epoch": 0.07, "learning_rate": 6.871913672763005e-05, "loss": 3.6333, "step": 16600 }, { "epoch": 0.07, "learning_rate": 6.940813637956603e-05, "loss": 3.6452, "step": 16800 }, { "epoch": 0.07, "learning_rate": 7.010504219369547e-05, "loss": 3.6487, "step": 17000 }, { "epoch": 0.07, "learning_rate": 7.080984194198902e-05, "loss": 3.6442, "step": 17200 }, { "epoch": 0.07, "learning_rate": 7.152252325790952e-05, "loss": 3.6384, "step": 17400 }, { "epoch": 0.08, "learning_rate": 7.22430736366282e-05, "loss": 3.6199, "step": 17600 }, { "epoch": 0.08, "learning_rate": 7.297148043524434e-05, "loss": 3.6341, "step": 17800 }, { "epoch": 0.08, "learning_rate": 7.370773087300741e-05, "loss": 3.6322, "step": 18000 }, { "epoch": 0.08, "learning_rate": 7.445181203154054e-05, "loss": 3.629, "step": 18200 }, { "epoch": 0.08, "learning_rate": 7.52037108550682e-05, "loss": 3.6105, "step": 18400 }, { "epoch": 0.08, "learning_rate": 7.596341415064452e-05, "loss": 3.6412, "step": 18600 }, { "epoch": 0.08, "learning_rate": 7.673090858838487e-05, "loss": 3.6355, "step": 18800 }, { "epoch": 0.08, "learning_rate": 7.750618070170043e-05, "loss": 3.6114, "step": 19000 }, { "epoch": 0.08, "learning_rate": 7.828921688753326e-05, "loss": 3.6142, "step": 19200 }, { "epoch": 0.08, "learning_rate": 7.908000340659633e-05, "loss": 3.6209, "step": 19400 }, { "epoch": 0.08, "learning_rate": 7.987852638361337e-05, "loss": 3.5979, "step": 19600 }, { "epoch": 0.09, "learning_rate": 8.06847718075632e-05, "loss": 3.6031, "step": 19800 }, { "epoch": 0.09, "learning_rate": 8.149872553192528e-05, "loss": 3.606, "step": 20000 }, { "epoch": 0.09, "learning_rate": 8.232037327492777e-05, "loss": 3.6232, "step": 20200 }, { "epoch": 0.09, "learning_rate": 8.314970061979832e-05, "loss": 3.6182, "step": 20400 }, { "epoch": 0.09, "learning_rate": 8.398669301501699e-05, "loss": 3.5904, "step": 20600 }, { "epoch": 0.09, "learning_rate": 8.483133577457161e-05, "loss": 3.6162, "step": 20800 }, { "epoch": 0.09, "learning_rate": 8.568361407821506e-05, "loss": 3.6084, "step": 21000 }, { "epoch": 0.09, "learning_rate": 8.65435129717262e-05, "loss": 3.598, "step": 21200 }, { "epoch": 0.09, "learning_rate": 8.74110173671712e-05, "loss": 3.6229, "step": 21400 }, { "epoch": 0.09, "learning_rate": 8.828611204316915e-05, "loss": 3.6101, "step": 21600 }, { "epoch": 0.09, "learning_rate": 8.916878164515847e-05, "loss": 3.5859, "step": 21800 }, { "epoch": 0.09, "learning_rate": 9.005901068566706e-05, "loss": 3.6207, "step": 22000 }, { "epoch": 0.1, "learning_rate": 9.095678354458317e-05, "loss": 3.6058, "step": 22200 }, { "epoch": 0.1, "learning_rate": 9.18620844694301e-05, "loss": 3.5935, "step": 22400 }, { "epoch": 0.1, "learning_rate": 9.27748975756424e-05, "loss": 3.5917, "step": 22600 }, { "epoch": 0.1, "learning_rate": 9.369520684684475e-05, "loss": 3.5861, "step": 22800 }, { "epoch": 0.1, "learning_rate": 9.462299613513243e-05, "loss": 3.5976, "step": 23000 }, { "epoch": 0.1, "learning_rate": 9.555824916135554e-05, "loss": 3.5914, "step": 23200 }, { "epoch": 0.1, "learning_rate": 9.650094951540393e-05, "loss": 3.5778, "step": 23400 }, { "epoch": 0.1, "learning_rate": 9.745108065649507e-05, "loss": 3.6049, "step": 23600 }, { "epoch": 0.1, "learning_rate": 9.840862591346507e-05, "loss": 3.5878, "step": 23800 }, { "epoch": 0.1, "learning_rate": 9.93735684850606e-05, "loss": 3.5918, "step": 24000 }, { "epoch": 0.1, "learning_rate": 0.00010034589144023332, "loss": 3.59, "step": 24200 }, { "epoch": 0.1, "learning_rate": 0.00010132557771843796, "loss": 3.5815, "step": 24400 }, { "epoch": 0.11, "learning_rate": 0.00010231261012993076, "loss": 3.5806, "step": 24600 }, { "epoch": 0.11, "learning_rate": 0.0001033069713560718, "loss": 3.5927, "step": 24800 }, { "epoch": 0.11, "learning_rate": 0.00010430864394962811, "loss": 3.595, "step": 25000 }, { "epoch": 0.11, "learning_rate": 0.00010531761033508039, "loss": 3.5751, "step": 25200 }, { "epoch": 0.11, "learning_rate": 0.00010633385280893134, "loss": 3.5741, "step": 25400 }, { "epoch": 0.11, "learning_rate": 0.00010735735354001593, "loss": 3.5752, "step": 25600 }, { "epoch": 0.11, "learning_rate": 0.00010838809456981471, "loss": 3.5666, "step": 25800 }, { "epoch": 0.11, "learning_rate": 0.00010942605781276871, "loss": 3.5594, "step": 26000 }, { "epoch": 0.11, "learning_rate": 0.00011047122505659652, "loss": 3.5652, "step": 26200 }, { "epoch": 0.11, "learning_rate": 0.00011152357796261427, "loss": 3.5546, "step": 26400 }, { "epoch": 0.11, "learning_rate": 0.00011258309806605742, "loss": 3.5427, "step": 26600 }, { "epoch": 0.12, "learning_rate": 0.00011364976677640404, "loss": 3.5766, "step": 26800 }, { "epoch": 0.12, "learning_rate": 0.00011472356537770196, "loss": 3.5791, "step": 27000 }, { "epoch": 0.12, "learning_rate": 0.00011580447502889649, "loss": 3.5722, "step": 27200 }, { "epoch": 0.12, "learning_rate": 0.00011689247676416152, "loss": 3.5775, "step": 27400 }, { "epoch": 0.12, "learning_rate": 0.00011798755149323179, "loss": 3.5579, "step": 27600 }, { "epoch": 0.12, "learning_rate": 0.00011908968000173793, "loss": 3.5618, "step": 27800 }, { "epoch": 0.12, "learning_rate": 0.00012019884295154414, "loss": 3.5614, "step": 28000 }, { "epoch": 0.12, "learning_rate": 0.00012131502088108667, "loss": 3.5444, "step": 28200 }, { "epoch": 0.12, "learning_rate": 0.00012243819420571607, "loss": 3.5463, "step": 28400 }, { "epoch": 0.12, "learning_rate": 0.00012356834321804048, "loss": 3.5502, "step": 28600 }, { "epoch": 0.12, "learning_rate": 0.00012470544808827115, "loss": 3.5287, "step": 28800 }, { "epoch": 0.12, "learning_rate": 0.00012584948886457082, "loss": 3.5414, "step": 29000 }, { "epoch": 0.13, "learning_rate": 0.00012700044547340377, "loss": 3.5504, "step": 29200 }, { "epoch": 0.13, "learning_rate": 0.00012815829771988744, "loss": 3.5381, "step": 29400 }, { "epoch": 0.13, "learning_rate": 0.00012932302528814797, "loss": 3.5551, "step": 29600 }, { "epoch": 0.13, "learning_rate": 0.00013049460774167522, "loss": 3.5331, "step": 29800 }, { "epoch": 0.13, "learning_rate": 0.00013167302452368236, "loss": 3.5359, "step": 30000 }, { "epoch": 0.13, "learning_rate": 0.0001328582549574664, "loss": 3.5252, "step": 30200 }, { "epoch": 0.13, "learning_rate": 0.00013405027824677038, "loss": 3.5104, "step": 30400 }, { "epoch": 0.13, "learning_rate": 0.00013524907347614926, "loss": 3.5253, "step": 30600 }, { "epoch": 0.13, "learning_rate": 0.00013645461961133603, "loss": 3.5249, "step": 30800 }, { "epoch": 0.13, "learning_rate": 0.00013766689549961136, "loss": 3.5374, "step": 31000 }, { "epoch": 0.13, "learning_rate": 0.00013888587987017427, "loss": 3.5093, "step": 31200 }, { "epoch": 0.13, "learning_rate": 0.00014011155133451586, "loss": 3.5465, "step": 31400 }, { "epoch": 0.14, "learning_rate": 0.00014134388838679408, "loss": 3.5362, "step": 31600 }, { "epoch": 0.14, "learning_rate": 0.00014258286940421164, "loss": 3.5233, "step": 31800 }, { "epoch": 0.14, "learning_rate": 0.00014382847264739456, "loss": 3.5425, "step": 32000 }, { "epoch": 0.14, "learning_rate": 0.00014508067626077482, "loss": 3.5178, "step": 32200 }, { "epoch": 0.14, "learning_rate": 0.00014633945827297273, "loss": 3.5288, "step": 32400 }, { "epoch": 0.14, "learning_rate": 0.00014760479659718304, "loss": 3.52, "step": 32600 }, { "epoch": 0.14, "learning_rate": 0.00014887666903156218, "loss": 3.5121, "step": 32800 }, { "epoch": 0.14, "learning_rate": 0.0001501550532596183, "loss": 3.5191, "step": 33000 }, { "epoch": 0.14, "learning_rate": 0.00015143992685060208, "loss": 3.5097, "step": 33200 }, { "epoch": 0.14, "learning_rate": 0.00015273126725990098, "loss": 3.5034, "step": 33400 }, { "epoch": 0.14, "learning_rate": 0.00015402905182943438, "loss": 3.484, "step": 33600 }, { "epoch": 0.15, "learning_rate": 0.00015533325778805166, "loss": 3.5111, "step": 33800 }, { "epoch": 0.15, "learning_rate": 0.00015664386225193092, "loss": 3.5041, "step": 34000 }, { "epoch": 0.15, "learning_rate": 0.00015796084222498145, "loss": 3.5126, "step": 34200 }, { "epoch": 0.15, "learning_rate": 0.0001592841745992464, "loss": 3.4939, "step": 34400 }, { "epoch": 0.15, "learning_rate": 0.00016061383615530893, "loss": 3.4728, "step": 34600 }, { "epoch": 0.15, "learning_rate": 0.0001619498035626989, "loss": 3.4671, "step": 34800 }, { "epoch": 0.15, "learning_rate": 0.00016329205338030289, "loss": 3.5058, "step": 35000 }, { "epoch": 0.15, "learning_rate": 0.00016464056205677522, "loss": 3.4973, "step": 35200 }, { "epoch": 0.15, "learning_rate": 0.0001659953059309513, "loss": 3.488, "step": 35400 }, { "epoch": 0.15, "learning_rate": 0.00016735626123226218, "loss": 3.4953, "step": 35600 }, { "epoch": 0.15, "learning_rate": 0.00016872340408115283, "loss": 3.4772, "step": 35800 }, { "epoch": 0.15, "learning_rate": 0.00017009671048950003, "loss": 3.4866, "step": 36000 }, { "epoch": 0.16, "learning_rate": 0.00017147615636103365, "loss": 3.4752, "step": 36200 }, { "epoch": 0.16, "learning_rate": 0.00017286171749175986, "loss": 3.4878, "step": 36400 }, { "epoch": 0.16, "learning_rate": 0.0001742533695703849, "loss": 3.468, "step": 36600 }, { "epoch": 0.16, "learning_rate": 0.0001756510881787427, "loss": 3.4708, "step": 36800 }, { "epoch": 0.16, "learning_rate": 0.00017705484879222265, "loss": 3.4879, "step": 37000 }, { "epoch": 0.16, "learning_rate": 0.0001784646267801997, "loss": 3.4787, "step": 37200 }, { "epoch": 0.16, "learning_rate": 0.00017988039740646764, "loss": 3.4673, "step": 37400 }, { "epoch": 0.16, "learning_rate": 0.00018130213582967188, "loss": 3.4794, "step": 37600 }, { "epoch": 0.16, "learning_rate": 0.00018272981710374596, "loss": 3.4561, "step": 37800 }, { "epoch": 0.16, "learning_rate": 0.00018416341617834915, "loss": 3.4599, "step": 38000 }, { "epoch": 0.16, "learning_rate": 0.00018560290789930596, "loss": 3.454, "step": 38200 }, { "epoch": 0.17, "learning_rate": 0.00018704826700904756, "loss": 3.4628, "step": 38400 }, { "epoch": 0.17, "learning_rate": 0.00018849946814705483, "loss": 3.4557, "step": 38600 }, { "epoch": 0.17, "learning_rate": 0.0001899564858503036, "loss": 3.4584, "step": 38800 }, { "epoch": 0.17, "learning_rate": 0.00019141929455371092, "loss": 3.4492, "step": 39000 }, { "epoch": 0.17, "learning_rate": 0.00019288786859058442, "loss": 3.4641, "step": 39200 }, { "epoch": 0.17, "learning_rate": 0.00019436218219307173, "loss": 3.4665, "step": 39400 }, { "epoch": 0.17, "learning_rate": 0.00019584220949261325, "loss": 3.4503, "step": 39600 }, { "epoch": 0.17, "learning_rate": 0.00019732792452039607, "loss": 3.4438, "step": 39800 }, { "epoch": 0.17, "learning_rate": 0.00019881930120780906, "loss": 3.4454, "step": 40000 }, { "epoch": 0.17, "learning_rate": 0.00020031631338690114, "loss": 3.4357, "step": 40200 }, { "epoch": 0.17, "learning_rate": 0.00020181893479083945, "loss": 3.4561, "step": 40400 }, { "epoch": 0.17, "learning_rate": 0.00020332713905437056, "loss": 3.4535, "step": 40600 }, { "epoch": 0.18, "learning_rate": 0.00020484089971428406, "loss": 3.4523, "step": 40800 }, { "epoch": 0.18, "learning_rate": 0.00020636019020987535, "loss": 3.4449, "step": 41000 }, { "epoch": 0.18, "learning_rate": 0.00020788498388341244, "loss": 3.4488, "step": 41200 }, { "epoch": 0.18, "learning_rate": 0.00020941525398060392, "loss": 3.431, "step": 41400 }, { "epoch": 0.18, "learning_rate": 0.00021095097365106798, "loss": 3.4441, "step": 41600 }, { "epoch": 0.18, "learning_rate": 0.0002124921159488034, "loss": 3.4197, "step": 41800 }, { "epoch": 0.18, "learning_rate": 0.0002140386538326632, "loss": 3.4281, "step": 42000 }, { "epoch": 0.18, "learning_rate": 0.00021559056016682795, "loss": 3.4229, "step": 42200 }, { "epoch": 0.18, "learning_rate": 0.00021714780772128265, "loss": 3.4486, "step": 42400 }, { "epoch": 0.18, "learning_rate": 0.00021871036917229462, "loss": 3.4449, "step": 42600 }, { "epoch": 0.18, "learning_rate": 0.00022027821710289205, "loss": 3.4099, "step": 42800 }, { "epoch": 0.18, "learning_rate": 0.0002218513240033463, "loss": 3.4309, "step": 43000 }, { "epoch": 0.19, "learning_rate": 0.00022342966227165346, "loss": 3.4188, "step": 43200 }, { "epoch": 0.19, "learning_rate": 0.00022501320421401943, "loss": 3.4238, "step": 43400 }, { "epoch": 0.19, "learning_rate": 0.00022660192204534534, "loss": 3.4067, "step": 43600 }, { "epoch": 0.19, "learning_rate": 0.00022819578788971543, "loss": 3.428, "step": 43800 }, { "epoch": 0.19, "learning_rate": 0.000229794773780886, "loss": 3.4282, "step": 44000 }, { "epoch": 0.19, "learning_rate": 0.00023139885166277606, "loss": 3.411, "step": 44200 }, { "epoch": 0.19, "learning_rate": 0.00023300799338995962, "loss": 3.4144, "step": 44400 }, { "epoch": 0.19, "learning_rate": 0.00023462217072816, "loss": 3.3912, "step": 44600 }, { "epoch": 0.19, "learning_rate": 0.0002362413553547441, "loss": 3.4082, "step": 44800 }, { "epoch": 0.19, "learning_rate": 0.00023786551885922075, "loss": 3.4071, "step": 45000 }, { "epoch": 0.19, "learning_rate": 0.0002394946327437383, "loss": 3.3977, "step": 45200 }, { "epoch": 0.2, "learning_rate": 0.00024112866842358467, "loss": 3.4155, "step": 45400 }, { "epoch": 0.2, "learning_rate": 0.00024276759722768946, "loss": 3.4112, "step": 45600 }, { "epoch": 0.2, "learning_rate": 0.0002444113903991265, "loss": 3.4177, "step": 45800 }, { "epoch": 0.2, "learning_rate": 0.0002460600190956186, "loss": 3.3751, "step": 46000 }, { "epoch": 0.2, "learning_rate": 0.0002477134543900436, "loss": 3.4168, "step": 46200 }, { "epoch": 0.2, "learning_rate": 0.00024937166727094203, "loss": 3.3941, "step": 46400 }, { "epoch": 0.2, "learning_rate": 0.0002510346286430259, "loss": 3.3995, "step": 46600 }, { "epoch": 0.2, "learning_rate": 0.0002527023093276896, "loss": 3.401, "step": 46800 }, { "epoch": 0.2, "learning_rate": 0.0002543746800635215, "loss": 3.3874, "step": 47000 }, { "epoch": 0.2, "learning_rate": 0.0002560517115068175, "loss": 3.3856, "step": 47200 }, { "epoch": 0.2, "learning_rate": 0.00025773337423209606, "loss": 3.3732, "step": 47400 }, { "epoch": 0.2, "learning_rate": 0.0002594196387326143, "loss": 3.3986, "step": 47600 }, { "epoch": 0.21, "learning_rate": 0.00026111047542088587, "loss": 3.3909, "step": 47800 }, { "epoch": 0.21, "learning_rate": 0.00026280585462919984, "loss": 3.3851, "step": 48000 }, { "epoch": 0.21, "learning_rate": 0.0002645057466101418, "loss": 3.3819, "step": 48200 }, { "epoch": 0.21, "learning_rate": 0.000266210121537115, "loss": 3.3786, "step": 48400 }, { "epoch": 0.21, "learning_rate": 0.0002679189495048644, "loss": 3.3629, "step": 48600 }, { "epoch": 0.21, "learning_rate": 0.00026963220053000114, "loss": 3.3762, "step": 48800 }, { "epoch": 0.21, "learning_rate": 0.0002713498445515287, "loss": 3.3814, "step": 49000 }, { "epoch": 0.21, "learning_rate": 0.00027307185143137023, "loss": 3.3762, "step": 49200 }, { "epoch": 0.21, "learning_rate": 0.0002747981909548971, "loss": 3.3616, "step": 49400 }, { "epoch": 0.21, "learning_rate": 0.0002765288328314602, "loss": 3.3614, "step": 49600 }, { "epoch": 0.21, "learning_rate": 0.0002782637466949195, "loss": 3.3758, "step": 49800 }, { "epoch": 0.21, "learning_rate": 0.0002800029021041788, "loss": 3.367, "step": 50000 }, { "epoch": 0.22, "learning_rate": 0.00028174626854371866, "loss": 3.3536, "step": 50200 }, { "epoch": 0.22, "learning_rate": 0.0002834938154241324, "loss": 3.3545, "step": 50400 }, { "epoch": 0.22, "learning_rate": 0.00028524551208266224, "loss": 3.369, "step": 50600 }, { "epoch": 0.22, "learning_rate": 0.0002870013277837379, "loss": 3.3578, "step": 50800 }, { "epoch": 0.22, "learning_rate": 0.00028876123171951576, "loss": 3.3439, "step": 51000 }, { "epoch": 0.22, "learning_rate": 0.0002905251930104192, "loss": 3.3568, "step": 51200 }, { "epoch": 0.22, "learning_rate": 0.00029229318070568056, "loss": 3.3605, "step": 51400 }, { "epoch": 0.22, "learning_rate": 0.0002940651637838844, "loss": 3.358, "step": 51600 }, { "epoch": 0.22, "learning_rate": 0.0002958411111535111, "loss": 3.3603, "step": 51800 }, { "epoch": 0.22, "learning_rate": 0.00029762099165348357, "loss": 3.3527, "step": 52000 }, { "epoch": 0.22, "learning_rate": 0.0002994047740537128, "loss": 3.3409, "step": 52200 }, { "epoch": 0.23, "learning_rate": 0.00030119242705564664, "loss": 3.3495, "step": 52400 }, { "epoch": 0.23, "learning_rate": 0.00030298391929281857, "loss": 3.328, "step": 52600 }, { "epoch": 0.23, "learning_rate": 0.00030477921933139823, "loss": 3.3592, "step": 52800 }, { "epoch": 0.23, "learning_rate": 0.00030657829567074305, "loss": 3.36, "step": 53000 }, { "epoch": 0.23, "learning_rate": 0.0003083811167439507, "loss": 3.3558, "step": 53200 }, { "epoch": 0.23, "learning_rate": 0.0003101876509184131, "loss": 3.3206, "step": 53400 }, { "epoch": 0.23, "learning_rate": 0.00031199786649637145, "loss": 3.3376, "step": 53600 }, { "epoch": 0.23, "learning_rate": 0.0003138117317154723, "loss": 3.3242, "step": 53800 }, { "epoch": 0.23, "learning_rate": 0.0003156292147493255, "loss": 3.3368, "step": 54000 }, { "epoch": 0.23, "learning_rate": 0.00031745028370806165, "loss": 3.3366, "step": 54200 }, { "epoch": 0.23, "learning_rate": 0.00031927490663889203, "loss": 3.3267, "step": 54400 }, { "epoch": 0.23, "learning_rate": 0.00032110305152666953, "loss": 3.302, "step": 54600 }, { "epoch": 0.24, "learning_rate": 0.00032293468629445007, "loss": 3.3237, "step": 54800 }, { "epoch": 0.24, "learning_rate": 0.00032476977880405546, "loss": 3.3207, "step": 55000 }, { "epoch": 0.24, "learning_rate": 0.00032660829685663773, "loss": 3.3215, "step": 55200 }, { "epoch": 0.24, "learning_rate": 0.00032845020819324334, "loss": 3.3107, "step": 55400 }, { "epoch": 0.24, "learning_rate": 0.0003302954804953797, "loss": 3.3153, "step": 55600 }, { "epoch": 0.24, "learning_rate": 0.00033214408138558256, "loss": 3.341, "step": 55800 }, { "epoch": 0.24, "learning_rate": 0.0003339959784279831, "loss": 3.3183, "step": 56000 }, { "epoch": 0.24, "learning_rate": 0.00033585113912887776, "loss": 3.3295, "step": 56200 }, { "epoch": 0.24, "learning_rate": 0.0003377095309372985, "loss": 3.3293, "step": 56400 }, { "epoch": 0.24, "learning_rate": 0.0003395711212455839, "loss": 3.3181, "step": 56600 }, { "epoch": 0.24, "learning_rate": 0.0003414358773899506, "loss": 3.3075, "step": 56800 }, { "epoch": 0.24, "learning_rate": 0.00034330376665106695, "loss": 3.3213, "step": 57000 }, { "epoch": 0.25, "learning_rate": 0.0003451747562546278, "loss": 3.3105, "step": 57200 }, { "epoch": 0.25, "learning_rate": 0.00034704881337192784, "loss": 3.2828, "step": 57400 }, { "epoch": 0.25, "learning_rate": 0.00034892590512043947, "loss": 3.3044, "step": 57600 }, { "epoch": 0.25, "learning_rate": 0.00035080599856438877, "loss": 3.306, "step": 57800 }, { "epoch": 0.25, "learning_rate": 0.00035268906071533304, "loss": 3.3226, "step": 58000 }, { "epoch": 0.25, "learning_rate": 0.0003545750585327406, "loss": 3.3227, "step": 58200 }, { "epoch": 0.25, "learning_rate": 0.0003564639589245703, "loss": 3.2888, "step": 58400 }, { "epoch": 0.25, "learning_rate": 0.0003583557287478512, "loss": 3.2918, "step": 58600 }, { "epoch": 0.25, "learning_rate": 0.0003602503348092654, "loss": 3.2857, "step": 58800 }, { "epoch": 0.25, "learning_rate": 0.0003621477438657296, "loss": 3.324, "step": 59000 }, { "epoch": 0.25, "learning_rate": 0.00036404792262497885, "loss": 3.2991, "step": 59200 }, { "epoch": 0.26, "learning_rate": 0.00036595083774615053, "loss": 3.2798, "step": 59400 }, { "epoch": 0.26, "learning_rate": 0.0003678564558403689, "loss": 3.3164, "step": 59600 }, { "epoch": 0.26, "learning_rate": 0.0003697647434713321, "loss": 3.2885, "step": 59800 }, { "epoch": 0.26, "learning_rate": 0.0003716756671558975, "loss": 3.3157, "step": 60000 }, { "epoch": 0.26, "learning_rate": 0.0003735891933646703, "loss": 3.2991, "step": 60200 }, { "epoch": 0.26, "learning_rate": 0.00037550528852259106, "loss": 3.2628, "step": 60400 }, { "epoch": 0.26, "learning_rate": 0.00037742391900952516, "loss": 3.2928, "step": 60600 }, { "epoch": 0.26, "learning_rate": 0.0003793450511608526, "loss": 3.2828, "step": 60800 }, { "epoch": 0.26, "learning_rate": 0.00038126865126805905, "loss": 3.2863, "step": 61000 }, { "epoch": 0.26, "learning_rate": 0.0003831946855793267, "loss": 3.2951, "step": 61200 }, { "epoch": 0.26, "learning_rate": 0.00038512312030012676, "loss": 3.2583, "step": 61400 }, { "epoch": 0.26, "learning_rate": 0.0003870539215938128, "loss": 3.2872, "step": 61600 }, { "epoch": 0.27, "learning_rate": 0.00038898705558221367, "loss": 3.2748, "step": 61800 }, { "epoch": 0.27, "learning_rate": 0.00039092248834622883, "loss": 3.3026, "step": 62000 }, { "epoch": 0.27, "learning_rate": 0.00039286018592642224, "loss": 3.2734, "step": 62200 }, { "epoch": 0.27, "learning_rate": 0.00039480011432362007, "loss": 3.2849, "step": 62400 }, { "epoch": 0.27, "learning_rate": 0.00039674223949950514, "loss": 3.2889, "step": 62600 }, { "epoch": 0.27, "learning_rate": 0.0003986865273772159, "loss": 3.2938, "step": 62800 }, { "epoch": 0.27, "learning_rate": 0.00040063294384194367, "loss": 3.2755, "step": 63000 }, { "epoch": 0.27, "learning_rate": 0.0004025814547415307, "loss": 3.2774, "step": 63200 }, { "epoch": 0.27, "learning_rate": 0.00040453202588707036, "loss": 3.2819, "step": 63400 }, { "epoch": 0.27, "learning_rate": 0.0004064846230535067, "loss": 3.2867, "step": 63600 }, { "epoch": 0.27, "learning_rate": 0.00040843921198023417, "loss": 3.2856, "step": 63800 }, { "epoch": 0.28, "learning_rate": 0.0004103957583717001, "loss": 3.2932, "step": 64000 }, { "epoch": 0.28, "learning_rate": 0.0004123542278980058, "loss": 3.272, "step": 64200 }, { "epoch": 0.28, "learning_rate": 0.00041431458619550874, "loss": 3.2697, "step": 64400 }, { "epoch": 0.28, "learning_rate": 0.00041627679886742527, "loss": 3.276, "step": 64600 }, { "epoch": 0.28, "learning_rate": 0.0004182408314844355, "loss": 3.2732, "step": 64800 }, { "epoch": 0.28, "learning_rate": 0.00042020664958528574, "loss": 3.2535, "step": 65000 }, { "epoch": 0.28, "learning_rate": 0.0004221742186773941, "loss": 3.2791, "step": 65200 }, { "epoch": 0.28, "learning_rate": 0.0004241435042374555, "loss": 3.2718, "step": 65400 }, { "epoch": 0.28, "learning_rate": 0.0004261144717120477, "loss": 3.2672, "step": 65600 }, { "epoch": 0.28, "learning_rate": 0.00042808708651823654, "loss": 3.2555, "step": 65800 }, { "epoch": 0.28, "learning_rate": 0.00043006131404418424, "loss": 3.2717, "step": 66000 }, { "epoch": 0.28, "learning_rate": 0.00043203711964975595, "loss": 3.2497, "step": 66200 }, { "epoch": 0.29, "learning_rate": 0.00043401446866712684, "loss": 3.2525, "step": 66400 }, { "epoch": 0.29, "learning_rate": 0.000435993326401392, "loss": 3.2523, "step": 66600 }, { "epoch": 0.29, "learning_rate": 0.0004379736581311737, "loss": 3.2431, "step": 66800 }, { "epoch": 0.29, "learning_rate": 0.00043995542910923167, "loss": 3.2571, "step": 67000 }, { "epoch": 0.29, "learning_rate": 0.0004419386045630716, "loss": 3.2445, "step": 67200 }, { "epoch": 0.29, "learning_rate": 0.0004439231496955571, "loss": 3.2508, "step": 67400 }, { "epoch": 0.29, "learning_rate": 0.00044590902968551834, "loss": 3.2435, "step": 67600 }, { "epoch": 0.29, "learning_rate": 0.00044789620968836404, "loss": 3.2436, "step": 67800 }, { "epoch": 0.29, "learning_rate": 0.0004498846548366927, "loss": 3.2533, "step": 68000 }, { "epoch": 0.29, "learning_rate": 0.0004518743302409047, "loss": 3.2523, "step": 68200 }, { "epoch": 0.29, "learning_rate": 0.00045386520098981335, "loss": 3.2284, "step": 68400 }, { "epoch": 0.29, "learning_rate": 0.0004558572321512592, "loss": 3.2285, "step": 68600 }, { "epoch": 0.3, "learning_rate": 0.00045785038877272114, "loss": 3.2395, "step": 68800 }, { "epoch": 0.3, "learning_rate": 0.00045984463588193104, "loss": 3.2592, "step": 69000 }, { "epoch": 0.3, "learning_rate": 0.00046183993848748675, "loss": 3.2478, "step": 69200 }, { "epoch": 0.3, "learning_rate": 0.0004638362615794662, "loss": 3.2303, "step": 69400 }, { "epoch": 0.3, "learning_rate": 0.00046583357013004194, "loss": 3.242, "step": 69600 }, { "epoch": 0.3, "learning_rate": 0.00046783182909409496, "loss": 3.2582, "step": 69800 }, { "epoch": 0.3, "learning_rate": 0.00046983100340983056, "loss": 3.2461, "step": 70000 }, { "epoch": 0.3, "learning_rate": 0.00047183105799939297, "loss": 3.2495, "step": 70200 }, { "epoch": 0.3, "learning_rate": 0.0004738319577694809, "loss": 3.2325, "step": 70400 }, { "epoch": 0.3, "learning_rate": 0.0004758336676119636, "loss": 3.2335, "step": 70600 }, { "epoch": 0.3, "learning_rate": 0.0004778361524044967, "loss": 3.2499, "step": 70800 }, { "epoch": 0.31, "learning_rate": 0.00047983937701113794, "loss": 3.2323, "step": 71000 }, { "epoch": 0.31, "learning_rate": 0.00048184330628296484, "loss": 3.221, "step": 71200 }, { "epoch": 0.31, "learning_rate": 0.0004838479050586898, "loss": 3.2344, "step": 71400 }, { "epoch": 0.31, "learning_rate": 0.0004858531381652792, "loss": 3.241, "step": 71600 }, { "epoch": 0.31, "learning_rate": 0.0004878589704185682, "loss": 3.2434, "step": 71800 }, { "epoch": 0.31, "learning_rate": 0.00048986536662388, "loss": 3.2375, "step": 72000 }, { "epoch": 0.31, "learning_rate": 0.0004918722915766426, "loss": 3.2237, "step": 72200 }, { "epoch": 0.31, "learning_rate": 0.0004938797100630057, "loss": 3.2665, "step": 72400 }, { "epoch": 0.31, "learning_rate": 0.0004958875868604602, "loss": 3.218, "step": 72600 }, { "epoch": 0.31, "learning_rate": 0.0004978958867384551, "loss": 3.2474, "step": 72800 }, { "epoch": 0.31, "learning_rate": 0.000499904574459016, "loss": 3.2423, "step": 73000 }, { "epoch": 0.31, "learning_rate": 0.000501913614777363, "loss": 3.2288, "step": 73200 }, { "epoch": 0.32, "learning_rate": 0.0005039229724425303, "loss": 3.2148, "step": 73400 }, { "epoch": 0.32, "learning_rate": 0.0005059326121979831, "loss": 3.2383, "step": 73600 }, { "epoch": 0.32, "learning_rate": 0.0005079424987822374, "loss": 3.236, "step": 73800 }, { "epoch": 0.32, "learning_rate": 0.0005099525969294778, "loss": 3.2231, "step": 74000 }, { "epoch": 0.32, "learning_rate": 0.0005119628713701773, "loss": 3.2215, "step": 74200 }, { "epoch": 0.32, "learning_rate": 0.0005139732868317155, "loss": 3.227, "step": 74400 }, { "epoch": 0.32, "learning_rate": 0.0005159838080389977, "loss": 3.2006, "step": 74600 }, { "epoch": 0.32, "learning_rate": 0.0005179943997150736, "loss": 3.2426, "step": 74800 }, { "epoch": 0.32, "learning_rate": 0.0005200050265817561, "loss": 3.2194, "step": 75000 }, { "epoch": 0.32, "learning_rate": 0.0005220156533602416, "loss": 3.223, "step": 75200 }, { "epoch": 0.32, "learning_rate": 0.0005240262447717271, "loss": 3.224, "step": 75400 }, { "epoch": 0.32, "learning_rate": 0.0005260367655380307, "loss": 3.2138, "step": 75600 }, { "epoch": 0.33, "learning_rate": 0.0005280471803822096, "loss": 3.2123, "step": 75800 }, { "epoch": 0.33, "learning_rate": 0.0005300574540291803, "loss": 3.2208, "step": 76000 }, { "epoch": 0.33, "learning_rate": 0.000532067551206336, "loss": 3.2199, "step": 76200 }, { "epoch": 0.33, "learning_rate": 0.0005340774366441665, "loss": 3.2367, "step": 76400 }, { "epoch": 0.33, "learning_rate": 0.0005360870750768769, "loss": 3.21, "step": 76600 }, { "epoch": 0.33, "learning_rate": 0.0005380964312430063, "loss": 3.2221, "step": 76800 }, { "epoch": 0.33, "learning_rate": 0.0005401054698860466, "loss": 3.2021, "step": 77000 }, { "epoch": 0.33, "learning_rate": 0.0005421141557550603, "loss": 3.2352, "step": 77200 }, { "epoch": 0.33, "learning_rate": 0.0005441224536053012, "loss": 3.1962, "step": 77400 }, { "epoch": 0.33, "learning_rate": 0.0005461303281988298, "loss": 3.2127, "step": 77600 }, { "epoch": 0.33, "learning_rate": 0.000548137744305134, "loss": 3.2307, "step": 77800 }, { "epoch": 0.34, "learning_rate": 0.0005501446667017461, "loss": 3.2053, "step": 78000 }, { "epoch": 0.34, "learning_rate": 0.0005521510601748613, "loss": 3.1936, "step": 78200 }, { "epoch": 0.34, "learning_rate": 0.0005541568895199552, "loss": 3.2019, "step": 78400 }, { "epoch": 0.34, "learning_rate": 0.0005561621195424016, "loss": 3.203, "step": 78600 }, { "epoch": 0.34, "learning_rate": 0.0005581667150580907, "loss": 3.2125, "step": 78800 }, { "epoch": 0.34, "learning_rate": 0.0005601706408940451, "loss": 3.2205, "step": 79000 }, { "epoch": 0.34, "learning_rate": 0.0005621738618890382, "loss": 3.2215, "step": 79200 }, { "epoch": 0.34, "learning_rate": 0.0005641763428942106, "loss": 3.2052, "step": 79400 }, { "epoch": 0.34, "learning_rate": 0.0005661780487736866, "loss": 3.2249, "step": 79600 }, { "epoch": 0.34, "learning_rate": 0.0005681789444051913, "loss": 3.1952, "step": 79800 }, { "epoch": 0.34, "learning_rate": 0.0005701789946806666, "loss": 3.1995, "step": 80000 }, { "epoch": 0.34, "learning_rate": 0.0005721781645068867, "loss": 3.2116, "step": 80200 }, { "epoch": 0.35, "learning_rate": 0.000574176418806075, "loss": 3.2256, "step": 80400 }, { "epoch": 0.35, "learning_rate": 0.0005761737225165182, "loss": 3.2221, "step": 80600 }, { "epoch": 0.35, "learning_rate": 0.0005781700405931827, "loss": 3.1956, "step": 80800 }, { "epoch": 0.35, "learning_rate": 0.0005801653380083288, "loss": 3.2031, "step": 81000 }, { "epoch": 0.35, "learning_rate": 0.0005821595797521253, "loss": 3.2029, "step": 81200 }, { "epoch": 0.35, "learning_rate": 0.0005841527308332645, "loss": 3.2065, "step": 81400 }, { "epoch": 0.35, "learning_rate": 0.0005861447562795751, "loss": 3.1783, "step": 81600 }, { "epoch": 0.35, "learning_rate": 0.0005881356211386371, "loss": 3.2181, "step": 81800 }, { "epoch": 0.35, "learning_rate": 0.0005901252904783932, "loss": 3.1991, "step": 82000 }, { "epoch": 0.35, "learning_rate": 0.0005921137293877644, "loss": 3.2011, "step": 82200 }, { "epoch": 0.35, "learning_rate": 0.0005941009029772594, "loss": 3.1852, "step": 82400 }, { "epoch": 0.35, "learning_rate": 0.00059608677637959, "loss": 3.1911, "step": 82600 }, { "epoch": 0.36, "learning_rate": 0.00059807131475028, "loss": 3.1999, "step": 82800 }, { "epoch": 0.36, "learning_rate": 0.000600054483268279, "loss": 3.1853, "step": 83000 }, { "epoch": 0.36, "learning_rate": 0.0006020362471365711, "loss": 3.1928, "step": 83200 }, { "epoch": 0.36, "learning_rate": 0.0006040165715827878, "loss": 3.1833, "step": 83400 }, { "epoch": 0.36, "learning_rate": 0.0006059954218598161, "loss": 3.1972, "step": 83600 }, { "epoch": 0.36, "learning_rate": 0.0006079727632464092, "loss": 3.2052, "step": 83800 }, { "epoch": 0.36, "learning_rate": 0.000609948561047796, "loss": 3.1881, "step": 84000 }, { "epoch": 0.36, "learning_rate": 0.0006119227805962891, "loss": 3.174, "step": 84200 }, { "epoch": 0.36, "learning_rate": 0.0006138953872518932, "loss": 3.1831, "step": 84400 }, { "epoch": 0.36, "learning_rate": 0.0006158663464029133, "loss": 3.1961, "step": 84600 }, { "epoch": 0.36, "learning_rate": 0.0006178356234665618, "loss": 3.1759, "step": 84800 }, { "epoch": 0.37, "learning_rate": 0.0006198031838895652, "loss": 3.1728, "step": 85000 }, { "epoch": 0.37, "learning_rate": 0.0006217689931487707, "loss": 3.194, "step": 85200 }, { "epoch": 0.37, "learning_rate": 0.0006237330167517514, "loss": 3.1747, "step": 85400 }, { "epoch": 0.37, "learning_rate": 0.0006256952202374121, "loss": 3.1805, "step": 85600 }, { "epoch": 0.37, "learning_rate": 0.0006276555691765935, "loss": 3.1781, "step": 85800 }, { "epoch": 0.37, "learning_rate": 0.0006296140291726768, "loss": 3.1829, "step": 86000 }, { "epoch": 0.37, "learning_rate": 0.0006315705658621865, "loss": 3.1832, "step": 86200 }, { "epoch": 0.37, "learning_rate": 0.0006335251449153937, "loss": 3.1634, "step": 86400 }, { "epoch": 0.37, "learning_rate": 0.0006354777320369192, "loss": 3.1664, "step": 86600 }, { "epoch": 0.37, "learning_rate": 0.0006374282929663341, "loss": 3.1576, "step": 86800 }, { "epoch": 0.37, "learning_rate": 0.0006393767934787615, "loss": 3.1667, "step": 87000 }, { "epoch": 0.37, "learning_rate": 0.0006413231993854767, "loss": 3.1718, "step": 87200 }, { "epoch": 0.38, "learning_rate": 0.0006432674765345077, "loss": 3.1645, "step": 87400 }, { "epoch": 0.38, "learning_rate": 0.0006452095908112336, "loss": 3.2063, "step": 87600 }, { "epoch": 0.38, "learning_rate": 0.0006471495081389843, "loss": 3.2638, "step": 87800 }, { "epoch": 0.38, "learning_rate": 0.0006490871944796369, "loss": 3.2685, "step": 88000 }, { "epoch": 0.38, "learning_rate": 0.0006510226158342146, "loss": 3.2519, "step": 88200 }, { "epoch": 0.38, "learning_rate": 0.0006529557382434818, "loss": 3.2101, "step": 88400 }, { "epoch": 0.38, "learning_rate": 0.0006548865277885411, "loss": 3.2109, "step": 88600 }, { "epoch": 0.38, "learning_rate": 0.0006568149505914273, "loss": 3.2291, "step": 88800 }, { "epoch": 0.38, "learning_rate": 0.0006587409728157031, "loss": 3.2282, "step": 89000 }, { "epoch": 0.38, "learning_rate": 0.000660664560667052, "loss": 3.2404, "step": 89200 }, { "epoch": 0.38, "learning_rate": 0.0006625856803938711, "loss": 3.2334, "step": 89400 }, { "epoch": 0.39, "learning_rate": 0.000664504298287864, "loss": 3.243, "step": 89600 }, { "epoch": 0.39, "learning_rate": 0.0006664203806846319, "loss": 3.2475, "step": 89800 }, { "epoch": 0.39, "learning_rate": 0.0006683338939642638, "loss": 3.3556, "step": 90000 } ], "max_steps": 500000, "num_train_epochs": 3, "total_flos": 1.4344469839872e+17, "trial_name": null, "trial_params": null }