{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 4566, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0004380201489268506, |
|
"grad_norm": 244.058147496229, |
|
"learning_rate": 4.37636761487965e-08, |
|
"loss": 8.7812, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.002190100744634253, |
|
"grad_norm": 242.66751047909037, |
|
"learning_rate": 2.188183807439825e-07, |
|
"loss": 8.8047, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004380201489268506, |
|
"grad_norm": 224.45025961352744, |
|
"learning_rate": 4.37636761487965e-07, |
|
"loss": 8.7406, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.006570302233902759, |
|
"grad_norm": 202.25250028934636, |
|
"learning_rate": 6.564551422319475e-07, |
|
"loss": 8.6438, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.008760402978537012, |
|
"grad_norm": 194.17623442189665, |
|
"learning_rate": 8.7527352297593e-07, |
|
"loss": 8.3891, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.010950503723171266, |
|
"grad_norm": 110.65671385292053, |
|
"learning_rate": 1.0940919037199126e-06, |
|
"loss": 7.8109, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.013140604467805518, |
|
"grad_norm": 65.62760076712048, |
|
"learning_rate": 1.312910284463895e-06, |
|
"loss": 7.175, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.015330705212439772, |
|
"grad_norm": 53.67520132013994, |
|
"learning_rate": 1.5317286652078775e-06, |
|
"loss": 6.6734, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.017520805957074025, |
|
"grad_norm": 37.22908552601946, |
|
"learning_rate": 1.75054704595186e-06, |
|
"loss": 5.9109, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01971090670170828, |
|
"grad_norm": 33.17042166374813, |
|
"learning_rate": 1.9693654266958425e-06, |
|
"loss": 5.2781, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.021901007446342532, |
|
"grad_norm": 24.17805937125211, |
|
"learning_rate": 2.188183807439825e-06, |
|
"loss": 4.6617, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.024091108190976786, |
|
"grad_norm": 15.533482402019871, |
|
"learning_rate": 2.4070021881838077e-06, |
|
"loss": 3.6313, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.026281208935611037, |
|
"grad_norm": 13.054298424254013, |
|
"learning_rate": 2.62582056892779e-06, |
|
"loss": 3.0438, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02847130968024529, |
|
"grad_norm": 10.046368116334548, |
|
"learning_rate": 2.8446389496717725e-06, |
|
"loss": 2.5609, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.030661410424879545, |
|
"grad_norm": 3.63825446752271, |
|
"learning_rate": 3.063457330415755e-06, |
|
"loss": 2.1242, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0328515111695138, |
|
"grad_norm": 2.5205471934789943, |
|
"learning_rate": 3.2822757111597377e-06, |
|
"loss": 1.8531, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.03504161191414805, |
|
"grad_norm": 1.5353534970314644, |
|
"learning_rate": 3.50109409190372e-06, |
|
"loss": 1.7168, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03723171265878231, |
|
"grad_norm": 0.9320337937023512, |
|
"learning_rate": 3.7199124726477025e-06, |
|
"loss": 1.6402, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.03942181340341656, |
|
"grad_norm": 0.7724916688386967, |
|
"learning_rate": 3.938730853391685e-06, |
|
"loss": 1.5934, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04161191414805081, |
|
"grad_norm": 0.5985215738037675, |
|
"learning_rate": 4.157549234135668e-06, |
|
"loss": 1.5066, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.043802014892685065, |
|
"grad_norm": 0.5238425867378174, |
|
"learning_rate": 4.37636761487965e-06, |
|
"loss": 1.4219, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.045992115637319315, |
|
"grad_norm": 0.4867641557206586, |
|
"learning_rate": 4.595185995623633e-06, |
|
"loss": 1.4422, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.04818221638195357, |
|
"grad_norm": 0.4337246016382216, |
|
"learning_rate": 4.8140043763676155e-06, |
|
"loss": 1.418, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05037231712658782, |
|
"grad_norm": 0.40770721306392865, |
|
"learning_rate": 5.032822757111597e-06, |
|
"loss": 1.3773, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.052562417871222074, |
|
"grad_norm": 0.4233334394962591, |
|
"learning_rate": 5.25164113785558e-06, |
|
"loss": 1.3613, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05475251861585633, |
|
"grad_norm": 0.42178615143473047, |
|
"learning_rate": 5.470459518599562e-06, |
|
"loss": 1.391, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.05694261936049058, |
|
"grad_norm": 0.4006940117433896, |
|
"learning_rate": 5.689277899343545e-06, |
|
"loss": 1.3297, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.05913272010512484, |
|
"grad_norm": 0.3651420010557856, |
|
"learning_rate": 5.908096280087528e-06, |
|
"loss": 1.3313, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.06132282084975909, |
|
"grad_norm": 0.3747020330363452, |
|
"learning_rate": 6.12691466083151e-06, |
|
"loss": 1.3332, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06351292159439334, |
|
"grad_norm": 0.3728552218659468, |
|
"learning_rate": 6.345733041575493e-06, |
|
"loss": 1.3004, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.0657030223390276, |
|
"grad_norm": 0.37018947461483953, |
|
"learning_rate": 6.564551422319475e-06, |
|
"loss": 1.3484, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06789312308366185, |
|
"grad_norm": 0.3529067831254164, |
|
"learning_rate": 6.783369803063458e-06, |
|
"loss": 1.3023, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.0700832238282961, |
|
"grad_norm": 0.3518414768064083, |
|
"learning_rate": 7.00218818380744e-06, |
|
"loss": 1.3152, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07227332457293036, |
|
"grad_norm": 0.34420293663926643, |
|
"learning_rate": 7.221006564551422e-06, |
|
"loss": 1.3258, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.07446342531756461, |
|
"grad_norm": 0.3406303226830583, |
|
"learning_rate": 7.439824945295405e-06, |
|
"loss": 1.2852, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.07665352606219886, |
|
"grad_norm": 0.3366612677735102, |
|
"learning_rate": 7.658643326039388e-06, |
|
"loss": 1.2812, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.07884362680683311, |
|
"grad_norm": 0.3437595783050795, |
|
"learning_rate": 7.87746170678337e-06, |
|
"loss": 1.3117, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08103372755146737, |
|
"grad_norm": 0.35966923330984457, |
|
"learning_rate": 8.096280087527353e-06, |
|
"loss": 1.2727, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.08322382829610162, |
|
"grad_norm": 0.3341663091683573, |
|
"learning_rate": 8.315098468271335e-06, |
|
"loss": 1.2867, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.08541392904073587, |
|
"grad_norm": 0.35172271498862123, |
|
"learning_rate": 8.533916849015318e-06, |
|
"loss": 1.2664, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.08760402978537013, |
|
"grad_norm": 0.35659032911139527, |
|
"learning_rate": 8.7527352297593e-06, |
|
"loss": 1.2742, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08979413053000437, |
|
"grad_norm": 0.3350456396328354, |
|
"learning_rate": 8.971553610503283e-06, |
|
"loss": 1.2781, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.09198423127463863, |
|
"grad_norm": 0.33449398836221755, |
|
"learning_rate": 9.190371991247266e-06, |
|
"loss": 1.2613, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.09417433201927289, |
|
"grad_norm": 0.3589466098486552, |
|
"learning_rate": 9.409190371991248e-06, |
|
"loss": 1.2559, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.09636443276390715, |
|
"grad_norm": 0.368082616736705, |
|
"learning_rate": 9.628008752735231e-06, |
|
"loss": 1.2852, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.09855453350854139, |
|
"grad_norm": 0.3512400018084896, |
|
"learning_rate": 9.846827133479214e-06, |
|
"loss": 1.2352, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.10074463425317565, |
|
"grad_norm": 0.3284893195501342, |
|
"learning_rate": 1.0065645514223194e-05, |
|
"loss": 1.2566, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1029347349978099, |
|
"grad_norm": 0.35906080776203964, |
|
"learning_rate": 1.0284463894967179e-05, |
|
"loss": 1.2617, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.10512483574244415, |
|
"grad_norm": 0.3270591540034968, |
|
"learning_rate": 1.050328227571116e-05, |
|
"loss": 1.2375, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1073149364870784, |
|
"grad_norm": 0.3532550836201906, |
|
"learning_rate": 1.0722100656455144e-05, |
|
"loss": 1.2086, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.10950503723171266, |
|
"grad_norm": 0.3237648295967401, |
|
"learning_rate": 1.0940919037199125e-05, |
|
"loss": 1.2398, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1116951379763469, |
|
"grad_norm": 0.35822141700690474, |
|
"learning_rate": 1.1159737417943109e-05, |
|
"loss": 1.2355, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.11388523872098116, |
|
"grad_norm": 0.3563825451290871, |
|
"learning_rate": 1.137855579868709e-05, |
|
"loss": 1.2617, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.11607533946561542, |
|
"grad_norm": 0.3225396305771842, |
|
"learning_rate": 1.1597374179431074e-05, |
|
"loss": 1.2105, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.11826544021024968, |
|
"grad_norm": 0.3122994394367223, |
|
"learning_rate": 1.1816192560175055e-05, |
|
"loss": 1.2051, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.12045554095488392, |
|
"grad_norm": 0.33769463403346195, |
|
"learning_rate": 1.2035010940919038e-05, |
|
"loss": 1.2402, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.12264564169951818, |
|
"grad_norm": 0.323094652208957, |
|
"learning_rate": 1.225382932166302e-05, |
|
"loss": 1.2363, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.12483574244415244, |
|
"grad_norm": 0.32685421614831767, |
|
"learning_rate": 1.2472647702407003e-05, |
|
"loss": 1.2297, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.12702584318878668, |
|
"grad_norm": 0.3406152378644622, |
|
"learning_rate": 1.2691466083150986e-05, |
|
"loss": 1.2293, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.12921594393342092, |
|
"grad_norm": 0.31234607391709834, |
|
"learning_rate": 1.2910284463894968e-05, |
|
"loss": 1.2, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.1314060446780552, |
|
"grad_norm": 0.36244801139958155, |
|
"learning_rate": 1.312910284463895e-05, |
|
"loss": 1.2113, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13359614542268944, |
|
"grad_norm": 0.3158237405680623, |
|
"learning_rate": 1.3347921225382933e-05, |
|
"loss": 1.2008, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.1357862461673237, |
|
"grad_norm": 0.35444090899453623, |
|
"learning_rate": 1.3566739606126916e-05, |
|
"loss": 1.2273, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.13797634691195795, |
|
"grad_norm": 0.3233212629925759, |
|
"learning_rate": 1.3785557986870899e-05, |
|
"loss": 1.191, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.1401664476565922, |
|
"grad_norm": 0.3625756011702934, |
|
"learning_rate": 1.400437636761488e-05, |
|
"loss": 1.173, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.14235654840122647, |
|
"grad_norm": 0.30517921759823324, |
|
"learning_rate": 1.4223194748358864e-05, |
|
"loss": 1.2137, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.1445466491458607, |
|
"grad_norm": 0.31776810415254086, |
|
"learning_rate": 1.4442013129102845e-05, |
|
"loss": 1.1977, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.14673674989049496, |
|
"grad_norm": 0.3141513649782591, |
|
"learning_rate": 1.4660831509846829e-05, |
|
"loss": 1.1906, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.14892685063512923, |
|
"grad_norm": 0.326520661370947, |
|
"learning_rate": 1.487964989059081e-05, |
|
"loss": 1.1848, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.15111695137976347, |
|
"grad_norm": 0.3245323516153073, |
|
"learning_rate": 1.5098468271334794e-05, |
|
"loss": 1.2023, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.1533070521243977, |
|
"grad_norm": 0.3106198673900871, |
|
"learning_rate": 1.5317286652078775e-05, |
|
"loss": 1.2039, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.15549715286903198, |
|
"grad_norm": 0.31738937212374596, |
|
"learning_rate": 1.553610503282276e-05, |
|
"loss": 1.1891, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.15768725361366623, |
|
"grad_norm": 0.3185701621576024, |
|
"learning_rate": 1.575492341356674e-05, |
|
"loss": 1.1965, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.15987735435830047, |
|
"grad_norm": 0.3376037242993364, |
|
"learning_rate": 1.5973741794310725e-05, |
|
"loss": 1.1895, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.16206745510293474, |
|
"grad_norm": 0.31121755906355864, |
|
"learning_rate": 1.6192560175054705e-05, |
|
"loss": 1.2, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.164257555847569, |
|
"grad_norm": 0.3217495152934365, |
|
"learning_rate": 1.641137855579869e-05, |
|
"loss": 1.1836, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.16644765659220323, |
|
"grad_norm": 0.2966935919095498, |
|
"learning_rate": 1.663019693654267e-05, |
|
"loss": 1.1887, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1686377573368375, |
|
"grad_norm": 0.31806956377186374, |
|
"learning_rate": 1.6849015317286655e-05, |
|
"loss": 1.2281, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.17082785808147175, |
|
"grad_norm": 0.3110118252043724, |
|
"learning_rate": 1.7067833698030636e-05, |
|
"loss": 1.2066, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.173017958826106, |
|
"grad_norm": 0.3125792284015961, |
|
"learning_rate": 1.728665207877462e-05, |
|
"loss": 1.2027, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.17520805957074026, |
|
"grad_norm": 0.31848490451529393, |
|
"learning_rate": 1.75054704595186e-05, |
|
"loss": 1.2059, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1773981603153745, |
|
"grad_norm": 0.29444069603586365, |
|
"learning_rate": 1.7724288840262585e-05, |
|
"loss": 1.1617, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.17958826106000875, |
|
"grad_norm": 0.2980836708326727, |
|
"learning_rate": 1.7943107221006566e-05, |
|
"loss": 1.1824, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.18177836180464302, |
|
"grad_norm": 0.31147307412521985, |
|
"learning_rate": 1.816192560175055e-05, |
|
"loss": 1.1695, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.18396846254927726, |
|
"grad_norm": 0.3101768840043127, |
|
"learning_rate": 1.838074398249453e-05, |
|
"loss": 1.177, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.18615856329391153, |
|
"grad_norm": 0.3203742924172763, |
|
"learning_rate": 1.8599562363238512e-05, |
|
"loss": 1.1992, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.18834866403854578, |
|
"grad_norm": 0.3109730786465228, |
|
"learning_rate": 1.8818380743982497e-05, |
|
"loss": 1.1965, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.19053876478318002, |
|
"grad_norm": 0.3283883952699153, |
|
"learning_rate": 1.9037199124726478e-05, |
|
"loss": 1.191, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.1927288655278143, |
|
"grad_norm": 0.310293473406716, |
|
"learning_rate": 1.9256017505470462e-05, |
|
"loss": 1.1687, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.19491896627244854, |
|
"grad_norm": 0.33957719240402645, |
|
"learning_rate": 1.9474835886214443e-05, |
|
"loss": 1.1785, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.19710906701708278, |
|
"grad_norm": 0.3097291115799612, |
|
"learning_rate": 1.9693654266958427e-05, |
|
"loss": 1.1879, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.19929916776171705, |
|
"grad_norm": 0.3466959419750145, |
|
"learning_rate": 1.9912472647702408e-05, |
|
"loss": 1.1793, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.2014892685063513, |
|
"grad_norm": 0.33898635574038505, |
|
"learning_rate": 1.9999973694910354e-05, |
|
"loss": 1.1668, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.20367936925098554, |
|
"grad_norm": 0.3227952712343219, |
|
"learning_rate": 1.9999812942085888e-05, |
|
"loss": 1.1633, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.2058694699956198, |
|
"grad_norm": 0.31482981716458963, |
|
"learning_rate": 1.9999506052722038e-05, |
|
"loss": 1.166, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.20805957074025405, |
|
"grad_norm": 0.3014515240380428, |
|
"learning_rate": 1.9999053031303655e-05, |
|
"loss": 1.1484, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.2102496714848883, |
|
"grad_norm": 0.2864843044702426, |
|
"learning_rate": 1.9998453884451173e-05, |
|
"loss": 1.1711, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.21243977222952257, |
|
"grad_norm": 0.31537308079673587, |
|
"learning_rate": 1.9997708620920465e-05, |
|
"loss": 1.1602, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.2146298729741568, |
|
"grad_norm": 0.2910479831367777, |
|
"learning_rate": 1.9996817251602773e-05, |
|
"loss": 1.1699, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.21681997371879105, |
|
"grad_norm": 0.29723625812900056, |
|
"learning_rate": 1.9995779789524494e-05, |
|
"loss": 1.1738, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.21901007446342532, |
|
"grad_norm": 0.32928417993263026, |
|
"learning_rate": 1.9994596249847024e-05, |
|
"loss": 1.1469, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22120017520805957, |
|
"grad_norm": 0.318545598564101, |
|
"learning_rate": 1.999326664986653e-05, |
|
"loss": 1.1529, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.2233902759526938, |
|
"grad_norm": 0.3053735425350909, |
|
"learning_rate": 1.9991791009013687e-05, |
|
"loss": 1.1863, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.22558037669732808, |
|
"grad_norm": 0.3294132278225112, |
|
"learning_rate": 1.99901693488534e-05, |
|
"loss": 1.168, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.22777047744196233, |
|
"grad_norm": 0.30255802988311753, |
|
"learning_rate": 1.9988401693084502e-05, |
|
"loss": 1.1641, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.22996057818659657, |
|
"grad_norm": 0.31614096881549886, |
|
"learning_rate": 1.9986488067539378e-05, |
|
"loss": 1.1695, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.23215067893123084, |
|
"grad_norm": 0.29107544403138813, |
|
"learning_rate": 1.9984428500183616e-05, |
|
"loss": 1.1879, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.23434077967586509, |
|
"grad_norm": 0.3064129449057793, |
|
"learning_rate": 1.998222302111558e-05, |
|
"loss": 1.1453, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.23653088042049936, |
|
"grad_norm": 0.298260787884317, |
|
"learning_rate": 1.9979871662565982e-05, |
|
"loss": 1.15, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.2387209811651336, |
|
"grad_norm": 0.29812547479564855, |
|
"learning_rate": 1.9977374458897408e-05, |
|
"loss": 1.1465, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.24091108190976784, |
|
"grad_norm": 0.3198201131884772, |
|
"learning_rate": 1.9974731446603805e-05, |
|
"loss": 1.1531, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.24310118265440211, |
|
"grad_norm": 0.3026280375781704, |
|
"learning_rate": 1.997194266430997e-05, |
|
"loss": 1.1732, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.24529128339903636, |
|
"grad_norm": 0.3030023979215963, |
|
"learning_rate": 1.996900815277096e-05, |
|
"loss": 1.1793, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.2474813841436706, |
|
"grad_norm": 0.3121701164570895, |
|
"learning_rate": 1.9965927954871516e-05, |
|
"loss": 1.157, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.24967148488830487, |
|
"grad_norm": 0.3037543042825315, |
|
"learning_rate": 1.996270211562542e-05, |
|
"loss": 1.1588, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.2518615856329391, |
|
"grad_norm": 0.31986392002534736, |
|
"learning_rate": 1.9959330682174863e-05, |
|
"loss": 1.1359, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.25405168637757336, |
|
"grad_norm": 0.30039161810050113, |
|
"learning_rate": 1.9955813703789717e-05, |
|
"loss": 1.1492, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.25624178712220763, |
|
"grad_norm": 0.30051328246651055, |
|
"learning_rate": 1.9952151231866858e-05, |
|
"loss": 1.1404, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.25843188786684185, |
|
"grad_norm": 0.2993163815080819, |
|
"learning_rate": 1.9948343319929377e-05, |
|
"loss": 1.1797, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.2606219886114761, |
|
"grad_norm": 0.2925299399789088, |
|
"learning_rate": 1.9944390023625827e-05, |
|
"loss": 1.1727, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.2628120893561104, |
|
"grad_norm": 0.28733870474954115, |
|
"learning_rate": 1.9940291400729385e-05, |
|
"loss": 1.1449, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.26500219010074466, |
|
"grad_norm": 0.3166965904102066, |
|
"learning_rate": 1.993604751113704e-05, |
|
"loss": 1.1453, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.2671922908453789, |
|
"grad_norm": 0.2938712170029366, |
|
"learning_rate": 1.9931658416868677e-05, |
|
"loss": 1.1637, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.26938239159001315, |
|
"grad_norm": 0.2919926264580296, |
|
"learning_rate": 1.9927124182066205e-05, |
|
"loss": 1.1449, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.2715724923346474, |
|
"grad_norm": 0.2911062410962241, |
|
"learning_rate": 1.9922444872992604e-05, |
|
"loss": 1.1533, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.27376259307928164, |
|
"grad_norm": 0.285422153507294, |
|
"learning_rate": 1.991762055803095e-05, |
|
"loss": 1.1707, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.2759526938239159, |
|
"grad_norm": 0.2994545512735193, |
|
"learning_rate": 1.9912651307683433e-05, |
|
"loss": 1.1574, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.2781427945685502, |
|
"grad_norm": 0.2832612329809898, |
|
"learning_rate": 1.9907537194570315e-05, |
|
"loss": 1.1527, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.2803328953131844, |
|
"grad_norm": 0.283860545913851, |
|
"learning_rate": 1.9902278293428883e-05, |
|
"loss": 1.1613, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.28252299605781866, |
|
"grad_norm": 0.2857818677163762, |
|
"learning_rate": 1.9896874681112323e-05, |
|
"loss": 1.159, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.28471309680245294, |
|
"grad_norm": 0.28550178083083544, |
|
"learning_rate": 1.989132643658864e-05, |
|
"loss": 1.1516, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.28690319754708715, |
|
"grad_norm": 0.301608563919381, |
|
"learning_rate": 1.9885633640939475e-05, |
|
"loss": 1.1594, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.2890932982917214, |
|
"grad_norm": 0.302035660129087, |
|
"learning_rate": 1.987979637735893e-05, |
|
"loss": 1.165, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2912833990363557, |
|
"grad_norm": 0.2867968194946794, |
|
"learning_rate": 1.9873814731152346e-05, |
|
"loss": 1.1492, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.2934734997809899, |
|
"grad_norm": 0.2866293446184061, |
|
"learning_rate": 1.9867688789735075e-05, |
|
"loss": 1.1445, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.2956636005256242, |
|
"grad_norm": 0.3014820377030796, |
|
"learning_rate": 1.9861418642631173e-05, |
|
"loss": 1.1387, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.29785370127025845, |
|
"grad_norm": 0.27958880178191026, |
|
"learning_rate": 1.9855004381472113e-05, |
|
"loss": 1.1537, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.30004380201489267, |
|
"grad_norm": 0.3044820311596824, |
|
"learning_rate": 1.984844609999544e-05, |
|
"loss": 1.1414, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.30223390275952694, |
|
"grad_norm": 0.28640742624947424, |
|
"learning_rate": 1.9841743894043412e-05, |
|
"loss": 1.125, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.3044240035041612, |
|
"grad_norm": 0.27345860578003156, |
|
"learning_rate": 1.9834897861561572e-05, |
|
"loss": 1.1301, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.3066141042487954, |
|
"grad_norm": 0.2789976740559993, |
|
"learning_rate": 1.9827908102597342e-05, |
|
"loss": 1.1637, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3088042049934297, |
|
"grad_norm": 0.29865777361212825, |
|
"learning_rate": 1.9820774719298553e-05, |
|
"loss": 1.1555, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.31099430573806397, |
|
"grad_norm": 0.29781806230277835, |
|
"learning_rate": 1.981349781591195e-05, |
|
"loss": 1.1422, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3131844064826982, |
|
"grad_norm": 0.28285008218859387, |
|
"learning_rate": 1.9806077498781667e-05, |
|
"loss": 1.1375, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.31537450722733246, |
|
"grad_norm": 0.299690829321009, |
|
"learning_rate": 1.9798513876347686e-05, |
|
"loss": 1.1324, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.31756460797196673, |
|
"grad_norm": 0.29123536907457154, |
|
"learning_rate": 1.9790807059144224e-05, |
|
"loss": 1.1406, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.31975470871660094, |
|
"grad_norm": 0.28343901662871235, |
|
"learning_rate": 1.978295715979816e-05, |
|
"loss": 1.159, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3219448094612352, |
|
"grad_norm": 0.2983120464171406, |
|
"learning_rate": 1.9774964293027343e-05, |
|
"loss": 1.1172, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.3241349102058695, |
|
"grad_norm": 0.30619911851808346, |
|
"learning_rate": 1.976682857563895e-05, |
|
"loss": 1.1316, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3263250109505037, |
|
"grad_norm": 0.29955540950645765, |
|
"learning_rate": 1.9758550126527763e-05, |
|
"loss": 1.1332, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.328515111695138, |
|
"grad_norm": 0.3225950257313704, |
|
"learning_rate": 1.975012906667444e-05, |
|
"loss": 1.1285, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.33070521243977224, |
|
"grad_norm": 0.30076771908251354, |
|
"learning_rate": 1.974156551914373e-05, |
|
"loss": 1.1582, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.33289531318440646, |
|
"grad_norm": 0.26730681052415284, |
|
"learning_rate": 1.9732859609082703e-05, |
|
"loss": 1.1363, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.33508541392904073, |
|
"grad_norm": 0.296320423317251, |
|
"learning_rate": 1.9724011463718886e-05, |
|
"loss": 1.1475, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.337275514673675, |
|
"grad_norm": 0.2831250199600264, |
|
"learning_rate": 1.971502121235844e-05, |
|
"loss": 1.1504, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.3394656154183092, |
|
"grad_norm": 0.30579339517624693, |
|
"learning_rate": 1.9705888986384237e-05, |
|
"loss": 1.149, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.3416557161629435, |
|
"grad_norm": 0.279787903018849, |
|
"learning_rate": 1.969661491925397e-05, |
|
"loss": 1.1285, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.34384581690757776, |
|
"grad_norm": 0.2984680376613341, |
|
"learning_rate": 1.9687199146498184e-05, |
|
"loss": 1.1326, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.346035917652212, |
|
"grad_norm": 0.303788532910488, |
|
"learning_rate": 1.9677641805718287e-05, |
|
"loss": 1.1332, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.34822601839684625, |
|
"grad_norm": 0.28544564287464336, |
|
"learning_rate": 1.9667943036584572e-05, |
|
"loss": 1.1332, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.3504161191414805, |
|
"grad_norm": 0.302702688121673, |
|
"learning_rate": 1.9658102980834147e-05, |
|
"loss": 1.1271, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.35260621988611474, |
|
"grad_norm": 0.2861342406686224, |
|
"learning_rate": 1.9648121782268862e-05, |
|
"loss": 1.127, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.354796320630749, |
|
"grad_norm": 0.285579818423958, |
|
"learning_rate": 1.9637999586753236e-05, |
|
"loss": 1.1344, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.3569864213753833, |
|
"grad_norm": 0.2747392171436459, |
|
"learning_rate": 1.9627736542212292e-05, |
|
"loss": 1.1566, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.3591765221200175, |
|
"grad_norm": 0.2836525027135021, |
|
"learning_rate": 1.961733279862942e-05, |
|
"loss": 1.1314, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.36136662286465177, |
|
"grad_norm": 0.2814212118759052, |
|
"learning_rate": 1.9606788508044176e-05, |
|
"loss": 1.1271, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.36355672360928604, |
|
"grad_norm": 0.2751373320963047, |
|
"learning_rate": 1.959610382455005e-05, |
|
"loss": 1.1324, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.3657468243539203, |
|
"grad_norm": 0.29671327096904027, |
|
"learning_rate": 1.9585278904292232e-05, |
|
"loss": 1.1238, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.3679369250985545, |
|
"grad_norm": 0.2807910552149839, |
|
"learning_rate": 1.9574313905465317e-05, |
|
"loss": 1.1473, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.3701270258431888, |
|
"grad_norm": 0.2750707155460053, |
|
"learning_rate": 1.956320898831101e-05, |
|
"loss": 1.0916, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.37231712658782307, |
|
"grad_norm": 0.28600560772614464, |
|
"learning_rate": 1.9551964315115755e-05, |
|
"loss": 1.1438, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.3745072273324573, |
|
"grad_norm": 0.2945488706090824, |
|
"learning_rate": 1.954058005020839e-05, |
|
"loss": 1.1252, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.37669732807709155, |
|
"grad_norm": 0.2821523000109209, |
|
"learning_rate": 1.952905635995773e-05, |
|
"loss": 1.1215, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.3788874288217258, |
|
"grad_norm": 0.2809319217223354, |
|
"learning_rate": 1.9517393412770154e-05, |
|
"loss": 1.1438, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.38107752956636004, |
|
"grad_norm": 0.29258642255243755, |
|
"learning_rate": 1.9505591379087126e-05, |
|
"loss": 1.1406, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.3832676303109943, |
|
"grad_norm": 0.2782515672234601, |
|
"learning_rate": 1.9493650431382702e-05, |
|
"loss": 1.127, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.3854577310556286, |
|
"grad_norm": 0.2651516436947355, |
|
"learning_rate": 1.9481570744161024e-05, |
|
"loss": 1.1293, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.3876478318002628, |
|
"grad_norm": 0.2778922300040792, |
|
"learning_rate": 1.9469352493953767e-05, |
|
"loss": 1.1621, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.38983793254489707, |
|
"grad_norm": 0.277626897098439, |
|
"learning_rate": 1.945699585931755e-05, |
|
"loss": 1.1348, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.39202803328953134, |
|
"grad_norm": 0.28606581063244546, |
|
"learning_rate": 1.944450102083133e-05, |
|
"loss": 1.1, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.39421813403416556, |
|
"grad_norm": 0.2766274824029753, |
|
"learning_rate": 1.9431868161093773e-05, |
|
"loss": 1.1344, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.39640823477879983, |
|
"grad_norm": 0.28411728929167873, |
|
"learning_rate": 1.941909746472057e-05, |
|
"loss": 1.1352, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.3985983355234341, |
|
"grad_norm": 0.28207030065486227, |
|
"learning_rate": 1.9406189118341752e-05, |
|
"loss": 1.1338, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.4007884362680683, |
|
"grad_norm": 0.2889032305193362, |
|
"learning_rate": 1.939314331059895e-05, |
|
"loss": 1.1449, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.4029785370127026, |
|
"grad_norm": 0.2607847605615642, |
|
"learning_rate": 1.9379960232142655e-05, |
|
"loss": 1.1332, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.40516863775733686, |
|
"grad_norm": 0.27283416214879747, |
|
"learning_rate": 1.936664007562941e-05, |
|
"loss": 1.1187, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.4073587385019711, |
|
"grad_norm": 0.27360396328426034, |
|
"learning_rate": 1.9353183035719027e-05, |
|
"loss": 1.1111, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.40954883924660535, |
|
"grad_norm": 0.27641067744827497, |
|
"learning_rate": 1.9339589309071694e-05, |
|
"loss": 1.1146, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.4117389399912396, |
|
"grad_norm": 0.28012688463597135, |
|
"learning_rate": 1.932585909434515e-05, |
|
"loss": 1.1273, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.41392904073587383, |
|
"grad_norm": 0.2986899749731716, |
|
"learning_rate": 1.931199259219176e-05, |
|
"loss": 1.1516, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.4161191414805081, |
|
"grad_norm": 0.2746086383673798, |
|
"learning_rate": 1.929799000525557e-05, |
|
"loss": 1.1383, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.4183092422251424, |
|
"grad_norm": 0.2705485338360411, |
|
"learning_rate": 1.9283851538169376e-05, |
|
"loss": 1.1281, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.4204993429697766, |
|
"grad_norm": 0.2693102198514296, |
|
"learning_rate": 1.9269577397551698e-05, |
|
"loss": 1.1297, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.42268944371441086, |
|
"grad_norm": 0.2812249914983643, |
|
"learning_rate": 1.9255167792003803e-05, |
|
"loss": 1.1488, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.42487954445904513, |
|
"grad_norm": 0.2780616972935151, |
|
"learning_rate": 1.9240622932106606e-05, |
|
"loss": 1.1148, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.42706964520367935, |
|
"grad_norm": 0.2811437839638983, |
|
"learning_rate": 1.922594303041764e-05, |
|
"loss": 1.1242, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.4292597459483136, |
|
"grad_norm": 0.268208376235582, |
|
"learning_rate": 1.9211128301467913e-05, |
|
"loss": 1.1516, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.4314498466929479, |
|
"grad_norm": 0.2777452313433181, |
|
"learning_rate": 1.919617896175881e-05, |
|
"loss": 1.1273, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.4336399474375821, |
|
"grad_norm": 0.28289566775790254, |
|
"learning_rate": 1.918109522975888e-05, |
|
"loss": 1.1387, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.4358300481822164, |
|
"grad_norm": 0.28637508184497684, |
|
"learning_rate": 1.9165877325900696e-05, |
|
"loss": 1.1275, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.43802014892685065, |
|
"grad_norm": 0.2798034291448599, |
|
"learning_rate": 1.91505254725776e-05, |
|
"loss": 1.1135, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.44021024967148487, |
|
"grad_norm": 0.27447002943494403, |
|
"learning_rate": 1.9135039894140446e-05, |
|
"loss": 1.1383, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.44240035041611914, |
|
"grad_norm": 0.2645078006001802, |
|
"learning_rate": 1.911942081689437e-05, |
|
"loss": 1.134, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.4445904511607534, |
|
"grad_norm": 0.27970364517188895, |
|
"learning_rate": 1.910366846909542e-05, |
|
"loss": 1.1195, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.4467805519053876, |
|
"grad_norm": 0.27465951372753394, |
|
"learning_rate": 1.9087783080947263e-05, |
|
"loss": 1.1332, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.4489706526500219, |
|
"grad_norm": 0.26634601919762435, |
|
"learning_rate": 1.9071764884597812e-05, |
|
"loss": 1.1236, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.45116075339465617, |
|
"grad_norm": 0.2794646407733667, |
|
"learning_rate": 1.905561411413582e-05, |
|
"loss": 1.1582, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.4533508541392904, |
|
"grad_norm": 0.28173898357125154, |
|
"learning_rate": 1.903933100558747e-05, |
|
"loss": 1.1332, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.45554095488392465, |
|
"grad_norm": 0.2736898887292246, |
|
"learning_rate": 1.902291579691293e-05, |
|
"loss": 1.1434, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.4577310556285589, |
|
"grad_norm": 0.27191810888639717, |
|
"learning_rate": 1.9006368728002864e-05, |
|
"loss": 1.1275, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.45992115637319314, |
|
"grad_norm": 0.2828367794791594, |
|
"learning_rate": 1.8989690040674937e-05, |
|
"loss": 1.1555, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.4621112571178274, |
|
"grad_norm": 0.27719265415892497, |
|
"learning_rate": 1.897287997867027e-05, |
|
"loss": 1.1092, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.4643013578624617, |
|
"grad_norm": 0.2872145245485876, |
|
"learning_rate": 1.8955938787649896e-05, |
|
"loss": 1.1672, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.4664914586070959, |
|
"grad_norm": 0.28308043695745805, |
|
"learning_rate": 1.8938866715191137e-05, |
|
"loss": 1.1285, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.46868155935173017, |
|
"grad_norm": 0.28356329044611883, |
|
"learning_rate": 1.892166401078402e-05, |
|
"loss": 1.1186, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.47087166009636444, |
|
"grad_norm": 0.2870427948470036, |
|
"learning_rate": 1.8904330925827628e-05, |
|
"loss": 1.1344, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.4730617608409987, |
|
"grad_norm": 0.2711551541427653, |
|
"learning_rate": 1.8886867713626393e-05, |
|
"loss": 1.1174, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.47525186158563293, |
|
"grad_norm": 0.29791754553385197, |
|
"learning_rate": 1.8869274629386433e-05, |
|
"loss": 1.1207, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.4774419623302672, |
|
"grad_norm": 0.2843662761979316, |
|
"learning_rate": 1.8851551930211803e-05, |
|
"loss": 1.1449, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.47963206307490147, |
|
"grad_norm": 0.27143984381476377, |
|
"learning_rate": 1.8833699875100735e-05, |
|
"loss": 1.1035, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.4818221638195357, |
|
"grad_norm": 0.2757462217218276, |
|
"learning_rate": 1.881571872494187e-05, |
|
"loss": 1.1105, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.48401226456416996, |
|
"grad_norm": 0.2765481966040783, |
|
"learning_rate": 1.879760874251043e-05, |
|
"loss": 1.1156, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.48620236530880423, |
|
"grad_norm": 0.282978908681222, |
|
"learning_rate": 1.8779370192464378e-05, |
|
"loss": 1.1201, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.48839246605343845, |
|
"grad_norm": 0.27284223511573796, |
|
"learning_rate": 1.876100334134056e-05, |
|
"loss": 1.1266, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.4905825667980727, |
|
"grad_norm": 0.280578986759108, |
|
"learning_rate": 1.8742508457550804e-05, |
|
"loss": 1.1117, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.492772667542707, |
|
"grad_norm": 0.27600914760958584, |
|
"learning_rate": 1.8723885811377998e-05, |
|
"loss": 1.125, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.4949627682873412, |
|
"grad_norm": 0.2792414213857675, |
|
"learning_rate": 1.8705135674972133e-05, |
|
"loss": 1.1043, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.4971528690319755, |
|
"grad_norm": 0.29355325689986594, |
|
"learning_rate": 1.868625832234635e-05, |
|
"loss": 1.1283, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.49934296977660975, |
|
"grad_norm": 0.2959742597776297, |
|
"learning_rate": 1.8667254029372898e-05, |
|
"loss": 1.1379, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.501533070521244, |
|
"grad_norm": 0.2803561452383429, |
|
"learning_rate": 1.8648123073779136e-05, |
|
"loss": 1.1008, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.5037231712658782, |
|
"grad_norm": 0.28730614143136535, |
|
"learning_rate": 1.8628865735143464e-05, |
|
"loss": 1.0941, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.5059132720105125, |
|
"grad_norm": 0.2730078243750208, |
|
"learning_rate": 1.860948229489122e-05, |
|
"loss": 1.1369, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.5081033727551467, |
|
"grad_norm": 0.28244458331532185, |
|
"learning_rate": 1.8589973036290597e-05, |
|
"loss": 1.124, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.5102934734997809, |
|
"grad_norm": 0.27033517409253727, |
|
"learning_rate": 1.857033824444848e-05, |
|
"loss": 1.1002, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.5124835742444153, |
|
"grad_norm": 0.27997442688862706, |
|
"learning_rate": 1.855057820630629e-05, |
|
"loss": 1.127, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.5146736749890495, |
|
"grad_norm": 0.2924889952428353, |
|
"learning_rate": 1.8530693210635785e-05, |
|
"loss": 1.1121, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.5168637757336837, |
|
"grad_norm": 0.27255392506521753, |
|
"learning_rate": 1.8510683548034853e-05, |
|
"loss": 1.1289, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.519053876478318, |
|
"grad_norm": 0.27789463711596296, |
|
"learning_rate": 1.8490549510923243e-05, |
|
"loss": 1.1313, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.5212439772229522, |
|
"grad_norm": 0.270522153270926, |
|
"learning_rate": 1.8470291393538308e-05, |
|
"loss": 1.148, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.5234340779675866, |
|
"grad_norm": 0.2903581607172235, |
|
"learning_rate": 1.8449909491930707e-05, |
|
"loss": 1.1336, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.5256241787122208, |
|
"grad_norm": 0.28130785295367783, |
|
"learning_rate": 1.8429404103960068e-05, |
|
"loss": 1.0943, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.527814279456855, |
|
"grad_norm": 0.2847352994330959, |
|
"learning_rate": 1.840877552929064e-05, |
|
"loss": 1.1072, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.5300043802014893, |
|
"grad_norm": 0.27594764699978475, |
|
"learning_rate": 1.8388024069386913e-05, |
|
"loss": 1.1031, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.5321944809461235, |
|
"grad_norm": 0.27259818678775977, |
|
"learning_rate": 1.836715002750921e-05, |
|
"loss": 1.1352, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.5343845816907578, |
|
"grad_norm": 0.28100802685276305, |
|
"learning_rate": 1.8346153708709267e-05, |
|
"loss": 1.1191, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.5365746824353921, |
|
"grad_norm": 0.2691241413663141, |
|
"learning_rate": 1.832503541982576e-05, |
|
"loss": 1.0869, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.5387647831800263, |
|
"grad_norm": 0.2709716330941766, |
|
"learning_rate": 1.8303795469479824e-05, |
|
"loss": 1.1207, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.5409548839246605, |
|
"grad_norm": 0.2941920285023787, |
|
"learning_rate": 1.8282434168070554e-05, |
|
"loss": 1.1223, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.5431449846692948, |
|
"grad_norm": 0.290222410153825, |
|
"learning_rate": 1.826095182777045e-05, |
|
"loss": 1.1059, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.545335085413929, |
|
"grad_norm": 0.2806597130704421, |
|
"learning_rate": 1.8239348762520877e-05, |
|
"loss": 1.1268, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.5475251861585633, |
|
"grad_norm": 0.26150041135869034, |
|
"learning_rate": 1.8217625288027453e-05, |
|
"loss": 1.1258, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5497152869031976, |
|
"grad_norm": 0.2685954735341038, |
|
"learning_rate": 1.8195781721755464e-05, |
|
"loss": 1.1062, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.5519053876478318, |
|
"grad_norm": 0.281704971443474, |
|
"learning_rate": 1.8173818382925196e-05, |
|
"loss": 1.1219, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.554095488392466, |
|
"grad_norm": 0.28159951912225467, |
|
"learning_rate": 1.8151735592507285e-05, |
|
"loss": 1.0965, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.5562855891371004, |
|
"grad_norm": 0.2706729959241479, |
|
"learning_rate": 1.8129533673218026e-05, |
|
"loss": 1.109, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.5584756898817346, |
|
"grad_norm": 0.2885774782422386, |
|
"learning_rate": 1.8107212949514648e-05, |
|
"loss": 1.1039, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.5606657906263688, |
|
"grad_norm": 0.27545326234938333, |
|
"learning_rate": 1.8084773747590594e-05, |
|
"loss": 1.1096, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.5628558913710031, |
|
"grad_norm": 0.2689447608381848, |
|
"learning_rate": 1.8062216395370723e-05, |
|
"loss": 1.1244, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.5650459921156373, |
|
"grad_norm": 0.2697532238591275, |
|
"learning_rate": 1.8039541222506544e-05, |
|
"loss": 1.1002, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.5672360928602715, |
|
"grad_norm": 0.2671203153481593, |
|
"learning_rate": 1.801674856037138e-05, |
|
"loss": 1.1066, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.5694261936049059, |
|
"grad_norm": 0.2761723227472548, |
|
"learning_rate": 1.7993838742055544e-05, |
|
"loss": 1.11, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5716162943495401, |
|
"grad_norm": 0.28615486924844014, |
|
"learning_rate": 1.7970812102361455e-05, |
|
"loss": 1.1371, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.5738063950941743, |
|
"grad_norm": 0.2747844784537908, |
|
"learning_rate": 1.7947668977798748e-05, |
|
"loss": 1.1049, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.5759964958388086, |
|
"grad_norm": 0.28073244882701426, |
|
"learning_rate": 1.7924409706579366e-05, |
|
"loss": 1.1221, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.5781865965834428, |
|
"grad_norm": 0.2943655545530222, |
|
"learning_rate": 1.7901034628612603e-05, |
|
"loss": 1.1371, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.5803766973280771, |
|
"grad_norm": 0.2824289348253016, |
|
"learning_rate": 1.7877544085500156e-05, |
|
"loss": 1.107, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.5825667980727114, |
|
"grad_norm": 0.26639957586943036, |
|
"learning_rate": 1.785393842053111e-05, |
|
"loss": 1.1281, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.5847568988173456, |
|
"grad_norm": 0.2846585279548851, |
|
"learning_rate": 1.7830217978676935e-05, |
|
"loss": 1.0809, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.5869469995619798, |
|
"grad_norm": 0.27391331375101896, |
|
"learning_rate": 1.780638310658645e-05, |
|
"loss": 1.1395, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.5891371003066141, |
|
"grad_norm": 0.28513106577718356, |
|
"learning_rate": 1.778243415258074e-05, |
|
"loss": 1.118, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.5913272010512484, |
|
"grad_norm": 0.26859438508124023, |
|
"learning_rate": 1.7758371466648076e-05, |
|
"loss": 1.1133, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.5935173017958826, |
|
"grad_norm": 0.27262765520782745, |
|
"learning_rate": 1.7734195400438804e-05, |
|
"loss": 1.1105, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.5957074025405169, |
|
"grad_norm": 0.26578867382487387, |
|
"learning_rate": 1.7709906307260193e-05, |
|
"loss": 1.109, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.5978975032851511, |
|
"grad_norm": 0.2739484520721144, |
|
"learning_rate": 1.7685504542071294e-05, |
|
"loss": 1.1168, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.6000876040297853, |
|
"grad_norm": 0.2613962343706309, |
|
"learning_rate": 1.7660990461477717e-05, |
|
"loss": 1.1187, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.6022777047744197, |
|
"grad_norm": 0.27907516560188245, |
|
"learning_rate": 1.7636364423726468e-05, |
|
"loss": 1.1059, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.6044678055190539, |
|
"grad_norm": 0.2692785993705207, |
|
"learning_rate": 1.7611626788700658e-05, |
|
"loss": 1.1176, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.6066579062636881, |
|
"grad_norm": 0.2826591838293577, |
|
"learning_rate": 1.75867779179143e-05, |
|
"loss": 1.1215, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.6088480070083224, |
|
"grad_norm": 0.2585536815951757, |
|
"learning_rate": 1.7561818174506976e-05, |
|
"loss": 1.1041, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.6110381077529566, |
|
"grad_norm": 0.2690038568592748, |
|
"learning_rate": 1.7536747923238566e-05, |
|
"loss": 1.0959, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.6132282084975909, |
|
"grad_norm": 0.2794428452568368, |
|
"learning_rate": 1.75115675304839e-05, |
|
"loss": 1.1203, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6154183092422252, |
|
"grad_norm": 0.2712753678084759, |
|
"learning_rate": 1.7486277364227406e-05, |
|
"loss": 1.1428, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.6176084099868594, |
|
"grad_norm": 0.30076453779446854, |
|
"learning_rate": 1.7460877794057736e-05, |
|
"loss": 1.1297, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.6197985107314936, |
|
"grad_norm": 0.27142722196615265, |
|
"learning_rate": 1.7435369191162357e-05, |
|
"loss": 1.1109, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.6219886114761279, |
|
"grad_norm": 0.27765521985915914, |
|
"learning_rate": 1.7409751928322143e-05, |
|
"loss": 1.1043, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.6241787122207622, |
|
"grad_norm": 0.2736713650444242, |
|
"learning_rate": 1.738402637990591e-05, |
|
"loss": 1.1289, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.6263688129653964, |
|
"grad_norm": 0.27068657267510915, |
|
"learning_rate": 1.7358192921864955e-05, |
|
"loss": 1.0949, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.6285589137100307, |
|
"grad_norm": 0.2731706398238531, |
|
"learning_rate": 1.7332251931727547e-05, |
|
"loss": 1.1039, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.6307490144546649, |
|
"grad_norm": 0.27367550283243264, |
|
"learning_rate": 1.7306203788593436e-05, |
|
"loss": 1.0953, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.6329391151992991, |
|
"grad_norm": 0.2738816381357539, |
|
"learning_rate": 1.7280048873128296e-05, |
|
"loss": 1.1121, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.6351292159439335, |
|
"grad_norm": 0.2698317289481176, |
|
"learning_rate": 1.7253787567558152e-05, |
|
"loss": 1.0783, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.6373193166885677, |
|
"grad_norm": 0.2769632550292509, |
|
"learning_rate": 1.722742025566382e-05, |
|
"loss": 1.1086, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.6395094174332019, |
|
"grad_norm": 0.2897254427604234, |
|
"learning_rate": 1.7200947322775276e-05, |
|
"loss": 1.1316, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.6416995181778362, |
|
"grad_norm": 0.2640477629321144, |
|
"learning_rate": 1.7174369155766037e-05, |
|
"loss": 1.1121, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.6438896189224704, |
|
"grad_norm": 0.2773590006369379, |
|
"learning_rate": 1.7147686143047507e-05, |
|
"loss": 1.1031, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.6460797196671046, |
|
"grad_norm": 0.2837149025424185, |
|
"learning_rate": 1.7120898674563292e-05, |
|
"loss": 1.109, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.648269820411739, |
|
"grad_norm": 0.28232081520676006, |
|
"learning_rate": 1.7094007141783512e-05, |
|
"loss": 1.0969, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.6504599211563732, |
|
"grad_norm": 0.27647777357776215, |
|
"learning_rate": 1.7067011937699067e-05, |
|
"loss": 1.0973, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.6526500219010074, |
|
"grad_norm": 0.272043266444083, |
|
"learning_rate": 1.703991345681591e-05, |
|
"loss": 1.1156, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.6548401226456417, |
|
"grad_norm": 0.27378936375046253, |
|
"learning_rate": 1.7012712095149268e-05, |
|
"loss": 1.1102, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.657030223390276, |
|
"grad_norm": 0.275330797988678, |
|
"learning_rate": 1.6985408250217866e-05, |
|
"loss": 1.1324, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6592203241349102, |
|
"grad_norm": 0.2713748379437795, |
|
"learning_rate": 1.6958002321038106e-05, |
|
"loss": 1.1156, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.6614104248795445, |
|
"grad_norm": 0.28092417423165894, |
|
"learning_rate": 1.693049470811825e-05, |
|
"loss": 1.1201, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.6636005256241787, |
|
"grad_norm": 0.27389632704021605, |
|
"learning_rate": 1.690288581345255e-05, |
|
"loss": 1.1156, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.6657906263688129, |
|
"grad_norm": 0.27561700781744836, |
|
"learning_rate": 1.6875176040515383e-05, |
|
"loss": 1.0928, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.6679807271134472, |
|
"grad_norm": 0.262557233485991, |
|
"learning_rate": 1.6847365794255363e-05, |
|
"loss": 1.1049, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.6701708278580815, |
|
"grad_norm": 0.3001616781731598, |
|
"learning_rate": 1.68194554810894e-05, |
|
"loss": 1.1039, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.6723609286027157, |
|
"grad_norm": 0.27415291968349004, |
|
"learning_rate": 1.6791445508896784e-05, |
|
"loss": 1.1135, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.67455102934735, |
|
"grad_norm": 0.2732543195254785, |
|
"learning_rate": 1.6763336287013216e-05, |
|
"loss": 1.1082, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.6767411300919842, |
|
"grad_norm": 0.26943767063054014, |
|
"learning_rate": 1.6735128226224816e-05, |
|
"loss": 1.0922, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.6789312308366184, |
|
"grad_norm": 0.277744541655017, |
|
"learning_rate": 1.6706821738762138e-05, |
|
"loss": 1.0938, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.6811213315812528, |
|
"grad_norm": 0.2719454837646402, |
|
"learning_rate": 1.6678417238294128e-05, |
|
"loss": 1.0947, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.683311432325887, |
|
"grad_norm": 0.2630586272470944, |
|
"learning_rate": 1.6649915139922093e-05, |
|
"loss": 1.1033, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.6855015330705212, |
|
"grad_norm": 0.2724307550324142, |
|
"learning_rate": 1.6621315860173627e-05, |
|
"loss": 1.1441, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.6876916338151555, |
|
"grad_norm": 0.2708643111258457, |
|
"learning_rate": 1.659261981699653e-05, |
|
"loss": 1.0734, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.6898817345597897, |
|
"grad_norm": 0.2769771935758939, |
|
"learning_rate": 1.656382742975268e-05, |
|
"loss": 1.1176, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.692071835304424, |
|
"grad_norm": 0.265923617478963, |
|
"learning_rate": 1.6534939119211935e-05, |
|
"loss": 1.1164, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.6942619360490583, |
|
"grad_norm": 0.26879461120855397, |
|
"learning_rate": 1.6505955307545972e-05, |
|
"loss": 1.0928, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.6964520367936925, |
|
"grad_norm": 0.275380991512307, |
|
"learning_rate": 1.64768764183221e-05, |
|
"loss": 1.1559, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.6986421375383267, |
|
"grad_norm": 0.27286451730113803, |
|
"learning_rate": 1.6447702876497097e-05, |
|
"loss": 1.0912, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.700832238282961, |
|
"grad_norm": 0.2599528062396287, |
|
"learning_rate": 1.641843510841098e-05, |
|
"loss": 1.1156, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.7030223390275953, |
|
"grad_norm": 0.2825057968855528, |
|
"learning_rate": 1.6389073541780784e-05, |
|
"loss": 1.1156, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.7052124397722295, |
|
"grad_norm": 0.26388995603859694, |
|
"learning_rate": 1.635961860569431e-05, |
|
"loss": 1.1191, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.7074025405168638, |
|
"grad_norm": 0.2736402329338208, |
|
"learning_rate": 1.633007073060385e-05, |
|
"loss": 1.0949, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.709592641261498, |
|
"grad_norm": 0.2738242842182442, |
|
"learning_rate": 1.6300430348319903e-05, |
|
"loss": 1.115, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.7117827420061322, |
|
"grad_norm": 0.27283103691493066, |
|
"learning_rate": 1.627069789200487e-05, |
|
"loss": 1.1236, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.7139728427507666, |
|
"grad_norm": 0.27042925057968625, |
|
"learning_rate": 1.6240873796166696e-05, |
|
"loss": 1.1115, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.7161629434954008, |
|
"grad_norm": 0.2648740003276731, |
|
"learning_rate": 1.621095849665255e-05, |
|
"loss": 1.1135, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.718353044240035, |
|
"grad_norm": 0.2699312698156825, |
|
"learning_rate": 1.6180952430642452e-05, |
|
"loss": 1.0814, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.7205431449846693, |
|
"grad_norm": 0.2727113593417385, |
|
"learning_rate": 1.615085603664286e-05, |
|
"loss": 1.1227, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.7227332457293035, |
|
"grad_norm": 0.261310750518031, |
|
"learning_rate": 1.6120669754480295e-05, |
|
"loss": 1.1074, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.7249233464739377, |
|
"grad_norm": 0.2572311428806488, |
|
"learning_rate": 1.6090394025294885e-05, |
|
"loss": 1.1078, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.7271134472185721, |
|
"grad_norm": 0.2626062536380091, |
|
"learning_rate": 1.606002929153394e-05, |
|
"loss": 1.0889, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.7293035479632063, |
|
"grad_norm": 0.27208847516673046, |
|
"learning_rate": 1.602957599694547e-05, |
|
"loss": 1.1285, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.7314936487078406, |
|
"grad_norm": 0.25895874544574315, |
|
"learning_rate": 1.5999034586571705e-05, |
|
"loss": 1.0859, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.7336837494524748, |
|
"grad_norm": 0.26323986195539395, |
|
"learning_rate": 1.59684055067426e-05, |
|
"loss": 1.0869, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.735873850197109, |
|
"grad_norm": 0.26562354048699777, |
|
"learning_rate": 1.5937689205069304e-05, |
|
"loss": 1.1176, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.7380639509417434, |
|
"grad_norm": 0.2685404295636409, |
|
"learning_rate": 1.5906886130437606e-05, |
|
"loss": 1.1092, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.7402540516863776, |
|
"grad_norm": 0.26206885479027087, |
|
"learning_rate": 1.5875996733001405e-05, |
|
"loss": 1.1057, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.7424441524310118, |
|
"grad_norm": 0.2684814010119893, |
|
"learning_rate": 1.5845021464176114e-05, |
|
"loss": 1.1266, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.7446342531756461, |
|
"grad_norm": 0.270738097510599, |
|
"learning_rate": 1.581396077663206e-05, |
|
"loss": 1.1207, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.7468243539202803, |
|
"grad_norm": 0.26716036294720524, |
|
"learning_rate": 1.5782815124287867e-05, |
|
"loss": 1.1014, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.7490144546649146, |
|
"grad_norm": 0.26694423569483944, |
|
"learning_rate": 1.575158496230383e-05, |
|
"loss": 1.1039, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.7512045554095489, |
|
"grad_norm": 0.2815172017782616, |
|
"learning_rate": 1.5720270747075277e-05, |
|
"loss": 1.0941, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.7533946561541831, |
|
"grad_norm": 0.2624397530715869, |
|
"learning_rate": 1.568887293622587e-05, |
|
"loss": 1.0947, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.7555847568988173, |
|
"grad_norm": 0.26315263302354647, |
|
"learning_rate": 1.565739198860093e-05, |
|
"loss": 1.101, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.7577748576434516, |
|
"grad_norm": 0.26819487927961905, |
|
"learning_rate": 1.562582836426074e-05, |
|
"loss": 1.1062, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.7599649583880859, |
|
"grad_norm": 0.2752388588069863, |
|
"learning_rate": 1.559418252447381e-05, |
|
"loss": 1.118, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.7621550591327201, |
|
"grad_norm": 0.2743015464537201, |
|
"learning_rate": 1.5562454931710146e-05, |
|
"loss": 1.1061, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.7643451598773544, |
|
"grad_norm": 0.2606577851181012, |
|
"learning_rate": 1.5530646049634473e-05, |
|
"loss": 1.0871, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.7665352606219886, |
|
"grad_norm": 0.26480322824428465, |
|
"learning_rate": 1.5498756343099495e-05, |
|
"loss": 1.1221, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.7687253613666228, |
|
"grad_norm": 0.2668874639775243, |
|
"learning_rate": 1.5466786278139054e-05, |
|
"loss": 1.1434, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.7709154621112572, |
|
"grad_norm": 0.26968985298684783, |
|
"learning_rate": 1.543473632196136e-05, |
|
"loss": 1.1061, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.7731055628558914, |
|
"grad_norm": 0.270316749677291, |
|
"learning_rate": 1.540260694294214e-05, |
|
"loss": 1.1309, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.7752956636005256, |
|
"grad_norm": 0.27316189703503946, |
|
"learning_rate": 1.5370398610617804e-05, |
|
"loss": 1.1191, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.7774857643451599, |
|
"grad_norm": 0.27324029056852334, |
|
"learning_rate": 1.5338111795678585e-05, |
|
"loss": 1.1016, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.7796758650897941, |
|
"grad_norm": 0.26275659139758095, |
|
"learning_rate": 1.530574696996164e-05, |
|
"loss": 1.091, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.7818659658344284, |
|
"grad_norm": 0.2608113586761751, |
|
"learning_rate": 1.5273304606444185e-05, |
|
"loss": 1.1047, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.7840560665790627, |
|
"grad_norm": 0.2669656071169941, |
|
"learning_rate": 1.5240785179236556e-05, |
|
"loss": 1.1051, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.7862461673236969, |
|
"grad_norm": 0.2787846832682112, |
|
"learning_rate": 1.5208189163575306e-05, |
|
"loss": 1.0857, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.7884362680683311, |
|
"grad_norm": 0.2681859587142459, |
|
"learning_rate": 1.5175517035816236e-05, |
|
"loss": 1.0865, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.7906263688129654, |
|
"grad_norm": 0.2647278576482758, |
|
"learning_rate": 1.5142769273427445e-05, |
|
"loss": 1.0879, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.7928164695575997, |
|
"grad_norm": 0.26988725688107346, |
|
"learning_rate": 1.5109946354982352e-05, |
|
"loss": 1.1203, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.7950065703022339, |
|
"grad_norm": 0.26671062187636824, |
|
"learning_rate": 1.5077048760152701e-05, |
|
"loss": 1.102, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.7971966710468682, |
|
"grad_norm": 0.26783415660796006, |
|
"learning_rate": 1.5044076969701551e-05, |
|
"loss": 1.0967, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.7993867717915024, |
|
"grad_norm": 0.26130280226328173, |
|
"learning_rate": 1.5011031465476249e-05, |
|
"loss": 1.1078, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.8015768725361366, |
|
"grad_norm": 0.27999326528902496, |
|
"learning_rate": 1.4977912730401397e-05, |
|
"loss": 1.0814, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.803766973280771, |
|
"grad_norm": 0.25954065164515167, |
|
"learning_rate": 1.4944721248471776e-05, |
|
"loss": 1.1092, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.8059570740254052, |
|
"grad_norm": 0.2664277831190814, |
|
"learning_rate": 1.491145750474529e-05, |
|
"loss": 1.1137, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.8081471747700394, |
|
"grad_norm": 0.30116014543803976, |
|
"learning_rate": 1.4878121985335879e-05, |
|
"loss": 1.0996, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.8103372755146737, |
|
"grad_norm": 0.2634337997526123, |
|
"learning_rate": 1.484471517740639e-05, |
|
"loss": 1.0869, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.8125273762593079, |
|
"grad_norm": 0.2666163201346335, |
|
"learning_rate": 1.4811237569161491e-05, |
|
"loss": 1.0826, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.8147174770039421, |
|
"grad_norm": 0.28026478782735814, |
|
"learning_rate": 1.4777689649840518e-05, |
|
"loss": 1.0844, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.8169075777485765, |
|
"grad_norm": 0.2618418896808571, |
|
"learning_rate": 1.4744071909710323e-05, |
|
"loss": 1.1223, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.8190976784932107, |
|
"grad_norm": 0.2785664585456496, |
|
"learning_rate": 1.4710384840058114e-05, |
|
"loss": 1.1012, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.8212877792378449, |
|
"grad_norm": 0.2690351334092324, |
|
"learning_rate": 1.4676628933184278e-05, |
|
"loss": 1.1176, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.8234778799824792, |
|
"grad_norm": 0.27324737770550867, |
|
"learning_rate": 1.4642804682395186e-05, |
|
"loss": 1.0885, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.8256679807271134, |
|
"grad_norm": 0.2579358276970761, |
|
"learning_rate": 1.4608912581995982e-05, |
|
"loss": 1.1043, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.8278580814717477, |
|
"grad_norm": 0.26105385274922205, |
|
"learning_rate": 1.4574953127283353e-05, |
|
"loss": 1.102, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.830048182216382, |
|
"grad_norm": 0.2673842799065729, |
|
"learning_rate": 1.4540926814538303e-05, |
|
"loss": 1.0922, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.8322382829610162, |
|
"grad_norm": 0.26252808529604915, |
|
"learning_rate": 1.4506834141018895e-05, |
|
"loss": 1.0918, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.8344283837056504, |
|
"grad_norm": 0.26910609526126184, |
|
"learning_rate": 1.4472675604952979e-05, |
|
"loss": 1.0902, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.8366184844502847, |
|
"grad_norm": 0.26133364932976627, |
|
"learning_rate": 1.443845170553092e-05, |
|
"loss": 1.0889, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.838808585194919, |
|
"grad_norm": 0.27193238728414193, |
|
"learning_rate": 1.440416294289829e-05, |
|
"loss": 1.091, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.8409986859395532, |
|
"grad_norm": 0.262829311113063, |
|
"learning_rate": 1.4369809818148586e-05, |
|
"loss": 1.0875, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.8431887866841875, |
|
"grad_norm": 0.26524372560054243, |
|
"learning_rate": 1.4335392833315862e-05, |
|
"loss": 1.1064, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.8453788874288217, |
|
"grad_norm": 0.2584636235528877, |
|
"learning_rate": 1.430091249136744e-05, |
|
"loss": 1.1074, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.8475689881734559, |
|
"grad_norm": 0.2775481255437133, |
|
"learning_rate": 1.4266369296196532e-05, |
|
"loss": 1.0938, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.8497590889180903, |
|
"grad_norm": 0.26138245663872295, |
|
"learning_rate": 1.4231763752614876e-05, |
|
"loss": 1.1285, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.8519491896627245, |
|
"grad_norm": 0.2707757340268956, |
|
"learning_rate": 1.4197096366345372e-05, |
|
"loss": 1.1139, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.8541392904073587, |
|
"grad_norm": 0.26782566168738436, |
|
"learning_rate": 1.4162367644014683e-05, |
|
"loss": 1.107, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.856329391151993, |
|
"grad_norm": 0.26526168698615754, |
|
"learning_rate": 1.4127578093145833e-05, |
|
"loss": 1.1086, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.8585194918966272, |
|
"grad_norm": 0.263651586567362, |
|
"learning_rate": 1.4092728222150789e-05, |
|
"loss": 1.107, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.8607095926412615, |
|
"grad_norm": 0.2753196350258517, |
|
"learning_rate": 1.4057818540323034e-05, |
|
"loss": 1.1176, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.8628996933858958, |
|
"grad_norm": 0.2844631244822961, |
|
"learning_rate": 1.4022849557830113e-05, |
|
"loss": 1.0777, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.86508979413053, |
|
"grad_norm": 0.2934373431409096, |
|
"learning_rate": 1.3987821785706206e-05, |
|
"loss": 1.1, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.8672798948751642, |
|
"grad_norm": 0.25800409226074444, |
|
"learning_rate": 1.395273573584462e-05, |
|
"loss": 1.1039, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.8694699956197985, |
|
"grad_norm": 0.27027320276142963, |
|
"learning_rate": 1.3917591920990339e-05, |
|
"loss": 1.1207, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.8716600963644328, |
|
"grad_norm": 0.2693164308545569, |
|
"learning_rate": 1.3882390854732518e-05, |
|
"loss": 1.0965, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.873850197109067, |
|
"grad_norm": 0.2606202597234289, |
|
"learning_rate": 1.3847133051496981e-05, |
|
"loss": 1.1125, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.8760402978537013, |
|
"grad_norm": 0.2694454731907674, |
|
"learning_rate": 1.3811819026538702e-05, |
|
"loss": 1.0926, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8782303985983355, |
|
"grad_norm": 0.26631733983589906, |
|
"learning_rate": 1.3776449295934274e-05, |
|
"loss": 1.0949, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.8804204993429697, |
|
"grad_norm": 0.26589757058511515, |
|
"learning_rate": 1.3741024376574369e-05, |
|
"loss": 1.098, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.8826106000876041, |
|
"grad_norm": 0.2736792848915224, |
|
"learning_rate": 1.3705544786156183e-05, |
|
"loss": 1.1156, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.8848007008322383, |
|
"grad_norm": 0.2658722341229626, |
|
"learning_rate": 1.3670011043175871e-05, |
|
"loss": 1.1164, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.8869908015768725, |
|
"grad_norm": 0.26178245994021004, |
|
"learning_rate": 1.3634423666920968e-05, |
|
"loss": 1.1336, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.8891809023215068, |
|
"grad_norm": 0.27553942746876153, |
|
"learning_rate": 1.3598783177462807e-05, |
|
"loss": 1.1238, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.891371003066141, |
|
"grad_norm": 0.2681764008089611, |
|
"learning_rate": 1.3563090095648907e-05, |
|
"loss": 1.1117, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.8935611038107752, |
|
"grad_norm": 0.2669696710935724, |
|
"learning_rate": 1.3527344943095374e-05, |
|
"loss": 1.1184, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.8957512045554096, |
|
"grad_norm": 0.2573801447434789, |
|
"learning_rate": 1.3491548242179267e-05, |
|
"loss": 1.0996, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.8979413053000438, |
|
"grad_norm": 0.2746222462070939, |
|
"learning_rate": 1.345570051603097e-05, |
|
"loss": 1.102, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.900131406044678, |
|
"grad_norm": 0.2671374820083521, |
|
"learning_rate": 1.3419802288526551e-05, |
|
"loss": 1.0863, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.9023215067893123, |
|
"grad_norm": 0.26407337641785006, |
|
"learning_rate": 1.3383854084280088e-05, |
|
"loss": 1.0873, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.9045116075339465, |
|
"grad_norm": 0.26664290685548253, |
|
"learning_rate": 1.3347856428636037e-05, |
|
"loss": 1.0939, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.9067017082785808, |
|
"grad_norm": 0.27742565755152865, |
|
"learning_rate": 1.3311809847661512e-05, |
|
"loss": 1.1012, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.9088918090232151, |
|
"grad_norm": 0.26844433618793634, |
|
"learning_rate": 1.3275714868138629e-05, |
|
"loss": 1.0992, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.9110819097678493, |
|
"grad_norm": 0.31156760423493884, |
|
"learning_rate": 1.3239572017556792e-05, |
|
"loss": 1.127, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.9132720105124835, |
|
"grad_norm": 0.2737693950564783, |
|
"learning_rate": 1.3203381824105001e-05, |
|
"loss": 1.0795, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.9154621112571178, |
|
"grad_norm": 0.2655988793911631, |
|
"learning_rate": 1.31671448166641e-05, |
|
"loss": 1.0982, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.9176522120017521, |
|
"grad_norm": 0.278932714958382, |
|
"learning_rate": 1.3130861524799091e-05, |
|
"loss": 1.0846, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.9198423127463863, |
|
"grad_norm": 0.2613274740773185, |
|
"learning_rate": 1.309453247875136e-05, |
|
"loss": 1.1, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.9220324134910206, |
|
"grad_norm": 0.2713075516855544, |
|
"learning_rate": 1.3058158209430944e-05, |
|
"loss": 1.1074, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.9242225142356548, |
|
"grad_norm": 0.2609833323859274, |
|
"learning_rate": 1.3021739248408776e-05, |
|
"loss": 1.107, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.926412614980289, |
|
"grad_norm": 0.2977024683808482, |
|
"learning_rate": 1.2985276127908897e-05, |
|
"loss": 1.1176, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.9286027157249234, |
|
"grad_norm": 0.27394749452645756, |
|
"learning_rate": 1.2948769380800706e-05, |
|
"loss": 1.0777, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.9307928164695576, |
|
"grad_norm": 0.2566984919681051, |
|
"learning_rate": 1.2912219540591145e-05, |
|
"loss": 1.1031, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.9329829172141918, |
|
"grad_norm": 0.2648683843482838, |
|
"learning_rate": 1.2875627141416926e-05, |
|
"loss": 1.1004, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.9351730179588261, |
|
"grad_norm": 0.27158910578936263, |
|
"learning_rate": 1.2838992718036707e-05, |
|
"loss": 1.1086, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.9373631187034603, |
|
"grad_norm": 0.28448174882950567, |
|
"learning_rate": 1.2802316805823293e-05, |
|
"loss": 1.0879, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.9395532194480947, |
|
"grad_norm": 0.2662017189971071, |
|
"learning_rate": 1.27655999407558e-05, |
|
"loss": 1.1258, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.9417433201927289, |
|
"grad_norm": 0.27194322611948335, |
|
"learning_rate": 1.2728842659411815e-05, |
|
"loss": 1.091, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.9439334209373631, |
|
"grad_norm": 0.25534754454963765, |
|
"learning_rate": 1.2692045498959584e-05, |
|
"loss": 1.0658, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.9461235216819974, |
|
"grad_norm": 0.26353467479376913, |
|
"learning_rate": 1.2655208997150134e-05, |
|
"loss": 1.084, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.9483136224266316, |
|
"grad_norm": 0.2627027873198173, |
|
"learning_rate": 1.2618333692309426e-05, |
|
"loss": 1.1117, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.9505037231712659, |
|
"grad_norm": 0.2689357563785859, |
|
"learning_rate": 1.2581420123330476e-05, |
|
"loss": 1.1363, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.9526938239159002, |
|
"grad_norm": 0.26046024725339567, |
|
"learning_rate": 1.2544468829665503e-05, |
|
"loss": 1.1094, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.9548839246605344, |
|
"grad_norm": 0.2598928969406913, |
|
"learning_rate": 1.2507480351318032e-05, |
|
"loss": 1.1092, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.9570740254051686, |
|
"grad_norm": 0.2698599669899258, |
|
"learning_rate": 1.2470455228834987e-05, |
|
"loss": 1.1307, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.9592641261498029, |
|
"grad_norm": 0.28035682075399004, |
|
"learning_rate": 1.243339400329882e-05, |
|
"loss": 1.1156, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.9614542268944372, |
|
"grad_norm": 0.2645856709181594, |
|
"learning_rate": 1.2396297216319588e-05, |
|
"loss": 1.0924, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.9636443276390714, |
|
"grad_norm": 0.263618320158081, |
|
"learning_rate": 1.2359165410027038e-05, |
|
"loss": 1.0682, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.9658344283837057, |
|
"grad_norm": 0.2650009200265081, |
|
"learning_rate": 1.232199912706269e-05, |
|
"loss": 1.1012, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.9680245291283399, |
|
"grad_norm": 0.26416661065971203, |
|
"learning_rate": 1.2284798910571898e-05, |
|
"loss": 1.1186, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.9702146298729741, |
|
"grad_norm": 0.25736813068625275, |
|
"learning_rate": 1.2247565304195924e-05, |
|
"loss": 1.0914, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.9724047306176085, |
|
"grad_norm": 0.2586875575139371, |
|
"learning_rate": 1.2210298852063984e-05, |
|
"loss": 1.1008, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.9745948313622427, |
|
"grad_norm": 0.26173267154631147, |
|
"learning_rate": 1.2173000098785299e-05, |
|
"loss": 1.0914, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.9767849321068769, |
|
"grad_norm": 0.26864230049294224, |
|
"learning_rate": 1.2135669589441141e-05, |
|
"loss": 1.0641, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.9789750328515112, |
|
"grad_norm": 0.2655742223524583, |
|
"learning_rate": 1.2098307869576858e-05, |
|
"loss": 1.1055, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.9811651335961454, |
|
"grad_norm": 0.2671231847391253, |
|
"learning_rate": 1.2060915485193907e-05, |
|
"loss": 1.0871, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.9833552343407796, |
|
"grad_norm": 0.26788579163236637, |
|
"learning_rate": 1.2023492982741875e-05, |
|
"loss": 1.1068, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.985545335085414, |
|
"grad_norm": 0.28062285715180646, |
|
"learning_rate": 1.1986040909110494e-05, |
|
"loss": 1.1516, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.9877354358300482, |
|
"grad_norm": 0.27291257622616144, |
|
"learning_rate": 1.1948559811621645e-05, |
|
"loss": 1.0926, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.9899255365746824, |
|
"grad_norm": 0.2688314222045062, |
|
"learning_rate": 1.1911050238021362e-05, |
|
"loss": 1.0813, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.9921156373193167, |
|
"grad_norm": 0.26864057707347405, |
|
"learning_rate": 1.1873512736471829e-05, |
|
"loss": 1.0973, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.994305738063951, |
|
"grad_norm": 0.2594131843312388, |
|
"learning_rate": 1.1835947855543356e-05, |
|
"loss": 1.0924, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.9964958388085852, |
|
"grad_norm": 0.26027744632734207, |
|
"learning_rate": 1.1798356144206395e-05, |
|
"loss": 1.1008, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.9986859395532195, |
|
"grad_norm": 0.2723136597045715, |
|
"learning_rate": 1.1760738151823474e-05, |
|
"loss": 1.0941, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.0008760402978536, |
|
"grad_norm": 0.26592399363964636, |
|
"learning_rate": 1.1723094428141202e-05, |
|
"loss": 1.0813, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 1.003066141042488, |
|
"grad_norm": 0.2741757414547693, |
|
"learning_rate": 1.1685425523282218e-05, |
|
"loss": 1.0701, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.0052562417871223, |
|
"grad_norm": 0.2785639167681659, |
|
"learning_rate": 1.1647731987737163e-05, |
|
"loss": 1.0855, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 1.0074463425317564, |
|
"grad_norm": 0.25921909631409684, |
|
"learning_rate": 1.1610014372356621e-05, |
|
"loss": 1.05, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.0096364432763907, |
|
"grad_norm": 0.26730561024556904, |
|
"learning_rate": 1.1572273228343085e-05, |
|
"loss": 1.0709, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 1.011826544021025, |
|
"grad_norm": 0.2666226352769794, |
|
"learning_rate": 1.1534509107242886e-05, |
|
"loss": 1.0814, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.0140166447656591, |
|
"grad_norm": 0.26883958965423044, |
|
"learning_rate": 1.1496722560938147e-05, |
|
"loss": 1.0398, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 1.0162067455102934, |
|
"grad_norm": 0.26461136654012524, |
|
"learning_rate": 1.1458914141638708e-05, |
|
"loss": 1.0699, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.0183968462549278, |
|
"grad_norm": 0.2621701797623292, |
|
"learning_rate": 1.1421084401874063e-05, |
|
"loss": 1.0684, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.0205869469995619, |
|
"grad_norm": 0.2638410480632078, |
|
"learning_rate": 1.1383233894485278e-05, |
|
"loss": 1.0539, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.0227770477441962, |
|
"grad_norm": 0.2773665371526558, |
|
"learning_rate": 1.134536317261691e-05, |
|
"loss": 1.0666, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 1.0249671484888305, |
|
"grad_norm": 0.27738246423485236, |
|
"learning_rate": 1.1307472789708942e-05, |
|
"loss": 1.0904, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.0271572492334646, |
|
"grad_norm": 0.26555678082360085, |
|
"learning_rate": 1.1269563299488673e-05, |
|
"loss": 1.0764, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 1.029347349978099, |
|
"grad_norm": 0.2702748541117833, |
|
"learning_rate": 1.123163525596264e-05, |
|
"loss": 1.0652, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.0315374507227333, |
|
"grad_norm": 0.2832727236287343, |
|
"learning_rate": 1.119368921340851e-05, |
|
"loss": 1.0742, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 1.0337275514673676, |
|
"grad_norm": 0.2749938354036527, |
|
"learning_rate": 1.1155725726366992e-05, |
|
"loss": 1.0711, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.0359176522120017, |
|
"grad_norm": 0.26230244379974577, |
|
"learning_rate": 1.1117745349633723e-05, |
|
"loss": 1.0717, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 1.038107752956636, |
|
"grad_norm": 0.2664760872899082, |
|
"learning_rate": 1.1079748638251175e-05, |
|
"loss": 1.0805, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.0402978537012704, |
|
"grad_norm": 0.27367024800241485, |
|
"learning_rate": 1.1041736147500522e-05, |
|
"loss": 1.0707, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.0424879544459045, |
|
"grad_norm": 0.27181089092067195, |
|
"learning_rate": 1.1003708432893531e-05, |
|
"loss": 1.0416, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.0446780551905388, |
|
"grad_norm": 0.2665172852872497, |
|
"learning_rate": 1.0965666050164477e-05, |
|
"loss": 1.0686, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 1.0468681559351731, |
|
"grad_norm": 0.27115445836831914, |
|
"learning_rate": 1.0927609555261962e-05, |
|
"loss": 1.0721, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.0490582566798072, |
|
"grad_norm": 0.26962744736855904, |
|
"learning_rate": 1.088953950434084e-05, |
|
"loss": 1.073, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 1.0512483574244416, |
|
"grad_norm": 0.2682198503489364, |
|
"learning_rate": 1.0851456453754068e-05, |
|
"loss": 1.0363, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.0534384581690759, |
|
"grad_norm": 0.26286369163664847, |
|
"learning_rate": 1.0813360960044579e-05, |
|
"loss": 1.0785, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 1.05562855891371, |
|
"grad_norm": 0.2669382343591095, |
|
"learning_rate": 1.0775253579937148e-05, |
|
"loss": 1.075, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.0578186596583443, |
|
"grad_norm": 0.2752037755375215, |
|
"learning_rate": 1.0737134870330255e-05, |
|
"loss": 1.068, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 1.0600087604029786, |
|
"grad_norm": 0.271664654445446, |
|
"learning_rate": 1.0699005388287959e-05, |
|
"loss": 1.1002, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.0621988611476127, |
|
"grad_norm": 0.272588814423304, |
|
"learning_rate": 1.0660865691031729e-05, |
|
"loss": 1.0742, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.064388961892247, |
|
"grad_norm": 0.266723825837105, |
|
"learning_rate": 1.0622716335932336e-05, |
|
"loss": 1.0908, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.0665790626368814, |
|
"grad_norm": 0.2633628906535874, |
|
"learning_rate": 1.0584557880501678e-05, |
|
"loss": 1.0723, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 1.0687691633815155, |
|
"grad_norm": 0.2715087145721187, |
|
"learning_rate": 1.0546390882384649e-05, |
|
"loss": 1.0924, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.0709592641261498, |
|
"grad_norm": 0.2643851262115833, |
|
"learning_rate": 1.0508215899350986e-05, |
|
"loss": 1.0527, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 1.0731493648707842, |
|
"grad_norm": 0.264344202748604, |
|
"learning_rate": 1.0470033489287115e-05, |
|
"loss": 1.0697, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.0753394656154183, |
|
"grad_norm": 0.27016171316360305, |
|
"learning_rate": 1.0431844210188e-05, |
|
"loss": 1.067, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 1.0775295663600526, |
|
"grad_norm": 0.26462744675653105, |
|
"learning_rate": 1.039364862014899e-05, |
|
"loss": 1.0553, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.079719667104687, |
|
"grad_norm": 0.25957249915751707, |
|
"learning_rate": 1.0355447277357667e-05, |
|
"loss": 1.0662, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 1.081909767849321, |
|
"grad_norm": 0.2682650123947121, |
|
"learning_rate": 1.0317240740085666e-05, |
|
"loss": 1.0496, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.0840998685939554, |
|
"grad_norm": 0.27783105584443907, |
|
"learning_rate": 1.0279029566680556e-05, |
|
"loss": 1.0521, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.0862899693385897, |
|
"grad_norm": 0.26626718276311767, |
|
"learning_rate": 1.024081431555764e-05, |
|
"loss": 1.0441, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.0884800700832238, |
|
"grad_norm": 0.2629927537429945, |
|
"learning_rate": 1.0202595545191827e-05, |
|
"loss": 1.0576, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 1.090670170827858, |
|
"grad_norm": 0.2620068996693131, |
|
"learning_rate": 1.0164373814109439e-05, |
|
"loss": 1.0625, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.0928602715724924, |
|
"grad_norm": 0.2653845655607556, |
|
"learning_rate": 1.0126149680880084e-05, |
|
"loss": 1.0582, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 1.0950503723171265, |
|
"grad_norm": 0.27299625220300644, |
|
"learning_rate": 1.0087923704108462e-05, |
|
"loss": 1.0934, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0972404730617609, |
|
"grad_norm": 0.26498736474890866, |
|
"learning_rate": 1.0049696442426222e-05, |
|
"loss": 1.058, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 1.0994305738063952, |
|
"grad_norm": 0.2709488000586329, |
|
"learning_rate": 1.0011468454483781e-05, |
|
"loss": 1.0719, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.1016206745510293, |
|
"grad_norm": 0.26135794077525687, |
|
"learning_rate": 9.973240298942187e-06, |
|
"loss": 1.0545, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 1.1038107752956636, |
|
"grad_norm": 0.2582671924491208, |
|
"learning_rate": 9.93501253446491e-06, |
|
"loss": 1.0656, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.106000876040298, |
|
"grad_norm": 0.2649770195518186, |
|
"learning_rate": 9.896785719709735e-06, |
|
"loss": 1.0789, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.108190976784932, |
|
"grad_norm": 0.26940514713119723, |
|
"learning_rate": 9.85856041332055e-06, |
|
"loss": 1.0754, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.1103810775295664, |
|
"grad_norm": 0.26672673745336384, |
|
"learning_rate": 9.820337173919201e-06, |
|
"loss": 1.0426, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 1.1125711782742007, |
|
"grad_norm": 0.26266521432117856, |
|
"learning_rate": 9.782116560097334e-06, |
|
"loss": 1.0736, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.1147612790188348, |
|
"grad_norm": 0.2661017661171288, |
|
"learning_rate": 9.743899130408216e-06, |
|
"loss": 1.0596, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 1.1169513797634691, |
|
"grad_norm": 0.26244326034773946, |
|
"learning_rate": 9.705685443358586e-06, |
|
"loss": 1.0768, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.1191414805081035, |
|
"grad_norm": 0.26228295242456245, |
|
"learning_rate": 9.667476057400492e-06, |
|
"loss": 1.0412, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 1.1213315812527376, |
|
"grad_norm": 0.2629461038584397, |
|
"learning_rate": 9.629271530923126e-06, |
|
"loss": 1.0465, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.123521681997372, |
|
"grad_norm": 0.271093061900036, |
|
"learning_rate": 9.591072422244654e-06, |
|
"loss": 1.0406, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 1.1257117827420062, |
|
"grad_norm": 0.2822471803911114, |
|
"learning_rate": 9.552879289604075e-06, |
|
"loss": 1.074, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.1279018834866403, |
|
"grad_norm": 0.26861806302874436, |
|
"learning_rate": 9.514692691153058e-06, |
|
"loss": 1.0783, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.1300919842312747, |
|
"grad_norm": 0.27817880338620504, |
|
"learning_rate": 9.476513184947769e-06, |
|
"loss": 1.066, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.132282084975909, |
|
"grad_norm": 0.270732129128024, |
|
"learning_rate": 9.438341328940742e-06, |
|
"loss": 1.0664, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 1.134472185720543, |
|
"grad_norm": 0.26430623776114975, |
|
"learning_rate": 9.400177680972696e-06, |
|
"loss": 1.0818, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.1366622864651774, |
|
"grad_norm": 0.2696174677769872, |
|
"learning_rate": 9.362022798764424e-06, |
|
"loss": 1.0467, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 1.1388523872098117, |
|
"grad_norm": 0.2739729791232298, |
|
"learning_rate": 9.323877239908587e-06, |
|
"loss": 1.0523, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.1410424879544458, |
|
"grad_norm": 0.26804487327058313, |
|
"learning_rate": 9.28574156186162e-06, |
|
"loss": 1.0672, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 1.1432325886990802, |
|
"grad_norm": 0.2591521070265315, |
|
"learning_rate": 9.247616321935539e-06, |
|
"loss": 1.0719, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.1454226894437145, |
|
"grad_norm": 0.25951317593104206, |
|
"learning_rate": 9.209502077289836e-06, |
|
"loss": 1.0447, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 1.1476127901883486, |
|
"grad_norm": 0.2652460820102422, |
|
"learning_rate": 9.17139938492331e-06, |
|
"loss": 1.0521, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.149802890932983, |
|
"grad_norm": 0.2602152514750744, |
|
"learning_rate": 9.133308801665937e-06, |
|
"loss": 1.0523, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.1519929916776173, |
|
"grad_norm": 0.2705995487451913, |
|
"learning_rate": 9.095230884170727e-06, |
|
"loss": 1.0396, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.1541830924222514, |
|
"grad_norm": 0.2604370066458989, |
|
"learning_rate": 9.057166188905604e-06, |
|
"loss": 1.0588, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 1.1563731931668857, |
|
"grad_norm": 0.2737060731254108, |
|
"learning_rate": 9.01911527214526e-06, |
|
"loss": 1.0502, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.15856329391152, |
|
"grad_norm": 0.27030630333355876, |
|
"learning_rate": 8.98107868996302e-06, |
|
"loss": 1.0725, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 1.1607533946561541, |
|
"grad_norm": 0.26859131666568503, |
|
"learning_rate": 8.943056998222733e-06, |
|
"loss": 1.0652, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.1629434954007885, |
|
"grad_norm": 0.2641393090266361, |
|
"learning_rate": 8.905050752570637e-06, |
|
"loss": 1.0668, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 1.1651335961454228, |
|
"grad_norm": 0.2756537145265912, |
|
"learning_rate": 8.867060508427245e-06, |
|
"loss": 1.0738, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.1673236968900569, |
|
"grad_norm": 0.26685077848408933, |
|
"learning_rate": 8.82908682097922e-06, |
|
"loss": 1.0604, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 1.1695137976346912, |
|
"grad_norm": 0.2753178085823332, |
|
"learning_rate": 8.791130245171278e-06, |
|
"loss": 1.0676, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.1717038983793255, |
|
"grad_norm": 0.2732703952765648, |
|
"learning_rate": 8.753191335698049e-06, |
|
"loss": 1.0744, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.1738939991239596, |
|
"grad_norm": 0.2603013658010823, |
|
"learning_rate": 8.715270646996002e-06, |
|
"loss": 1.0529, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.176084099868594, |
|
"grad_norm": 0.2634391048696047, |
|
"learning_rate": 8.677368733235328e-06, |
|
"loss": 1.0801, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 1.1782742006132283, |
|
"grad_norm": 0.27467962055701084, |
|
"learning_rate": 8.639486148311833e-06, |
|
"loss": 1.0725, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.1804643013578624, |
|
"grad_norm": 0.2721967843702617, |
|
"learning_rate": 8.601623445838865e-06, |
|
"loss": 1.0406, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 1.1826544021024967, |
|
"grad_norm": 0.25984926483960213, |
|
"learning_rate": 8.563781179139202e-06, |
|
"loss": 1.0504, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.184844502847131, |
|
"grad_norm": 0.2632769236380217, |
|
"learning_rate": 8.525959901236975e-06, |
|
"loss": 1.052, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 1.1870346035917652, |
|
"grad_norm": 0.2752690229086118, |
|
"learning_rate": 8.488160164849596e-06, |
|
"loss": 1.0611, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.1892247043363995, |
|
"grad_norm": 0.25299421098700864, |
|
"learning_rate": 8.450382522379668e-06, |
|
"loss": 1.0732, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 1.1914148050810338, |
|
"grad_norm": 0.26715997535203595, |
|
"learning_rate": 8.412627525906902e-06, |
|
"loss": 1.0445, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.193604905825668, |
|
"grad_norm": 0.284833895697399, |
|
"learning_rate": 8.374895727180079e-06, |
|
"loss": 1.0334, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.1957950065703022, |
|
"grad_norm": 0.26556547667567937, |
|
"learning_rate": 8.33718767760896e-06, |
|
"loss": 1.0762, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.1979851073149366, |
|
"grad_norm": 0.27538602694646613, |
|
"learning_rate": 8.299503928256238e-06, |
|
"loss": 1.0738, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 1.2001752080595707, |
|
"grad_norm": 0.2705036393436721, |
|
"learning_rate": 8.261845029829488e-06, |
|
"loss": 1.0736, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.202365308804205, |
|
"grad_norm": 0.26006945720175584, |
|
"learning_rate": 8.224211532673117e-06, |
|
"loss": 1.0613, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 1.2045554095488393, |
|
"grad_norm": 0.2723552029684137, |
|
"learning_rate": 8.186603986760316e-06, |
|
"loss": 1.0287, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.2067455102934734, |
|
"grad_norm": 0.25851377292505096, |
|
"learning_rate": 8.149022941685023e-06, |
|
"loss": 1.0803, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 1.2089356110381078, |
|
"grad_norm": 0.26629820551366745, |
|
"learning_rate": 8.111468946653901e-06, |
|
"loss": 1.073, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.211125711782742, |
|
"grad_norm": 0.2699109190074932, |
|
"learning_rate": 8.073942550478307e-06, |
|
"loss": 1.0629, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 1.2133158125273762, |
|
"grad_norm": 0.25618392505342463, |
|
"learning_rate": 8.03644430156626e-06, |
|
"loss": 1.0521, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.2155059132720105, |
|
"grad_norm": 0.2694808758637472, |
|
"learning_rate": 7.998974747914449e-06, |
|
"loss": 1.0732, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.2176960140166448, |
|
"grad_norm": 0.27962922484388153, |
|
"learning_rate": 7.9615344371002e-06, |
|
"loss": 1.0859, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.219886114761279, |
|
"grad_norm": 0.27750174727788907, |
|
"learning_rate": 7.924123916273504e-06, |
|
"loss": 1.0875, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 1.2220762155059133, |
|
"grad_norm": 0.2592875926566742, |
|
"learning_rate": 7.886743732148986e-06, |
|
"loss": 1.0826, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.2242663162505476, |
|
"grad_norm": 0.2654730019317536, |
|
"learning_rate": 7.849394430997941e-06, |
|
"loss": 1.0619, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 1.2264564169951817, |
|
"grad_norm": 0.26072631883327624, |
|
"learning_rate": 7.81207655864034e-06, |
|
"loss": 1.0437, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.228646517739816, |
|
"grad_norm": 0.26825325034393344, |
|
"learning_rate": 7.774790660436857e-06, |
|
"loss": 1.0676, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 1.2308366184844504, |
|
"grad_norm": 0.2566995101810123, |
|
"learning_rate": 7.7375372812809e-06, |
|
"loss": 1.0529, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.2330267192290845, |
|
"grad_norm": 0.2604309539637625, |
|
"learning_rate": 7.700316965590638e-06, |
|
"loss": 1.0619, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 1.2352168199737188, |
|
"grad_norm": 0.2625246921961206, |
|
"learning_rate": 7.663130257301064e-06, |
|
"loss": 1.0891, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.2374069207183531, |
|
"grad_norm": 0.25855563736281145, |
|
"learning_rate": 7.62597769985603e-06, |
|
"loss": 1.0566, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.2395970214629872, |
|
"grad_norm": 0.26232370362085894, |
|
"learning_rate": 7.588859836200309e-06, |
|
"loss": 1.0561, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.2417871222076216, |
|
"grad_norm": 0.26917013720230804, |
|
"learning_rate": 7.551777208771659e-06, |
|
"loss": 1.0471, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 1.2439772229522559, |
|
"grad_norm": 0.26375355389890487, |
|
"learning_rate": 7.514730359492905e-06, |
|
"loss": 1.0592, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.24616732369689, |
|
"grad_norm": 0.26648356006456103, |
|
"learning_rate": 7.477719829764008e-06, |
|
"loss": 1.0469, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 1.2483574244415243, |
|
"grad_norm": 0.2728857993840234, |
|
"learning_rate": 7.440746160454156e-06, |
|
"loss": 1.0646, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.2505475251861586, |
|
"grad_norm": 0.26552837575529725, |
|
"learning_rate": 7.403809891893865e-06, |
|
"loss": 1.0557, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 1.2527376259307927, |
|
"grad_norm": 0.2688170697408606, |
|
"learning_rate": 7.366911563867086e-06, |
|
"loss": 1.0533, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.254927726675427, |
|
"grad_norm": 0.2738164410511349, |
|
"learning_rate": 7.330051715603295e-06, |
|
"loss": 1.0559, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 1.2571178274200614, |
|
"grad_norm": 0.2619756596211397, |
|
"learning_rate": 7.293230885769638e-06, |
|
"loss": 1.052, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.2593079281646955, |
|
"grad_norm": 0.2729439802092583, |
|
"learning_rate": 7.2564496124630455e-06, |
|
"loss": 1.0621, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.2614980289093298, |
|
"grad_norm": 0.2618676111912421, |
|
"learning_rate": 7.219708433202368e-06, |
|
"loss": 1.0527, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.2636881296539642, |
|
"grad_norm": 0.2575770131741819, |
|
"learning_rate": 7.183007884920534e-06, |
|
"loss": 1.0813, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 1.2658782303985983, |
|
"grad_norm": 0.268317818796291, |
|
"learning_rate": 7.14634850395668e-06, |
|
"loss": 1.0744, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.2680683311432326, |
|
"grad_norm": 0.25989715732532104, |
|
"learning_rate": 7.109730826048344e-06, |
|
"loss": 1.0586, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 1.270258431887867, |
|
"grad_norm": 0.2632165775296998, |
|
"learning_rate": 7.073155386323602e-06, |
|
"loss": 1.0463, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.272448532632501, |
|
"grad_norm": 0.2561543218662453, |
|
"learning_rate": 7.036622719293278e-06, |
|
"loss": 1.0414, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 1.2746386333771353, |
|
"grad_norm": 0.25666741513918634, |
|
"learning_rate": 7.0001333588431055e-06, |
|
"loss": 1.0973, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.2768287341217697, |
|
"grad_norm": 0.2720271858166436, |
|
"learning_rate": 6.963687838225948e-06, |
|
"loss": 1.0404, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 1.2790188348664038, |
|
"grad_norm": 0.2664079925854839, |
|
"learning_rate": 6.927286690053996e-06, |
|
"loss": 1.0627, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.281208935611038, |
|
"grad_norm": 0.2616457737793427, |
|
"learning_rate": 6.890930446290976e-06, |
|
"loss": 1.076, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.2833990363556724, |
|
"grad_norm": 0.26503317946827504, |
|
"learning_rate": 6.854619638244399e-06, |
|
"loss": 1.0523, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.2855891371003065, |
|
"grad_norm": 0.2643470479220203, |
|
"learning_rate": 6.8183547965577735e-06, |
|
"loss": 1.0723, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 1.2877792378449409, |
|
"grad_norm": 0.2547905467870185, |
|
"learning_rate": 6.782136451202857e-06, |
|
"loss": 1.0273, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.2899693385895752, |
|
"grad_norm": 0.261647302996271, |
|
"learning_rate": 6.745965131471915e-06, |
|
"loss": 1.0414, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 1.2921594393342093, |
|
"grad_norm": 0.24989074380882761, |
|
"learning_rate": 6.709841365969989e-06, |
|
"loss": 1.0352, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.2943495400788436, |
|
"grad_norm": 0.259577860201249, |
|
"learning_rate": 6.673765682607155e-06, |
|
"loss": 1.0445, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 1.296539640823478, |
|
"grad_norm": 0.2644082431413658, |
|
"learning_rate": 6.637738608590831e-06, |
|
"loss": 1.0443, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.298729741568112, |
|
"grad_norm": 0.2618054773070754, |
|
"learning_rate": 6.6017606704180555e-06, |
|
"loss": 1.0693, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 1.3009198423127464, |
|
"grad_norm": 0.26097876761733935, |
|
"learning_rate": 6.565832393867808e-06, |
|
"loss": 1.0543, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.3031099430573807, |
|
"grad_norm": 0.2594806373702017, |
|
"learning_rate": 6.529954303993305e-06, |
|
"loss": 1.0521, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.3053000438020148, |
|
"grad_norm": 0.25960390582097226, |
|
"learning_rate": 6.494126925114341e-06, |
|
"loss": 1.0398, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.3074901445466491, |
|
"grad_norm": 0.26253903837359144, |
|
"learning_rate": 6.458350780809634e-06, |
|
"loss": 1.042, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 1.3096802452912835, |
|
"grad_norm": 0.2654661460334143, |
|
"learning_rate": 6.422626393909151e-06, |
|
"loss": 1.0592, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.3118703460359176, |
|
"grad_norm": 0.2492587439304439, |
|
"learning_rate": 6.386954286486485e-06, |
|
"loss": 1.0678, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 1.314060446780552, |
|
"grad_norm": 0.26726116413928985, |
|
"learning_rate": 6.351334979851218e-06, |
|
"loss": 1.0725, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.3162505475251862, |
|
"grad_norm": 0.26088237090419825, |
|
"learning_rate": 6.315768994541316e-06, |
|
"loss": 1.0654, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 1.3184406482698203, |
|
"grad_norm": 0.26118786607312605, |
|
"learning_rate": 6.280256850315496e-06, |
|
"loss": 1.05, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.3206307490144547, |
|
"grad_norm": 0.26714886343958494, |
|
"learning_rate": 6.244799066145657e-06, |
|
"loss": 1.0594, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 1.322820849759089, |
|
"grad_norm": 0.2697209076699916, |
|
"learning_rate": 6.209396160209275e-06, |
|
"loss": 1.0604, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.325010950503723, |
|
"grad_norm": 0.2592918928971803, |
|
"learning_rate": 6.1740486498818454e-06, |
|
"loss": 1.0617, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.3272010512483574, |
|
"grad_norm": 0.2744613652569151, |
|
"learning_rate": 6.138757051729316e-06, |
|
"loss": 1.0656, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.3293911519929917, |
|
"grad_norm": 0.26102063894859023, |
|
"learning_rate": 6.103521881500531e-06, |
|
"loss": 1.0643, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 1.3315812527376258, |
|
"grad_norm": 0.26308106234970585, |
|
"learning_rate": 6.068343654119711e-06, |
|
"loss": 1.0703, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.3337713534822602, |
|
"grad_norm": 0.27250738827855164, |
|
"learning_rate": 6.033222883678915e-06, |
|
"loss": 1.0545, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 1.3359614542268945, |
|
"grad_norm": 0.25548627781504274, |
|
"learning_rate": 5.998160083430529e-06, |
|
"loss": 1.0406, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.3381515549715286, |
|
"grad_norm": 0.27175999771187154, |
|
"learning_rate": 5.963155765779762e-06, |
|
"loss": 1.0582, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 1.340341655716163, |
|
"grad_norm": 0.2766896501373885, |
|
"learning_rate": 5.928210442277176e-06, |
|
"loss": 1.0434, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.3425317564607973, |
|
"grad_norm": 0.26421628687562304, |
|
"learning_rate": 5.893324623611178e-06, |
|
"loss": 1.05, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 1.3447218572054314, |
|
"grad_norm": 0.26784348877239994, |
|
"learning_rate": 5.858498819600591e-06, |
|
"loss": 1.0545, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.3469119579500657, |
|
"grad_norm": 0.26753929682342953, |
|
"learning_rate": 5.823733539187184e-06, |
|
"loss": 1.0752, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.3491020586947, |
|
"grad_norm": 0.2876683161118842, |
|
"learning_rate": 5.789029290428234e-06, |
|
"loss": 1.0551, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.3512921594393341, |
|
"grad_norm": 0.2655828094593293, |
|
"learning_rate": 5.754386580489118e-06, |
|
"loss": 1.0725, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 1.3534822601839684, |
|
"grad_norm": 0.2642140873534744, |
|
"learning_rate": 5.719805915635872e-06, |
|
"loss": 1.0303, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.3556723609286028, |
|
"grad_norm": 0.2662184748201957, |
|
"learning_rate": 5.685287801227819e-06, |
|
"loss": 1.0682, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 1.3578624616732369, |
|
"grad_norm": 0.2602261042223262, |
|
"learning_rate": 5.65083274171018e-06, |
|
"loss": 1.0734, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.3600525624178712, |
|
"grad_norm": 0.26422024768061875, |
|
"learning_rate": 5.616441240606685e-06, |
|
"loss": 1.0686, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 1.3622426631625055, |
|
"grad_norm": 0.26487517368734986, |
|
"learning_rate": 5.5821138005122275e-06, |
|
"loss": 1.0586, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.3644327639071396, |
|
"grad_norm": 0.2665684900016343, |
|
"learning_rate": 5.547850923085525e-06, |
|
"loss": 1.0486, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 1.366622864651774, |
|
"grad_norm": 0.26846022882339726, |
|
"learning_rate": 5.513653109041784e-06, |
|
"loss": 1.0537, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.3688129653964083, |
|
"grad_norm": 0.2667169718942767, |
|
"learning_rate": 5.479520858145366e-06, |
|
"loss": 1.0777, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.3710030661410424, |
|
"grad_norm": 0.26370064314585917, |
|
"learning_rate": 5.4454546692025014e-06, |
|
"loss": 1.0596, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.3731931668856767, |
|
"grad_norm": 0.25983283968870546, |
|
"learning_rate": 5.411455040054008e-06, |
|
"loss": 1.0668, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 1.375383267630311, |
|
"grad_norm": 0.2694060610420749, |
|
"learning_rate": 5.377522467567988e-06, |
|
"loss": 1.0311, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.3775733683749452, |
|
"grad_norm": 0.2708644298850711, |
|
"learning_rate": 5.343657447632593e-06, |
|
"loss": 1.0604, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 1.3797634691195795, |
|
"grad_norm": 0.2663715611937321, |
|
"learning_rate": 5.30986047514875e-06, |
|
"loss": 1.0434, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.3819535698642138, |
|
"grad_norm": 0.2683831238281801, |
|
"learning_rate": 5.276132044022976e-06, |
|
"loss": 1.0648, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 1.384143670608848, |
|
"grad_norm": 0.2710281366615512, |
|
"learning_rate": 5.242472647160104e-06, |
|
"loss": 1.0621, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.3863337713534822, |
|
"grad_norm": 0.26635212498540184, |
|
"learning_rate": 5.208882776456112e-06, |
|
"loss": 1.0344, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 1.3885238720981166, |
|
"grad_norm": 0.2646040753621649, |
|
"learning_rate": 5.175362922790925e-06, |
|
"loss": 1.0584, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.3907139728427507, |
|
"grad_norm": 0.2606068539201707, |
|
"learning_rate": 5.1419135760212546e-06, |
|
"loss": 1.0566, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.392904073587385, |
|
"grad_norm": 0.2692058993237337, |
|
"learning_rate": 5.108535224973421e-06, |
|
"loss": 1.084, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.3950941743320193, |
|
"grad_norm": 0.2692100394219459, |
|
"learning_rate": 5.075228357436215e-06, |
|
"loss": 1.0594, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 1.3972842750766534, |
|
"grad_norm": 0.2590485616992936, |
|
"learning_rate": 5.04199346015378e-06, |
|
"loss": 1.0588, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.3994743758212878, |
|
"grad_norm": 0.2600923581263787, |
|
"learning_rate": 5.0088310188184954e-06, |
|
"loss": 1.0795, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 1.401664476565922, |
|
"grad_norm": 0.26786057407617736, |
|
"learning_rate": 4.975741518063863e-06, |
|
"loss": 1.0332, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.4038545773105562, |
|
"grad_norm": 0.2553993419762132, |
|
"learning_rate": 4.9427254414574355e-06, |
|
"loss": 1.0643, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 1.4060446780551905, |
|
"grad_norm": 0.2650885102717858, |
|
"learning_rate": 4.909783271493768e-06, |
|
"loss": 1.0574, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.4082347787998248, |
|
"grad_norm": 0.2690705321050312, |
|
"learning_rate": 4.87691548958733e-06, |
|
"loss": 1.076, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 1.410424879544459, |
|
"grad_norm": 0.2635061966804039, |
|
"learning_rate": 4.844122576065494e-06, |
|
"loss": 1.0605, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.4126149802890933, |
|
"grad_norm": 0.2707741119898313, |
|
"learning_rate": 4.81140501016152e-06, |
|
"loss": 1.0809, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.4148050810337276, |
|
"grad_norm": 0.2648037571113503, |
|
"learning_rate": 4.77876327000754e-06, |
|
"loss": 1.0645, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.4169951817783617, |
|
"grad_norm": 0.2639644819174903, |
|
"learning_rate": 4.7461978326275686e-06, |
|
"loss": 1.0477, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 1.419185282522996, |
|
"grad_norm": 0.2719513637037621, |
|
"learning_rate": 4.7137091739305356e-06, |
|
"loss": 1.0807, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.4213753832676304, |
|
"grad_norm": 0.26159805380609785, |
|
"learning_rate": 4.681297768703346e-06, |
|
"loss": 1.0668, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 1.4235654840122645, |
|
"grad_norm": 0.25856122844361584, |
|
"learning_rate": 4.648964090603913e-06, |
|
"loss": 1.0664, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.4257555847568988, |
|
"grad_norm": 0.26782272235769733, |
|
"learning_rate": 4.616708612154258e-06, |
|
"loss": 1.0363, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 1.4279456855015331, |
|
"grad_norm": 0.259074751270484, |
|
"learning_rate": 4.5845318047336e-06, |
|
"loss": 1.0656, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.4301357862461672, |
|
"grad_norm": 0.26806061151907784, |
|
"learning_rate": 4.5524341385714675e-06, |
|
"loss": 1.0324, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 1.4323258869908015, |
|
"grad_norm": 0.2713642276306849, |
|
"learning_rate": 4.520416082740816e-06, |
|
"loss": 1.0514, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.4345159877354359, |
|
"grad_norm": 0.2517619955218095, |
|
"learning_rate": 4.4884781051511835e-06, |
|
"loss": 1.0453, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.43670608848007, |
|
"grad_norm": 0.2573589678247352, |
|
"learning_rate": 4.456620672541859e-06, |
|
"loss": 1.0744, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.4388961892247043, |
|
"grad_norm": 0.26411247973588675, |
|
"learning_rate": 4.424844250475043e-06, |
|
"loss": 1.077, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 1.4410862899693386, |
|
"grad_norm": 0.25817254393906147, |
|
"learning_rate": 4.39314930332906e-06, |
|
"loss": 1.067, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.4432763907139727, |
|
"grad_norm": 0.255853414459153, |
|
"learning_rate": 4.361536294291555e-06, |
|
"loss": 1.075, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 1.445466491458607, |
|
"grad_norm": 0.2621787716237385, |
|
"learning_rate": 4.330005685352751e-06, |
|
"loss": 1.0625, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.4476565922032414, |
|
"grad_norm": 0.25777071723483336, |
|
"learning_rate": 4.29855793729868e-06, |
|
"loss": 1.0596, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 1.4498466929478755, |
|
"grad_norm": 0.26127021300557535, |
|
"learning_rate": 4.267193509704438e-06, |
|
"loss": 1.0771, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.4520367936925098, |
|
"grad_norm": 0.25960438092364824, |
|
"learning_rate": 4.235912860927489e-06, |
|
"loss": 1.0543, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 1.4542268944371441, |
|
"grad_norm": 0.27211721141661616, |
|
"learning_rate": 4.204716448100967e-06, |
|
"loss": 1.0865, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.4564169951817783, |
|
"grad_norm": 0.25962589139875863, |
|
"learning_rate": 4.173604727126978e-06, |
|
"loss": 1.06, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.4586070959264126, |
|
"grad_norm": 0.2563513023704075, |
|
"learning_rate": 4.142578152669946e-06, |
|
"loss": 1.0633, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.460797196671047, |
|
"grad_norm": 0.2575843897502921, |
|
"learning_rate": 4.111637178149978e-06, |
|
"loss": 1.0527, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 1.462987297415681, |
|
"grad_norm": 0.26535361865514706, |
|
"learning_rate": 4.0807822557362305e-06, |
|
"loss": 1.0498, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.4651773981603153, |
|
"grad_norm": 0.2666398926972173, |
|
"learning_rate": 4.050013836340294e-06, |
|
"loss": 1.0668, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 1.4673674989049497, |
|
"grad_norm": 0.27206398230995277, |
|
"learning_rate": 4.019332369609608e-06, |
|
"loss": 1.0471, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.4695575996495838, |
|
"grad_norm": 0.26515734844142613, |
|
"learning_rate": 3.9887383039209045e-06, |
|
"loss": 1.0787, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 1.471747700394218, |
|
"grad_norm": 0.2627505561616997, |
|
"learning_rate": 3.9582320863736315e-06, |
|
"loss": 1.0564, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.4739378011388524, |
|
"grad_norm": 0.2602708632102437, |
|
"learning_rate": 3.927814162783431e-06, |
|
"loss": 1.0602, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 1.4761279018834865, |
|
"grad_norm": 0.2707223435716907, |
|
"learning_rate": 3.897484977675634e-06, |
|
"loss": 1.0525, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.4783180026281209, |
|
"grad_norm": 0.2627452484071492, |
|
"learning_rate": 3.867244974278741e-06, |
|
"loss": 1.0527, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.4805081033727552, |
|
"grad_norm": 0.26390966503788216, |
|
"learning_rate": 3.83709459451797e-06, |
|
"loss": 1.0367, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.4826982041173893, |
|
"grad_norm": 0.25536736348425665, |
|
"learning_rate": 3.80703427900877e-06, |
|
"loss": 1.0518, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 1.4848883048620236, |
|
"grad_norm": 0.2773298314887989, |
|
"learning_rate": 3.777064467050415e-06, |
|
"loss": 1.0789, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.487078405606658, |
|
"grad_norm": 0.2728892928772842, |
|
"learning_rate": 3.7471855966195556e-06, |
|
"loss": 1.0832, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 1.489268506351292, |
|
"grad_norm": 0.26553829473522816, |
|
"learning_rate": 3.7173981043638317e-06, |
|
"loss": 1.0607, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.4914586070959264, |
|
"grad_norm": 0.2694225835399303, |
|
"learning_rate": 3.687702425595485e-06, |
|
"loss": 1.0824, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 1.4936487078405607, |
|
"grad_norm": 0.26543120146820526, |
|
"learning_rate": 3.658098994285011e-06, |
|
"loss": 1.0738, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.4958388085851948, |
|
"grad_norm": 0.26713945932896793, |
|
"learning_rate": 3.628588243054807e-06, |
|
"loss": 1.048, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 1.4980289093298291, |
|
"grad_norm": 0.26162062775036266, |
|
"learning_rate": 3.59917060317284e-06, |
|
"loss": 1.0436, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.5002190100744635, |
|
"grad_norm": 0.26485362804622087, |
|
"learning_rate": 3.5698465045463594e-06, |
|
"loss": 1.0617, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.5024091108190976, |
|
"grad_norm": 0.2531141962202315, |
|
"learning_rate": 3.5406163757156177e-06, |
|
"loss": 1.0281, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.5045992115637319, |
|
"grad_norm": 0.26406689492465707, |
|
"learning_rate": 3.511480643847588e-06, |
|
"loss": 1.042, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 1.5067893123083662, |
|
"grad_norm": 0.26405636994206744, |
|
"learning_rate": 3.4824397347297356e-06, |
|
"loss": 1.0633, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.5089794130530003, |
|
"grad_norm": 0.26746329470551067, |
|
"learning_rate": 3.453494072763801e-06, |
|
"loss": 1.0584, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 1.5111695137976346, |
|
"grad_norm": 0.2617239083579329, |
|
"learning_rate": 3.4246440809595782e-06, |
|
"loss": 1.0818, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.513359614542269, |
|
"grad_norm": 0.2577442031154383, |
|
"learning_rate": 3.395890180928756e-06, |
|
"loss": 1.0635, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 1.515549715286903, |
|
"grad_norm": 0.25802319557110337, |
|
"learning_rate": 3.367232792878733e-06, |
|
"loss": 1.0387, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.5177398160315374, |
|
"grad_norm": 0.26395535667353776, |
|
"learning_rate": 3.338672335606501e-06, |
|
"loss": 1.0463, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 1.5199299167761717, |
|
"grad_norm": 0.2675824852661886, |
|
"learning_rate": 3.3102092264925034e-06, |
|
"loss": 1.0412, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.5221200175208058, |
|
"grad_norm": 0.2635866153828133, |
|
"learning_rate": 3.2818438814945443e-06, |
|
"loss": 1.0436, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.5243101182654402, |
|
"grad_norm": 0.2634172643790584, |
|
"learning_rate": 3.2535767151417196e-06, |
|
"loss": 1.0939, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.5265002190100745, |
|
"grad_norm": 0.26747117493686146, |
|
"learning_rate": 3.2254081405283368e-06, |
|
"loss": 1.0701, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 1.5286903197547086, |
|
"grad_norm": 0.26083933140484994, |
|
"learning_rate": 3.1973385693079053e-06, |
|
"loss": 1.0594, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.530880420499343, |
|
"grad_norm": 0.2631801552676203, |
|
"learning_rate": 3.1693684116870915e-06, |
|
"loss": 1.059, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 1.5330705212439772, |
|
"grad_norm": 0.2628999198155687, |
|
"learning_rate": 3.141498076419751e-06, |
|
"loss": 1.066, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.5352606219886114, |
|
"grad_norm": 0.2673654861785837, |
|
"learning_rate": 3.113727970800935e-06, |
|
"loss": 1.0768, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 1.5374507227332457, |
|
"grad_norm": 0.2670303119820206, |
|
"learning_rate": 3.0860585006609476e-06, |
|
"loss": 1.0506, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.53964082347788, |
|
"grad_norm": 0.2579786497495102, |
|
"learning_rate": 3.0584900703594124e-06, |
|
"loss": 1.0613, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 1.541830924222514, |
|
"grad_norm": 0.26312720898642156, |
|
"learning_rate": 3.0310230827793698e-06, |
|
"loss": 1.0754, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.5440210249671484, |
|
"grad_norm": 0.27154475514562654, |
|
"learning_rate": 3.0036579393213738e-06, |
|
"loss": 1.0521, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.5462111257117828, |
|
"grad_norm": 0.2660044823931567, |
|
"learning_rate": 2.9763950398976494e-06, |
|
"loss": 1.0717, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.5484012264564169, |
|
"grad_norm": 0.25584411175808575, |
|
"learning_rate": 2.949234782926218e-06, |
|
"loss": 1.0689, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 1.5505913272010512, |
|
"grad_norm": 0.26504323222247284, |
|
"learning_rate": 2.9221775653251094e-06, |
|
"loss": 1.0561, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.5527814279456855, |
|
"grad_norm": 0.2621451181651719, |
|
"learning_rate": 2.89522378250653e-06, |
|
"loss": 1.076, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 1.5549715286903196, |
|
"grad_norm": 0.2633161653877227, |
|
"learning_rate": 2.8683738283711007e-06, |
|
"loss": 1.0471, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.557161629434954, |
|
"grad_norm": 0.2607111198003952, |
|
"learning_rate": 2.8416280953021036e-06, |
|
"loss": 1.0451, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 1.5593517301795883, |
|
"grad_norm": 0.25674063766814414, |
|
"learning_rate": 2.8149869741597323e-06, |
|
"loss": 1.032, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.5615418309242224, |
|
"grad_norm": 0.26291694066848564, |
|
"learning_rate": 2.7884508542754008e-06, |
|
"loss": 1.0594, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 1.563731931668857, |
|
"grad_norm": 0.26018069572571767, |
|
"learning_rate": 2.7620201234460296e-06, |
|
"loss": 1.0637, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.565922032413491, |
|
"grad_norm": 0.2598568109349574, |
|
"learning_rate": 2.735695167928405e-06, |
|
"loss": 1.0525, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.5681121331581251, |
|
"grad_norm": 0.2698254467493303, |
|
"learning_rate": 2.7094763724335084e-06, |
|
"loss": 1.0754, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.5703022339027597, |
|
"grad_norm": 0.26635873399369575, |
|
"learning_rate": 2.6833641201209083e-06, |
|
"loss": 1.0693, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 1.5724923346473938, |
|
"grad_norm": 0.26869051331331034, |
|
"learning_rate": 2.6573587925931676e-06, |
|
"loss": 1.0598, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.574682435392028, |
|
"grad_norm": 0.2659077562702536, |
|
"learning_rate": 2.631460769890248e-06, |
|
"loss": 1.0475, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 1.5768725361366625, |
|
"grad_norm": 0.2621915748487364, |
|
"learning_rate": 2.605670430483975e-06, |
|
"loss": 1.0725, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.5790626368812966, |
|
"grad_norm": 0.25199719840433615, |
|
"learning_rate": 2.579988151272489e-06, |
|
"loss": 1.035, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 1.5812527376259307, |
|
"grad_norm": 0.2576740526950345, |
|
"learning_rate": 2.5544143075747563e-06, |
|
"loss": 1.0592, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.5834428383705652, |
|
"grad_norm": 0.25887381760680483, |
|
"learning_rate": 2.5289492731250665e-06, |
|
"loss": 1.0541, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 1.5856329391151993, |
|
"grad_norm": 0.2630343324716044, |
|
"learning_rate": 2.503593420067579e-06, |
|
"loss": 1.0375, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.5878230398598334, |
|
"grad_norm": 0.2532971997224149, |
|
"learning_rate": 2.4783471189508945e-06, |
|
"loss": 1.0594, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.590013140604468, |
|
"grad_norm": 0.25821760865745746, |
|
"learning_rate": 2.4532107387226176e-06, |
|
"loss": 1.059, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.592203241349102, |
|
"grad_norm": 0.26225107109059886, |
|
"learning_rate": 2.42818464672398e-06, |
|
"loss": 1.0518, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 1.5943933420937362, |
|
"grad_norm": 0.2622443538659936, |
|
"learning_rate": 2.4032692086844755e-06, |
|
"loss": 1.043, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.5965834428383707, |
|
"grad_norm": 0.2638282719915154, |
|
"learning_rate": 2.378464788716498e-06, |
|
"loss": 1.059, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 1.5987735435830048, |
|
"grad_norm": 0.2697815422805798, |
|
"learning_rate": 2.3537717493100455e-06, |
|
"loss": 1.0818, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.600963644327639, |
|
"grad_norm": 0.2571237597778176, |
|
"learning_rate": 2.3291904513273976e-06, |
|
"loss": 1.0768, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 1.6031537450722735, |
|
"grad_norm": 0.2591593934526409, |
|
"learning_rate": 2.3047212539978515e-06, |
|
"loss": 1.0314, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.6053438458169076, |
|
"grad_norm": 0.25288794506056167, |
|
"learning_rate": 2.2803645149124853e-06, |
|
"loss": 1.0463, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 1.6075339465615417, |
|
"grad_norm": 0.26274198683291683, |
|
"learning_rate": 2.2561205900189064e-06, |
|
"loss": 1.0641, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.6097240473061762, |
|
"grad_norm": 0.2585964477343107, |
|
"learning_rate": 2.2319898336160782e-06, |
|
"loss": 1.0406, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.6119141480508103, |
|
"grad_norm": 0.26454251839814774, |
|
"learning_rate": 2.207972598349114e-06, |
|
"loss": 1.0473, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.6141042487954445, |
|
"grad_norm": 0.26167402823368013, |
|
"learning_rate": 2.184069235204149e-06, |
|
"loss": 1.0555, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 1.616294349540079, |
|
"grad_norm": 0.2617176344796563, |
|
"learning_rate": 2.160280093503193e-06, |
|
"loss": 1.0537, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.618484450284713, |
|
"grad_norm": 0.2557415990045297, |
|
"learning_rate": 2.136605520899029e-06, |
|
"loss": 1.0576, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 1.6206745510293472, |
|
"grad_norm": 0.26094617139109133, |
|
"learning_rate": 2.113045863370148e-06, |
|
"loss": 1.0738, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.6228646517739818, |
|
"grad_norm": 0.27449534463755304, |
|
"learning_rate": 2.0896014652156673e-06, |
|
"loss": 1.0518, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 1.6250547525186159, |
|
"grad_norm": 0.26830283731724036, |
|
"learning_rate": 2.0662726690503153e-06, |
|
"loss": 1.0445, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.62724485326325, |
|
"grad_norm": 0.26178239860462027, |
|
"learning_rate": 2.0430598157994263e-06, |
|
"loss": 1.0457, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 1.6294349540078845, |
|
"grad_norm": 0.26997488755625093, |
|
"learning_rate": 2.0199632446939523e-06, |
|
"loss": 1.0572, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.6316250547525186, |
|
"grad_norm": 0.26067940678020124, |
|
"learning_rate": 1.996983293265502e-06, |
|
"loss": 1.0721, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.6338151554971527, |
|
"grad_norm": 0.26125656461287955, |
|
"learning_rate": 1.9741202973414133e-06, |
|
"loss": 1.0678, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.6360052562417873, |
|
"grad_norm": 0.2640142873058846, |
|
"learning_rate": 1.9513745910398494e-06, |
|
"loss": 1.0588, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 1.6381953569864214, |
|
"grad_norm": 0.25790596663792437, |
|
"learning_rate": 1.928746506764909e-06, |
|
"loss": 1.05, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.6403854577310555, |
|
"grad_norm": 0.2610429685420584, |
|
"learning_rate": 1.9062363752017666e-06, |
|
"loss": 1.0607, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 1.64257555847569, |
|
"grad_norm": 0.2625385391766983, |
|
"learning_rate": 1.883844525311851e-06, |
|
"loss": 1.068, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.6447656592203241, |
|
"grad_norm": 0.26068949311696277, |
|
"learning_rate": 1.861571284328032e-06, |
|
"loss": 1.0451, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 1.6469557599649582, |
|
"grad_norm": 0.26311841661647273, |
|
"learning_rate": 1.8394169777498306e-06, |
|
"loss": 1.0588, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.6491458607095928, |
|
"grad_norm": 0.2618370787264574, |
|
"learning_rate": 1.817381929338673e-06, |
|
"loss": 1.0596, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 1.651335961454227, |
|
"grad_norm": 0.2624185244357276, |
|
"learning_rate": 1.7954664611131522e-06, |
|
"loss": 1.0578, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.653526062198861, |
|
"grad_norm": 0.2553246881271621, |
|
"learning_rate": 1.7736708933443335e-06, |
|
"loss": 1.0367, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.6557161629434956, |
|
"grad_norm": 0.2540067566235003, |
|
"learning_rate": 1.7519955445510562e-06, |
|
"loss": 1.073, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.6579062636881297, |
|
"grad_norm": 0.2606857530240499, |
|
"learning_rate": 1.7304407314952898e-06, |
|
"loss": 1.0631, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 1.6600963644327638, |
|
"grad_norm": 0.2613143652543251, |
|
"learning_rate": 1.709006769177508e-06, |
|
"loss": 1.0648, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.6622864651773983, |
|
"grad_norm": 0.2561120275008954, |
|
"learning_rate": 1.6876939708320806e-06, |
|
"loss": 1.0453, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 1.6644765659220324, |
|
"grad_norm": 0.2606335593974457, |
|
"learning_rate": 1.6665026479226908e-06, |
|
"loss": 1.0299, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.27032112797171276, |
|
"learning_rate": 1.6454331101377875e-06, |
|
"loss": 1.0395, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 1.668856767411301, |
|
"grad_norm": 0.2592791939318623, |
|
"learning_rate": 1.6244856653860696e-06, |
|
"loss": 1.0395, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.6710468681559352, |
|
"grad_norm": 0.2573885200586874, |
|
"learning_rate": 1.6036606197919703e-06, |
|
"loss": 1.0248, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 1.6732369689005693, |
|
"grad_norm": 0.2632705169188632, |
|
"learning_rate": 1.582958277691189e-06, |
|
"loss": 1.0627, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.6754270696452038, |
|
"grad_norm": 0.2585213551771273, |
|
"learning_rate": 1.5623789416262513e-06, |
|
"loss": 1.0445, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.677617170389838, |
|
"grad_norm": 0.2638970821888065, |
|
"learning_rate": 1.5419229123420799e-06, |
|
"loss": 1.0396, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.679807271134472, |
|
"grad_norm": 0.25489306481434604, |
|
"learning_rate": 1.5215904887815969e-06, |
|
"loss": 1.0545, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 1.6819973718791066, |
|
"grad_norm": 0.2570139660401704, |
|
"learning_rate": 1.5013819680813602e-06, |
|
"loss": 1.0393, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.6841874726237407, |
|
"grad_norm": 0.26986499526339125, |
|
"learning_rate": 1.4812976455672278e-06, |
|
"loss": 1.0561, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 1.6863775733683748, |
|
"grad_norm": 0.2539719372535899, |
|
"learning_rate": 1.4613378147500257e-06, |
|
"loss": 1.0738, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.6885676741130093, |
|
"grad_norm": 0.25998054837825935, |
|
"learning_rate": 1.4415027673212712e-06, |
|
"loss": 1.0789, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 1.6907577748576434, |
|
"grad_norm": 0.2683313676445113, |
|
"learning_rate": 1.4217927931488996e-06, |
|
"loss": 1.0592, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.6929478756022776, |
|
"grad_norm": 0.25855774771508333, |
|
"learning_rate": 1.4022081802730503e-06, |
|
"loss": 1.0514, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 1.695137976346912, |
|
"grad_norm": 0.2657756452696875, |
|
"learning_rate": 1.3827492149018285e-06, |
|
"loss": 1.073, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.6973280770915462, |
|
"grad_norm": 0.25795267450071285, |
|
"learning_rate": 1.363416181407139e-06, |
|
"loss": 1.0561, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.6995181778361803, |
|
"grad_norm": 0.2604007114404956, |
|
"learning_rate": 1.3442093623205243e-06, |
|
"loss": 1.0592, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.7017082785808149, |
|
"grad_norm": 0.2577655438176319, |
|
"learning_rate": 1.3251290383290493e-06, |
|
"loss": 1.0547, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 1.703898379325449, |
|
"grad_norm": 0.2594147352663477, |
|
"learning_rate": 1.3061754882711775e-06, |
|
"loss": 1.0514, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.706088480070083, |
|
"grad_norm": 0.26762797623037493, |
|
"learning_rate": 1.2873489891327096e-06, |
|
"loss": 1.0592, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 1.7082785808147176, |
|
"grad_norm": 0.25919122876440764, |
|
"learning_rate": 1.2686498160427384e-06, |
|
"loss": 1.0568, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.7104686815593517, |
|
"grad_norm": 0.25374064783512074, |
|
"learning_rate": 1.2500782422696211e-06, |
|
"loss": 1.0443, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 1.7126587823039858, |
|
"grad_norm": 0.2544658335365746, |
|
"learning_rate": 1.231634539216986e-06, |
|
"loss": 1.0439, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.7148488830486204, |
|
"grad_norm": 0.2602599493437291, |
|
"learning_rate": 1.2133189764197661e-06, |
|
"loss": 1.0725, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 1.7170389837932545, |
|
"grad_norm": 0.2544098863630771, |
|
"learning_rate": 1.1951318215402674e-06, |
|
"loss": 1.0559, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.7192290845378886, |
|
"grad_norm": 0.2666420834689722, |
|
"learning_rate": 1.1770733403642498e-06, |
|
"loss": 1.0688, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 1.7214191852825231, |
|
"grad_norm": 0.2571596859481218, |
|
"learning_rate": 1.1591437967970399e-06, |
|
"loss": 1.0619, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.7236092860271572, |
|
"grad_norm": 0.25709888915380735, |
|
"learning_rate": 1.1413434528596879e-06, |
|
"loss": 1.059, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 1.7257993867717913, |
|
"grad_norm": 0.265414809756402, |
|
"learning_rate": 1.1236725686851268e-06, |
|
"loss": 1.092, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.727989487516426, |
|
"grad_norm": 0.261455574018407, |
|
"learning_rate": 1.106131402514372e-06, |
|
"loss": 1.0342, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 1.73017958826106, |
|
"grad_norm": 0.2546962789705564, |
|
"learning_rate": 1.0887202106927485e-06, |
|
"loss": 1.035, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.732369689005694, |
|
"grad_norm": 0.25295413120526167, |
|
"learning_rate": 1.0714392476661518e-06, |
|
"loss": 1.0451, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 1.7345597897503287, |
|
"grad_norm": 0.26081673975148323, |
|
"learning_rate": 1.054288765977317e-06, |
|
"loss": 1.0471, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.7367498904949628, |
|
"grad_norm": 0.2622781961728633, |
|
"learning_rate": 1.0372690162621368e-06, |
|
"loss": 1.0703, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 1.7389399912395969, |
|
"grad_norm": 0.2639733492692751, |
|
"learning_rate": 1.0203802472459934e-06, |
|
"loss": 1.0783, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.7411300919842314, |
|
"grad_norm": 0.26422423738917283, |
|
"learning_rate": 1.003622705740136e-06, |
|
"loss": 1.0549, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.7433201927288655, |
|
"grad_norm": 0.25474550459007383, |
|
"learning_rate": 9.869966366380546e-07, |
|
"loss": 1.0742, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.7455102934734996, |
|
"grad_norm": 0.25885409380951035, |
|
"learning_rate": 9.70502282911915e-07, |
|
"loss": 1.0559, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 1.7477003942181342, |
|
"grad_norm": 0.26987519340353844, |
|
"learning_rate": 9.54139885609e-07, |
|
"loss": 1.0602, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.7498904949627683, |
|
"grad_norm": 0.2606876106625255, |
|
"learning_rate": 9.379096838481993e-07, |
|
"loss": 1.067, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 1.7520805957074024, |
|
"grad_norm": 0.2703733043459076, |
|
"learning_rate": 9.218119148165006e-07, |
|
"loss": 1.0918, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.754270696452037, |
|
"grad_norm": 0.2594581284532797, |
|
"learning_rate": 9.058468137655251e-07, |
|
"loss": 1.0652, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 1.756460797196671, |
|
"grad_norm": 0.25391150674998686, |
|
"learning_rate": 8.900146140081045e-07, |
|
"loss": 1.0443, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.7586508979413051, |
|
"grad_norm": 0.26854247497016676, |
|
"learning_rate": 8.743155469148556e-07, |
|
"loss": 1.0961, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 1.7608409986859397, |
|
"grad_norm": 0.25915592104435276, |
|
"learning_rate": 8.587498419108009e-07, |
|
"loss": 1.035, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.7630310994305738, |
|
"grad_norm": 0.26010447740581344, |
|
"learning_rate": 8.433177264720205e-07, |
|
"loss": 1.0627, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.765221200175208, |
|
"grad_norm": 0.2610936282674673, |
|
"learning_rate": 8.280194261223318e-07, |
|
"loss": 1.0422, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.7674113009198424, |
|
"grad_norm": 0.25929224377828625, |
|
"learning_rate": 8.128551644299809e-07, |
|
"loss": 1.0715, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 1.7696014016644765, |
|
"grad_norm": 0.2786652487296327, |
|
"learning_rate": 7.978251630043854e-07, |
|
"loss": 1.0668, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.7717915024091109, |
|
"grad_norm": 0.2625714433234073, |
|
"learning_rate": 7.829296414928944e-07, |
|
"loss": 1.0465, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 1.7739816031537452, |
|
"grad_norm": 0.2516813690067178, |
|
"learning_rate": 7.681688175775792e-07, |
|
"loss": 1.0225, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.7761717038983793, |
|
"grad_norm": 0.2562341997538078, |
|
"learning_rate": 7.535429069720446e-07, |
|
"loss": 1.0557, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 1.7783618046430136, |
|
"grad_norm": 0.2660365664245146, |
|
"learning_rate": 7.390521234182835e-07, |
|
"loss": 1.0758, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.780551905387648, |
|
"grad_norm": 0.2611768825441285, |
|
"learning_rate": 7.246966786835563e-07, |
|
"loss": 1.0311, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 1.782742006132282, |
|
"grad_norm": 0.25377711508428263, |
|
"learning_rate": 7.104767825572878e-07, |
|
"loss": 1.0586, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.7849321068769164, |
|
"grad_norm": 0.25560505594472754, |
|
"learning_rate": 6.96392642848005e-07, |
|
"loss": 1.0762, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.7871222076215507, |
|
"grad_norm": 0.2540736575686416, |
|
"learning_rate": 6.82444465380303e-07, |
|
"loss": 1.0582, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.7893123083661848, |
|
"grad_norm": 0.26520844557122947, |
|
"learning_rate": 6.686324539918343e-07, |
|
"loss": 1.0639, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 1.7915024091108191, |
|
"grad_norm": 0.2605220753523269, |
|
"learning_rate": 6.549568105303283e-07, |
|
"loss": 1.0344, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.7936925098554535, |
|
"grad_norm": 0.2646533823325038, |
|
"learning_rate": 6.414177348506423e-07, |
|
"loss": 1.0582, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 1.7958826106000876, |
|
"grad_norm": 0.26517750832452447, |
|
"learning_rate": 6.280154248118475e-07, |
|
"loss": 1.0656, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.798072711344722, |
|
"grad_norm": 0.25435449716739056, |
|
"learning_rate": 6.147500762743263e-07, |
|
"loss": 1.0574, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 1.8002628120893562, |
|
"grad_norm": 0.2649605554429642, |
|
"learning_rate": 6.0162188309692e-07, |
|
"loss": 1.0723, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.8024529128339903, |
|
"grad_norm": 0.2594557118791885, |
|
"learning_rate": 5.886310371340853e-07, |
|
"loss": 1.0688, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 1.8046430135786247, |
|
"grad_norm": 0.2547123772715979, |
|
"learning_rate": 5.757777282331034e-07, |
|
"loss": 1.0748, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.806833114323259, |
|
"grad_norm": 0.2549766396239622, |
|
"learning_rate": 5.630621442312978e-07, |
|
"loss": 1.0711, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.809023215067893, |
|
"grad_norm": 0.2552394207988368, |
|
"learning_rate": 5.504844709532864e-07, |
|
"loss": 1.0443, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.8112133158125274, |
|
"grad_norm": 0.2568508170285463, |
|
"learning_rate": 5.380448922082726e-07, |
|
"loss": 1.0686, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 1.8134034165571618, |
|
"grad_norm": 0.26793785186152136, |
|
"learning_rate": 5.257435897873564e-07, |
|
"loss": 1.0627, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.8155935173017959, |
|
"grad_norm": 0.2556345040237505, |
|
"learning_rate": 5.135807434608764e-07, |
|
"loss": 1.0824, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 1.8177836180464302, |
|
"grad_norm": 0.2522974784155926, |
|
"learning_rate": 5.015565309757841e-07, |
|
"loss": 1.0561, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.8199737187910645, |
|
"grad_norm": 0.25993666225105416, |
|
"learning_rate": 4.896711280530475e-07, |
|
"loss": 1.0658, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 1.8221638195356986, |
|
"grad_norm": 0.25711695701360565, |
|
"learning_rate": 4.779247083850814e-07, |
|
"loss": 1.0594, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.824353920280333, |
|
"grad_norm": 0.2582563617934845, |
|
"learning_rate": 4.6631744363320964e-07, |
|
"loss": 1.0373, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 1.8265440210249673, |
|
"grad_norm": 0.2575689764561888, |
|
"learning_rate": 4.548495034251521e-07, |
|
"loss": 1.0604, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.8287341217696014, |
|
"grad_norm": 0.2599323407971448, |
|
"learning_rate": 4.435210553525571e-07, |
|
"loss": 1.0646, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.8309242225142357, |
|
"grad_norm": 0.2598200040268689, |
|
"learning_rate": 4.323322649685391e-07, |
|
"loss": 1.0629, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.83311432325887, |
|
"grad_norm": 0.2617388138565943, |
|
"learning_rate": 4.2128329578526636e-07, |
|
"loss": 1.065, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 1.8353044240035041, |
|
"grad_norm": 0.2525471022943904, |
|
"learning_rate": 4.1037430927157507e-07, |
|
"loss": 1.0395, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.8374945247481385, |
|
"grad_norm": 0.2673015600105575, |
|
"learning_rate": 3.996054648505965e-07, |
|
"loss": 1.0555, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 1.8396846254927728, |
|
"grad_norm": 0.25746822209162834, |
|
"learning_rate": 3.8897691989744467e-07, |
|
"loss": 1.0723, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.841874726237407, |
|
"grad_norm": 0.259721450178856, |
|
"learning_rate": 3.784888297368994e-07, |
|
"loss": 1.0324, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 1.8440648269820412, |
|
"grad_norm": 0.2557254088083905, |
|
"learning_rate": 3.6814134764114997e-07, |
|
"loss": 1.0424, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.8462549277266755, |
|
"grad_norm": 0.26821986701418704, |
|
"learning_rate": 3.5793462482754613e-07, |
|
"loss": 1.0525, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 1.8484450284713096, |
|
"grad_norm": 0.26526821999725914, |
|
"learning_rate": 3.478688104563943e-07, |
|
"loss": 1.0332, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.850635129215944, |
|
"grad_norm": 0.2657264529801212, |
|
"learning_rate": 3.379440516287724e-07, |
|
"loss": 1.0424, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.8528252299605783, |
|
"grad_norm": 0.2622695643818941, |
|
"learning_rate": 3.281604933843852e-07, |
|
"loss": 1.067, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.8550153307052124, |
|
"grad_norm": 0.2621090614558729, |
|
"learning_rate": 3.185182786994423e-07, |
|
"loss": 1.0805, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 1.8572054314498467, |
|
"grad_norm": 0.26019161340403885, |
|
"learning_rate": 3.090175484845681e-07, |
|
"loss": 1.066, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.859395532194481, |
|
"grad_norm": 0.2526519420768719, |
|
"learning_rate": 2.996584415827419e-07, |
|
"loss": 1.0564, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 1.8615856329391152, |
|
"grad_norm": 0.2623067668598087, |
|
"learning_rate": 2.904410947672731e-07, |
|
"loss": 1.0965, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.8637757336837495, |
|
"grad_norm": 0.2665677286699009, |
|
"learning_rate": 2.8136564273979816e-07, |
|
"loss": 1.0484, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 1.8659658344283838, |
|
"grad_norm": 0.2570070789968646, |
|
"learning_rate": 2.724322181283112e-07, |
|
"loss": 1.0926, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.868155935173018, |
|
"grad_norm": 0.25969876794186386, |
|
"learning_rate": 2.6364095148523114e-07, |
|
"loss": 1.0557, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 1.8703460359176522, |
|
"grad_norm": 0.25841316628132993, |
|
"learning_rate": 2.549919712854909e-07, |
|
"loss": 1.0721, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.8725361366622866, |
|
"grad_norm": 0.2622532841527044, |
|
"learning_rate": 2.4648540392465783e-07, |
|
"loss": 1.042, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.8747262374069207, |
|
"grad_norm": 0.262227535263612, |
|
"learning_rate": 2.3812137371708732e-07, |
|
"loss": 1.075, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.876916338151555, |
|
"grad_norm": 0.26352495433091844, |
|
"learning_rate": 2.299000028941112e-07, |
|
"loss": 1.0654, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 1.8791064388961893, |
|
"grad_norm": 0.26358888945101755, |
|
"learning_rate": 2.2182141160224325e-07, |
|
"loss": 1.035, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.8812965396408234, |
|
"grad_norm": 0.2653279390073888, |
|
"learning_rate": 2.1388571790142865e-07, |
|
"loss": 1.0668, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 1.8834866403854578, |
|
"grad_norm": 0.257677335409702, |
|
"learning_rate": 2.0609303776332078e-07, |
|
"loss": 1.0529, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.885676741130092, |
|
"grad_norm": 0.26428455446729876, |
|
"learning_rate": 1.9844348506957824e-07, |
|
"loss": 1.0467, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 1.8878668418747262, |
|
"grad_norm": 0.26294409986685496, |
|
"learning_rate": 1.909371716102093e-07, |
|
"loss": 1.0568, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.8900569426193605, |
|
"grad_norm": 0.26329149601630425, |
|
"learning_rate": 1.835742070819335e-07, |
|
"loss": 1.0672, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 1.8922470433639949, |
|
"grad_norm": 0.26651307963750537, |
|
"learning_rate": 1.7635469908657832e-07, |
|
"loss": 1.0617, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.894437144108629, |
|
"grad_norm": 0.2663947225115879, |
|
"learning_rate": 1.6927875312950927e-07, |
|
"loss": 1.0506, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.8966272448532633, |
|
"grad_norm": 0.2534729439266378, |
|
"learning_rate": 1.6234647261808678e-07, |
|
"loss": 1.0457, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.8988173455978976, |
|
"grad_norm": 0.25515531687003673, |
|
"learning_rate": 1.555579588601519e-07, |
|
"loss": 1.0537, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 1.9010074463425317, |
|
"grad_norm": 0.2598580190509047, |
|
"learning_rate": 1.489133110625529e-07, |
|
"loss": 1.0549, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.903197547087166, |
|
"grad_norm": 0.25780277018916953, |
|
"learning_rate": 1.42412626329691e-07, |
|
"loss": 1.0813, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 1.9053876478318004, |
|
"grad_norm": 0.25921414992499353, |
|
"learning_rate": 1.3605599966209803e-07, |
|
"loss": 1.0811, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.9075777485764345, |
|
"grad_norm": 0.2632565173184216, |
|
"learning_rate": 1.298435239550544e-07, |
|
"loss": 1.0566, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 1.9097678493210688, |
|
"grad_norm": 0.25942353454065054, |
|
"learning_rate": 1.2377528999723e-07, |
|
"loss": 1.0555, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.9119579500657031, |
|
"grad_norm": 0.25904309238444745, |
|
"learning_rate": 1.1785138646935313e-07, |
|
"loss": 1.0555, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 1.9141480508103372, |
|
"grad_norm": 0.2592095253843778, |
|
"learning_rate": 1.1207189994291934e-07, |
|
"loss": 1.0545, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.9163381515549716, |
|
"grad_norm": 0.2575477136605174, |
|
"learning_rate": 1.0643691487892572e-07, |
|
"loss": 1.0414, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.9185282522996059, |
|
"grad_norm": 0.256629839507532, |
|
"learning_rate": 1.0094651362663299e-07, |
|
"loss": 1.0807, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.92071835304424, |
|
"grad_norm": 0.2631594825530231, |
|
"learning_rate": 9.560077642236765e-08, |
|
"loss": 1.0635, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 1.9229084537888743, |
|
"grad_norm": 0.25757118647856275, |
|
"learning_rate": 9.039978138834282e-08, |
|
"loss": 1.0559, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.9250985545335086, |
|
"grad_norm": 0.25635783319220967, |
|
"learning_rate": 8.534360453152369e-08, |
|
"loss": 1.0736, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 1.9272886552781427, |
|
"grad_norm": 0.26497079430057574, |
|
"learning_rate": 8.043231974250942e-08, |
|
"loss": 1.0393, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.929478756022777, |
|
"grad_norm": 0.2574631334108864, |
|
"learning_rate": 7.566599879445968e-08, |
|
"loss": 1.0646, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 1.9316688567674114, |
|
"grad_norm": 0.25966994233199864, |
|
"learning_rate": 7.104471134204205e-08, |
|
"loss": 1.0629, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.9338589575120455, |
|
"grad_norm": 0.259903869118688, |
|
"learning_rate": 6.656852492041621e-08, |
|
"loss": 1.0799, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 1.9360490582566798, |
|
"grad_norm": 0.26803973886426774, |
|
"learning_rate": 6.223750494424363e-08, |
|
"loss": 1.0887, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.9382391590013142, |
|
"grad_norm": 0.254641347090638, |
|
"learning_rate": 5.8051714706737203e-08, |
|
"loss": 1.048, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 1.9404292597459483, |
|
"grad_norm": 0.2561371140140117, |
|
"learning_rate": 5.401121537872978e-08, |
|
"loss": 1.0555, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.9426193604905826, |
|
"grad_norm": 0.25821846272770893, |
|
"learning_rate": 5.011606600778596e-08, |
|
"loss": 1.0459, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 1.944809461235217, |
|
"grad_norm": 0.2713573159900797, |
|
"learning_rate": 4.636632351733394e-08, |
|
"loss": 1.0768, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.946999561979851, |
|
"grad_norm": 0.24621746440958298, |
|
"learning_rate": 4.27620427058395e-08, |
|
"loss": 1.0227, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 1.9491896627244854, |
|
"grad_norm": 0.26617144432411904, |
|
"learning_rate": 3.930327624599994e-08, |
|
"loss": 1.0582, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.9513797634691197, |
|
"grad_norm": 0.2588098327195591, |
|
"learning_rate": 3.599007468397586e-08, |
|
"loss": 1.0408, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 1.9535698642137538, |
|
"grad_norm": 0.2588717220296684, |
|
"learning_rate": 3.282248643865504e-08, |
|
"loss": 1.0371, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.955759964958388, |
|
"grad_norm": 0.2612067459198219, |
|
"learning_rate": 2.9800557800941932e-08, |
|
"loss": 1.06, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 1.9579500657030224, |
|
"grad_norm": 0.26040948454378465, |
|
"learning_rate": 2.69243329330815e-08, |
|
"loss": 1.0809, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.9601401664476565, |
|
"grad_norm": 0.2587545182578282, |
|
"learning_rate": 2.4193853868014206e-08, |
|
"loss": 1.0627, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.9623302671922909, |
|
"grad_norm": 0.2601426689640191, |
|
"learning_rate": 2.160916050876427e-08, |
|
"loss": 1.0479, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.9645203679369252, |
|
"grad_norm": 0.27136109068065567, |
|
"learning_rate": 1.9170290627851253e-08, |
|
"loss": 1.0672, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 1.9667104686815593, |
|
"grad_norm": 0.25835360666843193, |
|
"learning_rate": 1.68772798667427e-08, |
|
"loss": 1.0355, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.9689005694261936, |
|
"grad_norm": 0.25394949222511376, |
|
"learning_rate": 1.4730161735331262e-08, |
|
"loss": 1.0496, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 1.971090670170828, |
|
"grad_norm": 0.25972727406913415, |
|
"learning_rate": 1.2728967611445042e-08, |
|
"loss": 1.0676, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.973280770915462, |
|
"grad_norm": 0.28013541605837033, |
|
"learning_rate": 1.0873726740390223e-08, |
|
"loss": 1.0594, |
|
"step": 4505 |
|
}, |
|
{ |
|
"epoch": 1.9754708716600964, |
|
"grad_norm": 0.2582001095059419, |
|
"learning_rate": 9.164466234521385e-09, |
|
"loss": 1.0531, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.9776609724047307, |
|
"grad_norm": 0.2637625227746746, |
|
"learning_rate": 7.601211072846282e-09, |
|
"loss": 1.0441, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 1.9798510731493648, |
|
"grad_norm": 0.2656010194757306, |
|
"learning_rate": 6.183984100663898e-09, |
|
"loss": 1.0732, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.9820411738939991, |
|
"grad_norm": 0.2557112398087727, |
|
"learning_rate": 4.912806029225836e-09, |
|
"loss": 1.049, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 1.9842312746386335, |
|
"grad_norm": 0.2640281308017009, |
|
"learning_rate": 3.787695435434336e-09, |
|
"loss": 1.0367, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.9864213753832676, |
|
"grad_norm": 0.2550981215142995, |
|
"learning_rate": 2.808668761576927e-09, |
|
"loss": 1.0549, |
|
"step": 4535 |
|
}, |
|
{ |
|
"epoch": 1.988611476127902, |
|
"grad_norm": 0.2611486955899783, |
|
"learning_rate": 1.975740315075525e-09, |
|
"loss": 1.0496, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.9908015768725362, |
|
"grad_norm": 0.26343127951064566, |
|
"learning_rate": 1.2889222682865854e-09, |
|
"loss": 1.048, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 1.9929916776171703, |
|
"grad_norm": 0.2617943886231076, |
|
"learning_rate": 7.482246583201402e-10, |
|
"loss": 1.0607, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.9951817783618047, |
|
"grad_norm": 0.267332987436094, |
|
"learning_rate": 3.5365538689102754e-10, |
|
"loss": 1.0668, |
|
"step": 4555 |
|
}, |
|
{ |
|
"epoch": 1.997371879106439, |
|
"grad_norm": 0.25204221832833196, |
|
"learning_rate": 1.0522022020564848e-10, |
|
"loss": 1.0562, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.999561979851073, |
|
"grad_norm": 0.25875087028369437, |
|
"learning_rate": 2.9227888775906764e-12, |
|
"loss": 1.0584, |
|
"step": 4565 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 4566, |
|
"total_flos": 432050892963840.0, |
|
"train_loss": 1.1768484963589576, |
|
"train_runtime": 15595.6601, |
|
"train_samples_per_second": 18.73, |
|
"train_steps_per_second": 0.293 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 4566, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 432050892963840.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|