|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 6849, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0004380201489268506, |
|
"grad_norm": 244.05783349993243, |
|
"learning_rate": 2.9197080291970803e-08, |
|
"loss": 8.7812, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.002190100744634253, |
|
"grad_norm": 242.80510323947792, |
|
"learning_rate": 1.4598540145985402e-07, |
|
"loss": 8.8047, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004380201489268506, |
|
"grad_norm": 225.0335670574099, |
|
"learning_rate": 2.9197080291970804e-07, |
|
"loss": 8.7531, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.006570302233902759, |
|
"grad_norm": 228.72582215453716, |
|
"learning_rate": 4.379562043795621e-07, |
|
"loss": 8.7125, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.008760402978537012, |
|
"grad_norm": 202.96584235540402, |
|
"learning_rate": 5.839416058394161e-07, |
|
"loss": 8.5219, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.010950503723171266, |
|
"grad_norm": 125.01523628364068, |
|
"learning_rate": 7.299270072992701e-07, |
|
"loss": 8.2953, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.013140604467805518, |
|
"grad_norm": 110.40505502297347, |
|
"learning_rate": 8.759124087591242e-07, |
|
"loss": 7.7859, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.015330705212439772, |
|
"grad_norm": 68.45912276297945, |
|
"learning_rate": 1.0218978102189781e-06, |
|
"loss": 7.3016, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.017520805957074025, |
|
"grad_norm": 55.590663092336015, |
|
"learning_rate": 1.1678832116788322e-06, |
|
"loss": 6.7047, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01971090670170828, |
|
"grad_norm": 40.0844953425019, |
|
"learning_rate": 1.3138686131386864e-06, |
|
"loss": 6.3906, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.021901007446342532, |
|
"grad_norm": 34.48948950173287, |
|
"learning_rate": 1.4598540145985402e-06, |
|
"loss": 5.575, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.024091108190976786, |
|
"grad_norm": 31.68892363070654, |
|
"learning_rate": 1.6058394160583942e-06, |
|
"loss": 5.1031, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.026281208935611037, |
|
"grad_norm": 24.216002249213382, |
|
"learning_rate": 1.7518248175182485e-06, |
|
"loss": 4.5187, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02847130968024529, |
|
"grad_norm": 16.153964519214966, |
|
"learning_rate": 1.8978102189781023e-06, |
|
"loss": 3.6203, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.030661410424879545, |
|
"grad_norm": 13.836195759408959, |
|
"learning_rate": 2.0437956204379563e-06, |
|
"loss": 3.1273, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0328515111695138, |
|
"grad_norm": 11.652340272071394, |
|
"learning_rate": 2.1897810218978103e-06, |
|
"loss": 2.6828, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.03504161191414805, |
|
"grad_norm": 6.333091358966209, |
|
"learning_rate": 2.3357664233576643e-06, |
|
"loss": 2.2922, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03723171265878231, |
|
"grad_norm": 2.9939785076399814, |
|
"learning_rate": 2.4817518248175183e-06, |
|
"loss": 2.0016, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.03942181340341656, |
|
"grad_norm": 2.6606355077846895, |
|
"learning_rate": 2.627737226277373e-06, |
|
"loss": 1.8633, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04161191414805081, |
|
"grad_norm": 1.2346022297538288, |
|
"learning_rate": 2.7737226277372264e-06, |
|
"loss": 1.7285, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.043802014892685065, |
|
"grad_norm": 0.9865905253164456, |
|
"learning_rate": 2.9197080291970804e-06, |
|
"loss": 1.5969, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.045992115637319315, |
|
"grad_norm": 0.8076275483624458, |
|
"learning_rate": 3.065693430656935e-06, |
|
"loss": 1.5781, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.04818221638195357, |
|
"grad_norm": 0.6036339872996322, |
|
"learning_rate": 3.2116788321167884e-06, |
|
"loss": 1.5227, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05037231712658782, |
|
"grad_norm": 0.5227962690437455, |
|
"learning_rate": 3.3576642335766425e-06, |
|
"loss": 1.466, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.052562417871222074, |
|
"grad_norm": 0.5546021102398396, |
|
"learning_rate": 3.503649635036497e-06, |
|
"loss": 1.4375, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05475251861585633, |
|
"grad_norm": 0.5037444771795104, |
|
"learning_rate": 3.6496350364963505e-06, |
|
"loss": 1.4559, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.05694261936049058, |
|
"grad_norm": 0.4581953982365608, |
|
"learning_rate": 3.7956204379562045e-06, |
|
"loss": 1.3863, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.05913272010512484, |
|
"grad_norm": 0.42553885665065, |
|
"learning_rate": 3.9416058394160585e-06, |
|
"loss": 1.3832, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.06132282084975909, |
|
"grad_norm": 0.4179093460042933, |
|
"learning_rate": 4.0875912408759126e-06, |
|
"loss": 1.3824, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06351292159439334, |
|
"grad_norm": 0.40273353738694206, |
|
"learning_rate": 4.233576642335767e-06, |
|
"loss": 1.3465, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.0657030223390276, |
|
"grad_norm": 0.41160581205347213, |
|
"learning_rate": 4.379562043795621e-06, |
|
"loss": 1.3945, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06789312308366185, |
|
"grad_norm": 0.3949551182499691, |
|
"learning_rate": 4.525547445255475e-06, |
|
"loss": 1.3434, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.0700832238282961, |
|
"grad_norm": 0.387740320945072, |
|
"learning_rate": 4.671532846715329e-06, |
|
"loss": 1.3551, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07227332457293036, |
|
"grad_norm": 0.37700600156165553, |
|
"learning_rate": 4.8175182481751835e-06, |
|
"loss": 1.3637, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.07446342531756461, |
|
"grad_norm": 0.36760005726354067, |
|
"learning_rate": 4.963503649635037e-06, |
|
"loss": 1.3211, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.07665352606219886, |
|
"grad_norm": 0.36407987579057216, |
|
"learning_rate": 5.1094890510948916e-06, |
|
"loss": 1.3168, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.07884362680683311, |
|
"grad_norm": 0.3639367357256408, |
|
"learning_rate": 5.255474452554746e-06, |
|
"loss": 1.3477, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08103372755146737, |
|
"grad_norm": 0.3695352411819165, |
|
"learning_rate": 5.401459854014599e-06, |
|
"loss": 1.3062, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.08322382829610162, |
|
"grad_norm": 0.33882020026658927, |
|
"learning_rate": 5.547445255474453e-06, |
|
"loss": 1.3195, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.08541392904073587, |
|
"grad_norm": 0.3519909386154269, |
|
"learning_rate": 5.693430656934307e-06, |
|
"loss": 1.298, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.08760402978537013, |
|
"grad_norm": 0.3677025946352492, |
|
"learning_rate": 5.839416058394161e-06, |
|
"loss": 1.3055, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08979413053000437, |
|
"grad_norm": 0.3553999324591866, |
|
"learning_rate": 5.985401459854016e-06, |
|
"loss": 1.3102, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.09198423127463863, |
|
"grad_norm": 0.339876890506739, |
|
"learning_rate": 6.13138686131387e-06, |
|
"loss": 1.2914, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.09417433201927289, |
|
"grad_norm": 0.36515602431235566, |
|
"learning_rate": 6.277372262773723e-06, |
|
"loss": 1.2844, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.09636443276390715, |
|
"grad_norm": 0.3937869670816605, |
|
"learning_rate": 6.423357664233577e-06, |
|
"loss": 1.3137, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.09855453350854139, |
|
"grad_norm": 0.34727218148619815, |
|
"learning_rate": 6.569343065693431e-06, |
|
"loss": 1.2637, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.10074463425317565, |
|
"grad_norm": 0.3582494230319763, |
|
"learning_rate": 6.715328467153285e-06, |
|
"loss": 1.2852, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1029347349978099, |
|
"grad_norm": 0.3570600153368335, |
|
"learning_rate": 6.86131386861314e-06, |
|
"loss": 1.2887, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.10512483574244415, |
|
"grad_norm": 0.3346226350011156, |
|
"learning_rate": 7.007299270072994e-06, |
|
"loss": 1.2633, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1073149364870784, |
|
"grad_norm": 0.33400332391330406, |
|
"learning_rate": 7.153284671532848e-06, |
|
"loss": 1.2336, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.10950503723171266, |
|
"grad_norm": 0.3315126078930196, |
|
"learning_rate": 7.299270072992701e-06, |
|
"loss": 1.2664, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1116951379763469, |
|
"grad_norm": 0.3709768940217129, |
|
"learning_rate": 7.445255474452555e-06, |
|
"loss": 1.2621, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.11388523872098116, |
|
"grad_norm": 0.32948286354246736, |
|
"learning_rate": 7.591240875912409e-06, |
|
"loss": 1.2867, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.11607533946561542, |
|
"grad_norm": 0.33294579346715825, |
|
"learning_rate": 7.737226277372264e-06, |
|
"loss": 1.2348, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.11826544021024968, |
|
"grad_norm": 0.323499147467889, |
|
"learning_rate": 7.883211678832117e-06, |
|
"loss": 1.2281, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.12045554095488392, |
|
"grad_norm": 0.3615814763070892, |
|
"learning_rate": 8.029197080291972e-06, |
|
"loss": 1.2637, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.12264564169951818, |
|
"grad_norm": 0.3538080065403599, |
|
"learning_rate": 8.175182481751825e-06, |
|
"loss": 1.2617, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.12483574244415244, |
|
"grad_norm": 0.336790971357565, |
|
"learning_rate": 8.32116788321168e-06, |
|
"loss": 1.2527, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.12702584318878668, |
|
"grad_norm": 0.37615337138920746, |
|
"learning_rate": 8.467153284671533e-06, |
|
"loss": 1.2527, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.12921594393342092, |
|
"grad_norm": 0.33532555778207584, |
|
"learning_rate": 8.613138686131386e-06, |
|
"loss": 1.223, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.1314060446780552, |
|
"grad_norm": 0.33649845965070907, |
|
"learning_rate": 8.759124087591241e-06, |
|
"loss": 1.2332, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13359614542268944, |
|
"grad_norm": 0.3202250374399337, |
|
"learning_rate": 8.905109489051096e-06, |
|
"loss": 1.2219, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.1357862461673237, |
|
"grad_norm": 0.34514025665060105, |
|
"learning_rate": 9.05109489051095e-06, |
|
"loss": 1.2477, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.13797634691195795, |
|
"grad_norm": 0.33020924127428763, |
|
"learning_rate": 9.197080291970804e-06, |
|
"loss": 1.2105, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.1401664476565922, |
|
"grad_norm": 0.35152454516434467, |
|
"learning_rate": 9.343065693430657e-06, |
|
"loss": 1.1941, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.14235654840122647, |
|
"grad_norm": 0.316946041818773, |
|
"learning_rate": 9.48905109489051e-06, |
|
"loss": 1.2348, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.1445466491458607, |
|
"grad_norm": 0.3171098592158982, |
|
"learning_rate": 9.635036496350367e-06, |
|
"loss": 1.2172, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.14673674989049496, |
|
"grad_norm": 0.32347075273927617, |
|
"learning_rate": 9.78102189781022e-06, |
|
"loss": 1.2082, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.14892685063512923, |
|
"grad_norm": 0.31992621701805507, |
|
"learning_rate": 9.927007299270073e-06, |
|
"loss": 1.2039, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.15111695137976347, |
|
"grad_norm": 0.33669777465454753, |
|
"learning_rate": 1.0072992700729928e-05, |
|
"loss": 1.2199, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.1533070521243977, |
|
"grad_norm": 0.3242034424279669, |
|
"learning_rate": 1.0218978102189783e-05, |
|
"loss": 1.2219, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.15549715286903198, |
|
"grad_norm": 0.3383473976099628, |
|
"learning_rate": 1.0364963503649636e-05, |
|
"loss": 1.2082, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.15768725361366623, |
|
"grad_norm": 0.3375586020448848, |
|
"learning_rate": 1.0510948905109491e-05, |
|
"loss": 1.2156, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.15987735435830047, |
|
"grad_norm": 0.3277741096524666, |
|
"learning_rate": 1.0656934306569344e-05, |
|
"loss": 1.207, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.16206745510293474, |
|
"grad_norm": 0.3337717738586561, |
|
"learning_rate": 1.0802919708029198e-05, |
|
"loss": 1.2176, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.164257555847569, |
|
"grad_norm": 0.34816058383267184, |
|
"learning_rate": 1.0948905109489052e-05, |
|
"loss": 1.2023, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.16644765659220323, |
|
"grad_norm": 0.31393085773851587, |
|
"learning_rate": 1.1094890510948906e-05, |
|
"loss": 1.2066, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1686377573368375, |
|
"grad_norm": 0.34170466792783555, |
|
"learning_rate": 1.124087591240876e-05, |
|
"loss": 1.2457, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.17082785808147175, |
|
"grad_norm": 0.32289309385714443, |
|
"learning_rate": 1.1386861313868614e-05, |
|
"loss": 1.2234, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.173017958826106, |
|
"grad_norm": 0.31607508887925223, |
|
"learning_rate": 1.1532846715328467e-05, |
|
"loss": 1.2184, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.17520805957074026, |
|
"grad_norm": 0.3324717224401286, |
|
"learning_rate": 1.1678832116788322e-05, |
|
"loss": 1.2223, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1773981603153745, |
|
"grad_norm": 0.29789000161051715, |
|
"learning_rate": 1.1824817518248176e-05, |
|
"loss": 1.1777, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.17958826106000875, |
|
"grad_norm": 0.31662586926381675, |
|
"learning_rate": 1.1970802919708031e-05, |
|
"loss": 1.198, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.18177836180464302, |
|
"grad_norm": 0.31124419804470477, |
|
"learning_rate": 1.2116788321167885e-05, |
|
"loss": 1.1855, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.18396846254927726, |
|
"grad_norm": 0.31302760748427677, |
|
"learning_rate": 1.226277372262774e-05, |
|
"loss": 1.1934, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.18615856329391153, |
|
"grad_norm": 0.34309852924466416, |
|
"learning_rate": 1.2408759124087593e-05, |
|
"loss": 1.2152, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.18834866403854578, |
|
"grad_norm": 0.319117428903726, |
|
"learning_rate": 1.2554744525547446e-05, |
|
"loss": 1.2125, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.19053876478318002, |
|
"grad_norm": 0.33718547337261756, |
|
"learning_rate": 1.27007299270073e-05, |
|
"loss": 1.207, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.1927288655278143, |
|
"grad_norm": 0.32668740457102435, |
|
"learning_rate": 1.2846715328467154e-05, |
|
"loss": 1.1836, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.19491896627244854, |
|
"grad_norm": 0.32088829919202544, |
|
"learning_rate": 1.2992700729927009e-05, |
|
"loss": 1.1941, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.19710906701708278, |
|
"grad_norm": 0.3431063641531018, |
|
"learning_rate": 1.3138686131386862e-05, |
|
"loss": 1.2031, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.19929916776171705, |
|
"grad_norm": 0.3268567982657968, |
|
"learning_rate": 1.3284671532846715e-05, |
|
"loss": 1.1941, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.2014892685063513, |
|
"grad_norm": 0.31848928369863305, |
|
"learning_rate": 1.343065693430657e-05, |
|
"loss": 1.1812, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.20367936925098554, |
|
"grad_norm": 0.31588925727105266, |
|
"learning_rate": 1.3576642335766423e-05, |
|
"loss": 1.1785, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.2058694699956198, |
|
"grad_norm": 0.32886857710303746, |
|
"learning_rate": 1.372262773722628e-05, |
|
"loss": 1.1816, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.20805957074025405, |
|
"grad_norm": 0.30015782909456273, |
|
"learning_rate": 1.3868613138686133e-05, |
|
"loss": 1.1641, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.2102496714848883, |
|
"grad_norm": 0.30042222734891383, |
|
"learning_rate": 1.4014598540145988e-05, |
|
"loss": 1.1879, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.21243977222952257, |
|
"grad_norm": 0.3082053551352324, |
|
"learning_rate": 1.416058394160584e-05, |
|
"loss": 1.1762, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.2146298729741568, |
|
"grad_norm": 0.3069511061152226, |
|
"learning_rate": 1.4306569343065696e-05, |
|
"loss": 1.1852, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.21681997371879105, |
|
"grad_norm": 0.30587098461412204, |
|
"learning_rate": 1.4452554744525549e-05, |
|
"loss": 1.1887, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.21901007446342532, |
|
"grad_norm": 0.3472745496182846, |
|
"learning_rate": 1.4598540145985402e-05, |
|
"loss": 1.1621, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22120017520805957, |
|
"grad_norm": 0.3383694408481445, |
|
"learning_rate": 1.4744525547445257e-05, |
|
"loss": 1.1691, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.2233902759526938, |
|
"grad_norm": 0.30917693397844287, |
|
"learning_rate": 1.489051094890511e-05, |
|
"loss": 1.2016, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.22558037669732808, |
|
"grad_norm": 0.32724498784049777, |
|
"learning_rate": 1.5036496350364965e-05, |
|
"loss": 1.1824, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.22777047744196233, |
|
"grad_norm": 0.30742723190833204, |
|
"learning_rate": 1.5182481751824818e-05, |
|
"loss": 1.1797, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.22996057818659657, |
|
"grad_norm": 0.34475225249504454, |
|
"learning_rate": 1.5328467153284673e-05, |
|
"loss": 1.184, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.23215067893123084, |
|
"grad_norm": 0.3129106984746414, |
|
"learning_rate": 1.5474452554744528e-05, |
|
"loss": 1.2012, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.23434077967586509, |
|
"grad_norm": 0.31049881643452515, |
|
"learning_rate": 1.5620437956204383e-05, |
|
"loss": 1.1604, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.23653088042049936, |
|
"grad_norm": 0.3086241541949012, |
|
"learning_rate": 1.5766423357664234e-05, |
|
"loss": 1.1648, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.2387209811651336, |
|
"grad_norm": 0.3189864576022323, |
|
"learning_rate": 1.591240875912409e-05, |
|
"loss": 1.1602, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.24091108190976784, |
|
"grad_norm": 0.32841841575252745, |
|
"learning_rate": 1.6058394160583944e-05, |
|
"loss": 1.166, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.24310118265440211, |
|
"grad_norm": 0.3238032151207836, |
|
"learning_rate": 1.62043795620438e-05, |
|
"loss": 1.1871, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.24529128339903636, |
|
"grad_norm": 0.3105247743890357, |
|
"learning_rate": 1.635036496350365e-05, |
|
"loss": 1.193, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.2474813841436706, |
|
"grad_norm": 0.326200553936659, |
|
"learning_rate": 1.6496350364963505e-05, |
|
"loss": 1.1703, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.24967148488830487, |
|
"grad_norm": 0.3137884080668668, |
|
"learning_rate": 1.664233576642336e-05, |
|
"loss": 1.1707, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.2518615856329391, |
|
"grad_norm": 0.327163922779202, |
|
"learning_rate": 1.678832116788321e-05, |
|
"loss": 1.148, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.25405168637757336, |
|
"grad_norm": 0.3063941514339094, |
|
"learning_rate": 1.6934306569343066e-05, |
|
"loss": 1.1613, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.25624178712220763, |
|
"grad_norm": 0.3108041781813456, |
|
"learning_rate": 1.708029197080292e-05, |
|
"loss": 1.1535, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.25843188786684185, |
|
"grad_norm": 0.3133301402975079, |
|
"learning_rate": 1.7226277372262773e-05, |
|
"loss": 1.1914, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.2606219886114761, |
|
"grad_norm": 0.3154737069033297, |
|
"learning_rate": 1.737226277372263e-05, |
|
"loss": 1.1836, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.2628120893561104, |
|
"grad_norm": 0.3011426797487246, |
|
"learning_rate": 1.7518248175182482e-05, |
|
"loss": 1.1559, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.26500219010074466, |
|
"grad_norm": 0.3416641336195657, |
|
"learning_rate": 1.7664233576642337e-05, |
|
"loss": 1.1572, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.2671922908453789, |
|
"grad_norm": 0.30613307226505193, |
|
"learning_rate": 1.7810218978102192e-05, |
|
"loss": 1.1762, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.26938239159001315, |
|
"grad_norm": 0.29981894574558626, |
|
"learning_rate": 1.7956204379562047e-05, |
|
"loss": 1.1562, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.2715724923346474, |
|
"grad_norm": 0.29887010220063054, |
|
"learning_rate": 1.81021897810219e-05, |
|
"loss": 1.1645, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.27376259307928164, |
|
"grad_norm": 0.2966732076878118, |
|
"learning_rate": 1.8248175182481753e-05, |
|
"loss": 1.1824, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.2759526938239159, |
|
"grad_norm": 0.29935159320241506, |
|
"learning_rate": 1.8394160583941608e-05, |
|
"loss": 1.1684, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.2781427945685502, |
|
"grad_norm": 0.2884901367267061, |
|
"learning_rate": 1.854014598540146e-05, |
|
"loss": 1.1625, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.2803328953131844, |
|
"grad_norm": 0.29675051316205636, |
|
"learning_rate": 1.8686131386861315e-05, |
|
"loss": 1.1711, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.28252299605781866, |
|
"grad_norm": 0.2994167858357806, |
|
"learning_rate": 1.883211678832117e-05, |
|
"loss": 1.1691, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.28471309680245294, |
|
"grad_norm": 0.3058796858332876, |
|
"learning_rate": 1.897810218978102e-05, |
|
"loss": 1.1621, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.28690319754708715, |
|
"grad_norm": 0.2972115649292112, |
|
"learning_rate": 1.912408759124088e-05, |
|
"loss": 1.1687, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.2890932982917214, |
|
"grad_norm": 0.3246749282257886, |
|
"learning_rate": 1.9270072992700734e-05, |
|
"loss": 1.176, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2912833990363557, |
|
"grad_norm": 0.2949778092608126, |
|
"learning_rate": 1.9416058394160586e-05, |
|
"loss": 1.1602, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.2934734997809899, |
|
"grad_norm": 0.29601206389636064, |
|
"learning_rate": 1.956204379562044e-05, |
|
"loss": 1.1543, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.2956636005256242, |
|
"grad_norm": 0.31730205467856953, |
|
"learning_rate": 1.9708029197080295e-05, |
|
"loss": 1.1484, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.29785370127025845, |
|
"grad_norm": 0.29248038800784815, |
|
"learning_rate": 1.9854014598540147e-05, |
|
"loss": 1.1629, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.30004380201489267, |
|
"grad_norm": 0.324442556976409, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1504, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.30223390275952694, |
|
"grad_norm": 0.29652947714633526, |
|
"learning_rate": 1.9999967529852368e-05, |
|
"loss": 1.134, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.3044240035041612, |
|
"grad_norm": 0.28765128738836787, |
|
"learning_rate": 1.9999870119620323e-05, |
|
"loss": 1.1391, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.3066141042487954, |
|
"grad_norm": 0.2977547290672057, |
|
"learning_rate": 1.999970776993646e-05, |
|
"loss": 1.1738, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3088042049934297, |
|
"grad_norm": 0.3092291985228049, |
|
"learning_rate": 1.9999480481855074e-05, |
|
"loss": 1.166, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.31099430573806397, |
|
"grad_norm": 0.3056767165612742, |
|
"learning_rate": 1.9999188256852184e-05, |
|
"loss": 1.1516, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3131844064826982, |
|
"grad_norm": 0.2963548040389902, |
|
"learning_rate": 1.999883109682551e-05, |
|
"loss": 1.1465, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.31537450722733246, |
|
"grad_norm": 0.30234744446219075, |
|
"learning_rate": 1.9998409004094455e-05, |
|
"loss": 1.1414, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.31756460797196673, |
|
"grad_norm": 0.30066223568227457, |
|
"learning_rate": 1.999792198140011e-05, |
|
"loss": 1.1504, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.31975470871660094, |
|
"grad_norm": 0.2889539765396387, |
|
"learning_rate": 1.9997370031905204e-05, |
|
"loss": 1.1687, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3219448094612352, |
|
"grad_norm": 0.3119167096280213, |
|
"learning_rate": 1.999675315919412e-05, |
|
"loss": 1.1258, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.3241349102058695, |
|
"grad_norm": 0.3082566009232655, |
|
"learning_rate": 1.9996071367272846e-05, |
|
"loss": 1.1398, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3263250109505037, |
|
"grad_norm": 0.3190515342314494, |
|
"learning_rate": 1.999532466056896e-05, |
|
"loss": 1.1422, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.328515111695138, |
|
"grad_norm": 0.3137645553915184, |
|
"learning_rate": 1.9994513043931596e-05, |
|
"loss": 1.1367, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.33070521243977224, |
|
"grad_norm": 0.309872007079329, |
|
"learning_rate": 1.9993636522631417e-05, |
|
"loss": 1.1664, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.33289531318440646, |
|
"grad_norm": 0.27597463143785256, |
|
"learning_rate": 1.9992695102360576e-05, |
|
"loss": 1.1438, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.33508541392904073, |
|
"grad_norm": 0.29218301221271276, |
|
"learning_rate": 1.9991688789232687e-05, |
|
"loss": 1.1551, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.337275514673675, |
|
"grad_norm": 0.30069339984117943, |
|
"learning_rate": 1.9990617589782778e-05, |
|
"loss": 1.1582, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.3394656154183092, |
|
"grad_norm": 0.3092042415654092, |
|
"learning_rate": 1.9989481510967245e-05, |
|
"loss": 1.1572, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.3416557161629435, |
|
"grad_norm": 0.286252883483907, |
|
"learning_rate": 1.9988280560163825e-05, |
|
"loss": 1.1359, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.34384581690757776, |
|
"grad_norm": 0.2965378350747857, |
|
"learning_rate": 1.998701474517152e-05, |
|
"loss": 1.1404, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.346035917652212, |
|
"grad_norm": 0.32399058891091354, |
|
"learning_rate": 1.9985684074210573e-05, |
|
"loss": 1.141, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.34822601839684625, |
|
"grad_norm": 0.29631463717134965, |
|
"learning_rate": 1.9984288555922402e-05, |
|
"loss": 1.141, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.3504161191414805, |
|
"grad_norm": 0.3066454043832438, |
|
"learning_rate": 1.998282819936954e-05, |
|
"loss": 1.1346, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.35260621988611474, |
|
"grad_norm": 0.3002290039251923, |
|
"learning_rate": 1.9981303014035593e-05, |
|
"loss": 1.1344, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.354796320630749, |
|
"grad_norm": 0.31658840783678355, |
|
"learning_rate": 1.9979713009825152e-05, |
|
"loss": 1.1422, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.3569864213753833, |
|
"grad_norm": 0.28116056868919015, |
|
"learning_rate": 1.9978058197063756e-05, |
|
"loss": 1.1645, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.3591765221200175, |
|
"grad_norm": 0.2988466651254511, |
|
"learning_rate": 1.9976338586497804e-05, |
|
"loss": 1.1383, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.36136662286465177, |
|
"grad_norm": 0.29802628898129574, |
|
"learning_rate": 1.9974554189294504e-05, |
|
"loss": 1.1344, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.36355672360928604, |
|
"grad_norm": 0.2847009818824244, |
|
"learning_rate": 1.9972705017041777e-05, |
|
"loss": 1.1383, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.3657468243539203, |
|
"grad_norm": 0.3028159290266467, |
|
"learning_rate": 1.9970791081748208e-05, |
|
"loss": 1.1316, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.3679369250985545, |
|
"grad_norm": 0.287479339437057, |
|
"learning_rate": 1.9968812395842942e-05, |
|
"loss": 1.1543, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.3701270258431888, |
|
"grad_norm": 0.28447649711277756, |
|
"learning_rate": 1.996676897217563e-05, |
|
"loss": 1.0986, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.37231712658782307, |
|
"grad_norm": 0.28951032395742343, |
|
"learning_rate": 1.9964660824016327e-05, |
|
"loss": 1.1508, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.3745072273324573, |
|
"grad_norm": 0.2977760588633454, |
|
"learning_rate": 1.9962487965055403e-05, |
|
"loss": 1.1322, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.37669732807709155, |
|
"grad_norm": 0.2955483265627134, |
|
"learning_rate": 1.996025040940347e-05, |
|
"loss": 1.1285, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.3788874288217258, |
|
"grad_norm": 0.287939559842973, |
|
"learning_rate": 1.9957948171591286e-05, |
|
"loss": 1.15, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.38107752956636004, |
|
"grad_norm": 0.30141508811651624, |
|
"learning_rate": 1.9955581266569648e-05, |
|
"loss": 1.1465, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.3832676303109943, |
|
"grad_norm": 0.28984961674810866, |
|
"learning_rate": 1.9953149709709304e-05, |
|
"loss": 1.1328, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.3854577310556286, |
|
"grad_norm": 0.2693443491254494, |
|
"learning_rate": 1.995065351680086e-05, |
|
"loss": 1.1348, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.3876478318002628, |
|
"grad_norm": 0.28451110876798263, |
|
"learning_rate": 1.9948092704054664e-05, |
|
"loss": 1.1684, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.38983793254489707, |
|
"grad_norm": 0.27443501432665496, |
|
"learning_rate": 1.9945467288100714e-05, |
|
"loss": 1.1422, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.39202803328953134, |
|
"grad_norm": 0.285419754074599, |
|
"learning_rate": 1.9942777285988527e-05, |
|
"loss": 1.1064, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.39421813403416556, |
|
"grad_norm": 0.283204897016455, |
|
"learning_rate": 1.9940022715187074e-05, |
|
"loss": 1.1402, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.39640823477879983, |
|
"grad_norm": 0.2975243255381765, |
|
"learning_rate": 1.9937203593584604e-05, |
|
"loss": 1.1422, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.3985983355234341, |
|
"grad_norm": 0.286375682431649, |
|
"learning_rate": 1.993431993948858e-05, |
|
"loss": 1.1395, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.4007884362680683, |
|
"grad_norm": 0.2935563141727228, |
|
"learning_rate": 1.9931371771625545e-05, |
|
"loss": 1.15, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.4029785370127026, |
|
"grad_norm": 0.2667947999658235, |
|
"learning_rate": 1.9928359109140976e-05, |
|
"loss": 1.1398, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.40516863775733686, |
|
"grad_norm": 0.2743643642469239, |
|
"learning_rate": 1.99252819715992e-05, |
|
"loss": 1.125, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.4073587385019711, |
|
"grad_norm": 0.2791187567058821, |
|
"learning_rate": 1.9922140378983235e-05, |
|
"loss": 1.116, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.40954883924660535, |
|
"grad_norm": 0.2798610384611816, |
|
"learning_rate": 1.991893435169468e-05, |
|
"loss": 1.1199, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.4117389399912396, |
|
"grad_norm": 0.28612583032215144, |
|
"learning_rate": 1.991566391055357e-05, |
|
"loss": 1.1344, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.41392904073587383, |
|
"grad_norm": 0.30128821800137673, |
|
"learning_rate": 1.991232907679824e-05, |
|
"loss": 1.1574, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.4161191414805081, |
|
"grad_norm": 0.2819831413759113, |
|
"learning_rate": 1.9908929872085205e-05, |
|
"loss": 1.1438, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.4183092422251424, |
|
"grad_norm": 0.2758868673995733, |
|
"learning_rate": 1.9905466318489e-05, |
|
"loss": 1.1332, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.4204993429697766, |
|
"grad_norm": 0.2720381391855681, |
|
"learning_rate": 1.990193843850205e-05, |
|
"loss": 1.1363, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.42268944371441086, |
|
"grad_norm": 0.2844557339068452, |
|
"learning_rate": 1.9898346255034503e-05, |
|
"loss": 1.1555, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.42487954445904513, |
|
"grad_norm": 0.2892067533735414, |
|
"learning_rate": 1.9894689791414102e-05, |
|
"loss": 1.1207, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.42706964520367935, |
|
"grad_norm": 0.28260412206082436, |
|
"learning_rate": 1.9890969071386038e-05, |
|
"loss": 1.1281, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.4292597459483136, |
|
"grad_norm": 0.27462057678308865, |
|
"learning_rate": 1.9887184119112776e-05, |
|
"loss": 1.1555, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.4314498466929479, |
|
"grad_norm": 0.2777174448301392, |
|
"learning_rate": 1.9883334959173905e-05, |
|
"loss": 1.1336, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.4336399474375821, |
|
"grad_norm": 0.29745925368348247, |
|
"learning_rate": 1.9879421616565984e-05, |
|
"loss": 1.1445, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.4358300481822164, |
|
"grad_norm": 0.2977067104078218, |
|
"learning_rate": 1.9875444116702377e-05, |
|
"loss": 1.1336, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.43802014892685065, |
|
"grad_norm": 0.29064501481596583, |
|
"learning_rate": 1.987140248541308e-05, |
|
"loss": 1.1187, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.44021024967148487, |
|
"grad_norm": 0.28202894919724386, |
|
"learning_rate": 1.9867296748944575e-05, |
|
"loss": 1.1426, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.44240035041611914, |
|
"grad_norm": 0.2671051370902169, |
|
"learning_rate": 1.9863126933959623e-05, |
|
"loss": 1.1398, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.4445904511607534, |
|
"grad_norm": 0.2829190919447246, |
|
"learning_rate": 1.985889306753714e-05, |
|
"loss": 1.1262, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.4467805519053876, |
|
"grad_norm": 0.27938603807782114, |
|
"learning_rate": 1.9854595177171968e-05, |
|
"loss": 1.1396, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.4489706526500219, |
|
"grad_norm": 0.2726811187740653, |
|
"learning_rate": 1.9850233290774744e-05, |
|
"loss": 1.1283, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.45116075339465617, |
|
"grad_norm": 0.2811174352400791, |
|
"learning_rate": 1.984580743667168e-05, |
|
"loss": 1.1625, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.4533508541392904, |
|
"grad_norm": 0.28726266639080117, |
|
"learning_rate": 1.9841317643604404e-05, |
|
"loss": 1.1379, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.45554095488392465, |
|
"grad_norm": 0.2790715625514297, |
|
"learning_rate": 1.9836763940729765e-05, |
|
"loss": 1.1465, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.4577310556285589, |
|
"grad_norm": 0.2756123869189833, |
|
"learning_rate": 1.9832146357619647e-05, |
|
"loss": 1.132, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.45992115637319314, |
|
"grad_norm": 0.28910724603592386, |
|
"learning_rate": 1.982746492426077e-05, |
|
"loss": 1.1613, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.4621112571178274, |
|
"grad_norm": 0.28038299192799726, |
|
"learning_rate": 1.9822719671054498e-05, |
|
"loss": 1.1141, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.4643013578624617, |
|
"grad_norm": 0.29312368250159127, |
|
"learning_rate": 1.981791062881665e-05, |
|
"loss": 1.1715, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.4664914586070959, |
|
"grad_norm": 0.28833172213041786, |
|
"learning_rate": 1.9813037828777276e-05, |
|
"loss": 1.1328, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.46868155935173017, |
|
"grad_norm": 0.2951339006961912, |
|
"learning_rate": 1.98081013025805e-05, |
|
"loss": 1.1234, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.47087166009636444, |
|
"grad_norm": 0.2797023938943954, |
|
"learning_rate": 1.980310108228426e-05, |
|
"loss": 1.1385, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.4730617608409987, |
|
"grad_norm": 0.2759952236533748, |
|
"learning_rate": 1.9798037200360135e-05, |
|
"loss": 1.1221, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.47525186158563293, |
|
"grad_norm": 0.28412146048511483, |
|
"learning_rate": 1.9792909689693124e-05, |
|
"loss": 1.1246, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.4774419623302672, |
|
"grad_norm": 0.29446603696048546, |
|
"learning_rate": 1.978771858358143e-05, |
|
"loss": 1.148, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.47963206307490147, |
|
"grad_norm": 0.27023047414464874, |
|
"learning_rate": 1.9782463915736255e-05, |
|
"loss": 1.108, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.4818221638195357, |
|
"grad_norm": 0.2834629828500795, |
|
"learning_rate": 1.977714572028157e-05, |
|
"loss": 1.116, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.48401226456416996, |
|
"grad_norm": 0.2928628093709197, |
|
"learning_rate": 1.977176403175388e-05, |
|
"loss": 1.1217, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.48620236530880423, |
|
"grad_norm": 0.2798052929515212, |
|
"learning_rate": 1.976631888510204e-05, |
|
"loss": 1.125, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.48839246605343845, |
|
"grad_norm": 0.2815012858261469, |
|
"learning_rate": 1.976081031568699e-05, |
|
"loss": 1.132, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.4905825667980727, |
|
"grad_norm": 0.28767140618274256, |
|
"learning_rate": 1.9755238359281545e-05, |
|
"loss": 1.117, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.492772667542707, |
|
"grad_norm": 0.2818985849019624, |
|
"learning_rate": 1.9749603052070147e-05, |
|
"loss": 1.1289, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.4949627682873412, |
|
"grad_norm": 0.2786359761353917, |
|
"learning_rate": 1.9743904430648654e-05, |
|
"loss": 1.1086, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.4971528690319755, |
|
"grad_norm": 0.30082948310927515, |
|
"learning_rate": 1.973814253202408e-05, |
|
"loss": 1.1326, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.49934296977660975, |
|
"grad_norm": 0.28823463423411355, |
|
"learning_rate": 1.9732317393614365e-05, |
|
"loss": 1.1418, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.501533070521244, |
|
"grad_norm": 0.2835666012503442, |
|
"learning_rate": 1.972642905324813e-05, |
|
"loss": 1.1066, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.5037231712658782, |
|
"grad_norm": 0.28208696915543563, |
|
"learning_rate": 1.9720477549164435e-05, |
|
"loss": 1.0969, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.5059132720105125, |
|
"grad_norm": 0.27273009984309005, |
|
"learning_rate": 1.9714462920012515e-05, |
|
"loss": 1.1406, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.5081033727551467, |
|
"grad_norm": 0.2833154120822429, |
|
"learning_rate": 1.9708385204851552e-05, |
|
"loss": 1.1293, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.5102934734997809, |
|
"grad_norm": 0.2734595441879901, |
|
"learning_rate": 1.9702244443150412e-05, |
|
"loss": 1.1037, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.5124835742444153, |
|
"grad_norm": 0.28386641782695426, |
|
"learning_rate": 1.9696040674787383e-05, |
|
"loss": 1.1309, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.5146736749890495, |
|
"grad_norm": 0.2770001252770981, |
|
"learning_rate": 1.9689773940049913e-05, |
|
"loss": 1.116, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.5168637757336837, |
|
"grad_norm": 0.2738518763278271, |
|
"learning_rate": 1.9683444279634364e-05, |
|
"loss": 1.1316, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.519053876478318, |
|
"grad_norm": 0.2758683563717482, |
|
"learning_rate": 1.9677051734645744e-05, |
|
"loss": 1.134, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.5212439772229522, |
|
"grad_norm": 0.2709237152430827, |
|
"learning_rate": 1.9670596346597425e-05, |
|
"loss": 1.1512, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.5234340779675866, |
|
"grad_norm": 0.2970767298116832, |
|
"learning_rate": 1.966407815741089e-05, |
|
"loss": 1.1371, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.5256241787122208, |
|
"grad_norm": 0.28308210048437704, |
|
"learning_rate": 1.9657497209415443e-05, |
|
"loss": 1.098, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.527814279456855, |
|
"grad_norm": 0.28960642709808776, |
|
"learning_rate": 1.9650853545347967e-05, |
|
"loss": 1.1119, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.5300043802014893, |
|
"grad_norm": 0.275714098894066, |
|
"learning_rate": 1.96441472083526e-05, |
|
"loss": 1.1066, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.5321944809461235, |
|
"grad_norm": 0.27558043103307944, |
|
"learning_rate": 1.9637378241980504e-05, |
|
"loss": 1.1391, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.5343845816907578, |
|
"grad_norm": 0.27922191954409015, |
|
"learning_rate": 1.9630546690189544e-05, |
|
"loss": 1.1238, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.5365746824353921, |
|
"grad_norm": 0.27077948716119843, |
|
"learning_rate": 1.9623652597344015e-05, |
|
"loss": 1.091, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.5387647831800263, |
|
"grad_norm": 0.2722865027554513, |
|
"learning_rate": 1.961669600821436e-05, |
|
"loss": 1.1236, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.5409548839246605, |
|
"grad_norm": 0.28922569582983776, |
|
"learning_rate": 1.9609676967976873e-05, |
|
"loss": 1.1262, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.5431449846692948, |
|
"grad_norm": 0.2965889559255691, |
|
"learning_rate": 1.9602595522213414e-05, |
|
"loss": 1.1086, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.545335085413929, |
|
"grad_norm": 0.2762851160005322, |
|
"learning_rate": 1.9595451716911097e-05, |
|
"loss": 1.1309, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.5475251861585633, |
|
"grad_norm": 0.2669273952815999, |
|
"learning_rate": 1.958824559846201e-05, |
|
"loss": 1.1297, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5497152869031976, |
|
"grad_norm": 0.2730652771013225, |
|
"learning_rate": 1.958097721366289e-05, |
|
"loss": 1.11, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.5519053876478318, |
|
"grad_norm": 0.2842957029219278, |
|
"learning_rate": 1.957364660971485e-05, |
|
"loss": 1.1273, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.554095488392466, |
|
"grad_norm": 0.2840418180943608, |
|
"learning_rate": 1.9566253834223042e-05, |
|
"loss": 1.1006, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.5562855891371004, |
|
"grad_norm": 0.27712160762674326, |
|
"learning_rate": 1.9558798935196376e-05, |
|
"loss": 1.1135, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.5584756898817346, |
|
"grad_norm": 0.2857322595691661, |
|
"learning_rate": 1.955128196104718e-05, |
|
"loss": 1.107, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.5606657906263688, |
|
"grad_norm": 0.28868118619459854, |
|
"learning_rate": 1.9543702960590903e-05, |
|
"loss": 1.1141, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.5628558913710031, |
|
"grad_norm": 0.26786400432490776, |
|
"learning_rate": 1.9536061983045812e-05, |
|
"loss": 1.1277, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.5650459921156373, |
|
"grad_norm": 0.27964141513626756, |
|
"learning_rate": 1.9528359078032624e-05, |
|
"loss": 1.1027, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.5672360928602715, |
|
"grad_norm": 0.27097789156538216, |
|
"learning_rate": 1.9520594295574245e-05, |
|
"loss": 1.1102, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.5694261936049059, |
|
"grad_norm": 0.2881923750285588, |
|
"learning_rate": 1.9512767686095397e-05, |
|
"loss": 1.1137, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5716162943495401, |
|
"grad_norm": 0.28928505042046504, |
|
"learning_rate": 1.9504879300422308e-05, |
|
"loss": 1.1414, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.5738063950941743, |
|
"grad_norm": 0.2870532574710609, |
|
"learning_rate": 1.9496929189782393e-05, |
|
"loss": 1.1092, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.5759964958388086, |
|
"grad_norm": 0.28512356312967324, |
|
"learning_rate": 1.9488917405803902e-05, |
|
"loss": 1.1244, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.5781865965834428, |
|
"grad_norm": 0.31392762060438606, |
|
"learning_rate": 1.94808440005156e-05, |
|
"loss": 1.1406, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.5803766973280771, |
|
"grad_norm": 0.2767171970254651, |
|
"learning_rate": 1.947270902634642e-05, |
|
"loss": 1.1107, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.5825667980727114, |
|
"grad_norm": 0.27800092456236136, |
|
"learning_rate": 1.946451253612512e-05, |
|
"loss": 1.1324, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.5847568988173456, |
|
"grad_norm": 0.2885057649952374, |
|
"learning_rate": 1.9456254583079954e-05, |
|
"loss": 1.085, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.5869469995619798, |
|
"grad_norm": 0.2761292875453245, |
|
"learning_rate": 1.9447935220838308e-05, |
|
"loss": 1.1422, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.5891371003066141, |
|
"grad_norm": 0.28880466519196507, |
|
"learning_rate": 1.943955450342637e-05, |
|
"loss": 1.1199, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.5913272010512484, |
|
"grad_norm": 0.2700534828962362, |
|
"learning_rate": 1.9431112485268764e-05, |
|
"loss": 1.1172, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.5935173017958826, |
|
"grad_norm": 0.2747262063569435, |
|
"learning_rate": 1.9422609221188208e-05, |
|
"loss": 1.1137, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.5957074025405169, |
|
"grad_norm": 0.26972085330648815, |
|
"learning_rate": 1.9414044766405146e-05, |
|
"loss": 1.1125, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.5978975032851511, |
|
"grad_norm": 0.2855247406870803, |
|
"learning_rate": 1.9405419176537405e-05, |
|
"loss": 1.1207, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.6000876040297853, |
|
"grad_norm": 0.2601474614595161, |
|
"learning_rate": 1.9396732507599816e-05, |
|
"loss": 1.1211, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.6022777047744197, |
|
"grad_norm": 0.2854935378780939, |
|
"learning_rate": 1.9387984816003868e-05, |
|
"loss": 1.1094, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.6044678055190539, |
|
"grad_norm": 0.2679659367257591, |
|
"learning_rate": 1.937917615855732e-05, |
|
"loss": 1.1199, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.6066579062636881, |
|
"grad_norm": 0.2817414130754905, |
|
"learning_rate": 1.9370306592463866e-05, |
|
"loss": 1.125, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.6088480070083224, |
|
"grad_norm": 0.2598674707004089, |
|
"learning_rate": 1.936137617532272e-05, |
|
"loss": 1.1064, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.6110381077529566, |
|
"grad_norm": 0.2691317233389999, |
|
"learning_rate": 1.935238496512828e-05, |
|
"loss": 1.0988, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.6132282084975909, |
|
"grad_norm": 0.2859835765343508, |
|
"learning_rate": 1.9343333020269724e-05, |
|
"loss": 1.1227, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6154183092422252, |
|
"grad_norm": 0.2694914982156586, |
|
"learning_rate": 1.9334220399530662e-05, |
|
"loss": 1.1449, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.6176084099868594, |
|
"grad_norm": 0.30824208779553697, |
|
"learning_rate": 1.9325047162088707e-05, |
|
"loss": 1.1332, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.6197985107314936, |
|
"grad_norm": 0.26679438460192956, |
|
"learning_rate": 1.9315813367515142e-05, |
|
"loss": 1.1146, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.6219886114761279, |
|
"grad_norm": 0.2797877766039786, |
|
"learning_rate": 1.9306519075774504e-05, |
|
"loss": 1.107, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.6241787122207622, |
|
"grad_norm": 0.2760924748552255, |
|
"learning_rate": 1.929716434722419e-05, |
|
"loss": 1.1313, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.6263688129653964, |
|
"grad_norm": 0.26866416743615296, |
|
"learning_rate": 1.928774924261409e-05, |
|
"loss": 1.0984, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.6285589137100307, |
|
"grad_norm": 0.2771243619587273, |
|
"learning_rate": 1.927827382308617e-05, |
|
"loss": 1.1062, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.6307490144546649, |
|
"grad_norm": 0.26961265825183367, |
|
"learning_rate": 1.9268738150174084e-05, |
|
"loss": 1.0975, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.6329391151992991, |
|
"grad_norm": 0.28071640780132767, |
|
"learning_rate": 1.925914228580277e-05, |
|
"loss": 1.1156, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.6351292159439335, |
|
"grad_norm": 0.266937201343873, |
|
"learning_rate": 1.924948629228806e-05, |
|
"loss": 1.0809, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.6373193166885677, |
|
"grad_norm": 0.27461403399812495, |
|
"learning_rate": 1.9239770232336258e-05, |
|
"loss": 1.1105, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.6395094174332019, |
|
"grad_norm": 0.2986924180856064, |
|
"learning_rate": 1.922999416904374e-05, |
|
"loss": 1.134, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.6416995181778362, |
|
"grad_norm": 0.2714601828449212, |
|
"learning_rate": 1.9220158165896557e-05, |
|
"loss": 1.1152, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.6438896189224704, |
|
"grad_norm": 0.2793036577569871, |
|
"learning_rate": 1.9210262286770005e-05, |
|
"loss": 1.1055, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.6460797196671046, |
|
"grad_norm": 0.2909420705461525, |
|
"learning_rate": 1.9200306595928207e-05, |
|
"loss": 1.1107, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.648269820411739, |
|
"grad_norm": 0.28963291188379814, |
|
"learning_rate": 1.919029115802372e-05, |
|
"loss": 1.0994, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.6504599211563732, |
|
"grad_norm": 0.2870590741002279, |
|
"learning_rate": 1.918021603809709e-05, |
|
"loss": 1.1002, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.6526500219010074, |
|
"grad_norm": 0.2871696912380895, |
|
"learning_rate": 1.9170081301576444e-05, |
|
"loss": 1.1187, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.6548401226456417, |
|
"grad_norm": 0.27381019486120817, |
|
"learning_rate": 1.915988701427706e-05, |
|
"loss": 1.1137, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.657030223390276, |
|
"grad_norm": 0.27657948021093287, |
|
"learning_rate": 1.9149633242400942e-05, |
|
"loss": 1.1352, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6592203241349102, |
|
"grad_norm": 0.2751875181246786, |
|
"learning_rate": 1.9139320052536388e-05, |
|
"loss": 1.1193, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.6614104248795445, |
|
"grad_norm": 0.27766900983637177, |
|
"learning_rate": 1.9128947511657558e-05, |
|
"loss": 1.1221, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.6636005256241787, |
|
"grad_norm": 0.2749718770075062, |
|
"learning_rate": 1.911851568712403e-05, |
|
"loss": 1.1176, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.6657906263688129, |
|
"grad_norm": 0.2772174321273592, |
|
"learning_rate": 1.910802464668039e-05, |
|
"loss": 1.0959, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.6679807271134472, |
|
"grad_norm": 0.2620155432938436, |
|
"learning_rate": 1.9097474458455766e-05, |
|
"loss": 1.1064, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.6701708278580815, |
|
"grad_norm": 0.2979089630943025, |
|
"learning_rate": 1.9086865190963384e-05, |
|
"loss": 1.1059, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.6723609286027157, |
|
"grad_norm": 0.27611457258125455, |
|
"learning_rate": 1.9076196913100146e-05, |
|
"loss": 1.116, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.67455102934735, |
|
"grad_norm": 0.27818118501142747, |
|
"learning_rate": 1.906546969414616e-05, |
|
"loss": 1.1115, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.6767411300919842, |
|
"grad_norm": 0.27699675699597703, |
|
"learning_rate": 1.9054683603764305e-05, |
|
"loss": 1.0965, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.6789312308366184, |
|
"grad_norm": 0.27740082677306727, |
|
"learning_rate": 1.904383871199977e-05, |
|
"loss": 1.0965, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.6811213315812528, |
|
"grad_norm": 0.2809043424196064, |
|
"learning_rate": 1.90329350892796e-05, |
|
"loss": 1.098, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.683311432325887, |
|
"grad_norm": 0.2639066667854838, |
|
"learning_rate": 1.9021972806412248e-05, |
|
"loss": 1.1078, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.6855015330705212, |
|
"grad_norm": 0.276203210313125, |
|
"learning_rate": 1.9010951934587098e-05, |
|
"loss": 1.1457, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.6876916338151555, |
|
"grad_norm": 0.27076390154989444, |
|
"learning_rate": 1.899987254537402e-05, |
|
"loss": 1.0775, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.6898817345597897, |
|
"grad_norm": 0.2774038504373021, |
|
"learning_rate": 1.898873471072289e-05, |
|
"loss": 1.1191, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.692071835304424, |
|
"grad_norm": 0.26935741887190473, |
|
"learning_rate": 1.897753850296314e-05, |
|
"loss": 1.1195, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.6942619360490583, |
|
"grad_norm": 0.27030811654802295, |
|
"learning_rate": 1.896628399480327e-05, |
|
"loss": 1.0945, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.6964520367936925, |
|
"grad_norm": 0.27721250821361343, |
|
"learning_rate": 1.8954971259330393e-05, |
|
"loss": 1.1582, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.6986421375383267, |
|
"grad_norm": 0.26784617062540544, |
|
"learning_rate": 1.8943600370009744e-05, |
|
"loss": 1.0934, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.700832238282961, |
|
"grad_norm": 0.2715953252890686, |
|
"learning_rate": 1.8932171400684215e-05, |
|
"loss": 1.1187, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.7030223390275953, |
|
"grad_norm": 0.2827134309634548, |
|
"learning_rate": 1.8920684425573865e-05, |
|
"loss": 1.1193, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.7052124397722295, |
|
"grad_norm": 0.26957691881610674, |
|
"learning_rate": 1.890913951927546e-05, |
|
"loss": 1.1219, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.7074025405168638, |
|
"grad_norm": 0.27433959110638084, |
|
"learning_rate": 1.8897536756761954e-05, |
|
"loss": 1.0973, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.709592641261498, |
|
"grad_norm": 0.2818454037953233, |
|
"learning_rate": 1.8885876213382032e-05, |
|
"loss": 1.1164, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.7117827420061322, |
|
"grad_norm": 0.27685333665328266, |
|
"learning_rate": 1.887415796485961e-05, |
|
"loss": 1.1262, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.7139728427507666, |
|
"grad_norm": 0.2744600694708599, |
|
"learning_rate": 1.8862382087293333e-05, |
|
"loss": 1.1148, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.7161629434954008, |
|
"grad_norm": 0.26756099510595055, |
|
"learning_rate": 1.8850548657156106e-05, |
|
"loss": 1.1148, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.718353044240035, |
|
"grad_norm": 0.26844914903776257, |
|
"learning_rate": 1.8838657751294564e-05, |
|
"loss": 1.085, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.7205431449846693, |
|
"grad_norm": 0.2769618210155935, |
|
"learning_rate": 1.8826709446928612e-05, |
|
"loss": 1.1254, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.7227332457293035, |
|
"grad_norm": 0.2640705844978224, |
|
"learning_rate": 1.8814703821650883e-05, |
|
"loss": 1.1105, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.7249233464739377, |
|
"grad_norm": 0.25859071008307577, |
|
"learning_rate": 1.8802640953426263e-05, |
|
"loss": 1.1096, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.7271134472185721, |
|
"grad_norm": 0.262574438661076, |
|
"learning_rate": 1.8790520920591376e-05, |
|
"loss": 1.0914, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.7293035479632063, |
|
"grad_norm": 0.27713581623207095, |
|
"learning_rate": 1.8778343801854074e-05, |
|
"loss": 1.1324, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.7314936487078406, |
|
"grad_norm": 0.2616321001095485, |
|
"learning_rate": 1.8766109676292922e-05, |
|
"loss": 1.0879, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.7336837494524748, |
|
"grad_norm": 0.26898963160284506, |
|
"learning_rate": 1.8753818623356702e-05, |
|
"loss": 1.0885, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.735873850197109, |
|
"grad_norm": 0.2691621602639776, |
|
"learning_rate": 1.874147072286386e-05, |
|
"loss": 1.1187, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.7380639509417434, |
|
"grad_norm": 0.26783674649445033, |
|
"learning_rate": 1.8729066055002037e-05, |
|
"loss": 1.1117, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.7402540516863776, |
|
"grad_norm": 0.2666441183588182, |
|
"learning_rate": 1.8716604700327516e-05, |
|
"loss": 1.1078, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.7424441524310118, |
|
"grad_norm": 0.2738375436810693, |
|
"learning_rate": 1.870408673976469e-05, |
|
"loss": 1.1277, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.7446342531756461, |
|
"grad_norm": 0.2738186764180608, |
|
"learning_rate": 1.869151225460557e-05, |
|
"loss": 1.1234, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.7468243539202803, |
|
"grad_norm": 0.27101152403848194, |
|
"learning_rate": 1.8678881326509236e-05, |
|
"loss": 1.1023, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.7490144546649146, |
|
"grad_norm": 0.2727444445299331, |
|
"learning_rate": 1.8666194037501308e-05, |
|
"loss": 1.1055, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.7512045554095489, |
|
"grad_norm": 0.2975855374946256, |
|
"learning_rate": 1.865345046997341e-05, |
|
"loss": 1.0961, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.7533946561541831, |
|
"grad_norm": 0.26858349969011003, |
|
"learning_rate": 1.864065070668265e-05, |
|
"loss": 1.0971, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.7555847568988173, |
|
"grad_norm": 0.265988934542856, |
|
"learning_rate": 1.8627794830751068e-05, |
|
"loss": 1.1025, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.7577748576434516, |
|
"grad_norm": 0.27148968054194217, |
|
"learning_rate": 1.8614882925665107e-05, |
|
"loss": 1.1094, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.7599649583880859, |
|
"grad_norm": 0.27790159663787395, |
|
"learning_rate": 1.8601915075275052e-05, |
|
"loss": 1.1211, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.7621550591327201, |
|
"grad_norm": 0.2758132531875579, |
|
"learning_rate": 1.858889136379451e-05, |
|
"loss": 1.1076, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.7643451598773544, |
|
"grad_norm": 0.2623757976766024, |
|
"learning_rate": 1.8575811875799846e-05, |
|
"loss": 1.0887, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.7665352606219886, |
|
"grad_norm": 0.26151492176840657, |
|
"learning_rate": 1.8562676696229646e-05, |
|
"loss": 1.124, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.7687253613666228, |
|
"grad_norm": 0.271833755533329, |
|
"learning_rate": 1.8549485910384145e-05, |
|
"loss": 1.1453, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.7709154621112572, |
|
"grad_norm": 0.2716257068895584, |
|
"learning_rate": 1.8536239603924707e-05, |
|
"loss": 1.1084, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.7731055628558914, |
|
"grad_norm": 0.27019868315503554, |
|
"learning_rate": 1.852293786287323e-05, |
|
"loss": 1.1328, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.7752956636005256, |
|
"grad_norm": 0.27402054743522397, |
|
"learning_rate": 1.850958077361161e-05, |
|
"loss": 1.1211, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.7774857643451599, |
|
"grad_norm": 0.26295913539462323, |
|
"learning_rate": 1.8496168422881192e-05, |
|
"loss": 1.1033, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.7796758650897941, |
|
"grad_norm": 0.26609455658638365, |
|
"learning_rate": 1.8482700897782166e-05, |
|
"loss": 1.0914, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.7818659658344284, |
|
"grad_norm": 0.2611758289091626, |
|
"learning_rate": 1.846917828577304e-05, |
|
"loss": 1.107, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.7840560665790627, |
|
"grad_norm": 0.2733500279507379, |
|
"learning_rate": 1.8455600674670054e-05, |
|
"loss": 1.1066, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.7862461673236969, |
|
"grad_norm": 0.27632403418777396, |
|
"learning_rate": 1.8441968152646623e-05, |
|
"loss": 1.0865, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.7884362680683311, |
|
"grad_norm": 0.2613214944333076, |
|
"learning_rate": 1.842828080823274e-05, |
|
"loss": 1.0889, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.7906263688129654, |
|
"grad_norm": 0.26071387464509554, |
|
"learning_rate": 1.8414538730314428e-05, |
|
"loss": 1.0898, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.7928164695575997, |
|
"grad_norm": 0.2697069485769201, |
|
"learning_rate": 1.8400742008133146e-05, |
|
"loss": 1.1227, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.7950065703022339, |
|
"grad_norm": 0.2686347723972552, |
|
"learning_rate": 1.838689073128521e-05, |
|
"loss": 1.1051, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.7971966710468682, |
|
"grad_norm": 0.2749670020061178, |
|
"learning_rate": 1.8372984989721232e-05, |
|
"loss": 1.0992, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.7993867717915024, |
|
"grad_norm": 0.26279534392981274, |
|
"learning_rate": 1.83590248737455e-05, |
|
"loss": 1.1102, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.8015768725361366, |
|
"grad_norm": 0.27530772454657076, |
|
"learning_rate": 1.8345010474015418e-05, |
|
"loss": 1.0838, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.803766973280771, |
|
"grad_norm": 0.273234030161823, |
|
"learning_rate": 1.8330941881540917e-05, |
|
"loss": 1.1107, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.8059570740254052, |
|
"grad_norm": 0.2710268252498512, |
|
"learning_rate": 1.831681918768385e-05, |
|
"loss": 1.1148, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.8081471747700394, |
|
"grad_norm": 0.2991459154978479, |
|
"learning_rate": 1.8302642484157407e-05, |
|
"loss": 1.1016, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.8103372755146737, |
|
"grad_norm": 0.2644212945605006, |
|
"learning_rate": 1.8288411863025518e-05, |
|
"loss": 1.0885, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.8125273762593079, |
|
"grad_norm": 0.26633412635914927, |
|
"learning_rate": 1.8274127416702262e-05, |
|
"loss": 1.0869, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.8147174770039421, |
|
"grad_norm": 0.28059854237511905, |
|
"learning_rate": 1.8259789237951248e-05, |
|
"loss": 1.0859, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.8169075777485765, |
|
"grad_norm": 0.2618027813328137, |
|
"learning_rate": 1.824539741988504e-05, |
|
"loss": 1.123, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.8190976784932107, |
|
"grad_norm": 0.27214989117780813, |
|
"learning_rate": 1.8230952055964518e-05, |
|
"loss": 1.1027, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.8212877792378449, |
|
"grad_norm": 0.2666332713432548, |
|
"learning_rate": 1.8216453239998318e-05, |
|
"loss": 1.1191, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.8234778799824792, |
|
"grad_norm": 0.28508298692561346, |
|
"learning_rate": 1.8201901066142166e-05, |
|
"loss": 1.0912, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.8256679807271134, |
|
"grad_norm": 0.26214833981514946, |
|
"learning_rate": 1.8187295628898315e-05, |
|
"loss": 1.1059, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.8278580814717477, |
|
"grad_norm": 0.2582420612465903, |
|
"learning_rate": 1.81726370231149e-05, |
|
"loss": 1.1027, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.830048182216382, |
|
"grad_norm": 0.2648228026122393, |
|
"learning_rate": 1.8157925343985344e-05, |
|
"loss": 1.0934, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.8322382829610162, |
|
"grad_norm": 0.26577633349371715, |
|
"learning_rate": 1.814316068704773e-05, |
|
"loss": 1.0934, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.8344283837056504, |
|
"grad_norm": 0.2767314302797419, |
|
"learning_rate": 1.812834314818417e-05, |
|
"loss": 1.0918, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.8366184844502847, |
|
"grad_norm": 0.2654350963998228, |
|
"learning_rate": 1.8113472823620196e-05, |
|
"loss": 1.0906, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.838808585194919, |
|
"grad_norm": 0.2788726099269675, |
|
"learning_rate": 1.8098549809924143e-05, |
|
"loss": 1.0926, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.8409986859395532, |
|
"grad_norm": 0.27306335537213267, |
|
"learning_rate": 1.8083574204006496e-05, |
|
"loss": 1.0895, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.8431887866841875, |
|
"grad_norm": 0.2680339031442287, |
|
"learning_rate": 1.8068546103119286e-05, |
|
"loss": 1.1086, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.8453788874288217, |
|
"grad_norm": 0.26391750461525426, |
|
"learning_rate": 1.805346560485544e-05, |
|
"loss": 1.1084, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.8475689881734559, |
|
"grad_norm": 0.27946114587100535, |
|
"learning_rate": 1.8038332807148165e-05, |
|
"loss": 1.0947, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.8497590889180903, |
|
"grad_norm": 0.25762923925047615, |
|
"learning_rate": 1.8023147808270293e-05, |
|
"loss": 1.1301, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.8519491896627245, |
|
"grad_norm": 0.2704818192172617, |
|
"learning_rate": 1.8007910706833648e-05, |
|
"loss": 1.1143, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.8541392904073587, |
|
"grad_norm": 0.27049489080275974, |
|
"learning_rate": 1.799262160178843e-05, |
|
"loss": 1.1086, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.856329391151993, |
|
"grad_norm": 0.2659644550309275, |
|
"learning_rate": 1.797728059242253e-05, |
|
"loss": 1.1098, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.8585194918966272, |
|
"grad_norm": 0.26336208498756253, |
|
"learning_rate": 1.7961887778360916e-05, |
|
"loss": 1.1074, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.8607095926412615, |
|
"grad_norm": 0.2801383860699238, |
|
"learning_rate": 1.7946443259564982e-05, |
|
"loss": 1.1187, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.8628996933858958, |
|
"grad_norm": 0.27770606630491224, |
|
"learning_rate": 1.7930947136331884e-05, |
|
"loss": 1.0789, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.86508979413053, |
|
"grad_norm": 0.2998967999026723, |
|
"learning_rate": 1.7915399509293906e-05, |
|
"loss": 1.1016, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.8672798948751642, |
|
"grad_norm": 0.2648191447375537, |
|
"learning_rate": 1.7899800479417796e-05, |
|
"loss": 1.1051, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.8694699956197985, |
|
"grad_norm": 0.2749847601598037, |
|
"learning_rate": 1.7884150148004117e-05, |
|
"loss": 1.1215, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.8716600963644328, |
|
"grad_norm": 0.26906571047619404, |
|
"learning_rate": 1.7868448616686577e-05, |
|
"loss": 1.0973, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.873850197109067, |
|
"grad_norm": 0.2641533486933865, |
|
"learning_rate": 1.785269598743139e-05, |
|
"loss": 1.1125, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.8760402978537013, |
|
"grad_norm": 0.2745178776293995, |
|
"learning_rate": 1.78368923625366e-05, |
|
"loss": 1.0943, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8782303985983355, |
|
"grad_norm": 0.26767546603196474, |
|
"learning_rate": 1.7821037844631404e-05, |
|
"loss": 1.0961, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.8804204993429697, |
|
"grad_norm": 0.2674524087347779, |
|
"learning_rate": 1.7805132536675512e-05, |
|
"loss": 1.0988, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.8826106000876041, |
|
"grad_norm": 0.276219025861902, |
|
"learning_rate": 1.7789176541958466e-05, |
|
"loss": 1.1166, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.8848007008322383, |
|
"grad_norm": 0.2726202235697706, |
|
"learning_rate": 1.7773169964098965e-05, |
|
"loss": 1.1184, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.8869908015768725, |
|
"grad_norm": 0.2634542597431643, |
|
"learning_rate": 1.77571129070442e-05, |
|
"loss": 1.1344, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.8891809023215068, |
|
"grad_norm": 0.2848518603393168, |
|
"learning_rate": 1.774100547506917e-05, |
|
"loss": 1.125, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.891371003066141, |
|
"grad_norm": 0.2786252562346639, |
|
"learning_rate": 1.772484777277602e-05, |
|
"loss": 1.1127, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.8935611038107752, |
|
"grad_norm": 0.26740734293412094, |
|
"learning_rate": 1.7708639905093337e-05, |
|
"loss": 1.1203, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.8957512045554096, |
|
"grad_norm": 0.2747291608481027, |
|
"learning_rate": 1.76923819772755e-05, |
|
"loss": 1.0998, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.8979413053000438, |
|
"grad_norm": 0.2869604556453588, |
|
"learning_rate": 1.7676074094901975e-05, |
|
"loss": 1.1035, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.900131406044678, |
|
"grad_norm": 0.2651779430966185, |
|
"learning_rate": 1.7659716363876616e-05, |
|
"loss": 1.0871, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.9023215067893123, |
|
"grad_norm": 0.2729012408536021, |
|
"learning_rate": 1.7643308890427027e-05, |
|
"loss": 1.0877, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.9045116075339465, |
|
"grad_norm": 0.27230010821588146, |
|
"learning_rate": 1.762685178110382e-05, |
|
"loss": 1.0955, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.9067017082785808, |
|
"grad_norm": 0.2859811730936888, |
|
"learning_rate": 1.761034514277995e-05, |
|
"loss": 1.1016, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.9088918090232151, |
|
"grad_norm": 0.2729495630885254, |
|
"learning_rate": 1.759378908265001e-05, |
|
"loss": 1.1006, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.9110819097678493, |
|
"grad_norm": 0.29579631337299594, |
|
"learning_rate": 1.7577183708229545e-05, |
|
"loss": 1.1277, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.9132720105124835, |
|
"grad_norm": 0.2678446315371196, |
|
"learning_rate": 1.756052912735435e-05, |
|
"loss": 1.0807, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.9154621112571178, |
|
"grad_norm": 0.27329266459572743, |
|
"learning_rate": 1.754382544817976e-05, |
|
"loss": 1.0998, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.9176522120017521, |
|
"grad_norm": 0.2744618273856731, |
|
"learning_rate": 1.752707277917996e-05, |
|
"loss": 1.0861, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.9198423127463863, |
|
"grad_norm": 0.26947221466303073, |
|
"learning_rate": 1.7510271229147286e-05, |
|
"loss": 1.1004, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.9220324134910206, |
|
"grad_norm": 0.2862528549331771, |
|
"learning_rate": 1.7493420907191493e-05, |
|
"loss": 1.109, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.9242225142356548, |
|
"grad_norm": 0.2620725898469788, |
|
"learning_rate": 1.7476521922739067e-05, |
|
"loss": 1.1082, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.926412614980289, |
|
"grad_norm": 0.2785905306845852, |
|
"learning_rate": 1.7459574385532518e-05, |
|
"loss": 1.1191, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.9286027157249234, |
|
"grad_norm": 0.2637140594813735, |
|
"learning_rate": 1.744257840562965e-05, |
|
"loss": 1.0779, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.9307928164695576, |
|
"grad_norm": 0.2714092567203416, |
|
"learning_rate": 1.742553409340286e-05, |
|
"loss": 1.1043, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.9329829172141918, |
|
"grad_norm": 0.2700689511416106, |
|
"learning_rate": 1.740844155953841e-05, |
|
"loss": 1.1018, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.9351730179588261, |
|
"grad_norm": 0.278450005569163, |
|
"learning_rate": 1.7391300915035728e-05, |
|
"loss": 1.1096, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.9373631187034603, |
|
"grad_norm": 0.2817205107300403, |
|
"learning_rate": 1.7374112271206658e-05, |
|
"loss": 1.0885, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.9395532194480947, |
|
"grad_norm": 0.2747437897419016, |
|
"learning_rate": 1.735687573967476e-05, |
|
"loss": 1.1262, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.9417433201927289, |
|
"grad_norm": 0.2723091223016179, |
|
"learning_rate": 1.733959143237459e-05, |
|
"loss": 1.093, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.9439334209373631, |
|
"grad_norm": 0.2549767505673877, |
|
"learning_rate": 1.732225946155094e-05, |
|
"loss": 1.0664, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.9461235216819974, |
|
"grad_norm": 0.26305174791961355, |
|
"learning_rate": 1.730487993975814e-05, |
|
"loss": 1.0848, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.9483136224266316, |
|
"grad_norm": 0.2643783576244969, |
|
"learning_rate": 1.728745297985932e-05, |
|
"loss": 1.1121, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.9505037231712659, |
|
"grad_norm": 0.2743918837868961, |
|
"learning_rate": 1.7269978695025674e-05, |
|
"loss": 1.1375, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.9526938239159002, |
|
"grad_norm": 0.26269555514318277, |
|
"learning_rate": 1.7252457198735722e-05, |
|
"loss": 1.1109, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.9548839246605344, |
|
"grad_norm": 0.26810701345053684, |
|
"learning_rate": 1.7234888604774576e-05, |
|
"loss": 1.1102, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.9570740254051686, |
|
"grad_norm": 0.2637883296717608, |
|
"learning_rate": 1.721727302723321e-05, |
|
"loss": 1.1318, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.9592641261498029, |
|
"grad_norm": 0.2719515531587497, |
|
"learning_rate": 1.7199610580507696e-05, |
|
"loss": 1.1156, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.9614542268944372, |
|
"grad_norm": 0.2594336301706438, |
|
"learning_rate": 1.7181901379298493e-05, |
|
"loss": 1.0939, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.9636443276390714, |
|
"grad_norm": 0.2610060749161414, |
|
"learning_rate": 1.7164145538609668e-05, |
|
"loss": 1.0693, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.9658344283837057, |
|
"grad_norm": 0.26884834201604224, |
|
"learning_rate": 1.7146343173748185e-05, |
|
"loss": 1.102, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.9680245291283399, |
|
"grad_norm": 0.2678668170662298, |
|
"learning_rate": 1.712849440032312e-05, |
|
"loss": 1.1195, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.9702146298729741, |
|
"grad_norm": 0.2621622732714593, |
|
"learning_rate": 1.7110599334244936e-05, |
|
"loss": 1.0922, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.9724047306176085, |
|
"grad_norm": 0.25732866295844903, |
|
"learning_rate": 1.7092658091724722e-05, |
|
"loss": 1.1021, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.9745948313622427, |
|
"grad_norm": 0.2596998662779607, |
|
"learning_rate": 1.7074670789273434e-05, |
|
"loss": 1.0922, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.9767849321068769, |
|
"grad_norm": 0.2663765137611305, |
|
"learning_rate": 1.7056637543701145e-05, |
|
"loss": 1.0643, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.9789750328515112, |
|
"grad_norm": 0.26236680268310164, |
|
"learning_rate": 1.7038558472116288e-05, |
|
"loss": 1.1059, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.9811651335961454, |
|
"grad_norm": 0.2694955545673036, |
|
"learning_rate": 1.7020433691924885e-05, |
|
"loss": 1.0875, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.9833552343407796, |
|
"grad_norm": 0.2684464368662257, |
|
"learning_rate": 1.7002263320829796e-05, |
|
"loss": 1.1076, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.985545335085414, |
|
"grad_norm": 0.2765402521694828, |
|
"learning_rate": 1.6984047476829944e-05, |
|
"loss": 1.152, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.9877354358300482, |
|
"grad_norm": 0.2750352215522162, |
|
"learning_rate": 1.696578627821956e-05, |
|
"loss": 1.0938, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.9899255365746824, |
|
"grad_norm": 0.2652853027178437, |
|
"learning_rate": 1.6947479843587406e-05, |
|
"loss": 1.0824, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.9921156373193167, |
|
"grad_norm": 0.27290730679118463, |
|
"learning_rate": 1.692912829181601e-05, |
|
"loss": 1.098, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.994305738063951, |
|
"grad_norm": 0.25125441787653247, |
|
"learning_rate": 1.691073174208089e-05, |
|
"loss": 1.0924, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.9964958388085852, |
|
"grad_norm": 0.25849286326507437, |
|
"learning_rate": 1.6892290313849783e-05, |
|
"loss": 1.101, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.9986859395532195, |
|
"grad_norm": 0.2748639842655102, |
|
"learning_rate": 1.687380412688187e-05, |
|
"loss": 1.0953, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.0008760402978536, |
|
"grad_norm": 0.26892282605547124, |
|
"learning_rate": 1.6855273301226997e-05, |
|
"loss": 1.0813, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 1.003066141042488, |
|
"grad_norm": 0.2798609549581797, |
|
"learning_rate": 1.683669795722489e-05, |
|
"loss": 1.0699, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.0052562417871223, |
|
"grad_norm": 0.2922624161897083, |
|
"learning_rate": 1.681807821550438e-05, |
|
"loss": 1.0848, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 1.0074463425317564, |
|
"grad_norm": 0.259860907163824, |
|
"learning_rate": 1.679941419698262e-05, |
|
"loss": 1.0492, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.0096364432763907, |
|
"grad_norm": 0.2712856437968212, |
|
"learning_rate": 1.67807060228643e-05, |
|
"loss": 1.0705, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 1.011826544021025, |
|
"grad_norm": 0.26676093929792954, |
|
"learning_rate": 1.6761953814640853e-05, |
|
"loss": 1.0822, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.0140166447656591, |
|
"grad_norm": 0.2645720914080042, |
|
"learning_rate": 1.6743157694089675e-05, |
|
"loss": 1.0393, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 1.0162067455102934, |
|
"grad_norm": 0.273166262887767, |
|
"learning_rate": 1.6724317783273323e-05, |
|
"loss": 1.0701, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.0183968462549278, |
|
"grad_norm": 0.2732861335077073, |
|
"learning_rate": 1.670543420453874e-05, |
|
"loss": 1.0686, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.0205869469995619, |
|
"grad_norm": 0.27513602666197295, |
|
"learning_rate": 1.668650708051644e-05, |
|
"loss": 1.0537, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.0227770477441962, |
|
"grad_norm": 0.2924048948993731, |
|
"learning_rate": 1.666753653411973e-05, |
|
"loss": 1.0666, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 1.0249671484888305, |
|
"grad_norm": 0.28407032693449563, |
|
"learning_rate": 1.6648522688543893e-05, |
|
"loss": 1.0896, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.0271572492334646, |
|
"grad_norm": 0.27607464243536883, |
|
"learning_rate": 1.66294656672654e-05, |
|
"loss": 1.0766, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 1.029347349978099, |
|
"grad_norm": 0.2775777140300583, |
|
"learning_rate": 1.6610365594041127e-05, |
|
"loss": 1.0645, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.0315374507227333, |
|
"grad_norm": 0.28212695538172056, |
|
"learning_rate": 1.6591222592907493e-05, |
|
"loss": 1.0746, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 1.0337275514673676, |
|
"grad_norm": 0.26708386148841196, |
|
"learning_rate": 1.6572036788179728e-05, |
|
"loss": 1.0709, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.0359176522120017, |
|
"grad_norm": 0.2647326361883933, |
|
"learning_rate": 1.6552808304451e-05, |
|
"loss": 1.0715, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 1.038107752956636, |
|
"grad_norm": 0.27682819921543267, |
|
"learning_rate": 1.6533537266591665e-05, |
|
"loss": 1.0797, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.0402978537012704, |
|
"grad_norm": 0.2736226995262712, |
|
"learning_rate": 1.6514223799748402e-05, |
|
"loss": 1.0703, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.0424879544459045, |
|
"grad_norm": 0.27393594039449576, |
|
"learning_rate": 1.649486802934344e-05, |
|
"loss": 1.042, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.0446780551905388, |
|
"grad_norm": 0.26691728521745745, |
|
"learning_rate": 1.6475470081073724e-05, |
|
"loss": 1.0688, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 1.0468681559351731, |
|
"grad_norm": 0.2687411433553384, |
|
"learning_rate": 1.6456030080910093e-05, |
|
"loss": 1.0721, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.0490582566798072, |
|
"grad_norm": 0.26858936807043243, |
|
"learning_rate": 1.6436548155096495e-05, |
|
"loss": 1.0723, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 1.0512483574244416, |
|
"grad_norm": 0.2631218458201007, |
|
"learning_rate": 1.6417024430149126e-05, |
|
"loss": 1.0354, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.0534384581690759, |
|
"grad_norm": 0.2715486126833225, |
|
"learning_rate": 1.639745903285563e-05, |
|
"loss": 1.0779, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 1.05562855891371, |
|
"grad_norm": 0.2770109959912853, |
|
"learning_rate": 1.6377852090274278e-05, |
|
"loss": 1.0754, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.0578186596583443, |
|
"grad_norm": 0.27924640149556096, |
|
"learning_rate": 1.635820372973313e-05, |
|
"loss": 1.0688, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 1.0600087604029786, |
|
"grad_norm": 0.28178668312812843, |
|
"learning_rate": 1.6338514078829227e-05, |
|
"loss": 1.0996, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.0621988611476127, |
|
"grad_norm": 0.27443250432543503, |
|
"learning_rate": 1.6318783265427736e-05, |
|
"loss": 1.0734, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.064388961892247, |
|
"grad_norm": 0.27067404276566015, |
|
"learning_rate": 1.6299011417661146e-05, |
|
"loss": 1.0912, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.0665790626368814, |
|
"grad_norm": 0.26818614853532363, |
|
"learning_rate": 1.6279198663928415e-05, |
|
"loss": 1.0727, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 1.0687691633815155, |
|
"grad_norm": 0.27692110633499695, |
|
"learning_rate": 1.625934513289416e-05, |
|
"loss": 1.0918, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.0709592641261498, |
|
"grad_norm": 0.2683984093970962, |
|
"learning_rate": 1.6239450953487786e-05, |
|
"loss": 1.0533, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 1.0731493648707842, |
|
"grad_norm": 0.2680808660452308, |
|
"learning_rate": 1.6219516254902697e-05, |
|
"loss": 1.0691, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.0753394656154183, |
|
"grad_norm": 0.276528567930805, |
|
"learning_rate": 1.6199541166595397e-05, |
|
"loss": 1.067, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 1.0775295663600526, |
|
"grad_norm": 0.27014467052751223, |
|
"learning_rate": 1.617952581828471e-05, |
|
"loss": 1.0555, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.079719667104687, |
|
"grad_norm": 0.2670920984515106, |
|
"learning_rate": 1.6159470339950898e-05, |
|
"loss": 1.0645, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 1.081909767849321, |
|
"grad_norm": 0.26650906620699233, |
|
"learning_rate": 1.6139374861834827e-05, |
|
"loss": 1.0488, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.0840998685939554, |
|
"grad_norm": 0.28613285356865026, |
|
"learning_rate": 1.6119239514437125e-05, |
|
"loss": 1.0527, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.0862899693385897, |
|
"grad_norm": 0.2687551351307193, |
|
"learning_rate": 1.6099064428517334e-05, |
|
"loss": 1.0445, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.0884800700832238, |
|
"grad_norm": 0.26480671253991495, |
|
"learning_rate": 1.6078849735093057e-05, |
|
"loss": 1.0588, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 1.090670170827858, |
|
"grad_norm": 0.25864231413225064, |
|
"learning_rate": 1.605859556543911e-05, |
|
"loss": 1.0623, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.0928602715724924, |
|
"grad_norm": 0.2714842359217486, |
|
"learning_rate": 1.603830205108667e-05, |
|
"loss": 1.0584, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 1.0950503723171265, |
|
"grad_norm": 0.2754034686856655, |
|
"learning_rate": 1.6017969323822417e-05, |
|
"loss": 1.0939, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0972404730617609, |
|
"grad_norm": 0.26974950416433874, |
|
"learning_rate": 1.5997597515687678e-05, |
|
"loss": 1.058, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 1.0994305738063952, |
|
"grad_norm": 0.2846310921186313, |
|
"learning_rate": 1.5977186758977586e-05, |
|
"loss": 1.0715, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.1016206745510293, |
|
"grad_norm": 0.2648151245986546, |
|
"learning_rate": 1.595673718624019e-05, |
|
"loss": 1.0555, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 1.1038107752956636, |
|
"grad_norm": 0.26291167200610804, |
|
"learning_rate": 1.5936248930275625e-05, |
|
"loss": 1.0656, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.106000876040298, |
|
"grad_norm": 0.26794335368410055, |
|
"learning_rate": 1.5915722124135227e-05, |
|
"loss": 1.0785, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.108190976784932, |
|
"grad_norm": 0.2700870903979981, |
|
"learning_rate": 1.589515690112068e-05, |
|
"loss": 1.0752, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.1103810775295664, |
|
"grad_norm": 0.2761573536656991, |
|
"learning_rate": 1.587455339478315e-05, |
|
"loss": 1.0414, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 1.1125711782742007, |
|
"grad_norm": 0.26100878660436766, |
|
"learning_rate": 1.5853911738922416e-05, |
|
"loss": 1.0742, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.1147612790188348, |
|
"grad_norm": 0.2699651621314423, |
|
"learning_rate": 1.5833232067586003e-05, |
|
"loss": 1.0598, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 1.1169513797634691, |
|
"grad_norm": 0.2643500086322407, |
|
"learning_rate": 1.5812514515068304e-05, |
|
"loss": 1.0758, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.1191414805081035, |
|
"grad_norm": 0.26158426366408605, |
|
"learning_rate": 1.579175921590972e-05, |
|
"loss": 1.0414, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 1.1213315812527376, |
|
"grad_norm": 0.2747194691521627, |
|
"learning_rate": 1.5770966304895773e-05, |
|
"loss": 1.0461, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.123521681997372, |
|
"grad_norm": 0.2816065347391309, |
|
"learning_rate": 1.5750135917056245e-05, |
|
"loss": 1.04, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 1.1257117827420062, |
|
"grad_norm": 0.27210023210552514, |
|
"learning_rate": 1.5729268187664284e-05, |
|
"loss": 1.0736, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.1279018834866403, |
|
"grad_norm": 0.26597948498457047, |
|
"learning_rate": 1.570836325223555e-05, |
|
"loss": 1.0779, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.1300919842312747, |
|
"grad_norm": 0.27937206024905653, |
|
"learning_rate": 1.56874212465273e-05, |
|
"loss": 1.066, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.132282084975909, |
|
"grad_norm": 0.2710428960854059, |
|
"learning_rate": 1.5666442306537542e-05, |
|
"loss": 1.0652, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 1.134472185720543, |
|
"grad_norm": 0.26833840792123953, |
|
"learning_rate": 1.5645426568504135e-05, |
|
"loss": 1.0826, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.1366622864651774, |
|
"grad_norm": 0.27620664297921765, |
|
"learning_rate": 1.5624374168903894e-05, |
|
"loss": 1.0475, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 1.1388523872098117, |
|
"grad_norm": 0.26832807339683407, |
|
"learning_rate": 1.560328524445174e-05, |
|
"loss": 1.0529, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.1410424879544458, |
|
"grad_norm": 0.2771183218880352, |
|
"learning_rate": 1.558215993209975e-05, |
|
"loss": 1.0668, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 1.1432325886990802, |
|
"grad_norm": 0.27093032012891505, |
|
"learning_rate": 1.556099836903634e-05, |
|
"loss": 1.0719, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.1454226894437145, |
|
"grad_norm": 0.26498102520925576, |
|
"learning_rate": 1.5539800692685326e-05, |
|
"loss": 1.0437, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 1.1476127901883486, |
|
"grad_norm": 0.2784489095370376, |
|
"learning_rate": 1.5518567040705036e-05, |
|
"loss": 1.0525, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.149802890932983, |
|
"grad_norm": 0.25991368601083464, |
|
"learning_rate": 1.5497297550987438e-05, |
|
"loss": 1.0521, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.1519929916776173, |
|
"grad_norm": 0.27972691777909914, |
|
"learning_rate": 1.5475992361657228e-05, |
|
"loss": 1.0402, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.1541830924222514, |
|
"grad_norm": 0.2662833992907027, |
|
"learning_rate": 1.545465161107093e-05, |
|
"loss": 1.059, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 1.1563731931668857, |
|
"grad_norm": 0.2736333701331792, |
|
"learning_rate": 1.5433275437816004e-05, |
|
"loss": 1.0498, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.15856329391152, |
|
"grad_norm": 0.27897424796493925, |
|
"learning_rate": 1.5411863980709962e-05, |
|
"loss": 1.0717, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 1.1607533946561541, |
|
"grad_norm": 0.2840334326774718, |
|
"learning_rate": 1.539041737879943e-05, |
|
"loss": 1.0652, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.1629434954007885, |
|
"grad_norm": 0.26536289863258733, |
|
"learning_rate": 1.536893577135928e-05, |
|
"loss": 1.0662, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 1.1651335961454228, |
|
"grad_norm": 0.26830852227928204, |
|
"learning_rate": 1.5347419297891706e-05, |
|
"loss": 1.0736, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.1673236968900569, |
|
"grad_norm": 0.2648868718385574, |
|
"learning_rate": 1.5325868098125312e-05, |
|
"loss": 1.06, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 1.1695137976346912, |
|
"grad_norm": 0.2807483907173534, |
|
"learning_rate": 1.5304282312014235e-05, |
|
"loss": 1.0676, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.1717038983793255, |
|
"grad_norm": 0.2861338277467685, |
|
"learning_rate": 1.5282662079737206e-05, |
|
"loss": 1.0738, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.1738939991239596, |
|
"grad_norm": 0.28438493797626496, |
|
"learning_rate": 1.526100754169665e-05, |
|
"loss": 1.0527, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.176084099868594, |
|
"grad_norm": 0.2680358924154674, |
|
"learning_rate": 1.5239318838517772e-05, |
|
"loss": 1.0805, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 1.1782742006132283, |
|
"grad_norm": 0.28144376585272174, |
|
"learning_rate": 1.5217596111047656e-05, |
|
"loss": 1.0715, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.1804643013578624, |
|
"grad_norm": 0.27730236843145956, |
|
"learning_rate": 1.5195839500354337e-05, |
|
"loss": 1.0402, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 1.1826544021024967, |
|
"grad_norm": 0.2739919694755342, |
|
"learning_rate": 1.5174049147725885e-05, |
|
"loss": 1.0498, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.184844502847131, |
|
"grad_norm": 0.27303973140567345, |
|
"learning_rate": 1.5152225194669495e-05, |
|
"loss": 1.0508, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 1.1870346035917652, |
|
"grad_norm": 0.27671454231651477, |
|
"learning_rate": 1.5130367782910557e-05, |
|
"loss": 1.0604, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.1892247043363995, |
|
"grad_norm": 0.2641264661635561, |
|
"learning_rate": 1.5108477054391756e-05, |
|
"loss": 1.0721, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 1.1914148050810338, |
|
"grad_norm": 0.26276026147789583, |
|
"learning_rate": 1.5086553151272126e-05, |
|
"loss": 1.0445, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.193604905825668, |
|
"grad_norm": 0.27530344528775347, |
|
"learning_rate": 1.5064596215926139e-05, |
|
"loss": 1.0324, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.1957950065703022, |
|
"grad_norm": 0.26851666471510427, |
|
"learning_rate": 1.5042606390942785e-05, |
|
"loss": 1.0758, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.1979851073149366, |
|
"grad_norm": 0.27786184283527277, |
|
"learning_rate": 1.5020583819124633e-05, |
|
"loss": 1.0736, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 1.2001752080595707, |
|
"grad_norm": 0.2748103368049444, |
|
"learning_rate": 1.4998528643486915e-05, |
|
"loss": 1.073, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.202365308804205, |
|
"grad_norm": 0.2641215184110416, |
|
"learning_rate": 1.4976441007256599e-05, |
|
"loss": 1.0604, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 1.2045554095488393, |
|
"grad_norm": 0.2717405510915443, |
|
"learning_rate": 1.4954321053871444e-05, |
|
"loss": 1.0283, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.2067455102934734, |
|
"grad_norm": 0.26585144660715426, |
|
"learning_rate": 1.4932168926979074e-05, |
|
"loss": 1.0799, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 1.2089356110381078, |
|
"grad_norm": 0.26852091548064677, |
|
"learning_rate": 1.4909984770436064e-05, |
|
"loss": 1.0727, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.211125711782742, |
|
"grad_norm": 0.2832438116695105, |
|
"learning_rate": 1.4887768728306974e-05, |
|
"loss": 1.0629, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 1.2133158125273762, |
|
"grad_norm": 0.2584259442397719, |
|
"learning_rate": 1.4865520944863445e-05, |
|
"loss": 1.0502, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.2155059132720105, |
|
"grad_norm": 0.27567941486908737, |
|
"learning_rate": 1.4843241564583236e-05, |
|
"loss": 1.0723, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.2176960140166448, |
|
"grad_norm": 0.27009311931492574, |
|
"learning_rate": 1.4820930732149297e-05, |
|
"loss": 1.0848, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.219886114761279, |
|
"grad_norm": 0.2813111088617309, |
|
"learning_rate": 1.4798588592448834e-05, |
|
"loss": 1.0867, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 1.2220762155059133, |
|
"grad_norm": 0.26607157411575105, |
|
"learning_rate": 1.477621529057237e-05, |
|
"loss": 1.082, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.2242663162505476, |
|
"grad_norm": 0.2803710769666951, |
|
"learning_rate": 1.4753810971812779e-05, |
|
"loss": 1.0607, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 1.2264564169951817, |
|
"grad_norm": 0.27648257549873206, |
|
"learning_rate": 1.473137578166437e-05, |
|
"loss": 1.0436, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.228646517739816, |
|
"grad_norm": 0.2771472647218359, |
|
"learning_rate": 1.4708909865821932e-05, |
|
"loss": 1.0676, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 1.2308366184844504, |
|
"grad_norm": 0.2587044578585095, |
|
"learning_rate": 1.4686413370179786e-05, |
|
"loss": 1.0531, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.2330267192290845, |
|
"grad_norm": 0.26482046648511565, |
|
"learning_rate": 1.466388644083084e-05, |
|
"loss": 1.0615, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 1.2352168199737188, |
|
"grad_norm": 0.26764213866360803, |
|
"learning_rate": 1.4641329224065635e-05, |
|
"loss": 1.0891, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.2374069207183531, |
|
"grad_norm": 0.25538458354646704, |
|
"learning_rate": 1.4618741866371404e-05, |
|
"loss": 1.0562, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.2395970214629872, |
|
"grad_norm": 0.2620736158974201, |
|
"learning_rate": 1.4596124514431117e-05, |
|
"loss": 1.0555, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.2417871222076216, |
|
"grad_norm": 0.2568051923995238, |
|
"learning_rate": 1.457347731512252e-05, |
|
"loss": 1.0471, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 1.2439772229522559, |
|
"grad_norm": 0.2582881510400402, |
|
"learning_rate": 1.4550800415517201e-05, |
|
"loss": 1.0586, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.24616732369689, |
|
"grad_norm": 0.2590493852659133, |
|
"learning_rate": 1.452809396287961e-05, |
|
"loss": 1.0453, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 1.2483574244415243, |
|
"grad_norm": 0.26593849032587, |
|
"learning_rate": 1.4505358104666125e-05, |
|
"loss": 1.0633, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.2505475251861586, |
|
"grad_norm": 0.260260223527052, |
|
"learning_rate": 1.4482592988524076e-05, |
|
"loss": 1.0553, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 1.2527376259307927, |
|
"grad_norm": 0.27392055036341906, |
|
"learning_rate": 1.4459798762290807e-05, |
|
"loss": 1.0533, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.254927726675427, |
|
"grad_norm": 0.2854949267142112, |
|
"learning_rate": 1.4436975573992687e-05, |
|
"loss": 1.0551, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 1.2571178274200614, |
|
"grad_norm": 0.26738529406237527, |
|
"learning_rate": 1.4414123571844178e-05, |
|
"loss": 1.0508, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.2593079281646955, |
|
"grad_norm": 0.2714894374228558, |
|
"learning_rate": 1.439124290424686e-05, |
|
"loss": 1.0609, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.2614980289093298, |
|
"grad_norm": 0.26386369834766954, |
|
"learning_rate": 1.436833371978846e-05, |
|
"loss": 1.0516, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.2636881296539642, |
|
"grad_norm": 0.26154902396246443, |
|
"learning_rate": 1.4345396167241903e-05, |
|
"loss": 1.082, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 1.2658782303985983, |
|
"grad_norm": 0.27009603608193167, |
|
"learning_rate": 1.4322430395564326e-05, |
|
"loss": 1.073, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.2680683311432326, |
|
"grad_norm": 0.2607395450893893, |
|
"learning_rate": 1.4299436553896133e-05, |
|
"loss": 1.0578, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 1.270258431887867, |
|
"grad_norm": 0.26182126573431935, |
|
"learning_rate": 1.4276414791560012e-05, |
|
"loss": 1.0455, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.272448532632501, |
|
"grad_norm": 0.26055004587884284, |
|
"learning_rate": 1.4253365258059964e-05, |
|
"loss": 1.041, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 1.2746386333771353, |
|
"grad_norm": 0.26542997354867837, |
|
"learning_rate": 1.4230288103080339e-05, |
|
"loss": 1.0973, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.2768287341217697, |
|
"grad_norm": 0.28297483635713, |
|
"learning_rate": 1.4207183476484864e-05, |
|
"loss": 1.0398, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 1.2790188348664038, |
|
"grad_norm": 0.2720216676413036, |
|
"learning_rate": 1.418405152831567e-05, |
|
"loss": 1.0619, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.281208935611038, |
|
"grad_norm": 0.25854803685185235, |
|
"learning_rate": 1.4160892408792307e-05, |
|
"loss": 1.0756, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.2833990363556724, |
|
"grad_norm": 0.27269182259029, |
|
"learning_rate": 1.4137706268310784e-05, |
|
"loss": 1.0521, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.2855891371003065, |
|
"grad_norm": 0.2656775626074289, |
|
"learning_rate": 1.411449325744258e-05, |
|
"loss": 1.0713, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 1.2877792378449409, |
|
"grad_norm": 0.2578007150112824, |
|
"learning_rate": 1.4091253526933672e-05, |
|
"loss": 1.0256, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.2899693385895752, |
|
"grad_norm": 0.2578283345450258, |
|
"learning_rate": 1.4067987227703558e-05, |
|
"loss": 1.0398, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 1.2921594393342093, |
|
"grad_norm": 0.25629426766106156, |
|
"learning_rate": 1.4044694510844274e-05, |
|
"loss": 1.0334, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.2943495400788436, |
|
"grad_norm": 0.2608317134978303, |
|
"learning_rate": 1.4021375527619409e-05, |
|
"loss": 1.0443, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 1.296539640823478, |
|
"grad_norm": 0.2633979929205152, |
|
"learning_rate": 1.3998030429463127e-05, |
|
"loss": 1.0436, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.298729741568112, |
|
"grad_norm": 0.27013153256191164, |
|
"learning_rate": 1.3974659367979186e-05, |
|
"loss": 1.0682, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 1.3009198423127464, |
|
"grad_norm": 0.26004507355183054, |
|
"learning_rate": 1.395126249493995e-05, |
|
"loss": 1.0529, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.3031099430573807, |
|
"grad_norm": 0.26291757732552634, |
|
"learning_rate": 1.3927839962285406e-05, |
|
"loss": 1.0496, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.3053000438020148, |
|
"grad_norm": 0.26738381407622364, |
|
"learning_rate": 1.3904391922122165e-05, |
|
"loss": 1.0385, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.3074901445466491, |
|
"grad_norm": 0.2695960064844618, |
|
"learning_rate": 1.3880918526722497e-05, |
|
"loss": 1.0418, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 1.3096802452912835, |
|
"grad_norm": 0.2786190745701212, |
|
"learning_rate": 1.3857419928523328e-05, |
|
"loss": 1.0584, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.3118703460359176, |
|
"grad_norm": 0.25865708239628743, |
|
"learning_rate": 1.3833896280125245e-05, |
|
"loss": 1.0664, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 1.314060446780552, |
|
"grad_norm": 0.27043550106818665, |
|
"learning_rate": 1.3810347734291511e-05, |
|
"loss": 1.0713, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.3162505475251862, |
|
"grad_norm": 0.2786466259752348, |
|
"learning_rate": 1.3786774443947087e-05, |
|
"loss": 1.0654, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 1.3184406482698203, |
|
"grad_norm": 0.2643200048634759, |
|
"learning_rate": 1.3763176562177609e-05, |
|
"loss": 1.0482, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.3206307490144547, |
|
"grad_norm": 0.2713055037479327, |
|
"learning_rate": 1.3739554242228421e-05, |
|
"loss": 1.0584, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 1.322820849759089, |
|
"grad_norm": 0.271441501246053, |
|
"learning_rate": 1.3715907637503569e-05, |
|
"loss": 1.0602, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.325010950503723, |
|
"grad_norm": 0.2829209579812269, |
|
"learning_rate": 1.3692236901564797e-05, |
|
"loss": 1.0604, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.3272010512483574, |
|
"grad_norm": 0.2767208834441249, |
|
"learning_rate": 1.3668542188130567e-05, |
|
"loss": 1.0646, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.3293911519929917, |
|
"grad_norm": 0.2598545734511999, |
|
"learning_rate": 1.3644823651075046e-05, |
|
"loss": 1.0635, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 1.3315812527376258, |
|
"grad_norm": 0.2651920796665307, |
|
"learning_rate": 1.3621081444427114e-05, |
|
"loss": 1.0689, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.3337713534822602, |
|
"grad_norm": 0.26723030516899665, |
|
"learning_rate": 1.3597315722369363e-05, |
|
"loss": 1.0531, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 1.3359614542268945, |
|
"grad_norm": 0.26169000253454394, |
|
"learning_rate": 1.3573526639237089e-05, |
|
"loss": 1.0404, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.3381515549715286, |
|
"grad_norm": 0.2796414283834146, |
|
"learning_rate": 1.3549714349517307e-05, |
|
"loss": 1.058, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 1.340341655716163, |
|
"grad_norm": 0.28015821037583777, |
|
"learning_rate": 1.352587900784773e-05, |
|
"loss": 1.042, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.3425317564607973, |
|
"grad_norm": 0.26587778740964085, |
|
"learning_rate": 1.3502020769015762e-05, |
|
"loss": 1.049, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 1.3447218572054314, |
|
"grad_norm": 0.27151273634562134, |
|
"learning_rate": 1.3478139787957521e-05, |
|
"loss": 1.0533, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.3469119579500657, |
|
"grad_norm": 0.27774671237927817, |
|
"learning_rate": 1.3454236219756794e-05, |
|
"loss": 1.074, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.3491020586947, |
|
"grad_norm": 0.30548698754204073, |
|
"learning_rate": 1.3430310219644063e-05, |
|
"loss": 1.0541, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.3512921594393341, |
|
"grad_norm": 0.2769958664505269, |
|
"learning_rate": 1.3406361942995481e-05, |
|
"loss": 1.0709, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 1.3534822601839684, |
|
"grad_norm": 0.26621017281065856, |
|
"learning_rate": 1.3382391545331861e-05, |
|
"loss": 1.0289, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.3556723609286028, |
|
"grad_norm": 0.2654596239842093, |
|
"learning_rate": 1.3358399182317672e-05, |
|
"loss": 1.0676, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 1.3578624616732369, |
|
"grad_norm": 0.2605361900805774, |
|
"learning_rate": 1.3334385009760032e-05, |
|
"loss": 1.0711, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.3600525624178712, |
|
"grad_norm": 0.2734264004539264, |
|
"learning_rate": 1.3310349183607682e-05, |
|
"loss": 1.0678, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 1.3622426631625055, |
|
"grad_norm": 0.2607045359371018, |
|
"learning_rate": 1.3286291859949992e-05, |
|
"loss": 1.0584, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.3644327639071396, |
|
"grad_norm": 0.26946117404138387, |
|
"learning_rate": 1.3262213195015928e-05, |
|
"loss": 1.0473, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 1.366622864651774, |
|
"grad_norm": 0.2649730727308054, |
|
"learning_rate": 1.323811334517305e-05, |
|
"loss": 1.0521, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.3688129653964083, |
|
"grad_norm": 0.2714465080707081, |
|
"learning_rate": 1.3213992466926498e-05, |
|
"loss": 1.0771, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.3710030661410424, |
|
"grad_norm": 0.2703058507590705, |
|
"learning_rate": 1.3189850716917967e-05, |
|
"loss": 1.0582, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.3731931668856767, |
|
"grad_norm": 0.2628441678919807, |
|
"learning_rate": 1.3165688251924694e-05, |
|
"loss": 1.0656, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 1.375383267630311, |
|
"grad_norm": 0.2683866578537538, |
|
"learning_rate": 1.3141505228858438e-05, |
|
"loss": 1.0287, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.3775733683749452, |
|
"grad_norm": 0.27295127357891913, |
|
"learning_rate": 1.3117301804764467e-05, |
|
"loss": 1.0592, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 1.3797634691195795, |
|
"grad_norm": 0.2662649769317535, |
|
"learning_rate": 1.3093078136820534e-05, |
|
"loss": 1.0418, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.3819535698642138, |
|
"grad_norm": 0.2721595648282966, |
|
"learning_rate": 1.3068834382335847e-05, |
|
"loss": 1.0617, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 1.384143670608848, |
|
"grad_norm": 0.27547589811798506, |
|
"learning_rate": 1.304457069875007e-05, |
|
"loss": 1.0607, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.3863337713534822, |
|
"grad_norm": 0.2773048299021727, |
|
"learning_rate": 1.3020287243632279e-05, |
|
"loss": 1.0328, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 1.3885238720981166, |
|
"grad_norm": 0.26593053511889214, |
|
"learning_rate": 1.2995984174679946e-05, |
|
"loss": 1.0564, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.3907139728427507, |
|
"grad_norm": 0.26522350000493905, |
|
"learning_rate": 1.2971661649717921e-05, |
|
"loss": 1.0553, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.392904073587385, |
|
"grad_norm": 0.2739759291020745, |
|
"learning_rate": 1.2947319826697398e-05, |
|
"loss": 1.0828, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.3950941743320193, |
|
"grad_norm": 0.2653056984623836, |
|
"learning_rate": 1.2922958863694897e-05, |
|
"loss": 1.058, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 1.3972842750766534, |
|
"grad_norm": 0.2703400930314117, |
|
"learning_rate": 1.2898578918911225e-05, |
|
"loss": 1.0578, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.3994743758212878, |
|
"grad_norm": 0.26118537050181506, |
|
"learning_rate": 1.287418015067047e-05, |
|
"loss": 1.0775, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 1.401664476565922, |
|
"grad_norm": 0.2674813901144202, |
|
"learning_rate": 1.2849762717418952e-05, |
|
"loss": 1.0314, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.4038545773105562, |
|
"grad_norm": 0.2696947890894864, |
|
"learning_rate": 1.2825326777724199e-05, |
|
"loss": 1.0627, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 1.4060446780551905, |
|
"grad_norm": 0.26602192805217667, |
|
"learning_rate": 1.280087249027393e-05, |
|
"loss": 1.0561, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.4082347787998248, |
|
"grad_norm": 0.26896422544093695, |
|
"learning_rate": 1.2776400013875006e-05, |
|
"loss": 1.073, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 1.410424879544459, |
|
"grad_norm": 0.27033565405608107, |
|
"learning_rate": 1.2751909507452416e-05, |
|
"loss": 1.059, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.4126149802890933, |
|
"grad_norm": 0.2815290747437615, |
|
"learning_rate": 1.2727401130048227e-05, |
|
"loss": 1.0789, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.4148050810337276, |
|
"grad_norm": 0.2667387754809302, |
|
"learning_rate": 1.270287504082057e-05, |
|
"loss": 1.0639, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.4169951817783617, |
|
"grad_norm": 0.27178389185852914, |
|
"learning_rate": 1.2678331399042585e-05, |
|
"loss": 1.0469, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 1.419185282522996, |
|
"grad_norm": 0.2760250342430515, |
|
"learning_rate": 1.2653770364101416e-05, |
|
"loss": 1.0783, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.4213753832676304, |
|
"grad_norm": 0.2789228780835345, |
|
"learning_rate": 1.2629192095497143e-05, |
|
"loss": 1.0654, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 1.4235654840122645, |
|
"grad_norm": 0.26547832460110765, |
|
"learning_rate": 1.260459675284177e-05, |
|
"loss": 1.0646, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.4257555847568988, |
|
"grad_norm": 0.26556213425120645, |
|
"learning_rate": 1.2579984495858179e-05, |
|
"loss": 1.0348, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 1.4279456855015331, |
|
"grad_norm": 0.2661116149656591, |
|
"learning_rate": 1.255535548437909e-05, |
|
"loss": 1.0637, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.4301357862461672, |
|
"grad_norm": 0.2737166623631643, |
|
"learning_rate": 1.2530709878346037e-05, |
|
"loss": 1.0309, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 1.4323258869908015, |
|
"grad_norm": 0.27818732407942237, |
|
"learning_rate": 1.2506047837808308e-05, |
|
"loss": 1.049, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.4345159877354359, |
|
"grad_norm": 0.2559208288121395, |
|
"learning_rate": 1.2481369522921925e-05, |
|
"loss": 1.0439, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.43670608848007, |
|
"grad_norm": 0.2652327115101503, |
|
"learning_rate": 1.2456675093948592e-05, |
|
"loss": 1.0732, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.4388961892247043, |
|
"grad_norm": 0.2672613734124192, |
|
"learning_rate": 1.243196471125466e-05, |
|
"loss": 1.0754, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 1.4410862899693386, |
|
"grad_norm": 0.26771969463623996, |
|
"learning_rate": 1.2407238535310084e-05, |
|
"loss": 1.0664, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.4432763907139727, |
|
"grad_norm": 0.26928472122583363, |
|
"learning_rate": 1.2382496726687383e-05, |
|
"loss": 1.0736, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 1.445466491458607, |
|
"grad_norm": 0.26495634423319697, |
|
"learning_rate": 1.2357739446060593e-05, |
|
"loss": 1.0602, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.4476565922032414, |
|
"grad_norm": 0.26514448226579496, |
|
"learning_rate": 1.2332966854204219e-05, |
|
"loss": 1.0582, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 1.4498466929478755, |
|
"grad_norm": 0.26861128400347345, |
|
"learning_rate": 1.230817911199221e-05, |
|
"loss": 1.0766, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.4520367936925098, |
|
"grad_norm": 0.26551344406201943, |
|
"learning_rate": 1.2283376380396894e-05, |
|
"loss": 1.0525, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 1.4542268944371441, |
|
"grad_norm": 0.2710500634387481, |
|
"learning_rate": 1.2258558820487944e-05, |
|
"loss": 1.0854, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.4564169951817783, |
|
"grad_norm": 0.26286300066867274, |
|
"learning_rate": 1.2233726593431322e-05, |
|
"loss": 1.0578, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.4586070959264126, |
|
"grad_norm": 0.26988115045815153, |
|
"learning_rate": 1.220887986048825e-05, |
|
"loss": 1.0605, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.460797196671047, |
|
"grad_norm": 0.26499152320248637, |
|
"learning_rate": 1.218401878301414e-05, |
|
"loss": 1.0506, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 1.462987297415681, |
|
"grad_norm": 0.2894193501769946, |
|
"learning_rate": 1.2159143522457568e-05, |
|
"loss": 1.0463, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.4651773981603153, |
|
"grad_norm": 0.3054608297159258, |
|
"learning_rate": 1.2134254240359206e-05, |
|
"loss": 1.0648, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 1.4673674989049497, |
|
"grad_norm": 0.2726029462886979, |
|
"learning_rate": 1.210935109835079e-05, |
|
"loss": 1.0445, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.4695575996495838, |
|
"grad_norm": 0.2701355868570653, |
|
"learning_rate": 1.2084434258154059e-05, |
|
"loss": 1.0764, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 1.471747700394218, |
|
"grad_norm": 0.2689474409120184, |
|
"learning_rate": 1.2059503881579708e-05, |
|
"loss": 1.0551, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.4739378011388524, |
|
"grad_norm": 0.2657448254752709, |
|
"learning_rate": 1.2034560130526341e-05, |
|
"loss": 1.0586, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 1.4761279018834865, |
|
"grad_norm": 0.2831462519055798, |
|
"learning_rate": 1.2009603166979409e-05, |
|
"loss": 1.0494, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.4783180026281209, |
|
"grad_norm": 0.2643934359070407, |
|
"learning_rate": 1.1984633153010175e-05, |
|
"loss": 1.0506, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.4805081033727552, |
|
"grad_norm": 0.26997363450497397, |
|
"learning_rate": 1.1959650250774644e-05, |
|
"loss": 1.0344, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.4826982041173893, |
|
"grad_norm": 0.26573567629495287, |
|
"learning_rate": 1.1934654622512521e-05, |
|
"loss": 1.0496, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 1.4848883048620236, |
|
"grad_norm": 0.27525372160484834, |
|
"learning_rate": 1.1909646430546155e-05, |
|
"loss": 1.0766, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.487078405606658, |
|
"grad_norm": 0.27889170137924035, |
|
"learning_rate": 1.1884625837279483e-05, |
|
"loss": 1.0816, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 1.489268506351292, |
|
"grad_norm": 0.2661192505700371, |
|
"learning_rate": 1.1859593005196979e-05, |
|
"loss": 1.0592, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.4914586070959264, |
|
"grad_norm": 0.2735408402306572, |
|
"learning_rate": 1.1834548096862588e-05, |
|
"loss": 1.0807, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 1.4936487078405607, |
|
"grad_norm": 0.2673420736962167, |
|
"learning_rate": 1.1809491274918692e-05, |
|
"loss": 1.0715, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.4958388085851948, |
|
"grad_norm": 0.26821892445211293, |
|
"learning_rate": 1.1784422702085024e-05, |
|
"loss": 1.0467, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 1.4980289093298291, |
|
"grad_norm": 0.2575092291317386, |
|
"learning_rate": 1.1759342541157638e-05, |
|
"loss": 1.0402, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.5002190100744635, |
|
"grad_norm": 0.28334435595013674, |
|
"learning_rate": 1.1734250955007844e-05, |
|
"loss": 1.06, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.5024091108190976, |
|
"grad_norm": 0.2606484435934838, |
|
"learning_rate": 1.170914810658114e-05, |
|
"loss": 1.0244, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.5045992115637319, |
|
"grad_norm": 0.274800688330061, |
|
"learning_rate": 1.168403415889617e-05, |
|
"loss": 1.0402, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 1.5067893123083662, |
|
"grad_norm": 0.27404489640156304, |
|
"learning_rate": 1.1658909275043644e-05, |
|
"loss": 1.0605, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.5089794130530003, |
|
"grad_norm": 0.2901107001823462, |
|
"learning_rate": 1.1633773618185302e-05, |
|
"loss": 1.0562, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 1.5111695137976346, |
|
"grad_norm": 0.2727747313422447, |
|
"learning_rate": 1.1608627351552846e-05, |
|
"loss": 1.0807, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.513359614542269, |
|
"grad_norm": 0.2717095695436325, |
|
"learning_rate": 1.1583470638446872e-05, |
|
"loss": 1.0604, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 1.515549715286903, |
|
"grad_norm": 0.2673851569182466, |
|
"learning_rate": 1.1558303642235813e-05, |
|
"loss": 1.0363, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.5177398160315374, |
|
"grad_norm": 0.2613355528607941, |
|
"learning_rate": 1.1533126526354892e-05, |
|
"loss": 1.0441, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 1.5199299167761717, |
|
"grad_norm": 0.2732685633304171, |
|
"learning_rate": 1.1507939454305037e-05, |
|
"loss": 1.0379, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.5221200175208058, |
|
"grad_norm": 0.26638353891069905, |
|
"learning_rate": 1.1482742589651843e-05, |
|
"loss": 1.041, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.5243101182654402, |
|
"grad_norm": 0.2647954737777941, |
|
"learning_rate": 1.1457536096024493e-05, |
|
"loss": 1.0908, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.5265002190100745, |
|
"grad_norm": 0.2745155747431527, |
|
"learning_rate": 1.1432320137114697e-05, |
|
"loss": 1.0682, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 1.5286903197547086, |
|
"grad_norm": 0.26032815898238637, |
|
"learning_rate": 1.1407094876675638e-05, |
|
"loss": 1.0568, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.530880420499343, |
|
"grad_norm": 0.2678966018798207, |
|
"learning_rate": 1.13818604785209e-05, |
|
"loss": 1.0572, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 1.5330705212439772, |
|
"grad_norm": 0.2625845890583784, |
|
"learning_rate": 1.1356617106523418e-05, |
|
"loss": 1.0645, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.5352606219886114, |
|
"grad_norm": 0.2702043111010277, |
|
"learning_rate": 1.1331364924614387e-05, |
|
"loss": 1.0742, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 1.5374507227332457, |
|
"grad_norm": 0.26793561456953935, |
|
"learning_rate": 1.1306104096782223e-05, |
|
"loss": 1.0475, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.53964082347788, |
|
"grad_norm": 0.25713647329994405, |
|
"learning_rate": 1.1280834787071488e-05, |
|
"loss": 1.06, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 1.541830924222514, |
|
"grad_norm": 0.27664155878420915, |
|
"learning_rate": 1.1255557159581829e-05, |
|
"loss": 1.0729, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.5440210249671484, |
|
"grad_norm": 0.28295251997882537, |
|
"learning_rate": 1.1230271378466907e-05, |
|
"loss": 1.0494, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.5462111257117828, |
|
"grad_norm": 0.2702429683089981, |
|
"learning_rate": 1.1204977607933321e-05, |
|
"loss": 1.0693, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.5484012264564169, |
|
"grad_norm": 0.26693259673082964, |
|
"learning_rate": 1.117967601223957e-05, |
|
"loss": 1.0648, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 1.5505913272010512, |
|
"grad_norm": 0.26707808657095433, |
|
"learning_rate": 1.1154366755694964e-05, |
|
"loss": 1.0539, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.5527814279456855, |
|
"grad_norm": 0.26813358409375265, |
|
"learning_rate": 1.1129050002658563e-05, |
|
"loss": 1.073, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 1.5549715286903196, |
|
"grad_norm": 0.2808358737118327, |
|
"learning_rate": 1.1103725917538106e-05, |
|
"loss": 1.0449, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.557161629434954, |
|
"grad_norm": 0.2653292385507869, |
|
"learning_rate": 1.107839466478895e-05, |
|
"loss": 1.0426, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 1.5593517301795883, |
|
"grad_norm": 0.2606618878063292, |
|
"learning_rate": 1.1053056408913001e-05, |
|
"loss": 1.0307, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.5615418309242224, |
|
"grad_norm": 0.27074122465063255, |
|
"learning_rate": 1.1027711314457637e-05, |
|
"loss": 1.0561, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 1.563731931668857, |
|
"grad_norm": 0.26005870439969836, |
|
"learning_rate": 1.1002359546014654e-05, |
|
"loss": 1.0613, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.565922032413491, |
|
"grad_norm": 0.2639594548692859, |
|
"learning_rate": 1.097700126821918e-05, |
|
"loss": 1.0494, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.5681121331581251, |
|
"grad_norm": 0.2766008683933031, |
|
"learning_rate": 1.0951636645748624e-05, |
|
"loss": 1.073, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.5703022339027597, |
|
"grad_norm": 0.28265496721239436, |
|
"learning_rate": 1.0926265843321591e-05, |
|
"loss": 1.066, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 1.5724923346473938, |
|
"grad_norm": 0.2693455630957891, |
|
"learning_rate": 1.0900889025696824e-05, |
|
"loss": 1.0572, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.574682435392028, |
|
"grad_norm": 0.27906985295838976, |
|
"learning_rate": 1.0875506357672121e-05, |
|
"loss": 1.0457, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 1.5768725361366625, |
|
"grad_norm": 0.2624004808687805, |
|
"learning_rate": 1.0850118004083279e-05, |
|
"loss": 1.0697, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.5790626368812966, |
|
"grad_norm": 0.26122640361086863, |
|
"learning_rate": 1.0824724129803019e-05, |
|
"loss": 1.0334, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 1.5812527376259307, |
|
"grad_norm": 0.2619259175583039, |
|
"learning_rate": 1.0799324899739907e-05, |
|
"loss": 1.057, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.5834428383705652, |
|
"grad_norm": 0.261437829445439, |
|
"learning_rate": 1.0773920478837297e-05, |
|
"loss": 1.0529, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 1.5856329391151993, |
|
"grad_norm": 0.26033725853769885, |
|
"learning_rate": 1.0748511032072244e-05, |
|
"loss": 1.0355, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.5878230398598334, |
|
"grad_norm": 0.2566394274506097, |
|
"learning_rate": 1.0723096724454447e-05, |
|
"loss": 1.0559, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.590013140604468, |
|
"grad_norm": 0.26276427225977134, |
|
"learning_rate": 1.069767772102517e-05, |
|
"loss": 1.0566, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.592203241349102, |
|
"grad_norm": 0.2632371371848792, |
|
"learning_rate": 1.0672254186856177e-05, |
|
"loss": 1.0482, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 1.5943933420937362, |
|
"grad_norm": 0.2641026930868911, |
|
"learning_rate": 1.064682628704864e-05, |
|
"loss": 1.04, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.5965834428383707, |
|
"grad_norm": 0.26424518556825116, |
|
"learning_rate": 1.0621394186732098e-05, |
|
"loss": 1.0549, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 1.5987735435830048, |
|
"grad_norm": 0.27923099117512623, |
|
"learning_rate": 1.0595958051063357e-05, |
|
"loss": 1.0773, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.600963644327639, |
|
"grad_norm": 0.26998119469928633, |
|
"learning_rate": 1.0570518045225438e-05, |
|
"loss": 1.0725, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 1.6031537450722735, |
|
"grad_norm": 0.2685343508566471, |
|
"learning_rate": 1.0545074334426489e-05, |
|
"loss": 1.027, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.6053438458169076, |
|
"grad_norm": 0.2595911690857489, |
|
"learning_rate": 1.0519627083898716e-05, |
|
"loss": 1.043, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 1.6075339465615417, |
|
"grad_norm": 0.27641158129272464, |
|
"learning_rate": 1.0494176458897316e-05, |
|
"loss": 1.0598, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.6097240473061762, |
|
"grad_norm": 0.2648548237861228, |
|
"learning_rate": 1.0468722624699401e-05, |
|
"loss": 1.0371, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.6119141480508103, |
|
"grad_norm": 0.2702767226474014, |
|
"learning_rate": 1.0443265746602924e-05, |
|
"loss": 1.0434, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.6141042487954445, |
|
"grad_norm": 0.2621468550352411, |
|
"learning_rate": 1.0417805989925599e-05, |
|
"loss": 1.0529, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 1.616294349540079, |
|
"grad_norm": 0.2637165057836258, |
|
"learning_rate": 1.0392343520003836e-05, |
|
"loss": 1.0516, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.618484450284713, |
|
"grad_norm": 0.2667005261052336, |
|
"learning_rate": 1.036687850219167e-05, |
|
"loss": 1.0539, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 1.6206745510293472, |
|
"grad_norm": 0.26569655972150674, |
|
"learning_rate": 1.034141110185968e-05, |
|
"loss": 1.0701, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.6228646517739818, |
|
"grad_norm": 0.26574857790395595, |
|
"learning_rate": 1.0315941484393915e-05, |
|
"loss": 1.0473, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 1.6250547525186159, |
|
"grad_norm": 0.2768540484061404, |
|
"learning_rate": 1.0290469815194816e-05, |
|
"loss": 1.0416, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.62724485326325, |
|
"grad_norm": 0.270209664019903, |
|
"learning_rate": 1.0264996259676166e-05, |
|
"loss": 1.0434, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 1.6294349540078845, |
|
"grad_norm": 0.27949726855416546, |
|
"learning_rate": 1.0239520983263977e-05, |
|
"loss": 1.0547, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.6316250547525186, |
|
"grad_norm": 0.2638818494131406, |
|
"learning_rate": 1.0214044151395455e-05, |
|
"loss": 1.0682, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.6338151554971527, |
|
"grad_norm": 0.2675675222607541, |
|
"learning_rate": 1.018856592951789e-05, |
|
"loss": 1.0639, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.6360052562417873, |
|
"grad_norm": 0.2672082249953611, |
|
"learning_rate": 1.0163086483087612e-05, |
|
"loss": 1.0562, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 1.6381953569864214, |
|
"grad_norm": 0.26705014594504956, |
|
"learning_rate": 1.0137605977568896e-05, |
|
"loss": 1.0473, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.6403854577310555, |
|
"grad_norm": 0.2660961478360659, |
|
"learning_rate": 1.0112124578432901e-05, |
|
"loss": 1.0559, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 1.64257555847569, |
|
"grad_norm": 0.26645965796828935, |
|
"learning_rate": 1.0086642451156583e-05, |
|
"loss": 1.0648, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.6447656592203241, |
|
"grad_norm": 0.2738983150978546, |
|
"learning_rate": 1.0061159761221629e-05, |
|
"loss": 1.043, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 1.6469557599649582, |
|
"grad_norm": 0.2798777975444317, |
|
"learning_rate": 1.0035676674113379e-05, |
|
"loss": 1.0551, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.6491458607095928, |
|
"grad_norm": 0.265220883116891, |
|
"learning_rate": 1.001019335531975e-05, |
|
"loss": 1.0543, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 1.651335961454227, |
|
"grad_norm": 0.2682284038098586, |
|
"learning_rate": 9.984709970330176e-06, |
|
"loss": 1.0549, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.653526062198861, |
|
"grad_norm": 0.25832893494460807, |
|
"learning_rate": 9.959226684634502e-06, |
|
"loss": 1.032, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.6557161629434956, |
|
"grad_norm": 0.257904307751333, |
|
"learning_rate": 9.933743663721945e-06, |
|
"loss": 1.0686, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.6579062636881297, |
|
"grad_norm": 0.2689638363753139, |
|
"learning_rate": 9.908261073079991e-06, |
|
"loss": 1.0602, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 1.6600963644327638, |
|
"grad_norm": 0.2686614118320103, |
|
"learning_rate": 9.88277907819334e-06, |
|
"loss": 1.0617, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.6622864651773983, |
|
"grad_norm": 0.2627709715046705, |
|
"learning_rate": 9.857297844542812e-06, |
|
"loss": 1.0422, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 1.6644765659220324, |
|
"grad_norm": 0.26431236568218625, |
|
"learning_rate": 9.831817537604296e-06, |
|
"loss": 1.0258, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.28223545312633347, |
|
"learning_rate": 9.80633832284766e-06, |
|
"loss": 1.0355, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 1.668856767411301, |
|
"grad_norm": 0.2598098177349332, |
|
"learning_rate": 9.78086036573567e-06, |
|
"loss": 1.0344, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.6710468681559352, |
|
"grad_norm": 0.26435044314602746, |
|
"learning_rate": 9.755383831722939e-06, |
|
"loss": 1.0223, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 1.6732369689005693, |
|
"grad_norm": 0.26109212937735365, |
|
"learning_rate": 9.729908886254825e-06, |
|
"loss": 1.0592, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.6754270696452038, |
|
"grad_norm": 0.2654720571684063, |
|
"learning_rate": 9.704435694766377e-06, |
|
"loss": 1.0408, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.677617170389838, |
|
"grad_norm": 0.258522042520765, |
|
"learning_rate": 9.678964422681258e-06, |
|
"loss": 1.0346, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.679807271134472, |
|
"grad_norm": 0.26289147613332475, |
|
"learning_rate": 9.653495235410654e-06, |
|
"loss": 1.0488, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 1.6819973718791066, |
|
"grad_norm": 0.25642033853461843, |
|
"learning_rate": 9.628028298352226e-06, |
|
"loss": 1.0359, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.6841874726237407, |
|
"grad_norm": 0.2808061166004908, |
|
"learning_rate": 9.602563776889008e-06, |
|
"loss": 1.0516, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 1.6863775733683748, |
|
"grad_norm": 0.26470050862147015, |
|
"learning_rate": 9.57710183638836e-06, |
|
"loss": 1.0697, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.6885676741130093, |
|
"grad_norm": 0.26606691954269995, |
|
"learning_rate": 9.551642642200875e-06, |
|
"loss": 1.0758, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 1.6907577748576434, |
|
"grad_norm": 0.2736697572447457, |
|
"learning_rate": 9.526186359659307e-06, |
|
"loss": 1.0545, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.6929478756022776, |
|
"grad_norm": 0.2603250399961473, |
|
"learning_rate": 9.500733154077508e-06, |
|
"loss": 1.0471, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 1.695137976346912, |
|
"grad_norm": 0.274926658101149, |
|
"learning_rate": 9.475283190749356e-06, |
|
"loss": 1.0699, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.6973280770915462, |
|
"grad_norm": 0.26279297784971317, |
|
"learning_rate": 9.44983663494765e-06, |
|
"loss": 1.0512, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.6995181778361803, |
|
"grad_norm": 0.2641150816670752, |
|
"learning_rate": 9.424393651923084e-06, |
|
"loss": 1.0564, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.7017082785808149, |
|
"grad_norm": 0.25856068896220813, |
|
"learning_rate": 9.398954406903137e-06, |
|
"loss": 1.0494, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 1.703898379325449, |
|
"grad_norm": 0.27030707323741804, |
|
"learning_rate": 9.373519065091019e-06, |
|
"loss": 1.0475, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.706088480070083, |
|
"grad_norm": 0.2666495942417461, |
|
"learning_rate": 9.348087791664591e-06, |
|
"loss": 1.0553, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 1.7082785808147176, |
|
"grad_norm": 0.2612477606748569, |
|
"learning_rate": 9.32266075177529e-06, |
|
"loss": 1.0531, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.7104686815593517, |
|
"grad_norm": 0.25589631959691345, |
|
"learning_rate": 9.297238110547075e-06, |
|
"loss": 1.0408, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 1.7126587823039858, |
|
"grad_norm": 0.2616645026641856, |
|
"learning_rate": 9.271820033075311e-06, |
|
"loss": 1.0406, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.7148488830486204, |
|
"grad_norm": 0.2586391146779032, |
|
"learning_rate": 9.246406684425757e-06, |
|
"loss": 1.0682, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 1.7170389837932545, |
|
"grad_norm": 0.2627435639301313, |
|
"learning_rate": 9.220998229633447e-06, |
|
"loss": 1.0512, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.7192290845378886, |
|
"grad_norm": 0.27131216390216845, |
|
"learning_rate": 9.195594833701634e-06, |
|
"loss": 1.0637, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 1.7214191852825231, |
|
"grad_norm": 0.2669482190799787, |
|
"learning_rate": 9.170196661600725e-06, |
|
"loss": 1.0574, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.7236092860271572, |
|
"grad_norm": 0.26420187232497483, |
|
"learning_rate": 9.144803878267191e-06, |
|
"loss": 1.0557, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 1.7257993867717913, |
|
"grad_norm": 0.2659324687775046, |
|
"learning_rate": 9.119416648602526e-06, |
|
"loss": 1.0875, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.727989487516426, |
|
"grad_norm": 0.2704024968469015, |
|
"learning_rate": 9.094035137472148e-06, |
|
"loss": 1.0291, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 1.73017958826106, |
|
"grad_norm": 0.2704315331590373, |
|
"learning_rate": 9.068659509704337e-06, |
|
"loss": 1.0303, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.732369689005694, |
|
"grad_norm": 0.26075951030660144, |
|
"learning_rate": 9.043289930089166e-06, |
|
"loss": 1.0416, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 1.7345597897503287, |
|
"grad_norm": 0.2798473730769966, |
|
"learning_rate": 9.017926563377444e-06, |
|
"loss": 1.0432, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.7367498904949628, |
|
"grad_norm": 0.287189918659511, |
|
"learning_rate": 8.992569574279613e-06, |
|
"loss": 1.0646, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 1.7389399912395969, |
|
"grad_norm": 0.271002464852271, |
|
"learning_rate": 8.967219127464714e-06, |
|
"loss": 1.0738, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.7411300919842314, |
|
"grad_norm": 0.28869898620552076, |
|
"learning_rate": 8.941875387559296e-06, |
|
"loss": 1.0512, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.7433201927288655, |
|
"grad_norm": 0.2580747875213114, |
|
"learning_rate": 8.916538519146355e-06, |
|
"loss": 1.0697, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.7455102934734996, |
|
"grad_norm": 0.26720671681904395, |
|
"learning_rate": 8.891208686764263e-06, |
|
"loss": 1.0516, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 1.7477003942181342, |
|
"grad_norm": 0.27472866838079185, |
|
"learning_rate": 8.865886054905695e-06, |
|
"loss": 1.0564, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.7498904949627683, |
|
"grad_norm": 0.26848146534574147, |
|
"learning_rate": 8.840570788016578e-06, |
|
"loss": 1.0631, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 1.7520805957074024, |
|
"grad_norm": 0.26914337171369807, |
|
"learning_rate": 8.815263050494994e-06, |
|
"loss": 1.0875, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.754270696452037, |
|
"grad_norm": 0.2636833138291496, |
|
"learning_rate": 8.789963006690142e-06, |
|
"loss": 1.0604, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 1.756460797196671, |
|
"grad_norm": 0.2583287936852473, |
|
"learning_rate": 8.764670820901256e-06, |
|
"loss": 1.0395, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.7586508979413051, |
|
"grad_norm": 0.2785046304924881, |
|
"learning_rate": 8.739386657376532e-06, |
|
"loss": 1.091, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 1.7608409986859397, |
|
"grad_norm": 0.27037923704909567, |
|
"learning_rate": 8.714110680312082e-06, |
|
"loss": 1.0301, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.7630310994305738, |
|
"grad_norm": 0.2649755210179008, |
|
"learning_rate": 8.688843053850834e-06, |
|
"loss": 1.0574, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.765221200175208, |
|
"grad_norm": 0.2671017408683602, |
|
"learning_rate": 8.663583942081514e-06, |
|
"loss": 1.0371, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.7674113009198424, |
|
"grad_norm": 0.26430762243636857, |
|
"learning_rate": 8.638333509037537e-06, |
|
"loss": 1.0664, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 1.7696014016644765, |
|
"grad_norm": 0.281966657857878, |
|
"learning_rate": 8.613091918695954e-06, |
|
"loss": 1.0617, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.7717915024091109, |
|
"grad_norm": 0.2641603417837672, |
|
"learning_rate": 8.587859334976405e-06, |
|
"loss": 1.0414, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 1.7739816031537452, |
|
"grad_norm": 0.2550399068901032, |
|
"learning_rate": 8.562635921740029e-06, |
|
"loss": 1.0186, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.7761717038983793, |
|
"grad_norm": 0.258436517607505, |
|
"learning_rate": 8.537421842788416e-06, |
|
"loss": 1.0506, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 1.7783618046430136, |
|
"grad_norm": 0.27070416646617784, |
|
"learning_rate": 8.512217261862546e-06, |
|
"loss": 1.0707, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.780551905387648, |
|
"grad_norm": 0.2623515052424796, |
|
"learning_rate": 8.487022342641706e-06, |
|
"loss": 1.0268, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 1.782742006132282, |
|
"grad_norm": 0.2580098583224272, |
|
"learning_rate": 8.461837248742445e-06, |
|
"loss": 1.0547, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.7849321068769164, |
|
"grad_norm": 0.271853498810519, |
|
"learning_rate": 8.43666214371751e-06, |
|
"loss": 1.0709, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.7871222076215507, |
|
"grad_norm": 0.2659287637845158, |
|
"learning_rate": 8.411497191054772e-06, |
|
"loss": 1.0518, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.7893123083661848, |
|
"grad_norm": 0.27283290836176466, |
|
"learning_rate": 8.386342554176184e-06, |
|
"loss": 1.0584, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 1.7915024091108191, |
|
"grad_norm": 0.2651207636177441, |
|
"learning_rate": 8.361198396436688e-06, |
|
"loss": 1.0291, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.7936925098554535, |
|
"grad_norm": 0.2703741744888326, |
|
"learning_rate": 8.336064881123199e-06, |
|
"loss": 1.0535, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 1.7958826106000876, |
|
"grad_norm": 0.277174424727332, |
|
"learning_rate": 8.310942171453502e-06, |
|
"loss": 1.0611, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.798072711344722, |
|
"grad_norm": 0.2605646974649158, |
|
"learning_rate": 8.285830430575218e-06, |
|
"loss": 1.0518, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 1.8002628120893562, |
|
"grad_norm": 0.27113214575980377, |
|
"learning_rate": 8.260729821564733e-06, |
|
"loss": 1.066, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.8024529128339903, |
|
"grad_norm": 0.26188100923955243, |
|
"learning_rate": 8.235640507426136e-06, |
|
"loss": 1.0637, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 1.8046430135786247, |
|
"grad_norm": 0.2647927642271762, |
|
"learning_rate": 8.210562651090184e-06, |
|
"loss": 1.0709, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.806833114323259, |
|
"grad_norm": 0.2659054715179537, |
|
"learning_rate": 8.185496415413215e-06, |
|
"loss": 1.0664, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.809023215067893, |
|
"grad_norm": 0.26152364121681076, |
|
"learning_rate": 8.160441963176097e-06, |
|
"loss": 1.0395, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.8112133158125274, |
|
"grad_norm": 0.2598225560908046, |
|
"learning_rate": 8.13539945708319e-06, |
|
"loss": 1.0629, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 1.8134034165571618, |
|
"grad_norm": 0.26952205543318625, |
|
"learning_rate": 8.110369059761264e-06, |
|
"loss": 1.0576, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.8155935173017959, |
|
"grad_norm": 0.27030120261610047, |
|
"learning_rate": 8.085350933758456e-06, |
|
"loss": 1.0762, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 1.8177836180464302, |
|
"grad_norm": 0.25627881011325426, |
|
"learning_rate": 8.060345241543222e-06, |
|
"loss": 1.051, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.8199737187910645, |
|
"grad_norm": 0.2655782481715189, |
|
"learning_rate": 8.035352145503258e-06, |
|
"loss": 1.0602, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 1.8221638195356986, |
|
"grad_norm": 0.26293288923024916, |
|
"learning_rate": 8.010371807944475e-06, |
|
"loss": 1.0561, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.824353920280333, |
|
"grad_norm": 0.2670317031054168, |
|
"learning_rate": 7.985404391089917e-06, |
|
"loss": 1.0318, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 1.8265440210249673, |
|
"grad_norm": 0.2599529233299643, |
|
"learning_rate": 7.960450057078726e-06, |
|
"loss": 1.0555, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.8287341217696014, |
|
"grad_norm": 0.2640429458071465, |
|
"learning_rate": 7.935508967965092e-06, |
|
"loss": 1.0582, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.8309242225142357, |
|
"grad_norm": 0.2611223291897593, |
|
"learning_rate": 7.910581285717173e-06, |
|
"loss": 1.058, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.83311432325887, |
|
"grad_norm": 0.27332241824491227, |
|
"learning_rate": 7.885667172216079e-06, |
|
"loss": 1.0605, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 1.8353044240035041, |
|
"grad_norm": 0.26068149812972996, |
|
"learning_rate": 7.860766789254802e-06, |
|
"loss": 1.0338, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.8374945247481385, |
|
"grad_norm": 0.27123772619112196, |
|
"learning_rate": 7.835880298537158e-06, |
|
"loss": 1.0496, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 1.8396846254927728, |
|
"grad_norm": 0.2595476192894215, |
|
"learning_rate": 7.811007861676759e-06, |
|
"loss": 1.0666, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.841874726237407, |
|
"grad_norm": 0.26422252826709297, |
|
"learning_rate": 7.786149640195933e-06, |
|
"loss": 1.0271, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 1.8440648269820412, |
|
"grad_norm": 0.26134111456960313, |
|
"learning_rate": 7.761305795524716e-06, |
|
"loss": 1.0363, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.8462549277266755, |
|
"grad_norm": 0.2756776421734931, |
|
"learning_rate": 7.736476488999768e-06, |
|
"loss": 1.0475, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 1.8484450284713096, |
|
"grad_norm": 0.2702364297034732, |
|
"learning_rate": 7.711661881863331e-06, |
|
"loss": 1.0268, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.850635129215944, |
|
"grad_norm": 0.26975541398998254, |
|
"learning_rate": 7.686862135262205e-06, |
|
"loss": 1.0371, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.8528252299605783, |
|
"grad_norm": 0.2644048495956391, |
|
"learning_rate": 7.66207741024667e-06, |
|
"loss": 1.0611, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.8550153307052124, |
|
"grad_norm": 0.26966735694678484, |
|
"learning_rate": 7.637307867769462e-06, |
|
"loss": 1.0771, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 1.8572054314498467, |
|
"grad_norm": 0.26866968091520127, |
|
"learning_rate": 7.612553668684729e-06, |
|
"loss": 1.06, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.859395532194481, |
|
"grad_norm": 0.25872064254050153, |
|
"learning_rate": 7.587814973746963e-06, |
|
"loss": 1.051, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 1.8615856329391152, |
|
"grad_norm": 0.27201780645953577, |
|
"learning_rate": 7.563091943609984e-06, |
|
"loss": 1.0908, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.8637757336837495, |
|
"grad_norm": 0.27623982941637193, |
|
"learning_rate": 7.538384738825876e-06, |
|
"loss": 1.0432, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 1.8659658344283838, |
|
"grad_norm": 0.2658326988358835, |
|
"learning_rate": 7.513693519843956e-06, |
|
"loss": 1.0855, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.868155935173018, |
|
"grad_norm": 0.2603354692417316, |
|
"learning_rate": 7.489018447009737e-06, |
|
"loss": 1.0502, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 1.8703460359176522, |
|
"grad_norm": 0.2634605332883379, |
|
"learning_rate": 7.46435968056386e-06, |
|
"loss": 1.0662, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.8725361366622866, |
|
"grad_norm": 0.26436138724333197, |
|
"learning_rate": 7.439717380641087e-06, |
|
"loss": 1.0361, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.8747262374069207, |
|
"grad_norm": 0.2695505088765098, |
|
"learning_rate": 7.415091707269246e-06, |
|
"loss": 1.0701, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.876916338151555, |
|
"grad_norm": 0.2711600184846312, |
|
"learning_rate": 7.390482820368179e-06, |
|
"loss": 1.06, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 1.8791064388961893, |
|
"grad_norm": 0.27114060939285184, |
|
"learning_rate": 7.365890879748732e-06, |
|
"loss": 1.0307, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.8812965396408234, |
|
"grad_norm": 0.27852182132375797, |
|
"learning_rate": 7.341316045111683e-06, |
|
"loss": 1.0596, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 1.8834866403854578, |
|
"grad_norm": 0.2657978196594915, |
|
"learning_rate": 7.3167584760467415e-06, |
|
"loss": 1.0471, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.885676741130092, |
|
"grad_norm": 0.2674058529573235, |
|
"learning_rate": 7.292218332031489e-06, |
|
"loss": 1.0408, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 1.8878668418747262, |
|
"grad_norm": 0.26909849042401646, |
|
"learning_rate": 7.267695772430336e-06, |
|
"loss": 1.05, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.8900569426193605, |
|
"grad_norm": 0.2717698485268081, |
|
"learning_rate": 7.243190956493518e-06, |
|
"loss": 1.0623, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 1.8922470433639949, |
|
"grad_norm": 0.2841986212548669, |
|
"learning_rate": 7.218704043356026e-06, |
|
"loss": 1.0555, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.894437144108629, |
|
"grad_norm": 0.27274737898653567, |
|
"learning_rate": 7.194235192036598e-06, |
|
"loss": 1.0453, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.8966272448532633, |
|
"grad_norm": 0.26224406560969943, |
|
"learning_rate": 7.1697845614366825e-06, |
|
"loss": 1.0391, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.8988173455978976, |
|
"grad_norm": 0.26110104192389827, |
|
"learning_rate": 7.145352310339391e-06, |
|
"loss": 1.0475, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 1.9010074463425317, |
|
"grad_norm": 0.2624783893516754, |
|
"learning_rate": 7.120938597408487e-06, |
|
"loss": 1.0494, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.903197547087166, |
|
"grad_norm": 0.2642985663653997, |
|
"learning_rate": 7.0965435811873384e-06, |
|
"loss": 1.075, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 1.9053876478318004, |
|
"grad_norm": 0.26634192070298696, |
|
"learning_rate": 7.072167420097904e-06, |
|
"loss": 1.0734, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.9075777485764345, |
|
"grad_norm": 0.2672053957248454, |
|
"learning_rate": 7.047810272439699e-06, |
|
"loss": 1.0496, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 1.9097678493210688, |
|
"grad_norm": 0.25758269759882013, |
|
"learning_rate": 7.023472296388747e-06, |
|
"loss": 1.05, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.9119579500657031, |
|
"grad_norm": 0.26583912661168085, |
|
"learning_rate": 6.999153649996595e-06, |
|
"loss": 1.0494, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 1.9141480508103372, |
|
"grad_norm": 0.2614518285010944, |
|
"learning_rate": 6.974854491189243e-06, |
|
"loss": 1.0479, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.9163381515549716, |
|
"grad_norm": 0.25956078456974213, |
|
"learning_rate": 6.95057497776615e-06, |
|
"loss": 1.0346, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.9185282522996059, |
|
"grad_norm": 0.26309155812958857, |
|
"learning_rate": 6.926315267399193e-06, |
|
"loss": 1.0738, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.92071835304424, |
|
"grad_norm": 0.26694616315083763, |
|
"learning_rate": 6.902075517631642e-06, |
|
"loss": 1.057, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 1.9229084537888743, |
|
"grad_norm": 0.2715223161888118, |
|
"learning_rate": 6.877855885877154e-06, |
|
"loss": 1.0488, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.9250985545335086, |
|
"grad_norm": 0.2692563426788601, |
|
"learning_rate": 6.853656529418733e-06, |
|
"loss": 1.0666, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 1.9272886552781427, |
|
"grad_norm": 0.2637247892608681, |
|
"learning_rate": 6.82947760540771e-06, |
|
"loss": 1.0316, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.929478756022777, |
|
"grad_norm": 0.2631628957902492, |
|
"learning_rate": 6.805319270862736e-06, |
|
"loss": 1.0568, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 1.9316688567674114, |
|
"grad_norm": 0.2639983365666047, |
|
"learning_rate": 6.7811816826687425e-06, |
|
"loss": 1.0564, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.9338589575120455, |
|
"grad_norm": 0.26678744265203513, |
|
"learning_rate": 6.757064997575944e-06, |
|
"loss": 1.0738, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 1.9360490582566798, |
|
"grad_norm": 0.276712279089104, |
|
"learning_rate": 6.732969372198808e-06, |
|
"loss": 1.0814, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.9382391590013142, |
|
"grad_norm": 0.25482014051465834, |
|
"learning_rate": 6.708894963015034e-06, |
|
"loss": 1.0412, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 1.9404292597459483, |
|
"grad_norm": 0.2632248605954728, |
|
"learning_rate": 6.684841926364547e-06, |
|
"loss": 1.0496, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.9426193604905826, |
|
"grad_norm": 0.2612596534133882, |
|
"learning_rate": 6.660810418448475e-06, |
|
"loss": 1.0391, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 1.944809461235217, |
|
"grad_norm": 0.28939889023140064, |
|
"learning_rate": 6.63680059532814e-06, |
|
"loss": 1.0693, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.946999561979851, |
|
"grad_norm": 0.2586881368868953, |
|
"learning_rate": 6.612812612924051e-06, |
|
"loss": 1.016, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 1.9491896627244854, |
|
"grad_norm": 0.27522451220029226, |
|
"learning_rate": 6.58884662701486e-06, |
|
"loss": 1.0527, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.9513797634691197, |
|
"grad_norm": 0.2598623624479845, |
|
"learning_rate": 6.564902793236398e-06, |
|
"loss": 1.0336, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 1.9535698642137538, |
|
"grad_norm": 0.27748246484682554, |
|
"learning_rate": 6.540981267080624e-06, |
|
"loss": 1.0297, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.955759964958388, |
|
"grad_norm": 0.2685656534630219, |
|
"learning_rate": 6.517082203894636e-06, |
|
"loss": 1.0537, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 1.9579500657030224, |
|
"grad_norm": 0.26891368891515, |
|
"learning_rate": 6.493205758879657e-06, |
|
"loss": 1.0734, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.9601401664476565, |
|
"grad_norm": 0.2674766199421492, |
|
"learning_rate": 6.469352087090022e-06, |
|
"loss": 1.0562, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.9623302671922909, |
|
"grad_norm": 0.25788874477297874, |
|
"learning_rate": 6.445521343432189e-06, |
|
"loss": 1.0404, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.9645203679369252, |
|
"grad_norm": 0.2819872242228695, |
|
"learning_rate": 6.421713682663697e-06, |
|
"loss": 1.0604, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 1.9667104686815593, |
|
"grad_norm": 0.27010244363463976, |
|
"learning_rate": 6.397929259392208e-06, |
|
"loss": 1.0295, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.9689005694261936, |
|
"grad_norm": 0.25928825181925214, |
|
"learning_rate": 6.374168228074471e-06, |
|
"loss": 1.0434, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 1.971090670170828, |
|
"grad_norm": 0.2670727572943277, |
|
"learning_rate": 6.3504307430153165e-06, |
|
"loss": 1.0598, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.973280770915462, |
|
"grad_norm": 0.27662824636944733, |
|
"learning_rate": 6.326716958366676e-06, |
|
"loss": 1.052, |
|
"step": 4505 |
|
}, |
|
{ |
|
"epoch": 1.9754708716600964, |
|
"grad_norm": 0.2645319859255144, |
|
"learning_rate": 6.3030270281265745e-06, |
|
"loss": 1.0461, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.9776609724047307, |
|
"grad_norm": 0.27311415864779, |
|
"learning_rate": 6.27936110613811e-06, |
|
"loss": 1.0367, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 1.9798510731493648, |
|
"grad_norm": 0.2640788171682172, |
|
"learning_rate": 6.255719346088483e-06, |
|
"loss": 1.068, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.9820411738939991, |
|
"grad_norm": 0.26269876183491886, |
|
"learning_rate": 6.232101901507978e-06, |
|
"loss": 1.0424, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 1.9842312746386335, |
|
"grad_norm": 0.2666587948643593, |
|
"learning_rate": 6.20850892576898e-06, |
|
"loss": 1.0307, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.9864213753832676, |
|
"grad_norm": 0.25956508969478437, |
|
"learning_rate": 6.184940572084972e-06, |
|
"loss": 1.048, |
|
"step": 4535 |
|
}, |
|
{ |
|
"epoch": 1.988611476127902, |
|
"grad_norm": 0.2716220580126288, |
|
"learning_rate": 6.16139699350953e-06, |
|
"loss": 1.0408, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.9908015768725362, |
|
"grad_norm": 0.27436626928192, |
|
"learning_rate": 6.1378783429353596e-06, |
|
"loss": 1.0406, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 1.9929916776171703, |
|
"grad_norm": 0.267050414240659, |
|
"learning_rate": 6.114384773093265e-06, |
|
"loss": 1.0527, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.9951817783618047, |
|
"grad_norm": 0.2791092291038011, |
|
"learning_rate": 6.090916436551185e-06, |
|
"loss": 1.0594, |
|
"step": 4555 |
|
}, |
|
{ |
|
"epoch": 1.997371879106439, |
|
"grad_norm": 0.2536778001853949, |
|
"learning_rate": 6.067473485713191e-06, |
|
"loss": 1.0494, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.999561979851073, |
|
"grad_norm": 0.26436103598489785, |
|
"learning_rate": 6.044056072818495e-06, |
|
"loss": 1.05, |
|
"step": 4565 |
|
}, |
|
{ |
|
"epoch": 2.001752080595707, |
|
"grad_norm": 0.26093422619007045, |
|
"learning_rate": 6.020664349940471e-06, |
|
"loss": 1.0168, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.0039421813403417, |
|
"grad_norm": 0.2689011008218334, |
|
"learning_rate": 5.997298468985652e-06, |
|
"loss": 1.0332, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 2.006132282084976, |
|
"grad_norm": 0.27970037489217886, |
|
"learning_rate": 5.973958581692765e-06, |
|
"loss": 1.0086, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.00832238282961, |
|
"grad_norm": 0.257258807469164, |
|
"learning_rate": 5.950644839631727e-06, |
|
"loss": 1.0227, |
|
"step": 4585 |
|
}, |
|
{ |
|
"epoch": 2.0105124835742445, |
|
"grad_norm": 0.2623857732646142, |
|
"learning_rate": 5.927357394202665e-06, |
|
"loss": 1.0256, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.0127025843188786, |
|
"grad_norm": 0.26667045216828666, |
|
"learning_rate": 5.904096396634935e-06, |
|
"loss": 0.9852, |
|
"step": 4595 |
|
}, |
|
{ |
|
"epoch": 2.0148926850635127, |
|
"grad_norm": 0.2654589837312285, |
|
"learning_rate": 5.880861997986151e-06, |
|
"loss": 1.0217, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.0170827858081473, |
|
"grad_norm": 0.2622730969078804, |
|
"learning_rate": 5.8576543491411734e-06, |
|
"loss": 1.0072, |
|
"step": 4605 |
|
}, |
|
{ |
|
"epoch": 2.0192728865527814, |
|
"grad_norm": 0.25609804027452565, |
|
"learning_rate": 5.834473600811168e-06, |
|
"loss": 0.9986, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.0214629872974155, |
|
"grad_norm": 0.26444734638461304, |
|
"learning_rate": 5.8113199035325885e-06, |
|
"loss": 0.9914, |
|
"step": 4615 |
|
}, |
|
{ |
|
"epoch": 2.02365308804205, |
|
"grad_norm": 0.2759313276801203, |
|
"learning_rate": 5.788193407666234e-06, |
|
"loss": 1.0354, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.025843188786684, |
|
"grad_norm": 0.26725617714670247, |
|
"learning_rate": 5.765094263396257e-06, |
|
"loss": 1.0139, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 2.0280332895313182, |
|
"grad_norm": 0.2664407521165841, |
|
"learning_rate": 5.742022620729177e-06, |
|
"loss": 1.0203, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.0302233902759528, |
|
"grad_norm": 0.2610201181123097, |
|
"learning_rate": 5.718978629492924e-06, |
|
"loss": 1.0201, |
|
"step": 4635 |
|
}, |
|
{ |
|
"epoch": 2.032413491020587, |
|
"grad_norm": 0.2709130624912754, |
|
"learning_rate": 5.695962439335853e-06, |
|
"loss": 1.0299, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.034603591765221, |
|
"grad_norm": 0.2685235958201942, |
|
"learning_rate": 5.672974199725785e-06, |
|
"loss": 1.0191, |
|
"step": 4645 |
|
}, |
|
{ |
|
"epoch": 2.0367936925098555, |
|
"grad_norm": 0.26748654369266783, |
|
"learning_rate": 5.650014059949033e-06, |
|
"loss": 0.982, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.0389837932544896, |
|
"grad_norm": 0.2605321089033867, |
|
"learning_rate": 5.627082169109417e-06, |
|
"loss": 1.0314, |
|
"step": 4655 |
|
}, |
|
{ |
|
"epoch": 2.0411738939991237, |
|
"grad_norm": 0.2587831303240616, |
|
"learning_rate": 5.604178676127314e-06, |
|
"loss": 1.0309, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.0433639947437583, |
|
"grad_norm": 0.2756781260939094, |
|
"learning_rate": 5.581303729738681e-06, |
|
"loss": 1.0369, |
|
"step": 4665 |
|
}, |
|
{ |
|
"epoch": 2.0455540954883924, |
|
"grad_norm": 0.26243765749849607, |
|
"learning_rate": 5.558457478494096e-06, |
|
"loss": 1.0271, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.0477441962330265, |
|
"grad_norm": 0.2674529202379436, |
|
"learning_rate": 5.5356400707578e-06, |
|
"loss": 1.0256, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 2.049934296977661, |
|
"grad_norm": 0.26296862723387604, |
|
"learning_rate": 5.512851654706694e-06, |
|
"loss": 1.0414, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.052124397722295, |
|
"grad_norm": 0.27045918514005246, |
|
"learning_rate": 5.490092378329441e-06, |
|
"loss": 1.0229, |
|
"step": 4685 |
|
}, |
|
{ |
|
"epoch": 2.0543144984669293, |
|
"grad_norm": 0.27477672949598386, |
|
"learning_rate": 5.467362389425441e-06, |
|
"loss": 1.0129, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.056504599211564, |
|
"grad_norm": 0.2805167534895326, |
|
"learning_rate": 5.444661835603925e-06, |
|
"loss": 1.0311, |
|
"step": 4695 |
|
}, |
|
{ |
|
"epoch": 2.058694699956198, |
|
"grad_norm": 0.26414962391577573, |
|
"learning_rate": 5.421990864282953e-06, |
|
"loss": 1.0109, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.060884800700832, |
|
"grad_norm": 0.27441404134914826, |
|
"learning_rate": 5.399349622688479e-06, |
|
"loss": 1.0135, |
|
"step": 4705 |
|
}, |
|
{ |
|
"epoch": 2.0630749014454666, |
|
"grad_norm": 0.2579083757623819, |
|
"learning_rate": 5.3767382578534e-06, |
|
"loss": 0.9945, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.0652650021901007, |
|
"grad_norm": 0.26992308351772354, |
|
"learning_rate": 5.354156916616589e-06, |
|
"loss": 1.0139, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 2.0674551029347352, |
|
"grad_norm": 0.27138173828265577, |
|
"learning_rate": 5.331605745621938e-06, |
|
"loss": 1.0236, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.0696452036793693, |
|
"grad_norm": 0.2643761326181544, |
|
"learning_rate": 5.30908489131742e-06, |
|
"loss": 1.0051, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 2.0718353044240034, |
|
"grad_norm": 0.2701535434252902, |
|
"learning_rate": 5.286594499954121e-06, |
|
"loss": 1.0361, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 2.074025405168638, |
|
"grad_norm": 0.2792019275770248, |
|
"learning_rate": 5.264134717585313e-06, |
|
"loss": 0.9674, |
|
"step": 4735 |
|
}, |
|
{ |
|
"epoch": 2.076215505913272, |
|
"grad_norm": 0.2731806421275931, |
|
"learning_rate": 5.241705690065489e-06, |
|
"loss": 1.0164, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.078405606657906, |
|
"grad_norm": 0.28547898058692295, |
|
"learning_rate": 5.219307563049411e-06, |
|
"loss": 1.0262, |
|
"step": 4745 |
|
}, |
|
{ |
|
"epoch": 2.0805957074025407, |
|
"grad_norm": 0.26085931171959603, |
|
"learning_rate": 5.196940481991179e-06, |
|
"loss": 1.0084, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.082785808147175, |
|
"grad_norm": 0.2819568795934252, |
|
"learning_rate": 5.174604592143273e-06, |
|
"loss": 1.0195, |
|
"step": 4755 |
|
}, |
|
{ |
|
"epoch": 2.084975908891809, |
|
"grad_norm": 0.25860822102705483, |
|
"learning_rate": 5.152300038555624e-06, |
|
"loss": 1.0324, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.0871660096364435, |
|
"grad_norm": 0.2672824104490307, |
|
"learning_rate": 5.130026966074671e-06, |
|
"loss": 1.0164, |
|
"step": 4765 |
|
}, |
|
{ |
|
"epoch": 2.0893561103810776, |
|
"grad_norm": 0.2659892744135754, |
|
"learning_rate": 5.107785519342388e-06, |
|
"loss": 1.0213, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 2.0915462111257117, |
|
"grad_norm": 0.26571378582465105, |
|
"learning_rate": 5.085575842795398e-06, |
|
"loss": 1.0381, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 2.0937363118703463, |
|
"grad_norm": 0.2688782038959789, |
|
"learning_rate": 5.06339808066399e-06, |
|
"loss": 1.0475, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.0959264126149804, |
|
"grad_norm": 0.26362420646124834, |
|
"learning_rate": 5.041252376971213e-06, |
|
"loss": 1.0102, |
|
"step": 4785 |
|
}, |
|
{ |
|
"epoch": 2.0981165133596145, |
|
"grad_norm": 0.27101641391517345, |
|
"learning_rate": 5.019138875531917e-06, |
|
"loss": 1.0389, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 2.100306614104249, |
|
"grad_norm": 0.2576814050017412, |
|
"learning_rate": 4.99705771995183e-06, |
|
"loss": 1.0223, |
|
"step": 4795 |
|
}, |
|
{ |
|
"epoch": 2.102496714848883, |
|
"grad_norm": 0.27045735827617223, |
|
"learning_rate": 4.975009053626634e-06, |
|
"loss": 1.0145, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.1046868155935172, |
|
"grad_norm": 0.26113258147724255, |
|
"learning_rate": 4.952993019741013e-06, |
|
"loss": 1.0297, |
|
"step": 4805 |
|
}, |
|
{ |
|
"epoch": 2.1068769163381518, |
|
"grad_norm": 0.27448549506036557, |
|
"learning_rate": 4.93100976126775e-06, |
|
"loss": 1.0096, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 2.109067017082786, |
|
"grad_norm": 0.27280933957719383, |
|
"learning_rate": 4.909059420966768e-06, |
|
"loss": 1.0092, |
|
"step": 4815 |
|
}, |
|
{ |
|
"epoch": 2.11125711782742, |
|
"grad_norm": 0.264506277380279, |
|
"learning_rate": 4.887142141384222e-06, |
|
"loss": 1.0426, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.1134472185720545, |
|
"grad_norm": 0.2770787791773093, |
|
"learning_rate": 4.865258064851579e-06, |
|
"loss": 1.0316, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 2.1156373193166886, |
|
"grad_norm": 0.2557758077437797, |
|
"learning_rate": 4.8434073334846785e-06, |
|
"loss": 1.0371, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 2.1178274200613227, |
|
"grad_norm": 0.2610392778424005, |
|
"learning_rate": 4.821590089182814e-06, |
|
"loss": 1.0043, |
|
"step": 4835 |
|
}, |
|
{ |
|
"epoch": 2.1200175208059573, |
|
"grad_norm": 0.26958938148423095, |
|
"learning_rate": 4.799806473627813e-06, |
|
"loss": 1.0297, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.1222076215505914, |
|
"grad_norm": 0.2660024950094293, |
|
"learning_rate": 4.778056628283114e-06, |
|
"loss": 0.9949, |
|
"step": 4845 |
|
}, |
|
{ |
|
"epoch": 2.1243977222952255, |
|
"grad_norm": 0.2703860686810657, |
|
"learning_rate": 4.756340694392863e-06, |
|
"loss": 1.0152, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.12658782303986, |
|
"grad_norm": 0.2726552697268506, |
|
"learning_rate": 4.734658812980973e-06, |
|
"loss": 1.0508, |
|
"step": 4855 |
|
}, |
|
{ |
|
"epoch": 2.128777923784494, |
|
"grad_norm": 0.2606367742943384, |
|
"learning_rate": 4.7130111248502175e-06, |
|
"loss": 1.008, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.1309680245291283, |
|
"grad_norm": 0.26232587434114535, |
|
"learning_rate": 4.6913977705813296e-06, |
|
"loss": 1.0148, |
|
"step": 4865 |
|
}, |
|
{ |
|
"epoch": 2.133158125273763, |
|
"grad_norm": 0.27500445217893804, |
|
"learning_rate": 4.6698188905320615e-06, |
|
"loss": 1.0271, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 2.135348226018397, |
|
"grad_norm": 0.26713378585912395, |
|
"learning_rate": 4.648274624836309e-06, |
|
"loss": 1.0221, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 2.137538326763031, |
|
"grad_norm": 0.2762560729124893, |
|
"learning_rate": 4.626765113403161e-06, |
|
"loss": 1.0234, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.1397284275076656, |
|
"grad_norm": 0.2689363110437547, |
|
"learning_rate": 4.605290495916018e-06, |
|
"loss": 1.0357, |
|
"step": 4885 |
|
}, |
|
{ |
|
"epoch": 2.1419185282522997, |
|
"grad_norm": 0.26265385480996006, |
|
"learning_rate": 4.5838509118316896e-06, |
|
"loss": 1.0123, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 2.1441086289969338, |
|
"grad_norm": 0.26738349884810464, |
|
"learning_rate": 4.5624465003794575e-06, |
|
"loss": 1.0416, |
|
"step": 4895 |
|
}, |
|
{ |
|
"epoch": 2.1462987297415683, |
|
"grad_norm": 0.2641315930523429, |
|
"learning_rate": 4.54107740056021e-06, |
|
"loss": 1.0109, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.1484888304862024, |
|
"grad_norm": 0.2759532152378268, |
|
"learning_rate": 4.51974375114551e-06, |
|
"loss": 1.035, |
|
"step": 4905 |
|
}, |
|
{ |
|
"epoch": 2.1506789312308365, |
|
"grad_norm": 0.2711223796119766, |
|
"learning_rate": 4.4984456906767e-06, |
|
"loss": 1.0186, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 2.152869031975471, |
|
"grad_norm": 0.2622897744480132, |
|
"learning_rate": 4.477183357464021e-06, |
|
"loss": 1.0242, |
|
"step": 4915 |
|
}, |
|
{ |
|
"epoch": 2.155059132720105, |
|
"grad_norm": 0.2750413280265702, |
|
"learning_rate": 4.455956889585698e-06, |
|
"loss": 1.0242, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.1572492334647393, |
|
"grad_norm": 0.2756527614489596, |
|
"learning_rate": 4.434766424887024e-06, |
|
"loss": 1.0461, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 2.159439334209374, |
|
"grad_norm": 0.2701065410839812, |
|
"learning_rate": 4.413612100979516e-06, |
|
"loss": 1.0447, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 2.161629434954008, |
|
"grad_norm": 0.26414864633307716, |
|
"learning_rate": 4.392494055239971e-06, |
|
"loss": 1.0115, |
|
"step": 4935 |
|
}, |
|
{ |
|
"epoch": 2.163819535698642, |
|
"grad_norm": 0.2597662178542703, |
|
"learning_rate": 4.371412424809607e-06, |
|
"loss": 0.9982, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.1660096364432766, |
|
"grad_norm": 0.265049318136912, |
|
"learning_rate": 4.350367346593151e-06, |
|
"loss": 1.0076, |
|
"step": 4945 |
|
}, |
|
{ |
|
"epoch": 2.1681997371879107, |
|
"grad_norm": 0.26480844249657876, |
|
"learning_rate": 4.3293589572579585e-06, |
|
"loss": 1.0127, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.170389837932545, |
|
"grad_norm": 0.26650586635339474, |
|
"learning_rate": 4.30838739323314e-06, |
|
"loss": 1.0211, |
|
"step": 4955 |
|
}, |
|
{ |
|
"epoch": 2.1725799386771794, |
|
"grad_norm": 0.2615690992438174, |
|
"learning_rate": 4.287452790708641e-06, |
|
"loss": 1.0082, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.1747700394218135, |
|
"grad_norm": 0.26000121930930187, |
|
"learning_rate": 4.266555285634398e-06, |
|
"loss": 1.0309, |
|
"step": 4965 |
|
}, |
|
{ |
|
"epoch": 2.1769601401664476, |
|
"grad_norm": 0.2651977563284172, |
|
"learning_rate": 4.245695013719421e-06, |
|
"loss": 1.01, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 2.179150240911082, |
|
"grad_norm": 0.26742093385668575, |
|
"learning_rate": 4.224872110430929e-06, |
|
"loss": 1.0119, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 2.181340341655716, |
|
"grad_norm": 0.2685843051356311, |
|
"learning_rate": 4.204086710993477e-06, |
|
"loss": 0.9998, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.1835304424003503, |
|
"grad_norm": 0.2741238172517032, |
|
"learning_rate": 4.1833389503880574e-06, |
|
"loss": 1.0191, |
|
"step": 4985 |
|
}, |
|
{ |
|
"epoch": 2.185720543144985, |
|
"grad_norm": 0.26609648886123183, |
|
"learning_rate": 4.162628963351245e-06, |
|
"loss": 1.0188, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 2.187910643889619, |
|
"grad_norm": 0.26640975647603027, |
|
"learning_rate": 4.141956884374304e-06, |
|
"loss": 1.034, |
|
"step": 4995 |
|
}, |
|
{ |
|
"epoch": 2.190100744634253, |
|
"grad_norm": 0.2616553108981328, |
|
"learning_rate": 4.1213228477023235e-06, |
|
"loss": 1.007, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.1922908453788876, |
|
"grad_norm": 0.25930380462776687, |
|
"learning_rate": 4.100726987333353e-06, |
|
"loss": 1.0047, |
|
"step": 5005 |
|
}, |
|
{ |
|
"epoch": 2.1944809461235217, |
|
"grad_norm": 0.26376308398121895, |
|
"learning_rate": 4.080169437017516e-06, |
|
"loss": 1.017, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 2.196671046868156, |
|
"grad_norm": 0.2708631621391155, |
|
"learning_rate": 4.059650330256145e-06, |
|
"loss": 1.0291, |
|
"step": 5015 |
|
}, |
|
{ |
|
"epoch": 2.1988611476127904, |
|
"grad_norm": 0.26235140363314685, |
|
"learning_rate": 4.039169800300933e-06, |
|
"loss": 1.0307, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.2010512483574245, |
|
"grad_norm": 0.27081362352421917, |
|
"learning_rate": 4.018727980153041e-06, |
|
"loss": 1.0273, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 2.2032413491020586, |
|
"grad_norm": 0.2680987698793304, |
|
"learning_rate": 3.998325002562258e-06, |
|
"loss": 1.0236, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.205431449846693, |
|
"grad_norm": 0.2723369033529761, |
|
"learning_rate": 3.977961000026119e-06, |
|
"loss": 1.033, |
|
"step": 5035 |
|
}, |
|
{ |
|
"epoch": 2.2076215505913273, |
|
"grad_norm": 0.27329071897001517, |
|
"learning_rate": 3.957636104789056e-06, |
|
"loss": 1.0227, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.2098116513359614, |
|
"grad_norm": 0.266253622886688, |
|
"learning_rate": 3.937350448841544e-06, |
|
"loss": 1.0, |
|
"step": 5045 |
|
}, |
|
{ |
|
"epoch": 2.212001752080596, |
|
"grad_norm": 0.2845347733345193, |
|
"learning_rate": 3.917104163919226e-06, |
|
"loss": 1.0328, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.21419185282523, |
|
"grad_norm": 0.2671087468266786, |
|
"learning_rate": 3.896897381502081e-06, |
|
"loss": 1.0188, |
|
"step": 5055 |
|
}, |
|
{ |
|
"epoch": 2.216381953569864, |
|
"grad_norm": 0.2647712357087782, |
|
"learning_rate": 3.8767302328135456e-06, |
|
"loss": 1.015, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.2185720543144987, |
|
"grad_norm": 0.2647110909009258, |
|
"learning_rate": 3.856602848819677e-06, |
|
"loss": 1.0416, |
|
"step": 5065 |
|
}, |
|
{ |
|
"epoch": 2.2207621550591328, |
|
"grad_norm": 0.26237178804400635, |
|
"learning_rate": 3.836515360228308e-06, |
|
"loss": 1.0424, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.222952255803767, |
|
"grad_norm": 0.2658640348878226, |
|
"learning_rate": 3.816467897488175e-06, |
|
"loss": 1.0449, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 2.2251423565484014, |
|
"grad_norm": 0.2601983313467295, |
|
"learning_rate": 3.7964605907881014e-06, |
|
"loss": 1.0, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.2273324572930355, |
|
"grad_norm": 0.27724490452236145, |
|
"learning_rate": 3.7764935700561222e-06, |
|
"loss": 1.0033, |
|
"step": 5085 |
|
}, |
|
{ |
|
"epoch": 2.2295225580376696, |
|
"grad_norm": 0.28674009424588764, |
|
"learning_rate": 3.7565669649586577e-06, |
|
"loss": 1.0377, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.231712658782304, |
|
"grad_norm": 0.26041573255312417, |
|
"learning_rate": 3.7366809048996755e-06, |
|
"loss": 1.0219, |
|
"step": 5095 |
|
}, |
|
{ |
|
"epoch": 2.2339027595269383, |
|
"grad_norm": 0.26439782352388286, |
|
"learning_rate": 3.7168355190198336e-06, |
|
"loss": 1.0268, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.2360928602715724, |
|
"grad_norm": 0.2666477044206433, |
|
"learning_rate": 3.6970309361956514e-06, |
|
"loss": 1.0215, |
|
"step": 5105 |
|
}, |
|
{ |
|
"epoch": 2.238282961016207, |
|
"grad_norm": 0.27347497537887744, |
|
"learning_rate": 3.6772672850386803e-06, |
|
"loss": 0.9965, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 2.240473061760841, |
|
"grad_norm": 0.2733236300572501, |
|
"learning_rate": 3.657544693894648e-06, |
|
"loss": 1.0197, |
|
"step": 5115 |
|
}, |
|
{ |
|
"epoch": 2.242663162505475, |
|
"grad_norm": 0.27287749992945903, |
|
"learning_rate": 3.6378632908426504e-06, |
|
"loss": 1.034, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.2448532632501097, |
|
"grad_norm": 0.2585480868756628, |
|
"learning_rate": 3.6182232036942967e-06, |
|
"loss": 1.0184, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 2.247043363994744, |
|
"grad_norm": 0.26361194041457353, |
|
"learning_rate": 3.5986245599928903e-06, |
|
"loss": 1.0135, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 2.249233464739378, |
|
"grad_norm": 0.2662253344158049, |
|
"learning_rate": 3.5790674870126073e-06, |
|
"loss": 1.0063, |
|
"step": 5135 |
|
}, |
|
{ |
|
"epoch": 2.2514235654840125, |
|
"grad_norm": 0.26616630879092884, |
|
"learning_rate": 3.5595521117576513e-06, |
|
"loss": 1.0043, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.2536136662286466, |
|
"grad_norm": 0.2736827752131189, |
|
"learning_rate": 3.540078560961452e-06, |
|
"loss": 1.0125, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 2.2558037669732807, |
|
"grad_norm": 0.2710314880260435, |
|
"learning_rate": 3.5206469610858207e-06, |
|
"loss": 1.0432, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.257993867717915, |
|
"grad_norm": 0.268667093843008, |
|
"learning_rate": 3.501257438320136e-06, |
|
"loss": 1.0404, |
|
"step": 5155 |
|
}, |
|
{ |
|
"epoch": 2.2601839684625493, |
|
"grad_norm": 0.2723797726229181, |
|
"learning_rate": 3.4819101185805383e-06, |
|
"loss": 1.0123, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.2623740692071834, |
|
"grad_norm": 0.2603114146629379, |
|
"learning_rate": 3.4626051275090865e-06, |
|
"loss": 0.9975, |
|
"step": 5165 |
|
}, |
|
{ |
|
"epoch": 2.264564169951818, |
|
"grad_norm": 0.27790374707917465, |
|
"learning_rate": 3.44334259047297e-06, |
|
"loss": 1.0217, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.266754270696452, |
|
"grad_norm": 0.25899914233411553, |
|
"learning_rate": 3.424122632563669e-06, |
|
"loss": 1.0504, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 2.268944371441086, |
|
"grad_norm": 0.26446806135188367, |
|
"learning_rate": 3.404945378596155e-06, |
|
"loss": 1.0127, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.2711344721857207, |
|
"grad_norm": 0.2612602747990249, |
|
"learning_rate": 3.3858109531080887e-06, |
|
"loss": 1.0156, |
|
"step": 5185 |
|
}, |
|
{ |
|
"epoch": 2.273324572930355, |
|
"grad_norm": 0.26445200917852263, |
|
"learning_rate": 3.3667194803589885e-06, |
|
"loss": 1.0119, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.275514673674989, |
|
"grad_norm": 0.28094575273309763, |
|
"learning_rate": 3.3476710843294415e-06, |
|
"loss": 1.0281, |
|
"step": 5195 |
|
}, |
|
{ |
|
"epoch": 2.2777047744196235, |
|
"grad_norm": 0.26736737190018384, |
|
"learning_rate": 3.3286658887202994e-06, |
|
"loss": 1.0037, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.2798948751642576, |
|
"grad_norm": 0.2640795352734308, |
|
"learning_rate": 3.3097040169518567e-06, |
|
"loss": 1.0406, |
|
"step": 5205 |
|
}, |
|
{ |
|
"epoch": 2.2820849759088917, |
|
"grad_norm": 0.26426495072100187, |
|
"learning_rate": 3.2907855921630747e-06, |
|
"loss": 1.033, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 2.2842750766535262, |
|
"grad_norm": 0.26566279121236197, |
|
"learning_rate": 3.2719107372107615e-06, |
|
"loss": 1.0197, |
|
"step": 5215 |
|
}, |
|
{ |
|
"epoch": 2.2864651773981604, |
|
"grad_norm": 0.2653552548457873, |
|
"learning_rate": 3.253079574668777e-06, |
|
"loss": 1.0006, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 2.2886552781427945, |
|
"grad_norm": 0.26398157254205407, |
|
"learning_rate": 3.234292226827255e-06, |
|
"loss": 0.9961, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 2.290845378887429, |
|
"grad_norm": 0.2768441293910038, |
|
"learning_rate": 3.215548815691779e-06, |
|
"loss": 1.0348, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 2.293035479632063, |
|
"grad_norm": 0.26131639882439045, |
|
"learning_rate": 3.196849462982622e-06, |
|
"loss": 1.0018, |
|
"step": 5235 |
|
}, |
|
{ |
|
"epoch": 2.295225580376697, |
|
"grad_norm": 0.2722287901413304, |
|
"learning_rate": 3.178194290133929e-06, |
|
"loss": 1.0232, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 2.2974156811213318, |
|
"grad_norm": 0.2761279729955395, |
|
"learning_rate": 3.1595834182929396e-06, |
|
"loss": 1.0309, |
|
"step": 5245 |
|
}, |
|
{ |
|
"epoch": 2.299605781865966, |
|
"grad_norm": 0.264451768689309, |
|
"learning_rate": 3.1410169683192114e-06, |
|
"loss": 1.0291, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.3017958826106, |
|
"grad_norm": 0.2720396323147097, |
|
"learning_rate": 3.1224950607838147e-06, |
|
"loss": 1.0318, |
|
"step": 5255 |
|
}, |
|
{ |
|
"epoch": 2.3039859833552345, |
|
"grad_norm": 0.2659690024816963, |
|
"learning_rate": 3.1040178159685675e-06, |
|
"loss": 1.0279, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.3061760840998686, |
|
"grad_norm": 0.26752050191400945, |
|
"learning_rate": 3.08558535386524e-06, |
|
"loss": 1.0295, |
|
"step": 5265 |
|
}, |
|
{ |
|
"epoch": 2.3083661848445027, |
|
"grad_norm": 0.26719252797672, |
|
"learning_rate": 3.0671977941747843e-06, |
|
"loss": 1.0074, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 2.3105562855891373, |
|
"grad_norm": 0.27119586183987965, |
|
"learning_rate": 3.048855256306562e-06, |
|
"loss": 0.9959, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 2.3127463863337714, |
|
"grad_norm": 0.2599492206066672, |
|
"learning_rate": 3.030557859377551e-06, |
|
"loss": 1.027, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.3149364870784055, |
|
"grad_norm": 0.2622696675757602, |
|
"learning_rate": 3.0123057222115835e-06, |
|
"loss": 1.015, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 2.31712658782304, |
|
"grad_norm": 0.26724171900385185, |
|
"learning_rate": 2.9940989633385866e-06, |
|
"loss": 1.0105, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 2.319316688567674, |
|
"grad_norm": 0.26121450618999226, |
|
"learning_rate": 2.9759377009937807e-06, |
|
"loss": 1.017, |
|
"step": 5295 |
|
}, |
|
{ |
|
"epoch": 2.3215067893123082, |
|
"grad_norm": 0.2705484002824616, |
|
"learning_rate": 2.957822053116948e-06, |
|
"loss": 1.0334, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.323696890056943, |
|
"grad_norm": 0.27808085682505257, |
|
"learning_rate": 2.9397521373516357e-06, |
|
"loss": 1.0045, |
|
"step": 5305 |
|
}, |
|
{ |
|
"epoch": 2.325886990801577, |
|
"grad_norm": 0.26449757221896497, |
|
"learning_rate": 2.9217280710444084e-06, |
|
"loss": 1.0076, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 2.328077091546211, |
|
"grad_norm": 0.2681810130131999, |
|
"learning_rate": 2.9037499712440907e-06, |
|
"loss": 1.0102, |
|
"step": 5315 |
|
}, |
|
{ |
|
"epoch": 2.3302671922908456, |
|
"grad_norm": 0.267694962057186, |
|
"learning_rate": 2.885817954700988e-06, |
|
"loss": 1.0125, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 2.3324572930354797, |
|
"grad_norm": 0.27046123405151434, |
|
"learning_rate": 2.8679321378661506e-06, |
|
"loss": 1.0197, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 2.3346473937801138, |
|
"grad_norm": 0.2602187883267958, |
|
"learning_rate": 2.850092636890599e-06, |
|
"loss": 1.0006, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.3368374945247483, |
|
"grad_norm": 0.2697969479776123, |
|
"learning_rate": 2.8322995676245756e-06, |
|
"loss": 1.0207, |
|
"step": 5335 |
|
}, |
|
{ |
|
"epoch": 2.3390275952693824, |
|
"grad_norm": 0.26593532687029453, |
|
"learning_rate": 2.8145530456168034e-06, |
|
"loss": 1.0064, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.3412176960140165, |
|
"grad_norm": 0.260443619701446, |
|
"learning_rate": 2.796853186113715e-06, |
|
"loss": 1.0381, |
|
"step": 5345 |
|
}, |
|
{ |
|
"epoch": 2.343407796758651, |
|
"grad_norm": 0.2656577780398318, |
|
"learning_rate": 2.7792001040587256e-06, |
|
"loss": 1.0186, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.345597897503285, |
|
"grad_norm": 0.26695059912132757, |
|
"learning_rate": 2.7615939140914704e-06, |
|
"loss": 0.9973, |
|
"step": 5355 |
|
}, |
|
{ |
|
"epoch": 2.3477879982479193, |
|
"grad_norm": 0.2698545962387733, |
|
"learning_rate": 2.7440347305470604e-06, |
|
"loss": 1.0262, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 2.349978098992554, |
|
"grad_norm": 0.2621284105960352, |
|
"learning_rate": 2.7265226674553613e-06, |
|
"loss": 1.0186, |
|
"step": 5365 |
|
}, |
|
{ |
|
"epoch": 2.352168199737188, |
|
"grad_norm": 0.2629369792774262, |
|
"learning_rate": 2.7090578385402243e-06, |
|
"loss": 0.9812, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 2.354358300481822, |
|
"grad_norm": 0.26223979769748595, |
|
"learning_rate": 2.691640357218759e-06, |
|
"loss": 1.0314, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 2.3565484012264566, |
|
"grad_norm": 0.2638015005792946, |
|
"learning_rate": 2.674270336600612e-06, |
|
"loss": 1.024, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 2.3587385019710907, |
|
"grad_norm": 0.2643561856353618, |
|
"learning_rate": 2.6569478894872014e-06, |
|
"loss": 1.0102, |
|
"step": 5385 |
|
}, |
|
{ |
|
"epoch": 2.360928602715725, |
|
"grad_norm": 0.27732040588272344, |
|
"learning_rate": 2.6396731283710185e-06, |
|
"loss": 1.0422, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 2.3631187034603593, |
|
"grad_norm": 0.2644682180760317, |
|
"learning_rate": 2.6224461654348686e-06, |
|
"loss": 1.0139, |
|
"step": 5395 |
|
}, |
|
{ |
|
"epoch": 2.3653088042049935, |
|
"grad_norm": 0.26210054764945073, |
|
"learning_rate": 2.605267112551154e-06, |
|
"loss": 1.0309, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.3674989049496276, |
|
"grad_norm": 0.2709828298883479, |
|
"learning_rate": 2.5881360812811584e-06, |
|
"loss": 1.0332, |
|
"step": 5405 |
|
}, |
|
{ |
|
"epoch": 2.369689005694262, |
|
"grad_norm": 0.2730880205333291, |
|
"learning_rate": 2.571053182874298e-06, |
|
"loss": 1.0281, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 2.371879106438896, |
|
"grad_norm": 0.2596551205971999, |
|
"learning_rate": 2.5540185282674247e-06, |
|
"loss": 1.0342, |
|
"step": 5415 |
|
}, |
|
{ |
|
"epoch": 2.3740692071835303, |
|
"grad_norm": 0.2695558996090938, |
|
"learning_rate": 2.5370322280840863e-06, |
|
"loss": 0.9879, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 2.376259307928165, |
|
"grad_norm": 0.26055214066353855, |
|
"learning_rate": 2.520094392633815e-06, |
|
"loss": 1.0318, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 2.378449408672799, |
|
"grad_norm": 0.26376924748943736, |
|
"learning_rate": 2.5032051319114203e-06, |
|
"loss": 0.984, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 2.380639509417433, |
|
"grad_norm": 0.2659522914893529, |
|
"learning_rate": 2.4863645555962535e-06, |
|
"loss": 1.0074, |
|
"step": 5435 |
|
}, |
|
{ |
|
"epoch": 2.3828296101620676, |
|
"grad_norm": 0.26652092795558513, |
|
"learning_rate": 2.469572773051524e-06, |
|
"loss": 1.0002, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 2.3850197109067017, |
|
"grad_norm": 0.2721077024859027, |
|
"learning_rate": 2.4528298933235504e-06, |
|
"loss": 1.0012, |
|
"step": 5445 |
|
}, |
|
{ |
|
"epoch": 2.387209811651336, |
|
"grad_norm": 0.2714038096859095, |
|
"learning_rate": 2.4361360251410958e-06, |
|
"loss": 1.0395, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.3893999123959704, |
|
"grad_norm": 0.259937301986236, |
|
"learning_rate": 2.419491276914635e-06, |
|
"loss": 0.9941, |
|
"step": 5455 |
|
}, |
|
{ |
|
"epoch": 2.3915900131406045, |
|
"grad_norm": 0.2693725837181157, |
|
"learning_rate": 2.4028957567356504e-06, |
|
"loss": 1.0162, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.3937801138852386, |
|
"grad_norm": 0.2614519634595169, |
|
"learning_rate": 2.386349572375939e-06, |
|
"loss": 1.0273, |
|
"step": 5465 |
|
}, |
|
{ |
|
"epoch": 2.395970214629873, |
|
"grad_norm": 0.26516778440783295, |
|
"learning_rate": 2.3698528312869164e-06, |
|
"loss": 1.0143, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.3981603153745072, |
|
"grad_norm": 0.2617725830901367, |
|
"learning_rate": 2.3534056405988992e-06, |
|
"loss": 1.016, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 2.4003504161191414, |
|
"grad_norm": 0.26739935073726256, |
|
"learning_rate": 2.3370081071204355e-06, |
|
"loss": 1.0293, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.402540516863776, |
|
"grad_norm": 0.26968306076232107, |
|
"learning_rate": 2.3206603373375893e-06, |
|
"loss": 1.002, |
|
"step": 5485 |
|
}, |
|
{ |
|
"epoch": 2.40473061760841, |
|
"grad_norm": 0.27295936217638855, |
|
"learning_rate": 2.304362437413258e-06, |
|
"loss": 1.0021, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.406920718353044, |
|
"grad_norm": 0.2708387281457088, |
|
"learning_rate": 2.28811451318649e-06, |
|
"loss": 1.0225, |
|
"step": 5495 |
|
}, |
|
{ |
|
"epoch": 2.4091108190976787, |
|
"grad_norm": 0.26354121947186615, |
|
"learning_rate": 2.2719166701717786e-06, |
|
"loss": 1.0031, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.4113009198423128, |
|
"grad_norm": 0.2637002110660547, |
|
"learning_rate": 2.2557690135584008e-06, |
|
"loss": 1.0184, |
|
"step": 5505 |
|
}, |
|
{ |
|
"epoch": 2.413491020586947, |
|
"grad_norm": 0.2698479127999328, |
|
"learning_rate": 2.2396716482097135e-06, |
|
"loss": 1.0189, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.4156811213315814, |
|
"grad_norm": 0.26409864234783814, |
|
"learning_rate": 2.2236246786624794e-06, |
|
"loss": 1.0211, |
|
"step": 5515 |
|
}, |
|
{ |
|
"epoch": 2.4178712220762155, |
|
"grad_norm": 0.27268499867925555, |
|
"learning_rate": 2.207628209126198e-06, |
|
"loss": 1.0412, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.4200613228208496, |
|
"grad_norm": 0.2622606967406724, |
|
"learning_rate": 2.1916823434824087e-06, |
|
"loss": 1.0209, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 2.422251423565484, |
|
"grad_norm": 0.27635573904338784, |
|
"learning_rate": 2.175787185284042e-06, |
|
"loss": 1.0305, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.4244415243101183, |
|
"grad_norm": 0.25710396782274697, |
|
"learning_rate": 2.1599428377547137e-06, |
|
"loss": 1.0164, |
|
"step": 5535 |
|
}, |
|
{ |
|
"epoch": 2.4266316250547524, |
|
"grad_norm": 0.2633283094129851, |
|
"learning_rate": 2.144149403788092e-06, |
|
"loss": 1.0199, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.428821725799387, |
|
"grad_norm": 0.26415517310658565, |
|
"learning_rate": 2.1284069859472035e-06, |
|
"loss": 1.0223, |
|
"step": 5545 |
|
}, |
|
{ |
|
"epoch": 2.431011826544021, |
|
"grad_norm": 0.265930449211507, |
|
"learning_rate": 2.112715686463772e-06, |
|
"loss": 1.0098, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.433201927288655, |
|
"grad_norm": 0.25723984456483956, |
|
"learning_rate": 2.097075607237559e-06, |
|
"loss": 1.0248, |
|
"step": 5555 |
|
}, |
|
{ |
|
"epoch": 2.4353920280332897, |
|
"grad_norm": 0.2598804894344425, |
|
"learning_rate": 2.0814868498357042e-06, |
|
"loss": 1.0082, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.437582128777924, |
|
"grad_norm": 0.2697688309874498, |
|
"learning_rate": 2.065949515492054e-06, |
|
"loss": 1.0238, |
|
"step": 5565 |
|
}, |
|
{ |
|
"epoch": 2.439772229522558, |
|
"grad_norm": 0.27620146863759415, |
|
"learning_rate": 2.0504637051065212e-06, |
|
"loss": 1.0223, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.4419623302671924, |
|
"grad_norm": 0.26231043488916006, |
|
"learning_rate": 2.0350295192444135e-06, |
|
"loss": 1.0211, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 2.4441524310118266, |
|
"grad_norm": 0.2634847594383747, |
|
"learning_rate": 2.0196470581357865e-06, |
|
"loss": 1.0063, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.4463425317564607, |
|
"grad_norm": 0.2585906098421852, |
|
"learning_rate": 2.0043164216748034e-06, |
|
"loss": 1.0312, |
|
"step": 5585 |
|
}, |
|
{ |
|
"epoch": 2.448532632501095, |
|
"grad_norm": 0.2599866945351205, |
|
"learning_rate": 1.9890377094190626e-06, |
|
"loss": 1.0141, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.4507227332457293, |
|
"grad_norm": 0.26235044645446814, |
|
"learning_rate": 1.9738110205889803e-06, |
|
"loss": 1.0279, |
|
"step": 5595 |
|
}, |
|
{ |
|
"epoch": 2.4529128339903634, |
|
"grad_norm": 0.26350829783533997, |
|
"learning_rate": 1.958636454067119e-06, |
|
"loss": 1.0377, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.455102934734998, |
|
"grad_norm": 0.2697500929246074, |
|
"learning_rate": 1.9435141083975596e-06, |
|
"loss": 0.9988, |
|
"step": 5605 |
|
}, |
|
{ |
|
"epoch": 2.457293035479632, |
|
"grad_norm": 0.25991940084532544, |
|
"learning_rate": 1.9284440817852666e-06, |
|
"loss": 0.9961, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.459483136224266, |
|
"grad_norm": 0.2609678334326761, |
|
"learning_rate": 1.9134264720954353e-06, |
|
"loss": 1.0229, |
|
"step": 5615 |
|
}, |
|
{ |
|
"epoch": 2.4616732369689007, |
|
"grad_norm": 0.2563823694294751, |
|
"learning_rate": 1.8984613768528658e-06, |
|
"loss": 1.0031, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.463863337713535, |
|
"grad_norm": 0.26389657889097734, |
|
"learning_rate": 1.8835488932413272e-06, |
|
"loss": 0.9926, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 2.466053438458169, |
|
"grad_norm": 0.2679236979583576, |
|
"learning_rate": 1.868689118102931e-06, |
|
"loss": 1.0238, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.4682435392028035, |
|
"grad_norm": 0.2715746821595108, |
|
"learning_rate": 1.8538821479374968e-06, |
|
"loss": 1.024, |
|
"step": 5635 |
|
}, |
|
{ |
|
"epoch": 2.4704336399474376, |
|
"grad_norm": 0.26957317386333224, |
|
"learning_rate": 1.839128078901924e-06, |
|
"loss": 1.0096, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.4726237406920717, |
|
"grad_norm": 0.2622405945557025, |
|
"learning_rate": 1.8244270068095727e-06, |
|
"loss": 1.0381, |
|
"step": 5645 |
|
}, |
|
{ |
|
"epoch": 2.4748138414367062, |
|
"grad_norm": 0.2740756090414041, |
|
"learning_rate": 1.8097790271296378e-06, |
|
"loss": 1.0223, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.4770039421813403, |
|
"grad_norm": 0.2661221374455693, |
|
"learning_rate": 1.7951842349865323e-06, |
|
"loss": 1.025, |
|
"step": 5655 |
|
}, |
|
{ |
|
"epoch": 2.4791940429259745, |
|
"grad_norm": 0.25667749629367814, |
|
"learning_rate": 1.7806427251592717e-06, |
|
"loss": 0.9939, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.481384143670609, |
|
"grad_norm": 0.2647393667585526, |
|
"learning_rate": 1.7661545920808465e-06, |
|
"loss": 1.0166, |
|
"step": 5665 |
|
}, |
|
{ |
|
"epoch": 2.483574244415243, |
|
"grad_norm": 0.2571248265493812, |
|
"learning_rate": 1.7517199298376187e-06, |
|
"loss": 1.0135, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.485764345159877, |
|
"grad_norm": 0.26602653787423514, |
|
"learning_rate": 1.7373388321687145e-06, |
|
"loss": 1.0326, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 2.4879544459045118, |
|
"grad_norm": 0.25813672302306245, |
|
"learning_rate": 1.7230113924654047e-06, |
|
"loss": 1.0357, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.490144546649146, |
|
"grad_norm": 0.2579671571163789, |
|
"learning_rate": 1.7087377037705121e-06, |
|
"loss": 1.0311, |
|
"step": 5685 |
|
}, |
|
{ |
|
"epoch": 2.49233464739378, |
|
"grad_norm": 0.2563802679936426, |
|
"learning_rate": 1.694517858777781e-06, |
|
"loss": 1.0252, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.4945247481384145, |
|
"grad_norm": 0.2662164827240698, |
|
"learning_rate": 1.6803519498313126e-06, |
|
"loss": 1.0135, |
|
"step": 5695 |
|
}, |
|
{ |
|
"epoch": 2.4967148488830486, |
|
"grad_norm": 0.26122565812958937, |
|
"learning_rate": 1.666240068924937e-06, |
|
"loss": 1.0414, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.4989049496276827, |
|
"grad_norm": 0.2605998958832977, |
|
"learning_rate": 1.6521823077016253e-06, |
|
"loss": 1.0152, |
|
"step": 5705 |
|
}, |
|
{ |
|
"epoch": 2.5010950503723173, |
|
"grad_norm": 0.26609122798906276, |
|
"learning_rate": 1.638178757452894e-06, |
|
"loss": 1.0201, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.5032851511169514, |
|
"grad_norm": 0.26241620338438515, |
|
"learning_rate": 1.6242295091182082e-06, |
|
"loss": 0.9906, |
|
"step": 5715 |
|
}, |
|
{ |
|
"epoch": 2.5054752518615855, |
|
"grad_norm": 0.26393016094462074, |
|
"learning_rate": 1.6103346532844011e-06, |
|
"loss": 1.0312, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.50766535260622, |
|
"grad_norm": 0.256500142243546, |
|
"learning_rate": 1.5964942801850802e-06, |
|
"loss": 1.0223, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 2.509855453350854, |
|
"grad_norm": 0.26878064802677665, |
|
"learning_rate": 1.5827084797000336e-06, |
|
"loss": 1.0113, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.5120455540954882, |
|
"grad_norm": 0.2707677013243957, |
|
"learning_rate": 1.5689773413546571e-06, |
|
"loss": 0.9873, |
|
"step": 5735 |
|
}, |
|
{ |
|
"epoch": 2.514235654840123, |
|
"grad_norm": 0.26025385637699205, |
|
"learning_rate": 1.5553009543193677e-06, |
|
"loss": 1.002, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.516425755584757, |
|
"grad_norm": 0.26424240891929196, |
|
"learning_rate": 1.5416794074090258e-06, |
|
"loss": 1.0338, |
|
"step": 5745 |
|
}, |
|
{ |
|
"epoch": 2.518615856329391, |
|
"grad_norm": 0.2715208586256904, |
|
"learning_rate": 1.5281127890823633e-06, |
|
"loss": 1.0029, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.5208059570740255, |
|
"grad_norm": 0.26863340452995355, |
|
"learning_rate": 1.514601187441399e-06, |
|
"loss": 1.0311, |
|
"step": 5755 |
|
}, |
|
{ |
|
"epoch": 2.5229960578186597, |
|
"grad_norm": 0.26339075175746157, |
|
"learning_rate": 1.5011446902308714e-06, |
|
"loss": 1.0016, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.5251861585632938, |
|
"grad_norm": 0.2780659159629088, |
|
"learning_rate": 1.4877433848376688e-06, |
|
"loss": 1.0357, |
|
"step": 5765 |
|
}, |
|
{ |
|
"epoch": 2.5273762593079283, |
|
"grad_norm": 0.2824820354932007, |
|
"learning_rate": 1.4743973582902659e-06, |
|
"loss": 1.0451, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 2.5295663600525624, |
|
"grad_norm": 0.26541836104789196, |
|
"learning_rate": 1.4611066972581588e-06, |
|
"loss": 1.0393, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 2.5317564607971965, |
|
"grad_norm": 0.26392038520386774, |
|
"learning_rate": 1.4478714880512824e-06, |
|
"loss": 1.0098, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 2.533946561541831, |
|
"grad_norm": 0.2670274065680639, |
|
"learning_rate": 1.4346918166194811e-06, |
|
"loss": 1.0318, |
|
"step": 5785 |
|
}, |
|
{ |
|
"epoch": 2.536136662286465, |
|
"grad_norm": 0.26637327926132287, |
|
"learning_rate": 1.4215677685519335e-06, |
|
"loss": 1.0457, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 2.5383267630310993, |
|
"grad_norm": 0.2718315157534932, |
|
"learning_rate": 1.4084994290765918e-06, |
|
"loss": 1.0229, |
|
"step": 5795 |
|
}, |
|
{ |
|
"epoch": 2.540516863775734, |
|
"grad_norm": 0.265091011136329, |
|
"learning_rate": 1.3954868830596391e-06, |
|
"loss": 1.0135, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.542706964520368, |
|
"grad_norm": 0.27179242757473226, |
|
"learning_rate": 1.3825302150049313e-06, |
|
"loss": 1.0445, |
|
"step": 5805 |
|
}, |
|
{ |
|
"epoch": 2.544897065265002, |
|
"grad_norm": 0.2634719242658282, |
|
"learning_rate": 1.369629509053455e-06, |
|
"loss": 1.0252, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 2.5470871660096366, |
|
"grad_norm": 0.26428182587169274, |
|
"learning_rate": 1.3567848489827773e-06, |
|
"loss": 1.0268, |
|
"step": 5815 |
|
}, |
|
{ |
|
"epoch": 2.5492772667542707, |
|
"grad_norm": 0.27171679493352563, |
|
"learning_rate": 1.3439963182064996e-06, |
|
"loss": 1.0414, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.551467367498905, |
|
"grad_norm": 0.26868016597068656, |
|
"learning_rate": 1.3312639997737166e-06, |
|
"loss": 1.0059, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 2.5536574682435393, |
|
"grad_norm": 0.26796885929306313, |
|
"learning_rate": 1.3185879763684784e-06, |
|
"loss": 1.0229, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 2.5558475689881734, |
|
"grad_norm": 0.2747441448985468, |
|
"learning_rate": 1.3059683303092574e-06, |
|
"loss": 1.0383, |
|
"step": 5835 |
|
}, |
|
{ |
|
"epoch": 2.5580376697328076, |
|
"grad_norm": 0.2656630564773151, |
|
"learning_rate": 1.2934051435484107e-06, |
|
"loss": 1.017, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 2.560227770477442, |
|
"grad_norm": 0.2714058989519959, |
|
"learning_rate": 1.2808984976716421e-06, |
|
"loss": 1.0297, |
|
"step": 5845 |
|
}, |
|
{ |
|
"epoch": 2.562417871222076, |
|
"grad_norm": 0.2672456887749402, |
|
"learning_rate": 1.2684484738974778e-06, |
|
"loss": 1.0324, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.5646079719667103, |
|
"grad_norm": 0.26136420409749284, |
|
"learning_rate": 1.2560551530767384e-06, |
|
"loss": 1.0004, |
|
"step": 5855 |
|
}, |
|
{ |
|
"epoch": 2.566798072711345, |
|
"grad_norm": 0.2605185305648504, |
|
"learning_rate": 1.2437186156920167e-06, |
|
"loss": 1.0158, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 2.568988173455979, |
|
"grad_norm": 0.2617789541796642, |
|
"learning_rate": 1.231438941857156e-06, |
|
"loss": 1.0293, |
|
"step": 5865 |
|
}, |
|
{ |
|
"epoch": 2.571178274200613, |
|
"grad_norm": 0.26468651594530146, |
|
"learning_rate": 1.2192162113167117e-06, |
|
"loss": 1.0162, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 2.5733683749452476, |
|
"grad_norm": 0.26905249931056374, |
|
"learning_rate": 1.207050503445464e-06, |
|
"loss": 1.007, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 2.5755584756898817, |
|
"grad_norm": 0.2560987056326573, |
|
"learning_rate": 1.1949418972478743e-06, |
|
"loss": 0.9949, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 2.577748576434516, |
|
"grad_norm": 0.2659226997423034, |
|
"learning_rate": 1.1828904713575917e-06, |
|
"loss": 1.0291, |
|
"step": 5885 |
|
}, |
|
{ |
|
"epoch": 2.5799386771791504, |
|
"grad_norm": 0.252859900042723, |
|
"learning_rate": 1.1708963040369315e-06, |
|
"loss": 0.9893, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 2.5821287779237845, |
|
"grad_norm": 0.2626662868814629, |
|
"learning_rate": 1.1589594731763676e-06, |
|
"loss": 1.0025, |
|
"step": 5895 |
|
}, |
|
{ |
|
"epoch": 2.5843188786684186, |
|
"grad_norm": 0.26008729946148035, |
|
"learning_rate": 1.1470800562940355e-06, |
|
"loss": 1.0303, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.586508979413053, |
|
"grad_norm": 0.26338286175111314, |
|
"learning_rate": 1.1352581305352184e-06, |
|
"loss": 1.0236, |
|
"step": 5905 |
|
}, |
|
{ |
|
"epoch": 2.5886990801576872, |
|
"grad_norm": 0.2730522605992461, |
|
"learning_rate": 1.1234937726718508e-06, |
|
"loss": 1.0174, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 2.5908891809023213, |
|
"grad_norm": 0.26606670762402423, |
|
"learning_rate": 1.1117870591020207e-06, |
|
"loss": 1.0275, |
|
"step": 5915 |
|
}, |
|
{ |
|
"epoch": 2.593079281646956, |
|
"grad_norm": 0.2635576121376948, |
|
"learning_rate": 1.1001380658494688e-06, |
|
"loss": 1.0195, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 2.59526938239159, |
|
"grad_norm": 0.2632812590069056, |
|
"learning_rate": 1.088546868563104e-06, |
|
"loss": 1.0059, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 2.597459483136224, |
|
"grad_norm": 0.26590583110590243, |
|
"learning_rate": 1.077013542516504e-06, |
|
"loss": 1.0133, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 2.5996495838808586, |
|
"grad_norm": 0.2592095997153406, |
|
"learning_rate": 1.065538162607429e-06, |
|
"loss": 1.0439, |
|
"step": 5935 |
|
}, |
|
{ |
|
"epoch": 2.6018396846254928, |
|
"grad_norm": 0.26304009255367145, |
|
"learning_rate": 1.0541208033573325e-06, |
|
"loss": 1.0258, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 2.604029785370127, |
|
"grad_norm": 0.26783388307534434, |
|
"learning_rate": 1.0427615389108836e-06, |
|
"loss": 1.0203, |
|
"step": 5945 |
|
}, |
|
{ |
|
"epoch": 2.6062198861147614, |
|
"grad_norm": 0.25445386428299266, |
|
"learning_rate": 1.0314604430354803e-06, |
|
"loss": 1.0096, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.6084099868593955, |
|
"grad_norm": 0.25432480950933944, |
|
"learning_rate": 1.0202175891207777e-06, |
|
"loss": 1.0211, |
|
"step": 5955 |
|
}, |
|
{ |
|
"epoch": 2.6106000876040296, |
|
"grad_norm": 0.2599533384422445, |
|
"learning_rate": 1.0090330501781942e-06, |
|
"loss": 1.0381, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 2.612790188348664, |
|
"grad_norm": 0.2687854621922093, |
|
"learning_rate": 9.979068988404607e-07, |
|
"loss": 1.0324, |
|
"step": 5965 |
|
}, |
|
{ |
|
"epoch": 2.6149802890932983, |
|
"grad_norm": 0.2747054491371782, |
|
"learning_rate": 9.868392073611277e-07, |
|
"loss": 1.016, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 2.6171703898379324, |
|
"grad_norm": 0.2605323280615755, |
|
"learning_rate": 9.75830047614117e-07, |
|
"loss": 1.0221, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 2.619360490582567, |
|
"grad_norm": 0.26132755345912295, |
|
"learning_rate": 9.648794910932334e-07, |
|
"loss": 1.0146, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 2.621550591327201, |
|
"grad_norm": 0.26679877508377275, |
|
"learning_rate": 9.539876089117128e-07, |
|
"loss": 1.0129, |
|
"step": 5985 |
|
}, |
|
{ |
|
"epoch": 2.623740692071835, |
|
"grad_norm": 0.25873816940118294, |
|
"learning_rate": 9.431544718017615e-07, |
|
"loss": 1.0215, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 2.6259307928164697, |
|
"grad_norm": 0.26420753117085755, |
|
"learning_rate": 9.323801501140927e-07, |
|
"loss": 1.0291, |
|
"step": 5995 |
|
}, |
|
{ |
|
"epoch": 2.628120893561104, |
|
"grad_norm": 0.2673456929005932, |
|
"learning_rate": 9.216647138174695e-07, |
|
"loss": 1.0328, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.630310994305738, |
|
"grad_norm": 0.27110840415327153, |
|
"learning_rate": 9.110082324982506e-07, |
|
"loss": 1.0119, |
|
"step": 6005 |
|
}, |
|
{ |
|
"epoch": 2.6325010950503724, |
|
"grad_norm": 0.25958577599037735, |
|
"learning_rate": 9.004107753599389e-07, |
|
"loss": 0.9982, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 2.6346911957950065, |
|
"grad_norm": 0.26952299942882346, |
|
"learning_rate": 8.898724112227353e-07, |
|
"loss": 0.9979, |
|
"step": 6015 |
|
}, |
|
{ |
|
"epoch": 2.6368812965396407, |
|
"grad_norm": 0.25890135882874327, |
|
"learning_rate": 8.793932085230894e-07, |
|
"loss": 1.001, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 2.639071397284275, |
|
"grad_norm": 0.26093797224431725, |
|
"learning_rate": 8.689732353132518e-07, |
|
"loss": 1.0008, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 2.6412614980289093, |
|
"grad_norm": 0.2630437536693983, |
|
"learning_rate": 8.586125592608363e-07, |
|
"loss": 0.9936, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 2.6434515987735434, |
|
"grad_norm": 0.2781661808204497, |
|
"learning_rate": 8.483112476483757e-07, |
|
"loss": 1.0029, |
|
"step": 6035 |
|
}, |
|
{ |
|
"epoch": 2.645641699518178, |
|
"grad_norm": 0.266907247409542, |
|
"learning_rate": 8.380693673728957e-07, |
|
"loss": 1.0256, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 2.647831800262812, |
|
"grad_norm": 0.27026510023326716, |
|
"learning_rate": 8.278869849454718e-07, |
|
"loss": 1.0441, |
|
"step": 6045 |
|
}, |
|
{ |
|
"epoch": 2.650021901007446, |
|
"grad_norm": 0.2715664818445047, |
|
"learning_rate": 8.177641664907887e-07, |
|
"loss": 1.0318, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.6522120017520807, |
|
"grad_norm": 0.262452995725068, |
|
"learning_rate": 8.077009777467337e-07, |
|
"loss": 1.0148, |
|
"step": 6055 |
|
}, |
|
{ |
|
"epoch": 2.654402102496715, |
|
"grad_norm": 0.26944180313288235, |
|
"learning_rate": 7.976974840639484e-07, |
|
"loss": 1.0268, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 2.656592203241349, |
|
"grad_norm": 0.26970515074858986, |
|
"learning_rate": 7.8775375040542e-07, |
|
"loss": 1.035, |
|
"step": 6065 |
|
}, |
|
{ |
|
"epoch": 2.6587823039859835, |
|
"grad_norm": 0.26603146543022527, |
|
"learning_rate": 7.778698413460462e-07, |
|
"loss": 1.0385, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 2.6609724047306176, |
|
"grad_norm": 0.26268076522726097, |
|
"learning_rate": 7.68045821072223e-07, |
|
"loss": 1.0205, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 2.6631625054752517, |
|
"grad_norm": 0.26151474090954424, |
|
"learning_rate": 7.582817533814313e-07, |
|
"loss": 1.0076, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 2.6653526062198862, |
|
"grad_norm": 0.26380289604041596, |
|
"learning_rate": 7.485777016818119e-07, |
|
"loss": 1.0047, |
|
"step": 6085 |
|
}, |
|
{ |
|
"epoch": 2.6675427069645203, |
|
"grad_norm": 0.2640739120564622, |
|
"learning_rate": 7.389337289917653e-07, |
|
"loss": 1.0238, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 2.6697328077091544, |
|
"grad_norm": 0.25824476005629765, |
|
"learning_rate": 7.293498979395353e-07, |
|
"loss": 1.033, |
|
"step": 6095 |
|
}, |
|
{ |
|
"epoch": 2.671922908453789, |
|
"grad_norm": 0.2761915579791404, |
|
"learning_rate": 7.198262707628e-07, |
|
"loss": 1.0457, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.674113009198423, |
|
"grad_norm": 0.2591538186339242, |
|
"learning_rate": 7.103629093082776e-07, |
|
"loss": 1.0104, |
|
"step": 6105 |
|
}, |
|
{ |
|
"epoch": 2.676303109943057, |
|
"grad_norm": 0.2667664421930206, |
|
"learning_rate": 7.009598750313195e-07, |
|
"loss": 1.0127, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 2.6784932106876918, |
|
"grad_norm": 0.2718642700436106, |
|
"learning_rate": 6.916172289955059e-07, |
|
"loss": 1.0279, |
|
"step": 6115 |
|
}, |
|
{ |
|
"epoch": 2.680683311432326, |
|
"grad_norm": 0.268922489593917, |
|
"learning_rate": 6.823350318722555e-07, |
|
"loss": 1.0236, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 2.68287341217696, |
|
"grad_norm": 0.26563093628712836, |
|
"learning_rate": 6.731133439404291e-07, |
|
"loss": 1.0229, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 2.6850635129215945, |
|
"grad_norm": 0.27212083901058975, |
|
"learning_rate": 6.63952225085942e-07, |
|
"loss": 1.0072, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 2.6872536136662286, |
|
"grad_norm": 0.27909050468631363, |
|
"learning_rate": 6.548517348013739e-07, |
|
"loss": 1.0186, |
|
"step": 6135 |
|
}, |
|
{ |
|
"epoch": 2.6894437144108627, |
|
"grad_norm": 0.2634021147116165, |
|
"learning_rate": 6.458119321855705e-07, |
|
"loss": 1.0383, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 2.6916338151554973, |
|
"grad_norm": 0.2604570689525165, |
|
"learning_rate": 6.368328759432829e-07, |
|
"loss": 1.0018, |
|
"step": 6145 |
|
}, |
|
{ |
|
"epoch": 2.6938239159001314, |
|
"grad_norm": 0.2651829616366712, |
|
"learning_rate": 6.279146243847622e-07, |
|
"loss": 1.0262, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.6960140166447655, |
|
"grad_norm": 0.2626234864167782, |
|
"learning_rate": 6.190572354254031e-07, |
|
"loss": 1.0094, |
|
"step": 6155 |
|
}, |
|
{ |
|
"epoch": 2.6982041173894, |
|
"grad_norm": 0.2676356622187676, |
|
"learning_rate": 6.102607665853477e-07, |
|
"loss": 1.023, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 2.700394218134034, |
|
"grad_norm": 0.2573872899177252, |
|
"learning_rate": 6.015252749891231e-07, |
|
"loss": 1.0111, |
|
"step": 6165 |
|
}, |
|
{ |
|
"epoch": 2.7025843188786682, |
|
"grad_norm": 0.26768422170634076, |
|
"learning_rate": 5.928508173652736e-07, |
|
"loss": 1.0414, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 2.704774419623303, |
|
"grad_norm": 0.2702917236559311, |
|
"learning_rate": 5.842374500459791e-07, |
|
"loss": 1.0123, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 2.706964520367937, |
|
"grad_norm": 0.26063052725857316, |
|
"learning_rate": 5.756852289667048e-07, |
|
"loss": 1.0143, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 2.709154621112571, |
|
"grad_norm": 0.2736945051716679, |
|
"learning_rate": 5.671942096658256e-07, |
|
"loss": 1.0219, |
|
"step": 6185 |
|
}, |
|
{ |
|
"epoch": 2.7113447218572055, |
|
"grad_norm": 0.26033814003152445, |
|
"learning_rate": 5.587644472842691e-07, |
|
"loss": 1.027, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 2.7135348226018396, |
|
"grad_norm": 0.2747695664781286, |
|
"learning_rate": 5.503959965651651e-07, |
|
"loss": 1.0328, |
|
"step": 6195 |
|
}, |
|
{ |
|
"epoch": 2.7157249233464738, |
|
"grad_norm": 0.2639436223194078, |
|
"learning_rate": 5.420889118534767e-07, |
|
"loss": 1.0184, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.7179150240911083, |
|
"grad_norm": 0.26625978732822686, |
|
"learning_rate": 5.33843247095659e-07, |
|
"loss": 1.0137, |
|
"step": 6205 |
|
}, |
|
{ |
|
"epoch": 2.7201051248357424, |
|
"grad_norm": 0.2649792774062934, |
|
"learning_rate": 5.256590558393016e-07, |
|
"loss": 1.0195, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 2.7222952255803765, |
|
"grad_norm": 0.2617036738785984, |
|
"learning_rate": 5.175363912327835e-07, |
|
"loss": 1.017, |
|
"step": 6215 |
|
}, |
|
{ |
|
"epoch": 2.724485326325011, |
|
"grad_norm": 0.2699060497399551, |
|
"learning_rate": 5.094753060249302e-07, |
|
"loss": 1.0291, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 2.726675427069645, |
|
"grad_norm": 0.2649872703564233, |
|
"learning_rate": 5.014758525646679e-07, |
|
"loss": 0.9908, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 2.7288655278142793, |
|
"grad_norm": 0.2651642492888453, |
|
"learning_rate": 4.935380828006797e-07, |
|
"loss": 1.0137, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 2.731055628558914, |
|
"grad_norm": 0.26425590598727583, |
|
"learning_rate": 4.856620482810803e-07, |
|
"loss": 1.0154, |
|
"step": 6235 |
|
}, |
|
{ |
|
"epoch": 2.733245729303548, |
|
"grad_norm": 0.2761774875687218, |
|
"learning_rate": 4.778478001530673e-07, |
|
"loss": 1.0252, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 2.735435830048182, |
|
"grad_norm": 0.2690718325322844, |
|
"learning_rate": 4.7009538916260254e-07, |
|
"loss": 1.0211, |
|
"step": 6245 |
|
}, |
|
{ |
|
"epoch": 2.7376259307928166, |
|
"grad_norm": 0.26094127741106216, |
|
"learning_rate": 4.624048656540703e-07, |
|
"loss": 1.0018, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.7398160315374507, |
|
"grad_norm": 0.26073608470470544, |
|
"learning_rate": 4.547762795699551e-07, |
|
"loss": 1.0293, |
|
"step": 6255 |
|
}, |
|
{ |
|
"epoch": 2.742006132282085, |
|
"grad_norm": 0.26176116543356426, |
|
"learning_rate": 4.4720968045052327e-07, |
|
"loss": 1.0281, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 2.7441962330267193, |
|
"grad_norm": 0.2587633523361001, |
|
"learning_rate": 4.3970511743349095e-07, |
|
"loss": 1.0119, |
|
"step": 6265 |
|
}, |
|
{ |
|
"epoch": 2.7463863337713534, |
|
"grad_norm": 0.26467827125109494, |
|
"learning_rate": 4.3226263925371323e-07, |
|
"loss": 1.0189, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 2.7485764345159875, |
|
"grad_norm": 0.26838751461155635, |
|
"learning_rate": 4.248822942428643e-07, |
|
"loss": 1.0141, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 2.750766535260622, |
|
"grad_norm": 0.27455488470046974, |
|
"learning_rate": 4.175641303291178e-07, |
|
"loss": 1.0303, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 2.752956636005256, |
|
"grad_norm": 0.2608592038076227, |
|
"learning_rate": 4.103081950368526e-07, |
|
"loss": 1.0201, |
|
"step": 6285 |
|
}, |
|
{ |
|
"epoch": 2.7551467367498903, |
|
"grad_norm": 0.26905266300546427, |
|
"learning_rate": 4.0311453548632084e-07, |
|
"loss": 1.0504, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 2.757336837494525, |
|
"grad_norm": 0.26225270534619144, |
|
"learning_rate": 3.9598319839336484e-07, |
|
"loss": 1.0301, |
|
"step": 6295 |
|
}, |
|
{ |
|
"epoch": 2.759526938239159, |
|
"grad_norm": 0.275637498653482, |
|
"learning_rate": 3.8891423006909623e-07, |
|
"loss": 1.0344, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.761717038983793, |
|
"grad_norm": 0.25859464573598573, |
|
"learning_rate": 3.819076764196028e-07, |
|
"loss": 1.023, |
|
"step": 6305 |
|
}, |
|
{ |
|
"epoch": 2.7639071397284276, |
|
"grad_norm": 0.2623767091254652, |
|
"learning_rate": 3.7496358294565325e-07, |
|
"loss": 1.0355, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 2.7660972404730617, |
|
"grad_norm": 0.2614628167949124, |
|
"learning_rate": 3.6808199474239413e-07, |
|
"loss": 1.0074, |
|
"step": 6315 |
|
}, |
|
{ |
|
"epoch": 2.768287341217696, |
|
"grad_norm": 0.2589581748365727, |
|
"learning_rate": 3.612629564990622e-07, |
|
"loss": 1.0012, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 2.7704774419623304, |
|
"grad_norm": 0.26135089658558563, |
|
"learning_rate": 3.545065124986935e-07, |
|
"loss": 1.0516, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 2.7726675427069645, |
|
"grad_norm": 0.2550260966747239, |
|
"learning_rate": 3.4781270661783495e-07, |
|
"loss": 1.0309, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 2.7748576434515986, |
|
"grad_norm": 0.26241337371995144, |
|
"learning_rate": 3.4118158232626095e-07, |
|
"loss": 1.0102, |
|
"step": 6335 |
|
}, |
|
{ |
|
"epoch": 2.777047744196233, |
|
"grad_norm": 0.2717236091770032, |
|
"learning_rate": 3.346131826866872e-07, |
|
"loss": 1.0031, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 2.7792378449408672, |
|
"grad_norm": 0.2627537948727433, |
|
"learning_rate": 3.281075503544928e-07, |
|
"loss": 1.0246, |
|
"step": 6345 |
|
}, |
|
{ |
|
"epoch": 2.7814279456855013, |
|
"grad_norm": 0.2733462694900371, |
|
"learning_rate": 3.216647275774509e-07, |
|
"loss": 1.0326, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.783618046430136, |
|
"grad_norm": 0.2653361262256602, |
|
"learning_rate": 3.1528475619543755e-07, |
|
"loss": 1.0336, |
|
"step": 6355 |
|
}, |
|
{ |
|
"epoch": 2.78580814717477, |
|
"grad_norm": 0.2637059071556029, |
|
"learning_rate": 3.0896767764018066e-07, |
|
"loss": 1.0447, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 2.787998247919404, |
|
"grad_norm": 0.2643417975733627, |
|
"learning_rate": 3.0271353293497174e-07, |
|
"loss": 1.0256, |
|
"step": 6365 |
|
}, |
|
{ |
|
"epoch": 2.7901883486640386, |
|
"grad_norm": 0.2664848830571204, |
|
"learning_rate": 2.9652236269441027e-07, |
|
"loss": 1.0205, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 2.7923784494086727, |
|
"grad_norm": 0.25803930539316744, |
|
"learning_rate": 2.903942071241406e-07, |
|
"loss": 1.0258, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 2.794568550153307, |
|
"grad_norm": 0.2595092497202521, |
|
"learning_rate": 2.843291060205855e-07, |
|
"loss": 1.0129, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 2.7967586508979414, |
|
"grad_norm": 0.26891951593800467, |
|
"learning_rate": 2.7832709877068985e-07, |
|
"loss": 1.0174, |
|
"step": 6385 |
|
}, |
|
{ |
|
"epoch": 2.7989487516425755, |
|
"grad_norm": 0.2618144209855571, |
|
"learning_rate": 2.723882243516685e-07, |
|
"loss": 1.0289, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 2.8011388523872096, |
|
"grad_norm": 0.26115034114899394, |
|
"learning_rate": 2.665125213307429e-07, |
|
"loss": 1.0096, |
|
"step": 6395 |
|
}, |
|
{ |
|
"epoch": 2.803328953131844, |
|
"grad_norm": 0.2635263953836736, |
|
"learning_rate": 2.6070002786490523e-07, |
|
"loss": 1.034, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.8055190538764783, |
|
"grad_norm": 0.26418026561714664, |
|
"learning_rate": 2.549507817006602e-07, |
|
"loss": 0.9963, |
|
"step": 6405 |
|
}, |
|
{ |
|
"epoch": 2.8077091546211124, |
|
"grad_norm": 0.2603026964757617, |
|
"learning_rate": 2.492648201737791e-07, |
|
"loss": 1.0178, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 2.809899255365747, |
|
"grad_norm": 0.2627929976284597, |
|
"learning_rate": 2.4364218020906605e-07, |
|
"loss": 1.0445, |
|
"step": 6415 |
|
}, |
|
{ |
|
"epoch": 2.812089356110381, |
|
"grad_norm": 0.2669549562791691, |
|
"learning_rate": 2.3808289832011112e-07, |
|
"loss": 1.0066, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 2.814279456855015, |
|
"grad_norm": 0.2721770667997164, |
|
"learning_rate": 2.325870106090544e-07, |
|
"loss": 1.0373, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 2.8164695575996497, |
|
"grad_norm": 0.26354214498011763, |
|
"learning_rate": 2.2715455276635323e-07, |
|
"loss": 1.0289, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 2.818659658344284, |
|
"grad_norm": 0.27146122428424413, |
|
"learning_rate": 2.2178556007054876e-07, |
|
"loss": 1.0225, |
|
"step": 6435 |
|
}, |
|
{ |
|
"epoch": 2.820849759088918, |
|
"grad_norm": 0.2680864657143132, |
|
"learning_rate": 2.164800673880385e-07, |
|
"loss": 0.9902, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 2.8230398598335524, |
|
"grad_norm": 0.256788359012176, |
|
"learning_rate": 2.1123810917284883e-07, |
|
"loss": 1.0104, |
|
"step": 6445 |
|
}, |
|
{ |
|
"epoch": 2.8252299605781865, |
|
"grad_norm": 0.25921069217588105, |
|
"learning_rate": 2.0605971946641045e-07, |
|
"loss": 1.018, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.8274200613228206, |
|
"grad_norm": 0.262881975020387, |
|
"learning_rate": 2.0094493189734e-07, |
|
"loss": 1.0277, |
|
"step": 6455 |
|
}, |
|
{ |
|
"epoch": 2.829610162067455, |
|
"grad_norm": 0.26033018837371813, |
|
"learning_rate": 1.958937796812177e-07, |
|
"loss": 1.0266, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 2.8318002628120893, |
|
"grad_norm": 0.2725677982906727, |
|
"learning_rate": 1.9090629562037777e-07, |
|
"loss": 1.0311, |
|
"step": 6465 |
|
}, |
|
{ |
|
"epoch": 2.8339903635567234, |
|
"grad_norm": 0.2578506507690712, |
|
"learning_rate": 1.8598251210368624e-07, |
|
"loss": 1.0258, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 2.836180464301358, |
|
"grad_norm": 0.26040068151802814, |
|
"learning_rate": 1.811224611063378e-07, |
|
"loss": 1.0254, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 2.838370565045992, |
|
"grad_norm": 0.2654370924009657, |
|
"learning_rate": 1.7632617418965047e-07, |
|
"loss": 1.0264, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 2.840560665790626, |
|
"grad_norm": 0.2624447866987324, |
|
"learning_rate": 1.7159368250085128e-07, |
|
"loss": 1.008, |
|
"step": 6485 |
|
}, |
|
{ |
|
"epoch": 2.8427507665352607, |
|
"grad_norm": 0.2693327061312894, |
|
"learning_rate": 1.66925016772882e-07, |
|
"loss": 1.0328, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 2.844940867279895, |
|
"grad_norm": 0.2623948762195147, |
|
"learning_rate": 1.6232020732419363e-07, |
|
"loss": 1.025, |
|
"step": 6495 |
|
}, |
|
{ |
|
"epoch": 2.847130968024529, |
|
"grad_norm": 0.2682313581066649, |
|
"learning_rate": 1.5777928405855681e-07, |
|
"loss": 1.0533, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.8493210687691635, |
|
"grad_norm": 0.2634435740939065, |
|
"learning_rate": 1.5330227646486063e-07, |
|
"loss": 1.0012, |
|
"step": 6505 |
|
}, |
|
{ |
|
"epoch": 2.8515111695137976, |
|
"grad_norm": 0.2648570311945631, |
|
"learning_rate": 1.4888921361692288e-07, |
|
"loss": 1.0111, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 2.8537012702584317, |
|
"grad_norm": 0.2523702071185083, |
|
"learning_rate": 1.4454012417330688e-07, |
|
"loss": 1.0203, |
|
"step": 6515 |
|
}, |
|
{ |
|
"epoch": 2.8558913710030662, |
|
"grad_norm": 0.25829191001092155, |
|
"learning_rate": 1.402550363771271e-07, |
|
"loss": 1.0205, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 2.8580814717477003, |
|
"grad_norm": 0.2711962263676663, |
|
"learning_rate": 1.3603397805586837e-07, |
|
"loss": 1.0012, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 2.8602715724923344, |
|
"grad_norm": 0.26761518076252255, |
|
"learning_rate": 1.3187697662121135e-07, |
|
"loss": 1.0025, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 2.862461673236969, |
|
"grad_norm": 0.25953013799411634, |
|
"learning_rate": 1.2778405906884173e-07, |
|
"loss": 1.0326, |
|
"step": 6535 |
|
}, |
|
{ |
|
"epoch": 2.864651773981603, |
|
"grad_norm": 0.26481196168192916, |
|
"learning_rate": 1.2375525197829142e-07, |
|
"loss": 1.0117, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 2.866841874726237, |
|
"grad_norm": 0.26480303588564624, |
|
"learning_rate": 1.197905815127498e-07, |
|
"loss": 1.0092, |
|
"step": 6545 |
|
}, |
|
{ |
|
"epoch": 2.8690319754708717, |
|
"grad_norm": 0.26673811546819176, |
|
"learning_rate": 1.158900734189039e-07, |
|
"loss": 1.0354, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.871222076215506, |
|
"grad_norm": 0.25622503188012385, |
|
"learning_rate": 1.1205375302677068e-07, |
|
"loss": 0.9949, |
|
"step": 6555 |
|
}, |
|
{ |
|
"epoch": 2.87341217696014, |
|
"grad_norm": 0.26585114917527647, |
|
"learning_rate": 1.0828164524952611e-07, |
|
"loss": 1.0129, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 2.8756022777047745, |
|
"grad_norm": 0.2658125353437615, |
|
"learning_rate": 1.0457377458334972e-07, |
|
"loss": 1.0309, |
|
"step": 6565 |
|
}, |
|
{ |
|
"epoch": 2.8777923784494086, |
|
"grad_norm": 0.2644621372001252, |
|
"learning_rate": 1.0093016510726361e-07, |
|
"loss": 1.0238, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 2.8799824791940427, |
|
"grad_norm": 0.26285181246145756, |
|
"learning_rate": 9.735084048297482e-08, |
|
"loss": 1.057, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 2.8821725799386773, |
|
"grad_norm": 0.26347424345721254, |
|
"learning_rate": 9.383582395472435e-08, |
|
"loss": 1.0172, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 2.8843626806833114, |
|
"grad_norm": 0.25880950335332537, |
|
"learning_rate": 9.038513834913165e-08, |
|
"loss": 1.0102, |
|
"step": 6585 |
|
}, |
|
{ |
|
"epoch": 2.8865527814279455, |
|
"grad_norm": 0.2626233208204072, |
|
"learning_rate": 8.699880607505151e-08, |
|
"loss": 1.034, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 2.88874288217258, |
|
"grad_norm": 0.26481783365515854, |
|
"learning_rate": 8.367684912342522e-08, |
|
"loss": 1.0252, |
|
"step": 6595 |
|
}, |
|
{ |
|
"epoch": 2.890932982917214, |
|
"grad_norm": 0.25531476024902067, |
|
"learning_rate": 8.041928906714069e-08, |
|
"loss": 1.0205, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.8931230836618482, |
|
"grad_norm": 0.2622947023545746, |
|
"learning_rate": 7.722614706088815e-08, |
|
"loss": 0.9975, |
|
"step": 6605 |
|
}, |
|
{ |
|
"epoch": 2.8953131844064828, |
|
"grad_norm": 0.26062229732752173, |
|
"learning_rate": 7.409744384102691e-08, |
|
"loss": 1.0113, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 2.897503285151117, |
|
"grad_norm": 0.2723792115437162, |
|
"learning_rate": 7.103319972544653e-08, |
|
"loss": 1.0068, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 2.899693385895751, |
|
"grad_norm": 0.2616779503781998, |
|
"learning_rate": 6.803343461344037e-08, |
|
"loss": 1.0088, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 2.9018834866403855, |
|
"grad_norm": 0.2525101695328338, |
|
"learning_rate": 6.509816798557112e-08, |
|
"loss": 1.0082, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 2.9040735873850196, |
|
"grad_norm": 0.25967364820532146, |
|
"learning_rate": 6.222741890354655e-08, |
|
"loss": 0.9977, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 2.9062636881296537, |
|
"grad_norm": 0.26366528034026343, |
|
"learning_rate": 5.942120601009627e-08, |
|
"loss": 1.0125, |
|
"step": 6635 |
|
}, |
|
{ |
|
"epoch": 2.9084537888742883, |
|
"grad_norm": 0.25629635035286313, |
|
"learning_rate": 5.667954752884841e-08, |
|
"loss": 1.0059, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 2.9106438896189224, |
|
"grad_norm": 0.2596464176195038, |
|
"learning_rate": 5.400246126421649e-08, |
|
"loss": 1.008, |
|
"step": 6645 |
|
}, |
|
{ |
|
"epoch": 2.9128339903635565, |
|
"grad_norm": 0.2679323332344789, |
|
"learning_rate": 5.13899646012761e-08, |
|
"loss": 1.0508, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.915024091108191, |
|
"grad_norm": 0.26006077791672805, |
|
"learning_rate": 4.8842074505658366e-08, |
|
"loss": 1.0084, |
|
"step": 6655 |
|
}, |
|
{ |
|
"epoch": 2.917214191852825, |
|
"grad_norm": 0.2636907745428655, |
|
"learning_rate": 4.635880752343669e-08, |
|
"loss": 1.0357, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 2.9194042925974593, |
|
"grad_norm": 0.26795706149424414, |
|
"learning_rate": 4.394017978101905e-08, |
|
"loss": 1.0348, |
|
"step": 6665 |
|
}, |
|
{ |
|
"epoch": 2.921594393342094, |
|
"grad_norm": 0.2608332255135537, |
|
"learning_rate": 4.1586206985047006e-08, |
|
"loss": 1.0221, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 2.923784494086728, |
|
"grad_norm": 0.26982387956347165, |
|
"learning_rate": 3.9296904422289064e-08, |
|
"loss": 1.0326, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 2.925974594831362, |
|
"grad_norm": 0.26654827231521183, |
|
"learning_rate": 3.707228695954301e-08, |
|
"loss": 1.0227, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 2.9281646955759966, |
|
"grad_norm": 0.26353562404229947, |
|
"learning_rate": 3.491236904354156e-08, |
|
"loss": 1.0156, |
|
"step": 6685 |
|
}, |
|
{ |
|
"epoch": 2.9303547963206307, |
|
"grad_norm": 0.26222190486801167, |
|
"learning_rate": 3.28171647008535e-08, |
|
"loss": 0.9871, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 2.9325448970652648, |
|
"grad_norm": 0.26937698718561326, |
|
"learning_rate": 3.0786687537800454e-08, |
|
"loss": 1.0158, |
|
"step": 6695 |
|
}, |
|
{ |
|
"epoch": 2.9347349978098993, |
|
"grad_norm": 0.2667931921324664, |
|
"learning_rate": 2.8820950740358066e-08, |
|
"loss": 1.0244, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.9369250985545334, |
|
"grad_norm": 0.25675719984449435, |
|
"learning_rate": 2.691996707408051e-08, |
|
"loss": 1.0252, |
|
"step": 6705 |
|
}, |
|
{ |
|
"epoch": 2.9391151992991675, |
|
"grad_norm": 0.26641359009583415, |
|
"learning_rate": 2.5083748884012772e-08, |
|
"loss": 1.0232, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 2.941305300043802, |
|
"grad_norm": 0.2643669870540917, |
|
"learning_rate": 2.3312308094607382e-08, |
|
"loss": 1.0402, |
|
"step": 6715 |
|
}, |
|
{ |
|
"epoch": 2.943495400788436, |
|
"grad_norm": 0.2640482226409207, |
|
"learning_rate": 2.1605656209656713e-08, |
|
"loss": 1.0123, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 2.9456855015330703, |
|
"grad_norm": 0.257416143001543, |
|
"learning_rate": 1.996380431220635e-08, |
|
"loss": 1.0018, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 2.947875602277705, |
|
"grad_norm": 0.25925869910092936, |
|
"learning_rate": 1.838676306448961e-08, |
|
"loss": 1.0105, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 2.950065703022339, |
|
"grad_norm": 0.2682720189373006, |
|
"learning_rate": 1.687454270786204e-08, |
|
"loss": 1.0258, |
|
"step": 6735 |
|
}, |
|
{ |
|
"epoch": 2.952255803766973, |
|
"grad_norm": 0.2638121872050763, |
|
"learning_rate": 1.542715306272591e-08, |
|
"loss": 1.0398, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 2.9544459045116076, |
|
"grad_norm": 0.2552510538750082, |
|
"learning_rate": 1.4044603528471368e-08, |
|
"loss": 1.0221, |
|
"step": 6745 |
|
}, |
|
{ |
|
"epoch": 2.9566360052562417, |
|
"grad_norm": 0.2660512986505126, |
|
"learning_rate": 1.2726903083417619e-08, |
|
"loss": 1.0199, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.958826106000876, |
|
"grad_norm": 0.26097986341434426, |
|
"learning_rate": 1.1474060284748512e-08, |
|
"loss": 0.9941, |
|
"step": 6755 |
|
}, |
|
{ |
|
"epoch": 2.9610162067455104, |
|
"grad_norm": 0.2696098253592492, |
|
"learning_rate": 1.0286083268463698e-08, |
|
"loss": 1.0227, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 2.9632063074901445, |
|
"grad_norm": 0.2651201330068607, |
|
"learning_rate": 9.16297974931979e-09, |
|
"loss": 1.0469, |
|
"step": 6765 |
|
}, |
|
{ |
|
"epoch": 2.9653964082347786, |
|
"grad_norm": 0.261068060678048, |
|
"learning_rate": 8.104757020784838e-09, |
|
"loss": 1.0203, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 2.967586508979413, |
|
"grad_norm": 0.26764709627692834, |
|
"learning_rate": 7.111421954989484e-09, |
|
"loss": 1.0379, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 2.9697766097240472, |
|
"grad_norm": 0.26991775591350575, |
|
"learning_rate": 6.182981002679223e-09, |
|
"loss": 1.027, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 2.9719667104686813, |
|
"grad_norm": 0.2662528611485649, |
|
"learning_rate": 5.31944019317665e-09, |
|
"loss": 1.0146, |
|
"step": 6785 |
|
}, |
|
{ |
|
"epoch": 2.974156811213316, |
|
"grad_norm": 0.265221109043864, |
|
"learning_rate": 4.52080513434372e-09, |
|
"loss": 1.0186, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 2.97634691195795, |
|
"grad_norm": 0.26743085714777837, |
|
"learning_rate": 3.7870810125384406e-09, |
|
"loss": 1.0064, |
|
"step": 6795 |
|
}, |
|
{ |
|
"epoch": 2.978537012702584, |
|
"grad_norm": 0.26402623363889577, |
|
"learning_rate": 3.118272592587124e-09, |
|
"loss": 1.0344, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.9807271134472186, |
|
"grad_norm": 0.25773362294157315, |
|
"learning_rate": 2.5143842177510758e-09, |
|
"loss": 1.0084, |
|
"step": 6805 |
|
}, |
|
{ |
|
"epoch": 2.9829172141918527, |
|
"grad_norm": 0.26980590288864464, |
|
"learning_rate": 1.975419809699952e-09, |
|
"loss": 1.0227, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 2.985107314936487, |
|
"grad_norm": 0.2530142845867315, |
|
"learning_rate": 1.5013828684840025e-09, |
|
"loss": 0.9992, |
|
"step": 6815 |
|
}, |
|
{ |
|
"epoch": 2.9872974156811214, |
|
"grad_norm": 0.26702416469760076, |
|
"learning_rate": 1.0922764725140867e-09, |
|
"loss": 1.0303, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 2.9894875164257555, |
|
"grad_norm": 0.26248596298655585, |
|
"learning_rate": 7.481032785372488e-10, |
|
"loss": 1.0215, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 2.9916776171703896, |
|
"grad_norm": 0.26614527758419954, |
|
"learning_rate": 4.688655216256167e-10, |
|
"loss": 1.0164, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 2.993867717915024, |
|
"grad_norm": 0.2620390464236342, |
|
"learning_rate": 2.545650151586365e-10, |
|
"loss": 1.0002, |
|
"step": 6835 |
|
}, |
|
{ |
|
"epoch": 2.9960578186596583, |
|
"grad_norm": 0.25671053477383604, |
|
"learning_rate": 1.0520315080753129e-10, |
|
"loss": 1.0336, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 2.9982479194042924, |
|
"grad_norm": 0.26588732851444397, |
|
"learning_rate": 2.0780898535299742e-11, |
|
"loss": 1.0246, |
|
"step": 6845 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 6849, |
|
"total_flos": 648076339445760.0, |
|
"train_loss": 1.1352002231165133, |
|
"train_runtime": 23571.371, |
|
"train_samples_per_second": 18.589, |
|
"train_steps_per_second": 0.291 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 6849, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 648076339445760.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|