|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9987239472564866, |
|
"eval_steps": 500, |
|
"global_step": 587, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0017014036580178648, |
|
"grad_norm": 2.168562940412461, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 1.1424, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0034028073160357296, |
|
"grad_norm": 2.250032849091299, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 1.1697, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.005104210974053594, |
|
"grad_norm": 2.124744531823449, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.1364, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.006805614632071459, |
|
"grad_norm": 2.060484893473413, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 1.1471, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.008507018290089324, |
|
"grad_norm": 1.89749522427158, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 1.1181, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.010208421948107189, |
|
"grad_norm": 1.3357851941439673, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.0893, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.011909825606125054, |
|
"grad_norm": 1.2850663628144225, |
|
"learning_rate": 7.77777777777778e-06, |
|
"loss": 1.0484, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.013611229264142918, |
|
"grad_norm": 1.636445160753731, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 1.0308, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.015312632922160783, |
|
"grad_norm": 1.3002228280073427, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0234, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.017014036580178648, |
|
"grad_norm": 1.2380680351381246, |
|
"learning_rate": 1.1111111111111113e-05, |
|
"loss": 0.9064, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01871544023819651, |
|
"grad_norm": 1.2765329463296096, |
|
"learning_rate": 1.2222222222222224e-05, |
|
"loss": 0.8696, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.020416843896214378, |
|
"grad_norm": 1.060585818467504, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.8444, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02211824755423224, |
|
"grad_norm": 1.1232952035148502, |
|
"learning_rate": 1.4444444444444446e-05, |
|
"loss": 0.841, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.023819651212250107, |
|
"grad_norm": 1.107943114775762, |
|
"learning_rate": 1.555555555555556e-05, |
|
"loss": 0.7655, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02552105487026797, |
|
"grad_norm": 1.0059483053370937, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.7063, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.027222458528285837, |
|
"grad_norm": 0.9803969915500654, |
|
"learning_rate": 1.7777777777777777e-05, |
|
"loss": 0.6458, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0289238621863037, |
|
"grad_norm": 0.9105498405702808, |
|
"learning_rate": 1.888888888888889e-05, |
|
"loss": 0.6321, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.030625265844321566, |
|
"grad_norm": 0.9433546278214147, |
|
"learning_rate": 2e-05, |
|
"loss": 0.493, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03232666950233943, |
|
"grad_norm": 0.8625027370409255, |
|
"learning_rate": 1.9999847579243196e-05, |
|
"loss": 0.5339, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.034028073160357296, |
|
"grad_norm": 0.8101152462345085, |
|
"learning_rate": 1.9999390321619196e-05, |
|
"loss": 0.4566, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03572947681837516, |
|
"grad_norm": 0.7923424067716904, |
|
"learning_rate": 1.9998628241067113e-05, |
|
"loss": 0.4507, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03743088047639302, |
|
"grad_norm": 0.7681394677537327, |
|
"learning_rate": 1.9997561360818322e-05, |
|
"loss": 0.4265, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.03913228413441089, |
|
"grad_norm": 0.7031739222010267, |
|
"learning_rate": 1.999618971339577e-05, |
|
"loss": 0.4174, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.040833687792428755, |
|
"grad_norm": 0.6603135403892707, |
|
"learning_rate": 1.9994513340612957e-05, |
|
"loss": 0.3599, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.04253509145044662, |
|
"grad_norm": 0.7260664469398356, |
|
"learning_rate": 1.9992532293572688e-05, |
|
"loss": 0.3398, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04423649510846448, |
|
"grad_norm": 0.590669951494315, |
|
"learning_rate": 1.9990246632665503e-05, |
|
"loss": 0.2616, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.04593789876648235, |
|
"grad_norm": 0.6319158523814317, |
|
"learning_rate": 1.998765642756783e-05, |
|
"loss": 0.3082, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.047639302424500214, |
|
"grad_norm": 0.6331615276638973, |
|
"learning_rate": 1.9984761757239878e-05, |
|
"loss": 0.2933, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04934070608251808, |
|
"grad_norm": 0.6376428367891515, |
|
"learning_rate": 1.998156270992321e-05, |
|
"loss": 0.2612, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.05104210974053594, |
|
"grad_norm": 0.6322085618139611, |
|
"learning_rate": 1.9978059383138073e-05, |
|
"loss": 0.2397, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05274351339855381, |
|
"grad_norm": 0.6174252074442552, |
|
"learning_rate": 1.997425188368041e-05, |
|
"loss": 0.2566, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.05444491705657167, |
|
"grad_norm": 0.5327916753070241, |
|
"learning_rate": 1.9970140327618612e-05, |
|
"loss": 0.2416, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.05614632071458953, |
|
"grad_norm": 0.5479078907290673, |
|
"learning_rate": 1.9965724840289972e-05, |
|
"loss": 0.1781, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0578477243726074, |
|
"grad_norm": 0.5315756790547577, |
|
"learning_rate": 1.9961005556296875e-05, |
|
"loss": 0.2258, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.059549128030625266, |
|
"grad_norm": 0.48386295113930106, |
|
"learning_rate": 1.9955982619502693e-05, |
|
"loss": 0.1679, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.06125053168864313, |
|
"grad_norm": 0.5185655350233463, |
|
"learning_rate": 1.9950656183027392e-05, |
|
"loss": 0.2022, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.06295193534666099, |
|
"grad_norm": 0.5425773293966369, |
|
"learning_rate": 1.994502640924286e-05, |
|
"loss": 0.2144, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.06465333900467886, |
|
"grad_norm": 0.39700624229353654, |
|
"learning_rate": 1.993909346976798e-05, |
|
"loss": 0.1361, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.06635474266269673, |
|
"grad_norm": 0.5024564814818023, |
|
"learning_rate": 1.993285754546338e-05, |
|
"loss": 0.1618, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.06805614632071459, |
|
"grad_norm": 0.4298514607710804, |
|
"learning_rate": 1.9926318826425905e-05, |
|
"loss": 0.1801, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06975754997873246, |
|
"grad_norm": 0.4631733150959672, |
|
"learning_rate": 1.9919477511982873e-05, |
|
"loss": 0.1212, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.07145895363675032, |
|
"grad_norm": 0.46629563189251916, |
|
"learning_rate": 1.991233381068594e-05, |
|
"loss": 0.1538, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.07316035729476818, |
|
"grad_norm": 0.4077353003444054, |
|
"learning_rate": 1.990488794030478e-05, |
|
"loss": 0.1601, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.07486176095278604, |
|
"grad_norm": 0.3874990470468177, |
|
"learning_rate": 1.9897140127820432e-05, |
|
"loss": 0.125, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.07656316461080391, |
|
"grad_norm": 0.3638523211055093, |
|
"learning_rate": 1.9889090609418384e-05, |
|
"loss": 0.1366, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.07826456826882178, |
|
"grad_norm": 0.5006533990444182, |
|
"learning_rate": 1.9880739630481376e-05, |
|
"loss": 0.1466, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.07996597192683964, |
|
"grad_norm": 0.4320089119157406, |
|
"learning_rate": 1.9872087445581912e-05, |
|
"loss": 0.1256, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.08166737558485751, |
|
"grad_norm": 0.3330675873783333, |
|
"learning_rate": 1.9863134318474504e-05, |
|
"loss": 0.1066, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.08336877924287538, |
|
"grad_norm": 0.4410537592519413, |
|
"learning_rate": 1.985388052208764e-05, |
|
"loss": 0.1381, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.08507018290089324, |
|
"grad_norm": 0.4003560082493989, |
|
"learning_rate": 1.9844326338515444e-05, |
|
"loss": 0.1202, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0867715865589111, |
|
"grad_norm": 0.41012939531437076, |
|
"learning_rate": 1.9834472059009097e-05, |
|
"loss": 0.1246, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.08847299021692896, |
|
"grad_norm": 0.408091381990657, |
|
"learning_rate": 1.982431798396794e-05, |
|
"loss": 0.1289, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.09017439387494683, |
|
"grad_norm": 0.4821914250638501, |
|
"learning_rate": 1.9813864422930345e-05, |
|
"loss": 0.1312, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0918757975329647, |
|
"grad_norm": 0.40793016164627693, |
|
"learning_rate": 1.9803111694564246e-05, |
|
"loss": 0.1237, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.09357720119098256, |
|
"grad_norm": 0.3734013818822545, |
|
"learning_rate": 1.9792060126657437e-05, |
|
"loss": 0.1049, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.09527860484900043, |
|
"grad_norm": 0.3186480775193613, |
|
"learning_rate": 1.9780710056107587e-05, |
|
"loss": 0.0911, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.0969800085070183, |
|
"grad_norm": 0.39667217504563396, |
|
"learning_rate": 1.976906182891197e-05, |
|
"loss": 0.0918, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.09868141216503616, |
|
"grad_norm": 0.4482874203710604, |
|
"learning_rate": 1.97571158001569e-05, |
|
"loss": 0.1209, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.10038281582305401, |
|
"grad_norm": 0.2965972804159924, |
|
"learning_rate": 1.9744872334006936e-05, |
|
"loss": 0.0932, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.10208421948107188, |
|
"grad_norm": 0.3566484676555622, |
|
"learning_rate": 1.973233180369374e-05, |
|
"loss": 0.1248, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.10378562313908975, |
|
"grad_norm": 0.30896394609488814, |
|
"learning_rate": 1.9719494591504747e-05, |
|
"loss": 0.0851, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.10548702679710761, |
|
"grad_norm": 0.2903927724315073, |
|
"learning_rate": 1.9706361088771474e-05, |
|
"loss": 0.0763, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.10718843045512548, |
|
"grad_norm": 0.3405843767078732, |
|
"learning_rate": 1.96929316958576e-05, |
|
"loss": 0.103, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.10888983411314335, |
|
"grad_norm": 0.40152278275783043, |
|
"learning_rate": 1.9679206822146776e-05, |
|
"loss": 0.1084, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.11059123777116121, |
|
"grad_norm": 0.3372032580489575, |
|
"learning_rate": 1.9665186886030135e-05, |
|
"loss": 0.1022, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.11229264142917907, |
|
"grad_norm": 0.332966030135067, |
|
"learning_rate": 1.9650872314893523e-05, |
|
"loss": 0.0891, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.11399404508719693, |
|
"grad_norm": 0.3832837550171733, |
|
"learning_rate": 1.9636263545104498e-05, |
|
"loss": 0.1181, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.1156954487452148, |
|
"grad_norm": 0.40356329916123346, |
|
"learning_rate": 1.962136102199901e-05, |
|
"loss": 0.1262, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.11739685240323267, |
|
"grad_norm": 0.3689423450550846, |
|
"learning_rate": 1.9606165199867822e-05, |
|
"loss": 0.0826, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.11909825606125053, |
|
"grad_norm": 0.3457217662571401, |
|
"learning_rate": 1.959067654194268e-05, |
|
"loss": 0.1007, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1207996597192684, |
|
"grad_norm": 0.3511006571491925, |
|
"learning_rate": 1.9574895520382183e-05, |
|
"loss": 0.1263, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.12250106337728627, |
|
"grad_norm": 0.36819206266348997, |
|
"learning_rate": 1.955882261625737e-05, |
|
"loss": 0.1137, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.12420246703530413, |
|
"grad_norm": 0.2607004742274578, |
|
"learning_rate": 1.9542458319537094e-05, |
|
"loss": 0.0914, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.12590387069332198, |
|
"grad_norm": 0.2773076381582836, |
|
"learning_rate": 1.9525803129073046e-05, |
|
"loss": 0.0592, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.12760527435133986, |
|
"grad_norm": 0.28951070788934496, |
|
"learning_rate": 1.9508857552584574e-05, |
|
"loss": 0.0928, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.12930667800935772, |
|
"grad_norm": 0.35718485474187367, |
|
"learning_rate": 1.9491622106643195e-05, |
|
"loss": 0.0952, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.1310080816673756, |
|
"grad_norm": 0.34891501382529494, |
|
"learning_rate": 1.9474097316656856e-05, |
|
"loss": 0.1004, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.13270948532539345, |
|
"grad_norm": 0.28733346641687174, |
|
"learning_rate": 1.9456283716853906e-05, |
|
"loss": 0.0823, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1344108889834113, |
|
"grad_norm": 0.3735237899238901, |
|
"learning_rate": 1.9438181850266815e-05, |
|
"loss": 0.121, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.13611229264142918, |
|
"grad_norm": 0.2904974285120679, |
|
"learning_rate": 1.941979226871563e-05, |
|
"loss": 0.0768, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.13781369629944704, |
|
"grad_norm": 0.32117427662146836, |
|
"learning_rate": 1.9401115532791134e-05, |
|
"loss": 0.0837, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.13951509995746492, |
|
"grad_norm": 0.40967624537549835, |
|
"learning_rate": 1.938215221183777e-05, |
|
"loss": 0.1061, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.14121650361548277, |
|
"grad_norm": 0.33629708001931885, |
|
"learning_rate": 1.936290288393629e-05, |
|
"loss": 0.0892, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.14291790727350065, |
|
"grad_norm": 0.31834590604771723, |
|
"learning_rate": 1.9343368135886112e-05, |
|
"loss": 0.1088, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.1446193109315185, |
|
"grad_norm": 0.30988146926514176, |
|
"learning_rate": 1.932354856318746e-05, |
|
"loss": 0.0864, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.14632071458953635, |
|
"grad_norm": 0.41148714128819724, |
|
"learning_rate": 1.9303444770023184e-05, |
|
"loss": 0.0867, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.14802211824755424, |
|
"grad_norm": 0.3641579696865324, |
|
"learning_rate": 1.9283057369240358e-05, |
|
"loss": 0.1149, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.1497235219055721, |
|
"grad_norm": 0.3359968169832728, |
|
"learning_rate": 1.9262386982331596e-05, |
|
"loss": 0.0959, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.15142492556358997, |
|
"grad_norm": 0.2849324477084618, |
|
"learning_rate": 1.9241434239416093e-05, |
|
"loss": 0.0957, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.15312632922160782, |
|
"grad_norm": 0.29673544787967593, |
|
"learning_rate": 1.922019977922045e-05, |
|
"loss": 0.0823, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1548277328796257, |
|
"grad_norm": 0.28046315847608455, |
|
"learning_rate": 1.919868424905915e-05, |
|
"loss": 0.0885, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.15652913653764355, |
|
"grad_norm": 0.3024116439443639, |
|
"learning_rate": 1.9176888304814882e-05, |
|
"loss": 0.0919, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.15823054019566143, |
|
"grad_norm": 0.4139703560637819, |
|
"learning_rate": 1.9154812610918503e-05, |
|
"loss": 0.0938, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.1599319438536793, |
|
"grad_norm": 0.2618424473915207, |
|
"learning_rate": 1.913245784032881e-05, |
|
"loss": 0.0684, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.16163334751169714, |
|
"grad_norm": 0.31266175001831703, |
|
"learning_rate": 1.9109824674512014e-05, |
|
"loss": 0.0673, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.16333475116971502, |
|
"grad_norm": 0.3741829507920355, |
|
"learning_rate": 1.9086913803420966e-05, |
|
"loss": 0.1097, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.16503615482773287, |
|
"grad_norm": 0.3356628202803492, |
|
"learning_rate": 1.906372592547413e-05, |
|
"loss": 0.1028, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.16673755848575075, |
|
"grad_norm": 0.34106497626751014, |
|
"learning_rate": 1.9040261747534282e-05, |
|
"loss": 0.0989, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.1684389621437686, |
|
"grad_norm": 0.29746452263621875, |
|
"learning_rate": 1.9016521984886984e-05, |
|
"loss": 0.0718, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.1701403658017865, |
|
"grad_norm": 0.2990092253885084, |
|
"learning_rate": 1.8992507361218743e-05, |
|
"loss": 0.0765, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17184176945980434, |
|
"grad_norm": 0.2177337033832474, |
|
"learning_rate": 1.8968218608594987e-05, |
|
"loss": 0.056, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.1735431731178222, |
|
"grad_norm": 0.39840165284276846, |
|
"learning_rate": 1.8943656467437726e-05, |
|
"loss": 0.1111, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.17524457677584007, |
|
"grad_norm": 0.3378745899319231, |
|
"learning_rate": 1.8918821686502992e-05, |
|
"loss": 0.0931, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.17694598043385792, |
|
"grad_norm": 0.2637083690262847, |
|
"learning_rate": 1.8893715022858e-05, |
|
"loss": 0.0759, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.1786473840918758, |
|
"grad_norm": 0.36645660481595893, |
|
"learning_rate": 1.886833724185809e-05, |
|
"loss": 0.0956, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.18034878774989366, |
|
"grad_norm": 0.2626991205572612, |
|
"learning_rate": 1.8842689117123377e-05, |
|
"loss": 0.0622, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.18205019140791154, |
|
"grad_norm": 0.28778996504448945, |
|
"learning_rate": 1.8816771430515178e-05, |
|
"loss": 0.0778, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.1837515950659294, |
|
"grad_norm": 0.30016452527135956, |
|
"learning_rate": 1.8790584972112174e-05, |
|
"loss": 0.0658, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.18545299872394724, |
|
"grad_norm": 0.5203118503020492, |
|
"learning_rate": 1.876413054018633e-05, |
|
"loss": 0.146, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.18715440238196512, |
|
"grad_norm": 0.30134320864935904, |
|
"learning_rate": 1.873740894117854e-05, |
|
"loss": 0.0821, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.18885580603998298, |
|
"grad_norm": 0.34344915555537253, |
|
"learning_rate": 1.8710420989674093e-05, |
|
"loss": 0.075, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.19055720969800086, |
|
"grad_norm": 0.2458501444323483, |
|
"learning_rate": 1.8683167508377775e-05, |
|
"loss": 0.0596, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.1922586133560187, |
|
"grad_norm": 0.2651178569441844, |
|
"learning_rate": 1.8655649328088836e-05, |
|
"loss": 0.0674, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.1939600170140366, |
|
"grad_norm": 0.2662036987842302, |
|
"learning_rate": 1.862786728767565e-05, |
|
"loss": 0.0769, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.19566142067205444, |
|
"grad_norm": 0.28498333925442115, |
|
"learning_rate": 1.8599822234050143e-05, |
|
"loss": 0.0778, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.19736282433007232, |
|
"grad_norm": 0.26717816913116776, |
|
"learning_rate": 1.8571515022141974e-05, |
|
"loss": 0.0896, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.19906422798809018, |
|
"grad_norm": 0.3374842493414297, |
|
"learning_rate": 1.8542946514872478e-05, |
|
"loss": 0.0992, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.20076563164610803, |
|
"grad_norm": 0.21117730752547184, |
|
"learning_rate": 1.851411758312835e-05, |
|
"loss": 0.0454, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.2024670353041259, |
|
"grad_norm": 0.19203553955386238, |
|
"learning_rate": 1.8485029105735112e-05, |
|
"loss": 0.0611, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.20416843896214376, |
|
"grad_norm": 0.22445524786962626, |
|
"learning_rate": 1.8455681969430307e-05, |
|
"loss": 0.0584, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.20586984262016164, |
|
"grad_norm": 0.23148081400071185, |
|
"learning_rate": 1.8426077068836487e-05, |
|
"loss": 0.0629, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.2075712462781795, |
|
"grad_norm": 0.17709972528549517, |
|
"learning_rate": 1.839621530643392e-05, |
|
"loss": 0.0563, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.20927264993619737, |
|
"grad_norm": 0.3048264391849215, |
|
"learning_rate": 1.8366097592533095e-05, |
|
"loss": 0.0778, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.21097405359421523, |
|
"grad_norm": 0.32208607157512736, |
|
"learning_rate": 1.8335724845246948e-05, |
|
"loss": 0.1028, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.21267545725223308, |
|
"grad_norm": 0.23725153079241285, |
|
"learning_rate": 1.830509799046292e-05, |
|
"loss": 0.0803, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.21437686091025096, |
|
"grad_norm": 0.2996136224560952, |
|
"learning_rate": 1.8274217961814682e-05, |
|
"loss": 0.0718, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.2160782645682688, |
|
"grad_norm": 0.23529202737787724, |
|
"learning_rate": 1.8243085700653698e-05, |
|
"loss": 0.058, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.2177796682262867, |
|
"grad_norm": 0.2602674833841576, |
|
"learning_rate": 1.821170215602053e-05, |
|
"loss": 0.0847, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.21948107188430455, |
|
"grad_norm": 0.23134307276310323, |
|
"learning_rate": 1.818006828461591e-05, |
|
"loss": 0.0568, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.22118247554232243, |
|
"grad_norm": 0.26453961985179086, |
|
"learning_rate": 1.8148185050771554e-05, |
|
"loss": 0.0801, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.22288387920034028, |
|
"grad_norm": 0.25318378680529585, |
|
"learning_rate": 1.8116053426420793e-05, |
|
"loss": 0.0749, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.22458528285835813, |
|
"grad_norm": 0.20502925194857724, |
|
"learning_rate": 1.8083674391068925e-05, |
|
"loss": 0.0629, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.226286686516376, |
|
"grad_norm": 0.19279005026370763, |
|
"learning_rate": 1.8051048931763366e-05, |
|
"loss": 0.0463, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.22798809017439386, |
|
"grad_norm": 0.2986931928862026, |
|
"learning_rate": 1.8018178043063554e-05, |
|
"loss": 0.0869, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.22968949383241175, |
|
"grad_norm": 0.2580936970306385, |
|
"learning_rate": 1.798506272701064e-05, |
|
"loss": 0.0689, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.2313908974904296, |
|
"grad_norm": 0.35433155424672486, |
|
"learning_rate": 1.795170399309692e-05, |
|
"loss": 0.0889, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.23309230114844748, |
|
"grad_norm": 0.3342563856245416, |
|
"learning_rate": 1.7918102858235103e-05, |
|
"loss": 0.1088, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.23479370480646533, |
|
"grad_norm": 0.2658901230535673, |
|
"learning_rate": 1.7884260346727257e-05, |
|
"loss": 0.0967, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.2364951084644832, |
|
"grad_norm": 0.3116747759735481, |
|
"learning_rate": 1.7850177490233635e-05, |
|
"loss": 0.0763, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.23819651212250106, |
|
"grad_norm": 0.2561525037232419, |
|
"learning_rate": 1.7815855327741185e-05, |
|
"loss": 0.0956, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.23989791578051892, |
|
"grad_norm": 0.2246820236163285, |
|
"learning_rate": 1.7781294905531908e-05, |
|
"loss": 0.0792, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.2415993194385368, |
|
"grad_norm": 0.3264214270885714, |
|
"learning_rate": 1.774649727715094e-05, |
|
"loss": 0.0792, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.24330072309655465, |
|
"grad_norm": 0.19102790309255646, |
|
"learning_rate": 1.7711463503374466e-05, |
|
"loss": 0.0627, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.24500212675457253, |
|
"grad_norm": 0.29766375426258634, |
|
"learning_rate": 1.7676194652177333e-05, |
|
"loss": 0.0529, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.24670353041259038, |
|
"grad_norm": 0.29002494096896503, |
|
"learning_rate": 1.764069179870055e-05, |
|
"loss": 0.0873, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.24840493407060826, |
|
"grad_norm": 0.1961062743167957, |
|
"learning_rate": 1.760495602521847e-05, |
|
"loss": 0.0549, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.2501063377286261, |
|
"grad_norm": 0.3391386450150704, |
|
"learning_rate": 1.756898842110582e-05, |
|
"loss": 0.0855, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.25180774138664397, |
|
"grad_norm": 0.2237841919957392, |
|
"learning_rate": 1.753279008280449e-05, |
|
"loss": 0.0611, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.2535091450446618, |
|
"grad_norm": 0.1863997964345379, |
|
"learning_rate": 1.74963621137901e-05, |
|
"loss": 0.0557, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.25521054870267973, |
|
"grad_norm": 0.2322748935860178, |
|
"learning_rate": 1.7459705624538383e-05, |
|
"loss": 0.0744, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2569119523606976, |
|
"grad_norm": 0.2811884338929891, |
|
"learning_rate": 1.7422821732491297e-05, |
|
"loss": 0.0869, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.25861335601871543, |
|
"grad_norm": 0.2336367022405289, |
|
"learning_rate": 1.7385711562022988e-05, |
|
"loss": 0.0662, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.2603147596767333, |
|
"grad_norm": 0.2525517790918003, |
|
"learning_rate": 1.734837624440551e-05, |
|
"loss": 0.0709, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.2620161633347512, |
|
"grad_norm": 0.25674452456443186, |
|
"learning_rate": 1.731081691777434e-05, |
|
"loss": 0.0536, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.26371756699276905, |
|
"grad_norm": 0.2380952734859674, |
|
"learning_rate": 1.7273034727093677e-05, |
|
"loss": 0.0797, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.2654189706507869, |
|
"grad_norm": 0.22013348660975807, |
|
"learning_rate": 1.7235030824121542e-05, |
|
"loss": 0.0608, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.26712037430880475, |
|
"grad_norm": 0.22281523456342697, |
|
"learning_rate": 1.7196806367374656e-05, |
|
"loss": 0.0635, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.2688217779668226, |
|
"grad_norm": 0.2102268959812806, |
|
"learning_rate": 1.7158362522093153e-05, |
|
"loss": 0.0682, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.2705231816248405, |
|
"grad_norm": 0.2981020750225053, |
|
"learning_rate": 1.7119700460205026e-05, |
|
"loss": 0.0748, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.27222458528285837, |
|
"grad_norm": 0.23396857925074813, |
|
"learning_rate": 1.7080821360290426e-05, |
|
"loss": 0.0668, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2739259889408762, |
|
"grad_norm": 0.14039625188691493, |
|
"learning_rate": 1.7041726407545716e-05, |
|
"loss": 0.0334, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.27562739259889407, |
|
"grad_norm": 0.2591445596342466, |
|
"learning_rate": 1.7002416793747354e-05, |
|
"loss": 0.0613, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.2773287962569119, |
|
"grad_norm": 0.29367133177482335, |
|
"learning_rate": 1.696289371721556e-05, |
|
"loss": 0.0713, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.27903019991492983, |
|
"grad_norm": 0.3690305678977906, |
|
"learning_rate": 1.692315838277778e-05, |
|
"loss": 0.1274, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.2807316035729477, |
|
"grad_norm": 0.22214467316639894, |
|
"learning_rate": 1.6883212001731956e-05, |
|
"loss": 0.0655, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.28243300723096554, |
|
"grad_norm": 0.1574976967869379, |
|
"learning_rate": 1.6843055791809623e-05, |
|
"loss": 0.0322, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.2841344108889834, |
|
"grad_norm": 0.27654091402143877, |
|
"learning_rate": 1.680269097713876e-05, |
|
"loss": 0.0922, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.2858358145470013, |
|
"grad_norm": 0.28597652739783896, |
|
"learning_rate": 1.6762118788206488e-05, |
|
"loss": 0.079, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.28753721820501915, |
|
"grad_norm": 0.26938794460799176, |
|
"learning_rate": 1.6721340461821555e-05, |
|
"loss": 0.0871, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.289238621863037, |
|
"grad_norm": 0.32524563962835895, |
|
"learning_rate": 1.6680357241076632e-05, |
|
"loss": 0.1052, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.29094002552105486, |
|
"grad_norm": 0.3017728585235927, |
|
"learning_rate": 1.6639170375310422e-05, |
|
"loss": 0.0705, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.2926414291790727, |
|
"grad_norm": 0.23611080555948813, |
|
"learning_rate": 1.6597781120069584e-05, |
|
"loss": 0.0735, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.2943428328370906, |
|
"grad_norm": 0.2824684340323033, |
|
"learning_rate": 1.655619073707043e-05, |
|
"loss": 0.0957, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.29604423649510847, |
|
"grad_norm": 0.22439711399094156, |
|
"learning_rate": 1.6514400494160498e-05, |
|
"loss": 0.0572, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.2977456401531263, |
|
"grad_norm": 0.18939390048237154, |
|
"learning_rate": 1.6472411665279872e-05, |
|
"loss": 0.057, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.2994470438111442, |
|
"grad_norm": 0.22456676896884215, |
|
"learning_rate": 1.643022553042237e-05, |
|
"loss": 0.0557, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.3011484474691621, |
|
"grad_norm": 0.18664835436430186, |
|
"learning_rate": 1.6387843375596513e-05, |
|
"loss": 0.0494, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.30284985112717994, |
|
"grad_norm": 0.23489112753979385, |
|
"learning_rate": 1.634526649278632e-05, |
|
"loss": 0.0821, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.3045512547851978, |
|
"grad_norm": 0.2194109402248629, |
|
"learning_rate": 1.630249617991194e-05, |
|
"loss": 0.0672, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.30625265844321564, |
|
"grad_norm": 0.3321350768766143, |
|
"learning_rate": 1.6259533740790055e-05, |
|
"loss": 0.1135, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3079540621012335, |
|
"grad_norm": 0.2502151994073321, |
|
"learning_rate": 1.6216380485094164e-05, |
|
"loss": 0.0718, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.3096554657592514, |
|
"grad_norm": 0.2670386782819874, |
|
"learning_rate": 1.617303772831465e-05, |
|
"loss": 0.0728, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.31135686941726926, |
|
"grad_norm": 0.26272098469205685, |
|
"learning_rate": 1.6129506791718665e-05, |
|
"loss": 0.0832, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.3130582730752871, |
|
"grad_norm": 0.2246929104575992, |
|
"learning_rate": 1.6085789002309873e-05, |
|
"loss": 0.0631, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.31475967673330496, |
|
"grad_norm": 0.21270610161779943, |
|
"learning_rate": 1.6041885692787985e-05, |
|
"loss": 0.067, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.31646108039132287, |
|
"grad_norm": 0.21242584741643203, |
|
"learning_rate": 1.599779820150813e-05, |
|
"loss": 0.068, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.3181624840493407, |
|
"grad_norm": 0.20419485410339863, |
|
"learning_rate": 1.5953527872440063e-05, |
|
"loss": 0.0757, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.3198638877073586, |
|
"grad_norm": 0.27210472135834674, |
|
"learning_rate": 1.5909076055127202e-05, |
|
"loss": 0.0853, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.3215652913653764, |
|
"grad_norm": 0.22338089059411625, |
|
"learning_rate": 1.5864444104645473e-05, |
|
"loss": 0.0742, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.3232666950233943, |
|
"grad_norm": 0.2732168335391855, |
|
"learning_rate": 1.581963338156201e-05, |
|
"loss": 0.0908, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3249680986814122, |
|
"grad_norm": 0.2467079865485088, |
|
"learning_rate": 1.5774645251893673e-05, |
|
"loss": 0.0591, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.32666950233943004, |
|
"grad_norm": 0.2313614767876932, |
|
"learning_rate": 1.5729481087065423e-05, |
|
"loss": 0.0892, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.3283709059974479, |
|
"grad_norm": 0.2023763116089166, |
|
"learning_rate": 1.5684142263868493e-05, |
|
"loss": 0.0672, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.33007230965546575, |
|
"grad_norm": 0.31753011619888016, |
|
"learning_rate": 1.5638630164418435e-05, |
|
"loss": 0.0712, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.3317737133134836, |
|
"grad_norm": 0.2757080957715778, |
|
"learning_rate": 1.5592946176112973e-05, |
|
"loss": 0.1015, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.3334751169715015, |
|
"grad_norm": 0.2505973926615177, |
|
"learning_rate": 1.554709169158972e-05, |
|
"loss": 0.0971, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.33517652062951936, |
|
"grad_norm": 0.2226907461950715, |
|
"learning_rate": 1.550106810868373e-05, |
|
"loss": 0.0513, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.3368779242875372, |
|
"grad_norm": 0.15446367505194006, |
|
"learning_rate": 1.5454876830384868e-05, |
|
"loss": 0.057, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.33857932794555506, |
|
"grad_norm": 0.30643512505114406, |
|
"learning_rate": 1.540851926479505e-05, |
|
"loss": 0.0975, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.340280731603573, |
|
"grad_norm": 0.25762810344960363, |
|
"learning_rate": 1.536199682508533e-05, |
|
"loss": 0.0633, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3419821352615908, |
|
"grad_norm": 0.19158691969988545, |
|
"learning_rate": 1.531531092945279e-05, |
|
"loss": 0.0569, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.3436835389196087, |
|
"grad_norm": 0.2834266199692826, |
|
"learning_rate": 1.526846300107734e-05, |
|
"loss": 0.0988, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.34538494257762653, |
|
"grad_norm": 0.2556087083863337, |
|
"learning_rate": 1.5221454468078336e-05, |
|
"loss": 0.0689, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.3470863462356444, |
|
"grad_norm": 0.2438742301334132, |
|
"learning_rate": 1.5174286763470995e-05, |
|
"loss": 0.0715, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.3487877498936623, |
|
"grad_norm": 0.21904569801568857, |
|
"learning_rate": 1.5126961325122773e-05, |
|
"loss": 0.0715, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.35048915355168014, |
|
"grad_norm": 0.32118887699890014, |
|
"learning_rate": 1.5079479595709493e-05, |
|
"loss": 0.1042, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.352190557209698, |
|
"grad_norm": 0.22503680461445366, |
|
"learning_rate": 1.5031843022671377e-05, |
|
"loss": 0.0516, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.35389196086771585, |
|
"grad_norm": 0.20476968089029912, |
|
"learning_rate": 1.4984053058168936e-05, |
|
"loss": 0.0651, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.35559336452573376, |
|
"grad_norm": 0.26260720806295024, |
|
"learning_rate": 1.4936111159038677e-05, |
|
"loss": 0.078, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.3572947681837516, |
|
"grad_norm": 0.21335908324799727, |
|
"learning_rate": 1.4888018786748713e-05, |
|
"loss": 0.0531, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.35899617184176946, |
|
"grad_norm": 0.24776977384172608, |
|
"learning_rate": 1.4839777407354194e-05, |
|
"loss": 0.0711, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.3606975754997873, |
|
"grad_norm": 0.26283265478228607, |
|
"learning_rate": 1.4791388491452637e-05, |
|
"loss": 0.1005, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.36239897915780517, |
|
"grad_norm": 0.21912083728071574, |
|
"learning_rate": 1.4742853514139076e-05, |
|
"loss": 0.0759, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.3641003828158231, |
|
"grad_norm": 0.30538258609344554, |
|
"learning_rate": 1.4694173954961105e-05, |
|
"loss": 0.1009, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.36580178647384093, |
|
"grad_norm": 0.26764402263401943, |
|
"learning_rate": 1.4645351297873774e-05, |
|
"loss": 0.0792, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.3675031901318588, |
|
"grad_norm": 0.23675986028315632, |
|
"learning_rate": 1.4596387031194354e-05, |
|
"loss": 0.0743, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.36920459378987663, |
|
"grad_norm": 0.223485373234782, |
|
"learning_rate": 1.4547282647556964e-05, |
|
"loss": 0.0929, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.3709059974478945, |
|
"grad_norm": 0.29133370479541304, |
|
"learning_rate": 1.449803964386706e-05, |
|
"loss": 0.0798, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.3726074011059124, |
|
"grad_norm": 0.18643880537818264, |
|
"learning_rate": 1.4448659521255823e-05, |
|
"loss": 0.0569, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.37430880476393025, |
|
"grad_norm": 0.280997156813028, |
|
"learning_rate": 1.4399143785034388e-05, |
|
"loss": 0.0999, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3760102084219481, |
|
"grad_norm": 0.24715686808729184, |
|
"learning_rate": 1.4349493944647953e-05, |
|
"loss": 0.0627, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.37771161207996595, |
|
"grad_norm": 0.2653638113311677, |
|
"learning_rate": 1.4299711513629759e-05, |
|
"loss": 0.0863, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.37941301573798386, |
|
"grad_norm": 0.3152170570199493, |
|
"learning_rate": 1.4249798009554979e-05, |
|
"loss": 0.0962, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.3811144193960017, |
|
"grad_norm": 0.25435032990077316, |
|
"learning_rate": 1.419975495399442e-05, |
|
"loss": 0.0937, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.38281582305401957, |
|
"grad_norm": 0.15047418159614007, |
|
"learning_rate": 1.4149583872468165e-05, |
|
"loss": 0.0482, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3845172267120374, |
|
"grad_norm": 0.15295980990137148, |
|
"learning_rate": 1.4099286294399051e-05, |
|
"loss": 0.0382, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.38621863037005527, |
|
"grad_norm": 0.2533178513009288, |
|
"learning_rate": 1.404886375306607e-05, |
|
"loss": 0.0948, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.3879200340280732, |
|
"grad_norm": 0.23107415073266793, |
|
"learning_rate": 1.3998317785557597e-05, |
|
"loss": 0.0556, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.38962143768609103, |
|
"grad_norm": 0.22917339521301278, |
|
"learning_rate": 1.3947649932724563e-05, |
|
"loss": 0.0843, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.3913228413441089, |
|
"grad_norm": 0.17860874606051796, |
|
"learning_rate": 1.3896861739133456e-05, |
|
"loss": 0.0488, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.39302424500212674, |
|
"grad_norm": 0.20110618464098665, |
|
"learning_rate": 1.384595475301926e-05, |
|
"loss": 0.0707, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.39472564866014465, |
|
"grad_norm": 0.20490708885008196, |
|
"learning_rate": 1.3794930526238246e-05, |
|
"loss": 0.0638, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.3964270523181625, |
|
"grad_norm": 0.23510478688343137, |
|
"learning_rate": 1.3743790614220664e-05, |
|
"loss": 0.0795, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.39812845597618035, |
|
"grad_norm": 0.23100407798838418, |
|
"learning_rate": 1.3692536575923334e-05, |
|
"loss": 0.0761, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.3998298596341982, |
|
"grad_norm": 0.256927774293719, |
|
"learning_rate": 1.3641169973782117e-05, |
|
"loss": 0.0669, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.40153126329221606, |
|
"grad_norm": 0.265960997604412, |
|
"learning_rate": 1.3589692373664288e-05, |
|
"loss": 0.0792, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.40323266695023396, |
|
"grad_norm": 0.2915525993978868, |
|
"learning_rate": 1.3538105344820798e-05, |
|
"loss": 0.0995, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.4049340706082518, |
|
"grad_norm": 0.2578778351844946, |
|
"learning_rate": 1.3486410459838448e-05, |
|
"loss": 0.0719, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.40663547426626967, |
|
"grad_norm": 0.23384387017059058, |
|
"learning_rate": 1.343460929459193e-05, |
|
"loss": 0.0712, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.4083368779242875, |
|
"grad_norm": 0.3019784671101925, |
|
"learning_rate": 1.3382703428195812e-05, |
|
"loss": 0.1115, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4100382815823054, |
|
"grad_norm": 0.1352734841435768, |
|
"learning_rate": 1.3330694442956376e-05, |
|
"loss": 0.0464, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.4117396852403233, |
|
"grad_norm": 0.17202787234184866, |
|
"learning_rate": 1.3278583924323405e-05, |
|
"loss": 0.0454, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.41344108889834114, |
|
"grad_norm": 0.197390798554472, |
|
"learning_rate": 1.3226373460841835e-05, |
|
"loss": 0.0643, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.415142492556359, |
|
"grad_norm": 0.21059005106394152, |
|
"learning_rate": 1.3174064644103334e-05, |
|
"loss": 0.0619, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.41684389621437684, |
|
"grad_norm": 0.1722308196492277, |
|
"learning_rate": 1.3121659068697797e-05, |
|
"loss": 0.0454, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.41854529987239475, |
|
"grad_norm": 0.32958443926088915, |
|
"learning_rate": 1.306915833216471e-05, |
|
"loss": 0.1037, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.4202467035304126, |
|
"grad_norm": 0.2544357662171513, |
|
"learning_rate": 1.3016564034944473e-05, |
|
"loss": 0.0706, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.42194810718843045, |
|
"grad_norm": 0.2585510606952109, |
|
"learning_rate": 1.29638777803296e-05, |
|
"loss": 0.0825, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.4236495108464483, |
|
"grad_norm": 0.20275321109441732, |
|
"learning_rate": 1.2911101174415861e-05, |
|
"loss": 0.0526, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.42535091450446616, |
|
"grad_norm": 0.233981891948638, |
|
"learning_rate": 1.2858235826053294e-05, |
|
"loss": 0.0695, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.42705231816248407, |
|
"grad_norm": 0.21493578674491315, |
|
"learning_rate": 1.2805283346797179e-05, |
|
"loss": 0.0653, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.4287537218205019, |
|
"grad_norm": 0.24141724836500014, |
|
"learning_rate": 1.2752245350858905e-05, |
|
"loss": 0.0797, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.4304551254785198, |
|
"grad_norm": 0.1489771140649588, |
|
"learning_rate": 1.2699123455056777e-05, |
|
"loss": 0.03, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.4321565291365376, |
|
"grad_norm": 0.24828101075324488, |
|
"learning_rate": 1.26459192787667e-05, |
|
"loss": 0.0819, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.43385793279455553, |
|
"grad_norm": 0.17372013690514643, |
|
"learning_rate": 1.2592634443872842e-05, |
|
"loss": 0.0461, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.4355593364525734, |
|
"grad_norm": 0.2764346314356569, |
|
"learning_rate": 1.2539270574718172e-05, |
|
"loss": 0.0806, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.43726074011059124, |
|
"grad_norm": 0.29987546473911214, |
|
"learning_rate": 1.2485829298054952e-05, |
|
"loss": 0.0846, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.4389621437686091, |
|
"grad_norm": 0.31175687442320515, |
|
"learning_rate": 1.2432312242995158e-05, |
|
"loss": 0.0971, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.44066354742662694, |
|
"grad_norm": 0.3009904680059143, |
|
"learning_rate": 1.2378721040960788e-05, |
|
"loss": 0.0994, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.44236495108464485, |
|
"grad_norm": 0.2562384582969849, |
|
"learning_rate": 1.232505732563416e-05, |
|
"loss": 0.0759, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.4440663547426627, |
|
"grad_norm": 0.2550190452410635, |
|
"learning_rate": 1.2271322732908091e-05, |
|
"loss": 0.0733, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.44576775840068056, |
|
"grad_norm": 0.2515270792806656, |
|
"learning_rate": 1.2217518900836045e-05, |
|
"loss": 0.0708, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.4474691620586984, |
|
"grad_norm": 0.18357019713578807, |
|
"learning_rate": 1.2163647469582181e-05, |
|
"loss": 0.0515, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.44917056571671626, |
|
"grad_norm": 0.2671034404389676, |
|
"learning_rate": 1.210971008137136e-05, |
|
"loss": 0.0825, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.45087196937473417, |
|
"grad_norm": 0.2571681277129728, |
|
"learning_rate": 1.2055708380439089e-05, |
|
"loss": 0.1042, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.452573373032752, |
|
"grad_norm": 0.21793020282041717, |
|
"learning_rate": 1.2001644012981392e-05, |
|
"loss": 0.0672, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.4542747766907699, |
|
"grad_norm": 0.3515840793799933, |
|
"learning_rate": 1.1947518627104637e-05, |
|
"loss": 0.1232, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.45597618034878773, |
|
"grad_norm": 0.22270456303325817, |
|
"learning_rate": 1.1893333872775275e-05, |
|
"loss": 0.084, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.45767758400680564, |
|
"grad_norm": 0.1770352144116934, |
|
"learning_rate": 1.1839091401769559e-05, |
|
"loss": 0.0435, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.4593789876648235, |
|
"grad_norm": 0.19476419487806804, |
|
"learning_rate": 1.1784792867623179e-05, |
|
"loss": 0.0535, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.46108039132284134, |
|
"grad_norm": 0.2961488962332661, |
|
"learning_rate": 1.1730439925580876e-05, |
|
"loss": 0.1054, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.4627817949808592, |
|
"grad_norm": 0.23209294477341702, |
|
"learning_rate": 1.1676034232545963e-05, |
|
"loss": 0.0898, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.46448319863887705, |
|
"grad_norm": 0.3192995286716727, |
|
"learning_rate": 1.1621577447029816e-05, |
|
"loss": 0.0864, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.46618460229689496, |
|
"grad_norm": 0.22033834311649156, |
|
"learning_rate": 1.1567071229101332e-05, |
|
"loss": 0.061, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.4678860059549128, |
|
"grad_norm": 0.24431011933295152, |
|
"learning_rate": 1.1512517240336304e-05, |
|
"loss": 0.05, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.46958740961293066, |
|
"grad_norm": 0.2501002015877452, |
|
"learning_rate": 1.1457917143766786e-05, |
|
"loss": 0.0811, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.4712888132709485, |
|
"grad_norm": 0.15591637050035256, |
|
"learning_rate": 1.1403272603830384e-05, |
|
"loss": 0.0439, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.4729902169289664, |
|
"grad_norm": 0.23445891677475122, |
|
"learning_rate": 1.1348585286319529e-05, |
|
"loss": 0.0562, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.4746916205869843, |
|
"grad_norm": 0.22953431606642624, |
|
"learning_rate": 1.1293856858330678e-05, |
|
"loss": 0.0712, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.47639302424500213, |
|
"grad_norm": 0.24410355979016798, |
|
"learning_rate": 1.1239088988213522e-05, |
|
"loss": 0.0652, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.47809442790302, |
|
"grad_norm": 0.18183702432279936, |
|
"learning_rate": 1.11842833455201e-05, |
|
"loss": 0.0464, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.47979583156103783, |
|
"grad_norm": 0.3068246013758465, |
|
"learning_rate": 1.1129441600953916e-05, |
|
"loss": 0.101, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.48149723521905574, |
|
"grad_norm": 0.2725071360381625, |
|
"learning_rate": 1.1074565426319014e-05, |
|
"loss": 0.0906, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.4831986388770736, |
|
"grad_norm": 0.23078769715046143, |
|
"learning_rate": 1.101965649446901e-05, |
|
"loss": 0.0659, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.48490004253509145, |
|
"grad_norm": 0.24607556757801813, |
|
"learning_rate": 1.0964716479256094e-05, |
|
"loss": 0.0581, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.4866014461931093, |
|
"grad_norm": 0.2039783514676341, |
|
"learning_rate": 1.0909747055480004e-05, |
|
"loss": 0.042, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.4883028498511272, |
|
"grad_norm": 0.2132647216715679, |
|
"learning_rate": 1.0854749898836974e-05, |
|
"loss": 0.042, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.49000425350914506, |
|
"grad_norm": 0.17146104368359683, |
|
"learning_rate": 1.0799726685868648e-05, |
|
"loss": 0.0486, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.4917056571671629, |
|
"grad_norm": 0.3101626941591736, |
|
"learning_rate": 1.0744679093910987e-05, |
|
"loss": 0.0855, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.49340706082518077, |
|
"grad_norm": 0.26359884054130955, |
|
"learning_rate": 1.0689608801043107e-05, |
|
"loss": 0.0671, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4951084644831986, |
|
"grad_norm": 0.21556126895353891, |
|
"learning_rate": 1.063451748603616e-05, |
|
"loss": 0.076, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.4968098681412165, |
|
"grad_norm": 0.27181671931172413, |
|
"learning_rate": 1.0579406828302124e-05, |
|
"loss": 0.0847, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.4985112717992344, |
|
"grad_norm": 0.31490941982013704, |
|
"learning_rate": 1.0524278507842637e-05, |
|
"loss": 0.1254, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.5002126754572522, |
|
"grad_norm": 0.28738576717755915, |
|
"learning_rate": 1.0469134205197762e-05, |
|
"loss": 0.0741, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.5019140791152701, |
|
"grad_norm": 0.24967744618311208, |
|
"learning_rate": 1.0413975601394765e-05, |
|
"loss": 0.0952, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.5036154827732879, |
|
"grad_norm": 0.18653535652798736, |
|
"learning_rate": 1.0358804377896876e-05, |
|
"loss": 0.0666, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.5053168864313058, |
|
"grad_norm": 0.31524703768298645, |
|
"learning_rate": 1.0303622216552022e-05, |
|
"loss": 0.0821, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.5070182900893236, |
|
"grad_norm": 0.19693363449938797, |
|
"learning_rate": 1.0248430799541564e-05, |
|
"loss": 0.0486, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.5087196937473416, |
|
"grad_norm": 0.3100958390915376, |
|
"learning_rate": 1.019323180932901e-05, |
|
"loss": 0.075, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.5104210974053595, |
|
"grad_norm": 0.2263867395185222, |
|
"learning_rate": 1.013802692860873e-05, |
|
"loss": 0.0729, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5121225010633773, |
|
"grad_norm": 0.30499024558777427, |
|
"learning_rate": 1.0082817840254667e-05, |
|
"loss": 0.0949, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.5138239047213952, |
|
"grad_norm": 0.27412297007732506, |
|
"learning_rate": 1.0027606227269026e-05, |
|
"loss": 0.0711, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.515525308379413, |
|
"grad_norm": 0.2236619712267448, |
|
"learning_rate": 9.972393772730975e-06, |
|
"loss": 0.0711, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.5172267120374309, |
|
"grad_norm": 0.21645526531787626, |
|
"learning_rate": 9.917182159745335e-06, |
|
"loss": 0.0696, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.5189281156954487, |
|
"grad_norm": 0.19297627616781193, |
|
"learning_rate": 9.861973071391272e-06, |
|
"loss": 0.0723, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.5206295193534666, |
|
"grad_norm": 0.17935274621615926, |
|
"learning_rate": 9.806768190670994e-06, |
|
"loss": 0.0603, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.5223309230114844, |
|
"grad_norm": 0.36516731843883593, |
|
"learning_rate": 9.751569200458438e-06, |
|
"loss": 0.1183, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.5240323266695024, |
|
"grad_norm": 0.2666543854374252, |
|
"learning_rate": 9.69637778344798e-06, |
|
"loss": 0.0683, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.5257337303275202, |
|
"grad_norm": 0.1559589176353223, |
|
"learning_rate": 9.641195622103126e-06, |
|
"loss": 0.0457, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.5274351339855381, |
|
"grad_norm": 0.21822959242881637, |
|
"learning_rate": 9.586024398605238e-06, |
|
"loss": 0.0728, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.529136537643556, |
|
"grad_norm": 0.1910067933489864, |
|
"learning_rate": 9.530865794802243e-06, |
|
"loss": 0.0518, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.5308379413015738, |
|
"grad_norm": 0.2286981911174439, |
|
"learning_rate": 9.475721492157365e-06, |
|
"loss": 0.0538, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.5325393449595917, |
|
"grad_norm": 0.2649695673029832, |
|
"learning_rate": 9.420593171697876e-06, |
|
"loss": 0.086, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.5342407486176095, |
|
"grad_norm": 0.2420342613285877, |
|
"learning_rate": 9.365482513963844e-06, |
|
"loss": 0.0972, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.5359421522756274, |
|
"grad_norm": 0.2855237701478863, |
|
"learning_rate": 9.310391198956896e-06, |
|
"loss": 0.0795, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.5376435559336452, |
|
"grad_norm": 0.19062890218128994, |
|
"learning_rate": 9.255320906089017e-06, |
|
"loss": 0.0385, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.5393449595916632, |
|
"grad_norm": 0.12818263127205254, |
|
"learning_rate": 9.200273314131356e-06, |
|
"loss": 0.0358, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.541046363249681, |
|
"grad_norm": 0.21271005750639677, |
|
"learning_rate": 9.145250101163032e-06, |
|
"loss": 0.0511, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.5427477669076989, |
|
"grad_norm": 0.3230145385857894, |
|
"learning_rate": 9.090252944520002e-06, |
|
"loss": 0.1249, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.5444491705657167, |
|
"grad_norm": 0.17129666760859652, |
|
"learning_rate": 9.035283520743911e-06, |
|
"loss": 0.0473, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5461505742237346, |
|
"grad_norm": 0.25326344089197617, |
|
"learning_rate": 8.980343505530988e-06, |
|
"loss": 0.0613, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.5478519778817524, |
|
"grad_norm": 0.2847520686151069, |
|
"learning_rate": 8.925434573680986e-06, |
|
"loss": 0.0883, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.5495533815397703, |
|
"grad_norm": 0.358576412450163, |
|
"learning_rate": 8.870558399046086e-06, |
|
"loss": 0.1097, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.5512547851977881, |
|
"grad_norm": 0.21816479879448794, |
|
"learning_rate": 8.815716654479903e-06, |
|
"loss": 0.0766, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.552956188855806, |
|
"grad_norm": 0.27098416317022683, |
|
"learning_rate": 8.76091101178648e-06, |
|
"loss": 0.0959, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.5546575925138238, |
|
"grad_norm": 0.16228239529758662, |
|
"learning_rate": 8.706143141669324e-06, |
|
"loss": 0.0427, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.5563589961718418, |
|
"grad_norm": 0.19457073616768888, |
|
"learning_rate": 8.651414713680474e-06, |
|
"loss": 0.0674, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.5580603998298597, |
|
"grad_norm": 0.21506185063350097, |
|
"learning_rate": 8.59672739616962e-06, |
|
"loss": 0.0725, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.5597618034878775, |
|
"grad_norm": 0.25928980929110046, |
|
"learning_rate": 8.542082856233216e-06, |
|
"loss": 0.0926, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.5614632071458954, |
|
"grad_norm": 0.19301809133671421, |
|
"learning_rate": 8.487482759663696e-06, |
|
"loss": 0.0661, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5631646108039132, |
|
"grad_norm": 0.19336796199191325, |
|
"learning_rate": 8.43292877089867e-06, |
|
"loss": 0.0694, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.5648660144619311, |
|
"grad_norm": 0.2300001624245782, |
|
"learning_rate": 8.378422552970185e-06, |
|
"loss": 0.0746, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.5665674181199489, |
|
"grad_norm": 0.23998094239502984, |
|
"learning_rate": 8.32396576745404e-06, |
|
"loss": 0.0696, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.5682688217779668, |
|
"grad_norm": 0.2052361707208072, |
|
"learning_rate": 8.269560074419126e-06, |
|
"loss": 0.0624, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.5699702254359846, |
|
"grad_norm": 0.2962367385563096, |
|
"learning_rate": 8.215207132376824e-06, |
|
"loss": 0.124, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.5716716290940026, |
|
"grad_norm": 0.24752974776344203, |
|
"learning_rate": 8.160908598230448e-06, |
|
"loss": 0.0653, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.5733730327520205, |
|
"grad_norm": 0.18127533151541284, |
|
"learning_rate": 8.10666612722473e-06, |
|
"loss": 0.0591, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.5750744364100383, |
|
"grad_norm": 0.18346580605719615, |
|
"learning_rate": 8.052481372895363e-06, |
|
"loss": 0.0488, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.5767758400680562, |
|
"grad_norm": 0.31877710293947625, |
|
"learning_rate": 7.998355987018606e-06, |
|
"loss": 0.0872, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.578477243726074, |
|
"grad_norm": 0.1344651628761348, |
|
"learning_rate": 7.944291619560914e-06, |
|
"loss": 0.0403, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5801786473840919, |
|
"grad_norm": 0.13295165420726127, |
|
"learning_rate": 7.890289918628644e-06, |
|
"loss": 0.0476, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.5818800510421097, |
|
"grad_norm": 0.27781942031149137, |
|
"learning_rate": 7.836352530417824e-06, |
|
"loss": 0.0925, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.5835814547001276, |
|
"grad_norm": 0.2923332990407699, |
|
"learning_rate": 7.782481099163958e-06, |
|
"loss": 0.1173, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.5852828583581454, |
|
"grad_norm": 0.20398201081622527, |
|
"learning_rate": 7.728677267091912e-06, |
|
"loss": 0.0712, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.5869842620161634, |
|
"grad_norm": 0.1827445671136079, |
|
"learning_rate": 7.674942674365847e-06, |
|
"loss": 0.0588, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.5886856656741812, |
|
"grad_norm": 0.16240437969547905, |
|
"learning_rate": 7.621278959039217e-06, |
|
"loss": 0.0637, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.5903870693321991, |
|
"grad_norm": 0.16062106806552065, |
|
"learning_rate": 7.567687757004843e-06, |
|
"loss": 0.0414, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.5920884729902169, |
|
"grad_norm": 0.2689581192862772, |
|
"learning_rate": 7.514170701945047e-06, |
|
"loss": 0.0897, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.5937898766482348, |
|
"grad_norm": 0.2681187781079101, |
|
"learning_rate": 7.460729425281831e-06, |
|
"loss": 0.0709, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.5954912803062526, |
|
"grad_norm": 0.17526558434710648, |
|
"learning_rate": 7.407365556127162e-06, |
|
"loss": 0.0539, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5971926839642705, |
|
"grad_norm": 0.1648773603270098, |
|
"learning_rate": 7.354080721233303e-06, |
|
"loss": 0.0503, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.5988940876222884, |
|
"grad_norm": 0.17999080092184985, |
|
"learning_rate": 7.300876544943227e-06, |
|
"loss": 0.0605, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.6005954912803062, |
|
"grad_norm": 0.20308998126444186, |
|
"learning_rate": 7.247754649141097e-06, |
|
"loss": 0.0769, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.6022968949383242, |
|
"grad_norm": 0.16018163485236867, |
|
"learning_rate": 7.194716653202826e-06, |
|
"loss": 0.0545, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.603998298596342, |
|
"grad_norm": 0.24744753769790884, |
|
"learning_rate": 7.1417641739467104e-06, |
|
"loss": 0.0776, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.6056997022543599, |
|
"grad_norm": 0.2298041880240223, |
|
"learning_rate": 7.088898825584139e-06, |
|
"loss": 0.0674, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.6074011059123777, |
|
"grad_norm": 0.19841362318124559, |
|
"learning_rate": 7.036122219670398e-06, |
|
"loss": 0.0635, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.6091025095703956, |
|
"grad_norm": 0.18877644807321198, |
|
"learning_rate": 6.9834359650555305e-06, |
|
"loss": 0.0777, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.6108039132284134, |
|
"grad_norm": 0.16102641349173863, |
|
"learning_rate": 6.930841667835295e-06, |
|
"loss": 0.0576, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.6125053168864313, |
|
"grad_norm": 0.20224797100905906, |
|
"learning_rate": 6.878340931302208e-06, |
|
"loss": 0.0754, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6142067205444491, |
|
"grad_norm": 0.2857194289415506, |
|
"learning_rate": 6.825935355896669e-06, |
|
"loss": 0.1052, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.615908124202467, |
|
"grad_norm": 0.20616924754434873, |
|
"learning_rate": 6.773626539158171e-06, |
|
"loss": 0.0716, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.617609527860485, |
|
"grad_norm": 0.23846455066099467, |
|
"learning_rate": 6.721416075676601e-06, |
|
"loss": 0.0847, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.6193109315185028, |
|
"grad_norm": 0.14989055759308637, |
|
"learning_rate": 6.669305557043626e-06, |
|
"loss": 0.0371, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.6210123351765207, |
|
"grad_norm": 0.25146318527723016, |
|
"learning_rate": 6.617296571804191e-06, |
|
"loss": 0.0938, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.6227137388345385, |
|
"grad_norm": 0.2795331744757292, |
|
"learning_rate": 6.565390705408072e-06, |
|
"loss": 0.0503, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.6244151424925564, |
|
"grad_norm": 0.17139219570184439, |
|
"learning_rate": 6.513589540161556e-06, |
|
"loss": 0.0578, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.6261165461505742, |
|
"grad_norm": 0.157560985399721, |
|
"learning_rate": 6.461894655179204e-06, |
|
"loss": 0.0582, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.6278179498085921, |
|
"grad_norm": 0.22752999438352467, |
|
"learning_rate": 6.410307626335717e-06, |
|
"loss": 0.0779, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.6295193534666099, |
|
"grad_norm": 0.27521908636927156, |
|
"learning_rate": 6.358830026217887e-06, |
|
"loss": 0.0826, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6312207571246278, |
|
"grad_norm": 0.21611309006232896, |
|
"learning_rate": 6.30746342407667e-06, |
|
"loss": 0.0575, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.6329221607826457, |
|
"grad_norm": 0.2022628238984182, |
|
"learning_rate": 6.256209385779341e-06, |
|
"loss": 0.0597, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.6346235644406636, |
|
"grad_norm": 0.20087283890633761, |
|
"learning_rate": 6.205069473761756e-06, |
|
"loss": 0.0565, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.6363249680986814, |
|
"grad_norm": 0.27973559478745097, |
|
"learning_rate": 6.154045246980742e-06, |
|
"loss": 0.0777, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.6380263717566993, |
|
"grad_norm": 0.2153647046344647, |
|
"learning_rate": 6.1031382608665456e-06, |
|
"loss": 0.065, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.6397277754147171, |
|
"grad_norm": 0.15832658385378948, |
|
"learning_rate": 6.052350067275441e-06, |
|
"loss": 0.0463, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.641429179072735, |
|
"grad_norm": 0.24889956825525697, |
|
"learning_rate": 6.001682214442406e-06, |
|
"loss": 0.0868, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.6431305827307529, |
|
"grad_norm": 0.2537522589782198, |
|
"learning_rate": 5.951136246933933e-06, |
|
"loss": 0.0771, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.6448319863887707, |
|
"grad_norm": 0.35384965176549915, |
|
"learning_rate": 5.900713705600951e-06, |
|
"loss": 0.0885, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.6465333900467886, |
|
"grad_norm": 0.24583176378622248, |
|
"learning_rate": 5.850416127531841e-06, |
|
"loss": 0.076, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6482347937048064, |
|
"grad_norm": 0.19401849479737754, |
|
"learning_rate": 5.800245046005585e-06, |
|
"loss": 0.055, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.6499361973628244, |
|
"grad_norm": 0.22292321754995165, |
|
"learning_rate": 5.750201990445024e-06, |
|
"loss": 0.0837, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.6516376010208422, |
|
"grad_norm": 0.15980625639550533, |
|
"learning_rate": 5.70028848637024e-06, |
|
"loss": 0.053, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.6533390046788601, |
|
"grad_norm": 0.17476218437373806, |
|
"learning_rate": 5.650506055352052e-06, |
|
"loss": 0.047, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.6550404083368779, |
|
"grad_norm": 0.26159431180356163, |
|
"learning_rate": 5.600856214965613e-06, |
|
"loss": 0.075, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.6567418119948958, |
|
"grad_norm": 0.24983048217170784, |
|
"learning_rate": 5.551340478744176e-06, |
|
"loss": 0.0819, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.6584432156529136, |
|
"grad_norm": 0.277677983790708, |
|
"learning_rate": 5.501960356132945e-06, |
|
"loss": 0.0743, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.6601446193109315, |
|
"grad_norm": 0.2687619515031017, |
|
"learning_rate": 5.4527173524430395e-06, |
|
"loss": 0.076, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.6618460229689493, |
|
"grad_norm": 0.18825889492381687, |
|
"learning_rate": 5.403612968805649e-06, |
|
"loss": 0.0533, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.6635474266269672, |
|
"grad_norm": 0.22722813251197366, |
|
"learning_rate": 5.354648702126229e-06, |
|
"loss": 0.0669, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6652488302849852, |
|
"grad_norm": 0.15428942499610793, |
|
"learning_rate": 5.305826045038899e-06, |
|
"loss": 0.0496, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.666950233943003, |
|
"grad_norm": 0.29481966419649847, |
|
"learning_rate": 5.257146485860927e-06, |
|
"loss": 0.0871, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.6686516376010209, |
|
"grad_norm": 0.2132988743676148, |
|
"learning_rate": 5.208611508547367e-06, |
|
"loss": 0.072, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.6703530412590387, |
|
"grad_norm": 0.1932211736507852, |
|
"learning_rate": 5.160222592645808e-06, |
|
"loss": 0.0672, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.6720544449170566, |
|
"grad_norm": 0.32017863808058095, |
|
"learning_rate": 5.111981213251293e-06, |
|
"loss": 0.0996, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.6737558485750744, |
|
"grad_norm": 0.33076228774617505, |
|
"learning_rate": 5.063888840961325e-06, |
|
"loss": 0.1062, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.6754572522330923, |
|
"grad_norm": 0.2152145333210106, |
|
"learning_rate": 5.015946941831064e-06, |
|
"loss": 0.0682, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.6771586558911101, |
|
"grad_norm": 0.21543841192984545, |
|
"learning_rate": 4.968156977328626e-06, |
|
"loss": 0.0572, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.678860059549128, |
|
"grad_norm": 0.16950132912260057, |
|
"learning_rate": 4.920520404290512e-06, |
|
"loss": 0.0577, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.680561463207146, |
|
"grad_norm": 0.20787322030508298, |
|
"learning_rate": 4.87303867487723e-06, |
|
"loss": 0.0561, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6822628668651638, |
|
"grad_norm": 0.1533621298140527, |
|
"learning_rate": 4.825713236529005e-06, |
|
"loss": 0.0435, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.6839642705231816, |
|
"grad_norm": 0.18296820958014204, |
|
"learning_rate": 4.778545531921668e-06, |
|
"loss": 0.0538, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.6856656741811995, |
|
"grad_norm": 0.24677189398080018, |
|
"learning_rate": 4.731536998922657e-06, |
|
"loss": 0.0715, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.6873670778392174, |
|
"grad_norm": 0.18381971512083234, |
|
"learning_rate": 4.684689070547216e-06, |
|
"loss": 0.0589, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.6890684814972352, |
|
"grad_norm": 0.19563535366138982, |
|
"learning_rate": 4.638003174914675e-06, |
|
"loss": 0.0375, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.6907698851552531, |
|
"grad_norm": 0.27920320369616836, |
|
"learning_rate": 4.591480735204953e-06, |
|
"loss": 0.0657, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.6924712888132709, |
|
"grad_norm": 0.22501473081164228, |
|
"learning_rate": 4.545123169615134e-06, |
|
"loss": 0.0754, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.6941726924712888, |
|
"grad_norm": 0.18685344173190274, |
|
"learning_rate": 4.49893189131627e-06, |
|
"loss": 0.0753, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.6958740961293067, |
|
"grad_norm": 0.17185385088024444, |
|
"learning_rate": 4.45290830841028e-06, |
|
"loss": 0.0514, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.6975754997873246, |
|
"grad_norm": 0.2702549233525611, |
|
"learning_rate": 4.407053823887033e-06, |
|
"loss": 0.0833, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6992769034453424, |
|
"grad_norm": 0.23362806883478313, |
|
"learning_rate": 4.361369835581569e-06, |
|
"loss": 0.0769, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.7009783071033603, |
|
"grad_norm": 0.2101592538580294, |
|
"learning_rate": 4.315857736131508e-06, |
|
"loss": 0.0602, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.7026797107613781, |
|
"grad_norm": 0.20842960868238944, |
|
"learning_rate": 4.2705189129345814e-06, |
|
"loss": 0.074, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.704381114419396, |
|
"grad_norm": 0.18803427484767865, |
|
"learning_rate": 4.225354748106328e-06, |
|
"loss": 0.07, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.7060825180774138, |
|
"grad_norm": 0.3066569805512131, |
|
"learning_rate": 4.180366618437996e-06, |
|
"loss": 0.093, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.7077839217354317, |
|
"grad_norm": 0.1744953221856188, |
|
"learning_rate": 4.13555589535453e-06, |
|
"loss": 0.0555, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.7094853253934496, |
|
"grad_norm": 0.166243605934049, |
|
"learning_rate": 4.0909239448727985e-06, |
|
"loss": 0.061, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.7111867290514675, |
|
"grad_norm": 0.20351482232627222, |
|
"learning_rate": 4.046472127559937e-06, |
|
"loss": 0.0715, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.7128881327094854, |
|
"grad_norm": 0.21225234915881963, |
|
"learning_rate": 4.002201798491875e-06, |
|
"loss": 0.0502, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.7145895363675032, |
|
"grad_norm": 0.20113105383651986, |
|
"learning_rate": 3.958114307212018e-06, |
|
"loss": 0.0645, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7162909400255211, |
|
"grad_norm": 0.18912603242706336, |
|
"learning_rate": 3.91421099769013e-06, |
|
"loss": 0.0642, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.7179923436835389, |
|
"grad_norm": 0.1879026045376002, |
|
"learning_rate": 3.870493208281337e-06, |
|
"loss": 0.0479, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.7196937473415568, |
|
"grad_norm": 0.26477442490530756, |
|
"learning_rate": 3.826962271685351e-06, |
|
"loss": 0.0831, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.7213951509995746, |
|
"grad_norm": 0.29428954187807327, |
|
"learning_rate": 3.7836195149058386e-06, |
|
"loss": 0.0724, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.7230965546575925, |
|
"grad_norm": 0.23675032676032767, |
|
"learning_rate": 3.7404662592099483e-06, |
|
"loss": 0.0854, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.7247979583156103, |
|
"grad_norm": 0.283635317471, |
|
"learning_rate": 3.697503820088063e-06, |
|
"loss": 0.0805, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.7264993619736282, |
|
"grad_norm": 0.20299536681350067, |
|
"learning_rate": 3.654733507213678e-06, |
|
"loss": 0.0629, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.7282007656316462, |
|
"grad_norm": 0.2737835974179114, |
|
"learning_rate": 3.61215662440349e-06, |
|
"loss": 0.0813, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.729902169289664, |
|
"grad_norm": 0.2292958769762407, |
|
"learning_rate": 3.5697744695776326e-06, |
|
"loss": 0.0625, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.7316035729476819, |
|
"grad_norm": 0.2690574028374729, |
|
"learning_rate": 3.5275883347201336e-06, |
|
"loss": 0.0895, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7333049766056997, |
|
"grad_norm": 0.2612064488237033, |
|
"learning_rate": 3.4855995058395066e-06, |
|
"loss": 0.076, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.7350063802637176, |
|
"grad_norm": 0.20866850048700003, |
|
"learning_rate": 3.443809262929575e-06, |
|
"loss": 0.0719, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.7367077839217354, |
|
"grad_norm": 0.2274495669126989, |
|
"learning_rate": 3.4022188799304214e-06, |
|
"loss": 0.0754, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.7384091875797533, |
|
"grad_norm": 0.21436057034429756, |
|
"learning_rate": 3.36082962468958e-06, |
|
"loss": 0.0634, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.7401105912377711, |
|
"grad_norm": 0.2122758067179719, |
|
"learning_rate": 3.3196427589233725e-06, |
|
"loss": 0.0605, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.741811994895789, |
|
"grad_norm": 0.2387395787166068, |
|
"learning_rate": 3.2786595381784512e-06, |
|
"loss": 0.0679, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.7435133985538069, |
|
"grad_norm": 0.31722612816219875, |
|
"learning_rate": 3.2378812117935154e-06, |
|
"loss": 0.1076, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.7452148022118248, |
|
"grad_norm": 0.34631520054017056, |
|
"learning_rate": 3.1973090228612404e-06, |
|
"loss": 0.1121, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.7469162058698426, |
|
"grad_norm": 0.23043404441429802, |
|
"learning_rate": 3.15694420819038e-06, |
|
"loss": 0.0877, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.7486176095278605, |
|
"grad_norm": 0.09929972754097662, |
|
"learning_rate": 3.116787998268046e-06, |
|
"loss": 0.0281, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7503190131858783, |
|
"grad_norm": 0.2540228794638467, |
|
"learning_rate": 3.076841617222228e-06, |
|
"loss": 0.1016, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.7520204168438962, |
|
"grad_norm": 0.20530486566659917, |
|
"learning_rate": 3.0371062827844434e-06, |
|
"loss": 0.0759, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.753721820501914, |
|
"grad_norm": 0.22889025759698128, |
|
"learning_rate": 2.997583206252647e-06, |
|
"loss": 0.0641, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.7554232241599319, |
|
"grad_norm": 0.22376344098617418, |
|
"learning_rate": 2.958273592454285e-06, |
|
"loss": 0.0696, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.7571246278179498, |
|
"grad_norm": 0.29335139294143503, |
|
"learning_rate": 2.9191786397095778e-06, |
|
"loss": 0.0722, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.7588260314759677, |
|
"grad_norm": 0.19904638249374088, |
|
"learning_rate": 2.880299539794975e-06, |
|
"loss": 0.0644, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.7605274351339856, |
|
"grad_norm": 0.27398415500191214, |
|
"learning_rate": 2.841637477906851e-06, |
|
"loss": 0.097, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.7622288387920034, |
|
"grad_norm": 0.1909061169980495, |
|
"learning_rate": 2.803193632625346e-06, |
|
"loss": 0.0653, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.7639302424500213, |
|
"grad_norm": 0.1839577194240098, |
|
"learning_rate": 2.7649691758784603e-06, |
|
"loss": 0.0612, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.7656316461080391, |
|
"grad_norm": 0.18749598930597564, |
|
"learning_rate": 2.7269652729063233e-06, |
|
"loss": 0.0586, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.767333049766057, |
|
"grad_norm": 0.2550319493358391, |
|
"learning_rate": 2.689183082225659e-06, |
|
"loss": 0.0784, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.7690344534240748, |
|
"grad_norm": 0.2702034699283639, |
|
"learning_rate": 2.65162375559449e-06, |
|
"loss": 0.1012, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.7707358570820927, |
|
"grad_norm": 0.22699731806268653, |
|
"learning_rate": 2.614288437977014e-06, |
|
"loss": 0.08, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.7724372607401105, |
|
"grad_norm": 0.28841442095335584, |
|
"learning_rate": 2.5771782675087078e-06, |
|
"loss": 0.105, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.7741386643981285, |
|
"grad_norm": 0.2545180037798505, |
|
"learning_rate": 2.5402943754616182e-06, |
|
"loss": 0.0847, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.7758400680561464, |
|
"grad_norm": 0.16486311867632228, |
|
"learning_rate": 2.5036378862099e-06, |
|
"loss": 0.0409, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.7775414717141642, |
|
"grad_norm": 0.263761370929647, |
|
"learning_rate": 2.467209917195513e-06, |
|
"loss": 0.096, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.7792428753721821, |
|
"grad_norm": 0.235530147590817, |
|
"learning_rate": 2.4310115788941855e-06, |
|
"loss": 0.0595, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.7809442790301999, |
|
"grad_norm": 0.2110709448726579, |
|
"learning_rate": 2.3950439747815357e-06, |
|
"loss": 0.07, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.7826456826882178, |
|
"grad_norm": 0.1763868737174647, |
|
"learning_rate": 2.359308201299454e-06, |
|
"loss": 0.0586, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7843470863462356, |
|
"grad_norm": 0.16676117425431294, |
|
"learning_rate": 2.3238053478226665e-06, |
|
"loss": 0.0492, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.7860484900042535, |
|
"grad_norm": 0.15717970250735389, |
|
"learning_rate": 2.2885364966255372e-06, |
|
"loss": 0.0487, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.7877498936622713, |
|
"grad_norm": 0.28197077618286126, |
|
"learning_rate": 2.2535027228490582e-06, |
|
"loss": 0.0857, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.7894512973202893, |
|
"grad_norm": 0.264862322279995, |
|
"learning_rate": 2.2187050944680942e-06, |
|
"loss": 0.0937, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.7911527009783071, |
|
"grad_norm": 0.22066167775922854, |
|
"learning_rate": 2.18414467225882e-06, |
|
"loss": 0.0642, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.792854104636325, |
|
"grad_norm": 0.2250702122829751, |
|
"learning_rate": 2.1498225097663695e-06, |
|
"loss": 0.0831, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.7945555082943428, |
|
"grad_norm": 0.22295479048611572, |
|
"learning_rate": 2.115739653272747e-06, |
|
"loss": 0.0631, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.7962569119523607, |
|
"grad_norm": 0.24242984035739493, |
|
"learning_rate": 2.0818971417649013e-06, |
|
"loss": 0.0591, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.7979583156103786, |
|
"grad_norm": 0.2612637093823693, |
|
"learning_rate": 2.048296006903081e-06, |
|
"loss": 0.1046, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.7996597192683964, |
|
"grad_norm": 0.1792782441746806, |
|
"learning_rate": 2.0149372729893646e-06, |
|
"loss": 0.0445, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8013611229264143, |
|
"grad_norm": 0.29350099593257656, |
|
"learning_rate": 1.981821956936448e-06, |
|
"loss": 0.0804, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.8030625265844321, |
|
"grad_norm": 0.22341777662676934, |
|
"learning_rate": 1.9489510682366363e-06, |
|
"loss": 0.0745, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.8047639302424501, |
|
"grad_norm": 0.1589102792801742, |
|
"learning_rate": 1.916325608931079e-06, |
|
"loss": 0.047, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.8064653339004679, |
|
"grad_norm": 0.2345326238035068, |
|
"learning_rate": 1.8839465735792095e-06, |
|
"loss": 0.0572, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.8081667375584858, |
|
"grad_norm": 0.24004839314637838, |
|
"learning_rate": 1.8518149492284477e-06, |
|
"loss": 0.0884, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.8098681412165036, |
|
"grad_norm": 0.29421822191095054, |
|
"learning_rate": 1.8199317153840933e-06, |
|
"loss": 0.0887, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.8115695448745215, |
|
"grad_norm": 0.29025654033124915, |
|
"learning_rate": 1.7882978439794708e-06, |
|
"loss": 0.1021, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.8132709485325393, |
|
"grad_norm": 0.25878138649406646, |
|
"learning_rate": 1.756914299346304e-06, |
|
"loss": 0.0616, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.8149723521905572, |
|
"grad_norm": 0.2102766744469287, |
|
"learning_rate": 1.7257820381853197e-06, |
|
"loss": 0.0627, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.816673755848575, |
|
"grad_norm": 0.24781694684746497, |
|
"learning_rate": 1.6949020095370816e-06, |
|
"loss": 0.0766, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8183751595065929, |
|
"grad_norm": 0.1734085990747018, |
|
"learning_rate": 1.6642751547530512e-06, |
|
"loss": 0.0514, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.8200765631646108, |
|
"grad_norm": 0.2117204201108364, |
|
"learning_rate": 1.6339024074669107e-06, |
|
"loss": 0.0717, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.8217779668226287, |
|
"grad_norm": 0.23022449445835655, |
|
"learning_rate": 1.6037846935660807e-06, |
|
"loss": 0.0697, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.8234793704806466, |
|
"grad_norm": 0.2031147008426011, |
|
"learning_rate": 1.5739229311635152e-06, |
|
"loss": 0.0647, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.8251807741386644, |
|
"grad_norm": 0.16452534724080284, |
|
"learning_rate": 1.5443180305696948e-06, |
|
"loss": 0.0477, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.8268821777966823, |
|
"grad_norm": 0.22807646976291562, |
|
"learning_rate": 1.5149708942648922e-06, |
|
"loss": 0.0814, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.8285835814547001, |
|
"grad_norm": 0.24827387251547514, |
|
"learning_rate": 1.4858824168716524e-06, |
|
"loss": 0.0755, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.830284985112718, |
|
"grad_norm": 0.22526986180364844, |
|
"learning_rate": 1.4570534851275241e-06, |
|
"loss": 0.076, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.8319863887707358, |
|
"grad_norm": 0.20155351877240635, |
|
"learning_rate": 1.4284849778580279e-06, |
|
"loss": 0.0698, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.8336877924287537, |
|
"grad_norm": 0.1495610561199564, |
|
"learning_rate": 1.4001777659498584e-06, |
|
"loss": 0.04, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8353891960867715, |
|
"grad_norm": 0.22042874488356587, |
|
"learning_rate": 1.3721327123243533e-06, |
|
"loss": 0.0696, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.8370905997447895, |
|
"grad_norm": 0.217650029772456, |
|
"learning_rate": 1.3443506719111666e-06, |
|
"loss": 0.0499, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.8387920034028074, |
|
"grad_norm": 0.28478642607874244, |
|
"learning_rate": 1.3168324916222296e-06, |
|
"loss": 0.1052, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.8404934070608252, |
|
"grad_norm": 0.2847136340573529, |
|
"learning_rate": 1.28957901032591e-06, |
|
"loss": 0.0772, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.8421948107188431, |
|
"grad_norm": 0.21335659065873505, |
|
"learning_rate": 1.2625910588214608e-06, |
|
"loss": 0.0651, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.8438962143768609, |
|
"grad_norm": 0.2007624647622523, |
|
"learning_rate": 1.2358694598136755e-06, |
|
"loss": 0.0579, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.8455976180348788, |
|
"grad_norm": 0.22052289165556443, |
|
"learning_rate": 1.2094150278878303e-06, |
|
"loss": 0.0564, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.8472990216928966, |
|
"grad_norm": 0.23003203856097848, |
|
"learning_rate": 1.1832285694848255e-06, |
|
"loss": 0.0604, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.8490004253509145, |
|
"grad_norm": 0.18674042534277024, |
|
"learning_rate": 1.1573108828766255e-06, |
|
"loss": 0.0442, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.8507018290089323, |
|
"grad_norm": 0.21647795156393285, |
|
"learning_rate": 1.1316627581419137e-06, |
|
"loss": 0.0535, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8524032326669503, |
|
"grad_norm": 0.3119714991626263, |
|
"learning_rate": 1.1062849771420025e-06, |
|
"loss": 0.1191, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.8541046363249681, |
|
"grad_norm": 0.21164862051641084, |
|
"learning_rate": 1.0811783134970132e-06, |
|
"loss": 0.0658, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.855806039982986, |
|
"grad_norm": 0.2724719475504451, |
|
"learning_rate": 1.0563435325622762e-06, |
|
"loss": 0.0736, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.8575074436410038, |
|
"grad_norm": 0.22358105859093347, |
|
"learning_rate": 1.0317813914050157e-06, |
|
"loss": 0.0711, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.8592088472990217, |
|
"grad_norm": 0.2978912375008609, |
|
"learning_rate": 1.007492638781259e-06, |
|
"loss": 0.0895, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.8609102509570395, |
|
"grad_norm": 0.209790599151719, |
|
"learning_rate": 9.834780151130196e-07, |
|
"loss": 0.0718, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.8626116546150574, |
|
"grad_norm": 0.1681086042265431, |
|
"learning_rate": 9.597382524657173e-07, |
|
"loss": 0.0592, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.8643130582730753, |
|
"grad_norm": 0.20035287400774016, |
|
"learning_rate": 9.362740745258736e-07, |
|
"loss": 0.074, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.8660144619310931, |
|
"grad_norm": 0.17882197027245497, |
|
"learning_rate": 9.13086196579035e-07, |
|
"loss": 0.0481, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.8677158655891111, |
|
"grad_norm": 0.2010107765861354, |
|
"learning_rate": 8.901753254879885e-07, |
|
"loss": 0.0599, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8694172692471289, |
|
"grad_norm": 0.1784357377756698, |
|
"learning_rate": 8.67542159671192e-07, |
|
"loss": 0.0422, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.8711186729051468, |
|
"grad_norm": 0.2595041553389473, |
|
"learning_rate": 8.451873890814988e-07, |
|
"loss": 0.0834, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.8728200765631646, |
|
"grad_norm": 0.18720029506475785, |
|
"learning_rate": 8.231116951851204e-07, |
|
"loss": 0.0441, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.8745214802211825, |
|
"grad_norm": 0.13605762437865937, |
|
"learning_rate": 8.013157509408509e-07, |
|
"loss": 0.0499, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.8762228838792003, |
|
"grad_norm": 0.3392074002609155, |
|
"learning_rate": 7.79800220779554e-07, |
|
"loss": 0.0935, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.8779242875372182, |
|
"grad_norm": 0.27563422248531844, |
|
"learning_rate": 7.585657605839059e-07, |
|
"loss": 0.0749, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.879625691195236, |
|
"grad_norm": 0.2928759924757975, |
|
"learning_rate": 7.376130176684082e-07, |
|
"loss": 0.107, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.8813270948532539, |
|
"grad_norm": 0.20820494565138964, |
|
"learning_rate": 7.169426307596428e-07, |
|
"loss": 0.0711, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.8830284985112719, |
|
"grad_norm": 0.1778541297140114, |
|
"learning_rate": 6.965552299768186e-07, |
|
"loss": 0.0548, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.8847299021692897, |
|
"grad_norm": 0.18347737015999377, |
|
"learning_rate": 6.764514368125419e-07, |
|
"loss": 0.0468, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8864313058273076, |
|
"grad_norm": 0.22402928085104057, |
|
"learning_rate": 6.566318641138902e-07, |
|
"loss": 0.0819, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.8881327094853254, |
|
"grad_norm": 0.10087048048840686, |
|
"learning_rate": 6.370971160637129e-07, |
|
"loss": 0.0257, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.8898341131433433, |
|
"grad_norm": 0.24244337938824437, |
|
"learning_rate": 6.178477881622325e-07, |
|
"loss": 0.0929, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.8915355168013611, |
|
"grad_norm": 0.2215797215803862, |
|
"learning_rate": 5.98884467208869e-07, |
|
"loss": 0.0707, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.893236920459379, |
|
"grad_norm": 0.21837004790407136, |
|
"learning_rate": 5.802077312843723e-07, |
|
"loss": 0.0601, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.8949383241173968, |
|
"grad_norm": 0.13798341621078078, |
|
"learning_rate": 5.618181497331865e-07, |
|
"loss": 0.0387, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.8966397277754147, |
|
"grad_norm": 0.18913792414386757, |
|
"learning_rate": 5.437162831460962e-07, |
|
"loss": 0.0498, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.8983411314334325, |
|
"grad_norm": 0.22877307977873534, |
|
"learning_rate": 5.259026833431468e-07, |
|
"loss": 0.0704, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.9000425350914505, |
|
"grad_norm": 0.20234348876984656, |
|
"learning_rate": 5.083778933568073e-07, |
|
"loss": 0.0649, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.9017439387494683, |
|
"grad_norm": 0.2531050005740147, |
|
"learning_rate": 4.911424474154314e-07, |
|
"loss": 0.0878, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.9034453424074862, |
|
"grad_norm": 0.24193903233807051, |
|
"learning_rate": 4.741968709269573e-07, |
|
"loss": 0.073, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.905146746065504, |
|
"grad_norm": 0.2541146802125869, |
|
"learning_rate": 4.575416804629085e-07, |
|
"loss": 0.0563, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.9068481497235219, |
|
"grad_norm": 0.16762038698102058, |
|
"learning_rate": 4.411773837426303e-07, |
|
"loss": 0.053, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.9085495533815398, |
|
"grad_norm": 0.23076714189266884, |
|
"learning_rate": 4.2510447961782055e-07, |
|
"loss": 0.0687, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.9102509570395576, |
|
"grad_norm": 0.26029701749366, |
|
"learning_rate": 4.093234580573202e-07, |
|
"loss": 0.0765, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.9119523606975755, |
|
"grad_norm": 0.2130287074707999, |
|
"learning_rate": 3.938348001321812e-07, |
|
"loss": 0.062, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.9136537643555933, |
|
"grad_norm": 0.18154744728141692, |
|
"learning_rate": 3.786389780009958e-07, |
|
"loss": 0.0521, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.9153551680136113, |
|
"grad_norm": 0.15469238363767565, |
|
"learning_rate": 3.637364548955047e-07, |
|
"loss": 0.0358, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.9170565716716291, |
|
"grad_norm": 0.3678961865523596, |
|
"learning_rate": 3.491276851064784e-07, |
|
"loss": 0.0881, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.918757975329647, |
|
"grad_norm": 0.23428870195639928, |
|
"learning_rate": 3.3481311396986626e-07, |
|
"loss": 0.0844, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9204593789876648, |
|
"grad_norm": 0.2491236742624812, |
|
"learning_rate": 3.2079317785322363e-07, |
|
"loss": 0.0767, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.9221607826456827, |
|
"grad_norm": 0.27866990645467393, |
|
"learning_rate": 3.0706830414240164e-07, |
|
"loss": 0.0862, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.9238621863037005, |
|
"grad_norm": 0.13956980085260687, |
|
"learning_rate": 2.9363891122853097e-07, |
|
"loss": 0.0437, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.9255635899617184, |
|
"grad_norm": 0.1975336620060032, |
|
"learning_rate": 2.805054084952552e-07, |
|
"loss": 0.076, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.9272649936197362, |
|
"grad_norm": 0.22649643287994412, |
|
"learning_rate": 2.6766819630626216e-07, |
|
"loss": 0.0647, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.9289663972777541, |
|
"grad_norm": 0.1932416005125246, |
|
"learning_rate": 2.5512766599306903e-07, |
|
"loss": 0.0642, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.9306678009357721, |
|
"grad_norm": 0.1779695034006232, |
|
"learning_rate": 2.4288419984310086e-07, |
|
"loss": 0.0439, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.9323692045937899, |
|
"grad_norm": 0.18689194535029088, |
|
"learning_rate": 2.3093817108803318e-07, |
|
"loss": 0.0761, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.9340706082518078, |
|
"grad_norm": 0.13597627329781425, |
|
"learning_rate": 2.1928994389241454e-07, |
|
"loss": 0.0369, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.9357720119098256, |
|
"grad_norm": 0.21989658377102142, |
|
"learning_rate": 2.0793987334256637e-07, |
|
"loss": 0.0625, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9374734155678435, |
|
"grad_norm": 0.21266747025038635, |
|
"learning_rate": 1.968883054357562e-07, |
|
"loss": 0.0689, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.9391748192258613, |
|
"grad_norm": 0.25928931998255494, |
|
"learning_rate": 1.861355770696549e-07, |
|
"loss": 0.1025, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.9408762228838792, |
|
"grad_norm": 0.2801452061064216, |
|
"learning_rate": 1.7568201603205827e-07, |
|
"loss": 0.0869, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.942577626541897, |
|
"grad_norm": 0.3090393640358726, |
|
"learning_rate": 1.6552794099090718e-07, |
|
"loss": 0.1212, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.9442790301999149, |
|
"grad_norm": 0.17720681659522422, |
|
"learning_rate": 1.5567366148455887e-07, |
|
"loss": 0.0355, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.9459804338579328, |
|
"grad_norm": 0.15472295591692306, |
|
"learning_rate": 1.4611947791236314e-07, |
|
"loss": 0.0395, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.9476818375159507, |
|
"grad_norm": 0.26730416240977917, |
|
"learning_rate": 1.3686568152549539e-07, |
|
"loss": 0.0595, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.9493832411739686, |
|
"grad_norm": 0.21978673843573918, |
|
"learning_rate": 1.2791255441809037e-07, |
|
"loss": 0.064, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.9510846448319864, |
|
"grad_norm": 0.19683818868694625, |
|
"learning_rate": 1.1926036951862563e-07, |
|
"loss": 0.0672, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.9527860484900043, |
|
"grad_norm": 0.19189970865124908, |
|
"learning_rate": 1.109093905816172e-07, |
|
"loss": 0.0569, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9544874521480221, |
|
"grad_norm": 0.22200476537406752, |
|
"learning_rate": 1.0285987217957038e-07, |
|
"loss": 0.0761, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.95618885580604, |
|
"grad_norm": 0.2253757142644252, |
|
"learning_rate": 9.511205969522263e-08, |
|
"loss": 0.0645, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.9578902594640578, |
|
"grad_norm": 0.218059392950448, |
|
"learning_rate": 8.76661893140629e-08, |
|
"loss": 0.0682, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.9595916631220757, |
|
"grad_norm": 0.2649165636045151, |
|
"learning_rate": 8.052248801712958e-08, |
|
"loss": 0.0912, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.9612930667800936, |
|
"grad_norm": 0.20253828553816175, |
|
"learning_rate": 7.36811735740961e-08, |
|
"loss": 0.0571, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.9629944704381115, |
|
"grad_norm": 0.19699087263694953, |
|
"learning_rate": 6.714245453662504e-08, |
|
"loss": 0.0457, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.9646958740961293, |
|
"grad_norm": 0.27159809390838907, |
|
"learning_rate": 6.090653023201997e-08, |
|
"loss": 0.1057, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.9663972777541472, |
|
"grad_norm": 0.2589995420361442, |
|
"learning_rate": 5.497359075714026e-08, |
|
"loss": 0.102, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.968098681412165, |
|
"grad_norm": 0.19790945958126713, |
|
"learning_rate": 4.934381697261015e-08, |
|
"loss": 0.0608, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.9698000850701829, |
|
"grad_norm": 0.24043673016274164, |
|
"learning_rate": 4.401738049730653e-08, |
|
"loss": 0.0609, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.9715014887282007, |
|
"grad_norm": 0.2690784354034665, |
|
"learning_rate": 3.899444370312533e-08, |
|
"loss": 0.0861, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.9732028923862186, |
|
"grad_norm": 0.22841785491856118, |
|
"learning_rate": 3.4275159710032146e-08, |
|
"loss": 0.0712, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.9749042960442365, |
|
"grad_norm": 0.1950332471845292, |
|
"learning_rate": 2.9859672381392644e-08, |
|
"loss": 0.0532, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.9766056997022544, |
|
"grad_norm": 0.22678060127201066, |
|
"learning_rate": 2.574811631959273e-08, |
|
"loss": 0.0771, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.9783071033602723, |
|
"grad_norm": 0.3698823942978684, |
|
"learning_rate": 2.1940616861929608e-08, |
|
"loss": 0.0693, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.9800085070182901, |
|
"grad_norm": 0.23298311393693824, |
|
"learning_rate": 1.8437290076792624e-08, |
|
"loss": 0.0725, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.981709910676308, |
|
"grad_norm": 0.2028362088180894, |
|
"learning_rate": 1.5238242760126088e-08, |
|
"loss": 0.0756, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.9834113143343258, |
|
"grad_norm": 0.29483561036206646, |
|
"learning_rate": 1.234357243217188e-08, |
|
"loss": 0.0988, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.9851127179923437, |
|
"grad_norm": 0.2525424387277673, |
|
"learning_rate": 9.753367334499608e-09, |
|
"loss": 0.0771, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.9868141216503615, |
|
"grad_norm": 0.26576715050467775, |
|
"learning_rate": 7.467706427312093e-09, |
|
"loss": 0.0612, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9885155253083794, |
|
"grad_norm": 0.1729974193289849, |
|
"learning_rate": 5.486659387043958e-09, |
|
"loss": 0.0371, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.9902169289663972, |
|
"grad_norm": 0.19337221194799736, |
|
"learning_rate": 3.810286604232216e-09, |
|
"loss": 0.0548, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.9919183326244151, |
|
"grad_norm": 0.2881735818611327, |
|
"learning_rate": 2.4386391816777488e-09, |
|
"loss": 0.0866, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.993619736282433, |
|
"grad_norm": 0.25389258821605704, |
|
"learning_rate": 1.3717589328898773e-09, |
|
"loss": 0.0442, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.9953211399404509, |
|
"grad_norm": 0.2721835551378759, |
|
"learning_rate": 6.096783808062778e-10, |
|
"loss": 0.0977, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.9970225435984688, |
|
"grad_norm": 0.2313901379023938, |
|
"learning_rate": 1.524207568059932e-10, |
|
"loss": 0.0628, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.9987239472564866, |
|
"grad_norm": 0.24018936080778358, |
|
"learning_rate": 0.0, |
|
"loss": 0.0614, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.9987239472564866, |
|
"step": 587, |
|
"total_flos": 1551551083839488.0, |
|
"train_loss": 0.1089990799881683, |
|
"train_runtime": 5367.2694, |
|
"train_samples_per_second": 14.016, |
|
"train_steps_per_second": 0.109 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 587, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1551551083839488.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|