{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1047,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0028653295128939827,
      "grad_norm": 257.7583052380938,
      "learning_rate": 1.904761904761905e-07,
      "loss": 9.25,
      "step": 1
    },
    {
      "epoch": 0.014326647564469915,
      "grad_norm": 241.29073677978286,
      "learning_rate": 9.523809523809525e-07,
      "loss": 9.2969,
      "step": 5
    },
    {
      "epoch": 0.02865329512893983,
      "grad_norm": 225.58659295977776,
      "learning_rate": 1.904761904761905e-06,
      "loss": 9.0344,
      "step": 10
    },
    {
      "epoch": 0.04297994269340974,
      "grad_norm": 87.70853454265482,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 7.9078,
      "step": 15
    },
    {
      "epoch": 0.05730659025787966,
      "grad_norm": 43.9183222857209,
      "learning_rate": 3.80952380952381e-06,
      "loss": 6.8359,
      "step": 20
    },
    {
      "epoch": 0.07163323782234957,
      "grad_norm": 31.094485209793813,
      "learning_rate": 4.761904761904762e-06,
      "loss": 5.4719,
      "step": 25
    },
    {
      "epoch": 0.08595988538681948,
      "grad_norm": 13.666017374081363,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 4.1055,
      "step": 30
    },
    {
      "epoch": 0.10028653295128939,
      "grad_norm": 10.161245236008533,
      "learning_rate": 6.666666666666667e-06,
      "loss": 3.25,
      "step": 35
    },
    {
      "epoch": 0.11461318051575932,
      "grad_norm": 3.2026690104839344,
      "learning_rate": 7.61904761904762e-06,
      "loss": 2.3469,
      "step": 40
    },
    {
      "epoch": 0.12893982808022922,
      "grad_norm": 1.9300134153720614,
      "learning_rate": 8.571428571428571e-06,
      "loss": 1.9957,
      "step": 45
    },
    {
      "epoch": 0.14326647564469913,
      "grad_norm": 0.868748661209738,
      "learning_rate": 9.523809523809525e-06,
      "loss": 1.7973,
      "step": 50
    },
    {
      "epoch": 0.15759312320916904,
      "grad_norm": 0.8403053660761957,
      "learning_rate": 1.0476190476190477e-05,
      "loss": 1.777,
      "step": 55
    },
    {
      "epoch": 0.17191977077363896,
      "grad_norm": 0.6204488839484352,
      "learning_rate": 1.1428571428571429e-05,
      "loss": 1.6609,
      "step": 60
    },
    {
      "epoch": 0.18624641833810887,
      "grad_norm": 0.46072894517704527,
      "learning_rate": 1.2380952380952383e-05,
      "loss": 1.5734,
      "step": 65
    },
    {
      "epoch": 0.20057306590257878,
      "grad_norm": 0.40370134421433296,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 1.568,
      "step": 70
    },
    {
      "epoch": 0.2148997134670487,
      "grad_norm": 0.4261850895407627,
      "learning_rate": 1.4285714285714287e-05,
      "loss": 1.5102,
      "step": 75
    },
    {
      "epoch": 0.22922636103151864,
      "grad_norm": 0.385686551593444,
      "learning_rate": 1.523809523809524e-05,
      "loss": 1.5504,
      "step": 80
    },
    {
      "epoch": 0.24355300859598855,
      "grad_norm": 0.3521586438938912,
      "learning_rate": 1.6190476190476193e-05,
      "loss": 1.4641,
      "step": 85
    },
    {
      "epoch": 0.25787965616045844,
      "grad_norm": 0.37387478168247884,
      "learning_rate": 1.7142857142857142e-05,
      "loss": 1.5211,
      "step": 90
    },
    {
      "epoch": 0.2722063037249284,
      "grad_norm": 0.3334520851844191,
      "learning_rate": 1.8095238095238097e-05,
      "loss": 1.4211,
      "step": 95
    },
    {
      "epoch": 0.28653295128939826,
      "grad_norm": 0.3374208227712567,
      "learning_rate": 1.904761904761905e-05,
      "loss": 1.4105,
      "step": 100
    },
    {
      "epoch": 0.3008595988538682,
      "grad_norm": 0.3494984015612909,
      "learning_rate": 2e-05,
      "loss": 1.3641,
      "step": 105
    },
    {
      "epoch": 0.3151862464183381,
      "grad_norm": 0.3291034349461991,
      "learning_rate": 1.999860973403976e-05,
      "loss": 1.4148,
      "step": 110
    },
    {
      "epoch": 0.32951289398280803,
      "grad_norm": 0.3435680154282283,
      "learning_rate": 1.999443932272694e-05,
      "loss": 1.4477,
      "step": 115
    },
    {
      "epoch": 0.3438395415472779,
      "grad_norm": 0.3218335176321063,
      "learning_rate": 1.99874899256577e-05,
      "loss": 1.3348,
      "step": 120
    },
    {
      "epoch": 0.35816618911174786,
      "grad_norm": 0.3169060110914379,
      "learning_rate": 1.997776347513409e-05,
      "loss": 1.3887,
      "step": 125
    },
    {
      "epoch": 0.37249283667621774,
      "grad_norm": 0.31497640525358284,
      "learning_rate": 1.9965262675626726e-05,
      "loss": 1.348,
      "step": 130
    },
    {
      "epoch": 0.3868194842406877,
      "grad_norm": 0.31512378020322396,
      "learning_rate": 1.994999100302281e-05,
      "loss": 1.3641,
      "step": 135
    },
    {
      "epoch": 0.40114613180515757,
      "grad_norm": 0.33739494340791354,
      "learning_rate": 1.9931952703659655e-05,
      "loss": 1.3059,
      "step": 140
    },
    {
      "epoch": 0.4154727793696275,
      "grad_norm": 0.31137992113387536,
      "learning_rate": 1.991115279314398e-05,
      "loss": 1.3754,
      "step": 145
    },
    {
      "epoch": 0.4297994269340974,
      "grad_norm": 0.36579857260459275,
      "learning_rate": 1.9887597054957304e-05,
      "loss": 1.3375,
      "step": 150
    },
    {
      "epoch": 0.44412607449856734,
      "grad_norm": 0.33522933646678277,
      "learning_rate": 1.9861292038847818e-05,
      "loss": 1.3645,
      "step": 155
    },
    {
      "epoch": 0.4584527220630373,
      "grad_norm": 0.32743853323105904,
      "learning_rate": 1.983224505900921e-05,
      "loss": 1.3012,
      "step": 160
    },
    {
      "epoch": 0.47277936962750716,
      "grad_norm": 0.3294434325423303,
      "learning_rate": 1.9800464192046956e-05,
      "loss": 1.368,
      "step": 165
    },
    {
      "epoch": 0.4871060171919771,
      "grad_norm": 0.3258072749981886,
      "learning_rate": 1.976595827473255e-05,
      "loss": 1.3148,
      "step": 170
    },
    {
      "epoch": 0.501432664756447,
      "grad_norm": 0.3864088044514439,
      "learning_rate": 1.9728736901546454e-05,
      "loss": 1.3098,
      "step": 175
    },
    {
      "epoch": 0.5157593123209169,
      "grad_norm": 0.3364423513855298,
      "learning_rate": 1.968881042201029e-05,
      "loss": 1.3059,
      "step": 180
    },
    {
      "epoch": 0.5300859598853869,
      "grad_norm": 0.34214151520580405,
      "learning_rate": 1.9646189937809145e-05,
      "loss": 1.3352,
      "step": 185
    },
    {
      "epoch": 0.5444126074498568,
      "grad_norm": 0.3053274945471879,
      "learning_rate": 1.9600887299704694e-05,
      "loss": 1.3387,
      "step": 190
    },
    {
      "epoch": 0.5587392550143266,
      "grad_norm": 0.3041769711953325,
      "learning_rate": 1.9552915104240067e-05,
      "loss": 1.3188,
      "step": 195
    },
    {
      "epoch": 0.5730659025787965,
      "grad_norm": 0.31747827349059277,
      "learning_rate": 1.950228669023735e-05,
      "loss": 1.343,
      "step": 200
    },
    {
      "epoch": 0.5873925501432665,
      "grad_norm": 0.30247749181312306,
      "learning_rate": 1.9449016135088657e-05,
      "loss": 1.3676,
      "step": 205
    },
    {
      "epoch": 0.6017191977077364,
      "grad_norm": 0.31090923349754856,
      "learning_rate": 1.9393118250841897e-05,
      "loss": 1.3371,
      "step": 210
    },
    {
      "epoch": 0.6160458452722063,
      "grad_norm": 0.3058957610153376,
      "learning_rate": 1.9334608580082204e-05,
      "loss": 1.3062,
      "step": 215
    },
    {
      "epoch": 0.6303724928366762,
      "grad_norm": 0.3201938803930117,
      "learning_rate": 1.9273503391610307e-05,
      "loss": 1.309,
      "step": 220
    },
    {
      "epoch": 0.6446991404011462,
      "grad_norm": 0.30643913797653993,
      "learning_rate": 1.920981967591891e-05,
      "loss": 1.3035,
      "step": 225
    },
    {
      "epoch": 0.6590257879656161,
      "grad_norm": 0.3091049342115299,
      "learning_rate": 1.914357514046844e-05,
      "loss": 1.3672,
      "step": 230
    },
    {
      "epoch": 0.673352435530086,
      "grad_norm": 0.3211019921093764,
      "learning_rate": 1.9074788204763438e-05,
      "loss": 1.3309,
      "step": 235
    },
    {
      "epoch": 0.6876790830945558,
      "grad_norm": 0.3142136640616353,
      "learning_rate": 1.9003477995230942e-05,
      "loss": 1.3301,
      "step": 240
    },
    {
      "epoch": 0.7020057306590258,
      "grad_norm": 0.2969129447372341,
      "learning_rate": 1.8929664339902342e-05,
      "loss": 1.2844,
      "step": 245
    },
    {
      "epoch": 0.7163323782234957,
      "grad_norm": 0.32166405234870055,
      "learning_rate": 1.8853367762900117e-05,
      "loss": 1.2605,
      "step": 250
    },
    {
      "epoch": 0.7306590257879656,
      "grad_norm": 0.3214367335695634,
      "learning_rate": 1.8774609478731048e-05,
      "loss": 1.2793,
      "step": 255
    },
    {
      "epoch": 0.7449856733524355,
      "grad_norm": 0.31356330763968654,
      "learning_rate": 1.8693411386387445e-05,
      "loss": 1.3105,
      "step": 260
    },
    {
      "epoch": 0.7593123209169055,
      "grad_norm": 0.28277004921839216,
      "learning_rate": 1.8609796063258076e-05,
      "loss": 1.3352,
      "step": 265
    },
    {
      "epoch": 0.7736389684813754,
      "grad_norm": 0.29899645005685277,
      "learning_rate": 1.8523786758850436e-05,
      "loss": 1.2777,
      "step": 270
    },
    {
      "epoch": 0.7879656160458453,
      "grad_norm": 0.2904988184310563,
      "learning_rate": 1.8435407388326167e-05,
      "loss": 1.2992,
      "step": 275
    },
    {
      "epoch": 0.8022922636103151,
      "grad_norm": 0.3003438883173033,
      "learning_rate": 1.834468252585135e-05,
      "loss": 1.3004,
      "step": 280
    },
    {
      "epoch": 0.8166189111747851,
      "grad_norm": 0.3125023034962173,
      "learning_rate": 1.8251637397763597e-05,
      "loss": 1.227,
      "step": 285
    },
    {
      "epoch": 0.830945558739255,
      "grad_norm": 0.30438015798153173,
      "learning_rate": 1.8156297875557777e-05,
      "loss": 1.259,
      "step": 290
    },
    {
      "epoch": 0.8452722063037249,
      "grad_norm": 0.30190146232606113,
      "learning_rate": 1.8058690468692366e-05,
      "loss": 1.2824,
      "step": 295
    },
    {
      "epoch": 0.8595988538681948,
      "grad_norm": 0.3023797380550246,
      "learning_rate": 1.7958842317218413e-05,
      "loss": 1.277,
      "step": 300
    },
    {
      "epoch": 0.8739255014326648,
      "grad_norm": 0.3102084836884206,
      "learning_rate": 1.7856781184233152e-05,
      "loss": 1.1988,
      "step": 305
    },
    {
      "epoch": 0.8882521489971347,
      "grad_norm": 0.2970584447964946,
      "learning_rate": 1.7752535448160395e-05,
      "loss": 1.2727,
      "step": 310
    },
    {
      "epoch": 0.9025787965616046,
      "grad_norm": 0.30487145100844953,
      "learning_rate": 1.7646134094859816e-05,
      "loss": 1.2566,
      "step": 315
    },
    {
      "epoch": 0.9169054441260746,
      "grad_norm": 0.3048375178754921,
      "learning_rate": 1.7537606709567336e-05,
      "loss": 1.2457,
      "step": 320
    },
    {
      "epoch": 0.9312320916905444,
      "grad_norm": 0.2948102666221556,
      "learning_rate": 1.742698346866886e-05,
      "loss": 1.2965,
      "step": 325
    },
    {
      "epoch": 0.9455587392550143,
      "grad_norm": 0.31264839407509326,
      "learning_rate": 1.731429513130964e-05,
      "loss": 1.2801,
      "step": 330
    },
    {
      "epoch": 0.9598853868194842,
      "grad_norm": 0.29735923550950955,
      "learning_rate": 1.7199573030841577e-05,
      "loss": 1.2605,
      "step": 335
    },
    {
      "epoch": 0.9742120343839542,
      "grad_norm": 0.3039565840031905,
      "learning_rate": 1.708284906611091e-05,
      "loss": 1.234,
      "step": 340
    },
    {
      "epoch": 0.9885386819484241,
      "grad_norm": 0.3073836805156935,
      "learning_rate": 1.696415569258862e-05,
      "loss": 1.259,
      "step": 345
    },
    {
      "epoch": 1.002865329512894,
      "grad_norm": 0.31664041219314865,
      "learning_rate": 1.6843525913346087e-05,
      "loss": 1.2664,
      "step": 350
    },
    {
      "epoch": 1.0171919770773639,
      "grad_norm": 0.29695347553484913,
      "learning_rate": 1.6720993269878486e-05,
      "loss": 1.217,
      "step": 355
    },
    {
      "epoch": 1.0315186246418337,
      "grad_norm": 0.300646007452569,
      "learning_rate": 1.659659183277847e-05,
      "loss": 1.2168,
      "step": 360
    },
    {
      "epoch": 1.0458452722063036,
      "grad_norm": 0.3055551516874735,
      "learning_rate": 1.647035619226271e-05,
      "loss": 1.1906,
      "step": 365
    },
    {
      "epoch": 1.0601719197707737,
      "grad_norm": 0.30061799520952165,
      "learning_rate": 1.634232144855401e-05,
      "loss": 1.2289,
      "step": 370
    },
    {
      "epoch": 1.0744985673352436,
      "grad_norm": 0.29060382670998225,
      "learning_rate": 1.6212523202121547e-05,
      "loss": 1.2109,
      "step": 375
    },
    {
      "epoch": 1.0888252148997135,
      "grad_norm": 0.30881741047203054,
      "learning_rate": 1.6080997543782063e-05,
      "loss": 1.2297,
      "step": 380
    },
    {
      "epoch": 1.1031518624641834,
      "grad_norm": 0.29077207858842863,
      "learning_rate": 1.5947781044664696e-05,
      "loss": 1.2512,
      "step": 385
    },
    {
      "epoch": 1.1174785100286533,
      "grad_norm": 0.2984009106567584,
      "learning_rate": 1.581291074604226e-05,
      "loss": 1.1762,
      "step": 390
    },
    {
      "epoch": 1.1318051575931232,
      "grad_norm": 0.28683159244265233,
      "learning_rate": 1.5676424149031798e-05,
      "loss": 1.1719,
      "step": 395
    },
    {
      "epoch": 1.146131805157593,
      "grad_norm": 0.3016666749794573,
      "learning_rate": 1.5538359204167285e-05,
      "loss": 1.2754,
      "step": 400
    },
    {
      "epoch": 1.1604584527220632,
      "grad_norm": 0.2861211530074967,
      "learning_rate": 1.5398754300847346e-05,
      "loss": 1.2566,
      "step": 405
    },
    {
      "epoch": 1.174785100286533,
      "grad_norm": 0.3013734628684354,
      "learning_rate": 1.525764825666097e-05,
      "loss": 1.1691,
      "step": 410
    },
    {
      "epoch": 1.189111747851003,
      "grad_norm": 0.29971634545543485,
      "learning_rate": 1.5115080306594172e-05,
      "loss": 1.1811,
      "step": 415
    },
    {
      "epoch": 1.2034383954154728,
      "grad_norm": 0.31994782495783924,
      "learning_rate": 1.4971090092120544e-05,
      "loss": 1.2414,
      "step": 420
    },
    {
      "epoch": 1.2177650429799427,
      "grad_norm": 0.29658597163323713,
      "learning_rate": 1.4825717650178846e-05,
      "loss": 1.1973,
      "step": 425
    },
    {
      "epoch": 1.2320916905444126,
      "grad_norm": 0.30035988308067263,
      "learning_rate": 1.4679003402040593e-05,
      "loss": 1.2164,
      "step": 430
    },
    {
      "epoch": 1.2464183381088825,
      "grad_norm": 0.30992527789489627,
      "learning_rate": 1.4530988142070802e-05,
      "loss": 1.1625,
      "step": 435
    },
    {
      "epoch": 1.2607449856733524,
      "grad_norm": 0.30039271345012736,
      "learning_rate": 1.438171302638498e-05,
      "loss": 1.2523,
      "step": 440
    },
    {
      "epoch": 1.2750716332378222,
      "grad_norm": 0.301258281954922,
      "learning_rate": 1.4231219561405533e-05,
      "loss": 1.2164,
      "step": 445
    },
    {
      "epoch": 1.2893982808022924,
      "grad_norm": 0.30070972022384745,
      "learning_rate": 1.4079549592320782e-05,
      "loss": 1.2371,
      "step": 450
    },
    {
      "epoch": 1.3037249283667622,
      "grad_norm": 0.2937945088213742,
      "learning_rate": 1.3926745291449773e-05,
      "loss": 1.2227,
      "step": 455
    },
    {
      "epoch": 1.3180515759312321,
      "grad_norm": 0.2927965843522735,
      "learning_rate": 1.3772849146516114e-05,
      "loss": 1.2098,
      "step": 460
    },
    {
      "epoch": 1.332378223495702,
      "grad_norm": 0.29991379304860427,
      "learning_rate": 1.3617903948834155e-05,
      "loss": 1.1414,
      "step": 465
    },
    {
      "epoch": 1.346704871060172,
      "grad_norm": 0.2895474514716426,
      "learning_rate": 1.34619527814107e-05,
      "loss": 1.2188,
      "step": 470
    },
    {
      "epoch": 1.3610315186246418,
      "grad_norm": 0.30135691213637283,
      "learning_rate": 1.3305039006965657e-05,
      "loss": 1.2746,
      "step": 475
    },
    {
      "epoch": 1.3753581661891117,
      "grad_norm": 0.2808237365976051,
      "learning_rate": 1.3147206255874886e-05,
      "loss": 1.1936,
      "step": 480
    },
    {
      "epoch": 1.3896848137535818,
      "grad_norm": 0.29073762199706943,
      "learning_rate": 1.2988498414038635e-05,
      "loss": 1.1734,
      "step": 485
    },
    {
      "epoch": 1.4040114613180517,
      "grad_norm": 0.3166900967170064,
      "learning_rate": 1.282895961067893e-05,
      "loss": 1.1973,
      "step": 490
    },
    {
      "epoch": 1.4183381088825215,
      "grad_norm": 0.3011306233980793,
      "learning_rate": 1.2668634206069305e-05,
      "loss": 1.2238,
      "step": 495
    },
    {
      "epoch": 1.4326647564469914,
      "grad_norm": 0.2901803615383375,
      "learning_rate": 1.2507566779200273e-05,
      "loss": 1.2496,
      "step": 500
    },
    {
      "epoch": 1.4469914040114613,
      "grad_norm": 0.29696637870766024,
      "learning_rate": 1.2345802115384014e-05,
      "loss": 1.1768,
      "step": 505
    },
    {
      "epoch": 1.4613180515759312,
      "grad_norm": 0.2863270222332892,
      "learning_rate": 1.2183385193801655e-05,
      "loss": 1.2156,
      "step": 510
    },
    {
      "epoch": 1.475644699140401,
      "grad_norm": 0.3025458017583608,
      "learning_rate": 1.2020361174996694e-05,
      "loss": 1.173,
      "step": 515
    },
    {
      "epoch": 1.4899713467048712,
      "grad_norm": 0.287124352936146,
      "learning_rate": 1.1856775388317936e-05,
      "loss": 1.1773,
      "step": 520
    },
    {
      "epoch": 1.5042979942693409,
      "grad_norm": 0.30845106692202556,
      "learning_rate": 1.1692673319315541e-05,
      "loss": 1.2316,
      "step": 525
    },
    {
      "epoch": 1.518624641833811,
      "grad_norm": 0.2934898590653195,
      "learning_rate": 1.1528100597093617e-05,
      "loss": 1.1652,
      "step": 530
    },
    {
      "epoch": 1.5329512893982808,
      "grad_norm": 0.5593622869918241,
      "learning_rate": 1.13631029816229e-05,
      "loss": 1.2328,
      "step": 535
    },
    {
      "epoch": 1.5472779369627507,
      "grad_norm": 0.29465220623927774,
      "learning_rate": 1.1197726351017052e-05,
      "loss": 1.1785,
      "step": 540
    },
    {
      "epoch": 1.5616045845272206,
      "grad_norm": 0.2914162673673657,
      "learning_rate": 1.1032016688776106e-05,
      "loss": 1.2613,
      "step": 545
    },
    {
      "epoch": 1.5759312320916905,
      "grad_norm": 0.2990525095465911,
      "learning_rate": 1.0866020071000597e-05,
      "loss": 1.2006,
      "step": 550
    },
    {
      "epoch": 1.5902578796561606,
      "grad_norm": 0.2855200603052628,
      "learning_rate": 1.0699782653579973e-05,
      "loss": 1.2094,
      "step": 555
    },
    {
      "epoch": 1.6045845272206303,
      "grad_norm": 0.2909907948504594,
      "learning_rate": 1.0533350659358779e-05,
      "loss": 1.2035,
      "step": 560
    },
    {
      "epoch": 1.6189111747851004,
      "grad_norm": 0.289103776445201,
      "learning_rate": 1.0366770365284271e-05,
      "loss": 1.1848,
      "step": 565
    },
    {
      "epoch": 1.63323782234957,
      "grad_norm": 0.2825139343735471,
      "learning_rate": 1.0200088089538944e-05,
      "loss": 1.2031,
      "step": 570
    },
    {
      "epoch": 1.6475644699140402,
      "grad_norm": 0.27750397580689223,
      "learning_rate": 1.0033350178661633e-05,
      "loss": 1.1998,
      "step": 575
    },
    {
      "epoch": 1.66189111747851,
      "grad_norm": 0.2845108454176054,
      "learning_rate": 9.866602994660688e-06,
      "loss": 1.1523,
      "step": 580
    },
    {
      "epoch": 1.67621776504298,
      "grad_norm": 0.2903678786080609,
      "learning_rate": 9.699892902122887e-06,
      "loss": 1.1922,
      "step": 585
    },
    {
      "epoch": 1.6905444126074498,
      "grad_norm": 0.2935036283873705,
      "learning_rate": 9.53326625532161e-06,
      "loss": 1.2277,
      "step": 590
    },
    {
      "epoch": 1.7048710601719197,
      "grad_norm": 0.29136738669186435,
      "learning_rate": 9.366769385327875e-06,
      "loss": 1.1641,
      "step": 595
    },
    {
      "epoch": 1.7191977077363898,
      "grad_norm": 0.29992898897918896,
      "learning_rate": 9.200448587127852e-06,
      "loss": 1.1887,
      "step": 600
    },
    {
      "epoch": 1.7335243553008595,
      "grad_norm": 0.2980387182263642,
      "learning_rate": 9.034350106750383e-06,
      "loss": 1.2117,
      "step": 605
    },
    {
      "epoch": 1.7478510028653296,
      "grad_norm": 0.2973187539429791,
      "learning_rate": 8.868520128408134e-06,
      "loss": 1.2273,
      "step": 610
    },
    {
      "epoch": 1.7621776504297995,
      "grad_norm": 0.2939664139251852,
      "learning_rate": 8.703004761655918e-06,
      "loss": 1.2121,
      "step": 615
    },
    {
      "epoch": 1.7765042979942693,
      "grad_norm": 0.2948724111626734,
      "learning_rate": 8.537850028569796e-06,
      "loss": 1.1727,
      "step": 620
    },
    {
      "epoch": 1.7908309455587392,
      "grad_norm": 0.2900310921230476,
      "learning_rate": 8.37310185095048e-06,
      "loss": 1.1705,
      "step": 625
    },
    {
      "epoch": 1.8051575931232091,
      "grad_norm": 0.2878990205353967,
      "learning_rate": 8.208806037554645e-06,
      "loss": 1.1781,
      "step": 630
    },
    {
      "epoch": 1.8194842406876792,
      "grad_norm": 0.29396661211641484,
      "learning_rate": 8.045008271357644e-06,
      "loss": 1.2625,
      "step": 635
    },
    {
      "epoch": 1.8338108882521489,
      "grad_norm": 0.2853159419583723,
      "learning_rate": 7.88175409685122e-06,
      "loss": 1.1562,
      "step": 640
    },
    {
      "epoch": 1.848137535816619,
      "grad_norm": 0.2924378192139063,
      "learning_rate": 7.719088907379705e-06,
      "loss": 1.2141,
      "step": 645
    },
    {
      "epoch": 1.8624641833810889,
      "grad_norm": 0.2835974358624726,
      "learning_rate": 7.557057932518274e-06,
      "loss": 1.1344,
      "step": 650
    },
    {
      "epoch": 1.8767908309455588,
      "grad_norm": 0.2852512548851884,
      "learning_rate": 7.39570622549669e-06,
      "loss": 1.2395,
      "step": 655
    },
    {
      "epoch": 1.8911174785100286,
      "grad_norm": 0.29264683297829125,
      "learning_rate": 7.235078650672141e-06,
      "loss": 1.1797,
      "step": 660
    },
    {
      "epoch": 1.9054441260744985,
      "grad_norm": 0.2898654877917546,
      "learning_rate": 7.075219871054528e-06,
      "loss": 1.2227,
      "step": 665
    },
    {
      "epoch": 1.9197707736389686,
      "grad_norm": 0.29421755996555327,
      "learning_rate": 6.91617433588781e-06,
      "loss": 1.1711,
      "step": 670
    },
    {
      "epoch": 1.9340974212034383,
      "grad_norm": 0.2800223774944341,
      "learning_rate": 6.757986268290713e-06,
      "loss": 1.2025,
      "step": 675
    },
    {
      "epoch": 1.9484240687679084,
      "grad_norm": 0.28987949528203105,
      "learning_rate": 6.600699652960383e-06,
      "loss": 1.1891,
      "step": 680
    },
    {
      "epoch": 1.962750716332378,
      "grad_norm": 0.2845787595193169,
      "learning_rate": 6.4443582239422744e-06,
      "loss": 1.1602,
      "step": 685
    },
    {
      "epoch": 1.9770773638968482,
      "grad_norm": 0.27697906700119446,
      "learning_rate": 6.289005452469778e-06,
      "loss": 1.2195,
      "step": 690
    },
    {
      "epoch": 1.991404011461318,
      "grad_norm": 0.2864230953591413,
      "learning_rate": 6.134684534876892e-06,
      "loss": 1.1859,
      "step": 695
    },
    {
      "epoch": 2.005730659025788,
      "grad_norm": 0.3004607532045262,
      "learning_rate": 5.981438380587355e-06,
      "loss": 1.2074,
      "step": 700
    },
    {
      "epoch": 2.020057306590258,
      "grad_norm": 0.29024736883757196,
      "learning_rate": 5.829309600183536e-06,
      "loss": 1.1371,
      "step": 705
    },
    {
      "epoch": 2.0343839541547277,
      "grad_norm": 0.285433189631626,
      "learning_rate": 5.678340493558427e-06,
      "loss": 1.2063,
      "step": 710
    },
    {
      "epoch": 2.048710601719198,
      "grad_norm": 0.2957416068435454,
      "learning_rate": 5.528573038154028e-06,
      "loss": 1.1945,
      "step": 715
    },
    {
      "epoch": 2.0630372492836675,
      "grad_norm": 0.2948232121670883,
      "learning_rate": 5.380048877289381e-06,
      "loss": 1.1439,
      "step": 720
    },
    {
      "epoch": 2.0773638968481376,
      "grad_norm": 0.2919439807242836,
      "learning_rate": 5.232809308581504e-06,
      "loss": 1.1496,
      "step": 725
    },
    {
      "epoch": 2.0916905444126073,
      "grad_norm": 0.2867698512059567,
      "learning_rate": 5.086895272462475e-06,
      "loss": 1.1186,
      "step": 730
    },
    {
      "epoch": 2.1060171919770774,
      "grad_norm": 0.2917668731887502,
      "learning_rate": 4.942347340795803e-06,
      "loss": 1.125,
      "step": 735
    },
    {
      "epoch": 2.1203438395415475,
      "grad_norm": 0.2870682782334859,
      "learning_rate": 4.799205705595294e-06,
      "loss": 1.0992,
      "step": 740
    },
    {
      "epoch": 2.134670487106017,
      "grad_norm": 0.2869641039546839,
      "learning_rate": 4.657510167849525e-06,
      "loss": 1.141,
      "step": 745
    },
    {
      "epoch": 2.1489971346704873,
      "grad_norm": 0.28154741238148256,
      "learning_rate": 4.5173001264550665e-06,
      "loss": 1.0984,
      "step": 750
    },
    {
      "epoch": 2.163323782234957,
      "grad_norm": 0.29022223670208136,
      "learning_rate": 4.378614567261487e-06,
      "loss": 1.1313,
      "step": 755
    },
    {
      "epoch": 2.177650429799427,
      "grad_norm": 0.28112870621050123,
      "learning_rate": 4.241492052231213e-06,
      "loss": 1.1865,
      "step": 760
    },
    {
      "epoch": 2.1919770773638967,
      "grad_norm": 0.27708366870966095,
      "learning_rate": 4.105970708717244e-06,
      "loss": 1.1467,
      "step": 765
    },
    {
      "epoch": 2.206303724928367,
      "grad_norm": 0.28678919627949717,
      "learning_rate": 3.972088218861738e-06,
      "loss": 1.1592,
      "step": 770
    },
    {
      "epoch": 2.2206303724928365,
      "grad_norm": 0.2813821171114588,
      "learning_rate": 3.83988180911836e-06,
      "loss": 1.1549,
      "step": 775
    },
    {
      "epoch": 2.2349570200573066,
      "grad_norm": 0.29052831071379115,
      "learning_rate": 3.7093882399013504e-06,
      "loss": 1.1742,
      "step": 780
    },
    {
      "epoch": 2.2492836676217767,
      "grad_norm": 0.28664925785362466,
      "learning_rate": 3.580643795364166e-06,
      "loss": 1.1883,
      "step": 785
    },
    {
      "epoch": 2.2636103151862463,
      "grad_norm": 0.28360378572017647,
      "learning_rate": 3.4536842733105702e-06,
      "loss": 1.1783,
      "step": 790
    },
    {
      "epoch": 2.2779369627507164,
      "grad_norm": 0.2784641405735339,
      "learning_rate": 3.3285449752409315e-06,
      "loss": 1.1119,
      "step": 795
    },
    {
      "epoch": 2.292263610315186,
      "grad_norm": 0.28470450318470536,
      "learning_rate": 3.205260696536534e-06,
      "loss": 1.15,
      "step": 800
    },
    {
      "epoch": 2.306590257879656,
      "grad_norm": 0.27908057259095853,
      "learning_rate": 3.083865716784592e-06,
      "loss": 1.1469,
      "step": 805
    },
    {
      "epoch": 2.3209169054441263,
      "grad_norm": 0.28684050381253245,
      "learning_rate": 2.964393790246728e-06,
      "loss": 1.0791,
      "step": 810
    },
    {
      "epoch": 2.335243553008596,
      "grad_norm": 0.2833942155479012,
      "learning_rate": 2.846878136473472e-06,
      "loss": 1.1854,
      "step": 815
    },
    {
      "epoch": 2.349570200573066,
      "grad_norm": 0.2813005058217904,
      "learning_rate": 2.7313514310674826e-06,
      "loss": 1.1102,
      "step": 820
    },
    {
      "epoch": 2.3638968481375358,
      "grad_norm": 0.27630773934160313,
      "learning_rate": 2.6178457965979543e-06,
      "loss": 1.1111,
      "step": 825
    },
    {
      "epoch": 2.378223495702006,
      "grad_norm": 0.28322453198420466,
      "learning_rate": 2.506392793668869e-06,
      "loss": 1.1262,
      "step": 830
    },
    {
      "epoch": 2.3925501432664755,
      "grad_norm": 0.27434180065283353,
      "learning_rate": 2.3970234121434555e-06,
      "loss": 1.158,
      "step": 835
    },
    {
      "epoch": 2.4068767908309456,
      "grad_norm": 0.28158095452878135,
      "learning_rate": 2.2897680625273623e-06,
      "loss": 1.1152,
      "step": 840
    },
    {
      "epoch": 2.4212034383954153,
      "grad_norm": 0.29278876714519303,
      "learning_rate": 2.1846565675129074e-06,
      "loss": 1.1395,
      "step": 845
    },
    {
      "epoch": 2.4355300859598854,
      "grad_norm": 0.27557568657805176,
      "learning_rate": 2.0817181536868035e-06,
      "loss": 1.1469,
      "step": 850
    },
    {
      "epoch": 2.4498567335243555,
      "grad_norm": 0.2754040319685035,
      "learning_rate": 1.9809814434036e-06,
      "loss": 1.1393,
      "step": 855
    },
    {
      "epoch": 2.464183381088825,
      "grad_norm": 0.2810022564605419,
      "learning_rate": 1.8824744468271506e-06,
      "loss": 1.1043,
      "step": 860
    },
    {
      "epoch": 2.4785100286532953,
      "grad_norm": 0.27969700189605484,
      "learning_rate": 1.786224554142285e-06,
      "loss": 1.116,
      "step": 865
    },
    {
      "epoch": 2.492836676217765,
      "grad_norm": 0.27897638050493495,
      "learning_rate": 1.6922585279389037e-06,
      "loss": 1.1367,
      "step": 870
    },
    {
      "epoch": 2.507163323782235,
      "grad_norm": 0.2815481626728245,
      "learning_rate": 1.6006024957705357e-06,
      "loss": 1.1365,
      "step": 875
    },
    {
      "epoch": 2.5214899713467047,
      "grad_norm": 0.2769674435573708,
      "learning_rate": 1.5112819428894976e-06,
      "loss": 1.1832,
      "step": 880
    },
    {
      "epoch": 2.535816618911175,
      "grad_norm": 0.28423912540069224,
      "learning_rate": 1.4243217051606285e-06,
      "loss": 1.2,
      "step": 885
    },
    {
      "epoch": 2.5501432664756445,
      "grad_norm": 0.28433511602868367,
      "learning_rate": 1.339745962155613e-06,
      "loss": 1.1258,
      "step": 890
    },
    {
      "epoch": 2.5644699140401146,
      "grad_norm": 0.27492757921736494,
      "learning_rate": 1.2575782304297647e-06,
      "loss": 1.1631,
      "step": 895
    },
    {
      "epoch": 2.5787965616045847,
      "grad_norm": 0.28357440374503834,
      "learning_rate": 1.1778413569831726e-06,
      "loss": 1.1508,
      "step": 900
    },
    {
      "epoch": 2.5931232091690544,
      "grad_norm": 0.28309913251371766,
      "learning_rate": 1.1005575129080203e-06,
      "loss": 1.1596,
      "step": 905
    },
    {
      "epoch": 2.6074498567335245,
      "grad_norm": 0.2830020221949328,
      "learning_rate": 1.0257481872238483e-06,
      "loss": 1.1809,
      "step": 910
    },
    {
      "epoch": 2.621776504297994,
      "grad_norm": 0.2738198006004891,
      "learning_rate": 9.534341809024583e-07,
      "loss": 1.0836,
      "step": 915
    },
    {
      "epoch": 2.6361031518624642,
      "grad_norm": 0.27780926764956043,
      "learning_rate": 8.836356010841385e-07,
      "loss": 1.1633,
      "step": 920
    },
    {
      "epoch": 2.6504297994269344,
      "grad_norm": 0.28167674862359654,
      "learning_rate": 8.16371855486805e-07,
      "loss": 1.152,
      "step": 925
    },
    {
      "epoch": 2.664756446991404,
      "grad_norm": 0.28736011956752383,
      "learning_rate": 7.516616470096317e-07,
      "loss": 1.1127,
      "step": 930
    },
    {
      "epoch": 2.6790830945558737,
      "grad_norm": 0.27167353567975117,
      "learning_rate": 6.895229685326443e-07,
      "loss": 1.1574,
      "step": 935
    },
    {
      "epoch": 2.693409742120344,
      "grad_norm": 0.29082932902589526,
      "learning_rate": 6.299730979137419e-07,
      "loss": 1.1426,
      "step": 940
    },
    {
      "epoch": 2.707736389684814,
      "grad_norm": 0.27278739042779193,
      "learning_rate": 5.730285931845381e-07,
      "loss": 1.1113,
      "step": 945
    },
    {
      "epoch": 2.7220630372492836,
      "grad_norm": 0.2695715592534727,
      "learning_rate": 5.187052879463394e-07,
      "loss": 1.1182,
      "step": 950
    },
    {
      "epoch": 2.7363896848137537,
      "grad_norm": 0.2767698424495714,
      "learning_rate": 4.6701828696757213e-07,
      "loss": 1.1264,
      "step": 955
    },
    {
      "epoch": 2.7507163323782233,
      "grad_norm": 0.2862004769810508,
      "learning_rate": 4.1798196198384545e-07,
      "loss": 1.1766,
      "step": 960
    },
    {
      "epoch": 2.7650429799426934,
      "grad_norm": 0.27956749462161845,
      "learning_rate": 3.716099477018475e-07,
      "loss": 1.1463,
      "step": 965
    },
    {
      "epoch": 2.7793696275071635,
      "grad_norm": 0.28226367416458953,
      "learning_rate": 3.279151380081691e-07,
      "loss": 1.1898,
      "step": 970
    },
    {
      "epoch": 2.793696275071633,
      "grad_norm": 0.2759717986692799,
      "learning_rate": 2.8690968238412444e-07,
      "loss": 1.1193,
      "step": 975
    },
    {
      "epoch": 2.8080229226361033,
      "grad_norm": 0.2913913242847074,
      "learning_rate": 2.4860498252753827e-07,
      "loss": 1.2113,
      "step": 980
    },
    {
      "epoch": 2.822349570200573,
      "grad_norm": 0.28827782734894336,
      "learning_rate": 2.130116891824796e-07,
      "loss": 1.1344,
      "step": 985
    },
    {
      "epoch": 2.836676217765043,
      "grad_norm": 0.2726866974754793,
      "learning_rate": 1.8013969917777484e-07,
      "loss": 1.1385,
      "step": 990
    },
    {
      "epoch": 2.8510028653295127,
      "grad_norm": 0.2818762723051636,
      "learning_rate": 1.4999815267517593e-07,
      "loss": 1.1732,
      "step": 995
    },
    {
      "epoch": 2.865329512893983,
      "grad_norm": 0.2806296409646362,
      "learning_rate": 1.225954306279009e-07,
      "loss": 1.1609,
      "step": 1000
    },
    {
      "epoch": 2.8796561604584525,
      "grad_norm": 0.27946799660404814,
      "learning_rate": 9.793915245028595e-08,
      "loss": 1.1875,
      "step": 1005
    },
    {
      "epoch": 2.8939828080229226,
      "grad_norm": 0.2854513869083756,
      "learning_rate": 7.603617389918106e-08,
      "loss": 1.1342,
      "step": 1010
    },
    {
      "epoch": 2.9083094555873927,
      "grad_norm": 0.2764976233678434,
      "learning_rate": 5.689258516768825e-08,
      "loss": 1.1789,
      "step": 1015
    },
    {
      "epoch": 2.9226361031518624,
      "grad_norm": 0.2843787086373923,
      "learning_rate": 4.05137091917629e-08,
      "loss": 1.177,
      "step": 1020
    },
    {
      "epoch": 2.9369627507163325,
      "grad_norm": 0.2793191895679787,
      "learning_rate": 2.6904100170150883e-08,
      "loss": 1.1459,
      "step": 1025
    },
    {
      "epoch": 2.951289398280802,
      "grad_norm": 0.2820836844171885,
      "learning_rate": 1.6067542298083826e-08,
      "loss": 1.2051,
      "step": 1030
    },
    {
      "epoch": 2.9656160458452723,
      "grad_norm": 0.280116788571808,
      "learning_rate": 8.007048715068522e-09,
      "loss": 1.1293,
      "step": 1035
    },
    {
      "epoch": 2.9799426934097424,
      "grad_norm": 0.2760942837882742,
      "learning_rate": 2.7248606670760012e-09,
      "loss": 1.1348,
      "step": 1040
    },
    {
      "epoch": 2.994269340974212,
      "grad_norm": 0.2785194035391107,
      "learning_rate": 2.2244688335226749e-10,
      "loss": 1.1268,
      "step": 1045
    },
    {
      "epoch": 3.0,
      "step": 1047,
      "total_flos": 99070802657280.0,
      "train_loss": 1.4185899444842407,
      "train_runtime": 3548.2371,
      "train_samples_per_second": 18.883,
      "train_steps_per_second": 0.295
    }
  ],
  "logging_steps": 5,
  "max_steps": 1047,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 99070802657280.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}