|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 5170, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.6557956322798859, |
|
"learning_rate": 5.802707930367505e-09, |
|
"loss": 1.2371, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.555537955912783, |
|
"learning_rate": 2.9013539651837526e-08, |
|
"loss": 1.2332, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.6560886812503646, |
|
"learning_rate": 5.802707930367505e-08, |
|
"loss": 1.2786, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.6387105093397438, |
|
"learning_rate": 8.704061895551257e-08, |
|
"loss": 1.2475, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.6532524825579088, |
|
"learning_rate": 1.160541586073501e-07, |
|
"loss": 1.2885, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.7627038914530329, |
|
"learning_rate": 1.450676982591876e-07, |
|
"loss": 1.2707, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.6865350725464494, |
|
"learning_rate": 1.7408123791102514e-07, |
|
"loss": 1.2573, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.5944185772651999, |
|
"learning_rate": 2.0309477756286268e-07, |
|
"loss": 1.2236, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.570802653009915, |
|
"learning_rate": 2.321083172147002e-07, |
|
"loss": 1.2354, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.6254702727850132, |
|
"learning_rate": 2.6112185686653774e-07, |
|
"loss": 1.2396, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.6505985921117032, |
|
"learning_rate": 2.901353965183752e-07, |
|
"loss": 1.2415, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.5645977078030876, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"loss": 1.2242, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.5679797693766294, |
|
"learning_rate": 3.481624758220503e-07, |
|
"loss": 1.2021, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.5881688854072822, |
|
"learning_rate": 3.771760154738878e-07, |
|
"loss": 1.241, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.6171671681192435, |
|
"learning_rate": 4.0618955512572535e-07, |
|
"loss": 1.238, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.6251626743581414, |
|
"learning_rate": 4.3520309477756283e-07, |
|
"loss": 1.2623, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.536899552637251, |
|
"learning_rate": 4.642166344294004e-07, |
|
"loss": 1.2525, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.6363284385208114, |
|
"learning_rate": 4.93230174081238e-07, |
|
"loss": 1.2807, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.7156337106339976, |
|
"learning_rate": 5.222437137330755e-07, |
|
"loss": 1.233, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.4922672527647211, |
|
"learning_rate": 5.512572533849129e-07, |
|
"loss": 1.2296, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5995169385614093, |
|
"learning_rate": 5.802707930367504e-07, |
|
"loss": 1.2263, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5270868824616388, |
|
"learning_rate": 6.092843326885881e-07, |
|
"loss": 1.256, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.45317247546954, |
|
"learning_rate": 6.382978723404255e-07, |
|
"loss": 1.2344, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.43848067309021005, |
|
"learning_rate": 6.67311411992263e-07, |
|
"loss": 1.2667, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.4920996149570544, |
|
"learning_rate": 6.963249516441006e-07, |
|
"loss": 1.2048, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5458024812690938, |
|
"learning_rate": 7.253384912959381e-07, |
|
"loss": 1.221, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.49239446969985223, |
|
"learning_rate": 7.543520309477756e-07, |
|
"loss": 1.2122, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.6153580588728397, |
|
"learning_rate": 7.833655705996132e-07, |
|
"loss": 1.254, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5652181830271799, |
|
"learning_rate": 8.123791102514507e-07, |
|
"loss": 1.2144, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5688846265610772, |
|
"learning_rate": 8.413926499032881e-07, |
|
"loss": 1.2215, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.43147864423001453, |
|
"learning_rate": 8.704061895551257e-07, |
|
"loss": 1.2489, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.41719278261688, |
|
"learning_rate": 8.994197292069632e-07, |
|
"loss": 1.2424, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.4879569605970493, |
|
"learning_rate": 9.284332688588008e-07, |
|
"loss": 1.2103, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5016955054762027, |
|
"learning_rate": 9.574468085106384e-07, |
|
"loss": 1.2229, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.41471316258825075, |
|
"learning_rate": 9.86460348162476e-07, |
|
"loss": 1.2301, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.37515142480035085, |
|
"learning_rate": 1.0154738878143134e-06, |
|
"loss": 1.2339, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5357151850842313, |
|
"learning_rate": 1.044487427466151e-06, |
|
"loss": 1.1782, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.3653264458948328, |
|
"learning_rate": 1.0735009671179885e-06, |
|
"loss": 1.1319, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.4383207715794612, |
|
"learning_rate": 1.1025145067698258e-06, |
|
"loss": 1.2032, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.519104554154961, |
|
"learning_rate": 1.1315280464216634e-06, |
|
"loss": 1.2016, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.3108713036635766, |
|
"learning_rate": 1.1605415860735009e-06, |
|
"loss": 1.2205, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.39072663996333046, |
|
"learning_rate": 1.1895551257253386e-06, |
|
"loss": 1.178, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.4159099519388948, |
|
"learning_rate": 1.2185686653771762e-06, |
|
"loss": 1.196, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.40315240562925386, |
|
"learning_rate": 1.2475822050290137e-06, |
|
"loss": 1.2217, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.381069833836644, |
|
"learning_rate": 1.276595744680851e-06, |
|
"loss": 1.1392, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.45265751317200875, |
|
"learning_rate": 1.3056092843326885e-06, |
|
"loss": 1.1618, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.4277031654442431, |
|
"learning_rate": 1.334622823984526e-06, |
|
"loss": 1.1457, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.3444956221080674, |
|
"learning_rate": 1.3636363636363636e-06, |
|
"loss": 1.2013, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.3421518714382308, |
|
"learning_rate": 1.3926499032882011e-06, |
|
"loss": 1.1327, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.36336912925802345, |
|
"learning_rate": 1.4216634429400387e-06, |
|
"loss": 1.1283, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.29948215466081957, |
|
"learning_rate": 1.4506769825918762e-06, |
|
"loss": 1.1337, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.32776001109162717, |
|
"learning_rate": 1.4796905222437137e-06, |
|
"loss": 1.1058, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.2866893939255288, |
|
"learning_rate": 1.5087040618955513e-06, |
|
"loss": 1.1276, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.33107588796069914, |
|
"learning_rate": 1.5377176015473888e-06, |
|
"loss": 1.1665, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.2771016817248348, |
|
"learning_rate": 1.5667311411992263e-06, |
|
"loss": 1.1298, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.2950279622080215, |
|
"learning_rate": 1.5957446808510639e-06, |
|
"loss": 1.1179, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.28628767272935396, |
|
"learning_rate": 1.6247582205029014e-06, |
|
"loss": 1.121, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.32121632735389827, |
|
"learning_rate": 1.653771760154739e-06, |
|
"loss": 1.1191, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.2673147970392464, |
|
"learning_rate": 1.6827852998065763e-06, |
|
"loss": 1.1308, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.25690056840881836, |
|
"learning_rate": 1.7117988394584138e-06, |
|
"loss": 1.1028, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.34733553615110824, |
|
"learning_rate": 1.7408123791102513e-06, |
|
"loss": 1.1204, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.21641858619604487, |
|
"learning_rate": 1.7698259187620889e-06, |
|
"loss": 1.0929, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.2671526741250324, |
|
"learning_rate": 1.7988394584139264e-06, |
|
"loss": 1.106, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.2418205867189562, |
|
"learning_rate": 1.8278529980657641e-06, |
|
"loss": 1.0815, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.2864594215791563, |
|
"learning_rate": 1.8568665377176017e-06, |
|
"loss": 1.0888, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.22573020842814867, |
|
"learning_rate": 1.8858800773694392e-06, |
|
"loss": 1.108, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.18929290310787175, |
|
"learning_rate": 1.9148936170212767e-06, |
|
"loss": 1.1112, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.229402778930268, |
|
"learning_rate": 1.943907156673114e-06, |
|
"loss": 1.0799, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.2459230628386859, |
|
"learning_rate": 1.972920696324952e-06, |
|
"loss": 1.1307, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.207680539285622, |
|
"learning_rate": 2.001934235976789e-06, |
|
"loss": 1.0809, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.2175863700443701, |
|
"learning_rate": 2.030947775628627e-06, |
|
"loss": 1.057, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.23383978650057566, |
|
"learning_rate": 2.059961315280464e-06, |
|
"loss": 1.0926, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.2200995259512634, |
|
"learning_rate": 2.088974854932302e-06, |
|
"loss": 1.0747, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.21194139151827074, |
|
"learning_rate": 2.1179883945841393e-06, |
|
"loss": 1.0342, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.30994391618458367, |
|
"learning_rate": 2.147001934235977e-06, |
|
"loss": 1.0625, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.24113685123230963, |
|
"learning_rate": 2.1760154738878143e-06, |
|
"loss": 1.0649, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.18918994099116018, |
|
"learning_rate": 2.2050290135396516e-06, |
|
"loss": 1.0749, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.21256820119734426, |
|
"learning_rate": 2.2340425531914894e-06, |
|
"loss": 1.0562, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.17510665051403598, |
|
"learning_rate": 2.2630560928433267e-06, |
|
"loss": 1.0538, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.2008673882220927, |
|
"learning_rate": 2.2920696324951644e-06, |
|
"loss": 1.0571, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.23508712912756172, |
|
"learning_rate": 2.3210831721470018e-06, |
|
"loss": 1.0505, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.20306310295149868, |
|
"learning_rate": 2.3500967117988395e-06, |
|
"loss": 1.0454, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.21068331150828368, |
|
"learning_rate": 2.3791102514506773e-06, |
|
"loss": 1.0551, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.13689273954962194, |
|
"learning_rate": 2.4081237911025146e-06, |
|
"loss": 1.0474, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.16160461077870894, |
|
"learning_rate": 2.4371373307543523e-06, |
|
"loss": 1.0478, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.17395359723345014, |
|
"learning_rate": 2.4661508704061896e-06, |
|
"loss": 1.0525, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.18279794025145507, |
|
"learning_rate": 2.4951644100580274e-06, |
|
"loss": 0.9952, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.15844978893507106, |
|
"learning_rate": 2.5241779497098647e-06, |
|
"loss": 1.0216, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.1436621940675145, |
|
"learning_rate": 2.553191489361702e-06, |
|
"loss": 1.0133, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.1746824119521344, |
|
"learning_rate": 2.5822050290135398e-06, |
|
"loss": 1.0086, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.16843680061281097, |
|
"learning_rate": 2.611218568665377e-06, |
|
"loss": 1.0124, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.13899776670000386, |
|
"learning_rate": 2.640232108317215e-06, |
|
"loss": 1.0258, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.16885171252583628, |
|
"learning_rate": 2.669245647969052e-06, |
|
"loss": 1.0025, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.13947774165225663, |
|
"learning_rate": 2.69825918762089e-06, |
|
"loss": 1.0209, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.16016862657644082, |
|
"learning_rate": 2.7272727272727272e-06, |
|
"loss": 1.0202, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.1560774813153456, |
|
"learning_rate": 2.7562862669245645e-06, |
|
"loss": 1.0094, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.15349128775811013, |
|
"learning_rate": 2.7852998065764023e-06, |
|
"loss": 1.0222, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.1581693949842608, |
|
"learning_rate": 2.8143133462282396e-06, |
|
"loss": 1.0099, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.1338165501774845, |
|
"learning_rate": 2.8433268858800774e-06, |
|
"loss": 0.9919, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.16833331660791553, |
|
"learning_rate": 2.872340425531915e-06, |
|
"loss": 1.0109, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.12418854228845702, |
|
"learning_rate": 2.9013539651837524e-06, |
|
"loss": 1.0114, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.17269898089870625, |
|
"learning_rate": 2.93036750483559e-06, |
|
"loss": 1.0116, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.1398155290290283, |
|
"learning_rate": 2.9593810444874275e-06, |
|
"loss": 0.9834, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.13812113362847925, |
|
"learning_rate": 2.9883945841392652e-06, |
|
"loss": 1.0278, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.1304950913697068, |
|
"learning_rate": 2.9999969229307894e-06, |
|
"loss": 0.994, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.12161480167420421, |
|
"learning_rate": 2.999978118664665e-06, |
|
"loss": 0.9931, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.12896036742563166, |
|
"learning_rate": 2.9999422198293556e-06, |
|
"loss": 1.0097, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.13140405926118037, |
|
"learning_rate": 2.9998892268339835e-06, |
|
"loss": 1.004, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.12459210092265861, |
|
"learning_rate": 2.999819140282485e-06, |
|
"loss": 0.9933, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.11304692947321707, |
|
"learning_rate": 2.9997319609736057e-06, |
|
"loss": 1.024, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.10712457456439482, |
|
"learning_rate": 2.9996276899008886e-06, |
|
"loss": 0.997, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.12723199655873965, |
|
"learning_rate": 2.9995063282526635e-06, |
|
"loss": 0.999, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.1117511114286072, |
|
"learning_rate": 2.9993678774120335e-06, |
|
"loss": 1.0005, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.11921831176513668, |
|
"learning_rate": 2.9992123389568606e-06, |
|
"loss": 1.0128, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.13264814163917732, |
|
"learning_rate": 2.9990397146597453e-06, |
|
"loss": 0.9958, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.10853923751603145, |
|
"learning_rate": 2.998850006488009e-06, |
|
"loss": 1.008, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.11972246590833577, |
|
"learning_rate": 2.9986432166036694e-06, |
|
"loss": 0.984, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.11545852087538627, |
|
"learning_rate": 2.9984193473634165e-06, |
|
"loss": 0.9846, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.10903802682217484, |
|
"learning_rate": 2.998178401318586e-06, |
|
"loss": 1.0103, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.1056100500869663, |
|
"learning_rate": 2.9979203812151314e-06, |
|
"loss": 0.9846, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.1086522268106536, |
|
"learning_rate": 2.9976452899935897e-06, |
|
"loss": 1.012, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.11349091913892485, |
|
"learning_rate": 2.997353130789052e-06, |
|
"loss": 0.9868, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.11520557146606587, |
|
"learning_rate": 2.9970439069311227e-06, |
|
"loss": 0.9859, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.10731896545642554, |
|
"learning_rate": 2.996717621943886e-06, |
|
"loss": 0.9677, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.10199258578885077, |
|
"learning_rate": 2.9963742795458634e-06, |
|
"loss": 0.9912, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.1004784901885883, |
|
"learning_rate": 2.9960138836499727e-06, |
|
"loss": 0.974, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.11816705487740833, |
|
"learning_rate": 2.9956364383634826e-06, |
|
"loss": 1.0011, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.10828490126824755, |
|
"learning_rate": 2.9952419479879643e-06, |
|
"loss": 1.0004, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.1064264302560214, |
|
"learning_rate": 2.9948304170192465e-06, |
|
"loss": 0.9906, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.1111563680571522, |
|
"learning_rate": 2.99440185014736e-06, |
|
"loss": 0.9785, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.10140413194917164, |
|
"learning_rate": 2.9939562522564877e-06, |
|
"loss": 1.0137, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.10517761570743359, |
|
"learning_rate": 2.9934936284249047e-06, |
|
"loss": 0.9954, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.10621130418725885, |
|
"learning_rate": 2.993013983924926e-06, |
|
"loss": 0.9724, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.10215078910100184, |
|
"learning_rate": 2.992517324222842e-06, |
|
"loss": 0.9902, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.10412671370027254, |
|
"learning_rate": 2.9920036549788573e-06, |
|
"loss": 0.9809, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.11732922386471892, |
|
"learning_rate": 2.991472982047027e-06, |
|
"loss": 0.9623, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.09628074134733036, |
|
"learning_rate": 2.990925311475189e-06, |
|
"loss": 0.9882, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.10783922143897923, |
|
"learning_rate": 2.9903606495048965e-06, |
|
"loss": 0.983, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.10412363645704524, |
|
"learning_rate": 2.9897790025713453e-06, |
|
"loss": 1.0016, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.10425594495538223, |
|
"learning_rate": 2.9891803773033017e-06, |
|
"loss": 0.9834, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.10570028300258567, |
|
"learning_rate": 2.9885647805230253e-06, |
|
"loss": 0.9608, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.1156869590258465, |
|
"learning_rate": 2.987932219246193e-06, |
|
"loss": 1.0092, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.1055268544146359, |
|
"learning_rate": 2.987282700681819e-06, |
|
"loss": 0.9927, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.0962316020072262, |
|
"learning_rate": 2.9866162322321704e-06, |
|
"loss": 0.9824, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.09521663449081877, |
|
"learning_rate": 2.9859328214926856e-06, |
|
"loss": 0.9623, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.10471356087898716, |
|
"learning_rate": 2.9852324762518867e-06, |
|
"loss": 1.0006, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.10041568704144976, |
|
"learning_rate": 2.98451520449129e-06, |
|
"loss": 0.9904, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.11321291186855274, |
|
"learning_rate": 2.9837810143853162e-06, |
|
"loss": 1.0015, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.09570436916323284, |
|
"learning_rate": 2.9830299143011955e-06, |
|
"loss": 0.9659, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.11085995422039718, |
|
"learning_rate": 2.982261912798876e-06, |
|
"loss": 0.9865, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.09744417943549286, |
|
"learning_rate": 2.9814770186309197e-06, |
|
"loss": 0.9662, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.10429458744917756, |
|
"learning_rate": 2.980675240742411e-06, |
|
"loss": 0.9846, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.10446461600654952, |
|
"learning_rate": 2.979856588270846e-06, |
|
"loss": 0.9822, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.10542142436501455, |
|
"learning_rate": 2.979021070546038e-06, |
|
"loss": 0.9805, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.10172022425622489, |
|
"learning_rate": 2.9781686970899998e-06, |
|
"loss": 0.9702, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.11458755826070066, |
|
"learning_rate": 2.9772994776168466e-06, |
|
"loss": 0.9773, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.09277700475991077, |
|
"learning_rate": 2.976413422032677e-06, |
|
"loss": 0.9767, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.10884138178056307, |
|
"learning_rate": 2.9755105404354637e-06, |
|
"loss": 0.9742, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.09160224444355052, |
|
"learning_rate": 2.974590843114939e-06, |
|
"loss": 0.9874, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.09983015498372462, |
|
"learning_rate": 2.9736543405524747e-06, |
|
"loss": 0.9689, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.10901476562057139, |
|
"learning_rate": 2.9727010434209652e-06, |
|
"loss": 0.9591, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.0945610375498226, |
|
"learning_rate": 2.9717309625847053e-06, |
|
"loss": 0.9997, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.1063437137043428, |
|
"learning_rate": 2.970744109099265e-06, |
|
"loss": 0.9787, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.10165659250994069, |
|
"learning_rate": 2.9697404942113655e-06, |
|
"loss": 0.9559, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.09899486787838711, |
|
"learning_rate": 2.9687201293587495e-06, |
|
"loss": 0.9515, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.10160491144724951, |
|
"learning_rate": 2.967683026170052e-06, |
|
"loss": 0.9478, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.10508036334433964, |
|
"learning_rate": 2.9666291964646663e-06, |
|
"loss": 0.966, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.10340523865343762, |
|
"learning_rate": 2.9655586522526115e-06, |
|
"loss": 0.9757, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.10542777862775049, |
|
"learning_rate": 2.9644714057343925e-06, |
|
"loss": 0.9753, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.10247584372465218, |
|
"learning_rate": 2.9633674693008656e-06, |
|
"loss": 0.9607, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.09993848260863396, |
|
"learning_rate": 2.9622468555330916e-06, |
|
"loss": 0.9775, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.10504556965317913, |
|
"learning_rate": 2.961109577202197e-06, |
|
"loss": 0.9727, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.10286749907745354, |
|
"learning_rate": 2.9599556472692262e-06, |
|
"loss": 0.9796, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.0946833657296664, |
|
"learning_rate": 2.9587850788849942e-06, |
|
"loss": 0.9667, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.10526928030356757, |
|
"learning_rate": 2.9575978853899377e-06, |
|
"loss": 0.9623, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.0967614781879407, |
|
"learning_rate": 2.9563940803139607e-06, |
|
"loss": 0.9607, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.10162924582712457, |
|
"learning_rate": 2.955173677376284e-06, |
|
"loss": 0.9698, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.1144304223101541, |
|
"learning_rate": 2.9539366904852843e-06, |
|
"loss": 0.9852, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.10636310716558237, |
|
"learning_rate": 2.9526831337383394e-06, |
|
"loss": 0.9606, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.112534519091396, |
|
"learning_rate": 2.9514130214216665e-06, |
|
"loss": 0.9736, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.10324814326836546, |
|
"learning_rate": 2.9501263680101588e-06, |
|
"loss": 0.9816, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.10818625130621462, |
|
"learning_rate": 2.9488231881672203e-06, |
|
"loss": 0.9326, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.10965885593819354, |
|
"learning_rate": 2.9475034967445993e-06, |
|
"loss": 0.9767, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.10090790956890588, |
|
"learning_rate": 2.9461673087822204e-06, |
|
"loss": 0.9706, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.10727272176169492, |
|
"learning_rate": 2.94481463950801e-06, |
|
"loss": 0.9647, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.10704584677647615, |
|
"learning_rate": 2.9434455043377255e-06, |
|
"loss": 0.9683, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.09401780473717516, |
|
"learning_rate": 2.9420599188747786e-06, |
|
"loss": 0.9499, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.10562202866274173, |
|
"learning_rate": 2.9406578989100573e-06, |
|
"loss": 0.9814, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.11078753036018767, |
|
"learning_rate": 2.9392394604217463e-06, |
|
"loss": 0.9522, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.10599183497345623, |
|
"learning_rate": 2.937804619575144e-06, |
|
"loss": 0.9785, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.10821166303387769, |
|
"learning_rate": 2.936353392722481e-06, |
|
"loss": 0.9484, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.10164561197403676, |
|
"learning_rate": 2.934885796402729e-06, |
|
"loss": 0.9695, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.0963480746857516, |
|
"learning_rate": 2.933401847341417e-06, |
|
"loss": 0.9704, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.11877783442156198, |
|
"learning_rate": 2.931901562450439e-06, |
|
"loss": 0.9727, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.11235009932152866, |
|
"learning_rate": 2.93038495882786e-06, |
|
"loss": 0.9836, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.10489582071240841, |
|
"learning_rate": 2.9288520537577223e-06, |
|
"loss": 0.9715, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.10530019662564404, |
|
"learning_rate": 2.927302864709848e-06, |
|
"loss": 0.947, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.10948590499675388, |
|
"learning_rate": 2.9257374093396423e-06, |
|
"loss": 0.9544, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.1101428849431525, |
|
"learning_rate": 2.9241557054878876e-06, |
|
"loss": 0.9736, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.10161055478860785, |
|
"learning_rate": 2.9225577711805446e-06, |
|
"loss": 0.9579, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.10234104818115608, |
|
"learning_rate": 2.920943624628545e-06, |
|
"loss": 0.9494, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.11125410578921856, |
|
"learning_rate": 2.9193132842275834e-06, |
|
"loss": 0.9665, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.10448226196682155, |
|
"learning_rate": 2.917666768557908e-06, |
|
"loss": 0.9492, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.10464379124965918, |
|
"learning_rate": 2.916004096384112e-06, |
|
"loss": 0.9485, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.11240399703260856, |
|
"learning_rate": 2.9143252866549126e-06, |
|
"loss": 0.9805, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.10262467247255205, |
|
"learning_rate": 2.9126303585029424e-06, |
|
"loss": 0.9533, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.10660529098219367, |
|
"learning_rate": 2.9109193312445277e-06, |
|
"loss": 0.9797, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.10233872211834552, |
|
"learning_rate": 2.909192224379469e-06, |
|
"loss": 0.9755, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.10675971763004717, |
|
"learning_rate": 2.907449057590818e-06, |
|
"loss": 0.958, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.11494064560394811, |
|
"learning_rate": 2.9056898507446553e-06, |
|
"loss": 0.9426, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.11375236977081475, |
|
"learning_rate": 2.9039146238898615e-06, |
|
"loss": 0.9438, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.10951341713317103, |
|
"learning_rate": 2.9021233972578917e-06, |
|
"loss": 0.954, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.11289886578757521, |
|
"learning_rate": 2.9003161912625412e-06, |
|
"loss": 0.9651, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.10761927389343565, |
|
"learning_rate": 2.8984930264997153e-06, |
|
"loss": 0.9855, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.11084863010339359, |
|
"learning_rate": 2.8966539237471957e-06, |
|
"loss": 0.9749, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.10914255215103161, |
|
"learning_rate": 2.8947989039644e-06, |
|
"loss": 0.9434, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.11264654500852792, |
|
"learning_rate": 2.8929279882921465e-06, |
|
"loss": 0.9776, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.11132127023457686, |
|
"learning_rate": 2.891041198052411e-06, |
|
"loss": 0.9507, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.10653706131717319, |
|
"learning_rate": 2.8891385547480846e-06, |
|
"loss": 0.9535, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.1151077199781543, |
|
"learning_rate": 2.887220080062729e-06, |
|
"loss": 0.9761, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.10470041169674428, |
|
"learning_rate": 2.8852857958603284e-06, |
|
"loss": 0.9736, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.11918031326179271, |
|
"learning_rate": 2.883335724185041e-06, |
|
"loss": 0.9437, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.1107590462985888, |
|
"learning_rate": 2.8813698872609478e-06, |
|
"loss": 0.9461, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.11545803285412702, |
|
"learning_rate": 2.8793883074917996e-06, |
|
"loss": 0.9741, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.10989642132296704, |
|
"learning_rate": 2.8773910074607604e-06, |
|
"loss": 0.9375, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.11350061884840995, |
|
"learning_rate": 2.875378009930151e-06, |
|
"loss": 0.9762, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.10670246968609752, |
|
"learning_rate": 2.8733493378411908e-06, |
|
"loss": 0.9611, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.11315693520491527, |
|
"learning_rate": 2.8713050143137327e-06, |
|
"loss": 0.9574, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.10792619360055242, |
|
"learning_rate": 2.869245062646004e-06, |
|
"loss": 0.9755, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.11860109794592168, |
|
"learning_rate": 2.8671695063143373e-06, |
|
"loss": 0.9682, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.11295073321194798, |
|
"learning_rate": 2.865078368972907e-06, |
|
"loss": 0.9732, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.10877608828125572, |
|
"learning_rate": 2.862971674453453e-06, |
|
"loss": 0.9319, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.11905716007142109, |
|
"learning_rate": 2.860849446765017e-06, |
|
"loss": 0.9563, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.108659995668165, |
|
"learning_rate": 2.8587117100936642e-06, |
|
"loss": 0.9323, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.11330495373273691, |
|
"learning_rate": 2.856558488802207e-06, |
|
"loss": 0.9461, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.11183891702192725, |
|
"learning_rate": 2.854389807429932e-06, |
|
"loss": 0.937, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.10647343953458478, |
|
"learning_rate": 2.8522056906923136e-06, |
|
"loss": 0.9432, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.10989413716941382, |
|
"learning_rate": 2.8500061634807397e-06, |
|
"loss": 0.9434, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.10609789647222649, |
|
"learning_rate": 2.847791250862222e-06, |
|
"loss": 0.9708, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.11449356746375824, |
|
"learning_rate": 2.845560978079113e-06, |
|
"loss": 0.9493, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.11863005755900938, |
|
"learning_rate": 2.843315370548819e-06, |
|
"loss": 0.9402, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.11152572810815058, |
|
"learning_rate": 2.8410544538635086e-06, |
|
"loss": 0.9669, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.1118716690063177, |
|
"learning_rate": 2.838778253789822e-06, |
|
"loss": 0.9469, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.11513622367346048, |
|
"learning_rate": 2.8364867962685775e-06, |
|
"loss": 0.9732, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.11916471698468781, |
|
"learning_rate": 2.834180107414476e-06, |
|
"loss": 0.9588, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.10636597317515512, |
|
"learning_rate": 2.831858213515802e-06, |
|
"loss": 0.9781, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.1144580288076685, |
|
"learning_rate": 2.829521141034125e-06, |
|
"loss": 0.956, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.11413870856691348, |
|
"learning_rate": 2.8271689166039986e-06, |
|
"loss": 0.9568, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.11086368743242728, |
|
"learning_rate": 2.8248015670326564e-06, |
|
"loss": 0.9455, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.10920372063922966, |
|
"learning_rate": 2.822419119299706e-06, |
|
"loss": 0.9435, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.11758849733693692, |
|
"learning_rate": 2.8200216005568218e-06, |
|
"loss": 0.9421, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.10936170827027436, |
|
"learning_rate": 2.817609038127435e-06, |
|
"loss": 0.9538, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.11188798595384854, |
|
"learning_rate": 2.815181459506425e-06, |
|
"loss": 0.9823, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.14295692634361193, |
|
"learning_rate": 2.8127388923598008e-06, |
|
"loss": 0.9533, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.1147686682077821, |
|
"learning_rate": 2.810281364524392e-06, |
|
"loss": 0.9714, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.11140244758844407, |
|
"learning_rate": 2.807808904007526e-06, |
|
"loss": 0.9554, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.10882014916760172, |
|
"learning_rate": 2.805321538986713e-06, |
|
"loss": 0.9445, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.11197943958186041, |
|
"learning_rate": 2.802819297809321e-06, |
|
"loss": 0.9433, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.1127674218112967, |
|
"learning_rate": 2.8003022089922564e-06, |
|
"loss": 0.9612, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.11516015506964294, |
|
"learning_rate": 2.7977703012216375e-06, |
|
"loss": 0.9562, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.12293491236553014, |
|
"learning_rate": 2.7952236033524658e-06, |
|
"loss": 0.9593, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.12143251214899849, |
|
"learning_rate": 2.7926621444083015e-06, |
|
"loss": 0.9569, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.1094629806227622, |
|
"learning_rate": 2.790085953580927e-06, |
|
"loss": 0.9568, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.11455799113079224, |
|
"learning_rate": 2.7874950602300197e-06, |
|
"loss": 0.953, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.11535461499254474, |
|
"learning_rate": 2.7848894938828134e-06, |
|
"loss": 0.9035, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.1149148628451183, |
|
"learning_rate": 2.7822692842337654e-06, |
|
"loss": 0.9709, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.10799586189243776, |
|
"learning_rate": 2.7796344611442133e-06, |
|
"loss": 0.9492, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.11235503423781165, |
|
"learning_rate": 2.7769850546420396e-06, |
|
"loss": 1.0031, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.11596192217593441, |
|
"learning_rate": 2.774321094921326e-06, |
|
"loss": 0.9478, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.11386850099809975, |
|
"learning_rate": 2.7716426123420114e-06, |
|
"loss": 0.9464, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.12212660771198781, |
|
"learning_rate": 2.768949637429546e-06, |
|
"loss": 0.9588, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.11252131716640058, |
|
"learning_rate": 2.76624220087454e-06, |
|
"loss": 0.942, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.11804484758605481, |
|
"learning_rate": 2.7635203335324185e-06, |
|
"loss": 0.9492, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.11757492161038868, |
|
"learning_rate": 2.7607840664230674e-06, |
|
"loss": 0.9664, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.11458358844876558, |
|
"learning_rate": 2.758033430730479e-06, |
|
"loss": 0.9495, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.1228068142781927, |
|
"learning_rate": 2.7552684578023998e-06, |
|
"loss": 0.9473, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.1167571702347808, |
|
"learning_rate": 2.752489179149969e-06, |
|
"loss": 0.9743, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.11259236811273615, |
|
"learning_rate": 2.7496956264473635e-06, |
|
"loss": 0.9517, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.114209224633214, |
|
"learning_rate": 2.746887831531434e-06, |
|
"loss": 0.9608, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.12180028064162973, |
|
"learning_rate": 2.744065826401344e-06, |
|
"loss": 0.9357, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.11724624103676141, |
|
"learning_rate": 2.7412296432182035e-06, |
|
"loss": 0.955, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.11928864261797452, |
|
"learning_rate": 2.738379314304704e-06, |
|
"loss": 0.938, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.1183836619102445, |
|
"learning_rate": 2.735514872144749e-06, |
|
"loss": 0.9638, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.11343899459393017, |
|
"learning_rate": 2.732636349383085e-06, |
|
"loss": 0.9648, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.11856005446375929, |
|
"learning_rate": 2.7297437788249276e-06, |
|
"loss": 0.9663, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.1502591765852686, |
|
"learning_rate": 2.72683719343559e-06, |
|
"loss": 0.9361, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.11914573111945241, |
|
"learning_rate": 2.7239166263401056e-06, |
|
"loss": 0.9595, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.1230018369983442, |
|
"learning_rate": 2.7209821108228497e-06, |
|
"loss": 0.9565, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.12071540798615119, |
|
"learning_rate": 2.718033680327163e-06, |
|
"loss": 0.9737, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.11960934492681863, |
|
"learning_rate": 2.715071368454969e-06, |
|
"loss": 0.9185, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.11733580889296377, |
|
"learning_rate": 2.7120952089663894e-06, |
|
"loss": 0.9414, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.11053633322216715, |
|
"learning_rate": 2.7091052357793627e-06, |
|
"loss": 0.9349, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.12986099128088718, |
|
"learning_rate": 2.7061014829692546e-06, |
|
"loss": 0.9807, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.12933111225425914, |
|
"learning_rate": 2.703083984768471e-06, |
|
"loss": 0.9442, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.115955353513501, |
|
"learning_rate": 2.7000527755660684e-06, |
|
"loss": 0.948, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.11943463657143313, |
|
"learning_rate": 2.697007889907361e-06, |
|
"loss": 0.9573, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.11673257150242644, |
|
"learning_rate": 2.693949362493527e-06, |
|
"loss": 0.9387, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.11899065882144028, |
|
"learning_rate": 2.690877228181215e-06, |
|
"loss": 0.9493, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.12183609939386461, |
|
"learning_rate": 2.6877915219821427e-06, |
|
"loss": 0.9539, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.11127194999506593, |
|
"learning_rate": 2.6846922790627024e-06, |
|
"loss": 0.9443, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.11553816505121053, |
|
"learning_rate": 2.6815795347435577e-06, |
|
"loss": 0.9298, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.11447251373564948, |
|
"learning_rate": 2.6784533244992416e-06, |
|
"loss": 0.9375, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.12166179843418228, |
|
"learning_rate": 2.6753136839577522e-06, |
|
"loss": 0.9349, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.12100641500283442, |
|
"learning_rate": 2.6721606489001457e-06, |
|
"loss": 0.9293, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.12359155363514698, |
|
"learning_rate": 2.668994255260131e-06, |
|
"loss": 0.9624, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.11179542251492335, |
|
"learning_rate": 2.6658145391236574e-06, |
|
"loss": 0.9375, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.1113458165161423, |
|
"learning_rate": 2.6626215367285054e-06, |
|
"loss": 0.92, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.11826530596830692, |
|
"learning_rate": 2.659415284463873e-06, |
|
"loss": 0.9829, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.1223957965290031, |
|
"learning_rate": 2.6561958188699604e-06, |
|
"loss": 0.9485, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.12481799756632796, |
|
"learning_rate": 2.6529631766375546e-06, |
|
"loss": 0.9532, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.11360313455180103, |
|
"learning_rate": 2.6497173946076098e-06, |
|
"loss": 0.9648, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.11832505401191586, |
|
"learning_rate": 2.64645850977083e-06, |
|
"loss": 0.9353, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.11917615522222746, |
|
"learning_rate": 2.643186559267245e-06, |
|
"loss": 0.9453, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.12184287945841704, |
|
"learning_rate": 2.6399015803857885e-06, |
|
"loss": 0.9543, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.11651535092179631, |
|
"learning_rate": 2.636603610563872e-06, |
|
"loss": 0.946, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.13089416464535625, |
|
"learning_rate": 2.6332926873869595e-06, |
|
"loss": 0.9612, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.12640696317783878, |
|
"learning_rate": 2.629968848588138e-06, |
|
"loss": 0.9485, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.12467989812698095, |
|
"learning_rate": 2.6266321320476893e-06, |
|
"loss": 0.9467, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.12292162295288515, |
|
"learning_rate": 2.6232825757926555e-06, |
|
"loss": 0.9526, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.12693069162671494, |
|
"learning_rate": 2.6199202179964064e-06, |
|
"loss": 0.9495, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.12251580300224744, |
|
"learning_rate": 2.6165450969782074e-06, |
|
"loss": 0.9479, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.12239346691673264, |
|
"learning_rate": 2.61315725120278e-06, |
|
"loss": 0.9592, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.11659453736794827, |
|
"learning_rate": 2.609756719279862e-06, |
|
"loss": 0.9378, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.12683066622391057, |
|
"learning_rate": 2.606343539963772e-06, |
|
"loss": 0.9412, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.11735665618288187, |
|
"learning_rate": 2.6029177521529633e-06, |
|
"loss": 0.936, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.1216598234634421, |
|
"learning_rate": 2.5994793948895835e-06, |
|
"loss": 0.9627, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.12601877660770533, |
|
"learning_rate": 2.596028507359029e-06, |
|
"loss": 0.9529, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.12702458316754647, |
|
"learning_rate": 2.5925651288894965e-06, |
|
"loss": 0.9515, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.12058379659459599, |
|
"learning_rate": 2.5890892989515367e-06, |
|
"loss": 0.9298, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.12573278202145702, |
|
"learning_rate": 2.585601057157605e-06, |
|
"loss": 0.9575, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.11676843442815175, |
|
"learning_rate": 2.582100443261609e-06, |
|
"loss": 0.9466, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.12406638621197374, |
|
"learning_rate": 2.5785874971584536e-06, |
|
"loss": 0.9403, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.12389135267465634, |
|
"learning_rate": 2.5750622588835903e-06, |
|
"loss": 0.9423, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.12180646520632062, |
|
"learning_rate": 2.571524768612558e-06, |
|
"loss": 0.9223, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.11486707403126087, |
|
"learning_rate": 2.567975066660527e-06, |
|
"loss": 0.9275, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.12857636220545796, |
|
"learning_rate": 2.564413193481837e-06, |
|
"loss": 0.9749, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.12086931508695424, |
|
"learning_rate": 2.5608391896695388e-06, |
|
"loss": 0.9439, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.12178686326127208, |
|
"learning_rate": 2.55725309595493e-06, |
|
"loss": 0.954, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.12960869330311783, |
|
"learning_rate": 2.5536549532070913e-06, |
|
"loss": 0.9352, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.12553474416457935, |
|
"learning_rate": 2.550044802432422e-06, |
|
"loss": 0.9442, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.12732282668760914, |
|
"learning_rate": 2.5464226847741695e-06, |
|
"loss": 0.9314, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.13055875843349435, |
|
"learning_rate": 2.5427886415119635e-06, |
|
"loss": 0.9186, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.12814219216348366, |
|
"learning_rate": 2.539142714061344e-06, |
|
"loss": 0.93, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.13703362060653562, |
|
"learning_rate": 2.5354849439732902e-06, |
|
"loss": 0.9353, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.11733228892071898, |
|
"learning_rate": 2.5318153729337457e-06, |
|
"loss": 0.9549, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.12639938357266184, |
|
"learning_rate": 2.5281340427631445e-06, |
|
"loss": 0.9479, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.12858912657134408, |
|
"learning_rate": 2.5244409954159343e-06, |
|
"loss": 0.9157, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.13765344027585624, |
|
"learning_rate": 2.5207362729800986e-06, |
|
"loss": 0.9567, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.1188666008027966, |
|
"learning_rate": 2.5170199176766746e-06, |
|
"loss": 0.9454, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.12528858240136181, |
|
"learning_rate": 2.5132919718592767e-06, |
|
"loss": 0.9445, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.12298871563801664, |
|
"learning_rate": 2.5095524780136096e-06, |
|
"loss": 0.9543, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.1311433270714553, |
|
"learning_rate": 2.5058014787569847e-06, |
|
"loss": 0.9501, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.12625986029021932, |
|
"learning_rate": 2.5020390168378376e-06, |
|
"loss": 0.991, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.12627600348385226, |
|
"learning_rate": 2.498265135135237e-06, |
|
"loss": 0.9804, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.12480939156448727, |
|
"learning_rate": 2.4944798766583986e-06, |
|
"loss": 0.9575, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.12814473985468958, |
|
"learning_rate": 2.490683284546193e-06, |
|
"loss": 0.94, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.12354291356370957, |
|
"learning_rate": 2.4868754020666566e-06, |
|
"loss": 0.9441, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.1230166173419696, |
|
"learning_rate": 2.4830562726164958e-06, |
|
"loss": 0.9207, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.11599834288712259, |
|
"learning_rate": 2.479225939720593e-06, |
|
"loss": 0.9233, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.12460890724939186, |
|
"learning_rate": 2.4753844470315135e-06, |
|
"loss": 0.938, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.1235331336241235, |
|
"learning_rate": 2.4715318383290037e-06, |
|
"loss": 0.9638, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.12749668661162603, |
|
"learning_rate": 2.4676681575194943e-06, |
|
"loss": 0.9297, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.13092231220069622, |
|
"learning_rate": 2.4637934486356012e-06, |
|
"loss": 0.9482, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.12567362421402142, |
|
"learning_rate": 2.4599077558356207e-06, |
|
"loss": 0.9716, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.12291260255078236, |
|
"learning_rate": 2.456011123403028e-06, |
|
"loss": 0.9442, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.13018458909985667, |
|
"learning_rate": 2.452103595745974e-06, |
|
"loss": 0.9583, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.12359082787357942, |
|
"learning_rate": 2.4481852173967746e-06, |
|
"loss": 0.9143, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.12792177515126044, |
|
"learning_rate": 2.4442560330114092e-06, |
|
"loss": 0.9359, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.12085993870314579, |
|
"learning_rate": 2.4403160873690063e-06, |
|
"loss": 0.9397, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.12401548468347032, |
|
"learning_rate": 2.436365425371337e-06, |
|
"loss": 0.8997, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.13214018330862026, |
|
"learning_rate": 2.432404092042301e-06, |
|
"loss": 0.927, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.12399112060015242, |
|
"learning_rate": 2.4284321325274144e-06, |
|
"loss": 0.9359, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.1251239358952118, |
|
"learning_rate": 2.424449592093296e-06, |
|
"loss": 0.9526, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.1255660262761407, |
|
"learning_rate": 2.42045651612715e-06, |
|
"loss": 0.9569, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.12365762191881352, |
|
"learning_rate": 2.416452950136248e-06, |
|
"loss": 0.9303, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.12535472693272393, |
|
"learning_rate": 2.412438939747414e-06, |
|
"loss": 0.9374, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.1339666694324748, |
|
"learning_rate": 2.4084145307065e-06, |
|
"loss": 0.9214, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.12465441927649695, |
|
"learning_rate": 2.404379768877868e-06, |
|
"loss": 0.9258, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.13785224280245373, |
|
"learning_rate": 2.4003347002438657e-06, |
|
"loss": 0.9534, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.12897658276955143, |
|
"learning_rate": 2.396279370904303e-06, |
|
"loss": 0.9378, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.1342231606408141, |
|
"learning_rate": 2.3922138270759247e-06, |
|
"loss": 0.9313, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.12478455394570859, |
|
"learning_rate": 2.388138115091888e-06, |
|
"loss": 0.9715, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.12400147036199631, |
|
"learning_rate": 2.3840522814012304e-06, |
|
"loss": 0.9335, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.12614825735019372, |
|
"learning_rate": 2.379956372568343e-06, |
|
"loss": 0.9389, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.12819460837673466, |
|
"learning_rate": 2.375850435272437e-06, |
|
"loss": 0.9298, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.13218189606634853, |
|
"learning_rate": 2.371734516307015e-06, |
|
"loss": 0.9271, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.12031632571886923, |
|
"learning_rate": 2.3676086625793353e-06, |
|
"loss": 0.9191, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.1368420193828202, |
|
"learning_rate": 2.3634729211098786e-06, |
|
"loss": 0.9335, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.12612432940530838, |
|
"learning_rate": 2.3593273390318118e-06, |
|
"loss": 0.9505, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.1291015620161155, |
|
"learning_rate": 2.355171963590451e-06, |
|
"loss": 0.9072, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.13561142947536237, |
|
"learning_rate": 2.3510068421427205e-06, |
|
"loss": 0.9557, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.1304501468364583, |
|
"learning_rate": 2.3468320221566194e-06, |
|
"loss": 0.9606, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.12763130154749866, |
|
"learning_rate": 2.3426475512106737e-06, |
|
"loss": 0.9699, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.13527714632992727, |
|
"learning_rate": 2.3384534769933968e-06, |
|
"loss": 0.9303, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.12632208652934207, |
|
"learning_rate": 2.3342498473027487e-06, |
|
"loss": 0.9403, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.13322198624906814, |
|
"learning_rate": 2.3300367100455857e-06, |
|
"loss": 0.946, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.12936391117162524, |
|
"learning_rate": 2.3258141132371215e-06, |
|
"loss": 0.9489, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.13012323743231977, |
|
"learning_rate": 2.321582105000371e-06, |
|
"loss": 0.9474, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.12787846971167063, |
|
"learning_rate": 2.317340733565611e-06, |
|
"loss": 0.9546, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.13369044731603097, |
|
"learning_rate": 2.3130900472698252e-06, |
|
"loss": 0.9638, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.13019183472993442, |
|
"learning_rate": 2.308830094556153e-06, |
|
"loss": 0.9474, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.13265098197617997, |
|
"learning_rate": 2.30456092397334e-06, |
|
"loss": 0.9323, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.1302535176783885, |
|
"learning_rate": 2.300282584175186e-06, |
|
"loss": 0.9167, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.14336847605116843, |
|
"learning_rate": 2.2959951239199844e-06, |
|
"loss": 0.9724, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.1314846076011854, |
|
"learning_rate": 2.291698592069972e-06, |
|
"loss": 0.9379, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.12542537335155546, |
|
"learning_rate": 2.2873930375907707e-06, |
|
"loss": 0.9416, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.13451062144224887, |
|
"learning_rate": 2.283078509550829e-06, |
|
"loss": 0.9423, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.12982420568281253, |
|
"learning_rate": 2.278755057120863e-06, |
|
"loss": 0.9643, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.1323171694681192, |
|
"learning_rate": 2.2744227295732956e-06, |
|
"loss": 0.9301, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.12532510813835535, |
|
"learning_rate": 2.270081576281696e-06, |
|
"loss": 0.9423, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.13571475473397304, |
|
"learning_rate": 2.2657316467202156e-06, |
|
"loss": 0.9503, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.1375461995901152, |
|
"learning_rate": 2.2613729904630256e-06, |
|
"loss": 0.9081, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.12670725904405272, |
|
"learning_rate": 2.257005657183752e-06, |
|
"loss": 0.9642, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.11945786027175435, |
|
"learning_rate": 2.2526296966549072e-06, |
|
"loss": 0.9197, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.14286568243399034, |
|
"learning_rate": 2.2482451587473258e-06, |
|
"loss": 0.9399, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.13710025446972535, |
|
"learning_rate": 2.2438520934295943e-06, |
|
"loss": 0.9213, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.13298225052401855, |
|
"learning_rate": 2.2394505507674825e-06, |
|
"loss": 0.9547, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.1314181279581931, |
|
"learning_rate": 2.2350405809233722e-06, |
|
"loss": 0.9401, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.1350139369080771, |
|
"learning_rate": 2.2306222341556866e-06, |
|
"loss": 0.9255, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.125979705316961, |
|
"learning_rate": 2.226195560818317e-06, |
|
"loss": 0.9196, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.13645001654584013, |
|
"learning_rate": 2.221760611360048e-06, |
|
"loss": 0.9383, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.13497646785844908, |
|
"learning_rate": 2.217317436323983e-06, |
|
"loss": 0.9438, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.13373081145156018, |
|
"learning_rate": 2.212866086346971e-06, |
|
"loss": 0.9498, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.15418672754446455, |
|
"learning_rate": 2.2084066121590242e-06, |
|
"loss": 0.9542, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.13946090813340417, |
|
"learning_rate": 2.2039390645827443e-06, |
|
"loss": 0.9182, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.13403421008347952, |
|
"learning_rate": 2.1994634945327416e-06, |
|
"loss": 0.9411, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.14217560114276748, |
|
"learning_rate": 2.1949799530150545e-06, |
|
"loss": 0.9449, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.13116778692015293, |
|
"learning_rate": 2.1904884911265695e-06, |
|
"loss": 0.9236, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.14745228268417065, |
|
"learning_rate": 2.185989160054436e-06, |
|
"loss": 0.9564, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.1321060448025065, |
|
"learning_rate": 2.1814820110754874e-06, |
|
"loss": 0.9392, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.12064961504005225, |
|
"learning_rate": 2.1769670955556526e-06, |
|
"loss": 0.9381, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.19148426920556538, |
|
"learning_rate": 2.1724444649493733e-06, |
|
"loss": 0.9465, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.14149357097999177, |
|
"learning_rate": 2.167914170799014e-06, |
|
"loss": 0.9536, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.13696368177795465, |
|
"learning_rate": 2.163376264734281e-06, |
|
"loss": 0.9426, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.132327168385185, |
|
"learning_rate": 2.1588307984716276e-06, |
|
"loss": 0.9415, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.1226548149068113, |
|
"learning_rate": 2.154277823813668e-06, |
|
"loss": 0.9126, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.13412829227143383, |
|
"learning_rate": 2.1497173926485853e-06, |
|
"loss": 0.9263, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.1370416338042778, |
|
"learning_rate": 2.145149556949542e-06, |
|
"loss": 0.9222, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.1348834947967263, |
|
"learning_rate": 2.1405743687740865e-06, |
|
"loss": 0.9143, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.14200964631669566, |
|
"learning_rate": 2.13599188026356e-06, |
|
"loss": 0.8973, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.14200012930084902, |
|
"learning_rate": 2.1314021436425027e-06, |
|
"loss": 0.9438, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.14116621082081662, |
|
"learning_rate": 2.126805211218057e-06, |
|
"loss": 0.9604, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.1262029892604575, |
|
"learning_rate": 2.1222011353793735e-06, |
|
"loss": 0.9436, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.13476560349631844, |
|
"learning_rate": 2.1175899685970133e-06, |
|
"loss": 0.958, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.13243928557585383, |
|
"learning_rate": 2.112971763422349e-06, |
|
"loss": 0.9356, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.14102330703687754, |
|
"learning_rate": 2.1083465724869675e-06, |
|
"loss": 0.9183, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.13195228822616697, |
|
"learning_rate": 2.1037144485020684e-06, |
|
"loss": 0.9225, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.14110146966641385, |
|
"learning_rate": 2.0990754442578637e-06, |
|
"loss": 0.9396, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.1281526405819837, |
|
"learning_rate": 2.0944296126229784e-06, |
|
"loss": 0.9115, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.1309793127327286, |
|
"learning_rate": 2.0897770065438444e-06, |
|
"loss": 0.9408, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.13497364113187624, |
|
"learning_rate": 2.0851176790440995e-06, |
|
"loss": 0.8897, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.1306605766376586, |
|
"learning_rate": 2.080451683223983e-06, |
|
"loss": 0.9038, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.1355543202117501, |
|
"learning_rate": 2.075779072259729e-06, |
|
"loss": 0.9391, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.12860928764170376, |
|
"learning_rate": 2.0710998994029625e-06, |
|
"loss": 0.9426, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.144696140607215, |
|
"learning_rate": 2.0664142179800904e-06, |
|
"loss": 0.9302, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.1417303172042183, |
|
"learning_rate": 2.061722081391695e-06, |
|
"loss": 0.9168, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.1413230349204647, |
|
"learning_rate": 2.057023543111926e-06, |
|
"loss": 0.936, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.14177967145771603, |
|
"learning_rate": 2.052318656687889e-06, |
|
"loss": 0.9258, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.13957587807590546, |
|
"learning_rate": 2.0476074757390377e-06, |
|
"loss": 0.9244, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.14172401928885273, |
|
"learning_rate": 2.042890053956561e-06, |
|
"loss": 0.9325, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.13902009470334928, |
|
"learning_rate": 2.0381664451027717e-06, |
|
"loss": 0.9226, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.1342038780728634, |
|
"learning_rate": 2.0334367030104936e-06, |
|
"loss": 0.9549, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.1355958677734405, |
|
"learning_rate": 2.0287008815824494e-06, |
|
"loss": 0.924, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.13040433649479655, |
|
"learning_rate": 2.023959034790644e-06, |
|
"loss": 0.94, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.13624869289738803, |
|
"learning_rate": 2.019211216675751e-06, |
|
"loss": 0.9112, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.14498884964787695, |
|
"learning_rate": 2.0144574813464972e-06, |
|
"loss": 0.9188, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.14355069485318295, |
|
"learning_rate": 2.009697882979044e-06, |
|
"loss": 0.9434, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.13722263129525908, |
|
"learning_rate": 2.0049324758163714e-06, |
|
"loss": 0.9304, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.13150966633376412, |
|
"learning_rate": 2.000161314167661e-06, |
|
"loss": 0.9359, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.13841892885164878, |
|
"learning_rate": 1.995384452407673e-06, |
|
"loss": 0.9394, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.13321843330058455, |
|
"learning_rate": 1.990601944976133e-06, |
|
"loss": 0.9711, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.15061604889591293, |
|
"learning_rate": 1.985813846377103e-06, |
|
"loss": 0.9272, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.1361929007088866, |
|
"learning_rate": 1.9810202111783694e-06, |
|
"loss": 0.9525, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.13830562465905946, |
|
"learning_rate": 1.976221094010814e-06, |
|
"loss": 0.9283, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.1279081357272712, |
|
"learning_rate": 1.9714165495677955e-06, |
|
"loss": 0.9431, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.14809572862407092, |
|
"learning_rate": 1.9666066326045235e-06, |
|
"loss": 0.9341, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.13408718583428794, |
|
"learning_rate": 1.961791397937437e-06, |
|
"loss": 0.9423, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.13271952301063553, |
|
"learning_rate": 1.9569709004435776e-06, |
|
"loss": 0.9167, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.14683108447360405, |
|
"learning_rate": 1.9521451950599658e-06, |
|
"loss": 0.929, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.14319313513188295, |
|
"learning_rate": 1.947314336782973e-06, |
|
"loss": 0.9152, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.14548075342960598, |
|
"learning_rate": 1.942478380667697e-06, |
|
"loss": 0.9561, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.14674027808693163, |
|
"learning_rate": 1.937637381827332e-06, |
|
"loss": 0.9176, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.13901704473297072, |
|
"learning_rate": 1.932791395432543e-06, |
|
"loss": 0.943, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.14173457335906417, |
|
"learning_rate": 1.927940476710836e-06, |
|
"loss": 0.974, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.14520506983186532, |
|
"learning_rate": 1.9230846809459268e-06, |
|
"loss": 0.9347, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.14661866708240862, |
|
"learning_rate": 1.918224063477114e-06, |
|
"loss": 0.9229, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.13549663100208073, |
|
"learning_rate": 1.9133586796986475e-06, |
|
"loss": 0.9021, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.13619817098434184, |
|
"learning_rate": 1.9084885850590945e-06, |
|
"loss": 0.9563, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.14561195607002267, |
|
"learning_rate": 1.9036138350607125e-06, |
|
"loss": 0.9473, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.13462906219833434, |
|
"learning_rate": 1.8987344852588126e-06, |
|
"loss": 0.9247, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.15242666999590032, |
|
"learning_rate": 1.893850591261127e-06, |
|
"loss": 0.9364, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.13889983715691157, |
|
"learning_rate": 1.8889622087271771e-06, |
|
"loss": 0.9413, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.14135800831918405, |
|
"learning_rate": 1.8840693933676378e-06, |
|
"loss": 0.9207, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.1372780862087748, |
|
"learning_rate": 1.879172200943704e-06, |
|
"loss": 0.9331, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.15017799031764617, |
|
"learning_rate": 1.8742706872664516e-06, |
|
"loss": 0.9336, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.13301418405514617, |
|
"learning_rate": 1.8693649081962059e-06, |
|
"loss": 0.9575, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.13920099695451857, |
|
"learning_rate": 1.864454919641902e-06, |
|
"loss": 0.9452, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.13582823833343818, |
|
"learning_rate": 1.8595407775604495e-06, |
|
"loss": 0.914, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.14431368387268362, |
|
"learning_rate": 1.8546225379560928e-06, |
|
"loss": 0.9199, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.14026316815195494, |
|
"learning_rate": 1.8497002568797739e-06, |
|
"loss": 0.9411, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.13878672097268965, |
|
"learning_rate": 1.844773990428495e-06, |
|
"loss": 0.9208, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.13541879797436218, |
|
"learning_rate": 1.839843794744676e-06, |
|
"loss": 0.9554, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.14283432429319542, |
|
"learning_rate": 1.8349097260155178e-06, |
|
"loss": 0.941, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.14515720243880362, |
|
"learning_rate": 1.8299718404723604e-06, |
|
"loss": 0.9102, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.14050528252451772, |
|
"learning_rate": 1.8250301943900415e-06, |
|
"loss": 0.9124, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.1435086593604132, |
|
"learning_rate": 1.8200848440862568e-06, |
|
"loss": 0.9384, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.14004434383038292, |
|
"learning_rate": 1.8151358459209168e-06, |
|
"loss": 0.9256, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.1504347210308783, |
|
"learning_rate": 1.810183256295506e-06, |
|
"loss": 0.9181, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.13844797279531637, |
|
"learning_rate": 1.805227131652438e-06, |
|
"loss": 0.9286, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.13788813272277844, |
|
"learning_rate": 1.800267528474414e-06, |
|
"loss": 0.9098, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.14105382541615677, |
|
"learning_rate": 1.7953045032837773e-06, |
|
"loss": 0.9289, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.1376343316669543, |
|
"learning_rate": 1.7903381126418725e-06, |
|
"loss": 0.9147, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.1393057886246714, |
|
"learning_rate": 1.7853684131483972e-06, |
|
"loss": 0.9583, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.1347402653981868, |
|
"learning_rate": 1.7803954614407588e-06, |
|
"loss": 0.956, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.1420224811227815, |
|
"learning_rate": 1.7754193141934286e-06, |
|
"loss": 0.9288, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.14376583147030975, |
|
"learning_rate": 1.7704400281172962e-06, |
|
"loss": 0.9195, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.13126659878484417, |
|
"learning_rate": 1.7654576599590229e-06, |
|
"loss": 0.9468, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.13581121757423928, |
|
"learning_rate": 1.7604722665003958e-06, |
|
"loss": 0.906, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.15390158532500306, |
|
"learning_rate": 1.7554839045576778e-06, |
|
"loss": 0.9699, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.14405781787739771, |
|
"learning_rate": 1.7504926309809655e-06, |
|
"loss": 0.9174, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.13696698824707879, |
|
"learning_rate": 1.7454985026535348e-06, |
|
"loss": 0.9178, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.14183018142151826, |
|
"learning_rate": 1.7405015764911985e-06, |
|
"loss": 0.93, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.14244315668176377, |
|
"learning_rate": 1.735501909441654e-06, |
|
"loss": 0.9081, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.13715525306632836, |
|
"learning_rate": 1.7304995584838346e-06, |
|
"loss": 0.9453, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.14544225825185408, |
|
"learning_rate": 1.7254945806272619e-06, |
|
"loss": 0.9377, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.12862157823453402, |
|
"learning_rate": 1.7204870329113952e-06, |
|
"loss": 0.9111, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.14387593948992988, |
|
"learning_rate": 1.7154769724049805e-06, |
|
"loss": 0.9179, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.14310554120599442, |
|
"learning_rate": 1.7104644562054017e-06, |
|
"loss": 0.9264, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.14369268756275277, |
|
"learning_rate": 1.705449541438028e-06, |
|
"loss": 0.9179, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.14156019346421533, |
|
"learning_rate": 1.7004322852555657e-06, |
|
"loss": 0.9411, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.13431638177331276, |
|
"learning_rate": 1.6954127448374036e-06, |
|
"loss": 0.9211, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.14619505394748813, |
|
"learning_rate": 1.6903909773889638e-06, |
|
"loss": 0.9272, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.14836272472317252, |
|
"learning_rate": 1.6853670401410484e-06, |
|
"loss": 0.9343, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.1373985024055969, |
|
"learning_rate": 1.6803409903491877e-06, |
|
"loss": 0.9318, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.15326454301403541, |
|
"learning_rate": 1.6753128852929884e-06, |
|
"loss": 0.9578, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.14559815110391214, |
|
"learning_rate": 1.6702827822754788e-06, |
|
"loss": 0.9272, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.13744648077417837, |
|
"learning_rate": 1.6652507386224587e-06, |
|
"loss": 0.8995, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.14647628387598488, |
|
"learning_rate": 1.6602168116818428e-06, |
|
"loss": 0.9162, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.14404142802195286, |
|
"learning_rate": 1.65518105882301e-06, |
|
"loss": 0.9242, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.15243037059220865, |
|
"learning_rate": 1.6501435374361478e-06, |
|
"loss": 0.93, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.14267865323341203, |
|
"learning_rate": 1.6451043049315989e-06, |
|
"loss": 0.9137, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.1481814130813317, |
|
"learning_rate": 1.6400634187392068e-06, |
|
"loss": 0.9295, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.14103202841048518, |
|
"learning_rate": 1.635020936307662e-06, |
|
"loss": 0.9286, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.14180132297439638, |
|
"learning_rate": 1.629976915103845e-06, |
|
"loss": 0.9472, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.14440535406295116, |
|
"learning_rate": 1.6249314126121743e-06, |
|
"loss": 0.916, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.14297238889743974, |
|
"learning_rate": 1.61988448633395e-06, |
|
"loss": 0.9428, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.14278206678104752, |
|
"learning_rate": 1.614836193786698e-06, |
|
"loss": 0.9388, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.14708980236362657, |
|
"learning_rate": 1.6097865925035148e-06, |
|
"loss": 0.9263, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.1493782348617741, |
|
"learning_rate": 1.6047357400324125e-06, |
|
"loss": 0.9453, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.14792348168682745, |
|
"learning_rate": 1.599683693935662e-06, |
|
"loss": 0.9471, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.14644504231264188, |
|
"learning_rate": 1.5946305117891372e-06, |
|
"loss": 0.9543, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.15603436515208155, |
|
"learning_rate": 1.5895762511816603e-06, |
|
"loss": 0.9403, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.1492183413320477, |
|
"learning_rate": 1.5845209697143427e-06, |
|
"loss": 0.9347, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.14310505430645265, |
|
"learning_rate": 1.5794647249999302e-06, |
|
"loss": 0.9284, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.15219696170127922, |
|
"learning_rate": 1.5744075746621477e-06, |
|
"loss": 0.9446, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.14278980302720323, |
|
"learning_rate": 1.5693495763350399e-06, |
|
"loss": 0.942, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.15485157792551277, |
|
"learning_rate": 1.5642907876623155e-06, |
|
"loss": 0.9495, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.14415653748935103, |
|
"learning_rate": 1.5592312662966912e-06, |
|
"loss": 0.95, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.14626040552803168, |
|
"learning_rate": 1.5541710698992333e-06, |
|
"loss": 0.9272, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.14459798185856082, |
|
"learning_rate": 1.5491102561387017e-06, |
|
"loss": 0.9287, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.14606201408180883, |
|
"learning_rate": 1.5440488826908916e-06, |
|
"loss": 0.9093, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.15371875458355483, |
|
"learning_rate": 1.5389870072379764e-06, |
|
"loss": 0.9365, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.1402365522072789, |
|
"learning_rate": 1.5339246874678514e-06, |
|
"loss": 0.9179, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.1543389255747757, |
|
"learning_rate": 1.528861981073475e-06, |
|
"loss": 0.9223, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.14977412118551237, |
|
"learning_rate": 1.523798945752212e-06, |
|
"loss": 0.9246, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.15214617356605256, |
|
"learning_rate": 1.5187356392051763e-06, |
|
"loss": 0.9199, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.14138993564510816, |
|
"learning_rate": 1.5136721191365722e-06, |
|
"loss": 0.9678, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.14972625540064466, |
|
"learning_rate": 1.5086084432530372e-06, |
|
"loss": 0.9371, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.15911745951707099, |
|
"learning_rate": 1.5035446692629851e-06, |
|
"loss": 0.9264, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.15920032142731483, |
|
"learning_rate": 1.498480854875948e-06, |
|
"loss": 0.9483, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.14963882238441822, |
|
"learning_rate": 1.4934170578019175e-06, |
|
"loss": 0.9339, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.15286707979378059, |
|
"learning_rate": 1.488353335750689e-06, |
|
"loss": 0.9406, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.1521031280190717, |
|
"learning_rate": 1.483289746431202e-06, |
|
"loss": 0.9127, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.15222047385687712, |
|
"learning_rate": 1.4782263475508832e-06, |
|
"loss": 0.9315, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.14731189974755135, |
|
"learning_rate": 1.4731631968149895e-06, |
|
"loss": 0.904, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.13976065735361923, |
|
"learning_rate": 1.4681003519259502e-06, |
|
"loss": 0.9117, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.14696780830304437, |
|
"learning_rate": 1.463037870582708e-06, |
|
"loss": 0.9206, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.14710865075058713, |
|
"learning_rate": 1.457975810480063e-06, |
|
"loss": 0.9188, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.14110608951947717, |
|
"learning_rate": 1.4529142293080148e-06, |
|
"loss": 0.9563, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.15100956168840318, |
|
"learning_rate": 1.447853184751104e-06, |
|
"loss": 0.9117, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.14173007606413557, |
|
"learning_rate": 1.4427927344877572e-06, |
|
"loss": 0.9197, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.15488805146642348, |
|
"learning_rate": 1.437732936189626e-06, |
|
"loss": 0.9286, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.1541544010687315, |
|
"learning_rate": 1.4326738475209337e-06, |
|
"loss": 0.9599, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.14095218223371167, |
|
"learning_rate": 1.427615526137815e-06, |
|
"loss": 0.8989, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.15293667596322041, |
|
"learning_rate": 1.4225580296876608e-06, |
|
"loss": 0.9447, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.14652744921172597, |
|
"learning_rate": 1.417501415808461e-06, |
|
"loss": 0.9217, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.1504944288827222, |
|
"learning_rate": 1.4124457421281463e-06, |
|
"loss": 0.9673, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.15585236316583084, |
|
"learning_rate": 1.4073910662639332e-06, |
|
"loss": 0.9065, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.14742315841540685, |
|
"learning_rate": 1.402337445821666e-06, |
|
"loss": 0.9411, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.15430455119028572, |
|
"learning_rate": 1.3972849383951611e-06, |
|
"loss": 0.9355, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.1555666818366108, |
|
"learning_rate": 1.3922336015655506e-06, |
|
"loss": 0.9167, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.14962961591804877, |
|
"learning_rate": 1.3871834929006256e-06, |
|
"loss": 0.941, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.15455497529184967, |
|
"learning_rate": 1.3821346699541796e-06, |
|
"loss": 0.9192, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.14982121619688704, |
|
"learning_rate": 1.3770871902653545e-06, |
|
"loss": 0.9248, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.14737972139745104, |
|
"learning_rate": 1.3720411113579831e-06, |
|
"loss": 0.9282, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.1435094517528824, |
|
"learning_rate": 1.3669964907399345e-06, |
|
"loss": 0.931, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.15220182643955849, |
|
"learning_rate": 1.361953385902458e-06, |
|
"loss": 0.9452, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.1565960862677695, |
|
"learning_rate": 1.3569118543195285e-06, |
|
"loss": 0.9265, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.1468387324121908, |
|
"learning_rate": 1.3518719534471912e-06, |
|
"loss": 0.929, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.1603995121000358, |
|
"learning_rate": 1.3468337407229064e-06, |
|
"loss": 0.9226, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.1571110585893993, |
|
"learning_rate": 1.341797273564896e-06, |
|
"loss": 0.929, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.14317070475253646, |
|
"learning_rate": 1.3367626093714884e-06, |
|
"loss": 0.9173, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.14419206921916547, |
|
"learning_rate": 1.3317298055204635e-06, |
|
"loss": 0.9381, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.1479165442891011, |
|
"learning_rate": 1.3266989193684006e-06, |
|
"loss": 0.9229, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.1509048030581506, |
|
"learning_rate": 1.3216700082500238e-06, |
|
"loss": 0.9346, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.1524427495079411, |
|
"learning_rate": 1.3166431294775486e-06, |
|
"loss": 0.9263, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.1363749471323768, |
|
"learning_rate": 1.3116183403400286e-06, |
|
"loss": 0.9233, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.15571108260671188, |
|
"learning_rate": 1.3065956981027027e-06, |
|
"loss": 0.9254, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.14738034590459953, |
|
"learning_rate": 1.3015752600063428e-06, |
|
"loss": 0.9356, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.15214039883958294, |
|
"learning_rate": 1.2965570832666014e-06, |
|
"loss": 0.9479, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.1367470768499345, |
|
"learning_rate": 1.2915412250733592e-06, |
|
"loss": 0.9328, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.14302733162614767, |
|
"learning_rate": 1.2865277425900725e-06, |
|
"loss": 0.9212, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.14429292224162268, |
|
"learning_rate": 1.2815166929531242e-06, |
|
"loss": 0.9071, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.15632981550136582, |
|
"learning_rate": 1.2765081332711703e-06, |
|
"loss": 0.9196, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.16001339885276877, |
|
"learning_rate": 1.2715021206244902e-06, |
|
"loss": 0.9241, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.15777057516222137, |
|
"learning_rate": 1.266498712064336e-06, |
|
"loss": 0.9261, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.15137232676250217, |
|
"learning_rate": 1.2614979646122817e-06, |
|
"loss": 0.9437, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.1509908233769637, |
|
"learning_rate": 1.2564999352595746e-06, |
|
"loss": 0.9022, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.1446990823675756, |
|
"learning_rate": 1.2515046809664841e-06, |
|
"loss": 0.9324, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.14529842278696345, |
|
"learning_rate": 1.2465122586616548e-06, |
|
"loss": 0.9186, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.14890041058005424, |
|
"learning_rate": 1.2415227252414555e-06, |
|
"loss": 0.8839, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.15401076762014934, |
|
"learning_rate": 1.2365361375693311e-06, |
|
"loss": 0.9526, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.147809786608213, |
|
"learning_rate": 1.2315525524751565e-06, |
|
"loss": 0.9561, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.13972074920029123, |
|
"learning_rate": 1.226572026754587e-06, |
|
"loss": 0.906, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.1463242760428321, |
|
"learning_rate": 1.2215946171684115e-06, |
|
"loss": 0.9261, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.15629569558792494, |
|
"learning_rate": 1.216620380441906e-06, |
|
"loss": 0.9301, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.15002654546854, |
|
"learning_rate": 1.2116493732641862e-06, |
|
"loss": 0.9271, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.154693913708028, |
|
"learning_rate": 1.2066816522875634e-06, |
|
"loss": 0.9603, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.15302292304097528, |
|
"learning_rate": 1.2017172741268962e-06, |
|
"loss": 0.9562, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.15744416055939195, |
|
"learning_rate": 1.1967562953589479e-06, |
|
"loss": 0.9249, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.15274452723176532, |
|
"learning_rate": 1.1917987725217386e-06, |
|
"loss": 0.9098, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.14903013683191468, |
|
"learning_rate": 1.1868447621139045e-06, |
|
"loss": 0.9341, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.14670691399106886, |
|
"learning_rate": 1.181894320594052e-06, |
|
"loss": 0.9349, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.1408976877727933, |
|
"learning_rate": 1.1769475043801133e-06, |
|
"loss": 0.9112, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.14431525425160555, |
|
"learning_rate": 1.1720043698487063e-06, |
|
"loss": 0.9384, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.1524691711633023, |
|
"learning_rate": 1.167064973334489e-06, |
|
"loss": 0.9309, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.1496938194214256, |
|
"learning_rate": 1.16212937112952e-06, |
|
"loss": 0.9498, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.158618142809653, |
|
"learning_rate": 1.157197619482615e-06, |
|
"loss": 0.9136, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.14386034744502232, |
|
"learning_rate": 1.1522697745987075e-06, |
|
"loss": 0.9168, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.13819775275169346, |
|
"learning_rate": 1.147345892638207e-06, |
|
"loss": 0.9169, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.1463761110381215, |
|
"learning_rate": 1.1424260297163588e-06, |
|
"loss": 0.9229, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.15981042409351381, |
|
"learning_rate": 1.1375102419026054e-06, |
|
"loss": 0.9111, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.15444315539373016, |
|
"learning_rate": 1.132598585219948e-06, |
|
"loss": 0.9368, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.1523026328416241, |
|
"learning_rate": 1.1276911156443059e-06, |
|
"loss": 0.9424, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.1420446227672182, |
|
"learning_rate": 1.122787889103881e-06, |
|
"loss": 0.9238, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.1523202126073691, |
|
"learning_rate": 1.117888961478518e-06, |
|
"loss": 0.9231, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.14814643719132345, |
|
"learning_rate": 1.1129943885990697e-06, |
|
"loss": 0.9214, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.14772444209447835, |
|
"learning_rate": 1.10810422624676e-06, |
|
"loss": 0.9151, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.14630970085839903, |
|
"learning_rate": 1.103218530152548e-06, |
|
"loss": 0.8945, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.15164841898736303, |
|
"learning_rate": 1.098337355996491e-06, |
|
"loss": 0.9372, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.1544725525584187, |
|
"learning_rate": 1.0934607594071146e-06, |
|
"loss": 0.9416, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.14788884792842535, |
|
"learning_rate": 1.0885887959607744e-06, |
|
"loss": 0.9274, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.1574408320339308, |
|
"learning_rate": 1.0837215211810242e-06, |
|
"loss": 0.929, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.1613973917431587, |
|
"learning_rate": 1.078858990537984e-06, |
|
"loss": 0.8949, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.1598506012172115, |
|
"learning_rate": 1.074001259447706e-06, |
|
"loss": 0.9188, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.1588121231918017, |
|
"learning_rate": 1.0691483832715451e-06, |
|
"loss": 0.9245, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.1649194556317263, |
|
"learning_rate": 1.0643004173155262e-06, |
|
"loss": 0.9288, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.14006308330926057, |
|
"learning_rate": 1.059457416829715e-06, |
|
"loss": 0.964, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.1485669272917807, |
|
"learning_rate": 1.0546194370075883e-06, |
|
"loss": 0.9181, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.14858509037038672, |
|
"learning_rate": 1.049786532985403e-06, |
|
"loss": 0.9272, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.14487395597716682, |
|
"learning_rate": 1.0449587598415714e-06, |
|
"loss": 0.917, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.14710872415034287, |
|
"learning_rate": 1.040136172596031e-06, |
|
"loss": 0.9247, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.1626479302951273, |
|
"learning_rate": 1.0353188262096175e-06, |
|
"loss": 0.9275, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.15781073519135003, |
|
"learning_rate": 1.0305067755834393e-06, |
|
"loss": 0.9253, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.14128421978977157, |
|
"learning_rate": 1.0257000755582512e-06, |
|
"loss": 0.9211, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.15143104599521942, |
|
"learning_rate": 1.0208987809138298e-06, |
|
"loss": 0.922, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.1622702675456415, |
|
"learning_rate": 1.0161029463683486e-06, |
|
"loss": 0.9305, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.1438713828814328, |
|
"learning_rate": 1.0113126265777563e-06, |
|
"loss": 0.9423, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.16559678050928944, |
|
"learning_rate": 1.00652787613515e-06, |
|
"loss": 0.9419, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.15093953285534764, |
|
"learning_rate": 1.0017487495701574e-06, |
|
"loss": 0.9137, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.15008677116743874, |
|
"learning_rate": 9.969753013483127e-07, |
|
"loss": 0.9304, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.16077001381591202, |
|
"learning_rate": 9.922075858704368e-07, |
|
"loss": 0.9129, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.14718070657922894, |
|
"learning_rate": 9.87445657472017e-07, |
|
"loss": 0.9213, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.14856763126559924, |
|
"learning_rate": 9.82689570422588e-07, |
|
"loss": 0.9165, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.17239452738903066, |
|
"learning_rate": 9.779393789251134e-07, |
|
"loss": 0.9234, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.1432746904130774, |
|
"learning_rate": 9.731951371153675e-07, |
|
"loss": 0.9329, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.17149057728657496, |
|
"learning_rate": 9.684568990613192e-07, |
|
"loss": 0.9489, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.15393538189231964, |
|
"learning_rate": 9.637247187625146e-07, |
|
"loss": 0.916, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.15174331734813315, |
|
"learning_rate": 9.58998650149463e-07, |
|
"loss": 0.9205, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.14922998507146534, |
|
"learning_rate": 9.542787470830209e-07, |
|
"loss": 0.9343, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.1500848629292273, |
|
"learning_rate": 9.4956506335378e-07, |
|
"loss": 0.9241, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.14949618673786386, |
|
"learning_rate": 9.44857652681452e-07, |
|
"loss": 0.9327, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.15397347493957886, |
|
"learning_rate": 9.401565687142579e-07, |
|
"loss": 0.9407, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.1533729596254089, |
|
"learning_rate": 9.354618650283159e-07, |
|
"loss": 0.9226, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.15161923850474765, |
|
"learning_rate": 9.307735951270313e-07, |
|
"loss": 0.9279, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.1522671281157681, |
|
"learning_rate": 9.260918124404861e-07, |
|
"loss": 0.9145, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.15106036750963397, |
|
"learning_rate": 9.214165703248314e-07, |
|
"loss": 0.8922, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.1508508419060107, |
|
"learning_rate": 9.167479220616762e-07, |
|
"loss": 0.9096, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.15691718553565023, |
|
"learning_rate": 9.120859208574848e-07, |
|
"loss": 0.9276, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.14818102813867765, |
|
"learning_rate": 9.074306198429669e-07, |
|
"loss": 0.9062, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.1473880251613391, |
|
"learning_rate": 9.02782072072473e-07, |
|
"loss": 0.9197, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.1526172026124484, |
|
"learning_rate": 8.981403305233904e-07, |
|
"loss": 0.9514, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.17512228057747395, |
|
"learning_rate": 8.935054480955389e-07, |
|
"loss": 0.9107, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.15716161285598004, |
|
"learning_rate": 8.888774776105679e-07, |
|
"loss": 0.8967, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.14818527302391432, |
|
"learning_rate": 8.842564718113546e-07, |
|
"loss": 0.9309, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.1575709023231356, |
|
"learning_rate": 8.796424833614026e-07, |
|
"loss": 0.9042, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.16168153914116115, |
|
"learning_rate": 8.750355648442425e-07, |
|
"loss": 0.9109, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.156594691500565, |
|
"learning_rate": 8.704357687628317e-07, |
|
"loss": 0.9162, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.14861481291797754, |
|
"learning_rate": 8.658431475389554e-07, |
|
"loss": 0.9169, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.14500543908075786, |
|
"learning_rate": 8.612577535126329e-07, |
|
"loss": 0.9372, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.15940891480295918, |
|
"learning_rate": 8.566796389415154e-07, |
|
"loss": 0.9415, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.15562117644201193, |
|
"learning_rate": 8.521088560002961e-07, |
|
"loss": 0.9133, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.15740195784796523, |
|
"learning_rate": 8.475454567801106e-07, |
|
"loss": 0.9177, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.15359832007067548, |
|
"learning_rate": 8.429894932879477e-07, |
|
"loss": 0.9243, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.16314606057976921, |
|
"learning_rate": 8.384410174460524e-07, |
|
"loss": 0.9526, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.14174905302998128, |
|
"learning_rate": 8.339000810913388e-07, |
|
"loss": 0.9268, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.1508355501376482, |
|
"learning_rate": 8.293667359747949e-07, |
|
"loss": 0.9111, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.15620625232845353, |
|
"learning_rate": 8.248410337608957e-07, |
|
"loss": 0.9258, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.15836435275133684, |
|
"learning_rate": 8.203230260270127e-07, |
|
"loss": 0.9202, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.1490971119373064, |
|
"learning_rate": 8.158127642628285e-07, |
|
"loss": 0.93, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.1498758710125973, |
|
"learning_rate": 8.113102998697464e-07, |
|
"loss": 0.9332, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.15427230559339164, |
|
"learning_rate": 8.068156841603089e-07, |
|
"loss": 0.9393, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.1554483829700364, |
|
"learning_rate": 8.02328968357608e-07, |
|
"loss": 0.9365, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.16829536457561103, |
|
"learning_rate": 7.978502035947067e-07, |
|
"loss": 0.9185, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.15319450100142196, |
|
"learning_rate": 7.933794409140512e-07, |
|
"loss": 0.9302, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.15517496668151076, |
|
"learning_rate": 7.889167312668937e-07, |
|
"loss": 0.962, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.1496297874459852, |
|
"learning_rate": 7.844621255127083e-07, |
|
"loss": 0.9217, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.15476953612136735, |
|
"learning_rate": 7.800156744186124e-07, |
|
"loss": 0.9519, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.15224644017556324, |
|
"learning_rate": 7.755774286587901e-07, |
|
"loss": 0.932, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.14505520530000182, |
|
"learning_rate": 7.711474388139111e-07, |
|
"loss": 0.9125, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.16828167804824415, |
|
"learning_rate": 7.667257553705584e-07, |
|
"loss": 0.9132, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.16605220073103116, |
|
"learning_rate": 7.623124287206483e-07, |
|
"loss": 0.9549, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.16228786623012695, |
|
"learning_rate": 7.579075091608605e-07, |
|
"loss": 0.9203, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.1599479558357973, |
|
"learning_rate": 7.535110468920611e-07, |
|
"loss": 0.9262, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.1573093783507514, |
|
"learning_rate": 7.491230920187344e-07, |
|
"loss": 0.9366, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.15963712113833328, |
|
"learning_rate": 7.447436945484082e-07, |
|
"loss": 0.9013, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.1399827930339598, |
|
"learning_rate": 7.40372904391086e-07, |
|
"loss": 0.9457, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.1562062988863496, |
|
"learning_rate": 7.360107713586768e-07, |
|
"loss": 0.9352, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.1488352670738681, |
|
"learning_rate": 7.316573451644303e-07, |
|
"loss": 0.8734, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.1552420073883167, |
|
"learning_rate": 7.27312675422366e-07, |
|
"loss": 0.9386, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.15531261843265345, |
|
"learning_rate": 7.229768116467124e-07, |
|
"loss": 0.929, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.15795986251543662, |
|
"learning_rate": 7.186498032513378e-07, |
|
"loss": 0.9157, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.14791527959203932, |
|
"learning_rate": 7.143316995491923e-07, |
|
"loss": 0.9391, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.1552589054762009, |
|
"learning_rate": 7.100225497517415e-07, |
|
"loss": 0.906, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.15429494696285626, |
|
"learning_rate": 7.05722402968409e-07, |
|
"loss": 0.9301, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.1539770736197306, |
|
"learning_rate": 7.014313082060122e-07, |
|
"loss": 0.9409, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.15378389305689893, |
|
"learning_rate": 6.971493143682105e-07, |
|
"loss": 0.9536, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.14841643591153145, |
|
"learning_rate": 6.928764702549411e-07, |
|
"loss": 0.9455, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.1424702517238874, |
|
"learning_rate": 6.886128245618684e-07, |
|
"loss": 0.9177, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.1559018526800424, |
|
"learning_rate": 6.843584258798242e-07, |
|
"loss": 0.9376, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.15722991689607704, |
|
"learning_rate": 6.801133226942587e-07, |
|
"loss": 0.9208, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.14610737260386278, |
|
"learning_rate": 6.758775633846834e-07, |
|
"loss": 0.9095, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.16090010441145305, |
|
"learning_rate": 6.716511962241237e-07, |
|
"loss": 0.929, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.1554441965831577, |
|
"learning_rate": 6.674342693785651e-07, |
|
"loss": 0.9394, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.1546491123373585, |
|
"learning_rate": 6.632268309064086e-07, |
|
"loss": 0.9409, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.14715256425800258, |
|
"learning_rate": 6.590289287579178e-07, |
|
"loss": 0.9055, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.14599984212897624, |
|
"learning_rate": 6.548406107746771e-07, |
|
"loss": 0.9433, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.16347085408735626, |
|
"learning_rate": 6.506619246890428e-07, |
|
"loss": 0.9548, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.15831789964538523, |
|
"learning_rate": 6.464929181236033e-07, |
|
"loss": 0.9386, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.15355363351088241, |
|
"learning_rate": 6.423336385906309e-07, |
|
"loss": 0.9344, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.1501784569255906, |
|
"learning_rate": 6.381841334915464e-07, |
|
"loss": 0.9293, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.16042288291190004, |
|
"learning_rate": 6.340444501163731e-07, |
|
"loss": 0.9393, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.16316861059946366, |
|
"learning_rate": 6.29914635643203e-07, |
|
"loss": 0.929, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.1643610541161199, |
|
"learning_rate": 6.257947371376546e-07, |
|
"loss": 0.9426, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.14616311834478152, |
|
"learning_rate": 6.216848015523392e-07, |
|
"loss": 0.9377, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.16152029615425262, |
|
"learning_rate": 6.175848757263268e-07, |
|
"loss": 0.9429, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.15699642229578464, |
|
"learning_rate": 6.134950063846083e-07, |
|
"loss": 0.9199, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.1530620485910159, |
|
"learning_rate": 6.094152401375673e-07, |
|
"loss": 0.922, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.15712637436393728, |
|
"learning_rate": 6.053456234804455e-07, |
|
"loss": 0.9433, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.15422389473184495, |
|
"learning_rate": 6.012862027928163e-07, |
|
"loss": 0.888, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.1566886231926118, |
|
"learning_rate": 5.972370243380519e-07, |
|
"loss": 0.925, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.159378149867113, |
|
"learning_rate": 5.931981342628009e-07, |
|
"loss": 0.9244, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.1633607022367415, |
|
"learning_rate": 5.891695785964572e-07, |
|
"loss": 0.905, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.16458140405159416, |
|
"learning_rate": 5.851514032506414e-07, |
|
"loss": 0.9371, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.17783212358538342, |
|
"learning_rate": 5.811436540186702e-07, |
|
"loss": 0.9275, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.1557140157487442, |
|
"learning_rate": 5.771463765750429e-07, |
|
"loss": 0.9483, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.16047316591282632, |
|
"learning_rate": 5.731596164749129e-07, |
|
"loss": 0.9286, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.1562396765552942, |
|
"learning_rate": 5.691834191535754e-07, |
|
"loss": 0.9419, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.16172641388514977, |
|
"learning_rate": 5.652178299259437e-07, |
|
"loss": 0.952, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.16113805975363696, |
|
"learning_rate": 5.612628939860378e-07, |
|
"loss": 0.9351, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.1437287491294515, |
|
"learning_rate": 5.573186564064649e-07, |
|
"loss": 0.9505, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.16834817212399983, |
|
"learning_rate": 5.533851621379097e-07, |
|
"loss": 0.959, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.1616903254935569, |
|
"learning_rate": 5.494624560086189e-07, |
|
"loss": 0.9197, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.15641360380963865, |
|
"learning_rate": 5.455505827238926e-07, |
|
"loss": 0.902, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.16461127069550324, |
|
"learning_rate": 5.416495868655723e-07, |
|
"loss": 0.9054, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.15407955424345696, |
|
"learning_rate": 5.377595128915371e-07, |
|
"loss": 0.9383, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.16169022882100972, |
|
"learning_rate": 5.338804051351918e-07, |
|
"loss": 0.9203, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.1671241995582254, |
|
"learning_rate": 5.30012307804966e-07, |
|
"loss": 0.9165, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.1474324575569325, |
|
"learning_rate": 5.261552649838068e-07, |
|
"loss": 0.9235, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.1585059938742927, |
|
"learning_rate": 5.223093206286801e-07, |
|
"loss": 0.9214, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.1497680366174517, |
|
"learning_rate": 5.184745185700654e-07, |
|
"loss": 0.9314, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.1546850610459381, |
|
"learning_rate": 5.146509025114608e-07, |
|
"loss": 0.9235, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.15045132406714182, |
|
"learning_rate": 5.108385160288809e-07, |
|
"loss": 0.9202, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.1583744842728306, |
|
"learning_rate": 5.070374025703618e-07, |
|
"loss": 0.9146, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.1562840274285093, |
|
"learning_rate": 5.032476054554679e-07, |
|
"loss": 0.9302, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.15846456988668864, |
|
"learning_rate": 4.994691678747944e-07, |
|
"loss": 0.9339, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.16307391364816642, |
|
"learning_rate": 4.957021328894786e-07, |
|
"loss": 0.9005, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.16346422226751806, |
|
"learning_rate": 4.919465434307062e-07, |
|
"loss": 0.9345, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.1695222160282978, |
|
"learning_rate": 4.882024422992248e-07, |
|
"loss": 0.9234, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.16278959029884632, |
|
"learning_rate": 4.844698721648531e-07, |
|
"loss": 0.9196, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.15957302783445512, |
|
"learning_rate": 4.807488755659985e-07, |
|
"loss": 0.9413, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.1560040784890354, |
|
"learning_rate": 4.770394949091678e-07, |
|
"loss": 0.9355, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.1413278988058616, |
|
"learning_rate": 4.7334177246848794e-07, |
|
"loss": 0.9295, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.1605581810941067, |
|
"learning_rate": 4.6965575038522055e-07, |
|
"loss": 0.9239, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.15990736616330292, |
|
"learning_rate": 4.6598147066728613e-07, |
|
"loss": 0.927, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.1523970977431131, |
|
"learning_rate": 4.6231897518878015e-07, |
|
"loss": 0.9268, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.15895125148362393, |
|
"learning_rate": 4.5866830568950103e-07, |
|
"loss": 0.9083, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.16109242763716947, |
|
"learning_rate": 4.550295037744694e-07, |
|
"loss": 0.9295, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.15788410886290616, |
|
"learning_rate": 4.5140261091345867e-07, |
|
"loss": 0.9355, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.16134673966779897, |
|
"learning_rate": 4.4778766844051793e-07, |
|
"loss": 0.884, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.1532078082485744, |
|
"learning_rate": 4.4418471755350544e-07, |
|
"loss": 0.9288, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.15779718366905426, |
|
"learning_rate": 4.405937993136151e-07, |
|
"loss": 0.9344, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.14884341139812074, |
|
"learning_rate": 4.370149546449109e-07, |
|
"loss": 0.891, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.1600941628867109, |
|
"learning_rate": 4.3344822433385896e-07, |
|
"loss": 0.9287, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.1617161511554377, |
|
"learning_rate": 4.2989364902886545e-07, |
|
"loss": 0.94, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.15247804429393655, |
|
"learning_rate": 4.263512692398091e-07, |
|
"loss": 0.9213, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.16018944449095257, |
|
"learning_rate": 4.228211253375843e-07, |
|
"loss": 0.9024, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.16007374852429948, |
|
"learning_rate": 4.193032575536363e-07, |
|
"loss": 0.9241, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.15889603417134412, |
|
"learning_rate": 4.1579770597950693e-07, |
|
"loss": 0.9239, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.17459525180727348, |
|
"learning_rate": 4.123045105663743e-07, |
|
"loss": 0.8917, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.15763900800425762, |
|
"learning_rate": 4.088237111246e-07, |
|
"loss": 0.9211, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.1575161337518218, |
|
"learning_rate": 4.053553473232742e-07, |
|
"loss": 0.915, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.15300282276303448, |
|
"learning_rate": 4.018994586897624e-07, |
|
"loss": 0.9249, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.1504264330652952, |
|
"learning_rate": 3.9845608460925854e-07, |
|
"loss": 0.9508, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.14959628504165817, |
|
"learning_rate": 3.950252643243317e-07, |
|
"loss": 0.9095, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.1630583733534405, |
|
"learning_rate": 3.916070369344831e-07, |
|
"loss": 0.927, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.15434075528005992, |
|
"learning_rate": 3.8820144139569635e-07, |
|
"loss": 0.9441, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.15652793092715717, |
|
"learning_rate": 3.8480851651999785e-07, |
|
"loss": 0.9061, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.24690420142064057, |
|
"learning_rate": 3.814283009750098e-07, |
|
"loss": 0.9291, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.1561055095998655, |
|
"learning_rate": 3.7806083328351425e-07, |
|
"loss": 0.9141, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.15547212939941615, |
|
"learning_rate": 3.7470615182301005e-07, |
|
"loss": 0.936, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.1398346443024953, |
|
"learning_rate": 3.713642948252779e-07, |
|
"loss": 0.9173, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.1596932224744843, |
|
"learning_rate": 3.680353003759433e-07, |
|
"loss": 0.9354, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.15975646326696724, |
|
"learning_rate": 3.6471920641404466e-07, |
|
"loss": 0.9448, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.1508038761650183, |
|
"learning_rate": 3.614160507315973e-07, |
|
"loss": 0.9207, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.1569010196095641, |
|
"learning_rate": 3.581258709731671e-07, |
|
"loss": 0.9152, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.15735074242891792, |
|
"learning_rate": 3.548487046354368e-07, |
|
"loss": 0.9048, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.1696586295135802, |
|
"learning_rate": 3.515845890667835e-07, |
|
"loss": 0.9265, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.16094178127192826, |
|
"learning_rate": 3.4833356146684856e-07, |
|
"loss": 0.9095, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.15722063022161345, |
|
"learning_rate": 3.450956588861173e-07, |
|
"loss": 0.8987, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.15767024672708221, |
|
"learning_rate": 3.418709182254943e-07, |
|
"loss": 0.9444, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.15337104954670655, |
|
"learning_rate": 3.3865937623588354e-07, |
|
"loss": 0.9231, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.16566851753448866, |
|
"learning_rate": 3.3546106951776993e-07, |
|
"loss": 0.9007, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.16192142530048215, |
|
"learning_rate": 3.322760345208031e-07, |
|
"loss": 0.9421, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.163107039234031, |
|
"learning_rate": 3.2910430754337874e-07, |
|
"loss": 0.9318, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.1545452516991764, |
|
"learning_rate": 3.259459247322295e-07, |
|
"loss": 0.9136, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.15663230748229626, |
|
"learning_rate": 3.2280092208200853e-07, |
|
"loss": 0.8954, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.15500795050421287, |
|
"learning_rate": 3.19669335434883e-07, |
|
"loss": 0.941, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.15928222838638134, |
|
"learning_rate": 3.1655120048012244e-07, |
|
"loss": 0.9408, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.15639857359171666, |
|
"learning_rate": 3.1344655275369524e-07, |
|
"loss": 0.9159, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.15912741439338757, |
|
"learning_rate": 3.1035542763786077e-07, |
|
"loss": 0.9083, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.15518719515657925, |
|
"learning_rate": 3.072778603607672e-07, |
|
"loss": 0.8945, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.15102896207837074, |
|
"learning_rate": 3.0421388599605167e-07, |
|
"loss": 0.9241, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.15953041281656985, |
|
"learning_rate": 3.0116353946243717e-07, |
|
"loss": 0.9552, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.15914050285439954, |
|
"learning_rate": 2.981268555233376e-07, |
|
"loss": 0.9346, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.1535411640949839, |
|
"learning_rate": 2.9510386878646066e-07, |
|
"loss": 0.9208, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.1624383805682274, |
|
"learning_rate": 2.920946137034121e-07, |
|
"loss": 0.9115, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.1576525021299245, |
|
"learning_rate": 2.890991245693059e-07, |
|
"loss": 0.9175, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.1689987920094538, |
|
"learning_rate": 2.861174355223702e-07, |
|
"loss": 0.9467, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.15702484189102198, |
|
"learning_rate": 2.8314958054356106e-07, |
|
"loss": 0.9432, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.16865544192759432, |
|
"learning_rate": 2.801955934561731e-07, |
|
"loss": 0.9287, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.15716123855295028, |
|
"learning_rate": 2.772555079254547e-07, |
|
"loss": 0.9393, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.15715960298271278, |
|
"learning_rate": 2.74329357458226e-07, |
|
"loss": 0.9396, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.1567680825612272, |
|
"learning_rate": 2.714171754024935e-07, |
|
"loss": 0.9387, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.1583254982488342, |
|
"learning_rate": 2.6851899494707397e-07, |
|
"loss": 0.9149, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.1528791273519717, |
|
"learning_rate": 2.6563484912121284e-07, |
|
"loss": 0.9263, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.1548488356579178, |
|
"learning_rate": 2.627647707942103e-07, |
|
"loss": 0.9125, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.16277999785485375, |
|
"learning_rate": 2.5990879267504456e-07, |
|
"loss": 0.9203, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.15633370731860066, |
|
"learning_rate": 2.5706694731200194e-07, |
|
"loss": 0.8966, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.16163084541833064, |
|
"learning_rate": 2.542392670923014e-07, |
|
"loss": 0.9185, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.1507577049675187, |
|
"learning_rate": 2.5142578424173116e-07, |
|
"loss": 0.931, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.15543875551416902, |
|
"learning_rate": 2.486265308242761e-07, |
|
"loss": 0.9197, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.16140974809909986, |
|
"learning_rate": 2.458415387417565e-07, |
|
"loss": 0.9098, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.1541006388383221, |
|
"learning_rate": 2.4307083973346144e-07, |
|
"loss": 0.9075, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.1536331376849194, |
|
"learning_rate": 2.403144653757892e-07, |
|
"loss": 0.9226, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.16664523654188895, |
|
"learning_rate": 2.3757244708188557e-07, |
|
"loss": 0.929, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.15068817418063912, |
|
"learning_rate": 2.3484481610128815e-07, |
|
"loss": 0.9422, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.15114736535904352, |
|
"learning_rate": 2.3213160351956725e-07, |
|
"loss": 0.8884, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.16457615699636646, |
|
"learning_rate": 2.2943284025797523e-07, |
|
"loss": 0.9331, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.15618814241892395, |
|
"learning_rate": 2.2674855707308938e-07, |
|
"loss": 0.9194, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.15796367715432505, |
|
"learning_rate": 2.2407878455646667e-07, |
|
"loss": 0.9318, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.14282593168610988, |
|
"learning_rate": 2.2142355313429136e-07, |
|
"loss": 0.9244, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.15601800726624074, |
|
"learning_rate": 2.1878289306702986e-07, |
|
"loss": 0.9152, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.157516630029107, |
|
"learning_rate": 2.1615683444908517e-07, |
|
"loss": 0.9228, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.16141550481008685, |
|
"learning_rate": 2.1354540720845456e-07, |
|
"loss": 0.9437, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.15114290632124544, |
|
"learning_rate": 2.1094864110638746e-07, |
|
"loss": 0.9113, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.1629233031052345, |
|
"learning_rate": 2.0836656573704817e-07, |
|
"loss": 0.9359, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.1540979875498575, |
|
"learning_rate": 2.057992105271762e-07, |
|
"loss": 0.9232, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.16470643668441212, |
|
"learning_rate": 2.0324660473575218e-07, |
|
"loss": 0.9267, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.1552333426235452, |
|
"learning_rate": 2.0070877745366546e-07, |
|
"loss": 0.9037, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.15933287223217704, |
|
"learning_rate": 1.9818575760337991e-07, |
|
"loss": 0.9572, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.1543328823907667, |
|
"learning_rate": 1.9567757393860735e-07, |
|
"loss": 0.9204, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.15475180185924467, |
|
"learning_rate": 1.9318425504397675e-07, |
|
"loss": 0.9289, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.15563510159853952, |
|
"learning_rate": 1.9070582933471158e-07, |
|
"loss": 0.9104, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.1553271103735474, |
|
"learning_rate": 1.88242325056303e-07, |
|
"loss": 0.938, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.15980736761542, |
|
"learning_rate": 1.8579377028419082e-07, |
|
"loss": 0.9622, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.15927747684592666, |
|
"learning_rate": 1.833601929234406e-07, |
|
"loss": 0.9094, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.160839016361983, |
|
"learning_rate": 1.809416207084293e-07, |
|
"loss": 0.9395, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.15946588975620996, |
|
"learning_rate": 1.7853808120252403e-07, |
|
"loss": 0.9223, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.15940399215075765, |
|
"learning_rate": 1.7614960179777373e-07, |
|
"loss": 0.9353, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.15770387513942463, |
|
"learning_rate": 1.7377620971459251e-07, |
|
"loss": 0.9348, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.15059096100398262, |
|
"learning_rate": 1.7141793200145234e-07, |
|
"loss": 0.9414, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.14981443053828966, |
|
"learning_rate": 1.6907479553457228e-07, |
|
"loss": 0.9561, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.14989095698958532, |
|
"learning_rate": 1.6674682701761496e-07, |
|
"loss": 0.9482, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.16037252571389166, |
|
"learning_rate": 1.644340529813791e-07, |
|
"loss": 0.9025, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.15692439644356643, |
|
"learning_rate": 1.6213649978350042e-07, |
|
"loss": 0.9276, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.15736372136885854, |
|
"learning_rate": 1.5985419360814878e-07, |
|
"loss": 0.903, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.16863539575661857, |
|
"learning_rate": 1.5758716046573068e-07, |
|
"loss": 0.9147, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.16040567859365207, |
|
"learning_rate": 1.553354261925925e-07, |
|
"loss": 0.9162, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.157140629226284, |
|
"learning_rate": 1.5309901645072777e-07, |
|
"loss": 0.948, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.1608315729686174, |
|
"learning_rate": 1.5087795672748156e-07, |
|
"loss": 0.9321, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.167819710766768, |
|
"learning_rate": 1.4867227233526303e-07, |
|
"loss": 0.9123, |
|
"step": 4505 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.15821303219930233, |
|
"learning_rate": 1.4648198841125454e-07, |
|
"loss": 0.9211, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.1551506251400254, |
|
"learning_rate": 1.443071299171278e-07, |
|
"loss": 0.921, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.16270472915600528, |
|
"learning_rate": 1.4214772163875618e-07, |
|
"loss": 0.9476, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.16257841775735968, |
|
"learning_rate": 1.4000378818593534e-07, |
|
"loss": 0.9233, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.156562288683159, |
|
"learning_rate": 1.3787535399210094e-07, |
|
"loss": 0.9182, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.15476777543631168, |
|
"learning_rate": 1.3576244331404987e-07, |
|
"loss": 0.9282, |
|
"step": 4535 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.1526747916838123, |
|
"learning_rate": 1.3366508023166618e-07, |
|
"loss": 0.9109, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.15868273523439957, |
|
"learning_rate": 1.3158328864764325e-07, |
|
"loss": 0.9183, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.157997397996658, |
|
"learning_rate": 1.2951709228721466e-07, |
|
"loss": 0.8927, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.15965300631556217, |
|
"learning_rate": 1.274665146978812e-07, |
|
"loss": 0.9422, |
|
"step": 4555 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.16102069291165114, |
|
"learning_rate": 1.2543157924914451e-07, |
|
"loss": 0.9136, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.16644483473793495, |
|
"learning_rate": 1.234123091322389e-07, |
|
"loss": 0.9182, |
|
"step": 4565 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.16785461200909674, |
|
"learning_rate": 1.2140872735986908e-07, |
|
"loss": 0.9019, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.15102979016575213, |
|
"learning_rate": 1.1942085676594617e-07, |
|
"loss": 0.9242, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.15019421611211958, |
|
"learning_rate": 1.1744872000532814e-07, |
|
"loss": 0.8977, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.16805390366486953, |
|
"learning_rate": 1.1549233955356143e-07, |
|
"loss": 0.9281, |
|
"step": 4585 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.15062889503591012, |
|
"learning_rate": 1.1355173770662592e-07, |
|
"loss": 0.9197, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.15940944783864333, |
|
"learning_rate": 1.1162693658067852e-07, |
|
"loss": 0.8694, |
|
"step": 4595 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.1594448371528468, |
|
"learning_rate": 1.0971795811180402e-07, |
|
"loss": 0.9173, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.15723697995020128, |
|
"learning_rate": 1.0782482405576194e-07, |
|
"loss": 0.9331, |
|
"step": 4605 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.16818241136639087, |
|
"learning_rate": 1.0594755598774192e-07, |
|
"loss": 0.9224, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.16505122113793874, |
|
"learning_rate": 1.0408617530211473e-07, |
|
"loss": 0.9146, |
|
"step": 4615 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.16226442869385957, |
|
"learning_rate": 1.0224070321219065e-07, |
|
"loss": 0.9163, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.15760858471916925, |
|
"learning_rate": 1.004111607499768e-07, |
|
"loss": 0.9125, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.167789388282492, |
|
"learning_rate": 9.859756876593723e-08, |
|
"loss": 0.953, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.1589666026077562, |
|
"learning_rate": 9.679994792875585e-08, |
|
"loss": 0.9142, |
|
"step": 4635 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.1603705114000376, |
|
"learning_rate": 9.501831872510086e-08, |
|
"loss": 0.9343, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.16565499499646644, |
|
"learning_rate": 9.325270145939075e-08, |
|
"loss": 0.9568, |
|
"step": 4645 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.16897573366166183, |
|
"learning_rate": 9.150311625356378e-08, |
|
"loss": 0.9335, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.15687694406207942, |
|
"learning_rate": 8.976958304684707e-08, |
|
"loss": 0.913, |
|
"step": 4655 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.1560825616023689, |
|
"learning_rate": 8.805212159553171e-08, |
|
"loss": 0.9184, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.1517859392958044, |
|
"learning_rate": 8.635075147274501e-08, |
|
"loss": 0.9127, |
|
"step": 4665 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.15438722856001558, |
|
"learning_rate": 8.466549206822993e-08, |
|
"loss": 0.9096, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.1652750261831842, |
|
"learning_rate": 8.299636258812199e-08, |
|
"loss": 0.9247, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.16015267448347073, |
|
"learning_rate": 8.134338205473124e-08, |
|
"loss": 0.9228, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.16415589317028237, |
|
"learning_rate": 7.970656930632663e-08, |
|
"loss": 0.9351, |
|
"step": 4685 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.1603710732217631, |
|
"learning_rate": 7.808594299691902e-08, |
|
"loss": 0.9308, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.16313461612356253, |
|
"learning_rate": 7.64815215960501e-08, |
|
"loss": 0.9093, |
|
"step": 4695 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.16350768777439237, |
|
"learning_rate": 7.489332338858202e-08, |
|
"loss": 0.9133, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.15832339009257615, |
|
"learning_rate": 7.332136647448795e-08, |
|
"loss": 0.9108, |
|
"step": 4705 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.15056874945041787, |
|
"learning_rate": 7.176566876864699e-08, |
|
"loss": 0.9266, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.15648829717155813, |
|
"learning_rate": 7.022624800063876e-08, |
|
"loss": 0.924, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.1621955135948283, |
|
"learning_rate": 6.870312171454296e-08, |
|
"loss": 0.9451, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.1604060029409816, |
|
"learning_rate": 6.719630726873748e-08, |
|
"loss": 0.9418, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.1676076936878336, |
|
"learning_rate": 6.570582183570211e-08, |
|
"loss": 0.9424, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.16378443128060385, |
|
"learning_rate": 6.42316824018223e-08, |
|
"loss": 0.925, |
|
"step": 4735 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.1541488328010328, |
|
"learning_rate": 6.277390576719538e-08, |
|
"loss": 0.9308, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.15020602128889035, |
|
"learning_rate": 6.133250854543948e-08, |
|
"loss": 0.9044, |
|
"step": 4745 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.15152489639770436, |
|
"learning_rate": 5.990750716350374e-08, |
|
"loss": 0.9107, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.15521047884723635, |
|
"learning_rate": 5.849891786148193e-08, |
|
"loss": 0.9013, |
|
"step": 4755 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.1516663880407599, |
|
"learning_rate": 5.710675669242577e-08, |
|
"loss": 0.9183, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.176609659206763, |
|
"learning_rate": 5.573103952216457e-08, |
|
"loss": 0.9266, |
|
"step": 4765 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.16607598187822306, |
|
"learning_rate": 5.4371782029121074e-08, |
|
"loss": 0.9317, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.16280029991460063, |
|
"learning_rate": 5.302899970413588e-08, |
|
"loss": 0.9407, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.1501137796362083, |
|
"learning_rate": 5.17027078502888e-08, |
|
"loss": 0.9098, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.16436918493854402, |
|
"learning_rate": 5.039292158272596e-08, |
|
"loss": 0.9244, |
|
"step": 4785 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.16169601675540524, |
|
"learning_rate": 4.909965582848614e-08, |
|
"loss": 0.8792, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.1600927235212448, |
|
"learning_rate": 4.782292532633187e-08, |
|
"loss": 0.953, |
|
"step": 4795 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.16263427667936725, |
|
"learning_rate": 4.656274462658028e-08, |
|
"loss": 0.9308, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.16315939755816827, |
|
"learning_rate": 4.5319128090938686e-08, |
|
"loss": 0.9051, |
|
"step": 4805 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.1612104752152402, |
|
"learning_rate": 4.409208989233943e-08, |
|
"loss": 0.9317, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.1654098368858686, |
|
"learning_rate": 4.288164401477995e-08, |
|
"loss": 0.9066, |
|
"step": 4815 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.1609318647136051, |
|
"learning_rate": 4.1687804253161485e-08, |
|
"loss": 0.9053, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.16875575692294012, |
|
"learning_rate": 4.05105842131338e-08, |
|
"loss": 0.9519, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.1549606486338682, |
|
"learning_rate": 3.934999731093852e-08, |
|
"loss": 0.9307, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.1676676232013085, |
|
"learning_rate": 3.820605677325756e-08, |
|
"loss": 0.9626, |
|
"step": 4835 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.15773160804940514, |
|
"learning_rate": 3.707877563706158e-08, |
|
"loss": 0.9165, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.15826127121512837, |
|
"learning_rate": 3.5968166749461463e-08, |
|
"loss": 0.8953, |
|
"step": 4845 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.1544058879678145, |
|
"learning_rate": 3.487424276756207e-08, |
|
"loss": 0.9007, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.1735946591788913, |
|
"learning_rate": 3.379701615831837e-08, |
|
"loss": 0.9368, |
|
"step": 4855 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.16629351979756113, |
|
"learning_rate": 3.273649919839239e-08, |
|
"loss": 0.9366, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.1670530577981281, |
|
"learning_rate": 3.16927039740143e-08, |
|
"loss": 0.8889, |
|
"step": 4865 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.18047483077210333, |
|
"learning_rate": 3.06656423808439e-08, |
|
"loss": 0.9581, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.14862297706656663, |
|
"learning_rate": 2.9655326123835702e-08, |
|
"loss": 0.9082, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.14156600846882356, |
|
"learning_rate": 2.866176671710502e-08, |
|
"loss": 0.9334, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.152720300985099, |
|
"learning_rate": 2.7684975483797113e-08, |
|
"loss": 0.9098, |
|
"step": 4885 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.1541159122179482, |
|
"learning_rate": 2.6724963555957937e-08, |
|
"loss": 0.9086, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.1639110432246253, |
|
"learning_rate": 2.5781741874407073e-08, |
|
"loss": 0.9278, |
|
"step": 4895 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.15764633567026584, |
|
"learning_rate": 2.4855321188614e-08, |
|
"loss": 0.9199, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.16367449606999576, |
|
"learning_rate": 2.3945712056573866e-08, |
|
"loss": 0.9218, |
|
"step": 4905 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.1556361631328831, |
|
"learning_rate": 2.3052924844689237e-08, |
|
"loss": 0.9185, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.14651345358604176, |
|
"learning_rate": 2.2176969727650043e-08, |
|
"loss": 0.8805, |
|
"step": 4915 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.16911174215759373, |
|
"learning_rate": 2.1317856688318815e-08, |
|
"loss": 0.9463, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.16222471379625428, |
|
"learning_rate": 2.0475595517616465e-08, |
|
"loss": 0.9126, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.15781798751754259, |
|
"learning_rate": 1.9650195814411353e-08, |
|
"loss": 0.9225, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.14707569100008253, |
|
"learning_rate": 1.8841666985408568e-08, |
|
"loss": 0.8883, |
|
"step": 4935 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.1714960810767451, |
|
"learning_rate": 1.8050018245043987e-08, |
|
"loss": 0.9226, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.16621777369986387, |
|
"learning_rate": 1.7275258615378377e-08, |
|
"loss": 0.9245, |
|
"step": 4945 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.16812621541501718, |
|
"learning_rate": 1.65173969259958e-08, |
|
"loss": 0.9409, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.1587153310627031, |
|
"learning_rate": 1.5776441813901197e-08, |
|
"loss": 0.9004, |
|
"step": 4955 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.16493984218007582, |
|
"learning_rate": 1.5052401723423815e-08, |
|
"loss": 0.9166, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.15890567241661818, |
|
"learning_rate": 1.4345284906119082e-08, |
|
"loss": 0.9117, |
|
"step": 4965 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.15091338059065335, |
|
"learning_rate": 1.3655099420676553e-08, |
|
"loss": 0.9404, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.15944974898000105, |
|
"learning_rate": 1.2981853132826293e-08, |
|
"loss": 0.9531, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.15818858463324417, |
|
"learning_rate": 1.2325553715250792e-08, |
|
"loss": 0.912, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.15569962439666524, |
|
"learning_rate": 1.1686208647496032e-08, |
|
"loss": 0.8903, |
|
"step": 4985 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.1783511934328571, |
|
"learning_rate": 1.1063825215887557e-08, |
|
"loss": 0.9388, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.16371221965885455, |
|
"learning_rate": 1.0458410513446203e-08, |
|
"loss": 0.9171, |
|
"step": 4995 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.16608474399678907, |
|
"learning_rate": 9.869971439808834e-09, |
|
"loss": 0.924, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.16264204123747195, |
|
"learning_rate": 9.298514701147897e-09, |
|
"loss": 0.932, |
|
"step": 5005 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.15931886887708344, |
|
"learning_rate": 8.744046810096329e-09, |
|
"loss": 0.9317, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.16017670612884605, |
|
"learning_rate": 8.206574085672769e-09, |
|
"loss": 0.9398, |
|
"step": 5015 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.15982177239936074, |
|
"learning_rate": 7.68610265320946e-09, |
|
"loss": 0.9272, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.15118097697397376, |
|
"learning_rate": 7.182638444283296e-09, |
|
"loss": 0.9019, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.16365399699832117, |
|
"learning_rate": 6.6961871966470525e-09, |
|
"loss": 0.9341, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.16106966499050715, |
|
"learning_rate": 6.2267544541642625e-09, |
|
"loss": 0.9142, |
|
"step": 5035 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.16356964266490379, |
|
"learning_rate": 5.774345566746942e-09, |
|
"loss": 0.9136, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.16839366533706696, |
|
"learning_rate": 5.338965690293795e-09, |
|
"loss": 0.9341, |
|
"step": 5045 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.15742905820134248, |
|
"learning_rate": 4.920619786630942e-09, |
|
"loss": 0.9209, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.14496950871622408, |
|
"learning_rate": 4.519312623457117e-09, |
|
"loss": 0.9016, |
|
"step": 5055 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.15117212300765606, |
|
"learning_rate": 4.135048774287553e-09, |
|
"loss": 0.9103, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.16671405192275532, |
|
"learning_rate": 3.767832618402689e-09, |
|
"loss": 0.9248, |
|
"step": 5065 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.1602778426913734, |
|
"learning_rate": 3.4176683407983744e-09, |
|
"loss": 0.9405, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.14293417532619934, |
|
"learning_rate": 3.0845599321377427e-09, |
|
"loss": 0.8966, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.1513599972047693, |
|
"learning_rate": 2.7685111887059133e-09, |
|
"loss": 0.9002, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.15441045322521502, |
|
"learning_rate": 2.4695257123668602e-09, |
|
"loss": 0.8972, |
|
"step": 5085 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.15337187659541607, |
|
"learning_rate": 2.1876069105224437e-09, |
|
"loss": 0.9051, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.15842607503539186, |
|
"learning_rate": 1.9227579960729434e-09, |
|
"loss": 0.9358, |
|
"step": 5095 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.16236135287127654, |
|
"learning_rate": 1.6749819873810857e-09, |
|
"loss": 0.9403, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.15112590057516626, |
|
"learning_rate": 1.4442817082377379e-09, |
|
"loss": 0.8968, |
|
"step": 5105 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.16919776377500648, |
|
"learning_rate": 1.2306597878289361e-09, |
|
"loss": 0.9206, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.16262851154680527, |
|
"learning_rate": 1.03411866070674e-09, |
|
"loss": 0.9096, |
|
"step": 5115 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.14577473193606455, |
|
"learning_rate": 8.546605667610896e-10, |
|
"loss": 0.9482, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.15883938592062852, |
|
"learning_rate": 6.922875511943261e-10, |
|
"loss": 0.9248, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.15113652144655165, |
|
"learning_rate": 5.470014644980426e-10, |
|
"loss": 0.9265, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.1510997478676236, |
|
"learning_rate": 4.18803962431602e-10, |
|
"loss": 0.8814, |
|
"step": 5135 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.15722861475429373, |
|
"learning_rate": 3.076965060038184e-10, |
|
"loss": 0.9215, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.15915656709758227, |
|
"learning_rate": 2.1368036145597013e-10, |
|
"loss": 0.9631, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.14617851831272438, |
|
"learning_rate": 1.3675660024714541e-10, |
|
"loss": 0.9366, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.15902826549810223, |
|
"learning_rate": 7.692609904258463e-11, |
|
"loss": 0.9326, |
|
"step": 5155 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.16030959960966326, |
|
"learning_rate": 3.4189539703355364e-11, |
|
"loss": 0.8914, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.16538320040136756, |
|
"learning_rate": 8.547409278525376e-12, |
|
"loss": 0.9356, |
|
"step": 5165 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.1617655412465529, |
|
"learning_rate": 0.0, |
|
"loss": 0.918, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.9631486535072327, |
|
"eval_runtime": 5393.193, |
|
"eval_samples_per_second": 5.747, |
|
"eval_steps_per_second": 0.12, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 5170, |
|
"total_flos": 1.360193862500352e+16, |
|
"train_loss": 0.9572911218241059, |
|
"train_runtime": 140901.3648, |
|
"train_samples_per_second": 1.761, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 5170, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 1.360193862500352e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |