|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 60701, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0016474193176389186, |
|
"grad_norm": 35.35807800292969, |
|
"learning_rate": 4.1186161449752885e-07, |
|
"loss": 1.1167, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0032948386352778373, |
|
"grad_norm": 2.662261724472046, |
|
"learning_rate": 8.237232289950577e-07, |
|
"loss": 0.8293, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.004942257952916756, |
|
"grad_norm": 3.5144577026367188, |
|
"learning_rate": 1.2355848434925866e-06, |
|
"loss": 0.8225, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.006589677270555675, |
|
"grad_norm": 2.4613759517669678, |
|
"learning_rate": 1.6474464579901154e-06, |
|
"loss": 0.767, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.008237096588194593, |
|
"grad_norm": 2.598069906234741, |
|
"learning_rate": 2.0593080724876445e-06, |
|
"loss": 0.687, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.009884515905833512, |
|
"grad_norm": 3.49182391166687, |
|
"learning_rate": 2.471169686985173e-06, |
|
"loss": 0.6412, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.01153193522347243, |
|
"grad_norm": 1.9722175598144531, |
|
"learning_rate": 2.883031301482702e-06, |
|
"loss": 0.6173, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.01317935454111135, |
|
"grad_norm": 2.084155321121216, |
|
"learning_rate": 3.294892915980231e-06, |
|
"loss": 0.622, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.014826773858750269, |
|
"grad_norm": 2.001030206680298, |
|
"learning_rate": 3.70675453047776e-06, |
|
"loss": 0.5975, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.016474193176389186, |
|
"grad_norm": 2.722954034805298, |
|
"learning_rate": 4.118616144975289e-06, |
|
"loss": 0.6171, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.018121612494028105, |
|
"grad_norm": 2.851048469543457, |
|
"learning_rate": 4.5304777594728176e-06, |
|
"loss": 0.5398, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.019769031811667025, |
|
"grad_norm": 2.0754776000976562, |
|
"learning_rate": 4.942339373970346e-06, |
|
"loss": 0.5444, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.021416451129305944, |
|
"grad_norm": 1.9554790258407593, |
|
"learning_rate": 4.999974215318018e-06, |
|
"loss": 0.5688, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.02306387044694486, |
|
"grad_norm": 2.532405376434326, |
|
"learning_rate": 4.999879388694095e-06, |
|
"loss": 0.5549, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.02471128976458378, |
|
"grad_norm": 2.0328919887542725, |
|
"learning_rate": 4.999714839456846e-06, |
|
"loss": 0.5484, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0263587090822227, |
|
"grad_norm": 1.7955541610717773, |
|
"learning_rate": 4.999480572195616e-06, |
|
"loss": 0.5765, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.028006128399861618, |
|
"grad_norm": 1.7495211362838745, |
|
"learning_rate": 4.999176593444209e-06, |
|
"loss": 0.5829, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.029653547717500537, |
|
"grad_norm": 2.1942079067230225, |
|
"learning_rate": 4.9988029116807125e-06, |
|
"loss": 0.5331, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.03130096703513945, |
|
"grad_norm": 2.9001498222351074, |
|
"learning_rate": 4.998359537327255e-06, |
|
"loss": 0.5108, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.03294838635277837, |
|
"grad_norm": 2.320958375930786, |
|
"learning_rate": 4.997846482749723e-06, |
|
"loss": 0.5484, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.03459580567041729, |
|
"grad_norm": 2.4439444541931152, |
|
"learning_rate": 4.9972637622574074e-06, |
|
"loss": 0.5448, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.03624322498805621, |
|
"grad_norm": 2.403137445449829, |
|
"learning_rate": 4.996611392102611e-06, |
|
"loss": 0.519, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.03789064430569513, |
|
"grad_norm": 1.4548203945159912, |
|
"learning_rate": 4.995889390480193e-06, |
|
"loss": 0.4869, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.03953806362333405, |
|
"grad_norm": 2.335745334625244, |
|
"learning_rate": 4.99509777752706e-06, |
|
"loss": 0.5545, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.04118548294097297, |
|
"grad_norm": 2.894595146179199, |
|
"learning_rate": 4.994236575321607e-06, |
|
"loss": 0.5364, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.04283290225861189, |
|
"grad_norm": 3.079472064971924, |
|
"learning_rate": 4.993305807883101e-06, |
|
"loss": 0.5514, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.0444803215762508, |
|
"grad_norm": 2.3833718299865723, |
|
"learning_rate": 4.9923055011710075e-06, |
|
"loss": 0.492, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.04612774089388972, |
|
"grad_norm": 2.7838637828826904, |
|
"learning_rate": 4.991235683084274e-06, |
|
"loss": 0.5156, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.04777516021152864, |
|
"grad_norm": 1.7487517595291138, |
|
"learning_rate": 4.9900963834605445e-06, |
|
"loss": 0.514, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.04942257952916756, |
|
"grad_norm": 1.7354815006256104, |
|
"learning_rate": 4.98888763407533e-06, |
|
"loss": 0.5202, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.05106999884680648, |
|
"grad_norm": 4.250129222869873, |
|
"learning_rate": 4.987609468641125e-06, |
|
"loss": 0.5069, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.0527174181644454, |
|
"grad_norm": 2.1309328079223633, |
|
"learning_rate": 4.986261922806461e-06, |
|
"loss": 0.5372, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.054364837482084316, |
|
"grad_norm": 2.0532209873199463, |
|
"learning_rate": 4.9848450341549196e-06, |
|
"loss": 0.5118, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.056012256799723235, |
|
"grad_norm": 2.774035692214966, |
|
"learning_rate": 4.983358842204078e-06, |
|
"loss": 0.5082, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.057659676117362155, |
|
"grad_norm": 4.331142425537109, |
|
"learning_rate": 4.981803388404411e-06, |
|
"loss": 0.5328, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.059307095435001074, |
|
"grad_norm": 2.5397560596466064, |
|
"learning_rate": 4.980178716138135e-06, |
|
"loss": 0.5173, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.060954514752639986, |
|
"grad_norm": 2.2354204654693604, |
|
"learning_rate": 4.978484870717991e-06, |
|
"loss": 0.4946, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.0626019340702789, |
|
"grad_norm": 1.8501393795013428, |
|
"learning_rate": 4.976721899385992e-06, |
|
"loss": 0.5341, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.06424935338791783, |
|
"grad_norm": 1.828378677368164, |
|
"learning_rate": 4.974889851312098e-06, |
|
"loss": 0.5097, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.06589677270555674, |
|
"grad_norm": 2.1924521923065186, |
|
"learning_rate": 4.972988777592845e-06, |
|
"loss": 0.505, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.06754419202319567, |
|
"grad_norm": 1.9084734916687012, |
|
"learning_rate": 4.971018731249923e-06, |
|
"loss": 0.5043, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.06919161134083458, |
|
"grad_norm": 2.8705804347991943, |
|
"learning_rate": 4.968979767228693e-06, |
|
"loss": 0.5118, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.0708390306584735, |
|
"grad_norm": 2.0432722568511963, |
|
"learning_rate": 4.96687194239666e-06, |
|
"loss": 0.5295, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.07248644997611242, |
|
"grad_norm": 2.022822380065918, |
|
"learning_rate": 4.964695315541883e-06, |
|
"loss": 0.5649, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.07413386929375133, |
|
"grad_norm": 2.284590721130371, |
|
"learning_rate": 4.962449947371334e-06, |
|
"loss": 0.4841, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.07578128861139026, |
|
"grad_norm": 3.217561721801758, |
|
"learning_rate": 4.9601359005092095e-06, |
|
"loss": 0.5401, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.07742870792902917, |
|
"grad_norm": 1.9388020038604736, |
|
"learning_rate": 4.957753239495181e-06, |
|
"loss": 0.5251, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.0790761272466681, |
|
"grad_norm": 1.3349353075027466, |
|
"learning_rate": 4.955302030782596e-06, |
|
"loss": 0.4962, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.08072354656430701, |
|
"grad_norm": 2.4485511779785156, |
|
"learning_rate": 4.952782342736625e-06, |
|
"loss": 0.4939, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.08237096588194594, |
|
"grad_norm": 3.657675266265869, |
|
"learning_rate": 4.950194245632349e-06, |
|
"loss": 0.5123, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.08401838519958485, |
|
"grad_norm": 2.871431589126587, |
|
"learning_rate": 4.9475378116528105e-06, |
|
"loss": 0.5063, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.08566580451722378, |
|
"grad_norm": 1.394823431968689, |
|
"learning_rate": 4.944813114886991e-06, |
|
"loss": 0.4939, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.08731322383486269, |
|
"grad_norm": 1.6979378461837769, |
|
"learning_rate": 4.942020231327749e-06, |
|
"loss": 0.5102, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.0889606431525016, |
|
"grad_norm": 1.941582202911377, |
|
"learning_rate": 4.939159238869698e-06, |
|
"loss": 0.5347, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.09060806247014053, |
|
"grad_norm": 1.9074257612228394, |
|
"learning_rate": 4.936230217307035e-06, |
|
"loss": 0.4935, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.09225548178777944, |
|
"grad_norm": 2.327624797821045, |
|
"learning_rate": 4.933233248331317e-06, |
|
"loss": 0.5218, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.09390290110541837, |
|
"grad_norm": 2.605468988418579, |
|
"learning_rate": 4.930168415529181e-06, |
|
"loss": 0.4831, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.09555032042305728, |
|
"grad_norm": 2.137749671936035, |
|
"learning_rate": 4.927035804380012e-06, |
|
"loss": 0.4983, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.0971977397406962, |
|
"grad_norm": 1.9908422231674194, |
|
"learning_rate": 4.923835502253558e-06, |
|
"loss": 0.4991, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.09884515905833512, |
|
"grad_norm": 1.8356066942214966, |
|
"learning_rate": 4.920567598407498e-06, |
|
"loss": 0.4907, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.10049257837597404, |
|
"grad_norm": 2.3301796913146973, |
|
"learning_rate": 4.917232183984946e-06, |
|
"loss": 0.4833, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.10213999769361295, |
|
"grad_norm": 2.835822582244873, |
|
"learning_rate": 4.913829352011914e-06, |
|
"loss": 0.554, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.10378741701125187, |
|
"grad_norm": 1.825016736984253, |
|
"learning_rate": 4.910359197394717e-06, |
|
"loss": 0.5082, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.1054348363288908, |
|
"grad_norm": 3.021340847015381, |
|
"learning_rate": 4.9068218169173245e-06, |
|
"loss": 0.4945, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.1070822556465297, |
|
"grad_norm": 3.6816606521606445, |
|
"learning_rate": 4.903217309238658e-06, |
|
"loss": 0.54, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.10872967496416863, |
|
"grad_norm": 2.1384148597717285, |
|
"learning_rate": 4.899545774889848e-06, |
|
"loss": 0.497, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.11037709428180754, |
|
"grad_norm": 2.311786651611328, |
|
"learning_rate": 4.895807316271421e-06, |
|
"loss": 0.4927, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.11202451359944647, |
|
"grad_norm": 1.765767216682434, |
|
"learning_rate": 4.892002037650451e-06, |
|
"loss": 0.4984, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.11367193291708538, |
|
"grad_norm": 1.8108317852020264, |
|
"learning_rate": 4.888130045157645e-06, |
|
"loss": 0.4957, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.11531935223472431, |
|
"grad_norm": 2.6695711612701416, |
|
"learning_rate": 4.884191446784387e-06, |
|
"loss": 0.4992, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.11696677155236322, |
|
"grad_norm": 2.477202892303467, |
|
"learning_rate": 4.880186352379726e-06, |
|
"loss": 0.4947, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.11861419087000215, |
|
"grad_norm": 3.69132399559021, |
|
"learning_rate": 4.876114873647308e-06, |
|
"loss": 0.5092, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.12026161018764106, |
|
"grad_norm": 2.353121042251587, |
|
"learning_rate": 4.871977124142271e-06, |
|
"loss": 0.4752, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.12190902950527997, |
|
"grad_norm": 2.3746302127838135, |
|
"learning_rate": 4.867773219268062e-06, |
|
"loss": 0.5186, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.1235564488229189, |
|
"grad_norm": 2.437284469604492, |
|
"learning_rate": 4.863503276273232e-06, |
|
"loss": 0.4882, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.1252038681405578, |
|
"grad_norm": 2.287785291671753, |
|
"learning_rate": 4.859167414248163e-06, |
|
"loss": 0.4755, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.12685128745819674, |
|
"grad_norm": 4.1828413009643555, |
|
"learning_rate": 4.854765754121738e-06, |
|
"loss": 0.5062, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.12849870677583566, |
|
"grad_norm": 2.3262546062469482, |
|
"learning_rate": 4.85029841865798e-06, |
|
"loss": 0.4756, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.13014612609347456, |
|
"grad_norm": 1.2054634094238281, |
|
"learning_rate": 4.8457655324526215e-06, |
|
"loss": 0.4827, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.1317935454111135, |
|
"grad_norm": 2.3276774883270264, |
|
"learning_rate": 4.8411672219296304e-06, |
|
"loss": 0.4833, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.13344096472875241, |
|
"grad_norm": 1.9837372303009033, |
|
"learning_rate": 4.836503615337684e-06, |
|
"loss": 0.4681, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.13508838404639134, |
|
"grad_norm": 1.6989622116088867, |
|
"learning_rate": 4.831774842746595e-06, |
|
"loss": 0.5375, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.13673580336403024, |
|
"grad_norm": 2.29801869392395, |
|
"learning_rate": 4.826981036043677e-06, |
|
"loss": 0.5102, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.13838322268166917, |
|
"grad_norm": 8.920065879821777, |
|
"learning_rate": 4.822122328930076e-06, |
|
"loss": 0.5145, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.1400306419993081, |
|
"grad_norm": 2.425342321395874, |
|
"learning_rate": 4.817198856917029e-06, |
|
"loss": 0.4888, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.141678061316947, |
|
"grad_norm": 2.2098586559295654, |
|
"learning_rate": 4.812210757322096e-06, |
|
"loss": 0.5088, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.14332548063458592, |
|
"grad_norm": 2.6320948600769043, |
|
"learning_rate": 4.807158169265326e-06, |
|
"loss": 0.4868, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.14497289995222484, |
|
"grad_norm": 2.660802125930786, |
|
"learning_rate": 4.802041233665373e-06, |
|
"loss": 0.4742, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.14662031926986377, |
|
"grad_norm": 2.3442442417144775, |
|
"learning_rate": 4.796860093235572e-06, |
|
"loss": 0.4789, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.14826773858750267, |
|
"grad_norm": 2.416050434112549, |
|
"learning_rate": 4.791614892479956e-06, |
|
"loss": 0.5149, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.1499151579051416, |
|
"grad_norm": 2.576631784439087, |
|
"learning_rate": 4.786305777689222e-06, |
|
"loss": 0.5096, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.15156257722278052, |
|
"grad_norm": 1.699407935142517, |
|
"learning_rate": 4.7809328969366585e-06, |
|
"loss": 0.5006, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.15320999654041945, |
|
"grad_norm": 2.303194046020508, |
|
"learning_rate": 4.7754964000740086e-06, |
|
"loss": 0.5113, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.15485741585805834, |
|
"grad_norm": 2.021639347076416, |
|
"learning_rate": 4.7699964387272964e-06, |
|
"loss": 0.4823, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.15650483517569727, |
|
"grad_norm": 1.7534514665603638, |
|
"learning_rate": 4.764433166292593e-06, |
|
"loss": 0.4912, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.1581522544933362, |
|
"grad_norm": 2.9182558059692383, |
|
"learning_rate": 4.758806737931741e-06, |
|
"loss": 0.4957, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.1597996738109751, |
|
"grad_norm": 2.112656831741333, |
|
"learning_rate": 4.753117310568026e-06, |
|
"loss": 0.4733, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.16144709312861402, |
|
"grad_norm": 2.052156686782837, |
|
"learning_rate": 4.7473650428818025e-06, |
|
"loss": 0.4794, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.16309451244625295, |
|
"grad_norm": 2.4516518115997314, |
|
"learning_rate": 4.741550095306065e-06, |
|
"loss": 0.4807, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.16474193176389187, |
|
"grad_norm": 1.8814926147460938, |
|
"learning_rate": 4.7356726300219715e-06, |
|
"loss": 0.4392, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.16638935108153077, |
|
"grad_norm": 1.6867588758468628, |
|
"learning_rate": 4.729732810954329e-06, |
|
"loss": 0.489, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.1680367703991697, |
|
"grad_norm": 1.996559739112854, |
|
"learning_rate": 4.723730803767014e-06, |
|
"loss": 0.45, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.16968418971680863, |
|
"grad_norm": 2.4676289558410645, |
|
"learning_rate": 4.71766677585835e-06, |
|
"loss": 0.49, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.17133160903444755, |
|
"grad_norm": 2.4000778198242188, |
|
"learning_rate": 4.711540896356447e-06, |
|
"loss": 0.5133, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.17297902835208645, |
|
"grad_norm": 1.6576099395751953, |
|
"learning_rate": 4.70535333611448e-06, |
|
"loss": 0.4682, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.17462644766972538, |
|
"grad_norm": 2.6019415855407715, |
|
"learning_rate": 4.699104267705921e-06, |
|
"loss": 0.5221, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.1762738669873643, |
|
"grad_norm": 2.8221852779388428, |
|
"learning_rate": 4.692793865419731e-06, |
|
"loss": 0.5142, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.1779212863050032, |
|
"grad_norm": 1.781231164932251, |
|
"learning_rate": 4.686422305255498e-06, |
|
"loss": 0.4908, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.17956870562264213, |
|
"grad_norm": 2.3753836154937744, |
|
"learning_rate": 4.679989764918524e-06, |
|
"loss": 0.4894, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.18121612494028105, |
|
"grad_norm": 1.7550493478775024, |
|
"learning_rate": 4.673496423814874e-06, |
|
"loss": 0.4707, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.18286354425791998, |
|
"grad_norm": 1.6989047527313232, |
|
"learning_rate": 4.666942463046369e-06, |
|
"loss": 0.5209, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.18451096357555888, |
|
"grad_norm": 2.0338029861450195, |
|
"learning_rate": 4.660328065405537e-06, |
|
"loss": 0.5168, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.1861583828931978, |
|
"grad_norm": 2.14629864692688, |
|
"learning_rate": 4.6536534153705135e-06, |
|
"loss": 0.4802, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.18780580221083673, |
|
"grad_norm": 1.9664320945739746, |
|
"learning_rate": 4.646918699099898e-06, |
|
"loss": 0.505, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.18945322152847563, |
|
"grad_norm": 2.435833692550659, |
|
"learning_rate": 4.640124104427558e-06, |
|
"loss": 0.5205, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.19110064084611456, |
|
"grad_norm": 1.8850288391113281, |
|
"learning_rate": 4.633269820857397e-06, |
|
"loss": 0.4964, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.19274806016375348, |
|
"grad_norm": 1.9810831546783447, |
|
"learning_rate": 4.626356039558061e-06, |
|
"loss": 0.5006, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.1943954794813924, |
|
"grad_norm": 2.52791166305542, |
|
"learning_rate": 4.619382953357615e-06, |
|
"loss": 0.4809, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.1960428987990313, |
|
"grad_norm": 2.0693445205688477, |
|
"learning_rate": 4.612350756738157e-06, |
|
"loss": 0.4591, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.19769031811667023, |
|
"grad_norm": 2.312404155731201, |
|
"learning_rate": 4.6052596458303996e-06, |
|
"loss": 0.4695, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.19933773743430916, |
|
"grad_norm": 2.2149617671966553, |
|
"learning_rate": 4.5981098184081995e-06, |
|
"loss": 0.4743, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.20098515675194809, |
|
"grad_norm": 2.597283124923706, |
|
"learning_rate": 4.590901473883037e-06, |
|
"loss": 0.4893, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.20263257606958698, |
|
"grad_norm": 1.9223053455352783, |
|
"learning_rate": 4.5836348132984584e-06, |
|
"loss": 0.4706, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.2042799953872259, |
|
"grad_norm": 1.0610065460205078, |
|
"learning_rate": 4.57631003932447e-06, |
|
"loss": 0.4566, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.20592741470486484, |
|
"grad_norm": 2.5029940605163574, |
|
"learning_rate": 4.568927356251878e-06, |
|
"loss": 0.451, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.20757483402250373, |
|
"grad_norm": 1.3197004795074463, |
|
"learning_rate": 4.5614869699866e-06, |
|
"loss": 0.4583, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.20922225334014266, |
|
"grad_norm": 1.5407695770263672, |
|
"learning_rate": 4.553989088043919e-06, |
|
"loss": 0.4673, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.2108696726577816, |
|
"grad_norm": 1.6594492197036743, |
|
"learning_rate": 4.546433919542691e-06, |
|
"loss": 0.5023, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.2125170919754205, |
|
"grad_norm": 1.9056370258331299, |
|
"learning_rate": 4.538821675199521e-06, |
|
"loss": 0.5202, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.2141645112930594, |
|
"grad_norm": 3.2313265800476074, |
|
"learning_rate": 4.531152567322877e-06, |
|
"loss": 0.4649, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.21581193061069834, |
|
"grad_norm": 2.2487971782684326, |
|
"learning_rate": 4.5234268098071766e-06, |
|
"loss": 0.4611, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.21745934992833726, |
|
"grad_norm": 2.0419654846191406, |
|
"learning_rate": 4.515644618126816e-06, |
|
"loss": 0.4851, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.2191067692459762, |
|
"grad_norm": 1.4483575820922852, |
|
"learning_rate": 4.507806209330165e-06, |
|
"loss": 0.4789, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.2207541885636151, |
|
"grad_norm": 2.3362390995025635, |
|
"learning_rate": 4.499911802033508e-06, |
|
"loss": 0.4846, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.22240160788125402, |
|
"grad_norm": 2.0402286052703857, |
|
"learning_rate": 4.491961616414948e-06, |
|
"loss": 0.5099, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.22404902719889294, |
|
"grad_norm": 2.0675928592681885, |
|
"learning_rate": 4.483955874208273e-06, |
|
"loss": 0.4878, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.22569644651653184, |
|
"grad_norm": 1.6327743530273438, |
|
"learning_rate": 4.4758947986967614e-06, |
|
"loss": 0.4765, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.22734386583417077, |
|
"grad_norm": 2.0917341709136963, |
|
"learning_rate": 4.4677786147069595e-06, |
|
"loss": 0.4525, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.2289912851518097, |
|
"grad_norm": 1.5012590885162354, |
|
"learning_rate": 4.459607548602412e-06, |
|
"loss": 0.4699, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.23063870446944862, |
|
"grad_norm": 2.0980496406555176, |
|
"learning_rate": 4.451381828277346e-06, |
|
"loss": 0.5045, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.23228612378708752, |
|
"grad_norm": 1.8820241689682007, |
|
"learning_rate": 4.443101683150316e-06, |
|
"loss": 0.4918, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.23393354310472644, |
|
"grad_norm": 2.0610568523406982, |
|
"learning_rate": 4.434767344157808e-06, |
|
"loss": 0.4917, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.23558096242236537, |
|
"grad_norm": 2.2509660720825195, |
|
"learning_rate": 4.426379043747793e-06, |
|
"loss": 0.4933, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.2372283817400043, |
|
"grad_norm": 3.667386531829834, |
|
"learning_rate": 4.417937015873249e-06, |
|
"loss": 0.4784, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.2388758010576432, |
|
"grad_norm": 2.4788925647735596, |
|
"learning_rate": 4.409441495985632e-06, |
|
"loss": 0.4901, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.24052322037528212, |
|
"grad_norm": 1.6511657238006592, |
|
"learning_rate": 4.4008927210283144e-06, |
|
"loss": 0.4777, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.24217063969292105, |
|
"grad_norm": 1.7784366607666016, |
|
"learning_rate": 4.392290929429971e-06, |
|
"loss": 0.4863, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.24381805901055995, |
|
"grad_norm": 2.4235856533050537, |
|
"learning_rate": 4.383636361097931e-06, |
|
"loss": 0.4578, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.24546547832819887, |
|
"grad_norm": 1.6377619504928589, |
|
"learning_rate": 4.3749292574114886e-06, |
|
"loss": 0.4846, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.2471128976458378, |
|
"grad_norm": 1.5944766998291016, |
|
"learning_rate": 4.366169861215168e-06, |
|
"loss": 0.4744, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.24876031696347672, |
|
"grad_norm": 2.405319929122925, |
|
"learning_rate": 4.357358416811955e-06, |
|
"loss": 0.4685, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.2504077362811156, |
|
"grad_norm": 2.4015884399414062, |
|
"learning_rate": 4.348495169956477e-06, |
|
"loss": 0.4783, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.2520551555987546, |
|
"grad_norm": 2.325193166732788, |
|
"learning_rate": 4.339580367848153e-06, |
|
"loss": 0.4579, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.2537025749163935, |
|
"grad_norm": 1.8238539695739746, |
|
"learning_rate": 4.3306142591243e-06, |
|
"loss": 0.4697, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.2553499942340324, |
|
"grad_norm": 1.4284635782241821, |
|
"learning_rate": 4.321597093853194e-06, |
|
"loss": 0.452, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.2569974135516713, |
|
"grad_norm": 1.5146524906158447, |
|
"learning_rate": 4.3125291235271e-06, |
|
"loss": 0.4858, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.2586448328693102, |
|
"grad_norm": 2.1129367351531982, |
|
"learning_rate": 4.303410601055253e-06, |
|
"loss": 0.4986, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.2602922521869491, |
|
"grad_norm": 2.0981929302215576, |
|
"learning_rate": 4.29424178075681e-06, |
|
"loss": 0.4505, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.2619396715045881, |
|
"grad_norm": 1.3321784734725952, |
|
"learning_rate": 4.285022918353755e-06, |
|
"loss": 0.4983, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.263587090822227, |
|
"grad_norm": 2.7090718746185303, |
|
"learning_rate": 4.275754270963763e-06, |
|
"loss": 0.482, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.2652345101398659, |
|
"grad_norm": 1.5834273099899292, |
|
"learning_rate": 4.26643609709303e-06, |
|
"loss": 0.5029, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.26688192945750483, |
|
"grad_norm": 2.400024175643921, |
|
"learning_rate": 4.257068656629071e-06, |
|
"loss": 0.4579, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.26852934877514373, |
|
"grad_norm": 1.9160480499267578, |
|
"learning_rate": 4.24765221083346e-06, |
|
"loss": 0.4892, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.2701767680927827, |
|
"grad_norm": 2.4766881465911865, |
|
"learning_rate": 4.238187022334553e-06, |
|
"loss": 0.4633, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.2718241874104216, |
|
"grad_norm": 2.2665488719940186, |
|
"learning_rate": 4.228673355120156e-06, |
|
"loss": 0.4682, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.2734716067280605, |
|
"grad_norm": 2.582789897918701, |
|
"learning_rate": 4.2191114745301654e-06, |
|
"loss": 0.4761, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.27511902604569943, |
|
"grad_norm": 2.240748882293701, |
|
"learning_rate": 4.20950164724917e-06, |
|
"loss": 0.4613, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.27676644536333833, |
|
"grad_norm": 2.4156808853149414, |
|
"learning_rate": 4.1998441412990085e-06, |
|
"loss": 0.4907, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.27841386468097723, |
|
"grad_norm": 2.348371744155884, |
|
"learning_rate": 4.190139226031297e-06, |
|
"loss": 0.4675, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.2800612839986162, |
|
"grad_norm": 1.7973005771636963, |
|
"learning_rate": 4.180387172119916e-06, |
|
"loss": 0.4738, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.2817087033162551, |
|
"grad_norm": 2.322040557861328, |
|
"learning_rate": 4.17058825155346e-06, |
|
"loss": 0.4644, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.283356122633894, |
|
"grad_norm": 2.3491313457489014, |
|
"learning_rate": 4.160742737627656e-06, |
|
"loss": 0.5077, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.28500354195153293, |
|
"grad_norm": 1.630631446838379, |
|
"learning_rate": 4.150850904937733e-06, |
|
"loss": 0.4797, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.28665096126917183, |
|
"grad_norm": 2.0471599102020264, |
|
"learning_rate": 4.140913029370774e-06, |
|
"loss": 0.461, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.2882983805868108, |
|
"grad_norm": 2.4391767978668213, |
|
"learning_rate": 4.130929388098011e-06, |
|
"loss": 0.4962, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.2899457999044497, |
|
"grad_norm": 2.0148985385894775, |
|
"learning_rate": 4.120900259567103e-06, |
|
"loss": 0.4634, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.2915932192220886, |
|
"grad_norm": 2.3383798599243164, |
|
"learning_rate": 4.110825923494365e-06, |
|
"loss": 0.4553, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.29324063853972754, |
|
"grad_norm": 1.539428949356079, |
|
"learning_rate": 4.100706660856968e-06, |
|
"loss": 0.4864, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.29488805785736644, |
|
"grad_norm": 1.8251954317092896, |
|
"learning_rate": 4.090542753885101e-06, |
|
"loss": 0.487, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.29653547717500534, |
|
"grad_norm": 2.269007921218872, |
|
"learning_rate": 4.080334486054104e-06, |
|
"loss": 0.4423, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.2981828964926443, |
|
"grad_norm": 2.4436540603637695, |
|
"learning_rate": 4.0700821420765566e-06, |
|
"loss": 0.4916, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.2998303158102832, |
|
"grad_norm": 2.570488929748535, |
|
"learning_rate": 4.05978600789434e-06, |
|
"loss": 0.4536, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.3014777351279221, |
|
"grad_norm": 2.247633934020996, |
|
"learning_rate": 4.049446370670661e-06, |
|
"loss": 0.4891, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.30312515444556104, |
|
"grad_norm": 1.7023581266403198, |
|
"learning_rate": 4.0390635187820435e-06, |
|
"loss": 0.4594, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.30477257376319994, |
|
"grad_norm": 2.0368921756744385, |
|
"learning_rate": 4.028637741810285e-06, |
|
"loss": 0.4191, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.3064199930808389, |
|
"grad_norm": 2.0896544456481934, |
|
"learning_rate": 4.018169330534381e-06, |
|
"loss": 0.4691, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.3080674123984778, |
|
"grad_norm": 2.5784189701080322, |
|
"learning_rate": 4.007658576922413e-06, |
|
"loss": 0.4442, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.3097148317161167, |
|
"grad_norm": 2.169424057006836, |
|
"learning_rate": 3.997105774123409e-06, |
|
"loss": 0.4552, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.31136225103375564, |
|
"grad_norm": 2.076741933822632, |
|
"learning_rate": 3.986511216459163e-06, |
|
"loss": 0.462, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.31300967035139454, |
|
"grad_norm": 2.33245849609375, |
|
"learning_rate": 3.97587519941603e-06, |
|
"loss": 0.5015, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.31465708966903344, |
|
"grad_norm": 2.465367555618286, |
|
"learning_rate": 3.965198019636684e-06, |
|
"loss": 0.4726, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.3163045089866724, |
|
"grad_norm": 2.0327184200286865, |
|
"learning_rate": 3.95447997491184e-06, |
|
"loss": 0.4602, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.3179519283043113, |
|
"grad_norm": 2.6782443523406982, |
|
"learning_rate": 3.943721364171957e-06, |
|
"loss": 0.4676, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.3195993476219502, |
|
"grad_norm": 2.373873233795166, |
|
"learning_rate": 3.932922487478894e-06, |
|
"loss": 0.4466, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.32124676693958915, |
|
"grad_norm": 2.5210931301116943, |
|
"learning_rate": 3.9220836460175415e-06, |
|
"loss": 0.4543, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.32289418625722804, |
|
"grad_norm": 2.384608268737793, |
|
"learning_rate": 3.911205142087425e-06, |
|
"loss": 0.4758, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.324541605574867, |
|
"grad_norm": 2.8322508335113525, |
|
"learning_rate": 3.900287279094274e-06, |
|
"loss": 0.4597, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.3261890248925059, |
|
"grad_norm": 3.4156792163848877, |
|
"learning_rate": 3.889330361541552e-06, |
|
"loss": 0.4552, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.3278364442101448, |
|
"grad_norm": 1.7643976211547852, |
|
"learning_rate": 3.878334695021973e-06, |
|
"loss": 0.4589, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.32948386352778375, |
|
"grad_norm": 1.7313556671142578, |
|
"learning_rate": 3.867300586208975e-06, |
|
"loss": 0.4444, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.33113128284542265, |
|
"grad_norm": 1.821792721748352, |
|
"learning_rate": 3.856228342848167e-06, |
|
"loss": 0.4945, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.33277870216306155, |
|
"grad_norm": 2.735888719558716, |
|
"learning_rate": 3.845118273748743e-06, |
|
"loss": 0.4431, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.3344261214807005, |
|
"grad_norm": 2.3234407901763916, |
|
"learning_rate": 3.833970688774872e-06, |
|
"loss": 0.4838, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.3360735407983394, |
|
"grad_norm": 1.709910273551941, |
|
"learning_rate": 3.822785898837058e-06, |
|
"loss": 0.4754, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.3377209601159783, |
|
"grad_norm": 2.435945987701416, |
|
"learning_rate": 3.811564215883463e-06, |
|
"loss": 0.4737, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.33936837943361725, |
|
"grad_norm": 1.9514074325561523, |
|
"learning_rate": 3.8003059528912123e-06, |
|
"loss": 0.4861, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.34101579875125615, |
|
"grad_norm": 2.4523439407348633, |
|
"learning_rate": 3.7890114238576616e-06, |
|
"loss": 0.4814, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.3426632180688951, |
|
"grad_norm": 2.690749406814575, |
|
"learning_rate": 3.777680943791639e-06, |
|
"loss": 0.4837, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.344310637386534, |
|
"grad_norm": 1.8186627626419067, |
|
"learning_rate": 3.7663148287046635e-06, |
|
"loss": 0.4384, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.3459580567041729, |
|
"grad_norm": 2.5133306980133057, |
|
"learning_rate": 3.754913395602129e-06, |
|
"loss": 0.4612, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.34760547602181185, |
|
"grad_norm": 1.9760069847106934, |
|
"learning_rate": 3.7434769624744586e-06, |
|
"loss": 0.4619, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.34925289533945075, |
|
"grad_norm": 2.461090326309204, |
|
"learning_rate": 3.732005848288245e-06, |
|
"loss": 0.4762, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.35090031465708965, |
|
"grad_norm": 1.82012939453125, |
|
"learning_rate": 3.7205003729773454e-06, |
|
"loss": 0.4309, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.3525477339747286, |
|
"grad_norm": 1.5199309587478638, |
|
"learning_rate": 3.708960857433964e-06, |
|
"loss": 0.4632, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.3541951532923675, |
|
"grad_norm": 1.8525145053863525, |
|
"learning_rate": 3.6973876234997004e-06, |
|
"loss": 0.4595, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.3558425726100064, |
|
"grad_norm": 1.7146118879318237, |
|
"learning_rate": 3.6857809939565724e-06, |
|
"loss": 0.4414, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.35748999192764536, |
|
"grad_norm": 2.75750994682312, |
|
"learning_rate": 3.6741412925180153e-06, |
|
"loss": 0.4624, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.35913741124528425, |
|
"grad_norm": 2.6996710300445557, |
|
"learning_rate": 3.6624688438198506e-06, |
|
"loss": 0.4888, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.36078483056292315, |
|
"grad_norm": 1.895980715751648, |
|
"learning_rate": 3.650763973411238e-06, |
|
"loss": 0.4395, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.3624322498805621, |
|
"grad_norm": 2.5552258491516113, |
|
"learning_rate": 3.639027007745585e-06, |
|
"loss": 0.465, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.364079669198201, |
|
"grad_norm": 1.6127821207046509, |
|
"learning_rate": 3.6272582741714547e-06, |
|
"loss": 0.4282, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.36572708851583996, |
|
"grad_norm": 2.0909807682037354, |
|
"learning_rate": 3.615458100923425e-06, |
|
"loss": 0.4713, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.36737450783347886, |
|
"grad_norm": 1.798374056816101, |
|
"learning_rate": 3.603626817112941e-06, |
|
"loss": 0.4784, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.36902192715111776, |
|
"grad_norm": 2.0519778728485107, |
|
"learning_rate": 3.5917647527191328e-06, |
|
"loss": 0.4782, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.3706693464687567, |
|
"grad_norm": 2.137410879135132, |
|
"learning_rate": 3.5798722385796137e-06, |
|
"loss": 0.4599, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.3723167657863956, |
|
"grad_norm": 2.040231943130493, |
|
"learning_rate": 3.5679496063812507e-06, |
|
"loss": 0.434, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.3739641851040345, |
|
"grad_norm": 2.0495615005493164, |
|
"learning_rate": 3.5559971886509163e-06, |
|
"loss": 0.473, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.37561160442167346, |
|
"grad_norm": 2.5767838954925537, |
|
"learning_rate": 3.5440153187462146e-06, |
|
"loss": 0.4522, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.37725902373931236, |
|
"grad_norm": 2.11317777633667, |
|
"learning_rate": 3.5320043308461784e-06, |
|
"loss": 0.4971, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.37890644305695126, |
|
"grad_norm": 2.7997255325317383, |
|
"learning_rate": 3.5199645599419574e-06, |
|
"loss": 0.4562, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.3805538623745902, |
|
"grad_norm": 2.3313941955566406, |
|
"learning_rate": 3.5078963418274666e-06, |
|
"loss": 0.4466, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.3822012816922291, |
|
"grad_norm": 1.4548770189285278, |
|
"learning_rate": 3.4958000130900273e-06, |
|
"loss": 0.4628, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.38384870100986807, |
|
"grad_norm": 1.5566315650939941, |
|
"learning_rate": 3.4836759111009767e-06, |
|
"loss": 0.47, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.38549612032750696, |
|
"grad_norm": 1.3899728059768677, |
|
"learning_rate": 3.4715243740062577e-06, |
|
"loss": 0.46, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.38714353964514586, |
|
"grad_norm": 2.3716745376586914, |
|
"learning_rate": 3.4593457407169896e-06, |
|
"loss": 0.4389, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.3887909589627848, |
|
"grad_norm": 2.0501861572265625, |
|
"learning_rate": 3.4471403509000166e-06, |
|
"loss": 0.4621, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.3904383782804237, |
|
"grad_norm": 2.131397008895874, |
|
"learning_rate": 3.4349085449684306e-06, |
|
"loss": 0.4643, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.3920857975980626, |
|
"grad_norm": 2.515228509902954, |
|
"learning_rate": 3.4226506640720804e-06, |
|
"loss": 0.4691, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.39373321691570157, |
|
"grad_norm": 1.9131451845169067, |
|
"learning_rate": 3.4103670500880564e-06, |
|
"loss": 0.4583, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.39538063623334047, |
|
"grad_norm": 2.1132075786590576, |
|
"learning_rate": 3.3980580456111528e-06, |
|
"loss": 0.4572, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.39702805555097936, |
|
"grad_norm": 2.0267536640167236, |
|
"learning_rate": 3.385723993944317e-06, |
|
"loss": 0.4605, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.3986754748686183, |
|
"grad_norm": 1.9140433073043823, |
|
"learning_rate": 3.3733652390890714e-06, |
|
"loss": 0.4634, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.4003228941862572, |
|
"grad_norm": 1.319580078125, |
|
"learning_rate": 3.3609821257359187e-06, |
|
"loss": 0.4607, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.40197031350389617, |
|
"grad_norm": 2.329153299331665, |
|
"learning_rate": 3.3485749992547312e-06, |
|
"loss": 0.4864, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.40361773282153507, |
|
"grad_norm": 1.709675669670105, |
|
"learning_rate": 3.336144205685117e-06, |
|
"loss": 0.4772, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.40526515213917397, |
|
"grad_norm": 1.869702696800232, |
|
"learning_rate": 3.3236900917267663e-06, |
|
"loss": 0.4691, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.4069125714568129, |
|
"grad_norm": 2.017636775970459, |
|
"learning_rate": 3.311213004729787e-06, |
|
"loss": 0.4568, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.4085599907744518, |
|
"grad_norm": 2.2239317893981934, |
|
"learning_rate": 3.2987132926850123e-06, |
|
"loss": 0.4976, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.4102074100920907, |
|
"grad_norm": 2.3074443340301514, |
|
"learning_rate": 3.286191304214296e-06, |
|
"loss": 0.4669, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.4118548294097297, |
|
"grad_norm": 1.9659165143966675, |
|
"learning_rate": 3.2736473885607932e-06, |
|
"loss": 0.4794, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.41350224872736857, |
|
"grad_norm": 2.3997573852539062, |
|
"learning_rate": 3.2610818955792135e-06, |
|
"loss": 0.4847, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.41514966804500747, |
|
"grad_norm": 2.5638508796691895, |
|
"learning_rate": 3.248495175726068e-06, |
|
"loss": 0.4452, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.4167970873626464, |
|
"grad_norm": 1.7153327465057373, |
|
"learning_rate": 3.235887580049893e-06, |
|
"loss": 0.4598, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.4184445066802853, |
|
"grad_norm": 2.540421485900879, |
|
"learning_rate": 3.223259460181461e-06, |
|
"loss": 0.4573, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.4200919259979243, |
|
"grad_norm": 2.420246124267578, |
|
"learning_rate": 3.2106111683239703e-06, |
|
"loss": 0.4593, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.4217393453155632, |
|
"grad_norm": 2.1598918437957764, |
|
"learning_rate": 3.1979430572432256e-06, |
|
"loss": 0.4343, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.4233867646332021, |
|
"grad_norm": 2.091474771499634, |
|
"learning_rate": 3.185255480257797e-06, |
|
"loss": 0.4423, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.425034183950841, |
|
"grad_norm": 3.1766490936279297, |
|
"learning_rate": 3.1725487912291654e-06, |
|
"loss": 0.4499, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.4266816032684799, |
|
"grad_norm": 1.8975087404251099, |
|
"learning_rate": 3.1598233445518544e-06, |
|
"loss": 0.4833, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.4283290225861188, |
|
"grad_norm": 2.459707498550415, |
|
"learning_rate": 3.1470794951435473e-06, |
|
"loss": 0.4563, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.4299764419037578, |
|
"grad_norm": 1.9212175607681274, |
|
"learning_rate": 3.1343175984351842e-06, |
|
"loss": 0.4451, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.4316238612213967, |
|
"grad_norm": 2.1869616508483887, |
|
"learning_rate": 3.121538010361054e-06, |
|
"loss": 0.4438, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.4332712805390356, |
|
"grad_norm": 2.3515875339508057, |
|
"learning_rate": 3.108741087348862e-06, |
|
"loss": 0.4433, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.43491869985667453, |
|
"grad_norm": 2.7230703830718994, |
|
"learning_rate": 3.095927186309795e-06, |
|
"loss": 0.452, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.4365661191743134, |
|
"grad_norm": 1.987182855606079, |
|
"learning_rate": 3.08309666462856e-06, |
|
"loss": 0.4508, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.4382135384919524, |
|
"grad_norm": 1.8598235845565796, |
|
"learning_rate": 3.0702498801534234e-06, |
|
"loss": 0.4502, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.4398609578095913, |
|
"grad_norm": 1.3509740829467773, |
|
"learning_rate": 3.0573871911862252e-06, |
|
"loss": 0.4618, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.4415083771272302, |
|
"grad_norm": 2.3464887142181396, |
|
"learning_rate": 3.044508956472388e-06, |
|
"loss": 0.4687, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.44315579644486913, |
|
"grad_norm": 2.453792095184326, |
|
"learning_rate": 3.0316155351909136e-06, |
|
"loss": 0.4581, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.44480321576250803, |
|
"grad_norm": 1.8684953451156616, |
|
"learning_rate": 3.0187072869443595e-06, |
|
"loss": 0.4775, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.44645063508014693, |
|
"grad_norm": 2.501569986343384, |
|
"learning_rate": 3.005784571748816e-06, |
|
"loss": 0.4721, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.4480980543977859, |
|
"grad_norm": 2.526435613632202, |
|
"learning_rate": 2.992847750023861e-06, |
|
"loss": 0.4327, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.4497454737154248, |
|
"grad_norm": 2.1223368644714355, |
|
"learning_rate": 2.9798971825825107e-06, |
|
"loss": 0.4494, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.4513928930330637, |
|
"grad_norm": 3.0751936435699463, |
|
"learning_rate": 2.9669332306211513e-06, |
|
"loss": 0.4513, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.45304031235070263, |
|
"grad_norm": 1.7349650859832764, |
|
"learning_rate": 2.95395625570947e-06, |
|
"loss": 0.4516, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.45468773166834153, |
|
"grad_norm": 1.474882960319519, |
|
"learning_rate": 2.9409666197803715e-06, |
|
"loss": 0.4269, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.4563351509859805, |
|
"grad_norm": 1.845004916191101, |
|
"learning_rate": 2.9279646851198796e-06, |
|
"loss": 0.4598, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.4579825703036194, |
|
"grad_norm": 1.4891762733459473, |
|
"learning_rate": 2.9149508143570317e-06, |
|
"loss": 0.4383, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.4596299896212583, |
|
"grad_norm": 2.5375092029571533, |
|
"learning_rate": 2.9019253704537725e-06, |
|
"loss": 0.4903, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.46127740893889724, |
|
"grad_norm": 2.7068655490875244, |
|
"learning_rate": 2.888888716694824e-06, |
|
"loss": 0.4673, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.46292482825653614, |
|
"grad_norm": 1.9553802013397217, |
|
"learning_rate": 2.8758412166775536e-06, |
|
"loss": 0.4722, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.46457224757417503, |
|
"grad_norm": 2.417858362197876, |
|
"learning_rate": 2.8627832343018392e-06, |
|
"loss": 0.4778, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.466219666891814, |
|
"grad_norm": 2.021970748901367, |
|
"learning_rate": 2.849715133759912e-06, |
|
"loss": 0.438, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.4678670862094529, |
|
"grad_norm": 1.203245997428894, |
|
"learning_rate": 2.8366372795262043e-06, |
|
"loss": 0.448, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.4695145055270918, |
|
"grad_norm": 1.60651433467865, |
|
"learning_rate": 2.8235500363471835e-06, |
|
"loss": 0.4667, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.47116192484473074, |
|
"grad_norm": 2.5438413619995117, |
|
"learning_rate": 2.8104537692311772e-06, |
|
"loss": 0.4411, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.47280934416236964, |
|
"grad_norm": 1.9837552309036255, |
|
"learning_rate": 2.7973488434381936e-06, |
|
"loss": 0.4772, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.4744567634800086, |
|
"grad_norm": 4.9808573722839355, |
|
"learning_rate": 2.7842356244697365e-06, |
|
"loss": 0.4585, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.4761041827976475, |
|
"grad_norm": 2.3967010974884033, |
|
"learning_rate": 2.771114478058609e-06, |
|
"loss": 0.4434, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.4777516021152864, |
|
"grad_norm": 2.0720436573028564, |
|
"learning_rate": 2.757985770158712e-06, |
|
"loss": 0.4553, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.47939902143292534, |
|
"grad_norm": 2.0397377014160156, |
|
"learning_rate": 2.744849866934843e-06, |
|
"loss": 0.4335, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.48104644075056424, |
|
"grad_norm": 1.8307183980941772, |
|
"learning_rate": 2.7317071347524756e-06, |
|
"loss": 0.4575, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.48269386006820314, |
|
"grad_norm": 2.0401103496551514, |
|
"learning_rate": 2.7185579401675478e-06, |
|
"loss": 0.4536, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.4843412793858421, |
|
"grad_norm": 1.5589044094085693, |
|
"learning_rate": 2.705402649916238e-06, |
|
"loss": 0.4464, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.485988698703481, |
|
"grad_norm": 1.7465211153030396, |
|
"learning_rate": 2.692241630904732e-06, |
|
"loss": 0.443, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.4876361180211199, |
|
"grad_norm": 1.9152140617370605, |
|
"learning_rate": 2.679075250198995e-06, |
|
"loss": 0.4453, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.48928353733875884, |
|
"grad_norm": 1.9584287405014038, |
|
"learning_rate": 2.665903875014531e-06, |
|
"loss": 0.4412, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.49093095665639774, |
|
"grad_norm": 2.4530208110809326, |
|
"learning_rate": 2.6527278727061438e-06, |
|
"loss": 0.455, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.4925783759740367, |
|
"grad_norm": 2.28879451751709, |
|
"learning_rate": 2.6395476107576866e-06, |
|
"loss": 0.4545, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.4942257952916756, |
|
"grad_norm": 2.3238701820373535, |
|
"learning_rate": 2.626363456771818e-06, |
|
"loss": 0.4659, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.4958732146093145, |
|
"grad_norm": 2.5362935066223145, |
|
"learning_rate": 2.613175778459746e-06, |
|
"loss": 0.475, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.49752063392695345, |
|
"grad_norm": 1.6304713487625122, |
|
"learning_rate": 2.599984943630974e-06, |
|
"loss": 0.4344, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.49916805324459235, |
|
"grad_norm": 2.1046688556671143, |
|
"learning_rate": 2.5867913201830415e-06, |
|
"loss": 0.442, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.5008154725622312, |
|
"grad_norm": 2.016679048538208, |
|
"learning_rate": 2.5735952760912623e-06, |
|
"loss": 0.4468, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.5024628918798701, |
|
"grad_norm": 1.700775384902954, |
|
"learning_rate": 2.560397179398467e-06, |
|
"loss": 0.4755, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.5041103111975092, |
|
"grad_norm": 2.6758084297180176, |
|
"learning_rate": 2.5471973982047283e-06, |
|
"loss": 0.4734, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.505757730515148, |
|
"grad_norm": 2.0318357944488525, |
|
"learning_rate": 2.533996300657105e-06, |
|
"loss": 0.4257, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.507405149832787, |
|
"grad_norm": 1.755279779434204, |
|
"learning_rate": 2.5207942549393678e-06, |
|
"loss": 0.4311, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.5090525691504258, |
|
"grad_norm": 1.3220248222351074, |
|
"learning_rate": 2.507591629261732e-06, |
|
"loss": 0.4586, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.5106999884680647, |
|
"grad_norm": 1.8418200016021729, |
|
"learning_rate": 2.4943887918505887e-06, |
|
"loss": 0.4856, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.5123474077857036, |
|
"grad_norm": 2.0014216899871826, |
|
"learning_rate": 2.4811861109382337e-06, |
|
"loss": 0.4691, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.5139948271033427, |
|
"grad_norm": 2.2227587699890137, |
|
"learning_rate": 2.4679839547526e-06, |
|
"loss": 0.4465, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.5156422464209816, |
|
"grad_norm": 2.022191047668457, |
|
"learning_rate": 2.4547826915069816e-06, |
|
"loss": 0.4344, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.5172896657386205, |
|
"grad_norm": 1.4360835552215576, |
|
"learning_rate": 2.441582689389772e-06, |
|
"loss": 0.446, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.5189370850562594, |
|
"grad_norm": 2.100766658782959, |
|
"learning_rate": 2.4283843165541914e-06, |
|
"loss": 0.4457, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.5205845043738982, |
|
"grad_norm": 1.6528244018554688, |
|
"learning_rate": 2.4151879411080144e-06, |
|
"loss": 0.4477, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.5222319236915373, |
|
"grad_norm": 2.4091269969940186, |
|
"learning_rate": 2.401993931103312e-06, |
|
"loss": 0.4764, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.5238793430091762, |
|
"grad_norm": 2.416269302368164, |
|
"learning_rate": 2.388802654526182e-06, |
|
"loss": 0.4572, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.5255267623268151, |
|
"grad_norm": 1.747132420539856, |
|
"learning_rate": 2.3756144792864812e-06, |
|
"loss": 0.4439, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.527174181644454, |
|
"grad_norm": 1.7760906219482422, |
|
"learning_rate": 2.3624297732075747e-06, |
|
"loss": 0.4589, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.5288216009620929, |
|
"grad_norm": 1.9603146314620972, |
|
"learning_rate": 2.349248904016069e-06, |
|
"loss": 0.4464, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.5304690202797318, |
|
"grad_norm": 2.7575228214263916, |
|
"learning_rate": 2.336072239331555e-06, |
|
"loss": 0.425, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.5321164395973708, |
|
"grad_norm": 3.160569190979004, |
|
"learning_rate": 2.3229001466563647e-06, |
|
"loss": 0.4493, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.5337638589150097, |
|
"grad_norm": 1.3065659999847412, |
|
"learning_rate": 2.3097329933653116e-06, |
|
"loss": 0.4134, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.5354112782326486, |
|
"grad_norm": 1.933773159980774, |
|
"learning_rate": 2.2965711466954444e-06, |
|
"loss": 0.4465, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.5370586975502875, |
|
"grad_norm": 1.7939263582229614, |
|
"learning_rate": 2.283414973735816e-06, |
|
"loss": 0.4577, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.5387061168679264, |
|
"grad_norm": 2.202970027923584, |
|
"learning_rate": 2.270264841417229e-06, |
|
"loss": 0.4506, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.5403535361855654, |
|
"grad_norm": 1.2232089042663574, |
|
"learning_rate": 2.2571211165020164e-06, |
|
"loss": 0.4412, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.5420009555032043, |
|
"grad_norm": 2.2651045322418213, |
|
"learning_rate": 2.243984165573804e-06, |
|
"loss": 0.4838, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.5436483748208432, |
|
"grad_norm": 1.1817712783813477, |
|
"learning_rate": 2.2308543550272853e-06, |
|
"loss": 0.4426, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.5452957941384821, |
|
"grad_norm": 3.3513026237487793, |
|
"learning_rate": 2.2177320510580115e-06, |
|
"loss": 0.4432, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.546943213456121, |
|
"grad_norm": 2.345806837081909, |
|
"learning_rate": 2.2046176196521706e-06, |
|
"loss": 0.4591, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.5485906327737599, |
|
"grad_norm": 2.1807124614715576, |
|
"learning_rate": 2.191511426576377e-06, |
|
"loss": 0.4589, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.5502380520913989, |
|
"grad_norm": 2.6100516319274902, |
|
"learning_rate": 2.1784138373674817e-06, |
|
"loss": 0.4644, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.5518854714090378, |
|
"grad_norm": 1.3514959812164307, |
|
"learning_rate": 2.165325217322367e-06, |
|
"loss": 0.4123, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.5535328907266767, |
|
"grad_norm": 2.2316343784332275, |
|
"learning_rate": 2.1522459314877603e-06, |
|
"loss": 0.4329, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.5551803100443156, |
|
"grad_norm": 1.948644757270813, |
|
"learning_rate": 2.1391763446500583e-06, |
|
"loss": 0.4485, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.5568277293619545, |
|
"grad_norm": 2.1561203002929688, |
|
"learning_rate": 2.1261168213251465e-06, |
|
"loss": 0.4557, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.5584751486795935, |
|
"grad_norm": 2.097280263900757, |
|
"learning_rate": 2.1130677257482328e-06, |
|
"loss": 0.4535, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.5601225679972324, |
|
"grad_norm": 2.417245388031006, |
|
"learning_rate": 2.1000294218636963e-06, |
|
"loss": 0.4758, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.5617699873148713, |
|
"grad_norm": 1.9167017936706543, |
|
"learning_rate": 2.0870022733149287e-06, |
|
"loss": 0.4742, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.5634174066325102, |
|
"grad_norm": 1.482334017753601, |
|
"learning_rate": 2.073986643434193e-06, |
|
"loss": 0.4287, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.5650648259501491, |
|
"grad_norm": 1.6773154735565186, |
|
"learning_rate": 2.0609828952324954e-06, |
|
"loss": 0.4211, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.566712245267788, |
|
"grad_norm": 1.883154273033142, |
|
"learning_rate": 2.047991391389458e-06, |
|
"loss": 0.453, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.568359664585427, |
|
"grad_norm": 2.0675201416015625, |
|
"learning_rate": 2.035012494243198e-06, |
|
"loss": 0.4762, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.5700070839030659, |
|
"grad_norm": 2.362501382827759, |
|
"learning_rate": 2.0220465657802322e-06, |
|
"loss": 0.4566, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.5716545032207048, |
|
"grad_norm": 1.8373854160308838, |
|
"learning_rate": 2.0090939676253744e-06, |
|
"loss": 0.442, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.5733019225383437, |
|
"grad_norm": 1.8830519914627075, |
|
"learning_rate": 1.9961550610316477e-06, |
|
"loss": 0.4521, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.5749493418559826, |
|
"grad_norm": 1.484971523284912, |
|
"learning_rate": 1.9832302068702162e-06, |
|
"loss": 0.4795, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.5765967611736216, |
|
"grad_norm": 1.619246482849121, |
|
"learning_rate": 1.9703197656203153e-06, |
|
"loss": 0.4525, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.5782441804912605, |
|
"grad_norm": 1.589003562927246, |
|
"learning_rate": 1.9574240973591955e-06, |
|
"loss": 0.4346, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.5798915998088994, |
|
"grad_norm": 1.2750858068466187, |
|
"learning_rate": 1.944543561752088e-06, |
|
"loss": 0.4595, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.5815390191265383, |
|
"grad_norm": 2.5024302005767822, |
|
"learning_rate": 1.931678518042165e-06, |
|
"loss": 0.4469, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.5831864384441772, |
|
"grad_norm": 2.244246244430542, |
|
"learning_rate": 1.918829325040523e-06, |
|
"loss": 0.4475, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.5848338577618161, |
|
"grad_norm": 1.7237255573272705, |
|
"learning_rate": 1.9059963411161788e-06, |
|
"loss": 0.4578, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.5864812770794551, |
|
"grad_norm": 1.9429930448532104, |
|
"learning_rate": 1.8931799241860704e-06, |
|
"loss": 0.4776, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.588128696397094, |
|
"grad_norm": 2.0698490142822266, |
|
"learning_rate": 1.880380431705075e-06, |
|
"loss": 0.4422, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.5897761157147329, |
|
"grad_norm": 1.440127968788147, |
|
"learning_rate": 1.8675982206560417e-06, |
|
"loss": 0.4528, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.5914235350323718, |
|
"grad_norm": 2.600696563720703, |
|
"learning_rate": 1.854833647539833e-06, |
|
"loss": 0.4167, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.5930709543500107, |
|
"grad_norm": 2.2462635040283203, |
|
"learning_rate": 1.8420870683653819e-06, |
|
"loss": 0.4461, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.5947183736676497, |
|
"grad_norm": 2.301934003829956, |
|
"learning_rate": 1.8293588386397646e-06, |
|
"loss": 0.4609, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.5963657929852886, |
|
"grad_norm": 1.231947422027588, |
|
"learning_rate": 1.816649313358284e-06, |
|
"loss": 0.4617, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.5980132123029275, |
|
"grad_norm": 1.6088837385177612, |
|
"learning_rate": 1.8039588469945675e-06, |
|
"loss": 0.4298, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.5996606316205664, |
|
"grad_norm": 2.1999731063842773, |
|
"learning_rate": 1.791287793490682e-06, |
|
"loss": 0.4576, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.6013080509382053, |
|
"grad_norm": 1.9624534845352173, |
|
"learning_rate": 1.7786365062472645e-06, |
|
"loss": 0.4416, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.6029554702558442, |
|
"grad_norm": 2.441080093383789, |
|
"learning_rate": 1.7660053381136593e-06, |
|
"loss": 0.4613, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.6046028895734832, |
|
"grad_norm": 1.7500004768371582, |
|
"learning_rate": 1.7533946413780845e-06, |
|
"loss": 0.4493, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.6062503088911221, |
|
"grad_norm": 1.9511518478393555, |
|
"learning_rate": 1.7408047677578016e-06, |
|
"loss": 0.4487, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.607897728208761, |
|
"grad_norm": 2.2485551834106445, |
|
"learning_rate": 1.7282360683893057e-06, |
|
"loss": 0.4515, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.6095451475263999, |
|
"grad_norm": 2.1224875450134277, |
|
"learning_rate": 1.7156888938185373e-06, |
|
"loss": 0.4384, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.6111925668440388, |
|
"grad_norm": 2.6325182914733887, |
|
"learning_rate": 1.7031635939910968e-06, |
|
"loss": 0.4625, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.6128399861616778, |
|
"grad_norm": 1.8848086595535278, |
|
"learning_rate": 1.6906605182424942e-06, |
|
"loss": 0.4627, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.6144874054793167, |
|
"grad_norm": 1.8694807291030884, |
|
"learning_rate": 1.6781800152884004e-06, |
|
"loss": 0.4572, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.6161348247969556, |
|
"grad_norm": 1.9170241355895996, |
|
"learning_rate": 1.6657224332149185e-06, |
|
"loss": 0.4646, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.6177822441145945, |
|
"grad_norm": 2.1769967079162598, |
|
"learning_rate": 1.6532881194688843e-06, |
|
"loss": 0.4584, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.6194296634322334, |
|
"grad_norm": 2.1281752586364746, |
|
"learning_rate": 1.640877420848169e-06, |
|
"loss": 0.4588, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.6210770827498723, |
|
"grad_norm": 3.2545199394226074, |
|
"learning_rate": 1.6284906834920056e-06, |
|
"loss": 0.4494, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.6227245020675113, |
|
"grad_norm": 2.595705032348633, |
|
"learning_rate": 1.6161282528713429e-06, |
|
"loss": 0.4702, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.6243719213851502, |
|
"grad_norm": 2.0563864707946777, |
|
"learning_rate": 1.6037904737792037e-06, |
|
"loss": 0.4374, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.6260193407027891, |
|
"grad_norm": 2.5470025539398193, |
|
"learning_rate": 1.5914776903210675e-06, |
|
"loss": 0.4467, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.627666760020428, |
|
"grad_norm": 2.6239607334136963, |
|
"learning_rate": 1.5791902459052793e-06, |
|
"loss": 0.4156, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.6293141793380669, |
|
"grad_norm": 1.202338457107544, |
|
"learning_rate": 1.5669284832334671e-06, |
|
"loss": 0.4163, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.6309615986557059, |
|
"grad_norm": 2.398700714111328, |
|
"learning_rate": 1.554692744290984e-06, |
|
"loss": 0.4515, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.6326090179733448, |
|
"grad_norm": 2.2210938930511475, |
|
"learning_rate": 1.542483370337372e-06, |
|
"loss": 0.4704, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.6342564372909837, |
|
"grad_norm": 1.1223909854888916, |
|
"learning_rate": 1.530300701896844e-06, |
|
"loss": 0.4231, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.6359038566086226, |
|
"grad_norm": 2.2360265254974365, |
|
"learning_rate": 1.5181450787487839e-06, |
|
"loss": 0.4339, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.6375512759262615, |
|
"grad_norm": 1.6431453227996826, |
|
"learning_rate": 1.5060168399182731e-06, |
|
"loss": 0.4341, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.6391986952439004, |
|
"grad_norm": 1.9951646327972412, |
|
"learning_rate": 1.4939163236666338e-06, |
|
"loss": 0.4744, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.6408461145615394, |
|
"grad_norm": 3.3914270401000977, |
|
"learning_rate": 1.4818438674819934e-06, |
|
"loss": 0.4595, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.6424935338791783, |
|
"grad_norm": 2.1617212295532227, |
|
"learning_rate": 1.4697998080698745e-06, |
|
"loss": 0.4465, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.6441409531968172, |
|
"grad_norm": 2.4593045711517334, |
|
"learning_rate": 1.4577844813438022e-06, |
|
"loss": 0.4695, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.6457883725144561, |
|
"grad_norm": 2.2030935287475586, |
|
"learning_rate": 1.4457982224159346e-06, |
|
"loss": 0.4449, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.647435791832095, |
|
"grad_norm": 1.3730400800704956, |
|
"learning_rate": 1.433841365587719e-06, |
|
"loss": 0.4382, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.649083211149734, |
|
"grad_norm": 3.4730331897735596, |
|
"learning_rate": 1.421914244340567e-06, |
|
"loss": 0.4469, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.6507306304673729, |
|
"grad_norm": 1.946877360343933, |
|
"learning_rate": 1.410017191326551e-06, |
|
"loss": 0.4685, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.6523780497850118, |
|
"grad_norm": 1.6987239122390747, |
|
"learning_rate": 1.39815053835913e-06, |
|
"loss": 0.4469, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.6540254691026507, |
|
"grad_norm": 1.93442964553833, |
|
"learning_rate": 1.3863146164038946e-06, |
|
"loss": 0.4523, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.6556728884202896, |
|
"grad_norm": 2.016063690185547, |
|
"learning_rate": 1.3745097555693343e-06, |
|
"loss": 0.4079, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.6573203077379285, |
|
"grad_norm": 1.9582340717315674, |
|
"learning_rate": 1.3627362850976323e-06, |
|
"loss": 0.4524, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.6589677270555675, |
|
"grad_norm": 1.6741374731063843, |
|
"learning_rate": 1.3509945333554828e-06, |
|
"loss": 0.4346, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.6606151463732064, |
|
"grad_norm": 2.514186382293701, |
|
"learning_rate": 1.3392848278249298e-06, |
|
"loss": 0.4761, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.6622625656908453, |
|
"grad_norm": 2.4352760314941406, |
|
"learning_rate": 1.3276074950942381e-06, |
|
"loss": 0.4182, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.6639099850084842, |
|
"grad_norm": 1.9086421728134155, |
|
"learning_rate": 1.3159628608487848e-06, |
|
"loss": 0.4431, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.6655574043261231, |
|
"grad_norm": 1.9062386751174927, |
|
"learning_rate": 1.3043512498619677e-06, |
|
"loss": 0.4494, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.6672048236437621, |
|
"grad_norm": 2.4138245582580566, |
|
"learning_rate": 1.2927729859861571e-06, |
|
"loss": 0.4493, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.668852242961401, |
|
"grad_norm": 2.2896976470947266, |
|
"learning_rate": 1.2812283921436597e-06, |
|
"loss": 0.4383, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.6704996622790399, |
|
"grad_norm": 2.136972427368164, |
|
"learning_rate": 1.2697177903177077e-06, |
|
"loss": 0.4233, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.6721470815966788, |
|
"grad_norm": 1.7220128774642944, |
|
"learning_rate": 1.2582415015434857e-06, |
|
"loss": 0.4331, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.6737945009143177, |
|
"grad_norm": 2.0941953659057617, |
|
"learning_rate": 1.2467998458991768e-06, |
|
"loss": 0.482, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.6754419202319566, |
|
"grad_norm": 2.6354613304138184, |
|
"learning_rate": 1.2353931424970258e-06, |
|
"loss": 0.4487, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.6770893395495956, |
|
"grad_norm": 2.2864413261413574, |
|
"learning_rate": 1.224021709474451e-06, |
|
"loss": 0.4668, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.6787367588672345, |
|
"grad_norm": 1.8881123065948486, |
|
"learning_rate": 1.2126858639851649e-06, |
|
"loss": 0.4572, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.6803841781848734, |
|
"grad_norm": 2.1788628101348877, |
|
"learning_rate": 1.2013859221903273e-06, |
|
"loss": 0.4589, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.6820315975025123, |
|
"grad_norm": 2.4340453147888184, |
|
"learning_rate": 1.190122199249733e-06, |
|
"loss": 0.4363, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.6836790168201512, |
|
"grad_norm": 2.3238346576690674, |
|
"learning_rate": 1.1788950093130177e-06, |
|
"loss": 0.4187, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.6853264361377902, |
|
"grad_norm": 2.4663116931915283, |
|
"learning_rate": 1.1677046655108974e-06, |
|
"loss": 0.4542, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.6869738554554291, |
|
"grad_norm": 1.5595173835754395, |
|
"learning_rate": 1.1565514799464354e-06, |
|
"loss": 0.4612, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.688621274773068, |
|
"grad_norm": 2.0184364318847656, |
|
"learning_rate": 1.145435763686335e-06, |
|
"loss": 0.4535, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.6902686940907069, |
|
"grad_norm": 3.0829389095306396, |
|
"learning_rate": 1.134357826752269e-06, |
|
"loss": 0.4307, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.6919161134083458, |
|
"grad_norm": 2.8656702041625977, |
|
"learning_rate": 1.1233179781122286e-06, |
|
"loss": 0.4511, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.6935635327259847, |
|
"grad_norm": 2.2438855171203613, |
|
"learning_rate": 1.1123165256719077e-06, |
|
"loss": 0.4358, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.6952109520436237, |
|
"grad_norm": 2.6837387084960938, |
|
"learning_rate": 1.1013537762661147e-06, |
|
"loss": 0.4702, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.6968583713612626, |
|
"grad_norm": 2.0240025520324707, |
|
"learning_rate": 1.0904300356502174e-06, |
|
"loss": 0.4211, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.6985057906789015, |
|
"grad_norm": 2.1769285202026367, |
|
"learning_rate": 1.0795456084916095e-06, |
|
"loss": 0.4635, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.7001532099965404, |
|
"grad_norm": 1.203687310218811, |
|
"learning_rate": 1.0687007983612189e-06, |
|
"loss": 0.4241, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.7018006293141793, |
|
"grad_norm": 2.5927300453186035, |
|
"learning_rate": 1.0578959077250417e-06, |
|
"loss": 0.4603, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.7034480486318182, |
|
"grad_norm": 1.3485939502716064, |
|
"learning_rate": 1.0471312379356991e-06, |
|
"loss": 0.4563, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.7050954679494572, |
|
"grad_norm": 1.8091089725494385, |
|
"learning_rate": 1.03640708922404e-06, |
|
"loss": 0.4303, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.7067428872670961, |
|
"grad_norm": 2.243220090866089, |
|
"learning_rate": 1.0257237606907647e-06, |
|
"loss": 0.4484, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.708390306584735, |
|
"grad_norm": 1.7703299522399902, |
|
"learning_rate": 1.0150815502980804e-06, |
|
"loss": 0.4459, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.7100377259023739, |
|
"grad_norm": 1.7074419260025024, |
|
"learning_rate": 1.0044807548613947e-06, |
|
"loss": 0.3932, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.7116851452200128, |
|
"grad_norm": 2.930617332458496, |
|
"learning_rate": 9.939216700410387e-07, |
|
"loss": 0.4411, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.7133325645376518, |
|
"grad_norm": 1.8758985996246338, |
|
"learning_rate": 9.834045903340127e-07, |
|
"loss": 0.434, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.7149799838552907, |
|
"grad_norm": 2.038867712020874, |
|
"learning_rate": 9.729298090657821e-07, |
|
"loss": 0.4666, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.7166274031729296, |
|
"grad_norm": 2.4463798999786377, |
|
"learning_rate": 9.624976183820914e-07, |
|
"loss": 0.4492, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.7182748224905685, |
|
"grad_norm": 0.9264168739318848, |
|
"learning_rate": 9.521083092408148e-07, |
|
"loss": 0.4308, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.7199222418082074, |
|
"grad_norm": 1.8402535915374756, |
|
"learning_rate": 9.417621714038455e-07, |
|
"loss": 0.4375, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.7215696611258463, |
|
"grad_norm": 2.28937029838562, |
|
"learning_rate": 9.314594934290147e-07, |
|
"loss": 0.4451, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.7232170804434853, |
|
"grad_norm": 2.710644245147705, |
|
"learning_rate": 9.212005626620354e-07, |
|
"loss": 0.4923, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.7248644997611242, |
|
"grad_norm": 1.6825114488601685, |
|
"learning_rate": 9.109856652284979e-07, |
|
"loss": 0.4281, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.7265119190787631, |
|
"grad_norm": 1.5312185287475586, |
|
"learning_rate": 9.008150860258852e-07, |
|
"loss": 0.4252, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.728159338396402, |
|
"grad_norm": 1.606581449508667, |
|
"learning_rate": 8.90689108715625e-07, |
|
"loss": 0.4449, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.7298067577140409, |
|
"grad_norm": 2.8217248916625977, |
|
"learning_rate": 8.806080157151828e-07, |
|
"loss": 0.4399, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.7314541770316799, |
|
"grad_norm": 2.25714373588562, |
|
"learning_rate": 8.705720881901855e-07, |
|
"loss": 0.435, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.7331015963493188, |
|
"grad_norm": 2.2999300956726074, |
|
"learning_rate": 8.605816060465725e-07, |
|
"loss": 0.4481, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.7347490156669577, |
|
"grad_norm": 2.1442625522613525, |
|
"learning_rate": 8.506368479227958e-07, |
|
"loss": 0.4396, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.7363964349845966, |
|
"grad_norm": 2.097804307937622, |
|
"learning_rate": 8.407380911820487e-07, |
|
"loss": 0.4486, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.7380438543022355, |
|
"grad_norm": 2.046945333480835, |
|
"learning_rate": 8.308856119045239e-07, |
|
"loss": 0.4639, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.7396912736198744, |
|
"grad_norm": 1.8260259628295898, |
|
"learning_rate": 8.210796848797193e-07, |
|
"loss": 0.4433, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.7413386929375134, |
|
"grad_norm": 2.123908281326294, |
|
"learning_rate": 8.113205835987756e-07, |
|
"loss": 0.4183, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.7429861122551523, |
|
"grad_norm": 2.8095531463623047, |
|
"learning_rate": 8.016085802468399e-07, |
|
"loss": 0.4357, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.7446335315727912, |
|
"grad_norm": 3.761507511138916, |
|
"learning_rate": 7.919439456954822e-07, |
|
"loss": 0.4282, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.7462809508904301, |
|
"grad_norm": 1.9820051193237305, |
|
"learning_rate": 7.823269494951394e-07, |
|
"loss": 0.4714, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.747928370208069, |
|
"grad_norm": 1.8739370107650757, |
|
"learning_rate": 7.727578598675917e-07, |
|
"loss": 0.4312, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.749575789525708, |
|
"grad_norm": 2.4350790977478027, |
|
"learning_rate": 7.632369436984921e-07, |
|
"loss": 0.4308, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.7512232088433469, |
|
"grad_norm": 2.3461410999298096, |
|
"learning_rate": 7.53764466529914e-07, |
|
"loss": 0.4495, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.7528706281609858, |
|
"grad_norm": 2.332594633102417, |
|
"learning_rate": 7.443406925529467e-07, |
|
"loss": 0.4271, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.7545180474786247, |
|
"grad_norm": 2.7010247707366943, |
|
"learning_rate": 7.349658846003318e-07, |
|
"loss": 0.4581, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.7561654667962636, |
|
"grad_norm": 2.0763182640075684, |
|
"learning_rate": 7.256403041391258e-07, |
|
"loss": 0.4599, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.7578128861139025, |
|
"grad_norm": 1.678594708442688, |
|
"learning_rate": 7.163642112634134e-07, |
|
"loss": 0.4614, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.7594603054315415, |
|
"grad_norm": 1.6114099025726318, |
|
"learning_rate": 7.071378646870525e-07, |
|
"loss": 0.4352, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.7611077247491804, |
|
"grad_norm": 2.531679391860962, |
|
"learning_rate": 6.979615217364539e-07, |
|
"loss": 0.452, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.7627551440668193, |
|
"grad_norm": 1.2857202291488647, |
|
"learning_rate": 6.888354383434098e-07, |
|
"loss": 0.4425, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.7644025633844582, |
|
"grad_norm": 1.769644021987915, |
|
"learning_rate": 6.797598690379542e-07, |
|
"loss": 0.4325, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.7660499827020971, |
|
"grad_norm": 1.5384021997451782, |
|
"learning_rate": 6.707350669412613e-07, |
|
"loss": 0.4739, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.7676974020197361, |
|
"grad_norm": 2.200972318649292, |
|
"learning_rate": 6.617612837585887e-07, |
|
"loss": 0.4702, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.769344821337375, |
|
"grad_norm": 2.062885046005249, |
|
"learning_rate": 6.528387697722599e-07, |
|
"loss": 0.4703, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.7709922406550139, |
|
"grad_norm": 1.4489109516143799, |
|
"learning_rate": 6.439677738346752e-07, |
|
"loss": 0.4403, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.7726396599726528, |
|
"grad_norm": 3.070599317550659, |
|
"learning_rate": 6.351485433613799e-07, |
|
"loss": 0.4353, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.7742870792902917, |
|
"grad_norm": 2.201493978500366, |
|
"learning_rate": 6.263813243241593e-07, |
|
"loss": 0.4201, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.7759344986079306, |
|
"grad_norm": 2.203810930252075, |
|
"learning_rate": 6.176663612441785e-07, |
|
"loss": 0.4681, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.7775819179255696, |
|
"grad_norm": 2.4481027126312256, |
|
"learning_rate": 6.090038971851642e-07, |
|
"loss": 0.4721, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.7792293372432085, |
|
"grad_norm": 1.9644261598587036, |
|
"learning_rate": 6.003941737466273e-07, |
|
"loss": 0.4365, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.7808767565608474, |
|
"grad_norm": 1.6432219743728638, |
|
"learning_rate": 5.918374310571176e-07, |
|
"loss": 0.4291, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.7825241758784863, |
|
"grad_norm": 2.489579200744629, |
|
"learning_rate": 5.833339077675343e-07, |
|
"loss": 0.4396, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.7841715951961252, |
|
"grad_norm": 1.5569617748260498, |
|
"learning_rate": 5.748838410444665e-07, |
|
"loss": 0.4491, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.7858190145137642, |
|
"grad_norm": 2.200166702270508, |
|
"learning_rate": 5.664874665635767e-07, |
|
"loss": 0.4672, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.7874664338314031, |
|
"grad_norm": 2.1616365909576416, |
|
"learning_rate": 5.581450185030315e-07, |
|
"loss": 0.4579, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.789113853149042, |
|
"grad_norm": 1.2923545837402344, |
|
"learning_rate": 5.4985672953697e-07, |
|
"loss": 0.4424, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.7907612724666809, |
|
"grad_norm": 2.338345527648926, |
|
"learning_rate": 5.416228308290095e-07, |
|
"loss": 0.4416, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.7924086917843198, |
|
"grad_norm": 1.684395670890808, |
|
"learning_rate": 5.334435520258039e-07, |
|
"loss": 0.4136, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.7940561111019587, |
|
"grad_norm": 1.9474413394927979, |
|
"learning_rate": 5.25319121250637e-07, |
|
"loss": 0.4252, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.7957035304195977, |
|
"grad_norm": 2.8479621410369873, |
|
"learning_rate": 5.172497650970567e-07, |
|
"loss": 0.4375, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.7973509497372366, |
|
"grad_norm": 1.9628188610076904, |
|
"learning_rate": 5.092357086225627e-07, |
|
"loss": 0.4455, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.7989983690548755, |
|
"grad_norm": 1.8695141077041626, |
|
"learning_rate": 5.012771753423223e-07, |
|
"loss": 0.4819, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.8006457883725144, |
|
"grad_norm": 1.873336672782898, |
|
"learning_rate": 4.933743872229388e-07, |
|
"loss": 0.4405, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.8022932076901533, |
|
"grad_norm": 2.134643077850342, |
|
"learning_rate": 4.85527564676262e-07, |
|
"loss": 0.4381, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.8039406270077923, |
|
"grad_norm": 2.1162221431732178, |
|
"learning_rate": 4.777369265532408e-07, |
|
"loss": 0.4577, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.8055880463254312, |
|
"grad_norm": 2.036649227142334, |
|
"learning_rate": 4.7000269013781604e-07, |
|
"loss": 0.4238, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.8072354656430701, |
|
"grad_norm": 1.4969152212142944, |
|
"learning_rate": 4.6232507114086613e-07, |
|
"loss": 0.45, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.808882884960709, |
|
"grad_norm": 1.9845752716064453, |
|
"learning_rate": 4.547042836941865e-07, |
|
"loss": 0.4548, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.8105303042783479, |
|
"grad_norm": 1.967536449432373, |
|
"learning_rate": 4.4714054034451585e-07, |
|
"loss": 0.4057, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.8121777235959868, |
|
"grad_norm": 1.79136323928833, |
|
"learning_rate": 4.3963405204761416e-07, |
|
"loss": 0.4456, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.8138251429136258, |
|
"grad_norm": 2.0205838680267334, |
|
"learning_rate": 4.3218502816237433e-07, |
|
"loss": 0.398, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.8154725622312647, |
|
"grad_norm": 1.4011536836624146, |
|
"learning_rate": 4.247936764449828e-07, |
|
"loss": 0.4542, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.8171199815489036, |
|
"grad_norm": 1.8763850927352905, |
|
"learning_rate": 4.174602030431299e-07, |
|
"loss": 0.4464, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.8187674008665425, |
|
"grad_norm": 1.8748266696929932, |
|
"learning_rate": 4.1018481249025523e-07, |
|
"loss": 0.4608, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.8204148201841814, |
|
"grad_norm": 2.887885808944702, |
|
"learning_rate": 4.0296770769984393e-07, |
|
"loss": 0.468, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.8220622395018204, |
|
"grad_norm": 3.4386472702026367, |
|
"learning_rate": 3.958090899597705e-07, |
|
"loss": 0.4487, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.8237096588194593, |
|
"grad_norm": 2.4126787185668945, |
|
"learning_rate": 3.8870915892668253e-07, |
|
"loss": 0.452, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.8253570781370982, |
|
"grad_norm": 1.8389333486557007, |
|
"learning_rate": 3.816681126204297e-07, |
|
"loss": 0.4666, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.8270044974547371, |
|
"grad_norm": 2.392357349395752, |
|
"learning_rate": 3.746861474185487e-07, |
|
"loss": 0.4457, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.828651916772376, |
|
"grad_norm": 2.450810194015503, |
|
"learning_rate": 3.677634580507758e-07, |
|
"loss": 0.4777, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.8302993360900149, |
|
"grad_norm": 2.1401236057281494, |
|
"learning_rate": 3.609002375936244e-07, |
|
"loss": 0.4546, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.831946755407654, |
|
"grad_norm": 2.275261163711548, |
|
"learning_rate": 3.540966774649962e-07, |
|
"loss": 0.4286, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.8335941747252928, |
|
"grad_norm": 2.4037744998931885, |
|
"learning_rate": 3.4735296741884113e-07, |
|
"loss": 0.441, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.8352415940429317, |
|
"grad_norm": 1.7885956764221191, |
|
"learning_rate": 3.406692955398699e-07, |
|
"loss": 0.4487, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.8368890133605706, |
|
"grad_norm": 2.087801456451416, |
|
"learning_rate": 3.340458482383038e-07, |
|
"loss": 0.4414, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.8385364326782095, |
|
"grad_norm": 1.9815489053726196, |
|
"learning_rate": 3.2748281024467615e-07, |
|
"loss": 0.4408, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.8401838519958486, |
|
"grad_norm": 2.0206503868103027, |
|
"learning_rate": 3.209803646046825e-07, |
|
"loss": 0.4769, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.8418312713134875, |
|
"grad_norm": 2.112884521484375, |
|
"learning_rate": 3.14538692674074e-07, |
|
"loss": 0.4392, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.8434786906311263, |
|
"grad_norm": 1.9830784797668457, |
|
"learning_rate": 3.0815797411359705e-07, |
|
"loss": 0.4534, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.8451261099487652, |
|
"grad_norm": 2.5792412757873535, |
|
"learning_rate": 3.0183838688398834e-07, |
|
"loss": 0.4141, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.8467735292664041, |
|
"grad_norm": 1.4945428371429443, |
|
"learning_rate": 2.9558010724100556e-07, |
|
"loss": 0.4413, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.848420948584043, |
|
"grad_norm": 1.6658538579940796, |
|
"learning_rate": 2.893833097305135e-07, |
|
"loss": 0.4381, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.850068367901682, |
|
"grad_norm": 1.9433872699737549, |
|
"learning_rate": 2.832481671836174e-07, |
|
"loss": 0.4916, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.851715787219321, |
|
"grad_norm": 2.8448355197906494, |
|
"learning_rate": 2.771748507118413e-07, |
|
"loss": 0.4529, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.8533632065369599, |
|
"grad_norm": 1.6692224740982056, |
|
"learning_rate": 2.711635297023546e-07, |
|
"loss": 0.4331, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.8550106258545987, |
|
"grad_norm": 2.085247039794922, |
|
"learning_rate": 2.6521437181325105e-07, |
|
"loss": 0.4573, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.8566580451722376, |
|
"grad_norm": 1.9214270114898682, |
|
"learning_rate": 2.593275429688699e-07, |
|
"loss": 0.443, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.8583054644898767, |
|
"grad_norm": 1.856969952583313, |
|
"learning_rate": 2.535032073551677e-07, |
|
"loss": 0.4804, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.8599528838075156, |
|
"grad_norm": 2.086461067199707, |
|
"learning_rate": 2.4774152741514207e-07, |
|
"loss": 0.4505, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.8616003031251545, |
|
"grad_norm": 2.729485511779785, |
|
"learning_rate": 2.4204266384429855e-07, |
|
"loss": 0.4661, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.8632477224427934, |
|
"grad_norm": 1.9726873636245728, |
|
"learning_rate": 2.3640677558616875e-07, |
|
"loss": 0.4561, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.8648951417604323, |
|
"grad_norm": 1.9894851446151733, |
|
"learning_rate": 2.308340198278808e-07, |
|
"loss": 0.4564, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.8665425610780711, |
|
"grad_norm": 1.4880281686782837, |
|
"learning_rate": 2.2532455199577085e-07, |
|
"loss": 0.43, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.8681899803957102, |
|
"grad_norm": 1.956846833229065, |
|
"learning_rate": 2.198785257510491e-07, |
|
"loss": 0.4671, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.8698373997133491, |
|
"grad_norm": 2.6969892978668213, |
|
"learning_rate": 2.144960929855175e-07, |
|
"loss": 0.4306, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.871484819030988, |
|
"grad_norm": 2.5215413570404053, |
|
"learning_rate": 2.091774038173297e-07, |
|
"loss": 0.4458, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.8731322383486269, |
|
"grad_norm": 1.9688514471054077, |
|
"learning_rate": 2.039226065868044e-07, |
|
"loss": 0.4283, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.8747796576662658, |
|
"grad_norm": 2.583317995071411, |
|
"learning_rate": 1.9873184785229205e-07, |
|
"loss": 0.4429, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.8764270769839048, |
|
"grad_norm": 1.426698088645935, |
|
"learning_rate": 1.9360527238608206e-07, |
|
"loss": 0.4559, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.8780744963015437, |
|
"grad_norm": 1.861429214477539, |
|
"learning_rate": 1.8854302317036805e-07, |
|
"loss": 0.4513, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.8797219156191826, |
|
"grad_norm": 1.8271915912628174, |
|
"learning_rate": 1.8354524139325923e-07, |
|
"loss": 0.4387, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.8813693349368215, |
|
"grad_norm": 1.5195509195327759, |
|
"learning_rate": 1.786120664448432e-07, |
|
"loss": 0.4354, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.8830167542544604, |
|
"grad_norm": 1.372504711151123, |
|
"learning_rate": 1.7374363591329768e-07, |
|
"loss": 0.4212, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.8846641735720993, |
|
"grad_norm": 1.619235634803772, |
|
"learning_rate": 1.6894008558105274e-07, |
|
"loss": 0.427, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.8863115928897383, |
|
"grad_norm": 2.1850979328155518, |
|
"learning_rate": 1.6420154942100585e-07, |
|
"loss": 0.4412, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.8879590122073772, |
|
"grad_norm": 2.942978858947754, |
|
"learning_rate": 1.5952815959278168e-07, |
|
"loss": 0.4453, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.8896064315250161, |
|
"grad_norm": 2.521692991256714, |
|
"learning_rate": 1.5492004643904962e-07, |
|
"loss": 0.4242, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.891253850842655, |
|
"grad_norm": 2.2875068187713623, |
|
"learning_rate": 1.5037733848188658e-07, |
|
"loss": 0.4234, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.8929012701602939, |
|
"grad_norm": 2.937547445297241, |
|
"learning_rate": 1.4590016241919357e-07, |
|
"loss": 0.4557, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.8945486894779329, |
|
"grad_norm": 2.359915256500244, |
|
"learning_rate": 1.4148864312116124e-07, |
|
"loss": 0.4355, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.8961961087955718, |
|
"grad_norm": 1.8787094354629517, |
|
"learning_rate": 1.3714290362678685e-07, |
|
"loss": 0.4478, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.8978435281132107, |
|
"grad_norm": 1.8454256057739258, |
|
"learning_rate": 1.328630651404436e-07, |
|
"loss": 0.4374, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.8994909474308496, |
|
"grad_norm": 1.6232373714447021, |
|
"learning_rate": 1.286492470285e-07, |
|
"loss": 0.4501, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.9011383667484885, |
|
"grad_norm": 2.0913541316986084, |
|
"learning_rate": 1.2450156681598964e-07, |
|
"loss": 0.4564, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.9027857860661274, |
|
"grad_norm": 3.337273120880127, |
|
"learning_rate": 1.2042014018333575e-07, |
|
"loss": 0.444, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.9044332053837664, |
|
"grad_norm": 1.986515760421753, |
|
"learning_rate": 1.1640508096312259e-07, |
|
"loss": 0.409, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.9060806247014053, |
|
"grad_norm": 2.8050506114959717, |
|
"learning_rate": 1.1245650113692052e-07, |
|
"loss": 0.4345, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.9077280440190442, |
|
"grad_norm": 1.7033820152282715, |
|
"learning_rate": 1.085745108321648e-07, |
|
"loss": 0.443, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.9093754633366831, |
|
"grad_norm": 1.3102610111236572, |
|
"learning_rate": 1.0475921831908265e-07, |
|
"loss": 0.452, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.911022882654322, |
|
"grad_norm": 1.4171772003173828, |
|
"learning_rate": 1.0101073000767264e-07, |
|
"loss": 0.4472, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.912670301971961, |
|
"grad_norm": 2.2562355995178223, |
|
"learning_rate": 9.732915044474017e-08, |
|
"loss": 0.4424, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.9143177212895999, |
|
"grad_norm": 1.537164330482483, |
|
"learning_rate": 9.371458231097807e-08, |
|
"loss": 0.4339, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.9159651406072388, |
|
"grad_norm": 1.478975534439087, |
|
"learning_rate": 9.016712641810393e-08, |
|
"loss": 0.4746, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.9176125599248777, |
|
"grad_norm": 2.3379318714141846, |
|
"learning_rate": 8.668688170604955e-08, |
|
"loss": 0.4573, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.9192599792425166, |
|
"grad_norm": 2.287503242492676, |
|
"learning_rate": 8.327394524020094e-08, |
|
"loss": 0.459, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.9209073985601555, |
|
"grad_norm": 2.074932098388672, |
|
"learning_rate": 7.992841220868908e-08, |
|
"loss": 0.4406, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.9225548178777945, |
|
"grad_norm": 2.3185274600982666, |
|
"learning_rate": 7.665037591973873e-08, |
|
"loss": 0.4315, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.9242022371954334, |
|
"grad_norm": 2.681718587875366, |
|
"learning_rate": 7.343992779906328e-08, |
|
"loss": 0.4496, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.9258496565130723, |
|
"grad_norm": 2.437779188156128, |
|
"learning_rate": 7.029715738731541e-08, |
|
"loss": 0.4363, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.9274970758307112, |
|
"grad_norm": 2.111402988433838, |
|
"learning_rate": 6.722215233759071e-08, |
|
"loss": 0.446, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.9291444951483501, |
|
"grad_norm": 1.8886587619781494, |
|
"learning_rate": 6.421499841298195e-08, |
|
"loss": 0.4414, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.9307919144659891, |
|
"grad_norm": 1.649271011352539, |
|
"learning_rate": 6.127577948418728e-08, |
|
"loss": 0.4409, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.932439333783628, |
|
"grad_norm": 2.6484766006469727, |
|
"learning_rate": 5.84045775271716e-08, |
|
"loss": 0.4325, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.9340867531012669, |
|
"grad_norm": 1.9493142366409302, |
|
"learning_rate": 5.560147262088034e-08, |
|
"loss": 0.4165, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.9357341724189058, |
|
"grad_norm": 1.875835657119751, |
|
"learning_rate": 5.286654294500454e-08, |
|
"loss": 0.433, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.9373815917365447, |
|
"grad_norm": 1.9242185354232788, |
|
"learning_rate": 5.019986477780181e-08, |
|
"loss": 0.445, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.9390290110541836, |
|
"grad_norm": 2.1051392555236816, |
|
"learning_rate": 4.7601512493968824e-08, |
|
"loss": 0.4469, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.9406764303718226, |
|
"grad_norm": 1.5556972026824951, |
|
"learning_rate": 4.507155856256634e-08, |
|
"loss": 0.4746, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.9423238496894615, |
|
"grad_norm": 1.9394145011901855, |
|
"learning_rate": 4.2610073544998577e-08, |
|
"loss": 0.4347, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.9439712690071004, |
|
"grad_norm": 1.9497727155685425, |
|
"learning_rate": 4.021712609304507e-08, |
|
"loss": 0.4426, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.9456186883247393, |
|
"grad_norm": 1.587270736694336, |
|
"learning_rate": 3.789278294694498e-08, |
|
"loss": 0.4277, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.9472661076423782, |
|
"grad_norm": 1.201451301574707, |
|
"learning_rate": 3.563710893353778e-08, |
|
"loss": 0.4448, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.9489135269600172, |
|
"grad_norm": 2.1374833583831787, |
|
"learning_rate": 3.345016696445297e-08, |
|
"loss": 0.4276, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.9505609462776561, |
|
"grad_norm": 2.4307470321655273, |
|
"learning_rate": 3.133201803435737e-08, |
|
"loss": 0.4353, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.952208365595295, |
|
"grad_norm": 1.3492801189422607, |
|
"learning_rate": 2.928272121925202e-08, |
|
"loss": 0.4129, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.9538557849129339, |
|
"grad_norm": 1.4907076358795166, |
|
"learning_rate": 2.7302333674827098e-08, |
|
"loss": 0.4478, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.9555032042305728, |
|
"grad_norm": 1.893916368484497, |
|
"learning_rate": 2.539091063486432e-08, |
|
"loss": 0.4465, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.9571506235482117, |
|
"grad_norm": 2.277837038040161, |
|
"learning_rate": 2.354850540969983e-08, |
|
"loss": 0.4326, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.9587980428658507, |
|
"grad_norm": 1.9928171634674072, |
|
"learning_rate": 2.177516938473567e-08, |
|
"loss": 0.418, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.9604454621834896, |
|
"grad_norm": 2.096127986907959, |
|
"learning_rate": 2.0070952019006496e-08, |
|
"loss": 0.453, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.9620928815011285, |
|
"grad_norm": 2.574500322341919, |
|
"learning_rate": 1.8435900843800926e-08, |
|
"loss": 0.4425, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.9637403008187674, |
|
"grad_norm": 2.5897390842437744, |
|
"learning_rate": 1.6870061461335685e-08, |
|
"loss": 0.4273, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.9653877201364063, |
|
"grad_norm": 1.7342420816421509, |
|
"learning_rate": 1.5373477543482453e-08, |
|
"loss": 0.4365, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.9670351394540453, |
|
"grad_norm": 3.1810550689697266, |
|
"learning_rate": 1.3946190830552431e-08, |
|
"loss": 0.4385, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.9686825587716842, |
|
"grad_norm": 2.5934085845947266, |
|
"learning_rate": 1.2588241130129242e-08, |
|
"loss": 0.4453, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.9703299780893231, |
|
"grad_norm": 3.0193750858306885, |
|
"learning_rate": 1.1299666315961743e-08, |
|
"loss": 0.4181, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.971977397406962, |
|
"grad_norm": 2.132373809814453, |
|
"learning_rate": 1.0080502326904329e-08, |
|
"loss": 0.4217, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.9736248167246009, |
|
"grad_norm": 2.04423189163208, |
|
"learning_rate": 8.930783165917723e-09, |
|
"loss": 0.4313, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.9752722360422398, |
|
"grad_norm": 1.6803611516952515, |
|
"learning_rate": 7.85054089911863e-09, |
|
"loss": 0.4507, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.9769196553598788, |
|
"grad_norm": 2.210566520690918, |
|
"learning_rate": 6.8398056548860116e-09, |
|
"loss": 0.4446, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.9785670746775177, |
|
"grad_norm": 1.9046763181686401, |
|
"learning_rate": 5.898605623021192e-09, |
|
"loss": 0.4478, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.9802144939951566, |
|
"grad_norm": 1.7694292068481445, |
|
"learning_rate": 5.026967053960441e-09, |
|
"loss": 0.4296, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.9818619133127955, |
|
"grad_norm": 1.8257120847702026, |
|
"learning_rate": 4.224914258044721e-09, |
|
"loss": 0.4303, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.9835093326304344, |
|
"grad_norm": 1.4642283916473389, |
|
"learning_rate": 3.4924696048396765e-09, |
|
"loss": 0.4322, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.9851567519480734, |
|
"grad_norm": 2.159425973892212, |
|
"learning_rate": 2.829653522513076e-09, |
|
"loss": 0.4279, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.9868041712657123, |
|
"grad_norm": 1.9656975269317627, |
|
"learning_rate": 2.2364844972647125e-09, |
|
"loss": 0.4386, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.9884515905833512, |
|
"grad_norm": 2.556670665740967, |
|
"learning_rate": 1.7129790728101503e-09, |
|
"loss": 0.4393, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.9900990099009901, |
|
"grad_norm": 2.0833001136779785, |
|
"learning_rate": 1.2591518499208143e-09, |
|
"loss": 0.4191, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.991746429218629, |
|
"grad_norm": 2.159656524658203, |
|
"learning_rate": 8.750154860151516e-10, |
|
"loss": 0.4675, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.9933938485362679, |
|
"grad_norm": 2.0303680896759033, |
|
"learning_rate": 5.605806948061343e-10, |
|
"loss": 0.447, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.9950412678539069, |
|
"grad_norm": 1.8287807703018188, |
|
"learning_rate": 3.1585624600372066e-10, |
|
"loss": 0.4306, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.9966886871715458, |
|
"grad_norm": 2.2728703022003174, |
|
"learning_rate": 1.4084896506783018e-10, |
|
"loss": 0.4284, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.9983361064891847, |
|
"grad_norm": 2.0561728477478027, |
|
"learning_rate": 3.556373302016081e-11, |
|
"loss": 0.4195, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.9999835258068236, |
|
"grad_norm": 2.020707130432129, |
|
"learning_rate": 3.4863070763613284e-15, |
|
"loss": 0.4415, |
|
"step": 60700 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 60701, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.004310214013092e+17, |
|
"train_batch_size": 3, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|