Qwen2-0.5Bchp-690-MultiBio / trainer_state.json
Upload trainer_state.json with huggingface_hub
3ba32f2 verified
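The `log_history` array below records one entry per 100 optimizer steps, each carrying the fractional epoch, gradient norm, learning rate, and training loss at that step. As a minimal sketch only (assuming the file has been downloaded locally as `trainer_state.json`; the summary printing is illustrative and not part of the original upload), the history can be loaded and inspected like this:

```python
import json

# Load the trainer state exported by the Hugging Face Trainer.
# Assumes the file has been saved locally as "trainer_state.json".
with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]

# Each logged entry holds the fractional epoch, gradient norm,
# learning rate, and training loss at a given step.
steps = [entry["step"] for entry in history if "loss" in entry]
losses = [entry["loss"] for entry in history if "loss" in entry]

# Print a coarse summary of how the training loss evolves.
for step, loss in zip(steps[::50], losses[::50]):
    print(f"step {step:>6}: loss {loss:.4f}")
```

The file contents follow.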
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 60701,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0016474193176389186,
"grad_norm": 35.35807800292969,
"learning_rate": 4.1186161449752885e-07,
"loss": 1.1167,
"step": 100
},
{
"epoch": 0.0032948386352778373,
"grad_norm": 2.662261724472046,
"learning_rate": 8.237232289950577e-07,
"loss": 0.8293,
"step": 200
},
{
"epoch": 0.004942257952916756,
"grad_norm": 3.5144577026367188,
"learning_rate": 1.2355848434925866e-06,
"loss": 0.8225,
"step": 300
},
{
"epoch": 0.006589677270555675,
"grad_norm": 2.4613759517669678,
"learning_rate": 1.6474464579901154e-06,
"loss": 0.767,
"step": 400
},
{
"epoch": 0.008237096588194593,
"grad_norm": 2.598069906234741,
"learning_rate": 2.0593080724876445e-06,
"loss": 0.687,
"step": 500
},
{
"epoch": 0.009884515905833512,
"grad_norm": 3.49182391166687,
"learning_rate": 2.471169686985173e-06,
"loss": 0.6412,
"step": 600
},
{
"epoch": 0.01153193522347243,
"grad_norm": 1.9722175598144531,
"learning_rate": 2.883031301482702e-06,
"loss": 0.6173,
"step": 700
},
{
"epoch": 0.01317935454111135,
"grad_norm": 2.084155321121216,
"learning_rate": 3.294892915980231e-06,
"loss": 0.622,
"step": 800
},
{
"epoch": 0.014826773858750269,
"grad_norm": 2.001030206680298,
"learning_rate": 3.70675453047776e-06,
"loss": 0.5975,
"step": 900
},
{
"epoch": 0.016474193176389186,
"grad_norm": 2.722954034805298,
"learning_rate": 4.118616144975289e-06,
"loss": 0.6171,
"step": 1000
},
{
"epoch": 0.018121612494028105,
"grad_norm": 2.851048469543457,
"learning_rate": 4.5304777594728176e-06,
"loss": 0.5398,
"step": 1100
},
{
"epoch": 0.019769031811667025,
"grad_norm": 2.0754776000976562,
"learning_rate": 4.942339373970346e-06,
"loss": 0.5444,
"step": 1200
},
{
"epoch": 0.021416451129305944,
"grad_norm": 1.9554790258407593,
"learning_rate": 4.999974215318018e-06,
"loss": 0.5688,
"step": 1300
},
{
"epoch": 0.02306387044694486,
"grad_norm": 2.532405376434326,
"learning_rate": 4.999879388694095e-06,
"loss": 0.5549,
"step": 1400
},
{
"epoch": 0.02471128976458378,
"grad_norm": 2.0328919887542725,
"learning_rate": 4.999714839456846e-06,
"loss": 0.5484,
"step": 1500
},
{
"epoch": 0.0263587090822227,
"grad_norm": 1.7955541610717773,
"learning_rate": 4.999480572195616e-06,
"loss": 0.5765,
"step": 1600
},
{
"epoch": 0.028006128399861618,
"grad_norm": 1.7495211362838745,
"learning_rate": 4.999176593444209e-06,
"loss": 0.5829,
"step": 1700
},
{
"epoch": 0.029653547717500537,
"grad_norm": 2.1942079067230225,
"learning_rate": 4.9988029116807125e-06,
"loss": 0.5331,
"step": 1800
},
{
"epoch": 0.03130096703513945,
"grad_norm": 2.9001498222351074,
"learning_rate": 4.998359537327255e-06,
"loss": 0.5108,
"step": 1900
},
{
"epoch": 0.03294838635277837,
"grad_norm": 2.320958375930786,
"learning_rate": 4.997846482749723e-06,
"loss": 0.5484,
"step": 2000
},
{
"epoch": 0.03459580567041729,
"grad_norm": 2.4439444541931152,
"learning_rate": 4.9972637622574074e-06,
"loss": 0.5448,
"step": 2100
},
{
"epoch": 0.03624322498805621,
"grad_norm": 2.403137445449829,
"learning_rate": 4.996611392102611e-06,
"loss": 0.519,
"step": 2200
},
{
"epoch": 0.03789064430569513,
"grad_norm": 1.4548203945159912,
"learning_rate": 4.995889390480193e-06,
"loss": 0.4869,
"step": 2300
},
{
"epoch": 0.03953806362333405,
"grad_norm": 2.335745334625244,
"learning_rate": 4.99509777752706e-06,
"loss": 0.5545,
"step": 2400
},
{
"epoch": 0.04118548294097297,
"grad_norm": 2.894595146179199,
"learning_rate": 4.994236575321607e-06,
"loss": 0.5364,
"step": 2500
},
{
"epoch": 0.04283290225861189,
"grad_norm": 3.079472064971924,
"learning_rate": 4.993305807883101e-06,
"loss": 0.5514,
"step": 2600
},
{
"epoch": 0.0444803215762508,
"grad_norm": 2.3833718299865723,
"learning_rate": 4.9923055011710075e-06,
"loss": 0.492,
"step": 2700
},
{
"epoch": 0.04612774089388972,
"grad_norm": 2.7838637828826904,
"learning_rate": 4.991235683084274e-06,
"loss": 0.5156,
"step": 2800
},
{
"epoch": 0.04777516021152864,
"grad_norm": 1.7487517595291138,
"learning_rate": 4.9900963834605445e-06,
"loss": 0.514,
"step": 2900
},
{
"epoch": 0.04942257952916756,
"grad_norm": 1.7354815006256104,
"learning_rate": 4.98888763407533e-06,
"loss": 0.5202,
"step": 3000
},
{
"epoch": 0.05106999884680648,
"grad_norm": 4.250129222869873,
"learning_rate": 4.987609468641125e-06,
"loss": 0.5069,
"step": 3100
},
{
"epoch": 0.0527174181644454,
"grad_norm": 2.1309328079223633,
"learning_rate": 4.986261922806461e-06,
"loss": 0.5372,
"step": 3200
},
{
"epoch": 0.054364837482084316,
"grad_norm": 2.0532209873199463,
"learning_rate": 4.9848450341549196e-06,
"loss": 0.5118,
"step": 3300
},
{
"epoch": 0.056012256799723235,
"grad_norm": 2.774035692214966,
"learning_rate": 4.983358842204078e-06,
"loss": 0.5082,
"step": 3400
},
{
"epoch": 0.057659676117362155,
"grad_norm": 4.331142425537109,
"learning_rate": 4.981803388404411e-06,
"loss": 0.5328,
"step": 3500
},
{
"epoch": 0.059307095435001074,
"grad_norm": 2.5397560596466064,
"learning_rate": 4.980178716138135e-06,
"loss": 0.5173,
"step": 3600
},
{
"epoch": 0.060954514752639986,
"grad_norm": 2.2354204654693604,
"learning_rate": 4.978484870717991e-06,
"loss": 0.4946,
"step": 3700
},
{
"epoch": 0.0626019340702789,
"grad_norm": 1.8501393795013428,
"learning_rate": 4.976721899385992e-06,
"loss": 0.5341,
"step": 3800
},
{
"epoch": 0.06424935338791783,
"grad_norm": 1.828378677368164,
"learning_rate": 4.974889851312098e-06,
"loss": 0.5097,
"step": 3900
},
{
"epoch": 0.06589677270555674,
"grad_norm": 2.1924521923065186,
"learning_rate": 4.972988777592845e-06,
"loss": 0.505,
"step": 4000
},
{
"epoch": 0.06754419202319567,
"grad_norm": 1.9084734916687012,
"learning_rate": 4.971018731249923e-06,
"loss": 0.5043,
"step": 4100
},
{
"epoch": 0.06919161134083458,
"grad_norm": 2.8705804347991943,
"learning_rate": 4.968979767228693e-06,
"loss": 0.5118,
"step": 4200
},
{
"epoch": 0.0708390306584735,
"grad_norm": 2.0432722568511963,
"learning_rate": 4.96687194239666e-06,
"loss": 0.5295,
"step": 4300
},
{
"epoch": 0.07248644997611242,
"grad_norm": 2.022822380065918,
"learning_rate": 4.964695315541883e-06,
"loss": 0.5649,
"step": 4400
},
{
"epoch": 0.07413386929375133,
"grad_norm": 2.284590721130371,
"learning_rate": 4.962449947371334e-06,
"loss": 0.4841,
"step": 4500
},
{
"epoch": 0.07578128861139026,
"grad_norm": 3.217561721801758,
"learning_rate": 4.9601359005092095e-06,
"loss": 0.5401,
"step": 4600
},
{
"epoch": 0.07742870792902917,
"grad_norm": 1.9388020038604736,
"learning_rate": 4.957753239495181e-06,
"loss": 0.5251,
"step": 4700
},
{
"epoch": 0.0790761272466681,
"grad_norm": 1.3349353075027466,
"learning_rate": 4.955302030782596e-06,
"loss": 0.4962,
"step": 4800
},
{
"epoch": 0.08072354656430701,
"grad_norm": 2.4485511779785156,
"learning_rate": 4.952782342736625e-06,
"loss": 0.4939,
"step": 4900
},
{
"epoch": 0.08237096588194594,
"grad_norm": 3.657675266265869,
"learning_rate": 4.950194245632349e-06,
"loss": 0.5123,
"step": 5000
},
{
"epoch": 0.08401838519958485,
"grad_norm": 2.871431589126587,
"learning_rate": 4.9475378116528105e-06,
"loss": 0.5063,
"step": 5100
},
{
"epoch": 0.08566580451722378,
"grad_norm": 1.394823431968689,
"learning_rate": 4.944813114886991e-06,
"loss": 0.4939,
"step": 5200
},
{
"epoch": 0.08731322383486269,
"grad_norm": 1.6979378461837769,
"learning_rate": 4.942020231327749e-06,
"loss": 0.5102,
"step": 5300
},
{
"epoch": 0.0889606431525016,
"grad_norm": 1.941582202911377,
"learning_rate": 4.939159238869698e-06,
"loss": 0.5347,
"step": 5400
},
{
"epoch": 0.09060806247014053,
"grad_norm": 1.9074257612228394,
"learning_rate": 4.936230217307035e-06,
"loss": 0.4935,
"step": 5500
},
{
"epoch": 0.09225548178777944,
"grad_norm": 2.327624797821045,
"learning_rate": 4.933233248331317e-06,
"loss": 0.5218,
"step": 5600
},
{
"epoch": 0.09390290110541837,
"grad_norm": 2.605468988418579,
"learning_rate": 4.930168415529181e-06,
"loss": 0.4831,
"step": 5700
},
{
"epoch": 0.09555032042305728,
"grad_norm": 2.137749671936035,
"learning_rate": 4.927035804380012e-06,
"loss": 0.4983,
"step": 5800
},
{
"epoch": 0.0971977397406962,
"grad_norm": 1.9908422231674194,
"learning_rate": 4.923835502253558e-06,
"loss": 0.4991,
"step": 5900
},
{
"epoch": 0.09884515905833512,
"grad_norm": 1.8356066942214966,
"learning_rate": 4.920567598407498e-06,
"loss": 0.4907,
"step": 6000
},
{
"epoch": 0.10049257837597404,
"grad_norm": 2.3301796913146973,
"learning_rate": 4.917232183984946e-06,
"loss": 0.4833,
"step": 6100
},
{
"epoch": 0.10213999769361295,
"grad_norm": 2.835822582244873,
"learning_rate": 4.913829352011914e-06,
"loss": 0.554,
"step": 6200
},
{
"epoch": 0.10378741701125187,
"grad_norm": 1.825016736984253,
"learning_rate": 4.910359197394717e-06,
"loss": 0.5082,
"step": 6300
},
{
"epoch": 0.1054348363288908,
"grad_norm": 3.021340847015381,
"learning_rate": 4.9068218169173245e-06,
"loss": 0.4945,
"step": 6400
},
{
"epoch": 0.1070822556465297,
"grad_norm": 3.6816606521606445,
"learning_rate": 4.903217309238658e-06,
"loss": 0.54,
"step": 6500
},
{
"epoch": 0.10872967496416863,
"grad_norm": 2.1384148597717285,
"learning_rate": 4.899545774889848e-06,
"loss": 0.497,
"step": 6600
},
{
"epoch": 0.11037709428180754,
"grad_norm": 2.311786651611328,
"learning_rate": 4.895807316271421e-06,
"loss": 0.4927,
"step": 6700
},
{
"epoch": 0.11202451359944647,
"grad_norm": 1.765767216682434,
"learning_rate": 4.892002037650451e-06,
"loss": 0.4984,
"step": 6800
},
{
"epoch": 0.11367193291708538,
"grad_norm": 1.8108317852020264,
"learning_rate": 4.888130045157645e-06,
"loss": 0.4957,
"step": 6900
},
{
"epoch": 0.11531935223472431,
"grad_norm": 2.6695711612701416,
"learning_rate": 4.884191446784387e-06,
"loss": 0.4992,
"step": 7000
},
{
"epoch": 0.11696677155236322,
"grad_norm": 2.477202892303467,
"learning_rate": 4.880186352379726e-06,
"loss": 0.4947,
"step": 7100
},
{
"epoch": 0.11861419087000215,
"grad_norm": 3.69132399559021,
"learning_rate": 4.876114873647308e-06,
"loss": 0.5092,
"step": 7200
},
{
"epoch": 0.12026161018764106,
"grad_norm": 2.353121042251587,
"learning_rate": 4.871977124142271e-06,
"loss": 0.4752,
"step": 7300
},
{
"epoch": 0.12190902950527997,
"grad_norm": 2.3746302127838135,
"learning_rate": 4.867773219268062e-06,
"loss": 0.5186,
"step": 7400
},
{
"epoch": 0.1235564488229189,
"grad_norm": 2.437284469604492,
"learning_rate": 4.863503276273232e-06,
"loss": 0.4882,
"step": 7500
},
{
"epoch": 0.1252038681405578,
"grad_norm": 2.287785291671753,
"learning_rate": 4.859167414248163e-06,
"loss": 0.4755,
"step": 7600
},
{
"epoch": 0.12685128745819674,
"grad_norm": 4.1828413009643555,
"learning_rate": 4.854765754121738e-06,
"loss": 0.5062,
"step": 7700
},
{
"epoch": 0.12849870677583566,
"grad_norm": 2.3262546062469482,
"learning_rate": 4.85029841865798e-06,
"loss": 0.4756,
"step": 7800
},
{
"epoch": 0.13014612609347456,
"grad_norm": 1.2054634094238281,
"learning_rate": 4.8457655324526215e-06,
"loss": 0.4827,
"step": 7900
},
{
"epoch": 0.1317935454111135,
"grad_norm": 2.3276774883270264,
"learning_rate": 4.8411672219296304e-06,
"loss": 0.4833,
"step": 8000
},
{
"epoch": 0.13344096472875241,
"grad_norm": 1.9837372303009033,
"learning_rate": 4.836503615337684e-06,
"loss": 0.4681,
"step": 8100
},
{
"epoch": 0.13508838404639134,
"grad_norm": 1.6989622116088867,
"learning_rate": 4.831774842746595e-06,
"loss": 0.5375,
"step": 8200
},
{
"epoch": 0.13673580336403024,
"grad_norm": 2.29801869392395,
"learning_rate": 4.826981036043677e-06,
"loss": 0.5102,
"step": 8300
},
{
"epoch": 0.13838322268166917,
"grad_norm": 8.920065879821777,
"learning_rate": 4.822122328930076e-06,
"loss": 0.5145,
"step": 8400
},
{
"epoch": 0.1400306419993081,
"grad_norm": 2.425342321395874,
"learning_rate": 4.817198856917029e-06,
"loss": 0.4888,
"step": 8500
},
{
"epoch": 0.141678061316947,
"grad_norm": 2.2098586559295654,
"learning_rate": 4.812210757322096e-06,
"loss": 0.5088,
"step": 8600
},
{
"epoch": 0.14332548063458592,
"grad_norm": 2.6320948600769043,
"learning_rate": 4.807158169265326e-06,
"loss": 0.4868,
"step": 8700
},
{
"epoch": 0.14497289995222484,
"grad_norm": 2.660802125930786,
"learning_rate": 4.802041233665373e-06,
"loss": 0.4742,
"step": 8800
},
{
"epoch": 0.14662031926986377,
"grad_norm": 2.3442442417144775,
"learning_rate": 4.796860093235572e-06,
"loss": 0.4789,
"step": 8900
},
{
"epoch": 0.14826773858750267,
"grad_norm": 2.416050434112549,
"learning_rate": 4.791614892479956e-06,
"loss": 0.5149,
"step": 9000
},
{
"epoch": 0.1499151579051416,
"grad_norm": 2.576631784439087,
"learning_rate": 4.786305777689222e-06,
"loss": 0.5096,
"step": 9100
},
{
"epoch": 0.15156257722278052,
"grad_norm": 1.699407935142517,
"learning_rate": 4.7809328969366585e-06,
"loss": 0.5006,
"step": 9200
},
{
"epoch": 0.15320999654041945,
"grad_norm": 2.303194046020508,
"learning_rate": 4.7754964000740086e-06,
"loss": 0.5113,
"step": 9300
},
{
"epoch": 0.15485741585805834,
"grad_norm": 2.021639347076416,
"learning_rate": 4.7699964387272964e-06,
"loss": 0.4823,
"step": 9400
},
{
"epoch": 0.15650483517569727,
"grad_norm": 1.7534514665603638,
"learning_rate": 4.764433166292593e-06,
"loss": 0.4912,
"step": 9500
},
{
"epoch": 0.1581522544933362,
"grad_norm": 2.9182558059692383,
"learning_rate": 4.758806737931741e-06,
"loss": 0.4957,
"step": 9600
},
{
"epoch": 0.1597996738109751,
"grad_norm": 2.112656831741333,
"learning_rate": 4.753117310568026e-06,
"loss": 0.4733,
"step": 9700
},
{
"epoch": 0.16144709312861402,
"grad_norm": 2.052156686782837,
"learning_rate": 4.7473650428818025e-06,
"loss": 0.4794,
"step": 9800
},
{
"epoch": 0.16309451244625295,
"grad_norm": 2.4516518115997314,
"learning_rate": 4.741550095306065e-06,
"loss": 0.4807,
"step": 9900
},
{
"epoch": 0.16474193176389187,
"grad_norm": 1.8814926147460938,
"learning_rate": 4.7356726300219715e-06,
"loss": 0.4392,
"step": 10000
},
{
"epoch": 0.16638935108153077,
"grad_norm": 1.6867588758468628,
"learning_rate": 4.729732810954329e-06,
"loss": 0.489,
"step": 10100
},
{
"epoch": 0.1680367703991697,
"grad_norm": 1.996559739112854,
"learning_rate": 4.723730803767014e-06,
"loss": 0.45,
"step": 10200
},
{
"epoch": 0.16968418971680863,
"grad_norm": 2.4676289558410645,
"learning_rate": 4.71766677585835e-06,
"loss": 0.49,
"step": 10300
},
{
"epoch": 0.17133160903444755,
"grad_norm": 2.4000778198242188,
"learning_rate": 4.711540896356447e-06,
"loss": 0.5133,
"step": 10400
},
{
"epoch": 0.17297902835208645,
"grad_norm": 1.6576099395751953,
"learning_rate": 4.70535333611448e-06,
"loss": 0.4682,
"step": 10500
},
{
"epoch": 0.17462644766972538,
"grad_norm": 2.6019415855407715,
"learning_rate": 4.699104267705921e-06,
"loss": 0.5221,
"step": 10600
},
{
"epoch": 0.1762738669873643,
"grad_norm": 2.8221852779388428,
"learning_rate": 4.692793865419731e-06,
"loss": 0.5142,
"step": 10700
},
{
"epoch": 0.1779212863050032,
"grad_norm": 1.781231164932251,
"learning_rate": 4.686422305255498e-06,
"loss": 0.4908,
"step": 10800
},
{
"epoch": 0.17956870562264213,
"grad_norm": 2.3753836154937744,
"learning_rate": 4.679989764918524e-06,
"loss": 0.4894,
"step": 10900
},
{
"epoch": 0.18121612494028105,
"grad_norm": 1.7550493478775024,
"learning_rate": 4.673496423814874e-06,
"loss": 0.4707,
"step": 11000
},
{
"epoch": 0.18286354425791998,
"grad_norm": 1.6989047527313232,
"learning_rate": 4.666942463046369e-06,
"loss": 0.5209,
"step": 11100
},
{
"epoch": 0.18451096357555888,
"grad_norm": 2.0338029861450195,
"learning_rate": 4.660328065405537e-06,
"loss": 0.5168,
"step": 11200
},
{
"epoch": 0.1861583828931978,
"grad_norm": 2.14629864692688,
"learning_rate": 4.6536534153705135e-06,
"loss": 0.4802,
"step": 11300
},
{
"epoch": 0.18780580221083673,
"grad_norm": 1.9664320945739746,
"learning_rate": 4.646918699099898e-06,
"loss": 0.505,
"step": 11400
},
{
"epoch": 0.18945322152847563,
"grad_norm": 2.435833692550659,
"learning_rate": 4.640124104427558e-06,
"loss": 0.5205,
"step": 11500
},
{
"epoch": 0.19110064084611456,
"grad_norm": 1.8850288391113281,
"learning_rate": 4.633269820857397e-06,
"loss": 0.4964,
"step": 11600
},
{
"epoch": 0.19274806016375348,
"grad_norm": 1.9810831546783447,
"learning_rate": 4.626356039558061e-06,
"loss": 0.5006,
"step": 11700
},
{
"epoch": 0.1943954794813924,
"grad_norm": 2.52791166305542,
"learning_rate": 4.619382953357615e-06,
"loss": 0.4809,
"step": 11800
},
{
"epoch": 0.1960428987990313,
"grad_norm": 2.0693445205688477,
"learning_rate": 4.612350756738157e-06,
"loss": 0.4591,
"step": 11900
},
{
"epoch": 0.19769031811667023,
"grad_norm": 2.312404155731201,
"learning_rate": 4.6052596458303996e-06,
"loss": 0.4695,
"step": 12000
},
{
"epoch": 0.19933773743430916,
"grad_norm": 2.2149617671966553,
"learning_rate": 4.5981098184081995e-06,
"loss": 0.4743,
"step": 12100
},
{
"epoch": 0.20098515675194809,
"grad_norm": 2.597283124923706,
"learning_rate": 4.590901473883037e-06,
"loss": 0.4893,
"step": 12200
},
{
"epoch": 0.20263257606958698,
"grad_norm": 1.9223053455352783,
"learning_rate": 4.5836348132984584e-06,
"loss": 0.4706,
"step": 12300
},
{
"epoch": 0.2042799953872259,
"grad_norm": 1.0610065460205078,
"learning_rate": 4.57631003932447e-06,
"loss": 0.4566,
"step": 12400
},
{
"epoch": 0.20592741470486484,
"grad_norm": 2.5029940605163574,
"learning_rate": 4.568927356251878e-06,
"loss": 0.451,
"step": 12500
},
{
"epoch": 0.20757483402250373,
"grad_norm": 1.3197004795074463,
"learning_rate": 4.5614869699866e-06,
"loss": 0.4583,
"step": 12600
},
{
"epoch": 0.20922225334014266,
"grad_norm": 1.5407695770263672,
"learning_rate": 4.553989088043919e-06,
"loss": 0.4673,
"step": 12700
},
{
"epoch": 0.2108696726577816,
"grad_norm": 1.6594492197036743,
"learning_rate": 4.546433919542691e-06,
"loss": 0.5023,
"step": 12800
},
{
"epoch": 0.2125170919754205,
"grad_norm": 1.9056370258331299,
"learning_rate": 4.538821675199521e-06,
"loss": 0.5202,
"step": 12900
},
{
"epoch": 0.2141645112930594,
"grad_norm": 3.2313265800476074,
"learning_rate": 4.531152567322877e-06,
"loss": 0.4649,
"step": 13000
},
{
"epoch": 0.21581193061069834,
"grad_norm": 2.2487971782684326,
"learning_rate": 4.5234268098071766e-06,
"loss": 0.4611,
"step": 13100
},
{
"epoch": 0.21745934992833726,
"grad_norm": 2.0419654846191406,
"learning_rate": 4.515644618126816e-06,
"loss": 0.4851,
"step": 13200
},
{
"epoch": 0.2191067692459762,
"grad_norm": 1.4483575820922852,
"learning_rate": 4.507806209330165e-06,
"loss": 0.4789,
"step": 13300
},
{
"epoch": 0.2207541885636151,
"grad_norm": 2.3362390995025635,
"learning_rate": 4.499911802033508e-06,
"loss": 0.4846,
"step": 13400
},
{
"epoch": 0.22240160788125402,
"grad_norm": 2.0402286052703857,
"learning_rate": 4.491961616414948e-06,
"loss": 0.5099,
"step": 13500
},
{
"epoch": 0.22404902719889294,
"grad_norm": 2.0675928592681885,
"learning_rate": 4.483955874208273e-06,
"loss": 0.4878,
"step": 13600
},
{
"epoch": 0.22569644651653184,
"grad_norm": 1.6327743530273438,
"learning_rate": 4.4758947986967614e-06,
"loss": 0.4765,
"step": 13700
},
{
"epoch": 0.22734386583417077,
"grad_norm": 2.0917341709136963,
"learning_rate": 4.4677786147069595e-06,
"loss": 0.4525,
"step": 13800
},
{
"epoch": 0.2289912851518097,
"grad_norm": 1.5012590885162354,
"learning_rate": 4.459607548602412e-06,
"loss": 0.4699,
"step": 13900
},
{
"epoch": 0.23063870446944862,
"grad_norm": 2.0980496406555176,
"learning_rate": 4.451381828277346e-06,
"loss": 0.5045,
"step": 14000
},
{
"epoch": 0.23228612378708752,
"grad_norm": 1.8820241689682007,
"learning_rate": 4.443101683150316e-06,
"loss": 0.4918,
"step": 14100
},
{
"epoch": 0.23393354310472644,
"grad_norm": 2.0610568523406982,
"learning_rate": 4.434767344157808e-06,
"loss": 0.4917,
"step": 14200
},
{
"epoch": 0.23558096242236537,
"grad_norm": 2.2509660720825195,
"learning_rate": 4.426379043747793e-06,
"loss": 0.4933,
"step": 14300
},
{
"epoch": 0.2372283817400043,
"grad_norm": 3.667386531829834,
"learning_rate": 4.417937015873249e-06,
"loss": 0.4784,
"step": 14400
},
{
"epoch": 0.2388758010576432,
"grad_norm": 2.4788925647735596,
"learning_rate": 4.409441495985632e-06,
"loss": 0.4901,
"step": 14500
},
{
"epoch": 0.24052322037528212,
"grad_norm": 1.6511657238006592,
"learning_rate": 4.4008927210283144e-06,
"loss": 0.4777,
"step": 14600
},
{
"epoch": 0.24217063969292105,
"grad_norm": 1.7784366607666016,
"learning_rate": 4.392290929429971e-06,
"loss": 0.4863,
"step": 14700
},
{
"epoch": 0.24381805901055995,
"grad_norm": 2.4235856533050537,
"learning_rate": 4.383636361097931e-06,
"loss": 0.4578,
"step": 14800
},
{
"epoch": 0.24546547832819887,
"grad_norm": 1.6377619504928589,
"learning_rate": 4.3749292574114886e-06,
"loss": 0.4846,
"step": 14900
},
{
"epoch": 0.2471128976458378,
"grad_norm": 1.5944766998291016,
"learning_rate": 4.366169861215168e-06,
"loss": 0.4744,
"step": 15000
},
{
"epoch": 0.24876031696347672,
"grad_norm": 2.405319929122925,
"learning_rate": 4.357358416811955e-06,
"loss": 0.4685,
"step": 15100
},
{
"epoch": 0.2504077362811156,
"grad_norm": 2.4015884399414062,
"learning_rate": 4.348495169956477e-06,
"loss": 0.4783,
"step": 15200
},
{
"epoch": 0.2520551555987546,
"grad_norm": 2.325193166732788,
"learning_rate": 4.339580367848153e-06,
"loss": 0.4579,
"step": 15300
},
{
"epoch": 0.2537025749163935,
"grad_norm": 1.8238539695739746,
"learning_rate": 4.3306142591243e-06,
"loss": 0.4697,
"step": 15400
},
{
"epoch": 0.2553499942340324,
"grad_norm": 1.4284635782241821,
"learning_rate": 4.321597093853194e-06,
"loss": 0.452,
"step": 15500
},
{
"epoch": 0.2569974135516713,
"grad_norm": 1.5146524906158447,
"learning_rate": 4.3125291235271e-06,
"loss": 0.4858,
"step": 15600
},
{
"epoch": 0.2586448328693102,
"grad_norm": 2.1129367351531982,
"learning_rate": 4.303410601055253e-06,
"loss": 0.4986,
"step": 15700
},
{
"epoch": 0.2602922521869491,
"grad_norm": 2.0981929302215576,
"learning_rate": 4.29424178075681e-06,
"loss": 0.4505,
"step": 15800
},
{
"epoch": 0.2619396715045881,
"grad_norm": 1.3321784734725952,
"learning_rate": 4.285022918353755e-06,
"loss": 0.4983,
"step": 15900
},
{
"epoch": 0.263587090822227,
"grad_norm": 2.7090718746185303,
"learning_rate": 4.275754270963763e-06,
"loss": 0.482,
"step": 16000
},
{
"epoch": 0.2652345101398659,
"grad_norm": 1.5834273099899292,
"learning_rate": 4.26643609709303e-06,
"loss": 0.5029,
"step": 16100
},
{
"epoch": 0.26688192945750483,
"grad_norm": 2.400024175643921,
"learning_rate": 4.257068656629071e-06,
"loss": 0.4579,
"step": 16200
},
{
"epoch": 0.26852934877514373,
"grad_norm": 1.9160480499267578,
"learning_rate": 4.24765221083346e-06,
"loss": 0.4892,
"step": 16300
},
{
"epoch": 0.2701767680927827,
"grad_norm": 2.4766881465911865,
"learning_rate": 4.238187022334553e-06,
"loss": 0.4633,
"step": 16400
},
{
"epoch": 0.2718241874104216,
"grad_norm": 2.2665488719940186,
"learning_rate": 4.228673355120156e-06,
"loss": 0.4682,
"step": 16500
},
{
"epoch": 0.2734716067280605,
"grad_norm": 2.582789897918701,
"learning_rate": 4.2191114745301654e-06,
"loss": 0.4761,
"step": 16600
},
{
"epoch": 0.27511902604569943,
"grad_norm": 2.240748882293701,
"learning_rate": 4.20950164724917e-06,
"loss": 0.4613,
"step": 16700
},
{
"epoch": 0.27676644536333833,
"grad_norm": 2.4156808853149414,
"learning_rate": 4.1998441412990085e-06,
"loss": 0.4907,
"step": 16800
},
{
"epoch": 0.27841386468097723,
"grad_norm": 2.348371744155884,
"learning_rate": 4.190139226031297e-06,
"loss": 0.4675,
"step": 16900
},
{
"epoch": 0.2800612839986162,
"grad_norm": 1.7973005771636963,
"learning_rate": 4.180387172119916e-06,
"loss": 0.4738,
"step": 17000
},
{
"epoch": 0.2817087033162551,
"grad_norm": 2.322040557861328,
"learning_rate": 4.17058825155346e-06,
"loss": 0.4644,
"step": 17100
},
{
"epoch": 0.283356122633894,
"grad_norm": 2.3491313457489014,
"learning_rate": 4.160742737627656e-06,
"loss": 0.5077,
"step": 17200
},
{
"epoch": 0.28500354195153293,
"grad_norm": 1.630631446838379,
"learning_rate": 4.150850904937733e-06,
"loss": 0.4797,
"step": 17300
},
{
"epoch": 0.28665096126917183,
"grad_norm": 2.0471599102020264,
"learning_rate": 4.140913029370774e-06,
"loss": 0.461,
"step": 17400
},
{
"epoch": 0.2882983805868108,
"grad_norm": 2.4391767978668213,
"learning_rate": 4.130929388098011e-06,
"loss": 0.4962,
"step": 17500
},
{
"epoch": 0.2899457999044497,
"grad_norm": 2.0148985385894775,
"learning_rate": 4.120900259567103e-06,
"loss": 0.4634,
"step": 17600
},
{
"epoch": 0.2915932192220886,
"grad_norm": 2.3383798599243164,
"learning_rate": 4.110825923494365e-06,
"loss": 0.4553,
"step": 17700
},
{
"epoch": 0.29324063853972754,
"grad_norm": 1.539428949356079,
"learning_rate": 4.100706660856968e-06,
"loss": 0.4864,
"step": 17800
},
{
"epoch": 0.29488805785736644,
"grad_norm": 1.8251954317092896,
"learning_rate": 4.090542753885101e-06,
"loss": 0.487,
"step": 17900
},
{
"epoch": 0.29653547717500534,
"grad_norm": 2.269007921218872,
"learning_rate": 4.080334486054104e-06,
"loss": 0.4423,
"step": 18000
},
{
"epoch": 0.2981828964926443,
"grad_norm": 2.4436540603637695,
"learning_rate": 4.0700821420765566e-06,
"loss": 0.4916,
"step": 18100
},
{
"epoch": 0.2998303158102832,
"grad_norm": 2.570488929748535,
"learning_rate": 4.05978600789434e-06,
"loss": 0.4536,
"step": 18200
},
{
"epoch": 0.3014777351279221,
"grad_norm": 2.247633934020996,
"learning_rate": 4.049446370670661e-06,
"loss": 0.4891,
"step": 18300
},
{
"epoch": 0.30312515444556104,
"grad_norm": 1.7023581266403198,
"learning_rate": 4.0390635187820435e-06,
"loss": 0.4594,
"step": 18400
},
{
"epoch": 0.30477257376319994,
"grad_norm": 2.0368921756744385,
"learning_rate": 4.028637741810285e-06,
"loss": 0.4191,
"step": 18500
},
{
"epoch": 0.3064199930808389,
"grad_norm": 2.0896544456481934,
"learning_rate": 4.018169330534381e-06,
"loss": 0.4691,
"step": 18600
},
{
"epoch": 0.3080674123984778,
"grad_norm": 2.5784189701080322,
"learning_rate": 4.007658576922413e-06,
"loss": 0.4442,
"step": 18700
},
{
"epoch": 0.3097148317161167,
"grad_norm": 2.169424057006836,
"learning_rate": 3.997105774123409e-06,
"loss": 0.4552,
"step": 18800
},
{
"epoch": 0.31136225103375564,
"grad_norm": 2.076741933822632,
"learning_rate": 3.986511216459163e-06,
"loss": 0.462,
"step": 18900
},
{
"epoch": 0.31300967035139454,
"grad_norm": 2.33245849609375,
"learning_rate": 3.97587519941603e-06,
"loss": 0.5015,
"step": 19000
},
{
"epoch": 0.31465708966903344,
"grad_norm": 2.465367555618286,
"learning_rate": 3.965198019636684e-06,
"loss": 0.4726,
"step": 19100
},
{
"epoch": 0.3163045089866724,
"grad_norm": 2.0327184200286865,
"learning_rate": 3.95447997491184e-06,
"loss": 0.4602,
"step": 19200
},
{
"epoch": 0.3179519283043113,
"grad_norm": 2.6782443523406982,
"learning_rate": 3.943721364171957e-06,
"loss": 0.4676,
"step": 19300
},
{
"epoch": 0.3195993476219502,
"grad_norm": 2.373873233795166,
"learning_rate": 3.932922487478894e-06,
"loss": 0.4466,
"step": 19400
},
{
"epoch": 0.32124676693958915,
"grad_norm": 2.5210931301116943,
"learning_rate": 3.9220836460175415e-06,
"loss": 0.4543,
"step": 19500
},
{
"epoch": 0.32289418625722804,
"grad_norm": 2.384608268737793,
"learning_rate": 3.911205142087425e-06,
"loss": 0.4758,
"step": 19600
},
{
"epoch": 0.324541605574867,
"grad_norm": 2.8322508335113525,
"learning_rate": 3.900287279094274e-06,
"loss": 0.4597,
"step": 19700
},
{
"epoch": 0.3261890248925059,
"grad_norm": 3.4156792163848877,
"learning_rate": 3.889330361541552e-06,
"loss": 0.4552,
"step": 19800
},
{
"epoch": 0.3278364442101448,
"grad_norm": 1.7643976211547852,
"learning_rate": 3.878334695021973e-06,
"loss": 0.4589,
"step": 19900
},
{
"epoch": 0.32948386352778375,
"grad_norm": 1.7313556671142578,
"learning_rate": 3.867300586208975e-06,
"loss": 0.4444,
"step": 20000
},
{
"epoch": 0.33113128284542265,
"grad_norm": 1.821792721748352,
"learning_rate": 3.856228342848167e-06,
"loss": 0.4945,
"step": 20100
},
{
"epoch": 0.33277870216306155,
"grad_norm": 2.735888719558716,
"learning_rate": 3.845118273748743e-06,
"loss": 0.4431,
"step": 20200
},
{
"epoch": 0.3344261214807005,
"grad_norm": 2.3234407901763916,
"learning_rate": 3.833970688774872e-06,
"loss": 0.4838,
"step": 20300
},
{
"epoch": 0.3360735407983394,
"grad_norm": 1.709910273551941,
"learning_rate": 3.822785898837058e-06,
"loss": 0.4754,
"step": 20400
},
{
"epoch": 0.3377209601159783,
"grad_norm": 2.435945987701416,
"learning_rate": 3.811564215883463e-06,
"loss": 0.4737,
"step": 20500
},
{
"epoch": 0.33936837943361725,
"grad_norm": 1.9514074325561523,
"learning_rate": 3.8003059528912123e-06,
"loss": 0.4861,
"step": 20600
},
{
"epoch": 0.34101579875125615,
"grad_norm": 2.4523439407348633,
"learning_rate": 3.7890114238576616e-06,
"loss": 0.4814,
"step": 20700
},
{
"epoch": 0.3426632180688951,
"grad_norm": 2.690749406814575,
"learning_rate": 3.777680943791639e-06,
"loss": 0.4837,
"step": 20800
},
{
"epoch": 0.344310637386534,
"grad_norm": 1.8186627626419067,
"learning_rate": 3.7663148287046635e-06,
"loss": 0.4384,
"step": 20900
},
{
"epoch": 0.3459580567041729,
"grad_norm": 2.5133306980133057,
"learning_rate": 3.754913395602129e-06,
"loss": 0.4612,
"step": 21000
},
{
"epoch": 0.34760547602181185,
"grad_norm": 1.9760069847106934,
"learning_rate": 3.7434769624744586e-06,
"loss": 0.4619,
"step": 21100
},
{
"epoch": 0.34925289533945075,
"grad_norm": 2.461090326309204,
"learning_rate": 3.732005848288245e-06,
"loss": 0.4762,
"step": 21200
},
{
"epoch": 0.35090031465708965,
"grad_norm": 1.82012939453125,
"learning_rate": 3.7205003729773454e-06,
"loss": 0.4309,
"step": 21300
},
{
"epoch": 0.3525477339747286,
"grad_norm": 1.5199309587478638,
"learning_rate": 3.708960857433964e-06,
"loss": 0.4632,
"step": 21400
},
{
"epoch": 0.3541951532923675,
"grad_norm": 1.8525145053863525,
"learning_rate": 3.6973876234997004e-06,
"loss": 0.4595,
"step": 21500
},
{
"epoch": 0.3558425726100064,
"grad_norm": 1.7146118879318237,
"learning_rate": 3.6857809939565724e-06,
"loss": 0.4414,
"step": 21600
},
{
"epoch": 0.35748999192764536,
"grad_norm": 2.75750994682312,
"learning_rate": 3.6741412925180153e-06,
"loss": 0.4624,
"step": 21700
},
{
"epoch": 0.35913741124528425,
"grad_norm": 2.6996710300445557,
"learning_rate": 3.6624688438198506e-06,
"loss": 0.4888,
"step": 21800
},
{
"epoch": 0.36078483056292315,
"grad_norm": 1.895980715751648,
"learning_rate": 3.650763973411238e-06,
"loss": 0.4395,
"step": 21900
},
{
"epoch": 0.3624322498805621,
"grad_norm": 2.5552258491516113,
"learning_rate": 3.639027007745585e-06,
"loss": 0.465,
"step": 22000
},
{
"epoch": 0.364079669198201,
"grad_norm": 1.6127821207046509,
"learning_rate": 3.6272582741714547e-06,
"loss": 0.4282,
"step": 22100
},
{
"epoch": 0.36572708851583996,
"grad_norm": 2.0909807682037354,
"learning_rate": 3.615458100923425e-06,
"loss": 0.4713,
"step": 22200
},
{
"epoch": 0.36737450783347886,
"grad_norm": 1.798374056816101,
"learning_rate": 3.603626817112941e-06,
"loss": 0.4784,
"step": 22300
},
{
"epoch": 0.36902192715111776,
"grad_norm": 2.0519778728485107,
"learning_rate": 3.5917647527191328e-06,
"loss": 0.4782,
"step": 22400
},
{
"epoch": 0.3706693464687567,
"grad_norm": 2.137410879135132,
"learning_rate": 3.5798722385796137e-06,
"loss": 0.4599,
"step": 22500
},
{
"epoch": 0.3723167657863956,
"grad_norm": 2.040231943130493,
"learning_rate": 3.5679496063812507e-06,
"loss": 0.434,
"step": 22600
},
{
"epoch": 0.3739641851040345,
"grad_norm": 2.0495615005493164,
"learning_rate": 3.5559971886509163e-06,
"loss": 0.473,
"step": 22700
},
{
"epoch": 0.37561160442167346,
"grad_norm": 2.5767838954925537,
"learning_rate": 3.5440153187462146e-06,
"loss": 0.4522,
"step": 22800
},
{
"epoch": 0.37725902373931236,
"grad_norm": 2.11317777633667,
"learning_rate": 3.5320043308461784e-06,
"loss": 0.4971,
"step": 22900
},
{
"epoch": 0.37890644305695126,
"grad_norm": 2.7997255325317383,
"learning_rate": 3.5199645599419574e-06,
"loss": 0.4562,
"step": 23000
},
{
"epoch": 0.3805538623745902,
"grad_norm": 2.3313941955566406,
"learning_rate": 3.5078963418274666e-06,
"loss": 0.4466,
"step": 23100
},
{
"epoch": 0.3822012816922291,
"grad_norm": 1.4548770189285278,
"learning_rate": 3.4958000130900273e-06,
"loss": 0.4628,
"step": 23200
},
{
"epoch": 0.38384870100986807,
"grad_norm": 1.5566315650939941,
"learning_rate": 3.4836759111009767e-06,
"loss": 0.47,
"step": 23300
},
{
"epoch": 0.38549612032750696,
"grad_norm": 1.3899728059768677,
"learning_rate": 3.4715243740062577e-06,
"loss": 0.46,
"step": 23400
},
{
"epoch": 0.38714353964514586,
"grad_norm": 2.3716745376586914,
"learning_rate": 3.4593457407169896e-06,
"loss": 0.4389,
"step": 23500
},
{
"epoch": 0.3887909589627848,
"grad_norm": 2.0501861572265625,
"learning_rate": 3.4471403509000166e-06,
"loss": 0.4621,
"step": 23600
},
{
"epoch": 0.3904383782804237,
"grad_norm": 2.131397008895874,
"learning_rate": 3.4349085449684306e-06,
"loss": 0.4643,
"step": 23700
},
{
"epoch": 0.3920857975980626,
"grad_norm": 2.515228509902954,
"learning_rate": 3.4226506640720804e-06,
"loss": 0.4691,
"step": 23800
},
{
"epoch": 0.39373321691570157,
"grad_norm": 1.9131451845169067,
"learning_rate": 3.4103670500880564e-06,
"loss": 0.4583,
"step": 23900
},
{
"epoch": 0.39538063623334047,
"grad_norm": 2.1132075786590576,
"learning_rate": 3.3980580456111528e-06,
"loss": 0.4572,
"step": 24000
},
{
"epoch": 0.39702805555097936,
"grad_norm": 2.0267536640167236,
"learning_rate": 3.385723993944317e-06,
"loss": 0.4605,
"step": 24100
},
{
"epoch": 0.3986754748686183,
"grad_norm": 1.9140433073043823,
"learning_rate": 3.3733652390890714e-06,
"loss": 0.4634,
"step": 24200
},
{
"epoch": 0.4003228941862572,
"grad_norm": 1.319580078125,
"learning_rate": 3.3609821257359187e-06,
"loss": 0.4607,
"step": 24300
},
{
"epoch": 0.40197031350389617,
"grad_norm": 2.329153299331665,
"learning_rate": 3.3485749992547312e-06,
"loss": 0.4864,
"step": 24400
},
{
"epoch": 0.40361773282153507,
"grad_norm": 1.709675669670105,
"learning_rate": 3.336144205685117e-06,
"loss": 0.4772,
"step": 24500
},
{
"epoch": 0.40526515213917397,
"grad_norm": 1.869702696800232,
"learning_rate": 3.3236900917267663e-06,
"loss": 0.4691,
"step": 24600
},
{
"epoch": 0.4069125714568129,
"grad_norm": 2.017636775970459,
"learning_rate": 3.311213004729787e-06,
"loss": 0.4568,
"step": 24700
},
{
"epoch": 0.4085599907744518,
"grad_norm": 2.2239317893981934,
"learning_rate": 3.2987132926850123e-06,
"loss": 0.4976,
"step": 24800
},
{
"epoch": 0.4102074100920907,
"grad_norm": 2.3074443340301514,
"learning_rate": 3.286191304214296e-06,
"loss": 0.4669,
"step": 24900
},
{
"epoch": 0.4118548294097297,
"grad_norm": 1.9659165143966675,
"learning_rate": 3.2736473885607932e-06,
"loss": 0.4794,
"step": 25000
},
{
"epoch": 0.41350224872736857,
"grad_norm": 2.3997573852539062,
"learning_rate": 3.2610818955792135e-06,
"loss": 0.4847,
"step": 25100
},
{
"epoch": 0.41514966804500747,
"grad_norm": 2.5638508796691895,
"learning_rate": 3.248495175726068e-06,
"loss": 0.4452,
"step": 25200
},
{
"epoch": 0.4167970873626464,
"grad_norm": 1.7153327465057373,
"learning_rate": 3.235887580049893e-06,
"loss": 0.4598,
"step": 25300
},
{
"epoch": 0.4184445066802853,
"grad_norm": 2.540421485900879,
"learning_rate": 3.223259460181461e-06,
"loss": 0.4573,
"step": 25400
},
{
"epoch": 0.4200919259979243,
"grad_norm": 2.420246124267578,
"learning_rate": 3.2106111683239703e-06,
"loss": 0.4593,
"step": 25500
},
{
"epoch": 0.4217393453155632,
"grad_norm": 2.1598918437957764,
"learning_rate": 3.1979430572432256e-06,
"loss": 0.4343,
"step": 25600
},
{
"epoch": 0.4233867646332021,
"grad_norm": 2.091474771499634,
"learning_rate": 3.185255480257797e-06,
"loss": 0.4423,
"step": 25700
},
{
"epoch": 0.425034183950841,
"grad_norm": 3.1766490936279297,
"learning_rate": 3.1725487912291654e-06,
"loss": 0.4499,
"step": 25800
},
{
"epoch": 0.4266816032684799,
"grad_norm": 1.8975087404251099,
"learning_rate": 3.1598233445518544e-06,
"loss": 0.4833,
"step": 25900
},
{
"epoch": 0.4283290225861188,
"grad_norm": 2.459707498550415,
"learning_rate": 3.1470794951435473e-06,
"loss": 0.4563,
"step": 26000
},
{
"epoch": 0.4299764419037578,
"grad_norm": 1.9212175607681274,
"learning_rate": 3.1343175984351842e-06,
"loss": 0.4451,
"step": 26100
},
{
"epoch": 0.4316238612213967,
"grad_norm": 2.1869616508483887,
"learning_rate": 3.121538010361054e-06,
"loss": 0.4438,
"step": 26200
},
{
"epoch": 0.4332712805390356,
"grad_norm": 2.3515875339508057,
"learning_rate": 3.108741087348862e-06,
"loss": 0.4433,
"step": 26300
},
{
"epoch": 0.43491869985667453,
"grad_norm": 2.7230703830718994,
"learning_rate": 3.095927186309795e-06,
"loss": 0.452,
"step": 26400
},
{
"epoch": 0.4365661191743134,
"grad_norm": 1.987182855606079,
"learning_rate": 3.08309666462856e-06,
"loss": 0.4508,
"step": 26500
},
{
"epoch": 0.4382135384919524,
"grad_norm": 1.8598235845565796,
"learning_rate": 3.0702498801534234e-06,
"loss": 0.4502,
"step": 26600
},
{
"epoch": 0.4398609578095913,
"grad_norm": 1.3509740829467773,
"learning_rate": 3.0573871911862252e-06,
"loss": 0.4618,
"step": 26700
},
{
"epoch": 0.4415083771272302,
"grad_norm": 2.3464887142181396,
"learning_rate": 3.044508956472388e-06,
"loss": 0.4687,
"step": 26800
},
{
"epoch": 0.44315579644486913,
"grad_norm": 2.453792095184326,
"learning_rate": 3.0316155351909136e-06,
"loss": 0.4581,
"step": 26900
},
{
"epoch": 0.44480321576250803,
"grad_norm": 1.8684953451156616,
"learning_rate": 3.0187072869443595e-06,
"loss": 0.4775,
"step": 27000
},
{
"epoch": 0.44645063508014693,
"grad_norm": 2.501569986343384,
"learning_rate": 3.005784571748816e-06,
"loss": 0.4721,
"step": 27100
},
{
"epoch": 0.4480980543977859,
"grad_norm": 2.526435613632202,
"learning_rate": 2.992847750023861e-06,
"loss": 0.4327,
"step": 27200
},
{
"epoch": 0.4497454737154248,
"grad_norm": 2.1223368644714355,
"learning_rate": 2.9798971825825107e-06,
"loss": 0.4494,
"step": 27300
},
{
"epoch": 0.4513928930330637,
"grad_norm": 3.0751936435699463,
"learning_rate": 2.9669332306211513e-06,
"loss": 0.4513,
"step": 27400
},
{
"epoch": 0.45304031235070263,
"grad_norm": 1.7349650859832764,
"learning_rate": 2.95395625570947e-06,
"loss": 0.4516,
"step": 27500
},
{
"epoch": 0.45468773166834153,
"grad_norm": 1.474882960319519,
"learning_rate": 2.9409666197803715e-06,
"loss": 0.4269,
"step": 27600
},
{
"epoch": 0.4563351509859805,
"grad_norm": 1.845004916191101,
"learning_rate": 2.9279646851198796e-06,
"loss": 0.4598,
"step": 27700
},
{
"epoch": 0.4579825703036194,
"grad_norm": 1.4891762733459473,
"learning_rate": 2.9149508143570317e-06,
"loss": 0.4383,
"step": 27800
},
{
"epoch": 0.4596299896212583,
"grad_norm": 2.5375092029571533,
"learning_rate": 2.9019253704537725e-06,
"loss": 0.4903,
"step": 27900
},
{
"epoch": 0.46127740893889724,
"grad_norm": 2.7068655490875244,
"learning_rate": 2.888888716694824e-06,
"loss": 0.4673,
"step": 28000
},
{
"epoch": 0.46292482825653614,
"grad_norm": 1.9553802013397217,
"learning_rate": 2.8758412166775536e-06,
"loss": 0.4722,
"step": 28100
},
{
"epoch": 0.46457224757417503,
"grad_norm": 2.417858362197876,
"learning_rate": 2.8627832343018392e-06,
"loss": 0.4778,
"step": 28200
},
{
"epoch": 0.466219666891814,
"grad_norm": 2.021970748901367,
"learning_rate": 2.849715133759912e-06,
"loss": 0.438,
"step": 28300
},
{
"epoch": 0.4678670862094529,
"grad_norm": 1.203245997428894,
"learning_rate": 2.8366372795262043e-06,
"loss": 0.448,
"step": 28400
},
{
"epoch": 0.4695145055270918,
"grad_norm": 1.60651433467865,
"learning_rate": 2.8235500363471835e-06,
"loss": 0.4667,
"step": 28500
},
{
"epoch": 0.47116192484473074,
"grad_norm": 2.5438413619995117,
"learning_rate": 2.8104537692311772e-06,
"loss": 0.4411,
"step": 28600
},
{
"epoch": 0.47280934416236964,
"grad_norm": 1.9837552309036255,
"learning_rate": 2.7973488434381936e-06,
"loss": 0.4772,
"step": 28700
},
{
"epoch": 0.4744567634800086,
"grad_norm": 4.9808573722839355,
"learning_rate": 2.7842356244697365e-06,
"loss": 0.4585,
"step": 28800
},
{
"epoch": 0.4761041827976475,
"grad_norm": 2.3967010974884033,
"learning_rate": 2.771114478058609e-06,
"loss": 0.4434,
"step": 28900
},
{
"epoch": 0.4777516021152864,
"grad_norm": 2.0720436573028564,
"learning_rate": 2.757985770158712e-06,
"loss": 0.4553,
"step": 29000
},
{
"epoch": 0.47939902143292534,
"grad_norm": 2.0397377014160156,
"learning_rate": 2.744849866934843e-06,
"loss": 0.4335,
"step": 29100
},
{
"epoch": 0.48104644075056424,
"grad_norm": 1.8307183980941772,
"learning_rate": 2.7317071347524756e-06,
"loss": 0.4575,
"step": 29200
},
{
"epoch": 0.48269386006820314,
"grad_norm": 2.0401103496551514,
"learning_rate": 2.7185579401675478e-06,
"loss": 0.4536,
"step": 29300
},
{
"epoch": 0.4843412793858421,
"grad_norm": 1.5589044094085693,
"learning_rate": 2.705402649916238e-06,
"loss": 0.4464,
"step": 29400
},
{
"epoch": 0.485988698703481,
"grad_norm": 1.7465211153030396,
"learning_rate": 2.692241630904732e-06,
"loss": 0.443,
"step": 29500
},
{
"epoch": 0.4876361180211199,
"grad_norm": 1.9152140617370605,
"learning_rate": 2.679075250198995e-06,
"loss": 0.4453,
"step": 29600
},
{
"epoch": 0.48928353733875884,
"grad_norm": 1.9584287405014038,
"learning_rate": 2.665903875014531e-06,
"loss": 0.4412,
"step": 29700
},
{
"epoch": 0.49093095665639774,
"grad_norm": 2.4530208110809326,
"learning_rate": 2.6527278727061438e-06,
"loss": 0.455,
"step": 29800
},
{
"epoch": 0.4925783759740367,
"grad_norm": 2.28879451751709,
"learning_rate": 2.6395476107576866e-06,
"loss": 0.4545,
"step": 29900
},
{
"epoch": 0.4942257952916756,
"grad_norm": 2.3238701820373535,
"learning_rate": 2.626363456771818e-06,
"loss": 0.4659,
"step": 30000
},
{
"epoch": 0.4958732146093145,
"grad_norm": 2.5362935066223145,
"learning_rate": 2.613175778459746e-06,
"loss": 0.475,
"step": 30100
},
{
"epoch": 0.49752063392695345,
"grad_norm": 1.6304713487625122,
"learning_rate": 2.599984943630974e-06,
"loss": 0.4344,
"step": 30200
},
{
"epoch": 0.49916805324459235,
"grad_norm": 2.1046688556671143,
"learning_rate": 2.5867913201830415e-06,
"loss": 0.442,
"step": 30300
},
{
"epoch": 0.5008154725622312,
"grad_norm": 2.016679048538208,
"learning_rate": 2.5735952760912623e-06,
"loss": 0.4468,
"step": 30400
},
{
"epoch": 0.5024628918798701,
"grad_norm": 1.700775384902954,
"learning_rate": 2.560397179398467e-06,
"loss": 0.4755,
"step": 30500
},
{
"epoch": 0.5041103111975092,
"grad_norm": 2.6758084297180176,
"learning_rate": 2.5471973982047283e-06,
"loss": 0.4734,
"step": 30600
},
{
"epoch": 0.505757730515148,
"grad_norm": 2.0318357944488525,
"learning_rate": 2.533996300657105e-06,
"loss": 0.4257,
"step": 30700
},
{
"epoch": 0.507405149832787,
"grad_norm": 1.755279779434204,
"learning_rate": 2.5207942549393678e-06,
"loss": 0.4311,
"step": 30800
},
{
"epoch": 0.5090525691504258,
"grad_norm": 1.3220248222351074,
"learning_rate": 2.507591629261732e-06,
"loss": 0.4586,
"step": 30900
},
{
"epoch": 0.5106999884680647,
"grad_norm": 1.8418200016021729,
"learning_rate": 2.4943887918505887e-06,
"loss": 0.4856,
"step": 31000
},
{
"epoch": 0.5123474077857036,
"grad_norm": 2.0014216899871826,
"learning_rate": 2.4811861109382337e-06,
"loss": 0.4691,
"step": 31100
},
{
"epoch": 0.5139948271033427,
"grad_norm": 2.2227587699890137,
"learning_rate": 2.4679839547526e-06,
"loss": 0.4465,
"step": 31200
},
{
"epoch": 0.5156422464209816,
"grad_norm": 2.022191047668457,
"learning_rate": 2.4547826915069816e-06,
"loss": 0.4344,
"step": 31300
},
{
"epoch": 0.5172896657386205,
"grad_norm": 1.4360835552215576,
"learning_rate": 2.441582689389772e-06,
"loss": 0.446,
"step": 31400
},
{
"epoch": 0.5189370850562594,
"grad_norm": 2.100766658782959,
"learning_rate": 2.4283843165541914e-06,
"loss": 0.4457,
"step": 31500
},
{
"epoch": 0.5205845043738982,
"grad_norm": 1.6528244018554688,
"learning_rate": 2.4151879411080144e-06,
"loss": 0.4477,
"step": 31600
},
{
"epoch": 0.5222319236915373,
"grad_norm": 2.4091269969940186,
"learning_rate": 2.401993931103312e-06,
"loss": 0.4764,
"step": 31700
},
{
"epoch": 0.5238793430091762,
"grad_norm": 2.416269302368164,
"learning_rate": 2.388802654526182e-06,
"loss": 0.4572,
"step": 31800
},
{
"epoch": 0.5255267623268151,
"grad_norm": 1.747132420539856,
"learning_rate": 2.3756144792864812e-06,
"loss": 0.4439,
"step": 31900
},
{
"epoch": 0.527174181644454,
"grad_norm": 1.7760906219482422,
"learning_rate": 2.3624297732075747e-06,
"loss": 0.4589,
"step": 32000
},
{
"epoch": 0.5288216009620929,
"grad_norm": 1.9603146314620972,
"learning_rate": 2.349248904016069e-06,
"loss": 0.4464,
"step": 32100
},
{
"epoch": 0.5304690202797318,
"grad_norm": 2.7575228214263916,
"learning_rate": 2.336072239331555e-06,
"loss": 0.425,
"step": 32200
},
{
"epoch": 0.5321164395973708,
"grad_norm": 3.160569190979004,
"learning_rate": 2.3229001466563647e-06,
"loss": 0.4493,
"step": 32300
},
{
"epoch": 0.5337638589150097,
"grad_norm": 1.3065659999847412,
"learning_rate": 2.3097329933653116e-06,
"loss": 0.4134,
"step": 32400
},
{
"epoch": 0.5354112782326486,
"grad_norm": 1.933773159980774,
"learning_rate": 2.2965711466954444e-06,
"loss": 0.4465,
"step": 32500
},
{
"epoch": 0.5370586975502875,
"grad_norm": 1.7939263582229614,
"learning_rate": 2.283414973735816e-06,
"loss": 0.4577,
"step": 32600
},
{
"epoch": 0.5387061168679264,
"grad_norm": 2.202970027923584,
"learning_rate": 2.270264841417229e-06,
"loss": 0.4506,
"step": 32700
},
{
"epoch": 0.5403535361855654,
"grad_norm": 1.2232089042663574,
"learning_rate": 2.2571211165020164e-06,
"loss": 0.4412,
"step": 32800
},
{
"epoch": 0.5420009555032043,
"grad_norm": 2.2651045322418213,
"learning_rate": 2.243984165573804e-06,
"loss": 0.4838,
"step": 32900
},
{
"epoch": 0.5436483748208432,
"grad_norm": 1.1817712783813477,
"learning_rate": 2.2308543550272853e-06,
"loss": 0.4426,
"step": 33000
},
{
"epoch": 0.5452957941384821,
"grad_norm": 3.3513026237487793,
"learning_rate": 2.2177320510580115e-06,
"loss": 0.4432,
"step": 33100
},
{
"epoch": 0.546943213456121,
"grad_norm": 2.345806837081909,
"learning_rate": 2.2046176196521706e-06,
"loss": 0.4591,
"step": 33200
},
{
"epoch": 0.5485906327737599,
"grad_norm": 2.1807124614715576,
"learning_rate": 2.191511426576377e-06,
"loss": 0.4589,
"step": 33300
},
{
"epoch": 0.5502380520913989,
"grad_norm": 2.6100516319274902,
"learning_rate": 2.1784138373674817e-06,
"loss": 0.4644,
"step": 33400
},
{
"epoch": 0.5518854714090378,
"grad_norm": 1.3514959812164307,
"learning_rate": 2.165325217322367e-06,
"loss": 0.4123,
"step": 33500
},
{
"epoch": 0.5535328907266767,
"grad_norm": 2.2316343784332275,
"learning_rate": 2.1522459314877603e-06,
"loss": 0.4329,
"step": 33600
},
{
"epoch": 0.5551803100443156,
"grad_norm": 1.948644757270813,
"learning_rate": 2.1391763446500583e-06,
"loss": 0.4485,
"step": 33700
},
{
"epoch": 0.5568277293619545,
"grad_norm": 2.1561203002929688,
"learning_rate": 2.1261168213251465e-06,
"loss": 0.4557,
"step": 33800
},
{
"epoch": 0.5584751486795935,
"grad_norm": 2.097280263900757,
"learning_rate": 2.1130677257482328e-06,
"loss": 0.4535,
"step": 33900
},
{
"epoch": 0.5601225679972324,
"grad_norm": 2.417245388031006,
"learning_rate": 2.1000294218636963e-06,
"loss": 0.4758,
"step": 34000
},
{
"epoch": 0.5617699873148713,
"grad_norm": 1.9167017936706543,
"learning_rate": 2.0870022733149287e-06,
"loss": 0.4742,
"step": 34100
},
{
"epoch": 0.5634174066325102,
"grad_norm": 1.482334017753601,
"learning_rate": 2.073986643434193e-06,
"loss": 0.4287,
"step": 34200
},
{
"epoch": 0.5650648259501491,
"grad_norm": 1.6773154735565186,
"learning_rate": 2.0609828952324954e-06,
"loss": 0.4211,
"step": 34300
},
{
"epoch": 0.566712245267788,
"grad_norm": 1.883154273033142,
"learning_rate": 2.047991391389458e-06,
"loss": 0.453,
"step": 34400
},
{
"epoch": 0.568359664585427,
"grad_norm": 2.0675201416015625,
"learning_rate": 2.035012494243198e-06,
"loss": 0.4762,
"step": 34500
},
{
"epoch": 0.5700070839030659,
"grad_norm": 2.362501382827759,
"learning_rate": 2.0220465657802322e-06,
"loss": 0.4566,
"step": 34600
},
{
"epoch": 0.5716545032207048,
"grad_norm": 1.8373854160308838,
"learning_rate": 2.0090939676253744e-06,
"loss": 0.442,
"step": 34700
},
{
"epoch": 0.5733019225383437,
"grad_norm": 1.8830519914627075,
"learning_rate": 1.9961550610316477e-06,
"loss": 0.4521,
"step": 34800
},
{
"epoch": 0.5749493418559826,
"grad_norm": 1.484971523284912,
"learning_rate": 1.9832302068702162e-06,
"loss": 0.4795,
"step": 34900
},
{
"epoch": 0.5765967611736216,
"grad_norm": 1.619246482849121,
"learning_rate": 1.9703197656203153e-06,
"loss": 0.4525,
"step": 35000
},
{
"epoch": 0.5782441804912605,
"grad_norm": 1.589003562927246,
"learning_rate": 1.9574240973591955e-06,
"loss": 0.4346,
"step": 35100
},
{
"epoch": 0.5798915998088994,
"grad_norm": 1.2750858068466187,
"learning_rate": 1.944543561752088e-06,
"loss": 0.4595,
"step": 35200
},
{
"epoch": 0.5815390191265383,
"grad_norm": 2.5024302005767822,
"learning_rate": 1.931678518042165e-06,
"loss": 0.4469,
"step": 35300
},
{
"epoch": 0.5831864384441772,
"grad_norm": 2.244246244430542,
"learning_rate": 1.918829325040523e-06,
"loss": 0.4475,
"step": 35400
},
{
"epoch": 0.5848338577618161,
"grad_norm": 1.7237255573272705,
"learning_rate": 1.9059963411161788e-06,
"loss": 0.4578,
"step": 35500
},
{
"epoch": 0.5864812770794551,
"grad_norm": 1.9429930448532104,
"learning_rate": 1.8931799241860704e-06,
"loss": 0.4776,
"step": 35600
},
{
"epoch": 0.588128696397094,
"grad_norm": 2.0698490142822266,
"learning_rate": 1.880380431705075e-06,
"loss": 0.4422,
"step": 35700
},
{
"epoch": 0.5897761157147329,
"grad_norm": 1.440127968788147,
"learning_rate": 1.8675982206560417e-06,
"loss": 0.4528,
"step": 35800
},
{
"epoch": 0.5914235350323718,
"grad_norm": 2.600696563720703,
"learning_rate": 1.854833647539833e-06,
"loss": 0.4167,
"step": 35900
},
{
"epoch": 0.5930709543500107,
"grad_norm": 2.2462635040283203,
"learning_rate": 1.8420870683653819e-06,
"loss": 0.4461,
"step": 36000
},
{
"epoch": 0.5947183736676497,
"grad_norm": 2.301934003829956,
"learning_rate": 1.8293588386397646e-06,
"loss": 0.4609,
"step": 36100
},
{
"epoch": 0.5963657929852886,
"grad_norm": 1.231947422027588,
"learning_rate": 1.816649313358284e-06,
"loss": 0.4617,
"step": 36200
},
{
"epoch": 0.5980132123029275,
"grad_norm": 1.6088837385177612,
"learning_rate": 1.8039588469945675e-06,
"loss": 0.4298,
"step": 36300
},
{
"epoch": 0.5996606316205664,
"grad_norm": 2.1999731063842773,
"learning_rate": 1.791287793490682e-06,
"loss": 0.4576,
"step": 36400
},
{
"epoch": 0.6013080509382053,
"grad_norm": 1.9624534845352173,
"learning_rate": 1.7786365062472645e-06,
"loss": 0.4416,
"step": 36500
},
{
"epoch": 0.6029554702558442,
"grad_norm": 2.441080093383789,
"learning_rate": 1.7660053381136593e-06,
"loss": 0.4613,
"step": 36600
},
{
"epoch": 0.6046028895734832,
"grad_norm": 1.7500004768371582,
"learning_rate": 1.7533946413780845e-06,
"loss": 0.4493,
"step": 36700
},
{
"epoch": 0.6062503088911221,
"grad_norm": 1.9511518478393555,
"learning_rate": 1.7408047677578016e-06,
"loss": 0.4487,
"step": 36800
},
{
"epoch": 0.607897728208761,
"grad_norm": 2.2485551834106445,
"learning_rate": 1.7282360683893057e-06,
"loss": 0.4515,
"step": 36900
},
{
"epoch": 0.6095451475263999,
"grad_norm": 2.1224875450134277,
"learning_rate": 1.7156888938185373e-06,
"loss": 0.4384,
"step": 37000
},
{
"epoch": 0.6111925668440388,
"grad_norm": 2.6325182914733887,
"learning_rate": 1.7031635939910968e-06,
"loss": 0.4625,
"step": 37100
},
{
"epoch": 0.6128399861616778,
"grad_norm": 1.8848086595535278,
"learning_rate": 1.6906605182424942e-06,
"loss": 0.4627,
"step": 37200
},
{
"epoch": 0.6144874054793167,
"grad_norm": 1.8694807291030884,
"learning_rate": 1.6781800152884004e-06,
"loss": 0.4572,
"step": 37300
},
{
"epoch": 0.6161348247969556,
"grad_norm": 1.9170241355895996,
"learning_rate": 1.6657224332149185e-06,
"loss": 0.4646,
"step": 37400
},
{
"epoch": 0.6177822441145945,
"grad_norm": 2.1769967079162598,
"learning_rate": 1.6532881194688843e-06,
"loss": 0.4584,
"step": 37500
},
{
"epoch": 0.6194296634322334,
"grad_norm": 2.1281752586364746,
"learning_rate": 1.640877420848169e-06,
"loss": 0.4588,
"step": 37600
},
{
"epoch": 0.6210770827498723,
"grad_norm": 3.2545199394226074,
"learning_rate": 1.6284906834920056e-06,
"loss": 0.4494,
"step": 37700
},
{
"epoch": 0.6227245020675113,
"grad_norm": 2.595705032348633,
"learning_rate": 1.6161282528713429e-06,
"loss": 0.4702,
"step": 37800
},
{
"epoch": 0.6243719213851502,
"grad_norm": 2.0563864707946777,
"learning_rate": 1.6037904737792037e-06,
"loss": 0.4374,
"step": 37900
},
{
"epoch": 0.6260193407027891,
"grad_norm": 2.5470025539398193,
"learning_rate": 1.5914776903210675e-06,
"loss": 0.4467,
"step": 38000
},
{
"epoch": 0.627666760020428,
"grad_norm": 2.6239607334136963,
"learning_rate": 1.5791902459052793e-06,
"loss": 0.4156,
"step": 38100
},
{
"epoch": 0.6293141793380669,
"grad_norm": 1.202338457107544,
"learning_rate": 1.5669284832334671e-06,
"loss": 0.4163,
"step": 38200
},
{
"epoch": 0.6309615986557059,
"grad_norm": 2.398700714111328,
"learning_rate": 1.554692744290984e-06,
"loss": 0.4515,
"step": 38300
},
{
"epoch": 0.6326090179733448,
"grad_norm": 2.2210938930511475,
"learning_rate": 1.542483370337372e-06,
"loss": 0.4704,
"step": 38400
},
{
"epoch": 0.6342564372909837,
"grad_norm": 1.1223909854888916,
"learning_rate": 1.530300701896844e-06,
"loss": 0.4231,
"step": 38500
},
{
"epoch": 0.6359038566086226,
"grad_norm": 2.2360265254974365,
"learning_rate": 1.5181450787487839e-06,
"loss": 0.4339,
"step": 38600
},
{
"epoch": 0.6375512759262615,
"grad_norm": 1.6431453227996826,
"learning_rate": 1.5060168399182731e-06,
"loss": 0.4341,
"step": 38700
},
{
"epoch": 0.6391986952439004,
"grad_norm": 1.9951646327972412,
"learning_rate": 1.4939163236666338e-06,
"loss": 0.4744,
"step": 38800
},
{
"epoch": 0.6408461145615394,
"grad_norm": 3.3914270401000977,
"learning_rate": 1.4818438674819934e-06,
"loss": 0.4595,
"step": 38900
},
{
"epoch": 0.6424935338791783,
"grad_norm": 2.1617212295532227,
"learning_rate": 1.4697998080698745e-06,
"loss": 0.4465,
"step": 39000
},
{
"epoch": 0.6441409531968172,
"grad_norm": 2.4593045711517334,
"learning_rate": 1.4577844813438022e-06,
"loss": 0.4695,
"step": 39100
},
{
"epoch": 0.6457883725144561,
"grad_norm": 2.2030935287475586,
"learning_rate": 1.4457982224159346e-06,
"loss": 0.4449,
"step": 39200
},
{
"epoch": 0.647435791832095,
"grad_norm": 1.3730400800704956,
"learning_rate": 1.433841365587719e-06,
"loss": 0.4382,
"step": 39300
},
{
"epoch": 0.649083211149734,
"grad_norm": 3.4730331897735596,
"learning_rate": 1.421914244340567e-06,
"loss": 0.4469,
"step": 39400
},
{
"epoch": 0.6507306304673729,
"grad_norm": 1.946877360343933,
"learning_rate": 1.410017191326551e-06,
"loss": 0.4685,
"step": 39500
},
{
"epoch": 0.6523780497850118,
"grad_norm": 1.6987239122390747,
"learning_rate": 1.39815053835913e-06,
"loss": 0.4469,
"step": 39600
},
{
"epoch": 0.6540254691026507,
"grad_norm": 1.93442964553833,
"learning_rate": 1.3863146164038946e-06,
"loss": 0.4523,
"step": 39700
},
{
"epoch": 0.6556728884202896,
"grad_norm": 2.016063690185547,
"learning_rate": 1.3745097555693343e-06,
"loss": 0.4079,
"step": 39800
},
{
"epoch": 0.6573203077379285,
"grad_norm": 1.9582340717315674,
"learning_rate": 1.3627362850976323e-06,
"loss": 0.4524,
"step": 39900
},
{
"epoch": 0.6589677270555675,
"grad_norm": 1.6741374731063843,
"learning_rate": 1.3509945333554828e-06,
"loss": 0.4346,
"step": 40000
},
{
"epoch": 0.6606151463732064,
"grad_norm": 2.514186382293701,
"learning_rate": 1.3392848278249298e-06,
"loss": 0.4761,
"step": 40100
},
{
"epoch": 0.6622625656908453,
"grad_norm": 2.4352760314941406,
"learning_rate": 1.3276074950942381e-06,
"loss": 0.4182,
"step": 40200
},
{
"epoch": 0.6639099850084842,
"grad_norm": 1.9086421728134155,
"learning_rate": 1.3159628608487848e-06,
"loss": 0.4431,
"step": 40300
},
{
"epoch": 0.6655574043261231,
"grad_norm": 1.9062386751174927,
"learning_rate": 1.3043512498619677e-06,
"loss": 0.4494,
"step": 40400
},
{
"epoch": 0.6672048236437621,
"grad_norm": 2.4138245582580566,
"learning_rate": 1.2927729859861571e-06,
"loss": 0.4493,
"step": 40500
},
{
"epoch": 0.668852242961401,
"grad_norm": 2.2896976470947266,
"learning_rate": 1.2812283921436597e-06,
"loss": 0.4383,
"step": 40600
},
{
"epoch": 0.6704996622790399,
"grad_norm": 2.136972427368164,
"learning_rate": 1.2697177903177077e-06,
"loss": 0.4233,
"step": 40700
},
{
"epoch": 0.6721470815966788,
"grad_norm": 1.7220128774642944,
"learning_rate": 1.2582415015434857e-06,
"loss": 0.4331,
"step": 40800
},
{
"epoch": 0.6737945009143177,
"grad_norm": 2.0941953659057617,
"learning_rate": 1.2467998458991768e-06,
"loss": 0.482,
"step": 40900
},
{
"epoch": 0.6754419202319566,
"grad_norm": 2.6354613304138184,
"learning_rate": 1.2353931424970258e-06,
"loss": 0.4487,
"step": 41000
},
{
"epoch": 0.6770893395495956,
"grad_norm": 2.2864413261413574,
"learning_rate": 1.224021709474451e-06,
"loss": 0.4668,
"step": 41100
},
{
"epoch": 0.6787367588672345,
"grad_norm": 1.8881123065948486,
"learning_rate": 1.2126858639851649e-06,
"loss": 0.4572,
"step": 41200
},
{
"epoch": 0.6803841781848734,
"grad_norm": 2.1788628101348877,
"learning_rate": 1.2013859221903273e-06,
"loss": 0.4589,
"step": 41300
},
{
"epoch": 0.6820315975025123,
"grad_norm": 2.4340453147888184,
"learning_rate": 1.190122199249733e-06,
"loss": 0.4363,
"step": 41400
},
{
"epoch": 0.6836790168201512,
"grad_norm": 2.3238346576690674,
"learning_rate": 1.1788950093130177e-06,
"loss": 0.4187,
"step": 41500
},
{
"epoch": 0.6853264361377902,
"grad_norm": 2.4663116931915283,
"learning_rate": 1.1677046655108974e-06,
"loss": 0.4542,
"step": 41600
},
{
"epoch": 0.6869738554554291,
"grad_norm": 1.5595173835754395,
"learning_rate": 1.1565514799464354e-06,
"loss": 0.4612,
"step": 41700
},
{
"epoch": 0.688621274773068,
"grad_norm": 2.0184364318847656,
"learning_rate": 1.145435763686335e-06,
"loss": 0.4535,
"step": 41800
},
{
"epoch": 0.6902686940907069,
"grad_norm": 3.0829389095306396,
"learning_rate": 1.134357826752269e-06,
"loss": 0.4307,
"step": 41900
},
{
"epoch": 0.6919161134083458,
"grad_norm": 2.8656702041625977,
"learning_rate": 1.1233179781122286e-06,
"loss": 0.4511,
"step": 42000
},
{
"epoch": 0.6935635327259847,
"grad_norm": 2.2438855171203613,
"learning_rate": 1.1123165256719077e-06,
"loss": 0.4358,
"step": 42100
},
{
"epoch": 0.6952109520436237,
"grad_norm": 2.6837387084960938,
"learning_rate": 1.1013537762661147e-06,
"loss": 0.4702,
"step": 42200
},
{
"epoch": 0.6968583713612626,
"grad_norm": 2.0240025520324707,
"learning_rate": 1.0904300356502174e-06,
"loss": 0.4211,
"step": 42300
},
{
"epoch": 0.6985057906789015,
"grad_norm": 2.1769285202026367,
"learning_rate": 1.0795456084916095e-06,
"loss": 0.4635,
"step": 42400
},
{
"epoch": 0.7001532099965404,
"grad_norm": 1.203687310218811,
"learning_rate": 1.0687007983612189e-06,
"loss": 0.4241,
"step": 42500
},
{
"epoch": 0.7018006293141793,
"grad_norm": 2.5927300453186035,
"learning_rate": 1.0578959077250417e-06,
"loss": 0.4603,
"step": 42600
},
{
"epoch": 0.7034480486318182,
"grad_norm": 1.3485939502716064,
"learning_rate": 1.0471312379356991e-06,
"loss": 0.4563,
"step": 42700
},
{
"epoch": 0.7050954679494572,
"grad_norm": 1.8091089725494385,
"learning_rate": 1.03640708922404e-06,
"loss": 0.4303,
"step": 42800
},
{
"epoch": 0.7067428872670961,
"grad_norm": 2.243220090866089,
"learning_rate": 1.0257237606907647e-06,
"loss": 0.4484,
"step": 42900
},
{
"epoch": 0.708390306584735,
"grad_norm": 1.7703299522399902,
"learning_rate": 1.0150815502980804e-06,
"loss": 0.4459,
"step": 43000
},
{
"epoch": 0.7100377259023739,
"grad_norm": 1.7074419260025024,
"learning_rate": 1.0044807548613947e-06,
"loss": 0.3932,
"step": 43100
},
{
"epoch": 0.7116851452200128,
"grad_norm": 2.930617332458496,
"learning_rate": 9.939216700410387e-07,
"loss": 0.4411,
"step": 43200
},
{
"epoch": 0.7133325645376518,
"grad_norm": 1.8758985996246338,
"learning_rate": 9.834045903340127e-07,
"loss": 0.434,
"step": 43300
},
{
"epoch": 0.7149799838552907,
"grad_norm": 2.038867712020874,
"learning_rate": 9.729298090657821e-07,
"loss": 0.4666,
"step": 43400
},
{
"epoch": 0.7166274031729296,
"grad_norm": 2.4463798999786377,
"learning_rate": 9.624976183820914e-07,
"loss": 0.4492,
"step": 43500
},
{
"epoch": 0.7182748224905685,
"grad_norm": 0.9264168739318848,
"learning_rate": 9.521083092408148e-07,
"loss": 0.4308,
"step": 43600
},
{
"epoch": 0.7199222418082074,
"grad_norm": 1.8402535915374756,
"learning_rate": 9.417621714038455e-07,
"loss": 0.4375,
"step": 43700
},
{
"epoch": 0.7215696611258463,
"grad_norm": 2.28937029838562,
"learning_rate": 9.314594934290147e-07,
"loss": 0.4451,
"step": 43800
},
{
"epoch": 0.7232170804434853,
"grad_norm": 2.710644245147705,
"learning_rate": 9.212005626620354e-07,
"loss": 0.4923,
"step": 43900
},
{
"epoch": 0.7248644997611242,
"grad_norm": 1.6825114488601685,
"learning_rate": 9.109856652284979e-07,
"loss": 0.4281,
"step": 44000
},
{
"epoch": 0.7265119190787631,
"grad_norm": 1.5312185287475586,
"learning_rate": 9.008150860258852e-07,
"loss": 0.4252,
"step": 44100
},
{
"epoch": 0.728159338396402,
"grad_norm": 1.606581449508667,
"learning_rate": 8.90689108715625e-07,
"loss": 0.4449,
"step": 44200
},
{
"epoch": 0.7298067577140409,
"grad_norm": 2.8217248916625977,
"learning_rate": 8.806080157151828e-07,
"loss": 0.4399,
"step": 44300
},
{
"epoch": 0.7314541770316799,
"grad_norm": 2.25714373588562,
"learning_rate": 8.705720881901855e-07,
"loss": 0.435,
"step": 44400
},
{
"epoch": 0.7331015963493188,
"grad_norm": 2.2999300956726074,
"learning_rate": 8.605816060465725e-07,
"loss": 0.4481,
"step": 44500
},
{
"epoch": 0.7347490156669577,
"grad_norm": 2.1442625522613525,
"learning_rate": 8.506368479227958e-07,
"loss": 0.4396,
"step": 44600
},
{
"epoch": 0.7363964349845966,
"grad_norm": 2.097804307937622,
"learning_rate": 8.407380911820487e-07,
"loss": 0.4486,
"step": 44700
},
{
"epoch": 0.7380438543022355,
"grad_norm": 2.046945333480835,
"learning_rate": 8.308856119045239e-07,
"loss": 0.4639,
"step": 44800
},
{
"epoch": 0.7396912736198744,
"grad_norm": 1.8260259628295898,
"learning_rate": 8.210796848797193e-07,
"loss": 0.4433,
"step": 44900
},
{
"epoch": 0.7413386929375134,
"grad_norm": 2.123908281326294,
"learning_rate": 8.113205835987756e-07,
"loss": 0.4183,
"step": 45000
},
{
"epoch": 0.7429861122551523,
"grad_norm": 2.8095531463623047,
"learning_rate": 8.016085802468399e-07,
"loss": 0.4357,
"step": 45100
},
{
"epoch": 0.7446335315727912,
"grad_norm": 3.761507511138916,
"learning_rate": 7.919439456954822e-07,
"loss": 0.4282,
"step": 45200
},
{
"epoch": 0.7462809508904301,
"grad_norm": 1.9820051193237305,
"learning_rate": 7.823269494951394e-07,
"loss": 0.4714,
"step": 45300
},
{
"epoch": 0.747928370208069,
"grad_norm": 1.8739370107650757,
"learning_rate": 7.727578598675917e-07,
"loss": 0.4312,
"step": 45400
},
{
"epoch": 0.749575789525708,
"grad_norm": 2.4350790977478027,
"learning_rate": 7.632369436984921e-07,
"loss": 0.4308,
"step": 45500
},
{
"epoch": 0.7512232088433469,
"grad_norm": 2.3461410999298096,
"learning_rate": 7.53764466529914e-07,
"loss": 0.4495,
"step": 45600
},
{
"epoch": 0.7528706281609858,
"grad_norm": 2.332594633102417,
"learning_rate": 7.443406925529467e-07,
"loss": 0.4271,
"step": 45700
},
{
"epoch": 0.7545180474786247,
"grad_norm": 2.7010247707366943,
"learning_rate": 7.349658846003318e-07,
"loss": 0.4581,
"step": 45800
},
{
"epoch": 0.7561654667962636,
"grad_norm": 2.0763182640075684,
"learning_rate": 7.256403041391258e-07,
"loss": 0.4599,
"step": 45900
},
{
"epoch": 0.7578128861139025,
"grad_norm": 1.678594708442688,
"learning_rate": 7.163642112634134e-07,
"loss": 0.4614,
"step": 46000
},
{
"epoch": 0.7594603054315415,
"grad_norm": 1.6114099025726318,
"learning_rate": 7.071378646870525e-07,
"loss": 0.4352,
"step": 46100
},
{
"epoch": 0.7611077247491804,
"grad_norm": 2.531679391860962,
"learning_rate": 6.979615217364539e-07,
"loss": 0.452,
"step": 46200
},
{
"epoch": 0.7627551440668193,
"grad_norm": 1.2857202291488647,
"learning_rate": 6.888354383434098e-07,
"loss": 0.4425,
"step": 46300
},
{
"epoch": 0.7644025633844582,
"grad_norm": 1.769644021987915,
"learning_rate": 6.797598690379542e-07,
"loss": 0.4325,
"step": 46400
},
{
"epoch": 0.7660499827020971,
"grad_norm": 1.5384021997451782,
"learning_rate": 6.707350669412613e-07,
"loss": 0.4739,
"step": 46500
},
{
"epoch": 0.7676974020197361,
"grad_norm": 2.200972318649292,
"learning_rate": 6.617612837585887e-07,
"loss": 0.4702,
"step": 46600
},
{
"epoch": 0.769344821337375,
"grad_norm": 2.062885046005249,
"learning_rate": 6.528387697722599e-07,
"loss": 0.4703,
"step": 46700
},
{
"epoch": 0.7709922406550139,
"grad_norm": 1.4489109516143799,
"learning_rate": 6.439677738346752e-07,
"loss": 0.4403,
"step": 46800
},
{
"epoch": 0.7726396599726528,
"grad_norm": 3.070599317550659,
"learning_rate": 6.351485433613799e-07,
"loss": 0.4353,
"step": 46900
},
{
"epoch": 0.7742870792902917,
"grad_norm": 2.201493978500366,
"learning_rate": 6.263813243241593e-07,
"loss": 0.4201,
"step": 47000
},
{
"epoch": 0.7759344986079306,
"grad_norm": 2.203810930252075,
"learning_rate": 6.176663612441785e-07,
"loss": 0.4681,
"step": 47100
},
{
"epoch": 0.7775819179255696,
"grad_norm": 2.4481027126312256,
"learning_rate": 6.090038971851642e-07,
"loss": 0.4721,
"step": 47200
},
{
"epoch": 0.7792293372432085,
"grad_norm": 1.9644261598587036,
"learning_rate": 6.003941737466273e-07,
"loss": 0.4365,
"step": 47300
},
{
"epoch": 0.7808767565608474,
"grad_norm": 1.6432219743728638,
"learning_rate": 5.918374310571176e-07,
"loss": 0.4291,
"step": 47400
},
{
"epoch": 0.7825241758784863,
"grad_norm": 2.489579200744629,
"learning_rate": 5.833339077675343e-07,
"loss": 0.4396,
"step": 47500
},
{
"epoch": 0.7841715951961252,
"grad_norm": 1.5569617748260498,
"learning_rate": 5.748838410444665e-07,
"loss": 0.4491,
"step": 47600
},
{
"epoch": 0.7858190145137642,
"grad_norm": 2.200166702270508,
"learning_rate": 5.664874665635767e-07,
"loss": 0.4672,
"step": 47700
},
{
"epoch": 0.7874664338314031,
"grad_norm": 2.1616365909576416,
"learning_rate": 5.581450185030315e-07,
"loss": 0.4579,
"step": 47800
},
{
"epoch": 0.789113853149042,
"grad_norm": 1.2923545837402344,
"learning_rate": 5.4985672953697e-07,
"loss": 0.4424,
"step": 47900
},
{
"epoch": 0.7907612724666809,
"grad_norm": 2.338345527648926,
"learning_rate": 5.416228308290095e-07,
"loss": 0.4416,
"step": 48000
},
{
"epoch": 0.7924086917843198,
"grad_norm": 1.684395670890808,
"learning_rate": 5.334435520258039e-07,
"loss": 0.4136,
"step": 48100
},
{
"epoch": 0.7940561111019587,
"grad_norm": 1.9474413394927979,
"learning_rate": 5.25319121250637e-07,
"loss": 0.4252,
"step": 48200
},
{
"epoch": 0.7957035304195977,
"grad_norm": 2.8479621410369873,
"learning_rate": 5.172497650970567e-07,
"loss": 0.4375,
"step": 48300
},
{
"epoch": 0.7973509497372366,
"grad_norm": 1.9628188610076904,
"learning_rate": 5.092357086225627e-07,
"loss": 0.4455,
"step": 48400
},
{
"epoch": 0.7989983690548755,
"grad_norm": 1.8695141077041626,
"learning_rate": 5.012771753423223e-07,
"loss": 0.4819,
"step": 48500
},
{
"epoch": 0.8006457883725144,
"grad_norm": 1.873336672782898,
"learning_rate": 4.933743872229388e-07,
"loss": 0.4405,
"step": 48600
},
{
"epoch": 0.8022932076901533,
"grad_norm": 2.134643077850342,
"learning_rate": 4.85527564676262e-07,
"loss": 0.4381,
"step": 48700
},
{
"epoch": 0.8039406270077923,
"grad_norm": 2.1162221431732178,
"learning_rate": 4.777369265532408e-07,
"loss": 0.4577,
"step": 48800
},
{
"epoch": 0.8055880463254312,
"grad_norm": 2.036649227142334,
"learning_rate": 4.7000269013781604e-07,
"loss": 0.4238,
"step": 48900
},
{
"epoch": 0.8072354656430701,
"grad_norm": 1.4969152212142944,
"learning_rate": 4.6232507114086613e-07,
"loss": 0.45,
"step": 49000
},
{
"epoch": 0.808882884960709,
"grad_norm": 1.9845752716064453,
"learning_rate": 4.547042836941865e-07,
"loss": 0.4548,
"step": 49100
},
{
"epoch": 0.8105303042783479,
"grad_norm": 1.967536449432373,
"learning_rate": 4.4714054034451585e-07,
"loss": 0.4057,
"step": 49200
},
{
"epoch": 0.8121777235959868,
"grad_norm": 1.79136323928833,
"learning_rate": 4.3963405204761416e-07,
"loss": 0.4456,
"step": 49300
},
{
"epoch": 0.8138251429136258,
"grad_norm": 2.0205838680267334,
"learning_rate": 4.3218502816237433e-07,
"loss": 0.398,
"step": 49400
},
{
"epoch": 0.8154725622312647,
"grad_norm": 1.4011536836624146,
"learning_rate": 4.247936764449828e-07,
"loss": 0.4542,
"step": 49500
},
{
"epoch": 0.8171199815489036,
"grad_norm": 1.8763850927352905,
"learning_rate": 4.174602030431299e-07,
"loss": 0.4464,
"step": 49600
},
{
"epoch": 0.8187674008665425,
"grad_norm": 1.8748266696929932,
"learning_rate": 4.1018481249025523e-07,
"loss": 0.4608,
"step": 49700
},
{
"epoch": 0.8204148201841814,
"grad_norm": 2.887885808944702,
"learning_rate": 4.0296770769984393e-07,
"loss": 0.468,
"step": 49800
},
{
"epoch": 0.8220622395018204,
"grad_norm": 3.4386472702026367,
"learning_rate": 3.958090899597705e-07,
"loss": 0.4487,
"step": 49900
},
{
"epoch": 0.8237096588194593,
"grad_norm": 2.4126787185668945,
"learning_rate": 3.8870915892668253e-07,
"loss": 0.452,
"step": 50000
},
{
"epoch": 0.8253570781370982,
"grad_norm": 1.8389333486557007,
"learning_rate": 3.816681126204297e-07,
"loss": 0.4666,
"step": 50100
},
{
"epoch": 0.8270044974547371,
"grad_norm": 2.392357349395752,
"learning_rate": 3.746861474185487e-07,
"loss": 0.4457,
"step": 50200
},
{
"epoch": 0.828651916772376,
"grad_norm": 2.450810194015503,
"learning_rate": 3.677634580507758e-07,
"loss": 0.4777,
"step": 50300
},
{
"epoch": 0.8302993360900149,
"grad_norm": 2.1401236057281494,
"learning_rate": 3.609002375936244e-07,
"loss": 0.4546,
"step": 50400
},
{
"epoch": 0.831946755407654,
"grad_norm": 2.275261163711548,
"learning_rate": 3.540966774649962e-07,
"loss": 0.4286,
"step": 50500
},
{
"epoch": 0.8335941747252928,
"grad_norm": 2.4037744998931885,
"learning_rate": 3.4735296741884113e-07,
"loss": 0.441,
"step": 50600
},
{
"epoch": 0.8352415940429317,
"grad_norm": 1.7885956764221191,
"learning_rate": 3.406692955398699e-07,
"loss": 0.4487,
"step": 50700
},
{
"epoch": 0.8368890133605706,
"grad_norm": 2.087801456451416,
"learning_rate": 3.340458482383038e-07,
"loss": 0.4414,
"step": 50800
},
{
"epoch": 0.8385364326782095,
"grad_norm": 1.9815489053726196,
"learning_rate": 3.2748281024467615e-07,
"loss": 0.4408,
"step": 50900
},
{
"epoch": 0.8401838519958486,
"grad_norm": 2.0206503868103027,
"learning_rate": 3.209803646046825e-07,
"loss": 0.4769,
"step": 51000
},
{
"epoch": 0.8418312713134875,
"grad_norm": 2.112884521484375,
"learning_rate": 3.14538692674074e-07,
"loss": 0.4392,
"step": 51100
},
{
"epoch": 0.8434786906311263,
"grad_norm": 1.9830784797668457,
"learning_rate": 3.0815797411359705e-07,
"loss": 0.4534,
"step": 51200
},
{
"epoch": 0.8451261099487652,
"grad_norm": 2.5792412757873535,
"learning_rate": 3.0183838688398834e-07,
"loss": 0.4141,
"step": 51300
},
{
"epoch": 0.8467735292664041,
"grad_norm": 1.4945428371429443,
"learning_rate": 2.9558010724100556e-07,
"loss": 0.4413,
"step": 51400
},
{
"epoch": 0.848420948584043,
"grad_norm": 1.6658538579940796,
"learning_rate": 2.893833097305135e-07,
"loss": 0.4381,
"step": 51500
},
{
"epoch": 0.850068367901682,
"grad_norm": 1.9433872699737549,
"learning_rate": 2.832481671836174e-07,
"loss": 0.4916,
"step": 51600
},
{
"epoch": 0.851715787219321,
"grad_norm": 2.8448355197906494,
"learning_rate": 2.771748507118413e-07,
"loss": 0.4529,
"step": 51700
},
{
"epoch": 0.8533632065369599,
"grad_norm": 1.6692224740982056,
"learning_rate": 2.711635297023546e-07,
"loss": 0.4331,
"step": 51800
},
{
"epoch": 0.8550106258545987,
"grad_norm": 2.085247039794922,
"learning_rate": 2.6521437181325105e-07,
"loss": 0.4573,
"step": 51900
},
{
"epoch": 0.8566580451722376,
"grad_norm": 1.9214270114898682,
"learning_rate": 2.593275429688699e-07,
"loss": 0.443,
"step": 52000
},
{
"epoch": 0.8583054644898767,
"grad_norm": 1.856969952583313,
"learning_rate": 2.535032073551677e-07,
"loss": 0.4804,
"step": 52100
},
{
"epoch": 0.8599528838075156,
"grad_norm": 2.086461067199707,
"learning_rate": 2.4774152741514207e-07,
"loss": 0.4505,
"step": 52200
},
{
"epoch": 0.8616003031251545,
"grad_norm": 2.729485511779785,
"learning_rate": 2.4204266384429855e-07,
"loss": 0.4661,
"step": 52300
},
{
"epoch": 0.8632477224427934,
"grad_norm": 1.9726873636245728,
"learning_rate": 2.3640677558616875e-07,
"loss": 0.4561,
"step": 52400
},
{
"epoch": 0.8648951417604323,
"grad_norm": 1.9894851446151733,
"learning_rate": 2.308340198278808e-07,
"loss": 0.4564,
"step": 52500
},
{
"epoch": 0.8665425610780711,
"grad_norm": 1.4880281686782837,
"learning_rate": 2.2532455199577085e-07,
"loss": 0.43,
"step": 52600
},
{
"epoch": 0.8681899803957102,
"grad_norm": 1.956846833229065,
"learning_rate": 2.198785257510491e-07,
"loss": 0.4671,
"step": 52700
},
{
"epoch": 0.8698373997133491,
"grad_norm": 2.6969892978668213,
"learning_rate": 2.144960929855175e-07,
"loss": 0.4306,
"step": 52800
},
{
"epoch": 0.871484819030988,
"grad_norm": 2.5215413570404053,
"learning_rate": 2.091774038173297e-07,
"loss": 0.4458,
"step": 52900
},
{
"epoch": 0.8731322383486269,
"grad_norm": 1.9688514471054077,
"learning_rate": 2.039226065868044e-07,
"loss": 0.4283,
"step": 53000
},
{
"epoch": 0.8747796576662658,
"grad_norm": 2.583317995071411,
"learning_rate": 1.9873184785229205e-07,
"loss": 0.4429,
"step": 53100
},
{
"epoch": 0.8764270769839048,
"grad_norm": 1.426698088645935,
"learning_rate": 1.9360527238608206e-07,
"loss": 0.4559,
"step": 53200
},
{
"epoch": 0.8780744963015437,
"grad_norm": 1.861429214477539,
"learning_rate": 1.8854302317036805e-07,
"loss": 0.4513,
"step": 53300
},
{
"epoch": 0.8797219156191826,
"grad_norm": 1.8271915912628174,
"learning_rate": 1.8354524139325923e-07,
"loss": 0.4387,
"step": 53400
},
{
"epoch": 0.8813693349368215,
"grad_norm": 1.5195509195327759,
"learning_rate": 1.786120664448432e-07,
"loss": 0.4354,
"step": 53500
},
{
"epoch": 0.8830167542544604,
"grad_norm": 1.372504711151123,
"learning_rate": 1.7374363591329768e-07,
"loss": 0.4212,
"step": 53600
},
{
"epoch": 0.8846641735720993,
"grad_norm": 1.619235634803772,
"learning_rate": 1.6894008558105274e-07,
"loss": 0.427,
"step": 53700
},
{
"epoch": 0.8863115928897383,
"grad_norm": 2.1850979328155518,
"learning_rate": 1.6420154942100585e-07,
"loss": 0.4412,
"step": 53800
},
{
"epoch": 0.8879590122073772,
"grad_norm": 2.942978858947754,
"learning_rate": 1.5952815959278168e-07,
"loss": 0.4453,
"step": 53900
},
{
"epoch": 0.8896064315250161,
"grad_norm": 2.521692991256714,
"learning_rate": 1.5492004643904962e-07,
"loss": 0.4242,
"step": 54000
},
{
"epoch": 0.891253850842655,
"grad_norm": 2.2875068187713623,
"learning_rate": 1.5037733848188658e-07,
"loss": 0.4234,
"step": 54100
},
{
"epoch": 0.8929012701602939,
"grad_norm": 2.937547445297241,
"learning_rate": 1.4590016241919357e-07,
"loss": 0.4557,
"step": 54200
},
{
"epoch": 0.8945486894779329,
"grad_norm": 2.359915256500244,
"learning_rate": 1.4148864312116124e-07,
"loss": 0.4355,
"step": 54300
},
{
"epoch": 0.8961961087955718,
"grad_norm": 1.8787094354629517,
"learning_rate": 1.3714290362678685e-07,
"loss": 0.4478,
"step": 54400
},
{
"epoch": 0.8978435281132107,
"grad_norm": 1.8454256057739258,
"learning_rate": 1.328630651404436e-07,
"loss": 0.4374,
"step": 54500
},
{
"epoch": 0.8994909474308496,
"grad_norm": 1.6232373714447021,
"learning_rate": 1.286492470285e-07,
"loss": 0.4501,
"step": 54600
},
{
"epoch": 0.9011383667484885,
"grad_norm": 2.0913541316986084,
"learning_rate": 1.2450156681598964e-07,
"loss": 0.4564,
"step": 54700
},
{
"epoch": 0.9027857860661274,
"grad_norm": 3.337273120880127,
"learning_rate": 1.2042014018333575e-07,
"loss": 0.444,
"step": 54800
},
{
"epoch": 0.9044332053837664,
"grad_norm": 1.986515760421753,
"learning_rate": 1.1640508096312259e-07,
"loss": 0.409,
"step": 54900
},
{
"epoch": 0.9060806247014053,
"grad_norm": 2.8050506114959717,
"learning_rate": 1.1245650113692052e-07,
"loss": 0.4345,
"step": 55000
},
{
"epoch": 0.9077280440190442,
"grad_norm": 1.7033820152282715,
"learning_rate": 1.085745108321648e-07,
"loss": 0.443,
"step": 55100
},
{
"epoch": 0.9093754633366831,
"grad_norm": 1.3102610111236572,
"learning_rate": 1.0475921831908265e-07,
"loss": 0.452,
"step": 55200
},
{
"epoch": 0.911022882654322,
"grad_norm": 1.4171772003173828,
"learning_rate": 1.0101073000767264e-07,
"loss": 0.4472,
"step": 55300
},
{
"epoch": 0.912670301971961,
"grad_norm": 2.2562355995178223,
"learning_rate": 9.732915044474017e-08,
"loss": 0.4424,
"step": 55400
},
{
"epoch": 0.9143177212895999,
"grad_norm": 1.537164330482483,
"learning_rate": 9.371458231097807e-08,
"loss": 0.4339,
"step": 55500
},
{
"epoch": 0.9159651406072388,
"grad_norm": 1.478975534439087,
"learning_rate": 9.016712641810393e-08,
"loss": 0.4746,
"step": 55600
},
{
"epoch": 0.9176125599248777,
"grad_norm": 2.3379318714141846,
"learning_rate": 8.668688170604955e-08,
"loss": 0.4573,
"step": 55700
},
{
"epoch": 0.9192599792425166,
"grad_norm": 2.287503242492676,
"learning_rate": 8.327394524020094e-08,
"loss": 0.459,
"step": 55800
},
{
"epoch": 0.9209073985601555,
"grad_norm": 2.074932098388672,
"learning_rate": 7.992841220868908e-08,
"loss": 0.4406,
"step": 55900
},
{
"epoch": 0.9225548178777945,
"grad_norm": 2.3185274600982666,
"learning_rate": 7.665037591973873e-08,
"loss": 0.4315,
"step": 56000
},
{
"epoch": 0.9242022371954334,
"grad_norm": 2.681718587875366,
"learning_rate": 7.343992779906328e-08,
"loss": 0.4496,
"step": 56100
},
{
"epoch": 0.9258496565130723,
"grad_norm": 2.437779188156128,
"learning_rate": 7.029715738731541e-08,
"loss": 0.4363,
"step": 56200
},
{
"epoch": 0.9274970758307112,
"grad_norm": 2.111402988433838,
"learning_rate": 6.722215233759071e-08,
"loss": 0.446,
"step": 56300
},
{
"epoch": 0.9291444951483501,
"grad_norm": 1.8886587619781494,
"learning_rate": 6.421499841298195e-08,
"loss": 0.4414,
"step": 56400
},
{
"epoch": 0.9307919144659891,
"grad_norm": 1.649271011352539,
"learning_rate": 6.127577948418728e-08,
"loss": 0.4409,
"step": 56500
},
{
"epoch": 0.932439333783628,
"grad_norm": 2.6484766006469727,
"learning_rate": 5.84045775271716e-08,
"loss": 0.4325,
"step": 56600
},
{
"epoch": 0.9340867531012669,
"grad_norm": 1.9493142366409302,
"learning_rate": 5.560147262088034e-08,
"loss": 0.4165,
"step": 56700
},
{
"epoch": 0.9357341724189058,
"grad_norm": 1.875835657119751,
"learning_rate": 5.286654294500454e-08,
"loss": 0.433,
"step": 56800
},
{
"epoch": 0.9373815917365447,
"grad_norm": 1.9242185354232788,
"learning_rate": 5.019986477780181e-08,
"loss": 0.445,
"step": 56900
},
{
"epoch": 0.9390290110541836,
"grad_norm": 2.1051392555236816,
"learning_rate": 4.7601512493968824e-08,
"loss": 0.4469,
"step": 57000
},
{
"epoch": 0.9406764303718226,
"grad_norm": 1.5556972026824951,
"learning_rate": 4.507155856256634e-08,
"loss": 0.4746,
"step": 57100
},
{
"epoch": 0.9423238496894615,
"grad_norm": 1.9394145011901855,
"learning_rate": 4.2610073544998577e-08,
"loss": 0.4347,
"step": 57200
},
{
"epoch": 0.9439712690071004,
"grad_norm": 1.9497727155685425,
"learning_rate": 4.021712609304507e-08,
"loss": 0.4426,
"step": 57300
},
{
"epoch": 0.9456186883247393,
"grad_norm": 1.587270736694336,
"learning_rate": 3.789278294694498e-08,
"loss": 0.4277,
"step": 57400
},
{
"epoch": 0.9472661076423782,
"grad_norm": 1.201451301574707,
"learning_rate": 3.563710893353778e-08,
"loss": 0.4448,
"step": 57500
},
{
"epoch": 0.9489135269600172,
"grad_norm": 2.1374833583831787,
"learning_rate": 3.345016696445297e-08,
"loss": 0.4276,
"step": 57600
},
{
"epoch": 0.9505609462776561,
"grad_norm": 2.4307470321655273,
"learning_rate": 3.133201803435737e-08,
"loss": 0.4353,
"step": 57700
},
{
"epoch": 0.952208365595295,
"grad_norm": 1.3492801189422607,
"learning_rate": 2.928272121925202e-08,
"loss": 0.4129,
"step": 57800
},
{
"epoch": 0.9538557849129339,
"grad_norm": 1.4907076358795166,
"learning_rate": 2.7302333674827098e-08,
"loss": 0.4478,
"step": 57900
},
{
"epoch": 0.9555032042305728,
"grad_norm": 1.893916368484497,
"learning_rate": 2.539091063486432e-08,
"loss": 0.4465,
"step": 58000
},
{
"epoch": 0.9571506235482117,
"grad_norm": 2.277837038040161,
"learning_rate": 2.354850540969983e-08,
"loss": 0.4326,
"step": 58100
},
{
"epoch": 0.9587980428658507,
"grad_norm": 1.9928171634674072,
"learning_rate": 2.177516938473567e-08,
"loss": 0.418,
"step": 58200
},
{
"epoch": 0.9604454621834896,
"grad_norm": 2.096127986907959,
"learning_rate": 2.0070952019006496e-08,
"loss": 0.453,
"step": 58300
},
{
"epoch": 0.9620928815011285,
"grad_norm": 2.574500322341919,
"learning_rate": 1.8435900843800926e-08,
"loss": 0.4425,
"step": 58400
},
{
"epoch": 0.9637403008187674,
"grad_norm": 2.5897390842437744,
"learning_rate": 1.6870061461335685e-08,
"loss": 0.4273,
"step": 58500
},
{
"epoch": 0.9653877201364063,
"grad_norm": 1.7342420816421509,
"learning_rate": 1.5373477543482453e-08,
"loss": 0.4365,
"step": 58600
},
{
"epoch": 0.9670351394540453,
"grad_norm": 3.1810550689697266,
"learning_rate": 1.3946190830552431e-08,
"loss": 0.4385,
"step": 58700
},
{
"epoch": 0.9686825587716842,
"grad_norm": 2.5934085845947266,
"learning_rate": 1.2588241130129242e-08,
"loss": 0.4453,
"step": 58800
},
{
"epoch": 0.9703299780893231,
"grad_norm": 3.0193750858306885,
"learning_rate": 1.1299666315961743e-08,
"loss": 0.4181,
"step": 58900
},
{
"epoch": 0.971977397406962,
"grad_norm": 2.132373809814453,
"learning_rate": 1.0080502326904329e-08,
"loss": 0.4217,
"step": 59000
},
{
"epoch": 0.9736248167246009,
"grad_norm": 2.04423189163208,
"learning_rate": 8.930783165917723e-09,
"loss": 0.4313,
"step": 59100
},
{
"epoch": 0.9752722360422398,
"grad_norm": 1.6803611516952515,
"learning_rate": 7.85054089911863e-09,
"loss": 0.4507,
"step": 59200
},
{
"epoch": 0.9769196553598788,
"grad_norm": 2.210566520690918,
"learning_rate": 6.8398056548860116e-09,
"loss": 0.4446,
"step": 59300
},
{
"epoch": 0.9785670746775177,
"grad_norm": 1.9046763181686401,
"learning_rate": 5.898605623021192e-09,
"loss": 0.4478,
"step": 59400
},
{
"epoch": 0.9802144939951566,
"grad_norm": 1.7694292068481445,
"learning_rate": 5.026967053960441e-09,
"loss": 0.4296,
"step": 59500
},
{
"epoch": 0.9818619133127955,
"grad_norm": 1.8257120847702026,
"learning_rate": 4.224914258044721e-09,
"loss": 0.4303,
"step": 59600
},
{
"epoch": 0.9835093326304344,
"grad_norm": 1.4642283916473389,
"learning_rate": 3.4924696048396765e-09,
"loss": 0.4322,
"step": 59700
},
{
"epoch": 0.9851567519480734,
"grad_norm": 2.159425973892212,
"learning_rate": 2.829653522513076e-09,
"loss": 0.4279,
"step": 59800
},
{
"epoch": 0.9868041712657123,
"grad_norm": 1.9656975269317627,
"learning_rate": 2.2364844972647125e-09,
"loss": 0.4386,
"step": 59900
},
{
"epoch": 0.9884515905833512,
"grad_norm": 2.556670665740967,
"learning_rate": 1.7129790728101503e-09,
"loss": 0.4393,
"step": 60000
},
{
"epoch": 0.9900990099009901,
"grad_norm": 2.0833001136779785,
"learning_rate": 1.2591518499208143e-09,
"loss": 0.4191,
"step": 60100
},
{
"epoch": 0.991746429218629,
"grad_norm": 2.159656524658203,
"learning_rate": 8.750154860151516e-10,
"loss": 0.4675,
"step": 60200
},
{
"epoch": 0.9933938485362679,
"grad_norm": 2.0303680896759033,
"learning_rate": 5.605806948061343e-10,
"loss": 0.447,
"step": 60300
},
{
"epoch": 0.9950412678539069,
"grad_norm": 1.8287807703018188,
"learning_rate": 3.1585624600372066e-10,
"loss": 0.4306,
"step": 60400
},
{
"epoch": 0.9966886871715458,
"grad_norm": 2.2728703022003174,
"learning_rate": 1.4084896506783018e-10,
"loss": 0.4284,
"step": 60500
},
{
"epoch": 0.9983361064891847,
"grad_norm": 2.0561728477478027,
"learning_rate": 3.556373302016081e-11,
"loss": 0.4195,
"step": 60600
},
{
"epoch": 0.9999835258068236,
"grad_norm": 2.020707130432129,
"learning_rate": 3.4863070763613284e-15,
"loss": 0.4415,
"step": 60700
}
],
"logging_steps": 100,
"max_steps": 60701,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.004310214013092e+17,
"train_batch_size": 3,
"trial_name": null,
"trial_params": null
}