{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.997756086615055, "eval_steps": 500, "global_step": 66840, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.4561903178691864, "learning_rate": 4.9999999309637996e-05, "loss": 1.7761, "step": 5 }, { "epoch": 0.0, "grad_norm": 0.6195425391197205, "learning_rate": 4.999999723855201e-05, "loss": 1.6886, "step": 10 }, { "epoch": 0.0, "grad_norm": 0.6293871402740479, "learning_rate": 4.9999993786742175e-05, "loss": 1.6967, "step": 15 }, { "epoch": 0.01, "grad_norm": 0.6493249535560608, "learning_rate": 4.999998895420865e-05, "loss": 1.8346, "step": 20 }, { "epoch": 0.01, "grad_norm": 0.6501443982124329, "learning_rate": 4.999998274095173e-05, "loss": 1.7955, "step": 25 }, { "epoch": 0.01, "grad_norm": 1.5174015760421753, "learning_rate": 4.999997514697176e-05, "loss": 1.8013, "step": 30 }, { "epoch": 0.01, "grad_norm": 0.7737800478935242, "learning_rate": 4.999996617226913e-05, "loss": 1.6485, "step": 35 }, { "epoch": 0.01, "grad_norm": 1.2001756429672241, "learning_rate": 4.999995581684437e-05, "loss": 1.3865, "step": 40 }, { "epoch": 0.01, "grad_norm": 1.1627835035324097, "learning_rate": 4.9999944080698024e-05, "loss": 1.2725, "step": 45 }, { "epoch": 0.01, "grad_norm": 0.5961304306983948, "learning_rate": 4.999993096383076e-05, "loss": 1.5426, "step": 50 }, { "epoch": 0.02, "grad_norm": 0.7442084550857544, "learning_rate": 4.999991646624329e-05, "loss": 1.5626, "step": 55 }, { "epoch": 0.02, "grad_norm": 0.9939375519752502, "learning_rate": 4.9999900587936426e-05, "loss": 1.427, "step": 60 }, { "epoch": 0.02, "grad_norm": 0.8643019199371338, "learning_rate": 4.9999883328911034e-05, "loss": 1.8313, "step": 65 }, { "epoch": 0.02, "grad_norm": 0.8557299375534058, "learning_rate": 4.999986468916807e-05, "loss": 1.4777, "step": 70 }, { "epoch": 0.02, "grad_norm": 0.45538774132728577, "learning_rate": 
4.9999844668708574e-05, "loss": 1.5458, "step": 75 }, { "epoch": 0.02, "grad_norm": 0.6336299777030945, "learning_rate": 4.9999823267533627e-05, "loss": 1.3938, "step": 80 }, { "epoch": 0.03, "grad_norm": 0.778620719909668, "learning_rate": 4.9999800485644445e-05, "loss": 1.4802, "step": 85 }, { "epoch": 0.03, "grad_norm": 0.8213117718696594, "learning_rate": 4.9999776323042255e-05, "loss": 1.351, "step": 90 }, { "epoch": 0.03, "grad_norm": 0.6166107058525085, "learning_rate": 4.999975077972841e-05, "loss": 1.4811, "step": 95 }, { "epoch": 0.03, "grad_norm": 0.6074905395507812, "learning_rate": 4.999972385570432e-05, "loss": 1.4381, "step": 100 }, { "epoch": 0.03, "grad_norm": 0.7994800209999084, "learning_rate": 4.999969555097146e-05, "loss": 1.4045, "step": 105 }, { "epoch": 0.03, "grad_norm": 0.5199000239372253, "learning_rate": 4.99996658655314e-05, "loss": 1.479, "step": 110 }, { "epoch": 0.03, "grad_norm": 0.5505474209785461, "learning_rate": 4.999963479938577e-05, "loss": 1.5272, "step": 115 }, { "epoch": 0.04, "grad_norm": 1.027380108833313, "learning_rate": 4.999960235253631e-05, "loss": 1.4791, "step": 120 }, { "epoch": 0.04, "grad_norm": 0.7694772481918335, "learning_rate": 4.99995685249848e-05, "loss": 1.4485, "step": 125 }, { "epoch": 0.04, "grad_norm": 0.9029272794723511, "learning_rate": 4.99995333167331e-05, "loss": 1.3591, "step": 130 }, { "epoch": 0.04, "grad_norm": 0.46942275762557983, "learning_rate": 4.999949672778316e-05, "loss": 1.3628, "step": 135 }, { "epoch": 0.04, "grad_norm": 0.8350025415420532, "learning_rate": 4.999945875813701e-05, "loss": 1.4336, "step": 140 }, { "epoch": 0.04, "grad_norm": 0.6389840841293335, "learning_rate": 4.999941940779673e-05, "loss": 1.4277, "step": 145 }, { "epoch": 0.04, "grad_norm": 0.38392823934555054, "learning_rate": 4.99993786767645e-05, "loss": 1.4192, "step": 150 }, { "epoch": 0.05, "grad_norm": 1.229735255241394, "learning_rate": 4.999933656504257e-05, "loss": 1.514, "step": 155 }, { "epoch": 0.05, 
"grad_norm": 1.3870686292648315, "learning_rate": 4.9999293072633273e-05, "loss": 1.4909, "step": 160 }, { "epoch": 0.05, "grad_norm": 2.531494617462158, "learning_rate": 4.9999248199539006e-05, "loss": 1.4567, "step": 165 }, { "epoch": 0.05, "grad_norm": 0.5871274471282959, "learning_rate": 4.9999201945762244e-05, "loss": 1.326, "step": 170 }, { "epoch": 0.05, "grad_norm": 0.35784173011779785, "learning_rate": 4.999915431130554e-05, "loss": 1.4567, "step": 175 }, { "epoch": 0.05, "grad_norm": 0.901470422744751, "learning_rate": 4.9999105296171535e-05, "loss": 1.5553, "step": 180 }, { "epoch": 0.06, "grad_norm": 1.3752151727676392, "learning_rate": 4.9999054900362915e-05, "loss": 1.389, "step": 185 }, { "epoch": 0.06, "grad_norm": 1.1029325723648071, "learning_rate": 4.9999003123882494e-05, "loss": 1.4993, "step": 190 }, { "epoch": 0.06, "grad_norm": 1.255204200744629, "learning_rate": 4.999894996673311e-05, "loss": 1.5035, "step": 195 }, { "epoch": 0.06, "grad_norm": 1.002902865409851, "learning_rate": 4.9998895428917704e-05, "loss": 1.3108, "step": 200 }, { "epoch": 0.06, "grad_norm": 1.3146979808807373, "learning_rate": 4.9998839510439286e-05, "loss": 1.5487, "step": 205 }, { "epoch": 0.06, "grad_norm": 0.5592719316482544, "learning_rate": 4.999878221130095e-05, "loss": 1.3763, "step": 210 }, { "epoch": 0.06, "grad_norm": 0.5583714246749878, "learning_rate": 4.999872353150586e-05, "loss": 1.4305, "step": 215 }, { "epoch": 0.07, "grad_norm": 1.0968598127365112, "learning_rate": 4.999866347105725e-05, "loss": 1.4064, "step": 220 }, { "epoch": 0.07, "grad_norm": 0.7956498265266418, "learning_rate": 4.999860202995844e-05, "loss": 1.3986, "step": 225 }, { "epoch": 0.07, "grad_norm": 1.1131305694580078, "learning_rate": 4.999853920821283e-05, "loss": 1.4269, "step": 230 }, { "epoch": 0.07, "grad_norm": 1.5098061561584473, "learning_rate": 4.999847500582388e-05, "loss": 1.5158, "step": 235 }, { "epoch": 0.07, "grad_norm": 0.8009808659553528, "learning_rate": 
4.999840942279514e-05, "loss": 1.4029, "step": 240 }, { "epoch": 0.07, "grad_norm": 0.5857890248298645, "learning_rate": 4.999834245913023e-05, "loss": 1.4703, "step": 245 }, { "epoch": 0.07, "grad_norm": 0.9343911409378052, "learning_rate": 4.9998274114832854e-05, "loss": 1.358, "step": 250 }, { "epoch": 0.08, "grad_norm": 1.430106520652771, "learning_rate": 4.999820438990678e-05, "loss": 1.3883, "step": 255 }, { "epoch": 0.08, "grad_norm": 0.5393354296684265, "learning_rate": 4.999813328435586e-05, "loss": 1.4375, "step": 260 }, { "epoch": 0.08, "grad_norm": 0.7708584666252136, "learning_rate": 4.999806079818403e-05, "loss": 1.4298, "step": 265 }, { "epoch": 0.08, "grad_norm": 0.8902773261070251, "learning_rate": 4.999798693139528e-05, "loss": 1.4823, "step": 270 }, { "epoch": 0.08, "grad_norm": 0.7855858206748962, "learning_rate": 4.99979116839937e-05, "loss": 1.3869, "step": 275 }, { "epoch": 0.08, "grad_norm": 0.864590585231781, "learning_rate": 4.9997835055983436e-05, "loss": 1.3566, "step": 280 }, { "epoch": 0.09, "grad_norm": 1.4891940355300903, "learning_rate": 4.999775704736873e-05, "loss": 1.3024, "step": 285 }, { "epoch": 0.09, "grad_norm": 1.1072434186935425, "learning_rate": 4.9997677658153885e-05, "loss": 1.4124, "step": 290 }, { "epoch": 0.09, "grad_norm": 3.1894373893737793, "learning_rate": 4.999759688834329e-05, "loss": 1.5361, "step": 295 }, { "epoch": 0.09, "grad_norm": 0.6644517183303833, "learning_rate": 4.99975147379414e-05, "loss": 1.4811, "step": 300 }, { "epoch": 0.09, "grad_norm": 1.1441906690597534, "learning_rate": 4.999743120695275e-05, "loss": 1.4017, "step": 305 }, { "epoch": 0.09, "grad_norm": 0.748852550983429, "learning_rate": 4.999734629538197e-05, "loss": 1.3883, "step": 310 }, { "epoch": 0.09, "grad_norm": 0.8531980514526367, "learning_rate": 4.999726000323373e-05, "loss": 1.3784, "step": 315 }, { "epoch": 0.1, "grad_norm": 0.746377170085907, "learning_rate": 4.99971723305128e-05, "loss": 1.4622, "step": 320 }, { "epoch": 0.1, 
"grad_norm": 0.8359838128089905, "learning_rate": 4.999708327722402e-05, "loss": 1.2369, "step": 325 }, { "epoch": 0.1, "grad_norm": 1.8505661487579346, "learning_rate": 4.999699284337232e-05, "loss": 1.6082, "step": 330 }, { "epoch": 0.1, "grad_norm": 1.608825922012329, "learning_rate": 4.99969010289627e-05, "loss": 1.5367, "step": 335 }, { "epoch": 0.1, "grad_norm": 1.0551233291625977, "learning_rate": 4.99968078340002e-05, "loss": 1.4654, "step": 340 }, { "epoch": 0.1, "grad_norm": 1.0916029214859009, "learning_rate": 4.999671325848999e-05, "loss": 1.585, "step": 345 }, { "epoch": 0.1, "grad_norm": 0.6593181490898132, "learning_rate": 4.9996617302437296e-05, "loss": 1.4262, "step": 350 }, { "epoch": 0.11, "grad_norm": 0.575879693031311, "learning_rate": 4.999651996584741e-05, "loss": 1.4462, "step": 355 }, { "epoch": 0.11, "grad_norm": 1.3045742511749268, "learning_rate": 4.999642124872571e-05, "loss": 1.492, "step": 360 }, { "epoch": 0.11, "grad_norm": 0.6474262475967407, "learning_rate": 4.999632115107764e-05, "loss": 1.3665, "step": 365 }, { "epoch": 0.11, "grad_norm": 0.9724390506744385, "learning_rate": 4.999621967290874e-05, "loss": 1.2883, "step": 370 }, { "epoch": 0.11, "grad_norm": 0.917503297328949, "learning_rate": 4.999611681422461e-05, "loss": 1.5095, "step": 375 }, { "epoch": 0.11, "grad_norm": 1.5174071788787842, "learning_rate": 4.999601257503093e-05, "loss": 1.5043, "step": 380 }, { "epoch": 0.12, "grad_norm": 1.2477240562438965, "learning_rate": 4.999590695533345e-05, "loss": 1.4741, "step": 385 }, { "epoch": 0.12, "grad_norm": 1.0032185316085815, "learning_rate": 4.9995799955138025e-05, "loss": 1.3719, "step": 390 }, { "epoch": 0.12, "grad_norm": 1.0506250858306885, "learning_rate": 4.999569157445054e-05, "loss": 1.4187, "step": 395 }, { "epoch": 0.12, "grad_norm": 0.5169631242752075, "learning_rate": 4.999558181327699e-05, "loss": 1.3557, "step": 400 }, { "epoch": 0.12, "grad_norm": 1.166700839996338, "learning_rate": 4.9995470671623446e-05, 
"loss": 1.5298, "step": 405 }, { "epoch": 0.12, "grad_norm": 0.9644590616226196, "learning_rate": 4.999535814949603e-05, "loss": 1.4766, "step": 410 }, { "epoch": 0.12, "grad_norm": 1.849117636680603, "learning_rate": 4.999524424690097e-05, "loss": 1.3298, "step": 415 }, { "epoch": 0.13, "grad_norm": 0.5561323761940002, "learning_rate": 4.9995128963844545e-05, "loss": 1.5718, "step": 420 }, { "epoch": 0.13, "grad_norm": 1.6733776330947876, "learning_rate": 4.9995012300333134e-05, "loss": 1.3693, "step": 425 }, { "epoch": 0.13, "grad_norm": 0.8910932540893555, "learning_rate": 4.999489425637317e-05, "loss": 1.5194, "step": 430 }, { "epoch": 0.13, "grad_norm": 3.7125866413116455, "learning_rate": 4.9994774831971184e-05, "loss": 1.6596, "step": 435 }, { "epoch": 0.13, "grad_norm": 0.8103092312812805, "learning_rate": 4.999465402713376e-05, "loss": 1.4965, "step": 440 }, { "epoch": 0.13, "grad_norm": 0.902019739151001, "learning_rate": 4.999453184186757e-05, "loss": 1.3179, "step": 445 }, { "epoch": 0.13, "grad_norm": 0.9942508935928345, "learning_rate": 4.999440827617938e-05, "loss": 1.3365, "step": 450 }, { "epoch": 0.14, "grad_norm": 0.8688194751739502, "learning_rate": 4.9994283330075983e-05, "loss": 1.3126, "step": 455 }, { "epoch": 0.14, "grad_norm": 0.632107675075531, "learning_rate": 4.9994157003564314e-05, "loss": 1.4769, "step": 460 }, { "epoch": 0.14, "grad_norm": 1.341265082359314, "learning_rate": 4.999402929665133e-05, "loss": 1.3436, "step": 465 }, { "epoch": 0.14, "grad_norm": 1.463435173034668, "learning_rate": 4.999390020934408e-05, "loss": 1.3653, "step": 470 }, { "epoch": 0.14, "grad_norm": 0.5955095291137695, "learning_rate": 4.9993769741649707e-05, "loss": 1.3658, "step": 475 }, { "epoch": 0.14, "grad_norm": 0.7632045149803162, "learning_rate": 4.999363789357541e-05, "loss": 1.2948, "step": 480 }, { "epoch": 0.15, "grad_norm": 0.9410882592201233, "learning_rate": 4.9993504665128474e-05, "loss": 1.4352, "step": 485 }, { "epoch": 0.15, "grad_norm": 
1.489148736000061, "learning_rate": 4.999337005631625e-05, "loss": 1.5849, "step": 490 }, { "epoch": 0.15, "grad_norm": 1.0961707830429077, "learning_rate": 4.9993234067146186e-05, "loss": 1.4885, "step": 495 }, { "epoch": 0.15, "grad_norm": 0.9856806993484497, "learning_rate": 4.999309669762577e-05, "loss": 1.6123, "step": 500 }, { "epoch": 0.15, "grad_norm": 0.6104403734207153, "learning_rate": 4.999295794776261e-05, "loss": 1.4599, "step": 505 }, { "epoch": 0.15, "grad_norm": 1.5283727645874023, "learning_rate": 4.9992817817564366e-05, "loss": 1.5948, "step": 510 }, { "epoch": 0.15, "grad_norm": 2.664371967315674, "learning_rate": 4.9992676307038765e-05, "loss": 1.5192, "step": 515 }, { "epoch": 0.16, "grad_norm": 0.813569962978363, "learning_rate": 4.999253341619363e-05, "loss": 1.3386, "step": 520 }, { "epoch": 0.16, "grad_norm": 0.8261412382125854, "learning_rate": 4.999238914503686e-05, "loss": 1.3626, "step": 525 }, { "epoch": 0.16, "grad_norm": 1.0505256652832031, "learning_rate": 4.999224349357641e-05, "loss": 1.3934, "step": 530 }, { "epoch": 0.16, "grad_norm": 1.3349155187606812, "learning_rate": 4.999209646182033e-05, "loss": 1.3489, "step": 535 }, { "epoch": 0.16, "grad_norm": 0.8616823554039001, "learning_rate": 4.999194804977674e-05, "loss": 1.4238, "step": 540 }, { "epoch": 0.16, "grad_norm": 0.6977237462997437, "learning_rate": 4.9991798257453834e-05, "loss": 1.4843, "step": 545 }, { "epoch": 0.16, "grad_norm": 1.441572666168213, "learning_rate": 4.9991647084859894e-05, "loss": 1.3559, "step": 550 }, { "epoch": 0.17, "grad_norm": 1.1105183362960815, "learning_rate": 4.9991494532003255e-05, "loss": 1.5466, "step": 555 }, { "epoch": 0.17, "grad_norm": 1.4936366081237793, "learning_rate": 4.9991340598892355e-05, "loss": 1.256, "step": 560 }, { "epoch": 0.17, "grad_norm": 1.0292248725891113, "learning_rate": 4.999118528553569e-05, "loss": 1.4012, "step": 565 }, { "epoch": 0.17, "grad_norm": 0.9438468217849731, "learning_rate": 4.9991028591941846e-05, 
"loss": 1.4391, "step": 570 }, { "epoch": 0.17, "grad_norm": 1.2015315294265747, "learning_rate": 4.9990870518119456e-05, "loss": 1.2896, "step": 575 }, { "epoch": 0.17, "grad_norm": 1.021628737449646, "learning_rate": 4.9990711064077276e-05, "loss": 1.4336, "step": 580 }, { "epoch": 0.18, "grad_norm": 1.3274457454681396, "learning_rate": 4.999055022982409e-05, "loss": 1.3766, "step": 585 }, { "epoch": 0.18, "grad_norm": 0.7466582655906677, "learning_rate": 4.99903880153688e-05, "loss": 1.4709, "step": 590 }, { "epoch": 0.18, "grad_norm": 1.1090950965881348, "learning_rate": 4.999022442072036e-05, "loss": 1.505, "step": 595 }, { "epoch": 0.18, "grad_norm": 1.0768952369689941, "learning_rate": 4.999005944588778e-05, "loss": 1.4515, "step": 600 }, { "epoch": 0.18, "grad_norm": 1.0834194421768188, "learning_rate": 4.998989309088021e-05, "loss": 1.5076, "step": 605 }, { "epoch": 0.18, "grad_norm": 1.119468092918396, "learning_rate": 4.998972535570682e-05, "loss": 1.3812, "step": 610 }, { "epoch": 0.18, "grad_norm": 1.5594128370285034, "learning_rate": 4.9989556240376864e-05, "loss": 1.3452, "step": 615 }, { "epoch": 0.19, "grad_norm": 1.9768519401550293, "learning_rate": 4.9989385744899705e-05, "loss": 1.5971, "step": 620 }, { "epoch": 0.19, "grad_norm": 0.7751976847648621, "learning_rate": 4.9989213869284734e-05, "loss": 1.5723, "step": 625 }, { "epoch": 0.19, "grad_norm": 0.8533837795257568, "learning_rate": 4.998904061354146e-05, "loss": 1.3348, "step": 630 }, { "epoch": 0.19, "grad_norm": 0.8224301338195801, "learning_rate": 4.9988865977679454e-05, "loss": 1.4717, "step": 635 }, { "epoch": 0.19, "grad_norm": 1.4539473056793213, "learning_rate": 4.998868996170835e-05, "loss": 1.4803, "step": 640 }, { "epoch": 0.19, "grad_norm": 1.3216567039489746, "learning_rate": 4.9988512565637866e-05, "loss": 1.4922, "step": 645 }, { "epoch": 0.19, "grad_norm": 0.8094632029533386, "learning_rate": 4.998833378947781e-05, "loss": 1.521, "step": 650 }, { "epoch": 0.2, "grad_norm": 
1.1426763534545898, "learning_rate": 4.9988153633238065e-05, "loss": 1.2602, "step": 655 }, { "epoch": 0.2, "grad_norm": 0.7161766886711121, "learning_rate": 4.998797209692856e-05, "loss": 1.3155, "step": 660 }, { "epoch": 0.2, "grad_norm": 0.38354432582855225, "learning_rate": 4.998778918055933e-05, "loss": 1.4266, "step": 665 }, { "epoch": 0.2, "grad_norm": 0.6859254837036133, "learning_rate": 4.9987604884140485e-05, "loss": 1.3925, "step": 670 }, { "epoch": 0.2, "grad_norm": 1.218139886856079, "learning_rate": 4.9987419207682186e-05, "loss": 1.4437, "step": 675 }, { "epoch": 0.2, "grad_norm": 0.8014891743659973, "learning_rate": 4.99872321511947e-05, "loss": 1.398, "step": 680 }, { "epoch": 0.2, "grad_norm": 1.764227271080017, "learning_rate": 4.998704371468835e-05, "loss": 1.4238, "step": 685 }, { "epoch": 0.21, "grad_norm": 0.8913390636444092, "learning_rate": 4.998685389817355e-05, "loss": 1.5147, "step": 690 }, { "epoch": 0.21, "grad_norm": 1.5515832901000977, "learning_rate": 4.998666270166078e-05, "loss": 1.5834, "step": 695 }, { "epoch": 0.21, "grad_norm": 0.7237977385520935, "learning_rate": 4.998647012516061e-05, "loss": 1.3329, "step": 700 }, { "epoch": 0.21, "grad_norm": 1.647252082824707, "learning_rate": 4.998627616868366e-05, "loss": 1.3771, "step": 705 }, { "epoch": 0.21, "grad_norm": 0.8865868449211121, "learning_rate": 4.9986080832240646e-05, "loss": 1.4632, "step": 710 }, { "epoch": 0.21, "grad_norm": 2.0670599937438965, "learning_rate": 4.998588411584236e-05, "loss": 1.3726, "step": 715 }, { "epoch": 0.22, "grad_norm": 0.742981493473053, "learning_rate": 4.998568601949968e-05, "loss": 1.3809, "step": 720 }, { "epoch": 0.22, "grad_norm": 0.7563424110412598, "learning_rate": 4.998548654322351e-05, "loss": 1.5463, "step": 725 }, { "epoch": 0.22, "grad_norm": 1.2141450643539429, "learning_rate": 4.998528568702491e-05, "loss": 1.4768, "step": 730 }, { "epoch": 0.22, "grad_norm": 1.1518967151641846, "learning_rate": 4.998508345091494e-05, "loss": 
1.5653, "step": 735 }, { "epoch": 0.22, "grad_norm": 0.5695226788520813, "learning_rate": 4.998487983490478e-05, "loss": 1.3969, "step": 740 }, { "epoch": 0.22, "grad_norm": 0.9854166507720947, "learning_rate": 4.998467483900568e-05, "loss": 1.5679, "step": 745 }, { "epoch": 0.22, "grad_norm": 1.0108509063720703, "learning_rate": 4.998446846322897e-05, "loss": 1.3145, "step": 750 }, { "epoch": 0.23, "grad_norm": 0.8769714832305908, "learning_rate": 4.998426070758603e-05, "loss": 1.2757, "step": 755 }, { "epoch": 0.23, "grad_norm": 1.4470043182373047, "learning_rate": 4.998405157208833e-05, "loss": 1.4435, "step": 760 }, { "epoch": 0.23, "grad_norm": 1.0755447149276733, "learning_rate": 4.9983841056747446e-05, "loss": 1.3629, "step": 765 }, { "epoch": 0.23, "grad_norm": 0.8289944529533386, "learning_rate": 4.998362916157498e-05, "loss": 1.4345, "step": 770 }, { "epoch": 0.23, "grad_norm": 0.8254856467247009, "learning_rate": 4.998341588658265e-05, "loss": 1.4909, "step": 775 }, { "epoch": 0.23, "grad_norm": 0.4825249910354614, "learning_rate": 4.998320123178223e-05, "loss": 1.3529, "step": 780 }, { "epoch": 0.23, "grad_norm": 1.111685037612915, "learning_rate": 4.998298519718557e-05, "loss": 1.292, "step": 785 }, { "epoch": 0.24, "grad_norm": 0.8751310706138611, "learning_rate": 4.998276778280461e-05, "loss": 1.2957, "step": 790 }, { "epoch": 0.24, "grad_norm": 2.0307741165161133, "learning_rate": 4.998254898865134e-05, "loss": 1.364, "step": 795 }, { "epoch": 0.24, "grad_norm": 1.2933695316314697, "learning_rate": 4.998232881473787e-05, "loss": 1.4821, "step": 800 }, { "epoch": 0.24, "grad_norm": 1.1037958860397339, "learning_rate": 4.998210726107635e-05, "loss": 1.3466, "step": 805 }, { "epoch": 0.24, "grad_norm": 1.1049686670303345, "learning_rate": 4.9981884327679005e-05, "loss": 1.3488, "step": 810 }, { "epoch": 0.24, "grad_norm": 1.055496335029602, "learning_rate": 4.9981660014558165e-05, "loss": 1.4615, "step": 815 }, { "epoch": 0.25, "grad_norm": 
0.8814008831977844, "learning_rate": 4.99814343217262e-05, "loss": 1.4898, "step": 820 }, { "epoch": 0.25, "grad_norm": 3.712348222732544, "learning_rate": 4.998120724919559e-05, "loss": 1.4881, "step": 825 }, { "epoch": 0.25, "grad_norm": 0.7310431003570557, "learning_rate": 4.9980978796978865e-05, "loss": 1.3357, "step": 830 }, { "epoch": 0.25, "grad_norm": 0.5869240164756775, "learning_rate": 4.998074896508865e-05, "loss": 1.4089, "step": 835 }, { "epoch": 0.25, "grad_norm": 0.99127197265625, "learning_rate": 4.9980517753537634e-05, "loss": 1.5001, "step": 840 }, { "epoch": 0.25, "grad_norm": 1.083500623703003, "learning_rate": 4.998028516233859e-05, "loss": 1.334, "step": 845 }, { "epoch": 0.25, "grad_norm": 0.7491528391838074, "learning_rate": 4.998005119150436e-05, "loss": 1.5227, "step": 850 }, { "epoch": 0.26, "grad_norm": 0.5255687832832336, "learning_rate": 4.997981584104788e-05, "loss": 1.3318, "step": 855 }, { "epoch": 0.26, "grad_norm": 0.6178213357925415, "learning_rate": 4.997957911098212e-05, "loss": 1.4365, "step": 860 }, { "epoch": 0.26, "grad_norm": 1.9565240144729614, "learning_rate": 4.997934100132018e-05, "loss": 1.3325, "step": 865 }, { "epoch": 0.26, "grad_norm": 0.6852719187736511, "learning_rate": 4.99791015120752e-05, "loss": 1.4526, "step": 870 }, { "epoch": 0.26, "grad_norm": 1.6580244302749634, "learning_rate": 4.99788606432604e-05, "loss": 1.585, "step": 875 }, { "epoch": 0.26, "grad_norm": 1.2035952806472778, "learning_rate": 4.9978618394889097e-05, "loss": 1.3965, "step": 880 }, { "epoch": 0.26, "grad_norm": 1.3621057271957397, "learning_rate": 4.9978374766974664e-05, "loss": 1.4035, "step": 885 }, { "epoch": 0.27, "grad_norm": 1.326550006866455, "learning_rate": 4.997812975953056e-05, "loss": 1.6006, "step": 890 }, { "epoch": 0.27, "grad_norm": 0.5781381726264954, "learning_rate": 4.997788337257031e-05, "loss": 1.376, "step": 895 }, { "epoch": 0.27, "grad_norm": 0.650931715965271, "learning_rate": 4.997763560610752e-05, "loss": 
1.4043, "step": 900 }, { "epoch": 0.27, "grad_norm": 0.7104997038841248, "learning_rate": 4.997738646015588e-05, "loss": 1.3388, "step": 905 }, { "epoch": 0.27, "grad_norm": 1.88402259349823, "learning_rate": 4.997713593472915e-05, "loss": 1.2928, "step": 910 }, { "epoch": 0.27, "grad_norm": 1.100692868232727, "learning_rate": 4.997688402984116e-05, "loss": 1.3306, "step": 915 }, { "epoch": 0.28, "grad_norm": 0.7148817777633667, "learning_rate": 4.997663074550584e-05, "loss": 1.2836, "step": 920 }, { "epoch": 0.28, "grad_norm": 1.2554770708084106, "learning_rate": 4.9976376081737154e-05, "loss": 1.365, "step": 925 }, { "epoch": 0.28, "grad_norm": 0.8165137767791748, "learning_rate": 4.9976120038549187e-05, "loss": 1.3327, "step": 930 }, { "epoch": 0.28, "grad_norm": 1.1443816423416138, "learning_rate": 4.997586261595606e-05, "loss": 1.6129, "step": 935 }, { "epoch": 0.28, "grad_norm": 0.6165381669998169, "learning_rate": 4.997560381397201e-05, "loss": 1.3688, "step": 940 }, { "epoch": 0.28, "grad_norm": 0.8930396437644958, "learning_rate": 4.997534363261132e-05, "loss": 1.2127, "step": 945 }, { "epoch": 0.28, "grad_norm": 1.0518968105316162, "learning_rate": 4.997508207188836e-05, "loss": 1.407, "step": 950 }, { "epoch": 0.29, "grad_norm": 0.8033773303031921, "learning_rate": 4.997481913181758e-05, "loss": 1.4512, "step": 955 }, { "epoch": 0.29, "grad_norm": 0.7851698398590088, "learning_rate": 4.99745548124135e-05, "loss": 1.4645, "step": 960 }, { "epoch": 0.29, "grad_norm": 0.4588683843612671, "learning_rate": 4.997428911369072e-05, "loss": 1.3892, "step": 965 }, { "epoch": 0.29, "grad_norm": 0.6735519170761108, "learning_rate": 4.99740220356639e-05, "loss": 1.4797, "step": 970 }, { "epoch": 0.29, "grad_norm": 0.6732616424560547, "learning_rate": 4.997375357834781e-05, "loss": 1.4322, "step": 975 }, { "epoch": 0.29, "grad_norm": 0.9135339856147766, "learning_rate": 4.997348374175727e-05, "loss": 1.489, "step": 980 }, { "epoch": 0.29, "grad_norm": 
0.7825419902801514, "learning_rate": 4.9973212525907176e-05, "loss": 1.4497, "step": 985 }, { "epoch": 0.3, "grad_norm": 1.4693922996520996, "learning_rate": 4.997293993081251e-05, "loss": 1.4971, "step": 990 }, { "epoch": 0.3, "grad_norm": 0.7837210893630981, "learning_rate": 4.997266595648834e-05, "loss": 1.4497, "step": 995 }, { "epoch": 0.3, "grad_norm": 1.0610207319259644, "learning_rate": 4.997239060294978e-05, "loss": 1.3558, "step": 1000 }, { "epoch": 0.3, "grad_norm": 0.9275992512702942, "learning_rate": 4.997211387021204e-05, "loss": 1.3527, "step": 1005 }, { "epoch": 0.3, "grad_norm": 0.9326786994934082, "learning_rate": 4.997183575829042e-05, "loss": 1.4296, "step": 1010 }, { "epoch": 0.3, "grad_norm": 1.0663551092147827, "learning_rate": 4.997155626720026e-05, "loss": 1.4676, "step": 1015 }, { "epoch": 0.31, "grad_norm": 0.5471872091293335, "learning_rate": 4.997127539695701e-05, "loss": 1.2381, "step": 1020 }, { "epoch": 0.31, "grad_norm": 1.2285401821136475, "learning_rate": 4.997099314757617e-05, "loss": 1.3078, "step": 1025 }, { "epoch": 0.31, "grad_norm": 0.5964838862419128, "learning_rate": 4.9970709519073334e-05, "loss": 1.4322, "step": 1030 }, { "epoch": 0.31, "grad_norm": 6.244211196899414, "learning_rate": 4.997042451146417e-05, "loss": 1.3203, "step": 1035 }, { "epoch": 0.31, "grad_norm": 0.8410915732383728, "learning_rate": 4.997013812476442e-05, "loss": 1.6386, "step": 1040 }, { "epoch": 0.31, "grad_norm": 1.518509030342102, "learning_rate": 4.996985035898989e-05, "loss": 1.4518, "step": 1045 }, { "epoch": 0.31, "grad_norm": 0.8771889209747314, "learning_rate": 4.996956121415648e-05, "loss": 1.3745, "step": 1050 }, { "epoch": 0.32, "grad_norm": 1.0236855745315552, "learning_rate": 4.996927069028016e-05, "loss": 1.3879, "step": 1055 }, { "epoch": 0.32, "grad_norm": 1.9780511856079102, "learning_rate": 4.996897878737697e-05, "loss": 1.3488, "step": 1060 }, { "epoch": 0.32, "grad_norm": 0.8157287836074829, "learning_rate": 
4.9968685505463036e-05, "loss": 1.2988, "step": 1065 }, { "epoch": 0.32, "grad_norm": 1.7686500549316406, "learning_rate": 4.9968390844554556e-05, "loss": 1.5909, "step": 1070 }, { "epoch": 0.32, "grad_norm": 1.0402336120605469, "learning_rate": 4.9968094804667805e-05, "loss": 1.3257, "step": 1075 }, { "epoch": 0.32, "grad_norm": 0.8256892561912537, "learning_rate": 4.9967797385819135e-05, "loss": 1.3697, "step": 1080 }, { "epoch": 0.32, "grad_norm": 0.6995580792427063, "learning_rate": 4.9967498588024956e-05, "loss": 1.3315, "step": 1085 }, { "epoch": 0.33, "grad_norm": 0.6209663152694702, "learning_rate": 4.996719841130179e-05, "loss": 1.3986, "step": 1090 }, { "epoch": 0.33, "grad_norm": 0.9525293707847595, "learning_rate": 4.996689685566621e-05, "loss": 1.5316, "step": 1095 }, { "epoch": 0.33, "grad_norm": 0.5040790438652039, "learning_rate": 4.9966593921134863e-05, "loss": 1.5408, "step": 1100 }, { "epoch": 0.33, "grad_norm": 1.3051776885986328, "learning_rate": 4.996628960772449e-05, "loss": 1.4065, "step": 1105 }, { "epoch": 0.33, "grad_norm": 1.1322470903396606, "learning_rate": 4.996598391545189e-05, "loss": 1.3907, "step": 1110 }, { "epoch": 0.33, "grad_norm": 1.1070497035980225, "learning_rate": 4.996567684433395e-05, "loss": 1.4401, "step": 1115 }, { "epoch": 0.34, "grad_norm": 2.0832529067993164, "learning_rate": 4.996536839438762e-05, "loss": 1.3753, "step": 1120 }, { "epoch": 0.34, "grad_norm": 1.2609474658966064, "learning_rate": 4.996505856562995e-05, "loss": 1.4671, "step": 1125 }, { "epoch": 0.34, "grad_norm": 0.5162214040756226, "learning_rate": 4.996474735807805e-05, "loss": 1.3594, "step": 1130 }, { "epoch": 0.34, "grad_norm": 0.6403785347938538, "learning_rate": 4.9964434771749105e-05, "loss": 1.441, "step": 1135 }, { "epoch": 0.34, "grad_norm": 1.0005476474761963, "learning_rate": 4.996412080666036e-05, "loss": 1.323, "step": 1140 }, { "epoch": 0.34, "grad_norm": 0.7727034687995911, "learning_rate": 4.996380546282919e-05, "loss": 1.4661, 
"step": 1145 }, { "epoch": 0.34, "grad_norm": 1.1217765808105469, "learning_rate": 4.996348874027298e-05, "loss": 1.4096, "step": 1150 }, { "epoch": 0.35, "grad_norm": 0.5037316083908081, "learning_rate": 4.9963170639009246e-05, "loss": 1.3398, "step": 1155 }, { "epoch": 0.35, "grad_norm": 1.459764838218689, "learning_rate": 4.996285115905554e-05, "loss": 1.5387, "step": 1160 }, { "epoch": 0.35, "grad_norm": 0.856709897518158, "learning_rate": 4.996253030042951e-05, "loss": 1.4499, "step": 1165 }, { "epoch": 0.35, "grad_norm": 0.6828152537345886, "learning_rate": 4.996220806314888e-05, "loss": 1.3039, "step": 1170 }, { "epoch": 0.35, "grad_norm": 1.1198612451553345, "learning_rate": 4.996188444723144e-05, "loss": 1.356, "step": 1175 }, { "epoch": 0.35, "grad_norm": 0.6652525067329407, "learning_rate": 4.996155945269507e-05, "loss": 1.3835, "step": 1180 }, { "epoch": 0.35, "grad_norm": 1.4609639644622803, "learning_rate": 4.996123307955773e-05, "loss": 1.4632, "step": 1185 }, { "epoch": 0.36, "grad_norm": 1.3598580360412598, "learning_rate": 4.996090532783742e-05, "loss": 1.4897, "step": 1190 }, { "epoch": 0.36, "grad_norm": 1.24635910987854, "learning_rate": 4.996057619755225e-05, "loss": 1.417, "step": 1195 }, { "epoch": 0.36, "grad_norm": 1.0729330778121948, "learning_rate": 4.996024568872042e-05, "loss": 1.3506, "step": 1200 }, { "epoch": 0.36, "grad_norm": 0.6681000590324402, "learning_rate": 4.9959913801360156e-05, "loss": 1.3517, "step": 1205 }, { "epoch": 0.36, "grad_norm": 0.8434261679649353, "learning_rate": 4.995958053548979e-05, "loss": 1.6053, "step": 1210 }, { "epoch": 0.36, "grad_norm": 0.7810892462730408, "learning_rate": 4.9959245891127745e-05, "loss": 1.4025, "step": 1215 }, { "epoch": 0.37, "grad_norm": 0.6281015276908875, "learning_rate": 4.995890986829249e-05, "loss": 1.2792, "step": 1220 }, { "epoch": 0.37, "grad_norm": 1.185341477394104, "learning_rate": 4.9958572467002586e-05, "loss": 1.5974, "step": 1225 }, { "epoch": 0.37, "grad_norm": 
0.9366752505302429, "learning_rate": 4.995823368727667e-05, "loss": 1.3961, "step": 1230 }, { "epoch": 0.37, "grad_norm": 2.3230159282684326, "learning_rate": 4.995789352913345e-05, "loss": 1.5296, "step": 1235 }, { "epoch": 0.37, "grad_norm": 1.690661907196045, "learning_rate": 4.995755199259171e-05, "loss": 1.522, "step": 1240 }, { "epoch": 0.37, "grad_norm": 1.1200650930404663, "learning_rate": 4.995720907767031e-05, "loss": 1.6149, "step": 1245 }, { "epoch": 0.37, "grad_norm": 0.8390517830848694, "learning_rate": 4.9956864784388204e-05, "loss": 1.4568, "step": 1250 }, { "epoch": 0.38, "grad_norm": 0.49734947085380554, "learning_rate": 4.9956519112764385e-05, "loss": 1.4331, "step": 1255 }, { "epoch": 0.38, "grad_norm": 1.3497544527053833, "learning_rate": 4.995617206281797e-05, "loss": 1.4477, "step": 1260 }, { "epoch": 0.38, "grad_norm": 0.4694572687149048, "learning_rate": 4.9955823634568105e-05, "loss": 1.3934, "step": 1265 }, { "epoch": 0.38, "grad_norm": 0.5619333982467651, "learning_rate": 4.9955473828034045e-05, "loss": 1.5304, "step": 1270 }, { "epoch": 0.38, "grad_norm": 0.7059439420700073, "learning_rate": 4.99551226432351e-05, "loss": 1.548, "step": 1275 }, { "epoch": 0.38, "grad_norm": 2.1901280879974365, "learning_rate": 4.995477008019067e-05, "loss": 1.5823, "step": 1280 }, { "epoch": 0.38, "grad_norm": 0.7577235698699951, "learning_rate": 4.9954416138920235e-05, "loss": 1.3385, "step": 1285 }, { "epoch": 0.39, "grad_norm": 1.0513741970062256, "learning_rate": 4.995406081944333e-05, "loss": 1.5494, "step": 1290 }, { "epoch": 0.39, "grad_norm": 0.9030594229698181, "learning_rate": 4.995370412177959e-05, "loss": 1.5816, "step": 1295 }, { "epoch": 0.39, "grad_norm": 0.6190074682235718, "learning_rate": 4.99533460459487e-05, "loss": 1.4034, "step": 1300 }, { "epoch": 0.39, "grad_norm": 0.6334771513938904, "learning_rate": 4.995298659197045e-05, "loss": 1.4598, "step": 1305 }, { "epoch": 0.39, "grad_norm": 1.5323703289031982, "learning_rate": 
4.9952625759864694e-05, "loss": 1.3749, "step": 1310 }, { "epoch": 0.39, "grad_norm": 1.3500525951385498, "learning_rate": 4.9952263549651346e-05, "loss": 1.38, "step": 1315 }, { "epoch": 0.39, "grad_norm": 0.885956883430481, "learning_rate": 4.995189996135042e-05, "loss": 1.4463, "step": 1320 }, { "epoch": 0.4, "grad_norm": 0.774631917476654, "learning_rate": 4.9951534994981994e-05, "loss": 1.24, "step": 1325 }, { "epoch": 0.4, "grad_norm": 0.670936644077301, "learning_rate": 4.9951168650566226e-05, "loss": 1.348, "step": 1330 }, { "epoch": 0.4, "grad_norm": 0.5310854315757751, "learning_rate": 4.995080092812335e-05, "loss": 1.4768, "step": 1335 }, { "epoch": 0.4, "grad_norm": 0.7614613771438599, "learning_rate": 4.995043182767368e-05, "loss": 1.4268, "step": 1340 }, { "epoch": 0.4, "grad_norm": 1.5375897884368896, "learning_rate": 4.995006134923759e-05, "loss": 1.3634, "step": 1345 }, { "epoch": 0.4, "grad_norm": 0.8461021780967712, "learning_rate": 4.994968949283554e-05, "loss": 1.4866, "step": 1350 }, { "epoch": 0.41, "grad_norm": 0.7309428453445435, "learning_rate": 4.994931625848808e-05, "loss": 1.5075, "step": 1355 }, { "epoch": 0.41, "grad_norm": 1.151624321937561, "learning_rate": 4.994894164621581e-05, "loss": 1.4178, "step": 1360 }, { "epoch": 0.41, "grad_norm": 0.812978208065033, "learning_rate": 4.9948565656039434e-05, "loss": 1.2951, "step": 1365 }, { "epoch": 0.41, "grad_norm": 0.8754581212997437, "learning_rate": 4.99481882879797e-05, "loss": 1.4593, "step": 1370 }, { "epoch": 0.41, "grad_norm": 1.1998158693313599, "learning_rate": 4.994780954205747e-05, "loss": 1.3958, "step": 1375 }, { "epoch": 0.41, "grad_norm": 0.9774079918861389, "learning_rate": 4.994742941829364e-05, "loss": 1.3993, "step": 1380 }, { "epoch": 0.41, "grad_norm": 0.7645953297615051, "learning_rate": 4.9947047916709224e-05, "loss": 1.4456, "step": 1385 }, { "epoch": 0.42, "grad_norm": 0.7388116717338562, "learning_rate": 4.994666503732528e-05, "loss": 1.4401, "step": 1390 }, { 
"epoch": 0.42, "grad_norm": 1.9941197633743286, "learning_rate": 4.994628078016296e-05, "loss": 1.4012, "step": 1395 }, { "epoch": 0.42, "grad_norm": 1.6217386722564697, "learning_rate": 4.994589514524347e-05, "loss": 1.5009, "step": 1400 }, { "epoch": 0.42, "grad_norm": 1.7688075304031372, "learning_rate": 4.9945508132588134e-05, "loss": 1.332, "step": 1405 }, { "epoch": 0.42, "grad_norm": 0.620936393737793, "learning_rate": 4.994511974221831e-05, "loss": 1.4864, "step": 1410 }, { "epoch": 0.42, "grad_norm": 0.8650867938995361, "learning_rate": 4.9944729974155444e-05, "loss": 1.5008, "step": 1415 }, { "epoch": 0.42, "grad_norm": 1.145591139793396, "learning_rate": 4.994433882842108e-05, "loss": 1.242, "step": 1420 }, { "epoch": 0.43, "grad_norm": 1.6311044692993164, "learning_rate": 4.9943946305036806e-05, "loss": 1.6072, "step": 1425 }, { "epoch": 0.43, "grad_norm": 0.777138888835907, "learning_rate": 4.9943552404024303e-05, "loss": 1.4589, "step": 1430 }, { "epoch": 0.43, "grad_norm": 1.3018752336502075, "learning_rate": 4.994315712540533e-05, "loss": 1.5466, "step": 1435 }, { "epoch": 0.43, "grad_norm": 1.3640024662017822, "learning_rate": 4.994276046920172e-05, "loss": 1.5863, "step": 1440 }, { "epoch": 0.43, "grad_norm": 1.0018880367279053, "learning_rate": 4.994236243543537e-05, "loss": 1.3074, "step": 1445 }, { "epoch": 0.43, "grad_norm": 0.5440133810043335, "learning_rate": 4.994196302412827e-05, "loss": 1.5295, "step": 1450 }, { "epoch": 0.44, "grad_norm": 2.098235845565796, "learning_rate": 4.994156223530248e-05, "loss": 1.4089, "step": 1455 }, { "epoch": 0.44, "grad_norm": 1.157474398612976, "learning_rate": 4.994116006898013e-05, "loss": 1.4596, "step": 1460 }, { "epoch": 0.44, "grad_norm": 1.6245862245559692, "learning_rate": 4.994075652518344e-05, "loss": 1.3593, "step": 1465 }, { "epoch": 0.44, "grad_norm": 1.1995525360107422, "learning_rate": 4.994035160393469e-05, "loss": 1.4744, "step": 1470 }, { "epoch": 0.44, "grad_norm": 0.7864271402359009, 
"learning_rate": 4.993994530525624e-05, "loss": 1.3689, "step": 1475 }, { "epoch": 0.44, "grad_norm": 0.8403075337409973, "learning_rate": 4.9939537629170544e-05, "loss": 1.4856, "step": 1480 }, { "epoch": 0.44, "grad_norm": 0.5901763439178467, "learning_rate": 4.99391285757001e-05, "loss": 1.3932, "step": 1485 }, { "epoch": 0.45, "grad_norm": 0.9339320063591003, "learning_rate": 4.993871814486751e-05, "loss": 1.5516, "step": 1490 }, { "epoch": 0.45, "grad_norm": 1.3036901950836182, "learning_rate": 4.993830633669544e-05, "loss": 1.4868, "step": 1495 }, { "epoch": 0.45, "grad_norm": 0.6879803538322449, "learning_rate": 4.9937893151206626e-05, "loss": 1.38, "step": 1500 }, { "epoch": 0.45, "grad_norm": 0.7101956009864807, "learning_rate": 4.99374785884239e-05, "loss": 1.3969, "step": 1505 }, { "epoch": 0.45, "grad_norm": 1.220004916191101, "learning_rate": 4.9937062648370154e-05, "loss": 1.4985, "step": 1510 }, { "epoch": 0.45, "grad_norm": 0.5632555484771729, "learning_rate": 4.993664533106835e-05, "loss": 1.3545, "step": 1515 }, { "epoch": 0.45, "grad_norm": 1.4013103246688843, "learning_rate": 4.993622663654156e-05, "loss": 1.4475, "step": 1520 }, { "epoch": 0.46, "grad_norm": 0.9690436124801636, "learning_rate": 4.993580656481288e-05, "loss": 1.3327, "step": 1525 }, { "epoch": 0.46, "grad_norm": 0.6049666404724121, "learning_rate": 4.993538511590553e-05, "loss": 1.1947, "step": 1530 }, { "epoch": 0.46, "grad_norm": 0.7417116165161133, "learning_rate": 4.993496228984278e-05, "loss": 1.5365, "step": 1535 }, { "epoch": 0.46, "grad_norm": 1.1022894382476807, "learning_rate": 4.993453808664797e-05, "loss": 1.4447, "step": 1540 }, { "epoch": 0.46, "grad_norm": 0.834717333316803, "learning_rate": 4.993411250634455e-05, "loss": 1.4464, "step": 1545 }, { "epoch": 0.46, "grad_norm": 0.6041282415390015, "learning_rate": 4.9933685548956014e-05, "loss": 1.388, "step": 1550 }, { "epoch": 0.47, "grad_norm": 0.8697533011436462, "learning_rate": 4.993325721450594e-05, "loss": 
1.425, "step": 1555 }, { "epoch": 0.47, "grad_norm": 1.2372421026229858, "learning_rate": 4.993282750301799e-05, "loss": 1.3586, "step": 1560 }, { "epoch": 0.47, "grad_norm": 2.791959285736084, "learning_rate": 4.993239641451588e-05, "loss": 1.7594, "step": 1565 }, { "epoch": 0.47, "grad_norm": 3.356498956680298, "learning_rate": 4.993196394902344e-05, "loss": 1.4429, "step": 1570 }, { "epoch": 0.47, "grad_norm": 0.8572433590888977, "learning_rate": 4.993153010656455e-05, "loss": 1.3813, "step": 1575 }, { "epoch": 0.47, "grad_norm": 1.199879765510559, "learning_rate": 4.993109488716316e-05, "loss": 1.2936, "step": 1580 }, { "epoch": 0.47, "grad_norm": 0.6142605543136597, "learning_rate": 4.993065829084332e-05, "loss": 1.3402, "step": 1585 }, { "epoch": 0.48, "grad_norm": 0.4303763806819916, "learning_rate": 4.993022031762914e-05, "loss": 1.3431, "step": 1590 }, { "epoch": 0.48, "grad_norm": 0.7792863249778748, "learning_rate": 4.99297809675448e-05, "loss": 1.5333, "step": 1595 }, { "epoch": 0.48, "grad_norm": 1.015516996383667, "learning_rate": 4.992934024061456e-05, "loss": 1.5036, "step": 1600 }, { "epoch": 0.48, "grad_norm": 0.5127567648887634, "learning_rate": 4.992889813686279e-05, "loss": 1.3214, "step": 1605 }, { "epoch": 0.48, "grad_norm": 1.4127864837646484, "learning_rate": 4.992845465631388e-05, "loss": 1.4556, "step": 1610 }, { "epoch": 0.48, "grad_norm": 2.0938849449157715, "learning_rate": 4.992800979899233e-05, "loss": 1.372, "step": 1615 }, { "epoch": 0.48, "grad_norm": 0.7442615032196045, "learning_rate": 4.992756356492271e-05, "loss": 1.293, "step": 1620 }, { "epoch": 0.49, "grad_norm": 0.879451334476471, "learning_rate": 4.9927115954129665e-05, "loss": 1.4263, "step": 1625 }, { "epoch": 0.49, "grad_norm": 0.867012619972229, "learning_rate": 4.9926666966637914e-05, "loss": 1.3219, "step": 1630 }, { "epoch": 0.49, "grad_norm": 2.4864187240600586, "learning_rate": 4.992621660247226e-05, "loss": 1.5152, "step": 1635 }, { "epoch": 0.49, "grad_norm": 
1.02019202709198, "learning_rate": 4.992576486165758e-05, "loss": 1.3683, "step": 1640 }, { "epoch": 0.49, "grad_norm": 0.8681535720825195, "learning_rate": 4.99253117442188e-05, "loss": 1.4345, "step": 1645 }, { "epoch": 0.49, "grad_norm": 0.6605016589164734, "learning_rate": 4.992485725018097e-05, "loss": 1.3767, "step": 1650 }, { "epoch": 0.5, "grad_norm": 1.7456071376800537, "learning_rate": 4.9924401379569174e-05, "loss": 1.17, "step": 1655 }, { "epoch": 0.5, "grad_norm": 1.299971342086792, "learning_rate": 4.992394413240861e-05, "loss": 1.545, "step": 1660 }, { "epoch": 0.5, "grad_norm": 0.6303769946098328, "learning_rate": 4.992348550872451e-05, "loss": 1.3899, "step": 1665 }, { "epoch": 0.5, "grad_norm": 0.5625646710395813, "learning_rate": 4.992302550854221e-05, "loss": 1.3778, "step": 1670 }, { "epoch": 0.5, "grad_norm": 1.0175740718841553, "learning_rate": 4.992256413188712e-05, "loss": 1.3702, "step": 1675 }, { "epoch": 0.5, "grad_norm": 0.6424248814582825, "learning_rate": 4.992210137878472e-05, "loss": 1.3868, "step": 1680 }, { "epoch": 0.5, "grad_norm": 1.0193471908569336, "learning_rate": 4.992163724926057e-05, "loss": 1.3882, "step": 1685 }, { "epoch": 0.51, "grad_norm": 0.6782774925231934, "learning_rate": 4.992117174334029e-05, "loss": 1.4042, "step": 1690 }, { "epoch": 0.51, "grad_norm": 0.7803168892860413, "learning_rate": 4.99207048610496e-05, "loss": 1.2801, "step": 1695 }, { "epoch": 0.51, "grad_norm": 0.7628136873245239, "learning_rate": 4.992023660241429e-05, "loss": 1.4438, "step": 1700 }, { "epoch": 0.51, "grad_norm": 0.7955535054206848, "learning_rate": 4.991976696746021e-05, "loss": 1.2711, "step": 1705 }, { "epoch": 0.51, "grad_norm": 1.0430067777633667, "learning_rate": 4.991929595621331e-05, "loss": 1.3843, "step": 1710 }, { "epoch": 0.51, "grad_norm": 1.0455753803253174, "learning_rate": 4.991882356869959e-05, "loss": 1.3456, "step": 1715 }, { "epoch": 0.51, "grad_norm": 0.5499788522720337, "learning_rate": 4.9918349804945154e-05, 
"loss": 1.4326, "step": 1720 }, { "epoch": 0.52, "grad_norm": 0.6572498083114624, "learning_rate": 4.991787466497615e-05, "loss": 1.5624, "step": 1725 }, { "epoch": 0.52, "grad_norm": 1.2623273134231567, "learning_rate": 4.9917398148818836e-05, "loss": 1.5035, "step": 1730 }, { "epoch": 0.52, "grad_norm": 1.5261033773422241, "learning_rate": 4.991692025649952e-05, "loss": 1.492, "step": 1735 }, { "epoch": 0.52, "grad_norm": 0.8616695404052734, "learning_rate": 4.991644098804459e-05, "loss": 1.3531, "step": 1740 }, { "epoch": 0.52, "grad_norm": 1.3723504543304443, "learning_rate": 4.991596034348053e-05, "loss": 1.5159, "step": 1745 }, { "epoch": 0.52, "grad_norm": 0.5867530703544617, "learning_rate": 4.991547832283389e-05, "loss": 1.5382, "step": 1750 }, { "epoch": 0.53, "grad_norm": 1.0854787826538086, "learning_rate": 4.9914994926131265e-05, "loss": 1.4862, "step": 1755 }, { "epoch": 0.53, "grad_norm": 1.023762822151184, "learning_rate": 4.991451015339937e-05, "loss": 1.4294, "step": 1760 }, { "epoch": 0.53, "grad_norm": 1.219565749168396, "learning_rate": 4.9914024004664986e-05, "loss": 1.3301, "step": 1765 }, { "epoch": 0.53, "grad_norm": 0.8434814214706421, "learning_rate": 4.991353647995494e-05, "loss": 1.5169, "step": 1770 }, { "epoch": 0.53, "grad_norm": 1.8482433557510376, "learning_rate": 4.9913047579296177e-05, "loss": 1.4846, "step": 1775 }, { "epoch": 0.53, "grad_norm": 0.639321506023407, "learning_rate": 4.991255730271569e-05, "loss": 1.3135, "step": 1780 }, { "epoch": 0.53, "grad_norm": 1.1192939281463623, "learning_rate": 4.991206565024056e-05, "loss": 1.3945, "step": 1785 }, { "epoch": 0.54, "grad_norm": 0.9012823700904846, "learning_rate": 4.991157262189794e-05, "loss": 1.2153, "step": 1790 }, { "epoch": 0.54, "grad_norm": 1.0617835521697998, "learning_rate": 4.991107821771506e-05, "loss": 1.3468, "step": 1795 }, { "epoch": 0.54, "grad_norm": 1.25299870967865, "learning_rate": 4.991058243771922e-05, "loss": 1.436, "step": 1800 }, { "epoch": 0.54, 
"grad_norm": 0.779280960559845, "learning_rate": 4.9910085281937804e-05, "loss": 1.3551, "step": 1805 }, { "epoch": 0.54, "grad_norm": 0.6145398020744324, "learning_rate": 4.9909586750398274e-05, "loss": 1.2627, "step": 1810 }, { "epoch": 0.54, "grad_norm": 0.7118735313415527, "learning_rate": 4.9909086843128154e-05, "loss": 1.5455, "step": 1815 }, { "epoch": 0.54, "grad_norm": 1.2611353397369385, "learning_rate": 4.990858556015506e-05, "loss": 1.4429, "step": 1820 }, { "epoch": 0.55, "grad_norm": 1.1818840503692627, "learning_rate": 4.990808290150668e-05, "loss": 1.586, "step": 1825 }, { "epoch": 0.55, "grad_norm": 0.6355205774307251, "learning_rate": 4.990757886721077e-05, "loss": 1.3538, "step": 1830 }, { "epoch": 0.55, "grad_norm": 0.7018294334411621, "learning_rate": 4.990707345729517e-05, "loss": 1.5147, "step": 1835 }, { "epoch": 0.55, "grad_norm": 1.5799555778503418, "learning_rate": 4.99065666717878e-05, "loss": 1.4748, "step": 1840 }, { "epoch": 0.55, "grad_norm": 1.1363238096237183, "learning_rate": 4.990605851071664e-05, "loss": 1.3812, "step": 1845 }, { "epoch": 0.55, "grad_norm": 0.8724756836891174, "learning_rate": 4.9905548974109746e-05, "loss": 1.4634, "step": 1850 }, { "epoch": 0.55, "grad_norm": 1.3491142988204956, "learning_rate": 4.990503806199527e-05, "loss": 1.4406, "step": 1855 }, { "epoch": 0.56, "grad_norm": 0.8592522740364075, "learning_rate": 4.990452577440144e-05, "loss": 1.55, "step": 1860 }, { "epoch": 0.56, "grad_norm": 0.8113953471183777, "learning_rate": 4.9904012111356536e-05, "loss": 1.5138, "step": 1865 }, { "epoch": 0.56, "grad_norm": 0.8070884943008423, "learning_rate": 4.990349707288892e-05, "loss": 1.5115, "step": 1870 }, { "epoch": 0.56, "grad_norm": 1.018451452255249, "learning_rate": 4.990298065902706e-05, "loss": 1.4229, "step": 1875 }, { "epoch": 0.56, "grad_norm": 0.5133267641067505, "learning_rate": 4.9902462869799446e-05, "loss": 1.1838, "step": 1880 }, { "epoch": 0.56, "grad_norm": 0.795879065990448, 
"learning_rate": 4.990194370523471e-05, "loss": 1.5473, "step": 1885 }, { "epoch": 0.57, "grad_norm": 0.8955612182617188, "learning_rate": 4.990142316536149e-05, "loss": 1.4088, "step": 1890 }, { "epoch": 0.57, "grad_norm": 1.3825359344482422, "learning_rate": 4.990090125020857e-05, "loss": 1.3161, "step": 1895 }, { "epoch": 0.57, "grad_norm": 0.9346737265586853, "learning_rate": 4.990037795980474e-05, "loss": 1.3944, "step": 1900 }, { "epoch": 0.57, "grad_norm": 0.5142995119094849, "learning_rate": 4.989985329417893e-05, "loss": 1.3262, "step": 1905 }, { "epoch": 0.57, "grad_norm": 1.0340321063995361, "learning_rate": 4.98993272533601e-05, "loss": 1.4552, "step": 1910 }, { "epoch": 0.57, "grad_norm": 0.5901709794998169, "learning_rate": 4.989879983737732e-05, "loss": 1.2281, "step": 1915 }, { "epoch": 0.57, "grad_norm": 1.0993763208389282, "learning_rate": 4.989827104625969e-05, "loss": 1.4189, "step": 1920 }, { "epoch": 0.58, "grad_norm": 0.9149566292762756, "learning_rate": 4.989774088003644e-05, "loss": 1.5347, "step": 1925 }, { "epoch": 0.58, "grad_norm": 0.6457690596580505, "learning_rate": 4.989720933873683e-05, "loss": 1.2577, "step": 1930 }, { "epoch": 0.58, "grad_norm": 0.5968732237815857, "learning_rate": 4.989667642239023e-05, "loss": 1.3495, "step": 1935 }, { "epoch": 0.58, "grad_norm": 0.9515772461891174, "learning_rate": 4.989614213102608e-05, "loss": 1.4536, "step": 1940 }, { "epoch": 0.58, "grad_norm": 0.9571760892868042, "learning_rate": 4.989560646467387e-05, "loss": 1.4527, "step": 1945 }, { "epoch": 0.58, "grad_norm": 0.5556336045265198, "learning_rate": 4.989506942336319e-05, "loss": 1.414, "step": 1950 }, { "epoch": 0.58, "grad_norm": 1.146568775177002, "learning_rate": 4.989453100712371e-05, "loss": 1.3903, "step": 1955 }, { "epoch": 0.59, "grad_norm": 0.47517749667167664, "learning_rate": 4.989399121598515e-05, "loss": 1.2966, "step": 1960 }, { "epoch": 0.59, "grad_norm": 1.3402255773544312, "learning_rate": 4.989345004997734e-05, "loss": 
1.4773, "step": 1965 }, { "epoch": 0.59, "grad_norm": 1.027206540107727, "learning_rate": 4.9892907509130156e-05, "loss": 1.5167, "step": 1970 }, { "epoch": 0.59, "grad_norm": 0.7391974925994873, "learning_rate": 4.989236359347356e-05, "loss": 1.492, "step": 1975 }, { "epoch": 0.59, "grad_norm": 2.5391299724578857, "learning_rate": 4.989181830303761e-05, "loss": 1.3457, "step": 1980 }, { "epoch": 0.59, "grad_norm": 0.5835283398628235, "learning_rate": 4.9891271637852396e-05, "loss": 1.3803, "step": 1985 }, { "epoch": 0.6, "grad_norm": 1.2071964740753174, "learning_rate": 4.9890723597948126e-05, "loss": 1.4644, "step": 1990 }, { "epoch": 0.6, "grad_norm": 0.8295852541923523, "learning_rate": 4.989017418335507e-05, "loss": 1.3478, "step": 1995 }, { "epoch": 0.6, "grad_norm": 1.2899019718170166, "learning_rate": 4.988962339410356e-05, "loss": 1.5883, "step": 2000 }, { "epoch": 0.6, "grad_norm": 0.8376094698905945, "learning_rate": 4.988907123022401e-05, "loss": 1.6507, "step": 2005 }, { "epoch": 0.6, "grad_norm": 1.5938405990600586, "learning_rate": 4.988851769174695e-05, "loss": 1.5191, "step": 2010 }, { "epoch": 0.6, "grad_norm": 0.7297775149345398, "learning_rate": 4.988796277870291e-05, "loss": 1.4755, "step": 2015 }, { "epoch": 0.6, "grad_norm": 0.8325863480567932, "learning_rate": 4.988740649112256e-05, "loss": 1.455, "step": 2020 }, { "epoch": 0.61, "grad_norm": 0.6817718148231506, "learning_rate": 4.9886848829036624e-05, "loss": 1.3709, "step": 2025 }, { "epoch": 0.61, "grad_norm": 0.8563414812088013, "learning_rate": 4.9886289792475894e-05, "loss": 1.3933, "step": 2030 }, { "epoch": 0.61, "grad_norm": 0.8419439792633057, "learning_rate": 4.9885729381471244e-05, "loss": 1.2793, "step": 2035 }, { "epoch": 0.61, "grad_norm": 1.1437691450119019, "learning_rate": 4.988516759605363e-05, "loss": 1.4561, "step": 2040 }, { "epoch": 0.61, "grad_norm": 1.0974056720733643, "learning_rate": 4.9884604436254065e-05, "loss": 1.3317, "step": 2045 }, { "epoch": 0.61, 
"grad_norm": 0.7327514886856079, "learning_rate": 4.9884039902103674e-05, "loss": 1.5174, "step": 2050 }, { "epoch": 0.61, "grad_norm": 0.5889623165130615, "learning_rate": 4.9883473993633626e-05, "loss": 1.4063, "step": 2055 }, { "epoch": 0.62, "grad_norm": 1.461192011833191, "learning_rate": 4.988290671087517e-05, "loss": 1.4681, "step": 2060 }, { "epoch": 0.62, "grad_norm": 0.7430127859115601, "learning_rate": 4.9882338053859646e-05, "loss": 1.3817, "step": 2065 }, { "epoch": 0.62, "grad_norm": 1.4986989498138428, "learning_rate": 4.988176802261845e-05, "loss": 1.3504, "step": 2070 }, { "epoch": 0.62, "grad_norm": 0.7772125601768494, "learning_rate": 4.988119661718307e-05, "loss": 1.36, "step": 2075 }, { "epoch": 0.62, "grad_norm": 1.2904176712036133, "learning_rate": 4.988062383758506e-05, "loss": 1.4057, "step": 2080 }, { "epoch": 0.62, "grad_norm": 1.2020084857940674, "learning_rate": 4.9880049683856066e-05, "loss": 1.5082, "step": 2085 }, { "epoch": 0.63, "grad_norm": 0.7529141306877136, "learning_rate": 4.987947415602778e-05, "loss": 1.3725, "step": 2090 }, { "epoch": 0.63, "grad_norm": 0.8067646622657776, "learning_rate": 4.987889725413201e-05, "loss": 1.3523, "step": 2095 }, { "epoch": 0.63, "grad_norm": 0.7970091104507446, "learning_rate": 4.987831897820059e-05, "loss": 1.6039, "step": 2100 }, { "epoch": 0.63, "grad_norm": 0.6938903331756592, "learning_rate": 4.987773932826548e-05, "loss": 1.4473, "step": 2105 }, { "epoch": 0.63, "grad_norm": 0.8393217921257019, "learning_rate": 4.9877158304358687e-05, "loss": 1.4723, "step": 2110 }, { "epoch": 0.63, "grad_norm": 1.2300384044647217, "learning_rate": 4.987657590651229e-05, "loss": 1.2791, "step": 2115 }, { "epoch": 0.63, "grad_norm": 0.7245568633079529, "learning_rate": 4.9875992134758476e-05, "loss": 1.4415, "step": 2120 }, { "epoch": 0.64, "grad_norm": 0.7715205550193787, "learning_rate": 4.987540698912947e-05, "loss": 1.4555, "step": 2125 }, { "epoch": 0.64, "grad_norm": 0.9755416512489319, 
"learning_rate": 4.987482046965759e-05, "loss": 1.4118, "step": 2130 }, { "epoch": 0.64, "grad_norm": 1.0845292806625366, "learning_rate": 4.987423257637523e-05, "loss": 1.4648, "step": 2135 }, { "epoch": 0.64, "grad_norm": 0.7946663498878479, "learning_rate": 4.9873643309314864e-05, "loss": 1.4335, "step": 2140 }, { "epoch": 0.64, "grad_norm": 1.767040491104126, "learning_rate": 4.987305266850903e-05, "loss": 1.3784, "step": 2145 }, { "epoch": 0.64, "grad_norm": 1.0675404071807861, "learning_rate": 4.987246065399035e-05, "loss": 1.573, "step": 2150 }, { "epoch": 0.64, "grad_norm": 1.144506573677063, "learning_rate": 4.987186726579153e-05, "loss": 1.3506, "step": 2155 }, { "epoch": 0.65, "grad_norm": 0.6684775948524475, "learning_rate": 4.987127250394532e-05, "loss": 1.2895, "step": 2160 }, { "epoch": 0.65, "grad_norm": 0.9211767911911011, "learning_rate": 4.987067636848459e-05, "loss": 1.2889, "step": 2165 }, { "epoch": 0.65, "grad_norm": 1.1036546230316162, "learning_rate": 4.987007885944226e-05, "loss": 1.39, "step": 2170 }, { "epoch": 0.65, "grad_norm": 0.8043128252029419, "learning_rate": 4.986947997685132e-05, "loss": 1.4297, "step": 2175 }, { "epoch": 0.65, "grad_norm": 2.3548777103424072, "learning_rate": 4.986887972074485e-05, "loss": 1.4425, "step": 2180 }, { "epoch": 0.65, "grad_norm": 1.4966070652008057, "learning_rate": 4.9868278091156e-05, "loss": 1.4042, "step": 2185 }, { "epoch": 0.66, "grad_norm": 0.8505388498306274, "learning_rate": 4.986767508811801e-05, "loss": 1.4276, "step": 2190 }, { "epoch": 0.66, "grad_norm": 1.0283442735671997, "learning_rate": 4.986707071166417e-05, "loss": 1.4598, "step": 2195 }, { "epoch": 0.66, "grad_norm": 0.680404007434845, "learning_rate": 4.9866464961827856e-05, "loss": 1.4773, "step": 2200 }, { "epoch": 0.66, "grad_norm": 0.575295090675354, "learning_rate": 4.986585783864254e-05, "loss": 1.3162, "step": 2205 }, { "epoch": 0.66, "grad_norm": 0.7415833473205566, "learning_rate": 4.9865249342141726e-05, "loss": 
1.4289, "step": 2210 }, { "epoch": 0.66, "grad_norm": 0.8124527335166931, "learning_rate": 4.986463947235905e-05, "loss": 1.4606, "step": 2215 }, { "epoch": 0.66, "grad_norm": 0.7351284623146057, "learning_rate": 4.9864028229328186e-05, "loss": 1.5149, "step": 2220 }, { "epoch": 0.67, "grad_norm": 0.9113266468048096, "learning_rate": 4.9863415613082876e-05, "loss": 1.5249, "step": 2225 }, { "epoch": 0.67, "grad_norm": 1.276861310005188, "learning_rate": 4.986280162365697e-05, "loss": 1.4372, "step": 2230 }, { "epoch": 0.67, "grad_norm": 0.6544708013534546, "learning_rate": 4.9862186261084374e-05, "loss": 1.3714, "step": 2235 }, { "epoch": 0.67, "grad_norm": 0.9754915833473206, "learning_rate": 4.986156952539908e-05, "loss": 1.3048, "step": 2240 }, { "epoch": 0.67, "grad_norm": 0.7614801526069641, "learning_rate": 4.9860951416635126e-05, "loss": 1.4061, "step": 2245 }, { "epoch": 0.67, "grad_norm": 1.1788151264190674, "learning_rate": 4.986033193482668e-05, "loss": 1.2955, "step": 2250 }, { "epoch": 0.67, "grad_norm": 0.917193591594696, "learning_rate": 4.9859711080007944e-05, "loss": 1.351, "step": 2255 }, { "epoch": 0.68, "grad_norm": 0.7633816003799438, "learning_rate": 4.9859088852213196e-05, "loss": 1.3889, "step": 2260 }, { "epoch": 0.68, "grad_norm": 0.4886833429336548, "learning_rate": 4.985846525147681e-05, "loss": 1.4501, "step": 2265 }, { "epoch": 0.68, "grad_norm": 0.9673319458961487, "learning_rate": 4.9857840277833236e-05, "loss": 1.4624, "step": 2270 }, { "epoch": 0.68, "grad_norm": 0.7718215584754944, "learning_rate": 4.9857213931316984e-05, "loss": 1.4116, "step": 2275 }, { "epoch": 0.68, "grad_norm": 0.6662101745605469, "learning_rate": 4.985658621196263e-05, "loss": 1.425, "step": 2280 }, { "epoch": 0.68, "grad_norm": 0.7783111333847046, "learning_rate": 4.985595711980486e-05, "loss": 1.5157, "step": 2285 }, { "epoch": 0.69, "grad_norm": 0.4051775634288788, "learning_rate": 4.985532665487843e-05, "loss": 1.3326, "step": 2290 }, { "epoch": 0.69, 
"grad_norm": 0.8741587996482849, "learning_rate": 4.9854694817218125e-05, "loss": 1.3785, "step": 2295 }, { "epoch": 0.69, "grad_norm": 0.6999533772468567, "learning_rate": 4.985406160685887e-05, "loss": 1.3553, "step": 2300 }, { "epoch": 0.69, "grad_norm": 0.7438477277755737, "learning_rate": 4.985342702383563e-05, "loss": 1.4612, "step": 2305 }, { "epoch": 0.69, "grad_norm": 1.4302748441696167, "learning_rate": 4.985279106818345e-05, "loss": 1.4045, "step": 2310 }, { "epoch": 0.69, "grad_norm": 0.8926997780799866, "learning_rate": 4.9852153739937444e-05, "loss": 1.448, "step": 2315 }, { "epoch": 0.69, "grad_norm": 0.7506667375564575, "learning_rate": 4.9851515039132824e-05, "loss": 1.3386, "step": 2320 }, { "epoch": 0.7, "grad_norm": 0.7566189169883728, "learning_rate": 4.985087496580485e-05, "loss": 1.5076, "step": 2325 }, { "epoch": 0.7, "grad_norm": 2.2634682655334473, "learning_rate": 4.98502335199889e-05, "loss": 1.5423, "step": 2330 }, { "epoch": 0.7, "grad_norm": 1.0470796823501587, "learning_rate": 4.984959070172037e-05, "loss": 1.3496, "step": 2335 }, { "epoch": 0.7, "grad_norm": 0.8911975026130676, "learning_rate": 4.984894651103478e-05, "loss": 1.4615, "step": 2340 }, { "epoch": 0.7, "grad_norm": 0.8113840222358704, "learning_rate": 4.98483009479677e-05, "loss": 1.4067, "step": 2345 }, { "epoch": 0.7, "grad_norm": 0.6011930704116821, "learning_rate": 4.984765401255479e-05, "loss": 1.3069, "step": 2350 }, { "epoch": 0.7, "grad_norm": 0.4766760468482971, "learning_rate": 4.984700570483178e-05, "loss": 1.552, "step": 2355 }, { "epoch": 0.71, "grad_norm": 1.0068391561508179, "learning_rate": 4.984635602483447e-05, "loss": 1.3886, "step": 2360 }, { "epoch": 0.71, "grad_norm": 1.179116129875183, "learning_rate": 4.984570497259874e-05, "loss": 1.4761, "step": 2365 }, { "epoch": 0.71, "grad_norm": 0.623014509677887, "learning_rate": 4.9845052548160554e-05, "loss": 1.627, "step": 2370 }, { "epoch": 0.71, "grad_norm": 0.6420192718505859, "learning_rate": 
4.984439875155593e-05, "loss": 1.2861, "step": 2375 }, { "epoch": 0.71, "grad_norm": 0.6435323357582092, "learning_rate": 4.9843743582821e-05, "loss": 1.3463, "step": 2380 }, { "epoch": 0.71, "grad_norm": 0.8503981828689575, "learning_rate": 4.984308704199193e-05, "loss": 1.4085, "step": 2385 }, { "epoch": 0.72, "grad_norm": 0.8631937503814697, "learning_rate": 4.984242912910499e-05, "loss": 1.3638, "step": 2390 }, { "epoch": 0.72, "grad_norm": 0.8973201513290405, "learning_rate": 4.984176984419651e-05, "loss": 1.3254, "step": 2395 }, { "epoch": 0.72, "grad_norm": 0.8003751635551453, "learning_rate": 4.9841109187302896e-05, "loss": 1.3815, "step": 2400 }, { "epoch": 0.72, "grad_norm": 1.4101213216781616, "learning_rate": 4.984044715846065e-05, "loss": 1.5289, "step": 2405 }, { "epoch": 0.72, "grad_norm": 0.5882824063301086, "learning_rate": 4.983978375770633e-05, "loss": 1.369, "step": 2410 }, { "epoch": 0.72, "grad_norm": 0.5322262644767761, "learning_rate": 4.983911898507656e-05, "loss": 1.6157, "step": 2415 }, { "epoch": 0.72, "grad_norm": 1.2763606309890747, "learning_rate": 4.9838452840608076e-05, "loss": 1.4812, "step": 2420 }, { "epoch": 0.73, "grad_norm": 0.9247528910636902, "learning_rate": 4.983778532433766e-05, "loss": 1.4012, "step": 2425 }, { "epoch": 0.73, "grad_norm": 1.0083926916122437, "learning_rate": 4.983711643630218e-05, "loss": 1.4391, "step": 2430 }, { "epoch": 0.73, "grad_norm": 1.7945544719696045, "learning_rate": 4.983644617653857e-05, "loss": 1.3384, "step": 2435 }, { "epoch": 0.73, "grad_norm": 0.8284363150596619, "learning_rate": 4.983577454508386e-05, "loss": 1.497, "step": 2440 }, { "epoch": 0.73, "grad_norm": 0.9625481367111206, "learning_rate": 4.9835101541975125e-05, "loss": 1.5237, "step": 2445 }, { "epoch": 0.73, "grad_norm": 0.6448972821235657, "learning_rate": 4.983442716724956e-05, "loss": 1.3189, "step": 2450 }, { "epoch": 0.73, "grad_norm": 0.6880027651786804, "learning_rate": 4.983375142094439e-05, "loss": 1.4792, "step": 
2455 }, { "epoch": 0.74, "grad_norm": 0.7837314605712891, "learning_rate": 4.983307430309695e-05, "loss": 1.4544, "step": 2460 }, { "epoch": 0.74, "grad_norm": 0.7839219570159912, "learning_rate": 4.9832395813744614e-05, "loss": 1.4142, "step": 2465 }, { "epoch": 0.74, "grad_norm": 1.2939424514770508, "learning_rate": 4.983171595292489e-05, "loss": 1.3861, "step": 2470 }, { "epoch": 0.74, "grad_norm": 0.47123095393180847, "learning_rate": 4.983103472067529e-05, "loss": 1.4055, "step": 2475 }, { "epoch": 0.74, "grad_norm": 0.9300950169563293, "learning_rate": 4.983035211703345e-05, "loss": 1.3981, "step": 2480 }, { "epoch": 0.74, "grad_norm": 0.5792704820632935, "learning_rate": 4.982966814203708e-05, "loss": 1.2605, "step": 2485 }, { "epoch": 0.74, "grad_norm": 1.7978287935256958, "learning_rate": 4.9828982795723944e-05, "loss": 1.573, "step": 2490 }, { "epoch": 0.75, "grad_norm": 0.7604524493217468, "learning_rate": 4.98282960781319e-05, "loss": 1.4199, "step": 2495 }, { "epoch": 0.75, "grad_norm": 0.9219200015068054, "learning_rate": 4.982760798929887e-05, "loss": 1.3895, "step": 2500 }, { "epoch": 0.75, "grad_norm": 0.7405526638031006, "learning_rate": 4.982691852926286e-05, "loss": 1.3229, "step": 2505 }, { "epoch": 0.75, "grad_norm": 0.8719181418418884, "learning_rate": 4.982622769806193e-05, "loss": 1.3892, "step": 2510 }, { "epoch": 0.75, "grad_norm": 1.5789086818695068, "learning_rate": 4.982553549573427e-05, "loss": 1.3667, "step": 2515 }, { "epoch": 0.75, "grad_norm": 0.6426258683204651, "learning_rate": 4.982484192231808e-05, "loss": 1.463, "step": 2520 }, { "epoch": 0.76, "grad_norm": 0.7936124205589294, "learning_rate": 4.982414697785168e-05, "loss": 1.4307, "step": 2525 }, { "epoch": 0.76, "grad_norm": 0.7490212917327881, "learning_rate": 4.982345066237344e-05, "loss": 1.3233, "step": 2530 }, { "epoch": 0.76, "grad_norm": 0.6938835978507996, "learning_rate": 4.9822752975921826e-05, "loss": 1.4814, "step": 2535 }, { "epoch": 0.76, "grad_norm": 
0.6656054854393005, "learning_rate": 4.982205391853536e-05, "loss": 1.3243, "step": 2540 }, { "epoch": 0.76, "grad_norm": 0.8422638773918152, "learning_rate": 4.982135349025266e-05, "loss": 1.4865, "step": 2545 }, { "epoch": 0.76, "grad_norm": 0.7025824785232544, "learning_rate": 4.982065169111241e-05, "loss": 1.3322, "step": 2550 }, { "epoch": 0.76, "grad_norm": 1.2645024061203003, "learning_rate": 4.981994852115337e-05, "loss": 1.405, "step": 2555 }, { "epoch": 0.77, "grad_norm": 0.8437483310699463, "learning_rate": 4.981924398041437e-05, "loss": 1.49, "step": 2560 }, { "epoch": 0.77, "grad_norm": 1.1599018573760986, "learning_rate": 4.9818538068934314e-05, "loss": 1.3704, "step": 2565 }, { "epoch": 0.77, "grad_norm": 1.4388209581375122, "learning_rate": 4.981783078675221e-05, "loss": 1.3873, "step": 2570 }, { "epoch": 0.77, "grad_norm": 1.019699215888977, "learning_rate": 4.98171221339071e-05, "loss": 1.2774, "step": 2575 }, { "epoch": 0.77, "grad_norm": 0.7524467706680298, "learning_rate": 4.981641211043814e-05, "loss": 1.3507, "step": 2580 }, { "epoch": 0.77, "grad_norm": 0.7237170934677124, "learning_rate": 4.981570071638453e-05, "loss": 1.4106, "step": 2585 }, { "epoch": 0.77, "grad_norm": 2.224061965942383, "learning_rate": 4.981498795178556e-05, "loss": 1.4087, "step": 2590 }, { "epoch": 0.78, "grad_norm": 0.733247697353363, "learning_rate": 4.98142738166806e-05, "loss": 1.4756, "step": 2595 }, { "epoch": 0.78, "grad_norm": 0.9564744830131531, "learning_rate": 4.9813558311109095e-05, "loss": 1.4537, "step": 2600 }, { "epoch": 0.78, "grad_norm": 0.5492211580276489, "learning_rate": 4.981284143511055e-05, "loss": 1.1261, "step": 2605 }, { "epoch": 0.78, "grad_norm": 0.8463557362556458, "learning_rate": 4.981212318872457e-05, "loss": 1.4015, "step": 2610 }, { "epoch": 0.78, "grad_norm": 1.0302629470825195, "learning_rate": 4.981140357199081e-05, "loss": 1.4069, "step": 2615 }, { "epoch": 0.78, "grad_norm": 0.7704905867576599, "learning_rate": 
4.981068258494903e-05, "loss": 1.3306, "step": 2620 }, { "epoch": 0.79, "grad_norm": 1.499060034751892, "learning_rate": 4.980996022763904e-05, "loss": 1.5796, "step": 2625 }, { "epoch": 0.79, "grad_norm": 0.660788357257843, "learning_rate": 4.980923650010072e-05, "loss": 1.401, "step": 2630 }, { "epoch": 0.79, "grad_norm": 2.033163547515869, "learning_rate": 4.980851140237407e-05, "loss": 1.442, "step": 2635 }, { "epoch": 0.79, "grad_norm": 1.5330935716629028, "learning_rate": 4.9807784934499125e-05, "loss": 1.5339, "step": 2640 }, { "epoch": 0.79, "grad_norm": 3.3255956172943115, "learning_rate": 4.9807057096516e-05, "loss": 1.5884, "step": 2645 }, { "epoch": 0.79, "grad_norm": 0.7848919630050659, "learning_rate": 4.9806327888464885e-05, "loss": 1.4167, "step": 2650 }, { "epoch": 0.79, "grad_norm": 0.9711475968360901, "learning_rate": 4.980559731038608e-05, "loss": 1.4576, "step": 2655 }, { "epoch": 0.8, "grad_norm": 0.9157351851463318, "learning_rate": 4.9804865362319914e-05, "loss": 1.3899, "step": 2660 }, { "epoch": 0.8, "grad_norm": 1.0029135942459106, "learning_rate": 4.980413204430682e-05, "loss": 1.4998, "step": 2665 }, { "epoch": 0.8, "grad_norm": 1.5365571975708008, "learning_rate": 4.980339735638729e-05, "loss": 1.3776, "step": 2670 }, { "epoch": 0.8, "grad_norm": 0.6640515327453613, "learning_rate": 4.980266129860191e-05, "loss": 1.3586, "step": 2675 }, { "epoch": 0.8, "grad_norm": 0.6773358583450317, "learning_rate": 4.9801923870991326e-05, "loss": 1.3586, "step": 2680 }, { "epoch": 0.8, "grad_norm": 0.62249755859375, "learning_rate": 4.9801185073596266e-05, "loss": 1.5044, "step": 2685 }, { "epoch": 0.8, "grad_norm": 1.2861700057983398, "learning_rate": 4.980044490645754e-05, "loss": 1.5279, "step": 2690 }, { "epoch": 0.81, "grad_norm": 1.6400649547576904, "learning_rate": 4.979970336961601e-05, "loss": 1.4849, "step": 2695 }, { "epoch": 0.81, "grad_norm": 1.2481294870376587, "learning_rate": 4.9798960463112654e-05, "loss": 1.3853, "step": 2700 }, { 
"epoch": 0.81, "grad_norm": 1.038743019104004, "learning_rate": 4.979821618698848e-05, "loss": 1.3042, "step": 2705 }, { "epoch": 0.81, "grad_norm": 1.1778807640075684, "learning_rate": 4.979747054128461e-05, "loss": 1.4474, "step": 2710 }, { "epoch": 0.81, "grad_norm": 0.8384398818016052, "learning_rate": 4.97967235260422e-05, "loss": 1.3712, "step": 2715 }, { "epoch": 0.81, "grad_norm": 1.2256791591644287, "learning_rate": 4.9795975141302545e-05, "loss": 1.6502, "step": 2720 }, { "epoch": 0.82, "grad_norm": 0.5445688962936401, "learning_rate": 4.979522538710695e-05, "loss": 1.414, "step": 2725 }, { "epoch": 0.82, "grad_norm": 1.1054037809371948, "learning_rate": 4.979447426349682e-05, "loss": 1.3975, "step": 2730 }, { "epoch": 0.82, "grad_norm": 0.6710256934165955, "learning_rate": 4.979372177051366e-05, "loss": 1.3354, "step": 2735 }, { "epoch": 0.82, "grad_norm": 1.0434719324111938, "learning_rate": 4.979296790819901e-05, "loss": 1.4389, "step": 2740 }, { "epoch": 0.82, "grad_norm": 0.8868400454521179, "learning_rate": 4.9792212676594516e-05, "loss": 1.3882, "step": 2745 }, { "epoch": 0.82, "grad_norm": 0.7899145483970642, "learning_rate": 4.9791456075741895e-05, "loss": 1.2246, "step": 2750 }, { "epoch": 0.82, "grad_norm": 0.6027569770812988, "learning_rate": 4.979069810568292e-05, "loss": 1.4354, "step": 2755 }, { "epoch": 0.83, "grad_norm": 1.1097197532653809, "learning_rate": 4.9789938766459446e-05, "loss": 1.4536, "step": 2760 }, { "epoch": 0.83, "grad_norm": 0.8584468960762024, "learning_rate": 4.9789178058113434e-05, "loss": 1.3242, "step": 2765 }, { "epoch": 0.83, "grad_norm": 0.6569173336029053, "learning_rate": 4.978841598068688e-05, "loss": 1.4388, "step": 2770 }, { "epoch": 0.83, "grad_norm": 0.6124112010002136, "learning_rate": 4.978765253422188e-05, "loss": 1.3181, "step": 2775 }, { "epoch": 0.83, "grad_norm": 0.7764132022857666, "learning_rate": 4.978688771876059e-05, "loss": 1.4396, "step": 2780 }, { "epoch": 0.83, "grad_norm": 
1.035800576210022, "learning_rate": 4.9786121534345265e-05, "loss": 1.3593, "step": 2785 }, { "epoch": 0.83, "grad_norm": 0.6600098609924316, "learning_rate": 4.97853539810182e-05, "loss": 1.4073, "step": 2790 }, { "epoch": 0.84, "grad_norm": 0.6001609563827515, "learning_rate": 4.9784585058821807e-05, "loss": 1.3929, "step": 2795 }, { "epoch": 0.84, "grad_norm": 0.7586673498153687, "learning_rate": 4.9783814767798545e-05, "loss": 1.1887, "step": 2800 }, { "epoch": 0.84, "grad_norm": 0.542594313621521, "learning_rate": 4.9783043107990946e-05, "loss": 1.4094, "step": 2805 }, { "epoch": 0.84, "grad_norm": 1.1905423402786255, "learning_rate": 4.978227007944164e-05, "loss": 1.4199, "step": 2810 }, { "epoch": 0.84, "grad_norm": 1.1273014545440674, "learning_rate": 4.978149568219332e-05, "loss": 1.5138, "step": 2815 }, { "epoch": 0.84, "grad_norm": 0.8764979839324951, "learning_rate": 4.978071991628875e-05, "loss": 1.5155, "step": 2820 }, { "epoch": 0.85, "grad_norm": 0.8033466339111328, "learning_rate": 4.9779942781770776e-05, "loss": 1.3844, "step": 2825 }, { "epoch": 0.85, "grad_norm": 0.9946221113204956, "learning_rate": 4.9779164278682324e-05, "loss": 1.3978, "step": 2830 }, { "epoch": 0.85, "grad_norm": 0.596258282661438, "learning_rate": 4.977838440706638e-05, "loss": 1.4013, "step": 2835 }, { "epoch": 0.85, "grad_norm": 1.155501365661621, "learning_rate": 4.977760316696603e-05, "loss": 1.4486, "step": 2840 }, { "epoch": 0.85, "grad_norm": 0.8394220471382141, "learning_rate": 4.9776820558424396e-05, "loss": 1.4483, "step": 2845 }, { "epoch": 0.85, "grad_norm": 0.8399643301963806, "learning_rate": 4.977603658148473e-05, "loss": 1.4999, "step": 2850 }, { "epoch": 0.85, "grad_norm": 1.7223281860351562, "learning_rate": 4.977525123619031e-05, "loss": 1.4209, "step": 2855 }, { "epoch": 0.86, "grad_norm": 0.8585662245750427, "learning_rate": 4.9774464522584516e-05, "loss": 1.2834, "step": 2860 }, { "epoch": 0.86, "grad_norm": 0.903371274471283, "learning_rate": 
4.9773676440710804e-05, "loss": 1.514, "step": 2865 }, { "epoch": 0.86, "grad_norm": 0.6225999593734741, "learning_rate": 4.977288699061269e-05, "loss": 1.4325, "step": 2870 }, { "epoch": 0.86, "grad_norm": 0.8646724224090576, "learning_rate": 4.977209617233378e-05, "loss": 1.2028, "step": 2875 }, { "epoch": 0.86, "grad_norm": 0.7862356305122375, "learning_rate": 4.9771303985917745e-05, "loss": 1.4069, "step": 2880 }, { "epoch": 0.86, "grad_norm": 0.635420024394989, "learning_rate": 4.977051043140834e-05, "loss": 1.4109, "step": 2885 }, { "epoch": 0.86, "grad_norm": 0.7677288055419922, "learning_rate": 4.976971550884939e-05, "loss": 1.1608, "step": 2890 }, { "epoch": 0.87, "grad_norm": 0.845956027507782, "learning_rate": 4.9768919218284804e-05, "loss": 1.4072, "step": 2895 }, { "epoch": 0.87, "grad_norm": 0.9149651527404785, "learning_rate": 4.976812155975855e-05, "loss": 1.3957, "step": 2900 }, { "epoch": 0.87, "grad_norm": 0.9289765954017639, "learning_rate": 4.9767322533314685e-05, "loss": 1.4147, "step": 2905 }, { "epoch": 0.87, "grad_norm": 1.1953096389770508, "learning_rate": 4.9766522138997347e-05, "loss": 1.4155, "step": 2910 }, { "epoch": 0.87, "grad_norm": 0.9393072128295898, "learning_rate": 4.976572037685073e-05, "loss": 1.4166, "step": 2915 }, { "epoch": 0.87, "grad_norm": 1.012731909751892, "learning_rate": 4.9764917246919125e-05, "loss": 1.4684, "step": 2920 }, { "epoch": 0.88, "grad_norm": 0.9099532961845398, "learning_rate": 4.9764112749246876e-05, "loss": 1.4124, "step": 2925 }, { "epoch": 0.88, "grad_norm": 0.69303959608078, "learning_rate": 4.976330688387842e-05, "loss": 1.4344, "step": 2930 }, { "epoch": 0.88, "grad_norm": 0.6986569762229919, "learning_rate": 4.9762499650858274e-05, "loss": 1.3507, "step": 2935 }, { "epoch": 0.88, "grad_norm": 0.5651214718818665, "learning_rate": 4.976169105023101e-05, "loss": 1.2479, "step": 2940 }, { "epoch": 0.88, "grad_norm": 0.7775252461433411, "learning_rate": 4.9760881082041275e-05, "loss": 1.3942, 
"step": 2945 }, { "epoch": 0.88, "grad_norm": 1.379727840423584, "learning_rate": 4.976006974633383e-05, "loss": 1.4428, "step": 2950 }, { "epoch": 0.88, "grad_norm": 0.850517749786377, "learning_rate": 4.9759257043153454e-05, "loss": 1.4216, "step": 2955 }, { "epoch": 0.89, "grad_norm": 0.9250400066375732, "learning_rate": 4.975844297254506e-05, "loss": 1.4909, "step": 2960 }, { "epoch": 0.89, "grad_norm": 1.5304796695709229, "learning_rate": 4.975762753455359e-05, "loss": 1.4814, "step": 2965 }, { "epoch": 0.89, "grad_norm": 1.1414655447006226, "learning_rate": 4.975681072922409e-05, "loss": 1.408, "step": 2970 }, { "epoch": 0.89, "grad_norm": 0.5515819191932678, "learning_rate": 4.975599255660166e-05, "loss": 1.3775, "step": 2975 }, { "epoch": 0.89, "grad_norm": 0.8293300867080688, "learning_rate": 4.97551730167315e-05, "loss": 1.3132, "step": 2980 }, { "epoch": 0.89, "grad_norm": 1.0170328617095947, "learning_rate": 4.9754352109658865e-05, "loss": 1.4993, "step": 2985 }, { "epoch": 0.89, "grad_norm": 0.937022864818573, "learning_rate": 4.9753529835429094e-05, "loss": 1.3501, "step": 2990 }, { "epoch": 0.9, "grad_norm": 0.9053025245666504, "learning_rate": 4.97527061940876e-05, "loss": 1.4049, "step": 2995 }, { "epoch": 0.9, "grad_norm": 0.49352753162384033, "learning_rate": 4.975188118567986e-05, "loss": 1.4339, "step": 3000 }, { "epoch": 0.9, "grad_norm": 1.048647165298462, "learning_rate": 4.975105481025146e-05, "loss": 1.3139, "step": 3005 }, { "epoch": 0.9, "grad_norm": 0.6800316572189331, "learning_rate": 4.9750227067848034e-05, "loss": 1.2836, "step": 3010 }, { "epoch": 0.9, "grad_norm": 1.1411221027374268, "learning_rate": 4.974939795851529e-05, "loss": 1.4515, "step": 3015 }, { "epoch": 0.9, "grad_norm": 0.7004803419113159, "learning_rate": 4.9748567482299025e-05, "loss": 1.551, "step": 3020 }, { "epoch": 0.91, "grad_norm": 1.4398937225341797, "learning_rate": 4.97477356392451e-05, "loss": 1.4192, "step": 3025 }, { "epoch": 0.91, "grad_norm": 
0.5903295278549194, "learning_rate": 4.974690242939946e-05, "loss": 1.5211, "step": 3030 }, { "epoch": 0.91, "grad_norm": 0.6511791348457336, "learning_rate": 4.974606785280812e-05, "loss": 1.5211, "step": 3035 }, { "epoch": 0.91, "grad_norm": 1.2127279043197632, "learning_rate": 4.9745231909517176e-05, "loss": 1.3477, "step": 3040 }, { "epoch": 0.91, "grad_norm": 2.202561855316162, "learning_rate": 4.9744394599572795e-05, "loss": 1.5234, "step": 3045 }, { "epoch": 0.91, "grad_norm": 0.7070451378822327, "learning_rate": 4.974355592302122e-05, "loss": 1.4987, "step": 3050 }, { "epoch": 0.91, "grad_norm": 0.9108610153198242, "learning_rate": 4.974271587990877e-05, "loss": 1.2861, "step": 3055 }, { "epoch": 0.92, "grad_norm": 0.9873246550559998, "learning_rate": 4.974187447028184e-05, "loss": 1.4222, "step": 3060 }, { "epoch": 0.92, "grad_norm": 1.2977701425552368, "learning_rate": 4.9741031694186904e-05, "loss": 1.4062, "step": 3065 }, { "epoch": 0.92, "grad_norm": 1.025950312614441, "learning_rate": 4.9740187551670505e-05, "loss": 1.5189, "step": 3070 }, { "epoch": 0.92, "grad_norm": 1.1865170001983643, "learning_rate": 4.973934204277926e-05, "loss": 1.3828, "step": 3075 }, { "epoch": 0.92, "grad_norm": 0.818302571773529, "learning_rate": 4.973849516755987e-05, "loss": 1.467, "step": 3080 }, { "epoch": 0.92, "grad_norm": 0.8224700689315796, "learning_rate": 4.9737646926059104e-05, "loss": 1.7332, "step": 3085 }, { "epoch": 0.92, "grad_norm": 1.3279811143875122, "learning_rate": 4.973679731832381e-05, "loss": 1.5156, "step": 3090 }, { "epoch": 0.93, "grad_norm": 0.5964494347572327, "learning_rate": 4.973594634440092e-05, "loss": 1.3524, "step": 3095 }, { "epoch": 0.93, "grad_norm": 1.196366786956787, "learning_rate": 4.9735094004337427e-05, "loss": 1.5278, "step": 3100 }, { "epoch": 0.93, "grad_norm": 0.7338801622390747, "learning_rate": 4.97342402981804e-05, "loss": 1.4148, "step": 3105 }, { "epoch": 0.93, "grad_norm": 0.7936698794364929, "learning_rate": 
4.973338522597698e-05, "loss": 1.4142, "step": 3110 }, { "epoch": 0.93, "grad_norm": 0.5636515021324158, "learning_rate": 4.9732528787774416e-05, "loss": 1.5779, "step": 3115 }, { "epoch": 0.93, "grad_norm": 0.5962295532226562, "learning_rate": 4.973167098361999e-05, "loss": 1.6513, "step": 3120 }, { "epoch": 0.93, "grad_norm": 0.47786709666252136, "learning_rate": 4.9730811813561083e-05, "loss": 1.3843, "step": 3125 }, { "epoch": 0.94, "grad_norm": 0.7690017223358154, "learning_rate": 4.972995127764515e-05, "loss": 1.2518, "step": 3130 }, { "epoch": 0.94, "grad_norm": 0.7740575671195984, "learning_rate": 4.97290893759197e-05, "loss": 1.487, "step": 3135 }, { "epoch": 0.94, "grad_norm": 0.7592212557792664, "learning_rate": 4.972822610843236e-05, "loss": 1.4385, "step": 3140 }, { "epoch": 0.94, "grad_norm": 0.5696493983268738, "learning_rate": 4.972736147523079e-05, "loss": 1.4802, "step": 3145 }, { "epoch": 0.94, "grad_norm": 0.5601330995559692, "learning_rate": 4.9726495476362756e-05, "loss": 1.4203, "step": 3150 }, { "epoch": 0.94, "grad_norm": 1.0239603519439697, "learning_rate": 4.972562811187608e-05, "loss": 1.4046, "step": 3155 }, { "epoch": 0.95, "grad_norm": 0.7492188811302185, "learning_rate": 4.972475938181865e-05, "loss": 1.3938, "step": 3160 }, { "epoch": 0.95, "grad_norm": 1.0243171453475952, "learning_rate": 4.972388928623847e-05, "loss": 1.2638, "step": 3165 }, { "epoch": 0.95, "grad_norm": 1.4914931058883667, "learning_rate": 4.972301782518358e-05, "loss": 1.4414, "step": 3170 }, { "epoch": 0.95, "grad_norm": 1.6274807453155518, "learning_rate": 4.972214499870212e-05, "loss": 1.4207, "step": 3175 }, { "epoch": 0.95, "grad_norm": 0.9303854703903198, "learning_rate": 4.9721270806842277e-05, "loss": 1.4353, "step": 3180 }, { "epoch": 0.95, "grad_norm": 0.8997390866279602, "learning_rate": 4.9720395249652355e-05, "loss": 1.3493, "step": 3185 }, { "epoch": 0.95, "grad_norm": 1.1290831565856934, "learning_rate": 4.97195183271807e-05, "loss": 1.4754, 
"step": 3190 }, { "epoch": 0.96, "grad_norm": 0.7762711644172668, "learning_rate": 4.971864003947573e-05, "loss": 1.45, "step": 3195 }, { "epoch": 0.96, "grad_norm": 0.6150183081626892, "learning_rate": 4.971776038658598e-05, "loss": 1.2738, "step": 3200 }, { "epoch": 0.96, "grad_norm": 0.6307562589645386, "learning_rate": 4.971687936856e-05, "loss": 1.4348, "step": 3205 }, { "epoch": 0.96, "grad_norm": 0.7655115127563477, "learning_rate": 4.971599698544648e-05, "loss": 1.2861, "step": 3210 }, { "epoch": 0.96, "grad_norm": 0.8392760157585144, "learning_rate": 4.971511323729412e-05, "loss": 1.4351, "step": 3215 }, { "epoch": 0.96, "grad_norm": 1.204959511756897, "learning_rate": 4.9714228124151756e-05, "loss": 1.4165, "step": 3220 }, { "epoch": 0.96, "grad_norm": 0.6539895534515381, "learning_rate": 4.9713341646068264e-05, "loss": 1.3264, "step": 3225 }, { "epoch": 0.97, "grad_norm": 0.6086089611053467, "learning_rate": 4.97124538030926e-05, "loss": 1.3271, "step": 3230 }, { "epoch": 0.97, "grad_norm": 0.9812304377555847, "learning_rate": 4.97115645952738e-05, "loss": 1.4253, "step": 3235 }, { "epoch": 0.97, "grad_norm": 0.5627865791320801, "learning_rate": 4.9710674022660964e-05, "loss": 1.4749, "step": 3240 }, { "epoch": 0.97, "grad_norm": 1.062355399131775, "learning_rate": 4.970978208530329e-05, "loss": 1.3088, "step": 3245 }, { "epoch": 0.97, "grad_norm": 0.5620614290237427, "learning_rate": 4.9708888783250047e-05, "loss": 1.3848, "step": 3250 }, { "epoch": 0.97, "grad_norm": 1.4596127271652222, "learning_rate": 4.970799411655055e-05, "loss": 1.5558, "step": 3255 }, { "epoch": 0.98, "grad_norm": 0.5726821422576904, "learning_rate": 4.9707098085254224e-05, "loss": 1.4792, "step": 3260 }, { "epoch": 0.98, "grad_norm": 1.5486913919448853, "learning_rate": 4.970620068941055e-05, "loss": 1.3452, "step": 3265 }, { "epoch": 0.98, "grad_norm": 0.5982449650764465, "learning_rate": 4.9705301929069094e-05, "loss": 1.3137, "step": 3270 }, { "epoch": 0.98, "grad_norm": 
1.129350185394287, "learning_rate": 4.9704401804279495e-05, "loss": 1.3673, "step": 3275 }, { "epoch": 0.98, "grad_norm": 0.7439037561416626, "learning_rate": 4.9703500315091455e-05, "loss": 1.3652, "step": 3280 }, { "epoch": 0.98, "grad_norm": 0.6493015885353088, "learning_rate": 4.970259746155478e-05, "loss": 1.3959, "step": 3285 }, { "epoch": 0.98, "grad_norm": 1.614264726638794, "learning_rate": 4.9701693243719324e-05, "loss": 1.5581, "step": 3290 }, { "epoch": 0.99, "grad_norm": 0.9825220108032227, "learning_rate": 4.970078766163502e-05, "loss": 1.3638, "step": 3295 }, { "epoch": 0.99, "grad_norm": 0.509028971195221, "learning_rate": 4.9699880715351884e-05, "loss": 1.4559, "step": 3300 }, { "epoch": 0.99, "grad_norm": 0.7303828001022339, "learning_rate": 4.969897240492002e-05, "loss": 1.2282, "step": 3305 }, { "epoch": 0.99, "grad_norm": 0.7061027884483337, "learning_rate": 4.9698062730389586e-05, "loss": 1.4301, "step": 3310 }, { "epoch": 0.99, "grad_norm": 0.5220503211021423, "learning_rate": 4.9697151691810814e-05, "loss": 1.5012, "step": 3315 }, { "epoch": 0.99, "grad_norm": 1.2105666399002075, "learning_rate": 4.969623928923402e-05, "loss": 1.5438, "step": 3320 }, { "epoch": 0.99, "grad_norm": 0.6172266006469727, "learning_rate": 4.969532552270961e-05, "loss": 1.3556, "step": 3325 }, { "epoch": 1.0, "grad_norm": 1.089318037033081, "learning_rate": 4.969441039228803e-05, "loss": 1.3997, "step": 3330 }, { "epoch": 1.0, "grad_norm": 0.8561093211174011, "learning_rate": 4.969349389801984e-05, "loss": 1.3716, "step": 3335 }, { "epoch": 1.0, "grad_norm": 0.4685690701007843, "learning_rate": 4.969257603995565e-05, "loss": 1.4295, "step": 3340 }, { "epoch": 1.0, "grad_norm": 1.1991277933120728, "learning_rate": 4.9691656818146146e-05, "loss": 1.3521, "step": 3345 }, { "epoch": 1.0, "grad_norm": 1.0424867868423462, "learning_rate": 4.969073623264211e-05, "loss": 1.4858, "step": 3350 }, { "epoch": 1.0, "grad_norm": 1.1299071311950684, "learning_rate": 
4.968981428349438e-05, "loss": 1.3135, "step": 3355 }, { "epoch": 1.01, "grad_norm": 0.6989724636077881, "learning_rate": 4.9688890970753856e-05, "loss": 1.4063, "step": 3360 }, { "epoch": 1.01, "grad_norm": 0.4703225791454315, "learning_rate": 4.968796629447155e-05, "loss": 1.3124, "step": 3365 }, { "epoch": 1.01, "grad_norm": 1.0257688760757446, "learning_rate": 4.968704025469853e-05, "loss": 1.3443, "step": 3370 }, { "epoch": 1.01, "grad_norm": 0.8301354050636292, "learning_rate": 4.968611285148594e-05, "loss": 1.2692, "step": 3375 }, { "epoch": 1.01, "grad_norm": 1.3049982786178589, "learning_rate": 4.968518408488499e-05, "loss": 1.4013, "step": 3380 }, { "epoch": 1.01, "grad_norm": 1.38068425655365, "learning_rate": 4.968425395494699e-05, "loss": 1.3014, "step": 3385 }, { "epoch": 1.01, "grad_norm": 0.8539844751358032, "learning_rate": 4.96833224617233e-05, "loss": 1.3768, "step": 3390 }, { "epoch": 1.02, "grad_norm": 1.6281200647354126, "learning_rate": 4.968238960526537e-05, "loss": 1.3423, "step": 3395 }, { "epoch": 1.02, "grad_norm": 1.638692855834961, "learning_rate": 4.968145538562471e-05, "loss": 1.4069, "step": 3400 }, { "epoch": 1.02, "grad_norm": 1.0412074327468872, "learning_rate": 4.968051980285293e-05, "loss": 1.3327, "step": 3405 }, { "epoch": 1.02, "grad_norm": 1.5191795825958252, "learning_rate": 4.967958285700169e-05, "loss": 1.4662, "step": 3410 }, { "epoch": 1.02, "grad_norm": 1.035595417022705, "learning_rate": 4.967864454812274e-05, "loss": 1.4095, "step": 3415 }, { "epoch": 1.02, "grad_norm": 1.3538129329681396, "learning_rate": 4.967770487626791e-05, "loss": 1.4562, "step": 3420 }, { "epoch": 1.02, "grad_norm": 0.785943865776062, "learning_rate": 4.9676763841489093e-05, "loss": 1.2219, "step": 3425 }, { "epoch": 1.03, "grad_norm": 0.9118176698684692, "learning_rate": 4.967582144383826e-05, "loss": 1.3501, "step": 3430 }, { "epoch": 1.03, "grad_norm": 0.7986450791358948, "learning_rate": 4.967487768336745e-05, "loss": 1.5203, "step": 3435 
}, { "epoch": 1.03, "grad_norm": 0.6016656160354614, "learning_rate": 4.967393256012879e-05, "loss": 1.3272, "step": 3440 }, { "epoch": 1.03, "grad_norm": 1.1561404466629028, "learning_rate": 4.967298607417449e-05, "loss": 1.3276, "step": 3445 }, { "epoch": 1.03, "grad_norm": 1.0883400440216064, "learning_rate": 4.9672038225556816e-05, "loss": 1.4162, "step": 3450 }, { "epoch": 1.03, "grad_norm": 1.2652868032455444, "learning_rate": 4.967108901432811e-05, "loss": 1.4396, "step": 3455 }, { "epoch": 1.04, "grad_norm": 1.0570058822631836, "learning_rate": 4.967013844054081e-05, "loss": 1.2359, "step": 3460 }, { "epoch": 1.04, "grad_norm": 0.9641575217247009, "learning_rate": 4.96691865042474e-05, "loss": 1.3216, "step": 3465 }, { "epoch": 1.04, "grad_norm": 0.511062502861023, "learning_rate": 4.966823320550047e-05, "loss": 1.4389, "step": 3470 }, { "epoch": 1.04, "grad_norm": 0.7163792252540588, "learning_rate": 4.9667278544352653e-05, "loss": 1.4032, "step": 3475 }, { "epoch": 1.04, "grad_norm": 1.194694995880127, "learning_rate": 4.966632252085668e-05, "loss": 1.3817, "step": 3480 }, { "epoch": 1.04, "grad_norm": 1.0525634288787842, "learning_rate": 4.9665365135065365e-05, "loss": 1.3344, "step": 3485 }, { "epoch": 1.04, "grad_norm": 3.383202075958252, "learning_rate": 4.966440638703156e-05, "loss": 1.3635, "step": 3490 }, { "epoch": 1.05, "grad_norm": 0.7507001757621765, "learning_rate": 4.9663446276808235e-05, "loss": 1.1945, "step": 3495 }, { "epoch": 1.05, "grad_norm": 1.1535072326660156, "learning_rate": 4.9662484804448404e-05, "loss": 1.5449, "step": 3500 }, { "epoch": 1.05, "grad_norm": 1.666061282157898, "learning_rate": 4.966152197000517e-05, "loss": 1.4527, "step": 3505 }, { "epoch": 1.05, "grad_norm": 0.6894571781158447, "learning_rate": 4.9660557773531723e-05, "loss": 1.4377, "step": 3510 }, { "epoch": 1.05, "grad_norm": 0.7343246936798096, "learning_rate": 4.9659592215081296e-05, "loss": 1.6301, "step": 3515 }, { "epoch": 1.05, "grad_norm": 
1.4154441356658936, "learning_rate": 4.9658625294707226e-05, "loss": 1.4235, "step": 3520 }, { "epoch": 1.05, "grad_norm": 1.732731580734253, "learning_rate": 4.9657657012462904e-05, "loss": 1.491, "step": 3525 }, { "epoch": 1.06, "grad_norm": 1.0298057794570923, "learning_rate": 4.965668736840182e-05, "loss": 1.4066, "step": 3530 }, { "epoch": 1.06, "grad_norm": 0.9874246716499329, "learning_rate": 4.9655716362577525e-05, "loss": 1.4336, "step": 3535 }, { "epoch": 1.06, "grad_norm": 0.7678321003913879, "learning_rate": 4.965474399504364e-05, "loss": 1.4643, "step": 3540 }, { "epoch": 1.06, "grad_norm": 0.8920181393623352, "learning_rate": 4.9653770265853874e-05, "loss": 1.514, "step": 3545 }, { "epoch": 1.06, "grad_norm": 0.7572281956672668, "learning_rate": 4.9652795175062005e-05, "loss": 1.3438, "step": 3550 }, { "epoch": 1.06, "grad_norm": 0.7819041609764099, "learning_rate": 4.965181872272188e-05, "loss": 1.3153, "step": 3555 }, { "epoch": 1.07, "grad_norm": 0.8489050269126892, "learning_rate": 4.965084090888743e-05, "loss": 1.3211, "step": 3560 }, { "epoch": 1.07, "grad_norm": 2.9903485774993896, "learning_rate": 4.9649861733612654e-05, "loss": 1.3094, "step": 3565 }, { "epoch": 1.07, "grad_norm": 0.8128799796104431, "learning_rate": 4.9648881196951647e-05, "loss": 1.3364, "step": 3570 }, { "epoch": 1.07, "grad_norm": 1.2447357177734375, "learning_rate": 4.964789929895855e-05, "loss": 1.5058, "step": 3575 }, { "epoch": 1.07, "grad_norm": 1.014669418334961, "learning_rate": 4.96469160396876e-05, "loss": 1.2516, "step": 3580 }, { "epoch": 1.07, "grad_norm": 0.8545610308647156, "learning_rate": 4.964593141919308e-05, "loss": 1.3384, "step": 3585 }, { "epoch": 1.07, "grad_norm": 1.7975364923477173, "learning_rate": 4.96449454375294e-05, "loss": 1.4322, "step": 3590 }, { "epoch": 1.08, "grad_norm": 0.7121825218200684, "learning_rate": 4.9643958094750996e-05, "loss": 1.3345, "step": 3595 }, { "epoch": 1.08, "grad_norm": 0.9037702083587646, "learning_rate": 
4.9642969390912394e-05, "loss": 1.4319, "step": 3600 }, { "epoch": 1.08, "grad_norm": 1.2663486003875732, "learning_rate": 4.964197932606822e-05, "loss": 1.6255, "step": 3605 }, { "epoch": 1.08, "grad_norm": 0.8707221150398254, "learning_rate": 4.964098790027314e-05, "loss": 1.4954, "step": 3610 }, { "epoch": 1.08, "grad_norm": 1.0182719230651855, "learning_rate": 4.963999511358191e-05, "loss": 1.2573, "step": 3615 }, { "epoch": 1.08, "grad_norm": 0.5255160927772522, "learning_rate": 4.963900096604936e-05, "loss": 1.2647, "step": 3620 }, { "epoch": 1.08, "grad_norm": 1.4247102737426758, "learning_rate": 4.96380054577304e-05, "loss": 1.3231, "step": 3625 }, { "epoch": 1.09, "grad_norm": 0.5628049969673157, "learning_rate": 4.963700858868e-05, "loss": 1.2791, "step": 3630 }, { "epoch": 1.09, "grad_norm": 0.755516529083252, "learning_rate": 4.9636010358953235e-05, "loss": 1.4192, "step": 3635 }, { "epoch": 1.09, "grad_norm": 0.752839982509613, "learning_rate": 4.963501076860522e-05, "loss": 1.3058, "step": 3640 }, { "epoch": 1.09, "grad_norm": 1.969702959060669, "learning_rate": 4.963400981769117e-05, "loss": 1.2481, "step": 3645 }, { "epoch": 1.09, "grad_norm": 0.9159872531890869, "learning_rate": 4.963300750626636e-05, "loss": 1.4535, "step": 3650 }, { "epoch": 1.09, "grad_norm": 0.8840392827987671, "learning_rate": 4.963200383438615e-05, "loss": 1.4412, "step": 3655 }, { "epoch": 1.1, "grad_norm": 1.0793917179107666, "learning_rate": 4.9630998802105975e-05, "loss": 1.3668, "step": 3660 }, { "epoch": 1.1, "grad_norm": 1.078147053718567, "learning_rate": 4.962999240948134e-05, "loss": 1.3659, "step": 3665 }, { "epoch": 1.1, "grad_norm": 0.8372591137886047, "learning_rate": 4.962898465656782e-05, "loss": 1.3811, "step": 3670 }, { "epoch": 1.1, "grad_norm": 1.0935574769973755, "learning_rate": 4.962797554342108e-05, "loss": 1.4259, "step": 3675 }, { "epoch": 1.1, "grad_norm": 1.2438262701034546, "learning_rate": 4.962696507009685e-05, "loss": 1.4393, "step": 3680 }, { 
"epoch": 1.1, "grad_norm": 0.8623482584953308, "learning_rate": 4.962595323665094e-05, "loss": 1.5026, "step": 3685 }, { "epoch": 1.1, "grad_norm": 4.854569911956787, "learning_rate": 4.962494004313923e-05, "loss": 1.4825, "step": 3690 }, { "epoch": 1.11, "grad_norm": 1.069012999534607, "learning_rate": 4.962392548961767e-05, "loss": 1.428, "step": 3695 }, { "epoch": 1.11, "grad_norm": 1.0599278211593628, "learning_rate": 4.9622909576142304e-05, "loss": 1.5312, "step": 3700 }, { "epoch": 1.11, "grad_norm": 1.7758433818817139, "learning_rate": 4.962189230276925e-05, "loss": 1.3304, "step": 3705 }, { "epoch": 1.11, "grad_norm": 1.406180500984192, "learning_rate": 4.962087366955466e-05, "loss": 1.4616, "step": 3710 }, { "epoch": 1.11, "grad_norm": 0.6990206837654114, "learning_rate": 4.9619853676554814e-05, "loss": 1.3334, "step": 3715 }, { "epoch": 1.11, "grad_norm": 0.6970654726028442, "learning_rate": 4.9618832323826036e-05, "loss": 1.3963, "step": 3720 }, { "epoch": 1.11, "grad_norm": 0.8413794040679932, "learning_rate": 4.9617809611424745e-05, "loss": 1.3082, "step": 3725 }, { "epoch": 1.12, "grad_norm": 0.9142487645149231, "learning_rate": 4.9616785539407414e-05, "loss": 1.3984, "step": 3730 }, { "epoch": 1.12, "grad_norm": 0.7050864696502686, "learning_rate": 4.961576010783061e-05, "loss": 1.3641, "step": 3735 }, { "epoch": 1.12, "grad_norm": 0.7158265709877014, "learning_rate": 4.961473331675096e-05, "loss": 1.3355, "step": 3740 }, { "epoch": 1.12, "grad_norm": 1.8010573387145996, "learning_rate": 4.961370516622518e-05, "loss": 1.3238, "step": 3745 }, { "epoch": 1.12, "grad_norm": 0.9509638547897339, "learning_rate": 4.961267565631004e-05, "loss": 1.4979, "step": 3750 }, { "epoch": 1.12, "grad_norm": 0.8768844604492188, "learning_rate": 4.96116447870624e-05, "loss": 1.3485, "step": 3755 }, { "epoch": 1.12, "grad_norm": 1.029345989227295, "learning_rate": 4.961061255853921e-05, "loss": 1.4198, "step": 3760 }, { "epoch": 1.13, "grad_norm": 1.0226683616638184, 
"learning_rate": 4.960957897079748e-05, "loss": 1.501, "step": 3765 }, { "epoch": 1.13, "grad_norm": 1.1053155660629272, "learning_rate": 4.9608544023894276e-05, "loss": 1.5134, "step": 3770 }, { "epoch": 1.13, "grad_norm": 0.7653586864471436, "learning_rate": 4.960750771788676e-05, "loss": 1.3514, "step": 3775 }, { "epoch": 1.13, "grad_norm": 1.1900826692581177, "learning_rate": 4.9606470052832174e-05, "loss": 1.3914, "step": 3780 }, { "epoch": 1.13, "grad_norm": 0.7287219762802124, "learning_rate": 4.960543102878782e-05, "loss": 1.2752, "step": 3785 }, { "epoch": 1.13, "grad_norm": 0.8212308287620544, "learning_rate": 4.96043906458111e-05, "loss": 1.3529, "step": 3790 }, { "epoch": 1.14, "grad_norm": 1.0006697177886963, "learning_rate": 4.960334890395944e-05, "loss": 1.2957, "step": 3795 }, { "epoch": 1.14, "grad_norm": 0.5815529823303223, "learning_rate": 4.96023058032904e-05, "loss": 1.6203, "step": 3800 }, { "epoch": 1.14, "grad_norm": 1.1902085542678833, "learning_rate": 4.9601261343861586e-05, "loss": 1.5133, "step": 3805 }, { "epoch": 1.14, "grad_norm": 0.7083724141120911, "learning_rate": 4.960021552573068e-05, "loss": 1.415, "step": 3810 }, { "epoch": 1.14, "grad_norm": 1.382954716682434, "learning_rate": 4.959916834895544e-05, "loss": 1.4189, "step": 3815 }, { "epoch": 1.14, "grad_norm": 1.2294280529022217, "learning_rate": 4.95981198135937e-05, "loss": 1.5006, "step": 3820 }, { "epoch": 1.14, "grad_norm": 0.6995631456375122, "learning_rate": 4.9597069919703375e-05, "loss": 1.5182, "step": 3825 }, { "epoch": 1.15, "grad_norm": 0.8354824185371399, "learning_rate": 4.959601866734245e-05, "loss": 1.4081, "step": 3830 }, { "epoch": 1.15, "grad_norm": 2.210512399673462, "learning_rate": 4.959496605656897e-05, "loss": 1.428, "step": 3835 }, { "epoch": 1.15, "grad_norm": 0.7832702994346619, "learning_rate": 4.959391208744108e-05, "loss": 1.2983, "step": 3840 }, { "epoch": 1.15, "grad_norm": 0.9913911819458008, "learning_rate": 4.959285676001699e-05, "loss": 
1.4373, "step": 3845 }, { "epoch": 1.15, "grad_norm": 0.5717589855194092, "learning_rate": 4.9591800074354987e-05, "loss": 1.446, "step": 3850 }, { "epoch": 1.15, "grad_norm": 0.780748188495636, "learning_rate": 4.959074203051343e-05, "loss": 1.2086, "step": 3855 }, { "epoch": 1.15, "grad_norm": 1.7067710161209106, "learning_rate": 4.958968262855075e-05, "loss": 1.4519, "step": 3860 }, { "epoch": 1.16, "grad_norm": 1.3409600257873535, "learning_rate": 4.958862186852545e-05, "loss": 1.3027, "step": 3865 }, { "epoch": 1.16, "grad_norm": 1.7507779598236084, "learning_rate": 4.9587559750496135e-05, "loss": 1.415, "step": 3870 }, { "epoch": 1.16, "grad_norm": 0.7299233078956604, "learning_rate": 4.9586496274521446e-05, "loss": 1.3096, "step": 3875 }, { "epoch": 1.16, "grad_norm": 0.76593017578125, "learning_rate": 4.958543144066012e-05, "loss": 1.4701, "step": 3880 }, { "epoch": 1.16, "grad_norm": 1.9224169254302979, "learning_rate": 4.958436524897098e-05, "loss": 1.4677, "step": 3885 }, { "epoch": 1.16, "grad_norm": 0.777151882648468, "learning_rate": 4.95832976995129e-05, "loss": 1.4735, "step": 3890 }, { "epoch": 1.17, "grad_norm": 1.2696973085403442, "learning_rate": 4.958222879234483e-05, "loss": 1.4598, "step": 3895 }, { "epoch": 1.17, "grad_norm": 1.122017741203308, "learning_rate": 4.958115852752583e-05, "loss": 1.2936, "step": 3900 }, { "epoch": 1.17, "grad_norm": 1.7346524000167847, "learning_rate": 4.9580086905114984e-05, "loss": 1.4084, "step": 3905 }, { "epoch": 1.17, "grad_norm": 1.49281907081604, "learning_rate": 4.957901392517149e-05, "loss": 1.2421, "step": 3910 }, { "epoch": 1.17, "grad_norm": 0.9897387027740479, "learning_rate": 4.957793958775461e-05, "loss": 1.4164, "step": 3915 }, { "epoch": 1.17, "grad_norm": 1.001846432685852, "learning_rate": 4.9576863892923676e-05, "loss": 1.1946, "step": 3920 }, { "epoch": 1.17, "grad_norm": 0.9374973773956299, "learning_rate": 4.9575786840738085e-05, "loss": 1.4949, "step": 3925 }, { "epoch": 1.18, 
"grad_norm": 1.216041088104248, "learning_rate": 4.957470843125734e-05, "loss": 1.4853, "step": 3930 }, { "epoch": 1.18, "grad_norm": 1.8419601917266846, "learning_rate": 4.9573628664540985e-05, "loss": 1.2595, "step": 3935 }, { "epoch": 1.18, "grad_norm": 3.0801103115081787, "learning_rate": 4.957254754064867e-05, "loss": 1.2968, "step": 3940 }, { "epoch": 1.18, "grad_norm": 1.5771223306655884, "learning_rate": 4.9571465059640094e-05, "loss": 1.3501, "step": 3945 }, { "epoch": 1.18, "grad_norm": 1.0766230821609497, "learning_rate": 4.957038122157504e-05, "loss": 1.4484, "step": 3950 }, { "epoch": 1.18, "grad_norm": 1.5095306634902954, "learning_rate": 4.9569296026513374e-05, "loss": 1.3842, "step": 3955 }, { "epoch": 1.18, "grad_norm": 1.0730971097946167, "learning_rate": 4.956820947451503e-05, "loss": 1.3855, "step": 3960 }, { "epoch": 1.19, "grad_norm": 1.0428813695907593, "learning_rate": 4.956712156564001e-05, "loss": 1.4501, "step": 3965 }, { "epoch": 1.19, "grad_norm": 1.020739197731018, "learning_rate": 4.9566032299948394e-05, "loss": 1.2739, "step": 3970 }, { "epoch": 1.19, "grad_norm": 0.895963191986084, "learning_rate": 4.956494167750036e-05, "loss": 1.2692, "step": 3975 }, { "epoch": 1.19, "grad_norm": 0.5474923849105835, "learning_rate": 4.956384969835612e-05, "loss": 1.348, "step": 3980 }, { "epoch": 1.19, "grad_norm": 0.9486867785453796, "learning_rate": 4.956275636257601e-05, "loss": 1.3352, "step": 3985 }, { "epoch": 1.19, "grad_norm": 0.9013283848762512, "learning_rate": 4.9561661670220386e-05, "loss": 1.4037, "step": 3990 }, { "epoch": 1.2, "grad_norm": 0.8151796460151672, "learning_rate": 4.956056562134972e-05, "loss": 1.383, "step": 3995 }, { "epoch": 1.2, "grad_norm": 1.0700517892837524, "learning_rate": 4.955946821602455e-05, "loss": 1.4735, "step": 4000 }, { "epoch": 1.2, "grad_norm": 0.7493061423301697, "learning_rate": 4.9558369454305476e-05, "loss": 1.4365, "step": 4005 }, { "epoch": 1.2, "grad_norm": 0.9456654191017151, "learning_rate": 
4.955726933625318e-05, "loss": 1.2838, "step": 4010 }, { "epoch": 1.2, "grad_norm": 1.0961682796478271, "learning_rate": 4.955616786192843e-05, "loss": 1.3222, "step": 4015 }, { "epoch": 1.2, "grad_norm": 1.0026607513427734, "learning_rate": 4.9555065031392044e-05, "loss": 1.3279, "step": 4020 }, { "epoch": 1.2, "grad_norm": 1.1015264987945557, "learning_rate": 4.955396084470495e-05, "loss": 1.2958, "step": 4025 }, { "epoch": 1.21, "grad_norm": 1.213651180267334, "learning_rate": 4.9552855301928114e-05, "loss": 1.3827, "step": 4030 }, { "epoch": 1.21, "grad_norm": 0.8415979743003845, "learning_rate": 4.9551748403122604e-05, "loss": 1.3701, "step": 4035 }, { "epoch": 1.21, "grad_norm": 1.27717924118042, "learning_rate": 4.955064014834955e-05, "loss": 1.2613, "step": 4040 }, { "epoch": 1.21, "grad_norm": 0.7791318297386169, "learning_rate": 4.954953053767016e-05, "loss": 1.2606, "step": 4045 }, { "epoch": 1.21, "grad_norm": 0.8190057277679443, "learning_rate": 4.954841957114572e-05, "loss": 1.3877, "step": 4050 }, { "epoch": 1.21, "grad_norm": 1.4794509410858154, "learning_rate": 4.954730724883757e-05, "loss": 1.3563, "step": 4055 }, { "epoch": 1.21, "grad_norm": 0.701057493686676, "learning_rate": 4.954619357080717e-05, "loss": 1.3641, "step": 4060 }, { "epoch": 1.22, "grad_norm": 0.7502091526985168, "learning_rate": 4.954507853711601e-05, "loss": 1.3876, "step": 4065 }, { "epoch": 1.22, "grad_norm": 1.5233298540115356, "learning_rate": 4.9543962147825675e-05, "loss": 1.3748, "step": 4070 }, { "epoch": 1.22, "grad_norm": 0.7826765775680542, "learning_rate": 4.954284440299782e-05, "loss": 1.3566, "step": 4075 }, { "epoch": 1.22, "grad_norm": 0.7345137596130371, "learning_rate": 4.9541725302694185e-05, "loss": 1.3793, "step": 4080 }, { "epoch": 1.22, "grad_norm": 0.9479917883872986, "learning_rate": 4.954060484697657e-05, "loss": 1.4317, "step": 4085 }, { "epoch": 1.22, "grad_norm": 1.6998075246810913, "learning_rate": 4.9539483035906854e-05, "loss": 1.5301, "step": 
4090 }, { "epoch": 1.23, "grad_norm": 0.9664790034294128, "learning_rate": 4.9538359869546996e-05, "loss": 1.4804, "step": 4095 }, { "epoch": 1.23, "grad_norm": 0.9688673615455627, "learning_rate": 4.9537235347959034e-05, "loss": 1.3919, "step": 4100 }, { "epoch": 1.23, "grad_norm": 0.8500693440437317, "learning_rate": 4.953610947120506e-05, "loss": 1.4584, "step": 4105 }, { "epoch": 1.23, "grad_norm": 1.2263480424880981, "learning_rate": 4.953498223934727e-05, "loss": 1.3839, "step": 4110 }, { "epoch": 1.23, "grad_norm": 0.9976312518119812, "learning_rate": 4.953385365244791e-05, "loss": 1.4062, "step": 4115 }, { "epoch": 1.23, "grad_norm": 1.165010690689087, "learning_rate": 4.953272371056933e-05, "loss": 1.3283, "step": 4120 }, { "epoch": 1.23, "grad_norm": null, "learning_rate": 4.953181878152334e-05, "loss": 1.3901, "step": 4125 }, { "epoch": 1.24, "grad_norm": 0.8692017793655396, "learning_rate": 4.9530686400839435e-05, "loss": 1.4443, "step": 4130 }, { "epoch": 1.24, "grad_norm": 1.5308072566986084, "learning_rate": 4.952955266535122e-05, "loss": 1.3798, "step": 4135 }, { "epoch": 1.24, "grad_norm": 1.0614334344863892, "learning_rate": 4.952841757512131e-05, "loss": 1.2199, "step": 4140 }, { "epoch": 1.24, "grad_norm": 0.5995301604270935, "learning_rate": 4.952728113021239e-05, "loss": 1.3149, "step": 4145 }, { "epoch": 1.24, "grad_norm": 0.9404197335243225, "learning_rate": 4.9526143330687224e-05, "loss": 1.4628, "step": 4150 }, { "epoch": 1.24, "grad_norm": 1.0483546257019043, "learning_rate": 4.9525004176608655e-05, "loss": 1.4337, "step": 4155 }, { "epoch": 1.24, "grad_norm": 1.1325901746749878, "learning_rate": 4.9523863668039606e-05, "loss": 1.3896, "step": 4160 }, { "epoch": 1.25, "grad_norm": 2.8560702800750732, "learning_rate": 4.952272180504306e-05, "loss": 1.5117, "step": 4165 }, { "epoch": 1.25, "grad_norm": 2.0659074783325195, "learning_rate": 4.9521578587682074e-05, "loss": 1.4522, "step": 4170 }, { "epoch": 1.25, "grad_norm": 
0.6918451189994812, "learning_rate": 4.952043401601979e-05, "loss": 1.2501, "step": 4175 }, { "epoch": 1.25, "grad_norm": 1.1893597841262817, "learning_rate": 4.951928809011942e-05, "loss": 1.507, "step": 4180 }, { "epoch": 1.25, "grad_norm": 1.9000897407531738, "learning_rate": 4.951814081004426e-05, "loss": 1.3378, "step": 4185 }, { "epoch": 1.25, "grad_norm": 1.5121021270751953, "learning_rate": 4.9516992175857665e-05, "loss": 1.2653, "step": 4190 }, { "epoch": 1.26, "grad_norm": 0.9484086632728577, "learning_rate": 4.9515842187623076e-05, "loss": 1.304, "step": 4195 }, { "epoch": 1.26, "grad_norm": 0.6229005455970764, "learning_rate": 4.951469084540401e-05, "loss": 1.3531, "step": 4200 }, { "epoch": 1.26, "grad_norm": 0.877293050289154, "learning_rate": 4.951353814926405e-05, "loss": 1.2548, "step": 4205 }, { "epoch": 1.26, "grad_norm": 0.8774591088294983, "learning_rate": 4.9512384099266854e-05, "loss": 1.3658, "step": 4210 }, { "epoch": 1.26, "grad_norm": 1.7639179229736328, "learning_rate": 4.9511228695476165e-05, "loss": 1.4088, "step": 4215 }, { "epoch": 1.26, "grad_norm": 0.9273248314857483, "learning_rate": 4.9510071937955794e-05, "loss": 1.3634, "step": 4220 }, { "epoch": 1.26, "grad_norm": 2.5506839752197266, "learning_rate": 4.950891382676963e-05, "loss": 1.4951, "step": 4225 }, { "epoch": 1.27, "grad_norm": 1.1920723915100098, "learning_rate": 4.9507754361981625e-05, "loss": 1.3341, "step": 4230 }, { "epoch": 1.27, "grad_norm": 0.6930038332939148, "learning_rate": 4.9506593543655824e-05, "loss": 1.4112, "step": 4235 }, { "epoch": 1.27, "grad_norm": 0.8218011260032654, "learning_rate": 4.9505431371856334e-05, "loss": 1.2737, "step": 4240 }, { "epoch": 1.27, "grad_norm": 1.1834139823913574, "learning_rate": 4.950426784664734e-05, "loss": 1.4452, "step": 4245 }, { "epoch": 1.27, "grad_norm": 0.9107833504676819, "learning_rate": 4.950310296809311e-05, "loss": 1.3029, "step": 4250 }, { "epoch": 1.27, "grad_norm": 0.9917646646499634, "learning_rate": 
4.950193673625796e-05, "loss": 1.181, "step": 4255 }, { "epoch": 1.27, "grad_norm": 0.8065804243087769, "learning_rate": 4.9500769151206325e-05, "loss": 1.3394, "step": 4260 }, { "epoch": 1.28, "grad_norm": 1.371374249458313, "learning_rate": 4.9499600213002673e-05, "loss": 1.374, "step": 4265 }, { "epoch": 1.28, "grad_norm": 1.1488021612167358, "learning_rate": 4.9498429921711566e-05, "loss": 1.3386, "step": 4270 }, { "epoch": 1.28, "grad_norm": 0.6660547256469727, "learning_rate": 4.9497258277397635e-05, "loss": 1.4149, "step": 4275 }, { "epoch": 1.28, "grad_norm": 1.1452709436416626, "learning_rate": 4.94960852801256e-05, "loss": 1.5156, "step": 4280 }, { "epoch": 1.28, "grad_norm": 0.7714586853981018, "learning_rate": 4.949491092996023e-05, "loss": 1.3724, "step": 4285 }, { "epoch": 1.28, "grad_norm": 1.2154569625854492, "learning_rate": 4.94937352269664e-05, "loss": 1.3114, "step": 4290 }, { "epoch": 1.29, "grad_norm": 1.180679440498352, "learning_rate": 4.949255817120903e-05, "loss": 1.4339, "step": 4295 }, { "epoch": 1.29, "grad_norm": 1.1234863996505737, "learning_rate": 4.949137976275312e-05, "loss": 1.238, "step": 4300 }, { "epoch": 1.29, "grad_norm": 1.8907201290130615, "learning_rate": 4.949020000166378e-05, "loss": 1.1503, "step": 4305 }, { "epoch": 1.29, "grad_norm": 0.9505663514137268, "learning_rate": 4.9489018888006136e-05, "loss": 1.3278, "step": 4310 }, { "epoch": 1.29, "grad_norm": 2.0911295413970947, "learning_rate": 4.948783642184544e-05, "loss": 1.3792, "step": 4315 }, { "epoch": 1.29, "grad_norm": 1.295822262763977, "learning_rate": 4.948665260324699e-05, "loss": 1.4859, "step": 4320 }, { "epoch": 1.29, "grad_norm": 0.6218513250350952, "learning_rate": 4.948546743227617e-05, "loss": 1.2983, "step": 4325 }, { "epoch": 1.3, "grad_norm": 0.901931881904602, "learning_rate": 4.948428090899844e-05, "loss": 1.4346, "step": 4330 }, { "epoch": 1.3, "grad_norm": 2.49190616607666, "learning_rate": 4.9483093033479324e-05, "loss": 1.245, "step": 4335 }, 
{ "epoch": 1.3, "grad_norm": 0.7503476738929749, "learning_rate": 4.948190380578442e-05, "loss": 1.3196, "step": 4340 }, { "epoch": 1.3, "grad_norm": 0.7911471724510193, "learning_rate": 4.948071322597942e-05, "loss": 1.3248, "step": 4345 }, { "epoch": 1.3, "grad_norm": 2.4888834953308105, "learning_rate": 4.947952129413008e-05, "loss": 1.4045, "step": 4350 }, { "epoch": 1.3, "grad_norm": 0.8818658590316772, "learning_rate": 4.9478328010302225e-05, "loss": 1.4385, "step": 4355 }, { "epoch": 1.3, "grad_norm": 1.1271659135818481, "learning_rate": 4.947713337456175e-05, "loss": 1.3797, "step": 4360 }, { "epoch": 1.31, "grad_norm": 4.77896785736084, "learning_rate": 4.9475937386974645e-05, "loss": 1.4799, "step": 4365 }, { "epoch": 1.31, "grad_norm": 0.8201467394828796, "learning_rate": 4.947474004760696e-05, "loss": 1.4012, "step": 4370 }, { "epoch": 1.31, "grad_norm": 1.4401566982269287, "learning_rate": 4.947354135652482e-05, "loss": 1.3238, "step": 4375 }, { "epoch": 1.31, "grad_norm": 0.7641358971595764, "learning_rate": 4.947234131379444e-05, "loss": 1.4674, "step": 4380 }, { "epoch": 1.31, "grad_norm": 1.366176962852478, "learning_rate": 4.947113991948207e-05, "loss": 1.3375, "step": 4385 }, { "epoch": 1.31, "grad_norm": 1.671121597290039, "learning_rate": 4.9469937173654094e-05, "loss": 1.251, "step": 4390 }, { "epoch": 1.31, "grad_norm": 0.6068382859230042, "learning_rate": 4.9468733076376906e-05, "loss": 1.4187, "step": 4395 }, { "epoch": 1.32, "grad_norm": 0.8216054439544678, "learning_rate": 4.9467527627717036e-05, "loss": 1.3438, "step": 4400 }, { "epoch": 1.32, "grad_norm": 0.8097289800643921, "learning_rate": 4.946632082774104e-05, "loss": 1.5979, "step": 4405 }, { "epoch": 1.32, "grad_norm": 1.1140400171279907, "learning_rate": 4.946511267651559e-05, "loss": 1.2796, "step": 4410 }, { "epoch": 1.32, "grad_norm": 0.9381734728813171, "learning_rate": 4.9463903174107386e-05, "loss": 1.3549, "step": 4415 }, { "epoch": 1.32, "grad_norm": 1.5891177654266357, 
"learning_rate": 4.9462692320583236e-05, "loss": 1.4088, "step": 4420 }, { "epoch": 1.32, "grad_norm": 2.4111642837524414, "learning_rate": 4.946148011601003e-05, "loss": 1.32, "step": 4425 }, { "epoch": 1.33, "grad_norm": 1.5155541896820068, "learning_rate": 4.9460266560454696e-05, "loss": 1.2985, "step": 4430 }, { "epoch": 1.33, "grad_norm": 1.3506906032562256, "learning_rate": 4.9459051653984253e-05, "loss": 1.2462, "step": 4435 }, { "epoch": 1.33, "grad_norm": 2.700868844985962, "learning_rate": 4.945783539666583e-05, "loss": 1.5184, "step": 4440 }, { "epoch": 1.33, "grad_norm": 1.9799214601516724, "learning_rate": 4.9456617788566576e-05, "loss": 1.4099, "step": 4445 }, { "epoch": 1.33, "grad_norm": 1.3236010074615479, "learning_rate": 4.945539882975373e-05, "loss": 1.3994, "step": 4450 }, { "epoch": 1.33, "grad_norm": 3.069741725921631, "learning_rate": 4.9454178520294634e-05, "loss": 1.5627, "step": 4455 }, { "epoch": 1.33, "grad_norm": 3.1382434368133545, "learning_rate": 4.9452956860256685e-05, "loss": 1.415, "step": 4460 }, { "epoch": 1.34, "grad_norm": 0.9453111886978149, "learning_rate": 4.945173384970734e-05, "loss": 1.3312, "step": 4465 }, { "epoch": 1.34, "grad_norm": 1.3048676252365112, "learning_rate": 4.9450509488714146e-05, "loss": 1.3344, "step": 4470 }, { "epoch": 1.34, "grad_norm": 0.9417716860771179, "learning_rate": 4.9449283777344736e-05, "loss": 1.4794, "step": 4475 }, { "epoch": 1.34, "grad_norm": 2.0928821563720703, "learning_rate": 4.944805671566679e-05, "loss": 1.5453, "step": 4480 }, { "epoch": 1.34, "grad_norm": 2.025001049041748, "learning_rate": 4.944682830374809e-05, "loss": 1.3634, "step": 4485 }, { "epoch": 1.34, "grad_norm": 1.4281930923461914, "learning_rate": 4.944559854165647e-05, "loss": 1.2763, "step": 4490 }, { "epoch": 1.34, "grad_norm": 1.3360381126403809, "learning_rate": 4.944436742945985e-05, "loss": 1.4286, "step": 4495 }, { "epoch": 1.35, "grad_norm": 1.891915202140808, "learning_rate": 4.944313496722623e-05, 
"loss": 1.2857, "step": 4500 }, { "epoch": 1.35, "grad_norm": 1.244935154914856, "learning_rate": 4.944190115502367e-05, "loss": 1.3384, "step": 4505 }, { "epoch": 1.35, "grad_norm": 1.4173117876052856, "learning_rate": 4.944066599292032e-05, "loss": 1.4777, "step": 4510 }, { "epoch": 1.35, "grad_norm": 1.03322434425354, "learning_rate": 4.943942948098439e-05, "loss": 1.4247, "step": 4515 }, { "epoch": 1.35, "grad_norm": 0.9695542454719543, "learning_rate": 4.943819161928417e-05, "loss": 1.3148, "step": 4520 }, { "epoch": 1.35, "grad_norm": 1.5252701044082642, "learning_rate": 4.943695240788803e-05, "loss": 1.4396, "step": 4525 }, { "epoch": 1.36, "grad_norm": 1.4560002088546753, "learning_rate": 4.943571184686441e-05, "loss": 1.5466, "step": 4530 }, { "epoch": 1.36, "grad_norm": 1.7746248245239258, "learning_rate": 4.9434469936281825e-05, "loss": 1.4468, "step": 4535 }, { "epoch": 1.36, "grad_norm": 0.7101891040802002, "learning_rate": 4.943322667620886e-05, "loss": 1.3618, "step": 4540 }, { "epoch": 1.36, "grad_norm": 0.8597649931907654, "learning_rate": 4.9431982066714186e-05, "loss": 1.3144, "step": 4545 }, { "epoch": 1.36, "grad_norm": 1.0961127281188965, "learning_rate": 4.9430736107866535e-05, "loss": 1.3769, "step": 4550 }, { "epoch": 1.36, "grad_norm": 1.1628168821334839, "learning_rate": 4.942948879973472e-05, "loss": 1.427, "step": 4555 }, { "epoch": 1.36, "grad_norm": 0.946785569190979, "learning_rate": 4.9428240142387646e-05, "loss": 1.4057, "step": 4560 }, { "epoch": 1.37, "grad_norm": 0.9042614698410034, "learning_rate": 4.9426990135894245e-05, "loss": 1.3838, "step": 4565 }, { "epoch": 1.37, "grad_norm": 1.2647825479507446, "learning_rate": 4.942573878032358e-05, "loss": 1.3895, "step": 4570 }, { "epoch": 1.37, "grad_norm": 1.29331636428833, "learning_rate": 4.9424486075744745e-05, "loss": 1.4304, "step": 4575 }, { "epoch": 1.37, "grad_norm": 1.3520721197128296, "learning_rate": 4.9423232022226936e-05, "loss": 1.2225, "step": 4580 }, { "epoch": 
1.37, "grad_norm": 0.6012137532234192, "learning_rate": 4.942197661983941e-05, "loss": 1.3025, "step": 4585 }, { "epoch": 1.37, "grad_norm": 0.9096260070800781, "learning_rate": 4.9420719868651496e-05, "loss": 1.3349, "step": 4590 }, { "epoch": 1.37, "grad_norm": 1.007503628730774, "learning_rate": 4.941946176873261e-05, "loss": 1.3936, "step": 4595 }, { "epoch": 1.38, "grad_norm": 1.1042200326919556, "learning_rate": 4.941820232015223e-05, "loss": 1.3694, "step": 4600 }, { "epoch": 1.38, "grad_norm": 0.5444115400314331, "learning_rate": 4.941694152297992e-05, "loss": 1.3672, "step": 4605 }, { "epoch": 1.38, "grad_norm": 0.9802212715148926, "learning_rate": 4.9415679377285305e-05, "loss": 1.4417, "step": 4610 }, { "epoch": 1.38, "grad_norm": 0.9913745522499084, "learning_rate": 4.9414415883138106e-05, "loss": 1.3331, "step": 4615 }, { "epoch": 1.38, "grad_norm": 1.2390682697296143, "learning_rate": 4.941315104060808e-05, "loss": 1.4843, "step": 4620 }, { "epoch": 1.38, "grad_norm": 0.7250832915306091, "learning_rate": 4.941188484976512e-05, "loss": 1.4415, "step": 4625 }, { "epoch": 1.39, "grad_norm": 0.6558685898780823, "learning_rate": 4.941061731067912e-05, "loss": 1.3587, "step": 4630 }, { "epoch": 1.39, "grad_norm": 2.1380136013031006, "learning_rate": 4.94093484234201e-05, "loss": 1.3663, "step": 4635 }, { "epoch": 1.39, "grad_norm": 2.294971227645874, "learning_rate": 4.9408078188058145e-05, "loss": 1.402, "step": 4640 }, { "epoch": 1.39, "grad_norm": 2.2890818119049072, "learning_rate": 4.940680660466339e-05, "loss": 1.3872, "step": 4645 }, { "epoch": 1.39, "grad_norm": 0.7086050510406494, "learning_rate": 4.9405533673306094e-05, "loss": 1.4863, "step": 4650 }, { "epoch": 1.39, "grad_norm": 1.047402024269104, "learning_rate": 4.940425939405653e-05, "loss": 1.4587, "step": 4655 }, { "epoch": 1.39, "grad_norm": 1.5536178350448608, "learning_rate": 4.940298376698508e-05, "loss": 1.333, "step": 4660 }, { "epoch": 1.4, "grad_norm": 1.2016557455062866, 
"learning_rate": 4.9401706792162215e-05, "loss": 1.3375, "step": 4665 }, { "epoch": 1.4, "grad_norm": 1.0145758390426636, "learning_rate": 4.940042846965844e-05, "loss": 1.3985, "step": 4670 }, { "epoch": 1.4, "grad_norm": 1.1620700359344482, "learning_rate": 4.939914879954437e-05, "loss": 1.4384, "step": 4675 }, { "epoch": 1.4, "grad_norm": 0.6000904440879822, "learning_rate": 4.9397867781890665e-05, "loss": 1.3831, "step": 4680 }, { "epoch": 1.4, "grad_norm": 1.2584675550460815, "learning_rate": 4.939658541676809e-05, "loss": 1.3704, "step": 4685 }, { "epoch": 1.4, "grad_norm": 1.9861582517623901, "learning_rate": 4.939530170424745e-05, "loss": 1.4437, "step": 4690 }, { "epoch": 1.4, "grad_norm": 1.411476492881775, "learning_rate": 4.9394016644399666e-05, "loss": 1.3568, "step": 4695 }, { "epoch": 1.41, "grad_norm": 0.8464270234107971, "learning_rate": 4.93927302372957e-05, "loss": 1.414, "step": 4700 }, { "epoch": 1.41, "grad_norm": 1.0788897275924683, "learning_rate": 4.939144248300659e-05, "loss": 1.4118, "step": 4705 }, { "epoch": 1.41, "grad_norm": 1.8137201070785522, "learning_rate": 4.9390153381603466e-05, "loss": 1.4496, "step": 4710 }, { "epoch": 1.41, "grad_norm": 0.9756670594215393, "learning_rate": 4.938886293315753e-05, "loss": 1.4993, "step": 4715 }, { "epoch": 1.41, "grad_norm": 2.05425763130188, "learning_rate": 4.938757113774004e-05, "loss": 1.2575, "step": 4720 }, { "epoch": 1.41, "grad_norm": 1.3005917072296143, "learning_rate": 4.938627799542235e-05, "loss": 1.3646, "step": 4725 }, { "epoch": 1.42, "grad_norm": 0.8430652618408203, "learning_rate": 4.9384983506275864e-05, "loss": 1.4074, "step": 4730 }, { "epoch": 1.42, "grad_norm": 0.666009247303009, "learning_rate": 4.9383687670372094e-05, "loss": 1.2717, "step": 4735 }, { "epoch": 1.42, "grad_norm": 0.5754490494728088, "learning_rate": 4.9382390487782594e-05, "loss": 1.3396, "step": 4740 }, { "epoch": 1.42, "grad_norm": 0.9268550276756287, "learning_rate": 4.938109195857902e-05, "loss": 
1.3328, "step": 4745 }, { "epoch": 1.42, "grad_norm": 3.1448147296905518, "learning_rate": 4.9379792082833076e-05, "loss": 1.4236, "step": 4750 }, { "epoch": 1.42, "grad_norm": 0.8515195250511169, "learning_rate": 4.937849086061656e-05, "loss": 1.3495, "step": 4755 }, { "epoch": 1.42, "grad_norm": 1.5721395015716553, "learning_rate": 4.937718829200132e-05, "loss": 1.5374, "step": 4760 }, { "epoch": 1.43, "grad_norm": 1.147706389427185, "learning_rate": 4.9375884377059324e-05, "loss": 1.2307, "step": 4765 }, { "epoch": 1.43, "grad_norm": 0.7554968595504761, "learning_rate": 4.937457911586256e-05, "loss": 1.3832, "step": 4770 }, { "epoch": 1.43, "grad_norm": 1.0911380052566528, "learning_rate": 4.9373272508483135e-05, "loss": 1.2698, "step": 4775 }, { "epoch": 1.43, "grad_norm": 1.6409761905670166, "learning_rate": 4.93719645549932e-05, "loss": 1.2276, "step": 4780 }, { "epoch": 1.43, "grad_norm": 1.3163679838180542, "learning_rate": 4.9370655255464996e-05, "loss": 1.3913, "step": 4785 }, { "epoch": 1.43, "grad_norm": 0.9681575298309326, "learning_rate": 4.9369344609970837e-05, "loss": 1.3934, "step": 4790 }, { "epoch": 1.43, "grad_norm": 0.6123712658882141, "learning_rate": 4.9368032618583106e-05, "loss": 1.3078, "step": 4795 }, { "epoch": 1.44, "grad_norm": 1.4885551929473877, "learning_rate": 4.9366719281374264e-05, "loss": 1.3818, "step": 4800 }, { "epoch": 1.44, "grad_norm": 0.8683892488479614, "learning_rate": 4.936540459841684e-05, "loss": 1.2567, "step": 4805 }, { "epoch": 1.44, "grad_norm": 1.2614054679870605, "learning_rate": 4.936408856978345e-05, "loss": 1.3863, "step": 4810 }, { "epoch": 1.44, "grad_norm": 0.7793049216270447, "learning_rate": 4.9362771195546767e-05, "loss": 1.3391, "step": 4815 }, { "epoch": 1.44, "grad_norm": 0.7818357944488525, "learning_rate": 4.936145247577956e-05, "loss": 1.425, "step": 4820 }, { "epoch": 1.44, "grad_norm": 2.959354877471924, "learning_rate": 4.936013241055465e-05, "loss": 1.4967, "step": 4825 }, { "epoch": 1.45, 
"grad_norm": 1.2867703437805176, "learning_rate": 4.935881099994495e-05, "loss": 1.3091, "step": 4830 }, { "epoch": 1.45, "grad_norm": 1.163822889328003, "learning_rate": 4.935748824402344e-05, "loss": 1.3299, "step": 4835 }, { "epoch": 1.45, "grad_norm": 1.4018052816390991, "learning_rate": 4.9356164142863174e-05, "loss": 1.2329, "step": 4840 }, { "epoch": 1.45, "grad_norm": 3.0731418132781982, "learning_rate": 4.935483869653728e-05, "loss": 1.448, "step": 4845 }, { "epoch": 1.45, "grad_norm": 1.6475387811660767, "learning_rate": 4.9353511905118954e-05, "loss": 1.3146, "step": 4850 }, { "epoch": 1.45, "grad_norm": 0.7880772948265076, "learning_rate": 4.935218376868149e-05, "loss": 1.3605, "step": 4855 }, { "epoch": 1.45, "grad_norm": 1.6786603927612305, "learning_rate": 4.9350854287298224e-05, "loss": 1.2491, "step": 4860 }, { "epoch": 1.46, "grad_norm": 0.7267608642578125, "learning_rate": 4.9349523461042576e-05, "loss": 1.4106, "step": 4865 }, { "epoch": 1.46, "grad_norm": 1.2122886180877686, "learning_rate": 4.9348191289988064e-05, "loss": 1.4208, "step": 4870 }, { "epoch": 1.46, "grad_norm": 1.0894712209701538, "learning_rate": 4.934685777420827e-05, "loss": 1.3931, "step": 4875 }, { "epoch": 1.46, "grad_norm": 0.950336217880249, "learning_rate": 4.934552291377681e-05, "loss": 1.4107, "step": 4880 }, { "epoch": 1.46, "grad_norm": 1.30669367313385, "learning_rate": 4.934418670876743e-05, "loss": 1.3144, "step": 4885 }, { "epoch": 1.46, "grad_norm": 1.539128065109253, "learning_rate": 4.934284915925392e-05, "loss": 1.2911, "step": 4890 }, { "epoch": 1.46, "grad_norm": 1.0481332540512085, "learning_rate": 4.934151026531016e-05, "loss": 1.3932, "step": 4895 }, { "epoch": 1.47, "grad_norm": 0.9544585943222046, "learning_rate": 4.934017002701009e-05, "loss": 1.4928, "step": 4900 }, { "epoch": 1.47, "grad_norm": 0.628352165222168, "learning_rate": 4.933882844442773e-05, "loss": 1.3738, "step": 4905 }, { "epoch": 1.47, "grad_norm": 1.2132518291473389, "learning_rate": 
4.9337485517637174e-05, "loss": 1.3586, "step": 4910 }, { "epoch": 1.47, "grad_norm": 1.3063114881515503, "learning_rate": 4.9336141246712585e-05, "loss": 1.2829, "step": 4915 }, { "epoch": 1.47, "grad_norm": 1.5061261653900146, "learning_rate": 4.933479563172822e-05, "loss": 1.3731, "step": 4920 }, { "epoch": 1.47, "grad_norm": 1.1727710962295532, "learning_rate": 4.933344867275837e-05, "loss": 1.5842, "step": 4925 }, { "epoch": 1.47, "grad_norm": 1.4313628673553467, "learning_rate": 4.9332100369877457e-05, "loss": 1.3292, "step": 4930 }, { "epoch": 1.48, "grad_norm": 0.8247719407081604, "learning_rate": 4.9330750723159924e-05, "loss": 1.2706, "step": 4935 }, { "epoch": 1.48, "grad_norm": 0.8011229634284973, "learning_rate": 4.932939973268033e-05, "loss": 1.5348, "step": 4940 }, { "epoch": 1.48, "grad_norm": 1.4439839124679565, "learning_rate": 4.932804739851327e-05, "loss": 1.5019, "step": 4945 }, { "epoch": 1.48, "grad_norm": 1.8708484172821045, "learning_rate": 4.9326693720733434e-05, "loss": 1.3857, "step": 4950 }, { "epoch": 1.48, "grad_norm": 1.144667387008667, "learning_rate": 4.93253386994156e-05, "loss": 1.3401, "step": 4955 }, { "epoch": 1.48, "grad_norm": 0.9807694554328918, "learning_rate": 4.932398233463459e-05, "loss": 1.3908, "step": 4960 }, { "epoch": 1.49, "grad_norm": 1.2701613903045654, "learning_rate": 4.932262462646532e-05, "loss": 1.3377, "step": 4965 }, { "epoch": 1.49, "grad_norm": 1.4072773456573486, "learning_rate": 4.932126557498278e-05, "loss": 1.4369, "step": 4970 }, { "epoch": 1.49, "grad_norm": 0.901105523109436, "learning_rate": 4.931990518026202e-05, "loss": 1.4648, "step": 4975 }, { "epoch": 1.49, "grad_norm": 1.1365903615951538, "learning_rate": 4.931854344237816e-05, "loss": 1.4489, "step": 4980 }, { "epoch": 1.49, "grad_norm": 1.307646632194519, "learning_rate": 4.931718036140644e-05, "loss": 1.3409, "step": 4985 }, { "epoch": 1.49, "grad_norm": 1.375880479812622, "learning_rate": 4.9315815937422124e-05, "loss": 1.3656, "step": 
4990 }, { "epoch": 1.49, "grad_norm": 0.6324434876441956, "learning_rate": 4.931445017050056e-05, "loss": 1.4595, "step": 4995 }, { "epoch": 1.5, "grad_norm": 1.9427233934402466, "learning_rate": 4.93130830607172e-05, "loss": 1.3704, "step": 5000 }, { "epoch": 1.5, "grad_norm": 1.6412770748138428, "learning_rate": 4.931171460814753e-05, "loss": 1.4681, "step": 5005 }, { "epoch": 1.5, "grad_norm": 1.330474615097046, "learning_rate": 4.931034481286713e-05, "loss": 1.3628, "step": 5010 }, { "epoch": 1.5, "grad_norm": 0.8582301735877991, "learning_rate": 4.9308973674951656e-05, "loss": 1.3646, "step": 5015 }, { "epoch": 1.5, "grad_norm": 0.6373203992843628, "learning_rate": 4.9307601194476825e-05, "loss": 1.2416, "step": 5020 }, { "epoch": 1.5, "grad_norm": 1.919718623161316, "learning_rate": 4.9306227371518455e-05, "loss": 1.3791, "step": 5025 }, { "epoch": 1.5, "grad_norm": 0.8354704976081848, "learning_rate": 4.9304852206152415e-05, "loss": 1.393, "step": 5030 }, { "epoch": 1.51, "grad_norm": 1.773903489112854, "learning_rate": 4.9303475698454645e-05, "loss": 1.283, "step": 5035 }, { "epoch": 1.51, "grad_norm": 0.9701683521270752, "learning_rate": 4.9302097848501176e-05, "loss": 1.41, "step": 5040 }, { "epoch": 1.51, "grad_norm": 1.3933402299880981, "learning_rate": 4.9300718656368104e-05, "loss": 1.3723, "step": 5045 }, { "epoch": 1.51, "grad_norm": 1.408264398574829, "learning_rate": 4.92993381221316e-05, "loss": 1.274, "step": 5050 }, { "epoch": 1.51, "grad_norm": 1.3229018449783325, "learning_rate": 4.929795624586791e-05, "loss": 1.3723, "step": 5055 }, { "epoch": 1.51, "grad_norm": 0.9255488514900208, "learning_rate": 4.9296573027653353e-05, "loss": 1.2437, "step": 5060 }, { "epoch": 1.52, "grad_norm": 1.3084803819656372, "learning_rate": 4.9295188467564324e-05, "loss": 1.2855, "step": 5065 }, { "epoch": 1.52, "grad_norm": 1.0244333744049072, "learning_rate": 4.9293802565677284e-05, "loss": 1.3545, "step": 5070 }, { "epoch": 1.52, "grad_norm": 
1.627091646194458, "learning_rate": 4.9292415322068785e-05, "loss": 1.443, "step": 5075 }, { "epoch": 1.52, "grad_norm": 0.5619713664054871, "learning_rate": 4.929102673681544e-05, "loss": 1.54, "step": 5080 }, { "epoch": 1.52, "grad_norm": 1.0287528038024902, "learning_rate": 4.928963680999393e-05, "loss": 1.3472, "step": 5085 }, { "epoch": 1.52, "grad_norm": 0.9265909194946289, "learning_rate": 4.9288245541681036e-05, "loss": 1.3337, "step": 5090 }, { "epoch": 1.52, "grad_norm": 1.3812994956970215, "learning_rate": 4.9286852931953576e-05, "loss": 1.4148, "step": 5095 }, { "epoch": 1.53, "grad_norm": 0.8753069043159485, "learning_rate": 4.928545898088848e-05, "loss": 1.4186, "step": 5100 }, { "epoch": 1.53, "grad_norm": 0.826627254486084, "learning_rate": 4.928406368856273e-05, "loss": 1.509, "step": 5105 }, { "epoch": 1.53, "grad_norm": 2.941596508026123, "learning_rate": 4.928266705505338e-05, "loss": 1.4212, "step": 5110 }, { "epoch": 1.53, "grad_norm": 1.5150552988052368, "learning_rate": 4.928126908043757e-05, "loss": 1.3308, "step": 5115 }, { "epoch": 1.53, "grad_norm": 0.9515051245689392, "learning_rate": 4.927986976479251e-05, "loss": 1.3392, "step": 5120 }, { "epoch": 1.53, "grad_norm": 1.2691097259521484, "learning_rate": 4.927846910819548e-05, "loss": 1.2719, "step": 5125 }, { "epoch": 1.53, "grad_norm": 0.8292602896690369, "learning_rate": 4.927706711072383e-05, "loss": 1.2852, "step": 5130 }, { "epoch": 1.54, "grad_norm": 1.4237391948699951, "learning_rate": 4.927566377245501e-05, "loss": 1.5021, "step": 5135 }, { "epoch": 1.54, "grad_norm": 0.7673839926719666, "learning_rate": 4.92742590934665e-05, "loss": 1.5058, "step": 5140 }, { "epoch": 1.54, "grad_norm": 1.159803867340088, "learning_rate": 4.92728530738359e-05, "loss": 1.3966, "step": 5145 }, { "epoch": 1.54, "grad_norm": 1.0266951322555542, "learning_rate": 4.927144571364085e-05, "loss": 1.387, "step": 5150 }, { "epoch": 1.54, "grad_norm": 1.0855332612991333, "learning_rate": 
4.927003701295909e-05, "loss": 1.5594, "step": 5155 }, { "epoch": 1.54, "grad_norm": 1.604235053062439, "learning_rate": 4.92686269718684e-05, "loss": 1.4042, "step": 5160 }, { "epoch": 1.55, "grad_norm": 1.2728831768035889, "learning_rate": 4.926721559044668e-05, "loss": 1.3788, "step": 5165 }, { "epoch": 1.55, "grad_norm": 1.2990281581878662, "learning_rate": 4.926580286877187e-05, "loss": 1.3904, "step": 5170 }, { "epoch": 1.55, "grad_norm": 0.8917083144187927, "learning_rate": 4.926438880692198e-05, "loss": 1.3893, "step": 5175 }, { "epoch": 1.55, "grad_norm": 1.6582956314086914, "learning_rate": 4.9262973404975124e-05, "loss": 1.3848, "step": 5180 }, { "epoch": 1.55, "grad_norm": 0.7999622225761414, "learning_rate": 4.9261556663009465e-05, "loss": 1.5204, "step": 5185 }, { "epoch": 1.55, "grad_norm": 1.0454286336898804, "learning_rate": 4.926013858110326e-05, "loss": 1.4529, "step": 5190 }, { "epoch": 1.55, "grad_norm": 0.8179815411567688, "learning_rate": 4.92587191593348e-05, "loss": 1.3022, "step": 5195 }, { "epoch": 1.56, "grad_norm": 1.9816410541534424, "learning_rate": 4.92572983977825e-05, "loss": 1.3634, "step": 5200 }, { "epoch": 1.56, "grad_norm": 0.7995482087135315, "learning_rate": 4.925587629652483e-05, "loss": 1.464, "step": 5205 }, { "epoch": 1.56, "grad_norm": 1.1661982536315918, "learning_rate": 4.925445285564032e-05, "loss": 1.5452, "step": 5210 }, { "epoch": 1.56, "grad_norm": 0.9501356482505798, "learning_rate": 4.9253028075207595e-05, "loss": 1.2182, "step": 5215 }, { "epoch": 1.56, "grad_norm": 1.2546149492263794, "learning_rate": 4.925160195530534e-05, "loss": 1.5899, "step": 5220 }, { "epoch": 1.56, "grad_norm": 0.617793619632721, "learning_rate": 4.9250174496012316e-05, "loss": 1.4278, "step": 5225 }, { "epoch": 1.56, "grad_norm": 0.8397419452667236, "learning_rate": 4.9248745697407353e-05, "loss": 1.2532, "step": 5230 }, { "epoch": 1.57, "grad_norm": 1.4492918252944946, "learning_rate": 4.924731555956938e-05, "loss": 1.3777, "step": 
5235 }, { "epoch": 1.57, "grad_norm": 1.5298033952713013, "learning_rate": 4.924588408257736e-05, "loss": 1.3783, "step": 5240 }, { "epoch": 1.57, "grad_norm": 0.9098453521728516, "learning_rate": 4.9244451266510384e-05, "loss": 1.3461, "step": 5245 }, { "epoch": 1.57, "grad_norm": 1.8338463306427002, "learning_rate": 4.9243017111447556e-05, "loss": 1.4105, "step": 5250 }, { "epoch": 1.57, "grad_norm": 2.729804039001465, "learning_rate": 4.924158161746809e-05, "loss": 1.491, "step": 5255 }, { "epoch": 1.57, "grad_norm": 3.264284610748291, "learning_rate": 4.9240144784651265e-05, "loss": 1.3767, "step": 5260 }, { "epoch": 1.58, "grad_norm": 1.7262938022613525, "learning_rate": 4.923870661307645e-05, "loss": 1.3735, "step": 5265 }, { "epoch": 1.58, "grad_norm": 0.7457590103149414, "learning_rate": 4.923726710282305e-05, "loss": 1.485, "step": 5270 }, { "epoch": 1.58, "grad_norm": 0.9725492000579834, "learning_rate": 4.923582625397059e-05, "loss": 1.393, "step": 5275 }, { "epoch": 1.58, "grad_norm": 2.8428640365600586, "learning_rate": 4.923438406659864e-05, "loss": 1.3126, "step": 5280 }, { "epoch": 1.58, "grad_norm": 1.4272032976150513, "learning_rate": 4.923294054078684e-05, "loss": 1.3194, "step": 5285 }, { "epoch": 1.58, "grad_norm": 0.783457338809967, "learning_rate": 4.923149567661492e-05, "loss": 1.3534, "step": 5290 }, { "epoch": 1.58, "grad_norm": 2.0013182163238525, "learning_rate": 4.9230049474162697e-05, "loss": 1.3938, "step": 5295 }, { "epoch": 1.59, "grad_norm": 0.9497959017753601, "learning_rate": 4.922860193351002e-05, "loss": 1.4592, "step": 5300 }, { "epoch": 1.59, "grad_norm": 1.3006082773208618, "learning_rate": 4.922715305473684e-05, "loss": 1.3387, "step": 5305 }, { "epoch": 1.59, "grad_norm": 1.0174909830093384, "learning_rate": 4.922570283792318e-05, "loss": 1.3116, "step": 5310 }, { "epoch": 1.59, "grad_norm": 1.6044186353683472, "learning_rate": 4.9224251283149136e-05, "loss": 1.4901, "step": 5315 }, { "epoch": 1.59, "grad_norm": 
0.9575896859169006, "learning_rate": 4.9222798390494874e-05, "loss": 1.549, "step": 5320 }, { "epoch": 1.59, "grad_norm": 0.8215287923812866, "learning_rate": 4.922134416004063e-05, "loss": 1.2743, "step": 5325 }, { "epoch": 1.59, "grad_norm": 1.3678843975067139, "learning_rate": 4.9219888591866725e-05, "loss": 1.3831, "step": 5330 }, { "epoch": 1.6, "grad_norm": 1.1509759426116943, "learning_rate": 4.921843168605355e-05, "loss": 1.4874, "step": 5335 }, { "epoch": 1.6, "grad_norm": 0.6248118281364441, "learning_rate": 4.921697344268157e-05, "loss": 1.4002, "step": 5340 }, { "epoch": 1.6, "grad_norm": 0.9303049445152283, "learning_rate": 4.9215513861831316e-05, "loss": 1.4764, "step": 5345 }, { "epoch": 1.6, "grad_norm": 1.2478617429733276, "learning_rate": 4.92140529435834e-05, "loss": 1.3353, "step": 5350 }, { "epoch": 1.6, "grad_norm": 1.1409193277359009, "learning_rate": 4.921259068801851e-05, "loss": 1.4389, "step": 5355 }, { "epoch": 1.6, "grad_norm": 3.311739921569824, "learning_rate": 4.921112709521741e-05, "loss": 1.456, "step": 5360 }, { "epoch": 1.61, "grad_norm": 1.399682879447937, "learning_rate": 4.920966216526092e-05, "loss": 1.3554, "step": 5365 }, { "epoch": 1.61, "grad_norm": 1.2935621738433838, "learning_rate": 4.920819589822995e-05, "loss": 1.2121, "step": 5370 }, { "epoch": 1.61, "grad_norm": 0.8797827363014221, "learning_rate": 4.92067282942055e-05, "loss": 1.2156, "step": 5375 }, { "epoch": 1.61, "grad_norm": 0.6572903990745544, "learning_rate": 4.9205259353268585e-05, "loss": 1.4867, "step": 5380 }, { "epoch": 1.61, "grad_norm": 1.5460227727890015, "learning_rate": 4.920378907550037e-05, "loss": 1.4242, "step": 5385 }, { "epoch": 1.61, "grad_norm": 1.2212427854537964, "learning_rate": 4.9202317460982037e-05, "loss": 1.3154, "step": 5390 }, { "epoch": 1.61, "grad_norm": 1.2694352865219116, "learning_rate": 4.9200844509794876e-05, "loss": 1.3032, "step": 5395 }, { "epoch": 1.62, "grad_norm": 0.9184812903404236, "learning_rate": 
4.919937022202022e-05, "loss": 1.3117, "step": 5400 }, { "epoch": 1.62, "grad_norm": 1.309106469154358, "learning_rate": 4.9197894597739505e-05, "loss": 1.3399, "step": 5405 }, { "epoch": 1.62, "grad_norm": 1.238127589225769, "learning_rate": 4.919641763703422e-05, "loss": 1.3066, "step": 5410 }, { "epoch": 1.62, "grad_norm": 1.8524130582809448, "learning_rate": 4.919493933998594e-05, "loss": 1.5195, "step": 5415 }, { "epoch": 1.62, "grad_norm": 1.081799030303955, "learning_rate": 4.919345970667631e-05, "loss": 1.2854, "step": 5420 }, { "epoch": 1.62, "grad_norm": 0.8055438995361328, "learning_rate": 4.919197873718705e-05, "loss": 1.1592, "step": 5425 }, { "epoch": 1.62, "grad_norm": 1.1697745323181152, "learning_rate": 4.919049643159995e-05, "loss": 1.3942, "step": 5430 }, { "epoch": 1.63, "grad_norm": 1.0526283979415894, "learning_rate": 4.918901278999687e-05, "loss": 1.4919, "step": 5435 }, { "epoch": 1.63, "grad_norm": 0.9364911913871765, "learning_rate": 4.918752781245976e-05, "loss": 1.2354, "step": 5440 }, { "epoch": 1.63, "grad_norm": 1.036669135093689, "learning_rate": 4.918604149907064e-05, "loss": 1.4115, "step": 5445 }, { "epoch": 1.63, "grad_norm": 1.323757529258728, "learning_rate": 4.918455384991159e-05, "loss": 1.4734, "step": 5450 }, { "epoch": 1.63, "grad_norm": 1.043481707572937, "learning_rate": 4.9183064865064756e-05, "loss": 1.3592, "step": 5455 }, { "epoch": 1.63, "grad_norm": 1.0515973567962646, "learning_rate": 4.918157454461238e-05, "loss": 1.3578, "step": 5460 }, { "epoch": 1.64, "grad_norm": 1.5430241823196411, "learning_rate": 4.918008288863679e-05, "loss": 1.5445, "step": 5465 }, { "epoch": 1.64, "grad_norm": 1.392940640449524, "learning_rate": 4.917858989722036e-05, "loss": 1.3484, "step": 5470 }, { "epoch": 1.64, "grad_norm": 0.8935360312461853, "learning_rate": 4.917709557044553e-05, "loss": 1.35, "step": 5475 }, { "epoch": 1.64, "grad_norm": 0.7475391626358032, "learning_rate": 4.9175599908394854e-05, "loss": 1.331, "step": 5480 }, 
{ "epoch": 1.64, "grad_norm": 1.0226927995681763, "learning_rate": 4.917410291115092e-05, "loss": 1.4033, "step": 5485 }, { "epoch": 1.64, "grad_norm": 1.5844924449920654, "learning_rate": 4.917260457879641e-05, "loss": 1.3624, "step": 5490 }, { "epoch": 1.64, "grad_norm": 2.3369362354278564, "learning_rate": 4.917110491141407e-05, "loss": 1.4102, "step": 5495 }, { "epoch": 1.65, "grad_norm": 1.9585330486297607, "learning_rate": 4.9169603909086736e-05, "loss": 1.3082, "step": 5500 }, { "epoch": 1.65, "grad_norm": 0.9265090227127075, "learning_rate": 4.91681015718973e-05, "loss": 1.3516, "step": 5505 }, { "epoch": 1.65, "grad_norm": 0.8970497250556946, "learning_rate": 4.9166597899928735e-05, "loss": 1.2882, "step": 5510 }, { "epoch": 1.65, "grad_norm": 1.3045713901519775, "learning_rate": 4.9165092893264086e-05, "loss": 1.4484, "step": 5515 }, { "epoch": 1.65, "grad_norm": 1.7835524082183838, "learning_rate": 4.9163586551986475e-05, "loss": 1.4134, "step": 5520 }, { "epoch": 1.65, "grad_norm": 0.7851400971412659, "learning_rate": 4.9162078876179095e-05, "loss": 1.1785, "step": 5525 }, { "epoch": 1.65, "grad_norm": 0.5514227747917175, "learning_rate": 4.916056986592522e-05, "loss": 1.2639, "step": 5530 }, { "epoch": 1.66, "grad_norm": 1.4473460912704468, "learning_rate": 4.915905952130818e-05, "loss": 1.3462, "step": 5535 }, { "epoch": 1.66, "grad_norm": 1.3507695198059082, "learning_rate": 4.9157547842411387e-05, "loss": 1.2437, "step": 5540 }, { "epoch": 1.66, "grad_norm": 1.2776063680648804, "learning_rate": 4.9156034829318345e-05, "loss": 1.3707, "step": 5545 }, { "epoch": 1.66, "grad_norm": 2.255897045135498, "learning_rate": 4.915452048211261e-05, "loss": 1.2916, "step": 5550 }, { "epoch": 1.66, "grad_norm": 1.0228066444396973, "learning_rate": 4.915300480087781e-05, "loss": 1.4058, "step": 5555 }, { "epoch": 1.66, "grad_norm": 1.0739355087280273, "learning_rate": 4.915148778569767e-05, "loss": 1.4693, "step": 5560 }, { "epoch": 1.66, "grad_norm": 
1.723423719406128, "learning_rate": 4.914996943665596e-05, "loss": 1.0389, "step": 5565 }, { "epoch": 1.67, "grad_norm": 1.2610810995101929, "learning_rate": 4.9148449753836534e-05, "loss": 1.3547, "step": 5570 }, { "epoch": 1.67, "grad_norm": 0.9140579700469971, "learning_rate": 4.9146928737323334e-05, "loss": 1.3006, "step": 5575 }, { "epoch": 1.67, "grad_norm": 0.7020224332809448, "learning_rate": 4.914540638720035e-05, "loss": 1.3814, "step": 5580 }, { "epoch": 1.67, "grad_norm": 1.6752337217330933, "learning_rate": 4.9143882703551685e-05, "loss": 1.3788, "step": 5585 }, { "epoch": 1.67, "grad_norm": 1.1528271436691284, "learning_rate": 4.914235768646147e-05, "loss": 1.4718, "step": 5590 }, { "epoch": 1.67, "grad_norm": 0.9976640343666077, "learning_rate": 4.9140831336013925e-05, "loss": 1.4391, "step": 5595 }, { "epoch": 1.68, "grad_norm": 0.7416092157363892, "learning_rate": 4.9139303652293365e-05, "loss": 1.2846, "step": 5600 }, { "epoch": 1.68, "grad_norm": 0.8383505940437317, "learning_rate": 4.913777463538416e-05, "loss": 1.3933, "step": 5605 }, { "epoch": 1.68, "grad_norm": 0.7380107045173645, "learning_rate": 4.9136244285370746e-05, "loss": 1.4613, "step": 5610 }, { "epoch": 1.68, "grad_norm": 0.8241648077964783, "learning_rate": 4.913471260233765e-05, "loss": 1.3475, "step": 5615 }, { "epoch": 1.68, "grad_norm": 1.6132783889770508, "learning_rate": 4.913317958636946e-05, "loss": 1.4238, "step": 5620 }, { "epoch": 1.68, "grad_norm": 0.8694246411323547, "learning_rate": 4.913164523755085e-05, "loss": 1.6346, "step": 5625 }, { "epoch": 1.68, "grad_norm": 1.2814140319824219, "learning_rate": 4.9130109555966565e-05, "loss": 1.3782, "step": 5630 }, { "epoch": 1.69, "grad_norm": 1.7262847423553467, "learning_rate": 4.91285725417014e-05, "loss": 1.3747, "step": 5635 }, { "epoch": 1.69, "grad_norm": 0.8753448128700256, "learning_rate": 4.912703419484026e-05, "loss": 1.3918, "step": 5640 }, { "epoch": 1.69, "grad_norm": 1.002820611000061, "learning_rate": 
4.912549451546809e-05, "loss": 1.2981, "step": 5645 }, { "epoch": 1.69, "grad_norm": 1.3369395732879639, "learning_rate": 4.912395350366994e-05, "loss": 1.3446, "step": 5650 }, { "epoch": 1.69, "grad_norm": 1.4645837545394897, "learning_rate": 4.9122411159530916e-05, "loss": 1.5831, "step": 5655 }, { "epoch": 1.69, "grad_norm": 1.1211307048797607, "learning_rate": 4.91208674831362e-05, "loss": 1.3518, "step": 5660 }, { "epoch": 1.69, "grad_norm": 1.3223671913146973, "learning_rate": 4.911932247457104e-05, "loss": 1.4612, "step": 5665 }, { "epoch": 1.7, "grad_norm": 1.5868595838546753, "learning_rate": 4.911777613392077e-05, "loss": 1.2637, "step": 5670 }, { "epoch": 1.7, "grad_norm": 1.2033578157424927, "learning_rate": 4.91162284612708e-05, "loss": 1.1865, "step": 5675 }, { "epoch": 1.7, "grad_norm": 1.1434866189956665, "learning_rate": 4.9114679456706594e-05, "loss": 1.5031, "step": 5680 }, { "epoch": 1.7, "grad_norm": 1.0561349391937256, "learning_rate": 4.911312912031371e-05, "loss": 1.3647, "step": 5685 }, { "epoch": 1.7, "grad_norm": 2.2689900398254395, "learning_rate": 4.911157745217776e-05, "loss": 1.4179, "step": 5690 }, { "epoch": 1.7, "grad_norm": 0.9906610250473022, "learning_rate": 4.911002445238446e-05, "loss": 1.4319, "step": 5695 }, { "epoch": 1.71, "grad_norm": 2.0266451835632324, "learning_rate": 4.9108470121019565e-05, "loss": 1.2428, "step": 5700 }, { "epoch": 1.71, "grad_norm": 1.1348594427108765, "learning_rate": 4.910691445816893e-05, "loss": 1.3954, "step": 5705 }, { "epoch": 1.71, "grad_norm": 1.3033210039138794, "learning_rate": 4.910535746391846e-05, "loss": 1.3513, "step": 5710 }, { "epoch": 1.71, "grad_norm": 1.4596507549285889, "learning_rate": 4.910379913835416e-05, "loss": 1.4849, "step": 5715 }, { "epoch": 1.71, "grad_norm": 1.2325527667999268, "learning_rate": 4.910223948156208e-05, "loss": 1.231, "step": 5720 }, { "epoch": 1.71, "grad_norm": 1.0921927690505981, "learning_rate": 4.9100678493628374e-05, "loss": 1.4783, "step": 5725 
}, { "epoch": 1.71, "grad_norm": 0.5836876034736633, "learning_rate": 4.909911617463925e-05, "loss": 1.3841, "step": 5730 }, { "epoch": 1.72, "grad_norm": 0.6414868831634521, "learning_rate": 4.909755252468098e-05, "loss": 1.414, "step": 5735 }, { "epoch": 1.72, "grad_norm": 1.1332942247390747, "learning_rate": 4.909598754383994e-05, "loss": 1.364, "step": 5740 }, { "epoch": 1.72, "grad_norm": 1.3707212209701538, "learning_rate": 4.909442123220255e-05, "loss": 1.3071, "step": 5745 }, { "epoch": 1.72, "grad_norm": 0.9404252171516418, "learning_rate": 4.909285358985532e-05, "loss": 1.332, "step": 5750 }, { "epoch": 1.72, "grad_norm": 0.9168292880058289, "learning_rate": 4.9091284616884824e-05, "loss": 1.313, "step": 5755 }, { "epoch": 1.72, "grad_norm": 0.628679633140564, "learning_rate": 4.908971431337772e-05, "loss": 1.3079, "step": 5760 }, { "epoch": 1.72, "grad_norm": 2.0647614002227783, "learning_rate": 4.908814267942074e-05, "loss": 1.2487, "step": 5765 }, { "epoch": 1.73, "grad_norm": 1.053024411201477, "learning_rate": 4.908656971510068e-05, "loss": 1.3222, "step": 5770 }, { "epoch": 1.73, "grad_norm": 0.9378986358642578, "learning_rate": 4.908499542050441e-05, "loss": 1.3492, "step": 5775 }, { "epoch": 1.73, "grad_norm": 0.9233798980712891, "learning_rate": 4.9083419795718875e-05, "loss": 1.4543, "step": 5780 }, { "epoch": 1.73, "grad_norm": 1.063833236694336, "learning_rate": 4.9081842840831104e-05, "loss": 1.3152, "step": 5785 }, { "epoch": 1.73, "grad_norm": 1.2852481603622437, "learning_rate": 4.908026455592818e-05, "loss": 1.2847, "step": 5790 }, { "epoch": 1.73, "grad_norm": 1.1695432662963867, "learning_rate": 4.9078684941097275e-05, "loss": 1.2973, "step": 5795 }, { "epoch": 1.74, "grad_norm": 0.908960223197937, "learning_rate": 4.9077103996425625e-05, "loss": 1.1404, "step": 5800 }, { "epoch": 1.74, "grad_norm": 1.2946605682373047, "learning_rate": 4.9075521722000556e-05, "loss": 1.3848, "step": 5805 }, { "epoch": 1.74, "grad_norm": 
0.6911953091621399, "learning_rate": 4.907393811790945e-05, "loss": 1.4204, "step": 5810 }, { "epoch": 1.74, "grad_norm": 0.9933083653450012, "learning_rate": 4.907235318423975e-05, "loss": 1.4721, "step": 5815 }, { "epoch": 1.74, "grad_norm": 1.4515044689178467, "learning_rate": 4.9070766921079014e-05, "loss": 1.3615, "step": 5820 }, { "epoch": 1.74, "grad_norm": 1.067295789718628, "learning_rate": 4.906917932851484e-05, "loss": 1.3254, "step": 5825 }, { "epoch": 1.74, "grad_norm": 1.1964728832244873, "learning_rate": 4.9067590406634914e-05, "loss": 1.2453, "step": 5830 }, { "epoch": 1.75, "grad_norm": 0.9193742275238037, "learning_rate": 4.906600015552698e-05, "loss": 1.5016, "step": 5835 }, { "epoch": 1.75, "grad_norm": 1.4056816101074219, "learning_rate": 4.906440857527888e-05, "loss": 1.2901, "step": 5840 }, { "epoch": 1.75, "grad_norm": 1.7364879846572876, "learning_rate": 4.9062815665978504e-05, "loss": 1.3488, "step": 5845 }, { "epoch": 1.75, "grad_norm": 1.1966763734817505, "learning_rate": 4.9061221427713835e-05, "loss": 1.3019, "step": 5850 }, { "epoch": 1.75, "grad_norm": 1.1406923532485962, "learning_rate": 4.905962586057291e-05, "loss": 1.27, "step": 5855 }, { "epoch": 1.75, "grad_norm": 1.5411876440048218, "learning_rate": 4.9058028964643865e-05, "loss": 1.2951, "step": 5860 }, { "epoch": 1.75, "grad_norm": 0.9900470972061157, "learning_rate": 4.9056430740014883e-05, "loss": 1.3227, "step": 5865 }, { "epoch": 1.76, "grad_norm": 1.95987868309021, "learning_rate": 4.905483118677423e-05, "loss": 1.4196, "step": 5870 }, { "epoch": 1.76, "grad_norm": 1.0949276685714722, "learning_rate": 4.9053230305010264e-05, "loss": 1.2241, "step": 5875 }, { "epoch": 1.76, "grad_norm": 0.6950658559799194, "learning_rate": 4.9051628094811386e-05, "loss": 1.2839, "step": 5880 }, { "epoch": 1.76, "grad_norm": 1.1980772018432617, "learning_rate": 4.905002455626609e-05, "loss": 1.458, "step": 5885 }, { "epoch": 1.76, "grad_norm": 1.1213877201080322, "learning_rate": 
4.904841968946293e-05, "loss": 1.4148, "step": 5890 }, { "epoch": 1.76, "grad_norm": 1.1307765245437622, "learning_rate": 4.904681349449056e-05, "loss": 1.4134, "step": 5895 }, { "epoch": 1.77, "grad_norm": 1.358079433441162, "learning_rate": 4.904520597143767e-05, "loss": 1.3841, "step": 5900 }, { "epoch": 1.77, "grad_norm": 1.1629695892333984, "learning_rate": 4.904359712039304e-05, "loss": 1.39, "step": 5905 }, { "epoch": 1.77, "grad_norm": 1.8049236536026, "learning_rate": 4.904198694144554e-05, "loss": 1.3112, "step": 5910 }, { "epoch": 1.77, "grad_norm": 1.5202418565750122, "learning_rate": 4.904037543468409e-05, "loss": 1.2515, "step": 5915 }, { "epoch": 1.77, "grad_norm": 1.6032389402389526, "learning_rate": 4.90387626001977e-05, "loss": 1.4008, "step": 5920 }, { "epoch": 1.77, "grad_norm": 1.2656590938568115, "learning_rate": 4.903714843807543e-05, "loss": 1.4304, "step": 5925 }, { "epoch": 1.77, "grad_norm": 1.622446894645691, "learning_rate": 4.9035532948406436e-05, "loss": 1.4896, "step": 5930 }, { "epoch": 1.78, "grad_norm": 1.3047736883163452, "learning_rate": 4.903391613127995e-05, "loss": 1.3628, "step": 5935 }, { "epoch": 1.78, "grad_norm": 0.8946179747581482, "learning_rate": 4.903229798678525e-05, "loss": 1.3129, "step": 5940 }, { "epoch": 1.78, "grad_norm": 0.7396437525749207, "learning_rate": 4.903067851501172e-05, "loss": 1.4662, "step": 5945 }, { "epoch": 1.78, "grad_norm": 0.7600730061531067, "learning_rate": 4.9029057716048786e-05, "loss": 1.3341, "step": 5950 }, { "epoch": 1.78, "grad_norm": 1.3639189004898071, "learning_rate": 4.902743558998597e-05, "loss": 1.4028, "step": 5955 }, { "epoch": 1.78, "grad_norm": 1.2151553630828857, "learning_rate": 4.9025812136912874e-05, "loss": 1.2892, "step": 5960 }, { "epoch": 1.78, "grad_norm": 1.493870496749878, "learning_rate": 4.902418735691914e-05, "loss": 1.3913, "step": 5965 }, { "epoch": 1.79, "grad_norm": 2.329575538635254, "learning_rate": 4.90225612500945e-05, "loss": 1.3196, "step": 5970 }, 
{ "epoch": 1.79, "grad_norm": 1.2628295421600342, "learning_rate": 4.9020933816528784e-05, "loss": 1.3211, "step": 5975 }, { "epoch": 1.79, "grad_norm": 1.0996872186660767, "learning_rate": 4.901930505631186e-05, "loss": 1.402, "step": 5980 }, { "epoch": 1.79, "grad_norm": 0.5428398251533508, "learning_rate": 4.901767496953368e-05, "loss": 1.5071, "step": 5985 }, { "epoch": 1.79, "grad_norm": 0.6966472864151001, "learning_rate": 4.9016043556284284e-05, "loss": 1.4231, "step": 5990 }, { "epoch": 1.79, "grad_norm": 1.003456473350525, "learning_rate": 4.901441081665376e-05, "loss": 1.2952, "step": 5995 }, { "epoch": 1.8, "grad_norm": 1.4980223178863525, "learning_rate": 4.9012776750732295e-05, "loss": 1.2411, "step": 6000 }, { "epoch": 1.8, "grad_norm": 1.0407806634902954, "learning_rate": 4.901114135861012e-05, "loss": 1.3536, "step": 6005 }, { "epoch": 1.8, "grad_norm": 0.7962636947631836, "learning_rate": 4.9009504640377565e-05, "loss": 1.4762, "step": 6010 }, { "epoch": 1.8, "grad_norm": 1.4784387350082397, "learning_rate": 4.900786659612504e-05, "loss": 1.3125, "step": 6015 }, { "epoch": 1.8, "grad_norm": 0.9292671084403992, "learning_rate": 4.900622722594299e-05, "loss": 1.4477, "step": 6020 }, { "epoch": 1.8, "grad_norm": 1.320400357246399, "learning_rate": 4.9004586529921955e-05, "loss": 1.3478, "step": 6025 }, { "epoch": 1.8, "grad_norm": 0.593423068523407, "learning_rate": 4.9002944508152567e-05, "loss": 1.2153, "step": 6030 }, { "epoch": 1.81, "grad_norm": 1.3448033332824707, "learning_rate": 4.90013011607255e-05, "loss": 1.4262, "step": 6035 }, { "epoch": 1.81, "grad_norm": 1.65395987033844, "learning_rate": 4.8999656487731516e-05, "loss": 1.4308, "step": 6040 }, { "epoch": 1.81, "grad_norm": 1.5153121948242188, "learning_rate": 4.899801048926146e-05, "loss": 1.3404, "step": 6045 }, { "epoch": 1.81, "grad_norm": 0.942297637462616, "learning_rate": 4.899636316540622e-05, "loss": 1.4883, "step": 6050 }, { "epoch": 1.81, "grad_norm": 0.9569664597511292, 
"learning_rate": 4.899471451625678e-05, "loss": 1.3667, "step": 6055 }, { "epoch": 1.81, "grad_norm": 1.1966135501861572, "learning_rate": 4.899306454190421e-05, "loss": 1.1763, "step": 6060 }, { "epoch": 1.81, "grad_norm": 1.0947861671447754, "learning_rate": 4.899141324243962e-05, "loss": 1.2947, "step": 6065 }, { "epoch": 1.82, "grad_norm": 0.9983508586883545, "learning_rate": 4.8989760617954215e-05, "loss": 1.7462, "step": 6070 }, { "epoch": 1.82, "grad_norm": 0.8864436745643616, "learning_rate": 4.898810666853927e-05, "loss": 1.526, "step": 6075 }, { "epoch": 1.82, "grad_norm": 1.3127366304397583, "learning_rate": 4.898645139428613e-05, "loss": 1.3631, "step": 6080 }, { "epoch": 1.82, "grad_norm": 0.9836210608482361, "learning_rate": 4.8984794795286196e-05, "loss": 1.2838, "step": 6085 }, { "epoch": 1.82, "grad_norm": 1.2298458814620972, "learning_rate": 4.8983136871630995e-05, "loss": 1.3527, "step": 6090 }, { "epoch": 1.82, "grad_norm": 1.1192662715911865, "learning_rate": 4.8981477623412064e-05, "loss": 1.4846, "step": 6095 }, { "epoch": 1.83, "grad_norm": 0.9733280539512634, "learning_rate": 4.897981705072105e-05, "loss": 1.4492, "step": 6100 }, { "epoch": 1.83, "grad_norm": 0.5876336097717285, "learning_rate": 4.897815515364967e-05, "loss": 1.3106, "step": 6105 }, { "epoch": 1.83, "grad_norm": 1.3915202617645264, "learning_rate": 4.89764919322897e-05, "loss": 1.2493, "step": 6110 }, { "epoch": 1.83, "grad_norm": 1.1110719442367554, "learning_rate": 4.897482738673301e-05, "loss": 1.5083, "step": 6115 }, { "epoch": 1.83, "grad_norm": 0.9961562752723694, "learning_rate": 4.897316151707152e-05, "loss": 1.5106, "step": 6120 }, { "epoch": 1.83, "grad_norm": 0.7797302603721619, "learning_rate": 4.897149432339724e-05, "loss": 1.3252, "step": 6125 }, { "epoch": 1.83, "grad_norm": 1.6558862924575806, "learning_rate": 4.896982580580224e-05, "loss": 1.5234, "step": 6130 }, { "epoch": 1.84, "grad_norm": 0.8228052854537964, "learning_rate": 4.896815596437868e-05, 
"loss": 1.3884, "step": 6135 }, { "epoch": 1.84, "grad_norm": 0.7382698059082031, "learning_rate": 4.896648479921878e-05, "loss": 1.3653, "step": 6140 }, { "epoch": 1.84, "grad_norm": 1.1932933330535889, "learning_rate": 4.896481231041483e-05, "loss": 1.6213, "step": 6145 }, { "epoch": 1.84, "grad_norm": 1.208572506904602, "learning_rate": 4.896313849805921e-05, "loss": 1.4082, "step": 6150 }, { "epoch": 1.84, "grad_norm": 1.3084590435028076, "learning_rate": 4.896146336224436e-05, "loss": 1.5231, "step": 6155 }, { "epoch": 1.84, "grad_norm": 1.2458943128585815, "learning_rate": 4.89597869030628e-05, "loss": 1.2657, "step": 6160 }, { "epoch": 1.84, "grad_norm": 1.221347689628601, "learning_rate": 4.8958109120607117e-05, "loss": 1.4708, "step": 6165 }, { "epoch": 1.85, "grad_norm": 0.9371591210365295, "learning_rate": 4.895643001496996e-05, "loss": 1.3741, "step": 6170 }, { "epoch": 1.85, "grad_norm": 1.7532001733779907, "learning_rate": 4.8954749586244074e-05, "loss": 1.3621, "step": 6175 }, { "epoch": 1.85, "grad_norm": 1.8896582126617432, "learning_rate": 4.895306783452228e-05, "loss": 1.398, "step": 6180 }, { "epoch": 1.85, "grad_norm": 1.096148133277893, "learning_rate": 4.895138475989743e-05, "loss": 1.2032, "step": 6185 }, { "epoch": 1.85, "grad_norm": 0.7891479730606079, "learning_rate": 4.894970036246251e-05, "loss": 1.3164, "step": 6190 }, { "epoch": 1.85, "grad_norm": 0.9778878092765808, "learning_rate": 4.894801464231053e-05, "loss": 1.4422, "step": 6195 }, { "epoch": 1.85, "grad_norm": 1.088839054107666, "learning_rate": 4.894632759953459e-05, "loss": 1.3869, "step": 6200 }, { "epoch": 1.86, "grad_norm": 1.0717918872833252, "learning_rate": 4.894463923422787e-05, "loss": 1.3477, "step": 6205 }, { "epoch": 1.86, "grad_norm": 1.6605066061019897, "learning_rate": 4.894294954648362e-05, "loss": 1.2396, "step": 6210 }, { "epoch": 1.86, "grad_norm": 1.642807126045227, "learning_rate": 4.894125853639514e-05, "loss": 1.3233, "step": 6215 }, { "epoch": 1.86, 
"grad_norm": 2.835897207260132, "learning_rate": 4.893956620405585e-05, "loss": 1.4308, "step": 6220 }, { "epoch": 1.86, "grad_norm": 0.6643049120903015, "learning_rate": 4.893787254955919e-05, "loss": 1.5178, "step": 6225 }, { "epoch": 1.86, "grad_norm": 0.7968789935112, "learning_rate": 4.893617757299872e-05, "loss": 1.2129, "step": 6230 }, { "epoch": 1.87, "grad_norm": 1.484046459197998, "learning_rate": 4.893448127446805e-05, "loss": 1.2537, "step": 6235 }, { "epoch": 1.87, "grad_norm": 0.99761962890625, "learning_rate": 4.8932783654060844e-05, "loss": 1.3536, "step": 6240 }, { "epoch": 1.87, "grad_norm": 0.965552806854248, "learning_rate": 4.8931084711870876e-05, "loss": 1.3365, "step": 6245 }, { "epoch": 1.87, "grad_norm": 0.8811823129653931, "learning_rate": 4.8929384447991974e-05, "loss": 1.4075, "step": 6250 }, { "epoch": 1.87, "grad_norm": 1.0002772808074951, "learning_rate": 4.8927682862518044e-05, "loss": 1.4315, "step": 6255 }, { "epoch": 1.87, "grad_norm": 0.4986729919910431, "learning_rate": 4.8925979955543067e-05, "loss": 1.1747, "step": 6260 }, { "epoch": 1.87, "grad_norm": 1.492148995399475, "learning_rate": 4.892427572716108e-05, "loss": 1.3318, "step": 6265 }, { "epoch": 1.88, "grad_norm": 1.448102593421936, "learning_rate": 4.892257017746621e-05, "loss": 1.2832, "step": 6270 }, { "epoch": 1.88, "grad_norm": 1.4307514429092407, "learning_rate": 4.892086330655266e-05, "loss": 1.5149, "step": 6275 }, { "epoch": 1.88, "grad_norm": 0.8795152306556702, "learning_rate": 4.8919155114514695e-05, "loss": 1.4482, "step": 6280 }, { "epoch": 1.88, "grad_norm": 0.9869243502616882, "learning_rate": 4.891744560144666e-05, "loss": 1.3061, "step": 6285 }, { "epoch": 1.88, "grad_norm": 0.6511804461479187, "learning_rate": 4.891573476744295e-05, "loss": 1.4904, "step": 6290 }, { "epoch": 1.88, "grad_norm": 0.5018410086631775, "learning_rate": 4.891402261259807e-05, "loss": 1.374, "step": 6295 }, { "epoch": 1.88, "grad_norm": 0.5579232573509216, "learning_rate": 
4.891230913700659e-05, "loss": 1.2526, "step": 6300 }, { "epoch": 1.89, "grad_norm": 1.1983088254928589, "learning_rate": 4.8910594340763126e-05, "loss": 1.5322, "step": 6305 }, { "epoch": 1.89, "grad_norm": 1.0148495435714722, "learning_rate": 4.89088782239624e-05, "loss": 1.4543, "step": 6310 }, { "epoch": 1.89, "grad_norm": 0.739857017993927, "learning_rate": 4.890716078669917e-05, "loss": 1.373, "step": 6315 }, { "epoch": 1.89, "grad_norm": 1.4641748666763306, "learning_rate": 4.8905442029068296e-05, "loss": 1.3945, "step": 6320 }, { "epoch": 1.89, "grad_norm": 1.2002661228179932, "learning_rate": 4.890372195116471e-05, "loss": 1.3935, "step": 6325 }, { "epoch": 1.89, "grad_norm": 0.9854932427406311, "learning_rate": 4.890200055308342e-05, "loss": 1.4561, "step": 6330 }, { "epoch": 1.9, "grad_norm": 0.8449004888534546, "learning_rate": 4.8900277834919475e-05, "loss": 1.3978, "step": 6335 }, { "epoch": 1.9, "grad_norm": 1.8262367248535156, "learning_rate": 4.889855379676802e-05, "loss": 1.3593, "step": 6340 }, { "epoch": 1.9, "grad_norm": 1.1936458349227905, "learning_rate": 4.889682843872429e-05, "loss": 1.3747, "step": 6345 }, { "epoch": 1.9, "grad_norm": 1.5638678073883057, "learning_rate": 4.8895101760883566e-05, "loss": 1.2618, "step": 6350 }, { "epoch": 1.9, "grad_norm": 1.084822654724121, "learning_rate": 4.88933737633412e-05, "loss": 1.4142, "step": 6355 }, { "epoch": 1.9, "grad_norm": 3.615086793899536, "learning_rate": 4.889164444619264e-05, "loss": 1.3423, "step": 6360 }, { "epoch": 1.9, "grad_norm": 1.1368343830108643, "learning_rate": 4.88899138095334e-05, "loss": 1.325, "step": 6365 }, { "epoch": 1.91, "grad_norm": 0.9688156247138977, "learning_rate": 4.8888181853459046e-05, "loss": 1.3005, "step": 6370 }, { "epoch": 1.91, "grad_norm": 1.1333907842636108, "learning_rate": 4.8886448578065236e-05, "loss": 1.5213, "step": 6375 }, { "epoch": 1.91, "grad_norm": 1.5067886114120483, "learning_rate": 4.8884713983447704e-05, "loss": 1.4821, "step": 6380 }, 
{ "epoch": 1.91, "grad_norm": 1.2759270668029785, "learning_rate": 4.8882978069702246e-05, "loss": 1.2604, "step": 6385 }, { "epoch": 1.91, "grad_norm": 1.2852680683135986, "learning_rate": 4.888124083692473e-05, "loss": 1.4365, "step": 6390 }, { "epoch": 1.91, "grad_norm": 1.2797343730926514, "learning_rate": 4.887950228521111e-05, "loss": 1.3997, "step": 6395 }, { "epoch": 1.91, "grad_norm": 1.6342377662658691, "learning_rate": 4.8877762414657394e-05, "loss": 1.3325, "step": 6400 }, { "epoch": 1.92, "grad_norm": 1.4419025182724, "learning_rate": 4.8876021225359684e-05, "loss": 1.3404, "step": 6405 }, { "epoch": 1.92, "grad_norm": 0.9858295321464539, "learning_rate": 4.887427871741414e-05, "loss": 1.3812, "step": 6410 }, { "epoch": 1.92, "grad_norm": 1.2529702186584473, "learning_rate": 4.8872534890916996e-05, "loss": 1.3797, "step": 6415 }, { "epoch": 1.92, "grad_norm": 1.7771295309066772, "learning_rate": 4.8870789745964566e-05, "loss": 1.3254, "step": 6420 }, { "epoch": 1.92, "grad_norm": 0.7619970440864563, "learning_rate": 4.8869043282653234e-05, "loss": 1.3069, "step": 6425 }, { "epoch": 1.92, "grad_norm": 1.5197542905807495, "learning_rate": 4.886729550107945e-05, "loss": 1.4575, "step": 6430 }, { "epoch": 1.93, "grad_norm": 0.9382418990135193, "learning_rate": 4.8865546401339736e-05, "loss": 1.4566, "step": 6435 }, { "epoch": 1.93, "grad_norm": 0.7788457274436951, "learning_rate": 4.886379598353071e-05, "loss": 1.242, "step": 6440 }, { "epoch": 1.93, "grad_norm": 1.4081120491027832, "learning_rate": 4.8862044247749034e-05, "loss": 1.4399, "step": 6445 }, { "epoch": 1.93, "grad_norm": 1.1004774570465088, "learning_rate": 4.886029119409146e-05, "loss": 1.4171, "step": 6450 }, { "epoch": 1.93, "grad_norm": 1.2400892972946167, "learning_rate": 4.88585368226548e-05, "loss": 1.3873, "step": 6455 }, { "epoch": 1.93, "grad_norm": 0.6717667579650879, "learning_rate": 4.8856781133535955e-05, "loss": 1.1926, "step": 6460 }, { "epoch": 1.93, "grad_norm": 
0.7015379667282104, "learning_rate": 4.8855024126831886e-05, "loss": 1.2202, "step": 6465 }, { "epoch": 1.94, "grad_norm": 1.3034001588821411, "learning_rate": 4.8853265802639625e-05, "loss": 1.2759, "step": 6470 }, { "epoch": 1.94, "grad_norm": 1.2087301015853882, "learning_rate": 4.8851506161056296e-05, "loss": 1.1914, "step": 6475 }, { "epoch": 1.94, "grad_norm": 0.8940001726150513, "learning_rate": 4.8849745202179064e-05, "loss": 1.4391, "step": 6480 }, { "epoch": 1.94, "grad_norm": 1.4564568996429443, "learning_rate": 4.8847982926105195e-05, "loss": 1.4835, "step": 6485 }, { "epoch": 1.94, "grad_norm": 1.3092164993286133, "learning_rate": 4.884621933293203e-05, "loss": 1.3326, "step": 6490 }, { "epoch": 1.94, "grad_norm": 1.8018982410430908, "learning_rate": 4.8844454422756946e-05, "loss": 1.4825, "step": 6495 }, { "epoch": 1.94, "grad_norm": 0.8499402403831482, "learning_rate": 4.884268819567743e-05, "loss": 1.38, "step": 6500 }, { "epoch": 1.95, "grad_norm": 1.2163305282592773, "learning_rate": 4.8840920651791036e-05, "loss": 1.2982, "step": 6505 }, { "epoch": 1.95, "grad_norm": 1.5904464721679688, "learning_rate": 4.883915179119537e-05, "loss": 1.3852, "step": 6510 }, { "epoch": 1.95, "grad_norm": 1.2518161535263062, "learning_rate": 4.883738161398813e-05, "loss": 1.4104, "step": 6515 }, { "epoch": 1.95, "grad_norm": 1.155625581741333, "learning_rate": 4.883561012026708e-05, "loss": 1.3967, "step": 6520 }, { "epoch": 1.95, "grad_norm": 0.6556622982025146, "learning_rate": 4.883383731013007e-05, "loss": 1.3625, "step": 6525 }, { "epoch": 1.95, "grad_norm": 0.9793677926063538, "learning_rate": 4.883206318367499e-05, "loss": 1.4226, "step": 6530 }, { "epoch": 1.96, "grad_norm": 1.0961387157440186, "learning_rate": 4.883028774099983e-05, "loss": 1.4849, "step": 6535 }, { "epoch": 1.96, "grad_norm": 0.9702796936035156, "learning_rate": 4.882851098220265e-05, "loss": 1.3409, "step": 6540 }, { "epoch": 1.96, "grad_norm": 1.4797945022583008, "learning_rate": 
4.882673290738158e-05, "loss": 1.3482, "step": 6545 }, { "epoch": 1.96, "grad_norm": 1.1663824319839478, "learning_rate": 4.8824953516634816e-05, "loss": 1.2391, "step": 6550 }, { "epoch": 1.96, "grad_norm": 0.7633179426193237, "learning_rate": 4.882317281006064e-05, "loss": 1.2115, "step": 6555 }, { "epoch": 1.96, "grad_norm": 0.6997314095497131, "learning_rate": 4.8821390787757384e-05, "loss": 1.3422, "step": 6560 }, { "epoch": 1.96, "grad_norm": 1.2793208360671997, "learning_rate": 4.8819607449823476e-05, "loss": 1.4855, "step": 6565 }, { "epoch": 1.97, "grad_norm": 1.2282752990722656, "learning_rate": 4.881782279635741e-05, "loss": 1.5351, "step": 6570 }, { "epoch": 1.97, "grad_norm": 1.7947735786437988, "learning_rate": 4.8816036827457745e-05, "loss": 1.2795, "step": 6575 }, { "epoch": 1.97, "grad_norm": 1.136925458908081, "learning_rate": 4.8814249543223125e-05, "loss": 1.3417, "step": 6580 }, { "epoch": 1.97, "grad_norm": 0.9631902575492859, "learning_rate": 4.8812460943752256e-05, "loss": 1.4113, "step": 6585 }, { "epoch": 1.97, "grad_norm": 0.7148537039756775, "learning_rate": 4.881067102914392e-05, "loss": 1.3135, "step": 6590 }, { "epoch": 1.97, "grad_norm": 2.7874670028686523, "learning_rate": 4.880887979949698e-05, "loss": 1.3566, "step": 6595 }, { "epoch": 1.97, "grad_norm": 0.9266506433486938, "learning_rate": 4.8807087254910344e-05, "loss": 1.3909, "step": 6600 }, { "epoch": 1.98, "grad_norm": 1.1458144187927246, "learning_rate": 4.880529339548303e-05, "loss": 1.4995, "step": 6605 }, { "epoch": 1.98, "grad_norm": 0.6967890858650208, "learning_rate": 4.8803498221314106e-05, "loss": 1.5207, "step": 6610 }, { "epoch": 1.98, "grad_norm": 1.142874836921692, "learning_rate": 4.880170173250272e-05, "loss": 1.335, "step": 6615 }, { "epoch": 1.98, "grad_norm": 2.030205726623535, "learning_rate": 4.879990392914809e-05, "loss": 1.2267, "step": 6620 }, { "epoch": 1.98, "grad_norm": 0.5845741033554077, "learning_rate": 4.8798104811349496e-05, "loss": 1.4528, 
"step": 6625 }, { "epoch": 1.98, "grad_norm": 1.0889317989349365, "learning_rate": 4.879630437920631e-05, "loss": 1.3085, "step": 6630 }, { "epoch": 1.99, "grad_norm": 1.1721889972686768, "learning_rate": 4.8794502632817983e-05, "loss": 1.3531, "step": 6635 }, { "epoch": 1.99, "grad_norm": 1.2524924278259277, "learning_rate": 4.8792699572283996e-05, "loss": 1.482, "step": 6640 }, { "epoch": 1.99, "grad_norm": 0.9633756875991821, "learning_rate": 4.879089519770395e-05, "loss": 1.3769, "step": 6645 }, { "epoch": 1.99, "grad_norm": 1.5347559452056885, "learning_rate": 4.8789089509177485e-05, "loss": 1.3181, "step": 6650 }, { "epoch": 1.99, "grad_norm": 0.992938220500946, "learning_rate": 4.878728250680433e-05, "loss": 1.376, "step": 6655 }, { "epoch": 1.99, "grad_norm": 1.4422589540481567, "learning_rate": 4.87854741906843e-05, "loss": 1.2264, "step": 6660 }, { "epoch": 1.99, "grad_norm": 1.2222764492034912, "learning_rate": 4.878366456091724e-05, "loss": 1.3726, "step": 6665 }, { "epoch": 2.0, "grad_norm": 0.5847399830818176, "learning_rate": 4.8781853617603116e-05, "loss": 1.3855, "step": 6670 }, { "epoch": 2.0, "grad_norm": 1.1310397386550903, "learning_rate": 4.878004136084194e-05, "loss": 1.4062, "step": 6675 }, { "epoch": 2.0, "grad_norm": 2.0542705059051514, "learning_rate": 4.877822779073379e-05, "loss": 1.641, "step": 6680 }, { "epoch": 2.0, "grad_norm": 1.2997204065322876, "learning_rate": 4.877641290737884e-05, "loss": 1.4196, "step": 6685 }, { "epoch": 2.0, "grad_norm": 0.9382727146148682, "learning_rate": 4.8774596710877315e-05, "loss": 1.3389, "step": 6690 }, { "epoch": 2.0, "grad_norm": 0.9488927125930786, "learning_rate": 4.877277920132953e-05, "loss": 1.1815, "step": 6695 }, { "epoch": 2.0, "grad_norm": 0.7831886410713196, "learning_rate": 4.877096037883586e-05, "loss": 1.2372, "step": 6700 }, { "epoch": 2.01, "grad_norm": 1.236717939376831, "learning_rate": 4.876914024349676e-05, "loss": 1.3009, "step": 6705 }, { "epoch": 2.01, "grad_norm": 
0.9065530896186829, "learning_rate": 4.8767318795412746e-05, "loss": 1.4108, "step": 6710 }, { "epoch": 2.01, "grad_norm": 0.7689698934555054, "learning_rate": 4.876549603468442e-05, "loss": 1.3159, "step": 6715 }, { "epoch": 2.01, "grad_norm": 1.1612657308578491, "learning_rate": 4.876367196141245e-05, "loss": 1.4331, "step": 6720 }, { "epoch": 2.01, "grad_norm": 0.7282540798187256, "learning_rate": 4.876184657569758e-05, "loss": 1.2104, "step": 6725 }, { "epoch": 2.01, "grad_norm": 1.3885889053344727, "learning_rate": 4.876001987764063e-05, "loss": 1.3829, "step": 6730 }, { "epoch": 2.02, "grad_norm": 1.1671245098114014, "learning_rate": 4.8758191867342465e-05, "loss": 1.3192, "step": 6735 }, { "epoch": 2.02, "grad_norm": 1.369300365447998, "learning_rate": 4.875636254490406e-05, "loss": 1.202, "step": 6740 }, { "epoch": 2.02, "grad_norm": 0.8616713881492615, "learning_rate": 4.875453191042646e-05, "loss": 1.3398, "step": 6745 }, { "epoch": 2.02, "grad_norm": 1.1366583108901978, "learning_rate": 4.875269996401074e-05, "loss": 1.306, "step": 6750 }, { "epoch": 2.02, "grad_norm": 1.2424169778823853, "learning_rate": 4.8750866705758106e-05, "loss": 1.2174, "step": 6755 }, { "epoch": 2.02, "grad_norm": 1.3406630754470825, "learning_rate": 4.874903213576977e-05, "loss": 1.2905, "step": 6760 }, { "epoch": 2.02, "grad_norm": 0.9850149750709534, "learning_rate": 4.874719625414709e-05, "loss": 1.4, "step": 6765 }, { "epoch": 2.03, "grad_norm": 1.6209102869033813, "learning_rate": 4.874535906099144e-05, "loss": 1.347, "step": 6770 }, { "epoch": 2.03, "grad_norm": 1.3967558145523071, "learning_rate": 4.874352055640429e-05, "loss": 1.2893, "step": 6775 }, { "epoch": 2.03, "grad_norm": 1.4221364259719849, "learning_rate": 4.874204880857213e-05, "loss": 1.3171, "step": 6780 }, { "epoch": 2.03, "grad_norm": 1.1001510620117188, "learning_rate": 4.8740207943664204e-05, "loss": 1.2465, "step": 6785 }, { "epoch": 2.03, "grad_norm": 0.5954471230506897, "learning_rate": 
4.8738365767609275e-05, "loss": 1.2439, "step": 6790 }, { "epoch": 2.03, "grad_norm": 0.8812218308448792, "learning_rate": 4.873652228050908e-05, "loss": 1.3691, "step": 6795 }, { "epoch": 2.03, "grad_norm": 1.045620083808899, "learning_rate": 4.873467748246543e-05, "loss": 1.2654, "step": 6800 }, { "epoch": 2.04, "grad_norm": 1.3172338008880615, "learning_rate": 4.8732831373580216e-05, "loss": 1.3099, "step": 6805 }, { "epoch": 2.04, "grad_norm": 0.8830695748329163, "learning_rate": 4.873098395395539e-05, "loss": 1.1458, "step": 6810 }, { "epoch": 2.04, "grad_norm": 1.0735907554626465, "learning_rate": 4.872913522369299e-05, "loss": 1.2193, "step": 6815 }, { "epoch": 2.04, "grad_norm": 1.7402642965316772, "learning_rate": 4.8727285182895124e-05, "loss": 1.3219, "step": 6820 }, { "epoch": 2.04, "grad_norm": 1.2762502431869507, "learning_rate": 4.8725433831663944e-05, "loss": 1.2906, "step": 6825 }, { "epoch": 2.04, "grad_norm": 0.894281804561615, "learning_rate": 4.872358117010173e-05, "loss": 1.4766, "step": 6830 }, { "epoch": 2.04, "grad_norm": 1.1440064907073975, "learning_rate": 4.872172719831078e-05, "loss": 1.2786, "step": 6835 }, { "epoch": 2.05, "grad_norm": 1.659610629081726, "learning_rate": 4.8719871916393495e-05, "loss": 1.4152, "step": 6840 }, { "epoch": 2.05, "grad_norm": 1.5597167015075684, "learning_rate": 4.8718015324452336e-05, "loss": 1.2945, "step": 6845 }, { "epoch": 2.05, "grad_norm": 1.578195571899414, "learning_rate": 4.8716157422589855e-05, "loss": 1.4838, "step": 6850 }, { "epoch": 2.05, "grad_norm": 0.8329493403434753, "learning_rate": 4.8714298210908646e-05, "loss": 1.2861, "step": 6855 }, { "epoch": 2.05, "grad_norm": 1.347894310951233, "learning_rate": 4.8712437689511395e-05, "loss": 1.3531, "step": 6860 }, { "epoch": 2.05, "grad_norm": 2.246103286743164, "learning_rate": 4.871057585850085e-05, "loss": 1.3114, "step": 6865 }, { "epoch": 2.06, "grad_norm": 0.8596112132072449, "learning_rate": 4.870871271797986e-05, "loss": 1.4278, 
"step": 6870 }, { "epoch": 2.06, "grad_norm": 0.986096203327179, "learning_rate": 4.8706848268051305e-05, "loss": 1.3325, "step": 6875 }, { "epoch": 2.06, "grad_norm": 1.3321117162704468, "learning_rate": 4.870498250881816e-05, "loss": 1.3678, "step": 6880 }, { "epoch": 2.06, "grad_norm": 1.3195933103561401, "learning_rate": 4.8703115440383474e-05, "loss": 1.2902, "step": 6885 }, { "epoch": 2.06, "grad_norm": 0.8268415927886963, "learning_rate": 4.8701247062850355e-05, "loss": 1.2304, "step": 6890 }, { "epoch": 2.06, "grad_norm": 0.8425890207290649, "learning_rate": 4.8699377376322e-05, "loss": 1.3488, "step": 6895 }, { "epoch": 2.06, "grad_norm": 1.5140923261642456, "learning_rate": 4.869750638090167e-05, "loss": 1.3143, "step": 6900 }, { "epoch": 2.07, "grad_norm": 1.7727601528167725, "learning_rate": 4.8695634076692696e-05, "loss": 1.4197, "step": 6905 }, { "epoch": 2.07, "grad_norm": 1.5425249338150024, "learning_rate": 4.8693760463798476e-05, "loss": 1.2215, "step": 6910 }, { "epoch": 2.07, "grad_norm": 1.0112886428833008, "learning_rate": 4.869188554232249e-05, "loss": 1.4421, "step": 6915 }, { "epoch": 2.07, "grad_norm": 0.9172187447547913, "learning_rate": 4.86900093123683e-05, "loss": 1.4047, "step": 6920 }, { "epoch": 2.07, "grad_norm": 1.2613557577133179, "learning_rate": 4.868813177403952e-05, "loss": 1.1333, "step": 6925 }, { "epoch": 2.07, "grad_norm": 0.811103880405426, "learning_rate": 4.8686252927439844e-05, "loss": 1.2167, "step": 6930 }, { "epoch": 2.07, "grad_norm": 2.1426024436950684, "learning_rate": 4.868437277267304e-05, "loss": 1.4477, "step": 6935 }, { "epoch": 2.08, "grad_norm": 0.6741098165512085, "learning_rate": 4.868249130984294e-05, "loss": 1.4966, "step": 6940 }, { "epoch": 2.08, "grad_norm": 0.6223343014717102, "learning_rate": 4.868060853905346e-05, "loss": 1.3415, "step": 6945 }, { "epoch": 2.08, "grad_norm": 1.3729140758514404, "learning_rate": 4.86787244604086e-05, "loss": 1.2362, "step": 6950 }, { "epoch": 2.08, "grad_norm": 
1.3088891506195068, "learning_rate": 4.8676839074012385e-05, "loss": 1.4647, "step": 6955 }, { "epoch": 2.08, "grad_norm": 1.7941012382507324, "learning_rate": 4.867495237996897e-05, "loss": 1.2942, "step": 6960 }, { "epoch": 2.08, "grad_norm": 1.6377042531967163, "learning_rate": 4.867306437838254e-05, "loss": 1.4394, "step": 6965 }, { "epoch": 2.09, "grad_norm": 1.298292636871338, "learning_rate": 4.867117506935737e-05, "loss": 1.3146, "step": 6970 }, { "epoch": 2.09, "grad_norm": 1.6398062705993652, "learning_rate": 4.8669284452997795e-05, "loss": 1.1931, "step": 6975 }, { "epoch": 2.09, "grad_norm": 0.8299776911735535, "learning_rate": 4.866739252940826e-05, "loss": 1.3339, "step": 6980 }, { "epoch": 2.09, "grad_norm": 1.9005075693130493, "learning_rate": 4.866549929869323e-05, "loss": 1.289, "step": 6985 }, { "epoch": 2.09, "grad_norm": 0.9177480340003967, "learning_rate": 4.866360476095727e-05, "loss": 1.4014, "step": 6990 }, { "epoch": 2.09, "grad_norm": 1.6924612522125244, "learning_rate": 4.866170891630502e-05, "loss": 1.2609, "step": 6995 }, { "epoch": 2.09, "grad_norm": 0.8772937059402466, "learning_rate": 4.865981176484118e-05, "loss": 1.2517, "step": 7000 }, { "epoch": 2.1, "grad_norm": 1.7310841083526611, "learning_rate": 4.865791330667053e-05, "loss": 1.3634, "step": 7005 }, { "epoch": 2.1, "grad_norm": 1.4149776697158813, "learning_rate": 4.865601354189792e-05, "loss": 1.28, "step": 7010 }, { "epoch": 2.1, "grad_norm": 0.9106690287590027, "learning_rate": 4.865411247062827e-05, "loss": 1.4158, "step": 7015 }, { "epoch": 2.1, "grad_norm": 1.424204707145691, "learning_rate": 4.865221009296657e-05, "loss": 1.4474, "step": 7020 }, { "epoch": 2.1, "grad_norm": 0.823806881904602, "learning_rate": 4.86503064090179e-05, "loss": 1.1959, "step": 7025 }, { "epoch": 2.1, "grad_norm": 1.7646584510803223, "learning_rate": 4.864840141888739e-05, "loss": 1.2428, "step": 7030 }, { "epoch": 2.1, "grad_norm": 0.7892530560493469, "learning_rate": 4.864649512268024e-05, 
"loss": 1.4816, "step": 7035 }, { "epoch": 2.11, "grad_norm": 1.802615761756897, "learning_rate": 4.864458752050175e-05, "loss": 1.2827, "step": 7040 }, { "epoch": 2.11, "grad_norm": 1.3489775657653809, "learning_rate": 4.864267861245727e-05, "loss": 1.3001, "step": 7045 }, { "epoch": 2.11, "grad_norm": 1.7209609746932983, "learning_rate": 4.8640768398652224e-05, "loss": 1.2558, "step": 7050 }, { "epoch": 2.11, "grad_norm": 1.2249550819396973, "learning_rate": 4.863885687919212e-05, "loss": 1.4765, "step": 7055 }, { "epoch": 2.11, "grad_norm": 1.2855435609817505, "learning_rate": 4.863694405418251e-05, "loss": 1.1862, "step": 7060 }, { "epoch": 2.11, "grad_norm": 1.4414563179016113, "learning_rate": 4.863502992372906e-05, "loss": 1.1531, "step": 7065 }, { "epoch": 2.12, "grad_norm": 0.9723905920982361, "learning_rate": 4.863311448793746e-05, "loss": 1.3402, "step": 7070 }, { "epoch": 2.12, "grad_norm": 1.274014949798584, "learning_rate": 4.8631197746913525e-05, "loss": 1.5264, "step": 7075 }, { "epoch": 2.12, "grad_norm": 0.9938822388648987, "learning_rate": 4.86292797007631e-05, "loss": 1.2517, "step": 7080 }, { "epoch": 2.12, "grad_norm": 2.461939811706543, "learning_rate": 4.862736034959211e-05, "loss": 1.3553, "step": 7085 }, { "epoch": 2.12, "grad_norm": 0.8186405897140503, "learning_rate": 4.8625439693506576e-05, "loss": 1.3186, "step": 7090 }, { "epoch": 2.12, "grad_norm": 2.9292609691619873, "learning_rate": 4.862351773261256e-05, "loss": 1.3257, "step": 7095 }, { "epoch": 2.12, "grad_norm": 0.6807072758674622, "learning_rate": 4.8621594467016216e-05, "loss": 1.2733, "step": 7100 }, { "epoch": 2.13, "grad_norm": 0.9890395402908325, "learning_rate": 4.8619669896823766e-05, "loss": 1.4189, "step": 7105 }, { "epoch": 2.13, "grad_norm": 0.6927660703659058, "learning_rate": 4.8617744022141496e-05, "loss": 1.0253, "step": 7110 }, { "epoch": 2.13, "grad_norm": 2.2300829887390137, "learning_rate": 4.861581684307577e-05, "loss": 1.3179, "step": 7115 }, { "epoch": 
2.13, "grad_norm": 2.7834036350250244, "learning_rate": 4.8613888359733035e-05, "loss": 1.4093, "step": 7120 }, { "epoch": 2.13, "grad_norm": 1.634334683418274, "learning_rate": 4.861195857221978e-05, "loss": 1.3292, "step": 7125 }, { "epoch": 2.13, "grad_norm": 2.0878982543945312, "learning_rate": 4.86100274806426e-05, "loss": 1.2545, "step": 7130 }, { "epoch": 2.13, "grad_norm": 1.6050611734390259, "learning_rate": 4.8608095085108155e-05, "loss": 1.2622, "step": 7135 }, { "epoch": 2.14, "grad_norm": 1.9072321653366089, "learning_rate": 4.8606161385723147e-05, "loss": 1.2726, "step": 7140 }, { "epoch": 2.14, "grad_norm": 1.5780624151229858, "learning_rate": 4.860422638259439e-05, "loss": 1.3947, "step": 7145 }, { "epoch": 2.14, "grad_norm": 2.0767362117767334, "learning_rate": 4.860229007582874e-05, "loss": 1.4344, "step": 7150 }, { "epoch": 2.14, "grad_norm": 1.11172354221344, "learning_rate": 4.860035246553314e-05, "loss": 1.3602, "step": 7155 }, { "epoch": 2.14, "grad_norm": 0.8127298951148987, "learning_rate": 4.859841355181461e-05, "loss": 1.351, "step": 7160 }, { "epoch": 2.14, "grad_norm": 1.1209958791732788, "learning_rate": 4.8596473334780225e-05, "loss": 1.279, "step": 7165 }, { "epoch": 2.15, "grad_norm": 1.7135673761367798, "learning_rate": 4.859453181453715e-05, "loss": 1.5372, "step": 7170 }, { "epoch": 2.15, "grad_norm": 1.5923312902450562, "learning_rate": 4.85925889911926e-05, "loss": 1.3879, "step": 7175 }, { "epoch": 2.15, "grad_norm": 1.2418073415756226, "learning_rate": 4.8590644864853886e-05, "loss": 1.3691, "step": 7180 }, { "epoch": 2.15, "grad_norm": 0.9829604625701904, "learning_rate": 4.858869943562838e-05, "loss": 1.3597, "step": 7185 }, { "epoch": 2.15, "grad_norm": 1.1962437629699707, "learning_rate": 4.8586752703623516e-05, "loss": 1.3715, "step": 7190 }, { "epoch": 2.15, "grad_norm": 1.7202482223510742, "learning_rate": 4.8584804668946825e-05, "loss": 1.446, "step": 7195 }, { "epoch": 2.15, "grad_norm": 2.16395902633667, 
"learning_rate": 4.858285533170589e-05, "loss": 1.2798, "step": 7200 }, { "epoch": 2.16, "grad_norm": 0.6793248057365417, "learning_rate": 4.858090469200835e-05, "loss": 1.3275, "step": 7205 }, { "epoch": 2.16, "grad_norm": 0.6636670231819153, "learning_rate": 4.8578952749961974e-05, "loss": 1.433, "step": 7210 }, { "epoch": 2.16, "grad_norm": 1.3391855955123901, "learning_rate": 4.8576999505674546e-05, "loss": 1.4, "step": 7215 }, { "epoch": 2.16, "grad_norm": 0.8590707182884216, "learning_rate": 4.857504495925393e-05, "loss": 1.4144, "step": 7220 }, { "epoch": 2.16, "grad_norm": 1.8004813194274902, "learning_rate": 4.85730891108081e-05, "loss": 1.2784, "step": 7225 }, { "epoch": 2.16, "grad_norm": 1.125076413154602, "learning_rate": 4.8571131960445046e-05, "loss": 1.3068, "step": 7230 }, { "epoch": 2.16, "grad_norm": 0.909472644329071, "learning_rate": 4.856917350827289e-05, "loss": 1.3304, "step": 7235 }, { "epoch": 2.17, "grad_norm": 1.6477268934249878, "learning_rate": 4.8567213754399764e-05, "loss": 1.2217, "step": 7240 }, { "epoch": 2.17, "grad_norm": 0.9648934006690979, "learning_rate": 4.856525269893393e-05, "loss": 1.4223, "step": 7245 }, { "epoch": 2.17, "grad_norm": 2.0322964191436768, "learning_rate": 4.856329034198368e-05, "loss": 1.4918, "step": 7250 }, { "epoch": 2.17, "grad_norm": 1.6450649499893188, "learning_rate": 4.8561326683657405e-05, "loss": 1.3366, "step": 7255 }, { "epoch": 2.17, "grad_norm": 1.0612916946411133, "learning_rate": 4.855936172406354e-05, "loss": 1.2437, "step": 7260 }, { "epoch": 2.17, "grad_norm": 1.8631542921066284, "learning_rate": 4.855739546331062e-05, "loss": 1.3482, "step": 7265 }, { "epoch": 2.18, "grad_norm": 2.2706685066223145, "learning_rate": 4.855542790150723e-05, "loss": 1.3031, "step": 7270 }, { "epoch": 2.18, "grad_norm": 1.1304792165756226, "learning_rate": 4.855345903876204e-05, "loss": 1.4319, "step": 7275 }, { "epoch": 2.18, "grad_norm": 1.4051589965820312, "learning_rate": 4.8551488875183794e-05, "loss": 
1.2979, "step": 7280 }, { "epoch": 2.18, "grad_norm": 1.5046559572219849, "learning_rate": 4.8549517410881296e-05, "loss": 1.3051, "step": 7285 }, { "epoch": 2.18, "grad_norm": 1.2949678897857666, "learning_rate": 4.8547544645963435e-05, "loss": 1.4025, "step": 7290 }, { "epoch": 2.18, "grad_norm": 1.3546310663223267, "learning_rate": 4.854557058053915e-05, "loss": 1.2204, "step": 7295 }, { "epoch": 2.18, "grad_norm": 1.055916666984558, "learning_rate": 4.8543595214717486e-05, "loss": 1.3216, "step": 7300 }, { "epoch": 2.19, "grad_norm": 1.6973356008529663, "learning_rate": 4.8541618548607525e-05, "loss": 1.1717, "step": 7305 }, { "epoch": 2.19, "grad_norm": 0.9131316542625427, "learning_rate": 4.853964058231844e-05, "loss": 1.224, "step": 7310 }, { "epoch": 2.19, "grad_norm": 1.3941514492034912, "learning_rate": 4.853766131595948e-05, "loss": 1.4302, "step": 7315 }, { "epoch": 2.19, "grad_norm": 0.6351519823074341, "learning_rate": 4.853568074963994e-05, "loss": 1.3049, "step": 7320 }, { "epoch": 2.19, "grad_norm": 1.1367075443267822, "learning_rate": 4.853369888346923e-05, "loss": 1.4376, "step": 7325 }, { "epoch": 2.19, "grad_norm": 1.2908804416656494, "learning_rate": 4.853171571755679e-05, "loss": 1.3539, "step": 7330 }, { "epoch": 2.19, "grad_norm": 0.7789058089256287, "learning_rate": 4.8529731252012145e-05, "loss": 1.409, "step": 7335 }, { "epoch": 2.2, "grad_norm": 1.526838779449463, "learning_rate": 4.8527745486944906e-05, "loss": 1.3515, "step": 7340 }, { "epoch": 2.2, "grad_norm": 1.1574357748031616, "learning_rate": 4.852575842246474e-05, "loss": 1.3984, "step": 7345 }, { "epoch": 2.2, "grad_norm": 2.089668035507202, "learning_rate": 4.852377005868138e-05, "loss": 1.3556, "step": 7350 }, { "epoch": 2.2, "grad_norm": 1.4345306158065796, "learning_rate": 4.852178039570466e-05, "loss": 1.2993, "step": 7355 }, { "epoch": 2.2, "grad_norm": 1.0445202589035034, "learning_rate": 4.851978943364446e-05, "loss": 1.4464, "step": 7360 }, { "epoch": 2.2, 
"grad_norm": 2.4098684787750244, "learning_rate": 4.851779717261072e-05, "loss": 1.4884, "step": 7365 }, { "epoch": 2.21, "grad_norm": 0.9191934466362, "learning_rate": 4.851580361271351e-05, "loss": 1.5069, "step": 7370 }, { "epoch": 2.21, "grad_norm": 1.705700397491455, "learning_rate": 4.8513808754062894e-05, "loss": 1.2528, "step": 7375 }, { "epoch": 2.21, "grad_norm": 1.1692149639129639, "learning_rate": 4.851181259676907e-05, "loss": 1.3765, "step": 7380 }, { "epoch": 2.21, "grad_norm": 3.2977681159973145, "learning_rate": 4.850981514094228e-05, "loss": 1.2544, "step": 7385 }, { "epoch": 2.21, "grad_norm": 1.1639879941940308, "learning_rate": 4.850781638669283e-05, "loss": 1.342, "step": 7390 }, { "epoch": 2.21, "grad_norm": 0.9928296208381653, "learning_rate": 4.8505816334131116e-05, "loss": 1.2802, "step": 7395 }, { "epoch": 2.21, "grad_norm": 0.9788908958435059, "learning_rate": 4.85038149833676e-05, "loss": 1.39, "step": 7400 }, { "epoch": 2.22, "grad_norm": 1.9940471649169922, "learning_rate": 4.850181233451281e-05, "loss": 1.354, "step": 7405 }, { "epoch": 2.22, "grad_norm": 1.0245496034622192, "learning_rate": 4.849980838767736e-05, "loss": 1.3212, "step": 7410 }, { "epoch": 2.22, "grad_norm": 2.179013967514038, "learning_rate": 4.849780314297191e-05, "loss": 1.5085, "step": 7415 }, { "epoch": 2.22, "grad_norm": 1.598319411277771, "learning_rate": 4.8495796600507226e-05, "loss": 1.4813, "step": 7420 }, { "epoch": 2.22, "grad_norm": 1.519370675086975, "learning_rate": 4.8493788760394115e-05, "loss": 1.1946, "step": 7425 }, { "epoch": 2.22, "grad_norm": 1.5374704599380493, "learning_rate": 4.849177962274347e-05, "loss": 1.5137, "step": 7430 }, { "epoch": 2.22, "grad_norm": 0.7265501022338867, "learning_rate": 4.8489769187666255e-05, "loss": 1.3113, "step": 7435 }, { "epoch": 2.23, "grad_norm": 1.2072696685791016, "learning_rate": 4.848775745527351e-05, "loss": 1.3973, "step": 7440 }, { "epoch": 2.23, "grad_norm": 1.156033992767334, "learning_rate": 
4.848574442567633e-05, "loss": 1.4471, "step": 7445 }, { "epoch": 2.23, "grad_norm": 1.4983317852020264, "learning_rate": 4.848373009898589e-05, "loss": 1.3487, "step": 7450 }, { "epoch": 2.23, "grad_norm": 1.2913556098937988, "learning_rate": 4.848171447531346e-05, "loss": 1.3145, "step": 7455 }, { "epoch": 2.23, "grad_norm": 1.8600425720214844, "learning_rate": 4.847969755477034e-05, "loss": 1.3808, "step": 7460 }, { "epoch": 2.23, "grad_norm": 1.028699278831482, "learning_rate": 4.847767933746793e-05, "loss": 1.3718, "step": 7465 }, { "epoch": 2.23, "grad_norm": 1.1770877838134766, "learning_rate": 4.8475659823517695e-05, "loss": 1.4073, "step": 7470 }, { "epoch": 2.24, "grad_norm": 0.6877673268318176, "learning_rate": 4.847363901303117e-05, "loss": 1.3549, "step": 7475 }, { "epoch": 2.24, "grad_norm": 0.6186959743499756, "learning_rate": 4.847161690611996e-05, "loss": 1.2288, "step": 7480 }, { "epoch": 2.24, "grad_norm": 1.7008882761001587, "learning_rate": 4.846959350289575e-05, "loss": 1.1874, "step": 7485 }, { "epoch": 2.24, "grad_norm": 1.5733880996704102, "learning_rate": 4.846756880347029e-05, "loss": 1.3694, "step": 7490 }, { "epoch": 2.24, "grad_norm": 0.7959824800491333, "learning_rate": 4.846554280795539e-05, "loss": 1.3386, "step": 7495 }, { "epoch": 2.24, "grad_norm": 1.6827507019042969, "learning_rate": 4.8463515516462946e-05, "loss": 1.3071, "step": 7500 }, { "epoch": 2.25, "grad_norm": 1.2505954504013062, "learning_rate": 4.8461486929104936e-05, "loss": 1.3838, "step": 7505 }, { "epoch": 2.25, "grad_norm": 0.9889876246452332, "learning_rate": 4.8459457045993396e-05, "loss": 1.4403, "step": 7510 }, { "epoch": 2.25, "grad_norm": 1.9707186222076416, "learning_rate": 4.845742586724042e-05, "loss": 1.3676, "step": 7515 }, { "epoch": 2.25, "grad_norm": 2.145528554916382, "learning_rate": 4.845539339295819e-05, "loss": 1.5171, "step": 7520 }, { "epoch": 2.25, "grad_norm": 1.8875707387924194, "learning_rate": 4.845335962325897e-05, "loss": 1.236, "step": 
7525 }, { "epoch": 2.25, "grad_norm": 1.5600254535675049, "learning_rate": 4.845132455825508e-05, "loss": 1.1585, "step": 7530 }, { "epoch": 2.25, "grad_norm": 1.4640624523162842, "learning_rate": 4.844928819805892e-05, "loss": 1.2519, "step": 7535 }, { "epoch": 2.26, "grad_norm": 0.7027156949043274, "learning_rate": 4.844725054278293e-05, "loss": 1.1937, "step": 7540 }, { "epoch": 2.26, "grad_norm": 1.2013248205184937, "learning_rate": 4.8445211592539674e-05, "loss": 1.3886, "step": 7545 }, { "epoch": 2.26, "grad_norm": 0.7057489156723022, "learning_rate": 4.844317134744174e-05, "loss": 1.3281, "step": 7550 }, { "epoch": 2.26, "grad_norm": 0.8883240818977356, "learning_rate": 4.8441129807601834e-05, "loss": 1.3181, "step": 7555 }, { "epoch": 2.26, "grad_norm": 1.322933554649353, "learning_rate": 4.8439086973132684e-05, "loss": 1.4801, "step": 7560 }, { "epoch": 2.26, "grad_norm": 2.008849859237671, "learning_rate": 4.843704284414713e-05, "loss": 1.4635, "step": 7565 }, { "epoch": 2.26, "grad_norm": 1.0462682247161865, "learning_rate": 4.8434997420758065e-05, "loss": 1.4163, "step": 7570 }, { "epoch": 2.27, "grad_norm": 1.419507622718811, "learning_rate": 4.843295070307844e-05, "loss": 1.366, "step": 7575 }, { "epoch": 2.27, "grad_norm": 1.5404951572418213, "learning_rate": 4.8430902691221314e-05, "loss": 1.4692, "step": 7580 }, { "epoch": 2.27, "grad_norm": 1.5255093574523926, "learning_rate": 4.842885338529979e-05, "loss": 1.5013, "step": 7585 }, { "epoch": 2.27, "grad_norm": 2.1905360221862793, "learning_rate": 4.842680278542704e-05, "loss": 1.3308, "step": 7590 }, { "epoch": 2.27, "grad_norm": 1.8828074932098389, "learning_rate": 4.842475089171632e-05, "loss": 1.2902, "step": 7595 }, { "epoch": 2.27, "grad_norm": 1.31754732131958, "learning_rate": 4.842269770428096e-05, "loss": 1.2712, "step": 7600 }, { "epoch": 2.28, "grad_norm": 1.3499248027801514, "learning_rate": 4.842064322323436e-05, "loss": 1.419, "step": 7605 }, { "epoch": 2.28, "grad_norm": 
2.2709991931915283, "learning_rate": 4.841858744868997e-05, "loss": 1.3574, "step": 7610 }, { "epoch": 2.28, "grad_norm": 0.6626339554786682, "learning_rate": 4.8416530380761335e-05, "loss": 1.3227, "step": 7615 }, { "epoch": 2.28, "grad_norm": 1.2082407474517822, "learning_rate": 4.841447201956208e-05, "loss": 1.1635, "step": 7620 }, { "epoch": 2.28, "grad_norm": 1.1640417575836182, "learning_rate": 4.841241236520586e-05, "loss": 1.2477, "step": 7625 }, { "epoch": 2.28, "grad_norm": 1.2419222593307495, "learning_rate": 4.8410351417806454e-05, "loss": 1.1595, "step": 7630 }, { "epoch": 2.28, "grad_norm": 2.1427175998687744, "learning_rate": 4.840828917747766e-05, "loss": 1.3203, "step": 7635 }, { "epoch": 2.29, "grad_norm": 1.4415032863616943, "learning_rate": 4.8406225644333395e-05, "loss": 1.2786, "step": 7640 }, { "epoch": 2.29, "grad_norm": 1.9205933809280396, "learning_rate": 4.8404160818487615e-05, "loss": 1.3197, "step": 7645 }, { "epoch": 2.29, "grad_norm": 2.3150992393493652, "learning_rate": 4.840209470005436e-05, "loss": 1.3841, "step": 7650 }, { "epoch": 2.29, "grad_norm": 2.114997148513794, "learning_rate": 4.8400027289147746e-05, "loss": 1.4485, "step": 7655 }, { "epoch": 2.29, "grad_norm": 1.2688688039779663, "learning_rate": 4.8397958585881934e-05, "loss": 1.2421, "step": 7660 }, { "epoch": 2.29, "grad_norm": 2.3500120639801025, "learning_rate": 4.83958885903712e-05, "loss": 1.4009, "step": 7665 }, { "epoch": 2.29, "grad_norm": 0.87721848487854, "learning_rate": 4.839381730272985e-05, "loss": 1.2508, "step": 7670 }, { "epoch": 2.3, "grad_norm": 2.1031057834625244, "learning_rate": 4.83917447230723e-05, "loss": 1.5297, "step": 7675 }, { "epoch": 2.3, "grad_norm": 0.6989408731460571, "learning_rate": 4.838967085151299e-05, "loss": 1.1791, "step": 7680 }, { "epoch": 2.3, "grad_norm": 1.4262398481369019, "learning_rate": 4.8387595688166474e-05, "loss": 1.4372, "step": 7685 }, { "epoch": 2.3, "grad_norm": 1.9426538944244385, "learning_rate": 
4.8385519233147355e-05, "loss": 1.2325, "step": 7690 }, { "epoch": 2.3, "grad_norm": 1.5246938467025757, "learning_rate": 4.838344148657033e-05, "loss": 1.3084, "step": 7695 }, { "epoch": 2.3, "grad_norm": 1.8816734552383423, "learning_rate": 4.8381362448550126e-05, "loss": 1.3766, "step": 7700 }, { "epoch": 2.31, "grad_norm": 1.2604855298995972, "learning_rate": 4.837928211920159e-05, "loss": 1.2206, "step": 7705 }, { "epoch": 2.31, "grad_norm": 1.575439691543579, "learning_rate": 4.837720049863958e-05, "loss": 1.4169, "step": 7710 }, { "epoch": 2.31, "grad_norm": 0.9932951331138611, "learning_rate": 4.837511758697911e-05, "loss": 1.3783, "step": 7715 }, { "epoch": 2.31, "grad_norm": 0.9457315802574158, "learning_rate": 4.8373033384335185e-05, "loss": 1.3412, "step": 7720 }, { "epoch": 2.31, "grad_norm": 1.7361222505569458, "learning_rate": 4.8370947890822914e-05, "loss": 1.3247, "step": 7725 }, { "epoch": 2.31, "grad_norm": 0.6409812569618225, "learning_rate": 4.8368861106557494e-05, "loss": 1.3878, "step": 7730 }, { "epoch": 2.31, "grad_norm": 1.8458154201507568, "learning_rate": 4.8366773031654155e-05, "loss": 1.3113, "step": 7735 }, { "epoch": 2.32, "grad_norm": 1.0293338298797607, "learning_rate": 4.836468366622824e-05, "loss": 1.5338, "step": 7740 }, { "epoch": 2.32, "grad_norm": 1.2134134769439697, "learning_rate": 4.836259301039513e-05, "loss": 1.4984, "step": 7745 }, { "epoch": 2.32, "grad_norm": 1.1873372793197632, "learning_rate": 4.8360501064270293e-05, "loss": 1.2362, "step": 7750 }, { "epoch": 2.32, "grad_norm": 2.671053647994995, "learning_rate": 4.835840782796925e-05, "loss": 1.4207, "step": 7755 }, { "epoch": 2.32, "grad_norm": 1.1975551843643188, "learning_rate": 4.835631330160764e-05, "loss": 1.3198, "step": 7760 }, { "epoch": 2.32, "grad_norm": 1.6073672771453857, "learning_rate": 4.835421748530112e-05, "loss": 1.2571, "step": 7765 }, { "epoch": 2.32, "grad_norm": 1.3273591995239258, "learning_rate": 4.8352120379165444e-05, "loss": 1.5365, 
"step": 7770 }, { "epoch": 2.33, "grad_norm": 1.3509129285812378, "learning_rate": 4.835002198331643e-05, "loss": 1.4124, "step": 7775 }, { "epoch": 2.33, "grad_norm": 1.4344903230667114, "learning_rate": 4.834792229786997e-05, "loss": 1.2365, "step": 7780 }, { "epoch": 2.33, "grad_norm": 0.8779138922691345, "learning_rate": 4.834582132294203e-05, "loss": 1.3173, "step": 7785 }, { "epoch": 2.33, "grad_norm": 1.2786595821380615, "learning_rate": 4.834371905864865e-05, "loss": 1.5267, "step": 7790 }, { "epoch": 2.33, "grad_norm": 1.5323184728622437, "learning_rate": 4.834161550510593e-05, "loss": 1.2912, "step": 7795 }, { "epoch": 2.33, "grad_norm": 0.7492801547050476, "learning_rate": 4.8339510662430046e-05, "loss": 1.3253, "step": 7800 }, { "epoch": 2.34, "grad_norm": 0.9587426781654358, "learning_rate": 4.833740453073725e-05, "loss": 1.3364, "step": 7805 }, { "epoch": 2.34, "grad_norm": 1.607540488243103, "learning_rate": 4.833529711014386e-05, "loss": 1.2618, "step": 7810 }, { "epoch": 2.34, "grad_norm": 1.6319369077682495, "learning_rate": 4.833318840076626e-05, "loss": 1.2573, "step": 7815 }, { "epoch": 2.34, "grad_norm": 1.2608624696731567, "learning_rate": 4.833107840272092e-05, "loss": 1.1074, "step": 7820 }, { "epoch": 2.34, "grad_norm": 0.8057272434234619, "learning_rate": 4.832896711612438e-05, "loss": 1.3739, "step": 7825 }, { "epoch": 2.34, "grad_norm": 1.2614953517913818, "learning_rate": 4.832685454109323e-05, "loss": 1.4891, "step": 7830 }, { "epoch": 2.34, "grad_norm": 1.4242806434631348, "learning_rate": 4.8324740677744154e-05, "loss": 1.254, "step": 7835 }, { "epoch": 2.35, "grad_norm": 1.3085395097732544, "learning_rate": 4.832262552619389e-05, "loss": 1.3165, "step": 7840 }, { "epoch": 2.35, "grad_norm": 1.3229748010635376, "learning_rate": 4.832050908655926e-05, "loss": 1.4134, "step": 7845 }, { "epoch": 2.35, "grad_norm": 2.4007372856140137, "learning_rate": 4.831839135895716e-05, "loss": 1.3104, "step": 7850 }, { "epoch": 2.35, "grad_norm": 
1.3286653757095337, "learning_rate": 4.831627234350453e-05, "loss": 1.2269, "step": 7855 }, { "epoch": 2.35, "grad_norm": 1.3296312093734741, "learning_rate": 4.831415204031843e-05, "loss": 1.3776, "step": 7860 }, { "epoch": 2.35, "grad_norm": 0.8926234841346741, "learning_rate": 4.831203044951593e-05, "loss": 1.3842, "step": 7865 }, { "epoch": 2.35, "grad_norm": 1.3321502208709717, "learning_rate": 4.8309907571214234e-05, "loss": 1.2039, "step": 7870 }, { "epoch": 2.36, "grad_norm": 0.7737197875976562, "learning_rate": 4.830778340553057e-05, "loss": 1.3269, "step": 7875 }, { "epoch": 2.36, "grad_norm": 1.7216429710388184, "learning_rate": 4.830565795258225e-05, "loss": 1.4176, "step": 7880 }, { "epoch": 2.36, "grad_norm": 7.1848835945129395, "learning_rate": 4.830353121248667e-05, "loss": 1.429, "step": 7885 }, { "epoch": 2.36, "grad_norm": 1.1508842706680298, "learning_rate": 4.830140318536128e-05, "loss": 1.361, "step": 7890 }, { "epoch": 2.36, "grad_norm": 0.9978459477424622, "learning_rate": 4.829927387132362e-05, "loss": 1.2397, "step": 7895 }, { "epoch": 2.36, "grad_norm": 1.3493157625198364, "learning_rate": 4.829714327049127e-05, "loss": 1.3463, "step": 7900 }, { "epoch": 2.37, "grad_norm": 1.3078923225402832, "learning_rate": 4.829501138298192e-05, "loss": 1.3451, "step": 7905 }, { "epoch": 2.37, "grad_norm": 1.330299735069275, "learning_rate": 4.829287820891332e-05, "loss": 1.4147, "step": 7910 }, { "epoch": 2.37, "grad_norm": 1.1133418083190918, "learning_rate": 4.829074374840325e-05, "loss": 1.2857, "step": 7915 }, { "epoch": 2.37, "grad_norm": 1.8395391702651978, "learning_rate": 4.828860800156961e-05, "loss": 1.262, "step": 7920 }, { "epoch": 2.37, "grad_norm": 1.1168737411499023, "learning_rate": 4.8286470968530375e-05, "loss": 1.364, "step": 7925 }, { "epoch": 2.37, "grad_norm": 1.8225456476211548, "learning_rate": 4.8284332649403534e-05, "loss": 1.3554, "step": 7930 }, { "epoch": 2.37, "grad_norm": 1.4843734502792358, "learning_rate": 
4.8282193044307213e-05, "loss": 1.3841, "step": 7935 }, { "epoch": 2.38, "grad_norm": 2.07279896736145, "learning_rate": 4.8280052153359565e-05, "loss": 1.1941, "step": 7940 }, { "epoch": 2.38, "grad_norm": 0.881043016910553, "learning_rate": 4.8277909976678847e-05, "loss": 1.2924, "step": 7945 }, { "epoch": 2.38, "grad_norm": 1.8441745042800903, "learning_rate": 4.8275766514383346e-05, "loss": 1.3775, "step": 7950 }, { "epoch": 2.38, "grad_norm": 1.2822072505950928, "learning_rate": 4.827362176659146e-05, "loss": 1.3072, "step": 7955 }, { "epoch": 2.38, "grad_norm": 0.9727440476417542, "learning_rate": 4.8271475733421636e-05, "loss": 1.4031, "step": 7960 }, { "epoch": 2.38, "grad_norm": 1.616499900817871, "learning_rate": 4.826932841499239e-05, "loss": 1.383, "step": 7965 }, { "epoch": 2.38, "grad_norm": 1.3687835931777954, "learning_rate": 4.826717981142233e-05, "loss": 1.2824, "step": 7970 }, { "epoch": 2.39, "grad_norm": 6.598733901977539, "learning_rate": 4.826502992283011e-05, "loss": 1.2172, "step": 7975 }, { "epoch": 2.39, "grad_norm": 1.1800307035446167, "learning_rate": 4.826287874933446e-05, "loss": 1.2832, "step": 7980 }, { "epoch": 2.39, "grad_norm": 0.8195118308067322, "learning_rate": 4.826072629105422e-05, "loss": 1.2309, "step": 7985 }, { "epoch": 2.39, "grad_norm": 3.522782564163208, "learning_rate": 4.8258572548108226e-05, "loss": 1.4097, "step": 7990 }, { "epoch": 2.39, "grad_norm": 1.1727079153060913, "learning_rate": 4.8256417520615446e-05, "loss": 1.507, "step": 7995 }, { "epoch": 2.39, "grad_norm": 0.7329881191253662, "learning_rate": 4.825426120869491e-05, "loss": 1.2724, "step": 8000 }, { "epoch": 2.4, "grad_norm": 1.2820346355438232, "learning_rate": 4.825210361246569e-05, "loss": 1.4193, "step": 8005 }, { "epoch": 2.4, "grad_norm": 3.400033712387085, "learning_rate": 4.824994473204697e-05, "loss": 1.2408, "step": 8010 }, { "epoch": 2.4, "grad_norm": 1.259271502494812, "learning_rate": 4.824778456755796e-05, "loss": 1.1783, "step": 8015 
}, { "epoch": 2.4, "grad_norm": 1.4319287538528442, "learning_rate": 4.824562311911798e-05, "loss": 1.2565, "step": 8020 }, { "epoch": 2.4, "grad_norm": 1.389338493347168, "learning_rate": 4.824346038684638e-05, "loss": 1.4043, "step": 8025 }, { "epoch": 2.4, "grad_norm": 0.8132510185241699, "learning_rate": 4.824129637086264e-05, "loss": 1.3739, "step": 8030 }, { "epoch": 2.4, "grad_norm": 1.4358786344528198, "learning_rate": 4.823913107128626e-05, "loss": 1.2356, "step": 8035 }, { "epoch": 2.41, "grad_norm": 1.380957841873169, "learning_rate": 4.823696448823681e-05, "loss": 1.41, "step": 8040 }, { "epoch": 2.41, "grad_norm": 0.9512829780578613, "learning_rate": 4.823479662183398e-05, "loss": 1.1268, "step": 8045 }, { "epoch": 2.41, "grad_norm": 1.8032479286193848, "learning_rate": 4.823262747219749e-05, "loss": 1.3039, "step": 8050 }, { "epoch": 2.41, "grad_norm": 1.2832984924316406, "learning_rate": 4.823045703944712e-05, "loss": 1.3076, "step": 8055 }, { "epoch": 2.41, "grad_norm": 1.5915247201919556, "learning_rate": 4.8228285323702754e-05, "loss": 1.3724, "step": 8060 }, { "epoch": 2.41, "grad_norm": 0.7308753132820129, "learning_rate": 4.8226112325084335e-05, "loss": 1.1823, "step": 8065 }, { "epoch": 2.41, "grad_norm": 1.5579122304916382, "learning_rate": 4.822393804371188e-05, "loss": 1.2714, "step": 8070 }, { "epoch": 2.42, "grad_norm": 1.8810973167419434, "learning_rate": 4.822176247970547e-05, "loss": 1.3433, "step": 8075 }, { "epoch": 2.42, "grad_norm": 0.7145785689353943, "learning_rate": 4.821958563318524e-05, "loss": 1.2667, "step": 8080 }, { "epoch": 2.42, "grad_norm": 1.8525540828704834, "learning_rate": 4.8217407504271446e-05, "loss": 1.3189, "step": 8085 }, { "epoch": 2.42, "grad_norm": 1.6017142534255981, "learning_rate": 4.821522809308436e-05, "loss": 1.2954, "step": 8090 }, { "epoch": 2.42, "grad_norm": 1.1823608875274658, "learning_rate": 4.821304739974437e-05, "loss": 1.4232, "step": 8095 }, { "epoch": 2.42, "grad_norm": 1.2978624105453491, 
"learning_rate": 4.821086542437189e-05, "loss": 1.3639, "step": 8100 }, { "epoch": 2.42, "grad_norm": 0.7997733354568481, "learning_rate": 4.8208682167087436e-05, "loss": 1.3463, "step": 8105 }, { "epoch": 2.43, "grad_norm": 1.6938416957855225, "learning_rate": 4.820649762801159e-05, "loss": 1.3213, "step": 8110 }, { "epoch": 2.43, "grad_norm": 0.9703941941261292, "learning_rate": 4.820431180726501e-05, "loss": 1.2537, "step": 8115 }, { "epoch": 2.43, "grad_norm": 1.5119740962982178, "learning_rate": 4.820212470496841e-05, "loss": 1.2918, "step": 8120 }, { "epoch": 2.43, "grad_norm": 1.7154700756072998, "learning_rate": 4.8199936321242576e-05, "loss": 1.4548, "step": 8125 }, { "epoch": 2.43, "grad_norm": 0.7919926643371582, "learning_rate": 4.819774665620837e-05, "loss": 1.2193, "step": 8130 }, { "epoch": 2.43, "grad_norm": 1.4892973899841309, "learning_rate": 4.819555570998673e-05, "loss": 1.325, "step": 8135 }, { "epoch": 2.44, "grad_norm": 1.446234107017517, "learning_rate": 4.819336348269866e-05, "loss": 1.371, "step": 8140 }, { "epoch": 2.44, "grad_norm": 1.9358090162277222, "learning_rate": 4.8191169974465235e-05, "loss": 1.3127, "step": 8145 }, { "epoch": 2.44, "grad_norm": 1.372162938117981, "learning_rate": 4.818897518540759e-05, "loss": 1.3957, "step": 8150 }, { "epoch": 2.44, "grad_norm": 1.8464466333389282, "learning_rate": 4.818677911564696e-05, "loss": 1.3159, "step": 8155 }, { "epoch": 2.44, "grad_norm": 0.7592960000038147, "learning_rate": 4.8184581765304616e-05, "loss": 1.3153, "step": 8160 }, { "epoch": 2.44, "grad_norm": 0.8226920366287231, "learning_rate": 4.8182383134501915e-05, "loss": 1.4496, "step": 8165 }, { "epoch": 2.44, "grad_norm": 1.1266101598739624, "learning_rate": 4.81801832233603e-05, "loss": 1.2566, "step": 8170 }, { "epoch": 2.45, "grad_norm": 1.9116572141647339, "learning_rate": 4.817798203200126e-05, "loss": 1.2411, "step": 8175 }, { "epoch": 2.45, "grad_norm": 1.1480777263641357, "learning_rate": 4.8175779560546357e-05, 
"loss": 1.4541, "step": 8180 }, { "epoch": 2.45, "grad_norm": 1.545530080795288, "learning_rate": 4.8173575809117246e-05, "loss": 1.2647, "step": 8185 }, { "epoch": 2.45, "grad_norm": 2.0556013584136963, "learning_rate": 4.817137077783562e-05, "loss": 1.4301, "step": 8190 }, { "epoch": 2.45, "grad_norm": 1.490013599395752, "learning_rate": 4.816916446682328e-05, "loss": 1.5971, "step": 8195 }, { "epoch": 2.45, "grad_norm": 1.620077133178711, "learning_rate": 4.8166956876202066e-05, "loss": 1.2097, "step": 8200 }, { "epoch": 2.45, "grad_norm": 1.2473911046981812, "learning_rate": 4.816474800609391e-05, "loss": 1.4232, "step": 8205 }, { "epoch": 2.46, "grad_norm": 1.5744521617889404, "learning_rate": 4.816253785662079e-05, "loss": 1.2997, "step": 8210 }, { "epoch": 2.46, "grad_norm": 2.0682075023651123, "learning_rate": 4.816032642790479e-05, "loss": 1.2491, "step": 8215 }, { "epoch": 2.46, "grad_norm": 0.989682674407959, "learning_rate": 4.815811372006803e-05, "loss": 1.2638, "step": 8220 }, { "epoch": 2.46, "grad_norm": 0.9861974120140076, "learning_rate": 4.8155899733232724e-05, "loss": 1.3134, "step": 8225 }, { "epoch": 2.46, "grad_norm": 1.6601946353912354, "learning_rate": 4.8153684467521145e-05, "loss": 1.2995, "step": 8230 }, { "epoch": 2.46, "grad_norm": 1.8159310817718506, "learning_rate": 4.8151467923055636e-05, "loss": 1.4021, "step": 8235 }, { "epoch": 2.47, "grad_norm": 1.435726523399353, "learning_rate": 4.814925009995862e-05, "loss": 1.2735, "step": 8240 }, { "epoch": 2.47, "grad_norm": 1.530027985572815, "learning_rate": 4.8147030998352585e-05, "loss": 1.436, "step": 8245 }, { "epoch": 2.47, "grad_norm": 1.6053141355514526, "learning_rate": 4.814481061836008e-05, "loss": 1.2369, "step": 8250 }, { "epoch": 2.47, "grad_norm": 1.5459723472595215, "learning_rate": 4.814258896010375e-05, "loss": 1.2969, "step": 8255 }, { "epoch": 2.47, "grad_norm": 1.4241658449172974, "learning_rate": 4.814036602370628e-05, "loss": 1.2725, "step": 8260 }, { "epoch": 2.47, 
"grad_norm": 1.7884745597839355, "learning_rate": 4.813814180929046e-05, "loss": 1.3672, "step": 8265 }, { "epoch": 2.47, "grad_norm": 1.1821926832199097, "learning_rate": 4.8135916316979114e-05, "loss": 1.2665, "step": 8270 }, { "epoch": 2.48, "grad_norm": 1.176249623298645, "learning_rate": 4.813368954689516e-05, "loss": 1.2877, "step": 8275 }, { "epoch": 2.48, "grad_norm": 2.3001742362976074, "learning_rate": 4.813146149916157e-05, "loss": 1.1879, "step": 8280 }, { "epoch": 2.48, "grad_norm": 1.2863318920135498, "learning_rate": 4.812923217390141e-05, "loss": 1.2886, "step": 8285 }, { "epoch": 2.48, "grad_norm": 1.8792412281036377, "learning_rate": 4.81270015712378e-05, "loss": 1.3733, "step": 8290 }, { "epoch": 2.48, "grad_norm": 1.0877119302749634, "learning_rate": 4.8124769691293925e-05, "loss": 1.2741, "step": 8295 }, { "epoch": 2.48, "grad_norm": 1.0546271800994873, "learning_rate": 4.812253653419306e-05, "loss": 1.2784, "step": 8300 }, { "epoch": 2.48, "grad_norm": 0.8137794137001038, "learning_rate": 4.8120302100058545e-05, "loss": 1.3681, "step": 8305 }, { "epoch": 2.49, "grad_norm": 2.380187749862671, "learning_rate": 4.811806638901377e-05, "loss": 1.2771, "step": 8310 }, { "epoch": 2.49, "grad_norm": 1.1586872339248657, "learning_rate": 4.8115829401182224e-05, "loss": 1.3558, "step": 8315 }, { "epoch": 2.49, "grad_norm": 0.9364005327224731, "learning_rate": 4.811359113668744e-05, "loss": 1.4381, "step": 8320 }, { "epoch": 2.49, "grad_norm": 1.160302758216858, "learning_rate": 4.8111351595653044e-05, "loss": 1.4166, "step": 8325 }, { "epoch": 2.49, "grad_norm": 1.486975908279419, "learning_rate": 4.810911077820273e-05, "loss": 1.3687, "step": 8330 }, { "epoch": 2.49, "grad_norm": 1.307428002357483, "learning_rate": 4.810686868446024e-05, "loss": 1.2001, "step": 8335 }, { "epoch": 2.5, "grad_norm": 1.7174419164657593, "learning_rate": 4.8104625314549414e-05, "loss": 1.3438, "step": 8340 }, { "epoch": 2.5, "grad_norm": 2.201108455657959, "learning_rate": 
4.810238066859415e-05, "loss": 1.3922, "step": 8345 }, { "epoch": 2.5, "grad_norm": 1.2420109510421753, "learning_rate": 4.8100134746718405e-05, "loss": 1.352, "step": 8350 }, { "epoch": 2.5, "grad_norm": 1.2646502256393433, "learning_rate": 4.809788754904624e-05, "loss": 1.291, "step": 8355 }, { "epoch": 2.5, "grad_norm": 1.1232274770736694, "learning_rate": 4.809563907570175e-05, "loss": 1.1746, "step": 8360 }, { "epoch": 2.5, "grad_norm": 1.6200834512710571, "learning_rate": 4.809338932680912e-05, "loss": 1.3263, "step": 8365 }, { "epoch": 2.5, "grad_norm": 1.9596359729766846, "learning_rate": 4.809113830249261e-05, "loss": 1.4203, "step": 8370 }, { "epoch": 2.51, "grad_norm": 1.5598084926605225, "learning_rate": 4.808888600287652e-05, "loss": 1.2957, "step": 8375 }, { "epoch": 2.51, "grad_norm": 0.9596273899078369, "learning_rate": 4.808663242808526e-05, "loss": 1.3509, "step": 8380 }, { "epoch": 2.51, "grad_norm": 1.8248343467712402, "learning_rate": 4.80843775782433e-05, "loss": 1.4082, "step": 8385 }, { "epoch": 2.51, "grad_norm": 1.0814073085784912, "learning_rate": 4.808212145347515e-05, "loss": 1.382, "step": 8390 }, { "epoch": 2.51, "grad_norm": 1.3564740419387817, "learning_rate": 4.807986405390543e-05, "loss": 1.3745, "step": 8395 }, { "epoch": 2.51, "grad_norm": 1.0324515104293823, "learning_rate": 4.8077605379658804e-05, "loss": 1.2376, "step": 8400 }, { "epoch": 2.51, "grad_norm": 1.1277930736541748, "learning_rate": 4.807534543086002e-05, "loss": 1.3123, "step": 8405 }, { "epoch": 2.52, "grad_norm": 2.6605470180511475, "learning_rate": 4.807308420763389e-05, "loss": 1.3997, "step": 8410 }, { "epoch": 2.52, "grad_norm": 0.9557091593742371, "learning_rate": 4.807082171010531e-05, "loss": 1.3075, "step": 8415 }, { "epoch": 2.52, "grad_norm": 0.9519081115722656, "learning_rate": 4.8068557938399225e-05, "loss": 1.3437, "step": 8420 }, { "epoch": 2.52, "grad_norm": 1.2350136041641235, "learning_rate": 4.8066292892640666e-05, "loss": 1.3418, "step": 8425 
}, { "epoch": 2.52, "grad_norm": 1.178934931755066, "learning_rate": 4.8064026572954726e-05, "loss": 1.2916, "step": 8430 }, { "epoch": 2.52, "grad_norm": 0.9107993841171265, "learning_rate": 4.806175897946657e-05, "loss": 1.1943, "step": 8435 }, { "epoch": 2.53, "grad_norm": 1.4889490604400635, "learning_rate": 4.805949011230144e-05, "loss": 1.3919, "step": 8440 }, { "epoch": 2.53, "grad_norm": 0.8420235514640808, "learning_rate": 4.805721997158463e-05, "loss": 1.323, "step": 8445 }, { "epoch": 2.53, "grad_norm": 1.4694463014602661, "learning_rate": 4.8054948557441535e-05, "loss": 1.2482, "step": 8450 }, { "epoch": 2.53, "grad_norm": 1.4326045513153076, "learning_rate": 4.8052675869997596e-05, "loss": 1.2654, "step": 8455 }, { "epoch": 2.53, "grad_norm": 1.6901538372039795, "learning_rate": 4.805040190937833e-05, "loss": 1.2435, "step": 8460 }, { "epoch": 2.53, "grad_norm": 1.1050915718078613, "learning_rate": 4.804812667570933e-05, "loss": 1.2554, "step": 8465 }, { "epoch": 2.53, "grad_norm": 1.468774676322937, "learning_rate": 4.8045850169116244e-05, "loss": 1.319, "step": 8470 }, { "epoch": 2.54, "grad_norm": 1.1394233703613281, "learning_rate": 4.804357238972482e-05, "loss": 1.3279, "step": 8475 }, { "epoch": 2.54, "grad_norm": 1.781924843788147, "learning_rate": 4.804129333766083e-05, "loss": 1.3171, "step": 8480 }, { "epoch": 2.54, "grad_norm": 1.5067170858383179, "learning_rate": 4.803901301305017e-05, "loss": 1.3916, "step": 8485 }, { "epoch": 2.54, "grad_norm": 1.2595572471618652, "learning_rate": 4.803673141601877e-05, "loss": 1.3188, "step": 8490 }, { "epoch": 2.54, "grad_norm": 1.1094878911972046, "learning_rate": 4.803444854669262e-05, "loss": 1.346, "step": 8495 }, { "epoch": 2.54, "grad_norm": 1.4350841045379639, "learning_rate": 4.803216440519784e-05, "loss": 1.3852, "step": 8500 }, { "epoch": 2.54, "grad_norm": 1.89030122756958, "learning_rate": 4.8029878991660556e-05, "loss": 1.4023, "step": 8505 }, { "epoch": 2.55, "grad_norm": 
0.8968062996864319, "learning_rate": 4.802759230620699e-05, "loss": 1.329, "step": 8510 }, { "epoch": 2.55, "grad_norm": 1.008400559425354, "learning_rate": 4.802530434896344e-05, "loss": 1.321, "step": 8515 }, { "epoch": 2.55, "grad_norm": 1.945785641670227, "learning_rate": 4.802301512005626e-05, "loss": 1.4198, "step": 8520 }, { "epoch": 2.55, "grad_norm": 1.0313044786453247, "learning_rate": 4.802072461961189e-05, "loss": 1.1841, "step": 8525 }, { "epoch": 2.55, "grad_norm": 0.9936201572418213, "learning_rate": 4.8018432847756823e-05, "loss": 1.4208, "step": 8530 }, { "epoch": 2.55, "grad_norm": 1.7192137241363525, "learning_rate": 4.8016139804617646e-05, "loss": 1.4593, "step": 8535 }, { "epoch": 2.56, "grad_norm": 1.4999003410339355, "learning_rate": 4.801384549032099e-05, "loss": 1.388, "step": 8540 }, { "epoch": 2.56, "grad_norm": 1.0595996379852295, "learning_rate": 4.8011549904993555e-05, "loss": 1.3635, "step": 8545 }, { "epoch": 2.56, "grad_norm": 2.0259523391723633, "learning_rate": 4.800925304876215e-05, "loss": 1.4075, "step": 8550 }, { "epoch": 2.56, "grad_norm": 1.3080471754074097, "learning_rate": 4.800695492175361e-05, "loss": 1.4913, "step": 8555 }, { "epoch": 2.56, "grad_norm": 1.0340911149978638, "learning_rate": 4.800465552409487e-05, "loss": 1.3514, "step": 8560 }, { "epoch": 2.56, "grad_norm": 0.8910084366798401, "learning_rate": 4.800235485591291e-05, "loss": 1.3424, "step": 8565 }, { "epoch": 2.56, "grad_norm": 1.5599783658981323, "learning_rate": 4.8000052917334815e-05, "loss": 1.4032, "step": 8570 }, { "epoch": 2.57, "grad_norm": 1.667799949645996, "learning_rate": 4.7997749708487695e-05, "loss": 1.4332, "step": 8575 }, { "epoch": 2.57, "grad_norm": 2.000908374786377, "learning_rate": 4.799544522949876e-05, "loss": 1.4121, "step": 8580 }, { "epoch": 2.57, "grad_norm": 0.7706380486488342, "learning_rate": 4.799313948049529e-05, "loss": 1.1469, "step": 8585 }, { "epoch": 2.57, "grad_norm": 1.065397024154663, "learning_rate": 
4.799083246160463e-05, "loss": 1.4589, "step": 8590 }, { "epoch": 2.57, "grad_norm": 1.4384722709655762, "learning_rate": 4.798852417295418e-05, "loss": 1.3352, "step": 8595 }, { "epoch": 2.57, "grad_norm": 1.4065508842468262, "learning_rate": 4.798621461467146e-05, "loss": 1.4188, "step": 8600 }, { "epoch": 2.57, "grad_norm": 1.0686558485031128, "learning_rate": 4.798390378688398e-05, "loss": 1.5157, "step": 8605 }, { "epoch": 2.58, "grad_norm": 1.01014244556427, "learning_rate": 4.798159168971938e-05, "loss": 1.305, "step": 8610 }, { "epoch": 2.58, "grad_norm": 1.5456489324569702, "learning_rate": 4.7979278323305364e-05, "loss": 1.4341, "step": 8615 }, { "epoch": 2.58, "grad_norm": 1.8445463180541992, "learning_rate": 4.7976963687769696e-05, "loss": 1.3751, "step": 8620 }, { "epoch": 2.58, "grad_norm": 1.879667043685913, "learning_rate": 4.797464778324021e-05, "loss": 1.3944, "step": 8625 }, { "epoch": 2.58, "grad_norm": 0.7700638175010681, "learning_rate": 4.79723306098448e-05, "loss": 1.2801, "step": 8630 }, { "epoch": 2.58, "grad_norm": 1.3582429885864258, "learning_rate": 4.7970012167711456e-05, "loss": 1.4009, "step": 8635 }, { "epoch": 2.58, "grad_norm": 1.5631026029586792, "learning_rate": 4.7967692456968207e-05, "loss": 1.3642, "step": 8640 }, { "epoch": 2.59, "grad_norm": 1.840862512588501, "learning_rate": 4.7965371477743185e-05, "loss": 1.1852, "step": 8645 }, { "epoch": 2.59, "grad_norm": 1.0810298919677734, "learning_rate": 4.7963049230164556e-05, "loss": 1.3303, "step": 8650 }, { "epoch": 2.59, "grad_norm": 1.0809688568115234, "learning_rate": 4.7960725714360596e-05, "loss": 1.4119, "step": 8655 }, { "epoch": 2.59, "grad_norm": 1.8407636880874634, "learning_rate": 4.7958400930459626e-05, "loss": 1.2693, "step": 8660 }, { "epoch": 2.59, "grad_norm": 2.3845536708831787, "learning_rate": 4.795607487859003e-05, "loss": 1.3055, "step": 8665 }, { "epoch": 2.59, "grad_norm": 1.7638427019119263, "learning_rate": 4.795374755888028e-05, "loss": 1.2628, 
"step": 8670 }, { "epoch": 2.6, "grad_norm": 1.1101248264312744, "learning_rate": 4.7951418971458915e-05, "loss": 1.3121, "step": 8675 }, { "epoch": 2.6, "grad_norm": 1.5876396894454956, "learning_rate": 4.794908911645453e-05, "loss": 1.3242, "step": 8680 }, { "epoch": 2.6, "grad_norm": 4.759340763092041, "learning_rate": 4.7946757993995815e-05, "loss": 1.2821, "step": 8685 }, { "epoch": 2.6, "grad_norm": 1.5042836666107178, "learning_rate": 4.794442560421151e-05, "loss": 1.423, "step": 8690 }, { "epoch": 2.6, "grad_norm": 1.0872600078582764, "learning_rate": 4.794209194723042e-05, "loss": 1.2986, "step": 8695 }, { "epoch": 2.6, "grad_norm": 0.748469352722168, "learning_rate": 4.7939757023181435e-05, "loss": 1.364, "step": 8700 }, { "epoch": 2.6, "grad_norm": 0.6730393171310425, "learning_rate": 4.793742083219353e-05, "loss": 1.3123, "step": 8705 }, { "epoch": 2.61, "grad_norm": 1.2897133827209473, "learning_rate": 4.793508337439569e-05, "loss": 1.3758, "step": 8710 }, { "epoch": 2.61, "grad_norm": 1.8004686832427979, "learning_rate": 4.793274464991706e-05, "loss": 1.3204, "step": 8715 }, { "epoch": 2.61, "grad_norm": 3.192774772644043, "learning_rate": 4.7930404658886766e-05, "loss": 1.25, "step": 8720 }, { "epoch": 2.61, "grad_norm": 1.638178825378418, "learning_rate": 4.7928063401434065e-05, "loss": 1.4043, "step": 8725 }, { "epoch": 2.61, "grad_norm": 1.2886130809783936, "learning_rate": 4.792572087768825e-05, "loss": 1.4547, "step": 8730 }, { "epoch": 2.61, "grad_norm": 1.1815823316574097, "learning_rate": 4.7923377087778695e-05, "loss": 1.4566, "step": 8735 }, { "epoch": 2.61, "grad_norm": 1.7591919898986816, "learning_rate": 4.7921032031834864e-05, "loss": 1.3245, "step": 8740 }, { "epoch": 2.62, "grad_norm": 1.386547565460205, "learning_rate": 4.7918685709986254e-05, "loss": 1.3942, "step": 8745 }, { "epoch": 2.62, "grad_norm": 1.2307301759719849, "learning_rate": 4.791633812236245e-05, "loss": 1.2166, "step": 8750 }, { "epoch": 2.62, "grad_norm": 
1.1974438428878784, "learning_rate": 4.791398926909312e-05, "loss": 1.361, "step": 8755 }, { "epoch": 2.62, "grad_norm": 1.449912190437317, "learning_rate": 4.791163915030797e-05, "loss": 1.4183, "step": 8760 }, { "epoch": 2.62, "grad_norm": 1.0868198871612549, "learning_rate": 4.790928776613682e-05, "loss": 1.4563, "step": 8765 }, { "epoch": 2.62, "grad_norm": 1.4005632400512695, "learning_rate": 4.790693511670951e-05, "loss": 1.189, "step": 8770 }, { "epoch": 2.63, "grad_norm": 1.2266290187835693, "learning_rate": 4.7904581202155983e-05, "loss": 1.435, "step": 8775 }, { "epoch": 2.63, "grad_norm": 0.5857153534889221, "learning_rate": 4.790222602260625e-05, "loss": 1.3531, "step": 8780 }, { "epoch": 2.63, "grad_norm": 3.5321526527404785, "learning_rate": 4.789986957819037e-05, "loss": 1.361, "step": 8785 }, { "epoch": 2.63, "grad_norm": 1.288081407546997, "learning_rate": 4.78975118690385e-05, "loss": 1.3323, "step": 8790 }, { "epoch": 2.63, "grad_norm": 1.5145295858383179, "learning_rate": 4.7895152895280856e-05, "loss": 1.3989, "step": 8795 }, { "epoch": 2.63, "grad_norm": 1.247381329536438, "learning_rate": 4.7892792657047714e-05, "loss": 1.2564, "step": 8800 }, { "epoch": 2.63, "grad_norm": 0.8097814917564392, "learning_rate": 4.7890903556126435e-05, "loss": 1.44, "step": 8805 }, { "epoch": 2.64, "grad_norm": 2.263486385345459, "learning_rate": 4.7888541042165937e-05, "loss": 1.4437, "step": 8810 }, { "epoch": 2.64, "grad_norm": 1.0362577438354492, "learning_rate": 4.78861772640951e-05, "loss": 1.3168, "step": 8815 }, { "epoch": 2.64, "grad_norm": 1.632360816001892, "learning_rate": 4.7883812222044486e-05, "loss": 1.287, "step": 8820 }, { "epoch": 2.64, "grad_norm": 2.122438669204712, "learning_rate": 4.788144591614472e-05, "loss": 1.3109, "step": 8825 }, { "epoch": 2.64, "grad_norm": 1.8862602710723877, "learning_rate": 4.7879078346526464e-05, "loss": 1.3944, "step": 8830 }, { "epoch": 2.64, "grad_norm": 1.2232091426849365, "learning_rate": 
4.7876709513320506e-05, "loss": 1.4264, "step": 8835 }, { "epoch": 2.64, "grad_norm": 1.3420339822769165, "learning_rate": 4.787433941665765e-05, "loss": 1.2224, "step": 8840 }, { "epoch": 2.65, "grad_norm": 1.9875779151916504, "learning_rate": 4.787196805666881e-05, "loss": 1.1951, "step": 8845 }, { "epoch": 2.65, "grad_norm": 1.106386661529541, "learning_rate": 4.7869595433484946e-05, "loss": 1.4364, "step": 8850 }, { "epoch": 2.65, "grad_norm": 1.7247858047485352, "learning_rate": 4.78672215472371e-05, "loss": 1.2084, "step": 8855 }, { "epoch": 2.65, "grad_norm": 1.5715019702911377, "learning_rate": 4.786484639805637e-05, "loss": 1.4345, "step": 8860 }, { "epoch": 2.65, "grad_norm": 1.0408717393875122, "learning_rate": 4.7862469986073954e-05, "loss": 1.4077, "step": 8865 }, { "epoch": 2.65, "grad_norm": 1.5695548057556152, "learning_rate": 4.786009231142108e-05, "loss": 1.2914, "step": 8870 }, { "epoch": 2.66, "grad_norm": 1.7669672966003418, "learning_rate": 4.7857713374229066e-05, "loss": 1.2795, "step": 8875 }, { "epoch": 2.66, "grad_norm": 1.5824629068374634, "learning_rate": 4.78553331746293e-05, "loss": 1.2562, "step": 8880 }, { "epoch": 2.66, "grad_norm": 0.8411540389060974, "learning_rate": 4.7852951712753244e-05, "loss": 1.4505, "step": 8885 }, { "epoch": 2.66, "grad_norm": 1.6968480348587036, "learning_rate": 4.7850568988732416e-05, "loss": 1.3871, "step": 8890 }, { "epoch": 2.66, "grad_norm": 1.4560775756835938, "learning_rate": 4.7848185002698416e-05, "loss": 1.4416, "step": 8895 }, { "epoch": 2.66, "grad_norm": 1.1410988569259644, "learning_rate": 4.7845799754782907e-05, "loss": 1.3336, "step": 8900 }, { "epoch": 2.66, "grad_norm": 1.136461615562439, "learning_rate": 4.784341324511762e-05, "loss": 1.3175, "step": 8905 }, { "epoch": 2.67, "grad_norm": 1.0604544878005981, "learning_rate": 4.784102547383437e-05, "loss": 1.3411, "step": 8910 }, { "epoch": 2.67, "grad_norm": 0.7411584258079529, "learning_rate": 4.783863644106502e-05, "loss": 1.1918, 
"step": 8915 }, { "epoch": 2.67, "grad_norm": 1.8481019735336304, "learning_rate": 4.783624614694153e-05, "loss": 1.4247, "step": 8920 }, { "epoch": 2.67, "grad_norm": 1.549062967300415, "learning_rate": 4.7833854591595895e-05, "loss": 1.2813, "step": 8925 }, { "epoch": 2.67, "grad_norm": 1.4914495944976807, "learning_rate": 4.78314617751602e-05, "loss": 1.3358, "step": 8930 }, { "epoch": 2.67, "grad_norm": 2.6367831230163574, "learning_rate": 4.782906769776661e-05, "loss": 1.2798, "step": 8935 }, { "epoch": 2.67, "grad_norm": 1.661944031715393, "learning_rate": 4.7826672359547343e-05, "loss": 1.3176, "step": 8940 }, { "epoch": 2.68, "grad_norm": 1.858271598815918, "learning_rate": 4.782427576063468e-05, "loss": 1.1787, "step": 8945 }, { "epoch": 2.68, "grad_norm": 1.1787148714065552, "learning_rate": 4.7821877901160996e-05, "loss": 1.3318, "step": 8950 }, { "epoch": 2.68, "grad_norm": 0.9741628170013428, "learning_rate": 4.781947878125872e-05, "loss": 1.3084, "step": 8955 }, { "epoch": 2.68, "grad_norm": 1.8977104425430298, "learning_rate": 4.781707840106034e-05, "loss": 1.3595, "step": 8960 }, { "epoch": 2.68, "grad_norm": 1.1290730237960815, "learning_rate": 4.781467676069845e-05, "loss": 1.3126, "step": 8965 }, { "epoch": 2.68, "grad_norm": 1.0687497854232788, "learning_rate": 4.7812273860305665e-05, "loss": 1.2979, "step": 8970 }, { "epoch": 2.69, "grad_norm": 1.463879108428955, "learning_rate": 4.780986970001472e-05, "loss": 1.3988, "step": 8975 }, { "epoch": 2.69, "grad_norm": 0.7218008637428284, "learning_rate": 4.780746427995837e-05, "loss": 1.3782, "step": 8980 }, { "epoch": 2.69, "grad_norm": 0.7819427847862244, "learning_rate": 4.7805057600269485e-05, "loss": 1.2902, "step": 8985 }, { "epoch": 2.69, "grad_norm": 0.7912634015083313, "learning_rate": 4.780264966108097e-05, "loss": 1.2542, "step": 8990 }, { "epoch": 2.69, "grad_norm": 1.160401701927185, "learning_rate": 4.780024046252581e-05, "loss": 1.2758, "step": 8995 }, { "epoch": 2.69, "grad_norm": 
0.7142820954322815, "learning_rate": 4.779783000473707e-05, "loss": 1.278, "step": 9000 }, { "epoch": 2.69, "grad_norm": 1.7503546476364136, "learning_rate": 4.779541828784788e-05, "loss": 1.3523, "step": 9005 }, { "epoch": 2.7, "grad_norm": 1.522162914276123, "learning_rate": 4.779300531199143e-05, "loss": 1.3737, "step": 9010 }, { "epoch": 2.7, "grad_norm": 1.4219088554382324, "learning_rate": 4.779059107730099e-05, "loss": 1.4303, "step": 9015 }, { "epoch": 2.7, "grad_norm": 1.6250290870666504, "learning_rate": 4.778817558390989e-05, "loss": 1.366, "step": 9020 }, { "epoch": 2.7, "grad_norm": 1.2301737070083618, "learning_rate": 4.7785758831951543e-05, "loss": 1.3006, "step": 9025 }, { "epoch": 2.7, "grad_norm": 1.753127932548523, "learning_rate": 4.778334082155942e-05, "loss": 1.4303, "step": 9030 }, { "epoch": 2.7, "grad_norm": 1.423354148864746, "learning_rate": 4.778092155286707e-05, "loss": 1.2672, "step": 9035 }, { "epoch": 2.7, "grad_norm": 2.4239237308502197, "learning_rate": 4.777850102600809e-05, "loss": 1.2822, "step": 9040 }, { "epoch": 2.71, "grad_norm": 1.1485992670059204, "learning_rate": 4.777607924111619e-05, "loss": 1.3411, "step": 9045 }, { "epoch": 2.71, "grad_norm": 0.7235462665557861, "learning_rate": 4.77736561983251e-05, "loss": 1.3456, "step": 9050 }, { "epoch": 2.71, "grad_norm": 1.1545169353485107, "learning_rate": 4.7771231897768655e-05, "loss": 1.4223, "step": 9055 }, { "epoch": 2.71, "grad_norm": 2.123680353164673, "learning_rate": 4.776880633958073e-05, "loss": 1.4271, "step": 9060 }, { "epoch": 2.71, "grad_norm": 1.6497604846954346, "learning_rate": 4.776637952389531e-05, "loss": 1.4639, "step": 9065 }, { "epoch": 2.71, "grad_norm": 1.2287499904632568, "learning_rate": 4.776395145084641e-05, "loss": 1.2522, "step": 9070 }, { "epoch": 2.72, "grad_norm": 1.5195343494415283, "learning_rate": 4.7761522120568134e-05, "loss": 1.4156, "step": 9075 }, { "epoch": 2.72, "grad_norm": 1.1889725923538208, "learning_rate": 
4.775909153319465e-05, "loss": 1.2898, "step": 9080 }, { "epoch": 2.72, "grad_norm": 1.7549879550933838, "learning_rate": 4.775665968886019e-05, "loss": 1.2376, "step": 9085 }, { "epoch": 2.72, "grad_norm": 1.9946390390396118, "learning_rate": 4.775422658769908e-05, "loss": 1.4544, "step": 9090 }, { "epoch": 2.72, "grad_norm": 1.594741940498352, "learning_rate": 4.775179222984568e-05, "loss": 1.2683, "step": 9095 }, { "epoch": 2.72, "grad_norm": 1.0693196058273315, "learning_rate": 4.774935661543445e-05, "loss": 1.3477, "step": 9100 }, { "epoch": 2.72, "grad_norm": 2.610431671142578, "learning_rate": 4.774691974459989e-05, "loss": 1.321, "step": 9105 }, { "epoch": 2.73, "grad_norm": 1.1639457941055298, "learning_rate": 4.774448161747661e-05, "loss": 1.3614, "step": 9110 }, { "epoch": 2.73, "grad_norm": 2.8405585289001465, "learning_rate": 4.774204223419925e-05, "loss": 1.2398, "step": 9115 }, { "epoch": 2.73, "grad_norm": 2.058262586593628, "learning_rate": 4.773960159490253e-05, "loss": 1.2883, "step": 9120 }, { "epoch": 2.73, "grad_norm": 6.126988410949707, "learning_rate": 4.773715969972125e-05, "loss": 1.4032, "step": 9125 }, { "epoch": 2.73, "grad_norm": 1.726212739944458, "learning_rate": 4.7734716548790274e-05, "loss": 1.362, "step": 9130 }, { "epoch": 2.73, "grad_norm": 2.678607940673828, "learning_rate": 4.773227214224454e-05, "loss": 1.3634, "step": 9135 }, { "epoch": 2.73, "grad_norm": 0.9776396155357361, "learning_rate": 4.7729826480219044e-05, "loss": 1.274, "step": 9140 }, { "epoch": 2.74, "grad_norm": 1.055379033088684, "learning_rate": 4.772737956284885e-05, "loss": 1.219, "step": 9145 }, { "epoch": 2.74, "grad_norm": 1.7453423738479614, "learning_rate": 4.7724931390269115e-05, "loss": 1.1836, "step": 9150 }, { "epoch": 2.74, "grad_norm": 1.390795350074768, "learning_rate": 4.772248196261504e-05, "loss": 1.4451, "step": 9155 }, { "epoch": 2.74, "grad_norm": 1.7880897521972656, "learning_rate": 4.7720031280021905e-05, "loss": 1.3696, "step": 9160 }, 
{ "epoch": 2.74, "grad_norm": 1.2243832349777222, "learning_rate": 4.771757934262505e-05, "loss": 1.2609, "step": 9165 }, { "epoch": 2.74, "grad_norm": 2.5606729984283447, "learning_rate": 4.771512615055991e-05, "loss": 1.4425, "step": 9170 }, { "epoch": 2.75, "grad_norm": 1.7605032920837402, "learning_rate": 4.771267170396196e-05, "loss": 1.4839, "step": 9175 }, { "epoch": 2.75, "grad_norm": 2.1422829627990723, "learning_rate": 4.771021600296676e-05, "loss": 1.4384, "step": 9180 }, { "epoch": 2.75, "grad_norm": 1.3212814331054688, "learning_rate": 4.770775904770994e-05, "loss": 1.4812, "step": 9185 }, { "epoch": 2.75, "grad_norm": 0.6888114213943481, "learning_rate": 4.770530083832719e-05, "loss": 1.3274, "step": 9190 }, { "epoch": 2.75, "grad_norm": 1.3165194988250732, "learning_rate": 4.770284137495428e-05, "loss": 1.3288, "step": 9195 }, { "epoch": 2.75, "grad_norm": 0.9350600242614746, "learning_rate": 4.7700380657727027e-05, "loss": 1.459, "step": 9200 }, { "epoch": 2.75, "grad_norm": 1.657118797302246, "learning_rate": 4.769791868678135e-05, "loss": 1.277, "step": 9205 }, { "epoch": 2.76, "grad_norm": 0.7231934666633606, "learning_rate": 4.769545546225322e-05, "loss": 1.3761, "step": 9210 }, { "epoch": 2.76, "grad_norm": 1.0625771284103394, "learning_rate": 4.7692990984278676e-05, "loss": 1.294, "step": 9215 }, { "epoch": 2.76, "grad_norm": 1.9042518138885498, "learning_rate": 4.769052525299383e-05, "loss": 1.4183, "step": 9220 }, { "epoch": 2.76, "grad_norm": 2.654555082321167, "learning_rate": 4.7688058268534855e-05, "loss": 1.365, "step": 9225 }, { "epoch": 2.76, "grad_norm": 0.9852314591407776, "learning_rate": 4.768559003103801e-05, "loss": 1.4423, "step": 9230 }, { "epoch": 2.76, "grad_norm": 1.300337791442871, "learning_rate": 4.76831205406396e-05, "loss": 1.2752, "step": 9235 }, { "epoch": 2.76, "grad_norm": 1.255034327507019, "learning_rate": 4.768064979747603e-05, "loss": 1.3097, "step": 9240 }, { "epoch": 2.77, "grad_norm": 1.3751298189163208, 
"learning_rate": 4.767817780168374e-05, "loss": 1.2666, "step": 9245 }, { "epoch": 2.77, "grad_norm": 0.7448214888572693, "learning_rate": 4.7675704553399265e-05, "loss": 1.4518, "step": 9250 }, { "epoch": 2.77, "grad_norm": 2.0621886253356934, "learning_rate": 4.76732300527592e-05, "loss": 1.4063, "step": 9255 }, { "epoch": 2.77, "grad_norm": 1.179983139038086, "learning_rate": 4.76707542999002e-05, "loss": 1.425, "step": 9260 }, { "epoch": 2.77, "grad_norm": 1.047968864440918, "learning_rate": 4.7668277294959006e-05, "loss": 1.3343, "step": 9265 }, { "epoch": 2.77, "grad_norm": 1.8152706623077393, "learning_rate": 4.766579903807242e-05, "loss": 1.3582, "step": 9270 }, { "epoch": 2.77, "grad_norm": 1.3376197814941406, "learning_rate": 4.7663319529377323e-05, "loss": 1.4053, "step": 9275 }, { "epoch": 2.78, "grad_norm": 1.8099796772003174, "learning_rate": 4.7660838769010635e-05, "loss": 1.5009, "step": 9280 }, { "epoch": 2.78, "grad_norm": 1.4460123777389526, "learning_rate": 4.765835675710938e-05, "loss": 1.5084, "step": 9285 }, { "epoch": 2.78, "grad_norm": 2.1671342849731445, "learning_rate": 4.765587349381063e-05, "loss": 1.3576, "step": 9290 }, { "epoch": 2.78, "grad_norm": 0.9347168803215027, "learning_rate": 4.765338897925154e-05, "loss": 1.4602, "step": 9295 }, { "epoch": 2.78, "grad_norm": 1.7557331323623657, "learning_rate": 4.765090321356932e-05, "loss": 1.4104, "step": 9300 }, { "epoch": 2.78, "grad_norm": 0.8540169596672058, "learning_rate": 4.764841619690127e-05, "loss": 1.3433, "step": 9305 }, { "epoch": 2.79, "grad_norm": 2.3423655033111572, "learning_rate": 4.764592792938473e-05, "loss": 1.3206, "step": 9310 }, { "epoch": 2.79, "grad_norm": 1.7521955966949463, "learning_rate": 4.764343841115712e-05, "loss": 1.2059, "step": 9315 }, { "epoch": 2.79, "grad_norm": 2.0334832668304443, "learning_rate": 4.764094764235595e-05, "loss": 1.3869, "step": 9320 }, { "epoch": 2.79, "grad_norm": 6.126003265380859, "learning_rate": 4.763845562311877e-05, "loss": 
1.4294, "step": 9325 }, { "epoch": 2.79, "grad_norm": 0.9254896640777588, "learning_rate": 4.763596235358323e-05, "loss": 1.3488, "step": 9330 }, { "epoch": 2.79, "grad_norm": 1.4646904468536377, "learning_rate": 4.7633467833887017e-05, "loss": 1.397, "step": 9335 }, { "epoch": 2.79, "grad_norm": 2.3618175983428955, "learning_rate": 4.763097206416789e-05, "loss": 1.4709, "step": 9340 }, { "epoch": 2.8, "grad_norm": 0.8631429672241211, "learning_rate": 4.76284750445637e-05, "loss": 1.4577, "step": 9345 }, { "epoch": 2.8, "grad_norm": 1.02370285987854, "learning_rate": 4.762597677521237e-05, "loss": 1.4129, "step": 9350 }, { "epoch": 2.8, "grad_norm": 0.7775421142578125, "learning_rate": 4.762347725625184e-05, "loss": 1.3969, "step": 9355 }, { "epoch": 2.8, "grad_norm": 1.6427661180496216, "learning_rate": 4.762097648782019e-05, "loss": 1.4851, "step": 9360 }, { "epoch": 2.8, "grad_norm": 1.482051968574524, "learning_rate": 4.761847447005552e-05, "loss": 1.4408, "step": 9365 }, { "epoch": 2.8, "grad_norm": 2.8775594234466553, "learning_rate": 4.761597120309602e-05, "loss": 1.338, "step": 9370 }, { "epoch": 2.8, "grad_norm": 2.230642795562744, "learning_rate": 4.7613466687079924e-05, "loss": 1.3657, "step": 9375 }, { "epoch": 2.81, "grad_norm": 1.0118967294692993, "learning_rate": 4.7610960922145585e-05, "loss": 1.3863, "step": 9380 }, { "epoch": 2.81, "grad_norm": 1.517298936843872, "learning_rate": 4.7608453908431365e-05, "loss": 1.337, "step": 9385 }, { "epoch": 2.81, "grad_norm": 1.0825563669204712, "learning_rate": 4.760594564607574e-05, "loss": 1.1625, "step": 9390 }, { "epoch": 2.81, "grad_norm": 1.539588212966919, "learning_rate": 4.760343613521724e-05, "loss": 1.3975, "step": 9395 }, { "epoch": 2.81, "grad_norm": 0.7479655742645264, "learning_rate": 4.760092537599445e-05, "loss": 1.3449, "step": 9400 }, { "epoch": 2.81, "grad_norm": 1.136042594909668, "learning_rate": 4.7598413368546045e-05, "loss": 1.3229, "step": 9405 }, { "epoch": 2.82, "grad_norm": 
1.4666575193405151, "learning_rate": 4.759590011301076e-05, "loss": 1.382, "step": 9410 }, { "epoch": 2.82, "grad_norm": 1.5006098747253418, "learning_rate": 4.7593385609527406e-05, "loss": 1.1975, "step": 9415 }, { "epoch": 2.82, "grad_norm": 1.135817527770996, "learning_rate": 4.7590869858234837e-05, "loss": 1.3296, "step": 9420 }, { "epoch": 2.82, "grad_norm": 1.2089482545852661, "learning_rate": 4.758835285927201e-05, "loss": 1.195, "step": 9425 }, { "epoch": 2.82, "grad_norm": 1.0782078504562378, "learning_rate": 4.758583461277794e-05, "loss": 1.295, "step": 9430 }, { "epoch": 2.82, "grad_norm": 0.9145467877388, "learning_rate": 4.75833151188917e-05, "loss": 1.2709, "step": 9435 }, { "epoch": 2.82, "grad_norm": 1.9970406293869019, "learning_rate": 4.7580794377752436e-05, "loss": 1.1399, "step": 9440 }, { "epoch": 2.83, "grad_norm": 0.8484485149383545, "learning_rate": 4.7578272389499375e-05, "loss": 1.3806, "step": 9445 }, { "epoch": 2.83, "grad_norm": 1.205751895904541, "learning_rate": 4.75757491542718e-05, "loss": 1.4711, "step": 9450 }, { "epoch": 2.83, "grad_norm": 2.451490640640259, "learning_rate": 4.757322467220905e-05, "loss": 1.5658, "step": 9455 }, { "epoch": 2.83, "grad_norm": 1.7217803001403809, "learning_rate": 4.757069894345058e-05, "loss": 1.5043, "step": 9460 }, { "epoch": 2.83, "grad_norm": 1.288859248161316, "learning_rate": 4.756817196813587e-05, "loss": 1.4451, "step": 9465 }, { "epoch": 2.83, "grad_norm": 2.3524889945983887, "learning_rate": 4.756564374640447e-05, "loss": 1.1978, "step": 9470 }, { "epoch": 2.83, "grad_norm": 2.2573511600494385, "learning_rate": 4.756311427839602e-05, "loss": 1.4399, "step": 9475 }, { "epoch": 2.84, "grad_norm": 0.9026219844818115, "learning_rate": 4.756058356425024e-05, "loss": 1.262, "step": 9480 }, { "epoch": 2.84, "grad_norm": 1.9231290817260742, "learning_rate": 4.755805160410686e-05, "loss": 1.2867, "step": 9485 }, { "epoch": 2.84, "grad_norm": 2.3469276428222656, "learning_rate": 
4.755551839810575e-05, "loss": 1.425, "step": 9490 }, { "epoch": 2.84, "grad_norm": 1.1398067474365234, "learning_rate": 4.75529839463868e-05, "loss": 1.3015, "step": 9495 }, { "epoch": 2.84, "grad_norm": 1.265406847000122, "learning_rate": 4.755044824908998e-05, "loss": 1.4007, "step": 9500 }, { "epoch": 2.84, "grad_norm": 1.610261082649231, "learning_rate": 4.754791130635537e-05, "loss": 1.4184, "step": 9505 }, { "epoch": 2.85, "grad_norm": 1.8379100561141968, "learning_rate": 4.754537311832303e-05, "loss": 1.3009, "step": 9510 }, { "epoch": 2.85, "grad_norm": 2.690635919570923, "learning_rate": 4.754283368513317e-05, "loss": 1.3863, "step": 9515 }, { "epoch": 2.85, "grad_norm": 1.1724915504455566, "learning_rate": 4.754029300692604e-05, "loss": 1.236, "step": 9520 }, { "epoch": 2.85, "grad_norm": 1.0479933023452759, "learning_rate": 4.753775108384196e-05, "loss": 1.2245, "step": 9525 }, { "epoch": 2.85, "grad_norm": 2.2428202629089355, "learning_rate": 4.753520791602132e-05, "loss": 1.3393, "step": 9530 }, { "epoch": 2.85, "grad_norm": 1.5128904581069946, "learning_rate": 4.753266350360456e-05, "loss": 1.3574, "step": 9535 }, { "epoch": 2.85, "grad_norm": 0.9393275380134583, "learning_rate": 4.7530117846732224e-05, "loss": 1.4423, "step": 9540 }, { "epoch": 2.86, "grad_norm": 1.992976427078247, "learning_rate": 4.75275709455449e-05, "loss": 1.4572, "step": 9545 }, { "epoch": 2.86, "grad_norm": 1.182762622833252, "learning_rate": 4.752502280018324e-05, "loss": 1.3492, "step": 9550 }, { "epoch": 2.86, "grad_norm": 1.1004512310028076, "learning_rate": 4.7522473410787985e-05, "loss": 1.3265, "step": 9555 }, { "epoch": 2.86, "grad_norm": 0.9491164684295654, "learning_rate": 4.751992277749994e-05, "loss": 1.2525, "step": 9560 }, { "epoch": 2.86, "grad_norm": 1.315471887588501, "learning_rate": 4.751737090045996e-05, "loss": 1.3748, "step": 9565 }, { "epoch": 2.86, "grad_norm": 1.0210347175598145, "learning_rate": 4.7514817779809e-05, "loss": 1.3718, "step": 9570 }, { 
"epoch": 2.86, "grad_norm": 1.1683263778686523, "learning_rate": 4.7512263415688054e-05, "loss": 1.3103, "step": 9575 }, { "epoch": 2.87, "grad_norm": 0.9375793933868408, "learning_rate": 4.75097078082382e-05, "loss": 1.4674, "step": 9580 }, { "epoch": 2.87, "grad_norm": 1.5116485357284546, "learning_rate": 4.750715095760058e-05, "loss": 1.3561, "step": 9585 }, { "epoch": 2.87, "grad_norm": 1.965453028678894, "learning_rate": 4.7504592863916405e-05, "loss": 1.4002, "step": 9590 }, { "epoch": 2.87, "grad_norm": 1.2619178295135498, "learning_rate": 4.750203352732696e-05, "loss": 1.2696, "step": 9595 }, { "epoch": 2.87, "grad_norm": 2.385756731033325, "learning_rate": 4.749947294797359e-05, "loss": 1.4395, "step": 9600 }, { "epoch": 2.87, "grad_norm": 1.714622139930725, "learning_rate": 4.749691112599772e-05, "loss": 1.1337, "step": 9605 }, { "epoch": 2.88, "grad_norm": 0.6344813704490662, "learning_rate": 4.7494348061540835e-05, "loss": 1.3205, "step": 9610 }, { "epoch": 2.88, "grad_norm": 2.1575703620910645, "learning_rate": 4.749178375474448e-05, "loss": 1.325, "step": 9615 }, { "epoch": 2.88, "grad_norm": 1.1499351263046265, "learning_rate": 4.7489218205750295e-05, "loss": 1.3551, "step": 9620 }, { "epoch": 2.88, "grad_norm": 2.3958709239959717, "learning_rate": 4.7486651414699965e-05, "loss": 1.2264, "step": 9625 }, { "epoch": 2.88, "grad_norm": 1.359483242034912, "learning_rate": 4.748408338173525e-05, "loss": 1.3342, "step": 9630 }, { "epoch": 2.88, "grad_norm": 1.6914485692977905, "learning_rate": 4.7481514106997975e-05, "loss": 1.4204, "step": 9635 }, { "epoch": 2.88, "grad_norm": 3.1051025390625, "learning_rate": 4.747894359063005e-05, "loss": 1.273, "step": 9640 }, { "epoch": 2.89, "grad_norm": 1.179664134979248, "learning_rate": 4.747637183277343e-05, "loss": 1.1155, "step": 9645 }, { "epoch": 2.89, "grad_norm": 1.4367886781692505, "learning_rate": 4.747379883357016e-05, "loss": 1.2716, "step": 9650 }, { "epoch": 2.89, "grad_norm": 0.8933409452438354, 
"learning_rate": 4.7471224593162346e-05, "loss": 1.2237, "step": 9655 }, { "epoch": 2.89, "grad_norm": 1.1432609558105469, "learning_rate": 4.7468649111692145e-05, "loss": 1.4141, "step": 9660 }, { "epoch": 2.89, "grad_norm": 1.6368004083633423, "learning_rate": 4.746607238930182e-05, "loss": 1.3289, "step": 9665 }, { "epoch": 2.89, "grad_norm": 1.7630443572998047, "learning_rate": 4.746349442613366e-05, "loss": 1.2229, "step": 9670 }, { "epoch": 2.89, "grad_norm": 1.9641880989074707, "learning_rate": 4.7460915222330054e-05, "loss": 1.2851, "step": 9675 }, { "epoch": 2.9, "grad_norm": 1.504604458808899, "learning_rate": 4.7458334778033446e-05, "loss": 1.2702, "step": 9680 }, { "epoch": 2.9, "grad_norm": 1.346604585647583, "learning_rate": 4.745575309338636e-05, "loss": 1.3684, "step": 9685 }, { "epoch": 2.9, "grad_norm": 1.1776371002197266, "learning_rate": 4.745317016853137e-05, "loss": 1.2718, "step": 9690 }, { "epoch": 2.9, "grad_norm": 0.8132845163345337, "learning_rate": 4.745058600361112e-05, "loss": 1.5028, "step": 9695 }, { "epoch": 2.9, "grad_norm": 3.2717533111572266, "learning_rate": 4.7448000598768346e-05, "loss": 1.2721, "step": 9700 }, { "epoch": 2.9, "grad_norm": 1.1330522298812866, "learning_rate": 4.7445413954145834e-05, "loss": 1.1869, "step": 9705 }, { "epoch": 2.91, "grad_norm": 1.326873779296875, "learning_rate": 4.744282606988645e-05, "loss": 1.2496, "step": 9710 }, { "epoch": 2.91, "grad_norm": 0.6587218642234802, "learning_rate": 4.74402369461331e-05, "loss": 1.2123, "step": 9715 }, { "epoch": 2.91, "grad_norm": 1.4744004011154175, "learning_rate": 4.7437646583028784e-05, "loss": 1.4327, "step": 9720 }, { "epoch": 2.91, "grad_norm": 2.3345212936401367, "learning_rate": 4.7435054980716576e-05, "loss": 1.2863, "step": 9725 }, { "epoch": 2.91, "grad_norm": 2.557159185409546, "learning_rate": 4.74324621393396e-05, "loss": 1.4434, "step": 9730 }, { "epoch": 2.91, "grad_norm": 1.1000850200653076, "learning_rate": 4.742986805904106e-05, "loss": 
1.4368, "step": 9735 }, { "epoch": 2.91, "grad_norm": 1.9850451946258545, "learning_rate": 4.742727273996422e-05, "loss": 1.3663, "step": 9740 }, { "epoch": 2.92, "grad_norm": 1.2970614433288574, "learning_rate": 4.7424676182252414e-05, "loss": 1.1515, "step": 9745 }, { "epoch": 2.92, "grad_norm": 1.1286096572875977, "learning_rate": 4.742207838604906e-05, "loss": 1.4357, "step": 9750 }, { "epoch": 2.92, "grad_norm": 2.1564157009124756, "learning_rate": 4.741947935149762e-05, "loss": 1.29, "step": 9755 }, { "epoch": 2.92, "grad_norm": 1.436423897743225, "learning_rate": 4.741687907874164e-05, "loss": 1.3875, "step": 9760 }, { "epoch": 2.92, "grad_norm": 2.328244924545288, "learning_rate": 4.7414277567924723e-05, "loss": 1.2351, "step": 9765 }, { "epoch": 2.92, "grad_norm": 1.7096843719482422, "learning_rate": 4.741167481919056e-05, "loss": 1.3325, "step": 9770 }, { "epoch": 2.92, "grad_norm": 1.5687915086746216, "learning_rate": 4.740907083268289e-05, "loss": 1.3996, "step": 9775 }, { "epoch": 2.93, "grad_norm": 0.8579280376434326, "learning_rate": 4.7406465608545534e-05, "loss": 1.4976, "step": 9780 }, { "epoch": 2.93, "grad_norm": 1.4446935653686523, "learning_rate": 4.740385914692237e-05, "loss": 1.2704, "step": 9785 }, { "epoch": 2.93, "grad_norm": 0.7776928544044495, "learning_rate": 4.7401251447957354e-05, "loss": 1.4133, "step": 9790 }, { "epoch": 2.93, "grad_norm": 0.9897927641868591, "learning_rate": 4.73986425117945e-05, "loss": 1.3766, "step": 9795 }, { "epoch": 2.93, "grad_norm": 1.700108528137207, "learning_rate": 4.739603233857791e-05, "loss": 1.451, "step": 9800 }, { "epoch": 2.93, "grad_norm": 0.8862264156341553, "learning_rate": 4.7393420928451733e-05, "loss": 1.3687, "step": 9805 }, { "epoch": 2.94, "grad_norm": 1.4384057521820068, "learning_rate": 4.739080828156019e-05, "loss": 1.3752, "step": 9810 }, { "epoch": 2.94, "grad_norm": 0.7352772951126099, "learning_rate": 4.738819439804758e-05, "loss": 1.2511, "step": 9815 }, { "epoch": 2.94, 
"grad_norm": 0.937332272529602, "learning_rate": 4.738557927805827e-05, "loss": 1.3718, "step": 9820 }, { "epoch": 2.94, "grad_norm": 1.5250344276428223, "learning_rate": 4.738296292173668e-05, "loss": 1.3754, "step": 9825 }, { "epoch": 2.94, "grad_norm": 1.4641368389129639, "learning_rate": 4.7380345329227315e-05, "loss": 1.4706, "step": 9830 }, { "epoch": 2.94, "grad_norm": 2.3265063762664795, "learning_rate": 4.737772650067474e-05, "loss": 1.3016, "step": 9835 }, { "epoch": 2.94, "grad_norm": 1.2815556526184082, "learning_rate": 4.737510643622359e-05, "loss": 1.4134, "step": 9840 }, { "epoch": 2.95, "grad_norm": 1.8234738111495972, "learning_rate": 4.7372485136018577e-05, "loss": 1.2904, "step": 9845 }, { "epoch": 2.95, "grad_norm": 1.526788353919983, "learning_rate": 4.736986260020445e-05, "loss": 1.222, "step": 9850 }, { "epoch": 2.95, "grad_norm": 2.026932716369629, "learning_rate": 4.736723882892607e-05, "loss": 1.3834, "step": 9855 }, { "epoch": 2.95, "grad_norm": 1.5664438009262085, "learning_rate": 4.736461382232835e-05, "loss": 1.4893, "step": 9860 }, { "epoch": 2.95, "grad_norm": 0.8656167387962341, "learning_rate": 4.736198758055624e-05, "loss": 1.2126, "step": 9865 }, { "epoch": 2.95, "grad_norm": 2.0590291023254395, "learning_rate": 4.735936010375481e-05, "loss": 1.504, "step": 9870 }, { "epoch": 2.95, "grad_norm": 1.066401481628418, "learning_rate": 4.7356731392069154e-05, "loss": 1.3136, "step": 9875 }, { "epoch": 2.96, "grad_norm": 1.326509952545166, "learning_rate": 4.7354101445644475e-05, "loss": 1.2444, "step": 9880 }, { "epoch": 2.96, "grad_norm": 1.4632080793380737, "learning_rate": 4.7351470264625995e-05, "loss": 1.4441, "step": 9885 }, { "epoch": 2.96, "grad_norm": 3.273226261138916, "learning_rate": 4.734883784915905e-05, "loss": 1.2485, "step": 9890 }, { "epoch": 2.96, "grad_norm": 1.4411187171936035, "learning_rate": 4.734620419938902e-05, "loss": 1.2447, "step": 9895 }, { "epoch": 2.96, "grad_norm": 1.597489833831787, "learning_rate": 
4.734356931546137e-05, "loss": 1.3136, "step": 9900 }, { "epoch": 2.96, "grad_norm": 1.6306302547454834, "learning_rate": 4.7340933197521595e-05, "loss": 1.4123, "step": 9905 }, { "epoch": 2.96, "grad_norm": 1.817524790763855, "learning_rate": 4.7338295845715316e-05, "loss": 1.4538, "step": 9910 }, { "epoch": 2.97, "grad_norm": 1.2677677869796753, "learning_rate": 4.733565726018816e-05, "loss": 1.2595, "step": 9915 }, { "epoch": 2.97, "grad_norm": 1.2904586791992188, "learning_rate": 4.7333017441085884e-05, "loss": 1.3257, "step": 9920 }, { "epoch": 2.97, "grad_norm": 1.0129778385162354, "learning_rate": 4.733037638855427e-05, "loss": 1.2028, "step": 9925 }, { "epoch": 2.97, "grad_norm": 0.7746032476425171, "learning_rate": 4.732773410273917e-05, "loss": 1.174, "step": 9930 }, { "epoch": 2.97, "grad_norm": 2.065194845199585, "learning_rate": 4.732509058378653e-05, "loss": 1.2716, "step": 9935 }, { "epoch": 2.97, "grad_norm": 1.3352677822113037, "learning_rate": 4.732244583184234e-05, "loss": 1.4513, "step": 9940 }, { "epoch": 2.98, "grad_norm": 1.7781025171279907, "learning_rate": 4.731979984705267e-05, "loss": 1.3069, "step": 9945 }, { "epoch": 2.98, "grad_norm": 1.0501564741134644, "learning_rate": 4.731715262956365e-05, "loss": 1.3894, "step": 9950 }, { "epoch": 2.98, "grad_norm": 2.136993885040283, "learning_rate": 4.7314504179521505e-05, "loss": 1.3747, "step": 9955 }, { "epoch": 2.98, "grad_norm": 1.8982741832733154, "learning_rate": 4.7311854497072474e-05, "loss": 1.4344, "step": 9960 }, { "epoch": 2.98, "grad_norm": 2.735962390899658, "learning_rate": 4.730920358236291e-05, "loss": 1.3851, "step": 9965 }, { "epoch": 2.98, "grad_norm": 1.7903727293014526, "learning_rate": 4.730655143553922e-05, "loss": 1.3806, "step": 9970 }, { "epoch": 2.98, "grad_norm": 1.4778308868408203, "learning_rate": 4.7303898056747895e-05, "loss": 1.312, "step": 9975 }, { "epoch": 2.99, "grad_norm": 4.839424133300781, "learning_rate": 4.730124344613545e-05, "loss": 1.3264, "step": 
9980 }, { "epoch": 2.99, "grad_norm": 1.6125905513763428, "learning_rate": 4.729858760384851e-05, "loss": 1.3275, "step": 9985 }, { "epoch": 2.99, "grad_norm": 1.407729148864746, "learning_rate": 4.7295930530033765e-05, "loss": 1.401, "step": 9990 }, { "epoch": 2.99, "grad_norm": 0.9895592331886292, "learning_rate": 4.729327222483795e-05, "loss": 1.3163, "step": 9995 }, { "epoch": 2.99, "grad_norm": 1.6113483905792236, "learning_rate": 4.729061268840788e-05, "loss": 1.3118, "step": 10000 }, { "epoch": 2.99, "grad_norm": 2.3144493103027344, "learning_rate": 4.728795192089044e-05, "loss": 1.4342, "step": 10005 }, { "epoch": 2.99, "grad_norm": 2.2775607109069824, "learning_rate": 4.728528992243258e-05, "loss": 1.3856, "step": 10010 }, { "epoch": 3.0, "grad_norm": 0.822791576385498, "learning_rate": 4.728262669318132e-05, "loss": 1.3132, "step": 10015 }, { "epoch": 3.0, "grad_norm": 0.9759201407432556, "learning_rate": 4.727996223328376e-05, "loss": 1.3979, "step": 10020 }, { "epoch": 3.0, "grad_norm": 1.3269437551498413, "learning_rate": 4.727729654288704e-05, "loss": 1.3228, "step": 10025 }, { "epoch": 3.0, "grad_norm": 0.9397942423820496, "learning_rate": 4.7274629622138384e-05, "loss": 1.2653, "step": 10030 }, { "epoch": 3.0, "grad_norm": 0.8297321200370789, "learning_rate": 4.7271961471185086e-05, "loss": 1.2575, "step": 10035 }, { "epoch": 3.0, "grad_norm": 1.417336106300354, "learning_rate": 4.7269292090174514e-05, "loss": 1.2264, "step": 10040 }, { "epoch": 3.01, "grad_norm": 2.3243861198425293, "learning_rate": 4.7266621479254084e-05, "loss": 1.2769, "step": 10045 }, { "epoch": 3.01, "grad_norm": 0.8861342072486877, "learning_rate": 4.726394963857129e-05, "loss": 1.137, "step": 10050 }, { "epoch": 3.01, "grad_norm": 1.0285983085632324, "learning_rate": 4.72612765682737e-05, "loss": 1.3641, "step": 10055 }, { "epoch": 3.01, "grad_norm": 1.3051286935806274, "learning_rate": 4.725860226850896e-05, "loss": 1.2869, "step": 10060 }, { "epoch": 3.01, "grad_norm": 
1.4701013565063477, "learning_rate": 4.725592673942473e-05, "loss": 1.3181, "step": 10065 }, { "epoch": 3.01, "grad_norm": 1.0764509439468384, "learning_rate": 4.725324998116881e-05, "loss": 1.1691, "step": 10070 }, { "epoch": 3.01, "grad_norm": 0.9032073616981506, "learning_rate": 4.7250571993889025e-05, "loss": 1.181, "step": 10075 }, { "epoch": 3.02, "grad_norm": 1.8329408168792725, "learning_rate": 4.724789277773328e-05, "loss": 1.2904, "step": 10080 }, { "epoch": 3.02, "grad_norm": 1.2701138257980347, "learning_rate": 4.7245212332849544e-05, "loss": 1.3036, "step": 10085 }, { "epoch": 3.02, "grad_norm": 1.293056607246399, "learning_rate": 4.7242530659385845e-05, "loss": 1.4167, "step": 10090 }, { "epoch": 3.02, "grad_norm": 0.7726661562919617, "learning_rate": 4.72398477574903e-05, "loss": 1.3607, "step": 10095 }, { "epoch": 3.02, "grad_norm": 1.5324546098709106, "learning_rate": 4.7237163627311084e-05, "loss": 1.475, "step": 10100 }, { "epoch": 3.02, "grad_norm": 1.288621187210083, "learning_rate": 4.723447826899644e-05, "loss": 1.3092, "step": 10105 }, { "epoch": 3.02, "grad_norm": 1.084633469581604, "learning_rate": 4.723179168269466e-05, "loss": 1.2224, "step": 10110 }, { "epoch": 3.03, "grad_norm": 1.096497654914856, "learning_rate": 4.7229103868554136e-05, "loss": 1.2714, "step": 10115 }, { "epoch": 3.03, "grad_norm": 1.8230210542678833, "learning_rate": 4.722641482672332e-05, "loss": 1.3204, "step": 10120 }, { "epoch": 3.03, "grad_norm": 2.226288080215454, "learning_rate": 4.7223724557350714e-05, "loss": 1.3604, "step": 10125 }, { "epoch": 3.03, "grad_norm": 0.8786224722862244, "learning_rate": 4.7221033060584897e-05, "loss": 1.3312, "step": 10130 }, { "epoch": 3.03, "grad_norm": 0.790179431438446, "learning_rate": 4.7218340336574516e-05, "loss": 1.2433, "step": 10135 }, { "epoch": 3.03, "grad_norm": 0.7193413376808167, "learning_rate": 4.7215646385468304e-05, "loss": 1.1751, "step": 10140 }, { "epoch": 3.04, "grad_norm": 1.1704049110412598, 
"learning_rate": 4.721295120741503e-05, "loss": 1.2481, "step": 10145 }, { "epoch": 3.04, "grad_norm": 1.1936416625976562, "learning_rate": 4.7210254802563547e-05, "loss": 1.382, "step": 10150 }, { "epoch": 3.04, "grad_norm": 1.2714142799377441, "learning_rate": 4.7207557171062784e-05, "loss": 1.3151, "step": 10155 }, { "epoch": 3.04, "grad_norm": 1.973885178565979, "learning_rate": 4.7204858313061715e-05, "loss": 1.3476, "step": 10160 }, { "epoch": 3.04, "grad_norm": 1.704947829246521, "learning_rate": 4.7202158228709404e-05, "loss": 1.2753, "step": 10165 }, { "epoch": 3.04, "grad_norm": 1.6987488269805908, "learning_rate": 4.719945691815498e-05, "loss": 1.3038, "step": 10170 }, { "epoch": 3.04, "grad_norm": 1.8484985828399658, "learning_rate": 4.719675438154761e-05, "loss": 1.2721, "step": 10175 }, { "epoch": 3.05, "grad_norm": 1.5991469621658325, "learning_rate": 4.719405061903658e-05, "loss": 1.2864, "step": 10180 }, { "epoch": 3.05, "grad_norm": 0.9577586054801941, "learning_rate": 4.71913456307712e-05, "loss": 1.2446, "step": 10185 }, { "epoch": 3.05, "grad_norm": 1.2344039678573608, "learning_rate": 4.718863941690087e-05, "loss": 1.3046, "step": 10190 }, { "epoch": 3.05, "grad_norm": 1.5823438167572021, "learning_rate": 4.7185931977575046e-05, "loss": 1.2316, "step": 10195 }, { "epoch": 3.05, "grad_norm": 1.7910969257354736, "learning_rate": 4.7183223312943257e-05, "loss": 1.3293, "step": 10200 }, { "epoch": 3.05, "grad_norm": 1.5367566347122192, "learning_rate": 4.7180513423155105e-05, "loss": 1.2248, "step": 10205 }, { "epoch": 3.05, "grad_norm": 1.397963047027588, "learning_rate": 4.717780230836025e-05, "loss": 1.3568, "step": 10210 }, { "epoch": 3.06, "grad_norm": 1.2412103414535522, "learning_rate": 4.717508996870843e-05, "loss": 1.2943, "step": 10215 }, { "epoch": 3.06, "grad_norm": 2.0877139568328857, "learning_rate": 4.7172376404349436e-05, "loss": 1.415, "step": 10220 }, { "epoch": 3.06, "grad_norm": 1.540336012840271, "learning_rate": 
4.716966161543315e-05, "loss": 1.1981, "step": 10225 }, { "epoch": 3.06, "grad_norm": 1.3535550832748413, "learning_rate": 4.716694560210949e-05, "loss": 1.2493, "step": 10230 }, { "epoch": 3.06, "grad_norm": 1.6786630153656006, "learning_rate": 4.7164228364528464e-05, "loss": 1.2774, "step": 10235 }, { "epoch": 3.06, "grad_norm": 1.0593065023422241, "learning_rate": 4.716150990284015e-05, "loss": 1.1643, "step": 10240 }, { "epoch": 3.07, "grad_norm": 1.656699299812317, "learning_rate": 4.715879021719467e-05, "loss": 1.2421, "step": 10245 }, { "epoch": 3.07, "grad_norm": 1.0437387228012085, "learning_rate": 4.7156069307742244e-05, "loss": 1.2925, "step": 10250 }, { "epoch": 3.07, "grad_norm": 1.7811616659164429, "learning_rate": 4.7153347174633145e-05, "loss": 1.3529, "step": 10255 }, { "epoch": 3.07, "grad_norm": 1.5187066793441772, "learning_rate": 4.71506238180177e-05, "loss": 1.5044, "step": 10260 }, { "epoch": 3.07, "grad_norm": 2.582772731781006, "learning_rate": 4.714789923804633e-05, "loss": 1.4396, "step": 10265 }, { "epoch": 3.07, "grad_norm": 1.7767497301101685, "learning_rate": 4.714517343486951e-05, "loss": 1.2826, "step": 10270 }, { "epoch": 3.07, "grad_norm": 1.2945784330368042, "learning_rate": 4.7142446408637774e-05, "loss": 1.2212, "step": 10275 }, { "epoch": 3.08, "grad_norm": 0.8587417006492615, "learning_rate": 4.7139718159501747e-05, "loss": 1.2171, "step": 10280 }, { "epoch": 3.08, "grad_norm": 3.030939817428589, "learning_rate": 4.71369886876121e-05, "loss": 1.3301, "step": 10285 }, { "epoch": 3.08, "grad_norm": 1.777191162109375, "learning_rate": 4.7134257993119564e-05, "loss": 1.2228, "step": 10290 }, { "epoch": 3.08, "grad_norm": 1.1202077865600586, "learning_rate": 4.713152607617497e-05, "loss": 1.2215, "step": 10295 }, { "epoch": 3.08, "grad_norm": 0.6653339862823486, "learning_rate": 4.71287929369292e-05, "loss": 1.4115, "step": 10300 }, { "epoch": 3.08, "grad_norm": 1.3566327095031738, "learning_rate": 4.712605857553319e-05, "loss": 
1.332, "step": 10305 }, { "epoch": 3.08, "grad_norm": 1.673338532447815, "learning_rate": 4.7123322992137975e-05, "loss": 1.4227, "step": 10310 }, { "epoch": 3.09, "grad_norm": 0.8734028339385986, "learning_rate": 4.712058618689462e-05, "loss": 1.2329, "step": 10315 }, { "epoch": 3.09, "grad_norm": 1.6159021854400635, "learning_rate": 4.7117848159954294e-05, "loss": 1.1735, "step": 10320 }, { "epoch": 3.09, "grad_norm": 1.456439733505249, "learning_rate": 4.7115108911468194e-05, "loss": 1.3611, "step": 10325 }, { "epoch": 3.09, "grad_norm": 1.3023260831832886, "learning_rate": 4.7112368441587615e-05, "loss": 1.3157, "step": 10330 }, { "epoch": 3.09, "grad_norm": 0.9196366667747498, "learning_rate": 4.710962675046391e-05, "loss": 1.3092, "step": 10335 }, { "epoch": 3.09, "grad_norm": 0.8758362531661987, "learning_rate": 4.7106883838248505e-05, "loss": 1.4856, "step": 10340 }, { "epoch": 3.1, "grad_norm": 1.404257893562317, "learning_rate": 4.710413970509289e-05, "loss": 1.3546, "step": 10345 }, { "epoch": 3.1, "grad_norm": 2.7018842697143555, "learning_rate": 4.710139435114861e-05, "loss": 1.196, "step": 10350 }, { "epoch": 3.1, "grad_norm": 1.2335234880447388, "learning_rate": 4.709864777656729e-05, "loss": 1.145, "step": 10355 }, { "epoch": 3.1, "grad_norm": 2.3632590770721436, "learning_rate": 4.709589998150063e-05, "loss": 1.4927, "step": 10360 }, { "epoch": 3.1, "grad_norm": 0.9068330526351929, "learning_rate": 4.709315096610038e-05, "loss": 1.2923, "step": 10365 }, { "epoch": 3.1, "grad_norm": 1.093493103981018, "learning_rate": 4.709040073051837e-05, "loss": 1.2462, "step": 10370 }, { "epoch": 3.1, "grad_norm": 0.9117778539657593, "learning_rate": 4.7087649274906475e-05, "loss": 1.2301, "step": 10375 }, { "epoch": 3.11, "grad_norm": 1.3600761890411377, "learning_rate": 4.7084896599416685e-05, "loss": 1.3659, "step": 10380 }, { "epoch": 3.11, "grad_norm": 2.149157762527466, "learning_rate": 4.7082142704200996e-05, "loss": 1.2298, "step": 10385 }, { "epoch": 
3.11, "grad_norm": 1.4319911003112793, "learning_rate": 4.707938758941153e-05, "loss": 1.2099, "step": 10390 }, { "epoch": 3.11, "grad_norm": 0.8007309436798096, "learning_rate": 4.7076631255200436e-05, "loss": 1.208, "step": 10395 }, { "epoch": 3.11, "grad_norm": 2.5758299827575684, "learning_rate": 4.707387370171995e-05, "loss": 1.457, "step": 10400 }, { "epoch": 3.11, "grad_norm": 1.3891574144363403, "learning_rate": 4.707111492912235e-05, "loss": 1.1751, "step": 10405 }, { "epoch": 3.11, "grad_norm": 1.3793649673461914, "learning_rate": 4.7068354937560026e-05, "loss": 1.2248, "step": 10410 }, { "epoch": 3.12, "grad_norm": 1.7429753541946411, "learning_rate": 4.7065593727185395e-05, "loss": 1.3831, "step": 10415 }, { "epoch": 3.12, "grad_norm": 0.9534806609153748, "learning_rate": 4.706283129815095e-05, "loss": 1.3393, "step": 10420 }, { "epoch": 3.12, "grad_norm": 0.9170734286308289, "learning_rate": 4.706006765060928e-05, "loss": 1.2613, "step": 10425 }, { "epoch": 3.12, "grad_norm": 1.9768500328063965, "learning_rate": 4.7057302784713e-05, "loss": 1.3073, "step": 10430 }, { "epoch": 3.12, "grad_norm": 1.7326947450637817, "learning_rate": 4.705453670061481e-05, "loss": 1.2648, "step": 10435 }, { "epoch": 3.12, "grad_norm": 4.0265069007873535, "learning_rate": 4.7051769398467484e-05, "loss": 1.3263, "step": 10440 }, { "epoch": 3.13, "grad_norm": 1.0905771255493164, "learning_rate": 4.7049000878423856e-05, "loss": 1.1149, "step": 10445 }, { "epoch": 3.13, "grad_norm": 2.596532106399536, "learning_rate": 4.7046231140636826e-05, "loss": 1.3235, "step": 10450 }, { "epoch": 3.13, "grad_norm": 1.497429370880127, "learning_rate": 4.704346018525937e-05, "loss": 1.3615, "step": 10455 }, { "epoch": 3.13, "grad_norm": 0.9473041892051697, "learning_rate": 4.704068801244452e-05, "loss": 1.2733, "step": 10460 }, { "epoch": 3.13, "grad_norm": 3.628065824508667, "learning_rate": 4.703791462234537e-05, "loss": 1.3351, "step": 10465 }, { "epoch": 3.13, "grad_norm": 
0.9832010865211487, "learning_rate": 4.703514001511512e-05, "loss": 1.3246, "step": 10470 }, { "epoch": 3.13, "grad_norm": 1.0642294883728027, "learning_rate": 4.7032364190906985e-05, "loss": 1.309, "step": 10475 }, { "epoch": 3.14, "grad_norm": 2.5547728538513184, "learning_rate": 4.702958714987427e-05, "loss": 1.2542, "step": 10480 }, { "epoch": 3.14, "grad_norm": 2.006378173828125, "learning_rate": 4.702680889217036e-05, "loss": 1.2813, "step": 10485 }, { "epoch": 3.14, "grad_norm": 0.6523733735084534, "learning_rate": 4.702402941794869e-05, "loss": 1.2206, "step": 10490 }, { "epoch": 3.14, "grad_norm": 1.642492651939392, "learning_rate": 4.702124872736277e-05, "loss": 1.3215, "step": 10495 }, { "epoch": 3.14, "grad_norm": 3.4225122928619385, "learning_rate": 4.7018466820566174e-05, "loss": 1.2342, "step": 10500 }, { "epoch": 3.14, "grad_norm": 1.2977136373519897, "learning_rate": 4.701568369771254e-05, "loss": 1.3561, "step": 10505 }, { "epoch": 3.14, "grad_norm": 2.203577995300293, "learning_rate": 4.701289935895558e-05, "loss": 1.2129, "step": 10510 }, { "epoch": 3.15, "grad_norm": 1.4925588369369507, "learning_rate": 4.701011380444907e-05, "loss": 1.2579, "step": 10515 }, { "epoch": 3.15, "grad_norm": 2.624371290206909, "learning_rate": 4.700732703434685e-05, "loss": 1.4046, "step": 10520 }, { "epoch": 3.15, "grad_norm": 1.6034959554672241, "learning_rate": 4.7004539048802834e-05, "loss": 1.2614, "step": 10525 }, { "epoch": 3.15, "grad_norm": 1.3191356658935547, "learning_rate": 4.7001749847971e-05, "loss": 1.2469, "step": 10530 }, { "epoch": 3.15, "grad_norm": 1.2245357036590576, "learning_rate": 4.6998959432005393e-05, "loss": 1.3599, "step": 10535 }, { "epoch": 3.15, "grad_norm": 0.8731427192687988, "learning_rate": 4.699616780106012e-05, "loss": 1.2735, "step": 10540 }, { "epoch": 3.15, "grad_norm": 2.4979584217071533, "learning_rate": 4.699337495528937e-05, "loss": 1.3179, "step": 10545 }, { "epoch": 3.16, "grad_norm": 0.922391414642334, 
"learning_rate": 4.699058089484737e-05, "loss": 1.3218, "step": 10550 }, { "epoch": 3.16, "grad_norm": 1.5169897079467773, "learning_rate": 4.6987785619888455e-05, "loss": 1.3695, "step": 10555 }, { "epoch": 3.16, "grad_norm": 1.7197904586791992, "learning_rate": 4.698498913056699e-05, "loss": 1.3514, "step": 10560 }, { "epoch": 3.16, "grad_norm": 1.886755108833313, "learning_rate": 4.698219142703743e-05, "loss": 1.3594, "step": 10565 }, { "epoch": 3.16, "grad_norm": 2.066739082336426, "learning_rate": 4.6979392509454286e-05, "loss": 1.2485, "step": 10570 }, { "epoch": 3.16, "grad_norm": 1.8264986276626587, "learning_rate": 4.697659237797214e-05, "loss": 1.2989, "step": 10575 }, { "epoch": 3.17, "grad_norm": 1.5807194709777832, "learning_rate": 4.697379103274564e-05, "loss": 1.1532, "step": 10580 }, { "epoch": 3.17, "grad_norm": 2.379920244216919, "learning_rate": 4.69709884739295e-05, "loss": 1.1664, "step": 10585 }, { "epoch": 3.17, "grad_norm": 2.331644296646118, "learning_rate": 4.696818470167851e-05, "loss": 1.2713, "step": 10590 }, { "epoch": 3.17, "grad_norm": 2.402665376663208, "learning_rate": 4.6965379716147504e-05, "loss": 1.3309, "step": 10595 }, { "epoch": 3.17, "grad_norm": 3.28230881690979, "learning_rate": 4.6962573517491414e-05, "loss": 1.2569, "step": 10600 }, { "epoch": 3.17, "grad_norm": 1.6851478815078735, "learning_rate": 4.6959766105865225e-05, "loss": 1.4249, "step": 10605 }, { "epoch": 3.17, "grad_norm": 3.4568557739257812, "learning_rate": 4.695695748142397e-05, "loss": 1.4145, "step": 10610 }, { "epoch": 3.18, "grad_norm": 3.6388516426086426, "learning_rate": 4.695414764432278e-05, "loss": 1.3806, "step": 10615 }, { "epoch": 3.18, "grad_norm": 2.0879287719726562, "learning_rate": 4.695133659471683e-05, "loss": 1.2937, "step": 10620 }, { "epoch": 3.18, "grad_norm": 1.9023692607879639, "learning_rate": 4.694852433276138e-05, "loss": 1.3197, "step": 10625 }, { "epoch": 3.18, "grad_norm": 3.421626329421997, "learning_rate": 
4.6945710858611746e-05, "loss": 1.2769, "step": 10630 }, { "epoch": 3.18, "grad_norm": 1.5215307474136353, "learning_rate": 4.694289617242331e-05, "loss": 1.3148, "step": 10635 }, { "epoch": 3.18, "grad_norm": 1.3588393926620483, "learning_rate": 4.694008027435154e-05, "loss": 1.2765, "step": 10640 }, { "epoch": 3.18, "grad_norm": 1.517864465713501, "learning_rate": 4.6937263164551926e-05, "loss": 1.1829, "step": 10645 }, { "epoch": 3.19, "grad_norm": 1.18218994140625, "learning_rate": 4.693444484318008e-05, "loss": 1.3342, "step": 10650 }, { "epoch": 3.19, "grad_norm": 0.9616156816482544, "learning_rate": 4.693162531039164e-05, "loss": 1.4308, "step": 10655 }, { "epoch": 3.19, "grad_norm": 1.3494954109191895, "learning_rate": 4.692880456634233e-05, "loss": 1.2483, "step": 10660 }, { "epoch": 3.19, "grad_norm": 1.6697038412094116, "learning_rate": 4.6925982611187934e-05, "loss": 1.2745, "step": 10665 }, { "epoch": 3.19, "grad_norm": 1.6610711812973022, "learning_rate": 4.692315944508432e-05, "loss": 1.3191, "step": 10670 }, { "epoch": 3.19, "grad_norm": 2.5130321979522705, "learning_rate": 4.692033506818739e-05, "loss": 1.0889, "step": 10675 }, { "epoch": 3.2, "grad_norm": 1.1333353519439697, "learning_rate": 4.6917509480653146e-05, "loss": 1.1744, "step": 10680 }, { "epoch": 3.2, "grad_norm": 0.9024247527122498, "learning_rate": 4.6914682682637626e-05, "loss": 1.2681, "step": 10685 }, { "epoch": 3.2, "grad_norm": 1.6913889646530151, "learning_rate": 4.6911854674296964e-05, "loss": 1.246, "step": 10690 }, { "epoch": 3.2, "grad_norm": 2.867292881011963, "learning_rate": 4.690902545578735e-05, "loss": 1.3205, "step": 10695 }, { "epoch": 3.2, "grad_norm": 1.3072094917297363, "learning_rate": 4.690619502726502e-05, "loss": 1.3059, "step": 10700 }, { "epoch": 3.2, "grad_norm": 1.9709635972976685, "learning_rate": 4.6903363388886325e-05, "loss": 1.321, "step": 10705 }, { "epoch": 3.2, "grad_norm": 1.7038654088974, "learning_rate": 4.6900530540807624e-05, "loss": 1.3357, 
"step": 10710 }, { "epoch": 3.21, "grad_norm": 0.885414183139801, "learning_rate": 4.68976964831854e-05, "loss": 1.2921, "step": 10715 }, { "epoch": 3.21, "grad_norm": 1.5345251560211182, "learning_rate": 4.689486121617615e-05, "loss": 1.4398, "step": 10720 }, { "epoch": 3.21, "grad_norm": 1.8840970993041992, "learning_rate": 4.689202473993647e-05, "loss": 1.1967, "step": 10725 }, { "epoch": 3.21, "grad_norm": 1.861816167831421, "learning_rate": 4.6889187054623016e-05, "loss": 1.2309, "step": 10730 }, { "epoch": 3.21, "grad_norm": 1.6276249885559082, "learning_rate": 4.688634816039253e-05, "loss": 1.2227, "step": 10735 }, { "epoch": 3.21, "grad_norm": 1.3406356573104858, "learning_rate": 4.688350805740178e-05, "loss": 1.3609, "step": 10740 }, { "epoch": 3.21, "grad_norm": 2.407961130142212, "learning_rate": 4.6880666745807625e-05, "loss": 1.0896, "step": 10745 }, { "epoch": 3.22, "grad_norm": 1.9864040613174438, "learning_rate": 4.687782422576698e-05, "loss": 1.2063, "step": 10750 }, { "epoch": 3.22, "grad_norm": 1.8912273645401, "learning_rate": 4.687498049743685e-05, "loss": 1.2833, "step": 10755 }, { "epoch": 3.22, "grad_norm": 1.490443468093872, "learning_rate": 4.6872135560974285e-05, "loss": 1.3106, "step": 10760 }, { "epoch": 3.22, "grad_norm": 2.050848960876465, "learning_rate": 4.686928941653641e-05, "loss": 1.2577, "step": 10765 }, { "epoch": 3.22, "grad_norm": 1.508426308631897, "learning_rate": 4.686644206428041e-05, "loss": 1.2863, "step": 10770 }, { "epoch": 3.22, "grad_norm": 1.549778699874878, "learning_rate": 4.686359350436354e-05, "loss": 1.1178, "step": 10775 }, { "epoch": 3.23, "grad_norm": 1.004459023475647, "learning_rate": 4.6860743736943134e-05, "loss": 1.3404, "step": 10780 }, { "epoch": 3.23, "grad_norm": 3.65075945854187, "learning_rate": 4.685789276217658e-05, "loss": 1.449, "step": 10785 }, { "epoch": 3.23, "grad_norm": 0.863338053226471, "learning_rate": 4.6855040580221323e-05, "loss": 1.1967, "step": 10790 }, { "epoch": 3.23, 
"grad_norm": 1.1816414594650269, "learning_rate": 4.685218719123489e-05, "loss": 1.2813, "step": 10795 }, { "epoch": 3.23, "grad_norm": 1.7856791019439697, "learning_rate": 4.6849332595374864e-05, "loss": 1.4305, "step": 10800 }, { "epoch": 3.23, "grad_norm": 1.4339046478271484, "learning_rate": 4.684647679279892e-05, "loss": 1.3854, "step": 10805 }, { "epoch": 3.23, "grad_norm": 0.8699505925178528, "learning_rate": 4.684361978366477e-05, "loss": 1.2915, "step": 10810 }, { "epoch": 3.24, "grad_norm": 1.3646979331970215, "learning_rate": 4.6840761568130206e-05, "loss": 1.3623, "step": 10815 }, { "epoch": 3.24, "grad_norm": 2.1360511779785156, "learning_rate": 4.6837902146353076e-05, "loss": 1.2835, "step": 10820 }, { "epoch": 3.24, "grad_norm": 1.1500669717788696, "learning_rate": 4.683504151849132e-05, "loss": 1.2727, "step": 10825 }, { "epoch": 3.24, "grad_norm": 4.0960187911987305, "learning_rate": 4.68321796847029e-05, "loss": 1.2047, "step": 10830 }, { "epoch": 3.24, "grad_norm": 0.9620679020881653, "learning_rate": 4.68293166451459e-05, "loss": 1.2046, "step": 10835 }, { "epoch": 3.24, "grad_norm": 1.1444592475891113, "learning_rate": 4.6826452399978436e-05, "loss": 1.391, "step": 10840 }, { "epoch": 3.24, "grad_norm": 2.369436740875244, "learning_rate": 4.6823586949358686e-05, "loss": 1.2937, "step": 10845 }, { "epoch": 3.25, "grad_norm": 1.327525019645691, "learning_rate": 4.682072029344492e-05, "loss": 1.4105, "step": 10850 }, { "epoch": 3.25, "grad_norm": 1.3739736080169678, "learning_rate": 4.681785243239545e-05, "loss": 1.4381, "step": 10855 }, { "epoch": 3.25, "grad_norm": 1.522316813468933, "learning_rate": 4.681498336636867e-05, "loss": 1.3176, "step": 10860 }, { "epoch": 3.25, "grad_norm": 1.123241901397705, "learning_rate": 4.681211309552304e-05, "loss": 1.2045, "step": 10865 }, { "epoch": 3.25, "grad_norm": 0.9612917900085449, "learning_rate": 4.680924162001706e-05, "loss": 1.3388, "step": 10870 }, { "epoch": 3.25, "grad_norm": 1.3160079717636108, 
"learning_rate": 4.680636894000935e-05, "loss": 1.338, "step": 10875 }, { "epoch": 3.26, "grad_norm": 0.9297557473182678, "learning_rate": 4.680349505565854e-05, "loss": 1.2748, "step": 10880 }, { "epoch": 3.26, "grad_norm": 0.926292359828949, "learning_rate": 4.6800619967123373e-05, "loss": 1.3359, "step": 10885 }, { "epoch": 3.26, "grad_norm": 1.0277527570724487, "learning_rate": 4.679774367456261e-05, "loss": 1.2692, "step": 10890 }, { "epoch": 3.26, "grad_norm": 1.3001703023910522, "learning_rate": 4.679486617813513e-05, "loss": 1.2465, "step": 10895 }, { "epoch": 3.26, "grad_norm": 1.934599757194519, "learning_rate": 4.679198747799984e-05, "loss": 1.318, "step": 10900 }, { "epoch": 3.26, "grad_norm": 1.1883105039596558, "learning_rate": 4.678910757431574e-05, "loss": 1.2774, "step": 10905 }, { "epoch": 3.26, "grad_norm": 2.791804075241089, "learning_rate": 4.678622646724188e-05, "loss": 1.3288, "step": 10910 }, { "epoch": 3.27, "grad_norm": 1.5142138004302979, "learning_rate": 4.678334415693737e-05, "loss": 1.444, "step": 10915 }, { "epoch": 3.27, "grad_norm": 1.1314914226531982, "learning_rate": 4.678046064356141e-05, "loss": 1.3735, "step": 10920 }, { "epoch": 3.27, "grad_norm": 2.2148430347442627, "learning_rate": 4.677757592727325e-05, "loss": 1.2055, "step": 10925 }, { "epoch": 3.27, "grad_norm": 1.7565327882766724, "learning_rate": 4.67746900082322e-05, "loss": 1.2262, "step": 10930 }, { "epoch": 3.27, "grad_norm": 4.066123962402344, "learning_rate": 4.677180288659766e-05, "loss": 1.3309, "step": 10935 }, { "epoch": 3.27, "grad_norm": 2.2226760387420654, "learning_rate": 4.676891456252908e-05, "loss": 1.3966, "step": 10940 }, { "epoch": 3.27, "grad_norm": 2.1616628170013428, "learning_rate": 4.676602503618597e-05, "loss": 1.1928, "step": 10945 }, { "epoch": 3.28, "grad_norm": 1.3349906206130981, "learning_rate": 4.6763134307727916e-05, "loss": 1.2268, "step": 10950 }, { "epoch": 3.28, "grad_norm": 1.6452038288116455, "learning_rate": 
4.676024237731459e-05, "loss": 1.1698, "step": 10955 }, { "epoch": 3.28, "grad_norm": 1.8451074361801147, "learning_rate": 4.675734924510569e-05, "loss": 1.2763, "step": 10960 }, { "epoch": 3.28, "grad_norm": 1.9175277948379517, "learning_rate": 4.6754454911261005e-05, "loss": 1.3425, "step": 10965 }, { "epoch": 3.28, "grad_norm": 1.0430094003677368, "learning_rate": 4.6751559375940384e-05, "loss": 1.324, "step": 10970 }, { "epoch": 3.28, "grad_norm": 4.597285747528076, "learning_rate": 4.674866263930375e-05, "loss": 1.2731, "step": 10975 }, { "epoch": 3.29, "grad_norm": 1.3398327827453613, "learning_rate": 4.674576470151109e-05, "loss": 1.3967, "step": 10980 }, { "epoch": 3.29, "grad_norm": 1.1260203123092651, "learning_rate": 4.674344548655217e-05, "loss": 1.4702, "step": 10985 }, { "epoch": 3.29, "grad_norm": 1.8859392404556274, "learning_rate": 4.674054538708202e-05, "loss": 1.3604, "step": 10990 }, { "epoch": 3.29, "grad_norm": 1.0552682876586914, "learning_rate": 4.6737644086904156e-05, "loss": 1.2871, "step": 10995 }, { "epoch": 3.29, "grad_norm": 1.1921944618225098, "learning_rate": 4.6734741586178794e-05, "loss": 1.411, "step": 11000 }, { "epoch": 3.29, "grad_norm": 1.534670352935791, "learning_rate": 4.673183788506625e-05, "loss": 1.1321, "step": 11005 }, { "epoch": 3.29, "grad_norm": 0.8128979802131653, "learning_rate": 4.672893298372688e-05, "loss": 1.1416, "step": 11010 }, { "epoch": 3.3, "grad_norm": 1.2440390586853027, "learning_rate": 4.672602688232114e-05, "loss": 1.3895, "step": 11015 }, { "epoch": 3.3, "grad_norm": 0.7895771861076355, "learning_rate": 4.672311958100951e-05, "loss": 1.2575, "step": 11020 }, { "epoch": 3.3, "grad_norm": 8.737719535827637, "learning_rate": 4.672021107995257e-05, "loss": 1.2813, "step": 11025 }, { "epoch": 3.3, "grad_norm": 1.1101980209350586, "learning_rate": 4.671730137931095e-05, "loss": 1.3107, "step": 11030 }, { "epoch": 3.3, "grad_norm": 1.0142308473587036, "learning_rate": 4.671439047924535e-05, "loss": 
1.2947, "step": 11035 }, { "epoch": 3.3, "grad_norm": 1.9950822591781616, "learning_rate": 4.671147837991653e-05, "loss": 1.4065, "step": 11040 }, { "epoch": 3.3, "grad_norm": 1.4179012775421143, "learning_rate": 4.670856508148532e-05, "loss": 1.3455, "step": 11045 }, { "epoch": 3.31, "grad_norm": 1.6330708265304565, "learning_rate": 4.670565058411264e-05, "loss": 1.3063, "step": 11050 }, { "epoch": 3.31, "grad_norm": 1.7123674154281616, "learning_rate": 4.6702734887959434e-05, "loss": 1.2452, "step": 11055 }, { "epoch": 3.31, "grad_norm": 1.1619246006011963, "learning_rate": 4.669981799318674e-05, "loss": 1.361, "step": 11060 }, { "epoch": 3.31, "grad_norm": 2.159376382827759, "learning_rate": 4.669689989995565e-05, "loss": 1.2923, "step": 11065 }, { "epoch": 3.31, "grad_norm": 1.8677573204040527, "learning_rate": 4.6693980608427326e-05, "loss": 1.2485, "step": 11070 }, { "epoch": 3.31, "grad_norm": 1.9316105842590332, "learning_rate": 4.669106011876301e-05, "loss": 1.2627, "step": 11075 }, { "epoch": 3.32, "grad_norm": 1.5687687397003174, "learning_rate": 4.6688138431124e-05, "loss": 1.5126, "step": 11080 }, { "epoch": 3.32, "grad_norm": 1.8287721872329712, "learning_rate": 4.6685215545671634e-05, "loss": 1.1865, "step": 11085 }, { "epoch": 3.32, "grad_norm": 1.737056851387024, "learning_rate": 4.668229146256735e-05, "loss": 1.2676, "step": 11090 }, { "epoch": 3.32, "grad_norm": 1.2314867973327637, "learning_rate": 4.667936618197266e-05, "loss": 1.2986, "step": 11095 }, { "epoch": 3.32, "grad_norm": 2.5323379039764404, "learning_rate": 4.66764397040491e-05, "loss": 1.2406, "step": 11100 }, { "epoch": 3.32, "grad_norm": 2.0463790893554688, "learning_rate": 4.667351202895831e-05, "loss": 1.3175, "step": 11105 }, { "epoch": 3.32, "grad_norm": 1.8490899801254272, "learning_rate": 4.6670583156861984e-05, "loss": 1.273, "step": 11110 }, { "epoch": 3.33, "grad_norm": 2.06270694732666, "learning_rate": 4.666765308792187e-05, "loss": 1.2421, "step": 11115 }, { "epoch": 
3.33, "grad_norm": 2.100942611694336, "learning_rate": 4.66647218222998e-05, "loss": 1.3169, "step": 11120 }, { "epoch": 3.33, "grad_norm": 2.400592565536499, "learning_rate": 4.666178936015767e-05, "loss": 1.4987, "step": 11125 }, { "epoch": 3.33, "grad_norm": 1.5925202369689941, "learning_rate": 4.665885570165742e-05, "loss": 1.2487, "step": 11130 }, { "epoch": 3.33, "grad_norm": 0.8102724552154541, "learning_rate": 4.665592084696108e-05, "loss": 1.2773, "step": 11135 }, { "epoch": 3.33, "grad_norm": 0.9131859540939331, "learning_rate": 4.665298479623075e-05, "loss": 1.0861, "step": 11140 }, { "epoch": 3.33, "grad_norm": 2.433790445327759, "learning_rate": 4.665004754962857e-05, "loss": 1.1546, "step": 11145 }, { "epoch": 3.34, "grad_norm": 1.100435495376587, "learning_rate": 4.664710910731677e-05, "loss": 1.2528, "step": 11150 }, { "epoch": 3.34, "grad_norm": 3.4649596214294434, "learning_rate": 4.6644169469457635e-05, "loss": 1.3513, "step": 11155 }, { "epoch": 3.34, "grad_norm": 1.005937933921814, "learning_rate": 4.664122863621352e-05, "loss": 1.2872, "step": 11160 }, { "epoch": 3.34, "grad_norm": 1.2074639797210693, "learning_rate": 4.663828660774684e-05, "loss": 1.341, "step": 11165 }, { "epoch": 3.34, "grad_norm": 2.826364040374756, "learning_rate": 4.663534338422009e-05, "loss": 1.2848, "step": 11170 }, { "epoch": 3.34, "grad_norm": 2.759699821472168, "learning_rate": 4.663239896579581e-05, "loss": 1.4064, "step": 11175 }, { "epoch": 3.34, "grad_norm": 2.0414791107177734, "learning_rate": 4.662945335263662e-05, "loss": 1.3405, "step": 11180 }, { "epoch": 3.35, "grad_norm": 1.7230271100997925, "learning_rate": 4.6626506544905194e-05, "loss": 1.3063, "step": 11185 }, { "epoch": 3.35, "grad_norm": 0.979017972946167, "learning_rate": 4.66235585427643e-05, "loss": 1.2698, "step": 11190 }, { "epoch": 3.35, "grad_norm": 1.1741644144058228, "learning_rate": 4.662060934637674e-05, "loss": 1.1668, "step": 11195 }, { "epoch": 3.35, "grad_norm": 1.050943374633789, 
"learning_rate": 4.661765895590541e-05, "loss": 1.3493, "step": 11200 }, { "epoch": 3.35, "grad_norm": 3.3978419303894043, "learning_rate": 4.661470737151323e-05, "loss": 1.2133, "step": 11205 }, { "epoch": 3.35, "grad_norm": 1.509317398071289, "learning_rate": 4.661175459336324e-05, "loss": 1.3368, "step": 11210 }, { "epoch": 3.36, "grad_norm": 2.1096768379211426, "learning_rate": 4.6608800621618506e-05, "loss": 1.3254, "step": 11215 }, { "epoch": 3.36, "grad_norm": 1.4031864404678345, "learning_rate": 4.660584545644218e-05, "loss": 1.4675, "step": 11220 }, { "epoch": 3.36, "grad_norm": 1.8638592958450317, "learning_rate": 4.660288909799746e-05, "loss": 1.4674, "step": 11225 }, { "epoch": 3.36, "grad_norm": 1.681207299232483, "learning_rate": 4.659993154644763e-05, "loss": 1.1908, "step": 11230 }, { "epoch": 3.36, "grad_norm": 1.484300136566162, "learning_rate": 4.659697280195604e-05, "loss": 1.1835, "step": 11235 }, { "epoch": 3.36, "grad_norm": 1.0395090579986572, "learning_rate": 4.6594012864686084e-05, "loss": 1.2898, "step": 11240 }, { "epoch": 3.36, "grad_norm": 2.5014450550079346, "learning_rate": 4.6591051734801246e-05, "loss": 1.2174, "step": 11245 }, { "epoch": 3.37, "grad_norm": 2.0165107250213623, "learning_rate": 4.658808941246506e-05, "loss": 1.2818, "step": 11250 }, { "epoch": 3.37, "grad_norm": 1.2676451206207275, "learning_rate": 4.658512589784114e-05, "loss": 1.3866, "step": 11255 }, { "epoch": 3.37, "grad_norm": 1.7123560905456543, "learning_rate": 4.658216119109315e-05, "loss": 1.4062, "step": 11260 }, { "epoch": 3.37, "grad_norm": 2.4512739181518555, "learning_rate": 4.6579195292384825e-05, "loss": 1.2707, "step": 11265 }, { "epoch": 3.37, "grad_norm": 1.6923884153366089, "learning_rate": 4.657622820187998e-05, "loss": 1.3526, "step": 11270 }, { "epoch": 3.37, "grad_norm": 2.5449416637420654, "learning_rate": 4.6573259919742484e-05, "loss": 1.1957, "step": 11275 }, { "epoch": 3.37, "grad_norm": 1.0603396892547607, "learning_rate": 
4.657029044613626e-05, "loss": 1.4085, "step": 11280 }, { "epoch": 3.38, "grad_norm": 2.8096063137054443, "learning_rate": 4.6567319781225313e-05, "loss": 1.4052, "step": 11285 }, { "epoch": 3.38, "grad_norm": 2.0202414989471436, "learning_rate": 4.656434792517372e-05, "loss": 1.3356, "step": 11290 }, { "epoch": 3.38, "grad_norm": 1.3925175666809082, "learning_rate": 4.6561374878145606e-05, "loss": 1.2098, "step": 11295 }, { "epoch": 3.38, "grad_norm": 1.2433990240097046, "learning_rate": 4.655840064030517e-05, "loss": 1.3263, "step": 11300 }, { "epoch": 3.38, "grad_norm": 1.0137583017349243, "learning_rate": 4.6555425211816675e-05, "loss": 1.2833, "step": 11305 }, { "epoch": 3.38, "grad_norm": 2.3041789531707764, "learning_rate": 4.655244859284444e-05, "loss": 1.4461, "step": 11310 }, { "epoch": 3.39, "grad_norm": 1.4320485591888428, "learning_rate": 4.6549470783552886e-05, "loss": 1.3514, "step": 11315 }, { "epoch": 3.39, "grad_norm": 1.6517833471298218, "learning_rate": 4.654649178410645e-05, "loss": 1.3607, "step": 11320 }, { "epoch": 3.39, "grad_norm": 2.3063910007476807, "learning_rate": 4.6543511594669675e-05, "loss": 1.2331, "step": 11325 }, { "epoch": 3.39, "grad_norm": 1.5468136072158813, "learning_rate": 4.654053021540714e-05, "loss": 1.207, "step": 11330 }, { "epoch": 3.39, "grad_norm": 0.7770300507545471, "learning_rate": 4.653754764648352e-05, "loss": 1.2422, "step": 11335 }, { "epoch": 3.39, "grad_norm": 1.037409782409668, "learning_rate": 4.6534563888063534e-05, "loss": 1.1782, "step": 11340 }, { "epoch": 3.39, "grad_norm": 1.4780035018920898, "learning_rate": 4.653157894031196e-05, "loss": 1.281, "step": 11345 }, { "epoch": 3.4, "grad_norm": 1.644164800643921, "learning_rate": 4.652859280339366e-05, "loss": 1.2121, "step": 11350 }, { "epoch": 3.4, "grad_norm": 1.3956807851791382, "learning_rate": 4.6525605477473564e-05, "loss": 1.2194, "step": 11355 }, { "epoch": 3.4, "grad_norm": 1.0469894409179688, "learning_rate": 4.6522616962716646e-05, "loss": 
1.3257, "step": 11360 }, { "epoch": 3.4, "grad_norm": 1.9847218990325928, "learning_rate": 4.651962725928797e-05, "loss": 1.4013, "step": 11365 }, { "epoch": 3.4, "grad_norm": 1.7343077659606934, "learning_rate": 4.6516636367352646e-05, "loss": 1.3517, "step": 11370 }, { "epoch": 3.4, "grad_norm": 2.5026791095733643, "learning_rate": 4.6513644287075866e-05, "loss": 1.4136, "step": 11375 }, { "epoch": 3.4, "grad_norm": 2.037338972091675, "learning_rate": 4.651065101862286e-05, "loss": 1.3706, "step": 11380 }, { "epoch": 3.41, "grad_norm": 2.3901917934417725, "learning_rate": 4.650765656215898e-05, "loss": 1.2601, "step": 11385 }, { "epoch": 3.41, "grad_norm": 1.2326328754425049, "learning_rate": 4.650466091784956e-05, "loss": 1.4596, "step": 11390 }, { "epoch": 3.41, "grad_norm": 1.7991396188735962, "learning_rate": 4.650166408586009e-05, "loss": 1.2718, "step": 11395 }, { "epoch": 3.41, "grad_norm": 1.9020781517028809, "learning_rate": 4.649866606635605e-05, "loss": 1.2791, "step": 11400 }, { "epoch": 3.41, "grad_norm": 1.2564764022827148, "learning_rate": 4.649566685950304e-05, "loss": 1.2263, "step": 11405 }, { "epoch": 3.41, "grad_norm": 1.6249325275421143, "learning_rate": 4.649266646546668e-05, "loss": 1.3176, "step": 11410 }, { "epoch": 3.42, "grad_norm": 1.1263751983642578, "learning_rate": 4.64896648844127e-05, "loss": 1.3471, "step": 11415 }, { "epoch": 3.42, "grad_norm": 1.0438363552093506, "learning_rate": 4.648666211650686e-05, "loss": 1.3099, "step": 11420 }, { "epoch": 3.42, "grad_norm": 1.0559061765670776, "learning_rate": 4.648365816191501e-05, "loss": 1.2612, "step": 11425 }, { "epoch": 3.42, "grad_norm": 0.788719654083252, "learning_rate": 4.648065302080305e-05, "loss": 1.2446, "step": 11430 }, { "epoch": 3.42, "grad_norm": 2.290250539779663, "learning_rate": 4.647764669333695e-05, "loss": 1.2215, "step": 11435 }, { "epoch": 3.42, "grad_norm": 1.0487291812896729, "learning_rate": 4.647463917968275e-05, "loss": 1.1949, "step": 11440 }, { "epoch": 
3.42, "grad_norm": 1.4382762908935547, "learning_rate": 4.647163048000655e-05, "loss": 1.2458, "step": 11445 }, { "epoch": 3.43, "grad_norm": 2.330712080001831, "learning_rate": 4.6468620594474515e-05, "loss": 1.2711, "step": 11450 }, { "epoch": 3.43, "grad_norm": 3.3765647411346436, "learning_rate": 4.6465609523252884e-05, "loss": 1.359, "step": 11455 }, { "epoch": 3.43, "grad_norm": 1.7210214138031006, "learning_rate": 4.646259726650794e-05, "loss": 1.2642, "step": 11460 }, { "epoch": 3.43, "grad_norm": 1.9674427509307861, "learning_rate": 4.645958382440607e-05, "loss": 1.2704, "step": 11465 }, { "epoch": 3.43, "grad_norm": 1.1404820680618286, "learning_rate": 4.645656919711369e-05, "loss": 1.3052, "step": 11470 }, { "epoch": 3.43, "grad_norm": 1.92625892162323, "learning_rate": 4.645355338479729e-05, "loss": 1.1419, "step": 11475 }, { "epoch": 3.43, "grad_norm": 2.301802396774292, "learning_rate": 4.645053638762344e-05, "loss": 1.2296, "step": 11480 }, { "epoch": 3.44, "grad_norm": 2.9896774291992188, "learning_rate": 4.6447518205758765e-05, "loss": 1.3242, "step": 11485 }, { "epoch": 3.44, "grad_norm": 1.1062313318252563, "learning_rate": 4.6444498839369956e-05, "loss": 1.3635, "step": 11490 }, { "epoch": 3.44, "grad_norm": 1.2494499683380127, "learning_rate": 4.644147828862375e-05, "loss": 1.3554, "step": 11495 }, { "epoch": 3.44, "grad_norm": 4.519656658172607, "learning_rate": 4.6438456553687005e-05, "loss": 1.3204, "step": 11500 }, { "epoch": 3.44, "grad_norm": 1.678928256034851, "learning_rate": 4.6435433634726575e-05, "loss": 1.2169, "step": 11505 }, { "epoch": 3.44, "grad_norm": 1.1864182949066162, "learning_rate": 4.6432409531909434e-05, "loss": 1.2554, "step": 11510 }, { "epoch": 3.45, "grad_norm": 1.6995583772659302, "learning_rate": 4.6429384245402585e-05, "loss": 1.3414, "step": 11515 }, { "epoch": 3.45, "grad_norm": 1.1473424434661865, "learning_rate": 4.642635777537312e-05, "loss": 1.1982, "step": 11520 }, { "epoch": 3.45, "grad_norm": 
1.1320407390594482, "learning_rate": 4.6423330121988196e-05, "loss": 1.3277, "step": 11525 }, { "epoch": 3.45, "grad_norm": 0.7268291115760803, "learning_rate": 4.6420301285415005e-05, "loss": 1.4298, "step": 11530 }, { "epoch": 3.45, "grad_norm": 1.2748498916625977, "learning_rate": 4.641727126582085e-05, "loss": 1.3528, "step": 11535 }, { "epoch": 3.45, "grad_norm": 1.8894537687301636, "learning_rate": 4.641424006337306e-05, "loss": 1.3222, "step": 11540 }, { "epoch": 3.45, "grad_norm": 0.8295863270759583, "learning_rate": 4.641120767823905e-05, "loss": 1.2138, "step": 11545 }, { "epoch": 3.46, "grad_norm": 2.017294406890869, "learning_rate": 4.640817411058629e-05, "loss": 1.3272, "step": 11550 }, { "epoch": 3.46, "grad_norm": 2.1451973915100098, "learning_rate": 4.640513936058233e-05, "loss": 1.2706, "step": 11555 }, { "epoch": 3.46, "grad_norm": 1.6634198427200317, "learning_rate": 4.640210342839478e-05, "loss": 1.2066, "step": 11560 }, { "epoch": 3.46, "grad_norm": 2.191540002822876, "learning_rate": 4.6399066314191294e-05, "loss": 1.2551, "step": 11565 }, { "epoch": 3.46, "grad_norm": 1.4875872135162354, "learning_rate": 4.639602801813963e-05, "loss": 1.4758, "step": 11570 }, { "epoch": 3.46, "grad_norm": 1.266520619392395, "learning_rate": 4.6392988540407564e-05, "loss": 1.2998, "step": 11575 }, { "epoch": 3.46, "grad_norm": 1.3954042196273804, "learning_rate": 4.6389947881162984e-05, "loss": 1.2636, "step": 11580 }, { "epoch": 3.47, "grad_norm": 1.5587852001190186, "learning_rate": 4.6386906040573825e-05, "loss": 1.2853, "step": 11585 }, { "epoch": 3.47, "grad_norm": 2.421710729598999, "learning_rate": 4.638386301880807e-05, "loss": 1.3729, "step": 11590 }, { "epoch": 3.47, "grad_norm": 1.5814123153686523, "learning_rate": 4.638081881603378e-05, "loss": 1.4307, "step": 11595 }, { "epoch": 3.47, "grad_norm": 0.9030851125717163, "learning_rate": 4.63777734324191e-05, "loss": 1.3591, "step": 11600 }, { "epoch": 3.47, "grad_norm": 2.414091110229492, 
"learning_rate": 4.637472686813221e-05, "loss": 1.2515, "step": 11605 }, { "epoch": 3.47, "grad_norm": 1.2490521669387817, "learning_rate": 4.637167912334138e-05, "loss": 1.3923, "step": 11610 }, { "epoch": 3.48, "grad_norm": 1.478395700454712, "learning_rate": 4.6368630198214916e-05, "loss": 1.3068, "step": 11615 }, { "epoch": 3.48, "grad_norm": 1.7249629497528076, "learning_rate": 4.636558009292122e-05, "loss": 1.4022, "step": 11620 }, { "epoch": 3.48, "grad_norm": 1.7434816360473633, "learning_rate": 4.636252880762875e-05, "loss": 1.2509, "step": 11625 }, { "epoch": 3.48, "grad_norm": 1.605729341506958, "learning_rate": 4.6359476342506015e-05, "loss": 1.2654, "step": 11630 }, { "epoch": 3.48, "grad_norm": 1.8081423044204712, "learning_rate": 4.63564226977216e-05, "loss": 1.2407, "step": 11635 }, { "epoch": 3.48, "grad_norm": 1.519515037536621, "learning_rate": 4.6353367873444165e-05, "loss": 1.427, "step": 11640 }, { "epoch": 3.48, "grad_norm": 1.4996379613876343, "learning_rate": 4.635031186984241e-05, "loss": 1.2309, "step": 11645 }, { "epoch": 3.49, "grad_norm": 1.2170788049697876, "learning_rate": 4.634725468708513e-05, "loss": 1.2613, "step": 11650 }, { "epoch": 3.49, "grad_norm": 1.4180010557174683, "learning_rate": 4.634419632534116e-05, "loss": 1.3331, "step": 11655 }, { "epoch": 3.49, "grad_norm": 0.9083313345909119, "learning_rate": 4.6341136784779415e-05, "loss": 1.3276, "step": 11660 }, { "epoch": 3.49, "grad_norm": 2.1086177825927734, "learning_rate": 4.633807606556887e-05, "loss": 1.2769, "step": 11665 }, { "epoch": 3.49, "grad_norm": 1.2872865200042725, "learning_rate": 4.6335014167878557e-05, "loss": 1.1635, "step": 11670 }, { "epoch": 3.49, "grad_norm": 0.8815200924873352, "learning_rate": 4.633195109187759e-05, "loss": 1.2593, "step": 11675 }, { "epoch": 3.49, "grad_norm": 2.032335042953491, "learning_rate": 4.632888683773514e-05, "loss": 1.3319, "step": 11680 }, { "epoch": 3.5, "grad_norm": 2.3916192054748535, "learning_rate": 
4.632582140562044e-05, "loss": 1.2105, "step": 11685 }, { "epoch": 3.5, "grad_norm": 1.9669456481933594, "learning_rate": 4.6322754795702795e-05, "loss": 1.2198, "step": 11690 }, { "epoch": 3.5, "grad_norm": 1.4312375783920288, "learning_rate": 4.6319687008151555e-05, "loss": 1.2218, "step": 11695 }, { "epoch": 3.5, "grad_norm": 1.3405520915985107, "learning_rate": 4.6316618043136175e-05, "loss": 1.1403, "step": 11700 }, { "epoch": 3.5, "grad_norm": 1.2970833778381348, "learning_rate": 4.6313547900826124e-05, "loss": 1.4566, "step": 11705 }, { "epoch": 3.5, "grad_norm": 1.1170778274536133, "learning_rate": 4.6310476581390985e-05, "loss": 1.3428, "step": 11710 }, { "epoch": 3.5, "grad_norm": 2.171121835708618, "learning_rate": 4.6307404085000374e-05, "loss": 1.2992, "step": 11715 }, { "epoch": 3.51, "grad_norm": 1.2334961891174316, "learning_rate": 4.630433041182398e-05, "loss": 1.1957, "step": 11720 }, { "epoch": 3.51, "grad_norm": 2.142889976501465, "learning_rate": 4.630125556203156e-05, "loss": 1.3488, "step": 11725 }, { "epoch": 3.51, "grad_norm": 1.060781478881836, "learning_rate": 4.629817953579295e-05, "loss": 1.0665, "step": 11730 }, { "epoch": 3.51, "grad_norm": 1.4236716032028198, "learning_rate": 4.629510233327802e-05, "loss": 1.2998, "step": 11735 }, { "epoch": 3.51, "grad_norm": 1.210963249206543, "learning_rate": 4.6292023954656716e-05, "loss": 1.3734, "step": 11740 }, { "epoch": 3.51, "grad_norm": 1.2984216213226318, "learning_rate": 4.628894440009906e-05, "loss": 1.3029, "step": 11745 }, { "epoch": 3.52, "grad_norm": 1.2461925745010376, "learning_rate": 4.628586366977513e-05, "loss": 1.2483, "step": 11750 }, { "epoch": 3.52, "grad_norm": 1.2298219203948975, "learning_rate": 4.628278176385509e-05, "loss": 1.1885, "step": 11755 }, { "epoch": 3.52, "grad_norm": 2.885982036590576, "learning_rate": 4.627969868250912e-05, "loss": 1.2902, "step": 11760 }, { "epoch": 3.52, "grad_norm": 1.220379114151001, "learning_rate": 4.6276614425907514e-05, "loss": 
1.2562, "step": 11765 }, { "epoch": 3.52, "grad_norm": 1.2900983095169067, "learning_rate": 4.6273528994220616e-05, "loss": 1.2722, "step": 11770 }, { "epoch": 3.52, "grad_norm": 1.6189454793930054, "learning_rate": 4.627044238761882e-05, "loss": 1.2755, "step": 11775 }, { "epoch": 3.52, "grad_norm": 2.1183040142059326, "learning_rate": 4.62673546062726e-05, "loss": 1.3765, "step": 11780 }, { "epoch": 3.53, "grad_norm": 1.5245985984802246, "learning_rate": 4.6264265650352494e-05, "loss": 1.3768, "step": 11785 }, { "epoch": 3.53, "grad_norm": 1.0661191940307617, "learning_rate": 4.62611755200291e-05, "loss": 1.2853, "step": 11790 }, { "epoch": 3.53, "grad_norm": 2.844651699066162, "learning_rate": 4.625808421547307e-05, "loss": 1.3109, "step": 11795 }, { "epoch": 3.53, "grad_norm": 1.5837798118591309, "learning_rate": 4.6254991736855156e-05, "loss": 1.192, "step": 11800 }, { "epoch": 3.53, "grad_norm": 1.2192292213439941, "learning_rate": 4.625189808434614e-05, "loss": 1.207, "step": 11805 }, { "epoch": 3.53, "grad_norm": 2.5890302658081055, "learning_rate": 4.624880325811689e-05, "loss": 1.3307, "step": 11810 }, { "epoch": 3.53, "grad_norm": 1.7649799585342407, "learning_rate": 4.624570725833831e-05, "loss": 1.3292, "step": 11815 }, { "epoch": 3.54, "grad_norm": 2.073655605316162, "learning_rate": 4.624261008518141e-05, "loss": 1.2353, "step": 11820 }, { "epoch": 3.54, "grad_norm": 1.5090842247009277, "learning_rate": 4.623951173881723e-05, "loss": 1.2454, "step": 11825 }, { "epoch": 3.54, "grad_norm": 1.7265774011611938, "learning_rate": 4.62364122194169e-05, "loss": 1.2835, "step": 11830 }, { "epoch": 3.54, "grad_norm": 2.3008291721343994, "learning_rate": 4.62333115271516e-05, "loss": 1.3273, "step": 11835 }, { "epoch": 3.54, "grad_norm": 1.107966661453247, "learning_rate": 4.6230209662192565e-05, "loss": 1.2891, "step": 11840 }, { "epoch": 3.54, "grad_norm": 1.2190146446228027, "learning_rate": 4.622710662471112e-05, "loss": 1.4084, "step": 11845 }, { "epoch": 
3.55, "grad_norm": 1.5428764820098877, "learning_rate": 4.6224002414878644e-05, "loss": 1.1754, "step": 11850 }, { "epoch": 3.55, "grad_norm": 1.452290654182434, "learning_rate": 4.6220897032866574e-05, "loss": 1.2888, "step": 11855 }, { "epoch": 3.55, "grad_norm": 1.2167850732803345, "learning_rate": 4.621779047884642e-05, "loss": 1.3761, "step": 11860 }, { "epoch": 3.55, "grad_norm": 0.8653399348258972, "learning_rate": 4.6214682752989746e-05, "loss": 1.3106, "step": 11865 }, { "epoch": 3.55, "grad_norm": 1.1357594728469849, "learning_rate": 4.6211573855468205e-05, "loss": 1.3966, "step": 11870 }, { "epoch": 3.55, "grad_norm": 2.4457480907440186, "learning_rate": 4.6208463786453485e-05, "loss": 1.3881, "step": 11875 }, { "epoch": 3.55, "grad_norm": 2.0536937713623047, "learning_rate": 4.6205352546117356e-05, "loss": 1.3554, "step": 11880 }, { "epoch": 3.56, "grad_norm": 1.843001127243042, "learning_rate": 4.6202240134631644e-05, "loss": 1.3486, "step": 11885 }, { "epoch": 3.56, "grad_norm": 1.340734839439392, "learning_rate": 4.619912655216825e-05, "loss": 1.2276, "step": 11890 }, { "epoch": 3.56, "grad_norm": 0.9751700162887573, "learning_rate": 4.619601179889913e-05, "loss": 1.1469, "step": 11895 }, { "epoch": 3.56, "grad_norm": 1.659359335899353, "learning_rate": 4.619289587499631e-05, "loss": 1.271, "step": 11900 }, { "epoch": 3.56, "grad_norm": 1.9473052024841309, "learning_rate": 4.618977878063188e-05, "loss": 1.2939, "step": 11905 }, { "epoch": 3.56, "grad_norm": 1.0142465829849243, "learning_rate": 4.6186660515978e-05, "loss": 1.2489, "step": 11910 }, { "epoch": 3.56, "grad_norm": 1.311125636100769, "learning_rate": 4.618354108120687e-05, "loss": 1.2565, "step": 11915 }, { "epoch": 3.57, "grad_norm": 1.6890449523925781, "learning_rate": 4.618042047649079e-05, "loss": 1.2294, "step": 11920 }, { "epoch": 3.57, "grad_norm": 2.259535789489746, "learning_rate": 4.6177298702002106e-05, "loss": 1.3299, "step": 11925 }, { "epoch": 3.57, "grad_norm": 
1.944066047668457, "learning_rate": 4.6174800440290745e-05, "loss": 1.4415, "step": 11930 }, { "epoch": 3.57, "grad_norm": 1.1252658367156982, "learning_rate": 4.617167656064589e-05, "loss": 1.271, "step": 11935 }, { "epoch": 3.57, "grad_norm": 1.6652323007583618, "learning_rate": 4.616855151171134e-05, "loss": 1.2467, "step": 11940 }, { "epoch": 3.57, "grad_norm": 1.1521717309951782, "learning_rate": 4.61654252936597e-05, "loss": 1.3624, "step": 11945 }, { "epoch": 3.58, "grad_norm": 1.9085320234298706, "learning_rate": 4.616229790666362e-05, "loss": 1.2027, "step": 11950 }, { "epoch": 3.58, "grad_norm": 1.8030284643173218, "learning_rate": 4.6159169350895825e-05, "loss": 1.2746, "step": 11955 }, { "epoch": 3.58, "grad_norm": 1.5365478992462158, "learning_rate": 4.61560396265291e-05, "loss": 1.4625, "step": 11960 }, { "epoch": 3.58, "grad_norm": 2.0654516220092773, "learning_rate": 4.615290873373629e-05, "loss": 1.2609, "step": 11965 }, { "epoch": 3.58, "grad_norm": 2.4061896800994873, "learning_rate": 4.614977667269033e-05, "loss": 1.2647, "step": 11970 }, { "epoch": 3.58, "grad_norm": 3.210092544555664, "learning_rate": 4.614664344356417e-05, "loss": 1.2957, "step": 11975 }, { "epoch": 3.58, "grad_norm": 2.317610025405884, "learning_rate": 4.614350904653089e-05, "loss": 1.2691, "step": 11980 }, { "epoch": 3.59, "grad_norm": 1.450660228729248, "learning_rate": 4.614037348176358e-05, "loss": 1.2757, "step": 11985 }, { "epoch": 3.59, "grad_norm": 1.0978591442108154, "learning_rate": 4.6137236749435413e-05, "loss": 1.2778, "step": 11990 }, { "epoch": 3.59, "grad_norm": 1.411629319190979, "learning_rate": 4.613409884971963e-05, "loss": 1.5244, "step": 11995 }, { "epoch": 3.59, "grad_norm": 4.375972270965576, "learning_rate": 4.613095978278954e-05, "loss": 1.3383, "step": 12000 }, { "epoch": 3.59, "grad_norm": 1.4129486083984375, "learning_rate": 4.6127819548818506e-05, "loss": 1.4209, "step": 12005 }, { "epoch": 3.59, "grad_norm": 1.0286214351654053, "learning_rate": 
4.612467814797996e-05, "loss": 1.3274, "step": 12010 }, { "epoch": 3.59, "grad_norm": 1.4175000190734863, "learning_rate": 4.61215355804474e-05, "loss": 1.261, "step": 12015 }, { "epoch": 3.6, "grad_norm": 1.794857144355774, "learning_rate": 4.611839184639437e-05, "loss": 1.2601, "step": 12020 }, { "epoch": 3.6, "grad_norm": 2.3006887435913086, "learning_rate": 4.611524694599452e-05, "loss": 1.1114, "step": 12025 }, { "epoch": 3.6, "grad_norm": 2.3993430137634277, "learning_rate": 4.6112100879421524e-05, "loss": 1.3558, "step": 12030 }, { "epoch": 3.6, "grad_norm": 1.9399762153625488, "learning_rate": 4.610895364684915e-05, "loss": 1.3497, "step": 12035 }, { "epoch": 3.6, "grad_norm": 1.2342357635498047, "learning_rate": 4.61058052484512e-05, "loss": 1.3263, "step": 12040 }, { "epoch": 3.6, "grad_norm": 2.044203042984009, "learning_rate": 4.610265568440157e-05, "loss": 1.3344, "step": 12045 }, { "epoch": 3.61, "grad_norm": 0.6386117935180664, "learning_rate": 4.609950495487419e-05, "loss": 1.143, "step": 12050 }, { "epoch": 3.61, "grad_norm": 1.735022783279419, "learning_rate": 4.60963530600431e-05, "loss": 1.4065, "step": 12055 }, { "epoch": 3.61, "grad_norm": 1.7873737812042236, "learning_rate": 4.6093200000082346e-05, "loss": 1.3379, "step": 12060 }, { "epoch": 3.61, "grad_norm": 1.9546738862991333, "learning_rate": 4.609004577516609e-05, "loss": 1.1978, "step": 12065 }, { "epoch": 3.61, "grad_norm": 1.1246274709701538, "learning_rate": 4.6086890385468526e-05, "loss": 1.2626, "step": 12070 }, { "epoch": 3.61, "grad_norm": 1.5824791193008423, "learning_rate": 4.6083733831163925e-05, "loss": 1.3883, "step": 12075 }, { "epoch": 3.61, "grad_norm": 1.4226622581481934, "learning_rate": 4.608057611242662e-05, "loss": 1.2575, "step": 12080 }, { "epoch": 3.62, "grad_norm": 1.6612082719802856, "learning_rate": 4.6077417229431e-05, "loss": 1.107, "step": 12085 }, { "epoch": 3.62, "grad_norm": 1.722304105758667, "learning_rate": 4.6074257182351546e-05, "loss": 1.1742, 
"step": 12090 }, { "epoch": 3.62, "grad_norm": 2.5315911769866943, "learning_rate": 4.607109597136277e-05, "loss": 1.2671, "step": 12095 }, { "epoch": 3.62, "grad_norm": 1.4968048334121704, "learning_rate": 4.606793359663926e-05, "loss": 1.2445, "step": 12100 }, { "epoch": 3.62, "grad_norm": 0.9814133644104004, "learning_rate": 4.6064770058355675e-05, "loss": 1.2989, "step": 12105 }, { "epoch": 3.62, "grad_norm": 1.6419323682785034, "learning_rate": 4.6061605356686746e-05, "loss": 1.4119, "step": 12110 }, { "epoch": 3.62, "grad_norm": 1.662709355354309, "learning_rate": 4.605843949180724e-05, "loss": 1.4737, "step": 12115 }, { "epoch": 3.63, "grad_norm": 1.9931641817092896, "learning_rate": 4.605527246389201e-05, "loss": 1.3292, "step": 12120 }, { "epoch": 3.63, "grad_norm": 2.7940378189086914, "learning_rate": 4.605210427311596e-05, "loss": 1.3846, "step": 12125 }, { "epoch": 3.63, "grad_norm": 1.8893239498138428, "learning_rate": 4.604893491965409e-05, "loss": 1.2893, "step": 12130 }, { "epoch": 3.63, "grad_norm": 2.463541030883789, "learning_rate": 4.6045764403681415e-05, "loss": 1.2914, "step": 12135 }, { "epoch": 3.63, "grad_norm": 2.0132181644439697, "learning_rate": 4.604259272537304e-05, "loss": 1.4236, "step": 12140 }, { "epoch": 3.63, "grad_norm": 1.1866495609283447, "learning_rate": 4.603941988490415e-05, "loss": 1.3439, "step": 12145 }, { "epoch": 3.64, "grad_norm": 1.9764355421066284, "learning_rate": 4.603624588244997e-05, "loss": 1.3349, "step": 12150 }, { "epoch": 3.64, "grad_norm": 1.9496948719024658, "learning_rate": 4.603307071818579e-05, "loss": 1.2836, "step": 12155 }, { "epoch": 3.64, "grad_norm": 3.0015974044799805, "learning_rate": 4.602989439228698e-05, "loss": 1.367, "step": 12160 }, { "epoch": 3.64, "grad_norm": 1.3181309700012207, "learning_rate": 4.6026716904928965e-05, "loss": 1.4144, "step": 12165 }, { "epoch": 3.64, "grad_norm": 1.3011974096298218, "learning_rate": 4.602353825628722e-05, "loss": 1.2887, "step": 12170 }, { "epoch": 
3.64, "grad_norm": 2.6620357036590576, "learning_rate": 4.602035844653733e-05, "loss": 1.3088, "step": 12175 }, { "epoch": 3.64, "grad_norm": 1.3964091539382935, "learning_rate": 4.601717747585488e-05, "loss": 1.2074, "step": 12180 }, { "epoch": 3.65, "grad_norm": 2.161696434020996, "learning_rate": 4.601399534441556e-05, "loss": 1.3595, "step": 12185 }, { "epoch": 3.65, "grad_norm": 1.0250306129455566, "learning_rate": 4.601081205239512e-05, "loss": 1.3982, "step": 12190 }, { "epoch": 3.65, "grad_norm": 1.1172339916229248, "learning_rate": 4.6007627599969385e-05, "loss": 1.1501, "step": 12195 }, { "epoch": 3.65, "grad_norm": 1.6155879497528076, "learning_rate": 4.60044419873142e-05, "loss": 1.3539, "step": 12200 }, { "epoch": 3.65, "grad_norm": 1.6924985647201538, "learning_rate": 4.600125521460552e-05, "loss": 1.2307, "step": 12205 }, { "epoch": 3.65, "grad_norm": 1.349064826965332, "learning_rate": 4.599806728201935e-05, "loss": 1.1824, "step": 12210 }, { "epoch": 3.65, "grad_norm": 2.545496940612793, "learning_rate": 4.599487818973174e-05, "loss": 1.3419, "step": 12215 }, { "epoch": 3.66, "grad_norm": 2.050647258758545, "learning_rate": 4.599168793791884e-05, "loss": 1.21, "step": 12220 }, { "epoch": 3.66, "grad_norm": 2.1457366943359375, "learning_rate": 4.598849652675683e-05, "loss": 1.3711, "step": 12225 }, { "epoch": 3.66, "grad_norm": 2.319671630859375, "learning_rate": 4.598530395642197e-05, "loss": 1.2425, "step": 12230 }, { "epoch": 3.66, "grad_norm": 1.073062777519226, "learning_rate": 4.598211022709059e-05, "loss": 1.3835, "step": 12235 }, { "epoch": 3.66, "grad_norm": 1.0914404392242432, "learning_rate": 4.597891533893908e-05, "loss": 1.5263, "step": 12240 }, { "epoch": 3.66, "grad_norm": 0.8850699663162231, "learning_rate": 4.5975719292143865e-05, "loss": 1.299, "step": 12245 }, { "epoch": 3.67, "grad_norm": 1.4798288345336914, "learning_rate": 4.5972522086881485e-05, "loss": 1.3877, "step": 12250 }, { "epoch": 3.67, "grad_norm": 0.8555247783660889, 
"learning_rate": 4.5969323723328505e-05, "loss": 1.4655, "step": 12255 }, { "epoch": 3.67, "grad_norm": 1.9914946556091309, "learning_rate": 4.596612420166158e-05, "loss": 1.2558, "step": 12260 }, { "epoch": 3.67, "grad_norm": 1.4527971744537354, "learning_rate": 4.596292352205741e-05, "loss": 1.2612, "step": 12265 }, { "epoch": 3.67, "grad_norm": 1.6714884042739868, "learning_rate": 4.595972168469276e-05, "loss": 1.2867, "step": 12270 }, { "epoch": 3.67, "grad_norm": 1.638249397277832, "learning_rate": 4.595651868974447e-05, "loss": 1.3167, "step": 12275 }, { "epoch": 3.67, "grad_norm": 1.3472903966903687, "learning_rate": 4.5953314537389426e-05, "loss": 1.2534, "step": 12280 }, { "epoch": 3.68, "grad_norm": 1.781898856163025, "learning_rate": 4.59501092278046e-05, "loss": 1.2795, "step": 12285 }, { "epoch": 3.68, "grad_norm": 2.2303428649902344, "learning_rate": 4.594690276116703e-05, "loss": 1.3091, "step": 12290 }, { "epoch": 3.68, "grad_norm": 1.383138656616211, "learning_rate": 4.594369513765379e-05, "loss": 1.2663, "step": 12295 }, { "epoch": 3.68, "grad_norm": 1.707878828048706, "learning_rate": 4.594048635744203e-05, "loss": 1.2528, "step": 12300 }, { "epoch": 3.68, "grad_norm": 0.8633110523223877, "learning_rate": 4.5937276420708985e-05, "loss": 1.3231, "step": 12305 }, { "epoch": 3.68, "grad_norm": 1.4325439929962158, "learning_rate": 4.593406532763192e-05, "loss": 1.21, "step": 12310 }, { "epoch": 3.68, "grad_norm": 1.455572247505188, "learning_rate": 4.5930853078388185e-05, "loss": 1.2322, "step": 12315 }, { "epoch": 3.69, "grad_norm": 1.189207911491394, "learning_rate": 4.59276396731552e-05, "loss": 1.3804, "step": 12320 }, { "epoch": 3.69, "grad_norm": 1.2645961046218872, "learning_rate": 4.592442511211042e-05, "loss": 1.3339, "step": 12325 }, { "epoch": 3.69, "grad_norm": 1.2472920417785645, "learning_rate": 4.59212093954314e-05, "loss": 1.2354, "step": 12330 }, { "epoch": 3.69, "grad_norm": 1.2860960960388184, "learning_rate": 
4.5917992523295716e-05, "loss": 1.2905, "step": 12335 }, { "epoch": 3.69, "grad_norm": 1.902571439743042, "learning_rate": 4.591477449588106e-05, "loss": 1.2967, "step": 12340 }, { "epoch": 3.69, "grad_norm": 2.72182559967041, "learning_rate": 4.591155531336514e-05, "loss": 1.3203, "step": 12345 }, { "epoch": 3.69, "grad_norm": 4.221126079559326, "learning_rate": 4.590833497592576e-05, "loss": 1.3342, "step": 12350 }, { "epoch": 3.7, "grad_norm": 2.888122320175171, "learning_rate": 4.590511348374078e-05, "loss": 1.2249, "step": 12355 }, { "epoch": 3.7, "grad_norm": 1.8218414783477783, "learning_rate": 4.5901890836988107e-05, "loss": 1.3318, "step": 12360 }, { "epoch": 3.7, "grad_norm": 1.3401216268539429, "learning_rate": 4.589866703584573e-05, "loss": 1.355, "step": 12365 }, { "epoch": 3.7, "grad_norm": 1.530899167060852, "learning_rate": 4.5895442080491694e-05, "loss": 1.1148, "step": 12370 }, { "epoch": 3.7, "grad_norm": 1.1697745323181152, "learning_rate": 4.589221597110411e-05, "loss": 1.3208, "step": 12375 }, { "epoch": 3.7, "grad_norm": 1.9336832761764526, "learning_rate": 4.588898870786116e-05, "loss": 1.3834, "step": 12380 }, { "epoch": 3.71, "grad_norm": 1.2124789953231812, "learning_rate": 4.588576029094106e-05, "loss": 1.2128, "step": 12385 }, { "epoch": 3.71, "grad_norm": 1.4189821481704712, "learning_rate": 4.588253072052214e-05, "loss": 1.3005, "step": 12390 }, { "epoch": 3.71, "grad_norm": 1.8805235624313354, "learning_rate": 4.5879299996782765e-05, "loss": 1.3208, "step": 12395 }, { "epoch": 3.71, "grad_norm": 2.4041588306427, "learning_rate": 4.587606811990134e-05, "loss": 1.3952, "step": 12400 }, { "epoch": 3.71, "grad_norm": 1.0374820232391357, "learning_rate": 4.5872835090056375e-05, "loss": 1.3175, "step": 12405 }, { "epoch": 3.71, "grad_norm": 2.124577760696411, "learning_rate": 4.586960090742643e-05, "loss": 1.2484, "step": 12410 }, { "epoch": 3.71, "grad_norm": 4.781686305999756, "learning_rate": 4.586636557219011e-05, "loss": 1.2985, 
"step": 12415 }, { "epoch": 3.72, "grad_norm": 1.7843397855758667, "learning_rate": 4.586312908452612e-05, "loss": 1.2425, "step": 12420 }, { "epoch": 3.72, "grad_norm": 3.2184629440307617, "learning_rate": 4.585989144461319e-05, "loss": 1.3105, "step": 12425 }, { "epoch": 3.72, "grad_norm": 1.4963960647583008, "learning_rate": 4.585665265263014e-05, "loss": 1.2972, "step": 12430 }, { "epoch": 3.72, "grad_norm": 3.5858564376831055, "learning_rate": 4.585341270875584e-05, "loss": 1.3527, "step": 12435 }, { "epoch": 3.72, "grad_norm": 2.781733989715576, "learning_rate": 4.5850171613169235e-05, "loss": 1.3733, "step": 12440 }, { "epoch": 3.72, "grad_norm": 0.917702317237854, "learning_rate": 4.5846929366049316e-05, "loss": 1.2823, "step": 12445 }, { "epoch": 3.72, "grad_norm": 1.9473000764846802, "learning_rate": 4.584368596757517e-05, "loss": 1.2604, "step": 12450 }, { "epoch": 3.73, "grad_norm": 1.2440733909606934, "learning_rate": 4.584044141792591e-05, "loss": 1.3558, "step": 12455 }, { "epoch": 3.73, "grad_norm": 1.291435956954956, "learning_rate": 4.5837195717280736e-05, "loss": 1.2116, "step": 12460 }, { "epoch": 3.73, "grad_norm": 1.013763427734375, "learning_rate": 4.583394886581889e-05, "loss": 1.3011, "step": 12465 }, { "epoch": 3.73, "grad_norm": 1.0628958940505981, "learning_rate": 4.583070086371971e-05, "loss": 1.4285, "step": 12470 }, { "epoch": 3.73, "grad_norm": 1.822394847869873, "learning_rate": 4.5827451711162575e-05, "loss": 1.2755, "step": 12475 }, { "epoch": 3.73, "grad_norm": 1.7311921119689941, "learning_rate": 4.5824201408326934e-05, "loss": 1.4643, "step": 12480 }, { "epoch": 3.74, "grad_norm": 1.9452160596847534, "learning_rate": 4.582094995539229e-05, "loss": 1.2745, "step": 12485 }, { "epoch": 3.74, "grad_norm": 4.922383785247803, "learning_rate": 4.581769735253822e-05, "loss": 1.3349, "step": 12490 }, { "epoch": 3.74, "grad_norm": 2.8117499351501465, "learning_rate": 4.581444359994437e-05, "loss": 1.4012, "step": 12495 }, { "epoch": 
3.74, "grad_norm": 1.1426706314086914, "learning_rate": 4.581118869779043e-05, "loss": 1.2974, "step": 12500 }, { "epoch": 3.74, "grad_norm": 1.857204556465149, "learning_rate": 4.580793264625618e-05, "loss": 1.2929, "step": 12505 }, { "epoch": 3.74, "grad_norm": 1.1504136323928833, "learning_rate": 4.580467544552143e-05, "loss": 1.3019, "step": 12510 }, { "epoch": 3.74, "grad_norm": 3.0354089736938477, "learning_rate": 4.580141709576608e-05, "loss": 1.3482, "step": 12515 }, { "epoch": 3.75, "grad_norm": 1.1074090003967285, "learning_rate": 4.579815759717009e-05, "loss": 1.4458, "step": 12520 }, { "epoch": 3.75, "grad_norm": 1.3502519130706787, "learning_rate": 4.579489694991347e-05, "loss": 1.1843, "step": 12525 }, { "epoch": 3.75, "grad_norm": 0.9303249716758728, "learning_rate": 4.579163515417631e-05, "loss": 1.3647, "step": 12530 }, { "epoch": 3.75, "grad_norm": 0.86869877576828, "learning_rate": 4.578837221013875e-05, "loss": 1.2498, "step": 12535 }, { "epoch": 3.75, "grad_norm": 1.8906731605529785, "learning_rate": 4.5785108117981e-05, "loss": 1.3277, "step": 12540 }, { "epoch": 3.75, "grad_norm": 1.9639962911605835, "learning_rate": 4.578184287788334e-05, "loss": 1.4021, "step": 12545 }, { "epoch": 3.75, "grad_norm": 2.0274109840393066, "learning_rate": 4.5778576490026094e-05, "loss": 1.2441, "step": 12550 }, { "epoch": 3.76, "grad_norm": 2.2819442749023438, "learning_rate": 4.577530895458967e-05, "loss": 1.4074, "step": 12555 }, { "epoch": 3.76, "grad_norm": 1.8426662683486938, "learning_rate": 4.577204027175453e-05, "loss": 1.2962, "step": 12560 }, { "epoch": 3.76, "grad_norm": 1.2728874683380127, "learning_rate": 4.576877044170118e-05, "loss": 1.2356, "step": 12565 }, { "epoch": 3.76, "grad_norm": 1.636061668395996, "learning_rate": 4.576549946461024e-05, "loss": 1.4087, "step": 12570 }, { "epoch": 3.76, "grad_norm": 1.0893962383270264, "learning_rate": 4.576222734066235e-05, "loss": 1.1445, "step": 12575 }, { "epoch": 3.76, "grad_norm": 
1.898051381111145, "learning_rate": 4.575895407003822e-05, "loss": 1.3225, "step": 12580 }, { "epoch": 3.77, "grad_norm": 1.7080368995666504, "learning_rate": 4.575567965291864e-05, "loss": 1.3176, "step": 12585 }, { "epoch": 3.77, "grad_norm": 2.100494861602783, "learning_rate": 4.575240408948443e-05, "loss": 1.4143, "step": 12590 }, { "epoch": 3.77, "grad_norm": 1.711904764175415, "learning_rate": 4.5749127379916536e-05, "loss": 1.4185, "step": 12595 }, { "epoch": 3.77, "grad_norm": 3.1518118381500244, "learning_rate": 4.57458495243959e-05, "loss": 1.3182, "step": 12600 }, { "epoch": 3.77, "grad_norm": 2.016057252883911, "learning_rate": 4.574257052310355e-05, "loss": 1.3566, "step": 12605 }, { "epoch": 3.77, "grad_norm": 1.0968761444091797, "learning_rate": 4.573929037622059e-05, "loss": 1.2916, "step": 12610 }, { "epoch": 3.77, "grad_norm": 1.1773560047149658, "learning_rate": 4.573600908392819e-05, "loss": 1.2659, "step": 12615 }, { "epoch": 3.78, "grad_norm": 1.2752783298492432, "learning_rate": 4.573272664640755e-05, "loss": 1.1999, "step": 12620 }, { "epoch": 3.78, "grad_norm": 1.5268641710281372, "learning_rate": 4.5729443063839986e-05, "loss": 1.4445, "step": 12625 }, { "epoch": 3.78, "grad_norm": 1.8288689851760864, "learning_rate": 4.572615833640681e-05, "loss": 1.2565, "step": 12630 }, { "epoch": 3.78, "grad_norm": 2.030205726623535, "learning_rate": 4.572287246428946e-05, "loss": 1.2579, "step": 12635 }, { "epoch": 3.78, "grad_norm": 2.9597010612487793, "learning_rate": 4.57195854476694e-05, "loss": 1.1755, "step": 12640 }, { "epoch": 3.78, "grad_norm": 1.5973179340362549, "learning_rate": 4.571629728672818e-05, "loss": 1.1414, "step": 12645 }, { "epoch": 3.78, "grad_norm": 1.202883005142212, "learning_rate": 4.5713007981647394e-05, "loss": 1.3216, "step": 12650 }, { "epoch": 3.79, "grad_norm": 1.9232937097549438, "learning_rate": 4.57097175326087e-05, "loss": 1.306, "step": 12655 }, { "epoch": 3.79, "grad_norm": 1.9437748193740845, "learning_rate": 
4.570642593979384e-05, "loss": 1.2824, "step": 12660 }, { "epoch": 3.79, "grad_norm": 1.9514751434326172, "learning_rate": 4.5703133203384594e-05, "loss": 1.404, "step": 12665 }, { "epoch": 3.79, "grad_norm": 1.337649941444397, "learning_rate": 4.5699839323562824e-05, "loss": 1.4712, "step": 12670 }, { "epoch": 3.79, "grad_norm": 0.9567248225212097, "learning_rate": 4.569654430051045e-05, "loss": 1.2779, "step": 12675 }, { "epoch": 3.79, "grad_norm": 1.1744178533554077, "learning_rate": 4.5693248134409434e-05, "loss": 1.3132, "step": 12680 }, { "epoch": 3.8, "grad_norm": 2.404231548309326, "learning_rate": 4.568995082544184e-05, "loss": 1.3972, "step": 12685 }, { "epoch": 3.8, "grad_norm": 1.4296953678131104, "learning_rate": 4.5686652373789764e-05, "loss": 1.3201, "step": 12690 }, { "epoch": 3.8, "grad_norm": 0.6986697912216187, "learning_rate": 4.568335277963538e-05, "loss": 1.2344, "step": 12695 }, { "epoch": 3.8, "grad_norm": 1.2852948904037476, "learning_rate": 4.568005204316093e-05, "loss": 1.3389, "step": 12700 }, { "epoch": 3.8, "grad_norm": 1.1272681951522827, "learning_rate": 4.567675016454869e-05, "loss": 1.3613, "step": 12705 }, { "epoch": 3.8, "grad_norm": 3.3429181575775146, "learning_rate": 4.567344714398104e-05, "loss": 1.3005, "step": 12710 }, { "epoch": 3.8, "grad_norm": 1.5739295482635498, "learning_rate": 4.5670142981640384e-05, "loss": 1.0753, "step": 12715 }, { "epoch": 3.81, "grad_norm": 1.3423209190368652, "learning_rate": 4.566683767770923e-05, "loss": 1.1526, "step": 12720 }, { "epoch": 3.81, "grad_norm": 1.0714255571365356, "learning_rate": 4.5663531232370105e-05, "loss": 1.4233, "step": 12725 }, { "epoch": 3.81, "grad_norm": 0.9389391541481018, "learning_rate": 4.5660223645805624e-05, "loss": 1.3871, "step": 12730 }, { "epoch": 3.81, "grad_norm": 1.9636523723602295, "learning_rate": 4.5656914918198465e-05, "loss": 1.2896, "step": 12735 }, { "epoch": 3.81, "grad_norm": 2.509476661682129, "learning_rate": 4.565360504973138e-05, "loss": 
1.2616, "step": 12740 }, { "epoch": 3.81, "grad_norm": 0.8651650547981262, "learning_rate": 4.565029404058715e-05, "loss": 1.4291, "step": 12745 }, { "epoch": 3.81, "grad_norm": 1.635857105255127, "learning_rate": 4.564698189094864e-05, "loss": 1.2209, "step": 12750 }, { "epoch": 3.82, "grad_norm": 2.29955792427063, "learning_rate": 4.564366860099879e-05, "loss": 1.3041, "step": 12755 }, { "epoch": 3.82, "grad_norm": 1.347915768623352, "learning_rate": 4.5640354170920575e-05, "loss": 1.2319, "step": 12760 }, { "epoch": 3.82, "grad_norm": 3.2308144569396973, "learning_rate": 4.563703860089705e-05, "loss": 1.3922, "step": 12765 }, { "epoch": 3.82, "grad_norm": 1.212275505065918, "learning_rate": 4.5633721891111336e-05, "loss": 1.4356, "step": 12770 }, { "epoch": 3.82, "grad_norm": 1.5803014039993286, "learning_rate": 4.563040404174662e-05, "loss": 1.2631, "step": 12775 }, { "epoch": 3.82, "grad_norm": 2.851174831390381, "learning_rate": 4.562708505298612e-05, "loss": 1.3305, "step": 12780 }, { "epoch": 3.83, "grad_norm": 1.705056071281433, "learning_rate": 4.5623764925013155e-05, "loss": 1.1758, "step": 12785 }, { "epoch": 3.83, "grad_norm": 1.0315181016921997, "learning_rate": 4.56204436580111e-05, "loss": 1.1893, "step": 12790 }, { "epoch": 3.83, "grad_norm": 1.8870512247085571, "learning_rate": 4.561712125216337e-05, "loss": 1.4365, "step": 12795 }, { "epoch": 3.83, "grad_norm": 2.1169257164001465, "learning_rate": 4.561379770765346e-05, "loss": 1.3076, "step": 12800 }, { "epoch": 3.83, "grad_norm": 2.4839279651641846, "learning_rate": 4.5610473024664935e-05, "loss": 1.2516, "step": 12805 }, { "epoch": 3.83, "grad_norm": 1.1137679815292358, "learning_rate": 4.560714720338141e-05, "loss": 1.3052, "step": 12810 }, { "epoch": 3.83, "grad_norm": 1.6442543268203735, "learning_rate": 4.560382024398655e-05, "loss": 1.3419, "step": 12815 }, { "epoch": 3.84, "grad_norm": 1.8083311319351196, "learning_rate": 4.560049214666413e-05, "loss": 1.304, "step": 12820 }, { "epoch": 
3.84, "grad_norm": 1.2691807746887207, "learning_rate": 4.559716291159793e-05, "loss": 1.2481, "step": 12825 }, { "epoch": 3.84, "grad_norm": 1.8940715789794922, "learning_rate": 4.5593832538971846e-05, "loss": 1.2207, "step": 12830 }, { "epoch": 3.84, "grad_norm": 1.9223408699035645, "learning_rate": 4.5590501028969787e-05, "loss": 1.313, "step": 12835 }, { "epoch": 3.84, "grad_norm": 1.92839777469635, "learning_rate": 4.5587168381775756e-05, "loss": 1.2413, "step": 12840 }, { "epoch": 3.84, "grad_norm": 1.7354521751403809, "learning_rate": 4.5583834597573825e-05, "loss": 1.2551, "step": 12845 }, { "epoch": 3.84, "grad_norm": 1.491490125656128, "learning_rate": 4.55804996765481e-05, "loss": 1.3245, "step": 12850 }, { "epoch": 3.85, "grad_norm": 0.8341639041900635, "learning_rate": 4.5577163618882766e-05, "loss": 1.3094, "step": 12855 }, { "epoch": 3.85, "grad_norm": 0.9003182053565979, "learning_rate": 4.557382642476208e-05, "loss": 1.3996, "step": 12860 }, { "epoch": 3.85, "grad_norm": 2.012786388397217, "learning_rate": 4.557048809437035e-05, "loss": 1.4447, "step": 12865 }, { "epoch": 3.85, "grad_norm": 1.0975584983825684, "learning_rate": 4.556714862789193e-05, "loss": 1.0958, "step": 12870 }, { "epoch": 3.85, "grad_norm": 1.1538339853286743, "learning_rate": 4.556380802551128e-05, "loss": 1.268, "step": 12875 }, { "epoch": 3.85, "grad_norm": 2.0543277263641357, "learning_rate": 4.556046628741288e-05, "loss": 1.4274, "step": 12880 }, { "epoch": 3.86, "grad_norm": 1.767110824584961, "learning_rate": 4.555712341378131e-05, "loss": 1.3229, "step": 12885 }, { "epoch": 3.86, "grad_norm": 1.8329064846038818, "learning_rate": 4.555377940480118e-05, "loss": 1.2361, "step": 12890 }, { "epoch": 3.86, "grad_norm": 0.9706895351409912, "learning_rate": 4.5550434260657174e-05, "loss": 1.0816, "step": 12895 }, { "epoch": 3.86, "grad_norm": 1.0541021823883057, "learning_rate": 4.554708798153404e-05, "loss": 1.1388, "step": 12900 }, { "epoch": 3.86, "grad_norm": 
1.4227367639541626, "learning_rate": 4.55437405676166e-05, "loss": 1.3312, "step": 12905 }, { "epoch": 3.86, "grad_norm": 1.4533671140670776, "learning_rate": 4.554039201908972e-05, "loss": 1.2984, "step": 12910 }, { "epoch": 3.86, "grad_norm": 1.0998986959457397, "learning_rate": 4.553704233613833e-05, "loss": 1.2591, "step": 12915 }, { "epoch": 3.87, "grad_norm": 1.8056262731552124, "learning_rate": 4.553369151894746e-05, "loss": 1.113, "step": 12920 }, { "epoch": 3.87, "grad_norm": 0.9125102162361145, "learning_rate": 4.553033956770214e-05, "loss": 1.3231, "step": 12925 }, { "epoch": 3.87, "grad_norm": 1.3653115034103394, "learning_rate": 4.55269864825875e-05, "loss": 1.4139, "step": 12930 }, { "epoch": 3.87, "grad_norm": 1.144234299659729, "learning_rate": 4.552363226378874e-05, "loss": 1.2501, "step": 12935 }, { "epoch": 3.87, "grad_norm": 2.3261189460754395, "learning_rate": 4.5520276911491104e-05, "loss": 1.2052, "step": 12940 }, { "epoch": 3.87, "grad_norm": 1.1465058326721191, "learning_rate": 4.5516920425879905e-05, "loss": 1.3006, "step": 12945 }, { "epoch": 3.87, "grad_norm": 1.5299936532974243, "learning_rate": 4.5513562807140515e-05, "loss": 1.3346, "step": 12950 }, { "epoch": 3.88, "grad_norm": 1.8486464023590088, "learning_rate": 4.551020405545837e-05, "loss": 1.3022, "step": 12955 }, { "epoch": 3.88, "grad_norm": 1.537140130996704, "learning_rate": 4.550684417101898e-05, "loss": 1.3352, "step": 12960 }, { "epoch": 3.88, "grad_norm": 1.5041508674621582, "learning_rate": 4.55034831540079e-05, "loss": 1.3945, "step": 12965 }, { "epoch": 3.88, "grad_norm": 2.660659074783325, "learning_rate": 4.550012100461075e-05, "loss": 1.3366, "step": 12970 }, { "epoch": 3.88, "grad_norm": 1.6128523349761963, "learning_rate": 4.549675772301323e-05, "loss": 1.3376, "step": 12975 }, { "epoch": 3.88, "grad_norm": 1.45650315284729, "learning_rate": 4.549339330940109e-05, "loss": 1.5226, "step": 12980 }, { "epoch": 3.88, "grad_norm": 1.1668511629104614, "learning_rate": 
4.5490027763960144e-05, "loss": 1.2782, "step": 12985 }, { "epoch": 3.89, "grad_norm": 1.2220723628997803, "learning_rate": 4.548666108687625e-05, "loss": 1.3738, "step": 12990 }, { "epoch": 3.89, "grad_norm": 2.1042776107788086, "learning_rate": 4.548329327833537e-05, "loss": 1.3442, "step": 12995 }, { "epoch": 3.89, "grad_norm": 1.7469139099121094, "learning_rate": 4.547992433852349e-05, "loss": 1.2097, "step": 13000 }, { "epoch": 3.89, "grad_norm": 0.8937451839447021, "learning_rate": 4.547655426762668e-05, "loss": 1.3553, "step": 13005 }, { "epoch": 3.89, "grad_norm": 2.8568267822265625, "learning_rate": 4.5473183065831045e-05, "loss": 1.4715, "step": 13010 }, { "epoch": 3.89, "grad_norm": 1.3825722932815552, "learning_rate": 4.546981073332281e-05, "loss": 1.2806, "step": 13015 }, { "epoch": 3.9, "grad_norm": 1.2610100507736206, "learning_rate": 4.54664372702882e-05, "loss": 1.1896, "step": 13020 }, { "epoch": 3.9, "grad_norm": 1.5070395469665527, "learning_rate": 4.5463062676913527e-05, "loss": 1.3712, "step": 13025 }, { "epoch": 3.9, "grad_norm": 2.2432565689086914, "learning_rate": 4.545968695338518e-05, "loss": 1.3497, "step": 13030 }, { "epoch": 3.9, "grad_norm": 1.4944096803665161, "learning_rate": 4.545631009988958e-05, "loss": 1.2663, "step": 13035 }, { "epoch": 3.9, "grad_norm": 2.186475992202759, "learning_rate": 4.545293211661324e-05, "loss": 1.3296, "step": 13040 }, { "epoch": 3.9, "grad_norm": 1.5511248111724854, "learning_rate": 4.544955300374273e-05, "loss": 1.1667, "step": 13045 }, { "epoch": 3.9, "grad_norm": 2.8364615440368652, "learning_rate": 4.544617276146465e-05, "loss": 1.2472, "step": 13050 }, { "epoch": 3.91, "grad_norm": 0.9596519470214844, "learning_rate": 4.544279138996571e-05, "loss": 1.3599, "step": 13055 }, { "epoch": 3.91, "grad_norm": 1.2000133991241455, "learning_rate": 4.543940888943264e-05, "loss": 1.229, "step": 13060 }, { "epoch": 3.91, "grad_norm": 1.129855751991272, "learning_rate": 4.543602526005227e-05, "loss": 1.2954, 
"step": 13065 }, { "epoch": 3.91, "grad_norm": 1.3284913301467896, "learning_rate": 4.543264050201146e-05, "loss": 1.3602, "step": 13070 }, { "epoch": 3.91, "grad_norm": 1.7732700109481812, "learning_rate": 4.5429254615497165e-05, "loss": 1.3526, "step": 13075 }, { "epoch": 3.91, "grad_norm": 1.5674593448638916, "learning_rate": 4.542586760069637e-05, "loss": 1.4001, "step": 13080 }, { "epoch": 3.91, "grad_norm": 1.350285291671753, "learning_rate": 4.542247945779613e-05, "loss": 1.3551, "step": 13085 }, { "epoch": 3.92, "grad_norm": 1.2830190658569336, "learning_rate": 4.5419090186983587e-05, "loss": 1.3441, "step": 13090 }, { "epoch": 3.92, "grad_norm": 1.5413031578063965, "learning_rate": 4.541569978844591e-05, "loss": 1.2436, "step": 13095 }, { "epoch": 3.92, "grad_norm": 1.2088831663131714, "learning_rate": 4.541230826237036e-05, "loss": 1.2817, "step": 13100 }, { "epoch": 3.92, "grad_norm": 1.644108533859253, "learning_rate": 4.540891560894424e-05, "loss": 1.3467, "step": 13105 }, { "epoch": 3.92, "grad_norm": 1.8455935716629028, "learning_rate": 4.5405521828354924e-05, "loss": 1.2956, "step": 13110 }, { "epoch": 3.92, "grad_norm": 1.0476539134979248, "learning_rate": 4.5402126920789854e-05, "loss": 1.2251, "step": 13115 }, { "epoch": 3.93, "grad_norm": 1.2740333080291748, "learning_rate": 4.539873088643651e-05, "loss": 1.4182, "step": 13120 }, { "epoch": 3.93, "grad_norm": 1.2473523616790771, "learning_rate": 4.539533372548247e-05, "loss": 1.4605, "step": 13125 }, { "epoch": 3.93, "grad_norm": 1.2522354125976562, "learning_rate": 4.5391935438115355e-05, "loss": 1.4085, "step": 13130 }, { "epoch": 3.93, "grad_norm": 0.8400856852531433, "learning_rate": 4.538853602452283e-05, "loss": 1.4076, "step": 13135 }, { "epoch": 3.93, "grad_norm": 3.1586833000183105, "learning_rate": 4.538513548489265e-05, "loss": 1.2214, "step": 13140 }, { "epoch": 3.93, "grad_norm": 1.3957751989364624, "learning_rate": 4.538173381941264e-05, "loss": 1.3048, "step": 13145 }, { "epoch": 
3.93, "grad_norm": 1.6071856021881104, "learning_rate": 4.537833102827065e-05, "loss": 1.2303, "step": 13150 }, { "epoch": 3.94, "grad_norm": 1.384070873260498, "learning_rate": 4.537492711165462e-05, "loss": 1.3229, "step": 13155 }, { "epoch": 3.94, "grad_norm": 2.3858485221862793, "learning_rate": 4.5371522069752544e-05, "loss": 1.3097, "step": 13160 }, { "epoch": 3.94, "grad_norm": 1.255236029624939, "learning_rate": 4.536811590275247e-05, "loss": 1.3769, "step": 13165 }, { "epoch": 3.94, "grad_norm": 1.0633465051651, "learning_rate": 4.5364708610842545e-05, "loss": 1.2269, "step": 13170 }, { "epoch": 3.94, "grad_norm": 1.6456542015075684, "learning_rate": 4.536130019421092e-05, "loss": 1.4208, "step": 13175 }, { "epoch": 3.94, "grad_norm": 1.0649845600128174, "learning_rate": 4.535789065304585e-05, "loss": 1.2724, "step": 13180 }, { "epoch": 3.94, "grad_norm": 2.226824998855591, "learning_rate": 4.535447998753564e-05, "loss": 1.2443, "step": 13185 }, { "epoch": 3.95, "grad_norm": 1.2029057741165161, "learning_rate": 4.535106819786866e-05, "loss": 1.3256, "step": 13190 }, { "epoch": 3.95, "grad_norm": 0.9885445237159729, "learning_rate": 4.534765528423333e-05, "loss": 1.3866, "step": 13195 }, { "epoch": 3.95, "grad_norm": 4.255756855010986, "learning_rate": 4.5344241246818154e-05, "loss": 1.1524, "step": 13200 }, { "epoch": 3.95, "grad_norm": 1.9906721115112305, "learning_rate": 4.5340826085811684e-05, "loss": 1.3169, "step": 13205 }, { "epoch": 3.95, "grad_norm": 1.9341821670532227, "learning_rate": 4.533740980140253e-05, "loss": 1.4293, "step": 13210 }, { "epoch": 3.95, "grad_norm": 2.2988078594207764, "learning_rate": 4.533399239377937e-05, "loss": 1.2559, "step": 13215 }, { "epoch": 3.96, "grad_norm": 1.1931335926055908, "learning_rate": 4.5330573863130946e-05, "loss": 1.2239, "step": 13220 }, { "epoch": 3.96, "grad_norm": 1.9085408449172974, "learning_rate": 4.5327154209646065e-05, "loss": 1.3006, "step": 13225 }, { "epoch": 3.96, "grad_norm": 
1.42606782913208, "learning_rate": 4.532373343351358e-05, "loss": 1.1926, "step": 13230 }, { "epoch": 3.96, "grad_norm": 1.9960684776306152, "learning_rate": 4.5320311534922425e-05, "loss": 1.4581, "step": 13235 }, { "epoch": 3.96, "grad_norm": 1.3302810192108154, "learning_rate": 4.531688851406159e-05, "loss": 1.2497, "step": 13240 }, { "epoch": 3.96, "grad_norm": 1.0140787363052368, "learning_rate": 4.531346437112012e-05, "loss": 1.2587, "step": 13245 }, { "epoch": 3.96, "grad_norm": 2.0420432090759277, "learning_rate": 4.5310039106287115e-05, "loss": 1.2299, "step": 13250 }, { "epoch": 3.97, "grad_norm": 0.9707965850830078, "learning_rate": 4.530661271975177e-05, "loss": 1.2625, "step": 13255 }, { "epoch": 3.97, "grad_norm": 0.9889094829559326, "learning_rate": 4.530318521170332e-05, "loss": 1.445, "step": 13260 }, { "epoch": 3.97, "grad_norm": 1.5554451942443848, "learning_rate": 4.529975658233104e-05, "loss": 1.1236, "step": 13265 }, { "epoch": 3.97, "grad_norm": 1.6578177213668823, "learning_rate": 4.529632683182432e-05, "loss": 1.1677, "step": 13270 }, { "epoch": 3.97, "grad_norm": 2.4614758491516113, "learning_rate": 4.529289596037256e-05, "loss": 1.2574, "step": 13275 }, { "epoch": 3.97, "grad_norm": 1.8018752336502075, "learning_rate": 4.528946396816524e-05, "loss": 1.3027, "step": 13280 }, { "epoch": 3.97, "grad_norm": 2.3157856464385986, "learning_rate": 4.5286030855391924e-05, "loss": 1.439, "step": 13285 }, { "epoch": 3.98, "grad_norm": 1.605214238166809, "learning_rate": 4.528259662224221e-05, "loss": 1.2793, "step": 13290 }, { "epoch": 3.98, "grad_norm": 3.1263720989227295, "learning_rate": 4.527916126890576e-05, "loss": 1.386, "step": 13295 }, { "epoch": 3.98, "grad_norm": 0.8434714078903198, "learning_rate": 4.527572479557232e-05, "loss": 1.3058, "step": 13300 }, { "epoch": 3.98, "grad_norm": 1.2560151815414429, "learning_rate": 4.5272287202431674e-05, "loss": 1.2894, "step": 13305 }, { "epoch": 3.98, "grad_norm": 2.4032607078552246, 
"learning_rate": 4.526884848967368e-05, "loss": 1.2046, "step": 13310 }, { "epoch": 3.98, "grad_norm": 0.8934091329574585, "learning_rate": 4.526540865748824e-05, "loss": 1.3464, "step": 13315 }, { "epoch": 3.99, "grad_norm": 1.3520596027374268, "learning_rate": 4.526196770606536e-05, "loss": 1.3311, "step": 13320 }, { "epoch": 3.99, "grad_norm": 1.0915952920913696, "learning_rate": 4.5258525635595054e-05, "loss": 1.3354, "step": 13325 }, { "epoch": 3.99, "grad_norm": 1.1796505451202393, "learning_rate": 4.525508244626743e-05, "loss": 1.3232, "step": 13330 }, { "epoch": 3.99, "grad_norm": 1.1852450370788574, "learning_rate": 4.525163813827267e-05, "loss": 1.1876, "step": 13335 }, { "epoch": 3.99, "grad_norm": 3.0009610652923584, "learning_rate": 4.524819271180098e-05, "loss": 1.1891, "step": 13340 }, { "epoch": 3.99, "grad_norm": 0.9333651065826416, "learning_rate": 4.524474616704265e-05, "loss": 1.3117, "step": 13345 }, { "epoch": 3.99, "grad_norm": 1.0289852619171143, "learning_rate": 4.524129850418803e-05, "loss": 1.3284, "step": 13350 }, { "epoch": 4.0, "grad_norm": 1.6752545833587646, "learning_rate": 4.523784972342755e-05, "loss": 1.4082, "step": 13355 }, { "epoch": 4.0, "grad_norm": 1.345482349395752, "learning_rate": 4.523439982495166e-05, "loss": 1.2208, "step": 13360 }, { "epoch": 4.0, "grad_norm": 1.1788907051086426, "learning_rate": 4.5230948808950894e-05, "loss": 1.2674, "step": 13365 }, { "epoch": 4.0, "grad_norm": 0.9993130564689636, "learning_rate": 4.522749667561586e-05, "loss": 1.1215, "step": 13370 }, { "epoch": 4.0, "grad_norm": 1.5533525943756104, "learning_rate": 4.52240434251372e-05, "loss": 1.409, "step": 13375 }, { "epoch": 4.0, "grad_norm": 0.6221991181373596, "learning_rate": 4.522058905770564e-05, "loss": 1.2527, "step": 13380 }, { "epoch": 4.0, "grad_norm": 1.9359819889068604, "learning_rate": 4.521713357351198e-05, "loss": 1.1879, "step": 13385 }, { "epoch": 4.01, "grad_norm": 1.0392394065856934, "learning_rate": 4.521367697274704e-05, 
"loss": 1.1468, "step": 13390 }, { "epoch": 4.01, "grad_norm": 1.0424681901931763, "learning_rate": 4.5210219255601734e-05, "loss": 1.2535, "step": 13395 }, { "epoch": 4.01, "grad_norm": 1.4294004440307617, "learning_rate": 4.5206760422267025e-05, "loss": 1.3226, "step": 13400 }, { "epoch": 4.01, "grad_norm": 1.275223731994629, "learning_rate": 4.520330047293394e-05, "loss": 1.3615, "step": 13405 }, { "epoch": 4.01, "grad_norm": 1.919305443763733, "learning_rate": 4.519983940779357e-05, "loss": 1.1126, "step": 13410 }, { "epoch": 4.01, "grad_norm": 0.8619056344032288, "learning_rate": 4.519637722703707e-05, "loss": 1.2955, "step": 13415 }, { "epoch": 4.02, "grad_norm": 3.1386544704437256, "learning_rate": 4.519291393085564e-05, "loss": 1.0872, "step": 13420 }, { "epoch": 4.02, "grad_norm": 1.826619029045105, "learning_rate": 4.5189449519440575e-05, "loss": 1.3617, "step": 13425 }, { "epoch": 4.02, "grad_norm": 0.8781746029853821, "learning_rate": 4.5185983992983186e-05, "loss": 1.1874, "step": 13430 }, { "epoch": 4.02, "grad_norm": 1.6987659931182861, "learning_rate": 4.518251735167489e-05, "loss": 1.5476, "step": 13435 }, { "epoch": 4.02, "grad_norm": 1.178309679031372, "learning_rate": 4.517904959570714e-05, "loss": 1.1798, "step": 13440 }, { "epoch": 4.02, "grad_norm": 1.655390977859497, "learning_rate": 4.517558072527146e-05, "loss": 1.3168, "step": 13445 }, { "epoch": 4.02, "grad_norm": 2.087698459625244, "learning_rate": 4.517211074055942e-05, "loss": 1.1618, "step": 13450 }, { "epoch": 4.03, "grad_norm": 1.096436858177185, "learning_rate": 4.516863964176268e-05, "loss": 1.1491, "step": 13455 }, { "epoch": 4.03, "grad_norm": 1.1264992952346802, "learning_rate": 4.5165167429072924e-05, "loss": 1.2237, "step": 13460 }, { "epoch": 4.03, "grad_norm": 4.63450813293457, "learning_rate": 4.516169410268194e-05, "loss": 1.2954, "step": 13465 }, { "epoch": 4.03, "grad_norm": 1.207798719406128, "learning_rate": 4.5158219662781546e-05, "loss": 1.3565, "step": 13470 }, { 
"epoch": 4.03, "grad_norm": 1.3179618120193481, "learning_rate": 4.515474410956363e-05, "loss": 1.358, "step": 13475 }, { "epoch": 4.03, "grad_norm": 2.3414742946624756, "learning_rate": 4.5151267443220146e-05, "loss": 1.2395, "step": 13480 }, { "epoch": 4.03, "grad_norm": 1.8621050119400024, "learning_rate": 4.514778966394312e-05, "loss": 1.318, "step": 13485 }, { "epoch": 4.04, "grad_norm": 1.7734013795852661, "learning_rate": 4.5144310771924606e-05, "loss": 1.261, "step": 13490 }, { "epoch": 4.04, "grad_norm": 1.8414225578308105, "learning_rate": 4.514083076735674e-05, "loss": 1.1795, "step": 13495 }, { "epoch": 4.04, "grad_norm": 1.5032846927642822, "learning_rate": 4.5137349650431735e-05, "loss": 1.2926, "step": 13500 }, { "epoch": 4.04, "grad_norm": 2.254154920578003, "learning_rate": 4.513386742134183e-05, "loss": 1.227, "step": 13505 }, { "epoch": 4.04, "grad_norm": 1.056298017501831, "learning_rate": 4.5130384080279364e-05, "loss": 1.1288, "step": 13510 }, { "epoch": 4.04, "grad_norm": 1.4214680194854736, "learning_rate": 4.512689962743671e-05, "loss": 1.317, "step": 13515 }, { "epoch": 4.05, "grad_norm": 2.0844547748565674, "learning_rate": 4.512341406300631e-05, "loss": 1.152, "step": 13520 }, { "epoch": 4.05, "grad_norm": 1.6370080709457397, "learning_rate": 4.511992738718066e-05, "loss": 1.2748, "step": 13525 }, { "epoch": 4.05, "grad_norm": 1.4384336471557617, "learning_rate": 4.511643960015234e-05, "loss": 1.3395, "step": 13530 }, { "epoch": 4.05, "grad_norm": 1.0424606800079346, "learning_rate": 4.5112950702113975e-05, "loss": 1.2358, "step": 13535 }, { "epoch": 4.05, "grad_norm": 1.1179556846618652, "learning_rate": 4.5109460693258245e-05, "loss": 1.1542, "step": 13540 }, { "epoch": 4.05, "grad_norm": 1.2349655628204346, "learning_rate": 4.51059695737779e-05, "loss": 1.2682, "step": 13545 }, { "epoch": 4.05, "grad_norm": 2.0068607330322266, "learning_rate": 4.510247734386576e-05, "loss": 1.2837, "step": 13550 }, { "epoch": 4.06, "grad_norm": 
3.1415653228759766, "learning_rate": 4.5098984003714686e-05, "loss": 1.2172, "step": 13555 }, { "epoch": 4.06, "grad_norm": 2.988642454147339, "learning_rate": 4.5095489553517625e-05, "loss": 1.1815, "step": 13560 }, { "epoch": 4.06, "grad_norm": 2.215256690979004, "learning_rate": 4.509199399346756e-05, "loss": 1.2083, "step": 13565 }, { "epoch": 4.06, "grad_norm": 2.271375894546509, "learning_rate": 4.508849732375755e-05, "loss": 1.4811, "step": 13570 }, { "epoch": 4.06, "grad_norm": 2.4488816261291504, "learning_rate": 4.5084999544580714e-05, "loss": 1.3544, "step": 13575 }, { "epoch": 4.06, "grad_norm": 1.7777202129364014, "learning_rate": 4.5081500656130225e-05, "loss": 1.0847, "step": 13580 }, { "epoch": 4.06, "grad_norm": 1.3816187381744385, "learning_rate": 4.507800065859934e-05, "loss": 1.2128, "step": 13585 }, { "epoch": 4.07, "grad_norm": 1.5490256547927856, "learning_rate": 4.507449955218134e-05, "loss": 1.2547, "step": 13590 }, { "epoch": 4.07, "grad_norm": 1.968616247177124, "learning_rate": 4.50709973370696e-05, "loss": 1.2077, "step": 13595 }, { "epoch": 4.07, "grad_norm": 1.046992540359497, "learning_rate": 4.506749401345754e-05, "loss": 1.3679, "step": 13600 }, { "epoch": 4.07, "grad_norm": 2.8304526805877686, "learning_rate": 4.506398958153864e-05, "loss": 1.2516, "step": 13605 }, { "epoch": 4.07, "grad_norm": 2.89141845703125, "learning_rate": 4.5060484041506454e-05, "loss": 1.2331, "step": 13610 }, { "epoch": 4.07, "grad_norm": 1.2688446044921875, "learning_rate": 4.505697739355459e-05, "loss": 1.2566, "step": 13615 }, { "epoch": 4.07, "grad_norm": 2.1087491512298584, "learning_rate": 4.505346963787671e-05, "loss": 1.0886, "step": 13620 }, { "epoch": 4.08, "grad_norm": 2.119562864303589, "learning_rate": 4.5049960774666546e-05, "loss": 1.1783, "step": 13625 }, { "epoch": 4.08, "grad_norm": 1.7251992225646973, "learning_rate": 4.504645080411789e-05, "loss": 1.1867, "step": 13630 }, { "epoch": 4.08, "grad_norm": 2.285336494445801, 
"learning_rate": 4.5042939726424585e-05, "loss": 1.223, "step": 13635 }, { "epoch": 4.08, "grad_norm": 1.3887274265289307, "learning_rate": 4.503942754178056e-05, "loss": 1.1539, "step": 13640 }, { "epoch": 4.08, "grad_norm": 2.003840923309326, "learning_rate": 4.503591425037978e-05, "loss": 1.3083, "step": 13645 }, { "epoch": 4.08, "grad_norm": 1.603243112564087, "learning_rate": 4.5032399852416285e-05, "loss": 1.3067, "step": 13650 }, { "epoch": 4.09, "grad_norm": 1.1768264770507812, "learning_rate": 4.5028884348084166e-05, "loss": 1.4105, "step": 13655 }, { "epoch": 4.09, "grad_norm": 1.2902339696884155, "learning_rate": 4.502536773757758e-05, "loss": 1.306, "step": 13660 }, { "epoch": 4.09, "grad_norm": 1.6430679559707642, "learning_rate": 4.502185002109075e-05, "loss": 1.3452, "step": 13665 }, { "epoch": 4.09, "grad_norm": 1.537097454071045, "learning_rate": 4.5018331198817966e-05, "loss": 1.2132, "step": 13670 }, { "epoch": 4.09, "grad_norm": 0.9888067245483398, "learning_rate": 4.5014811270953546e-05, "loss": 1.1079, "step": 13675 }, { "epoch": 4.09, "grad_norm": 2.0669586658477783, "learning_rate": 4.5011290237691905e-05, "loss": 1.3401, "step": 13680 }, { "epoch": 4.09, "grad_norm": 1.0341452360153198, "learning_rate": 4.500776809922751e-05, "loss": 1.2518, "step": 13685 }, { "epoch": 4.1, "grad_norm": 1.6001540422439575, "learning_rate": 4.500424485575487e-05, "loss": 1.2202, "step": 13690 }, { "epoch": 4.1, "grad_norm": 1.255566120147705, "learning_rate": 4.500072050746859e-05, "loss": 1.187, "step": 13695 }, { "epoch": 4.1, "grad_norm": 1.929617166519165, "learning_rate": 4.4997195054563304e-05, "loss": 1.1811, "step": 13700 }, { "epoch": 4.1, "grad_norm": 5.457527160644531, "learning_rate": 4.499366849723372e-05, "loss": 1.296, "step": 13705 }, { "epoch": 4.1, "grad_norm": 1.2115625143051147, "learning_rate": 4.4990140835674606e-05, "loss": 1.3369, "step": 13710 }, { "epoch": 4.1, "grad_norm": 0.7389108538627625, "learning_rate": 4.498661207008079e-05, 
"loss": 1.2963, "step": 13715 }, { "epoch": 4.1, "grad_norm": 1.6749463081359863, "learning_rate": 4.498308220064717e-05, "loss": 1.1794, "step": 13720 }, { "epoch": 4.11, "grad_norm": 2.0370562076568604, "learning_rate": 4.497955122756868e-05, "loss": 1.4314, "step": 13725 }, { "epoch": 4.11, "grad_norm": 3.5120744705200195, "learning_rate": 4.4976019151040364e-05, "loss": 1.1369, "step": 13730 }, { "epoch": 4.11, "grad_norm": 2.3618757724761963, "learning_rate": 4.497248597125726e-05, "loss": 1.3716, "step": 13735 }, { "epoch": 4.11, "grad_norm": 1.495405673980713, "learning_rate": 4.496895168841452e-05, "loss": 1.3534, "step": 13740 }, { "epoch": 4.11, "grad_norm": 3.024486541748047, "learning_rate": 4.496541630270733e-05, "loss": 1.2207, "step": 13745 }, { "epoch": 4.11, "grad_norm": 2.2674665451049805, "learning_rate": 4.496187981433095e-05, "loss": 1.4337, "step": 13750 }, { "epoch": 4.12, "grad_norm": 2.4405548572540283, "learning_rate": 4.49583422234807e-05, "loss": 1.318, "step": 13755 }, { "epoch": 4.12, "grad_norm": 0.9450554847717285, "learning_rate": 4.495480353035196e-05, "loss": 1.3367, "step": 13760 }, { "epoch": 4.12, "grad_norm": 2.9744069576263428, "learning_rate": 4.4951263735140156e-05, "loss": 1.2266, "step": 13765 }, { "epoch": 4.12, "grad_norm": 2.1863605976104736, "learning_rate": 4.4947722838040795e-05, "loss": 1.2261, "step": 13770 }, { "epoch": 4.12, "grad_norm": 1.7596246004104614, "learning_rate": 4.494418083924944e-05, "loss": 1.187, "step": 13775 }, { "epoch": 4.12, "grad_norm": 1.8887165784835815, "learning_rate": 4.4940637738961697e-05, "loss": 1.2505, "step": 13780 }, { "epoch": 4.12, "grad_norm": 1.4990458488464355, "learning_rate": 4.493709353737327e-05, "loss": 1.2451, "step": 13785 }, { "epoch": 4.13, "grad_norm": 1.5014214515686035, "learning_rate": 4.493354823467989e-05, "loss": 1.2606, "step": 13790 }, { "epoch": 4.13, "grad_norm": 2.0150039196014404, "learning_rate": 4.4930001831077355e-05, "loss": 1.4081, "step": 13795 }, 
{ "epoch": 4.13, "grad_norm": 2.609929323196411, "learning_rate": 4.492645432676154e-05, "loss": 1.2381, "step": 13800 }, { "epoch": 4.13, "grad_norm": 1.365830659866333, "learning_rate": 4.4922905721928366e-05, "loss": 1.2332, "step": 13805 }, { "epoch": 4.13, "grad_norm": 1.4366599321365356, "learning_rate": 4.491935601677381e-05, "loss": 1.1557, "step": 13810 }, { "epoch": 4.13, "grad_norm": 3.553328037261963, "learning_rate": 4.491580521149393e-05, "loss": 1.2962, "step": 13815 }, { "epoch": 4.13, "grad_norm": 1.386233925819397, "learning_rate": 4.4912253306284835e-05, "loss": 1.1249, "step": 13820 }, { "epoch": 4.14, "grad_norm": 2.232959747314453, "learning_rate": 4.490870030134268e-05, "loss": 1.0412, "step": 13825 }, { "epoch": 4.14, "grad_norm": 1.5061743259429932, "learning_rate": 4.49051461968637e-05, "loss": 1.2668, "step": 13830 }, { "epoch": 4.14, "grad_norm": 2.83976674079895, "learning_rate": 4.49015909930442e-05, "loss": 1.3338, "step": 13835 }, { "epoch": 4.14, "grad_norm": 1.8282452821731567, "learning_rate": 4.48980346900805e-05, "loss": 1.2672, "step": 13840 }, { "epoch": 4.14, "grad_norm": 1.090014100074768, "learning_rate": 4.489447728816904e-05, "loss": 1.1899, "step": 13845 }, { "epoch": 4.14, "grad_norm": 2.0513787269592285, "learning_rate": 4.489091878750627e-05, "loss": 1.2211, "step": 13850 }, { "epoch": 4.15, "grad_norm": 1.3292232751846313, "learning_rate": 4.4887359188288724e-05, "loss": 1.199, "step": 13855 }, { "epoch": 4.15, "grad_norm": 2.754366397857666, "learning_rate": 4.4883798490713014e-05, "loss": 1.2695, "step": 13860 }, { "epoch": 4.15, "grad_norm": 1.0987385511398315, "learning_rate": 4.488023669497578e-05, "loss": 1.327, "step": 13865 }, { "epoch": 4.15, "grad_norm": 0.9801754951477051, "learning_rate": 4.487667380127373e-05, "loss": 1.291, "step": 13870 }, { "epoch": 4.15, "grad_norm": 1.8694130182266235, "learning_rate": 4.4873109809803654e-05, "loss": 1.3247, "step": 13875 }, { "epoch": 4.15, "grad_norm": 
1.1474727392196655, "learning_rate": 4.486954472076238e-05, "loss": 1.2209, "step": 13880 }, { "epoch": 4.15, "grad_norm": 1.853026032447815, "learning_rate": 4.48659785343468e-05, "loss": 1.1524, "step": 13885 }, { "epoch": 4.16, "grad_norm": 1.3957430124282837, "learning_rate": 4.4862411250753875e-05, "loss": 1.3743, "step": 13890 }, { "epoch": 4.16, "grad_norm": 1.5522494316101074, "learning_rate": 4.485884287018063e-05, "loss": 1.2599, "step": 13895 }, { "epoch": 4.16, "grad_norm": 2.7096736431121826, "learning_rate": 4.485527339282412e-05, "loss": 1.3198, "step": 13900 }, { "epoch": 4.16, "grad_norm": 1.6966811418533325, "learning_rate": 4.485170281888151e-05, "loss": 1.2198, "step": 13905 }, { "epoch": 4.16, "grad_norm": 1.197799563407898, "learning_rate": 4.484813114854999e-05, "loss": 1.3353, "step": 13910 }, { "epoch": 4.16, "grad_norm": 1.5476326942443848, "learning_rate": 4.4844558382026814e-05, "loss": 1.1109, "step": 13915 }, { "epoch": 4.16, "grad_norm": 2.144993543624878, "learning_rate": 4.484098451950931e-05, "loss": 1.2156, "step": 13920 }, { "epoch": 4.17, "grad_norm": 1.5577327013015747, "learning_rate": 4.483740956119485e-05, "loss": 1.3845, "step": 13925 }, { "epoch": 4.17, "grad_norm": 1.9885210990905762, "learning_rate": 4.4833833507280884e-05, "loss": 1.2396, "step": 13930 }, { "epoch": 4.17, "grad_norm": 1.833871841430664, "learning_rate": 4.483025635796491e-05, "loss": 1.3424, "step": 13935 }, { "epoch": 4.17, "grad_norm": 2.1424660682678223, "learning_rate": 4.482667811344448e-05, "loss": 1.2628, "step": 13940 }, { "epoch": 4.17, "grad_norm": 2.6087942123413086, "learning_rate": 4.4823098773917235e-05, "loss": 1.1262, "step": 13945 }, { "epoch": 4.17, "grad_norm": 1.847556710243225, "learning_rate": 4.4819518339580844e-05, "loss": 1.2522, "step": 13950 }, { "epoch": 4.18, "grad_norm": 1.6152390241622925, "learning_rate": 4.4815936810633066e-05, "loss": 1.4579, "step": 13955 }, { "epoch": 4.18, "grad_norm": 3.4430344104766846, 
"learning_rate": 4.4812354187271686e-05, "loss": 1.2838, "step": 13960 }, { "epoch": 4.18, "grad_norm": 1.8315435647964478, "learning_rate": 4.4808770469694584e-05, "loss": 1.322, "step": 13965 }, { "epoch": 4.18, "grad_norm": 1.6161450147628784, "learning_rate": 4.480518565809967e-05, "loss": 1.4559, "step": 13970 }, { "epoch": 4.18, "grad_norm": 1.8581215143203735, "learning_rate": 4.480159975268494e-05, "loss": 1.2889, "step": 13975 }, { "epoch": 4.18, "grad_norm": 1.6320557594299316, "learning_rate": 4.479801275364845e-05, "loss": 1.3009, "step": 13980 }, { "epoch": 4.18, "grad_norm": 1.0718029737472534, "learning_rate": 4.4794424661188286e-05, "loss": 1.3986, "step": 13985 }, { "epoch": 4.19, "grad_norm": 2.1297664642333984, "learning_rate": 4.479083547550263e-05, "loss": 1.1725, "step": 13990 }, { "epoch": 4.19, "grad_norm": 2.0025558471679688, "learning_rate": 4.478724519678969e-05, "loss": 1.1128, "step": 13995 }, { "epoch": 4.19, "grad_norm": 0.8468442559242249, "learning_rate": 4.4783653825247776e-05, "loss": 1.1415, "step": 14000 }, { "epoch": 4.19, "grad_norm": 1.221956491470337, "learning_rate": 4.478006136107522e-05, "loss": 1.4145, "step": 14005 }, { "epoch": 4.19, "grad_norm": 2.629194974899292, "learning_rate": 4.477646780447043e-05, "loss": 1.4677, "step": 14010 }, { "epoch": 4.19, "grad_norm": 1.3865878582000732, "learning_rate": 4.477287315563189e-05, "loss": 1.1557, "step": 14015 }, { "epoch": 4.19, "grad_norm": 1.6378535032272339, "learning_rate": 4.4769277414758115e-05, "loss": 1.1965, "step": 14020 }, { "epoch": 4.2, "grad_norm": 7.970203876495361, "learning_rate": 4.47656805820477e-05, "loss": 1.2789, "step": 14025 }, { "epoch": 4.2, "grad_norm": 2.070845365524292, "learning_rate": 4.476208265769929e-05, "loss": 1.2761, "step": 14030 }, { "epoch": 4.2, "grad_norm": 4.676011085510254, "learning_rate": 4.475848364191159e-05, "loss": 1.2767, "step": 14035 }, { "epoch": 4.2, "grad_norm": 2.338252305984497, "learning_rate": 
4.4754883534883384e-05, "loss": 1.3701, "step": 14040 }, { "epoch": 4.2, "grad_norm": 1.2467647790908813, "learning_rate": 4.4751282336813494e-05, "loss": 1.1699, "step": 14045 }, { "epoch": 4.2, "grad_norm": 1.1198375225067139, "learning_rate": 4.474768004790081e-05, "loss": 1.3193, "step": 14050 }, { "epoch": 4.21, "grad_norm": 2.231294870376587, "learning_rate": 4.474407666834428e-05, "loss": 1.3017, "step": 14055 }, { "epoch": 4.21, "grad_norm": 1.3994890451431274, "learning_rate": 4.474047219834292e-05, "loss": 1.181, "step": 14060 }, { "epoch": 4.21, "grad_norm": 1.440882921218872, "learning_rate": 4.47368666380958e-05, "loss": 1.3528, "step": 14065 }, { "epoch": 4.21, "grad_norm": 2.0190446376800537, "learning_rate": 4.4733259987802046e-05, "loss": 1.3357, "step": 14070 }, { "epoch": 4.21, "grad_norm": 1.6590640544891357, "learning_rate": 4.4729652247660855e-05, "loss": 1.2267, "step": 14075 }, { "epoch": 4.21, "grad_norm": 1.370227336883545, "learning_rate": 4.472604341787149e-05, "loss": 1.2786, "step": 14080 }, { "epoch": 4.21, "grad_norm": 1.0957473516464233, "learning_rate": 4.472243349863324e-05, "loss": 1.11, "step": 14085 }, { "epoch": 4.22, "grad_norm": 2.918076992034912, "learning_rate": 4.47188224901455e-05, "loss": 1.2905, "step": 14090 }, { "epoch": 4.22, "grad_norm": 1.5842289924621582, "learning_rate": 4.4715210392607675e-05, "loss": 1.1318, "step": 14095 }, { "epoch": 4.22, "grad_norm": 0.9747616052627563, "learning_rate": 4.471159720621928e-05, "loss": 1.3338, "step": 14100 }, { "epoch": 4.22, "grad_norm": 1.7682857513427734, "learning_rate": 4.470798293117986e-05, "loss": 1.0593, "step": 14105 }, { "epoch": 4.22, "grad_norm": 1.311083436012268, "learning_rate": 4.470436756768903e-05, "loss": 1.1753, "step": 14110 }, { "epoch": 4.22, "grad_norm": 1.279892086982727, "learning_rate": 4.470075111594646e-05, "loss": 1.2065, "step": 14115 }, { "epoch": 4.22, "grad_norm": 1.927426815032959, "learning_rate": 4.4697133576151885e-05, "loss": 1.3022, 
"step": 14120 }, { "epoch": 4.23, "grad_norm": 2.021284580230713, "learning_rate": 4.4693514948505095e-05, "loss": 1.1597, "step": 14125 }, { "epoch": 4.23, "grad_norm": 1.2196049690246582, "learning_rate": 4.4689895233205945e-05, "loss": 1.3091, "step": 14130 }, { "epoch": 4.23, "grad_norm": 2.0658938884735107, "learning_rate": 4.4686274430454346e-05, "loss": 1.2713, "step": 14135 }, { "epoch": 4.23, "grad_norm": 1.2542707920074463, "learning_rate": 4.4682652540450274e-05, "loss": 1.2778, "step": 14140 }, { "epoch": 4.23, "grad_norm": 1.4691362380981445, "learning_rate": 4.4679029563393756e-05, "loss": 1.073, "step": 14145 }, { "epoch": 4.23, "grad_norm": 2.216343641281128, "learning_rate": 4.46754054994849e-05, "loss": 1.3336, "step": 14150 }, { "epoch": 4.24, "grad_norm": 3.187505006790161, "learning_rate": 4.467178034892384e-05, "loss": 1.2517, "step": 14155 }, { "epoch": 4.24, "grad_norm": 1.3655002117156982, "learning_rate": 4.466815411191081e-05, "loss": 1.3117, "step": 14160 }, { "epoch": 4.24, "grad_norm": 1.2147845029830933, "learning_rate": 4.466452678864607e-05, "loss": 1.4121, "step": 14165 }, { "epoch": 4.24, "grad_norm": 0.8723526000976562, "learning_rate": 4.466089837932995e-05, "loss": 1.2157, "step": 14170 }, { "epoch": 4.24, "grad_norm": 2.1111645698547363, "learning_rate": 4.465726888416285e-05, "loss": 1.3665, "step": 14175 }, { "epoch": 4.24, "grad_norm": 3.004091739654541, "learning_rate": 4.4653638303345225e-05, "loss": 1.4633, "step": 14180 }, { "epoch": 4.24, "grad_norm": 2.4839401245117188, "learning_rate": 4.465000663707758e-05, "loss": 1.3408, "step": 14185 }, { "epoch": 4.25, "grad_norm": 1.8565268516540527, "learning_rate": 4.46463738855605e-05, "loss": 1.2731, "step": 14190 }, { "epoch": 4.25, "grad_norm": 2.492748975753784, "learning_rate": 4.464274004899461e-05, "loss": 1.3723, "step": 14195 }, { "epoch": 4.25, "grad_norm": 2.4697351455688477, "learning_rate": 4.46391051275806e-05, "loss": 1.1248, "step": 14200 }, { "epoch": 4.25, 
"grad_norm": 2.914813280105591, "learning_rate": 4.4635469121519234e-05, "loss": 1.0966, "step": 14205 }, { "epoch": 4.25, "grad_norm": 1.3312268257141113, "learning_rate": 4.463183203101131e-05, "loss": 1.218, "step": 14210 }, { "epoch": 4.25, "grad_norm": 3.274707317352295, "learning_rate": 4.4628193856257714e-05, "loss": 1.1181, "step": 14215 }, { "epoch": 4.25, "grad_norm": 1.0050827264785767, "learning_rate": 4.462455459745938e-05, "loss": 1.28, "step": 14220 }, { "epoch": 4.26, "grad_norm": 1.2786221504211426, "learning_rate": 4.462091425481728e-05, "loss": 1.1789, "step": 14225 }, { "epoch": 4.26, "grad_norm": 1.9664305448532104, "learning_rate": 4.4617272828532495e-05, "loss": 1.2938, "step": 14230 }, { "epoch": 4.26, "grad_norm": 1.6089929342269897, "learning_rate": 4.461363031880611e-05, "loss": 1.2964, "step": 14235 }, { "epoch": 4.26, "grad_norm": 2.8070948123931885, "learning_rate": 4.460998672583933e-05, "loss": 1.3383, "step": 14240 }, { "epoch": 4.26, "grad_norm": 1.145674705505371, "learning_rate": 4.460634204983334e-05, "loss": 1.3326, "step": 14245 }, { "epoch": 4.26, "grad_norm": 2.9494822025299072, "learning_rate": 4.4602696290989477e-05, "loss": 1.2622, "step": 14250 }, { "epoch": 4.26, "grad_norm": 2.005913734436035, "learning_rate": 4.459904944950907e-05, "loss": 1.267, "step": 14255 }, { "epoch": 4.27, "grad_norm": 1.1085389852523804, "learning_rate": 4.4595401525593525e-05, "loss": 1.2181, "step": 14260 }, { "epoch": 4.27, "grad_norm": 3.622462272644043, "learning_rate": 4.459175251944433e-05, "loss": 1.3522, "step": 14265 }, { "epoch": 4.27, "grad_norm": 2.914292573928833, "learning_rate": 4.458810243126301e-05, "loss": 1.3473, "step": 14270 }, { "epoch": 4.27, "grad_norm": 2.464303493499756, "learning_rate": 4.458445126125115e-05, "loss": 1.1967, "step": 14275 }, { "epoch": 4.27, "grad_norm": 1.2096035480499268, "learning_rate": 4.45807990096104e-05, "loss": 1.1164, "step": 14280 }, { "epoch": 4.27, "grad_norm": 2.403764247894287, 
"learning_rate": 4.4577145676542474e-05, "loss": 1.1992, "step": 14285 }, { "epoch": 4.28, "grad_norm": 1.3285304307937622, "learning_rate": 4.4573491262249144e-05, "loss": 1.2245, "step": 14290 }, { "epoch": 4.28, "grad_norm": 1.1528428792953491, "learning_rate": 4.4569835766932244e-05, "loss": 1.3815, "step": 14295 }, { "epoch": 4.28, "grad_norm": 1.0769155025482178, "learning_rate": 4.4566179190793646e-05, "loss": 1.1992, "step": 14300 }, { "epoch": 4.28, "grad_norm": 1.2317618131637573, "learning_rate": 4.4562521534035316e-05, "loss": 1.2237, "step": 14305 }, { "epoch": 4.28, "grad_norm": 1.0465807914733887, "learning_rate": 4.455886279685925e-05, "loss": 1.3325, "step": 14310 }, { "epoch": 4.28, "grad_norm": 2.072091579437256, "learning_rate": 4.4555202979467526e-05, "loss": 1.3239, "step": 14315 }, { "epoch": 4.28, "grad_norm": 1.5427582263946533, "learning_rate": 4.455154208206227e-05, "loss": 1.284, "step": 14320 }, { "epoch": 4.29, "grad_norm": 1.5451370477676392, "learning_rate": 4.454788010484566e-05, "loss": 1.2576, "step": 14325 }, { "epoch": 4.29, "grad_norm": 1.7191996574401855, "learning_rate": 4.454421704801996e-05, "loss": 1.3133, "step": 14330 }, { "epoch": 4.29, "grad_norm": 1.3877907991409302, "learning_rate": 4.454055291178746e-05, "loss": 1.1534, "step": 14335 }, { "epoch": 4.29, "grad_norm": 1.9861388206481934, "learning_rate": 4.453688769635054e-05, "loss": 1.2559, "step": 14340 }, { "epoch": 4.29, "grad_norm": 2.1039087772369385, "learning_rate": 4.453322140191162e-05, "loss": 1.3874, "step": 14345 }, { "epoch": 4.29, "grad_norm": 1.7028883695602417, "learning_rate": 4.452955402867318e-05, "loss": 1.3288, "step": 14350 }, { "epoch": 4.29, "grad_norm": 2.711984634399414, "learning_rate": 4.452588557683777e-05, "loss": 1.2802, "step": 14355 }, { "epoch": 4.3, "grad_norm": 1.9355015754699707, "learning_rate": 4.4522216046608004e-05, "loss": 1.2044, "step": 14360 }, { "epoch": 4.3, "grad_norm": 1.0914154052734375, "learning_rate": 
4.451854543818653e-05, "loss": 1.0817, "step": 14365 }, { "epoch": 4.3, "grad_norm": 1.7667779922485352, "learning_rate": 4.451487375177608e-05, "loss": 1.2535, "step": 14370 }, { "epoch": 4.3, "grad_norm": 2.6625638008117676, "learning_rate": 4.4511200987579445e-05, "loss": 1.3501, "step": 14375 }, { "epoch": 4.3, "grad_norm": 2.3096730709075928, "learning_rate": 4.450752714579946e-05, "loss": 1.2074, "step": 14380 }, { "epoch": 4.3, "grad_norm": 1.5009289979934692, "learning_rate": 4.450385222663902e-05, "loss": 1.2544, "step": 14385 }, { "epoch": 4.31, "grad_norm": 2.635723352432251, "learning_rate": 4.4500176230301095e-05, "loss": 1.2226, "step": 14390 }, { "epoch": 4.31, "grad_norm": 1.7658405303955078, "learning_rate": 4.449649915698871e-05, "loss": 1.2582, "step": 14395 }, { "epoch": 4.31, "grad_norm": 3.9313101768493652, "learning_rate": 4.449282100690494e-05, "loss": 1.2111, "step": 14400 }, { "epoch": 4.31, "grad_norm": 2.178057909011841, "learning_rate": 4.448914178025293e-05, "loss": 1.4299, "step": 14405 }, { "epoch": 4.31, "grad_norm": 1.1012213230133057, "learning_rate": 4.4485461477235865e-05, "loss": 1.0573, "step": 14410 }, { "epoch": 4.31, "grad_norm": 1.9754661321640015, "learning_rate": 4.448178009805704e-05, "loss": 1.2845, "step": 14415 }, { "epoch": 4.31, "grad_norm": 1.2951463460922241, "learning_rate": 4.4478097642919734e-05, "loss": 1.2188, "step": 14420 }, { "epoch": 4.32, "grad_norm": 1.210935354232788, "learning_rate": 4.447441411202734e-05, "loss": 1.3247, "step": 14425 }, { "epoch": 4.32, "grad_norm": 1.0859736204147339, "learning_rate": 4.447072950558331e-05, "loss": 1.2225, "step": 14430 }, { "epoch": 4.32, "grad_norm": 0.7958767414093018, "learning_rate": 4.446704382379111e-05, "loss": 1.3079, "step": 14435 }, { "epoch": 4.32, "grad_norm": 1.6416263580322266, "learning_rate": 4.4463357066854326e-05, "loss": 1.1547, "step": 14440 }, { "epoch": 4.32, "grad_norm": 1.793069839477539, "learning_rate": 4.445966923497656e-05, "loss": 
1.388, "step": 14445 }, { "epoch": 4.32, "grad_norm": 2.6959428787231445, "learning_rate": 4.44559803283615e-05, "loss": 1.2552, "step": 14450 }, { "epoch": 4.32, "grad_norm": 3.44150710105896, "learning_rate": 4.445229034721285e-05, "loss": 1.2653, "step": 14455 }, { "epoch": 4.33, "grad_norm": 2.6374642848968506, "learning_rate": 4.444859929173444e-05, "loss": 1.204, "step": 14460 }, { "epoch": 4.33, "grad_norm": 1.654412031173706, "learning_rate": 4.44449071621301e-05, "loss": 1.3707, "step": 14465 }, { "epoch": 4.33, "grad_norm": 1.6137704849243164, "learning_rate": 4.444121395860375e-05, "loss": 1.29, "step": 14470 }, { "epoch": 4.33, "grad_norm": 1.6746236085891724, "learning_rate": 4.443751968135936e-05, "loss": 1.4198, "step": 14475 }, { "epoch": 4.33, "grad_norm": 1.7420883178710938, "learning_rate": 4.4433824330600964e-05, "loss": 1.245, "step": 14480 }, { "epoch": 4.33, "grad_norm": 5.652534484863281, "learning_rate": 4.443012790653265e-05, "loss": 1.2699, "step": 14485 }, { "epoch": 4.34, "grad_norm": 2.45516037940979, "learning_rate": 4.442643040935856e-05, "loss": 1.1824, "step": 14490 }, { "epoch": 4.34, "grad_norm": 2.3738396167755127, "learning_rate": 4.442273183928293e-05, "loss": 1.1621, "step": 14495 }, { "epoch": 4.34, "grad_norm": 1.6174094676971436, "learning_rate": 4.4419032196509994e-05, "loss": 1.3374, "step": 14500 }, { "epoch": 4.34, "grad_norm": 2.4241867065429688, "learning_rate": 4.44153314812441e-05, "loss": 1.1839, "step": 14505 }, { "epoch": 4.34, "grad_norm": 2.8648736476898193, "learning_rate": 4.4411629693689626e-05, "loss": 1.2719, "step": 14510 }, { "epoch": 4.34, "grad_norm": 2.2779765129089355, "learning_rate": 4.4407926834051025e-05, "loss": 1.2144, "step": 14515 }, { "epoch": 4.34, "grad_norm": 3.223737955093384, "learning_rate": 4.44042229025328e-05, "loss": 1.112, "step": 14520 }, { "epoch": 4.35, "grad_norm": 1.5524216890335083, "learning_rate": 4.440051789933951e-05, "loss": 1.2005, "step": 14525 }, { "epoch": 4.35, 
"grad_norm": 2.5354983806610107, "learning_rate": 4.439681182467579e-05, "loss": 1.0579, "step": 14530 }, { "epoch": 4.35, "grad_norm": 1.221150279045105, "learning_rate": 4.43931046787463e-05, "loss": 1.3449, "step": 14535 }, { "epoch": 4.35, "grad_norm": 1.6341768503189087, "learning_rate": 4.4389396461755804e-05, "loss": 1.2786, "step": 14540 }, { "epoch": 4.35, "grad_norm": 1.0187619924545288, "learning_rate": 4.4385687173909093e-05, "loss": 1.303, "step": 14545 }, { "epoch": 4.35, "grad_norm": 1.5471796989440918, "learning_rate": 4.4381976815411036e-05, "loss": 1.1909, "step": 14550 }, { "epoch": 4.35, "grad_norm": 1.6268233060836792, "learning_rate": 4.437826538646655e-05, "loss": 1.2544, "step": 14555 }, { "epoch": 4.36, "grad_norm": 1.3081377744674683, "learning_rate": 4.4374552887280594e-05, "loss": 1.3514, "step": 14560 }, { "epoch": 4.36, "grad_norm": 1.7368911504745483, "learning_rate": 4.4370839318058235e-05, "loss": 1.3513, "step": 14565 }, { "epoch": 4.36, "grad_norm": 0.9481498003005981, "learning_rate": 4.4367124679004545e-05, "loss": 1.2849, "step": 14570 }, { "epoch": 4.36, "grad_norm": 1.1011961698532104, "learning_rate": 4.4363408970324696e-05, "loss": 1.3394, "step": 14575 }, { "epoch": 4.36, "grad_norm": 0.9509512782096863, "learning_rate": 4.435969219222389e-05, "loss": 1.1828, "step": 14580 }, { "epoch": 4.36, "grad_norm": 0.9883051514625549, "learning_rate": 4.4355974344907414e-05, "loss": 1.1986, "step": 14585 }, { "epoch": 4.37, "grad_norm": 1.3182764053344727, "learning_rate": 4.4352255428580595e-05, "loss": 1.4436, "step": 14590 }, { "epoch": 4.37, "grad_norm": 1.5108343362808228, "learning_rate": 4.434853544344882e-05, "loss": 1.2572, "step": 14595 }, { "epoch": 4.37, "grad_norm": 1.2329615354537964, "learning_rate": 4.434481438971754e-05, "loss": 1.16, "step": 14600 }, { "epoch": 4.37, "grad_norm": 2.406236171722412, "learning_rate": 4.4341092267592276e-05, "loss": 1.2099, "step": 14605 }, { "epoch": 4.37, "grad_norm": 
2.0363478660583496, "learning_rate": 4.433736907727859e-05, "loss": 1.2245, "step": 14610 }, { "epoch": 4.37, "grad_norm": 1.3022775650024414, "learning_rate": 4.4333644818982115e-05, "loss": 1.2434, "step": 14615 }, { "epoch": 4.37, "grad_norm": 0.9420449733734131, "learning_rate": 4.4329919492908526e-05, "loss": 1.2814, "step": 14620 }, { "epoch": 4.38, "grad_norm": 4.690912246704102, "learning_rate": 4.432619309926357e-05, "loss": 1.2965, "step": 14625 }, { "epoch": 4.38, "grad_norm": 2.9743857383728027, "learning_rate": 4.432246563825306e-05, "loss": 1.1934, "step": 14630 }, { "epoch": 4.38, "grad_norm": 1.6823005676269531, "learning_rate": 4.431873711008286e-05, "loss": 1.3078, "step": 14635 }, { "epoch": 4.38, "grad_norm": 2.596446990966797, "learning_rate": 4.4315007514958896e-05, "loss": 1.1404, "step": 14640 }, { "epoch": 4.38, "grad_norm": 2.1992132663726807, "learning_rate": 4.4311276853087144e-05, "loss": 1.0836, "step": 14645 }, { "epoch": 4.38, "grad_norm": 1.4632948637008667, "learning_rate": 4.430754512467364e-05, "loss": 1.2743, "step": 14650 }, { "epoch": 4.38, "grad_norm": 1.9432785511016846, "learning_rate": 4.430381232992449e-05, "loss": 1.1317, "step": 14655 }, { "epoch": 4.39, "grad_norm": 2.028427839279175, "learning_rate": 4.430007846904585e-05, "loss": 1.135, "step": 14660 }, { "epoch": 4.39, "grad_norm": 1.5426450967788696, "learning_rate": 4.4296343542243926e-05, "loss": 1.3249, "step": 14665 }, { "epoch": 4.39, "grad_norm": 1.5564109086990356, "learning_rate": 4.4292607549725016e-05, "loss": 1.269, "step": 14670 }, { "epoch": 4.39, "grad_norm": 1.5921878814697266, "learning_rate": 4.428887049169544e-05, "loss": 1.3068, "step": 14675 }, { "epoch": 4.39, "grad_norm": 1.6243040561676025, "learning_rate": 4.4285132368361606e-05, "loss": 1.0717, "step": 14680 }, { "epoch": 4.39, "grad_norm": 2.8081164360046387, "learning_rate": 4.428139317992995e-05, "loss": 1.1523, "step": 14685 }, { "epoch": 4.4, "grad_norm": 3.132068157196045, 
"learning_rate": 4.4277652926607e-05, "loss": 1.3471, "step": 14690 }, { "epoch": 4.4, "grad_norm": 2.9057159423828125, "learning_rate": 4.42739116085993e-05, "loss": 1.2094, "step": 14695 }, { "epoch": 4.4, "grad_norm": 1.643255591392517, "learning_rate": 4.42701692261135e-05, "loss": 1.5053, "step": 14700 }, { "epoch": 4.4, "grad_norm": 1.549208164215088, "learning_rate": 4.426642577935629e-05, "loss": 1.1451, "step": 14705 }, { "epoch": 4.4, "grad_norm": 1.8293100595474243, "learning_rate": 4.426268126853441e-05, "loss": 1.2295, "step": 14710 }, { "epoch": 4.4, "grad_norm": 2.3333680629730225, "learning_rate": 4.425893569385466e-05, "loss": 1.1442, "step": 14715 }, { "epoch": 4.4, "grad_norm": 1.7189589738845825, "learning_rate": 4.425518905552392e-05, "loss": 1.3381, "step": 14720 }, { "epoch": 4.41, "grad_norm": 3.104936361312866, "learning_rate": 4.42514413537491e-05, "loss": 1.2112, "step": 14725 }, { "epoch": 4.41, "grad_norm": 1.1761854887008667, "learning_rate": 4.424769258873718e-05, "loss": 1.2502, "step": 14730 }, { "epoch": 4.41, "grad_norm": 1.4067808389663696, "learning_rate": 4.424394276069521e-05, "loss": 1.3833, "step": 14735 }, { "epoch": 4.41, "grad_norm": 1.7447682619094849, "learning_rate": 4.424019186983028e-05, "loss": 1.203, "step": 14740 }, { "epoch": 4.41, "grad_norm": 1.739342212677002, "learning_rate": 4.423643991634956e-05, "loss": 1.2031, "step": 14745 }, { "epoch": 4.41, "grad_norm": 2.5015289783477783, "learning_rate": 4.423268690046025e-05, "loss": 1.4271, "step": 14750 }, { "epoch": 4.41, "grad_norm": 1.010996699333191, "learning_rate": 4.422893282236963e-05, "loss": 1.2328, "step": 14755 }, { "epoch": 4.42, "grad_norm": 1.5030901432037354, "learning_rate": 4.422517768228505e-05, "loss": 1.3878, "step": 14760 }, { "epoch": 4.42, "grad_norm": 1.2070742845535278, "learning_rate": 4.422142148041388e-05, "loss": 1.3728, "step": 14765 }, { "epoch": 4.42, "grad_norm": 1.405918002128601, "learning_rate": 4.421766421696358e-05, "loss": 
1.3906, "step": 14770 }, { "epoch": 4.42, "grad_norm": 2.8007607460021973, "learning_rate": 4.4213905892141674e-05, "loss": 1.3337, "step": 14775 }, { "epoch": 4.42, "grad_norm": 1.6845779418945312, "learning_rate": 4.421014650615571e-05, "loss": 1.3817, "step": 14780 }, { "epoch": 4.42, "grad_norm": 2.3008716106414795, "learning_rate": 4.420638605921332e-05, "loss": 1.2791, "step": 14785 }, { "epoch": 4.42, "grad_norm": 2.7502546310424805, "learning_rate": 4.4202624551522195e-05, "loss": 1.1827, "step": 14790 }, { "epoch": 4.43, "grad_norm": 1.4934040307998657, "learning_rate": 4.419886198329008e-05, "loss": 1.4211, "step": 14795 }, { "epoch": 4.43, "grad_norm": 2.8900766372680664, "learning_rate": 4.419509835472476e-05, "loss": 1.3247, "step": 14800 }, { "epoch": 4.43, "grad_norm": 1.89850652217865, "learning_rate": 4.4191333666034124e-05, "loss": 1.3627, "step": 14805 }, { "epoch": 4.43, "grad_norm": 2.504714250564575, "learning_rate": 4.4187567917426074e-05, "loss": 1.2384, "step": 14810 }, { "epoch": 4.43, "grad_norm": 2.1975607872009277, "learning_rate": 4.418380110910859e-05, "loss": 1.1535, "step": 14815 }, { "epoch": 4.43, "grad_norm": 2.044182062149048, "learning_rate": 4.4180033241289706e-05, "loss": 1.2568, "step": 14820 }, { "epoch": 4.44, "grad_norm": 1.6117463111877441, "learning_rate": 4.417626431417753e-05, "loss": 1.0122, "step": 14825 }, { "epoch": 4.44, "grad_norm": 1.8464300632476807, "learning_rate": 4.4172494327980205e-05, "loss": 1.19, "step": 14830 }, { "epoch": 4.44, "grad_norm": 2.2936418056488037, "learning_rate": 4.4168723282905954e-05, "loss": 1.0822, "step": 14835 }, { "epoch": 4.44, "grad_norm": 1.4737282991409302, "learning_rate": 4.416495117916304e-05, "loss": 1.4572, "step": 14840 }, { "epoch": 4.44, "grad_norm": 1.4652832746505737, "learning_rate": 4.416117801695979e-05, "loss": 1.219, "step": 14845 }, { "epoch": 4.44, "grad_norm": 1.4503779411315918, "learning_rate": 4.415740379650459e-05, "loss": 1.3568, "step": 14850 }, { 
"epoch": 4.44, "grad_norm": 1.4976866245269775, "learning_rate": 4.41536285180059e-05, "loss": 1.1892, "step": 14855 }, { "epoch": 4.45, "grad_norm": 5.945674896240234, "learning_rate": 4.414985218167221e-05, "loss": 1.146, "step": 14860 }, { "epoch": 4.45, "grad_norm": 1.585028886795044, "learning_rate": 4.414607478771209e-05, "loss": 1.1714, "step": 14865 }, { "epoch": 4.45, "grad_norm": 1.4117640256881714, "learning_rate": 4.4142296336334166e-05, "loss": 1.2991, "step": 14870 }, { "epoch": 4.45, "grad_norm": 2.092742443084717, "learning_rate": 4.41385168277471e-05, "loss": 1.1138, "step": 14875 }, { "epoch": 4.45, "grad_norm": 2.3509175777435303, "learning_rate": 4.413473626215965e-05, "loss": 1.3, "step": 14880 }, { "epoch": 4.45, "grad_norm": 2.7636947631835938, "learning_rate": 4.4130954639780616e-05, "loss": 1.2863, "step": 14885 }, { "epoch": 4.45, "grad_norm": 1.4940133094787598, "learning_rate": 4.412717196081883e-05, "loss": 1.2997, "step": 14890 }, { "epoch": 4.46, "grad_norm": 2.2368600368499756, "learning_rate": 4.412338822548322e-05, "loss": 1.2766, "step": 14895 }, { "epoch": 4.46, "grad_norm": 1.0473464727401733, "learning_rate": 4.411960343398276e-05, "loss": 1.2436, "step": 14900 }, { "epoch": 4.46, "grad_norm": 1.2610361576080322, "learning_rate": 4.4115817586526475e-05, "loss": 1.2788, "step": 14905 }, { "epoch": 4.46, "grad_norm": 1.074568271636963, "learning_rate": 4.411203068332345e-05, "loss": 1.2546, "step": 14910 }, { "epoch": 4.46, "grad_norm": 1.4182261228561401, "learning_rate": 4.4108242724582836e-05, "loss": 1.0938, "step": 14915 }, { "epoch": 4.46, "grad_norm": 5.393111705780029, "learning_rate": 4.410445371051385e-05, "loss": 1.2659, "step": 14920 }, { "epoch": 4.47, "grad_norm": 1.6223890781402588, "learning_rate": 4.410066364132573e-05, "loss": 1.3116, "step": 14925 }, { "epoch": 4.47, "grad_norm": 2.0401763916015625, "learning_rate": 4.409687251722782e-05, "loss": 1.2728, "step": 14930 }, { "epoch": 4.47, "grad_norm": 
2.3179221153259277, "learning_rate": 4.4093080338429485e-05, "loss": 0.9557, "step": 14935 }, { "epoch": 4.47, "grad_norm": 0.9907518029212952, "learning_rate": 4.4089287105140176e-05, "loss": 1.2843, "step": 14940 }, { "epoch": 4.47, "grad_norm": 3.440904378890991, "learning_rate": 4.4085492817569375e-05, "loss": 1.2877, "step": 14945 }, { "epoch": 4.47, "grad_norm": 1.7463809251785278, "learning_rate": 4.408169747592665e-05, "loss": 1.2269, "step": 14950 }, { "epoch": 4.47, "grad_norm": 1.2096577882766724, "learning_rate": 4.40779010804216e-05, "loss": 1.318, "step": 14955 }, { "epoch": 4.48, "grad_norm": 1.5482676029205322, "learning_rate": 4.407410363126391e-05, "loss": 1.2836, "step": 14960 }, { "epoch": 4.48, "grad_norm": 0.8990564346313477, "learning_rate": 4.4070305128663306e-05, "loss": 1.175, "step": 14965 }, { "epoch": 4.48, "grad_norm": 1.5660568475723267, "learning_rate": 4.406650557282957e-05, "loss": 1.0142, "step": 14970 }, { "epoch": 4.48, "grad_norm": 1.8954380750656128, "learning_rate": 4.406270496397254e-05, "loss": 1.1563, "step": 14975 }, { "epoch": 4.48, "grad_norm": 3.845919609069824, "learning_rate": 4.4058903302302136e-05, "loss": 1.1624, "step": 14980 }, { "epoch": 4.48, "grad_norm": 2.766300678253174, "learning_rate": 4.4055100588028315e-05, "loss": 1.2274, "step": 14985 }, { "epoch": 4.48, "grad_norm": 1.7678008079528809, "learning_rate": 4.405129682136109e-05, "loss": 1.2156, "step": 14990 }, { "epoch": 4.49, "grad_norm": 1.1421842575073242, "learning_rate": 4.404749200251055e-05, "loss": 1.2917, "step": 14995 }, { "epoch": 4.49, "grad_norm": 2.411649227142334, "learning_rate": 4.4043686131686825e-05, "loss": 1.177, "step": 15000 }, { "epoch": 4.49, "grad_norm": 0.9833216667175293, "learning_rate": 4.403987920910011e-05, "loss": 1.4059, "step": 15005 }, { "epoch": 4.49, "grad_norm": 1.068702220916748, "learning_rate": 4.403607123496065e-05, "loss": 1.0853, "step": 15010 }, { "epoch": 4.49, "grad_norm": 2.074406385421753, 
"learning_rate": 4.4032262209478774e-05, "loss": 1.4351, "step": 15015 }, { "epoch": 4.49, "grad_norm": 2.2396271228790283, "learning_rate": 4.402845213286483e-05, "loss": 1.3586, "step": 15020 }, { "epoch": 4.5, "grad_norm": 2.4395458698272705, "learning_rate": 4.402464100532926e-05, "loss": 1.2868, "step": 15025 }, { "epoch": 4.5, "grad_norm": 2.2216920852661133, "learning_rate": 4.402082882708254e-05, "loss": 1.3321, "step": 15030 }, { "epoch": 4.5, "grad_norm": 1.8366425037384033, "learning_rate": 4.401701559833521e-05, "loss": 1.3224, "step": 15035 }, { "epoch": 4.5, "grad_norm": 2.4641847610473633, "learning_rate": 4.401320131929788e-05, "loss": 1.0611, "step": 15040 }, { "epoch": 4.5, "grad_norm": 1.1396476030349731, "learning_rate": 4.400938599018121e-05, "loss": 1.3745, "step": 15045 }, { "epoch": 4.5, "grad_norm": 2.272923469543457, "learning_rate": 4.40055696111959e-05, "loss": 1.2228, "step": 15050 }, { "epoch": 4.5, "grad_norm": 3.0234322547912598, "learning_rate": 4.400175218255274e-05, "loss": 1.2259, "step": 15055 }, { "epoch": 4.51, "grad_norm": 4.0134501457214355, "learning_rate": 4.3997933704462555e-05, "loss": 1.1956, "step": 15060 }, { "epoch": 4.51, "grad_norm": 1.733201503753662, "learning_rate": 4.399411417713625e-05, "loss": 1.3304, "step": 15065 }, { "epoch": 4.51, "grad_norm": 1.0404208898544312, "learning_rate": 4.3990293600784754e-05, "loss": 1.2417, "step": 15070 }, { "epoch": 4.51, "grad_norm": 1.6445467472076416, "learning_rate": 4.398647197561908e-05, "loss": 1.4209, "step": 15075 }, { "epoch": 4.51, "grad_norm": 2.307591676712036, "learning_rate": 4.3982649301850296e-05, "loss": 1.4352, "step": 15080 }, { "epoch": 4.51, "grad_norm": 1.3652942180633545, "learning_rate": 4.397882557968952e-05, "loss": 1.4074, "step": 15085 }, { "epoch": 4.51, "grad_norm": 1.15928053855896, "learning_rate": 4.397500080934794e-05, "loss": 1.175, "step": 15090 }, { "epoch": 4.52, "grad_norm": 1.9573633670806885, "learning_rate": 4.397117499103679e-05, 
"loss": 1.3634, "step": 15095 }, { "epoch": 4.52, "grad_norm": 2.113678455352783, "learning_rate": 4.3967348124967365e-05, "loss": 1.1655, "step": 15100 }, { "epoch": 4.52, "grad_norm": 2.7751240730285645, "learning_rate": 4.396352021135101e-05, "loss": 1.1952, "step": 15105 }, { "epoch": 4.52, "grad_norm": 3.2183516025543213, "learning_rate": 4.395969125039915e-05, "loss": 1.4013, "step": 15110 }, { "epoch": 4.52, "grad_norm": 3.216507911682129, "learning_rate": 4.395586124232325e-05, "loss": 1.2403, "step": 15115 }, { "epoch": 4.52, "grad_norm": 1.8810077905654907, "learning_rate": 4.395203018733484e-05, "loss": 1.3741, "step": 15120 }, { "epoch": 4.53, "grad_norm": 1.4608815908432007, "learning_rate": 4.3948198085645495e-05, "loss": 1.2175, "step": 15125 }, { "epoch": 4.53, "grad_norm": 2.5550155639648438, "learning_rate": 4.394436493746687e-05, "loss": 1.3009, "step": 15130 }, { "epoch": 4.53, "grad_norm": 1.188637375831604, "learning_rate": 4.394053074301066e-05, "loss": 1.3576, "step": 15135 }, { "epoch": 4.53, "grad_norm": 1.8632690906524658, "learning_rate": 4.3936695502488623e-05, "loss": 1.2394, "step": 15140 }, { "epoch": 4.53, "grad_norm": 2.738532304763794, "learning_rate": 4.3932859216112584e-05, "loss": 1.2289, "step": 15145 }, { "epoch": 4.53, "grad_norm": 1.352799415588379, "learning_rate": 4.39290218840944e-05, "loss": 1.2893, "step": 15150 }, { "epoch": 4.53, "grad_norm": 1.2402245998382568, "learning_rate": 4.392518350664602e-05, "loss": 1.2977, "step": 15155 }, { "epoch": 4.54, "grad_norm": 1.9382325410842896, "learning_rate": 4.392134408397942e-05, "loss": 1.197, "step": 15160 }, { "epoch": 4.54, "grad_norm": 3.8150017261505127, "learning_rate": 4.391750361630666e-05, "loss": 1.3319, "step": 15165 }, { "epoch": 4.54, "grad_norm": 1.3974863290786743, "learning_rate": 4.391366210383984e-05, "loss": 1.2548, "step": 15170 }, { "epoch": 4.54, "grad_norm": 1.6200858354568481, "learning_rate": 4.390981954679112e-05, "loss": 1.3474, "step": 15175 }, { 
"epoch": 4.54, "grad_norm": 2.464508056640625, "learning_rate": 4.390597594537272e-05, "loss": 1.2525, "step": 15180 }, { "epoch": 4.54, "grad_norm": 2.1834940910339355, "learning_rate": 4.390213129979692e-05, "loss": 1.2651, "step": 15185 }, { "epoch": 4.54, "grad_norm": 1.6160869598388672, "learning_rate": 4.3898285610276056e-05, "loss": 1.3427, "step": 15190 }, { "epoch": 4.55, "grad_norm": 2.14302921295166, "learning_rate": 4.389443887702252e-05, "loss": 1.3329, "step": 15195 }, { "epoch": 4.55, "grad_norm": 1.1593176126480103, "learning_rate": 4.3890591100248773e-05, "loss": 1.2283, "step": 15200 }, { "epoch": 4.55, "grad_norm": 1.1334714889526367, "learning_rate": 4.388674228016731e-05, "loss": 1.2044, "step": 15205 }, { "epoch": 4.55, "grad_norm": 1.1369993686676025, "learning_rate": 4.38828924169907e-05, "loss": 1.1575, "step": 15210 }, { "epoch": 4.55, "grad_norm": 1.315269112586975, "learning_rate": 4.387904151093157e-05, "loss": 1.3368, "step": 15215 }, { "epoch": 4.55, "grad_norm": 1.833358883857727, "learning_rate": 4.38751895622026e-05, "loss": 1.2118, "step": 15220 }, { "epoch": 4.56, "grad_norm": 2.8720498085021973, "learning_rate": 4.387133657101654e-05, "loss": 1.2441, "step": 15225 }, { "epoch": 4.56, "grad_norm": 2.276634931564331, "learning_rate": 4.386748253758617e-05, "loss": 1.1128, "step": 15230 }, { "epoch": 4.56, "grad_norm": 1.893906831741333, "learning_rate": 4.386362746212435e-05, "loss": 1.2714, "step": 15235 }, { "epoch": 4.56, "grad_norm": 2.4002392292022705, "learning_rate": 4.385977134484399e-05, "loss": 1.2763, "step": 15240 }, { "epoch": 4.56, "grad_norm": 1.7395859956741333, "learning_rate": 4.385591418595807e-05, "loss": 1.2394, "step": 15245 }, { "epoch": 4.56, "grad_norm": 2.103240966796875, "learning_rate": 4.38520559856796e-05, "loss": 1.2259, "step": 15250 }, { "epoch": 4.56, "grad_norm": 1.817021369934082, "learning_rate": 4.384819674422168e-05, "loss": 1.4099, "step": 15255 }, { "epoch": 4.57, "grad_norm": 
3.440264940261841, "learning_rate": 4.3844336461797445e-05, "loss": 1.1517, "step": 15260 }, { "epoch": 4.57, "grad_norm": 2.334442615509033, "learning_rate": 4.384047513862009e-05, "loss": 1.1874, "step": 15265 }, { "epoch": 4.57, "grad_norm": 1.8950352668762207, "learning_rate": 4.383661277490289e-05, "loss": 1.2903, "step": 15270 }, { "epoch": 4.57, "grad_norm": 1.6918821334838867, "learning_rate": 4.3832749370859124e-05, "loss": 1.1923, "step": 15275 }, { "epoch": 4.57, "grad_norm": 2.603799819946289, "learning_rate": 4.38288849267022e-05, "loss": 1.3905, "step": 15280 }, { "epoch": 4.57, "grad_norm": 1.2318238019943237, "learning_rate": 4.3825019442645534e-05, "loss": 1.2528, "step": 15285 }, { "epoch": 4.57, "grad_norm": 2.4402077198028564, "learning_rate": 4.382115291890261e-05, "loss": 1.2203, "step": 15290 }, { "epoch": 4.58, "grad_norm": 1.929358720779419, "learning_rate": 4.3817285355686973e-05, "loss": 1.2942, "step": 15295 }, { "epoch": 4.58, "grad_norm": 2.6186788082122803, "learning_rate": 4.3813416753212224e-05, "loss": 1.1942, "step": 15300 }, { "epoch": 4.58, "grad_norm": 3.274463653564453, "learning_rate": 4.3809547111692024e-05, "loss": 1.3078, "step": 15305 }, { "epoch": 4.58, "grad_norm": 3.3684141635894775, "learning_rate": 4.380567643134009e-05, "loss": 1.2054, "step": 15310 }, { "epoch": 4.58, "grad_norm": 0.9591112732887268, "learning_rate": 4.380180471237019e-05, "loss": 1.1912, "step": 15315 }, { "epoch": 4.58, "grad_norm": 3.8308017253875732, "learning_rate": 4.379793195499616e-05, "loss": 1.2198, "step": 15320 }, { "epoch": 4.59, "grad_norm": 1.5418450832366943, "learning_rate": 4.3794058159431895e-05, "loss": 1.3401, "step": 15325 }, { "epoch": 4.59, "grad_norm": 1.2798864841461182, "learning_rate": 4.379018332589132e-05, "loss": 1.0988, "step": 15330 }, { "epoch": 4.59, "grad_norm": 1.1847316026687622, "learning_rate": 4.378630745458846e-05, "loss": 1.378, "step": 15335 }, { "epoch": 4.59, "grad_norm": 2.026315450668335, 
"learning_rate": 4.3782430545737377e-05, "loss": 1.267, "step": 15340 }, { "epoch": 4.59, "grad_norm": 1.8548667430877686, "learning_rate": 4.3778552599552156e-05, "loss": 1.2185, "step": 15345 }, { "epoch": 4.59, "grad_norm": 1.9220659732818604, "learning_rate": 4.377467361624701e-05, "loss": 1.2383, "step": 15350 }, { "epoch": 4.59, "grad_norm": 1.5379008054733276, "learning_rate": 4.3770793596036145e-05, "loss": 1.2225, "step": 15355 }, { "epoch": 4.6, "grad_norm": 1.2834293842315674, "learning_rate": 4.3766912539133864e-05, "loss": 1.1713, "step": 15360 }, { "epoch": 4.6, "grad_norm": 1.6671650409698486, "learning_rate": 4.3763030445754514e-05, "loss": 1.3282, "step": 15365 }, { "epoch": 4.6, "grad_norm": 1.0815520286560059, "learning_rate": 4.37591473161125e-05, "loss": 1.3631, "step": 15370 }, { "epoch": 4.6, "grad_norm": 2.721876859664917, "learning_rate": 4.375526315042227e-05, "loss": 1.1959, "step": 15375 }, { "epoch": 4.6, "grad_norm": 1.370292067527771, "learning_rate": 4.3751377948898356e-05, "loss": 1.2007, "step": 15380 }, { "epoch": 4.6, "grad_norm": 2.0268495082855225, "learning_rate": 4.374749171175533e-05, "loss": 1.243, "step": 15385 }, { "epoch": 4.6, "grad_norm": 1.4112547636032104, "learning_rate": 4.3743604439207817e-05, "loss": 1.1799, "step": 15390 }, { "epoch": 4.61, "grad_norm": 1.6107220649719238, "learning_rate": 4.373971613147051e-05, "loss": 1.3369, "step": 15395 }, { "epoch": 4.61, "grad_norm": 2.6075730323791504, "learning_rate": 4.373582678875817e-05, "loss": 1.2638, "step": 15400 }, { "epoch": 4.61, "grad_norm": 1.0856952667236328, "learning_rate": 4.373193641128559e-05, "loss": 1.2009, "step": 15405 }, { "epoch": 4.61, "grad_norm": 2.2727081775665283, "learning_rate": 4.372804499926762e-05, "loss": 1.2948, "step": 15410 }, { "epoch": 4.61, "grad_norm": 1.7927814722061157, "learning_rate": 4.3724152552919214e-05, "loss": 1.1401, "step": 15415 }, { "epoch": 4.61, "grad_norm": 2.271836996078491, "learning_rate": 
4.372025907245532e-05, "loss": 1.2022, "step": 15420 }, { "epoch": 4.61, "grad_norm": 2.37410306930542, "learning_rate": 4.371636455809096e-05, "loss": 1.2243, "step": 15425 }, { "epoch": 4.62, "grad_norm": 1.7014080286026, "learning_rate": 4.371246901004125e-05, "loss": 1.2851, "step": 15430 }, { "epoch": 4.62, "grad_norm": 2.186640977859497, "learning_rate": 4.3708572428521334e-05, "loss": 1.4657, "step": 15435 }, { "epoch": 4.62, "grad_norm": 2.204148769378662, "learning_rate": 4.370467481374639e-05, "loss": 1.1876, "step": 15440 }, { "epoch": 4.62, "grad_norm": 1.9742987155914307, "learning_rate": 4.3700776165931716e-05, "loss": 1.2819, "step": 15445 }, { "epoch": 4.62, "grad_norm": 1.3668123483657837, "learning_rate": 4.369687648529261e-05, "loss": 1.2395, "step": 15450 }, { "epoch": 4.62, "grad_norm": 3.831627368927002, "learning_rate": 4.3692975772044444e-05, "loss": 1.1898, "step": 15455 }, { "epoch": 4.63, "grad_norm": 1.8598977327346802, "learning_rate": 4.3689074026402665e-05, "loss": 1.2271, "step": 15460 }, { "epoch": 4.63, "grad_norm": 1.0298603773117065, "learning_rate": 4.3685171248582744e-05, "loss": 1.2812, "step": 15465 }, { "epoch": 4.63, "grad_norm": 2.9451205730438232, "learning_rate": 4.368126743880024e-05, "loss": 1.2084, "step": 15470 }, { "epoch": 4.63, "grad_norm": 1.8287848234176636, "learning_rate": 4.367736259727076e-05, "loss": 1.2462, "step": 15475 }, { "epoch": 4.63, "grad_norm": 1.9537088871002197, "learning_rate": 4.367345672420995e-05, "loss": 1.2031, "step": 15480 }, { "epoch": 4.63, "grad_norm": 2.9025750160217285, "learning_rate": 4.366954981983354e-05, "loss": 1.4563, "step": 15485 }, { "epoch": 4.63, "grad_norm": 1.587408185005188, "learning_rate": 4.3665641884357294e-05, "loss": 1.1538, "step": 15490 }, { "epoch": 4.64, "grad_norm": 1.1759958267211914, "learning_rate": 4.366173291799705e-05, "loss": 1.2645, "step": 15495 }, { "epoch": 4.64, "grad_norm": 4.82040548324585, "learning_rate": 4.3657822920968706e-05, "loss": 
1.1618, "step": 15500 }, { "epoch": 4.64, "grad_norm": 0.7620186805725098, "learning_rate": 4.365391189348818e-05, "loss": 1.3226, "step": 15505 }, { "epoch": 4.64, "grad_norm": 1.329471230506897, "learning_rate": 4.3649999835771496e-05, "loss": 1.2517, "step": 15510 }, { "epoch": 4.64, "grad_norm": 1.040663480758667, "learning_rate": 4.364608674803471e-05, "loss": 1.2898, "step": 15515 }, { "epoch": 4.64, "grad_norm": 2.1228902339935303, "learning_rate": 4.3642172630493925e-05, "loss": 1.2581, "step": 15520 }, { "epoch": 4.64, "grad_norm": 1.2862385511398315, "learning_rate": 4.3638257483365336e-05, "loss": 1.2414, "step": 15525 }, { "epoch": 4.65, "grad_norm": 1.5629061460494995, "learning_rate": 4.363434130686515e-05, "loss": 1.201, "step": 15530 }, { "epoch": 4.65, "grad_norm": 1.894118309020996, "learning_rate": 4.3630424101209663e-05, "loss": 1.2576, "step": 15535 }, { "epoch": 4.65, "grad_norm": 4.182260513305664, "learning_rate": 4.3626505866615224e-05, "loss": 1.1643, "step": 15540 }, { "epoch": 4.65, "grad_norm": 2.6900877952575684, "learning_rate": 4.362258660329822e-05, "loss": 1.1906, "step": 15545 }, { "epoch": 4.65, "grad_norm": 9.078900337219238, "learning_rate": 4.361866631147512e-05, "loss": 1.2331, "step": 15550 }, { "epoch": 4.65, "grad_norm": 3.6981396675109863, "learning_rate": 4.361474499136243e-05, "loss": 1.3355, "step": 15555 }, { "epoch": 4.66, "grad_norm": 1.7250741720199585, "learning_rate": 4.361082264317673e-05, "loss": 1.3161, "step": 15560 }, { "epoch": 4.66, "grad_norm": 2.3266186714172363, "learning_rate": 4.360689926713464e-05, "loss": 1.3587, "step": 15565 }, { "epoch": 4.66, "grad_norm": 1.8503769636154175, "learning_rate": 4.3602974863452835e-05, "loss": 1.3439, "step": 15570 }, { "epoch": 4.66, "grad_norm": 2.670262098312378, "learning_rate": 4.3599049432348074e-05, "loss": 1.2923, "step": 15575 }, { "epoch": 4.66, "grad_norm": 1.2793365716934204, "learning_rate": 4.359512297403714e-05, "loss": 1.4055, "step": 15580 }, { 
"epoch": 4.66, "grad_norm": 1.302943468093872, "learning_rate": 4.359119548873689e-05, "loss": 1.2158, "step": 15585 }, { "epoch": 4.66, "grad_norm": 1.2726792097091675, "learning_rate": 4.358726697666424e-05, "loss": 1.2166, "step": 15590 }, { "epoch": 4.67, "grad_norm": 2.570844888687134, "learning_rate": 4.358333743803616e-05, "loss": 1.1484, "step": 15595 }, { "epoch": 4.67, "grad_norm": 2.5889105796813965, "learning_rate": 4.3579406873069664e-05, "loss": 1.31, "step": 15600 }, { "epoch": 4.67, "grad_norm": 2.399143695831299, "learning_rate": 4.3575475281981844e-05, "loss": 1.2022, "step": 15605 }, { "epoch": 4.67, "grad_norm": 0.9160411357879639, "learning_rate": 4.357154266498983e-05, "loss": 1.2351, "step": 15610 }, { "epoch": 4.67, "grad_norm": 2.3706166744232178, "learning_rate": 4.356760902231082e-05, "loss": 1.3131, "step": 15615 }, { "epoch": 4.67, "grad_norm": 3.5267927646636963, "learning_rate": 4.356367435416205e-05, "loss": 1.4429, "step": 15620 }, { "epoch": 4.67, "grad_norm": 2.2916717529296875, "learning_rate": 4.3559738660760854e-05, "loss": 1.4017, "step": 15625 }, { "epoch": 4.68, "grad_norm": 1.031050205230713, "learning_rate": 4.3555801942324584e-05, "loss": 1.4196, "step": 15630 }, { "epoch": 4.68, "grad_norm": 1.4319988489151, "learning_rate": 4.355186419907066e-05, "loss": 1.1985, "step": 15635 }, { "epoch": 4.68, "grad_norm": 5.716891765594482, "learning_rate": 4.354792543121656e-05, "loss": 1.2971, "step": 15640 }, { "epoch": 4.68, "grad_norm": 2.955399751663208, "learning_rate": 4.3543985638979816e-05, "loss": 1.3152, "step": 15645 }, { "epoch": 4.68, "grad_norm": 3.042884349822998, "learning_rate": 4.354004482257802e-05, "loss": 1.2771, "step": 15650 }, { "epoch": 4.68, "grad_norm": 1.6653623580932617, "learning_rate": 4.353610298222882e-05, "loss": 1.3533, "step": 15655 }, { "epoch": 4.69, "grad_norm": 1.1561282873153687, "learning_rate": 4.353216011814992e-05, "loss": 1.2868, "step": 15660 }, { "epoch": 4.69, "grad_norm": 
1.7067124843597412, "learning_rate": 4.352821623055908e-05, "loss": 1.2304, "step": 15665 }, { "epoch": 4.69, "grad_norm": 1.450724720954895, "learning_rate": 4.352427131967412e-05, "loss": 1.1762, "step": 15670 }, { "epoch": 4.69, "grad_norm": 2.304478406906128, "learning_rate": 4.3520325385712904e-05, "loss": 1.1929, "step": 15675 }, { "epoch": 4.69, "grad_norm": 1.6809667348861694, "learning_rate": 4.3516378428893366e-05, "loss": 1.238, "step": 15680 }, { "epoch": 4.69, "grad_norm": 1.2087801694869995, "learning_rate": 4.35124304494335e-05, "loss": 1.2465, "step": 15685 }, { "epoch": 4.69, "grad_norm": 1.7735761404037476, "learning_rate": 4.350848144755134e-05, "loss": 1.2547, "step": 15690 }, { "epoch": 4.7, "grad_norm": 2.593214988708496, "learning_rate": 4.3504531423464996e-05, "loss": 1.2919, "step": 15695 }, { "epoch": 4.7, "grad_norm": 2.0241143703460693, "learning_rate": 4.350058037739261e-05, "loss": 1.3598, "step": 15700 }, { "epoch": 4.7, "grad_norm": 1.4365257024765015, "learning_rate": 4.3496628309552395e-05, "loss": 1.2709, "step": 15705 }, { "epoch": 4.7, "grad_norm": 1.55208420753479, "learning_rate": 4.349267522016263e-05, "loss": 1.2687, "step": 15710 }, { "epoch": 4.7, "grad_norm": 1.5196239948272705, "learning_rate": 4.348872110944163e-05, "loss": 1.3805, "step": 15715 }, { "epoch": 4.7, "grad_norm": 2.587491750717163, "learning_rate": 4.348476597760779e-05, "loss": 1.1504, "step": 15720 }, { "epoch": 4.7, "grad_norm": 1.250502586364746, "learning_rate": 4.348080982487953e-05, "loss": 1.351, "step": 15725 }, { "epoch": 4.71, "grad_norm": 1.627832055091858, "learning_rate": 4.347685265147536e-05, "loss": 1.2934, "step": 15730 }, { "epoch": 4.71, "grad_norm": 1.428882360458374, "learning_rate": 4.347289445761382e-05, "loss": 1.3566, "step": 15735 }, { "epoch": 4.71, "grad_norm": 1.8175216913223267, "learning_rate": 4.346893524351352e-05, "loss": 1.3705, "step": 15740 }, { "epoch": 4.71, "grad_norm": 2.059349536895752, "learning_rate": 
4.3464975009393124e-05, "loss": 1.2061, "step": 15745 }, { "epoch": 4.71, "grad_norm": 2.046034336090088, "learning_rate": 4.3461013755471354e-05, "loss": 1.0747, "step": 15750 }, { "epoch": 4.71, "grad_norm": 1.8597091436386108, "learning_rate": 4.345705148196698e-05, "loss": 1.2378, "step": 15755 }, { "epoch": 4.72, "grad_norm": 1.176646113395691, "learning_rate": 4.345308818909884e-05, "loss": 1.4726, "step": 15760 }, { "epoch": 4.72, "grad_norm": 3.254624605178833, "learning_rate": 4.344912387708582e-05, "loss": 1.2737, "step": 15765 }, { "epoch": 4.72, "grad_norm": 1.0571980476379395, "learning_rate": 4.344515854614686e-05, "loss": 1.1995, "step": 15770 }, { "epoch": 4.72, "grad_norm": 2.313420057296753, "learning_rate": 4.3441192196500976e-05, "loss": 1.1803, "step": 15775 }, { "epoch": 4.72, "grad_norm": 1.746813416481018, "learning_rate": 4.3437224828367205e-05, "loss": 1.3353, "step": 15780 }, { "epoch": 4.72, "grad_norm": 1.8490636348724365, "learning_rate": 4.343325644196468e-05, "loss": 1.1052, "step": 15785 }, { "epoch": 4.72, "grad_norm": 1.9969300031661987, "learning_rate": 4.342928703751256e-05, "loss": 1.1735, "step": 15790 }, { "epoch": 4.73, "grad_norm": 1.4050122499465942, "learning_rate": 4.342531661523007e-05, "loss": 1.1179, "step": 15795 }, { "epoch": 4.73, "grad_norm": 1.8106729984283447, "learning_rate": 4.34213451753365e-05, "loss": 1.4247, "step": 15800 }, { "epoch": 4.73, "grad_norm": 1.9320409297943115, "learning_rate": 4.341737271805118e-05, "loss": 1.2531, "step": 15805 }, { "epoch": 4.73, "grad_norm": 2.287996292114258, "learning_rate": 4.3413399243593513e-05, "loss": 1.3139, "step": 15810 }, { "epoch": 4.73, "grad_norm": 0.8202207088470459, "learning_rate": 4.3409424752182934e-05, "loss": 1.1417, "step": 15815 }, { "epoch": 4.73, "grad_norm": 1.7704706192016602, "learning_rate": 4.340544924403898e-05, "loss": 1.307, "step": 15820 }, { "epoch": 4.73, "grad_norm": 2.2683053016662598, "learning_rate": 4.340147271938118e-05, "loss": 
1.3663, "step": 15825 }, { "epoch": 4.74, "grad_norm": 2.761477470397949, "learning_rate": 4.339749517842917e-05, "loss": 1.3196, "step": 15830 }, { "epoch": 4.74, "grad_norm": 2.3123843669891357, "learning_rate": 4.339351662140263e-05, "loss": 1.1369, "step": 15835 }, { "epoch": 4.74, "grad_norm": 3.907461166381836, "learning_rate": 4.338953704852128e-05, "loss": 1.2576, "step": 15840 }, { "epoch": 4.74, "grad_norm": 1.5930169820785522, "learning_rate": 4.3385556460004925e-05, "loss": 1.2911, "step": 15845 }, { "epoch": 4.74, "grad_norm": 3.523927688598633, "learning_rate": 4.338157485607339e-05, "loss": 1.1452, "step": 15850 }, { "epoch": 4.74, "grad_norm": 1.7907129526138306, "learning_rate": 4.3377592236946585e-05, "loss": 1.3091, "step": 15855 }, { "epoch": 4.75, "grad_norm": 1.3438159227371216, "learning_rate": 4.337360860284446e-05, "loss": 1.2201, "step": 15860 }, { "epoch": 4.75, "grad_norm": 3.469675064086914, "learning_rate": 4.336962395398703e-05, "loss": 1.147, "step": 15865 }, { "epoch": 4.75, "grad_norm": 1.553625464439392, "learning_rate": 4.3365638290594366e-05, "loss": 1.4353, "step": 15870 }, { "epoch": 4.75, "grad_norm": 2.0750131607055664, "learning_rate": 4.336165161288659e-05, "loss": 1.2967, "step": 15875 }, { "epoch": 4.75, "grad_norm": 2.1773085594177246, "learning_rate": 4.335766392108387e-05, "loss": 1.2372, "step": 15880 }, { "epoch": 4.75, "grad_norm": 2.4969215393066406, "learning_rate": 4.3353675215406465e-05, "loss": 1.2196, "step": 15885 }, { "epoch": 4.75, "grad_norm": 1.2031995058059692, "learning_rate": 4.334968549607465e-05, "loss": 1.1849, "step": 15890 }, { "epoch": 4.76, "grad_norm": 2.3058178424835205, "learning_rate": 4.3345694763308786e-05, "loss": 1.2714, "step": 15895 }, { "epoch": 4.76, "grad_norm": 1.7053008079528809, "learning_rate": 4.334170301732926e-05, "loss": 1.2352, "step": 15900 }, { "epoch": 4.76, "grad_norm": 2.5460364818573, "learning_rate": 4.333771025835655e-05, "loss": 1.1808, "step": 15905 }, { "epoch": 
4.76, "grad_norm": 1.2482750415802002, "learning_rate": 4.333371648661116e-05, "loss": 1.2871, "step": 15910 }, { "epoch": 4.76, "grad_norm": 2.138806104660034, "learning_rate": 4.3329721702313665e-05, "loss": 1.3245, "step": 15915 }, { "epoch": 4.76, "grad_norm": 1.7979081869125366, "learning_rate": 4.332572590568469e-05, "loss": 1.2463, "step": 15920 }, { "epoch": 4.76, "grad_norm": 0.921890914440155, "learning_rate": 4.332172909694493e-05, "loss": 1.34, "step": 15925 }, { "epoch": 4.77, "grad_norm": 2.6406264305114746, "learning_rate": 4.331773127631511e-05, "loss": 1.195, "step": 15930 }, { "epoch": 4.77, "grad_norm": 2.4825634956359863, "learning_rate": 4.331373244401603e-05, "loss": 1.144, "step": 15935 }, { "epoch": 4.77, "grad_norm": 1.4051223993301392, "learning_rate": 4.330973260026855e-05, "loss": 1.3728, "step": 15940 }, { "epoch": 4.77, "grad_norm": 1.1359541416168213, "learning_rate": 4.330573174529356e-05, "loss": 1.3258, "step": 15945 }, { "epoch": 4.77, "grad_norm": 1.348656177520752, "learning_rate": 4.330172987931205e-05, "loss": 1.251, "step": 15950 }, { "epoch": 4.77, "grad_norm": 2.296485185623169, "learning_rate": 4.3297727002545006e-05, "loss": 1.1964, "step": 15955 }, { "epoch": 4.78, "grad_norm": 1.3783894777297974, "learning_rate": 4.329372311521353e-05, "loss": 1.261, "step": 15960 }, { "epoch": 4.78, "grad_norm": 3.1686336994171143, "learning_rate": 4.328971821753873e-05, "loss": 1.31, "step": 15965 }, { "epoch": 4.78, "grad_norm": 2.1778533458709717, "learning_rate": 4.328571230974181e-05, "loss": 1.1521, "step": 15970 }, { "epoch": 4.78, "grad_norm": 1.734237790107727, "learning_rate": 4.328170539204401e-05, "loss": 1.392, "step": 15975 }, { "epoch": 4.78, "grad_norm": 2.2675347328186035, "learning_rate": 4.327769746466662e-05, "loss": 1.3108, "step": 15980 }, { "epoch": 4.78, "grad_norm": 1.0224782228469849, "learning_rate": 4.3273688527831e-05, "loss": 1.2305, "step": 15985 }, { "epoch": 4.78, "grad_norm": 2.3099098205566406, 
"learning_rate": 4.3269678581758556e-05, "loss": 1.1456, "step": 15990 }, { "epoch": 4.79, "grad_norm": 3.697842836380005, "learning_rate": 4.326566762667075e-05, "loss": 1.161, "step": 15995 }, { "epoch": 4.79, "grad_norm": 1.7592895030975342, "learning_rate": 4.3261655662789115e-05, "loss": 1.3616, "step": 16000 }, { "epoch": 4.79, "grad_norm": 2.622274160385132, "learning_rate": 4.325764269033521e-05, "loss": 1.2472, "step": 16005 }, { "epoch": 4.79, "grad_norm": 1.370152473449707, "learning_rate": 4.325362870953069e-05, "loss": 1.2645, "step": 16010 }, { "epoch": 4.79, "grad_norm": 1.3675168752670288, "learning_rate": 4.324961372059722e-05, "loss": 1.2626, "step": 16015 }, { "epoch": 4.79, "grad_norm": 2.024503231048584, "learning_rate": 4.324559772375656e-05, "loss": 1.3285, "step": 16020 }, { "epoch": 4.79, "grad_norm": 1.5145565271377563, "learning_rate": 4.32415807192305e-05, "loss": 1.287, "step": 16025 }, { "epoch": 4.8, "grad_norm": 3.959343671798706, "learning_rate": 4.3237562707240897e-05, "loss": 1.2395, "step": 16030 }, { "epoch": 4.8, "grad_norm": 1.6277161836624146, "learning_rate": 4.3233543688009657e-05, "loss": 1.1997, "step": 16035 }, { "epoch": 4.8, "grad_norm": 1.912680983543396, "learning_rate": 4.322952366175876e-05, "loss": 1.2068, "step": 16040 }, { "epoch": 4.8, "grad_norm": 3.4466264247894287, "learning_rate": 4.322550262871021e-05, "loss": 1.2512, "step": 16045 }, { "epoch": 4.8, "grad_norm": 1.7129842042922974, "learning_rate": 4.3221480589086104e-05, "loss": 1.1405, "step": 16050 }, { "epoch": 4.8, "grad_norm": 1.4409843683242798, "learning_rate": 4.321745754310856e-05, "loss": 1.3464, "step": 16055 }, { "epoch": 4.8, "grad_norm": 1.8235528469085693, "learning_rate": 4.321343349099978e-05, "loss": 1.2919, "step": 16060 }, { "epoch": 4.81, "grad_norm": 1.156563639640808, "learning_rate": 4.3210213525047607e-05, "loss": 1.2472, "step": 16065 }, { "epoch": 4.81, "grad_norm": 2.0325233936309814, "learning_rate": 4.320618766246267e-05, 
"loss": 1.3308, "step": 16070 }, { "epoch": 4.81, "grad_norm": 1.3709142208099365, "learning_rate": 4.320216079436892e-05, "loss": 1.3726, "step": 16075 }, { "epoch": 4.81, "grad_norm": 2.0729141235351562, "learning_rate": 4.3198132920988746e-05, "loss": 1.2768, "step": 16080 }, { "epoch": 4.81, "grad_norm": 2.903198480606079, "learning_rate": 4.3194104042544606e-05, "loss": 1.1628, "step": 16085 }, { "epoch": 4.81, "grad_norm": 2.467470645904541, "learning_rate": 4.319007415925901e-05, "loss": 1.2066, "step": 16090 }, { "epoch": 4.82, "grad_norm": 1.2951477766036987, "learning_rate": 4.3186043271354534e-05, "loss": 1.3537, "step": 16095 }, { "epoch": 4.82, "grad_norm": 1.79331374168396, "learning_rate": 4.318201137905379e-05, "loss": 1.1477, "step": 16100 }, { "epoch": 4.82, "grad_norm": 2.5787532329559326, "learning_rate": 4.317797848257945e-05, "loss": 1.2367, "step": 16105 }, { "epoch": 4.82, "grad_norm": 2.574690580368042, "learning_rate": 4.3173944582154256e-05, "loss": 1.2734, "step": 16110 }, { "epoch": 4.82, "grad_norm": 3.0416769981384277, "learning_rate": 4.316990967800099e-05, "loss": 1.2573, "step": 16115 }, { "epoch": 4.82, "grad_norm": 1.79559326171875, "learning_rate": 4.31658737703425e-05, "loss": 1.1909, "step": 16120 }, { "epoch": 4.82, "grad_norm": 3.641195774078369, "learning_rate": 4.31618368594017e-05, "loss": 1.2127, "step": 16125 }, { "epoch": 4.83, "grad_norm": 2.858097553253174, "learning_rate": 4.315779894540151e-05, "loss": 1.1407, "step": 16130 }, { "epoch": 4.83, "grad_norm": 2.4541454315185547, "learning_rate": 4.315376002856496e-05, "loss": 1.2637, "step": 16135 }, { "epoch": 4.83, "grad_norm": 4.378109455108643, "learning_rate": 4.3149720109115116e-05, "loss": 1.3395, "step": 16140 }, { "epoch": 4.83, "grad_norm": 4.454669952392578, "learning_rate": 4.31456791872751e-05, "loss": 1.2609, "step": 16145 }, { "epoch": 4.83, "grad_norm": 0.9385771751403809, "learning_rate": 4.3141637263268074e-05, "loss": 1.4461, "step": 16150 }, { 
"epoch": 4.83, "grad_norm": 4.431612014770508, "learning_rate": 4.313759433731729e-05, "loss": 1.2072, "step": 16155 }, { "epoch": 4.83, "grad_norm": 3.2460215091705322, "learning_rate": 4.313355040964602e-05, "loss": 1.3532, "step": 16160 }, { "epoch": 4.84, "grad_norm": 1.6554932594299316, "learning_rate": 4.312950548047761e-05, "loss": 1.4051, "step": 16165 }, { "epoch": 4.84, "grad_norm": 1.5239330530166626, "learning_rate": 4.3125459550035454e-05, "loss": 1.2129, "step": 16170 }, { "epoch": 4.84, "grad_norm": 1.9607256650924683, "learning_rate": 4.3121412618543014e-05, "loss": 1.1648, "step": 16175 }, { "epoch": 4.84, "grad_norm": 1.0055090188980103, "learning_rate": 4.311736468622378e-05, "loss": 1.398, "step": 16180 }, { "epoch": 4.84, "grad_norm": 1.6140061616897583, "learning_rate": 4.311331575330134e-05, "loss": 1.3406, "step": 16185 }, { "epoch": 4.84, "grad_norm": 8.580751419067383, "learning_rate": 4.310926581999929e-05, "loss": 1.285, "step": 16190 }, { "epoch": 4.85, "grad_norm": 2.2242555618286133, "learning_rate": 4.3105214886541315e-05, "loss": 1.1491, "step": 16195 }, { "epoch": 4.85, "grad_norm": 4.345371246337891, "learning_rate": 4.310116295315114e-05, "loss": 1.1643, "step": 16200 }, { "epoch": 4.85, "grad_norm": 3.590310573577881, "learning_rate": 4.309711002005255e-05, "loss": 1.2797, "step": 16205 }, { "epoch": 4.85, "grad_norm": 1.9311707019805908, "learning_rate": 4.309305608746939e-05, "loss": 1.287, "step": 16210 }, { "epoch": 4.85, "grad_norm": 2.0158779621124268, "learning_rate": 4.308900115562554e-05, "loss": 1.222, "step": 16215 }, { "epoch": 4.85, "grad_norm": 0.8616341352462769, "learning_rate": 4.308494522474496e-05, "loss": 1.2951, "step": 16220 }, { "epoch": 4.85, "grad_norm": 1.497802495956421, "learning_rate": 4.308088829505166e-05, "loss": 1.1485, "step": 16225 }, { "epoch": 4.86, "grad_norm": 1.7404403686523438, "learning_rate": 4.307683036676968e-05, "loss": 1.2864, "step": 16230 }, { "epoch": 4.86, "grad_norm": 
3.3228366374969482, "learning_rate": 4.3072771440123164e-05, "loss": 1.2396, "step": 16235 }, { "epoch": 4.86, "grad_norm": 1.6669496297836304, "learning_rate": 4.3068711515336254e-05, "loss": 1.1937, "step": 16240 }, { "epoch": 4.86, "grad_norm": 2.0878639221191406, "learning_rate": 4.3064650592633206e-05, "loss": 1.2871, "step": 16245 }, { "epoch": 4.86, "grad_norm": 1.9474278688430786, "learning_rate": 4.3060588672238266e-05, "loss": 1.264, "step": 16250 }, { "epoch": 4.86, "grad_norm": 1.4628163576126099, "learning_rate": 4.30565257543758e-05, "loss": 1.2895, "step": 16255 }, { "epoch": 4.86, "grad_norm": 2.5341408252716064, "learning_rate": 4.3052461839270176e-05, "loss": 1.2429, "step": 16260 }, { "epoch": 4.87, "grad_norm": 2.7245945930480957, "learning_rate": 4.3048396927145854e-05, "loss": 1.3481, "step": 16265 }, { "epoch": 4.87, "grad_norm": 1.005662202835083, "learning_rate": 4.3044331018227324e-05, "loss": 1.1808, "step": 16270 }, { "epoch": 4.87, "grad_norm": 0.959071695804596, "learning_rate": 4.304026411273915e-05, "loss": 1.2639, "step": 16275 }, { "epoch": 4.87, "grad_norm": 1.850095272064209, "learning_rate": 4.303619621090594e-05, "loss": 1.319, "step": 16280 }, { "epoch": 4.87, "grad_norm": 2.577181816101074, "learning_rate": 4.3032127312952367e-05, "loss": 1.2413, "step": 16285 }, { "epoch": 4.87, "grad_norm": 2.0503063201904297, "learning_rate": 4.302805741910314e-05, "loss": 1.1287, "step": 16290 }, { "epoch": 4.88, "grad_norm": 2.769632339477539, "learning_rate": 4.3023986529583046e-05, "loss": 1.2283, "step": 16295 }, { "epoch": 4.88, "grad_norm": 2.0074775218963623, "learning_rate": 4.3019914644616904e-05, "loss": 1.1908, "step": 16300 }, { "epoch": 4.88, "grad_norm": 1.1852775812149048, "learning_rate": 4.301584176442961e-05, "loss": 1.314, "step": 16305 }, { "epoch": 4.88, "grad_norm": 2.0226988792419434, "learning_rate": 4.3011767889246105e-05, "loss": 1.2051, "step": 16310 }, { "epoch": 4.88, "grad_norm": 2.0487165451049805, 
"learning_rate": 4.300769301929138e-05, "loss": 1.1422, "step": 16315 }, { "epoch": 4.88, "grad_norm": 2.2297685146331787, "learning_rate": 4.300361715479049e-05, "loss": 1.3844, "step": 16320 }, { "epoch": 4.88, "grad_norm": 1.5562113523483276, "learning_rate": 4.2999540295968535e-05, "loss": 1.2164, "step": 16325 }, { "epoch": 4.89, "grad_norm": 1.00557541847229, "learning_rate": 4.2995462443050674e-05, "loss": 1.1486, "step": 16330 }, { "epoch": 4.89, "grad_norm": 1.7114874124526978, "learning_rate": 4.299138359626213e-05, "loss": 1.3049, "step": 16335 }, { "epoch": 4.89, "grad_norm": 1.610742449760437, "learning_rate": 4.2987303755828176e-05, "loss": 1.1456, "step": 16340 }, { "epoch": 4.89, "grad_norm": 2.69520902633667, "learning_rate": 4.298322292197413e-05, "loss": 1.2903, "step": 16345 }, { "epoch": 4.89, "grad_norm": 1.4834461212158203, "learning_rate": 4.2979141094925365e-05, "loss": 1.1353, "step": 16350 }, { "epoch": 4.89, "grad_norm": 2.4924049377441406, "learning_rate": 4.297505827490734e-05, "loss": 1.2812, "step": 16355 }, { "epoch": 4.89, "grad_norm": 1.1296648979187012, "learning_rate": 4.297097446214553e-05, "loss": 1.0921, "step": 16360 }, { "epoch": 4.9, "grad_norm": 0.9421242475509644, "learning_rate": 4.296688965686547e-05, "loss": 1.1506, "step": 16365 }, { "epoch": 4.9, "grad_norm": 1.1938999891281128, "learning_rate": 4.2962803859292776e-05, "loss": 1.243, "step": 16370 }, { "epoch": 4.9, "grad_norm": 2.3676552772521973, "learning_rate": 4.29587170696531e-05, "loss": 1.2123, "step": 16375 }, { "epoch": 4.9, "grad_norm": 1.4618945121765137, "learning_rate": 4.295462928817214e-05, "loss": 1.4101, "step": 16380 }, { "epoch": 4.9, "grad_norm": 1.811956763267517, "learning_rate": 4.2950540515075664e-05, "loss": 1.2196, "step": 16385 }, { "epoch": 4.9, "grad_norm": 2.578814744949341, "learning_rate": 4.294645075058951e-05, "loss": 1.3067, "step": 16390 }, { "epoch": 4.91, "grad_norm": 2.0412240028381348, "learning_rate": 4.294235999493952e-05, 
"loss": 1.123, "step": 16395 }, { "epoch": 4.91, "grad_norm": 1.7934796810150146, "learning_rate": 4.293826824835164e-05, "loss": 1.1079, "step": 16400 }, { "epoch": 4.91, "grad_norm": 1.1003563404083252, "learning_rate": 4.293417551105186e-05, "loss": 1.2645, "step": 16405 }, { "epoch": 4.91, "grad_norm": 2.166717767715454, "learning_rate": 4.29300817832662e-05, "loss": 1.1873, "step": 16410 }, { "epoch": 4.91, "grad_norm": 2.064626455307007, "learning_rate": 4.292598706522075e-05, "loss": 1.3719, "step": 16415 }, { "epoch": 4.91, "grad_norm": 2.102949857711792, "learning_rate": 4.2921891357141686e-05, "loss": 1.3074, "step": 16420 }, { "epoch": 4.91, "grad_norm": 2.0334112644195557, "learning_rate": 4.2917794659255183e-05, "loss": 1.3698, "step": 16425 }, { "epoch": 4.92, "grad_norm": 1.0431818962097168, "learning_rate": 4.29136969717875e-05, "loss": 1.2163, "step": 16430 }, { "epoch": 4.92, "grad_norm": 1.496396541595459, "learning_rate": 4.290959829496497e-05, "loss": 1.3081, "step": 16435 }, { "epoch": 4.92, "grad_norm": 1.6219655275344849, "learning_rate": 4.290549862901393e-05, "loss": 1.2083, "step": 16440 }, { "epoch": 4.92, "grad_norm": 1.4473518133163452, "learning_rate": 4.290139797416081e-05, "loss": 1.1601, "step": 16445 }, { "epoch": 4.92, "grad_norm": 1.5219734907150269, "learning_rate": 4.28972963306321e-05, "loss": 1.0509, "step": 16450 }, { "epoch": 4.92, "grad_norm": 3.6221630573272705, "learning_rate": 4.28931936986543e-05, "loss": 1.2632, "step": 16455 }, { "epoch": 4.92, "grad_norm": 1.292728066444397, "learning_rate": 4.2889090078454016e-05, "loss": 1.3043, "step": 16460 }, { "epoch": 4.93, "grad_norm": 2.52746844291687, "learning_rate": 4.2884985470257885e-05, "loss": 1.2606, "step": 16465 }, { "epoch": 4.93, "grad_norm": 2.0615930557250977, "learning_rate": 4.288087987429261e-05, "loss": 1.4306, "step": 16470 }, { "epoch": 4.93, "grad_norm": 2.1873435974121094, "learning_rate": 4.287677329078491e-05, "loss": 1.3791, "step": 16475 }, { 
"epoch": 4.93, "grad_norm": 0.9649629592895508, "learning_rate": 4.2872665719961605e-05, "loss": 1.2359, "step": 16480 }, { "epoch": 4.93, "grad_norm": 2.3767576217651367, "learning_rate": 4.2868557162049546e-05, "loss": 1.3599, "step": 16485 }, { "epoch": 4.93, "grad_norm": 4.670454025268555, "learning_rate": 4.286444761727566e-05, "loss": 1.1474, "step": 16490 }, { "epoch": 4.94, "grad_norm": 3.1629886627197266, "learning_rate": 4.286033708586689e-05, "loss": 1.2123, "step": 16495 }, { "epoch": 4.94, "grad_norm": 1.4645919799804688, "learning_rate": 4.2856225568050266e-05, "loss": 1.4218, "step": 16500 }, { "epoch": 4.94, "grad_norm": 1.6503212451934814, "learning_rate": 4.2852113064052874e-05, "loss": 1.2461, "step": 16505 }, { "epoch": 4.94, "grad_norm": 1.6822096109390259, "learning_rate": 4.2847999574101826e-05, "loss": 1.127, "step": 16510 }, { "epoch": 4.94, "grad_norm": 1.8511792421340942, "learning_rate": 4.284388509842432e-05, "loss": 1.0953, "step": 16515 }, { "epoch": 4.94, "grad_norm": 3.1232521533966064, "learning_rate": 4.2839769637247584e-05, "loss": 1.1931, "step": 16520 }, { "epoch": 4.94, "grad_norm": 1.0340989828109741, "learning_rate": 4.283565319079892e-05, "loss": 1.2855, "step": 16525 }, { "epoch": 4.95, "grad_norm": 1.5845731496810913, "learning_rate": 4.2831535759305664e-05, "loss": 1.2968, "step": 16530 }, { "epoch": 4.95, "grad_norm": 1.5348531007766724, "learning_rate": 4.282741734299523e-05, "loss": 1.3528, "step": 16535 }, { "epoch": 4.95, "grad_norm": 1.6758315563201904, "learning_rate": 4.2823297942095054e-05, "loss": 1.3494, "step": 16540 }, { "epoch": 4.95, "grad_norm": 2.661583185195923, "learning_rate": 4.2819177556832665e-05, "loss": 1.3549, "step": 16545 }, { "epoch": 4.95, "grad_norm": 1.3939776420593262, "learning_rate": 4.281505618743562e-05, "loss": 1.25, "step": 16550 }, { "epoch": 4.95, "grad_norm": 4.4011616706848145, "learning_rate": 4.281093383413154e-05, "loss": 1.1899, "step": 16555 }, { "epoch": 4.95, "grad_norm": 
2.488006830215454, "learning_rate": 4.28068104971481e-05, "loss": 1.153, "step": 16560 }, { "epoch": 4.96, "grad_norm": 2.225281000137329, "learning_rate": 4.2802686176713026e-05, "loss": 1.2394, "step": 16565 }, { "epoch": 4.96, "grad_norm": 0.9629842042922974, "learning_rate": 4.27985608730541e-05, "loss": 1.1729, "step": 16570 }, { "epoch": 4.96, "grad_norm": 2.1733222007751465, "learning_rate": 4.279443458639916e-05, "loss": 1.0447, "step": 16575 }, { "epoch": 4.96, "grad_norm": 2.7707877159118652, "learning_rate": 4.279030731697609e-05, "loss": 1.3554, "step": 16580 }, { "epoch": 4.96, "grad_norm": 1.399963617324829, "learning_rate": 4.278617906501283e-05, "loss": 1.3338, "step": 16585 }, { "epoch": 4.96, "grad_norm": 1.5772522687911987, "learning_rate": 4.2782049830737394e-05, "loss": 1.4114, "step": 16590 }, { "epoch": 4.97, "grad_norm": 2.678196907043457, "learning_rate": 4.277791961437784e-05, "loss": 1.2212, "step": 16595 }, { "epoch": 4.97, "grad_norm": 0.8356932997703552, "learning_rate": 4.277378841616225e-05, "loss": 1.1251, "step": 16600 }, { "epoch": 4.97, "grad_norm": 2.3631153106689453, "learning_rate": 4.276965623631881e-05, "loss": 1.0944, "step": 16605 }, { "epoch": 4.97, "grad_norm": 1.5095417499542236, "learning_rate": 4.276552307507572e-05, "loss": 1.2006, "step": 16610 }, { "epoch": 4.97, "grad_norm": 1.5276120901107788, "learning_rate": 4.2761388932661264e-05, "loss": 1.2489, "step": 16615 }, { "epoch": 4.97, "grad_norm": 2.6862080097198486, "learning_rate": 4.275725380930375e-05, "loss": 1.2177, "step": 16620 }, { "epoch": 4.97, "grad_norm": 3.0933616161346436, "learning_rate": 4.275311770523157e-05, "loss": 1.3473, "step": 16625 }, { "epoch": 4.98, "grad_norm": 2.2336065769195557, "learning_rate": 4.2748980620673155e-05, "loss": 1.4323, "step": 16630 }, { "epoch": 4.98, "grad_norm": 1.266717791557312, "learning_rate": 4.274484255585699e-05, "loss": 1.2058, "step": 16635 }, { "epoch": 4.98, "grad_norm": 3.835813522338867, "learning_rate": 
4.274070351101161e-05, "loss": 1.2881, "step": 16640 }, { "epoch": 4.98, "grad_norm": 1.388725757598877, "learning_rate": 4.273656348636562e-05, "loss": 1.1769, "step": 16645 }, { "epoch": 4.98, "grad_norm": 1.5018970966339111, "learning_rate": 4.273242248214767e-05, "loss": 1.2342, "step": 16650 }, { "epoch": 4.98, "grad_norm": 1.3790775537490845, "learning_rate": 4.272828049858645e-05, "loss": 1.2862, "step": 16655 }, { "epoch": 4.98, "grad_norm": 1.3459662199020386, "learning_rate": 4.2724137535910727e-05, "loss": 1.2824, "step": 16660 }, { "epoch": 4.99, "grad_norm": 2.3956856727600098, "learning_rate": 4.2719993594349316e-05, "loss": 1.2141, "step": 16665 }, { "epoch": 4.99, "grad_norm": 4.897758960723877, "learning_rate": 4.271584867413107e-05, "loss": 1.2121, "step": 16670 }, { "epoch": 4.99, "grad_norm": 1.78498113155365, "learning_rate": 4.271170277548492e-05, "loss": 1.4079, "step": 16675 }, { "epoch": 4.99, "grad_norm": 2.1399378776550293, "learning_rate": 4.270755589863983e-05, "loss": 1.205, "step": 16680 }, { "epoch": 4.99, "grad_norm": 2.0815482139587402, "learning_rate": 4.2703408043824845e-05, "loss": 1.2547, "step": 16685 }, { "epoch": 4.99, "grad_norm": 2.381340980529785, "learning_rate": 4.2699259211269025e-05, "loss": 1.3422, "step": 16690 }, { "epoch": 4.99, "grad_norm": 2.151851177215576, "learning_rate": 4.269510940120152e-05, "loss": 1.2764, "step": 16695 }, { "epoch": 5.0, "grad_norm": 1.3695425987243652, "learning_rate": 4.269095861385152e-05, "loss": 1.2128, "step": 16700 }, { "epoch": 5.0, "grad_norm": 2.25834321975708, "learning_rate": 4.268680684944826e-05, "loss": 1.5246, "step": 16705 }, { "epoch": 5.0, "grad_norm": 1.995069980621338, "learning_rate": 4.268265410822104e-05, "loss": 1.3022, "step": 16710 }, { "epoch": 5.0, "grad_norm": 1.8739442825317383, "learning_rate": 4.267850039039922e-05, "loss": 1.1678, "step": 16715 }, { "epoch": 5.0, "grad_norm": 1.2266640663146973, "learning_rate": 4.26743456962122e-05, "loss": 1.1569, 
"step": 16720 }, { "epoch": 5.0, "grad_norm": 2.092092990875244, "learning_rate": 4.2670190025889436e-05, "loss": 1.3194, "step": 16725 }, { "epoch": 5.01, "grad_norm": 2.1528186798095703, "learning_rate": 4.266603337966044e-05, "loss": 1.2965, "step": 16730 }, { "epoch": 5.01, "grad_norm": 1.078891396522522, "learning_rate": 4.266187575775479e-05, "loss": 1.1757, "step": 16735 }, { "epoch": 5.01, "grad_norm": 2.155930995941162, "learning_rate": 4.265771716040209e-05, "loss": 1.1875, "step": 16740 }, { "epoch": 5.01, "grad_norm": 1.9117164611816406, "learning_rate": 4.265355758783203e-05, "loss": 1.1627, "step": 16745 }, { "epoch": 5.01, "grad_norm": 2.450892210006714, "learning_rate": 4.264939704027434e-05, "loss": 1.3303, "step": 16750 }, { "epoch": 5.01, "grad_norm": 1.585326910018921, "learning_rate": 4.2645235517958796e-05, "loss": 0.9849, "step": 16755 }, { "epoch": 5.01, "grad_norm": 1.436105728149414, "learning_rate": 4.264107302111523e-05, "loss": 1.2115, "step": 16760 }, { "epoch": 5.02, "grad_norm": 3.469156503677368, "learning_rate": 4.2636909549973535e-05, "loss": 1.2543, "step": 16765 }, { "epoch": 5.02, "grad_norm": 1.467289924621582, "learning_rate": 4.263274510476366e-05, "loss": 1.2511, "step": 16770 }, { "epoch": 5.02, "grad_norm": 1.1534559726715088, "learning_rate": 4.262857968571561e-05, "loss": 1.1398, "step": 16775 }, { "epoch": 5.02, "grad_norm": 2.7166788578033447, "learning_rate": 4.262441329305942e-05, "loss": 1.213, "step": 16780 }, { "epoch": 5.02, "grad_norm": 3.0506703853607178, "learning_rate": 4.262024592702521e-05, "loss": 1.2207, "step": 16785 }, { "epoch": 5.02, "grad_norm": 2.2028605937957764, "learning_rate": 4.2616077587843126e-05, "loss": 1.2165, "step": 16790 }, { "epoch": 5.02, "grad_norm": 1.4191491603851318, "learning_rate": 4.26119082757434e-05, "loss": 1.2871, "step": 16795 }, { "epoch": 5.03, "grad_norm": 0.948824942111969, "learning_rate": 4.2607737990956276e-05, "loss": 1.1885, "step": 16800 }, { "epoch": 5.03, 
"grad_norm": 1.8765445947647095, "learning_rate": 4.260356673371208e-05, "loss": 1.228, "step": 16805 }, { "epoch": 5.03, "grad_norm": 1.0872840881347656, "learning_rate": 4.2599394504241195e-05, "loss": 1.2076, "step": 16810 }, { "epoch": 5.03, "grad_norm": 1.3634966611862183, "learning_rate": 4.259522130277405e-05, "loss": 1.2841, "step": 16815 }, { "epoch": 5.03, "grad_norm": 2.5034995079040527, "learning_rate": 4.259104712954112e-05, "loss": 1.2005, "step": 16820 }, { "epoch": 5.03, "grad_norm": 1.3833112716674805, "learning_rate": 4.2586871984772943e-05, "loss": 1.0861, "step": 16825 }, { "epoch": 5.04, "grad_norm": 2.4615163803100586, "learning_rate": 4.25826958687001e-05, "loss": 1.2638, "step": 16830 }, { "epoch": 5.04, "grad_norm": 3.113060474395752, "learning_rate": 4.257851878155324e-05, "loss": 1.3006, "step": 16835 }, { "epoch": 5.04, "grad_norm": 2.4361579418182373, "learning_rate": 4.257434072356307e-05, "loss": 1.2466, "step": 16840 }, { "epoch": 5.04, "grad_norm": 2.1952314376831055, "learning_rate": 4.257016169496032e-05, "loss": 1.2439, "step": 16845 }, { "epoch": 5.04, "grad_norm": 1.1961103677749634, "learning_rate": 4.256598169597581e-05, "loss": 1.306, "step": 16850 }, { "epoch": 5.04, "grad_norm": 1.3207539319992065, "learning_rate": 4.2561800726840385e-05, "loss": 1.2848, "step": 16855 }, { "epoch": 5.04, "grad_norm": 1.850471019744873, "learning_rate": 4.255761878778496e-05, "loss": 1.3269, "step": 16860 }, { "epoch": 5.05, "grad_norm": 1.6024876832962036, "learning_rate": 4.255343587904051e-05, "loss": 1.2962, "step": 16865 }, { "epoch": 5.05, "grad_norm": 2.1519381999969482, "learning_rate": 4.2549252000838025e-05, "loss": 1.3354, "step": 16870 }, { "epoch": 5.05, "grad_norm": 2.02545166015625, "learning_rate": 4.25450671534086e-05, "loss": 1.2417, "step": 16875 }, { "epoch": 5.05, "grad_norm": 1.6265103816986084, "learning_rate": 4.2540881336983357e-05, "loss": 1.2181, "step": 16880 }, { "epoch": 5.05, "grad_norm": 2.541177988052368, 
"learning_rate": 4.253669455179347e-05, "loss": 1.2521, "step": 16885 }, { "epoch": 5.05, "grad_norm": 1.0835845470428467, "learning_rate": 4.253250679807017e-05, "loss": 1.138, "step": 16890 }, { "epoch": 5.05, "grad_norm": 1.4941011667251587, "learning_rate": 4.252831807604475e-05, "loss": 1.2487, "step": 16895 }, { "epoch": 5.06, "grad_norm": 1.4814062118530273, "learning_rate": 4.252412838594853e-05, "loss": 1.217, "step": 16900 }, { "epoch": 5.06, "grad_norm": 1.4170145988464355, "learning_rate": 4.251993772801292e-05, "loss": 1.2874, "step": 16905 }, { "epoch": 5.06, "grad_norm": 1.7767305374145508, "learning_rate": 4.251574610246935e-05, "loss": 1.2459, "step": 16910 }, { "epoch": 5.06, "grad_norm": 1.7504329681396484, "learning_rate": 4.251155350954935e-05, "loss": 1.1763, "step": 16915 }, { "epoch": 5.06, "grad_norm": 2.1863224506378174, "learning_rate": 4.250735994948444e-05, "loss": 1.1726, "step": 16920 }, { "epoch": 5.06, "grad_norm": 2.441636085510254, "learning_rate": 4.250316542250624e-05, "loss": 1.2343, "step": 16925 }, { "epoch": 5.07, "grad_norm": 2.9591567516326904, "learning_rate": 4.249896992884641e-05, "loss": 1.2953, "step": 16930 }, { "epoch": 5.07, "grad_norm": 1.866086721420288, "learning_rate": 4.2494773468736654e-05, "loss": 1.1143, "step": 16935 }, { "epoch": 5.07, "grad_norm": 2.113767147064209, "learning_rate": 4.249057604240875e-05, "loss": 1.2088, "step": 16940 }, { "epoch": 5.07, "grad_norm": 1.1835196018218994, "learning_rate": 4.2486377650094505e-05, "loss": 1.1204, "step": 16945 }, { "epoch": 5.07, "grad_norm": 2.831669807434082, "learning_rate": 4.24821782920258e-05, "loss": 1.0658, "step": 16950 }, { "epoch": 5.07, "grad_norm": 2.2646048069000244, "learning_rate": 4.247797796843456e-05, "loss": 1.3129, "step": 16955 }, { "epoch": 5.07, "grad_norm": 1.5404542684555054, "learning_rate": 4.247377667955277e-05, "loss": 1.1941, "step": 16960 }, { "epoch": 5.08, "grad_norm": 1.8242815732955933, "learning_rate": 
4.246957442561245e-05, "loss": 1.194, "step": 16965 }, { "epoch": 5.08, "grad_norm": 1.6594616174697876, "learning_rate": 4.246537120684569e-05, "loss": 1.217, "step": 16970 }, { "epoch": 5.08, "grad_norm": 2.1032214164733887, "learning_rate": 4.2462007937313256e-05, "loss": 1.1512, "step": 16975 }, { "epoch": 5.08, "grad_norm": 1.7993156909942627, "learning_rate": 4.245780298244394e-05, "loss": 1.3622, "step": 16980 }, { "epoch": 5.08, "grad_norm": 2.3220622539520264, "learning_rate": 4.245359706339832e-05, "loss": 1.1386, "step": 16985 }, { "epoch": 5.08, "grad_norm": 3.0997767448425293, "learning_rate": 4.2449390180408666e-05, "loss": 1.0817, "step": 16990 }, { "epoch": 5.08, "grad_norm": 1.8498963117599487, "learning_rate": 4.2445182333707334e-05, "loss": 1.1428, "step": 16995 }, { "epoch": 5.09, "grad_norm": 2.013101816177368, "learning_rate": 4.244097352352672e-05, "loss": 1.0904, "step": 17000 }, { "epoch": 5.09, "grad_norm": 1.1816306114196777, "learning_rate": 4.243676375009926e-05, "loss": 1.1456, "step": 17005 }, { "epoch": 5.09, "grad_norm": 2.447376012802124, "learning_rate": 4.243255301365746e-05, "loss": 1.2639, "step": 17010 }, { "epoch": 5.09, "grad_norm": 7.374262809753418, "learning_rate": 4.2428341314433884e-05, "loss": 1.3451, "step": 17015 }, { "epoch": 5.09, "grad_norm": 1.150455117225647, "learning_rate": 4.2424128652661135e-05, "loss": 1.4079, "step": 17020 }, { "epoch": 5.09, "grad_norm": 3.0257742404937744, "learning_rate": 4.241991502857187e-05, "loss": 1.0875, "step": 17025 }, { "epoch": 5.1, "grad_norm": 2.3345866203308105, "learning_rate": 4.24157004423988e-05, "loss": 1.0676, "step": 17030 }, { "epoch": 5.1, "grad_norm": 2.740541696548462, "learning_rate": 4.24114848943747e-05, "loss": 1.2396, "step": 17035 }, { "epoch": 5.1, "grad_norm": 1.1719865798950195, "learning_rate": 4.240726838473239e-05, "loss": 1.2915, "step": 17040 }, { "epoch": 5.1, "grad_norm": 2.3873939514160156, "learning_rate": 4.240305091370473e-05, "loss": 1.0754, 
"step": 17045 }, { "epoch": 5.1, "grad_norm": 2.359999179840088, "learning_rate": 4.239883248152467e-05, "loss": 1.0515, "step": 17050 }, { "epoch": 5.1, "grad_norm": 2.249455451965332, "learning_rate": 4.2394613088425176e-05, "loss": 1.1345, "step": 17055 }, { "epoch": 5.1, "grad_norm": 3.461733818054199, "learning_rate": 4.239039273463927e-05, "loss": 1.3695, "step": 17060 }, { "epoch": 5.11, "grad_norm": 1.5373154878616333, "learning_rate": 4.238617142040007e-05, "loss": 1.3273, "step": 17065 }, { "epoch": 5.11, "grad_norm": 6.110920429229736, "learning_rate": 4.238194914594068e-05, "loss": 1.3379, "step": 17070 }, { "epoch": 5.11, "grad_norm": 1.5171079635620117, "learning_rate": 4.237772591149431e-05, "loss": 1.2593, "step": 17075 }, { "epoch": 5.11, "grad_norm": 1.9108515977859497, "learning_rate": 4.23735017172942e-05, "loss": 1.4072, "step": 17080 }, { "epoch": 5.11, "grad_norm": 3.4119062423706055, "learning_rate": 4.2369276563573645e-05, "loss": 1.3117, "step": 17085 }, { "epoch": 5.11, "grad_norm": 1.446298599243164, "learning_rate": 4.2365050450566e-05, "loss": 1.2207, "step": 17090 }, { "epoch": 5.11, "grad_norm": 3.0925724506378174, "learning_rate": 4.2360823378504674e-05, "loss": 1.1398, "step": 17095 }, { "epoch": 5.12, "grad_norm": 3.060494899749756, "learning_rate": 4.235659534762312e-05, "loss": 1.2885, "step": 17100 }, { "epoch": 5.12, "grad_norm": 2.6073570251464844, "learning_rate": 4.235236635815484e-05, "loss": 1.2522, "step": 17105 }, { "epoch": 5.12, "grad_norm": 0.9218636751174927, "learning_rate": 4.23481364103334e-05, "loss": 1.253, "step": 17110 }, { "epoch": 5.12, "grad_norm": 0.8496900200843811, "learning_rate": 4.234390550439243e-05, "loss": 1.3157, "step": 17115 }, { "epoch": 5.12, "grad_norm": 2.110830783843994, "learning_rate": 4.233967364056558e-05, "loss": 1.2034, "step": 17120 }, { "epoch": 5.12, "grad_norm": 1.333644986152649, "learning_rate": 4.233544081908658e-05, "loss": 1.0453, "step": 17125 }, { "epoch": 5.13, 
"grad_norm": 3.8100926876068115, "learning_rate": 4.233120704018921e-05, "loss": 1.1498, "step": 17130 }, { "epoch": 5.13, "grad_norm": 1.855100154876709, "learning_rate": 4.2326972304107284e-05, "loss": 1.1742, "step": 17135 }, { "epoch": 5.13, "grad_norm": 1.8866065740585327, "learning_rate": 4.232273661107468e-05, "loss": 1.2759, "step": 17140 }, { "epoch": 5.13, "grad_norm": 1.6143709421157837, "learning_rate": 4.231849996132535e-05, "loss": 1.1766, "step": 17145 }, { "epoch": 5.13, "grad_norm": 1.1877189874649048, "learning_rate": 4.231426235509326e-05, "loss": 1.3377, "step": 17150 }, { "epoch": 5.13, "grad_norm": 3.203878879547119, "learning_rate": 4.2310023792612466e-05, "loss": 1.2728, "step": 17155 }, { "epoch": 5.13, "grad_norm": 2.035184860229492, "learning_rate": 4.230578427411705e-05, "loss": 1.2227, "step": 17160 }, { "epoch": 5.14, "grad_norm": 2.1178765296936035, "learning_rate": 4.230154379984115e-05, "loss": 1.0745, "step": 17165 }, { "epoch": 5.14, "grad_norm": 1.0364670753479004, "learning_rate": 4.229730237001897e-05, "loss": 1.1975, "step": 17170 }, { "epoch": 5.14, "grad_norm": 1.6296310424804688, "learning_rate": 4.2293059984884765e-05, "loss": 1.301, "step": 17175 }, { "epoch": 5.14, "grad_norm": 2.1096887588500977, "learning_rate": 4.228881664467282e-05, "loss": 1.2428, "step": 17180 }, { "epoch": 5.14, "grad_norm": 3.0210041999816895, "learning_rate": 4.228457234961752e-05, "loss": 1.2328, "step": 17185 }, { "epoch": 5.14, "grad_norm": 1.9511592388153076, "learning_rate": 4.2280327099953245e-05, "loss": 1.084, "step": 17190 }, { "epoch": 5.14, "grad_norm": 2.9204232692718506, "learning_rate": 4.227608089591447e-05, "loss": 1.2676, "step": 17195 }, { "epoch": 5.15, "grad_norm": 1.7769725322723389, "learning_rate": 4.22718337377357e-05, "loss": 1.2958, "step": 17200 }, { "epoch": 5.15, "grad_norm": 1.5279228687286377, "learning_rate": 4.2267585625651516e-05, "loss": 1.2537, "step": 17205 }, { "epoch": 5.15, "grad_norm": 2.0030903816223145, 
"learning_rate": 4.2263336559896514e-05, "loss": 1.1446, "step": 17210 }, { "epoch": 5.15, "grad_norm": 1.8052384853363037, "learning_rate": 4.2259086540705384e-05, "loss": 1.3123, "step": 17215 }, { "epoch": 5.15, "grad_norm": 1.5273395776748657, "learning_rate": 4.225483556831284e-05, "loss": 1.3197, "step": 17220 }, { "epoch": 5.15, "grad_norm": 7.197212219238281, "learning_rate": 4.225058364295367e-05, "loss": 1.2388, "step": 17225 }, { "epoch": 5.16, "grad_norm": 2.869080066680908, "learning_rate": 4.22463307648627e-05, "loss": 1.1988, "step": 17230 }, { "epoch": 5.16, "grad_norm": 1.3211283683776855, "learning_rate": 4.22420769342748e-05, "loss": 1.3071, "step": 17235 }, { "epoch": 5.16, "grad_norm": 0.8859264850616455, "learning_rate": 4.223782215142491e-05, "loss": 1.2127, "step": 17240 }, { "epoch": 5.16, "grad_norm": 2.70227313041687, "learning_rate": 4.2233566416548024e-05, "loss": 1.2588, "step": 17245 }, { "epoch": 5.16, "grad_norm": 1.2390819787979126, "learning_rate": 4.2229309729879174e-05, "loss": 1.3169, "step": 17250 }, { "epoch": 5.16, "grad_norm": 1.9545568227767944, "learning_rate": 4.222505209165346e-05, "loss": 1.1279, "step": 17255 }, { "epoch": 5.16, "grad_norm": 2.1074938774108887, "learning_rate": 4.2220793502106035e-05, "loss": 1.093, "step": 17260 }, { "epoch": 5.17, "grad_norm": 2.4729721546173096, "learning_rate": 4.2216533961472074e-05, "loss": 1.2353, "step": 17265 }, { "epoch": 5.17, "grad_norm": 2.2002952098846436, "learning_rate": 4.221227346998684e-05, "loss": 1.3197, "step": 17270 }, { "epoch": 5.17, "grad_norm": 2.468754768371582, "learning_rate": 4.220801202788563e-05, "loss": 1.2085, "step": 17275 }, { "epoch": 5.17, "grad_norm": 1.546784520149231, "learning_rate": 4.22037496354038e-05, "loss": 1.0504, "step": 17280 }, { "epoch": 5.17, "grad_norm": 2.2986133098602295, "learning_rate": 4.219948629277677e-05, "loss": 1.0963, "step": 17285 }, { "epoch": 5.17, "grad_norm": 1.0062131881713867, "learning_rate": 
4.219522200023999e-05, "loss": 1.1797, "step": 17290 }, { "epoch": 5.17, "grad_norm": 1.0225803852081299, "learning_rate": 4.219095675802896e-05, "loss": 1.1858, "step": 17295 }, { "epoch": 5.18, "grad_norm": 1.347981572151184, "learning_rate": 4.218669056637926e-05, "loss": 1.2715, "step": 17300 }, { "epoch": 5.18, "grad_norm": 2.4075846672058105, "learning_rate": 4.218242342552651e-05, "loss": 1.0566, "step": 17305 }, { "epoch": 5.18, "grad_norm": 4.251238822937012, "learning_rate": 4.217815533570637e-05, "loss": 1.1106, "step": 17310 }, { "epoch": 5.18, "grad_norm": 2.3025827407836914, "learning_rate": 4.2173886297154575e-05, "loss": 1.2947, "step": 17315 }, { "epoch": 5.18, "grad_norm": 2.7054014205932617, "learning_rate": 4.216961631010689e-05, "loss": 1.1512, "step": 17320 }, { "epoch": 5.18, "grad_norm": 2.6047892570495605, "learning_rate": 4.2165345374799134e-05, "loss": 1.1665, "step": 17325 }, { "epoch": 5.18, "grad_norm": 2.1984410285949707, "learning_rate": 4.2161073491467196e-05, "loss": 1.2841, "step": 17330 }, { "epoch": 5.19, "grad_norm": 1.6015806198120117, "learning_rate": 4.215680066034701e-05, "loss": 1.3405, "step": 17335 }, { "epoch": 5.19, "grad_norm": 5.569428443908691, "learning_rate": 4.215252688167456e-05, "loss": 1.0902, "step": 17340 }, { "epoch": 5.19, "grad_norm": 2.5024139881134033, "learning_rate": 4.2148252155685875e-05, "loss": 1.174, "step": 17345 }, { "epoch": 5.19, "grad_norm": 1.9789233207702637, "learning_rate": 4.2143976482617055e-05, "loss": 1.2465, "step": 17350 }, { "epoch": 5.19, "grad_norm": 2.1717369556427, "learning_rate": 4.213969986270423e-05, "loss": 1.1812, "step": 17355 }, { "epoch": 5.19, "grad_norm": 3.2507376670837402, "learning_rate": 4.21354222961836e-05, "loss": 1.2907, "step": 17360 }, { "epoch": 5.2, "grad_norm": 1.7060433626174927, "learning_rate": 4.21311437832914e-05, "loss": 1.2651, "step": 17365 }, { "epoch": 5.2, "grad_norm": 5.012210369110107, "learning_rate": 4.212686432426394e-05, "loss": 1.1516, 
"step": 17370 }, { "epoch": 5.2, "grad_norm": 5.409265995025635, "learning_rate": 4.2122583919337566e-05, "loss": 1.1225, "step": 17375 }, { "epoch": 5.2, "grad_norm": 2.1262056827545166, "learning_rate": 4.211830256874868e-05, "loss": 1.2868, "step": 17380 }, { "epoch": 5.2, "grad_norm": 1.5993378162384033, "learning_rate": 4.211402027273373e-05, "loss": 1.333, "step": 17385 }, { "epoch": 5.2, "grad_norm": 1.085625171661377, "learning_rate": 4.2109737031529245e-05, "loss": 1.2228, "step": 17390 }, { "epoch": 5.2, "grad_norm": 1.2686785459518433, "learning_rate": 4.2105452845371754e-05, "loss": 1.3888, "step": 17395 }, { "epoch": 5.21, "grad_norm": 1.7684992551803589, "learning_rate": 4.210116771449789e-05, "loss": 1.1147, "step": 17400 }, { "epoch": 5.21, "grad_norm": 4.321854114532471, "learning_rate": 4.209688163914431e-05, "loss": 1.2644, "step": 17405 }, { "epoch": 5.21, "grad_norm": 1.0918437242507935, "learning_rate": 4.209259461954772e-05, "loss": 0.9742, "step": 17410 }, { "epoch": 5.21, "grad_norm": 1.0674161911010742, "learning_rate": 4.20883066559449e-05, "loss": 1.3029, "step": 17415 }, { "epoch": 5.21, "grad_norm": 10.988029479980469, "learning_rate": 4.208401774857267e-05, "loss": 1.4338, "step": 17420 }, { "epoch": 5.21, "grad_norm": 1.3051105737686157, "learning_rate": 4.2079727897667896e-05, "loss": 1.1686, "step": 17425 }, { "epoch": 5.21, "grad_norm": 1.0172728300094604, "learning_rate": 4.2075437103467495e-05, "loss": 1.1133, "step": 17430 }, { "epoch": 5.22, "grad_norm": 1.7416491508483887, "learning_rate": 4.207114536620846e-05, "loss": 1.1215, "step": 17435 }, { "epoch": 5.22, "grad_norm": 1.3212878704071045, "learning_rate": 4.206685268612781e-05, "loss": 1.1689, "step": 17440 }, { "epoch": 5.22, "grad_norm": 3.1066105365753174, "learning_rate": 4.206255906346262e-05, "loss": 1.0547, "step": 17445 }, { "epoch": 5.22, "grad_norm": 4.053175449371338, "learning_rate": 4.205826449845005e-05, "loss": 1.2893, "step": 17450 }, { "epoch": 5.22, 
"grad_norm": 1.900181531906128, "learning_rate": 4.205396899132724e-05, "loss": 1.2453, "step": 17455 }, { "epoch": 5.22, "grad_norm": 0.7705186009407043, "learning_rate": 4.2049672542331454e-05, "loss": 1.2095, "step": 17460 }, { "epoch": 5.23, "grad_norm": 1.5405861139297485, "learning_rate": 4.2045375151699976e-05, "loss": 1.2622, "step": 17465 }, { "epoch": 5.23, "grad_norm": 3.1085848808288574, "learning_rate": 4.204107681967015e-05, "loss": 1.1181, "step": 17470 }, { "epoch": 5.23, "grad_norm": 1.0741022825241089, "learning_rate": 4.203677754647936e-05, "loss": 1.1253, "step": 17475 }, { "epoch": 5.23, "grad_norm": 1.2874560356140137, "learning_rate": 4.2032477332365054e-05, "loss": 1.1252, "step": 17480 }, { "epoch": 5.23, "grad_norm": 2.286153554916382, "learning_rate": 4.2028176177564736e-05, "loss": 1.1286, "step": 17485 }, { "epoch": 5.23, "grad_norm": 1.706329107284546, "learning_rate": 4.202387408231595e-05, "loss": 1.1123, "step": 17490 }, { "epoch": 5.23, "grad_norm": 2.1467063426971436, "learning_rate": 4.2019571046856284e-05, "loss": 1.2309, "step": 17495 }, { "epoch": 5.24, "grad_norm": 2.3436472415924072, "learning_rate": 4.2015267071423404e-05, "loss": 1.1822, "step": 17500 }, { "epoch": 5.24, "grad_norm": 1.5204914808273315, "learning_rate": 4.2010962156255004e-05, "loss": 1.1763, "step": 17505 }, { "epoch": 5.24, "grad_norm": 1.3198552131652832, "learning_rate": 4.200665630158885e-05, "loss": 1.174, "step": 17510 }, { "epoch": 5.24, "grad_norm": 1.4208104610443115, "learning_rate": 4.200234950766275e-05, "loss": 1.1142, "step": 17515 }, { "epoch": 5.24, "grad_norm": 2.4655168056488037, "learning_rate": 4.199804177471456e-05, "loss": 0.9846, "step": 17520 }, { "epoch": 5.24, "grad_norm": 1.046281099319458, "learning_rate": 4.199373310298219e-05, "loss": 1.2307, "step": 17525 }, { "epoch": 5.24, "grad_norm": 1.329183578491211, "learning_rate": 4.19894234927036e-05, "loss": 1.1682, "step": 17530 }, { "epoch": 5.25, "grad_norm": 
1.4482097625732422, "learning_rate": 4.198511294411681e-05, "loss": 1.2648, "step": 17535 }, { "epoch": 5.25, "grad_norm": 1.9892488718032837, "learning_rate": 4.1980801457459895e-05, "loss": 1.0417, "step": 17540 }, { "epoch": 5.25, "grad_norm": 2.176584243774414, "learning_rate": 4.197648903297096e-05, "loss": 1.2264, "step": 17545 }, { "epoch": 5.25, "grad_norm": 1.82011878490448, "learning_rate": 4.197217567088818e-05, "loss": 1.1264, "step": 17550 }, { "epoch": 5.25, "grad_norm": 1.9624288082122803, "learning_rate": 4.196786137144979e-05, "loss": 1.2737, "step": 17555 }, { "epoch": 5.25, "grad_norm": 1.3855583667755127, "learning_rate": 4.196354613489404e-05, "loss": 1.3331, "step": 17560 }, { "epoch": 5.26, "grad_norm": 1.9667317867279053, "learning_rate": 4.195922996145928e-05, "loss": 1.1719, "step": 17565 }, { "epoch": 5.26, "grad_norm": 1.6437666416168213, "learning_rate": 4.1954912851383864e-05, "loss": 1.1407, "step": 17570 }, { "epoch": 5.26, "grad_norm": 1.1012728214263916, "learning_rate": 4.1950594804906246e-05, "loss": 1.3084, "step": 17575 }, { "epoch": 5.26, "grad_norm": 1.9337821006774902, "learning_rate": 4.1946275822264904e-05, "loss": 1.1738, "step": 17580 }, { "epoch": 5.26, "grad_norm": 3.0107462406158447, "learning_rate": 4.194195590369835e-05, "loss": 1.2292, "step": 17585 }, { "epoch": 5.26, "grad_norm": 1.6357334852218628, "learning_rate": 4.193763504944518e-05, "loss": 1.2465, "step": 17590 }, { "epoch": 5.26, "grad_norm": 3.3010284900665283, "learning_rate": 4.193331325974403e-05, "loss": 1.2547, "step": 17595 }, { "epoch": 5.27, "grad_norm": 1.5490888357162476, "learning_rate": 4.19289905348336e-05, "loss": 1.2406, "step": 17600 }, { "epoch": 5.27, "grad_norm": 1.6628612279891968, "learning_rate": 4.192466687495262e-05, "loss": 1.0981, "step": 17605 }, { "epoch": 5.27, "grad_norm": 1.1924035549163818, "learning_rate": 4.192034228033987e-05, "loss": 1.1726, "step": 17610 }, { "epoch": 5.27, "grad_norm": 2.363252639770508, 
"learning_rate": 4.191601675123422e-05, "loss": 1.2418, "step": 17615 }, { "epoch": 5.27, "grad_norm": 2.983738899230957, "learning_rate": 4.1911690287874535e-05, "loss": 1.1598, "step": 17620 }, { "epoch": 5.27, "grad_norm": 2.698246955871582, "learning_rate": 4.190736289049977e-05, "loss": 1.256, "step": 17625 }, { "epoch": 5.27, "grad_norm": 2.4981305599212646, "learning_rate": 4.190303455934894e-05, "loss": 1.2023, "step": 17630 }, { "epoch": 5.28, "grad_norm": 1.513262152671814, "learning_rate": 4.189870529466107e-05, "loss": 1.1823, "step": 17635 }, { "epoch": 5.28, "grad_norm": 2.1456382274627686, "learning_rate": 4.1894375096675274e-05, "loss": 1.007, "step": 17640 }, { "epoch": 5.28, "grad_norm": 2.4811346530914307, "learning_rate": 4.189004396563071e-05, "loss": 1.2597, "step": 17645 }, { "epoch": 5.28, "grad_norm": 2.1642322540283203, "learning_rate": 4.1885711901766564e-05, "loss": 1.2648, "step": 17650 }, { "epoch": 5.28, "grad_norm": 2.7846784591674805, "learning_rate": 4.188137890532211e-05, "loss": 1.0887, "step": 17655 }, { "epoch": 5.28, "grad_norm": 2.358762502670288, "learning_rate": 4.187704497653665e-05, "loss": 1.2641, "step": 17660 }, { "epoch": 5.29, "grad_norm": 1.4334055185317993, "learning_rate": 4.1872710115649525e-05, "loss": 1.2008, "step": 17665 }, { "epoch": 5.29, "grad_norm": 1.1440168619155884, "learning_rate": 4.1868374322900163e-05, "loss": 1.2595, "step": 17670 }, { "epoch": 5.29, "grad_norm": 1.5361130237579346, "learning_rate": 4.186403759852802e-05, "loss": 1.2388, "step": 17675 }, { "epoch": 5.29, "grad_norm": 2.4247677326202393, "learning_rate": 4.185969994277262e-05, "loss": 1.0073, "step": 17680 }, { "epoch": 5.29, "grad_norm": 1.3010989427566528, "learning_rate": 4.1855361355873506e-05, "loss": 1.3015, "step": 17685 }, { "epoch": 5.29, "grad_norm": 2.8107826709747314, "learning_rate": 4.185102183807031e-05, "loss": 1.167, "step": 17690 }, { "epoch": 5.29, "grad_norm": 1.6690555810928345, "learning_rate": 
4.1846681389602686e-05, "loss": 1.2343, "step": 17695 }, { "epoch": 5.3, "grad_norm": 1.7709966897964478, "learning_rate": 4.1842340010710366e-05, "loss": 1.3241, "step": 17700 }, { "epoch": 5.3, "grad_norm": 4.506913661956787, "learning_rate": 4.1837997701633115e-05, "loss": 1.3076, "step": 17705 }, { "epoch": 5.3, "grad_norm": 2.404618501663208, "learning_rate": 4.183365446261075e-05, "loss": 1.1784, "step": 17710 }, { "epoch": 5.3, "grad_norm": 1.8350248336791992, "learning_rate": 4.182931029388315e-05, "loss": 1.2567, "step": 17715 }, { "epoch": 5.3, "grad_norm": 1.5399670600891113, "learning_rate": 4.182496519569023e-05, "loss": 1.1838, "step": 17720 }, { "epoch": 5.3, "grad_norm": 0.7773048281669617, "learning_rate": 4.1820619168271975e-05, "loss": 1.2516, "step": 17725 }, { "epoch": 5.3, "grad_norm": 2.276472330093384, "learning_rate": 4.181627221186841e-05, "loss": 1.37, "step": 17730 }, { "epoch": 5.31, "grad_norm": 2.218336582183838, "learning_rate": 4.18119243267196e-05, "loss": 1.3398, "step": 17735 }, { "epoch": 5.31, "grad_norm": 1.3375170230865479, "learning_rate": 4.180757551306569e-05, "loss": 1.3, "step": 17740 }, { "epoch": 5.31, "grad_norm": 1.2494932413101196, "learning_rate": 4.180322577114686e-05, "loss": 1.2727, "step": 17745 }, { "epoch": 5.31, "grad_norm": 1.8490458726882935, "learning_rate": 4.1798875101203336e-05, "loss": 1.3535, "step": 17750 }, { "epoch": 5.31, "grad_norm": 3.2048110961914062, "learning_rate": 4.179452350347539e-05, "loss": 1.1024, "step": 17755 }, { "epoch": 5.31, "grad_norm": 2.3998045921325684, "learning_rate": 4.179017097820338e-05, "loss": 1.1105, "step": 17760 }, { "epoch": 5.32, "grad_norm": 3.1060426235198975, "learning_rate": 4.178581752562767e-05, "loss": 1.223, "step": 17765 }, { "epoch": 5.32, "grad_norm": 2.371044874191284, "learning_rate": 4.178146314598872e-05, "loss": 1.2038, "step": 17770 }, { "epoch": 5.32, "grad_norm": 1.7323418855667114, "learning_rate": 4.177710783952699e-05, "loss": 1.2121, 
"step": 17775 }, { "epoch": 5.32, "grad_norm": 2.284435272216797, "learning_rate": 4.177275160648304e-05, "loss": 1.3172, "step": 17780 }, { "epoch": 5.32, "grad_norm": 3.9668426513671875, "learning_rate": 4.1768394447097446e-05, "loss": 1.2761, "step": 17785 }, { "epoch": 5.32, "grad_norm": 0.9461215734481812, "learning_rate": 4.176403636161086e-05, "loss": 1.2843, "step": 17790 }, { "epoch": 5.32, "grad_norm": 2.7739803791046143, "learning_rate": 4.1759677350263976e-05, "loss": 1.4001, "step": 17795 }, { "epoch": 5.33, "grad_norm": 1.284010648727417, "learning_rate": 4.1755317413297526e-05, "loss": 1.2278, "step": 17800 }, { "epoch": 5.33, "grad_norm": 2.6691951751708984, "learning_rate": 4.175095655095232e-05, "loss": 1.1456, "step": 17805 }, { "epoch": 5.33, "grad_norm": 2.771559476852417, "learning_rate": 4.174659476346919e-05, "loss": 1.1955, "step": 17810 }, { "epoch": 5.33, "grad_norm": 1.4969909191131592, "learning_rate": 4.174223205108904e-05, "loss": 1.2585, "step": 17815 }, { "epoch": 5.33, "grad_norm": 1.8227806091308594, "learning_rate": 4.1737868414052817e-05, "loss": 1.3033, "step": 17820 }, { "epoch": 5.33, "grad_norm": 1.4669899940490723, "learning_rate": 4.1733503852601516e-05, "loss": 1.2654, "step": 17825 }, { "epoch": 5.33, "grad_norm": 3.621481418609619, "learning_rate": 4.172913836697619e-05, "loss": 1.2247, "step": 17830 }, { "epoch": 5.34, "grad_norm": 1.8669898509979248, "learning_rate": 4.172477195741794e-05, "loss": 1.1747, "step": 17835 }, { "epoch": 5.34, "grad_norm": 1.145015001296997, "learning_rate": 4.1720404624167925e-05, "loss": 1.3372, "step": 17840 }, { "epoch": 5.34, "grad_norm": 1.5588964223861694, "learning_rate": 4.171603636746734e-05, "loss": 1.1368, "step": 17845 }, { "epoch": 5.34, "grad_norm": 3.020059823989868, "learning_rate": 4.171166718755744e-05, "loss": 1.1614, "step": 17850 }, { "epoch": 5.34, "grad_norm": 2.658262252807617, "learning_rate": 4.170729708467953e-05, "loss": 1.1811, "step": 17855 }, { "epoch": 
5.34, "grad_norm": 1.9605822563171387, "learning_rate": 4.170292605907498e-05, "loss": 1.1067, "step": 17860 }, { "epoch": 5.35, "grad_norm": 2.259817600250244, "learning_rate": 4.169855411098517e-05, "loss": 1.3053, "step": 17865 }, { "epoch": 5.35, "grad_norm": 1.7595691680908203, "learning_rate": 4.169418124065159e-05, "loss": 1.2888, "step": 17870 }, { "epoch": 5.35, "grad_norm": 1.319011926651001, "learning_rate": 4.168980744831572e-05, "loss": 1.0001, "step": 17875 }, { "epoch": 5.35, "grad_norm": 1.3110008239746094, "learning_rate": 4.168543273421913e-05, "loss": 1.3487, "step": 17880 }, { "epoch": 5.35, "grad_norm": 2.2493362426757812, "learning_rate": 4.168105709860344e-05, "loss": 1.2849, "step": 17885 }, { "epoch": 5.35, "grad_norm": 1.7619404792785645, "learning_rate": 4.167668054171031e-05, "loss": 1.237, "step": 17890 }, { "epoch": 5.35, "grad_norm": 1.684557318687439, "learning_rate": 4.167230306378144e-05, "loss": 1.2893, "step": 17895 }, { "epoch": 5.36, "grad_norm": 3.6283462047576904, "learning_rate": 4.1667924665058605e-05, "loss": 1.3225, "step": 17900 }, { "epoch": 5.36, "grad_norm": 1.8916960954666138, "learning_rate": 4.166354534578362e-05, "loss": 1.2069, "step": 17905 }, { "epoch": 5.36, "grad_norm": 1.7808947563171387, "learning_rate": 4.165916510619834e-05, "loss": 1.2129, "step": 17910 }, { "epoch": 5.36, "grad_norm": 2.4525959491729736, "learning_rate": 4.1654783946544695e-05, "loss": 1.2451, "step": 17915 }, { "epoch": 5.36, "grad_norm": 1.7354241609573364, "learning_rate": 4.165040186706464e-05, "loss": 1.2769, "step": 17920 }, { "epoch": 5.36, "grad_norm": 2.6670055389404297, "learning_rate": 4.1646018868000194e-05, "loss": 1.3005, "step": 17925 }, { "epoch": 5.36, "grad_norm": 2.470824956893921, "learning_rate": 4.164163494959342e-05, "loss": 1.1619, "step": 17930 }, { "epoch": 5.37, "grad_norm": 2.5797417163848877, "learning_rate": 4.1637250112086466e-05, "loss": 1.3173, "step": 17935 }, { "epoch": 5.37, "grad_norm": 
2.280491828918457, "learning_rate": 4.163286435572147e-05, "loss": 1.323, "step": 17940 }, { "epoch": 5.37, "grad_norm": 1.8656688928604126, "learning_rate": 4.162847768074067e-05, "loss": 1.3305, "step": 17945 }, { "epoch": 5.37, "grad_norm": 1.572505235671997, "learning_rate": 4.162409008738632e-05, "loss": 1.1925, "step": 17950 }, { "epoch": 5.37, "grad_norm": 1.0871065855026245, "learning_rate": 4.161970157590077e-05, "loss": 1.2474, "step": 17955 }, { "epoch": 5.37, "grad_norm": 1.81941819190979, "learning_rate": 4.161531214652637e-05, "loss": 1.1525, "step": 17960 }, { "epoch": 5.37, "grad_norm": 1.6600680351257324, "learning_rate": 4.161092179950555e-05, "loss": 1.1595, "step": 17965 }, { "epoch": 5.38, "grad_norm": 1.9169402122497559, "learning_rate": 4.160653053508079e-05, "loss": 1.2836, "step": 17970 }, { "epoch": 5.38, "grad_norm": 2.0961391925811768, "learning_rate": 4.16021383534946e-05, "loss": 1.1986, "step": 17975 }, { "epoch": 5.38, "grad_norm": 1.4828705787658691, "learning_rate": 4.159774525498957e-05, "loss": 1.215, "step": 17980 }, { "epoch": 5.38, "grad_norm": 1.643730640411377, "learning_rate": 4.159335123980833e-05, "loss": 1.1822, "step": 17985 }, { "epoch": 5.38, "grad_norm": 2.4399468898773193, "learning_rate": 4.158895630819354e-05, "loss": 1.3621, "step": 17990 }, { "epoch": 5.38, "grad_norm": 1.5563558340072632, "learning_rate": 4.158456046038794e-05, "loss": 1.1871, "step": 17995 }, { "epoch": 5.39, "grad_norm": 2.194761037826538, "learning_rate": 4.15801636966343e-05, "loss": 1.269, "step": 18000 }, { "epoch": 5.39, "grad_norm": 3.4397332668304443, "learning_rate": 4.157576601717546e-05, "loss": 1.2749, "step": 18005 }, { "epoch": 5.39, "grad_norm": 1.9305585622787476, "learning_rate": 4.1571367422254296e-05, "loss": 1.2515, "step": 18010 }, { "epoch": 5.39, "grad_norm": 2.5230295658111572, "learning_rate": 4.156696791211372e-05, "loss": 1.3452, "step": 18015 }, { "epoch": 5.39, "grad_norm": 4.581267833709717, "learning_rate": 
4.156256748699673e-05, "loss": 1.0429, "step": 18020 }, { "epoch": 5.39, "grad_norm": 2.436643123626709, "learning_rate": 4.155816614714636e-05, "loss": 1.2162, "step": 18025 }, { "epoch": 5.39, "grad_norm": 3.484376907348633, "learning_rate": 4.155376389280569e-05, "loss": 1.1535, "step": 18030 }, { "epoch": 5.4, "grad_norm": 0.869539201259613, "learning_rate": 4.1549360724217835e-05, "loss": 1.238, "step": 18035 }, { "epoch": 5.4, "grad_norm": 2.1661620140075684, "learning_rate": 4.1544956641625996e-05, "loss": 1.3041, "step": 18040 }, { "epoch": 5.4, "grad_norm": 2.650646209716797, "learning_rate": 4.15405516452734e-05, "loss": 1.1902, "step": 18045 }, { "epoch": 5.4, "grad_norm": 3.9446349143981934, "learning_rate": 4.153614573540332e-05, "loss": 1.337, "step": 18050 }, { "epoch": 5.4, "grad_norm": 1.5067466497421265, "learning_rate": 4.153173891225911e-05, "loss": 1.2288, "step": 18055 }, { "epoch": 5.4, "grad_norm": 3.6524410247802734, "learning_rate": 4.152733117608413e-05, "loss": 1.2194, "step": 18060 }, { "epoch": 5.4, "grad_norm": 1.016907811164856, "learning_rate": 4.1522922527121846e-05, "loss": 1.1394, "step": 18065 }, { "epoch": 5.41, "grad_norm": 2.3441882133483887, "learning_rate": 4.151851296561572e-05, "loss": 1.1889, "step": 18070 }, { "epoch": 5.41, "grad_norm": 2.799960136413574, "learning_rate": 4.1514102491809286e-05, "loss": 1.1934, "step": 18075 }, { "epoch": 5.41, "grad_norm": 3.4592952728271484, "learning_rate": 4.1509691105946145e-05, "loss": 1.2256, "step": 18080 }, { "epoch": 5.41, "grad_norm": 1.311344027519226, "learning_rate": 4.150527880826992e-05, "loss": 1.1273, "step": 18085 }, { "epoch": 5.41, "grad_norm": 2.329275131225586, "learning_rate": 4.15008655990243e-05, "loss": 1.3712, "step": 18090 }, { "epoch": 5.41, "grad_norm": 4.58236026763916, "learning_rate": 4.149645147845303e-05, "loss": 1.141, "step": 18095 }, { "epoch": 5.42, "grad_norm": 1.4890109300613403, "learning_rate": 4.149203644679989e-05, "loss": 1.1462, "step": 
18100 }, { "epoch": 5.42, "grad_norm": 3.4665701389312744, "learning_rate": 4.148762050430872e-05, "loss": 1.2364, "step": 18105 }, { "epoch": 5.42, "grad_norm": 2.3981378078460693, "learning_rate": 4.148320365122341e-05, "loss": 1.2632, "step": 18110 }, { "epoch": 5.42, "grad_norm": 2.2879130840301514, "learning_rate": 4.147878588778789e-05, "loss": 1.1986, "step": 18115 }, { "epoch": 5.42, "grad_norm": 1.7750098705291748, "learning_rate": 4.1474367214246156e-05, "loss": 1.168, "step": 18120 }, { "epoch": 5.42, "grad_norm": 1.1014642715454102, "learning_rate": 4.146994763084225e-05, "loss": 1.2932, "step": 18125 }, { "epoch": 5.42, "grad_norm": 2.6048409938812256, "learning_rate": 4.1465527137820255e-05, "loss": 1.2311, "step": 18130 }, { "epoch": 5.43, "grad_norm": 1.7655155658721924, "learning_rate": 4.146110573542431e-05, "loss": 1.1891, "step": 18135 }, { "epoch": 5.43, "grad_norm": 2.196241617202759, "learning_rate": 4.14566834238986e-05, "loss": 1.1351, "step": 18140 }, { "epoch": 5.43, "grad_norm": 1.649719476699829, "learning_rate": 4.145226020348737e-05, "loss": 1.2341, "step": 18145 }, { "epoch": 5.43, "grad_norm": 1.7216572761535645, "learning_rate": 4.1447836074434916e-05, "loss": 1.2227, "step": 18150 }, { "epoch": 5.43, "grad_norm": 2.0533690452575684, "learning_rate": 4.144341103698557e-05, "loss": 1.2416, "step": 18155 }, { "epoch": 5.43, "grad_norm": 3.26836895942688, "learning_rate": 4.143898509138373e-05, "loss": 1.1773, "step": 18160 }, { "epoch": 5.43, "grad_norm": 1.7304611206054688, "learning_rate": 4.1434558237873824e-05, "loss": 1.3271, "step": 18165 }, { "epoch": 5.44, "grad_norm": 1.9267404079437256, "learning_rate": 4.143013047670035e-05, "loss": 1.2153, "step": 18170 }, { "epoch": 5.44, "grad_norm": 1.807952642440796, "learning_rate": 4.1425701808107855e-05, "loss": 1.2352, "step": 18175 }, { "epoch": 5.44, "grad_norm": 1.5293511152267456, "learning_rate": 4.142127223234091e-05, "loss": 1.2766, "step": 18180 }, { "epoch": 5.44, 
"grad_norm": 2.029411554336548, "learning_rate": 4.1416841749644174e-05, "loss": 1.0328, "step": 18185 }, { "epoch": 5.44, "grad_norm": 1.7925702333450317, "learning_rate": 4.1412410360262334e-05, "loss": 1.366, "step": 18190 }, { "epoch": 5.44, "grad_norm": 0.8310097455978394, "learning_rate": 4.140797806444013e-05, "loss": 1.212, "step": 18195 }, { "epoch": 5.45, "grad_norm": 3.394538402557373, "learning_rate": 4.140354486242235e-05, "loss": 1.3675, "step": 18200 }, { "epoch": 5.45, "grad_norm": 4.078660488128662, "learning_rate": 4.139911075445384e-05, "loss": 1.1619, "step": 18205 }, { "epoch": 5.45, "grad_norm": 1.416306495666504, "learning_rate": 4.1394675740779485e-05, "loss": 1.0888, "step": 18210 }, { "epoch": 5.45, "grad_norm": 3.147977828979492, "learning_rate": 4.139023982164424e-05, "loss": 1.1083, "step": 18215 }, { "epoch": 5.45, "grad_norm": 3.413358688354492, "learning_rate": 4.138580299729308e-05, "loss": 1.1791, "step": 18220 }, { "epoch": 5.45, "grad_norm": 1.7212944030761719, "learning_rate": 4.138136526797105e-05, "loss": 1.2017, "step": 18225 }, { "epoch": 5.45, "grad_norm": 2.027756690979004, "learning_rate": 4.137692663392325e-05, "loss": 1.1783, "step": 18230 }, { "epoch": 5.46, "grad_norm": 0.9645988345146179, "learning_rate": 4.137248709539481e-05, "loss": 1.1465, "step": 18235 }, { "epoch": 5.46, "grad_norm": 2.282986879348755, "learning_rate": 4.1368046652630924e-05, "loss": 1.2574, "step": 18240 }, { "epoch": 5.46, "grad_norm": 2.9821066856384277, "learning_rate": 4.136360530587684e-05, "loss": 1.2142, "step": 18245 }, { "epoch": 5.46, "grad_norm": 2.920485258102417, "learning_rate": 4.135916305537784e-05, "loss": 1.1583, "step": 18250 }, { "epoch": 5.46, "grad_norm": 2.7918076515197754, "learning_rate": 4.135471990137927e-05, "loss": 1.1705, "step": 18255 }, { "epoch": 5.46, "grad_norm": 1.458203911781311, "learning_rate": 4.135027584412653e-05, "loss": 1.2112, "step": 18260 }, { "epoch": 5.46, "grad_norm": 3.7946949005126953, 
"learning_rate": 4.134583088386504e-05, "loss": 1.2094, "step": 18265 }, { "epoch": 5.47, "grad_norm": 2.0881757736206055, "learning_rate": 4.134138502084029e-05, "loss": 1.3534, "step": 18270 }, { "epoch": 5.47, "grad_norm": 3.244400978088379, "learning_rate": 4.133693825529785e-05, "loss": 1.3165, "step": 18275 }, { "epoch": 5.47, "grad_norm": 1.4288554191589355, "learning_rate": 4.1332490587483286e-05, "loss": 1.2475, "step": 18280 }, { "epoch": 5.47, "grad_norm": 3.9546115398406982, "learning_rate": 4.132804201764224e-05, "loss": 1.2295, "step": 18285 }, { "epoch": 5.47, "grad_norm": 2.304332971572876, "learning_rate": 4.132448251247545e-05, "loss": 1.3196, "step": 18290 }, { "epoch": 5.47, "grad_norm": 2.4342777729034424, "learning_rate": 4.132003231960591e-05, "loss": 1.1165, "step": 18295 }, { "epoch": 5.48, "grad_norm": 2.890040397644043, "learning_rate": 4.131558122539796e-05, "loss": 1.1342, "step": 18300 }, { "epoch": 5.48, "grad_norm": 1.576117992401123, "learning_rate": 4.131112923009741e-05, "loss": 1.2944, "step": 18305 }, { "epoch": 5.48, "grad_norm": 2.7480485439300537, "learning_rate": 4.130667633395015e-05, "loss": 1.3669, "step": 18310 }, { "epoch": 5.48, "grad_norm": 2.0813324451446533, "learning_rate": 4.1302222537202104e-05, "loss": 1.1589, "step": 18315 }, { "epoch": 5.48, "grad_norm": 5.030991077423096, "learning_rate": 4.129776784009926e-05, "loss": 1.3765, "step": 18320 }, { "epoch": 5.48, "grad_norm": 4.91030740737915, "learning_rate": 4.129331224288763e-05, "loss": 1.0896, "step": 18325 }, { "epoch": 5.48, "grad_norm": 6.223639965057373, "learning_rate": 4.1288855745813303e-05, "loss": 1.2247, "step": 18330 }, { "epoch": 5.49, "grad_norm": 3.3995022773742676, "learning_rate": 4.128439834912241e-05, "loss": 1.4152, "step": 18335 }, { "epoch": 5.49, "grad_norm": 1.390557885169983, "learning_rate": 4.127994005306112e-05, "loss": 1.321, "step": 18340 }, { "epoch": 5.49, "grad_norm": 1.9957035779953003, "learning_rate": 
4.127548085787566e-05, "loss": 1.2822, "step": 18345 }, { "epoch": 5.49, "grad_norm": 3.40853214263916, "learning_rate": 4.127102076381231e-05, "loss": 1.159, "step": 18350 }, { "epoch": 5.49, "grad_norm": 1.8385875225067139, "learning_rate": 4.12665597711174e-05, "loss": 1.2222, "step": 18355 }, { "epoch": 5.49, "grad_norm": 2.742072582244873, "learning_rate": 4.12620978800373e-05, "loss": 1.1475, "step": 18360 }, { "epoch": 5.49, "grad_norm": 2.955695629119873, "learning_rate": 4.125763509081844e-05, "loss": 1.2988, "step": 18365 }, { "epoch": 5.5, "grad_norm": 2.316948652267456, "learning_rate": 4.125317140370729e-05, "loss": 1.1109, "step": 18370 }, { "epoch": 5.5, "grad_norm": 1.7137174606323242, "learning_rate": 4.1248706818950376e-05, "loss": 1.2146, "step": 18375 }, { "epoch": 5.5, "grad_norm": 3.3981897830963135, "learning_rate": 4.124424133679428e-05, "loss": 1.291, "step": 18380 }, { "epoch": 5.5, "grad_norm": 1.657471776008606, "learning_rate": 4.123977495748561e-05, "loss": 1.1274, "step": 18385 }, { "epoch": 5.5, "grad_norm": 1.293222427368164, "learning_rate": 4.123530768127105e-05, "loss": 1.0961, "step": 18390 }, { "epoch": 5.5, "grad_norm": 2.9801487922668457, "learning_rate": 4.123083950839733e-05, "loss": 1.1632, "step": 18395 }, { "epoch": 5.51, "grad_norm": 2.470231294631958, "learning_rate": 4.122637043911122e-05, "loss": 1.0898, "step": 18400 }, { "epoch": 5.51, "grad_norm": 1.0047528743743896, "learning_rate": 4.122190047365952e-05, "loss": 1.174, "step": 18405 }, { "epoch": 5.51, "grad_norm": 5.736430644989014, "learning_rate": 4.121742961228913e-05, "loss": 1.1775, "step": 18410 }, { "epoch": 5.51, "grad_norm": 1.3789207935333252, "learning_rate": 4.121295785524696e-05, "loss": 1.2466, "step": 18415 }, { "epoch": 5.51, "grad_norm": 1.9205785989761353, "learning_rate": 4.120848520277998e-05, "loss": 1.0881, "step": 18420 }, { "epoch": 5.51, "grad_norm": 2.249664783477783, "learning_rate": 4.12040116551352e-05, "loss": 1.3297, "step": 18425 
}, { "epoch": 5.51, "grad_norm": 1.7185654640197754, "learning_rate": 4.1199537212559705e-05, "loss": 1.1238, "step": 18430 }, { "epoch": 5.52, "grad_norm": 3.920592784881592, "learning_rate": 4.119506187530061e-05, "loss": 1.1606, "step": 18435 }, { "epoch": 5.52, "grad_norm": 1.8103728294372559, "learning_rate": 4.119058564360509e-05, "loss": 1.3541, "step": 18440 }, { "epoch": 5.52, "grad_norm": 2.7962160110473633, "learning_rate": 4.1186108517720344e-05, "loss": 1.1774, "step": 18445 }, { "epoch": 5.52, "grad_norm": 4.4177350997924805, "learning_rate": 4.1181630497893645e-05, "loss": 1.1272, "step": 18450 }, { "epoch": 5.52, "grad_norm": 1.938101887702942, "learning_rate": 4.117715158437232e-05, "loss": 1.1472, "step": 18455 }, { "epoch": 5.52, "grad_norm": 1.0241434574127197, "learning_rate": 4.117267177740373e-05, "loss": 1.2322, "step": 18460 }, { "epoch": 5.52, "grad_norm": 1.9051687717437744, "learning_rate": 4.116819107723529e-05, "loss": 1.1203, "step": 18465 }, { "epoch": 5.53, "grad_norm": 0.9832600951194763, "learning_rate": 4.1163709484114456e-05, "loss": 1.1336, "step": 18470 }, { "epoch": 5.53, "grad_norm": 1.4782296419143677, "learning_rate": 4.1159226998288754e-05, "loss": 1.3825, "step": 18475 }, { "epoch": 5.53, "grad_norm": 2.1868507862091064, "learning_rate": 4.1154743620005734e-05, "loss": 1.3936, "step": 18480 }, { "epoch": 5.53, "grad_norm": 1.0652060508728027, "learning_rate": 4.115025934951302e-05, "loss": 1.0603, "step": 18485 }, { "epoch": 5.53, "grad_norm": 1.29878568649292, "learning_rate": 4.1145774187058265e-05, "loss": 1.1407, "step": 18490 }, { "epoch": 5.53, "grad_norm": 1.3626949787139893, "learning_rate": 4.114128813288919e-05, "loss": 1.1889, "step": 18495 }, { "epoch": 5.53, "grad_norm": 1.5440853834152222, "learning_rate": 4.113680118725355e-05, "loss": 1.333, "step": 18500 }, { "epoch": 5.54, "grad_norm": 4.131174564361572, "learning_rate": 4.1132313350399155e-05, "loss": 1.0847, "step": 18505 }, { "epoch": 5.54, 
"grad_norm": 2.0806546211242676, "learning_rate": 4.112782462257386e-05, "loss": 1.3398, "step": 18510 }, { "epoch": 5.54, "grad_norm": 2.146475076675415, "learning_rate": 4.112333500402558e-05, "loss": 1.1576, "step": 18515 }, { "epoch": 5.54, "grad_norm": 2.3793232440948486, "learning_rate": 4.111884449500225e-05, "loss": 1.2433, "step": 18520 }, { "epoch": 5.54, "grad_norm": 1.008089303970337, "learning_rate": 4.11143530957519e-05, "loss": 1.2024, "step": 18525 }, { "epoch": 5.54, "grad_norm": 2.3187623023986816, "learning_rate": 4.110986080652259e-05, "loss": 1.1646, "step": 18530 }, { "epoch": 5.55, "grad_norm": 2.2866110801696777, "learning_rate": 4.1105367627562405e-05, "loss": 1.2074, "step": 18535 }, { "epoch": 5.55, "grad_norm": 3.7783994674682617, "learning_rate": 4.110087355911951e-05, "loss": 1.1768, "step": 18540 }, { "epoch": 5.55, "grad_norm": 3.170124053955078, "learning_rate": 4.1096378601442095e-05, "loss": 1.0867, "step": 18545 }, { "epoch": 5.55, "grad_norm": 1.4972915649414062, "learning_rate": 4.109188275477843e-05, "loss": 1.0619, "step": 18550 }, { "epoch": 5.55, "grad_norm": 3.528488874435425, "learning_rate": 4.1087386019376804e-05, "loss": 1.3686, "step": 18555 }, { "epoch": 5.55, "grad_norm": 2.188445806503296, "learning_rate": 4.108288839548557e-05, "loss": 1.2527, "step": 18560 }, { "epoch": 5.55, "grad_norm": 1.9165037870407104, "learning_rate": 4.107838988335313e-05, "loss": 1.3966, "step": 18565 }, { "epoch": 5.56, "grad_norm": 3.255675792694092, "learning_rate": 4.1073890483227925e-05, "loss": 1.1216, "step": 18570 }, { "epoch": 5.56, "grad_norm": 1.5940176248550415, "learning_rate": 4.106939019535846e-05, "loss": 1.2242, "step": 18575 }, { "epoch": 5.56, "grad_norm": 0.7701283097267151, "learning_rate": 4.106488901999328e-05, "loss": 1.3358, "step": 18580 }, { "epoch": 5.56, "grad_norm": 1.4922525882720947, "learning_rate": 4.106038695738097e-05, "loss": 1.1585, "step": 18585 }, { "epoch": 5.56, "grad_norm": 3.2928543090820312, 
"learning_rate": 4.105588400777018e-05, "loss": 1.1905, "step": 18590 }, { "epoch": 5.56, "grad_norm": 2.7126290798187256, "learning_rate": 4.1051380171409616e-05, "loss": 1.2003, "step": 18595 }, { "epoch": 5.56, "grad_norm": 3.020399332046509, "learning_rate": 4.104687544854801e-05, "loss": 1.1377, "step": 18600 }, { "epoch": 5.57, "grad_norm": 0.8808854222297668, "learning_rate": 4.104236983943415e-05, "loss": 1.2103, "step": 18605 }, { "epoch": 5.57, "grad_norm": 4.727932929992676, "learning_rate": 4.1037863344316875e-05, "loss": 1.1696, "step": 18610 }, { "epoch": 5.57, "grad_norm": 6.011281967163086, "learning_rate": 4.103335596344508e-05, "loss": 1.199, "step": 18615 }, { "epoch": 5.57, "grad_norm": 1.4836233854293823, "learning_rate": 4.10288476970677e-05, "loss": 1.1668, "step": 18620 }, { "epoch": 5.57, "grad_norm": 1.927235722541809, "learning_rate": 4.1024338545433724e-05, "loss": 1.2543, "step": 18625 }, { "epoch": 5.57, "grad_norm": 3.635470390319824, "learning_rate": 4.101982850879218e-05, "loss": 1.0165, "step": 18630 }, { "epoch": 5.58, "grad_norm": 2.8242850303649902, "learning_rate": 4.101531758739217e-05, "loss": 1.3089, "step": 18635 }, { "epoch": 5.58, "grad_norm": 2.512054920196533, "learning_rate": 4.101080578148281e-05, "loss": 1.1262, "step": 18640 }, { "epoch": 5.58, "grad_norm": 2.9997546672821045, "learning_rate": 4.10062930913133e-05, "loss": 1.2634, "step": 18645 }, { "epoch": 5.58, "grad_norm": 2.239625930786133, "learning_rate": 4.1001779517132846e-05, "loss": 1.1789, "step": 18650 }, { "epoch": 5.58, "grad_norm": 1.2925533056259155, "learning_rate": 4.099726505919075e-05, "loss": 1.3636, "step": 18655 }, { "epoch": 5.58, "grad_norm": 2.2485687732696533, "learning_rate": 4.099274971773632e-05, "loss": 1.1971, "step": 18660 }, { "epoch": 5.58, "grad_norm": 1.4676896333694458, "learning_rate": 4.098823349301896e-05, "loss": 1.0614, "step": 18665 }, { "epoch": 5.59, "grad_norm": 1.4249932765960693, "learning_rate": 
4.0983716385288083e-05, "loss": 0.9559, "step": 18670 }, { "epoch": 5.59, "grad_norm": 1.0708461999893188, "learning_rate": 4.097919839479316e-05, "loss": 1.2269, "step": 18675 }, { "epoch": 5.59, "grad_norm": 2.775709390640259, "learning_rate": 4.097467952178372e-05, "loss": 1.1256, "step": 18680 }, { "epoch": 5.59, "grad_norm": 3.5211985111236572, "learning_rate": 4.097015976650934e-05, "loss": 1.3445, "step": 18685 }, { "epoch": 5.59, "grad_norm": 4.323126316070557, "learning_rate": 4.0965639129219626e-05, "loss": 1.3169, "step": 18690 }, { "epoch": 5.59, "grad_norm": 1.8097007274627686, "learning_rate": 4.096111761016426e-05, "loss": 1.219, "step": 18695 }, { "epoch": 5.59, "grad_norm": 1.6146286725997925, "learning_rate": 4.095659520959297e-05, "loss": 1.3955, "step": 18700 }, { "epoch": 5.6, "grad_norm": 2.256901741027832, "learning_rate": 4.09520719277555e-05, "loss": 0.9899, "step": 18705 }, { "epoch": 5.6, "grad_norm": 2.670027494430542, "learning_rate": 4.094754776490168e-05, "loss": 1.2336, "step": 18710 }, { "epoch": 5.6, "grad_norm": 1.1742836236953735, "learning_rate": 4.094302272128138e-05, "loss": 1.3383, "step": 18715 }, { "epoch": 5.6, "grad_norm": 1.9772650003433228, "learning_rate": 4.0938496797144506e-05, "loss": 1.2239, "step": 18720 }, { "epoch": 5.6, "grad_norm": 1.8488539457321167, "learning_rate": 4.093396999274102e-05, "loss": 1.0871, "step": 18725 }, { "epoch": 5.6, "grad_norm": 3.4919984340667725, "learning_rate": 4.092944230832093e-05, "loss": 1.2676, "step": 18730 }, { "epoch": 5.61, "grad_norm": 1.2979923486709595, "learning_rate": 4.092491374413431e-05, "loss": 1.2437, "step": 18735 }, { "epoch": 5.61, "grad_norm": 1.2796027660369873, "learning_rate": 4.092038430043125e-05, "loss": 1.1508, "step": 18740 }, { "epoch": 5.61, "grad_norm": 1.175561547279358, "learning_rate": 4.09158539774619e-05, "loss": 1.1637, "step": 18745 }, { "epoch": 5.61, "grad_norm": 3.259338140487671, "learning_rate": 4.0911322775476494e-05, "loss": 1.3101, 
"step": 18750 }, { "epoch": 5.61, "grad_norm": 1.183301568031311, "learning_rate": 4.0906790694725275e-05, "loss": 1.1849, "step": 18755 }, { "epoch": 5.61, "grad_norm": 3.8314900398254395, "learning_rate": 4.090225773545853e-05, "loss": 1.2224, "step": 18760 }, { "epoch": 5.61, "grad_norm": 1.5235013961791992, "learning_rate": 4.089772389792662e-05, "loss": 1.3048, "step": 18765 }, { "epoch": 5.62, "grad_norm": 2.4287803173065186, "learning_rate": 4.089318918237994e-05, "loss": 1.1388, "step": 18770 }, { "epoch": 5.62, "grad_norm": 1.4137362241744995, "learning_rate": 4.0888653589068946e-05, "loss": 1.2584, "step": 18775 }, { "epoch": 5.62, "grad_norm": 1.061883807182312, "learning_rate": 4.0884117118244136e-05, "loss": 1.3847, "step": 18780 }, { "epoch": 5.62, "grad_norm": 3.9546823501586914, "learning_rate": 4.087957977015604e-05, "loss": 1.2478, "step": 18785 }, { "epoch": 5.62, "grad_norm": 1.4414864778518677, "learning_rate": 4.087504154505526e-05, "loss": 1.3395, "step": 18790 }, { "epoch": 5.62, "grad_norm": 1.690242052078247, "learning_rate": 4.0870502443192446e-05, "loss": 1.0451, "step": 18795 }, { "epoch": 5.62, "grad_norm": 2.187337636947632, "learning_rate": 4.086596246481826e-05, "loss": 1.1884, "step": 18800 }, { "epoch": 5.63, "grad_norm": 2.2674190998077393, "learning_rate": 4.086142161018347e-05, "loss": 1.323, "step": 18805 }, { "epoch": 5.63, "grad_norm": 2.5989344120025635, "learning_rate": 4.0856879879538854e-05, "loss": 1.1205, "step": 18810 }, { "epoch": 5.63, "grad_norm": 1.1521856784820557, "learning_rate": 4.085233727313524e-05, "loss": 1.2544, "step": 18815 }, { "epoch": 5.63, "grad_norm": 3.605713367462158, "learning_rate": 4.084779379122352e-05, "loss": 1.2099, "step": 18820 }, { "epoch": 5.63, "grad_norm": 1.6035972833633423, "learning_rate": 4.0843249434054624e-05, "loss": 1.3516, "step": 18825 }, { "epoch": 5.63, "grad_norm": 1.7690247297286987, "learning_rate": 4.083870420187953e-05, "loss": 1.2547, "step": 18830 }, { "epoch": 
5.64, "grad_norm": 1.6774495840072632, "learning_rate": 4.083415809494926e-05, "loss": 1.1669, "step": 18835 }, { "epoch": 5.64, "grad_norm": 2.1360936164855957, "learning_rate": 4.08296111135149e-05, "loss": 1.1833, "step": 18840 }, { "epoch": 5.64, "grad_norm": 3.100994110107422, "learning_rate": 4.082506325782757e-05, "loss": 1.1586, "step": 18845 }, { "epoch": 5.64, "grad_norm": 2.5224337577819824, "learning_rate": 4.0820514528138444e-05, "loss": 1.2535, "step": 18850 }, { "epoch": 5.64, "grad_norm": 1.2927287817001343, "learning_rate": 4.0815964924698745e-05, "loss": 1.1492, "step": 18855 }, { "epoch": 5.64, "grad_norm": 1.2006562948226929, "learning_rate": 4.0811414447759755e-05, "loss": 1.1727, "step": 18860 }, { "epoch": 5.64, "grad_norm": 2.254915475845337, "learning_rate": 4.080686309757277e-05, "loss": 1.2347, "step": 18865 }, { "epoch": 5.65, "grad_norm": 1.6122604608535767, "learning_rate": 4.080231087438916e-05, "loss": 1.0974, "step": 18870 }, { "epoch": 5.65, "grad_norm": 2.5616037845611572, "learning_rate": 4.079775777846036e-05, "loss": 1.1608, "step": 18875 }, { "epoch": 5.65, "grad_norm": 4.505390644073486, "learning_rate": 4.07932038100378e-05, "loss": 1.1997, "step": 18880 }, { "epoch": 5.65, "grad_norm": 3.1718831062316895, "learning_rate": 4.0788648969373035e-05, "loss": 1.2127, "step": 18885 }, { "epoch": 5.65, "grad_norm": 2.7245662212371826, "learning_rate": 4.078409325671758e-05, "loss": 1.2764, "step": 18890 }, { "epoch": 5.65, "grad_norm": 0.9645751118659973, "learning_rate": 4.0779536672323074e-05, "loss": 1.2709, "step": 18895 }, { "epoch": 5.65, "grad_norm": 2.0981924533843994, "learning_rate": 4.077497921644115e-05, "loss": 1.2451, "step": 18900 }, { "epoch": 5.66, "grad_norm": 1.6078838109970093, "learning_rate": 4.077042088932352e-05, "loss": 1.1987, "step": 18905 }, { "epoch": 5.66, "grad_norm": 3.138634443283081, "learning_rate": 4.0765861691221943e-05, "loss": 1.2224, "step": 18910 }, { "epoch": 5.66, "grad_norm": 
1.9491277933120728, "learning_rate": 4.0761301622388204e-05, "loss": 1.0712, "step": 18915 }, { "epoch": 5.66, "grad_norm": 1.6134378910064697, "learning_rate": 4.075674068307417e-05, "loss": 1.2271, "step": 18920 }, { "epoch": 5.66, "grad_norm": 4.156822681427002, "learning_rate": 4.075217887353172e-05, "loss": 1.2066, "step": 18925 }, { "epoch": 5.66, "grad_norm": 2.2299163341522217, "learning_rate": 4.074761619401281e-05, "loss": 1.2968, "step": 18930 }, { "epoch": 5.67, "grad_norm": 1.9773372411727905, "learning_rate": 4.0743052644769416e-05, "loss": 1.1542, "step": 18935 }, { "epoch": 5.67, "grad_norm": 1.7787487506866455, "learning_rate": 4.0738488226053595e-05, "loss": 1.2944, "step": 18940 }, { "epoch": 5.67, "grad_norm": 1.2517333030700684, "learning_rate": 4.073392293811742e-05, "loss": 1.2636, "step": 18945 }, { "epoch": 5.67, "grad_norm": 2.801917314529419, "learning_rate": 4.072935678121305e-05, "loss": 1.2396, "step": 18950 }, { "epoch": 5.67, "grad_norm": 2.199772357940674, "learning_rate": 4.0724789755592654e-05, "loss": 1.3366, "step": 18955 }, { "epoch": 5.67, "grad_norm": 1.5385900735855103, "learning_rate": 4.072022186150846e-05, "loss": 1.1959, "step": 18960 }, { "epoch": 5.67, "grad_norm": 1.959558367729187, "learning_rate": 4.0715653099212744e-05, "loss": 1.1745, "step": 18965 }, { "epoch": 5.68, "grad_norm": 4.36882209777832, "learning_rate": 4.071108346895786e-05, "loss": 1.1226, "step": 18970 }, { "epoch": 5.68, "grad_norm": 3.006443738937378, "learning_rate": 4.0706512970996145e-05, "loss": 1.1088, "step": 18975 }, { "epoch": 5.68, "grad_norm": 1.4092053174972534, "learning_rate": 4.070194160558006e-05, "loss": 1.2145, "step": 18980 }, { "epoch": 5.68, "grad_norm": 2.4782750606536865, "learning_rate": 4.069736937296206e-05, "loss": 1.2773, "step": 18985 }, { "epoch": 5.68, "grad_norm": 2.681567430496216, "learning_rate": 4.069279627339466e-05, "loss": 1.2458, "step": 18990 }, { "epoch": 5.68, "grad_norm": 2.1898365020751953, 
"learning_rate": 4.068822230713044e-05, "loss": 1.2287, "step": 18995 }, { "epoch": 5.68, "grad_norm": 1.1778751611709595, "learning_rate": 4.068364747442201e-05, "loss": 1.3985, "step": 19000 }, { "epoch": 5.69, "grad_norm": 2.775921106338501, "learning_rate": 4.0679071775522024e-05, "loss": 1.3502, "step": 19005 }, { "epoch": 5.69, "grad_norm": 0.9329620003700256, "learning_rate": 4.0674495210683214e-05, "loss": 1.0845, "step": 19010 }, { "epoch": 5.69, "grad_norm": 2.163052797317505, "learning_rate": 4.0669917780158315e-05, "loss": 1.1712, "step": 19015 }, { "epoch": 5.69, "grad_norm": 1.6568050384521484, "learning_rate": 4.066533948420015e-05, "loss": 1.3567, "step": 19020 }, { "epoch": 5.69, "grad_norm": 1.2599117755889893, "learning_rate": 4.0660760323061564e-05, "loss": 1.4662, "step": 19025 }, { "epoch": 5.69, "grad_norm": 1.5358624458312988, "learning_rate": 4.065618029699547e-05, "loss": 1.372, "step": 19030 }, { "epoch": 5.7, "grad_norm": 2.070814609527588, "learning_rate": 4.06515994062548e-05, "loss": 1.2393, "step": 19035 }, { "epoch": 5.7, "grad_norm": 1.2835952043533325, "learning_rate": 4.0647017651092575e-05, "loss": 1.1488, "step": 19040 }, { "epoch": 5.7, "grad_norm": 3.1609346866607666, "learning_rate": 4.064243503176183e-05, "loss": 1.2701, "step": 19045 }, { "epoch": 5.7, "grad_norm": 4.8114471435546875, "learning_rate": 4.063785154851565e-05, "loss": 1.1842, "step": 19050 }, { "epoch": 5.7, "grad_norm": 2.764596462249756, "learning_rate": 4.0633267201607197e-05, "loss": 1.1671, "step": 19055 }, { "epoch": 5.7, "grad_norm": 1.5256291627883911, "learning_rate": 4.062868199128964e-05, "loss": 1.2943, "step": 19060 }, { "epoch": 5.7, "grad_norm": 3.720947504043579, "learning_rate": 4.062409591781622e-05, "loss": 1.2156, "step": 19065 }, { "epoch": 5.71, "grad_norm": 1.2810076475143433, "learning_rate": 4.061950898144021e-05, "loss": 1.1736, "step": 19070 }, { "epoch": 5.71, "grad_norm": 1.708155632019043, "learning_rate": 4.061492118241497e-05, 
"loss": 1.2937, "step": 19075 }, { "epoch": 5.71, "grad_norm": 2.4375946521759033, "learning_rate": 4.0610332520993866e-05, "loss": 1.2401, "step": 19080 }, { "epoch": 5.71, "grad_norm": 2.5409374237060547, "learning_rate": 4.060574299743032e-05, "loss": 1.1797, "step": 19085 }, { "epoch": 5.71, "grad_norm": 1.2663114070892334, "learning_rate": 4.0601152611977797e-05, "loss": 1.3379, "step": 19090 }, { "epoch": 5.71, "grad_norm": 1.4234334230422974, "learning_rate": 4.059656136488985e-05, "loss": 1.2776, "step": 19095 }, { "epoch": 5.71, "grad_norm": 2.477080821990967, "learning_rate": 4.059196925642002e-05, "loss": 1.1275, "step": 19100 }, { "epoch": 5.72, "grad_norm": 2.8818910121917725, "learning_rate": 4.058737628682193e-05, "loss": 1.2485, "step": 19105 }, { "epoch": 5.72, "grad_norm": 1.8685482740402222, "learning_rate": 4.0582782456349264e-05, "loss": 1.1152, "step": 19110 }, { "epoch": 5.72, "grad_norm": 2.1116809844970703, "learning_rate": 4.057818776525571e-05, "loss": 1.2397, "step": 19115 }, { "epoch": 5.72, "grad_norm": 3.2058467864990234, "learning_rate": 4.0573592213795045e-05, "loss": 1.3506, "step": 19120 }, { "epoch": 5.72, "grad_norm": 2.4281482696533203, "learning_rate": 4.0568995802221066e-05, "loss": 1.0978, "step": 19125 }, { "epoch": 5.72, "grad_norm": 3.1970787048339844, "learning_rate": 4.056439853078763e-05, "loss": 1.1549, "step": 19130 }, { "epoch": 5.72, "grad_norm": 4.877747535705566, "learning_rate": 4.0559800399748645e-05, "loss": 1.186, "step": 19135 }, { "epoch": 5.73, "grad_norm": 1.5900225639343262, "learning_rate": 4.055520140935806e-05, "loss": 1.2458, "step": 19140 }, { "epoch": 5.73, "grad_norm": 2.55224871635437, "learning_rate": 4.055060155986986e-05, "loss": 1.2647, "step": 19145 }, { "epoch": 5.73, "grad_norm": 1.5223407745361328, "learning_rate": 4.054600085153811e-05, "loss": 1.2311, "step": 19150 }, { "epoch": 5.73, "grad_norm": 1.986936330795288, "learning_rate": 4.054139928461689e-05, "loss": 1.1517, "step": 19155 
}, { "epoch": 5.73, "grad_norm": 2.5802133083343506, "learning_rate": 4.0536796859360336e-05, "loss": 1.2503, "step": 19160 }, { "epoch": 5.73, "grad_norm": 1.5326321125030518, "learning_rate": 4.053219357602265e-05, "loss": 1.2379, "step": 19165 }, { "epoch": 5.74, "grad_norm": 2.684556484222412, "learning_rate": 4.0527589434858046e-05, "loss": 1.2704, "step": 19170 }, { "epoch": 5.74, "grad_norm": 1.2292155027389526, "learning_rate": 4.0522984436120826e-05, "loss": 1.4196, "step": 19175 }, { "epoch": 5.74, "grad_norm": 3.3510990142822266, "learning_rate": 4.051837858006531e-05, "loss": 1.0128, "step": 19180 }, { "epoch": 5.74, "grad_norm": 2.779120922088623, "learning_rate": 4.051377186694588e-05, "loss": 1.3174, "step": 19185 }, { "epoch": 5.74, "grad_norm": 2.3590750694274902, "learning_rate": 4.0509164297016944e-05, "loss": 1.2303, "step": 19190 }, { "epoch": 5.74, "grad_norm": 1.3253639936447144, "learning_rate": 4.050455587053299e-05, "loss": 1.3627, "step": 19195 }, { "epoch": 5.74, "grad_norm": 2.0595991611480713, "learning_rate": 4.049994658774853e-05, "loss": 1.2418, "step": 19200 }, { "epoch": 5.75, "grad_norm": 1.3671808242797852, "learning_rate": 4.0495336448918135e-05, "loss": 1.2585, "step": 19205 }, { "epoch": 5.75, "grad_norm": 1.1783967018127441, "learning_rate": 4.0490725454296414e-05, "loss": 1.1087, "step": 19210 }, { "epoch": 5.75, "grad_norm": 1.43436598777771, "learning_rate": 4.048611360413803e-05, "loss": 1.0994, "step": 19215 }, { "epoch": 5.75, "grad_norm": 2.8289105892181396, "learning_rate": 4.048150089869768e-05, "loss": 1.2857, "step": 19220 }, { "epoch": 5.75, "grad_norm": 2.906419277191162, "learning_rate": 4.047688733823013e-05, "loss": 1.3255, "step": 19225 }, { "epoch": 5.75, "grad_norm": 1.00028395652771, "learning_rate": 4.0472272922990185e-05, "loss": 1.3454, "step": 19230 }, { "epoch": 5.75, "grad_norm": 1.3520787954330444, "learning_rate": 4.046765765323269e-05, "loss": 1.2744, "step": 19235 }, { "epoch": 5.76, 
"grad_norm": 3.6939780712127686, "learning_rate": 4.046304152921253e-05, "loss": 1.277, "step": 19240 }, { "epoch": 5.76, "grad_norm": 1.3129464387893677, "learning_rate": 4.045842455118467e-05, "loss": 1.2569, "step": 19245 }, { "epoch": 5.76, "grad_norm": 1.3802201747894287, "learning_rate": 4.045380671940409e-05, "loss": 1.3112, "step": 19250 }, { "epoch": 5.76, "grad_norm": 2.282783031463623, "learning_rate": 4.0449188034125825e-05, "loss": 1.1128, "step": 19255 }, { "epoch": 5.76, "grad_norm": 2.5896713733673096, "learning_rate": 4.044456849560496e-05, "loss": 1.2365, "step": 19260 }, { "epoch": 5.76, "grad_norm": 1.469196081161499, "learning_rate": 4.043994810409664e-05, "loss": 1.2037, "step": 19265 }, { "epoch": 5.77, "grad_norm": 2.223722457885742, "learning_rate": 4.043532685985602e-05, "loss": 1.377, "step": 19270 }, { "epoch": 5.77, "grad_norm": 1.6373085975646973, "learning_rate": 4.043070476313835e-05, "loss": 1.1027, "step": 19275 }, { "epoch": 5.77, "grad_norm": 1.401114821434021, "learning_rate": 4.0426081814198905e-05, "loss": 1.148, "step": 19280 }, { "epoch": 5.77, "grad_norm": 1.4653375148773193, "learning_rate": 4.042145801329298e-05, "loss": 1.3143, "step": 19285 }, { "epoch": 5.77, "grad_norm": 1.659494161605835, "learning_rate": 4.0416833360675966e-05, "loss": 1.2026, "step": 19290 }, { "epoch": 5.77, "grad_norm": 2.820892810821533, "learning_rate": 4.0412207856603266e-05, "loss": 1.1208, "step": 19295 }, { "epoch": 5.77, "grad_norm": 2.835115909576416, "learning_rate": 4.040758150133035e-05, "loss": 1.2334, "step": 19300 }, { "epoch": 5.78, "grad_norm": 1.002402901649475, "learning_rate": 4.040295429511273e-05, "loss": 1.2697, "step": 19305 }, { "epoch": 5.78, "grad_norm": 1.9748258590698242, "learning_rate": 4.0398326238205946e-05, "loss": 1.3538, "step": 19310 }, { "epoch": 5.78, "grad_norm": 1.5948344469070435, "learning_rate": 4.039369733086561e-05, "loss": 1.2056, "step": 19315 }, { "epoch": 5.78, "grad_norm": 5.312704563140869, 
"learning_rate": 4.038906757334737e-05, "loss": 1.2161, "step": 19320 }, { "epoch": 5.78, "grad_norm": 2.936213970184326, "learning_rate": 4.0384436965906924e-05, "loss": 1.1106, "step": 19325 }, { "epoch": 5.78, "grad_norm": 2.379530429840088, "learning_rate": 4.037980550880002e-05, "loss": 1.2153, "step": 19330 }, { "epoch": 5.78, "grad_norm": 3.4053995609283447, "learning_rate": 4.0375173202282444e-05, "loss": 1.1903, "step": 19335 }, { "epoch": 5.79, "grad_norm": 2.1918978691101074, "learning_rate": 4.0370540046610026e-05, "loss": 1.1617, "step": 19340 }, { "epoch": 5.79, "grad_norm": 1.1753573417663574, "learning_rate": 4.036590604203867e-05, "loss": 1.141, "step": 19345 }, { "epoch": 5.79, "grad_norm": 2.638122081756592, "learning_rate": 4.036127118882429e-05, "loss": 1.0728, "step": 19350 }, { "epoch": 5.79, "grad_norm": 1.511003017425537, "learning_rate": 4.035663548722287e-05, "loss": 1.3012, "step": 19355 }, { "epoch": 5.79, "grad_norm": 1.878172516822815, "learning_rate": 4.035199893749043e-05, "loss": 1.2585, "step": 19360 }, { "epoch": 5.79, "grad_norm": 1.5874683856964111, "learning_rate": 4.0347361539883045e-05, "loss": 1.1257, "step": 19365 }, { "epoch": 5.8, "grad_norm": 2.5368497371673584, "learning_rate": 4.034272329465684e-05, "loss": 1.277, "step": 19370 }, { "epoch": 5.8, "grad_norm": 1.9644641876220703, "learning_rate": 4.033808420206798e-05, "loss": 1.267, "step": 19375 }, { "epoch": 5.8, "grad_norm": 3.0246572494506836, "learning_rate": 4.0333444262372666e-05, "loss": 1.1498, "step": 19380 }, { "epoch": 5.8, "grad_norm": 2.288479804992676, "learning_rate": 4.032880347582716e-05, "loss": 1.2349, "step": 19385 }, { "epoch": 5.8, "grad_norm": 1.7660423517227173, "learning_rate": 4.032416184268778e-05, "loss": 1.2463, "step": 19390 }, { "epoch": 5.8, "grad_norm": 2.469409465789795, "learning_rate": 4.031951936321086e-05, "loss": 1.2732, "step": 19395 }, { "epoch": 5.8, "grad_norm": 2.5984413623809814, "learning_rate": 4.0314876037652814e-05, 
"loss": 1.1642, "step": 19400 }, { "epoch": 5.81, "grad_norm": 4.0700225830078125, "learning_rate": 4.0310231866270086e-05, "loss": 1.1763, "step": 19405 }, { "epoch": 5.81, "grad_norm": 1.4601423740386963, "learning_rate": 4.030558684931917e-05, "loss": 1.1787, "step": 19410 }, { "epoch": 5.81, "grad_norm": 2.451631546020508, "learning_rate": 4.0300940987056596e-05, "loss": 1.0628, "step": 19415 }, { "epoch": 5.81, "grad_norm": 2.339339017868042, "learning_rate": 4.029629427973895e-05, "loss": 1.1614, "step": 19420 }, { "epoch": 5.81, "grad_norm": 2.694387674331665, "learning_rate": 4.0291646727622875e-05, "loss": 1.1025, "step": 19425 }, { "epoch": 5.81, "grad_norm": 3.171658754348755, "learning_rate": 4.0286998330965056e-05, "loss": 1.2235, "step": 19430 }, { "epoch": 5.81, "grad_norm": 3.675020694732666, "learning_rate": 4.02823490900222e-05, "loss": 1.2227, "step": 19435 }, { "epoch": 5.82, "grad_norm": 1.9126026630401611, "learning_rate": 4.027769900505109e-05, "loss": 1.0964, "step": 19440 }, { "epoch": 5.82, "grad_norm": 2.5049381256103516, "learning_rate": 4.027304807630854e-05, "loss": 1.3127, "step": 19445 }, { "epoch": 5.82, "grad_norm": 1.2870092391967773, "learning_rate": 4.0268396304051426e-05, "loss": 1.3275, "step": 19450 }, { "epoch": 5.82, "grad_norm": 2.4361467361450195, "learning_rate": 4.026374368853665e-05, "loss": 1.0307, "step": 19455 }, { "epoch": 5.82, "grad_norm": 2.4936602115631104, "learning_rate": 4.025909023002118e-05, "loss": 1.176, "step": 19460 }, { "epoch": 5.82, "grad_norm": 3.3425686359405518, "learning_rate": 4.025443592876201e-05, "loss": 1.0798, "step": 19465 }, { "epoch": 5.83, "grad_norm": 2.0376148223876953, "learning_rate": 4.024978078501621e-05, "loss": 1.1973, "step": 19470 }, { "epoch": 5.83, "grad_norm": 4.597989559173584, "learning_rate": 4.0245124799040864e-05, "loss": 1.3608, "step": 19475 }, { "epoch": 5.83, "grad_norm": 1.8406076431274414, "learning_rate": 4.024046797109312e-05, "loss": 1.2414, "step": 19480 }, 
{ "epoch": 5.83, "grad_norm": 1.4122562408447266, "learning_rate": 4.023581030143018e-05, "loss": 1.2824, "step": 19485 }, { "epoch": 5.83, "grad_norm": 1.3209871053695679, "learning_rate": 4.023115179030926e-05, "loss": 1.2982, "step": 19490 }, { "epoch": 5.83, "grad_norm": 2.4503726959228516, "learning_rate": 4.0226492437987676e-05, "loss": 1.1107, "step": 19495 }, { "epoch": 5.83, "grad_norm": 1.1615526676177979, "learning_rate": 4.022183224472272e-05, "loss": 1.1436, "step": 19500 }, { "epoch": 5.84, "grad_norm": 2.6597816944122314, "learning_rate": 4.021717121077181e-05, "loss": 1.189, "step": 19505 }, { "epoch": 5.84, "grad_norm": 2.0554018020629883, "learning_rate": 4.0212509336392345e-05, "loss": 1.1992, "step": 19510 }, { "epoch": 5.84, "grad_norm": 2.7470314502716064, "learning_rate": 4.02078466218418e-05, "loss": 1.378, "step": 19515 }, { "epoch": 5.84, "grad_norm": 1.204832673072815, "learning_rate": 4.020318306737769e-05, "loss": 1.1576, "step": 19520 }, { "epoch": 5.84, "grad_norm": 1.8212392330169678, "learning_rate": 4.019851867325759e-05, "loss": 1.3113, "step": 19525 }, { "epoch": 5.84, "grad_norm": 2.319030523300171, "learning_rate": 4.01938534397391e-05, "loss": 1.313, "step": 19530 }, { "epoch": 5.84, "grad_norm": 1.1338316202163696, "learning_rate": 4.018918736707988e-05, "loss": 1.1278, "step": 19535 }, { "epoch": 5.85, "grad_norm": 1.2906646728515625, "learning_rate": 4.018452045553762e-05, "loss": 1.2246, "step": 19540 }, { "epoch": 5.85, "grad_norm": 3.1656301021575928, "learning_rate": 4.017985270537009e-05, "loss": 1.2851, "step": 19545 }, { "epoch": 5.85, "grad_norm": 1.0056490898132324, "learning_rate": 4.017518411683507e-05, "loss": 1.1831, "step": 19550 }, { "epoch": 5.85, "grad_norm": 1.4432793855667114, "learning_rate": 4.01705146901904e-05, "loss": 1.2026, "step": 19555 }, { "epoch": 5.85, "grad_norm": 2.5495402812957764, "learning_rate": 4.016584442569398e-05, "loss": 1.1733, "step": 19560 }, { "epoch": 5.85, "grad_norm": 
4.6060099601745605, "learning_rate": 4.016117332360373e-05, "loss": 1.1664, "step": 19565 }, { "epoch": 5.86, "grad_norm": 1.2806930541992188, "learning_rate": 4.015650138417764e-05, "loss": 1.2543, "step": 19570 }, { "epoch": 5.86, "grad_norm": 1.6852878332138062, "learning_rate": 4.015182860767373e-05, "loss": 1.1064, "step": 19575 }, { "epoch": 5.86, "grad_norm": 1.5390417575836182, "learning_rate": 4.014715499435008e-05, "loss": 1.2544, "step": 19580 }, { "epoch": 5.86, "grad_norm": 2.724890947341919, "learning_rate": 4.01424805444648e-05, "loss": 1.1592, "step": 19585 }, { "epoch": 5.86, "grad_norm": 5.490366458892822, "learning_rate": 4.013780525827606e-05, "loss": 1.151, "step": 19590 }, { "epoch": 5.86, "grad_norm": 2.0615406036376953, "learning_rate": 4.0133129136042066e-05, "loss": 1.2411, "step": 19595 }, { "epoch": 5.86, "grad_norm": 5.847554683685303, "learning_rate": 4.012845217802109e-05, "loss": 1.2015, "step": 19600 }, { "epoch": 5.87, "grad_norm": 1.873935341835022, "learning_rate": 4.0123774384471425e-05, "loss": 1.2761, "step": 19605 }, { "epoch": 5.87, "grad_norm": 2.046846389770508, "learning_rate": 4.0119095755651414e-05, "loss": 1.2653, "step": 19610 }, { "epoch": 5.87, "grad_norm": 2.1344308853149414, "learning_rate": 4.011441629181946e-05, "loss": 1.3688, "step": 19615 }, { "epoch": 5.87, "grad_norm": 1.3133376836776733, "learning_rate": 4.010973599323401e-05, "loss": 1.3191, "step": 19620 }, { "epoch": 5.87, "grad_norm": 1.7745246887207031, "learning_rate": 4.010505486015354e-05, "loss": 1.1906, "step": 19625 }, { "epoch": 5.87, "grad_norm": 2.6860477924346924, "learning_rate": 4.010037289283659e-05, "loss": 1.1099, "step": 19630 }, { "epoch": 5.87, "grad_norm": 2.37652850151062, "learning_rate": 4.009569009154175e-05, "loss": 1.0423, "step": 19635 }, { "epoch": 5.88, "grad_norm": 1.3366721868515015, "learning_rate": 4.0091006456527634e-05, "loss": 1.3428, "step": 19640 }, { "epoch": 5.88, "grad_norm": 5.54878568649292, "learning_rate": 
4.008632198805292e-05, "loss": 1.1666, "step": 19645 }, { "epoch": 5.88, "grad_norm": 0.9139274954795837, "learning_rate": 4.008163668637632e-05, "loss": 1.0742, "step": 19650 }, { "epoch": 5.88, "grad_norm": 1.8782241344451904, "learning_rate": 4.00769505517566e-05, "loss": 1.2777, "step": 19655 }, { "epoch": 5.88, "grad_norm": 1.8892041444778442, "learning_rate": 4.0072263584452576e-05, "loss": 1.3023, "step": 19660 }, { "epoch": 5.88, "grad_norm": 1.7364705801010132, "learning_rate": 4.0067575784723104e-05, "loss": 1.2377, "step": 19665 }, { "epoch": 5.89, "grad_norm": 0.6901882886886597, "learning_rate": 4.0062887152827075e-05, "loss": 1.2162, "step": 19670 }, { "epoch": 5.89, "grad_norm": 1.6840230226516724, "learning_rate": 4.005819768902346e-05, "loss": 1.3165, "step": 19675 }, { "epoch": 5.89, "grad_norm": 1.284379243850708, "learning_rate": 4.005350739357122e-05, "loss": 1.2768, "step": 19680 }, { "epoch": 5.89, "grad_norm": 1.735634446144104, "learning_rate": 4.004881626672943e-05, "loss": 1.1276, "step": 19685 }, { "epoch": 5.89, "grad_norm": 1.7231804132461548, "learning_rate": 4.004412430875715e-05, "loss": 1.2146, "step": 19690 }, { "epoch": 5.89, "grad_norm": 3.919288396835327, "learning_rate": 4.0039431519913525e-05, "loss": 1.1341, "step": 19695 }, { "epoch": 5.89, "grad_norm": 1.3154077529907227, "learning_rate": 4.0034737900457734e-05, "loss": 1.209, "step": 19700 }, { "epoch": 5.9, "grad_norm": 2.288764715194702, "learning_rate": 4.003004345064899e-05, "loss": 1.2333, "step": 19705 }, { "epoch": 5.9, "grad_norm": 1.616042137145996, "learning_rate": 4.002534817074657e-05, "loss": 1.2067, "step": 19710 }, { "epoch": 5.9, "grad_norm": 2.097806692123413, "learning_rate": 4.002065206100979e-05, "loss": 1.2761, "step": 19715 }, { "epoch": 5.9, "grad_norm": 4.625036716461182, "learning_rate": 4.001595512169801e-05, "loss": 1.1133, "step": 19720 }, { "epoch": 5.9, "grad_norm": 1.4965091943740845, "learning_rate": 4.001125735307063e-05, "loss": 1.2998, 
"step": 19725 }, { "epoch": 5.9, "grad_norm": 1.4583796262741089, "learning_rate": 4.000655875538712e-05, "loss": 1.1086, "step": 19730 }, { "epoch": 5.9, "grad_norm": 2.9411721229553223, "learning_rate": 4.000185932890697e-05, "loss": 1.2033, "step": 19735 }, { "epoch": 5.91, "grad_norm": 1.419301986694336, "learning_rate": 3.999715907388971e-05, "loss": 1.2032, "step": 19740 }, { "epoch": 5.91, "grad_norm": 1.0543030500411987, "learning_rate": 3.999245799059496e-05, "loss": 1.3008, "step": 19745 }, { "epoch": 5.91, "grad_norm": 4.684767723083496, "learning_rate": 3.998775607928232e-05, "loss": 1.3266, "step": 19750 }, { "epoch": 5.91, "grad_norm": 3.58650541305542, "learning_rate": 3.99830533402115e-05, "loss": 1.2931, "step": 19755 }, { "epoch": 5.91, "grad_norm": 1.2763997316360474, "learning_rate": 3.997834977364222e-05, "loss": 1.0974, "step": 19760 }, { "epoch": 5.91, "grad_norm": 2.0618629455566406, "learning_rate": 3.9973645379834255e-05, "loss": 1.1197, "step": 19765 }, { "epoch": 5.91, "grad_norm": 1.2482560873031616, "learning_rate": 3.9968940159047416e-05, "loss": 1.1536, "step": 19770 }, { "epoch": 5.92, "grad_norm": 1.5299739837646484, "learning_rate": 3.9964234111541567e-05, "loss": 1.2666, "step": 19775 }, { "epoch": 5.92, "grad_norm": 6.283170223236084, "learning_rate": 3.9959527237576624e-05, "loss": 1.2071, "step": 19780 }, { "epoch": 5.92, "grad_norm": 1.4811429977416992, "learning_rate": 3.995481953741254e-05, "loss": 1.1695, "step": 19785 }, { "epoch": 5.92, "grad_norm": 2.082146406173706, "learning_rate": 3.995011101130932e-05, "loss": 1.2389, "step": 19790 }, { "epoch": 5.92, "grad_norm": 1.7142033576965332, "learning_rate": 3.994540165952701e-05, "loss": 1.3164, "step": 19795 }, { "epoch": 5.92, "grad_norm": 1.3246126174926758, "learning_rate": 3.99406914823257e-05, "loss": 1.33, "step": 19800 }, { "epoch": 5.93, "grad_norm": 3.4477603435516357, "learning_rate": 3.993598047996553e-05, "loss": 1.1553, "step": 19805 }, { "epoch": 5.93, 
"grad_norm": 1.8627071380615234, "learning_rate": 3.9931268652706676e-05, "loss": 1.1348, "step": 19810 }, { "epoch": 5.93, "grad_norm": 3.652338981628418, "learning_rate": 3.992655600080938e-05, "loss": 1.03, "step": 19815 }, { "epoch": 5.93, "grad_norm": 2.0452940464019775, "learning_rate": 3.992184252453392e-05, "loss": 1.0227, "step": 19820 }, { "epoch": 5.93, "grad_norm": 2.3490209579467773, "learning_rate": 3.99171282241406e-05, "loss": 1.2441, "step": 19825 }, { "epoch": 5.93, "grad_norm": 1.5200634002685547, "learning_rate": 3.991241309988979e-05, "loss": 1.2541, "step": 19830 }, { "epoch": 5.93, "grad_norm": 2.570288896560669, "learning_rate": 3.9907697152041915e-05, "loss": 1.3228, "step": 19835 }, { "epoch": 5.94, "grad_norm": 1.5330473184585571, "learning_rate": 3.9902980380857414e-05, "loss": 1.2349, "step": 19840 }, { "epoch": 5.94, "grad_norm": 1.0376076698303223, "learning_rate": 3.9898262786596794e-05, "loss": 1.1803, "step": 19845 }, { "epoch": 5.94, "grad_norm": 1.4914852380752563, "learning_rate": 3.989354436952061e-05, "loss": 1.1023, "step": 19850 }, { "epoch": 5.94, "grad_norm": 1.438751220703125, "learning_rate": 3.988882512988945e-05, "loss": 1.2842, "step": 19855 }, { "epoch": 5.94, "grad_norm": 2.238565683364868, "learning_rate": 3.988410506796396e-05, "loss": 1.2185, "step": 19860 }, { "epoch": 5.94, "grad_norm": 5.389926910400391, "learning_rate": 3.987938418400482e-05, "loss": 1.1369, "step": 19865 }, { "epoch": 5.94, "grad_norm": 4.195040702819824, "learning_rate": 3.987466247827275e-05, "loss": 1.1121, "step": 19870 }, { "epoch": 5.95, "grad_norm": 1.3536615371704102, "learning_rate": 3.986993995102853e-05, "loss": 1.3349, "step": 19875 }, { "epoch": 5.95, "grad_norm": 1.4799764156341553, "learning_rate": 3.9865216602532994e-05, "loss": 1.2105, "step": 19880 }, { "epoch": 5.95, "grad_norm": 1.5953404903411865, "learning_rate": 3.986049243304699e-05, "loss": 1.1709, "step": 19885 }, { "epoch": 5.95, "grad_norm": 3.760248899459839, 
"learning_rate": 3.9855767442831436e-05, "loss": 1.1154, "step": 19890 }, { "epoch": 5.95, "grad_norm": 4.010507106781006, "learning_rate": 3.985104163214729e-05, "loss": 1.3683, "step": 19895 }, { "epoch": 5.95, "grad_norm": 2.708240032196045, "learning_rate": 3.984631500125555e-05, "loss": 1.233, "step": 19900 }, { "epoch": 5.96, "grad_norm": 1.024338960647583, "learning_rate": 3.984158755041726e-05, "loss": 1.1541, "step": 19905 }, { "epoch": 5.96, "grad_norm": 3.15925931930542, "learning_rate": 3.9836859279893526e-05, "loss": 1.2759, "step": 19910 }, { "epoch": 5.96, "grad_norm": 3.913562536239624, "learning_rate": 3.9832130189945475e-05, "loss": 1.1862, "step": 19915 }, { "epoch": 5.96, "grad_norm": 2.1706655025482178, "learning_rate": 3.982740028083428e-05, "loss": 1.252, "step": 19920 }, { "epoch": 5.96, "grad_norm": 1.2419073581695557, "learning_rate": 3.982266955282119e-05, "loss": 1.3196, "step": 19925 }, { "epoch": 5.96, "grad_norm": 1.833250880241394, "learning_rate": 3.9817938006167465e-05, "loss": 1.1608, "step": 19930 }, { "epoch": 5.96, "grad_norm": 1.1308797597885132, "learning_rate": 3.9813205641134424e-05, "loss": 1.2362, "step": 19935 }, { "epoch": 5.97, "grad_norm": 1.6005351543426514, "learning_rate": 3.980847245798344e-05, "loss": 1.2911, "step": 19940 }, { "epoch": 5.97, "grad_norm": 1.2938203811645508, "learning_rate": 3.9803738456975905e-05, "loss": 1.342, "step": 19945 }, { "epoch": 5.97, "grad_norm": 5.012538433074951, "learning_rate": 3.9799003638373283e-05, "loss": 1.1956, "step": 19950 }, { "epoch": 5.97, "grad_norm": 2.233893394470215, "learning_rate": 3.979426800243708e-05, "loss": 1.2192, "step": 19955 }, { "epoch": 5.97, "grad_norm": 4.2855448722839355, "learning_rate": 3.978953154942883e-05, "loss": 1.2098, "step": 19960 }, { "epoch": 5.97, "grad_norm": 2.717545747756958, "learning_rate": 3.978479427961012e-05, "loss": 1.265, "step": 19965 }, { "epoch": 5.97, "grad_norm": 2.452308177947998, "learning_rate": 3.97800561932426e-05, 
"loss": 1.2003, "step": 19970 }, { "epoch": 5.98, "grad_norm": 3.6436851024627686, "learning_rate": 3.977531729058793e-05, "loss": 1.3046, "step": 19975 }, { "epoch": 5.98, "grad_norm": 2.9817821979522705, "learning_rate": 3.977057757190785e-05, "loss": 1.0906, "step": 19980 }, { "epoch": 5.98, "grad_norm": 2.8185782432556152, "learning_rate": 3.9765837037464124e-05, "loss": 1.3706, "step": 19985 }, { "epoch": 5.98, "grad_norm": 3.414907217025757, "learning_rate": 3.9761095687518565e-05, "loss": 1.1195, "step": 19990 }, { "epoch": 5.98, "grad_norm": 2.366452217102051, "learning_rate": 3.9756353522333034e-05, "loss": 1.3436, "step": 19995 }, { "epoch": 5.98, "grad_norm": 2.4633522033691406, "learning_rate": 3.975161054216944e-05, "loss": 1.2036, "step": 20000 }, { "epoch": 5.99, "grad_norm": 2.6752207279205322, "learning_rate": 3.9746866747289726e-05, "loss": 1.3225, "step": 20005 }, { "epoch": 5.99, "grad_norm": 1.7009152173995972, "learning_rate": 3.9742122137955884e-05, "loss": 1.2566, "step": 20010 }, { "epoch": 5.99, "grad_norm": 2.860818386077881, "learning_rate": 3.9737376714429964e-05, "loss": 1.2588, "step": 20015 }, { "epoch": 5.99, "grad_norm": 4.960500717163086, "learning_rate": 3.973263047697405e-05, "loss": 1.1748, "step": 20020 }, { "epoch": 5.99, "grad_norm": 1.3426363468170166, "learning_rate": 3.972788342585027e-05, "loss": 1.2358, "step": 20025 }, { "epoch": 5.99, "grad_norm": 5.134890556335449, "learning_rate": 3.972313556132079e-05, "loss": 1.2629, "step": 20030 }, { "epoch": 5.99, "grad_norm": 3.40563702583313, "learning_rate": 3.971838688364784e-05, "loss": 1.186, "step": 20035 }, { "epoch": 6.0, "grad_norm": 2.111565113067627, "learning_rate": 3.9713637393093686e-05, "loss": 1.2938, "step": 20040 }, { "epoch": 6.0, "grad_norm": 1.0406635999679565, "learning_rate": 3.970888708992063e-05, "loss": 1.1778, "step": 20045 }, { "epoch": 6.0, "grad_norm": 2.193850517272949, "learning_rate": 3.9704135974391026e-05, "loss": 1.206, "step": 20050 }, { 
"epoch": 6.0, "grad_norm": 0.8618187308311462, "learning_rate": 3.969938404676728e-05, "loss": 1.2366, "step": 20055 }, { "epoch": 6.0, "grad_norm": 1.8126907348632812, "learning_rate": 3.969463130731183e-05, "loss": 0.9741, "step": 20060 }, { "epoch": 6.0, "grad_norm": 1.3693639039993286, "learning_rate": 3.968987775628717e-05, "loss": 1.0962, "step": 20065 }, { "epoch": 6.0, "grad_norm": 1.7154806852340698, "learning_rate": 3.9685123393955824e-05, "loss": 1.4039, "step": 20070 }, { "epoch": 6.01, "grad_norm": 1.4863255023956299, "learning_rate": 3.968036822058038e-05, "loss": 1.1968, "step": 20075 }, { "epoch": 6.01, "grad_norm": 9.683221817016602, "learning_rate": 3.9675612236423466e-05, "loss": 1.1326, "step": 20080 }, { "epoch": 6.01, "grad_norm": 2.053426504135132, "learning_rate": 3.9670855441747737e-05, "loss": 1.2913, "step": 20085 }, { "epoch": 6.01, "grad_norm": 1.7866415977478027, "learning_rate": 3.9666097836815915e-05, "loss": 1.2354, "step": 20090 }, { "epoch": 6.01, "grad_norm": 1.6911275386810303, "learning_rate": 3.9661339421890746e-05, "loss": 1.0844, "step": 20095 }, { "epoch": 6.01, "grad_norm": 1.2909938097000122, "learning_rate": 3.965658019723505e-05, "loss": 1.2505, "step": 20100 }, { "epoch": 6.02, "grad_norm": 2.46484375, "learning_rate": 3.965182016311165e-05, "loss": 0.985, "step": 20105 }, { "epoch": 6.02, "grad_norm": 1.2409467697143555, "learning_rate": 3.964705931978346e-05, "loss": 1.314, "step": 20110 }, { "epoch": 6.02, "grad_norm": 1.6112093925476074, "learning_rate": 3.964229766751342e-05, "loss": 1.1812, "step": 20115 }, { "epoch": 6.02, "grad_norm": 2.3558475971221924, "learning_rate": 3.9637535206564485e-05, "loss": 1.2568, "step": 20120 }, { "epoch": 6.02, "grad_norm": 2.557868242263794, "learning_rate": 3.96327719371997e-05, "loss": 1.0185, "step": 20125 }, { "epoch": 6.02, "grad_norm": 1.7992500066757202, "learning_rate": 3.962800785968213e-05, "loss": 1.115, "step": 20130 }, { "epoch": 6.02, "grad_norm": 
5.036953926086426, "learning_rate": 3.962324297427489e-05, "loss": 1.0766, "step": 20135 }, { "epoch": 6.03, "grad_norm": 1.705336332321167, "learning_rate": 3.9618477281241146e-05, "loss": 1.1394, "step": 20140 }, { "epoch": 6.03, "grad_norm": 1.967340111732483, "learning_rate": 3.9613710780844096e-05, "loss": 1.2392, "step": 20145 }, { "epoch": 6.03, "grad_norm": 2.6726629734039307, "learning_rate": 3.9608943473346985e-05, "loss": 1.2141, "step": 20150 }, { "epoch": 6.03, "grad_norm": 1.7967979907989502, "learning_rate": 3.960417535901311e-05, "loss": 1.0865, "step": 20155 }, { "epoch": 6.03, "grad_norm": 2.194570779800415, "learning_rate": 3.959940643810581e-05, "loss": 1.0641, "step": 20160 }, { "epoch": 6.03, "grad_norm": 3.644632339477539, "learning_rate": 3.9594636710888475e-05, "loss": 1.2237, "step": 20165 }, { "epoch": 6.03, "grad_norm": 1.7848505973815918, "learning_rate": 3.9589866177624515e-05, "loss": 1.1259, "step": 20170 }, { "epoch": 6.04, "grad_norm": 1.647910714149475, "learning_rate": 3.958509483857742e-05, "loss": 1.3501, "step": 20175 }, { "epoch": 6.04, "grad_norm": 2.3628077507019043, "learning_rate": 3.95803226940107e-05, "loss": 1.2317, "step": 20180 }, { "epoch": 6.04, "grad_norm": 1.816735863685608, "learning_rate": 3.95755497441879e-05, "loss": 0.9811, "step": 20185 }, { "epoch": 6.04, "grad_norm": 1.5639019012451172, "learning_rate": 3.957077598937264e-05, "loss": 1.1717, "step": 20190 }, { "epoch": 6.04, "grad_norm": 1.4008920192718506, "learning_rate": 3.956600142982858e-05, "loss": 1.1988, "step": 20195 }, { "epoch": 6.04, "grad_norm": 3.0697457790374756, "learning_rate": 3.956122606581939e-05, "loss": 1.0945, "step": 20200 }, { "epoch": 6.05, "grad_norm": 1.7359205484390259, "learning_rate": 3.9556449897608824e-05, "loss": 1.1508, "step": 20205 }, { "epoch": 6.05, "grad_norm": 2.08030104637146, "learning_rate": 3.955167292546066e-05, "loss": 1.0043, "step": 20210 }, { "epoch": 6.05, "grad_norm": 2.151505470275879, "learning_rate": 
3.9546895149638737e-05, "loss": 1.3255, "step": 20215 }, { "epoch": 6.05, "grad_norm": 2.6911120414733887, "learning_rate": 3.954211657040691e-05, "loss": 1.0752, "step": 20220 }, { "epoch": 6.05, "grad_norm": 3.3755829334259033, "learning_rate": 3.953733718802909e-05, "loss": 1.3089, "step": 20225 }, { "epoch": 6.05, "grad_norm": 1.3556479215621948, "learning_rate": 3.953255700276925e-05, "loss": 1.1838, "step": 20230 }, { "epoch": 6.05, "grad_norm": 1.6404752731323242, "learning_rate": 3.95277760148914e-05, "loss": 1.1368, "step": 20235 }, { "epoch": 6.06, "grad_norm": 1.5696030855178833, "learning_rate": 3.9522994224659576e-05, "loss": 1.0713, "step": 20240 }, { "epoch": 6.06, "grad_norm": 0.9167082905769348, "learning_rate": 3.951821163233788e-05, "loss": 1.0272, "step": 20245 }, { "epoch": 6.06, "grad_norm": 2.055103063583374, "learning_rate": 3.951342823819044e-05, "loss": 1.1271, "step": 20250 }, { "epoch": 6.06, "grad_norm": 3.4541079998016357, "learning_rate": 3.950864404248145e-05, "loss": 1.3, "step": 20255 }, { "epoch": 6.06, "grad_norm": 3.939716100692749, "learning_rate": 3.950385904547513e-05, "loss": 1.2012, "step": 20260 }, { "epoch": 6.06, "grad_norm": 1.5123727321624756, "learning_rate": 3.9499073247435755e-05, "loss": 1.2772, "step": 20265 }, { "epoch": 6.06, "grad_norm": 1.4127657413482666, "learning_rate": 3.949428664862762e-05, "loss": 1.3369, "step": 20270 }, { "epoch": 6.07, "grad_norm": 2.0082712173461914, "learning_rate": 3.948949924931512e-05, "loss": 1.2377, "step": 20275 }, { "epoch": 6.07, "grad_norm": 1.3078731298446655, "learning_rate": 3.9484711049762625e-05, "loss": 1.097, "step": 20280 }, { "epoch": 6.07, "grad_norm": 1.3073629140853882, "learning_rate": 3.94799220502346e-05, "loss": 1.0726, "step": 20285 }, { "epoch": 6.07, "grad_norm": 1.1648201942443848, "learning_rate": 3.9475132250995525e-05, "loss": 1.2021, "step": 20290 }, { "epoch": 6.07, "grad_norm": 4.134536266326904, "learning_rate": 3.947034165230995e-05, "loss": 
1.1795, "step": 20295 }, { "epoch": 6.07, "grad_norm": 1.0476043224334717, "learning_rate": 3.946555025444244e-05, "loss": 1.1613, "step": 20300 }, { "epoch": 6.08, "grad_norm": 2.554640769958496, "learning_rate": 3.9460758057657626e-05, "loss": 1.1222, "step": 20305 }, { "epoch": 6.08, "grad_norm": 3.514789342880249, "learning_rate": 3.945596506222018e-05, "loss": 1.0743, "step": 20310 }, { "epoch": 6.08, "grad_norm": 1.703673243522644, "learning_rate": 3.945117126839481e-05, "loss": 1.1762, "step": 20315 }, { "epoch": 6.08, "grad_norm": 2.6589813232421875, "learning_rate": 3.944637667644627e-05, "loss": 1.1546, "step": 20320 }, { "epoch": 6.08, "grad_norm": 3.949831008911133, "learning_rate": 3.9441581286639365e-05, "loss": 1.2165, "step": 20325 }, { "epoch": 6.08, "grad_norm": 2.2509963512420654, "learning_rate": 3.943678509923893e-05, "loss": 1.2908, "step": 20330 }, { "epoch": 6.08, "grad_norm": 2.6351144313812256, "learning_rate": 3.9431988114509854e-05, "loss": 1.2144, "step": 20335 }, { "epoch": 6.09, "grad_norm": 1.2921767234802246, "learning_rate": 3.942719033271709e-05, "loss": 1.2182, "step": 20340 }, { "epoch": 6.09, "grad_norm": 1.6154694557189941, "learning_rate": 3.9422391754125596e-05, "loss": 1.1248, "step": 20345 }, { "epoch": 6.09, "grad_norm": 1.5173944234848022, "learning_rate": 3.94175923790004e-05, "loss": 1.1687, "step": 20350 }, { "epoch": 6.09, "grad_norm": 1.5915690660476685, "learning_rate": 3.941279220760655e-05, "loss": 1.3048, "step": 20355 }, { "epoch": 6.09, "grad_norm": 2.2365562915802, "learning_rate": 3.940799124020918e-05, "loss": 1.1774, "step": 20360 }, { "epoch": 6.09, "grad_norm": 2.3418896198272705, "learning_rate": 3.9403189477073424e-05, "loss": 1.387, "step": 20365 }, { "epoch": 6.09, "grad_norm": 1.1092318296432495, "learning_rate": 3.939838691846449e-05, "loss": 1.2785, "step": 20370 }, { "epoch": 6.1, "grad_norm": 1.0265756845474243, "learning_rate": 3.939358356464761e-05, "loss": 1.139, "step": 20375 }, { "epoch": 
6.1, "grad_norm": 1.068686842918396, "learning_rate": 3.9388779415888075e-05, "loss": 1.1121, "step": 20380 }, { "epoch": 6.1, "grad_norm": 1.821524977684021, "learning_rate": 3.93839744724512e-05, "loss": 1.008, "step": 20385 }, { "epoch": 6.1, "grad_norm": 2.26947021484375, "learning_rate": 3.937916873460237e-05, "loss": 1.2466, "step": 20390 }, { "epoch": 6.1, "grad_norm": 2.6822071075439453, "learning_rate": 3.9374362202607e-05, "loss": 1.0863, "step": 20395 }, { "epoch": 6.1, "grad_norm": 2.3898754119873047, "learning_rate": 3.936955487673054e-05, "loss": 1.1818, "step": 20400 }, { "epoch": 6.1, "grad_norm": 2.3284709453582764, "learning_rate": 3.9364746757238515e-05, "loss": 1.2534, "step": 20405 }, { "epoch": 6.11, "grad_norm": 2.5323164463043213, "learning_rate": 3.935993784439644e-05, "loss": 1.1993, "step": 20410 }, { "epoch": 6.11, "grad_norm": 1.2077038288116455, "learning_rate": 3.935512813846994e-05, "loss": 1.0831, "step": 20415 }, { "epoch": 6.11, "grad_norm": 1.9154731035232544, "learning_rate": 3.935031763972462e-05, "loss": 1.1491, "step": 20420 }, { "epoch": 6.11, "grad_norm": 1.4190819263458252, "learning_rate": 3.9345506348426184e-05, "loss": 1.0991, "step": 20425 }, { "epoch": 6.11, "grad_norm": 1.8110177516937256, "learning_rate": 3.934069426484034e-05, "loss": 1.3591, "step": 20430 }, { "epoch": 6.11, "grad_norm": 2.452357530593872, "learning_rate": 3.9335881389232854e-05, "loss": 1.1955, "step": 20435 }, { "epoch": 6.12, "grad_norm": 3.6811835765838623, "learning_rate": 3.9331067721869555e-05, "loss": 1.042, "step": 20440 }, { "epoch": 6.12, "grad_norm": 1.9267451763153076, "learning_rate": 3.932625326301627e-05, "loss": 1.0402, "step": 20445 }, { "epoch": 6.12, "grad_norm": 1.685020089149475, "learning_rate": 3.9321438012938906e-05, "loss": 1.1954, "step": 20450 }, { "epoch": 6.12, "grad_norm": 2.5964527130126953, "learning_rate": 3.931662197190341e-05, "loss": 1.069, "step": 20455 }, { "epoch": 6.12, "grad_norm": 6.929611682891846, 
"learning_rate": 3.931180514017576e-05, "loss": 1.1414, "step": 20460 }, { "epoch": 6.12, "grad_norm": 1.4876823425292969, "learning_rate": 3.9306987518022e-05, "loss": 1.293, "step": 20465 }, { "epoch": 6.12, "grad_norm": 1.7589577436447144, "learning_rate": 3.930216910570818e-05, "loss": 1.0805, "step": 20470 }, { "epoch": 6.13, "grad_norm": 3.688732862472534, "learning_rate": 3.929734990350043e-05, "loss": 1.2528, "step": 20475 }, { "epoch": 6.13, "grad_norm": 1.360373854637146, "learning_rate": 3.929252991166491e-05, "loss": 1.3237, "step": 20480 }, { "epoch": 6.13, "grad_norm": 6.932022571563721, "learning_rate": 3.928770913046781e-05, "loss": 1.1989, "step": 20485 }, { "epoch": 6.13, "grad_norm": 2.913423538208008, "learning_rate": 3.928288756017539e-05, "loss": 1.1123, "step": 20490 }, { "epoch": 6.13, "grad_norm": 1.7784669399261475, "learning_rate": 3.927806520105394e-05, "loss": 1.1983, "step": 20495 }, { "epoch": 6.13, "grad_norm": 1.5603896379470825, "learning_rate": 3.927324205336979e-05, "loss": 1.2451, "step": 20500 }, { "epoch": 6.13, "grad_norm": 3.020375967025757, "learning_rate": 3.9268418117389314e-05, "loss": 1.1557, "step": 20505 }, { "epoch": 6.14, "grad_norm": 2.243635416030884, "learning_rate": 3.926359339337894e-05, "loss": 1.1213, "step": 20510 }, { "epoch": 6.14, "grad_norm": 3.9490649700164795, "learning_rate": 3.9258767881605126e-05, "loss": 1.1739, "step": 20515 }, { "epoch": 6.14, "grad_norm": 2.237060308456421, "learning_rate": 3.925394158233438e-05, "loss": 1.2768, "step": 20520 }, { "epoch": 6.14, "grad_norm": 1.9132599830627441, "learning_rate": 3.924911449583326e-05, "loss": 1.0558, "step": 20525 }, { "epoch": 6.14, "grad_norm": 1.5452789068222046, "learning_rate": 3.924428662236836e-05, "loss": 1.3715, "step": 20530 }, { "epoch": 6.14, "grad_norm": 4.26951265335083, "learning_rate": 3.923945796220632e-05, "loss": 1.2972, "step": 20535 }, { "epoch": 6.15, "grad_norm": 8.05988883972168, "learning_rate": 3.9234628515613806e-05, 
"loss": 1.1404, "step": 20540 }, { "epoch": 6.15, "grad_norm": 2.191908359527588, "learning_rate": 3.922979828285757e-05, "loss": 1.1919, "step": 20545 }, { "epoch": 6.15, "grad_norm": 1.6250762939453125, "learning_rate": 3.922496726420435e-05, "loss": 1.1798, "step": 20550 }, { "epoch": 6.15, "grad_norm": 2.0135927200317383, "learning_rate": 3.9220135459920984e-05, "loss": 1.1462, "step": 20555 }, { "epoch": 6.15, "grad_norm": 2.279970407485962, "learning_rate": 3.921530287027431e-05, "loss": 1.2313, "step": 20560 }, { "epoch": 6.15, "grad_norm": 1.4251229763031006, "learning_rate": 3.921046949553124e-05, "loss": 1.2208, "step": 20565 }, { "epoch": 6.15, "grad_norm": 2.408446788787842, "learning_rate": 3.920563533595871e-05, "loss": 0.9747, "step": 20570 }, { "epoch": 6.16, "grad_norm": 4.922577381134033, "learning_rate": 3.9200800391823705e-05, "loss": 1.1785, "step": 20575 }, { "epoch": 6.16, "grad_norm": 1.8073464632034302, "learning_rate": 3.919596466339326e-05, "loss": 1.0418, "step": 20580 }, { "epoch": 6.16, "grad_norm": 1.2471339702606201, "learning_rate": 3.9191128150934435e-05, "loss": 1.1297, "step": 20585 }, { "epoch": 6.16, "grad_norm": 3.711719036102295, "learning_rate": 3.918629085471436e-05, "loss": 1.1975, "step": 20590 }, { "epoch": 6.16, "grad_norm": 4.0079498291015625, "learning_rate": 3.918145277500018e-05, "loss": 1.2807, "step": 20595 }, { "epoch": 6.16, "grad_norm": 1.5881859064102173, "learning_rate": 3.9176613912059114e-05, "loss": 1.1369, "step": 20600 }, { "epoch": 6.16, "grad_norm": 1.388296127319336, "learning_rate": 3.917177426615839e-05, "loss": 1.1525, "step": 20605 }, { "epoch": 6.17, "grad_norm": 1.6135921478271484, "learning_rate": 3.916790198588648e-05, "loss": 1.2703, "step": 20610 }, { "epoch": 6.17, "grad_norm": 1.2012652158737183, "learning_rate": 3.916306093133198e-05, "loss": 1.2268, "step": 20615 }, { "epoch": 6.17, "grad_norm": 1.4433081150054932, "learning_rate": 3.915821909456635e-05, "loss": 1.2495, "step": 20620 }, 
{ "epoch": 6.17, "grad_norm": 1.3392014503479004, "learning_rate": 3.9153376475856995e-05, "loss": 1.2917, "step": 20625 }, { "epoch": 6.17, "grad_norm": 1.8551833629608154, "learning_rate": 3.9148533075471364e-05, "loss": 1.1313, "step": 20630 }, { "epoch": 6.17, "grad_norm": 2.474085569381714, "learning_rate": 3.914368889367697e-05, "loss": 1.123, "step": 20635 }, { "epoch": 6.18, "grad_norm": 1.6884278059005737, "learning_rate": 3.9138843930741334e-05, "loss": 1.0056, "step": 20640 }, { "epoch": 6.18, "grad_norm": 9.735162734985352, "learning_rate": 3.9133998186932036e-05, "loss": 1.1052, "step": 20645 }, { "epoch": 6.18, "grad_norm": 3.7487850189208984, "learning_rate": 3.912915166251672e-05, "loss": 1.2539, "step": 20650 }, { "epoch": 6.18, "grad_norm": 2.0177783966064453, "learning_rate": 3.912430435776304e-05, "loss": 1.0794, "step": 20655 }, { "epoch": 6.18, "grad_norm": 1.6745036840438843, "learning_rate": 3.911945627293871e-05, "loss": 1.1716, "step": 20660 }, { "epoch": 6.18, "grad_norm": 3.979721784591675, "learning_rate": 3.9114607408311486e-05, "loss": 1.1044, "step": 20665 }, { "epoch": 6.18, "grad_norm": 3.073629140853882, "learning_rate": 3.9109757764149166e-05, "loss": 1.0468, "step": 20670 }, { "epoch": 6.19, "grad_norm": 1.9026330709457397, "learning_rate": 3.91049073407196e-05, "loss": 1.1539, "step": 20675 }, { "epoch": 6.19, "grad_norm": 2.6179862022399902, "learning_rate": 3.910005613829065e-05, "loss": 1.142, "step": 20680 }, { "epoch": 6.19, "grad_norm": 1.8899461030960083, "learning_rate": 3.909520415713027e-05, "loss": 1.2276, "step": 20685 }, { "epoch": 6.19, "grad_norm": 5.7648444175720215, "learning_rate": 3.909035139750641e-05, "loss": 1.166, "step": 20690 }, { "epoch": 6.19, "grad_norm": 6.039294719696045, "learning_rate": 3.908549785968708e-05, "loss": 1.1015, "step": 20695 }, { "epoch": 6.19, "grad_norm": 1.34919273853302, "learning_rate": 3.908064354394035e-05, "loss": 1.215, "step": 20700 }, { "epoch": 6.19, "grad_norm": 
2.3863883018493652, "learning_rate": 3.907578845053432e-05, "loss": 1.3305, "step": 20705 }, { "epoch": 6.2, "grad_norm": 1.9854397773742676, "learning_rate": 3.907093257973712e-05, "loss": 1.2554, "step": 20710 }, { "epoch": 6.2, "grad_norm": 2.0534327030181885, "learning_rate": 3.9066075931816934e-05, "loss": 1.132, "step": 20715 }, { "epoch": 6.2, "grad_norm": 4.463151454925537, "learning_rate": 3.9061218507042e-05, "loss": 1.1111, "step": 20720 }, { "epoch": 6.2, "grad_norm": 1.3392293453216553, "learning_rate": 3.905636030568058e-05, "loss": 1.0829, "step": 20725 }, { "epoch": 6.2, "grad_norm": 2.5353002548217773, "learning_rate": 3.905150132800099e-05, "loss": 1.1461, "step": 20730 }, { "epoch": 6.2, "grad_norm": 4.353736400604248, "learning_rate": 3.90466415742716e-05, "loss": 1.187, "step": 20735 }, { "epoch": 6.21, "grad_norm": 4.582551956176758, "learning_rate": 3.904178104476078e-05, "loss": 1.132, "step": 20740 }, { "epoch": 6.21, "grad_norm": 1.9312151670455933, "learning_rate": 3.9036919739737e-05, "loss": 1.1476, "step": 20745 }, { "epoch": 6.21, "grad_norm": 2.270596981048584, "learning_rate": 3.9032057659468734e-05, "loss": 1.0766, "step": 20750 }, { "epoch": 6.21, "grad_norm": 1.6759518384933472, "learning_rate": 3.902719480422451e-05, "loss": 1.0496, "step": 20755 }, { "epoch": 6.21, "grad_norm": 1.878208875656128, "learning_rate": 3.902233117427289e-05, "loss": 1.3187, "step": 20760 }, { "epoch": 6.21, "grad_norm": 1.0941290855407715, "learning_rate": 3.9017466769882494e-05, "loss": 1.1848, "step": 20765 }, { "epoch": 6.21, "grad_norm": 4.294778347015381, "learning_rate": 3.901260159132198e-05, "loss": 1.2239, "step": 20770 }, { "epoch": 6.22, "grad_norm": 3.6383235454559326, "learning_rate": 3.900773563886004e-05, "loss": 1.1626, "step": 20775 }, { "epoch": 6.22, "grad_norm": 2.137620449066162, "learning_rate": 3.900286891276543e-05, "loss": 1.273, "step": 20780 }, { "epoch": 6.22, "grad_norm": 3.62593150138855, "learning_rate": 
3.8998001413306926e-05, "loss": 1.1256, "step": 20785 }, { "epoch": 6.22, "grad_norm": 3.164325475692749, "learning_rate": 3.899313314075335e-05, "loss": 1.1586, "step": 20790 }, { "epoch": 6.22, "grad_norm": 3.6429853439331055, "learning_rate": 3.898826409537358e-05, "loss": 1.1242, "step": 20795 }, { "epoch": 6.22, "grad_norm": 2.646456241607666, "learning_rate": 3.898339427743652e-05, "loss": 1.2348, "step": 20800 }, { "epoch": 6.22, "grad_norm": 2.920931100845337, "learning_rate": 3.897852368721113e-05, "loss": 1.1977, "step": 20805 }, { "epoch": 6.23, "grad_norm": 3.684735059738159, "learning_rate": 3.8973652324966404e-05, "loss": 1.2288, "step": 20810 }, { "epoch": 6.23, "grad_norm": 2.1889755725860596, "learning_rate": 3.896878019097139e-05, "loss": 1.1471, "step": 20815 }, { "epoch": 6.23, "grad_norm": 3.232788324356079, "learning_rate": 3.896390728549516e-05, "loss": 1.1925, "step": 20820 }, { "epoch": 6.23, "grad_norm": 1.4268032312393188, "learning_rate": 3.895903360880685e-05, "loss": 1.0222, "step": 20825 }, { "epoch": 6.23, "grad_norm": 2.271416187286377, "learning_rate": 3.895415916117562e-05, "loss": 1.0918, "step": 20830 }, { "epoch": 6.23, "grad_norm": 4.371371269226074, "learning_rate": 3.894928394287068e-05, "loss": 1.2376, "step": 20835 }, { "epoch": 6.24, "grad_norm": 1.4696122407913208, "learning_rate": 3.89444079541613e-05, "loss": 1.3061, "step": 20840 }, { "epoch": 6.24, "grad_norm": 2.775634527206421, "learning_rate": 3.893953119531676e-05, "loss": 1.069, "step": 20845 }, { "epoch": 6.24, "grad_norm": 1.066505789756775, "learning_rate": 3.893465366660639e-05, "loss": 1.2153, "step": 20850 }, { "epoch": 6.24, "grad_norm": 1.7460033893585205, "learning_rate": 3.8929775368299595e-05, "loss": 1.0575, "step": 20855 }, { "epoch": 6.24, "grad_norm": 1.4495829343795776, "learning_rate": 3.892489630066578e-05, "loss": 1.1096, "step": 20860 }, { "epoch": 6.24, "grad_norm": 2.495727062225342, "learning_rate": 3.892001646397441e-05, "loss": 1.1207, 
"step": 20865 }, { "epoch": 6.24, "grad_norm": 2.2231380939483643, "learning_rate": 3.891513585849501e-05, "loss": 1.1967, "step": 20870 }, { "epoch": 6.25, "grad_norm": 7.131553649902344, "learning_rate": 3.891025448449711e-05, "loss": 1.1515, "step": 20875 }, { "epoch": 6.25, "grad_norm": 1.6254703998565674, "learning_rate": 3.890537234225033e-05, "loss": 1.2101, "step": 20880 }, { "epoch": 6.25, "grad_norm": 2.034390687942505, "learning_rate": 3.890048943202428e-05, "loss": 1.0613, "step": 20885 }, { "epoch": 6.25, "grad_norm": 2.165940999984741, "learning_rate": 3.8895605754088646e-05, "loss": 1.0818, "step": 20890 }, { "epoch": 6.25, "grad_norm": 1.1313718557357788, "learning_rate": 3.889072130871315e-05, "loss": 1.0127, "step": 20895 }, { "epoch": 6.25, "grad_norm": 3.206895589828491, "learning_rate": 3.888583609616755e-05, "loss": 1.2237, "step": 20900 }, { "epoch": 6.25, "grad_norm": 4.657969951629639, "learning_rate": 3.888095011672167e-05, "loss": 1.1528, "step": 20905 }, { "epoch": 6.26, "grad_norm": 1.6976109743118286, "learning_rate": 3.887606337064534e-05, "loss": 1.1705, "step": 20910 }, { "epoch": 6.26, "grad_norm": 1.7403982877731323, "learning_rate": 3.887117585820844e-05, "loss": 1.1797, "step": 20915 }, { "epoch": 6.26, "grad_norm": 3.2801010608673096, "learning_rate": 3.8866287579680925e-05, "loss": 1.1335, "step": 20920 }, { "epoch": 6.26, "grad_norm": 2.5484225749969482, "learning_rate": 3.886139853533276e-05, "loss": 1.2226, "step": 20925 }, { "epoch": 6.26, "grad_norm": 2.7119576930999756, "learning_rate": 3.8856508725433966e-05, "loss": 1.3315, "step": 20930 }, { "epoch": 6.26, "grad_norm": 4.890354156494141, "learning_rate": 3.8851618150254594e-05, "loss": 1.1151, "step": 20935 }, { "epoch": 6.27, "grad_norm": 2.2012619972229004, "learning_rate": 3.884672681006475e-05, "loss": 1.1209, "step": 20940 }, { "epoch": 6.27, "grad_norm": 2.108077049255371, "learning_rate": 3.884183470513457e-05, "loss": 1.2317, "step": 20945 }, { "epoch": 6.27, 
"grad_norm": 2.5535576343536377, "learning_rate": 3.883694183573426e-05, "loss": 1.253, "step": 20950 }, { "epoch": 6.27, "grad_norm": 3.921041965484619, "learning_rate": 3.883204820213403e-05, "loss": 1.2734, "step": 20955 }, { "epoch": 6.27, "grad_norm": 2.2731990814208984, "learning_rate": 3.882715380460416e-05, "loss": 1.1832, "step": 20960 }, { "epoch": 6.27, "grad_norm": 2.7740683555603027, "learning_rate": 3.882225864341494e-05, "loss": 1.2319, "step": 20965 }, { "epoch": 6.27, "grad_norm": 1.0402427911758423, "learning_rate": 3.8817362718836755e-05, "loss": 1.3091, "step": 20970 }, { "epoch": 6.28, "grad_norm": 3.553283214569092, "learning_rate": 3.8812466031139996e-05, "loss": 1.0222, "step": 20975 }, { "epoch": 6.28, "grad_norm": 2.163384437561035, "learning_rate": 3.8807568580595085e-05, "loss": 1.1629, "step": 20980 }, { "epoch": 6.28, "grad_norm": 5.054582118988037, "learning_rate": 3.8802670367472517e-05, "loss": 1.1109, "step": 20985 }, { "epoch": 6.28, "grad_norm": 3.024733543395996, "learning_rate": 3.879777139204281e-05, "loss": 1.1245, "step": 20990 }, { "epoch": 6.28, "grad_norm": 2.468595027923584, "learning_rate": 3.879287165457654e-05, "loss": 1.3344, "step": 20995 }, { "epoch": 6.28, "grad_norm": 5.894863605499268, "learning_rate": 3.878797115534429e-05, "loss": 1.3523, "step": 21000 }, { "epoch": 6.28, "grad_norm": 1.7448843717575073, "learning_rate": 3.878306989461673e-05, "loss": 1.1512, "step": 21005 }, { "epoch": 6.29, "grad_norm": 2.0726964473724365, "learning_rate": 3.8778167872664554e-05, "loss": 1.1512, "step": 21010 }, { "epoch": 6.29, "grad_norm": 2.647890090942383, "learning_rate": 3.8773265089758483e-05, "loss": 1.1089, "step": 21015 }, { "epoch": 6.29, "grad_norm": 2.005052328109741, "learning_rate": 3.87683615461693e-05, "loss": 1.2923, "step": 21020 }, { "epoch": 6.29, "grad_norm": 2.600606918334961, "learning_rate": 3.8763457242167816e-05, "loss": 1.1266, "step": 21025 }, { "epoch": 6.29, "grad_norm": 1.3668138980865479, 
"learning_rate": 3.87585521780249e-05, "loss": 1.1652, "step": 21030 }, { "epoch": 6.29, "grad_norm": 2.5960161685943604, "learning_rate": 3.8753646354011444e-05, "loss": 1.1451, "step": 21035 }, { "epoch": 6.29, "grad_norm": 2.091399669647217, "learning_rate": 3.87487397703984e-05, "loss": 1.0908, "step": 21040 }, { "epoch": 6.3, "grad_norm": 1.7568382024765015, "learning_rate": 3.8743832427456736e-05, "loss": 1.1526, "step": 21045 }, { "epoch": 6.3, "grad_norm": 1.715632677078247, "learning_rate": 3.873892432545751e-05, "loss": 1.2262, "step": 21050 }, { "epoch": 6.3, "grad_norm": 2.9848005771636963, "learning_rate": 3.873401546467177e-05, "loss": 1.2044, "step": 21055 }, { "epoch": 6.3, "grad_norm": 1.247778058052063, "learning_rate": 3.872910584537063e-05, "loss": 1.1523, "step": 21060 }, { "epoch": 6.3, "grad_norm": 3.150620460510254, "learning_rate": 3.872419546782524e-05, "loss": 1.0884, "step": 21065 }, { "epoch": 6.3, "grad_norm": 1.8623440265655518, "learning_rate": 3.8719284332306804e-05, "loss": 1.2578, "step": 21070 }, { "epoch": 6.31, "grad_norm": 1.1926594972610474, "learning_rate": 3.871437243908655e-05, "loss": 1.1856, "step": 21075 }, { "epoch": 6.31, "grad_norm": 2.9586501121520996, "learning_rate": 3.870945978843578e-05, "loss": 1.0101, "step": 21080 }, { "epoch": 6.31, "grad_norm": 1.617187261581421, "learning_rate": 3.8704546380625776e-05, "loss": 1.1834, "step": 21085 }, { "epoch": 6.31, "grad_norm": 1.3432159423828125, "learning_rate": 3.869963221592793e-05, "loss": 1.2137, "step": 21090 }, { "epoch": 6.31, "grad_norm": 1.8698570728302002, "learning_rate": 3.8694717294613625e-05, "loss": 1.2116, "step": 21095 }, { "epoch": 6.31, "grad_norm": 2.4080424308776855, "learning_rate": 3.868980161695433e-05, "loss": 1.2621, "step": 21100 }, { "epoch": 6.31, "grad_norm": 4.020505905151367, "learning_rate": 3.868488518322152e-05, "loss": 1.106, "step": 21105 }, { "epoch": 6.32, "grad_norm": 1.8761104345321655, "learning_rate": 3.8679967993686726e-05, 
"loss": 1.1593, "step": 21110 }, { "epoch": 6.32, "grad_norm": 1.0828648805618286, "learning_rate": 3.867505004862152e-05, "loss": 1.1695, "step": 21115 }, { "epoch": 6.32, "grad_norm": 2.3236336708068848, "learning_rate": 3.8670131348297514e-05, "loss": 1.2771, "step": 21120 }, { "epoch": 6.32, "grad_norm": 2.3681468963623047, "learning_rate": 3.8665211892986355e-05, "loss": 1.3653, "step": 21125 }, { "epoch": 6.32, "grad_norm": 3.011075973510742, "learning_rate": 3.8660291682959753e-05, "loss": 1.1936, "step": 21130 }, { "epoch": 6.32, "grad_norm": 1.638193964958191, "learning_rate": 3.865537071848944e-05, "loss": 1.2263, "step": 21135 }, { "epoch": 6.32, "grad_norm": 0.8209080696105957, "learning_rate": 3.86504489998472e-05, "loss": 1.1663, "step": 21140 }, { "epoch": 6.33, "grad_norm": 2.627833843231201, "learning_rate": 3.864552652730485e-05, "loss": 1.0747, "step": 21145 }, { "epoch": 6.33, "grad_norm": 1.2440749406814575, "learning_rate": 3.8640603301134245e-05, "loss": 1.211, "step": 21150 }, { "epoch": 6.33, "grad_norm": 1.7105375528335571, "learning_rate": 3.863567932160731e-05, "loss": 1.177, "step": 21155 }, { "epoch": 6.33, "grad_norm": 1.2131013870239258, "learning_rate": 3.863075458899598e-05, "loss": 1.0843, "step": 21160 }, { "epoch": 6.33, "grad_norm": 5.162259101867676, "learning_rate": 3.862582910357223e-05, "loss": 1.2929, "step": 21165 }, { "epoch": 6.33, "grad_norm": 3.677018165588379, "learning_rate": 3.862090286560811e-05, "loss": 1.0206, "step": 21170 }, { "epoch": 6.34, "grad_norm": 1.5079269409179688, "learning_rate": 3.861597587537568e-05, "loss": 1.2481, "step": 21175 }, { "epoch": 6.34, "grad_norm": 2.0663700103759766, "learning_rate": 3.861104813314705e-05, "loss": 1.0672, "step": 21180 }, { "epoch": 6.34, "grad_norm": 2.495488166809082, "learning_rate": 3.8606119639194394e-05, "loss": 1.1044, "step": 21185 }, { "epoch": 6.34, "grad_norm": 2.68648099899292, "learning_rate": 3.8601190393789885e-05, "loss": 1.1525, "step": 21190 }, { 
"epoch": 6.34, "grad_norm": 1.7816789150238037, "learning_rate": 3.8596260397205766e-05, "loss": 1.2182, "step": 21195 }, { "epoch": 6.34, "grad_norm": 1.921111822128296, "learning_rate": 3.859132964971432e-05, "loss": 1.1855, "step": 21200 }, { "epoch": 6.34, "grad_norm": 0.9423299431800842, "learning_rate": 3.8586398151587864e-05, "loss": 1.2339, "step": 21205 }, { "epoch": 6.35, "grad_norm": 1.9230526685714722, "learning_rate": 3.858146590309877e-05, "loss": 1.1414, "step": 21210 }, { "epoch": 6.35, "grad_norm": 1.7740641832351685, "learning_rate": 3.857653290451941e-05, "loss": 1.3103, "step": 21215 }, { "epoch": 6.35, "grad_norm": 2.815051555633545, "learning_rate": 3.857159915612227e-05, "loss": 1.3497, "step": 21220 }, { "epoch": 6.35, "grad_norm": 2.79278302192688, "learning_rate": 3.856666465817981e-05, "loss": 1.129, "step": 21225 }, { "epoch": 6.35, "grad_norm": 1.6344680786132812, "learning_rate": 3.8561729410964556e-05, "loss": 1.1296, "step": 21230 }, { "epoch": 6.35, "grad_norm": 2.4964382648468018, "learning_rate": 3.855679341474909e-05, "loss": 1.2656, "step": 21235 }, { "epoch": 6.35, "grad_norm": 2.5263328552246094, "learning_rate": 3.855185666980602e-05, "loss": 1.3019, "step": 21240 }, { "epoch": 6.36, "grad_norm": 3.0418660640716553, "learning_rate": 3.854691917640798e-05, "loss": 1.2104, "step": 21245 }, { "epoch": 6.36, "grad_norm": 2.407768726348877, "learning_rate": 3.854198093482768e-05, "loss": 1.0718, "step": 21250 }, { "epoch": 6.36, "grad_norm": 3.624297857284546, "learning_rate": 3.853704194533785e-05, "loss": 1.2319, "step": 21255 }, { "epoch": 6.36, "grad_norm": 3.435147285461426, "learning_rate": 3.8532102208211265e-05, "loss": 1.1739, "step": 21260 }, { "epoch": 6.36, "grad_norm": 2.9471611976623535, "learning_rate": 3.852716172372074e-05, "loss": 1.1199, "step": 21265 }, { "epoch": 6.36, "grad_norm": 3.1769185066223145, "learning_rate": 3.8522220492139136e-05, "loss": 1.1471, "step": 21270 }, { "epoch": 6.37, "grad_norm": 
3.634047269821167, "learning_rate": 3.8517278513739345e-05, "loss": 1.1882, "step": 21275 }, { "epoch": 6.37, "grad_norm": 3.191857099533081, "learning_rate": 3.851233578879432e-05, "loss": 1.1523, "step": 21280 }, { "epoch": 6.37, "grad_norm": 3.0446479320526123, "learning_rate": 3.850739231757702e-05, "loss": 1.1287, "step": 21285 }, { "epoch": 6.37, "grad_norm": 1.9125072956085205, "learning_rate": 3.850244810036049e-05, "loss": 1.0729, "step": 21290 }, { "epoch": 6.37, "grad_norm": 6.34651517868042, "learning_rate": 3.849750313741779e-05, "loss": 1.1506, "step": 21295 }, { "epoch": 6.37, "grad_norm": 1.6267149448394775, "learning_rate": 3.8492557429022026e-05, "loss": 1.0437, "step": 21300 }, { "epoch": 6.37, "grad_norm": 2.244823694229126, "learning_rate": 3.848761097544633e-05, "loss": 1.1806, "step": 21305 }, { "epoch": 6.38, "grad_norm": 1.7018314599990845, "learning_rate": 3.8482663776963904e-05, "loss": 1.1546, "step": 21310 }, { "epoch": 6.38, "grad_norm": 1.9691760540008545, "learning_rate": 3.847771583384797e-05, "loss": 1.089, "step": 21315 }, { "epoch": 6.38, "grad_norm": 2.011826992034912, "learning_rate": 3.8472767146371805e-05, "loss": 1.0998, "step": 21320 }, { "epoch": 6.38, "grad_norm": 1.590854525566101, "learning_rate": 3.846781771480871e-05, "loss": 1.2689, "step": 21325 }, { "epoch": 6.38, "grad_norm": 3.3987576961517334, "learning_rate": 3.846286753943205e-05, "loss": 1.1582, "step": 21330 }, { "epoch": 6.38, "grad_norm": 1.704407811164856, "learning_rate": 3.84579166205152e-05, "loss": 1.234, "step": 21335 }, { "epoch": 6.38, "grad_norm": 1.0222856998443604, "learning_rate": 3.845296495833161e-05, "loss": 1.2021, "step": 21340 }, { "epoch": 6.39, "grad_norm": 2.390643835067749, "learning_rate": 3.844801255315474e-05, "loss": 1.3623, "step": 21345 }, { "epoch": 6.39, "grad_norm": 1.4073227643966675, "learning_rate": 3.844305940525812e-05, "loss": 1.2204, "step": 21350 }, { "epoch": 6.39, "grad_norm": 3.146881341934204, "learning_rate": 
3.84381055149153e-05, "loss": 1.3272, "step": 21355 }, { "epoch": 6.39, "grad_norm": 1.6763017177581787, "learning_rate": 3.843315088239988e-05, "loss": 1.1796, "step": 21360 }, { "epoch": 6.39, "grad_norm": 2.7298102378845215, "learning_rate": 3.8428195507985505e-05, "loss": 1.1095, "step": 21365 }, { "epoch": 6.39, "grad_norm": 2.3021061420440674, "learning_rate": 3.842323939194584e-05, "loss": 1.0336, "step": 21370 }, { "epoch": 6.4, "grad_norm": 2.4729456901550293, "learning_rate": 3.841828253455463e-05, "loss": 1.3022, "step": 21375 }, { "epoch": 6.4, "grad_norm": 2.751861572265625, "learning_rate": 3.841332493608561e-05, "loss": 1.3114, "step": 21380 }, { "epoch": 6.4, "grad_norm": 3.295260190963745, "learning_rate": 3.840836659681261e-05, "loss": 1.1114, "step": 21385 }, { "epoch": 6.4, "grad_norm": 4.862136363983154, "learning_rate": 3.840340751700945e-05, "loss": 1.1214, "step": 21390 }, { "epoch": 6.4, "grad_norm": 1.8735803365707397, "learning_rate": 3.8398447696950036e-05, "loss": 1.1672, "step": 21395 }, { "epoch": 6.4, "grad_norm": 1.558856725692749, "learning_rate": 3.839348713690827e-05, "loss": 1.2898, "step": 21400 }, { "epoch": 6.4, "grad_norm": 7.87240743637085, "learning_rate": 3.838852583715814e-05, "loss": 1.1136, "step": 21405 }, { "epoch": 6.41, "grad_norm": 3.504246473312378, "learning_rate": 3.8383563797973634e-05, "loss": 1.0798, "step": 21410 }, { "epoch": 6.41, "grad_norm": 1.1940808296203613, "learning_rate": 3.837860101962882e-05, "loss": 1.1356, "step": 21415 }, { "epoch": 6.41, "grad_norm": 1.6067959070205688, "learning_rate": 3.8373637502397775e-05, "loss": 1.1774, "step": 21420 }, { "epoch": 6.41, "grad_norm": 5.702824115753174, "learning_rate": 3.836867324655463e-05, "loss": 1.1839, "step": 21425 }, { "epoch": 6.41, "grad_norm": 1.5713484287261963, "learning_rate": 3.8363708252373563e-05, "loss": 1.1561, "step": 21430 }, { "epoch": 6.41, "grad_norm": 2.0264546871185303, "learning_rate": 3.835874252012878e-05, "loss": 1.2943, 
"step": 21435 }, { "epoch": 6.41, "grad_norm": 1.9369525909423828, "learning_rate": 3.8353776050094524e-05, "loss": 1.0951, "step": 21440 }, { "epoch": 6.42, "grad_norm": 1.6942447423934937, "learning_rate": 3.8348808842545106e-05, "loss": 1.0946, "step": 21445 }, { "epoch": 6.42, "grad_norm": 1.3976869583129883, "learning_rate": 3.8343840897754845e-05, "loss": 1.2452, "step": 21450 }, { "epoch": 6.42, "grad_norm": 3.386245012283325, "learning_rate": 3.833887221599812e-05, "loss": 1.3216, "step": 21455 }, { "epoch": 6.42, "grad_norm": 1.7361987829208374, "learning_rate": 3.833390279754935e-05, "loss": 1.2033, "step": 21460 }, { "epoch": 6.42, "grad_norm": 4.732967853546143, "learning_rate": 3.832893264268299e-05, "loss": 1.202, "step": 21465 }, { "epoch": 6.42, "grad_norm": 11.390531539916992, "learning_rate": 3.8323961751673545e-05, "loss": 1.1177, "step": 21470 }, { "epoch": 6.43, "grad_norm": 1.5871869325637817, "learning_rate": 3.831899012479553e-05, "loss": 1.1747, "step": 21475 }, { "epoch": 6.43, "grad_norm": 2.522251605987549, "learning_rate": 3.8314017762323526e-05, "loss": 1.0981, "step": 21480 }, { "epoch": 6.43, "grad_norm": 2.3659377098083496, "learning_rate": 3.830904466453218e-05, "loss": 1.1489, "step": 21485 }, { "epoch": 6.43, "grad_norm": 2.8032987117767334, "learning_rate": 3.830407083169612e-05, "loss": 1.1038, "step": 21490 }, { "epoch": 6.43, "grad_norm": 3.808497428894043, "learning_rate": 3.829909626409006e-05, "loss": 1.1027, "step": 21495 }, { "epoch": 6.43, "grad_norm": 2.006352663040161, "learning_rate": 3.829412096198873e-05, "loss": 1.3041, "step": 21500 }, { "epoch": 6.43, "grad_norm": 2.3574745655059814, "learning_rate": 3.828914492566693e-05, "loss": 1.1786, "step": 21505 }, { "epoch": 6.44, "grad_norm": 2.2047548294067383, "learning_rate": 3.828416815539946e-05, "loss": 1.2032, "step": 21510 }, { "epoch": 6.44, "grad_norm": 3.6130099296569824, "learning_rate": 3.8279190651461195e-05, "loss": 1.0937, "step": 21515 }, { "epoch": 
6.44, "grad_norm": 2.250490427017212, "learning_rate": 3.827421241412703e-05, "loss": 1.3764, "step": 21520 }, { "epoch": 6.44, "grad_norm": 2.148017168045044, "learning_rate": 3.8269233443671914e-05, "loss": 1.2094, "step": 21525 }, { "epoch": 6.44, "grad_norm": 3.4997782707214355, "learning_rate": 3.826425374037083e-05, "loss": 1.1462, "step": 21530 }, { "epoch": 6.44, "grad_norm": 1.9769136905670166, "learning_rate": 3.825927330449879e-05, "loss": 1.0678, "step": 21535 }, { "epoch": 6.44, "grad_norm": 1.7285351753234863, "learning_rate": 3.825429213633087e-05, "loss": 1.3424, "step": 21540 }, { "epoch": 6.45, "grad_norm": 2.1386027336120605, "learning_rate": 3.8249310236142175e-05, "loss": 1.0702, "step": 21545 }, { "epoch": 6.45, "grad_norm": 4.701780319213867, "learning_rate": 3.8244327604207856e-05, "loss": 1.2914, "step": 21550 }, { "epoch": 6.45, "grad_norm": 0.979941725730896, "learning_rate": 3.8239344240803077e-05, "loss": 1.313, "step": 21555 }, { "epoch": 6.45, "grad_norm": 2.4116852283477783, "learning_rate": 3.823436014620308e-05, "loss": 1.1429, "step": 21560 }, { "epoch": 6.45, "grad_norm": 2.062809705734253, "learning_rate": 3.822937532068314e-05, "loss": 1.064, "step": 21565 }, { "epoch": 6.45, "grad_norm": 0.9960473775863647, "learning_rate": 3.822438976451854e-05, "loss": 1.3403, "step": 21570 }, { "epoch": 6.45, "grad_norm": 0.9557723999023438, "learning_rate": 3.821940347798464e-05, "loss": 1.2886, "step": 21575 }, { "epoch": 6.46, "grad_norm": 1.1672139167785645, "learning_rate": 3.8214416461356825e-05, "loss": 1.0831, "step": 21580 }, { "epoch": 6.46, "grad_norm": 1.0356839895248413, "learning_rate": 3.8209428714910536e-05, "loss": 1.2411, "step": 21585 }, { "epoch": 6.46, "grad_norm": 2.4075944423675537, "learning_rate": 3.820444023892122e-05, "loss": 1.3309, "step": 21590 }, { "epoch": 6.46, "grad_norm": 3.5480706691741943, "learning_rate": 3.8199451033664395e-05, "loss": 1.214, "step": 21595 }, { "epoch": 6.46, "grad_norm": 
14.751591682434082, "learning_rate": 3.8194461099415615e-05, "loss": 1.1567, "step": 21600 }, { "epoch": 6.46, "grad_norm": 2.6197705268859863, "learning_rate": 3.818947043645046e-05, "loss": 1.3019, "step": 21605 }, { "epoch": 6.47, "grad_norm": 3.7465360164642334, "learning_rate": 3.818447904504456e-05, "loss": 1.0903, "step": 21610 }, { "epoch": 6.47, "grad_norm": 1.5686691999435425, "learning_rate": 3.817948692547358e-05, "loss": 1.1012, "step": 21615 }, { "epoch": 6.47, "grad_norm": 1.792001485824585, "learning_rate": 3.8174494078013254e-05, "loss": 1.2688, "step": 21620 }, { "epoch": 6.47, "grad_norm": 1.9300975799560547, "learning_rate": 3.8169500502939305e-05, "loss": 1.0274, "step": 21625 }, { "epoch": 6.47, "grad_norm": 2.6718950271606445, "learning_rate": 3.816450620052754e-05, "loss": 1.2466, "step": 21630 }, { "epoch": 6.47, "grad_norm": 1.7628939151763916, "learning_rate": 3.8159511171053767e-05, "loss": 1.1826, "step": 21635 }, { "epoch": 6.47, "grad_norm": 3.9780194759368896, "learning_rate": 3.8154515414793874e-05, "loss": 1.0958, "step": 21640 }, { "epoch": 6.48, "grad_norm": 1.881233811378479, "learning_rate": 3.814951893202377e-05, "loss": 1.0608, "step": 21645 }, { "epoch": 6.48, "grad_norm": 2.578082323074341, "learning_rate": 3.814452172301941e-05, "loss": 1.1508, "step": 21650 }, { "epoch": 6.48, "grad_norm": 4.1634721755981445, "learning_rate": 3.813952378805677e-05, "loss": 1.1515, "step": 21655 }, { "epoch": 6.48, "grad_norm": 2.9157748222351074, "learning_rate": 3.8134525127411896e-05, "loss": 1.2368, "step": 21660 }, { "epoch": 6.48, "grad_norm": 4.483683109283447, "learning_rate": 3.812952574136085e-05, "loss": 1.0416, "step": 21665 }, { "epoch": 6.48, "grad_norm": 2.9984755516052246, "learning_rate": 3.812452563017974e-05, "loss": 1.0572, "step": 21670 }, { "epoch": 6.48, "grad_norm": 1.295201301574707, "learning_rate": 3.8119524794144724e-05, "loss": 1.0067, "step": 21675 }, { "epoch": 6.49, "grad_norm": 2.1043081283569336, 
"learning_rate": 3.811452323353199e-05, "loss": 1.2859, "step": 21680 }, { "epoch": 6.49, "grad_norm": 1.3787553310394287, "learning_rate": 3.810952094861777e-05, "loss": 1.1761, "step": 21685 }, { "epoch": 6.49, "grad_norm": 1.4187425374984741, "learning_rate": 3.810451793967834e-05, "loss": 1.2269, "step": 21690 }, { "epoch": 6.49, "grad_norm": 4.034739017486572, "learning_rate": 3.809951420699e-05, "loss": 1.0818, "step": 21695 }, { "epoch": 6.49, "grad_norm": 1.0711634159088135, "learning_rate": 3.809450975082911e-05, "loss": 1.1358, "step": 21700 }, { "epoch": 6.49, "grad_norm": 7.229355335235596, "learning_rate": 3.808950457147205e-05, "loss": 1.3021, "step": 21705 }, { "epoch": 6.5, "grad_norm": 1.897261619567871, "learning_rate": 3.808449866919527e-05, "loss": 1.2492, "step": 21710 }, { "epoch": 6.5, "grad_norm": 2.2863614559173584, "learning_rate": 3.807949204427522e-05, "loss": 1.0721, "step": 21715 }, { "epoch": 6.5, "grad_norm": 2.1453750133514404, "learning_rate": 3.807448469698842e-05, "loss": 1.1794, "step": 21720 }, { "epoch": 6.5, "grad_norm": 0.794268012046814, "learning_rate": 3.806947662761142e-05, "loss": 1.3931, "step": 21725 }, { "epoch": 6.5, "grad_norm": 1.1213440895080566, "learning_rate": 3.8064467836420815e-05, "loss": 1.2736, "step": 21730 }, { "epoch": 6.5, "grad_norm": 4.008541584014893, "learning_rate": 3.805945832369322e-05, "loss": 1.247, "step": 21735 }, { "epoch": 6.5, "grad_norm": 1.5506445169448853, "learning_rate": 3.805444808970533e-05, "loss": 1.024, "step": 21740 }, { "epoch": 6.51, "grad_norm": 1.3730429410934448, "learning_rate": 3.8049437134733834e-05, "loss": 1.2982, "step": 21745 }, { "epoch": 6.51, "grad_norm": 1.3284574747085571, "learning_rate": 3.80444254590555e-05, "loss": 1.2488, "step": 21750 }, { "epoch": 6.51, "grad_norm": 3.1834280490875244, "learning_rate": 3.80394130629471e-05, "loss": 1.1852, "step": 21755 }, { "epoch": 6.51, "grad_norm": 1.5427137613296509, "learning_rate": 3.8034399946685466e-05, "loss": 
1.2332, "step": 21760 }, { "epoch": 6.51, "grad_norm": 2.425147533416748, "learning_rate": 3.802938611054747e-05, "loss": 1.1036, "step": 21765 }, { "epoch": 6.51, "grad_norm": 16.511322021484375, "learning_rate": 3.802437155481003e-05, "loss": 1.0699, "step": 21770 }, { "epoch": 6.51, "grad_norm": 1.3702104091644287, "learning_rate": 3.801935627975008e-05, "loss": 1.2717, "step": 21775 }, { "epoch": 6.52, "grad_norm": 2.96932315826416, "learning_rate": 3.8014340285644625e-05, "loss": 1.2624, "step": 21780 }, { "epoch": 6.52, "grad_norm": 1.8278169631958008, "learning_rate": 3.8009323572770684e-05, "loss": 1.1947, "step": 21785 }, { "epoch": 6.52, "grad_norm": 1.3895366191864014, "learning_rate": 3.8004306141405314e-05, "loss": 1.1731, "step": 21790 }, { "epoch": 6.52, "grad_norm": 2.925675868988037, "learning_rate": 3.799928799182564e-05, "loss": 1.22, "step": 21795 }, { "epoch": 6.52, "grad_norm": 1.1851224899291992, "learning_rate": 3.79942691243088e-05, "loss": 1.0118, "step": 21800 }, { "epoch": 6.52, "grad_norm": 2.040398120880127, "learning_rate": 3.7989249539131984e-05, "loss": 1.0473, "step": 21805 }, { "epoch": 6.53, "grad_norm": 3.938803195953369, "learning_rate": 3.798422923657241e-05, "loss": 1.0397, "step": 21810 }, { "epoch": 6.53, "grad_norm": 2.930795907974243, "learning_rate": 3.797920821690736e-05, "loss": 1.1349, "step": 21815 }, { "epoch": 6.53, "grad_norm": 2.4224655628204346, "learning_rate": 3.797418648041413e-05, "loss": 1.2474, "step": 21820 }, { "epoch": 6.53, "grad_norm": 2.1420652866363525, "learning_rate": 3.796916402737007e-05, "loss": 1.1601, "step": 21825 }, { "epoch": 6.53, "grad_norm": 2.40444016456604, "learning_rate": 3.7964140858052555e-05, "loss": 1.252, "step": 21830 }, { "epoch": 6.53, "grad_norm": 3.1540682315826416, "learning_rate": 3.795911697273902e-05, "loss": 1.2526, "step": 21835 }, { "epoch": 6.53, "grad_norm": 1.9476728439331055, "learning_rate": 3.7954092371706925e-05, "loss": 1.0338, "step": 21840 }, { "epoch": 
6.54, "grad_norm": 1.7248507738113403, "learning_rate": 3.7949067055233774e-05, "loss": 1.1222, "step": 21845 }, { "epoch": 6.54, "grad_norm": 3.3292136192321777, "learning_rate": 3.7944041023597105e-05, "loss": 1.1172, "step": 21850 }, { "epoch": 6.54, "grad_norm": 2.2748050689697266, "learning_rate": 3.793901427707451e-05, "loss": 1.1926, "step": 21855 }, { "epoch": 6.54, "grad_norm": 1.3561511039733887, "learning_rate": 3.79339868159436e-05, "loss": 1.2057, "step": 21860 }, { "epoch": 6.54, "grad_norm": 4.775590896606445, "learning_rate": 3.7928958640482046e-05, "loss": 1.112, "step": 21865 }, { "epoch": 6.54, "grad_norm": 1.4836394786834717, "learning_rate": 3.792392975096754e-05, "loss": 1.3004, "step": 21870 }, { "epoch": 6.54, "grad_norm": 1.8242735862731934, "learning_rate": 3.791890014767783e-05, "loss": 1.3185, "step": 21875 }, { "epoch": 6.55, "grad_norm": 3.001962661743164, "learning_rate": 3.79138698308907e-05, "loss": 1.082, "step": 21880 }, { "epoch": 6.55, "grad_norm": 1.9447215795516968, "learning_rate": 3.790883880088396e-05, "loss": 1.3185, "step": 21885 }, { "epoch": 6.55, "grad_norm": 2.3919167518615723, "learning_rate": 3.790380705793547e-05, "loss": 1.2078, "step": 21890 }, { "epoch": 6.55, "grad_norm": 6.295918941497803, "learning_rate": 3.789877460232313e-05, "loss": 0.922, "step": 21895 }, { "epoch": 6.55, "grad_norm": 2.696068525314331, "learning_rate": 3.789374143432487e-05, "loss": 1.3105, "step": 21900 }, { "epoch": 6.55, "grad_norm": 3.568495035171509, "learning_rate": 3.788870755421867e-05, "loss": 1.128, "step": 21905 }, { "epoch": 6.56, "grad_norm": 1.2198554277420044, "learning_rate": 3.7883672962282555e-05, "loss": 1.2149, "step": 21910 }, { "epoch": 6.56, "grad_norm": 5.560957908630371, "learning_rate": 3.787863765879458e-05, "loss": 1.1389, "step": 21915 }, { "epoch": 6.56, "grad_norm": 3.2426624298095703, "learning_rate": 3.787360164403283e-05, "loss": 1.0907, "step": 21920 }, { "epoch": 6.56, "grad_norm": 2.8648648262023926, 
"learning_rate": 3.786856491827544e-05, "loss": 0.966, "step": 21925 }, { "epoch": 6.56, "grad_norm": 1.9964818954467773, "learning_rate": 3.786352748180059e-05, "loss": 1.2, "step": 21930 }, { "epoch": 6.56, "grad_norm": 1.6703797578811646, "learning_rate": 3.7858489334886477e-05, "loss": 1.2223, "step": 21935 }, { "epoch": 6.56, "grad_norm": 2.2925643920898438, "learning_rate": 3.785345047781137e-05, "loss": 1.3513, "step": 21940 }, { "epoch": 6.57, "grad_norm": 1.0569705963134766, "learning_rate": 3.784841091085356e-05, "loss": 1.1399, "step": 21945 }, { "epoch": 6.57, "grad_norm": 3.3244879245758057, "learning_rate": 3.784337063429136e-05, "loss": 1.3942, "step": 21950 }, { "epoch": 6.57, "grad_norm": 3.7458391189575195, "learning_rate": 3.783832964840316e-05, "loss": 1.1377, "step": 21955 }, { "epoch": 6.57, "grad_norm": 1.8675755262374878, "learning_rate": 3.7833287953467354e-05, "loss": 1.249, "step": 21960 }, { "epoch": 6.57, "grad_norm": 2.4439284801483154, "learning_rate": 3.782824554976239e-05, "loss": 1.2981, "step": 21965 }, { "epoch": 6.57, "grad_norm": 5.8578877449035645, "learning_rate": 3.7823202437566764e-05, "loss": 1.2135, "step": 21970 }, { "epoch": 6.57, "grad_norm": 1.4064592123031616, "learning_rate": 3.7818158617158995e-05, "loss": 1.3478, "step": 21975 }, { "epoch": 6.58, "grad_norm": 2.7772834300994873, "learning_rate": 3.781311408881765e-05, "loss": 1.2668, "step": 21980 }, { "epoch": 6.58, "grad_norm": 2.9502522945404053, "learning_rate": 3.7808068852821334e-05, "loss": 1.1482, "step": 21985 }, { "epoch": 6.58, "grad_norm": 4.143762588500977, "learning_rate": 3.780302290944868e-05, "loss": 1.1882, "step": 21990 }, { "epoch": 6.58, "grad_norm": 5.523019313812256, "learning_rate": 3.7797976258978386e-05, "loss": 1.3205, "step": 21995 }, { "epoch": 6.58, "grad_norm": 1.1239092350006104, "learning_rate": 3.779292890168916e-05, "loss": 1.3228, "step": 22000 }, { "epoch": 6.58, "grad_norm": 2.243865966796875, "learning_rate": 
3.7787880837859767e-05, "loss": 1.3075, "step": 22005 }, { "epoch": 6.59, "grad_norm": 2.5616250038146973, "learning_rate": 3.778283206776902e-05, "loss": 1.2133, "step": 22010 }, { "epoch": 6.59, "grad_norm": 3.7556142807006836, "learning_rate": 3.777778259169574e-05, "loss": 1.1166, "step": 22015 }, { "epoch": 6.59, "grad_norm": 2.411454439163208, "learning_rate": 3.77727324099188e-05, "loss": 1.2067, "step": 22020 }, { "epoch": 6.59, "grad_norm": 7.664334297180176, "learning_rate": 3.776768152271713e-05, "loss": 1.0729, "step": 22025 }, { "epoch": 6.59, "grad_norm": 1.7373452186584473, "learning_rate": 3.776262993036969e-05, "loss": 1.1381, "step": 22030 }, { "epoch": 6.59, "grad_norm": 1.6284407377243042, "learning_rate": 3.7757577633155464e-05, "loss": 1.1592, "step": 22035 }, { "epoch": 6.59, "grad_norm": 1.5111100673675537, "learning_rate": 3.7752524631353484e-05, "loss": 1.1819, "step": 22040 }, { "epoch": 6.6, "grad_norm": 2.4430501461029053, "learning_rate": 3.7747470925242815e-05, "loss": 1.3295, "step": 22045 }, { "epoch": 6.6, "grad_norm": 1.4049407243728638, "learning_rate": 3.774241651510258e-05, "loss": 1.1802, "step": 22050 }, { "epoch": 6.6, "grad_norm": 1.4053436517715454, "learning_rate": 3.773736140121193e-05, "loss": 1.2387, "step": 22055 }, { "epoch": 6.6, "grad_norm": 1.9086112976074219, "learning_rate": 3.7732305583850044e-05, "loss": 1.1988, "step": 22060 }, { "epoch": 6.6, "grad_norm": 2.53025221824646, "learning_rate": 3.772724906329616e-05, "loss": 1.1728, "step": 22065 }, { "epoch": 6.6, "grad_norm": 2.8648312091827393, "learning_rate": 3.772219183982953e-05, "loss": 1.0513, "step": 22070 }, { "epoch": 6.6, "grad_norm": 1.3704460859298706, "learning_rate": 3.771713391372949e-05, "loss": 1.2856, "step": 22075 }, { "epoch": 6.61, "grad_norm": 5.734248161315918, "learning_rate": 3.771207528527534e-05, "loss": 1.1455, "step": 22080 }, { "epoch": 6.61, "grad_norm": 2.4370834827423096, "learning_rate": 3.77070159547465e-05, "loss": 1.1426, 
"step": 22085 }, { "epoch": 6.61, "grad_norm": 13.942612648010254, "learning_rate": 3.770195592242237e-05, "loss": 1.2256, "step": 22090 }, { "epoch": 6.61, "grad_norm": 2.1054158210754395, "learning_rate": 3.769689518858242e-05, "loss": 1.2302, "step": 22095 }, { "epoch": 6.61, "grad_norm": 2.0124964714050293, "learning_rate": 3.7691833753506145e-05, "loss": 1.2412, "step": 22100 }, { "epoch": 6.61, "grad_norm": 2.5039613246917725, "learning_rate": 3.7686771617473094e-05, "loss": 1.0837, "step": 22105 }, { "epoch": 6.62, "grad_norm": 1.1294031143188477, "learning_rate": 3.768170878076283e-05, "loss": 1.2652, "step": 22110 }, { "epoch": 6.62, "grad_norm": 1.8122518062591553, "learning_rate": 3.767664524365496e-05, "loss": 1.3259, "step": 22115 }, { "epoch": 6.62, "grad_norm": 2.7276618480682373, "learning_rate": 3.767158100642916e-05, "loss": 1.0632, "step": 22120 }, { "epoch": 6.62, "grad_norm": 6.643666744232178, "learning_rate": 3.766651606936511e-05, "loss": 1.1819, "step": 22125 }, { "epoch": 6.62, "grad_norm": 1.3447577953338623, "learning_rate": 3.7661450432742534e-05, "loss": 1.3445, "step": 22130 }, { "epoch": 6.62, "grad_norm": 4.023565769195557, "learning_rate": 3.7656384096841226e-05, "loss": 1.1085, "step": 22135 }, { "epoch": 6.62, "grad_norm": 2.0256614685058594, "learning_rate": 3.7651317061940975e-05, "loss": 1.2472, "step": 22140 }, { "epoch": 6.63, "grad_norm": 1.7763872146606445, "learning_rate": 3.764624932832163e-05, "loss": 1.1751, "step": 22145 }, { "epoch": 6.63, "grad_norm": 2.526588201522827, "learning_rate": 3.7641180896263085e-05, "loss": 1.0481, "step": 22150 }, { "epoch": 6.63, "grad_norm": 2.3050858974456787, "learning_rate": 3.7636111766045254e-05, "loss": 1.1158, "step": 22155 }, { "epoch": 6.63, "grad_norm": 3.152416944503784, "learning_rate": 3.76310419379481e-05, "loss": 1.188, "step": 22160 }, { "epoch": 6.63, "grad_norm": 2.187168598175049, "learning_rate": 3.7625971412251636e-05, "loss": 1.2462, "step": 22165 }, { "epoch": 
6.63, "grad_norm": 1.528957486152649, "learning_rate": 3.76209001892359e-05, "loss": 1.0216, "step": 22170 }, { "epoch": 6.63, "grad_norm": 2.985142230987549, "learning_rate": 3.7615828269180955e-05, "loss": 1.1851, "step": 22175 }, { "epoch": 6.64, "grad_norm": 1.8914645910263062, "learning_rate": 3.7610755652366933e-05, "loss": 1.1384, "step": 22180 }, { "epoch": 6.64, "grad_norm": 1.485687017440796, "learning_rate": 3.7605682339073986e-05, "loss": 1.1725, "step": 22185 }, { "epoch": 6.64, "grad_norm": 2.7884113788604736, "learning_rate": 3.76006083295823e-05, "loss": 1.0369, "step": 22190 }, { "epoch": 6.64, "grad_norm": 2.6795337200164795, "learning_rate": 3.759553362417212e-05, "loss": 1.2506, "step": 22195 }, { "epoch": 6.64, "grad_norm": 3.732194662094116, "learning_rate": 3.759045822312371e-05, "loss": 1.2024, "step": 22200 }, { "epoch": 6.64, "grad_norm": 2.955843210220337, "learning_rate": 3.758538212671738e-05, "loss": 1.092, "step": 22205 }, { "epoch": 6.64, "grad_norm": 2.543309450149536, "learning_rate": 3.7580305335233466e-05, "loss": 1.1417, "step": 22210 }, { "epoch": 6.65, "grad_norm": 1.1416964530944824, "learning_rate": 3.757522784895238e-05, "loss": 1.2911, "step": 22215 }, { "epoch": 6.65, "grad_norm": 2.8476202487945557, "learning_rate": 3.757014966815452e-05, "loss": 1.1666, "step": 22220 }, { "epoch": 6.65, "grad_norm": 3.7398064136505127, "learning_rate": 3.7565070793120366e-05, "loss": 1.2501, "step": 22225 }, { "epoch": 6.65, "grad_norm": 1.3023401498794556, "learning_rate": 3.755999122413041e-05, "loss": 1.0223, "step": 22230 }, { "epoch": 6.65, "grad_norm": 3.140843391418457, "learning_rate": 3.7554910961465204e-05, "loss": 1.1367, "step": 22235 }, { "epoch": 6.65, "grad_norm": 1.410656452178955, "learning_rate": 3.75498300054053e-05, "loss": 1.227, "step": 22240 }, { "epoch": 6.66, "grad_norm": 3.442096710205078, "learning_rate": 3.754474835623134e-05, "loss": 1.0639, "step": 22245 }, { "epoch": 6.66, "grad_norm": 2.1821417808532715, 
"learning_rate": 3.753966601422396e-05, "loss": 1.2744, "step": 22250 }, { "epoch": 6.66, "grad_norm": 1.2946699857711792, "learning_rate": 3.753458297966387e-05, "loss": 1.1973, "step": 22255 }, { "epoch": 6.66, "grad_norm": 1.561705231666565, "learning_rate": 3.752949925283178e-05, "loss": 1.1897, "step": 22260 }, { "epoch": 6.66, "grad_norm": 1.594902515411377, "learning_rate": 3.7524414834008475e-05, "loss": 1.1862, "step": 22265 }, { "epoch": 6.66, "grad_norm": 2.135448932647705, "learning_rate": 3.751932972347476e-05, "loss": 1.2566, "step": 22270 }, { "epoch": 6.66, "grad_norm": 4.568878650665283, "learning_rate": 3.751424392151147e-05, "loss": 0.9779, "step": 22275 }, { "epoch": 6.67, "grad_norm": 1.9273889064788818, "learning_rate": 3.75091574283995e-05, "loss": 1.2662, "step": 22280 }, { "epoch": 6.67, "grad_norm": 2.1159157752990723, "learning_rate": 3.750407024441977e-05, "loss": 1.1524, "step": 22285 }, { "epoch": 6.67, "grad_norm": 2.8787682056427, "learning_rate": 3.7498982369853235e-05, "loss": 1.1924, "step": 22290 }, { "epoch": 6.67, "grad_norm": 3.1426780223846436, "learning_rate": 3.7493893804980897e-05, "loss": 0.9211, "step": 22295 }, { "epoch": 6.67, "grad_norm": 2.8271982669830322, "learning_rate": 3.748880455008379e-05, "loss": 1.3036, "step": 22300 }, { "epoch": 6.67, "grad_norm": 3.4877443313598633, "learning_rate": 3.7483714605442996e-05, "loss": 1.1612, "step": 22305 }, { "epoch": 6.67, "grad_norm": 1.2759697437286377, "learning_rate": 3.747862397133961e-05, "loss": 1.2321, "step": 22310 }, { "epoch": 6.68, "grad_norm": 2.5716593265533447, "learning_rate": 3.74735326480548e-05, "loss": 1.0509, "step": 22315 }, { "epoch": 6.68, "grad_norm": 1.8768802881240845, "learning_rate": 3.7468440635869746e-05, "loss": 1.2344, "step": 22320 }, { "epoch": 6.68, "grad_norm": 3.4969723224639893, "learning_rate": 3.746334793506567e-05, "loss": 1.1697, "step": 22325 }, { "epoch": 6.68, "grad_norm": 2.126424551010132, "learning_rate": 
3.7458254545923855e-05, "loss": 1.2722, "step": 22330 }, { "epoch": 6.68, "grad_norm": 2.6972811222076416, "learning_rate": 3.745316046872558e-05, "loss": 1.3685, "step": 22335 }, { "epoch": 6.68, "grad_norm": 1.37725830078125, "learning_rate": 3.74480657037522e-05, "loss": 1.2091, "step": 22340 }, { "epoch": 6.69, "grad_norm": 1.036205768585205, "learning_rate": 3.7442970251285095e-05, "loss": 1.3278, "step": 22345 }, { "epoch": 6.69, "grad_norm": 1.2511407136917114, "learning_rate": 3.743787411160567e-05, "loss": 1.1185, "step": 22350 }, { "epoch": 6.69, "grad_norm": 2.403078079223633, "learning_rate": 3.7432777284995384e-05, "loss": 1.187, "step": 22355 }, { "epoch": 6.69, "grad_norm": 2.58774471282959, "learning_rate": 3.742767977173573e-05, "loss": 1.2784, "step": 22360 }, { "epoch": 6.69, "grad_norm": 1.8022263050079346, "learning_rate": 3.742258157210825e-05, "loss": 1.1472, "step": 22365 }, { "epoch": 6.69, "grad_norm": 1.9385359287261963, "learning_rate": 3.74174826863945e-05, "loss": 1.0361, "step": 22370 }, { "epoch": 6.69, "grad_norm": 1.3857041597366333, "learning_rate": 3.741238311487609e-05, "loss": 1.2892, "step": 22375 }, { "epoch": 6.7, "grad_norm": 1.456871747970581, "learning_rate": 3.740728285783466e-05, "loss": 1.251, "step": 22380 }, { "epoch": 6.7, "grad_norm": 1.2048922777175903, "learning_rate": 3.740218191555189e-05, "loss": 1.2085, "step": 22385 }, { "epoch": 6.7, "grad_norm": 1.9001728296279907, "learning_rate": 3.7397080288309504e-05, "loss": 1.1827, "step": 22390 }, { "epoch": 6.7, "grad_norm": 1.7842788696289062, "learning_rate": 3.739197797638927e-05, "loss": 1.1712, "step": 22395 }, { "epoch": 6.7, "grad_norm": 2.8742237091064453, "learning_rate": 3.7386874980072965e-05, "loss": 1.346, "step": 22400 }, { "epoch": 6.7, "grad_norm": 0.9042024612426758, "learning_rate": 3.7381771299642434e-05, "loss": 1.1113, "step": 22405 }, { "epoch": 6.7, "grad_norm": 2.389392852783203, "learning_rate": 3.737666693537954e-05, "loss": 1.2051, 
"step": 22410 }, { "epoch": 6.71, "grad_norm": 1.8585515022277832, "learning_rate": 3.73715618875662e-05, "loss": 1.0961, "step": 22415 }, { "epoch": 6.71, "grad_norm": 1.445894479751587, "learning_rate": 3.736645615648435e-05, "loss": 1.205, "step": 22420 }, { "epoch": 6.71, "grad_norm": 1.4859192371368408, "learning_rate": 3.736134974241599e-05, "loss": 1.2583, "step": 22425 }, { "epoch": 6.71, "grad_norm": 1.2996731996536255, "learning_rate": 3.735624264564313e-05, "loss": 1.1439, "step": 22430 }, { "epoch": 6.71, "grad_norm": 2.0021395683288574, "learning_rate": 3.735113486644783e-05, "loss": 1.153, "step": 22435 }, { "epoch": 6.71, "grad_norm": 1.7902204990386963, "learning_rate": 3.734602640511219e-05, "loss": 1.0953, "step": 22440 }, { "epoch": 6.72, "grad_norm": 2.417541027069092, "learning_rate": 3.734091726191834e-05, "loss": 1.2148, "step": 22445 }, { "epoch": 6.72, "grad_norm": 3.7760798931121826, "learning_rate": 3.7335807437148454e-05, "loss": 1.1157, "step": 22450 }, { "epoch": 6.72, "grad_norm": 1.5918861627578735, "learning_rate": 3.733069693108476e-05, "loss": 1.2129, "step": 22455 }, { "epoch": 6.72, "grad_norm": 0.9340783953666687, "learning_rate": 3.732558574400948e-05, "loss": 1.2252, "step": 22460 }, { "epoch": 6.72, "grad_norm": 1.9744924306869507, "learning_rate": 3.732047387620492e-05, "loss": 1.1734, "step": 22465 }, { "epoch": 6.72, "grad_norm": 1.9062458276748657, "learning_rate": 3.731536132795339e-05, "loss": 1.1843, "step": 22470 }, { "epoch": 6.72, "grad_norm": 2.320115566253662, "learning_rate": 3.731024809953726e-05, "loss": 1.0732, "step": 22475 }, { "epoch": 6.73, "grad_norm": 2.1954009532928467, "learning_rate": 3.7305134191238914e-05, "loss": 1.195, "step": 22480 }, { "epoch": 6.73, "grad_norm": 2.1128578186035156, "learning_rate": 3.73000196033408e-05, "loss": 1.1004, "step": 22485 }, { "epoch": 6.73, "grad_norm": 2.868645191192627, "learning_rate": 3.729490433612539e-05, "loss": 1.1917, "step": 22490 }, { "epoch": 6.73, 
"grad_norm": 4.787483215332031, "learning_rate": 3.7289788389875195e-05, "loss": 1.1271, "step": 22495 }, { "epoch": 6.73, "grad_norm": 1.8350549936294556, "learning_rate": 3.7284671764872764e-05, "loss": 1.1971, "step": 22500 }, { "epoch": 6.73, "grad_norm": 1.9952346086502075, "learning_rate": 3.7279554461400675e-05, "loss": 1.2058, "step": 22505 }, { "epoch": 6.73, "grad_norm": 2.3155946731567383, "learning_rate": 3.727443647974156e-05, "loss": 1.0397, "step": 22510 }, { "epoch": 6.74, "grad_norm": 2.454383611679077, "learning_rate": 3.726931782017807e-05, "loss": 1.2128, "step": 22515 }, { "epoch": 6.74, "grad_norm": 1.4196258783340454, "learning_rate": 3.726419848299292e-05, "loss": 1.3271, "step": 22520 }, { "epoch": 6.74, "grad_norm": 4.044848918914795, "learning_rate": 3.725907846846883e-05, "loss": 1.1593, "step": 22525 }, { "epoch": 6.74, "grad_norm": 1.5444008111953735, "learning_rate": 3.725395777688858e-05, "loss": 1.165, "step": 22530 }, { "epoch": 6.74, "grad_norm": 2.297614097595215, "learning_rate": 3.7248836408534975e-05, "loss": 1.259, "step": 22535 }, { "epoch": 6.74, "grad_norm": 1.7064695358276367, "learning_rate": 3.724371436369087e-05, "loss": 1.2259, "step": 22540 }, { "epoch": 6.75, "grad_norm": 1.426712155342102, "learning_rate": 3.7238591642639155e-05, "loss": 1.1047, "step": 22545 }, { "epoch": 6.75, "grad_norm": 2.8516054153442383, "learning_rate": 3.723346824566273e-05, "loss": 1.1642, "step": 22550 }, { "epoch": 6.75, "grad_norm": 1.5631628036499023, "learning_rate": 3.722834417304459e-05, "loss": 1.1772, "step": 22555 }, { "epoch": 6.75, "grad_norm": 1.1760963201522827, "learning_rate": 3.7223219425067694e-05, "loss": 1.2157, "step": 22560 }, { "epoch": 6.75, "grad_norm": 1.5245758295059204, "learning_rate": 3.7218094002015104e-05, "loss": 1.0718, "step": 22565 }, { "epoch": 6.75, "grad_norm": 5.258598327636719, "learning_rate": 3.7212967904169874e-05, "loss": 1.0714, "step": 22570 }, { "epoch": 6.75, "grad_norm": 
0.8288945555686951, "learning_rate": 3.720784113181513e-05, "loss": 1.1171, "step": 22575 }, { "epoch": 6.76, "grad_norm": 2.472649097442627, "learning_rate": 3.7202713685234006e-05, "loss": 1.2317, "step": 22580 }, { "epoch": 6.76, "grad_norm": 6.6396565437316895, "learning_rate": 3.719758556470969e-05, "loss": 1.1746, "step": 22585 }, { "epoch": 6.76, "grad_norm": 1.3930727243423462, "learning_rate": 3.71924567705254e-05, "loss": 1.2441, "step": 22590 }, { "epoch": 6.76, "grad_norm": 1.635603666305542, "learning_rate": 3.71873273029644e-05, "loss": 1.4045, "step": 22595 }, { "epoch": 6.76, "grad_norm": 2.988915205001831, "learning_rate": 3.718219716230998e-05, "loss": 1.2706, "step": 22600 }, { "epoch": 6.76, "grad_norm": 1.915685772895813, "learning_rate": 3.717706634884547e-05, "loss": 1.3597, "step": 22605 }, { "epoch": 6.76, "grad_norm": 2.3813059329986572, "learning_rate": 3.7171934862854244e-05, "loss": 1.2544, "step": 22610 }, { "epoch": 6.77, "grad_norm": 3.367213010787964, "learning_rate": 3.716680270461971e-05, "loss": 1.2843, "step": 22615 }, { "epoch": 6.77, "grad_norm": 0.9341633319854736, "learning_rate": 3.7161669874425304e-05, "loss": 1.1621, "step": 22620 }, { "epoch": 6.77, "grad_norm": 2.1656148433685303, "learning_rate": 3.715653637255452e-05, "loss": 1.0864, "step": 22625 }, { "epoch": 6.77, "grad_norm": 2.8741602897644043, "learning_rate": 3.715140219929086e-05, "loss": 1.331, "step": 22630 }, { "epoch": 6.77, "grad_norm": 3.68282151222229, "learning_rate": 3.714626735491789e-05, "loss": 1.2079, "step": 22635 }, { "epoch": 6.77, "grad_norm": 1.4136219024658203, "learning_rate": 3.71411318397192e-05, "loss": 1.168, "step": 22640 }, { "epoch": 6.78, "grad_norm": 2.178769588470459, "learning_rate": 3.7135995653978415e-05, "loss": 1.2211, "step": 22645 }, { "epoch": 6.78, "grad_norm": 2.099884033203125, "learning_rate": 3.713085879797921e-05, "loss": 1.2584, "step": 22650 }, { "epoch": 6.78, "grad_norm": 2.237651824951172, "learning_rate": 
3.7125721272005285e-05, "loss": 1.1906, "step": 22655 }, { "epoch": 6.78, "grad_norm": 5.40058708190918, "learning_rate": 3.7120583076340374e-05, "loss": 1.177, "step": 22660 }, { "epoch": 6.78, "grad_norm": 3.936767578125, "learning_rate": 3.711544421126826e-05, "loss": 1.1745, "step": 22665 }, { "epoch": 6.78, "grad_norm": 9.406877517700195, "learning_rate": 3.711030467707275e-05, "loss": 1.1282, "step": 22670 }, { "epoch": 6.78, "grad_norm": 4.18208122253418, "learning_rate": 3.7105164474037706e-05, "loss": 1.3561, "step": 22675 }, { "epoch": 6.79, "grad_norm": 1.8916975259780884, "learning_rate": 3.710002360244701e-05, "loss": 1.2032, "step": 22680 }, { "epoch": 6.79, "grad_norm": 1.6227880716323853, "learning_rate": 3.709488206258459e-05, "loss": 1.1442, "step": 22685 }, { "epoch": 6.79, "grad_norm": 2.2588720321655273, "learning_rate": 3.7089739854734406e-05, "loss": 1.1536, "step": 22690 }, { "epoch": 6.79, "grad_norm": 2.82357120513916, "learning_rate": 3.708459697918045e-05, "loss": 1.1142, "step": 22695 }, { "epoch": 6.79, "grad_norm": 2.484433650970459, "learning_rate": 3.707945343620677e-05, "loss": 1.2712, "step": 22700 }, { "epoch": 6.79, "grad_norm": 3.071495771408081, "learning_rate": 3.7074309226097424e-05, "loss": 1.0004, "step": 22705 }, { "epoch": 6.79, "grad_norm": 1.225174069404602, "learning_rate": 3.706916434913654e-05, "loss": 0.9823, "step": 22710 }, { "epoch": 6.8, "grad_norm": 2.9054837226867676, "learning_rate": 3.7064018805608244e-05, "loss": 1.1946, "step": 22715 }, { "epoch": 6.8, "grad_norm": 2.1783711910247803, "learning_rate": 3.705887259579674e-05, "loss": 1.1034, "step": 22720 }, { "epoch": 6.8, "grad_norm": 7.1069016456604, "learning_rate": 3.7053725719986234e-05, "loss": 1.2409, "step": 22725 }, { "epoch": 6.8, "grad_norm": 1.7012306451797485, "learning_rate": 3.704960774000956e-05, "loss": 1.0688, "step": 22730 }, { "epoch": 6.8, "grad_norm": 2.2616653442382812, "learning_rate": 3.704445966611721e-05, "loss": 1.2547, "step": 
22735 }, { "epoch": 6.8, "grad_norm": 0.8807179927825928, "learning_rate": 3.703931092702188e-05, "loss": 1.0968, "step": 22740 }, { "epoch": 6.81, "grad_norm": 3.9191973209381104, "learning_rate": 3.7034161523007905e-05, "loss": 1.008, "step": 22745 }, { "epoch": 6.81, "grad_norm": 2.1995856761932373, "learning_rate": 3.70290114543597e-05, "loss": 1.1851, "step": 22750 }, { "epoch": 6.81, "grad_norm": 2.6970908641815186, "learning_rate": 3.70238607213617e-05, "loss": 1.1741, "step": 22755 }, { "epoch": 6.81, "grad_norm": 1.7191367149353027, "learning_rate": 3.7018709324298364e-05, "loss": 1.3658, "step": 22760 }, { "epoch": 6.81, "grad_norm": 2.820333242416382, "learning_rate": 3.701355726345421e-05, "loss": 0.9673, "step": 22765 }, { "epoch": 6.81, "grad_norm": 3.5425102710723877, "learning_rate": 3.700840453911378e-05, "loss": 1.2862, "step": 22770 }, { "epoch": 6.81, "grad_norm": 2.4167778491973877, "learning_rate": 3.700325115156165e-05, "loss": 1.1087, "step": 22775 }, { "epoch": 6.82, "grad_norm": 1.7471559047698975, "learning_rate": 3.6998097101082424e-05, "loss": 1.2492, "step": 22780 }, { "epoch": 6.82, "grad_norm": 4.646815776824951, "learning_rate": 3.6992942387960766e-05, "loss": 1.2538, "step": 22785 }, { "epoch": 6.82, "grad_norm": 1.7695302963256836, "learning_rate": 3.698778701248137e-05, "loss": 1.0982, "step": 22790 }, { "epoch": 6.82, "grad_norm": 3.9096667766571045, "learning_rate": 3.698263097492896e-05, "loss": 1.1196, "step": 22795 }, { "epoch": 6.82, "grad_norm": 2.3695907592773438, "learning_rate": 3.69774742755883e-05, "loss": 0.9765, "step": 22800 }, { "epoch": 6.82, "grad_norm": 2.3105177879333496, "learning_rate": 3.697231691474419e-05, "loss": 1.1265, "step": 22805 }, { "epoch": 6.82, "grad_norm": 8.283111572265625, "learning_rate": 3.6967158892681456e-05, "loss": 1.1517, "step": 22810 }, { "epoch": 6.83, "grad_norm": 1.0593783855438232, "learning_rate": 3.696200020968497e-05, "loss": 1.2822, "step": 22815 }, { "epoch": 6.83, 
"grad_norm": 2.067911386489868, "learning_rate": 3.695684086603964e-05, "loss": 1.1125, "step": 22820 }, { "epoch": 6.83, "grad_norm": 7.895202159881592, "learning_rate": 3.695168086203044e-05, "loss": 1.1565, "step": 22825 }, { "epoch": 6.83, "grad_norm": 2.0913262367248535, "learning_rate": 3.694652019794231e-05, "loss": 1.159, "step": 22830 }, { "epoch": 6.83, "grad_norm": 2.38482403755188, "learning_rate": 3.6941358874060295e-05, "loss": 1.2542, "step": 22835 }, { "epoch": 6.83, "grad_norm": 3.0547502040863037, "learning_rate": 3.693619689066944e-05, "loss": 1.3171, "step": 22840 }, { "epoch": 6.83, "grad_norm": 4.048938751220703, "learning_rate": 3.6931034248054834e-05, "loss": 1.0786, "step": 22845 }, { "epoch": 6.84, "grad_norm": 4.187254428863525, "learning_rate": 3.6925870946501615e-05, "loss": 1.1805, "step": 22850 }, { "epoch": 6.84, "grad_norm": 2.219910144805908, "learning_rate": 3.692070698629493e-05, "loss": 1.2842, "step": 22855 }, { "epoch": 6.84, "grad_norm": 3.7236175537109375, "learning_rate": 3.6915542367720005e-05, "loss": 1.2186, "step": 22860 }, { "epoch": 6.84, "grad_norm": 3.3912012577056885, "learning_rate": 3.691037709106205e-05, "loss": 1.0233, "step": 22865 }, { "epoch": 6.84, "grad_norm": 3.296980857849121, "learning_rate": 3.6905211156606347e-05, "loss": 1.2314, "step": 22870 }, { "epoch": 6.84, "grad_norm": 2.4683995246887207, "learning_rate": 3.690004456463821e-05, "loss": 1.3122, "step": 22875 }, { "epoch": 6.85, "grad_norm": 2.9964542388916016, "learning_rate": 3.689487731544298e-05, "loss": 1.2645, "step": 22880 }, { "epoch": 6.85, "grad_norm": 2.661977529525757, "learning_rate": 3.688970940930604e-05, "loss": 0.9723, "step": 22885 }, { "epoch": 6.85, "grad_norm": 1.609717607498169, "learning_rate": 3.6884540846512806e-05, "loss": 1.0939, "step": 22890 }, { "epoch": 6.85, "grad_norm": 2.814666271209717, "learning_rate": 3.687937162734874e-05, "loss": 1.2832, "step": 22895 }, { "epoch": 6.85, "grad_norm": 6.085357666015625, 
"learning_rate": 3.687420175209933e-05, "loss": 1.3523, "step": 22900 }, { "epoch": 6.85, "grad_norm": 2.151413679122925, "learning_rate": 3.686903122105009e-05, "loss": 1.0956, "step": 22905 }, { "epoch": 6.85, "grad_norm": 1.2115709781646729, "learning_rate": 3.686386003448659e-05, "loss": 0.9451, "step": 22910 }, { "epoch": 6.86, "grad_norm": 1.7795747518539429, "learning_rate": 3.685868819269444e-05, "loss": 1.272, "step": 22915 }, { "epoch": 6.86, "grad_norm": 1.4842565059661865, "learning_rate": 3.6853515695959264e-05, "loss": 1.3025, "step": 22920 }, { "epoch": 6.86, "grad_norm": 1.450868010520935, "learning_rate": 3.684834254456674e-05, "loss": 1.2952, "step": 22925 }, { "epoch": 6.86, "grad_norm": 2.6749181747436523, "learning_rate": 3.684316873880257e-05, "loss": 1.2248, "step": 22930 }, { "epoch": 6.86, "grad_norm": 1.4082355499267578, "learning_rate": 3.6837994278952506e-05, "loss": 1.2363, "step": 22935 }, { "epoch": 6.86, "grad_norm": 1.7880902290344238, "learning_rate": 3.6832819165302325e-05, "loss": 1.1894, "step": 22940 }, { "epoch": 6.86, "grad_norm": 2.6306281089782715, "learning_rate": 3.682764339813783e-05, "loss": 1.1545, "step": 22945 }, { "epoch": 6.87, "grad_norm": 4.578315734863281, "learning_rate": 3.682246697774489e-05, "loss": 1.0853, "step": 22950 }, { "epoch": 6.87, "grad_norm": 1.6041889190673828, "learning_rate": 3.6817289904409394e-05, "loss": 1.2719, "step": 22955 }, { "epoch": 6.87, "grad_norm": 4.389896869659424, "learning_rate": 3.6812112178417244e-05, "loss": 1.12, "step": 22960 }, { "epoch": 6.87, "grad_norm": 3.254216432571411, "learning_rate": 3.6806933800054425e-05, "loss": 1.2594, "step": 22965 }, { "epoch": 6.87, "grad_norm": 1.4171181917190552, "learning_rate": 3.680175476960693e-05, "loss": 1.2488, "step": 22970 }, { "epoch": 6.87, "grad_norm": 8.962324142456055, "learning_rate": 3.679657508736078e-05, "loss": 1.1462, "step": 22975 }, { "epoch": 6.88, "grad_norm": 4.2052321434021, "learning_rate": 
3.6791394753602054e-05, "loss": 1.241, "step": 22980 }, { "epoch": 6.88, "grad_norm": 1.6290329694747925, "learning_rate": 3.678621376861685e-05, "loss": 1.2826, "step": 22985 }, { "epoch": 6.88, "grad_norm": 2.770118236541748, "learning_rate": 3.678103213269131e-05, "loss": 1.3042, "step": 22990 }, { "epoch": 6.88, "grad_norm": 2.3370397090911865, "learning_rate": 3.6775849846111614e-05, "loss": 1.1647, "step": 22995 }, { "epoch": 6.88, "grad_norm": 2.9405107498168945, "learning_rate": 3.677066690916397e-05, "loss": 1.231, "step": 23000 }, { "epoch": 6.88, "grad_norm": 3.535926580429077, "learning_rate": 3.6765483322134625e-05, "loss": 1.1303, "step": 23005 }, { "epoch": 6.88, "grad_norm": 1.9250695705413818, "learning_rate": 3.676029908530987e-05, "loss": 1.2256, "step": 23010 }, { "epoch": 6.89, "grad_norm": 1.7612509727478027, "learning_rate": 3.6755114198976016e-05, "loss": 1.061, "step": 23015 }, { "epoch": 6.89, "grad_norm": 2.3679986000061035, "learning_rate": 3.674992866341943e-05, "loss": 1.1068, "step": 23020 }, { "epoch": 6.89, "grad_norm": 1.4026302099227905, "learning_rate": 3.6744742478926485e-05, "loss": 1.1352, "step": 23025 }, { "epoch": 6.89, "grad_norm": 2.6781442165374756, "learning_rate": 3.673955564578364e-05, "loss": 1.3133, "step": 23030 }, { "epoch": 6.89, "grad_norm": 3.6922409534454346, "learning_rate": 3.673436816427732e-05, "loss": 1.1815, "step": 23035 }, { "epoch": 6.89, "grad_norm": 1.3594402074813843, "learning_rate": 3.6729180034694055e-05, "loss": 1.2121, "step": 23040 }, { "epoch": 6.89, "grad_norm": 2.578176736831665, "learning_rate": 3.672399125732036e-05, "loss": 1.2374, "step": 23045 }, { "epoch": 6.9, "grad_norm": 2.196401357650757, "learning_rate": 3.671880183244282e-05, "loss": 1.2618, "step": 23050 }, { "epoch": 6.9, "grad_norm": 1.1504014730453491, "learning_rate": 3.671361176034803e-05, "loss": 1.2077, "step": 23055 }, { "epoch": 6.9, "grad_norm": 1.4980058670043945, "learning_rate": 3.670842104132265e-05, "loss": 
1.1627, "step": 23060 }, { "epoch": 6.9, "grad_norm": 2.5145270824432373, "learning_rate": 3.670322967565333e-05, "loss": 1.2164, "step": 23065 }, { "epoch": 6.9, "grad_norm": 1.6475614309310913, "learning_rate": 3.6698037663626815e-05, "loss": 1.1775, "step": 23070 }, { "epoch": 6.9, "grad_norm": 1.1627910137176514, "learning_rate": 3.669284500552983e-05, "loss": 1.2284, "step": 23075 }, { "epoch": 6.91, "grad_norm": 2.22105073928833, "learning_rate": 3.668765170164917e-05, "loss": 1.1692, "step": 23080 }, { "epoch": 6.91, "grad_norm": 1.5115783214569092, "learning_rate": 3.668245775227166e-05, "loss": 1.0566, "step": 23085 }, { "epoch": 6.91, "grad_norm": 1.735473394393921, "learning_rate": 3.667726315768415e-05, "loss": 1.0803, "step": 23090 }, { "epoch": 6.91, "grad_norm": 2.8493192195892334, "learning_rate": 3.667206791817354e-05, "loss": 1.2563, "step": 23095 }, { "epoch": 6.91, "grad_norm": 2.09541916847229, "learning_rate": 3.6666872034026746e-05, "loss": 1.2383, "step": 23100 }, { "epoch": 6.91, "grad_norm": 2.5542640686035156, "learning_rate": 3.666167550553073e-05, "loss": 1.1892, "step": 23105 }, { "epoch": 6.91, "grad_norm": 3.2478058338165283, "learning_rate": 3.66564783329725e-05, "loss": 1.3445, "step": 23110 }, { "epoch": 6.92, "grad_norm": 3.8739407062530518, "learning_rate": 3.665128051663909e-05, "loss": 1.2336, "step": 23115 }, { "epoch": 6.92, "grad_norm": 1.4774131774902344, "learning_rate": 3.664608205681757e-05, "loss": 1.0818, "step": 23120 }, { "epoch": 6.92, "grad_norm": 2.790187120437622, "learning_rate": 3.664088295379505e-05, "loss": 1.1317, "step": 23125 }, { "epoch": 6.92, "grad_norm": 5.5581278800964355, "learning_rate": 3.663568320785865e-05, "loss": 1.2463, "step": 23130 }, { "epoch": 6.92, "grad_norm": 1.6298890113830566, "learning_rate": 3.6630482819295564e-05, "loss": 1.1815, "step": 23135 }, { "epoch": 6.92, "grad_norm": 2.767247200012207, "learning_rate": 3.662528178839301e-05, "loss": 1.1912, "step": 23140 }, { "epoch": 
6.92, "grad_norm": 2.707919120788574, "learning_rate": 3.6620080115438214e-05, "loss": 1.1293, "step": 23145 }, { "epoch": 6.93, "grad_norm": 4.199367046356201, "learning_rate": 3.661487780071848e-05, "loss": 1.0369, "step": 23150 }, { "epoch": 6.93, "grad_norm": 0.9766875505447388, "learning_rate": 3.6609674844521113e-05, "loss": 1.2359, "step": 23155 }, { "epoch": 6.93, "grad_norm": 2.218275308609009, "learning_rate": 3.6604471247133476e-05, "loss": 1.314, "step": 23160 }, { "epoch": 6.93, "grad_norm": 3.2757742404937744, "learning_rate": 3.659926700884296e-05, "loss": 1.1953, "step": 23165 }, { "epoch": 6.93, "grad_norm": 1.7651225328445435, "learning_rate": 3.6594062129936974e-05, "loss": 1.2148, "step": 23170 }, { "epoch": 6.93, "grad_norm": 6.852691650390625, "learning_rate": 3.6588856610702994e-05, "loss": 1.1669, "step": 23175 }, { "epoch": 6.94, "grad_norm": 4.03468132019043, "learning_rate": 3.65836504514285e-05, "loss": 1.3234, "step": 23180 }, { "epoch": 6.94, "grad_norm": 1.6668856143951416, "learning_rate": 3.657844365240104e-05, "loss": 1.2555, "step": 23185 }, { "epoch": 6.94, "grad_norm": 2.5867419242858887, "learning_rate": 3.657323621390818e-05, "loss": 1.2328, "step": 23190 }, { "epoch": 6.94, "grad_norm": 4.226202011108398, "learning_rate": 3.65680281362375e-05, "loss": 1.2422, "step": 23195 }, { "epoch": 6.94, "grad_norm": 1.6796958446502686, "learning_rate": 3.656281941967665e-05, "loss": 1.3767, "step": 23200 }, { "epoch": 6.94, "grad_norm": 2.898170232772827, "learning_rate": 3.6557610064513314e-05, "loss": 1.2212, "step": 23205 }, { "epoch": 6.94, "grad_norm": 2.079902172088623, "learning_rate": 3.6552400071035184e-05, "loss": 1.1753, "step": 23210 }, { "epoch": 6.95, "grad_norm": 2.3818612098693848, "learning_rate": 3.654718943953e-05, "loss": 1.2634, "step": 23215 }, { "epoch": 6.95, "grad_norm": 2.0267412662506104, "learning_rate": 3.654197817028555e-05, "loss": 1.294, "step": 23220 }, { "epoch": 6.95, "grad_norm": 2.046330213546753, 
"learning_rate": 3.6536766263589646e-05, "loss": 1.2195, "step": 23225 }, { "epoch": 6.95, "grad_norm": 1.5229865312576294, "learning_rate": 3.653155371973012e-05, "loss": 1.2128, "step": 23230 }, { "epoch": 6.95, "grad_norm": 1.3038949966430664, "learning_rate": 3.652634053899487e-05, "loss": 1.1954, "step": 23235 }, { "epoch": 6.95, "grad_norm": 2.71051025390625, "learning_rate": 3.652112672167183e-05, "loss": 1.1658, "step": 23240 }, { "epoch": 6.95, "grad_norm": 2.3642067909240723, "learning_rate": 3.651591226804892e-05, "loss": 1.2206, "step": 23245 }, { "epoch": 6.96, "grad_norm": 1.8010873794555664, "learning_rate": 3.6510697178414156e-05, "loss": 1.2646, "step": 23250 }, { "epoch": 6.96, "grad_norm": 2.7175979614257812, "learning_rate": 3.6505481453055554e-05, "loss": 1.097, "step": 23255 }, { "epoch": 6.96, "grad_norm": 7.800134181976318, "learning_rate": 3.6500265092261164e-05, "loss": 0.961, "step": 23260 }, { "epoch": 6.96, "grad_norm": 3.0702438354492188, "learning_rate": 3.6495048096319084e-05, "loss": 1.1067, "step": 23265 }, { "epoch": 6.96, "grad_norm": 2.858368158340454, "learning_rate": 3.648983046551745e-05, "loss": 1.1169, "step": 23270 }, { "epoch": 6.96, "grad_norm": 4.364511966705322, "learning_rate": 3.6484612200144425e-05, "loss": 1.057, "step": 23275 }, { "epoch": 6.97, "grad_norm": 1.3396683931350708, "learning_rate": 3.64793933004882e-05, "loss": 1.1468, "step": 23280 }, { "epoch": 6.97, "grad_norm": 2.03324294090271, "learning_rate": 3.647417376683702e-05, "loss": 1.126, "step": 23285 }, { "epoch": 6.97, "grad_norm": 2.8686647415161133, "learning_rate": 3.646895359947915e-05, "loss": 1.1806, "step": 23290 }, { "epoch": 6.97, "grad_norm": 1.482798457145691, "learning_rate": 3.646373279870289e-05, "loss": 1.2207, "step": 23295 }, { "epoch": 6.97, "grad_norm": 9.18463134765625, "learning_rate": 3.6458511364796585e-05, "loss": 1.2255, "step": 23300 }, { "epoch": 6.97, "grad_norm": 2.2677526473999023, "learning_rate": 3.645328929804861e-05, 
"loss": 1.3509, "step": 23305 }, { "epoch": 6.97, "grad_norm": 1.5335679054260254, "learning_rate": 3.6448066598747365e-05, "loss": 1.2393, "step": 23310 }, { "epoch": 6.98, "grad_norm": 3.603954792022705, "learning_rate": 3.644284326718131e-05, "loss": 1.1396, "step": 23315 }, { "epoch": 6.98, "grad_norm": 2.735201835632324, "learning_rate": 3.6437619303638906e-05, "loss": 1.0535, "step": 23320 }, { "epoch": 6.98, "grad_norm": 2.017690420150757, "learning_rate": 3.6432394708408684e-05, "loss": 1.1505, "step": 23325 }, { "epoch": 6.98, "grad_norm": 2.649900436401367, "learning_rate": 3.6427169481779185e-05, "loss": 1.1615, "step": 23330 }, { "epoch": 6.98, "grad_norm": 3.7284982204437256, "learning_rate": 3.642194362403899e-05, "loss": 1.0333, "step": 23335 }, { "epoch": 6.98, "grad_norm": 1.5677194595336914, "learning_rate": 3.6416717135476726e-05, "loss": 1.1199, "step": 23340 }, { "epoch": 6.98, "grad_norm": 1.4740290641784668, "learning_rate": 3.6411490016381036e-05, "loss": 1.2606, "step": 23345 }, { "epoch": 6.99, "grad_norm": 2.8093674182891846, "learning_rate": 3.640626226704063e-05, "loss": 1.0608, "step": 23350 }, { "epoch": 6.99, "grad_norm": 2.619047164916992, "learning_rate": 3.640103388774419e-05, "loss": 1.2281, "step": 23355 }, { "epoch": 6.99, "grad_norm": 3.5901520252227783, "learning_rate": 3.6395804878780514e-05, "loss": 1.0943, "step": 23360 }, { "epoch": 6.99, "grad_norm": 2.7254862785339355, "learning_rate": 3.639057524043838e-05, "loss": 1.1382, "step": 23365 }, { "epoch": 6.99, "grad_norm": 2.1018872261047363, "learning_rate": 3.638534497300661e-05, "loss": 1.1479, "step": 23370 }, { "epoch": 6.99, "grad_norm": 3.5326809883117676, "learning_rate": 3.6380114076774076e-05, "loss": 1.2146, "step": 23375 }, { "epoch": 7.0, "grad_norm": 3.996938467025757, "learning_rate": 3.637488255202967e-05, "loss": 1.1037, "step": 23380 }, { "epoch": 7.0, "grad_norm": 5.027413845062256, "learning_rate": 3.636965039906232e-05, "loss": 1.1256, "step": 23385 }, 
{ "epoch": 7.0, "grad_norm": 2.1053760051727295, "learning_rate": 3.6364417618161e-05, "loss": 1.1905, "step": 23390 }, { "epoch": 7.0, "grad_norm": 2.8059771060943604, "learning_rate": 3.635918420961471e-05, "loss": 1.2107, "step": 23395 }, { "epoch": 7.0, "grad_norm": 3.2289247512817383, "learning_rate": 3.635395017371248e-05, "loss": 1.0534, "step": 23400 }, { "epoch": 7.0, "grad_norm": 2.3690922260284424, "learning_rate": 3.634871551074338e-05, "loss": 1.0033, "step": 23405 }, { "epoch": 7.0, "grad_norm": 1.940168857574463, "learning_rate": 3.634348022099652e-05, "loss": 1.1098, "step": 23410 }, { "epoch": 7.01, "grad_norm": 2.454968214035034, "learning_rate": 3.633824430476104e-05, "loss": 1.0398, "step": 23415 }, { "epoch": 7.01, "grad_norm": 2.5789737701416016, "learning_rate": 3.6333007762326114e-05, "loss": 1.1326, "step": 23420 }, { "epoch": 7.01, "grad_norm": 2.819851875305176, "learning_rate": 3.6327770593980946e-05, "loss": 0.9739, "step": 23425 }, { "epoch": 7.01, "grad_norm": Infinity, "learning_rate": 3.6323580408843814e-05, "loss": 1.1279, "step": 23430 }, { "epoch": 7.01, "grad_norm": 2.4087419509887695, "learning_rate": 3.631834211458914e-05, "loss": 1.0759, "step": 23435 }, { "epoch": 7.01, "grad_norm": 1.9813141822814941, "learning_rate": 3.6313103195234194e-05, "loss": 1.2067, "step": 23440 }, { "epoch": 7.01, "grad_norm": 1.0360766649246216, "learning_rate": 3.6307863651068315e-05, "loss": 1.1645, "step": 23445 }, { "epoch": 7.02, "grad_norm": 2.9632136821746826, "learning_rate": 3.6302623482380876e-05, "loss": 1.1632, "step": 23450 }, { "epoch": 7.02, "grad_norm": 2.059884786605835, "learning_rate": 3.62973826894613e-05, "loss": 1.0065, "step": 23455 }, { "epoch": 7.02, "grad_norm": 1.9883657693862915, "learning_rate": 3.629214127259901e-05, "loss": 1.1301, "step": 23460 }, { "epoch": 7.02, "grad_norm": 1.7367918491363525, "learning_rate": 3.628689923208351e-05, "loss": 1.1835, "step": 23465 }, { "epoch": 7.02, "grad_norm": 
1.1696290969848633, "learning_rate": 3.6281656568204285e-05, "loss": 1.241, "step": 23470 }, { "epoch": 7.02, "grad_norm": 2.884432315826416, "learning_rate": 3.62764132812509e-05, "loss": 1.0894, "step": 23475 }, { "epoch": 7.02, "grad_norm": 1.4623018503189087, "learning_rate": 3.627116937151293e-05, "loss": 1.1655, "step": 23480 }, { "epoch": 7.03, "grad_norm": 4.936588287353516, "learning_rate": 3.626592483927999e-05, "loss": 1.0457, "step": 23485 }, { "epoch": 7.03, "grad_norm": 2.926048517227173, "learning_rate": 3.626067968484172e-05, "loss": 1.1325, "step": 23490 }, { "epoch": 7.03, "grad_norm": 2.242962598800659, "learning_rate": 3.625543390848783e-05, "loss": 1.0459, "step": 23495 }, { "epoch": 7.03, "grad_norm": 1.152270793914795, "learning_rate": 3.625018751050803e-05, "loss": 0.974, "step": 23500 }, { "epoch": 7.03, "grad_norm": 2.016425371170044, "learning_rate": 3.624494049119205e-05, "loss": 1.1342, "step": 23505 }, { "epoch": 7.03, "grad_norm": 2.164924144744873, "learning_rate": 3.623969285082971e-05, "loss": 1.1836, "step": 23510 }, { "epoch": 7.04, "grad_norm": 2.6692936420440674, "learning_rate": 3.623444458971081e-05, "loss": 1.2556, "step": 23515 }, { "epoch": 7.04, "grad_norm": 1.107765793800354, "learning_rate": 3.622919570812521e-05, "loss": 1.2507, "step": 23520 }, { "epoch": 7.04, "grad_norm": 1.9478957653045654, "learning_rate": 3.622394620636281e-05, "loss": 1.0116, "step": 23525 }, { "epoch": 7.04, "grad_norm": 2.816403865814209, "learning_rate": 3.621869608471352e-05, "loss": 0.9359, "step": 23530 }, { "epoch": 7.04, "grad_norm": 2.90093994140625, "learning_rate": 3.621344534346732e-05, "loss": 1.109, "step": 23535 }, { "epoch": 7.04, "grad_norm": 2.196246385574341, "learning_rate": 3.620819398291418e-05, "loss": 1.0691, "step": 23540 }, { "epoch": 7.04, "grad_norm": 2.3277361392974854, "learning_rate": 3.6202942003344134e-05, "loss": 1.0831, "step": 23545 }, { "epoch": 7.05, "grad_norm": 3.5473828315734863, "learning_rate": 
3.619768940504725e-05, "loss": 1.0406, "step": 23550 }, { "epoch": 7.05, "grad_norm": 3.1161794662475586, "learning_rate": 3.619243618831362e-05, "loss": 1.145, "step": 23555 }, { "epoch": 7.05, "grad_norm": 1.5470253229141235, "learning_rate": 3.618718235343337e-05, "loss": 1.0475, "step": 23560 }, { "epoch": 7.05, "grad_norm": 1.8705344200134277, "learning_rate": 3.618192790069668e-05, "loss": 1.1717, "step": 23565 }, { "epoch": 7.05, "grad_norm": 2.805480480194092, "learning_rate": 3.617667283039372e-05, "loss": 1.1476, "step": 23570 }, { "epoch": 7.05, "grad_norm": 1.1502296924591064, "learning_rate": 3.617141714281473e-05, "loss": 0.9713, "step": 23575 }, { "epoch": 7.05, "grad_norm": 2.4745311737060547, "learning_rate": 3.616616083825e-05, "loss": 1.1653, "step": 23580 }, { "epoch": 7.06, "grad_norm": 1.45720636844635, "learning_rate": 3.616090391698981e-05, "loss": 1.2729, "step": 23585 }, { "epoch": 7.06, "grad_norm": 1.0925184488296509, "learning_rate": 3.615564637932449e-05, "loss": 0.9977, "step": 23590 }, { "epoch": 7.06, "grad_norm": 2.9500515460968018, "learning_rate": 3.615038822554442e-05, "loss": 1.0472, "step": 23595 }, { "epoch": 7.06, "grad_norm": 1.715450644493103, "learning_rate": 3.614512945594001e-05, "loss": 1.2471, "step": 23600 }, { "epoch": 7.06, "grad_norm": 1.3677220344543457, "learning_rate": 3.6139870070801675e-05, "loss": 1.0056, "step": 23605 }, { "epoch": 7.06, "grad_norm": 1.0849230289459229, "learning_rate": 3.613461007041989e-05, "loss": 1.0685, "step": 23610 }, { "epoch": 7.07, "grad_norm": 1.6670022010803223, "learning_rate": 3.612934945508517e-05, "loss": 1.1105, "step": 23615 }, { "epoch": 7.07, "grad_norm": 1.0212535858154297, "learning_rate": 3.612408822508805e-05, "loss": 1.1527, "step": 23620 }, { "epoch": 7.07, "grad_norm": 3.0256199836730957, "learning_rate": 3.611882638071909e-05, "loss": 1.1113, "step": 23625 }, { "epoch": 7.07, "grad_norm": 7.463032245635986, "learning_rate": 3.6113563922268926e-05, "loss": 1.104, 
"step": 23630 }, { "epoch": 7.07, "grad_norm": 2.4572513103485107, "learning_rate": 3.6108300850028165e-05, "loss": 1.2712, "step": 23635 }, { "epoch": 7.07, "grad_norm": 2.344938278198242, "learning_rate": 3.61030371642875e-05, "loss": 0.9881, "step": 23640 }, { "epoch": 7.07, "grad_norm": 2.6021649837493896, "learning_rate": 3.609777286533763e-05, "loss": 1.2082, "step": 23645 }, { "epoch": 7.08, "grad_norm": 2.0514180660247803, "learning_rate": 3.6092507953469305e-05, "loss": 0.9766, "step": 23650 }, { "epoch": 7.08, "grad_norm": 4.5105881690979, "learning_rate": 3.608724242897328e-05, "loss": 0.937, "step": 23655 }, { "epoch": 7.08, "grad_norm": 3.2438158988952637, "learning_rate": 3.6081976292140395e-05, "loss": 1.138, "step": 23660 }, { "epoch": 7.08, "grad_norm": 3.015331745147705, "learning_rate": 3.607670954326147e-05, "loss": 1.1613, "step": 23665 }, { "epoch": 7.08, "grad_norm": 1.9946476221084595, "learning_rate": 3.6071442182627395e-05, "loss": 1.2047, "step": 23670 }, { "epoch": 7.08, "grad_norm": 3.2494962215423584, "learning_rate": 3.606617421052908e-05, "loss": 1.0119, "step": 23675 }, { "epoch": 7.08, "grad_norm": 2.6945230960845947, "learning_rate": 3.6060905627257455e-05, "loss": 1.0004, "step": 23680 }, { "epoch": 7.09, "grad_norm": 2.1307501792907715, "learning_rate": 3.6055636433103524e-05, "loss": 0.9985, "step": 23685 }, { "epoch": 7.09, "grad_norm": 1.7671560049057007, "learning_rate": 3.6050366628358276e-05, "loss": 1.1638, "step": 23690 }, { "epoch": 7.09, "grad_norm": 4.131464004516602, "learning_rate": 3.6045096213312766e-05, "loss": 1.0299, "step": 23695 }, { "epoch": 7.09, "grad_norm": 1.8803554773330688, "learning_rate": 3.6039825188258075e-05, "loss": 1.1465, "step": 23700 }, { "epoch": 7.09, "grad_norm": 4.313520908355713, "learning_rate": 3.603455355348531e-05, "loss": 1.1605, "step": 23705 }, { "epoch": 7.09, "grad_norm": 1.4319663047790527, "learning_rate": 3.602928130928563e-05, "loss": 1.13, "step": 23710 }, { "epoch": 7.1, 
"grad_norm": 1.6968344449996948, "learning_rate": 3.60240084559502e-05, "loss": 1.0394, "step": 23715 }, { "epoch": 7.1, "grad_norm": 2.2089247703552246, "learning_rate": 3.601873499377024e-05, "loss": 1.0584, "step": 23720 }, { "epoch": 7.1, "grad_norm": 2.618340253829956, "learning_rate": 3.601346092303701e-05, "loss": 1.1993, "step": 23725 }, { "epoch": 7.1, "grad_norm": 1.327785849571228, "learning_rate": 3.600818624404177e-05, "loss": 1.0758, "step": 23730 }, { "epoch": 7.1, "grad_norm": 3.1127471923828125, "learning_rate": 3.600291095707585e-05, "loss": 1.1543, "step": 23735 }, { "epoch": 7.1, "grad_norm": 1.783038854598999, "learning_rate": 3.59976350624306e-05, "loss": 1.0839, "step": 23740 }, { "epoch": 7.1, "grad_norm": 1.7736161947250366, "learning_rate": 3.5992358560397394e-05, "loss": 1.0779, "step": 23745 }, { "epoch": 7.11, "grad_norm": 2.9156086444854736, "learning_rate": 3.5987081451267646e-05, "loss": 1.1455, "step": 23750 }, { "epoch": 7.11, "grad_norm": 3.304860830307007, "learning_rate": 3.598180373533281e-05, "loss": 1.0505, "step": 23755 }, { "epoch": 7.11, "grad_norm": 1.9659478664398193, "learning_rate": 3.597652541288438e-05, "loss": 1.1696, "step": 23760 }, { "epoch": 7.11, "grad_norm": 3.989715814590454, "learning_rate": 3.597124648421384e-05, "loss": 1.1278, "step": 23765 }, { "epoch": 7.11, "grad_norm": 2.289445400238037, "learning_rate": 3.596596694961278e-05, "loss": 1.2564, "step": 23770 }, { "epoch": 7.11, "grad_norm": 1.6638239622116089, "learning_rate": 3.5960686809372756e-05, "loss": 1.2167, "step": 23775 }, { "epoch": 7.11, "grad_norm": 2.3342809677124023, "learning_rate": 3.595540606378539e-05, "loss": 1.1625, "step": 23780 }, { "epoch": 7.12, "grad_norm": 2.215494155883789, "learning_rate": 3.595012471314234e-05, "loss": 0.9338, "step": 23785 }, { "epoch": 7.12, "grad_norm": 2.7711009979248047, "learning_rate": 3.594484275773529e-05, "loss": 1.0951, "step": 23790 }, { "epoch": 7.12, "grad_norm": 2.727311611175537, 
"learning_rate": 3.593956019785594e-05, "loss": 1.1065, "step": 23795 }, { "epoch": 7.12, "grad_norm": 1.7198123931884766, "learning_rate": 3.5934277033796055e-05, "loss": 1.102, "step": 23800 }, { "epoch": 7.12, "grad_norm": 1.2289972305297852, "learning_rate": 3.592899326584741e-05, "loss": 1.1591, "step": 23805 }, { "epoch": 7.12, "grad_norm": 9.438529968261719, "learning_rate": 3.5923708894301836e-05, "loss": 1.2098, "step": 23810 }, { "epoch": 7.13, "grad_norm": 2.6810221672058105, "learning_rate": 3.591842391945117e-05, "loss": 1.1817, "step": 23815 }, { "epoch": 7.13, "grad_norm": 3.3912224769592285, "learning_rate": 3.591313834158729e-05, "loss": 1.1386, "step": 23820 }, { "epoch": 7.13, "grad_norm": 3.810546398162842, "learning_rate": 3.590785216100214e-05, "loss": 1.1549, "step": 23825 }, { "epoch": 7.13, "grad_norm": 2.7697300910949707, "learning_rate": 3.590256537798765e-05, "loss": 1.2004, "step": 23830 }, { "epoch": 7.13, "grad_norm": 1.2456960678100586, "learning_rate": 3.58972779928358e-05, "loss": 1.1418, "step": 23835 }, { "epoch": 7.13, "grad_norm": 2.144900321960449, "learning_rate": 3.589199000583862e-05, "loss": 1.199, "step": 23840 }, { "epoch": 7.13, "grad_norm": 3.3847482204437256, "learning_rate": 3.588670141728815e-05, "loss": 1.1028, "step": 23845 }, { "epoch": 7.14, "grad_norm": 2.134644031524658, "learning_rate": 3.5881412227476476e-05, "loss": 1.0711, "step": 23850 }, { "epoch": 7.14, "grad_norm": 1.6338741779327393, "learning_rate": 3.587612243669571e-05, "loss": 1.1405, "step": 23855 }, { "epoch": 7.14, "grad_norm": 4.937558650970459, "learning_rate": 3.5870832045238013e-05, "loss": 1.1193, "step": 23860 }, { "epoch": 7.14, "grad_norm": 2.3749191761016846, "learning_rate": 3.5865541053395564e-05, "loss": 1.0552, "step": 23865 }, { "epoch": 7.14, "grad_norm": 1.3177196979522705, "learning_rate": 3.586024946146057e-05, "loss": 1.2673, "step": 23870 }, { "epoch": 7.14, "grad_norm": 5.730068206787109, "learning_rate": 
3.585495726972529e-05, "loss": 1.181, "step": 23875 }, { "epoch": 7.14, "grad_norm": 2.8973639011383057, "learning_rate": 3.584966447848201e-05, "loss": 1.1447, "step": 23880 }, { "epoch": 7.15, "grad_norm": 2.603912830352783, "learning_rate": 3.584437108802303e-05, "loss": 1.1617, "step": 23885 }, { "epoch": 7.15, "grad_norm": 2.1435694694519043, "learning_rate": 3.583907709864072e-05, "loss": 1.1438, "step": 23890 }, { "epoch": 7.15, "grad_norm": 3.071255922317505, "learning_rate": 3.5833782510627436e-05, "loss": 1.1503, "step": 23895 }, { "epoch": 7.15, "grad_norm": 2.3412327766418457, "learning_rate": 3.582848732427561e-05, "loss": 1.2069, "step": 23900 }, { "epoch": 7.15, "grad_norm": 2.986461639404297, "learning_rate": 3.5823191539877674e-05, "loss": 1.0559, "step": 23905 }, { "epoch": 7.15, "grad_norm": 1.189259648323059, "learning_rate": 3.581789515772613e-05, "loss": 1.2836, "step": 23910 }, { "epoch": 7.16, "grad_norm": 2.6002511978149414, "learning_rate": 3.5812598178113476e-05, "loss": 1.2934, "step": 23915 }, { "epoch": 7.16, "grad_norm": 1.7060097455978394, "learning_rate": 3.580730060133227e-05, "loss": 1.1425, "step": 23920 }, { "epoch": 7.16, "grad_norm": 2.59647536277771, "learning_rate": 3.580200242767508e-05, "loss": 1.0499, "step": 23925 }, { "epoch": 7.16, "grad_norm": 1.8929245471954346, "learning_rate": 3.5796703657434526e-05, "loss": 1.2425, "step": 23930 }, { "epoch": 7.16, "grad_norm": 1.3510212898254395, "learning_rate": 3.579140429090325e-05, "loss": 1.0355, "step": 23935 }, { "epoch": 7.16, "grad_norm": 1.923653483390808, "learning_rate": 3.578610432837393e-05, "loss": 1.1344, "step": 23940 }, { "epoch": 7.16, "grad_norm": 2.9542462825775146, "learning_rate": 3.578080377013928e-05, "loss": 1.2874, "step": 23945 }, { "epoch": 7.17, "grad_norm": 3.119513988494873, "learning_rate": 3.577550261649204e-05, "loss": 1.1657, "step": 23950 }, { "epoch": 7.17, "grad_norm": 2.2563698291778564, "learning_rate": 3.5770200867725e-05, "loss": 1.3417, 
"step": 23955 }, { "epoch": 7.17, "grad_norm": 3.3380630016326904, "learning_rate": 3.5764898524130965e-05, "loss": 1.2012, "step": 23960 }, { "epoch": 7.17, "grad_norm": 3.131051540374756, "learning_rate": 3.5759595586002765e-05, "loss": 1.0301, "step": 23965 }, { "epoch": 7.17, "grad_norm": 4.349595069885254, "learning_rate": 3.575429205363329e-05, "loss": 1.1835, "step": 23970 }, { "epoch": 7.17, "grad_norm": 1.5668723583221436, "learning_rate": 3.574898792731544e-05, "loss": 1.2774, "step": 23975 }, { "epoch": 7.17, "grad_norm": 6.6469855308532715, "learning_rate": 3.5743683207342154e-05, "loss": 1.2421, "step": 23980 }, { "epoch": 7.18, "grad_norm": 1.9514319896697998, "learning_rate": 3.573837789400643e-05, "loss": 1.1226, "step": 23985 }, { "epoch": 7.18, "grad_norm": 1.3996671438217163, "learning_rate": 3.5733071987601235e-05, "loss": 1.195, "step": 23990 }, { "epoch": 7.18, "grad_norm": 0.9684573411941528, "learning_rate": 3.572776548841964e-05, "loss": 1.1764, "step": 23995 }, { "epoch": 7.18, "grad_norm": 1.6927798986434937, "learning_rate": 3.572245839675471e-05, "loss": 1.094, "step": 24000 }, { "epoch": 7.18, "grad_norm": 3.316277265548706, "learning_rate": 3.571715071289954e-05, "loss": 1.0117, "step": 24005 }, { "epoch": 7.18, "grad_norm": 3.6141722202301025, "learning_rate": 3.571184243714729e-05, "loss": 1.0562, "step": 24010 }, { "epoch": 7.19, "grad_norm": 3.6086862087249756, "learning_rate": 3.570653356979111e-05, "loss": 1.1708, "step": 24015 }, { "epoch": 7.19, "grad_norm": 1.5085479021072388, "learning_rate": 3.5701224111124206e-05, "loss": 1.1285, "step": 24020 }, { "epoch": 7.19, "grad_norm": 3.2829747200012207, "learning_rate": 3.5695914061439816e-05, "loss": 1.227, "step": 24025 }, { "epoch": 7.19, "grad_norm": 3.2367653846740723, "learning_rate": 3.569060342103121e-05, "loss": 1.0892, "step": 24030 }, { "epoch": 7.19, "grad_norm": 1.7712103128433228, "learning_rate": 3.56852921901917e-05, "loss": 1.0643, "step": 24035 }, { "epoch": 
7.19, "grad_norm": 3.2912991046905518, "learning_rate": 3.56799803692146e-05, "loss": 1.1242, "step": 24040 }, { "epoch": 7.19, "grad_norm": 2.6889803409576416, "learning_rate": 3.5674667958393286e-05, "loss": 1.1923, "step": 24045 }, { "epoch": 7.2, "grad_norm": 3.47308349609375, "learning_rate": 3.566935495802117e-05, "loss": 1.2753, "step": 24050 }, { "epoch": 7.2, "grad_norm": 2.2733964920043945, "learning_rate": 3.566404136839165e-05, "loss": 1.2105, "step": 24055 }, { "epoch": 7.2, "grad_norm": 1.1582354307174683, "learning_rate": 3.565872718979822e-05, "loss": 0.9991, "step": 24060 }, { "epoch": 7.2, "grad_norm": 2.0912468433380127, "learning_rate": 3.565341242253437e-05, "loss": 1.1606, "step": 24065 }, { "epoch": 7.2, "grad_norm": 4.56016731262207, "learning_rate": 3.5648097066893614e-05, "loss": 1.2603, "step": 24070 }, { "epoch": 7.2, "grad_norm": 2.5408365726470947, "learning_rate": 3.564278112316953e-05, "loss": 1.1349, "step": 24075 }, { "epoch": 7.2, "grad_norm": 2.359050989151001, "learning_rate": 3.563746459165571e-05, "loss": 1.1326, "step": 24080 }, { "epoch": 7.21, "grad_norm": 2.704974889755249, "learning_rate": 3.563214747264578e-05, "loss": 1.2259, "step": 24085 }, { "epoch": 7.21, "grad_norm": 3.2640304565429688, "learning_rate": 3.562682976643339e-05, "loss": 1.0601, "step": 24090 }, { "epoch": 7.21, "grad_norm": 2.6645710468292236, "learning_rate": 3.562151147331224e-05, "loss": 1.2108, "step": 24095 }, { "epoch": 7.21, "grad_norm": 3.9869096279144287, "learning_rate": 3.5616192593576055e-05, "loss": 1.144, "step": 24100 }, { "epoch": 7.21, "grad_norm": 1.9926941394805908, "learning_rate": 3.561087312751858e-05, "loss": 1.0182, "step": 24105 }, { "epoch": 7.21, "grad_norm": 1.0935914516448975, "learning_rate": 3.560555307543362e-05, "loss": 1.3015, "step": 24110 }, { "epoch": 7.21, "grad_norm": 3.214768409729004, "learning_rate": 3.5600232437614984e-05, "loss": 1.3117, "step": 24115 }, { "epoch": 7.22, "grad_norm": 3.5183026790618896, 
"learning_rate": 3.559491121435653e-05, "loss": 1.1814, "step": 24120 }, { "epoch": 7.22, "grad_norm": 0.981540858745575, "learning_rate": 3.558958940595214e-05, "loss": 1.2303, "step": 24125 }, { "epoch": 7.22, "grad_norm": 2.553018569946289, "learning_rate": 3.558426701269574e-05, "loss": 1.1696, "step": 24130 }, { "epoch": 7.22, "grad_norm": 2.0565733909606934, "learning_rate": 3.557894403488127e-05, "loss": 1.2089, "step": 24135 }, { "epoch": 7.22, "grad_norm": 1.6102100610733032, "learning_rate": 3.5573620472802714e-05, "loss": 1.1943, "step": 24140 }, { "epoch": 7.22, "grad_norm": 1.952919363975525, "learning_rate": 3.55682963267541e-05, "loss": 1.0149, "step": 24145 }, { "epoch": 7.23, "grad_norm": 2.255753517150879, "learning_rate": 3.556297159702946e-05, "loss": 1.1736, "step": 24150 }, { "epoch": 7.23, "grad_norm": 2.6610512733459473, "learning_rate": 3.5557646283922875e-05, "loss": 1.2155, "step": 24155 }, { "epoch": 7.23, "grad_norm": 3.2673702239990234, "learning_rate": 3.555232038772846e-05, "loss": 0.9963, "step": 24160 }, { "epoch": 7.23, "grad_norm": 3.3183624744415283, "learning_rate": 3.554699390874036e-05, "loss": 1.101, "step": 24165 }, { "epoch": 7.23, "grad_norm": 1.4142159223556519, "learning_rate": 3.554166684725275e-05, "loss": 1.0708, "step": 24170 }, { "epoch": 7.23, "grad_norm": 2.9164345264434814, "learning_rate": 3.553633920355983e-05, "loss": 1.1302, "step": 24175 }, { "epoch": 7.23, "grad_norm": 2.0309815406799316, "learning_rate": 3.5531010977955865e-05, "loss": 1.0906, "step": 24180 }, { "epoch": 7.24, "grad_norm": 2.8416526317596436, "learning_rate": 3.55256821707351e-05, "loss": 1.0474, "step": 24185 }, { "epoch": 7.24, "grad_norm": 0.9157441854476929, "learning_rate": 3.5520352782191845e-05, "loss": 1.2212, "step": 24190 }, { "epoch": 7.24, "grad_norm": 6.699018478393555, "learning_rate": 3.5515022812620446e-05, "loss": 1.1924, "step": 24195 }, { "epoch": 7.24, "grad_norm": 4.399092674255371, "learning_rate": 
3.550969226231527e-05, "loss": 1.047, "step": 24200 }, { "epoch": 7.24, "grad_norm": 2.4295222759246826, "learning_rate": 3.550436113157071e-05, "loss": 1.2108, "step": 24205 }, { "epoch": 7.24, "grad_norm": 3.0615289211273193, "learning_rate": 3.54990294206812e-05, "loss": 1.0198, "step": 24210 }, { "epoch": 7.24, "grad_norm": 5.101573467254639, "learning_rate": 3.549369712994122e-05, "loss": 1.0861, "step": 24215 }, { "epoch": 7.25, "grad_norm": 2.2024030685424805, "learning_rate": 3.548836425964524e-05, "loss": 1.0598, "step": 24220 }, { "epoch": 7.25, "grad_norm": 3.093062400817871, "learning_rate": 3.548303081008781e-05, "loss": 1.1864, "step": 24225 }, { "epoch": 7.25, "grad_norm": 1.283988118171692, "learning_rate": 3.547769678156349e-05, "loss": 1.2008, "step": 24230 }, { "epoch": 7.25, "grad_norm": 1.7267249822616577, "learning_rate": 3.547236217436686e-05, "loss": 1.1673, "step": 24235 }, { "epoch": 7.25, "grad_norm": 2.7360587120056152, "learning_rate": 3.546702698879256e-05, "loss": 1.0917, "step": 24240 }, { "epoch": 7.25, "grad_norm": 3.3460683822631836, "learning_rate": 3.546169122513524e-05, "loss": 1.1798, "step": 24245 }, { "epoch": 7.26, "grad_norm": 3.3018956184387207, "learning_rate": 3.545635488368959e-05, "loss": 1.1226, "step": 24250 }, { "epoch": 7.26, "grad_norm": 1.762122392654419, "learning_rate": 3.5451017964750316e-05, "loss": 1.1001, "step": 24255 }, { "epoch": 7.26, "grad_norm": 2.407038688659668, "learning_rate": 3.544568046861219e-05, "loss": 1.1731, "step": 24260 }, { "epoch": 7.26, "grad_norm": 2.4787652492523193, "learning_rate": 3.544034239556999e-05, "loss": 1.0903, "step": 24265 }, { "epoch": 7.26, "grad_norm": 3.5315353870391846, "learning_rate": 3.543500374591853e-05, "loss": 1.2661, "step": 24270 }, { "epoch": 7.26, "grad_norm": 1.81735360622406, "learning_rate": 3.5429664519952664e-05, "loss": 1.0972, "step": 24275 }, { "epoch": 7.26, "grad_norm": 3.0193166732788086, "learning_rate": 3.542432471796726e-05, "loss": 1.227, 
"step": 24280 }, { "epoch": 7.27, "grad_norm": 2.1791579723358154, "learning_rate": 3.5418984340257245e-05, "loss": 1.1118, "step": 24285 }, { "epoch": 7.27, "grad_norm": 2.2652621269226074, "learning_rate": 3.541364338711755e-05, "loss": 1.2193, "step": 24290 }, { "epoch": 7.27, "grad_norm": 3.2821550369262695, "learning_rate": 3.5408301858843155e-05, "loss": 1.1751, "step": 24295 }, { "epoch": 7.27, "grad_norm": 1.8815666437149048, "learning_rate": 3.540295975572907e-05, "loss": 1.0835, "step": 24300 }, { "epoch": 7.27, "grad_norm": 6.063453674316406, "learning_rate": 3.539761707807032e-05, "loss": 1.081, "step": 24305 }, { "epoch": 7.27, "grad_norm": 1.7175376415252686, "learning_rate": 3.5392273826162004e-05, "loss": 1.1278, "step": 24310 }, { "epoch": 7.27, "grad_norm": 1.4363011121749878, "learning_rate": 3.538693000029919e-05, "loss": 1.0402, "step": 24315 }, { "epoch": 7.28, "grad_norm": 2.4211199283599854, "learning_rate": 3.538158560077704e-05, "loss": 1.3065, "step": 24320 }, { "epoch": 7.28, "grad_norm": 4.585165023803711, "learning_rate": 3.537624062789071e-05, "loss": 1.0919, "step": 24325 }, { "epoch": 7.28, "grad_norm": 2.929621934890747, "learning_rate": 3.537089508193539e-05, "loss": 1.2163, "step": 24330 }, { "epoch": 7.28, "grad_norm": 3.6469857692718506, "learning_rate": 3.536554896320632e-05, "loss": 1.3069, "step": 24335 }, { "epoch": 7.28, "grad_norm": 2.2444517612457275, "learning_rate": 3.536020227199875e-05, "loss": 1.2386, "step": 24340 }, { "epoch": 7.28, "grad_norm": 1.8699992895126343, "learning_rate": 3.535485500860798e-05, "loss": 1.1221, "step": 24345 }, { "epoch": 7.29, "grad_norm": 2.0010950565338135, "learning_rate": 3.5349507173329324e-05, "loss": 1.157, "step": 24350 }, { "epoch": 7.29, "grad_norm": 1.865695595741272, "learning_rate": 3.534415876645815e-05, "loss": 0.9495, "step": 24355 }, { "epoch": 7.29, "grad_norm": 1.9279874563217163, "learning_rate": 3.533880978828984e-05, "loss": 1.2705, "step": 24360 }, { "epoch": 7.29, 
"grad_norm": 2.339104175567627, "learning_rate": 3.5333460239119814e-05, "loss": 1.081, "step": 24365 }, { "epoch": 7.29, "grad_norm": 2.058978319168091, "learning_rate": 3.5328110119243515e-05, "loss": 0.9748, "step": 24370 }, { "epoch": 7.29, "grad_norm": 5.336772441864014, "learning_rate": 3.532275942895644e-05, "loss": 1.1164, "step": 24375 }, { "epoch": 7.29, "grad_norm": 13.560040473937988, "learning_rate": 3.531740816855408e-05, "loss": 1.2611, "step": 24380 }, { "epoch": 7.3, "grad_norm": 3.361783742904663, "learning_rate": 3.5312056338331986e-05, "loss": 1.0729, "step": 24385 }, { "epoch": 7.3, "grad_norm": 1.754549503326416, "learning_rate": 3.530670393858575e-05, "loss": 1.1315, "step": 24390 }, { "epoch": 7.3, "grad_norm": 1.895077109336853, "learning_rate": 3.530135096961097e-05, "loss": 1.1235, "step": 24395 }, { "epoch": 7.3, "grad_norm": 2.7250521183013916, "learning_rate": 3.529599743170328e-05, "loss": 0.9917, "step": 24400 }, { "epoch": 7.3, "grad_norm": 3.637592077255249, "learning_rate": 3.529064332515836e-05, "loss": 1.015, "step": 24405 }, { "epoch": 7.3, "grad_norm": 2.967895984649658, "learning_rate": 3.5285288650271896e-05, "loss": 1.285, "step": 24410 }, { "epoch": 7.3, "grad_norm": 3.107884407043457, "learning_rate": 3.527993340733964e-05, "loss": 1.0319, "step": 24415 }, { "epoch": 7.31, "grad_norm": 1.9659065008163452, "learning_rate": 3.527457759665734e-05, "loss": 1.1129, "step": 24420 }, { "epoch": 7.31, "grad_norm": 3.5118746757507324, "learning_rate": 3.52692212185208e-05, "loss": 1.1486, "step": 24425 }, { "epoch": 7.31, "grad_norm": 3.2912468910217285, "learning_rate": 3.526386427322585e-05, "loss": 1.1864, "step": 24430 }, { "epoch": 7.31, "grad_norm": 3.768181562423706, "learning_rate": 3.5258506761068344e-05, "loss": 1.2152, "step": 24435 }, { "epoch": 7.31, "grad_norm": 4.644777774810791, "learning_rate": 3.525314868234417e-05, "loss": 1.145, "step": 24440 }, { "epoch": 7.31, "grad_norm": 1.425614595413208, "learning_rate": 
3.524779003734925e-05, "loss": 1.1543, "step": 24445 }, { "epoch": 7.32, "grad_norm": 2.4408340454101562, "learning_rate": 3.524243082637954e-05, "loss": 1.1951, "step": 24450 }, { "epoch": 7.32, "grad_norm": 1.1493414640426636, "learning_rate": 3.523707104973102e-05, "loss": 1.0654, "step": 24455 }, { "epoch": 7.32, "grad_norm": 2.574376106262207, "learning_rate": 3.523171070769972e-05, "loss": 1.0558, "step": 24460 }, { "epoch": 7.32, "grad_norm": 2.255390167236328, "learning_rate": 3.522634980058166e-05, "loss": 1.2315, "step": 24465 }, { "epoch": 7.32, "grad_norm": 1.4033783674240112, "learning_rate": 3.5220988328672935e-05, "loss": 1.0303, "step": 24470 }, { "epoch": 7.32, "grad_norm": 2.0258255004882812, "learning_rate": 3.521562629226965e-05, "loss": 1.1492, "step": 24475 }, { "epoch": 7.32, "grad_norm": 2.6093392372131348, "learning_rate": 3.521026369166793e-05, "loss": 1.1797, "step": 24480 }, { "epoch": 7.33, "grad_norm": 4.753375053405762, "learning_rate": 3.520490052716397e-05, "loss": 1.1525, "step": 24485 }, { "epoch": 7.33, "grad_norm": 2.713651657104492, "learning_rate": 3.5199536799053965e-05, "loss": 1.0279, "step": 24490 }, { "epoch": 7.33, "grad_norm": 2.4132895469665527, "learning_rate": 3.5194172507634136e-05, "loss": 1.0203, "step": 24495 }, { "epoch": 7.33, "grad_norm": 2.83127760887146, "learning_rate": 3.5188807653200764e-05, "loss": 1.2186, "step": 24500 }, { "epoch": 7.33, "grad_norm": 2.688488245010376, "learning_rate": 3.5183442236050126e-05, "loss": 1.2564, "step": 24505 }, { "epoch": 7.33, "grad_norm": 1.0452677011489868, "learning_rate": 3.517807625647857e-05, "loss": 1.1982, "step": 24510 }, { "epoch": 7.33, "grad_norm": 3.4765284061431885, "learning_rate": 3.5172709714782435e-05, "loss": 1.1172, "step": 24515 }, { "epoch": 7.34, "grad_norm": 4.563774108886719, "learning_rate": 3.5167342611258114e-05, "loss": 1.2248, "step": 24520 }, { "epoch": 7.34, "grad_norm": 3.0111424922943115, "learning_rate": 3.5161974946202035e-05, "loss": 
0.9888, "step": 24525 }, { "epoch": 7.34, "grad_norm": 1.582155466079712, "learning_rate": 3.515660671991064e-05, "loss": 1.0793, "step": 24530 }, { "epoch": 7.34, "grad_norm": 2.5320088863372803, "learning_rate": 3.515123793268042e-05, "loss": 1.2144, "step": 24535 }, { "epoch": 7.34, "grad_norm": 1.5951532125473022, "learning_rate": 3.5145868584807875e-05, "loss": 1.0229, "step": 24540 }, { "epoch": 7.34, "grad_norm": 1.4666327238082886, "learning_rate": 3.514049867658955e-05, "loss": 1.273, "step": 24545 }, { "epoch": 7.35, "grad_norm": 3.8786826133728027, "learning_rate": 3.513512820832202e-05, "loss": 1.1415, "step": 24550 }, { "epoch": 7.35, "grad_norm": 1.4749125242233276, "learning_rate": 3.5129757180301906e-05, "loss": 1.2209, "step": 24555 }, { "epoch": 7.35, "grad_norm": 6.20326566696167, "learning_rate": 3.512438559282583e-05, "loss": 0.953, "step": 24560 }, { "epoch": 7.35, "grad_norm": 1.2090144157409668, "learning_rate": 3.511901344619045e-05, "loss": 1.195, "step": 24565 }, { "epoch": 7.35, "grad_norm": 2.7025344371795654, "learning_rate": 3.5113640740692485e-05, "loss": 1.1066, "step": 24570 }, { "epoch": 7.35, "grad_norm": 2.9294729232788086, "learning_rate": 3.510826747662865e-05, "loss": 1.1074, "step": 24575 }, { "epoch": 7.35, "grad_norm": 3.470428943634033, "learning_rate": 3.510289365429571e-05, "loss": 1.166, "step": 24580 }, { "epoch": 7.36, "grad_norm": 10.227189064025879, "learning_rate": 3.509751927399046e-05, "loss": 1.1535, "step": 24585 }, { "epoch": 7.36, "grad_norm": 1.4875662326812744, "learning_rate": 3.509214433600971e-05, "loss": 1.2838, "step": 24590 }, { "epoch": 7.36, "grad_norm": 2.2764317989349365, "learning_rate": 3.508676884065032e-05, "loss": 1.1669, "step": 24595 }, { "epoch": 7.36, "grad_norm": 3.8440630435943604, "learning_rate": 3.5081392788209176e-05, "loss": 1.0723, "step": 24600 }, { "epoch": 7.36, "grad_norm": 3.417236089706421, "learning_rate": 3.507601617898319e-05, "loss": 1.193, "step": 24605 }, { "epoch": 
7.36, "grad_norm": 2.8095319271087646, "learning_rate": 3.5070639013269296e-05, "loss": 1.1633, "step": 24610 }, { "epoch": 7.36, "grad_norm": 4.380091190338135, "learning_rate": 3.5065261291364485e-05, "loss": 1.1677, "step": 24615 }, { "epoch": 7.37, "grad_norm": 1.5369083881378174, "learning_rate": 3.505988301356574e-05, "loss": 1.2476, "step": 24620 }, { "epoch": 7.37, "grad_norm": 1.350716233253479, "learning_rate": 3.505450418017012e-05, "loss": 1.0492, "step": 24625 }, { "epoch": 7.37, "grad_norm": 1.859533429145813, "learning_rate": 3.5049124791474696e-05, "loss": 0.9475, "step": 24630 }, { "epoch": 7.37, "grad_norm": 3.090188503265381, "learning_rate": 3.504374484777655e-05, "loss": 1.0635, "step": 24635 }, { "epoch": 7.37, "grad_norm": 4.572514533996582, "learning_rate": 3.503836434937281e-05, "loss": 1.0462, "step": 24640 }, { "epoch": 7.37, "grad_norm": 1.258280634880066, "learning_rate": 3.503298329656064e-05, "loss": 1.0908, "step": 24645 }, { "epoch": 7.37, "grad_norm": 3.248828887939453, "learning_rate": 3.5027601689637244e-05, "loss": 1.1565, "step": 24650 }, { "epoch": 7.38, "grad_norm": 3.4819695949554443, "learning_rate": 3.502221952889981e-05, "loss": 1.2237, "step": 24655 }, { "epoch": 7.38, "grad_norm": 2.005113363265991, "learning_rate": 3.5016836814645624e-05, "loss": 1.2283, "step": 24660 }, { "epoch": 7.38, "grad_norm": 4.542233943939209, "learning_rate": 3.5011453547171954e-05, "loss": 1.0564, "step": 24665 }, { "epoch": 7.38, "grad_norm": 1.675430178642273, "learning_rate": 3.50060697267761e-05, "loss": 1.0971, "step": 24670 }, { "epoch": 7.38, "grad_norm": 3.372847318649292, "learning_rate": 3.500068535375543e-05, "loss": 1.0592, "step": 24675 }, { "epoch": 7.38, "grad_norm": 2.804337978363037, "learning_rate": 3.499530042840728e-05, "loss": 0.9945, "step": 24680 }, { "epoch": 7.39, "grad_norm": 2.0974247455596924, "learning_rate": 3.49899149510291e-05, "loss": 1.0258, "step": 24685 }, { "epoch": 7.39, "grad_norm": 1.919437289237976, 
"learning_rate": 3.498452892191829e-05, "loss": 1.028, "step": 24690 }, { "epoch": 7.39, "grad_norm": 3.193256139755249, "learning_rate": 3.4979142341372337e-05, "loss": 1.1782, "step": 24695 }, { "epoch": 7.39, "grad_norm": 2.895226240158081, "learning_rate": 3.4973755209688716e-05, "loss": 1.1318, "step": 24700 }, { "epoch": 7.39, "grad_norm": 1.8412282466888428, "learning_rate": 3.496836752716496e-05, "loss": 1.335, "step": 24705 }, { "epoch": 7.39, "grad_norm": 4.568256378173828, "learning_rate": 3.496297929409863e-05, "loss": 0.9776, "step": 24710 }, { "epoch": 7.39, "grad_norm": 2.8621883392333984, "learning_rate": 3.4957590510787306e-05, "loss": 0.984, "step": 24715 }, { "epoch": 7.4, "grad_norm": 3.2574169635772705, "learning_rate": 3.495220117752861e-05, "loss": 1.0085, "step": 24720 }, { "epoch": 7.4, "grad_norm": 4.439530849456787, "learning_rate": 3.4946811294620196e-05, "loss": 1.0522, "step": 24725 }, { "epoch": 7.4, "grad_norm": 3.148686170578003, "learning_rate": 3.494142086235972e-05, "loss": 0.8726, "step": 24730 }, { "epoch": 7.4, "grad_norm": 8.771424293518066, "learning_rate": 3.4936029881044917e-05, "loss": 1.1329, "step": 24735 }, { "epoch": 7.4, "grad_norm": 2.2926177978515625, "learning_rate": 3.493063835097351e-05, "loss": 1.1628, "step": 24740 }, { "epoch": 7.4, "grad_norm": 1.3365390300750732, "learning_rate": 3.4925246272443256e-05, "loss": 1.2449, "step": 24745 }, { "epoch": 7.4, "grad_norm": 2.2112948894500732, "learning_rate": 3.491985364575197e-05, "loss": 1.0303, "step": 24750 }, { "epoch": 7.41, "grad_norm": 1.917383074760437, "learning_rate": 3.4914460471197486e-05, "loss": 1.3089, "step": 24755 }, { "epoch": 7.41, "grad_norm": 3.297748565673828, "learning_rate": 3.4909066749077654e-05, "loss": 1.222, "step": 24760 }, { "epoch": 7.41, "grad_norm": 2.1157360076904297, "learning_rate": 3.490367247969036e-05, "loss": 1.1927, "step": 24765 }, { "epoch": 7.41, "grad_norm": 3.120800733566284, "learning_rate": 3.489827766333353e-05, 
"loss": 1.2292, "step": 24770 }, { "epoch": 7.41, "grad_norm": 1.5992114543914795, "learning_rate": 3.4892882300305127e-05, "loss": 1.2151, "step": 24775 }, { "epoch": 7.41, "grad_norm": 1.0426512956619263, "learning_rate": 3.48874863909031e-05, "loss": 1.1554, "step": 24780 }, { "epoch": 7.42, "grad_norm": 2.195253372192383, "learning_rate": 3.488208993542549e-05, "loss": 1.042, "step": 24785 }, { "epoch": 7.42, "grad_norm": 2.7746365070343018, "learning_rate": 3.487669293417032e-05, "loss": 1.104, "step": 24790 }, { "epoch": 7.42, "grad_norm": 3.3738327026367188, "learning_rate": 3.487129538743567e-05, "loss": 1.0831, "step": 24795 }, { "epoch": 7.42, "grad_norm": 2.653986930847168, "learning_rate": 3.4865897295519624e-05, "loss": 1.1528, "step": 24800 }, { "epoch": 7.42, "grad_norm": 3.24540638923645, "learning_rate": 3.486049865872033e-05, "loss": 1.2233, "step": 24805 }, { "epoch": 7.42, "grad_norm": 3.4608511924743652, "learning_rate": 3.485509947733595e-05, "loss": 1.2021, "step": 24810 }, { "epoch": 7.42, "grad_norm": 1.495871663093567, "learning_rate": 3.4849699751664664e-05, "loss": 1.2353, "step": 24815 }, { "epoch": 7.43, "grad_norm": 1.2478872537612915, "learning_rate": 3.484429948200471e-05, "loss": 1.2628, "step": 24820 }, { "epoch": 7.43, "grad_norm": 4.952610015869141, "learning_rate": 3.483889866865432e-05, "loss": 1.1575, "step": 24825 }, { "epoch": 7.43, "grad_norm": 1.0654171705245972, "learning_rate": 3.483349731191178e-05, "loss": 1.265, "step": 24830 }, { "epoch": 7.43, "grad_norm": 5.432766437530518, "learning_rate": 3.48280954120754e-05, "loss": 1.0648, "step": 24835 }, { "epoch": 7.43, "grad_norm": 6.357416152954102, "learning_rate": 3.482269296944354e-05, "loss": 1.0119, "step": 24840 }, { "epoch": 7.43, "grad_norm": 2.8604278564453125, "learning_rate": 3.481728998431455e-05, "loss": 1.2266, "step": 24845 }, { "epoch": 7.43, "grad_norm": 3.6308391094207764, "learning_rate": 3.481188645698684e-05, "loss": 1.2901, "step": 24850 }, { 
"epoch": 7.44, "grad_norm": 2.2123422622680664, "learning_rate": 3.4806482387758846e-05, "loss": 1.2228, "step": 24855 }, { "epoch": 7.44, "grad_norm": 1.6754069328308105, "learning_rate": 3.4801077776929016e-05, "loss": 1.1403, "step": 24860 }, { "epoch": 7.44, "grad_norm": 1.3092021942138672, "learning_rate": 3.479567262479584e-05, "loss": 1.1213, "step": 24865 }, { "epoch": 7.44, "grad_norm": 3.2713966369628906, "learning_rate": 3.479026693165786e-05, "loss": 1.0768, "step": 24870 }, { "epoch": 7.44, "grad_norm": 2.951756000518799, "learning_rate": 3.478486069781361e-05, "loss": 1.1956, "step": 24875 }, { "epoch": 7.44, "grad_norm": 2.3628857135772705, "learning_rate": 3.4779453923561675e-05, "loss": 1.2273, "step": 24880 }, { "epoch": 7.45, "grad_norm": 2.3509888648986816, "learning_rate": 3.477404660920066e-05, "loss": 1.1566, "step": 24885 }, { "epoch": 7.45, "grad_norm": 1.5141558647155762, "learning_rate": 3.4768638755029226e-05, "loss": 1.1827, "step": 24890 }, { "epoch": 7.45, "grad_norm": 1.5687611103057861, "learning_rate": 3.476323036134601e-05, "loss": 1.1956, "step": 24895 }, { "epoch": 7.45, "grad_norm": 1.7349860668182373, "learning_rate": 3.475782142844974e-05, "loss": 1.1523, "step": 24900 }, { "epoch": 7.45, "grad_norm": 2.322187900543213, "learning_rate": 3.475241195663913e-05, "loss": 1.2337, "step": 24905 }, { "epoch": 7.45, "grad_norm": 1.1443284749984741, "learning_rate": 3.4747001946212944e-05, "loss": 1.0199, "step": 24910 }, { "epoch": 7.45, "grad_norm": 1.456677794456482, "learning_rate": 3.4741591397469975e-05, "loss": 1.0837, "step": 24915 }, { "epoch": 7.46, "grad_norm": 1.2312963008880615, "learning_rate": 3.473618031070903e-05, "loss": 1.1786, "step": 24920 }, { "epoch": 7.46, "grad_norm": 1.1006726026535034, "learning_rate": 3.4730768686228976e-05, "loss": 1.1402, "step": 24925 }, { "epoch": 7.46, "grad_norm": 3.3980886936187744, "learning_rate": 3.4725356524328686e-05, "loss": 1.0626, "step": 24930 }, { "epoch": 7.46, 
"grad_norm": 4.119764804840088, "learning_rate": 3.471994382530706e-05, "loss": 1.1438, "step": 24935 }, { "epoch": 7.46, "grad_norm": 1.62360417842865, "learning_rate": 3.471453058946303e-05, "loss": 1.1758, "step": 24940 }, { "epoch": 7.46, "grad_norm": 2.4606423377990723, "learning_rate": 3.4709116817095584e-05, "loss": 0.9549, "step": 24945 }, { "epoch": 7.46, "grad_norm": 2.0009765625, "learning_rate": 3.4703702508503704e-05, "loss": 1.2969, "step": 24950 }, { "epoch": 7.47, "grad_norm": 2.6999094486236572, "learning_rate": 3.469828766398643e-05, "loss": 1.1032, "step": 24955 }, { "epoch": 7.47, "grad_norm": 4.238891124725342, "learning_rate": 3.46928722838428e-05, "loss": 1.186, "step": 24960 }, { "epoch": 7.47, "grad_norm": 3.6732418537139893, "learning_rate": 3.468745636837191e-05, "loss": 1.1267, "step": 24965 }, { "epoch": 7.47, "grad_norm": 4.03202486038208, "learning_rate": 3.468203991787287e-05, "loss": 1.08, "step": 24970 }, { "epoch": 7.47, "grad_norm": 2.4040608406066895, "learning_rate": 3.467662293264484e-05, "loss": 0.9691, "step": 24975 }, { "epoch": 7.47, "grad_norm": 2.929931640625, "learning_rate": 3.4671205412986975e-05, "loss": 1.0961, "step": 24980 }, { "epoch": 7.48, "grad_norm": 3.1068196296691895, "learning_rate": 3.466578735919849e-05, "loss": 1.2342, "step": 24985 }, { "epoch": 7.48, "grad_norm": 3.1283774375915527, "learning_rate": 3.466036877157862e-05, "loss": 1.1537, "step": 24990 }, { "epoch": 7.48, "grad_norm": 2.7555837631225586, "learning_rate": 3.465494965042662e-05, "loss": 1.1126, "step": 24995 }, { "epoch": 7.48, "grad_norm": 1.7763209342956543, "learning_rate": 3.4649529996041784e-05, "loss": 1.1552, "step": 25000 }, { "epoch": 7.48, "grad_norm": 2.4423739910125732, "learning_rate": 3.464410980872344e-05, "loss": 0.9622, "step": 25005 }, { "epoch": 7.48, "grad_norm": 2.395561933517456, "learning_rate": 3.463868908877094e-05, "loss": 1.0734, "step": 25010 }, { "epoch": 7.48, "grad_norm": 1.395330548286438, "learning_rate": 
3.463326783648365e-05, "loss": 0.914, "step": 25015 }, { "epoch": 7.49, "grad_norm": 2.5400102138519287, "learning_rate": 3.4627846052161e-05, "loss": 1.1574, "step": 25020 }, { "epoch": 7.49, "grad_norm": 1.9044983386993408, "learning_rate": 3.4622423736102414e-05, "loss": 1.2176, "step": 25025 }, { "epoch": 7.49, "grad_norm": 2.5244264602661133, "learning_rate": 3.461700088860737e-05, "loss": 1.0935, "step": 25030 }, { "epoch": 7.49, "grad_norm": 1.8527662754058838, "learning_rate": 3.4611577509975366e-05, "loss": 1.1516, "step": 25035 }, { "epoch": 7.49, "grad_norm": 2.225349187850952, "learning_rate": 3.460615360050592e-05, "loss": 1.0199, "step": 25040 }, { "epoch": 7.49, "grad_norm": 1.780076026916504, "learning_rate": 3.46007291604986e-05, "loss": 1.2104, "step": 25045 }, { "epoch": 7.49, "grad_norm": 4.2586669921875, "learning_rate": 3.4595304190252993e-05, "loss": 1.1166, "step": 25050 }, { "epoch": 7.5, "grad_norm": 2.9571564197540283, "learning_rate": 3.45898786900687e-05, "loss": 1.1133, "step": 25055 }, { "epoch": 7.5, "grad_norm": 1.0347540378570557, "learning_rate": 3.458445266024538e-05, "loss": 1.0487, "step": 25060 }, { "epoch": 7.5, "grad_norm": 1.9431984424591064, "learning_rate": 3.45790261010827e-05, "loss": 1.0142, "step": 25065 }, { "epoch": 7.5, "grad_norm": 4.437554359436035, "learning_rate": 3.4573599012880364e-05, "loss": 1.1976, "step": 25070 }, { "epoch": 7.5, "grad_norm": 1.2235515117645264, "learning_rate": 3.456817139593811e-05, "loss": 1.1345, "step": 25075 }, { "epoch": 7.5, "grad_norm": 3.0580387115478516, "learning_rate": 3.45627432505557e-05, "loss": 1.2483, "step": 25080 }, { "epoch": 7.51, "grad_norm": 5.822119235992432, "learning_rate": 3.4557314577032915e-05, "loss": 1.0357, "step": 25085 }, { "epoch": 7.51, "grad_norm": 4.656231880187988, "learning_rate": 3.455188537566957e-05, "loss": 1.2377, "step": 25090 }, { "epoch": 7.51, "grad_norm": 1.6126227378845215, "learning_rate": 3.4546455646765535e-05, "loss": 1.3903, "step": 
25095 }, { "epoch": 7.51, "grad_norm": 2.4167838096618652, "learning_rate": 3.454102539062068e-05, "loss": 1.1542, "step": 25100 }, { "epoch": 7.51, "grad_norm": 1.3699878454208374, "learning_rate": 3.45355946075349e-05, "loss": 1.1183, "step": 25105 }, { "epoch": 7.51, "grad_norm": 0.8811430335044861, "learning_rate": 3.453016329780815e-05, "loss": 1.1796, "step": 25110 }, { "epoch": 7.51, "grad_norm": 2.27093768119812, "learning_rate": 3.452473146174038e-05, "loss": 1.0819, "step": 25115 }, { "epoch": 7.52, "grad_norm": 2.351771116256714, "learning_rate": 3.45192990996316e-05, "loss": 1.2461, "step": 25120 }, { "epoch": 7.52, "grad_norm": 2.03128719329834, "learning_rate": 3.451386621178182e-05, "loss": 1.2742, "step": 25125 }, { "epoch": 7.52, "grad_norm": 6.1879448890686035, "learning_rate": 3.45084327984911e-05, "loss": 1.2229, "step": 25130 }, { "epoch": 7.52, "grad_norm": 2.5790390968322754, "learning_rate": 3.4502998860059514e-05, "loss": 1.0641, "step": 25135 }, { "epoch": 7.52, "grad_norm": 4.369030475616455, "learning_rate": 3.4497564396787185e-05, "loss": 1.3368, "step": 25140 }, { "epoch": 7.52, "grad_norm": 1.9594241380691528, "learning_rate": 3.449212940897425e-05, "loss": 1.1571, "step": 25145 }, { "epoch": 7.52, "grad_norm": 3.453566312789917, "learning_rate": 3.4486693896920874e-05, "loss": 1.1184, "step": 25150 }, { "epoch": 7.53, "grad_norm": 19.127910614013672, "learning_rate": 3.448125786092725e-05, "loss": 1.102, "step": 25155 }, { "epoch": 7.53, "grad_norm": 4.914632320404053, "learning_rate": 3.447582130129361e-05, "loss": 1.1391, "step": 25160 }, { "epoch": 7.53, "grad_norm": 3.1397294998168945, "learning_rate": 3.4470384218320205e-05, "loss": 1.0924, "step": 25165 }, { "epoch": 7.53, "grad_norm": 3.5343799591064453, "learning_rate": 3.446494661230733e-05, "loss": 1.0208, "step": 25170 }, { "epoch": 7.53, "grad_norm": 1.5835540294647217, "learning_rate": 3.445950848355529e-05, "loss": 1.0526, "step": 25175 }, { "epoch": 7.53, "grad_norm": 
2.000260591506958, "learning_rate": 3.445406983236443e-05, "loss": 1.0166, "step": 25180 }, { "epoch": 7.54, "grad_norm": 2.638507843017578, "learning_rate": 3.4448630659035126e-05, "loss": 1.2353, "step": 25185 }, { "epoch": 7.54, "grad_norm": 3.0085575580596924, "learning_rate": 3.444319096386777e-05, "loss": 1.2625, "step": 25190 }, { "epoch": 7.54, "grad_norm": 3.4091453552246094, "learning_rate": 3.4437750747162776e-05, "loss": 0.9272, "step": 25195 }, { "epoch": 7.54, "grad_norm": 2.23293137550354, "learning_rate": 3.443231000922063e-05, "loss": 1.2352, "step": 25200 }, { "epoch": 7.54, "grad_norm": 1.904063105583191, "learning_rate": 3.4426868750341805e-05, "loss": 1.1851, "step": 25205 }, { "epoch": 7.54, "grad_norm": 3.55661678314209, "learning_rate": 3.4421426970826826e-05, "loss": 1.0767, "step": 25210 }, { "epoch": 7.54, "grad_norm": 2.7817044258117676, "learning_rate": 3.441598467097622e-05, "loss": 1.2299, "step": 25215 }, { "epoch": 7.55, "grad_norm": 1.7373440265655518, "learning_rate": 3.441054185109057e-05, "loss": 1.1791, "step": 25220 }, { "epoch": 7.55, "grad_norm": 3.398428440093994, "learning_rate": 3.440509851147047e-05, "loss": 1.0867, "step": 25225 }, { "epoch": 7.55, "grad_norm": 2.2971742153167725, "learning_rate": 3.4399654652416566e-05, "loss": 1.109, "step": 25230 }, { "epoch": 7.55, "grad_norm": 3.605184555053711, "learning_rate": 3.4394210274229496e-05, "loss": 1.1545, "step": 25235 }, { "epoch": 7.55, "grad_norm": 2.362635374069214, "learning_rate": 3.4388765377209964e-05, "loss": 1.1122, "step": 25240 }, { "epoch": 7.55, "grad_norm": 1.9791773557662964, "learning_rate": 3.438331996165868e-05, "loss": 1.2522, "step": 25245 }, { "epoch": 7.55, "grad_norm": 3.294543743133545, "learning_rate": 3.437787402787639e-05, "loss": 1.2164, "step": 25250 }, { "epoch": 7.56, "grad_norm": 3.8431739807128906, "learning_rate": 3.4372427576163856e-05, "loss": 1.108, "step": 25255 }, { "epoch": 7.56, "grad_norm": 4.042465686798096, "learning_rate": 
3.4366980606821895e-05, "loss": 1.1426, "step": 25260 }, { "epoch": 7.56, "grad_norm": 1.2647730112075806, "learning_rate": 3.436153312015133e-05, "loss": 1.0345, "step": 25265 }, { "epoch": 7.56, "grad_norm": 1.1158740520477295, "learning_rate": 3.435608511645302e-05, "loss": 1.1481, "step": 25270 }, { "epoch": 7.56, "grad_norm": 1.05342698097229, "learning_rate": 3.4350636596027853e-05, "loss": 1.0948, "step": 25275 }, { "epoch": 7.56, "grad_norm": 2.2656328678131104, "learning_rate": 3.434518755917675e-05, "loss": 1.2436, "step": 25280 }, { "epoch": 7.56, "grad_norm": 4.6103901863098145, "learning_rate": 3.433973800620065e-05, "loss": 1.0903, "step": 25285 }, { "epoch": 7.57, "grad_norm": 2.1559901237487793, "learning_rate": 3.4334287937400526e-05, "loss": 1.0622, "step": 25290 }, { "epoch": 7.57, "grad_norm": 2.366748332977295, "learning_rate": 3.432883735307739e-05, "loss": 1.1494, "step": 25295 }, { "epoch": 7.57, "grad_norm": 2.5366220474243164, "learning_rate": 3.4323386253532254e-05, "loss": 1.0923, "step": 25300 }, { "epoch": 7.57, "grad_norm": 3.0016655921936035, "learning_rate": 3.431793463906619e-05, "loss": 1.0665, "step": 25305 }, { "epoch": 7.57, "grad_norm": 2.379429340362549, "learning_rate": 3.431248250998028e-05, "loss": 1.2129, "step": 25310 }, { "epoch": 7.57, "grad_norm": 3.5074679851531982, "learning_rate": 3.4307029866575645e-05, "loss": 1.1391, "step": 25315 }, { "epoch": 7.58, "grad_norm": 2.7369179725646973, "learning_rate": 3.4301576709153424e-05, "loss": 1.2312, "step": 25320 }, { "epoch": 7.58, "grad_norm": 2.9599127769470215, "learning_rate": 3.4296123038014786e-05, "loss": 1.218, "step": 25325 }, { "epoch": 7.58, "grad_norm": 1.0387095212936401, "learning_rate": 3.429066885346094e-05, "loss": 1.2556, "step": 25330 }, { "epoch": 7.58, "grad_norm": 2.1688525676727295, "learning_rate": 3.42852141557931e-05, "loss": 1.2108, "step": 25335 }, { "epoch": 7.58, "grad_norm": 2.080822229385376, "learning_rate": 3.427975894531255e-05, "loss": 
1.175, "step": 25340 }, { "epoch": 7.58, "grad_norm": 1.2066692113876343, "learning_rate": 3.427430322232055e-05, "loss": 1.2416, "step": 25345 }, { "epoch": 7.58, "grad_norm": 1.1532701253890991, "learning_rate": 3.4268846987118426e-05, "loss": 1.171, "step": 25350 }, { "epoch": 7.59, "grad_norm": 1.9684667587280273, "learning_rate": 3.426339024000751e-05, "loss": 1.1804, "step": 25355 }, { "epoch": 7.59, "grad_norm": 1.9980390071868896, "learning_rate": 3.4257932981289184e-05, "loss": 1.1734, "step": 25360 }, { "epoch": 7.59, "grad_norm": 1.5516365766525269, "learning_rate": 3.4252475211264846e-05, "loss": 1.2121, "step": 25365 }, { "epoch": 7.59, "grad_norm": 1.7034841775894165, "learning_rate": 3.424701693023591e-05, "loss": 1.1121, "step": 25370 }, { "epoch": 7.59, "grad_norm": 3.420742988586426, "learning_rate": 3.424155813850385e-05, "loss": 1.07, "step": 25375 }, { "epoch": 7.59, "grad_norm": 2.169203519821167, "learning_rate": 3.423609883637014e-05, "loss": 1.1631, "step": 25380 }, { "epoch": 7.59, "grad_norm": 4.135627746582031, "learning_rate": 3.423063902413629e-05, "loss": 1.2078, "step": 25385 }, { "epoch": 7.6, "grad_norm": 3.5442159175872803, "learning_rate": 3.422517870210384e-05, "loss": 1.1549, "step": 25390 }, { "epoch": 7.6, "grad_norm": 5.373948574066162, "learning_rate": 3.421971787057436e-05, "loss": 0.976, "step": 25395 }, { "epoch": 7.6, "grad_norm": 2.204765558242798, "learning_rate": 3.421425652984944e-05, "loss": 1.2897, "step": 25400 }, { "epoch": 7.6, "grad_norm": 4.4537811279296875, "learning_rate": 3.420879468023072e-05, "loss": 1.0213, "step": 25405 }, { "epoch": 7.6, "grad_norm": 2.6348259449005127, "learning_rate": 3.4203332322019835e-05, "loss": 1.1508, "step": 25410 }, { "epoch": 7.6, "grad_norm": 1.4462038278579712, "learning_rate": 3.419786945551848e-05, "loss": 1.2851, "step": 25415 }, { "epoch": 7.61, "grad_norm": 4.519046783447266, "learning_rate": 3.419240608102834e-05, "loss": 1.2382, "step": 25420 }, { "epoch": 7.61, 
"grad_norm": 2.593632936477661, "learning_rate": 3.418694219885118e-05, "loss": 1.1794, "step": 25425 }, { "epoch": 7.61, "grad_norm": 2.167607545852661, "learning_rate": 3.418147780928875e-05, "loss": 1.2187, "step": 25430 }, { "epoch": 7.61, "grad_norm": 3.661607027053833, "learning_rate": 3.4176012912642844e-05, "loss": 1.0878, "step": 25435 }, { "epoch": 7.61, "grad_norm": 2.0092809200286865, "learning_rate": 3.4170547509215286e-05, "loss": 1.134, "step": 25440 }, { "epoch": 7.61, "grad_norm": 7.421758651733398, "learning_rate": 3.416508159930791e-05, "loss": 0.9357, "step": 25445 }, { "epoch": 7.61, "grad_norm": 4.737497329711914, "learning_rate": 3.415961518322262e-05, "loss": 1.0698, "step": 25450 }, { "epoch": 7.62, "grad_norm": 2.214827299118042, "learning_rate": 3.4154148261261285e-05, "loss": 1.051, "step": 25455 }, { "epoch": 7.62, "grad_norm": 1.7568135261535645, "learning_rate": 3.414868083372587e-05, "loss": 1.2068, "step": 25460 }, { "epoch": 7.62, "grad_norm": 1.6030669212341309, "learning_rate": 3.414321290091831e-05, "loss": 1.1984, "step": 25465 }, { "epoch": 7.62, "grad_norm": 1.4295237064361572, "learning_rate": 3.413774446314062e-05, "loss": 1.1206, "step": 25470 }, { "epoch": 7.62, "grad_norm": 5.009536266326904, "learning_rate": 3.41322755206948e-05, "loss": 1.0359, "step": 25475 }, { "epoch": 7.62, "grad_norm": 1.4227309226989746, "learning_rate": 3.4126806073882886e-05, "loss": 1.1036, "step": 25480 }, { "epoch": 7.62, "grad_norm": 1.6048041582107544, "learning_rate": 3.4121336123006965e-05, "loss": 1.3853, "step": 25485 }, { "epoch": 7.63, "grad_norm": 1.398371696472168, "learning_rate": 3.411586566836913e-05, "loss": 1.076, "step": 25490 }, { "epoch": 7.63, "grad_norm": 1.4785361289978027, "learning_rate": 3.4110394710271504e-05, "loss": 1.1772, "step": 25495 }, { "epoch": 7.63, "grad_norm": 2.355851411819458, "learning_rate": 3.410492324901626e-05, "loss": 1.1964, "step": 25500 }, { "epoch": 7.63, "grad_norm": 1.825348973274231, 
"learning_rate": 3.4099451284905556e-05, "loss": 1.2284, "step": 25505 }, { "epoch": 7.63, "grad_norm": 1.8260172605514526, "learning_rate": 3.409397881824163e-05, "loss": 1.2915, "step": 25510 }, { "epoch": 7.63, "grad_norm": 2.6344926357269287, "learning_rate": 3.4088505849326697e-05, "loss": 1.1483, "step": 25515 }, { "epoch": 7.64, "grad_norm": 1.9603842496871948, "learning_rate": 3.408303237846303e-05, "loss": 1.1863, "step": 25520 }, { "epoch": 7.64, "grad_norm": 1.3052462339401245, "learning_rate": 3.407755840595294e-05, "loss": 1.2192, "step": 25525 }, { "epoch": 7.64, "grad_norm": 3.303022623062134, "learning_rate": 3.407208393209872e-05, "loss": 1.2268, "step": 25530 }, { "epoch": 7.64, "grad_norm": 1.622067928314209, "learning_rate": 3.406660895720275e-05, "loss": 1.2071, "step": 25535 }, { "epoch": 7.64, "grad_norm": 3.006840467453003, "learning_rate": 3.406113348156738e-05, "loss": 1.1447, "step": 25540 }, { "epoch": 7.64, "grad_norm": 2.42177152633667, "learning_rate": 3.405565750549502e-05, "loss": 1.1148, "step": 25545 }, { "epoch": 7.64, "grad_norm": 2.3825762271881104, "learning_rate": 3.405018102928812e-05, "loss": 1.2127, "step": 25550 }, { "epoch": 7.65, "grad_norm": 2.4480338096618652, "learning_rate": 3.404470405324912e-05, "loss": 1.0913, "step": 25555 }, { "epoch": 7.65, "grad_norm": 1.3634631633758545, "learning_rate": 3.403922657768052e-05, "loss": 1.2583, "step": 25560 }, { "epoch": 7.65, "grad_norm": 3.5564489364624023, "learning_rate": 3.403374860288484e-05, "loss": 1.14, "step": 25565 }, { "epoch": 7.65, "grad_norm": 2.949016571044922, "learning_rate": 3.402827012916461e-05, "loss": 1.3249, "step": 25570 }, { "epoch": 7.65, "grad_norm": 1.4162577390670776, "learning_rate": 3.4022791156822395e-05, "loss": 1.2632, "step": 25575 }, { "epoch": 7.65, "grad_norm": 1.9798351526260376, "learning_rate": 3.401731168616081e-05, "loss": 1.3101, "step": 25580 }, { "epoch": 7.65, "grad_norm": 1.9318190813064575, "learning_rate": 
3.401183171748248e-05, "loss": 1.1082, "step": 25585 }, { "epoch": 7.66, "grad_norm": 2.191795825958252, "learning_rate": 3.400635125109005e-05, "loss": 1.2841, "step": 25590 }, { "epoch": 7.66, "grad_norm": 3.378875732421875, "learning_rate": 3.40008702872862e-05, "loss": 0.9573, "step": 25595 }, { "epoch": 7.66, "grad_norm": 3.725365400314331, "learning_rate": 3.399538882637364e-05, "loss": 1.1023, "step": 25600 }, { "epoch": 7.66, "grad_norm": 2.4383790493011475, "learning_rate": 3.3989906868655104e-05, "loss": 1.1758, "step": 25605 }, { "epoch": 7.66, "grad_norm": 3.5799477100372314, "learning_rate": 3.398442441443336e-05, "loss": 1.1651, "step": 25610 }, { "epoch": 7.66, "grad_norm": 3.4080264568328857, "learning_rate": 3.397894146401118e-05, "loss": 1.0039, "step": 25615 }, { "epoch": 7.67, "grad_norm": 1.794622540473938, "learning_rate": 3.397345801769141e-05, "loss": 1.1809, "step": 25620 }, { "epoch": 7.67, "grad_norm": 2.9816510677337646, "learning_rate": 3.3967974075776875e-05, "loss": 1.263, "step": 25625 }, { "epoch": 7.67, "grad_norm": 2.4955384731292725, "learning_rate": 3.3962489638570464e-05, "loss": 1.151, "step": 25630 }, { "epoch": 7.67, "grad_norm": 2.3042807579040527, "learning_rate": 3.395700470637506e-05, "loss": 1.0265, "step": 25635 }, { "epoch": 7.67, "grad_norm": 1.0877710580825806, "learning_rate": 3.3951519279493585e-05, "loss": 1.2262, "step": 25640 }, { "epoch": 7.67, "grad_norm": 2.408581018447876, "learning_rate": 3.394603335822902e-05, "loss": 1.1331, "step": 25645 }, { "epoch": 7.67, "grad_norm": 1.5194898843765259, "learning_rate": 3.3940546942884324e-05, "loss": 1.1822, "step": 25650 }, { "epoch": 7.68, "grad_norm": 1.858493447303772, "learning_rate": 3.393506003376251e-05, "loss": 1.0194, "step": 25655 }, { "epoch": 7.68, "grad_norm": 1.8844908475875854, "learning_rate": 3.392957263116663e-05, "loss": 1.1915, "step": 25660 }, { "epoch": 7.68, "grad_norm": 2.1716675758361816, "learning_rate": 3.392408473539973e-05, "loss": 
1.1354, "step": 25665 }, { "epoch": 7.68, "grad_norm": 5.164560317993164, "learning_rate": 3.391859634676491e-05, "loss": 1.1757, "step": 25670 }, { "epoch": 7.68, "grad_norm": 3.698439359664917, "learning_rate": 3.3913107465565274e-05, "loss": 1.0446, "step": 25675 }, { "epoch": 7.68, "grad_norm": 7.462309837341309, "learning_rate": 3.390761809210398e-05, "loss": 1.1909, "step": 25680 }, { "epoch": 7.68, "grad_norm": 3.320486545562744, "learning_rate": 3.39021282266842e-05, "loss": 1.1427, "step": 25685 }, { "epoch": 7.69, "grad_norm": 1.506848931312561, "learning_rate": 3.389663786960913e-05, "loss": 1.2685, "step": 25690 }, { "epoch": 7.69, "grad_norm": 0.9463053345680237, "learning_rate": 3.3891147021182004e-05, "loss": 1.2191, "step": 25695 }, { "epoch": 7.69, "grad_norm": 1.4177323579788208, "learning_rate": 3.388565568170607e-05, "loss": 1.1461, "step": 25700 }, { "epoch": 7.69, "grad_norm": 1.9486898183822632, "learning_rate": 3.38801638514846e-05, "loss": 1.064, "step": 25705 }, { "epoch": 7.69, "grad_norm": 3.4581215381622314, "learning_rate": 3.3874671530820915e-05, "loss": 1.1031, "step": 25710 }, { "epoch": 7.69, "grad_norm": 2.35516095161438, "learning_rate": 3.386917872001835e-05, "loss": 1.1787, "step": 25715 }, { "epoch": 7.7, "grad_norm": 5.054357528686523, "learning_rate": 3.3863685419380254e-05, "loss": 1.2588, "step": 25720 }, { "epoch": 7.7, "grad_norm": 2.7185957431793213, "learning_rate": 3.385819162921003e-05, "loss": 0.9498, "step": 25725 }, { "epoch": 7.7, "grad_norm": 1.8890604972839355, "learning_rate": 3.385269734981109e-05, "loss": 1.2021, "step": 25730 }, { "epoch": 7.7, "grad_norm": 2.1678149700164795, "learning_rate": 3.384720258148688e-05, "loss": 1.1627, "step": 25735 }, { "epoch": 7.7, "grad_norm": 2.4753189086914062, "learning_rate": 3.384170732454087e-05, "loss": 1.2426, "step": 25740 }, { "epoch": 7.7, "grad_norm": 1.5026155710220337, "learning_rate": 3.3836211579276546e-05, "loss": 1.1725, "step": 25745 }, { "epoch": 7.7, 
"grad_norm": 2.9787325859069824, "learning_rate": 3.3830715345997444e-05, "loss": 1.2786, "step": 25750 }, { "epoch": 7.71, "grad_norm": 1.6327831745147705, "learning_rate": 3.382521862500712e-05, "loss": 1.1652, "step": 25755 }, { "epoch": 7.71, "grad_norm": 3.5435101985931396, "learning_rate": 3.381972141660914e-05, "loss": 1.08, "step": 25760 }, { "epoch": 7.71, "grad_norm": 2.974608898162842, "learning_rate": 3.381422372110711e-05, "loss": 1.2794, "step": 25765 }, { "epoch": 7.71, "grad_norm": 1.1757948398590088, "learning_rate": 3.3808725538804667e-05, "loss": 1.1428, "step": 25770 }, { "epoch": 7.71, "grad_norm": 2.5623114109039307, "learning_rate": 3.380322687000547e-05, "loss": 1.1777, "step": 25775 }, { "epoch": 7.71, "grad_norm": 1.8030344247817993, "learning_rate": 3.37977277150132e-05, "loss": 1.3782, "step": 25780 }, { "epoch": 7.71, "grad_norm": 2.135564088821411, "learning_rate": 3.379222807413158e-05, "loss": 1.2467, "step": 25785 }, { "epoch": 7.72, "grad_norm": 6.063063144683838, "learning_rate": 3.3786727947664344e-05, "loss": 1.1731, "step": 25790 }, { "epoch": 7.72, "grad_norm": 2.186962842941284, "learning_rate": 3.378122733591525e-05, "loss": 1.2181, "step": 25795 }, { "epoch": 7.72, "grad_norm": 1.757891058921814, "learning_rate": 3.37757262391881e-05, "loss": 1.151, "step": 25800 }, { "epoch": 7.72, "grad_norm": 17.243751525878906, "learning_rate": 3.377022465778671e-05, "loss": 1.1469, "step": 25805 }, { "epoch": 7.72, "grad_norm": 3.9934499263763428, "learning_rate": 3.376472259201493e-05, "loss": 1.055, "step": 25810 }, { "epoch": 7.72, "grad_norm": 1.4664825201034546, "learning_rate": 3.375922004217663e-05, "loss": 1.2219, "step": 25815 }, { "epoch": 7.73, "grad_norm": 2.3855884075164795, "learning_rate": 3.3753717008575716e-05, "loss": 1.2283, "step": 25820 }, { "epoch": 7.73, "grad_norm": 3.64817476272583, "learning_rate": 3.374821349151611e-05, "loss": 1.0902, "step": 25825 }, { "epoch": 7.73, "grad_norm": 5.835208415985107, 
"learning_rate": 3.374270949130176e-05, "loss": 1.1913, "step": 25830 }, { "epoch": 7.73, "grad_norm": 8.944743156433105, "learning_rate": 3.373720500823666e-05, "loss": 1.1375, "step": 25835 }, { "epoch": 7.73, "grad_norm": 1.3471863269805908, "learning_rate": 3.37317000426248e-05, "loss": 1.1213, "step": 25840 }, { "epoch": 7.73, "grad_norm": 1.5816328525543213, "learning_rate": 3.3726194594770224e-05, "loss": 1.3163, "step": 25845 }, { "epoch": 7.73, "grad_norm": 3.4675426483154297, "learning_rate": 3.3720688664976996e-05, "loss": 1.1634, "step": 25850 }, { "epoch": 7.74, "grad_norm": 1.961869478225708, "learning_rate": 3.3715182253549205e-05, "loss": 1.1437, "step": 25855 }, { "epoch": 7.74, "grad_norm": 3.2622334957122803, "learning_rate": 3.3709675360790945e-05, "loss": 1.1406, "step": 25860 }, { "epoch": 7.74, "grad_norm": 2.293381690979004, "learning_rate": 3.370416798700637e-05, "loss": 1.314, "step": 25865 }, { "epoch": 7.74, "grad_norm": 3.0880744457244873, "learning_rate": 3.369866013249965e-05, "loss": 1.2104, "step": 25870 }, { "epoch": 7.74, "grad_norm": 0.9818011522293091, "learning_rate": 3.369315179757496e-05, "loss": 1.205, "step": 25875 }, { "epoch": 7.74, "grad_norm": 1.3982211351394653, "learning_rate": 3.368764298253654e-05, "loss": 1.1116, "step": 25880 }, { "epoch": 7.74, "grad_norm": 2.2384934425354004, "learning_rate": 3.368213368768863e-05, "loss": 1.1532, "step": 25885 }, { "epoch": 7.75, "grad_norm": 2.110853433609009, "learning_rate": 3.3676623913335507e-05, "loss": 1.3154, "step": 25890 }, { "epoch": 7.75, "grad_norm": 5.248104095458984, "learning_rate": 3.367111365978146e-05, "loss": 1.187, "step": 25895 }, { "epoch": 7.75, "grad_norm": 2.6548986434936523, "learning_rate": 3.3665602927330814e-05, "loss": 1.0928, "step": 25900 }, { "epoch": 7.75, "grad_norm": 2.44582200050354, "learning_rate": 3.3660091716287925e-05, "loss": 1.2958, "step": 25905 }, { "epoch": 7.75, "grad_norm": 2.185173273086548, "learning_rate": 
3.3654580026957176e-05, "loss": 1.0973, "step": 25910 }, { "epoch": 7.75, "grad_norm": 4.19801664352417, "learning_rate": 3.364906785964297e-05, "loss": 1.2329, "step": 25915 }, { "epoch": 7.75, "grad_norm": 2.9229116439819336, "learning_rate": 3.364355521464974e-05, "loss": 1.0089, "step": 25920 }, { "epoch": 7.76, "grad_norm": 3.5953376293182373, "learning_rate": 3.363804209228192e-05, "loss": 1.1683, "step": 25925 }, { "epoch": 7.76, "grad_norm": 3.6482677459716797, "learning_rate": 3.363252849284404e-05, "loss": 1.1713, "step": 25930 }, { "epoch": 7.76, "grad_norm": 3.2520852088928223, "learning_rate": 3.3627014416640565e-05, "loss": 1.4132, "step": 25935 }, { "epoch": 7.76, "grad_norm": 3.2956299781799316, "learning_rate": 3.362149986397606e-05, "loss": 1.3033, "step": 25940 }, { "epoch": 7.76, "grad_norm": 2.4566397666931152, "learning_rate": 3.361598483515507e-05, "loss": 1.1382, "step": 25945 }, { "epoch": 7.76, "grad_norm": 2.1512653827667236, "learning_rate": 3.3610469330482205e-05, "loss": 1.0305, "step": 25950 }, { "epoch": 7.77, "grad_norm": 2.5325844287872314, "learning_rate": 3.360495335026207e-05, "loss": 1.0878, "step": 25955 }, { "epoch": 7.77, "grad_norm": 10.988487243652344, "learning_rate": 3.35994368947993e-05, "loss": 1.0437, "step": 25960 }, { "epoch": 7.77, "grad_norm": 1.2049709558486938, "learning_rate": 3.359391996439857e-05, "loss": 1.2115, "step": 25965 }, { "epoch": 7.77, "grad_norm": 4.1528143882751465, "learning_rate": 3.358840255936457e-05, "loss": 1.2731, "step": 25970 }, { "epoch": 7.77, "grad_norm": 1.840437889099121, "learning_rate": 3.3582884680002024e-05, "loss": 1.1867, "step": 25975 }, { "epoch": 7.77, "grad_norm": 1.5152828693389893, "learning_rate": 3.3577366326615676e-05, "loss": 1.297, "step": 25980 }, { "epoch": 7.77, "grad_norm": 2.739409923553467, "learning_rate": 3.357184749951031e-05, "loss": 1.205, "step": 25985 }, { "epoch": 7.78, "grad_norm": 2.3803489208221436, "learning_rate": 3.3566328198990713e-05, "loss": 
0.934, "step": 25990 }, { "epoch": 7.78, "grad_norm": 1.749653697013855, "learning_rate": 3.356080842536171e-05, "loss": 1.246, "step": 25995 }, { "epoch": 7.78, "grad_norm": 1.9984244108200073, "learning_rate": 3.355528817892816e-05, "loss": 1.162, "step": 26000 }, { "epoch": 7.78, "grad_norm": 1.7096799612045288, "learning_rate": 3.354976745999494e-05, "loss": 1.1473, "step": 26005 }, { "epoch": 7.78, "grad_norm": 2.5412979125976562, "learning_rate": 3.354424626886694e-05, "loss": 1.2047, "step": 26010 }, { "epoch": 7.78, "grad_norm": 1.5412076711654663, "learning_rate": 3.3538724605849115e-05, "loss": 1.2091, "step": 26015 }, { "epoch": 7.78, "grad_norm": 1.5192663669586182, "learning_rate": 3.353320247124639e-05, "loss": 1.2168, "step": 26020 }, { "epoch": 7.79, "grad_norm": 1.3354765176773071, "learning_rate": 3.352767986536377e-05, "loss": 1.177, "step": 26025 }, { "epoch": 7.79, "grad_norm": 2.89026141166687, "learning_rate": 3.352215678850625e-05, "loss": 1.0474, "step": 26030 }, { "epoch": 7.79, "grad_norm": 1.4622420072555542, "learning_rate": 3.351663324097888e-05, "loss": 1.0757, "step": 26035 }, { "epoch": 7.79, "grad_norm": 3.365771532058716, "learning_rate": 3.35111092230867e-05, "loss": 1.1737, "step": 26040 }, { "epoch": 7.79, "grad_norm": 8.960780143737793, "learning_rate": 3.35055847351348e-05, "loss": 1.1499, "step": 26045 }, { "epoch": 7.79, "grad_norm": 3.380896806716919, "learning_rate": 3.350005977742831e-05, "loss": 1.1441, "step": 26050 }, { "epoch": 7.8, "grad_norm": 2.153420925140381, "learning_rate": 3.3494534350272344e-05, "loss": 1.1652, "step": 26055 }, { "epoch": 7.8, "grad_norm": 3.0268092155456543, "learning_rate": 3.348900845397208e-05, "loss": 1.3158, "step": 26060 }, { "epoch": 7.8, "grad_norm": 5.089029788970947, "learning_rate": 3.34834820888327e-05, "loss": 1.093, "step": 26065 }, { "epoch": 7.8, "grad_norm": 1.8281073570251465, "learning_rate": 3.347795525515942e-05, "loss": 1.2282, "step": 26070 }, { "epoch": 7.8, 
"grad_norm": 2.1014137268066406, "learning_rate": 3.347242795325749e-05, "loss": 1.1495, "step": 26075 }, { "epoch": 7.8, "grad_norm": 2.044297933578491, "learning_rate": 3.3466900183432164e-05, "loss": 1.0403, "step": 26080 }, { "epoch": 7.8, "grad_norm": 2.4856858253479004, "learning_rate": 3.346137194598874e-05, "loss": 1.1332, "step": 26085 }, { "epoch": 7.81, "grad_norm": 2.4128828048706055, "learning_rate": 3.345584324123254e-05, "loss": 1.0393, "step": 26090 }, { "epoch": 7.81, "grad_norm": 1.2683279514312744, "learning_rate": 3.345031406946891e-05, "loss": 1.2443, "step": 26095 }, { "epoch": 7.81, "grad_norm": 2.806605577468872, "learning_rate": 3.344478443100322e-05, "loss": 1.1122, "step": 26100 }, { "epoch": 7.81, "grad_norm": 2.0906083583831787, "learning_rate": 3.343925432614086e-05, "loss": 1.0038, "step": 26105 }, { "epoch": 7.81, "grad_norm": 2.3186562061309814, "learning_rate": 3.3433723755187255e-05, "loss": 1.1656, "step": 26110 }, { "epoch": 7.81, "grad_norm": 2.0462429523468018, "learning_rate": 3.342819271844787e-05, "loss": 1.1485, "step": 26115 }, { "epoch": 7.81, "grad_norm": 1.9167912006378174, "learning_rate": 3.342266121622814e-05, "loss": 1.2493, "step": 26120 }, { "epoch": 7.82, "grad_norm": 3.755826473236084, "learning_rate": 3.3417129248833596e-05, "loss": 1.1989, "step": 26125 }, { "epoch": 7.82, "grad_norm": 3.0041463375091553, "learning_rate": 3.3411596816569746e-05, "loss": 1.1634, "step": 26130 }, { "epoch": 7.82, "grad_norm": 3.452164649963379, "learning_rate": 3.340606391974215e-05, "loss": 1.2504, "step": 26135 }, { "epoch": 7.82, "grad_norm": 6.089219093322754, "learning_rate": 3.340053055865639e-05, "loss": 1.0353, "step": 26140 }, { "epoch": 7.82, "grad_norm": 0.934475839138031, "learning_rate": 3.339499673361805e-05, "loss": 1.2358, "step": 26145 }, { "epoch": 7.82, "grad_norm": 7.745322227478027, "learning_rate": 3.3389462444932765e-05, "loss": 1.1976, "step": 26150 }, { "epoch": 7.83, "grad_norm": 1.4742351770401, 
"learning_rate": 3.338392769290619e-05, "loss": 1.2129, "step": 26155 }, { "epoch": 7.83, "grad_norm": 1.67327880859375, "learning_rate": 3.337839247784401e-05, "loss": 1.1415, "step": 26160 }, { "epoch": 7.83, "grad_norm": 4.258670806884766, "learning_rate": 3.337285680005192e-05, "loss": 1.0752, "step": 26165 }, { "epoch": 7.83, "grad_norm": 2.024472236633301, "learning_rate": 3.336732065983565e-05, "loss": 1.0806, "step": 26170 }, { "epoch": 7.83, "grad_norm": 1.1518867015838623, "learning_rate": 3.336178405750095e-05, "loss": 1.2075, "step": 26175 }, { "epoch": 7.83, "grad_norm": 3.1105754375457764, "learning_rate": 3.3356246993353617e-05, "loss": 1.1183, "step": 26180 }, { "epoch": 7.83, "grad_norm": 1.1292957067489624, "learning_rate": 3.335070946769945e-05, "loss": 1.381, "step": 26185 }, { "epoch": 7.84, "grad_norm": 1.872825264930725, "learning_rate": 3.3345171480844275e-05, "loss": 1.0752, "step": 26190 }, { "epoch": 7.84, "grad_norm": 1.4173269271850586, "learning_rate": 3.3339633033093955e-05, "loss": 1.0798, "step": 26195 }, { "epoch": 7.84, "grad_norm": 3.544238567352295, "learning_rate": 3.333409412475437e-05, "loss": 1.2383, "step": 26200 }, { "epoch": 7.84, "grad_norm": 2.445939064025879, "learning_rate": 3.3328554756131423e-05, "loss": 1.1121, "step": 26205 }, { "epoch": 7.84, "grad_norm": 2.592607021331787, "learning_rate": 3.332301492753107e-05, "loss": 1.0819, "step": 26210 }, { "epoch": 7.84, "grad_norm": 2.417942523956299, "learning_rate": 3.3317474639259245e-05, "loss": 1.128, "step": 26215 }, { "epoch": 7.84, "grad_norm": 1.7295610904693604, "learning_rate": 3.331193389162194e-05, "loss": 1.18, "step": 26220 }, { "epoch": 7.85, "grad_norm": 7.857126235961914, "learning_rate": 3.330639268492517e-05, "loss": 0.9577, "step": 26225 }, { "epoch": 7.85, "grad_norm": 1.7152934074401855, "learning_rate": 3.330085101947496e-05, "loss": 1.2462, "step": 26230 }, { "epoch": 7.85, "grad_norm": 1.7840052843093872, "learning_rate": 3.3295308895577376e-05, 
"loss": 0.9696, "step": 26235 }, { "epoch": 7.85, "grad_norm": 1.5356841087341309, "learning_rate": 3.32897663135385e-05, "loss": 1.1286, "step": 26240 }, { "epoch": 7.85, "grad_norm": 1.8642561435699463, "learning_rate": 3.3284223273664465e-05, "loss": 0.9585, "step": 26245 }, { "epoch": 7.85, "grad_norm": 1.54176664352417, "learning_rate": 3.327867977626138e-05, "loss": 1.177, "step": 26250 }, { "epoch": 7.86, "grad_norm": 3.919142961502075, "learning_rate": 3.327313582163542e-05, "loss": 1.1158, "step": 26255 }, { "epoch": 7.86, "grad_norm": 2.54856276512146, "learning_rate": 3.326759141009276e-05, "loss": 1.1494, "step": 26260 }, { "epoch": 7.86, "grad_norm": 2.491251230239868, "learning_rate": 3.326204654193962e-05, "loss": 1.2343, "step": 26265 }, { "epoch": 7.86, "grad_norm": 4.712347030639648, "learning_rate": 3.325650121748225e-05, "loss": 1.1057, "step": 26270 }, { "epoch": 7.86, "grad_norm": 1.5245498418807983, "learning_rate": 3.325095543702688e-05, "loss": 1.1575, "step": 26275 }, { "epoch": 7.86, "grad_norm": 3.232236385345459, "learning_rate": 3.324540920087983e-05, "loss": 1.1603, "step": 26280 }, { "epoch": 7.86, "grad_norm": 1.0489842891693115, "learning_rate": 3.3239862509347396e-05, "loss": 1.2751, "step": 26285 }, { "epoch": 7.87, "grad_norm": 1.4679367542266846, "learning_rate": 3.3234315362735926e-05, "loss": 1.0614, "step": 26290 }, { "epoch": 7.87, "grad_norm": 4.200760364532471, "learning_rate": 3.3228767761351776e-05, "loss": 1.251, "step": 26295 }, { "epoch": 7.87, "grad_norm": 3.265259265899658, "learning_rate": 3.3223219705501334e-05, "loss": 1.0798, "step": 26300 }, { "epoch": 7.87, "grad_norm": 5.21251106262207, "learning_rate": 3.3217671195491016e-05, "loss": 1.0074, "step": 26305 }, { "epoch": 7.87, "grad_norm": 2.8614084720611572, "learning_rate": 3.3212122231627265e-05, "loss": 0.9399, "step": 26310 }, { "epoch": 7.87, "grad_norm": 2.9637887477874756, "learning_rate": 3.320657281421653e-05, "loss": 1.1851, "step": 26315 }, { 
"epoch": 7.87, "grad_norm": 2.9170193672180176, "learning_rate": 3.320102294356531e-05, "loss": 1.3154, "step": 26320 }, { "epoch": 7.88, "grad_norm": 4.612870216369629, "learning_rate": 3.319547261998012e-05, "loss": 1.0731, "step": 26325 }, { "epoch": 7.88, "grad_norm": 3.057469129562378, "learning_rate": 3.31899218437675e-05, "loss": 1.0565, "step": 26330 }, { "epoch": 7.88, "grad_norm": 3.9015371799468994, "learning_rate": 3.3184370615234004e-05, "loss": 1.0798, "step": 26335 }, { "epoch": 7.88, "grad_norm": 2.964965343475342, "learning_rate": 3.317881893468623e-05, "loss": 1.0914, "step": 26340 }, { "epoch": 7.88, "grad_norm": 7.357722282409668, "learning_rate": 3.3173266802430784e-05, "loss": 1.3035, "step": 26345 }, { "epoch": 7.88, "grad_norm": 1.9388129711151123, "learning_rate": 3.3167714218774315e-05, "loss": 1.1861, "step": 26350 }, { "epoch": 7.89, "grad_norm": 1.838700532913208, "learning_rate": 3.316216118402347e-05, "loss": 1.0244, "step": 26355 }, { "epoch": 7.89, "grad_norm": 2.831838846206665, "learning_rate": 3.315660769848495e-05, "loss": 1.0736, "step": 26360 }, { "epoch": 7.89, "grad_norm": 4.200833320617676, "learning_rate": 3.315105376246547e-05, "loss": 1.2928, "step": 26365 }, { "epoch": 7.89, "grad_norm": 6.066209316253662, "learning_rate": 3.3145499376271754e-05, "loss": 1.0764, "step": 26370 }, { "epoch": 7.89, "grad_norm": 1.2264248132705688, "learning_rate": 3.3139944540210574e-05, "loss": 1.2454, "step": 26375 }, { "epoch": 7.89, "grad_norm": 1.508028268814087, "learning_rate": 3.3134389254588724e-05, "loss": 1.0258, "step": 26380 }, { "epoch": 7.89, "grad_norm": 2.4264774322509766, "learning_rate": 3.312883351971301e-05, "loss": 1.0888, "step": 26385 }, { "epoch": 7.9, "grad_norm": 3.008584499359131, "learning_rate": 3.312327733589027e-05, "loss": 1.2136, "step": 26390 }, { "epoch": 7.9, "grad_norm": 2.624093532562256, "learning_rate": 3.3117720703427365e-05, "loss": 1.0683, "step": 26395 }, { "epoch": 7.9, "grad_norm": 
2.020382881164551, "learning_rate": 3.311216362263119e-05, "loss": 1.283, "step": 26400 }, { "epoch": 7.9, "grad_norm": 1.6679294109344482, "learning_rate": 3.310660609380865e-05, "loss": 1.193, "step": 26405 }, { "epoch": 7.9, "grad_norm": 1.7250815629959106, "learning_rate": 3.3101048117266675e-05, "loss": 1.1785, "step": 26410 }, { "epoch": 7.9, "grad_norm": 3.259265661239624, "learning_rate": 3.3095489693312234e-05, "loss": 1.1396, "step": 26415 }, { "epoch": 7.9, "grad_norm": 6.142773151397705, "learning_rate": 3.308993082225231e-05, "loss": 1.0735, "step": 26420 }, { "epoch": 7.91, "grad_norm": 2.9731175899505615, "learning_rate": 3.308437150439392e-05, "loss": 1.1468, "step": 26425 }, { "epoch": 7.91, "grad_norm": 2.0407536029815674, "learning_rate": 3.3078811740044096e-05, "loss": 1.3368, "step": 26430 }, { "epoch": 7.91, "grad_norm": 2.396974563598633, "learning_rate": 3.30732515295099e-05, "loss": 1.0735, "step": 26435 }, { "epoch": 7.91, "grad_norm": 1.7831975221633911, "learning_rate": 3.306769087309841e-05, "loss": 1.212, "step": 26440 }, { "epoch": 7.91, "grad_norm": 2.968552827835083, "learning_rate": 3.3062129771116734e-05, "loss": 1.0222, "step": 26445 }, { "epoch": 7.91, "grad_norm": 2.5638179779052734, "learning_rate": 3.305656822387201e-05, "loss": 1.1809, "step": 26450 }, { "epoch": 7.92, "grad_norm": 11.847745895385742, "learning_rate": 3.30510062316714e-05, "loss": 1.2684, "step": 26455 }, { "epoch": 7.92, "grad_norm": 2.3828535079956055, "learning_rate": 3.304544379482209e-05, "loss": 1.2027, "step": 26460 }, { "epoch": 7.92, "grad_norm": 3.3371334075927734, "learning_rate": 3.303988091363128e-05, "loss": 1.2209, "step": 26465 }, { "epoch": 7.92, "grad_norm": 1.5403908491134644, "learning_rate": 3.3034317588406205e-05, "loss": 1.0056, "step": 26470 }, { "epoch": 7.92, "grad_norm": 3.629544973373413, "learning_rate": 3.302875381945412e-05, "loss": 1.2427, "step": 26475 }, { "epoch": 7.92, "grad_norm": 1.5579273700714111, "learning_rate": 
3.30231896070823e-05, "loss": 1.2536, "step": 26480 }, { "epoch": 7.92, "grad_norm": 3.202047824859619, "learning_rate": 3.3017624951598066e-05, "loss": 1.0117, "step": 26485 }, { "epoch": 7.93, "grad_norm": 4.338261127471924, "learning_rate": 3.301205985330873e-05, "loss": 0.9417, "step": 26490 }, { "epoch": 7.93, "grad_norm": 0.7901287078857422, "learning_rate": 3.300649431252166e-05, "loss": 1.005, "step": 26495 }, { "epoch": 7.93, "grad_norm": 2.2430081367492676, "learning_rate": 3.300092832954425e-05, "loss": 1.2206, "step": 26500 }, { "epoch": 7.93, "grad_norm": 2.6997382640838623, "learning_rate": 3.2995361904683866e-05, "loss": 1.3153, "step": 26505 }, { "epoch": 7.93, "grad_norm": 4.912943363189697, "learning_rate": 3.2989795038247956e-05, "loss": 1.1331, "step": 26510 }, { "epoch": 7.93, "grad_norm": 1.721016526222229, "learning_rate": 3.298422773054397e-05, "loss": 1.3072, "step": 26515 }, { "epoch": 7.93, "grad_norm": 0.9054770469665527, "learning_rate": 3.297865998187939e-05, "loss": 1.2066, "step": 26520 }, { "epoch": 7.94, "grad_norm": 1.856542706489563, "learning_rate": 3.297309179256171e-05, "loss": 1.2847, "step": 26525 }, { "epoch": 7.94, "grad_norm": 2.9849486351013184, "learning_rate": 3.2967523162898465e-05, "loss": 1.2153, "step": 26530 }, { "epoch": 7.94, "grad_norm": 2.4759957790374756, "learning_rate": 3.296195409319719e-05, "loss": 1.1859, "step": 26535 }, { "epoch": 7.94, "grad_norm": 1.966263771057129, "learning_rate": 3.295638458376546e-05, "loss": 1.2078, "step": 26540 }, { "epoch": 7.94, "grad_norm": 3.476435422897339, "learning_rate": 3.295081463491089e-05, "loss": 1.1706, "step": 26545 }, { "epoch": 7.94, "grad_norm": 3.4568331241607666, "learning_rate": 3.294524424694109e-05, "loss": 1.2634, "step": 26550 }, { "epoch": 7.94, "grad_norm": 1.4559932947158813, "learning_rate": 3.2939673420163706e-05, "loss": 1.0625, "step": 26555 }, { "epoch": 7.95, "grad_norm": 1.8391364812850952, "learning_rate": 3.293410215488642e-05, "loss": 
1.1624, "step": 26560 }, { "epoch": 7.95, "grad_norm": 2.4696767330169678, "learning_rate": 3.292853045141691e-05, "loss": 1.2677, "step": 26565 }, { "epoch": 7.95, "grad_norm": 1.480440378189087, "learning_rate": 3.2922958310062904e-05, "loss": 1.0199, "step": 26570 }, { "epoch": 7.95, "grad_norm": 1.3950989246368408, "learning_rate": 3.291738573113216e-05, "loss": 1.2905, "step": 26575 }, { "epoch": 7.95, "grad_norm": 2.3112564086914062, "learning_rate": 3.291181271493242e-05, "loss": 1.1195, "step": 26580 }, { "epoch": 7.95, "grad_norm": 5.1455302238464355, "learning_rate": 3.290623926177148e-05, "loss": 1.1371, "step": 26585 }, { "epoch": 7.96, "grad_norm": 7.398214340209961, "learning_rate": 3.290066537195717e-05, "loss": 1.1394, "step": 26590 }, { "epoch": 7.96, "grad_norm": 3.4421448707580566, "learning_rate": 3.2895091045797335e-05, "loss": 1.372, "step": 26595 }, { "epoch": 7.96, "grad_norm": 5.3825764656066895, "learning_rate": 3.288951628359982e-05, "loss": 1.3115, "step": 26600 }, { "epoch": 7.96, "grad_norm": 2.211780548095703, "learning_rate": 3.288394108567252e-05, "loss": 1.1863, "step": 26605 }, { "epoch": 7.96, "grad_norm": 4.291854381561279, "learning_rate": 3.287836545232335e-05, "loss": 1.1253, "step": 26610 }, { "epoch": 7.96, "grad_norm": 2.0238168239593506, "learning_rate": 3.2872789383860246e-05, "loss": 1.34, "step": 26615 }, { "epoch": 7.96, "grad_norm": 2.0490705966949463, "learning_rate": 3.286721288059116e-05, "loss": 1.0287, "step": 26620 }, { "epoch": 7.97, "grad_norm": 2.1137607097625732, "learning_rate": 3.28616359428241e-05, "loss": 0.9508, "step": 26625 }, { "epoch": 7.97, "grad_norm": 3.983616352081299, "learning_rate": 3.285605857086704e-05, "loss": 1.3123, "step": 26630 }, { "epoch": 7.97, "grad_norm": 2.677525520324707, "learning_rate": 3.285048076502805e-05, "loss": 0.9677, "step": 26635 }, { "epoch": 7.97, "grad_norm": 3.7401962280273438, "learning_rate": 3.284490252561515e-05, "loss": 1.2097, "step": 26640 }, { "epoch": 
7.97, "grad_norm": 2.0874180793762207, "learning_rate": 3.283932385293644e-05, "loss": 0.9952, "step": 26645 }, { "epoch": 7.97, "grad_norm": 2.5544826984405518, "learning_rate": 3.283374474730003e-05, "loss": 1.1315, "step": 26650 }, { "epoch": 7.97, "grad_norm": 1.7675901651382446, "learning_rate": 3.2828165209014036e-05, "loss": 1.1583, "step": 26655 }, { "epoch": 7.98, "grad_norm": 3.3144235610961914, "learning_rate": 3.282258523838663e-05, "loss": 1.1255, "step": 26660 }, { "epoch": 7.98, "grad_norm": 3.3691461086273193, "learning_rate": 3.281700483572595e-05, "loss": 1.2332, "step": 26665 }, { "epoch": 7.98, "grad_norm": 3.544421911239624, "learning_rate": 3.281142400134023e-05, "loss": 1.0824, "step": 26670 }, { "epoch": 7.98, "grad_norm": 1.068796157836914, "learning_rate": 3.280584273553768e-05, "loss": 1.14, "step": 26675 }, { "epoch": 7.98, "grad_norm": 2.5210955142974854, "learning_rate": 3.2800261038626544e-05, "loss": 1.1527, "step": 26680 }, { "epoch": 7.98, "grad_norm": 2.1437249183654785, "learning_rate": 3.279467891091511e-05, "loss": 0.9096, "step": 26685 }, { "epoch": 7.99, "grad_norm": 1.478752613067627, "learning_rate": 3.278909635271165e-05, "loss": 1.0211, "step": 26690 }, { "epoch": 7.99, "grad_norm": 4.072501182556152, "learning_rate": 3.27835133643245e-05, "loss": 1.1708, "step": 26695 }, { "epoch": 7.99, "grad_norm": 3.866422653198242, "learning_rate": 3.2777929946062005e-05, "loss": 1.1402, "step": 26700 }, { "epoch": 7.99, "grad_norm": 2.2160439491271973, "learning_rate": 3.277234609823251e-05, "loss": 1.2121, "step": 26705 }, { "epoch": 7.99, "grad_norm": 3.4925968647003174, "learning_rate": 3.276676182114443e-05, "loss": 1.188, "step": 26710 }, { "epoch": 7.99, "grad_norm": 1.9253541231155396, "learning_rate": 3.276117711510616e-05, "loss": 1.1559, "step": 26715 }, { "epoch": 7.99, "grad_norm": 1.081268072128296, "learning_rate": 3.2755591980426146e-05, "loss": 1.2158, "step": 26720 }, { "epoch": 8.0, "grad_norm": 1.8544811010360718, 
"learning_rate": 3.275000641741285e-05, "loss": 1.1147, "step": 26725 }, { "epoch": 8.0, "grad_norm": 1.938436508178711, "learning_rate": 3.2744420426374755e-05, "loss": 1.2673, "step": 26730 }, { "epoch": 8.0, "grad_norm": 2.7037627696990967, "learning_rate": 3.273883400762037e-05, "loss": 1.2175, "step": 26735 }, { "epoch": 8.0, "grad_norm": 2.8835654258728027, "learning_rate": 3.2733247161458224e-05, "loss": 1.1878, "step": 26740 }, { "epoch": 8.0, "grad_norm": 3.2548410892486572, "learning_rate": 3.272765988819688e-05, "loss": 1.2278, "step": 26745 }, { "epoch": 8.0, "grad_norm": 2.562904119491577, "learning_rate": 3.2722072188144916e-05, "loss": 1.1261, "step": 26750 }, { "epoch": 8.0, "grad_norm": 1.355686902999878, "learning_rate": 3.271648406161092e-05, "loss": 0.9932, "step": 26755 }, { "epoch": 8.01, "grad_norm": 2.448178768157959, "learning_rate": 3.2710895508903546e-05, "loss": 1.1223, "step": 26760 }, { "epoch": 8.01, "grad_norm": 1.8387428522109985, "learning_rate": 3.270530653033142e-05, "loss": 1.2345, "step": 26765 }, { "epoch": 8.01, "grad_norm": 2.6244566440582275, "learning_rate": 3.269971712620322e-05, "loss": 1.2351, "step": 26770 }, { "epoch": 8.01, "grad_norm": 1.3992868661880493, "learning_rate": 3.269412729682765e-05, "loss": 1.3125, "step": 26775 }, { "epoch": 8.01, "grad_norm": 1.4152156114578247, "learning_rate": 3.268853704251342e-05, "loss": 1.1555, "step": 26780 }, { "epoch": 8.01, "grad_norm": 3.2571237087249756, "learning_rate": 3.2682946363569286e-05, "loss": 0.986, "step": 26785 }, { "epoch": 8.02, "grad_norm": 1.7282525300979614, "learning_rate": 3.267735526030402e-05, "loss": 1.1626, "step": 26790 }, { "epoch": 8.02, "grad_norm": 1.6741749048233032, "learning_rate": 3.267176373302639e-05, "loss": 1.1052, "step": 26795 }, { "epoch": 8.02, "grad_norm": 2.290086030960083, "learning_rate": 3.266617178204523e-05, "loss": 0.9702, "step": 26800 }, { "epoch": 8.02, "grad_norm": 1.1538450717926025, "learning_rate": 
3.2660579407669374e-05, "loss": 1.2276, "step": 26805 }, { "epoch": 8.02, "grad_norm": 5.698193073272705, "learning_rate": 3.265498661020767e-05, "loss": 0.9967, "step": 26810 }, { "epoch": 8.02, "grad_norm": 3.099355459213257, "learning_rate": 3.2649393389969016e-05, "loss": 0.9655, "step": 26815 }, { "epoch": 8.02, "grad_norm": 2.5500590801239014, "learning_rate": 3.264379974726232e-05, "loss": 1.1817, "step": 26820 }, { "epoch": 8.03, "grad_norm": 2.5358612537384033, "learning_rate": 3.2638205682396504e-05, "loss": 1.1314, "step": 26825 }, { "epoch": 8.03, "grad_norm": 1.88272225856781, "learning_rate": 3.2632611195680535e-05, "loss": 1.0866, "step": 26830 }, { "epoch": 8.03, "grad_norm": 2.205331325531006, "learning_rate": 3.262701628742338e-05, "loss": 1.1186, "step": 26835 }, { "epoch": 8.03, "grad_norm": 3.861008644104004, "learning_rate": 3.262142095793404e-05, "loss": 0.9824, "step": 26840 }, { "epoch": 8.03, "grad_norm": 1.2620456218719482, "learning_rate": 3.261582520752154e-05, "loss": 1.1634, "step": 26845 }, { "epoch": 8.03, "grad_norm": 2.01745343208313, "learning_rate": 3.261022903649494e-05, "loss": 1.1332, "step": 26850 }, { "epoch": 8.03, "grad_norm": 2.3341500759124756, "learning_rate": 3.26046324451633e-05, "loss": 1.1015, "step": 26855 }, { "epoch": 8.04, "grad_norm": 2.3520326614379883, "learning_rate": 3.2599035433835706e-05, "loss": 0.9539, "step": 26860 }, { "epoch": 8.04, "grad_norm": 2.2860143184661865, "learning_rate": 3.2593438002821286e-05, "loss": 1.098, "step": 26865 }, { "epoch": 8.04, "grad_norm": 4.713287830352783, "learning_rate": 3.2587840152429186e-05, "loss": 1.0839, "step": 26870 }, { "epoch": 8.04, "grad_norm": 1.481998324394226, "learning_rate": 3.258224188296855e-05, "loss": 1.096, "step": 26875 }, { "epoch": 8.04, "grad_norm": 2.8222692012786865, "learning_rate": 3.257664319474858e-05, "loss": 1.1853, "step": 26880 }, { "epoch": 8.04, "grad_norm": 2.920170307159424, "learning_rate": 3.257104408807848e-05, "loss": 1.0507, 
"step": 26885 }, { "epoch": 8.05, "grad_norm": 3.3776135444641113, "learning_rate": 3.25654445632675e-05, "loss": 1.0174, "step": 26890 }, { "epoch": 8.05, "grad_norm": 2.022411346435547, "learning_rate": 3.255984462062487e-05, "loss": 1.1012, "step": 26895 }, { "epoch": 8.05, "grad_norm": 4.32869291305542, "learning_rate": 3.255424426045987e-05, "loss": 1.0786, "step": 26900 }, { "epoch": 8.05, "grad_norm": 1.3376225233078003, "learning_rate": 3.254864348308182e-05, "loss": 1.1797, "step": 26905 }, { "epoch": 8.05, "grad_norm": 1.1576924324035645, "learning_rate": 3.2543042288800035e-05, "loss": 1.0853, "step": 26910 }, { "epoch": 8.05, "grad_norm": 1.6126631498336792, "learning_rate": 3.2537440677923864e-05, "loss": 1.0122, "step": 26915 }, { "epoch": 8.05, "grad_norm": 2.130544900894165, "learning_rate": 3.253183865076269e-05, "loss": 1.1658, "step": 26920 }, { "epoch": 8.06, "grad_norm": 3.746469259262085, "learning_rate": 3.252623620762589e-05, "loss": 1.1788, "step": 26925 }, { "epoch": 8.06, "grad_norm": 2.503143548965454, "learning_rate": 3.2520633348822884e-05, "loss": 1.1909, "step": 26930 }, { "epoch": 8.06, "grad_norm": 2.250931978225708, "learning_rate": 3.251503007466311e-05, "loss": 1.1565, "step": 26935 }, { "epoch": 8.06, "grad_norm": 1.9409987926483154, "learning_rate": 3.2509426385456046e-05, "loss": 1.1417, "step": 26940 }, { "epoch": 8.06, "grad_norm": 2.2973732948303223, "learning_rate": 3.250382228151116e-05, "loss": 1.1895, "step": 26945 }, { "epoch": 8.06, "grad_norm": 3.5512242317199707, "learning_rate": 3.249821776313798e-05, "loss": 1.1509, "step": 26950 }, { "epoch": 8.06, "grad_norm": 3.031540870666504, "learning_rate": 3.2492612830646025e-05, "loss": 1.1068, "step": 26955 }, { "epoch": 8.07, "grad_norm": 2.2181246280670166, "learning_rate": 3.248700748434485e-05, "loss": 1.1758, "step": 26960 }, { "epoch": 8.07, "grad_norm": 1.102636456489563, "learning_rate": 3.248140172454403e-05, "loss": 1.0737, "step": 26965 }, { "epoch": 8.07, 
"grad_norm": 1.951006293296814, "learning_rate": 3.2475795551553166e-05, "loss": 1.1618, "step": 26970 }, { "epoch": 8.07, "grad_norm": 1.9825594425201416, "learning_rate": 3.2470188965681894e-05, "loss": 1.0879, "step": 26975 }, { "epoch": 8.07, "grad_norm": 4.088083267211914, "learning_rate": 3.246458196723985e-05, "loss": 1.1725, "step": 26980 }, { "epoch": 8.07, "grad_norm": 6.844773292541504, "learning_rate": 3.2458974556536694e-05, "loss": 1.0785, "step": 26985 }, { "epoch": 8.08, "grad_norm": 1.5896713733673096, "learning_rate": 3.245336673388213e-05, "loss": 1.0559, "step": 26990 }, { "epoch": 8.08, "grad_norm": 1.1929066181182861, "learning_rate": 3.244775849958587e-05, "loss": 1.0536, "step": 26995 }, { "epoch": 8.08, "grad_norm": 1.1825475692749023, "learning_rate": 3.244214985395765e-05, "loss": 1.2364, "step": 27000 }, { "epoch": 8.08, "grad_norm": 3.0042402744293213, "learning_rate": 3.2436540797307224e-05, "loss": 1.1558, "step": 27005 }, { "epoch": 8.08, "grad_norm": 1.305233359336853, "learning_rate": 3.2430931329944384e-05, "loss": 1.1471, "step": 27010 }, { "epoch": 8.08, "grad_norm": 2.5870091915130615, "learning_rate": 3.242532145217894e-05, "loss": 1.2085, "step": 27015 }, { "epoch": 8.08, "grad_norm": 1.0035154819488525, "learning_rate": 3.24197111643207e-05, "loss": 0.9627, "step": 27020 }, { "epoch": 8.09, "grad_norm": 2.0611767768859863, "learning_rate": 3.241410046667952e-05, "loss": 1.0554, "step": 27025 }, { "epoch": 8.09, "grad_norm": 1.6869128942489624, "learning_rate": 3.2408489359565286e-05, "loss": 1.0618, "step": 27030 }, { "epoch": 8.09, "grad_norm": 1.8879674673080444, "learning_rate": 3.240287784328789e-05, "loss": 1.1033, "step": 27035 }, { "epoch": 8.09, "grad_norm": 3.194080114364624, "learning_rate": 3.239726591815724e-05, "loss": 0.9735, "step": 27040 }, { "epoch": 8.09, "grad_norm": 2.4983654022216797, "learning_rate": 3.239165358448327e-05, "loss": 1.1598, "step": 27045 }, { "epoch": 8.09, "grad_norm": 
1.8231905698776245, "learning_rate": 3.2386040842575976e-05, "loss": 1.1378, "step": 27050 }, { "epoch": 8.09, "grad_norm": 4.191860198974609, "learning_rate": 3.238042769274531e-05, "loss": 0.9522, "step": 27055 }, { "epoch": 8.1, "grad_norm": 3.744335412979126, "learning_rate": 3.23748141353013e-05, "loss": 1.3162, "step": 27060 }, { "epoch": 8.1, "grad_norm": 2.127502202987671, "learning_rate": 3.236920017055397e-05, "loss": 0.9964, "step": 27065 }, { "epoch": 8.1, "grad_norm": 3.770505666732788, "learning_rate": 3.2363585798813376e-05, "loss": 1.2214, "step": 27070 }, { "epoch": 8.1, "grad_norm": 3.2522053718566895, "learning_rate": 3.2357971020389586e-05, "loss": 1.1507, "step": 27075 }, { "epoch": 8.1, "grad_norm": 1.4464082717895508, "learning_rate": 3.235235583559271e-05, "loss": 1.0405, "step": 27080 }, { "epoch": 8.1, "grad_norm": 2.175246238708496, "learning_rate": 3.2346740244732866e-05, "loss": 1.284, "step": 27085 }, { "epoch": 8.11, "grad_norm": 3.6082305908203125, "learning_rate": 3.234112424812019e-05, "loss": 1.0077, "step": 27090 }, { "epoch": 8.11, "grad_norm": 2.128209114074707, "learning_rate": 3.233550784606486e-05, "loss": 1.1639, "step": 27095 }, { "epoch": 8.11, "grad_norm": 1.130769968032837, "learning_rate": 3.232989103887704e-05, "loss": 1.2106, "step": 27100 }, { "epoch": 8.11, "grad_norm": 1.5438563823699951, "learning_rate": 3.232427382686697e-05, "loss": 1.1424, "step": 27105 }, { "epoch": 8.11, "grad_norm": 5.550532817840576, "learning_rate": 3.231865621034486e-05, "loss": 1.1006, "step": 27110 }, { "epoch": 8.11, "grad_norm": 20.100221633911133, "learning_rate": 3.2313038189620995e-05, "loss": 0.8935, "step": 27115 }, { "epoch": 8.11, "grad_norm": 2.989924430847168, "learning_rate": 3.230741976500562e-05, "loss": 1.3188, "step": 27120 }, { "epoch": 8.12, "grad_norm": 2.9077560901641846, "learning_rate": 3.2301800936809044e-05, "loss": 1.1537, "step": 27125 }, { "epoch": 8.12, "grad_norm": 1.805272102355957, "learning_rate": 
3.229618170534159e-05, "loss": 1.1026, "step": 27130 }, { "epoch": 8.12, "grad_norm": 3.8493740558624268, "learning_rate": 3.2290562070913613e-05, "loss": 1.1274, "step": 27135 }, { "epoch": 8.12, "grad_norm": 3.3296542167663574, "learning_rate": 3.2284942033835464e-05, "loss": 1.0671, "step": 27140 }, { "epoch": 8.12, "grad_norm": 1.8140013217926025, "learning_rate": 3.2279321594417546e-05, "loss": 1.0956, "step": 27145 }, { "epoch": 8.12, "grad_norm": 2.7062671184539795, "learning_rate": 3.227370075297026e-05, "loss": 0.9826, "step": 27150 }, { "epoch": 8.12, "grad_norm": 1.546039342880249, "learning_rate": 3.226807950980404e-05, "loss": 1.2878, "step": 27155 }, { "epoch": 8.13, "grad_norm": 1.861967921257019, "learning_rate": 3.2262457865229337e-05, "loss": 0.9757, "step": 27160 }, { "epoch": 8.13, "grad_norm": 2.4952914714813232, "learning_rate": 3.2256835819556643e-05, "loss": 1.0902, "step": 27165 }, { "epoch": 8.13, "grad_norm": 4.320837497711182, "learning_rate": 3.225121337309645e-05, "loss": 1.2187, "step": 27170 }, { "epoch": 8.13, "grad_norm": 3.9226810932159424, "learning_rate": 3.224559052615928e-05, "loss": 1.06, "step": 27175 }, { "epoch": 8.13, "grad_norm": 1.381098985671997, "learning_rate": 3.2239967279055675e-05, "loss": 1.1461, "step": 27180 }, { "epoch": 8.13, "grad_norm": 2.233602285385132, "learning_rate": 3.22343436320962e-05, "loss": 1.2388, "step": 27185 }, { "epoch": 8.13, "grad_norm": 1.6678953170776367, "learning_rate": 3.222871958559144e-05, "loss": 1.2351, "step": 27190 }, { "epoch": 8.14, "grad_norm": 2.178392171859741, "learning_rate": 3.2223095139852024e-05, "loss": 1.0681, "step": 27195 }, { "epoch": 8.14, "grad_norm": 1.1121859550476074, "learning_rate": 3.221747029518857e-05, "loss": 1.3043, "step": 27200 }, { "epoch": 8.14, "grad_norm": 1.504831075668335, "learning_rate": 3.221184505191173e-05, "loss": 1.164, "step": 27205 }, { "epoch": 8.14, "grad_norm": 1.5870822668075562, "learning_rate": 3.2206219410332184e-05, "loss": 
1.1253, "step": 27210 }, { "epoch": 8.14, "grad_norm": 2.400480270385742, "learning_rate": 3.220059337076063e-05, "loss": 1.065, "step": 27215 }, { "epoch": 8.14, "grad_norm": 6.77662992477417, "learning_rate": 3.2194966933507794e-05, "loss": 1.043, "step": 27220 }, { "epoch": 8.15, "grad_norm": 2.4106528759002686, "learning_rate": 3.2189340098884405e-05, "loss": 1.33, "step": 27225 }, { "epoch": 8.15, "grad_norm": 6.033102989196777, "learning_rate": 3.2183712867201236e-05, "loss": 1.1468, "step": 27230 }, { "epoch": 8.15, "grad_norm": 6.416871547698975, "learning_rate": 3.2178085238769076e-05, "loss": 1.0036, "step": 27235 }, { "epoch": 8.15, "grad_norm": 3.3531298637390137, "learning_rate": 3.217245721389873e-05, "loss": 1.2439, "step": 27240 }, { "epoch": 8.15, "grad_norm": 1.8699911832809448, "learning_rate": 3.2166828792901025e-05, "loss": 1.0194, "step": 27245 }, { "epoch": 8.15, "grad_norm": 2.013090133666992, "learning_rate": 3.216119997608682e-05, "loss": 1.013, "step": 27250 }, { "epoch": 8.15, "grad_norm": 1.574511170387268, "learning_rate": 3.215557076376698e-05, "loss": 1.1492, "step": 27255 }, { "epoch": 8.16, "grad_norm": 1.5347402095794678, "learning_rate": 3.2149941156252406e-05, "loss": 1.0025, "step": 27260 }, { "epoch": 8.16, "grad_norm": 2.3538808822631836, "learning_rate": 3.214431115385401e-05, "loss": 1.0706, "step": 27265 }, { "epoch": 8.16, "grad_norm": 2.6728079319000244, "learning_rate": 3.213868075688273e-05, "loss": 1.0859, "step": 27270 }, { "epoch": 8.16, "grad_norm": 2.588715076446533, "learning_rate": 3.213304996564955e-05, "loss": 1.058, "step": 27275 }, { "epoch": 8.16, "grad_norm": 2.8418338298797607, "learning_rate": 3.2127418780465423e-05, "loss": 1.2622, "step": 27280 }, { "epoch": 8.16, "grad_norm": 3.8776187896728516, "learning_rate": 3.212178720164136e-05, "loss": 1.0961, "step": 27285 }, { "epoch": 8.16, "grad_norm": 1.6147956848144531, "learning_rate": 3.2116155229488404e-05, "loss": 1.0547, "step": 27290 }, { "epoch": 
8.17, "grad_norm": 1.412688136100769, "learning_rate": 3.211052286431759e-05, "loss": 1.2792, "step": 27295 }, { "epoch": 8.17, "grad_norm": 4.9000654220581055, "learning_rate": 3.210489010643998e-05, "loss": 1.2015, "step": 27300 }, { "epoch": 8.17, "grad_norm": 0.983841061592102, "learning_rate": 3.2099256956166684e-05, "loss": 1.022, "step": 27305 }, { "epoch": 8.17, "grad_norm": 2.0872743129730225, "learning_rate": 3.20936234138088e-05, "loss": 0.9733, "step": 27310 }, { "epoch": 8.17, "grad_norm": 2.8705453872680664, "learning_rate": 3.208798947967748e-05, "loss": 1.1142, "step": 27315 }, { "epoch": 8.17, "grad_norm": 2.3172919750213623, "learning_rate": 3.208235515408385e-05, "loss": 1.217, "step": 27320 }, { "epoch": 8.18, "grad_norm": 2.136756658554077, "learning_rate": 3.207672043733912e-05, "loss": 1.1509, "step": 27325 }, { "epoch": 8.18, "grad_norm": 1.0697053670883179, "learning_rate": 3.207108532975447e-05, "loss": 0.9292, "step": 27330 }, { "epoch": 8.18, "grad_norm": 2.2047574520111084, "learning_rate": 3.206544983164113e-05, "loss": 1.1085, "step": 27335 }, { "epoch": 8.18, "grad_norm": 1.8211325407028198, "learning_rate": 3.205981394331035e-05, "loss": 1.1218, "step": 27340 }, { "epoch": 8.18, "grad_norm": 10.550779342651367, "learning_rate": 3.205417766507336e-05, "loss": 1.009, "step": 27345 }, { "epoch": 8.18, "grad_norm": 1.852550745010376, "learning_rate": 3.204854099724148e-05, "loss": 1.0607, "step": 27350 }, { "epoch": 8.18, "grad_norm": 2.7020022869110107, "learning_rate": 3.2042903940126015e-05, "loss": 1.0989, "step": 27355 }, { "epoch": 8.19, "grad_norm": 1.7700706720352173, "learning_rate": 3.203726649403828e-05, "loss": 1.0878, "step": 27360 }, { "epoch": 8.19, "grad_norm": 6.152558326721191, "learning_rate": 3.203162865928963e-05, "loss": 1.1536, "step": 27365 }, { "epoch": 8.19, "grad_norm": 1.028457522392273, "learning_rate": 3.202599043619145e-05, "loss": 1.3372, "step": 27370 }, { "epoch": 8.19, "grad_norm": 2.56754994392395, 
"learning_rate": 3.2020351825055114e-05, "loss": 1.1325, "step": 27375 }, { "epoch": 8.19, "grad_norm": 8.181985855102539, "learning_rate": 3.201471282619204e-05, "loss": 1.1545, "step": 27380 }, { "epoch": 8.19, "grad_norm": 0.9413318037986755, "learning_rate": 3.200907343991367e-05, "loss": 1.1858, "step": 27385 }, { "epoch": 8.19, "grad_norm": 2.4057810306549072, "learning_rate": 3.2003433666531456e-05, "loss": 1.0307, "step": 27390 }, { "epoch": 8.2, "grad_norm": 1.1477302312850952, "learning_rate": 3.199779350635688e-05, "loss": 1.1957, "step": 27395 }, { "epoch": 8.2, "grad_norm": 2.2809982299804688, "learning_rate": 3.199215295970145e-05, "loss": 1.2828, "step": 27400 }, { "epoch": 8.2, "grad_norm": 10.860541343688965, "learning_rate": 3.198651202687668e-05, "loss": 1.0256, "step": 27405 }, { "epoch": 8.2, "grad_norm": 2.4457149505615234, "learning_rate": 3.198087070819411e-05, "loss": 1.1488, "step": 27410 }, { "epoch": 8.2, "grad_norm": 2.509361743927002, "learning_rate": 3.1975229003965305e-05, "loss": 0.9908, "step": 27415 }, { "epoch": 8.2, "grad_norm": 2.225059747695923, "learning_rate": 3.1969586914501854e-05, "loss": 1.1636, "step": 27420 }, { "epoch": 8.21, "grad_norm": 4.2079386711120605, "learning_rate": 3.196394444011536e-05, "loss": 1.0119, "step": 27425 }, { "epoch": 8.21, "grad_norm": 3.070852041244507, "learning_rate": 3.1958301581117455e-05, "loss": 1.0048, "step": 27430 }, { "epoch": 8.21, "grad_norm": 1.48086416721344, "learning_rate": 3.195265833781979e-05, "loss": 0.8762, "step": 27435 }, { "epoch": 8.21, "grad_norm": 2.2743024826049805, "learning_rate": 3.1947014710534024e-05, "loss": 1.0763, "step": 27440 }, { "epoch": 8.21, "grad_norm": 1.9955936670303345, "learning_rate": 3.194137069957186e-05, "loss": 1.1179, "step": 27445 }, { "epoch": 8.21, "grad_norm": 1.0980212688446045, "learning_rate": 3.1935726305245e-05, "loss": 1.0278, "step": 27450 }, { "epoch": 8.21, "grad_norm": 2.3399531841278076, "learning_rate": 3.19300815278652e-05, 
"loss": 1.0881, "step": 27455 }, { "epoch": 8.22, "grad_norm": 2.5593791007995605, "learning_rate": 3.192443636774419e-05, "loss": 1.003, "step": 27460 }, { "epoch": 8.22, "grad_norm": 2.6024117469787598, "learning_rate": 3.1918790825193764e-05, "loss": 0.9777, "step": 27465 }, { "epoch": 8.22, "grad_norm": 3.8112480640411377, "learning_rate": 3.191314490052572e-05, "loss": 1.1839, "step": 27470 }, { "epoch": 8.22, "grad_norm": 3.738812208175659, "learning_rate": 3.190749859405185e-05, "loss": 0.9292, "step": 27475 }, { "epoch": 8.22, "grad_norm": 1.4232354164123535, "learning_rate": 3.1901851906084025e-05, "loss": 1.1799, "step": 27480 }, { "epoch": 8.22, "grad_norm": 2.850757122039795, "learning_rate": 3.189620483693409e-05, "loss": 1.1459, "step": 27485 }, { "epoch": 8.22, "grad_norm": 9.836990356445312, "learning_rate": 3.189055738691393e-05, "loss": 1.2023, "step": 27490 }, { "epoch": 8.23, "grad_norm": 2.8823812007904053, "learning_rate": 3.188490955633545e-05, "loss": 1.1401, "step": 27495 }, { "epoch": 8.23, "grad_norm": 4.259209632873535, "learning_rate": 3.187926134551057e-05, "loss": 1.0947, "step": 27500 }, { "epoch": 8.23, "grad_norm": 1.9353116750717163, "learning_rate": 3.1873612754751234e-05, "loss": 0.9909, "step": 27505 }, { "epoch": 8.23, "grad_norm": 1.2285420894622803, "learning_rate": 3.1867963784369415e-05, "loss": 1.1093, "step": 27510 }, { "epoch": 8.23, "grad_norm": 2.983264446258545, "learning_rate": 3.186231443467709e-05, "loss": 0.963, "step": 27515 }, { "epoch": 8.23, "grad_norm": 2.1744792461395264, "learning_rate": 3.185666470598627e-05, "loss": 1.1857, "step": 27520 }, { "epoch": 8.24, "grad_norm": 6.445066928863525, "learning_rate": 3.1851014598608994e-05, "loss": 1.0607, "step": 27525 }, { "epoch": 8.24, "grad_norm": 3.2869045734405518, "learning_rate": 3.1845364112857294e-05, "loss": 1.0861, "step": 27530 }, { "epoch": 8.24, "grad_norm": 2.0690512657165527, "learning_rate": 3.183971324904325e-05, "loss": 1.1103, "step": 27535 }, 
{ "epoch": 8.24, "grad_norm": 3.5820834636688232, "learning_rate": 3.183406200747896e-05, "loss": 1.1151, "step": 27540 }, { "epoch": 8.24, "grad_norm": 3.357940435409546, "learning_rate": 3.1828410388476526e-05, "loss": 0.9717, "step": 27545 }, { "epoch": 8.24, "grad_norm": 1.1148250102996826, "learning_rate": 3.182275839234808e-05, "loss": 1.188, "step": 27550 }, { "epoch": 8.24, "grad_norm": 2.7538514137268066, "learning_rate": 3.181710601940578e-05, "loss": 1.0378, "step": 27555 }, { "epoch": 8.25, "grad_norm": 1.7496304512023926, "learning_rate": 3.1811453269961804e-05, "loss": 1.0702, "step": 27560 }, { "epoch": 8.25, "grad_norm": 1.491807460784912, "learning_rate": 3.180580014432835e-05, "loss": 1.1796, "step": 27565 }, { "epoch": 8.25, "grad_norm": 1.2797476053237915, "learning_rate": 3.180014664281762e-05, "loss": 0.8992, "step": 27570 }, { "epoch": 8.25, "grad_norm": 2.5790648460388184, "learning_rate": 3.179449276574186e-05, "loss": 1.1928, "step": 27575 }, { "epoch": 8.25, "grad_norm": 3.154162883758545, "learning_rate": 3.178883851341333e-05, "loss": 1.176, "step": 27580 }, { "epoch": 8.25, "grad_norm": 1.5420721769332886, "learning_rate": 3.1783183886144305e-05, "loss": 1.162, "step": 27585 }, { "epoch": 8.25, "grad_norm": 2.776522159576416, "learning_rate": 3.177752888424708e-05, "loss": 1.0938, "step": 27590 }, { "epoch": 8.26, "grad_norm": 2.355360507965088, "learning_rate": 3.177187350803398e-05, "loss": 0.9884, "step": 27595 }, { "epoch": 8.26, "grad_norm": 1.3630084991455078, "learning_rate": 3.176621775781736e-05, "loss": 1.1724, "step": 27600 }, { "epoch": 8.26, "grad_norm": 2.4475739002227783, "learning_rate": 3.1760561633909546e-05, "loss": 1.1558, "step": 27605 }, { "epoch": 8.26, "grad_norm": 3.5432145595550537, "learning_rate": 3.175490513662295e-05, "loss": 0.9509, "step": 27610 }, { "epoch": 8.26, "grad_norm": 2.6473562717437744, "learning_rate": 3.1749248266269966e-05, "loss": 1.0777, "step": 27615 }, { "epoch": 8.26, "grad_norm": 
2.7413153648376465, "learning_rate": 3.174359102316301e-05, "loss": 1.2854, "step": 27620 }, { "epoch": 8.27, "grad_norm": 2.960883378982544, "learning_rate": 3.173793340761453e-05, "loss": 1.1436, "step": 27625 }, { "epoch": 8.27, "grad_norm": 3.2864279747009277, "learning_rate": 3.1732275419937e-05, "loss": 1.1531, "step": 27630 }, { "epoch": 8.27, "grad_norm": 2.264131546020508, "learning_rate": 3.1726617060442884e-05, "loss": 1.1501, "step": 27635 }, { "epoch": 8.27, "grad_norm": 4.641735553741455, "learning_rate": 3.172095832944472e-05, "loss": 1.1033, "step": 27640 }, { "epoch": 8.27, "grad_norm": 2.3437931537628174, "learning_rate": 3.1715299227255e-05, "loss": 1.1294, "step": 27645 }, { "epoch": 8.27, "grad_norm": 4.831884860992432, "learning_rate": 3.170963975418628e-05, "loss": 1.0589, "step": 27650 }, { "epoch": 8.27, "grad_norm": 5.432398796081543, "learning_rate": 3.170397991055114e-05, "loss": 0.97, "step": 27655 }, { "epoch": 8.28, "grad_norm": 2.6022696495056152, "learning_rate": 3.1698319696662156e-05, "loss": 1.1821, "step": 27660 }, { "epoch": 8.28, "grad_norm": 2.880967378616333, "learning_rate": 3.1692659112831934e-05, "loss": 0.9731, "step": 27665 }, { "epoch": 8.28, "grad_norm": 1.824782371520996, "learning_rate": 3.16869981593731e-05, "loss": 1.1546, "step": 27670 }, { "epoch": 8.28, "grad_norm": 1.6104397773742676, "learning_rate": 3.168133683659832e-05, "loss": 1.0998, "step": 27675 }, { "epoch": 8.28, "grad_norm": 3.1559090614318848, "learning_rate": 3.167567514482025e-05, "loss": 1.1066, "step": 27680 }, { "epoch": 8.28, "grad_norm": 1.1295137405395508, "learning_rate": 3.1670013084351575e-05, "loss": 1.2197, "step": 27685 }, { "epoch": 8.28, "grad_norm": 3.0015058517456055, "learning_rate": 3.166435065550501e-05, "loss": 1.1269, "step": 27690 }, { "epoch": 8.29, "grad_norm": 4.1721391677856445, "learning_rate": 3.1658687858593294e-05, "loss": 0.992, "step": 27695 }, { "epoch": 8.29, "grad_norm": 2.583388328552246, "learning_rate": 
3.165302469392917e-05, "loss": 1.0925, "step": 27700 }, { "epoch": 8.29, "grad_norm": 8.460959434509277, "learning_rate": 3.16473611618254e-05, "loss": 1.0993, "step": 27705 }, { "epoch": 8.29, "grad_norm": 5.1278767585754395, "learning_rate": 3.164169726259477e-05, "loss": 1.0187, "step": 27710 }, { "epoch": 8.29, "grad_norm": 2.619326591491699, "learning_rate": 3.163603299655012e-05, "loss": 0.9933, "step": 27715 }, { "epoch": 8.29, "grad_norm": 2.9472780227661133, "learning_rate": 3.1630368364004264e-05, "loss": 1.1644, "step": 27720 }, { "epoch": 8.29, "grad_norm": 1.8340462446212769, "learning_rate": 3.162470336527006e-05, "loss": 1.2497, "step": 27725 }, { "epoch": 8.3, "grad_norm": 4.899162292480469, "learning_rate": 3.161903800066037e-05, "loss": 1.0712, "step": 27730 }, { "epoch": 8.3, "grad_norm": 3.0266971588134766, "learning_rate": 3.161337227048809e-05, "loss": 1.1594, "step": 27735 }, { "epoch": 8.3, "grad_norm": 1.212259292602539, "learning_rate": 3.1607706175066134e-05, "loss": 1.0818, "step": 27740 }, { "epoch": 8.3, "grad_norm": 2.2834479808807373, "learning_rate": 3.1602039714707434e-05, "loss": 1.0276, "step": 27745 }, { "epoch": 8.3, "grad_norm": 1.508610486984253, "learning_rate": 3.159637288972494e-05, "loss": 1.1272, "step": 27750 }, { "epoch": 8.3, "grad_norm": 1.2361581325531006, "learning_rate": 3.159070570043163e-05, "loss": 1.0414, "step": 27755 }, { "epoch": 8.31, "grad_norm": 2.1319968700408936, "learning_rate": 3.15850381471405e-05, "loss": 1.052, "step": 27760 }, { "epoch": 8.31, "grad_norm": 3.3206193447113037, "learning_rate": 3.157937023016456e-05, "loss": 1.0506, "step": 27765 }, { "epoch": 8.31, "grad_norm": 3.340884208679199, "learning_rate": 3.157370194981683e-05, "loss": 1.1607, "step": 27770 }, { "epoch": 8.31, "grad_norm": 12.616667747497559, "learning_rate": 3.156803330641038e-05, "loss": 1.2882, "step": 27775 }, { "epoch": 8.31, "grad_norm": 3.9304230213165283, "learning_rate": 3.1562364300258275e-05, "loss": 1.302, 
"step": 27780 }, { "epoch": 8.31, "grad_norm": 3.3284659385681152, "learning_rate": 3.1556694931673615e-05, "loss": 1.0514, "step": 27785 }, { "epoch": 8.31, "grad_norm": 2.4775948524475098, "learning_rate": 3.155102520096951e-05, "loss": 1.0072, "step": 27790 }, { "epoch": 8.32, "grad_norm": 2.661914110183716, "learning_rate": 3.154535510845909e-05, "loss": 1.1323, "step": 27795 }, { "epoch": 8.32, "grad_norm": 3.137718677520752, "learning_rate": 3.15396846544555e-05, "loss": 1.248, "step": 27800 }, { "epoch": 8.32, "grad_norm": 1.339923620223999, "learning_rate": 3.153401383927194e-05, "loss": 1.0493, "step": 27805 }, { "epoch": 8.32, "grad_norm": 1.203647494316101, "learning_rate": 3.1528342663221576e-05, "loss": 0.9536, "step": 27810 }, { "epoch": 8.32, "grad_norm": 1.4011954069137573, "learning_rate": 3.152267112661764e-05, "loss": 1.257, "step": 27815 }, { "epoch": 8.32, "grad_norm": 3.3855531215667725, "learning_rate": 3.151699922977336e-05, "loss": 1.1184, "step": 27820 }, { "epoch": 8.32, "grad_norm": 2.566340208053589, "learning_rate": 3.151132697300199e-05, "loss": 1.0047, "step": 27825 }, { "epoch": 8.33, "grad_norm": 2.9394350051879883, "learning_rate": 3.150565435661679e-05, "loss": 1.244, "step": 27830 }, { "epoch": 8.33, "grad_norm": 3.349531412124634, "learning_rate": 3.149998138093107e-05, "loss": 1.1824, "step": 27835 }, { "epoch": 8.33, "grad_norm": 1.7213515043258667, "learning_rate": 3.149430804625812e-05, "loss": 1.1984, "step": 27840 }, { "epoch": 8.33, "grad_norm": 1.7168335914611816, "learning_rate": 3.148863435291129e-05, "loss": 1.1489, "step": 27845 }, { "epoch": 8.33, "grad_norm": 2.286252975463867, "learning_rate": 3.148296030120394e-05, "loss": 1.2274, "step": 27850 }, { "epoch": 8.33, "grad_norm": 1.7521454095840454, "learning_rate": 3.1477285891449434e-05, "loss": 1.1893, "step": 27855 }, { "epoch": 8.34, "grad_norm": 3.142904281616211, "learning_rate": 3.147161112396115e-05, "loss": 1.0052, "step": 27860 }, { "epoch": 8.34, 
"grad_norm": 2.172449827194214, "learning_rate": 3.1465935999052514e-05, "loss": 1.0957, "step": 27865 }, { "epoch": 8.34, "grad_norm": 2.116454601287842, "learning_rate": 3.146026051703695e-05, "loss": 1.0053, "step": 27870 }, { "epoch": 8.34, "grad_norm": 0.8631601333618164, "learning_rate": 3.145458467822792e-05, "loss": 0.9305, "step": 27875 }, { "epoch": 8.34, "grad_norm": 2.0894715785980225, "learning_rate": 3.144890848293889e-05, "loss": 1.1072, "step": 27880 }, { "epoch": 8.34, "grad_norm": 4.275918006896973, "learning_rate": 3.1443231931483334e-05, "loss": 1.0385, "step": 27885 }, { "epoch": 8.34, "grad_norm": 2.0376479625701904, "learning_rate": 3.143755502417478e-05, "loss": 1.3225, "step": 27890 }, { "epoch": 8.35, "grad_norm": 2.2488062381744385, "learning_rate": 3.143187776132676e-05, "loss": 1.1994, "step": 27895 }, { "epoch": 8.35, "grad_norm": 2.3104982376098633, "learning_rate": 3.1426200143252815e-05, "loss": 1.0693, "step": 27900 }, { "epoch": 8.35, "grad_norm": 2.0975565910339355, "learning_rate": 3.142052217026651e-05, "loss": 1.0613, "step": 27905 }, { "epoch": 8.35, "grad_norm": 3.1652896404266357, "learning_rate": 3.1414843842681455e-05, "loss": 1.1656, "step": 27910 }, { "epoch": 8.35, "grad_norm": 2.0645837783813477, "learning_rate": 3.1409165160811226e-05, "loss": 1.1103, "step": 27915 }, { "epoch": 8.35, "grad_norm": 2.3577146530151367, "learning_rate": 3.140348612496947e-05, "loss": 1.0475, "step": 27920 }, { "epoch": 8.35, "grad_norm": 3.1144940853118896, "learning_rate": 3.1398942641647336e-05, "loss": 1.3145, "step": 27925 }, { "epoch": 8.36, "grad_norm": 2.6503536701202393, "learning_rate": 3.139326296944723e-05, "loss": 1.0987, "step": 27930 }, { "epoch": 8.36, "grad_norm": 1.5339775085449219, "learning_rate": 3.138758294415386e-05, "loss": 1.0772, "step": 27935 }, { "epoch": 8.36, "grad_norm": 2.08730149269104, "learning_rate": 3.138190256608093e-05, "loss": 1.1344, "step": 27940 }, { "epoch": 8.36, "grad_norm": 
1.825244426727295, "learning_rate": 3.137622183554215e-05, "loss": 1.2571, "step": 27945 }, { "epoch": 8.36, "grad_norm": 2.94596004486084, "learning_rate": 3.137054075285126e-05, "loss": 1.1669, "step": 27950 }, { "epoch": 8.36, "grad_norm": 1.1492950916290283, "learning_rate": 3.1364859318322025e-05, "loss": 1.2209, "step": 27955 }, { "epoch": 8.37, "grad_norm": 5.7294769287109375, "learning_rate": 3.135917753226823e-05, "loss": 1.0503, "step": 27960 }, { "epoch": 8.37, "grad_norm": 1.8514894247055054, "learning_rate": 3.1353495395003675e-05, "loss": 1.0441, "step": 27965 }, { "epoch": 8.37, "grad_norm": 1.6761661767959595, "learning_rate": 3.134781290684216e-05, "loss": 0.9575, "step": 27970 }, { "epoch": 8.37, "grad_norm": 2.4914565086364746, "learning_rate": 3.134213006809755e-05, "loss": 1.2053, "step": 27975 }, { "epoch": 8.37, "grad_norm": 1.1112005710601807, "learning_rate": 3.133644687908368e-05, "loss": 1.1066, "step": 27980 }, { "epoch": 8.37, "grad_norm": 5.55382776260376, "learning_rate": 3.133076334011443e-05, "loss": 0.8715, "step": 27985 }, { "epoch": 8.37, "grad_norm": 3.3123645782470703, "learning_rate": 3.1325079451503715e-05, "loss": 1.0493, "step": 27990 }, { "epoch": 8.38, "grad_norm": 29.16041374206543, "learning_rate": 3.131939521356543e-05, "loss": 1.0889, "step": 27995 }, { "epoch": 8.38, "grad_norm": 2.6477432250976562, "learning_rate": 3.131371062661351e-05, "loss": 0.995, "step": 28000 }, { "epoch": 8.38, "grad_norm": 1.398253321647644, "learning_rate": 3.130802569096194e-05, "loss": 1.0378, "step": 28005 }, { "epoch": 8.38, "grad_norm": 1.7362818717956543, "learning_rate": 3.130234040692464e-05, "loss": 1.0509, "step": 28010 }, { "epoch": 8.38, "grad_norm": 2.3737027645111084, "learning_rate": 3.129665477481564e-05, "loss": 1.1996, "step": 28015 }, { "epoch": 8.38, "grad_norm": 2.6281936168670654, "learning_rate": 3.129096879494894e-05, "loss": 0.9499, "step": 28020 }, { "epoch": 8.38, "grad_norm": 5.763638019561768, "learning_rate": 
3.1285282467638577e-05, "loss": 1.1019, "step": 28025 }, { "epoch": 8.39, "grad_norm": 3.136121988296509, "learning_rate": 3.1279595793198593e-05, "loss": 1.084, "step": 28030 }, { "epoch": 8.39, "grad_norm": 1.8044530153274536, "learning_rate": 3.1273908771943064e-05, "loss": 1.2044, "step": 28035 }, { "epoch": 8.39, "grad_norm": 4.135902404785156, "learning_rate": 3.126822140418607e-05, "loss": 1.086, "step": 28040 }, { "epoch": 8.39, "grad_norm": 2.257347822189331, "learning_rate": 3.1262533690241726e-05, "loss": 1.2456, "step": 28045 }, { "epoch": 8.39, "grad_norm": 3.2046022415161133, "learning_rate": 3.1256845630424144e-05, "loss": 1.214, "step": 28050 }, { "epoch": 8.39, "grad_norm": 3.7938222885131836, "learning_rate": 3.125115722504749e-05, "loss": 0.989, "step": 28055 }, { "epoch": 8.4, "grad_norm": 3.6777851581573486, "learning_rate": 3.124546847442593e-05, "loss": 1.0389, "step": 28060 }, { "epoch": 8.4, "grad_norm": 2.9035661220550537, "learning_rate": 3.123977937887363e-05, "loss": 0.9988, "step": 28065 }, { "epoch": 8.4, "grad_norm": 1.5526783466339111, "learning_rate": 3.1234089938704805e-05, "loss": 1.1668, "step": 28070 }, { "epoch": 8.4, "grad_norm": 3.05960750579834, "learning_rate": 3.122840015423367e-05, "loss": 1.1137, "step": 28075 }, { "epoch": 8.4, "grad_norm": 2.577622413635254, "learning_rate": 3.122271002577446e-05, "loss": 1.0632, "step": 28080 }, { "epoch": 8.4, "grad_norm": 2.9090895652770996, "learning_rate": 3.121701955364146e-05, "loss": 1.1106, "step": 28085 }, { "epoch": 8.4, "grad_norm": 1.0737097263336182, "learning_rate": 3.121132873814892e-05, "loss": 1.1976, "step": 28090 }, { "epoch": 8.41, "grad_norm": 5.77609920501709, "learning_rate": 3.1205637579611154e-05, "loss": 1.2236, "step": 28095 }, { "epoch": 8.41, "grad_norm": 6.517035961151123, "learning_rate": 3.119994607834248e-05, "loss": 0.9589, "step": 28100 }, { "epoch": 8.41, "grad_norm": 3.637007236480713, "learning_rate": 3.1194254234657225e-05, "loss": 1.1054, 
"step": 28105 }, { "epoch": 8.41, "grad_norm": 5.6374359130859375, "learning_rate": 3.118856204886974e-05, "loss": 1.1144, "step": 28110 }, { "epoch": 8.41, "grad_norm": 1.1041793823242188, "learning_rate": 3.118286952129441e-05, "loss": 1.0831, "step": 28115 }, { "epoch": 8.41, "grad_norm": 4.296421051025391, "learning_rate": 3.117717665224562e-05, "loss": 1.1175, "step": 28120 }, { "epoch": 8.41, "grad_norm": 1.8130741119384766, "learning_rate": 3.117148344203779e-05, "loss": 1.14, "step": 28125 }, { "epoch": 8.42, "grad_norm": 5.4478864669799805, "learning_rate": 3.116578989098534e-05, "loss": 1.1562, "step": 28130 }, { "epoch": 8.42, "grad_norm": 3.172430992126465, "learning_rate": 3.116009599940273e-05, "loss": 1.2238, "step": 28135 }, { "epoch": 8.42, "grad_norm": 1.4552370309829712, "learning_rate": 3.1154401767604415e-05, "loss": 1.2188, "step": 28140 }, { "epoch": 8.42, "grad_norm": 2.109438419342041, "learning_rate": 3.114870719590489e-05, "loss": 1.0155, "step": 28145 }, { "epoch": 8.42, "grad_norm": 1.051721215248108, "learning_rate": 3.114301228461866e-05, "loss": 1.0963, "step": 28150 }, { "epoch": 8.42, "grad_norm": 1.4635205268859863, "learning_rate": 3.1137317034060236e-05, "loss": 1.0117, "step": 28155 }, { "epoch": 8.43, "grad_norm": 2.354686975479126, "learning_rate": 3.113162144454418e-05, "loss": 1.2232, "step": 28160 }, { "epoch": 8.43, "grad_norm": 2.211465358734131, "learning_rate": 3.112592551638505e-05, "loss": 1.1567, "step": 28165 }, { "epoch": 8.43, "grad_norm": 2.628493070602417, "learning_rate": 3.112022924989741e-05, "loss": 1.0378, "step": 28170 }, { "epoch": 8.43, "grad_norm": 11.486804008483887, "learning_rate": 3.111453264539588e-05, "loss": 1.0214, "step": 28175 }, { "epoch": 8.43, "grad_norm": 2.4132378101348877, "learning_rate": 3.110883570319507e-05, "loss": 1.265, "step": 28180 }, { "epoch": 8.43, "grad_norm": 3.6286280155181885, "learning_rate": 3.11031384236096e-05, "loss": 1.031, "step": 28185 }, { "epoch": 8.43, 
"grad_norm": 2.399932622909546, "learning_rate": 3.109744080695415e-05, "loss": 1.0519, "step": 28190 }, { "epoch": 8.44, "grad_norm": 2.9167540073394775, "learning_rate": 3.109174285354338e-05, "loss": 1.1285, "step": 28195 }, { "epoch": 8.44, "grad_norm": 2.6910743713378906, "learning_rate": 3.1086044563691984e-05, "loss": 1.2128, "step": 28200 }, { "epoch": 8.44, "grad_norm": 2.2151172161102295, "learning_rate": 3.108034593771467e-05, "loss": 1.1706, "step": 28205 }, { "epoch": 8.44, "grad_norm": 4.152978897094727, "learning_rate": 3.1074646975926176e-05, "loss": 1.1396, "step": 28210 }, { "epoch": 8.44, "grad_norm": 4.019749164581299, "learning_rate": 3.106894767864124e-05, "loss": 1.1841, "step": 28215 }, { "epoch": 8.44, "grad_norm": 2.265693187713623, "learning_rate": 3.106324804617463e-05, "loss": 1.1932, "step": 28220 }, { "epoch": 8.44, "grad_norm": 2.9576539993286133, "learning_rate": 3.105754807884113e-05, "loss": 1.1587, "step": 28225 }, { "epoch": 8.45, "grad_norm": 3.3422529697418213, "learning_rate": 3.105184777695555e-05, "loss": 1.1099, "step": 28230 }, { "epoch": 8.45, "grad_norm": 3.2703285217285156, "learning_rate": 3.104614714083271e-05, "loss": 1.0236, "step": 28235 }, { "epoch": 8.45, "grad_norm": 2.098249912261963, "learning_rate": 3.1040446170787444e-05, "loss": 1.0275, "step": 28240 }, { "epoch": 8.45, "grad_norm": 3.4206290245056152, "learning_rate": 3.103474486713462e-05, "loss": 0.9629, "step": 28245 }, { "epoch": 8.45, "grad_norm": 3.716006278991699, "learning_rate": 3.1029043230189106e-05, "loss": 1.0353, "step": 28250 }, { "epoch": 8.45, "grad_norm": 1.844235897064209, "learning_rate": 3.10233412602658e-05, "loss": 1.1328, "step": 28255 }, { "epoch": 8.46, "grad_norm": 3.6148314476013184, "learning_rate": 3.101763895767962e-05, "loss": 1.0627, "step": 28260 }, { "epoch": 8.46, "grad_norm": 2.58854603767395, "learning_rate": 3.101193632274549e-05, "loss": 1.0033, "step": 28265 }, { "epoch": 8.46, "grad_norm": 1.9010263681411743, 
"learning_rate": 3.100623335577837e-05, "loss": 1.1728, "step": 28270 }, { "epoch": 8.46, "grad_norm": 3.03183913230896, "learning_rate": 3.100053005709323e-05, "loss": 1.133, "step": 28275 }, { "epoch": 8.46, "grad_norm": 2.4932944774627686, "learning_rate": 3.0994826427005044e-05, "loss": 1.1185, "step": 28280 }, { "epoch": 8.46, "grad_norm": 3.512427568435669, "learning_rate": 3.098912246582884e-05, "loss": 1.0649, "step": 28285 }, { "epoch": 8.46, "grad_norm": 1.8340367078781128, "learning_rate": 3.098341817387961e-05, "loss": 1.048, "step": 28290 }, { "epoch": 8.47, "grad_norm": 2.6581850051879883, "learning_rate": 3.0977713551472424e-05, "loss": 1.1243, "step": 28295 }, { "epoch": 8.47, "grad_norm": 2.2674968242645264, "learning_rate": 3.097200859892232e-05, "loss": 1.0713, "step": 28300 }, { "epoch": 8.47, "grad_norm": 2.0417089462280273, "learning_rate": 3.09663033165444e-05, "loss": 1.142, "step": 28305 }, { "epoch": 8.47, "grad_norm": 3.8348848819732666, "learning_rate": 3.096059770465375e-05, "loss": 1.0195, "step": 28310 }, { "epoch": 8.47, "grad_norm": 1.9417195320129395, "learning_rate": 3.095489176356548e-05, "loss": 1.1107, "step": 28315 }, { "epoch": 8.47, "grad_norm": 3.2145209312438965, "learning_rate": 3.094918549359473e-05, "loss": 1.1599, "step": 28320 }, { "epoch": 8.47, "grad_norm": 1.5097711086273193, "learning_rate": 3.0943478895056645e-05, "loss": 1.1153, "step": 28325 }, { "epoch": 8.48, "grad_norm": 1.1623693704605103, "learning_rate": 3.09377719682664e-05, "loss": 1.0584, "step": 28330 }, { "epoch": 8.48, "grad_norm": 3.9560139179229736, "learning_rate": 3.093206471353918e-05, "loss": 1.0023, "step": 28335 }, { "epoch": 8.48, "grad_norm": 2.495631217956543, "learning_rate": 3.0926357131190196e-05, "loss": 0.9913, "step": 28340 }, { "epoch": 8.48, "grad_norm": 1.4161936044692993, "learning_rate": 3.092064922153466e-05, "loss": 1.1667, "step": 28345 }, { "epoch": 8.48, "grad_norm": 2.1678714752197266, "learning_rate": 
3.091494098488783e-05, "loss": 1.1517, "step": 28350 }, { "epoch": 8.48, "grad_norm": 1.4280831813812256, "learning_rate": 3.090923242156496e-05, "loss": 1.1162, "step": 28355 }, { "epoch": 8.48, "grad_norm": 5.203249931335449, "learning_rate": 3.0903523531881325e-05, "loss": 1.0205, "step": 28360 }, { "epoch": 8.49, "grad_norm": 2.362901449203491, "learning_rate": 3.0897814316152214e-05, "loss": 1.095, "step": 28365 }, { "epoch": 8.49, "grad_norm": 1.590612769126892, "learning_rate": 3.089210477469295e-05, "loss": 1.0452, "step": 28370 }, { "epoch": 8.49, "grad_norm": 1.0684711933135986, "learning_rate": 3.0886394907818864e-05, "loss": 1.0676, "step": 28375 }, { "epoch": 8.49, "grad_norm": 2.432785749435425, "learning_rate": 3.088068471584531e-05, "loss": 1.2619, "step": 28380 }, { "epoch": 8.49, "grad_norm": 4.381847381591797, "learning_rate": 3.0874974199087654e-05, "loss": 0.9273, "step": 28385 }, { "epoch": 8.49, "grad_norm": 2.7747817039489746, "learning_rate": 3.086926335786128e-05, "loss": 1.1676, "step": 28390 }, { "epoch": 8.5, "grad_norm": 2.8812899589538574, "learning_rate": 3.086355219248158e-05, "loss": 1.0638, "step": 28395 }, { "epoch": 8.5, "grad_norm": 3.0816097259521484, "learning_rate": 3.0857840703263996e-05, "loss": 0.9701, "step": 28400 }, { "epoch": 8.5, "grad_norm": 3.1254990100860596, "learning_rate": 3.0852128890523954e-05, "loss": 1.2058, "step": 28405 }, { "epoch": 8.5, "grad_norm": 3.639291286468506, "learning_rate": 3.084641675457692e-05, "loss": 1.1903, "step": 28410 }, { "epoch": 8.5, "grad_norm": 12.763455390930176, "learning_rate": 3.0840704295738364e-05, "loss": 0.9558, "step": 28415 }, { "epoch": 8.5, "grad_norm": 2.450136423110962, "learning_rate": 3.083499151432378e-05, "loss": 1.1158, "step": 28420 }, { "epoch": 8.5, "grad_norm": 2.2202651500701904, "learning_rate": 3.082927841064869e-05, "loss": 1.1919, "step": 28425 }, { "epoch": 8.51, "grad_norm": 5.485016822814941, "learning_rate": 3.0823564985028596e-05, "loss": 1.1658, 
"step": 28430 }, { "epoch": 8.51, "grad_norm": 3.856153964996338, "learning_rate": 3.081785123777907e-05, "loss": 0.9867, "step": 28435 }, { "epoch": 8.51, "grad_norm": 2.2953991889953613, "learning_rate": 3.081213716921567e-05, "loss": 1.1779, "step": 28440 }, { "epoch": 8.51, "grad_norm": 2.756497859954834, "learning_rate": 3.0806422779653974e-05, "loss": 1.1818, "step": 28445 }, { "epoch": 8.51, "grad_norm": 2.887840986251831, "learning_rate": 3.080070806940958e-05, "loss": 1.0941, "step": 28450 }, { "epoch": 8.51, "grad_norm": 1.7637460231781006, "learning_rate": 3.0794993038798114e-05, "loss": 1.1327, "step": 28455 }, { "epoch": 8.51, "grad_norm": 2.2904860973358154, "learning_rate": 3.07892776881352e-05, "loss": 1.2831, "step": 28460 }, { "epoch": 8.52, "grad_norm": 1.0153388977050781, "learning_rate": 3.07835620177365e-05, "loss": 1.0142, "step": 28465 }, { "epoch": 8.52, "grad_norm": 1.7611110210418701, "learning_rate": 3.077784602791768e-05, "loss": 1.2258, "step": 28470 }, { "epoch": 8.52, "grad_norm": 2.403919219970703, "learning_rate": 3.077212971899443e-05, "loss": 0.9231, "step": 28475 }, { "epoch": 8.52, "grad_norm": 4.131344318389893, "learning_rate": 3.076641309128245e-05, "loss": 1.0783, "step": 28480 }, { "epoch": 8.52, "grad_norm": 3.618623971939087, "learning_rate": 3.0760696145097477e-05, "loss": 1.2069, "step": 28485 }, { "epoch": 8.52, "grad_norm": 2.4561777114868164, "learning_rate": 3.0754978880755246e-05, "loss": 1.0412, "step": 28490 }, { "epoch": 8.53, "grad_norm": 2.350400924682617, "learning_rate": 3.074926129857151e-05, "loss": 1.1583, "step": 28495 }, { "epoch": 8.53, "grad_norm": 1.5594244003295898, "learning_rate": 3.074354339886204e-05, "loss": 1.2653, "step": 28500 }, { "epoch": 8.53, "grad_norm": 1.28977370262146, "learning_rate": 3.073782518194265e-05, "loss": 1.1303, "step": 28505 }, { "epoch": 8.53, "grad_norm": 1.747999668121338, "learning_rate": 3.073210664812913e-05, "loss": 1.0107, "step": 28510 }, { "epoch": 8.53, 
"grad_norm": 1.435805082321167, "learning_rate": 3.072638779773732e-05, "loss": 0.9404, "step": 28515 }, { "epoch": 8.53, "grad_norm": 2.7023794651031494, "learning_rate": 3.0720668631083074e-05, "loss": 1.1618, "step": 28520 }, { "epoch": 8.53, "grad_norm": 2.3820207118988037, "learning_rate": 3.071494914848224e-05, "loss": 1.2369, "step": 28525 }, { "epoch": 8.54, "grad_norm": 2.2789406776428223, "learning_rate": 3.07092293502507e-05, "loss": 1.0973, "step": 28530 }, { "epoch": 8.54, "grad_norm": 1.514122724533081, "learning_rate": 3.0703509236704366e-05, "loss": 1.0835, "step": 28535 }, { "epoch": 8.54, "grad_norm": 2.0251221656799316, "learning_rate": 3.069778880815914e-05, "loss": 1.1254, "step": 28540 }, { "epoch": 8.54, "grad_norm": 4.081529140472412, "learning_rate": 3.069206806493095e-05, "loss": 1.2861, "step": 28545 }, { "epoch": 8.54, "grad_norm": 1.8973231315612793, "learning_rate": 3.068634700733577e-05, "loss": 1.122, "step": 28550 }, { "epoch": 8.54, "grad_norm": 1.6018325090408325, "learning_rate": 3.068062563568956e-05, "loss": 1.0091, "step": 28555 }, { "epoch": 8.54, "grad_norm": 2.3443856239318848, "learning_rate": 3.0674903950308295e-05, "loss": 1.1682, "step": 28560 }, { "epoch": 8.55, "grad_norm": 2.2536654472351074, "learning_rate": 3.0669181951507986e-05, "loss": 1.0887, "step": 28565 }, { "epoch": 8.55, "grad_norm": 2.6237266063690186, "learning_rate": 3.0663459639604645e-05, "loss": 1.0353, "step": 28570 }, { "epoch": 8.55, "grad_norm": 2.24894118309021, "learning_rate": 3.065773701491432e-05, "loss": 1.2106, "step": 28575 }, { "epoch": 8.55, "grad_norm": 2.0882694721221924, "learning_rate": 3.065201407775306e-05, "loss": 1.1984, "step": 28580 }, { "epoch": 8.55, "grad_norm": 2.8938817977905273, "learning_rate": 3.064629082843693e-05, "loss": 1.0492, "step": 28585 }, { "epoch": 8.55, "grad_norm": 2.2271273136138916, "learning_rate": 3.064056726728204e-05, "loss": 1.165, "step": 28590 }, { "epoch": 8.56, "grad_norm": 5.680057525634766, 
"learning_rate": 3.063484339460447e-05, "loss": 1.1335, "step": 28595 }, { "epoch": 8.56, "grad_norm": 4.835309982299805, "learning_rate": 3.0629119210720364e-05, "loss": 1.0781, "step": 28600 }, { "epoch": 8.56, "grad_norm": 1.8358601331710815, "learning_rate": 3.062339471594585e-05, "loss": 1.0802, "step": 28605 }, { "epoch": 8.56, "grad_norm": 2.384139060974121, "learning_rate": 3.061766991059709e-05, "loss": 1.0434, "step": 28610 }, { "epoch": 8.56, "grad_norm": 1.7726037502288818, "learning_rate": 3.0611944794990265e-05, "loss": 0.9551, "step": 28615 }, { "epoch": 8.56, "grad_norm": 3.578003168106079, "learning_rate": 3.060621936944157e-05, "loss": 0.9481, "step": 28620 }, { "epoch": 8.56, "grad_norm": 2.8826417922973633, "learning_rate": 3.0600493634267196e-05, "loss": 1.1564, "step": 28625 }, { "epoch": 8.57, "grad_norm": 2.272235870361328, "learning_rate": 3.059476758978338e-05, "loss": 0.8952, "step": 28630 }, { "epoch": 8.57, "grad_norm": 2.82199764251709, "learning_rate": 3.058904123630636e-05, "loss": 1.1897, "step": 28635 }, { "epoch": 8.57, "grad_norm": 1.5297337770462036, "learning_rate": 3.0583314574152414e-05, "loss": 1.1364, "step": 28640 }, { "epoch": 8.57, "grad_norm": 2.4493231773376465, "learning_rate": 3.05775876036378e-05, "loss": 1.0275, "step": 28645 }, { "epoch": 8.57, "grad_norm": 4.320399761199951, "learning_rate": 3.057186032507883e-05, "loss": 1.205, "step": 28650 }, { "epoch": 8.57, "grad_norm": 4.2380595207214355, "learning_rate": 3.05661327387918e-05, "loss": 1.0884, "step": 28655 }, { "epoch": 8.57, "grad_norm": 3.2652807235717773, "learning_rate": 3.056040484509304e-05, "loss": 0.9676, "step": 28660 }, { "epoch": 8.58, "grad_norm": 2.164764404296875, "learning_rate": 3.0554676644298906e-05, "loss": 1.1527, "step": 28665 }, { "epoch": 8.58, "grad_norm": 3.5430350303649902, "learning_rate": 3.0548948136725754e-05, "loss": 1.285, "step": 28670 }, { "epoch": 8.58, "grad_norm": 2.4227631092071533, "learning_rate": 
3.0543219322689955e-05, "loss": 1.0471, "step": 28675 }, { "epoch": 8.58, "grad_norm": 2.0381109714508057, "learning_rate": 3.053749020250792e-05, "loss": 1.0022, "step": 28680 }, { "epoch": 8.58, "grad_norm": 1.9876576662063599, "learning_rate": 3.0531760776496064e-05, "loss": 1.0601, "step": 28685 }, { "epoch": 8.58, "grad_norm": 2.803956985473633, "learning_rate": 3.0526031044970806e-05, "loss": 1.1354, "step": 28690 }, { "epoch": 8.59, "grad_norm": 1.6594746112823486, "learning_rate": 3.05203010082486e-05, "loss": 1.1666, "step": 28695 }, { "epoch": 8.59, "grad_norm": 2.4495763778686523, "learning_rate": 3.0514570666645896e-05, "loss": 1.2394, "step": 28700 }, { "epoch": 8.59, "grad_norm": 0.8862543106079102, "learning_rate": 3.0508840020479194e-05, "loss": 1.1907, "step": 28705 }, { "epoch": 8.59, "grad_norm": 3.8315329551696777, "learning_rate": 3.0503109070064984e-05, "loss": 1.026, "step": 28710 }, { "epoch": 8.59, "grad_norm": 6.697932243347168, "learning_rate": 3.0497377815719787e-05, "loss": 1.143, "step": 28715 }, { "epoch": 8.59, "grad_norm": 2.7949328422546387, "learning_rate": 3.049164625776012e-05, "loss": 1.0389, "step": 28720 }, { "epoch": 8.59, "grad_norm": 3.456163167953491, "learning_rate": 3.048591439650254e-05, "loss": 0.9912, "step": 28725 }, { "epoch": 8.6, "grad_norm": 4.755772113800049, "learning_rate": 3.048018223226361e-05, "loss": 1.0972, "step": 28730 }, { "epoch": 8.6, "grad_norm": 1.6794426441192627, "learning_rate": 3.0474449765359908e-05, "loss": 1.0897, "step": 28735 }, { "epoch": 8.6, "grad_norm": 1.9840363264083862, "learning_rate": 3.0468716996108038e-05, "loss": 1.0486, "step": 28740 }, { "epoch": 8.6, "grad_norm": 5.026776313781738, "learning_rate": 3.046298392482462e-05, "loss": 1.0958, "step": 28745 }, { "epoch": 8.6, "grad_norm": 3.4160451889038086, "learning_rate": 3.0457250551826272e-05, "loss": 1.1688, "step": 28750 }, { "epoch": 8.6, "grad_norm": 5.9285054206848145, "learning_rate": 3.0451516877429648e-05, "loss": 
1.1551, "step": 28755 }, { "epoch": 8.6, "grad_norm": 2.421475410461426, "learning_rate": 3.044578290195141e-05, "loss": 1.3426, "step": 28760 }, { "epoch": 8.61, "grad_norm": 5.284264087677002, "learning_rate": 3.0440048625708244e-05, "loss": 1.22, "step": 28765 }, { "epoch": 8.61, "grad_norm": 1.7585577964782715, "learning_rate": 3.0434314049016854e-05, "loss": 1.3052, "step": 28770 }, { "epoch": 8.61, "grad_norm": 2.2892251014709473, "learning_rate": 3.042857917219394e-05, "loss": 1.195, "step": 28775 }, { "epoch": 8.61, "grad_norm": 2.7813527584075928, "learning_rate": 3.0422843995556245e-05, "loss": 1.1471, "step": 28780 }, { "epoch": 8.61, "grad_norm": 4.371838569641113, "learning_rate": 3.041710851942051e-05, "loss": 1.1609, "step": 28785 }, { "epoch": 8.61, "grad_norm": 5.511404991149902, "learning_rate": 3.0411372744103504e-05, "loss": 0.9399, "step": 28790 }, { "epoch": 8.62, "grad_norm": 6.196451663970947, "learning_rate": 3.0405636669922004e-05, "loss": 0.9446, "step": 28795 }, { "epoch": 8.62, "grad_norm": 1.4433752298355103, "learning_rate": 3.0399900297192812e-05, "loss": 1.1955, "step": 28800 }, { "epoch": 8.62, "grad_norm": 2.80629563331604, "learning_rate": 3.0394163626232742e-05, "loss": 1.2015, "step": 28805 }, { "epoch": 8.62, "grad_norm": 3.159404754638672, "learning_rate": 3.0388426657358628e-05, "loss": 1.0804, "step": 28810 }, { "epoch": 8.62, "grad_norm": 1.2445366382598877, "learning_rate": 3.0382689390887297e-05, "loss": 1.2395, "step": 28815 }, { "epoch": 8.62, "grad_norm": 3.2405431270599365, "learning_rate": 3.0376951827135632e-05, "loss": 1.0396, "step": 28820 }, { "epoch": 8.62, "grad_norm": 3.0723726749420166, "learning_rate": 3.0371213966420503e-05, "loss": 1.2086, "step": 28825 }, { "epoch": 8.63, "grad_norm": 2.3256523609161377, "learning_rate": 3.0365475809058814e-05, "loss": 1.2707, "step": 28830 }, { "epoch": 8.63, "grad_norm": 2.9459950923919678, "learning_rate": 3.0359737355367467e-05, "loss": 1.1848, "step": 28835 }, { 
"epoch": 8.63, "grad_norm": 2.410904884338379, "learning_rate": 3.0353998605663403e-05, "loss": 1.0366, "step": 28840 }, { "epoch": 8.63, "grad_norm": 4.633788585662842, "learning_rate": 3.0348259560263563e-05, "loss": 1.0301, "step": 28845 }, { "epoch": 8.63, "grad_norm": 2.1768550872802734, "learning_rate": 3.0342520219484903e-05, "loss": 1.2337, "step": 28850 }, { "epoch": 8.63, "grad_norm": 1.6011451482772827, "learning_rate": 3.033678058364441e-05, "loss": 1.1315, "step": 28855 }, { "epoch": 8.63, "grad_norm": 1.7358301877975464, "learning_rate": 3.0331040653059063e-05, "loss": 1.2123, "step": 28860 }, { "epoch": 8.64, "grad_norm": 2.091358184814453, "learning_rate": 3.0325300428045883e-05, "loss": 1.07, "step": 28865 }, { "epoch": 8.64, "grad_norm": 2.0123684406280518, "learning_rate": 3.0319559908921895e-05, "loss": 1.1838, "step": 28870 }, { "epoch": 8.64, "grad_norm": 2.712315082550049, "learning_rate": 3.0313819096004154e-05, "loss": 1.2163, "step": 28875 }, { "epoch": 8.64, "grad_norm": 2.8222601413726807, "learning_rate": 3.03080779896097e-05, "loss": 1.1945, "step": 28880 }, { "epoch": 8.64, "grad_norm": 4.944241046905518, "learning_rate": 3.0302336590055617e-05, "loss": 0.891, "step": 28885 }, { "epoch": 8.64, "grad_norm": 2.032339572906494, "learning_rate": 3.0296594897658993e-05, "loss": 1.2057, "step": 28890 }, { "epoch": 8.65, "grad_norm": 2.003133773803711, "learning_rate": 3.0290852912736944e-05, "loss": 1.052, "step": 28895 }, { "epoch": 8.65, "grad_norm": 2.7884342670440674, "learning_rate": 3.0285110635606585e-05, "loss": 1.1927, "step": 28900 }, { "epoch": 8.65, "grad_norm": 2.4504990577697754, "learning_rate": 3.0279368066585056e-05, "loss": 1.0378, "step": 28905 }, { "epoch": 8.65, "grad_norm": 6.284214496612549, "learning_rate": 3.0273625205989525e-05, "loss": 1.1471, "step": 28910 }, { "epoch": 8.65, "grad_norm": 12.456482887268066, "learning_rate": 3.0267882054137148e-05, "loss": 1.0668, "step": 28915 }, { "epoch": 8.65, "grad_norm": 
2.481710195541382, "learning_rate": 3.026213861134512e-05, "loss": 1.2337, "step": 28920 }, { "epoch": 8.65, "grad_norm": 7.710996150970459, "learning_rate": 3.025639487793065e-05, "loss": 0.9881, "step": 28925 }, { "epoch": 8.66, "grad_norm": 2.2037603855133057, "learning_rate": 3.0250650854210953e-05, "loss": 1.1093, "step": 28930 }, { "epoch": 8.66, "grad_norm": 2.117426872253418, "learning_rate": 3.0244906540503266e-05, "loss": 1.1326, "step": 28935 }, { "epoch": 8.66, "grad_norm": 3.677184581756592, "learning_rate": 3.023916193712485e-05, "loss": 1.1629, "step": 28940 }, { "epoch": 8.66, "grad_norm": 3.842292070388794, "learning_rate": 3.0233417044392953e-05, "loss": 1.0908, "step": 28945 }, { "epoch": 8.66, "grad_norm": 2.213963508605957, "learning_rate": 3.0227671862624878e-05, "loss": 1.166, "step": 28950 }, { "epoch": 8.66, "grad_norm": 0.9696227312088013, "learning_rate": 3.0221926392137922e-05, "loss": 1.1106, "step": 28955 }, { "epoch": 8.66, "grad_norm": 1.315886378288269, "learning_rate": 3.0216180633249396e-05, "loss": 1.1739, "step": 28960 }, { "epoch": 8.67, "grad_norm": 4.016354560852051, "learning_rate": 3.0210434586276637e-05, "loss": 0.9315, "step": 28965 }, { "epoch": 8.67, "grad_norm": 4.51848030090332, "learning_rate": 3.0204688251536994e-05, "loss": 0.9756, "step": 28970 }, { "epoch": 8.67, "grad_norm": 3.8415751457214355, "learning_rate": 3.0198941629347833e-05, "loss": 1.0718, "step": 28975 }, { "epoch": 8.67, "grad_norm": 3.0675783157348633, "learning_rate": 3.0193194720026524e-05, "loss": 1.2526, "step": 28980 }, { "epoch": 8.67, "grad_norm": 1.8814668655395508, "learning_rate": 3.0187447523890468e-05, "loss": 1.1403, "step": 28985 }, { "epoch": 8.67, "grad_norm": 3.3150155544281006, "learning_rate": 3.0181700041257077e-05, "loss": 1.0186, "step": 28990 }, { "epoch": 8.67, "grad_norm": 3.6737823486328125, "learning_rate": 3.017595227244378e-05, "loss": 1.0857, "step": 28995 }, { "epoch": 8.68, "grad_norm": 1.9824943542480469, 
"learning_rate": 3.0170204217768023e-05, "loss": 1.2406, "step": 29000 }, { "epoch": 8.68, "grad_norm": 1.4903932809829712, "learning_rate": 3.016445587754726e-05, "loss": 1.3973, "step": 29005 }, { "epoch": 8.68, "grad_norm": 3.9678359031677246, "learning_rate": 3.0158707252098966e-05, "loss": 1.1738, "step": 29010 }, { "epoch": 8.68, "grad_norm": 3.8614068031311035, "learning_rate": 3.015295834174063e-05, "loss": 1.123, "step": 29015 }, { "epoch": 8.68, "grad_norm": 4.1375555992126465, "learning_rate": 3.0147209146789762e-05, "loss": 0.9881, "step": 29020 }, { "epoch": 8.68, "grad_norm": 3.8305399417877197, "learning_rate": 3.014145966756388e-05, "loss": 0.8714, "step": 29025 }, { "epoch": 8.69, "grad_norm": 2.49973464012146, "learning_rate": 3.013570990438053e-05, "loss": 1.0092, "step": 29030 }, { "epoch": 8.69, "grad_norm": 6.754157543182373, "learning_rate": 3.0129959857557256e-05, "loss": 1.1094, "step": 29035 }, { "epoch": 8.69, "grad_norm": 3.1229982376098633, "learning_rate": 3.0124209527411634e-05, "loss": 1.0827, "step": 29040 }, { "epoch": 8.69, "grad_norm": 5.443647384643555, "learning_rate": 3.0118458914261242e-05, "loss": 1.0179, "step": 29045 }, { "epoch": 8.69, "grad_norm": 3.6279354095458984, "learning_rate": 3.011270801842369e-05, "loss": 1.0837, "step": 29050 }, { "epoch": 8.69, "grad_norm": 1.7841243743896484, "learning_rate": 3.0106956840216586e-05, "loss": 1.2112, "step": 29055 }, { "epoch": 8.69, "grad_norm": 1.7607803344726562, "learning_rate": 3.0101205379957563e-05, "loss": 1.2424, "step": 29060 }, { "epoch": 8.7, "grad_norm": 1.5843974351882935, "learning_rate": 3.0095453637964272e-05, "loss": 1.0776, "step": 29065 }, { "epoch": 8.7, "grad_norm": 12.485400199890137, "learning_rate": 3.0089701614554377e-05, "loss": 1.0256, "step": 29070 }, { "epoch": 8.7, "grad_norm": 1.710533618927002, "learning_rate": 3.008394931004555e-05, "loss": 1.1107, "step": 29075 }, { "epoch": 8.7, "grad_norm": 1.6686811447143555, "learning_rate": 
3.007819672475548e-05, "loss": 1.1631, "step": 29080 }, { "epoch": 8.7, "grad_norm": 4.799095630645752, "learning_rate": 3.0072443859001893e-05, "loss": 1.1524, "step": 29085 }, { "epoch": 8.7, "grad_norm": 3.90974497795105, "learning_rate": 3.0066690713102497e-05, "loss": 1.0848, "step": 29090 }, { "epoch": 8.7, "grad_norm": 2.303661346435547, "learning_rate": 3.006093728737504e-05, "loss": 1.0947, "step": 29095 }, { "epoch": 8.71, "grad_norm": 1.7615519762039185, "learning_rate": 3.005518358213728e-05, "loss": 0.9884, "step": 29100 }, { "epoch": 8.71, "grad_norm": 3.437652826309204, "learning_rate": 3.0049429597706987e-05, "loss": 0.9993, "step": 29105 }, { "epoch": 8.71, "grad_norm": 1.8464926481246948, "learning_rate": 3.0043675334401943e-05, "loss": 1.3269, "step": 29110 }, { "epoch": 8.71, "grad_norm": 2.6147725582122803, "learning_rate": 3.0037920792539954e-05, "loss": 1.1523, "step": 29115 }, { "epoch": 8.71, "grad_norm": 1.6204800605773926, "learning_rate": 3.003216597243883e-05, "loss": 1.0221, "step": 29120 }, { "epoch": 8.71, "grad_norm": 2.307574510574341, "learning_rate": 3.0026410874416416e-05, "loss": 1.1662, "step": 29125 }, { "epoch": 8.72, "grad_norm": 1.2338721752166748, "learning_rate": 3.0020655498790552e-05, "loss": 1.1687, "step": 29130 }, { "epoch": 8.72, "grad_norm": 1.9817173480987549, "learning_rate": 3.001489984587911e-05, "loss": 1.1231, "step": 29135 }, { "epoch": 8.72, "grad_norm": 2.5606281757354736, "learning_rate": 3.000914391599995e-05, "loss": 1.1578, "step": 29140 }, { "epoch": 8.72, "grad_norm": 2.77673602104187, "learning_rate": 3.0003387709470988e-05, "loss": 1.1036, "step": 29145 }, { "epoch": 8.72, "grad_norm": 1.4961824417114258, "learning_rate": 2.9997631226610116e-05, "loss": 1.197, "step": 29150 }, { "epoch": 8.72, "grad_norm": 4.214254379272461, "learning_rate": 2.9991874467735272e-05, "loss": 1.1423, "step": 29155 }, { "epoch": 8.72, "grad_norm": 4.497615337371826, "learning_rate": 2.998611743316439e-05, "loss": 
1.1546, "step": 29160 }, { "epoch": 8.73, "grad_norm": 1.615196943283081, "learning_rate": 2.998036012321543e-05, "loss": 1.2521, "step": 29165 }, { "epoch": 8.73, "grad_norm": 1.5576528310775757, "learning_rate": 2.997460253820635e-05, "loss": 1.2029, "step": 29170 }, { "epoch": 8.73, "grad_norm": 1.751317024230957, "learning_rate": 2.996884467845514e-05, "loss": 1.2419, "step": 29175 }, { "epoch": 8.73, "grad_norm": 4.62571907043457, "learning_rate": 2.9963086544279807e-05, "loss": 1.3243, "step": 29180 }, { "epoch": 8.73, "grad_norm": 3.067814826965332, "learning_rate": 2.9957328135998365e-05, "loss": 1.1107, "step": 29185 }, { "epoch": 8.73, "grad_norm": 5.224938869476318, "learning_rate": 2.9951569453928834e-05, "loss": 1.2911, "step": 29190 }, { "epoch": 8.73, "grad_norm": 3.1084907054901123, "learning_rate": 2.9945810498389275e-05, "loss": 1.0711, "step": 29195 }, { "epoch": 8.74, "grad_norm": 1.8605875968933105, "learning_rate": 2.994005126969775e-05, "loss": 1.1007, "step": 29200 }, { "epoch": 8.74, "grad_norm": 1.8477222919464111, "learning_rate": 2.9934291768172324e-05, "loss": 1.1518, "step": 29205 }, { "epoch": 8.74, "grad_norm": 2.743089199066162, "learning_rate": 2.9928531994131086e-05, "loss": 1.1243, "step": 29210 }, { "epoch": 8.74, "grad_norm": 3.1125733852386475, "learning_rate": 2.9922771947892154e-05, "loss": 1.2135, "step": 29215 }, { "epoch": 8.74, "grad_norm": 2.2629575729370117, "learning_rate": 2.9917011629773643e-05, "loss": 1.1818, "step": 29220 }, { "epoch": 8.74, "grad_norm": 3.8765952587127686, "learning_rate": 2.991125104009369e-05, "loss": 1.2061, "step": 29225 }, { "epoch": 8.75, "grad_norm": 2.9777157306671143, "learning_rate": 2.9905490179170446e-05, "loss": 1.2431, "step": 29230 }, { "epoch": 8.75, "grad_norm": 1.9893121719360352, "learning_rate": 2.9899729047322085e-05, "loss": 1.066, "step": 29235 }, { "epoch": 8.75, "grad_norm": 2.7022969722747803, "learning_rate": 2.989396764486677e-05, "loss": 1.1172, "step": 29240 }, { 
"epoch": 8.75, "grad_norm": 3.0575449466705322, "learning_rate": 2.988820597212272e-05, "loss": 1.0959, "step": 29245 }, { "epoch": 8.75, "grad_norm": 1.885754942893982, "learning_rate": 2.988244402940813e-05, "loss": 1.1691, "step": 29250 }, { "epoch": 8.75, "grad_norm": 1.6529167890548706, "learning_rate": 2.9876681817041235e-05, "loss": 1.2623, "step": 29255 }, { "epoch": 8.75, "grad_norm": 1.5923422574996948, "learning_rate": 2.987091933534027e-05, "loss": 1.2028, "step": 29260 }, { "epoch": 8.76, "grad_norm": 1.1636615991592407, "learning_rate": 2.9865156584623495e-05, "loss": 0.9298, "step": 29265 }, { "epoch": 8.76, "grad_norm": 2.3403260707855225, "learning_rate": 2.9859393565209177e-05, "loss": 1.1567, "step": 29270 }, { "epoch": 8.76, "grad_norm": 3.294311761856079, "learning_rate": 2.98536302774156e-05, "loss": 0.9932, "step": 29275 }, { "epoch": 8.76, "grad_norm": 1.9269850254058838, "learning_rate": 2.9847866721561075e-05, "loss": 1.2663, "step": 29280 }, { "epoch": 8.76, "grad_norm": 4.520174026489258, "learning_rate": 2.9842102897963902e-05, "loss": 1.0276, "step": 29285 }, { "epoch": 8.76, "grad_norm": 1.383296012878418, "learning_rate": 2.9836338806942425e-05, "loss": 1.1108, "step": 29290 }, { "epoch": 8.76, "grad_norm": 2.5761561393737793, "learning_rate": 2.9830574448814984e-05, "loss": 1.1263, "step": 29295 }, { "epoch": 8.77, "grad_norm": 2.923858404159546, "learning_rate": 2.9824809823899936e-05, "loss": 1.1611, "step": 29300 }, { "epoch": 8.77, "grad_norm": 1.9109852313995361, "learning_rate": 2.9819044932515655e-05, "loss": 1.0511, "step": 29305 }, { "epoch": 8.77, "grad_norm": 2.4346346855163574, "learning_rate": 2.9813279774980524e-05, "loss": 1.1955, "step": 29310 }, { "epoch": 8.77, "grad_norm": 3.335416078567505, "learning_rate": 2.9807514351612965e-05, "loss": 1.086, "step": 29315 }, { "epoch": 8.77, "grad_norm": 3.1424591541290283, "learning_rate": 2.9801748662731376e-05, "loss": 1.2607, "step": 29320 }, { "epoch": 8.77, "grad_norm": 
3.5147266387939453, "learning_rate": 2.9795982708654206e-05, "loss": 1.0876, "step": 29325 }, { "epoch": 8.78, "grad_norm": 3.8521475791931152, "learning_rate": 2.97902164896999e-05, "loss": 1.1177, "step": 29330 }, { "epoch": 8.78, "grad_norm": 4.234015464782715, "learning_rate": 2.9784450006186914e-05, "loss": 1.0803, "step": 29335 }, { "epoch": 8.78, "grad_norm": 1.5537238121032715, "learning_rate": 2.9778683258433727e-05, "loss": 1.1356, "step": 29340 }, { "epoch": 8.78, "grad_norm": 3.8383750915527344, "learning_rate": 2.9772916246758825e-05, "loss": 1.0342, "step": 29345 }, { "epoch": 8.78, "grad_norm": 2.1243529319763184, "learning_rate": 2.9767148971480725e-05, "loss": 1.0873, "step": 29350 }, { "epoch": 8.78, "grad_norm": 4.195858478546143, "learning_rate": 2.976138143291794e-05, "loss": 1.1394, "step": 29355 }, { "epoch": 8.78, "grad_norm": 2.7369132041931152, "learning_rate": 2.9755613631389017e-05, "loss": 1.0517, "step": 29360 }, { "epoch": 8.79, "grad_norm": 1.731953740119934, "learning_rate": 2.9749845567212487e-05, "loss": 1.1122, "step": 29365 }, { "epoch": 8.79, "grad_norm": 2.9949727058410645, "learning_rate": 2.9744077240706925e-05, "loss": 1.192, "step": 29370 }, { "epoch": 8.79, "grad_norm": 3.624964714050293, "learning_rate": 2.9738308652190905e-05, "loss": 1.1176, "step": 29375 }, { "epoch": 8.79, "grad_norm": 1.6444040536880493, "learning_rate": 2.973253980198303e-05, "loss": 1.1123, "step": 29380 }, { "epoch": 8.79, "grad_norm": 3.1505863666534424, "learning_rate": 2.9726770690401894e-05, "loss": 1.1554, "step": 29385 }, { "epoch": 8.79, "grad_norm": 2.775284767150879, "learning_rate": 2.972100131776613e-05, "loss": 1.0491, "step": 29390 }, { "epoch": 8.79, "grad_norm": 2.9408364295959473, "learning_rate": 2.971523168439437e-05, "loss": 1.0462, "step": 29395 }, { "epoch": 8.8, "grad_norm": 4.636882305145264, "learning_rate": 2.9709461790605263e-05, "loss": 1.1156, "step": 29400 }, { "epoch": 8.8, "grad_norm": 4.47889518737793, 
"learning_rate": 2.970369163671748e-05, "loss": 1.1295, "step": 29405 }, { "epoch": 8.8, "grad_norm": 5.22827672958374, "learning_rate": 2.9697921223049697e-05, "loss": 1.0539, "step": 29410 }, { "epoch": 8.8, "grad_norm": 2.5013325214385986, "learning_rate": 2.9692150549920606e-05, "loss": 1.1867, "step": 29415 }, { "epoch": 8.8, "grad_norm": 1.6474865674972534, "learning_rate": 2.9686379617648917e-05, "loss": 1.0142, "step": 29420 }, { "epoch": 8.8, "grad_norm": 2.7693967819213867, "learning_rate": 2.9680608426553358e-05, "loss": 1.0996, "step": 29425 }, { "epoch": 8.81, "grad_norm": 3.4689788818359375, "learning_rate": 2.9674836976952657e-05, "loss": 1.3735, "step": 29430 }, { "epoch": 8.81, "grad_norm": 1.6215572357177734, "learning_rate": 2.966906526916557e-05, "loss": 1.1179, "step": 29435 }, { "epoch": 8.81, "grad_norm": 2.069563627243042, "learning_rate": 2.9663293303510857e-05, "loss": 1.0961, "step": 29440 }, { "epoch": 8.81, "grad_norm": 1.665561318397522, "learning_rate": 2.9657521080307305e-05, "loss": 1.1638, "step": 29445 }, { "epoch": 8.81, "grad_norm": 3.0357062816619873, "learning_rate": 2.9651748599873708e-05, "loss": 1.2004, "step": 29450 }, { "epoch": 8.81, "grad_norm": 7.12582540512085, "learning_rate": 2.9645975862528868e-05, "loss": 1.1028, "step": 29455 }, { "epoch": 8.81, "grad_norm": 4.670501232147217, "learning_rate": 2.9640202868591616e-05, "loss": 0.9468, "step": 29460 }, { "epoch": 8.82, "grad_norm": 4.492547035217285, "learning_rate": 2.9634429618380775e-05, "loss": 1.161, "step": 29465 }, { "epoch": 8.82, "grad_norm": 4.036949634552002, "learning_rate": 2.9628656112215202e-05, "loss": 1.0556, "step": 29470 }, { "epoch": 8.82, "grad_norm": 6.104241847991943, "learning_rate": 2.962288235041377e-05, "loss": 1.2953, "step": 29475 }, { "epoch": 8.82, "grad_norm": 2.0466535091400146, "learning_rate": 2.9617108333295345e-05, "loss": 1.2064, "step": 29480 }, { "epoch": 8.82, "grad_norm": 3.5240540504455566, "learning_rate": 
2.9612488935986672e-05, "loss": 1.0582, "step": 29485 }, { "epoch": 8.82, "grad_norm": 2.5299673080444336, "learning_rate": 2.960671446010129e-05, "loss": 1.1523, "step": 29490 }, { "epoch": 8.82, "grad_norm": 2.4263341426849365, "learning_rate": 2.9600939729791864e-05, "loss": 1.0886, "step": 29495 }, { "epoch": 8.83, "grad_norm": 5.209130764007568, "learning_rate": 2.9595164745377314e-05, "loss": 1.0808, "step": 29500 }, { "epoch": 8.83, "grad_norm": 2.5315446853637695, "learning_rate": 2.958938950717659e-05, "loss": 1.264, "step": 29505 }, { "epoch": 8.83, "grad_norm": 2.663262367248535, "learning_rate": 2.9583614015508666e-05, "loss": 1.201, "step": 29510 }, { "epoch": 8.83, "grad_norm": 1.5915833711624146, "learning_rate": 2.9577838270692493e-05, "loss": 1.0883, "step": 29515 }, { "epoch": 8.83, "grad_norm": 6.19256067276001, "learning_rate": 2.9572062273047075e-05, "loss": 1.1954, "step": 29520 }, { "epoch": 8.83, "grad_norm": 2.0903115272521973, "learning_rate": 2.9566286022891404e-05, "loss": 1.1074, "step": 29525 }, { "epoch": 8.84, "grad_norm": 7.435312271118164, "learning_rate": 2.9560509520544505e-05, "loss": 0.967, "step": 29530 }, { "epoch": 8.84, "grad_norm": 1.675291657447815, "learning_rate": 2.9554732766325406e-05, "loss": 1.0977, "step": 29535 }, { "epoch": 8.84, "grad_norm": 2.438756227493286, "learning_rate": 2.9548955760553155e-05, "loss": 1.1914, "step": 29540 }, { "epoch": 8.84, "grad_norm": 2.381863832473755, "learning_rate": 2.9543178503546805e-05, "loss": 1.0097, "step": 29545 }, { "epoch": 8.84, "grad_norm": 2.3754689693450928, "learning_rate": 2.9537400995625426e-05, "loss": 1.2332, "step": 29550 }, { "epoch": 8.84, "grad_norm": 1.7114568948745728, "learning_rate": 2.9531623237108103e-05, "loss": 1.1569, "step": 29555 }, { "epoch": 8.84, "grad_norm": 2.6389689445495605, "learning_rate": 2.952584522831394e-05, "loss": 1.2255, "step": 29560 }, { "epoch": 8.85, "grad_norm": 1.7149155139923096, "learning_rate": 2.9520066969562056e-05, 
"loss": 1.0352, "step": 29565 }, { "epoch": 8.85, "grad_norm": 4.823055744171143, "learning_rate": 2.9514288461171557e-05, "loss": 1.2276, "step": 29570 }, { "epoch": 8.85, "grad_norm": 1.9236773252487183, "learning_rate": 2.9508509703461613e-05, "loss": 1.3419, "step": 29575 }, { "epoch": 8.85, "grad_norm": 0.9075140357017517, "learning_rate": 2.9502730696751362e-05, "loss": 0.9838, "step": 29580 }, { "epoch": 8.85, "grad_norm": 1.5596219301223755, "learning_rate": 2.949695144135997e-05, "loss": 1.1306, "step": 29585 }, { "epoch": 8.85, "grad_norm": 4.231356620788574, "learning_rate": 2.9491171937606628e-05, "loss": 1.13, "step": 29590 }, { "epoch": 8.85, "grad_norm": 1.3872261047363281, "learning_rate": 2.948539218581053e-05, "loss": 1.1116, "step": 29595 }, { "epoch": 8.86, "grad_norm": 2.4153921604156494, "learning_rate": 2.9479612186290878e-05, "loss": 1.1196, "step": 29600 }, { "epoch": 8.86, "grad_norm": 2.1678645610809326, "learning_rate": 2.9473831939366908e-05, "loss": 1.0154, "step": 29605 }, { "epoch": 8.86, "grad_norm": 5.16516637802124, "learning_rate": 2.9468051445357848e-05, "loss": 0.9722, "step": 29610 }, { "epoch": 8.86, "grad_norm": 1.94868004322052, "learning_rate": 2.946227070458295e-05, "loss": 1.1472, "step": 29615 }, { "epoch": 8.86, "grad_norm": 1.3372802734375, "learning_rate": 2.9456489717361478e-05, "loss": 1.1834, "step": 29620 }, { "epoch": 8.86, "grad_norm": 1.819388508796692, "learning_rate": 2.9450708484012713e-05, "loss": 1.0856, "step": 29625 }, { "epoch": 8.86, "grad_norm": 2.233203649520874, "learning_rate": 2.9444927004855942e-05, "loss": 1.2613, "step": 29630 }, { "epoch": 8.87, "grad_norm": 1.7675302028656006, "learning_rate": 2.9439145280210473e-05, "loss": 1.1527, "step": 29635 }, { "epoch": 8.87, "grad_norm": 3.0035910606384277, "learning_rate": 2.9433363310395634e-05, "loss": 0.9933, "step": 29640 }, { "epoch": 8.87, "grad_norm": 1.98517906665802, "learning_rate": 2.942758109573074e-05, "loss": 1.2819, "step": 29645 }, { 
"epoch": 8.87, "grad_norm": 2.4812839031219482, "learning_rate": 2.9421798636535137e-05, "loss": 0.9064, "step": 29650 }, { "epoch": 8.87, "grad_norm": 3.8419299125671387, "learning_rate": 2.9416015933128194e-05, "loss": 1.1614, "step": 29655 }, { "epoch": 8.87, "grad_norm": 8.991368293762207, "learning_rate": 2.941023298582929e-05, "loss": 1.0835, "step": 29660 }, { "epoch": 8.88, "grad_norm": 1.9285551309585571, "learning_rate": 2.9404449794957795e-05, "loss": 1.1749, "step": 29665 }, { "epoch": 8.88, "grad_norm": 4.323269844055176, "learning_rate": 2.9398666360833126e-05, "loss": 1.0456, "step": 29670 }, { "epoch": 8.88, "grad_norm": 3.1791858673095703, "learning_rate": 2.9392882683774674e-05, "loss": 1.0455, "step": 29675 }, { "epoch": 8.88, "grad_norm": 1.171151041984558, "learning_rate": 2.9387098764101882e-05, "loss": 1.1266, "step": 29680 }, { "epoch": 8.88, "grad_norm": 3.656491994857788, "learning_rate": 2.938131460213419e-05, "loss": 1.1155, "step": 29685 }, { "epoch": 8.88, "grad_norm": 3.314228057861328, "learning_rate": 2.937553019819104e-05, "loss": 1.0785, "step": 29690 }, { "epoch": 8.88, "grad_norm": 2.453927516937256, "learning_rate": 2.936974555259191e-05, "loss": 1.2502, "step": 29695 }, { "epoch": 8.89, "grad_norm": 1.7844200134277344, "learning_rate": 2.936396066565627e-05, "loss": 1.2183, "step": 29700 }, { "epoch": 8.89, "grad_norm": 3.409106731414795, "learning_rate": 2.9358175537703624e-05, "loss": 1.1222, "step": 29705 }, { "epoch": 8.89, "grad_norm": 7.029350757598877, "learning_rate": 2.9352390169053475e-05, "loss": 1.1221, "step": 29710 }, { "epoch": 8.89, "grad_norm": 5.586660861968994, "learning_rate": 2.9346604560025336e-05, "loss": 1.1623, "step": 29715 }, { "epoch": 8.89, "grad_norm": 2.2339181900024414, "learning_rate": 2.9340818710938745e-05, "loss": 1.2503, "step": 29720 }, { "epoch": 8.89, "grad_norm": 2.694247245788574, "learning_rate": 2.9335032622113252e-05, "loss": 1.1939, "step": 29725 }, { "epoch": 8.89, "grad_norm": 
1.8695008754730225, "learning_rate": 2.9329246293868407e-05, "loss": 1.1074, "step": 29730 }, { "epoch": 8.9, "grad_norm": 3.689002275466919, "learning_rate": 2.9323459726523794e-05, "loss": 0.9559, "step": 29735 }, { "epoch": 8.9, "grad_norm": 2.7362306118011475, "learning_rate": 2.9317672920398993e-05, "loss": 1.0817, "step": 29740 }, { "epoch": 8.9, "grad_norm": 1.8381929397583008, "learning_rate": 2.9311885875813604e-05, "loss": 1.1532, "step": 29745 }, { "epoch": 8.9, "grad_norm": 2.022266149520874, "learning_rate": 2.930609859308724e-05, "loss": 0.9327, "step": 29750 }, { "epoch": 8.9, "grad_norm": 3.2312119007110596, "learning_rate": 2.9300311072539527e-05, "loss": 0.9719, "step": 29755 }, { "epoch": 8.9, "grad_norm": 7.237270355224609, "learning_rate": 2.92945233144901e-05, "loss": 1.172, "step": 29760 }, { "epoch": 8.91, "grad_norm": 2.9999465942382812, "learning_rate": 2.928873531925862e-05, "loss": 1.0583, "step": 29765 }, { "epoch": 8.91, "grad_norm": 3.0304183959960938, "learning_rate": 2.928294708716475e-05, "loss": 1.0234, "step": 29770 }, { "epoch": 8.91, "grad_norm": 1.2405683994293213, "learning_rate": 2.927715861852816e-05, "loss": 0.9056, "step": 29775 }, { "epoch": 8.91, "grad_norm": 4.732754230499268, "learning_rate": 2.9271369913668546e-05, "loss": 1.2299, "step": 29780 }, { "epoch": 8.91, "grad_norm": 2.121570348739624, "learning_rate": 2.9265580972905603e-05, "loss": 1.0564, "step": 29785 }, { "epoch": 8.91, "grad_norm": 3.0892438888549805, "learning_rate": 2.9259791796559066e-05, "loss": 1.0464, "step": 29790 }, { "epoch": 8.91, "grad_norm": 2.665210247039795, "learning_rate": 2.9254002384948655e-05, "loss": 1.1263, "step": 29795 }, { "epoch": 8.92, "grad_norm": 2.6491611003875732, "learning_rate": 2.9248212738394116e-05, "loss": 1.0433, "step": 29800 }, { "epoch": 8.92, "grad_norm": 4.1141815185546875, "learning_rate": 2.9242422857215195e-05, "loss": 1.134, "step": 29805 }, { "epoch": 8.92, "grad_norm": 6.5142951011657715, 
"learning_rate": 2.9236632741731673e-05, "loss": 1.2787, "step": 29810 }, { "epoch": 8.92, "grad_norm": 2.864144802093506, "learning_rate": 2.923084239226333e-05, "loss": 1.0214, "step": 29815 }, { "epoch": 8.92, "grad_norm": 6.211786270141602, "learning_rate": 2.922505180912996e-05, "loss": 1.0869, "step": 29820 }, { "epoch": 8.92, "grad_norm": 4.338846206665039, "learning_rate": 2.921926099265137e-05, "loss": 1.1458, "step": 29825 }, { "epoch": 8.92, "grad_norm": 4.332965850830078, "learning_rate": 2.9213469943147374e-05, "loss": 1.2077, "step": 29830 }, { "epoch": 8.93, "grad_norm": 2.471970319747925, "learning_rate": 2.920767866093782e-05, "loss": 1.1077, "step": 29835 }, { "epoch": 8.93, "grad_norm": 1.8260835409164429, "learning_rate": 2.920188714634255e-05, "loss": 1.0752, "step": 29840 }, { "epoch": 8.93, "grad_norm": 8.06219482421875, "learning_rate": 2.919609539968141e-05, "loss": 1.1127, "step": 29845 }, { "epoch": 8.93, "grad_norm": 3.8866000175476074, "learning_rate": 2.9190303421274288e-05, "loss": 1.065, "step": 29850 }, { "epoch": 8.93, "grad_norm": 3.3143255710601807, "learning_rate": 2.918451121144107e-05, "loss": 1.0448, "step": 29855 }, { "epoch": 8.93, "grad_norm": 6.668207168579102, "learning_rate": 2.9178718770501638e-05, "loss": 1.0846, "step": 29860 }, { "epoch": 8.94, "grad_norm": 2.2254040241241455, "learning_rate": 2.917292609877592e-05, "loss": 1.1679, "step": 29865 }, { "epoch": 8.94, "grad_norm": 2.8223040103912354, "learning_rate": 2.916713319658383e-05, "loss": 0.9145, "step": 29870 }, { "epoch": 8.94, "grad_norm": 2.584289312362671, "learning_rate": 2.9161340064245302e-05, "loss": 1.0576, "step": 29875 }, { "epoch": 8.94, "grad_norm": 2.3711581230163574, "learning_rate": 2.9155546702080282e-05, "loss": 1.0433, "step": 29880 }, { "epoch": 8.94, "grad_norm": 3.263002634048462, "learning_rate": 2.9149753110408744e-05, "loss": 1.0901, "step": 29885 }, { "epoch": 8.94, "grad_norm": 3.8075244426727295, "learning_rate": 
2.9143959289550653e-05, "loss": 0.8897, "step": 29890 }, { "epoch": 8.94, "grad_norm": 4.947206497192383, "learning_rate": 2.913816523982601e-05, "loss": 1.147, "step": 29895 }, { "epoch": 8.95, "grad_norm": 1.8365360498428345, "learning_rate": 2.913237096155479e-05, "loss": 1.0571, "step": 29900 }, { "epoch": 8.95, "grad_norm": 2.5439870357513428, "learning_rate": 2.9126576455057014e-05, "loss": 1.062, "step": 29905 }, { "epoch": 8.95, "grad_norm": 3.2245960235595703, "learning_rate": 2.9120781720652713e-05, "loss": 1.2575, "step": 29910 }, { "epoch": 8.95, "grad_norm": 6.086019039154053, "learning_rate": 2.9114986758661922e-05, "loss": 1.1212, "step": 29915 }, { "epoch": 8.95, "grad_norm": 2.142395496368408, "learning_rate": 2.9109191569404693e-05, "loss": 1.0457, "step": 29920 }, { "epoch": 8.95, "grad_norm": 2.1010286808013916, "learning_rate": 2.9104555254582145e-05, "loss": 1.0763, "step": 29925 }, { "epoch": 8.95, "grad_norm": 2.635202407836914, "learning_rate": 2.909875965705188e-05, "loss": 1.2506, "step": 29930 }, { "epoch": 8.96, "grad_norm": 2.7703280448913574, "learning_rate": 2.9092963833151388e-05, "loss": 1.1693, "step": 29935 }, { "epoch": 8.96, "grad_norm": 5.17828369140625, "learning_rate": 2.9087167783200752e-05, "loss": 1.1341, "step": 29940 }, { "epoch": 8.96, "grad_norm": 3.351463556289673, "learning_rate": 2.908137150752008e-05, "loss": 1.1165, "step": 29945 }, { "epoch": 8.96, "grad_norm": 2.4802517890930176, "learning_rate": 2.9075575006429524e-05, "loss": 1.1043, "step": 29950 }, { "epoch": 8.96, "grad_norm": 1.3321456909179688, "learning_rate": 2.9069778280249183e-05, "loss": 1.0579, "step": 29955 }, { "epoch": 8.96, "grad_norm": 5.725347518920898, "learning_rate": 2.9063981329299216e-05, "loss": 1.0917, "step": 29960 }, { "epoch": 8.97, "grad_norm": 1.3497544527053833, "learning_rate": 2.905818415389978e-05, "loss": 1.1703, "step": 29965 }, { "epoch": 8.97, "grad_norm": 2.001232385635376, "learning_rate": 2.9052386754371065e-05, "loss": 
0.9471, "step": 29970 }, { "epoch": 8.97, "grad_norm": 2.2034993171691895, "learning_rate": 2.904658913103323e-05, "loss": 1.2491, "step": 29975 }, { "epoch": 8.97, "grad_norm": 7.609285354614258, "learning_rate": 2.9040791284206493e-05, "loss": 1.062, "step": 29980 }, { "epoch": 8.97, "grad_norm": 3.246325731277466, "learning_rate": 2.9034993214211048e-05, "loss": 1.1693, "step": 29985 }, { "epoch": 8.97, "grad_norm": 1.247209906578064, "learning_rate": 2.902919492136712e-05, "loss": 1.2194, "step": 29990 }, { "epoch": 8.97, "grad_norm": 3.488403081893921, "learning_rate": 2.9023396405994946e-05, "loss": 1.2346, "step": 29995 }, { "epoch": 8.98, "grad_norm": 1.7326892614364624, "learning_rate": 2.901759766841477e-05, "loss": 1.0722, "step": 30000 }, { "epoch": 8.98, "grad_norm": 4.2910356521606445, "learning_rate": 2.901179870894685e-05, "loss": 1.0043, "step": 30005 }, { "epoch": 8.98, "grad_norm": 2.8054089546203613, "learning_rate": 2.900599952791146e-05, "loss": 1.1417, "step": 30010 }, { "epoch": 8.98, "grad_norm": 4.5647687911987305, "learning_rate": 2.9000200125628885e-05, "loss": 1.1773, "step": 30015 }, { "epoch": 8.98, "grad_norm": 1.9864246845245361, "learning_rate": 2.899440050241941e-05, "loss": 1.212, "step": 30020 }, { "epoch": 8.98, "grad_norm": 3.7864115238189697, "learning_rate": 2.898860065860335e-05, "loss": 0.9945, "step": 30025 }, { "epoch": 8.98, "grad_norm": 2.867279529571533, "learning_rate": 2.8982800594501014e-05, "loss": 1.1722, "step": 30030 }, { "epoch": 8.99, "grad_norm": 1.8707783222198486, "learning_rate": 2.8977000310432744e-05, "loss": 1.2435, "step": 30035 }, { "epoch": 8.99, "grad_norm": 3.260164260864258, "learning_rate": 2.8971199806718884e-05, "loss": 1.2162, "step": 30040 }, { "epoch": 8.99, "grad_norm": 3.576340436935425, "learning_rate": 2.896539908367979e-05, "loss": 1.0317, "step": 30045 }, { "epoch": 8.99, "grad_norm": 2.4378347396850586, "learning_rate": 2.8959598141635826e-05, "loss": 1.0759, "step": 30050 }, { 
"epoch": 8.99, "grad_norm": 3.9936773777008057, "learning_rate": 2.8953796980907365e-05, "loss": 1.1189, "step": 30055 }, { "epoch": 8.99, "grad_norm": 2.5489132404327393, "learning_rate": 2.894799560181481e-05, "loss": 1.0809, "step": 30060 }, { "epoch": 9.0, "grad_norm": 2.193693161010742, "learning_rate": 2.894219400467856e-05, "loss": 1.1111, "step": 30065 }, { "epoch": 9.0, "grad_norm": 2.2259438037872314, "learning_rate": 2.8936392189819034e-05, "loss": 1.2037, "step": 30070 }, { "epoch": 9.0, "grad_norm": 6.183416843414307, "learning_rate": 2.893059015755666e-05, "loss": 1.2801, "step": 30075 }, { "epoch": 9.0, "grad_norm": 2.10697865486145, "learning_rate": 2.892478790821187e-05, "loss": 1.089, "step": 30080 }, { "epoch": 9.0, "grad_norm": 2.224777936935425, "learning_rate": 2.8918985442105128e-05, "loss": 1.1623, "step": 30085 }, { "epoch": 9.0, "grad_norm": 2.0744781494140625, "learning_rate": 2.8913182759556894e-05, "loss": 0.9921, "step": 30090 }, { "epoch": 9.0, "grad_norm": 3.06063175201416, "learning_rate": 2.8907379860887645e-05, "loss": 1.0767, "step": 30095 }, { "epoch": 9.01, "grad_norm": 1.988356590270996, "learning_rate": 2.890157674641787e-05, "loss": 1.0859, "step": 30100 }, { "epoch": 9.01, "grad_norm": 1.3868892192840576, "learning_rate": 2.8895773416468063e-05, "loss": 1.118, "step": 30105 }, { "epoch": 9.01, "grad_norm": 2.165102481842041, "learning_rate": 2.8889969871358746e-05, "loss": 1.0599, "step": 30110 }, { "epoch": 9.01, "grad_norm": 1.76089346408844, "learning_rate": 2.888416611141043e-05, "loss": 1.1494, "step": 30115 }, { "epoch": 9.01, "grad_norm": 3.6625218391418457, "learning_rate": 2.887836213694366e-05, "loss": 1.0324, "step": 30120 }, { "epoch": 9.01, "grad_norm": 2.0290017127990723, "learning_rate": 2.8872557948278976e-05, "loss": 1.1136, "step": 30125 }, { "epoch": 9.01, "grad_norm": 1.7238839864730835, "learning_rate": 2.8866753545736946e-05, "loss": 1.0594, "step": 30130 }, { "epoch": 9.02, "grad_norm": 
3.4980666637420654, "learning_rate": 2.8860948929638136e-05, "loss": 0.9758, "step": 30135 }, { "epoch": 9.02, "grad_norm": 4.032216548919678, "learning_rate": 2.885514410030313e-05, "loss": 0.999, "step": 30140 }, { "epoch": 9.02, "grad_norm": 1.803316593170166, "learning_rate": 2.8849339058052526e-05, "loss": 1.188, "step": 30145 }, { "epoch": 9.02, "grad_norm": 1.5701510906219482, "learning_rate": 2.8843533803206923e-05, "loss": 1.0158, "step": 30150 }, { "epoch": 9.02, "grad_norm": 1.275262713432312, "learning_rate": 2.8837728336086946e-05, "loss": 0.9069, "step": 30155 }, { "epoch": 9.02, "grad_norm": 2.408705949783325, "learning_rate": 2.8831922657013216e-05, "loss": 1.0335, "step": 30160 }, { "epoch": 9.03, "grad_norm": 2.678744316101074, "learning_rate": 2.8826116766306383e-05, "loss": 1.0944, "step": 30165 }, { "epoch": 9.03, "grad_norm": 3.0019614696502686, "learning_rate": 2.8820310664287096e-05, "loss": 0.9107, "step": 30170 }, { "epoch": 9.03, "grad_norm": 1.4615614414215088, "learning_rate": 2.881450435127603e-05, "loss": 1.0898, "step": 30175 }, { "epoch": 9.03, "grad_norm": 1.9362547397613525, "learning_rate": 2.8808697827593845e-05, "loss": 1.1276, "step": 30180 }, { "epoch": 9.03, "grad_norm": 2.6547698974609375, "learning_rate": 2.880289109356124e-05, "loss": 1.2548, "step": 30185 }, { "epoch": 9.03, "grad_norm": 7.905858516693115, "learning_rate": 2.8797084149498915e-05, "loss": 0.9404, "step": 30190 }, { "epoch": 9.03, "grad_norm": 1.2215291261672974, "learning_rate": 2.879127699572758e-05, "loss": 1.003, "step": 30195 }, { "epoch": 9.04, "grad_norm": 1.8280051946640015, "learning_rate": 2.878546963256795e-05, "loss": 1.0391, "step": 30200 }, { "epoch": 9.04, "grad_norm": 1.2926268577575684, "learning_rate": 2.8779662060340778e-05, "loss": 1.2109, "step": 30205 }, { "epoch": 9.04, "grad_norm": 1.7577592134475708, "learning_rate": 2.8773854279366797e-05, "loss": 1.1695, "step": 30210 }, { "epoch": 9.04, "grad_norm": 2.365588903427124, 
"learning_rate": 2.8768046289966766e-05, "loss": 1.1072, "step": 30215 }, { "epoch": 9.04, "grad_norm": 2.1536929607391357, "learning_rate": 2.8762238092461447e-05, "loss": 1.1535, "step": 30220 }, { "epoch": 9.04, "grad_norm": 2.988088369369507, "learning_rate": 2.8756429687171637e-05, "loss": 0.9861, "step": 30225 }, { "epoch": 9.04, "grad_norm": 3.4814767837524414, "learning_rate": 2.8750621074418115e-05, "loss": 1.3146, "step": 30230 }, { "epoch": 9.05, "grad_norm": 1.8867098093032837, "learning_rate": 2.874481225452169e-05, "loss": 1.037, "step": 30235 }, { "epoch": 9.05, "grad_norm": 1.5579580068588257, "learning_rate": 2.8739003227803184e-05, "loss": 1.1022, "step": 30240 }, { "epoch": 9.05, "grad_norm": 3.373805284500122, "learning_rate": 2.8733193994583412e-05, "loss": 1.1459, "step": 30245 }, { "epoch": 9.05, "grad_norm": 2.02644419670105, "learning_rate": 2.8727384555183217e-05, "loss": 1.1493, "step": 30250 }, { "epoch": 9.05, "grad_norm": 2.24518084526062, "learning_rate": 2.8721574909923445e-05, "loss": 1.0157, "step": 30255 }, { "epoch": 9.05, "grad_norm": 1.2897074222564697, "learning_rate": 2.871576505912496e-05, "loss": 1.128, "step": 30260 }, { "epoch": 9.05, "grad_norm": 1.510237693786621, "learning_rate": 2.8709955003108636e-05, "loss": 1.1292, "step": 30265 }, { "epoch": 9.06, "grad_norm": 1.2689833641052246, "learning_rate": 2.870414474219535e-05, "loss": 1.0674, "step": 30270 }, { "epoch": 9.06, "grad_norm": 1.3254342079162598, "learning_rate": 2.8698334276705995e-05, "loss": 1.1342, "step": 30275 }, { "epoch": 9.06, "grad_norm": 2.8477869033813477, "learning_rate": 2.8692523606961492e-05, "loss": 1.1701, "step": 30280 }, { "epoch": 9.06, "grad_norm": 1.7730213403701782, "learning_rate": 2.868671273328274e-05, "loss": 1.1741, "step": 30285 }, { "epoch": 9.06, "grad_norm": 1.311159610748291, "learning_rate": 2.8680901655990678e-05, "loss": 1.0136, "step": 30290 }, { "epoch": 9.06, "grad_norm": 1.5597997903823853, "learning_rate": 
2.867509037540625e-05, "loss": 1.07, "step": 30295 }, { "epoch": 9.07, "grad_norm": 3.055157423019409, "learning_rate": 2.8669278891850392e-05, "loss": 1.2124, "step": 30300 }, { "epoch": 9.07, "grad_norm": 2.4278862476348877, "learning_rate": 2.8663467205644086e-05, "loss": 0.9972, "step": 30305 }, { "epoch": 9.07, "grad_norm": 2.5763089656829834, "learning_rate": 2.8657655317108284e-05, "loss": 0.9838, "step": 30310 }, { "epoch": 9.07, "grad_norm": 2.0975847244262695, "learning_rate": 2.8651843226563983e-05, "loss": 1.1683, "step": 30315 }, { "epoch": 9.07, "grad_norm": 1.7405827045440674, "learning_rate": 2.864603093433218e-05, "loss": 1.0669, "step": 30320 }, { "epoch": 9.07, "grad_norm": 7.420092582702637, "learning_rate": 2.8640218440733875e-05, "loss": 1.015, "step": 30325 }, { "epoch": 9.07, "grad_norm": 3.352482795715332, "learning_rate": 2.8634405746090088e-05, "loss": 1.0018, "step": 30330 }, { "epoch": 9.08, "grad_norm": 4.274402141571045, "learning_rate": 2.8628592850721857e-05, "loss": 1.1142, "step": 30335 }, { "epoch": 9.08, "grad_norm": 1.6124602556228638, "learning_rate": 2.862277975495021e-05, "loss": 1.1566, "step": 30340 }, { "epoch": 9.08, "grad_norm": 1.762075424194336, "learning_rate": 2.8616966459096202e-05, "loss": 1.037, "step": 30345 }, { "epoch": 9.08, "grad_norm": 3.1598358154296875, "learning_rate": 2.8611152963480892e-05, "loss": 1.0827, "step": 30350 }, { "epoch": 9.08, "grad_norm": 4.015747547149658, "learning_rate": 2.8605339268425363e-05, "loss": 0.9845, "step": 30355 }, { "epoch": 9.08, "grad_norm": 6.202319145202637, "learning_rate": 2.8599525374250684e-05, "loss": 1.1009, "step": 30360 }, { "epoch": 9.08, "grad_norm": 3.212571620941162, "learning_rate": 2.859371128127797e-05, "loss": 1.1507, "step": 30365 }, { "epoch": 9.09, "grad_norm": 2.252204656600952, "learning_rate": 2.8587896989828323e-05, "loss": 1.1523, "step": 30370 }, { "epoch": 9.09, "grad_norm": 2.1097633838653564, "learning_rate": 2.8582082500222845e-05, "loss": 
1.0826, "step": 30375 }, { "epoch": 9.09, "grad_norm": 2.7466039657592773, "learning_rate": 2.8576267812782675e-05, "loss": 1.1409, "step": 30380 }, { "epoch": 9.09, "grad_norm": 8.023296356201172, "learning_rate": 2.857045292782895e-05, "loss": 0.9465, "step": 30385 }, { "epoch": 9.09, "grad_norm": 3.5571467876434326, "learning_rate": 2.8564637845682823e-05, "loss": 1.1629, "step": 30390 }, { "epoch": 9.09, "grad_norm": 3.21684193611145, "learning_rate": 2.8558822566665454e-05, "loss": 1.0213, "step": 30395 }, { "epoch": 9.1, "grad_norm": 3.065678596496582, "learning_rate": 2.8553007091098016e-05, "loss": 0.9455, "step": 30400 }, { "epoch": 9.1, "grad_norm": 3.9140865802764893, "learning_rate": 2.8547191419301687e-05, "loss": 0.974, "step": 30405 }, { "epoch": 9.1, "grad_norm": 4.4788641929626465, "learning_rate": 2.854137555159766e-05, "loss": 0.9984, "step": 30410 }, { "epoch": 9.1, "grad_norm": 1.5970557928085327, "learning_rate": 2.8535559488307145e-05, "loss": 1.1595, "step": 30415 }, { "epoch": 9.1, "grad_norm": 2.536341905593872, "learning_rate": 2.8529743229751354e-05, "loss": 1.1607, "step": 30420 }, { "epoch": 9.1, "grad_norm": 2.384742259979248, "learning_rate": 2.8523926776251514e-05, "loss": 1.0816, "step": 30425 }, { "epoch": 9.1, "grad_norm": 4.2098388671875, "learning_rate": 2.8518110128128863e-05, "loss": 1.0073, "step": 30430 }, { "epoch": 9.11, "grad_norm": 1.5726318359375, "learning_rate": 2.851229328570465e-05, "loss": 1.0309, "step": 30435 }, { "epoch": 9.11, "grad_norm": 2.4862496852874756, "learning_rate": 2.850647624930012e-05, "loss": 1.1971, "step": 30440 }, { "epoch": 9.11, "grad_norm": 3.5220296382904053, "learning_rate": 2.8500659019236553e-05, "loss": 1.0254, "step": 30445 }, { "epoch": 9.11, "grad_norm": 5.456885814666748, "learning_rate": 2.8494841595835226e-05, "loss": 0.7803, "step": 30450 }, { "epoch": 9.11, "grad_norm": 3.647815704345703, "learning_rate": 2.8489023979417435e-05, "loss": 1.0921, "step": 30455 }, { "epoch": 9.11, 
"grad_norm": 3.8451616764068604, "learning_rate": 2.8483206170304473e-05, "loss": 1.048, "step": 30460 }, { "epoch": 9.11, "grad_norm": 1.4922055006027222, "learning_rate": 2.8477388168817664e-05, "loss": 1.0033, "step": 30465 }, { "epoch": 9.12, "grad_norm": 3.4989609718322754, "learning_rate": 2.847156997527831e-05, "loss": 1.2043, "step": 30470 }, { "epoch": 9.12, "grad_norm": 2.6432862281799316, "learning_rate": 2.8465751590007762e-05, "loss": 1.0925, "step": 30475 }, { "epoch": 9.12, "grad_norm": 2.137115478515625, "learning_rate": 2.845993301332735e-05, "loss": 1.1623, "step": 30480 }, { "epoch": 9.12, "grad_norm": 3.7192418575286865, "learning_rate": 2.845411424555844e-05, "loss": 1.0968, "step": 30485 }, { "epoch": 9.12, "grad_norm": 2.331989049911499, "learning_rate": 2.8448295287022386e-05, "loss": 1.066, "step": 30490 }, { "epoch": 9.12, "grad_norm": 2.4555537700653076, "learning_rate": 2.8442476138040568e-05, "loss": 1.0104, "step": 30495 }, { "epoch": 9.13, "grad_norm": 4.285811901092529, "learning_rate": 2.8436656798934376e-05, "loss": 1.0691, "step": 30500 }, { "epoch": 9.13, "grad_norm": 1.4364317655563354, "learning_rate": 2.8430837270025196e-05, "loss": 1.1077, "step": 30505 }, { "epoch": 9.13, "grad_norm": 1.1822254657745361, "learning_rate": 2.842501755163444e-05, "loss": 1.0285, "step": 30510 }, { "epoch": 9.13, "grad_norm": 7.019848823547363, "learning_rate": 2.8419197644083527e-05, "loss": 1.1324, "step": 30515 }, { "epoch": 9.13, "grad_norm": 1.9157739877700806, "learning_rate": 2.841337754769388e-05, "loss": 1.1563, "step": 30520 }, { "epoch": 9.13, "grad_norm": 2.709386110305786, "learning_rate": 2.8407557262786945e-05, "loss": 1.1656, "step": 30525 }, { "epoch": 9.13, "grad_norm": 4.894626140594482, "learning_rate": 2.8401736789684153e-05, "loss": 1.1094, "step": 30530 }, { "epoch": 9.14, "grad_norm": 2.685741662979126, "learning_rate": 2.839591612870698e-05, "loss": 0.9865, "step": 30535 }, { "epoch": 9.14, "grad_norm": 
2.1641218662261963, "learning_rate": 2.8390095280176894e-05, "loss": 1.1466, "step": 30540 }, { "epoch": 9.14, "grad_norm": 3.6387031078338623, "learning_rate": 2.838427424441536e-05, "loss": 1.0634, "step": 30545 }, { "epoch": 9.14, "grad_norm": 2.7952628135681152, "learning_rate": 2.8378453021743882e-05, "loss": 1.104, "step": 30550 }, { "epoch": 9.14, "grad_norm": 2.7595205307006836, "learning_rate": 2.837263161248396e-05, "loss": 1.1497, "step": 30555 }, { "epoch": 9.14, "grad_norm": 2.8632593154907227, "learning_rate": 2.8366810016957096e-05, "loss": 0.8454, "step": 30560 }, { "epoch": 9.14, "grad_norm": 2.603904962539673, "learning_rate": 2.8360988235484814e-05, "loss": 1.0677, "step": 30565 }, { "epoch": 9.15, "grad_norm": 3.4272186756134033, "learning_rate": 2.8355166268388643e-05, "loss": 1.0451, "step": 30570 }, { "epoch": 9.15, "grad_norm": 2.422112226486206, "learning_rate": 2.8349344115990127e-05, "loss": 1.1719, "step": 30575 }, { "epoch": 9.15, "grad_norm": 4.006277561187744, "learning_rate": 2.834352177861082e-05, "loss": 1.0388, "step": 30580 }, { "epoch": 9.15, "grad_norm": 3.0967395305633545, "learning_rate": 2.8337699256572282e-05, "loss": 0.8718, "step": 30585 }, { "epoch": 9.15, "grad_norm": 3.621616840362549, "learning_rate": 2.833187655019608e-05, "loss": 1.2271, "step": 30590 }, { "epoch": 9.15, "grad_norm": 1.8164085149765015, "learning_rate": 2.8326053659803803e-05, "loss": 1.0748, "step": 30595 }, { "epoch": 9.16, "grad_norm": 5.264411926269531, "learning_rate": 2.8320230585717034e-05, "loss": 1.0605, "step": 30600 }, { "epoch": 9.16, "grad_norm": 2.898695707321167, "learning_rate": 2.831440732825738e-05, "loss": 1.0845, "step": 30605 }, { "epoch": 9.16, "grad_norm": 1.863917350769043, "learning_rate": 2.830858388774646e-05, "loss": 1.1017, "step": 30610 }, { "epoch": 9.16, "grad_norm": 2.258936643600464, "learning_rate": 2.830276026450589e-05, "loss": 1.2552, "step": 30615 }, { "epoch": 9.16, "grad_norm": 1.4957672357559204, 
"learning_rate": 2.82969364588573e-05, "loss": 1.1517, "step": 30620 }, { "epoch": 9.16, "grad_norm": 4.283958911895752, "learning_rate": 2.8291112471122338e-05, "loss": 1.2211, "step": 30625 }, { "epoch": 9.16, "grad_norm": 2.875387191772461, "learning_rate": 2.8285288301622658e-05, "loss": 1.1582, "step": 30630 }, { "epoch": 9.17, "grad_norm": 3.575859546661377, "learning_rate": 2.827946395067991e-05, "loss": 1.2838, "step": 30635 }, { "epoch": 9.17, "grad_norm": 3.4343674182891846, "learning_rate": 2.8273639418615788e-05, "loss": 1.0276, "step": 30640 }, { "epoch": 9.17, "grad_norm": 2.2512946128845215, "learning_rate": 2.826781470575196e-05, "loss": 0.8439, "step": 30645 }, { "epoch": 9.17, "grad_norm": 3.449051856994629, "learning_rate": 2.826198981241012e-05, "loss": 1.1702, "step": 30650 }, { "epoch": 9.17, "grad_norm": 3.092623233795166, "learning_rate": 2.8256164738911977e-05, "loss": 1.0548, "step": 30655 }, { "epoch": 9.17, "grad_norm": 2.316596031188965, "learning_rate": 2.8250339485579248e-05, "loss": 1.1654, "step": 30660 }, { "epoch": 9.17, "grad_norm": 5.095229148864746, "learning_rate": 2.824451405273364e-05, "loss": 1.1143, "step": 30665 }, { "epoch": 9.18, "grad_norm": 2.3152172565460205, "learning_rate": 2.8238688440696885e-05, "loss": 1.0509, "step": 30670 }, { "epoch": 9.18, "grad_norm": 2.434166669845581, "learning_rate": 2.8232862649790742e-05, "loss": 0.9805, "step": 30675 }, { "epoch": 9.18, "grad_norm": 2.4533190727233887, "learning_rate": 2.8227036680336956e-05, "loss": 0.964, "step": 30680 }, { "epoch": 9.18, "grad_norm": 3.624160051345825, "learning_rate": 2.8221210532657283e-05, "loss": 1.0408, "step": 30685 }, { "epoch": 9.18, "grad_norm": 4.148279666900635, "learning_rate": 2.8215384207073515e-05, "loss": 1.1267, "step": 30690 }, { "epoch": 9.18, "grad_norm": 3.3725688457489014, "learning_rate": 2.820955770390741e-05, "loss": 1.0548, "step": 30695 }, { "epoch": 9.19, "grad_norm": 1.4353331327438354, "learning_rate": 
2.820373102348076e-05, "loss": 0.8967, "step": 30700 }, { "epoch": 9.19, "grad_norm": 1.5979543924331665, "learning_rate": 2.8197904166115386e-05, "loss": 1.2111, "step": 30705 }, { "epoch": 9.19, "grad_norm": 4.696241855621338, "learning_rate": 2.819207713213309e-05, "loss": 1.0566, "step": 30710 }, { "epoch": 9.19, "grad_norm": 2.242954730987549, "learning_rate": 2.8186249921855683e-05, "loss": 1.1283, "step": 30715 }, { "epoch": 9.19, "grad_norm": 1.5376769304275513, "learning_rate": 2.818042253560501e-05, "loss": 1.1125, "step": 30720 }, { "epoch": 9.19, "grad_norm": 3.4272782802581787, "learning_rate": 2.8174594973702906e-05, "loss": 1.1207, "step": 30725 }, { "epoch": 9.19, "grad_norm": 1.447213053703308, "learning_rate": 2.8168767236471223e-05, "loss": 1.2412, "step": 30730 }, { "epoch": 9.2, "grad_norm": 1.9952369928359985, "learning_rate": 2.816293932423182e-05, "loss": 1.0659, "step": 30735 }, { "epoch": 9.2, "grad_norm": 3.7000386714935303, "learning_rate": 2.8157111237306562e-05, "loss": 1.0848, "step": 30740 }, { "epoch": 9.2, "grad_norm": 0.9547794461250305, "learning_rate": 2.8151282976017336e-05, "loss": 1.0252, "step": 30745 }, { "epoch": 9.2, "grad_norm": 2.513737440109253, "learning_rate": 2.8145454540686024e-05, "loss": 0.9642, "step": 30750 }, { "epoch": 9.2, "grad_norm": 5.190265655517578, "learning_rate": 2.813962593163453e-05, "loss": 1.0277, "step": 30755 }, { "epoch": 9.2, "grad_norm": 2.4562807083129883, "learning_rate": 2.8133797149184755e-05, "loss": 1.1477, "step": 30760 }, { "epoch": 9.2, "grad_norm": 1.605533242225647, "learning_rate": 2.812796819365862e-05, "loss": 1.0803, "step": 30765 }, { "epoch": 9.21, "grad_norm": 1.3287237882614136, "learning_rate": 2.812213906537806e-05, "loss": 1.1856, "step": 30770 }, { "epoch": 9.21, "grad_norm": 2.1841320991516113, "learning_rate": 2.8116309764665e-05, "loss": 1.272, "step": 30775 }, { "epoch": 9.21, "grad_norm": 2.0276877880096436, "learning_rate": 2.811048029184139e-05, "loss": 1.0535, 
"step": 30780 }, { "epoch": 9.21, "grad_norm": 2.9414737224578857, "learning_rate": 2.810465064722919e-05, "loss": 1.0341, "step": 30785 }, { "epoch": 9.21, "grad_norm": 1.2586784362792969, "learning_rate": 2.8098820831150362e-05, "loss": 1.1424, "step": 30790 }, { "epoch": 9.21, "grad_norm": 4.782675743103027, "learning_rate": 2.809299084392688e-05, "loss": 1.0249, "step": 30795 }, { "epoch": 9.22, "grad_norm": 1.6296465396881104, "learning_rate": 2.8087160685880726e-05, "loss": 1.0696, "step": 30800 }, { "epoch": 9.22, "grad_norm": 1.8938437700271606, "learning_rate": 2.808133035733389e-05, "loss": 1.0324, "step": 30805 }, { "epoch": 9.22, "grad_norm": 3.3237104415893555, "learning_rate": 2.807549985860839e-05, "loss": 1.0725, "step": 30810 }, { "epoch": 9.22, "grad_norm": 1.7440217733383179, "learning_rate": 2.8069669190026233e-05, "loss": 1.0946, "step": 30815 }, { "epoch": 9.22, "grad_norm": 1.855573058128357, "learning_rate": 2.8063838351909434e-05, "loss": 1.1026, "step": 30820 }, { "epoch": 9.22, "grad_norm": 1.5629770755767822, "learning_rate": 2.8058007344580027e-05, "loss": 1.3132, "step": 30825 }, { "epoch": 9.22, "grad_norm": 4.092102527618408, "learning_rate": 2.805217616836005e-05, "loss": 1.1355, "step": 30830 }, { "epoch": 9.23, "grad_norm": 2.19754695892334, "learning_rate": 2.8046344823571557e-05, "loss": 1.0562, "step": 30835 }, { "epoch": 9.23, "grad_norm": 2.126832962036133, "learning_rate": 2.804051331053661e-05, "loss": 1.169, "step": 30840 }, { "epoch": 9.23, "grad_norm": 3.9059386253356934, "learning_rate": 2.803468162957727e-05, "loss": 1.0877, "step": 30845 }, { "epoch": 9.23, "grad_norm": 3.600858449935913, "learning_rate": 2.8028849781015615e-05, "loss": 1.0115, "step": 30850 }, { "epoch": 9.23, "grad_norm": 3.020725965499878, "learning_rate": 2.802301776517374e-05, "loss": 0.9752, "step": 30855 }, { "epoch": 9.23, "grad_norm": 2.0996792316436768, "learning_rate": 2.801718558237374e-05, "loss": 1.253, "step": 30860 }, { "epoch": 9.23, 
"grad_norm": 1.7010042667388916, "learning_rate": 2.801135323293771e-05, "loss": 1.0858, "step": 30865 }, { "epoch": 9.24, "grad_norm": 2.4270615577697754, "learning_rate": 2.8005520717187773e-05, "loss": 0.7829, "step": 30870 }, { "epoch": 9.24, "grad_norm": 3.7250173091888428, "learning_rate": 2.7999688035446048e-05, "loss": 1.1342, "step": 30875 }, { "epoch": 9.24, "grad_norm": 2.1018354892730713, "learning_rate": 2.7993855188034674e-05, "loss": 1.086, "step": 30880 }, { "epoch": 9.24, "grad_norm": 1.520485758781433, "learning_rate": 2.7988022175275786e-05, "loss": 1.0099, "step": 30885 }, { "epoch": 9.24, "grad_norm": 2.133230447769165, "learning_rate": 2.7982188997491544e-05, "loss": 1.1996, "step": 30890 }, { "epoch": 9.24, "grad_norm": 3.0092363357543945, "learning_rate": 2.7976355655004096e-05, "loss": 1.1812, "step": 30895 }, { "epoch": 9.24, "grad_norm": 4.08493185043335, "learning_rate": 2.7970522148135626e-05, "loss": 1.1021, "step": 30900 }, { "epoch": 9.25, "grad_norm": 2.9970130920410156, "learning_rate": 2.79646884772083e-05, "loss": 1.0735, "step": 30905 }, { "epoch": 9.25, "grad_norm": 3.3271689414978027, "learning_rate": 2.795885464254431e-05, "loss": 1.053, "step": 30910 }, { "epoch": 9.25, "grad_norm": 1.4670406579971313, "learning_rate": 2.7953020644465865e-05, "loss": 1.0696, "step": 30915 }, { "epoch": 9.25, "grad_norm": 5.897749900817871, "learning_rate": 2.7947186483295157e-05, "loss": 1.0656, "step": 30920 }, { "epoch": 9.25, "grad_norm": 2.2055165767669678, "learning_rate": 2.7941352159354394e-05, "loss": 0.9848, "step": 30925 }, { "epoch": 9.25, "grad_norm": 2.664703845977783, "learning_rate": 2.7935517672965817e-05, "loss": 1.071, "step": 30930 }, { "epoch": 9.26, "grad_norm": 5.1785454750061035, "learning_rate": 2.792968302445164e-05, "loss": 1.1283, "step": 30935 }, { "epoch": 9.26, "grad_norm": 4.427746772766113, "learning_rate": 2.7923848214134123e-05, "loss": 1.0444, "step": 30940 }, { "epoch": 9.26, "grad_norm": 
2.530972957611084, "learning_rate": 2.7918013242335504e-05, "loss": 1.0145, "step": 30945 }, { "epoch": 9.26, "grad_norm": 16.054180145263672, "learning_rate": 2.7912178109378056e-05, "loss": 1.2892, "step": 30950 }, { "epoch": 9.26, "grad_norm": 2.498807430267334, "learning_rate": 2.790634281558403e-05, "loss": 0.9976, "step": 30955 }, { "epoch": 9.26, "grad_norm": 1.9145476818084717, "learning_rate": 2.7900507361275714e-05, "loss": 0.938, "step": 30960 }, { "epoch": 9.26, "grad_norm": 1.1175459623336792, "learning_rate": 2.7894671746775386e-05, "loss": 1.1884, "step": 30965 }, { "epoch": 9.27, "grad_norm": 4.308173656463623, "learning_rate": 2.7888835972405352e-05, "loss": 1.1149, "step": 30970 }, { "epoch": 9.27, "grad_norm": 2.5797321796417236, "learning_rate": 2.7883000038487904e-05, "loss": 1.0525, "step": 30975 }, { "epoch": 9.27, "grad_norm": 2.3308804035186768, "learning_rate": 2.7877163945345368e-05, "loss": 1.168, "step": 30980 }, { "epoch": 9.27, "grad_norm": 2.186554193496704, "learning_rate": 2.7871327693300054e-05, "loss": 1.0804, "step": 30985 }, { "epoch": 9.27, "grad_norm": 7.420773029327393, "learning_rate": 2.7865491282674293e-05, "loss": 1.021, "step": 30990 }, { "epoch": 9.27, "grad_norm": 3.671011447906494, "learning_rate": 2.7859654713790434e-05, "loss": 1.0881, "step": 30995 }, { "epoch": 9.27, "grad_norm": 1.4102942943572998, "learning_rate": 2.7853817986970814e-05, "loss": 1.1006, "step": 31000 }, { "epoch": 9.28, "grad_norm": 4.502505779266357, "learning_rate": 2.784798110253779e-05, "loss": 1.0186, "step": 31005 }, { "epoch": 9.28, "grad_norm": 2.1751863956451416, "learning_rate": 2.7842144060813736e-05, "loss": 0.9848, "step": 31010 }, { "epoch": 9.28, "grad_norm": 2.5720980167388916, "learning_rate": 2.783630686212102e-05, "loss": 1.0289, "step": 31015 }, { "epoch": 9.28, "grad_norm": 1.7621455192565918, "learning_rate": 2.783046950678202e-05, "loss": 0.89, "step": 31020 }, { "epoch": 9.28, "grad_norm": 1.859261155128479, 
"learning_rate": 2.7824631995119134e-05, "loss": 1.1063, "step": 31025 }, { "epoch": 9.28, "grad_norm": 2.331822156906128, "learning_rate": 2.7818794327454757e-05, "loss": 1.0846, "step": 31030 }, { "epoch": 9.29, "grad_norm": 5.336060523986816, "learning_rate": 2.78129565041113e-05, "loss": 0.9446, "step": 31035 }, { "epoch": 9.29, "grad_norm": 1.7737152576446533, "learning_rate": 2.7807118525411176e-05, "loss": 1.0767, "step": 31040 }, { "epoch": 9.29, "grad_norm": 10.145687103271484, "learning_rate": 2.780128039167682e-05, "loss": 1.118, "step": 31045 }, { "epoch": 9.29, "grad_norm": 2.768529176712036, "learning_rate": 2.7795442103230656e-05, "loss": 1.0003, "step": 31050 }, { "epoch": 9.29, "grad_norm": 2.1712610721588135, "learning_rate": 2.778960366039513e-05, "loss": 1.0129, "step": 31055 }, { "epoch": 9.29, "grad_norm": 2.940528154373169, "learning_rate": 2.7783765063492696e-05, "loss": 1.077, "step": 31060 }, { "epoch": 9.29, "grad_norm": 2.5205068588256836, "learning_rate": 2.777792631284581e-05, "loss": 1.2037, "step": 31065 }, { "epoch": 9.3, "grad_norm": 1.8787987232208252, "learning_rate": 2.7772087408776937e-05, "loss": 1.1164, "step": 31070 }, { "epoch": 9.3, "grad_norm": 1.8113431930541992, "learning_rate": 2.776624835160856e-05, "loss": 1.1057, "step": 31075 }, { "epoch": 9.3, "grad_norm": 1.184775710105896, "learning_rate": 2.7760409141663164e-05, "loss": 1.1547, "step": 31080 }, { "epoch": 9.3, "grad_norm": 2.488152265548706, "learning_rate": 2.775456977926324e-05, "loss": 1.1285, "step": 31085 }, { "epoch": 9.3, "grad_norm": 1.7316298484802246, "learning_rate": 2.7748730264731287e-05, "loss": 1.0315, "step": 31090 }, { "epoch": 9.3, "grad_norm": 1.8663023710250854, "learning_rate": 2.774289059838982e-05, "loss": 1.0133, "step": 31095 }, { "epoch": 9.3, "grad_norm": 1.8005400896072388, "learning_rate": 2.7737050780561358e-05, "loss": 1.0123, "step": 31100 }, { "epoch": 9.31, "grad_norm": 2.0711119174957275, "learning_rate": 
2.7731210811568427e-05, "loss": 1.0181, "step": 31105 }, { "epoch": 9.31, "grad_norm": 4.488975524902344, "learning_rate": 2.7725370691733565e-05, "loss": 0.9204, "step": 31110 }, { "epoch": 9.31, "grad_norm": 1.9340286254882812, "learning_rate": 2.7719530421379308e-05, "loss": 0.9195, "step": 31115 }, { "epoch": 9.31, "grad_norm": 1.3298678398132324, "learning_rate": 2.7713690000828213e-05, "loss": 1.09, "step": 31120 }, { "epoch": 9.31, "grad_norm": 1.4798712730407715, "learning_rate": 2.7707849430402838e-05, "loss": 1.0828, "step": 31125 }, { "epoch": 9.31, "grad_norm": 2.92453670501709, "learning_rate": 2.770200871042576e-05, "loss": 1.1759, "step": 31130 }, { "epoch": 9.32, "grad_norm": 2.3763322830200195, "learning_rate": 2.769616784121954e-05, "loss": 1.04, "step": 31135 }, { "epoch": 9.32, "grad_norm": 1.7251625061035156, "learning_rate": 2.7690326823106776e-05, "loss": 1.0022, "step": 31140 }, { "epoch": 9.32, "grad_norm": 2.365755796432495, "learning_rate": 2.768448565641007e-05, "loss": 1.0852, "step": 31145 }, { "epoch": 9.32, "grad_norm": 2.628757953643799, "learning_rate": 2.7678644341451998e-05, "loss": 1.0231, "step": 31150 }, { "epoch": 9.32, "grad_norm": 2.307605743408203, "learning_rate": 2.7672802878555186e-05, "loss": 1.1832, "step": 31155 }, { "epoch": 9.32, "grad_norm": 4.4019951820373535, "learning_rate": 2.7666961268042253e-05, "loss": 1.0704, "step": 31160 }, { "epoch": 9.32, "grad_norm": 1.9276536703109741, "learning_rate": 2.7661119510235816e-05, "loss": 1.1429, "step": 31165 }, { "epoch": 9.33, "grad_norm": 1.0725014209747314, "learning_rate": 2.7655277605458507e-05, "loss": 0.9747, "step": 31170 }, { "epoch": 9.33, "grad_norm": 2.2577459812164307, "learning_rate": 2.7649435554032994e-05, "loss": 1.1215, "step": 31175 }, { "epoch": 9.33, "grad_norm": 1.3800833225250244, "learning_rate": 2.76435933562819e-05, "loss": 1.2958, "step": 31180 }, { "epoch": 9.33, "grad_norm": 1.0564745664596558, "learning_rate": 2.763775101252789e-05, "loss": 
1.0229, "step": 31185 }, { "epoch": 9.33, "grad_norm": 1.7594860792160034, "learning_rate": 2.763190852309364e-05, "loss": 1.041, "step": 31190 }, { "epoch": 9.33, "grad_norm": 1.3283685445785522, "learning_rate": 2.7626065888301816e-05, "loss": 1.1612, "step": 31195 }, { "epoch": 9.33, "grad_norm": 2.163010597229004, "learning_rate": 2.76202231084751e-05, "loss": 1.0172, "step": 31200 }, { "epoch": 9.34, "grad_norm": 0.8803406953811646, "learning_rate": 2.761438018393619e-05, "loss": 1.0381, "step": 31205 }, { "epoch": 9.34, "grad_norm": 2.269392728805542, "learning_rate": 2.7608537115007775e-05, "loss": 1.0224, "step": 31210 }, { "epoch": 9.34, "grad_norm": 3.6767170429229736, "learning_rate": 2.7602693902012572e-05, "loss": 1.1205, "step": 31215 }, { "epoch": 9.34, "grad_norm": 2.087529182434082, "learning_rate": 2.7596850545273286e-05, "loss": 1.0549, "step": 31220 }, { "epoch": 9.34, "grad_norm": 2.73809552192688, "learning_rate": 2.7591007045112642e-05, "loss": 1.0792, "step": 31225 }, { "epoch": 9.34, "grad_norm": 4.561300277709961, "learning_rate": 2.7585163401853376e-05, "loss": 1.1238, "step": 31230 }, { "epoch": 9.35, "grad_norm": 3.266984462738037, "learning_rate": 2.7579319615818215e-05, "loss": 1.0098, "step": 31235 }, { "epoch": 9.35, "grad_norm": 2.625706434249878, "learning_rate": 2.757347568732993e-05, "loss": 0.9742, "step": 31240 }, { "epoch": 9.35, "grad_norm": 1.4844180345535278, "learning_rate": 2.7567631616711243e-05, "loss": 1.1356, "step": 31245 }, { "epoch": 9.35, "grad_norm": 1.707905888557434, "learning_rate": 2.7561787404284928e-05, "loss": 0.9465, "step": 31250 }, { "epoch": 9.35, "grad_norm": 1.6472175121307373, "learning_rate": 2.755594305037376e-05, "loss": 1.1298, "step": 31255 }, { "epoch": 9.35, "grad_norm": 4.289102554321289, "learning_rate": 2.755009855530052e-05, "loss": 1.0665, "step": 31260 }, { "epoch": 9.35, "grad_norm": 1.6025971174240112, "learning_rate": 2.7544253919387987e-05, "loss": 0.968, "step": 31265 }, { 
"epoch": 9.36, "grad_norm": 3.026954412460327, "learning_rate": 2.7538409142958953e-05, "loss": 0.775, "step": 31270 }, { "epoch": 9.36, "grad_norm": 3.0979301929473877, "learning_rate": 2.7532564226336222e-05, "loss": 1.1642, "step": 31275 }, { "epoch": 9.36, "grad_norm": 3.6659834384918213, "learning_rate": 2.7526719169842602e-05, "loss": 1.1043, "step": 31280 }, { "epoch": 9.36, "grad_norm": 2.309248208999634, "learning_rate": 2.7520873973800903e-05, "loss": 1.0931, "step": 31285 }, { "epoch": 9.36, "grad_norm": 2.78249192237854, "learning_rate": 2.7515028638533958e-05, "loss": 1.0622, "step": 31290 }, { "epoch": 9.36, "grad_norm": 1.3780916929244995, "learning_rate": 2.7509183164364593e-05, "loss": 1.0036, "step": 31295 }, { "epoch": 9.36, "grad_norm": 2.205402135848999, "learning_rate": 2.7503337551615654e-05, "loss": 1.3497, "step": 31300 }, { "epoch": 9.37, "grad_norm": 3.5160036087036133, "learning_rate": 2.7497491800609986e-05, "loss": 1.2502, "step": 31305 }, { "epoch": 9.37, "grad_norm": 1.4284930229187012, "learning_rate": 2.7491645911670437e-05, "loss": 0.9721, "step": 31310 }, { "epoch": 9.37, "grad_norm": 5.789677143096924, "learning_rate": 2.748579988511988e-05, "loss": 1.1206, "step": 31315 }, { "epoch": 9.37, "grad_norm": 3.2626028060913086, "learning_rate": 2.747995372128117e-05, "loss": 1.0979, "step": 31320 }, { "epoch": 9.37, "grad_norm": 3.9523472785949707, "learning_rate": 2.7474107420477195e-05, "loss": 1.0172, "step": 31325 }, { "epoch": 9.37, "grad_norm": 3.8621017932891846, "learning_rate": 2.7468260983030846e-05, "loss": 1.1448, "step": 31330 }, { "epoch": 9.38, "grad_norm": 2.3181259632110596, "learning_rate": 2.7462414409265003e-05, "loss": 1.0566, "step": 31335 }, { "epoch": 9.38, "grad_norm": 1.018276572227478, "learning_rate": 2.7456567699502573e-05, "loss": 1.1421, "step": 31340 }, { "epoch": 9.38, "grad_norm": 2.900003671646118, "learning_rate": 2.7450720854066464e-05, "loss": 1.0946, "step": 31345 }, { "epoch": 9.38, 
"grad_norm": 1.6421140432357788, "learning_rate": 2.7444873873279586e-05, "loss": 1.1454, "step": 31350 }, { "epoch": 9.38, "grad_norm": 3.2035090923309326, "learning_rate": 2.743902675746487e-05, "loss": 1.1674, "step": 31355 }, { "epoch": 9.38, "grad_norm": 2.1225266456604004, "learning_rate": 2.743317950694524e-05, "loss": 1.0314, "step": 31360 }, { "epoch": 9.38, "grad_norm": 1.5846216678619385, "learning_rate": 2.742733212204363e-05, "loss": 0.9834, "step": 31365 }, { "epoch": 9.39, "grad_norm": 2.3945157527923584, "learning_rate": 2.7421484603083004e-05, "loss": 1.1503, "step": 31370 }, { "epoch": 9.39, "grad_norm": 1.944319725036621, "learning_rate": 2.7415636950386285e-05, "loss": 1.1397, "step": 31375 }, { "epoch": 9.39, "grad_norm": 3.456681728363037, "learning_rate": 2.7409789164276456e-05, "loss": 1.0885, "step": 31380 }, { "epoch": 9.39, "grad_norm": 2.284849166870117, "learning_rate": 2.740394124507647e-05, "loss": 1.0706, "step": 31385 }, { "epoch": 9.39, "grad_norm": 3.1083555221557617, "learning_rate": 2.7398093193109314e-05, "loss": 1.2338, "step": 31390 }, { "epoch": 9.39, "grad_norm": 2.024843454360962, "learning_rate": 2.739224500869796e-05, "loss": 1.2683, "step": 31395 }, { "epoch": 9.39, "grad_norm": 3.8661715984344482, "learning_rate": 2.7386396692165406e-05, "loss": 1.213, "step": 31400 }, { "epoch": 9.4, "grad_norm": 2.7120206356048584, "learning_rate": 2.738054824383464e-05, "loss": 1.0801, "step": 31405 }, { "epoch": 9.4, "grad_norm": 1.732399582862854, "learning_rate": 2.7374699664028668e-05, "loss": 1.2241, "step": 31410 }, { "epoch": 9.4, "grad_norm": 2.247511386871338, "learning_rate": 2.7368850953070503e-05, "loss": 1.0742, "step": 31415 }, { "epoch": 9.4, "grad_norm": 1.6583189964294434, "learning_rate": 2.736300211128316e-05, "loss": 1.0932, "step": 31420 }, { "epoch": 9.4, "grad_norm": 3.329725742340088, "learning_rate": 2.7357153138989668e-05, "loss": 1.0847, "step": 31425 }, { "epoch": 9.4, "grad_norm": 4.0160956382751465, 
"learning_rate": 2.735130403651306e-05, "loss": 1.026, "step": 31430 }, { "epoch": 9.4, "grad_norm": 4.307696342468262, "learning_rate": 2.734545480417637e-05, "loss": 0.9154, "step": 31435 }, { "epoch": 9.41, "grad_norm": 1.9788671731948853, "learning_rate": 2.733960544230265e-05, "loss": 1.0141, "step": 31440 }, { "epoch": 9.41, "grad_norm": 10.733818054199219, "learning_rate": 2.7333755951214957e-05, "loss": 0.9602, "step": 31445 }, { "epoch": 9.41, "grad_norm": 4.034037113189697, "learning_rate": 2.732790633123634e-05, "loss": 0.9853, "step": 31450 }, { "epoch": 9.41, "grad_norm": 4.451757431030273, "learning_rate": 2.7322056582689885e-05, "loss": 1.1026, "step": 31455 }, { "epoch": 9.41, "grad_norm": 5.899003028869629, "learning_rate": 2.7316206705898655e-05, "loss": 1.2884, "step": 31460 }, { "epoch": 9.41, "grad_norm": 2.728759765625, "learning_rate": 2.7310356701185747e-05, "loss": 1.0355, "step": 31465 }, { "epoch": 9.42, "grad_norm": 1.9716694355010986, "learning_rate": 2.7304506568874228e-05, "loss": 1.1145, "step": 31470 }, { "epoch": 9.42, "grad_norm": 2.966343641281128, "learning_rate": 2.7298656309287206e-05, "loss": 1.2393, "step": 31475 }, { "epoch": 9.42, "grad_norm": 3.9974021911621094, "learning_rate": 2.7292805922747787e-05, "loss": 1.08, "step": 31480 }, { "epoch": 9.42, "grad_norm": 2.1800546646118164, "learning_rate": 2.7286955409579084e-05, "loss": 1.0767, "step": 31485 }, { "epoch": 9.42, "grad_norm": 7.327594757080078, "learning_rate": 2.7281104770104205e-05, "loss": 1.0415, "step": 31490 }, { "epoch": 9.42, "grad_norm": 1.3344082832336426, "learning_rate": 2.7275254004646284e-05, "loss": 1.059, "step": 31495 }, { "epoch": 9.42, "grad_norm": 3.478764772415161, "learning_rate": 2.726940311352845e-05, "loss": 1.1981, "step": 31500 }, { "epoch": 9.43, "grad_norm": 0.9754401445388794, "learning_rate": 2.726355209707384e-05, "loss": 1.0865, "step": 31505 }, { "epoch": 9.43, "grad_norm": 2.373671531677246, "learning_rate": 2.72577009556056e-05, 
"loss": 0.9644, "step": 31510 }, { "epoch": 9.43, "grad_norm": 2.6133816242218018, "learning_rate": 2.7251849689446885e-05, "loss": 1.0806, "step": 31515 }, { "epoch": 9.43, "grad_norm": 3.878574848175049, "learning_rate": 2.724599829892085e-05, "loss": 1.1225, "step": 31520 }, { "epoch": 9.43, "grad_norm": 3.734332799911499, "learning_rate": 2.724014678435067e-05, "loss": 1.121, "step": 31525 }, { "epoch": 9.43, "grad_norm": 8.514619827270508, "learning_rate": 2.7234295146059503e-05, "loss": 1.0401, "step": 31530 }, { "epoch": 9.43, "grad_norm": 1.05905282497406, "learning_rate": 2.722844338437054e-05, "loss": 0.9848, "step": 31535 }, { "epoch": 9.44, "grad_norm": 1.8391199111938477, "learning_rate": 2.7222591499606966e-05, "loss": 1.0086, "step": 31540 }, { "epoch": 9.44, "grad_norm": 2.663299560546875, "learning_rate": 2.721673949209197e-05, "loss": 1.134, "step": 31545 }, { "epoch": 9.44, "grad_norm": 2.3702192306518555, "learning_rate": 2.7210887362148755e-05, "loss": 1.1833, "step": 31550 }, { "epoch": 9.44, "grad_norm": 3.5295441150665283, "learning_rate": 2.7205035110100534e-05, "loss": 1.0104, "step": 31555 }, { "epoch": 9.44, "grad_norm": 6.249225616455078, "learning_rate": 2.7199182736270524e-05, "loss": 1.0269, "step": 31560 }, { "epoch": 9.44, "grad_norm": 3.3529887199401855, "learning_rate": 2.719333024098193e-05, "loss": 1.0667, "step": 31565 }, { "epoch": 9.45, "grad_norm": 2.5261456966400146, "learning_rate": 2.7187477624557982e-05, "loss": 0.9635, "step": 31570 }, { "epoch": 9.45, "grad_norm": 3.518795967102051, "learning_rate": 2.718162488732192e-05, "loss": 1.1624, "step": 31575 }, { "epoch": 9.45, "grad_norm": 8.467171669006348, "learning_rate": 2.7175772029596986e-05, "loss": 1.1869, "step": 31580 }, { "epoch": 9.45, "grad_norm": 3.2780685424804688, "learning_rate": 2.7169919051706422e-05, "loss": 1.0429, "step": 31585 }, { "epoch": 9.45, "grad_norm": 4.071305751800537, "learning_rate": 2.716406595397349e-05, "loss": 1.252, "step": 31590 }, { 
"epoch": 9.45, "grad_norm": 2.072431802749634, "learning_rate": 2.7158212736721444e-05, "loss": 1.0693, "step": 31595 }, { "epoch": 9.45, "grad_norm": 6.514179229736328, "learning_rate": 2.7152359400273546e-05, "loss": 1.147, "step": 31600 }, { "epoch": 9.46, "grad_norm": 3.2777328491210938, "learning_rate": 2.7146505944953075e-05, "loss": 0.9809, "step": 31605 }, { "epoch": 9.46, "grad_norm": 2.1688232421875, "learning_rate": 2.714065237108332e-05, "loss": 1.2513, "step": 31610 }, { "epoch": 9.46, "grad_norm": 3.6384012699127197, "learning_rate": 2.7134798678987544e-05, "loss": 1.1978, "step": 31615 }, { "epoch": 9.46, "grad_norm": 1.3136318922042847, "learning_rate": 2.712894486898907e-05, "loss": 1.0106, "step": 31620 }, { "epoch": 9.46, "grad_norm": 1.3264063596725464, "learning_rate": 2.7123090941411185e-05, "loss": 1.1233, "step": 31625 }, { "epoch": 9.46, "grad_norm": 1.84585440158844, "learning_rate": 2.7117236896577185e-05, "loss": 1.0608, "step": 31630 }, { "epoch": 9.46, "grad_norm": 2.1590003967285156, "learning_rate": 2.71113827348104e-05, "loss": 1.1022, "step": 31635 }, { "epoch": 9.47, "grad_norm": 5.31462287902832, "learning_rate": 2.7105528456434136e-05, "loss": 1.0598, "step": 31640 }, { "epoch": 9.47, "grad_norm": 2.746979236602783, "learning_rate": 2.7099674061771724e-05, "loss": 1.116, "step": 31645 }, { "epoch": 9.47, "grad_norm": 2.339494466781616, "learning_rate": 2.70938195511465e-05, "loss": 1.0503, "step": 31650 }, { "epoch": 9.47, "grad_norm": 10.379657745361328, "learning_rate": 2.708796492488179e-05, "loss": 1.2657, "step": 31655 }, { "epoch": 9.47, "grad_norm": 1.8636327981948853, "learning_rate": 2.7082110183300962e-05, "loss": 1.0569, "step": 31660 }, { "epoch": 9.47, "grad_norm": 3.62514328956604, "learning_rate": 2.7076255326727344e-05, "loss": 1.1566, "step": 31665 }, { "epoch": 9.48, "grad_norm": 1.6103851795196533, "learning_rate": 2.7070400355484306e-05, "loss": 1.2291, "step": 31670 }, { "epoch": 9.48, "grad_norm": 
38.40741729736328, "learning_rate": 2.70645452698952e-05, "loss": 1.0219, "step": 31675 }, { "epoch": 9.48, "grad_norm": 1.3236831426620483, "learning_rate": 2.705869007028341e-05, "loss": 1.1879, "step": 31680 }, { "epoch": 9.48, "grad_norm": 2.328878402709961, "learning_rate": 2.7052834756972307e-05, "loss": 0.949, "step": 31685 }, { "epoch": 9.48, "grad_norm": 2.4937305450439453, "learning_rate": 2.704697933028528e-05, "loss": 1.2623, "step": 31690 }, { "epoch": 9.48, "grad_norm": 1.3484976291656494, "learning_rate": 2.7041123790545703e-05, "loss": 0.9774, "step": 31695 }, { "epoch": 9.48, "grad_norm": 2.5267484188079834, "learning_rate": 2.703526813807698e-05, "loss": 1.0935, "step": 31700 }, { "epoch": 9.49, "grad_norm": 1.9129496812820435, "learning_rate": 2.702941237320252e-05, "loss": 1.0674, "step": 31705 }, { "epoch": 9.49, "grad_norm": 2.022209882736206, "learning_rate": 2.702355649624572e-05, "loss": 1.1119, "step": 31710 }, { "epoch": 9.49, "grad_norm": 5.069640159606934, "learning_rate": 2.7017700507529996e-05, "loss": 1.1296, "step": 31715 }, { "epoch": 9.49, "grad_norm": 3.870335102081299, "learning_rate": 2.7011844407378776e-05, "loss": 1.2017, "step": 31720 }, { "epoch": 9.49, "grad_norm": 1.1030346155166626, "learning_rate": 2.7005988196115482e-05, "loss": 1.1276, "step": 31725 }, { "epoch": 9.49, "grad_norm": 1.269315481185913, "learning_rate": 2.7000131874063545e-05, "loss": 1.1485, "step": 31730 }, { "epoch": 9.49, "grad_norm": 6.841867923736572, "learning_rate": 2.69942754415464e-05, "loss": 0.9942, "step": 31735 }, { "epoch": 9.5, "grad_norm": 2.453371524810791, "learning_rate": 2.6988418898887498e-05, "loss": 1.0962, "step": 31740 }, { "epoch": 9.5, "grad_norm": 4.395910263061523, "learning_rate": 2.698256224641028e-05, "loss": 1.0983, "step": 31745 }, { "epoch": 9.5, "grad_norm": 3.848184108734131, "learning_rate": 2.6976705484438213e-05, "loss": 1.0488, "step": 31750 }, { "epoch": 9.5, "grad_norm": 3.335904121398926, "learning_rate": 
2.6970848613294765e-05, "loss": 1.2326, "step": 31755 }, { "epoch": 9.5, "grad_norm": 6.4267706871032715, "learning_rate": 2.696499163330339e-05, "loss": 1.0299, "step": 31760 }, { "epoch": 9.5, "grad_norm": 2.3225696086883545, "learning_rate": 2.6959134544787567e-05, "loss": 1.1295, "step": 31765 }, { "epoch": 9.51, "grad_norm": 4.162220478057861, "learning_rate": 2.6953277348070783e-05, "loss": 1.0728, "step": 31770 }, { "epoch": 9.51, "grad_norm": 5.359671592712402, "learning_rate": 2.6947420043476524e-05, "loss": 1.186, "step": 31775 }, { "epoch": 9.51, "grad_norm": 2.6894781589508057, "learning_rate": 2.6941562631328278e-05, "loss": 0.9445, "step": 31780 }, { "epoch": 9.51, "grad_norm": 30.432933807373047, "learning_rate": 2.693570511194954e-05, "loss": 1.1887, "step": 31785 }, { "epoch": 9.51, "grad_norm": 4.67871618270874, "learning_rate": 2.6929847485663823e-05, "loss": 0.9568, "step": 31790 }, { "epoch": 9.51, "grad_norm": 5.078667163848877, "learning_rate": 2.6923989752794638e-05, "loss": 1.0335, "step": 31795 }, { "epoch": 9.51, "grad_norm": 3.381072521209717, "learning_rate": 2.69181319136655e-05, "loss": 1.1649, "step": 31800 }, { "epoch": 9.52, "grad_norm": 3.0394537448883057, "learning_rate": 2.6912273968599928e-05, "loss": 1.1632, "step": 31805 }, { "epoch": 9.52, "grad_norm": 3.1796021461486816, "learning_rate": 2.690641591792145e-05, "loss": 1.2188, "step": 31810 }, { "epoch": 9.52, "grad_norm": 6.784097671508789, "learning_rate": 2.690055776195361e-05, "loss": 1.0647, "step": 31815 }, { "epoch": 9.52, "grad_norm": 5.862364768981934, "learning_rate": 2.6894699501019937e-05, "loss": 1.034, "step": 31820 }, { "epoch": 9.52, "grad_norm": 1.8071324825286865, "learning_rate": 2.688884113544398e-05, "loss": 1.0983, "step": 31825 }, { "epoch": 9.52, "grad_norm": 3.1565282344818115, "learning_rate": 2.6882982665549288e-05, "loss": 1.1485, "step": 31830 }, { "epoch": 9.52, "grad_norm": 2.5325186252593994, "learning_rate": 2.6877124091659424e-05, "loss": 
1.1328, "step": 31835 }, { "epoch": 9.53, "grad_norm": 1.6840946674346924, "learning_rate": 2.6871265414097947e-05, "loss": 0.9854, "step": 31840 }, { "epoch": 9.53, "grad_norm": 1.4422705173492432, "learning_rate": 2.6865406633188423e-05, "loss": 1.0145, "step": 31845 }, { "epoch": 9.53, "grad_norm": 2.4371087551116943, "learning_rate": 2.6859547749254433e-05, "loss": 1.1726, "step": 31850 }, { "epoch": 9.53, "grad_norm": 3.1019036769866943, "learning_rate": 2.6853688762619555e-05, "loss": 1.1436, "step": 31855 }, { "epoch": 9.53, "grad_norm": 3.6679959297180176, "learning_rate": 2.6847829673607373e-05, "loss": 1.0577, "step": 31860 }, { "epoch": 9.53, "grad_norm": 3.406115770339966, "learning_rate": 2.6841970482541473e-05, "loss": 1.1801, "step": 31865 }, { "epoch": 9.54, "grad_norm": 2.184192180633545, "learning_rate": 2.6836111189745462e-05, "loss": 1.1725, "step": 31870 }, { "epoch": 9.54, "grad_norm": 2.7702200412750244, "learning_rate": 2.6830251795542938e-05, "loss": 1.061, "step": 31875 }, { "epoch": 9.54, "grad_norm": 1.0884158611297607, "learning_rate": 2.6824392300257505e-05, "loss": 0.953, "step": 31880 }, { "epoch": 9.54, "grad_norm": 1.4687933921813965, "learning_rate": 2.681853270421279e-05, "loss": 1.1745, "step": 31885 }, { "epoch": 9.54, "grad_norm": 1.536842703819275, "learning_rate": 2.68126730077324e-05, "loss": 0.9724, "step": 31890 }, { "epoch": 9.54, "grad_norm": 3.3644862174987793, "learning_rate": 2.6806813211139965e-05, "loss": 1.1599, "step": 31895 }, { "epoch": 9.54, "grad_norm": 2.4303081035614014, "learning_rate": 2.6800953314759108e-05, "loss": 1.2175, "step": 31900 }, { "epoch": 9.55, "grad_norm": 4.207813262939453, "learning_rate": 2.679509331891348e-05, "loss": 1.1489, "step": 31905 }, { "epoch": 9.55, "grad_norm": 4.013348579406738, "learning_rate": 2.6789233223926713e-05, "loss": 1.1652, "step": 31910 }, { "epoch": 9.55, "grad_norm": 4.057195663452148, "learning_rate": 2.6783373030122455e-05, "loss": 1.1245, "step": 31915 }, { 
"epoch": 9.55, "grad_norm": 1.9660584926605225, "learning_rate": 2.6777512737824358e-05, "loss": 1.2141, "step": 31920 }, { "epoch": 9.55, "grad_norm": 1.8502095937728882, "learning_rate": 2.6771652347356074e-05, "loss": 0.9919, "step": 31925 }, { "epoch": 9.55, "grad_norm": 1.4505739212036133, "learning_rate": 2.6765791859041278e-05, "loss": 1.0553, "step": 31930 }, { "epoch": 9.55, "grad_norm": 3.0813705921173096, "learning_rate": 2.6759931273203632e-05, "loss": 0.9613, "step": 31935 }, { "epoch": 9.56, "grad_norm": 1.6187719106674194, "learning_rate": 2.6754070590166808e-05, "loss": 1.1784, "step": 31940 }, { "epoch": 9.56, "grad_norm": 5.102146148681641, "learning_rate": 2.6748209810254493e-05, "loss": 1.1304, "step": 31945 }, { "epoch": 9.56, "grad_norm": 2.7340807914733887, "learning_rate": 2.674234893379037e-05, "loss": 1.0532, "step": 31950 }, { "epoch": 9.56, "grad_norm": 3.3203606605529785, "learning_rate": 2.6736487961098122e-05, "loss": 1.0749, "step": 31955 }, { "epoch": 9.56, "grad_norm": 2.275956153869629, "learning_rate": 2.6730626892501448e-05, "loss": 1.0954, "step": 31960 }, { "epoch": 9.56, "grad_norm": 1.4483486413955688, "learning_rate": 2.6724765728324054e-05, "loss": 1.1644, "step": 31965 }, { "epoch": 9.57, "grad_norm": 2.504141330718994, "learning_rate": 2.6718904468889633e-05, "loss": 1.299, "step": 31970 }, { "epoch": 9.57, "grad_norm": 2.5018177032470703, "learning_rate": 2.671304311452191e-05, "loss": 1.1193, "step": 31975 }, { "epoch": 9.57, "grad_norm": 3.87642502784729, "learning_rate": 2.6707181665544594e-05, "loss": 1.0883, "step": 31980 }, { "epoch": 9.57, "grad_norm": 4.660168647766113, "learning_rate": 2.6701320122281416e-05, "loss": 1.0395, "step": 31985 }, { "epoch": 9.57, "grad_norm": 2.28615665435791, "learning_rate": 2.669545848505609e-05, "loss": 1.1664, "step": 31990 }, { "epoch": 9.57, "grad_norm": 4.775680065155029, "learning_rate": 2.6689596754192355e-05, "loss": 1.2104, "step": 31995 }, { "epoch": 9.57, "grad_norm": 
1.285249948501587, "learning_rate": 2.668373493001395e-05, "loss": 1.1832, "step": 32000 }, { "epoch": 9.58, "grad_norm": 4.368350982666016, "learning_rate": 2.667787301284461e-05, "loss": 1.1155, "step": 32005 }, { "epoch": 9.58, "grad_norm": 1.7169698476791382, "learning_rate": 2.667201100300809e-05, "loss": 0.9742, "step": 32010 }, { "epoch": 9.58, "grad_norm": 2.3918094635009766, "learning_rate": 2.666614890082815e-05, "loss": 1.067, "step": 32015 }, { "epoch": 9.58, "grad_norm": 3.565277576446533, "learning_rate": 2.666028670662853e-05, "loss": 1.0911, "step": 32020 }, { "epoch": 9.58, "grad_norm": 4.275758743286133, "learning_rate": 2.6654424420732997e-05, "loss": 0.998, "step": 32025 }, { "epoch": 9.58, "grad_norm": 2.591794490814209, "learning_rate": 2.6648562043465323e-05, "loss": 1.111, "step": 32030 }, { "epoch": 9.58, "grad_norm": 5.0382561683654785, "learning_rate": 2.664269957514929e-05, "loss": 1.1469, "step": 32035 }, { "epoch": 9.59, "grad_norm": 2.2757861614227295, "learning_rate": 2.6636837016108656e-05, "loss": 1.0449, "step": 32040 }, { "epoch": 9.59, "grad_norm": 2.4247963428497314, "learning_rate": 2.6630974366667226e-05, "loss": 1.166, "step": 32045 }, { "epoch": 9.59, "grad_norm": 1.7559020519256592, "learning_rate": 2.6625111627148768e-05, "loss": 1.1379, "step": 32050 }, { "epoch": 9.59, "grad_norm": 2.4389190673828125, "learning_rate": 2.661924879787709e-05, "loss": 1.0711, "step": 32055 }, { "epoch": 9.59, "grad_norm": 2.227290630340576, "learning_rate": 2.661338587917597e-05, "loss": 1.0021, "step": 32060 }, { "epoch": 9.59, "grad_norm": 1.5972774028778076, "learning_rate": 2.660752287136924e-05, "loss": 1.0343, "step": 32065 }, { "epoch": 9.59, "grad_norm": 7.264094829559326, "learning_rate": 2.6601659774780692e-05, "loss": 1.0336, "step": 32070 }, { "epoch": 9.6, "grad_norm": 1.8394643068313599, "learning_rate": 2.6595796589734136e-05, "loss": 1.0561, "step": 32075 }, { "epoch": 9.6, "grad_norm": 2.3896703720092773, "learning_rate": 
2.6589933316553395e-05, "loss": 1.2167, "step": 32080 }, { "epoch": 9.6, "grad_norm": 1.8738449811935425, "learning_rate": 2.6584069955562286e-05, "loss": 1.1215, "step": 32085 }, { "epoch": 9.6, "grad_norm": 1.6844857931137085, "learning_rate": 2.657820650708464e-05, "loss": 1.1444, "step": 32090 }, { "epoch": 9.6, "grad_norm": 6.457211494445801, "learning_rate": 2.657234297144429e-05, "loss": 0.9628, "step": 32095 }, { "epoch": 9.6, "grad_norm": 3.831761121749878, "learning_rate": 2.656647934896508e-05, "loss": 0.8621, "step": 32100 }, { "epoch": 9.61, "grad_norm": 2.6082820892333984, "learning_rate": 2.6560615639970833e-05, "loss": 1.1312, "step": 32105 }, { "epoch": 9.61, "grad_norm": 1.152781367301941, "learning_rate": 2.6554751844785414e-05, "loss": 1.1762, "step": 32110 }, { "epoch": 9.61, "grad_norm": 1.7581003904342651, "learning_rate": 2.654888796373266e-05, "loss": 0.9553, "step": 32115 }, { "epoch": 9.61, "grad_norm": 3.2171480655670166, "learning_rate": 2.654302399713644e-05, "loss": 1.0941, "step": 32120 }, { "epoch": 9.61, "grad_norm": 1.9642105102539062, "learning_rate": 2.6537159945320606e-05, "loss": 1.1688, "step": 32125 }, { "epoch": 9.61, "grad_norm": 9.561652183532715, "learning_rate": 2.6531295808609023e-05, "loss": 0.9819, "step": 32130 }, { "epoch": 9.61, "grad_norm": 7.757037162780762, "learning_rate": 2.6525431587325568e-05, "loss": 1.0823, "step": 32135 }, { "epoch": 9.62, "grad_norm": 4.364965438842773, "learning_rate": 2.6519567281794105e-05, "loss": 1.008, "step": 32140 }, { "epoch": 9.62, "grad_norm": 5.253482818603516, "learning_rate": 2.6513702892338526e-05, "loss": 0.9406, "step": 32145 }, { "epoch": 9.62, "grad_norm": 3.2114205360412598, "learning_rate": 2.650783841928271e-05, "loss": 0.9781, "step": 32150 }, { "epoch": 9.62, "grad_norm": 5.03977108001709, "learning_rate": 2.6501973862950547e-05, "loss": 1.1507, "step": 32155 }, { "epoch": 9.62, "grad_norm": 2.5563600063323975, "learning_rate": 2.6496109223665928e-05, "loss": 
1.0864, "step": 32160 }, { "epoch": 9.62, "grad_norm": 3.900869369506836, "learning_rate": 2.649024450175275e-05, "loss": 1.0983, "step": 32165 }, { "epoch": 9.62, "grad_norm": 1.9279170036315918, "learning_rate": 2.648437969753491e-05, "loss": 1.0866, "step": 32170 }, { "epoch": 9.63, "grad_norm": 5.001946926116943, "learning_rate": 2.6478514811336342e-05, "loss": 1.0995, "step": 32175 }, { "epoch": 9.63, "grad_norm": 3.2745273113250732, "learning_rate": 2.6472649843480923e-05, "loss": 1.1328, "step": 32180 }, { "epoch": 9.63, "grad_norm": 5.206939697265625, "learning_rate": 2.6466784794292588e-05, "loss": 1.1132, "step": 32185 }, { "epoch": 9.63, "grad_norm": 2.813488483428955, "learning_rate": 2.6460919664095245e-05, "loss": 1.0479, "step": 32190 }, { "epoch": 9.63, "grad_norm": 3.6982460021972656, "learning_rate": 2.6455054453212837e-05, "loss": 1.0327, "step": 32195 }, { "epoch": 9.63, "grad_norm": 1.1332085132598877, "learning_rate": 2.644918916196928e-05, "loss": 1.0878, "step": 32200 }, { "epoch": 9.64, "grad_norm": 2.1422863006591797, "learning_rate": 2.6443323790688517e-05, "loss": 1.0685, "step": 32205 }, { "epoch": 9.64, "grad_norm": 4.674763202667236, "learning_rate": 2.6437458339694483e-05, "loss": 0.8133, "step": 32210 }, { "epoch": 9.64, "grad_norm": 1.626625657081604, "learning_rate": 2.6431592809311112e-05, "loss": 1.2528, "step": 32215 }, { "epoch": 9.64, "grad_norm": 1.6559752225875854, "learning_rate": 2.642572719986236e-05, "loss": 1.0107, "step": 32220 }, { "epoch": 9.64, "grad_norm": 3.3808319568634033, "learning_rate": 2.6419861511672174e-05, "loss": 1.2791, "step": 32225 }, { "epoch": 9.64, "grad_norm": 2.20304274559021, "learning_rate": 2.6413995745064513e-05, "loss": 1.0505, "step": 32230 }, { "epoch": 9.64, "grad_norm": 3.014941692352295, "learning_rate": 2.6408129900363342e-05, "loss": 1.0684, "step": 32235 }, { "epoch": 9.65, "grad_norm": 2.0386364459991455, "learning_rate": 2.6402263977892617e-05, "loss": 0.9374, "step": 32240 }, { 
"epoch": 9.65, "grad_norm": 2.927957057952881, "learning_rate": 2.6396397977976305e-05, "loss": 0.95, "step": 32245 }, { "epoch": 9.65, "grad_norm": 2.662858724594116, "learning_rate": 2.639053190093839e-05, "loss": 1.0665, "step": 32250 }, { "epoch": 9.65, "grad_norm": 2.204014539718628, "learning_rate": 2.6384665747102842e-05, "loss": 1.0675, "step": 32255 }, { "epoch": 9.65, "grad_norm": 3.048424005508423, "learning_rate": 2.6378799516793645e-05, "loss": 1.1372, "step": 32260 }, { "epoch": 9.65, "grad_norm": 2.034694194793701, "learning_rate": 2.6372933210334788e-05, "loss": 1.1936, "step": 32265 }, { "epoch": 9.65, "grad_norm": 1.9877517223358154, "learning_rate": 2.636706682805026e-05, "loss": 1.129, "step": 32270 }, { "epoch": 9.66, "grad_norm": 1.1743559837341309, "learning_rate": 2.636120037026404e-05, "loss": 1.1884, "step": 32275 }, { "epoch": 9.66, "grad_norm": 3.967900514602661, "learning_rate": 2.6355333837300144e-05, "loss": 1.1177, "step": 32280 }, { "epoch": 9.66, "grad_norm": 2.5938291549682617, "learning_rate": 2.6349467229482566e-05, "loss": 1.1809, "step": 32285 }, { "epoch": 9.66, "grad_norm": 3.2662594318389893, "learning_rate": 2.6343600547135318e-05, "loss": 1.0683, "step": 32290 }, { "epoch": 9.66, "grad_norm": 2.3414182662963867, "learning_rate": 2.633773379058241e-05, "loss": 1.2112, "step": 32295 }, { "epoch": 9.66, "grad_norm": 2.2585291862487793, "learning_rate": 2.633186696014785e-05, "loss": 1.1298, "step": 32300 }, { "epoch": 9.67, "grad_norm": 1.8855080604553223, "learning_rate": 2.632600005615567e-05, "loss": 1.0465, "step": 32305 }, { "epoch": 9.67, "grad_norm": 1.7714307308197021, "learning_rate": 2.632013307892988e-05, "loss": 1.0813, "step": 32310 }, { "epoch": 9.67, "grad_norm": 1.7894978523254395, "learning_rate": 2.6314266028794516e-05, "loss": 1.0873, "step": 32315 }, { "epoch": 9.67, "grad_norm": 2.007699489593506, "learning_rate": 2.63083989060736e-05, "loss": 1.0634, "step": 32320 }, { "epoch": 9.67, "grad_norm": 
4.4481892585754395, "learning_rate": 2.630253171109118e-05, "loss": 1.1221, "step": 32325 }, { "epoch": 9.67, "grad_norm": 2.6404707431793213, "learning_rate": 2.6296664444171277e-05, "loss": 1.0629, "step": 32330 }, { "epoch": 9.67, "grad_norm": 4.706933975219727, "learning_rate": 2.629079710563795e-05, "loss": 0.9293, "step": 32335 }, { "epoch": 9.68, "grad_norm": 3.423985242843628, "learning_rate": 2.628492969581524e-05, "loss": 1.1948, "step": 32340 }, { "epoch": 9.68, "grad_norm": 3.1933882236480713, "learning_rate": 2.62790622150272e-05, "loss": 0.9721, "step": 32345 }, { "epoch": 9.68, "grad_norm": 2.481731414794922, "learning_rate": 2.6273194663597877e-05, "loss": 1.0354, "step": 32350 }, { "epoch": 9.68, "grad_norm": 2.2168052196502686, "learning_rate": 2.626732704185134e-05, "loss": 1.0921, "step": 32355 }, { "epoch": 9.68, "grad_norm": 1.8711326122283936, "learning_rate": 2.626145935011165e-05, "loss": 1.2218, "step": 32360 }, { "epoch": 9.68, "grad_norm": 5.039557456970215, "learning_rate": 2.6255591588702872e-05, "loss": 1.0336, "step": 32365 }, { "epoch": 9.68, "grad_norm": 2.2609169483184814, "learning_rate": 2.624972375794908e-05, "loss": 1.2382, "step": 32370 }, { "epoch": 9.69, "grad_norm": 3.6978182792663574, "learning_rate": 2.624385585817434e-05, "loss": 1.16, "step": 32375 }, { "epoch": 9.69, "grad_norm": 1.6256968975067139, "learning_rate": 2.623798788970273e-05, "loss": 1.1525, "step": 32380 }, { "epoch": 9.69, "grad_norm": 2.7560393810272217, "learning_rate": 2.623211985285834e-05, "loss": 1.0104, "step": 32385 }, { "epoch": 9.69, "grad_norm": 2.977550506591797, "learning_rate": 2.6226251747965247e-05, "loss": 1.093, "step": 32390 }, { "epoch": 9.69, "grad_norm": 2.2501907348632812, "learning_rate": 2.6220383575347547e-05, "loss": 1.1158, "step": 32395 }, { "epoch": 9.69, "grad_norm": 1.943388819694519, "learning_rate": 2.621451533532933e-05, "loss": 1.1113, "step": 32400 }, { "epoch": 9.7, "grad_norm": 3.2862977981567383, "learning_rate": 
2.6208647028234695e-05, "loss": 1.1408, "step": 32405 }, { "epoch": 9.7, "grad_norm": 2.2214648723602295, "learning_rate": 2.6202778654387737e-05, "loss": 0.9565, "step": 32410 }, { "epoch": 9.7, "grad_norm": 2.63506817817688, "learning_rate": 2.619691021411257e-05, "loss": 1.1946, "step": 32415 }, { "epoch": 9.7, "grad_norm": 2.0027542114257812, "learning_rate": 2.6191041707733293e-05, "loss": 0.9843, "step": 32420 }, { "epoch": 9.7, "grad_norm": 3.6025943756103516, "learning_rate": 2.618517313557402e-05, "loss": 0.9494, "step": 32425 }, { "epoch": 9.7, "grad_norm": 3.030153751373291, "learning_rate": 2.6179304497958855e-05, "loss": 1.059, "step": 32430 }, { "epoch": 9.7, "grad_norm": 1.995133399963379, "learning_rate": 2.6173435795211947e-05, "loss": 0.9847, "step": 32435 }, { "epoch": 9.71, "grad_norm": 2.5811808109283447, "learning_rate": 2.6167567027657397e-05, "loss": 1.0383, "step": 32440 }, { "epoch": 9.71, "grad_norm": 3.842437982559204, "learning_rate": 2.6161698195619327e-05, "loss": 1.0719, "step": 32445 }, { "epoch": 9.71, "grad_norm": 5.087784290313721, "learning_rate": 2.6155829299421875e-05, "loss": 1.1697, "step": 32450 }, { "epoch": 9.71, "grad_norm": 3.2515745162963867, "learning_rate": 2.6149960339389174e-05, "loss": 1.0113, "step": 32455 }, { "epoch": 9.71, "grad_norm": 2.410245656967163, "learning_rate": 2.6144091315845353e-05, "loss": 1.1748, "step": 32460 }, { "epoch": 9.71, "grad_norm": 2.392496347427368, "learning_rate": 2.6138222229114572e-05, "loss": 1.1732, "step": 32465 }, { "epoch": 9.71, "grad_norm": 2.2038094997406006, "learning_rate": 2.613235307952095e-05, "loss": 0.9646, "step": 32470 }, { "epoch": 9.72, "grad_norm": 1.9154731035232544, "learning_rate": 2.6126483867388645e-05, "loss": 1.2406, "step": 32475 }, { "epoch": 9.72, "grad_norm": 1.628433108329773, "learning_rate": 2.612061459304181e-05, "loss": 1.1421, "step": 32480 }, { "epoch": 9.72, "grad_norm": 1.9438600540161133, "learning_rate": 2.611474525680459e-05, "loss": 
1.0932, "step": 32485 }, { "epoch": 9.72, "grad_norm": 5.289341926574707, "learning_rate": 2.6108875859001152e-05, "loss": 0.9074, "step": 32490 }, { "epoch": 9.72, "grad_norm": 1.4697092771530151, "learning_rate": 2.610300639995565e-05, "loss": 1.178, "step": 32495 }, { "epoch": 9.72, "grad_norm": 2.8310253620147705, "learning_rate": 2.6097136879992256e-05, "loss": 1.0798, "step": 32500 }, { "epoch": 9.73, "grad_norm": 4.534608840942383, "learning_rate": 2.609126729943513e-05, "loss": 1.1111, "step": 32505 }, { "epoch": 9.73, "grad_norm": 4.378732681274414, "learning_rate": 2.608539765860844e-05, "loss": 1.1557, "step": 32510 }, { "epoch": 9.73, "grad_norm": 2.2998828887939453, "learning_rate": 2.607952795783637e-05, "loss": 1.0735, "step": 32515 }, { "epoch": 9.73, "grad_norm": 2.0673623085021973, "learning_rate": 2.6073658197443095e-05, "loss": 1.0948, "step": 32520 }, { "epoch": 9.73, "grad_norm": 2.536200761795044, "learning_rate": 2.6067788377752793e-05, "loss": 1.3438, "step": 32525 }, { "epoch": 9.73, "grad_norm": 1.4531900882720947, "learning_rate": 2.6061918499089656e-05, "loss": 0.9371, "step": 32530 }, { "epoch": 9.73, "grad_norm": 4.1081342697143555, "learning_rate": 2.6056048561777852e-05, "loss": 1.1155, "step": 32535 }, { "epoch": 9.74, "grad_norm": 8.0404691696167, "learning_rate": 2.6050178566141585e-05, "loss": 1.1525, "step": 32540 }, { "epoch": 9.74, "grad_norm": 2.6150429248809814, "learning_rate": 2.6044308512505056e-05, "loss": 1.1611, "step": 32545 }, { "epoch": 9.74, "grad_norm": 6.387349605560303, "learning_rate": 2.6038438401192444e-05, "loss": 1.0697, "step": 32550 }, { "epoch": 9.74, "grad_norm": 3.0312044620513916, "learning_rate": 2.6032568232527964e-05, "loss": 1.0854, "step": 32555 }, { "epoch": 9.74, "grad_norm": 1.371320366859436, "learning_rate": 2.6026698006835814e-05, "loss": 1.0813, "step": 32560 }, { "epoch": 9.74, "grad_norm": 2.4062769412994385, "learning_rate": 2.60208277244402e-05, "loss": 1.0397, "step": 32565 }, { 
"epoch": 9.74, "grad_norm": 2.3554258346557617, "learning_rate": 2.601495738566533e-05, "loss": 1.1501, "step": 32570 }, { "epoch": 9.75, "grad_norm": 2.7629899978637695, "learning_rate": 2.6009086990835418e-05, "loss": 1.039, "step": 32575 }, { "epoch": 9.75, "grad_norm": 1.9334179162979126, "learning_rate": 2.6003216540274682e-05, "loss": 1.1206, "step": 32580 }, { "epoch": 9.75, "grad_norm": 2.248354434967041, "learning_rate": 2.5997346034307337e-05, "loss": 1.0189, "step": 32585 }, { "epoch": 9.75, "grad_norm": 2.1622071266174316, "learning_rate": 2.5991475473257608e-05, "loss": 1.0698, "step": 32590 }, { "epoch": 9.75, "grad_norm": 1.745422601699829, "learning_rate": 2.598560485744972e-05, "loss": 0.8944, "step": 32595 }, { "epoch": 9.75, "grad_norm": 2.3082125186920166, "learning_rate": 2.59797341872079e-05, "loss": 1.0338, "step": 32600 }, { "epoch": 9.76, "grad_norm": 3.817018985748291, "learning_rate": 2.5973863462856378e-05, "loss": 0.965, "step": 32605 }, { "epoch": 9.76, "grad_norm": 2.986459493637085, "learning_rate": 2.596799268471939e-05, "loss": 1.1374, "step": 32610 }, { "epoch": 9.76, "grad_norm": 4.016047477722168, "learning_rate": 2.5962121853121174e-05, "loss": 1.047, "step": 32615 }, { "epoch": 9.76, "grad_norm": 3.042452812194824, "learning_rate": 2.5956250968385966e-05, "loss": 0.9784, "step": 32620 }, { "epoch": 9.76, "grad_norm": 2.1322600841522217, "learning_rate": 2.5950380030838017e-05, "loss": 1.0133, "step": 32625 }, { "epoch": 9.76, "grad_norm": 2.257575511932373, "learning_rate": 2.5944509040801564e-05, "loss": 1.0372, "step": 32630 }, { "epoch": 9.76, "grad_norm": 2.1339566707611084, "learning_rate": 2.593863799860085e-05, "loss": 1.0806, "step": 32635 }, { "epoch": 9.77, "grad_norm": 2.67110538482666, "learning_rate": 2.593276690456014e-05, "loss": 1.0397, "step": 32640 }, { "epoch": 9.77, "grad_norm": 4.476059913635254, "learning_rate": 2.592689575900369e-05, "loss": 1.0341, "step": 32645 }, { "epoch": 9.77, "grad_norm": 
3.7582499980926514, "learning_rate": 2.592102456225574e-05, "loss": 0.9378, "step": 32650 }, { "epoch": 9.77, "grad_norm": 2.036689043045044, "learning_rate": 2.5915153314640566e-05, "loss": 1.0847, "step": 32655 }, { "epoch": 9.77, "grad_norm": 8.208381652832031, "learning_rate": 2.5909282016482435e-05, "loss": 0.9298, "step": 32660 }, { "epoch": 9.77, "grad_norm": 2.9294674396514893, "learning_rate": 2.5903410668105586e-05, "loss": 1.1432, "step": 32665 }, { "epoch": 9.77, "grad_norm": 1.4900659322738647, "learning_rate": 2.5897539269834313e-05, "loss": 1.0694, "step": 32670 }, { "epoch": 9.78, "grad_norm": 7.3784990310668945, "learning_rate": 2.5891667821992883e-05, "loss": 1.0586, "step": 32675 }, { "epoch": 9.78, "grad_norm": 1.4126672744750977, "learning_rate": 2.588579632490556e-05, "loss": 1.0634, "step": 32680 }, { "epoch": 9.78, "grad_norm": 2.6837716102600098, "learning_rate": 2.587992477889663e-05, "loss": 1.0635, "step": 32685 }, { "epoch": 9.78, "grad_norm": 1.9375698566436768, "learning_rate": 2.5874053184290366e-05, "loss": 1.1508, "step": 32690 }, { "epoch": 9.78, "grad_norm": 2.615001678466797, "learning_rate": 2.586818154141105e-05, "loss": 1.2275, "step": 32695 }, { "epoch": 9.78, "grad_norm": 2.7543222904205322, "learning_rate": 2.5862309850582977e-05, "loss": 1.072, "step": 32700 }, { "epoch": 9.78, "grad_norm": 1.6145623922348022, "learning_rate": 2.5856438112130427e-05, "loss": 0.9575, "step": 32705 }, { "epoch": 9.79, "grad_norm": 2.7645764350891113, "learning_rate": 2.585056632637769e-05, "loss": 1.1105, "step": 32710 }, { "epoch": 9.79, "grad_norm": 1.5867153406143188, "learning_rate": 2.5844694493649054e-05, "loss": 0.9333, "step": 32715 }, { "epoch": 9.79, "grad_norm": 1.9193617105484009, "learning_rate": 2.583882261426882e-05, "loss": 1.2603, "step": 32720 }, { "epoch": 9.79, "grad_norm": 2.898785352706909, "learning_rate": 2.5832950688561297e-05, "loss": 1.1845, "step": 32725 }, { "epoch": 9.79, "grad_norm": 1.4677989482879639, 
"learning_rate": 2.582707871685076e-05, "loss": 1.1262, "step": 32730 }, { "epoch": 9.79, "grad_norm": 2.376750946044922, "learning_rate": 2.582120669946153e-05, "loss": 1.183, "step": 32735 }, { "epoch": 9.8, "grad_norm": 2.5935792922973633, "learning_rate": 2.5815334636717902e-05, "loss": 1.0188, "step": 32740 }, { "epoch": 9.8, "grad_norm": 4.154141902923584, "learning_rate": 2.5809462528944195e-05, "loss": 1.1518, "step": 32745 }, { "epoch": 9.8, "grad_norm": 4.1396260261535645, "learning_rate": 2.5803590376464716e-05, "loss": 1.1648, "step": 32750 }, { "epoch": 9.8, "grad_norm": 2.666245698928833, "learning_rate": 2.5797718179603776e-05, "loss": 1.0906, "step": 32755 }, { "epoch": 9.8, "grad_norm": 2.568533420562744, "learning_rate": 2.579184593868569e-05, "loss": 0.9812, "step": 32760 }, { "epoch": 9.8, "grad_norm": 3.8902790546417236, "learning_rate": 2.578597365403477e-05, "loss": 0.9988, "step": 32765 }, { "epoch": 9.8, "grad_norm": 1.01784086227417, "learning_rate": 2.578010132597534e-05, "loss": 1.2201, "step": 32770 }, { "epoch": 9.81, "grad_norm": 23.18547248840332, "learning_rate": 2.577422895483173e-05, "loss": 0.8956, "step": 32775 }, { "epoch": 9.81, "grad_norm": 2.158190965652466, "learning_rate": 2.5768356540928256e-05, "loss": 1.0248, "step": 32780 }, { "epoch": 9.81, "grad_norm": 6.015243053436279, "learning_rate": 2.5762484084589256e-05, "loss": 1.1377, "step": 32785 }, { "epoch": 9.81, "grad_norm": 2.5766751766204834, "learning_rate": 2.5756611586139044e-05, "loss": 0.8034, "step": 32790 }, { "epoch": 9.81, "grad_norm": 3.290252208709717, "learning_rate": 2.5750739045901966e-05, "loss": 1.3023, "step": 32795 }, { "epoch": 9.81, "grad_norm": 2.5434985160827637, "learning_rate": 2.574486646420235e-05, "loss": 1.1181, "step": 32800 }, { "epoch": 9.81, "grad_norm": 8.717535018920898, "learning_rate": 2.5738993841364535e-05, "loss": 1.056, "step": 32805 }, { "epoch": 9.82, "grad_norm": 2.3016560077667236, "learning_rate": 2.5733121177712856e-05, 
"loss": 1.2113, "step": 32810 }, { "epoch": 9.82, "grad_norm": 1.7972419261932373, "learning_rate": 2.5727248473571653e-05, "loss": 1.1958, "step": 32815 }, { "epoch": 9.82, "grad_norm": 3.321375846862793, "learning_rate": 2.5721375729265283e-05, "loss": 0.9634, "step": 32820 }, { "epoch": 9.82, "grad_norm": 3.124709129333496, "learning_rate": 2.5715502945118075e-05, "loss": 1.0188, "step": 32825 }, { "epoch": 9.82, "grad_norm": 1.5069762468338013, "learning_rate": 2.570963012145438e-05, "loss": 0.8482, "step": 32830 }, { "epoch": 9.82, "grad_norm": 2.640984535217285, "learning_rate": 2.5703757258598554e-05, "loss": 1.1124, "step": 32835 }, { "epoch": 9.83, "grad_norm": 4.078088283538818, "learning_rate": 2.5697884356874947e-05, "loss": 1.0427, "step": 32840 }, { "epoch": 9.83, "grad_norm": 3.2041678428649902, "learning_rate": 2.5692011416607908e-05, "loss": 0.9205, "step": 32845 }, { "epoch": 9.83, "grad_norm": 2.750579595565796, "learning_rate": 2.5686138438121804e-05, "loss": 1.2344, "step": 32850 }, { "epoch": 9.83, "grad_norm": 2.770933151245117, "learning_rate": 2.568026542174099e-05, "loss": 1.056, "step": 32855 }, { "epoch": 9.83, "grad_norm": 2.695629596710205, "learning_rate": 2.5674392367789818e-05, "loss": 0.8705, "step": 32860 }, { "epoch": 9.83, "grad_norm": 1.1691042184829712, "learning_rate": 2.5668519276592658e-05, "loss": 1.0706, "step": 32865 }, { "epoch": 9.83, "grad_norm": 4.2656097412109375, "learning_rate": 2.5662646148473867e-05, "loss": 0.9537, "step": 32870 }, { "epoch": 9.84, "grad_norm": 1.754385232925415, "learning_rate": 2.5656772983757822e-05, "loss": 1.1298, "step": 32875 }, { "epoch": 9.84, "grad_norm": 1.9311494827270508, "learning_rate": 2.5650899782768882e-05, "loss": 1.2659, "step": 32880 }, { "epoch": 9.84, "grad_norm": 3.109220504760742, "learning_rate": 2.564502654583144e-05, "loss": 1.4295, "step": 32885 }, { "epoch": 9.84, "grad_norm": 2.71708607673645, "learning_rate": 2.563915327326984e-05, "loss": 1.1224, "step": 32890 
}, { "epoch": 9.84, "grad_norm": 2.3169498443603516, "learning_rate": 2.5633279965408475e-05, "loss": 0.8078, "step": 32895 }, { "epoch": 9.84, "grad_norm": 1.5977541208267212, "learning_rate": 2.5627406622571708e-05, "loss": 1.19, "step": 32900 }, { "epoch": 9.84, "grad_norm": 3.2006161212921143, "learning_rate": 2.5621533245083934e-05, "loss": 1.1088, "step": 32905 }, { "epoch": 9.85, "grad_norm": 1.4427859783172607, "learning_rate": 2.5615659833269516e-05, "loss": 1.0645, "step": 32910 }, { "epoch": 9.85, "grad_norm": 3.558783769607544, "learning_rate": 2.5609786387452855e-05, "loss": 1.1666, "step": 32915 }, { "epoch": 9.85, "grad_norm": 1.977691888809204, "learning_rate": 2.5603912907958323e-05, "loss": 1.0334, "step": 32920 }, { "epoch": 9.85, "grad_norm": 2.796060562133789, "learning_rate": 2.5598039395110307e-05, "loss": 0.9684, "step": 32925 }, { "epoch": 9.85, "grad_norm": 4.863877773284912, "learning_rate": 2.5592165849233196e-05, "loss": 1.1839, "step": 32930 }, { "epoch": 9.85, "grad_norm": 2.7297861576080322, "learning_rate": 2.558629227065138e-05, "loss": 1.1317, "step": 32935 }, { "epoch": 9.86, "grad_norm": 3.373911142349243, "learning_rate": 2.5580418659689255e-05, "loss": 1.1497, "step": 32940 }, { "epoch": 9.86, "grad_norm": 3.8792035579681396, "learning_rate": 2.5574545016671204e-05, "loss": 1.1907, "step": 32945 }, { "epoch": 9.86, "grad_norm": 3.2369890213012695, "learning_rate": 2.556867134192164e-05, "loss": 1.1728, "step": 32950 }, { "epoch": 9.86, "grad_norm": 4.953618049621582, "learning_rate": 2.5562797635764936e-05, "loss": 0.9538, "step": 32955 }, { "epoch": 9.86, "grad_norm": 3.96720290184021, "learning_rate": 2.555692389852551e-05, "loss": 1.0353, "step": 32960 }, { "epoch": 9.86, "grad_norm": 1.2469520568847656, "learning_rate": 2.5551050130527753e-05, "loss": 1.0343, "step": 32965 }, { "epoch": 9.86, "grad_norm": 3.0623819828033447, "learning_rate": 2.554517633209607e-05, "loss": 0.9463, "step": 32970 }, { "epoch": 9.87, 
"grad_norm": 2.923931837081909, "learning_rate": 2.553930250355487e-05, "loss": 1.115, "step": 32975 }, { "epoch": 9.87, "grad_norm": 3.7751688957214355, "learning_rate": 2.553342864522856e-05, "loss": 1.0373, "step": 32980 }, { "epoch": 9.87, "grad_norm": 3.002441167831421, "learning_rate": 2.552755475744153e-05, "loss": 1.2383, "step": 32985 }, { "epoch": 9.87, "grad_norm": 2.0195484161376953, "learning_rate": 2.55216808405182e-05, "loss": 1.033, "step": 32990 }, { "epoch": 9.87, "grad_norm": 3.1848971843719482, "learning_rate": 2.551580689478298e-05, "loss": 1.0186, "step": 32995 }, { "epoch": 9.87, "grad_norm": 1.623724341392517, "learning_rate": 2.5509932920560274e-05, "loss": 1.0359, "step": 33000 }, { "epoch": 9.87, "grad_norm": 3.2824020385742188, "learning_rate": 2.5504058918174513e-05, "loss": 1.2735, "step": 33005 }, { "epoch": 9.88, "grad_norm": 2.9799790382385254, "learning_rate": 2.54981848879501e-05, "loss": 1.0281, "step": 33010 }, { "epoch": 9.88, "grad_norm": 3.8928287029266357, "learning_rate": 2.5492310830211456e-05, "loss": 0.9883, "step": 33015 }, { "epoch": 9.88, "grad_norm": 4.279067516326904, "learning_rate": 2.5486436745282995e-05, "loss": 1.197, "step": 33020 }, { "epoch": 9.88, "grad_norm": 3.4032154083251953, "learning_rate": 2.5480562633489135e-05, "loss": 1.2078, "step": 33025 }, { "epoch": 9.88, "grad_norm": 3.3113880157470703, "learning_rate": 2.5474688495154298e-05, "loss": 1.2098, "step": 33030 }, { "epoch": 9.88, "grad_norm": 1.2559151649475098, "learning_rate": 2.5468814330602913e-05, "loss": 0.9934, "step": 33035 }, { "epoch": 9.89, "grad_norm": 1.3851603269577026, "learning_rate": 2.5462940140159398e-05, "loss": 1.1007, "step": 33040 }, { "epoch": 9.89, "grad_norm": 4.625222206115723, "learning_rate": 2.5457065924148184e-05, "loss": 1.074, "step": 33045 }, { "epoch": 9.89, "grad_norm": 2.140420913696289, "learning_rate": 2.545119168289369e-05, "loss": 1.1087, "step": 33050 }, { "epoch": 9.89, "grad_norm": 2.4748363494873047, 
"learning_rate": 2.544531741672035e-05, "loss": 1.0391, "step": 33055 }, { "epoch": 9.89, "grad_norm": 1.9943400621414185, "learning_rate": 2.543944312595259e-05, "loss": 0.932, "step": 33060 }, { "epoch": 9.89, "grad_norm": 4.314105987548828, "learning_rate": 2.543356881091484e-05, "loss": 1.0815, "step": 33065 }, { "epoch": 9.89, "grad_norm": 3.688622236251831, "learning_rate": 2.5427694471931546e-05, "loss": 1.1081, "step": 33070 }, { "epoch": 9.9, "grad_norm": 5.567243576049805, "learning_rate": 2.542182010932712e-05, "loss": 1.0279, "step": 33075 }, { "epoch": 9.9, "grad_norm": 1.8669523000717163, "learning_rate": 2.541594572342602e-05, "loss": 1.0404, "step": 33080 }, { "epoch": 9.9, "grad_norm": 4.838998317718506, "learning_rate": 2.5410071314552664e-05, "loss": 1.1254, "step": 33085 }, { "epoch": 9.9, "grad_norm": 4.472231388092041, "learning_rate": 2.540419688303149e-05, "loss": 1.0976, "step": 33090 }, { "epoch": 9.9, "grad_norm": 2.0864551067352295, "learning_rate": 2.539832242918695e-05, "loss": 1.0639, "step": 33095 }, { "epoch": 9.9, "grad_norm": 3.6057910919189453, "learning_rate": 2.539244795334347e-05, "loss": 1.0898, "step": 33100 }, { "epoch": 9.9, "grad_norm": 2.7485694885253906, "learning_rate": 2.5386573455825503e-05, "loss": 1.0214, "step": 33105 }, { "epoch": 9.91, "grad_norm": 2.115241050720215, "learning_rate": 2.5380698936957486e-05, "loss": 1.1543, "step": 33110 }, { "epoch": 9.91, "grad_norm": 3.4498496055603027, "learning_rate": 2.5374824397063857e-05, "loss": 1.2445, "step": 33115 }, { "epoch": 9.91, "grad_norm": 1.5659513473510742, "learning_rate": 2.536894983646907e-05, "loss": 1.1021, "step": 33120 }, { "epoch": 9.91, "grad_norm": 8.8037748336792, "learning_rate": 2.5363075255497564e-05, "loss": 0.9424, "step": 33125 }, { "epoch": 9.91, "grad_norm": 4.802347183227539, "learning_rate": 2.5357200654473788e-05, "loss": 1.0744, "step": 33130 }, { "epoch": 9.91, "grad_norm": 2.3840949535369873, "learning_rate": 2.5351326033722194e-05, 
"loss": 1.1233, "step": 33135 }, { "epoch": 9.92, "grad_norm": 2.0759215354919434, "learning_rate": 2.534545139356723e-05, "loss": 0.9863, "step": 33140 }, { "epoch": 9.92, "grad_norm": 1.2032520771026611, "learning_rate": 2.533957673433334e-05, "loss": 1.169, "step": 33145 }, { "epoch": 9.92, "grad_norm": 4.248573303222656, "learning_rate": 2.5333702056344984e-05, "loss": 0.9997, "step": 33150 }, { "epoch": 9.92, "grad_norm": 2.4295151233673096, "learning_rate": 2.532782735992661e-05, "loss": 1.0271, "step": 33155 }, { "epoch": 9.92, "grad_norm": 2.852827310562134, "learning_rate": 2.5321952645402668e-05, "loss": 1.0106, "step": 33160 }, { "epoch": 9.92, "grad_norm": 4.686607837677002, "learning_rate": 2.5316077913097618e-05, "loss": 1.045, "step": 33165 }, { "epoch": 9.92, "grad_norm": 1.6220539808273315, "learning_rate": 2.5310203163335916e-05, "loss": 1.2063, "step": 33170 }, { "epoch": 9.93, "grad_norm": 1.336083173751831, "learning_rate": 2.530432839644202e-05, "loss": 1.3131, "step": 33175 }, { "epoch": 9.93, "grad_norm": 1.3632646799087524, "learning_rate": 2.5298453612740375e-05, "loss": 1.073, "step": 33180 }, { "epoch": 9.93, "grad_norm": 1.0934840440750122, "learning_rate": 2.529257881255545e-05, "loss": 0.9939, "step": 33185 }, { "epoch": 9.93, "grad_norm": 1.7837378978729248, "learning_rate": 2.5286703996211697e-05, "loss": 1.3673, "step": 33190 }, { "epoch": 9.93, "grad_norm": 1.5650957822799683, "learning_rate": 2.5280829164033588e-05, "loss": 1.2912, "step": 33195 }, { "epoch": 9.93, "grad_norm": 1.1075351238250732, "learning_rate": 2.5274954316345568e-05, "loss": 1.0113, "step": 33200 }, { "epoch": 9.93, "grad_norm": 3.3911895751953125, "learning_rate": 2.5269079453472117e-05, "loss": 0.9913, "step": 33205 }, { "epoch": 9.94, "grad_norm": 4.012703895568848, "learning_rate": 2.5263204575737683e-05, "loss": 1.0538, "step": 33210 }, { "epoch": 9.94, "grad_norm": 3.5184414386749268, "learning_rate": 2.525732968346673e-05, "loss": 1.291, "step": 33215 
}, { "epoch": 9.94, "grad_norm": 3.051833152770996, "learning_rate": 2.5251454776983724e-05, "loss": 1.0442, "step": 33220 }, { "epoch": 9.94, "grad_norm": 3.1589720249176025, "learning_rate": 2.5245579856613137e-05, "loss": 0.8625, "step": 33225 }, { "epoch": 9.94, "grad_norm": 2.489769458770752, "learning_rate": 2.523970492267943e-05, "loss": 1.0129, "step": 33230 }, { "epoch": 9.94, "grad_norm": 3.1582303047180176, "learning_rate": 2.5233829975507068e-05, "loss": 1.0015, "step": 33235 }, { "epoch": 9.95, "grad_norm": 2.759131908416748, "learning_rate": 2.522795501542052e-05, "loss": 1.1334, "step": 33240 }, { "epoch": 9.95, "grad_norm": 4.12851619720459, "learning_rate": 2.522208004274425e-05, "loss": 1.1993, "step": 33245 }, { "epoch": 9.95, "grad_norm": 1.7142823934555054, "learning_rate": 2.5216205057802732e-05, "loss": 1.2478, "step": 33250 }, { "epoch": 9.95, "grad_norm": 2.8924169540405273, "learning_rate": 2.521033006092044e-05, "loss": 1.0277, "step": 33255 }, { "epoch": 9.95, "grad_norm": 5.144822597503662, "learning_rate": 2.5204455052421828e-05, "loss": 1.3498, "step": 33260 }, { "epoch": 9.95, "grad_norm": 2.7403883934020996, "learning_rate": 2.519858003263138e-05, "loss": 0.9622, "step": 33265 }, { "epoch": 9.95, "grad_norm": 3.224583864212036, "learning_rate": 2.5192705001873566e-05, "loss": 1.2568, "step": 33270 }, { "epoch": 9.96, "grad_norm": 2.203730821609497, "learning_rate": 2.518682996047285e-05, "loss": 1.2091, "step": 33275 }, { "epoch": 9.96, "grad_norm": 2.4827523231506348, "learning_rate": 2.5180954908753716e-05, "loss": 1.0987, "step": 33280 }, { "epoch": 9.96, "grad_norm": 3.099918842315674, "learning_rate": 2.5175079847040626e-05, "loss": 1.0014, "step": 33285 }, { "epoch": 9.96, "grad_norm": 1.661093831062317, "learning_rate": 2.5169204775658055e-05, "loss": 1.0222, "step": 33290 }, { "epoch": 9.96, "grad_norm": 5.493398189544678, "learning_rate": 2.5163329694930488e-05, "loss": 1.0784, "step": 33295 }, { "epoch": 9.96, "grad_norm": 
1.5556894540786743, "learning_rate": 2.5157454605182386e-05, "loss": 1.1672, "step": 33300 }, { "epoch": 9.96, "grad_norm": 3.597127676010132, "learning_rate": 2.515157950673823e-05, "loss": 1.0432, "step": 33305 }, { "epoch": 9.97, "grad_norm": 1.7815759181976318, "learning_rate": 2.51457043999225e-05, "loss": 1.0406, "step": 33310 }, { "epoch": 9.97, "grad_norm": 4.965347766876221, "learning_rate": 2.5139829285059664e-05, "loss": 1.0041, "step": 33315 }, { "epoch": 9.97, "grad_norm": 1.7741583585739136, "learning_rate": 2.5133954162474195e-05, "loss": 1.0557, "step": 33320 }, { "epoch": 9.97, "grad_norm": 2.6704812049865723, "learning_rate": 2.512807903249058e-05, "loss": 1.1279, "step": 33325 }, { "epoch": 9.97, "grad_norm": 1.5926697254180908, "learning_rate": 2.51222038954333e-05, "loss": 1.0901, "step": 33330 }, { "epoch": 9.97, "grad_norm": 1.6942665576934814, "learning_rate": 2.5116328751626827e-05, "loss": 1.034, "step": 33335 }, { "epoch": 9.97, "grad_norm": 3.1640069484710693, "learning_rate": 2.5110453601395633e-05, "loss": 1.0341, "step": 33340 }, { "epoch": 9.98, "grad_norm": 2.211376905441284, "learning_rate": 2.5104578445064202e-05, "loss": 1.0243, "step": 33345 }, { "epoch": 9.98, "grad_norm": 2.296888589859009, "learning_rate": 2.5098703282957013e-05, "loss": 1.0835, "step": 33350 }, { "epoch": 9.98, "grad_norm": 2.575103998184204, "learning_rate": 2.5092828115398544e-05, "loss": 0.963, "step": 33355 }, { "epoch": 9.98, "grad_norm": 11.325212478637695, "learning_rate": 2.508695294271327e-05, "loss": 0.9614, "step": 33360 }, { "epoch": 9.98, "grad_norm": 2.817223310470581, "learning_rate": 2.5081077765225682e-05, "loss": 1.0673, "step": 33365 }, { "epoch": 9.98, "grad_norm": 2.326043128967285, "learning_rate": 2.5075202583260256e-05, "loss": 1.145, "step": 33370 }, { "epoch": 9.99, "grad_norm": 2.059147357940674, "learning_rate": 2.506932739714146e-05, "loss": 1.3188, "step": 33375 }, { "epoch": 9.99, "grad_norm": 1.470616102218628, 
"learning_rate": 2.506345220719379e-05, "loss": 1.1262, "step": 33380 }, { "epoch": 9.99, "grad_norm": 2.974729537963867, "learning_rate": 2.5057577013741716e-05, "loss": 1.207, "step": 33385 }, { "epoch": 9.99, "grad_norm": 1.9753620624542236, "learning_rate": 2.505170181710973e-05, "loss": 1.1227, "step": 33390 }, { "epoch": 9.99, "grad_norm": 2.3012120723724365, "learning_rate": 2.5045826617622298e-05, "loss": 1.1889, "step": 33395 }, { "epoch": 9.99, "grad_norm": 1.2801812887191772, "learning_rate": 2.503995141560392e-05, "loss": 1.1202, "step": 33400 }, { "epoch": 9.99, "grad_norm": 4.384020805358887, "learning_rate": 2.5034076211379053e-05, "loss": 0.9629, "step": 33405 }, { "epoch": 10.0, "grad_norm": 2.4980461597442627, "learning_rate": 2.5028201005272206e-05, "loss": 1.1078, "step": 33410 }, { "epoch": 10.0, "grad_norm": 1.83892023563385, "learning_rate": 2.5022325797607837e-05, "loss": 1.1261, "step": 33415 }, { "epoch": 10.0, "grad_norm": 2.998216390609741, "learning_rate": 2.5016450588710443e-05, "loss": 1.0531, "step": 33420 }, { "epoch": 10.0, "grad_norm": 3.9695932865142822, "learning_rate": 2.50105753789045e-05, "loss": 0.8376, "step": 33425 }, { "epoch": 10.0, "grad_norm": 2.823977470397949, "learning_rate": 2.5004700168514482e-05, "loss": 0.9155, "step": 33430 }, { "epoch": 10.0, "grad_norm": 1.843510627746582, "learning_rate": 2.4998824957864885e-05, "loss": 1.1598, "step": 33435 }, { "epoch": 10.0, "grad_norm": 1.946144461631775, "learning_rate": 2.499294974728019e-05, "loss": 0.9176, "step": 33440 }, { "epoch": 10.01, "grad_norm": 2.370626211166382, "learning_rate": 2.498707453708486e-05, "loss": 0.9641, "step": 33445 }, { "epoch": 10.01, "grad_norm": 1.2042300701141357, "learning_rate": 2.4981199327603404e-05, "loss": 1.1646, "step": 33450 }, { "epoch": 10.01, "grad_norm": 2.661841869354248, "learning_rate": 2.497532411916029e-05, "loss": 1.1475, "step": 33455 }, { "epoch": 10.01, "grad_norm": 2.6765615940093994, "learning_rate": 
2.4969448912079985e-05, "loss": 0.9705, "step": 33460 }, { "epoch": 10.01, "grad_norm": 2.9539201259613037, "learning_rate": 2.4963573706686997e-05, "loss": 0.7655, "step": 33465 }, { "epoch": 10.01, "grad_norm": 6.744268894195557, "learning_rate": 2.4957698503305786e-05, "loss": 0.8524, "step": 33470 }, { "epoch": 10.02, "grad_norm": 2.093243360519409, "learning_rate": 2.495182330226085e-05, "loss": 1.0871, "step": 33475 }, { "epoch": 10.02, "grad_norm": 2.485497236251831, "learning_rate": 2.494594810387666e-05, "loss": 0.8595, "step": 33480 }, { "epoch": 10.02, "grad_norm": 3.0233352184295654, "learning_rate": 2.49400729084777e-05, "loss": 0.8909, "step": 33485 }, { "epoch": 10.02, "grad_norm": 2.09739351272583, "learning_rate": 2.493419771638845e-05, "loss": 1.0453, "step": 33490 }, { "epoch": 10.02, "grad_norm": 2.0949864387512207, "learning_rate": 2.4928322527933393e-05, "loss": 0.9512, "step": 33495 }, { "epoch": 10.02, "grad_norm": 2.8882389068603516, "learning_rate": 2.4922447343437005e-05, "loss": 1.0104, "step": 33500 }, { "epoch": 10.02, "grad_norm": 1.7648423910140991, "learning_rate": 2.4916572163223772e-05, "loss": 0.9308, "step": 33505 }, { "epoch": 10.03, "grad_norm": 16.295032501220703, "learning_rate": 2.4910696987618174e-05, "loss": 1.057, "step": 33510 }, { "epoch": 10.03, "grad_norm": 4.960565090179443, "learning_rate": 2.490482181694469e-05, "loss": 1.0269, "step": 33515 }, { "epoch": 10.03, "grad_norm": 1.606244683265686, "learning_rate": 2.489894665152779e-05, "loss": 1.1142, "step": 33520 }, { "epoch": 10.03, "grad_norm": 2.571674108505249, "learning_rate": 2.489307149169197e-05, "loss": 1.1242, "step": 33525 }, { "epoch": 10.03, "grad_norm": 1.8234943151474, "learning_rate": 2.4887196337761693e-05, "loss": 1.0145, "step": 33530 }, { "epoch": 10.03, "grad_norm": 1.6958742141723633, "learning_rate": 2.4881321190061453e-05, "loss": 1.0754, "step": 33535 }, { "epoch": 10.03, "grad_norm": 1.0637584924697876, "learning_rate": 
2.487544604891571e-05, "loss": 1.028, "step": 33540 }, { "epoch": 10.04, "grad_norm": 1.169573426246643, "learning_rate": 2.4869570914648963e-05, "loss": 1.0321, "step": 33545 }, { "epoch": 10.04, "grad_norm": 1.4608122110366821, "learning_rate": 2.4863695787585678e-05, "loss": 1.1961, "step": 33550 }, { "epoch": 10.04, "grad_norm": 2.158911943435669, "learning_rate": 2.4857820668050324e-05, "loss": 1.1969, "step": 33555 }, { "epoch": 10.04, "grad_norm": 9.060139656066895, "learning_rate": 2.4851945556367396e-05, "loss": 0.7947, "step": 33560 }, { "epoch": 10.04, "grad_norm": 4.058126449584961, "learning_rate": 2.484607045286135e-05, "loss": 0.8664, "step": 33565 }, { "epoch": 10.04, "grad_norm": 3.4800760746002197, "learning_rate": 2.4840195357856685e-05, "loss": 1.0867, "step": 33570 }, { "epoch": 10.05, "grad_norm": 2.5772833824157715, "learning_rate": 2.483432027167785e-05, "loss": 0.9432, "step": 33575 }, { "epoch": 10.05, "grad_norm": 1.0311754941940308, "learning_rate": 2.482844519464935e-05, "loss": 1.0534, "step": 33580 }, { "epoch": 10.05, "grad_norm": 3.5726497173309326, "learning_rate": 2.4822570127095637e-05, "loss": 0.9547, "step": 33585 }, { "epoch": 10.05, "grad_norm": 3.7769217491149902, "learning_rate": 2.4816695069341192e-05, "loss": 1.01, "step": 33590 }, { "epoch": 10.05, "grad_norm": 1.1994178295135498, "learning_rate": 2.4810820021710486e-05, "loss": 1.1168, "step": 33595 }, { "epoch": 10.05, "grad_norm": 3.4825947284698486, "learning_rate": 2.4804944984527995e-05, "loss": 1.0242, "step": 33600 }, { "epoch": 10.05, "grad_norm": 1.254137396812439, "learning_rate": 2.4799069958118187e-05, "loss": 0.9139, "step": 33605 }, { "epoch": 10.06, "grad_norm": 5.904828071594238, "learning_rate": 2.4793194942805545e-05, "loss": 1.0005, "step": 33610 }, { "epoch": 10.06, "grad_norm": 2.302828073501587, "learning_rate": 2.478731993891452e-05, "loss": 1.0739, "step": 33615 }, { "epoch": 10.06, "grad_norm": 2.0300052165985107, "learning_rate": 
2.4781444946769603e-05, "loss": 0.9196, "step": 33620 }, { "epoch": 10.06, "grad_norm": 2.183711051940918, "learning_rate": 2.4775569966695242e-05, "loss": 1.069, "step": 33625 }, { "epoch": 10.06, "grad_norm": 6.40975284576416, "learning_rate": 2.476969499901593e-05, "loss": 1.0779, "step": 33630 }, { "epoch": 10.06, "grad_norm": 2.763566017150879, "learning_rate": 2.4763820044056114e-05, "loss": 1.191, "step": 33635 }, { "epoch": 10.06, "grad_norm": 6.577224254608154, "learning_rate": 2.4757945102140287e-05, "loss": 0.9616, "step": 33640 }, { "epoch": 10.07, "grad_norm": 2.686187267303467, "learning_rate": 2.4752070173592895e-05, "loss": 1.21, "step": 33645 }, { "epoch": 10.07, "grad_norm": 1.8847886323928833, "learning_rate": 2.47461952587384e-05, "loss": 1.0332, "step": 33650 }, { "epoch": 10.07, "grad_norm": 4.264788627624512, "learning_rate": 2.4740320357901286e-05, "loss": 0.9612, "step": 33655 }, { "epoch": 10.07, "grad_norm": 3.13706111907959, "learning_rate": 2.4734445471406e-05, "loss": 1.1428, "step": 33660 }, { "epoch": 10.07, "grad_norm": 3.5950639247894287, "learning_rate": 2.472857059957703e-05, "loss": 1.0232, "step": 33665 }, { "epoch": 10.07, "grad_norm": 4.524314880371094, "learning_rate": 2.4722695742738806e-05, "loss": 0.8956, "step": 33670 }, { "epoch": 10.08, "grad_norm": 2.5336546897888184, "learning_rate": 2.471682090121582e-05, "loss": 1.1582, "step": 33675 }, { "epoch": 10.08, "grad_norm": 2.4728264808654785, "learning_rate": 2.4710946075332515e-05, "loss": 1.1319, "step": 33680 }, { "epoch": 10.08, "grad_norm": 2.4756505489349365, "learning_rate": 2.4705071265413355e-05, "loss": 1.0484, "step": 33685 }, { "epoch": 10.08, "grad_norm": 4.324848651885986, "learning_rate": 2.469919647178281e-05, "loss": 0.9865, "step": 33690 }, { "epoch": 10.08, "grad_norm": 1.772365927696228, "learning_rate": 2.4693321694765324e-05, "loss": 1.1322, "step": 33695 }, { "epoch": 10.08, "grad_norm": 4.808743476867676, "learning_rate": 2.468744693468537e-05, 
"loss": 0.9088, "step": 33700 }, { "epoch": 10.08, "grad_norm": 2.5702743530273438, "learning_rate": 2.46815721918674e-05, "loss": 1.1035, "step": 33705 }, { "epoch": 10.09, "grad_norm": 2.589160680770874, "learning_rate": 2.4675697466635855e-05, "loss": 1.2078, "step": 33710 }, { "epoch": 10.09, "grad_norm": 1.3858650922775269, "learning_rate": 2.466982275931521e-05, "loss": 0.9329, "step": 33715 }, { "epoch": 10.09, "grad_norm": 2.2143914699554443, "learning_rate": 2.4663948070229905e-05, "loss": 1.0403, "step": 33720 }, { "epoch": 10.09, "grad_norm": 1.3419291973114014, "learning_rate": 2.4658073399704405e-05, "loss": 1.1443, "step": 33725 }, { "epoch": 10.09, "grad_norm": 3.204826593399048, "learning_rate": 2.4652198748063146e-05, "loss": 1.0266, "step": 33730 }, { "epoch": 10.09, "grad_norm": 2.098097085952759, "learning_rate": 2.46463241156306e-05, "loss": 1.1408, "step": 33735 }, { "epoch": 10.09, "grad_norm": 3.6927952766418457, "learning_rate": 2.4640449502731204e-05, "loss": 1.0207, "step": 33740 }, { "epoch": 10.1, "grad_norm": 4.6298828125, "learning_rate": 2.46345749096894e-05, "loss": 1.11, "step": 33745 }, { "epoch": 10.1, "grad_norm": 1.5225332975387573, "learning_rate": 2.4628700336829655e-05, "loss": 1.0937, "step": 33750 }, { "epoch": 10.1, "grad_norm": 2.734102964401245, "learning_rate": 2.4622825784476392e-05, "loss": 0.9719, "step": 33755 }, { "epoch": 10.1, "grad_norm": 25.489742279052734, "learning_rate": 2.4616951252954078e-05, "loss": 1.1438, "step": 33760 }, { "epoch": 10.1, "grad_norm": 1.8963426351547241, "learning_rate": 2.4611076742587137e-05, "loss": 1.106, "step": 33765 }, { "epoch": 10.1, "grad_norm": 2.171485662460327, "learning_rate": 2.4605202253700034e-05, "loss": 1.0517, "step": 33770 }, { "epoch": 10.11, "grad_norm": 3.205883502960205, "learning_rate": 2.45993277866172e-05, "loss": 1.1039, "step": 33775 }, { "epoch": 10.11, "grad_norm": 3.306411027908325, "learning_rate": 2.459345334166307e-05, "loss": 1.0611, "step": 33780 
}, { "epoch": 10.11, "grad_norm": 2.3397672176361084, "learning_rate": 2.4587578919162097e-05, "loss": 1.1455, "step": 33785 }, { "epoch": 10.11, "grad_norm": 3.6171796321868896, "learning_rate": 2.45817045194387e-05, "loss": 0.9492, "step": 33790 }, { "epoch": 10.11, "grad_norm": 1.5914018154144287, "learning_rate": 2.4575830142817342e-05, "loss": 0.9019, "step": 33795 }, { "epoch": 10.11, "grad_norm": 1.1069579124450684, "learning_rate": 2.456995578962243e-05, "loss": 1.1982, "step": 33800 }, { "epoch": 10.11, "grad_norm": 4.241572856903076, "learning_rate": 2.4564081460178427e-05, "loss": 1.0414, "step": 33805 }, { "epoch": 10.12, "grad_norm": 2.3619353771209717, "learning_rate": 2.455820715480975e-05, "loss": 1.16, "step": 33810 }, { "epoch": 10.12, "grad_norm": 1.428951621055603, "learning_rate": 2.4552332873840818e-05, "loss": 1.1485, "step": 33815 }, { "epoch": 10.12, "grad_norm": 1.82322359085083, "learning_rate": 2.454645861759609e-05, "loss": 1.0625, "step": 33820 }, { "epoch": 10.12, "grad_norm": 2.700174570083618, "learning_rate": 2.4540584386399974e-05, "loss": 0.8932, "step": 33825 }, { "epoch": 10.12, "grad_norm": 3.158874988555908, "learning_rate": 2.4534710180576912e-05, "loss": 1.1615, "step": 33830 }, { "epoch": 10.12, "grad_norm": 6.1408820152282715, "learning_rate": 2.4528836000451323e-05, "loss": 1.0226, "step": 33835 }, { "epoch": 10.12, "grad_norm": 1.1300007104873657, "learning_rate": 2.452296184634762e-05, "loss": 0.9967, "step": 33840 }, { "epoch": 10.13, "grad_norm": 8.636886596679688, "learning_rate": 2.4517087718590244e-05, "loss": 0.9646, "step": 33845 }, { "epoch": 10.13, "grad_norm": 4.390270233154297, "learning_rate": 2.4511213617503616e-05, "loss": 1.131, "step": 33850 }, { "epoch": 10.13, "grad_norm": 4.6113409996032715, "learning_rate": 2.4505339543412148e-05, "loss": 1.1715, "step": 33855 }, { "epoch": 10.13, "grad_norm": 2.2898902893066406, "learning_rate": 2.449946549664026e-05, "loss": 1.0189, "step": 33860 }, { "epoch": 
10.13, "grad_norm": 0.8719561100006104, "learning_rate": 2.449359147751238e-05, "loss": 1.1226, "step": 33865 }, { "epoch": 10.13, "grad_norm": 3.1541287899017334, "learning_rate": 2.4487717486352914e-05, "loss": 0.9081, "step": 33870 }, { "epoch": 10.14, "grad_norm": 4.781918048858643, "learning_rate": 2.448184352348627e-05, "loss": 1.2464, "step": 33875 }, { "epoch": 10.14, "grad_norm": 3.1645240783691406, "learning_rate": 2.4475969589236887e-05, "loss": 0.847, "step": 33880 }, { "epoch": 10.14, "grad_norm": 2.1200978755950928, "learning_rate": 2.447009568392914e-05, "loss": 1.1901, "step": 33885 }, { "epoch": 10.14, "grad_norm": 3.1781980991363525, "learning_rate": 2.446422180788747e-05, "loss": 0.9578, "step": 33890 }, { "epoch": 10.14, "grad_norm": 4.06983757019043, "learning_rate": 2.4458347961436264e-05, "loss": 0.9594, "step": 33895 }, { "epoch": 10.14, "grad_norm": 13.415980339050293, "learning_rate": 2.4452474144899947e-05, "loss": 1.0795, "step": 33900 }, { "epoch": 10.14, "grad_norm": 2.0610928535461426, "learning_rate": 2.4446600358602915e-05, "loss": 1.0463, "step": 33905 }, { "epoch": 10.15, "grad_norm": 1.7884243726730347, "learning_rate": 2.4440726602869557e-05, "loss": 1.1359, "step": 33910 }, { "epoch": 10.15, "grad_norm": 2.057011604309082, "learning_rate": 2.44348528780243e-05, "loss": 1.0827, "step": 33915 }, { "epoch": 10.15, "grad_norm": 3.1838924884796143, "learning_rate": 2.442897918439152e-05, "loss": 1.107, "step": 33920 }, { "epoch": 10.15, "grad_norm": 2.055171251296997, "learning_rate": 2.4423105522295633e-05, "loss": 0.9141, "step": 33925 }, { "epoch": 10.15, "grad_norm": 7.081864833831787, "learning_rate": 2.441723189206102e-05, "loss": 0.9043, "step": 33930 }, { "epoch": 10.15, "grad_norm": 1.6306078433990479, "learning_rate": 2.44113582940121e-05, "loss": 1.1316, "step": 33935 }, { "epoch": 10.15, "grad_norm": 3.7581050395965576, "learning_rate": 2.440548472847324e-05, "loss": 1.058, "step": 33940 }, { "epoch": 10.16, "grad_norm": 
2.7319552898406982, "learning_rate": 2.4399611195768836e-05, "loss": 1.2098, "step": 33945 }, { "epoch": 10.16, "grad_norm": 4.5412068367004395, "learning_rate": 2.4393737696223286e-05, "loss": 1.0749, "step": 33950 }, { "epoch": 10.16, "grad_norm": 3.6076760292053223, "learning_rate": 2.4387864230160972e-05, "loss": 1.0315, "step": 33955 }, { "epoch": 10.16, "grad_norm": 1.9118307828903198, "learning_rate": 2.4381990797906284e-05, "loss": 1.0247, "step": 33960 }, { "epoch": 10.16, "grad_norm": 2.3119821548461914, "learning_rate": 2.4376117399783605e-05, "loss": 1.0456, "step": 33965 }, { "epoch": 10.16, "grad_norm": 1.257812261581421, "learning_rate": 2.43702440361173e-05, "loss": 1.1916, "step": 33970 }, { "epoch": 10.16, "grad_norm": 3.263524055480957, "learning_rate": 2.4364370707231777e-05, "loss": 1.1353, "step": 33975 }, { "epoch": 10.17, "grad_norm": 2.8173470497131348, "learning_rate": 2.4358497413451383e-05, "loss": 1.1754, "step": 33980 }, { "epoch": 10.17, "grad_norm": 1.8419231176376343, "learning_rate": 2.4352624155100525e-05, "loss": 0.9914, "step": 33985 }, { "epoch": 10.17, "grad_norm": 4.419125080108643, "learning_rate": 2.434675093250355e-05, "loss": 1.0154, "step": 33990 }, { "epoch": 10.17, "grad_norm": 4.144635200500488, "learning_rate": 2.4340877745984854e-05, "loss": 1.1304, "step": 33995 }, { "epoch": 10.17, "grad_norm": 2.172372341156006, "learning_rate": 2.4335004595868794e-05, "loss": 1.0345, "step": 34000 }, { "epoch": 10.17, "grad_norm": 3.2084264755249023, "learning_rate": 2.4329131482479727e-05, "loss": 1.1348, "step": 34005 }, { "epoch": 10.18, "grad_norm": 1.3004094362258911, "learning_rate": 2.4323258406142042e-05, "loss": 1.0589, "step": 34010 }, { "epoch": 10.18, "grad_norm": 2.1280858516693115, "learning_rate": 2.4317385367180083e-05, "loss": 1.16, "step": 34015 }, { "epoch": 10.18, "grad_norm": 3.7424776554107666, "learning_rate": 2.4311512365918232e-05, "loss": 1.0512, "step": 34020 }, { "epoch": 10.18, "grad_norm": 
2.389583110809326, "learning_rate": 2.4305639402680825e-05, "loss": 1.2194, "step": 34025 }, { "epoch": 10.18, "grad_norm": 9.110445022583008, "learning_rate": 2.4299766477792242e-05, "loss": 1.1583, "step": 34030 }, { "epoch": 10.18, "grad_norm": 2.022211790084839, "learning_rate": 2.4293893591576825e-05, "loss": 1.0861, "step": 34035 }, { "epoch": 10.18, "grad_norm": 3.275707483291626, "learning_rate": 2.428802074435893e-05, "loss": 0.866, "step": 34040 }, { "epoch": 10.19, "grad_norm": 1.1770823001861572, "learning_rate": 2.428214793646291e-05, "loss": 1.2091, "step": 34045 }, { "epoch": 10.19, "grad_norm": 7.510256767272949, "learning_rate": 2.4276275168213105e-05, "loss": 1.1663, "step": 34050 }, { "epoch": 10.19, "grad_norm": 2.775092840194702, "learning_rate": 2.4270402439933886e-05, "loss": 1.151, "step": 34055 }, { "epoch": 10.19, "grad_norm": 2.5497207641601562, "learning_rate": 2.4264529751949576e-05, "loss": 1.1476, "step": 34060 }, { "epoch": 10.19, "grad_norm": 1.3202245235443115, "learning_rate": 2.4258657104584518e-05, "loss": 1.0267, "step": 34065 }, { "epoch": 10.19, "grad_norm": 1.2881629467010498, "learning_rate": 2.4252784498163064e-05, "loss": 1.0565, "step": 34070 }, { "epoch": 10.19, "grad_norm": 2.9040749073028564, "learning_rate": 2.4246911933009536e-05, "loss": 1.1041, "step": 34075 }, { "epoch": 10.2, "grad_norm": 1.2760438919067383, "learning_rate": 2.424103940944829e-05, "loss": 1.2469, "step": 34080 }, { "epoch": 10.2, "grad_norm": 1.911608099937439, "learning_rate": 2.423516692780364e-05, "loss": 0.9625, "step": 34085 }, { "epoch": 10.2, "grad_norm": 1.7194567918777466, "learning_rate": 2.4229294488399935e-05, "loss": 1.1776, "step": 34090 }, { "epoch": 10.2, "grad_norm": 4.4640960693359375, "learning_rate": 2.4223422091561493e-05, "loss": 0.9402, "step": 34095 }, { "epoch": 10.2, "grad_norm": 5.374719142913818, "learning_rate": 2.4217549737612632e-05, "loss": 1.0412, "step": 34100 }, { "epoch": 10.2, "grad_norm": 5.335212230682373, 
"learning_rate": 2.4211677426877698e-05, "loss": 0.9229, "step": 34105 }, { "epoch": 10.21, "grad_norm": 1.18242609500885, "learning_rate": 2.4205805159680986e-05, "loss": 0.9371, "step": 34110 }, { "epoch": 10.21, "grad_norm": 2.225745916366577, "learning_rate": 2.419993293634684e-05, "loss": 1.1263, "step": 34115 }, { "epoch": 10.21, "grad_norm": 4.692055702209473, "learning_rate": 2.4194060757199557e-05, "loss": 1.1487, "step": 34120 }, { "epoch": 10.21, "grad_norm": 3.8176944255828857, "learning_rate": 2.4188188622563474e-05, "loss": 1.1546, "step": 34125 }, { "epoch": 10.21, "grad_norm": 2.7893073558807373, "learning_rate": 2.4182316532762884e-05, "loss": 0.9161, "step": 34130 }, { "epoch": 10.21, "grad_norm": 5.134355545043945, "learning_rate": 2.417644448812209e-05, "loss": 1.1189, "step": 34135 }, { "epoch": 10.21, "grad_norm": 1.823405385017395, "learning_rate": 2.4170572488965427e-05, "loss": 1.0416, "step": 34140 }, { "epoch": 10.22, "grad_norm": 2.36574125289917, "learning_rate": 2.4164700535617174e-05, "loss": 0.9979, "step": 34145 }, { "epoch": 10.22, "grad_norm": 1.0665093660354614, "learning_rate": 2.415882862840165e-05, "loss": 0.937, "step": 34150 }, { "epoch": 10.22, "grad_norm": 3.2020561695098877, "learning_rate": 2.4152956767643138e-05, "loss": 1.0047, "step": 34155 }, { "epoch": 10.22, "grad_norm": 3.9763476848602295, "learning_rate": 2.4147084953665953e-05, "loss": 0.9047, "step": 34160 }, { "epoch": 10.22, "grad_norm": 5.038437843322754, "learning_rate": 2.4141213186794378e-05, "loss": 0.9305, "step": 34165 }, { "epoch": 10.22, "grad_norm": 2.591571092605591, "learning_rate": 2.4135341467352697e-05, "loss": 1.1079, "step": 34170 }, { "epoch": 10.22, "grad_norm": 2.8258914947509766, "learning_rate": 2.4129469795665215e-05, "loss": 0.9622, "step": 34175 }, { "epoch": 10.23, "grad_norm": 1.6246439218521118, "learning_rate": 2.4123598172056205e-05, "loss": 1.0708, "step": 34180 }, { "epoch": 10.23, "grad_norm": 3.791229724884033, 
"learning_rate": 2.4117726596849964e-05, "loss": 1.185, "step": 34185 }, { "epoch": 10.23, "grad_norm": 2.0742027759552, "learning_rate": 2.411185507037077e-05, "loss": 1.1087, "step": 34190 }, { "epoch": 10.23, "grad_norm": 2.5888705253601074, "learning_rate": 2.4105983592942886e-05, "loss": 1.1562, "step": 34195 }, { "epoch": 10.23, "grad_norm": 1.4645191431045532, "learning_rate": 2.410011216489061e-05, "loss": 1.0233, "step": 34200 }, { "epoch": 10.23, "grad_norm": 5.13542366027832, "learning_rate": 2.409424078653819e-05, "loss": 1.0835, "step": 34205 }, { "epoch": 10.24, "grad_norm": 1.8826115131378174, "learning_rate": 2.4088369458209916e-05, "loss": 1.0035, "step": 34210 }, { "epoch": 10.24, "grad_norm": 2.7958590984344482, "learning_rate": 2.408249818023005e-05, "loss": 1.0768, "step": 34215 }, { "epoch": 10.24, "grad_norm": 1.63248872756958, "learning_rate": 2.4076626952922857e-05, "loss": 1.0479, "step": 34220 }, { "epoch": 10.24, "grad_norm": 3.1579835414886475, "learning_rate": 2.4070755776612604e-05, "loss": 1.1089, "step": 34225 }, { "epoch": 10.24, "grad_norm": 1.6841520071029663, "learning_rate": 2.4064884651623527e-05, "loss": 1.3328, "step": 34230 }, { "epoch": 10.24, "grad_norm": 1.0290015935897827, "learning_rate": 2.4059013578279917e-05, "loss": 1.1065, "step": 34235 }, { "epoch": 10.24, "grad_norm": 2.263608932495117, "learning_rate": 2.4053142556905992e-05, "loss": 1.0712, "step": 34240 }, { "epoch": 10.25, "grad_norm": 3.4659013748168945, "learning_rate": 2.4047271587826032e-05, "loss": 0.9181, "step": 34245 }, { "epoch": 10.25, "grad_norm": 3.7479326725006104, "learning_rate": 2.404140067136426e-05, "loss": 0.7427, "step": 34250 }, { "epoch": 10.25, "grad_norm": 1.718619704246521, "learning_rate": 2.4035529807844944e-05, "loss": 1.0278, "step": 34255 }, { "epoch": 10.25, "grad_norm": 1.7770065069198608, "learning_rate": 2.4029658997592315e-05, "loss": 1.1875, "step": 34260 }, { "epoch": 10.25, "grad_norm": 5.215729236602783, 
"learning_rate": 2.4023788240930603e-05, "loss": 1.0645, "step": 34265 }, { "epoch": 10.25, "grad_norm": 3.1575255393981934, "learning_rate": 2.401791753818406e-05, "loss": 1.1738, "step": 34270 }, { "epoch": 10.25, "grad_norm": 1.1542024612426758, "learning_rate": 2.4012046889676898e-05, "loss": 1.002, "step": 34275 }, { "epoch": 10.26, "grad_norm": 2.203456163406372, "learning_rate": 2.400617629573337e-05, "loss": 0.8584, "step": 34280 }, { "epoch": 10.26, "grad_norm": 3.4563515186309814, "learning_rate": 2.4000305756677685e-05, "loss": 1.1988, "step": 34285 }, { "epoch": 10.26, "grad_norm": 5.644460678100586, "learning_rate": 2.399443527283408e-05, "loss": 1.0035, "step": 34290 }, { "epoch": 10.26, "grad_norm": 3.3462517261505127, "learning_rate": 2.398856484452677e-05, "loss": 1.2118, "step": 34295 }, { "epoch": 10.26, "grad_norm": 2.617570161819458, "learning_rate": 2.398269447207997e-05, "loss": 1.1375, "step": 34300 }, { "epoch": 10.26, "grad_norm": 3.606656789779663, "learning_rate": 2.3976824155817894e-05, "loss": 1.0468, "step": 34305 }, { "epoch": 10.27, "grad_norm": 2.8307979106903076, "learning_rate": 2.397095389606476e-05, "loss": 1.089, "step": 34310 }, { "epoch": 10.27, "grad_norm": 1.1224695444107056, "learning_rate": 2.3965083693144773e-05, "loss": 1.0908, "step": 34315 }, { "epoch": 10.27, "grad_norm": 3.3736870288848877, "learning_rate": 2.3959213547382138e-05, "loss": 1.0837, "step": 34320 }, { "epoch": 10.27, "grad_norm": 1.475428581237793, "learning_rate": 2.395334345910105e-05, "loss": 1.1278, "step": 34325 }, { "epoch": 10.27, "grad_norm": 2.2437570095062256, "learning_rate": 2.394747342862573e-05, "loss": 1.1207, "step": 34330 }, { "epoch": 10.27, "grad_norm": 1.757214903831482, "learning_rate": 2.394160345628034e-05, "loss": 1.0901, "step": 34335 }, { "epoch": 10.27, "grad_norm": 1.8947856426239014, "learning_rate": 2.3935733542389103e-05, "loss": 1.164, "step": 34340 }, { "epoch": 10.28, "grad_norm": 2.8285508155822754, "learning_rate": 
2.392986368727619e-05, "loss": 0.9759, "step": 34345 }, { "epoch": 10.28, "grad_norm": 1.154876470565796, "learning_rate": 2.39239938912658e-05, "loss": 1.1223, "step": 34350 }, { "epoch": 10.28, "grad_norm": 2.4530434608459473, "learning_rate": 2.391812415468211e-05, "loss": 1.0409, "step": 34355 }, { "epoch": 10.28, "grad_norm": 1.6551940441131592, "learning_rate": 2.3912254477849286e-05, "loss": 1.067, "step": 34360 }, { "epoch": 10.28, "grad_norm": 2.834213972091675, "learning_rate": 2.390638486109153e-05, "loss": 0.9237, "step": 34365 }, { "epoch": 10.28, "grad_norm": 3.8012025356292725, "learning_rate": 2.390051530473299e-05, "loss": 1.1377, "step": 34370 }, { "epoch": 10.28, "grad_norm": 2.4281139373779297, "learning_rate": 2.3894645809097858e-05, "loss": 0.9931, "step": 34375 }, { "epoch": 10.29, "grad_norm": 2.0580687522888184, "learning_rate": 2.3888776374510273e-05, "loss": 1.0118, "step": 34380 }, { "epoch": 10.29, "grad_norm": 1.2598625421524048, "learning_rate": 2.3882907001294433e-05, "loss": 0.9545, "step": 34385 }, { "epoch": 10.29, "grad_norm": 3.062026023864746, "learning_rate": 2.3877037689774467e-05, "loss": 1.0679, "step": 34390 }, { "epoch": 10.29, "grad_norm": 3.9732203483581543, "learning_rate": 2.3871168440274545e-05, "loss": 1.1264, "step": 34395 }, { "epoch": 10.29, "grad_norm": 8.932393074035645, "learning_rate": 2.3865299253118816e-05, "loss": 0.9523, "step": 34400 }, { "epoch": 10.29, "grad_norm": 2.8433444499969482, "learning_rate": 2.385943012863143e-05, "loss": 0.9684, "step": 34405 }, { "epoch": 10.3, "grad_norm": 3.345172643661499, "learning_rate": 2.385356106713653e-05, "loss": 0.9367, "step": 34410 }, { "epoch": 10.3, "grad_norm": 16.16388702392578, "learning_rate": 2.384769206895827e-05, "loss": 0.9319, "step": 34415 }, { "epoch": 10.3, "grad_norm": 1.3967313766479492, "learning_rate": 2.3841823134420767e-05, "loss": 1.1586, "step": 34420 }, { "epoch": 10.3, "grad_norm": 3.0620460510253906, "learning_rate": 
2.3835954263848176e-05, "loss": 1.1258, "step": 34425 }, { "epoch": 10.3, "grad_norm": 2.4952540397644043, "learning_rate": 2.3830085457564613e-05, "loss": 1.0679, "step": 34430 }, { "epoch": 10.3, "grad_norm": 2.0433554649353027, "learning_rate": 2.3824216715894224e-05, "loss": 1.2555, "step": 34435 }, { "epoch": 10.3, "grad_norm": 3.4012880325317383, "learning_rate": 2.3818348039161115e-05, "loss": 0.8585, "step": 34440 }, { "epoch": 10.31, "grad_norm": 1.1092032194137573, "learning_rate": 2.3812479427689424e-05, "loss": 1.1172, "step": 34445 }, { "epoch": 10.31, "grad_norm": 1.4429662227630615, "learning_rate": 2.380661088180326e-05, "loss": 1.2767, "step": 34450 }, { "epoch": 10.31, "grad_norm": 8.600884437561035, "learning_rate": 2.3800742401826727e-05, "loss": 1.0851, "step": 34455 }, { "epoch": 10.31, "grad_norm": 2.5165319442749023, "learning_rate": 2.3794873988083954e-05, "loss": 0.9119, "step": 34460 }, { "epoch": 10.31, "grad_norm": 4.288184642791748, "learning_rate": 2.378900564089903e-05, "loss": 1.1302, "step": 34465 }, { "epoch": 10.31, "grad_norm": 2.89193058013916, "learning_rate": 2.3783137360596075e-05, "loss": 1.1722, "step": 34470 }, { "epoch": 10.31, "grad_norm": 4.590530872344971, "learning_rate": 2.3777269147499165e-05, "loss": 0.8362, "step": 34475 }, { "epoch": 10.32, "grad_norm": 4.36327600479126, "learning_rate": 2.377140100193242e-05, "loss": 1.0983, "step": 34480 }, { "epoch": 10.32, "grad_norm": 2.2678892612457275, "learning_rate": 2.376553292421992e-05, "loss": 1.1464, "step": 34485 }, { "epoch": 10.32, "grad_norm": 3.2285358905792236, "learning_rate": 2.3759664914685754e-05, "loss": 1.1098, "step": 34490 }, { "epoch": 10.32, "grad_norm": 4.26018762588501, "learning_rate": 2.3753796973654e-05, "loss": 1.0358, "step": 34495 }, { "epoch": 10.32, "grad_norm": 1.6850954294204712, "learning_rate": 2.374792910144874e-05, "loss": 1.0134, "step": 34500 }, { "epoch": 10.32, "grad_norm": 1.3236955404281616, "learning_rate": 
2.3742061298394065e-05, "loss": 1.0113, "step": 34505 }, { "epoch": 10.32, "grad_norm": 5.23323917388916, "learning_rate": 2.373619356481403e-05, "loss": 1.097, "step": 34510 }, { "epoch": 10.33, "grad_norm": 1.9136748313903809, "learning_rate": 2.3730325901032718e-05, "loss": 1.0261, "step": 34515 }, { "epoch": 10.33, "grad_norm": 4.667322635650635, "learning_rate": 2.3724458307374187e-05, "loss": 1.0971, "step": 34520 }, { "epoch": 10.33, "grad_norm": 1.810530185699463, "learning_rate": 2.3718590784162485e-05, "loss": 1.0513, "step": 34525 }, { "epoch": 10.33, "grad_norm": 5.116344451904297, "learning_rate": 2.3712723331721698e-05, "loss": 1.0967, "step": 34530 }, { "epoch": 10.33, "grad_norm": 2.3734960556030273, "learning_rate": 2.370685595037585e-05, "loss": 1.1215, "step": 34535 }, { "epoch": 10.33, "grad_norm": 1.920328974723816, "learning_rate": 2.3700988640449014e-05, "loss": 1.1294, "step": 34540 }, { "epoch": 10.34, "grad_norm": 3.0604352951049805, "learning_rate": 2.3695121402265224e-05, "loss": 1.0668, "step": 34545 }, { "epoch": 10.34, "grad_norm": 3.079564094543457, "learning_rate": 2.3689254236148514e-05, "loss": 0.9018, "step": 34550 }, { "epoch": 10.34, "grad_norm": 2.8750357627868652, "learning_rate": 2.368338714242294e-05, "loss": 1.0537, "step": 34555 }, { "epoch": 10.34, "grad_norm": 4.040964603424072, "learning_rate": 2.3677520121412516e-05, "loss": 0.8928, "step": 34560 }, { "epoch": 10.34, "grad_norm": 4.141225814819336, "learning_rate": 2.3671653173441292e-05, "loss": 1.1755, "step": 34565 }, { "epoch": 10.34, "grad_norm": 4.6155195236206055, "learning_rate": 2.3665786298833266e-05, "loss": 1.0279, "step": 34570 }, { "epoch": 10.34, "grad_norm": 4.095025539398193, "learning_rate": 2.3659919497912488e-05, "loss": 1.0721, "step": 34575 }, { "epoch": 10.35, "grad_norm": 2.4246129989624023, "learning_rate": 2.3654052771002965e-05, "loss": 1.0305, "step": 34580 }, { "epoch": 10.35, "grad_norm": 1.700454592704773, "learning_rate": 
2.36481861184287e-05, "loss": 1.3431, "step": 34585 }, { "epoch": 10.35, "grad_norm": 2.0215277671813965, "learning_rate": 2.364231954051372e-05, "loss": 1.0706, "step": 34590 }, { "epoch": 10.35, "grad_norm": 3.362088203430176, "learning_rate": 2.363645303758201e-05, "loss": 1.0435, "step": 34595 }, { "epoch": 10.35, "grad_norm": 1.618765115737915, "learning_rate": 2.3630586609957592e-05, "loss": 1.0702, "step": 34600 }, { "epoch": 10.35, "grad_norm": 2.0355236530303955, "learning_rate": 2.3624720257964442e-05, "loss": 1.0517, "step": 34605 }, { "epoch": 10.35, "grad_norm": 4.993839263916016, "learning_rate": 2.3618853981926573e-05, "loss": 0.96, "step": 34610 }, { "epoch": 10.36, "grad_norm": 2.558202028274536, "learning_rate": 2.3612987782167964e-05, "loss": 1.053, "step": 34615 }, { "epoch": 10.36, "grad_norm": 1.7264127731323242, "learning_rate": 2.3607121659012586e-05, "loss": 1.0745, "step": 34620 }, { "epoch": 10.36, "grad_norm": 6.849349021911621, "learning_rate": 2.360125561278444e-05, "loss": 1.0707, "step": 34625 }, { "epoch": 10.36, "grad_norm": 2.509521245956421, "learning_rate": 2.3595389643807488e-05, "loss": 1.0879, "step": 34630 }, { "epoch": 10.36, "grad_norm": 1.6693092584609985, "learning_rate": 2.358952375240571e-05, "loss": 0.9827, "step": 34635 }, { "epoch": 10.36, "grad_norm": 10.258462905883789, "learning_rate": 2.3583657938903057e-05, "loss": 1.1256, "step": 34640 }, { "epoch": 10.37, "grad_norm": 3.046783685684204, "learning_rate": 2.3577792203623523e-05, "loss": 1.0576, "step": 34645 }, { "epoch": 10.37, "grad_norm": 4.163492202758789, "learning_rate": 2.3571926546891042e-05, "loss": 0.9902, "step": 34650 }, { "epoch": 10.37, "grad_norm": 1.5861027240753174, "learning_rate": 2.3566060969029563e-05, "loss": 1.195, "step": 34655 }, { "epoch": 10.37, "grad_norm": 2.5667498111724854, "learning_rate": 2.356019547036305e-05, "loss": 1.0898, "step": 34660 }, { "epoch": 10.37, "grad_norm": 1.8456099033355713, "learning_rate": 
2.355433005121545e-05, "loss": 0.9962, "step": 34665 }, { "epoch": 10.37, "grad_norm": 3.5785045623779297, "learning_rate": 2.3548464711910692e-05, "loss": 1.1786, "step": 34670 }, { "epoch": 10.37, "grad_norm": 2.0253660678863525, "learning_rate": 2.354259945277273e-05, "loss": 1.1772, "step": 34675 }, { "epoch": 10.38, "grad_norm": 1.426957368850708, "learning_rate": 2.353673427412547e-05, "loss": 0.9349, "step": 34680 }, { "epoch": 10.38, "grad_norm": 2.7187154293060303, "learning_rate": 2.353086917629287e-05, "loss": 1.0479, "step": 34685 }, { "epoch": 10.38, "grad_norm": 7.200928211212158, "learning_rate": 2.3525004159598822e-05, "loss": 0.9289, "step": 34690 }, { "epoch": 10.38, "grad_norm": 4.092981338500977, "learning_rate": 2.3519139224367278e-05, "loss": 0.9506, "step": 34695 }, { "epoch": 10.38, "grad_norm": 2.0632410049438477, "learning_rate": 2.351327437092212e-05, "loss": 1.054, "step": 34700 }, { "epoch": 10.38, "grad_norm": 2.467764377593994, "learning_rate": 2.3507409599587287e-05, "loss": 1.2235, "step": 34705 }, { "epoch": 10.38, "grad_norm": 2.497389793395996, "learning_rate": 2.350154491068667e-05, "loss": 0.9353, "step": 34710 }, { "epoch": 10.39, "grad_norm": 1.8514834642410278, "learning_rate": 2.349568030454416e-05, "loss": 1.1288, "step": 34715 }, { "epoch": 10.39, "grad_norm": 1.1936990022659302, "learning_rate": 2.348981578148367e-05, "loss": 1.1242, "step": 34720 }, { "epoch": 10.39, "grad_norm": 2.6927096843719482, "learning_rate": 2.3483951341829077e-05, "loss": 1.1503, "step": 34725 }, { "epoch": 10.39, "grad_norm": 5.187263011932373, "learning_rate": 2.3478086985904287e-05, "loss": 1.0652, "step": 34730 }, { "epoch": 10.39, "grad_norm": 3.8386361598968506, "learning_rate": 2.3472222714033157e-05, "loss": 1.0557, "step": 34735 }, { "epoch": 10.39, "grad_norm": 1.5286484956741333, "learning_rate": 2.346635852653959e-05, "loss": 1.0264, "step": 34740 }, { "epoch": 10.4, "grad_norm": 2.502507209777832, "learning_rate": 
2.3460494423747443e-05, "loss": 1.0286, "step": 34745 }, { "epoch": 10.4, "grad_norm": 2.476358652114868, "learning_rate": 2.345463040598059e-05, "loss": 0.9376, "step": 34750 }, { "epoch": 10.4, "grad_norm": 2.4512181282043457, "learning_rate": 2.3448766473562892e-05, "loss": 1.0785, "step": 34755 }, { "epoch": 10.4, "grad_norm": 3.614104986190796, "learning_rate": 2.344290262681821e-05, "loss": 1.0775, "step": 34760 }, { "epoch": 10.4, "grad_norm": 3.8133108615875244, "learning_rate": 2.3437038866070396e-05, "loss": 0.9405, "step": 34765 }, { "epoch": 10.4, "grad_norm": 3.1824207305908203, "learning_rate": 2.3431175191643307e-05, "loss": 0.924, "step": 34770 }, { "epoch": 10.4, "grad_norm": 4.146546840667725, "learning_rate": 2.342531160386077e-05, "loss": 1.0573, "step": 34775 }, { "epoch": 10.41, "grad_norm": 3.6027698516845703, "learning_rate": 2.341944810304665e-05, "loss": 1.087, "step": 34780 }, { "epoch": 10.41, "grad_norm": 4.211222171783447, "learning_rate": 2.3413584689524753e-05, "loss": 1.0654, "step": 34785 }, { "epoch": 10.41, "grad_norm": 3.0907680988311768, "learning_rate": 2.3407721363618935e-05, "loss": 1.1095, "step": 34790 }, { "epoch": 10.41, "grad_norm": 3.797797679901123, "learning_rate": 2.3401858125653006e-05, "loss": 0.9867, "step": 34795 }, { "epoch": 10.41, "grad_norm": 3.5584876537323, "learning_rate": 2.33959949759508e-05, "loss": 1.0036, "step": 34800 }, { "epoch": 10.41, "grad_norm": 2.268932580947876, "learning_rate": 2.3390131914836122e-05, "loss": 1.1396, "step": 34805 }, { "epoch": 10.41, "grad_norm": 1.9871071577072144, "learning_rate": 2.3384268942632777e-05, "loss": 1.049, "step": 34810 }, { "epoch": 10.42, "grad_norm": 5.160571098327637, "learning_rate": 2.3378406059664587e-05, "loss": 0.8987, "step": 34815 }, { "epoch": 10.42, "grad_norm": 3.7017822265625, "learning_rate": 2.3372543266255335e-05, "loss": 1.0438, "step": 34820 }, { "epoch": 10.42, "grad_norm": 2.0446062088012695, "learning_rate": 2.336668056272884e-05, 
"loss": 1.1254, "step": 34825 }, { "epoch": 10.42, "grad_norm": 3.492748260498047, "learning_rate": 2.3360817949408864e-05, "loss": 0.9717, "step": 34830 }, { "epoch": 10.42, "grad_norm": 4.558749198913574, "learning_rate": 2.3354955426619222e-05, "loss": 1.0145, "step": 34835 }, { "epoch": 10.42, "grad_norm": 2.7081844806671143, "learning_rate": 2.3349092994683676e-05, "loss": 1.0516, "step": 34840 }, { "epoch": 10.43, "grad_norm": NaN, "learning_rate": 2.334440311476777e-05, "loss": 0.975, "step": 34845 }, { "epoch": 10.43, "grad_norm": 3.3851234912872314, "learning_rate": 2.3338540847185518e-05, "loss": 1.0871, "step": 34850 }, { "epoch": 10.43, "grad_norm": 1.13603675365448, "learning_rate": 2.3332678671363932e-05, "loss": 1.0211, "step": 34855 }, { "epoch": 10.43, "grad_norm": 1.5273590087890625, "learning_rate": 2.3326816587626775e-05, "loss": 1.0605, "step": 34860 }, { "epoch": 10.43, "grad_norm": 2.7484490871429443, "learning_rate": 2.3320954596297788e-05, "loss": 1.193, "step": 34865 }, { "epoch": 10.43, "grad_norm": 6.367726802825928, "learning_rate": 2.3315092697700742e-05, "loss": 1.1871, "step": 34870 }, { "epoch": 10.43, "grad_norm": 2.484698534011841, "learning_rate": 2.3309230892159365e-05, "loss": 1.1412, "step": 34875 }, { "epoch": 10.44, "grad_norm": 2.0732154846191406, "learning_rate": 2.3303369179997418e-05, "loss": 1.114, "step": 34880 }, { "epoch": 10.44, "grad_norm": 2.8750739097595215, "learning_rate": 2.329750756153862e-05, "loss": 1.0455, "step": 34885 }, { "epoch": 10.44, "grad_norm": 1.7856959104537964, "learning_rate": 2.329164603710672e-05, "loss": 1.0768, "step": 34890 }, { "epoch": 10.44, "grad_norm": 8.524049758911133, "learning_rate": 2.328578460702543e-05, "loss": 1.1066, "step": 34895 }, { "epoch": 10.44, "grad_norm": 3.2621653079986572, "learning_rate": 2.3279923271618465e-05, "loss": 1.3073, "step": 34900 }, { "epoch": 10.44, "grad_norm": 1.9567291736602783, "learning_rate": 2.3274062031209563e-05, "loss": 1.0333, "step": 
34905 }, { "epoch": 10.44, "grad_norm": 3.069105386734009, "learning_rate": 2.326820088612241e-05, "loss": 1.0295, "step": 34910 }, { "epoch": 10.45, "grad_norm": 1.8950694799423218, "learning_rate": 2.3262339836680727e-05, "loss": 1.0614, "step": 34915 }, { "epoch": 10.45, "grad_norm": 3.2250144481658936, "learning_rate": 2.3256478883208206e-05, "loss": 1.0405, "step": 34920 }, { "epoch": 10.45, "grad_norm": 4.422776699066162, "learning_rate": 2.3250618026028543e-05, "loss": 1.1056, "step": 34925 }, { "epoch": 10.45, "grad_norm": 1.4227927923202515, "learning_rate": 2.3244757265465435e-05, "loss": 1.0868, "step": 34930 }, { "epoch": 10.45, "grad_norm": 2.4411497116088867, "learning_rate": 2.3238896601842548e-05, "loss": 0.9554, "step": 34935 }, { "epoch": 10.45, "grad_norm": 2.067897081375122, "learning_rate": 2.3233036035483587e-05, "loss": 1.1577, "step": 34940 }, { "epoch": 10.46, "grad_norm": 2.7568440437316895, "learning_rate": 2.322717556671219e-05, "loss": 1.0572, "step": 34945 }, { "epoch": 10.46, "grad_norm": 4.667929649353027, "learning_rate": 2.3221315195852058e-05, "loss": 0.9588, "step": 34950 }, { "epoch": 10.46, "grad_norm": 2.4807698726654053, "learning_rate": 2.321545492322684e-05, "loss": 1.0147, "step": 34955 }, { "epoch": 10.46, "grad_norm": 15.608848571777344, "learning_rate": 2.320959474916018e-05, "loss": 1.2589, "step": 34960 }, { "epoch": 10.46, "grad_norm": 2.8121330738067627, "learning_rate": 2.3203734673975753e-05, "loss": 1.0937, "step": 34965 }, { "epoch": 10.46, "grad_norm": 2.748537540435791, "learning_rate": 2.319787469799718e-05, "loss": 0.9264, "step": 34970 }, { "epoch": 10.46, "grad_norm": 3.233227014541626, "learning_rate": 2.3192014821548127e-05, "loss": 1.2414, "step": 34975 }, { "epoch": 10.47, "grad_norm": 1.753865361213684, "learning_rate": 2.3186155044952203e-05, "loss": 1.0152, "step": 34980 }, { "epoch": 10.47, "grad_norm": 3.4801855087280273, "learning_rate": 2.3180295368533063e-05, "loss": 0.9391, "step": 34985 }, { 
"epoch": 10.47, "grad_norm": 2.416815757751465, "learning_rate": 2.3174435792614318e-05, "loss": 1.2012, "step": 34990 }, { "epoch": 10.47, "grad_norm": 8.600541114807129, "learning_rate": 2.316857631751958e-05, "loss": 1.1342, "step": 34995 }, { "epoch": 10.47, "grad_norm": 3.0248448848724365, "learning_rate": 2.3162716943572465e-05, "loss": 1.0246, "step": 35000 }, { "epoch": 10.47, "grad_norm": 2.2905187606811523, "learning_rate": 2.3156857671096592e-05, "loss": 0.9875, "step": 35005 }, { "epoch": 10.47, "grad_norm": 5.242259979248047, "learning_rate": 2.3150998500415546e-05, "loss": 0.9466, "step": 35010 }, { "epoch": 10.48, "grad_norm": 2.770148992538452, "learning_rate": 2.3145139431852934e-05, "loss": 1.0718, "step": 35015 }, { "epoch": 10.48, "grad_norm": 5.146824836730957, "learning_rate": 2.3139280465732348e-05, "loss": 1.1159, "step": 35020 }, { "epoch": 10.48, "grad_norm": 1.9731031656265259, "learning_rate": 2.3133421602377366e-05, "loss": 0.8097, "step": 35025 }, { "epoch": 10.48, "grad_norm": 1.8398514986038208, "learning_rate": 2.3127562842111565e-05, "loss": 1.1045, "step": 35030 }, { "epoch": 10.48, "grad_norm": 3.5004403591156006, "learning_rate": 2.3121704185258527e-05, "loss": 1.0801, "step": 35035 }, { "epoch": 10.48, "grad_norm": 3.4678022861480713, "learning_rate": 2.3115845632141806e-05, "loss": 1.1383, "step": 35040 }, { "epoch": 10.49, "grad_norm": 2.484443426132202, "learning_rate": 2.310998718308498e-05, "loss": 1.0761, "step": 35045 }, { "epoch": 10.49, "grad_norm": 3.0486910343170166, "learning_rate": 2.31041288384116e-05, "loss": 1.2576, "step": 35050 }, { "epoch": 10.49, "grad_norm": 2.7550971508026123, "learning_rate": 2.3098270598445203e-05, "loss": 0.8973, "step": 35055 }, { "epoch": 10.49, "grad_norm": 1.8515464067459106, "learning_rate": 2.3092412463509357e-05, "loss": 1.0277, "step": 35060 }, { "epoch": 10.49, "grad_norm": 1.7236946821212769, "learning_rate": 2.3086554433927573e-05, "loss": 1.1393, "step": 35065 }, { "epoch": 
10.49, "grad_norm": 3.1678953170776367, "learning_rate": 2.308069651002341e-05, "loss": 1.0378, "step": 35070 }, { "epoch": 10.49, "grad_norm": 1.7023390531539917, "learning_rate": 2.3074838692120378e-05, "loss": 1.0801, "step": 35075 }, { "epoch": 10.5, "grad_norm": 3.274125814437866, "learning_rate": 2.306898098054201e-05, "loss": 1.027, "step": 35080 }, { "epoch": 10.5, "grad_norm": 4.002904415130615, "learning_rate": 2.306312337561181e-05, "loss": 0.9389, "step": 35085 }, { "epoch": 10.5, "grad_norm": 3.656179904937744, "learning_rate": 2.305726587765329e-05, "loss": 1.0733, "step": 35090 }, { "epoch": 10.5, "grad_norm": 5.379166126251221, "learning_rate": 2.305140848698996e-05, "loss": 1.0789, "step": 35095 }, { "epoch": 10.5, "grad_norm": 1.2176295518875122, "learning_rate": 2.3045551203945314e-05, "loss": 1.0208, "step": 35100 }, { "epoch": 10.5, "grad_norm": 2.7093119621276855, "learning_rate": 2.3039694028842847e-05, "loss": 0.9681, "step": 35105 }, { "epoch": 10.5, "grad_norm": 9.889016151428223, "learning_rate": 2.3033836962006033e-05, "loss": 0.8443, "step": 35110 }, { "epoch": 10.51, "grad_norm": 4.702127456665039, "learning_rate": 2.3027980003758366e-05, "loss": 0.9901, "step": 35115 }, { "epoch": 10.51, "grad_norm": 5.113590240478516, "learning_rate": 2.3022123154423316e-05, "loss": 1.1291, "step": 35120 }, { "epoch": 10.51, "grad_norm": 1.5303608179092407, "learning_rate": 2.301626641432434e-05, "loss": 1.1448, "step": 35125 }, { "epoch": 10.51, "grad_norm": 2.7038862705230713, "learning_rate": 2.3010409783784913e-05, "loss": 1.049, "step": 35130 }, { "epoch": 10.51, "grad_norm": 3.5487146377563477, "learning_rate": 2.3004553263128483e-05, "loss": 1.0263, "step": 35135 }, { "epoch": 10.51, "grad_norm": 1.9222893714904785, "learning_rate": 2.299869685267851e-05, "loss": 1.0709, "step": 35140 }, { "epoch": 10.51, "grad_norm": 3.621490716934204, "learning_rate": 2.2992840552758428e-05, "loss": 1.1768, "step": 35145 }, { "epoch": 10.52, "grad_norm": 
1.7279472351074219, "learning_rate": 2.2986984363691663e-05, "loss": 1.0183, "step": 35150 }, { "epoch": 10.52, "grad_norm": 1.8752796649932861, "learning_rate": 2.2981128285801672e-05, "loss": 1.0267, "step": 35155 }, { "epoch": 10.52, "grad_norm": 2.7973175048828125, "learning_rate": 2.297527231941186e-05, "loss": 1.1971, "step": 35160 }, { "epoch": 10.52, "grad_norm": 1.7248725891113281, "learning_rate": 2.296941646484566e-05, "loss": 0.9569, "step": 35165 }, { "epoch": 10.52, "grad_norm": 2.4975392818450928, "learning_rate": 2.2963560722426468e-05, "loss": 1.111, "step": 35170 }, { "epoch": 10.52, "grad_norm": 3.2511773109436035, "learning_rate": 2.295770509247771e-05, "loss": 1.066, "step": 35175 }, { "epoch": 10.53, "grad_norm": 2.9692437648773193, "learning_rate": 2.2951849575322772e-05, "loss": 0.99, "step": 35180 }, { "epoch": 10.53, "grad_norm": 2.788581371307373, "learning_rate": 2.2945994171285058e-05, "loss": 1.1557, "step": 35185 }, { "epoch": 10.53, "grad_norm": 3.816265106201172, "learning_rate": 2.2940138880687946e-05, "loss": 1.0066, "step": 35190 }, { "epoch": 10.53, "grad_norm": 5.253087520599365, "learning_rate": 2.2934283703854823e-05, "loss": 0.9963, "step": 35195 }, { "epoch": 10.53, "grad_norm": 2.2299044132232666, "learning_rate": 2.2928428641109065e-05, "loss": 1.1243, "step": 35200 }, { "epoch": 10.53, "grad_norm": 4.349503993988037, "learning_rate": 2.2922573692774034e-05, "loss": 1.1135, "step": 35205 }, { "epoch": 10.53, "grad_norm": 1.9277504682540894, "learning_rate": 2.2916718859173108e-05, "loss": 1.0192, "step": 35210 }, { "epoch": 10.54, "grad_norm": 2.2060022354125977, "learning_rate": 2.2910864140629634e-05, "loss": 1.1672, "step": 35215 }, { "epoch": 10.54, "grad_norm": 3.025515556335449, "learning_rate": 2.2905009537466955e-05, "loss": 0.9311, "step": 35220 }, { "epoch": 10.54, "grad_norm": 3.500619649887085, "learning_rate": 2.2899155050008428e-05, "loss": 1.1652, "step": 35225 }, { "epoch": 10.54, "grad_norm": 
8.94033145904541, "learning_rate": 2.2893300678577376e-05, "loss": 1.251, "step": 35230 }, { "epoch": 10.54, "grad_norm": 3.2193968296051025, "learning_rate": 2.2887446423497147e-05, "loss": 0.9508, "step": 35235 }, { "epoch": 10.54, "grad_norm": 3.0045528411865234, "learning_rate": 2.2881592285091045e-05, "loss": 0.9492, "step": 35240 }, { "epoch": 10.54, "grad_norm": 3.3813045024871826, "learning_rate": 2.2875738263682413e-05, "loss": 0.9394, "step": 35245 }, { "epoch": 10.55, "grad_norm": 1.6937832832336426, "learning_rate": 2.2869884359594545e-05, "loss": 1.1169, "step": 35250 }, { "epoch": 10.55, "grad_norm": 1.0461424589157104, "learning_rate": 2.2864030573150738e-05, "loss": 1.2219, "step": 35255 }, { "epoch": 10.55, "grad_norm": 2.934732675552368, "learning_rate": 2.2858176904674317e-05, "loss": 1.14, "step": 35260 }, { "epoch": 10.55, "grad_norm": 4.22146463394165, "learning_rate": 2.2852323354488548e-05, "loss": 1.103, "step": 35265 }, { "epoch": 10.55, "grad_norm": 3.387679100036621, "learning_rate": 2.2846469922916736e-05, "loss": 1.0281, "step": 35270 }, { "epoch": 10.55, "grad_norm": 2.5129520893096924, "learning_rate": 2.284061661028215e-05, "loss": 0.9338, "step": 35275 }, { "epoch": 10.56, "grad_norm": 1.3689956665039062, "learning_rate": 2.2834763416908057e-05, "loss": 1.1322, "step": 35280 }, { "epoch": 10.56, "grad_norm": 1.68153977394104, "learning_rate": 2.2828910343117734e-05, "loss": 1.1103, "step": 35285 }, { "epoch": 10.56, "grad_norm": 3.7177228927612305, "learning_rate": 2.2823057389234432e-05, "loss": 1.1346, "step": 35290 }, { "epoch": 10.56, "grad_norm": 1.6719677448272705, "learning_rate": 2.281720455558142e-05, "loss": 1.0287, "step": 35295 }, { "epoch": 10.56, "grad_norm": 2.740884780883789, "learning_rate": 2.2811351842481916e-05, "loss": 1.0737, "step": 35300 }, { "epoch": 10.56, "grad_norm": 3.8541574478149414, "learning_rate": 2.280549925025919e-05, "loss": 1.045, "step": 35305 }, { "epoch": 10.56, "grad_norm": 
3.7161831855773926, "learning_rate": 2.2799646779236454e-05, "loss": 1.1965, "step": 35310 }, { "epoch": 10.57, "grad_norm": 4.742947578430176, "learning_rate": 2.2793794429736933e-05, "loss": 1.0853, "step": 35315 }, { "epoch": 10.57, "grad_norm": 1.507947564125061, "learning_rate": 2.278794220208386e-05, "loss": 0.9959, "step": 35320 }, { "epoch": 10.57, "grad_norm": 1.6198567152023315, "learning_rate": 2.2782090096600434e-05, "loss": 1.0109, "step": 35325 }, { "epoch": 10.57, "grad_norm": 1.7230312824249268, "learning_rate": 2.277623811360987e-05, "loss": 1.1351, "step": 35330 }, { "epoch": 10.57, "grad_norm": 4.703332901000977, "learning_rate": 2.2770386253435358e-05, "loss": 1.0449, "step": 35335 }, { "epoch": 10.57, "grad_norm": 2.2942113876342773, "learning_rate": 2.2764534516400106e-05, "loss": 1.1787, "step": 35340 }, { "epoch": 10.57, "grad_norm": 4.3071513175964355, "learning_rate": 2.275868290282729e-05, "loss": 1.092, "step": 35345 }, { "epoch": 10.58, "grad_norm": 2.7554285526275635, "learning_rate": 2.2752831413040074e-05, "loss": 1.1289, "step": 35350 }, { "epoch": 10.58, "grad_norm": 2.5249757766723633, "learning_rate": 2.2746980047361654e-05, "loss": 1.0906, "step": 35355 }, { "epoch": 10.58, "grad_norm": 2.719829559326172, "learning_rate": 2.2741128806115176e-05, "loss": 0.9871, "step": 35360 }, { "epoch": 10.58, "grad_norm": 2.8020482063293457, "learning_rate": 2.273527768962381e-05, "loss": 1.1126, "step": 35365 }, { "epoch": 10.58, "grad_norm": 2.6041548252105713, "learning_rate": 2.2729426698210703e-05, "loss": 1.1024, "step": 35370 }, { "epoch": 10.58, "grad_norm": 3.7162394523620605, "learning_rate": 2.2723575832198997e-05, "loss": 0.9589, "step": 35375 }, { "epoch": 10.59, "grad_norm": 25.685956954956055, "learning_rate": 2.2717725091911843e-05, "loss": 0.9937, "step": 35380 }, { "epoch": 10.59, "grad_norm": 4.040263652801514, "learning_rate": 2.2711874477672342e-05, "loss": 0.9, "step": 35385 }, { "epoch": 10.59, "grad_norm": 
2.358093023300171, "learning_rate": 2.2706023989803653e-05, "loss": 1.068, "step": 35390 }, { "epoch": 10.59, "grad_norm": 3.6727354526519775, "learning_rate": 2.270017362862886e-05, "loss": 0.9494, "step": 35395 }, { "epoch": 10.59, "grad_norm": 2.7660393714904785, "learning_rate": 2.2694323394471097e-05, "loss": 0.9925, "step": 35400 }, { "epoch": 10.59, "grad_norm": 1.4063373804092407, "learning_rate": 2.2688473287653457e-05, "loss": 1.2022, "step": 35405 }, { "epoch": 10.59, "grad_norm": 2.7881920337677, "learning_rate": 2.2682623308499023e-05, "loss": 0.9778, "step": 35410 }, { "epoch": 10.6, "grad_norm": 2.657111644744873, "learning_rate": 2.267677345733091e-05, "loss": 1.0118, "step": 35415 }, { "epoch": 10.6, "grad_norm": 2.8737306594848633, "learning_rate": 2.267092373447217e-05, "loss": 0.9643, "step": 35420 }, { "epoch": 10.6, "grad_norm": 3.328007459640503, "learning_rate": 2.26650741402459e-05, "loss": 1.0657, "step": 35425 }, { "epoch": 10.6, "grad_norm": 4.150506019592285, "learning_rate": 2.265922467497515e-05, "loss": 1.0437, "step": 35430 }, { "epoch": 10.6, "grad_norm": 1.4019544124603271, "learning_rate": 2.2653375338983e-05, "loss": 1.1049, "step": 35435 }, { "epoch": 10.6, "grad_norm": 2.7139785289764404, "learning_rate": 2.264752613259249e-05, "loss": 1.0097, "step": 35440 }, { "epoch": 10.6, "grad_norm": 1.9748849868774414, "learning_rate": 2.2641677056126654e-05, "loss": 1.1295, "step": 35445 }, { "epoch": 10.61, "grad_norm": 3.615483283996582, "learning_rate": 2.263582810990855e-05, "loss": 1.0361, "step": 35450 }, { "epoch": 10.61, "grad_norm": 2.8796679973602295, "learning_rate": 2.26299792942612e-05, "loss": 1.1339, "step": 35455 }, { "epoch": 10.61, "grad_norm": 1.1939923763275146, "learning_rate": 2.262413060950763e-05, "loss": 1.0356, "step": 35460 }, { "epoch": 10.61, "grad_norm": 7.1389994621276855, "learning_rate": 2.261828205597086e-05, "loss": 1.0418, "step": 35465 }, { "epoch": 10.61, "grad_norm": 1.4644900560379028, 
"learning_rate": 2.2612433633973896e-05, "loss": 1.0545, "step": 35470 }, { "epoch": 10.61, "grad_norm": 1.3660115003585815, "learning_rate": 2.2606585343839744e-05, "loss": 1.0775, "step": 35475 }, { "epoch": 10.62, "grad_norm": 2.621532440185547, "learning_rate": 2.2600737185891385e-05, "loss": 1.1644, "step": 35480 }, { "epoch": 10.62, "grad_norm": 6.380326271057129, "learning_rate": 2.2594889160451828e-05, "loss": 1.1348, "step": 35485 }, { "epoch": 10.62, "grad_norm": 2.7322096824645996, "learning_rate": 2.2589041267844034e-05, "loss": 1.097, "step": 35490 }, { "epoch": 10.62, "grad_norm": 3.6032140254974365, "learning_rate": 2.2583193508390993e-05, "loss": 1.0526, "step": 35495 }, { "epoch": 10.62, "grad_norm": 2.900298833847046, "learning_rate": 2.2577345882415663e-05, "loss": 0.9409, "step": 35500 }, { "epoch": 10.62, "grad_norm": 1.8747291564941406, "learning_rate": 2.257149839024099e-05, "loss": 1.1623, "step": 35505 }, { "epoch": 10.62, "grad_norm": 2.2065136432647705, "learning_rate": 2.2565651032189948e-05, "loss": 0.9874, "step": 35510 }, { "epoch": 10.63, "grad_norm": 2.0098252296447754, "learning_rate": 2.255980380858546e-05, "loss": 1.1739, "step": 35515 }, { "epoch": 10.63, "grad_norm": 3.0822856426239014, "learning_rate": 2.2553956719750483e-05, "loss": 1.1192, "step": 35520 }, { "epoch": 10.63, "grad_norm": 2.1216416358947754, "learning_rate": 2.254810976600792e-05, "loss": 0.9649, "step": 35525 }, { "epoch": 10.63, "grad_norm": 1.3982808589935303, "learning_rate": 2.254226294768072e-05, "loss": 1.0809, "step": 35530 }, { "epoch": 10.63, "grad_norm": 1.7782407999038696, "learning_rate": 2.2536416265091775e-05, "loss": 1.1739, "step": 35535 }, { "epoch": 10.63, "grad_norm": 3.666782855987549, "learning_rate": 2.2530569718563998e-05, "loss": 1.1696, "step": 35540 }, { "epoch": 10.63, "grad_norm": 2.729365587234497, "learning_rate": 2.252472330842029e-05, "loss": 1.0824, "step": 35545 }, { "epoch": 10.64, "grad_norm": 2.964341163635254, 
"learning_rate": 2.251887703498354e-05, "loss": 1.2195, "step": 35550 }, { "epoch": 10.64, "grad_norm": 3.209306240081787, "learning_rate": 2.2513030898576635e-05, "loss": 1.1367, "step": 35555 }, { "epoch": 10.64, "grad_norm": 3.5836918354034424, "learning_rate": 2.2507184899522447e-05, "loss": 1.142, "step": 35560 }, { "epoch": 10.64, "grad_norm": 1.1011484861373901, "learning_rate": 2.2501339038143843e-05, "loss": 1.11, "step": 35565 }, { "epoch": 10.64, "grad_norm": 1.9480361938476562, "learning_rate": 2.2495493314763697e-05, "loss": 1.0384, "step": 35570 }, { "epoch": 10.64, "grad_norm": 1.9894143342971802, "learning_rate": 2.2489647729704838e-05, "loss": 1.0765, "step": 35575 }, { "epoch": 10.65, "grad_norm": 2.3497085571289062, "learning_rate": 2.2483802283290137e-05, "loss": 1.0708, "step": 35580 }, { "epoch": 10.65, "grad_norm": 2.3375086784362793, "learning_rate": 2.2477956975842407e-05, "loss": 1.041, "step": 35585 }, { "epoch": 10.65, "grad_norm": 4.431266784667969, "learning_rate": 2.2472111807684507e-05, "loss": 1.0047, "step": 35590 }, { "epoch": 10.65, "grad_norm": 6.045180797576904, "learning_rate": 2.246626677913923e-05, "loss": 1.0029, "step": 35595 }, { "epoch": 10.65, "grad_norm": 2.7329964637756348, "learning_rate": 2.2460421890529417e-05, "loss": 1.0755, "step": 35600 }, { "epoch": 10.65, "grad_norm": 3.588158130645752, "learning_rate": 2.2454577142177865e-05, "loss": 0.8856, "step": 35605 }, { "epoch": 10.65, "grad_norm": 2.5146617889404297, "learning_rate": 2.244873253440736e-05, "loss": 1.2, "step": 35610 }, { "epoch": 10.66, "grad_norm": 5.354367733001709, "learning_rate": 2.2442888067540715e-05, "loss": 1.0349, "step": 35615 }, { "epoch": 10.66, "grad_norm": 1.597602367401123, "learning_rate": 2.243704374190069e-05, "loss": 1.1549, "step": 35620 }, { "epoch": 10.66, "grad_norm": 1.7936272621154785, "learning_rate": 2.2431199557810092e-05, "loss": 1.0453, "step": 35625 }, { "epoch": 10.66, "grad_norm": 1.9876726865768433, "learning_rate": 
2.2425355515591666e-05, "loss": 1.0696, "step": 35630 }, { "epoch": 10.66, "grad_norm": 3.0657265186309814, "learning_rate": 2.241951161556818e-05, "loss": 0.9361, "step": 35635 }, { "epoch": 10.66, "grad_norm": 4.501842021942139, "learning_rate": 2.241366785806238e-05, "loss": 1.1468, "step": 35640 }, { "epoch": 10.66, "grad_norm": 3.099733829498291, "learning_rate": 2.240782424339702e-05, "loss": 0.9717, "step": 35645 }, { "epoch": 10.67, "grad_norm": 4.878641605377197, "learning_rate": 2.2401980771894828e-05, "loss": 0.9771, "step": 35650 }, { "epoch": 10.67, "grad_norm": 3.61327862739563, "learning_rate": 2.2396137443878534e-05, "loss": 1.0749, "step": 35655 }, { "epoch": 10.67, "grad_norm": 5.230576038360596, "learning_rate": 2.2390294259670877e-05, "loss": 1.1493, "step": 35660 }, { "epoch": 10.67, "grad_norm": 1.4460947513580322, "learning_rate": 2.238445121959455e-05, "loss": 1.2674, "step": 35665 }, { "epoch": 10.67, "grad_norm": 2.8116650581359863, "learning_rate": 2.2378608323972255e-05, "loss": 1.0535, "step": 35670 }, { "epoch": 10.67, "grad_norm": 3.138775587081909, "learning_rate": 2.2372765573126712e-05, "loss": 0.8282, "step": 35675 }, { "epoch": 10.68, "grad_norm": 2.2588982582092285, "learning_rate": 2.236692296738058e-05, "loss": 1.1188, "step": 35680 }, { "epoch": 10.68, "grad_norm": 2.0676910877227783, "learning_rate": 2.2361080507056565e-05, "loss": 1.1425, "step": 35685 }, { "epoch": 10.68, "grad_norm": 7.246330738067627, "learning_rate": 2.2355238192477324e-05, "loss": 0.9489, "step": 35690 }, { "epoch": 10.68, "grad_norm": 3.171942710876465, "learning_rate": 2.234939602396554e-05, "loss": 1.1224, "step": 35695 }, { "epoch": 10.68, "grad_norm": 2.2110137939453125, "learning_rate": 2.2343554001843857e-05, "loss": 1.1013, "step": 35700 }, { "epoch": 10.68, "grad_norm": 2.53434157371521, "learning_rate": 2.233771212643491e-05, "loss": 0.9745, "step": 35705 }, { "epoch": 10.68, "grad_norm": 2.9125607013702393, "learning_rate": 
2.2331870398061372e-05, "loss": 1.3203, "step": 35710 }, { "epoch": 10.69, "grad_norm": 3.6007089614868164, "learning_rate": 2.2326028817045842e-05, "loss": 1.0354, "step": 35715 }, { "epoch": 10.69, "grad_norm": 4.191182613372803, "learning_rate": 2.2320187383710978e-05, "loss": 1.0592, "step": 35720 }, { "epoch": 10.69, "grad_norm": 6.260267734527588, "learning_rate": 2.2314346098379367e-05, "loss": 1.1301, "step": 35725 }, { "epoch": 10.69, "grad_norm": 2.3227128982543945, "learning_rate": 2.230850496137363e-05, "loss": 0.998, "step": 35730 }, { "epoch": 10.69, "grad_norm": 2.7904467582702637, "learning_rate": 2.2302663973016374e-05, "loss": 1.0603, "step": 35735 }, { "epoch": 10.69, "grad_norm": 5.104443550109863, "learning_rate": 2.2296823133630174e-05, "loss": 0.9873, "step": 35740 }, { "epoch": 10.69, "grad_norm": 5.6274094581604, "learning_rate": 2.2290982443537633e-05, "loss": 0.9872, "step": 35745 }, { "epoch": 10.7, "grad_norm": 1.1891549825668335, "learning_rate": 2.2285141903061304e-05, "loss": 1.0641, "step": 35750 }, { "epoch": 10.7, "grad_norm": 2.298877000808716, "learning_rate": 2.2279301512523778e-05, "loss": 1.1714, "step": 35755 }, { "epoch": 10.7, "grad_norm": 2.9934897422790527, "learning_rate": 2.22734612722476e-05, "loss": 0.8837, "step": 35760 }, { "epoch": 10.7, "grad_norm": 3.266824722290039, "learning_rate": 2.2267621182555313e-05, "loss": 0.9443, "step": 35765 }, { "epoch": 10.7, "grad_norm": 6.283466815948486, "learning_rate": 2.2261781243769478e-05, "loss": 1.1074, "step": 35770 }, { "epoch": 10.7, "grad_norm": 1.5854196548461914, "learning_rate": 2.2255941456212606e-05, "loss": 0.9736, "step": 35775 }, { "epoch": 10.7, "grad_norm": 4.49138069152832, "learning_rate": 2.2250101820207246e-05, "loss": 1.1243, "step": 35780 }, { "epoch": 10.71, "grad_norm": 3.611423969268799, "learning_rate": 2.2244262336075896e-05, "loss": 1.1206, "step": 35785 }, { "epoch": 10.71, "grad_norm": 2.668792247772217, "learning_rate": 2.2238423004141083e-05, 
"loss": 1.3554, "step": 35790 }, { "epoch": 10.71, "grad_norm": 1.8896316289901733, "learning_rate": 2.2232583824725296e-05, "loss": 1.036, "step": 35795 }, { "epoch": 10.71, "grad_norm": 2.4683837890625, "learning_rate": 2.2226744798151017e-05, "loss": 1.2378, "step": 35800 }, { "epoch": 10.71, "grad_norm": 19.00712776184082, "learning_rate": 2.2220905924740752e-05, "loss": 0.9128, "step": 35805 }, { "epoch": 10.71, "grad_norm": 3.103806972503662, "learning_rate": 2.221506720481695e-05, "loss": 1.1218, "step": 35810 }, { "epoch": 10.72, "grad_norm": 5.2492756843566895, "learning_rate": 2.2209228638702098e-05, "loss": 1.0806, "step": 35815 }, { "epoch": 10.72, "grad_norm": 2.3563156127929688, "learning_rate": 2.2203390226718652e-05, "loss": 1.1438, "step": 35820 }, { "epoch": 10.72, "grad_norm": 1.5582448244094849, "learning_rate": 2.2197551969189052e-05, "loss": 1.194, "step": 35825 }, { "epoch": 10.72, "grad_norm": 3.394693613052368, "learning_rate": 2.219171386643575e-05, "loss": 1.1377, "step": 35830 }, { "epoch": 10.72, "grad_norm": 2.7966670989990234, "learning_rate": 2.2185875918781163e-05, "loss": 1.0573, "step": 35835 }, { "epoch": 10.72, "grad_norm": 12.529913902282715, "learning_rate": 2.218003812654773e-05, "loss": 0.8754, "step": 35840 }, { "epoch": 10.72, "grad_norm": 4.540731430053711, "learning_rate": 2.2174200490057856e-05, "loss": 0.9227, "step": 35845 }, { "epoch": 10.73, "grad_norm": 2.8158795833587646, "learning_rate": 2.2168363009633958e-05, "loss": 0.9375, "step": 35850 }, { "epoch": 10.73, "grad_norm": 3.6659469604492188, "learning_rate": 2.216252568559843e-05, "loss": 1.2302, "step": 35855 }, { "epoch": 10.73, "grad_norm": 2.3665411472320557, "learning_rate": 2.2156688518273648e-05, "loss": 0.997, "step": 35860 }, { "epoch": 10.73, "grad_norm": 3.4320127964019775, "learning_rate": 2.2150851507982014e-05, "loss": 1.1298, "step": 35865 }, { "epoch": 10.73, "grad_norm": 2.856066942214966, "learning_rate": 2.2145014655045876e-05, "loss": 
1.2628, "step": 35870 }, { "epoch": 10.73, "grad_norm": 2.302837610244751, "learning_rate": 2.2139177959787625e-05, "loss": 0.997, "step": 35875 }, { "epoch": 10.73, "grad_norm": 3.4538989067077637, "learning_rate": 2.213334142252959e-05, "loss": 0.9814, "step": 35880 }, { "epoch": 10.74, "grad_norm": 1.6373672485351562, "learning_rate": 2.212750504359414e-05, "loss": 1.069, "step": 35885 }, { "epoch": 10.74, "grad_norm": 1.9250200986862183, "learning_rate": 2.2121668823303595e-05, "loss": 1.0603, "step": 35890 }, { "epoch": 10.74, "grad_norm": 2.419745445251465, "learning_rate": 2.211583276198029e-05, "loss": 1.1043, "step": 35895 }, { "epoch": 10.74, "grad_norm": 1.965515375137329, "learning_rate": 2.210999685994654e-05, "loss": 0.9786, "step": 35900 }, { "epoch": 10.74, "grad_norm": 2.6983680725097656, "learning_rate": 2.2104161117524664e-05, "loss": 0.9368, "step": 35905 }, { "epoch": 10.74, "grad_norm": 2.2785370349884033, "learning_rate": 2.2098325535036957e-05, "loss": 1.1098, "step": 35910 }, { "epoch": 10.75, "grad_norm": 2.5998971462249756, "learning_rate": 2.2092490112805715e-05, "loss": 0.9755, "step": 35915 }, { "epoch": 10.75, "grad_norm": 2.66782546043396, "learning_rate": 2.208665485115322e-05, "loss": 1.2246, "step": 35920 }, { "epoch": 10.75, "grad_norm": 2.6380605697631836, "learning_rate": 2.2080819750401757e-05, "loss": 1.1043, "step": 35925 }, { "epoch": 10.75, "grad_norm": 1.2431594133377075, "learning_rate": 2.2074984810873572e-05, "loss": 0.9766, "step": 35930 }, { "epoch": 10.75, "grad_norm": 1.7657440900802612, "learning_rate": 2.2069150032890952e-05, "loss": 1.0666, "step": 35935 }, { "epoch": 10.75, "grad_norm": 1.1442104578018188, "learning_rate": 2.2064482327034192e-05, "loss": 0.958, "step": 35940 }, { "epoch": 10.75, "grad_norm": 4.242776393890381, "learning_rate": 2.2058647840645615e-05, "loss": 1.0115, "step": 35945 }, { "epoch": 10.76, "grad_norm": 2.7217624187469482, "learning_rate": 2.2052813516704852e-05, "loss": 1.0799, 
"step": 35950 }, { "epoch": 10.76, "grad_norm": 2.8261914253234863, "learning_rate": 2.204697935553414e-05, "loss": 1.1698, "step": 35955 }, { "epoch": 10.76, "grad_norm": 1.2735272645950317, "learning_rate": 2.2041145357455684e-05, "loss": 0.9582, "step": 35960 }, { "epoch": 10.76, "grad_norm": 2.095346212387085, "learning_rate": 2.2035311522791705e-05, "loss": 1.0745, "step": 35965 }, { "epoch": 10.76, "grad_norm": 3.746208906173706, "learning_rate": 2.2029477851864376e-05, "loss": 0.9504, "step": 35970 }, { "epoch": 10.76, "grad_norm": 32.70151138305664, "learning_rate": 2.2023644344995907e-05, "loss": 1.1023, "step": 35975 }, { "epoch": 10.76, "grad_norm": 2.321796417236328, "learning_rate": 2.2017811002508466e-05, "loss": 1.1198, "step": 35980 }, { "epoch": 10.77, "grad_norm": 2.214085102081299, "learning_rate": 2.201197782472422e-05, "loss": 1.0783, "step": 35985 }, { "epoch": 10.77, "grad_norm": 2.9954833984375, "learning_rate": 2.2006144811965336e-05, "loss": 1.059, "step": 35990 }, { "epoch": 10.77, "grad_norm": 1.0794235467910767, "learning_rate": 2.2000311964553954e-05, "loss": 1.1247, "step": 35995 }, { "epoch": 10.77, "grad_norm": 5.492161750793457, "learning_rate": 2.1994479282812236e-05, "loss": 0.9969, "step": 36000 }, { "epoch": 10.77, "grad_norm": 1.0977524518966675, "learning_rate": 2.19886467670623e-05, "loss": 0.9352, "step": 36005 }, { "epoch": 10.77, "grad_norm": 2.7051050662994385, "learning_rate": 2.1982814417626265e-05, "loss": 0.7758, "step": 36010 }, { "epoch": 10.78, "grad_norm": 2.0435142517089844, "learning_rate": 2.1976982234826265e-05, "loss": 1.0474, "step": 36015 }, { "epoch": 10.78, "grad_norm": 3.4940507411956787, "learning_rate": 2.197115021898438e-05, "loss": 1.0514, "step": 36020 }, { "epoch": 10.78, "grad_norm": 2.082819700241089, "learning_rate": 2.1965318370422735e-05, "loss": 1.0285, "step": 36025 }, { "epoch": 10.78, "grad_norm": 2.880784511566162, "learning_rate": 2.195948668946339e-05, "loss": 1.2136, "step": 36030 }, 
{ "epoch": 10.78, "grad_norm": 3.054262161254883, "learning_rate": 2.1953655176428445e-05, "loss": 1.0397, "step": 36035 }, { "epoch": 10.78, "grad_norm": 3.0019352436065674, "learning_rate": 2.194782383163996e-05, "loss": 1.11, "step": 36040 }, { "epoch": 10.78, "grad_norm": 0.9828662276268005, "learning_rate": 2.194199265541998e-05, "loss": 1.0822, "step": 36045 }, { "epoch": 10.79, "grad_norm": 3.027097702026367, "learning_rate": 2.1936161648090575e-05, "loss": 0.9884, "step": 36050 }, { "epoch": 10.79, "grad_norm": 6.978221893310547, "learning_rate": 2.193033080997377e-05, "loss": 1.0059, "step": 36055 }, { "epoch": 10.79, "grad_norm": 3.2999072074890137, "learning_rate": 2.192450014139161e-05, "loss": 1.2139, "step": 36060 }, { "epoch": 10.79, "grad_norm": 2.107088804244995, "learning_rate": 2.1918669642666106e-05, "loss": 0.9479, "step": 36065 }, { "epoch": 10.79, "grad_norm": 1.4788484573364258, "learning_rate": 2.1912839314119276e-05, "loss": 1.0025, "step": 36070 }, { "epoch": 10.79, "grad_norm": 1.6675289869308472, "learning_rate": 2.190700915607313e-05, "loss": 1.1289, "step": 36075 }, { "epoch": 10.79, "grad_norm": 4.08076286315918, "learning_rate": 2.190117916884964e-05, "loss": 1.1822, "step": 36080 }, { "epoch": 10.8, "grad_norm": 9.089180946350098, "learning_rate": 2.1895349352770816e-05, "loss": 1.0968, "step": 36085 }, { "epoch": 10.8, "grad_norm": 4.459921360015869, "learning_rate": 2.188951970815861e-05, "loss": 0.8567, "step": 36090 }, { "epoch": 10.8, "grad_norm": 3.4034626483917236, "learning_rate": 2.1883690235335004e-05, "loss": 1.05, "step": 36095 }, { "epoch": 10.8, "grad_norm": 4.486004829406738, "learning_rate": 2.187786093462194e-05, "loss": 1.04, "step": 36100 }, { "epoch": 10.8, "grad_norm": 2.139437675476074, "learning_rate": 2.187203180634138e-05, "loss": 0.9213, "step": 36105 }, { "epoch": 10.8, "grad_norm": 2.6758251190185547, "learning_rate": 2.1866202850815254e-05, "loss": 1.0313, "step": 36110 }, { "epoch": 10.81, "grad_norm": 
2.4901866912841797, "learning_rate": 2.1860374068365473e-05, "loss": 1.0219, "step": 36115 }, { "epoch": 10.81, "grad_norm": 4.980169296264648, "learning_rate": 2.1854545459313985e-05, "loss": 1.1223, "step": 36120 }, { "epoch": 10.81, "grad_norm": 2.8188419342041016, "learning_rate": 2.1848717023982667e-05, "loss": 1.0865, "step": 36125 }, { "epoch": 10.81, "grad_norm": 6.227662086486816, "learning_rate": 2.1842888762693444e-05, "loss": 1.1196, "step": 36130 }, { "epoch": 10.81, "grad_norm": 2.4889774322509766, "learning_rate": 2.1837060675768196e-05, "loss": 1.0085, "step": 36135 }, { "epoch": 10.81, "grad_norm": 2.1352288722991943, "learning_rate": 2.1831232763528783e-05, "loss": 1.0865, "step": 36140 }, { "epoch": 10.81, "grad_norm": 1.6987781524658203, "learning_rate": 2.1825405026297103e-05, "loss": 1.1085, "step": 36145 }, { "epoch": 10.82, "grad_norm": 2.239659309387207, "learning_rate": 2.1819577464394992e-05, "loss": 1.2226, "step": 36150 }, { "epoch": 10.82, "grad_norm": 2.6719276905059814, "learning_rate": 2.181375007814432e-05, "loss": 1.2125, "step": 36155 }, { "epoch": 10.82, "grad_norm": 1.225275993347168, "learning_rate": 2.180792286786692e-05, "loss": 1.0033, "step": 36160 }, { "epoch": 10.82, "grad_norm": 4.794998645782471, "learning_rate": 2.180209583388462e-05, "loss": 1.0673, "step": 36165 }, { "epoch": 10.82, "grad_norm": 2.080230951309204, "learning_rate": 2.179626897651925e-05, "loss": 1.067, "step": 36170 }, { "epoch": 10.82, "grad_norm": 1.528211236000061, "learning_rate": 2.17904422960926e-05, "loss": 1.1119, "step": 36175 }, { "epoch": 10.82, "grad_norm": 4.195923328399658, "learning_rate": 2.17846157929265e-05, "loss": 1.2904, "step": 36180 }, { "epoch": 10.83, "grad_norm": 3.0591914653778076, "learning_rate": 2.1778789467342713e-05, "loss": 1.1747, "step": 36185 }, { "epoch": 10.83, "grad_norm": 9.022075653076172, "learning_rate": 2.177296331966305e-05, "loss": 1.0482, "step": 36190 }, { "epoch": 10.83, "grad_norm": 3.723809003829956, 
"learning_rate": 2.1767137350209253e-05, "loss": 1.0859, "step": 36195 }, { "epoch": 10.83, "grad_norm": 2.5620508193969727, "learning_rate": 2.1761311559303117e-05, "loss": 0.9736, "step": 36200 }, { "epoch": 10.83, "grad_norm": 3.8153581619262695, "learning_rate": 2.1755485947266375e-05, "loss": 1.2724, "step": 36205 }, { "epoch": 10.83, "grad_norm": 1.3778871297836304, "learning_rate": 2.174966051442076e-05, "loss": 1.1058, "step": 36210 }, { "epoch": 10.84, "grad_norm": 3.073227643966675, "learning_rate": 2.174383526108803e-05, "loss": 1.0846, "step": 36215 }, { "epoch": 10.84, "grad_norm": 1.9298650026321411, "learning_rate": 2.1738010187589878e-05, "loss": 0.9319, "step": 36220 }, { "epoch": 10.84, "grad_norm": 1.1409422159194946, "learning_rate": 2.1732185294248045e-05, "loss": 1.1355, "step": 36225 }, { "epoch": 10.84, "grad_norm": 2.380774974822998, "learning_rate": 2.1726360581384218e-05, "loss": 1.1526, "step": 36230 }, { "epoch": 10.84, "grad_norm": 1.3581106662750244, "learning_rate": 2.172053604932009e-05, "loss": 1.0837, "step": 36235 }, { "epoch": 10.84, "grad_norm": 4.142109394073486, "learning_rate": 2.1714711698377348e-05, "loss": 1.0323, "step": 36240 }, { "epoch": 10.84, "grad_norm": 2.672729015350342, "learning_rate": 2.1708887528877668e-05, "loss": 1.0175, "step": 36245 }, { "epoch": 10.85, "grad_norm": 1.8875662088394165, "learning_rate": 2.1703063541142703e-05, "loss": 1.191, "step": 36250 }, { "epoch": 10.85, "grad_norm": 1.7792450189590454, "learning_rate": 2.1697239735494117e-05, "loss": 0.9895, "step": 36255 }, { "epoch": 10.85, "grad_norm": 2.7931838035583496, "learning_rate": 2.1691416112253547e-05, "loss": 1.0206, "step": 36260 }, { "epoch": 10.85, "grad_norm": 3.638197660446167, "learning_rate": 2.1685592671742626e-05, "loss": 1.1491, "step": 36265 }, { "epoch": 10.85, "grad_norm": 1.3871898651123047, "learning_rate": 2.167976941428297e-05, "loss": 1.109, "step": 36270 }, { "epoch": 10.85, "grad_norm": 2.8166041374206543, 
"learning_rate": 2.167394634019621e-05, "loss": 1.0765, "step": 36275 }, { "epoch": 10.85, "grad_norm": 2.0707552433013916, "learning_rate": 2.1668123449803924e-05, "loss": 1.0585, "step": 36280 }, { "epoch": 10.86, "grad_norm": 3.0661659240722656, "learning_rate": 2.1662300743427727e-05, "loss": 1.0604, "step": 36285 }, { "epoch": 10.86, "grad_norm": 2.1380584239959717, "learning_rate": 2.1656478221389183e-05, "loss": 1.0168, "step": 36290 }, { "epoch": 10.86, "grad_norm": 2.518028974533081, "learning_rate": 2.1650655884009875e-05, "loss": 1.2193, "step": 36295 }, { "epoch": 10.86, "grad_norm": 1.8819745779037476, "learning_rate": 2.1644833731611366e-05, "loss": 0.9329, "step": 36300 }, { "epoch": 10.86, "grad_norm": 3.2668797969818115, "learning_rate": 2.163901176451519e-05, "loss": 0.9986, "step": 36305 }, { "epoch": 10.86, "grad_norm": 2.280186653137207, "learning_rate": 2.1633189983042916e-05, "loss": 0.8617, "step": 36310 }, { "epoch": 10.87, "grad_norm": 1.871981143951416, "learning_rate": 2.1627368387516043e-05, "loss": 0.9601, "step": 36315 }, { "epoch": 10.87, "grad_norm": 2.4408321380615234, "learning_rate": 2.162154697825612e-05, "loss": 1.0367, "step": 36320 }, { "epoch": 10.87, "grad_norm": 8.324357986450195, "learning_rate": 2.1615725755584636e-05, "loss": 1.1349, "step": 36325 }, { "epoch": 10.87, "grad_norm": 3.0855600833892822, "learning_rate": 2.1609904719823116e-05, "loss": 1.0233, "step": 36330 }, { "epoch": 10.87, "grad_norm": 1.6718279123306274, "learning_rate": 2.1604083871293023e-05, "loss": 1.1736, "step": 36335 }, { "epoch": 10.87, "grad_norm": 2.869089365005493, "learning_rate": 2.159826321031585e-05, "loss": 1.0822, "step": 36340 }, { "epoch": 10.87, "grad_norm": 6.643579483032227, "learning_rate": 2.1592442737213064e-05, "loss": 0.9564, "step": 36345 }, { "epoch": 10.88, "grad_norm": 1.4432473182678223, "learning_rate": 2.1586622452306125e-05, "loss": 1.0499, "step": 36350 }, { "epoch": 10.88, "grad_norm": 3.8422656059265137, 
"learning_rate": 2.158080235591648e-05, "loss": 0.7816, "step": 36355 }, { "epoch": 10.88, "grad_norm": 2.3019320964813232, "learning_rate": 2.1574982448365568e-05, "loss": 1.1791, "step": 36360 }, { "epoch": 10.88, "grad_norm": 2.5562939643859863, "learning_rate": 2.1569162729974806e-05, "loss": 1.1605, "step": 36365 }, { "epoch": 10.88, "grad_norm": 6.145001411437988, "learning_rate": 2.1563343201065633e-05, "loss": 1.1362, "step": 36370 }, { "epoch": 10.88, "grad_norm": 3.0975069999694824, "learning_rate": 2.1557523861959434e-05, "loss": 0.8223, "step": 36375 }, { "epoch": 10.88, "grad_norm": 3.5439906120300293, "learning_rate": 2.1551704712977623e-05, "loss": 1.0676, "step": 36380 }, { "epoch": 10.89, "grad_norm": 1.0710362195968628, "learning_rate": 2.1545885754441562e-05, "loss": 1.1736, "step": 36385 }, { "epoch": 10.89, "grad_norm": 8.471335411071777, "learning_rate": 2.1540066986672655e-05, "loss": 0.8974, "step": 36390 }, { "epoch": 10.89, "grad_norm": 3.0269336700439453, "learning_rate": 2.153424840999225e-05, "loss": 1.0499, "step": 36395 }, { "epoch": 10.89, "grad_norm": 3.6303861141204834, "learning_rate": 2.1528430024721694e-05, "loss": 1.0851, "step": 36400 }, { "epoch": 10.89, "grad_norm": 4.8882293701171875, "learning_rate": 2.1522611831182348e-05, "loss": 0.8858, "step": 36405 }, { "epoch": 10.89, "grad_norm": 2.3631794452667236, "learning_rate": 2.1516793829695526e-05, "loss": 1.1782, "step": 36410 }, { "epoch": 10.89, "grad_norm": 3.9514851570129395, "learning_rate": 2.151097602058257e-05, "loss": 1.195, "step": 36415 }, { "epoch": 10.9, "grad_norm": 2.633768320083618, "learning_rate": 2.1505158404164773e-05, "loss": 1.1729, "step": 36420 }, { "epoch": 10.9, "grad_norm": 6.990433692932129, "learning_rate": 2.1499340980763456e-05, "loss": 0.9894, "step": 36425 }, { "epoch": 10.9, "grad_norm": 2.7082440853118896, "learning_rate": 2.149352375069989e-05, "loss": 1.0238, "step": 36430 }, { "epoch": 10.9, "grad_norm": 3.5102274417877197, 
"learning_rate": 2.1487706714295357e-05, "loss": 0.9571, "step": 36435 }, { "epoch": 10.9, "grad_norm": 2.7834293842315674, "learning_rate": 2.1481889871871143e-05, "loss": 1.1934, "step": 36440 }, { "epoch": 10.9, "grad_norm": 1.7707253694534302, "learning_rate": 2.1476073223748485e-05, "loss": 1.0634, "step": 36445 }, { "epoch": 10.91, "grad_norm": 1.9651144742965698, "learning_rate": 2.147025677024865e-05, "loss": 1.3354, "step": 36450 }, { "epoch": 10.91, "grad_norm": 4.304943084716797, "learning_rate": 2.146444051169285e-05, "loss": 0.9292, "step": 36455 }, { "epoch": 10.91, "grad_norm": 3.199054479598999, "learning_rate": 2.1458624448402343e-05, "loss": 0.9946, "step": 36460 }, { "epoch": 10.91, "grad_norm": 1.4956828355789185, "learning_rate": 2.1452808580698325e-05, "loss": 0.9661, "step": 36465 }, { "epoch": 10.91, "grad_norm": 1.1398528814315796, "learning_rate": 2.144699290890199e-05, "loss": 1.0639, "step": 36470 }, { "epoch": 10.91, "grad_norm": 7.341798782348633, "learning_rate": 2.1441177433334552e-05, "loss": 0.8947, "step": 36475 }, { "epoch": 10.91, "grad_norm": 3.4453506469726562, "learning_rate": 2.1435362154317176e-05, "loss": 1.0649, "step": 36480 }, { "epoch": 10.92, "grad_norm": 0.8624611496925354, "learning_rate": 2.1429547072171057e-05, "loss": 1.1476, "step": 36485 }, { "epoch": 10.92, "grad_norm": 5.505244255065918, "learning_rate": 2.1423732187217338e-05, "loss": 0.9781, "step": 36490 }, { "epoch": 10.92, "grad_norm": 3.857640504837036, "learning_rate": 2.1417917499777164e-05, "loss": 1.0794, "step": 36495 }, { "epoch": 10.92, "grad_norm": 3.2706289291381836, "learning_rate": 2.1412103010171693e-05, "loss": 1.0661, "step": 36500 }, { "epoch": 10.92, "grad_norm": 2.5430166721343994, "learning_rate": 2.140628871872203e-05, "loss": 1.0644, "step": 36505 }, { "epoch": 10.92, "grad_norm": 3.8892323970794678, "learning_rate": 2.140047462574932e-05, "loss": 0.9569, "step": 36510 }, { "epoch": 10.92, "grad_norm": 5.289989948272705, 
"learning_rate": 2.1394660731574643e-05, "loss": 1.04, "step": 36515 }, { "epoch": 10.93, "grad_norm": 1.7710543870925903, "learning_rate": 2.138884703651911e-05, "loss": 1.024, "step": 36520 }, { "epoch": 10.93, "grad_norm": 7.825911998748779, "learning_rate": 2.138303354090381e-05, "loss": 0.9066, "step": 36525 }, { "epoch": 10.93, "grad_norm": 1.9190192222595215, "learning_rate": 2.1377220245049793e-05, "loss": 0.8708, "step": 36530 }, { "epoch": 10.93, "grad_norm": 2.441214084625244, "learning_rate": 2.1371407149278152e-05, "loss": 0.9889, "step": 36535 }, { "epoch": 10.93, "grad_norm": 5.316892623901367, "learning_rate": 2.136559425390991e-05, "loss": 1.0597, "step": 36540 }, { "epoch": 10.93, "grad_norm": 5.2226667404174805, "learning_rate": 2.135978155926613e-05, "loss": 1.0606, "step": 36545 }, { "epoch": 10.94, "grad_norm": 3.5756514072418213, "learning_rate": 2.135396906566782e-05, "loss": 1.1334, "step": 36550 }, { "epoch": 10.94, "grad_norm": 1.187839150428772, "learning_rate": 2.134815677343602e-05, "loss": 1.2043, "step": 36555 }, { "epoch": 10.94, "grad_norm": 5.102145671844482, "learning_rate": 2.1342344682891722e-05, "loss": 0.7688, "step": 36560 }, { "epoch": 10.94, "grad_norm": 4.2323527336120605, "learning_rate": 2.133653279435592e-05, "loss": 1.158, "step": 36565 }, { "epoch": 10.94, "grad_norm": 2.2554049491882324, "learning_rate": 2.1330721108149614e-05, "loss": 0.8822, "step": 36570 }, { "epoch": 10.94, "grad_norm": 2.766228437423706, "learning_rate": 2.132490962459375e-05, "loss": 1.0252, "step": 36575 }, { "epoch": 10.94, "grad_norm": 5.992697238922119, "learning_rate": 2.1319098344009325e-05, "loss": 1.0864, "step": 36580 }, { "epoch": 10.95, "grad_norm": 1.6674883365631104, "learning_rate": 2.1313287266717267e-05, "loss": 0.9115, "step": 36585 }, { "epoch": 10.95, "grad_norm": 1.9288809299468994, "learning_rate": 2.1307476393038517e-05, "loss": 1.0394, "step": 36590 }, { "epoch": 10.95, "grad_norm": 1.3914613723754883, "learning_rate": 
2.1301665723294008e-05, "loss": 0.9237, "step": 36595 }, { "epoch": 10.95, "grad_norm": 2.7078752517700195, "learning_rate": 2.129585525780466e-05, "loss": 0.9573, "step": 36600 }, { "epoch": 10.95, "grad_norm": 1.410092830657959, "learning_rate": 2.1290044996891373e-05, "loss": 0.9738, "step": 36605 }, { "epoch": 10.95, "grad_norm": 2.085670232772827, "learning_rate": 2.1284234940875045e-05, "loss": 1.0609, "step": 36610 }, { "epoch": 10.95, "grad_norm": 3.979780435562134, "learning_rate": 2.1278425090076558e-05, "loss": 1.1589, "step": 36615 }, { "epoch": 10.96, "grad_norm": 4.352084636688232, "learning_rate": 2.1272615444816792e-05, "loss": 1.0897, "step": 36620 }, { "epoch": 10.96, "grad_norm": 3.6647934913635254, "learning_rate": 2.126680600541659e-05, "loss": 1.0315, "step": 36625 }, { "epoch": 10.96, "grad_norm": 4.659989833831787, "learning_rate": 2.126099677219682e-05, "loss": 0.9488, "step": 36630 }, { "epoch": 10.96, "grad_norm": 4.262448310852051, "learning_rate": 2.1255187745478305e-05, "loss": 1.18, "step": 36635 }, { "epoch": 10.96, "grad_norm": 7.49601936340332, "learning_rate": 2.124937892558189e-05, "loss": 1.1229, "step": 36640 }, { "epoch": 10.96, "grad_norm": 1.145183801651001, "learning_rate": 2.1243570312828365e-05, "loss": 0.9284, "step": 36645 }, { "epoch": 10.97, "grad_norm": 2.557342290878296, "learning_rate": 2.1237761907538556e-05, "loss": 0.982, "step": 36650 }, { "epoch": 10.97, "grad_norm": 2.6082074642181396, "learning_rate": 2.1231953710033246e-05, "loss": 1.0863, "step": 36655 }, { "epoch": 10.97, "grad_norm": 3.32584810256958, "learning_rate": 2.122614572063321e-05, "loss": 1.0423, "step": 36660 }, { "epoch": 10.97, "grad_norm": 2.8699169158935547, "learning_rate": 2.122033793965923e-05, "loss": 1.0971, "step": 36665 }, { "epoch": 10.97, "grad_norm": 3.991990566253662, "learning_rate": 2.1214530367432047e-05, "loss": 1.1467, "step": 36670 }, { "epoch": 10.97, "grad_norm": 2.663698673248291, "learning_rate": 2.120872300427243e-05, 
"loss": 0.9862, "step": 36675 }, { "epoch": 10.97, "grad_norm": 3.4537861347198486, "learning_rate": 2.1202915850501087e-05, "loss": 1.1518, "step": 36680 }, { "epoch": 10.98, "grad_norm": 3.4938299655914307, "learning_rate": 2.1197108906438765e-05, "loss": 1.1402, "step": 36685 }, { "epoch": 10.98, "grad_norm": 2.6186256408691406, "learning_rate": 2.119130217240616e-05, "loss": 1.2347, "step": 36690 }, { "epoch": 10.98, "grad_norm": 2.723661422729492, "learning_rate": 2.118549564872398e-05, "loss": 1.0232, "step": 36695 }, { "epoch": 10.98, "grad_norm": 4.313886642456055, "learning_rate": 2.1179689335712906e-05, "loss": 1.2257, "step": 36700 }, { "epoch": 10.98, "grad_norm": 1.8701528310775757, "learning_rate": 2.1173883233693623e-05, "loss": 1.207, "step": 36705 }, { "epoch": 10.98, "grad_norm": 1.5927581787109375, "learning_rate": 2.1168077342986793e-05, "loss": 1.0767, "step": 36710 }, { "epoch": 10.98, "grad_norm": 5.454744338989258, "learning_rate": 2.116227166391307e-05, "loss": 0.9561, "step": 36715 }, { "epoch": 10.99, "grad_norm": 0.9375532269477844, "learning_rate": 2.1156466196793086e-05, "loss": 1.1743, "step": 36720 }, { "epoch": 10.99, "grad_norm": 5.345798015594482, "learning_rate": 2.1150660941947486e-05, "loss": 1.0879, "step": 36725 }, { "epoch": 10.99, "grad_norm": 2.450878381729126, "learning_rate": 2.1144855899696873e-05, "loss": 0.8149, "step": 36730 }, { "epoch": 10.99, "grad_norm": 2.603236436843872, "learning_rate": 2.113905107036187e-05, "loss": 1.1418, "step": 36735 }, { "epoch": 10.99, "grad_norm": 3.163844347000122, "learning_rate": 2.1133246454263053e-05, "loss": 1.0855, "step": 36740 }, { "epoch": 10.99, "grad_norm": 2.926191568374634, "learning_rate": 2.1127442051721026e-05, "loss": 1.1282, "step": 36745 }, { "epoch": 11.0, "grad_norm": 4.042135238647461, "learning_rate": 2.112163786305635e-05, "loss": 1.0874, "step": 36750 }, { "epoch": 11.0, "grad_norm": 4.720858097076416, "learning_rate": 2.1115833888589575e-05, "loss": 1.306, 
"step": 36755 }, { "epoch": 11.0, "grad_norm": 5.201600074768066, "learning_rate": 2.1110030128641264e-05, "loss": 1.0244, "step": 36760 }, { "epoch": 11.0, "grad_norm": 4.97416353225708, "learning_rate": 2.1104226583531936e-05, "loss": 1.1876, "step": 36765 }, { "epoch": 11.0, "grad_norm": 1.4578512907028198, "learning_rate": 2.1098423253582136e-05, "loss": 1.1209, "step": 36770 }, { "epoch": 11.0, "grad_norm": 2.909250259399414, "learning_rate": 2.109262013911235e-05, "loss": 0.8941, "step": 36775 }, { "epoch": 11.0, "grad_norm": 1.240099549293518, "learning_rate": 2.108681724044311e-05, "loss": 0.9776, "step": 36780 }, { "epoch": 11.01, "grad_norm": 3.794100284576416, "learning_rate": 2.1081014557894875e-05, "loss": 1.0358, "step": 36785 }, { "epoch": 11.01, "grad_norm": 1.7050538063049316, "learning_rate": 2.1075212091788133e-05, "loss": 0.8003, "step": 36790 }, { "epoch": 11.01, "grad_norm": 1.4368036985397339, "learning_rate": 2.106940984244335e-05, "loss": 1.1668, "step": 36795 }, { "epoch": 11.01, "grad_norm": 4.024328708648682, "learning_rate": 2.1063607810180975e-05, "loss": 1.0022, "step": 36800 }, { "epoch": 11.01, "grad_norm": 2.9807326793670654, "learning_rate": 2.1057805995321447e-05, "loss": 0.9635, "step": 36805 }, { "epoch": 11.01, "grad_norm": 2.2452774047851562, "learning_rate": 2.1052004398185193e-05, "loss": 1.0438, "step": 36810 }, { "epoch": 11.01, "grad_norm": 2.678302764892578, "learning_rate": 2.104620301909264e-05, "loss": 0.9814, "step": 36815 }, { "epoch": 11.02, "grad_norm": 1.3954975605010986, "learning_rate": 2.104040185836419e-05, "loss": 1.003, "step": 36820 }, { "epoch": 11.02, "grad_norm": 1.7821259498596191, "learning_rate": 2.1034600916320212e-05, "loss": 1.1241, "step": 36825 }, { "epoch": 11.02, "grad_norm": 2.254903793334961, "learning_rate": 2.102880019328112e-05, "loss": 1.0537, "step": 36830 }, { "epoch": 11.02, "grad_norm": 3.2131004333496094, "learning_rate": 2.102299968956725e-05, "loss": 1.0781, "step": 36835 }, { 
"epoch": 11.02, "grad_norm": 2.1577985286712646, "learning_rate": 2.101719940549899e-05, "loss": 0.9388, "step": 36840 }, { "epoch": 11.02, "grad_norm": 3.0737853050231934, "learning_rate": 2.1011399341396664e-05, "loss": 0.9643, "step": 36845 }, { "epoch": 11.03, "grad_norm": 3.187286853790283, "learning_rate": 2.1005599497580596e-05, "loss": 1.0258, "step": 36850 }, { "epoch": 11.03, "grad_norm": 1.8270454406738281, "learning_rate": 2.0999799874371124e-05, "loss": 1.0375, "step": 36855 }, { "epoch": 11.03, "grad_norm": 3.7559213638305664, "learning_rate": 2.0994000472088537e-05, "loss": 1.117, "step": 36860 }, { "epoch": 11.03, "grad_norm": 2.0919082164764404, "learning_rate": 2.0988201291053154e-05, "loss": 1.0887, "step": 36865 }, { "epoch": 11.03, "grad_norm": 2.3934216499328613, "learning_rate": 2.098240233158523e-05, "loss": 1.1399, "step": 36870 }, { "epoch": 11.03, "grad_norm": 1.183286190032959, "learning_rate": 2.0976603594005063e-05, "loss": 1.0758, "step": 36875 }, { "epoch": 11.03, "grad_norm": 2.7067835330963135, "learning_rate": 2.0970805078632887e-05, "loss": 1.2898, "step": 36880 }, { "epoch": 11.04, "grad_norm": 2.4200856685638428, "learning_rate": 2.0965006785788958e-05, "loss": 0.9024, "step": 36885 }, { "epoch": 11.04, "grad_norm": 1.7485568523406982, "learning_rate": 2.0959208715793516e-05, "loss": 1.0622, "step": 36890 }, { "epoch": 11.04, "grad_norm": 1.9122002124786377, "learning_rate": 2.095341086896677e-05, "loss": 0.8777, "step": 36895 }, { "epoch": 11.04, "grad_norm": 2.849827289581299, "learning_rate": 2.0947613245628944e-05, "loss": 0.9607, "step": 36900 }, { "epoch": 11.04, "grad_norm": 2.4549102783203125, "learning_rate": 2.0941815846100216e-05, "loss": 0.8974, "step": 36905 }, { "epoch": 11.04, "grad_norm": 6.144309997558594, "learning_rate": 2.093601867070079e-05, "loss": 1.0058, "step": 36910 }, { "epoch": 11.04, "grad_norm": 2.7253663539886475, "learning_rate": 2.093022171975083e-05, "loss": 1.0684, "step": 36915 }, { "epoch": 
11.05, "grad_norm": 7.483120918273926, "learning_rate": 2.0924424993570485e-05, "loss": 0.8457, "step": 36920 }, { "epoch": 11.05, "grad_norm": 2.1432487964630127, "learning_rate": 2.091862849247992e-05, "loss": 0.9498, "step": 36925 }, { "epoch": 11.05, "grad_norm": 1.383862853050232, "learning_rate": 2.091283221679925e-05, "loss": 1.0247, "step": 36930 }, { "epoch": 11.05, "grad_norm": 2.251617431640625, "learning_rate": 2.090703616684862e-05, "loss": 1.0731, "step": 36935 }, { "epoch": 11.05, "grad_norm": 2.736708879470825, "learning_rate": 2.0901240342948128e-05, "loss": 1.0303, "step": 36940 }, { "epoch": 11.05, "grad_norm": 8.472616195678711, "learning_rate": 2.089544474541786e-05, "loss": 1.1477, "step": 36945 }, { "epoch": 11.06, "grad_norm": 2.7550952434539795, "learning_rate": 2.0889649374577923e-05, "loss": 0.7887, "step": 36950 }, { "epoch": 11.06, "grad_norm": 4.178884506225586, "learning_rate": 2.088385423074837e-05, "loss": 1.0598, "step": 36955 }, { "epoch": 11.06, "grad_norm": 2.964043140411377, "learning_rate": 2.087805931424927e-05, "loss": 1.1823, "step": 36960 }, { "epoch": 11.06, "grad_norm": 1.5841584205627441, "learning_rate": 2.0872264625400673e-05, "loss": 0.8966, "step": 36965 }, { "epoch": 11.06, "grad_norm": 3.2998929023742676, "learning_rate": 2.086647016452261e-05, "loss": 1.2444, "step": 36970 }, { "epoch": 11.06, "grad_norm": 9.047067642211914, "learning_rate": 2.086067593193511e-05, "loss": 1.1063, "step": 36975 }, { "epoch": 11.06, "grad_norm": 4.006898403167725, "learning_rate": 2.085488192795816e-05, "loss": 1.0788, "step": 36980 }, { "epoch": 11.07, "grad_norm": 1.3867641687393188, "learning_rate": 2.084908815291179e-05, "loss": 1.0392, "step": 36985 }, { "epoch": 11.07, "grad_norm": 2.389448404312134, "learning_rate": 2.084329460711595e-05, "loss": 1.169, "step": 36990 }, { "epoch": 11.07, "grad_norm": 6.467934608459473, "learning_rate": 2.0837501290890644e-05, "loss": 1.1412, "step": 36995 }, { "epoch": 11.07, "grad_norm": 
1.3220041990280151, "learning_rate": 2.08317082045558e-05, "loss": 1.0507, "step": 37000 }, { "epoch": 11.07, "grad_norm": 2.8691465854644775, "learning_rate": 2.082591534843139e-05, "loss": 1.0901, "step": 37005 }, { "epoch": 11.07, "grad_norm": 1.4073899984359741, "learning_rate": 2.082012272283734e-05, "loss": 1.1085, "step": 37010 }, { "epoch": 11.07, "grad_norm": 1.7630438804626465, "learning_rate": 2.0814330328093557e-05, "loss": 1.072, "step": 37015 }, { "epoch": 11.08, "grad_norm": 3.4774584770202637, "learning_rate": 2.0808538164519965e-05, "loss": 0.9849, "step": 37020 }, { "epoch": 11.08, "grad_norm": 1.4681512117385864, "learning_rate": 2.0802746232436445e-05, "loss": 1.0581, "step": 37025 }, { "epoch": 11.08, "grad_norm": 1.4620872735977173, "learning_rate": 2.0796954532162898e-05, "loss": 1.1402, "step": 37030 }, { "epoch": 11.08, "grad_norm": 4.764907360076904, "learning_rate": 2.0791163064019174e-05, "loss": 0.9688, "step": 37035 }, { "epoch": 11.08, "grad_norm": 1.992462396621704, "learning_rate": 2.078537182832515e-05, "loss": 0.8545, "step": 37040 }, { "epoch": 11.08, "grad_norm": 2.524420976638794, "learning_rate": 2.0779580825400653e-05, "loss": 0.9803, "step": 37045 }, { "epoch": 11.08, "grad_norm": 4.6814351081848145, "learning_rate": 2.077379005556552e-05, "loss": 1.0973, "step": 37050 }, { "epoch": 11.09, "grad_norm": 1.9848724603652954, "learning_rate": 2.076799951913957e-05, "loss": 1.0625, "step": 37055 }, { "epoch": 11.09, "grad_norm": 5.070384502410889, "learning_rate": 2.0762209216442607e-05, "loss": 1.1657, "step": 37060 }, { "epoch": 11.09, "grad_norm": 3.4245998859405518, "learning_rate": 2.0756419147794427e-05, "loss": 0.7992, "step": 37065 }, { "epoch": 11.09, "grad_norm": 3.5566506385803223, "learning_rate": 2.075062931351481e-05, "loss": 0.98, "step": 37070 }, { "epoch": 11.09, "grad_norm": 1.4207732677459717, "learning_rate": 2.0744839713923503e-05, "loss": 1.1456, "step": 37075 }, { "epoch": 11.09, "grad_norm": 
1.3465211391448975, "learning_rate": 2.073905034934029e-05, "loss": 1.0347, "step": 37080 }, { "epoch": 11.1, "grad_norm": 1.9427791833877563, "learning_rate": 2.0733261220084886e-05, "loss": 1.0611, "step": 37085 }, { "epoch": 11.1, "grad_norm": 1.6937408447265625, "learning_rate": 2.0727472326477043e-05, "loss": 1.0662, "step": 37090 }, { "epoch": 11.1, "grad_norm": 3.2311325073242188, "learning_rate": 2.072168366883645e-05, "loss": 1.085, "step": 37095 }, { "epoch": 11.1, "grad_norm": 1.9753808975219727, "learning_rate": 2.0715895247482833e-05, "loss": 0.9925, "step": 37100 }, { "epoch": 11.1, "grad_norm": 1.8392963409423828, "learning_rate": 2.0710107062735867e-05, "loss": 0.9266, "step": 37105 }, { "epoch": 11.1, "grad_norm": 1.352810025215149, "learning_rate": 2.0704319114915218e-05, "loss": 1.0309, "step": 37110 }, { "epoch": 11.1, "grad_norm": 2.322559118270874, "learning_rate": 2.0698531404340573e-05, "loss": 1.0988, "step": 37115 }, { "epoch": 11.11, "grad_norm": 6.6522650718688965, "learning_rate": 2.0692743931331554e-05, "loss": 0.7917, "step": 37120 }, { "epoch": 11.11, "grad_norm": 5.853671550750732, "learning_rate": 2.068695669620782e-05, "loss": 1.034, "step": 37125 }, { "epoch": 11.11, "grad_norm": 2.2540881633758545, "learning_rate": 2.0681169699288974e-05, "loss": 1.0728, "step": 37130 }, { "epoch": 11.11, "grad_norm": 3.5193233489990234, "learning_rate": 2.067538294089465e-05, "loss": 1.0295, "step": 37135 }, { "epoch": 11.11, "grad_norm": 3.8422839641571045, "learning_rate": 2.0669596421344422e-05, "loss": 1.1198, "step": 37140 }, { "epoch": 11.11, "grad_norm": 2.048434019088745, "learning_rate": 2.0663810140957884e-05, "loss": 1.0309, "step": 37145 }, { "epoch": 11.11, "grad_norm": 1.556140661239624, "learning_rate": 2.0658024100054608e-05, "loss": 0.9682, "step": 37150 }, { "epoch": 11.12, "grad_norm": 2.3426733016967773, "learning_rate": 2.0652238298954142e-05, "loss": 1.0461, "step": 37155 }, { "epoch": 11.12, "grad_norm": 
2.597688913345337, "learning_rate": 2.0646452737976037e-05, "loss": 1.1395, "step": 37160 }, { "epoch": 11.12, "grad_norm": 2.909790277481079, "learning_rate": 2.0640667417439826e-05, "loss": 1.0784, "step": 37165 }, { "epoch": 11.12, "grad_norm": 4.795456409454346, "learning_rate": 2.0634882337665018e-05, "loss": 0.9231, "step": 37170 }, { "epoch": 11.12, "grad_norm": 4.892722129821777, "learning_rate": 2.0629097498971128e-05, "loss": 1.0418, "step": 37175 }, { "epoch": 11.12, "grad_norm": 2.8806307315826416, "learning_rate": 2.062331290167763e-05, "loss": 1.1341, "step": 37180 }, { "epoch": 11.13, "grad_norm": 2.0100202560424805, "learning_rate": 2.061752854610402e-05, "loss": 1.0645, "step": 37185 }, { "epoch": 11.13, "grad_norm": 3.8369128704071045, "learning_rate": 2.0611744432569743e-05, "loss": 0.8733, "step": 37190 }, { "epoch": 11.13, "grad_norm": 1.7996447086334229, "learning_rate": 2.060596056139427e-05, "loss": 1.0846, "step": 37195 }, { "epoch": 11.13, "grad_norm": 2.4281742572784424, "learning_rate": 2.060017693289703e-05, "loss": 0.9427, "step": 37200 }, { "epoch": 11.13, "grad_norm": 3.646941900253296, "learning_rate": 2.0594393547397432e-05, "loss": 0.9663, "step": 37205 }, { "epoch": 11.13, "grad_norm": 2.000669240951538, "learning_rate": 2.058861040521491e-05, "loss": 1.1134, "step": 37210 }, { "epoch": 11.13, "grad_norm": 3.394282579421997, "learning_rate": 2.058282750666884e-05, "loss": 1.0419, "step": 37215 }, { "epoch": 11.14, "grad_norm": 1.8753750324249268, "learning_rate": 2.0577044852078624e-05, "loss": 0.9423, "step": 37220 }, { "epoch": 11.14, "grad_norm": 1.0014708042144775, "learning_rate": 2.0571262441763613e-05, "loss": 1.1235, "step": 37225 }, { "epoch": 11.14, "grad_norm": 6.966102600097656, "learning_rate": 2.0565480276043186e-05, "loss": 1.0036, "step": 37230 }, { "epoch": 11.14, "grad_norm": 3.9602415561676025, "learning_rate": 2.055969835523667e-05, "loss": 1.0308, "step": 37235 }, { "epoch": 11.14, "grad_norm": 
13.549233436584473, "learning_rate": 2.0553916679663394e-05, "loss": 1.1082, "step": 37240 }, { "epoch": 11.14, "grad_norm": 3.726292610168457, "learning_rate": 2.0548135249642683e-05, "loss": 1.1484, "step": 37245 }, { "epoch": 11.14, "grad_norm": 2.3705708980560303, "learning_rate": 2.054235406549383e-05, "loss": 1.0761, "step": 37250 }, { "epoch": 11.15, "grad_norm": 2.891065835952759, "learning_rate": 2.0536573127536133e-05, "loss": 1.0185, "step": 37255 }, { "epoch": 11.15, "grad_norm": 1.9550832509994507, "learning_rate": 2.053079243608886e-05, "loss": 1.1753, "step": 37260 }, { "epoch": 11.15, "grad_norm": 4.056787490844727, "learning_rate": 2.052501199147128e-05, "loss": 0.965, "step": 37265 }, { "epoch": 11.15, "grad_norm": 4.4339494705200195, "learning_rate": 2.0519231794002637e-05, "loss": 1.0662, "step": 37270 }, { "epoch": 11.15, "grad_norm": 1.7741667032241821, "learning_rate": 2.0513451844002154e-05, "loss": 1.0956, "step": 37275 }, { "epoch": 11.15, "grad_norm": 7.996791839599609, "learning_rate": 2.0507672141789074e-05, "loss": 0.7328, "step": 37280 }, { "epoch": 11.16, "grad_norm": 1.697172999382019, "learning_rate": 2.050189268768258e-05, "loss": 1.0675, "step": 37285 }, { "epoch": 11.16, "grad_norm": 1.4075742959976196, "learning_rate": 2.049611348200189e-05, "loss": 1.0553, "step": 37290 }, { "epoch": 11.16, "grad_norm": 2.2331416606903076, "learning_rate": 2.0490334525066172e-05, "loss": 0.9881, "step": 37295 }, { "epoch": 11.16, "grad_norm": 1.8239549398422241, "learning_rate": 2.0484555817194576e-05, "loss": 1.1081, "step": 37300 }, { "epoch": 11.16, "grad_norm": 3.33955454826355, "learning_rate": 2.047877735870628e-05, "loss": 1.0621, "step": 37305 }, { "epoch": 11.16, "grad_norm": 4.439455509185791, "learning_rate": 2.0472999149920403e-05, "loss": 0.8967, "step": 37310 }, { "epoch": 11.16, "grad_norm": 1.3203307390213013, "learning_rate": 2.0467221191156085e-05, "loss": 1.1234, "step": 37315 }, { "epoch": 11.17, "grad_norm": 
1.8145140409469604, "learning_rate": 2.046144348273242e-05, "loss": 0.8947, "step": 37320 }, { "epoch": 11.17, "grad_norm": 1.8895070552825928, "learning_rate": 2.0455666024968527e-05, "loss": 0.8826, "step": 37325 }, { "epoch": 11.17, "grad_norm": 2.4296302795410156, "learning_rate": 2.0449888818183465e-05, "loss": 1.1869, "step": 37330 }, { "epoch": 11.17, "grad_norm": 6.826413154602051, "learning_rate": 2.0444111862696314e-05, "loss": 0.9804, "step": 37335 }, { "epoch": 11.17, "grad_norm": 3.0793893337249756, "learning_rate": 2.0438335158826134e-05, "loss": 1.1888, "step": 37340 }, { "epoch": 11.17, "grad_norm": 5.991076469421387, "learning_rate": 2.043255870689196e-05, "loss": 1.0324, "step": 37345 }, { "epoch": 11.17, "grad_norm": 4.768267631530762, "learning_rate": 2.0426782507212822e-05, "loss": 0.9743, "step": 37350 }, { "epoch": 11.18, "grad_norm": 2.971740245819092, "learning_rate": 2.0421006560107726e-05, "loss": 1.0149, "step": 37355 }, { "epoch": 11.18, "grad_norm": 4.107237339019775, "learning_rate": 2.041523086589569e-05, "loss": 1.147, "step": 37360 }, { "epoch": 11.18, "grad_norm": 2.612445116043091, "learning_rate": 2.0409455424895686e-05, "loss": 0.9139, "step": 37365 }, { "epoch": 11.18, "grad_norm": 1.6917109489440918, "learning_rate": 2.0403680237426677e-05, "loss": 1.003, "step": 37370 }, { "epoch": 11.18, "grad_norm": 24.12595558166504, "learning_rate": 2.0397905303807642e-05, "loss": 0.9988, "step": 37375 }, { "epoch": 11.18, "grad_norm": 3.371530532836914, "learning_rate": 2.0392130624357502e-05, "loss": 0.9194, "step": 37380 }, { "epoch": 11.19, "grad_norm": 1.867092490196228, "learning_rate": 2.038635619939521e-05, "loss": 1.015, "step": 37385 }, { "epoch": 11.19, "grad_norm": 1.5033526420593262, "learning_rate": 2.0380582029239655e-05, "loss": 1.0523, "step": 37390 }, { "epoch": 11.19, "grad_norm": 1.303634524345398, "learning_rate": 2.0374808114209767e-05, "loss": 0.9923, "step": 37395 }, { "epoch": 11.19, "grad_norm": 
0.9598073363304138, "learning_rate": 2.036903445462442e-05, "loss": 0.8324, "step": 37400 }, { "epoch": 11.19, "grad_norm": 2.9755938053131104, "learning_rate": 2.0363261050802473e-05, "loss": 1.0322, "step": 37405 }, { "epoch": 11.19, "grad_norm": 2.0213468074798584, "learning_rate": 2.0357487903062805e-05, "loss": 1.1045, "step": 37410 }, { "epoch": 11.19, "grad_norm": 2.2053709030151367, "learning_rate": 2.0351715011724255e-05, "loss": 1.0586, "step": 37415 }, { "epoch": 11.2, "grad_norm": 2.988892078399658, "learning_rate": 2.0345942377105654e-05, "loss": 0.9125, "step": 37420 }, { "epoch": 11.2, "grad_norm": 1.9951380491256714, "learning_rate": 2.034016999952582e-05, "loss": 1.0415, "step": 37425 }, { "epoch": 11.2, "grad_norm": 2.1068739891052246, "learning_rate": 2.0334397879303545e-05, "loss": 1.0635, "step": 37430 }, { "epoch": 11.2, "grad_norm": 1.8847640752792358, "learning_rate": 2.032862601675764e-05, "loss": 1.1524, "step": 37435 }, { "epoch": 11.2, "grad_norm": 2.446730375289917, "learning_rate": 2.0322854412206848e-05, "loss": 1.1736, "step": 37440 }, { "epoch": 11.2, "grad_norm": 3.1144163608551025, "learning_rate": 2.0317083065969957e-05, "loss": 1.0448, "step": 37445 }, { "epoch": 11.2, "grad_norm": 1.5927165746688843, "learning_rate": 2.0311311978365694e-05, "loss": 0.8874, "step": 37450 }, { "epoch": 11.21, "grad_norm": 2.248284101486206, "learning_rate": 2.0305541149712802e-05, "loss": 1.134, "step": 37455 }, { "epoch": 11.21, "grad_norm": 5.157928943634033, "learning_rate": 2.0299770580329997e-05, "loss": 1.1917, "step": 37460 }, { "epoch": 11.21, "grad_norm": 2.8743419647216797, "learning_rate": 2.0294000270535963e-05, "loss": 1.0266, "step": 37465 }, { "epoch": 11.21, "grad_norm": 2.73984956741333, "learning_rate": 2.0288230220649415e-05, "loss": 0.9329, "step": 37470 }, { "epoch": 11.21, "grad_norm": 1.6899389028549194, "learning_rate": 2.0282460430989003e-05, "loss": 0.9871, "step": 37475 }, { "epoch": 11.21, "grad_norm": 
3.168191432952881, "learning_rate": 2.027669090187341e-05, "loss": 1.0343, "step": 37480 }, { "epoch": 11.22, "grad_norm": 4.179646968841553, "learning_rate": 2.0270921633621255e-05, "loss": 0.9998, "step": 37485 }, { "epoch": 11.22, "grad_norm": 2.1786577701568604, "learning_rate": 2.0265152626551195e-05, "loss": 1.2523, "step": 37490 }, { "epoch": 11.22, "grad_norm": 2.7645535469055176, "learning_rate": 2.025938388098183e-05, "loss": 1.078, "step": 37495 }, { "epoch": 11.22, "grad_norm": 4.356101036071777, "learning_rate": 2.0253615397231764e-05, "loss": 1.0382, "step": 37500 }, { "epoch": 11.22, "grad_norm": 1.3754960298538208, "learning_rate": 2.0247847175619584e-05, "loss": 1.1546, "step": 37505 }, { "epoch": 11.22, "grad_norm": 1.7943824529647827, "learning_rate": 2.024207921646387e-05, "loss": 1.0802, "step": 37510 }, { "epoch": 11.22, "grad_norm": 2.5919864177703857, "learning_rate": 2.023631152008317e-05, "loss": 1.0181, "step": 37515 }, { "epoch": 11.23, "grad_norm": 3.6458067893981934, "learning_rate": 2.0230544086796045e-05, "loss": 0.9573, "step": 37520 }, { "epoch": 11.23, "grad_norm": 3.077378273010254, "learning_rate": 2.0224776916920996e-05, "loss": 1.2146, "step": 37525 }, { "epoch": 11.23, "grad_norm": 3.0590150356292725, "learning_rate": 2.0219010010776568e-05, "loss": 0.8816, "step": 37530 }, { "epoch": 11.23, "grad_norm": 2.068225145339966, "learning_rate": 2.0213243368681234e-05, "loss": 0.929, "step": 37535 }, { "epoch": 11.23, "grad_norm": 3.8140151500701904, "learning_rate": 2.0207476990953505e-05, "loss": 0.8558, "step": 37540 }, { "epoch": 11.23, "grad_norm": 2.6358039379119873, "learning_rate": 2.0201710877911832e-05, "loss": 1.1409, "step": 37545 }, { "epoch": 11.23, "grad_norm": 3.132976531982422, "learning_rate": 2.0195945029874687e-05, "loss": 0.9641, "step": 37550 }, { "epoch": 11.24, "grad_norm": 5.017673015594482, "learning_rate": 2.0190179447160505e-05, "loss": 1.0201, "step": 37555 }, { "epoch": 11.24, "grad_norm": 
2.018449306488037, "learning_rate": 2.0184414130087704e-05, "loss": 1.1206, "step": 37560 }, { "epoch": 11.24, "grad_norm": 5.14389705657959, "learning_rate": 2.0178649078974714e-05, "loss": 1.0431, "step": 37565 }, { "epoch": 11.24, "grad_norm": 1.725806713104248, "learning_rate": 2.0172884294139917e-05, "loss": 0.9678, "step": 37570 }, { "epoch": 11.24, "grad_norm": 1.3185760974884033, "learning_rate": 2.0167119775901706e-05, "loss": 0.8791, "step": 37575 }, { "epoch": 11.24, "grad_norm": 2.4314846992492676, "learning_rate": 2.016135552457844e-05, "loss": 0.9986, "step": 37580 }, { "epoch": 11.24, "grad_norm": 3.166346311569214, "learning_rate": 2.015559154048849e-05, "loss": 1.1143, "step": 37585 }, { "epoch": 11.25, "grad_norm": 2.213935136795044, "learning_rate": 2.014982782395018e-05, "loss": 0.9987, "step": 37590 }, { "epoch": 11.25, "grad_norm": 1.993200659751892, "learning_rate": 2.0144064375281834e-05, "loss": 1.0445, "step": 37595 }, { "epoch": 11.25, "grad_norm": 3.238537073135376, "learning_rate": 2.0138301194801768e-05, "loss": 1.1212, "step": 37600 }, { "epoch": 11.25, "grad_norm": 5.660292625427246, "learning_rate": 2.0132538282828273e-05, "loss": 1.1843, "step": 37605 }, { "epoch": 11.25, "grad_norm": 2.940760374069214, "learning_rate": 2.012677563967963e-05, "loss": 1.1591, "step": 37610 }, { "epoch": 11.25, "grad_norm": 2.0081498622894287, "learning_rate": 2.0121013265674095e-05, "loss": 1.0123, "step": 37615 }, { "epoch": 11.26, "grad_norm": 2.4187111854553223, "learning_rate": 2.011525116112994e-05, "loss": 1.019, "step": 37620 }, { "epoch": 11.26, "grad_norm": 2.3297410011291504, "learning_rate": 2.0109489326365384e-05, "loss": 1.0562, "step": 37625 }, { "epoch": 11.26, "grad_norm": 4.753705024719238, "learning_rate": 2.0103727761698636e-05, "loss": 1.0245, "step": 37630 }, { "epoch": 11.26, "grad_norm": 2.0567519664764404, "learning_rate": 2.0097966467447926e-05, "loss": 1.1675, "step": 37635 }, { "epoch": 11.26, "grad_norm": 
4.220091342926025, "learning_rate": 2.0092205443931422e-05, "loss": 0.7716, "step": 37640 }, { "epoch": 11.26, "grad_norm": 4.092503547668457, "learning_rate": 2.0086444691467325e-05, "loss": 1.083, "step": 37645 }, { "epoch": 11.26, "grad_norm": 2.5490052700042725, "learning_rate": 2.0080684210373777e-05, "loss": 1.3257, "step": 37650 }, { "epoch": 11.27, "grad_norm": 2.8553218841552734, "learning_rate": 2.0074924000968913e-05, "loss": 0.8709, "step": 37655 }, { "epoch": 11.27, "grad_norm": 12.186079978942871, "learning_rate": 2.0069164063570896e-05, "loss": 1.0308, "step": 37660 }, { "epoch": 11.27, "grad_norm": 2.453186511993408, "learning_rate": 2.0063404398497803e-05, "loss": 1.2019, "step": 37665 }, { "epoch": 11.27, "grad_norm": 1.8696728944778442, "learning_rate": 2.0057645006067767e-05, "loss": 0.9861, "step": 37670 }, { "epoch": 11.27, "grad_norm": 2.764322519302368, "learning_rate": 2.0051885886598855e-05, "loss": 1.0721, "step": 37675 }, { "epoch": 11.27, "grad_norm": 3.057974338531494, "learning_rate": 2.0046127040409145e-05, "loss": 1.0609, "step": 37680 }, { "epoch": 11.27, "grad_norm": 1.6020811796188354, "learning_rate": 2.0040368467816688e-05, "loss": 0.8825, "step": 37685 }, { "epoch": 11.28, "grad_norm": 1.326548457145691, "learning_rate": 2.0034610169139527e-05, "loss": 1.1104, "step": 37690 }, { "epoch": 11.28, "grad_norm": 3.1496689319610596, "learning_rate": 2.002885214469568e-05, "loss": 1.106, "step": 37695 }, { "epoch": 11.28, "grad_norm": 2.7148725986480713, "learning_rate": 2.002309439480316e-05, "loss": 0.9303, "step": 37700 }, { "epoch": 11.28, "grad_norm": 2.3015408515930176, "learning_rate": 2.0017336919779973e-05, "loss": 0.9726, "step": 37705 }, { "epoch": 11.28, "grad_norm": 6.830209732055664, "learning_rate": 2.001157971994408e-05, "loss": 1.008, "step": 37710 }, { "epoch": 11.28, "grad_norm": 6.348620891571045, "learning_rate": 2.000582279561346e-05, "loss": 0.8794, "step": 37715 }, { "epoch": 11.29, "grad_norm": 
2.9782297611236572, "learning_rate": 2.000006614710606e-05, "loss": 0.96, "step": 37720 }, { "epoch": 11.29, "grad_norm": 3.9869275093078613, "learning_rate": 1.9994309774739797e-05, "loss": 1.0065, "step": 37725 }, { "epoch": 11.29, "grad_norm": 9.005064010620117, "learning_rate": 1.9988553678832612e-05, "loss": 1.1601, "step": 37730 }, { "epoch": 11.29, "grad_norm": 3.4525146484375, "learning_rate": 1.998279785970239e-05, "loss": 1.2198, "step": 37735 }, { "epoch": 11.29, "grad_norm": 3.2368991374969482, "learning_rate": 1.997704231766704e-05, "loss": 1.0094, "step": 37740 }, { "epoch": 11.29, "grad_norm": 2.183854341506958, "learning_rate": 1.9971287053044406e-05, "loss": 1.1691, "step": 37745 }, { "epoch": 11.29, "grad_norm": 1.5113462209701538, "learning_rate": 1.9965532066152373e-05, "loss": 1.0431, "step": 37750 }, { "epoch": 11.3, "grad_norm": 3.4166901111602783, "learning_rate": 1.9959777357308772e-05, "loss": 0.9979, "step": 37755 }, { "epoch": 11.3, "grad_norm": 1.2922513484954834, "learning_rate": 1.9954022926831416e-05, "loss": 1.0215, "step": 37760 }, { "epoch": 11.3, "grad_norm": 3.349341630935669, "learning_rate": 1.994826877503814e-05, "loss": 0.9885, "step": 37765 }, { "epoch": 11.3, "grad_norm": 9.634007453918457, "learning_rate": 1.994251490224672e-05, "loss": 0.9638, "step": 37770 }, { "epoch": 11.3, "grad_norm": 2.8767809867858887, "learning_rate": 1.993676130877495e-05, "loss": 1.0608, "step": 37775 }, { "epoch": 11.3, "grad_norm": 1.4088268280029297, "learning_rate": 1.9931007994940592e-05, "loss": 0.9073, "step": 37780 }, { "epoch": 11.3, "grad_norm": 3.421081066131592, "learning_rate": 1.9925254961061385e-05, "loss": 1.1258, "step": 37785 }, { "epoch": 11.31, "grad_norm": 3.029031991958618, "learning_rate": 1.9919502207455083e-05, "loss": 1.053, "step": 37790 }, { "epoch": 11.31, "grad_norm": 3.345547676086426, "learning_rate": 1.991374973443938e-05, "loss": 1.0845, "step": 37795 }, { "epoch": 11.31, "grad_norm": 3.7649970054626465, 
"learning_rate": 1.9907997542332006e-05, "loss": 0.9521, "step": 37800 }, { "epoch": 11.31, "grad_norm": 5.937236785888672, "learning_rate": 1.990224563145062e-05, "loss": 1.0654, "step": 37805 }, { "epoch": 11.31, "grad_norm": 3.4234955310821533, "learning_rate": 1.9896494002112926e-05, "loss": 0.981, "step": 37810 }, { "epoch": 11.31, "grad_norm": 1.7007039785385132, "learning_rate": 1.989074265463656e-05, "loss": 0.8819, "step": 37815 }, { "epoch": 11.32, "grad_norm": 1.6560670137405396, "learning_rate": 1.9884991589339157e-05, "loss": 0.943, "step": 37820 }, { "epoch": 11.32, "grad_norm": 5.352659225463867, "learning_rate": 1.987924080653836e-05, "loss": 0.8836, "step": 37825 }, { "epoch": 11.32, "grad_norm": 3.438709259033203, "learning_rate": 1.9873490306551762e-05, "loss": 1.086, "step": 37830 }, { "epoch": 11.32, "grad_norm": 1.2173386812210083, "learning_rate": 1.9867740089696976e-05, "loss": 1.1148, "step": 37835 }, { "epoch": 11.32, "grad_norm": 1.3468738794326782, "learning_rate": 1.986199015629156e-05, "loss": 0.9861, "step": 37840 }, { "epoch": 11.32, "grad_norm": 3.865586280822754, "learning_rate": 1.9856240506653097e-05, "loss": 0.9005, "step": 37845 }, { "epoch": 11.32, "grad_norm": 3.671863079071045, "learning_rate": 1.9850491141099122e-05, "loss": 1.0731, "step": 37850 }, { "epoch": 11.33, "grad_norm": 3.499868154525757, "learning_rate": 1.984474205994716e-05, "loss": 1.0681, "step": 37855 }, { "epoch": 11.33, "grad_norm": 3.519331216812134, "learning_rate": 1.983899326351474e-05, "loss": 1.1132, "step": 37860 }, { "epoch": 11.33, "grad_norm": 1.4864459037780762, "learning_rate": 1.983324475211936e-05, "loss": 1.0481, "step": 37865 }, { "epoch": 11.33, "grad_norm": 3.8415820598602295, "learning_rate": 1.9827496526078498e-05, "loss": 0.9927, "step": 37870 }, { "epoch": 11.33, "grad_norm": 3.5681967735290527, "learning_rate": 1.9821748585709634e-05, "loss": 0.9556, "step": 37875 }, { "epoch": 11.33, "grad_norm": 2.1792047023773193, "learning_rate": 
1.9816000931330203e-05, "loss": 0.9622, "step": 37880 }, { "epoch": 11.33, "grad_norm": 1.3530371189117432, "learning_rate": 1.9810253563257662e-05, "loss": 1.1555, "step": 37885 }, { "epoch": 11.34, "grad_norm": 1.9766597747802734, "learning_rate": 1.980450648180941e-05, "loss": 1.093, "step": 37890 }, { "epoch": 11.34, "grad_norm": 3.305300712585449, "learning_rate": 1.9798759687302874e-05, "loss": 0.9854, "step": 37895 }, { "epoch": 11.34, "grad_norm": 1.5727300643920898, "learning_rate": 1.9793013180055427e-05, "loss": 0.9959, "step": 37900 }, { "epoch": 11.34, "grad_norm": 2.8745787143707275, "learning_rate": 1.9787266960384458e-05, "loss": 1.2233, "step": 37905 }, { "epoch": 11.34, "grad_norm": 1.7028470039367676, "learning_rate": 1.978152102860732e-05, "loss": 1.04, "step": 37910 }, { "epoch": 11.34, "grad_norm": 7.1427717208862305, "learning_rate": 1.977577538504134e-05, "loss": 0.9223, "step": 37915 }, { "epoch": 11.35, "grad_norm": 2.86995530128479, "learning_rate": 1.9770030030003863e-05, "loss": 1.1699, "step": 37920 }, { "epoch": 11.35, "grad_norm": 1.6554863452911377, "learning_rate": 1.9764284963812183e-05, "loss": 1.1282, "step": 37925 }, { "epoch": 11.35, "grad_norm": 2.9643630981445312, "learning_rate": 1.9758540186783613e-05, "loss": 0.883, "step": 37930 }, { "epoch": 11.35, "grad_norm": 2.1681020259857178, "learning_rate": 1.9752795699235406e-05, "loss": 1.1171, "step": 37935 }, { "epoch": 11.35, "grad_norm": 3.2414727210998535, "learning_rate": 1.9747051501484852e-05, "loss": 1.1585, "step": 37940 }, { "epoch": 11.35, "grad_norm": 1.7487002611160278, "learning_rate": 1.9741307593849178e-05, "loss": 1.011, "step": 37945 }, { "epoch": 11.35, "grad_norm": 1.7904525995254517, "learning_rate": 1.9735563976645617e-05, "loss": 1.0142, "step": 37950 }, { "epoch": 11.36, "grad_norm": 3.4145519733428955, "learning_rate": 1.9729820650191388e-05, "loss": 1.0456, "step": 37955 }, { "epoch": 11.36, "grad_norm": 1.48880934715271, "learning_rate": 
1.9724077614803686e-05, "loss": 1.0409, "step": 37960 }, { "epoch": 11.36, "grad_norm": 2.0806167125701904, "learning_rate": 1.9718334870799693e-05, "loss": 1.024, "step": 37965 }, { "epoch": 11.36, "grad_norm": 3.4798760414123535, "learning_rate": 1.9712592418496577e-05, "loss": 1.042, "step": 37970 }, { "epoch": 11.36, "grad_norm": 2.2736198902130127, "learning_rate": 1.9706850258211485e-05, "loss": 1.0028, "step": 37975 }, { "epoch": 11.36, "grad_norm": 4.118747711181641, "learning_rate": 1.9701108390261556e-05, "loss": 1.0363, "step": 37980 }, { "epoch": 11.36, "grad_norm": 26.541406631469727, "learning_rate": 1.969536681496389e-05, "loss": 1.1795, "step": 37985 }, { "epoch": 11.37, "grad_norm": 2.652695417404175, "learning_rate": 1.9689625532635615e-05, "loss": 1.1951, "step": 37990 }, { "epoch": 11.37, "grad_norm": 1.8068537712097168, "learning_rate": 1.9683884543593788e-05, "loss": 0.9577, "step": 37995 }, { "epoch": 11.37, "grad_norm": 1.585398554801941, "learning_rate": 1.9678143848155505e-05, "loss": 1.0057, "step": 38000 }, { "epoch": 11.37, "grad_norm": 4.244786739349365, "learning_rate": 1.9672403446637806e-05, "loss": 1.1152, "step": 38005 }, { "epoch": 11.37, "grad_norm": 4.891877174377441, "learning_rate": 1.9666663339357714e-05, "loss": 1.1257, "step": 38010 }, { "epoch": 11.37, "grad_norm": 3.488956928253174, "learning_rate": 1.9660923526632275e-05, "loss": 0.9834, "step": 38015 }, { "epoch": 11.38, "grad_norm": 1.9256796836853027, "learning_rate": 1.9655184008778467e-05, "loss": 0.9027, "step": 38020 }, { "epoch": 11.38, "grad_norm": 2.239374876022339, "learning_rate": 1.9649444786113303e-05, "loss": 1.1056, "step": 38025 }, { "epoch": 11.38, "grad_norm": 2.374089241027832, "learning_rate": 1.964370585895373e-05, "loss": 0.9732, "step": 38030 }, { "epoch": 11.38, "grad_norm": 1.6094075441360474, "learning_rate": 1.9637967227616723e-05, "loss": 1.1684, "step": 38035 }, { "epoch": 11.38, "grad_norm": 2.5619702339172363, "learning_rate": 
1.9632228892419214e-05, "loss": 1.1079, "step": 38040 }, { "epoch": 11.38, "grad_norm": 2.8484513759613037, "learning_rate": 1.962649085367812e-05, "loss": 0.9438, "step": 38045 }, { "epoch": 11.38, "grad_norm": 2.0517704486846924, "learning_rate": 1.962075311171035e-05, "loss": 1.1127, "step": 38050 }, { "epoch": 11.39, "grad_norm": 4.042993545532227, "learning_rate": 1.9615015666832795e-05, "loss": 0.776, "step": 38055 }, { "epoch": 11.39, "grad_norm": 3.7668004035949707, "learning_rate": 1.9609278519362326e-05, "loss": 1.0291, "step": 38060 }, { "epoch": 11.39, "grad_norm": 4.582650184631348, "learning_rate": 1.9603541669615796e-05, "loss": 0.9049, "step": 38065 }, { "epoch": 11.39, "grad_norm": 2.7174317836761475, "learning_rate": 1.959780511791006e-05, "loss": 1.0892, "step": 38070 }, { "epoch": 11.39, "grad_norm": 3.4516286849975586, "learning_rate": 1.9592068864561936e-05, "loss": 1.0218, "step": 38075 }, { "epoch": 11.39, "grad_norm": 1.96125328540802, "learning_rate": 1.9586332909888216e-05, "loss": 1.028, "step": 38080 }, { "epoch": 11.39, "grad_norm": 1.8096983432769775, "learning_rate": 1.9580597254205713e-05, "loss": 1.0322, "step": 38085 }, { "epoch": 11.4, "grad_norm": 5.962859153747559, "learning_rate": 1.9574861897831175e-05, "loss": 0.9794, "step": 38090 }, { "epoch": 11.4, "grad_norm": 1.6979426145553589, "learning_rate": 1.9569126841081392e-05, "loss": 1.1257, "step": 38095 }, { "epoch": 11.4, "grad_norm": 3.6008379459381104, "learning_rate": 1.9563392084273074e-05, "loss": 0.9345, "step": 38100 }, { "epoch": 11.4, "grad_norm": 1.6322747468948364, "learning_rate": 1.955765762772297e-05, "loss": 0.9787, "step": 38105 }, { "epoch": 11.4, "grad_norm": 3.6460819244384766, "learning_rate": 1.955192347174778e-05, "loss": 1.0202, "step": 38110 }, { "epoch": 11.4, "grad_norm": 6.339641094207764, "learning_rate": 1.9546189616664183e-05, "loss": 1.0028, "step": 38115 }, { "epoch": 11.41, "grad_norm": 5.438929557800293, "learning_rate": 
1.954045606278888e-05, "loss": 1.1827, "step": 38120 }, { "epoch": 11.41, "grad_norm": 1.7382398843765259, "learning_rate": 1.9534722810438496e-05, "loss": 1.0428, "step": 38125 }, { "epoch": 11.41, "grad_norm": 3.361985206604004, "learning_rate": 1.9528989859929704e-05, "loss": 1.1343, "step": 38130 }, { "epoch": 11.41, "grad_norm": 1.6024760007858276, "learning_rate": 1.952325721157911e-05, "loss": 1.0125, "step": 38135 }, { "epoch": 11.41, "grad_norm": 5.525008201599121, "learning_rate": 1.9517524865703322e-05, "loss": 1.1382, "step": 38140 }, { "epoch": 11.41, "grad_norm": 2.9428493976593018, "learning_rate": 1.9511792822618947e-05, "loss": 1.017, "step": 38145 }, { "epoch": 11.41, "grad_norm": 4.371325492858887, "learning_rate": 1.9506061082642538e-05, "loss": 1.1891, "step": 38150 }, { "epoch": 11.42, "grad_norm": 2.9674203395843506, "learning_rate": 1.9500329646090677e-05, "loss": 1.0249, "step": 38155 }, { "epoch": 11.42, "grad_norm": 2.463282346725464, "learning_rate": 1.949459851327988e-05, "loss": 1.0911, "step": 38160 }, { "epoch": 11.42, "grad_norm": 3.0271716117858887, "learning_rate": 1.94888676845267e-05, "loss": 1.041, "step": 38165 }, { "epoch": 11.42, "grad_norm": 2.7793784141540527, "learning_rate": 1.9483137160147626e-05, "loss": 1.0276, "step": 38170 }, { "epoch": 11.42, "grad_norm": 1.6293712854385376, "learning_rate": 1.947740694045914e-05, "loss": 0.9052, "step": 38175 }, { "epoch": 11.42, "grad_norm": 2.220062017440796, "learning_rate": 1.9471677025777743e-05, "loss": 1.1091, "step": 38180 }, { "epoch": 11.42, "grad_norm": 2.7983016967773438, "learning_rate": 1.9465947416419867e-05, "loss": 1.1761, "step": 38185 }, { "epoch": 11.43, "grad_norm": 2.7694313526153564, "learning_rate": 1.946021811270197e-05, "loss": 0.9134, "step": 38190 }, { "epoch": 11.43, "grad_norm": 4.779193878173828, "learning_rate": 1.9454489114940458e-05, "loss": 1.0319, "step": 38195 }, { "epoch": 11.43, "grad_norm": 2.7941768169403076, "learning_rate": 
1.944876042345176e-05, "loss": 1.1433, "step": 38200 }, { "epoch": 11.43, "grad_norm": 2.305264949798584, "learning_rate": 1.944303203855225e-05, "loss": 1.2263, "step": 38205 }, { "epoch": 11.43, "grad_norm": 8.440693855285645, "learning_rate": 1.94373039605583e-05, "loss": 1.0192, "step": 38210 }, { "epoch": 11.43, "grad_norm": 3.348942995071411, "learning_rate": 1.9431576189786276e-05, "loss": 1.0054, "step": 38215 }, { "epoch": 11.43, "grad_norm": 3.2682738304138184, "learning_rate": 1.9425848726552503e-05, "loss": 1.0656, "step": 38220 }, { "epoch": 11.44, "grad_norm": 1.6970369815826416, "learning_rate": 1.9420121571173315e-05, "loss": 0.943, "step": 38225 }, { "epoch": 11.44, "grad_norm": 3.24284029006958, "learning_rate": 1.941439472396502e-05, "loss": 1.1863, "step": 38230 }, { "epoch": 11.44, "grad_norm": 4.399949550628662, "learning_rate": 1.9408668185243885e-05, "loss": 1.0284, "step": 38235 }, { "epoch": 11.44, "grad_norm": 1.228689193725586, "learning_rate": 1.9402941955326207e-05, "loss": 1.1199, "step": 38240 }, { "epoch": 11.44, "grad_norm": 2.4281301498413086, "learning_rate": 1.9397216034528216e-05, "loss": 1.0223, "step": 38245 }, { "epoch": 11.44, "grad_norm": 6.723038673400879, "learning_rate": 1.939149042316617e-05, "loss": 1.0873, "step": 38250 }, { "epoch": 11.45, "grad_norm": 2.267920970916748, "learning_rate": 1.9385765121556266e-05, "loss": 1.1129, "step": 38255 }, { "epoch": 11.45, "grad_norm": 1.6286221742630005, "learning_rate": 1.9380040130014733e-05, "loss": 0.951, "step": 38260 }, { "epoch": 11.45, "grad_norm": 5.079461574554443, "learning_rate": 1.9374315448857737e-05, "loss": 1.0332, "step": 38265 }, { "epoch": 11.45, "grad_norm": 1.5790833234786987, "learning_rate": 1.9368591078401442e-05, "loss": 0.983, "step": 38270 }, { "epoch": 11.45, "grad_norm": 4.2222394943237305, "learning_rate": 1.9362867018962022e-05, "loss": 1.0405, "step": 38275 }, { "epoch": 11.45, "grad_norm": 1.2576323747634888, "learning_rate": 
1.9357143270855586e-05, "loss": 1.0631, "step": 38280 }, { "epoch": 11.45, "grad_norm": 3.4669594764709473, "learning_rate": 1.935141983439827e-05, "loss": 1.0043, "step": 38285 }, { "epoch": 11.46, "grad_norm": 3.4095098972320557, "learning_rate": 1.9345696709906152e-05, "loss": 1.146, "step": 38290 }, { "epoch": 11.46, "grad_norm": 2.086794137954712, "learning_rate": 1.9339973897695344e-05, "loss": 1.0618, "step": 38295 }, { "epoch": 11.46, "grad_norm": 2.766907215118408, "learning_rate": 1.933425139808189e-05, "loss": 1.1128, "step": 38300 }, { "epoch": 11.46, "grad_norm": 5.271542072296143, "learning_rate": 1.9328529211381835e-05, "loss": 1.0, "step": 38305 }, { "epoch": 11.46, "grad_norm": 2.415320634841919, "learning_rate": 1.932280733791122e-05, "loss": 1.0116, "step": 38310 }, { "epoch": 11.46, "grad_norm": 1.7332686185836792, "learning_rate": 1.9317085777986054e-05, "loss": 1.2115, "step": 38315 }, { "epoch": 11.46, "grad_norm": 4.1479058265686035, "learning_rate": 1.9311364531922333e-05, "loss": 1.1115, "step": 38320 }, { "epoch": 11.47, "grad_norm": 1.9687248468399048, "learning_rate": 1.9305643600036037e-05, "loss": 1.0981, "step": 38325 }, { "epoch": 11.47, "grad_norm": 2.3544981479644775, "learning_rate": 1.9299922982643127e-05, "loss": 1.0191, "step": 38330 }, { "epoch": 11.47, "grad_norm": 3.2068850994110107, "learning_rate": 1.929420268005955e-05, "loss": 0.9429, "step": 38335 }, { "epoch": 11.47, "grad_norm": 2.9129559993743896, "learning_rate": 1.9288482692601217e-05, "loss": 1.004, "step": 38340 }, { "epoch": 11.47, "grad_norm": 1.2691713571548462, "learning_rate": 1.928276302058406e-05, "loss": 1.013, "step": 38345 }, { "epoch": 11.47, "grad_norm": 2.6996827125549316, "learning_rate": 1.9277043664323945e-05, "loss": 1.0327, "step": 38350 }, { "epoch": 11.48, "grad_norm": 3.7437167167663574, "learning_rate": 1.9271324624136775e-05, "loss": 0.9692, "step": 38355 }, { "epoch": 11.48, "grad_norm": 1.4719038009643555, "learning_rate": 
1.926560590033839e-05, "loss": 1.1914, "step": 38360 }, { "epoch": 11.48, "grad_norm": 2.6418561935424805, "learning_rate": 1.9259887493244615e-05, "loss": 1.0561, "step": 38365 }, { "epoch": 11.48, "grad_norm": 1.7341514825820923, "learning_rate": 1.9254169403171303e-05, "loss": 1.1022, "step": 38370 }, { "epoch": 11.48, "grad_norm": 2.533081531524658, "learning_rate": 1.9248451630434232e-05, "loss": 1.0766, "step": 38375 }, { "epoch": 11.48, "grad_norm": 3.991834878921509, "learning_rate": 1.9242734175349208e-05, "loss": 1.2385, "step": 38380 }, { "epoch": 11.48, "grad_norm": 1.5860527753829956, "learning_rate": 1.923701703823198e-05, "loss": 1.1093, "step": 38385 }, { "epoch": 11.49, "grad_norm": 3.969327688217163, "learning_rate": 1.923130021939832e-05, "loss": 0.9571, "step": 38390 }, { "epoch": 11.49, "grad_norm": 3.739284038543701, "learning_rate": 1.922558371916395e-05, "loss": 1.0152, "step": 38395 }, { "epoch": 11.49, "grad_norm": 10.35611343383789, "learning_rate": 1.9219867537844588e-05, "loss": 1.0696, "step": 38400 }, { "epoch": 11.49, "grad_norm": 2.626101016998291, "learning_rate": 1.9214151675755935e-05, "loss": 1.0215, "step": 38405 }, { "epoch": 11.49, "grad_norm": 3.1883928775787354, "learning_rate": 1.920843613321367e-05, "loss": 1.1055, "step": 38410 }, { "epoch": 11.49, "grad_norm": 3.719695568084717, "learning_rate": 1.920272091053346e-05, "loss": 0.9234, "step": 38415 }, { "epoch": 11.49, "grad_norm": 1.9183346033096313, "learning_rate": 1.9197006008030942e-05, "loss": 1.0771, "step": 38420 }, { "epoch": 11.5, "grad_norm": 10.849863052368164, "learning_rate": 1.9191291426021756e-05, "loss": 1.1378, "step": 38425 }, { "epoch": 11.5, "grad_norm": 2.5107712745666504, "learning_rate": 1.9185577164821507e-05, "loss": 0.9376, "step": 38430 }, { "epoch": 11.5, "grad_norm": 3.5658061504364014, "learning_rate": 1.917986322474578e-05, "loss": 0.8868, "step": 38435 }, { "epoch": 11.5, "grad_norm": 4.931859493255615, "learning_rate": 
1.917414960611017e-05, "loss": 0.9333, "step": 38440 }, { "epoch": 11.5, "grad_norm": 3.9199514389038086, "learning_rate": 1.916843630923021e-05, "loss": 1.1173, "step": 38445 }, { "epoch": 11.5, "grad_norm": 1.2305980920791626, "learning_rate": 1.916272333442146e-05, "loss": 1.0331, "step": 38450 }, { "epoch": 11.51, "grad_norm": 1.6451231241226196, "learning_rate": 1.915701068199942e-05, "loss": 0.9607, "step": 38455 }, { "epoch": 11.51, "grad_norm": 1.1699979305267334, "learning_rate": 1.915129835227962e-05, "loss": 0.914, "step": 38460 }, { "epoch": 11.51, "grad_norm": 2.1757733821868896, "learning_rate": 1.9145586345577533e-05, "loss": 1.1439, "step": 38465 }, { "epoch": 11.51, "grad_norm": 1.6203327178955078, "learning_rate": 1.9139874662208615e-05, "loss": 1.2016, "step": 38470 }, { "epoch": 11.51, "grad_norm": 3.481635093688965, "learning_rate": 1.913416330248834e-05, "loss": 0.7547, "step": 38475 }, { "epoch": 11.51, "grad_norm": 2.200890064239502, "learning_rate": 1.9128452266732115e-05, "loss": 1.0624, "step": 38480 }, { "epoch": 11.51, "grad_norm": 1.81036376953125, "learning_rate": 1.9122741555255384e-05, "loss": 0.9228, "step": 38485 }, { "epoch": 11.52, "grad_norm": 4.309178829193115, "learning_rate": 1.911703116837352e-05, "loss": 1.0101, "step": 38490 }, { "epoch": 11.52, "grad_norm": 1.9071286916732788, "learning_rate": 1.911132110640191e-05, "loss": 1.0724, "step": 38495 }, { "epoch": 11.52, "grad_norm": 2.7584476470947266, "learning_rate": 1.9105611369655914e-05, "loss": 1.0121, "step": 38500 }, { "epoch": 11.52, "grad_norm": 3.3853213787078857, "learning_rate": 1.9099901958450868e-05, "loss": 0.9683, "step": 38505 }, { "epoch": 11.52, "grad_norm": 2.5267248153686523, "learning_rate": 1.9094192873102117e-05, "loss": 1.0502, "step": 38510 }, { "epoch": 11.52, "grad_norm": 3.773902416229248, "learning_rate": 1.9088484113924944e-05, "loss": 1.029, "step": 38515 }, { "epoch": 11.52, "grad_norm": 2.5307250022888184, "learning_rate": 
1.908277568123466e-05, "loss": 0.9967, "step": 38520 }, { "epoch": 11.53, "grad_norm": 4.490280628204346, "learning_rate": 1.907706757534652e-05, "loss": 1.0186, "step": 38525 }, { "epoch": 11.53, "grad_norm": 1.3024266958236694, "learning_rate": 1.9071359796575774e-05, "loss": 1.1027, "step": 38530 }, { "epoch": 11.53, "grad_norm": 5.981440544128418, "learning_rate": 1.906565234523767e-05, "loss": 1.1221, "step": 38535 }, { "epoch": 11.53, "grad_norm": 2.8567864894866943, "learning_rate": 1.9059945221647414e-05, "loss": 1.1096, "step": 38540 }, { "epoch": 11.53, "grad_norm": 2.297405481338501, "learning_rate": 1.9054238426120218e-05, "loss": 1.0762, "step": 38545 }, { "epoch": 11.53, "grad_norm": 1.9504579305648804, "learning_rate": 1.9048531958971245e-05, "loss": 1.1548, "step": 38550 }, { "epoch": 11.54, "grad_norm": 4.491879463195801, "learning_rate": 1.9042825820515675e-05, "loss": 0.8179, "step": 38555 }, { "epoch": 11.54, "grad_norm": 3.1393933296203613, "learning_rate": 1.903712001106864e-05, "loss": 0.8162, "step": 38560 }, { "epoch": 11.54, "grad_norm": 1.3722354173660278, "learning_rate": 1.9031414530945263e-05, "loss": 1.096, "step": 38565 }, { "epoch": 11.54, "grad_norm": 3.8262624740600586, "learning_rate": 1.9025709380460662e-05, "loss": 0.9238, "step": 38570 }, { "epoch": 11.54, "grad_norm": 3.3715012073516846, "learning_rate": 1.9020004559929915e-05, "loss": 1.0101, "step": 38575 }, { "epoch": 11.54, "grad_norm": 4.317600250244141, "learning_rate": 1.9014300069668113e-05, "loss": 0.9422, "step": 38580 }, { "epoch": 11.54, "grad_norm": 3.4299378395080566, "learning_rate": 1.9008595909990286e-05, "loss": 1.0151, "step": 38585 }, { "epoch": 11.55, "grad_norm": 5.137393474578857, "learning_rate": 1.9002892081211478e-05, "loss": 1.1151, "step": 38590 }, { "epoch": 11.55, "grad_norm": 2.469620704650879, "learning_rate": 1.8997188583646713e-05, "loss": 1.0459, "step": 38595 }, { "epoch": 11.55, "grad_norm": 1.945117473602295, "learning_rate": 
1.899148541761098e-05, "loss": 1.037, "step": 38600 }, { "epoch": 11.55, "grad_norm": 2.0627458095550537, "learning_rate": 1.8985782583419266e-05, "loss": 1.0576, "step": 38605 }, { "epoch": 11.55, "grad_norm": 4.658207416534424, "learning_rate": 1.8980080081386517e-05, "loss": 0.9801, "step": 38610 }, { "epoch": 11.55, "grad_norm": 3.8801674842834473, "learning_rate": 1.89743779118277e-05, "loss": 0.9549, "step": 38615 }, { "epoch": 11.55, "grad_norm": 2.336965322494507, "learning_rate": 1.896867607505773e-05, "loss": 1.236, "step": 38620 }, { "epoch": 11.56, "grad_norm": 2.502366304397583, "learning_rate": 1.89629745713915e-05, "loss": 0.8492, "step": 38625 }, { "epoch": 11.56, "grad_norm": 3.108949661254883, "learning_rate": 1.895727340114392e-05, "loss": 0.9725, "step": 38630 }, { "epoch": 11.56, "grad_norm": 4.816553115844727, "learning_rate": 1.8951572564629835e-05, "loss": 1.0565, "step": 38635 }, { "epoch": 11.56, "grad_norm": 2.322685956954956, "learning_rate": 1.8945872062164122e-05, "loss": 0.9627, "step": 38640 }, { "epoch": 11.56, "grad_norm": 2.1832380294799805, "learning_rate": 1.894017189406159e-05, "loss": 1.0276, "step": 38645 }, { "epoch": 11.56, "grad_norm": 2.5433905124664307, "learning_rate": 1.8934472060637083e-05, "loss": 1.0334, "step": 38650 }, { "epoch": 11.57, "grad_norm": 1.171791672706604, "learning_rate": 1.8928772562205373e-05, "loss": 1.0299, "step": 38655 }, { "epoch": 11.57, "grad_norm": 12.511678695678711, "learning_rate": 1.8923073399081238e-05, "loss": 0.8337, "step": 38660 }, { "epoch": 11.57, "grad_norm": 2.81550669670105, "learning_rate": 1.8917374571579444e-05, "loss": 1.1925, "step": 38665 }, { "epoch": 11.57, "grad_norm": 1.520538568496704, "learning_rate": 1.891167608001473e-05, "loss": 0.9966, "step": 38670 }, { "epoch": 11.57, "grad_norm": 6.8623456954956055, "learning_rate": 1.890597792470182e-05, "loss": 1.1197, "step": 38675 }, { "epoch": 11.57, "grad_norm": 2.9777519702911377, "learning_rate": 
1.890028010595541e-05, "loss": 1.0691, "step": 38680 }, { "epoch": 11.57, "grad_norm": 5.088373184204102, "learning_rate": 1.8894582624090194e-05, "loss": 0.8929, "step": 38685 }, { "epoch": 11.58, "grad_norm": 3.7132937908172607, "learning_rate": 1.8888885479420838e-05, "loss": 1.0176, "step": 38690 }, { "epoch": 11.58, "grad_norm": 3.316521167755127, "learning_rate": 1.8883188672261972e-05, "loss": 1.0659, "step": 38695 }, { "epoch": 11.58, "grad_norm": 2.1407995223999023, "learning_rate": 1.8877492202928252e-05, "loss": 1.0775, "step": 38700 }, { "epoch": 11.58, "grad_norm": 2.758033037185669, "learning_rate": 1.887179607173426e-05, "loss": 1.0208, "step": 38705 }, { "epoch": 11.58, "grad_norm": 3.8762590885162354, "learning_rate": 1.8866100278994613e-05, "loss": 0.9107, "step": 38710 }, { "epoch": 11.58, "grad_norm": 2.9407236576080322, "learning_rate": 1.886040482502387e-05, "loss": 1.0697, "step": 38715 }, { "epoch": 11.58, "grad_norm": 2.600522994995117, "learning_rate": 1.885470971013658e-05, "loss": 1.0183, "step": 38720 }, { "epoch": 11.59, "grad_norm": 3.5335729122161865, "learning_rate": 1.884901493464729e-05, "loss": 0.9398, "step": 38725 }, { "epoch": 11.59, "grad_norm": 1.7185178995132446, "learning_rate": 1.8843320498870504e-05, "loss": 0.8392, "step": 38730 }, { "epoch": 11.59, "grad_norm": 2.37362003326416, "learning_rate": 1.883762640312074e-05, "loss": 1.0308, "step": 38735 }, { "epoch": 11.59, "grad_norm": 5.628195285797119, "learning_rate": 1.883193264771245e-05, "loss": 0.9699, "step": 38740 }, { "epoch": 11.59, "grad_norm": 2.0216867923736572, "learning_rate": 1.882623923296012e-05, "loss": 1.0402, "step": 38745 }, { "epoch": 11.59, "grad_norm": 4.347649097442627, "learning_rate": 1.8820546159178175e-05, "loss": 0.9695, "step": 38750 }, { "epoch": 11.6, "grad_norm": 5.039327621459961, "learning_rate": 1.8814853426681046e-05, "loss": 0.9951, "step": 38755 }, { "epoch": 11.6, "grad_norm": 3.6531269550323486, "learning_rate": 
1.880916103578313e-05, "loss": 1.0957, "step": 38760 }, { "epoch": 11.6, "grad_norm": 2.0845232009887695, "learning_rate": 1.8803468986798814e-05, "loss": 1.05, "step": 38765 }, { "epoch": 11.6, "grad_norm": 4.7361979484558105, "learning_rate": 1.8797777280042467e-05, "loss": 0.9722, "step": 38770 }, { "epoch": 11.6, "grad_norm": 4.363923072814941, "learning_rate": 1.879208591582843e-05, "loss": 0.9497, "step": 38775 }, { "epoch": 11.6, "grad_norm": 3.8049824237823486, "learning_rate": 1.8786394894471038e-05, "loss": 1.1666, "step": 38780 }, { "epoch": 11.6, "grad_norm": 4.981505870819092, "learning_rate": 1.8780704216284604e-05, "loss": 1.0055, "step": 38785 }, { "epoch": 11.61, "grad_norm": 2.457186222076416, "learning_rate": 1.87750138815834e-05, "loss": 1.1097, "step": 38790 }, { "epoch": 11.61, "grad_norm": 2.6897389888763428, "learning_rate": 1.8769323890681717e-05, "loss": 1.2053, "step": 38795 }, { "epoch": 11.61, "grad_norm": 2.4741759300231934, "learning_rate": 1.876363424389379e-05, "loss": 1.0683, "step": 38800 }, { "epoch": 11.61, "grad_norm": 3.079468011856079, "learning_rate": 1.875794494153387e-05, "loss": 0.8789, "step": 38805 }, { "epoch": 11.61, "grad_norm": 4.948914051055908, "learning_rate": 1.875225598391616e-05, "loss": 0.9235, "step": 38810 }, { "epoch": 11.61, "grad_norm": 9.49067211151123, "learning_rate": 1.874656737135486e-05, "loss": 0.7728, "step": 38815 }, { "epoch": 11.61, "grad_norm": 4.153932094573975, "learning_rate": 1.874087910416415e-05, "loss": 0.8923, "step": 38820 }, { "epoch": 11.62, "grad_norm": 5.080899715423584, "learning_rate": 1.8735191182658164e-05, "loss": 1.1195, "step": 38825 }, { "epoch": 11.62, "grad_norm": 14.455235481262207, "learning_rate": 1.872950360715107e-05, "loss": 1.0142, "step": 38830 }, { "epoch": 11.62, "grad_norm": 2.518566131591797, "learning_rate": 1.8723816377956966e-05, "loss": 1.0349, "step": 38835 }, { "epoch": 11.62, "grad_norm": 1.6086268424987793, "learning_rate": 1.871812949538997e-05, 
"loss": 1.0412, "step": 38840 }, { "epoch": 11.62, "grad_norm": 3.0783514976501465, "learning_rate": 1.8712442959764144e-05, "loss": 1.0691, "step": 38845 }, { "epoch": 11.62, "grad_norm": 2.9750258922576904, "learning_rate": 1.870675677139356e-05, "loss": 0.8981, "step": 38850 }, { "epoch": 11.62, "grad_norm": 1.0562318563461304, "learning_rate": 1.8701070930592257e-05, "loss": 1.0965, "step": 38855 }, { "epoch": 11.63, "grad_norm": 3.368023157119751, "learning_rate": 1.8695385437674263e-05, "loss": 1.0257, "step": 38860 }, { "epoch": 11.63, "grad_norm": 2.06657338142395, "learning_rate": 1.868970029295357e-05, "loss": 1.0595, "step": 38865 }, { "epoch": 11.63, "grad_norm": 3.3948874473571777, "learning_rate": 1.8684015496744173e-05, "loss": 1.0824, "step": 38870 }, { "epoch": 11.63, "grad_norm": 3.1580018997192383, "learning_rate": 1.8678331049360044e-05, "loss": 0.994, "step": 38875 }, { "epoch": 11.63, "grad_norm": 2.4036195278167725, "learning_rate": 1.8672646951115116e-05, "loss": 1.1658, "step": 38880 }, { "epoch": 11.63, "grad_norm": 1.6055760383605957, "learning_rate": 1.866696320232331e-05, "loss": 1.0514, "step": 38885 }, { "epoch": 11.64, "grad_norm": 2.9293153285980225, "learning_rate": 1.866127980329856e-05, "loss": 1.0034, "step": 38890 }, { "epoch": 11.64, "grad_norm": 6.062731742858887, "learning_rate": 1.865559675435472e-05, "loss": 0.9402, "step": 38895 }, { "epoch": 11.64, "grad_norm": 2.678724527359009, "learning_rate": 1.8649914055805696e-05, "loss": 0.8018, "step": 38900 }, { "epoch": 11.64, "grad_norm": 1.7814666032791138, "learning_rate": 1.8644231707965297e-05, "loss": 1.0356, "step": 38905 }, { "epoch": 11.64, "grad_norm": 2.179852247238159, "learning_rate": 1.863854971114739e-05, "loss": 1.0481, "step": 38910 }, { "epoch": 11.64, "grad_norm": 3.8820948600769043, "learning_rate": 1.863286806566577e-05, "loss": 1.0233, "step": 38915 }, { "epoch": 11.64, "grad_norm": 4.843132019042969, "learning_rate": 1.862718677183422e-05, "loss": 1.1038, 
"step": 38920 }, { "epoch": 11.65, "grad_norm": 2.708319902420044, "learning_rate": 1.862150582996653e-05, "loss": 1.1368, "step": 38925 }, { "epoch": 11.65, "grad_norm": 2.447589159011841, "learning_rate": 1.861582524037643e-05, "loss": 1.096, "step": 38930 }, { "epoch": 11.65, "grad_norm": 2.1640899181365967, "learning_rate": 1.8610145003377676e-05, "loss": 0.9314, "step": 38935 }, { "epoch": 11.65, "grad_norm": 6.040550708770752, "learning_rate": 1.860446511928397e-05, "loss": 1.1117, "step": 38940 }, { "epoch": 11.65, "grad_norm": 18.708223342895508, "learning_rate": 1.8598785588409005e-05, "loss": 1.0053, "step": 38945 }, { "epoch": 11.65, "grad_norm": 2.9335033893585205, "learning_rate": 1.859310641106646e-05, "loss": 1.2107, "step": 38950 }, { "epoch": 11.65, "grad_norm": 3.3120362758636475, "learning_rate": 1.8587427587569982e-05, "loss": 1.193, "step": 38955 }, { "epoch": 11.66, "grad_norm": 1.8769471645355225, "learning_rate": 1.8581749118233225e-05, "loss": 1.1355, "step": 38960 }, { "epoch": 11.66, "grad_norm": 3.2708353996276855, "learning_rate": 1.857607100336978e-05, "loss": 0.9209, "step": 38965 }, { "epoch": 11.66, "grad_norm": 2.209517478942871, "learning_rate": 1.857039324329327e-05, "loss": 1.1848, "step": 38970 }, { "epoch": 11.66, "grad_norm": 2.2054500579833984, "learning_rate": 1.8564715838317252e-05, "loss": 1.0466, "step": 38975 }, { "epoch": 11.66, "grad_norm": 1.8774044513702393, "learning_rate": 1.8559038788755285e-05, "loss": 0.9097, "step": 38980 }, { "epoch": 11.66, "grad_norm": 2.7075273990631104, "learning_rate": 1.8553362094920918e-05, "loss": 1.1306, "step": 38985 }, { "epoch": 11.67, "grad_norm": 3.6890761852264404, "learning_rate": 1.854768575712765e-05, "loss": 1.0088, "step": 38990 }, { "epoch": 11.67, "grad_norm": 4.63300085067749, "learning_rate": 1.8542009775689003e-05, "loss": 1.1951, "step": 38995 }, { "epoch": 11.67, "grad_norm": 2.1743202209472656, "learning_rate": 1.8536334150918432e-05, "loss": 1.0896, "step": 39000 
}, { "epoch": 11.67, "grad_norm": 2.1397712230682373, "learning_rate": 1.853065888312942e-05, "loss": 0.9551, "step": 39005 }, { "epoch": 11.67, "grad_norm": 3.1289427280426025, "learning_rate": 1.852498397263539e-05, "loss": 1.0159, "step": 39010 }, { "epoch": 11.67, "grad_norm": 2.917625904083252, "learning_rate": 1.8519309419749757e-05, "loss": 1.0579, "step": 39015 }, { "epoch": 11.67, "grad_norm": 1.9072588682174683, "learning_rate": 1.851363522478594e-05, "loss": 1.0325, "step": 39020 }, { "epoch": 11.68, "grad_norm": 1.4245704412460327, "learning_rate": 1.8507961388057292e-05, "loss": 1.1429, "step": 39025 }, { "epoch": 11.68, "grad_norm": 5.188929080963135, "learning_rate": 1.8502287909877197e-05, "loss": 1.1144, "step": 39030 }, { "epoch": 11.68, "grad_norm": 6.824649333953857, "learning_rate": 1.8496614790558987e-05, "loss": 0.9599, "step": 39035 }, { "epoch": 11.68, "grad_norm": 5.712022304534912, "learning_rate": 1.8490942030415982e-05, "loss": 1.105, "step": 39040 }, { "epoch": 11.68, "grad_norm": 5.067729949951172, "learning_rate": 1.8485269629761487e-05, "loss": 1.0556, "step": 39045 }, { "epoch": 11.68, "grad_norm": 1.9118393659591675, "learning_rate": 1.847959758890877e-05, "loss": 1.129, "step": 39050 }, { "epoch": 11.68, "grad_norm": 2.5474183559417725, "learning_rate": 1.847392590817111e-05, "loss": 0.9264, "step": 39055 }, { "epoch": 11.69, "grad_norm": 3.004509925842285, "learning_rate": 1.8468254587861728e-05, "loss": 0.9561, "step": 39060 }, { "epoch": 11.69, "grad_norm": 2.7860445976257324, "learning_rate": 1.8462583628293867e-05, "loss": 1.0989, "step": 39065 }, { "epoch": 11.69, "grad_norm": 3.1194570064544678, "learning_rate": 1.8456913029780714e-05, "loss": 0.9351, "step": 39070 }, { "epoch": 11.69, "grad_norm": 2.584099054336548, "learning_rate": 1.8451242792635447e-05, "loss": 1.0934, "step": 39075 }, { "epoch": 11.69, "grad_norm": 4.399397850036621, "learning_rate": 1.8445572917171238e-05, "loss": 1.0019, "step": 39080 }, { "epoch": 
11.69, "grad_norm": 1.7044438123703003, "learning_rate": 1.843990340370122e-05, "loss": 1.1474, "step": 39085 }, { "epoch": 11.7, "grad_norm": 1.9364503622055054, "learning_rate": 1.8434234252538523e-05, "loss": 1.0992, "step": 39090 }, { "epoch": 11.7, "grad_norm": 2.9224693775177, "learning_rate": 1.842856546399624e-05, "loss": 0.8731, "step": 39095 }, { "epoch": 11.7, "grad_norm": 2.569241523742676, "learning_rate": 1.842289703838746e-05, "loss": 1.0233, "step": 39100 }, { "epoch": 11.7, "grad_norm": 6.116538047790527, "learning_rate": 1.841722897602524e-05, "loss": 0.9919, "step": 39105 }, { "epoch": 11.7, "grad_norm": 5.045174598693848, "learning_rate": 1.8411561277222616e-05, "loss": 0.9874, "step": 39110 }, { "epoch": 11.7, "grad_norm": 4.61328125, "learning_rate": 1.8405893942292617e-05, "loss": 0.913, "step": 39115 }, { "epoch": 11.7, "grad_norm": 3.24153470993042, "learning_rate": 1.840022697154824e-05, "loss": 0.908, "step": 39120 }, { "epoch": 11.71, "grad_norm": 1.127431869506836, "learning_rate": 1.839456036530247e-05, "loss": 1.0368, "step": 39125 }, { "epoch": 11.71, "grad_norm": 2.7838566303253174, "learning_rate": 1.838889412386826e-05, "loss": 0.9142, "step": 39130 }, { "epoch": 11.71, "grad_norm": 2.489769458770752, "learning_rate": 1.838322824755856e-05, "loss": 1.0572, "step": 39135 }, { "epoch": 11.71, "grad_norm": 2.236867904663086, "learning_rate": 1.837756273668629e-05, "loss": 1.0315, "step": 39140 }, { "epoch": 11.71, "grad_norm": 1.7803845405578613, "learning_rate": 1.8371897591564335e-05, "loss": 0.9635, "step": 39145 }, { "epoch": 11.71, "grad_norm": 19.32113265991211, "learning_rate": 1.8366232812505602e-05, "loss": 1.2016, "step": 39150 }, { "epoch": 11.71, "grad_norm": 3.2473385334014893, "learning_rate": 1.836056839982292e-05, "loss": 0.95, "step": 39155 }, { "epoch": 11.72, "grad_norm": 3.1559784412384033, "learning_rate": 1.8354904353829156e-05, "loss": 0.9813, "step": 39160 }, { "epoch": 11.72, "grad_norm": 2.750032663345337, 
"learning_rate": 1.8349240674837105e-05, "loss": 0.8542, "step": 39165 }, { "epoch": 11.72, "grad_norm": 5.297883033752441, "learning_rate": 1.834357736315959e-05, "loss": 0.9693, "step": 39170 }, { "epoch": 11.72, "grad_norm": 3.035358190536499, "learning_rate": 1.8337914419109383e-05, "loss": 1.0519, "step": 39175 }, { "epoch": 11.72, "grad_norm": 5.685046195983887, "learning_rate": 1.833225184299922e-05, "loss": 0.9673, "step": 39180 }, { "epoch": 11.72, "grad_norm": 4.552941799163818, "learning_rate": 1.8326589635141874e-05, "loss": 1.1577, "step": 39185 }, { "epoch": 11.73, "grad_norm": 11.873933792114258, "learning_rate": 1.832092779585003e-05, "loss": 1.0136, "step": 39190 }, { "epoch": 11.73, "grad_norm": 1.8182165622711182, "learning_rate": 1.8315266325436412e-05, "loss": 1.2264, "step": 39195 }, { "epoch": 11.73, "grad_norm": 4.9064621925354, "learning_rate": 1.8309605224213682e-05, "loss": 1.0442, "step": 39200 }, { "epoch": 11.73, "grad_norm": 4.886104106903076, "learning_rate": 1.83039444924945e-05, "loss": 0.9959, "step": 39205 }, { "epoch": 11.73, "grad_norm": 2.1193058490753174, "learning_rate": 1.8298284130591508e-05, "loss": 0.9811, "step": 39210 }, { "epoch": 11.73, "grad_norm": 2.320329427719116, "learning_rate": 1.8292624138817317e-05, "loss": 1.1301, "step": 39215 }, { "epoch": 11.73, "grad_norm": 8.177576065063477, "learning_rate": 1.8286964517484518e-05, "loss": 1.0834, "step": 39220 }, { "epoch": 11.74, "grad_norm": 3.5780060291290283, "learning_rate": 1.8281305266905697e-05, "loss": 0.9501, "step": 39225 }, { "epoch": 11.74, "grad_norm": 4.063993453979492, "learning_rate": 1.8275646387393396e-05, "loss": 1.252, "step": 39230 }, { "epoch": 11.74, "grad_norm": 1.9857643842697144, "learning_rate": 1.8269987879260163e-05, "loss": 1.1019, "step": 39235 }, { "epoch": 11.74, "grad_norm": 1.7458826303482056, "learning_rate": 1.8264329742818496e-05, "loss": 1.0769, "step": 39240 }, { "epoch": 11.74, "grad_norm": 1.2692326307296753, "learning_rate": 
1.8258671978380902e-05, "loss": 1.1112, "step": 39245 }, { "epoch": 11.74, "grad_norm": 1.531681776046753, "learning_rate": 1.8253014586259842e-05, "loss": 1.0832, "step": 39250 }, { "epoch": 11.74, "grad_norm": 3.519178628921509, "learning_rate": 1.824735756676778e-05, "loss": 1.0214, "step": 39255 }, { "epoch": 11.75, "grad_norm": 1.5379389524459839, "learning_rate": 1.824170092021713e-05, "loss": 0.942, "step": 39260 }, { "epoch": 11.75, "grad_norm": 1.4689420461654663, "learning_rate": 1.823604464692033e-05, "loss": 1.0427, "step": 39265 }, { "epoch": 11.75, "grad_norm": 1.6258187294006348, "learning_rate": 1.8230388747189746e-05, "loss": 1.0556, "step": 39270 }, { "epoch": 11.75, "grad_norm": 1.4137862920761108, "learning_rate": 1.8224733221337748e-05, "loss": 1.2594, "step": 39275 }, { "epoch": 11.75, "grad_norm": 3.4396555423736572, "learning_rate": 1.82190780696767e-05, "loss": 1.1517, "step": 39280 }, { "epoch": 11.75, "grad_norm": 2.3807373046875, "learning_rate": 1.8213423292518912e-05, "loss": 0.9228, "step": 39285 }, { "epoch": 11.76, "grad_norm": 3.9610815048217773, "learning_rate": 1.8207768890176714e-05, "loss": 0.975, "step": 39290 }, { "epoch": 11.76, "grad_norm": 2.3501439094543457, "learning_rate": 1.8202114862962376e-05, "loss": 1.0386, "step": 39295 }, { "epoch": 11.76, "grad_norm": 2.7657525539398193, "learning_rate": 1.8196461211188164e-05, "loss": 1.0143, "step": 39300 }, { "epoch": 11.76, "grad_norm": 1.5906193256378174, "learning_rate": 1.8190807935166327e-05, "loss": 1.1036, "step": 39305 }, { "epoch": 11.76, "grad_norm": 26.443702697753906, "learning_rate": 1.8185155035209096e-05, "loss": 0.9669, "step": 39310 }, { "epoch": 11.76, "grad_norm": 4.151071548461914, "learning_rate": 1.817950251162866e-05, "loss": 1.0132, "step": 39315 }, { "epoch": 11.76, "grad_norm": 2.691179037094116, "learning_rate": 1.817385036473721e-05, "loss": 1.0296, "step": 39320 }, { "epoch": 11.77, "grad_norm": 3.2042014598846436, "learning_rate": 
1.816819859484692e-05, "loss": 1.0784, "step": 39325 }, { "epoch": 11.77, "grad_norm": 2.0353968143463135, "learning_rate": 1.816254720226992e-05, "loss": 0.9752, "step": 39330 }, { "epoch": 11.77, "grad_norm": 1.9836816787719727, "learning_rate": 1.8156896187318322e-05, "loss": 1.1108, "step": 39335 }, { "epoch": 11.77, "grad_norm": 1.6825382709503174, "learning_rate": 1.815124555030424e-05, "loss": 1.0884, "step": 39340 }, { "epoch": 11.77, "grad_norm": 3.492959976196289, "learning_rate": 1.8145595291539742e-05, "loss": 1.1597, "step": 39345 }, { "epoch": 11.77, "grad_norm": 3.515138864517212, "learning_rate": 1.81399454113369e-05, "loss": 1.0825, "step": 39350 }, { "epoch": 11.77, "grad_norm": 2.3991005420684814, "learning_rate": 1.8134295910007733e-05, "loss": 1.1757, "step": 39355 }, { "epoch": 11.78, "grad_norm": 4.532209396362305, "learning_rate": 1.8128646787864274e-05, "loss": 0.9928, "step": 39360 }, { "epoch": 11.78, "grad_norm": 1.8209837675094604, "learning_rate": 1.8122998045218515e-05, "loss": 0.9525, "step": 39365 }, { "epoch": 11.78, "grad_norm": 1.6788138151168823, "learning_rate": 1.811734968238241e-05, "loss": 1.1068, "step": 39370 }, { "epoch": 11.78, "grad_norm": 1.9423623085021973, "learning_rate": 1.8111701699667945e-05, "loss": 0.9662, "step": 39375 }, { "epoch": 11.78, "grad_norm": 1.7735339403152466, "learning_rate": 1.8106054097387017e-05, "loss": 1.1168, "step": 39380 }, { "epoch": 11.78, "grad_norm": 4.587143898010254, "learning_rate": 1.810040687585157e-05, "loss": 1.0909, "step": 39385 }, { "epoch": 11.79, "grad_norm": 2.695783853530884, "learning_rate": 1.809476003537347e-05, "loss": 1.0518, "step": 39390 }, { "epoch": 11.79, "grad_norm": 1.4317811727523804, "learning_rate": 1.80891135762646e-05, "loss": 1.0841, "step": 39395 }, { "epoch": 11.79, "grad_norm": 2.6233603954315186, "learning_rate": 1.8083467498836805e-05, "loss": 0.8933, "step": 39400 }, { "epoch": 11.79, "grad_norm": 1.8944311141967773, "learning_rate": 
1.8077821803401903e-05, "loss": 1.0322, "step": 39405 }, { "epoch": 11.79, "grad_norm": 4.844879627227783, "learning_rate": 1.807217649027172e-05, "loss": 1.0349, "step": 39410 }, { "epoch": 11.79, "grad_norm": 3.5124640464782715, "learning_rate": 1.8066531559758015e-05, "loss": 0.9921, "step": 39415 }, { "epoch": 11.79, "grad_norm": 4.222341060638428, "learning_rate": 1.8060887012172578e-05, "loss": 1.0685, "step": 39420 }, { "epoch": 11.8, "grad_norm": 2.961749792098999, "learning_rate": 1.8055242847827137e-05, "loss": 1.0769, "step": 39425 }, { "epoch": 11.8, "grad_norm": 6.683197498321533, "learning_rate": 1.8049599067033406e-05, "loss": 1.1211, "step": 39430 }, { "epoch": 11.8, "grad_norm": 2.078709840774536, "learning_rate": 1.80439556701031e-05, "loss": 1.2193, "step": 39435 }, { "epoch": 11.8, "grad_norm": 3.6731183528900146, "learning_rate": 1.803831265734789e-05, "loss": 1.0561, "step": 39440 }, { "epoch": 11.8, "grad_norm": 2.6240220069885254, "learning_rate": 1.803267002907944e-05, "loss": 0.9361, "step": 39445 }, { "epoch": 11.8, "grad_norm": 5.170044898986816, "learning_rate": 1.802702778560937e-05, "loss": 1.033, "step": 39450 }, { "epoch": 11.8, "grad_norm": 2.432077407836914, "learning_rate": 1.802138592724932e-05, "loss": 1.1533, "step": 39455 }, { "epoch": 11.81, "grad_norm": 1.541792869567871, "learning_rate": 1.8015744454310873e-05, "loss": 1.0976, "step": 39460 }, { "epoch": 11.81, "grad_norm": 2.662036180496216, "learning_rate": 1.8010103367105588e-05, "loss": 0.9965, "step": 39465 }, { "epoch": 11.81, "grad_norm": 3.3465163707733154, "learning_rate": 1.800446266594504e-05, "loss": 1.0623, "step": 39470 }, { "epoch": 11.81, "grad_norm": 2.513610601425171, "learning_rate": 1.799882235114074e-05, "loss": 0.9073, "step": 39475 }, { "epoch": 11.81, "grad_norm": 5.069911956787109, "learning_rate": 1.79931824230042e-05, "loss": 1.2199, "step": 39480 }, { "epoch": 11.81, "grad_norm": 2.737182378768921, "learning_rate": 1.7987542881846918e-05, 
"loss": 1.194, "step": 39485 }, { "epoch": 11.81, "grad_norm": 4.385614395141602, "learning_rate": 1.7981903727980355e-05, "loss": 1.0531, "step": 39490 }, { "epoch": 11.82, "grad_norm": 1.9035724401474, "learning_rate": 1.7976264961715954e-05, "loss": 1.0845, "step": 39495 }, { "epoch": 11.82, "grad_norm": 2.0744688510894775, "learning_rate": 1.7970626583365135e-05, "loss": 0.9908, "step": 39500 }, { "epoch": 11.82, "grad_norm": 1.704471468925476, "learning_rate": 1.796498859323931e-05, "loss": 1.0739, "step": 39505 }, { "epoch": 11.82, "grad_norm": 4.32279634475708, "learning_rate": 1.7959350991649843e-05, "loss": 0.9117, "step": 39510 }, { "epoch": 11.82, "grad_norm": 3.1391208171844482, "learning_rate": 1.795371377890811e-05, "loss": 0.9438, "step": 39515 }, { "epoch": 11.82, "grad_norm": 6.020325183868408, "learning_rate": 1.7948076955325445e-05, "loss": 0.9606, "step": 39520 }, { "epoch": 11.83, "grad_norm": 2.498509645462036, "learning_rate": 1.7942440521213144e-05, "loss": 1.0956, "step": 39525 }, { "epoch": 11.83, "grad_norm": 2.654123544692993, "learning_rate": 1.793680447688253e-05, "loss": 0.9559, "step": 39530 }, { "epoch": 11.83, "grad_norm": 3.5224146842956543, "learning_rate": 1.7931168822644857e-05, "loss": 1.0071, "step": 39535 }, { "epoch": 11.83, "grad_norm": 2.877861499786377, "learning_rate": 1.7925533558811386e-05, "loss": 1.0302, "step": 39540 }, { "epoch": 11.83, "grad_norm": 2.9893412590026855, "learning_rate": 1.7919898685693336e-05, "loss": 0.9003, "step": 39545 }, { "epoch": 11.83, "grad_norm": 3.915611505508423, "learning_rate": 1.7914264203601933e-05, "loss": 1.0966, "step": 39550 }, { "epoch": 11.83, "grad_norm": 4.270025730133057, "learning_rate": 1.7908630112848345e-05, "loss": 1.0488, "step": 39555 }, { "epoch": 11.84, "grad_norm": 1.905087947845459, "learning_rate": 1.7902996413743743e-05, "loss": 1.0429, "step": 39560 }, { "epoch": 11.84, "grad_norm": 1.7518643140792847, "learning_rate": 1.789736310659928e-05, "loss": 0.9564, 
"step": 39565 }, { "epoch": 11.84, "grad_norm": 2.0850372314453125, "learning_rate": 1.7891730191726062e-05, "loss": 1.0715, "step": 39570 }, { "epoch": 11.84, "grad_norm": 4.21657133102417, "learning_rate": 1.7886097669435203e-05, "loss": 1.169, "step": 39575 }, { "epoch": 11.84, "grad_norm": 1.5663069486618042, "learning_rate": 1.7880465540037772e-05, "loss": 0.9148, "step": 39580 }, { "epoch": 11.84, "grad_norm": 1.8661555051803589, "learning_rate": 1.787483380384483e-05, "loss": 0.9692, "step": 39585 }, { "epoch": 11.84, "grad_norm": 2.8097341060638428, "learning_rate": 1.7869202461167414e-05, "loss": 1.1088, "step": 39590 }, { "epoch": 11.85, "grad_norm": 10.885028839111328, "learning_rate": 1.7863571512316525e-05, "loss": 1.0241, "step": 39595 }, { "epoch": 11.85, "grad_norm": 3.2606093883514404, "learning_rate": 1.7857940957603175e-05, "loss": 1.1048, "step": 39600 }, { "epoch": 11.85, "grad_norm": 2.2225308418273926, "learning_rate": 1.785231079733831e-05, "loss": 1.0134, "step": 39605 }, { "epoch": 11.85, "grad_norm": 3.4681246280670166, "learning_rate": 1.7846681031832902e-05, "loss": 1.014, "step": 39610 }, { "epoch": 11.85, "grad_norm": 2.1326732635498047, "learning_rate": 1.784105166139785e-05, "loss": 1.0633, "step": 39615 }, { "epoch": 11.85, "grad_norm": 3.486208200454712, "learning_rate": 1.7835422686344088e-05, "loss": 1.0031, "step": 39620 }, { "epoch": 11.86, "grad_norm": 2.6152260303497314, "learning_rate": 1.7829794106982487e-05, "loss": 1.1336, "step": 39625 }, { "epoch": 11.86, "grad_norm": 3.0266525745391846, "learning_rate": 1.782416592362389e-05, "loss": 1.2247, "step": 39630 }, { "epoch": 11.86, "grad_norm": 1.3934903144836426, "learning_rate": 1.781853813657916e-05, "loss": 1.1949, "step": 39635 }, { "epoch": 11.86, "grad_norm": 1.248276948928833, "learning_rate": 1.7812910746159096e-05, "loss": 1.0491, "step": 39640 }, { "epoch": 11.86, "grad_norm": 3.2080233097076416, "learning_rate": 1.780728375267451e-05, "loss": 1.0669, "step": 
39645 }, { "epoch": 11.86, "grad_norm": 6.742861747741699, "learning_rate": 1.7801657156436162e-05, "loss": 1.0856, "step": 39650 }, { "epoch": 11.86, "grad_norm": 2.66377592086792, "learning_rate": 1.7796030957754806e-05, "loss": 1.2409, "step": 39655 }, { "epoch": 11.87, "grad_norm": 2.5184435844421387, "learning_rate": 1.779040515694117e-05, "loss": 1.0872, "step": 39660 }, { "epoch": 11.87, "grad_norm": 2.7967371940612793, "learning_rate": 1.7784779754305962e-05, "loss": 1.0393, "step": 39665 }, { "epoch": 11.87, "grad_norm": 2.9046332836151123, "learning_rate": 1.7779154750159874e-05, "loss": 1.0531, "step": 39670 }, { "epoch": 11.87, "grad_norm": 1.2060468196868896, "learning_rate": 1.777353014481356e-05, "loss": 1.0541, "step": 39675 }, { "epoch": 11.87, "grad_norm": 5.9001359939575195, "learning_rate": 1.7767905938577666e-05, "loss": 1.0902, "step": 39680 }, { "epoch": 11.87, "grad_norm": 3.407264471054077, "learning_rate": 1.7762282131762814e-05, "loss": 1.0468, "step": 39685 }, { "epoch": 11.87, "grad_norm": 2.291767120361328, "learning_rate": 1.7756658724679588e-05, "loss": 0.9659, "step": 39690 }, { "epoch": 11.88, "grad_norm": 1.3889952898025513, "learning_rate": 1.775103571763858e-05, "loss": 1.0298, "step": 39695 }, { "epoch": 11.88, "grad_norm": 2.6598963737487793, "learning_rate": 1.7745413110950325e-05, "loss": 1.1888, "step": 39700 }, { "epoch": 11.88, "grad_norm": 5.673483371734619, "learning_rate": 1.7739790904925374e-05, "loss": 1.0024, "step": 39705 }, { "epoch": 11.88, "grad_norm": 4.94851016998291, "learning_rate": 1.7734169099874216e-05, "loss": 1.051, "step": 39710 }, { "epoch": 11.88, "grad_norm": 10.22668743133545, "learning_rate": 1.772854769610735e-05, "loss": 1.0193, "step": 39715 }, { "epoch": 11.88, "grad_norm": 3.5006158351898193, "learning_rate": 1.7722926693935242e-05, "loss": 1.0393, "step": 39720 }, { "epoch": 11.89, "grad_norm": 2.18442964553833, "learning_rate": 1.771730609366832e-05, "loss": 0.9004, "step": 39725 }, { 
"epoch": 11.89, "grad_norm": 2.1316640377044678, "learning_rate": 1.771168589561702e-05, "loss": 1.0415, "step": 39730 }, { "epoch": 11.89, "grad_norm": 7.090075492858887, "learning_rate": 1.770606610009172e-05, "loss": 1.0966, "step": 39735 }, { "epoch": 11.89, "grad_norm": 4.611660480499268, "learning_rate": 1.770044670740282e-05, "loss": 0.9932, "step": 39740 }, { "epoch": 11.89, "grad_norm": 2.1602795124053955, "learning_rate": 1.769482771786065e-05, "loss": 1.2849, "step": 39745 }, { "epoch": 11.89, "grad_norm": 5.2523274421691895, "learning_rate": 1.7689209131775563e-05, "loss": 1.0831, "step": 39750 }, { "epoch": 11.89, "grad_norm": 1.25748610496521, "learning_rate": 1.7683590949457852e-05, "loss": 1.0985, "step": 39755 }, { "epoch": 11.9, "grad_norm": 1.3775397539138794, "learning_rate": 1.7677973171217805e-05, "loss": 1.0786, "step": 39760 }, { "epoch": 11.9, "grad_norm": 1.7527154684066772, "learning_rate": 1.767235579736569e-05, "loss": 1.0301, "step": 39765 }, { "epoch": 11.9, "grad_norm": 3.6977381706237793, "learning_rate": 1.7666738828211742e-05, "loss": 1.2242, "step": 39770 }, { "epoch": 11.9, "grad_norm": 2.5999135971069336, "learning_rate": 1.76611222640662e-05, "loss": 1.0193, "step": 39775 }, { "epoch": 11.9, "grad_norm": 5.089832782745361, "learning_rate": 1.7655506105239243e-05, "loss": 1.0719, "step": 39780 }, { "epoch": 11.9, "grad_norm": 3.162248373031616, "learning_rate": 1.764989035204104e-05, "loss": 0.87, "step": 39785 }, { "epoch": 11.9, "grad_norm": 3.2377114295959473, "learning_rate": 1.7644275004781763e-05, "loss": 1.0549, "step": 39790 }, { "epoch": 11.91, "grad_norm": 1.3557695150375366, "learning_rate": 1.7638660063771523e-05, "loss": 1.0341, "step": 39795 }, { "epoch": 11.91, "grad_norm": 2.794524669647217, "learning_rate": 1.7633045529320445e-05, "loss": 1.0422, "step": 39800 }, { "epoch": 11.91, "grad_norm": 5.456986427307129, "learning_rate": 1.7627431401738597e-05, "loss": 0.9706, "step": 39805 }, { "epoch": 11.91, 
"grad_norm": 2.9997246265411377, "learning_rate": 1.7621817681336055e-05, "loss": 1.0415, "step": 39810 }, { "epoch": 11.91, "grad_norm": 3.4345109462738037, "learning_rate": 1.761620436842286e-05, "loss": 0.884, "step": 39815 }, { "epoch": 11.91, "grad_norm": 3.6423003673553467, "learning_rate": 1.7610591463309007e-05, "loss": 1.1453, "step": 39820 }, { "epoch": 11.92, "grad_norm": 3.724964141845703, "learning_rate": 1.760497896630452e-05, "loss": 1.1898, "step": 39825 }, { "epoch": 11.92, "grad_norm": 3.579566478729248, "learning_rate": 1.759936687771934e-05, "loss": 1.0795, "step": 39830 }, { "epoch": 11.92, "grad_norm": 6.6637492179870605, "learning_rate": 1.7593755197863455e-05, "loss": 1.0689, "step": 39835 }, { "epoch": 11.92, "grad_norm": 3.4842472076416016, "learning_rate": 1.758814392704676e-05, "loss": 1.0492, "step": 39840 }, { "epoch": 11.92, "grad_norm": 4.1526079177856445, "learning_rate": 1.758253306557917e-05, "loss": 0.9039, "step": 39845 }, { "epoch": 11.92, "grad_norm": 2.869919538497925, "learning_rate": 1.757692261377058e-05, "loss": 1.0529, "step": 39850 }, { "epoch": 11.92, "grad_norm": 1.4743778705596924, "learning_rate": 1.7571312571930827e-05, "loss": 0.9677, "step": 39855 }, { "epoch": 11.93, "grad_norm": 5.439683437347412, "learning_rate": 1.756570294036977e-05, "loss": 1.0239, "step": 39860 }, { "epoch": 11.93, "grad_norm": 1.5706872940063477, "learning_rate": 1.7560093719397204e-05, "loss": 1.0556, "step": 39865 }, { "epoch": 11.93, "grad_norm": 4.678708553314209, "learning_rate": 1.7554484909322933e-05, "loss": 1.1842, "step": 39870 }, { "epoch": 11.93, "grad_norm": 3.4976863861083984, "learning_rate": 1.7548876510456723e-05, "loss": 0.873, "step": 39875 }, { "epoch": 11.93, "grad_norm": 3.4963254928588867, "learning_rate": 1.7543268523108308e-05, "loss": 0.9826, "step": 39880 }, { "epoch": 11.93, "grad_norm": 4.180238246917725, "learning_rate": 1.7537660947587434e-05, "loss": 0.9779, "step": 39885 }, { "epoch": 11.93, "grad_norm": 
0.9406687617301941, "learning_rate": 1.753205378420378e-05, "loss": 1.0133, "step": 39890 }, { "epoch": 11.94, "grad_norm": 2.034942865371704, "learning_rate": 1.7526447033267037e-05, "loss": 1.203, "step": 39895 }, { "epoch": 11.94, "grad_norm": 2.6159539222717285, "learning_rate": 1.7520840695086847e-05, "loss": 1.1254, "step": 39900 }, { "epoch": 11.94, "grad_norm": 4.698697566986084, "learning_rate": 1.7515234769972865e-05, "loss": 0.823, "step": 39905 }, { "epoch": 11.94, "grad_norm": 3.892012119293213, "learning_rate": 1.750962925823469e-05, "loss": 0.9369, "step": 39910 }, { "epoch": 11.94, "grad_norm": 1.37578284740448, "learning_rate": 1.750402416018189e-05, "loss": 1.1761, "step": 39915 }, { "epoch": 11.94, "grad_norm": 3.057701349258423, "learning_rate": 1.749841947612405e-05, "loss": 0.9876, "step": 39920 }, { "epoch": 11.95, "grad_norm": 4.663394451141357, "learning_rate": 1.7492815206370705e-05, "loss": 0.8152, "step": 39925 }, { "epoch": 11.95, "grad_norm": 2.727992296218872, "learning_rate": 1.748721135123137e-05, "loss": 1.1597, "step": 39930 }, { "epoch": 11.95, "grad_norm": 1.3559057712554932, "learning_rate": 1.7481607911015547e-05, "loss": 1.0632, "step": 39935 }, { "epoch": 11.95, "grad_norm": 2.3270246982574463, "learning_rate": 1.7476004886032703e-05, "loss": 0.8817, "step": 39940 }, { "epoch": 11.95, "grad_norm": 3.695523977279663, "learning_rate": 1.7470402276592294e-05, "loss": 0.9562, "step": 39945 }, { "epoch": 11.95, "grad_norm": 2.7991878986358643, "learning_rate": 1.746480008300373e-05, "loss": 1.0675, "step": 39950 }, { "epoch": 11.95, "grad_norm": 10.5833740234375, "learning_rate": 1.7459198305576434e-05, "loss": 1.0423, "step": 39955 }, { "epoch": 11.96, "grad_norm": 1.6265363693237305, "learning_rate": 1.745359694461977e-05, "loss": 0.9417, "step": 39960 }, { "epoch": 11.96, "grad_norm": 2.9523098468780518, "learning_rate": 1.744799600044311e-05, "loss": 1.2033, "step": 39965 }, { "epoch": 11.96, "grad_norm": 3.3546805381774902, 
"learning_rate": 1.744239547335577e-05, "loss": 1.0307, "step": 39970 }, { "epoch": 11.96, "grad_norm": 4.862351894378662, "learning_rate": 1.7436795363667086e-05, "loss": 0.9044, "step": 39975 }, { "epoch": 11.96, "grad_norm": 3.834397792816162, "learning_rate": 1.743119567168633e-05, "loss": 0.9981, "step": 39980 }, { "epoch": 11.96, "grad_norm": 5.2290472984313965, "learning_rate": 1.742559639772276e-05, "loss": 1.0151, "step": 39985 }, { "epoch": 11.96, "grad_norm": 1.2269209623336792, "learning_rate": 1.741999754208564e-05, "loss": 0.915, "step": 39990 }, { "epoch": 11.97, "grad_norm": 9.868966102600098, "learning_rate": 1.7414399105084166e-05, "loss": 0.9631, "step": 39995 }, { "epoch": 11.97, "grad_norm": 4.046831130981445, "learning_rate": 1.7408801087027554e-05, "loss": 1.0331, "step": 40000 }, { "epoch": 11.97, "grad_norm": 7.04620361328125, "learning_rate": 1.740320348822496e-05, "loss": 1.0635, "step": 40005 }, { "epoch": 11.97, "grad_norm": 4.793135166168213, "learning_rate": 1.739760630898554e-05, "loss": 1.0377, "step": 40010 }, { "epoch": 11.97, "grad_norm": 1.5962762832641602, "learning_rate": 1.7392009549618426e-05, "loss": 1.3054, "step": 40015 }, { "epoch": 11.97, "grad_norm": 3.573862314224243, "learning_rate": 1.7386413210432717e-05, "loss": 1.1205, "step": 40020 }, { "epoch": 11.98, "grad_norm": 2.5612030029296875, "learning_rate": 1.7380817291737488e-05, "loss": 0.9382, "step": 40025 }, { "epoch": 11.98, "grad_norm": 4.043525218963623, "learning_rate": 1.7375221793841806e-05, "loss": 1.083, "step": 40030 }, { "epoch": 11.98, "grad_norm": 3.7928805351257324, "learning_rate": 1.7369626717054693e-05, "loss": 1.1117, "step": 40035 }, { "epoch": 11.98, "grad_norm": 2.3192451000213623, "learning_rate": 1.7364032061685174e-05, "loss": 1.0247, "step": 40040 }, { "epoch": 11.98, "grad_norm": 3.9967002868652344, "learning_rate": 1.7358437828042215e-05, "loss": 0.9724, "step": 40045 }, { "epoch": 11.98, "grad_norm": 4.295874118804932, "learning_rate": 
1.7352844016434802e-05, "loss": 1.17, "step": 40050 }, { "epoch": 11.98, "grad_norm": 2.709665536880493, "learning_rate": 1.7347250627171857e-05, "loss": 1.0567, "step": 40055 }, { "epoch": 11.99, "grad_norm": 1.6827901601791382, "learning_rate": 1.7341657660562313e-05, "loss": 1.0409, "step": 40060 }, { "epoch": 11.99, "grad_norm": 1.5551859140396118, "learning_rate": 1.7336065116915045e-05, "loss": 0.9164, "step": 40065 }, { "epoch": 11.99, "grad_norm": 4.49882698059082, "learning_rate": 1.7330472996538948e-05, "loss": 0.9778, "step": 40070 }, { "epoch": 11.99, "grad_norm": 8.409674644470215, "learning_rate": 1.7324881299742855e-05, "loss": 1.1369, "step": 40075 }, { "epoch": 11.99, "grad_norm": 3.090298891067505, "learning_rate": 1.7319290026835577e-05, "loss": 1.0897, "step": 40080 }, { "epoch": 11.99, "grad_norm": 4.631008625030518, "learning_rate": 1.731369917812594e-05, "loss": 1.0815, "step": 40085 }, { "epoch": 11.99, "grad_norm": 2.0959417819976807, "learning_rate": 1.7308108753922698e-05, "loss": 0.944, "step": 40090 }, { "epoch": 12.0, "grad_norm": 2.209991931915283, "learning_rate": 1.730251875453462e-05, "loss": 1.1075, "step": 40095 }, { "epoch": 12.0, "grad_norm": 1.7672827243804932, "learning_rate": 1.7296929180270424e-05, "loss": 1.0148, "step": 40100 }, { "epoch": 12.0, "grad_norm": 2.572972536087036, "learning_rate": 1.7291340031438828e-05, "loss": 0.9899, "step": 40105 }, { "epoch": 12.0, "grad_norm": 3.605733871459961, "learning_rate": 1.728575130834851e-05, "loss": 1.1367, "step": 40110 }, { "epoch": 12.0, "grad_norm": 0.9864242672920227, "learning_rate": 1.7280163011308127e-05, "loss": 1.0373, "step": 40115 }, { "epoch": 12.0, "grad_norm": 2.625638484954834, "learning_rate": 1.7274575140626318e-05, "loss": 0.9886, "step": 40120 }, { "epoch": 12.0, "grad_norm": 1.6979814767837524, "learning_rate": 1.726898769661169e-05, "loss": 0.9172, "step": 40125 }, { "epoch": 12.01, "grad_norm": 3.1777591705322266, "learning_rate": 1.7263400679572838e-05, 
"loss": 1.1741, "step": 40130 }, { "epoch": 12.01, "grad_norm": 1.9709137678146362, "learning_rate": 1.725781408981833e-05, "loss": 0.9846, "step": 40135 }, { "epoch": 12.01, "grad_norm": 2.2092223167419434, "learning_rate": 1.7252227927656692e-05, "loss": 1.1, "step": 40140 }, { "epoch": 12.01, "grad_norm": 3.6297123432159424, "learning_rate": 1.7246642193396463e-05, "loss": 1.1311, "step": 40145 }, { "epoch": 12.01, "grad_norm": 2.822312831878662, "learning_rate": 1.7241056887346115e-05, "loss": 1.0686, "step": 40150 }, { "epoch": 12.01, "grad_norm": 2.791294574737549, "learning_rate": 1.7235472009814142e-05, "loss": 1.0233, "step": 40155 }, { "epoch": 12.02, "grad_norm": 2.6758475303649902, "learning_rate": 1.7229887561108967e-05, "loss": 0.8289, "step": 40160 }, { "epoch": 12.02, "grad_norm": 1.5019391775131226, "learning_rate": 1.7224303541539034e-05, "loss": 1.0052, "step": 40165 }, { "epoch": 12.02, "grad_norm": 2.995958089828491, "learning_rate": 1.7218719951412736e-05, "loss": 1.0393, "step": 40170 }, { "epoch": 12.02, "grad_norm": 2.246119976043701, "learning_rate": 1.7213136791038436e-05, "loss": 1.0853, "step": 40175 }, { "epoch": 12.02, "grad_norm": 5.3217315673828125, "learning_rate": 1.7207554060724505e-05, "loss": 1.0277, "step": 40180 }, { "epoch": 12.02, "grad_norm": 1.9030908346176147, "learning_rate": 1.720197176077926e-05, "loss": 1.0401, "step": 40185 }, { "epoch": 12.02, "grad_norm": 3.2368953227996826, "learning_rate": 1.7196389891511017e-05, "loss": 0.9992, "step": 40190 }, { "epoch": 12.03, "grad_norm": 2.3075368404388428, "learning_rate": 1.7190808453228035e-05, "loss": 1.1156, "step": 40195 }, { "epoch": 12.03, "grad_norm": 3.3374037742614746, "learning_rate": 1.7185227446238597e-05, "loss": 1.2066, "step": 40200 }, { "epoch": 12.03, "grad_norm": 2.5891878604888916, "learning_rate": 1.7179646870850917e-05, "loss": 0.8088, "step": 40205 }, { "epoch": 12.03, "grad_norm": 2.94810152053833, "learning_rate": 1.7174066727373212e-05, "loss": 
0.9585, "step": 40210 }, { "epoch": 12.03, "grad_norm": 1.5498286485671997, "learning_rate": 1.7168487016113675e-05, "loss": 1.1713, "step": 40215 }, { "epoch": 12.03, "grad_norm": 2.1500444412231445, "learning_rate": 1.7162907737380447e-05, "loss": 1.0594, "step": 40220 }, { "epoch": 12.03, "grad_norm": 2.072866439819336, "learning_rate": 1.7157328891481688e-05, "loss": 1.0718, "step": 40225 }, { "epoch": 12.04, "grad_norm": 2.1537182331085205, "learning_rate": 1.7151750478725506e-05, "loss": 0.953, "step": 40230 }, { "epoch": 12.04, "grad_norm": 3.077263116836548, "learning_rate": 1.7146172499419976e-05, "loss": 1.0168, "step": 40235 }, { "epoch": 12.04, "grad_norm": 3.462986469268799, "learning_rate": 1.7140594953873184e-05, "loss": 0.8017, "step": 40240 }, { "epoch": 12.04, "grad_norm": 2.716017723083496, "learning_rate": 1.7135017842393156e-05, "loss": 1.1061, "step": 40245 }, { "epoch": 12.04, "grad_norm": 2.1912074089050293, "learning_rate": 1.7129441165287923e-05, "loss": 0.9462, "step": 40250 }, { "epoch": 12.04, "grad_norm": 1.6843229532241821, "learning_rate": 1.7123864922865468e-05, "loss": 0.9333, "step": 40255 }, { "epoch": 12.05, "grad_norm": 1.5522648096084595, "learning_rate": 1.7118289115433774e-05, "loss": 0.9716, "step": 40260 }, { "epoch": 12.05, "grad_norm": 3.79538631439209, "learning_rate": 1.7112713743300778e-05, "loss": 0.9679, "step": 40265 }, { "epoch": 12.05, "grad_norm": 2.5258116722106934, "learning_rate": 1.7107138806774398e-05, "loss": 1.0447, "step": 40270 }, { "epoch": 12.05, "grad_norm": 1.7278646230697632, "learning_rate": 1.7101564306162546e-05, "loss": 1.0542, "step": 40275 }, { "epoch": 12.05, "grad_norm": 3.5281360149383545, "learning_rate": 1.7095990241773076e-05, "loss": 0.9029, "step": 40280 }, { "epoch": 12.05, "grad_norm": 2.533038854598999, "learning_rate": 1.7090416613913863e-05, "loss": 1.1102, "step": 40285 }, { "epoch": 12.05, "grad_norm": 2.665400505065918, "learning_rate": 1.7084843422892705e-05, "loss": 0.9122, 
"step": 40290 }, { "epoch": 12.06, "grad_norm": 3.1313700675964355, "learning_rate": 1.7079270669017422e-05, "loss": 1.0415, "step": 40295 }, { "epoch": 12.06, "grad_norm": 1.9384442567825317, "learning_rate": 1.707369835259579e-05, "loss": 0.8205, "step": 40300 }, { "epoch": 12.06, "grad_norm": 3.2962498664855957, "learning_rate": 1.7068126473935552e-05, "loss": 1.1216, "step": 40305 }, { "epoch": 12.06, "grad_norm": 4.275677680969238, "learning_rate": 1.7062555033344457e-05, "loss": 0.9877, "step": 40310 }, { "epoch": 12.06, "grad_norm": 2.7179315090179443, "learning_rate": 1.705698403113018e-05, "loss": 0.8647, "step": 40315 }, { "epoch": 12.06, "grad_norm": 2.34228515625, "learning_rate": 1.7051413467600435e-05, "loss": 0.9374, "step": 40320 }, { "epoch": 12.06, "grad_norm": 3.802971124649048, "learning_rate": 1.704584334306285e-05, "loss": 0.9489, "step": 40325 }, { "epoch": 12.07, "grad_norm": 3.335583448410034, "learning_rate": 1.704027365782508e-05, "loss": 1.0729, "step": 40330 }, { "epoch": 12.07, "grad_norm": 2.546466827392578, "learning_rate": 1.7034704412194722e-05, "loss": 0.991, "step": 40335 }, { "epoch": 12.07, "grad_norm": 3.204293727874756, "learning_rate": 1.7029135606479346e-05, "loss": 1.137, "step": 40340 }, { "epoch": 12.07, "grad_norm": 4.195864677429199, "learning_rate": 1.702356724098654e-05, "loss": 0.9883, "step": 40345 }, { "epoch": 12.07, "grad_norm": 2.6011598110198975, "learning_rate": 1.7017999316023814e-05, "loss": 1.0274, "step": 40350 }, { "epoch": 12.07, "grad_norm": 1.9285248517990112, "learning_rate": 1.7012431831898696e-05, "loss": 0.9519, "step": 40355 }, { "epoch": 12.08, "grad_norm": 1.2027796506881714, "learning_rate": 1.700686478891867e-05, "loss": 1.2077, "step": 40360 }, { "epoch": 12.08, "grad_norm": 13.604466438293457, "learning_rate": 1.700129818739118e-05, "loss": 0.802, "step": 40365 }, { "epoch": 12.08, "grad_norm": 2.9307985305786133, "learning_rate": 1.6995732027623677e-05, "loss": 1.0621, "step": 40370 }, { 
"epoch": 12.08, "grad_norm": 3.506657838821411, "learning_rate": 1.6990166309923584e-05, "loss": 1.0037, "step": 40375 }, { "epoch": 12.08, "grad_norm": 3.7252209186553955, "learning_rate": 1.6984601034598273e-05, "loss": 1.0522, "step": 40380 }, { "epoch": 12.08, "grad_norm": 3.6333189010620117, "learning_rate": 1.6979036201955118e-05, "loss": 1.1053, "step": 40385 }, { "epoch": 12.08, "grad_norm": 1.9753819704055786, "learning_rate": 1.6973471812301456e-05, "loss": 0.9674, "step": 40390 }, { "epoch": 12.09, "grad_norm": 1.905187964439392, "learning_rate": 1.6967907865944608e-05, "loss": 1.0107, "step": 40395 }, { "epoch": 12.09, "grad_norm": 4.743467807769775, "learning_rate": 1.6962344363191846e-05, "loss": 0.8817, "step": 40400 }, { "epoch": 12.09, "grad_norm": 1.3228265047073364, "learning_rate": 1.6956781304350466e-05, "loss": 0.9523, "step": 40405 }, { "epoch": 12.09, "grad_norm": 2.1767237186431885, "learning_rate": 1.695121868972768e-05, "loss": 1.177, "step": 40410 }, { "epoch": 12.09, "grad_norm": 2.85136342048645, "learning_rate": 1.694565651963073e-05, "loss": 1.2009, "step": 40415 }, { "epoch": 12.09, "grad_norm": 1.4177536964416504, "learning_rate": 1.694009479436679e-05, "loss": 1.0331, "step": 40420 }, { "epoch": 12.09, "grad_norm": 3.8098344802856445, "learning_rate": 1.6934533514243046e-05, "loss": 1.0316, "step": 40425 }, { "epoch": 12.1, "grad_norm": 2.5896317958831787, "learning_rate": 1.6928972679566633e-05, "loss": 1.1835, "step": 40430 }, { "epoch": 12.1, "grad_norm": 1.0999228954315186, "learning_rate": 1.692341229064466e-05, "loss": 1.1264, "step": 40435 }, { "epoch": 12.1, "grad_norm": 3.237612009048462, "learning_rate": 1.691785234778424e-05, "loss": 0.9797, "step": 40440 }, { "epoch": 12.1, "grad_norm": 2.606142520904541, "learning_rate": 1.691229285129242e-05, "loss": 0.9886, "step": 40445 }, { "epoch": 12.1, "grad_norm": 5.350770950317383, "learning_rate": 1.6906733801476275e-05, "loss": 0.9763, "step": 40450 }, { "epoch": 12.1, 
"grad_norm": 1.0530993938446045, "learning_rate": 1.69011751986428e-05, "loss": 0.9137, "step": 40455 }, { "epoch": 12.11, "grad_norm": 5.592156887054443, "learning_rate": 1.6895617043099006e-05, "loss": 1.0207, "step": 40460 }, { "epoch": 12.11, "grad_norm": 2.4852864742279053, "learning_rate": 1.6890059335151856e-05, "loss": 1.008, "step": 40465 }, { "epoch": 12.11, "grad_norm": 2.4040777683258057, "learning_rate": 1.6884502075108298e-05, "loss": 1.0699, "step": 40470 }, { "epoch": 12.11, "grad_norm": 2.1579723358154297, "learning_rate": 1.687894526327526e-05, "loss": 0.9733, "step": 40475 }, { "epoch": 12.11, "grad_norm": 3.1468398571014404, "learning_rate": 1.6873388899959625e-05, "loss": 1.0083, "step": 40480 }, { "epoch": 12.11, "grad_norm": 2.4095633029937744, "learning_rate": 1.686783298546828e-05, "loss": 1.0388, "step": 40485 }, { "epoch": 12.11, "grad_norm": 3.516322612762451, "learning_rate": 1.686227752010807e-05, "loss": 1.0025, "step": 40490 }, { "epoch": 12.12, "grad_norm": 4.202288627624512, "learning_rate": 1.6856722504185802e-05, "loss": 1.0996, "step": 40495 }, { "epoch": 12.12, "grad_norm": 3.198098659515381, "learning_rate": 1.68511679380083e-05, "loss": 0.8971, "step": 40500 }, { "epoch": 12.12, "grad_norm": 6.3249993324279785, "learning_rate": 1.684561382188231e-05, "loss": 0.9876, "step": 40505 }, { "epoch": 12.12, "grad_norm": 1.2453722953796387, "learning_rate": 1.68400601561146e-05, "loss": 0.9331, "step": 40510 }, { "epoch": 12.12, "grad_norm": 3.7218070030212402, "learning_rate": 1.683450694101188e-05, "loss": 0.9366, "step": 40515 }, { "epoch": 12.12, "grad_norm": 2.971968412399292, "learning_rate": 1.682895417688086e-05, "loss": 1.0184, "step": 40520 }, { "epoch": 12.12, "grad_norm": 2.1106045246124268, "learning_rate": 1.682340186402821e-05, "loss": 0.9095, "step": 40525 }, { "epoch": 12.13, "grad_norm": 1.3278517723083496, "learning_rate": 1.6817850002760565e-05, "loss": 1.1384, "step": 40530 }, { "epoch": 12.13, "grad_norm": 
1.8929804563522339, "learning_rate": 1.6812298593384574e-05, "loss": 1.029, "step": 40535 }, { "epoch": 12.13, "grad_norm": 1.8360497951507568, "learning_rate": 1.6806747636206804e-05, "loss": 0.9871, "step": 40540 }, { "epoch": 12.13, "grad_norm": 1.8714280128479004, "learning_rate": 1.680119713153386e-05, "loss": 1.0098, "step": 40545 }, { "epoch": 12.13, "grad_norm": 2.2284882068634033, "learning_rate": 1.6795647079672262e-05, "loss": 0.9422, "step": 40550 }, { "epoch": 12.13, "grad_norm": 2.5915369987487793, "learning_rate": 1.6790097480928562e-05, "loss": 0.9804, "step": 40555 }, { "epoch": 12.14, "grad_norm": 1.8112045526504517, "learning_rate": 1.678454833560924e-05, "loss": 0.8877, "step": 40560 }, { "epoch": 12.14, "grad_norm": 2.2570388317108154, "learning_rate": 1.677899964402077e-05, "loss": 0.8855, "step": 40565 }, { "epoch": 12.14, "grad_norm": 2.4070281982421875, "learning_rate": 1.6773451406469607e-05, "loss": 1.2104, "step": 40570 }, { "epoch": 12.14, "grad_norm": 1.3471062183380127, "learning_rate": 1.6767903623262168e-05, "loss": 1.1283, "step": 40575 }, { "epoch": 12.14, "grad_norm": 1.2536911964416504, "learning_rate": 1.6762356294704863e-05, "loss": 1.0132, "step": 40580 }, { "epoch": 12.14, "grad_norm": 4.913021564483643, "learning_rate": 1.675680942110406e-05, "loss": 1.1661, "step": 40585 }, { "epoch": 12.14, "grad_norm": 3.420980215072632, "learning_rate": 1.675126300276609e-05, "loss": 0.7853, "step": 40590 }, { "epoch": 12.15, "grad_norm": 4.242785930633545, "learning_rate": 1.6745717039997303e-05, "loss": 0.8756, "step": 40595 }, { "epoch": 12.15, "grad_norm": 4.858880519866943, "learning_rate": 1.6740171533103974e-05, "loss": 0.894, "step": 40600 }, { "epoch": 12.15, "grad_norm": 1.5555437803268433, "learning_rate": 1.6734626482392397e-05, "loss": 0.9653, "step": 40605 }, { "epoch": 12.15, "grad_norm": 2.0157172679901123, "learning_rate": 1.6729081888168794e-05, "loss": 1.0733, "step": 40610 }, { "epoch": 12.15, "grad_norm": 
5.87971830368042, "learning_rate": 1.6723537750739414e-05, "loss": 1.0843, "step": 40615 }, { "epoch": 12.15, "grad_norm": 3.524643659591675, "learning_rate": 1.6717994070410442e-05, "loss": 1.0036, "step": 40620 }, { "epoch": 12.15, "grad_norm": 3.6206631660461426, "learning_rate": 1.6712450847488037e-05, "loss": 1.038, "step": 40625 }, { "epoch": 12.16, "grad_norm": 3.202336072921753, "learning_rate": 1.6706908082278368e-05, "loss": 1.021, "step": 40630 }, { "epoch": 12.16, "grad_norm": 1.4924439191818237, "learning_rate": 1.6701365775087534e-05, "loss": 1.123, "step": 40635 }, { "epoch": 12.16, "grad_norm": 2.8223538398742676, "learning_rate": 1.669582392622165e-05, "loss": 1.1115, "step": 40640 }, { "epoch": 12.16, "grad_norm": 4.758267879486084, "learning_rate": 1.6690282535986775e-05, "loss": 1.0646, "step": 40645 }, { "epoch": 12.16, "grad_norm": 3.9418559074401855, "learning_rate": 1.6684741604688962e-05, "loss": 1.0262, "step": 40650 }, { "epoch": 12.16, "grad_norm": 2.5239322185516357, "learning_rate": 1.6679201132634227e-05, "loss": 0.9649, "step": 40655 }, { "epoch": 12.17, "grad_norm": 2.7712440490722656, "learning_rate": 1.667366112012856e-05, "loss": 0.9846, "step": 40660 }, { "epoch": 12.17, "grad_norm": 1.613471269607544, "learning_rate": 1.666812156747794e-05, "loss": 0.9812, "step": 40665 }, { "epoch": 12.17, "grad_norm": 3.2598836421966553, "learning_rate": 1.6662582474988297e-05, "loss": 1.1001, "step": 40670 }, { "epoch": 12.17, "grad_norm": 4.2924346923828125, "learning_rate": 1.665704384296557e-05, "loss": 1.0433, "step": 40675 }, { "epoch": 12.17, "grad_norm": 3.4734883308410645, "learning_rate": 1.6651505671715628e-05, "loss": 1.083, "step": 40680 }, { "epoch": 12.17, "grad_norm": 3.842501401901245, "learning_rate": 1.6645967961544357e-05, "loss": 0.8934, "step": 40685 }, { "epoch": 12.17, "grad_norm": 3.126307725906372, "learning_rate": 1.6640430712757594e-05, "loss": 0.8845, "step": 40690 }, { "epoch": 12.18, "grad_norm": 
3.3595333099365234, "learning_rate": 1.6634893925661142e-05, "loss": 1.0184, "step": 40695 }, { "epoch": 12.18, "grad_norm": 2.7751407623291016, "learning_rate": 1.662935760056082e-05, "loss": 0.8099, "step": 40700 }, { "epoch": 12.18, "grad_norm": 4.374361038208008, "learning_rate": 1.6623821737762362e-05, "loss": 0.89, "step": 40705 }, { "epoch": 12.18, "grad_norm": 3.8457541465759277, "learning_rate": 1.6618286337571532e-05, "loss": 1.0311, "step": 40710 }, { "epoch": 12.18, "grad_norm": 4.070710182189941, "learning_rate": 1.661275140029404e-05, "loss": 1.0774, "step": 40715 }, { "epoch": 12.18, "grad_norm": 1.868833303451538, "learning_rate": 1.6607216926235552e-05, "loss": 1.1075, "step": 40720 }, { "epoch": 12.18, "grad_norm": 2.962254524230957, "learning_rate": 1.6601682915701767e-05, "loss": 0.7483, "step": 40725 }, { "epoch": 12.19, "grad_norm": 1.097212553024292, "learning_rate": 1.659614936899829e-05, "loss": 0.9267, "step": 40730 }, { "epoch": 12.19, "grad_norm": 1.6510802507400513, "learning_rate": 1.6590616286430754e-05, "loss": 1.0181, "step": 40735 }, { "epoch": 12.19, "grad_norm": 2.0439059734344482, "learning_rate": 1.658508366830474e-05, "loss": 1.1101, "step": 40740 }, { "epoch": 12.19, "grad_norm": 3.3008670806884766, "learning_rate": 1.6579551514925812e-05, "loss": 1.0038, "step": 40745 }, { "epoch": 12.19, "grad_norm": 1.4634921550750732, "learning_rate": 1.65740198265995e-05, "loss": 1.0533, "step": 40750 }, { "epoch": 12.19, "grad_norm": 2.8518662452697754, "learning_rate": 1.656848860363131e-05, "loss": 1.0689, "step": 40755 }, { "epoch": 12.19, "grad_norm": 2.9406023025512695, "learning_rate": 1.6562957846326738e-05, "loss": 1.1724, "step": 40760 }, { "epoch": 12.2, "grad_norm": 4.297204971313477, "learning_rate": 1.6557427554991222e-05, "loss": 1.0288, "step": 40765 }, { "epoch": 12.2, "grad_norm": 1.472741723060608, "learning_rate": 1.655189772993022e-05, "loss": 1.1269, "step": 40770 }, { "epoch": 12.2, "grad_norm": 2.547549247741699, 
"learning_rate": 1.6546368371449115e-05, "loss": 1.1671, "step": 40775 }, { "epoch": 12.2, "grad_norm": 2.158825159072876, "learning_rate": 1.6540839479853305e-05, "loss": 1.0053, "step": 40780 }, { "epoch": 12.2, "grad_norm": 2.1777453422546387, "learning_rate": 1.653531105544814e-05, "loss": 1.0576, "step": 40785 }, { "epoch": 12.2, "grad_norm": 3.722043991088867, "learning_rate": 1.6529783098538937e-05, "loss": 1.0552, "step": 40790 }, { "epoch": 12.21, "grad_norm": 9.3829984664917, "learning_rate": 1.6524255609431018e-05, "loss": 0.8906, "step": 40795 }, { "epoch": 12.21, "grad_norm": 1.8850293159484863, "learning_rate": 1.6518728588429643e-05, "loss": 0.9639, "step": 40800 }, { "epoch": 12.21, "grad_norm": 4.943428993225098, "learning_rate": 1.651320203584008e-05, "loss": 0.8564, "step": 40805 }, { "epoch": 12.21, "grad_norm": 1.9969452619552612, "learning_rate": 1.650767595196754e-05, "loss": 0.9259, "step": 40810 }, { "epoch": 12.21, "grad_norm": 3.900078773498535, "learning_rate": 1.650215033711724e-05, "loss": 1.0503, "step": 40815 }, { "epoch": 12.21, "grad_norm": 6.483584880828857, "learning_rate": 1.6496625191594335e-05, "loss": 1.1011, "step": 40820 }, { "epoch": 12.21, "grad_norm": 3.2015042304992676, "learning_rate": 1.6491100515703984e-05, "loss": 0.9268, "step": 40825 }, { "epoch": 12.22, "grad_norm": 1.9813512563705444, "learning_rate": 1.6485576309751304e-05, "loss": 0.7646, "step": 40830 }, { "epoch": 12.22, "grad_norm": 2.6845803260803223, "learning_rate": 1.6480052574041398e-05, "loss": 1.1781, "step": 40835 }, { "epoch": 12.22, "grad_norm": 5.457139492034912, "learning_rate": 1.647452930887933e-05, "loss": 1.0875, "step": 40840 }, { "epoch": 12.22, "grad_norm": 2.0619680881500244, "learning_rate": 1.6469006514570158e-05, "loss": 1.136, "step": 40845 }, { "epoch": 12.22, "grad_norm": 3.572672128677368, "learning_rate": 1.646348419141887e-05, "loss": 0.893, "step": 40850 }, { "epoch": 12.22, "grad_norm": 3.076113700866699, "learning_rate": 
1.6457962339730492e-05, "loss": 0.948, "step": 40855 }, { "epoch": 12.22, "grad_norm": 2.0957534313201904, "learning_rate": 1.6452440959809965e-05, "loss": 0.9835, "step": 40860 }, { "epoch": 12.23, "grad_norm": 5.54816198348999, "learning_rate": 1.6446920051962247e-05, "loss": 0.9175, "step": 40865 }, { "epoch": 12.23, "grad_norm": 2.4465866088867188, "learning_rate": 1.6441399616492238e-05, "loss": 1.0556, "step": 40870 }, { "epoch": 12.23, "grad_norm": 2.606534957885742, "learning_rate": 1.6435879653704835e-05, "loss": 1.0207, "step": 40875 }, { "epoch": 12.23, "grad_norm": 2.173264980316162, "learning_rate": 1.6430360163904902e-05, "loss": 1.1046, "step": 40880 }, { "epoch": 12.23, "grad_norm": 2.238013744354248, "learning_rate": 1.6424841147397256e-05, "loss": 1.0992, "step": 40885 }, { "epoch": 12.23, "grad_norm": 1.3038547039031982, "learning_rate": 1.6419322604486737e-05, "loss": 0.9191, "step": 40890 }, { "epoch": 12.24, "grad_norm": 1.2264683246612549, "learning_rate": 1.6413804535478095e-05, "loss": 1.0838, "step": 40895 }, { "epoch": 12.24, "grad_norm": 4.56827974319458, "learning_rate": 1.6408286940676114e-05, "loss": 0.986, "step": 40900 }, { "epoch": 12.24, "grad_norm": 1.0923278331756592, "learning_rate": 1.6402769820385504e-05, "loss": 1.0315, "step": 40905 }, { "epoch": 12.24, "grad_norm": 1.2232002019882202, "learning_rate": 1.6397253174910997e-05, "loss": 1.0501, "step": 40910 }, { "epoch": 12.24, "grad_norm": 1.501046895980835, "learning_rate": 1.6391737004557246e-05, "loss": 0.9132, "step": 40915 }, { "epoch": 12.24, "grad_norm": 0.9044210910797119, "learning_rate": 1.638622130962891e-05, "loss": 1.017, "step": 40920 }, { "epoch": 12.24, "grad_norm": 2.844947576522827, "learning_rate": 1.638070609043062e-05, "loss": 0.9969, "step": 40925 }, { "epoch": 12.25, "grad_norm": 4.111929893493652, "learning_rate": 1.637519134726697e-05, "loss": 1.0534, "step": 40930 }, { "epoch": 12.25, "grad_norm": 1.8732011318206787, "learning_rate": 
1.636967708044254e-05, "loss": 0.888, "step": 40935 }, { "epoch": 12.25, "grad_norm": 2.614795446395874, "learning_rate": 1.636416329026188e-05, "loss": 0.9893, "step": 40940 }, { "epoch": 12.25, "grad_norm": 2.9741506576538086, "learning_rate": 1.6358649977029493e-05, "loss": 1.0112, "step": 40945 }, { "epoch": 12.25, "grad_norm": 4.53338098526001, "learning_rate": 1.63531371410499e-05, "loss": 0.9482, "step": 40950 }, { "epoch": 12.25, "grad_norm": 1.6804264783859253, "learning_rate": 1.634762478262754e-05, "loss": 0.9047, "step": 40955 }, { "epoch": 12.25, "grad_norm": 5.280777931213379, "learning_rate": 1.634211290206688e-05, "loss": 0.8929, "step": 40960 }, { "epoch": 12.26, "grad_norm": 2.2232463359832764, "learning_rate": 1.6336601499672316e-05, "loss": 1.0002, "step": 40965 }, { "epoch": 12.26, "grad_norm": 3.6429049968719482, "learning_rate": 1.633109057574826e-05, "loss": 0.9099, "step": 40970 }, { "epoch": 12.26, "grad_norm": 4.826415538787842, "learning_rate": 1.6325580130599054e-05, "loss": 0.9408, "step": 40975 }, { "epoch": 12.26, "grad_norm": 1.3283343315124512, "learning_rate": 1.6320070164529033e-05, "loss": 0.9633, "step": 40980 }, { "epoch": 12.26, "grad_norm": 4.262852191925049, "learning_rate": 1.6314560677842526e-05, "loss": 1.0573, "step": 40985 }, { "epoch": 12.26, "grad_norm": 1.9915564060211182, "learning_rate": 1.6309051670843794e-05, "loss": 0.9936, "step": 40990 }, { "epoch": 12.27, "grad_norm": 2.673431634902954, "learning_rate": 1.6303543143837113e-05, "loss": 0.9722, "step": 40995 }, { "epoch": 12.27, "grad_norm": 2.525904417037964, "learning_rate": 1.6298035097126698e-05, "loss": 1.1012, "step": 41000 }, { "epoch": 12.27, "grad_norm": 2.020827054977417, "learning_rate": 1.629252753101677e-05, "loss": 1.0946, "step": 41005 }, { "epoch": 12.27, "grad_norm": 2.497849941253662, "learning_rate": 1.6287020445811485e-05, "loss": 1.1731, "step": 41010 }, { "epoch": 12.27, "grad_norm": 2.420525312423706, "learning_rate": 
1.6281513841815006e-05, "loss": 0.9534, "step": 41015 }, { "epoch": 12.27, "grad_norm": 1.6529130935668945, "learning_rate": 1.627600771933146e-05, "loss": 1.0972, "step": 41020 }, { "epoch": 12.27, "grad_norm": 2.2307538986206055, "learning_rate": 1.6270502078664927e-05, "loss": 0.9837, "step": 41025 }, { "epoch": 12.28, "grad_norm": 4.9482269287109375, "learning_rate": 1.6264996920119507e-05, "loss": 0.7897, "step": 41030 }, { "epoch": 12.28, "grad_norm": 1.9426223039627075, "learning_rate": 1.6259492243999215e-05, "loss": 1.088, "step": 41035 }, { "epoch": 12.28, "grad_norm": 2.4098875522613525, "learning_rate": 1.6253988050608092e-05, "loss": 1.014, "step": 41040 }, { "epoch": 12.28, "grad_norm": 3.1415224075317383, "learning_rate": 1.6248484340250114e-05, "loss": 1.0839, "step": 41045 }, { "epoch": 12.28, "grad_norm": 3.377718687057495, "learning_rate": 1.6242981113229245e-05, "loss": 0.8829, "step": 41050 }, { "epoch": 12.28, "grad_norm": 2.4279189109802246, "learning_rate": 1.6237478369849433e-05, "loss": 1.1367, "step": 41055 }, { "epoch": 12.28, "grad_norm": 5.114649295806885, "learning_rate": 1.6231976110414574e-05, "loss": 1.0176, "step": 41060 }, { "epoch": 12.29, "grad_norm": 3.2667107582092285, "learning_rate": 1.622647433522857e-05, "loss": 1.0747, "step": 41065 }, { "epoch": 12.29, "grad_norm": 3.784885883331299, "learning_rate": 1.6220973044595267e-05, "loss": 1.0822, "step": 41070 }, { "epoch": 12.29, "grad_norm": 2.1055243015289307, "learning_rate": 1.621547223881849e-05, "loss": 1.1663, "step": 41075 }, { "epoch": 12.29, "grad_norm": 1.5692771673202515, "learning_rate": 1.620997191820206e-05, "loss": 0.9983, "step": 41080 }, { "epoch": 12.29, "grad_norm": 2.18072772026062, "learning_rate": 1.620447208304973e-05, "loss": 0.805, "step": 41085 }, { "epoch": 12.29, "grad_norm": 6.3626933097839355, "learning_rate": 1.6198972733665284e-05, "loss": 0.8722, "step": 41090 }, { "epoch": 12.3, "grad_norm": 2.6465694904327393, "learning_rate": 
1.6193473870352408e-05, "loss": 1.124, "step": 41095 }, { "epoch": 12.3, "grad_norm": 1.850497841835022, "learning_rate": 1.6187975493414825e-05, "loss": 0.909, "step": 41100 }, { "epoch": 12.3, "grad_norm": 3.717559576034546, "learning_rate": 1.61824776031562e-05, "loss": 0.9344, "step": 41105 }, { "epoch": 12.3, "grad_norm": 1.314609408378601, "learning_rate": 1.6176980199880158e-05, "loss": 0.9273, "step": 41110 }, { "epoch": 12.3, "grad_norm": 1.5568089485168457, "learning_rate": 1.6171483283890342e-05, "loss": 1.0734, "step": 41115 }, { "epoch": 12.3, "grad_norm": 4.105639457702637, "learning_rate": 1.6165986855490316e-05, "loss": 0.9675, "step": 41120 }, { "epoch": 12.3, "grad_norm": 4.530340194702148, "learning_rate": 1.6160490914983667e-05, "loss": 1.1482, "step": 41125 }, { "epoch": 12.31, "grad_norm": null, "learning_rate": 1.6156094514065535e-05, "loss": 1.1886, "step": 41130 }, { "epoch": 12.31, "grad_norm": 6.879144191741943, "learning_rate": 1.615059945253183e-05, "loss": 1.0541, "step": 41135 }, { "epoch": 12.31, "grad_norm": 5.159574508666992, "learning_rate": 1.6145104879741307e-05, "loss": 1.0869, "step": 41140 }, { "epoch": 12.31, "grad_norm": 3.31752872467041, "learning_rate": 1.6139610795997448e-05, "loss": 0.8379, "step": 41145 }, { "epoch": 12.31, "grad_norm": 1.4643511772155762, "learning_rate": 1.6134117201603662e-05, "loss": 1.0015, "step": 41150 }, { "epoch": 12.31, "grad_norm": 1.95048189163208, "learning_rate": 1.612862409686338e-05, "loss": 1.1556, "step": 41155 }, { "epoch": 12.31, "grad_norm": 3.2634193897247314, "learning_rate": 1.6123131482079962e-05, "loss": 1.2138, "step": 41160 }, { "epoch": 12.32, "grad_norm": 2.3244948387145996, "learning_rate": 1.6117639357556767e-05, "loss": 1.0905, "step": 41165 }, { "epoch": 12.32, "grad_norm": 5.133475303649902, "learning_rate": 1.6112147723597116e-05, "loss": 1.0629, "step": 41170 }, { "epoch": 12.32, "grad_norm": 2.374720811843872, "learning_rate": 1.610665658050431e-05, "loss": 
1.0795, "step": 41175 }, { "epoch": 12.32, "grad_norm": 2.306103229522705, "learning_rate": 1.6101165928581612e-05, "loss": 1.022, "step": 41180 }, { "epoch": 12.32, "grad_norm": 2.1258509159088135, "learning_rate": 1.6095675768132273e-05, "loss": 0.8965, "step": 41185 }, { "epoch": 12.32, "grad_norm": 4.365717887878418, "learning_rate": 1.6090186099459505e-05, "loss": 1.1589, "step": 41190 }, { "epoch": 12.33, "grad_norm": 3.533360481262207, "learning_rate": 1.6084696922866504e-05, "loss": 0.9771, "step": 41195 }, { "epoch": 12.33, "grad_norm": 1.57905113697052, "learning_rate": 1.6079208238656414e-05, "loss": 1.1706, "step": 41200 }, { "epoch": 12.33, "grad_norm": 3.772719144821167, "learning_rate": 1.607372004713239e-05, "loss": 0.9388, "step": 41205 }, { "epoch": 12.33, "grad_norm": 2.236943006515503, "learning_rate": 1.606823234859752e-05, "loss": 1.0367, "step": 41210 }, { "epoch": 12.33, "grad_norm": 1.4967836141586304, "learning_rate": 1.6062745143354903e-05, "loss": 0.9601, "step": 41215 }, { "epoch": 12.33, "grad_norm": 1.7550181150436401, "learning_rate": 1.6057258431707585e-05, "loss": 1.1271, "step": 41220 }, { "epoch": 12.33, "grad_norm": 1.6419475078582764, "learning_rate": 1.6051772213958575e-05, "loss": 1.1171, "step": 41225 }, { "epoch": 12.34, "grad_norm": 3.1746506690979004, "learning_rate": 1.6046286490410895e-05, "loss": 0.9319, "step": 41230 }, { "epoch": 12.34, "grad_norm": 4.4260993003845215, "learning_rate": 1.6040801261367493e-05, "loss": 0.9774, "step": 41235 }, { "epoch": 12.34, "grad_norm": 1.5141162872314453, "learning_rate": 1.603531652713134e-05, "loss": 1.0768, "step": 41240 }, { "epoch": 12.34, "grad_norm": 1.775514841079712, "learning_rate": 1.602983228800532e-05, "loss": 0.874, "step": 41245 }, { "epoch": 12.34, "grad_norm": 5.238076210021973, "learning_rate": 1.6024348544292357e-05, "loss": 0.9991, "step": 41250 }, { "epoch": 12.34, "grad_norm": 2.956698417663574, "learning_rate": 1.6018865296295284e-05, "loss": 1.137, "step": 
41255 }, { "epoch": 12.34, "grad_norm": 3.052151918411255, "learning_rate": 1.6013382544316947e-05, "loss": 1.0055, "step": 41260 }, { "epoch": 12.35, "grad_norm": 7.823723316192627, "learning_rate": 1.6007900288660148e-05, "loss": 0.9589, "step": 41265 }, { "epoch": 12.35, "grad_norm": 2.2807137966156006, "learning_rate": 1.6002418529627673e-05, "loss": 0.9707, "step": 41270 }, { "epoch": 12.35, "grad_norm": 5.3980207443237305, "learning_rate": 1.5996937267522265e-05, "loss": 1.0007, "step": 41275 }, { "epoch": 12.35, "grad_norm": 4.970637321472168, "learning_rate": 1.5991456502646658e-05, "loss": 0.8831, "step": 41280 }, { "epoch": 12.35, "grad_norm": 2.0372869968414307, "learning_rate": 1.5985976235303547e-05, "loss": 1.0731, "step": 41285 }, { "epoch": 12.35, "grad_norm": 1.5453438758850098, "learning_rate": 1.59804964657956e-05, "loss": 1.1497, "step": 41290 }, { "epoch": 12.35, "grad_norm": 3.3459970951080322, "learning_rate": 1.5975017194425448e-05, "loss": 0.9553, "step": 41295 }, { "epoch": 12.36, "grad_norm": 1.7910807132720947, "learning_rate": 1.5969538421495728e-05, "loss": 0.9187, "step": 41300 }, { "epoch": 12.36, "grad_norm": 2.4004948139190674, "learning_rate": 1.5964060147309e-05, "loss": 0.8181, "step": 41305 }, { "epoch": 12.36, "grad_norm": 3.5690810680389404, "learning_rate": 1.5958582372167853e-05, "loss": 1.1377, "step": 41310 }, { "epoch": 12.36, "grad_norm": 7.011974334716797, "learning_rate": 1.5953105096374797e-05, "loss": 0.9167, "step": 41315 }, { "epoch": 12.36, "grad_norm": 2.1069743633270264, "learning_rate": 1.594762832023234e-05, "loss": 1.0594, "step": 41320 }, { "epoch": 12.36, "grad_norm": 1.5728492736816406, "learning_rate": 1.594215204404297e-05, "loss": 1.1337, "step": 41325 }, { "epoch": 12.37, "grad_norm": 3.168928384780884, "learning_rate": 1.5936676268109113e-05, "loss": 0.933, "step": 41330 }, { "epoch": 12.37, "grad_norm": 1.6599355936050415, "learning_rate": 1.5931200992733217e-05, "loss": 1.1395, "step": 41335 }, { 
"epoch": 12.37, "grad_norm": 3.265467643737793, "learning_rate": 1.5925726218217653e-05, "loss": 1.0227, "step": 41340 }, { "epoch": 12.37, "grad_norm": 3.9105184078216553, "learning_rate": 1.5920251944864812e-05, "loss": 1.0851, "step": 41345 }, { "epoch": 12.37, "grad_norm": 7.27264928817749, "learning_rate": 1.5914778172977008e-05, "loss": 1.0477, "step": 41350 }, { "epoch": 12.37, "grad_norm": 3.6267623901367188, "learning_rate": 1.5909304902856563e-05, "loss": 1.0303, "step": 41355 }, { "epoch": 12.37, "grad_norm": 3.7175920009613037, "learning_rate": 1.590383213480576e-05, "loss": 1.0979, "step": 41360 }, { "epoch": 12.38, "grad_norm": 1.885209321975708, "learning_rate": 1.589835986912685e-05, "loss": 1.1829, "step": 41365 }, { "epoch": 12.38, "grad_norm": 4.020153045654297, "learning_rate": 1.5892888106122073e-05, "loss": 1.1045, "step": 41370 }, { "epoch": 12.38, "grad_norm": 1.572926640510559, "learning_rate": 1.5887416846093605e-05, "loss": 0.992, "step": 41375 }, { "epoch": 12.38, "grad_norm": 3.5849032402038574, "learning_rate": 1.5881946089343646e-05, "loss": 1.1051, "step": 41380 }, { "epoch": 12.38, "grad_norm": 2.3222670555114746, "learning_rate": 1.5876475836174328e-05, "loss": 0.9915, "step": 41385 }, { "epoch": 12.38, "grad_norm": 2.578094482421875, "learning_rate": 1.5871006086887756e-05, "loss": 0.9607, "step": 41390 }, { "epoch": 12.38, "grad_norm": 4.564905166625977, "learning_rate": 1.586553684178604e-05, "loss": 1.0403, "step": 41395 }, { "epoch": 12.39, "grad_norm": 4.259903907775879, "learning_rate": 1.5860068101171214e-05, "loss": 0.9894, "step": 41400 }, { "epoch": 12.39, "grad_norm": 3.7057387828826904, "learning_rate": 1.5854599865345342e-05, "loss": 1.1087, "step": 41405 }, { "epoch": 12.39, "grad_norm": 2.632147789001465, "learning_rate": 1.58491321346104e-05, "loss": 1.029, "step": 41410 }, { "epoch": 12.39, "grad_norm": 1.1085238456726074, "learning_rate": 1.5843664909268392e-05, "loss": 1.0203, "step": 41415 }, { "epoch": 12.39, 
"grad_norm": 3.5292742252349854, "learning_rate": 1.583819818962125e-05, "loss": 0.9154, "step": 41420 }, { "epoch": 12.39, "grad_norm": 4.0045552253723145, "learning_rate": 1.5832731975970887e-05, "loss": 1.0444, "step": 41425 }, { "epoch": 12.4, "grad_norm": 1.9779725074768066, "learning_rate": 1.5827266268619224e-05, "loss": 1.0704, "step": 41430 }, { "epoch": 12.4, "grad_norm": 4.693383693695068, "learning_rate": 1.5821801067868096e-05, "loss": 0.9821, "step": 41435 }, { "epoch": 12.4, "grad_norm": 1.2160636186599731, "learning_rate": 1.5816336374019363e-05, "loss": 0.8789, "step": 41440 }, { "epoch": 12.4, "grad_norm": 1.0221775770187378, "learning_rate": 1.581087218737483e-05, "loss": 1.0346, "step": 41445 }, { "epoch": 12.4, "grad_norm": 3.203967809677124, "learning_rate": 1.5805408508236263e-05, "loss": 0.8631, "step": 41450 }, { "epoch": 12.4, "grad_norm": 10.920624732971191, "learning_rate": 1.5799945336905438e-05, "loss": 0.9483, "step": 41455 }, { "epoch": 12.4, "grad_norm": 1.881516456604004, "learning_rate": 1.5794482673684056e-05, "loss": 1.0587, "step": 41460 }, { "epoch": 12.41, "grad_norm": 3.7939016819000244, "learning_rate": 1.5789020518873842e-05, "loss": 1.1071, "step": 41465 }, { "epoch": 12.41, "grad_norm": 2.4270174503326416, "learning_rate": 1.5783558872776438e-05, "loss": 1.1607, "step": 41470 }, { "epoch": 12.41, "grad_norm": 3.0483105182647705, "learning_rate": 1.5778097735693508e-05, "loss": 0.9586, "step": 41475 }, { "epoch": 12.41, "grad_norm": 3.0462000370025635, "learning_rate": 1.5772637107926658e-05, "loss": 1.072, "step": 41480 }, { "epoch": 12.41, "grad_norm": 1.6828171014785767, "learning_rate": 1.5767176989777455e-05, "loss": 1.138, "step": 41485 }, { "epoch": 12.41, "grad_norm": 3.1212778091430664, "learning_rate": 1.576171738154748e-05, "loss": 0.9104, "step": 41490 }, { "epoch": 12.41, "grad_norm": 3.6774206161499023, "learning_rate": 1.5756258283538243e-05, "loss": 0.9859, "step": 41495 }, { "epoch": 12.42, "grad_norm": 
5.639192581176758, "learning_rate": 1.575079969605126e-05, "loss": 1.0641, "step": 41500 }, { "epoch": 12.42, "grad_norm": 2.7606070041656494, "learning_rate": 1.5745341619387986e-05, "loss": 0.9769, "step": 41505 }, { "epoch": 12.42, "grad_norm": 2.2884132862091064, "learning_rate": 1.573988405384989e-05, "loss": 1.0192, "step": 41510 }, { "epoch": 12.42, "grad_norm": 2.4269514083862305, "learning_rate": 1.573442699973837e-05, "loss": 1.0783, "step": 41515 }, { "epoch": 12.42, "grad_norm": 4.481752872467041, "learning_rate": 1.5728970457354802e-05, "loss": 1.0585, "step": 41520 }, { "epoch": 12.42, "grad_norm": 2.6316633224487305, "learning_rate": 1.572351442700057e-05, "loss": 1.1228, "step": 41525 }, { "epoch": 12.43, "grad_norm": 3.698345184326172, "learning_rate": 1.5718058908976988e-05, "loss": 1.0415, "step": 41530 }, { "epoch": 12.43, "grad_norm": 3.938213348388672, "learning_rate": 1.5712603903585367e-05, "loss": 0.9013, "step": 41535 }, { "epoch": 12.43, "grad_norm": 2.3214263916015625, "learning_rate": 1.5707149411126975e-05, "loss": 0.9264, "step": 41540 }, { "epoch": 12.43, "grad_norm": 2.7372500896453857, "learning_rate": 1.5701695431903068e-05, "loss": 0.8975, "step": 41545 }, { "epoch": 12.43, "grad_norm": 1.4826568365097046, "learning_rate": 1.569624196621486e-05, "loss": 1.1417, "step": 41550 }, { "epoch": 12.43, "grad_norm": 2.281169891357422, "learning_rate": 1.5690789014363526e-05, "loss": 0.9454, "step": 41555 }, { "epoch": 12.43, "grad_norm": 1.4119058847427368, "learning_rate": 1.568533657665025e-05, "loss": 1.0374, "step": 41560 }, { "epoch": 12.44, "grad_norm": 14.846397399902344, "learning_rate": 1.5679884653376138e-05, "loss": 0.9577, "step": 41565 }, { "epoch": 12.44, "grad_norm": 5.682839393615723, "learning_rate": 1.5674433244842324e-05, "loss": 0.8656, "step": 41570 }, { "epoch": 12.44, "grad_norm": 6.977272033691406, "learning_rate": 1.566898235134987e-05, "loss": 1.0464, "step": 41575 }, { "epoch": 12.44, "grad_norm": 
3.747130870819092, "learning_rate": 1.5663531973199807e-05, "loss": 0.8694, "step": 41580 }, { "epoch": 12.44, "grad_norm": 3.2453455924987793, "learning_rate": 1.565808211069318e-05, "loss": 1.191, "step": 41585 }, { "epoch": 12.44, "grad_norm": 1.451064944267273, "learning_rate": 1.565263276413096e-05, "loss": 1.0963, "step": 41590 }, { "epoch": 12.44, "grad_norm": 1.9707701206207275, "learning_rate": 1.5647183933814124e-05, "loss": 0.9704, "step": 41595 }, { "epoch": 12.45, "grad_norm": 2.4262611865997314, "learning_rate": 1.5641735620043586e-05, "loss": 0.9327, "step": 41600 }, { "epoch": 12.45, "grad_norm": 2.2412772178649902, "learning_rate": 1.5636287823120278e-05, "loss": 1.0346, "step": 41605 }, { "epoch": 12.45, "grad_norm": 3.1475157737731934, "learning_rate": 1.5630840543345048e-05, "loss": 0.9991, "step": 41610 }, { "epoch": 12.45, "grad_norm": 5.283227920532227, "learning_rate": 1.562539378101876e-05, "loss": 0.9999, "step": 41615 }, { "epoch": 12.45, "grad_norm": 3.1468896865844727, "learning_rate": 1.561994753644223e-05, "loss": 1.0675, "step": 41620 }, { "epoch": 12.45, "grad_norm": 6.32867431640625, "learning_rate": 1.5614501809916245e-05, "loss": 1.0708, "step": 41625 }, { "epoch": 12.46, "grad_norm": 3.040778875350952, "learning_rate": 1.5609056601741573e-05, "loss": 1.26, "step": 41630 }, { "epoch": 12.46, "grad_norm": 3.2033233642578125, "learning_rate": 1.560361191221894e-05, "loss": 1.1999, "step": 41635 }, { "epoch": 12.46, "grad_norm": 1.406636118888855, "learning_rate": 1.5598167741649054e-05, "loss": 0.8328, "step": 41640 }, { "epoch": 12.46, "grad_norm": 2.998223066329956, "learning_rate": 1.55927240903326e-05, "loss": 1.0981, "step": 41645 }, { "epoch": 12.46, "grad_norm": 4.719427108764648, "learning_rate": 1.5587280958570206e-05, "loss": 0.8436, "step": 41650 }, { "epoch": 12.46, "grad_norm": 5.524830341339111, "learning_rate": 1.5581838346662506e-05, "loss": 0.8711, "step": 41655 }, { "epoch": 12.46, "grad_norm": 2.657658576965332, 
"learning_rate": 1.5576396254910074e-05, "loss": 0.9805, "step": 41660 }, { "epoch": 12.47, "grad_norm": 3.9892799854278564, "learning_rate": 1.5570954683613496e-05, "loss": 1.0028, "step": 41665 }, { "epoch": 12.47, "grad_norm": 3.137242555618286, "learning_rate": 1.556551363307329e-05, "loss": 1.0821, "step": 41670 }, { "epoch": 12.47, "grad_norm": 3.95418119430542, "learning_rate": 1.5560073103589947e-05, "loss": 1.0106, "step": 41675 }, { "epoch": 12.47, "grad_norm": 1.6314005851745605, "learning_rate": 1.5554633095463966e-05, "loss": 1.0254, "step": 41680 }, { "epoch": 12.47, "grad_norm": 1.4221240282058716, "learning_rate": 1.5549193608995772e-05, "loss": 1.0632, "step": 41685 }, { "epoch": 12.47, "grad_norm": 3.80000901222229, "learning_rate": 1.5543754644485797e-05, "loss": 1.1136, "step": 41690 }, { "epoch": 12.47, "grad_norm": 1.8849966526031494, "learning_rate": 1.5538316202234416e-05, "loss": 1.1025, "step": 41695 }, { "epoch": 12.48, "grad_norm": 1.476726770401001, "learning_rate": 1.5532878282542007e-05, "loss": 1.0641, "step": 41700 }, { "epoch": 12.48, "grad_norm": 2.6277527809143066, "learning_rate": 1.5527440885708884e-05, "loss": 1.0265, "step": 41705 }, { "epoch": 12.48, "grad_norm": 1.5605323314666748, "learning_rate": 1.5522004012035358e-05, "loss": 0.9976, "step": 41710 }, { "epoch": 12.48, "grad_norm": 2.3000760078430176, "learning_rate": 1.551656766182169e-05, "loss": 0.9773, "step": 41715 }, { "epoch": 12.48, "grad_norm": 1.9093542098999023, "learning_rate": 1.551113183536814e-05, "loss": 1.0161, "step": 41720 }, { "epoch": 12.48, "grad_norm": 3.195117712020874, "learning_rate": 1.5505696532974918e-05, "loss": 1.1003, "step": 41725 }, { "epoch": 12.49, "grad_norm": 1.47775399684906, "learning_rate": 1.55002617549422e-05, "loss": 1.0035, "step": 41730 }, { "epoch": 12.49, "grad_norm": 4.95327615737915, "learning_rate": 1.549482750157016e-05, "loss": 1.1078, "step": 41735 }, { "epoch": 12.49, "grad_norm": 1.3294042348861694, "learning_rate": 
1.548939377315891e-05, "loss": 1.0844, "step": 41740 }, { "epoch": 12.49, "grad_norm": 1.624887228012085, "learning_rate": 1.5483960570008555e-05, "loss": 1.076, "step": 41745 }, { "epoch": 12.49, "grad_norm": 1.299483060836792, "learning_rate": 1.5478527892419176e-05, "loss": 1.0632, "step": 41750 }, { "epoch": 12.49, "grad_norm": 1.710900068283081, "learning_rate": 1.5473095740690792e-05, "loss": 1.0242, "step": 41755 }, { "epoch": 12.49, "grad_norm": 3.3313851356506348, "learning_rate": 1.5467664115123435e-05, "loss": 0.9548, "step": 41760 }, { "epoch": 12.5, "grad_norm": 2.4123222827911377, "learning_rate": 1.5462233016017074e-05, "loss": 1.0284, "step": 41765 }, { "epoch": 12.5, "grad_norm": 2.309690237045288, "learning_rate": 1.545680244367168e-05, "loss": 1.0553, "step": 41770 }, { "epoch": 12.5, "grad_norm": 1.3467509746551514, "learning_rate": 1.545137239838717e-05, "loss": 1.1693, "step": 41775 }, { "epoch": 12.5, "grad_norm": 1.2531408071517944, "learning_rate": 1.5445942880463422e-05, "loss": 1.0461, "step": 41780 }, { "epoch": 12.5, "grad_norm": 3.7026236057281494, "learning_rate": 1.5440513890200333e-05, "loss": 1.0725, "step": 41785 }, { "epoch": 12.5, "grad_norm": 1.7098792791366577, "learning_rate": 1.543508542789771e-05, "loss": 1.1343, "step": 41790 }, { "epoch": 12.5, "grad_norm": 3.3441429138183594, "learning_rate": 1.542965749385539e-05, "loss": 1.07, "step": 41795 }, { "epoch": 12.51, "grad_norm": 4.822731018066406, "learning_rate": 1.5424230088373132e-05, "loss": 0.8686, "step": 41800 }, { "epoch": 12.51, "grad_norm": 3.751056671142578, "learning_rate": 1.541880321175069e-05, "loss": 1.0426, "step": 41805 }, { "epoch": 12.51, "grad_norm": 1.4619433879852295, "learning_rate": 1.5413376864287793e-05, "loss": 0.9761, "step": 41810 }, { "epoch": 12.51, "grad_norm": 4.5311737060546875, "learning_rate": 1.5407951046284118e-05, "loss": 0.9352, "step": 41815 }, { "epoch": 12.51, "grad_norm": 1.865403175354004, "learning_rate": 
1.5402525758039348e-05, "loss": 0.9368, "step": 41820 }, { "epoch": 12.51, "grad_norm": 1.6462074518203735, "learning_rate": 1.53971009998531e-05, "loss": 0.9772, "step": 41825 }, { "epoch": 12.52, "grad_norm": 1.4321706295013428, "learning_rate": 1.5391676772024983e-05, "loss": 0.9295, "step": 41830 }, { "epoch": 12.52, "grad_norm": 3.5952343940734863, "learning_rate": 1.5386253074854572e-05, "loss": 1.0067, "step": 41835 }, { "epoch": 12.52, "grad_norm": 3.0504446029663086, "learning_rate": 1.5380829908641407e-05, "loss": 0.9688, "step": 41840 }, { "epoch": 12.52, "grad_norm": 2.3888778686523438, "learning_rate": 1.537540727368501e-05, "loss": 1.1385, "step": 41845 }, { "epoch": 12.52, "grad_norm": 3.4064996242523193, "learning_rate": 1.5369985170284864e-05, "loss": 1.0932, "step": 41850 }, { "epoch": 12.52, "grad_norm": 2.349851131439209, "learning_rate": 1.536456359874043e-05, "loss": 1.0279, "step": 41855 }, { "epoch": 12.52, "grad_norm": 2.4067983627319336, "learning_rate": 1.5359142559351124e-05, "loss": 0.8575, "step": 41860 }, { "epoch": 12.53, "grad_norm": 7.212216854095459, "learning_rate": 1.5353722052416362e-05, "loss": 0.9648, "step": 41865 }, { "epoch": 12.53, "grad_norm": 2.0969083309173584, "learning_rate": 1.534830207823551e-05, "loss": 1.1525, "step": 41870 }, { "epoch": 12.53, "grad_norm": 1.6566853523254395, "learning_rate": 1.5342882637107885e-05, "loss": 1.058, "step": 41875 }, { "epoch": 12.53, "grad_norm": 4.8544921875, "learning_rate": 1.533746372933283e-05, "loss": 0.9298, "step": 41880 }, { "epoch": 12.53, "grad_norm": 3.7266921997070312, "learning_rate": 1.5332045355209597e-05, "loss": 0.9846, "step": 41885 }, { "epoch": 12.53, "grad_norm": 3.181262254714966, "learning_rate": 1.5326627515037452e-05, "loss": 0.8741, "step": 41890 }, { "epoch": 12.53, "grad_norm": 2.2438769340515137, "learning_rate": 1.532121020911562e-05, "loss": 0.966, "step": 41895 }, { "epoch": 12.54, "grad_norm": 4.2861409187316895, "learning_rate": 
1.5315793437743284e-05, "loss": 1.0051, "step": 41900 }, { "epoch": 12.54, "grad_norm": 6.4033026695251465, "learning_rate": 1.5310377201219618e-05, "loss": 0.8874, "step": 41905 }, { "epoch": 12.54, "grad_norm": 3.8851475715637207, "learning_rate": 1.5304961499843734e-05, "loss": 0.9808, "step": 41910 }, { "epoch": 12.54, "grad_norm": 2.0215234756469727, "learning_rate": 1.529954633391476e-05, "loss": 1.1129, "step": 41915 }, { "epoch": 12.54, "grad_norm": 5.1186699867248535, "learning_rate": 1.529413170373175e-05, "loss": 0.908, "step": 41920 }, { "epoch": 12.54, "grad_norm": 2.9314262866973877, "learning_rate": 1.5288717609593764e-05, "loss": 1.0398, "step": 41925 }, { "epoch": 12.54, "grad_norm": 4.335308074951172, "learning_rate": 1.5283304051799813e-05, "loss": 1.0499, "step": 41930 }, { "epoch": 12.55, "grad_norm": 2.7027440071105957, "learning_rate": 1.5277891030648868e-05, "loss": 1.075, "step": 41935 }, { "epoch": 12.55, "grad_norm": 1.977004051208496, "learning_rate": 1.5272478546439907e-05, "loss": 1.1353, "step": 41940 }, { "epoch": 12.55, "grad_norm": 4.210170745849609, "learning_rate": 1.5267066599471836e-05, "loss": 1.1134, "step": 41945 }, { "epoch": 12.55, "grad_norm": 3.0245361328125, "learning_rate": 1.5261655190043568e-05, "loss": 0.8765, "step": 41950 }, { "epoch": 12.55, "grad_norm": 2.2794244289398193, "learning_rate": 1.525624431845395e-05, "loss": 0.9708, "step": 41955 }, { "epoch": 12.55, "grad_norm": 4.885735034942627, "learning_rate": 1.5250833985001845e-05, "loss": 1.0793, "step": 41960 }, { "epoch": 12.56, "grad_norm": 4.991076946258545, "learning_rate": 1.5245424189986035e-05, "loss": 1.1405, "step": 41965 }, { "epoch": 12.56, "grad_norm": 2.991661787033081, "learning_rate": 1.524001493370531e-05, "loss": 0.8436, "step": 41970 }, { "epoch": 12.56, "grad_norm": 4.58644962310791, "learning_rate": 1.5234606216458414e-05, "loss": 0.955, "step": 41975 }, { "epoch": 12.56, "grad_norm": 4.122666358947754, "learning_rate": 
1.5229198038544068e-05, "loss": 0.9454, "step": 41980 }, { "epoch": 12.56, "grad_norm": 2.8381059169769287, "learning_rate": 1.5223790400260956e-05, "loss": 1.2621, "step": 41985 }, { "epoch": 12.56, "grad_norm": 2.797647476196289, "learning_rate": 1.521838330190774e-05, "loss": 1.1286, "step": 41990 }, { "epoch": 12.56, "grad_norm": 2.7422313690185547, "learning_rate": 1.5212976743783047e-05, "loss": 1.0241, "step": 41995 }, { "epoch": 12.57, "grad_norm": 1.3212940692901611, "learning_rate": 1.520757072618548e-05, "loss": 1.0562, "step": 42000 }, { "epoch": 12.57, "grad_norm": 1.8071925640106201, "learning_rate": 1.5202165249413592e-05, "loss": 1.0923, "step": 42005 }, { "epoch": 12.57, "grad_norm": 2.4288082122802734, "learning_rate": 1.5196760313765946e-05, "loss": 1.1725, "step": 42010 }, { "epoch": 12.57, "grad_norm": 4.023059368133545, "learning_rate": 1.5191355919541026e-05, "loss": 1.1285, "step": 42015 }, { "epoch": 12.57, "grad_norm": 3.815493583679199, "learning_rate": 1.5185952067037335e-05, "loss": 0.9849, "step": 42020 }, { "epoch": 12.57, "grad_norm": 3.8521835803985596, "learning_rate": 1.5180548756553306e-05, "loss": 1.0811, "step": 42025 }, { "epoch": 12.57, "grad_norm": 1.4135304689407349, "learning_rate": 1.5175145988387353e-05, "loss": 1.0513, "step": 42030 }, { "epoch": 12.58, "grad_norm": 1.7829787731170654, "learning_rate": 1.5169743762837887e-05, "loss": 1.1091, "step": 42035 }, { "epoch": 12.58, "grad_norm": 1.822656512260437, "learning_rate": 1.5164342080203246e-05, "loss": 1.2833, "step": 42040 }, { "epoch": 12.58, "grad_norm": 4.21824312210083, "learning_rate": 1.5158940940781777e-05, "loss": 0.922, "step": 42045 }, { "epoch": 12.58, "grad_norm": 3.6083362102508545, "learning_rate": 1.5153540344871757e-05, "loss": 1.0563, "step": 42050 }, { "epoch": 12.58, "grad_norm": 2.541013479232788, "learning_rate": 1.5148140292771484e-05, "loss": 0.8514, "step": 42055 }, { "epoch": 12.58, "grad_norm": 2.3995792865753174, "learning_rate": 
1.5142740784779174e-05, "loss": 1.0454, "step": 42060 }, { "epoch": 12.59, "grad_norm": 2.6729390621185303, "learning_rate": 1.5137341821193045e-05, "loss": 0.9045, "step": 42065 }, { "epoch": 12.59, "grad_norm": 3.4425437450408936, "learning_rate": 1.5131943402311279e-05, "loss": 1.0334, "step": 42070 }, { "epoch": 12.59, "grad_norm": 2.506922483444214, "learning_rate": 1.5126545528432017e-05, "loss": 1.0794, "step": 42075 }, { "epoch": 12.59, "grad_norm": 1.7686995267868042, "learning_rate": 1.5121148199853385e-05, "loss": 1.046, "step": 42080 }, { "epoch": 12.59, "grad_norm": 1.438901424407959, "learning_rate": 1.5115751416873472e-05, "loss": 1.118, "step": 42085 }, { "epoch": 12.59, "grad_norm": 1.8655344247817993, "learning_rate": 1.5110355179790331e-05, "loss": 0.9908, "step": 42090 }, { "epoch": 12.59, "grad_norm": 3.9504570960998535, "learning_rate": 1.5104959488902e-05, "loss": 0.9724, "step": 42095 }, { "epoch": 12.6, "grad_norm": 2.1039040088653564, "learning_rate": 1.5099564344506458e-05, "loss": 1.0601, "step": 42100 }, { "epoch": 12.6, "grad_norm": 3.9319233894348145, "learning_rate": 1.5094169746901698e-05, "loss": 0.9059, "step": 42105 }, { "epoch": 12.6, "grad_norm": 1.384982943534851, "learning_rate": 1.5088775696385637e-05, "loss": 1.0329, "step": 42110 }, { "epoch": 12.6, "grad_norm": 3.824261426925659, "learning_rate": 1.50833821932562e-05, "loss": 0.9907, "step": 42115 }, { "epoch": 12.6, "grad_norm": 2.2953989505767822, "learning_rate": 1.5077989237811253e-05, "loss": 0.8469, "step": 42120 }, { "epoch": 12.6, "grad_norm": 6.605740070343018, "learning_rate": 1.507259683034865e-05, "loss": 1.0258, "step": 42125 }, { "epoch": 12.6, "grad_norm": 4.182356357574463, "learning_rate": 1.506720497116621e-05, "loss": 0.8448, "step": 42130 }, { "epoch": 12.61, "grad_norm": 3.7092835903167725, "learning_rate": 1.5061813660561703e-05, "loss": 0.906, "step": 42135 }, { "epoch": 12.61, "grad_norm": 3.1926708221435547, "learning_rate": 
1.5056422898832906e-05, "loss": 0.8871, "step": 42140 }, { "epoch": 12.61, "grad_norm": 3.0576462745666504, "learning_rate": 1.5051032686277527e-05, "loss": 1.0512, "step": 42145 }, { "epoch": 12.61, "grad_norm": 5.683520793914795, "learning_rate": 1.5045643023193281e-05, "loss": 1.0064, "step": 42150 }, { "epoch": 12.61, "grad_norm": 1.615805983543396, "learning_rate": 1.5040253909877822e-05, "loss": 0.8095, "step": 42155 }, { "epoch": 12.61, "grad_norm": 1.38075852394104, "learning_rate": 1.5034865346628782e-05, "loss": 1.1425, "step": 42160 }, { "epoch": 12.62, "grad_norm": 6.462873935699463, "learning_rate": 1.5029477333743774e-05, "loss": 1.0516, "step": 42165 }, { "epoch": 12.62, "grad_norm": 2.051048755645752, "learning_rate": 1.5024089871520369e-05, "loss": 0.8301, "step": 42170 }, { "epoch": 12.62, "grad_norm": 2.33416485786438, "learning_rate": 1.5018702960256108e-05, "loss": 1.1115, "step": 42175 }, { "epoch": 12.62, "grad_norm": 1.324583888053894, "learning_rate": 1.5013316600248507e-05, "loss": 1.1135, "step": 42180 }, { "epoch": 12.62, "grad_norm": 1.6215559244155884, "learning_rate": 1.5007930791795055e-05, "loss": 0.9414, "step": 42185 }, { "epoch": 12.62, "grad_norm": 1.7824453115463257, "learning_rate": 1.5002545535193203e-05, "loss": 1.1032, "step": 42190 }, { "epoch": 12.62, "grad_norm": 4.399259090423584, "learning_rate": 1.4997160830740358e-05, "loss": 0.9014, "step": 42195 }, { "epoch": 12.63, "grad_norm": 3.2295663356781006, "learning_rate": 1.4991776678733935e-05, "loss": 0.7915, "step": 42200 }, { "epoch": 12.63, "grad_norm": 6.502789497375488, "learning_rate": 1.4986393079471269e-05, "loss": 0.9355, "step": 42205 }, { "epoch": 12.63, "grad_norm": 2.078927516937256, "learning_rate": 1.4981010033249718e-05, "loss": 0.9986, "step": 42210 }, { "epoch": 12.63, "grad_norm": 3.382509231567383, "learning_rate": 1.4975627540366558e-05, "loss": 1.0191, "step": 42215 }, { "epoch": 12.63, "grad_norm": 2.477400302886963, "learning_rate": 
1.4970245601119077e-05, "loss": 0.9213, "step": 42220 }, { "epoch": 12.63, "grad_norm": 2.6311304569244385, "learning_rate": 1.4964864215804508e-05, "loss": 1.083, "step": 42225 }, { "epoch": 12.63, "grad_norm": 5.062621116638184, "learning_rate": 1.4959483384720047e-05, "loss": 0.922, "step": 42230 }, { "epoch": 12.64, "grad_norm": 0.9835122227668762, "learning_rate": 1.4954103108162892e-05, "loss": 0.9226, "step": 42235 }, { "epoch": 12.64, "grad_norm": 1.5001591444015503, "learning_rate": 1.4948723386430172e-05, "loss": 0.9836, "step": 42240 }, { "epoch": 12.64, "grad_norm": 3.599999189376831, "learning_rate": 1.494334421981902e-05, "loss": 0.9332, "step": 42245 }, { "epoch": 12.64, "grad_norm": 2.7632405757904053, "learning_rate": 1.4937965608626503e-05, "loss": 0.869, "step": 42250 }, { "epoch": 12.64, "grad_norm": 2.2371535301208496, "learning_rate": 1.493258755314969e-05, "loss": 0.9492, "step": 42255 }, { "epoch": 12.64, "grad_norm": 3.195711851119995, "learning_rate": 1.4927210053685611e-05, "loss": 1.1248, "step": 42260 }, { "epoch": 12.65, "grad_norm": 3.760953903198242, "learning_rate": 1.4921833110531239e-05, "loss": 1.0165, "step": 42265 }, { "epoch": 12.65, "grad_norm": 2.693413019180298, "learning_rate": 1.4916456723983558e-05, "loss": 0.8719, "step": 42270 }, { "epoch": 12.65, "grad_norm": 1.946767807006836, "learning_rate": 1.4911080894339482e-05, "loss": 0.9631, "step": 42275 }, { "epoch": 12.65, "grad_norm": 3.6406733989715576, "learning_rate": 1.4905705621895932e-05, "loss": 0.937, "step": 42280 }, { "epoch": 12.65, "grad_norm": 2.393702745437622, "learning_rate": 1.4900330906949767e-05, "loss": 1.0413, "step": 42285 }, { "epoch": 12.65, "grad_norm": 2.1976375579833984, "learning_rate": 1.4894956749797817e-05, "loss": 0.9671, "step": 42290 }, { "epoch": 12.65, "grad_norm": 1.965586543083191, "learning_rate": 1.4889583150736913e-05, "loss": 0.9733, "step": 42295 }, { "epoch": 12.66, "grad_norm": 2.103189468383789, "learning_rate": 
1.4884210110063811e-05, "loss": 1.0441, "step": 42300 }, { "epoch": 12.66, "grad_norm": 3.943366050720215, "learning_rate": 1.4878837628075281e-05, "loss": 0.8933, "step": 42305 }, { "epoch": 12.66, "grad_norm": 2.3624773025512695, "learning_rate": 1.4873465705068018e-05, "loss": 0.9805, "step": 42310 }, { "epoch": 12.66, "grad_norm": 4.398625373840332, "learning_rate": 1.4868094341338729e-05, "loss": 1.0953, "step": 42315 }, { "epoch": 12.66, "grad_norm": 2.5565319061279297, "learning_rate": 1.4862723537184059e-05, "loss": 0.8852, "step": 42320 }, { "epoch": 12.66, "grad_norm": 3.775041341781616, "learning_rate": 1.4857353292900616e-05, "loss": 0.9722, "step": 42325 }, { "epoch": 12.66, "grad_norm": 5.716676235198975, "learning_rate": 1.485198360878502e-05, "loss": 0.9083, "step": 42330 }, { "epoch": 12.67, "grad_norm": 2.962364435195923, "learning_rate": 1.4846614485133814e-05, "loss": 1.1341, "step": 42335 }, { "epoch": 12.67, "grad_norm": 2.1964805126190186, "learning_rate": 1.4841245922243541e-05, "loss": 0.9692, "step": 42340 }, { "epoch": 12.67, "grad_norm": 1.980453372001648, "learning_rate": 1.4835877920410695e-05, "loss": 1.13, "step": 42345 }, { "epoch": 12.67, "grad_norm": 16.400785446166992, "learning_rate": 1.4830510479931747e-05, "loss": 1.0367, "step": 42350 }, { "epoch": 12.67, "grad_norm": 1.031053066253662, "learning_rate": 1.4825143601103137e-05, "loss": 0.9072, "step": 42355 }, { "epoch": 12.67, "grad_norm": 8.164375305175781, "learning_rate": 1.4819777284221264e-05, "loss": 0.9322, "step": 42360 }, { "epoch": 12.68, "grad_norm": 2.8158113956451416, "learning_rate": 1.4814411529582517e-05, "loss": 0.9522, "step": 42365 }, { "epoch": 12.68, "grad_norm": 2.234344244003296, "learning_rate": 1.4809046337483223e-05, "loss": 0.9949, "step": 42370 }, { "epoch": 12.68, "grad_norm": 7.7483720779418945, "learning_rate": 1.4803681708219719e-05, "loss": 0.9843, "step": 42375 }, { "epoch": 12.68, "grad_norm": 2.1835453510284424, "learning_rate": 
1.4798317642088274e-05, "loss": 0.9065, "step": 42380 }, { "epoch": 12.68, "grad_norm": 1.4759970903396606, "learning_rate": 1.4792954139385134e-05, "loss": 1.0872, "step": 42385 }, { "epoch": 12.68, "grad_norm": 2.038754940032959, "learning_rate": 1.4787591200406536e-05, "loss": 1.1076, "step": 42390 }, { "epoch": 12.68, "grad_norm": 2.0084173679351807, "learning_rate": 1.4782228825448653e-05, "loss": 1.0353, "step": 42395 }, { "epoch": 12.69, "grad_norm": 3.7430591583251953, "learning_rate": 1.4776867014807654e-05, "loss": 1.1075, "step": 42400 }, { "epoch": 12.69, "grad_norm": 3.881999969482422, "learning_rate": 1.4771505768779659e-05, "loss": 0.9953, "step": 42405 }, { "epoch": 12.69, "grad_norm": 2.164416790008545, "learning_rate": 1.4766145087660779e-05, "loss": 1.1684, "step": 42410 }, { "epoch": 12.69, "grad_norm": 3.810238838195801, "learning_rate": 1.476078497174706e-05, "loss": 1.067, "step": 42415 }, { "epoch": 12.69, "grad_norm": 2.1892411708831787, "learning_rate": 1.4755425421334548e-05, "loss": 1.0355, "step": 42420 }, { "epoch": 12.69, "grad_norm": 1.6944376230239868, "learning_rate": 1.475006643671924e-05, "loss": 1.0403, "step": 42425 }, { "epoch": 12.69, "grad_norm": 1.0041096210479736, "learning_rate": 1.4744708018197107e-05, "loss": 1.1216, "step": 42430 }, { "epoch": 12.7, "grad_norm": 1.6599242687225342, "learning_rate": 1.4739350166064092e-05, "loss": 1.0584, "step": 42435 }, { "epoch": 12.7, "grad_norm": 1.151971459388733, "learning_rate": 1.4733992880616099e-05, "loss": 1.0802, "step": 42440 }, { "epoch": 12.7, "grad_norm": 3.420637845993042, "learning_rate": 1.4728636162149011e-05, "loss": 1.0013, "step": 42445 }, { "epoch": 12.7, "grad_norm": 3.075683832168579, "learning_rate": 1.4723280010958676e-05, "loss": 1.0202, "step": 42450 }, { "epoch": 12.7, "grad_norm": 4.555771350860596, "learning_rate": 1.4717924427340896e-05, "loss": 1.2088, "step": 42455 }, { "epoch": 12.7, "grad_norm": 1.3353490829467773, "learning_rate": 
1.4712569411591468e-05, "loss": 1.031, "step": 42460 }, { "epoch": 12.71, "grad_norm": 2.8824167251586914, "learning_rate": 1.4707214964006131e-05, "loss": 1.1876, "step": 42465 }, { "epoch": 12.71, "grad_norm": 4.97543478012085, "learning_rate": 1.470186108488062e-05, "loss": 1.1018, "step": 42470 }, { "epoch": 12.71, "grad_norm": 1.2788058519363403, "learning_rate": 1.469650777451061e-05, "loss": 1.0444, "step": 42475 }, { "epoch": 12.71, "grad_norm": 4.054439067840576, "learning_rate": 1.4691155033191773e-05, "loss": 0.7675, "step": 42480 }, { "epoch": 12.71, "grad_norm": 4.729781627655029, "learning_rate": 1.4685802861219732e-05, "loss": 1.1607, "step": 42485 }, { "epoch": 12.71, "grad_norm": 3.289395570755005, "learning_rate": 1.4680451258890066e-05, "loss": 1.0532, "step": 42490 }, { "epoch": 12.71, "grad_norm": 3.1528773307800293, "learning_rate": 1.467510022649836e-05, "loss": 1.1017, "step": 42495 }, { "epoch": 12.72, "grad_norm": 1.4691784381866455, "learning_rate": 1.4669749764340124e-05, "loss": 0.9924, "step": 42500 }, { "epoch": 12.72, "grad_norm": 4.324456691741943, "learning_rate": 1.4664399872710885e-05, "loss": 1.0135, "step": 42505 }, { "epoch": 12.72, "grad_norm": 4.2510294914245605, "learning_rate": 1.4659050551906089e-05, "loss": 1.0921, "step": 42510 }, { "epoch": 12.72, "grad_norm": 10.014864921569824, "learning_rate": 1.4653701802221187e-05, "loss": 0.9695, "step": 42515 }, { "epoch": 12.72, "grad_norm": 6.1459455490112305, "learning_rate": 1.4648353623951577e-05, "loss": 0.9502, "step": 42520 }, { "epoch": 12.72, "grad_norm": 2.0565860271453857, "learning_rate": 1.4643006017392636e-05, "loss": 0.827, "step": 42525 }, { "epoch": 12.72, "grad_norm": 2.4255897998809814, "learning_rate": 1.4637658982839709e-05, "loss": 0.9423, "step": 42530 }, { "epoch": 12.73, "grad_norm": 2.0831713676452637, "learning_rate": 1.4632312520588104e-05, "loss": 0.8922, "step": 42535 }, { "epoch": 12.73, "grad_norm": 2.779167890548706, "learning_rate": 
1.46269666309331e-05, "loss": 0.9137, "step": 42540 }, { "epoch": 12.73, "grad_norm": 2.2856433391571045, "learning_rate": 1.4621621314169953e-05, "loss": 0.7789, "step": 42545 }, { "epoch": 12.73, "grad_norm": 1.7285444736480713, "learning_rate": 1.4616276570593865e-05, "loss": 1.0144, "step": 42550 }, { "epoch": 12.73, "grad_norm": 4.265864372253418, "learning_rate": 1.4610932400500037e-05, "loss": 1.0229, "step": 42555 }, { "epoch": 12.73, "grad_norm": 3.695251941680908, "learning_rate": 1.4605588804183607e-05, "loss": 0.9748, "step": 42560 }, { "epoch": 12.73, "grad_norm": 6.5001749992370605, "learning_rate": 1.4600245781939708e-05, "loss": 1.1294, "step": 42565 }, { "epoch": 12.74, "grad_norm": 2.0487442016601562, "learning_rate": 1.4594903334063423e-05, "loss": 1.0273, "step": 42570 }, { "epoch": 12.74, "grad_norm": 2.534780502319336, "learning_rate": 1.458956146084981e-05, "loss": 1.0041, "step": 42575 }, { "epoch": 12.74, "grad_norm": 4.824891567230225, "learning_rate": 1.4584220162593898e-05, "loss": 1.0993, "step": 42580 }, { "epoch": 12.74, "grad_norm": 1.7529278993606567, "learning_rate": 1.4578879439590675e-05, "loss": 1.1054, "step": 42585 }, { "epoch": 12.74, "grad_norm": 1.254698634147644, "learning_rate": 1.4573539292135113e-05, "loss": 0.8739, "step": 42590 }, { "epoch": 12.74, "grad_norm": 4.246768951416016, "learning_rate": 1.4568199720522135e-05, "loss": 1.1456, "step": 42595 }, { "epoch": 12.75, "grad_norm": 4.16953182220459, "learning_rate": 1.4562860725046648e-05, "loss": 0.839, "step": 42600 }, { "epoch": 12.75, "grad_norm": 1.6719205379486084, "learning_rate": 1.4557522306003521e-05, "loss": 0.9557, "step": 42605 }, { "epoch": 12.75, "grad_norm": 2.465372085571289, "learning_rate": 1.4552184463687562e-05, "loss": 1.0845, "step": 42610 }, { "epoch": 12.75, "grad_norm": 3.8194258213043213, "learning_rate": 1.4546847198393618e-05, "loss": 1.0781, "step": 42615 }, { "epoch": 12.75, "grad_norm": 1.8574891090393066, "learning_rate": 
1.4541510510416418e-05, "loss": 0.9392, "step": 42620 }, { "epoch": 12.75, "grad_norm": 2.715480089187622, "learning_rate": 1.4536174400050739e-05, "loss": 1.1462, "step": 42625 }, { "epoch": 12.75, "grad_norm": 1.606964349746704, "learning_rate": 1.453083886759125e-05, "loss": 0.9121, "step": 42630 }, { "epoch": 12.76, "grad_norm": 3.806936740875244, "learning_rate": 1.4525503913332672e-05, "loss": 0.973, "step": 42635 }, { "epoch": 12.76, "grad_norm": 3.890246629714966, "learning_rate": 1.4520169537569615e-05, "loss": 1.1849, "step": 42640 }, { "epoch": 12.76, "grad_norm": 3.9114744663238525, "learning_rate": 1.4514835740596699e-05, "loss": 1.0194, "step": 42645 }, { "epoch": 12.76, "grad_norm": 1.8596765995025635, "learning_rate": 1.4509502522708507e-05, "loss": 1.057, "step": 42650 }, { "epoch": 12.76, "grad_norm": 3.2842726707458496, "learning_rate": 1.4504169884199587e-05, "loss": 0.9596, "step": 42655 }, { "epoch": 12.76, "grad_norm": 2.9906461238861084, "learning_rate": 1.4498837825364452e-05, "loss": 0.8944, "step": 42660 }, { "epoch": 12.76, "grad_norm": 1.7279856204986572, "learning_rate": 1.4493506346497593e-05, "loss": 1.0216, "step": 42665 }, { "epoch": 12.77, "grad_norm": 1.639946460723877, "learning_rate": 1.4488175447893455e-05, "loss": 1.097, "step": 42670 }, { "epoch": 12.77, "grad_norm": 9.285032272338867, "learning_rate": 1.4482845129846459e-05, "loss": 1.0578, "step": 42675 }, { "epoch": 12.77, "grad_norm": 1.8126649856567383, "learning_rate": 1.4477515392650997e-05, "loss": 0.999, "step": 42680 }, { "epoch": 12.77, "grad_norm": 2.5790581703186035, "learning_rate": 1.447218623660142e-05, "loss": 0.9432, "step": 42685 }, { "epoch": 12.77, "grad_norm": 1.561050295829773, "learning_rate": 1.4466857661992055e-05, "loss": 0.9825, "step": 42690 }, { "epoch": 12.77, "grad_norm": 3.258485794067383, "learning_rate": 1.4461529669117197e-05, "loss": 1.1113, "step": 42695 }, { "epoch": 12.78, "grad_norm": 1.9323861598968506, "learning_rate": 
1.4456202258271096e-05, "loss": 1.1198, "step": 42700 }, { "epoch": 12.78, "grad_norm": 3.977754831314087, "learning_rate": 1.4450875429747987e-05, "loss": 1.0889, "step": 42705 }, { "epoch": 12.78, "grad_norm": 2.0819084644317627, "learning_rate": 1.4445549183842072e-05, "loss": 1.2944, "step": 42710 }, { "epoch": 12.78, "grad_norm": 2.5505735874176025, "learning_rate": 1.4440223520847485e-05, "loss": 0.9569, "step": 42715 }, { "epoch": 12.78, "grad_norm": 1.7122985124588013, "learning_rate": 1.4434898441058398e-05, "loss": 1.0941, "step": 42720 }, { "epoch": 12.78, "grad_norm": 2.2943074703216553, "learning_rate": 1.4429573944768865e-05, "loss": 1.0611, "step": 42725 }, { "epoch": 12.78, "grad_norm": 5.206298828125, "learning_rate": 1.4424250032272999e-05, "loss": 1.1535, "step": 42730 }, { "epoch": 12.79, "grad_norm": 1.9564958810806274, "learning_rate": 1.4418926703864799e-05, "loss": 1.0421, "step": 42735 }, { "epoch": 12.79, "grad_norm": 1.8103880882263184, "learning_rate": 1.4413603959838278e-05, "loss": 0.8809, "step": 42740 }, { "epoch": 12.79, "grad_norm": 2.9786670207977295, "learning_rate": 1.4408281800487411e-05, "loss": 1.0065, "step": 42745 }, { "epoch": 12.79, "grad_norm": 5.209625720977783, "learning_rate": 1.4402960226106128e-05, "loss": 1.0411, "step": 42750 }, { "epoch": 12.79, "grad_norm": 2.3398683071136475, "learning_rate": 1.4397639236988334e-05, "loss": 1.0483, "step": 42755 }, { "epoch": 12.79, "grad_norm": 1.2769595384597778, "learning_rate": 1.4392318833427896e-05, "loss": 1.0679, "step": 42760 }, { "epoch": 12.79, "grad_norm": 8.292903900146484, "learning_rate": 1.4386999015718684e-05, "loss": 1.0761, "step": 42765 }, { "epoch": 12.8, "grad_norm": 2.5282647609710693, "learning_rate": 1.4381679784154472e-05, "loss": 1.041, "step": 42770 }, { "epoch": 12.8, "grad_norm": 2.1609020233154297, "learning_rate": 1.4376361139029052e-05, "loss": 1.123, "step": 42775 }, { "epoch": 12.8, "grad_norm": 1.4257582426071167, "learning_rate": 
1.4371043080636159e-05, "loss": 1.1743, "step": 42780 }, { "epoch": 12.8, "grad_norm": 3.5058562755584717, "learning_rate": 1.4365725609269515e-05, "loss": 1.0082, "step": 42785 }, { "epoch": 12.8, "grad_norm": 2.25451397895813, "learning_rate": 1.4360408725222785e-05, "loss": 0.9842, "step": 42790 }, { "epoch": 12.8, "grad_norm": 2.5415477752685547, "learning_rate": 1.4355092428789627e-05, "loss": 0.9281, "step": 42795 }, { "epoch": 12.81, "grad_norm": 2.6299939155578613, "learning_rate": 1.4349776720263647e-05, "loss": 0.9657, "step": 42800 }, { "epoch": 12.81, "grad_norm": 3.0297467708587646, "learning_rate": 1.4344461599938439e-05, "loss": 0.9558, "step": 42805 }, { "epoch": 12.81, "grad_norm": 1.9421048164367676, "learning_rate": 1.4339147068107522e-05, "loss": 1.0252, "step": 42810 }, { "epoch": 12.81, "grad_norm": 3.981034517288208, "learning_rate": 1.433383312506445e-05, "loss": 1.0214, "step": 42815 }, { "epoch": 12.81, "grad_norm": 5.4559736251831055, "learning_rate": 1.4328519771102666e-05, "loss": 0.9897, "step": 42820 }, { "epoch": 12.81, "grad_norm": 3.1577250957489014, "learning_rate": 1.4323207006515665e-05, "loss": 0.8926, "step": 42825 }, { "epoch": 12.81, "grad_norm": 11.30273723602295, "learning_rate": 1.4317894831596823e-05, "loss": 0.9951, "step": 42830 }, { "epoch": 12.82, "grad_norm": 1.4776180982589722, "learning_rate": 1.4312583246639566e-05, "loss": 1.1993, "step": 42835 }, { "epoch": 12.82, "grad_norm": 1.8206647634506226, "learning_rate": 1.4307272251937215e-05, "loss": 1.1282, "step": 42840 }, { "epoch": 12.82, "grad_norm": 4.272174835205078, "learning_rate": 1.4301961847783096e-05, "loss": 1.0822, "step": 42845 }, { "epoch": 12.82, "grad_norm": 1.5834592580795288, "learning_rate": 1.4296652034470523e-05, "loss": 1.0474, "step": 42850 }, { "epoch": 12.82, "grad_norm": 3.7897226810455322, "learning_rate": 1.4291342812292712e-05, "loss": 1.0012, "step": 42855 }, { "epoch": 12.82, "grad_norm": 1.9825385808944702, "learning_rate": 
1.4286034181542928e-05, "loss": 0.9521, "step": 42860 }, { "epoch": 12.82, "grad_norm": 2.32985782623291, "learning_rate": 1.4280726142514328e-05, "loss": 1.189, "step": 42865 }, { "epoch": 12.83, "grad_norm": 2.0926525592803955, "learning_rate": 1.4275418695500081e-05, "loss": 0.8125, "step": 42870 }, { "epoch": 12.83, "grad_norm": 3.3241047859191895, "learning_rate": 1.4270111840793316e-05, "loss": 0.8291, "step": 42875 }, { "epoch": 12.83, "grad_norm": 2.867534637451172, "learning_rate": 1.4264805578687116e-05, "loss": 0.9589, "step": 42880 }, { "epoch": 12.83, "grad_norm": 1.9998924732208252, "learning_rate": 1.4259499909474548e-05, "loss": 1.176, "step": 42885 }, { "epoch": 12.83, "grad_norm": 3.46891188621521, "learning_rate": 1.4254194833448634e-05, "loss": 1.0184, "step": 42890 }, { "epoch": 12.83, "grad_norm": 2.473839044570923, "learning_rate": 1.4248890350902371e-05, "loss": 1.104, "step": 42895 }, { "epoch": 12.84, "grad_norm": 1.8417174816131592, "learning_rate": 1.4243586462128727e-05, "loss": 1.0769, "step": 42900 }, { "epoch": 12.84, "grad_norm": 3.319664716720581, "learning_rate": 1.4238283167420601e-05, "loss": 0.9912, "step": 42905 }, { "epoch": 12.84, "grad_norm": 1.5683692693710327, "learning_rate": 1.4232980467070933e-05, "loss": 0.8247, "step": 42910 }, { "epoch": 12.84, "grad_norm": 1.6574991941452026, "learning_rate": 1.4227678361372544e-05, "loss": 1.074, "step": 42915 }, { "epoch": 12.84, "grad_norm": 4.681344032287598, "learning_rate": 1.4222376850618286e-05, "loss": 1.0578, "step": 42920 }, { "epoch": 12.84, "grad_norm": 1.7636343240737915, "learning_rate": 1.4217075935100955e-05, "loss": 1.0217, "step": 42925 }, { "epoch": 12.84, "grad_norm": 3.016009569168091, "learning_rate": 1.4211775615113312e-05, "loss": 1.0764, "step": 42930 }, { "epoch": 12.85, "grad_norm": 2.3196792602539062, "learning_rate": 1.4206475890948099e-05, "loss": 0.9541, "step": 42935 }, { "epoch": 12.85, "grad_norm": 1.9939143657684326, "learning_rate": 
1.4201176762897981e-05, "loss": 1.1661, "step": 42940 }, { "epoch": 12.85, "grad_norm": 2.861206531524658, "learning_rate": 1.4195878231255665e-05, "loss": 0.9937, "step": 42945 }, { "epoch": 12.85, "grad_norm": 3.518277883529663, "learning_rate": 1.4190580296313746e-05, "loss": 0.9096, "step": 42950 }, { "epoch": 12.85, "grad_norm": 5.832763671875, "learning_rate": 1.4185282958364863e-05, "loss": 0.982, "step": 42955 }, { "epoch": 12.85, "grad_norm": 1.4618480205535889, "learning_rate": 1.4179986217701547e-05, "loss": 1.1446, "step": 42960 }, { "epoch": 12.85, "grad_norm": 2.2054197788238525, "learning_rate": 1.4174690074616348e-05, "loss": 1.0362, "step": 42965 }, { "epoch": 12.86, "grad_norm": 2.494736909866333, "learning_rate": 1.4169394529401764e-05, "loss": 1.151, "step": 42970 }, { "epoch": 12.86, "grad_norm": 2.238530397415161, "learning_rate": 1.416409958235026e-05, "loss": 0.9931, "step": 42975 }, { "epoch": 12.86, "grad_norm": 2.8167457580566406, "learning_rate": 1.4158805233754274e-05, "loss": 0.9729, "step": 42980 }, { "epoch": 12.86, "grad_norm": 2.740593433380127, "learning_rate": 1.4153511483906207e-05, "loss": 0.894, "step": 42985 }, { "epoch": 12.86, "grad_norm": 2.3016016483306885, "learning_rate": 1.414821833309843e-05, "loss": 1.0095, "step": 42990 }, { "epoch": 12.86, "grad_norm": 4.366925239562988, "learning_rate": 1.4142925781623279e-05, "loss": 1.0067, "step": 42995 }, { "epoch": 12.87, "grad_norm": 2.0582058429718018, "learning_rate": 1.4137633829773032e-05, "loss": 1.0384, "step": 43000 }, { "epoch": 12.87, "grad_norm": 2.0724596977233887, "learning_rate": 1.4132342477839988e-05, "loss": 1.0962, "step": 43005 }, { "epoch": 12.87, "grad_norm": 4.274909496307373, "learning_rate": 1.4127051726116375e-05, "loss": 0.9295, "step": 43010 }, { "epoch": 12.87, "grad_norm": 4.048436164855957, "learning_rate": 1.4121761574894393e-05, "loss": 0.7961, "step": 43015 }, { "epoch": 12.87, "grad_norm": 2.6834957599639893, "learning_rate": 
1.4116472024466209e-05, "loss": 1.1797, "step": 43020 }, { "epoch": 12.87, "grad_norm": 1.8873329162597656, "learning_rate": 1.4111183075123965e-05, "loss": 0.8737, "step": 43025 }, { "epoch": 12.87, "grad_norm": 3.20086407661438, "learning_rate": 1.410589472715977e-05, "loss": 0.9586, "step": 43030 }, { "epoch": 12.88, "grad_norm": 2.417081117630005, "learning_rate": 1.4100606980865666e-05, "loss": 1.0065, "step": 43035 }, { "epoch": 12.88, "grad_norm": 7.702847003936768, "learning_rate": 1.4095319836533732e-05, "loss": 1.0683, "step": 43040 }, { "epoch": 12.88, "grad_norm": 2.575244665145874, "learning_rate": 1.4090033294455923e-05, "loss": 0.743, "step": 43045 }, { "epoch": 12.88, "grad_norm": 2.828678607940674, "learning_rate": 1.408474735492426e-05, "loss": 1.0292, "step": 43050 }, { "epoch": 12.88, "grad_norm": 2.4835140705108643, "learning_rate": 1.4079462018230633e-05, "loss": 1.0267, "step": 43055 }, { "epoch": 12.88, "grad_norm": 3.8808891773223877, "learning_rate": 1.4074177284666986e-05, "loss": 0.7873, "step": 43060 }, { "epoch": 12.88, "grad_norm": 2.5032103061676025, "learning_rate": 1.4068893154525165e-05, "loss": 1.1472, "step": 43065 }, { "epoch": 12.89, "grad_norm": 7.541705131530762, "learning_rate": 1.4063609628097007e-05, "loss": 1.0327, "step": 43070 }, { "epoch": 12.89, "grad_norm": 1.81223726272583, "learning_rate": 1.4058326705674324e-05, "loss": 1.0865, "step": 43075 }, { "epoch": 12.89, "grad_norm": 1.7020493745803833, "learning_rate": 1.4053044387548886e-05, "loss": 1.1599, "step": 43080 }, { "epoch": 12.89, "grad_norm": 1.811834454536438, "learning_rate": 1.4047762674012426e-05, "loss": 1.1018, "step": 43085 }, { "epoch": 12.89, "grad_norm": 2.5735673904418945, "learning_rate": 1.404248156535665e-05, "loss": 1.164, "step": 43090 }, { "epoch": 12.89, "grad_norm": 5.103279113769531, "learning_rate": 1.4037201061873225e-05, "loss": 1.0598, "step": 43095 }, { "epoch": 12.9, "grad_norm": 2.6463088989257812, "learning_rate": 
1.4031921163853791e-05, "loss": 1.1851, "step": 43100 }, { "epoch": 12.9, "grad_norm": 3.571685314178467, "learning_rate": 1.4026641871589951e-05, "loss": 1.1104, "step": 43105 }, { "epoch": 12.9, "grad_norm": 2.5913071632385254, "learning_rate": 1.4021363185373277e-05, "loss": 1.11, "step": 43110 }, { "epoch": 12.9, "grad_norm": 1.9859071969985962, "learning_rate": 1.4016085105495299e-05, "loss": 1.0636, "step": 43115 }, { "epoch": 12.9, "grad_norm": 1.736292839050293, "learning_rate": 1.4010807632247525e-05, "loss": 1.1104, "step": 43120 }, { "epoch": 12.9, "grad_norm": 4.8815836906433105, "learning_rate": 1.4005530765921432e-05, "loss": 0.8693, "step": 43125 }, { "epoch": 12.9, "grad_norm": 3.071505069732666, "learning_rate": 1.4000254506808428e-05, "loss": 1.0174, "step": 43130 }, { "epoch": 12.91, "grad_norm": 6.106081485748291, "learning_rate": 1.3994978855199953e-05, "loss": 1.0536, "step": 43135 }, { "epoch": 12.91, "grad_norm": 3.1881868839263916, "learning_rate": 1.3989703811387337e-05, "loss": 1.0537, "step": 43140 }, { "epoch": 12.91, "grad_norm": 9.79444694519043, "learning_rate": 1.3984429375661955e-05, "loss": 0.8779, "step": 43145 }, { "epoch": 12.91, "grad_norm": 3.3935022354125977, "learning_rate": 1.3979155548315067e-05, "loss": 0.9611, "step": 43150 }, { "epoch": 12.91, "grad_norm": 2.132736921310425, "learning_rate": 1.3973882329637983e-05, "loss": 0.9938, "step": 43155 }, { "epoch": 12.91, "grad_norm": 3.71537446975708, "learning_rate": 1.396860971992191e-05, "loss": 0.8078, "step": 43160 }, { "epoch": 12.91, "grad_norm": 1.3954871892929077, "learning_rate": 1.3963337719458052e-05, "loss": 0.8549, "step": 43165 }, { "epoch": 12.92, "grad_norm": 4.286108016967773, "learning_rate": 1.3958066328537583e-05, "loss": 1.1669, "step": 43170 }, { "epoch": 12.92, "grad_norm": 1.1374285221099854, "learning_rate": 1.395279554745163e-05, "loss": 1.1838, "step": 43175 }, { "epoch": 12.92, "grad_norm": 4.613668441772461, "learning_rate": 
1.39475253764913e-05, "loss": 0.9838, "step": 43180 }, { "epoch": 12.92, "grad_norm": 4.321890830993652, "learning_rate": 1.3942255815947652e-05, "loss": 0.972, "step": 43185 }, { "epoch": 12.92, "grad_norm": 2.9430012702941895, "learning_rate": 1.393698686611172e-05, "loss": 0.9042, "step": 43190 }, { "epoch": 12.92, "grad_norm": 3.068352222442627, "learning_rate": 1.3931718527274506e-05, "loss": 1.0785, "step": 43195 }, { "epoch": 12.92, "grad_norm": 1.4933991432189941, "learning_rate": 1.3926450799726975e-05, "loss": 0.9176, "step": 43200 }, { "epoch": 12.93, "grad_norm": 3.601362943649292, "learning_rate": 1.3921183683760054e-05, "loss": 1.0315, "step": 43205 }, { "epoch": 12.93, "grad_norm": 4.306023120880127, "learning_rate": 1.391591717966464e-05, "loss": 1.1656, "step": 43210 }, { "epoch": 12.93, "grad_norm": 1.5193262100219727, "learning_rate": 1.3910651287731601e-05, "loss": 1.0546, "step": 43215 }, { "epoch": 12.93, "grad_norm": 1.7641685009002686, "learning_rate": 1.3905386008251775e-05, "loss": 1.1402, "step": 43220 }, { "epoch": 12.93, "grad_norm": 3.4775922298431396, "learning_rate": 1.3900121341515929e-05, "loss": 1.0022, "step": 43225 }, { "epoch": 12.93, "grad_norm": 2.7105138301849365, "learning_rate": 1.389485728781486e-05, "loss": 1.0017, "step": 43230 }, { "epoch": 12.94, "grad_norm": 4.041553497314453, "learning_rate": 1.3889593847439265e-05, "loss": 0.9267, "step": 43235 }, { "epoch": 12.94, "grad_norm": 3.279675006866455, "learning_rate": 1.3884331020679869e-05, "loss": 0.9009, "step": 43240 }, { "epoch": 12.94, "grad_norm": 1.1497808694839478, "learning_rate": 1.3879068807827295e-05, "loss": 1.0816, "step": 43245 }, { "epoch": 12.94, "grad_norm": 3.0773322582244873, "learning_rate": 1.3873807209172215e-05, "loss": 1.0265, "step": 43250 }, { "epoch": 12.94, "grad_norm": 3.1439836025238037, "learning_rate": 1.3868546225005185e-05, "loss": 1.0566, "step": 43255 }, { "epoch": 12.94, "grad_norm": 4.710149765014648, "learning_rate": 
1.3863285855616781e-05, "loss": 1.1018, "step": 43260 }, { "epoch": 12.94, "grad_norm": 4.115421295166016, "learning_rate": 1.385802610129752e-05, "loss": 1.0303, "step": 43265 }, { "epoch": 12.95, "grad_norm": 1.8717732429504395, "learning_rate": 1.3852766962337897e-05, "loss": 1.152, "step": 43270 }, { "epoch": 12.95, "grad_norm": 1.6938856840133667, "learning_rate": 1.3847508439028367e-05, "loss": 1.1106, "step": 43275 }, { "epoch": 12.95, "grad_norm": 2.244997024536133, "learning_rate": 1.3842250531659351e-05, "loss": 1.0663, "step": 43280 }, { "epoch": 12.95, "grad_norm": 1.7450692653656006, "learning_rate": 1.3836993240521245e-05, "loss": 0.9527, "step": 43285 }, { "epoch": 12.95, "grad_norm": 2.384140968322754, "learning_rate": 1.3831736565904396e-05, "loss": 0.8981, "step": 43290 }, { "epoch": 12.95, "grad_norm": 3.3110265731811523, "learning_rate": 1.382648050809913e-05, "loss": 0.9716, "step": 43295 }, { "epoch": 12.95, "grad_norm": 1.9519492387771606, "learning_rate": 1.3821225067395729e-05, "loss": 1.0456, "step": 43300 }, { "epoch": 12.96, "grad_norm": 3.2571592330932617, "learning_rate": 1.3815970244084447e-05, "loss": 1.3004, "step": 43305 }, { "epoch": 12.96, "grad_norm": 3.3342976570129395, "learning_rate": 1.3810716038455506e-05, "loss": 0.9739, "step": 43310 }, { "epoch": 12.96, "grad_norm": 2.1756532192230225, "learning_rate": 1.3805462450799098e-05, "loss": 0.9842, "step": 43315 }, { "epoch": 12.96, "grad_norm": 3.6889872550964355, "learning_rate": 1.3800209481405341e-05, "loss": 1.0174, "step": 43320 }, { "epoch": 12.96, "grad_norm": 2.0239474773406982, "learning_rate": 1.3794957130564393e-05, "loss": 1.0647, "step": 43325 }, { "epoch": 12.96, "grad_norm": 7.694988250732422, "learning_rate": 1.3789705398566296e-05, "loss": 0.9891, "step": 43330 }, { "epoch": 12.97, "grad_norm": 1.3746068477630615, "learning_rate": 1.3784454285701137e-05, "loss": 1.0001, "step": 43335 }, { "epoch": 12.97, "grad_norm": 3.215383291244507, "learning_rate": 
1.3779203792258891e-05, "loss": 1.1611, "step": 43340 }, { "epoch": 12.97, "grad_norm": 2.7879562377929688, "learning_rate": 1.3773953918529576e-05, "loss": 1.0483, "step": 43345 }, { "epoch": 12.97, "grad_norm": 4.000429630279541, "learning_rate": 1.3768704664803106e-05, "loss": 1.0986, "step": 43350 }, { "epoch": 12.97, "grad_norm": 3.5371413230895996, "learning_rate": 1.3763456031369404e-05, "loss": 1.0504, "step": 43355 }, { "epoch": 12.97, "grad_norm": 2.885620594024658, "learning_rate": 1.3758208018518346e-05, "loss": 0.8533, "step": 43360 }, { "epoch": 12.97, "grad_norm": 1.5964106321334839, "learning_rate": 1.3752960626539774e-05, "loss": 1.0267, "step": 43365 }, { "epoch": 12.98, "grad_norm": 2.651956558227539, "learning_rate": 1.3747713855723493e-05, "loss": 1.0612, "step": 43370 }, { "epoch": 12.98, "grad_norm": 1.7484973669052124, "learning_rate": 1.3742467706359283e-05, "loss": 1.1684, "step": 43375 }, { "epoch": 12.98, "grad_norm": 2.3421952724456787, "learning_rate": 1.373722217873688e-05, "loss": 1.1648, "step": 43380 }, { "epoch": 12.98, "grad_norm": 2.6561176776885986, "learning_rate": 1.3731977273145984e-05, "loss": 0.9651, "step": 43385 }, { "epoch": 12.98, "grad_norm": 2.5251173973083496, "learning_rate": 1.3726732989876278e-05, "loss": 0.9813, "step": 43390 }, { "epoch": 12.98, "grad_norm": 2.7231156826019287, "learning_rate": 1.3721489329217385e-05, "loss": 1.1203, "step": 43395 }, { "epoch": 12.98, "grad_norm": 2.138787269592285, "learning_rate": 1.3716246291458918e-05, "loss": 1.0665, "step": 43400 }, { "epoch": 12.99, "grad_norm": 7.916914939880371, "learning_rate": 1.3711003876890438e-05, "loss": 1.1231, "step": 43405 }, { "epoch": 12.99, "grad_norm": 2.818751811981201, "learning_rate": 1.3705762085801478e-05, "loss": 1.101, "step": 43410 }, { "epoch": 12.99, "grad_norm": 2.751744031906128, "learning_rate": 1.370052091848154e-05, "loss": 0.9642, "step": 43415 }, { "epoch": 12.99, "grad_norm": 1.3935418128967285, "learning_rate": 
1.3695280375220093e-05, "loss": 0.9676, "step": 43420 }, { "epoch": 12.99, "grad_norm": 2.9495058059692383, "learning_rate": 1.3690040456306546e-05, "loss": 1.0966, "step": 43425 }, { "epoch": 12.99, "grad_norm": 3.748474359512329, "learning_rate": 1.3684801162030326e-05, "loss": 1.0841, "step": 43430 }, { "epoch": 13.0, "grad_norm": 1.8227459192276, "learning_rate": 1.3679562492680754e-05, "loss": 0.9915, "step": 43435 }, { "epoch": 13.0, "grad_norm": 4.090573310852051, "learning_rate": 1.3674324448547201e-05, "loss": 0.8671, "step": 43440 }, { "epoch": 13.0, "grad_norm": 3.219416856765747, "learning_rate": 1.3669087029918925e-05, "loss": 1.0274, "step": 43445 }, { "epoch": 13.0, "grad_norm": 1.7496566772460938, "learning_rate": 1.3663850237085196e-05, "loss": 1.1035, "step": 43450 }, { "epoch": 13.0, "grad_norm": 1.8964442014694214, "learning_rate": 1.3658614070335236e-05, "loss": 0.9436, "step": 43455 }, { "epoch": 13.0, "grad_norm": 1.2473254203796387, "learning_rate": 1.365337852995823e-05, "loss": 0.9611, "step": 43460 }, { "epoch": 13.0, "grad_norm": 2.2625179290771484, "learning_rate": 1.3648143616243334e-05, "loss": 0.8075, "step": 43465 }, { "epoch": 13.01, "grad_norm": 1.278442144393921, "learning_rate": 1.3642909329479666e-05, "loss": 0.8595, "step": 43470 }, { "epoch": 13.01, "grad_norm": 3.522331953048706, "learning_rate": 1.3637675669956312e-05, "loss": 1.0262, "step": 43475 }, { "epoch": 13.01, "grad_norm": 2.990205764770508, "learning_rate": 1.363244263796232e-05, "loss": 1.0063, "step": 43480 }, { "epoch": 13.01, "grad_norm": 1.9036306142807007, "learning_rate": 1.3627210233786705e-05, "loss": 0.8788, "step": 43485 }, { "epoch": 13.01, "grad_norm": 3.0223844051361084, "learning_rate": 1.3621978457718449e-05, "loss": 0.9906, "step": 43490 }, { "epoch": 13.01, "grad_norm": 2.851306676864624, "learning_rate": 1.3616747310046493e-05, "loss": 1.1554, "step": 43495 }, { "epoch": 13.01, "grad_norm": 1.1327463388442993, "learning_rate": 
1.3611516791059754e-05, "loss": 0.9168, "step": 43500 }, { "epoch": 13.02, "grad_norm": 2.258399248123169, "learning_rate": 1.3606286901047099e-05, "loss": 0.9594, "step": 43505 }, { "epoch": 13.02, "grad_norm": 2.515151262283325, "learning_rate": 1.3601057640297382e-05, "loss": 0.961, "step": 43510 }, { "epoch": 13.02, "grad_norm": 5.121065616607666, "learning_rate": 1.359582900909941e-05, "loss": 0.8494, "step": 43515 }, { "epoch": 13.02, "grad_norm": 2.3708105087280273, "learning_rate": 1.3590601007741926e-05, "loss": 0.9847, "step": 43520 }, { "epoch": 13.02, "grad_norm": 1.6514267921447754, "learning_rate": 1.3585373636513715e-05, "loss": 1.1194, "step": 43525 }, { "epoch": 13.02, "grad_norm": 4.888957977294922, "learning_rate": 1.3580146895703428e-05, "loss": 1.012, "step": 43530 }, { "epoch": 13.03, "grad_norm": 3.775286912918091, "learning_rate": 1.357492078559978e-05, "loss": 1.0716, "step": 43535 }, { "epoch": 13.03, "grad_norm": 2.8520710468292236, "learning_rate": 1.3569695306491358e-05, "loss": 0.9447, "step": 43540 }, { "epoch": 13.03, "grad_norm": 1.2906252145767212, "learning_rate": 1.3564470458666806e-05, "loss": 0.7763, "step": 43545 }, { "epoch": 13.03, "grad_norm": 3.1408872604370117, "learning_rate": 1.3559246242414653e-05, "loss": 0.9982, "step": 43550 }, { "epoch": 13.03, "grad_norm": 3.5548839569091797, "learning_rate": 1.3554022658023442e-05, "loss": 1.1682, "step": 43555 }, { "epoch": 13.03, "grad_norm": 2.9345786571502686, "learning_rate": 1.3548799705781655e-05, "loss": 1.0853, "step": 43560 }, { "epoch": 13.03, "grad_norm": 1.7587555646896362, "learning_rate": 1.3543577385977763e-05, "loss": 0.9033, "step": 43565 }, { "epoch": 13.04, "grad_norm": 2.7659850120544434, "learning_rate": 1.3538355698900182e-05, "loss": 0.9808, "step": 43570 }, { "epoch": 13.04, "grad_norm": 3.0085887908935547, "learning_rate": 1.3533134644837303e-05, "loss": 1.2206, "step": 43575 }, { "epoch": 13.04, "grad_norm": 2.224841594696045, "learning_rate": 
1.3527914224077475e-05, "loss": 1.0777, "step": 43580 }, { "epoch": 13.04, "grad_norm": 2.03397536277771, "learning_rate": 1.3522694436909022e-05, "loss": 0.9373, "step": 43585 }, { "epoch": 13.04, "grad_norm": 1.9126800298690796, "learning_rate": 1.3517475283620226e-05, "loss": 0.9797, "step": 43590 }, { "epoch": 13.04, "grad_norm": 1.9172559976577759, "learning_rate": 1.3512256764499335e-05, "loss": 1.051, "step": 43595 }, { "epoch": 13.04, "grad_norm": 3.562412738800049, "learning_rate": 1.3507038879834561e-05, "loss": 0.9013, "step": 43600 }, { "epoch": 13.05, "grad_norm": 3.7953248023986816, "learning_rate": 1.3501821629914082e-05, "loss": 0.9603, "step": 43605 }, { "epoch": 13.05, "grad_norm": 5.259886741638184, "learning_rate": 1.3496605015026054e-05, "loss": 0.9906, "step": 43610 }, { "epoch": 13.05, "grad_norm": 1.6214237213134766, "learning_rate": 1.3491389035458557e-05, "loss": 0.8656, "step": 43615 }, { "epoch": 13.05, "grad_norm": 2.001491069793701, "learning_rate": 1.3486173691499698e-05, "loss": 0.8376, "step": 43620 }, { "epoch": 13.05, "grad_norm": 2.1933164596557617, "learning_rate": 1.3480958983437475e-05, "loss": 1.0552, "step": 43625 }, { "epoch": 13.05, "grad_norm": 1.1674703359603882, "learning_rate": 1.347574491155994e-05, "loss": 0.9067, "step": 43630 }, { "epoch": 13.06, "grad_norm": 3.31339693069458, "learning_rate": 1.347053147615501e-05, "loss": 0.9645, "step": 43635 }, { "epoch": 13.06, "grad_norm": 2.1307213306427, "learning_rate": 1.3465318677510664e-05, "loss": 1.1741, "step": 43640 }, { "epoch": 13.06, "grad_norm": 1.4840748310089111, "learning_rate": 1.346010651591477e-05, "loss": 0.8708, "step": 43645 }, { "epoch": 13.06, "grad_norm": 1.7846823930740356, "learning_rate": 1.3454894991655196e-05, "loss": 0.8823, "step": 43650 }, { "epoch": 13.06, "grad_norm": 16.712650299072266, "learning_rate": 1.3449684105019772e-05, "loss": 1.0997, "step": 43655 }, { "epoch": 13.06, "grad_norm": 3.8388795852661133, "learning_rate": 
1.3444473856296277e-05, "loss": 0.992, "step": 43660 }, { "epoch": 13.06, "grad_norm": 1.0792344808578491, "learning_rate": 1.3439264245772502e-05, "loss": 0.9544, "step": 43665 }, { "epoch": 13.07, "grad_norm": 2.6744980812072754, "learning_rate": 1.3434055273736135e-05, "loss": 0.9327, "step": 43670 }, { "epoch": 13.07, "grad_norm": 4.98563289642334, "learning_rate": 1.3428846940474871e-05, "loss": 1.0448, "step": 43675 }, { "epoch": 13.07, "grad_norm": 1.9465887546539307, "learning_rate": 1.3423639246276366e-05, "loss": 1.1599, "step": 43680 }, { "epoch": 13.07, "grad_norm": 3.1894278526306152, "learning_rate": 1.3418432191428232e-05, "loss": 1.129, "step": 43685 }, { "epoch": 13.07, "grad_norm": 1.8298451900482178, "learning_rate": 1.3413225776218048e-05, "loss": 1.0085, "step": 43690 }, { "epoch": 13.07, "grad_norm": 2.9106414318084717, "learning_rate": 1.3408020000933363e-05, "loss": 1.0084, "step": 43695 }, { "epoch": 13.07, "grad_norm": 1.1771775484085083, "learning_rate": 1.340281486586168e-05, "loss": 0.889, "step": 43700 }, { "epoch": 13.08, "grad_norm": 15.508944511413574, "learning_rate": 1.3397610371290492e-05, "loss": 0.9712, "step": 43705 }, { "epoch": 13.08, "grad_norm": 3.3548052310943604, "learning_rate": 1.33924065175072e-05, "loss": 0.9766, "step": 43710 }, { "epoch": 13.08, "grad_norm": 1.3726935386657715, "learning_rate": 1.338720330479925e-05, "loss": 1.0355, "step": 43715 }, { "epoch": 13.08, "grad_norm": 4.749921798706055, "learning_rate": 1.3382000733453976e-05, "loss": 1.154, "step": 43720 }, { "epoch": 13.08, "grad_norm": 3.290040969848633, "learning_rate": 1.3376798803758742e-05, "loss": 1.101, "step": 43725 }, { "epoch": 13.08, "grad_norm": 2.6318535804748535, "learning_rate": 1.3371597516000809e-05, "loss": 1.0034, "step": 43730 }, { "epoch": 13.09, "grad_norm": 2.853180170059204, "learning_rate": 1.3366396870467468e-05, "loss": 1.0768, "step": 43735 }, { "epoch": 13.09, "grad_norm": 4.42519474029541, "learning_rate": 
1.336119686744594e-05, "loss": 0.9629, "step": 43740 }, { "epoch": 13.09, "grad_norm": 3.0336861610412598, "learning_rate": 1.33559975072234e-05, "loss": 0.9401, "step": 43745 }, { "epoch": 13.09, "grad_norm": 3.247124433517456, "learning_rate": 1.335079879008703e-05, "loss": 0.874, "step": 43750 }, { "epoch": 13.09, "grad_norm": 2.6699001789093018, "learning_rate": 1.3345600716323913e-05, "loss": 1.1099, "step": 43755 }, { "epoch": 13.09, "grad_norm": 6.743714332580566, "learning_rate": 1.3340403286221176e-05, "loss": 1.1685, "step": 43760 }, { "epoch": 13.09, "grad_norm": 1.998405933380127, "learning_rate": 1.3335206500065828e-05, "loss": 1.0679, "step": 43765 }, { "epoch": 13.1, "grad_norm": 4.789541721343994, "learning_rate": 1.3330010358144917e-05, "loss": 0.8997, "step": 43770 }, { "epoch": 13.1, "grad_norm": 1.4575897455215454, "learning_rate": 1.3324814860745394e-05, "loss": 1.0083, "step": 43775 }, { "epoch": 13.1, "grad_norm": 3.014540672302246, "learning_rate": 1.331962000815421e-05, "loss": 0.9282, "step": 43780 }, { "epoch": 13.1, "grad_norm": 2.8595590591430664, "learning_rate": 1.3314425800658275e-05, "loss": 1.037, "step": 43785 }, { "epoch": 13.1, "grad_norm": 1.8672281503677368, "learning_rate": 1.3309232238544457e-05, "loss": 1.0224, "step": 43790 }, { "epoch": 13.1, "grad_norm": 1.25929594039917, "learning_rate": 1.3304039322099588e-05, "loss": 0.9784, "step": 43795 }, { "epoch": 13.1, "grad_norm": 2.0091841220855713, "learning_rate": 1.329884705161048e-05, "loss": 0.9481, "step": 43800 }, { "epoch": 13.11, "grad_norm": 3.789120674133301, "learning_rate": 1.3293655427363866e-05, "loss": 0.8126, "step": 43805 }, { "epoch": 13.11, "grad_norm": 2.5179672241210938, "learning_rate": 1.3288464449646503e-05, "loss": 1.1027, "step": 43810 }, { "epoch": 13.11, "grad_norm": 3.7937817573547363, "learning_rate": 1.3283274118745079e-05, "loss": 1.04, "step": 43815 }, { "epoch": 13.11, "grad_norm": 3.776716947555542, "learning_rate": 1.3278084434946247e-05, 
"loss": 1.1246, "step": 43820 }, { "epoch": 13.11, "grad_norm": 3.676992654800415, "learning_rate": 1.3272895398536623e-05, "loss": 0.9363, "step": 43825 }, { "epoch": 13.11, "grad_norm": 2.452056407928467, "learning_rate": 1.32677070098028e-05, "loss": 1.1244, "step": 43830 }, { "epoch": 13.11, "grad_norm": 4.2442307472229, "learning_rate": 1.3262519269031331e-05, "loss": 1.0792, "step": 43835 }, { "epoch": 13.12, "grad_norm": 4.072299957275391, "learning_rate": 1.3257332176508708e-05, "loss": 0.9724, "step": 43840 }, { "epoch": 13.12, "grad_norm": 5.6725311279296875, "learning_rate": 1.3252145732521438e-05, "loss": 0.9115, "step": 43845 }, { "epoch": 13.12, "grad_norm": 4.147802829742432, "learning_rate": 1.324695993735593e-05, "loss": 1.1873, "step": 43850 }, { "epoch": 13.12, "grad_norm": 2.995326042175293, "learning_rate": 1.3241774791298628e-05, "loss": 1.0866, "step": 43855 }, { "epoch": 13.12, "grad_norm": 1.8452882766723633, "learning_rate": 1.323659029463586e-05, "loss": 1.1149, "step": 43860 }, { "epoch": 13.12, "grad_norm": 3.185358762741089, "learning_rate": 1.3231406447654004e-05, "loss": 1.03, "step": 43865 }, { "epoch": 13.13, "grad_norm": 5.549739360809326, "learning_rate": 1.3226223250639328e-05, "loss": 0.9462, "step": 43870 }, { "epoch": 13.13, "grad_norm": 1.393759846687317, "learning_rate": 1.3221040703878101e-05, "loss": 1.0435, "step": 43875 }, { "epoch": 13.13, "grad_norm": 1.9850372076034546, "learning_rate": 1.3215858807656556e-05, "loss": 1.0667, "step": 43880 }, { "epoch": 13.13, "grad_norm": 2.1288838386535645, "learning_rate": 1.3210677562260878e-05, "loss": 0.9594, "step": 43885 }, { "epoch": 13.13, "grad_norm": 1.302878975868225, "learning_rate": 1.3205496967977226e-05, "loss": 0.9986, "step": 43890 }, { "epoch": 13.13, "grad_norm": 2.470869302749634, "learning_rate": 1.3200317025091712e-05, "loss": 0.9677, "step": 43895 }, { "epoch": 13.13, "grad_norm": 3.7706472873687744, "learning_rate": 1.3195137733890428e-05, "loss": 0.9866, 
"step": 43900 }, { "epoch": 13.14, "grad_norm": 1.7888362407684326, "learning_rate": 1.3189959094659415e-05, "loss": 1.061, "step": 43905 }, { "epoch": 13.14, "grad_norm": 2.0343103408813477, "learning_rate": 1.3184781107684687e-05, "loss": 1.1638, "step": 43910 }, { "epoch": 13.14, "grad_norm": 3.1971328258514404, "learning_rate": 1.3179603773252217e-05, "loss": 1.0671, "step": 43915 }, { "epoch": 13.14, "grad_norm": 3.4805822372436523, "learning_rate": 1.3174427091647943e-05, "loss": 1.0129, "step": 43920 }, { "epoch": 13.14, "grad_norm": 1.6715178489685059, "learning_rate": 1.3169251063157767e-05, "loss": 0.8938, "step": 43925 }, { "epoch": 13.14, "grad_norm": 3.162388324737549, "learning_rate": 1.3164075688067567e-05, "loss": 0.8423, "step": 43930 }, { "epoch": 13.14, "grad_norm": 2.0230519771575928, "learning_rate": 1.3158900966663148e-05, "loss": 1.0128, "step": 43935 }, { "epoch": 13.15, "grad_norm": 2.7525076866149902, "learning_rate": 1.315372689923034e-05, "loss": 1.0519, "step": 43940 }, { "epoch": 13.15, "grad_norm": 2.027358055114746, "learning_rate": 1.314855348605486e-05, "loss": 0.9151, "step": 43945 }, { "epoch": 13.15, "grad_norm": 1.6696336269378662, "learning_rate": 1.3143380727422472e-05, "loss": 0.9351, "step": 43950 }, { "epoch": 13.15, "grad_norm": 3.3159263134002686, "learning_rate": 1.3138208623618823e-05, "loss": 1.0519, "step": 43955 }, { "epoch": 13.15, "grad_norm": 12.850335121154785, "learning_rate": 1.3133037174929602e-05, "loss": 1.0565, "step": 43960 }, { "epoch": 13.15, "grad_norm": 2.337259531021118, "learning_rate": 1.3127866381640392e-05, "loss": 1.0833, "step": 43965 }, { "epoch": 13.16, "grad_norm": 2.980786085128784, "learning_rate": 1.312269624403678e-05, "loss": 1.0833, "step": 43970 }, { "epoch": 13.16, "grad_norm": 3.550776958465576, "learning_rate": 1.311752676240431e-05, "loss": 0.792, "step": 43975 }, { "epoch": 13.16, "grad_norm": 5.096564292907715, "learning_rate": 1.3112357937028488e-05, "loss": 0.8951, "step": 
43980 }, { "epoch": 13.16, "grad_norm": 3.9432437419891357, "learning_rate": 1.3107189768194777e-05, "loss": 1.0846, "step": 43985 }, { "epoch": 13.16, "grad_norm": 3.766063928604126, "learning_rate": 1.3102022256188618e-05, "loss": 1.151, "step": 43990 }, { "epoch": 13.16, "grad_norm": 1.3759208917617798, "learning_rate": 1.30968554012954e-05, "loss": 0.8187, "step": 43995 }, { "epoch": 13.16, "grad_norm": 3.239544153213501, "learning_rate": 1.3091689203800483e-05, "loss": 0.8463, "step": 44000 }, { "epoch": 13.17, "grad_norm": 4.143579959869385, "learning_rate": 1.3086523663989197e-05, "loss": 1.0706, "step": 44005 }, { "epoch": 13.17, "grad_norm": 1.883518934249878, "learning_rate": 1.3081358782146823e-05, "loss": 0.9982, "step": 44010 }, { "epoch": 13.17, "grad_norm": 1.332044243812561, "learning_rate": 1.3076194558558617e-05, "loss": 0.9293, "step": 44015 }, { "epoch": 13.17, "grad_norm": 3.111307382583618, "learning_rate": 1.3071030993509788e-05, "loss": 0.9561, "step": 44020 }, { "epoch": 13.17, "grad_norm": 2.8684184551239014, "learning_rate": 1.3065868087285533e-05, "loss": 0.9597, "step": 44025 }, { "epoch": 13.17, "grad_norm": 2.7545359134674072, "learning_rate": 1.3060705840170953e-05, "loss": 0.7545, "step": 44030 }, { "epoch": 13.17, "grad_norm": 2.23874831199646, "learning_rate": 1.3055544252451202e-05, "loss": 0.9002, "step": 44035 }, { "epoch": 13.18, "grad_norm": 3.11095929145813, "learning_rate": 1.3050383324411308e-05, "loss": 0.9736, "step": 44040 }, { "epoch": 13.18, "grad_norm": 2.461932897567749, "learning_rate": 1.3045223056336339e-05, "loss": 1.1343, "step": 44045 }, { "epoch": 13.18, "grad_norm": 3.571146011352539, "learning_rate": 1.3040063448511255e-05, "loss": 0.9675, "step": 44050 }, { "epoch": 13.18, "grad_norm": 3.9316980838775635, "learning_rate": 1.3034904501221057e-05, "loss": 0.9565, "step": 44055 }, { "epoch": 13.18, "grad_norm": 5.21784782409668, "learning_rate": 1.3029746214750636e-05, "loss": 0.9655, "step": 44060 }, { 
"epoch": 13.18, "grad_norm": 3.174137830734253, "learning_rate": 1.3024588589384887e-05, "loss": 1.0195, "step": 44065 }, { "epoch": 13.19, "grad_norm": 3.539970636367798, "learning_rate": 1.3019431625408663e-05, "loss": 1.0647, "step": 44070 }, { "epoch": 13.19, "grad_norm": 4.315314769744873, "learning_rate": 1.3014275323106777e-05, "loss": 1.1308, "step": 44075 }, { "epoch": 13.19, "grad_norm": 2.9523892402648926, "learning_rate": 1.300911968276401e-05, "loss": 0.9636, "step": 44080 }, { "epoch": 13.19, "grad_norm": 2.1555330753326416, "learning_rate": 1.3003964704665096e-05, "loss": 1.0895, "step": 44085 }, { "epoch": 13.19, "grad_norm": 1.9076026678085327, "learning_rate": 1.2998810389094743e-05, "loss": 0.959, "step": 44090 }, { "epoch": 13.19, "grad_norm": 2.8785440921783447, "learning_rate": 1.2993656736337617e-05, "loss": 0.9798, "step": 44095 }, { "epoch": 13.19, "grad_norm": 7.201198101043701, "learning_rate": 1.2988503746678348e-05, "loss": 0.9449, "step": 44100 }, { "epoch": 13.2, "grad_norm": 2.4313573837280273, "learning_rate": 1.2983351420401535e-05, "loss": 1.0374, "step": 44105 }, { "epoch": 13.2, "grad_norm": 3.2586467266082764, "learning_rate": 1.297819975779173e-05, "loss": 0.9367, "step": 44110 }, { "epoch": 13.2, "grad_norm": 2.9108598232269287, "learning_rate": 1.297304875913346e-05, "loss": 1.2829, "step": 44115 }, { "epoch": 13.2, "grad_norm": 4.189149379730225, "learning_rate": 1.2967898424711203e-05, "loss": 0.8909, "step": 44120 }, { "epoch": 13.2, "grad_norm": 2.829456090927124, "learning_rate": 1.296274875480941e-05, "loss": 1.0128, "step": 44125 }, { "epoch": 13.2, "grad_norm": 1.993833303451538, "learning_rate": 1.29575997497125e-05, "loss": 0.9394, "step": 44130 }, { "epoch": 13.2, "grad_norm": 1.8777223825454712, "learning_rate": 1.2952451409704818e-05, "loss": 0.9643, "step": 44135 }, { "epoch": 13.21, "grad_norm": 2.4245407581329346, "learning_rate": 1.2947303735070742e-05, "loss": 0.9848, "step": 44140 }, { "epoch": 13.21, 
"grad_norm": 3.382310628890991, "learning_rate": 1.2942156726094534e-05, "loss": 1.0159, "step": 44145 }, { "epoch": 13.21, "grad_norm": 3.079493761062622, "learning_rate": 1.29370103830605e-05, "loss": 0.8951, "step": 44150 }, { "epoch": 13.21, "grad_norm": 2.4276509284973145, "learning_rate": 1.2931864706252828e-05, "loss": 1.2272, "step": 44155 }, { "epoch": 13.21, "grad_norm": 3.1140429973602295, "learning_rate": 1.2926719695955727e-05, "loss": 1.0597, "step": 44160 }, { "epoch": 13.21, "grad_norm": 2.4898757934570312, "learning_rate": 1.2921575352453347e-05, "loss": 0.9542, "step": 44165 }, { "epoch": 13.22, "grad_norm": 2.116917133331299, "learning_rate": 1.2916431676029806e-05, "loss": 0.9554, "step": 44170 }, { "epoch": 13.22, "grad_norm": 3.3684239387512207, "learning_rate": 1.2911288666969184e-05, "loss": 1.057, "step": 44175 }, { "epoch": 13.22, "grad_norm": 3.8344614505767822, "learning_rate": 1.2906146325555522e-05, "loss": 0.9452, "step": 44180 }, { "epoch": 13.22, "grad_norm": 3.2199113368988037, "learning_rate": 1.2901004652072826e-05, "loss": 0.9491, "step": 44185 }, { "epoch": 13.22, "grad_norm": 2.2717814445495605, "learning_rate": 1.289586364680507e-05, "loss": 1.0688, "step": 44190 }, { "epoch": 13.22, "grad_norm": 2.989772319793701, "learning_rate": 1.2890723310036181e-05, "loss": 1.018, "step": 44195 }, { "epoch": 13.22, "grad_norm": 9.16136360168457, "learning_rate": 1.2885583642050058e-05, "loss": 1.0984, "step": 44200 }, { "epoch": 13.23, "grad_norm": 1.604578971862793, "learning_rate": 1.2880444643130556e-05, "loss": 1.0616, "step": 44205 }, { "epoch": 13.23, "grad_norm": 2.7663040161132812, "learning_rate": 1.2875306313561502e-05, "loss": 0.9586, "step": 44210 }, { "epoch": 13.23, "grad_norm": 1.7422765493392944, "learning_rate": 1.2870168653626677e-05, "loss": 1.0868, "step": 44215 }, { "epoch": 13.23, "grad_norm": 3.091707944869995, "learning_rate": 1.2865031663609827e-05, "loss": 1.1116, "step": 44220 }, { "epoch": 13.23, "grad_norm": 
2.8757541179656982, "learning_rate": 1.2859895343794676e-05, "loss": 1.0587, "step": 44225 }, { "epoch": 13.23, "grad_norm": 2.7807388305664062, "learning_rate": 1.2854759694464868e-05, "loss": 1.175, "step": 44230 }, { "epoch": 13.23, "grad_norm": 1.115530252456665, "learning_rate": 1.2849624715904074e-05, "loss": 0.9452, "step": 44235 }, { "epoch": 13.24, "grad_norm": 4.6550822257995605, "learning_rate": 1.284449040839586e-05, "loss": 1.0934, "step": 44240 }, { "epoch": 13.24, "grad_norm": 2.822744369506836, "learning_rate": 1.2839356772223823e-05, "loss": 1.1822, "step": 44245 }, { "epoch": 13.24, "grad_norm": 2.8846404552459717, "learning_rate": 1.2834223807671453e-05, "loss": 1.1075, "step": 44250 }, { "epoch": 13.24, "grad_norm": 2.568181037902832, "learning_rate": 1.2829091515022279e-05, "loss": 0.876, "step": 44255 }, { "epoch": 13.24, "grad_norm": 5.2756123542785645, "learning_rate": 1.2823959894559718e-05, "loss": 1.0383, "step": 44260 }, { "epoch": 13.24, "grad_norm": 1.8959022760391235, "learning_rate": 1.2818828946567194e-05, "loss": 0.9319, "step": 44265 }, { "epoch": 13.25, "grad_norm": 2.6469838619232178, "learning_rate": 1.2813698671328084e-05, "loss": 1.0376, "step": 44270 }, { "epoch": 13.25, "grad_norm": 2.5506365299224854, "learning_rate": 1.2808569069125734e-05, "loss": 0.944, "step": 44275 }, { "epoch": 13.25, "grad_norm": 2.260101079940796, "learning_rate": 1.280344014024344e-05, "loss": 0.8573, "step": 44280 }, { "epoch": 13.25, "grad_norm": 1.153891921043396, "learning_rate": 1.279831188496447e-05, "loss": 1.0468, "step": 44285 }, { "epoch": 13.25, "grad_norm": 3.153810977935791, "learning_rate": 1.279318430357205e-05, "loss": 0.854, "step": 44290 }, { "epoch": 13.25, "grad_norm": 2.335554838180542, "learning_rate": 1.2788057396349374e-05, "loss": 0.9501, "step": 44295 }, { "epoch": 13.25, "grad_norm": 1.3886281251907349, "learning_rate": 1.2782931163579593e-05, "loss": 1.0305, "step": 44300 }, { "epoch": 13.26, "grad_norm": 
1.6027575731277466, "learning_rate": 1.2777805605545828e-05, "loss": 0.9369, "step": 44305 }, { "epoch": 13.26, "grad_norm": 2.810401201248169, "learning_rate": 1.2772680722531153e-05, "loss": 0.8592, "step": 44310 }, { "epoch": 13.26, "grad_norm": 4.480473518371582, "learning_rate": 1.2767556514818613e-05, "loss": 0.9118, "step": 44315 }, { "epoch": 13.26, "grad_norm": 5.290129661560059, "learning_rate": 1.2762432982691219e-05, "loss": 0.8724, "step": 44320 }, { "epoch": 13.26, "grad_norm": 3.318096160888672, "learning_rate": 1.2757310126431915e-05, "loss": 0.8357, "step": 44325 }, { "epoch": 13.26, "grad_norm": 2.25472354888916, "learning_rate": 1.2752187946323663e-05, "loss": 0.9452, "step": 44330 }, { "epoch": 13.26, "grad_norm": 3.4436185359954834, "learning_rate": 1.2747066442649324e-05, "loss": 0.9833, "step": 44335 }, { "epoch": 13.27, "grad_norm": 5.141889572143555, "learning_rate": 1.2741945615691785e-05, "loss": 1.0254, "step": 44340 }, { "epoch": 13.27, "grad_norm": 2.958003520965576, "learning_rate": 1.2736825465733832e-05, "loss": 0.8983, "step": 44345 }, { "epoch": 13.27, "grad_norm": 1.62663996219635, "learning_rate": 1.2731705993058279e-05, "loss": 0.9047, "step": 44350 }, { "epoch": 13.27, "grad_norm": 1.6435695886611938, "learning_rate": 1.2726587197947843e-05, "loss": 0.9762, "step": 44355 }, { "epoch": 13.27, "grad_norm": 2.0215165615081787, "learning_rate": 1.2721469080685238e-05, "loss": 1.058, "step": 44360 }, { "epoch": 13.27, "grad_norm": 7.587679386138916, "learning_rate": 1.2716351641553137e-05, "loss": 0.797, "step": 44365 }, { "epoch": 13.27, "grad_norm": 4.353124141693115, "learning_rate": 1.271123488083416e-05, "loss": 0.9351, "step": 44370 }, { "epoch": 13.28, "grad_norm": 1.8056886196136475, "learning_rate": 1.2706118798810912e-05, "loss": 1.0511, "step": 44375 }, { "epoch": 13.28, "grad_norm": 2.1814777851104736, "learning_rate": 1.2701003395765942e-05, "loss": 0.9481, "step": 44380 }, { "epoch": 13.28, "grad_norm": 
2.314023017883301, "learning_rate": 1.2695888671981771e-05, "loss": 1.0634, "step": 44385 }, { "epoch": 13.28, "grad_norm": 2.8826498985290527, "learning_rate": 1.2690774627740875e-05, "loss": 0.9873, "step": 44390 }, { "epoch": 13.28, "grad_norm": 2.8844268321990967, "learning_rate": 1.2685661263325707e-05, "loss": 0.8023, "step": 44395 }, { "epoch": 13.28, "grad_norm": 2.0241379737854004, "learning_rate": 1.2680548579018666e-05, "loss": 0.9862, "step": 44400 }, { "epoch": 13.29, "grad_norm": 2.5573320388793945, "learning_rate": 1.2676458921440643e-05, "loss": 0.8519, "step": 44405 }, { "epoch": 13.29, "grad_norm": 2.713150978088379, "learning_rate": 1.2671347462039773e-05, "loss": 1.0685, "step": 44410 }, { "epoch": 13.29, "grad_norm": 2.36755633354187, "learning_rate": 1.2666236683537571e-05, "loss": 0.8845, "step": 44415 }, { "epoch": 13.29, "grad_norm": 3.59240984916687, "learning_rate": 1.26611265862163e-05, "loss": 1.0012, "step": 44420 }, { "epoch": 13.29, "grad_norm": 1.345497965812683, "learning_rate": 1.2656017170358175e-05, "loss": 1.0763, "step": 44425 }, { "epoch": 13.29, "grad_norm": 2.690333604812622, "learning_rate": 1.2650908436245395e-05, "loss": 1.0471, "step": 44430 }, { "epoch": 13.29, "grad_norm": 3.9363012313842773, "learning_rate": 1.2645800384160103e-05, "loss": 1.0104, "step": 44435 }, { "epoch": 13.3, "grad_norm": 3.2774622440338135, "learning_rate": 1.2640693014384417e-05, "loss": 1.052, "step": 44440 }, { "epoch": 13.3, "grad_norm": 5.962934494018555, "learning_rate": 1.2635586327200408e-05, "loss": 1.044, "step": 44445 }, { "epoch": 13.3, "grad_norm": 1.6138075590133667, "learning_rate": 1.2630480322890114e-05, "loss": 1.0042, "step": 44450 }, { "epoch": 13.3, "grad_norm": 4.094041347503662, "learning_rate": 1.2625375001735534e-05, "loss": 0.9887, "step": 44455 }, { "epoch": 13.3, "grad_norm": 10.620294570922852, "learning_rate": 1.2620270364018633e-05, "loss": 0.9852, "step": 44460 }, { "epoch": 13.3, "grad_norm": 1.635864496231079, 
"learning_rate": 1.2615166410021329e-05, "loss": 1.0061, "step": 44465 }, { "epoch": 13.3, "grad_norm": 5.53233528137207, "learning_rate": 1.2610063140025519e-05, "loss": 1.0371, "step": 44470 }, { "epoch": 13.31, "grad_norm": 3.4139328002929688, "learning_rate": 1.2604960554313027e-05, "loss": 1.0421, "step": 44475 }, { "epoch": 13.31, "grad_norm": 4.036562919616699, "learning_rate": 1.2599858653165698e-05, "loss": 0.8257, "step": 44480 }, { "epoch": 13.31, "grad_norm": 5.966806411743164, "learning_rate": 1.2594757436865265e-05, "loss": 1.1459, "step": 44485 }, { "epoch": 13.31, "grad_norm": 2.0404038429260254, "learning_rate": 1.2589656905693503e-05, "loss": 1.0175, "step": 44490 }, { "epoch": 13.31, "grad_norm": 2.6337130069732666, "learning_rate": 1.2584557059932076e-05, "loss": 1.0467, "step": 44495 }, { "epoch": 13.31, "grad_norm": 5.764756679534912, "learning_rate": 1.2579457899862673e-05, "loss": 1.0118, "step": 44500 }, { "epoch": 13.32, "grad_norm": 8.07419490814209, "learning_rate": 1.257435942576689e-05, "loss": 1.1198, "step": 44505 }, { "epoch": 13.32, "grad_norm": 2.342231273651123, "learning_rate": 1.2569261637926322e-05, "loss": 1.1913, "step": 44510 }, { "epoch": 13.32, "grad_norm": 2.863327980041504, "learning_rate": 1.2564164536622513e-05, "loss": 0.9799, "step": 44515 }, { "epoch": 13.32, "grad_norm": 2.152618885040283, "learning_rate": 1.255906812213697e-05, "loss": 0.9915, "step": 44520 }, { "epoch": 13.32, "grad_norm": 1.8867714405059814, "learning_rate": 1.2553972394751162e-05, "loss": 0.9104, "step": 44525 }, { "epoch": 13.32, "grad_norm": 4.3180460929870605, "learning_rate": 1.2548877354746519e-05, "loss": 0.8839, "step": 44530 }, { "epoch": 13.32, "grad_norm": 3.7813620567321777, "learning_rate": 1.254378300240444e-05, "loss": 1.0885, "step": 44535 }, { "epoch": 13.33, "grad_norm": 2.9510982036590576, "learning_rate": 1.2538689338006282e-05, "loss": 1.0409, "step": 44540 }, { "epoch": 13.33, "grad_norm": 2.6716909408569336, 
"learning_rate": 1.2533596361833355e-05, "loss": 1.0121, "step": 44545 }, { "epoch": 13.33, "grad_norm": 3.946298837661743, "learning_rate": 1.2528504074166941e-05, "loss": 0.9055, "step": 44550 }, { "epoch": 13.33, "grad_norm": 2.191622018814087, "learning_rate": 1.2523412475288288e-05, "loss": 1.0652, "step": 44555 }, { "epoch": 13.33, "grad_norm": 4.9477858543396, "learning_rate": 1.2518321565478593e-05, "loss": 0.8954, "step": 44560 }, { "epoch": 13.33, "grad_norm": 1.2523143291473389, "learning_rate": 1.2513231345019032e-05, "loss": 1.0451, "step": 44565 }, { "epoch": 13.33, "grad_norm": 2.840963840484619, "learning_rate": 1.250814181419071e-05, "loss": 0.8473, "step": 44570 }, { "epoch": 13.34, "grad_norm": 2.3389129638671875, "learning_rate": 1.2503052973274749e-05, "loss": 1.0362, "step": 44575 }, { "epoch": 13.34, "grad_norm": 2.131584882736206, "learning_rate": 1.2497964822552161e-05, "loss": 1.0984, "step": 44580 }, { "epoch": 13.34, "grad_norm": 6.629162311553955, "learning_rate": 1.2492877362304e-05, "loss": 1.0674, "step": 44585 }, { "epoch": 13.34, "grad_norm": 4.698531627655029, "learning_rate": 1.2487790592811202e-05, "loss": 0.8879, "step": 44590 }, { "epoch": 13.34, "grad_norm": 3.542203187942505, "learning_rate": 1.2482704514354746e-05, "loss": 0.916, "step": 44595 }, { "epoch": 13.34, "grad_norm": 6.20295524597168, "learning_rate": 1.2477619127215498e-05, "loss": 1.0514, "step": 44600 }, { "epoch": 13.35, "grad_norm": 7.1144022941589355, "learning_rate": 1.2472534431674327e-05, "loss": 0.9958, "step": 44605 }, { "epoch": 13.35, "grad_norm": 1.586234211921692, "learning_rate": 1.2467450428012059e-05, "loss": 0.9424, "step": 44610 }, { "epoch": 13.35, "grad_norm": 3.302457332611084, "learning_rate": 1.246236711650948e-05, "loss": 0.9997, "step": 44615 }, { "epoch": 13.35, "grad_norm": 3.5869815349578857, "learning_rate": 1.245728449744733e-05, "loss": 1.0039, "step": 44620 }, { "epoch": 13.35, "grad_norm": 4.133788108825684, "learning_rate": 
1.245220257110632e-05, "loss": 0.8164, "step": 44625 }, { "epoch": 13.35, "grad_norm": 1.5097733736038208, "learning_rate": 1.2447121337767121e-05, "loss": 0.7921, "step": 44630 }, { "epoch": 13.35, "grad_norm": 1.8934462070465088, "learning_rate": 1.2442040797710358e-05, "loss": 0.9432, "step": 44635 }, { "epoch": 13.36, "grad_norm": 7.073252201080322, "learning_rate": 1.2436960951216633e-05, "loss": 0.9477, "step": 44640 }, { "epoch": 13.36, "grad_norm": 2.9802677631378174, "learning_rate": 1.2431881798566495e-05, "loss": 0.9598, "step": 44645 }, { "epoch": 13.36, "grad_norm": 1.559698224067688, "learning_rate": 1.2426803340040461e-05, "loss": 0.9934, "step": 44650 }, { "epoch": 13.36, "grad_norm": 1.7870979309082031, "learning_rate": 1.242172557591901e-05, "loss": 1.0398, "step": 44655 }, { "epoch": 13.36, "grad_norm": 4.0721755027771, "learning_rate": 1.2416648506482589e-05, "loss": 1.0168, "step": 44660 }, { "epoch": 13.36, "grad_norm": 4.374423980712891, "learning_rate": 1.2411572132011572e-05, "loss": 0.9884, "step": 44665 }, { "epoch": 13.36, "grad_norm": 3.6331615447998047, "learning_rate": 1.2406496452786364e-05, "loss": 0.9356, "step": 44670 }, { "epoch": 13.37, "grad_norm": 1.985280156135559, "learning_rate": 1.2401421469087246e-05, "loss": 1.0527, "step": 44675 }, { "epoch": 13.37, "grad_norm": 3.937107563018799, "learning_rate": 1.2396347181194542e-05, "loss": 1.0093, "step": 44680 }, { "epoch": 13.37, "grad_norm": 1.2847137451171875, "learning_rate": 1.2391273589388463e-05, "loss": 1.065, "step": 44685 }, { "epoch": 13.37, "grad_norm": 2.778695583343506, "learning_rate": 1.238620069394926e-05, "loss": 1.0256, "step": 44690 }, { "epoch": 13.37, "grad_norm": 2.4905941486358643, "learning_rate": 1.2381128495157071e-05, "loss": 0.9014, "step": 44695 }, { "epoch": 13.37, "grad_norm": 3.061601400375366, "learning_rate": 1.2376056993292037e-05, "loss": 0.9241, "step": 44700 }, { "epoch": 13.38, "grad_norm": 5.156485557556152, "learning_rate": 
1.2370986188634253e-05, "loss": 1.0604, "step": 44705 }, { "epoch": 13.38, "grad_norm": 2.8037447929382324, "learning_rate": 1.2365916081463777e-05, "loss": 0.852, "step": 44710 }, { "epoch": 13.38, "grad_norm": 2.281531572341919, "learning_rate": 1.2360846672060622e-05, "loss": 1.1074, "step": 44715 }, { "epoch": 13.38, "grad_norm": 3.54512357711792, "learning_rate": 1.2355777960704767e-05, "loss": 0.9447, "step": 44720 }, { "epoch": 13.38, "grad_norm": 2.5671660900115967, "learning_rate": 1.2350709947676153e-05, "loss": 1.0479, "step": 44725 }, { "epoch": 13.38, "grad_norm": 1.9921631813049316, "learning_rate": 1.2345642633254682e-05, "loss": 0.9497, "step": 44730 }, { "epoch": 13.38, "grad_norm": 1.9253196716308594, "learning_rate": 1.2340576017720212e-05, "loss": 1.0235, "step": 44735 }, { "epoch": 13.39, "grad_norm": 5.337843418121338, "learning_rate": 1.233551010135257e-05, "loss": 1.1129, "step": 44740 }, { "epoch": 13.39, "grad_norm": 3.605689764022827, "learning_rate": 1.2330444884431544e-05, "loss": 1.0846, "step": 44745 }, { "epoch": 13.39, "grad_norm": 2.8609673976898193, "learning_rate": 1.2325380367236875e-05, "loss": 1.0635, "step": 44750 }, { "epoch": 13.39, "grad_norm": 2.364123582839966, "learning_rate": 1.2320316550048287e-05, "loss": 0.951, "step": 44755 }, { "epoch": 13.39, "grad_norm": 7.171827793121338, "learning_rate": 1.2315253433145412e-05, "loss": 0.8352, "step": 44760 }, { "epoch": 13.39, "grad_norm": 3.1451687812805176, "learning_rate": 1.2310191016807925e-05, "loss": 0.9133, "step": 44765 }, { "epoch": 13.39, "grad_norm": 2.174330711364746, "learning_rate": 1.2305129301315382e-05, "loss": 1.0346, "step": 44770 }, { "epoch": 13.4, "grad_norm": 2.454254150390625, "learning_rate": 1.230006828694737e-05, "loss": 1.0938, "step": 44775 }, { "epoch": 13.4, "grad_norm": 2.6113619804382324, "learning_rate": 1.2295007973983366e-05, "loss": 0.9711, "step": 44780 }, { "epoch": 13.4, "grad_norm": 2.4465274810791016, "learning_rate": 
1.2289948362702883e-05, "loss": 1.0361, "step": 44785 }, { "epoch": 13.4, "grad_norm": 2.1108946800231934, "learning_rate": 1.2284889453385334e-05, "loss": 1.0811, "step": 44790 }, { "epoch": 13.4, "grad_norm": 3.172182083129883, "learning_rate": 1.2279831246310122e-05, "loss": 1.0407, "step": 44795 }, { "epoch": 13.4, "grad_norm": 1.988529086112976, "learning_rate": 1.2274773741756607e-05, "loss": 1.0839, "step": 44800 }, { "epoch": 13.41, "grad_norm": 2.424704074859619, "learning_rate": 1.2269716940004103e-05, "loss": 1.0702, "step": 44805 }, { "epoch": 13.41, "grad_norm": 1.8845196962356567, "learning_rate": 1.2264660841331921e-05, "loss": 0.9947, "step": 44810 }, { "epoch": 13.41, "grad_norm": 1.3340157270431519, "learning_rate": 1.2259605446019261e-05, "loss": 1.1926, "step": 44815 }, { "epoch": 13.41, "grad_norm": 1.2041699886322021, "learning_rate": 1.225455075434537e-05, "loss": 1.128, "step": 44820 }, { "epoch": 13.41, "grad_norm": 2.0719640254974365, "learning_rate": 1.2249496766589382e-05, "loss": 1.0323, "step": 44825 }, { "epoch": 13.41, "grad_norm": 2.7494218349456787, "learning_rate": 1.2244443483030438e-05, "loss": 1.1221, "step": 44830 }, { "epoch": 13.41, "grad_norm": 1.4055849313735962, "learning_rate": 1.2239390903947618e-05, "loss": 1.0946, "step": 44835 }, { "epoch": 13.42, "grad_norm": 3.2642478942871094, "learning_rate": 1.2234339029619974e-05, "loss": 1.0766, "step": 44840 }, { "epoch": 13.42, "grad_norm": 1.8189183473587036, "learning_rate": 1.2229287860326519e-05, "loss": 0.8681, "step": 44845 }, { "epoch": 13.42, "grad_norm": 2.4354348182678223, "learning_rate": 1.222423739634622e-05, "loss": 0.722, "step": 44850 }, { "epoch": 13.42, "grad_norm": 1.813585638999939, "learning_rate": 1.2219187637958007e-05, "loss": 1.1683, "step": 44855 }, { "epoch": 13.42, "grad_norm": 3.655863046646118, "learning_rate": 1.221413858544079e-05, "loss": 1.067, "step": 44860 }, { "epoch": 13.42, "grad_norm": 8.485624313354492, "learning_rate": 
1.2209090239073386e-05, "loss": 1.0451, "step": 44865 }, { "epoch": 13.42, "grad_norm": 4.247042655944824, "learning_rate": 1.2204042599134654e-05, "loss": 1.1318, "step": 44870 }, { "epoch": 13.43, "grad_norm": 3.260227680206299, "learning_rate": 1.219899566590333e-05, "loss": 0.9493, "step": 44875 }, { "epoch": 13.43, "grad_norm": 2.963711977005005, "learning_rate": 1.2193949439658187e-05, "loss": 0.919, "step": 44880 }, { "epoch": 13.43, "grad_norm": 3.6867597103118896, "learning_rate": 1.2188903920677896e-05, "loss": 1.0995, "step": 44885 }, { "epoch": 13.43, "grad_norm": 2.1366159915924072, "learning_rate": 1.2183859109241116e-05, "loss": 0.9898, "step": 44890 }, { "epoch": 13.43, "grad_norm": 4.271877288818359, "learning_rate": 1.2178815005626493e-05, "loss": 1.0278, "step": 44895 }, { "epoch": 13.43, "grad_norm": 7.151296615600586, "learning_rate": 1.2173771610112574e-05, "loss": 1.0398, "step": 44900 }, { "epoch": 13.44, "grad_norm": 2.853313684463501, "learning_rate": 1.2168728922977934e-05, "loss": 1.0188, "step": 44905 }, { "epoch": 13.44, "grad_norm": 1.6056421995162964, "learning_rate": 1.216368694450104e-05, "loss": 0.8771, "step": 44910 }, { "epoch": 13.44, "grad_norm": 1.6957907676696777, "learning_rate": 1.2158645674960392e-05, "loss": 0.9153, "step": 44915 }, { "epoch": 13.44, "grad_norm": 13.22221851348877, "learning_rate": 1.2153605114634387e-05, "loss": 1.1427, "step": 44920 }, { "epoch": 13.44, "grad_norm": 4.49719762802124, "learning_rate": 1.2148565263801417e-05, "loss": 1.0046, "step": 44925 }, { "epoch": 13.44, "grad_norm": 8.903334617614746, "learning_rate": 1.2143526122739832e-05, "loss": 0.9307, "step": 44930 }, { "epoch": 13.44, "grad_norm": 2.33970046043396, "learning_rate": 1.2138487691727932e-05, "loss": 1.0706, "step": 44935 }, { "epoch": 13.45, "grad_norm": 2.2916619777679443, "learning_rate": 1.2133449971043992e-05, "loss": 1.002, "step": 44940 }, { "epoch": 13.45, "grad_norm": 3.4537885189056396, "learning_rate": 
1.2128412960966231e-05, "loss": 1.0616, "step": 44945 }, { "epoch": 13.45, "grad_norm": 2.979152202606201, "learning_rate": 1.2123376661772848e-05, "loss": 0.9152, "step": 44950 }, { "epoch": 13.45, "grad_norm": 1.5841301679611206, "learning_rate": 1.2118341073741993e-05, "loss": 0.9443, "step": 44955 }, { "epoch": 13.45, "grad_norm": 8.155377388000488, "learning_rate": 1.2113306197151754e-05, "loss": 0.9218, "step": 44960 }, { "epoch": 13.45, "grad_norm": 3.3958740234375, "learning_rate": 1.2108272032280226e-05, "loss": 0.9962, "step": 44965 }, { "epoch": 13.45, "grad_norm": 3.104916572570801, "learning_rate": 1.2103238579405437e-05, "loss": 1.0308, "step": 44970 }, { "epoch": 13.46, "grad_norm": 1.5764812231063843, "learning_rate": 1.2098205838805373e-05, "loss": 0.9052, "step": 44975 }, { "epoch": 13.46, "grad_norm": 1.8205498456954956, "learning_rate": 1.2093173810757991e-05, "loss": 1.1331, "step": 44980 }, { "epoch": 13.46, "grad_norm": 3.6816673278808594, "learning_rate": 1.2088142495541205e-05, "loss": 0.9301, "step": 44985 }, { "epoch": 13.46, "grad_norm": 3.829503059387207, "learning_rate": 1.2083111893432893e-05, "loss": 1.0146, "step": 44990 }, { "epoch": 13.46, "grad_norm": 2.2091660499572754, "learning_rate": 1.207808200471087e-05, "loss": 0.9556, "step": 44995 }, { "epoch": 13.46, "grad_norm": 4.129636287689209, "learning_rate": 1.2074058607558082e-05, "loss": 1.0866, "step": 45000 }, { "epoch": 13.46, "grad_norm": 3.674562454223633, "learning_rate": 1.206903000363143e-05, "loss": 1.1933, "step": 45005 }, { "epoch": 13.47, "grad_norm": 3.4031946659088135, "learning_rate": 1.2064002113868822e-05, "loss": 1.0472, "step": 45010 }, { "epoch": 13.47, "grad_norm": 4.102842330932617, "learning_rate": 1.2058974938547913e-05, "loss": 1.0977, "step": 45015 }, { "epoch": 13.47, "grad_norm": 1.0470472574234009, "learning_rate": 1.205394847794638e-05, "loss": 1.0491, "step": 45020 }, { "epoch": 13.47, "grad_norm": 4.4615702629089355, "learning_rate": 
1.2048922732341802e-05, "loss": 1.0362, "step": 45025 }, { "epoch": 13.47, "grad_norm": 1.8083804845809937, "learning_rate": 1.2043897702011777e-05, "loss": 0.9582, "step": 45030 }, { "epoch": 13.47, "grad_norm": 1.3587656021118164, "learning_rate": 1.2038873387233806e-05, "loss": 1.0904, "step": 45035 }, { "epoch": 13.48, "grad_norm": 9.736397743225098, "learning_rate": 1.2033849788285387e-05, "loss": 1.0481, "step": 45040 }, { "epoch": 13.48, "grad_norm": 1.1890517473220825, "learning_rate": 1.2028826905443965e-05, "loss": 1.0538, "step": 45045 }, { "epoch": 13.48, "grad_norm": 1.4753776788711548, "learning_rate": 1.2023804738986948e-05, "loss": 1.0234, "step": 45050 }, { "epoch": 13.48, "grad_norm": 2.6727852821350098, "learning_rate": 1.2018783289191713e-05, "loss": 1.0864, "step": 45055 }, { "epoch": 13.48, "grad_norm": 3.8187108039855957, "learning_rate": 1.2013762556335578e-05, "loss": 1.0876, "step": 45060 }, { "epoch": 13.48, "grad_norm": 2.2639684677124023, "learning_rate": 1.2008742540695842e-05, "loss": 1.1417, "step": 45065 }, { "epoch": 13.48, "grad_norm": 1.7074878215789795, "learning_rate": 1.2003723242549749e-05, "loss": 0.9508, "step": 45070 }, { "epoch": 13.49, "grad_norm": 3.0342020988464355, "learning_rate": 1.1998704662174515e-05, "loss": 0.8973, "step": 45075 }, { "epoch": 13.49, "grad_norm": 2.464416980743408, "learning_rate": 1.1993686799847307e-05, "loss": 1.093, "step": 45080 }, { "epoch": 13.49, "grad_norm": 2.7546801567077637, "learning_rate": 1.1988669655845259e-05, "loss": 1.1162, "step": 45085 }, { "epoch": 13.49, "grad_norm": 7.984701156616211, "learning_rate": 1.198365323044546e-05, "loss": 0.9271, "step": 45090 }, { "epoch": 13.49, "grad_norm": 2.5185933113098145, "learning_rate": 1.1978637523924962e-05, "loss": 0.9088, "step": 45095 }, { "epoch": 13.49, "grad_norm": 5.400019645690918, "learning_rate": 1.1973622536560783e-05, "loss": 1.0487, "step": 45100 }, { "epoch": 13.49, "grad_norm": 1.2262287139892578, "learning_rate": 
1.1968608268629897e-05, "loss": 0.9563, "step": 45105 }, { "epoch": 13.5, "grad_norm": 1.9510408639907837, "learning_rate": 1.1963594720409213e-05, "loss": 0.9829, "step": 45110 }, { "epoch": 13.5, "grad_norm": 3.9547598361968994, "learning_rate": 1.1958581892175664e-05, "loss": 1.0245, "step": 45115 }, { "epoch": 13.5, "grad_norm": 2.1796655654907227, "learning_rate": 1.1953569784206056e-05, "loss": 0.8948, "step": 45120 }, { "epoch": 13.5, "grad_norm": 1.8676890134811401, "learning_rate": 1.194855839677725e-05, "loss": 0.9237, "step": 45125 }, { "epoch": 13.5, "grad_norm": 2.9657278060913086, "learning_rate": 1.194354773016599e-05, "loss": 0.9703, "step": 45130 }, { "epoch": 13.5, "grad_norm": 3.7803890705108643, "learning_rate": 1.1938537784649015e-05, "loss": 0.941, "step": 45135 }, { "epoch": 13.51, "grad_norm": 4.787195205688477, "learning_rate": 1.1933528560503021e-05, "loss": 0.854, "step": 45140 }, { "epoch": 13.51, "grad_norm": 2.3021395206451416, "learning_rate": 1.1928520058004666e-05, "loss": 0.9937, "step": 45145 }, { "epoch": 13.51, "grad_norm": 3.430438995361328, "learning_rate": 1.192351227743056e-05, "loss": 1.0839, "step": 45150 }, { "epoch": 13.51, "grad_norm": 3.5404064655303955, "learning_rate": 1.1918505219057265e-05, "loss": 1.0443, "step": 45155 }, { "epoch": 13.51, "grad_norm": 1.529388427734375, "learning_rate": 1.1913498883161354e-05, "loss": 1.0485, "step": 45160 }, { "epoch": 13.51, "grad_norm": 3.046905755996704, "learning_rate": 1.1908493270019283e-05, "loss": 1.1611, "step": 45165 }, { "epoch": 13.51, "grad_norm": 2.4746837615966797, "learning_rate": 1.1903488379907524e-05, "loss": 0.9898, "step": 45170 }, { "epoch": 13.52, "grad_norm": 3.2525274753570557, "learning_rate": 1.1898484213102487e-05, "loss": 1.036, "step": 45175 }, { "epoch": 13.52, "grad_norm": 2.007542371749878, "learning_rate": 1.1893480769880549e-05, "loss": 1.0232, "step": 45180 }, { "epoch": 13.52, "grad_norm": 3.283017873764038, "learning_rate": 
1.1888478050518046e-05, "loss": 1.0205, "step": 45185 }, { "epoch": 13.52, "grad_norm": 3.2330360412597656, "learning_rate": 1.188347605529127e-05, "loss": 0.9873, "step": 45190 }, { "epoch": 13.52, "grad_norm": 3.1978647708892822, "learning_rate": 1.1878474784476478e-05, "loss": 1.0942, "step": 45195 }, { "epoch": 13.52, "grad_norm": 3.234929084777832, "learning_rate": 1.1873474238349894e-05, "loss": 1.181, "step": 45200 }, { "epoch": 13.52, "grad_norm": 3.867767333984375, "learning_rate": 1.1868474417187666e-05, "loss": 0.9687, "step": 45205 }, { "epoch": 13.53, "grad_norm": 2.2236828804016113, "learning_rate": 1.1863475321265962e-05, "loss": 1.0287, "step": 45210 }, { "epoch": 13.53, "grad_norm": 3.1465940475463867, "learning_rate": 1.1858476950860848e-05, "loss": 0.9651, "step": 45215 }, { "epoch": 13.53, "grad_norm": 2.4534783363342285, "learning_rate": 1.1853479306248404e-05, "loss": 1.0121, "step": 45220 }, { "epoch": 13.53, "grad_norm": 6.127681732177734, "learning_rate": 1.1848482387704621e-05, "loss": 1.1017, "step": 45225 }, { "epoch": 13.53, "grad_norm": 2.688446283340454, "learning_rate": 1.184348619550549e-05, "loss": 1.0992, "step": 45230 }, { "epoch": 13.53, "grad_norm": 2.3378570079803467, "learning_rate": 1.1838490729926952e-05, "loss": 1.0143, "step": 45235 }, { "epoch": 13.54, "grad_norm": 2.69551944732666, "learning_rate": 1.1833495991244873e-05, "loss": 0.999, "step": 45240 }, { "epoch": 13.54, "grad_norm": 3.029266357421875, "learning_rate": 1.182850197973514e-05, "loss": 1.0098, "step": 45245 }, { "epoch": 13.54, "grad_norm": 8.008578300476074, "learning_rate": 1.1823508695673535e-05, "loss": 1.0435, "step": 45250 }, { "epoch": 13.54, "grad_norm": 1.313415765762329, "learning_rate": 1.1818516139335867e-05, "loss": 1.2993, "step": 45255 }, { "epoch": 13.54, "grad_norm": 4.881174087524414, "learning_rate": 1.1813524310997842e-05, "loss": 0.9305, "step": 45260 }, { "epoch": 13.54, "grad_norm": 2.0278892517089844, "learning_rate": 
1.1808533210935164e-05, "loss": 0.9142, "step": 45265 }, { "epoch": 13.54, "grad_norm": 5.330128192901611, "learning_rate": 1.180354283942348e-05, "loss": 1.1023, "step": 45270 }, { "epoch": 13.55, "grad_norm": 1.4603615999221802, "learning_rate": 1.1798553196738415e-05, "loss": 1.04, "step": 45275 }, { "epoch": 13.55, "grad_norm": 2.043243408203125, "learning_rate": 1.1793564283155532e-05, "loss": 0.9853, "step": 45280 }, { "epoch": 13.55, "grad_norm": 3.6421613693237305, "learning_rate": 1.1788576098950365e-05, "loss": 0.9557, "step": 45285 }, { "epoch": 13.55, "grad_norm": 2.091878890991211, "learning_rate": 1.1783588644398408e-05, "loss": 1.1519, "step": 45290 }, { "epoch": 13.55, "grad_norm": 1.9971786737442017, "learning_rate": 1.1778601919775122e-05, "loss": 1.0033, "step": 45295 }, { "epoch": 13.55, "grad_norm": 0.9933714270591736, "learning_rate": 1.1773615925355891e-05, "loss": 0.9982, "step": 45300 }, { "epoch": 13.55, "grad_norm": 5.131785869598389, "learning_rate": 1.1768630661416114e-05, "loss": 1.0143, "step": 45305 }, { "epoch": 13.56, "grad_norm": 2.784761905670166, "learning_rate": 1.176364612823111e-05, "loss": 0.8749, "step": 45310 }, { "epoch": 13.56, "grad_norm": 1.629541039466858, "learning_rate": 1.1758662326076176e-05, "loss": 1.1158, "step": 45315 }, { "epoch": 13.56, "grad_norm": 3.978738784790039, "learning_rate": 1.1753679255226555e-05, "loss": 0.7877, "step": 45320 }, { "epoch": 13.56, "grad_norm": 2.3555569648742676, "learning_rate": 1.174869691595746e-05, "loss": 1.0907, "step": 45325 }, { "epoch": 13.56, "grad_norm": 2.2074787616729736, "learning_rate": 1.174371530854407e-05, "loss": 0.9258, "step": 45330 }, { "epoch": 13.56, "grad_norm": 3.912524461746216, "learning_rate": 1.1738734433261486e-05, "loss": 0.9214, "step": 45335 }, { "epoch": 13.57, "grad_norm": 3.4181976318359375, "learning_rate": 1.1733754290384833e-05, "loss": 0.9704, "step": 45340 }, { "epoch": 13.57, "grad_norm": 2.531336545944214, "learning_rate": 
1.1728774880189123e-05, "loss": 1.087, "step": 45345 }, { "epoch": 13.57, "grad_norm": 3.3904941082000732, "learning_rate": 1.17237962029494e-05, "loss": 0.9672, "step": 45350 }, { "epoch": 13.57, "grad_norm": 4.165085792541504, "learning_rate": 1.1718818258940596e-05, "loss": 0.9494, "step": 45355 }, { "epoch": 13.57, "grad_norm": 3.3338541984558105, "learning_rate": 1.171384104843767e-05, "loss": 1.0561, "step": 45360 }, { "epoch": 13.57, "grad_norm": 2.241774797439575, "learning_rate": 1.1708864571715486e-05, "loss": 1.0043, "step": 45365 }, { "epoch": 13.57, "grad_norm": 3.3441967964172363, "learning_rate": 1.1703888829048895e-05, "loss": 1.0327, "step": 45370 }, { "epoch": 13.58, "grad_norm": 3.085231065750122, "learning_rate": 1.1698913820712704e-05, "loss": 0.7949, "step": 45375 }, { "epoch": 13.58, "grad_norm": 3.700368881225586, "learning_rate": 1.1693939546981678e-05, "loss": 1.0952, "step": 45380 }, { "epoch": 13.58, "grad_norm": 3.532536745071411, "learning_rate": 1.1688966008130539e-05, "loss": 0.9898, "step": 45385 }, { "epoch": 13.58, "grad_norm": 3.1939263343811035, "learning_rate": 1.1683993204433971e-05, "loss": 1.0335, "step": 45390 }, { "epoch": 13.58, "grad_norm": 2.641188144683838, "learning_rate": 1.1679021136166618e-05, "loss": 0.9966, "step": 45395 }, { "epoch": 13.58, "grad_norm": 12.577829360961914, "learning_rate": 1.1674049803603079e-05, "loss": 0.9728, "step": 45400 }, { "epoch": 13.58, "grad_norm": 5.408492565155029, "learning_rate": 1.166907920701792e-05, "loss": 1.183, "step": 45405 }, { "epoch": 13.59, "grad_norm": 15.173827171325684, "learning_rate": 1.166410934668566e-05, "loss": 1.0365, "step": 45410 }, { "epoch": 13.59, "grad_norm": 1.8098371028900146, "learning_rate": 1.1659140222880777e-05, "loss": 1.0531, "step": 45415 }, { "epoch": 13.59, "grad_norm": 3.362779378890991, "learning_rate": 1.1654171835877715e-05, "loss": 1.0122, "step": 45420 }, { "epoch": 13.59, "grad_norm": 4.117199420928955, "learning_rate": 
1.1649204185950877e-05, "loss": 0.7663, "step": 45425 }, { "epoch": 13.59, "grad_norm": 4.3105621337890625, "learning_rate": 1.1644237273374595e-05, "loss": 1.0172, "step": 45430 }, { "epoch": 13.59, "grad_norm": 2.2465403079986572, "learning_rate": 1.1639271098423227e-05, "loss": 0.9232, "step": 45435 }, { "epoch": 13.6, "grad_norm": 1.8414210081100464, "learning_rate": 1.163430566137101e-05, "loss": 0.915, "step": 45440 }, { "epoch": 13.6, "grad_norm": 2.6317269802093506, "learning_rate": 1.1629340962492217e-05, "loss": 0.8813, "step": 45445 }, { "epoch": 13.6, "grad_norm": 2.4604814052581787, "learning_rate": 1.1624377002061004e-05, "loss": 1.111, "step": 45450 }, { "epoch": 13.6, "grad_norm": 2.286057710647583, "learning_rate": 1.1619413780351566e-05, "loss": 1.0006, "step": 45455 }, { "epoch": 13.6, "grad_norm": 3.36887264251709, "learning_rate": 1.1614451297637985e-05, "loss": 1.1177, "step": 45460 }, { "epoch": 13.6, "grad_norm": 3.5638434886932373, "learning_rate": 1.1609489554194348e-05, "loss": 0.9992, "step": 45465 }, { "epoch": 13.6, "grad_norm": 1.2167627811431885, "learning_rate": 1.1604528550294685e-05, "loss": 1.1973, "step": 45470 }, { "epoch": 13.61, "grad_norm": 2.9576528072357178, "learning_rate": 1.1599568286212984e-05, "loss": 0.9278, "step": 45475 }, { "epoch": 13.61, "grad_norm": 3.1728708744049072, "learning_rate": 1.15946087622232e-05, "loss": 1.1047, "step": 45480 }, { "epoch": 13.61, "grad_norm": 5.425692558288574, "learning_rate": 1.1589649978599237e-05, "loss": 0.9143, "step": 45485 }, { "epoch": 13.61, "grad_norm": 2.459667921066284, "learning_rate": 1.158469193561497e-05, "loss": 0.8807, "step": 45490 }, { "epoch": 13.61, "grad_norm": 2.123094320297241, "learning_rate": 1.157973463354422e-05, "loss": 0.945, "step": 45495 }, { "epoch": 13.61, "grad_norm": 2.2343719005584717, "learning_rate": 1.1574778072660778e-05, "loss": 1.0458, "step": 45500 }, { "epoch": 13.61, "grad_norm": 6.470086097717285, "learning_rate": 
1.1569822253238385e-05, "loss": 1.0621, "step": 45505 }, { "epoch": 13.62, "grad_norm": 2.044508934020996, "learning_rate": 1.1564867175550753e-05, "loss": 1.0052, "step": 45510 }, { "epoch": 13.62, "grad_norm": 2.4018311500549316, "learning_rate": 1.1559912839871542e-05, "loss": 1.0759, "step": 45515 }, { "epoch": 13.62, "grad_norm": 3.800114154815674, "learning_rate": 1.1554959246474382e-05, "loss": 0.9216, "step": 45520 }, { "epoch": 13.62, "grad_norm": 2.723524808883667, "learning_rate": 1.155000639563283e-05, "loss": 0.9357, "step": 45525 }, { "epoch": 13.62, "grad_norm": 4.594392776489258, "learning_rate": 1.154505428762046e-05, "loss": 1.1448, "step": 45530 }, { "epoch": 13.62, "grad_norm": 2.8049912452697754, "learning_rate": 1.154010292271074e-05, "loss": 1.1228, "step": 45535 }, { "epoch": 13.63, "grad_norm": 2.90828013420105, "learning_rate": 1.1535152301177165e-05, "loss": 1.131, "step": 45540 }, { "epoch": 13.63, "grad_norm": 2.163752317428589, "learning_rate": 1.1530202423293113e-05, "loss": 0.987, "step": 45545 }, { "epoch": 13.63, "grad_norm": 2.9470038414001465, "learning_rate": 1.1525253289331997e-05, "loss": 0.8447, "step": 45550 }, { "epoch": 13.63, "grad_norm": 5.131521224975586, "learning_rate": 1.1520304899567128e-05, "loss": 1.112, "step": 45555 }, { "epoch": 13.63, "grad_norm": 2.858603000640869, "learning_rate": 1.1515357254271805e-05, "loss": 1.0796, "step": 45560 }, { "epoch": 13.63, "grad_norm": 5.054248809814453, "learning_rate": 1.1510410353719287e-05, "loss": 0.8444, "step": 45565 }, { "epoch": 13.63, "grad_norm": 1.9235126972198486, "learning_rate": 1.1505464198182784e-05, "loss": 1.1076, "step": 45570 }, { "epoch": 13.64, "grad_norm": 3.045146942138672, "learning_rate": 1.1500518787935466e-05, "loss": 0.9475, "step": 45575 }, { "epoch": 13.64, "grad_norm": 4.526063442230225, "learning_rate": 1.1495574123250462e-05, "loss": 0.8281, "step": 45580 }, { "epoch": 13.64, "grad_norm": 2.535439968109131, "learning_rate": 
1.1490630204400863e-05, "loss": 1.0387, "step": 45585 }, { "epoch": 13.64, "grad_norm": 3.0743353366851807, "learning_rate": 1.1485687031659717e-05, "loss": 1.0645, "step": 45590 }, { "epoch": 13.64, "grad_norm": 3.6751158237457275, "learning_rate": 1.1480744605300026e-05, "loss": 0.9101, "step": 45595 }, { "epoch": 13.64, "grad_norm": 3.3130288124084473, "learning_rate": 1.1475802925594762e-05, "loss": 0.8764, "step": 45600 }, { "epoch": 13.64, "grad_norm": 8.780059814453125, "learning_rate": 1.1470861992816839e-05, "loss": 1.0427, "step": 45605 }, { "epoch": 13.65, "grad_norm": 3.3624203205108643, "learning_rate": 1.146592180723915e-05, "loss": 0.9371, "step": 45610 }, { "epoch": 13.65, "grad_norm": 2.7508459091186523, "learning_rate": 1.1460982369134541e-05, "loss": 0.8532, "step": 45615 }, { "epoch": 13.65, "grad_norm": 1.148845911026001, "learning_rate": 1.1456043678775783e-05, "loss": 0.992, "step": 45620 }, { "epoch": 13.65, "grad_norm": 3.8979814052581787, "learning_rate": 1.1451105736435672e-05, "loss": 1.1124, "step": 45625 }, { "epoch": 13.65, "grad_norm": 6.8672990798950195, "learning_rate": 1.144616854238689e-05, "loss": 0.8867, "step": 45630 }, { "epoch": 13.65, "grad_norm": 3.3490543365478516, "learning_rate": 1.1441232096902149e-05, "loss": 1.044, "step": 45635 }, { "epoch": 13.65, "grad_norm": 2.1857025623321533, "learning_rate": 1.1436296400254049e-05, "loss": 0.8137, "step": 45640 }, { "epoch": 13.66, "grad_norm": 3.0062873363494873, "learning_rate": 1.1431361452715218e-05, "loss": 0.9847, "step": 45645 }, { "epoch": 13.66, "grad_norm": 3.707160472869873, "learning_rate": 1.1426427254558182e-05, "loss": 1.1328, "step": 45650 }, { "epoch": 13.66, "grad_norm": 3.175178289413452, "learning_rate": 1.1421493806055455e-05, "loss": 1.0651, "step": 45655 }, { "epoch": 13.66, "grad_norm": 4.4922776222229, "learning_rate": 1.1416561107479514e-05, "loss": 1.1197, "step": 45660 }, { "epoch": 13.66, "grad_norm": 2.236589193344116, "learning_rate": 
1.1411629159102785e-05, "loss": 1.0603, "step": 45665 }, { "epoch": 13.66, "grad_norm": 1.2973885536193848, "learning_rate": 1.1406697961197654e-05, "loss": 0.9378, "step": 45670 }, { "epoch": 13.67, "grad_norm": 2.1828157901763916, "learning_rate": 1.1401767514036463e-05, "loss": 0.94, "step": 45675 }, { "epoch": 13.67, "grad_norm": 4.644625186920166, "learning_rate": 1.1396837817891518e-05, "loss": 0.9499, "step": 45680 }, { "epoch": 13.67, "grad_norm": 2.0859460830688477, "learning_rate": 1.1391908873035082e-05, "loss": 0.9002, "step": 45685 }, { "epoch": 13.67, "grad_norm": 7.057641983032227, "learning_rate": 1.1386980679739373e-05, "loss": 0.917, "step": 45690 }, { "epoch": 13.67, "grad_norm": 3.8702352046966553, "learning_rate": 1.1382053238276569e-05, "loss": 0.9209, "step": 45695 }, { "epoch": 13.67, "grad_norm": 1.8291438817977905, "learning_rate": 1.1377126548918811e-05, "loss": 1.02, "step": 45700 }, { "epoch": 13.67, "grad_norm": 6.240175247192383, "learning_rate": 1.1372200611938196e-05, "loss": 0.9408, "step": 45705 }, { "epoch": 13.68, "grad_norm": 8.977832794189453, "learning_rate": 1.1367275427606774e-05, "loss": 0.8573, "step": 45710 }, { "epoch": 13.68, "grad_norm": 2.6074182987213135, "learning_rate": 1.1362350996196559e-05, "loss": 0.9426, "step": 45715 }, { "epoch": 13.68, "grad_norm": 3.5321543216705322, "learning_rate": 1.1357427317979532e-05, "loss": 1.0316, "step": 45720 }, { "epoch": 13.68, "grad_norm": 1.716353178024292, "learning_rate": 1.1352504393227598e-05, "loss": 1.0807, "step": 45725 }, { "epoch": 13.68, "grad_norm": 4.071742534637451, "learning_rate": 1.1347582222212677e-05, "loss": 0.951, "step": 45730 }, { "epoch": 13.68, "grad_norm": 2.598299741744995, "learning_rate": 1.1342660805206578e-05, "loss": 0.9232, "step": 45735 }, { "epoch": 13.68, "grad_norm": 3.115583658218384, "learning_rate": 1.1337740142481148e-05, "loss": 1.1328, "step": 45740 }, { "epoch": 13.69, "grad_norm": 4.597363471984863, "learning_rate": 
1.1332820234308123e-05, "loss": 0.9854, "step": 45745 }, { "epoch": 13.69, "grad_norm": 4.575268745422363, "learning_rate": 1.1327901080959224e-05, "loss": 0.9848, "step": 45750 }, { "epoch": 13.69, "grad_norm": 4.154250144958496, "learning_rate": 1.132298268270614e-05, "loss": 0.877, "step": 45755 }, { "epoch": 13.69, "grad_norm": 2.12250018119812, "learning_rate": 1.1318065039820505e-05, "loss": 0.8998, "step": 45760 }, { "epoch": 13.69, "grad_norm": 1.1914949417114258, "learning_rate": 1.1313148152573915e-05, "loss": 0.975, "step": 45765 }, { "epoch": 13.69, "grad_norm": 2.145120143890381, "learning_rate": 1.130823202123793e-05, "loss": 1.1313, "step": 45770 }, { "epoch": 13.7, "grad_norm": 6.92742395401001, "learning_rate": 1.1303316646084055e-05, "loss": 1.184, "step": 45775 }, { "epoch": 13.7, "grad_norm": 4.461109638214111, "learning_rate": 1.1298402027383768e-05, "loss": 1.0285, "step": 45780 }, { "epoch": 13.7, "grad_norm": 3.108293294906616, "learning_rate": 1.1293488165408492e-05, "loss": 1.0309, "step": 45785 }, { "epoch": 13.7, "grad_norm": 2.175156593322754, "learning_rate": 1.1288575060429618e-05, "loss": 1.0424, "step": 45790 }, { "epoch": 13.7, "grad_norm": 2.99977707862854, "learning_rate": 1.1283662712718493e-05, "loss": 0.8261, "step": 45795 }, { "epoch": 13.7, "grad_norm": 3.166217565536499, "learning_rate": 1.1278751122546417e-05, "loss": 0.9703, "step": 45800 }, { "epoch": 13.7, "grad_norm": 4.1235032081604, "learning_rate": 1.1273840290184653e-05, "loss": 1.0018, "step": 45805 }, { "epoch": 13.71, "grad_norm": 2.862684726715088, "learning_rate": 1.1268930215904424e-05, "loss": 1.0977, "step": 45810 }, { "epoch": 13.71, "grad_norm": 3.851482629776001, "learning_rate": 1.1264020899976918e-05, "loss": 0.9958, "step": 45815 }, { "epoch": 13.71, "grad_norm": 4.1922197341918945, "learning_rate": 1.125911234267324e-05, "loss": 1.0227, "step": 45820 }, { "epoch": 13.71, "grad_norm": 2.539138078689575, "learning_rate": 1.1254204544264521e-05, "loss": 
1.1639, "step": 45825 }, { "epoch": 13.71, "grad_norm": 5.336416721343994, "learning_rate": 1.1249297505021778e-05, "loss": 0.9809, "step": 45830 }, { "epoch": 13.71, "grad_norm": 2.3755998611450195, "learning_rate": 1.1244391225216061e-05, "loss": 0.9196, "step": 45835 }, { "epoch": 13.71, "grad_norm": 4.774353504180908, "learning_rate": 1.123948570511831e-05, "loss": 1.012, "step": 45840 }, { "epoch": 13.72, "grad_norm": 4.135644435882568, "learning_rate": 1.123458094499946e-05, "loss": 1.0807, "step": 45845 }, { "epoch": 13.72, "grad_norm": 2.5520973205566406, "learning_rate": 1.1229676945130394e-05, "loss": 1.0183, "step": 45850 }, { "epoch": 13.72, "grad_norm": 4.4872870445251465, "learning_rate": 1.122477370578196e-05, "loss": 0.9953, "step": 45855 }, { "epoch": 13.72, "grad_norm": 1.6755377054214478, "learning_rate": 1.1219871227224953e-05, "loss": 0.929, "step": 45860 }, { "epoch": 13.72, "grad_norm": 3.4664084911346436, "learning_rate": 1.1214969509730135e-05, "loss": 0.9963, "step": 45865 }, { "epoch": 13.72, "grad_norm": 4.792949199676514, "learning_rate": 1.1210068553568224e-05, "loss": 0.7811, "step": 45870 }, { "epoch": 13.73, "grad_norm": 4.7333550453186035, "learning_rate": 1.120516835900989e-05, "loss": 0.9869, "step": 45875 }, { "epoch": 13.73, "grad_norm": 2.1963367462158203, "learning_rate": 1.1200268926325771e-05, "loss": 1.0632, "step": 45880 }, { "epoch": 13.73, "grad_norm": 4.561999797821045, "learning_rate": 1.1195370255786455e-05, "loss": 0.9137, "step": 45885 }, { "epoch": 13.73, "grad_norm": 2.747818946838379, "learning_rate": 1.119047234766249e-05, "loss": 0.9463, "step": 45890 }, { "epoch": 13.73, "grad_norm": 3.7482149600982666, "learning_rate": 1.1185575202224383e-05, "loss": 0.8418, "step": 45895 }, { "epoch": 13.73, "grad_norm": 2.163506507873535, "learning_rate": 1.1180678819742599e-05, "loss": 1.2076, "step": 45900 }, { "epoch": 13.73, "grad_norm": 1.5832871198654175, "learning_rate": 1.1175783200487558e-05, "loss": 1.1003, 
"step": 45905 }, { "epoch": 13.74, "grad_norm": 4.2854413986206055, "learning_rate": 1.1170888344729652e-05, "loss": 1.0152, "step": 45910 }, { "epoch": 13.74, "grad_norm": 2.954716920852661, "learning_rate": 1.116599425273919e-05, "loss": 1.0792, "step": 45915 }, { "epoch": 13.74, "grad_norm": 2.61503005027771, "learning_rate": 1.1161100924786502e-05, "loss": 1.0473, "step": 45920 }, { "epoch": 13.74, "grad_norm": 4.037346839904785, "learning_rate": 1.115620836114181e-05, "loss": 0.996, "step": 45925 }, { "epoch": 13.74, "grad_norm": 1.7600798606872559, "learning_rate": 1.1151316562075356e-05, "loss": 0.942, "step": 45930 }, { "epoch": 13.74, "grad_norm": 2.2943618297576904, "learning_rate": 1.1146425527857274e-05, "loss": 0.9601, "step": 45935 }, { "epoch": 13.74, "grad_norm": 2.6054422855377197, "learning_rate": 1.1141535258757733e-05, "loss": 1.0357, "step": 45940 }, { "epoch": 13.75, "grad_norm": 2.9605374336242676, "learning_rate": 1.1136645755046781e-05, "loss": 0.9024, "step": 45945 }, { "epoch": 13.75, "grad_norm": 6.355667591094971, "learning_rate": 1.1131757016994476e-05, "loss": 1.1976, "step": 45950 }, { "epoch": 13.75, "grad_norm": 2.857518434524536, "learning_rate": 1.1126869044870817e-05, "loss": 1.0472, "step": 45955 }, { "epoch": 13.75, "grad_norm": 3.006471633911133, "learning_rate": 1.1121981838945747e-05, "loss": 1.1969, "step": 45960 }, { "epoch": 13.75, "grad_norm": 2.723449945449829, "learning_rate": 1.1117095399489215e-05, "loss": 0.8746, "step": 45965 }, { "epoch": 13.75, "grad_norm": 2.74454927444458, "learning_rate": 1.1112209726771067e-05, "loss": 1.1539, "step": 45970 }, { "epoch": 13.76, "grad_norm": 3.3110272884368896, "learning_rate": 1.1107324821061139e-05, "loss": 0.9179, "step": 45975 }, { "epoch": 13.76, "grad_norm": 1.4932845830917358, "learning_rate": 1.1102440682629217e-05, "loss": 1.0535, "step": 45980 }, { "epoch": 13.76, "grad_norm": 2.4937548637390137, "learning_rate": 1.1097557311745055e-05, "loss": 1.027, "step": 45985 
}, { "epoch": 13.76, "grad_norm": 2.2536213397979736, "learning_rate": 1.1092674708678349e-05, "loss": 1.0212, "step": 45990 }, { "epoch": 13.76, "grad_norm": 1.4273040294647217, "learning_rate": 1.1087792873698763e-05, "loss": 1.0554, "step": 45995 }, { "epoch": 13.76, "grad_norm": 3.2895405292510986, "learning_rate": 1.1082911807075917e-05, "loss": 0.9926, "step": 46000 }, { "epoch": 13.76, "grad_norm": 2.659669876098633, "learning_rate": 1.1078031509079394e-05, "loss": 1.0621, "step": 46005 }, { "epoch": 13.77, "grad_norm": 6.099573135375977, "learning_rate": 1.1073151979978702e-05, "loss": 0.9202, "step": 46010 }, { "epoch": 13.77, "grad_norm": 3.4382877349853516, "learning_rate": 1.1068273220043367e-05, "loss": 1.0081, "step": 46015 }, { "epoch": 13.77, "grad_norm": 2.076596975326538, "learning_rate": 1.1063395229542806e-05, "loss": 1.0796, "step": 46020 }, { "epoch": 13.77, "grad_norm": 3.9843549728393555, "learning_rate": 1.1058518008746454e-05, "loss": 0.8787, "step": 46025 }, { "epoch": 13.77, "grad_norm": 4.303983688354492, "learning_rate": 1.1053641557923647e-05, "loss": 1.0372, "step": 46030 }, { "epoch": 13.77, "grad_norm": 2.91055965423584, "learning_rate": 1.1048765877343736e-05, "loss": 0.9737, "step": 46035 }, { "epoch": 13.77, "grad_norm": 3.4289169311523438, "learning_rate": 1.1043890967275978e-05, "loss": 1.1013, "step": 46040 }, { "epoch": 13.78, "grad_norm": 18.518823623657227, "learning_rate": 1.1039016827989601e-05, "loss": 1.1429, "step": 46045 }, { "epoch": 13.78, "grad_norm": 3.946131706237793, "learning_rate": 1.1034143459753835e-05, "loss": 0.9853, "step": 46050 }, { "epoch": 13.78, "grad_norm": 1.882799506187439, "learning_rate": 1.102927086283779e-05, "loss": 0.9411, "step": 46055 }, { "epoch": 13.78, "grad_norm": 4.6659393310546875, "learning_rate": 1.102439903751061e-05, "loss": 0.8427, "step": 46060 }, { "epoch": 13.78, "grad_norm": 4.819268703460693, "learning_rate": 1.1019527984041328e-05, "loss": 1.0051, "step": 46065 }, { 
"epoch": 13.78, "grad_norm": 1.3024616241455078, "learning_rate": 1.1014657702699003e-05, "loss": 1.1975, "step": 46070 }, { "epoch": 13.79, "grad_norm": 5.274040222167969, "learning_rate": 1.1009788193752584e-05, "loss": 1.0944, "step": 46075 }, { "epoch": 13.79, "grad_norm": 1.9087549448013306, "learning_rate": 1.1004919457471022e-05, "loss": 1.1598, "step": 46080 }, { "epoch": 13.79, "grad_norm": 5.655836582183838, "learning_rate": 1.1000051494123211e-05, "loss": 0.8013, "step": 46085 }, { "epoch": 13.79, "grad_norm": 4.382424831390381, "learning_rate": 1.0995184303978004e-05, "loss": 0.9066, "step": 46090 }, { "epoch": 13.79, "grad_norm": 4.181612968444824, "learning_rate": 1.0990317887304214e-05, "loss": 0.8979, "step": 46095 }, { "epoch": 13.79, "grad_norm": 3.6581287384033203, "learning_rate": 1.0985452244370608e-05, "loss": 0.9312, "step": 46100 }, { "epoch": 13.79, "grad_norm": 3.3705477714538574, "learning_rate": 1.0980587375445894e-05, "loss": 0.9881, "step": 46105 }, { "epoch": 13.8, "grad_norm": 1.2490730285644531, "learning_rate": 1.0975723280798783e-05, "loss": 0.9072, "step": 46110 }, { "epoch": 13.8, "grad_norm": 2.7945122718811035, "learning_rate": 1.0970859960697879e-05, "loss": 0.9368, "step": 46115 }, { "epoch": 13.8, "grad_norm": 1.166204810142517, "learning_rate": 1.0965997415411808e-05, "loss": 1.0581, "step": 46120 }, { "epoch": 13.8, "grad_norm": 1.6771597862243652, "learning_rate": 1.096113564520911e-05, "loss": 0.8542, "step": 46125 }, { "epoch": 13.8, "grad_norm": 4.801357269287109, "learning_rate": 1.09562746503583e-05, "loss": 1.0804, "step": 46130 }, { "epoch": 13.8, "grad_norm": 3.126763105392456, "learning_rate": 1.0951414431127852e-05, "loss": 0.8767, "step": 46135 }, { "epoch": 13.8, "grad_norm": 3.6435258388519287, "learning_rate": 1.0946554987786162e-05, "loss": 0.9254, "step": 46140 }, { "epoch": 13.81, "grad_norm": 3.6946301460266113, "learning_rate": 1.0941696320601652e-05, "loss": 0.8884, "step": 46145 }, { "epoch": 13.81, 
"grad_norm": 1.621283769607544, "learning_rate": 1.0936838429842621e-05, "loss": 1.1543, "step": 46150 }, { "epoch": 13.81, "grad_norm": 2.5034942626953125, "learning_rate": 1.0931981315777406e-05, "loss": 1.0512, "step": 46155 }, { "epoch": 13.81, "grad_norm": 2.1045899391174316, "learning_rate": 1.0927124978674225e-05, "loss": 1.0269, "step": 46160 }, { "epoch": 13.81, "grad_norm": 1.1668440103530884, "learning_rate": 1.0922269418801318e-05, "loss": 0.8998, "step": 46165 }, { "epoch": 13.81, "grad_norm": 1.8777698278427124, "learning_rate": 1.091741463642683e-05, "loss": 1.0598, "step": 46170 }, { "epoch": 13.82, "grad_norm": 2.1455204486846924, "learning_rate": 1.091256063181889e-05, "loss": 1.0727, "step": 46175 }, { "epoch": 13.82, "grad_norm": 3.745995283126831, "learning_rate": 1.0907707405245588e-05, "loss": 0.9028, "step": 46180 }, { "epoch": 13.82, "grad_norm": 4.1767096519470215, "learning_rate": 1.0902854956974957e-05, "loss": 1.0149, "step": 46185 }, { "epoch": 13.82, "grad_norm": 1.699058175086975, "learning_rate": 1.0898003287274991e-05, "loss": 0.9797, "step": 46190 }, { "epoch": 13.82, "grad_norm": 2.886626958847046, "learning_rate": 1.0893152396413655e-05, "loss": 1.0911, "step": 46195 }, { "epoch": 13.82, "grad_norm": 6.132671356201172, "learning_rate": 1.0888302284658833e-05, "loss": 0.9886, "step": 46200 }, { "epoch": 13.82, "grad_norm": 1.1873294115066528, "learning_rate": 1.0883452952278416e-05, "loss": 1.1952, "step": 46205 }, { "epoch": 13.83, "grad_norm": 2.7054784297943115, "learning_rate": 1.0878604399540219e-05, "loss": 1.0763, "step": 46210 }, { "epoch": 13.83, "grad_norm": 2.0939934253692627, "learning_rate": 1.087375662671202e-05, "loss": 0.959, "step": 46215 }, { "epoch": 13.83, "grad_norm": 2.734236717224121, "learning_rate": 1.0868909634061561e-05, "loss": 0.9568, "step": 46220 }, { "epoch": 13.83, "grad_norm": 2.2614846229553223, "learning_rate": 1.0864063421856535e-05, "loss": 1.0809, "step": 46225 }, { "epoch": 13.83, 
"grad_norm": 2.1451010704040527, "learning_rate": 1.08592179903646e-05, "loss": 1.1251, "step": 46230 }, { "epoch": 13.83, "grad_norm": 1.8617221117019653, "learning_rate": 1.085437333985334e-05, "loss": 0.9847, "step": 46235 }, { "epoch": 13.83, "grad_norm": 6.787776947021484, "learning_rate": 1.0849529470590358e-05, "loss": 0.777, "step": 46240 }, { "epoch": 13.84, "grad_norm": 5.022008895874023, "learning_rate": 1.0844686382843134e-05, "loss": 1.2188, "step": 46245 }, { "epoch": 13.84, "grad_norm": 3.551828622817993, "learning_rate": 1.0839844076879185e-05, "loss": 0.998, "step": 46250 }, { "epoch": 13.84, "grad_norm": 2.8129985332489014, "learning_rate": 1.0835002552965911e-05, "loss": 1.0112, "step": 46255 }, { "epoch": 13.84, "grad_norm": 5.713034629821777, "learning_rate": 1.083016181137074e-05, "loss": 0.8538, "step": 46260 }, { "epoch": 13.84, "grad_norm": 7.516810417175293, "learning_rate": 1.0825321852360995e-05, "loss": 0.889, "step": 46265 }, { "epoch": 13.84, "grad_norm": 1.1386287212371826, "learning_rate": 1.0820482676203991e-05, "loss": 1.0063, "step": 46270 }, { "epoch": 13.84, "grad_norm": 1.6292932033538818, "learning_rate": 1.081564428316699e-05, "loss": 0.9629, "step": 46275 }, { "epoch": 13.85, "grad_norm": 2.4698173999786377, "learning_rate": 1.081080667351721e-05, "loss": 0.9526, "step": 46280 }, { "epoch": 13.85, "grad_norm": 5.473732948303223, "learning_rate": 1.0805969847521829e-05, "loss": 0.8786, "step": 46285 }, { "epoch": 13.85, "grad_norm": 2.3145463466644287, "learning_rate": 1.0801133805447979e-05, "loss": 0.8604, "step": 46290 }, { "epoch": 13.85, "grad_norm": 4.11014461517334, "learning_rate": 1.0796298547562753e-05, "loss": 1.0444, "step": 46295 }, { "epoch": 13.85, "grad_norm": 2.2047159671783447, "learning_rate": 1.0791464074133189e-05, "loss": 1.0339, "step": 46300 }, { "epoch": 13.85, "grad_norm": 5.002912521362305, "learning_rate": 1.07866303854263e-05, "loss": 1.0871, "step": 46305 }, { "epoch": 13.86, "grad_norm": 
2.067976713180542, "learning_rate": 1.0781797481709039e-05, "loss": 1.0389, "step": 46310 }, { "epoch": 13.86, "grad_norm": 3.528301477432251, "learning_rate": 1.0776965363248326e-05, "loss": 1.061, "step": 46315 }, { "epoch": 13.86, "grad_norm": 2.523347854614258, "learning_rate": 1.077213403031103e-05, "loss": 0.9864, "step": 46320 }, { "epoch": 13.86, "grad_norm": 4.887777328491211, "learning_rate": 1.0767303483163991e-05, "loss": 1.0392, "step": 46325 }, { "epoch": 13.86, "grad_norm": 4.290348052978516, "learning_rate": 1.0762473722073968e-05, "loss": 1.0014, "step": 46330 }, { "epoch": 13.86, "grad_norm": 5.4099931716918945, "learning_rate": 1.0757644747307744e-05, "loss": 0.898, "step": 46335 }, { "epoch": 13.86, "grad_norm": 3.631016254425049, "learning_rate": 1.0752816559131976e-05, "loss": 0.9292, "step": 46340 }, { "epoch": 13.87, "grad_norm": 2.76509428024292, "learning_rate": 1.0747989157813357e-05, "loss": 1.0867, "step": 46345 }, { "epoch": 13.87, "grad_norm": 1.9425288438796997, "learning_rate": 1.0743162543618465e-05, "loss": 1.0359, "step": 46350 }, { "epoch": 13.87, "grad_norm": 1.2764666080474854, "learning_rate": 1.0738336716813907e-05, "loss": 1.122, "step": 46355 }, { "epoch": 13.87, "grad_norm": 5.330173492431641, "learning_rate": 1.0733511677666178e-05, "loss": 1.1068, "step": 46360 }, { "epoch": 13.87, "grad_norm": 4.102222442626953, "learning_rate": 1.0728687426441769e-05, "loss": 1.0624, "step": 46365 }, { "epoch": 13.87, "grad_norm": 2.589555025100708, "learning_rate": 1.0723863963407119e-05, "loss": 0.9326, "step": 46370 }, { "epoch": 13.87, "grad_norm": 2.6629140377044678, "learning_rate": 1.0719041288828624e-05, "loss": 0.8154, "step": 46375 }, { "epoch": 13.88, "grad_norm": 3.4112234115600586, "learning_rate": 1.0714219402972633e-05, "loss": 1.1058, "step": 46380 }, { "epoch": 13.88, "grad_norm": 2.342379093170166, "learning_rate": 1.0709398306105456e-05, "loss": 1.0584, "step": 46385 }, { "epoch": 13.88, "grad_norm": 
1.7157783508300781, "learning_rate": 1.070457799849336e-05, "loss": 1.0045, "step": 46390 }, { "epoch": 13.88, "grad_norm": 1.9355769157409668, "learning_rate": 1.0699758480402555e-05, "loss": 0.8319, "step": 46395 }, { "epoch": 13.88, "grad_norm": 2.4929144382476807, "learning_rate": 1.0694939752099229e-05, "loss": 0.9453, "step": 46400 }, { "epoch": 13.88, "grad_norm": 3.4646151065826416, "learning_rate": 1.069012181384951e-05, "loss": 1.0055, "step": 46405 }, { "epoch": 13.89, "grad_norm": 3.360816240310669, "learning_rate": 1.068530466591949e-05, "loss": 1.0103, "step": 46410 }, { "epoch": 13.89, "grad_norm": 7.58369255065918, "learning_rate": 1.0680488308575215e-05, "loss": 1.0717, "step": 46415 }, { "epoch": 13.89, "grad_norm": 3.2002921104431152, "learning_rate": 1.0675672742082692e-05, "loss": 0.9674, "step": 46420 }, { "epoch": 13.89, "grad_norm": 3.446225643157959, "learning_rate": 1.067085796670786e-05, "loss": 1.1751, "step": 46425 }, { "epoch": 13.89, "grad_norm": 1.5883591175079346, "learning_rate": 1.0666043982716662e-05, "loss": 1.177, "step": 46430 }, { "epoch": 13.89, "grad_norm": 5.534438133239746, "learning_rate": 1.066123079037494e-05, "loss": 0.8887, "step": 46435 }, { "epoch": 13.89, "grad_norm": 1.9987788200378418, "learning_rate": 1.0656418389948556e-05, "loss": 0.9892, "step": 46440 }, { "epoch": 13.9, "grad_norm": 1.9698108434677124, "learning_rate": 1.0651606781703256e-05, "loss": 1.0445, "step": 46445 }, { "epoch": 13.9, "grad_norm": 1.448635458946228, "learning_rate": 1.0646795965904815e-05, "loss": 1.0926, "step": 46450 }, { "epoch": 13.9, "grad_norm": 1.830094337463379, "learning_rate": 1.0641985942818907e-05, "loss": 1.0397, "step": 46455 }, { "epoch": 13.9, "grad_norm": 3.425884485244751, "learning_rate": 1.0637176712711192e-05, "loss": 1.1553, "step": 46460 }, { "epoch": 13.9, "grad_norm": 3.424363136291504, "learning_rate": 1.0632368275847276e-05, "loss": 1.083, "step": 46465 }, { "epoch": 13.9, "grad_norm": 4.42012357711792, 
"learning_rate": 1.0627560632492726e-05, "loss": 0.9418, "step": 46470 }, { "epoch": 13.9, "grad_norm": 1.4648548364639282, "learning_rate": 1.0622753782913062e-05, "loss": 0.844, "step": 46475 }, { "epoch": 13.91, "grad_norm": 2.15419340133667, "learning_rate": 1.0617947727373764e-05, "loss": 1.0659, "step": 46480 }, { "epoch": 13.91, "grad_norm": 1.1197208166122437, "learning_rate": 1.0613142466140264e-05, "loss": 1.0441, "step": 46485 }, { "epoch": 13.91, "grad_norm": 1.7040044069290161, "learning_rate": 1.0608337999477949e-05, "loss": 1.058, "step": 46490 }, { "epoch": 13.91, "grad_norm": 3.230975389480591, "learning_rate": 1.0603534327652167e-05, "loss": 0.8304, "step": 46495 }, { "epoch": 13.91, "grad_norm": 3.9677131175994873, "learning_rate": 1.0598731450928224e-05, "loss": 1.0955, "step": 46500 }, { "epoch": 13.91, "grad_norm": 4.740390777587891, "learning_rate": 1.059392936957137e-05, "loss": 0.8264, "step": 46505 }, { "epoch": 13.92, "grad_norm": 3.7869038581848145, "learning_rate": 1.0589128083846822e-05, "loss": 0.9756, "step": 46510 }, { "epoch": 13.92, "grad_norm": 3.1534292697906494, "learning_rate": 1.0584327594019752e-05, "loss": 1.0928, "step": 46515 }, { "epoch": 13.92, "grad_norm": 3.072333812713623, "learning_rate": 1.0579527900355286e-05, "loss": 0.8572, "step": 46520 }, { "epoch": 13.92, "grad_norm": 1.7602797746658325, "learning_rate": 1.0574729003118511e-05, "loss": 0.8316, "step": 46525 }, { "epoch": 13.92, "grad_norm": 2.0324742794036865, "learning_rate": 1.0569930902574443e-05, "loss": 0.9476, "step": 46530 }, { "epoch": 13.92, "grad_norm": 1.305268406867981, "learning_rate": 1.056513359898811e-05, "loss": 1.0128, "step": 46535 }, { "epoch": 13.92, "grad_norm": 1.4846941232681274, "learning_rate": 1.0560337092624426e-05, "loss": 1.087, "step": 46540 }, { "epoch": 13.93, "grad_norm": 1.1681172847747803, "learning_rate": 1.0555541383748332e-05, "loss": 0.988, "step": 46545 }, { "epoch": 13.93, "grad_norm": 4.223578929901123, 
"learning_rate": 1.0550746472624665e-05, "loss": 1.1653, "step": 46550 }, { "epoch": 13.93, "grad_norm": 1.3550422191619873, "learning_rate": 1.0545952359518251e-05, "loss": 1.0737, "step": 46555 }, { "epoch": 13.93, "grad_norm": 3.005021095275879, "learning_rate": 1.0541159044693866e-05, "loss": 0.8508, "step": 46560 }, { "epoch": 13.93, "grad_norm": 3.721085786819458, "learning_rate": 1.0536366528416236e-05, "loss": 1.0313, "step": 46565 }, { "epoch": 13.93, "grad_norm": 1.5910168886184692, "learning_rate": 1.0531574810950048e-05, "loss": 1.0548, "step": 46570 }, { "epoch": 13.93, "grad_norm": 2.5519471168518066, "learning_rate": 1.0526783892559944e-05, "loss": 1.1611, "step": 46575 }, { "epoch": 13.94, "grad_norm": 1.598804235458374, "learning_rate": 1.0521993773510522e-05, "loss": 1.0835, "step": 46580 }, { "epoch": 13.94, "grad_norm": 4.5623273849487305, "learning_rate": 1.0517204454066337e-05, "loss": 1.0025, "step": 46585 }, { "epoch": 13.94, "grad_norm": 2.7830114364624023, "learning_rate": 1.0512415934491893e-05, "loss": 0.9183, "step": 46590 }, { "epoch": 13.94, "grad_norm": 2.62373685836792, "learning_rate": 1.050762821505166e-05, "loss": 0.8919, "step": 46595 }, { "epoch": 13.94, "grad_norm": 8.385918617248535, "learning_rate": 1.0502841296010055e-05, "loss": 1.0263, "step": 46600 }, { "epoch": 13.94, "grad_norm": 1.7394742965698242, "learning_rate": 1.0498055177631458e-05, "loss": 1.0657, "step": 46605 }, { "epoch": 13.95, "grad_norm": 3.228177070617676, "learning_rate": 1.0493269860180196e-05, "loss": 0.856, "step": 46610 }, { "epoch": 13.95, "grad_norm": 3.1336793899536133, "learning_rate": 1.0488485343920564e-05, "loss": 0.968, "step": 46615 }, { "epoch": 13.95, "grad_norm": 3.7546615600585938, "learning_rate": 1.0483701629116813e-05, "loss": 0.9154, "step": 46620 }, { "epoch": 13.95, "grad_norm": 4.517045497894287, "learning_rate": 1.047891871603311e-05, "loss": 1.031, "step": 46625 }, { "epoch": 13.95, "grad_norm": 4.163939952850342, 
"learning_rate": 1.0474136604933654e-05, "loss": 0.9468, "step": 46630 }, { "epoch": 13.95, "grad_norm": 1.9266438484191895, "learning_rate": 1.0469355296082513e-05, "loss": 1.1067, "step": 46635 }, { "epoch": 13.95, "grad_norm": 1.395596981048584, "learning_rate": 1.0464574789743798e-05, "loss": 0.9645, "step": 46640 }, { "epoch": 13.96, "grad_norm": 3.1779944896698, "learning_rate": 1.0459795086181487e-05, "loss": 0.9891, "step": 46645 }, { "epoch": 13.96, "grad_norm": 7.933053493499756, "learning_rate": 1.04550161856596e-05, "loss": 1.0198, "step": 46650 }, { "epoch": 13.96, "grad_norm": 3.6656312942504883, "learning_rate": 1.0450238088442038e-05, "loss": 0.8881, "step": 46655 }, { "epoch": 13.96, "grad_norm": 3.141336679458618, "learning_rate": 1.0445460794792706e-05, "loss": 1.0471, "step": 46660 }, { "epoch": 13.96, "grad_norm": 1.635010004043579, "learning_rate": 1.0440684304975442e-05, "loss": 0.8864, "step": 46665 }, { "epoch": 13.96, "grad_norm": 3.4402453899383545, "learning_rate": 1.0435908619254053e-05, "loss": 1.0659, "step": 46670 }, { "epoch": 13.96, "grad_norm": 2.2001841068267822, "learning_rate": 1.0431133737892292e-05, "loss": 0.9821, "step": 46675 }, { "epoch": 13.97, "grad_norm": 2.5706429481506348, "learning_rate": 1.0426359661153873e-05, "loss": 0.8815, "step": 46680 }, { "epoch": 13.97, "grad_norm": 5.090077877044678, "learning_rate": 1.042158638930246e-05, "loss": 0.9084, "step": 46685 }, { "epoch": 13.97, "grad_norm": 4.350427150726318, "learning_rate": 1.0416813922601677e-05, "loss": 1.0049, "step": 46690 }, { "epoch": 13.97, "grad_norm": 3.4876809120178223, "learning_rate": 1.0412042261315103e-05, "loss": 1.1069, "step": 46695 }, { "epoch": 13.97, "grad_norm": 3.4440534114837646, "learning_rate": 1.0407271405706271e-05, "loss": 1.1091, "step": 46700 }, { "epoch": 13.97, "grad_norm": 2.864044427871704, "learning_rate": 1.0402501356038671e-05, "loss": 0.8457, "step": 46705 }, { "epoch": 13.98, "grad_norm": 1.3529998064041138, 
"learning_rate": 1.0397732112575747e-05, "loss": 1.0893, "step": 46710 }, { "epoch": 13.98, "grad_norm": 1.4070992469787598, "learning_rate": 1.0392963675580911e-05, "loss": 1.1165, "step": 46715 }, { "epoch": 13.98, "grad_norm": 3.6802985668182373, "learning_rate": 1.0388196045317488e-05, "loss": 1.1286, "step": 46720 }, { "epoch": 13.98, "grad_norm": 1.862669587135315, "learning_rate": 1.0383429222048829e-05, "loss": 0.9559, "step": 46725 }, { "epoch": 13.98, "grad_norm": 2.9887638092041016, "learning_rate": 1.0378663206038164e-05, "loss": 0.9677, "step": 46730 }, { "epoch": 13.98, "grad_norm": 3.377013921737671, "learning_rate": 1.0373897997548748e-05, "loss": 0.9216, "step": 46735 }, { "epoch": 13.98, "grad_norm": 3.8938591480255127, "learning_rate": 1.0369133596843727e-05, "loss": 1.0537, "step": 46740 }, { "epoch": 13.99, "grad_norm": 3.0918357372283936, "learning_rate": 1.0364370004186267e-05, "loss": 1.0356, "step": 46745 }, { "epoch": 13.99, "grad_norm": 4.587892055511475, "learning_rate": 1.0359607219839427e-05, "loss": 0.9786, "step": 46750 }, { "epoch": 13.99, "grad_norm": 1.8486536741256714, "learning_rate": 1.0354845244066263e-05, "loss": 0.9638, "step": 46755 }, { "epoch": 13.99, "grad_norm": 1.7416905164718628, "learning_rate": 1.0350084077129776e-05, "loss": 1.0862, "step": 46760 }, { "epoch": 13.99, "grad_norm": 2.3283634185791016, "learning_rate": 1.0345323719292915e-05, "loss": 1.0598, "step": 46765 }, { "epoch": 13.99, "grad_norm": 1.997711181640625, "learning_rate": 1.0340564170818595e-05, "loss": 1.2272, "step": 46770 }, { "epoch": 13.99, "grad_norm": 2.0410749912261963, "learning_rate": 1.0335805431969675e-05, "loss": 1.1836, "step": 46775 }, { "epoch": 14.0, "grad_norm": 2.7848238945007324, "learning_rate": 1.0331047503008981e-05, "loss": 0.9318, "step": 46780 }, { "epoch": 14.0, "grad_norm": 1.9022867679595947, "learning_rate": 1.0326290384199285e-05, "loss": 1.0139, "step": 46785 }, { "epoch": 14.0, "grad_norm": 1.689530849456787, 
"learning_rate": 1.0321534075803319e-05, "loss": 1.0845, "step": 46790 }, { "epoch": 14.0, "grad_norm": 4.382483005523682, "learning_rate": 1.0316778578083768e-05, "loss": 0.8879, "step": 46795 }, { "epoch": 14.0, "grad_norm": 2.087588310241699, "learning_rate": 1.0312023891303273e-05, "loss": 0.9106, "step": 46800 }, { "epoch": 14.0, "grad_norm": 1.368632197380066, "learning_rate": 1.0307270015724433e-05, "loss": 1.122, "step": 46805 }, { "epoch": 14.01, "grad_norm": 2.369845390319824, "learning_rate": 1.0302516951609806e-05, "loss": 0.9383, "step": 46810 }, { "epoch": 14.01, "grad_norm": 1.6115524768829346, "learning_rate": 1.0297764699221874e-05, "loss": 0.9586, "step": 46815 }, { "epoch": 14.01, "grad_norm": 4.875194072723389, "learning_rate": 1.0293013258823131e-05, "loss": 0.9979, "step": 46820 }, { "epoch": 14.01, "grad_norm": 2.5877294540405273, "learning_rate": 1.028826263067596e-05, "loss": 0.8455, "step": 46825 }, { "epoch": 14.01, "grad_norm": 2.146116018295288, "learning_rate": 1.0283512815042773e-05, "loss": 0.8959, "step": 46830 }, { "epoch": 14.01, "grad_norm": 3.2791755199432373, "learning_rate": 1.0278763812185857e-05, "loss": 0.9684, "step": 46835 }, { "epoch": 14.01, "grad_norm": 2.8382163047790527, "learning_rate": 1.027401562236753e-05, "loss": 1.033, "step": 46840 }, { "epoch": 14.02, "grad_norm": 2.077665090560913, "learning_rate": 1.0269268245850006e-05, "loss": 0.9306, "step": 46845 }, { "epoch": 14.02, "grad_norm": 3.7365541458129883, "learning_rate": 1.0264521682895478e-05, "loss": 0.9208, "step": 46850 }, { "epoch": 14.02, "grad_norm": 3.4815821647644043, "learning_rate": 1.0259775933766117e-05, "loss": 0.9793, "step": 46855 }, { "epoch": 14.02, "grad_norm": 3.6899566650390625, "learning_rate": 1.0255030998723991e-05, "loss": 0.9788, "step": 46860 }, { "epoch": 14.02, "grad_norm": 2.341599702835083, "learning_rate": 1.0250286878031193e-05, "loss": 1.0963, "step": 46865 }, { "epoch": 14.02, "grad_norm": 4.204500675201416, 
"learning_rate": 1.0245543571949704e-05, "loss": 0.9897, "step": 46870 }, { "epoch": 14.02, "grad_norm": 2.09965443611145, "learning_rate": 1.0240801080741525e-05, "loss": 1.0211, "step": 46875 }, { "epoch": 14.03, "grad_norm": 2.6598119735717773, "learning_rate": 1.0236059404668549e-05, "loss": 0.9235, "step": 46880 }, { "epoch": 14.03, "grad_norm": 1.9532800912857056, "learning_rate": 1.0231318543992669e-05, "loss": 1.044, "step": 46885 }, { "epoch": 14.03, "grad_norm": 2.8368186950683594, "learning_rate": 1.0226578498975714e-05, "loss": 0.9055, "step": 46890 }, { "epoch": 14.03, "grad_norm": 1.4692111015319824, "learning_rate": 1.0221839269879472e-05, "loss": 1.0778, "step": 46895 }, { "epoch": 14.03, "grad_norm": 2.8631324768066406, "learning_rate": 1.0217100856965687e-05, "loss": 0.9229, "step": 46900 }, { "epoch": 14.03, "grad_norm": 1.9299060106277466, "learning_rate": 1.0212363260496063e-05, "loss": 1.0288, "step": 46905 }, { "epoch": 14.03, "grad_norm": 2.9853992462158203, "learning_rate": 1.0207626480732228e-05, "loss": 0.952, "step": 46910 }, { "epoch": 14.04, "grad_norm": 2.1383092403411865, "learning_rate": 1.0202890517935824e-05, "loss": 1.1462, "step": 46915 }, { "epoch": 14.04, "grad_norm": 6.036135196685791, "learning_rate": 1.019815537236838e-05, "loss": 0.9865, "step": 46920 }, { "epoch": 14.04, "grad_norm": 4.347772598266602, "learning_rate": 1.0193421044291446e-05, "loss": 0.9413, "step": 46925 }, { "epoch": 14.04, "grad_norm": 5.3200860023498535, "learning_rate": 1.0188687533966457e-05, "loss": 0.9559, "step": 46930 }, { "epoch": 14.04, "grad_norm": 1.9751383066177368, "learning_rate": 1.0183954841654873e-05, "loss": 1.0025, "step": 46935 }, { "epoch": 14.04, "grad_norm": 2.211293935775757, "learning_rate": 1.0179222967618068e-05, "loss": 1.0879, "step": 46940 }, { "epoch": 14.05, "grad_norm": 2.816315174102783, "learning_rate": 1.0174491912117357e-05, "loss": 1.1058, "step": 46945 }, { "epoch": 14.05, "grad_norm": 3.3474440574645996, 
"learning_rate": 1.0169761675414064e-05, "loss": 0.8382, "step": 46950 }, { "epoch": 14.05, "grad_norm": 1.9791414737701416, "learning_rate": 1.0165032257769403e-05, "loss": 1.1009, "step": 46955 }, { "epoch": 14.05, "grad_norm": 3.0999069213867188, "learning_rate": 1.0160303659444606e-05, "loss": 0.9814, "step": 46960 }, { "epoch": 14.05, "grad_norm": 1.7832539081573486, "learning_rate": 1.0155575880700796e-05, "loss": 0.9725, "step": 46965 }, { "epoch": 14.05, "grad_norm": 2.467935800552368, "learning_rate": 1.015084892179912e-05, "loss": 0.9384, "step": 46970 }, { "epoch": 14.05, "grad_norm": 1.9563651084899902, "learning_rate": 1.0146122783000612e-05, "loss": 1.039, "step": 46975 }, { "epoch": 14.06, "grad_norm": 3.385742425918579, "learning_rate": 1.0141397464566302e-05, "loss": 1.0829, "step": 46980 }, { "epoch": 14.06, "grad_norm": 6.85827112197876, "learning_rate": 1.0136672966757166e-05, "loss": 0.9445, "step": 46985 }, { "epoch": 14.06, "grad_norm": 3.0008556842803955, "learning_rate": 1.0131949289834133e-05, "loss": 1.0382, "step": 46990 }, { "epoch": 14.06, "grad_norm": 1.650694727897644, "learning_rate": 1.0127226434058088e-05, "loss": 0.9398, "step": 46995 }, { "epoch": 14.06, "grad_norm": 2.1701951026916504, "learning_rate": 1.0122504399689863e-05, "loss": 1.0166, "step": 47000 }, { "epoch": 14.06, "grad_norm": 3.96262526512146, "learning_rate": 1.011778318699026e-05, "loss": 0.8386, "step": 47005 }, { "epoch": 14.06, "grad_norm": 3.1463615894317627, "learning_rate": 1.0113062796220021e-05, "loss": 1.0431, "step": 47010 }, { "epoch": 14.07, "grad_norm": 3.198836326599121, "learning_rate": 1.010834322763985e-05, "loss": 1.0575, "step": 47015 }, { "epoch": 14.07, "grad_norm": 2.9448750019073486, "learning_rate": 1.0103624481510404e-05, "loss": 0.9553, "step": 47020 }, { "epoch": 14.07, "grad_norm": 1.3542437553405762, "learning_rate": 1.0098906558092291e-05, "loss": 1.0053, "step": 47025 }, { "epoch": 14.07, "grad_norm": 3.5193848609924316, 
"learning_rate": 1.0094189457646083e-05, "loss": 0.9204, "step": 47030 }, { "epoch": 14.07, "grad_norm": 1.792724609375, "learning_rate": 1.0089473180432307e-05, "loss": 1.0258, "step": 47035 }, { "epoch": 14.07, "grad_norm": 1.6008864641189575, "learning_rate": 1.0084757726711408e-05, "loss": 0.9024, "step": 47040 }, { "epoch": 14.08, "grad_norm": 3.162278890609741, "learning_rate": 1.0080043096743857e-05, "loss": 0.9124, "step": 47045 }, { "epoch": 14.08, "grad_norm": 2.5094027519226074, "learning_rate": 1.0075329290789998e-05, "loss": 0.9806, "step": 47050 }, { "epoch": 14.08, "grad_norm": 3.3782403469085693, "learning_rate": 1.0070616309110207e-05, "loss": 1.0407, "step": 47055 }, { "epoch": 14.08, "grad_norm": 4.14473819732666, "learning_rate": 1.0065904151964738e-05, "loss": 0.8774, "step": 47060 }, { "epoch": 14.08, "grad_norm": 9.009183883666992, "learning_rate": 1.0061192819613882e-05, "loss": 1.0632, "step": 47065 }, { "epoch": 14.08, "grad_norm": 1.744873285293579, "learning_rate": 1.0056482312317809e-05, "loss": 1.0087, "step": 47070 }, { "epoch": 14.08, "grad_norm": 1.8300987482070923, "learning_rate": 1.0051772630336684e-05, "loss": 1.0234, "step": 47075 }, { "epoch": 14.09, "grad_norm": 2.92419171333313, "learning_rate": 1.0047063773930617e-05, "loss": 0.8112, "step": 47080 }, { "epoch": 14.09, "grad_norm": 2.440823793411255, "learning_rate": 1.0042355743359676e-05, "loss": 0.7468, "step": 47085 }, { "epoch": 14.09, "grad_norm": 8.091914176940918, "learning_rate": 1.0037648538883882e-05, "loss": 0.9097, "step": 47090 }, { "epoch": 14.09, "grad_norm": 5.099801063537598, "learning_rate": 1.0032942160763206e-05, "loss": 0.7116, "step": 47095 }, { "epoch": 14.09, "grad_norm": 1.9223390817642212, "learning_rate": 1.0028236609257574e-05, "loss": 1.0824, "step": 47100 }, { "epoch": 14.09, "grad_norm": 2.9645304679870605, "learning_rate": 1.0023531884626875e-05, "loss": 1.1287, "step": 47105 }, { "epoch": 14.09, "grad_norm": 4.622707366943359, 
"learning_rate": 1.0018827987130942e-05, "loss": 0.9873, "step": 47110 }, { "epoch": 14.1, "grad_norm": 2.609149217605591, "learning_rate": 1.0014124917029567e-05, "loss": 1.0403, "step": 47115 }, { "epoch": 14.1, "grad_norm": 1.8755382299423218, "learning_rate": 1.0009422674582497e-05, "loss": 1.0787, "step": 47120 }, { "epoch": 14.1, "grad_norm": 3.716013193130493, "learning_rate": 1.000472126004943e-05, "loss": 1.0361, "step": 47125 }, { "epoch": 14.1, "grad_norm": 4.679911136627197, "learning_rate": 1.0000020673690027e-05, "loss": 0.9316, "step": 47130 }, { "epoch": 14.1, "grad_norm": 4.787873268127441, "learning_rate": 9.995320915763878e-06, "loss": 0.9913, "step": 47135 }, { "epoch": 14.1, "grad_norm": 4.154457092285156, "learning_rate": 9.990621986530574e-06, "loss": 0.8499, "step": 47140 }, { "epoch": 14.11, "grad_norm": 2.3199286460876465, "learning_rate": 9.985923886249599e-06, "loss": 1.1649, "step": 47145 }, { "epoch": 14.11, "grad_norm": 2.379757881164551, "learning_rate": 9.981226615180456e-06, "loss": 0.9088, "step": 47150 }, { "epoch": 14.11, "grad_norm": 3.3448526859283447, "learning_rate": 9.97653017358254e-06, "loss": 1.0832, "step": 47155 }, { "epoch": 14.11, "grad_norm": 1.5010708570480347, "learning_rate": 9.971834561715265e-06, "loss": 1.0648, "step": 47160 }, { "epoch": 14.11, "grad_norm": 1.6477797031402588, "learning_rate": 9.967139779837939e-06, "loss": 0.9349, "step": 47165 }, { "epoch": 14.11, "grad_norm": 4.819705009460449, "learning_rate": 9.962445828209854e-06, "loss": 0.9833, "step": 47170 }, { "epoch": 14.11, "grad_norm": 6.116825580596924, "learning_rate": 9.957752707090257e-06, "loss": 0.9763, "step": 47175 }, { "epoch": 14.12, "grad_norm": 2.0187926292419434, "learning_rate": 9.953060416738343e-06, "loss": 1.0569, "step": 47180 }, { "epoch": 14.12, "grad_norm": 2.0881597995758057, "learning_rate": 9.94836895741326e-06, "loss": 0.8785, "step": 47185 }, { "epoch": 14.12, "grad_norm": 4.898426055908203, "learning_rate": 
9.943678329374116e-06, "loss": 0.7823, "step": 47190 }, { "epoch": 14.12, "grad_norm": 3.7305986881256104, "learning_rate": 9.938988532879967e-06, "loss": 0.8913, "step": 47195 }, { "epoch": 14.12, "grad_norm": 1.356601357460022, "learning_rate": 9.934299568189826e-06, "loss": 1.0196, "step": 47200 }, { "epoch": 14.12, "grad_norm": 3.387441873550415, "learning_rate": 9.929611435562658e-06, "loss": 0.8262, "step": 47205 }, { "epoch": 14.12, "grad_norm": 2.2645153999328613, "learning_rate": 9.924924135257388e-06, "loss": 0.9682, "step": 47210 }, { "epoch": 14.13, "grad_norm": 3.1974477767944336, "learning_rate": 9.92023766753289e-06, "loss": 1.0503, "step": 47215 }, { "epoch": 14.13, "grad_norm": 1.60183584690094, "learning_rate": 9.915552032647988e-06, "loss": 0.9126, "step": 47220 }, { "epoch": 14.13, "grad_norm": 2.7414391040802, "learning_rate": 9.910867230861467e-06, "loss": 1.0307, "step": 47225 }, { "epoch": 14.13, "grad_norm": 3.494734287261963, "learning_rate": 9.906183262432068e-06, "loss": 0.9636, "step": 47230 }, { "epoch": 14.13, "grad_norm": 1.4926824569702148, "learning_rate": 9.901500127618485e-06, "loss": 0.8568, "step": 47235 }, { "epoch": 14.13, "grad_norm": 1.8363795280456543, "learning_rate": 9.896817826679338e-06, "loss": 0.9843, "step": 47240 }, { "epoch": 14.14, "grad_norm": 2.363992214202881, "learning_rate": 9.892136359873264e-06, "loss": 1.0686, "step": 47245 }, { "epoch": 14.14, "grad_norm": 1.888505458831787, "learning_rate": 9.887455727458775e-06, "loss": 1.0122, "step": 47250 }, { "epoch": 14.14, "grad_norm": 1.452127456665039, "learning_rate": 9.882775929694416e-06, "loss": 0.9163, "step": 47255 }, { "epoch": 14.14, "grad_norm": 7.370832443237305, "learning_rate": 9.878096966838617e-06, "loss": 0.8539, "step": 47260 }, { "epoch": 14.14, "grad_norm": 3.398411750793457, "learning_rate": 9.873418839149808e-06, "loss": 1.0393, "step": 47265 }, { "epoch": 14.14, "grad_norm": 2.2340104579925537, "learning_rate": 9.868741546886353e-06, 
"loss": 0.9541, "step": 47270 }, { "epoch": 14.14, "grad_norm": 2.5456645488739014, "learning_rate": 9.864065090306574e-06, "loss": 1.0563, "step": 47275 }, { "epoch": 14.15, "grad_norm": 1.726277470588684, "learning_rate": 9.859389469668745e-06, "loss": 1.0858, "step": 47280 }, { "epoch": 14.15, "grad_norm": 1.36215078830719, "learning_rate": 9.8547146852311e-06, "loss": 1.061, "step": 47285 }, { "epoch": 14.15, "grad_norm": 3.9773330688476562, "learning_rate": 9.85004073725182e-06, "loss": 0.7896, "step": 47290 }, { "epoch": 14.15, "grad_norm": 1.2395633459091187, "learning_rate": 9.845367625989044e-06, "loss": 1.0116, "step": 47295 }, { "epoch": 14.15, "grad_norm": 2.955395460128784, "learning_rate": 9.84069535170086e-06, "loss": 1.0423, "step": 47300 }, { "epoch": 14.15, "grad_norm": 2.663750171661377, "learning_rate": 9.836023914645313e-06, "loss": 0.9401, "step": 47305 }, { "epoch": 14.15, "grad_norm": 3.750603437423706, "learning_rate": 9.831353315080405e-06, "loss": 0.9206, "step": 47310 }, { "epoch": 14.16, "grad_norm": 4.748321056365967, "learning_rate": 9.826683553264085e-06, "loss": 1.0016, "step": 47315 }, { "epoch": 14.16, "grad_norm": 4.10500955581665, "learning_rate": 9.822014629454263e-06, "loss": 0.9215, "step": 47320 }, { "epoch": 14.16, "grad_norm": 5.371630668640137, "learning_rate": 9.817346543908796e-06, "loss": 0.9788, "step": 47325 }, { "epoch": 14.16, "grad_norm": 1.5055346488952637, "learning_rate": 9.812679296885505e-06, "loss": 0.9854, "step": 47330 }, { "epoch": 14.16, "grad_norm": 1.5663864612579346, "learning_rate": 9.808012888642132e-06, "loss": 0.9251, "step": 47335 }, { "epoch": 14.16, "grad_norm": 2.7338407039642334, "learning_rate": 9.803347319436435e-06, "loss": 0.9418, "step": 47340 }, { "epoch": 14.17, "grad_norm": 2.1328275203704834, "learning_rate": 9.798682589526052e-06, "loss": 0.8952, "step": 47345 }, { "epoch": 14.17, "grad_norm": 3.0970237255096436, "learning_rate": 9.794018699168644e-06, "loss": 0.8826, "step": 47350 
}, { "epoch": 14.17, "grad_norm": 1.936923623085022, "learning_rate": 9.789355648621764e-06, "loss": 0.9455, "step": 47355 }, { "epoch": 14.17, "grad_norm": 2.612597703933716, "learning_rate": 9.784693438142975e-06, "loss": 0.9279, "step": 47360 }, { "epoch": 14.17, "grad_norm": 3.304236888885498, "learning_rate": 9.780032067989744e-06, "loss": 0.929, "step": 47365 }, { "epoch": 14.17, "grad_norm": 1.589285135269165, "learning_rate": 9.775371538419522e-06, "loss": 1.154, "step": 47370 }, { "epoch": 14.17, "grad_norm": 1.9944769144058228, "learning_rate": 9.770711849689706e-06, "loss": 0.8109, "step": 47375 }, { "epoch": 14.18, "grad_norm": 3.4619359970092773, "learning_rate": 9.766053002057646e-06, "loss": 0.9818, "step": 47380 }, { "epoch": 14.18, "grad_norm": 3.2043612003326416, "learning_rate": 9.76139499578064e-06, "loss": 1.0544, "step": 47385 }, { "epoch": 14.18, "grad_norm": 7.88495397567749, "learning_rate": 9.75673783111595e-06, "loss": 0.891, "step": 47390 }, { "epoch": 14.18, "grad_norm": 2.1868255138397217, "learning_rate": 9.752081508320785e-06, "loss": 0.9812, "step": 47395 }, { "epoch": 14.18, "grad_norm": 2.6226940155029297, "learning_rate": 9.747426027652309e-06, "loss": 0.9334, "step": 47400 }, { "epoch": 14.18, "grad_norm": 3.620140552520752, "learning_rate": 9.74277138936764e-06, "loss": 0.9, "step": 47405 }, { "epoch": 14.18, "grad_norm": 7.102359771728516, "learning_rate": 9.738117593723847e-06, "loss": 0.8913, "step": 47410 }, { "epoch": 14.19, "grad_norm": 3.3569910526275635, "learning_rate": 9.733464640977957e-06, "loss": 1.0191, "step": 47415 }, { "epoch": 14.19, "grad_norm": 3.8863701820373535, "learning_rate": 9.728812531386946e-06, "loss": 1.025, "step": 47420 }, { "epoch": 14.19, "grad_norm": 2.6565213203430176, "learning_rate": 9.724161265207755e-06, "loss": 1.0252, "step": 47425 }, { "epoch": 14.19, "grad_norm": 3.354027032852173, "learning_rate": 9.719510842697238e-06, "loss": 0.8642, "step": 47430 }, { "epoch": 14.19, "grad_norm": 
3.7532005310058594, "learning_rate": 9.714861264112273e-06, "loss": 0.9739, "step": 47435 }, { "epoch": 14.19, "grad_norm": 7.380504608154297, "learning_rate": 9.710212529709618e-06, "loss": 1.0136, "step": 47440 }, { "epoch": 14.19, "grad_norm": 3.0353171825408936, "learning_rate": 9.705564639746045e-06, "loss": 1.1409, "step": 47445 }, { "epoch": 14.2, "grad_norm": 4.069791793823242, "learning_rate": 9.700917594478223e-06, "loss": 0.8443, "step": 47450 }, { "epoch": 14.2, "grad_norm": 2.922313928604126, "learning_rate": 9.69627139416284e-06, "loss": 0.9226, "step": 47455 }, { "epoch": 14.2, "grad_norm": 2.804293632507324, "learning_rate": 9.69162603905647e-06, "loss": 1.0484, "step": 47460 }, { "epoch": 14.2, "grad_norm": 1.4043070077896118, "learning_rate": 9.68698152941568e-06, "loss": 1.0361, "step": 47465 }, { "epoch": 14.2, "grad_norm": 2.1712961196899414, "learning_rate": 9.682337865496985e-06, "loss": 1.0993, "step": 47470 }, { "epoch": 14.2, "grad_norm": 3.498368740081787, "learning_rate": 9.677695047556848e-06, "loss": 0.9285, "step": 47475 }, { "epoch": 14.21, "grad_norm": 2.01509428024292, "learning_rate": 9.673053075851687e-06, "loss": 1.0765, "step": 47480 }, { "epoch": 14.21, "grad_norm": 1.8657152652740479, "learning_rate": 9.668411950637874e-06, "loss": 1.0225, "step": 47485 }, { "epoch": 14.21, "grad_norm": 3.4774529933929443, "learning_rate": 9.66377167217173e-06, "loss": 0.9132, "step": 47490 }, { "epoch": 14.21, "grad_norm": 3.322037696838379, "learning_rate": 9.659132240709536e-06, "loss": 0.9264, "step": 47495 }, { "epoch": 14.21, "grad_norm": 3.845637321472168, "learning_rate": 9.654493656507524e-06, "loss": 0.883, "step": 47500 }, { "epoch": 14.21, "grad_norm": 3.871750593185425, "learning_rate": 9.649855919821874e-06, "loss": 0.9087, "step": 47505 }, { "epoch": 14.21, "grad_norm": 6.507814407348633, "learning_rate": 9.645219030908728e-06, "loss": 0.8509, "step": 47510 }, { "epoch": 14.22, "grad_norm": 3.187875270843506, "learning_rate": 
9.640582990024175e-06, "loss": 0.7997, "step": 47515 }, { "epoch": 14.22, "grad_norm": 2.1788666248321533, "learning_rate": 9.635947797424266e-06, "loss": 0.8929, "step": 47520 }, { "epoch": 14.22, "grad_norm": 1.6329278945922852, "learning_rate": 9.63131345336497e-06, "loss": 0.9739, "step": 47525 }, { "epoch": 14.22, "grad_norm": 4.640608787536621, "learning_rate": 9.62667995810228e-06, "loss": 0.8712, "step": 47530 }, { "epoch": 14.22, "grad_norm": 2.9405198097229004, "learning_rate": 9.622047311892055e-06, "loss": 0.9474, "step": 47535 }, { "epoch": 14.22, "grad_norm": 1.338927149772644, "learning_rate": 9.61741551499019e-06, "loss": 1.0767, "step": 47540 }, { "epoch": 14.22, "grad_norm": 1.2258890867233276, "learning_rate": 9.61278456765246e-06, "loss": 1.1003, "step": 47545 }, { "epoch": 14.23, "grad_norm": 1.6150643825531006, "learning_rate": 9.608154470134665e-06, "loss": 1.1114, "step": 47550 }, { "epoch": 14.23, "grad_norm": 4.458968639373779, "learning_rate": 9.603525222692486e-06, "loss": 0.9945, "step": 47555 }, { "epoch": 14.23, "grad_norm": 4.157359600067139, "learning_rate": 9.598896825581607e-06, "loss": 0.9888, "step": 47560 }, { "epoch": 14.23, "grad_norm": 3.2777342796325684, "learning_rate": 9.59426927905765e-06, "loss": 0.7707, "step": 47565 }, { "epoch": 14.23, "grad_norm": 3.0390613079071045, "learning_rate": 9.589642583376186e-06, "loss": 0.9935, "step": 47570 }, { "epoch": 14.23, "grad_norm": 3.542185068130493, "learning_rate": 9.58501673879274e-06, "loss": 0.8352, "step": 47575 }, { "epoch": 14.24, "grad_norm": 3.9005398750305176, "learning_rate": 9.580391745562802e-06, "loss": 1.0942, "step": 47580 }, { "epoch": 14.24, "grad_norm": 3.451244592666626, "learning_rate": 9.575767603941798e-06, "loss": 0.9602, "step": 47585 }, { "epoch": 14.24, "grad_norm": 1.035033941268921, "learning_rate": 9.571144314185116e-06, "loss": 1.1651, "step": 47590 }, { "epoch": 14.24, "grad_norm": 3.617086410522461, "learning_rate": 9.566521876548096e-06, 
"loss": 0.9759, "step": 47595 }, { "epoch": 14.24, "grad_norm": 3.698251485824585, "learning_rate": 9.561900291286032e-06, "loss": 0.9557, "step": 47600 }, { "epoch": 14.24, "grad_norm": 0.981471061706543, "learning_rate": 9.557279558654167e-06, "loss": 0.8647, "step": 47605 }, { "epoch": 14.24, "grad_norm": 1.0644017457962036, "learning_rate": 9.552659678907699e-06, "loss": 0.8904, "step": 47610 }, { "epoch": 14.25, "grad_norm": 2.2468714714050293, "learning_rate": 9.548040652301792e-06, "loss": 1.0655, "step": 47615 }, { "epoch": 14.25, "grad_norm": 3.204252004623413, "learning_rate": 9.543422479091518e-06, "loss": 1.083, "step": 47620 }, { "epoch": 14.25, "grad_norm": 3.293745279312134, "learning_rate": 9.538805159531974e-06, "loss": 1.0888, "step": 47625 }, { "epoch": 14.25, "grad_norm": 2.6639957427978516, "learning_rate": 9.534188693878131e-06, "loss": 1.0, "step": 47630 }, { "epoch": 14.25, "grad_norm": 3.4556894302368164, "learning_rate": 9.529573082384988e-06, "loss": 1.0495, "step": 47635 }, { "epoch": 14.25, "grad_norm": 2.608086347579956, "learning_rate": 9.524958325307426e-06, "loss": 0.9045, "step": 47640 }, { "epoch": 14.25, "grad_norm": 3.2534093856811523, "learning_rate": 9.520344422900348e-06, "loss": 1.1092, "step": 47645 }, { "epoch": 14.26, "grad_norm": 1.683605670928955, "learning_rate": 9.515731375418549e-06, "loss": 0.9562, "step": 47650 }, { "epoch": 14.26, "grad_norm": 4.434023857116699, "learning_rate": 9.511119183116812e-06, "loss": 0.8168, "step": 47655 }, { "epoch": 14.26, "grad_norm": 2.663111448287964, "learning_rate": 9.506507846249859e-06, "loss": 0.95, "step": 47660 }, { "epoch": 14.26, "grad_norm": 3.199991226196289, "learning_rate": 9.501897365072367e-06, "loss": 0.9858, "step": 47665 }, { "epoch": 14.26, "grad_norm": 2.4415783882141113, "learning_rate": 9.497287739838992e-06, "loss": 1.1561, "step": 47670 }, { "epoch": 14.26, "grad_norm": 4.541195392608643, "learning_rate": 9.492678970804283e-06, "loss": 0.9719, "step": 47675 
}, { "epoch": 14.27, "grad_norm": 3.981882095336914, "learning_rate": 9.488071058222814e-06, "loss": 0.984, "step": 47680 }, { "epoch": 14.27, "grad_norm": 7.917405128479004, "learning_rate": 9.483464002349046e-06, "loss": 1.0035, "step": 47685 }, { "epoch": 14.27, "grad_norm": 2.864999771118164, "learning_rate": 9.478857803437433e-06, "loss": 0.9655, "step": 47690 }, { "epoch": 14.27, "grad_norm": 3.0915513038635254, "learning_rate": 9.474252461742373e-06, "loss": 1.0438, "step": 47695 }, { "epoch": 14.27, "grad_norm": 2.601701021194458, "learning_rate": 9.469647977518207e-06, "loss": 0.9761, "step": 47700 }, { "epoch": 14.27, "grad_norm": 4.57741641998291, "learning_rate": 9.465044351019243e-06, "loss": 1.025, "step": 47705 }, { "epoch": 14.27, "grad_norm": 4.853657245635986, "learning_rate": 9.460441582499729e-06, "loss": 1.1026, "step": 47710 }, { "epoch": 14.28, "grad_norm": 3.169487476348877, "learning_rate": 9.455839672213878e-06, "loss": 0.9469, "step": 47715 }, { "epoch": 14.28, "grad_norm": 2.6972315311431885, "learning_rate": 9.451238620415851e-06, "loss": 1.1723, "step": 47720 }, { "epoch": 14.28, "grad_norm": 2.2301697731018066, "learning_rate": 9.446638427359736e-06, "loss": 0.8984, "step": 47725 }, { "epoch": 14.28, "grad_norm": 2.9678409099578857, "learning_rate": 9.442039093299631e-06, "loss": 0.9494, "step": 47730 }, { "epoch": 14.28, "grad_norm": 1.5792745351791382, "learning_rate": 9.437440618489518e-06, "loss": 1.1286, "step": 47735 }, { "epoch": 14.28, "grad_norm": 2.416245222091675, "learning_rate": 9.432843003183392e-06, "loss": 0.9913, "step": 47740 }, { "epoch": 14.28, "grad_norm": 2.8561947345733643, "learning_rate": 9.428246247635176e-06, "loss": 0.8804, "step": 47745 }, { "epoch": 14.29, "grad_norm": 3.78583025932312, "learning_rate": 9.423650352098712e-06, "loss": 0.9608, "step": 47750 }, { "epoch": 14.29, "grad_norm": 2.2284066677093506, "learning_rate": 9.419055316827869e-06, "loss": 0.9273, "step": 47755 }, { "epoch": 14.29, 
"grad_norm": 2.1891860961914062, "learning_rate": 9.41446114207639e-06, "loss": 0.8688, "step": 47760 }, { "epoch": 14.29, "grad_norm": 7.929460048675537, "learning_rate": 9.409867828098035e-06, "loss": 0.9384, "step": 47765 }, { "epoch": 14.29, "grad_norm": 1.693267822265625, "learning_rate": 9.405275375146458e-06, "loss": 1.0242, "step": 47770 }, { "epoch": 14.29, "grad_norm": 4.085503578186035, "learning_rate": 9.40068378347533e-06, "loss": 1.039, "step": 47775 }, { "epoch": 14.3, "grad_norm": 2.429783821105957, "learning_rate": 9.396093053338218e-06, "loss": 1.0925, "step": 47780 }, { "epoch": 14.3, "grad_norm": 1.4568767547607422, "learning_rate": 9.391503184988661e-06, "loss": 0.906, "step": 47785 }, { "epoch": 14.3, "grad_norm": 2.0767431259155273, "learning_rate": 9.386914178680162e-06, "loss": 0.9838, "step": 47790 }, { "epoch": 14.3, "grad_norm": 1.0575764179229736, "learning_rate": 9.382326034666164e-06, "loss": 0.983, "step": 47795 }, { "epoch": 14.3, "grad_norm": 1.7569162845611572, "learning_rate": 9.377738753200066e-06, "loss": 0.9052, "step": 47800 }, { "epoch": 14.3, "grad_norm": 1.5973174571990967, "learning_rate": 9.373152334535216e-06, "loss": 1.0071, "step": 47805 }, { "epoch": 14.3, "grad_norm": 2.8834640979766846, "learning_rate": 9.36856677892492e-06, "loss": 0.9311, "step": 47810 }, { "epoch": 14.31, "grad_norm": 1.4674243927001953, "learning_rate": 9.363982086622442e-06, "loss": 0.9815, "step": 47815 }, { "epoch": 14.31, "grad_norm": 1.9711565971374512, "learning_rate": 9.359398257880963e-06, "loss": 0.9914, "step": 47820 }, { "epoch": 14.31, "grad_norm": 3.0938401222229004, "learning_rate": 9.35481529295367e-06, "loss": 1.0849, "step": 47825 }, { "epoch": 14.31, "grad_norm": 3.689774513244629, "learning_rate": 9.350233192093667e-06, "loss": 1.2623, "step": 47830 }, { "epoch": 14.31, "grad_norm": 11.061504364013672, "learning_rate": 9.345651955554016e-06, "loss": 0.9138, "step": 47835 }, { "epoch": 14.31, "grad_norm": 2.238264560699463, 
"learning_rate": 9.341071583587745e-06, "loss": 1.0129, "step": 47840 }, { "epoch": 14.31, "grad_norm": 3.2770984172821045, "learning_rate": 9.336492076447794e-06, "loss": 0.8907, "step": 47845 }, { "epoch": 14.32, "grad_norm": 2.24216890335083, "learning_rate": 9.331913434387127e-06, "loss": 1.0423, "step": 47850 }, { "epoch": 14.32, "grad_norm": 1.8534948825836182, "learning_rate": 9.327335657658573e-06, "loss": 1.002, "step": 47855 }, { "epoch": 14.32, "grad_norm": 3.518404006958008, "learning_rate": 9.322758746515e-06, "loss": 1.0224, "step": 47860 }, { "epoch": 14.32, "grad_norm": 2.3978283405303955, "learning_rate": 9.318182701209147e-06, "loss": 1.194, "step": 47865 }, { "epoch": 14.32, "grad_norm": 2.3987321853637695, "learning_rate": 9.31360752199378e-06, "loss": 0.983, "step": 47870 }, { "epoch": 14.32, "grad_norm": 4.193480491638184, "learning_rate": 9.309033209121556e-06, "loss": 0.8785, "step": 47875 }, { "epoch": 14.33, "grad_norm": 2.2980704307556152, "learning_rate": 9.30445976284512e-06, "loss": 1.0265, "step": 47880 }, { "epoch": 14.33, "grad_norm": 2.2900497913360596, "learning_rate": 9.299887183417055e-06, "loss": 0.9558, "step": 47885 }, { "epoch": 14.33, "grad_norm": 1.8453694581985474, "learning_rate": 9.295315471089903e-06, "loss": 1.0211, "step": 47890 }, { "epoch": 14.33, "grad_norm": 4.543096542358398, "learning_rate": 9.290744626116152e-06, "loss": 1.0024, "step": 47895 }, { "epoch": 14.33, "grad_norm": 4.047824859619141, "learning_rate": 9.286174648748247e-06, "loss": 1.0203, "step": 47900 }, { "epoch": 14.33, "grad_norm": 2.266964912414551, "learning_rate": 9.28160553923858e-06, "loss": 0.9312, "step": 47905 }, { "epoch": 14.33, "grad_norm": 3.8564565181732178, "learning_rate": 9.277037297839506e-06, "loss": 0.998, "step": 47910 }, { "epoch": 14.34, "grad_norm": 4.338145732879639, "learning_rate": 9.272469924803315e-06, "loss": 0.8935, "step": 47915 }, { "epoch": 14.34, "grad_norm": 2.9112906455993652, "learning_rate": 
9.267903420382262e-06, "loss": 0.9426, "step": 47920 }, { "epoch": 14.34, "grad_norm": 2.4785072803497314, "learning_rate": 9.263337784828551e-06, "loss": 1.1364, "step": 47925 }, { "epoch": 14.34, "grad_norm": 2.290468692779541, "learning_rate": 9.258773018394337e-06, "loss": 0.8601, "step": 47930 }, { "epoch": 14.34, "grad_norm": 1.839494228363037, "learning_rate": 9.254209121331728e-06, "loss": 1.0367, "step": 47935 }, { "epoch": 14.34, "grad_norm": 2.684028387069702, "learning_rate": 9.249646093892783e-06, "loss": 1.0927, "step": 47940 }, { "epoch": 14.34, "grad_norm": 13.804609298706055, "learning_rate": 9.245083936329521e-06, "loss": 1.0277, "step": 47945 }, { "epoch": 14.35, "grad_norm": 1.9250423908233643, "learning_rate": 9.24052264889388e-06, "loss": 0.9594, "step": 47950 }, { "epoch": 14.35, "grad_norm": 1.3630380630493164, "learning_rate": 9.235962231837805e-06, "loss": 0.9599, "step": 47955 }, { "epoch": 14.35, "grad_norm": 3.4717135429382324, "learning_rate": 9.231402685413137e-06, "loss": 1.0512, "step": 47960 }, { "epoch": 14.35, "grad_norm": 2.14388370513916, "learning_rate": 9.226844009871723e-06, "loss": 0.8996, "step": 47965 }, { "epoch": 14.35, "grad_norm": 4.260863304138184, "learning_rate": 9.222286205465314e-06, "loss": 0.9731, "step": 47970 }, { "epoch": 14.35, "grad_norm": 1.8017550706863403, "learning_rate": 9.217729272445636e-06, "loss": 0.8765, "step": 47975 }, { "epoch": 14.36, "grad_norm": 2.0987396240234375, "learning_rate": 9.213173211064364e-06, "loss": 1.0251, "step": 47980 }, { "epoch": 14.36, "grad_norm": 2.1268539428710938, "learning_rate": 9.208618021573126e-06, "loss": 0.9544, "step": 47985 }, { "epoch": 14.36, "grad_norm": 4.74144172668457, "learning_rate": 9.2040637042235e-06, "loss": 0.9862, "step": 47990 }, { "epoch": 14.36, "grad_norm": 2.23129940032959, "learning_rate": 9.199510259267018e-06, "loss": 1.0009, "step": 47995 }, { "epoch": 14.36, "grad_norm": 2.542621612548828, "learning_rate": 9.194957686955164e-06, 
"loss": 0.9754, "step": 48000 }, { "epoch": 14.36, "grad_norm": 2.1914637088775635, "learning_rate": 9.190405987539363e-06, "loss": 1.049, "step": 48005 }, { "epoch": 14.36, "grad_norm": 5.300290107727051, "learning_rate": 9.185855161271009e-06, "loss": 1.0649, "step": 48010 }, { "epoch": 14.37, "grad_norm": 3.934770107269287, "learning_rate": 9.181305208401437e-06, "loss": 0.9544, "step": 48015 }, { "epoch": 14.37, "grad_norm": 2.419064521789551, "learning_rate": 9.176756129181935e-06, "loss": 0.9372, "step": 48020 }, { "epoch": 14.37, "grad_norm": 2.2804276943206787, "learning_rate": 9.172207923863746e-06, "loss": 0.92, "step": 48025 }, { "epoch": 14.37, "grad_norm": 4.2205400466918945, "learning_rate": 9.167660592698058e-06, "loss": 1.0041, "step": 48030 }, { "epoch": 14.37, "grad_norm": 5.472626209259033, "learning_rate": 9.163114135936022e-06, "loss": 1.0433, "step": 48035 }, { "epoch": 14.37, "grad_norm": 3.6936001777648926, "learning_rate": 9.158568553828737e-06, "loss": 1.1153, "step": 48040 }, { "epoch": 14.37, "grad_norm": 2.1582984924316406, "learning_rate": 9.154023846627227e-06, "loss": 0.9568, "step": 48045 }, { "epoch": 14.38, "grad_norm": 3.0322723388671875, "learning_rate": 9.149480014582529e-06, "loss": 1.1763, "step": 48050 }, { "epoch": 14.38, "grad_norm": 1.92725670337677, "learning_rate": 9.144937057945555e-06, "loss": 0.9095, "step": 48055 }, { "epoch": 14.38, "grad_norm": 1.9671467542648315, "learning_rate": 9.140394976967246e-06, "loss": 1.1279, "step": 48060 }, { "epoch": 14.38, "grad_norm": 1.5574039220809937, "learning_rate": 9.135853771898418e-06, "loss": 1.1226, "step": 48065 }, { "epoch": 14.38, "grad_norm": 2.8291425704956055, "learning_rate": 9.131313442989914e-06, "loss": 0.9447, "step": 48070 }, { "epoch": 14.38, "grad_norm": 3.1782844066619873, "learning_rate": 9.126773990492466e-06, "loss": 0.9002, "step": 48075 }, { "epoch": 14.38, "grad_norm": 1.8452153205871582, "learning_rate": 9.122235414656788e-06, "loss": 1.1123, "step": 
48080 }, { "epoch": 14.39, "grad_norm": 6.0874834060668945, "learning_rate": 9.117697715733548e-06, "loss": 0.9248, "step": 48085 }, { "epoch": 14.39, "grad_norm": 2.1895172595977783, "learning_rate": 9.113160893973352e-06, "loss": 1.0326, "step": 48090 }, { "epoch": 14.39, "grad_norm": 1.827024221420288, "learning_rate": 9.108624949626767e-06, "loss": 0.9664, "step": 48095 }, { "epoch": 14.39, "grad_norm": 1.7563681602478027, "learning_rate": 9.104089882944308e-06, "loss": 0.9242, "step": 48100 }, { "epoch": 14.39, "grad_norm": 4.570804119110107, "learning_rate": 9.099555694176443e-06, "loss": 1.0313, "step": 48105 }, { "epoch": 14.39, "grad_norm": 3.724151849746704, "learning_rate": 9.095022383573587e-06, "loss": 1.0457, "step": 48110 }, { "epoch": 14.4, "grad_norm": 1.7167346477508545, "learning_rate": 9.090489951386114e-06, "loss": 1.0725, "step": 48115 }, { "epoch": 14.4, "grad_norm": 3.227980136871338, "learning_rate": 9.085958397864344e-06, "loss": 1.0361, "step": 48120 }, { "epoch": 14.4, "grad_norm": 3.609764337539673, "learning_rate": 9.081427723258552e-06, "loss": 1.0083, "step": 48125 }, { "epoch": 14.4, "grad_norm": 2.5227441787719727, "learning_rate": 9.076897927818956e-06, "loss": 1.0384, "step": 48130 }, { "epoch": 14.4, "grad_norm": 2.815845251083374, "learning_rate": 9.072369011795747e-06, "loss": 1.0781, "step": 48135 }, { "epoch": 14.4, "grad_norm": 2.0394959449768066, "learning_rate": 9.067840975439026e-06, "loss": 1.0609, "step": 48140 }, { "epoch": 14.4, "grad_norm": 2.39359712600708, "learning_rate": 9.063313818998903e-06, "loss": 0.9759, "step": 48145 }, { "epoch": 14.41, "grad_norm": 2.754600763320923, "learning_rate": 9.058787542725375e-06, "loss": 0.7847, "step": 48150 }, { "epoch": 14.41, "grad_norm": 2.4412641525268555, "learning_rate": 9.054262146868459e-06, "loss": 1.2569, "step": 48155 }, { "epoch": 14.41, "grad_norm": 3.2604317665100098, "learning_rate": 9.049737631678055e-06, "loss": 1.1841, "step": 48160 }, { "epoch": 14.41, 
"grad_norm": 1.3231927156448364, "learning_rate": 9.045213997404075e-06, "loss": 0.9842, "step": 48165 }, { "epoch": 14.41, "grad_norm": 1.9147082567214966, "learning_rate": 9.040691244296335e-06, "loss": 0.9722, "step": 48170 }, { "epoch": 14.41, "grad_norm": 2.9659924507141113, "learning_rate": 9.036169372604627e-06, "loss": 1.0222, "step": 48175 }, { "epoch": 14.41, "grad_norm": 2.9592783451080322, "learning_rate": 9.031648382578695e-06, "loss": 0.9292, "step": 48180 }, { "epoch": 14.42, "grad_norm": 1.969634771347046, "learning_rate": 9.027128274468221e-06, "loss": 1.1377, "step": 48185 }, { "epoch": 14.42, "grad_norm": 1.3826353549957275, "learning_rate": 9.022609048522854e-06, "loss": 1.149, "step": 48190 }, { "epoch": 14.42, "grad_norm": 3.4229960441589355, "learning_rate": 9.018090704992177e-06, "loss": 1.0666, "step": 48195 }, { "epoch": 14.42, "grad_norm": 9.103164672851562, "learning_rate": 9.013573244125742e-06, "loss": 0.9819, "step": 48200 }, { "epoch": 14.42, "grad_norm": 1.6387892961502075, "learning_rate": 9.009056666173039e-06, "loss": 1.0427, "step": 48205 }, { "epoch": 14.42, "grad_norm": 5.4994425773620605, "learning_rate": 9.00454097138351e-06, "loss": 0.9706, "step": 48210 }, { "epoch": 14.43, "grad_norm": 1.8383941650390625, "learning_rate": 9.000026160006561e-06, "loss": 1.0697, "step": 48215 }, { "epoch": 14.43, "grad_norm": 4.727527618408203, "learning_rate": 8.995512232291537e-06, "loss": 1.0467, "step": 48220 }, { "epoch": 14.43, "grad_norm": 3.8419857025146484, "learning_rate": 8.990999188487734e-06, "loss": 0.7724, "step": 48225 }, { "epoch": 14.43, "grad_norm": 1.8518542051315308, "learning_rate": 8.986487028844415e-06, "loss": 1.0587, "step": 48230 }, { "epoch": 14.43, "grad_norm": 2.0114905834198, "learning_rate": 8.981975753610756e-06, "loss": 1.127, "step": 48235 }, { "epoch": 14.43, "grad_norm": 1.4375, "learning_rate": 8.977465363035942e-06, "loss": 1.2177, "step": 48240 }, { "epoch": 14.43, "grad_norm": 4.793522834777832, 
"learning_rate": 8.972955857369045e-06, "loss": 0.9406, "step": 48245 }, { "epoch": 14.44, "grad_norm": 3.620276927947998, "learning_rate": 8.968447236859154e-06, "loss": 0.8755, "step": 48250 }, { "epoch": 14.44, "grad_norm": 4.639894962310791, "learning_rate": 8.963939501755242e-06, "loss": 1.0752, "step": 48255 }, { "epoch": 14.44, "grad_norm": 1.244881510734558, "learning_rate": 8.959432652306299e-06, "loss": 0.8917, "step": 48260 }, { "epoch": 14.44, "grad_norm": 3.2123985290527344, "learning_rate": 8.954926688761208e-06, "loss": 1.0363, "step": 48265 }, { "epoch": 14.44, "grad_norm": 2.0880956649780273, "learning_rate": 8.950421611368836e-06, "loss": 0.9985, "step": 48270 }, { "epoch": 14.44, "grad_norm": 5.486839771270752, "learning_rate": 8.945917420378e-06, "loss": 1.0174, "step": 48275 }, { "epoch": 14.44, "grad_norm": 2.638864040374756, "learning_rate": 8.941414116037458e-06, "loss": 0.9216, "step": 48280 }, { "epoch": 14.45, "grad_norm": 3.484980344772339, "learning_rate": 8.936911698595921e-06, "loss": 0.9612, "step": 48285 }, { "epoch": 14.45, "grad_norm": 4.663930416107178, "learning_rate": 8.932410168302052e-06, "loss": 1.0834, "step": 48290 }, { "epoch": 14.45, "grad_norm": 3.5646653175354004, "learning_rate": 8.92790952540447e-06, "loss": 0.9851, "step": 48295 }, { "epoch": 14.45, "grad_norm": 3.214585304260254, "learning_rate": 8.923409770151739e-06, "loss": 0.9698, "step": 48300 }, { "epoch": 14.45, "grad_norm": 1.7530304193496704, "learning_rate": 8.918910902792377e-06, "loss": 0.9638, "step": 48305 }, { "epoch": 14.45, "grad_norm": 3.360421895980835, "learning_rate": 8.914412923574848e-06, "loss": 0.979, "step": 48310 }, { "epoch": 14.46, "grad_norm": 2.919114589691162, "learning_rate": 8.909915832747573e-06, "loss": 0.9093, "step": 48315 }, { "epoch": 14.46, "grad_norm": 1.921119213104248, "learning_rate": 8.905419630558922e-06, "loss": 0.9799, "step": 48320 }, { "epoch": 14.46, "grad_norm": 3.1176345348358154, "learning_rate": 
8.900924317257226e-06, "loss": 1.0288, "step": 48325 }, { "epoch": 14.46, "grad_norm": 2.6858246326446533, "learning_rate": 8.896429893090729e-06, "loss": 1.0427, "step": 48330 }, { "epoch": 14.46, "grad_norm": 30.713518142700195, "learning_rate": 8.891936358307685e-06, "loss": 1.0085, "step": 48335 }, { "epoch": 14.46, "grad_norm": 4.43396520614624, "learning_rate": 8.887443713156237e-06, "loss": 0.8242, "step": 48340 }, { "epoch": 14.46, "grad_norm": 3.269070625305176, "learning_rate": 8.882951957884541e-06, "loss": 1.0081, "step": 48345 }, { "epoch": 14.47, "grad_norm": 3.0732762813568115, "learning_rate": 8.87846109274064e-06, "loss": 1.0627, "step": 48350 }, { "epoch": 14.47, "grad_norm": 1.8434253931045532, "learning_rate": 8.87397111797259e-06, "loss": 0.9998, "step": 48355 }, { "epoch": 14.47, "grad_norm": 1.8734230995178223, "learning_rate": 8.869482033828347e-06, "loss": 1.0262, "step": 48360 }, { "epoch": 14.47, "grad_norm": 2.2515382766723633, "learning_rate": 8.864993840555844e-06, "loss": 1.006, "step": 48365 }, { "epoch": 14.47, "grad_norm": 1.439144253730774, "learning_rate": 8.86050653840296e-06, "loss": 0.9979, "step": 48370 }, { "epoch": 14.47, "grad_norm": 3.4755709171295166, "learning_rate": 8.856020127617524e-06, "loss": 0.8835, "step": 48375 }, { "epoch": 14.47, "grad_norm": 2.903697967529297, "learning_rate": 8.851534608447311e-06, "loss": 1.0311, "step": 48380 }, { "epoch": 14.48, "grad_norm": 2.571676731109619, "learning_rate": 8.847049981140063e-06, "loss": 0.9793, "step": 48385 }, { "epoch": 14.48, "grad_norm": 2.435349702835083, "learning_rate": 8.84256624594345e-06, "loss": 0.858, "step": 48390 }, { "epoch": 14.48, "grad_norm": 1.679379940032959, "learning_rate": 8.83808340310511e-06, "loss": 1.1234, "step": 48395 }, { "epoch": 14.48, "grad_norm": 4.435678005218506, "learning_rate": 8.833601452872625e-06, "loss": 0.9629, "step": 48400 }, { "epoch": 14.48, "grad_norm": 3.5596141815185547, "learning_rate": 8.829120395493527e-06, "loss": 
1.2037, "step": 48405 }, { "epoch": 14.48, "grad_norm": 4.797013759613037, "learning_rate": 8.8246402312153e-06, "loss": 1.1643, "step": 48410 }, { "epoch": 14.49, "grad_norm": 2.486677885055542, "learning_rate": 8.820160960285382e-06, "loss": 1.0333, "step": 48415 }, { "epoch": 14.49, "grad_norm": 1.6059032678604126, "learning_rate": 8.815682582951163e-06, "loss": 1.1628, "step": 48420 }, { "epoch": 14.49, "grad_norm": 2.2335846424102783, "learning_rate": 8.811205099459954e-06, "loss": 0.9198, "step": 48425 }, { "epoch": 14.49, "grad_norm": 2.2356197834014893, "learning_rate": 8.806728510059078e-06, "loss": 1.0291, "step": 48430 }, { "epoch": 14.49, "grad_norm": 4.154965400695801, "learning_rate": 8.802252814995738e-06, "loss": 1.0695, "step": 48435 }, { "epoch": 14.49, "grad_norm": 1.8840675354003906, "learning_rate": 8.797778014517156e-06, "loss": 1.07, "step": 48440 }, { "epoch": 14.49, "grad_norm": 7.801070213317871, "learning_rate": 8.793304108870434e-06, "loss": 0.9854, "step": 48445 }, { "epoch": 14.5, "grad_norm": 2.220947027206421, "learning_rate": 8.7888310983027e-06, "loss": 1.0613, "step": 48450 }, { "epoch": 14.5, "grad_norm": 2.6094961166381836, "learning_rate": 8.784358983060964e-06, "loss": 0.9946, "step": 48455 }, { "epoch": 14.5, "grad_norm": 2.7676682472229004, "learning_rate": 8.779887763392225e-06, "loss": 1.0854, "step": 48460 }, { "epoch": 14.5, "grad_norm": 4.184953212738037, "learning_rate": 8.775417439543427e-06, "loss": 0.7902, "step": 48465 }, { "epoch": 14.5, "grad_norm": 2.03581166267395, "learning_rate": 8.770948011761456e-06, "loss": 1.0086, "step": 48470 }, { "epoch": 14.5, "grad_norm": 3.659592866897583, "learning_rate": 8.76647948029316e-06, "loss": 0.8873, "step": 48475 }, { "epoch": 14.5, "grad_norm": 4.103082656860352, "learning_rate": 8.762011845385321e-06, "loss": 1.1404, "step": 48480 }, { "epoch": 14.51, "grad_norm": 2.476278066635132, "learning_rate": 8.757545107284704e-06, "loss": 1.0754, "step": 48485 }, { "epoch": 
14.51, "grad_norm": 3.953524112701416, "learning_rate": 8.753079266237978e-06, "loss": 0.9528, "step": 48490 }, { "epoch": 14.51, "grad_norm": 1.5628539323806763, "learning_rate": 8.748614322491799e-06, "loss": 0.9617, "step": 48495 }, { "epoch": 14.51, "grad_norm": 2.0729362964630127, "learning_rate": 8.744150276292759e-06, "loss": 1.0597, "step": 48500 }, { "epoch": 14.51, "grad_norm": 3.8265740871429443, "learning_rate": 8.739687127887399e-06, "loss": 1.1671, "step": 48505 }, { "epoch": 14.51, "grad_norm": 2.81380558013916, "learning_rate": 8.73522487752222e-06, "loss": 0.9367, "step": 48510 }, { "epoch": 14.52, "grad_norm": 3.2788760662078857, "learning_rate": 8.730763525443663e-06, "loss": 1.0131, "step": 48515 }, { "epoch": 14.52, "grad_norm": 1.7596465349197388, "learning_rate": 8.726303071898128e-06, "loss": 0.9415, "step": 48520 }, { "epoch": 14.52, "grad_norm": 3.689640760421753, "learning_rate": 8.721843517131967e-06, "loss": 1.0165, "step": 48525 }, { "epoch": 14.52, "grad_norm": 3.1340339183807373, "learning_rate": 8.717384861391448e-06, "loss": 0.9638, "step": 48530 }, { "epoch": 14.52, "grad_norm": 3.0869131088256836, "learning_rate": 8.712927104922859e-06, "loss": 1.1088, "step": 48535 }, { "epoch": 14.52, "grad_norm": 2.496973991394043, "learning_rate": 8.708470247972359e-06, "loss": 1.0464, "step": 48540 }, { "epoch": 14.52, "grad_norm": 3.2096173763275146, "learning_rate": 8.704014290786128e-06, "loss": 0.9417, "step": 48545 }, { "epoch": 14.53, "grad_norm": 3.4847817420959473, "learning_rate": 8.699559233610242e-06, "loss": 0.8683, "step": 48550 }, { "epoch": 14.53, "grad_norm": 2.737156867980957, "learning_rate": 8.695105076690745e-06, "loss": 0.8797, "step": 48555 }, { "epoch": 14.53, "grad_norm": 1.7218374013900757, "learning_rate": 8.690651820273668e-06, "loss": 1.0199, "step": 48560 }, { "epoch": 14.53, "grad_norm": 2.990959882736206, "learning_rate": 8.68619946460492e-06, "loss": 1.1195, "step": 48565 }, { "epoch": 14.53, "grad_norm": 
2.1475510597229004, "learning_rate": 8.681748009930433e-06, "loss": 1.0958, "step": 48570 }, { "epoch": 14.53, "grad_norm": 2.3086507320404053, "learning_rate": 8.677297456496026e-06, "loss": 0.9051, "step": 48575 }, { "epoch": 14.53, "grad_norm": 1.2065564393997192, "learning_rate": 8.672847804547533e-06, "loss": 1.0571, "step": 48580 }, { "epoch": 14.54, "grad_norm": 1.4163763523101807, "learning_rate": 8.668399054330672e-06, "loss": 1.0684, "step": 48585 }, { "epoch": 14.54, "grad_norm": 3.3399434089660645, "learning_rate": 8.66395120609116e-06, "loss": 1.1536, "step": 48590 }, { "epoch": 14.54, "grad_norm": 1.9370921850204468, "learning_rate": 8.659504260074638e-06, "loss": 1.0895, "step": 48595 }, { "epoch": 14.54, "grad_norm": 2.0222585201263428, "learning_rate": 8.655058216526712e-06, "loss": 0.9891, "step": 48600 }, { "epoch": 14.54, "grad_norm": 2.3010804653167725, "learning_rate": 8.650613075692931e-06, "loss": 0.9449, "step": 48605 }, { "epoch": 14.54, "grad_norm": 2.6222808361053467, "learning_rate": 8.646168837818797e-06, "loss": 1.08, "step": 48610 }, { "epoch": 14.55, "grad_norm": 1.6273561716079712, "learning_rate": 8.641725503149762e-06, "loss": 1.0818, "step": 48615 }, { "epoch": 14.55, "grad_norm": 2.0088725090026855, "learning_rate": 8.637283071931227e-06, "loss": 0.9697, "step": 48620 }, { "epoch": 14.55, "grad_norm": 1.362270474433899, "learning_rate": 8.632841544408526e-06, "loss": 1.1185, "step": 48625 }, { "epoch": 14.55, "grad_norm": 2.453993797302246, "learning_rate": 8.62840092082699e-06, "loss": 1.0598, "step": 48630 }, { "epoch": 14.55, "grad_norm": 2.08078932762146, "learning_rate": 8.623961201431835e-06, "loss": 0.938, "step": 48635 }, { "epoch": 14.55, "grad_norm": 2.452207565307617, "learning_rate": 8.619522386468292e-06, "loss": 0.896, "step": 48640 }, { "epoch": 14.55, "grad_norm": 8.122790336608887, "learning_rate": 8.6150844761815e-06, "loss": 1.0177, "step": 48645 }, { "epoch": 14.56, "grad_norm": 2.5997066497802734, 
"learning_rate": 8.61064747081656e-06, "loss": 1.0295, "step": 48650 }, { "epoch": 14.56, "grad_norm": 3.2175190448760986, "learning_rate": 8.606211370618537e-06, "loss": 1.0199, "step": 48655 }, { "epoch": 14.56, "grad_norm": 2.0975887775421143, "learning_rate": 8.601776175832399e-06, "loss": 0.9438, "step": 48660 }, { "epoch": 14.56, "grad_norm": 2.3262147903442383, "learning_rate": 8.597341886703134e-06, "loss": 1.0345, "step": 48665 }, { "epoch": 14.56, "grad_norm": 1.7775872945785522, "learning_rate": 8.592908503475614e-06, "loss": 1.1137, "step": 48670 }, { "epoch": 14.56, "grad_norm": 3.6084024906158447, "learning_rate": 8.588476026394716e-06, "loss": 1.0825, "step": 48675 }, { "epoch": 14.56, "grad_norm": 3.329972982406616, "learning_rate": 8.584044455705223e-06, "loss": 1.1109, "step": 48680 }, { "epoch": 14.57, "grad_norm": 1.461459994316101, "learning_rate": 8.579613791651889e-06, "loss": 0.7492, "step": 48685 }, { "epoch": 14.57, "grad_norm": 3.0383195877075195, "learning_rate": 8.575184034479416e-06, "loss": 1.0271, "step": 48690 }, { "epoch": 14.57, "grad_norm": 1.4644750356674194, "learning_rate": 8.57075518443246e-06, "loss": 1.0969, "step": 48695 }, { "epoch": 14.57, "grad_norm": 2.943840503692627, "learning_rate": 8.566327241755617e-06, "loss": 1.1267, "step": 48700 }, { "epoch": 14.57, "grad_norm": 4.192392349243164, "learning_rate": 8.561900206693437e-06, "loss": 0.8901, "step": 48705 }, { "epoch": 14.57, "grad_norm": 1.5619021654129028, "learning_rate": 8.557474079490421e-06, "loss": 1.1158, "step": 48710 }, { "epoch": 14.57, "grad_norm": 5.234612941741943, "learning_rate": 8.553048860391025e-06, "loss": 0.8096, "step": 48715 }, { "epoch": 14.58, "grad_norm": 4.29495906829834, "learning_rate": 8.548624549639641e-06, "loss": 0.9903, "step": 48720 }, { "epoch": 14.58, "grad_norm": 3.1692752838134766, "learning_rate": 8.544201147480625e-06, "loss": 1.1217, "step": 48725 }, { "epoch": 14.58, "grad_norm": 1.6975233554840088, "learning_rate": 
8.539778654158274e-06, "loss": 1.0455, "step": 48730 }, { "epoch": 14.58, "grad_norm": 3.1467435359954834, "learning_rate": 8.535357069916839e-06, "loss": 0.7778, "step": 48735 }, { "epoch": 14.58, "grad_norm": 2.9374170303344727, "learning_rate": 8.530936395000517e-06, "loss": 0.939, "step": 48740 }, { "epoch": 14.58, "grad_norm": 3.6052603721618652, "learning_rate": 8.52651662965346e-06, "loss": 0.9546, "step": 48745 }, { "epoch": 14.59, "grad_norm": 2.1928467750549316, "learning_rate": 8.522097774119775e-06, "loss": 1.1193, "step": 48750 }, { "epoch": 14.59, "grad_norm": 3.1543407440185547, "learning_rate": 8.517679828643485e-06, "loss": 0.8595, "step": 48755 }, { "epoch": 14.59, "grad_norm": 2.9056456089019775, "learning_rate": 8.513262793468623e-06, "loss": 0.9613, "step": 48760 }, { "epoch": 14.59, "grad_norm": 2.9297358989715576, "learning_rate": 8.508846668839104e-06, "loss": 0.976, "step": 48765 }, { "epoch": 14.59, "grad_norm": 2.0978548526763916, "learning_rate": 8.504431454998856e-06, "loss": 0.9262, "step": 48770 }, { "epoch": 14.59, "grad_norm": 1.7425466775894165, "learning_rate": 8.500017152191708e-06, "loss": 1.0549, "step": 48775 }, { "epoch": 14.59, "grad_norm": 3.82523250579834, "learning_rate": 8.495603760661459e-06, "loss": 1.0423, "step": 48780 }, { "epoch": 14.6, "grad_norm": 3.274580717086792, "learning_rate": 8.491191280651861e-06, "loss": 0.8973, "step": 48785 }, { "epoch": 14.6, "grad_norm": 8.596813201904297, "learning_rate": 8.486779712406605e-06, "loss": 0.9724, "step": 48790 }, { "epoch": 14.6, "grad_norm": 2.4201295375823975, "learning_rate": 8.482369056169345e-06, "loss": 0.9512, "step": 48795 }, { "epoch": 14.6, "grad_norm": 4.427967071533203, "learning_rate": 8.47795931218367e-06, "loss": 0.9629, "step": 48800 }, { "epoch": 14.6, "grad_norm": 3.9436044692993164, "learning_rate": 8.47355048069313e-06, "loss": 0.9762, "step": 48805 }, { "epoch": 14.6, "grad_norm": 4.806298732757568, "learning_rate": 8.469142561941218e-06, "loss": 
1.0321, "step": 48810 }, { "epoch": 14.6, "grad_norm": 1.625897765159607, "learning_rate": 8.46473555617138e-06, "loss": 1.068, "step": 48815 }, { "epoch": 14.61, "grad_norm": 2.3483619689941406, "learning_rate": 8.46032946362701e-06, "loss": 1.077, "step": 48820 }, { "epoch": 14.61, "grad_norm": 3.5805463790893555, "learning_rate": 8.455924284551453e-06, "loss": 1.0664, "step": 48825 }, { "epoch": 14.61, "grad_norm": 1.450373649597168, "learning_rate": 8.451520019187997e-06, "loss": 1.1001, "step": 48830 }, { "epoch": 14.61, "grad_norm": 2.2781779766082764, "learning_rate": 8.447116667779892e-06, "loss": 0.9847, "step": 48835 }, { "epoch": 14.61, "grad_norm": 3.301300048828125, "learning_rate": 8.442714230570328e-06, "loss": 0.8831, "step": 48840 }, { "epoch": 14.61, "grad_norm": 2.685093879699707, "learning_rate": 8.438312707802453e-06, "loss": 0.8498, "step": 48845 }, { "epoch": 14.62, "grad_norm": 4.1189069747924805, "learning_rate": 8.433912099719337e-06, "loss": 0.9296, "step": 48850 }, { "epoch": 14.62, "grad_norm": 2.040025234222412, "learning_rate": 8.429512406564055e-06, "loss": 1.0176, "step": 48855 }, { "epoch": 14.62, "grad_norm": 2.4712417125701904, "learning_rate": 8.42511362857956e-06, "loss": 0.9175, "step": 48860 }, { "epoch": 14.62, "grad_norm": 2.281320810317993, "learning_rate": 8.420715766008826e-06, "loss": 0.8373, "step": 48865 }, { "epoch": 14.62, "grad_norm": 2.7027089595794678, "learning_rate": 8.416318819094713e-06, "loss": 0.9954, "step": 48870 }, { "epoch": 14.62, "grad_norm": 2.729827880859375, "learning_rate": 8.411922788080088e-06, "loss": 0.8774, "step": 48875 }, { "epoch": 14.62, "grad_norm": 1.878912329673767, "learning_rate": 8.407527673207718e-06, "loss": 0.9414, "step": 48880 }, { "epoch": 14.63, "grad_norm": 2.9538416862487793, "learning_rate": 8.403133474720348e-06, "loss": 0.9427, "step": 48885 }, { "epoch": 14.63, "grad_norm": 1.521445393562317, "learning_rate": 8.398740192860663e-06, "loss": 0.9906, "step": 48890 }, { 
"epoch": 14.63, "grad_norm": 2.062879800796509, "learning_rate": 8.394347827871302e-06, "loss": 0.8437, "step": 48895 }, { "epoch": 14.63, "grad_norm": 3.722719669342041, "learning_rate": 8.38995637999485e-06, "loss": 1.0068, "step": 48900 }, { "epoch": 14.63, "grad_norm": 3.8463211059570312, "learning_rate": 8.385565849473843e-06, "loss": 0.9874, "step": 48905 }, { "epoch": 14.63, "grad_norm": 2.462646484375, "learning_rate": 8.381176236550764e-06, "loss": 1.0429, "step": 48910 }, { "epoch": 14.63, "grad_norm": 2.3265976905822754, "learning_rate": 8.376787541468045e-06, "loss": 1.1793, "step": 48915 }, { "epoch": 14.64, "grad_norm": 4.777642726898193, "learning_rate": 8.372399764468076e-06, "loss": 0.9058, "step": 48920 }, { "epoch": 14.64, "grad_norm": 2.799137592315674, "learning_rate": 8.36801290579318e-06, "loss": 0.9833, "step": 48925 }, { "epoch": 14.64, "grad_norm": 1.032945156097412, "learning_rate": 8.363626965685645e-06, "loss": 1.1707, "step": 48930 }, { "epoch": 14.64, "grad_norm": 2.207265853881836, "learning_rate": 8.359241944387699e-06, "loss": 0.9743, "step": 48935 }, { "epoch": 14.64, "grad_norm": 2.7506961822509766, "learning_rate": 8.354857842141533e-06, "loss": 0.9348, "step": 48940 }, { "epoch": 14.64, "grad_norm": 1.6580191850662231, "learning_rate": 8.35047465918925e-06, "loss": 1.0558, "step": 48945 }, { "epoch": 14.65, "grad_norm": 4.469571590423584, "learning_rate": 8.34609239577296e-06, "loss": 1.0138, "step": 48950 }, { "epoch": 14.65, "grad_norm": 2.21535325050354, "learning_rate": 8.341711052134663e-06, "loss": 1.0124, "step": 48955 }, { "epoch": 14.65, "grad_norm": 5.841526031494141, "learning_rate": 8.337330628516363e-06, "loss": 1.0061, "step": 48960 }, { "epoch": 14.65, "grad_norm": 2.309290647506714, "learning_rate": 8.332951125159957e-06, "loss": 0.9943, "step": 48965 }, { "epoch": 14.65, "grad_norm": 5.338931560516357, "learning_rate": 8.328572542307353e-06, "loss": 0.9796, "step": 48970 }, { "epoch": 14.65, "grad_norm": 
4.652040958404541, "learning_rate": 8.324194880200348e-06, "loss": 0.7701, "step": 48975 }, { "epoch": 14.65, "grad_norm": 4.739422798156738, "learning_rate": 8.319818139080728e-06, "loss": 1.0848, "step": 48980 }, { "epoch": 14.66, "grad_norm": 1.5917638540267944, "learning_rate": 8.315442319190214e-06, "loss": 0.9494, "step": 48985 }, { "epoch": 14.66, "grad_norm": 1.3649824857711792, "learning_rate": 8.31106742077048e-06, "loss": 1.1455, "step": 48990 }, { "epoch": 14.66, "grad_norm": 1.3612852096557617, "learning_rate": 8.30669344406314e-06, "loss": 0.9826, "step": 48995 }, { "epoch": 14.66, "grad_norm": 4.043242454528809, "learning_rate": 8.302320389309776e-06, "loss": 0.8944, "step": 49000 }, { "epoch": 14.66, "grad_norm": 4.382472991943359, "learning_rate": 8.297948256751897e-06, "loss": 0.9066, "step": 49005 }, { "epoch": 14.66, "grad_norm": 1.3883436918258667, "learning_rate": 8.293577046630973e-06, "loss": 1.0056, "step": 49010 }, { "epoch": 14.66, "grad_norm": 2.2432875633239746, "learning_rate": 8.289206759188426e-06, "loss": 1.1799, "step": 49015 }, { "epoch": 14.67, "grad_norm": 2.0689566135406494, "learning_rate": 8.28483739466562e-06, "loss": 0.9735, "step": 49020 }, { "epoch": 14.67, "grad_norm": 2.059446096420288, "learning_rate": 8.280468953303868e-06, "loss": 0.9371, "step": 49025 }, { "epoch": 14.67, "grad_norm": 2.9364001750946045, "learning_rate": 8.276101435344441e-06, "loss": 1.0071, "step": 49030 }, { "epoch": 14.67, "grad_norm": 2.926053524017334, "learning_rate": 8.271734841028553e-06, "loss": 0.8205, "step": 49035 }, { "epoch": 14.67, "grad_norm": 1.4417191743850708, "learning_rate": 8.267369170597345e-06, "loss": 0.9981, "step": 49040 }, { "epoch": 14.67, "grad_norm": 6.5991973876953125, "learning_rate": 8.263004424291962e-06, "loss": 0.7934, "step": 49045 }, { "epoch": 14.68, "grad_norm": 3.2373931407928467, "learning_rate": 8.258640602353432e-06, "loss": 0.9667, "step": 49050 }, { "epoch": 14.68, "grad_norm": 2.341061592102051, 
"learning_rate": 8.254277705022795e-06, "loss": 1.0543, "step": 49055 }, { "epoch": 14.68, "grad_norm": 2.1643905639648438, "learning_rate": 8.24991573254098e-06, "loss": 0.9737, "step": 49060 }, { "epoch": 14.68, "grad_norm": 4.566475868225098, "learning_rate": 8.245554685148924e-06, "loss": 0.9937, "step": 49065 }, { "epoch": 14.68, "grad_norm": 3.2137701511383057, "learning_rate": 8.241194563087456e-06, "loss": 1.0407, "step": 49070 }, { "epoch": 14.68, "grad_norm": 1.9922786951065063, "learning_rate": 8.236835366597397e-06, "loss": 1.1503, "step": 49075 }, { "epoch": 14.68, "grad_norm": 6.491326808929443, "learning_rate": 8.232477095919495e-06, "loss": 0.8255, "step": 49080 }, { "epoch": 14.69, "grad_norm": 3.8574929237365723, "learning_rate": 8.228119751294452e-06, "loss": 0.8979, "step": 49085 }, { "epoch": 14.69, "grad_norm": 1.933752417564392, "learning_rate": 8.223763332962925e-06, "loss": 0.992, "step": 49090 }, { "epoch": 14.69, "grad_norm": 1.47145676612854, "learning_rate": 8.219407841165508e-06, "loss": 1.1588, "step": 49095 }, { "epoch": 14.69, "grad_norm": 1.7857855558395386, "learning_rate": 8.215053276142756e-06, "loss": 0.9984, "step": 49100 }, { "epoch": 14.69, "grad_norm": 3.7101316452026367, "learning_rate": 8.210699638135164e-06, "loss": 0.933, "step": 49105 }, { "epoch": 14.69, "grad_norm": 3.045780658721924, "learning_rate": 8.20634692738318e-06, "loss": 0.8924, "step": 49110 }, { "epoch": 14.69, "grad_norm": 4.745621204376221, "learning_rate": 8.201995144127198e-06, "loss": 0.9093, "step": 49115 }, { "epoch": 14.7, "grad_norm": 1.8852388858795166, "learning_rate": 8.197644288607562e-06, "loss": 1.1749, "step": 49120 }, { "epoch": 14.7, "grad_norm": 2.5533406734466553, "learning_rate": 8.193294361064569e-06, "loss": 1.1027, "step": 49125 }, { "epoch": 14.7, "grad_norm": 1.9613593816757202, "learning_rate": 8.188945361738468e-06, "loss": 0.9726, "step": 49130 }, { "epoch": 14.7, "grad_norm": 2.30924916267395, "learning_rate": 
8.184597290869423e-06, "loss": 1.0676, "step": 49135 }, { "epoch": 14.7, "grad_norm": 7.164217472076416, "learning_rate": 8.180250148697605e-06, "loss": 0.9149, "step": 49140 }, { "epoch": 14.7, "grad_norm": 1.656010627746582, "learning_rate": 8.175903935463076e-06, "loss": 1.0562, "step": 49145 }, { "epoch": 14.71, "grad_norm": 2.542595863342285, "learning_rate": 8.171558651405897e-06, "loss": 1.0277, "step": 49150 }, { "epoch": 14.71, "grad_norm": 2.8253142833709717, "learning_rate": 8.16721429676603e-06, "loss": 1.0583, "step": 49155 }, { "epoch": 14.71, "grad_norm": 4.2701945304870605, "learning_rate": 8.162870871783435e-06, "loss": 0.9444, "step": 49160 }, { "epoch": 14.71, "grad_norm": 3.9655089378356934, "learning_rate": 8.15852837669797e-06, "loss": 0.9712, "step": 49165 }, { "epoch": 14.71, "grad_norm": 3.8803188800811768, "learning_rate": 8.154186811749479e-06, "loss": 0.8939, "step": 49170 }, { "epoch": 14.71, "grad_norm": 3.9758951663970947, "learning_rate": 8.14984617717774e-06, "loss": 0.9559, "step": 49175 }, { "epoch": 14.71, "grad_norm": 1.9809141159057617, "learning_rate": 8.145506473222481e-06, "loss": 1.0336, "step": 49180 }, { "epoch": 14.72, "grad_norm": 3.15104079246521, "learning_rate": 8.14116770012338e-06, "loss": 1.1105, "step": 49185 }, { "epoch": 14.72, "grad_norm": 3.349479913711548, "learning_rate": 8.136829858120066e-06, "loss": 1.0803, "step": 49190 }, { "epoch": 14.72, "grad_norm": 3.6429026126861572, "learning_rate": 8.132492947452108e-06, "loss": 0.9848, "step": 49195 }, { "epoch": 14.72, "grad_norm": 2.910015106201172, "learning_rate": 8.128156968359035e-06, "loss": 0.9665, "step": 49200 }, { "epoch": 14.72, "grad_norm": 6.490506172180176, "learning_rate": 8.123821921080313e-06, "loss": 0.8624, "step": 49205 }, { "epoch": 14.72, "grad_norm": 1.679452896118164, "learning_rate": 8.119487805855364e-06, "loss": 0.9264, "step": 49210 }, { "epoch": 14.72, "grad_norm": 5.132246971130371, "learning_rate": 8.115154622923556e-06, "loss": 
0.8382, "step": 49215 }, { "epoch": 14.73, "grad_norm": 1.5223355293273926, "learning_rate": 8.11082237252421e-06, "loss": 1.04, "step": 49220 }, { "epoch": 14.73, "grad_norm": 2.475334644317627, "learning_rate": 8.10649105489659e-06, "loss": 1.0027, "step": 49225 }, { "epoch": 14.73, "grad_norm": 1.9048575162887573, "learning_rate": 8.102160670279906e-06, "loss": 1.0384, "step": 49230 }, { "epoch": 14.73, "grad_norm": 2.135935068130493, "learning_rate": 8.097831218913333e-06, "loss": 1.0354, "step": 49235 }, { "epoch": 14.73, "grad_norm": 1.7497014999389648, "learning_rate": 8.093502701035957e-06, "loss": 0.8898, "step": 49240 }, { "epoch": 14.73, "grad_norm": 2.735699415206909, "learning_rate": 8.08917511688687e-06, "loss": 0.9877, "step": 49245 }, { "epoch": 14.74, "grad_norm": 1.427288293838501, "learning_rate": 8.084848466705048e-06, "loss": 0.8119, "step": 49250 }, { "epoch": 14.74, "grad_norm": 3.9717910289764404, "learning_rate": 8.080522750729477e-06, "loss": 0.9547, "step": 49255 }, { "epoch": 14.74, "grad_norm": 4.33355188369751, "learning_rate": 8.07619796919904e-06, "loss": 1.0372, "step": 49260 }, { "epoch": 14.74, "grad_norm": 4.518667697906494, "learning_rate": 8.071874122352598e-06, "loss": 0.9114, "step": 49265 }, { "epoch": 14.74, "grad_norm": 3.398543119430542, "learning_rate": 8.067551210428952e-06, "loss": 0.9405, "step": 49270 }, { "epoch": 14.74, "grad_norm": 4.380087852478027, "learning_rate": 8.063229233666854e-06, "loss": 0.9915, "step": 49275 }, { "epoch": 14.74, "grad_norm": 1.5817692279815674, "learning_rate": 8.058908192305e-06, "loss": 1.1618, "step": 49280 }, { "epoch": 14.75, "grad_norm": 2.916731834411621, "learning_rate": 8.054588086582029e-06, "loss": 0.9583, "step": 49285 }, { "epoch": 14.75, "grad_norm": 1.46005117893219, "learning_rate": 8.050268916736561e-06, "loss": 0.9385, "step": 49290 }, { "epoch": 14.75, "grad_norm": 4.392984390258789, "learning_rate": 8.045950683007115e-06, "loss": 0.9728, "step": 49295 }, { "epoch": 
14.75, "grad_norm": 1.823644995689392, "learning_rate": 8.041633385632186e-06, "loss": 1.1487, "step": 49300 }, { "epoch": 14.75, "grad_norm": 5.119616508483887, "learning_rate": 8.037317024850222e-06, "loss": 1.0758, "step": 49305 }, { "epoch": 14.75, "grad_norm": 5.690764904022217, "learning_rate": 8.033001600899606e-06, "loss": 0.8963, "step": 49310 }, { "epoch": 14.75, "grad_norm": 3.6248810291290283, "learning_rate": 8.028687114018674e-06, "loss": 0.8885, "step": 49315 }, { "epoch": 14.76, "grad_norm": 2.2621960639953613, "learning_rate": 8.024373564445714e-06, "loss": 1.0696, "step": 49320 }, { "epoch": 14.76, "grad_norm": 4.6661601066589355, "learning_rate": 8.020060952418956e-06, "loss": 1.1138, "step": 49325 }, { "epoch": 14.76, "grad_norm": 2.248114824295044, "learning_rate": 8.01574927817659e-06, "loss": 1.1691, "step": 49330 }, { "epoch": 14.76, "grad_norm": 1.4611231088638306, "learning_rate": 8.01143854195672e-06, "loss": 1.1069, "step": 49335 }, { "epoch": 14.76, "grad_norm": 1.7469176054000854, "learning_rate": 8.007128743997457e-06, "loss": 1.0006, "step": 49340 }, { "epoch": 14.76, "grad_norm": 1.546489953994751, "learning_rate": 8.002819884536797e-06, "loss": 0.9871, "step": 49345 }, { "epoch": 14.76, "grad_norm": 3.91237473487854, "learning_rate": 7.998511963812741e-06, "loss": 0.9547, "step": 49350 }, { "epoch": 14.77, "grad_norm": 2.6893093585968018, "learning_rate": 7.99420498206318e-06, "loss": 0.81, "step": 49355 }, { "epoch": 14.77, "grad_norm": 3.268592119216919, "learning_rate": 7.989898939526018e-06, "loss": 1.152, "step": 49360 }, { "epoch": 14.77, "grad_norm": 3.8002939224243164, "learning_rate": 7.985593836439051e-06, "loss": 1.1162, "step": 49365 }, { "epoch": 14.77, "grad_norm": 2.8874077796936035, "learning_rate": 7.981289673040041e-06, "loss": 0.97, "step": 49370 }, { "epoch": 14.77, "grad_norm": 3.9415621757507324, "learning_rate": 7.976986449566728e-06, "loss": 1.2578, "step": 49375 }, { "epoch": 14.77, "grad_norm": 
1.1208429336547852, "learning_rate": 7.972684166256744e-06, "loss": 0.9146, "step": 49380 }, { "epoch": 14.78, "grad_norm": 4.455554962158203, "learning_rate": 7.968382823347731e-06, "loss": 0.9225, "step": 49385 }, { "epoch": 14.78, "grad_norm": 3.3006980419158936, "learning_rate": 7.964082421077223e-06, "loss": 0.92, "step": 49390 }, { "epoch": 14.78, "grad_norm": 1.741047739982605, "learning_rate": 7.959782959682732e-06, "loss": 0.919, "step": 49395 }, { "epoch": 14.78, "grad_norm": 4.698997497558594, "learning_rate": 7.95548443940172e-06, "loss": 1.0252, "step": 49400 }, { "epoch": 14.78, "grad_norm": 2.376089334487915, "learning_rate": 7.951186860471582e-06, "loss": 0.995, "step": 49405 }, { "epoch": 14.78, "grad_norm": 3.1121981143951416, "learning_rate": 7.946890223129677e-06, "loss": 0.9083, "step": 49410 }, { "epoch": 14.78, "grad_norm": 3.4917523860931396, "learning_rate": 7.942594527613295e-06, "loss": 0.8519, "step": 49415 }, { "epoch": 14.79, "grad_norm": 6.6114420890808105, "learning_rate": 7.938299774159691e-06, "loss": 0.8482, "step": 49420 }, { "epoch": 14.79, "grad_norm": 1.4963136911392212, "learning_rate": 7.934005963006062e-06, "loss": 0.9212, "step": 49425 }, { "epoch": 14.79, "grad_norm": 3.31788969039917, "learning_rate": 7.929713094389527e-06, "loss": 1.1518, "step": 49430 }, { "epoch": 14.79, "grad_norm": 2.473154067993164, "learning_rate": 7.92542116854721e-06, "loss": 0.7843, "step": 49435 }, { "epoch": 14.79, "grad_norm": 2.462738513946533, "learning_rate": 7.921130185716119e-06, "loss": 0.8878, "step": 49440 }, { "epoch": 14.79, "grad_norm": 3.9160032272338867, "learning_rate": 7.916840146133264e-06, "loss": 1.0054, "step": 49445 }, { "epoch": 14.79, "grad_norm": 1.9631022214889526, "learning_rate": 7.912551050035572e-06, "loss": 1.0096, "step": 49450 }, { "epoch": 14.8, "grad_norm": 3.000981330871582, "learning_rate": 7.908262897659921e-06, "loss": 1.051, "step": 49455 }, { "epoch": 14.8, "grad_norm": 3.195512056350708, 
"learning_rate": 7.903975689243155e-06, "loss": 1.085, "step": 49460 }, { "epoch": 14.8, "grad_norm": 1.471376657485962, "learning_rate": 7.899689425022022e-06, "loss": 1.0268, "step": 49465 }, { "epoch": 14.8, "grad_norm": 2.2439353466033936, "learning_rate": 7.895404105233286e-06, "loss": 0.9531, "step": 49470 }, { "epoch": 14.8, "grad_norm": 3.2079246044158936, "learning_rate": 7.891119730113586e-06, "loss": 0.9474, "step": 49475 }, { "epoch": 14.8, "grad_norm": 2.1932594776153564, "learning_rate": 7.886836299899574e-06, "loss": 0.8452, "step": 49480 }, { "epoch": 14.81, "grad_norm": 2.9864261150360107, "learning_rate": 7.882553814827797e-06, "loss": 1.0548, "step": 49485 }, { "epoch": 14.81, "grad_norm": 1.2952245473861694, "learning_rate": 7.878272275134782e-06, "loss": 1.072, "step": 49490 }, { "epoch": 14.81, "grad_norm": 2.7455410957336426, "learning_rate": 7.873991681056992e-06, "loss": 1.242, "step": 49495 }, { "epoch": 14.81, "grad_norm": 2.4569225311279297, "learning_rate": 7.86971203283084e-06, "loss": 0.9538, "step": 49500 }, { "epoch": 14.81, "grad_norm": 6.187790870666504, "learning_rate": 7.865433330692684e-06, "loss": 0.872, "step": 49505 }, { "epoch": 14.81, "grad_norm": 2.7457752227783203, "learning_rate": 7.861155574878838e-06, "loss": 1.0486, "step": 49510 }, { "epoch": 14.81, "grad_norm": 3.150146245956421, "learning_rate": 7.856878765625553e-06, "loss": 0.9142, "step": 49515 }, { "epoch": 14.82, "grad_norm": 25.611284255981445, "learning_rate": 7.852602903169043e-06, "loss": 0.9913, "step": 49520 }, { "epoch": 14.82, "grad_norm": 5.1431145668029785, "learning_rate": 7.848327987745433e-06, "loss": 0.7851, "step": 49525 }, { "epoch": 14.82, "grad_norm": 5.0381388664245605, "learning_rate": 7.844054019590851e-06, "loss": 0.9707, "step": 49530 }, { "epoch": 14.82, "grad_norm": 2.5118184089660645, "learning_rate": 7.839780998941331e-06, "loss": 0.9747, "step": 49535 }, { "epoch": 14.82, "grad_norm": 3.1850192546844482, "learning_rate": 
7.83550892603287e-06, "loss": 0.9729, "step": 49540 }, { "epoch": 14.82, "grad_norm": 2.785214424133301, "learning_rate": 7.83123780110141e-06, "loss": 0.9847, "step": 49545 }, { "epoch": 14.82, "grad_norm": 2.283158540725708, "learning_rate": 7.826967624382839e-06, "loss": 1.0142, "step": 49550 }, { "epoch": 14.83, "grad_norm": 2.654142379760742, "learning_rate": 7.822698396113005e-06, "loss": 1.1584, "step": 49555 }, { "epoch": 14.83, "grad_norm": 1.418227195739746, "learning_rate": 7.818430116527668e-06, "loss": 0.7142, "step": 49560 }, { "epoch": 14.83, "grad_norm": 3.0038633346557617, "learning_rate": 7.814162785862591e-06, "loss": 1.0606, "step": 49565 }, { "epoch": 14.83, "grad_norm": 3.01194429397583, "learning_rate": 7.809896404353426e-06, "loss": 0.9423, "step": 49570 }, { "epoch": 14.83, "grad_norm": 3.334562063217163, "learning_rate": 7.805630972235827e-06, "loss": 0.9911, "step": 49575 }, { "epoch": 14.83, "grad_norm": 4.44184684753418, "learning_rate": 7.801366489745343e-06, "loss": 0.8377, "step": 49580 }, { "epoch": 14.84, "grad_norm": 2.8854176998138428, "learning_rate": 7.797102957117527e-06, "loss": 1.0517, "step": 49585 }, { "epoch": 14.84, "grad_norm": 1.7378528118133545, "learning_rate": 7.792840374587826e-06, "loss": 0.9528, "step": 49590 }, { "epoch": 14.84, "grad_norm": 4.007412910461426, "learning_rate": 7.788578742391664e-06, "loss": 0.9821, "step": 49595 }, { "epoch": 14.84, "grad_norm": 5.08059549331665, "learning_rate": 7.784318060764406e-06, "loss": 0.9414, "step": 49600 }, { "epoch": 14.84, "grad_norm": 1.6905008554458618, "learning_rate": 7.78005832994137e-06, "loss": 0.9635, "step": 49605 }, { "epoch": 14.84, "grad_norm": 1.3702781200408936, "learning_rate": 7.775799550157811e-06, "loss": 1.0004, "step": 49610 }, { "epoch": 14.84, "grad_norm": 2.688145875930786, "learning_rate": 7.771541721648943e-06, "loss": 1.0286, "step": 49615 }, { "epoch": 14.85, "grad_norm": 2.3341612815856934, "learning_rate": 7.767284844649914e-06, "loss": 
1.0534, "step": 49620 }, { "epoch": 14.85, "grad_norm": 3.92333984375, "learning_rate": 7.763028919395832e-06, "loss": 1.0172, "step": 49625 }, { "epoch": 14.85, "grad_norm": 3.7512214183807373, "learning_rate": 7.758773946121745e-06, "loss": 1.1164, "step": 49630 }, { "epoch": 14.85, "grad_norm": 2.6205503940582275, "learning_rate": 7.754519925062651e-06, "loss": 0.9167, "step": 49635 }, { "epoch": 14.85, "grad_norm": 4.808284282684326, "learning_rate": 7.750266856453498e-06, "loss": 1.1432, "step": 49640 }, { "epoch": 14.85, "grad_norm": 3.2909045219421387, "learning_rate": 7.746014740529176e-06, "loss": 1.1521, "step": 49645 }, { "epoch": 14.85, "grad_norm": 4.219728946685791, "learning_rate": 7.741763577524532e-06, "loss": 0.9124, "step": 49650 }, { "epoch": 14.86, "grad_norm": 3.6862242221832275, "learning_rate": 7.737513367674331e-06, "loss": 1.0414, "step": 49655 }, { "epoch": 14.86, "grad_norm": 2.3704283237457275, "learning_rate": 7.73326411121334e-06, "loss": 1.0735, "step": 49660 }, { "epoch": 14.86, "grad_norm": 2.672393321990967, "learning_rate": 7.729015808376208e-06, "loss": 0.9972, "step": 49665 }, { "epoch": 14.86, "grad_norm": 3.06726336479187, "learning_rate": 7.724768459397597e-06, "loss": 0.9701, "step": 49670 }, { "epoch": 14.86, "grad_norm": 1.9261653423309326, "learning_rate": 7.720522064512054e-06, "loss": 1.0339, "step": 49675 }, { "epoch": 14.86, "grad_norm": 4.520108222961426, "learning_rate": 7.716276623954127e-06, "loss": 0.8752, "step": 49680 }, { "epoch": 14.87, "grad_norm": 6.763810157775879, "learning_rate": 7.712032137958273e-06, "loss": 1.0836, "step": 49685 }, { "epoch": 14.87, "grad_norm": 2.1053125858306885, "learning_rate": 7.707788606758912e-06, "loss": 0.8817, "step": 49690 }, { "epoch": 14.87, "grad_norm": 1.749535083770752, "learning_rate": 7.703546030590414e-06, "loss": 0.8303, "step": 49695 }, { "epoch": 14.87, "grad_norm": 2.739657163619995, "learning_rate": 7.699304409687089e-06, "loss": 0.9729, "step": 49700 }, { 
"epoch": 14.87, "grad_norm": 3.52274489402771, "learning_rate": 7.695063744283196e-06, "loss": 0.9307, "step": 49705 }, { "epoch": 14.87, "grad_norm": 2.7961769104003906, "learning_rate": 7.690824034612948e-06, "loss": 0.991, "step": 49710 }, { "epoch": 14.87, "grad_norm": 2.9371488094329834, "learning_rate": 7.686585280910497e-06, "loss": 0.999, "step": 49715 }, { "epoch": 14.88, "grad_norm": 2.072927236557007, "learning_rate": 7.68234748340994e-06, "loss": 0.9193, "step": 49720 }, { "epoch": 14.88, "grad_norm": 1.750504732131958, "learning_rate": 7.678110642345334e-06, "loss": 0.8694, "step": 49725 }, { "epoch": 14.88, "grad_norm": 8.674440383911133, "learning_rate": 7.673874757950675e-06, "loss": 0.8633, "step": 49730 }, { "epoch": 14.88, "grad_norm": 2.6912498474121094, "learning_rate": 7.6696398304599e-06, "loss": 1.0709, "step": 49735 }, { "epoch": 14.88, "grad_norm": 4.30122184753418, "learning_rate": 7.665405860106902e-06, "loss": 0.8949, "step": 49740 }, { "epoch": 14.88, "grad_norm": 2.0600149631500244, "learning_rate": 7.66117284712553e-06, "loss": 0.9048, "step": 49745 }, { "epoch": 14.88, "grad_norm": 3.0497703552246094, "learning_rate": 7.656940791749545e-06, "loss": 0.933, "step": 49750 }, { "epoch": 14.89, "grad_norm": 4.7076311111450195, "learning_rate": 7.652709694212706e-06, "loss": 0.8802, "step": 49755 }, { "epoch": 14.89, "grad_norm": 2.6163299083709717, "learning_rate": 7.648479554748666e-06, "loss": 1.0973, "step": 49760 }, { "epoch": 14.89, "grad_norm": 6.505290508270264, "learning_rate": 7.644250373591078e-06, "loss": 0.9932, "step": 49765 }, { "epoch": 14.89, "grad_norm": 3.616623878479004, "learning_rate": 7.640022150973485e-06, "loss": 0.8964, "step": 49770 }, { "epoch": 14.89, "grad_norm": 3.170760154724121, "learning_rate": 7.635794887129441e-06, "loss": 1.1569, "step": 49775 }, { "epoch": 14.89, "grad_norm": 5.005249500274658, "learning_rate": 7.631568582292389e-06, "loss": 0.9821, "step": 49780 }, { "epoch": 14.9, "grad_norm": 
3.093559503555298, "learning_rate": 7.62734323669575e-06, "loss": 1.0313, "step": 49785 }, { "epoch": 14.9, "grad_norm": 5.463796138763428, "learning_rate": 7.623118850572886e-06, "loss": 1.0058, "step": 49790 }, { "epoch": 14.9, "grad_norm": 2.4942896366119385, "learning_rate": 7.618895424157105e-06, "loss": 0.9905, "step": 49795 }, { "epoch": 14.9, "grad_norm": 3.5765671730041504, "learning_rate": 7.614672957681665e-06, "loss": 0.9716, "step": 49800 }, { "epoch": 14.9, "grad_norm": 3.6634316444396973, "learning_rate": 7.610451451379763e-06, "loss": 1.0055, "step": 49805 }, { "epoch": 14.9, "grad_norm": 1.4103604555130005, "learning_rate": 7.606230905484557e-06, "loss": 1.0418, "step": 49810 }, { "epoch": 14.9, "grad_norm": 5.684398651123047, "learning_rate": 7.602011320229132e-06, "loss": 1.0942, "step": 49815 }, { "epoch": 14.91, "grad_norm": 1.4336367845535278, "learning_rate": 7.597792695846542e-06, "loss": 1.0598, "step": 49820 }, { "epoch": 14.91, "grad_norm": 3.313039779663086, "learning_rate": 7.593575032569772e-06, "loss": 1.0155, "step": 49825 }, { "epoch": 14.91, "grad_norm": 4.043692111968994, "learning_rate": 7.58935833063176e-06, "loss": 0.8216, "step": 49830 }, { "epoch": 14.91, "grad_norm": 1.758624792098999, "learning_rate": 7.58514259026539e-06, "loss": 1.0355, "step": 49835 }, { "epoch": 14.91, "grad_norm": 2.282357931137085, "learning_rate": 7.5809278117035006e-06, "loss": 1.1196, "step": 49840 }, { "epoch": 14.91, "grad_norm": 2.790658950805664, "learning_rate": 7.576713995178847e-06, "loss": 1.0328, "step": 49845 }, { "epoch": 14.91, "grad_norm": 7.928509712219238, "learning_rate": 7.572501140924184e-06, "loss": 0.8045, "step": 49850 }, { "epoch": 14.92, "grad_norm": 1.6898220777511597, "learning_rate": 7.568289249172153e-06, "loss": 0.9176, "step": 49855 }, { "epoch": 14.92, "grad_norm": 1.0324536561965942, "learning_rate": 7.5640783201554046e-06, "loss": 1.0659, "step": 49860 }, { "epoch": 14.92, "grad_norm": 2.4962313175201416, 
"learning_rate": 7.5598683541064665e-06, "loss": 1.0566, "step": 49865 }, { "epoch": 14.92, "grad_norm": 4.477339744567871, "learning_rate": 7.55565935125789e-06, "loss": 0.9315, "step": 49870 }, { "epoch": 14.92, "grad_norm": 1.8302847146987915, "learning_rate": 7.551451311842109e-06, "loss": 0.9124, "step": 49875 }, { "epoch": 14.92, "grad_norm": 4.733458995819092, "learning_rate": 7.547244236091533e-06, "loss": 1.0675, "step": 49880 }, { "epoch": 14.93, "grad_norm": 3.320866584777832, "learning_rate": 7.543038124238516e-06, "loss": 1.038, "step": 49885 }, { "epoch": 14.93, "grad_norm": 1.8176350593566895, "learning_rate": 7.5388329765153585e-06, "loss": 1.1704, "step": 49890 }, { "epoch": 14.93, "grad_norm": 3.144420623779297, "learning_rate": 7.534628793154308e-06, "loss": 0.948, "step": 49895 }, { "epoch": 14.93, "grad_norm": 2.1764090061187744, "learning_rate": 7.530425574387554e-06, "loss": 0.9405, "step": 49900 }, { "epoch": 14.93, "grad_norm": 2.065609931945801, "learning_rate": 7.526223320447234e-06, "loss": 1.005, "step": 49905 }, { "epoch": 14.93, "grad_norm": 2.02253794670105, "learning_rate": 7.52202203156544e-06, "loss": 0.9092, "step": 49910 }, { "epoch": 14.93, "grad_norm": 2.1637492179870605, "learning_rate": 7.517821707974202e-06, "loss": 0.8773, "step": 49915 }, { "epoch": 14.94, "grad_norm": 10.103838920593262, "learning_rate": 7.513622349905497e-06, "loss": 0.9454, "step": 49920 }, { "epoch": 14.94, "grad_norm": 2.852343797683716, "learning_rate": 7.509423957591255e-06, "loss": 1.1144, "step": 49925 }, { "epoch": 14.94, "grad_norm": 3.75535249710083, "learning_rate": 7.505226531263349e-06, "loss": 1.0971, "step": 49930 }, { "epoch": 14.94, "grad_norm": 2.4124093055725098, "learning_rate": 7.501030071153594e-06, "loss": 0.9609, "step": 49935 }, { "epoch": 14.94, "grad_norm": 3.048701286315918, "learning_rate": 7.496834577493761e-06, "loss": 0.9755, "step": 49940 }, { "epoch": 14.94, "grad_norm": 2.259594202041626, "learning_rate": 
7.492640050515567e-06, "loss": 1.0239, "step": 49945 }, { "epoch": 14.94, "grad_norm": 2.8681416511535645, "learning_rate": 7.488446490450651e-06, "loss": 0.9469, "step": 49950 }, { "epoch": 14.95, "grad_norm": 3.7870285511016846, "learning_rate": 7.484253897530649e-06, "loss": 1.0347, "step": 49955 }, { "epoch": 14.95, "grad_norm": 1.4063019752502441, "learning_rate": 7.480900519694567e-06, "loss": 0.9334, "step": 49960 }, { "epoch": 14.95, "grad_norm": 2.873671293258667, "learning_rate": 7.476709668218851e-06, "loss": 1.0212, "step": 49965 }, { "epoch": 14.95, "grad_norm": 4.571763515472412, "learning_rate": 7.472519784536242e-06, "loss": 0.9162, "step": 49970 }, { "epoch": 14.95, "grad_norm": 2.2711031436920166, "learning_rate": 7.468330868878149e-06, "loss": 0.9683, "step": 49975 }, { "epoch": 14.95, "grad_norm": 2.531252384185791, "learning_rate": 7.464142921475919e-06, "loss": 1.0777, "step": 49980 }, { "epoch": 14.95, "grad_norm": 2.323441982269287, "learning_rate": 7.459955942560848e-06, "loss": 1.1148, "step": 49985 }, { "epoch": 14.96, "grad_norm": 3.1378073692321777, "learning_rate": 7.455769932364185e-06, "loss": 1.0343, "step": 49990 }, { "epoch": 14.96, "grad_norm": 3.8024823665618896, "learning_rate": 7.4515848911170975e-06, "loss": 0.9482, "step": 49995 }, { "epoch": 14.96, "grad_norm": 1.5496140718460083, "learning_rate": 7.447400819050751e-06, "loss": 0.9271, "step": 50000 }, { "epoch": 14.96, "grad_norm": 3.12982177734375, "learning_rate": 7.443217716396198e-06, "loss": 0.9457, "step": 50005 }, { "epoch": 14.96, "grad_norm": 1.8738044500350952, "learning_rate": 7.439035583384496e-06, "loss": 1.1598, "step": 50010 }, { "epoch": 14.96, "grad_norm": 5.965039253234863, "learning_rate": 7.43485442024659e-06, "loss": 0.8773, "step": 50015 }, { "epoch": 14.97, "grad_norm": 4.203342914581299, "learning_rate": 7.43067422721343e-06, "loss": 0.7135, "step": 50020 }, { "epoch": 14.97, "grad_norm": 3.4606893062591553, "learning_rate": 7.426495004515865e-06, 
"loss": 0.9092, "step": 50025 }, { "epoch": 14.97, "grad_norm": 6.534817218780518, "learning_rate": 7.422316752384711e-06, "loss": 1.0587, "step": 50030 }, { "epoch": 14.97, "grad_norm": 2.5901870727539062, "learning_rate": 7.418139471050736e-06, "loss": 1.0401, "step": 50035 }, { "epoch": 14.97, "grad_norm": 3.344170331954956, "learning_rate": 7.413963160744642e-06, "loss": 1.0651, "step": 50040 }, { "epoch": 14.97, "grad_norm": 4.951648712158203, "learning_rate": 7.409787821697078e-06, "loss": 1.0275, "step": 50045 }, { "epoch": 14.97, "grad_norm": 2.5820136070251465, "learning_rate": 7.405613454138655e-06, "loss": 1.0403, "step": 50050 }, { "epoch": 14.98, "grad_norm": 4.226273059844971, "learning_rate": 7.4014400582999075e-06, "loss": 0.7765, "step": 50055 }, { "epoch": 14.98, "grad_norm": 2.259936571121216, "learning_rate": 7.3972676344113366e-06, "loss": 0.9865, "step": 50060 }, { "epoch": 14.98, "grad_norm": 5.1020917892456055, "learning_rate": 7.3930961827033765e-06, "loss": 0.9311, "step": 50065 }, { "epoch": 14.98, "grad_norm": 3.1059951782226562, "learning_rate": 7.388925703406413e-06, "loss": 1.0568, "step": 50070 }, { "epoch": 14.98, "grad_norm": 2.5398685932159424, "learning_rate": 7.384756196750775e-06, "loss": 0.8391, "step": 50075 }, { "epoch": 14.98, "grad_norm": 6.540744304656982, "learning_rate": 7.380587662966743e-06, "loss": 0.8211, "step": 50080 }, { "epoch": 14.98, "grad_norm": 6.105128765106201, "learning_rate": 7.376420102284543e-06, "loss": 0.8739, "step": 50085 }, { "epoch": 14.99, "grad_norm": 4.216371059417725, "learning_rate": 7.372253514934338e-06, "loss": 1.1114, "step": 50090 }, { "epoch": 14.99, "grad_norm": 3.220215082168579, "learning_rate": 7.368087901146259e-06, "loss": 1.1085, "step": 50095 }, { "epoch": 14.99, "grad_norm": 2.48602557182312, "learning_rate": 7.36392326115034e-06, "loss": 0.9271, "step": 50100 }, { "epoch": 14.99, "grad_norm": 2.3491077423095703, "learning_rate": 7.3597595951766245e-06, "loss": 0.994, "step": 
50105 }, { "epoch": 14.99, "grad_norm": 2.6628293991088867, "learning_rate": 7.3555969034550335e-06, "loss": 1.0288, "step": 50110 }, { "epoch": 14.99, "grad_norm": 3.2842299938201904, "learning_rate": 7.351435186215502e-06, "loss": 0.766, "step": 50115 }, { "epoch": 15.0, "grad_norm": 8.026897430419922, "learning_rate": 7.347274443687854e-06, "loss": 0.8578, "step": 50120 }, { "epoch": 15.0, "grad_norm": 1.960700273513794, "learning_rate": 7.3431146761018866e-06, "loss": 1.0008, "step": 50125 }, { "epoch": 15.0, "grad_norm": 1.605670690536499, "learning_rate": 7.338955883687346e-06, "loss": 0.9886, "step": 50130 }, { "epoch": 15.0, "grad_norm": 4.049495697021484, "learning_rate": 7.334798066673912e-06, "loss": 1.1398, "step": 50135 }, { "epoch": 15.0, "grad_norm": 1.0559757947921753, "learning_rate": 7.330641225291218e-06, "loss": 1.0428, "step": 50140 }, { "epoch": 15.0, "grad_norm": 3.241338014602661, "learning_rate": 7.326485359768845e-06, "loss": 0.7697, "step": 50145 }, { "epoch": 15.0, "grad_norm": 3.934131145477295, "learning_rate": 7.3223304703363135e-06, "loss": 0.8568, "step": 50150 }, { "epoch": 15.01, "grad_norm": 1.8612024784088135, "learning_rate": 7.318176557223097e-06, "loss": 1.0974, "step": 50155 }, { "epoch": 15.01, "grad_norm": 4.100109100341797, "learning_rate": 7.314023620658608e-06, "loss": 1.0554, "step": 50160 }, { "epoch": 15.01, "grad_norm": 5.153068542480469, "learning_rate": 7.309871660872211e-06, "loss": 1.0952, "step": 50165 }, { "epoch": 15.01, "grad_norm": 3.603163003921509, "learning_rate": 7.305720678093214e-06, "loss": 0.8898, "step": 50170 }, { "epoch": 15.01, "grad_norm": 2.5232720375061035, "learning_rate": 7.301570672550873e-06, "loss": 0.909, "step": 50175 }, { "epoch": 15.01, "grad_norm": 1.8282480239868164, "learning_rate": 7.297421644474389e-06, "loss": 0.8844, "step": 50180 }, { "epoch": 15.01, "grad_norm": 3.7669854164123535, "learning_rate": 7.293273594092903e-06, "loss": 0.8167, "step": 50185 }, { "epoch": 15.02, 
"grad_norm": 3.2310571670532227, "learning_rate": 7.289126521635522e-06, "loss": 0.8983, "step": 50190 }, { "epoch": 15.02, "grad_norm": 3.063481569290161, "learning_rate": 7.284980427331256e-06, "loss": 0.9985, "step": 50195 }, { "epoch": 15.02, "grad_norm": 4.190944671630859, "learning_rate": 7.280835311409123e-06, "loss": 1.027, "step": 50200 }, { "epoch": 15.02, "grad_norm": 3.723928213119507, "learning_rate": 7.276691174098024e-06, "loss": 1.0427, "step": 50205 }, { "epoch": 15.02, "grad_norm": 1.9666212797164917, "learning_rate": 7.272548015626865e-06, "loss": 1.149, "step": 50210 }, { "epoch": 15.02, "grad_norm": 2.085639238357544, "learning_rate": 7.268405836224443e-06, "loss": 0.9174, "step": 50215 }, { "epoch": 15.03, "grad_norm": 2.700467586517334, "learning_rate": 7.264264636119536e-06, "loss": 1.0293, "step": 50220 }, { "epoch": 15.03, "grad_norm": 2.4150545597076416, "learning_rate": 7.260124415540859e-06, "loss": 0.9693, "step": 50225 }, { "epoch": 15.03, "grad_norm": 1.4733829498291016, "learning_rate": 7.255985174717067e-06, "loss": 0.9674, "step": 50230 }, { "epoch": 15.03, "grad_norm": 3.151942253112793, "learning_rate": 7.251846913876772e-06, "loss": 0.8926, "step": 50235 }, { "epoch": 15.03, "grad_norm": 3.5010616779327393, "learning_rate": 7.247709633248526e-06, "loss": 0.8898, "step": 50240 }, { "epoch": 15.03, "grad_norm": 3.5921566486358643, "learning_rate": 7.243573333060824e-06, "loss": 0.9497, "step": 50245 }, { "epoch": 15.03, "grad_norm": 2.436879873275757, "learning_rate": 7.239438013542107e-06, "loss": 1.0677, "step": 50250 }, { "epoch": 15.04, "grad_norm": 2.137514352798462, "learning_rate": 7.235303674920771e-06, "loss": 1.1743, "step": 50255 }, { "epoch": 15.04, "grad_norm": 2.3740313053131104, "learning_rate": 7.2311703174251454e-06, "loss": 0.9629, "step": 50260 }, { "epoch": 15.04, "grad_norm": 3.162233591079712, "learning_rate": 7.227037941283515e-06, "loss": 1.0236, "step": 50265 }, { "epoch": 15.04, "grad_norm": 
3.699193000793457, "learning_rate": 7.222906546724104e-06, "loss": 0.955, "step": 50270 }, { "epoch": 15.04, "grad_norm": 2.988489866256714, "learning_rate": 7.218776133975086e-06, "loss": 1.1703, "step": 50275 }, { "epoch": 15.04, "grad_norm": 3.407639980316162, "learning_rate": 7.21464670326458e-06, "loss": 0.8461, "step": 50280 }, { "epoch": 15.04, "grad_norm": 1.7939776182174683, "learning_rate": 7.210518254820658e-06, "loss": 1.1161, "step": 50285 }, { "epoch": 15.05, "grad_norm": 4.75887393951416, "learning_rate": 7.206390788871306e-06, "loss": 0.9254, "step": 50290 }, { "epoch": 15.05, "grad_norm": 2.7789371013641357, "learning_rate": 7.20226430564451e-06, "loss": 0.9432, "step": 50295 }, { "epoch": 15.05, "grad_norm": 5.088046073913574, "learning_rate": 7.198138805368143e-06, "loss": 1.1504, "step": 50300 }, { "epoch": 15.05, "grad_norm": 3.722242593765259, "learning_rate": 7.194014288270079e-06, "loss": 1.0487, "step": 50305 }, { "epoch": 15.05, "grad_norm": 1.9623161554336548, "learning_rate": 7.189890754578083e-06, "loss": 0.9153, "step": 50310 }, { "epoch": 15.05, "grad_norm": 1.74498450756073, "learning_rate": 7.185768204519924e-06, "loss": 0.976, "step": 50315 }, { "epoch": 15.06, "grad_norm": 1.8223540782928467, "learning_rate": 7.181646638323261e-06, "loss": 0.9415, "step": 50320 }, { "epoch": 15.06, "grad_norm": 2.9013192653656006, "learning_rate": 7.177526056215733e-06, "loss": 0.9538, "step": 50325 }, { "epoch": 15.06, "grad_norm": 2.3087453842163086, "learning_rate": 7.173406458424917e-06, "loss": 1.1462, "step": 50330 }, { "epoch": 15.06, "grad_norm": 1.529740333557129, "learning_rate": 7.169287845178335e-06, "loss": 0.9129, "step": 50335 }, { "epoch": 15.06, "grad_norm": 3.4274179935455322, "learning_rate": 7.165170216703446e-06, "loss": 1.035, "step": 50340 }, { "epoch": 15.06, "grad_norm": 2.6179728507995605, "learning_rate": 7.161053573227671e-06, "loss": 1.0247, "step": 50345 }, { "epoch": 15.06, "grad_norm": 3.914541721343994, 
"learning_rate": 7.156937914978365e-06, "loss": 0.888, "step": 50350 }, { "epoch": 15.07, "grad_norm": 2.483247995376587, "learning_rate": 7.152823242182829e-06, "loss": 1.1135, "step": 50355 }, { "epoch": 15.07, "grad_norm": 2.098078727722168, "learning_rate": 7.148709555068314e-06, "loss": 1.1288, "step": 50360 }, { "epoch": 15.07, "grad_norm": 1.533263921737671, "learning_rate": 7.144596853862015e-06, "loss": 1.0257, "step": 50365 }, { "epoch": 15.07, "grad_norm": 2.030672550201416, "learning_rate": 7.140485138791075e-06, "loss": 0.9171, "step": 50370 }, { "epoch": 15.07, "grad_norm": 2.625800371170044, "learning_rate": 7.136374410082575e-06, "loss": 1.0061, "step": 50375 }, { "epoch": 15.07, "grad_norm": 2.7783334255218506, "learning_rate": 7.132264667963556e-06, "loss": 0.9497, "step": 50380 }, { "epoch": 15.07, "grad_norm": 1.379747748374939, "learning_rate": 7.128155912660972e-06, "loss": 0.8702, "step": 50385 }, { "epoch": 15.08, "grad_norm": 2.8253347873687744, "learning_rate": 7.124048144401774e-06, "loss": 0.8353, "step": 50390 }, { "epoch": 15.08, "grad_norm": 3.046046733856201, "learning_rate": 7.119941363412802e-06, "loss": 0.8835, "step": 50395 }, { "epoch": 15.08, "grad_norm": 2.3068737983703613, "learning_rate": 7.115835569920898e-06, "loss": 1.0548, "step": 50400 }, { "epoch": 15.08, "grad_norm": 2.7872204780578613, "learning_rate": 7.111730764152791e-06, "loss": 1.0162, "step": 50405 }, { "epoch": 15.08, "grad_norm": 1.9851144552230835, "learning_rate": 7.107626946335214e-06, "loss": 0.8412, "step": 50410 }, { "epoch": 15.08, "grad_norm": 2.1134822368621826, "learning_rate": 7.103524116694795e-06, "loss": 1.049, "step": 50415 }, { "epoch": 15.09, "grad_norm": 1.651341199874878, "learning_rate": 7.0994222754581395e-06, "loss": 1.052, "step": 50420 }, { "epoch": 15.09, "grad_norm": 1.9419851303100586, "learning_rate": 7.095321422851784e-06, "loss": 1.0539, "step": 50425 }, { "epoch": 15.09, "grad_norm": 2.2368361949920654, "learning_rate": 
7.091221559102212e-06, "loss": 0.9218, "step": 50430 }, { "epoch": 15.09, "grad_norm": 1.586853265762329, "learning_rate": 7.087122684435862e-06, "loss": 0.9266, "step": 50435 }, { "epoch": 15.09, "grad_norm": 2.1675949096679688, "learning_rate": 7.083024799079099e-06, "loss": 0.9255, "step": 50440 }, { "epoch": 15.09, "grad_norm": 2.4687063694000244, "learning_rate": 7.078927903258267e-06, "loss": 0.9003, "step": 50445 }, { "epoch": 15.09, "grad_norm": 3.6023154258728027, "learning_rate": 7.0748319971996104e-06, "loss": 1.0, "step": 50450 }, { "epoch": 15.1, "grad_norm": 3.7124223709106445, "learning_rate": 7.070737081129353e-06, "loss": 0.8707, "step": 50455 }, { "epoch": 15.1, "grad_norm": 2.767544746398926, "learning_rate": 7.066643155273647e-06, "loss": 1.0032, "step": 50460 }, { "epoch": 15.1, "grad_norm": 3.66680645942688, "learning_rate": 7.062550219858602e-06, "loss": 1.023, "step": 50465 }, { "epoch": 15.1, "grad_norm": 4.994945049285889, "learning_rate": 7.058458275110261e-06, "loss": 1.0194, "step": 50470 }, { "epoch": 15.1, "grad_norm": 4.854514122009277, "learning_rate": 7.054367321254629e-06, "loss": 0.9221, "step": 50475 }, { "epoch": 15.1, "grad_norm": 3.2869067192077637, "learning_rate": 7.050277358517618e-06, "loss": 0.8274, "step": 50480 }, { "epoch": 15.1, "grad_norm": 3.371748685836792, "learning_rate": 7.046188387125149e-06, "loss": 1.0187, "step": 50485 }, { "epoch": 15.11, "grad_norm": 2.176785945892334, "learning_rate": 7.042100407303018e-06, "loss": 0.9843, "step": 50490 }, { "epoch": 15.11, "grad_norm": 2.4034008979797363, "learning_rate": 7.038013419277034e-06, "loss": 1.0416, "step": 50495 }, { "epoch": 15.11, "grad_norm": 3.674947500228882, "learning_rate": 7.033927423272879e-06, "loss": 0.9973, "step": 50500 }, { "epoch": 15.11, "grad_norm": 2.3554413318634033, "learning_rate": 7.029842419516253e-06, "loss": 1.0056, "step": 50505 }, { "epoch": 15.11, "grad_norm": 4.33696985244751, "learning_rate": 7.025758408232744e-06, "loss": 
0.8275, "step": 50510 }, { "epoch": 15.11, "grad_norm": 2.8192827701568604, "learning_rate": 7.021675389647916e-06, "loss": 0.9531, "step": 50515 }, { "epoch": 15.12, "grad_norm": 1.3056793212890625, "learning_rate": 7.017593363987268e-06, "loss": 0.9896, "step": 50520 }, { "epoch": 15.12, "grad_norm": 4.758703708648682, "learning_rate": 7.013512331476238e-06, "loss": 1.0297, "step": 50525 }, { "epoch": 15.12, "grad_norm": 2.7597901821136475, "learning_rate": 7.009432292340243e-06, "loss": 0.9807, "step": 50530 }, { "epoch": 15.12, "grad_norm": 2.2206053733825684, "learning_rate": 7.0053532468045855e-06, "loss": 0.8794, "step": 50535 }, { "epoch": 15.12, "grad_norm": 1.7539916038513184, "learning_rate": 7.001275195094581e-06, "loss": 1.0552, "step": 50540 }, { "epoch": 15.12, "grad_norm": 1.2006642818450928, "learning_rate": 6.997198137435432e-06, "loss": 0.8482, "step": 50545 }, { "epoch": 15.12, "grad_norm": 2.1931300163269043, "learning_rate": 6.993122074052314e-06, "loss": 0.8871, "step": 50550 }, { "epoch": 15.13, "grad_norm": 1.5462733507156372, "learning_rate": 6.989047005170349e-06, "loss": 1.0243, "step": 50555 }, { "epoch": 15.13, "grad_norm": 2.5775654315948486, "learning_rate": 6.984972931014597e-06, "loss": 1.0924, "step": 50560 }, { "epoch": 15.13, "grad_norm": 1.4666215181350708, "learning_rate": 6.980899851810061e-06, "loss": 1.0157, "step": 50565 }, { "epoch": 15.13, "grad_norm": 1.5045851469039917, "learning_rate": 6.9768277677817075e-06, "loss": 1.0205, "step": 50570 }, { "epoch": 15.13, "grad_norm": 3.089066505432129, "learning_rate": 6.972756679154404e-06, "loss": 1.0415, "step": 50575 }, { "epoch": 15.13, "grad_norm": 1.6471816301345825, "learning_rate": 6.9686865861530285e-06, "loss": 1.0791, "step": 50580 }, { "epoch": 15.13, "grad_norm": 1.9261685609817505, "learning_rate": 6.964617489002334e-06, "loss": 0.8684, "step": 50585 }, { "epoch": 15.14, "grad_norm": 14.480093002319336, "learning_rate": 6.960549387927087e-06, "loss": 0.8348, 
"step": 50590 }, { "epoch": 15.14, "grad_norm": 3.1349940299987793, "learning_rate": 6.956482283151927e-06, "loss": 1.0708, "step": 50595 }, { "epoch": 15.14, "grad_norm": 3.3612797260284424, "learning_rate": 6.952416174901505e-06, "loss": 1.0016, "step": 50600 }, { "epoch": 15.14, "grad_norm": 1.3571832180023193, "learning_rate": 6.948351063400385e-06, "loss": 1.0697, "step": 50605 }, { "epoch": 15.14, "grad_norm": 1.7172069549560547, "learning_rate": 6.9442869488730574e-06, "loss": 0.9281, "step": 50610 }, { "epoch": 15.14, "grad_norm": 1.9647376537322998, "learning_rate": 6.94022383154401e-06, "loss": 1.0363, "step": 50615 }, { "epoch": 15.14, "grad_norm": 2.1266095638275146, "learning_rate": 6.936161711637612e-06, "loss": 0.8694, "step": 50620 }, { "epoch": 15.15, "grad_norm": 2.7892374992370605, "learning_rate": 6.9321005893782404e-06, "loss": 1.0771, "step": 50625 }, { "epoch": 15.15, "grad_norm": 3.818957805633545, "learning_rate": 6.92804046499016e-06, "loss": 1.0741, "step": 50630 }, { "epoch": 15.15, "grad_norm": 3.792024850845337, "learning_rate": 6.923981338697632e-06, "loss": 1.1138, "step": 50635 }, { "epoch": 15.15, "grad_norm": 9.00833511352539, "learning_rate": 6.919923210724821e-06, "loss": 0.8402, "step": 50640 }, { "epoch": 15.15, "grad_norm": 3.8722808361053467, "learning_rate": 6.915866081295858e-06, "loss": 0.9153, "step": 50645 }, { "epoch": 15.15, "grad_norm": 10.343985557556152, "learning_rate": 6.911809950634813e-06, "loss": 0.9226, "step": 50650 }, { "epoch": 15.16, "grad_norm": 4.360511779785156, "learning_rate": 6.9077548189657e-06, "loss": 0.8344, "step": 50655 }, { "epoch": 15.16, "grad_norm": 3.0080530643463135, "learning_rate": 6.903700686512488e-06, "loss": 0.9623, "step": 50660 }, { "epoch": 15.16, "grad_norm": 3.3220059871673584, "learning_rate": 6.899647553499073e-06, "loss": 0.8129, "step": 50665 }, { "epoch": 15.16, "grad_norm": 1.465591549873352, "learning_rate": 6.895595420149309e-06, "loss": 0.9545, "step": 50670 }, { 
"epoch": 15.16, "grad_norm": 2.015324115753174, "learning_rate": 6.891544286687002e-06, "loss": 1.1375, "step": 50675 }, { "epoch": 15.16, "grad_norm": 2.517958164215088, "learning_rate": 6.887494153335863e-06, "loss": 1.1182, "step": 50680 }, { "epoch": 15.16, "grad_norm": 4.011493682861328, "learning_rate": 6.883445020319604e-06, "loss": 1.1003, "step": 50685 }, { "epoch": 15.17, "grad_norm": 3.0748703479766846, "learning_rate": 6.879396887861847e-06, "loss": 1.0012, "step": 50690 }, { "epoch": 15.17, "grad_norm": 2.5038511753082275, "learning_rate": 6.8753497561861615e-06, "loss": 0.8941, "step": 50695 }, { "epoch": 15.17, "grad_norm": 3.3688976764678955, "learning_rate": 6.871303625516079e-06, "loss": 0.9082, "step": 50700 }, { "epoch": 15.17, "grad_norm": 1.502925992012024, "learning_rate": 6.867258496075038e-06, "loss": 1.0297, "step": 50705 }, { "epoch": 15.17, "grad_norm": 2.218376636505127, "learning_rate": 6.863214368086479e-06, "loss": 0.7532, "step": 50710 }, { "epoch": 15.17, "grad_norm": 3.5436737537384033, "learning_rate": 6.859171241773721e-06, "loss": 1.0066, "step": 50715 }, { "epoch": 15.17, "grad_norm": 3.2597169876098633, "learning_rate": 6.855129117360096e-06, "loss": 0.9328, "step": 50720 }, { "epoch": 15.18, "grad_norm": 3.4346742630004883, "learning_rate": 6.851087995068811e-06, "loss": 0.7659, "step": 50725 }, { "epoch": 15.18, "grad_norm": 1.6817134618759155, "learning_rate": 6.847047875123089e-06, "loss": 0.9972, "step": 50730 }, { "epoch": 15.18, "grad_norm": 3.8097851276397705, "learning_rate": 6.843008757746036e-06, "loss": 1.0971, "step": 50735 }, { "epoch": 15.18, "grad_norm": 4.3598551750183105, "learning_rate": 6.838970643160736e-06, "loss": 0.9112, "step": 50740 }, { "epoch": 15.18, "grad_norm": 7.945572853088379, "learning_rate": 6.834933531590209e-06, "loss": 1.0161, "step": 50745 }, { "epoch": 15.18, "grad_norm": 2.397972345352173, "learning_rate": 6.830897423257423e-06, "loss": 1.1107, "step": 50750 }, { "epoch": 15.19, 
"grad_norm": 3.211143732070923, "learning_rate": 6.826862318385285e-06, "loss": 1.0359, "step": 50755 }, { "epoch": 15.19, "grad_norm": 1.8211517333984375, "learning_rate": 6.822828217196656e-06, "loss": 0.9641, "step": 50760 }, { "epoch": 15.19, "grad_norm": 3.2200350761413574, "learning_rate": 6.818795119914326e-06, "loss": 1.0015, "step": 50765 }, { "epoch": 15.19, "grad_norm": 1.9324058294296265, "learning_rate": 6.8147630267610454e-06, "loss": 1.0077, "step": 50770 }, { "epoch": 15.19, "grad_norm": 2.95210337638855, "learning_rate": 6.810731937959497e-06, "loss": 1.1641, "step": 50775 }, { "epoch": 15.19, "grad_norm": 1.5571585893630981, "learning_rate": 6.8067018537323195e-06, "loss": 1.0368, "step": 50780 }, { "epoch": 15.19, "grad_norm": 12.76740550994873, "learning_rate": 6.802672774302088e-06, "loss": 0.9348, "step": 50785 }, { "epoch": 15.2, "grad_norm": 1.9661290645599365, "learning_rate": 6.798644699891321e-06, "loss": 1.102, "step": 50790 }, { "epoch": 15.2, "grad_norm": 1.8433258533477783, "learning_rate": 6.794617630722491e-06, "loss": 1.1175, "step": 50795 }, { "epoch": 15.2, "grad_norm": 5.543290138244629, "learning_rate": 6.7905915670180065e-06, "loss": 0.9451, "step": 50800 }, { "epoch": 15.2, "grad_norm": 4.0879998207092285, "learning_rate": 6.786566509000228e-06, "loss": 1.0948, "step": 50805 }, { "epoch": 15.2, "grad_norm": 3.9433319568634033, "learning_rate": 6.782542456891433e-06, "loss": 1.0932, "step": 50810 }, { "epoch": 15.2, "grad_norm": 1.188195824623108, "learning_rate": 6.7785194109139e-06, "loss": 1.0968, "step": 50815 }, { "epoch": 15.2, "grad_norm": 1.7227709293365479, "learning_rate": 6.774497371289784e-06, "loss": 0.9657, "step": 50820 }, { "epoch": 15.21, "grad_norm": 0.8999642133712769, "learning_rate": 6.770476338241247e-06, "loss": 0.9252, "step": 50825 }, { "epoch": 15.21, "grad_norm": 2.9426021575927734, "learning_rate": 6.766456311990347e-06, "loss": 0.9599, "step": 50830 }, { "epoch": 15.21, "grad_norm": 
1.2726308107376099, "learning_rate": 6.7624372927591135e-06, "loss": 0.953, "step": 50835 }, { "epoch": 15.21, "grad_norm": 1.873698353767395, "learning_rate": 6.75841928076951e-06, "loss": 0.8177, "step": 50840 }, { "epoch": 15.21, "grad_norm": 3.4419801235198975, "learning_rate": 6.7544022762434486e-06, "loss": 0.9171, "step": 50845 }, { "epoch": 15.21, "grad_norm": 2.00152325630188, "learning_rate": 6.750386279402784e-06, "loss": 0.981, "step": 50850 }, { "epoch": 15.22, "grad_norm": 1.6282256841659546, "learning_rate": 6.7463712904693165e-06, "loss": 0.9112, "step": 50855 }, { "epoch": 15.22, "grad_norm": 3.2624893188476562, "learning_rate": 6.742357309664787e-06, "loss": 0.9756, "step": 50860 }, { "epoch": 15.22, "grad_norm": 3.1753547191619873, "learning_rate": 6.738344337210889e-06, "loss": 1.0622, "step": 50865 }, { "epoch": 15.22, "grad_norm": 4.24165678024292, "learning_rate": 6.73433237332925e-06, "loss": 0.9718, "step": 50870 }, { "epoch": 15.22, "grad_norm": 3.261056661605835, "learning_rate": 6.730321418241448e-06, "loss": 0.9434, "step": 50875 }, { "epoch": 15.22, "grad_norm": 3.543851852416992, "learning_rate": 6.726311472169003e-06, "loss": 1.0265, "step": 50880 }, { "epoch": 15.22, "grad_norm": 2.614902973175049, "learning_rate": 6.72230253533338e-06, "loss": 0.8983, "step": 50885 }, { "epoch": 15.23, "grad_norm": 2.52410626411438, "learning_rate": 6.718294607955991e-06, "loss": 0.9063, "step": 50890 }, { "epoch": 15.23, "grad_norm": 1.27847421169281, "learning_rate": 6.7142876902581895e-06, "loss": 0.9997, "step": 50895 }, { "epoch": 15.23, "grad_norm": 1.5926611423492432, "learning_rate": 6.710281782461275e-06, "loss": 0.9342, "step": 50900 }, { "epoch": 15.23, "grad_norm": 2.9667863845825195, "learning_rate": 6.706276884786475e-06, "loss": 1.1293, "step": 50905 }, { "epoch": 15.23, "grad_norm": 2.5114152431488037, "learning_rate": 6.702272997455e-06, "loss": 1.1009, "step": 50910 }, { "epoch": 15.23, "grad_norm": 3.5501632690429688, 
"learning_rate": 6.698270120687955e-06, "loss": 0.9759, "step": 50915 }, { "epoch": 15.23, "grad_norm": 5.344282627105713, "learning_rate": 6.694268254706443e-06, "loss": 0.8718, "step": 50920 }, { "epoch": 15.24, "grad_norm": 4.164098262786865, "learning_rate": 6.690267399731459e-06, "loss": 0.976, "step": 50925 }, { "epoch": 15.24, "grad_norm": 2.1116974353790283, "learning_rate": 6.6862675559839744e-06, "loss": 0.8628, "step": 50930 }, { "epoch": 15.24, "grad_norm": 1.2934943437576294, "learning_rate": 6.682268723684898e-06, "loss": 0.9742, "step": 50935 }, { "epoch": 15.24, "grad_norm": 1.8546779155731201, "learning_rate": 6.678270903055078e-06, "loss": 0.9964, "step": 50940 }, { "epoch": 15.24, "grad_norm": 1.931983470916748, "learning_rate": 6.674274094315311e-06, "loss": 0.8238, "step": 50945 }, { "epoch": 15.24, "grad_norm": 1.8833105564117432, "learning_rate": 6.670278297686341e-06, "loss": 0.9469, "step": 50950 }, { "epoch": 15.25, "grad_norm": 2.2532496452331543, "learning_rate": 6.666283513388844e-06, "loss": 0.9581, "step": 50955 }, { "epoch": 15.25, "grad_norm": 3.5239064693450928, "learning_rate": 6.662289741643454e-06, "loss": 1.1331, "step": 50960 }, { "epoch": 15.25, "grad_norm": 4.513704299926758, "learning_rate": 6.658296982670739e-06, "loss": 0.9937, "step": 50965 }, { "epoch": 15.25, "grad_norm": 3.6698179244995117, "learning_rate": 6.654305236691219e-06, "loss": 0.9723, "step": 50970 }, { "epoch": 15.25, "grad_norm": 2.5509257316589355, "learning_rate": 6.650314503925348e-06, "loss": 1.0257, "step": 50975 }, { "epoch": 15.25, "grad_norm": 4.316878795623779, "learning_rate": 6.646324784593536e-06, "loss": 0.8953, "step": 50980 }, { "epoch": 15.25, "grad_norm": 2.966710090637207, "learning_rate": 6.6423360789161285e-06, "loss": 1.0324, "step": 50985 }, { "epoch": 15.26, "grad_norm": 3.1171488761901855, "learning_rate": 6.638348387113416e-06, "loss": 0.92, "step": 50990 }, { "epoch": 15.26, "grad_norm": 4.053133487701416, "learning_rate": 
6.634361709405645e-06, "loss": 0.9734, "step": 50995 }, { "epoch": 15.26, "grad_norm": 2.0579705238342285, "learning_rate": 6.630376046012973e-06, "loss": 0.9514, "step": 51000 }, { "epoch": 15.26, "grad_norm": 1.2330838441848755, "learning_rate": 6.6263913971555515e-06, "loss": 1.0869, "step": 51005 }, { "epoch": 15.26, "grad_norm": 4.011294841766357, "learning_rate": 6.6224077630534205e-06, "loss": 1.148, "step": 51010 }, { "epoch": 15.26, "grad_norm": 1.641052007675171, "learning_rate": 6.618425143926618e-06, "loss": 1.0063, "step": 51015 }, { "epoch": 15.26, "grad_norm": 1.6458262205123901, "learning_rate": 6.614443539995074e-06, "loss": 0.9671, "step": 51020 }, { "epoch": 15.27, "grad_norm": 4.2398529052734375, "learning_rate": 6.6104629514787215e-06, "loss": 0.902, "step": 51025 }, { "epoch": 15.27, "grad_norm": 2.740884780883789, "learning_rate": 6.606483378597375e-06, "loss": 0.9838, "step": 51030 }, { "epoch": 15.27, "grad_norm": 1.063081979751587, "learning_rate": 6.6025048215708345e-06, "loss": 0.7767, "step": 51035 }, { "epoch": 15.27, "grad_norm": 1.965700387954712, "learning_rate": 6.598527280618827e-06, "loss": 1.1383, "step": 51040 }, { "epoch": 15.27, "grad_norm": 5.353956699371338, "learning_rate": 6.594550755961032e-06, "loss": 0.9201, "step": 51045 }, { "epoch": 15.27, "grad_norm": 3.3293633460998535, "learning_rate": 6.590575247817069e-06, "loss": 1.0393, "step": 51050 }, { "epoch": 15.28, "grad_norm": 3.897473096847534, "learning_rate": 6.586600756406494e-06, "loss": 0.8148, "step": 51055 }, { "epoch": 15.28, "grad_norm": 2.9738924503326416, "learning_rate": 6.582627281948825e-06, "loss": 1.107, "step": 51060 }, { "epoch": 15.28, "grad_norm": 1.3063501119613647, "learning_rate": 6.578654824663505e-06, "loss": 0.9212, "step": 51065 }, { "epoch": 15.28, "grad_norm": 2.1776340007781982, "learning_rate": 6.574683384769933e-06, "loss": 1.1221, "step": 51070 }, { "epoch": 15.28, "grad_norm": 2.1266050338745117, "learning_rate": 
6.570712962487444e-06, "loss": 0.9498, "step": 51075 }, { "epoch": 15.28, "grad_norm": 1.2182793617248535, "learning_rate": 6.566743558035324e-06, "loss": 0.9176, "step": 51080 }, { "epoch": 15.28, "grad_norm": 11.197833061218262, "learning_rate": 6.562775171632793e-06, "loss": 0.8579, "step": 51085 }, { "epoch": 15.29, "grad_norm": 1.7195172309875488, "learning_rate": 6.558807803499034e-06, "loss": 1.1437, "step": 51090 }, { "epoch": 15.29, "grad_norm": 2.056217670440674, "learning_rate": 6.554841453853139e-06, "loss": 1.1385, "step": 51095 }, { "epoch": 15.29, "grad_norm": 5.98038387298584, "learning_rate": 6.55087612291419e-06, "loss": 0.7964, "step": 51100 }, { "epoch": 15.29, "grad_norm": 5.708714962005615, "learning_rate": 6.54691181090116e-06, "loss": 0.9003, "step": 51105 }, { "epoch": 15.29, "grad_norm": 1.2579269409179688, "learning_rate": 6.542948518033029e-06, "loss": 1.0455, "step": 51110 }, { "epoch": 15.29, "grad_norm": 1.0137869119644165, "learning_rate": 6.538986244528647e-06, "loss": 0.8792, "step": 51115 }, { "epoch": 15.29, "grad_norm": 1.5064419507980347, "learning_rate": 6.535024990606883e-06, "loss": 0.8194, "step": 51120 }, { "epoch": 15.3, "grad_norm": 2.8707802295684814, "learning_rate": 6.531064756486488e-06, "loss": 1.0377, "step": 51125 }, { "epoch": 15.3, "grad_norm": 2.328263521194458, "learning_rate": 6.527105542386189e-06, "loss": 0.9897, "step": 51130 }, { "epoch": 15.3, "grad_norm": 5.99672269821167, "learning_rate": 6.523147348524649e-06, "loss": 1.0021, "step": 51135 }, { "epoch": 15.3, "grad_norm": 1.5641214847564697, "learning_rate": 6.519190175120473e-06, "loss": 1.1588, "step": 51140 }, { "epoch": 15.3, "grad_norm": 6.234478950500488, "learning_rate": 6.515234022392217e-06, "loss": 0.9205, "step": 51145 }, { "epoch": 15.3, "grad_norm": 1.717020869255066, "learning_rate": 6.511278890558373e-06, "loss": 1.0511, "step": 51150 }, { "epoch": 15.3, "grad_norm": 3.1675846576690674, "learning_rate": 6.507324779837376e-06, "loss": 
0.8302, "step": 51155 }, { "epoch": 15.31, "grad_norm": 2.2740492820739746, "learning_rate": 6.503371690447607e-06, "loss": 0.8826, "step": 51160 }, { "epoch": 15.31, "grad_norm": 3.2151083946228027, "learning_rate": 6.499419622607397e-06, "loss": 0.9839, "step": 51165 }, { "epoch": 15.31, "grad_norm": 5.2867560386657715, "learning_rate": 6.49546857653501e-06, "loss": 0.9079, "step": 51170 }, { "epoch": 15.31, "grad_norm": 2.4737038612365723, "learning_rate": 6.491518552448658e-06, "loss": 0.989, "step": 51175 }, { "epoch": 15.31, "grad_norm": 2.2063138484954834, "learning_rate": 6.487569550566499e-06, "loss": 0.8596, "step": 51180 }, { "epoch": 15.31, "grad_norm": 3.6992015838623047, "learning_rate": 6.48362157110664e-06, "loss": 1.1441, "step": 51185 }, { "epoch": 15.32, "grad_norm": 4.015367031097412, "learning_rate": 6.479674614287098e-06, "loss": 1.054, "step": 51190 }, { "epoch": 15.32, "grad_norm": 2.584434747695923, "learning_rate": 6.47572868032589e-06, "loss": 1.0146, "step": 51195 }, { "epoch": 15.32, "grad_norm": 2.181087017059326, "learning_rate": 6.47178376944092e-06, "loss": 0.9372, "step": 51200 }, { "epoch": 15.32, "grad_norm": 4.231384754180908, "learning_rate": 6.467839881850085e-06, "loss": 1.0406, "step": 51205 }, { "epoch": 15.32, "grad_norm": 3.6681032180786133, "learning_rate": 6.463897017771178e-06, "loss": 0.9692, "step": 51210 }, { "epoch": 15.32, "grad_norm": 1.9487011432647705, "learning_rate": 6.459955177421986e-06, "loss": 1.0082, "step": 51215 }, { "epoch": 15.32, "grad_norm": 2.813549518585205, "learning_rate": 6.45601436102019e-06, "loss": 1.1155, "step": 51220 }, { "epoch": 15.33, "grad_norm": 1.6334689855575562, "learning_rate": 6.4520745687834476e-06, "loss": 0.9634, "step": 51225 }, { "epoch": 15.33, "grad_norm": 3.823737382888794, "learning_rate": 6.448135800929347e-06, "loss": 1.1307, "step": 51230 }, { "epoch": 15.33, "grad_norm": 1.4004770517349243, "learning_rate": 6.444198057675418e-06, "loss": 1.0298, "step": 51235 }, { 
"epoch": 15.33, "grad_norm": 6.257365703582764, "learning_rate": 6.440261339239148e-06, "loss": 0.97, "step": 51240 }, { "epoch": 15.33, "grad_norm": 6.089005947113037, "learning_rate": 6.436325645837951e-06, "loss": 0.9796, "step": 51245 }, { "epoch": 15.33, "grad_norm": 4.073474884033203, "learning_rate": 6.432390977689193e-06, "loss": 0.9491, "step": 51250 }, { "epoch": 15.33, "grad_norm": 1.1611909866333008, "learning_rate": 6.4284573350101765e-06, "loss": 1.0043, "step": 51255 }, { "epoch": 15.34, "grad_norm": 1.883181095123291, "learning_rate": 6.424524718018163e-06, "loss": 0.8922, "step": 51260 }, { "epoch": 15.34, "grad_norm": 12.2318754196167, "learning_rate": 6.4205931269303385e-06, "loss": 1.024, "step": 51265 }, { "epoch": 15.34, "grad_norm": 2.295652151107788, "learning_rate": 6.416662561963846e-06, "loss": 0.8952, "step": 51270 }, { "epoch": 15.34, "grad_norm": 6.346905708312988, "learning_rate": 6.412733023335763e-06, "loss": 1.072, "step": 51275 }, { "epoch": 15.34, "grad_norm": 3.8380446434020996, "learning_rate": 6.408804511263119e-06, "loss": 0.9072, "step": 51280 }, { "epoch": 15.34, "grad_norm": 2.404541492462158, "learning_rate": 6.404877025962866e-06, "loss": 1.0707, "step": 51285 }, { "epoch": 15.35, "grad_norm": 2.2299270629882812, "learning_rate": 6.400950567651939e-06, "loss": 0.9094, "step": 51290 }, { "epoch": 15.35, "grad_norm": 4.42881965637207, "learning_rate": 6.397025136547166e-06, "loss": 1.04, "step": 51295 }, { "epoch": 15.35, "grad_norm": 1.782752513885498, "learning_rate": 6.393100732865373e-06, "loss": 0.9773, "step": 51300 }, { "epoch": 15.35, "grad_norm": 3.4827709197998047, "learning_rate": 6.38917735682327e-06, "loss": 0.8695, "step": 51305 }, { "epoch": 15.35, "grad_norm": 2.491976022720337, "learning_rate": 6.385255008637573e-06, "loss": 0.8696, "step": 51310 }, { "epoch": 15.35, "grad_norm": 3.1498947143554688, "learning_rate": 6.381333688524885e-06, "loss": 0.849, "step": 51315 }, { "epoch": 15.35, "grad_norm": 
2.436060667037964, "learning_rate": 6.377413396701781e-06, "loss": 0.9044, "step": 51320 }, { "epoch": 15.36, "grad_norm": 2.632699966430664, "learning_rate": 6.373494133384783e-06, "loss": 1.0249, "step": 51325 }, { "epoch": 15.36, "grad_norm": 10.201811790466309, "learning_rate": 6.369575898790334e-06, "loss": 0.9744, "step": 51330 }, { "epoch": 15.36, "grad_norm": 2.500356435775757, "learning_rate": 6.365658693134857e-06, "loss": 1.0102, "step": 51335 }, { "epoch": 15.36, "grad_norm": 2.9083356857299805, "learning_rate": 6.361742516634664e-06, "loss": 1.012, "step": 51340 }, { "epoch": 15.36, "grad_norm": 4.8380022048950195, "learning_rate": 6.357827369506075e-06, "loss": 0.9587, "step": 51345 }, { "epoch": 15.36, "grad_norm": 4.04415225982666, "learning_rate": 6.353913251965296e-06, "loss": 0.7099, "step": 51350 }, { "epoch": 15.36, "grad_norm": 3.18339204788208, "learning_rate": 6.350000164228509e-06, "loss": 1.0013, "step": 51355 }, { "epoch": 15.37, "grad_norm": 1.7995153665542603, "learning_rate": 6.346088106511821e-06, "loss": 0.8609, "step": 51360 }, { "epoch": 15.37, "grad_norm": 5.062925338745117, "learning_rate": 6.3421770790313025e-06, "loss": 0.9407, "step": 51365 }, { "epoch": 15.37, "grad_norm": 2.892843723297119, "learning_rate": 6.338267082002949e-06, "loss": 1.0163, "step": 51370 }, { "epoch": 15.37, "grad_norm": 2.197019577026367, "learning_rate": 6.334358115642708e-06, "loss": 0.9733, "step": 51375 }, { "epoch": 15.37, "grad_norm": 3.871853828430176, "learning_rate": 6.330450180166464e-06, "loss": 1.108, "step": 51380 }, { "epoch": 15.37, "grad_norm": 1.472909927368164, "learning_rate": 6.326543275790059e-06, "loss": 1.0104, "step": 51385 }, { "epoch": 15.38, "grad_norm": 3.942326068878174, "learning_rate": 6.322637402729242e-06, "loss": 1.0015, "step": 51390 }, { "epoch": 15.38, "grad_norm": 7.206786155700684, "learning_rate": 6.318732561199767e-06, "loss": 1.0726, "step": 51395 }, { "epoch": 15.38, "grad_norm": 1.5072174072265625, 
"learning_rate": 6.314828751417257e-06, "loss": 1.0457, "step": 51400 }, { "epoch": 15.38, "grad_norm": 2.0470387935638428, "learning_rate": 6.3109259735973455e-06, "loss": 1.1581, "step": 51405 }, { "epoch": 15.38, "grad_norm": 5.198572635650635, "learning_rate": 6.3070242279555606e-06, "loss": 1.0023, "step": 51410 }, { "epoch": 15.38, "grad_norm": 3.871081590652466, "learning_rate": 6.303123514707393e-06, "loss": 0.9422, "step": 51415 }, { "epoch": 15.38, "grad_norm": 4.593938827514648, "learning_rate": 6.299223834068288e-06, "loss": 1.1043, "step": 51420 }, { "epoch": 15.39, "grad_norm": 3.9437222480773926, "learning_rate": 6.2953251862536036e-06, "loss": 0.77, "step": 51425 }, { "epoch": 15.39, "grad_norm": 1.9378674030303955, "learning_rate": 6.291427571478678e-06, "loss": 1.0061, "step": 51430 }, { "epoch": 15.39, "grad_norm": 1.423578143119812, "learning_rate": 6.287530989958748e-06, "loss": 0.8501, "step": 51435 }, { "epoch": 15.39, "grad_norm": 7.725799560546875, "learning_rate": 6.283635441909045e-06, "loss": 0.8871, "step": 51440 }, { "epoch": 15.39, "grad_norm": 1.6603666543960571, "learning_rate": 6.2797409275446914e-06, "loss": 0.7305, "step": 51445 }, { "epoch": 15.39, "grad_norm": 1.9601699113845825, "learning_rate": 6.275847447080793e-06, "loss": 1.0657, "step": 51450 }, { "epoch": 15.39, "grad_norm": 3.121793746948242, "learning_rate": 6.271955000732374e-06, "loss": 1.1533, "step": 51455 }, { "epoch": 15.4, "grad_norm": 1.4244256019592285, "learning_rate": 6.268063588714415e-06, "loss": 0.9971, "step": 51460 }, { "epoch": 15.4, "grad_norm": 2.398967981338501, "learning_rate": 6.264173211241833e-06, "loss": 0.9865, "step": 51465 }, { "epoch": 15.4, "grad_norm": 1.3114820718765259, "learning_rate": 6.26028386852949e-06, "loss": 0.9019, "step": 51470 }, { "epoch": 15.4, "grad_norm": 5.757796764373779, "learning_rate": 6.256395560792188e-06, "loss": 1.0239, "step": 51475 }, { "epoch": 15.4, "grad_norm": 1.7129333019256592, "learning_rate": 
6.252508288244688e-06, "loss": 1.1315, "step": 51480 }, { "epoch": 15.4, "grad_norm": 3.2258822917938232, "learning_rate": 6.248622051101649e-06, "loss": 1.0204, "step": 51485 }, { "epoch": 15.41, "grad_norm": 2.3875536918640137, "learning_rate": 6.244736849577734e-06, "loss": 0.9069, "step": 51490 }, { "epoch": 15.41, "grad_norm": 2.4159421920776367, "learning_rate": 6.240852683887508e-06, "loss": 0.9534, "step": 51495 }, { "epoch": 15.41, "grad_norm": 3.4338598251342773, "learning_rate": 6.236969554245486e-06, "loss": 1.0511, "step": 51500 }, { "epoch": 15.41, "grad_norm": 1.4087140560150146, "learning_rate": 6.233087460866141e-06, "loss": 0.987, "step": 51505 }, { "epoch": 15.41, "grad_norm": 2.022752285003662, "learning_rate": 6.229206403963852e-06, "loss": 0.9237, "step": 51510 }, { "epoch": 15.41, "grad_norm": 2.3156323432922363, "learning_rate": 6.225326383753e-06, "loss": 0.8778, "step": 51515 }, { "epoch": 15.41, "grad_norm": 2.3037240505218506, "learning_rate": 6.221447400447841e-06, "loss": 1.0857, "step": 51520 }, { "epoch": 15.42, "grad_norm": 3.583475112915039, "learning_rate": 6.2175694542626394e-06, "loss": 1.0723, "step": 51525 }, { "epoch": 15.42, "grad_norm": 4.166855335235596, "learning_rate": 6.213692545411537e-06, "loss": 1.1171, "step": 51530 }, { "epoch": 15.42, "grad_norm": 1.957580327987671, "learning_rate": 6.2098166741086815e-06, "loss": 0.9268, "step": 51535 }, { "epoch": 15.42, "grad_norm": 2.228228807449341, "learning_rate": 6.205941840568114e-06, "loss": 0.9613, "step": 51540 }, { "epoch": 15.42, "grad_norm": 3.6298060417175293, "learning_rate": 6.2020680450038425e-06, "loss": 1.1636, "step": 51545 }, { "epoch": 15.42, "grad_norm": 3.282169818878174, "learning_rate": 6.198195287629813e-06, "loss": 0.898, "step": 51550 }, { "epoch": 15.42, "grad_norm": 15.333877563476562, "learning_rate": 6.194323568659916e-06, "loss": 0.9714, "step": 51555 }, { "epoch": 15.43, "grad_norm": 4.079663276672363, "learning_rate": 6.190452888307979e-06, 
"loss": 1.148, "step": 51560 }, { "epoch": 15.43, "grad_norm": 3.9293205738067627, "learning_rate": 6.18658324678778e-06, "loss": 0.8797, "step": 51565 }, { "epoch": 15.43, "grad_norm": 3.552079916000366, "learning_rate": 6.1827146443130315e-06, "loss": 1.0032, "step": 51570 }, { "epoch": 15.43, "grad_norm": 1.169809341430664, "learning_rate": 6.1788470810973944e-06, "loss": 0.9592, "step": 51575 }, { "epoch": 15.43, "grad_norm": 1.6891237497329712, "learning_rate": 6.174980557354468e-06, "loss": 1.0002, "step": 51580 }, { "epoch": 15.43, "grad_norm": 1.8492525815963745, "learning_rate": 6.171115073297801e-06, "loss": 0.9641, "step": 51585 }, { "epoch": 15.44, "grad_norm": 5.5648579597473145, "learning_rate": 6.167250629140872e-06, "loss": 0.9558, "step": 51590 }, { "epoch": 15.44, "grad_norm": 2.914482831954956, "learning_rate": 6.16338722509712e-06, "loss": 1.0153, "step": 51595 }, { "epoch": 15.44, "grad_norm": 2.55684757232666, "learning_rate": 6.159524861379909e-06, "loss": 1.0406, "step": 51600 }, { "epoch": 15.44, "grad_norm": 2.4347622394561768, "learning_rate": 6.155663538202558e-06, "loss": 0.9547, "step": 51605 }, { "epoch": 15.44, "grad_norm": 5.680244445800781, "learning_rate": 6.1518032557783265e-06, "loss": 1.0837, "step": 51610 }, { "epoch": 15.44, "grad_norm": 4.704986572265625, "learning_rate": 6.147944014320398e-06, "loss": 1.177, "step": 51615 }, { "epoch": 15.44, "grad_norm": 4.459755897521973, "learning_rate": 6.1440858140419414e-06, "loss": 1.2561, "step": 51620 }, { "epoch": 15.45, "grad_norm": 2.4121649265289307, "learning_rate": 6.140228655156008e-06, "loss": 1.0119, "step": 51625 }, { "epoch": 15.45, "grad_norm": 1.6860464811325073, "learning_rate": 6.136372537875659e-06, "loss": 0.9792, "step": 51630 }, { "epoch": 15.45, "grad_norm": 2.846508741378784, "learning_rate": 6.132517462413839e-06, "loss": 0.8266, "step": 51635 }, { "epoch": 15.45, "grad_norm": 3.4774718284606934, "learning_rate": 6.128663428983469e-06, "loss": 0.9554, "step": 
51640 }, { "epoch": 15.45, "grad_norm": 2.086501121520996, "learning_rate": 6.124810437797402e-06, "loss": 0.9001, "step": 51645 }, { "epoch": 15.45, "grad_norm": 2.1175014972686768, "learning_rate": 6.120958489068434e-06, "loss": 0.8511, "step": 51650 }, { "epoch": 15.45, "grad_norm": 2.3501827716827393, "learning_rate": 6.117107583009304e-06, "loss": 1.0259, "step": 51655 }, { "epoch": 15.46, "grad_norm": 2.106353998184204, "learning_rate": 6.113257719832696e-06, "loss": 0.799, "step": 51660 }, { "epoch": 15.46, "grad_norm": 1.536186695098877, "learning_rate": 6.1094088997512315e-06, "loss": 0.9458, "step": 51665 }, { "epoch": 15.46, "grad_norm": 3.473801612854004, "learning_rate": 6.105561122977479e-06, "loss": 0.9084, "step": 51670 }, { "epoch": 15.46, "grad_norm": 2.5647025108337402, "learning_rate": 6.101714389723945e-06, "loss": 0.9893, "step": 51675 }, { "epoch": 15.46, "grad_norm": 5.690295696258545, "learning_rate": 6.097868700203083e-06, "loss": 1.0979, "step": 51680 }, { "epoch": 15.46, "grad_norm": 3.3370258808135986, "learning_rate": 6.094024054627281e-06, "loss": 0.9205, "step": 51685 }, { "epoch": 15.47, "grad_norm": 5.14525032043457, "learning_rate": 6.0901804532088824e-06, "loss": 0.9944, "step": 51690 }, { "epoch": 15.47, "grad_norm": 12.52432632446289, "learning_rate": 6.086337896160163e-06, "loss": 0.9203, "step": 51695 }, { "epoch": 15.47, "grad_norm": 3.637500047683716, "learning_rate": 6.0824963836933396e-06, "loss": 0.9591, "step": 51700 }, { "epoch": 15.47, "grad_norm": 2.3651740550994873, "learning_rate": 6.078655916020584e-06, "loss": 0.8642, "step": 51705 }, { "epoch": 15.47, "grad_norm": 4.2891645431518555, "learning_rate": 6.0748164933539796e-06, "loss": 1.094, "step": 51710 }, { "epoch": 15.47, "grad_norm": 1.5036563873291016, "learning_rate": 6.0709781159056064e-06, "loss": 1.0111, "step": 51715 }, { "epoch": 15.47, "grad_norm": 4.821185111999512, "learning_rate": 6.0671407838874205e-06, "loss": 0.9488, "step": 51720 }, { "epoch": 
15.48, "grad_norm": 3.7103047370910645, "learning_rate": 6.063304497511382e-06, "loss": 0.9017, "step": 51725 }, { "epoch": 15.48, "grad_norm": 3.0226051807403564, "learning_rate": 6.059469256989339e-06, "loss": 0.8582, "step": 51730 }, { "epoch": 15.48, "grad_norm": 2.9177422523498535, "learning_rate": 6.055635062533138e-06, "loss": 0.9762, "step": 51735 }, { "epoch": 15.48, "grad_norm": 3.6491503715515137, "learning_rate": 6.051801914354511e-06, "loss": 0.9779, "step": 51740 }, { "epoch": 15.48, "grad_norm": 2.9927515983581543, "learning_rate": 6.047969812665169e-06, "loss": 0.9971, "step": 51745 }, { "epoch": 15.48, "grad_norm": 2.7734007835388184, "learning_rate": 6.044138757676757e-06, "loss": 1.0923, "step": 51750 }, { "epoch": 15.48, "grad_norm": 1.9728281497955322, "learning_rate": 6.040308749600854e-06, "loss": 1.0897, "step": 51755 }, { "epoch": 15.49, "grad_norm": 3.1228339672088623, "learning_rate": 6.036479788648994e-06, "loss": 0.9912, "step": 51760 }, { "epoch": 15.49, "grad_norm": 2.159838914871216, "learning_rate": 6.032651875032644e-06, "loss": 0.957, "step": 51765 }, { "epoch": 15.49, "grad_norm": 4.584501266479492, "learning_rate": 6.028825008963215e-06, "loss": 0.8692, "step": 51770 }, { "epoch": 15.49, "grad_norm": 2.946192979812622, "learning_rate": 6.024999190652058e-06, "loss": 0.9615, "step": 51775 }, { "epoch": 15.49, "grad_norm": 3.8030338287353516, "learning_rate": 6.0211744203104765e-06, "loss": 0.8554, "step": 51780 }, { "epoch": 15.49, "grad_norm": 2.243009090423584, "learning_rate": 6.0173506981497045e-06, "loss": 0.9453, "step": 51785 }, { "epoch": 15.49, "grad_norm": 4.18550968170166, "learning_rate": 6.013528024380921e-06, "loss": 1.0779, "step": 51790 }, { "epoch": 15.5, "grad_norm": 1.270992636680603, "learning_rate": 6.009706399215248e-06, "loss": 0.882, "step": 51795 }, { "epoch": 15.5, "grad_norm": 5.413167953491211, "learning_rate": 6.00588582286376e-06, "loss": 0.9145, "step": 51800 }, { "epoch": 15.5, "grad_norm": 
4.414466381072998, "learning_rate": 6.00206629553744e-06, "loss": 0.9951, "step": 51805 }, { "epoch": 15.5, "grad_norm": 1.6211533546447754, "learning_rate": 5.998247817447264e-06, "loss": 0.8533, "step": 51810 }, { "epoch": 15.5, "grad_norm": 2.9947216510772705, "learning_rate": 5.994430388804098e-06, "loss": 1.0242, "step": 51815 }, { "epoch": 15.5, "grad_norm": 1.102436900138855, "learning_rate": 5.9906140098188015e-06, "loss": 0.892, "step": 51820 }, { "epoch": 15.51, "grad_norm": 2.9922842979431152, "learning_rate": 5.986798680702116e-06, "loss": 0.941, "step": 51825 }, { "epoch": 15.51, "grad_norm": 7.5577569007873535, "learning_rate": 5.982984401664793e-06, "loss": 1.0939, "step": 51830 }, { "epoch": 15.51, "grad_norm": 2.929849863052368, "learning_rate": 5.979171172917467e-06, "loss": 0.9213, "step": 51835 }, { "epoch": 15.51, "grad_norm": 3.348611831665039, "learning_rate": 5.9753589946707464e-06, "loss": 1.044, "step": 51840 }, { "epoch": 15.51, "grad_norm": 1.479557752609253, "learning_rate": 5.971547867135174e-06, "loss": 0.8942, "step": 51845 }, { "epoch": 15.51, "grad_norm": 3.3817899227142334, "learning_rate": 5.9677377905212324e-06, "loss": 1.2193, "step": 51850 }, { "epoch": 15.51, "grad_norm": 2.0309934616088867, "learning_rate": 5.9639287650393504e-06, "loss": 1.1913, "step": 51855 }, { "epoch": 15.52, "grad_norm": 1.8293267488479614, "learning_rate": 5.9601207908998956e-06, "loss": 0.7932, "step": 51860 }, { "epoch": 15.52, "grad_norm": 3.6836533546447754, "learning_rate": 5.956313868313179e-06, "loss": 0.978, "step": 51865 }, { "epoch": 15.52, "grad_norm": 4.970479488372803, "learning_rate": 5.952507997489451e-06, "loss": 1.0691, "step": 51870 }, { "epoch": 15.52, "grad_norm": 5.052027702331543, "learning_rate": 5.948703178638909e-06, "loss": 1.1021, "step": 51875 }, { "epoch": 15.52, "grad_norm": 2.7056381702423096, "learning_rate": 5.944899411971688e-06, "loss": 0.8781, "step": 51880 }, { "epoch": 15.52, "grad_norm": 3.194413900375366, 
"learning_rate": 5.941096697697865e-06, "loss": 0.9483, "step": 51885 }, { "epoch": 15.52, "grad_norm": 3.6588056087493896, "learning_rate": 5.937295036027463e-06, "loss": 0.8384, "step": 51890 }, { "epoch": 15.53, "grad_norm": 1.5710773468017578, "learning_rate": 5.933494427170447e-06, "loss": 0.9579, "step": 51895 }, { "epoch": 15.53, "grad_norm": 1.679604411125183, "learning_rate": 5.9296948713367e-06, "loss": 1.0501, "step": 51900 }, { "epoch": 15.53, "grad_norm": 5.178948879241943, "learning_rate": 5.925896368736098e-06, "loss": 0.8546, "step": 51905 }, { "epoch": 15.53, "grad_norm": 2.5556845664978027, "learning_rate": 5.922098919578398e-06, "loss": 1.0649, "step": 51910 }, { "epoch": 15.53, "grad_norm": 1.5957000255584717, "learning_rate": 5.918302524073361e-06, "loss": 0.964, "step": 51915 }, { "epoch": 15.53, "grad_norm": 2.147024631500244, "learning_rate": 5.9145071824306255e-06, "loss": 1.1415, "step": 51920 }, { "epoch": 15.54, "grad_norm": 2.6177618503570557, "learning_rate": 5.910712894859832e-06, "loss": 0.9363, "step": 51925 }, { "epoch": 15.54, "grad_norm": 2.6957054138183594, "learning_rate": 5.90691966157052e-06, "loss": 0.8722, "step": 51930 }, { "epoch": 15.54, "grad_norm": 3.155423402786255, "learning_rate": 5.903127482772186e-06, "loss": 0.974, "step": 51935 }, { "epoch": 15.54, "grad_norm": 1.4433481693267822, "learning_rate": 5.899336358674273e-06, "loss": 1.0228, "step": 51940 }, { "epoch": 15.54, "grad_norm": 2.683156967163086, "learning_rate": 5.895546289486159e-06, "loss": 1.0334, "step": 51945 }, { "epoch": 15.54, "grad_norm": 1.733872890472412, "learning_rate": 5.891757275417165e-06, "loss": 0.8581, "step": 51950 }, { "epoch": 15.54, "grad_norm": 1.4323784112930298, "learning_rate": 5.8879693166765535e-06, "loss": 0.9999, "step": 51955 }, { "epoch": 15.55, "grad_norm": 3.402912139892578, "learning_rate": 5.8841824134735295e-06, "loss": 0.8503, "step": 51960 }, { "epoch": 15.55, "grad_norm": 2.6373584270477295, "learning_rate": 
5.880396566017246e-06, "loss": 0.8351, "step": 51965 }, { "epoch": 15.55, "grad_norm": 1.3156791925430298, "learning_rate": 5.876611774516783e-06, "loss": 0.9677, "step": 51970 }, { "epoch": 15.55, "grad_norm": 2.961254835128784, "learning_rate": 5.872828039181175e-06, "loss": 0.8483, "step": 51975 }, { "epoch": 15.55, "grad_norm": 1.9350943565368652, "learning_rate": 5.869045360219391e-06, "loss": 0.9348, "step": 51980 }, { "epoch": 15.55, "grad_norm": 3.028542995452881, "learning_rate": 5.865263737840349e-06, "loss": 0.9053, "step": 51985 }, { "epoch": 15.55, "grad_norm": 2.8941564559936523, "learning_rate": 5.861483172252907e-06, "loss": 1.0792, "step": 51990 }, { "epoch": 15.56, "grad_norm": 2.4049479961395264, "learning_rate": 5.857703663665839e-06, "loss": 0.8029, "step": 51995 }, { "epoch": 15.56, "grad_norm": 2.2416865825653076, "learning_rate": 5.85392521228792e-06, "loss": 0.8356, "step": 52000 }, { "epoch": 15.56, "grad_norm": 1.7893521785736084, "learning_rate": 5.850147818327792e-06, "loss": 1.0072, "step": 52005 }, { "epoch": 15.56, "grad_norm": 4.932260513305664, "learning_rate": 5.8463714819941116e-06, "loss": 0.962, "step": 52010 }, { "epoch": 15.56, "grad_norm": 1.802653193473816, "learning_rate": 5.84259620349541e-06, "loss": 1.0114, "step": 52015 }, { "epoch": 15.56, "grad_norm": 4.934607982635498, "learning_rate": 5.838821983040221e-06, "loss": 0.9888, "step": 52020 }, { "epoch": 15.57, "grad_norm": 3.02289080619812, "learning_rate": 5.835048820836969e-06, "loss": 0.9526, "step": 52025 }, { "epoch": 15.57, "grad_norm": 2.7780637741088867, "learning_rate": 5.83127671709405e-06, "loss": 0.9735, "step": 52030 }, { "epoch": 15.57, "grad_norm": 3.7103610038757324, "learning_rate": 5.827505672019795e-06, "loss": 0.8817, "step": 52035 }, { "epoch": 15.57, "grad_norm": 1.6637479066848755, "learning_rate": 5.82373568582247e-06, "loss": 0.8395, "step": 52040 }, { "epoch": 15.57, "grad_norm": 4.275225639343262, "learning_rate": 5.819966758710293e-06, 
"loss": 1.0206, "step": 52045 }, { "epoch": 15.57, "grad_norm": 3.6473262310028076, "learning_rate": 5.816198890891417e-06, "loss": 0.9276, "step": 52050 }, { "epoch": 15.57, "grad_norm": 3.159449815750122, "learning_rate": 5.812432082573932e-06, "loss": 1.243, "step": 52055 }, { "epoch": 15.58, "grad_norm": 3.919372320175171, "learning_rate": 5.8086663339658815e-06, "loss": 1.1114, "step": 52060 }, { "epoch": 15.58, "grad_norm": 3.567432403564453, "learning_rate": 5.80490164527524e-06, "loss": 0.8347, "step": 52065 }, { "epoch": 15.58, "grad_norm": 4.436063289642334, "learning_rate": 5.801138016709928e-06, "loss": 1.0756, "step": 52070 }, { "epoch": 15.58, "grad_norm": 3.9391560554504395, "learning_rate": 5.797375448477807e-06, "loss": 0.925, "step": 52075 }, { "epoch": 15.58, "grad_norm": 4.531538963317871, "learning_rate": 5.793613940786679e-06, "loss": 0.9541, "step": 52080 }, { "epoch": 15.58, "grad_norm": 2.420389413833618, "learning_rate": 5.789853493844294e-06, "loss": 0.8691, "step": 52085 }, { "epoch": 15.58, "grad_norm": 2.241720676422119, "learning_rate": 5.7860941078583284e-06, "loss": 1.0427, "step": 52090 }, { "epoch": 15.59, "grad_norm": 1.725215196609497, "learning_rate": 5.782335783036422e-06, "loss": 1.0402, "step": 52095 }, { "epoch": 15.59, "grad_norm": 2.595808744430542, "learning_rate": 5.778578519586119e-06, "loss": 1.0218, "step": 52100 }, { "epoch": 15.59, "grad_norm": 2.5729830265045166, "learning_rate": 5.774822317714959e-06, "loss": 0.8591, "step": 52105 }, { "epoch": 15.59, "grad_norm": 3.0964512825012207, "learning_rate": 5.771067177630368e-06, "loss": 0.9244, "step": 52110 }, { "epoch": 15.59, "grad_norm": 2.0227348804473877, "learning_rate": 5.767313099539762e-06, "loss": 0.7325, "step": 52115 }, { "epoch": 15.59, "grad_norm": 1.4698666334152222, "learning_rate": 5.763560083650451e-06, "loss": 1.0007, "step": 52120 }, { "epoch": 15.6, "grad_norm": 4.7408857345581055, "learning_rate": 5.7598081301697276e-06, "loss": 1.1846, "step": 
52125 }, { "epoch": 15.6, "grad_norm": 1.241419792175293, "learning_rate": 5.756057239304799e-06, "loss": 0.9077, "step": 52130 }, { "epoch": 15.6, "grad_norm": 6.334906578063965, "learning_rate": 5.752307411262825e-06, "loss": 0.9065, "step": 52135 }, { "epoch": 15.6, "grad_norm": 1.5211411714553833, "learning_rate": 5.748558646250909e-06, "loss": 0.9217, "step": 52140 }, { "epoch": 15.6, "grad_norm": 2.1848652362823486, "learning_rate": 5.744810944476079e-06, "loss": 0.9956, "step": 52145 }, { "epoch": 15.6, "grad_norm": 1.960239291191101, "learning_rate": 5.741064306145341e-06, "loss": 1.0003, "step": 52150 }, { "epoch": 15.6, "grad_norm": 2.8279387950897217, "learning_rate": 5.737318731465593e-06, "loss": 1.0655, "step": 52155 }, { "epoch": 15.61, "grad_norm": 1.6879123449325562, "learning_rate": 5.733574220643712e-06, "loss": 0.8975, "step": 52160 }, { "epoch": 15.61, "grad_norm": 1.8145525455474854, "learning_rate": 5.729830773886502e-06, "loss": 0.9761, "step": 52165 }, { "epoch": 15.61, "grad_norm": 2.2954049110412598, "learning_rate": 5.726088391400705e-06, "loss": 1.0937, "step": 52170 }, { "epoch": 15.61, "grad_norm": 3.0802268981933594, "learning_rate": 5.722347073393012e-06, "loss": 1.1665, "step": 52175 }, { "epoch": 15.61, "grad_norm": 1.6516430377960205, "learning_rate": 5.718606820070055e-06, "loss": 0.9886, "step": 52180 }, { "epoch": 15.61, "grad_norm": 2.8192756175994873, "learning_rate": 5.714867631638399e-06, "loss": 0.9826, "step": 52185 }, { "epoch": 15.61, "grad_norm": 4.979455471038818, "learning_rate": 5.7111295083045636e-06, "loss": 0.9313, "step": 52190 }, { "epoch": 15.62, "grad_norm": 2.0236334800720215, "learning_rate": 5.707392450274984e-06, "loss": 0.9639, "step": 52195 }, { "epoch": 15.62, "grad_norm": 1.1548880338668823, "learning_rate": 5.7036564577560794e-06, "loss": 1.1111, "step": 52200 }, { "epoch": 15.62, "grad_norm": 3.311762571334839, "learning_rate": 5.699921530954158e-06, "loss": 1.0053, "step": 52205 }, { "epoch": 
15.62, "grad_norm": 2.1778554916381836, "learning_rate": 5.696187670075523e-06, "loss": 1.0448, "step": 52210 }, { "epoch": 15.62, "grad_norm": 6.210904121398926, "learning_rate": 5.6924548753263694e-06, "loss": 0.9885, "step": 52215 }, { "epoch": 15.62, "grad_norm": 3.4226996898651123, "learning_rate": 5.688723146912859e-06, "loss": 1.1471, "step": 52220 }, { "epoch": 15.63, "grad_norm": 3.190202474594116, "learning_rate": 5.684992485041108e-06, "loss": 0.8414, "step": 52225 }, { "epoch": 15.63, "grad_norm": 3.1030070781707764, "learning_rate": 5.681262889917133e-06, "loss": 1.0518, "step": 52230 }, { "epoch": 15.63, "grad_norm": 1.8778613805770874, "learning_rate": 5.677534361746939e-06, "loss": 1.0345, "step": 52235 }, { "epoch": 15.63, "grad_norm": 2.847775459289551, "learning_rate": 5.6738069007364255e-06, "loss": 1.0059, "step": 52240 }, { "epoch": 15.63, "grad_norm": 1.932309627532959, "learning_rate": 5.670080507091483e-06, "loss": 0.9203, "step": 52245 }, { "epoch": 15.63, "grad_norm": 13.216365814208984, "learning_rate": 5.666355181017893e-06, "loss": 0.9181, "step": 52250 }, { "epoch": 15.63, "grad_norm": 1.8943175077438354, "learning_rate": 5.662630922721413e-06, "loss": 0.9876, "step": 52255 }, { "epoch": 15.64, "grad_norm": 1.8065849542617798, "learning_rate": 5.658907732407726e-06, "loss": 0.9735, "step": 52260 }, { "epoch": 15.64, "grad_norm": 1.6062184572219849, "learning_rate": 5.65518561028246e-06, "loss": 0.8603, "step": 52265 }, { "epoch": 15.64, "grad_norm": 1.6171048879623413, "learning_rate": 5.651464556551186e-06, "loss": 1.1217, "step": 52270 }, { "epoch": 15.64, "grad_norm": 2.9715936183929443, "learning_rate": 5.647744571419411e-06, "loss": 1.195, "step": 52275 }, { "epoch": 15.64, "grad_norm": 3.835232734680176, "learning_rate": 5.644025655092591e-06, "loss": 1.1408, "step": 52280 }, { "epoch": 15.64, "grad_norm": 3.526592969894409, "learning_rate": 5.6403078077761186e-06, "loss": 0.8728, "step": 52285 }, { "epoch": 15.64, "grad_norm": 
4.8948187828063965, "learning_rate": 5.636591029675311e-06, "loss": 0.8828, "step": 52290 }, { "epoch": 15.65, "grad_norm": 1.3252886533737183, "learning_rate": 5.632875320995468e-06, "loss": 0.9628, "step": 52295 }, { "epoch": 15.65, "grad_norm": 3.719871759414673, "learning_rate": 5.629160681941772e-06, "loss": 0.8964, "step": 52300 }, { "epoch": 15.65, "grad_norm": 2.8799922466278076, "learning_rate": 5.6254471127194064e-06, "loss": 1.073, "step": 52305 }, { "epoch": 15.65, "grad_norm": 3.387589454650879, "learning_rate": 5.621734613533458e-06, "loss": 0.7555, "step": 52310 }, { "epoch": 15.65, "grad_norm": 3.153660535812378, "learning_rate": 5.618023184588964e-06, "loss": 1.031, "step": 52315 }, { "epoch": 15.65, "grad_norm": 2.53116774559021, "learning_rate": 5.6143128260909115e-06, "loss": 0.8774, "step": 52320 }, { "epoch": 15.66, "grad_norm": 2.402709722518921, "learning_rate": 5.610603538244197e-06, "loss": 0.9746, "step": 52325 }, { "epoch": 15.66, "grad_norm": 3.300175666809082, "learning_rate": 5.606895321253705e-06, "loss": 0.9147, "step": 52330 }, { "epoch": 15.66, "grad_norm": 2.9742469787597656, "learning_rate": 5.603188175324217e-06, "loss": 1.1199, "step": 52335 }, { "epoch": 15.66, "grad_norm": 2.81020450592041, "learning_rate": 5.599482100660497e-06, "loss": 0.961, "step": 52340 }, { "epoch": 15.66, "grad_norm": 2.7677392959594727, "learning_rate": 5.595777097467206e-06, "loss": 1.1416, "step": 52345 }, { "epoch": 15.66, "grad_norm": 2.42520809173584, "learning_rate": 5.59207316594898e-06, "loss": 1.1443, "step": 52350 }, { "epoch": 15.66, "grad_norm": 2.994718313217163, "learning_rate": 5.588370306310375e-06, "loss": 1.0766, "step": 52355 }, { "epoch": 15.67, "grad_norm": 2.0844037532806396, "learning_rate": 5.584668518755904e-06, "loss": 0.9827, "step": 52360 }, { "epoch": 15.67, "grad_norm": 2.0058093070983887, "learning_rate": 5.580967803490008e-06, "loss": 0.925, "step": 52365 }, { "epoch": 15.67, "grad_norm": 2.180823564529419, 
"learning_rate": 5.577268160717078e-06, "loss": 0.9218, "step": 52370 }, { "epoch": 15.67, "grad_norm": 4.118743419647217, "learning_rate": 5.573569590641436e-06, "loss": 0.9062, "step": 52375 }, { "epoch": 15.67, "grad_norm": 2.850475788116455, "learning_rate": 5.569872093467354e-06, "loss": 1.0542, "step": 52380 }, { "epoch": 15.67, "grad_norm": 4.366810321807861, "learning_rate": 5.566175669399037e-06, "loss": 0.9499, "step": 52385 }, { "epoch": 15.67, "grad_norm": 1.8252012729644775, "learning_rate": 5.562480318640642e-06, "loss": 1.0941, "step": 52390 }, { "epoch": 15.68, "grad_norm": 6.694037437438965, "learning_rate": 5.558786041396252e-06, "loss": 1.0343, "step": 52395 }, { "epoch": 15.68, "grad_norm": 3.3669612407684326, "learning_rate": 5.555092837869902e-06, "loss": 1.0908, "step": 52400 }, { "epoch": 15.68, "grad_norm": 2.897421360015869, "learning_rate": 5.551400708265561e-06, "loss": 1.0238, "step": 52405 }, { "epoch": 15.68, "grad_norm": 2.594632387161255, "learning_rate": 5.5477096527871445e-06, "loss": 0.8819, "step": 52410 }, { "epoch": 15.68, "grad_norm": 2.2491109371185303, "learning_rate": 5.544019671638512e-06, "loss": 0.9708, "step": 52415 }, { "epoch": 15.68, "grad_norm": 2.392650604248047, "learning_rate": 5.5403307650234355e-06, "loss": 0.9749, "step": 52420 }, { "epoch": 15.68, "grad_norm": 2.6764276027679443, "learning_rate": 5.536642933145677e-06, "loss": 1.1067, "step": 52425 }, { "epoch": 15.69, "grad_norm": 1.6986552476882935, "learning_rate": 5.532956176208884e-06, "loss": 0.8994, "step": 52430 }, { "epoch": 15.69, "grad_norm": 3.7357470989227295, "learning_rate": 5.5292704944167026e-06, "loss": 0.9956, "step": 52435 }, { "epoch": 15.69, "grad_norm": 2.2799248695373535, "learning_rate": 5.525585887972657e-06, "loss": 1.203, "step": 52440 }, { "epoch": 15.69, "grad_norm": 3.3140718936920166, "learning_rate": 5.521902357080275e-06, "loss": 0.8632, "step": 52445 }, { "epoch": 15.69, "grad_norm": 2.275482416152954, "learning_rate": 
5.518219901942972e-06, "loss": 0.9642, "step": 52450 }, { "epoch": 15.69, "grad_norm": 2.353581428527832, "learning_rate": 5.514538522764135e-06, "loss": 1.0428, "step": 52455 }, { "epoch": 15.7, "grad_norm": 3.2124972343444824, "learning_rate": 5.510858219747078e-06, "loss": 0.9647, "step": 52460 }, { "epoch": 15.7, "grad_norm": 2.8147921562194824, "learning_rate": 5.507178993095067e-06, "loss": 1.0069, "step": 52465 }, { "epoch": 15.7, "grad_norm": 2.2177577018737793, "learning_rate": 5.503500843011297e-06, "loss": 1.0085, "step": 52470 }, { "epoch": 15.7, "grad_norm": 2.825120210647583, "learning_rate": 5.4998237696989085e-06, "loss": 1.0708, "step": 52475 }, { "epoch": 15.7, "grad_norm": 3.487179756164551, "learning_rate": 5.496147773360988e-06, "loss": 1.0576, "step": 52480 }, { "epoch": 15.7, "grad_norm": 1.7475980520248413, "learning_rate": 5.492472854200551e-06, "loss": 1.0345, "step": 52485 }, { "epoch": 15.7, "grad_norm": 1.6811771392822266, "learning_rate": 5.488799012420559e-06, "loss": 1.0522, "step": 52490 }, { "epoch": 15.71, "grad_norm": 3.03035044670105, "learning_rate": 5.4851262482239205e-06, "loss": 1.0216, "step": 52495 }, { "epoch": 15.71, "grad_norm": 1.8432042598724365, "learning_rate": 5.481454561813473e-06, "loss": 1.1813, "step": 52500 }, { "epoch": 15.71, "grad_norm": 3.2820653915405273, "learning_rate": 5.477783953392002e-06, "loss": 0.9292, "step": 52505 }, { "epoch": 15.71, "grad_norm": 1.9322882890701294, "learning_rate": 5.474114423162236e-06, "loss": 0.9244, "step": 52510 }, { "epoch": 15.71, "grad_norm": 4.189065933227539, "learning_rate": 5.470445971326821e-06, "loss": 1.1351, "step": 52515 }, { "epoch": 15.71, "grad_norm": 1.8628734350204468, "learning_rate": 5.4667785980883925e-06, "loss": 0.9412, "step": 52520 }, { "epoch": 15.71, "grad_norm": 3.525163412094116, "learning_rate": 5.463112303649462e-06, "loss": 0.8848, "step": 52525 }, { "epoch": 15.72, "grad_norm": 3.689204692840576, "learning_rate": 5.459447088212544e-06, 
"loss": 0.938, "step": 52530 }, { "epoch": 15.72, "grad_norm": 7.576735019683838, "learning_rate": 5.45578295198004e-06, "loss": 0.6826, "step": 52535 }, { "epoch": 15.72, "grad_norm": 3.6514053344726562, "learning_rate": 5.452119895154343e-06, "loss": 0.8254, "step": 52540 }, { "epoch": 15.72, "grad_norm": 2.51778507232666, "learning_rate": 5.4484579179377385e-06, "loss": 0.9759, "step": 52545 }, { "epoch": 15.72, "grad_norm": 3.0608720779418945, "learning_rate": 5.444797020532477e-06, "loss": 1.0327, "step": 52550 }, { "epoch": 15.72, "grad_norm": 3.0184848308563232, "learning_rate": 5.441137203140753e-06, "loss": 0.9639, "step": 52555 }, { "epoch": 15.73, "grad_norm": 3.508617401123047, "learning_rate": 5.43747846596469e-06, "loss": 0.8989, "step": 52560 }, { "epoch": 15.73, "grad_norm": 2.4190101623535156, "learning_rate": 5.433820809206358e-06, "loss": 1.0506, "step": 52565 }, { "epoch": 15.73, "grad_norm": 3.336304187774658, "learning_rate": 5.430164233067764e-06, "loss": 1.0166, "step": 52570 }, { "epoch": 15.73, "grad_norm": 3.363173246383667, "learning_rate": 5.426508737750857e-06, "loss": 1.0955, "step": 52575 }, { "epoch": 15.73, "grad_norm": 2.3108577728271484, "learning_rate": 5.422854323457527e-06, "loss": 1.0257, "step": 52580 }, { "epoch": 15.73, "grad_norm": 2.772678852081299, "learning_rate": 5.419200990389603e-06, "loss": 0.9683, "step": 52585 }, { "epoch": 15.73, "grad_norm": 2.349119186401367, "learning_rate": 5.415548738748857e-06, "loss": 0.9251, "step": 52590 }, { "epoch": 15.74, "grad_norm": 2.551915407180786, "learning_rate": 5.411897568736995e-06, "loss": 1.0174, "step": 52595 }, { "epoch": 15.74, "grad_norm": 2.7730255126953125, "learning_rate": 5.408247480555673e-06, "loss": 0.9942, "step": 52600 }, { "epoch": 15.74, "grad_norm": 4.1052141189575195, "learning_rate": 5.4045984744064836e-06, "loss": 0.9015, "step": 52605 }, { "epoch": 15.74, "grad_norm": 2.427316665649414, "learning_rate": 5.400950550490935e-06, "loss": 1.129, "step": 
52610 }, { "epoch": 15.74, "grad_norm": 2.9681787490844727, "learning_rate": 5.397303709010534e-06, "loss": 1.1242, "step": 52615 }, { "epoch": 15.74, "grad_norm": 2.8961920738220215, "learning_rate": 5.393657950166659e-06, "loss": 0.9106, "step": 52620 }, { "epoch": 15.74, "grad_norm": 3.401855945587158, "learning_rate": 5.390013274160685e-06, "loss": 1.0304, "step": 52625 }, { "epoch": 15.75, "grad_norm": 5.901130676269531, "learning_rate": 5.386369681193884e-06, "loss": 0.8025, "step": 52630 }, { "epoch": 15.75, "grad_norm": 2.0417873859405518, "learning_rate": 5.382727171467514e-06, "loss": 1.0043, "step": 52635 }, { "epoch": 15.75, "grad_norm": 17.817447662353516, "learning_rate": 5.379085745182721e-06, "loss": 0.8185, "step": 52640 }, { "epoch": 15.75, "grad_norm": 1.411085844039917, "learning_rate": 5.37544540254063e-06, "loss": 1.1456, "step": 52645 }, { "epoch": 15.75, "grad_norm": 3.324824810028076, "learning_rate": 5.371806143742289e-06, "loss": 0.9423, "step": 52650 }, { "epoch": 15.75, "grad_norm": 2.4922614097595215, "learning_rate": 5.368167968988691e-06, "loss": 0.9954, "step": 52655 }, { "epoch": 15.76, "grad_norm": 2.6019651889801025, "learning_rate": 5.364530878480772e-06, "loss": 0.9052, "step": 52660 }, { "epoch": 15.76, "grad_norm": 5.232183456420898, "learning_rate": 5.360894872419403e-06, "loss": 0.7985, "step": 52665 }, { "epoch": 15.76, "grad_norm": 4.952908515930176, "learning_rate": 5.357259951005397e-06, "loss": 1.1789, "step": 52670 }, { "epoch": 15.76, "grad_norm": 2.208289861679077, "learning_rate": 5.3536261144395054e-06, "loss": 0.8794, "step": 52675 }, { "epoch": 15.76, "grad_norm": 2.049973487854004, "learning_rate": 5.3499933629224204e-06, "loss": 0.7753, "step": 52680 }, { "epoch": 15.76, "grad_norm": 2.3829286098480225, "learning_rate": 5.346361696654781e-06, "loss": 0.9712, "step": 52685 }, { "epoch": 15.76, "grad_norm": 33.99540328979492, "learning_rate": 5.3427311158371515e-06, "loss": 1.0222, "step": 52690 }, { "epoch": 
15.77, "grad_norm": 1.5256500244140625, "learning_rate": 5.339101620670054e-06, "loss": 1.0975, "step": 52695 }, { "epoch": 15.77, "grad_norm": 3.328197717666626, "learning_rate": 5.3354732113539415e-06, "loss": 0.872, "step": 52700 }, { "epoch": 15.77, "grad_norm": 1.9230583906173706, "learning_rate": 5.33184588808919e-06, "loss": 0.9616, "step": 52705 }, { "epoch": 15.77, "grad_norm": 4.239948749542236, "learning_rate": 5.328219651076163e-06, "loss": 0.9001, "step": 52710 }, { "epoch": 15.77, "grad_norm": 3.32629656791687, "learning_rate": 5.324594500515101e-06, "loss": 1.0787, "step": 52715 }, { "epoch": 15.77, "grad_norm": 1.680390477180481, "learning_rate": 5.3209704366062455e-06, "loss": 1.0742, "step": 52720 }, { "epoch": 15.77, "grad_norm": 3.082519769668579, "learning_rate": 5.317347459549726e-06, "loss": 0.8436, "step": 52725 }, { "epoch": 15.78, "grad_norm": 5.2578654289245605, "learning_rate": 5.313725569545661e-06, "loss": 1.0528, "step": 52730 }, { "epoch": 15.78, "grad_norm": 3.4256768226623535, "learning_rate": 5.310104766794064e-06, "loss": 1.0051, "step": 52735 }, { "epoch": 15.78, "grad_norm": 1.8970190286636353, "learning_rate": 5.306485051494911e-06, "loss": 1.1178, "step": 52740 }, { "epoch": 15.78, "grad_norm": 6.223118305206299, "learning_rate": 5.302866423848121e-06, "loss": 1.0483, "step": 52745 }, { "epoch": 15.78, "grad_norm": 3.551870822906494, "learning_rate": 5.2992488840535425e-06, "loss": 0.7908, "step": 52750 }, { "epoch": 15.78, "grad_norm": 2.2895681858062744, "learning_rate": 5.295632432310973e-06, "loss": 0.9522, "step": 52755 }, { "epoch": 15.79, "grad_norm": 3.403578281402588, "learning_rate": 5.2920170688201405e-06, "loss": 0.9253, "step": 52760 }, { "epoch": 15.79, "grad_norm": 4.709850788116455, "learning_rate": 5.2884027937807225e-06, "loss": 0.8115, "step": 52765 }, { "epoch": 15.79, "grad_norm": 2.8941822052001953, "learning_rate": 5.284789607392326e-06, "loss": 1.031, "step": 52770 }, { "epoch": 15.79, "grad_norm": 
3.633200168609619, "learning_rate": 5.28117750985451e-06, "loss": 1.0308, "step": 52775 }, { "epoch": 15.79, "grad_norm": 2.2992610931396484, "learning_rate": 5.27756650136676e-06, "loss": 0.7602, "step": 52780 }, { "epoch": 15.79, "grad_norm": 5.884945392608643, "learning_rate": 5.2739565821285145e-06, "loss": 0.8777, "step": 52785 }, { "epoch": 15.79, "grad_norm": 2.312242031097412, "learning_rate": 5.270347752339142e-06, "loss": 0.8445, "step": 52790 }, { "epoch": 15.8, "grad_norm": 3.9331436157226562, "learning_rate": 5.266740012197954e-06, "loss": 1.1452, "step": 52795 }, { "epoch": 15.8, "grad_norm": 1.6924293041229248, "learning_rate": 5.263133361904204e-06, "loss": 0.9707, "step": 52800 }, { "epoch": 15.8, "grad_norm": 1.7212278842926025, "learning_rate": 5.259527801657091e-06, "loss": 1.1103, "step": 52805 }, { "epoch": 15.8, "grad_norm": 2.418543815612793, "learning_rate": 5.255923331655724e-06, "loss": 1.0522, "step": 52810 }, { "epoch": 15.8, "grad_norm": 1.562758445739746, "learning_rate": 5.252319952099202e-06, "loss": 0.9675, "step": 52815 }, { "epoch": 15.8, "grad_norm": 3.524245023727417, "learning_rate": 5.248717663186511e-06, "loss": 1.0859, "step": 52820 }, { "epoch": 15.8, "grad_norm": 3.3297927379608154, "learning_rate": 5.245116465116625e-06, "loss": 0.9522, "step": 52825 }, { "epoch": 15.81, "grad_norm": 4.018186092376709, "learning_rate": 5.241516358088417e-06, "loss": 1.0692, "step": 52830 }, { "epoch": 15.81, "grad_norm": 1.4995861053466797, "learning_rate": 5.237917342300719e-06, "loss": 1.0195, "step": 52835 }, { "epoch": 15.81, "grad_norm": 2.2528059482574463, "learning_rate": 5.234319417952308e-06, "loss": 1.0619, "step": 52840 }, { "epoch": 15.81, "grad_norm": 3.694645881652832, "learning_rate": 5.23072258524189e-06, "loss": 1.0082, "step": 52845 }, { "epoch": 15.81, "grad_norm": 2.391193151473999, "learning_rate": 5.227126844368113e-06, "loss": 1.032, "step": 52850 }, { "epoch": 15.81, "grad_norm": 2.8515758514404297, 
"learning_rate": 5.223532195529571e-06, "loss": 0.9227, "step": 52855 }, { "epoch": 15.82, "grad_norm": 1.8766295909881592, "learning_rate": 5.2199386389247876e-06, "loss": 0.9314, "step": 52860 }, { "epoch": 15.82, "grad_norm": 1.6634045839309692, "learning_rate": 5.216346174752232e-06, "loss": 0.9464, "step": 52865 }, { "epoch": 15.82, "grad_norm": 2.1022915840148926, "learning_rate": 5.212754803210312e-06, "loss": 0.9514, "step": 52870 }, { "epoch": 15.82, "grad_norm": 2.1778430938720703, "learning_rate": 5.209164524497378e-06, "loss": 0.9898, "step": 52875 }, { "epoch": 15.82, "grad_norm": 1.7852226495742798, "learning_rate": 5.205575338811719e-06, "loss": 0.9918, "step": 52880 }, { "epoch": 15.82, "grad_norm": 2.882033586502075, "learning_rate": 5.201987246351556e-06, "loss": 0.8887, "step": 52885 }, { "epoch": 15.82, "grad_norm": 5.0355939865112305, "learning_rate": 5.198400247315058e-06, "loss": 1.0556, "step": 52890 }, { "epoch": 15.83, "grad_norm": 1.298420786857605, "learning_rate": 5.194814341900331e-06, "loss": 1.1109, "step": 52895 }, { "epoch": 15.83, "grad_norm": 1.944771409034729, "learning_rate": 5.1912295303054305e-06, "loss": 0.9752, "step": 52900 }, { "epoch": 15.83, "grad_norm": 2.584871768951416, "learning_rate": 5.187645812728317e-06, "loss": 0.9854, "step": 52905 }, { "epoch": 15.83, "grad_norm": 2.4717025756835938, "learning_rate": 5.184063189366948e-06, "loss": 1.0902, "step": 52910 }, { "epoch": 15.83, "grad_norm": 1.5633115768432617, "learning_rate": 5.1804816604191565e-06, "loss": 0.9654, "step": 52915 }, { "epoch": 15.83, "grad_norm": 2.15081787109375, "learning_rate": 5.176901226082776e-06, "loss": 0.9748, "step": 52920 }, { "epoch": 15.83, "grad_norm": 1.1734721660614014, "learning_rate": 5.173321886555527e-06, "loss": 1.0343, "step": 52925 }, { "epoch": 15.84, "grad_norm": 3.0947458744049072, "learning_rate": 5.169743642035102e-06, "loss": 0.9797, "step": 52930 }, { "epoch": 15.84, "grad_norm": 5.485124588012695, "learning_rate": 
5.166166492719124e-06, "loss": 0.874, "step": 52935 }, { "epoch": 15.84, "grad_norm": 2.063061475753784, "learning_rate": 5.162590438805156e-06, "loss": 0.9525, "step": 52940 }, { "epoch": 15.84, "grad_norm": 1.3920997381210327, "learning_rate": 5.159015480490695e-06, "loss": 1.0252, "step": 52945 }, { "epoch": 15.84, "grad_norm": 1.3368240594863892, "learning_rate": 5.155441617973189e-06, "loss": 1.0572, "step": 52950 }, { "epoch": 15.84, "grad_norm": 3.9613707065582275, "learning_rate": 5.151868851450012e-06, "loss": 0.9808, "step": 52955 }, { "epoch": 15.85, "grad_norm": 3.1625585556030273, "learning_rate": 5.148297181118489e-06, "loss": 0.9818, "step": 52960 }, { "epoch": 15.85, "grad_norm": 3.069246292114258, "learning_rate": 5.1447266071758785e-06, "loss": 0.9461, "step": 52965 }, { "epoch": 15.85, "grad_norm": 3.266118288040161, "learning_rate": 5.14115712981938e-06, "loss": 0.958, "step": 52970 }, { "epoch": 15.85, "grad_norm": 2.903211832046509, "learning_rate": 5.137588749246128e-06, "loss": 0.9459, "step": 52975 }, { "epoch": 15.85, "grad_norm": 4.388901710510254, "learning_rate": 5.134021465653205e-06, "loss": 0.8478, "step": 52980 }, { "epoch": 15.85, "grad_norm": 3.907933473587036, "learning_rate": 5.130455279237625e-06, "loss": 1.1191, "step": 52985 }, { "epoch": 15.85, "grad_norm": 1.9974335432052612, "learning_rate": 5.126890190196348e-06, "loss": 1.1045, "step": 52990 }, { "epoch": 15.86, "grad_norm": 4.463841438293457, "learning_rate": 5.1233261987262775e-06, "loss": 0.9843, "step": 52995 }, { "epoch": 15.86, "grad_norm": 4.327840805053711, "learning_rate": 5.119763305024225e-06, "loss": 0.9954, "step": 53000 }, { "epoch": 15.86, "grad_norm": 1.5290764570236206, "learning_rate": 5.116201509286997e-06, "loss": 1.2544, "step": 53005 }, { "epoch": 15.86, "grad_norm": 2.252065420150757, "learning_rate": 5.1126408117112755e-06, "loss": 0.9876, "step": 53010 }, { "epoch": 15.86, "grad_norm": 3.484567880630493, "learning_rate": 5.109081212493744e-06, 
"loss": 0.8204, "step": 53015 }, { "epoch": 15.86, "grad_norm": 4.542111873626709, "learning_rate": 5.105522711830968e-06, "loss": 0.9088, "step": 53020 }, { "epoch": 15.86, "grad_norm": 2.7944681644439697, "learning_rate": 5.101965309919507e-06, "loss": 0.7633, "step": 53025 }, { "epoch": 15.87, "grad_norm": 3.740417718887329, "learning_rate": 5.098409006955812e-06, "loss": 0.8264, "step": 53030 }, { "epoch": 15.87, "grad_norm": 2.1556479930877686, "learning_rate": 5.094853803136296e-06, "loss": 1.0365, "step": 53035 }, { "epoch": 15.87, "grad_norm": 3.4103243350982666, "learning_rate": 5.091299698657326e-06, "loss": 1.164, "step": 53040 }, { "epoch": 15.87, "grad_norm": 2.4072625637054443, "learning_rate": 5.087746693715167e-06, "loss": 0.7972, "step": 53045 }, { "epoch": 15.87, "grad_norm": 2.5609822273254395, "learning_rate": 5.084194788506075e-06, "loss": 1.0668, "step": 53050 }, { "epoch": 15.87, "grad_norm": 1.8932149410247803, "learning_rate": 5.0806439832261935e-06, "loss": 0.9741, "step": 53055 }, { "epoch": 15.87, "grad_norm": 2.655104160308838, "learning_rate": 5.077094278071643e-06, "loss": 0.927, "step": 53060 }, { "epoch": 15.88, "grad_norm": 2.491745948791504, "learning_rate": 5.073545673238467e-06, "loss": 0.8797, "step": 53065 }, { "epoch": 15.88, "grad_norm": 2.7281858921051025, "learning_rate": 5.069998168922649e-06, "loss": 0.9462, "step": 53070 }, { "epoch": 15.88, "grad_norm": 3.0326929092407227, "learning_rate": 5.066451765320116e-06, "loss": 0.997, "step": 53075 }, { "epoch": 15.88, "grad_norm": 2.410046339035034, "learning_rate": 5.062906462626732e-06, "loss": 1.0823, "step": 53080 }, { "epoch": 15.88, "grad_norm": 4.804044246673584, "learning_rate": 5.059362261038303e-06, "loss": 1.0974, "step": 53085 }, { "epoch": 15.88, "grad_norm": 22.172578811645508, "learning_rate": 5.0558191607505745e-06, "loss": 0.944, "step": 53090 }, { "epoch": 15.89, "grad_norm": 4.430202007293701, "learning_rate": 5.052277161959209e-06, "loss": 0.9268, "step": 
53095 }, { "epoch": 15.89, "grad_norm": 3.1318132877349854, "learning_rate": 5.048736264859854e-06, "loss": 0.8596, "step": 53100 }, { "epoch": 15.89, "grad_norm": 2.395545244216919, "learning_rate": 5.045196469648045e-06, "loss": 0.8246, "step": 53105 }, { "epoch": 15.89, "grad_norm": 1.1723501682281494, "learning_rate": 5.041657776519307e-06, "loss": 1.017, "step": 53110 }, { "epoch": 15.89, "grad_norm": 3.7153756618499756, "learning_rate": 5.038120185669051e-06, "loss": 0.8919, "step": 53115 }, { "epoch": 15.89, "grad_norm": 1.7875972986221313, "learning_rate": 5.034583697292675e-06, "loss": 0.9199, "step": 53120 }, { "epoch": 15.89, "grad_norm": 3.715594530105591, "learning_rate": 5.031048311585493e-06, "loss": 1.0215, "step": 53125 }, { "epoch": 15.9, "grad_norm": 3.1977784633636475, "learning_rate": 5.027514028742744e-06, "loss": 1.1078, "step": 53130 }, { "epoch": 15.9, "grad_norm": 2.141249895095825, "learning_rate": 5.023980848959647e-06, "loss": 1.1468, "step": 53135 }, { "epoch": 15.9, "grad_norm": 1.349517583847046, "learning_rate": 5.020448772431313e-06, "loss": 0.9661, "step": 53140 }, { "epoch": 15.9, "grad_norm": 3.260608673095703, "learning_rate": 5.016917799352838e-06, "loss": 0.877, "step": 53145 }, { "epoch": 15.9, "grad_norm": 3.2229676246643066, "learning_rate": 5.0133879299192085e-06, "loss": 0.9666, "step": 53150 }, { "epoch": 15.9, "grad_norm": 4.101037979125977, "learning_rate": 5.0098591643254e-06, "loss": 1.0229, "step": 53155 }, { "epoch": 15.9, "grad_norm": 2.1944096088409424, "learning_rate": 5.006331502766287e-06, "loss": 1.0606, "step": 53160 }, { "epoch": 15.91, "grad_norm": 7.716952800750732, "learning_rate": 5.002804945436701e-06, "loss": 0.9529, "step": 53165 }, { "epoch": 15.91, "grad_norm": 17.72467803955078, "learning_rate": 4.999279492531414e-06, "loss": 0.7256, "step": 53170 }, { "epoch": 15.91, "grad_norm": 3.899148464202881, "learning_rate": 4.995755144245129e-06, "loss": 1.0014, "step": 53175 }, { "epoch": 15.91, 
"grad_norm": 5.196731090545654, "learning_rate": 4.992231900772495e-06, "loss": 1.0583, "step": 53180 }, { "epoch": 15.91, "grad_norm": 4.577493190765381, "learning_rate": 4.9887097623081055e-06, "loss": 0.94, "step": 53185 }, { "epoch": 15.91, "grad_norm": 3.10968017578125, "learning_rate": 4.985188729046458e-06, "loss": 1.0676, "step": 53190 }, { "epoch": 15.92, "grad_norm": 1.4285688400268555, "learning_rate": 4.981668801182041e-06, "loss": 1.0163, "step": 53195 }, { "epoch": 15.92, "grad_norm": 2.724886894226074, "learning_rate": 4.97814997890925e-06, "loss": 0.9586, "step": 53200 }, { "epoch": 15.92, "grad_norm": 2.3468189239501953, "learning_rate": 4.974632262422421e-06, "loss": 1.0864, "step": 53205 }, { "epoch": 15.92, "grad_norm": 5.247642517089844, "learning_rate": 4.971115651915839e-06, "loss": 1.1944, "step": 53210 }, { "epoch": 15.92, "grad_norm": 4.25636100769043, "learning_rate": 4.967600147583718e-06, "loss": 0.9301, "step": 53215 }, { "epoch": 15.92, "grad_norm": 3.097277879714966, "learning_rate": 4.964085749620228e-06, "loss": 0.9153, "step": 53220 }, { "epoch": 15.92, "grad_norm": 2.7797505855560303, "learning_rate": 4.960572458219443e-06, "loss": 1.0695, "step": 53225 }, { "epoch": 15.93, "grad_norm": 3.9096181392669678, "learning_rate": 4.957060273575423e-06, "loss": 0.9084, "step": 53230 }, { "epoch": 15.93, "grad_norm": 3.273726463317871, "learning_rate": 4.953549195882115e-06, "loss": 0.9885, "step": 53235 }, { "epoch": 15.93, "grad_norm": 2.705861806869507, "learning_rate": 4.950039225333464e-06, "loss": 0.9194, "step": 53240 }, { "epoch": 15.93, "grad_norm": 4.52183198928833, "learning_rate": 4.946530362123291e-06, "loss": 0.8444, "step": 53245 }, { "epoch": 15.93, "grad_norm": 4.226823329925537, "learning_rate": 4.943022606445416e-06, "loss": 0.965, "step": 53250 }, { "epoch": 15.93, "grad_norm": 2.7318599224090576, "learning_rate": 4.939515958493549e-06, "loss": 0.9715, "step": 53255 }, { "epoch": 15.93, "grad_norm": 3.3146235942840576, 
"learning_rate": 4.936010418461362e-06, "loss": 0.8742, "step": 53260 }, { "epoch": 15.94, "grad_norm": 9.107588768005371, "learning_rate": 4.932505986542465e-06, "loss": 0.8569, "step": 53265 }, { "epoch": 15.94, "grad_norm": 4.029197692871094, "learning_rate": 4.929002662930404e-06, "loss": 0.9713, "step": 53270 }, { "epoch": 15.94, "grad_norm": 3.5045926570892334, "learning_rate": 4.9255004478186626e-06, "loss": 0.9804, "step": 53275 }, { "epoch": 15.94, "grad_norm": 2.6087844371795654, "learning_rate": 4.921999341400666e-06, "loss": 1.0268, "step": 53280 }, { "epoch": 15.94, "grad_norm": 1.471195936203003, "learning_rate": 4.918499343869773e-06, "loss": 0.8482, "step": 53285 }, { "epoch": 15.94, "grad_norm": 2.6033759117126465, "learning_rate": 4.915000455419289e-06, "loss": 1.0013, "step": 53290 }, { "epoch": 15.95, "grad_norm": 2.508314609527588, "learning_rate": 4.911502676242455e-06, "loss": 0.9304, "step": 53295 }, { "epoch": 15.95, "grad_norm": 3.0417144298553467, "learning_rate": 4.908006006532445e-06, "loss": 0.8015, "step": 53300 }, { "epoch": 15.95, "grad_norm": 2.4495913982391357, "learning_rate": 4.9045104464823795e-06, "loss": 1.0227, "step": 53305 }, { "epoch": 15.95, "grad_norm": 5.05440616607666, "learning_rate": 4.901015996285313e-06, "loss": 1.0307, "step": 53310 }, { "epoch": 15.95, "grad_norm": 1.3113963603973389, "learning_rate": 4.897522656134249e-06, "loss": 1.0566, "step": 53315 }, { "epoch": 15.95, "grad_norm": 2.597869634628296, "learning_rate": 4.8940304262221015e-06, "loss": 0.8975, "step": 53320 }, { "epoch": 15.95, "grad_norm": 3.1354362964630127, "learning_rate": 4.890539306741765e-06, "loss": 0.8747, "step": 53325 }, { "epoch": 15.96, "grad_norm": 3.2860591411590576, "learning_rate": 4.887049297886029e-06, "loss": 0.9559, "step": 53330 }, { "epoch": 15.96, "grad_norm": 4.5994062423706055, "learning_rate": 4.883560399847664e-06, "loss": 0.9507, "step": 53335 }, { "epoch": 15.96, "grad_norm": 4.300266742706299, "learning_rate": 
4.880072612819336e-06, "loss": 0.9513, "step": 53340 }, { "epoch": 15.96, "grad_norm": 5.296845436096191, "learning_rate": 4.876585936993699e-06, "loss": 1.0441, "step": 53345 }, { "epoch": 15.96, "grad_norm": 2.162454843521118, "learning_rate": 4.8731003725632965e-06, "loss": 1.031, "step": 53350 }, { "epoch": 15.96, "grad_norm": 3.4292469024658203, "learning_rate": 4.869615919720641e-06, "loss": 0.9578, "step": 53355 }, { "epoch": 15.96, "grad_norm": 1.5867362022399902, "learning_rate": 4.866132578658173e-06, "loss": 1.0536, "step": 53360 }, { "epoch": 15.97, "grad_norm": 2.8988234996795654, "learning_rate": 4.862650349568274e-06, "loss": 1.0964, "step": 53365 }, { "epoch": 15.97, "grad_norm": 1.7928255796432495, "learning_rate": 4.859169232643265e-06, "loss": 0.9603, "step": 53370 }, { "epoch": 15.97, "grad_norm": 2.7317888736724854, "learning_rate": 4.855689228075402e-06, "loss": 0.9083, "step": 53375 }, { "epoch": 15.97, "grad_norm": 6.080751419067383, "learning_rate": 4.852210336056887e-06, "loss": 0.9522, "step": 53380 }, { "epoch": 15.97, "grad_norm": 6.8323073387146, "learning_rate": 4.848732556779853e-06, "loss": 1.0334, "step": 53385 }, { "epoch": 15.97, "grad_norm": 2.109210968017578, "learning_rate": 4.84525589043637e-06, "loss": 0.8857, "step": 53390 }, { "epoch": 15.98, "grad_norm": 1.5655369758605957, "learning_rate": 4.84178033721846e-06, "loss": 0.8181, "step": 53395 }, { "epoch": 15.98, "grad_norm": 2.1813764572143555, "learning_rate": 4.838305897318063e-06, "loss": 0.9369, "step": 53400 }, { "epoch": 15.98, "grad_norm": 6.134399890899658, "learning_rate": 4.8348325709270785e-06, "loss": 0.9357, "step": 53405 }, { "epoch": 15.98, "grad_norm": 2.2405998706817627, "learning_rate": 4.831360358237336e-06, "loss": 0.9124, "step": 53410 }, { "epoch": 15.98, "grad_norm": 3.413001775741577, "learning_rate": 4.8278892594405826e-06, "loss": 1.0046, "step": 53415 }, { "epoch": 15.98, "grad_norm": 2.7362916469573975, "learning_rate": 4.82441927472855e-06, 
"loss": 0.9897, "step": 53420 }, { "epoch": 15.98, "grad_norm": 4.3867340087890625, "learning_rate": 4.820950404292859e-06, "loss": 1.0994, "step": 53425 }, { "epoch": 15.99, "grad_norm": 7.300239086151123, "learning_rate": 4.817482648325114e-06, "loss": 0.9382, "step": 53430 }, { "epoch": 15.99, "grad_norm": 5.3722124099731445, "learning_rate": 4.814016007016811e-06, "loss": 1.0042, "step": 53435 }, { "epoch": 15.99, "grad_norm": 2.9729552268981934, "learning_rate": 4.810550480559434e-06, "loss": 0.8934, "step": 53440 }, { "epoch": 15.99, "grad_norm": 2.5826375484466553, "learning_rate": 4.807086069144364e-06, "loss": 1.0011, "step": 53445 }, { "epoch": 15.99, "grad_norm": 3.0726816654205322, "learning_rate": 4.80362277296294e-06, "loss": 0.9917, "step": 53450 }, { "epoch": 15.99, "grad_norm": 2.546142339706421, "learning_rate": 4.800160592206435e-06, "loss": 0.9182, "step": 53455 }, { "epoch": 15.99, "grad_norm": 1.8220473527908325, "learning_rate": 4.796699527066065e-06, "loss": 0.9448, "step": 53460 }, { "epoch": 16.0, "grad_norm": 2.17934250831604, "learning_rate": 4.79323957773298e-06, "loss": 1.1478, "step": 53465 }, { "epoch": 16.0, "grad_norm": 2.7372896671295166, "learning_rate": 4.789780744398273e-06, "loss": 0.8963, "step": 53470 }, { "epoch": 16.0, "grad_norm": 3.5111589431762695, "learning_rate": 4.786323027252965e-06, "loss": 0.9019, "step": 53475 }, { "epoch": 16.0, "grad_norm": 1.6906156539916992, "learning_rate": 4.782866426488025e-06, "loss": 0.9621, "step": 53480 }, { "epoch": 16.0, "grad_norm": 2.1876065731048584, "learning_rate": 4.779410942294357e-06, "loss": 1.0843, "step": 53485 }, { "epoch": 16.0, "grad_norm": 2.4555323123931885, "learning_rate": 4.7759565748628055e-06, "loss": 0.9055, "step": 53490 }, { "epoch": 16.01, "grad_norm": 3.3051624298095703, "learning_rate": 4.772503324384151e-06, "loss": 0.7355, "step": 53495 }, { "epoch": 16.01, "grad_norm": 1.634621500968933, "learning_rate": 4.769051191049112e-06, "loss": 1.1518, "step": 
53500 }, { "epoch": 16.01, "grad_norm": 5.108380317687988, "learning_rate": 4.765600175048355e-06, "loss": 0.7752, "step": 53505 }, { "epoch": 16.01, "grad_norm": 3.3601999282836914, "learning_rate": 4.762150276572452e-06, "loss": 1.0098, "step": 53510 }, { "epoch": 16.01, "grad_norm": 1.3551602363586426, "learning_rate": 4.758701495811968e-06, "loss": 0.9517, "step": 53515 }, { "epoch": 16.01, "grad_norm": 3.776623249053955, "learning_rate": 4.755253832957349e-06, "loss": 0.9535, "step": 53520 }, { "epoch": 16.01, "grad_norm": 1.849770188331604, "learning_rate": 4.751807288199028e-06, "loss": 1.0225, "step": 53525 }, { "epoch": 16.02, "grad_norm": 2.972109794616699, "learning_rate": 4.7483618617273325e-06, "loss": 1.0399, "step": 53530 }, { "epoch": 16.02, "grad_norm": 1.7321369647979736, "learning_rate": 4.744917553732572e-06, "loss": 1.0185, "step": 53535 }, { "epoch": 16.02, "grad_norm": 2.0730857849121094, "learning_rate": 4.741474364404955e-06, "loss": 1.0239, "step": 53540 }, { "epoch": 16.02, "grad_norm": 3.4221105575561523, "learning_rate": 4.738032293934652e-06, "loss": 0.8171, "step": 53545 }, { "epoch": 16.02, "grad_norm": 1.468760371208191, "learning_rate": 4.734591342511765e-06, "loss": 0.9047, "step": 53550 }, { "epoch": 16.02, "grad_norm": 2.599126100540161, "learning_rate": 4.7311515103263315e-06, "loss": 0.9684, "step": 53555 }, { "epoch": 16.02, "grad_norm": 2.2176599502563477, "learning_rate": 4.727712797568335e-06, "loss": 0.9353, "step": 53560 }, { "epoch": 16.03, "grad_norm": 5.144350528717041, "learning_rate": 4.7242752044276865e-06, "loss": 0.9837, "step": 53565 }, { "epoch": 16.03, "grad_norm": 1.4376980066299438, "learning_rate": 4.720838731094243e-06, "loss": 1.0823, "step": 53570 }, { "epoch": 16.03, "grad_norm": 3.1391711235046387, "learning_rate": 4.717403377757798e-06, "loss": 0.7762, "step": 53575 }, { "epoch": 16.03, "grad_norm": 3.4208807945251465, "learning_rate": 4.71396914460808e-06, "loss": 0.8843, "step": 53580 }, { "epoch": 
16.03, "grad_norm": 3.4522769451141357, "learning_rate": 4.710536031834761e-06, "loss": 1.1328, "step": 53585 }, { "epoch": 16.03, "grad_norm": 1.8125996589660645, "learning_rate": 4.707104039627447e-06, "loss": 0.9399, "step": 53590 }, { "epoch": 16.04, "grad_norm": 4.709026336669922, "learning_rate": 4.703673168175684e-06, "loss": 0.9658, "step": 53595 }, { "epoch": 16.04, "grad_norm": 1.6594618558883667, "learning_rate": 4.700243417668957e-06, "loss": 1.03, "step": 53600 }, { "epoch": 16.04, "grad_norm": 2.7412757873535156, "learning_rate": 4.696814788296683e-06, "loss": 0.945, "step": 53605 }, { "epoch": 16.04, "grad_norm": 4.177542686462402, "learning_rate": 4.693387280248232e-06, "loss": 0.9507, "step": 53610 }, { "epoch": 16.04, "grad_norm": 1.866605520248413, "learning_rate": 4.68996089371288e-06, "loss": 0.8753, "step": 53615 }, { "epoch": 16.04, "grad_norm": 2.461459159851074, "learning_rate": 4.686535628879893e-06, "loss": 0.8427, "step": 53620 }, { "epoch": 16.04, "grad_norm": 2.5235543251037598, "learning_rate": 4.68311148593841e-06, "loss": 1.0127, "step": 53625 }, { "epoch": 16.05, "grad_norm": 3.0015721321105957, "learning_rate": 4.679688465077578e-06, "loss": 1.0528, "step": 53630 }, { "epoch": 16.05, "grad_norm": 1.9430887699127197, "learning_rate": 4.676266566486423e-06, "loss": 0.9704, "step": 53635 }, { "epoch": 16.05, "grad_norm": 3.479346752166748, "learning_rate": 4.672845790353941e-06, "loss": 0.8397, "step": 53640 }, { "epoch": 16.05, "grad_norm": 5.18800163269043, "learning_rate": 4.669426136869056e-06, "loss": 0.8035, "step": 53645 }, { "epoch": 16.05, "grad_norm": 4.256208419799805, "learning_rate": 4.666007606220635e-06, "loss": 0.922, "step": 53650 }, { "epoch": 16.05, "grad_norm": 3.6765284538269043, "learning_rate": 4.662590198597477e-06, "loss": 0.9406, "step": 53655 }, { "epoch": 16.05, "grad_norm": 1.5843440294265747, "learning_rate": 4.659173914188322e-06, "loss": 1.2731, "step": 53660 }, { "epoch": 16.06, "grad_norm": 
2.01247501373291, "learning_rate": 4.655758753181846e-06, "loss": 0.95, "step": 53665 }, { "epoch": 16.06, "grad_norm": 2.1680808067321777, "learning_rate": 4.652344715766671e-06, "loss": 1.0874, "step": 53670 }, { "epoch": 16.06, "grad_norm": 1.926619291305542, "learning_rate": 4.648931802131348e-06, "loss": 0.935, "step": 53675 }, { "epoch": 16.06, "grad_norm": 4.823452472686768, "learning_rate": 4.645520012464366e-06, "loss": 0.7357, "step": 53680 }, { "epoch": 16.06, "grad_norm": 3.968709945678711, "learning_rate": 4.6421093469541545e-06, "loss": 1.1243, "step": 53685 }, { "epoch": 16.06, "grad_norm": 2.4009251594543457, "learning_rate": 4.638699805789088e-06, "loss": 0.9297, "step": 53690 }, { "epoch": 16.06, "grad_norm": 2.19075345993042, "learning_rate": 4.635291389157462e-06, "loss": 0.8262, "step": 53695 }, { "epoch": 16.07, "grad_norm": 5.621512413024902, "learning_rate": 4.631884097247527e-06, "loss": 1.0455, "step": 53700 }, { "epoch": 16.07, "grad_norm": 1.1728134155273438, "learning_rate": 4.628477930247466e-06, "loss": 0.7797, "step": 53705 }, { "epoch": 16.07, "grad_norm": 2.9061529636383057, "learning_rate": 4.6250728883453835e-06, "loss": 0.9716, "step": 53710 }, { "epoch": 16.07, "grad_norm": 2.0948145389556885, "learning_rate": 4.621668971729359e-06, "loss": 0.9648, "step": 53715 }, { "epoch": 16.07, "grad_norm": 1.2624660730361938, "learning_rate": 4.618266180587363e-06, "loss": 1.1212, "step": 53720 }, { "epoch": 16.07, "grad_norm": 1.8535141944885254, "learning_rate": 4.61486451510735e-06, "loss": 0.9889, "step": 53725 }, { "epoch": 16.08, "grad_norm": 2.1382455825805664, "learning_rate": 4.61146397547717e-06, "loss": 0.8759, "step": 53730 }, { "epoch": 16.08, "grad_norm": 2.7965381145477295, "learning_rate": 4.608064561884656e-06, "loss": 0.8569, "step": 53735 }, { "epoch": 16.08, "grad_norm": 3.074979066848755, "learning_rate": 4.6046662745175325e-06, "loss": 1.1403, "step": 53740 }, { "epoch": 16.08, "grad_norm": 2.496306896209717, 
"learning_rate": 4.601269113563492e-06, "loss": 0.8768, "step": 53745 }, { "epoch": 16.08, "grad_norm": 1.5231736898422241, "learning_rate": 4.597873079210152e-06, "loss": 1.005, "step": 53750 }, { "epoch": 16.08, "grad_norm": 3.4601387977600098, "learning_rate": 4.594478171645078e-06, "loss": 1.0404, "step": 53755 }, { "epoch": 16.08, "grad_norm": 2.981828451156616, "learning_rate": 4.591084391055761e-06, "loss": 1.1838, "step": 53760 }, { "epoch": 16.09, "grad_norm": 1.5750380754470825, "learning_rate": 4.587691737629643e-06, "loss": 1.0259, "step": 53765 }, { "epoch": 16.09, "grad_norm": 1.4507620334625244, "learning_rate": 4.58430021155409e-06, "loss": 0.9411, "step": 53770 }, { "epoch": 16.09, "grad_norm": 1.5799695253372192, "learning_rate": 4.580909813016418e-06, "loss": 1.0206, "step": 53775 }, { "epoch": 16.09, "grad_norm": 2.9777636528015137, "learning_rate": 4.57752054220387e-06, "loss": 0.9011, "step": 53780 }, { "epoch": 16.09, "grad_norm": 2.887299060821533, "learning_rate": 4.574132399303638e-06, "loss": 0.9952, "step": 53785 }, { "epoch": 16.09, "grad_norm": 2.431195020675659, "learning_rate": 4.5707453845028365e-06, "loss": 1.0887, "step": 53790 }, { "epoch": 16.09, "grad_norm": 5.010396480560303, "learning_rate": 4.567359497988538e-06, "loss": 1.1466, "step": 53795 }, { "epoch": 16.1, "grad_norm": 4.0482611656188965, "learning_rate": 4.563974739947738e-06, "loss": 0.7066, "step": 53800 }, { "epoch": 16.1, "grad_norm": 1.8453367948532104, "learning_rate": 4.560591110567361e-06, "loss": 0.9911, "step": 53805 }, { "epoch": 16.1, "grad_norm": 4.799133777618408, "learning_rate": 4.557208610034302e-06, "loss": 0.9197, "step": 53810 }, { "epoch": 16.1, "grad_norm": 2.3460564613342285, "learning_rate": 4.553827238535352e-06, "loss": 1.1286, "step": 53815 }, { "epoch": 16.1, "grad_norm": 2.8893086910247803, "learning_rate": 4.550446996257282e-06, "loss": 0.8731, "step": 53820 }, { "epoch": 16.1, "grad_norm": 3.3876943588256836, "learning_rate": 
4.5470678833867575e-06, "loss": 1.0628, "step": 53825 }, { "epoch": 16.11, "grad_norm": 3.1112098693847656, "learning_rate": 4.543689900110423e-06, "loss": 0.7802, "step": 53830 }, { "epoch": 16.11, "grad_norm": 3.280080795288086, "learning_rate": 4.54031304661483e-06, "loss": 0.9001, "step": 53835 }, { "epoch": 16.11, "grad_norm": 2.949982166290283, "learning_rate": 4.536937323086479e-06, "loss": 0.8499, "step": 53840 }, { "epoch": 16.11, "grad_norm": 1.0892285108566284, "learning_rate": 4.53356272971181e-06, "loss": 1.1195, "step": 53845 }, { "epoch": 16.11, "grad_norm": 1.7821409702301025, "learning_rate": 4.53018926667719e-06, "loss": 1.0122, "step": 53850 }, { "epoch": 16.11, "grad_norm": 3.348202705383301, "learning_rate": 4.526816934168954e-06, "loss": 0.9711, "step": 53855 }, { "epoch": 16.11, "grad_norm": 3.279318332672119, "learning_rate": 4.52344573237333e-06, "loss": 0.9613, "step": 53860 }, { "epoch": 16.12, "grad_norm": 1.0132023096084595, "learning_rate": 4.520075661476517e-06, "loss": 0.9816, "step": 53865 }, { "epoch": 16.12, "grad_norm": 2.115264892578125, "learning_rate": 4.516706721664635e-06, "loss": 1.0883, "step": 53870 }, { "epoch": 16.12, "grad_norm": 4.187863826751709, "learning_rate": 4.5133389131237495e-06, "loss": 1.0543, "step": 53875 }, { "epoch": 16.12, "grad_norm": 3.177401065826416, "learning_rate": 4.509972236039861e-06, "loss": 0.8548, "step": 53880 }, { "epoch": 16.12, "grad_norm": 3.7143056392669678, "learning_rate": 4.50660669059891e-06, "loss": 1.0591, "step": 53885 }, { "epoch": 16.12, "grad_norm": 2.0598580837249756, "learning_rate": 4.503242276986769e-06, "loss": 1.0228, "step": 53890 }, { "epoch": 16.12, "grad_norm": 2.2336246967315674, "learning_rate": 4.4998789953892566e-06, "loss": 0.8827, "step": 53895 }, { "epoch": 16.13, "grad_norm": 2.2673490047454834, "learning_rate": 4.496516845992108e-06, "loss": 1.0858, "step": 53900 }, { "epoch": 16.13, "grad_norm": 1.8698546886444092, "learning_rate": 4.493155828981033e-06, 
"loss": 1.0087, "step": 53905 }, { "epoch": 16.13, "grad_norm": 1.9772906303405762, "learning_rate": 4.489795944541633e-06, "loss": 0.9655, "step": 53910 }, { "epoch": 16.13, "grad_norm": 1.3239150047302246, "learning_rate": 4.486437192859496e-06, "loss": 0.8522, "step": 53915 }, { "epoch": 16.13, "grad_norm": 2.097325086593628, "learning_rate": 4.483079574120097e-06, "loss": 1.1089, "step": 53920 }, { "epoch": 16.13, "grad_norm": 2.80251407623291, "learning_rate": 4.479723088508897e-06, "loss": 0.9511, "step": 53925 }, { "epoch": 16.14, "grad_norm": 2.2428815364837646, "learning_rate": 4.476367736211265e-06, "loss": 0.7491, "step": 53930 }, { "epoch": 16.14, "grad_norm": 4.145894527435303, "learning_rate": 4.4730135174124974e-06, "loss": 1.0072, "step": 53935 }, { "epoch": 16.14, "grad_norm": 1.747147560119629, "learning_rate": 4.469660432297868e-06, "loss": 0.9186, "step": 53940 }, { "epoch": 16.14, "grad_norm": 2.3156204223632812, "learning_rate": 4.466308481052542e-06, "loss": 1.0476, "step": 53945 }, { "epoch": 16.14, "grad_norm": 1.5770155191421509, "learning_rate": 4.4636277365666055e-06, "loss": 1.0194, "step": 53950 }, { "epoch": 16.14, "grad_norm": 5.032956123352051, "learning_rate": 4.460277826752518e-06, "loss": 0.9442, "step": 53955 }, { "epoch": 16.14, "grad_norm": 2.544370651245117, "learning_rate": 4.456929051325948e-06, "loss": 0.9561, "step": 53960 }, { "epoch": 16.15, "grad_norm": 2.7542896270751953, "learning_rate": 4.453581410471821e-06, "loss": 0.9285, "step": 53965 }, { "epoch": 16.15, "grad_norm": 4.545336723327637, "learning_rate": 4.450234904375047e-06, "loss": 0.8249, "step": 53970 }, { "epoch": 16.15, "grad_norm": 6.846313953399658, "learning_rate": 4.446889533220427e-06, "loss": 0.9482, "step": 53975 }, { "epoch": 16.15, "grad_norm": 2.253056764602661, "learning_rate": 4.443545297192747e-06, "loss": 1.0561, "step": 53980 }, { "epoch": 16.15, "grad_norm": 3.439204216003418, "learning_rate": 4.440202196476687e-06, "loss": 0.8451, "step": 
53985 }, { "epoch": 16.15, "grad_norm": 2.593958854675293, "learning_rate": 4.43686023125689e-06, "loss": 0.8443, "step": 53990 }, { "epoch": 16.15, "grad_norm": 3.0037314891815186, "learning_rate": 4.433519401717925e-06, "loss": 0.8752, "step": 53995 }, { "epoch": 16.16, "grad_norm": 1.4025530815124512, "learning_rate": 4.43017970804431e-06, "loss": 0.8974, "step": 54000 }, { "epoch": 16.16, "grad_norm": 3.922529935836792, "learning_rate": 4.426841150420485e-06, "loss": 1.1543, "step": 54005 }, { "epoch": 16.16, "grad_norm": 2.439948797225952, "learning_rate": 4.4235037290308425e-06, "loss": 0.8726, "step": 54010 }, { "epoch": 16.16, "grad_norm": 2.4692089557647705, "learning_rate": 4.420167444059698e-06, "loss": 0.8118, "step": 54015 }, { "epoch": 16.16, "grad_norm": 2.5722415447235107, "learning_rate": 4.416832295691314e-06, "loss": 1.0299, "step": 54020 }, { "epoch": 16.16, "grad_norm": 2.7503256797790527, "learning_rate": 4.413498284109888e-06, "loss": 1.172, "step": 54025 }, { "epoch": 16.17, "grad_norm": 3.138927459716797, "learning_rate": 4.410165409499553e-06, "loss": 0.941, "step": 54030 }, { "epoch": 16.17, "grad_norm": 2.402808666229248, "learning_rate": 4.40683367204438e-06, "loss": 0.9587, "step": 54035 }, { "epoch": 16.17, "grad_norm": 3.9878361225128174, "learning_rate": 4.403503071928378e-06, "loss": 0.8567, "step": 54040 }, { "epoch": 16.17, "grad_norm": 1.6606813669204712, "learning_rate": 4.4001736093355005e-06, "loss": 1.0092, "step": 54045 }, { "epoch": 16.17, "grad_norm": 3.9329001903533936, "learning_rate": 4.396845284449608e-06, "loss": 1.008, "step": 54050 }, { "epoch": 16.17, "grad_norm": 5.513614654541016, "learning_rate": 4.393518097454546e-06, "loss": 1.0288, "step": 54055 }, { "epoch": 16.17, "grad_norm": 1.570499062538147, "learning_rate": 4.39019204853405e-06, "loss": 1.0363, "step": 54060 }, { "epoch": 16.18, "grad_norm": 2.9202938079833984, "learning_rate": 4.386867137871839e-06, "loss": 0.8739, "step": 54065 }, { "epoch": 16.18, 
"grad_norm": 1.5912072658538818, "learning_rate": 4.383543365651513e-06, "loss": 0.9977, "step": 54070 }, { "epoch": 16.18, "grad_norm": 1.2433182001113892, "learning_rate": 4.380220732056673e-06, "loss": 0.9558, "step": 54075 }, { "epoch": 16.18, "grad_norm": 3.465158700942993, "learning_rate": 4.376899237270798e-06, "loss": 0.9604, "step": 54080 }, { "epoch": 16.18, "grad_norm": 4.610307216644287, "learning_rate": 4.373578881477347e-06, "loss": 0.9055, "step": 54085 }, { "epoch": 16.18, "grad_norm": 1.9522101879119873, "learning_rate": 4.370259664859691e-06, "loss": 0.9228, "step": 54090 }, { "epoch": 16.18, "grad_norm": 1.3069242238998413, "learning_rate": 4.3669415876011535e-06, "loss": 1.0042, "step": 54095 }, { "epoch": 16.19, "grad_norm": 2.4760279655456543, "learning_rate": 4.363624649884982e-06, "loss": 1.0021, "step": 54100 }, { "epoch": 16.19, "grad_norm": 1.8461346626281738, "learning_rate": 4.360308851894366e-06, "loss": 1.0572, "step": 54105 }, { "epoch": 16.19, "grad_norm": 3.235544443130493, "learning_rate": 4.35699419381245e-06, "loss": 0.9269, "step": 54110 }, { "epoch": 16.19, "grad_norm": 2.0938470363616943, "learning_rate": 4.353680675822281e-06, "loss": 0.9334, "step": 54115 }, { "epoch": 16.19, "grad_norm": 1.0838735103607178, "learning_rate": 4.350368298106869e-06, "loss": 1.0252, "step": 54120 }, { "epoch": 16.19, "grad_norm": 4.055896282196045, "learning_rate": 4.34705706084915e-06, "loss": 0.9531, "step": 54125 }, { "epoch": 16.2, "grad_norm": 1.868022084236145, "learning_rate": 4.343746964232004e-06, "loss": 1.0052, "step": 54130 }, { "epoch": 16.2, "grad_norm": 2.870447874069214, "learning_rate": 4.340438008438241e-06, "loss": 1.0222, "step": 54135 }, { "epoch": 16.2, "grad_norm": 3.2282488346099854, "learning_rate": 4.3371301936506205e-06, "loss": 1.0202, "step": 54140 }, { "epoch": 16.2, "grad_norm": 2.0114264488220215, "learning_rate": 4.333823520051805e-06, "loss": 1.038, "step": 54145 }, { "epoch": 16.2, "grad_norm": 
1.6153552532196045, "learning_rate": 4.330517987824451e-06, "loss": 1.0861, "step": 54150 }, { "epoch": 16.2, "grad_norm": 4.407323360443115, "learning_rate": 4.327213597151092e-06, "loss": 1.1782, "step": 54155 }, { "epoch": 16.2, "grad_norm": 2.3100712299346924, "learning_rate": 4.323910348214247e-06, "loss": 0.7662, "step": 54160 }, { "epoch": 16.21, "grad_norm": 2.331082582473755, "learning_rate": 4.320608241196333e-06, "loss": 0.9865, "step": 54165 }, { "epoch": 16.21, "grad_norm": 2.015458822250366, "learning_rate": 4.317307276279742e-06, "loss": 1.0186, "step": 54170 }, { "epoch": 16.21, "grad_norm": 2.7874059677124023, "learning_rate": 4.3140074536467625e-06, "loss": 1.0223, "step": 54175 }, { "epoch": 16.21, "grad_norm": 1.4887959957122803, "learning_rate": 4.310708773479652e-06, "loss": 1.0826, "step": 54180 }, { "epoch": 16.21, "grad_norm": 3.373190402984619, "learning_rate": 4.307411235960593e-06, "loss": 1.1612, "step": 54185 }, { "epoch": 16.21, "grad_norm": 5.217267990112305, "learning_rate": 4.304114841271692e-06, "loss": 1.0709, "step": 54190 }, { "epoch": 16.21, "grad_norm": 1.9477370977401733, "learning_rate": 4.3008195895950315e-06, "loss": 0.9157, "step": 54195 }, { "epoch": 16.22, "grad_norm": 3.2760956287384033, "learning_rate": 4.297525481112577e-06, "loss": 0.9145, "step": 54200 }, { "epoch": 16.22, "grad_norm": 4.605449199676514, "learning_rate": 4.2942325160062826e-06, "loss": 0.9861, "step": 54205 }, { "epoch": 16.22, "grad_norm": 1.7385135889053345, "learning_rate": 4.290940694457995e-06, "loss": 1.0439, "step": 54210 }, { "epoch": 16.22, "grad_norm": 2.1554768085479736, "learning_rate": 4.287650016649533e-06, "loss": 1.2434, "step": 54215 }, { "epoch": 16.22, "grad_norm": 3.2271509170532227, "learning_rate": 4.284360482762626e-06, "loss": 0.9879, "step": 54220 }, { "epoch": 16.22, "grad_norm": 2.956493616104126, "learning_rate": 4.28107209297896e-06, "loss": 1.2336, "step": 54225 }, { "epoch": 16.22, "grad_norm": 2.8822336196899414, 
"learning_rate": 4.277784847480145e-06, "loss": 0.9283, "step": 54230 }, { "epoch": 16.23, "grad_norm": 3.9838624000549316, "learning_rate": 4.274498746447739e-06, "loss": 0.999, "step": 54235 }, { "epoch": 16.23, "grad_norm": 1.797528862953186, "learning_rate": 4.271213790063213e-06, "loss": 0.9987, "step": 54240 }, { "epoch": 16.23, "grad_norm": 8.245445251464844, "learning_rate": 4.267929978508014e-06, "loss": 0.8547, "step": 54245 }, { "epoch": 16.23, "grad_norm": 1.7248831987380981, "learning_rate": 4.264647311963482e-06, "loss": 1.2987, "step": 54250 }, { "epoch": 16.23, "grad_norm": 1.602921485900879, "learning_rate": 4.261365790610935e-06, "loss": 1.0701, "step": 54255 }, { "epoch": 16.23, "grad_norm": 1.774390459060669, "learning_rate": 4.258085414631591e-06, "loss": 0.9237, "step": 54260 }, { "epoch": 16.24, "grad_norm": 2.390256881713867, "learning_rate": 4.254806184206634e-06, "loss": 1.0289, "step": 54265 }, { "epoch": 16.24, "grad_norm": 2.603825330734253, "learning_rate": 4.2515280995171715e-06, "loss": 0.9717, "step": 54270 }, { "epoch": 16.24, "grad_norm": 1.769459843635559, "learning_rate": 4.248251160744235e-06, "loss": 1.237, "step": 54275 }, { "epoch": 16.24, "grad_norm": 2.9963274002075195, "learning_rate": 4.2449753680688295e-06, "loss": 0.7686, "step": 54280 }, { "epoch": 16.24, "grad_norm": 1.8369311094284058, "learning_rate": 4.241700721671849e-06, "loss": 1.0841, "step": 54285 }, { "epoch": 16.24, "grad_norm": 1.0869977474212646, "learning_rate": 4.23842722173417e-06, "loss": 0.7985, "step": 54290 }, { "epoch": 16.24, "grad_norm": 2.742431402206421, "learning_rate": 4.235154868436564e-06, "loss": 1.0202, "step": 54295 }, { "epoch": 16.25, "grad_norm": 2.4266834259033203, "learning_rate": 4.231883661959785e-06, "loss": 0.8518, "step": 54300 }, { "epoch": 16.25, "grad_norm": 3.0461745262145996, "learning_rate": 4.228613602484477e-06, "loss": 0.8847, "step": 54305 }, { "epoch": 16.25, "grad_norm": 2.476036787033081, "learning_rate": 
4.225344690191247e-06, "loss": 0.8996, "step": 54310 }, { "epoch": 16.25, "grad_norm": 3.7024502754211426, "learning_rate": 4.222076925260637e-06, "loss": 0.9061, "step": 54315 }, { "epoch": 16.25, "grad_norm": 3.387453556060791, "learning_rate": 4.218810307873122e-06, "loss": 1.0476, "step": 54320 }, { "epoch": 16.25, "grad_norm": 4.558333396911621, "learning_rate": 4.215544838209112e-06, "loss": 0.797, "step": 54325 }, { "epoch": 16.25, "grad_norm": 3.4511423110961914, "learning_rate": 4.2122805164489575e-06, "loss": 0.7751, "step": 54330 }, { "epoch": 16.26, "grad_norm": 2.4084949493408203, "learning_rate": 4.209017342772939e-06, "loss": 0.8006, "step": 54335 }, { "epoch": 16.26, "grad_norm": 3.032461643218994, "learning_rate": 4.205755317361293e-06, "loss": 0.9037, "step": 54340 }, { "epoch": 16.26, "grad_norm": 1.9977115392684937, "learning_rate": 4.20249444039415e-06, "loss": 0.9293, "step": 54345 }, { "epoch": 16.26, "grad_norm": 1.7754746675491333, "learning_rate": 4.199234712051628e-06, "loss": 1.0857, "step": 54350 }, { "epoch": 16.26, "grad_norm": 3.022590398788452, "learning_rate": 4.195976132513754e-06, "loss": 0.8392, "step": 54355 }, { "epoch": 16.26, "grad_norm": 3.174025535583496, "learning_rate": 4.192718701960491e-06, "loss": 0.9006, "step": 54360 }, { "epoch": 16.27, "grad_norm": 12.422464370727539, "learning_rate": 4.189462420571752e-06, "loss": 1.0187, "step": 54365 }, { "epoch": 16.27, "grad_norm": 1.5121108293533325, "learning_rate": 4.186207288527361e-06, "loss": 0.9843, "step": 54370 }, { "epoch": 16.27, "grad_norm": 2.849782705307007, "learning_rate": 4.18295330600712e-06, "loss": 1.0467, "step": 54375 }, { "epoch": 16.27, "grad_norm": 3.5961849689483643, "learning_rate": 4.179700473190717e-06, "loss": 0.9365, "step": 54380 }, { "epoch": 16.27, "grad_norm": 2.3234174251556396, "learning_rate": 4.176448790257828e-06, "loss": 0.8933, "step": 54385 }, { "epoch": 16.27, "grad_norm": 1.903601050376892, "learning_rate": 4.173198257388014e-06, 
"loss": 0.8867, "step": 54390 }, { "epoch": 16.27, "grad_norm": 5.895204067230225, "learning_rate": 4.169948874760823e-06, "loss": 1.0914, "step": 54395 }, { "epoch": 16.28, "grad_norm": 2.9874160289764404, "learning_rate": 4.166700642555702e-06, "loss": 1.0449, "step": 54400 }, { "epoch": 16.28, "grad_norm": 2.241297721862793, "learning_rate": 4.1634535609520465e-06, "loss": 1.0472, "step": 54405 }, { "epoch": 16.28, "grad_norm": 3.7421207427978516, "learning_rate": 4.160207630129192e-06, "loss": 1.1745, "step": 54410 }, { "epoch": 16.28, "grad_norm": 4.566219329833984, "learning_rate": 4.15696285026641e-06, "loss": 1.0118, "step": 54415 }, { "epoch": 16.28, "grad_norm": 1.1942859888076782, "learning_rate": 4.153719221542904e-06, "loss": 1.0707, "step": 54420 }, { "epoch": 16.28, "grad_norm": 3.381263494491577, "learning_rate": 4.150476744137818e-06, "loss": 0.9244, "step": 54425 }, { "epoch": 16.28, "grad_norm": 2.5632987022399902, "learning_rate": 4.147235418230227e-06, "loss": 0.9353, "step": 54430 }, { "epoch": 16.29, "grad_norm": 4.676177978515625, "learning_rate": 4.143995243999152e-06, "loss": 1.0719, "step": 54435 }, { "epoch": 16.29, "grad_norm": 3.268036365509033, "learning_rate": 4.140756221623537e-06, "loss": 0.7861, "step": 54440 }, { "epoch": 16.29, "grad_norm": 1.5229823589324951, "learning_rate": 4.137518351282277e-06, "loss": 0.874, "step": 54445 }, { "epoch": 16.29, "grad_norm": 4.277475833892822, "learning_rate": 4.1342816331541915e-06, "loss": 0.9358, "step": 54450 }, { "epoch": 16.29, "grad_norm": 3.253185510635376, "learning_rate": 4.131046067418043e-06, "loss": 1.0229, "step": 54455 }, { "epoch": 16.29, "grad_norm": 1.8826074600219727, "learning_rate": 4.127811654252531e-06, "loss": 0.8511, "step": 54460 }, { "epoch": 16.3, "grad_norm": 4.963164806365967, "learning_rate": 4.124578393836282e-06, "loss": 1.0294, "step": 54465 }, { "epoch": 16.3, "grad_norm": 4.152764320373535, "learning_rate": 4.121346286347877e-06, "loss": 1.0621, "step": 
54470 }, { "epoch": 16.3, "grad_norm": 4.597795009613037, "learning_rate": 4.1181153319658025e-06, "loss": 0.8713, "step": 54475 }, { "epoch": 16.3, "grad_norm": 2.7429773807525635, "learning_rate": 4.1148855308685256e-06, "loss": 0.9384, "step": 54480 }, { "epoch": 16.3, "grad_norm": 3.036653518676758, "learning_rate": 4.111656883234396e-06, "loss": 1.026, "step": 54485 }, { "epoch": 16.3, "grad_norm": 3.560821533203125, "learning_rate": 4.108429389241761e-06, "loss": 0.9886, "step": 54490 }, { "epoch": 16.3, "grad_norm": 3.0521786212921143, "learning_rate": 4.105203049068848e-06, "loss": 1.0105, "step": 54495 }, { "epoch": 16.31, "grad_norm": 2.9421958923339844, "learning_rate": 4.101977862893852e-06, "loss": 0.8541, "step": 54500 }, { "epoch": 16.31, "grad_norm": 2.5392541885375977, "learning_rate": 4.098753830894894e-06, "loss": 1.1314, "step": 54505 }, { "epoch": 16.31, "grad_norm": 2.7192037105560303, "learning_rate": 4.09553095325004e-06, "loss": 1.0789, "step": 54510 }, { "epoch": 16.31, "grad_norm": 1.7262064218521118, "learning_rate": 4.092309230137282e-06, "loss": 0.901, "step": 54515 }, { "epoch": 16.31, "grad_norm": 6.73134708404541, "learning_rate": 4.08908866173455e-06, "loss": 0.8987, "step": 54520 }, { "epoch": 16.31, "grad_norm": 2.3271663188934326, "learning_rate": 4.085869248219715e-06, "loss": 1.0085, "step": 54525 }, { "epoch": 16.31, "grad_norm": 4.21439266204834, "learning_rate": 4.082650989770584e-06, "loss": 0.9719, "step": 54530 }, { "epoch": 16.32, "grad_norm": 3.639162540435791, "learning_rate": 4.079433886564899e-06, "loss": 0.9036, "step": 54535 }, { "epoch": 16.32, "grad_norm": 2.1096203327178955, "learning_rate": 4.0762179387803305e-06, "loss": 1.0381, "step": 54540 }, { "epoch": 16.32, "grad_norm": 4.696827411651611, "learning_rate": 4.073003146594498e-06, "loss": 0.8693, "step": 54545 }, { "epoch": 16.32, "grad_norm": 4.135863304138184, "learning_rate": 4.069789510184951e-06, "loss": 0.7749, "step": 54550 }, { "epoch": 16.32, 
"grad_norm": 2.406681537628174, "learning_rate": 4.0665770297291714e-06, "loss": 1.0704, "step": 54555 }, { "epoch": 16.32, "grad_norm": 2.2193562984466553, "learning_rate": 4.063365705404584e-06, "loss": 1.1637, "step": 54560 }, { "epoch": 16.33, "grad_norm": 2.6856563091278076, "learning_rate": 4.060155537388552e-06, "loss": 1.0761, "step": 54565 }, { "epoch": 16.33, "grad_norm": 5.369014263153076, "learning_rate": 4.056946525858352e-06, "loss": 1.0351, "step": 54570 }, { "epoch": 16.33, "grad_norm": 2.9913430213928223, "learning_rate": 4.053738670991239e-06, "loss": 1.006, "step": 54575 }, { "epoch": 16.33, "grad_norm": 1.8716338872909546, "learning_rate": 4.0505319729643554e-06, "loss": 0.9395, "step": 54580 }, { "epoch": 16.33, "grad_norm": 1.9372947216033936, "learning_rate": 4.0473264319548256e-06, "loss": 0.8675, "step": 54585 }, { "epoch": 16.33, "grad_norm": 2.8180017471313477, "learning_rate": 4.044122048139673e-06, "loss": 0.9045, "step": 54590 }, { "epoch": 16.33, "grad_norm": 2.123150110244751, "learning_rate": 4.0409188216958775e-06, "loss": 0.8954, "step": 54595 }, { "epoch": 16.34, "grad_norm": 1.7818152904510498, "learning_rate": 4.037716752800347e-06, "loss": 1.0991, "step": 54600 }, { "epoch": 16.34, "grad_norm": 1.8431493043899536, "learning_rate": 4.034515841629932e-06, "loss": 1.1051, "step": 54605 }, { "epoch": 16.34, "grad_norm": 1.9420349597930908, "learning_rate": 4.031316088361417e-06, "loss": 0.7161, "step": 54610 }, { "epoch": 16.34, "grad_norm": 2.2753307819366455, "learning_rate": 4.0281174931715185e-06, "loss": 0.8588, "step": 54615 }, { "epoch": 16.34, "grad_norm": 1.6886123418807983, "learning_rate": 4.024920056236889e-06, "loss": 0.9475, "step": 54620 }, { "epoch": 16.34, "grad_norm": 2.501573324203491, "learning_rate": 4.021723777734124e-06, "loss": 0.935, "step": 54625 }, { "epoch": 16.34, "grad_norm": 2.7062265872955322, "learning_rate": 4.018528657839752e-06, "loss": 1.1356, "step": 54630 }, { "epoch": 16.35, "grad_norm": 
1.7512439489364624, "learning_rate": 4.015334696730227e-06, "loss": 1.0477, "step": 54635 }, { "epoch": 16.35, "grad_norm": 3.424471139907837, "learning_rate": 4.012141894581958e-06, "loss": 0.9916, "step": 54640 }, { "epoch": 16.35, "grad_norm": 2.5888991355895996, "learning_rate": 4.008950251571278e-06, "loss": 0.8972, "step": 54645 }, { "epoch": 16.35, "grad_norm": 2.482107639312744, "learning_rate": 4.005759767874453e-06, "loss": 0.9857, "step": 54650 }, { "epoch": 16.35, "grad_norm": 4.3171796798706055, "learning_rate": 4.002570443667697e-06, "loss": 1.0612, "step": 54655 }, { "epoch": 16.35, "grad_norm": 2.3370325565338135, "learning_rate": 3.999382279127153e-06, "loss": 1.032, "step": 54660 }, { "epoch": 16.36, "grad_norm": 2.1718616485595703, "learning_rate": 3.996195274428888e-06, "loss": 1.2058, "step": 54665 }, { "epoch": 16.36, "grad_norm": 4.225613594055176, "learning_rate": 3.993009429748937e-06, "loss": 0.8797, "step": 54670 }, { "epoch": 16.36, "grad_norm": 3.9194295406341553, "learning_rate": 3.989824745263224e-06, "loss": 0.8541, "step": 54675 }, { "epoch": 16.36, "grad_norm": 3.645948886871338, "learning_rate": 3.986641221147666e-06, "loss": 1.0261, "step": 54680 }, { "epoch": 16.36, "grad_norm": 1.8858450651168823, "learning_rate": 3.9834588575780595e-06, "loss": 1.023, "step": 54685 }, { "epoch": 16.36, "grad_norm": 2.733396291732788, "learning_rate": 3.9802776547301884e-06, "loss": 1.2168, "step": 54690 }, { "epoch": 16.36, "grad_norm": 1.9093108177185059, "learning_rate": 3.977097612779723e-06, "loss": 0.9631, "step": 54695 }, { "epoch": 16.37, "grad_norm": 3.2848117351531982, "learning_rate": 3.9739187319023066e-06, "loss": 0.972, "step": 54700 }, { "epoch": 16.37, "grad_norm": 1.4223105907440186, "learning_rate": 3.970741012273507e-06, "loss": 0.9396, "step": 54705 }, { "epoch": 16.37, "grad_norm": 1.8752068281173706, "learning_rate": 3.967564454068817e-06, "loss": 0.8828, "step": 54710 }, { "epoch": 16.37, "grad_norm": 2.8616607189178467, 
"learning_rate": 3.964389057463683e-06, "loss": 1.0098, "step": 54715 }, { "epoch": 16.37, "grad_norm": 2.8053674697875977, "learning_rate": 3.961214822633477e-06, "loss": 0.9985, "step": 54720 }, { "epoch": 16.37, "grad_norm": 3.857592821121216, "learning_rate": 3.958041749753505e-06, "loss": 0.9801, "step": 54725 }, { "epoch": 16.37, "grad_norm": 1.627759337425232, "learning_rate": 3.954869838999017e-06, "loss": 0.9103, "step": 54730 }, { "epoch": 16.38, "grad_norm": 3.5982964038848877, "learning_rate": 3.951699090545194e-06, "loss": 0.7888, "step": 54735 }, { "epoch": 16.38, "grad_norm": 3.797572135925293, "learning_rate": 3.948529504567147e-06, "loss": 0.9846, "step": 54740 }, { "epoch": 16.38, "grad_norm": 2.0291848182678223, "learning_rate": 3.9453610812399375e-06, "loss": 0.8557, "step": 54745 }, { "epoch": 16.38, "grad_norm": 1.8980909585952759, "learning_rate": 3.9421938207385485e-06, "loss": 1.0815, "step": 54750 }, { "epoch": 16.38, "grad_norm": 3.711595058441162, "learning_rate": 3.939027723237914e-06, "loss": 0.9014, "step": 54755 }, { "epoch": 16.38, "grad_norm": 1.4966007471084595, "learning_rate": 3.935862788912875e-06, "loss": 0.9629, "step": 54760 }, { "epoch": 16.39, "grad_norm": 4.108902931213379, "learning_rate": 3.932699017938251e-06, "loss": 1.071, "step": 54765 }, { "epoch": 16.39, "grad_norm": 2.2687480449676514, "learning_rate": 3.929536410488749e-06, "loss": 0.8898, "step": 54770 }, { "epoch": 16.39, "grad_norm": 1.9844911098480225, "learning_rate": 3.926374966739063e-06, "loss": 0.9253, "step": 54775 }, { "epoch": 16.39, "grad_norm": 1.7941007614135742, "learning_rate": 3.923214686863769e-06, "loss": 0.9916, "step": 54780 }, { "epoch": 16.39, "grad_norm": 3.715179681777954, "learning_rate": 3.920055571037431e-06, "loss": 0.9925, "step": 54785 }, { "epoch": 16.39, "grad_norm": 3.1290266513824463, "learning_rate": 3.916897619434509e-06, "loss": 0.9698, "step": 54790 }, { "epoch": 16.39, "grad_norm": 2.829307794570923, "learning_rate": 
3.913740832229415e-06, "loss": 0.9611, "step": 54795 }, { "epoch": 16.4, "grad_norm": 4.010441780090332, "learning_rate": 3.910585209596499e-06, "loss": 0.8892, "step": 54800 }, { "epoch": 16.4, "grad_norm": 5.990212917327881, "learning_rate": 3.907430751710039e-06, "loss": 0.9162, "step": 54805 }, { "epoch": 16.4, "grad_norm": 5.119846820831299, "learning_rate": 3.904277458744254e-06, "loss": 1.0017, "step": 54810 }, { "epoch": 16.4, "grad_norm": 2.6883440017700195, "learning_rate": 3.901125330873298e-06, "loss": 0.9282, "step": 54815 }, { "epoch": 16.4, "grad_norm": 4.281408309936523, "learning_rate": 3.897974368271256e-06, "loss": 0.9491, "step": 54820 }, { "epoch": 16.4, "grad_norm": 1.7802320718765259, "learning_rate": 3.89482457111216e-06, "loss": 1.0643, "step": 54825 }, { "epoch": 16.4, "grad_norm": 1.374393343925476, "learning_rate": 3.891675939569961e-06, "loss": 1.0454, "step": 54830 }, { "epoch": 16.41, "grad_norm": 1.9680904150009155, "learning_rate": 3.888528473818562e-06, "loss": 0.8646, "step": 54835 }, { "epoch": 16.41, "grad_norm": 3.9535000324249268, "learning_rate": 3.88538217403179e-06, "loss": 0.914, "step": 54840 }, { "epoch": 16.41, "grad_norm": 4.569047451019287, "learning_rate": 3.882237040383413e-06, "loss": 0.8554, "step": 54845 }, { "epoch": 16.41, "grad_norm": 2.8582353591918945, "learning_rate": 3.879093073047141e-06, "loss": 1.1247, "step": 54850 }, { "epoch": 16.41, "grad_norm": 3.682445764541626, "learning_rate": 3.87595027219659e-06, "loss": 0.9073, "step": 54855 }, { "epoch": 16.41, "grad_norm": 2.520266056060791, "learning_rate": 3.872808638005362e-06, "loss": 0.8065, "step": 54860 }, { "epoch": 16.41, "grad_norm": 4.994065284729004, "learning_rate": 3.8696681706469425e-06, "loss": 0.9488, "step": 54865 }, { "epoch": 16.42, "grad_norm": 2.4194717407226562, "learning_rate": 3.866528870294795e-06, "loss": 0.8722, "step": 54870 }, { "epoch": 16.42, "grad_norm": 3.143922805786133, "learning_rate": 3.86339073712228e-06, "loss": 
1.0721, "step": 54875 }, { "epoch": 16.42, "grad_norm": 3.6272706985473633, "learning_rate": 3.860253771302736e-06, "loss": 1.1331, "step": 54880 }, { "epoch": 16.42, "grad_norm": 2.595782518386841, "learning_rate": 3.857117973009397e-06, "loss": 0.7396, "step": 54885 }, { "epoch": 16.42, "grad_norm": 2.1412410736083984, "learning_rate": 3.853983342415454e-06, "loss": 0.8936, "step": 54890 }, { "epoch": 16.42, "grad_norm": 2.1500375270843506, "learning_rate": 3.850849879694032e-06, "loss": 0.9859, "step": 54895 }, { "epoch": 16.43, "grad_norm": 2.9890248775482178, "learning_rate": 3.84771758501819e-06, "loss": 1.1094, "step": 54900 }, { "epoch": 16.43, "grad_norm": 3.5275800228118896, "learning_rate": 3.844586458560917e-06, "loss": 1.0972, "step": 54905 }, { "epoch": 16.43, "grad_norm": 3.5730512142181396, "learning_rate": 3.841456500495144e-06, "loss": 0.9291, "step": 54910 }, { "epoch": 16.43, "grad_norm": 2.170400619506836, "learning_rate": 3.838327710993736e-06, "loss": 0.9088, "step": 54915 }, { "epoch": 16.43, "grad_norm": 3.1143369674682617, "learning_rate": 3.835200090229493e-06, "loss": 0.9509, "step": 54920 }, { "epoch": 16.43, "grad_norm": 2.1064987182617188, "learning_rate": 3.832073638375147e-06, "loss": 1.1217, "step": 54925 }, { "epoch": 16.43, "grad_norm": 1.3867061138153076, "learning_rate": 3.828948355603374e-06, "loss": 0.9994, "step": 54930 }, { "epoch": 16.44, "grad_norm": 3.5733249187469482, "learning_rate": 3.8258242420867755e-06, "loss": 0.9443, "step": 54935 }, { "epoch": 16.44, "grad_norm": 4.986148357391357, "learning_rate": 3.8227012979978935e-06, "loss": 1.0024, "step": 54940 }, { "epoch": 16.44, "grad_norm": 3.2679810523986816, "learning_rate": 3.819579523509214e-06, "loss": 0.8478, "step": 54945 }, { "epoch": 16.44, "grad_norm": 1.716213345527649, "learning_rate": 3.816458918793131e-06, "loss": 0.7901, "step": 54950 }, { "epoch": 16.44, "grad_norm": 2.6446051597595215, "learning_rate": 3.813339484022013e-06, "loss": 0.9754, "step": 
54955 }, { "epoch": 16.44, "grad_norm": 3.2618935108184814, "learning_rate": 3.810221219368121e-06, "loss": 1.1121, "step": 54960 }, { "epoch": 16.44, "grad_norm": 2.8837664127349854, "learning_rate": 3.8071041250036964e-06, "loss": 0.9444, "step": 54965 }, { "epoch": 16.45, "grad_norm": 3.107837438583374, "learning_rate": 3.803988201100872e-06, "loss": 1.2149, "step": 54970 }, { "epoch": 16.45, "grad_norm": 1.7038583755493164, "learning_rate": 3.80087344783176e-06, "loss": 0.9996, "step": 54975 }, { "epoch": 16.45, "grad_norm": 4.2032365798950195, "learning_rate": 3.797759865368364e-06, "loss": 0.9182, "step": 54980 }, { "epoch": 16.45, "grad_norm": 3.1628682613372803, "learning_rate": 3.794647453882652e-06, "loss": 0.9363, "step": 54985 }, { "epoch": 16.45, "grad_norm": 2.4282500743865967, "learning_rate": 3.7915362135465205e-06, "loss": 1.1558, "step": 54990 }, { "epoch": 16.45, "grad_norm": 3.435473918914795, "learning_rate": 3.7884261445318007e-06, "loss": 0.959, "step": 54995 }, { "epoch": 16.46, "grad_norm": 3.692786455154419, "learning_rate": 3.7853172470102533e-06, "loss": 0.9526, "step": 55000 }, { "epoch": 16.46, "grad_norm": 1.5530604124069214, "learning_rate": 3.782209521153579e-06, "loss": 0.9287, "step": 55005 }, { "epoch": 16.46, "grad_norm": 2.799910306930542, "learning_rate": 3.77910296713343e-06, "loss": 1.0503, "step": 55010 }, { "epoch": 16.46, "grad_norm": 1.5998826026916504, "learning_rate": 3.7759975851213603e-06, "loss": 0.8919, "step": 55015 }, { "epoch": 16.46, "grad_norm": 1.571890115737915, "learning_rate": 3.772893375288883e-06, "loss": 0.9862, "step": 55020 }, { "epoch": 16.46, "grad_norm": 4.059834003448486, "learning_rate": 3.7697903378074412e-06, "loss": 1.0683, "step": 55025 }, { "epoch": 16.46, "grad_norm": 1.3430641889572144, "learning_rate": 3.7666884728484094e-06, "loss": 1.0935, "step": 55030 }, { "epoch": 16.47, "grad_norm": 3.6715023517608643, "learning_rate": 3.7635877805831026e-06, "loss": 0.9086, "step": 55035 }, { 
"epoch": 16.47, "grad_norm": 2.4204914569854736, "learning_rate": 3.760488261182768e-06, "loss": 1.0692, "step": 55040 }, { "epoch": 16.47, "grad_norm": 6.810561180114746, "learning_rate": 3.7573899148185932e-06, "loss": 1.2002, "step": 55045 }, { "epoch": 16.47, "grad_norm": 3.171234607696533, "learning_rate": 3.754292741661694e-06, "loss": 1.0991, "step": 55050 }, { "epoch": 16.47, "grad_norm": 3.974795341491699, "learning_rate": 3.7511967418831145e-06, "loss": 1.072, "step": 55055 }, { "epoch": 16.47, "grad_norm": 1.6691629886627197, "learning_rate": 3.748101915653862e-06, "loss": 0.9038, "step": 55060 }, { "epoch": 16.47, "grad_norm": 2.60886287689209, "learning_rate": 3.745008263144842e-06, "loss": 0.9588, "step": 55065 }, { "epoch": 16.48, "grad_norm": 1.6360840797424316, "learning_rate": 3.741915784526931e-06, "loss": 0.9272, "step": 55070 }, { "epoch": 16.48, "grad_norm": 1.671827793121338, "learning_rate": 3.7388244799709095e-06, "loss": 1.0059, "step": 55075 }, { "epoch": 16.48, "grad_norm": 1.6585466861724854, "learning_rate": 3.735734349647507e-06, "loss": 1.0757, "step": 55080 }, { "epoch": 16.48, "grad_norm": 2.521653890609741, "learning_rate": 3.7326453937274047e-06, "loss": 1.0236, "step": 55085 }, { "epoch": 16.48, "grad_norm": 2.377997636795044, "learning_rate": 3.7295576123811795e-06, "loss": 0.9756, "step": 55090 }, { "epoch": 16.48, "grad_norm": 2.9460082054138184, "learning_rate": 3.726471005779389e-06, "loss": 1.1004, "step": 55095 }, { "epoch": 16.49, "grad_norm": 3.480149507522583, "learning_rate": 3.7233855740924805e-06, "loss": 0.8099, "step": 55100 }, { "epoch": 16.49, "grad_norm": 1.956493616104126, "learning_rate": 3.7203013174908816e-06, "loss": 0.8011, "step": 55105 }, { "epoch": 16.49, "grad_norm": 3.8703737258911133, "learning_rate": 3.7172182361449174e-06, "loss": 1.013, "step": 55110 }, { "epoch": 16.49, "grad_norm": 1.4395601749420166, "learning_rate": 3.714136330224868e-06, "loss": 0.9137, "step": 55115 }, { "epoch": 16.49, 
"grad_norm": 0.9838583469390869, "learning_rate": 3.711055599900945e-06, "loss": 0.941, "step": 55120 }, { "epoch": 16.49, "grad_norm": 2.752509355545044, "learning_rate": 3.7079760453432895e-06, "loss": 0.9234, "step": 55125 }, { "epoch": 16.49, "grad_norm": 3.263653516769409, "learning_rate": 3.7048976667219877e-06, "loss": 0.974, "step": 55130 }, { "epoch": 16.5, "grad_norm": 3.148515462875366, "learning_rate": 3.701820464207054e-06, "loss": 0.8743, "step": 55135 }, { "epoch": 16.5, "grad_norm": 4.454446315765381, "learning_rate": 3.6987444379684348e-06, "loss": 1.0195, "step": 55140 }, { "epoch": 16.5, "grad_norm": 3.798888921737671, "learning_rate": 3.695669588176026e-06, "loss": 0.9871, "step": 55145 }, { "epoch": 16.5, "grad_norm": 1.551338791847229, "learning_rate": 3.6925959149996274e-06, "loss": 0.9701, "step": 55150 }, { "epoch": 16.5, "grad_norm": 4.081212997436523, "learning_rate": 3.689523418609017e-06, "loss": 1.0122, "step": 55155 }, { "epoch": 16.5, "grad_norm": 2.6100282669067383, "learning_rate": 3.686452099173876e-06, "loss": 0.8688, "step": 55160 }, { "epoch": 16.5, "grad_norm": 3.0267417430877686, "learning_rate": 3.683381956863832e-06, "loss": 0.9878, "step": 55165 }, { "epoch": 16.51, "grad_norm": 3.891383409500122, "learning_rate": 3.680312991848445e-06, "loss": 1.0131, "step": 55170 }, { "epoch": 16.51, "grad_norm": 2.7012784481048584, "learning_rate": 3.677245204297211e-06, "loss": 1.1283, "step": 55175 }, { "epoch": 16.51, "grad_norm": 1.8227554559707642, "learning_rate": 3.674178594379565e-06, "loss": 1.0563, "step": 55180 }, { "epoch": 16.51, "grad_norm": 3.7222025394439697, "learning_rate": 3.671113162264861e-06, "loss": 0.9301, "step": 55185 }, { "epoch": 16.51, "grad_norm": 2.2444727420806885, "learning_rate": 3.668048908122415e-06, "loss": 0.954, "step": 55190 }, { "epoch": 16.51, "grad_norm": 3.4596662521362305, "learning_rate": 3.6649858321214424e-06, "loss": 0.9375, "step": 55195 }, { "epoch": 16.52, "grad_norm": 
4.218637466430664, "learning_rate": 3.6619239344311383e-06, "loss": 1.0127, "step": 55200 }, { "epoch": 16.52, "grad_norm": 2.6548755168914795, "learning_rate": 3.6588632152205897e-06, "loss": 0.9714, "step": 55205 }, { "epoch": 16.52, "grad_norm": 2.7811079025268555, "learning_rate": 3.655803674658842e-06, "loss": 0.8688, "step": 55210 }, { "epoch": 16.52, "grad_norm": 1.8681191205978394, "learning_rate": 3.6527453129148738e-06, "loss": 1.0711, "step": 55215 }, { "epoch": 16.52, "grad_norm": 1.8448801040649414, "learning_rate": 3.6496881301575887e-06, "loss": 0.9248, "step": 55220 }, { "epoch": 16.52, "grad_norm": 3.9367079734802246, "learning_rate": 3.6466321265558406e-06, "loss": 0.994, "step": 55225 }, { "epoch": 16.52, "grad_norm": 3.2302794456481934, "learning_rate": 3.6435773022784004e-06, "loss": 0.8619, "step": 55230 }, { "epoch": 16.53, "grad_norm": 4.870388984680176, "learning_rate": 3.640523657493991e-06, "loss": 0.9849, "step": 55235 }, { "epoch": 16.53, "grad_norm": 1.335530400276184, "learning_rate": 3.637471192371256e-06, "loss": 1.0656, "step": 55240 }, { "epoch": 16.53, "grad_norm": 7.603557586669922, "learning_rate": 3.6344199070787815e-06, "loss": 1.1671, "step": 55245 }, { "epoch": 16.53, "grad_norm": 2.9642961025238037, "learning_rate": 3.6313698017850866e-06, "loss": 1.0257, "step": 55250 }, { "epoch": 16.53, "grad_norm": 2.462388277053833, "learning_rate": 3.628320876658628e-06, "loss": 0.8544, "step": 55255 }, { "epoch": 16.53, "grad_norm": 1.824250340461731, "learning_rate": 3.6252731318677902e-06, "loss": 1.0885, "step": 55260 }, { "epoch": 16.53, "grad_norm": 6.232416152954102, "learning_rate": 3.6222265675809025e-06, "loss": 0.9456, "step": 55265 }, { "epoch": 16.54, "grad_norm": 3.9859626293182373, "learning_rate": 3.6191811839662194e-06, "loss": 0.7835, "step": 55270 }, { "epoch": 16.54, "grad_norm": 3.1337802410125732, "learning_rate": 3.616136981191939e-06, "loss": 1.0781, "step": 55275 }, { "epoch": 16.54, "grad_norm": 
2.0644679069519043, "learning_rate": 3.6130939594261783e-06, "loss": 0.9863, "step": 55280 }, { "epoch": 16.54, "grad_norm": 4.25192403793335, "learning_rate": 3.610052118837015e-06, "loss": 0.8807, "step": 55285 }, { "epoch": 16.54, "grad_norm": 1.9776525497436523, "learning_rate": 3.6070114595924327e-06, "loss": 0.9195, "step": 55290 }, { "epoch": 16.54, "grad_norm": 1.4639649391174316, "learning_rate": 3.6039719818603797e-06, "loss": 1.0648, "step": 55295 }, { "epoch": 16.55, "grad_norm": 1.7178945541381836, "learning_rate": 3.600933685808708e-06, "loss": 0.921, "step": 55300 }, { "epoch": 16.55, "grad_norm": 2.7617597579956055, "learning_rate": 3.5978965716052248e-06, "loss": 0.9255, "step": 55305 }, { "epoch": 16.55, "grad_norm": 1.6579577922821045, "learning_rate": 3.5948606394176687e-06, "loss": 0.8896, "step": 55310 }, { "epoch": 16.55, "grad_norm": 5.555087089538574, "learning_rate": 3.5918258894137107e-06, "loss": 0.7572, "step": 55315 }, { "epoch": 16.55, "grad_norm": 3.7583398818969727, "learning_rate": 3.5887923217609586e-06, "loss": 0.8979, "step": 55320 }, { "epoch": 16.55, "grad_norm": 2.396233081817627, "learning_rate": 3.585759936626948e-06, "loss": 0.9145, "step": 55325 }, { "epoch": 16.55, "grad_norm": 2.1706974506378174, "learning_rate": 3.5827287341791583e-06, "loss": 0.9833, "step": 55330 }, { "epoch": 16.56, "grad_norm": 21.9448184967041, "learning_rate": 3.5796987145849974e-06, "loss": 0.9713, "step": 55335 }, { "epoch": 16.56, "grad_norm": 1.5266534090042114, "learning_rate": 3.5766698780118125e-06, "loss": 0.9322, "step": 55340 }, { "epoch": 16.56, "grad_norm": 3.1922240257263184, "learning_rate": 3.5736422246268804e-06, "loss": 0.9115, "step": 55345 }, { "epoch": 16.56, "grad_norm": 1.6911910772323608, "learning_rate": 3.5706157545974174e-06, "loss": 1.0724, "step": 55350 }, { "epoch": 16.56, "grad_norm": 1.6051517724990845, "learning_rate": 3.5675904680905704e-06, "loss": 0.9671, "step": 55355 }, { "epoch": 16.56, "grad_norm": 
3.0182387828826904, "learning_rate": 3.5645663652734277e-06, "loss": 0.9052, "step": 55360 }, { "epoch": 16.56, "grad_norm": 2.883270740509033, "learning_rate": 3.561543446313001e-06, "loss": 1.1312, "step": 55365 }, { "epoch": 16.57, "grad_norm": 5.493057727813721, "learning_rate": 3.5585217113762533e-06, "loss": 0.889, "step": 55370 }, { "epoch": 16.57, "grad_norm": 2.2365972995758057, "learning_rate": 3.5555011606300507e-06, "loss": 0.9428, "step": 55375 }, { "epoch": 16.57, "grad_norm": 2.719250440597534, "learning_rate": 3.5524817942412414e-06, "loss": 0.9899, "step": 55380 }, { "epoch": 16.57, "grad_norm": 1.566593050956726, "learning_rate": 3.549463612376558e-06, "loss": 0.8925, "step": 55385 }, { "epoch": 16.57, "grad_norm": 3.2841618061065674, "learning_rate": 3.5464466152027113e-06, "loss": 1.0915, "step": 55390 }, { "epoch": 16.57, "grad_norm": 3.8649485111236572, "learning_rate": 3.5434308028863107e-06, "loss": 0.9006, "step": 55395 }, { "epoch": 16.58, "grad_norm": 3.333514928817749, "learning_rate": 3.540416175593933e-06, "loss": 1.0511, "step": 55400 }, { "epoch": 16.58, "grad_norm": 3.3573429584503174, "learning_rate": 3.5374027334920595e-06, "loss": 1.0593, "step": 55405 }, { "epoch": 16.58, "grad_norm": 3.672663450241089, "learning_rate": 3.5343904767471235e-06, "loss": 0.8411, "step": 55410 }, { "epoch": 16.58, "grad_norm": 3.1080899238586426, "learning_rate": 3.5313794055254894e-06, "loss": 0.9778, "step": 55415 }, { "epoch": 16.58, "grad_norm": 3.76132869720459, "learning_rate": 3.5283695199934545e-06, "loss": 1.039, "step": 55420 }, { "epoch": 16.58, "grad_norm": 1.2220946550369263, "learning_rate": 3.5253608203172527e-06, "loss": 0.9653, "step": 55425 }, { "epoch": 16.58, "grad_norm": 2.680553674697876, "learning_rate": 3.5223533066630538e-06, "loss": 0.9479, "step": 55430 }, { "epoch": 16.59, "grad_norm": 2.850705862045288, "learning_rate": 3.5193469791969523e-06, "loss": 0.9687, "step": 55435 }, { "epoch": 16.59, "grad_norm": 
6.794698715209961, "learning_rate": 3.516341838084994e-06, "loss": 0.9051, "step": 55440 }, { "epoch": 16.59, "grad_norm": 1.4583183526992798, "learning_rate": 3.5133378834931424e-06, "loss": 0.9477, "step": 55445 }, { "epoch": 16.59, "grad_norm": 1.7493852376937866, "learning_rate": 3.5103351155873044e-06, "loss": 1.0363, "step": 55450 }, { "epoch": 16.59, "grad_norm": 2.9901397228240967, "learning_rate": 3.5073335345333247e-06, "loss": 1.0936, "step": 55455 }, { "epoch": 16.59, "grad_norm": 2.2844088077545166, "learning_rate": 3.504333140496968e-06, "loss": 0.9662, "step": 55460 }, { "epoch": 16.59, "grad_norm": 1.7796694040298462, "learning_rate": 3.5013339336439576e-06, "loss": 0.9429, "step": 55465 }, { "epoch": 16.6, "grad_norm": 1.1826192140579224, "learning_rate": 3.4983359141399164e-06, "loss": 1.0261, "step": 55470 }, { "epoch": 16.6, "grad_norm": 2.0418243408203125, "learning_rate": 3.4953390821504423e-06, "loss": 0.9578, "step": 55475 }, { "epoch": 16.6, "grad_norm": 6.461709022521973, "learning_rate": 3.492343437841028e-06, "loss": 0.9947, "step": 55480 }, { "epoch": 16.6, "grad_norm": 3.240283489227295, "learning_rate": 3.4893489813771413e-06, "loss": 0.8213, "step": 55485 }, { "epoch": 16.6, "grad_norm": 2.8579697608947754, "learning_rate": 3.486355712924139e-06, "loss": 0.912, "step": 55490 }, { "epoch": 16.6, "grad_norm": 2.917512893676758, "learning_rate": 3.48336363264736e-06, "loss": 0.9478, "step": 55495 }, { "epoch": 16.6, "grad_norm": 2.837340831756592, "learning_rate": 3.4803727407120375e-06, "loss": 1.032, "step": 55500 }, { "epoch": 16.61, "grad_norm": 3.3056883811950684, "learning_rate": 3.4773830372833576e-06, "loss": 1.1411, "step": 55505 }, { "epoch": 16.61, "grad_norm": 4.646909713745117, "learning_rate": 3.474394522526442e-06, "loss": 0.9487, "step": 55510 }, { "epoch": 16.61, "grad_norm": 5.3782124519348145, "learning_rate": 3.4714071966063436e-06, "loss": 0.9774, "step": 55515 }, { "epoch": 16.61, "grad_norm": 1.5361804962158203, 
"learning_rate": 3.4684210596880486e-06, "loss": 1.0604, "step": 55520 }, { "epoch": 16.61, "grad_norm": 2.3509507179260254, "learning_rate": 3.4654361119364764e-06, "loss": 0.9274, "step": 55525 }, { "epoch": 16.61, "grad_norm": 4.507537841796875, "learning_rate": 3.4624523535164823e-06, "loss": 0.8698, "step": 55530 }, { "epoch": 16.62, "grad_norm": 2.660539388656616, "learning_rate": 3.459469784592859e-06, "loss": 0.9137, "step": 55535 }, { "epoch": 16.62, "grad_norm": 5.054265022277832, "learning_rate": 3.4564884053303298e-06, "loss": 0.9331, "step": 55540 }, { "epoch": 16.62, "grad_norm": 1.3174716234207153, "learning_rate": 3.4535082158935527e-06, "loss": 1.135, "step": 55545 }, { "epoch": 16.62, "grad_norm": 2.0269131660461426, "learning_rate": 3.450529216447121e-06, "loss": 1.0076, "step": 55550 }, { "epoch": 16.62, "grad_norm": 5.588700771331787, "learning_rate": 3.447551407155561e-06, "loss": 0.9909, "step": 55555 }, { "epoch": 16.62, "grad_norm": 2.787294864654541, "learning_rate": 3.4445747881833386e-06, "loss": 1.0891, "step": 55560 }, { "epoch": 16.62, "grad_norm": 2.2700955867767334, "learning_rate": 3.441599359694836e-06, "loss": 0.9488, "step": 55565 }, { "epoch": 16.63, "grad_norm": 1.9791069030761719, "learning_rate": 3.4386251218543996e-06, "loss": 1.0003, "step": 55570 }, { "epoch": 16.63, "grad_norm": 2.2911648750305176, "learning_rate": 3.435652074826279e-06, "loss": 1.0334, "step": 55575 }, { "epoch": 16.63, "grad_norm": 5.44551420211792, "learning_rate": 3.4326802187746864e-06, "loss": 0.9583, "step": 55580 }, { "epoch": 16.63, "grad_norm": 1.6152199506759644, "learning_rate": 3.4297095538637388e-06, "loss": 1.0084, "step": 55585 }, { "epoch": 16.63, "grad_norm": 3.9968864917755127, "learning_rate": 3.4267400802575233e-06, "loss": 0.888, "step": 55590 }, { "epoch": 16.63, "grad_norm": 1.834970474243164, "learning_rate": 3.423771798120021e-06, "loss": 0.8411, "step": 55595 }, { "epoch": 16.63, "grad_norm": 1.1965527534484863, 
"learning_rate": 3.4208047076151773e-06, "loss": 0.9757, "step": 55600 }, { "epoch": 16.64, "grad_norm": 2.324232578277588, "learning_rate": 3.417838808906859e-06, "loss": 0.9249, "step": 55605 }, { "epoch": 16.64, "grad_norm": 1.6368885040283203, "learning_rate": 3.4148741021588686e-06, "loss": 1.0691, "step": 55610 }, { "epoch": 16.64, "grad_norm": 2.840993881225586, "learning_rate": 3.4119105875349444e-06, "loss": 0.998, "step": 55615 }, { "epoch": 16.64, "grad_norm": 4.377618789672852, "learning_rate": 3.4089482651987607e-06, "loss": 1.0532, "step": 55620 }, { "epoch": 16.64, "grad_norm": 3.657803535461426, "learning_rate": 3.4059871353139207e-06, "loss": 0.8328, "step": 55625 }, { "epoch": 16.64, "grad_norm": 2.706447124481201, "learning_rate": 3.403027198043968e-06, "loss": 0.8507, "step": 55630 }, { "epoch": 16.65, "grad_norm": 2.529573917388916, "learning_rate": 3.4000684535523714e-06, "loss": 0.9334, "step": 55635 }, { "epoch": 16.65, "grad_norm": 3.0248026847839355, "learning_rate": 3.397110902002543e-06, "loss": 0.9053, "step": 55640 }, { "epoch": 16.65, "grad_norm": 1.9483550786972046, "learning_rate": 3.394154543557826e-06, "loss": 0.8746, "step": 55645 }, { "epoch": 16.65, "grad_norm": 6.750313758850098, "learning_rate": 3.3911993783814934e-06, "loss": 0.9609, "step": 55650 }, { "epoch": 16.65, "grad_norm": 4.349567890167236, "learning_rate": 3.388245406636764e-06, "loss": 0.9785, "step": 55655 }, { "epoch": 16.65, "grad_norm": 4.089972019195557, "learning_rate": 3.3852926284867665e-06, "loss": 1.1758, "step": 55660 }, { "epoch": 16.65, "grad_norm": 1.6726969480514526, "learning_rate": 3.3823410440946e-06, "loss": 0.8968, "step": 55665 }, { "epoch": 16.66, "grad_norm": 12.856719970703125, "learning_rate": 3.379390653623257e-06, "loss": 0.9887, "step": 55670 }, { "epoch": 16.66, "grad_norm": 4.076183795928955, "learning_rate": 3.3764414572357046e-06, "loss": 0.8843, "step": 55675 }, { "epoch": 16.66, "grad_norm": 3.290557384490967, "learning_rate": 
3.3734934550948034e-06, "loss": 0.9424, "step": 55680 }, { "epoch": 16.66, "grad_norm": 1.8041436672210693, "learning_rate": 3.37054664736339e-06, "loss": 1.054, "step": 55685 }, { "epoch": 16.66, "grad_norm": 2.115232467651367, "learning_rate": 3.3676010342041988e-06, "loss": 1.0038, "step": 55690 }, { "epoch": 16.66, "grad_norm": 2.6543099880218506, "learning_rate": 3.364656615779918e-06, "loss": 0.9418, "step": 55695 }, { "epoch": 16.66, "grad_norm": 1.230863332748413, "learning_rate": 3.36171339225316e-06, "loss": 0.9834, "step": 55700 }, { "epoch": 16.67, "grad_norm": 4.0604047775268555, "learning_rate": 3.358771363786481e-06, "loss": 0.9245, "step": 55705 }, { "epoch": 16.67, "grad_norm": 15.559052467346191, "learning_rate": 3.355830530542367e-06, "loss": 0.9499, "step": 55710 }, { "epoch": 16.67, "grad_norm": 7.960972309112549, "learning_rate": 3.3528908926832304e-06, "loss": 0.9759, "step": 55715 }, { "epoch": 16.67, "grad_norm": 3.3675858974456787, "learning_rate": 3.3499524503714326e-06, "loss": 0.9966, "step": 55720 }, { "epoch": 16.67, "grad_norm": 2.5648207664489746, "learning_rate": 3.3470152037692547e-06, "loss": 1.1776, "step": 55725 }, { "epoch": 16.67, "grad_norm": 3.301900625228882, "learning_rate": 3.3440791530389232e-06, "loss": 1.0791, "step": 55730 }, { "epoch": 16.68, "grad_norm": 1.4749406576156616, "learning_rate": 3.3411442983425885e-06, "loss": 1.0181, "step": 55735 }, { "epoch": 16.68, "grad_norm": 1.6755149364471436, "learning_rate": 3.33821063984234e-06, "loss": 1.0547, "step": 55740 }, { "epoch": 16.68, "grad_norm": 3.6287951469421387, "learning_rate": 3.335278177700202e-06, "loss": 0.9052, "step": 55745 }, { "epoch": 16.68, "grad_norm": 2.7663354873657227, "learning_rate": 3.3323469120781326e-06, "loss": 0.8988, "step": 55750 }, { "epoch": 16.68, "grad_norm": 2.6993236541748047, "learning_rate": 3.329416843138017e-06, "loss": 0.983, "step": 55755 }, { "epoch": 16.68, "grad_norm": 6.778196811676025, "learning_rate": 
3.3264879710416917e-06, "loss": 1.0198, "step": 55760 }, { "epoch": 16.68, "grad_norm": 1.4923733472824097, "learning_rate": 3.3235602959508977e-06, "loss": 0.9077, "step": 55765 }, { "epoch": 16.69, "grad_norm": 2.7575523853302, "learning_rate": 3.3206338180273465e-06, "loss": 0.8404, "step": 55770 }, { "epoch": 16.69, "grad_norm": 4.008522033691406, "learning_rate": 3.3177085374326453e-06, "loss": 0.7967, "step": 55775 }, { "epoch": 16.69, "grad_norm": 3.0780680179595947, "learning_rate": 3.314784454328376e-06, "loss": 0.9908, "step": 55780 }, { "epoch": 16.69, "grad_norm": 3.2288169860839844, "learning_rate": 3.3118615688760124e-06, "loss": 0.9111, "step": 55785 }, { "epoch": 16.69, "grad_norm": 2.5853915214538574, "learning_rate": 3.3089398812369942e-06, "loss": 1.0616, "step": 55790 }, { "epoch": 16.69, "grad_norm": 2.06427264213562, "learning_rate": 3.3060193915726763e-06, "loss": 0.997, "step": 55795 }, { "epoch": 16.69, "grad_norm": 9.891775131225586, "learning_rate": 3.3031001000443597e-06, "loss": 1.0209, "step": 55800 }, { "epoch": 16.7, "grad_norm": 2.132046937942505, "learning_rate": 3.300182006813271e-06, "loss": 0.9908, "step": 55805 }, { "epoch": 16.7, "grad_norm": 2.466611862182617, "learning_rate": 3.297265112040568e-06, "loss": 1.0584, "step": 55810 }, { "epoch": 16.7, "grad_norm": 1.6372896432876587, "learning_rate": 3.2943494158873684e-06, "loss": 0.8243, "step": 55815 }, { "epoch": 16.7, "grad_norm": 2.080674886703491, "learning_rate": 3.29143491851468e-06, "loss": 0.9725, "step": 55820 }, { "epoch": 16.7, "grad_norm": 2.6015894412994385, "learning_rate": 3.288521620083479e-06, "loss": 1.0789, "step": 55825 }, { "epoch": 16.7, "grad_norm": 2.357999086380005, "learning_rate": 3.2856095207546595e-06, "loss": 0.9111, "step": 55830 }, { "epoch": 16.71, "grad_norm": 2.4748330116271973, "learning_rate": 3.2826986206890537e-06, "loss": 1.0326, "step": 55835 }, { "epoch": 16.71, "grad_norm": 3.481816291809082, "learning_rate": 3.279788920047433e-06, 
"loss": 0.9831, "step": 55840 }, { "epoch": 16.71, "grad_norm": 2.6234841346740723, "learning_rate": 3.2768804189904913e-06, "loss": 1.0191, "step": 55845 }, { "epoch": 16.71, "grad_norm": 1.8331263065338135, "learning_rate": 3.2739731176788636e-06, "loss": 0.9873, "step": 55850 }, { "epoch": 16.71, "grad_norm": 1.4677926301956177, "learning_rate": 3.271067016273124e-06, "loss": 0.9661, "step": 55855 }, { "epoch": 16.71, "grad_norm": 2.3680524826049805, "learning_rate": 3.2681621149337538e-06, "loss": 0.862, "step": 55860 }, { "epoch": 16.71, "grad_norm": 1.7516663074493408, "learning_rate": 3.265258413821215e-06, "loss": 0.9544, "step": 55865 }, { "epoch": 16.72, "grad_norm": 1.8315908908843994, "learning_rate": 3.262355913095852e-06, "loss": 0.9942, "step": 55870 }, { "epoch": 16.72, "grad_norm": 2.081782341003418, "learning_rate": 3.259454612917984e-06, "loss": 1.2045, "step": 55875 }, { "epoch": 16.72, "grad_norm": 2.6945362091064453, "learning_rate": 3.256554513447832e-06, "loss": 1.0241, "step": 55880 }, { "epoch": 16.72, "grad_norm": 3.111546277999878, "learning_rate": 3.253655614845583e-06, "loss": 0.9431, "step": 55885 }, { "epoch": 16.72, "grad_norm": 3.409693717956543, "learning_rate": 3.250757917271324e-06, "loss": 1.1223, "step": 55890 }, { "epoch": 16.72, "grad_norm": 3.733633279800415, "learning_rate": 3.2478614208850928e-06, "loss": 1.0297, "step": 55895 }, { "epoch": 16.72, "grad_norm": 2.1389636993408203, "learning_rate": 3.2449661258468768e-06, "loss": 0.9982, "step": 55900 }, { "epoch": 16.73, "grad_norm": 2.501753568649292, "learning_rate": 3.2420720323165566e-06, "loss": 0.9889, "step": 55905 }, { "epoch": 16.73, "grad_norm": 1.5202560424804688, "learning_rate": 3.239179140453996e-06, "loss": 0.8041, "step": 55910 }, { "epoch": 16.73, "grad_norm": 4.239963054656982, "learning_rate": 3.2362874504189428e-06, "loss": 0.9234, "step": 55915 }, { "epoch": 16.73, "grad_norm": 3.403322458267212, "learning_rate": 3.2333969623711157e-06, "loss": 0.935, 
"step": 55920 }, { "epoch": 16.73, "grad_norm": 1.6759166717529297, "learning_rate": 3.2305076764701458e-06, "loss": 0.895, "step": 55925 }, { "epoch": 16.73, "grad_norm": 3.4392189979553223, "learning_rate": 3.2276195928756107e-06, "loss": 0.9788, "step": 55930 }, { "epoch": 16.74, "grad_norm": 2.1375179290771484, "learning_rate": 3.224732711747014e-06, "loss": 0.8672, "step": 55935 }, { "epoch": 16.74, "grad_norm": 1.1759334802627563, "learning_rate": 3.221847033243794e-06, "loss": 1.0013, "step": 55940 }, { "epoch": 16.74, "grad_norm": 2.3137335777282715, "learning_rate": 3.2189625575253265e-06, "loss": 0.8176, "step": 55945 }, { "epoch": 16.74, "grad_norm": 1.971378207206726, "learning_rate": 3.2160792847509225e-06, "loss": 0.8394, "step": 55950 }, { "epoch": 16.74, "grad_norm": 1.7172585725784302, "learning_rate": 3.213197215079805e-06, "loss": 0.9224, "step": 55955 }, { "epoch": 16.74, "grad_norm": 6.083642482757568, "learning_rate": 3.210316348671169e-06, "loss": 1.0735, "step": 55960 }, { "epoch": 16.74, "grad_norm": 1.318853497505188, "learning_rate": 3.2074366856841036e-06, "loss": 1.0313, "step": 55965 }, { "epoch": 16.75, "grad_norm": 2.593428134918213, "learning_rate": 3.2045582262776626e-06, "loss": 0.9487, "step": 55970 }, { "epoch": 16.75, "grad_norm": 3.650310516357422, "learning_rate": 3.2016809706108153e-06, "loss": 0.9554, "step": 55975 }, { "epoch": 16.75, "grad_norm": 1.8704103231430054, "learning_rate": 3.1988049188424683e-06, "loss": 1.0365, "step": 55980 }, { "epoch": 16.75, "grad_norm": 1.763230562210083, "learning_rate": 3.1959300711314727e-06, "loss": 1.0271, "step": 55985 }, { "epoch": 16.75, "grad_norm": 2.3225176334381104, "learning_rate": 3.1930564276365842e-06, "loss": 0.9846, "step": 55990 }, { "epoch": 16.75, "grad_norm": 3.4815707206726074, "learning_rate": 3.190183988516532e-06, "loss": 0.9911, "step": 55995 }, { "epoch": 16.75, "grad_norm": 2.6258528232574463, "learning_rate": 3.187312753929936e-06, "loss": 1.1604, "step": 
56000 }, { "epoch": 16.76, "grad_norm": 1.8864493370056152, "learning_rate": 3.184442724035397e-06, "loss": 1.023, "step": 56005 }, { "epoch": 16.76, "grad_norm": 3.0884039402008057, "learning_rate": 3.1815738989914022e-06, "loss": 0.9337, "step": 56010 }, { "epoch": 16.76, "grad_norm": 2.619140625, "learning_rate": 3.178706278956403e-06, "loss": 0.9569, "step": 56015 }, { "epoch": 16.76, "grad_norm": 2.4161531925201416, "learning_rate": 3.1758398640887753e-06, "loss": 1.0311, "step": 56020 }, { "epoch": 16.76, "grad_norm": 2.404909133911133, "learning_rate": 3.172974654546826e-06, "loss": 0.8572, "step": 56025 }, { "epoch": 16.76, "grad_norm": 3.482473850250244, "learning_rate": 3.1701106504888e-06, "loss": 0.9448, "step": 56030 }, { "epoch": 16.77, "grad_norm": 1.7155938148498535, "learning_rate": 3.167247852072869e-06, "loss": 0.8405, "step": 56035 }, { "epoch": 16.77, "grad_norm": 5.21242094039917, "learning_rate": 3.164386259457147e-06, "loss": 0.999, "step": 56040 }, { "epoch": 16.77, "grad_norm": 0.9593510627746582, "learning_rate": 3.1615258727996812e-06, "loss": 0.9095, "step": 56045 }, { "epoch": 16.77, "grad_norm": 3.08491849899292, "learning_rate": 3.15866669225843e-06, "loss": 0.9706, "step": 56050 }, { "epoch": 16.77, "grad_norm": 4.684256076812744, "learning_rate": 3.1558087179913183e-06, "loss": 0.9068, "step": 56055 }, { "epoch": 16.77, "grad_norm": 1.6469522714614868, "learning_rate": 3.1529519501561883e-06, "loss": 0.8806, "step": 56060 }, { "epoch": 16.77, "grad_norm": 2.585136651992798, "learning_rate": 3.1500963889108113e-06, "loss": 0.986, "step": 56065 }, { "epoch": 16.78, "grad_norm": 1.8152649402618408, "learning_rate": 3.1472420344129005e-06, "loss": 0.9496, "step": 56070 }, { "epoch": 16.78, "grad_norm": 4.420323371887207, "learning_rate": 3.144388886820096e-06, "loss": 1.0925, "step": 56075 }, { "epoch": 16.78, "grad_norm": 1.4763715267181396, "learning_rate": 3.1415369462899803e-06, "loss": 1.0199, "step": 56080 }, { "epoch": 16.78, 
"grad_norm": 5.959677219390869, "learning_rate": 3.1386862129800494e-06, "loss": 0.9485, "step": 56085 }, { "epoch": 16.78, "grad_norm": 3.459981918334961, "learning_rate": 3.135836687047766e-06, "loss": 0.8902, "step": 56090 }, { "epoch": 16.78, "grad_norm": 3.747342348098755, "learning_rate": 3.132988368650486e-06, "loss": 0.9345, "step": 56095 }, { "epoch": 16.78, "grad_norm": 2.7175967693328857, "learning_rate": 3.1301412579455373e-06, "loss": 0.9902, "step": 56100 }, { "epoch": 16.79, "grad_norm": 2.1608591079711914, "learning_rate": 3.127295355090146e-06, "loss": 0.9676, "step": 56105 }, { "epoch": 16.79, "grad_norm": 4.2502336502075195, "learning_rate": 3.124450660241507e-06, "loss": 1.024, "step": 56110 }, { "epoch": 16.79, "grad_norm": 3.1099443435668945, "learning_rate": 3.121607173556712e-06, "loss": 0.8469, "step": 56115 }, { "epoch": 16.79, "grad_norm": 2.466075897216797, "learning_rate": 3.1187648951928134e-06, "loss": 0.8405, "step": 56120 }, { "epoch": 16.79, "grad_norm": 1.5878918170928955, "learning_rate": 3.115923825306785e-06, "loss": 1.0979, "step": 56125 }, { "epoch": 16.79, "grad_norm": 6.672008037567139, "learning_rate": 3.113083964055538e-06, "loss": 1.0864, "step": 56130 }, { "epoch": 16.79, "grad_norm": 3.1133804321289062, "learning_rate": 3.1102453115959106e-06, "loss": 0.8562, "step": 56135 }, { "epoch": 16.8, "grad_norm": 2.506171226501465, "learning_rate": 3.1074078680846858e-06, "loss": 0.8097, "step": 56140 }, { "epoch": 16.8, "grad_norm": 3.1147594451904297, "learning_rate": 3.104571633678563e-06, "loss": 0.8618, "step": 56145 }, { "epoch": 16.8, "grad_norm": 2.9119558334350586, "learning_rate": 3.101736608534195e-06, "loss": 0.9775, "step": 56150 }, { "epoch": 16.8, "grad_norm": 2.8269081115722656, "learning_rate": 3.098902792808148e-06, "loss": 0.8513, "step": 56155 }, { "epoch": 16.8, "grad_norm": 3.424445867538452, "learning_rate": 3.096070186656938e-06, "loss": 0.9278, "step": 56160 }, { "epoch": 16.8, "grad_norm": 
2.689589500427246, "learning_rate": 3.0932387902370017e-06, "loss": 0.9281, "step": 56165 }, { "epoch": 16.81, "grad_norm": 2.5684525966644287, "learning_rate": 3.090408603704717e-06, "loss": 1.0208, "step": 56170 }, { "epoch": 16.81, "grad_norm": 1.9679588079452515, "learning_rate": 3.0875796272163972e-06, "loss": 0.9015, "step": 56175 }, { "epoch": 16.81, "grad_norm": 1.7874964475631714, "learning_rate": 3.084751860928264e-06, "loss": 1.0641, "step": 56180 }, { "epoch": 16.81, "grad_norm": 3.177513599395752, "learning_rate": 3.081925304996522e-06, "loss": 1.0432, "step": 56185 }, { "epoch": 16.81, "grad_norm": 3.6404199600219727, "learning_rate": 3.0790999595772473e-06, "loss": 0.9549, "step": 56190 }, { "epoch": 16.81, "grad_norm": 11.20604419708252, "learning_rate": 3.0762758248265107e-06, "loss": 1.0167, "step": 56195 }, { "epoch": 16.81, "grad_norm": 2.8811817169189453, "learning_rate": 3.073452900900259e-06, "loss": 0.8696, "step": 56200 }, { "epoch": 16.82, "grad_norm": 3.1575052738189697, "learning_rate": 3.070631187954423e-06, "loss": 0.7686, "step": 56205 }, { "epoch": 16.82, "grad_norm": 1.944756269454956, "learning_rate": 3.067810686144826e-06, "loss": 1.0258, "step": 56210 }, { "epoch": 16.82, "grad_norm": 3.3357532024383545, "learning_rate": 3.064991395627248e-06, "loss": 0.8491, "step": 56215 }, { "epoch": 16.82, "grad_norm": 4.347214221954346, "learning_rate": 3.062173316557396e-06, "loss": 0.9459, "step": 56220 }, { "epoch": 16.82, "grad_norm": 2.0537803173065186, "learning_rate": 3.0593564490909084e-06, "loss": 0.9565, "step": 56225 }, { "epoch": 16.82, "grad_norm": 2.428102493286133, "learning_rate": 3.0565407933833586e-06, "loss": 0.7407, "step": 56230 }, { "epoch": 16.82, "grad_norm": 4.625239372253418, "learning_rate": 3.0537263495902522e-06, "loss": 0.9934, "step": 56235 }, { "epoch": 16.83, "grad_norm": 1.8983107805252075, "learning_rate": 3.050913117867027e-06, "loss": 1.0148, "step": 56240 }, { "epoch": 16.83, "grad_norm": 
1.6342809200286865, "learning_rate": 3.048101098369055e-06, "loss": 1.0129, "step": 56245 }, { "epoch": 16.83, "grad_norm": 1.8513437509536743, "learning_rate": 3.0452902912516403e-06, "loss": 0.855, "step": 56250 }, { "epoch": 16.83, "grad_norm": 3.056840658187866, "learning_rate": 3.0424806966700246e-06, "loss": 0.9762, "step": 56255 }, { "epoch": 16.83, "grad_norm": 9.707741737365723, "learning_rate": 3.0396723147793737e-06, "loss": 0.9429, "step": 56260 }, { "epoch": 16.83, "grad_norm": 8.954275131225586, "learning_rate": 3.036865145734796e-06, "loss": 1.0287, "step": 56265 }, { "epoch": 16.84, "grad_norm": 2.4749701023101807, "learning_rate": 3.034059189691335e-06, "loss": 1.0522, "step": 56270 }, { "epoch": 16.84, "grad_norm": 3.8181324005126953, "learning_rate": 3.0312544468039383e-06, "loss": 0.9914, "step": 56275 }, { "epoch": 16.84, "grad_norm": 6.021519660949707, "learning_rate": 3.028450917227535e-06, "loss": 0.7978, "step": 56280 }, { "epoch": 16.84, "grad_norm": 2.9637115001678467, "learning_rate": 3.02564860111694e-06, "loss": 1.0615, "step": 56285 }, { "epoch": 16.84, "grad_norm": 4.300103187561035, "learning_rate": 3.0228474986269406e-06, "loss": 0.7859, "step": 56290 }, { "epoch": 16.84, "grad_norm": 1.1417111158370972, "learning_rate": 3.0200476099122186e-06, "loss": 1.0462, "step": 56295 }, { "epoch": 16.84, "grad_norm": 1.8585431575775146, "learning_rate": 3.0172489351274313e-06, "loss": 1.1015, "step": 56300 }, { "epoch": 16.85, "grad_norm": 1.4056024551391602, "learning_rate": 3.0144514744271325e-06, "loss": 1.066, "step": 56305 }, { "epoch": 16.85, "grad_norm": 2.6178314685821533, "learning_rate": 3.011655227965823e-06, "loss": 1.0539, "step": 56310 }, { "epoch": 16.85, "grad_norm": 4.641908645629883, "learning_rate": 3.0088601958979416e-06, "loss": 0.7809, "step": 56315 }, { "epoch": 16.85, "grad_norm": 8.58937931060791, "learning_rate": 3.006066378377853e-06, "loss": 1.0699, "step": 56320 }, { "epoch": 16.85, "grad_norm": 
1.9301339387893677, "learning_rate": 3.003273775559856e-06, "loss": 0.9594, "step": 56325 }, { "epoch": 16.85, "grad_norm": 3.1416869163513184, "learning_rate": 3.0004823875981857e-06, "loss": 0.8427, "step": 56330 }, { "epoch": 16.85, "grad_norm": 1.7465052604675293, "learning_rate": 2.997692214647005e-06, "loss": 1.0006, "step": 56335 }, { "epoch": 16.86, "grad_norm": 1.5967586040496826, "learning_rate": 2.9949032568604125e-06, "loss": 1.0248, "step": 56340 }, { "epoch": 16.86, "grad_norm": 2.2583372592926025, "learning_rate": 2.992115514392441e-06, "loss": 1.0231, "step": 56345 }, { "epoch": 16.86, "grad_norm": 4.796255111694336, "learning_rate": 2.9893289873970528e-06, "loss": 0.9619, "step": 56350 }, { "epoch": 16.86, "grad_norm": 6.1636152267456055, "learning_rate": 2.9865436760281445e-06, "loss": 0.9783, "step": 56355 }, { "epoch": 16.86, "grad_norm": 2.931490421295166, "learning_rate": 2.983759580439549e-06, "loss": 0.9008, "step": 56360 }, { "epoch": 16.86, "grad_norm": 2.367924213409424, "learning_rate": 2.980976700785035e-06, "loss": 1.0938, "step": 56365 }, { "epoch": 16.87, "grad_norm": 1.7075245380401611, "learning_rate": 2.978195037218276e-06, "loss": 1.027, "step": 56370 }, { "epoch": 16.87, "grad_norm": 2.3319573402404785, "learning_rate": 2.9754145898929275e-06, "loss": 0.9178, "step": 56375 }, { "epoch": 16.87, "grad_norm": 2.598543643951416, "learning_rate": 2.972635358962525e-06, "loss": 0.8888, "step": 56380 }, { "epoch": 16.87, "grad_norm": 1.452913522720337, "learning_rate": 2.9698573445805873e-06, "loss": 1.0465, "step": 56385 }, { "epoch": 16.87, "grad_norm": 6.2622833251953125, "learning_rate": 2.967080546900519e-06, "loss": 0.9591, "step": 56390 }, { "epoch": 16.87, "grad_norm": 2.436224937438965, "learning_rate": 2.964304966075701e-06, "loss": 0.935, "step": 56395 }, { "epoch": 16.87, "grad_norm": 2.5119998455047607, "learning_rate": 2.9615306022594104e-06, "loss": 0.9041, "step": 56400 }, { "epoch": 16.88, "grad_norm": 
2.9698565006256104, "learning_rate": 2.958757455604874e-06, "loss": 1.0233, "step": 56405 }, { "epoch": 16.88, "grad_norm": 1.585296630859375, "learning_rate": 2.9559855262652564e-06, "loss": 1.0708, "step": 56410 }, { "epoch": 16.88, "grad_norm": 2.456385850906372, "learning_rate": 2.9532148143936426e-06, "loss": 0.8779, "step": 56415 }, { "epoch": 16.88, "grad_norm": 2.774512767791748, "learning_rate": 2.950445320143058e-06, "loss": 1.0221, "step": 56420 }, { "epoch": 16.88, "grad_norm": 1.0109182596206665, "learning_rate": 2.9476770436664582e-06, "loss": 0.9762, "step": 56425 }, { "epoch": 16.88, "grad_norm": 1.9075700044631958, "learning_rate": 2.9449099851167365e-06, "loss": 0.8601, "step": 56430 }, { "epoch": 16.88, "grad_norm": 1.4174439907073975, "learning_rate": 2.9421441446467078e-06, "loss": 1.0847, "step": 56435 }, { "epoch": 16.89, "grad_norm": 2.0944745540618896, "learning_rate": 2.9393795224091332e-06, "loss": 0.7858, "step": 56440 }, { "epoch": 16.89, "grad_norm": 3.1148488521575928, "learning_rate": 2.9366161185566952e-06, "loss": 1.08, "step": 56445 }, { "epoch": 16.89, "grad_norm": 2.154017448425293, "learning_rate": 2.9338539332420143e-06, "loss": 1.025, "step": 56450 }, { "epoch": 16.89, "grad_norm": 1.8690617084503174, "learning_rate": 2.931092966617646e-06, "loss": 1.0529, "step": 56455 }, { "epoch": 16.89, "grad_norm": 1.549531102180481, "learning_rate": 2.928333218836074e-06, "loss": 0.8698, "step": 56460 }, { "epoch": 16.89, "grad_norm": 4.329528331756592, "learning_rate": 2.9255746900497148e-06, "loss": 0.8416, "step": 56465 }, { "epoch": 16.9, "grad_norm": 4.210263729095459, "learning_rate": 2.9228173804109276e-06, "loss": 1.1474, "step": 56470 }, { "epoch": 16.9, "grad_norm": 2.099457263946533, "learning_rate": 2.9200612900719764e-06, "loss": 1.1059, "step": 56475 }, { "epoch": 16.9, "grad_norm": 4.597415924072266, "learning_rate": 2.917306419185098e-06, "loss": 0.8395, "step": 56480 }, { "epoch": 16.9, "grad_norm": 2.6783969402313232, 
"learning_rate": 2.914552767902426e-06, "loss": 1.0578, "step": 56485 }, { "epoch": 16.9, "grad_norm": 1.4600439071655273, "learning_rate": 2.9118003363760553e-06, "loss": 0.8978, "step": 56490 }, { "epoch": 16.9, "grad_norm": 3.4358808994293213, "learning_rate": 2.90904912475799e-06, "loss": 0.9432, "step": 56495 }, { "epoch": 16.9, "grad_norm": 2.708552360534668, "learning_rate": 2.906299133200177e-06, "loss": 0.9166, "step": 56500 }, { "epoch": 16.91, "grad_norm": 2.577993869781494, "learning_rate": 2.9035503618544986e-06, "loss": 1.0563, "step": 56505 }, { "epoch": 16.91, "grad_norm": 2.4056396484375, "learning_rate": 2.900802810872766e-06, "loss": 1.03, "step": 56510 }, { "epoch": 16.91, "grad_norm": 4.068645000457764, "learning_rate": 2.898056480406722e-06, "loss": 0.8931, "step": 56515 }, { "epoch": 16.91, "grad_norm": 2.8915958404541016, "learning_rate": 2.8953113706080477e-06, "loss": 1.1591, "step": 56520 }, { "epoch": 16.91, "grad_norm": 1.79038667678833, "learning_rate": 2.892567481628347e-06, "loss": 1.0253, "step": 56525 }, { "epoch": 16.91, "grad_norm": 2.4151062965393066, "learning_rate": 2.889824813619166e-06, "loss": 0.8595, "step": 56530 }, { "epoch": 16.91, "grad_norm": 1.7418419122695923, "learning_rate": 2.8870833667319795e-06, "loss": 0.8801, "step": 56535 }, { "epoch": 16.92, "grad_norm": 4.192162036895752, "learning_rate": 2.8843431411181926e-06, "loss": 1.1202, "step": 56540 }, { "epoch": 16.92, "grad_norm": 4.1534881591796875, "learning_rate": 2.8816041369291446e-06, "loss": 0.8828, "step": 56545 }, { "epoch": 16.92, "grad_norm": 2.742586374282837, "learning_rate": 2.878866354316112e-06, "loss": 0.9694, "step": 56550 }, { "epoch": 16.92, "grad_norm": 3.240999460220337, "learning_rate": 2.8761297934302934e-06, "loss": 0.8857, "step": 56555 }, { "epoch": 16.92, "grad_norm": 4.671759128570557, "learning_rate": 2.873394454422834e-06, "loss": 0.9096, "step": 56560 }, { "epoch": 16.92, "grad_norm": 2.9793546199798584, "learning_rate": 
2.8706603374448027e-06, "loss": 1.1287, "step": 56565 }, { "epoch": 16.93, "grad_norm": 1.7608052492141724, "learning_rate": 2.8679274426471864e-06, "loss": 0.9546, "step": 56570 }, { "epoch": 16.93, "grad_norm": 3.5491206645965576, "learning_rate": 2.865195770180945e-06, "loss": 0.9436, "step": 56575 }, { "epoch": 16.93, "grad_norm": 4.213696002960205, "learning_rate": 2.8624653201969247e-06, "loss": 0.8552, "step": 56580 }, { "epoch": 16.93, "grad_norm": 2.7237231731414795, "learning_rate": 2.859736092845941e-06, "loss": 1.0671, "step": 56585 }, { "epoch": 16.93, "grad_norm": 2.797983407974243, "learning_rate": 2.8570080882787175e-06, "loss": 1.0807, "step": 56590 }, { "epoch": 16.93, "grad_norm": 4.629161834716797, "learning_rate": 2.8542813066459173e-06, "loss": 0.9689, "step": 56595 }, { "epoch": 16.93, "grad_norm": 4.058006763458252, "learning_rate": 2.8515557480981448e-06, "loss": 1.0349, "step": 56600 }, { "epoch": 16.94, "grad_norm": 2.309382677078247, "learning_rate": 2.848831412785924e-06, "loss": 0.9884, "step": 56605 }, { "epoch": 16.94, "grad_norm": 4.170457363128662, "learning_rate": 2.8461083008597204e-06, "loss": 0.8189, "step": 56610 }, { "epoch": 16.94, "grad_norm": 2.4919357299804688, "learning_rate": 2.8433864124699284e-06, "loss": 0.9594, "step": 56615 }, { "epoch": 16.94, "grad_norm": 4.131077289581299, "learning_rate": 2.840665747766874e-06, "loss": 0.9989, "step": 56620 }, { "epoch": 16.94, "grad_norm": 4.024702072143555, "learning_rate": 2.8379463069008153e-06, "loss": 0.9219, "step": 56625 }, { "epoch": 16.94, "grad_norm": 2.887530565261841, "learning_rate": 2.8352280900219462e-06, "loss": 0.8609, "step": 56630 }, { "epoch": 16.94, "grad_norm": 3.2231643199920654, "learning_rate": 2.8325110972803936e-06, "loss": 1.0832, "step": 56635 }, { "epoch": 16.95, "grad_norm": 2.5507965087890625, "learning_rate": 2.829795328826207e-06, "loss": 1.023, "step": 56640 }, { "epoch": 16.95, "grad_norm": 1.9942423105239868, "learning_rate": 
2.827080784809383e-06, "loss": 1.0267, "step": 56645 }, { "epoch": 16.95, "grad_norm": 3.263000011444092, "learning_rate": 2.8243674653798376e-06, "loss": 0.9424, "step": 56650 }, { "epoch": 16.95, "grad_norm": 2.1129324436187744, "learning_rate": 2.8216553706874288e-06, "loss": 1.119, "step": 56655 }, { "epoch": 16.95, "grad_norm": 1.207990050315857, "learning_rate": 2.8189445008819454e-06, "loss": 0.762, "step": 56660 }, { "epoch": 16.95, "grad_norm": 2.738921880722046, "learning_rate": 2.8162348561130895e-06, "loss": 1.012, "step": 56665 }, { "epoch": 16.96, "grad_norm": 4.169528007507324, "learning_rate": 2.8135264365305366e-06, "loss": 1.0023, "step": 56670 }, { "epoch": 16.96, "grad_norm": 3.962857723236084, "learning_rate": 2.8108192422838437e-06, "loss": 0.901, "step": 56675 }, { "epoch": 16.96, "grad_norm": 2.816699743270874, "learning_rate": 2.808113273522553e-06, "loss": 0.969, "step": 56680 }, { "epoch": 16.96, "grad_norm": 4.528411388397217, "learning_rate": 2.8054085303960863e-06, "loss": 1.0662, "step": 56685 }, { "epoch": 16.96, "grad_norm": 1.3069534301757812, "learning_rate": 2.8027050130538494e-06, "loss": 1.0393, "step": 56690 }, { "epoch": 16.96, "grad_norm": 5.835165977478027, "learning_rate": 2.800002721645134e-06, "loss": 1.1026, "step": 56695 }, { "epoch": 16.96, "grad_norm": 2.2448999881744385, "learning_rate": 2.79730165631919e-06, "loss": 0.8728, "step": 56700 }, { "epoch": 16.97, "grad_norm": 3.5195298194885254, "learning_rate": 2.7946018172252066e-06, "loss": 1.0797, "step": 56705 }, { "epoch": 16.97, "grad_norm": 1.8925422430038452, "learning_rate": 2.7919032045122725e-06, "loss": 0.9078, "step": 56710 }, { "epoch": 16.97, "grad_norm": 1.31459379196167, "learning_rate": 2.789205818329449e-06, "loss": 0.9306, "step": 56715 }, { "epoch": 16.97, "grad_norm": 3.2655656337738037, "learning_rate": 2.786509658825698e-06, "loss": 1.1181, "step": 56720 }, { "epoch": 16.97, "grad_norm": 1.9709197282791138, "learning_rate": 
2.7838147261499305e-06, "loss": 0.8847, "step": 56725 }, { "epoch": 16.97, "grad_norm": 2.601952314376831, "learning_rate": 2.78112102045098e-06, "loss": 0.911, "step": 56730 }, { "epoch": 16.97, "grad_norm": 1.5735349655151367, "learning_rate": 2.7784285418776227e-06, "loss": 0.8787, "step": 56735 }, { "epoch": 16.98, "grad_norm": 1.383877158164978, "learning_rate": 2.775737290578559e-06, "loss": 0.8761, "step": 56740 }, { "epoch": 16.98, "grad_norm": 2.030069351196289, "learning_rate": 2.773047266702422e-06, "loss": 1.0923, "step": 56745 }, { "epoch": 16.98, "grad_norm": 2.9204518795013428, "learning_rate": 2.7703584703977824e-06, "loss": 0.9418, "step": 56750 }, { "epoch": 16.98, "grad_norm": 2.140024423599243, "learning_rate": 2.7676709018131436e-06, "loss": 0.9529, "step": 56755 }, { "epoch": 16.98, "grad_norm": 1.8030258417129517, "learning_rate": 2.76498456109692e-06, "loss": 1.0081, "step": 56760 }, { "epoch": 16.98, "grad_norm": 4.3402509689331055, "learning_rate": 2.7622994483974985e-06, "loss": 0.8433, "step": 56765 }, { "epoch": 16.98, "grad_norm": 2.835216999053955, "learning_rate": 2.759615563863152e-06, "loss": 1.0517, "step": 56770 }, { "epoch": 16.99, "grad_norm": 2.009197473526001, "learning_rate": 2.7569329076421317e-06, "loss": 0.985, "step": 56775 }, { "epoch": 16.99, "grad_norm": 1.9505784511566162, "learning_rate": 2.7542514798825773e-06, "loss": 0.8046, "step": 56780 }, { "epoch": 16.99, "grad_norm": 1.5446016788482666, "learning_rate": 2.7515712807325955e-06, "loss": 0.9255, "step": 56785 }, { "epoch": 16.99, "grad_norm": 1.5326980352401733, "learning_rate": 2.7488923103402094e-06, "loss": 1.1662, "step": 56790 }, { "epoch": 16.99, "grad_norm": 2.3964767456054688, "learning_rate": 2.7462145688533615e-06, "loss": 0.9944, "step": 56795 }, { "epoch": 16.99, "grad_norm": 5.0350189208984375, "learning_rate": 2.743538056419964e-06, "loss": 0.9068, "step": 56800 }, { "epoch": 17.0, "grad_norm": 2.6177234649658203, "learning_rate": 
2.7408627731878133e-06, "loss": 0.9081, "step": 56805 }, { "epoch": 17.0, "grad_norm": 2.825147867202759, "learning_rate": 2.738188719304685e-06, "loss": 0.9299, "step": 56810 }, { "epoch": 17.0, "grad_norm": 3.711740732192993, "learning_rate": 2.735515894918245e-06, "loss": 0.9777, "step": 56815 }, { "epoch": 17.0, "grad_norm": 2.3973779678344727, "learning_rate": 2.7328443001761266e-06, "loss": 1.0681, "step": 56820 }, { "epoch": 17.0, "grad_norm": 3.3198635578155518, "learning_rate": 2.7301739352258716e-06, "loss": 0.9094, "step": 56825 }, { "epoch": 17.0, "grad_norm": 2.2214438915252686, "learning_rate": 2.7275048002149585e-06, "loss": 0.9812, "step": 56830 }, { "epoch": 17.0, "grad_norm": 6.447070598602295, "learning_rate": 2.7248368952908053e-06, "loss": 0.76, "step": 56835 }, { "epoch": 17.01, "grad_norm": 4.4163713455200195, "learning_rate": 2.722170220600756e-06, "loss": 0.8238, "step": 56840 }, { "epoch": 17.01, "grad_norm": 2.432135820388794, "learning_rate": 2.7195047762920895e-06, "loss": 0.8192, "step": 56845 }, { "epoch": 17.01, "grad_norm": 1.874724268913269, "learning_rate": 2.716840562512021e-06, "loss": 0.916, "step": 56850 }, { "epoch": 17.01, "grad_norm": 3.546983003616333, "learning_rate": 2.714177579407673e-06, "loss": 0.858, "step": 56855 }, { "epoch": 17.01, "grad_norm": 3.305762529373169, "learning_rate": 2.7115158271261403e-06, "loss": 0.9272, "step": 56860 }, { "epoch": 17.01, "grad_norm": 4.167823791503906, "learning_rate": 2.70885530581442e-06, "loss": 0.9225, "step": 56865 }, { "epoch": 17.01, "grad_norm": 2.542015790939331, "learning_rate": 2.7061960156194526e-06, "loss": 0.972, "step": 56870 }, { "epoch": 17.02, "grad_norm": 2.959282398223877, "learning_rate": 2.7035379566881042e-06, "loss": 1.1611, "step": 56875 }, { "epoch": 17.02, "grad_norm": 2.539849281311035, "learning_rate": 2.700881129167179e-06, "loss": 0.8913, "step": 56880 }, { "epoch": 17.02, "grad_norm": 3.6170997619628906, "learning_rate": 2.6982255332034133e-06, 
"loss": 1.0476, "step": 56885 }, { "epoch": 17.02, "grad_norm": 4.399633407592773, "learning_rate": 2.6955711689434614e-06, "loss": 0.7257, "step": 56890 }, { "epoch": 17.02, "grad_norm": 2.9377009868621826, "learning_rate": 2.6929180365339423e-06, "loss": 1.2028, "step": 56895 }, { "epoch": 17.02, "grad_norm": 3.030989408493042, "learning_rate": 2.690266136121361e-06, "loss": 1.0568, "step": 56900 }, { "epoch": 17.03, "grad_norm": 2.5369529724121094, "learning_rate": 2.6876154678522005e-06, "loss": 1.0388, "step": 56905 }, { "epoch": 17.03, "grad_norm": 2.8671159744262695, "learning_rate": 2.6849660318728376e-06, "loss": 0.9034, "step": 56910 }, { "epoch": 17.03, "grad_norm": 1.3632619380950928, "learning_rate": 2.682317828329614e-06, "loss": 0.735, "step": 56915 }, { "epoch": 17.03, "grad_norm": 2.6106884479522705, "learning_rate": 2.679670857368774e-06, "loss": 0.9862, "step": 56920 }, { "epoch": 17.03, "grad_norm": 2.3712689876556396, "learning_rate": 2.6770251191365113e-06, "loss": 1.0196, "step": 56925 }, { "epoch": 17.03, "grad_norm": 5.395451545715332, "learning_rate": 2.6743806137789485e-06, "loss": 1.0626, "step": 56930 }, { "epoch": 17.03, "grad_norm": 4.0020856857299805, "learning_rate": 2.6717373414421377e-06, "loss": 0.9506, "step": 56935 }, { "epoch": 17.04, "grad_norm": 1.977827548980713, "learning_rate": 2.669095302272065e-06, "loss": 0.7509, "step": 56940 }, { "epoch": 17.04, "grad_norm": 4.007442474365234, "learning_rate": 2.666454496414647e-06, "loss": 0.9768, "step": 56945 }, { "epoch": 17.04, "grad_norm": 2.781235933303833, "learning_rate": 2.6638149240157313e-06, "loss": 0.9895, "step": 56950 }, { "epoch": 17.04, "grad_norm": 3.7630717754364014, "learning_rate": 2.6611765852211037e-06, "loss": 1.0876, "step": 56955 }, { "epoch": 17.04, "grad_norm": 6.746951580047607, "learning_rate": 2.6585394801764697e-06, "loss": 0.9619, "step": 56960 }, { "epoch": 17.04, "grad_norm": 1.971835732460022, "learning_rate": 2.6559036090274796e-06, "loss": 
1.147, "step": 56965 }, { "epoch": 17.04, "grad_norm": 1.7732189893722534, "learning_rate": 2.653268971919706e-06, "loss": 1.0708, "step": 56970 }, { "epoch": 17.05, "grad_norm": 2.1703782081604004, "learning_rate": 2.6506355689986607e-06, "loss": 0.9244, "step": 56975 }, { "epoch": 17.05, "grad_norm": 3.1191794872283936, "learning_rate": 2.6480034004097877e-06, "loss": 0.9953, "step": 56980 }, { "epoch": 17.05, "grad_norm": 3.8292319774627686, "learning_rate": 2.645372466298443e-06, "loss": 0.9508, "step": 56985 }, { "epoch": 17.05, "grad_norm": 2.0366878509521484, "learning_rate": 2.64274276680995e-06, "loss": 0.9986, "step": 56990 }, { "epoch": 17.05, "grad_norm": 2.463479518890381, "learning_rate": 2.6401143020895245e-06, "loss": 1.0819, "step": 56995 }, { "epoch": 17.05, "grad_norm": 1.6114779710769653, "learning_rate": 2.637487072282355e-06, "loss": 1.0996, "step": 57000 }, { "epoch": 17.06, "grad_norm": 1.567771315574646, "learning_rate": 2.6348610775335182e-06, "loss": 1.001, "step": 57005 }, { "epoch": 17.06, "grad_norm": 1.417826533317566, "learning_rate": 2.632236317988068e-06, "loss": 0.9204, "step": 57010 }, { "epoch": 17.06, "grad_norm": 1.9809144735336304, "learning_rate": 2.6296127937909504e-06, "loss": 0.928, "step": 57015 }, { "epoch": 17.06, "grad_norm": 2.4139678478240967, "learning_rate": 2.626990505087068e-06, "loss": 0.8989, "step": 57020 }, { "epoch": 17.06, "grad_norm": 2.6247403621673584, "learning_rate": 2.6243694520212414e-06, "loss": 0.9612, "step": 57025 }, { "epoch": 17.06, "grad_norm": 4.078159332275391, "learning_rate": 2.6217496347382325e-06, "loss": 1.0525, "step": 57030 }, { "epoch": 17.06, "grad_norm": 3.896151065826416, "learning_rate": 2.6191310533827313e-06, "loss": 1.1528, "step": 57035 }, { "epoch": 17.07, "grad_norm": 2.571683883666992, "learning_rate": 2.616513708099358e-06, "loss": 1.0413, "step": 57040 }, { "epoch": 17.07, "grad_norm": 3.916944980621338, "learning_rate": 2.613897599032666e-06, "loss": 0.8858, "step": 
57045 }, { "epoch": 17.07, "grad_norm": 3.300248622894287, "learning_rate": 2.611282726327141e-06, "loss": 1.1224, "step": 57050 }, { "epoch": 17.07, "grad_norm": 1.99065363407135, "learning_rate": 2.6086690901272e-06, "loss": 1.035, "step": 57055 }, { "epoch": 17.07, "grad_norm": 2.4363794326782227, "learning_rate": 2.606056690577191e-06, "loss": 0.926, "step": 57060 }, { "epoch": 17.07, "grad_norm": 1.8297630548477173, "learning_rate": 2.603445527821391e-06, "loss": 1.1107, "step": 57065 }, { "epoch": 17.07, "grad_norm": 4.487589359283447, "learning_rate": 2.6008356020040174e-06, "loss": 1.0452, "step": 57070 }, { "epoch": 17.08, "grad_norm": 4.226923942565918, "learning_rate": 2.598226913269214e-06, "loss": 0.8234, "step": 57075 }, { "epoch": 17.08, "grad_norm": 2.6841580867767334, "learning_rate": 2.595619461761045e-06, "loss": 0.735, "step": 57080 }, { "epoch": 17.08, "grad_norm": 4.622152805328369, "learning_rate": 2.5930132476235326e-06, "loss": 0.9079, "step": 57085 }, { "epoch": 17.08, "grad_norm": 2.2728214263916016, "learning_rate": 2.5904082710005995e-06, "loss": 1.0287, "step": 57090 }, { "epoch": 17.08, "grad_norm": 3.7428131103515625, "learning_rate": 2.587804532036134e-06, "loss": 0.8489, "step": 57095 }, { "epoch": 17.08, "grad_norm": 5.1959099769592285, "learning_rate": 2.585202030873915e-06, "loss": 0.9871, "step": 57100 }, { "epoch": 17.09, "grad_norm": 9.105627059936523, "learning_rate": 2.582600767657703e-06, "loss": 0.9979, "step": 57105 }, { "epoch": 17.09, "grad_norm": 3.824483633041382, "learning_rate": 2.580000742531141e-06, "loss": 0.9031, "step": 57110 }, { "epoch": 17.09, "grad_norm": 3.2056384086608887, "learning_rate": 2.577401955637837e-06, "loss": 0.9565, "step": 57115 }, { "epoch": 17.09, "grad_norm": 1.918878197669983, "learning_rate": 2.574804407121312e-06, "loss": 1.0012, "step": 57120 }, { "epoch": 17.09, "grad_norm": 2.389338731765747, "learning_rate": 2.572208097125034e-06, "loss": 1.0327, "step": 57125 }, { "epoch": 17.09, 
"grad_norm": 1.8829810619354248, "learning_rate": 2.5696130257923863e-06, "loss": 0.9748, "step": 57130 }, { "epoch": 17.09, "grad_norm": 2.50826358795166, "learning_rate": 2.5670191932666983e-06, "loss": 0.901, "step": 57135 }, { "epoch": 17.1, "grad_norm": 3.3760509490966797, "learning_rate": 2.5644265996912246e-06, "loss": 1.0282, "step": 57140 }, { "epoch": 17.1, "grad_norm": 2.7886831760406494, "learning_rate": 2.561835245209146e-06, "loss": 0.8485, "step": 57145 }, { "epoch": 17.1, "grad_norm": 1.582168698310852, "learning_rate": 2.559245129963586e-06, "loss": 0.9798, "step": 57150 }, { "epoch": 17.1, "grad_norm": 2.845203161239624, "learning_rate": 2.556656254097589e-06, "loss": 1.2068, "step": 57155 }, { "epoch": 17.1, "grad_norm": 2.8991782665252686, "learning_rate": 2.5540686177541408e-06, "loss": 1.0069, "step": 57160 }, { "epoch": 17.1, "grad_norm": 2.5524137020111084, "learning_rate": 2.5514822210761485e-06, "loss": 0.7097, "step": 57165 }, { "epoch": 17.1, "grad_norm": 5.034450531005859, "learning_rate": 2.5488970642064624e-06, "loss": 0.9645, "step": 57170 }, { "epoch": 17.11, "grad_norm": 3.3670802116394043, "learning_rate": 2.5463131472878544e-06, "loss": 1.0891, "step": 57175 }, { "epoch": 17.11, "grad_norm": 5.955173015594482, "learning_rate": 2.543730470463035e-06, "loss": 0.9105, "step": 57180 }, { "epoch": 17.11, "grad_norm": 2.0601420402526855, "learning_rate": 2.541149033874632e-06, "loss": 1.0384, "step": 57185 }, { "epoch": 17.11, "grad_norm": 1.1754088401794434, "learning_rate": 2.538568837665231e-06, "loss": 0.8901, "step": 57190 }, { "epoch": 17.11, "grad_norm": 6.220560550689697, "learning_rate": 2.535989881977319e-06, "loss": 0.9757, "step": 57195 }, { "epoch": 17.11, "grad_norm": 2.9648501873016357, "learning_rate": 2.5334121669533416e-06, "loss": 0.9412, "step": 57200 }, { "epoch": 17.12, "grad_norm": 2.4804067611694336, "learning_rate": 2.5308356927356556e-06, "loss": 1.1147, "step": 57205 }, { "epoch": 17.12, "grad_norm": 
2.2306246757507324, "learning_rate": 2.5282604594665583e-06, "loss": 0.9197, "step": 57210 }, { "epoch": 17.12, "grad_norm": 1.3986696004867554, "learning_rate": 2.525686467288277e-06, "loss": 0.9156, "step": 57215 }, { "epoch": 17.12, "grad_norm": 2.3900442123413086, "learning_rate": 2.523113716342973e-06, "loss": 0.9889, "step": 57220 }, { "epoch": 17.12, "grad_norm": 6.6008100509643555, "learning_rate": 2.520542206772733e-06, "loss": 0.9103, "step": 57225 }, { "epoch": 17.12, "grad_norm": 2.302074909210205, "learning_rate": 2.517971938719582e-06, "loss": 0.7901, "step": 57230 }, { "epoch": 17.12, "grad_norm": 2.978907585144043, "learning_rate": 2.5154029123254708e-06, "loss": 1.1318, "step": 57235 }, { "epoch": 17.13, "grad_norm": 3.265174388885498, "learning_rate": 2.5128351277322853e-06, "loss": 0.8781, "step": 57240 }, { "epoch": 17.13, "grad_norm": 2.929891347885132, "learning_rate": 2.510268585081843e-06, "loss": 0.8406, "step": 57245 }, { "epoch": 17.13, "grad_norm": 2.2917799949645996, "learning_rate": 2.5077032845158886e-06, "loss": 0.9925, "step": 57250 }, { "epoch": 17.13, "grad_norm": 6.078389644622803, "learning_rate": 2.505139226176104e-06, "loss": 0.8077, "step": 57255 }, { "epoch": 17.13, "grad_norm": 3.7742414474487305, "learning_rate": 2.5025764102040966e-06, "loss": 0.6981, "step": 57260 }, { "epoch": 17.13, "grad_norm": 1.999995231628418, "learning_rate": 2.50001483674141e-06, "loss": 1.1849, "step": 57265 }, { "epoch": 17.13, "grad_norm": 7.203548431396484, "learning_rate": 2.497454505929517e-06, "loss": 0.8368, "step": 57270 }, { "epoch": 17.14, "grad_norm": 1.5824021100997925, "learning_rate": 2.494895417909826e-06, "loss": 1.0648, "step": 57275 }, { "epoch": 17.14, "grad_norm": 2.918457269668579, "learning_rate": 2.492337572823658e-06, "loss": 0.8636, "step": 57280 }, { "epoch": 17.14, "grad_norm": 4.666050434112549, "learning_rate": 2.4897809708123e-06, "loss": 0.8613, "step": 57285 }, { "epoch": 17.14, "grad_norm": 3.866513967514038, 
"learning_rate": 2.487225612016933e-06, "loss": 1.0003, "step": 57290 }, { "epoch": 17.14, "grad_norm": 3.5122618675231934, "learning_rate": 2.4846714965787027e-06, "loss": 1.0402, "step": 57295 }, { "epoch": 17.14, "grad_norm": 3.2854020595550537, "learning_rate": 2.4821186246386575e-06, "loss": 0.8922, "step": 57300 }, { "epoch": 17.14, "grad_norm": 3.5705018043518066, "learning_rate": 2.4795669963377955e-06, "loss": 1.0352, "step": 57305 }, { "epoch": 17.15, "grad_norm": 2.31522798538208, "learning_rate": 2.4770166118170374e-06, "loss": 0.9692, "step": 57310 }, { "epoch": 17.15, "grad_norm": 4.260332107543945, "learning_rate": 2.474467471217243e-06, "loss": 1.0586, "step": 57315 }, { "epoch": 17.15, "grad_norm": 3.6906752586364746, "learning_rate": 2.4719195746791964e-06, "loss": 0.8967, "step": 57320 }, { "epoch": 17.15, "grad_norm": 2.0712099075317383, "learning_rate": 2.469372922343613e-06, "loss": 1.1484, "step": 57325 }, { "epoch": 17.15, "grad_norm": 2.5663881301879883, "learning_rate": 2.466827514351144e-06, "loss": 0.9263, "step": 57330 }, { "epoch": 17.15, "grad_norm": 2.5558412075042725, "learning_rate": 2.4642833508423724e-06, "loss": 1.1596, "step": 57335 }, { "epoch": 17.16, "grad_norm": 4.297339916229248, "learning_rate": 2.461740431957804e-06, "loss": 0.8887, "step": 57340 }, { "epoch": 17.16, "grad_norm": 1.2254616022109985, "learning_rate": 2.4591987578378856e-06, "loss": 1.0521, "step": 57345 }, { "epoch": 17.16, "grad_norm": 3.5310375690460205, "learning_rate": 2.456658328622988e-06, "loss": 0.7953, "step": 57350 }, { "epoch": 17.16, "grad_norm": 2.0364389419555664, "learning_rate": 2.4541191444534205e-06, "loss": 0.9538, "step": 57355 }, { "epoch": 17.16, "grad_norm": 2.6163721084594727, "learning_rate": 2.4515812054694166e-06, "loss": 0.9859, "step": 57360 }, { "epoch": 17.16, "grad_norm": 4.850313663482666, "learning_rate": 2.4490445118111437e-06, "loss": 0.8275, "step": 57365 }, { "epoch": 17.16, "grad_norm": 2.779263734817505, 
"learning_rate": 2.44650906361871e-06, "loss": 1.0238, "step": 57370 }, { "epoch": 17.17, "grad_norm": 1.5706207752227783, "learning_rate": 2.443974861032125e-06, "loss": 1.1682, "step": 57375 }, { "epoch": 17.17, "grad_norm": 2.980870246887207, "learning_rate": 2.4414419041913744e-06, "loss": 1.0511, "step": 57380 }, { "epoch": 17.17, "grad_norm": 1.8307000398635864, "learning_rate": 2.438910193236327e-06, "loss": 1.1374, "step": 57385 }, { "epoch": 17.17, "grad_norm": 4.98880672454834, "learning_rate": 2.436379728306831e-06, "loss": 0.8951, "step": 57390 }, { "epoch": 17.17, "grad_norm": 3.5696189403533936, "learning_rate": 2.433850509542618e-06, "loss": 0.9665, "step": 57395 }, { "epoch": 17.17, "grad_norm": 2.4421303272247314, "learning_rate": 2.431322537083394e-06, "loss": 1.0267, "step": 57400 }, { "epoch": 17.17, "grad_norm": 4.078541278839111, "learning_rate": 2.428795811068765e-06, "loss": 0.8258, "step": 57405 }, { "epoch": 17.18, "grad_norm": 2.0753166675567627, "learning_rate": 2.426270331638281e-06, "loss": 0.8415, "step": 57410 }, { "epoch": 17.18, "grad_norm": 2.376988410949707, "learning_rate": 2.423746098931423e-06, "loss": 1.0177, "step": 57415 }, { "epoch": 17.18, "grad_norm": 4.53126335144043, "learning_rate": 2.4212231130876007e-06, "loss": 0.9601, "step": 57420 }, { "epoch": 17.18, "grad_norm": 2.623030662536621, "learning_rate": 2.4187013742461545e-06, "loss": 0.8718, "step": 57425 }, { "epoch": 17.18, "grad_norm": 2.2002532482147217, "learning_rate": 2.4161808825463622e-06, "loss": 1.0355, "step": 57430 }, { "epoch": 17.18, "grad_norm": 2.742001533508301, "learning_rate": 2.4136616381274237e-06, "loss": 0.8977, "step": 57435 }, { "epoch": 17.19, "grad_norm": 2.284527540206909, "learning_rate": 2.411143641128477e-06, "loss": 0.9512, "step": 57440 }, { "epoch": 17.19, "grad_norm": 4.969058036804199, "learning_rate": 2.408626891688587e-06, "loss": 0.8995, "step": 57445 }, { "epoch": 17.19, "grad_norm": 2.935258150100708, "learning_rate": 
2.4061113899467497e-06, "loss": 0.9235, "step": 57450 }, { "epoch": 17.19, "grad_norm": 4.259613037109375, "learning_rate": 2.4035971360418963e-06, "loss": 1.0788, "step": 57455 }, { "epoch": 17.19, "grad_norm": 2.900808095932007, "learning_rate": 2.401084130112885e-06, "loss": 1.1105, "step": 57460 }, { "epoch": 17.19, "grad_norm": 2.809903860092163, "learning_rate": 2.398572372298513e-06, "loss": 0.986, "step": 57465 }, { "epoch": 17.19, "grad_norm": 2.9427716732025146, "learning_rate": 2.396061862737484e-06, "loss": 1.0418, "step": 57470 }, { "epoch": 17.2, "grad_norm": 4.519219398498535, "learning_rate": 2.3935526015684745e-06, "loss": 1.0045, "step": 57475 }, { "epoch": 17.2, "grad_norm": 2.9605634212493896, "learning_rate": 2.391044588930047e-06, "loss": 0.9415, "step": 57480 }, { "epoch": 17.2, "grad_norm": 2.0215342044830322, "learning_rate": 2.388537824960735e-06, "loss": 0.9759, "step": 57485 }, { "epoch": 17.2, "grad_norm": 1.2563127279281616, "learning_rate": 2.3860323097989663e-06, "loss": 0.956, "step": 57490 }, { "epoch": 17.2, "grad_norm": 1.9748398065567017, "learning_rate": 2.3835280435831385e-06, "loss": 0.9808, "step": 57495 }, { "epoch": 17.2, "grad_norm": 2.0095794200897217, "learning_rate": 2.3810250264515415e-06, "loss": 1.0952, "step": 57500 }, { "epoch": 17.2, "grad_norm": 2.950597047805786, "learning_rate": 2.3785232585424226e-06, "loss": 1.0353, "step": 57505 }, { "epoch": 17.21, "grad_norm": 1.762516736984253, "learning_rate": 2.3760227399939493e-06, "loss": 1.081, "step": 57510 }, { "epoch": 17.21, "grad_norm": 4.237101078033447, "learning_rate": 2.3735234709442194e-06, "loss": 1.1056, "step": 57515 }, { "epoch": 17.21, "grad_norm": 2.0214059352874756, "learning_rate": 2.371025451531278e-06, "loss": 0.933, "step": 57520 }, { "epoch": 17.21, "grad_norm": 2.3543801307678223, "learning_rate": 2.36852868189307e-06, "loss": 0.9907, "step": 57525 }, { "epoch": 17.21, "grad_norm": 3.8169326782226562, "learning_rate": 2.366033162167508e-06, 
"loss": 1.045, "step": 57530 }, { "epoch": 17.21, "grad_norm": 1.6182808876037598, "learning_rate": 2.363538892492406e-06, "loss": 0.9037, "step": 57535 }, { "epoch": 17.22, "grad_norm": 2.3788492679595947, "learning_rate": 2.3610458730055186e-06, "loss": 1.0592, "step": 57540 }, { "epoch": 17.22, "grad_norm": 1.563767910003662, "learning_rate": 2.3585541038445374e-06, "loss": 0.8683, "step": 57545 }, { "epoch": 17.22, "grad_norm": 3.2709128856658936, "learning_rate": 2.356063585147078e-06, "loss": 0.8699, "step": 57550 }, { "epoch": 17.22, "grad_norm": 2.178466320037842, "learning_rate": 2.3535743170506887e-06, "loss": 0.8278, "step": 57555 }, { "epoch": 17.22, "grad_norm": 4.895992755889893, "learning_rate": 2.351086299692856e-06, "loss": 1.0167, "step": 57560 }, { "epoch": 17.22, "grad_norm": 3.7449183464050293, "learning_rate": 2.348599533210974e-06, "loss": 1.1186, "step": 57565 }, { "epoch": 17.22, "grad_norm": 4.224990367889404, "learning_rate": 2.346114017742407e-06, "loss": 0.9867, "step": 57570 }, { "epoch": 17.23, "grad_norm": 1.4723498821258545, "learning_rate": 2.3436297534244035e-06, "loss": 1.0563, "step": 57575 }, { "epoch": 17.23, "grad_norm": 2.0138280391693115, "learning_rate": 2.34114674039419e-06, "loss": 0.8924, "step": 57580 }, { "epoch": 17.23, "grad_norm": 5.9495391845703125, "learning_rate": 2.3386649787888794e-06, "loss": 1.0926, "step": 57585 }, { "epoch": 17.23, "grad_norm": 3.3847897052764893, "learning_rate": 2.33618446874555e-06, "loss": 0.9745, "step": 57590 }, { "epoch": 17.23, "grad_norm": 8.030804634094238, "learning_rate": 2.333705210401202e-06, "loss": 0.9503, "step": 57595 }, { "epoch": 17.23, "grad_norm": 1.3774622678756714, "learning_rate": 2.3312272038927443e-06, "loss": 0.8589, "step": 57600 }, { "epoch": 17.23, "grad_norm": 4.3188652992248535, "learning_rate": 2.3287504493570534e-06, "loss": 0.8434, "step": 57605 }, { "epoch": 17.24, "grad_norm": 2.444995880126953, "learning_rate": 2.3262749469309007e-06, "loss": 1.1008, 
"step": 57610 }, { "epoch": 17.24, "grad_norm": 4.819691181182861, "learning_rate": 2.3238006967510273e-06, "loss": 0.9677, "step": 57615 }, { "epoch": 17.24, "grad_norm": 3.0465807914733887, "learning_rate": 2.321327698954057e-06, "loss": 0.7771, "step": 57620 }, { "epoch": 17.24, "grad_norm": 1.9639545679092407, "learning_rate": 2.3188559536765965e-06, "loss": 0.7874, "step": 57625 }, { "epoch": 17.24, "grad_norm": 4.145541191101074, "learning_rate": 2.3163854610551427e-06, "loss": 0.9116, "step": 57630 }, { "epoch": 17.24, "grad_norm": 5.50032377243042, "learning_rate": 2.3139162212261394e-06, "loss": 0.828, "step": 57635 }, { "epoch": 17.25, "grad_norm": 3.153430223464966, "learning_rate": 2.311448234325961e-06, "loss": 1.1071, "step": 57640 }, { "epoch": 17.25, "grad_norm": 4.366771697998047, "learning_rate": 2.3089815004909145e-06, "loss": 1.0254, "step": 57645 }, { "epoch": 17.25, "grad_norm": 3.364361047744751, "learning_rate": 2.3065160198572355e-06, "loss": 0.8954, "step": 57650 }, { "epoch": 17.25, "grad_norm": 2.316021680831909, "learning_rate": 2.3040517925610904e-06, "loss": 0.8784, "step": 57655 }, { "epoch": 17.25, "grad_norm": 3.245994806289673, "learning_rate": 2.3015888187385674e-06, "loss": 1.0405, "step": 57660 }, { "epoch": 17.25, "grad_norm": 2.167280435562134, "learning_rate": 2.299127098525705e-06, "loss": 0.9786, "step": 57665 }, { "epoch": 17.25, "grad_norm": 3.1760175228118896, "learning_rate": 2.29666663205845e-06, "loss": 0.9913, "step": 57670 }, { "epoch": 17.26, "grad_norm": 1.8148313760757446, "learning_rate": 2.294207419472705e-06, "loss": 1.0802, "step": 57675 }, { "epoch": 17.26, "grad_norm": 5.893067836761475, "learning_rate": 2.29174946090428e-06, "loss": 0.751, "step": 57680 }, { "epoch": 17.26, "grad_norm": 2.0050816535949707, "learning_rate": 2.2892927564889284e-06, "loss": 0.9561, "step": 57685 }, { "epoch": 17.26, "grad_norm": 2.132786512374878, "learning_rate": 2.286837306362338e-06, "loss": 0.838, "step": 57690 }, { 
"epoch": 17.26, "grad_norm": 3.9043054580688477, "learning_rate": 2.2843831106601017e-06, "loss": 0.9945, "step": 57695 }, { "epoch": 17.26, "grad_norm": 2.53236722946167, "learning_rate": 2.2819301695177876e-06, "loss": 0.8508, "step": 57700 }, { "epoch": 17.26, "grad_norm": 2.5461416244506836, "learning_rate": 2.2794784830708432e-06, "loss": 0.7963, "step": 57705 }, { "epoch": 17.27, "grad_norm": 3.189567804336548, "learning_rate": 2.2770280514546964e-06, "loss": 0.9692, "step": 57710 }, { "epoch": 17.27, "grad_norm": 2.352689027786255, "learning_rate": 2.2745788748046614e-06, "loss": 0.9831, "step": 57715 }, { "epoch": 17.27, "grad_norm": 2.441016674041748, "learning_rate": 2.2721309532560244e-06, "loss": 0.962, "step": 57720 }, { "epoch": 17.27, "grad_norm": 3.2766942977905273, "learning_rate": 2.2696842869439657e-06, "loss": 0.9884, "step": 57725 }, { "epoch": 17.27, "grad_norm": 4.255434989929199, "learning_rate": 2.267238876003616e-06, "loss": 1.06, "step": 57730 }, { "epoch": 17.27, "grad_norm": 4.779931545257568, "learning_rate": 2.2647947205700322e-06, "loss": 1.0216, "step": 57735 }, { "epoch": 17.28, "grad_norm": 2.0698294639587402, "learning_rate": 2.262351820778205e-06, "loss": 0.8473, "step": 57740 }, { "epoch": 17.28, "grad_norm": 2.572493553161621, "learning_rate": 2.2599101767630524e-06, "loss": 1.0734, "step": 57745 }, { "epoch": 17.28, "grad_norm": 3.1653802394866943, "learning_rate": 2.257469788659425e-06, "loss": 0.8459, "step": 57750 }, { "epoch": 17.28, "grad_norm": 2.538551092147827, "learning_rate": 2.255030656602103e-06, "loss": 0.9257, "step": 57755 }, { "epoch": 17.28, "grad_norm": 1.3721609115600586, "learning_rate": 2.2525927807257928e-06, "loss": 0.984, "step": 57760 }, { "epoch": 17.28, "grad_norm": 1.590498447418213, "learning_rate": 2.25015616116514e-06, "loss": 0.8294, "step": 57765 }, { "epoch": 17.28, "grad_norm": 3.673794746398926, "learning_rate": 2.2477207980547145e-06, "loss": 0.9454, "step": 57770 }, { "epoch": 17.29, 
"grad_norm": 3.491346836090088, "learning_rate": 2.2452866915290195e-06, "loss": 1.0912, "step": 57775 }, { "epoch": 17.29, "grad_norm": 6.499100208282471, "learning_rate": 2.2428538417224894e-06, "loss": 0.9398, "step": 57780 }, { "epoch": 17.29, "grad_norm": 2.021291971206665, "learning_rate": 2.2404222487694913e-06, "loss": 0.9325, "step": 57785 }, { "epoch": 17.29, "grad_norm": 2.0946333408355713, "learning_rate": 2.2379919128043047e-06, "loss": 0.9485, "step": 57790 }, { "epoch": 17.29, "grad_norm": 1.8785912990570068, "learning_rate": 2.2355628339611744e-06, "loss": 0.9824, "step": 57795 }, { "epoch": 17.29, "grad_norm": 3.972280979156494, "learning_rate": 2.233135012374238e-06, "loss": 0.9679, "step": 57800 }, { "epoch": 17.29, "grad_norm": 6.458027362823486, "learning_rate": 2.2307084481775985e-06, "loss": 0.9765, "step": 57805 }, { "epoch": 17.3, "grad_norm": 8.290522575378418, "learning_rate": 2.228283141505255e-06, "loss": 1.0319, "step": 57810 }, { "epoch": 17.3, "grad_norm": 3.988783121109009, "learning_rate": 2.2258590924911753e-06, "loss": 0.9265, "step": 57815 }, { "epoch": 17.3, "grad_norm": 1.954484224319458, "learning_rate": 2.223436301269219e-06, "loss": 1.0169, "step": 57820 }, { "epoch": 17.3, "grad_norm": 4.165136814117432, "learning_rate": 2.221014767973201e-06, "loss": 1.0911, "step": 57825 }, { "epoch": 17.3, "grad_norm": 3.616419553756714, "learning_rate": 2.2185944927368587e-06, "loss": 0.9891, "step": 57830 }, { "epoch": 17.3, "grad_norm": 2.0609145164489746, "learning_rate": 2.216175475693863e-06, "loss": 1.0497, "step": 57835 }, { "epoch": 17.31, "grad_norm": 3.2805593013763428, "learning_rate": 2.2137577169778155e-06, "loss": 0.9588, "step": 57840 }, { "epoch": 17.31, "grad_norm": 1.7305797338485718, "learning_rate": 2.211341216722243e-06, "loss": 0.9302, "step": 57845 }, { "epoch": 17.31, "grad_norm": 3.898714303970337, "learning_rate": 2.2089259750606052e-06, "loss": 1.1782, "step": 57850 }, { "epoch": 17.31, "grad_norm": 
1.8012914657592773, "learning_rate": 2.206511992126298e-06, "loss": 0.9423, "step": 57855 }, { "epoch": 17.31, "grad_norm": 1.263310432434082, "learning_rate": 2.2040992680526423e-06, "loss": 0.9829, "step": 57860 }, { "epoch": 17.31, "grad_norm": 1.296061396598816, "learning_rate": 2.2016878029728877e-06, "loss": 0.9826, "step": 57865 }, { "epoch": 17.31, "grad_norm": 3.5253007411956787, "learning_rate": 2.199277597020219e-06, "loss": 0.9559, "step": 57870 }, { "epoch": 17.32, "grad_norm": 1.947613000869751, "learning_rate": 2.1968686503277464e-06, "loss": 0.9857, "step": 57875 }, { "epoch": 17.32, "grad_norm": 1.1814169883728027, "learning_rate": 2.194460963028516e-06, "loss": 0.9992, "step": 57880 }, { "epoch": 17.32, "grad_norm": 3.306941509246826, "learning_rate": 2.192054535255503e-06, "loss": 0.9873, "step": 57885 }, { "epoch": 17.32, "grad_norm": 2.9445595741271973, "learning_rate": 2.189649367141616e-06, "loss": 0.8051, "step": 57890 }, { "epoch": 17.32, "grad_norm": 2.4458541870117188, "learning_rate": 2.187245458819673e-06, "loss": 1.121, "step": 57895 }, { "epoch": 17.32, "grad_norm": 4.300705432891846, "learning_rate": 2.1848428104224605e-06, "loss": 0.9273, "step": 57900 }, { "epoch": 17.32, "grad_norm": 1.9480119943618774, "learning_rate": 2.1824414220826566e-06, "loss": 0.9085, "step": 57905 }, { "epoch": 17.33, "grad_norm": 5.82395601272583, "learning_rate": 2.180041293932905e-06, "loss": 1.0309, "step": 57910 }, { "epoch": 17.33, "grad_norm": 2.66611909866333, "learning_rate": 2.177642426105747e-06, "loss": 1.1571, "step": 57915 }, { "epoch": 17.33, "grad_norm": 1.7344080209732056, "learning_rate": 2.1752448187336738e-06, "loss": 0.8145, "step": 57920 }, { "epoch": 17.33, "grad_norm": 4.083990573883057, "learning_rate": 2.172848471949107e-06, "loss": 0.8786, "step": 57925 }, { "epoch": 17.33, "grad_norm": 3.4210381507873535, "learning_rate": 2.1704533858843884e-06, "loss": 0.9086, "step": 57930 }, { "epoch": 17.33, "grad_norm": 2.1340394020080566, 
"learning_rate": 2.1680595606718008e-06, "loss": 0.9275, "step": 57935 }, { "epoch": 17.33, "grad_norm": 2.192603588104248, "learning_rate": 2.165666996443552e-06, "loss": 1.0049, "step": 57940 }, { "epoch": 17.34, "grad_norm": 1.9342150688171387, "learning_rate": 2.1632756933317815e-06, "loss": 0.9161, "step": 57945 }, { "epoch": 17.34, "grad_norm": 2.2969846725463867, "learning_rate": 2.160885651468553e-06, "loss": 1.0195, "step": 57950 }, { "epoch": 17.34, "grad_norm": 2.6062443256378174, "learning_rate": 2.158496870985874e-06, "loss": 1.0117, "step": 57955 }, { "epoch": 17.34, "grad_norm": 1.2157763242721558, "learning_rate": 2.156109352015667e-06, "loss": 0.9364, "step": 57960 }, { "epoch": 17.34, "grad_norm": 3.201415777206421, "learning_rate": 2.153723094689797e-06, "loss": 1.1783, "step": 57965 }, { "epoch": 17.34, "grad_norm": 2.736159324645996, "learning_rate": 2.1513380991400547e-06, "loss": 0.8852, "step": 57970 }, { "epoch": 17.35, "grad_norm": 2.2211790084838867, "learning_rate": 2.148954365498157e-06, "loss": 0.8922, "step": 57975 }, { "epoch": 17.35, "grad_norm": 0.9812418818473816, "learning_rate": 2.1465718938957576e-06, "loss": 1.049, "step": 57980 }, { "epoch": 17.35, "grad_norm": 2.103342294692993, "learning_rate": 2.144190684464442e-06, "loss": 1.1637, "step": 57985 }, { "epoch": 17.35, "grad_norm": 2.9801440238952637, "learning_rate": 2.1418107373357116e-06, "loss": 0.8715, "step": 57990 }, { "epoch": 17.35, "grad_norm": 3.3744688034057617, "learning_rate": 2.139432052641019e-06, "loss": 1.0241, "step": 57995 }, { "epoch": 17.35, "grad_norm": 3.7058229446411133, "learning_rate": 2.1370546305117254e-06, "loss": 1.0102, "step": 58000 }, { "epoch": 17.35, "grad_norm": 4.445005416870117, "learning_rate": 2.134678471079149e-06, "loss": 1.1774, "step": 58005 }, { "epoch": 17.36, "grad_norm": 2.2905967235565186, "learning_rate": 2.132303574474509e-06, "loss": 0.8403, "step": 58010 }, { "epoch": 17.36, "grad_norm": 1.867017388343811, "learning_rate": 
2.129929940828973e-06, "loss": 0.987, "step": 58015 }, { "epoch": 17.36, "grad_norm": 2.154637575149536, "learning_rate": 2.1275575702736334e-06, "loss": 0.9626, "step": 58020 }, { "epoch": 17.36, "grad_norm": 1.9806933403015137, "learning_rate": 2.1251864629395156e-06, "loss": 1.1247, "step": 58025 }, { "epoch": 17.36, "grad_norm": 2.581702947616577, "learning_rate": 2.122816618957571e-06, "loss": 1.0137, "step": 58030 }, { "epoch": 17.36, "grad_norm": 2.4726662635803223, "learning_rate": 2.120448038458686e-06, "loss": 0.9178, "step": 58035 }, { "epoch": 17.36, "grad_norm": 1.8047939538955688, "learning_rate": 2.118080721573673e-06, "loss": 1.1279, "step": 58040 }, { "epoch": 17.37, "grad_norm": 1.1406919956207275, "learning_rate": 2.1157146684332774e-06, "loss": 0.9604, "step": 58045 }, { "epoch": 17.37, "grad_norm": 2.2998058795928955, "learning_rate": 2.113349879168175e-06, "loss": 1.1191, "step": 58050 }, { "epoch": 17.37, "grad_norm": 2.92384672164917, "learning_rate": 2.11098635390897e-06, "loss": 1.0299, "step": 58055 }, { "epoch": 17.37, "grad_norm": 2.4828195571899414, "learning_rate": 2.1086240927861933e-06, "loss": 1.1108, "step": 58060 }, { "epoch": 17.37, "grad_norm": 2.7545273303985596, "learning_rate": 2.1062630959303163e-06, "loss": 1.0005, "step": 58065 }, { "epoch": 17.37, "grad_norm": 2.8774526119232178, "learning_rate": 2.103903363471732e-06, "loss": 1.0559, "step": 58070 }, { "epoch": 17.38, "grad_norm": 1.6022573709487915, "learning_rate": 2.1015448955407637e-06, "loss": 0.8921, "step": 58075 }, { "epoch": 17.38, "grad_norm": 3.47226619720459, "learning_rate": 2.0991876922676764e-06, "loss": 1.0754, "step": 58080 }, { "epoch": 17.38, "grad_norm": 1.9637651443481445, "learning_rate": 2.096831753782638e-06, "loss": 0.9987, "step": 58085 }, { "epoch": 17.38, "grad_norm": 1.8076112270355225, "learning_rate": 2.094477080215787e-06, "loss": 0.9186, "step": 58090 }, { "epoch": 17.38, "grad_norm": 3.499573230743408, "learning_rate": 
2.0921236716971465e-06, "loss": 1.0806, "step": 58095 }, { "epoch": 17.38, "grad_norm": 5.197295188903809, "learning_rate": 2.0897715283567126e-06, "loss": 1.0594, "step": 58100 }, { "epoch": 17.38, "grad_norm": 4.264853477478027, "learning_rate": 2.0874206503243758e-06, "loss": 0.8798, "step": 58105 }, { "epoch": 17.39, "grad_norm": 4.624975681304932, "learning_rate": 2.0850710377299883e-06, "loss": 0.9827, "step": 58110 }, { "epoch": 17.39, "grad_norm": 2.4760243892669678, "learning_rate": 2.0827226907033037e-06, "loss": 0.9113, "step": 58115 }, { "epoch": 17.39, "grad_norm": 1.9899121522903442, "learning_rate": 2.0803756093740247e-06, "loss": 0.9547, "step": 58120 }, { "epoch": 17.39, "grad_norm": 2.200958251953125, "learning_rate": 2.0780297938717776e-06, "loss": 0.9839, "step": 58125 }, { "epoch": 17.39, "grad_norm": 2.856152296066284, "learning_rate": 2.075685244326117e-06, "loss": 0.8033, "step": 58130 }, { "epoch": 17.39, "grad_norm": 2.164071559906006, "learning_rate": 2.0733419608665345e-06, "loss": 0.8925, "step": 58135 }, { "epoch": 17.39, "grad_norm": 1.5249561071395874, "learning_rate": 2.0709999436224426e-06, "loss": 0.958, "step": 58140 }, { "epoch": 17.4, "grad_norm": 3.109286069869995, "learning_rate": 2.0686591927231908e-06, "loss": 0.8569, "step": 58145 }, { "epoch": 17.4, "grad_norm": 3.086642265319824, "learning_rate": 2.066319708298056e-06, "loss": 1.0587, "step": 58150 }, { "epoch": 17.4, "grad_norm": 3.6070919036865234, "learning_rate": 2.063981490476247e-06, "loss": 1.0945, "step": 58155 }, { "epoch": 17.4, "grad_norm": 3.0385048389434814, "learning_rate": 2.0616445393868977e-06, "loss": 1.0345, "step": 58160 }, { "epoch": 17.4, "grad_norm": 3.2083237171173096, "learning_rate": 2.0593088551590783e-06, "loss": 0.9921, "step": 58165 }, { "epoch": 17.4, "grad_norm": 2.1536431312561035, "learning_rate": 2.056974437921785e-06, "loss": 1.1214, "step": 58170 }, { "epoch": 17.41, "grad_norm": 4.799535274505615, "learning_rate": 
2.0546412878039517e-06, "loss": 0.9159, "step": 58175 }, { "epoch": 17.41, "grad_norm": 3.742593765258789, "learning_rate": 2.052309404934419e-06, "loss": 0.9264, "step": 58180 }, { "epoch": 17.41, "grad_norm": 2.696674346923828, "learning_rate": 2.049978789441992e-06, "loss": 1.0157, "step": 58185 }, { "epoch": 17.41, "grad_norm": 2.9800398349761963, "learning_rate": 2.0476494414553765e-06, "loss": 0.8998, "step": 58190 }, { "epoch": 17.41, "grad_norm": 5.797310829162598, "learning_rate": 2.045321361103231e-06, "loss": 0.9081, "step": 58195 }, { "epoch": 17.41, "grad_norm": 3.218557119369507, "learning_rate": 2.0429945485141184e-06, "loss": 0.8853, "step": 58200 }, { "epoch": 17.41, "grad_norm": 4.598995208740234, "learning_rate": 2.0411340113185823e-06, "loss": 0.9268, "step": 58205 }, { "epoch": 17.42, "grad_norm": 10.203455924987793, "learning_rate": 2.0388094810267407e-06, "loss": 0.9933, "step": 58210 }, { "epoch": 17.42, "grad_norm": 1.8637256622314453, "learning_rate": 2.0364862188575833e-06, "loss": 1.0012, "step": 58215 }, { "epoch": 17.42, "grad_norm": 4.718301296234131, "learning_rate": 2.034164224939425e-06, "loss": 1.0701, "step": 58220 }, { "epoch": 17.42, "grad_norm": 3.8083252906799316, "learning_rate": 2.0318434994005014e-06, "loss": 0.8359, "step": 58225 }, { "epoch": 17.42, "grad_norm": 3.1337733268737793, "learning_rate": 2.0295240423689938e-06, "loss": 0.968, "step": 58230 }, { "epoch": 17.42, "grad_norm": 2.8172826766967773, "learning_rate": 2.0272058539729882e-06, "loss": 0.9344, "step": 58235 }, { "epoch": 17.42, "grad_norm": 1.8639297485351562, "learning_rate": 2.0248889343405324e-06, "loss": 1.1076, "step": 58240 }, { "epoch": 17.43, "grad_norm": 1.549034595489502, "learning_rate": 2.0225732835995715e-06, "loss": 1.0001, "step": 58245 }, { "epoch": 17.43, "grad_norm": 1.5290988683700562, "learning_rate": 2.020258901878014e-06, "loss": 1.0315, "step": 58250 }, { "epoch": 17.43, "grad_norm": 3.2922134399414062, "learning_rate": 
2.01794578930366e-06, "loss": 0.976, "step": 58255 }, { "epoch": 17.43, "grad_norm": 2.2940144538879395, "learning_rate": 2.0156339460042804e-06, "loss": 0.9909, "step": 58260 }, { "epoch": 17.43, "grad_norm": 2.8492350578308105, "learning_rate": 2.013323372107545e-06, "loss": 1.0184, "step": 58265 }, { "epoch": 17.43, "grad_norm": 7.312509059906006, "learning_rate": 2.0110140677410655e-06, "loss": 0.8777, "step": 58270 }, { "epoch": 17.44, "grad_norm": 3.636141777038574, "learning_rate": 2.0087060330323813e-06, "loss": 0.8916, "step": 58275 }, { "epoch": 17.44, "grad_norm": 4.8693366050720215, "learning_rate": 2.006399268108969e-06, "loss": 0.9538, "step": 58280 }, { "epoch": 17.44, "grad_norm": 4.98232364654541, "learning_rate": 2.00409377309822e-06, "loss": 0.8613, "step": 58285 }, { "epoch": 17.44, "grad_norm": 1.1212947368621826, "learning_rate": 2.0017895481274724e-06, "loss": 0.8985, "step": 58290 }, { "epoch": 17.44, "grad_norm": 4.1995673179626465, "learning_rate": 1.999486593323982e-06, "loss": 0.9499, "step": 58295 }, { "epoch": 17.44, "grad_norm": 5.486680507659912, "learning_rate": 1.9971849088149393e-06, "loss": 0.9938, "step": 58300 }, { "epoch": 17.44, "grad_norm": 2.455383062362671, "learning_rate": 1.9948844947274616e-06, "loss": 1.2352, "step": 58305 }, { "epoch": 17.45, "grad_norm": 2.302922010421753, "learning_rate": 1.9925853511886026e-06, "loss": 1.0595, "step": 58310 }, { "epoch": 17.45, "grad_norm": 1.5419032573699951, "learning_rate": 1.9902874783253393e-06, "loss": 1.0541, "step": 58315 }, { "epoch": 17.45, "grad_norm": 3.4702677726745605, "learning_rate": 1.9879908762645837e-06, "loss": 0.9092, "step": 58320 }, { "epoch": 17.45, "grad_norm": 2.8185274600982666, "learning_rate": 1.985695545133173e-06, "loss": 0.9399, "step": 58325 }, { "epoch": 17.45, "grad_norm": 2.852463960647583, "learning_rate": 1.9834014850578703e-06, "loss": 0.9442, "step": 58330 }, { "epoch": 17.45, "grad_norm": 1.91469407081604, "learning_rate": 
1.9811086961653846e-06, "loss": 1.1047, "step": 58335 }, { "epoch": 17.45, "grad_norm": 3.1850225925445557, "learning_rate": 1.9788171785823316e-06, "loss": 0.9461, "step": 58340 }, { "epoch": 17.46, "grad_norm": 2.731886386871338, "learning_rate": 1.9765269324352855e-06, "loss": 0.8916, "step": 58345 }, { "epoch": 17.46, "grad_norm": 2.1498260498046875, "learning_rate": 1.9742379578507163e-06, "loss": 0.9886, "step": 58350 }, { "epoch": 17.46, "grad_norm": 3.7887089252471924, "learning_rate": 1.97195025495506e-06, "loss": 0.9632, "step": 58355 }, { "epoch": 17.46, "grad_norm": 2.147568702697754, "learning_rate": 1.969663823874651e-06, "loss": 0.911, "step": 58360 }, { "epoch": 17.46, "grad_norm": 3.2035396099090576, "learning_rate": 1.9673786647357693e-06, "loss": 0.8811, "step": 58365 }, { "epoch": 17.46, "grad_norm": 3.055304765701294, "learning_rate": 1.9650947776646223e-06, "loss": 1.1041, "step": 58370 }, { "epoch": 17.47, "grad_norm": 3.3420493602752686, "learning_rate": 1.9628121627873476e-06, "loss": 0.9011, "step": 58375 }, { "epoch": 17.47, "grad_norm": 1.865210771560669, "learning_rate": 1.9605308202300136e-06, "loss": 0.9144, "step": 58380 }, { "epoch": 17.47, "grad_norm": 3.021851062774658, "learning_rate": 1.958250750118612e-06, "loss": 1.0783, "step": 58385 }, { "epoch": 17.47, "grad_norm": 2.6499688625335693, "learning_rate": 1.9559719525790693e-06, "loss": 0.8827, "step": 58390 }, { "epoch": 17.47, "grad_norm": 2.7535033226013184, "learning_rate": 1.953694427737246e-06, "loss": 0.9414, "step": 58395 }, { "epoch": 17.47, "grad_norm": 1.7163090705871582, "learning_rate": 1.9514181757189222e-06, "loss": 0.8349, "step": 58400 }, { "epoch": 17.47, "grad_norm": 1.809923529624939, "learning_rate": 1.9491431966498162e-06, "loss": 0.997, "step": 58405 }, { "epoch": 17.48, "grad_norm": 2.4826242923736572, "learning_rate": 1.94686949065557e-06, "loss": 0.8481, "step": 58410 }, { "epoch": 17.48, "grad_norm": 2.3572394847869873, "learning_rate": 
1.9445970578617605e-06, "loss": 1.0958, "step": 58415 }, { "epoch": 17.48, "grad_norm": 2.6370866298675537, "learning_rate": 1.942325898393896e-06, "loss": 0.8277, "step": 58420 }, { "epoch": 17.48, "grad_norm": 2.8419244289398193, "learning_rate": 1.9400560123773946e-06, "loss": 0.9423, "step": 58425 }, { "epoch": 17.48, "grad_norm": 0.7702686786651611, "learning_rate": 1.937787399937638e-06, "loss": 0.935, "step": 58430 }, { "epoch": 17.48, "grad_norm": 2.7786147594451904, "learning_rate": 1.9355200611999055e-06, "loss": 0.778, "step": 58435 }, { "epoch": 17.48, "grad_norm": 4.474719047546387, "learning_rate": 1.933253996289433e-06, "loss": 0.9768, "step": 58440 }, { "epoch": 17.49, "grad_norm": 2.249441146850586, "learning_rate": 1.930989205331357e-06, "loss": 0.725, "step": 58445 }, { "epoch": 17.49, "grad_norm": 3.9831950664520264, "learning_rate": 1.9287256884507747e-06, "loss": 0.8339, "step": 58450 }, { "epoch": 17.49, "grad_norm": 3.666069269180298, "learning_rate": 1.9264634457726914e-06, "loss": 0.9266, "step": 58455 }, { "epoch": 17.49, "grad_norm": 3.5630712509155273, "learning_rate": 1.924202477422046e-06, "loss": 0.8352, "step": 58460 }, { "epoch": 17.49, "grad_norm": 2.659298896789551, "learning_rate": 1.921942783523711e-06, "loss": 0.942, "step": 58465 }, { "epoch": 17.49, "grad_norm": 4.289942741394043, "learning_rate": 1.9196843642024896e-06, "loss": 0.9033, "step": 58470 }, { "epoch": 17.5, "grad_norm": 5.508975505828857, "learning_rate": 1.9174272195831096e-06, "loss": 1.015, "step": 58475 }, { "epoch": 17.5, "grad_norm": 3.3930578231811523, "learning_rate": 1.915171349790232e-06, "loss": 0.9336, "step": 58480 }, { "epoch": 17.5, "grad_norm": 2.193418264389038, "learning_rate": 1.9129167549484435e-06, "loss": 0.9792, "step": 58485 }, { "epoch": 17.5, "grad_norm": 1.5776050090789795, "learning_rate": 1.9106634351822665e-06, "loss": 1.0228, "step": 58490 }, { "epoch": 17.5, "grad_norm": 1.6216871738433838, "learning_rate": 1.9084113906161464e-06, 
"loss": 0.9904, "step": 58495 }, { "epoch": 17.5, "grad_norm": 2.6556360721588135, "learning_rate": 1.9061606213744642e-06, "loss": 1.0049, "step": 58500 }, { "epoch": 17.5, "grad_norm": 3.8837389945983887, "learning_rate": 1.9039111275815257e-06, "loss": 0.8822, "step": 58505 }, { "epoch": 17.51, "grad_norm": 2.5499298572540283, "learning_rate": 1.9016629093615706e-06, "loss": 1.0682, "step": 58510 }, { "epoch": 17.51, "grad_norm": 4.589527606964111, "learning_rate": 1.8994159668387662e-06, "loss": 0.8225, "step": 58515 }, { "epoch": 17.51, "grad_norm": 2.39145827293396, "learning_rate": 1.8971703001371965e-06, "loss": 0.7831, "step": 58520 }, { "epoch": 17.51, "grad_norm": 2.863334894180298, "learning_rate": 1.8949259093809068e-06, "loss": 0.8574, "step": 58525 }, { "epoch": 17.51, "grad_norm": 1.6067497730255127, "learning_rate": 1.892682794693834e-06, "loss": 0.9947, "step": 58530 }, { "epoch": 17.51, "grad_norm": 2.9973134994506836, "learning_rate": 1.8904409561998793e-06, "loss": 0.8852, "step": 58535 }, { "epoch": 17.51, "grad_norm": 12.899645805358887, "learning_rate": 1.8882003940228431e-06, "loss": 1.015, "step": 58540 }, { "epoch": 17.52, "grad_norm": 4.770505428314209, "learning_rate": 1.8859611082864826e-06, "loss": 1.1247, "step": 58545 }, { "epoch": 17.52, "grad_norm": 1.5795246362686157, "learning_rate": 1.8837230991144622e-06, "loss": 1.0028, "step": 58550 }, { "epoch": 17.52, "grad_norm": 3.8196096420288086, "learning_rate": 1.8814863666303833e-06, "loss": 0.9865, "step": 58555 }, { "epoch": 17.52, "grad_norm": 1.8591629266738892, "learning_rate": 1.879250910957786e-06, "loss": 0.8811, "step": 58560 }, { "epoch": 17.52, "grad_norm": 2.9709808826446533, "learning_rate": 1.877016732220127e-06, "loss": 0.9346, "step": 58565 }, { "epoch": 17.52, "grad_norm": 4.240139961242676, "learning_rate": 1.8747838305407967e-06, "loss": 1.0878, "step": 58570 }, { "epoch": 17.52, "grad_norm": 3.321091890335083, "learning_rate": 1.872552206043121e-06, "loss": 
0.9013, "step": 58575 }, { "epoch": 17.53, "grad_norm": 2.4288010597229004, "learning_rate": 1.8703218588503463e-06, "loss": 0.8679, "step": 58580 }, { "epoch": 17.53, "grad_norm": 3.094264507293701, "learning_rate": 1.8680927890856515e-06, "loss": 1.1084, "step": 58585 }, { "epoch": 17.53, "grad_norm": 2.697632312774658, "learning_rate": 1.8658649968721492e-06, "loss": 1.087, "step": 58590 }, { "epoch": 17.53, "grad_norm": 1.5430232286453247, "learning_rate": 1.8636384823328773e-06, "loss": 0.7268, "step": 58595 }, { "epoch": 17.53, "grad_norm": 3.243159532546997, "learning_rate": 1.861413245590804e-06, "loss": 0.9764, "step": 58600 }, { "epoch": 17.53, "grad_norm": 2.758650541305542, "learning_rate": 1.8591892867688226e-06, "loss": 1.088, "step": 58605 }, { "epoch": 17.54, "grad_norm": 3.265185594558716, "learning_rate": 1.8569666059897655e-06, "loss": 0.9399, "step": 58610 }, { "epoch": 17.54, "grad_norm": 2.0844991207122803, "learning_rate": 1.8547452033763873e-06, "loss": 1.0002, "step": 58615 }, { "epoch": 17.54, "grad_norm": 3.4934003353118896, "learning_rate": 1.852525079051376e-06, "loss": 0.8137, "step": 58620 }, { "epoch": 17.54, "grad_norm": 3.0585551261901855, "learning_rate": 1.8503062331373362e-06, "loss": 0.9706, "step": 58625 }, { "epoch": 17.54, "grad_norm": 2.177954912185669, "learning_rate": 1.8480886657568307e-06, "loss": 0.9228, "step": 58630 }, { "epoch": 17.54, "grad_norm": 8.818868637084961, "learning_rate": 1.8458723770323122e-06, "loss": 0.8919, "step": 58635 }, { "epoch": 17.54, "grad_norm": 4.750818252563477, "learning_rate": 1.8436573670862045e-06, "loss": 0.9735, "step": 58640 }, { "epoch": 17.55, "grad_norm": 4.335287094116211, "learning_rate": 1.8414436360408265e-06, "loss": 0.9834, "step": 58645 }, { "epoch": 17.55, "grad_norm": 3.9974465370178223, "learning_rate": 1.8392311840184445e-06, "loss": 0.9222, "step": 58650 }, { "epoch": 17.55, "grad_norm": 2.57463002204895, "learning_rate": 1.8370200111412495e-06, "loss": 1.0656, 
"step": 58655 }, { "epoch": 17.55, "grad_norm": 3.5472350120544434, "learning_rate": 1.8348101175313632e-06, "loss": 0.8107, "step": 58660 }, { "epoch": 17.55, "grad_norm": 2.0845704078674316, "learning_rate": 1.8326015033108351e-06, "loss": 0.9723, "step": 58665 }, { "epoch": 17.55, "grad_norm": 8.588332176208496, "learning_rate": 1.8303941686016402e-06, "loss": 1.0629, "step": 58670 }, { "epoch": 17.55, "grad_norm": 2.690653085708618, "learning_rate": 1.8281881135257e-06, "loss": 0.9891, "step": 58675 }, { "epoch": 17.56, "grad_norm": 1.5229490995407104, "learning_rate": 1.8259833382048425e-06, "loss": 0.9973, "step": 58680 }, { "epoch": 17.56, "grad_norm": 1.4649319648742676, "learning_rate": 1.8237798427608337e-06, "loss": 0.9495, "step": 58685 }, { "epoch": 17.56, "grad_norm": 4.790569305419922, "learning_rate": 1.8215776273153767e-06, "loss": 0.8056, "step": 58690 }, { "epoch": 17.56, "grad_norm": 5.313488960266113, "learning_rate": 1.8193766919900934e-06, "loss": 0.9817, "step": 58695 }, { "epoch": 17.56, "grad_norm": 1.8241521120071411, "learning_rate": 1.8171770369065422e-06, "loss": 0.9368, "step": 58700 }, { "epoch": 17.56, "grad_norm": 2.6710360050201416, "learning_rate": 1.8149786621862036e-06, "loss": 0.9306, "step": 58705 }, { "epoch": 17.57, "grad_norm": 3.22537899017334, "learning_rate": 1.8127815679504972e-06, "loss": 0.9387, "step": 58710 }, { "epoch": 17.57, "grad_norm": 1.9393733739852905, "learning_rate": 1.8105857543207649e-06, "loss": 1.0874, "step": 58715 }, { "epoch": 17.57, "grad_norm": 4.174792289733887, "learning_rate": 1.808391221418268e-06, "loss": 1.0662, "step": 58720 }, { "epoch": 17.57, "grad_norm": 4.3584513664245605, "learning_rate": 1.8061979693642262e-06, "loss": 0.9845, "step": 58725 }, { "epoch": 17.57, "grad_norm": 2.3744728565216064, "learning_rate": 1.8040059982797563e-06, "loss": 0.9638, "step": 58730 }, { "epoch": 17.57, "grad_norm": 3.663601875305176, "learning_rate": 1.8018153082859313e-06, "loss": 0.9749, "step": 
58735 }, { "epoch": 17.57, "grad_norm": 2.476334810256958, "learning_rate": 1.7996258995037291e-06, "loss": 0.8683, "step": 58740 }, { "epoch": 17.58, "grad_norm": 2.907877206802368, "learning_rate": 1.797437772054067e-06, "loss": 0.8443, "step": 58745 }, { "epoch": 17.58, "grad_norm": 2.9625954627990723, "learning_rate": 1.7952509260578092e-06, "loss": 0.942, "step": 58750 }, { "epoch": 17.58, "grad_norm": 3.8954741954803467, "learning_rate": 1.7930653616357152e-06, "loss": 0.956, "step": 58755 }, { "epoch": 17.58, "grad_norm": 3.0243756771087646, "learning_rate": 1.7908810789085073e-06, "loss": 1.2546, "step": 58760 }, { "epoch": 17.58, "grad_norm": 1.2879854440689087, "learning_rate": 1.7886980779968032e-06, "loss": 0.9019, "step": 58765 }, { "epoch": 17.58, "grad_norm": 5.267662048339844, "learning_rate": 1.786516359021187e-06, "loss": 0.8835, "step": 58770 }, { "epoch": 17.58, "grad_norm": 1.7943377494812012, "learning_rate": 1.7843359221021372e-06, "loss": 0.9029, "step": 58775 }, { "epoch": 17.59, "grad_norm": 5.033475399017334, "learning_rate": 1.7821567673600854e-06, "loss": 0.8688, "step": 58780 }, { "epoch": 17.59, "grad_norm": 3.698159694671631, "learning_rate": 1.7799788949153796e-06, "loss": 1.0448, "step": 58785 }, { "epoch": 17.59, "grad_norm": 2.3559699058532715, "learning_rate": 1.777802304888304e-06, "loss": 0.7721, "step": 58790 }, { "epoch": 17.59, "grad_norm": 3.055173873901367, "learning_rate": 1.7756269973990713e-06, "loss": 0.8932, "step": 58795 }, { "epoch": 17.59, "grad_norm": 2.6839029788970947, "learning_rate": 1.7734529725678184e-06, "loss": 1.1173, "step": 58800 }, { "epoch": 17.59, "grad_norm": 3.986969470977783, "learning_rate": 1.7712802305146131e-06, "loss": 0.8831, "step": 58805 }, { "epoch": 17.6, "grad_norm": 3.205456018447876, "learning_rate": 1.7691087713594595e-06, "loss": 0.91, "step": 58810 }, { "epoch": 17.6, "grad_norm": 3.0267038345336914, "learning_rate": 1.7669385952222755e-06, "loss": 0.9393, "step": 58815 }, { 
"epoch": 17.6, "grad_norm": 3.005690336227417, "learning_rate": 1.7647697022229321e-06, "loss": 0.8653, "step": 58820 }, { "epoch": 17.6, "grad_norm": 3.8134701251983643, "learning_rate": 1.762602092481197e-06, "loss": 1.0195, "step": 58825 }, { "epoch": 17.6, "grad_norm": 5.009188175201416, "learning_rate": 1.760435766116797e-06, "loss": 0.9841, "step": 58830 }, { "epoch": 17.6, "grad_norm": 2.0699079036712646, "learning_rate": 1.758270723249375e-06, "loss": 0.953, "step": 58835 }, { "epoch": 17.6, "grad_norm": 2.2950937747955322, "learning_rate": 1.7561069639985023e-06, "loss": 1.0804, "step": 58840 }, { "epoch": 17.61, "grad_norm": 1.1012648344039917, "learning_rate": 1.753944488483686e-06, "loss": 0.9962, "step": 58845 }, { "epoch": 17.61, "grad_norm": 3.6749930381774902, "learning_rate": 1.7517832968243442e-06, "loss": 1.0799, "step": 58850 }, { "epoch": 17.61, "grad_norm": 3.0792107582092285, "learning_rate": 1.749623389139851e-06, "loss": 0.8246, "step": 58855 }, { "epoch": 17.61, "grad_norm": 5.0220046043396, "learning_rate": 1.7474647655494863e-06, "loss": 0.8415, "step": 58860 }, { "epoch": 17.61, "grad_norm": 3.235442876815796, "learning_rate": 1.7453074261724795e-06, "loss": 0.944, "step": 58865 }, { "epoch": 17.61, "grad_norm": 1.411855936050415, "learning_rate": 1.7431513711279685e-06, "loss": 1.0868, "step": 58870 }, { "epoch": 17.61, "grad_norm": 1.5485836267471313, "learning_rate": 1.7409966005350331e-06, "loss": 0.8386, "step": 58875 }, { "epoch": 17.62, "grad_norm": 2.2840311527252197, "learning_rate": 1.7388431145126782e-06, "loss": 1.0442, "step": 58880 }, { "epoch": 17.62, "grad_norm": 2.2418808937072754, "learning_rate": 1.7366909131798393e-06, "loss": 0.9909, "step": 58885 }, { "epoch": 17.62, "grad_norm": 2.9602952003479004, "learning_rate": 1.7345399966553821e-06, "loss": 1.0754, "step": 58890 }, { "epoch": 17.62, "grad_norm": 3.5335211753845215, "learning_rate": 1.7323903650580953e-06, "loss": 0.9721, "step": 58895 }, { "epoch": 17.62, 
"grad_norm": 3.265909194946289, "learning_rate": 1.7302420185067058e-06, "loss": 0.7851, "step": 58900 }, { "epoch": 17.62, "grad_norm": 1.4548457860946655, "learning_rate": 1.7280949571198607e-06, "loss": 1.0308, "step": 58905 }, { "epoch": 17.63, "grad_norm": 2.820035457611084, "learning_rate": 1.7259491810161399e-06, "loss": 1.0166, "step": 58910 }, { "epoch": 17.63, "grad_norm": 3.4491255283355713, "learning_rate": 1.7238046903140542e-06, "loss": 0.9413, "step": 58915 }, { "epoch": 17.63, "grad_norm": 1.849202275276184, "learning_rate": 1.7216614851320451e-06, "loss": 1.1075, "step": 58920 }, { "epoch": 17.63, "grad_norm": 2.7424187660217285, "learning_rate": 1.7195195655884706e-06, "loss": 1.1669, "step": 58925 }, { "epoch": 17.63, "grad_norm": 2.260991096496582, "learning_rate": 1.717378931801636e-06, "loss": 1.0069, "step": 58930 }, { "epoch": 17.63, "grad_norm": 3.9417827129364014, "learning_rate": 1.7152395838897605e-06, "loss": 0.9019, "step": 58935 }, { "epoch": 17.63, "grad_norm": 3.6205639839172363, "learning_rate": 1.7131015219710029e-06, "loss": 0.974, "step": 58940 }, { "epoch": 17.64, "grad_norm": 2.234180450439453, "learning_rate": 1.7109647461634348e-06, "loss": 0.9879, "step": 58945 }, { "epoch": 17.64, "grad_norm": 3.515759229660034, "learning_rate": 1.7088292565850845e-06, "loss": 1.0616, "step": 58950 }, { "epoch": 17.64, "grad_norm": 1.790371060371399, "learning_rate": 1.7066950533538767e-06, "loss": 0.8907, "step": 58955 }, { "epoch": 17.64, "grad_norm": 1.755277395248413, "learning_rate": 1.7045621365876952e-06, "loss": 0.9207, "step": 58960 }, { "epoch": 17.64, "grad_norm": 1.3836179971694946, "learning_rate": 1.702430506404329e-06, "loss": 1.1258, "step": 58965 }, { "epoch": 17.64, "grad_norm": 2.935575246810913, "learning_rate": 1.7003001629215143e-06, "loss": 0.9072, "step": 58970 }, { "epoch": 17.64, "grad_norm": 2.9841513633728027, "learning_rate": 1.6981711062568989e-06, "loss": 0.9736, "step": 58975 }, { "epoch": 17.65, 
"grad_norm": 8.933850288391113, "learning_rate": 1.696043336528072e-06, "loss": 0.9844, "step": 58980 }, { "epoch": 17.65, "grad_norm": 1.3362852334976196, "learning_rate": 1.6939168538525508e-06, "loss": 1.0783, "step": 58985 }, { "epoch": 17.65, "grad_norm": 3.199964761734009, "learning_rate": 1.6917916583477744e-06, "loss": 0.8654, "step": 58990 }, { "epoch": 17.65, "grad_norm": 2.5024354457855225, "learning_rate": 1.6896677501311186e-06, "loss": 0.9169, "step": 58995 }, { "epoch": 17.65, "grad_norm": 3.7208096981048584, "learning_rate": 1.6875451293198812e-06, "loss": 0.945, "step": 59000 }, { "epoch": 17.65, "grad_norm": 6.412079811096191, "learning_rate": 1.6854237960312963e-06, "loss": 1.0159, "step": 59005 }, { "epoch": 17.66, "grad_norm": 6.456012725830078, "learning_rate": 1.68330375038252e-06, "loss": 0.9998, "step": 59010 }, { "epoch": 17.66, "grad_norm": 1.0914913415908813, "learning_rate": 1.681184992490642e-06, "loss": 0.9272, "step": 59015 }, { "epoch": 17.66, "grad_norm": 1.7044358253479004, "learning_rate": 1.6790675224726798e-06, "loss": 1.0046, "step": 59020 }, { "epoch": 17.66, "grad_norm": 3.6310768127441406, "learning_rate": 1.6769513404455733e-06, "loss": 1.0404, "step": 59025 }, { "epoch": 17.66, "grad_norm": 1.748637080192566, "learning_rate": 1.6748364465262039e-06, "loss": 0.8425, "step": 59030 }, { "epoch": 17.66, "grad_norm": 3.776822090148926, "learning_rate": 1.6727228408313782e-06, "loss": 1.1857, "step": 59035 }, { "epoch": 17.66, "grad_norm": 2.349539279937744, "learning_rate": 1.6706105234778114e-06, "loss": 0.987, "step": 59040 }, { "epoch": 17.67, "grad_norm": 8.603898048400879, "learning_rate": 1.6684994945821847e-06, "loss": 0.8815, "step": 59045 }, { "epoch": 17.67, "grad_norm": 3.434723377227783, "learning_rate": 1.6663897542610718e-06, "loss": 0.9244, "step": 59050 }, { "epoch": 17.67, "grad_norm": 3.597698211669922, "learning_rate": 1.6642813026310072e-06, "loss": 0.7415, "step": 59055 }, { "epoch": 17.67, "grad_norm": 
4.349695205688477, "learning_rate": 1.6621741398084201e-06, "loss": 0.9339, "step": 59060 }, { "epoch": 17.67, "grad_norm": 1.6020125150680542, "learning_rate": 1.6600682659097062e-06, "loss": 0.9233, "step": 59065 }, { "epoch": 17.67, "grad_norm": 3.7563843727111816, "learning_rate": 1.6579636810511584e-06, "loss": 1.053, "step": 59070 }, { "epoch": 17.67, "grad_norm": 1.2841134071350098, "learning_rate": 1.6558603853490145e-06, "loss": 0.9631, "step": 59075 }, { "epoch": 17.68, "grad_norm": 2.884526014328003, "learning_rate": 1.6537583789194345e-06, "loss": 0.7626, "step": 59080 }, { "epoch": 17.68, "grad_norm": 1.4702849388122559, "learning_rate": 1.651657661878514e-06, "loss": 0.8606, "step": 59085 }, { "epoch": 17.68, "grad_norm": 3.296031951904297, "learning_rate": 1.6495582343422688e-06, "loss": 0.9722, "step": 59090 }, { "epoch": 17.68, "grad_norm": 2.626152753829956, "learning_rate": 1.6474600964266534e-06, "loss": 0.8891, "step": 59095 }, { "epoch": 17.68, "grad_norm": 6.174269676208496, "learning_rate": 1.6453632482475418e-06, "loss": 1.2104, "step": 59100 }, { "epoch": 17.68, "grad_norm": 2.552041530609131, "learning_rate": 1.6432676899207439e-06, "loss": 1.1366, "step": 59105 }, { "epoch": 17.69, "grad_norm": 2.2161214351654053, "learning_rate": 1.6411734215619923e-06, "loss": 0.8592, "step": 59110 }, { "epoch": 17.69, "grad_norm": 3.290187120437622, "learning_rate": 1.6390804432869527e-06, "loss": 1.1547, "step": 59115 }, { "epoch": 17.69, "grad_norm": 4.858699798583984, "learning_rate": 1.6369887552112163e-06, "loss": 0.7281, "step": 59120 }, { "epoch": 17.69, "grad_norm": 3.530651569366455, "learning_rate": 1.6348983574503097e-06, "loss": 0.983, "step": 59125 }, { "epoch": 17.69, "grad_norm": 3.629481077194214, "learning_rate": 1.6328092501196823e-06, "loss": 0.8718, "step": 59130 }, { "epoch": 17.69, "grad_norm": 1.404110074043274, "learning_rate": 1.6307214333347032e-06, "loss": 0.9193, "step": 59135 }, { "epoch": 17.69, "grad_norm": 
2.185800313949585, "learning_rate": 1.6286349072106965e-06, "loss": 0.8672, "step": 59140 }, { "epoch": 17.7, "grad_norm": 3.7186684608459473, "learning_rate": 1.6265496718628815e-06, "loss": 0.9765, "step": 59145 }, { "epoch": 17.7, "grad_norm": 4.324354648590088, "learning_rate": 1.6244657274064407e-06, "loss": 0.9801, "step": 59150 }, { "epoch": 17.7, "grad_norm": 3.0012760162353516, "learning_rate": 1.6223830739564544e-06, "loss": 0.9174, "step": 59155 }, { "epoch": 17.7, "grad_norm": 2.157304286956787, "learning_rate": 1.6203017116279584e-06, "loss": 0.9751, "step": 59160 }, { "epoch": 17.7, "grad_norm": 1.090956449508667, "learning_rate": 1.618221640535894e-06, "loss": 0.891, "step": 59165 }, { "epoch": 17.7, "grad_norm": 3.6914825439453125, "learning_rate": 1.6161428607951417e-06, "loss": 1.0343, "step": 59170 }, { "epoch": 17.7, "grad_norm": 3.7854952812194824, "learning_rate": 1.6140653725205152e-06, "loss": 1.0016, "step": 59175 }, { "epoch": 17.71, "grad_norm": 4.192695617675781, "learning_rate": 1.6119891758267503e-06, "loss": 1.0018, "step": 59180 }, { "epoch": 17.71, "grad_norm": 2.511425256729126, "learning_rate": 1.6099142708285108e-06, "loss": 0.9797, "step": 59185 }, { "epoch": 17.71, "grad_norm": 2.2904865741729736, "learning_rate": 1.6078406576403943e-06, "loss": 0.8575, "step": 59190 }, { "epoch": 17.71, "grad_norm": 2.1356563568115234, "learning_rate": 1.6057683363769227e-06, "loss": 1.042, "step": 59195 }, { "epoch": 17.71, "grad_norm": 2.6813127994537354, "learning_rate": 1.6036973071525518e-06, "loss": 0.6984, "step": 59200 }, { "epoch": 17.71, "grad_norm": 2.049669027328491, "learning_rate": 1.6016275700816568e-06, "loss": 1.1138, "step": 59205 }, { "epoch": 17.71, "grad_norm": 1.8621686697006226, "learning_rate": 1.599559125278549e-06, "loss": 1.0503, "step": 59210 }, { "epoch": 17.72, "grad_norm": 2.028589963912964, "learning_rate": 1.5974919728574706e-06, "loss": 0.7726, "step": 59215 }, { "epoch": 17.72, "grad_norm": 6.469555377960205, 
"learning_rate": 1.5954261129325825e-06, "loss": 1.0135, "step": 59220 }, { "epoch": 17.72, "grad_norm": 3.396925449371338, "learning_rate": 1.5933615456179885e-06, "loss": 1.0272, "step": 59225 }, { "epoch": 17.72, "grad_norm": 3.9737088680267334, "learning_rate": 1.5912982710276968e-06, "loss": 0.975, "step": 59230 }, { "epoch": 17.72, "grad_norm": 3.608037233352661, "learning_rate": 1.5892362892756779e-06, "loss": 0.9554, "step": 59235 }, { "epoch": 17.72, "grad_norm": 1.3548368215560913, "learning_rate": 1.587175600475796e-06, "loss": 0.9917, "step": 59240 }, { "epoch": 17.73, "grad_norm": 5.358870983123779, "learning_rate": 1.5851162047418793e-06, "loss": 1.0518, "step": 59245 }, { "epoch": 17.73, "grad_norm": 1.2182296514511108, "learning_rate": 1.5830581021876455e-06, "loss": 1.0005, "step": 59250 }, { "epoch": 17.73, "grad_norm": 2.458919048309326, "learning_rate": 1.5810012929267815e-06, "loss": 1.116, "step": 59255 }, { "epoch": 17.73, "grad_norm": 2.0615477561950684, "learning_rate": 1.578945777072871e-06, "loss": 0.8852, "step": 59260 }, { "epoch": 17.73, "grad_norm": 3.5121617317199707, "learning_rate": 1.5768915547394375e-06, "loss": 1.0029, "step": 59265 }, { "epoch": 17.73, "grad_norm": 2.439354658126831, "learning_rate": 1.57483862603994e-06, "loss": 1.0474, "step": 59270 }, { "epoch": 17.73, "grad_norm": 1.8715204000473022, "learning_rate": 1.5727869910877547e-06, "loss": 1.1325, "step": 59275 }, { "epoch": 17.74, "grad_norm": 1.4641121625900269, "learning_rate": 1.5707366499961907e-06, "loss": 0.8829, "step": 59280 }, { "epoch": 17.74, "grad_norm": 2.544196367263794, "learning_rate": 1.5686876028784907e-06, "loss": 1.0373, "step": 59285 }, { "epoch": 17.74, "grad_norm": 2.298362970352173, "learning_rate": 1.5666398498478175e-06, "loss": 1.0759, "step": 59290 }, { "epoch": 17.74, "grad_norm": 3.0841598510742188, "learning_rate": 1.564593391017266e-06, "loss": 0.9647, "step": 59295 }, { "epoch": 17.74, "grad_norm": 4.1196722984313965, 
"learning_rate": 1.5625482264998658e-06, "loss": 0.8123, "step": 59300 }, { "epoch": 17.74, "grad_norm": 1.699238657951355, "learning_rate": 1.5605043564085626e-06, "loss": 1.1025, "step": 59305 }, { "epoch": 17.74, "grad_norm": 3.654175043106079, "learning_rate": 1.5584617808562407e-06, "loss": 0.9208, "step": 59310 }, { "epoch": 17.75, "grad_norm": 1.3798152208328247, "learning_rate": 1.5564204999557075e-06, "loss": 0.9044, "step": 59315 }, { "epoch": 17.75, "grad_norm": 3.55618953704834, "learning_rate": 1.554380513819706e-06, "loss": 1.0186, "step": 59320 }, { "epoch": 17.75, "grad_norm": 3.153846263885498, "learning_rate": 1.5523418225608904e-06, "loss": 0.9396, "step": 59325 }, { "epoch": 17.75, "grad_norm": 2.9710476398468018, "learning_rate": 1.5503044262918738e-06, "loss": 0.8835, "step": 59330 }, { "epoch": 17.75, "grad_norm": 3.084639072418213, "learning_rate": 1.5482683251251572e-06, "loss": 0.8919, "step": 59335 }, { "epoch": 17.75, "grad_norm": 2.0667994022369385, "learning_rate": 1.5462335191732153e-06, "loss": 1.0935, "step": 59340 }, { "epoch": 17.76, "grad_norm": 2.048433303833008, "learning_rate": 1.5442000085484077e-06, "loss": 1.0158, "step": 59345 }, { "epoch": 17.76, "grad_norm": 3.5328614711761475, "learning_rate": 1.5421677933630612e-06, "loss": 0.923, "step": 59350 }, { "epoch": 17.76, "grad_norm": 1.7786338329315186, "learning_rate": 1.5401368737294003e-06, "loss": 0.9076, "step": 59355 }, { "epoch": 17.76, "grad_norm": 2.726869583129883, "learning_rate": 1.5381072497595961e-06, "loss": 0.9152, "step": 59360 }, { "epoch": 17.76, "grad_norm": 3.708962917327881, "learning_rate": 1.5360789215657395e-06, "loss": 0.9758, "step": 59365 }, { "epoch": 17.76, "grad_norm": 4.803073406219482, "learning_rate": 1.5340518892598548e-06, "loss": 1.1745, "step": 59370 }, { "epoch": 17.76, "grad_norm": 2.451441764831543, "learning_rate": 1.5320261529538943e-06, "loss": 0.9447, "step": 59375 }, { "epoch": 17.77, "grad_norm": 6.812480449676514, 
"learning_rate": 1.530001712759735e-06, "loss": 0.9028, "step": 59380 }, { "epoch": 17.77, "grad_norm": 6.00867223739624, "learning_rate": 1.5279785687891846e-06, "loss": 0.8863, "step": 59385 }, { "epoch": 17.77, "grad_norm": 3.013056516647339, "learning_rate": 1.5259567211539788e-06, "loss": 0.9852, "step": 59390 }, { "epoch": 17.77, "grad_norm": 1.796088457107544, "learning_rate": 1.5239361699657867e-06, "loss": 0.9819, "step": 59395 }, { "epoch": 17.77, "grad_norm": 4.895589351654053, "learning_rate": 1.5219169153361967e-06, "loss": 0.9859, "step": 59400 }, { "epoch": 17.77, "grad_norm": 1.5124635696411133, "learning_rate": 1.5198989573767308e-06, "loss": 0.9221, "step": 59405 }, { "epoch": 17.77, "grad_norm": 2.5901122093200684, "learning_rate": 1.5178822961988387e-06, "loss": 0.8347, "step": 59410 }, { "epoch": 17.78, "grad_norm": 5.9025397300720215, "learning_rate": 1.5158669319139007e-06, "loss": 0.9824, "step": 59415 }, { "epoch": 17.78, "grad_norm": 2.827467918395996, "learning_rate": 1.5138528646332195e-06, "loss": 1.1154, "step": 59420 }, { "epoch": 17.78, "grad_norm": 2.0498061180114746, "learning_rate": 1.5118400944680366e-06, "loss": 0.9927, "step": 59425 }, { "epoch": 17.78, "grad_norm": 1.4182449579238892, "learning_rate": 1.509828621529505e-06, "loss": 0.8675, "step": 59430 }, { "epoch": 17.78, "grad_norm": 4.468570232391357, "learning_rate": 1.507818445928727e-06, "loss": 0.9225, "step": 59435 }, { "epoch": 17.78, "grad_norm": 2.804314374923706, "learning_rate": 1.5058095677767113e-06, "loss": 1.0115, "step": 59440 }, { "epoch": 17.79, "grad_norm": 3.139026641845703, "learning_rate": 1.5038019871844222e-06, "loss": 0.9551, "step": 59445 }, { "epoch": 17.79, "grad_norm": 2.992780923843384, "learning_rate": 1.501795704262718e-06, "loss": 1.0013, "step": 59450 }, { "epoch": 17.79, "grad_norm": 4.901608943939209, "learning_rate": 1.4997907191224152e-06, "loss": 1.04, "step": 59455 }, { "epoch": 17.79, "grad_norm": 2.3052425384521484, "learning_rate": 
1.4977870318742426e-06, "loss": 0.9111, "step": 59460 }, { "epoch": 17.79, "grad_norm": 3.878119707107544, "learning_rate": 1.495784642628864e-06, "loss": 1.0182, "step": 59465 }, { "epoch": 17.79, "grad_norm": 3.6295268535614014, "learning_rate": 1.4937835514968663e-06, "loss": 0.9679, "step": 59470 }, { "epoch": 17.79, "grad_norm": 3.5464370250701904, "learning_rate": 1.491783758588769e-06, "loss": 1.0276, "step": 59475 }, { "epoch": 17.8, "grad_norm": 5.657863140106201, "learning_rate": 1.4897852640150228e-06, "loss": 1.0774, "step": 59480 }, { "epoch": 17.8, "grad_norm": 1.0737026929855347, "learning_rate": 1.487788067885998e-06, "loss": 0.9233, "step": 59485 }, { "epoch": 17.8, "grad_norm": 10.904875755310059, "learning_rate": 1.4857921703119976e-06, "loss": 0.7518, "step": 59490 }, { "epoch": 17.8, "grad_norm": 2.129427909851074, "learning_rate": 1.483797571403256e-06, "loss": 0.9003, "step": 59495 }, { "epoch": 17.8, "grad_norm": 2.353217601776123, "learning_rate": 1.4818042712699294e-06, "loss": 1.0227, "step": 59500 }, { "epoch": 17.8, "grad_norm": 2.1998493671417236, "learning_rate": 1.4798122700221074e-06, "loss": 1.0736, "step": 59505 }, { "epoch": 17.8, "grad_norm": 5.145125865936279, "learning_rate": 1.4778215677698048e-06, "loss": 0.916, "step": 59510 }, { "epoch": 17.81, "grad_norm": 1.4517490863800049, "learning_rate": 1.4758321646229672e-06, "loss": 0.8587, "step": 59515 }, { "epoch": 17.81, "grad_norm": 1.5610160827636719, "learning_rate": 1.4738440606914706e-06, "loss": 1.056, "step": 59520 }, { "epoch": 17.81, "grad_norm": 7.770084857940674, "learning_rate": 1.4718572560851073e-06, "loss": 0.8253, "step": 59525 }, { "epoch": 17.81, "grad_norm": 2.7066264152526855, "learning_rate": 1.469871750913618e-06, "loss": 1.0866, "step": 59530 }, { "epoch": 17.81, "grad_norm": 4.087286949157715, "learning_rate": 1.4678875452866448e-06, "loss": 1.0194, "step": 59535 }, { "epoch": 17.81, "grad_norm": 1.208440899848938, "learning_rate": 
1.4659046393137893e-06, "loss": 0.9872, "step": 59540 }, { "epoch": 17.82, "grad_norm": 3.085111141204834, "learning_rate": 1.4639230331045528e-06, "loss": 0.8561, "step": 59545 }, { "epoch": 17.82, "grad_norm": 1.8525588512420654, "learning_rate": 1.4619427267683894e-06, "loss": 0.8675, "step": 59550 }, { "epoch": 17.82, "grad_norm": 3.9756102561950684, "learning_rate": 1.4599637204146587e-06, "loss": 0.8729, "step": 59555 }, { "epoch": 17.82, "grad_norm": 3.060441017150879, "learning_rate": 1.4579860141526624e-06, "loss": 0.9836, "step": 59560 }, { "epoch": 17.82, "grad_norm": 2.611800193786621, "learning_rate": 1.4560096080916325e-06, "loss": 0.9945, "step": 59565 }, { "epoch": 17.82, "grad_norm": 3.2961905002593994, "learning_rate": 1.454034502340712e-06, "loss": 0.9673, "step": 59570 }, { "epoch": 17.82, "grad_norm": 3.6914517879486084, "learning_rate": 1.452060697009e-06, "loss": 0.9933, "step": 59575 }, { "epoch": 17.83, "grad_norm": 1.3202792406082153, "learning_rate": 1.4500881922054926e-06, "loss": 1.0733, "step": 59580 }, { "epoch": 17.83, "grad_norm": 3.6315410137176514, "learning_rate": 1.4481169880391388e-06, "loss": 1.0638, "step": 59585 }, { "epoch": 17.83, "grad_norm": 3.1422181129455566, "learning_rate": 1.4461470846188013e-06, "loss": 1.0285, "step": 59590 }, { "epoch": 17.83, "grad_norm": 2.8239080905914307, "learning_rate": 1.4441784820532766e-06, "loss": 0.9607, "step": 59595 }, { "epoch": 17.83, "grad_norm": 2.704333782196045, "learning_rate": 1.4422111804512916e-06, "loss": 0.9963, "step": 59600 }, { "epoch": 17.83, "grad_norm": 2.927246332168579, "learning_rate": 1.4402451799214927e-06, "loss": 1.0692, "step": 59605 }, { "epoch": 17.83, "grad_norm": 3.1765265464782715, "learning_rate": 1.438280480572468e-06, "loss": 0.8938, "step": 59610 }, { "epoch": 17.84, "grad_norm": 4.268390655517578, "learning_rate": 1.4363170825127226e-06, "loss": 1.0705, "step": 59615 }, { "epoch": 17.84, "grad_norm": 16.1699275970459, "learning_rate": 
1.4343549858506833e-06, "loss": 0.9742, "step": 59620 }, { "epoch": 17.84, "grad_norm": 2.6375677585601807, "learning_rate": 1.432394190694733e-06, "loss": 0.9267, "step": 59625 }, { "epoch": 17.84, "grad_norm": 1.9000338315963745, "learning_rate": 1.4304346971531434e-06, "loss": 0.994, "step": 59630 }, { "epoch": 17.84, "grad_norm": 3.127405881881714, "learning_rate": 1.4284765053341532e-06, "loss": 1.0222, "step": 59635 }, { "epoch": 17.84, "grad_norm": 2.3889312744140625, "learning_rate": 1.4265196153459032e-06, "loss": 0.7955, "step": 59640 }, { "epoch": 17.85, "grad_norm": 2.0697555541992188, "learning_rate": 1.424564027296471e-06, "loss": 1.0505, "step": 59645 }, { "epoch": 17.85, "grad_norm": 3.0797669887542725, "learning_rate": 1.4226097412938677e-06, "loss": 0.9022, "step": 59650 }, { "epoch": 17.85, "grad_norm": 3.074493885040283, "learning_rate": 1.4206567574460121e-06, "loss": 0.9206, "step": 59655 }, { "epoch": 17.85, "grad_norm": 2.3382043838500977, "learning_rate": 1.4187050758607824e-06, "loss": 1.0961, "step": 59660 }, { "epoch": 17.85, "grad_norm": 4.023479461669922, "learning_rate": 1.4167546966459527e-06, "loss": 0.9317, "step": 59665 }, { "epoch": 17.85, "grad_norm": 1.3876618146896362, "learning_rate": 1.415195331053179e-06, "loss": 1.0919, "step": 59670 }, { "epoch": 17.85, "grad_norm": 2.543372869491577, "learning_rate": 1.4132472963764826e-06, "loss": 0.8302, "step": 59675 }, { "epoch": 17.86, "grad_norm": 3.604238271713257, "learning_rate": 1.411300564371626e-06, "loss": 1.0208, "step": 59680 }, { "epoch": 17.86, "grad_norm": 3.2941768169403076, "learning_rate": 1.4093551351461149e-06, "loss": 1.0286, "step": 59685 }, { "epoch": 17.86, "grad_norm": 3.470616579055786, "learning_rate": 1.407411008807405e-06, "loss": 1.003, "step": 59690 }, { "epoch": 17.86, "grad_norm": 4.124062538146973, "learning_rate": 1.4054681854628548e-06, "loss": 0.9581, "step": 59695 }, { "epoch": 17.86, "grad_norm": 2.6687521934509277, "learning_rate": 
1.4035266652197786e-06, "loss": 0.9555, "step": 59700 }, { "epoch": 17.86, "grad_norm": 6.374357223510742, "learning_rate": 1.4015864481853963e-06, "loss": 0.9077, "step": 59705 }, { "epoch": 17.86, "grad_norm": 5.034076690673828, "learning_rate": 1.3996475344668636e-06, "loss": 0.9172, "step": 59710 }, { "epoch": 17.87, "grad_norm": 5.536380767822266, "learning_rate": 1.3977099241712643e-06, "loss": 1.0173, "step": 59715 }, { "epoch": 17.87, "grad_norm": 2.3674838542938232, "learning_rate": 1.3957736174056157e-06, "loss": 1.0644, "step": 59720 }, { "epoch": 17.87, "grad_norm": 6.831422805786133, "learning_rate": 1.3938386142768517e-06, "loss": 1.0125, "step": 59725 }, { "epoch": 17.87, "grad_norm": 2.7988386154174805, "learning_rate": 1.3919049148918478e-06, "loss": 0.9512, "step": 59730 }, { "epoch": 17.87, "grad_norm": 1.8349326848983765, "learning_rate": 1.3899725193573937e-06, "loss": 0.8805, "step": 59735 }, { "epoch": 17.87, "grad_norm": 4.807056903839111, "learning_rate": 1.3880414277802178e-06, "loss": 0.9358, "step": 59740 }, { "epoch": 17.88, "grad_norm": 1.473981261253357, "learning_rate": 1.3861116402669683e-06, "loss": 1.0964, "step": 59745 }, { "epoch": 17.88, "grad_norm": 1.3169115781784058, "learning_rate": 1.384183156924229e-06, "loss": 1.0681, "step": 59750 }, { "epoch": 17.88, "grad_norm": 3.194916009902954, "learning_rate": 1.382255977858507e-06, "loss": 1.1128, "step": 59755 }, { "epoch": 17.88, "grad_norm": 3.641432285308838, "learning_rate": 1.3803301031762362e-06, "loss": 0.9769, "step": 59760 }, { "epoch": 17.88, "grad_norm": 1.8899619579315186, "learning_rate": 1.378405532983787e-06, "loss": 1.1635, "step": 59765 }, { "epoch": 17.88, "grad_norm": 2.5514962673187256, "learning_rate": 1.3764822673874411e-06, "loss": 1.0953, "step": 59770 }, { "epoch": 17.88, "grad_norm": 2.5154950618743896, "learning_rate": 1.3745603064934303e-06, "loss": 0.8913, "step": 59775 }, { "epoch": 17.89, "grad_norm": 2.303077220916748, "learning_rate": 
1.3726396504078892e-06, "loss": 0.865, "step": 59780 }, { "epoch": 17.89, "grad_norm": 4.337673664093018, "learning_rate": 1.3707202992369078e-06, "loss": 0.8839, "step": 59785 }, { "epoch": 17.89, "grad_norm": 3.780333995819092, "learning_rate": 1.3688022530864763e-06, "loss": 0.8552, "step": 59790 }, { "epoch": 17.89, "grad_norm": 1.5258413553237915, "learning_rate": 1.3668855120625406e-06, "loss": 0.8518, "step": 59795 }, { "epoch": 17.89, "grad_norm": 2.358565330505371, "learning_rate": 1.3649700762709495e-06, "loss": 0.954, "step": 59800 }, { "epoch": 17.89, "grad_norm": 1.6710665225982666, "learning_rate": 1.363055945817493e-06, "loss": 1.056, "step": 59805 }, { "epoch": 17.89, "grad_norm": 2.9221675395965576, "learning_rate": 1.3611431208078896e-06, "loss": 1.0636, "step": 59810 }, { "epoch": 17.9, "grad_norm": 2.2900736331939697, "learning_rate": 1.359231601347777e-06, "loss": 0.9176, "step": 59815 }, { "epoch": 17.9, "grad_norm": 2.580756187438965, "learning_rate": 1.3573213875427316e-06, "loss": 0.9621, "step": 59820 }, { "epoch": 17.9, "grad_norm": 3.829488515853882, "learning_rate": 1.3554124794982498e-06, "loss": 0.919, "step": 59825 }, { "epoch": 17.9, "grad_norm": 2.805250883102417, "learning_rate": 1.3535048773197611e-06, "loss": 0.9749, "step": 59830 }, { "epoch": 17.9, "grad_norm": 1.3262838125228882, "learning_rate": 1.3515985811126174e-06, "loss": 1.0604, "step": 59835 }, { "epoch": 17.9, "grad_norm": 3.3138718605041504, "learning_rate": 1.349693590982104e-06, "loss": 1.0974, "step": 59840 }, { "epoch": 17.9, "grad_norm": 1.5498145818710327, "learning_rate": 1.347789907033431e-06, "loss": 0.9018, "step": 59845 }, { "epoch": 17.91, "grad_norm": 2.336205244064331, "learning_rate": 1.3458875293717366e-06, "loss": 1.0362, "step": 59850 }, { "epoch": 17.91, "grad_norm": 2.3710501194000244, "learning_rate": 1.343986458102084e-06, "loss": 0.9496, "step": 59855 }, { "epoch": 17.91, "grad_norm": 5.016022205352783, "learning_rate": 1.3420866933294752e-06, 
"loss": 1.1354, "step": 59860 }, { "epoch": 17.91, "grad_norm": 5.362601280212402, "learning_rate": 1.3401882351588207e-06, "loss": 0.7814, "step": 59865 }, { "epoch": 17.91, "grad_norm": 1.2273238897323608, "learning_rate": 1.338291083694984e-06, "loss": 1.0238, "step": 59870 }, { "epoch": 17.91, "grad_norm": 2.9525585174560547, "learning_rate": 1.3363952390427286e-06, "loss": 0.8545, "step": 59875 }, { "epoch": 17.92, "grad_norm": 2.0316078662872314, "learning_rate": 1.3345007013067763e-06, "loss": 1.1211, "step": 59880 }, { "epoch": 17.92, "grad_norm": 3.018373727798462, "learning_rate": 1.3326074705917401e-06, "loss": 0.8393, "step": 59885 }, { "epoch": 17.92, "grad_norm": 1.7973732948303223, "learning_rate": 1.3307155470022038e-06, "loss": 0.9589, "step": 59890 }, { "epoch": 17.92, "grad_norm": 1.8492597341537476, "learning_rate": 1.3288249306426387e-06, "loss": 1.0118, "step": 59895 }, { "epoch": 17.92, "grad_norm": 2.7371528148651123, "learning_rate": 1.3269356216174645e-06, "loss": 1.0925, "step": 59900 }, { "epoch": 17.92, "grad_norm": 3.346501350402832, "learning_rate": 1.3250476200310363e-06, "loss": 1.0044, "step": 59905 }, { "epoch": 17.92, "grad_norm": 2.5383148193359375, "learning_rate": 1.3231609259876126e-06, "loss": 0.7532, "step": 59910 }, { "epoch": 17.93, "grad_norm": 2.1317331790924072, "learning_rate": 1.3212755395914074e-06, "loss": 1.1127, "step": 59915 }, { "epoch": 17.93, "grad_norm": 1.8375227451324463, "learning_rate": 1.319391460946534e-06, "loss": 1.0262, "step": 59920 }, { "epoch": 17.93, "grad_norm": 1.335551142692566, "learning_rate": 1.3175086901570626e-06, "loss": 0.9814, "step": 59925 }, { "epoch": 17.93, "grad_norm": 1.4829717874526978, "learning_rate": 1.3156272273269682e-06, "loss": 0.8828, "step": 59930 }, { "epoch": 17.93, "grad_norm": 1.677444577217102, "learning_rate": 1.313747072560162e-06, "loss": 0.8892, "step": 59935 }, { "epoch": 17.93, "grad_norm": 5.412178993225098, "learning_rate": 1.3118682259604832e-06, "loss": 
0.9235, "step": 59940 }, { "epoch": 17.93, "grad_norm": 2.3385939598083496, "learning_rate": 1.3099906876317014e-06, "loss": 1.1063, "step": 59945 }, { "epoch": 17.94, "grad_norm": 10.752957344055176, "learning_rate": 1.3081144576775089e-06, "loss": 0.9414, "step": 59950 }, { "epoch": 17.94, "grad_norm": 5.151787281036377, "learning_rate": 1.306239536201531e-06, "loss": 0.8772, "step": 59955 }, { "epoch": 17.94, "grad_norm": 1.5891977548599243, "learning_rate": 1.3043659233073102e-06, "loss": 0.951, "step": 59960 }, { "epoch": 17.94, "grad_norm": 3.044320583343506, "learning_rate": 1.3024936190983355e-06, "loss": 1.0105, "step": 59965 }, { "epoch": 17.94, "grad_norm": 4.976190090179443, "learning_rate": 1.3006226236779996e-06, "loss": 0.9063, "step": 59970 }, { "epoch": 17.94, "grad_norm": 3.8189163208007812, "learning_rate": 1.2987529371496498e-06, "loss": 0.9465, "step": 59975 }, { "epoch": 17.95, "grad_norm": 1.8560311794281006, "learning_rate": 1.296884559616529e-06, "loss": 1.015, "step": 59980 }, { "epoch": 17.95, "grad_norm": 4.682796478271484, "learning_rate": 1.2950174911818407e-06, "loss": 0.8412, "step": 59985 }, { "epoch": 17.95, "grad_norm": 1.3414361476898193, "learning_rate": 1.2931517319487024e-06, "loss": 0.9081, "step": 59990 }, { "epoch": 17.95, "grad_norm": 3.5960850715637207, "learning_rate": 1.2912872820201427e-06, "loss": 1.1309, "step": 59995 }, { "epoch": 17.95, "grad_norm": 2.8373448848724365, "learning_rate": 1.2894241414991488e-06, "loss": 0.9147, "step": 60000 }, { "epoch": 17.95, "grad_norm": 2.71930193901062, "learning_rate": 1.2875623104886104e-06, "loss": 0.9289, "step": 60005 }, { "epoch": 17.95, "grad_norm": 2.6316206455230713, "learning_rate": 1.2857017890913619e-06, "loss": 0.8153, "step": 60010 }, { "epoch": 17.96, "grad_norm": 5.35076379776001, "learning_rate": 1.2838425774101466e-06, "loss": 0.8993, "step": 60015 }, { "epoch": 17.96, "grad_norm": 2.4414889812469482, "learning_rate": 1.2819846755476622e-06, "loss": 0.993, 
"step": 60020 }, { "epoch": 17.96, "grad_norm": 2.973344087600708, "learning_rate": 1.2801280836065077e-06, "loss": 1.1067, "step": 60025 }, { "epoch": 17.96, "grad_norm": 3.7870774269104004, "learning_rate": 1.2782728016892231e-06, "loss": 1.0017, "step": 60030 }, { "epoch": 17.96, "grad_norm": 1.356503963470459, "learning_rate": 1.2764188298982738e-06, "loss": 0.9037, "step": 60035 }, { "epoch": 17.96, "grad_norm": 1.4454678297042847, "learning_rate": 1.2745661683360554e-06, "loss": 1.0378, "step": 60040 }, { "epoch": 17.96, "grad_norm": 1.8113714456558228, "learning_rate": 1.2727148171048836e-06, "loss": 0.8887, "step": 60045 }, { "epoch": 17.97, "grad_norm": 4.557557582855225, "learning_rate": 1.2708647763070152e-06, "loss": 0.8867, "step": 60050 }, { "epoch": 17.97, "grad_norm": 2.7099125385284424, "learning_rate": 1.2690160460446105e-06, "loss": 0.9419, "step": 60055 }, { "epoch": 17.97, "grad_norm": 4.08752965927124, "learning_rate": 1.2671686264197874e-06, "loss": 0.8125, "step": 60060 }, { "epoch": 17.97, "grad_norm": 1.8687279224395752, "learning_rate": 1.265322517534573e-06, "loss": 0.7801, "step": 60065 }, { "epoch": 17.97, "grad_norm": 4.718087673187256, "learning_rate": 1.2634777194909242e-06, "loss": 0.9194, "step": 60070 }, { "epoch": 17.97, "grad_norm": 2.097789764404297, "learning_rate": 1.2616342323907293e-06, "loss": 0.9047, "step": 60075 }, { "epoch": 17.98, "grad_norm": 2.0857861042022705, "learning_rate": 1.2597920563357984e-06, "loss": 0.9454, "step": 60080 }, { "epoch": 17.98, "grad_norm": 1.1246609687805176, "learning_rate": 1.2579511914278807e-06, "loss": 1.0526, "step": 60085 }, { "epoch": 17.98, "grad_norm": 3.467363119125366, "learning_rate": 1.2561116377686338e-06, "loss": 0.8471, "step": 60090 }, { "epoch": 17.98, "grad_norm": 1.099082350730896, "learning_rate": 1.254273395459668e-06, "loss": 1.0249, "step": 60095 }, { "epoch": 17.98, "grad_norm": 3.2694308757781982, "learning_rate": 1.252436464602494e-06, "loss": 1.0626, "step": 
60100 }, { "epoch": 17.98, "grad_norm": 5.15191125869751, "learning_rate": 1.2506008452985746e-06, "loss": 0.9525, "step": 60105 }, { "epoch": 17.98, "grad_norm": 4.068530082702637, "learning_rate": 1.248766537649279e-06, "loss": 1.0582, "step": 60110 }, { "epoch": 17.99, "grad_norm": 1.9388269186019897, "learning_rate": 1.2469335417559236e-06, "loss": 0.8859, "step": 60115 }, { "epoch": 17.99, "grad_norm": 3.2138922214508057, "learning_rate": 1.245101857719738e-06, "loss": 0.856, "step": 60120 }, { "epoch": 17.99, "grad_norm": 2.7871532440185547, "learning_rate": 1.2432714856418837e-06, "loss": 1.0063, "step": 60125 }, { "epoch": 17.99, "grad_norm": 4.555139541625977, "learning_rate": 1.241442425623454e-06, "loss": 0.7705, "step": 60130 }, { "epoch": 17.99, "grad_norm": 4.78867244720459, "learning_rate": 1.2396146777654604e-06, "loss": 1.1051, "step": 60135 }, { "epoch": 17.99, "grad_norm": 2.8767669200897217, "learning_rate": 1.2377882421688526e-06, "loss": 0.8788, "step": 60140 }, { "epoch": 17.99, "grad_norm": 5.313528060913086, "learning_rate": 1.2359631189344994e-06, "loss": 1.0437, "step": 60145 }, { "epoch": 18.0, "grad_norm": 3.7219114303588867, "learning_rate": 1.234139308163204e-06, "loss": 0.8678, "step": 60150 }, { "epoch": 18.0, "grad_norm": 3.0899860858917236, "learning_rate": 1.2323168099556886e-06, "loss": 0.7888, "step": 60155 }, { "epoch": 18.0, "grad_norm": 1.9980682134628296, "learning_rate": 1.2304956244126115e-06, "loss": 1.1034, "step": 60160 }, { "epoch": 18.0, "grad_norm": 2.616227626800537, "learning_rate": 1.228675751634556e-06, "loss": 1.0001, "step": 60165 }, { "epoch": 18.0, "grad_norm": 8.774314880371094, "learning_rate": 1.226857191722028e-06, "loss": 0.9797, "step": 60170 }, { "epoch": 18.0, "grad_norm": 1.6300761699676514, "learning_rate": 1.2250399447754663e-06, "loss": 1.0554, "step": 60175 }, { "epoch": 18.01, "grad_norm": 2.3184926509857178, "learning_rate": 1.223224010895238e-06, "loss": 0.9453, "step": 60180 }, { "epoch": 
18.01, "grad_norm": 5.169106483459473, "learning_rate": 1.2214093901816297e-06, "loss": 0.7603, "step": 60185 }, { "epoch": 18.01, "grad_norm": 2.690866231918335, "learning_rate": 1.2195960827348696e-06, "loss": 0.9451, "step": 60190 }, { "epoch": 18.01, "grad_norm": 3.5127060413360596, "learning_rate": 1.2177840886550911e-06, "loss": 1.0737, "step": 60195 }, { "epoch": 18.01, "grad_norm": 2.8311686515808105, "learning_rate": 1.215973408042384e-06, "loss": 0.7565, "step": 60200 }, { "epoch": 18.01, "grad_norm": 3.0393755435943604, "learning_rate": 1.2141640409967403e-06, "loss": 0.9511, "step": 60205 }, { "epoch": 18.01, "grad_norm": 1.882336139678955, "learning_rate": 1.2123559876180968e-06, "loss": 1.1201, "step": 60210 }, { "epoch": 18.02, "grad_norm": 3.4361493587493896, "learning_rate": 1.2105492480063013e-06, "loss": 0.5961, "step": 60215 }, { "epoch": 18.02, "grad_norm": 2.833055019378662, "learning_rate": 1.2087438222611463e-06, "loss": 1.1693, "step": 60220 }, { "epoch": 18.02, "grad_norm": 3.044955015182495, "learning_rate": 1.2069397104823381e-06, "loss": 1.0318, "step": 60225 }, { "epoch": 18.02, "grad_norm": 3.623894691467285, "learning_rate": 1.2051369127695195e-06, "loss": 1.0285, "step": 60230 }, { "epoch": 18.02, "grad_norm": 1.35172700881958, "learning_rate": 1.2033354292222547e-06, "loss": 0.9498, "step": 60235 }, { "epoch": 18.02, "grad_norm": 1.692931056022644, "learning_rate": 1.2015352599400399e-06, "loss": 0.8844, "step": 60240 }, { "epoch": 18.02, "grad_norm": 2.4801394939422607, "learning_rate": 1.1997364050222947e-06, "loss": 0.8478, "step": 60245 }, { "epoch": 18.03, "grad_norm": 2.9214184284210205, "learning_rate": 1.1979388645683682e-06, "loss": 0.8282, "step": 60250 }, { "epoch": 18.03, "grad_norm": 1.6741719245910645, "learning_rate": 1.1961426386775386e-06, "loss": 1.0689, "step": 60255 }, { "epoch": 18.03, "grad_norm": 2.074756383895874, "learning_rate": 1.1943477274490079e-06, "loss": 0.9789, "step": 60260 }, { "epoch": 18.03, 
"grad_norm": 4.593778610229492, "learning_rate": 1.1925541309819071e-06, "loss": 1.0066, "step": 60265 }, { "epoch": 18.03, "grad_norm": 4.825080394744873, "learning_rate": 1.1907618493752965e-06, "loss": 1.0119, "step": 60270 }, { "epoch": 18.03, "grad_norm": 2.745469093322754, "learning_rate": 1.1889708827281604e-06, "loss": 1.0391, "step": 60275 }, { "epoch": 18.04, "grad_norm": 3.292245864868164, "learning_rate": 1.1871812311394115e-06, "loss": 0.8295, "step": 60280 }, { "epoch": 18.04, "grad_norm": 2.7830917835235596, "learning_rate": 1.1853928947078957e-06, "loss": 1.0809, "step": 60285 }, { "epoch": 18.04, "grad_norm": 3.622986316680908, "learning_rate": 1.1836058735323674e-06, "loss": 1.0924, "step": 60290 }, { "epoch": 18.04, "grad_norm": 2.329106569290161, "learning_rate": 1.1818201677115393e-06, "loss": 0.9369, "step": 60295 }, { "epoch": 18.04, "grad_norm": 2.0072081089019775, "learning_rate": 1.1800357773440212e-06, "loss": 0.8325, "step": 60300 }, { "epoch": 18.04, "grad_norm": 2.3265275955200195, "learning_rate": 1.1782527025283706e-06, "loss": 0.827, "step": 60305 }, { "epoch": 18.04, "grad_norm": 4.844279766082764, "learning_rate": 1.1764709433630589e-06, "loss": 0.88, "step": 60310 }, { "epoch": 18.05, "grad_norm": 2.5498387813568115, "learning_rate": 1.174690499946496e-06, "loss": 0.9118, "step": 60315 }, { "epoch": 18.05, "grad_norm": 2.94402813911438, "learning_rate": 1.1729113723770114e-06, "loss": 1.0359, "step": 60320 }, { "epoch": 18.05, "grad_norm": 8.016414642333984, "learning_rate": 1.1711335607528629e-06, "loss": 0.8906, "step": 60325 }, { "epoch": 18.05, "grad_norm": 1.8241318464279175, "learning_rate": 1.1693570651722414e-06, "loss": 0.9315, "step": 60330 }, { "epoch": 18.05, "grad_norm": 3.5544328689575195, "learning_rate": 1.167581885733257e-06, "loss": 0.8926, "step": 60335 }, { "epoch": 18.05, "grad_norm": 4.292209148406982, "learning_rate": 1.1658080225339541e-06, "loss": 0.9165, "step": 60340 }, { "epoch": 18.05, "grad_norm": 
2.8831968307495117, "learning_rate": 1.1640354756722981e-06, "loss": 1.1162, "step": 60345 }, { "epoch": 18.06, "grad_norm": 3.58367657661438, "learning_rate": 1.1622642452461862e-06, "loss": 0.8821, "step": 60350 }, { "epoch": 18.06, "grad_norm": 2.5088016986846924, "learning_rate": 1.1604943313534455e-06, "loss": 1.0714, "step": 60355 }, { "epoch": 18.06, "grad_norm": 4.4307780265808105, "learning_rate": 1.15872573409182e-06, "loss": 1.0852, "step": 60360 }, { "epoch": 18.06, "grad_norm": 3.551413059234619, "learning_rate": 1.1569584535589929e-06, "loss": 0.9419, "step": 60365 }, { "epoch": 18.06, "grad_norm": 3.166933059692383, "learning_rate": 1.1551924898525634e-06, "loss": 0.9873, "step": 60370 }, { "epoch": 18.06, "grad_norm": 4.157015800476074, "learning_rate": 1.1534278430700707e-06, "loss": 0.9274, "step": 60375 }, { "epoch": 18.06, "grad_norm": 3.1233348846435547, "learning_rate": 1.1516645133089727e-06, "loss": 0.9412, "step": 60380 }, { "epoch": 18.07, "grad_norm": 1.6911544799804688, "learning_rate": 1.1499025006666498e-06, "loss": 0.8977, "step": 60385 }, { "epoch": 18.07, "grad_norm": 2.505157470703125, "learning_rate": 1.148141805240427e-06, "loss": 1.0199, "step": 60390 }, { "epoch": 18.07, "grad_norm": 3.155808925628662, "learning_rate": 1.1463824271275319e-06, "loss": 1.0877, "step": 60395 }, { "epoch": 18.07, "grad_norm": 2.7817635536193848, "learning_rate": 1.1446243664251482e-06, "loss": 0.8983, "step": 60400 }, { "epoch": 18.07, "grad_norm": 3.2476534843444824, "learning_rate": 1.1428676232303592e-06, "loss": 1.1019, "step": 60405 }, { "epoch": 18.07, "grad_norm": 2.456259250640869, "learning_rate": 1.1411121976401956e-06, "loss": 1.1568, "step": 60410 }, { "epoch": 18.08, "grad_norm": 3.9711551666259766, "learning_rate": 1.139358089751605e-06, "loss": 0.9174, "step": 60415 }, { "epoch": 18.08, "grad_norm": 4.814849376678467, "learning_rate": 1.137605299661465e-06, "loss": 0.8198, "step": 60420 }, { "epoch": 18.08, "grad_norm": 
2.4193475246429443, "learning_rate": 1.135853827466582e-06, "loss": 0.9368, "step": 60425 }, { "epoch": 18.08, "grad_norm": 4.294716835021973, "learning_rate": 1.1341036732636868e-06, "loss": 0.932, "step": 60430 }, { "epoch": 18.08, "grad_norm": 2.2720484733581543, "learning_rate": 1.1323548371494352e-06, "loss": 1.0362, "step": 60435 }, { "epoch": 18.08, "grad_norm": 2.023895740509033, "learning_rate": 1.1306073192204198e-06, "loss": 0.9637, "step": 60440 }, { "epoch": 18.08, "grad_norm": 1.7245546579360962, "learning_rate": 1.1288611195731518e-06, "loss": 1.1067, "step": 60445 }, { "epoch": 18.09, "grad_norm": 3.7294325828552246, "learning_rate": 1.1271162383040685e-06, "loss": 0.9301, "step": 60450 }, { "epoch": 18.09, "grad_norm": 3.503115177154541, "learning_rate": 1.125372675509545e-06, "loss": 1.0133, "step": 60455 }, { "epoch": 18.09, "grad_norm": 3.752464532852173, "learning_rate": 1.1236304312858686e-06, "loss": 1.0687, "step": 60460 }, { "epoch": 18.09, "grad_norm": 1.6993839740753174, "learning_rate": 1.1218895057292678e-06, "loss": 0.8668, "step": 60465 }, { "epoch": 18.09, "grad_norm": 1.8143806457519531, "learning_rate": 1.1201498989358878e-06, "loss": 0.925, "step": 60470 }, { "epoch": 18.09, "grad_norm": 1.6880947351455688, "learning_rate": 1.118411611001813e-06, "loss": 1.0283, "step": 60475 }, { "epoch": 18.09, "grad_norm": 3.8792147636413574, "learning_rate": 1.116674642023033e-06, "loss": 0.9818, "step": 60480 }, { "epoch": 18.1, "grad_norm": 3.6388609409332275, "learning_rate": 1.1149389920954933e-06, "loss": 0.9869, "step": 60485 }, { "epoch": 18.1, "grad_norm": 2.7923548221588135, "learning_rate": 1.1132046613150398e-06, "loss": 1.0708, "step": 60490 }, { "epoch": 18.1, "grad_norm": 3.55141282081604, "learning_rate": 1.1114716497774708e-06, "loss": 0.918, "step": 60495 }, { "epoch": 18.1, "grad_norm": 5.645336151123047, "learning_rate": 1.1097399575784845e-06, "loss": 0.8854, "step": 60500 }, { "epoch": 18.1, "grad_norm": 4.349913120269775, 
"learning_rate": 1.108009584813735e-06, "loss": 0.9067, "step": 60505 }, { "epoch": 18.1, "grad_norm": 3.4893198013305664, "learning_rate": 1.1062805315787794e-06, "loss": 0.8483, "step": 60510 }, { "epoch": 18.11, "grad_norm": 4.366122245788574, "learning_rate": 1.1045527979691134e-06, "loss": 0.9011, "step": 60515 }, { "epoch": 18.11, "grad_norm": 4.734853267669678, "learning_rate": 1.1028263840801577e-06, "loss": 0.9712, "step": 60520 }, { "epoch": 18.11, "grad_norm": 3.5593693256378174, "learning_rate": 1.1011012900072616e-06, "loss": 0.9062, "step": 60525 }, { "epoch": 18.11, "grad_norm": 3.4199602603912354, "learning_rate": 1.099377515845698e-06, "loss": 0.8207, "step": 60530 }, { "epoch": 18.11, "grad_norm": 1.6151660680770874, "learning_rate": 1.097655061690675e-06, "loss": 0.8984, "step": 60535 }, { "epoch": 18.11, "grad_norm": 8.06431770324707, "learning_rate": 1.0959339276373132e-06, "loss": 1.0251, "step": 60540 }, { "epoch": 18.11, "grad_norm": 1.1797876358032227, "learning_rate": 1.0942141137806782e-06, "loss": 1.0986, "step": 60545 }, { "epoch": 18.12, "grad_norm": 2.656918525695801, "learning_rate": 1.0924956202157443e-06, "loss": 1.0583, "step": 60550 }, { "epoch": 18.12, "grad_norm": 4.957900524139404, "learning_rate": 1.09077844703743e-06, "loss": 0.981, "step": 60555 }, { "epoch": 18.12, "grad_norm": 2.982581853866577, "learning_rate": 1.0890625943405703e-06, "loss": 1.0173, "step": 60560 }, { "epoch": 18.12, "grad_norm": 3.549740791320801, "learning_rate": 1.0873480622199283e-06, "loss": 0.8502, "step": 60565 }, { "epoch": 18.12, "grad_norm": 2.2221314907073975, "learning_rate": 1.085634850770198e-06, "loss": 1.0024, "step": 60570 }, { "epoch": 18.12, "grad_norm": 4.6512675285339355, "learning_rate": 1.0839229600859952e-06, "loss": 1.1475, "step": 60575 }, { "epoch": 18.12, "grad_norm": 1.9820324182510376, "learning_rate": 1.082212390261872e-06, "loss": 1.0201, "step": 60580 }, { "epoch": 18.13, "grad_norm": 1.954908013343811, "learning_rate": 
1.0805031413922888e-06, "loss": 0.9867, "step": 60585 }, { "epoch": 18.13, "grad_norm": 2.869485855102539, "learning_rate": 1.078795213571665e-06, "loss": 1.0383, "step": 60590 }, { "epoch": 18.13, "grad_norm": 5.065496444702148, "learning_rate": 1.0770886068943082e-06, "loss": 0.8963, "step": 60595 }, { "epoch": 18.13, "grad_norm": 2.9921464920043945, "learning_rate": 1.0753833214544872e-06, "loss": 0.9474, "step": 60600 }, { "epoch": 18.13, "grad_norm": 1.8409907817840576, "learning_rate": 1.0736793573463744e-06, "loss": 1.0217, "step": 60605 }, { "epoch": 18.13, "grad_norm": 4.104241371154785, "learning_rate": 1.0719767146640803e-06, "loss": 0.884, "step": 60610 }, { "epoch": 18.14, "grad_norm": 3.4805054664611816, "learning_rate": 1.0702753935016408e-06, "loss": 0.8448, "step": 60615 }, { "epoch": 18.14, "grad_norm": 1.8355894088745117, "learning_rate": 1.068575393953017e-06, "loss": 0.9421, "step": 60620 }, { "epoch": 18.14, "grad_norm": 2.950094699859619, "learning_rate": 1.0668767161121e-06, "loss": 0.9723, "step": 60625 }, { "epoch": 18.14, "grad_norm": 4.175784587860107, "learning_rate": 1.0651793600727018e-06, "loss": 0.7554, "step": 60630 }, { "epoch": 18.14, "grad_norm": 1.5484530925750732, "learning_rate": 1.0634833259285743e-06, "loss": 0.9147, "step": 60635 }, { "epoch": 18.14, "grad_norm": 2.7090961933135986, "learning_rate": 1.061788613773379e-06, "loss": 0.9327, "step": 60640 }, { "epoch": 18.14, "grad_norm": 2.208608627319336, "learning_rate": 1.0600952237007162e-06, "loss": 1.1507, "step": 60645 }, { "epoch": 18.15, "grad_norm": 3.7526814937591553, "learning_rate": 1.0584031558041108e-06, "loss": 1.0072, "step": 60650 }, { "epoch": 18.15, "grad_norm": 4.413444519042969, "learning_rate": 1.0567124101770103e-06, "loss": 0.9917, "step": 60655 }, { "epoch": 18.15, "grad_norm": 5.183726787567139, "learning_rate": 1.0550229869127987e-06, "loss": 1.0578, "step": 60660 }, { "epoch": 18.15, "grad_norm": 1.6198534965515137, "learning_rate": 
1.0533348861047814e-06, "loss": 0.9569, "step": 60665 }, { "epoch": 18.15, "grad_norm": 3.9471218585968018, "learning_rate": 1.0516481078461786e-06, "loss": 0.9391, "step": 60670 }, { "epoch": 18.15, "grad_norm": 2.755523204803467, "learning_rate": 1.049962652230166e-06, "loss": 0.8292, "step": 60675 }, { "epoch": 18.15, "grad_norm": 3.8161582946777344, "learning_rate": 1.0482785193498156e-06, "loss": 0.8619, "step": 60680 }, { "epoch": 18.16, "grad_norm": 2.392561435699463, "learning_rate": 1.046595709298151e-06, "loss": 1.0699, "step": 60685 }, { "epoch": 18.16, "grad_norm": 2.5720813274383545, "learning_rate": 1.044914222168103e-06, "loss": 0.9893, "step": 60690 }, { "epoch": 18.16, "grad_norm": 2.1463463306427, "learning_rate": 1.0432340580525473e-06, "loss": 0.9201, "step": 60695 }, { "epoch": 18.16, "grad_norm": 2.0431580543518066, "learning_rate": 1.0415552170442682e-06, "loss": 0.8918, "step": 60700 }, { "epoch": 18.16, "grad_norm": 3.739319086074829, "learning_rate": 1.0398776992359943e-06, "loss": 0.9097, "step": 60705 }, { "epoch": 18.16, "grad_norm": 3.897839069366455, "learning_rate": 1.038201504720368e-06, "loss": 0.8448, "step": 60710 }, { "epoch": 18.17, "grad_norm": 2.4379286766052246, "learning_rate": 1.0365266335899626e-06, "loss": 1.0746, "step": 60715 }, { "epoch": 18.17, "grad_norm": 5.488431453704834, "learning_rate": 1.0348530859372902e-06, "loss": 0.9621, "step": 60720 }, { "epoch": 18.17, "grad_norm": 2.122537136077881, "learning_rate": 1.0331808618547628e-06, "loss": 1.044, "step": 60725 }, { "epoch": 18.17, "grad_norm": 1.5338541269302368, "learning_rate": 1.0315099614347511e-06, "loss": 0.8884, "step": 60730 }, { "epoch": 18.17, "grad_norm": 2.4747703075408936, "learning_rate": 1.0298403847695286e-06, "loss": 0.818, "step": 60735 }, { "epoch": 18.17, "grad_norm": 5.456902980804443, "learning_rate": 1.028172131951305e-06, "loss": 0.985, "step": 60740 }, { "epoch": 18.17, "grad_norm": 3.0986452102661133, "learning_rate": 
1.0265052030722171e-06, "loss": 1.0442, "step": 60745 }, { "epoch": 18.18, "grad_norm": 8.805809020996094, "learning_rate": 1.024839598224331e-06, "loss": 0.7961, "step": 60750 }, { "epoch": 18.18, "grad_norm": 3.979262351989746, "learning_rate": 1.0231753174996278e-06, "loss": 1.0433, "step": 60755 }, { "epoch": 18.18, "grad_norm": 3.9714319705963135, "learning_rate": 1.0215123609900373e-06, "loss": 1.0825, "step": 60760 }, { "epoch": 18.18, "grad_norm": 2.5382189750671387, "learning_rate": 1.019850728787386e-06, "loss": 0.6957, "step": 60765 }, { "epoch": 18.18, "grad_norm": 2.689236640930176, "learning_rate": 1.0181904209834586e-06, "loss": 0.9993, "step": 60770 }, { "epoch": 18.18, "grad_norm": 2.2523913383483887, "learning_rate": 1.016531437669943e-06, "loss": 1.0457, "step": 60775 }, { "epoch": 18.18, "grad_norm": 2.1695034503936768, "learning_rate": 1.0148737789384689e-06, "loss": 0.945, "step": 60780 }, { "epoch": 18.19, "grad_norm": 4.191242694854736, "learning_rate": 1.013217444880582e-06, "loss": 1.0195, "step": 60785 }, { "epoch": 18.19, "grad_norm": 2.4551188945770264, "learning_rate": 1.011562435587765e-06, "loss": 1.0352, "step": 60790 }, { "epoch": 18.19, "grad_norm": 5.271094799041748, "learning_rate": 1.0099087511514227e-06, "loss": 1.0489, "step": 60795 }, { "epoch": 18.19, "grad_norm": 3.048891067504883, "learning_rate": 1.008256391662879e-06, "loss": 1.0531, "step": 60800 }, { "epoch": 18.19, "grad_norm": 3.0242154598236084, "learning_rate": 1.0066053572133999e-06, "loss": 1.1194, "step": 60805 }, { "epoch": 18.19, "grad_norm": 3.903019905090332, "learning_rate": 1.004955647894165e-06, "loss": 0.9434, "step": 60810 }, { "epoch": 18.2, "grad_norm": 2.762580394744873, "learning_rate": 1.003307263796291e-06, "loss": 0.8196, "step": 60815 }, { "epoch": 18.2, "grad_norm": 22.09214210510254, "learning_rate": 1.0016602050108099e-06, "loss": 0.8785, "step": 60820 }, { "epoch": 18.2, "grad_norm": 4.526973247528076, "learning_rate": 
1.0000144716286935e-06, "loss": 0.7305, "step": 60825 }, { "epoch": 18.2, "grad_norm": 4.591505527496338, "learning_rate": 9.983700637408305e-07, "loss": 1.1613, "step": 60830 }, { "epoch": 18.2, "grad_norm": 6.078658580780029, "learning_rate": 9.967269814380425e-07, "loss": 0.9575, "step": 60835 }, { "epoch": 18.2, "grad_norm": 3.495609998703003, "learning_rate": 9.950852248110709e-07, "loss": 1.0076, "step": 60840 }, { "epoch": 18.2, "grad_norm": 1.9880722761154175, "learning_rate": 9.934447939505904e-07, "loss": 0.89, "step": 60845 }, { "epoch": 18.21, "grad_norm": 4.335904121398926, "learning_rate": 9.918056889472006e-07, "loss": 1.0215, "step": 60850 }, { "epoch": 18.21, "grad_norm": 1.3856970071792603, "learning_rate": 9.901679098914291e-07, "loss": 1.0306, "step": 60855 }, { "epoch": 18.21, "grad_norm": 2.3149681091308594, "learning_rate": 9.885314568737258e-07, "loss": 0.9417, "step": 60860 }, { "epoch": 18.21, "grad_norm": 2.511589527130127, "learning_rate": 9.868963299844742e-07, "loss": 0.9716, "step": 60865 }, { "epoch": 18.21, "grad_norm": 1.1993108987808228, "learning_rate": 9.85262529313974e-07, "loss": 1.0695, "step": 60870 }, { "epoch": 18.21, "grad_norm": 3.9712471961975098, "learning_rate": 9.836300549524642e-07, "loss": 1.0194, "step": 60875 }, { "epoch": 18.21, "grad_norm": 1.5070483684539795, "learning_rate": 9.81998906990103e-07, "loss": 0.9899, "step": 60880 }, { "epoch": 18.22, "grad_norm": 1.7017158269882202, "learning_rate": 9.803690855169772e-07, "loss": 1.0185, "step": 60885 }, { "epoch": 18.22, "grad_norm": 2.1888837814331055, "learning_rate": 9.787405906231006e-07, "loss": 1.0552, "step": 60890 }, { "epoch": 18.22, "grad_norm": 2.574962615966797, "learning_rate": 9.771134223984097e-07, "loss": 0.8768, "step": 60895 }, { "epoch": 18.22, "grad_norm": 2.792041301727295, "learning_rate": 9.754875809327769e-07, "loss": 0.8991, "step": 60900 }, { "epoch": 18.22, "grad_norm": 2.1841979026794434, "learning_rate": 9.738630663159886e-07, 
"loss": 1.1429, "step": 60905 }, { "epoch": 18.22, "grad_norm": 3.5888936519622803, "learning_rate": 9.722398786377762e-07, "loss": 0.8153, "step": 60910 }, { "epoch": 18.23, "grad_norm": 5.765038967132568, "learning_rate": 9.706180179877733e-07, "loss": 0.9749, "step": 60915 }, { "epoch": 18.23, "grad_norm": 4.348241329193115, "learning_rate": 9.68997484455564e-07, "loss": 1.0241, "step": 60920 }, { "epoch": 18.23, "grad_norm": 1.9215142726898193, "learning_rate": 9.673782781306428e-07, "loss": 1.1308, "step": 60925 }, { "epoch": 18.23, "grad_norm": 2.1733293533325195, "learning_rate": 9.657603991024416e-07, "loss": 0.8968, "step": 60930 }, { "epoch": 18.23, "grad_norm": 3.1698646545410156, "learning_rate": 9.641438474603081e-07, "loss": 0.9575, "step": 60935 }, { "epoch": 18.23, "grad_norm": 4.437852382659912, "learning_rate": 9.625286232935266e-07, "loss": 1.0785, "step": 60940 }, { "epoch": 18.23, "grad_norm": 2.4240994453430176, "learning_rate": 9.60914726691306e-07, "loss": 0.9774, "step": 60945 }, { "epoch": 18.24, "grad_norm": 4.216778755187988, "learning_rate": 9.593021577427752e-07, "loss": 1.0754, "step": 60950 }, { "epoch": 18.24, "grad_norm": 5.071732044219971, "learning_rate": 9.57690916536999e-07, "loss": 1.0031, "step": 60955 }, { "epoch": 18.24, "grad_norm": 5.060432434082031, "learning_rate": 9.56081003162962e-07, "loss": 0.9962, "step": 60960 }, { "epoch": 18.24, "grad_norm": 4.500984191894531, "learning_rate": 9.544724177095787e-07, "loss": 0.9923, "step": 60965 }, { "epoch": 18.24, "grad_norm": 3.332535743713379, "learning_rate": 9.528651602656896e-07, "loss": 1.1458, "step": 60970 }, { "epoch": 18.24, "grad_norm": 2.8214833736419678, "learning_rate": 9.512592309200625e-07, "loss": 1.0929, "step": 60975 }, { "epoch": 18.24, "grad_norm": 3.44423770904541, "learning_rate": 9.496546297613901e-07, "loss": 1.0643, "step": 60980 }, { "epoch": 18.25, "grad_norm": 4.527592182159424, "learning_rate": 9.480513568782962e-07, "loss": 0.9452, "step": 60985 
}, { "epoch": 18.25, "grad_norm": 1.5139596462249756, "learning_rate": 9.46449412359321e-07, "loss": 0.951, "step": 60990 }, { "epoch": 18.25, "grad_norm": 3.112560749053955, "learning_rate": 9.448487962929492e-07, "loss": 1.0462, "step": 60995 }, { "epoch": 18.25, "grad_norm": 6.812036991119385, "learning_rate": 9.432495087675657e-07, "loss": 1.0087, "step": 61000 }, { "epoch": 18.25, "grad_norm": 1.6199243068695068, "learning_rate": 9.416515498715139e-07, "loss": 0.8079, "step": 61005 }, { "epoch": 18.25, "grad_norm": 2.11490797996521, "learning_rate": 9.400549196930342e-07, "loss": 0.9901, "step": 61010 }, { "epoch": 18.25, "grad_norm": 1.8027985095977783, "learning_rate": 9.38459618320317e-07, "loss": 1.1343, "step": 61015 }, { "epoch": 18.26, "grad_norm": 2.305330514907837, "learning_rate": 9.368656458414643e-07, "loss": 1.1905, "step": 61020 }, { "epoch": 18.26, "grad_norm": 1.3872077465057373, "learning_rate": 9.352730023445055e-07, "loss": 0.9287, "step": 61025 }, { "epoch": 18.26, "grad_norm": 3.1503524780273438, "learning_rate": 9.336816879174093e-07, "loss": 0.9731, "step": 61030 }, { "epoch": 18.26, "grad_norm": 1.6931301355361938, "learning_rate": 9.320917026480553e-07, "loss": 0.9644, "step": 61035 }, { "epoch": 18.26, "grad_norm": 1.539621353149414, "learning_rate": 9.305030466242592e-07, "loss": 1.0219, "step": 61040 }, { "epoch": 18.26, "grad_norm": 2.573394298553467, "learning_rate": 9.289157199337622e-07, "loss": 1.0027, "step": 61045 }, { "epoch": 18.27, "grad_norm": 2.412062168121338, "learning_rate": 9.273297226642275e-07, "loss": 1.0636, "step": 61050 }, { "epoch": 18.27, "grad_norm": 1.9419618844985962, "learning_rate": 9.257450549032515e-07, "loss": 0.9127, "step": 61055 }, { "epoch": 18.27, "grad_norm": 1.3534716367721558, "learning_rate": 9.241617167383531e-07, "loss": 1.0985, "step": 61060 }, { "epoch": 18.27, "grad_norm": 3.1224892139434814, "learning_rate": 9.225797082569765e-07, "loss": 1.0405, "step": 61065 }, { "epoch": 18.27, 
"grad_norm": 2.6927273273468018, "learning_rate": 9.209990295464959e-07, "loss": 0.8734, "step": 61070 }, { "epoch": 18.27, "grad_norm": 1.9963269233703613, "learning_rate": 9.194196806942112e-07, "loss": 0.8057, "step": 61075 }, { "epoch": 18.27, "grad_norm": 3.602238178253174, "learning_rate": 9.178416617873442e-07, "loss": 0.9009, "step": 61080 }, { "epoch": 18.28, "grad_norm": 1.87447988986969, "learning_rate": 9.162649729130529e-07, "loss": 0.8498, "step": 61085 }, { "epoch": 18.28, "grad_norm": 2.5276732444763184, "learning_rate": 9.146896141584149e-07, "loss": 0.9557, "step": 61090 }, { "epoch": 18.28, "grad_norm": 2.9176580905914307, "learning_rate": 9.131155856104301e-07, "loss": 0.9328, "step": 61095 }, { "epoch": 18.28, "grad_norm": 1.1620190143585205, "learning_rate": 9.115428873560372e-07, "loss": 1.0674, "step": 61100 }, { "epoch": 18.28, "grad_norm": 3.642578601837158, "learning_rate": 9.09971519482089e-07, "loss": 0.8607, "step": 61105 }, { "epoch": 18.28, "grad_norm": 3.2962098121643066, "learning_rate": 9.0840148207538e-07, "loss": 0.8209, "step": 61110 }, { "epoch": 18.28, "grad_norm": 2.739952325820923, "learning_rate": 9.068327752226102e-07, "loss": 0.9166, "step": 61115 }, { "epoch": 18.29, "grad_norm": 2.815523862838745, "learning_rate": 9.052653990104243e-07, "loss": 1.0532, "step": 61120 }, { "epoch": 18.29, "grad_norm": 1.6121025085449219, "learning_rate": 9.036993535253863e-07, "loss": 1.0011, "step": 61125 }, { "epoch": 18.29, "grad_norm": 1.8355814218521118, "learning_rate": 9.021346388539825e-07, "loss": 0.9553, "step": 61130 }, { "epoch": 18.29, "grad_norm": 2.7154858112335205, "learning_rate": 9.005712550826384e-07, "loss": 1.089, "step": 61135 }, { "epoch": 18.29, "grad_norm": 2.364108085632324, "learning_rate": 8.990092022976904e-07, "loss": 0.9518, "step": 61140 }, { "epoch": 18.29, "grad_norm": 1.0809826850891113, "learning_rate": 8.974484805854166e-07, "loss": 0.9845, "step": 61145 }, { "epoch": 18.3, "grad_norm": 
1.4934520721435547, "learning_rate": 8.958890900320066e-07, "loss": 0.9068, "step": 61150 }, { "epoch": 18.3, "grad_norm": 2.0490729808807373, "learning_rate": 8.943310307235886e-07, "loss": 1.0463, "step": 61155 }, { "epoch": 18.3, "grad_norm": 2.349944591522217, "learning_rate": 8.927743027462104e-07, "loss": 1.065, "step": 61160 }, { "epoch": 18.3, "grad_norm": 2.097916841506958, "learning_rate": 8.912189061858506e-07, "loss": 0.9555, "step": 61165 }, { "epoch": 18.3, "grad_norm": 1.0531599521636963, "learning_rate": 8.896648411284097e-07, "loss": 0.9135, "step": 61170 }, { "epoch": 18.3, "grad_norm": 3.910612106323242, "learning_rate": 8.881121076597193e-07, "loss": 0.7968, "step": 61175 }, { "epoch": 18.3, "grad_norm": 2.345278739929199, "learning_rate": 8.865607058655356e-07, "loss": 0.9632, "step": 61180 }, { "epoch": 18.31, "grad_norm": 2.56709885597229, "learning_rate": 8.850106358315402e-07, "loss": 1.1304, "step": 61185 }, { "epoch": 18.31, "grad_norm": 3.583867073059082, "learning_rate": 8.834618976433367e-07, "loss": 0.7337, "step": 61190 }, { "epoch": 18.31, "grad_norm": 2.285977840423584, "learning_rate": 8.819144913864708e-07, "loss": 1.0113, "step": 61195 }, { "epoch": 18.31, "grad_norm": 3.424396514892578, "learning_rate": 8.803684171463905e-07, "loss": 0.91, "step": 61200 }, { "epoch": 18.31, "grad_norm": 3.49349045753479, "learning_rate": 8.788236750084999e-07, "loss": 1.0395, "step": 61205 }, { "epoch": 18.31, "grad_norm": 1.7467879056930542, "learning_rate": 8.772802650580975e-07, "loss": 0.8866, "step": 61210 }, { "epoch": 18.31, "grad_norm": 5.124495506286621, "learning_rate": 8.757381873804371e-07, "loss": 0.9363, "step": 61215 }, { "epoch": 18.32, "grad_norm": 1.684017539024353, "learning_rate": 8.741974420606813e-07, "loss": 0.8885, "step": 61220 }, { "epoch": 18.32, "grad_norm": 3.0965070724487305, "learning_rate": 8.726580291839203e-07, "loss": 0.9159, "step": 61225 }, { "epoch": 18.32, "grad_norm": 4.807112693786621, "learning_rate": 
8.711199488351779e-07, "loss": 0.9592, "step": 61230 }, { "epoch": 18.32, "grad_norm": 2.7470273971557617, "learning_rate": 8.695832010993998e-07, "loss": 0.8318, "step": 61235 }, { "epoch": 18.32, "grad_norm": 5.211430072784424, "learning_rate": 8.6804778606146e-07, "loss": 0.9829, "step": 61240 }, { "epoch": 18.32, "grad_norm": 2.6941945552825928, "learning_rate": 8.665137038061572e-07, "loss": 0.9167, "step": 61245 }, { "epoch": 18.33, "grad_norm": 2.2288553714752197, "learning_rate": 8.64980954418218e-07, "loss": 0.9119, "step": 61250 }, { "epoch": 18.33, "grad_norm": 2.5863308906555176, "learning_rate": 8.634495379822943e-07, "loss": 1.03, "step": 61255 }, { "epoch": 18.33, "grad_norm": 1.7381858825683594, "learning_rate": 8.619194545829628e-07, "loss": 1.1305, "step": 61260 }, { "epoch": 18.33, "grad_norm": 2.788691997528076, "learning_rate": 8.603907043047283e-07, "loss": 1.1239, "step": 61265 }, { "epoch": 18.33, "grad_norm": 3.6779868602752686, "learning_rate": 8.588632872320257e-07, "loss": 1.1608, "step": 61270 }, { "epoch": 18.33, "grad_norm": 3.579209327697754, "learning_rate": 8.573372034492099e-07, "loss": 0.9267, "step": 61275 }, { "epoch": 18.33, "grad_norm": 4.529468059539795, "learning_rate": 8.558124530405664e-07, "loss": 0.9345, "step": 61280 }, { "epoch": 18.34, "grad_norm": 1.8296302556991577, "learning_rate": 8.542890360903e-07, "loss": 0.796, "step": 61285 }, { "epoch": 18.34, "grad_norm": 2.3215250968933105, "learning_rate": 8.527669526825599e-07, "loss": 1.0763, "step": 61290 }, { "epoch": 18.34, "grad_norm": 3.3448548316955566, "learning_rate": 8.512462029013929e-07, "loss": 0.9275, "step": 61295 }, { "epoch": 18.34, "grad_norm": 2.267402172088623, "learning_rate": 8.49726786830804e-07, "loss": 1.107, "step": 61300 }, { "epoch": 18.34, "grad_norm": 3.513765335083008, "learning_rate": 8.482087045546955e-07, "loss": 1.0165, "step": 61305 }, { "epoch": 18.34, "grad_norm": 2.1769518852233887, "learning_rate": 8.466919561569225e-07, "loss": 
0.9577, "step": 61310 }, { "epoch": 18.34, "grad_norm": 2.1250414848327637, "learning_rate": 8.451765417212432e-07, "loss": 0.8942, "step": 61315 }, { "epoch": 18.35, "grad_norm": 1.5039304494857788, "learning_rate": 8.43662461331357e-07, "loss": 0.9996, "step": 61320 }, { "epoch": 18.35, "grad_norm": 1.338394045829773, "learning_rate": 8.421497150708835e-07, "loss": 0.8906, "step": 61325 }, { "epoch": 18.35, "grad_norm": 2.3154618740081787, "learning_rate": 8.406383030233694e-07, "loss": 1.0178, "step": 61330 }, { "epoch": 18.35, "grad_norm": 1.515351414680481, "learning_rate": 8.391282252722898e-07, "loss": 0.9011, "step": 61335 }, { "epoch": 18.35, "grad_norm": 4.342297077178955, "learning_rate": 8.376194819010446e-07, "loss": 0.9005, "step": 61340 }, { "epoch": 18.35, "grad_norm": 3.609494209289551, "learning_rate": 8.361120729929617e-07, "loss": 0.9243, "step": 61345 }, { "epoch": 18.36, "grad_norm": 2.8200643062591553, "learning_rate": 8.34605998631291e-07, "loss": 1.0519, "step": 61350 }, { "epoch": 18.36, "grad_norm": 2.881962537765503, "learning_rate": 8.331012588992132e-07, "loss": 0.8966, "step": 61355 }, { "epoch": 18.36, "grad_norm": 2.507383108139038, "learning_rate": 8.315978538798314e-07, "loss": 0.8928, "step": 61360 }, { "epoch": 18.36, "grad_norm": 1.8377445936203003, "learning_rate": 8.30095783656179e-07, "loss": 0.9829, "step": 61365 }, { "epoch": 18.36, "grad_norm": 1.3998005390167236, "learning_rate": 8.28595048311212e-07, "loss": 0.9379, "step": 61370 }, { "epoch": 18.36, "grad_norm": 1.7559727430343628, "learning_rate": 8.270956479278196e-07, "loss": 1.1797, "step": 61375 }, { "epoch": 18.36, "grad_norm": 1.639439582824707, "learning_rate": 8.255975825888024e-07, "loss": 0.8938, "step": 61380 }, { "epoch": 18.37, "grad_norm": 2.903548002243042, "learning_rate": 8.241008523769106e-07, "loss": 0.8785, "step": 61385 }, { "epoch": 18.37, "grad_norm": 2.10689377784729, "learning_rate": 8.226054573747894e-07, "loss": 1.0115, "step": 61390 }, { 
"epoch": 18.37, "grad_norm": 3.6648929119110107, "learning_rate": 8.211113976650475e-07, "loss": 0.9137, "step": 61395 }, { "epoch": 18.37, "grad_norm": 3.99898624420166, "learning_rate": 8.19618673330183e-07, "loss": 1.1167, "step": 61400 }, { "epoch": 18.37, "grad_norm": 3.173650026321411, "learning_rate": 8.181272844526495e-07, "loss": 1.1092, "step": 61405 }, { "epoch": 18.37, "grad_norm": 3.0180552005767822, "learning_rate": 8.166372311148113e-07, "loss": 0.8644, "step": 61410 }, { "epoch": 18.37, "grad_norm": 3.1037330627441406, "learning_rate": 8.151485133989584e-07, "loss": 0.8741, "step": 61415 }, { "epoch": 18.38, "grad_norm": 3.032813310623169, "learning_rate": 8.136611313873166e-07, "loss": 0.9209, "step": 61420 }, { "epoch": 18.38, "grad_norm": 2.8166263103485107, "learning_rate": 8.121750851620286e-07, "loss": 0.8434, "step": 61425 }, { "epoch": 18.38, "grad_norm": 2.1553432941436768, "learning_rate": 8.106903748051675e-07, "loss": 0.8497, "step": 61430 }, { "epoch": 18.38, "grad_norm": 2.0251331329345703, "learning_rate": 8.092070003987373e-07, "loss": 1.1232, "step": 61435 }, { "epoch": 18.38, "grad_norm": 4.4784345626831055, "learning_rate": 8.077249620246558e-07, "loss": 0.9472, "step": 61440 }, { "epoch": 18.38, "grad_norm": 3.576678514480591, "learning_rate": 8.062442597647795e-07, "loss": 0.8144, "step": 61445 }, { "epoch": 18.39, "grad_norm": 1.705344796180725, "learning_rate": 8.047648937008851e-07, "loss": 1.1875, "step": 61450 }, { "epoch": 18.39, "grad_norm": 1.4665272235870361, "learning_rate": 8.032868639146762e-07, "loss": 1.0611, "step": 61455 }, { "epoch": 18.39, "grad_norm": 2.712339162826538, "learning_rate": 8.018101704877823e-07, "loss": 1.0206, "step": 61460 }, { "epoch": 18.39, "grad_norm": 3.855246067047119, "learning_rate": 8.003348135017603e-07, "loss": 1.0525, "step": 61465 }, { "epoch": 18.39, "grad_norm": 2.246304988861084, "learning_rate": 7.988607930380948e-07, "loss": 0.8379, "step": 61470 }, { "epoch": 18.39, 
"grad_norm": 3.892084836959839, "learning_rate": 7.973881091781876e-07, "loss": 1.0106, "step": 61475 }, { "epoch": 18.39, "grad_norm": 1.8931845426559448, "learning_rate": 7.959167620033847e-07, "loss": 0.9843, "step": 61480 }, { "epoch": 18.4, "grad_norm": 2.768260955810547, "learning_rate": 7.944467515949322e-07, "loss": 0.8572, "step": 61485 }, { "epoch": 18.4, "grad_norm": 11.066926002502441, "learning_rate": 7.929780780340318e-07, "loss": 0.8194, "step": 61490 }, { "epoch": 18.4, "grad_norm": 4.7817912101745605, "learning_rate": 7.915107414017825e-07, "loss": 1.0095, "step": 61495 }, { "epoch": 18.4, "grad_norm": 1.809881329536438, "learning_rate": 7.900447417792389e-07, "loss": 0.8781, "step": 61500 }, { "epoch": 18.4, "grad_norm": 3.047032356262207, "learning_rate": 7.885800792473585e-07, "loss": 1.0429, "step": 61505 }, { "epoch": 18.4, "grad_norm": 1.490786075592041, "learning_rate": 7.871167538870322e-07, "loss": 0.8668, "step": 61510 }, { "epoch": 18.4, "grad_norm": 1.9980204105377197, "learning_rate": 7.856547657790786e-07, "loss": 1.0007, "step": 61515 }, { "epoch": 18.41, "grad_norm": 3.4392709732055664, "learning_rate": 7.841941150042415e-07, "loss": 0.9901, "step": 61520 }, { "epoch": 18.41, "grad_norm": 4.489561557769775, "learning_rate": 7.827348016431979e-07, "loss": 1.0117, "step": 61525 }, { "epoch": 18.41, "grad_norm": 4.4032979011535645, "learning_rate": 7.812768257765335e-07, "loss": 1.0882, "step": 61530 }, { "epoch": 18.41, "grad_norm": 3.137603282928467, "learning_rate": 7.79820187484781e-07, "loss": 0.921, "step": 61535 }, { "epoch": 18.41, "grad_norm": 4.174556732177734, "learning_rate": 7.783648868483817e-07, "loss": 0.982, "step": 61540 }, { "epoch": 18.41, "grad_norm": 2.2925820350646973, "learning_rate": 7.769109239477129e-07, "loss": 0.9188, "step": 61545 }, { "epoch": 18.42, "grad_norm": 2.8582160472869873, "learning_rate": 7.754582988630743e-07, "loss": 1.0092, "step": 61550 }, { "epoch": 18.42, "grad_norm": 3.7304418087005615, 
"learning_rate": 7.740070116746961e-07, "loss": 0.7871, "step": 61555 }, { "epoch": 18.42, "grad_norm": 2.1194798946380615, "learning_rate": 7.725570624627282e-07, "loss": 0.9669, "step": 61560 }, { "epoch": 18.42, "grad_norm": 2.3362677097320557, "learning_rate": 7.71108451307248e-07, "loss": 0.9318, "step": 61565 }, { "epoch": 18.42, "grad_norm": 1.7674001455307007, "learning_rate": 7.696611782882668e-07, "loss": 0.9901, "step": 61570 }, { "epoch": 18.42, "grad_norm": 1.8596383333206177, "learning_rate": 7.682152434857149e-07, "loss": 0.8932, "step": 61575 }, { "epoch": 18.42, "grad_norm": 3.049741744995117, "learning_rate": 7.667706469794395e-07, "loss": 0.8584, "step": 61580 }, { "epoch": 18.43, "grad_norm": 2.637298107147217, "learning_rate": 7.653273888492407e-07, "loss": 1.1926, "step": 61585 }, { "epoch": 18.43, "grad_norm": 2.7286012172698975, "learning_rate": 7.638854691748132e-07, "loss": 1.0714, "step": 61590 }, { "epoch": 18.43, "grad_norm": 4.056117057800293, "learning_rate": 7.624448880358043e-07, "loss": 0.9455, "step": 61595 }, { "epoch": 18.43, "grad_norm": 2.3087191581726074, "learning_rate": 7.61005645511767e-07, "loss": 1.0218, "step": 61600 }, { "epoch": 18.43, "grad_norm": 2.0572636127471924, "learning_rate": 7.595677416821933e-07, "loss": 0.9051, "step": 61605 }, { "epoch": 18.43, "grad_norm": 6.119582176208496, "learning_rate": 7.581311766265003e-07, "loss": 0.8552, "step": 61610 }, { "epoch": 18.43, "grad_norm": 2.6109657287597656, "learning_rate": 7.56695950424019e-07, "loss": 0.9766, "step": 61615 }, { "epoch": 18.44, "grad_norm": 3.126173734664917, "learning_rate": 7.552620631540247e-07, "loss": 0.9172, "step": 61620 }, { "epoch": 18.44, "grad_norm": 3.1183481216430664, "learning_rate": 7.538295148957015e-07, "loss": 0.7652, "step": 61625 }, { "epoch": 18.44, "grad_norm": 1.6673765182495117, "learning_rate": 7.523983057281775e-07, "loss": 0.884, "step": 61630 }, { "epoch": 18.44, "grad_norm": 3.5469305515289307, "learning_rate": 
7.509684357304897e-07, "loss": 0.9741, "step": 61635 }, { "epoch": 18.44, "grad_norm": 2.485867977142334, "learning_rate": 7.495399049816082e-07, "loss": 0.7726, "step": 61640 }, { "epoch": 18.44, "grad_norm": 1.444970965385437, "learning_rate": 7.48112713560431e-07, "loss": 0.9309, "step": 61645 }, { "epoch": 18.44, "grad_norm": 3.333101987838745, "learning_rate": 7.466868615457783e-07, "loss": 1.1428, "step": 61650 }, { "epoch": 18.45, "grad_norm": 2.304952383041382, "learning_rate": 7.45262349016404e-07, "loss": 0.9161, "step": 61655 }, { "epoch": 18.45, "grad_norm": 2.9560463428497314, "learning_rate": 7.438391760509755e-07, "loss": 0.9267, "step": 61660 }, { "epoch": 18.45, "grad_norm": 4.144384384155273, "learning_rate": 7.427016022174993e-07, "loss": 1.1577, "step": 61665 }, { "epoch": 18.45, "grad_norm": 5.686766624450684, "learning_rate": 7.412808406652038e-07, "loss": 0.9974, "step": 61670 }, { "epoch": 18.45, "grad_norm": 3.3661046028137207, "learning_rate": 7.398614188967534e-07, "loss": 0.8438, "step": 61675 }, { "epoch": 18.45, "grad_norm": 5.448729515075684, "learning_rate": 7.38443336990538e-07, "loss": 0.8848, "step": 61680 }, { "epoch": 18.46, "grad_norm": 2.479353189468384, "learning_rate": 7.370265950248783e-07, "loss": 1.0998, "step": 61685 }, { "epoch": 18.46, "grad_norm": 2.4149727821350098, "learning_rate": 7.356111930780201e-07, "loss": 0.9238, "step": 61690 }, { "epoch": 18.46, "grad_norm": 2.909254312515259, "learning_rate": 7.341971312281343e-07, "loss": 0.9805, "step": 61695 }, { "epoch": 18.46, "grad_norm": 2.9561984539031982, "learning_rate": 7.327844095533193e-07, "loss": 1.1177, "step": 61700 }, { "epoch": 18.46, "grad_norm": 1.3195778131484985, "learning_rate": 7.313730281315961e-07, "loss": 1.0986, "step": 61705 }, { "epoch": 18.46, "grad_norm": 1.9130079746246338, "learning_rate": 7.299629870409136e-07, "loss": 0.8861, "step": 61710 }, { "epoch": 18.46, "grad_norm": 4.564031600952148, "learning_rate": 7.285542863591482e-07, 
"loss": 0.8437, "step": 61715 }, { "epoch": 18.47, "grad_norm": 2.09362530708313, "learning_rate": 7.271469261641017e-07, "loss": 0.8984, "step": 61720 }, { "epoch": 18.47, "grad_norm": 3.5240318775177, "learning_rate": 7.257409065335035e-07, "loss": 0.7457, "step": 61725 }, { "epoch": 18.47, "grad_norm": 4.574277400970459, "learning_rate": 7.243362275449972e-07, "loss": 0.9621, "step": 61730 }, { "epoch": 18.47, "grad_norm": 4.2122578620910645, "learning_rate": 7.229328892761733e-07, "loss": 0.9186, "step": 61735 }, { "epoch": 18.47, "grad_norm": 2.056427001953125, "learning_rate": 7.215308918045255e-07, "loss": 1.0321, "step": 61740 }, { "epoch": 18.47, "grad_norm": 3.035954236984253, "learning_rate": 7.201302352074973e-07, "loss": 0.9903, "step": 61745 }, { "epoch": 18.47, "grad_norm": 3.6890830993652344, "learning_rate": 7.187309195624353e-07, "loss": 0.9409, "step": 61750 }, { "epoch": 18.48, "grad_norm": 2.126034736633301, "learning_rate": 7.173329449466248e-07, "loss": 1.0002, "step": 61755 }, { "epoch": 18.48, "grad_norm": 12.646512031555176, "learning_rate": 7.159363114372763e-07, "loss": 0.8951, "step": 61760 }, { "epoch": 18.48, "grad_norm": 4.24096155166626, "learning_rate": 7.145410191115226e-07, "loss": 0.8652, "step": 61765 }, { "epoch": 18.48, "grad_norm": 1.79692804813385, "learning_rate": 7.13147068046427e-07, "loss": 1.0179, "step": 61770 }, { "epoch": 18.48, "grad_norm": 2.1023857593536377, "learning_rate": 7.117544583189723e-07, "loss": 1.0399, "step": 61775 }, { "epoch": 18.48, "grad_norm": 3.325761079788208, "learning_rate": 7.10363190006072e-07, "loss": 0.9114, "step": 61780 }, { "epoch": 18.49, "grad_norm": 1.3993957042694092, "learning_rate": 7.089732631845674e-07, "loss": 0.9271, "step": 61785 }, { "epoch": 18.49, "grad_norm": 2.963554620742798, "learning_rate": 7.075846779312196e-07, "loss": 1.0317, "step": 61790 }, { "epoch": 18.49, "grad_norm": 4.754477500915527, "learning_rate": 7.061974343227168e-07, "loss": 0.87, "step": 61795 }, { 
"epoch": 18.49, "grad_norm": 2.58296799659729, "learning_rate": 7.048115324356814e-07, "loss": 1.0729, "step": 61800 }, { "epoch": 18.49, "grad_norm": 1.766918659210205, "learning_rate": 7.034269723466491e-07, "loss": 1.0143, "step": 61805 }, { "epoch": 18.49, "grad_norm": 6.538331985473633, "learning_rate": 7.020437541320924e-07, "loss": 0.9332, "step": 61810 }, { "epoch": 18.49, "grad_norm": 1.1120764017105103, "learning_rate": 7.006618778683999e-07, "loss": 1.0328, "step": 61815 }, { "epoch": 18.5, "grad_norm": 3.360107898712158, "learning_rate": 6.992813436318996e-07, "loss": 0.9667, "step": 61820 }, { "epoch": 18.5, "grad_norm": 1.8557356595993042, "learning_rate": 6.979021514988249e-07, "loss": 1.0017, "step": 61825 }, { "epoch": 18.5, "grad_norm": 2.17095947265625, "learning_rate": 6.965243015453593e-07, "loss": 0.9728, "step": 61830 }, { "epoch": 18.5, "grad_norm": 2.826488971710205, "learning_rate": 6.951477938475892e-07, "loss": 0.9713, "step": 61835 }, { "epoch": 18.5, "grad_norm": 2.857255220413208, "learning_rate": 6.937726284815482e-07, "loss": 0.9573, "step": 61840 }, { "epoch": 18.5, "grad_norm": 6.233930587768555, "learning_rate": 6.923988055231784e-07, "loss": 1.2165, "step": 61845 }, { "epoch": 18.5, "grad_norm": 4.246452808380127, "learning_rate": 6.910263250483551e-07, "loss": 1.1228, "step": 61850 }, { "epoch": 18.51, "grad_norm": 2.3864681720733643, "learning_rate": 6.896551871328788e-07, "loss": 0.9279, "step": 61855 }, { "epoch": 18.51, "grad_norm": 3.7490153312683105, "learning_rate": 6.882853918524779e-07, "loss": 0.9047, "step": 61860 }, { "epoch": 18.51, "grad_norm": 3.5015833377838135, "learning_rate": 6.869169392828056e-07, "loss": 1.0886, "step": 61865 }, { "epoch": 18.51, "grad_norm": 4.527100563049316, "learning_rate": 6.85549829499435e-07, "loss": 0.8575, "step": 61870 }, { "epoch": 18.51, "grad_norm": 4.221701145172119, "learning_rate": 6.841840625778805e-07, "loss": 0.9333, "step": 61875 }, { "epoch": 18.51, "grad_norm": 
2.010347843170166, "learning_rate": 6.828196385935626e-07, "loss": 0.8162, "step": 61880 }, { "epoch": 18.52, "grad_norm": 2.520059585571289, "learning_rate": 6.814565576218374e-07, "loss": 1.0076, "step": 61885 }, { "epoch": 18.52, "grad_norm": 2.8664329051971436, "learning_rate": 6.80094819737992e-07, "loss": 1.0359, "step": 61890 }, { "epoch": 18.52, "grad_norm": 3.0318400859832764, "learning_rate": 6.787344250172273e-07, "loss": 0.9725, "step": 61895 }, { "epoch": 18.52, "grad_norm": 4.079439640045166, "learning_rate": 6.773753735346805e-07, "loss": 0.9406, "step": 61900 }, { "epoch": 18.52, "grad_norm": 2.608128309249878, "learning_rate": 6.76017665365411e-07, "loss": 0.8831, "step": 61905 }, { "epoch": 18.52, "grad_norm": 2.588148355484009, "learning_rate": 6.746613005844033e-07, "loss": 1.0509, "step": 61910 }, { "epoch": 18.52, "grad_norm": 1.884019374847412, "learning_rate": 6.733062792665695e-07, "loss": 1.0, "step": 61915 }, { "epoch": 18.53, "grad_norm": 3.63097882270813, "learning_rate": 6.719526014867361e-07, "loss": 1.114, "step": 61920 }, { "epoch": 18.53, "grad_norm": 1.6369963884353638, "learning_rate": 6.706002673196793e-07, "loss": 1.0093, "step": 61925 }, { "epoch": 18.53, "grad_norm": 3.430427312850952, "learning_rate": 6.692492768400782e-07, "loss": 1.1051, "step": 61930 }, { "epoch": 18.53, "grad_norm": 1.9971526861190796, "learning_rate": 6.67899630122551e-07, "loss": 0.9439, "step": 61935 }, { "epoch": 18.53, "grad_norm": 1.4598491191864014, "learning_rate": 6.665513272416324e-07, "loss": 0.9472, "step": 61940 }, { "epoch": 18.53, "grad_norm": 2.7384274005889893, "learning_rate": 6.652043682717907e-07, "loss": 0.922, "step": 61945 }, { "epoch": 18.53, "grad_norm": 2.7123560905456543, "learning_rate": 6.638587532874219e-07, "loss": 0.9753, "step": 61950 }, { "epoch": 18.54, "grad_norm": 2.5400397777557373, "learning_rate": 6.625144823628332e-07, "loss": 0.7639, "step": 61955 }, { "epoch": 18.54, "grad_norm": 4.62942361831665, 
"learning_rate": 6.611715555722764e-07, "loss": 1.0445, "step": 61960 }, { "epoch": 18.54, "grad_norm": 2.880040168762207, "learning_rate": 6.598299729899088e-07, "loss": 0.9792, "step": 61965 }, { "epoch": 18.54, "grad_norm": 2.0064916610717773, "learning_rate": 6.584897346898405e-07, "loss": 1.0655, "step": 61970 }, { "epoch": 18.54, "grad_norm": 2.9998416900634766, "learning_rate": 6.571508407460764e-07, "loss": 0.9663, "step": 61975 }, { "epoch": 18.54, "grad_norm": 2.280630350112915, "learning_rate": 6.55813291232571e-07, "loss": 1.0144, "step": 61980 }, { "epoch": 18.55, "grad_norm": 2.611635208129883, "learning_rate": 6.54477086223193e-07, "loss": 1.2247, "step": 61985 }, { "epoch": 18.55, "grad_norm": 5.889001369476318, "learning_rate": 6.531422257917391e-07, "loss": 1.0725, "step": 61990 }, { "epoch": 18.55, "grad_norm": 3.5222485065460205, "learning_rate": 6.518087100119335e-07, "loss": 0.8588, "step": 61995 }, { "epoch": 18.55, "grad_norm": 2.528952121734619, "learning_rate": 6.50476538957423e-07, "loss": 0.9481, "step": 62000 }, { "epoch": 18.55, "grad_norm": 2.8010430335998535, "learning_rate": 6.491457127017847e-07, "loss": 0.8445, "step": 62005 }, { "epoch": 18.55, "grad_norm": 1.7682600021362305, "learning_rate": 6.478162313185182e-07, "loss": 0.9884, "step": 62010 }, { "epoch": 18.55, "grad_norm": 4.158107280731201, "learning_rate": 6.464880948810453e-07, "loss": 0.8942, "step": 62015 }, { "epoch": 18.56, "grad_norm": 2.5172414779663086, "learning_rate": 6.45161303462724e-07, "loss": 0.9485, "step": 62020 }, { "epoch": 18.56, "grad_norm": 1.1113386154174805, "learning_rate": 6.438358571368263e-07, "loss": 0.9776, "step": 62025 }, { "epoch": 18.56, "grad_norm": 2.090869426727295, "learning_rate": 6.425117559765576e-07, "loss": 1.0483, "step": 62030 }, { "epoch": 18.56, "grad_norm": 3.047197103500366, "learning_rate": 6.41189000055048e-07, "loss": 0.8976, "step": 62035 }, { "epoch": 18.56, "grad_norm": 2.634687662124634, "learning_rate": 
6.398675894453477e-07, "loss": 0.9284, "step": 62040 }, { "epoch": 18.56, "grad_norm": 3.5582494735717773, "learning_rate": 6.385475242204453e-07, "loss": 0.8967, "step": 62045 }, { "epoch": 18.56, "grad_norm": 5.443139553070068, "learning_rate": 6.372288044532326e-07, "loss": 0.9113, "step": 62050 }, { "epoch": 18.57, "grad_norm": 1.4090752601623535, "learning_rate": 6.35911430216557e-07, "loss": 1.0798, "step": 62055 }, { "epoch": 18.57, "grad_norm": 1.6593273878097534, "learning_rate": 6.345954015831629e-07, "loss": 0.6439, "step": 62060 }, { "epoch": 18.57, "grad_norm": 1.541206955909729, "learning_rate": 6.332807186257423e-07, "loss": 0.9511, "step": 62065 }, { "epoch": 18.57, "grad_norm": 4.009206295013428, "learning_rate": 6.319673814168953e-07, "loss": 0.8826, "step": 62070 }, { "epoch": 18.57, "grad_norm": 2.005704879760742, "learning_rate": 6.30655390029164e-07, "loss": 0.9057, "step": 62075 }, { "epoch": 18.57, "grad_norm": 2.7836337089538574, "learning_rate": 6.293447445350043e-07, "loss": 0.9222, "step": 62080 }, { "epoch": 18.58, "grad_norm": 4.371108055114746, "learning_rate": 6.280354450068026e-07, "loss": 0.8937, "step": 62085 }, { "epoch": 18.58, "grad_norm": 2.3152432441711426, "learning_rate": 6.26727491516868e-07, "loss": 1.1751, "step": 62090 }, { "epoch": 18.58, "grad_norm": 2.359133720397949, "learning_rate": 6.254208841374426e-07, "loss": 0.9842, "step": 62095 }, { "epoch": 18.58, "grad_norm": 2.8773648738861084, "learning_rate": 6.241156229406825e-07, "loss": 0.9049, "step": 62100 }, { "epoch": 18.58, "grad_norm": 2.3904407024383545, "learning_rate": 6.228117079986829e-07, "loss": 0.9434, "step": 62105 }, { "epoch": 18.58, "grad_norm": 2.5811879634857178, "learning_rate": 6.215091393834499e-07, "loss": 1.0067, "step": 62110 }, { "epoch": 18.58, "grad_norm": 5.010559558868408, "learning_rate": 6.202079171669289e-07, "loss": 0.9329, "step": 62115 }, { "epoch": 18.59, "grad_norm": 6.2016167640686035, "learning_rate": 6.189080414209847e-07, 
"loss": 0.8496, "step": 62120 }, { "epoch": 18.59, "grad_norm": 1.5568153858184814, "learning_rate": 6.176095122174041e-07, "loss": 0.9277, "step": 62125 }, { "epoch": 18.59, "grad_norm": 1.9093276262283325, "learning_rate": 6.163123296279077e-07, "loss": 0.9473, "step": 62130 }, { "epoch": 18.59, "grad_norm": 7.955539703369141, "learning_rate": 6.150164937241354e-07, "loss": 0.876, "step": 62135 }, { "epoch": 18.59, "grad_norm": 2.9364593029022217, "learning_rate": 6.137220045776604e-07, "loss": 0.8387, "step": 62140 }, { "epoch": 18.59, "grad_norm": 3.2200205326080322, "learning_rate": 6.124288622599617e-07, "loss": 0.9952, "step": 62145 }, { "epoch": 18.59, "grad_norm": 2.081777572631836, "learning_rate": 6.111370668424765e-07, "loss": 0.8266, "step": 62150 }, { "epoch": 18.6, "grad_norm": 2.223055601119995, "learning_rate": 6.098466183965313e-07, "loss": 0.9303, "step": 62155 }, { "epoch": 18.6, "grad_norm": 1.2533679008483887, "learning_rate": 6.085575169934132e-07, "loss": 0.949, "step": 62160 }, { "epoch": 18.6, "grad_norm": 2.873736619949341, "learning_rate": 6.072697627043045e-07, "loss": 1.0112, "step": 62165 }, { "epoch": 18.6, "grad_norm": 1.682570219039917, "learning_rate": 6.059833556003341e-07, "loss": 1.0301, "step": 62170 }, { "epoch": 18.6, "grad_norm": 4.487571716308594, "learning_rate": 6.046982957525482e-07, "loss": 1.1361, "step": 62175 }, { "epoch": 18.6, "grad_norm": 4.013219833374023, "learning_rate": 6.03414583231915e-07, "loss": 0.8556, "step": 62180 }, { "epoch": 18.61, "grad_norm": 3.0844905376434326, "learning_rate": 6.021322181093386e-07, "loss": 0.9482, "step": 62185 }, { "epoch": 18.61, "grad_norm": 3.159336805343628, "learning_rate": 6.008512004556377e-07, "loss": 0.9925, "step": 62190 }, { "epoch": 18.61, "grad_norm": 2.8269448280334473, "learning_rate": 5.995715303415639e-07, "loss": 1.0819, "step": 62195 }, { "epoch": 18.61, "grad_norm": 3.574028730392456, "learning_rate": 5.982932078377912e-07, "loss": 0.9489, "step": 62200 }, 
{ "epoch": 18.61, "grad_norm": 2.7778759002685547, "learning_rate": 5.970162330149187e-07, "loss": 0.9698, "step": 62205 }, { "epoch": 18.61, "grad_norm": 2.205036163330078, "learning_rate": 5.957406059434761e-07, "loss": 0.9544, "step": 62210 }, { "epoch": 18.61, "grad_norm": 2.8578882217407227, "learning_rate": 5.944663266939127e-07, "loss": 0.7805, "step": 62215 }, { "epoch": 18.62, "grad_norm": 2.122692346572876, "learning_rate": 5.931933953366081e-07, "loss": 0.9865, "step": 62220 }, { "epoch": 18.62, "grad_norm": 1.806129813194275, "learning_rate": 5.919218119418591e-07, "loss": 0.909, "step": 62225 }, { "epoch": 18.62, "grad_norm": 3.1925947666168213, "learning_rate": 5.906515765799009e-07, "loss": 0.97, "step": 62230 }, { "epoch": 18.62, "grad_norm": 2.054266929626465, "learning_rate": 5.89382689320886e-07, "loss": 0.9196, "step": 62235 }, { "epoch": 18.62, "grad_norm": 3.7723171710968018, "learning_rate": 5.881151502348859e-07, "loss": 1.0202, "step": 62240 }, { "epoch": 18.62, "grad_norm": 1.945148229598999, "learning_rate": 5.868489593919141e-07, "loss": 1.1337, "step": 62245 }, { "epoch": 18.62, "grad_norm": 2.7918272018432617, "learning_rate": 5.855841168618953e-07, "loss": 0.9992, "step": 62250 }, { "epoch": 18.63, "grad_norm": 4.473152160644531, "learning_rate": 5.843206227146958e-07, "loss": 0.9585, "step": 62255 }, { "epoch": 18.63, "grad_norm": 3.2821218967437744, "learning_rate": 5.83058477020082e-07, "loss": 0.8887, "step": 62260 }, { "epoch": 18.63, "grad_norm": 3.8282477855682373, "learning_rate": 5.817976798477731e-07, "loss": 0.7917, "step": 62265 }, { "epoch": 18.63, "grad_norm": 3.6935458183288574, "learning_rate": 5.805382312673941e-07, "loss": 0.82, "step": 62270 }, { "epoch": 18.63, "grad_norm": 1.9937669038772583, "learning_rate": 5.792801313485086e-07, "loss": 1.113, "step": 62275 }, { "epoch": 18.63, "grad_norm": 1.7843409776687622, "learning_rate": 5.780233801605944e-07, "loss": 0.9841, "step": 62280 }, { "epoch": 18.63, 
"grad_norm": 2.7850582599639893, "learning_rate": 5.767679777730656e-07, "loss": 1.0303, "step": 62285 }, { "epoch": 18.64, "grad_norm": 3.4673726558685303, "learning_rate": 5.755139242552554e-07, "loss": 0.9193, "step": 62290 }, { "epoch": 18.64, "grad_norm": 1.380431890487671, "learning_rate": 5.742612196764224e-07, "loss": 0.7912, "step": 62295 }, { "epoch": 18.64, "grad_norm": 2.402841567993164, "learning_rate": 5.73009864105753e-07, "loss": 0.8712, "step": 62300 }, { "epoch": 18.64, "grad_norm": 3.139500379562378, "learning_rate": 5.717598576123584e-07, "loss": 0.9437, "step": 62305 }, { "epoch": 18.64, "grad_norm": 2.209512233734131, "learning_rate": 5.705112002652752e-07, "loss": 0.9299, "step": 62310 }, { "epoch": 18.64, "grad_norm": 2.3451426029205322, "learning_rate": 5.692638921334647e-07, "loss": 0.9714, "step": 62315 }, { "epoch": 18.65, "grad_norm": 3.0532517433166504, "learning_rate": 5.680179332858165e-07, "loss": 1.1268, "step": 62320 }, { "epoch": 18.65, "grad_norm": 6.812263488769531, "learning_rate": 5.667733237911421e-07, "loss": 0.786, "step": 62325 }, { "epoch": 18.65, "grad_norm": 2.6777634620666504, "learning_rate": 5.655300637181809e-07, "loss": 0.8496, "step": 62330 }, { "epoch": 18.65, "grad_norm": 1.7131210565567017, "learning_rate": 5.64288153135592e-07, "loss": 1.0718, "step": 62335 }, { "epoch": 18.65, "grad_norm": 3.845153570175171, "learning_rate": 5.63047592111976e-07, "loss": 0.7881, "step": 62340 }, { "epoch": 18.65, "grad_norm": 2.2182199954986572, "learning_rate": 5.618083807158337e-07, "loss": 0.9474, "step": 62345 }, { "epoch": 18.65, "grad_norm": 7.1650390625, "learning_rate": 5.605705190156158e-07, "loss": 1.0506, "step": 62350 }, { "epoch": 18.66, "grad_norm": 4.3074259757995605, "learning_rate": 5.593340070796843e-07, "loss": 1.0749, "step": 62355 }, { "epoch": 18.66, "grad_norm": 5.269190311431885, "learning_rate": 5.580988449763319e-07, "loss": 0.986, "step": 62360 }, { "epoch": 18.66, "grad_norm": 1.8356404304504395, 
"learning_rate": 5.568650327737734e-07, "loss": 0.9098, "step": 62365 }, { "epoch": 18.66, "grad_norm": 3.584327459335327, "learning_rate": 5.556325705401517e-07, "loss": 1.004, "step": 62370 }, { "epoch": 18.66, "grad_norm": 2.812471628189087, "learning_rate": 5.544014583435342e-07, "loss": 0.9846, "step": 62375 }, { "epoch": 18.66, "grad_norm": 1.4029104709625244, "learning_rate": 5.53171696251914e-07, "loss": 0.8815, "step": 62380 }, { "epoch": 18.66, "grad_norm": 2.412611722946167, "learning_rate": 5.519432843332117e-07, "loss": 1.0807, "step": 62385 }, { "epoch": 18.67, "grad_norm": 2.5085036754608154, "learning_rate": 5.507162226552676e-07, "loss": 1.0554, "step": 62390 }, { "epoch": 18.67, "grad_norm": 2.622960090637207, "learning_rate": 5.494905112858551e-07, "loss": 1.0882, "step": 62395 }, { "epoch": 18.67, "grad_norm": 7.016479015350342, "learning_rate": 5.482661502926645e-07, "loss": 0.971, "step": 62400 }, { "epoch": 18.67, "grad_norm": 2.4067859649658203, "learning_rate": 5.470431397433196e-07, "loss": 0.943, "step": 62405 }, { "epoch": 18.67, "grad_norm": 1.8276368379592896, "learning_rate": 5.458214797053634e-07, "loss": 0.8402, "step": 62410 }, { "epoch": 18.67, "grad_norm": 3.209845781326294, "learning_rate": 5.4460117024627e-07, "loss": 1.0835, "step": 62415 }, { "epoch": 18.68, "grad_norm": 3.5742673873901367, "learning_rate": 5.433822114334325e-07, "loss": 0.9627, "step": 62420 }, { "epoch": 18.68, "grad_norm": 3.4893949031829834, "learning_rate": 5.421646033341748e-07, "loss": 1.057, "step": 62425 }, { "epoch": 18.68, "grad_norm": 3.8481268882751465, "learning_rate": 5.409483460157433e-07, "loss": 1.0174, "step": 62430 }, { "epoch": 18.68, "grad_norm": 1.257150650024414, "learning_rate": 5.397334395453147e-07, "loss": 0.8821, "step": 62435 }, { "epoch": 18.68, "grad_norm": 2.7487339973449707, "learning_rate": 5.385198839899769e-07, "loss": 0.9345, "step": 62440 }, { "epoch": 18.68, "grad_norm": 1.8233064413070679, "learning_rate": 
5.373076794167653e-07, "loss": 0.8765, "step": 62445 }, { "epoch": 18.68, "grad_norm": 3.476785898208618, "learning_rate": 5.360968258926152e-07, "loss": 0.978, "step": 62450 }, { "epoch": 18.69, "grad_norm": 1.5684432983398438, "learning_rate": 5.348873234844176e-07, "loss": 0.8142, "step": 62455 }, { "epoch": 18.69, "grad_norm": 4.222447395324707, "learning_rate": 5.336791722589579e-07, "loss": 0.9887, "step": 62460 }, { "epoch": 18.69, "grad_norm": 3.2067711353302, "learning_rate": 5.324723722829661e-07, "loss": 1.0059, "step": 62465 }, { "epoch": 18.69, "grad_norm": 1.711020827293396, "learning_rate": 5.312669236230944e-07, "loss": 1.1326, "step": 62470 }, { "epoch": 18.69, "grad_norm": 5.746004581451416, "learning_rate": 5.300628263459173e-07, "loss": 0.8101, "step": 62475 }, { "epoch": 18.69, "grad_norm": 3.5213847160339355, "learning_rate": 5.288600805179345e-07, "loss": 0.9395, "step": 62480 }, { "epoch": 18.69, "grad_norm": 3.3688313961029053, "learning_rate": 5.276586862055704e-07, "loss": 0.8759, "step": 62485 }, { "epoch": 18.7, "grad_norm": 1.9452404975891113, "learning_rate": 5.264586434751833e-07, "loss": 0.9038, "step": 62490 }, { "epoch": 18.7, "grad_norm": 2.7685489654541016, "learning_rate": 5.252599523930452e-07, "loss": 0.8489, "step": 62495 }, { "epoch": 18.7, "grad_norm": 2.2779135704040527, "learning_rate": 5.240626130253585e-07, "loss": 1.0238, "step": 62500 }, { "epoch": 18.7, "grad_norm": 2.2392897605895996, "learning_rate": 5.228666254382536e-07, "loss": 1.1407, "step": 62505 }, { "epoch": 18.7, "grad_norm": 4.152332782745361, "learning_rate": 5.216719896977806e-07, "loss": 0.7601, "step": 62510 }, { "epoch": 18.7, "grad_norm": 2.4010398387908936, "learning_rate": 5.204787058699228e-07, "loss": 0.6884, "step": 62515 }, { "epoch": 18.71, "grad_norm": 3.122080087661743, "learning_rate": 5.192867740205775e-07, "loss": 1.0505, "step": 62520 }, { "epoch": 18.71, "grad_norm": 4.57648229598999, "learning_rate": 5.18096194215581e-07, "loss": 
0.9048, "step": 62525 }, { "epoch": 18.71, "grad_norm": 5.7991414070129395, "learning_rate": 5.169069665206833e-07, "loss": 0.8917, "step": 62530 }, { "epoch": 18.71, "grad_norm": 1.6808985471725464, "learning_rate": 5.157190910015625e-07, "loss": 0.9275, "step": 62535 }, { "epoch": 18.71, "grad_norm": 2.2939209938049316, "learning_rate": 5.145325677238327e-07, "loss": 1.0068, "step": 62540 }, { "epoch": 18.71, "grad_norm": 2.1694259643554688, "learning_rate": 5.133473967530111e-07, "loss": 1.0079, "step": 62545 }, { "epoch": 18.71, "grad_norm": 3.0563623905181885, "learning_rate": 5.121635781545647e-07, "loss": 0.9808, "step": 62550 }, { "epoch": 18.72, "grad_norm": 2.7795627117156982, "learning_rate": 5.109811119938663e-07, "loss": 0.9278, "step": 62555 }, { "epoch": 18.72, "grad_norm": 2.1813108921051025, "learning_rate": 5.0979999833623e-07, "loss": 0.973, "step": 62560 }, { "epoch": 18.72, "grad_norm": 1.735756754875183, "learning_rate": 5.086202372468818e-07, "loss": 1.0072, "step": 62565 }, { "epoch": 18.72, "grad_norm": 1.2512030601501465, "learning_rate": 5.074418287909804e-07, "loss": 1.0268, "step": 62570 }, { "epoch": 18.72, "grad_norm": 6.556924343109131, "learning_rate": 5.062647730336073e-07, "loss": 0.9008, "step": 62575 }, { "epoch": 18.72, "grad_norm": 2.471876382827759, "learning_rate": 5.050890700397715e-07, "loss": 0.7419, "step": 62580 }, { "epoch": 18.72, "grad_norm": 2.7054262161254883, "learning_rate": 5.039147198744071e-07, "loss": 1.0026, "step": 62585 }, { "epoch": 18.73, "grad_norm": 3.4873416423797607, "learning_rate": 5.027417226023678e-07, "loss": 1.0207, "step": 62590 }, { "epoch": 18.73, "grad_norm": 2.8062655925750732, "learning_rate": 5.015700782884408e-07, "loss": 0.9565, "step": 62595 }, { "epoch": 18.73, "grad_norm": 1.282105565071106, "learning_rate": 5.003997869973353e-07, "loss": 1.0715, "step": 62600 }, { "epoch": 18.73, "grad_norm": 2.5329504013061523, "learning_rate": 4.992308487936804e-07, "loss": 1.0151, "step": 62605 
}, { "epoch": 18.73, "grad_norm": 4.997149467468262, "learning_rate": 4.980632637420408e-07, "loss": 0.9544, "step": 62610 }, { "epoch": 18.73, "grad_norm": 2.9110753536224365, "learning_rate": 4.968970319068983e-07, "loss": 0.8164, "step": 62615 }, { "epoch": 18.74, "grad_norm": 2.0976929664611816, "learning_rate": 4.957321533526626e-07, "loss": 0.878, "step": 62620 }, { "epoch": 18.74, "grad_norm": 3.1105329990386963, "learning_rate": 4.94568628143674e-07, "loss": 1.0102, "step": 62625 }, { "epoch": 18.74, "grad_norm": 2.1142258644104004, "learning_rate": 4.93406456344181e-07, "loss": 0.9267, "step": 62630 }, { "epoch": 18.74, "grad_norm": 2.38909912109375, "learning_rate": 4.922456380183821e-07, "loss": 0.8664, "step": 62635 }, { "epoch": 18.74, "grad_norm": 3.103832721710205, "learning_rate": 4.910861732303762e-07, "loss": 0.9541, "step": 62640 }, { "epoch": 18.74, "grad_norm": 3.287440299987793, "learning_rate": 4.899280620442121e-07, "loss": 0.986, "step": 62645 }, { "epoch": 18.74, "grad_norm": 3.8672590255737305, "learning_rate": 4.887713045238385e-07, "loss": 0.9699, "step": 62650 }, { "epoch": 18.75, "grad_norm": 2.768521785736084, "learning_rate": 4.876159007331516e-07, "loss": 0.9764, "step": 62655 }, { "epoch": 18.75, "grad_norm": 1.9124183654785156, "learning_rate": 4.864618507359558e-07, "loss": 0.925, "step": 62660 }, { "epoch": 18.75, "grad_norm": 6.206687927246094, "learning_rate": 4.853091545959948e-07, "loss": 1.0686, "step": 62665 }, { "epoch": 18.75, "grad_norm": 1.3544474840164185, "learning_rate": 4.841578123769258e-07, "loss": 0.8745, "step": 62670 }, { "epoch": 18.75, "grad_norm": 2.0739264488220215, "learning_rate": 4.830078241423369e-07, "loss": 0.8589, "step": 62675 }, { "epoch": 18.75, "grad_norm": 2.00089430809021, "learning_rate": 4.818591899557467e-07, "loss": 0.908, "step": 62680 }, { "epoch": 18.75, "grad_norm": 2.7553093433380127, "learning_rate": 4.807119098805851e-07, "loss": 1.0617, "step": 62685 }, { "epoch": 18.76, 
"grad_norm": 1.7284417152404785, "learning_rate": 4.795659839802181e-07, "loss": 0.952, "step": 62690 }, { "epoch": 18.76, "grad_norm": 2.9970293045043945, "learning_rate": 4.784214123179337e-07, "loss": 0.878, "step": 62695 }, { "epoch": 18.76, "grad_norm": 2.638425350189209, "learning_rate": 4.772781949569455e-07, "loss": 0.9725, "step": 62700 }, { "epoch": 18.76, "grad_norm": 2.378786087036133, "learning_rate": 4.7613633196039455e-07, "loss": 0.912, "step": 62705 }, { "epoch": 18.76, "grad_norm": 2.5562052726745605, "learning_rate": 4.749958233913415e-07, "loss": 0.9821, "step": 62710 }, { "epoch": 18.76, "grad_norm": 4.997159957885742, "learning_rate": 4.7385666931277774e-07, "loss": 0.998, "step": 62715 }, { "epoch": 18.77, "grad_norm": 2.2791659832000732, "learning_rate": 4.727188697876195e-07, "loss": 1.0484, "step": 62720 }, { "epoch": 18.77, "grad_norm": 3.7600674629211426, "learning_rate": 4.715824248786971e-07, "loss": 0.9918, "step": 62725 }, { "epoch": 18.77, "grad_norm": 5.7446417808532715, "learning_rate": 4.704473346487881e-07, "loss": 0.8191, "step": 62730 }, { "epoch": 18.77, "grad_norm": 2.3552982807159424, "learning_rate": 4.693135991605702e-07, "loss": 1.0591, "step": 62735 }, { "epoch": 18.77, "grad_norm": 2.88034987449646, "learning_rate": 4.681812184766682e-07, "loss": 1.1121, "step": 62740 }, { "epoch": 18.77, "grad_norm": 1.8043874502182007, "learning_rate": 4.670501926596127e-07, "loss": 1.0824, "step": 62745 }, { "epoch": 18.77, "grad_norm": 2.6473546028137207, "learning_rate": 4.659205217718815e-07, "loss": 0.866, "step": 62750 }, { "epoch": 18.78, "grad_norm": 2.442850351333618, "learning_rate": 4.647922058758525e-07, "loss": 0.9233, "step": 62755 }, { "epoch": 18.78, "grad_norm": 2.945636510848999, "learning_rate": 4.63665245033848e-07, "loss": 0.8995, "step": 62760 }, { "epoch": 18.78, "grad_norm": 2.816267490386963, "learning_rate": 4.6253963930810995e-07, "loss": 0.9573, "step": 62765 }, { "epoch": 18.78, "grad_norm": 
3.510456085205078, "learning_rate": 4.614153887607997e-07, "loss": 1.0292, "step": 62770 }, { "epoch": 18.78, "grad_norm": 1.8083337545394897, "learning_rate": 4.6029249345401484e-07, "loss": 0.8119, "step": 62775 }, { "epoch": 18.78, "grad_norm": 3.959989309310913, "learning_rate": 4.5917095344976403e-07, "loss": 0.9996, "step": 62780 }, { "epoch": 18.78, "grad_norm": 6.885543346405029, "learning_rate": 4.58050768809995e-07, "loss": 0.9633, "step": 62785 }, { "epoch": 18.79, "grad_norm": 2.3350930213928223, "learning_rate": 4.569319395965721e-07, "loss": 1.0368, "step": 62790 }, { "epoch": 18.79, "grad_norm": 4.399080276489258, "learning_rate": 4.5581446587128763e-07, "loss": 1.014, "step": 62795 }, { "epoch": 18.79, "grad_norm": 1.9063431024551392, "learning_rate": 4.5469834769585604e-07, "loss": 1.0557, "step": 62800 }, { "epoch": 18.79, "grad_norm": 1.7454651594161987, "learning_rate": 4.5358358513192257e-07, "loss": 1.1915, "step": 62805 }, { "epoch": 18.79, "grad_norm": 2.7510390281677246, "learning_rate": 4.5247017824105174e-07, "loss": 0.9034, "step": 62810 }, { "epoch": 18.79, "grad_norm": 4.9659600257873535, "learning_rate": 4.513581270847389e-07, "loss": 0.7884, "step": 62815 }, { "epoch": 18.8, "grad_norm": 4.02251672744751, "learning_rate": 4.5024743172439877e-07, "loss": 0.8928, "step": 62820 }, { "epoch": 18.8, "grad_norm": 4.709519863128662, "learning_rate": 4.491380922213767e-07, "loss": 0.9039, "step": 62825 }, { "epoch": 18.8, "grad_norm": 1.9701640605926514, "learning_rate": 4.4803010863693483e-07, "loss": 0.937, "step": 62830 }, { "epoch": 18.8, "grad_norm": 2.070518970489502, "learning_rate": 4.4692348103227143e-07, "loss": 0.9715, "step": 62835 }, { "epoch": 18.8, "grad_norm": 1.8517335653305054, "learning_rate": 4.4581820946850427e-07, "loss": 0.9508, "step": 62840 }, { "epoch": 18.8, "grad_norm": 1.6975722312927246, "learning_rate": 4.4471429400667343e-07, "loss": 1.0122, "step": 62845 }, { "epoch": 18.8, "grad_norm": 2.018352746963501, 
"learning_rate": 4.436117347077495e-07, "loss": 1.058, "step": 62850 }, { "epoch": 18.81, "grad_norm": 4.113342761993408, "learning_rate": 4.425105316326228e-07, "loss": 0.9022, "step": 62855 }, { "epoch": 18.81, "grad_norm": 3.3095271587371826, "learning_rate": 4.414106848421168e-07, "loss": 1.0346, "step": 62860 }, { "epoch": 18.81, "grad_norm": 3.63959002494812, "learning_rate": 4.4031219439696625e-07, "loss": 0.9892, "step": 62865 }, { "epoch": 18.81, "grad_norm": 4.62831974029541, "learning_rate": 4.3921506035784766e-07, "loss": 0.9858, "step": 62870 }, { "epoch": 18.81, "grad_norm": 3.9106874465942383, "learning_rate": 4.381192827853514e-07, "loss": 0.943, "step": 62875 }, { "epoch": 18.81, "grad_norm": 3.417293071746826, "learning_rate": 4.3702486173999856e-07, "loss": 0.8616, "step": 62880 }, { "epoch": 18.81, "grad_norm": 2.981015682220459, "learning_rate": 4.359317972822269e-07, "loss": 1.0777, "step": 62885 }, { "epoch": 18.82, "grad_norm": 9.559314727783203, "learning_rate": 4.3484008947241304e-07, "loss": 0.9715, "step": 62890 }, { "epoch": 18.82, "grad_norm": 2.853968858718872, "learning_rate": 4.33749738370845e-07, "loss": 1.0899, "step": 62895 }, { "epoch": 18.82, "grad_norm": 2.9519448280334473, "learning_rate": 4.326607440377439e-07, "loss": 0.9593, "step": 62900 }, { "epoch": 18.82, "grad_norm": 3.299821376800537, "learning_rate": 4.3157310653325344e-07, "loss": 0.9734, "step": 62905 }, { "epoch": 18.82, "grad_norm": 2.5960991382598877, "learning_rate": 4.3048682591744495e-07, "loss": 1.0205, "step": 62910 }, { "epoch": 18.82, "grad_norm": 1.4669679403305054, "learning_rate": 4.294019022503037e-07, "loss": 0.9927, "step": 62915 }, { "epoch": 18.82, "grad_norm": 3.0329182147979736, "learning_rate": 4.283183355917625e-07, "loss": 1.1614, "step": 62920 }, { "epoch": 18.83, "grad_norm": 3.468059778213501, "learning_rate": 4.272361260016566e-07, "loss": 0.7337, "step": 62925 }, { "epoch": 18.83, "grad_norm": 8.143178939819336, "learning_rate": 
4.2615527353975783e-07, "loss": 0.833, "step": 62930 }, { "epoch": 18.83, "grad_norm": 1.7140194177627563, "learning_rate": 4.2507577826575995e-07, "loss": 0.9572, "step": 62935 }, { "epoch": 18.83, "grad_norm": 2.0888423919677734, "learning_rate": 4.23997640239282e-07, "loss": 1.0221, "step": 62940 }, { "epoch": 18.83, "grad_norm": 2.820460319519043, "learning_rate": 4.2292085951987084e-07, "loss": 0.8878, "step": 62945 }, { "epoch": 18.83, "grad_norm": 4.351651668548584, "learning_rate": 4.218454361669899e-07, "loss": 0.8856, "step": 62950 }, { "epoch": 18.84, "grad_norm": 1.5819222927093506, "learning_rate": 4.2077137024004455e-07, "loss": 1.04, "step": 62955 }, { "epoch": 18.84, "grad_norm": 3.579989194869995, "learning_rate": 4.1969866179834284e-07, "loss": 1.0235, "step": 62960 }, { "epoch": 18.84, "grad_norm": 2.6302943229675293, "learning_rate": 4.1862731090113736e-07, "loss": 0.9574, "step": 62965 }, { "epoch": 18.84, "grad_norm": 4.829039573669434, "learning_rate": 4.175573176075892e-07, "loss": 1.0367, "step": 62970 }, { "epoch": 18.84, "grad_norm": 3.0657598972320557, "learning_rate": 4.164886819768038e-07, "loss": 0.855, "step": 62975 }, { "epoch": 18.84, "grad_norm": 3.936453342437744, "learning_rate": 4.1542140406779504e-07, "loss": 1.1114, "step": 62980 }, { "epoch": 18.84, "grad_norm": 3.4842371940612793, "learning_rate": 4.1435548393950474e-07, "loss": 0.9725, "step": 62985 }, { "epoch": 18.85, "grad_norm": 2.537079334259033, "learning_rate": 4.1329092165080794e-07, "loss": 0.9701, "step": 62990 }, { "epoch": 18.85, "grad_norm": 2.407586097717285, "learning_rate": 4.122277172604966e-07, "loss": 0.8825, "step": 62995 }, { "epoch": 18.85, "grad_norm": 1.6151124238967896, "learning_rate": 4.111658708272903e-07, "loss": 1.1248, "step": 63000 }, { "epoch": 18.85, "grad_norm": 3.798203706741333, "learning_rate": 4.1010538240983667e-07, "loss": 1.0407, "step": 63005 }, { "epoch": 18.85, "grad_norm": 3.2758753299713135, "learning_rate": 
4.090462520666999e-07, "loss": 1.033, "step": 63010 }, { "epoch": 18.85, "grad_norm": 3.0684893131256104, "learning_rate": 4.0798847985637766e-07, "loss": 0.8769, "step": 63015 }, { "epoch": 18.85, "grad_norm": 2.2949230670928955, "learning_rate": 4.0693206583729273e-07, "loss": 1.0484, "step": 63020 }, { "epoch": 18.86, "grad_norm": 3.716773271560669, "learning_rate": 4.0587701006778443e-07, "loss": 1.0033, "step": 63025 }, { "epoch": 18.86, "grad_norm": 1.4224724769592285, "learning_rate": 4.048233126061257e-07, "loss": 0.9085, "step": 63030 }, { "epoch": 18.86, "grad_norm": 1.496864676475525, "learning_rate": 4.037709735105089e-07, "loss": 0.8798, "step": 63035 }, { "epoch": 18.86, "grad_norm": 3.2750120162963867, "learning_rate": 4.0271999283905683e-07, "loss": 0.954, "step": 63040 }, { "epoch": 18.86, "grad_norm": 3.3160552978515625, "learning_rate": 4.0167037064980925e-07, "loss": 0.9068, "step": 63045 }, { "epoch": 18.86, "grad_norm": 4.469260215759277, "learning_rate": 4.006221070007421e-07, "loss": 0.8265, "step": 63050 }, { "epoch": 18.87, "grad_norm": 2.6699278354644775, "learning_rate": 3.9957520194974505e-07, "loss": 1.0278, "step": 63055 }, { "epoch": 18.87, "grad_norm": 2.068089485168457, "learning_rate": 3.9852965555463863e-07, "loss": 0.9476, "step": 63060 }, { "epoch": 18.87, "grad_norm": 1.7394936084747314, "learning_rate": 3.974854678731654e-07, "loss": 0.9202, "step": 63065 }, { "epoch": 18.87, "grad_norm": 5.004978656768799, "learning_rate": 3.964426389630016e-07, "loss": 1.0277, "step": 63070 }, { "epoch": 18.87, "grad_norm": 2.368013620376587, "learning_rate": 3.954011688817344e-07, "loss": 0.9778, "step": 63075 }, { "epoch": 18.87, "grad_norm": 1.3884702920913696, "learning_rate": 3.943610576868845e-07, "loss": 0.8678, "step": 63080 }, { "epoch": 18.87, "grad_norm": 1.4735065698623657, "learning_rate": 3.9332230543589753e-07, "loss": 1.0603, "step": 63085 }, { "epoch": 18.88, "grad_norm": 2.9235053062438965, "learning_rate": 
3.922849121861416e-07, "loss": 0.9802, "step": 63090 }, { "epoch": 18.88, "grad_norm": 1.8659436702728271, "learning_rate": 3.912488779949153e-07, "loss": 1.0448, "step": 63095 }, { "epoch": 18.88, "grad_norm": 3.8090038299560547, "learning_rate": 3.902142029194311e-07, "loss": 0.9592, "step": 63100 }, { "epoch": 18.88, "grad_norm": 2.0518362522125244, "learning_rate": 3.891808870168351e-07, "loss": 0.8467, "step": 63105 }, { "epoch": 18.88, "grad_norm": 2.6214382648468018, "learning_rate": 3.8814893034420097e-07, "loss": 0.9282, "step": 63110 }, { "epoch": 18.88, "grad_norm": 4.084232807159424, "learning_rate": 3.8711833295851654e-07, "loss": 0.9546, "step": 63115 }, { "epoch": 18.88, "grad_norm": 1.4593278169631958, "learning_rate": 3.8608909491670287e-07, "loss": 0.803, "step": 63120 }, { "epoch": 18.89, "grad_norm": 3.1608002185821533, "learning_rate": 3.8506121627560345e-07, "loss": 0.9568, "step": 63125 }, { "epoch": 18.89, "grad_norm": 2.8863463401794434, "learning_rate": 3.840346970919867e-07, "loss": 1.0497, "step": 63130 }, { "epoch": 18.89, "grad_norm": 1.954045057296753, "learning_rate": 3.8300953742254895e-07, "loss": 0.9635, "step": 63135 }, { "epoch": 18.89, "grad_norm": 2.1377789974212646, "learning_rate": 3.819857373239033e-07, "loss": 0.7992, "step": 63140 }, { "epoch": 18.89, "grad_norm": 2.493187665939331, "learning_rate": 3.809632968526017e-07, "loss": 1.0234, "step": 63145 }, { "epoch": 18.89, "grad_norm": 2.3102526664733887, "learning_rate": 3.799422160651017e-07, "loss": 0.8951, "step": 63150 }, { "epoch": 18.9, "grad_norm": 2.773439645767212, "learning_rate": 3.7892249501780554e-07, "loss": 0.8641, "step": 63155 }, { "epoch": 18.9, "grad_norm": 2.822577953338623, "learning_rate": 3.779041337670236e-07, "loss": 0.9386, "step": 63160 }, { "epoch": 18.9, "grad_norm": 3.764552116394043, "learning_rate": 3.7688713236900816e-07, "loss": 0.9324, "step": 63165 }, { "epoch": 18.9, "grad_norm": 2.5591769218444824, "learning_rate": 
3.758714908799199e-07, "loss": 0.925, "step": 63170 }, { "epoch": 18.9, "grad_norm": 1.3700811862945557, "learning_rate": 3.7485720935585553e-07, "loss": 0.8529, "step": 63175 }, { "epoch": 18.9, "grad_norm": 2.099714517593384, "learning_rate": 3.7384428785282867e-07, "loss": 0.9949, "step": 63180 }, { "epoch": 18.9, "grad_norm": 3.3494229316711426, "learning_rate": 3.7283272642678335e-07, "loss": 0.7835, "step": 63185 }, { "epoch": 18.91, "grad_norm": 2.3654308319091797, "learning_rate": 3.718225251335916e-07, "loss": 0.9557, "step": 63190 }, { "epoch": 18.91, "grad_norm": 3.252896547317505, "learning_rate": 3.7081368402903935e-07, "loss": 1.0649, "step": 63195 }, { "epoch": 18.91, "grad_norm": 2.0601794719696045, "learning_rate": 3.6980620316884876e-07, "loss": 0.9137, "step": 63200 }, { "epoch": 18.91, "grad_norm": 3.0151379108428955, "learning_rate": 3.688000826086585e-07, "loss": 1.0903, "step": 63205 }, { "epoch": 18.91, "grad_norm": 2.6133904457092285, "learning_rate": 3.6779532240403537e-07, "loss": 1.0124, "step": 63210 }, { "epoch": 18.91, "grad_norm": 5.491546154022217, "learning_rate": 3.6679192261047655e-07, "loss": 0.9035, "step": 63215 }, { "epoch": 18.91, "grad_norm": 2.6490890979766846, "learning_rate": 3.6578988328339334e-07, "loss": 1.0075, "step": 63220 }, { "epoch": 18.92, "grad_norm": 2.415147542953491, "learning_rate": 3.647892044781276e-07, "loss": 1.0301, "step": 63225 }, { "epoch": 18.92, "grad_norm": 6.828542709350586, "learning_rate": 3.637898862499489e-07, "loss": 1.1634, "step": 63230 }, { "epoch": 18.92, "grad_norm": 1.5809046030044556, "learning_rate": 3.6279192865404654e-07, "loss": 0.9714, "step": 63235 }, { "epoch": 18.92, "grad_norm": 2.416311264038086, "learning_rate": 3.6179533174553746e-07, "loss": 1.1117, "step": 63240 }, { "epoch": 18.92, "grad_norm": 3.4020678997039795, "learning_rate": 3.6080009557946104e-07, "loss": 0.7682, "step": 63245 }, { "epoch": 18.92, "grad_norm": 1.7761116027832031, "learning_rate": 
3.5980622021078716e-07, "loss": 0.959, "step": 63250 }, { "epoch": 18.93, "grad_norm": 1.5695528984069824, "learning_rate": 3.5881370569439964e-07, "loss": 0.9388, "step": 63255 }, { "epoch": 18.93, "grad_norm": 1.8310515880584717, "learning_rate": 3.5782255208512136e-07, "loss": 0.9259, "step": 63260 }, { "epoch": 18.93, "grad_norm": 2.843670129776001, "learning_rate": 3.5683275943768914e-07, "loss": 1.0009, "step": 63265 }, { "epoch": 18.93, "grad_norm": 3.0472795963287354, "learning_rate": 3.5584432780676745e-07, "loss": 0.8153, "step": 63270 }, { "epoch": 18.93, "grad_norm": 2.187629461288452, "learning_rate": 3.548572572469461e-07, "loss": 0.9146, "step": 63275 }, { "epoch": 18.93, "grad_norm": 2.447866201400757, "learning_rate": 3.5387154781274244e-07, "loss": 0.929, "step": 63280 }, { "epoch": 18.93, "grad_norm": 2.634216785430908, "learning_rate": 3.528871995585964e-07, "loss": 0.9642, "step": 63285 }, { "epoch": 18.94, "grad_norm": 1.427452802658081, "learning_rate": 3.5190421253886717e-07, "loss": 1.1127, "step": 63290 }, { "epoch": 18.94, "grad_norm": 2.087641716003418, "learning_rate": 3.5092258680785305e-07, "loss": 1.0386, "step": 63295 }, { "epoch": 18.94, "grad_norm": 1.5409181118011475, "learning_rate": 3.4994232241976065e-07, "loss": 0.9281, "step": 63300 }, { "epoch": 18.94, "grad_norm": 2.0599822998046875, "learning_rate": 3.4896341942873e-07, "loss": 0.9306, "step": 63305 }, { "epoch": 18.94, "grad_norm": 3.1682193279266357, "learning_rate": 3.4798587788882895e-07, "loss": 1.0272, "step": 63310 }, { "epoch": 18.94, "grad_norm": 2.4055306911468506, "learning_rate": 3.4700969785404214e-07, "loss": 0.9128, "step": 63315 }, { "epoch": 18.94, "grad_norm": 2.880706548690796, "learning_rate": 3.460348793782847e-07, "loss": 0.8306, "step": 63320 }, { "epoch": 18.95, "grad_norm": 1.1103742122650146, "learning_rate": 3.450614225153942e-07, "loss": 1.0068, "step": 63325 }, { "epoch": 18.95, "grad_norm": 3.7383155822753906, "learning_rate": 
3.440893273191331e-07, "loss": 0.9928, "step": 63330 }, { "epoch": 18.95, "grad_norm": 3.4909322261810303, "learning_rate": 3.431185938431919e-07, "loss": 1.0227, "step": 63335 }, { "epoch": 18.95, "grad_norm": 2.2891860008239746, "learning_rate": 3.421492221411804e-07, "loss": 0.9881, "step": 63340 }, { "epoch": 18.95, "grad_norm": 1.8985334634780884, "learning_rate": 3.4118121226663913e-07, "loss": 0.9726, "step": 63345 }, { "epoch": 18.95, "grad_norm": 2.307614803314209, "learning_rate": 3.402145642730226e-07, "loss": 1.0593, "step": 63350 }, { "epoch": 18.96, "grad_norm": 1.9816555976867676, "learning_rate": 3.3924927821372965e-07, "loss": 0.8479, "step": 63355 }, { "epoch": 18.96, "grad_norm": 3.931323289871216, "learning_rate": 3.3828535414206217e-07, "loss": 0.8215, "step": 63360 }, { "epoch": 18.96, "grad_norm": 2.6754205226898193, "learning_rate": 3.373227921112609e-07, "loss": 1.1578, "step": 63365 }, { "epoch": 18.96, "grad_norm": 3.3269999027252197, "learning_rate": 3.3636159217448617e-07, "loss": 0.9295, "step": 63370 }, { "epoch": 18.96, "grad_norm": 3.251188278198242, "learning_rate": 3.354017543848259e-07, "loss": 0.9049, "step": 63375 }, { "epoch": 18.96, "grad_norm": 1.9303770065307617, "learning_rate": 3.344432787952878e-07, "loss": 0.9681, "step": 63380 }, { "epoch": 18.96, "grad_norm": 3.30509614944458, "learning_rate": 3.3348616545880727e-07, "loss": 0.9311, "step": 63385 }, { "epoch": 18.97, "grad_norm": 3.3238556385040283, "learning_rate": 3.325304144282476e-07, "loss": 0.9782, "step": 63390 }, { "epoch": 18.97, "grad_norm": 1.8988010883331299, "learning_rate": 3.315760257563943e-07, "loss": 0.8005, "step": 63395 }, { "epoch": 18.97, "grad_norm": 2.3984742164611816, "learning_rate": 3.306229994959553e-07, "loss": 0.8219, "step": 63400 }, { "epoch": 18.97, "grad_norm": 1.133448839187622, "learning_rate": 3.2967133569956344e-07, "loss": 1.0835, "step": 63405 }, { "epoch": 18.97, "grad_norm": 2.583820343017578, "learning_rate": 
3.287210344197822e-07, "loss": 0.8689, "step": 63410 }, { "epoch": 18.97, "grad_norm": 2.5700812339782715, "learning_rate": 3.2777209570909464e-07, "loss": 0.8236, "step": 63415 }, { "epoch": 18.97, "grad_norm": 3.197221279144287, "learning_rate": 3.2682451961990603e-07, "loss": 1.0251, "step": 63420 }, { "epoch": 18.98, "grad_norm": 2.2375690937042236, "learning_rate": 3.2587830620455507e-07, "loss": 0.8646, "step": 63425 }, { "epoch": 18.98, "grad_norm": 2.5171115398406982, "learning_rate": 3.249334555153e-07, "loss": 1.1793, "step": 63430 }, { "epoch": 18.98, "grad_norm": 2.9206202030181885, "learning_rate": 3.239899676043184e-07, "loss": 1.1318, "step": 63435 }, { "epoch": 18.98, "grad_norm": 2.8934311866760254, "learning_rate": 3.230478425237243e-07, "loss": 1.0891, "step": 63440 }, { "epoch": 18.98, "grad_norm": 2.022392988204956, "learning_rate": 3.221070803255427e-07, "loss": 1.0109, "step": 63445 }, { "epoch": 18.98, "grad_norm": 2.2447142601013184, "learning_rate": 3.2116768106174035e-07, "loss": 1.0619, "step": 63450 }, { "epoch": 18.99, "grad_norm": 3.3541789054870605, "learning_rate": 3.2022964478419235e-07, "loss": 1.1107, "step": 63455 }, { "epoch": 18.99, "grad_norm": 2.8497142791748047, "learning_rate": 3.192929715447102e-07, "loss": 0.9282, "step": 63460 }, { "epoch": 18.99, "grad_norm": 2.122657537460327, "learning_rate": 3.1835766139502174e-07, "loss": 0.87, "step": 63465 }, { "epoch": 18.99, "grad_norm": 2.5856778621673584, "learning_rate": 3.1742371438678586e-07, "loss": 1.0344, "step": 63470 }, { "epoch": 18.99, "grad_norm": 3.2936534881591797, "learning_rate": 3.1649113057158066e-07, "loss": 0.9697, "step": 63475 }, { "epoch": 18.99, "grad_norm": 1.6257001161575317, "learning_rate": 3.1555991000091214e-07, "loss": 0.9946, "step": 63480 }, { "epoch": 18.99, "grad_norm": 3.2520930767059326, "learning_rate": 3.1463005272621416e-07, "loss": 0.8589, "step": 63485 }, { "epoch": 19.0, "grad_norm": 3.0452685356140137, "learning_rate": 
3.1370155879883735e-07, "loss": 0.9231, "step": 63490 }, { "epoch": 19.0, "grad_norm": 1.2502514123916626, "learning_rate": 3.1277442827006564e-07, "loss": 0.9966, "step": 63495 }, { "epoch": 19.0, "grad_norm": 3.0072884559631348, "learning_rate": 3.118486611910998e-07, "loss": 0.9674, "step": 63500 }, { "epoch": 19.0, "grad_norm": 1.7312711477279663, "learning_rate": 3.109242576130711e-07, "loss": 0.857, "step": 63505 }, { "epoch": 19.0, "grad_norm": 1.629128336906433, "learning_rate": 3.100012175870304e-07, "loss": 0.9265, "step": 63510 }, { "epoch": 19.0, "grad_norm": 1.9464771747589111, "learning_rate": 3.0907954116396185e-07, "loss": 0.9412, "step": 63515 }, { "epoch": 19.0, "grad_norm": 1.9227700233459473, "learning_rate": 3.0815922839476376e-07, "loss": 0.6606, "step": 63520 }, { "epoch": 19.01, "grad_norm": 3.476877450942993, "learning_rate": 3.0724027933026757e-07, "loss": 1.0086, "step": 63525 }, { "epoch": 19.01, "grad_norm": 3.3601372241973877, "learning_rate": 3.063226940212216e-07, "loss": 0.9579, "step": 63530 }, { "epoch": 19.01, "grad_norm": 2.9032952785491943, "learning_rate": 3.0540647251830755e-07, "loss": 1.0735, "step": 63535 }, { "epoch": 19.01, "grad_norm": 1.6308649778366089, "learning_rate": 3.0449161487212384e-07, "loss": 1.1542, "step": 63540 }, { "epoch": 19.01, "grad_norm": 2.7309157848358154, "learning_rate": 3.035781211331995e-07, "loss": 0.8411, "step": 63545 }, { "epoch": 19.01, "grad_norm": 9.992263793945312, "learning_rate": 3.026659913519858e-07, "loss": 0.9904, "step": 63550 }, { "epoch": 19.01, "grad_norm": 1.4755282402038574, "learning_rate": 3.017552255788564e-07, "loss": 1.1822, "step": 63555 }, { "epoch": 19.02, "grad_norm": 3.669390916824341, "learning_rate": 3.008458238641154e-07, "loss": 1.1826, "step": 63560 }, { "epoch": 19.02, "grad_norm": 2.0326449871063232, "learning_rate": 2.9993778625798383e-07, "loss": 0.9136, "step": 63565 }, { "epoch": 19.02, "grad_norm": 2.4299309253692627, "learning_rate": 
2.9903111281061604e-07, "loss": 0.891, "step": 63570 }, { "epoch": 19.02, "grad_norm": 1.997877836227417, "learning_rate": 2.98125803572083e-07, "loss": 0.8897, "step": 63575 }, { "epoch": 19.02, "grad_norm": 2.001340866088867, "learning_rate": 2.972218585923864e-07, "loss": 0.9927, "step": 63580 }, { "epoch": 19.02, "grad_norm": 1.1060036420822144, "learning_rate": 2.963192779214502e-07, "loss": 1.1492, "step": 63585 }, { "epoch": 19.03, "grad_norm": 2.4950671195983887, "learning_rate": 2.9541806160912346e-07, "loss": 0.8886, "step": 63590 }, { "epoch": 19.03, "grad_norm": 1.5963852405548096, "learning_rate": 2.9451820970517466e-07, "loss": 0.9515, "step": 63595 }, { "epoch": 19.03, "grad_norm": 2.513756036758423, "learning_rate": 2.9361972225930845e-07, "loss": 0.9299, "step": 63600 }, { "epoch": 19.03, "grad_norm": 2.3485164642333984, "learning_rate": 2.927225993211408e-07, "loss": 0.7861, "step": 63605 }, { "epoch": 19.03, "grad_norm": 4.739649772644043, "learning_rate": 2.9182684094022363e-07, "loss": 1.0509, "step": 63610 }, { "epoch": 19.03, "grad_norm": 2.6401422023773193, "learning_rate": 2.909324471660285e-07, "loss": 0.9599, "step": 63615 }, { "epoch": 19.03, "grad_norm": 3.8304197788238525, "learning_rate": 2.900394180479521e-07, "loss": 0.9171, "step": 63620 }, { "epoch": 19.04, "grad_norm": 1.489724040031433, "learning_rate": 2.891477536353104e-07, "loss": 1.1215, "step": 63625 }, { "epoch": 19.04, "grad_norm": 3.6274325847625732, "learning_rate": 2.8825745397735584e-07, "loss": 0.8733, "step": 63630 }, { "epoch": 19.04, "grad_norm": 2.480381965637207, "learning_rate": 2.873685191232517e-07, "loss": 0.9707, "step": 63635 }, { "epoch": 19.04, "grad_norm": 3.279183864593506, "learning_rate": 2.8648094912210044e-07, "loss": 0.9859, "step": 63640 }, { "epoch": 19.04, "grad_norm": 6.223374843597412, "learning_rate": 2.8559474402291564e-07, "loss": 0.9295, "step": 63645 }, { "epoch": 19.04, "grad_norm": 2.4016520977020264, "learning_rate": 
2.8470990387464424e-07, "loss": 0.8226, "step": 63650 }, { "epoch": 19.04, "grad_norm": 4.2338666915893555, "learning_rate": 2.8382642872615826e-07, "loss": 0.955, "step": 63655 }, { "epoch": 19.05, "grad_norm": 3.539830446243286, "learning_rate": 2.829443186262437e-07, "loss": 1.1181, "step": 63660 }, { "epoch": 19.05, "grad_norm": 3.2982068061828613, "learning_rate": 2.8206357362362543e-07, "loss": 1.0266, "step": 63665 }, { "epoch": 19.05, "grad_norm": 1.85117769241333, "learning_rate": 2.811841937669396e-07, "loss": 1.1086, "step": 63670 }, { "epoch": 19.05, "grad_norm": 4.956904888153076, "learning_rate": 2.8030617910475845e-07, "loss": 0.9842, "step": 63675 }, { "epoch": 19.05, "grad_norm": 2.6763927936553955, "learning_rate": 2.794295296855709e-07, "loss": 1.1752, "step": 63680 }, { "epoch": 19.05, "grad_norm": 2.8290627002716064, "learning_rate": 2.785542455577994e-07, "loss": 0.9809, "step": 63685 }, { "epoch": 19.06, "grad_norm": 2.090080499649048, "learning_rate": 2.776803267697775e-07, "loss": 1.065, "step": 63690 }, { "epoch": 19.06, "grad_norm": 1.995445728302002, "learning_rate": 2.768077733697749e-07, "loss": 0.9295, "step": 63695 }, { "epoch": 19.06, "grad_norm": 3.277020215988159, "learning_rate": 2.759365854059781e-07, "loss": 0.9197, "step": 63700 }, { "epoch": 19.06, "grad_norm": 2.4329395294189453, "learning_rate": 2.750667629265069e-07, "loss": 1.0294, "step": 63705 }, { "epoch": 19.06, "grad_norm": 3.564478874206543, "learning_rate": 2.741983059793979e-07, "loss": 1.0323, "step": 63710 }, { "epoch": 19.06, "grad_norm": 1.9448779821395874, "learning_rate": 2.7333121461261545e-07, "loss": 0.8142, "step": 63715 }, { "epoch": 19.06, "grad_norm": 2.6165874004364014, "learning_rate": 2.724654888740463e-07, "loss": 0.8353, "step": 63720 }, { "epoch": 19.07, "grad_norm": 3.1223433017730713, "learning_rate": 2.716011288115078e-07, "loss": 0.9984, "step": 63725 }, { "epoch": 19.07, "grad_norm": 3.356553554534912, "learning_rate": 
2.7073813447273386e-07, "loss": 0.9196, "step": 63730 }, { "epoch": 19.07, "grad_norm": 1.8804484605789185, "learning_rate": 2.698765059053865e-07, "loss": 0.9712, "step": 63735 }, { "epoch": 19.07, "grad_norm": 2.733215570449829, "learning_rate": 2.690162431570553e-07, "loss": 0.9342, "step": 63740 }, { "epoch": 19.07, "grad_norm": 2.573293685913086, "learning_rate": 2.6815734627525235e-07, "loss": 0.8668, "step": 63745 }, { "epoch": 19.07, "grad_norm": 2.0941619873046875, "learning_rate": 2.672998153074119e-07, "loss": 0.8658, "step": 63750 }, { "epoch": 19.07, "grad_norm": 4.143740653991699, "learning_rate": 2.6644365030089046e-07, "loss": 0.9723, "step": 63755 }, { "epoch": 19.08, "grad_norm": 2.708742141723633, "learning_rate": 2.6558885130298363e-07, "loss": 0.8001, "step": 63760 }, { "epoch": 19.08, "grad_norm": 1.1027653217315674, "learning_rate": 2.647354183608869e-07, "loss": 0.8632, "step": 63765 }, { "epoch": 19.08, "grad_norm": 5.099153995513916, "learning_rate": 2.638833515217487e-07, "loss": 0.9992, "step": 63770 }, { "epoch": 19.08, "grad_norm": 2.2473297119140625, "learning_rate": 2.6303265083261753e-07, "loss": 1.179, "step": 63775 }, { "epoch": 19.08, "grad_norm": 2.4921000003814697, "learning_rate": 2.621833163404835e-07, "loss": 0.8985, "step": 63780 }, { "epoch": 19.08, "grad_norm": 2.435410261154175, "learning_rate": 2.6133534809224813e-07, "loss": 1.155, "step": 63785 }, { "epoch": 19.09, "grad_norm": 1.8930751085281372, "learning_rate": 2.6048874613474884e-07, "loss": 0.9032, "step": 63790 }, { "epoch": 19.09, "grad_norm": 2.620929718017578, "learning_rate": 2.596435105147399e-07, "loss": 1.0091, "step": 63795 }, { "epoch": 19.09, "grad_norm": 3.0546348094940186, "learning_rate": 2.587996412789034e-07, "loss": 0.9915, "step": 63800 }, { "epoch": 19.09, "grad_norm": 2.1018009185791016, "learning_rate": 2.579571384738466e-07, "loss": 0.9352, "step": 63805 }, { "epoch": 19.09, "grad_norm": 3.1756997108459473, "learning_rate": 
2.5711600214609885e-07, "loss": 1.0105, "step": 63810 }, { "epoch": 19.09, "grad_norm": 4.981276512145996, "learning_rate": 2.562762323421147e-07, "loss": 1.0216, "step": 63815 }, { "epoch": 19.09, "grad_norm": 3.7036938667297363, "learning_rate": 2.554378291082765e-07, "loss": 1.0003, "step": 63820 }, { "epoch": 19.1, "grad_norm": 2.4953665733337402, "learning_rate": 2.5460079249088606e-07, "loss": 0.7268, "step": 63825 }, { "epoch": 19.1, "grad_norm": 2.5604419708251953, "learning_rate": 2.5376512253617034e-07, "loss": 1.0928, "step": 63830 }, { "epoch": 19.1, "grad_norm": 3.0103671550750732, "learning_rate": 2.5293081929028686e-07, "loss": 1.0442, "step": 63835 }, { "epoch": 19.1, "grad_norm": 4.777401447296143, "learning_rate": 2.5209788279930977e-07, "loss": 0.9435, "step": 63840 }, { "epoch": 19.1, "grad_norm": 2.0667479038238525, "learning_rate": 2.512663131092441e-07, "loss": 1.15, "step": 63845 }, { "epoch": 19.1, "grad_norm": 1.9874635934829712, "learning_rate": 2.504361102660141e-07, "loss": 0.983, "step": 63850 }, { "epoch": 19.1, "grad_norm": 4.316436290740967, "learning_rate": 2.4960727431547206e-07, "loss": 0.9662, "step": 63855 }, { "epoch": 19.11, "grad_norm": 2.294708013534546, "learning_rate": 2.487798053033924e-07, "loss": 0.7309, "step": 63860 }, { "epoch": 19.11, "grad_norm": 3.665914535522461, "learning_rate": 2.479537032754803e-07, "loss": 0.9579, "step": 63865 }, { "epoch": 19.11, "grad_norm": 2.2551567554473877, "learning_rate": 2.4712896827735197e-07, "loss": 1.0002, "step": 63870 }, { "epoch": 19.11, "grad_norm": 3.8116965293884277, "learning_rate": 2.463056003545655e-07, "loss": 0.8443, "step": 63875 }, { "epoch": 19.11, "grad_norm": 2.817349433898926, "learning_rate": 2.4548359955259003e-07, "loss": 1.0352, "step": 63880 }, { "epoch": 19.11, "grad_norm": 1.7628093957901, "learning_rate": 2.4466296591682256e-07, "loss": 0.9593, "step": 63885 }, { "epoch": 19.12, "grad_norm": 4.2066755294799805, "learning_rate": 2.43843699492588e-07, 
"loss": 1.024, "step": 63890 }, { "epoch": 19.12, "grad_norm": 2.6177399158477783, "learning_rate": 2.430258003251334e-07, "loss": 1.1829, "step": 63895 }, { "epoch": 19.12, "grad_norm": 4.167474746704102, "learning_rate": 2.422092684596311e-07, "loss": 0.765, "step": 63900 }, { "epoch": 19.12, "grad_norm": 2.174982786178589, "learning_rate": 2.4139410394117825e-07, "loss": 0.7503, "step": 63905 }, { "epoch": 19.12, "grad_norm": 4.269941329956055, "learning_rate": 2.405803068147916e-07, "loss": 0.8784, "step": 63910 }, { "epoch": 19.12, "grad_norm": 1.9396120309829712, "learning_rate": 2.397678771254186e-07, "loss": 1.0015, "step": 63915 }, { "epoch": 19.12, "grad_norm": 5.77046537399292, "learning_rate": 2.3895681491792886e-07, "loss": 1.1133, "step": 63920 }, { "epoch": 19.13, "grad_norm": 2.1925339698791504, "learning_rate": 2.3814712023711995e-07, "loss": 1.1291, "step": 63925 }, { "epoch": 19.13, "grad_norm": 1.8961554765701294, "learning_rate": 2.3733879312770324e-07, "loss": 0.8816, "step": 63930 }, { "epoch": 19.13, "grad_norm": 1.7458471059799194, "learning_rate": 2.365318336343264e-07, "loss": 1.0096, "step": 63935 }, { "epoch": 19.13, "grad_norm": 1.646514892578125, "learning_rate": 2.3572624180155655e-07, "loss": 1.1617, "step": 63940 }, { "epoch": 19.13, "grad_norm": 2.5368010997772217, "learning_rate": 2.3492201767388589e-07, "loss": 0.8202, "step": 63945 }, { "epoch": 19.13, "grad_norm": 1.8663653135299683, "learning_rate": 2.3411916129573164e-07, "loss": 0.961, "step": 63950 }, { "epoch": 19.13, "grad_norm": 3.437864065170288, "learning_rate": 2.3331767271143057e-07, "loss": 1.0659, "step": 63955 }, { "epoch": 19.14, "grad_norm": 2.4234211444854736, "learning_rate": 2.325175519652528e-07, "loss": 0.881, "step": 63960 }, { "epoch": 19.14, "grad_norm": 2.6147170066833496, "learning_rate": 2.317187991013825e-07, "loss": 0.8791, "step": 63965 }, { "epoch": 19.14, "grad_norm": 3.2492587566375732, "learning_rate": 2.3092141416394264e-07, "loss": 0.9265, 
"step": 63970 }, { "epoch": 19.14, "grad_norm": 2.781677722930908, "learning_rate": 2.301253971969647e-07, "loss": 0.8303, "step": 63975 }, { "epoch": 19.14, "grad_norm": 2.981367826461792, "learning_rate": 2.2933074824441624e-07, "loss": 0.8374, "step": 63980 }, { "epoch": 19.14, "grad_norm": 2.767667055130005, "learning_rate": 2.2853746735017888e-07, "loss": 1.0309, "step": 63985 }, { "epoch": 19.15, "grad_norm": 2.9164721965789795, "learning_rate": 2.2774555455807311e-07, "loss": 0.9418, "step": 63990 }, { "epoch": 19.15, "grad_norm": 2.0985426902770996, "learning_rate": 2.2695500991182783e-07, "loss": 1.0022, "step": 63995 }, { "epoch": 19.15, "grad_norm": 2.751217842102051, "learning_rate": 2.2616583345510812e-07, "loss": 0.7166, "step": 64000 }, { "epoch": 19.15, "grad_norm": 2.930095911026001, "learning_rate": 2.2537802523149853e-07, "loss": 0.921, "step": 64005 }, { "epoch": 19.15, "grad_norm": 3.8547110557556152, "learning_rate": 2.2459158528450874e-07, "loss": 0.9999, "step": 64010 }, { "epoch": 19.15, "grad_norm": 1.5911672115325928, "learning_rate": 2.2380651365757343e-07, "loss": 1.0612, "step": 64015 }, { "epoch": 19.15, "grad_norm": 4.613253593444824, "learning_rate": 2.2302281039405238e-07, "loss": 1.0091, "step": 64020 }, { "epoch": 19.16, "grad_norm": 2.566091299057007, "learning_rate": 2.2224047553722484e-07, "loss": 1.0461, "step": 64025 }, { "epoch": 19.16, "grad_norm": 2.502011299133301, "learning_rate": 2.2145950913030066e-07, "loss": 1.0877, "step": 64030 }, { "epoch": 19.16, "grad_norm": 3.6345396041870117, "learning_rate": 2.2067991121641484e-07, "loss": 0.9155, "step": 64035 }, { "epoch": 19.16, "grad_norm": 1.3162267208099365, "learning_rate": 2.1990168183861904e-07, "loss": 0.8879, "step": 64040 }, { "epoch": 19.16, "grad_norm": 2.207672595977783, "learning_rate": 2.191248210398955e-07, "loss": 0.9934, "step": 64045 }, { "epoch": 19.16, "grad_norm": 3.507061719894409, "learning_rate": 2.1834932886314885e-07, "loss": 0.9936, "step": 
64050 }, { "epoch": 19.16, "grad_norm": 3.270716428756714, "learning_rate": 2.1757520535121423e-07, "loss": 0.9819, "step": 64055 }, { "epoch": 19.17, "grad_norm": 2.573598623275757, "learning_rate": 2.1680245054683524e-07, "loss": 1.0214, "step": 64060 }, { "epoch": 19.17, "grad_norm": 3.2568507194519043, "learning_rate": 2.1603106449269993e-07, "loss": 0.9246, "step": 64065 }, { "epoch": 19.17, "grad_norm": 1.5595399141311646, "learning_rate": 2.1526104723140484e-07, "loss": 0.9696, "step": 64070 }, { "epoch": 19.17, "grad_norm": 3.1589651107788086, "learning_rate": 2.1449239880548254e-07, "loss": 0.9985, "step": 64075 }, { "epoch": 19.17, "grad_norm": 2.8172848224639893, "learning_rate": 2.137251192573797e-07, "loss": 0.7963, "step": 64080 }, { "epoch": 19.17, "grad_norm": 2.691014528274536, "learning_rate": 2.1295920862947628e-07, "loss": 0.9184, "step": 64085 }, { "epoch": 19.17, "grad_norm": 3.006046772003174, "learning_rate": 2.1219466696407176e-07, "loss": 0.8609, "step": 64090 }, { "epoch": 19.18, "grad_norm": 2.1390583515167236, "learning_rate": 2.1143149430338793e-07, "loss": 1.1105, "step": 64095 }, { "epoch": 19.18, "grad_norm": 1.9489213228225708, "learning_rate": 2.106696906895772e-07, "loss": 0.9752, "step": 64100 }, { "epoch": 19.18, "grad_norm": 1.616949439048767, "learning_rate": 2.0990925616471424e-07, "loss": 0.9205, "step": 64105 }, { "epoch": 19.18, "grad_norm": 1.416013240814209, "learning_rate": 2.0915019077079322e-07, "loss": 0.9529, "step": 64110 }, { "epoch": 19.18, "grad_norm": 1.7403104305267334, "learning_rate": 2.0839249454973896e-07, "loss": 0.9774, "step": 64115 }, { "epoch": 19.18, "grad_norm": 3.7459676265716553, "learning_rate": 2.0763616754339577e-07, "loss": 0.9201, "step": 64120 }, { "epoch": 19.19, "grad_norm": 3.3942503929138184, "learning_rate": 2.0688120979353853e-07, "loss": 1.0433, "step": 64125 }, { "epoch": 19.19, "grad_norm": 2.123073101043701, "learning_rate": 2.061276213418617e-07, "loss": 0.8314, "step": 64130 }, 
{ "epoch": 19.19, "grad_norm": 1.3835378885269165, "learning_rate": 2.0537540222998474e-07, "loss": 0.7707, "step": 64135 }, { "epoch": 19.19, "grad_norm": 3.362372398376465, "learning_rate": 2.0462455249945222e-07, "loss": 0.9923, "step": 64140 }, { "epoch": 19.19, "grad_norm": 3.691894769668579, "learning_rate": 2.0387507219173098e-07, "loss": 0.9268, "step": 64145 }, { "epoch": 19.19, "grad_norm": 3.2239859104156494, "learning_rate": 2.0312696134821562e-07, "loss": 1.0184, "step": 64150 }, { "epoch": 19.19, "grad_norm": 3.0610954761505127, "learning_rate": 2.0238022001022315e-07, "loss": 0.8632, "step": 64155 }, { "epoch": 19.2, "grad_norm": 1.395918846130371, "learning_rate": 2.0163484821899557e-07, "loss": 0.8581, "step": 64160 }, { "epoch": 19.2, "grad_norm": 4.282837390899658, "learning_rate": 2.0089084601569718e-07, "loss": 0.931, "step": 64165 }, { "epoch": 19.2, "grad_norm": 1.898026704788208, "learning_rate": 2.0014821344142286e-07, "loss": 0.866, "step": 64170 }, { "epoch": 19.2, "grad_norm": 2.9928948879241943, "learning_rate": 1.9940695053718428e-07, "loss": 1.0112, "step": 64175 }, { "epoch": 19.2, "grad_norm": 2.1379966735839844, "learning_rate": 1.9866705734392088e-07, "loss": 0.868, "step": 64180 }, { "epoch": 19.2, "grad_norm": 3.2108120918273926, "learning_rate": 1.9792853390249444e-07, "loss": 0.8312, "step": 64185 }, { "epoch": 19.2, "grad_norm": 6.297685146331787, "learning_rate": 1.971913802536973e-07, "loss": 0.9006, "step": 64190 }, { "epoch": 19.21, "grad_norm": 3.2357826232910156, "learning_rate": 1.9645559643823863e-07, "loss": 0.9766, "step": 64195 }, { "epoch": 19.21, "grad_norm": 2.2781829833984375, "learning_rate": 1.9572118249675532e-07, "loss": 1.1408, "step": 64200 }, { "epoch": 19.21, "grad_norm": 3.3626670837402344, "learning_rate": 1.9498813846980658e-07, "loss": 0.9252, "step": 64205 }, { "epoch": 19.21, "grad_norm": 6.3693108558654785, "learning_rate": 1.9425646439788224e-07, "loss": 1.0737, "step": 64210 }, { "epoch": 
19.21, "grad_norm": 3.2453861236572266, "learning_rate": 1.9352616032138614e-07, "loss": 0.8096, "step": 64215 }, { "epoch": 19.21, "grad_norm": 2.069312572479248, "learning_rate": 1.9279722628065823e-07, "loss": 0.8934, "step": 64220 }, { "epoch": 19.22, "grad_norm": 3.1376097202301025, "learning_rate": 1.9206966231595236e-07, "loss": 0.9529, "step": 64225 }, { "epoch": 19.22, "grad_norm": 1.8707741498947144, "learning_rate": 1.913434684674531e-07, "loss": 1.1012, "step": 64230 }, { "epoch": 19.22, "grad_norm": 4.028426170349121, "learning_rate": 1.9061864477527004e-07, "loss": 0.9865, "step": 64235 }, { "epoch": 19.22, "grad_norm": 2.0778110027313232, "learning_rate": 1.8989519127942667e-07, "loss": 0.8687, "step": 64240 }, { "epoch": 19.22, "grad_norm": 2.294110059738159, "learning_rate": 1.8917310801988552e-07, "loss": 1.0524, "step": 64245 }, { "epoch": 19.22, "grad_norm": 2.6258926391601562, "learning_rate": 1.88452395036523e-07, "loss": 0.7745, "step": 64250 }, { "epoch": 19.22, "grad_norm": 2.8713653087615967, "learning_rate": 1.8773305236914618e-07, "loss": 1.0878, "step": 64255 }, { "epoch": 19.23, "grad_norm": 2.246710777282715, "learning_rate": 1.8701508005747882e-07, "loss": 1.0726, "step": 64260 }, { "epoch": 19.23, "grad_norm": 3.4345037937164307, "learning_rate": 1.8629847814118084e-07, "loss": 0.826, "step": 64265 }, { "epoch": 19.23, "grad_norm": 3.921339273452759, "learning_rate": 1.8558324665982341e-07, "loss": 0.9873, "step": 64270 }, { "epoch": 19.23, "grad_norm": 3.5189566612243652, "learning_rate": 1.8486938565290822e-07, "loss": 1.0737, "step": 64275 }, { "epoch": 19.23, "grad_norm": 3.8996822834014893, "learning_rate": 1.8415689515986488e-07, "loss": 0.9291, "step": 64280 }, { "epoch": 19.23, "grad_norm": 1.6882737874984741, "learning_rate": 1.8344577522004248e-07, "loss": 0.8509, "step": 64285 }, { "epoch": 19.23, "grad_norm": 1.2842025756835938, "learning_rate": 1.827360258727151e-07, "loss": 0.9767, "step": 64290 }, { "epoch": 19.24, 
"grad_norm": 3.280601739883423, "learning_rate": 1.820276471570792e-07, "loss": 0.843, "step": 64295 }, { "epoch": 19.24, "grad_norm": 1.3017467260360718, "learning_rate": 1.8132063911225905e-07, "loss": 1.0419, "step": 64300 }, { "epoch": 19.24, "grad_norm": 2.4863226413726807, "learning_rate": 1.8061500177730396e-07, "loss": 0.8428, "step": 64305 }, { "epoch": 19.24, "grad_norm": 3.148401975631714, "learning_rate": 1.7991073519118274e-07, "loss": 0.9154, "step": 64310 }, { "epoch": 19.24, "grad_norm": 3.4491004943847656, "learning_rate": 1.7920783939279207e-07, "loss": 1.0461, "step": 64315 }, { "epoch": 19.24, "grad_norm": 1.5797641277313232, "learning_rate": 1.7850631442095367e-07, "loss": 1.1385, "step": 64320 }, { "epoch": 19.25, "grad_norm": 2.7437055110931396, "learning_rate": 1.7780616031441156e-07, "loss": 1.0647, "step": 64325 }, { "epoch": 19.25, "grad_norm": 3.9407639503479004, "learning_rate": 1.7710737711183478e-07, "loss": 0.6554, "step": 64330 }, { "epoch": 19.25, "grad_norm": 1.4198654890060425, "learning_rate": 1.7640996485181472e-07, "loss": 1.0993, "step": 64335 }, { "epoch": 19.25, "grad_norm": 1.583427906036377, "learning_rate": 1.7571392357287053e-07, "loss": 1.1437, "step": 64340 }, { "epoch": 19.25, "grad_norm": 6.862940311431885, "learning_rate": 1.7501925331343817e-07, "loss": 0.953, "step": 64345 }, { "epoch": 19.25, "grad_norm": 4.871342182159424, "learning_rate": 1.7432595411189524e-07, "loss": 0.8867, "step": 64350 }, { "epoch": 19.25, "grad_norm": 2.5354645252227783, "learning_rate": 1.7363402600651945e-07, "loss": 1.0432, "step": 64355 }, { "epoch": 19.26, "grad_norm": 2.395785331726074, "learning_rate": 1.7294346903553305e-07, "loss": 0.8861, "step": 64360 }, { "epoch": 19.26, "grad_norm": 2.0268208980560303, "learning_rate": 1.7225428323707494e-07, "loss": 0.8826, "step": 64365 }, { "epoch": 19.26, "grad_norm": 1.8643012046813965, "learning_rate": 1.7156646864920077e-07, "loss": 1.0994, "step": 64370 }, { "epoch": 19.26, 
"grad_norm": 1.6344417333602905, "learning_rate": 1.708800253099052e-07, "loss": 0.9345, "step": 64375 }, { "epoch": 19.26, "grad_norm": 2.7925565242767334, "learning_rate": 1.7019495325709677e-07, "loss": 1.0531, "step": 64380 }, { "epoch": 19.26, "grad_norm": 3.3944101333618164, "learning_rate": 1.6951125252861466e-07, "loss": 0.9223, "step": 64385 }, { "epoch": 19.26, "grad_norm": 3.003763437271118, "learning_rate": 1.68828923162212e-07, "loss": 0.8319, "step": 64390 }, { "epoch": 19.27, "grad_norm": 2.5167884826660156, "learning_rate": 1.6814796519558084e-07, "loss": 1.1937, "step": 64395 }, { "epoch": 19.27, "grad_norm": 1.8801358938217163, "learning_rate": 1.6746837866632725e-07, "loss": 1.0366, "step": 64400 }, { "epoch": 19.27, "grad_norm": 2.1538734436035156, "learning_rate": 1.6679016361197951e-07, "loss": 1.0713, "step": 64405 }, { "epoch": 19.27, "grad_norm": 3.144021987915039, "learning_rate": 1.6611332006999935e-07, "loss": 0.9265, "step": 64410 }, { "epoch": 19.27, "grad_norm": 2.700958490371704, "learning_rate": 1.6543784807776795e-07, "loss": 0.8548, "step": 64415 }, { "epoch": 19.27, "grad_norm": 2.5718274116516113, "learning_rate": 1.6476374767258883e-07, "loss": 0.9646, "step": 64420 }, { "epoch": 19.28, "grad_norm": 3.631052255630493, "learning_rate": 1.640910188916961e-07, "loss": 1.0448, "step": 64425 }, { "epoch": 19.28, "grad_norm": 2.219627618789673, "learning_rate": 1.6341966177223777e-07, "loss": 1.1337, "step": 64430 }, { "epoch": 19.28, "grad_norm": 1.4927635192871094, "learning_rate": 1.6274967635129811e-07, "loss": 0.9727, "step": 64435 }, { "epoch": 19.28, "grad_norm": 3.119546890258789, "learning_rate": 1.6208106266587253e-07, "loss": 0.9235, "step": 64440 }, { "epoch": 19.28, "grad_norm": 1.84986412525177, "learning_rate": 1.6141382075289813e-07, "loss": 0.8477, "step": 64445 }, { "epoch": 19.28, "grad_norm": 2.341019868850708, "learning_rate": 1.607479506492149e-07, "loss": 1.1153, "step": 64450 }, { "epoch": 19.28, "grad_norm": 
2.070584535598755, "learning_rate": 1.600834523916045e-07, "loss": 0.8931, "step": 64455 }, { "epoch": 19.29, "grad_norm": 4.45391845703125, "learning_rate": 1.5942032601676815e-07, "loss": 0.9916, "step": 64460 }, { "epoch": 19.29, "grad_norm": 3.5736074447631836, "learning_rate": 1.587585715613238e-07, "loss": 1.1195, "step": 64465 }, { "epoch": 19.29, "grad_norm": 2.6194748878479004, "learning_rate": 1.580981890618255e-07, "loss": 1.0239, "step": 64470 }, { "epoch": 19.29, "grad_norm": 2.58370304107666, "learning_rate": 1.5743917855473855e-07, "loss": 1.0816, "step": 64475 }, { "epoch": 19.29, "grad_norm": 2.7932465076446533, "learning_rate": 1.5678154007646718e-07, "loss": 1.0808, "step": 64480 }, { "epoch": 19.29, "grad_norm": 5.6625471115112305, "learning_rate": 1.5612527366332674e-07, "loss": 0.9235, "step": 64485 }, { "epoch": 19.29, "grad_norm": 3.3300294876098633, "learning_rate": 1.5547037935156327e-07, "loss": 0.8833, "step": 64490 }, { "epoch": 19.3, "grad_norm": 2.539818525314331, "learning_rate": 1.5481685717734783e-07, "loss": 0.9428, "step": 64495 }, { "epoch": 19.3, "grad_norm": 3.0235023498535156, "learning_rate": 1.54164707176771e-07, "loss": 1.1307, "step": 64500 }, { "epoch": 19.3, "grad_norm": 2.7196106910705566, "learning_rate": 1.5351392938585118e-07, "loss": 0.946, "step": 64505 }, { "epoch": 19.3, "grad_norm": 2.6924359798431396, "learning_rate": 1.5286452384053184e-07, "loss": 0.7011, "step": 64510 }, { "epoch": 19.3, "grad_norm": 1.7030616998672485, "learning_rate": 1.52216490576676e-07, "loss": 1.0243, "step": 64515 }, { "epoch": 19.3, "grad_norm": 2.473074197769165, "learning_rate": 1.5156982963007715e-07, "loss": 1.0579, "step": 64520 }, { "epoch": 19.31, "grad_norm": 1.5015933513641357, "learning_rate": 1.5092454103644572e-07, "loss": 0.9168, "step": 64525 }, { "epoch": 19.31, "grad_norm": 2.3585140705108643, "learning_rate": 1.5028062483142537e-07, "loss": 0.8898, "step": 64530 }, { "epoch": 19.31, "grad_norm": 2.2519643306732178, 
"learning_rate": 1.4963808105057653e-07, "loss": 0.9104, "step": 64535 }, { "epoch": 19.31, "grad_norm": 3.287801504135132, "learning_rate": 1.4899690972938474e-07, "loss": 1.0621, "step": 64540 }, { "epoch": 19.31, "grad_norm": 2.565260648727417, "learning_rate": 1.4835711090326054e-07, "loss": 0.8879, "step": 64545 }, { "epoch": 19.31, "grad_norm": 1.9417716264724731, "learning_rate": 1.4771868460754234e-07, "loss": 0.9862, "step": 64550 }, { "epoch": 19.31, "grad_norm": 1.4697206020355225, "learning_rate": 1.4708163087749082e-07, "loss": 0.9105, "step": 64555 }, { "epoch": 19.32, "grad_norm": 6.190129280090332, "learning_rate": 1.4644594974828618e-07, "loss": 0.9255, "step": 64560 }, { "epoch": 19.32, "grad_norm": 2.575528383255005, "learning_rate": 1.4581164125503922e-07, "loss": 1.0289, "step": 64565 }, { "epoch": 19.32, "grad_norm": 2.064091205596924, "learning_rate": 1.4517870543277745e-07, "loss": 1.0171, "step": 64570 }, { "epoch": 19.32, "grad_norm": 1.4593415260314941, "learning_rate": 1.445471423164646e-07, "loss": 0.8089, "step": 64575 }, { "epoch": 19.32, "grad_norm": 4.081628799438477, "learning_rate": 1.4391695194097553e-07, "loss": 0.9634, "step": 64580 }, { "epoch": 19.32, "grad_norm": 1.9155333042144775, "learning_rate": 1.4328813434111577e-07, "loss": 0.8419, "step": 64585 }, { "epoch": 19.32, "grad_norm": 1.9294395446777344, "learning_rate": 1.426606895516186e-07, "loss": 0.8925, "step": 64590 }, { "epoch": 19.33, "grad_norm": 4.205466270446777, "learning_rate": 1.4203461760713133e-07, "loss": 0.9417, "step": 64595 }, { "epoch": 19.33, "grad_norm": 2.388813018798828, "learning_rate": 1.4140991854223183e-07, "loss": 1.0479, "step": 64600 }, { "epoch": 19.33, "grad_norm": 2.137241840362549, "learning_rate": 1.407865923914259e-07, "loss": 1.0903, "step": 64605 }, { "epoch": 19.33, "grad_norm": 3.590604543685913, "learning_rate": 1.4016463918913592e-07, "loss": 0.8402, "step": 64610 }, { "epoch": 19.33, "grad_norm": 3.1465625762939453, 
"learning_rate": 1.3954405896971224e-07, "loss": 1.1066, "step": 64615 }, { "epoch": 19.33, "grad_norm": 4.3162922859191895, "learning_rate": 1.389248517674302e-07, "loss": 1.0114, "step": 64620 }, { "epoch": 19.34, "grad_norm": 2.18872332572937, "learning_rate": 1.3830701761648744e-07, "loss": 0.954, "step": 64625 }, { "epoch": 19.34, "grad_norm": 3.4795877933502197, "learning_rate": 1.3769055655100395e-07, "loss": 0.7991, "step": 64630 }, { "epoch": 19.34, "grad_norm": 3.6978468894958496, "learning_rate": 1.3707546860503019e-07, "loss": 0.9708, "step": 64635 }, { "epoch": 19.34, "grad_norm": 3.991750478744507, "learning_rate": 1.3646175381253345e-07, "loss": 0.9624, "step": 64640 }, { "epoch": 19.34, "grad_norm": 3.2379486560821533, "learning_rate": 1.3584941220740888e-07, "loss": 1.0254, "step": 64645 }, { "epoch": 19.34, "grad_norm": 3.404052257537842, "learning_rate": 1.352384438234794e-07, "loss": 0.8464, "step": 64650 }, { "epoch": 19.34, "grad_norm": 2.1188278198242188, "learning_rate": 1.346288486944819e-07, "loss": 0.8731, "step": 64655 }, { "epoch": 19.35, "grad_norm": 5.067503452301025, "learning_rate": 1.3402062685408945e-07, "loss": 0.8417, "step": 64660 }, { "epoch": 19.35, "grad_norm": 3.299839973449707, "learning_rate": 1.3341377833588908e-07, "loss": 1.0166, "step": 64665 }, { "epoch": 19.35, "grad_norm": 3.843200922012329, "learning_rate": 1.3280830317340122e-07, "loss": 0.9164, "step": 64670 }, { "epoch": 19.35, "grad_norm": 3.3282971382141113, "learning_rate": 1.3220420140006017e-07, "loss": 1.0903, "step": 64675 }, { "epoch": 19.35, "grad_norm": 3.1337714195251465, "learning_rate": 1.3160147304923375e-07, "loss": 0.953, "step": 64680 }, { "epoch": 19.35, "grad_norm": 4.363522529602051, "learning_rate": 1.3100011815420642e-07, "loss": 0.7601, "step": 64685 }, { "epoch": 19.35, "grad_norm": 3.619309902191162, "learning_rate": 1.3040013674819605e-07, "loss": 0.8317, "step": 64690 }, { "epoch": 19.36, "grad_norm": 4.793463230133057, 
"learning_rate": 1.298015288643345e-07, "loss": 1.1246, "step": 64695 }, { "epoch": 19.36, "grad_norm": 4.220412731170654, "learning_rate": 1.292042945356814e-07, "loss": 1.2013, "step": 64700 }, { "epoch": 19.36, "grad_norm": 1.8279774188995361, "learning_rate": 1.2860843379522425e-07, "loss": 0.9528, "step": 64705 }, { "epoch": 19.36, "grad_norm": 3.403494119644165, "learning_rate": 1.2801394667587285e-07, "loss": 0.9246, "step": 64710 }, { "epoch": 19.36, "grad_norm": 3.680722713470459, "learning_rate": 1.274208332104565e-07, "loss": 0.9582, "step": 64715 }, { "epoch": 19.36, "grad_norm": 1.425324559211731, "learning_rate": 1.268290934317351e-07, "loss": 1.0058, "step": 64720 }, { "epoch": 19.36, "grad_norm": 2.55713152885437, "learning_rate": 1.262387273723853e-07, "loss": 1.0049, "step": 64725 }, { "epoch": 19.37, "grad_norm": 3.1014976501464844, "learning_rate": 1.2564973506501987e-07, "loss": 0.9631, "step": 64730 }, { "epoch": 19.37, "grad_norm": 2.44319224357605, "learning_rate": 1.2506211654216282e-07, "loss": 0.9343, "step": 64735 }, { "epoch": 19.37, "grad_norm": 1.407696008682251, "learning_rate": 1.2447587183626874e-07, "loss": 0.9746, "step": 64740 }, { "epoch": 19.37, "grad_norm": 3.8315956592559814, "learning_rate": 1.238910009797145e-07, "loss": 0.9999, "step": 64745 }, { "epoch": 19.37, "grad_norm": 2.9506800174713135, "learning_rate": 1.2330750400480483e-07, "loss": 0.9509, "step": 64750 }, { "epoch": 19.37, "grad_norm": 1.5093969106674194, "learning_rate": 1.2272538094376396e-07, "loss": 1.0732, "step": 64755 }, { "epoch": 19.38, "grad_norm": 3.4581491947174072, "learning_rate": 1.2214463182873836e-07, "loss": 0.9692, "step": 64760 }, { "epoch": 19.38, "grad_norm": 6.015048027038574, "learning_rate": 1.2156525669181074e-07, "loss": 0.8817, "step": 64765 }, { "epoch": 19.38, "grad_norm": 1.234391450881958, "learning_rate": 1.2098725556496938e-07, "loss": 0.9878, "step": 64770 }, { "epoch": 19.38, "grad_norm": 2.1239829063415527, "learning_rate": 
1.2041062848014428e-07, "loss": 1.0328, "step": 64775 }, { "epoch": 19.38, "grad_norm": 2.3981587886810303, "learning_rate": 1.1983537546917944e-07, "loss": 0.9892, "step": 64780 }, { "epoch": 19.38, "grad_norm": 4.929365158081055, "learning_rate": 1.1926149656384667e-07, "loss": 1.0319, "step": 64785 }, { "epoch": 19.38, "grad_norm": 2.424349308013916, "learning_rate": 1.1868899179583726e-07, "loss": 1.0278, "step": 64790 }, { "epoch": 19.39, "grad_norm": 3.70564866065979, "learning_rate": 1.1811786119677037e-07, "loss": 0.8346, "step": 64795 }, { "epoch": 19.39, "grad_norm": 1.7846856117248535, "learning_rate": 1.1754810479819301e-07, "loss": 0.8833, "step": 64800 }, { "epoch": 19.39, "grad_norm": 2.44340181350708, "learning_rate": 1.1697972263157164e-07, "loss": 0.951, "step": 64805 }, { "epoch": 19.39, "grad_norm": 8.20602798461914, "learning_rate": 1.1641271472829229e-07, "loss": 0.9087, "step": 64810 }, { "epoch": 19.39, "grad_norm": 5.193169593811035, "learning_rate": 1.1584708111967435e-07, "loss": 1.0248, "step": 64815 }, { "epoch": 19.39, "grad_norm": 5.007645130157471, "learning_rate": 1.1528282183695949e-07, "loss": 0.9085, "step": 64820 }, { "epoch": 19.39, "grad_norm": 2.5000011920928955, "learning_rate": 1.1471993691130612e-07, "loss": 1.1319, "step": 64825 }, { "epoch": 19.4, "grad_norm": 1.7281321287155151, "learning_rate": 1.1415842637380326e-07, "loss": 0.9048, "step": 64830 }, { "epoch": 19.4, "grad_norm": 3.866112470626831, "learning_rate": 1.135982902554622e-07, "loss": 0.9548, "step": 64835 }, { "epoch": 19.4, "grad_norm": 3.7432994842529297, "learning_rate": 1.1303952858722211e-07, "loss": 0.977, "step": 64840 }, { "epoch": 19.4, "grad_norm": 2.450726270675659, "learning_rate": 1.1248214139993884e-07, "loss": 1.0289, "step": 64845 }, { "epoch": 19.4, "grad_norm": 4.173949718475342, "learning_rate": 1.1192612872439889e-07, "loss": 1.0554, "step": 64850 }, { "epoch": 19.4, "grad_norm": 2.5136771202087402, "learning_rate": 
1.1137149059130825e-07, "loss": 0.8978, "step": 64855 }, { "epoch": 19.41, "grad_norm": 3.791132688522339, "learning_rate": 1.1081822703130074e-07, "loss": 1.076, "step": 64860 }, { "epoch": 19.41, "grad_norm": 2.5353622436523438, "learning_rate": 1.102663380749297e-07, "loss": 1.0079, "step": 64865 }, { "epoch": 19.41, "grad_norm": 1.9366538524627686, "learning_rate": 1.0971582375267908e-07, "loss": 1.0107, "step": 64870 }, { "epoch": 19.41, "grad_norm": 2.1806907653808594, "learning_rate": 1.0916668409494957e-07, "loss": 0.959, "step": 64875 }, { "epoch": 19.41, "grad_norm": 1.3208969831466675, "learning_rate": 1.0861891913207523e-07, "loss": 0.7762, "step": 64880 }, { "epoch": 19.41, "grad_norm": 1.4078150987625122, "learning_rate": 1.080725288943013e-07, "loss": 0.9522, "step": 64885 }, { "epoch": 19.41, "grad_norm": 3.181238889694214, "learning_rate": 1.0752751341180922e-07, "loss": 0.9171, "step": 64890 }, { "epoch": 19.42, "grad_norm": 2.8408191204071045, "learning_rate": 1.0698387271469712e-07, "loss": 0.9459, "step": 64895 }, { "epoch": 19.42, "grad_norm": 2.00119686126709, "learning_rate": 1.0644160683299375e-07, "loss": 1.0576, "step": 64900 }, { "epoch": 19.42, "grad_norm": 1.823187232017517, "learning_rate": 1.0590071579664185e-07, "loss": 1.192, "step": 64905 }, { "epoch": 19.42, "grad_norm": 2.4590156078338623, "learning_rate": 1.0536119963552026e-07, "loss": 0.9914, "step": 64910 }, { "epoch": 19.42, "grad_norm": 3.0153346061706543, "learning_rate": 1.0482305837942185e-07, "loss": 0.9732, "step": 64915 }, { "epoch": 19.42, "grad_norm": 1.55227530002594, "learning_rate": 1.0428629205806728e-07, "loss": 0.9852, "step": 64920 }, { "epoch": 19.42, "grad_norm": 1.4424388408660889, "learning_rate": 1.0375090070110505e-07, "loss": 1.0208, "step": 64925 }, { "epoch": 19.43, "grad_norm": 1.9990373849868774, "learning_rate": 1.032168843381004e-07, "loss": 0.9683, "step": 64930 }, { "epoch": 19.43, "grad_norm": 4.354613304138184, "learning_rate": 
1.0268424299855195e-07, "loss": 0.9116, "step": 64935 }, { "epoch": 19.43, "grad_norm": 4.074637413024902, "learning_rate": 1.0215297671186952e-07, "loss": 0.846, "step": 64940 }, { "epoch": 19.43, "grad_norm": 2.377812385559082, "learning_rate": 1.0162308550740185e-07, "loss": 1.0472, "step": 64945 }, { "epoch": 19.43, "grad_norm": 1.2441447973251343, "learning_rate": 1.0109456941440886e-07, "loss": 1.0943, "step": 64950 }, { "epoch": 19.43, "grad_norm": 35.32939910888672, "learning_rate": 1.0056742846208389e-07, "loss": 0.8993, "step": 64955 }, { "epoch": 19.44, "grad_norm": 4.968936443328857, "learning_rate": 1.0004166267953419e-07, "loss": 0.976, "step": 64960 }, { "epoch": 19.44, "grad_norm": 6.838953971862793, "learning_rate": 9.951727209580598e-08, "loss": 0.9147, "step": 64965 }, { "epoch": 19.44, "grad_norm": 1.3050497770309448, "learning_rate": 9.899425673985107e-08, "loss": 1.0613, "step": 64970 }, { "epoch": 19.44, "grad_norm": 3.5567336082458496, "learning_rate": 9.847261664056584e-08, "loss": 0.9914, "step": 64975 }, { "epoch": 19.44, "grad_norm": 3.1688625812530518, "learning_rate": 9.795235182674944e-08, "loss": 1.0523, "step": 64980 }, { "epoch": 19.44, "grad_norm": 1.9151177406311035, "learning_rate": 9.743346232714279e-08, "loss": 1.0164, "step": 64985 }, { "epoch": 19.44, "grad_norm": 3.5026047229766846, "learning_rate": 9.691594817040073e-08, "loss": 0.9442, "step": 64990 }, { "epoch": 19.45, "grad_norm": 2.427326202392578, "learning_rate": 9.639980938510595e-08, "loss": 0.9183, "step": 64995 }, { "epoch": 19.45, "grad_norm": 1.6219308376312256, "learning_rate": 9.588504599976345e-08, "loss": 1.0548, "step": 65000 }, { "epoch": 19.45, "grad_norm": 2.265070676803589, "learning_rate": 9.537165804280324e-08, "loss": 0.9385, "step": 65005 }, { "epoch": 19.45, "grad_norm": 4.394448280334473, "learning_rate": 9.485964554258043e-08, "loss": 0.9525, "step": 65010 }, { "epoch": 19.45, "grad_norm": 3.270338296890259, "learning_rate": 
9.434900852737238e-08, "loss": 0.9407, "step": 65015 }, { "epoch": 19.45, "grad_norm": 2.3003993034362793, "learning_rate": 9.383974702537878e-08, "loss": 0.8104, "step": 65020 }, { "epoch": 19.45, "grad_norm": 2.146404981613159, "learning_rate": 9.33318610647299e-08, "loss": 0.9752, "step": 65025 }, { "epoch": 19.46, "grad_norm": 1.9496128559112549, "learning_rate": 9.282535067346998e-08, "loss": 0.8462, "step": 65030 }, { "epoch": 19.46, "grad_norm": 1.7979656457901, "learning_rate": 9.23202158795794e-08, "loss": 0.8072, "step": 65035 }, { "epoch": 19.46, "grad_norm": 2.433094024658203, "learning_rate": 9.181645671095252e-08, "loss": 0.8688, "step": 65040 }, { "epoch": 19.46, "grad_norm": 3.4137656688690186, "learning_rate": 9.131407319541153e-08, "loss": 1.0364, "step": 65045 }, { "epoch": 19.46, "grad_norm": 1.888330340385437, "learning_rate": 9.081306536070366e-08, "loss": 0.8318, "step": 65050 }, { "epoch": 19.46, "grad_norm": 3.29191255569458, "learning_rate": 9.03134332344957e-08, "loss": 0.7606, "step": 65055 }, { "epoch": 19.47, "grad_norm": 1.3745579719543457, "learning_rate": 8.981517684438779e-08, "loss": 0.9285, "step": 65060 }, { "epoch": 19.47, "grad_norm": 2.466064929962158, "learning_rate": 8.9318296217894e-08, "loss": 0.9789, "step": 65065 }, { "epoch": 19.47, "grad_norm": 2.370903730392456, "learning_rate": 8.882279138245908e-08, "loss": 0.8135, "step": 65070 }, { "epoch": 19.47, "grad_norm": 3.7410812377929688, "learning_rate": 8.832866236544446e-08, "loss": 0.9207, "step": 65075 }, { "epoch": 19.47, "grad_norm": 2.961038589477539, "learning_rate": 8.783590919414497e-08, "loss": 1.0046, "step": 65080 }, { "epoch": 19.47, "grad_norm": 2.3268706798553467, "learning_rate": 8.734453189577497e-08, "loss": 0.9985, "step": 65085 }, { "epoch": 19.47, "grad_norm": 2.1825544834136963, "learning_rate": 8.685453049747105e-08, "loss": 0.9957, "step": 65090 }, { "epoch": 19.48, "grad_norm": 3.99306321144104, "learning_rate": 8.636590502629494e-08, "loss": 
0.9198, "step": 65095 }, { "epoch": 19.48, "grad_norm": 4.728055000305176, "learning_rate": 8.587865550923057e-08, "loss": 1.1148, "step": 65100 }, { "epoch": 19.48, "grad_norm": 1.5131349563598633, "learning_rate": 8.539278197319533e-08, "loss": 0.8645, "step": 65105 }, { "epoch": 19.48, "grad_norm": 4.779293537139893, "learning_rate": 8.490828444501775e-08, "loss": 0.9724, "step": 65110 }, { "epoch": 19.48, "grad_norm": 2.5396671295166016, "learning_rate": 8.442516295145974e-08, "loss": 0.9741, "step": 65115 }, { "epoch": 19.48, "grad_norm": 2.4737155437469482, "learning_rate": 8.394341751919998e-08, "loss": 1.1152, "step": 65120 }, { "epoch": 19.48, "grad_norm": 3.827990770339966, "learning_rate": 8.346304817484496e-08, "loss": 0.9559, "step": 65125 }, { "epoch": 19.49, "grad_norm": 2.1786043643951416, "learning_rate": 8.307974350048597e-08, "loss": 0.8171, "step": 65130 }, { "epoch": 19.49, "grad_norm": 2.547623634338379, "learning_rate": 8.260185118116881e-08, "loss": 0.9985, "step": 65135 }, { "epoch": 19.49, "grad_norm": 2.0980887413024902, "learning_rate": 8.212533502385267e-08, "loss": 0.9029, "step": 65140 }, { "epoch": 19.49, "grad_norm": 3.6388769149780273, "learning_rate": 8.165019505485261e-08, "loss": 1.0007, "step": 65145 }, { "epoch": 19.49, "grad_norm": 3.2435462474823, "learning_rate": 8.117643130041152e-08, "loss": 1.0546, "step": 65150 }, { "epoch": 19.49, "grad_norm": 2.3990752696990967, "learning_rate": 8.070404378669461e-08, "loss": 0.9811, "step": 65155 }, { "epoch": 19.5, "grad_norm": 2.219163179397583, "learning_rate": 8.02330325397893e-08, "loss": 1.179, "step": 65160 }, { "epoch": 19.5, "grad_norm": 1.6309661865234375, "learning_rate": 7.976339758571094e-08, "loss": 0.8849, "step": 65165 }, { "epoch": 19.5, "grad_norm": 1.6608452796936035, "learning_rate": 7.929513895039987e-08, "loss": 0.8809, "step": 65170 }, { "epoch": 19.5, "grad_norm": 3.3001632690429688, "learning_rate": 7.882825665971316e-08, "loss": 0.9709, "step": 65175 }, { 
"epoch": 19.5, "grad_norm": 3.3454103469848633, "learning_rate": 7.836275073943577e-08, "loss": 0.9296, "step": 65180 }, { "epoch": 19.5, "grad_norm": 3.6163206100463867, "learning_rate": 7.789862121528325e-08, "loss": 1.0389, "step": 65185 }, { "epoch": 19.5, "grad_norm": 2.1792218685150146, "learning_rate": 7.743586811288228e-08, "loss": 0.8764, "step": 65190 }, { "epoch": 19.51, "grad_norm": 36.940330505371094, "learning_rate": 7.6974491457793e-08, "loss": 1.1318, "step": 65195 }, { "epoch": 19.51, "grad_norm": 1.098024845123291, "learning_rate": 7.651449127549503e-08, "loss": 0.9647, "step": 65200 }, { "epoch": 19.51, "grad_norm": 2.7333242893218994, "learning_rate": 7.605586759139582e-08, "loss": 1.0234, "step": 65205 }, { "epoch": 19.51, "grad_norm": 3.17170786857605, "learning_rate": 7.559862043082511e-08, "loss": 1.0791, "step": 65210 }, { "epoch": 19.51, "grad_norm": 3.0869386196136475, "learning_rate": 7.514274981903491e-08, "loss": 0.8448, "step": 65215 }, { "epoch": 19.51, "grad_norm": 1.4254027605056763, "learning_rate": 7.468825578120508e-08, "loss": 1.0125, "step": 65220 }, { "epoch": 19.51, "grad_norm": 4.486727237701416, "learning_rate": 7.423513834242945e-08, "loss": 0.7673, "step": 65225 }, { "epoch": 19.52, "grad_norm": 2.695744514465332, "learning_rate": 7.378339752774077e-08, "loss": 0.9713, "step": 65230 }, { "epoch": 19.52, "grad_norm": 1.8701138496398926, "learning_rate": 7.333303336208574e-08, "loss": 1.0323, "step": 65235 }, { "epoch": 19.52, "grad_norm": 2.85798978805542, "learning_rate": 7.288404587033892e-08, "loss": 0.9666, "step": 65240 }, { "epoch": 19.52, "grad_norm": 2.4904799461364746, "learning_rate": 7.243643507729437e-08, "loss": 1.0193, "step": 65245 }, { "epoch": 19.52, "grad_norm": 1.5817186832427979, "learning_rate": 7.199020100767395e-08, "loss": 0.9865, "step": 65250 }, { "epoch": 19.52, "grad_norm": 4.257795333862305, "learning_rate": 7.154534368612465e-08, "loss": 0.7875, "step": 65255 }, { "epoch": 19.53, "grad_norm": 
4.011660099029541, "learning_rate": 7.11018631372129e-08, "loss": 0.87, "step": 65260 }, { "epoch": 19.53, "grad_norm": 4.863183975219727, "learning_rate": 7.065975938543579e-08, "loss": 0.9798, "step": 65265 }, { "epoch": 19.53, "grad_norm": 2.2304656505584717, "learning_rate": 7.021903245520433e-08, "loss": 0.9956, "step": 65270 }, { "epoch": 19.53, "grad_norm": 2.4296457767486572, "learning_rate": 6.977968237086574e-08, "loss": 0.9869, "step": 65275 }, { "epoch": 19.53, "grad_norm": 1.2431954145431519, "learning_rate": 6.934170915668114e-08, "loss": 1.1323, "step": 65280 }, { "epoch": 19.53, "grad_norm": 2.272568464279175, "learning_rate": 6.890511283683953e-08, "loss": 1.0307, "step": 65285 }, { "epoch": 19.53, "grad_norm": 3.0335166454315186, "learning_rate": 6.846989343545219e-08, "loss": 1.0689, "step": 65290 }, { "epoch": 19.54, "grad_norm": 2.1746132373809814, "learning_rate": 6.803605097656096e-08, "loss": 1.0758, "step": 65295 }, { "epoch": 19.54, "grad_norm": 1.310981035232544, "learning_rate": 6.760358548411893e-08, "loss": 1.0002, "step": 65300 }, { "epoch": 19.54, "grad_norm": 3.3575997352600098, "learning_rate": 6.717249698202088e-08, "loss": 0.8939, "step": 65305 }, { "epoch": 19.54, "grad_norm": 1.3703774213790894, "learning_rate": 6.674278549406444e-08, "loss": 0.9257, "step": 65310 }, { "epoch": 19.54, "grad_norm": 1.6967878341674805, "learning_rate": 6.63144510439917e-08, "loss": 1.0022, "step": 65315 }, { "epoch": 19.54, "grad_norm": 3.0534799098968506, "learning_rate": 6.588749365545044e-08, "loss": 1.0344, "step": 65320 }, { "epoch": 19.54, "grad_norm": 2.0059475898742676, "learning_rate": 6.546191335202734e-08, "loss": 0.8697, "step": 65325 }, { "epoch": 19.55, "grad_norm": 2.44685697555542, "learning_rate": 6.503771015722582e-08, "loss": 0.9656, "step": 65330 }, { "epoch": 19.55, "grad_norm": 2.028308868408203, "learning_rate": 6.461488409447159e-08, "loss": 0.7818, "step": 65335 }, { "epoch": 19.55, "grad_norm": 1.6491647958755493, 
"learning_rate": 6.419343518711818e-08, "loss": 0.822, "step": 65340 }, { "epoch": 19.55, "grad_norm": 2.1462082862854004, "learning_rate": 6.37733634584442e-08, "loss": 1.0347, "step": 65345 }, { "epoch": 19.55, "grad_norm": 2.555720329284668, "learning_rate": 6.335466893164499e-08, "loss": 0.9114, "step": 65350 }, { "epoch": 19.55, "grad_norm": 1.5168747901916504, "learning_rate": 6.293735162984926e-08, "loss": 0.8913, "step": 65355 }, { "epoch": 19.55, "grad_norm": 3.1510086059570312, "learning_rate": 6.252141157610247e-08, "loss": 1.0805, "step": 65360 }, { "epoch": 19.56, "grad_norm": 1.3030726909637451, "learning_rate": 6.210684879337514e-08, "loss": 0.9486, "step": 65365 }, { "epoch": 19.56, "grad_norm": 1.6003532409667969, "learning_rate": 6.16936633045656e-08, "loss": 1.0441, "step": 65370 }, { "epoch": 19.56, "grad_norm": 4.755357265472412, "learning_rate": 6.128185513249452e-08, "loss": 0.9853, "step": 65375 }, { "epoch": 19.56, "grad_norm": 2.193000316619873, "learning_rate": 6.087142429990478e-08, "loss": 1.1333, "step": 65380 }, { "epoch": 19.56, "grad_norm": 2.2681541442871094, "learning_rate": 6.04623708294616e-08, "loss": 1.1406, "step": 65385 }, { "epoch": 19.56, "grad_norm": 3.417823314666748, "learning_rate": 6.00546947437608e-08, "loss": 1.0069, "step": 65390 }, { "epoch": 19.57, "grad_norm": 2.2752387523651123, "learning_rate": 5.964839606531214e-08, "loss": 1.0824, "step": 65395 }, { "epoch": 19.57, "grad_norm": 8.419718742370605, "learning_rate": 5.924347481656156e-08, "loss": 0.9543, "step": 65400 }, { "epoch": 19.57, "grad_norm": 6.15936279296875, "learning_rate": 5.8839931019868955e-08, "loss": 0.9819, "step": 65405 }, { "epoch": 19.57, "grad_norm": 3.1836822032928467, "learning_rate": 5.8437764697522044e-08, "loss": 1.0345, "step": 65410 }, { "epoch": 19.57, "grad_norm": 2.2897796630859375, "learning_rate": 5.803697587173085e-08, "loss": 0.9194, "step": 65415 }, { "epoch": 19.57, "grad_norm": 2.0201714038848877, "learning_rate": 
5.7637564564633205e-08, "loss": 1.1888, "step": 65420 }, { "epoch": 19.57, "grad_norm": 3.7068679332733154, "learning_rate": 5.7239530798283704e-08, "loss": 0.9841, "step": 65425 }, { "epoch": 19.58, "grad_norm": 1.6118470430374146, "learning_rate": 5.684287459467308e-08, "loss": 0.9835, "step": 65430 }, { "epoch": 19.58, "grad_norm": 2.5250370502471924, "learning_rate": 5.6447595975700485e-08, "loss": 0.8138, "step": 65435 }, { "epoch": 19.58, "grad_norm": 2.585517406463623, "learning_rate": 5.6053694963198456e-08, "loss": 0.9517, "step": 65440 }, { "epoch": 19.58, "grad_norm": 1.3797327280044556, "learning_rate": 5.566117157892459e-08, "loss": 1.0316, "step": 65445 }, { "epoch": 19.58, "grad_norm": 2.215745449066162, "learning_rate": 5.527002584455876e-08, "loss": 0.7985, "step": 65450 }, { "epoch": 19.58, "grad_norm": 4.586230754852295, "learning_rate": 5.488025778169759e-08, "loss": 0.8436, "step": 65455 }, { "epoch": 19.58, "grad_norm": 3.00388503074646, "learning_rate": 5.4491867411871064e-08, "loss": 1.0267, "step": 65460 }, { "epoch": 19.59, "grad_norm": 2.888880729675293, "learning_rate": 5.410485475652871e-08, "loss": 0.9659, "step": 65465 }, { "epoch": 19.59, "grad_norm": 1.5345345735549927, "learning_rate": 5.3719219837047865e-08, "loss": 1.161, "step": 65470 }, { "epoch": 19.59, "grad_norm": 3.530027151107788, "learning_rate": 5.33349626747226e-08, "loss": 0.9582, "step": 65475 }, { "epoch": 19.59, "grad_norm": 1.8898435831069946, "learning_rate": 5.295208329077761e-08, "loss": 1.1305, "step": 65480 }, { "epoch": 19.59, "grad_norm": 1.663489580154419, "learning_rate": 5.257058170635709e-08, "loss": 0.8846, "step": 65485 }, { "epoch": 19.59, "grad_norm": 2.8383398056030273, "learning_rate": 5.2190457942533076e-08, "loss": 0.8722, "step": 65490 }, { "epoch": 19.6, "grad_norm": 2.9115426540374756, "learning_rate": 5.181171202029711e-08, "loss": 0.824, "step": 65495 }, { "epoch": 19.6, "grad_norm": 7.482906341552734, "learning_rate": 
5.1434343960568565e-08, "loss": 1.0373, "step": 65500 }, { "epoch": 19.6, "grad_norm": 3.7145473957061768, "learning_rate": 5.105835378418911e-08, "loss": 0.8548, "step": 65505 }, { "epoch": 19.6, "grad_norm": 2.6154332160949707, "learning_rate": 5.068374151192268e-08, "loss": 1.0704, "step": 65510 }, { "epoch": 19.6, "grad_norm": 8.783279418945312, "learning_rate": 5.031050716446106e-08, "loss": 0.8265, "step": 65515 }, { "epoch": 19.6, "grad_norm": 2.184469699859619, "learning_rate": 4.993865076241555e-08, "loss": 1.0476, "step": 65520 }, { "epoch": 19.6, "grad_norm": 2.483076810836792, "learning_rate": 4.9568172326325266e-08, "loss": 0.8417, "step": 65525 }, { "epoch": 19.61, "grad_norm": 2.2957425117492676, "learning_rate": 4.919907187664885e-08, "loss": 0.9391, "step": 65530 }, { "epoch": 19.61, "grad_norm": 2.0007917881011963, "learning_rate": 4.883134943377276e-08, "loss": 0.8605, "step": 65535 }, { "epoch": 19.61, "grad_norm": 2.37748384475708, "learning_rate": 4.846500501800577e-08, "loss": 1.2328, "step": 65540 }, { "epoch": 19.61, "grad_norm": 8.686102867126465, "learning_rate": 4.810003864958168e-08, "loss": 0.8948, "step": 65545 }, { "epoch": 19.61, "grad_norm": 2.3307371139526367, "learning_rate": 4.773645034865659e-08, "loss": 1.0579, "step": 65550 }, { "epoch": 19.61, "grad_norm": 4.153103351593018, "learning_rate": 4.737424013531166e-08, "loss": 0.7621, "step": 65555 }, { "epoch": 19.61, "grad_norm": 4.211757659912109, "learning_rate": 4.701340802955034e-08, "loss": 1.0984, "step": 65560 }, { "epoch": 19.62, "grad_norm": 4.062090873718262, "learning_rate": 4.6653954051298354e-08, "loss": 0.9231, "step": 65565 }, { "epoch": 19.62, "grad_norm": 1.6518398523330688, "learning_rate": 4.6295878220414815e-08, "loss": 0.8956, "step": 65570 }, { "epoch": 19.62, "grad_norm": 2.662663698196411, "learning_rate": 4.5939180556670016e-08, "loss": 0.9944, "step": 65575 }, { "epoch": 19.62, "grad_norm": 3.212043523788452, "learning_rate": 4.5583861079767645e-08, 
"loss": 0.8612, "step": 65580 }, { "epoch": 19.62, "grad_norm": 2.235893964767456, "learning_rate": 4.5229919809328115e-08, "loss": 0.9491, "step": 65585 }, { "epoch": 19.62, "grad_norm": 2.6770949363708496, "learning_rate": 4.4877356764902455e-08, "loss": 0.8584, "step": 65590 }, { "epoch": 19.63, "grad_norm": 6.8069939613342285, "learning_rate": 4.452617196595843e-08, "loss": 0.895, "step": 65595 }, { "epoch": 19.63, "grad_norm": 2.035522937774658, "learning_rate": 4.417636543189718e-08, "loss": 0.8317, "step": 65600 }, { "epoch": 19.63, "grad_norm": 2.070621967315674, "learning_rate": 4.3827937182033816e-08, "loss": 0.9145, "step": 65605 }, { "epoch": 19.63, "grad_norm": 1.5345319509506226, "learning_rate": 4.348088723561128e-08, "loss": 0.9873, "step": 65610 }, { "epoch": 19.63, "grad_norm": 1.464187741279602, "learning_rate": 4.313521561180034e-08, "loss": 0.9745, "step": 65615 }, { "epoch": 19.63, "grad_norm": 5.471027851104736, "learning_rate": 4.2790922329691287e-08, "loss": 0.9186, "step": 65620 }, { "epoch": 19.63, "grad_norm": 1.9127711057662964, "learning_rate": 4.2448007408296684e-08, "loss": 0.9168, "step": 65625 }, { "epoch": 19.64, "grad_norm": 2.524143934249878, "learning_rate": 4.210647086655695e-08, "loss": 1.0501, "step": 65630 }, { "epoch": 19.64, "grad_norm": 3.043079137802124, "learning_rate": 4.1766312723334755e-08, "loss": 0.7822, "step": 65635 }, { "epoch": 19.64, "grad_norm": 2.911064624786377, "learning_rate": 4.1427532997415086e-08, "loss": 1.012, "step": 65640 }, { "epoch": 19.64, "grad_norm": 5.481101036071777, "learning_rate": 4.109013170751075e-08, "loss": 1.0, "step": 65645 }, { "epoch": 19.64, "grad_norm": 2.473296880722046, "learning_rate": 4.075410887225684e-08, "loss": 0.8792, "step": 65650 }, { "epoch": 19.64, "grad_norm": 2.742133855819702, "learning_rate": 4.041946451020795e-08, "loss": 0.9363, "step": 65655 }, { "epoch": 19.64, "grad_norm": 1.7099344730377197, "learning_rate": 4.008619863984653e-08, "loss": 1.0857, "step": 
65660 }, { "epoch": 19.65, "grad_norm": 4.01974630355835, "learning_rate": 3.975431127958285e-08, "loss": 1.0116, "step": 65665 }, { "epoch": 19.65, "grad_norm": 6.58184814453125, "learning_rate": 3.9423802447743905e-08, "loss": 0.7649, "step": 65670 }, { "epoch": 19.65, "grad_norm": 3.7717185020446777, "learning_rate": 3.909467216258178e-08, "loss": 0.8852, "step": 65675 }, { "epoch": 19.65, "grad_norm": 1.591399908065796, "learning_rate": 3.876692044227359e-08, "loss": 0.932, "step": 65680 }, { "epoch": 19.65, "grad_norm": 1.8852194547653198, "learning_rate": 3.8440547304927074e-08, "loss": 0.9933, "step": 65685 }, { "epoch": 19.65, "grad_norm": 5.670814037322998, "learning_rate": 3.811555276855838e-08, "loss": 1.0301, "step": 65690 }, { "epoch": 19.66, "grad_norm": 1.9637709856033325, "learning_rate": 3.7791936851125365e-08, "loss": 0.9422, "step": 65695 }, { "epoch": 19.66, "grad_norm": 2.995957612991333, "learning_rate": 3.746969957049429e-08, "loss": 0.8587, "step": 65700 }, { "epoch": 19.66, "grad_norm": 2.3271963596343994, "learning_rate": 3.7148840944464804e-08, "loss": 0.8903, "step": 65705 }, { "epoch": 19.66, "grad_norm": 2.014490842819214, "learning_rate": 3.682936099075884e-08, "loss": 1.0424, "step": 65710 }, { "epoch": 19.66, "grad_norm": 3.5324313640594482, "learning_rate": 3.6511259727017856e-08, "loss": 0.7962, "step": 65715 }, { "epoch": 19.66, "grad_norm": 2.035303831100464, "learning_rate": 3.619453717081389e-08, "loss": 0.9449, "step": 65720 }, { "epoch": 19.66, "grad_norm": 12.83745288848877, "learning_rate": 3.587919333963574e-08, "loss": 1.0862, "step": 65725 }, { "epoch": 19.67, "grad_norm": 1.7152767181396484, "learning_rate": 3.556522825090281e-08, "loss": 0.8594, "step": 65730 }, { "epoch": 19.67, "grad_norm": 2.850773334503174, "learning_rate": 3.525264192195121e-08, "loss": 1.0139, "step": 65735 }, { "epoch": 19.67, "grad_norm": 4.40149450302124, "learning_rate": 3.494143437004771e-08, "loss": 0.8235, "step": 65740 }, { "epoch": 
19.67, "grad_norm": 2.7569260597229004, "learning_rate": 3.463160561237855e-08, "loss": 0.8739, "step": 65745 }, { "epoch": 19.67, "grad_norm": 2.746654510498047, "learning_rate": 3.432315566605504e-08, "loss": 0.9751, "step": 65750 }, { "epoch": 19.67, "grad_norm": 3.1638407707214355, "learning_rate": 3.4016084548116336e-08, "loss": 0.9332, "step": 65755 }, { "epoch": 19.67, "grad_norm": 2.6680147647857666, "learning_rate": 3.371039227551553e-08, "loss": 0.8459, "step": 65760 }, { "epoch": 19.68, "grad_norm": 3.2998719215393066, "learning_rate": 3.3406078865139137e-08, "loss": 0.9761, "step": 65765 }, { "epoch": 19.68, "grad_norm": 1.581969141960144, "learning_rate": 3.3103144333793134e-08, "loss": 1.1189, "step": 65770 }, { "epoch": 19.68, "grad_norm": 5.630129337310791, "learning_rate": 3.280158869821137e-08, "loss": 0.8332, "step": 65775 }, { "epoch": 19.68, "grad_norm": 2.6767990589141846, "learning_rate": 3.250141197504442e-08, "loss": 1.0059, "step": 65780 }, { "epoch": 19.68, "grad_norm": 3.5217807292938232, "learning_rate": 3.2202614180870674e-08, "loss": 0.9764, "step": 65785 }, { "epoch": 19.68, "grad_norm": 2.578888177871704, "learning_rate": 3.190519533219638e-08, "loss": 0.7636, "step": 65790 }, { "epoch": 19.69, "grad_norm": 3.0376648902893066, "learning_rate": 3.160915544544452e-08, "loss": 0.9932, "step": 65795 }, { "epoch": 19.69, "grad_norm": 4.299635887145996, "learning_rate": 3.1314494536965886e-08, "loss": 0.6652, "step": 65800 }, { "epoch": 19.69, "grad_norm": 2.366783857345581, "learning_rate": 3.1021212623033594e-08, "loss": 1.089, "step": 65805 }, { "epoch": 19.69, "grad_norm": 5.171496391296387, "learning_rate": 3.0729309719845775e-08, "loss": 0.9039, "step": 65810 }, { "epoch": 19.69, "grad_norm": 3.2826364040374756, "learning_rate": 3.043878584352566e-08, "loss": 0.9896, "step": 65815 }, { "epoch": 19.69, "grad_norm": 2.9492897987365723, "learning_rate": 3.014964101011597e-08, "loss": 1.0328, "step": 65820 }, { "epoch": 19.69, 
"grad_norm": 3.130568265914917, "learning_rate": 2.9861875235587255e-08, "loss": 1.2715, "step": 65825 }, { "epoch": 19.7, "grad_norm": 2.2713990211486816, "learning_rate": 2.9575488535829587e-08, "loss": 1.0102, "step": 65830 }, { "epoch": 19.7, "grad_norm": 2.3380908966064453, "learning_rate": 2.929048092666642e-08, "loss": 0.9455, "step": 65835 }, { "epoch": 19.7, "grad_norm": 4.085785865783691, "learning_rate": 2.9006852423832386e-08, "loss": 0.8658, "step": 65840 }, { "epoch": 19.7, "grad_norm": 3.3306400775909424, "learning_rate": 2.8724603042992735e-08, "loss": 0.8673, "step": 65845 }, { "epoch": 19.7, "grad_norm": 3.320685863494873, "learning_rate": 2.8443732799740554e-08, "loss": 1.0188, "step": 65850 }, { "epoch": 19.7, "grad_norm": 3.164665460586548, "learning_rate": 2.8164241709582873e-08, "loss": 1.046, "step": 65855 }, { "epoch": 19.7, "grad_norm": 3.0726613998413086, "learning_rate": 2.7886129787954573e-08, "loss": 0.8932, "step": 65860 }, { "epoch": 19.71, "grad_norm": 2.211113452911377, "learning_rate": 2.7609397050221143e-08, "loss": 0.8676, "step": 65865 }, { "epoch": 19.71, "grad_norm": 3.0560343265533447, "learning_rate": 2.7334043511662023e-08, "loss": 0.9694, "step": 65870 }, { "epoch": 19.71, "grad_norm": 1.7159016132354736, "learning_rate": 2.706006918748727e-08, "loss": 0.8506, "step": 65875 }, { "epoch": 19.71, "grad_norm": 2.9933714866638184, "learning_rate": 2.678747409282645e-08, "loss": 0.9774, "step": 65880 }, { "epoch": 19.71, "grad_norm": 2.8719935417175293, "learning_rate": 2.6516258242736958e-08, "loss": 0.8631, "step": 65885 }, { "epoch": 19.71, "grad_norm": 1.9159245491027832, "learning_rate": 2.624642165219293e-08, "loss": 0.7401, "step": 65890 }, { "epoch": 19.72, "grad_norm": 5.0512590408325195, "learning_rate": 2.597796433610189e-08, "loss": 0.8452, "step": 65895 }, { "epoch": 19.72, "grad_norm": 2.150611639022827, "learning_rate": 2.571088630929086e-08, "loss": 0.9875, "step": 65900 }, { "epoch": 19.72, "grad_norm": 
5.610554218292236, "learning_rate": 2.5445187586503606e-08, "loss": 0.9885, "step": 65905 }, { "epoch": 19.72, "grad_norm": 2.4917097091674805, "learning_rate": 2.5180868182422822e-08, "loss": 1.0434, "step": 65910 }, { "epoch": 19.72, "grad_norm": 2.9542877674102783, "learning_rate": 2.4917928111639623e-08, "loss": 1.0089, "step": 65915 }, { "epoch": 19.72, "grad_norm": 2.203429937362671, "learning_rate": 2.4656367388681268e-08, "loss": 0.981, "step": 65920 }, { "epoch": 19.72, "grad_norm": 3.659770965576172, "learning_rate": 2.4396186027991764e-08, "loss": 1.1026, "step": 65925 }, { "epoch": 19.73, "grad_norm": 2.265361785888672, "learning_rate": 2.413738404394017e-08, "loss": 0.9166, "step": 65930 }, { "epoch": 19.73, "grad_norm": 2.5812058448791504, "learning_rate": 2.3879961450817835e-08, "loss": 1.1483, "step": 65935 }, { "epoch": 19.73, "grad_norm": 2.302971363067627, "learning_rate": 2.3623918262846712e-08, "loss": 0.8114, "step": 65940 }, { "epoch": 19.73, "grad_norm": 2.793581008911133, "learning_rate": 2.3369254494162718e-08, "loss": 1.0621, "step": 65945 }, { "epoch": 19.73, "grad_norm": 5.097170829772949, "learning_rate": 2.3115970158835155e-08, "loss": 0.9018, "step": 65950 }, { "epoch": 19.73, "grad_norm": 3.7390031814575195, "learning_rate": 2.2864065270850054e-08, "loss": 1.0263, "step": 65955 }, { "epoch": 19.73, "grad_norm": 2.8096506595611572, "learning_rate": 2.2613539844118514e-08, "loss": 1.0402, "step": 65960 }, { "epoch": 19.74, "grad_norm": 1.8856858015060425, "learning_rate": 2.236439389247946e-08, "loss": 0.9717, "step": 65965 }, { "epoch": 19.74, "grad_norm": 3.062253713607788, "learning_rate": 2.2116627429694116e-08, "loss": 0.8636, "step": 65970 }, { "epoch": 19.74, "grad_norm": 2.116095781326294, "learning_rate": 2.1870240469440417e-08, "loss": 0.8674, "step": 65975 }, { "epoch": 19.74, "grad_norm": 4.265397071838379, "learning_rate": 2.162523302533248e-08, "loss": 0.9501, "step": 65980 }, { "epoch": 19.74, "grad_norm": 
3.8397390842437744, "learning_rate": 2.138160511090115e-08, "loss": 0.9542, "step": 65985 }, { "epoch": 19.74, "grad_norm": 5.092801570892334, "learning_rate": 2.1139356739596773e-08, "loss": 0.9933, "step": 65990 }, { "epoch": 19.74, "grad_norm": 3.441525459289551, "learning_rate": 2.0898487924803088e-08, "loss": 0.8262, "step": 65995 }, { "epoch": 19.75, "grad_norm": 2.899848461151123, "learning_rate": 2.0658998679820573e-08, "loss": 1.0899, "step": 66000 }, { "epoch": 19.75, "grad_norm": 2.0572032928466797, "learning_rate": 2.0420889017880305e-08, "loss": 0.7595, "step": 66005 }, { "epoch": 19.75, "grad_norm": 4.190646171569824, "learning_rate": 2.0184158952124555e-08, "loss": 0.8722, "step": 66010 }, { "epoch": 19.75, "grad_norm": 2.0695345401763916, "learning_rate": 1.9948808495637293e-08, "loss": 1.0758, "step": 66015 }, { "epoch": 19.75, "grad_norm": 3.038285732269287, "learning_rate": 1.9714837661408138e-08, "loss": 0.9091, "step": 66020 }, { "epoch": 19.75, "grad_norm": 1.420379400253296, "learning_rate": 1.9482246462365628e-08, "loss": 0.9363, "step": 66025 }, { "epoch": 19.76, "grad_norm": 2.4541139602661133, "learning_rate": 1.9251034911352274e-08, "loss": 0.9865, "step": 66030 }, { "epoch": 19.76, "grad_norm": 1.3549059629440308, "learning_rate": 1.9021203021135637e-08, "loss": 1.0879, "step": 66035 }, { "epoch": 19.76, "grad_norm": 3.1724984645843506, "learning_rate": 1.8792750804413894e-08, "loss": 0.9949, "step": 66040 }, { "epoch": 19.76, "grad_norm": 2.643465995788574, "learning_rate": 1.8565678273801956e-08, "loss": 1.0872, "step": 66045 }, { "epoch": 19.76, "grad_norm": 2.5308868885040283, "learning_rate": 1.8339985441842567e-08, "loss": 0.9338, "step": 66050 }, { "epoch": 19.76, "grad_norm": 3.499067783355713, "learning_rate": 1.8115672320995202e-08, "loss": 1.0337, "step": 66055 }, { "epoch": 19.76, "grad_norm": 3.2034714221954346, "learning_rate": 1.7892738923655504e-08, "loss": 0.859, "step": 66060 }, { "epoch": 19.77, "grad_norm": 
4.103240489959717, "learning_rate": 1.7671185262130296e-08, "loss": 0.8122, "step": 66065 }, { "epoch": 19.77, "grad_norm": 2.9544129371643066, "learning_rate": 1.745101134865701e-08, "loss": 0.9386, "step": 66070 }, { "epoch": 19.77, "grad_norm": 1.3058322668075562, "learning_rate": 1.7232217195398138e-08, "loss": 0.9956, "step": 66075 }, { "epoch": 19.77, "grad_norm": 4.783046245574951, "learning_rate": 1.7014802814435682e-08, "loss": 0.8943, "step": 66080 }, { "epoch": 19.77, "grad_norm": 1.3274574279785156, "learning_rate": 1.6798768217776706e-08, "loss": 1.0416, "step": 66085 }, { "epoch": 19.77, "grad_norm": 1.9618406295776367, "learning_rate": 1.658411341735333e-08, "loss": 0.8388, "step": 66090 }, { "epoch": 19.77, "grad_norm": 1.6042386293411255, "learning_rate": 1.637083842502274e-08, "loss": 1.0276, "step": 66095 }, { "epoch": 19.78, "grad_norm": 1.6663395166397095, "learning_rate": 1.6158943252558845e-08, "loss": 0.885, "step": 66100 }, { "epoch": 19.78, "grad_norm": 10.641562461853027, "learning_rate": 1.594842791166895e-08, "loss": 0.7403, "step": 66105 }, { "epoch": 19.78, "grad_norm": 2.9796385765075684, "learning_rate": 1.5739292413977093e-08, "loss": 1.0609, "step": 66110 }, { "epoch": 19.78, "grad_norm": 13.236249923706055, "learning_rate": 1.553153677103236e-08, "loss": 0.9377, "step": 66115 }, { "epoch": 19.78, "grad_norm": 8.828291893005371, "learning_rate": 1.5325160994314468e-08, "loss": 1.1329, "step": 66120 }, { "epoch": 19.78, "grad_norm": 1.7481231689453125, "learning_rate": 1.5120165095217077e-08, "loss": 0.9678, "step": 66125 }, { "epoch": 19.79, "grad_norm": 5.550840854644775, "learning_rate": 1.491654908506168e-08, "loss": 1.0408, "step": 66130 }, { "epoch": 19.79, "grad_norm": 1.4550460577011108, "learning_rate": 1.4714312975094847e-08, "loss": 0.9088, "step": 66135 }, { "epoch": 19.79, "grad_norm": 3.302130937576294, "learning_rate": 1.4513456776485413e-08, "loss": 0.9667, "step": 66140 }, { "epoch": 19.79, "grad_norm": 
4.979493141174316, "learning_rate": 1.4313980500327284e-08, "loss": 1.0144, "step": 66145 }, { "epoch": 19.79, "grad_norm": 3.0126538276672363, "learning_rate": 1.411588415763665e-08, "loss": 0.9664, "step": 66150 }, { "epoch": 19.79, "grad_norm": 3.2353785037994385, "learning_rate": 1.3919167759354756e-08, "loss": 1.0019, "step": 66155 }, { "epoch": 19.79, "grad_norm": 2.159238815307617, "learning_rate": 1.3723831316345137e-08, "loss": 0.8879, "step": 66160 }, { "epoch": 19.8, "grad_norm": 2.4272637367248535, "learning_rate": 1.3529874839396383e-08, "loss": 0.984, "step": 66165 }, { "epoch": 19.8, "grad_norm": 2.7232370376586914, "learning_rate": 1.3337298339219372e-08, "loss": 0.9735, "step": 66170 }, { "epoch": 19.8, "grad_norm": 3.3963069915771484, "learning_rate": 1.3146101826452818e-08, "loss": 0.8962, "step": 66175 }, { "epoch": 19.8, "grad_norm": 4.68864631652832, "learning_rate": 1.2956285311654937e-08, "loss": 0.9397, "step": 66180 }, { "epoch": 19.8, "grad_norm": 1.9277212619781494, "learning_rate": 1.2767848805309013e-08, "loss": 0.9896, "step": 66185 }, { "epoch": 19.8, "grad_norm": 1.3516850471496582, "learning_rate": 1.258079231782061e-08, "loss": 1.0026, "step": 66190 }, { "epoch": 19.8, "grad_norm": 1.6840862035751343, "learning_rate": 1.2395115859525907e-08, "loss": 0.9797, "step": 66195 }, { "epoch": 19.81, "grad_norm": 2.9334299564361572, "learning_rate": 1.221081944067226e-08, "loss": 1.1275, "step": 66200 }, { "epoch": 19.81, "grad_norm": 16.37929916381836, "learning_rate": 1.2027903071440416e-08, "loss": 0.8693, "step": 66205 }, { "epoch": 19.81, "grad_norm": 2.172717571258545, "learning_rate": 1.1846366761936179e-08, "loss": 0.8475, "step": 66210 }, { "epoch": 19.81, "grad_norm": 2.417130947113037, "learning_rate": 1.1666210522184861e-08, "loss": 1.1559, "step": 66215 }, { "epoch": 19.81, "grad_norm": 1.5092058181762695, "learning_rate": 1.1487434362131288e-08, "loss": 0.8431, "step": 66220 }, { "epoch": 19.81, "grad_norm": 
2.016624927520752, "learning_rate": 1.1310038291656443e-08, "loss": 1.1364, "step": 66225 }, { "epoch": 19.82, "grad_norm": 4.214022159576416, "learning_rate": 1.113402232054972e-08, "loss": 0.9275, "step": 66230 }, { "epoch": 19.82, "grad_norm": 1.9043920040130615, "learning_rate": 1.0959386458539444e-08, "loss": 0.9576, "step": 66235 }, { "epoch": 19.82, "grad_norm": 2.4653420448303223, "learning_rate": 1.0786130715267906e-08, "loss": 0.9825, "step": 66240 }, { "epoch": 19.82, "grad_norm": 1.7439837455749512, "learning_rate": 1.0614255100299675e-08, "loss": 0.9889, "step": 66245 }, { "epoch": 19.82, "grad_norm": 2.955815076828003, "learning_rate": 1.0443759623135484e-08, "loss": 0.8389, "step": 66250 }, { "epoch": 19.82, "grad_norm": 2.9133384227752686, "learning_rate": 1.0274644293184477e-08, "loss": 0.797, "step": 66255 }, { "epoch": 19.82, "grad_norm": 2.106208562850952, "learning_rate": 1.0106909119791952e-08, "loss": 0.9157, "step": 66260 }, { "epoch": 19.83, "grad_norm": 3.142747640609741, "learning_rate": 9.940554112217171e-09, "loss": 1.0129, "step": 66265 }, { "epoch": 19.83, "grad_norm": 4.4805121421813965, "learning_rate": 9.775579279650005e-09, "loss": 0.9918, "step": 66270 }, { "epoch": 19.83, "grad_norm": 1.981816053390503, "learning_rate": 9.611984631202608e-09, "loss": 0.9091, "step": 66275 }, { "epoch": 19.83, "grad_norm": 2.474738359451294, "learning_rate": 9.449770175909423e-09, "loss": 0.9336, "step": 66280 }, { "epoch": 19.83, "grad_norm": 3.66359281539917, "learning_rate": 9.288935922727172e-09, "loss": 0.9899, "step": 66285 }, { "epoch": 19.83, "grad_norm": 3.463992118835449, "learning_rate": 9.12948188054319e-09, "loss": 0.6621, "step": 66290 }, { "epoch": 19.83, "grad_norm": 2.546389102935791, "learning_rate": 8.97140805815877e-09, "loss": 0.9394, "step": 66295 }, { "epoch": 19.84, "grad_norm": 2.605010509490967, "learning_rate": 8.814714464308594e-09, "loss": 0.9425, "step": 66300 }, { "epoch": 19.84, "grad_norm": 1.4219180345535278, 
"learning_rate": 8.659401107644072e-09, "loss": 0.9586, "step": 66305 }, { "epoch": 19.84, "grad_norm": 3.061012029647827, "learning_rate": 8.505467996744455e-09, "loss": 0.8155, "step": 66310 }, { "epoch": 19.84, "grad_norm": 4.381716728210449, "learning_rate": 8.352915140111273e-09, "loss": 0.8649, "step": 66315 }, { "epoch": 19.84, "grad_norm": 1.0354138612747192, "learning_rate": 8.201742546168345e-09, "loss": 0.9967, "step": 66320 }, { "epoch": 19.84, "grad_norm": 3.057412624359131, "learning_rate": 8.051950223267324e-09, "loss": 1.1387, "step": 66325 }, { "epoch": 19.85, "grad_norm": 2.2272489070892334, "learning_rate": 7.903538179676595e-09, "loss": 0.9963, "step": 66330 }, { "epoch": 19.85, "grad_norm": 2.390744209289551, "learning_rate": 7.756506423597932e-09, "loss": 1.0075, "step": 66335 }, { "epoch": 19.85, "grad_norm": 2.477290153503418, "learning_rate": 7.610854963147063e-09, "loss": 0.9847, "step": 66340 }, { "epoch": 19.85, "grad_norm": 2.855509042739868, "learning_rate": 7.466583806370331e-09, "loss": 0.908, "step": 66345 }, { "epoch": 19.85, "grad_norm": 2.9786510467529297, "learning_rate": 7.323692961239137e-09, "loss": 1.0602, "step": 66350 }, { "epoch": 19.85, "grad_norm": 2.911388397216797, "learning_rate": 7.18218243563884e-09, "loss": 0.8163, "step": 66355 }, { "epoch": 19.85, "grad_norm": 3.1563682556152344, "learning_rate": 7.042052237390961e-09, "loss": 0.9476, "step": 66360 }, { "epoch": 19.86, "grad_norm": 3.7669315338134766, "learning_rate": 6.903302374228204e-09, "loss": 0.8065, "step": 66365 }, { "epoch": 19.86, "grad_norm": 2.0955512523651123, "learning_rate": 6.765932853819435e-09, "loss": 1.1436, "step": 66370 }, { "epoch": 19.86, "grad_norm": 2.7320590019226074, "learning_rate": 6.62994368375025e-09, "loss": 0.9004, "step": 66375 }, { "epoch": 19.86, "grad_norm": 4.438745021820068, "learning_rate": 6.495334871528536e-09, "loss": 1.0057, "step": 66380 }, { "epoch": 19.86, "grad_norm": 1.9494245052337646, "learning_rate": 
6.3621064245900085e-09, "loss": 0.8972, "step": 66385 }, { "epoch": 19.86, "grad_norm": 1.6904481649398804, "learning_rate": 6.230258350292673e-09, "loss": 0.9847, "step": 66390 }, { "epoch": 19.86, "grad_norm": 2.8563525676727295, "learning_rate": 6.09979065591959e-09, "loss": 0.8644, "step": 66395 }, { "epoch": 19.87, "grad_norm": 3.8716516494750977, "learning_rate": 5.9707033486733345e-09, "loss": 0.8536, "step": 66400 }, { "epoch": 19.87, "grad_norm": 4.005788803100586, "learning_rate": 5.842996435687087e-09, "loss": 1.0165, "step": 66405 }, { "epoch": 19.87, "grad_norm": 5.931815147399902, "learning_rate": 5.71666992401354e-09, "loss": 1.0706, "step": 66410 }, { "epoch": 19.87, "grad_norm": 7.53218412399292, "learning_rate": 5.591723820624894e-09, "loss": 0.8242, "step": 66415 }, { "epoch": 19.87, "grad_norm": 3.1983590126037598, "learning_rate": 5.468158132426737e-09, "loss": 0.9099, "step": 66420 }, { "epoch": 19.87, "grad_norm": 4.075475692749023, "learning_rate": 5.3459728662413886e-09, "loss": 0.9736, "step": 66425 }, { "epoch": 19.88, "grad_norm": 1.4461299180984497, "learning_rate": 5.225168028819005e-09, "loss": 0.944, "step": 66430 }, { "epoch": 19.88, "grad_norm": 3.7994940280914307, "learning_rate": 5.105743626829251e-09, "loss": 0.9979, "step": 66435 }, { "epoch": 19.88, "grad_norm": 2.046570301055908, "learning_rate": 4.987699666869627e-09, "loss": 1.1112, "step": 66440 }, { "epoch": 19.88, "grad_norm": 3.123607873916626, "learning_rate": 4.871036155454367e-09, "loss": 0.9295, "step": 66445 }, { "epoch": 19.88, "grad_norm": 3.029066801071167, "learning_rate": 4.755753099033866e-09, "loss": 0.9579, "step": 66450 }, { "epoch": 19.88, "grad_norm": 2.6288695335388184, "learning_rate": 4.641850503972478e-09, "loss": 1.0604, "step": 66455 }, { "epoch": 19.88, "grad_norm": 2.6345958709716797, "learning_rate": 4.529328376559616e-09, "loss": 1.0179, "step": 66460 }, { "epoch": 19.89, "grad_norm": 2.6742000579833984, "learning_rate": 4.418186723009754e-09, 
"loss": 1.1658, "step": 66465 }, { "epoch": 19.89, "grad_norm": 8.863636016845703, "learning_rate": 4.3084255494652e-09, "loss": 1.0605, "step": 66470 }, { "epoch": 19.89, "grad_norm": 1.21725594997406, "learning_rate": 4.200044861982222e-09, "loss": 0.8926, "step": 66475 }, { "epoch": 19.89, "grad_norm": 9.21455192565918, "learning_rate": 4.093044666547696e-09, "loss": 1.0839, "step": 66480 }, { "epoch": 19.89, "grad_norm": 3.9148738384246826, "learning_rate": 3.987424969073561e-09, "loss": 1.0616, "step": 66485 }, { "epoch": 19.89, "grad_norm": 3.932304859161377, "learning_rate": 3.883185775394038e-09, "loss": 0.8605, "step": 66490 }, { "epoch": 19.89, "grad_norm": 2.094050168991089, "learning_rate": 3.780327091262858e-09, "loss": 1.0609, "step": 66495 }, { "epoch": 19.9, "grad_norm": 2.5073344707489014, "learning_rate": 3.6788489223615884e-09, "loss": 0.935, "step": 66500 }, { "epoch": 19.9, "grad_norm": 3.42728853225708, "learning_rate": 3.5787512742940785e-09, "loss": 0.9495, "step": 66505 }, { "epoch": 19.9, "grad_norm": 2.900682210922241, "learning_rate": 3.4800341525920156e-09, "loss": 1.0571, "step": 66510 }, { "epoch": 19.9, "grad_norm": 2.075742483139038, "learning_rate": 3.3826975627038183e-09, "loss": 0.8483, "step": 66515 }, { "epoch": 19.9, "grad_norm": 1.8850741386413574, "learning_rate": 3.2867415100085176e-09, "loss": 0.9677, "step": 66520 }, { "epoch": 19.9, "grad_norm": 5.589338779449463, "learning_rate": 3.192165999801877e-09, "loss": 1.1917, "step": 66525 }, { "epoch": 19.91, "grad_norm": 2.845837116241455, "learning_rate": 3.098971037310272e-09, "loss": 0.9501, "step": 66530 }, { "epoch": 19.91, "grad_norm": 2.0691795349121094, "learning_rate": 3.0071566276795858e-09, "loss": 0.9732, "step": 66535 }, { "epoch": 19.91, "grad_norm": 2.152705430984497, "learning_rate": 2.9167227759807623e-09, "loss": 1.0665, "step": 66540 }, { "epoch": 19.91, "grad_norm": 1.9462311267852783, "learning_rate": 2.8276694872098053e-09, "loss": 1.1731, "step": 66545 
}, { "epoch": 19.91, "grad_norm": 1.606532335281372, "learning_rate": 2.739996766279451e-09, "loss": 0.7424, "step": 66550 }, { "epoch": 19.91, "grad_norm": 2.7857766151428223, "learning_rate": 2.6537046180385992e-09, "loss": 0.8624, "step": 66555 }, { "epoch": 19.91, "grad_norm": 1.5321699380874634, "learning_rate": 2.5687930472501066e-09, "loss": 0.8455, "step": 66560 }, { "epoch": 19.92, "grad_norm": 3.194620370864868, "learning_rate": 2.485262058604665e-09, "loss": 1.0387, "step": 66565 }, { "epoch": 19.92, "grad_norm": 1.5754766464233398, "learning_rate": 2.4031116567124756e-09, "loss": 1.1122, "step": 66570 }, { "epoch": 19.92, "grad_norm": 2.2193338871002197, "learning_rate": 2.3223418461143507e-09, "loss": 1.1581, "step": 66575 }, { "epoch": 19.92, "grad_norm": 2.662733316421509, "learning_rate": 2.242952631270612e-09, "loss": 0.8239, "step": 66580 }, { "epoch": 19.92, "grad_norm": 2.0737709999084473, "learning_rate": 2.1649440165638635e-09, "loss": 1.1056, "step": 66585 }, { "epoch": 19.92, "grad_norm": 1.9437065124511719, "learning_rate": 2.0883160063017713e-09, "loss": 1.0208, "step": 66590 }, { "epoch": 19.92, "grad_norm": 2.8126699924468994, "learning_rate": 2.0130686047198367e-09, "loss": 0.8979, "step": 66595 }, { "epoch": 19.93, "grad_norm": 5.540438175201416, "learning_rate": 1.9392018159730684e-09, "loss": 0.9252, "step": 66600 }, { "epoch": 19.93, "grad_norm": 2.1464717388153076, "learning_rate": 1.8667156441387613e-09, "loss": 1.1339, "step": 66605 }, { "epoch": 19.93, "grad_norm": 1.9437625408172607, "learning_rate": 1.7956100932220444e-09, "loss": 0.9708, "step": 66610 }, { "epoch": 19.93, "grad_norm": 3.8403539657592773, "learning_rate": 1.7258851671503317e-09, "loss": 0.9926, "step": 66615 }, { "epoch": 19.93, "grad_norm": 2.2192623615264893, "learning_rate": 1.657540869773322e-09, "loss": 1.0566, "step": 66620 }, { "epoch": 19.93, "grad_norm": 4.014240741729736, "learning_rate": 1.5905772048629973e-09, "loss": 0.8297, "step": 66625 }, { 
"epoch": 19.93, "grad_norm": 4.3666181564331055, "learning_rate": 1.5249941761247276e-09, "loss": 0.9347, "step": 66630 }, { "epoch": 19.94, "grad_norm": 1.6916534900665283, "learning_rate": 1.4607917871750643e-09, "loss": 0.9515, "step": 66635 }, { "epoch": 19.94, "grad_norm": 1.9983826875686646, "learning_rate": 1.3979700415611697e-09, "loss": 0.9741, "step": 66640 }, { "epoch": 19.94, "grad_norm": 2.9387166500091553, "learning_rate": 1.3365289427524908e-09, "loss": 0.8877, "step": 66645 }, { "epoch": 19.94, "grad_norm": 2.9667611122131348, "learning_rate": 1.2764684941435346e-09, "loss": 0.8683, "step": 66650 }, { "epoch": 19.94, "grad_norm": 1.2292882204055786, "learning_rate": 1.2177886990510923e-09, "loss": 0.8254, "step": 66655 }, { "epoch": 19.94, "grad_norm": 1.751691222190857, "learning_rate": 1.1604895607142397e-09, "loss": 0.9371, "step": 66660 }, { "epoch": 19.95, "grad_norm": 2.478400945663452, "learning_rate": 1.1045710822971123e-09, "loss": 0.981, "step": 66665 }, { "epoch": 19.95, "grad_norm": 5.2517781257629395, "learning_rate": 1.0500332668916813e-09, "loss": 0.8857, "step": 66670 }, { "epoch": 19.95, "grad_norm": 3.200549840927124, "learning_rate": 9.968761175094267e-10, "loss": 1.0439, "step": 66675 }, { "epoch": 19.95, "grad_norm": 3.636068105697632, "learning_rate": 9.45099637081337e-10, "loss": 0.9732, "step": 66680 }, { "epoch": 19.95, "grad_norm": 2.145059823989868, "learning_rate": 8.947038284717879e-10, "loss": 0.7454, "step": 66685 }, { "epoch": 19.95, "grad_norm": 1.3678562641143799, "learning_rate": 8.456886944646636e-10, "loss": 1.0683, "step": 66690 }, { "epoch": 19.95, "grad_norm": 2.720142364501953, "learning_rate": 7.980542377633571e-10, "loss": 0.8569, "step": 66695 }, { "epoch": 19.96, "grad_norm": 5.969086647033691, "learning_rate": 7.51800460999097e-10, "loss": 0.9017, "step": 66700 }, { "epoch": 19.96, "grad_norm": 1.7213473320007324, "learning_rate": 7.069273667281717e-10, "loss": 0.8733, "step": 66705 }, { "epoch": 19.96, 
"grad_norm": 1.9708019495010376, "learning_rate": 6.634349574291543e-10, "loss": 0.978, "step": 66710 }, { "epoch": 19.96, "grad_norm": 1.8042583465576172, "learning_rate": 6.213232355029019e-10, "loss": 1.2455, "step": 66715 }, { "epoch": 19.96, "grad_norm": 1.776975393295288, "learning_rate": 5.805922032753319e-10, "loss": 0.7985, "step": 66720 }, { "epoch": 19.96, "grad_norm": 2.806536912918091, "learning_rate": 5.412418629946459e-10, "loss": 0.9752, "step": 66725 }, { "epoch": 19.96, "grad_norm": 1.0485270023345947, "learning_rate": 5.032722168396564e-10, "loss": 0.8385, "step": 66730 }, { "epoch": 19.97, "grad_norm": 2.4013590812683105, "learning_rate": 4.666832669003585e-10, "loss": 0.8629, "step": 66735 }, { "epoch": 19.97, "grad_norm": 3.498737335205078, "learning_rate": 4.3147501520013347e-10, "loss": 1.2686, "step": 66740 }, { "epoch": 19.97, "grad_norm": 2.2071971893310547, "learning_rate": 3.9764746368742277e-10, "loss": 1.0284, "step": 66745 }, { "epoch": 19.97, "grad_norm": 2.6617162227630615, "learning_rate": 3.652006142246256e-10, "loss": 0.9895, "step": 66750 }, { "epoch": 19.97, "grad_norm": 2.3580636978149414, "learning_rate": 3.3413446860475204e-10, "loss": 1.0622, "step": 66755 }, { "epoch": 19.97, "grad_norm": 2.603350877761841, "learning_rate": 3.044490285458723e-10, "loss": 0.9365, "step": 66760 }, { "epoch": 19.98, "grad_norm": 2.9707422256469727, "learning_rate": 2.7614429568834087e-10, "loss": 0.9106, "step": 66765 }, { "epoch": 19.98, "grad_norm": 1.205654501914978, "learning_rate": 2.4922027159202113e-10, "loss": 0.9287, "step": 66770 }, { "epoch": 19.98, "grad_norm": 1.2524653673171997, "learning_rate": 2.2367695774461185e-10, "loss": 1.0583, "step": 66775 }, { "epoch": 19.98, "grad_norm": 1.9343750476837158, "learning_rate": 1.9951435555887187e-10, "loss": 1.0717, "step": 66780 }, { "epoch": 19.98, "grad_norm": 2.118802309036255, "learning_rate": 1.7673246636984443e-10, "loss": 0.962, "step": 66785 }, { "epoch": 19.98, "grad_norm": 
2.374420404434204, "learning_rate": 1.5533129143208146e-10, "loss": 0.9052, "step": 66790 }, { "epoch": 19.98, "grad_norm": 3.1811416149139404, "learning_rate": 1.3531083193074612e-10, "loss": 0.8696, "step": 66795 }, { "epoch": 19.99, "grad_norm": 1.0755863189697266, "learning_rate": 1.166710889677347e-10, "loss": 0.7908, "step": 66800 }, { "epoch": 19.99, "grad_norm": 4.653294086456299, "learning_rate": 9.941206357555467e-11, "loss": 0.968, "step": 66805 }, { "epoch": 19.99, "grad_norm": 1.384509563446045, "learning_rate": 8.353375670899777e-11, "loss": 1.0534, "step": 66810 }, { "epoch": 19.99, "grad_norm": 6.829671859741211, "learning_rate": 6.903616924236466e-11, "loss": 0.934, "step": 66815 }, { "epoch": 19.99, "grad_norm": 2.6626029014587402, "learning_rate": 5.591930197501594e-11, "loss": 0.9137, "step": 66820 }, { "epoch": 19.99, "grad_norm": 3.337085723876953, "learning_rate": 4.418315563414765e-11, "loss": 0.9638, "step": 66825 }, { "epoch": 19.99, "grad_norm": 2.169858694076538, "learning_rate": 3.3827730869240294e-11, "loss": 0.9035, "step": 66830 }, { "epoch": 20.0, "grad_norm": 3.450448513031006, "learning_rate": 2.4853028246507594e-11, "loss": 0.9036, "step": 66835 }, { "epoch": 20.0, "grad_norm": 2.451824188232422, "learning_rate": 1.7259048268325474e-11, "loss": 0.8429, "step": 66840 }, { "epoch": 20.0, "step": 66840, "total_flos": 3.489559903035851e+18, "train_loss": 1.1193655486238139, "train_runtime": 133767.8538, "train_samples_per_second": 3.998, "train_steps_per_second": 0.5 } ], "logging_steps": 5, "max_steps": 66840, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 100, "total_flos": 3.489559903035851e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }