{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.999850411368736,
  "eval_steps": 500,
  "global_step": 3342,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0002991772625280479,
      "grad_norm": 24.73757525746042,
      "learning_rate": 2.985074626865672e-08,
      "loss": 1.4467,
      "step": 1
    },
    {
      "epoch": 0.0014958863126402393,
      "grad_norm": 25.170481305263397,
      "learning_rate": 1.4925373134328358e-07,
      "loss": 1.418,
      "step": 5
    },
    {
      "epoch": 0.0029917726252804786,
      "grad_norm": 17.671227057151118,
      "learning_rate": 2.9850746268656716e-07,
      "loss": 1.4125,
      "step": 10
    },
    {
      "epoch": 0.004487658937920718,
      "grad_norm": 8.909500017199079,
      "learning_rate": 4.4776119402985074e-07,
      "loss": 1.3226,
      "step": 15
    },
    {
      "epoch": 0.005983545250560957,
      "grad_norm": 11.280714871339804,
      "learning_rate": 5.970149253731343e-07,
      "loss": 1.1982,
      "step": 20
    },
    {
      "epoch": 0.0074794315632011965,
      "grad_norm": 8.398369640505729,
      "learning_rate": 7.462686567164179e-07,
      "loss": 1.0909,
      "step": 25
    },
    {
      "epoch": 0.008975317875841436,
      "grad_norm": 2.9571179549426505,
      "learning_rate": 8.955223880597015e-07,
      "loss": 0.9951,
      "step": 30
    },
    {
      "epoch": 0.010471204188481676,
      "grad_norm": 2.8894837770977224,
      "learning_rate": 1.044776119402985e-06,
      "loss": 0.9586,
      "step": 35
    },
    {
      "epoch": 0.011967090501121914,
      "grad_norm": 2.324424480950711,
      "learning_rate": 1.1940298507462686e-06,
      "loss": 0.9305,
      "step": 40
    },
    {
      "epoch": 0.013462976813762155,
      "grad_norm": 2.2619500315834866,
      "learning_rate": 1.3432835820895524e-06,
      "loss": 0.91,
      "step": 45
    },
    {
      "epoch": 0.014958863126402393,
      "grad_norm": 2.3542199193134663,
      "learning_rate": 1.4925373134328358e-06,
      "loss": 0.8925,
      "step": 50
    },
    {
      "epoch": 0.016454749439042633,
      "grad_norm": 2.1942393280485444,
      "learning_rate": 1.6417910447761196e-06,
      "loss": 0.8768,
      "step": 55
    },
    {
      "epoch": 0.01795063575168287,
      "grad_norm": 2.1616387387297245,
      "learning_rate": 1.791044776119403e-06,
      "loss": 0.8723,
      "step": 60
    },
    {
      "epoch": 0.01944652206432311,
      "grad_norm": 2.2022102428237957,
      "learning_rate": 1.9402985074626867e-06,
      "loss": 0.8639,
      "step": 65
    },
    {
      "epoch": 0.020942408376963352,
      "grad_norm": 2.1621065249124585,
      "learning_rate": 2.08955223880597e-06,
      "loss": 0.8699,
      "step": 70
    },
    {
      "epoch": 0.02243829468960359,
      "grad_norm": 2.3233760195216147,
      "learning_rate": 2.238805970149254e-06,
      "loss": 0.8603,
      "step": 75
    },
    {
      "epoch": 0.02393418100224383,
      "grad_norm": 2.266487924942459,
      "learning_rate": 2.3880597014925373e-06,
      "loss": 0.8537,
      "step": 80
    },
    {
      "epoch": 0.025430067314884067,
      "grad_norm": 2.2873594748046076,
      "learning_rate": 2.537313432835821e-06,
      "loss": 0.8487,
      "step": 85
    },
    {
      "epoch": 0.02692595362752431,
      "grad_norm": 2.283742446960398,
      "learning_rate": 2.686567164179105e-06,
      "loss": 0.8413,
      "step": 90
    },
    {
      "epoch": 0.028421839940164548,
      "grad_norm": 2.1774038405712486,
      "learning_rate": 2.835820895522388e-06,
      "loss": 0.8281,
      "step": 95
    },
    {
      "epoch": 0.029917726252804786,
      "grad_norm": 2.342263870606748,
      "learning_rate": 2.9850746268656716e-06,
      "loss": 0.8305,
      "step": 100
    },
    {
      "epoch": 0.031413612565445025,
      "grad_norm": 2.385361324640983,
      "learning_rate": 3.1343283582089558e-06,
      "loss": 0.8168,
      "step": 105
    },
    {
      "epoch": 0.032909498878085267,
      "grad_norm": 2.500114420278517,
      "learning_rate": 3.283582089552239e-06,
      "loss": 0.8207,
      "step": 110
    },
    {
      "epoch": 0.0344053851907255,
      "grad_norm": 2.4374602854285286,
      "learning_rate": 3.4328358208955225e-06,
      "loss": 0.8244,
      "step": 115
    },
    {
      "epoch": 0.03590127150336574,
      "grad_norm": 2.44198563300893,
      "learning_rate": 3.582089552238806e-06,
      "loss": 0.8199,
      "step": 120
    },
    {
      "epoch": 0.037397157816005985,
      "grad_norm": 2.2760332882107157,
      "learning_rate": 3.73134328358209e-06,
      "loss": 0.8216,
      "step": 125
    },
    {
      "epoch": 0.03889304412864622,
      "grad_norm": 2.508508841712968,
      "learning_rate": 3.8805970149253735e-06,
      "loss": 0.8014,
      "step": 130
    },
    {
      "epoch": 0.04038893044128646,
      "grad_norm": 2.3029396530303066,
      "learning_rate": 4.029850746268657e-06,
      "loss": 0.7989,
      "step": 135
    },
    {
      "epoch": 0.041884816753926704,
      "grad_norm": 2.3322348191256594,
      "learning_rate": 4.17910447761194e-06,
      "loss": 0.7964,
      "step": 140
    },
    {
      "epoch": 0.04338070306656694,
      "grad_norm": 2.331622885776369,
      "learning_rate": 4.3283582089552236e-06,
      "loss": 0.8013,
      "step": 145
    },
    {
      "epoch": 0.04487658937920718,
      "grad_norm": 2.288860960559162,
      "learning_rate": 4.477611940298508e-06,
      "loss": 0.8045,
      "step": 150
    },
    {
      "epoch": 0.04637247569184742,
      "grad_norm": 2.4508630826235778,
      "learning_rate": 4.626865671641791e-06,
      "loss": 0.7898,
      "step": 155
    },
    {
      "epoch": 0.04786836200448766,
      "grad_norm": 2.3184684975983045,
      "learning_rate": 4.7761194029850745e-06,
      "loss": 0.7937,
      "step": 160
    },
    {
      "epoch": 0.0493642483171279,
      "grad_norm": 2.1613921165346826,
      "learning_rate": 4.925373134328359e-06,
      "loss": 0.7911,
      "step": 165
    },
    {
      "epoch": 0.050860134629768135,
      "grad_norm": 2.453275213296358,
      "learning_rate": 5.074626865671642e-06,
      "loss": 0.7857,
      "step": 170
    },
    {
      "epoch": 0.05235602094240838,
      "grad_norm": 2.3284898790399353,
      "learning_rate": 5.2238805970149255e-06,
      "loss": 0.7793,
      "step": 175
    },
    {
      "epoch": 0.05385190725504862,
      "grad_norm": 2.3201172049873686,
      "learning_rate": 5.37313432835821e-06,
      "loss": 0.7779,
      "step": 180
    },
    {
      "epoch": 0.055347793567688854,
      "grad_norm": 2.357711081062365,
      "learning_rate": 5.522388059701493e-06,
      "loss": 0.7847,
      "step": 185
    },
    {
      "epoch": 0.056843679880329095,
      "grad_norm": 2.7611374374886037,
      "learning_rate": 5.671641791044776e-06,
      "loss": 0.7706,
      "step": 190
    },
    {
      "epoch": 0.05833956619296934,
      "grad_norm": 2.6271055846372513,
      "learning_rate": 5.820895522388061e-06,
      "loss": 0.7607,
      "step": 195
    },
    {
      "epoch": 0.05983545250560957,
      "grad_norm": 2.376880891581398,
      "learning_rate": 5.970149253731343e-06,
      "loss": 0.7671,
      "step": 200
    },
    {
      "epoch": 0.061331338818249814,
      "grad_norm": 2.3835041311189022,
      "learning_rate": 6.119402985074627e-06,
      "loss": 0.7586,
      "step": 205
    },
    {
      "epoch": 0.06282722513089005,
      "grad_norm": 2.462749716678564,
      "learning_rate": 6.2686567164179116e-06,
      "loss": 0.7758,
      "step": 210
    },
    {
      "epoch": 0.0643231114435303,
      "grad_norm": 2.4077847282275866,
      "learning_rate": 6.417910447761194e-06,
      "loss": 0.7638,
      "step": 215
    },
    {
      "epoch": 0.06581899775617053,
      "grad_norm": 2.3747888389216616,
      "learning_rate": 6.567164179104478e-06,
      "loss": 0.761,
      "step": 220
    },
    {
      "epoch": 0.06731488406881077,
      "grad_norm": 2.493092964041521,
      "learning_rate": 6.7164179104477625e-06,
      "loss": 0.7574,
      "step": 225
    },
    {
      "epoch": 0.068810770381451,
      "grad_norm": 2.6477317119354984,
      "learning_rate": 6.865671641791045e-06,
      "loss": 0.7498,
      "step": 230
    },
    {
      "epoch": 0.07030665669409125,
      "grad_norm": 2.563921275934862,
      "learning_rate": 7.014925373134329e-06,
      "loss": 0.761,
      "step": 235
    },
    {
      "epoch": 0.07180254300673149,
      "grad_norm": 2.4406043810417257,
      "learning_rate": 7.164179104477612e-06,
      "loss": 0.7423,
      "step": 240
    },
    {
      "epoch": 0.07329842931937172,
      "grad_norm": 2.2639780494151034,
      "learning_rate": 7.313432835820896e-06,
      "loss": 0.7478,
      "step": 245
    },
    {
      "epoch": 0.07479431563201197,
      "grad_norm": 2.6132105098393628,
      "learning_rate": 7.46268656716418e-06,
      "loss": 0.7522,
      "step": 250
    },
    {
      "epoch": 0.0762902019446522,
      "grad_norm": 2.5688006092201365,
      "learning_rate": 7.611940298507463e-06,
      "loss": 0.7409,
      "step": 255
    },
    {
      "epoch": 0.07778608825729244,
      "grad_norm": 2.4383140799178564,
      "learning_rate": 7.761194029850747e-06,
      "loss": 0.7449,
      "step": 260
    },
    {
      "epoch": 0.07928197456993269,
      "grad_norm": 2.3404023048993365,
      "learning_rate": 7.91044776119403e-06,
      "loss": 0.7306,
      "step": 265
    },
    {
      "epoch": 0.08077786088257292,
      "grad_norm": 2.435305377918958,
      "learning_rate": 8.059701492537314e-06,
      "loss": 0.7464,
      "step": 270
    },
    {
      "epoch": 0.08227374719521316,
      "grad_norm": 2.9299430373461433,
      "learning_rate": 8.208955223880599e-06,
      "loss": 0.7279,
      "step": 275
    },
    {
      "epoch": 0.08376963350785341,
      "grad_norm": 2.387726009024013,
      "learning_rate": 8.35820895522388e-06,
      "loss": 0.7388,
      "step": 280
    },
    {
      "epoch": 0.08526551982049364,
      "grad_norm": 2.268041368580347,
      "learning_rate": 8.507462686567165e-06,
      "loss": 0.7407,
      "step": 285
    },
    {
      "epoch": 0.08676140613313388,
      "grad_norm": 2.408844552763582,
      "learning_rate": 8.656716417910447e-06,
      "loss": 0.7222,
      "step": 290
    },
    {
      "epoch": 0.08825729244577413,
      "grad_norm": 2.401282808607445,
      "learning_rate": 8.805970149253732e-06,
      "loss": 0.7265,
      "step": 295
    },
    {
      "epoch": 0.08975317875841436,
      "grad_norm": 2.4691438286047425,
      "learning_rate": 8.955223880597016e-06,
      "loss": 0.7239,
      "step": 300
    },
    {
      "epoch": 0.0912490650710546,
      "grad_norm": 2.3268484057723673,
      "learning_rate": 9.104477611940299e-06,
      "loss": 0.7227,
      "step": 305
    },
    {
      "epoch": 0.09274495138369485,
      "grad_norm": 2.402308618181149,
      "learning_rate": 9.253731343283582e-06,
      "loss": 0.7244,
      "step": 310
    },
    {
      "epoch": 0.09424083769633508,
      "grad_norm": 2.3361311735184604,
      "learning_rate": 9.402985074626867e-06,
      "loss": 0.7263,
      "step": 315
    },
    {
      "epoch": 0.09573672400897532,
      "grad_norm": 2.335173501165771,
      "learning_rate": 9.552238805970149e-06,
      "loss": 0.7215,
      "step": 320
    },
    {
      "epoch": 0.09723261032161555,
      "grad_norm": 2.730322757042367,
      "learning_rate": 9.701492537313434e-06,
      "loss": 0.7332,
      "step": 325
    },
    {
      "epoch": 0.0987284966342558,
      "grad_norm": 2.2835032077275312,
      "learning_rate": 9.850746268656717e-06,
      "loss": 0.7115,
      "step": 330
    },
    {
      "epoch": 0.10022438294689603,
      "grad_norm": 2.4782326787594338,
      "learning_rate": 1e-05,
      "loss": 0.7142,
      "step": 335
    },
    {
      "epoch": 0.10172026925953627,
      "grad_norm": 2.4128411535499454,
      "learning_rate": 9.999931779967976e-06,
      "loss": 0.7108,
      "step": 340
    },
    {
      "epoch": 0.10321615557217652,
      "grad_norm": 2.230247650641441,
      "learning_rate": 9.99972712173349e-06,
      "loss": 0.7037,
      "step": 345
    },
    {
      "epoch": 0.10471204188481675,
      "grad_norm": 2.4375274487235363,
      "learning_rate": 9.999386030881264e-06,
      "loss": 0.7052,
      "step": 350
    },
    {
      "epoch": 0.10620792819745699,
      "grad_norm": 2.497148482639146,
      "learning_rate": 9.998908516718984e-06,
      "loss": 0.723,
      "step": 355
    },
    {
      "epoch": 0.10770381451009724,
      "grad_norm": 2.7572554578379793,
      "learning_rate": 9.998294592277064e-06,
      "loss": 0.7089,
      "step": 360
    },
    {
      "epoch": 0.10919970082273747,
      "grad_norm": 2.4811541468806175,
      "learning_rate": 9.997544274308282e-06,
      "loss": 0.7049,
      "step": 365
    },
    {
      "epoch": 0.11069558713537771,
      "grad_norm": 2.3612905719078556,
      "learning_rate": 9.996657583287326e-06,
      "loss": 0.7112,
      "step": 370
    },
    {
      "epoch": 0.11219147344801796,
      "grad_norm": 2.3269094749093346,
      "learning_rate": 9.995634543410231e-06,
      "loss": 0.7091,
      "step": 375
    },
    {
      "epoch": 0.11368735976065819,
      "grad_norm": 2.1856434021067543,
      "learning_rate": 9.99447518259372e-06,
      "loss": 0.6945,
      "step": 380
    },
    {
      "epoch": 0.11518324607329843,
      "grad_norm": 2.2818233887085677,
      "learning_rate": 9.99317953247445e-06,
      "loss": 0.695,
      "step": 385
    },
    {
      "epoch": 0.11667913238593867,
      "grad_norm": 2.5644750677779267,
      "learning_rate": 9.991747628408138e-06,
      "loss": 0.6878,
      "step": 390
    },
    {
      "epoch": 0.11817501869857891,
      "grad_norm": 2.485293875776813,
      "learning_rate": 9.990179509468595e-06,
      "loss": 0.6987,
      "step": 395
    },
    {
      "epoch": 0.11967090501121914,
      "grad_norm": 2.291043542367887,
      "learning_rate": 9.988475218446676e-06,
      "loss": 0.6898,
      "step": 400
    },
    {
      "epoch": 0.1211667913238594,
      "grad_norm": 2.4232767050288726,
      "learning_rate": 9.986634801849093e-06,
      "loss": 0.6963,
      "step": 405
    },
    {
      "epoch": 0.12266267763649963,
      "grad_norm": 2.4211336275289512,
      "learning_rate": 9.984658309897161e-06,
      "loss": 0.6793,
      "step": 410
    },
    {
      "epoch": 0.12415856394913986,
      "grad_norm": 2.5029979080579734,
      "learning_rate": 9.982545796525416e-06,
      "loss": 0.6773,
      "step": 415
    },
    {
      "epoch": 0.1256544502617801,
      "grad_norm": 2.4668720551969487,
      "learning_rate": 9.980297319380148e-06,
      "loss": 0.6741,
      "step": 420
    },
    {
      "epoch": 0.12715033657442035,
      "grad_norm": 2.1324057406570796,
      "learning_rate": 9.977912939817833e-06,
      "loss": 0.6717,
      "step": 425
    },
    {
      "epoch": 0.1286462228870606,
      "grad_norm": 2.216513918034811,
      "learning_rate": 9.97539272290345e-06,
      "loss": 0.664,
      "step": 430
    },
    {
      "epoch": 0.13014210919970082,
      "grad_norm": 2.3891946944700346,
      "learning_rate": 9.97273673740871e-06,
      "loss": 0.6779,
      "step": 435
    },
    {
      "epoch": 0.13163799551234107,
      "grad_norm": 2.107297355911597,
      "learning_rate": 9.96994505581018e-06,
      "loss": 0.663,
      "step": 440
    },
    {
      "epoch": 0.13313388182498131,
      "grad_norm": 2.2741240440107666,
      "learning_rate": 9.967017754287303e-06,
      "loss": 0.6628,
      "step": 445
    },
    {
      "epoch": 0.13462976813762154,
      "grad_norm": 2.231118541487464,
      "learning_rate": 9.963954912720319e-06,
      "loss": 0.6805,
      "step": 450
    },
    {
      "epoch": 0.13612565445026178,
      "grad_norm": 2.327411278722037,
      "learning_rate": 9.960756614688089e-06,
      "loss": 0.6572,
      "step": 455
    },
    {
      "epoch": 0.137621540762902,
      "grad_norm": 2.2727646648145097,
      "learning_rate": 9.957422947465814e-06,
      "loss": 0.6682,
      "step": 460
    },
    {
      "epoch": 0.13911742707554225,
      "grad_norm": 2.43427967377174,
      "learning_rate": 9.953954002022643e-06,
      "loss": 0.658,
      "step": 465
    },
    {
      "epoch": 0.1406133133881825,
      "grad_norm": 2.203173002529278,
      "learning_rate": 9.950349873019204e-06,
      "loss": 0.6513,
      "step": 470
    },
    {
      "epoch": 0.14210919970082272,
      "grad_norm": 2.159064147239943,
      "learning_rate": 9.946610658805018e-06,
      "loss": 0.6597,
      "step": 475
    },
    {
      "epoch": 0.14360508601346297,
      "grad_norm": 2.2802374368293186,
      "learning_rate": 9.94273646141581e-06,
      "loss": 0.6642,
      "step": 480
    },
    {
      "epoch": 0.14510097232610322,
      "grad_norm": 2.321550706239028,
      "learning_rate": 9.938727386570727e-06,
      "loss": 0.6525,
      "step": 485
    },
    {
      "epoch": 0.14659685863874344,
      "grad_norm": 2.3398188402263105,
      "learning_rate": 9.934583543669454e-06,
      "loss": 0.6583,
      "step": 490
    },
    {
      "epoch": 0.1480927449513837,
      "grad_norm": 2.1439110014914524,
      "learning_rate": 9.93030504578923e-06,
      "loss": 0.6413,
      "step": 495
    },
    {
      "epoch": 0.14958863126402394,
      "grad_norm": 2.2275265346511377,
      "learning_rate": 9.925892009681762e-06,
      "loss": 0.6529,
      "step": 500
    },
    {
      "epoch": 0.15108451757666416,
      "grad_norm": 2.3496939081419637,
      "learning_rate": 9.921344555770033e-06,
      "loss": 0.6437,
      "step": 505
    },
    {
      "epoch": 0.1525804038893044,
      "grad_norm": 2.238484219281493,
      "learning_rate": 9.916662808145023e-06,
      "loss": 0.6452,
      "step": 510
    },
    {
      "epoch": 0.15407629020194466,
      "grad_norm": 2.78908558811821,
      "learning_rate": 9.911846894562325e-06,
      "loss": 0.6436,
      "step": 515
    },
    {
      "epoch": 0.15557217651458488,
      "grad_norm": 2.320928708686177,
      "learning_rate": 9.906896946438646e-06,
      "loss": 0.6336,
      "step": 520
    },
    {
      "epoch": 0.15706806282722513,
      "grad_norm": 2.2586199846671686,
      "learning_rate": 9.901813098848238e-06,
      "loss": 0.6338,
      "step": 525
    },
    {
      "epoch": 0.15856394913986538,
      "grad_norm": 2.3116521162760217,
      "learning_rate": 9.896595490519196e-06,
      "loss": 0.6414,
      "step": 530
    },
    {
      "epoch": 0.1600598354525056,
      "grad_norm": 2.1311643830360767,
      "learning_rate": 9.891244263829685e-06,
      "loss": 0.64,
      "step": 535
    },
    {
      "epoch": 0.16155572176514585,
      "grad_norm": 2.3201652793369605,
      "learning_rate": 9.885759564804045e-06,
      "loss": 0.6197,
      "step": 540
    },
    {
      "epoch": 0.1630516080777861,
      "grad_norm": 2.1802123067545134,
      "learning_rate": 9.880141543108816e-06,
      "loss": 0.6354,
      "step": 545
    },
    {
      "epoch": 0.16454749439042632,
      "grad_norm": 2.3111352831943086,
      "learning_rate": 9.874390352048646e-06,
      "loss": 0.6422,
      "step": 550
    },
    {
      "epoch": 0.16604338070306657,
      "grad_norm": 2.3857931202103524,
      "learning_rate": 9.868506148562107e-06,
      "loss": 0.6255,
      "step": 555
    },
    {
      "epoch": 0.16753926701570682,
      "grad_norm": 2.3118891681947518,
      "learning_rate": 9.862489093217422e-06,
      "loss": 0.6123,
      "step": 560
    },
    {
      "epoch": 0.16903515332834704,
      "grad_norm": 2.3891897641974165,
      "learning_rate": 9.856339350208073e-06,
      "loss": 0.6426,
      "step": 565
    },
    {
      "epoch": 0.1705310396409873,
      "grad_norm": 2.305906878734901,
      "learning_rate": 9.850057087348328e-06,
      "loss": 0.6199,
      "step": 570
    },
    {
      "epoch": 0.17202692595362754,
      "grad_norm": 2.1960382748129432,
      "learning_rate": 9.843642476068654e-06,
      "loss": 0.6095,
      "step": 575
    },
    {
      "epoch": 0.17352281226626776,
      "grad_norm": 2.0839495395902534,
      "learning_rate": 9.837095691411047e-06,
      "loss": 0.6131,
      "step": 580
    },
    {
      "epoch": 0.175018698578908,
      "grad_norm": 2.4685394970589685,
      "learning_rate": 9.83041691202425e-06,
      "loss": 0.6257,
      "step": 585
    },
    {
      "epoch": 0.17651458489154825,
      "grad_norm": 2.8548483464223957,
      "learning_rate": 9.82360632015888e-06,
      "loss": 0.5935,
      "step": 590
    },
    {
      "epoch": 0.17801047120418848,
      "grad_norm": 2.5687866778693347,
      "learning_rate": 9.816664101662458e-06,
      "loss": 0.6176,
      "step": 595
    },
    {
      "epoch": 0.17950635751682872,
      "grad_norm": 2.1643123544103497,
      "learning_rate": 9.809590445974328e-06,
      "loss": 0.6236,
      "step": 600
    },
    {
      "epoch": 0.18100224382946897,
      "grad_norm": 2.1920911452788023,
      "learning_rate": 9.802385546120498e-06,
      "loss": 0.6149,
      "step": 605
    },
    {
      "epoch": 0.1824981301421092,
      "grad_norm": 2.1719167623114046,
      "learning_rate": 9.795049598708369e-06,
      "loss": 0.6165,
      "step": 610
    },
    {
      "epoch": 0.18399401645474944,
      "grad_norm": 2.045624267196742,
      "learning_rate": 9.787582803921366e-06,
      "loss": 0.6056,
      "step": 615
    },
    {
      "epoch": 0.1854899027673897,
      "grad_norm": 2.1670193890658105,
      "learning_rate": 9.77998536551348e-06,
      "loss": 0.583,
      "step": 620
    },
    {
      "epoch": 0.1869857890800299,
      "grad_norm": 2.143005021612413,
      "learning_rate": 9.77225749080371e-06,
      "loss": 0.6025,
      "step": 625
    },
    {
      "epoch": 0.18848167539267016,
      "grad_norm": 2.2897606994593733,
      "learning_rate": 9.764399390670401e-06,
      "loss": 0.6044,
      "step": 630
    },
    {
      "epoch": 0.18997756170531038,
      "grad_norm": 2.1407407791372304,
      "learning_rate": 9.756411279545486e-06,
      "loss": 0.6028,
      "step": 635
    },
    {
      "epoch": 0.19147344801795063,
      "grad_norm": 2.1400040414477512,
      "learning_rate": 9.748293375408647e-06,
      "loss": 0.6008,
      "step": 640
    },
    {
      "epoch": 0.19296933433059088,
      "grad_norm": 2.3487555741055646,
      "learning_rate": 9.740045899781353e-06,
      "loss": 0.5905,
      "step": 645
    },
    {
      "epoch": 0.1944652206432311,
      "grad_norm": 2.211663714643132,
      "learning_rate": 9.731669077720828e-06,
      "loss": 0.5834,
      "step": 650
    },
    {
      "epoch": 0.19596110695587135,
      "grad_norm": 2.188161715718423,
      "learning_rate": 9.723163137813898e-06,
      "loss": 0.5855,
      "step": 655
    },
    {
      "epoch": 0.1974569932685116,
      "grad_norm": 2.133955120338045,
      "learning_rate": 9.714528312170762e-06,
      "loss": 0.5944,
      "step": 660
    },
    {
      "epoch": 0.19895287958115182,
      "grad_norm": 2.2340780975578527,
      "learning_rate": 9.705764836418648e-06,
      "loss": 0.583,
      "step": 665
    },
    {
      "epoch": 0.20044876589379207,
      "grad_norm": 2.3292781920189936,
      "learning_rate": 9.696872949695399e-06,
      "loss": 0.5827,
      "step": 670
    },
    {
      "epoch": 0.20194465220643232,
      "grad_norm": 2.3176955302107647,
      "learning_rate": 9.687852894642932e-06,
      "loss": 0.584,
      "step": 675
    },
    {
      "epoch": 0.20344053851907254,
      "grad_norm": 2.2410986216187863,
      "learning_rate": 9.678704917400628e-06,
      "loss": 0.5702,
      "step": 680
    },
    {
      "epoch": 0.2049364248317128,
      "grad_norm": 2.2113552696479766,
      "learning_rate": 9.669429267598603e-06,
      "loss": 0.5656,
      "step": 685
    },
    {
      "epoch": 0.20643231114435304,
      "grad_norm": 2.1894234586204613,
      "learning_rate": 9.660026198350906e-06,
      "loss": 0.5688,
      "step": 690
    },
    {
      "epoch": 0.20792819745699326,
      "grad_norm": 2.2894157314528183,
      "learning_rate": 9.650495966248618e-06,
      "loss": 0.5563,
      "step": 695
    },
    {
      "epoch": 0.2094240837696335,
      "grad_norm": 2.2231586059805863,
      "learning_rate": 9.64083883135283e-06,
      "loss": 0.5642,
      "step": 700
    },
    {
      "epoch": 0.21091997008227376,
      "grad_norm": 2.227615707267463,
      "learning_rate": 9.631055057187564e-06,
      "loss": 0.5788,
      "step": 705
    },
    {
      "epoch": 0.21241585639491398,
      "grad_norm": 2.155741018622304,
      "learning_rate": 9.621144910732573e-06,
      "loss": 0.5634,
      "step": 710
    },
    {
      "epoch": 0.21391174270755423,
      "grad_norm": 2.396343334926677,
      "learning_rate": 9.611108662416064e-06,
      "loss": 0.5655,
      "step": 715
    },
    {
      "epoch": 0.21540762902019447,
      "grad_norm": 2.331449791458783,
      "learning_rate": 9.600946586107306e-06,
      "loss": 0.5739,
      "step": 720
    },
    {
      "epoch": 0.2169035153328347,
      "grad_norm": 2.2507152546219924,
      "learning_rate": 9.590658959109168e-06,
      "loss": 0.5768,
      "step": 725
    },
    {
      "epoch": 0.21839940164547494,
      "grad_norm": 2.164980578292193,
      "learning_rate": 9.58024606215055e-06,
      "loss": 0.5517,
      "step": 730
    },
    {
      "epoch": 0.2198952879581152,
      "grad_norm": 2.2186056393230484,
      "learning_rate": 9.569708179378716e-06,
      "loss": 0.5773,
      "step": 735
    },
    {
      "epoch": 0.22139117427075541,
      "grad_norm": 2.1412265933937245,
      "learning_rate": 9.559045598351544e-06,
      "loss": 0.5597,
      "step": 740
    },
    {
      "epoch": 0.22288706058339566,
      "grad_norm": 2.113998854082962,
      "learning_rate": 9.548258610029684e-06,
      "loss": 0.5602,
      "step": 745
    },
    {
      "epoch": 0.2243829468960359,
      "grad_norm": 2.1066935794719823,
      "learning_rate": 9.537347508768613e-06,
      "loss": 0.553,
      "step": 750
    },
    {
      "epoch": 0.22587883320867613,
      "grad_norm": 2.1269652854319285,
      "learning_rate": 9.526312592310597e-06,
      "loss": 0.5462,
      "step": 755
    },
    {
      "epoch": 0.22737471952131638,
      "grad_norm": 2.1421869014604966,
      "learning_rate": 9.515154161776584e-06,
      "loss": 0.5508,
      "step": 760
    },
    {
      "epoch": 0.22887060583395663,
      "grad_norm": 2.116284198421969,
      "learning_rate": 9.503872521657964e-06,
      "loss": 0.549,
      "step": 765
    },
    {
      "epoch": 0.23036649214659685,
      "grad_norm": 2.0774732327342673,
      "learning_rate": 9.49246797980828e-06,
      "loss": 0.5485,
      "step": 770
    },
    {
      "epoch": 0.2318623784592371,
      "grad_norm": 2.276120847003367,
      "learning_rate": 9.480940847434814e-06,
      "loss": 0.5553,
      "step": 775
    },
    {
      "epoch": 0.23335826477187735,
      "grad_norm": 2.1356056201671882,
      "learning_rate": 9.469291439090104e-06,
      "loss": 0.5465,
      "step": 780
    },
    {
      "epoch": 0.23485415108451757,
      "grad_norm": 2.048373811826588,
      "learning_rate": 9.457520072663353e-06,
      "loss": 0.5396,
      "step": 785
    },
    {
      "epoch": 0.23635003739715782,
      "grad_norm": 2.2466734007706397,
      "learning_rate": 9.445627069371758e-06,
      "loss": 0.5688,
      "step": 790
    },
    {
      "epoch": 0.23784592370979807,
      "grad_norm": 2.3976619549715292,
      "learning_rate": 9.433612753751748e-06,
      "loss": 0.5496,
      "step": 795
    },
    {
      "epoch": 0.2393418100224383,
      "grad_norm": 2.0982203268057793,
      "learning_rate": 9.421477453650118e-06,
      "loss": 0.5482,
      "step": 800
    },
    {
      "epoch": 0.24083769633507854,
      "grad_norm": 2.1926594347223936,
      "learning_rate": 9.409221500215096e-06,
      "loss": 0.5281,
      "step": 805
    },
    {
      "epoch": 0.2423335826477188,
      "grad_norm": 2.046500172753204,
      "learning_rate": 9.396845227887295e-06,
      "loss": 0.5495,
      "step": 810
    },
    {
      "epoch": 0.243829468960359,
      "grad_norm": 2.116270403530158,
      "learning_rate": 9.38434897439059e-06,
      "loss": 0.5333,
      "step": 815
    },
    {
      "epoch": 0.24532535527299926,
      "grad_norm": 2.1427393113292026,
      "learning_rate": 9.371733080722911e-06,
      "loss": 0.5314,
      "step": 820
    },
    {
      "epoch": 0.24682124158563948,
      "grad_norm": 2.2287931226941766,
      "learning_rate": 9.358997891146924e-06,
      "loss": 0.5389,
      "step": 825
    },
    {
      "epoch": 0.24831712789827973,
      "grad_norm": 2.183511996335904,
      "learning_rate": 9.346143753180646e-06,
      "loss": 0.5332,
      "step": 830
    },
    {
      "epoch": 0.24981301421091998,
      "grad_norm": 2.1563125330336077,
      "learning_rate": 9.333171017587956e-06,
      "loss": 0.5278,
      "step": 835
    },
    {
      "epoch": 0.2513089005235602,
      "grad_norm": 2.384672087516804,
      "learning_rate": 9.320080038369032e-06,
      "loss": 0.5321,
      "step": 840
    },
    {
      "epoch": 0.25280478683620045,
      "grad_norm": 2.2250998536771154,
      "learning_rate": 9.30687117275068e-06,
      "loss": 0.5237,
      "step": 845
    },
    {
      "epoch": 0.2543006731488407,
      "grad_norm": 2.3295538202244237,
      "learning_rate": 9.293544781176598e-06,
      "loss": 0.5238,
      "step": 850
    },
    {
      "epoch": 0.25579655946148094,
      "grad_norm": 2.46386287871832,
      "learning_rate": 9.280101227297526e-06,
      "loss": 0.5274,
      "step": 855
    },
    {
      "epoch": 0.2572924457741212,
      "grad_norm": 2.2480305463427865,
      "learning_rate": 9.266540877961337e-06,
      "loss": 0.535,
      "step": 860
    },
    {
      "epoch": 0.2587883320867614,
      "grad_norm": 2.1850110027540826,
      "learning_rate": 9.252864103203015e-06,
      "loss": 0.5216,
      "step": 865
    },
    {
      "epoch": 0.26028421839940163,
      "grad_norm": 2.1759114077528845,
      "learning_rate": 9.239071276234568e-06,
      "loss": 0.5162,
      "step": 870
    },
    {
      "epoch": 0.2617801047120419,
      "grad_norm": 2.1338769320741515,
      "learning_rate": 9.225162773434831e-06,
      "loss": 0.5143,
      "step": 875
    },
    {
      "epoch": 0.26327599102468213,
      "grad_norm": 2.1659203361390063,
      "learning_rate": 9.21113897433921e-06,
      "loss": 0.5103,
      "step": 880
    },
    {
      "epoch": 0.2647718773373224,
      "grad_norm": 2.122282430960376,
      "learning_rate": 9.197000261629314e-06,
      "loss": 0.5081,
      "step": 885
    },
    {
      "epoch": 0.26626776364996263,
      "grad_norm": 2.056748593014802,
      "learning_rate": 9.182747021122516e-06,
      "loss": 0.5117,
      "step": 890
    },
    {
      "epoch": 0.2677636499626028,
      "grad_norm": 2.203097118962648,
      "learning_rate": 9.168379641761425e-06,
      "loss": 0.5166,
      "step": 895
    },
    {
      "epoch": 0.26925953627524307,
      "grad_norm": 2.248299702751712,
      "learning_rate": 9.153898515603272e-06,
      "loss": 0.5121,
      "step": 900
    },
    {
      "epoch": 0.2707554225878833,
      "grad_norm": 2.215308947297488,
      "learning_rate": 9.139304037809216e-06,
      "loss": 0.5151,
      "step": 905
    },
    {
      "epoch": 0.27225130890052357,
      "grad_norm": 2.115586467592,
      "learning_rate": 9.124596606633551e-06,
      "loss": 0.5083,
      "step": 910
    },
    {
      "epoch": 0.2737471952131638,
      "grad_norm": 2.2977950459018017,
      "learning_rate": 9.10977662341285e-06,
      "loss": 0.5153,
      "step": 915
    },
    {
      "epoch": 0.275243081525804,
      "grad_norm": 2.205780583800523,
      "learning_rate": 9.094844492555004e-06,
      "loss": 0.5123,
      "step": 920
    },
    {
      "epoch": 0.27673896783844426,
      "grad_norm": 2.227802917043228,
      "learning_rate": 9.07980062152819e-06,
      "loss": 0.5117,
      "step": 925
    },
    {
      "epoch": 0.2782348541510845,
      "grad_norm": 2.2359783620231632,
      "learning_rate": 9.064645420849754e-06,
      "loss": 0.5022,
      "step": 930
    },
    {
      "epoch": 0.27973074046372476,
      "grad_norm": 2.1642613110172366,
      "learning_rate": 9.049379304075009e-06,
      "loss": 0.4907,
      "step": 935
    },
    {
      "epoch": 0.281226626776365,
      "grad_norm": 2.2277389804733447,
      "learning_rate": 9.03400268778594e-06,
      "loss": 0.5011,
      "step": 940
    },
    {
      "epoch": 0.28272251308900526,
      "grad_norm": 2.1493583853918907,
      "learning_rate": 9.018515991579851e-06,
      "loss": 0.5019,
      "step": 945
    },
    {
      "epoch": 0.28421839940164545,
      "grad_norm": 2.4395894627674073,
      "learning_rate": 9.002919638057908e-06,
      "loss": 0.5033,
      "step": 950
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 2.2370400153506806,
      "learning_rate": 8.987214052813605e-06,
      "loss": 0.5045,
      "step": 955
    },
    {
      "epoch": 0.28721017202692595,
      "grad_norm": 2.078576437577485,
      "learning_rate": 8.971399664421154e-06,
      "loss": 0.5009,
      "step": 960
    },
    {
      "epoch": 0.2887060583395662,
      "grad_norm": 2.2142839400817937,
      "learning_rate": 8.955476904423785e-06,
      "loss": 0.5023,
      "step": 965
    },
    {
      "epoch": 0.29020194465220644,
      "grad_norm": 2.14232609513754,
      "learning_rate": 8.939446207321982e-06,
      "loss": 0.477,
      "step": 970
    },
    {
      "epoch": 0.2916978309648467,
      "grad_norm": 2.21107323554905,
      "learning_rate": 8.923308010561608e-06,
      "loss": 0.4994,
      "step": 975
    },
    {
      "epoch": 0.2931937172774869,
      "grad_norm": 2.1386395431438054,
      "learning_rate": 8.907062754521985e-06,
      "loss": 0.5023,
      "step": 980
    },
    {
      "epoch": 0.29468960359012714,
      "grad_norm": 2.1332355719651037,
      "learning_rate": 8.89071088250387e-06,
      "loss": 0.4843,
      "step": 985
    },
    {
      "epoch": 0.2961854899027674,
      "grad_norm": 2.0749503641930276,
      "learning_rate": 8.87425284071735e-06,
      "loss": 0.4942,
      "step": 990
    },
    {
      "epoch": 0.29768137621540763,
      "grad_norm": 2.159991846647922,
      "learning_rate": 8.857689078269688e-06,
      "loss": 0.5108,
      "step": 995
    },
    {
      "epoch": 0.2991772625280479,
      "grad_norm": 2.1267522505598446,
      "learning_rate": 8.841020047153039e-06,
      "loss": 0.4935,
      "step": 1000
    },
    {
      "epoch": 0.30067314884068813,
      "grad_norm": 2.1642503588715245,
      "learning_rate": 8.824246202232142e-06,
      "loss": 0.4907,
      "step": 1005
    },
    {
      "epoch": 0.3021690351533283,
      "grad_norm": 2.084991570149356,
      "learning_rate": 8.80736800123189e-06,
      "loss": 0.4781,
      "step": 1010
    },
    {
      "epoch": 0.3036649214659686,
      "grad_norm": 2.1035440822771223,
      "learning_rate": 8.790385904724848e-06,
      "loss": 0.4845,
      "step": 1015
    },
    {
      "epoch": 0.3051608077786088,
      "grad_norm": 2.1736909744601687,
      "learning_rate": 8.773300376118685e-06,
      "loss": 0.4801,
      "step": 1020
    },
    {
      "epoch": 0.30665669409124907,
      "grad_norm": 2.2520314938860815,
      "learning_rate": 8.75611188164352e-06,
      "loss": 0.4893,
      "step": 1025
    },
    {
      "epoch": 0.3081525804038893,
      "grad_norm": 2.1104641749948403,
      "learning_rate": 8.738820890339217e-06,
      "loss": 0.4938,
      "step": 1030
    },
    {
      "epoch": 0.30964846671652957,
      "grad_norm": 2.0838403753220986,
      "learning_rate": 8.721427874042563e-06,
      "loss": 0.4835,
      "step": 1035
    },
    {
      "epoch": 0.31114435302916976,
      "grad_norm": 2.0711510810184266,
      "learning_rate": 8.703933307374413e-06,
      "loss": 0.4725,
      "step": 1040
    },
    {
      "epoch": 0.31264023934181,
      "grad_norm": 2.1063779245743888,
      "learning_rate": 8.686337667726723e-06,
      "loss": 0.4892,
      "step": 1045
    },
    {
      "epoch": 0.31413612565445026,
      "grad_norm": 2.1105067703269422,
      "learning_rate": 8.668641435249534e-06,
      "loss": 0.4825,
      "step": 1050
    },
    {
      "epoch": 0.3156320119670905,
      "grad_norm": 2.102573408737706,
      "learning_rate": 8.650845092837867e-06,
      "loss": 0.4885,
      "step": 1055
    },
    {
      "epoch": 0.31712789827973076,
      "grad_norm": 2.2988609972066274,
      "learning_rate": 8.632949126118538e-06,
      "loss": 0.4752,
      "step": 1060
    },
    {
      "epoch": 0.318623784592371,
      "grad_norm": 2.122502919871484,
      "learning_rate": 8.61495402343692e-06,
      "loss": 0.4769,
      "step": 1065
    },
    {
      "epoch": 0.3201196709050112,
      "grad_norm": 2.165018274340972,
      "learning_rate": 8.596860275843602e-06,
      "loss": 0.4671,
      "step": 1070
    },
    {
      "epoch": 0.32161555721765145,
      "grad_norm": 1.9717223958070753,
      "learning_rate": 8.578668377081001e-06,
      "loss": 0.4675,
      "step": 1075
    },
    {
      "epoch": 0.3231114435302917,
      "grad_norm": 2.1031743583556803,
      "learning_rate": 8.560378823569886e-06,
      "loss": 0.4713,
      "step": 1080
    },
    {
      "epoch": 0.32460732984293195,
      "grad_norm": 2.0178473800411307,
      "learning_rate": 8.541992114395825e-06,
      "loss": 0.4715,
      "step": 1085
    },
    {
      "epoch": 0.3261032161555722,
      "grad_norm": 2.0225831073597007,
      "learning_rate": 8.523508751295574e-06,
      "loss": 0.4772,
      "step": 1090
    },
    {
      "epoch": 0.3275991024682124,
      "grad_norm": 2.087877364586164,
      "learning_rate": 8.504929238643381e-06,
      "loss": 0.4688,
      "step": 1095
    },
    {
      "epoch": 0.32909498878085264,
      "grad_norm": 2.160270876260719,
      "learning_rate": 8.486254083437227e-06,
      "loss": 0.4665,
      "step": 1100
    },
    {
      "epoch": 0.3305908750934929,
      "grad_norm": 2.060627567407879,
      "learning_rate": 8.467483795284987e-06,
      "loss": 0.4617,
      "step": 1105
    },
    {
      "epoch": 0.33208676140613314,
      "grad_norm": 2.1150833498354893,
      "learning_rate": 8.448618886390523e-06,
      "loss": 0.4676,
      "step": 1110
    },
    {
      "epoch": 0.3335826477187734,
      "grad_norm": 2.026961078510351,
      "learning_rate": 8.429659871539709e-06,
      "loss": 0.4772,
      "step": 1115
    },
    {
      "epoch": 0.33507853403141363,
      "grad_norm": 2.6128907908421852,
      "learning_rate": 8.410607268086388e-06,
      "loss": 0.4678,
      "step": 1120
    },
    {
      "epoch": 0.3365744203440538,
      "grad_norm": 2.1162204368840185,
      "learning_rate": 8.391461595938245e-06,
      "loss": 0.4728,
      "step": 1125
    },
    {
      "epoch": 0.3380703066566941,
      "grad_norm": 2.0236924938571095,
      "learning_rate": 8.372223377542631e-06,
      "loss": 0.4556,
      "step": 1130
    },
    {
      "epoch": 0.3395661929693343,
      "grad_norm": 2.0470011404134345,
      "learning_rate": 8.352893137872292e-06,
      "loss": 0.4476,
      "step": 1135
    },
    {
      "epoch": 0.3410620792819746,
      "grad_norm": 2.100726525573022,
      "learning_rate": 8.333471404411054e-06,
      "loss": 0.458,
      "step": 1140
    },
    {
      "epoch": 0.3425579655946148,
      "grad_norm": 2.162727675316811,
      "learning_rate": 8.313958707139434e-06,
      "loss": 0.4751,
      "step": 1145
    },
    {
      "epoch": 0.34405385190725507,
      "grad_norm": 2.0116970709952495,
      "learning_rate": 8.29435557852016e-06,
      "loss": 0.4647,
      "step": 1150
    },
    {
      "epoch": 0.34554973821989526,
      "grad_norm": 1.9814728402387116,
      "learning_rate": 8.274662553483662e-06,
      "loss": 0.4441,
      "step": 1155
    },
    {
      "epoch": 0.3470456245325355,
      "grad_norm": 2.0453867973962607,
      "learning_rate": 8.254880169413455e-06,
      "loss": 0.4613,
      "step": 1160
    },
    {
      "epoch": 0.34854151084517576,
      "grad_norm": 2.0854663750868268,
      "learning_rate": 8.235008966131492e-06,
      "loss": 0.456,
      "step": 1165
    },
    {
      "epoch": 0.350037397157816,
      "grad_norm": 2.1204249951123706,
      "learning_rate": 8.215049485883419e-06,
      "loss": 0.4526,
      "step": 1170
    },
    {
      "epoch": 0.35153328347045626,
      "grad_norm": 2.125080372850005,
      "learning_rate": 8.195002273323792e-06,
      "loss": 0.4442,
      "step": 1175
    },
    {
      "epoch": 0.3530291697830965,
      "grad_norm": 2.2626876387499224,
      "learning_rate": 8.174867875501203e-06,
      "loss": 0.4491,
      "step": 1180
    },
    {
      "epoch": 0.3545250560957367,
      "grad_norm": 2.17644103793076,
      "learning_rate": 8.154646841843358e-06,
      "loss": 0.449,
      "step": 1185
    },
    {
      "epoch": 0.35602094240837695,
      "grad_norm": 1.9934405786856697,
      "learning_rate": 8.134339724142083e-06,
      "loss": 0.4491,
      "step": 1190
    },
    {
      "epoch": 0.3575168287210172,
      "grad_norm": 1.9811124546772585,
      "learning_rate": 8.113947076538264e-06,
      "loss": 0.4412,
      "step": 1195
    },
    {
      "epoch": 0.35901271503365745,
      "grad_norm": 2.1197485018681785,
      "learning_rate": 8.093469455506731e-06,
      "loss": 0.4448,
      "step": 1200
    },
    {
      "epoch": 0.3605086013462977,
      "grad_norm": 2.0582968984341967,
      "learning_rate": 8.07290741984107e-06,
      "loss": 0.4397,
      "step": 1205
    },
    {
      "epoch": 0.36200448765893795,
      "grad_norm": 1.9803742197531462,
      "learning_rate": 8.052261530638375e-06,
      "loss": 0.4486,
      "step": 1210
    },
    {
      "epoch": 0.36350037397157814,
      "grad_norm": 1.9763814917893987,
      "learning_rate": 8.03153235128393e-06,
      "loss": 0.4379,
      "step": 1215
    },
    {
      "epoch": 0.3649962602842184,
      "grad_norm": 2.177684384739003,
      "learning_rate": 8.01072044743585e-06,
      "loss": 0.4448,
      "step": 1220
    },
    {
      "epoch": 0.36649214659685864,
      "grad_norm": 2.2177683069308047,
      "learning_rate": 7.989826387009634e-06,
      "loss": 0.4398,
      "step": 1225
    },
    {
      "epoch": 0.3679880329094989,
      "grad_norm": 2.0614298881537416,
      "learning_rate": 7.96885074016267e-06,
      "loss": 0.438,
      "step": 1230
    },
    {
      "epoch": 0.36948391922213913,
      "grad_norm": 2.063175118233129,
      "learning_rate": 7.947794079278678e-06,
      "loss": 0.4353,
      "step": 1235
    },
    {
      "epoch": 0.3709798055347794,
      "grad_norm": 2.0902885795644943,
      "learning_rate": 7.926656978952089e-06,
      "loss": 0.4369,
      "step": 1240
    },
    {
      "epoch": 0.3724756918474196,
      "grad_norm": 2.081819065453435,
      "learning_rate": 7.905440015972372e-06,
      "loss": 0.4392,
      "step": 1245
    },
    {
      "epoch": 0.3739715781600598,
      "grad_norm": 1.9635390617281576,
      "learning_rate": 7.884143769308276e-06,
      "loss": 0.4318,
      "step": 1250
    },
    {
      "epoch": 0.3754674644727001,
      "grad_norm": 2.010397135845292,
      "learning_rate": 7.862768820092061e-06,
      "loss": 0.4294,
      "step": 1255
    },
    {
      "epoch": 0.3769633507853403,
      "grad_norm": 2.120029095014225,
      "learning_rate": 7.84131575160361e-06,
      "loss": 0.4367,
      "step": 1260
    },
    {
      "epoch": 0.37845923709798057,
      "grad_norm": 2.047223712557703,
      "learning_rate": 7.819785149254534e-06,
      "loss": 0.4247,
      "step": 1265
    },
    {
      "epoch": 0.37995512341062077,
      "grad_norm": 2.1565665198769546,
      "learning_rate": 7.798177600572184e-06,
      "loss": 0.4545,
      "step": 1270
    },
    {
      "epoch": 0.381451009723261,
      "grad_norm": 1.9698630282226646,
      "learning_rate": 7.776493695183623e-06,
      "loss": 0.4327,
      "step": 1275
    },
    {
      "epoch": 0.38294689603590126,
      "grad_norm": 2.027501209185265,
      "learning_rate": 7.754734024799544e-06,
      "loss": 0.4378,
      "step": 1280
    },
    {
      "epoch": 0.3844427823485415,
      "grad_norm": 1.9336783003915325,
      "learning_rate": 7.732899183198108e-06,
      "loss": 0.4199,
      "step": 1285
    },
    {
      "epoch": 0.38593866866118176,
      "grad_norm": 2.074909881667748,
      "learning_rate": 7.710989766208751e-06,
      "loss": 0.431,
      "step": 1290
    },
    {
      "epoch": 0.387434554973822,
      "grad_norm": 2.08466673344805,
      "learning_rate": 7.689006371695928e-06,
      "loss": 0.436,
      "step": 1295
    },
    {
      "epoch": 0.3889304412864622,
      "grad_norm": 2.0101045976441334,
      "learning_rate": 7.666949599542788e-06,
      "loss": 0.4363,
      "step": 1300
    },
    {
      "epoch": 0.39042632759910245,
      "grad_norm": 2.1388630620219304,
      "learning_rate": 7.644820051634813e-06,
      "loss": 0.4353,
      "step": 1305
    },
    {
      "epoch": 0.3919222139117427,
      "grad_norm": 1.9897181694789714,
      "learning_rate": 7.62261833184339e-06,
      "loss": 0.4321,
      "step": 1310
    },
    {
      "epoch": 0.39341810022438295,
      "grad_norm": 2.069750404086554,
      "learning_rate": 7.60034504600933e-06,
      "loss": 0.4166,
      "step": 1315
    },
    {
      "epoch": 0.3949139865370232,
      "grad_norm": 2.0828214162126564,
      "learning_rate": 7.5780008019263465e-06,
      "loss": 0.4309,
      "step": 1320
    },
    {
      "epoch": 0.39640987284966345,
      "grad_norm": 2.1311064881304183,
      "learning_rate": 7.555586209324455e-06,
      "loss": 0.42,
      "step": 1325
    },
    {
      "epoch": 0.39790575916230364,
      "grad_norm": 2.0067032988225715,
      "learning_rate": 7.533101879853348e-06,
      "loss": 0.4247,
      "step": 1330
    },
    {
      "epoch": 0.3994016454749439,
      "grad_norm": 2.1601395941384514,
      "learning_rate": 7.510548427065693e-06,
      "loss": 0.4103,
      "step": 1335
    },
    {
      "epoch": 0.40089753178758414,
      "grad_norm": 2.0545268261654166,
      "learning_rate": 7.487926466400403e-06,
      "loss": 0.418,
      "step": 1340
    },
    {
      "epoch": 0.4023934181002244,
      "grad_norm": 2.029856636678106,
      "learning_rate": 7.465236615165826e-06,
      "loss": 0.4265,
      "step": 1345
    },
    {
      "epoch": 0.40388930441286464,
      "grad_norm": 1.9396811090214083,
      "learning_rate": 7.4424794925229175e-06,
      "loss": 0.4241,
      "step": 1350
    },
    {
      "epoch": 0.4053851907255049,
      "grad_norm": 2.073788987162284,
      "learning_rate": 7.4196557194683265e-06,
      "loss": 0.4039,
      "step": 1355
    },
    {
      "epoch": 0.4068810770381451,
      "grad_norm": 2.070263015501858,
      "learning_rate": 7.3967659188174676e-06,
      "loss": 0.4331,
      "step": 1360
    },
    {
      "epoch": 0.4083769633507853,
      "grad_norm": 1.957024406881209,
      "learning_rate": 7.373810715187516e-06,
      "loss": 0.4198,
      "step": 1365
    },
    {
      "epoch": 0.4098728496634256,
      "grad_norm": 2.0021094595131705,
      "learning_rate": 7.350790734980359e-06,
      "loss": 0.4138,
      "step": 1370
    },
    {
      "epoch": 0.4113687359760658,
      "grad_norm": 2.038893591791927,
      "learning_rate": 7.327706606365512e-06,
      "loss": 0.4099,
      "step": 1375
    },
    {
      "epoch": 0.4128646222887061,
      "grad_norm": 2.091182328954734,
      "learning_rate": 7.304558959262973e-06,
      "loss": 0.4091,
      "step": 1380
    },
    {
      "epoch": 0.4143605086013463,
      "grad_norm": 2.005484469630839,
      "learning_rate": 7.281348425326034e-06,
      "loss": 0.4071,
      "step": 1385
    },
    {
      "epoch": 0.4158563949139865,
      "grad_norm": 2.000171729890043,
      "learning_rate": 7.258075637924039e-06,
      "loss": 0.4077,
      "step": 1390
    },
    {
      "epoch": 0.41735228122662676,
      "grad_norm": 1.88335343776708,
      "learning_rate": 7.234741232125111e-06,
      "loss": 0.4106,
      "step": 1395
    },
    {
      "epoch": 0.418848167539267,
      "grad_norm": 2.041697368575073,
      "learning_rate": 7.211345844678816e-06,
      "loss": 0.4124,
      "step": 1400
    },
    {
      "epoch": 0.42034405385190726,
      "grad_norm": 2.1120074891606313,
      "learning_rate": 7.1878901139987826e-06,
      "loss": 0.414,
      "step": 1405
    },
    {
      "epoch": 0.4218399401645475,
      "grad_norm": 2.017409414749495,
      "learning_rate": 7.164374680145293e-06,
      "loss": 0.4038,
      "step": 1410
    },
    {
      "epoch": 0.42333582647718776,
      "grad_norm": 2.0432465019716144,
      "learning_rate": 7.140800184807805e-06,
      "loss": 0.4073,
      "step": 1415
    },
    {
      "epoch": 0.42483171278982795,
      "grad_norm": 2.060077990063716,
      "learning_rate": 7.117167271287453e-06,
      "loss": 0.4068,
      "step": 1420
    },
    {
      "epoch": 0.4263275991024682,
      "grad_norm": 2.027592571205212,
      "learning_rate": 7.09347658447948e-06,
      "loss": 0.4042,
      "step": 1425
    },
    {
      "epoch": 0.42782348541510845,
      "grad_norm": 2.040823863949173,
      "learning_rate": 7.069728770855652e-06,
      "loss": 0.4034,
      "step": 1430
    },
    {
      "epoch": 0.4293193717277487,
      "grad_norm": 2.1465152715010722,
      "learning_rate": 7.0459244784466115e-06,
      "loss": 0.4049,
      "step": 1435
    },
    {
      "epoch": 0.43081525804038895,
      "grad_norm": 2.017024929241199,
      "learning_rate": 7.022064356824196e-06,
      "loss": 0.4051,
      "step": 1440
    },
    {
      "epoch": 0.4323111443530292,
      "grad_norm": 1.9756966229288817,
      "learning_rate": 6.998149057083711e-06,
      "loss": 0.3991,
      "step": 1445
    },
    {
      "epoch": 0.4338070306656694,
      "grad_norm": 1.9869718270881975,
      "learning_rate": 6.9741792318261585e-06,
      "loss": 0.4029,
      "step": 1450
    },
    {
      "epoch": 0.43530291697830964,
      "grad_norm": 2.1015981628011136,
      "learning_rate": 6.950155535140439e-06,
      "loss": 0.3998,
      "step": 1455
    },
    {
      "epoch": 0.4367988032909499,
      "grad_norm": 2.1512869214406174,
      "learning_rate": 6.926078622585496e-06,
      "loss": 0.4001,
      "step": 1460
    },
    {
      "epoch": 0.43829468960359014,
      "grad_norm": 2.0152270376530677,
      "learning_rate": 6.901949151172427e-06,
      "loss": 0.4047,
      "step": 1465
    },
    {
      "epoch": 0.4397905759162304,
      "grad_norm": 2.11665136839116,
      "learning_rate": 6.877767779346556e-06,
      "loss": 0.4064,
      "step": 1470
    },
    {
      "epoch": 0.4412864622288706,
      "grad_norm": 2.231208727114714,
      "learning_rate": 6.8535351669694694e-06,
      "loss": 0.3884,
      "step": 1475
    },
    {
      "epoch": 0.44278234854151083,
      "grad_norm": 1.9444993004804072,
      "learning_rate": 6.829251975301003e-06,
      "loss": 0.3949,
      "step": 1480
    },
    {
      "epoch": 0.4442782348541511,
      "grad_norm": 1.98272069907838,
      "learning_rate": 6.8049188669812024e-06,
      "loss": 0.395,
      "step": 1485
    },
    {
      "epoch": 0.4457741211667913,
      "grad_norm": 1.9120999593676538,
      "learning_rate": 6.7805365060122386e-06,
      "loss": 0.3968,
      "step": 1490
    },
    {
      "epoch": 0.4472700074794316,
      "grad_norm": 2.0053365034386186,
      "learning_rate": 6.756105557740289e-06,
      "loss": 0.402,
      "step": 1495
    },
    {
      "epoch": 0.4487658937920718,
      "grad_norm": 1.9514629474872618,
      "learning_rate": 6.731626688837387e-06,
      "loss": 0.3836,
      "step": 1500
    },
    {
      "epoch": 0.450261780104712,
      "grad_norm": 2.1087506038221955,
      "learning_rate": 6.707100567283217e-06,
      "loss": 0.3843,
      "step": 1505
    },
    {
      "epoch": 0.45175766641735227,
      "grad_norm": 2.1300871436189306,
      "learning_rate": 6.682527862346898e-06,
      "loss": 0.3996,
      "step": 1510
    },
    {
      "epoch": 0.4532535527299925,
      "grad_norm": 1.9854434493239195,
      "learning_rate": 6.657909244568721e-06,
      "loss": 0.4011,
      "step": 1515
    },
    {
      "epoch": 0.45474943904263276,
      "grad_norm": 1.9814246083045182,
      "learning_rate": 6.6332453857418375e-06,
      "loss": 0.4012,
      "step": 1520
    },
    {
      "epoch": 0.456245325355273,
      "grad_norm": 2.023928605650618,
      "learning_rate": 6.608536958893948e-06,
      "loss": 0.3962,
      "step": 1525
    },
    {
      "epoch": 0.45774121166791326,
      "grad_norm": 2.012248063709598,
      "learning_rate": 6.583784638268919e-06,
      "loss": 0.4001,
      "step": 1530
    },
    {
      "epoch": 0.45923709798055345,
      "grad_norm": 1.9208473033828253,
      "learning_rate": 6.5589890993083934e-06,
      "loss": 0.3965,
      "step": 1535
    },
    {
      "epoch": 0.4607329842931937,
      "grad_norm": 2.1713469007968476,
      "learning_rate": 6.534151018633355e-06,
      "loss": 0.3962,
      "step": 1540
    },
    {
      "epoch": 0.46222887060583395,
      "grad_norm": 2.079357026566145,
      "learning_rate": 6.509271074025668e-06,
      "loss": 0.3913,
      "step": 1545
    },
    {
      "epoch": 0.4637247569184742,
      "grad_norm": 1.9619607602752462,
      "learning_rate": 6.484349944409579e-06,
      "loss": 0.3885,
      "step": 1550
    },
    {
      "epoch": 0.46522064323111445,
      "grad_norm": 2.025383663450042,
      "learning_rate": 6.459388309833193e-06,
      "loss": 0.396,
      "step": 1555
    },
    {
      "epoch": 0.4667165295437547,
      "grad_norm": 1.9926140480691588,
      "learning_rate": 6.434386851449914e-06,
      "loss": 0.3978,
      "step": 1560
    },
    {
      "epoch": 0.4682124158563949,
      "grad_norm": 2.0001783057698677,
      "learning_rate": 6.409346251499859e-06,
      "loss": 0.3889,
      "step": 1565
    },
    {
      "epoch": 0.46970830216903514,
      "grad_norm": 1.9977538629610117,
      "learning_rate": 6.384267193291238e-06,
      "loss": 0.3872,
      "step": 1570
    },
    {
      "epoch": 0.4712041884816754,
      "grad_norm": 1.9725560974868908,
      "learning_rate": 6.3591503611817155e-06,
      "loss": 0.39,
      "step": 1575
    },
    {
      "epoch": 0.47270007479431564,
      "grad_norm": 1.9326640130746877,
      "learning_rate": 6.333996440559726e-06,
      "loss": 0.3815,
      "step": 1580
    },
    {
      "epoch": 0.4741959611069559,
      "grad_norm": 1.9055402813860574,
      "learning_rate": 6.308806117825777e-06,
      "loss": 0.3801,
      "step": 1585
    },
    {
      "epoch": 0.47569184741959614,
      "grad_norm": 1.9722002752461958,
      "learning_rate": 6.283580080373721e-06,
      "loss": 0.3804,
      "step": 1590
    },
    {
      "epoch": 0.47718773373223633,
      "grad_norm": 2.0082373206027526,
      "learning_rate": 6.25831901657199e-06,
      "loss": 0.3775,
      "step": 1595
    },
    {
      "epoch": 0.4786836200448766,
      "grad_norm": 1.9266846313881612,
      "learning_rate": 6.233023615744813e-06,
      "loss": 0.3883,
      "step": 1600
    },
    {
      "epoch": 0.4801795063575168,
      "grad_norm": 2.088263839026747,
      "learning_rate": 6.207694568153418e-06,
      "loss": 0.389,
      "step": 1605
    },
    {
      "epoch": 0.4816753926701571,
      "grad_norm": 2.1042065332247555,
      "learning_rate": 6.182332564977174e-06,
      "loss": 0.3792,
      "step": 1610
    },
    {
      "epoch": 0.4831712789827973,
      "grad_norm": 1.9720059814432505,
      "learning_rate": 6.156938298294752e-06,
      "loss": 0.3706,
      "step": 1615
    },
    {
      "epoch": 0.4846671652954376,
      "grad_norm": 1.9180314469419848,
      "learning_rate": 6.131512461065227e-06,
      "loss": 0.377,
      "step": 1620
    },
    {
      "epoch": 0.48616305160807777,
      "grad_norm": 2.2323059057893775,
      "learning_rate": 6.106055747109169e-06,
      "loss": 0.3737,
      "step": 1625
    },
    {
      "epoch": 0.487658937920718,
      "grad_norm": 1.9094100696871863,
      "learning_rate": 6.080568851089717e-06,
      "loss": 0.381,
      "step": 1630
    },
    {
      "epoch": 0.48915482423335827,
      "grad_norm": 1.8740312923707445,
      "learning_rate": 6.055052468493614e-06,
      "loss": 0.3712,
      "step": 1635
    },
    {
      "epoch": 0.4906507105459985,
      "grad_norm": 2.2274282125289364,
      "learning_rate": 6.029507295612235e-06,
      "loss": 0.3818,
      "step": 1640
    },
    {
      "epoch": 0.49214659685863876,
      "grad_norm": 1.9574498076717952,
      "learning_rate": 6.0039340295225845e-06,
      "loss": 0.3808,
      "step": 1645
    },
    {
      "epoch": 0.49364248317127896,
      "grad_norm": 2.019106731639845,
      "learning_rate": 5.978333368068278e-06,
      "loss": 0.3739,
      "step": 1650
    },
    {
      "epoch": 0.4951383694839192,
      "grad_norm": 2.003448813298111,
      "learning_rate": 5.952706009840491e-06,
      "loss": 0.3801,
      "step": 1655
    },
    {
      "epoch": 0.49663425579655945,
      "grad_norm": 2.033251099732251,
      "learning_rate": 5.9270526541589025e-06,
      "loss": 0.3719,
      "step": 1660
    },
    {
      "epoch": 0.4981301421091997,
      "grad_norm": 1.9736607384350244,
      "learning_rate": 5.901374001052614e-06,
      "loss": 0.3647,
      "step": 1665
    },
    {
      "epoch": 0.49962602842183995,
      "grad_norm": 1.8450885851841383,
      "learning_rate": 5.875670751241036e-06,
      "loss": 0.3694,
      "step": 1670
    },
    {
      "epoch": 0.5011219147344802,
      "grad_norm": 1.9550125676086019,
      "learning_rate": 5.849943606114782e-06,
      "loss": 0.3765,
      "step": 1675
    },
    {
      "epoch": 0.5026178010471204,
      "grad_norm": 1.9088487115244133,
      "learning_rate": 5.824193267716517e-06,
      "loss": 0.3628,
      "step": 1680
    },
    {
      "epoch": 0.5041136873597607,
      "grad_norm": 2.0236760384942887,
      "learning_rate": 5.798420438721804e-06,
      "loss": 0.3681,
      "step": 1685
    },
    {
      "epoch": 0.5056095736724009,
      "grad_norm": 1.8739105083496626,
      "learning_rate": 5.772625822419933e-06,
      "loss": 0.3626,
      "step": 1690
    },
    {
      "epoch": 0.5071054599850411,
      "grad_norm": 2.0538294961575048,
      "learning_rate": 5.74681012269472e-06,
      "loss": 0.3664,
      "step": 1695
    },
    {
      "epoch": 0.5086013462976814,
      "grad_norm": 1.9510478793415906,
      "learning_rate": 5.720974044005314e-06,
      "loss": 0.3687,
      "step": 1700
    },
    {
      "epoch": 0.5100972326103216,
      "grad_norm": 2.0027682706640206,
      "learning_rate": 5.695118291366959e-06,
      "loss": 0.3791,
      "step": 1705
    },
    {
      "epoch": 0.5115931189229619,
      "grad_norm": 2.017422075178467,
      "learning_rate": 5.669243570331766e-06,
      "loss": 0.3592,
      "step": 1710
    },
    {
      "epoch": 0.5130890052356021,
      "grad_norm": 1.9298103072373924,
      "learning_rate": 5.643350586969453e-06,
      "loss": 0.3624,
      "step": 1715
    },
    {
      "epoch": 0.5145848915482424,
      "grad_norm": 1.828229384099037,
      "learning_rate": 5.617440047848081e-06,
      "loss": 0.3693,
      "step": 1720
    },
    {
      "epoch": 0.5160807778608826,
      "grad_norm": 1.9835871613164413,
      "learning_rate": 5.591512660014773e-06,
      "loss": 0.367,
      "step": 1725
    },
    {
      "epoch": 0.5175766641735228,
      "grad_norm": 1.906111097537283,
      "learning_rate": 5.5655691309764225e-06,
      "loss": 0.3698,
      "step": 1730
    },
    {
      "epoch": 0.5190725504861631,
      "grad_norm": 1.8980070836105973,
      "learning_rate": 5.539610168680381e-06,
      "loss": 0.3617,
      "step": 1735
    },
    {
      "epoch": 0.5205684367988033,
      "grad_norm": 1.929980721348062,
      "learning_rate": 5.513636481495143e-06,
      "loss": 0.3603,
      "step": 1740
    },
    {
      "epoch": 0.5220643231114436,
      "grad_norm": 1.91015401663393,
      "learning_rate": 5.487648778191021e-06,
      "loss": 0.3533,
      "step": 1745
    },
    {
      "epoch": 0.5235602094240838,
      "grad_norm": 1.9455506909545779,
      "learning_rate": 5.4616477679207906e-06,
      "loss": 0.3746,
      "step": 1750
    },
    {
      "epoch": 0.525056095736724,
      "grad_norm": 1.8552115044332138,
      "learning_rate": 5.435634160200355e-06,
      "loss": 0.3583,
      "step": 1755
    },
    {
      "epoch": 0.5265519820493643,
      "grad_norm": 1.913776238110964,
      "learning_rate": 5.409608664889376e-06,
      "loss": 0.3571,
      "step": 1760
    },
    {
      "epoch": 0.5280478683620045,
      "grad_norm": 1.9566204864416041,
      "learning_rate": 5.383571992171904e-06,
      "loss": 0.3681,
      "step": 1765
    },
    {
      "epoch": 0.5295437546746448,
      "grad_norm": 2.0484694098984813,
      "learning_rate": 5.357524852536996e-06,
      "loss": 0.3579,
      "step": 1770
    },
    {
      "epoch": 0.531039640987285,
      "grad_norm": 1.9124761975111415,
      "learning_rate": 5.331467956759331e-06,
      "loss": 0.3508,
      "step": 1775
    },
    {
      "epoch": 0.5325355272999253,
      "grad_norm": 1.9151628917936323,
      "learning_rate": 5.305402015879817e-06,
      "loss": 0.3582,
      "step": 1780
    },
    {
      "epoch": 0.5340314136125655,
      "grad_norm": 1.8760817819604374,
      "learning_rate": 5.279327741186179e-06,
      "loss": 0.3607,
      "step": 1785
    },
    {
      "epoch": 0.5355272999252056,
      "grad_norm": 1.961131431192389,
      "learning_rate": 5.253245844193564e-06,
      "loss": 0.3517,
      "step": 1790
    },
    {
      "epoch": 0.537023186237846,
      "grad_norm": 1.971571895204417,
      "learning_rate": 5.227157036625108e-06,
      "loss": 0.3456,
      "step": 1795
    },
    {
      "epoch": 0.5385190725504861,
      "grad_norm": 1.8838335367241383,
      "learning_rate": 5.2010620303925275e-06,
      "loss": 0.3519,
      "step": 1800
    },
    {
      "epoch": 0.5400149588631264,
      "grad_norm": 1.829377568323147,
      "learning_rate": 5.174961537576685e-06,
      "loss": 0.3564,
      "step": 1805
    },
    {
      "epoch": 0.5415108451757666,
      "grad_norm": 1.8522486080816014,
      "learning_rate": 5.148856270408163e-06,
      "loss": 0.3568,
      "step": 1810
    },
    {
      "epoch": 0.5430067314884068,
      "grad_norm": 1.938579795945218,
      "learning_rate": 5.122746941247828e-06,
      "loss": 0.3607,
      "step": 1815
    },
    {
      "epoch": 0.5445026178010471,
      "grad_norm": 1.8962553032833915,
      "learning_rate": 5.096634262567388e-06,
      "loss": 0.3578,
      "step": 1820
    },
    {
      "epoch": 0.5459985041136873,
      "grad_norm": 1.7953489371783111,
      "learning_rate": 5.070518946929954e-06,
      "loss": 0.3495,
      "step": 1825
    },
    {
      "epoch": 0.5474943904263276,
      "grad_norm": 1.9518780090135102,
      "learning_rate": 5.044401706970592e-06,
      "loss": 0.3558,
      "step": 1830
    },
    {
      "epoch": 0.5489902767389678,
      "grad_norm": 2.029523910683152,
      "learning_rate": 5.018283255376882e-06,
      "loss": 0.3505,
      "step": 1835
    },
    {
      "epoch": 0.550486163051608,
      "grad_norm": 1.9831397143651377,
      "learning_rate": 4.992164304869464e-06,
      "loss": 0.3569,
      "step": 1840
    },
    {
      "epoch": 0.5519820493642483,
      "grad_norm": 2.1279272506945075,
      "learning_rate": 4.966045568182596e-06,
      "loss": 0.3372,
      "step": 1845
    },
    {
      "epoch": 0.5534779356768885,
      "grad_norm": 1.9637293854690605,
      "learning_rate": 4.939927758044698e-06,
      "loss": 0.3518,
      "step": 1850
    },
    {
      "epoch": 0.5549738219895288,
      "grad_norm": 2.0828701986556695,
      "learning_rate": 4.913811587158908e-06,
      "loss": 0.3443,
      "step": 1855
    },
    {
      "epoch": 0.556469708302169,
      "grad_norm": 1.981630887644782,
      "learning_rate": 4.887697768183633e-06,
      "loss": 0.3444,
      "step": 1860
    },
    {
      "epoch": 0.5579655946148093,
      "grad_norm": 1.8743980292802156,
      "learning_rate": 4.861587013713096e-06,
      "loss": 0.346,
      "step": 1865
    },
    {
      "epoch": 0.5594614809274495,
      "grad_norm": 1.9064350126377236,
      "learning_rate": 4.835480036257904e-06,
      "loss": 0.3467,
      "step": 1870
    },
    {
      "epoch": 0.5609573672400897,
      "grad_norm": 1.8972548935569284,
      "learning_rate": 4.809377548225589e-06,
      "loss": 0.3388,
      "step": 1875
    },
    {
      "epoch": 0.56245325355273,
      "grad_norm": 1.8677668781712522,
      "learning_rate": 4.783280261901179e-06,
      "loss": 0.3442,
      "step": 1880
    },
    {
      "epoch": 0.5639491398653702,
      "grad_norm": 1.900487755648876,
      "learning_rate": 4.757188889427761e-06,
      "loss": 0.3389,
      "step": 1885
    },
    {
      "epoch": 0.5654450261780105,
      "grad_norm": 1.9420787190248043,
      "learning_rate": 4.731104142787035e-06,
      "loss": 0.3472,
      "step": 1890
    },
    {
      "epoch": 0.5669409124906507,
      "grad_norm": 1.927314753260138,
      "learning_rate": 4.7050267337799074e-06,
      "loss": 0.3481,
      "step": 1895
    },
    {
      "epoch": 0.5684367988032909,
      "grad_norm": 1.9207634340998982,
      "learning_rate": 4.678957374007046e-06,
      "loss": 0.3424,
      "step": 1900
    },
    {
      "epoch": 0.5699326851159312,
      "grad_norm": 1.9212470848002643,
      "learning_rate": 4.652896774849477e-06,
      "loss": 0.3358,
      "step": 1905
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 1.924739770896096,
      "learning_rate": 4.626845647449161e-06,
      "loss": 0.3353,
      "step": 1910
    },
    {
      "epoch": 0.5729244577412117,
      "grad_norm": 1.9350839334038696,
      "learning_rate": 4.600804702689598e-06,
      "loss": 0.3348,
      "step": 1915
    },
    {
      "epoch": 0.5744203440538519,
      "grad_norm": 1.8695042520523082,
      "learning_rate": 4.57477465117642e-06,
      "loss": 0.338,
      "step": 1920
    },
    {
      "epoch": 0.5759162303664922,
      "grad_norm": 1.9312558535320394,
      "learning_rate": 4.54875620321801e-06,
      "loss": 0.343,
      "step": 1925
    },
    {
      "epoch": 0.5774121166791324,
      "grad_norm": 1.8821605226871228,
      "learning_rate": 4.522750068806107e-06,
      "loss": 0.3407,
      "step": 1930
    },
    {
      "epoch": 0.5789080029917726,
      "grad_norm": 2.0267756257950906,
      "learning_rate": 4.496756957596438e-06,
      "loss": 0.332,
      "step": 1935
    },
    {
      "epoch": 0.5804038893044129,
      "grad_norm": 1.87047157167879,
      "learning_rate": 4.4707775788893586e-06,
      "loss": 0.3377,
      "step": 1940
    },
    {
      "epoch": 0.5818997756170531,
      "grad_norm": 1.8334500325846965,
      "learning_rate": 4.444812641610482e-06,
      "loss": 0.3331,
      "step": 1945
    },
    {
      "epoch": 0.5833956619296934,
      "grad_norm": 1.865940227461524,
      "learning_rate": 4.418862854291356e-06,
      "loss": 0.3336,
      "step": 1950
    },
    {
      "epoch": 0.5848915482423336,
      "grad_norm": 1.8290658280068524,
      "learning_rate": 4.392928925050106e-06,
      "loss": 0.3237,
      "step": 1955
    },
    {
      "epoch": 0.5863874345549738,
      "grad_norm": 1.8622254708709993,
      "learning_rate": 4.3670115615721265e-06,
      "loss": 0.3376,
      "step": 1960
    },
    {
      "epoch": 0.5878833208676141,
      "grad_norm": 1.9201843032013242,
      "learning_rate": 4.341111471090762e-06,
      "loss": 0.3459,
      "step": 1965
    },
    {
      "epoch": 0.5893792071802543,
      "grad_norm": 1.8796680590731187,
      "learning_rate": 4.315229360368014e-06,
      "loss": 0.3278,
      "step": 1970
    },
    {
      "epoch": 0.5908750934928946,
      "grad_norm": 1.8016030738978284,
      "learning_rate": 4.289365935675255e-06,
      "loss": 0.3268,
      "step": 1975
    },
    {
      "epoch": 0.5923709798055348,
      "grad_norm": 1.9190193806693643,
      "learning_rate": 4.263521902773944e-06,
      "loss": 0.3333,
      "step": 1980
    },
    {
      "epoch": 0.5938668661181751,
      "grad_norm": 1.8784476290504393,
      "learning_rate": 4.237697966896385e-06,
      "loss": 0.3271,
      "step": 1985
    },
    {
      "epoch": 0.5953627524308153,
      "grad_norm": 1.8712250384764961,
      "learning_rate": 4.211894832726471e-06,
      "loss": 0.3342,
      "step": 1990
    },
    {
      "epoch": 0.5968586387434555,
      "grad_norm": 1.9036575376553382,
      "learning_rate": 4.1861132043804555e-06,
      "loss": 0.335,
      "step": 1995
    },
    {
      "epoch": 0.5983545250560958,
      "grad_norm": 1.9581740636617746,
      "learning_rate": 4.160353785387746e-06,
      "loss": 0.324,
      "step": 2000
    },
    {
      "epoch": 0.599850411368736,
      "grad_norm": 1.834158258904465,
      "learning_rate": 4.134617278671694e-06,
      "loss": 0.3278,
      "step": 2005
    },
    {
      "epoch": 0.6013462976813763,
      "grad_norm": 1.864212034584157,
      "learning_rate": 4.108904386530429e-06,
      "loss": 0.3293,
      "step": 2010
    },
    {
      "epoch": 0.6028421839940165,
      "grad_norm": 1.7424366923402765,
      "learning_rate": 4.083215810617678e-06,
      "loss": 0.327,
      "step": 2015
    },
    {
      "epoch": 0.6043380703066566,
      "grad_norm": 1.8772680059539715,
      "learning_rate": 4.057552251923633e-06,
      "loss": 0.3327,
      "step": 2020
    },
    {
      "epoch": 0.605833956619297,
      "grad_norm": 1.8850386701103279,
      "learning_rate": 4.031914410755809e-06,
      "loss": 0.327,
      "step": 2025
    },
    {
      "epoch": 0.6073298429319371,
      "grad_norm": 1.8735991544459796,
      "learning_rate": 4.0063029867199455e-06,
      "loss": 0.3278,
      "step": 2030
    },
    {
      "epoch": 0.6088257292445775,
      "grad_norm": 2.0742483586745952,
      "learning_rate": 3.980718678700909e-06,
      "loss": 0.3295,
      "step": 2035
    },
    {
      "epoch": 0.6103216155572176,
      "grad_norm": 1.805997806919521,
      "learning_rate": 3.955162184843625e-06,
      "loss": 0.318,
      "step": 2040
    },
    {
      "epoch": 0.6118175018698578,
      "grad_norm": 1.9482369327485018,
      "learning_rate": 3.929634202534026e-06,
      "loss": 0.3303,
      "step": 2045
    },
    {
      "epoch": 0.6133133881824981,
      "grad_norm": 1.8643741778263954,
      "learning_rate": 3.904135428380019e-06,
      "loss": 0.3221,
      "step": 2050
    },
    {
      "epoch": 0.6148092744951383,
      "grad_norm": 1.9119914679721755,
      "learning_rate": 3.8786665581924805e-06,
      "loss": 0.3259,
      "step": 2055
    },
    {
      "epoch": 0.6163051608077786,
      "grad_norm": 2.0294178588740808,
      "learning_rate": 3.853228286966265e-06,
      "loss": 0.3114,
      "step": 2060
    },
    {
      "epoch": 0.6178010471204188,
      "grad_norm": 1.8598282314437558,
      "learning_rate": 3.827821308861244e-06,
      "loss": 0.3242,
      "step": 2065
    },
    {
      "epoch": 0.6192969334330591,
      "grad_norm": 1.9818198802388973,
      "learning_rate": 3.8024463171833636e-06,
      "loss": 0.3252,
      "step": 2070
    },
    {
      "epoch": 0.6207928197456993,
      "grad_norm": 1.9439228162479631,
      "learning_rate": 3.777104004365721e-06,
      "loss": 0.3258,
      "step": 2075
    },
    {
      "epoch": 0.6222887060583395,
      "grad_norm": 1.9406393323579751,
      "learning_rate": 3.7517950619496713e-06,
      "loss": 0.327,
      "step": 2080
    },
    {
      "epoch": 0.6237845923709798,
      "grad_norm": 1.8702196116833902,
      "learning_rate": 3.7265201805659618e-06,
      "loss": 0.3274,
      "step": 2085
    },
    {
      "epoch": 0.62528047868362,
      "grad_norm": 1.8206045884367064,
      "learning_rate": 3.701280049915877e-06,
      "loss": 0.3087,
      "step": 2090
    },
    {
      "epoch": 0.6267763649962603,
      "grad_norm": 1.8946084974764223,
      "learning_rate": 3.676075358752426e-06,
      "loss": 0.3227,
      "step": 2095
    },
    {
      "epoch": 0.6282722513089005,
      "grad_norm": 1.8425099562092453,
      "learning_rate": 3.6509067948615464e-06,
      "loss": 0.3091,
      "step": 2100
    },
    {
      "epoch": 0.6297681376215407,
      "grad_norm": 1.833988306261615,
      "learning_rate": 3.6257750450433284e-06,
      "loss": 0.3158,
      "step": 2105
    },
    {
      "epoch": 0.631264023934181,
      "grad_norm": 1.848102418513888,
      "learning_rate": 3.6006807950932867e-06,
      "loss": 0.3231,
      "step": 2110
    },
    {
      "epoch": 0.6327599102468212,
      "grad_norm": 1.8597295350064236,
      "learning_rate": 3.575624729783632e-06,
      "loss": 0.317,
      "step": 2115
    },
    {
      "epoch": 0.6342557965594615,
      "grad_norm": 1.8571116178437028,
      "learning_rate": 3.550607532844596e-06,
      "loss": 0.3185,
      "step": 2120
    },
    {
      "epoch": 0.6357516828721017,
      "grad_norm": 1.850039717310936,
      "learning_rate": 3.5256298869457715e-06,
      "loss": 0.3153,
      "step": 2125
    },
    {
      "epoch": 0.637247569184742,
      "grad_norm": 1.8517187441330423,
      "learning_rate": 3.5006924736774793e-06,
      "loss": 0.3231,
      "step": 2130
    },
    {
      "epoch": 0.6387434554973822,
      "grad_norm": 1.886804887794377,
      "learning_rate": 3.47579597353217e-06,
      "loss": 0.3132,
      "step": 2135
    },
    {
      "epoch": 0.6402393418100224,
      "grad_norm": 1.8207891498106763,
      "learning_rate": 3.4509410658858606e-06,
      "loss": 0.3239,
      "step": 2140
    },
    {
      "epoch": 0.6417352281226627,
      "grad_norm": 1.9080722925799685,
      "learning_rate": 3.426128428979589e-06,
      "loss": 0.3127,
      "step": 2145
    },
    {
      "epoch": 0.6432311144353029,
      "grad_norm": 1.7978167092374475,
      "learning_rate": 3.4013587399009073e-06,
      "loss": 0.3112,
      "step": 2150
    },
    {
      "epoch": 0.6447270007479432,
      "grad_norm": 1.8462499082396047,
      "learning_rate": 3.376632674565411e-06,
      "loss": 0.3168,
      "step": 2155
    },
    {
      "epoch": 0.6462228870605834,
      "grad_norm": 1.856553229309688,
      "learning_rate": 3.351950907698285e-06,
      "loss": 0.3065,
      "step": 2160
    },
    {
      "epoch": 0.6477187733732236,
      "grad_norm": 1.7800004213781706,
      "learning_rate": 3.3273141128159005e-06,
      "loss": 0.3132,
      "step": 2165
    },
    {
      "epoch": 0.6492146596858639,
      "grad_norm": 1.9132965188669029,
      "learning_rate": 3.3027229622074335e-06,
      "loss": 0.3179,
      "step": 2170
    },
    {
      "epoch": 0.6507105459985041,
      "grad_norm": 1.7650226022206836,
      "learning_rate": 3.278178126916515e-06,
      "loss": 0.3137,
      "step": 2175
    },
    {
      "epoch": 0.6522064323111444,
      "grad_norm": 1.951509417973989,
      "learning_rate": 3.2536802767229243e-06,
      "loss": 0.3084,
      "step": 2180
    },
    {
      "epoch": 0.6537023186237846,
      "grad_norm": 1.772116366162939,
      "learning_rate": 3.2292300801243133e-06,
      "loss": 0.3102,
      "step": 2185
    },
    {
      "epoch": 0.6551982049364248,
      "grad_norm": 1.8140401176421401,
      "learning_rate": 3.20482820431796e-06,
      "loss": 0.3056,
      "step": 2190
    },
    {
      "epoch": 0.6566940912490651,
      "grad_norm": 1.8243620700136636,
      "learning_rate": 3.180475315182563e-06,
      "loss": 0.3033,
      "step": 2195
    },
    {
      "epoch": 0.6581899775617053,
      "grad_norm": 1.8380166168759837,
      "learning_rate": 3.1561720772600736e-06,
      "loss": 0.304,
      "step": 2200
    },
    {
      "epoch": 0.6596858638743456,
      "grad_norm": 1.8336050039462124,
      "learning_rate": 3.1319191537375577e-06,
      "loss": 0.3143,
      "step": 2205
    },
    {
      "epoch": 0.6611817501869858,
      "grad_norm": 1.8667890213032734,
      "learning_rate": 3.107717206429105e-06,
      "loss": 0.3031,
      "step": 2210
    },
    {
      "epoch": 0.6626776364996261,
      "grad_norm": 1.7638159112909835,
      "learning_rate": 3.0835668957577636e-06,
      "loss": 0.3013,
      "step": 2215
    },
    {
      "epoch": 0.6641735228122663,
      "grad_norm": 1.900781665691589,
      "learning_rate": 3.059468880737519e-06,
      "loss": 0.3073,
      "step": 2220
    },
    {
      "epoch": 0.6656694091249065,
      "grad_norm": 1.943524014415726,
      "learning_rate": 3.035423818955316e-06,
      "loss": 0.3087,
      "step": 2225
    },
    {
      "epoch": 0.6671652954375468,
      "grad_norm": 1.736021065342517,
      "learning_rate": 3.0114323665531066e-06,
      "loss": 0.2979,
      "step": 2230
    },
    {
      "epoch": 0.668661181750187,
      "grad_norm": 1.746010053168365,
      "learning_rate": 2.987495178209951e-06,
      "loss": 0.307,
      "step": 2235
    },
    {
      "epoch": 0.6701570680628273,
      "grad_norm": 1.8018064213578624,
      "learning_rate": 2.9636129071241515e-06,
      "loss": 0.3126,
      "step": 2240
    },
    {
      "epoch": 0.6716529543754675,
      "grad_norm": 1.8077932770071266,
      "learning_rate": 2.9397862049954307e-06,
      "loss": 0.3004,
      "step": 2245
    },
    {
      "epoch": 0.6731488406881077,
      "grad_norm": 1.7048569088891747,
      "learning_rate": 2.916015722007137e-06,
      "loss": 0.3066,
      "step": 2250
    },
    {
      "epoch": 0.674644727000748,
      "grad_norm": 1.7988871113907166,
      "learning_rate": 2.892302106808519e-06,
      "loss": 0.3052,
      "step": 2255
    },
    {
      "epoch": 0.6761406133133882,
      "grad_norm": 1.8715481375394143,
      "learning_rate": 2.8686460064970078e-06,
      "loss": 0.3085,
      "step": 2260
    },
    {
      "epoch": 0.6776364996260285,
      "grad_norm": 1.8258948545382783,
      "learning_rate": 2.8450480666005743e-06,
      "loss": 0.3023,
      "step": 2265
    },
    {
      "epoch": 0.6791323859386686,
      "grad_norm": 1.7183769572814935,
      "learning_rate": 2.821508931060104e-06,
      "loss": 0.3169,
      "step": 2270
    },
    {
      "epoch": 0.680628272251309,
      "grad_norm": 1.8087144140013556,
      "learning_rate": 2.7980292422118282e-06,
      "loss": 0.3,
      "step": 2275
    },
    {
      "epoch": 0.6821241585639491,
      "grad_norm": 1.9721800720444596,
      "learning_rate": 2.7746096407698004e-06,
      "loss": 0.3029,
      "step": 2280
    },
    {
      "epoch": 0.6836200448765893,
      "grad_norm": 1.8344419672931702,
      "learning_rate": 2.7512507658083996e-06,
      "loss": 0.2996,
      "step": 2285
    },
    {
      "epoch": 0.6851159311892296,
      "grad_norm": 1.757267551606752,
      "learning_rate": 2.7279532547449083e-06,
      "loss": 0.3033,
      "step": 2290
    },
    {
      "epoch": 0.6866118175018698,
      "grad_norm": 1.8575093423008022,
      "learning_rate": 2.704717743322104e-06,
      "loss": 0.2873,
      "step": 2295
    },
    {
      "epoch": 0.6881077038145101,
      "grad_norm": 1.761502547654336,
      "learning_rate": 2.681544865590926e-06,
      "loss": 0.2999,
      "step": 2300
    },
    {
      "epoch": 0.6896035901271503,
      "grad_norm": 1.958074773552565,
      "learning_rate": 2.6584352538931523e-06,
      "loss": 0.3023,
      "step": 2305
    },
    {
      "epoch": 0.6910994764397905,
      "grad_norm": 1.7604780827427178,
      "learning_rate": 2.635389538844166e-06,
      "loss": 0.2923,
      "step": 2310
    },
    {
      "epoch": 0.6925953627524308,
      "grad_norm": 1.860509876291064,
      "learning_rate": 2.612408349315734e-06,
      "loss": 0.2968,
      "step": 2315
    },
    {
      "epoch": 0.694091249065071,
      "grad_norm": 1.8116830542415268,
      "learning_rate": 2.5894923124188498e-06,
      "loss": 0.2911,
      "step": 2320
    },
    {
      "epoch": 0.6955871353777113,
      "grad_norm": 1.816773761816662,
      "learning_rate": 2.5666420534866256e-06,
      "loss": 0.3017,
      "step": 2325
    },
    {
      "epoch": 0.6970830216903515,
      "grad_norm": 1.810456487051493,
      "learning_rate": 2.543858196057214e-06,
      "loss": 0.3045,
      "step": 2330
    },
    {
      "epoch": 0.6985789080029918,
      "grad_norm": 1.8462477832363797,
      "learning_rate": 2.5211413618568114e-06,
      "loss": 0.2979,
      "step": 2335
    },
    {
      "epoch": 0.700074794315632,
      "grad_norm": 1.749680469906487,
      "learning_rate": 2.4984921707826805e-06,
      "loss": 0.298,
      "step": 2340
    },
    {
      "epoch": 0.7015706806282722,
      "grad_norm": 1.7715319612256217,
      "learning_rate": 2.4759112408862366e-06,
      "loss": 0.2905,
      "step": 2345
    },
    {
      "epoch": 0.7030665669409125,
      "grad_norm": 1.9011349884243633,
      "learning_rate": 2.4533991883561868e-06,
      "loss": 0.2938,
      "step": 2350
    },
    {
      "epoch": 0.7045624532535527,
      "grad_norm": 1.7509668722553002,
      "learning_rate": 2.4309566275017027e-06,
      "loss": 0.2931,
      "step": 2355
    },
    {
      "epoch": 0.706058339566193,
      "grad_norm": 1.7463279622870067,
      "learning_rate": 2.4085841707356787e-06,
      "loss": 0.2948,
      "step": 2360
    },
    {
      "epoch": 0.7075542258788332,
      "grad_norm": 1.7457958614044327,
      "learning_rate": 2.386282428558001e-06,
      "loss": 0.2935,
      "step": 2365
    },
    {
      "epoch": 0.7090501121914734,
      "grad_norm": 1.8306487338719184,
      "learning_rate": 2.364052009538892e-06,
      "loss": 0.3029,
      "step": 2370
    },
    {
      "epoch": 0.7105459985041137,
      "grad_norm": 1.8902782477754185,
      "learning_rate": 2.341893520302313e-06,
      "loss": 0.2937,
      "step": 2375
    },
    {
      "epoch": 0.7120418848167539,
      "grad_norm": 1.7948687484011157,
      "learning_rate": 2.3198075655094023e-06,
      "loss": 0.2925,
      "step": 2380
    },
    {
      "epoch": 0.7135377711293942,
      "grad_norm": 1.8682547497864384,
      "learning_rate": 2.297794747841976e-06,
      "loss": 0.2992,
      "step": 2385
    },
    {
      "epoch": 0.7150336574420344,
      "grad_norm": 1.7985072864408282,
      "learning_rate": 2.275855667986086e-06,
      "loss": 0.2992,
      "step": 2390
    },
    {
      "epoch": 0.7165295437546746,
      "grad_norm": 1.6780824098442955,
      "learning_rate": 2.2539909246156257e-06,
      "loss": 0.2902,
      "step": 2395
    },
    {
      "epoch": 0.7180254300673149,
      "grad_norm": 1.9327685022447323,
      "learning_rate": 2.232201114375988e-06,
      "loss": 0.2879,
      "step": 2400
    },
    {
      "epoch": 0.7195213163799551,
      "grad_norm": 1.8312593750432005,
      "learning_rate": 2.2104868318677963e-06,
      "loss": 0.2967,
      "step": 2405
    },
    {
      "epoch": 0.7210172026925954,
      "grad_norm": 1.8041698028281294,
      "learning_rate": 2.1888486696306706e-06,
      "loss": 0.2849,
      "step": 2410
    },
    {
      "epoch": 0.7225130890052356,
      "grad_norm": 1.8021876820178402,
      "learning_rate": 2.1672872181270575e-06,
      "loss": 0.2918,
      "step": 2415
    },
    {
      "epoch": 0.7240089753178759,
      "grad_norm": 1.807836863115144,
      "learning_rate": 2.1458030657261235e-06,
      "loss": 0.282,
      "step": 2420
    },
    {
      "epoch": 0.7255048616305161,
      "grad_norm": 1.7515999717106407,
      "learning_rate": 2.1243967986876933e-06,
      "loss": 0.2922,
      "step": 2425
    },
    {
      "epoch": 0.7270007479431563,
      "grad_norm": 1.8149872804694056,
      "learning_rate": 2.1030690011462567e-06,
      "loss": 0.2912,
      "step": 2430
    },
    {
      "epoch": 0.7284966342557966,
      "grad_norm": 1.7878582875336215,
      "learning_rate": 2.081820255095028e-06,
      "loss": 0.2886,
      "step": 2435
    },
    {
      "epoch": 0.7299925205684368,
      "grad_norm": 1.7664930533873893,
      "learning_rate": 2.0606511403700575e-06,
      "loss": 0.2964,
      "step": 2440
    },
    {
      "epoch": 0.7314884068810771,
      "grad_norm": 1.7856577814800616,
      "learning_rate": 2.0395622346344213e-06,
      "loss": 0.2849,
      "step": 2445
    },
    {
      "epoch": 0.7329842931937173,
      "grad_norm": 1.7620387064486105,
      "learning_rate": 2.018554113362449e-06,
      "loss": 0.2811,
      "step": 2450
    },
    {
      "epoch": 0.7344801795063575,
      "grad_norm": 1.746148787119175,
      "learning_rate": 1.9976273498240234e-06,
      "loss": 0.2866,
      "step": 2455
    },
    {
      "epoch": 0.7359760658189978,
      "grad_norm": 1.759195000248038,
      "learning_rate": 1.976782515068938e-06,
      "loss": 0.294,
      "step": 2460
    },
    {
      "epoch": 0.737471952131638,
      "grad_norm": 1.6081462651916374,
      "learning_rate": 1.9560201779113056e-06,
      "loss": 0.2821,
      "step": 2465
    },
    {
      "epoch": 0.7389678384442783,
      "grad_norm": 1.8127282683936143,
      "learning_rate": 1.9353409049140515e-06,
      "loss": 0.2827,
      "step": 2470
    },
    {
      "epoch": 0.7404637247569185,
      "grad_norm": 1.7928349569557254,
      "learning_rate": 1.9147452603734402e-06,
      "loss": 0.2889,
      "step": 2475
    },
    {
      "epoch": 0.7419596110695588,
      "grad_norm": 1.7519180416889486,
      "learning_rate": 1.894233806303689e-06,
      "loss": 0.2816,
      "step": 2480
    },
    {
      "epoch": 0.743455497382199,
      "grad_norm": 1.792648064853805,
      "learning_rate": 1.8738071024216141e-06,
      "loss": 0.2843,
      "step": 2485
    },
    {
      "epoch": 0.7449513836948392,
      "grad_norm": 1.7815734013272622,
      "learning_rate": 1.8534657061313744e-06,
      "loss": 0.2742,
      "step": 2490
    },
    {
      "epoch": 0.7464472700074795,
      "grad_norm": 1.825180595387709,
      "learning_rate": 1.8332101725092522e-06,
      "loss": 0.2816,
      "step": 2495
    },
    {
      "epoch": 0.7479431563201197,
      "grad_norm": 1.8420097876440362,
      "learning_rate": 1.8130410542885084e-06,
      "loss": 0.2808,
      "step": 2500
    },
    {
      "epoch": 0.74943904263276,
      "grad_norm": 1.8442353488656769,
      "learning_rate": 1.7929589018443016e-06,
      "loss": 0.2923,
      "step": 2505
    },
    {
      "epoch": 0.7509349289454001,
      "grad_norm": 1.876793012170064,
      "learning_rate": 1.7729642631786613e-06,
      "loss": 0.2872,
      "step": 2510
    },
    {
      "epoch": 0.7524308152580403,
      "grad_norm": 1.7511287142130798,
      "learning_rate": 1.7530576839055453e-06,
      "loss": 0.2822,
      "step": 2515
    },
    {
      "epoch": 0.7539267015706806,
      "grad_norm": 1.8394555324866848,
      "learning_rate": 1.7332397072359435e-06,
      "loss": 0.2765,
      "step": 2520
    },
    {
      "epoch": 0.7554225878833208,
      "grad_norm": 1.773080627419537,
      "learning_rate": 1.7135108739630573e-06,
      "loss": 0.2772,
      "step": 2525
    },
    {
      "epoch": 0.7569184741959611,
      "grad_norm": 1.7397840701003071,
      "learning_rate": 1.693871722447542e-06,
      "loss": 0.2748,
      "step": 2530
    },
    {
      "epoch": 0.7584143605086013,
      "grad_norm": 1.8139047134561623,
      "learning_rate": 1.6743227886028152e-06,
      "loss": 0.2809,
      "step": 2535
    },
    {
      "epoch": 0.7599102468212415,
      "grad_norm": 1.723146398169513,
      "learning_rate": 1.6548646058804347e-06,
      "loss": 0.277,
      "step": 2540
    },
    {
      "epoch": 0.7614061331338818,
      "grad_norm": 1.755509982892445,
      "learning_rate": 1.6354977052555393e-06,
      "loss": 0.2845,
      "step": 2545
    },
    {
      "epoch": 0.762902019446522,
      "grad_norm": 1.7634745348399379,
      "learning_rate": 1.6162226152123633e-06,
      "loss": 0.2845,
      "step": 2550
    },
    {
      "epoch": 0.7643979057591623,
      "grad_norm": 1.8539062432851583,
      "learning_rate": 1.5970398617298078e-06,
      "loss": 0.2828,
      "step": 2555
    },
    {
      "epoch": 0.7658937920718025,
      "grad_norm": 1.8053358835812254,
      "learning_rate": 1.5779499682670963e-06,
      "loss": 0.2774,
      "step": 2560
    },
    {
      "epoch": 0.7673896783844428,
      "grad_norm": 1.8014531312640616,
      "learning_rate": 1.5589534557494868e-06,
      "loss": 0.2841,
      "step": 2565
    },
    {
      "epoch": 0.768885564697083,
      "grad_norm": 1.735571527942806,
      "learning_rate": 1.5400508425540562e-06,
      "loss": 0.2746,
      "step": 2570
    },
    {
      "epoch": 0.7703814510097232,
      "grad_norm": 1.8540824858023373,
      "learning_rate": 1.5212426444955569e-06,
      "loss": 0.2807,
      "step": 2575
    },
    {
      "epoch": 0.7718773373223635,
      "grad_norm": 1.7139393419525597,
      "learning_rate": 1.5025293748123354e-06,
      "loss": 0.2815,
      "step": 2580
    },
    {
      "epoch": 0.7733732236350037,
      "grad_norm": 1.6431033212935895,
      "learning_rate": 1.4839115441523355e-06,
      "loss": 0.2696,
      "step": 2585
    },
    {
      "epoch": 0.774869109947644,
      "grad_norm": 1.7227778483828726,
      "learning_rate": 1.4653896605591584e-06,
      "loss": 0.2732,
      "step": 2590
    },
    {
      "epoch": 0.7763649962602842,
      "grad_norm": 1.7527519060060008,
      "learning_rate": 1.4469642294582048e-06,
      "loss": 0.2748,
      "step": 2595
    },
    {
      "epoch": 0.7778608825729244,
      "grad_norm": 1.6997524796558416,
      "learning_rate": 1.4286357536428696e-06,
      "loss": 0.2729,
      "step": 2600
    },
    {
      "epoch": 0.7793567688855647,
      "grad_norm": 1.7807204337692575,
      "learning_rate": 1.4104047332608379e-06,
      "loss": 0.2755,
      "step": 2605
    },
    {
      "epoch": 0.7808526551982049,
      "grad_norm": 1.7182846099936764,
      "learning_rate": 1.392271665800427e-06,
      "loss": 0.2777,
      "step": 2610
    },
    {
      "epoch": 0.7823485415108452,
      "grad_norm": 1.7302301084436003,
      "learning_rate": 1.3742370460770144e-06,
      "loss": 0.2762,
      "step": 2615
    },
    {
      "epoch": 0.7838444278234854,
      "grad_norm": 1.711106037244554,
      "learning_rate": 1.3563013662195356e-06,
      "loss": 0.2737,
      "step": 2620
    },
    {
      "epoch": 0.7853403141361257,
      "grad_norm": 1.8191358842574659,
      "learning_rate": 1.3384651156570483e-06,
      "loss": 0.2732,
      "step": 2625
    },
    {
      "epoch": 0.7868362004487659,
      "grad_norm": 1.751260410944088,
      "learning_rate": 1.3207287811053893e-06,
      "loss": 0.2771,
      "step": 2630
    },
    {
      "epoch": 0.7883320867614061,
      "grad_norm": 1.7320253510102213,
      "learning_rate": 1.3030928465538822e-06,
      "loss": 0.27,
      "step": 2635
    },
    {
      "epoch": 0.7898279730740464,
      "grad_norm": 1.7406452518990843,
      "learning_rate": 1.2855577932521352e-06,
      "loss": 0.2703,
      "step": 2640
    },
    {
      "epoch": 0.7913238593866866,
      "grad_norm": 1.8538751789457641,
      "learning_rate": 1.2681240996969085e-06,
      "loss": 0.2776,
      "step": 2645
    },
    {
      "epoch": 0.7928197456993269,
      "grad_norm": 1.740887599672242,
      "learning_rate": 1.250792241619051e-06,
      "loss": 0.2736,
      "step": 2650
    },
    {
      "epoch": 0.7943156320119671,
      "grad_norm": 1.8281991178787242,
      "learning_rate": 1.233562691970533e-06,
      "loss": 0.2749,
      "step": 2655
    },
    {
      "epoch": 0.7958115183246073,
      "grad_norm": 1.6556477939621426,
      "learning_rate": 1.2164359209115235e-06,
      "loss": 0.2776,
      "step": 2660
    },
    {
      "epoch": 0.7973074046372476,
      "grad_norm": 1.695787778492541,
      "learning_rate": 1.1994123957975722e-06,
      "loss": 0.2702,
      "step": 2665
    },
    {
      "epoch": 0.7988032909498878,
      "grad_norm": 1.7707776645975837,
      "learning_rate": 1.1824925811668485e-06,
      "loss": 0.2627,
      "step": 2670
    },
    {
      "epoch": 0.8002991772625281,
      "grad_norm": 1.8300425136047838,
      "learning_rate": 1.1656769387274714e-06,
      "loss": 0.2688,
      "step": 2675
    },
    {
      "epoch": 0.8017950635751683,
      "grad_norm": 1.6906589157556278,
      "learning_rate": 1.1489659273449073e-06,
      "loss": 0.2672,
      "step": 2680
    },
    {
      "epoch": 0.8032909498878086,
      "grad_norm": 1.7718115103968484,
      "learning_rate": 1.132360003029449e-06,
      "loss": 0.2673,
      "step": 2685
    },
    {
      "epoch": 0.8047868362004488,
      "grad_norm": 1.7597119643475179,
      "learning_rate": 1.115859618923773e-06,
      "loss": 0.2744,
      "step": 2690
    },
    {
      "epoch": 0.806282722513089,
      "grad_norm": 1.7801333538259148,
      "learning_rate": 1.0994652252905695e-06,
      "loss": 0.2662,
      "step": 2695
    },
    {
      "epoch": 0.8077786088257293,
      "grad_norm": 1.6866429011639965,
      "learning_rate": 1.083177269500264e-06,
      "loss": 0.2675,
      "step": 2700
    },
    {
      "epoch": 0.8092744951383695,
      "grad_norm": 1.9195992948000482,
      "learning_rate": 1.0669961960188008e-06,
      "loss": 0.2739,
      "step": 2705
    },
    {
      "epoch": 0.8107703814510098,
      "grad_norm": 1.8220041781840073,
      "learning_rate": 1.0509224463955249e-06,
      "loss": 0.2604,
      "step": 2710
    },
    {
      "epoch": 0.81226626776365,
      "grad_norm": 1.7303540258737908,
      "learning_rate": 1.0349564592511162e-06,
      "loss": 0.2743,
      "step": 2715
    },
    {
      "epoch": 0.8137621540762902,
      "grad_norm": 1.6406056857804932,
      "learning_rate": 1.0190986702656403e-06,
      "loss": 0.2719,
      "step": 2720
    },
    {
      "epoch": 0.8152580403889305,
      "grad_norm": 1.8590839739169418,
      "learning_rate": 1.0033495121666442e-06,
      "loss": 0.273,
      "step": 2725
    },
    {
      "epoch": 0.8167539267015707,
      "grad_norm": 1.7341252368355093,
      "learning_rate": 9.877094147173566e-07,
      "loss": 0.2712,
      "step": 2730
    },
    {
      "epoch": 0.818249813014211,
      "grad_norm": 1.7272695337289556,
      "learning_rate": 9.721788047049586e-07,
      "loss": 0.2628,
      "step": 2735
    },
    {
      "epoch": 0.8197456993268512,
      "grad_norm": 1.7050895419647492,
      "learning_rate": 9.567581059289322e-07,
      "loss": 0.2678,
      "step": 2740
    },
    {
      "epoch": 0.8212415856394913,
      "grad_norm": 1.7258978187627068,
      "learning_rate": 9.414477391895044e-07,
      "loss": 0.2715,
      "step": 2745
    },
    {
      "epoch": 0.8227374719521316,
      "grad_norm": 1.8460755537922702,
      "learning_rate": 9.262481222761588e-07,
      "loss": 0.2716,
      "step": 2750
    },
    {
      "epoch": 0.8242333582647718,
      "grad_norm": 1.7677837124955216,
      "learning_rate": 9.11159669956237e-07,
      "loss": 0.2725,
      "step": 2755
    },
    {
      "epoch": 0.8257292445774121,
      "grad_norm": 1.7183389424616196,
      "learning_rate": 8.961827939636198e-07,
      "loss": 0.2683,
      "step": 2760
    },
    {
      "epoch": 0.8272251308900523,
      "grad_norm": 1.8851170229714924,
      "learning_rate": 8.813179029874874e-07,
      "loss": 0.2588,
      "step": 2765
    },
    {
      "epoch": 0.8287210172026926,
      "grad_norm": 1.8163919089444864,
      "learning_rate": 8.665654026611797e-07,
      "loss": 0.2631,
      "step": 2770
    },
    {
      "epoch": 0.8302169035153328,
      "grad_norm": 1.7098860990754234,
      "learning_rate": 8.51925695551113e-07,
      "loss": 0.2679,
      "step": 2775
    },
    {
      "epoch": 0.831712789827973,
      "grad_norm": 1.7663056355635183,
      "learning_rate": 8.373991811458027e-07,
      "loss": 0.2652,
      "step": 2780
    },
    {
      "epoch": 0.8332086761406133,
      "grad_norm": 1.7186868648976898,
      "learning_rate": 8.229862558449592e-07,
      "loss": 0.2661,
      "step": 2785
    },
    {
      "epoch": 0.8347045624532535,
      "grad_norm": 1.8059879215165224,
      "learning_rate": 8.086873129486722e-07,
      "loss": 0.2686,
      "step": 2790
    },
    {
      "epoch": 0.8362004487658938,
      "grad_norm": 1.7374284001547664,
      "learning_rate": 7.945027426466801e-07,
      "loss": 0.2708,
      "step": 2795
    },
    {
      "epoch": 0.837696335078534,
      "grad_norm": 1.6598096486422094,
      "learning_rate": 7.804329320077181e-07,
      "loss": 0.2653,
      "step": 2800
    },
    {
      "epoch": 0.8391922213911742,
      "grad_norm": 1.676734657625906,
      "learning_rate": 7.664782649689611e-07,
      "loss": 0.2563,
      "step": 2805
    },
    {
      "epoch": 0.8406881077038145,
      "grad_norm": 1.7941246676620155,
      "learning_rate": 7.526391223255386e-07,
      "loss": 0.2643,
      "step": 2810
    },
    {
      "epoch": 0.8421839940164547,
      "grad_norm": 1.7441327844730907,
      "learning_rate": 7.389158817201541e-07,
      "loss": 0.2663,
      "step": 2815
    },
    {
      "epoch": 0.843679880329095,
      "grad_norm": 1.6764728143369185,
      "learning_rate": 7.253089176327738e-07,
      "loss": 0.2631,
      "step": 2820
    },
    {
      "epoch": 0.8451757666417352,
      "grad_norm": 1.7090343355435693,
      "learning_rate": 7.118186013704065e-07,
      "loss": 0.2579,
      "step": 2825
    },
    {
      "epoch": 0.8466716529543755,
      "grad_norm": 1.723034589615204,
      "learning_rate": 6.984453010569758e-07,
      "loss": 0.2718,
      "step": 2830
    },
    {
      "epoch": 0.8481675392670157,
      "grad_norm": 1.7083769223090157,
      "learning_rate": 6.851893816232729e-07,
      "loss": 0.259,
      "step": 2835
    },
    {
      "epoch": 0.8496634255796559,
      "grad_norm": 1.6983173618906942,
      "learning_rate": 6.720512047969957e-07,
      "loss": 0.2655,
      "step": 2840
    },
    {
      "epoch": 0.8511593118922962,
      "grad_norm": 1.6008652695866359,
      "learning_rate": 6.590311290928825e-07,
      "loss": 0.2661,
      "step": 2845
    },
    {
      "epoch": 0.8526551982049364,
      "grad_norm": 1.723592329316595,
      "learning_rate": 6.461295098029269e-07,
      "loss": 0.2548,
      "step": 2850
    },
    {
      "epoch": 0.8541510845175767,
      "grad_norm": 1.8054575785485054,
      "learning_rate": 6.333466989866787e-07,
      "loss": 0.264,
      "step": 2855
    },
    {
      "epoch": 0.8556469708302169,
      "grad_norm": 1.7902077125134892,
      "learning_rate": 6.206830454616447e-07,
      "loss": 0.266,
      "step": 2860
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 1.7147769185915753,
      "learning_rate": 6.08138894793765e-07,
      "loss": 0.2654,
      "step": 2865
    },
    {
      "epoch": 0.8586387434554974,
      "grad_norm": 1.7518112730752275,
      "learning_rate": 5.957145892879829e-07,
      "loss": 0.2594,
      "step": 2870
    },
    {
      "epoch": 0.8601346297681376,
      "grad_norm": 1.811592287193994,
      "learning_rate": 5.834104679789077e-07,
      "loss": 0.2647,
      "step": 2875
    },
    {
      "epoch": 0.8616305160807779,
      "grad_norm": 1.773326433422328,
      "learning_rate": 5.712268666215559e-07,
      "loss": 0.264,
      "step": 2880
    },
    {
      "epoch": 0.8631264023934181,
      "grad_norm": 1.68178039725722,
      "learning_rate": 5.591641176822005e-07,
      "loss": 0.2614,
      "step": 2885
    },
    {
      "epoch": 0.8646222887060584,
      "grad_norm": 1.6842479909967625,
      "learning_rate": 5.472225503292883e-07,
      "loss": 0.2621,
      "step": 2890
    },
    {
      "epoch": 0.8661181750186986,
      "grad_norm": 1.7449782410599817,
      "learning_rate": 5.354024904244632e-07,
      "loss": 0.2522,
      "step": 2895
    },
    {
      "epoch": 0.8676140613313388,
      "grad_norm": 1.7259602780620398,
      "learning_rate": 5.237042605136689e-07,
      "loss": 0.2614,
      "step": 2900
    },
    {
      "epoch": 0.8691099476439791,
      "grad_norm": 1.688101500268341,
      "learning_rate": 5.121281798183547e-07,
      "loss": 0.2611,
      "step": 2905
    },
    {
      "epoch": 0.8706058339566193,
      "grad_norm": 1.7726586716734274,
      "learning_rate": 5.00674564226758e-07,
      "loss": 0.2544,
      "step": 2910
    },
    {
      "epoch": 0.8721017202692596,
      "grad_norm": 1.6935216955087868,
      "learning_rate": 4.893437262852885e-07,
      "loss": 0.2523,
      "step": 2915
    },
    {
      "epoch": 0.8735976065818998,
      "grad_norm": 1.878804856678552,
      "learning_rate": 4.781359751899984e-07,
      "loss": 0.2538,
      "step": 2920
    },
    {
      "epoch": 0.87509349289454,
      "grad_norm": 1.64770700770445,
      "learning_rate": 4.6705161677814024e-07,
      "loss": 0.2569,
      "step": 2925
    },
    {
      "epoch": 0.8765893792071803,
      "grad_norm": 1.860024134107886,
      "learning_rate": 4.560909535198299e-07,
      "loss": 0.2576,
      "step": 2930
    },
    {
      "epoch": 0.8780852655198205,
      "grad_norm": 1.675929796569693,
      "learning_rate": 4.4525428450978627e-07,
      "loss": 0.2539,
      "step": 2935
    },
    {
      "epoch": 0.8795811518324608,
      "grad_norm": 1.6649509488101208,
      "learning_rate": 4.3454190545917317e-07,
      "loss": 0.2654,
      "step": 2940
    },
    {
      "epoch": 0.881077038145101,
      "grad_norm": 1.7894352860083609,
      "learning_rate": 4.239541086875265e-07,
      "loss": 0.2647,
      "step": 2945
    },
    {
      "epoch": 0.8825729244577412,
      "grad_norm": 1.688537215035147,
      "learning_rate": 4.134911831147798e-07,
      "loss": 0.2563,
      "step": 2950
    },
    {
      "epoch": 0.8840688107703815,
      "grad_norm": 1.7422422459372517,
      "learning_rate": 4.031534142533816e-07,
      "loss": 0.2517,
      "step": 2955
    },
    {
      "epoch": 0.8855646970830217,
      "grad_norm": 1.832964243427611,
      "learning_rate": 3.9294108420049935e-07,
      "loss": 0.2664,
      "step": 2960
    },
    {
      "epoch": 0.887060583395662,
      "grad_norm": 1.7932779810454953,
      "learning_rate": 3.828544716303284e-07,
      "loss": 0.2543,
      "step": 2965
    },
    {
      "epoch": 0.8885564697083022,
      "grad_norm": 1.8073243004592312,
      "learning_rate": 3.728938517864794e-07,
      "loss": 0.2601,
      "step": 2970
    },
    {
      "epoch": 0.8900523560209425,
      "grad_norm": 1.714561248097055,
      "learning_rate": 3.6305949647447545e-07,
      "loss": 0.2564,
      "step": 2975
    },
    {
      "epoch": 0.8915482423335827,
      "grad_norm": 1.647575871046988,
      "learning_rate": 3.5335167405433024e-07,
      "loss": 0.2607,
      "step": 2980
    },
    {
      "epoch": 0.8930441286462228,
      "grad_norm": 1.6977984176077578,
      "learning_rate": 3.437706494332266e-07,
      "loss": 0.2522,
      "step": 2985
    },
    {
      "epoch": 0.8945400149588631,
      "grad_norm": 1.7141499596339997,
      "learning_rate": 3.3431668405828675e-07,
      "loss": 0.2558,
      "step": 2990
    },
    {
      "epoch": 0.8960359012715033,
      "grad_norm": 1.6494105719449952,
      "learning_rate": 3.249900359094388e-07,
      "loss": 0.256,
      "step": 2995
    },
    {
      "epoch": 0.8975317875841436,
      "grad_norm": 1.6630293618544516,
      "learning_rate": 3.1579095949237584e-07,
      "loss": 0.2508,
      "step": 3000
    },
    {
      "epoch": 0.8990276738967838,
      "grad_norm": 1.7346655505039537,
      "learning_rate": 3.067197058316157e-07,
      "loss": 0.2614,
      "step": 3005
    },
    {
      "epoch": 0.900523560209424,
      "grad_norm": 1.7107296935219805,
      "learning_rate": 2.9777652246364306e-07,
      "loss": 0.2538,
      "step": 3010
    },
    {
      "epoch": 0.9020194465220643,
      "grad_norm": 1.6491436991741326,
      "learning_rate": 2.889616534301598e-07,
      "loss": 0.2521,
      "step": 3015
    },
    {
      "epoch": 0.9035153328347045,
      "grad_norm": 1.7323747022001885,
      "learning_rate": 2.8027533927142525e-07,
      "loss": 0.2593,
      "step": 3020
    },
    {
      "epoch": 0.9050112191473448,
      "grad_norm": 1.7534706658955106,
      "learning_rate": 2.717178170196916e-07,
      "loss": 0.249,
      "step": 3025
    },
    {
      "epoch": 0.906507105459985,
      "grad_norm": 1.7068949519667596,
      "learning_rate": 2.6328932019273556e-07,
      "loss": 0.2625,
      "step": 3030
    },
    {
      "epoch": 0.9080029917726253,
      "grad_norm": 1.7466561136363379,
      "learning_rate": 2.549900787874876e-07,
      "loss": 0.2572,
      "step": 3035
    },
    {
      "epoch": 0.9094988780852655,
      "grad_norm": 1.6487218463492848,
      "learning_rate": 2.468203192737512e-07,
      "loss": 0.2618,
      "step": 3040
    },
    {
      "epoch": 0.9109947643979057,
      "grad_norm": 1.5699982289102938,
      "learning_rate": 2.3878026458803047e-07,
      "loss": 0.2559,
      "step": 3045
    },
    {
      "epoch": 0.912490650710546,
      "grad_norm": 1.678827851691801,
      "learning_rate": 2.3087013412743998e-07,
      "loss": 0.2504,
      "step": 3050
    },
    {
      "epoch": 0.9139865370231862,
      "grad_norm": 1.732344143690627,
      "learning_rate": 2.2309014374372106e-07,
      "loss": 0.2556,
      "step": 3055
    },
    {
      "epoch": 0.9154824233358265,
      "grad_norm": 1.6563146141875156,
      "learning_rate": 2.1544050573735153e-07,
      "loss": 0.2555,
      "step": 3060
    },
    {
      "epoch": 0.9169783096484667,
      "grad_norm": 1.8096543479163172,
      "learning_rate": 2.079214288517506e-07,
      "loss": 0.2553,
      "step": 3065
    },
    {
      "epoch": 0.9184741959611069,
      "grad_norm": 1.690741820888644,
      "learning_rate": 2.0053311826758458e-07,
      "loss": 0.256,
      "step": 3070
    },
    {
      "epoch": 0.9199700822737472,
      "grad_norm": 1.7615351195511213,
      "learning_rate": 1.9327577559716815e-07,
      "loss": 0.2562,
      "step": 3075
    },
    {
      "epoch": 0.9214659685863874,
      "grad_norm": 1.8449608271118088,
      "learning_rate": 1.8614959887896078e-07,
      "loss": 0.2549,
      "step": 3080
    },
    {
      "epoch": 0.9229618548990277,
      "grad_norm": 1.7775694545753302,
      "learning_rate": 1.79154782572164e-07,
      "loss": 0.247,
      "step": 3085
    },
    {
      "epoch": 0.9244577412116679,
      "grad_norm": 1.6956013013917148,
      "learning_rate": 1.7229151755141394e-07,
      "loss": 0.2571,
      "step": 3090
    },
    {
      "epoch": 0.9259536275243081,
      "grad_norm": 1.6373082200013647,
      "learning_rate": 1.655599911015754e-07,
      "loss": 0.2547,
      "step": 3095
    },
    {
      "epoch": 0.9274495138369484,
      "grad_norm": 1.7078162984487721,
      "learning_rate": 1.5896038691262772e-07,
      "loss": 0.2592,
      "step": 3100
    },
    {
      "epoch": 0.9289454001495886,
      "grad_norm": 1.6367425145666301,
      "learning_rate": 1.52492885074656e-07,
      "loss": 0.2561,
      "step": 3105
    },
    {
      "epoch": 0.9304412864622289,
      "grad_norm": 1.5872236691558035,
      "learning_rate": 1.4615766207293157e-07,
      "loss": 0.2518,
      "step": 3110
    },
    {
      "epoch": 0.9319371727748691,
      "grad_norm": 1.714675291765629,
      "learning_rate": 1.3995489078310055e-07,
      "loss": 0.2633,
      "step": 3115
    },
    {
      "epoch": 0.9334330590875094,
      "grad_norm": 1.619406307330865,
      "learning_rate": 1.338847404664667e-07,
      "loss": 0.2548,
      "step": 3120
    },
    {
      "epoch": 0.9349289454001496,
      "grad_norm": 1.5539413237386495,
      "learning_rate": 1.2794737676536993e-07,
      "loss": 0.2527,
      "step": 3125
    },
    {
      "epoch": 0.9364248317127898,
      "grad_norm": 1.7495161399355714,
      "learning_rate": 1.2214296169866578e-07,
      "loss": 0.2515,
      "step": 3130
    },
    {
      "epoch": 0.9379207180254301,
      "grad_norm": 1.641652885536429,
      "learning_rate": 1.164716536573074e-07,
      "loss": 0.2501,
      "step": 3135
    },
    {
      "epoch": 0.9394166043380703,
      "grad_norm": 1.753141085715687,
      "learning_rate": 1.1093360740002057e-07,
      "loss": 0.2515,
      "step": 3140
    },
    {
      "epoch": 0.9409124906507106,
      "grad_norm": 1.7530034719134988,
      "learning_rate": 1.0552897404908391e-07,
      "loss": 0.2559,
      "step": 3145
    },
    {
      "epoch": 0.9424083769633508,
      "grad_norm": 1.5804220071987112,
      "learning_rate": 1.0025790108620092e-07,
      "loss": 0.2483,
      "step": 3150
    },
    {
      "epoch": 0.943904263275991,
      "grad_norm": 1.822783043661551,
      "learning_rate": 9.512053234847774e-08,
      "loss": 0.258,
      "step": 3155
    },
    {
      "epoch": 0.9454001495886313,
      "grad_norm": 1.7776638404626903,
      "learning_rate": 9.01170080244984e-08,
      "loss": 0.2463,
      "step": 3160
    },
    {
      "epoch": 0.9468960359012715,
      "grad_norm": 1.7244463932526486,
      "learning_rate": 8.52474646504986e-08,
      "loss": 0.2506,
      "step": 3165
    },
    {
      "epoch": 0.9483919222139118,
      "grad_norm": 1.7184065717867174,
      "learning_rate": 8.05120351066413e-08,
      "loss": 0.2605,
      "step": 3170
    },
    {
      "epoch": 0.949887808526552,
      "grad_norm": 1.7978606844090408,
      "learning_rate": 7.591084861338749e-08,
      "loss": 0.2503,
      "step": 3175
    },
    {
      "epoch": 0.9513836948391923,
      "grad_norm": 1.6764242072572402,
      "learning_rate": 7.144403072797346e-08,
      "loss": 0.2523,
      "step": 3180
    },
    {
      "epoch": 0.9528795811518325,
      "grad_norm": 1.6752659598734612,
      "learning_rate": 6.711170334098294e-08,
      "loss": 0.2566,
      "step": 3185
    },
    {
      "epoch": 0.9543754674644727,
      "grad_norm": 1.7696006414428376,
      "learning_rate": 6.291398467302146e-08,
      "loss": 0.2579,
      "step": 3190
    },
    {
      "epoch": 0.955871353777113,
      "grad_norm": 1.6541063626129755,
      "learning_rate": 5.885098927148947e-08,
      "loss": 0.2505,
      "step": 3195
    },
    {
      "epoch": 0.9573672400897532,
      "grad_norm": 1.791951476550907,
      "learning_rate": 5.492282800745707e-08,
      "loss": 0.252,
      "step": 3200
    },
    {
      "epoch": 0.9588631264023935,
      "grad_norm": 1.6998940151846391,
      "learning_rate": 5.112960807263978e-08,
      "loss": 0.2602,
      "step": 3205
    },
    {
      "epoch": 0.9603590127150337,
      "grad_norm": 1.739892053817991,
      "learning_rate": 4.7471432976471944e-08,
      "loss": 0.264,
      "step": 3210
    },
    {
      "epoch": 0.9618548990276738,
      "grad_norm": 1.5429992279839573,
      "learning_rate": 4.3948402543282366e-08,
      "loss": 0.2543,
      "step": 3215
    },
    {
      "epoch": 0.9633507853403142,
      "grad_norm": 1.772813294024904,
      "learning_rate": 4.056061290956981e-08,
      "loss": 0.2524,
      "step": 3220
    },
    {
      "epoch": 0.9648466716529543,
      "grad_norm": 1.5751929247313246,
      "learning_rate": 3.730815652138231e-08,
      "loss": 0.2525,
      "step": 3225
    },
    {
      "epoch": 0.9663425579655947,
      "grad_norm": 1.7360588122089868,
      "learning_rate": 3.4191122131790324e-08,
      "loss": 0.255,
      "step": 3230
    },
    {
      "epoch": 0.9678384442782348,
      "grad_norm": 1.7743122424766984,
      "learning_rate": 3.120959479846919e-08,
      "loss": 0.2584,
      "step": 3235
    },
    {
      "epoch": 0.9693343305908751,
      "grad_norm": 1.574467195657007,
      "learning_rate": 2.8363655881374906e-08,
      "loss": 0.2558,
      "step": 3240
    },
    {
      "epoch": 0.9708302169035153,
      "grad_norm": 1.80180199036063,
      "learning_rate": 2.5653383040524228e-08,
      "loss": 0.2568,
      "step": 3245
    },
    {
      "epoch": 0.9723261032161555,
      "grad_norm": 1.6886241273143858,
      "learning_rate": 2.3078850233878015e-08,
      "loss": 0.2466,
      "step": 3250
    },
    {
      "epoch": 0.9738219895287958,
      "grad_norm": 1.7815633691396229,
      "learning_rate": 2.064012771532009e-08,
      "loss": 0.2536,
      "step": 3255
    },
    {
      "epoch": 0.975317875841436,
      "grad_norm": 1.6956109134529065,
      "learning_rate": 1.83372820327421e-08,
      "loss": 0.2592,
      "step": 3260
    },
    {
      "epoch": 0.9768137621540763,
      "grad_norm": 1.7584432907260417,
      "learning_rate": 1.6170376026226065e-08,
      "loss": 0.2647,
      "step": 3265
    },
    {
      "epoch": 0.9783096484667165,
      "grad_norm": 1.687889717075937,
      "learning_rate": 1.4139468826331327e-08,
      "loss": 0.2529,
      "step": 3270
    },
    {
      "epoch": 0.9798055347793567,
      "grad_norm": 1.6973842345080912,
      "learning_rate": 1.2244615852479158e-08,
      "loss": 0.2586,
      "step": 3275
    },
    {
      "epoch": 0.981301421091997,
      "grad_norm": 1.7860998582475756,
      "learning_rate": 1.0485868811441757e-08,
      "loss": 0.2596,
      "step": 3280
    },
    {
      "epoch": 0.9827973074046372,
      "grad_norm": 1.7444036807918029,
      "learning_rate": 8.86327569593115e-09,
      "loss": 0.253,
      "step": 3285
    },
    {
      "epoch": 0.9842931937172775,
      "grad_norm": 1.7876798673093501,
      "learning_rate": 7.376880783289131e-09,
      "loss": 0.2551,
      "step": 3290
    },
    {
      "epoch": 0.9857890800299177,
      "grad_norm": 1.66622892909602,
      "learning_rate": 6.026724634279335e-09,
      "loss": 0.2557,
      "step": 3295
    },
    {
      "epoch": 0.9872849663425579,
      "grad_norm": 1.7386422804846284,
      "learning_rate": 4.8128440919792405e-09,
      "loss": 0.253,
      "step": 3300
    },
    {
      "epoch": 0.9887808526551982,
      "grad_norm": 1.5376218727713236,
      "learning_rate": 3.73527228077708e-09,
      "loss": 0.2501,
      "step": 3305
    },
    {
      "epoch": 0.9902767389678384,
      "grad_norm": 1.7638583930274379,
      "learning_rate": 2.7940386054664537e-09,
      "loss": 0.262,
      "step": 3310
    },
    {
      "epoch": 0.9917726252804787,
      "grad_norm": 1.9162749151140541,
      "learning_rate": 1.9891687504436373e-09,
      "loss": 0.2575,
      "step": 3315
    },
    {
      "epoch": 0.9932685115931189,
      "grad_norm": 1.880864088528354,
      "learning_rate": 1.320684679008144e-09,
      "loss": 0.2602,
      "step": 3320
    },
    {
      "epoch": 0.9947643979057592,
      "grad_norm": 1.7803280986620529,
      "learning_rate": 7.886046327609809e-10,
      "loss": 0.2543,
      "step": 3325
    },
    {
      "epoch": 0.9962602842183994,
      "grad_norm": 1.5859942056150071,
      "learning_rate": 3.929431311094911e-10,
      "loss": 0.2563,
      "step": 3330
    },
    {
      "epoch": 0.9977561705310396,
      "grad_norm": 1.6644206261602157,
      "learning_rate": 1.337109708704487e-10,
      "loss": 0.2515,
      "step": 3335
    },
    {
      "epoch": 0.9992520568436799,
      "grad_norm": 1.6728854762026557,
      "learning_rate": 1.091522597362893e-11,
      "loss": 0.2602,
      "step": 3340
    },
    {
      "epoch": 0.999850411368736,
      "eval_loss": 0.25460532307624817,
      "eval_runtime": 342.3221,
      "eval_samples_per_second": 3.152,
      "eval_steps_per_second": 0.789,
      "step": 3342
    },
    {
      "epoch": 0.999850411368736,
      "step": 3342,
      "total_flos": 699694464368640.0,
      "train_loss": 0.43199071664427796,
      "train_runtime": 75290.8899,
      "train_samples_per_second": 1.421,
      "train_steps_per_second": 0.044
    }
  ],
  "logging_steps": 5,
  "max_steps": 3342,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 699694464368640.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}