{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 17790,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0002810567734682406,
      "grad_norm": 96.63301086425781,
      "learning_rate": 1.1242270938729624e-07,
      "loss": 17.9839,
      "step": 1
    },
    {
      "epoch": 0.001405283867341203,
      "grad_norm": 115.6960678100586,
      "learning_rate": 5.621135469364812e-07,
      "loss": 18.4829,
      "step": 5
    },
    {
      "epoch": 0.002810567734682406,
      "grad_norm": 96.61837005615234,
      "learning_rate": 1.1242270938729624e-06,
      "loss": 18.1935,
      "step": 10
    },
    {
      "epoch": 0.0042158516020236085,
      "grad_norm": 132.02969360351562,
      "learning_rate": 1.6863406408094434e-06,
      "loss": 18.6642,
      "step": 15
    },
    {
      "epoch": 0.005621135469364812,
      "grad_norm": 80.8836441040039,
      "learning_rate": 2.248454187745925e-06,
      "loss": 17.9493,
      "step": 20
    },
    {
      "epoch": 0.0070264193367060145,
      "grad_norm": 87.12368774414062,
      "learning_rate": 2.810567734682406e-06,
      "loss": 17.8325,
      "step": 25
    },
    {
      "epoch": 0.008431703204047217,
      "grad_norm": 97.9262924194336,
      "learning_rate": 3.372681281618887e-06,
      "loss": 17.3621,
      "step": 30
    },
    {
      "epoch": 0.00983698707138842,
      "grad_norm": 75.57032012939453,
      "learning_rate": 3.9347948285553685e-06,
      "loss": 16.4306,
      "step": 35
    },
    {
      "epoch": 0.011242270938729624,
      "grad_norm": 71.48660278320312,
      "learning_rate": 4.49690837549185e-06,
      "loss": 15.7961,
      "step": 40
    },
    {
      "epoch": 0.012647554806070826,
      "grad_norm": 70.25821685791016,
      "learning_rate": 5.059021922428331e-06,
      "loss": 14.0696,
      "step": 45
    },
    {
      "epoch": 0.014052838673412029,
      "grad_norm": 68.05973052978516,
      "learning_rate": 5.621135469364812e-06,
      "loss": 12.9431,
      "step": 50
    },
    {
      "epoch": 0.015458122540753232,
      "grad_norm": 52.36540222167969,
      "learning_rate": 6.183249016301293e-06,
      "loss": 11.8773,
      "step": 55
    },
    {
      "epoch": 0.016863406408094434,
      "grad_norm": 52.9713134765625,
      "learning_rate": 6.745362563237774e-06,
      "loss": 10.3712,
      "step": 60
    },
    {
      "epoch": 0.018268690275435637,
      "grad_norm": 34.965667724609375,
      "learning_rate": 7.307476110174255e-06,
      "loss": 9.4828,
      "step": 65
    },
    {
      "epoch": 0.01967397414277684,
      "grad_norm": 29.373140335083008,
      "learning_rate": 7.869589657110737e-06,
      "loss": 8.2649,
      "step": 70
    },
    {
      "epoch": 0.021079258010118045,
      "grad_norm": 23.469764709472656,
      "learning_rate": 8.431703204047219e-06,
      "loss": 7.4431,
      "step": 75
    },
    {
      "epoch": 0.022484541877459248,
      "grad_norm": 15.5948486328125,
      "learning_rate": 8.9938167509837e-06,
      "loss": 6.6997,
      "step": 80
    },
    {
      "epoch": 0.02388982574480045,
      "grad_norm": 14.63684368133545,
      "learning_rate": 9.555930297920181e-06,
      "loss": 6.0383,
      "step": 85
    },
    {
      "epoch": 0.025295109612141653,
      "grad_norm": 9.950777053833008,
      "learning_rate": 1.0118043844856662e-05,
      "loss": 5.5526,
      "step": 90
    },
    {
      "epoch": 0.026700393479482856,
      "grad_norm": 8.490776062011719,
      "learning_rate": 1.0680157391793142e-05,
      "loss": 5.0148,
      "step": 95
    },
    {
      "epoch": 0.028105677346824058,
      "grad_norm": 5.514277935028076,
      "learning_rate": 1.1242270938729624e-05,
      "loss": 4.7405,
      "step": 100
    },
    {
      "epoch": 0.02951096121416526,
      "grad_norm": 3.681926965713501,
      "learning_rate": 1.1804384485666105e-05,
      "loss": 4.3935,
      "step": 105
    },
    {
      "epoch": 0.030916245081506463,
      "grad_norm": 2.6629223823547363,
      "learning_rate": 1.2366498032602587e-05,
      "loss": 4.2049,
      "step": 110
    },
    {
      "epoch": 0.03232152894884767,
      "grad_norm": 2.426866054534912,
      "learning_rate": 1.2928611579539069e-05,
      "loss": 3.9441,
      "step": 115
    },
    {
      "epoch": 0.03372681281618887,
      "grad_norm": 1.9169517755508423,
      "learning_rate": 1.3490725126475547e-05,
      "loss": 3.8311,
      "step": 120
    },
    {
      "epoch": 0.035132096683530074,
      "grad_norm": 3.5230212211608887,
      "learning_rate": 1.4052838673412031e-05,
      "loss": 3.6553,
      "step": 125
    },
    {
      "epoch": 0.03653738055087127,
      "grad_norm": 2.1056201457977295,
      "learning_rate": 1.461495222034851e-05,
      "loss": 3.67,
      "step": 130
    },
    {
      "epoch": 0.03794266441821248,
      "grad_norm": 2.1320669651031494,
      "learning_rate": 1.5177065767284992e-05,
      "loss": 3.5765,
      "step": 135
    },
    {
      "epoch": 0.03934794828555368,
      "grad_norm": 2.591682195663452,
      "learning_rate": 1.5739179314221474e-05,
      "loss": 3.5598,
      "step": 140
    },
    {
      "epoch": 0.040753232152894885,
      "grad_norm": 2.4693403244018555,
      "learning_rate": 1.6301292861157954e-05,
      "loss": 3.3373,
      "step": 145
    },
    {
      "epoch": 0.04215851602023609,
      "grad_norm": 2.9078850746154785,
      "learning_rate": 1.6863406408094438e-05,
      "loss": 3.1877,
      "step": 150
    },
    {
      "epoch": 0.04356379988757729,
      "grad_norm": 4.213356971740723,
      "learning_rate": 1.742551995503092e-05,
      "loss": 3.1931,
      "step": 155
    },
    {
      "epoch": 0.044969083754918496,
      "grad_norm": 6.048213958740234,
      "learning_rate": 1.79876335019674e-05,
      "loss": 3.0154,
      "step": 160
    },
    {
      "epoch": 0.046374367622259695,
      "grad_norm": 8.296866416931152,
      "learning_rate": 1.854974704890388e-05,
      "loss": 2.7614,
      "step": 165
    },
    {
      "epoch": 0.0477796514896009,
      "grad_norm": 11.323473930358887,
      "learning_rate": 1.9111860595840363e-05,
      "loss": 2.5561,
      "step": 170
    },
    {
      "epoch": 0.0491849353569421,
      "grad_norm": 8.23714828491211,
      "learning_rate": 1.967397414277684e-05,
      "loss": 2.0463,
      "step": 175
    },
    {
      "epoch": 0.050590219224283306,
      "grad_norm": 8.242648124694824,
      "learning_rate": 2.0236087689713324e-05,
      "loss": 1.7472,
      "step": 180
    },
    {
      "epoch": 0.051995503091624505,
      "grad_norm": 3.206883668899536,
      "learning_rate": 2.0798201236649804e-05,
      "loss": 1.5051,
      "step": 185
    },
    {
      "epoch": 0.05340078695896571,
      "grad_norm": 4.381938934326172,
      "learning_rate": 2.1360314783586284e-05,
      "loss": 1.3826,
      "step": 190
    },
    {
      "epoch": 0.05480607082630692,
      "grad_norm": 3.7853753566741943,
      "learning_rate": 2.1922428330522768e-05,
      "loss": 1.3835,
      "step": 195
    },
    {
      "epoch": 0.056211354693648116,
      "grad_norm": 3.806912422180176,
      "learning_rate": 2.248454187745925e-05,
      "loss": 1.3331,
      "step": 200
    },
    {
      "epoch": 0.05761663856098932,
      "grad_norm": 4.964791297912598,
      "learning_rate": 2.304665542439573e-05,
      "loss": 1.2944,
      "step": 205
    },
    {
      "epoch": 0.05902192242833052,
      "grad_norm": 2.8260233402252197,
      "learning_rate": 2.360876897133221e-05,
      "loss": 1.2893,
      "step": 210
    },
    {
      "epoch": 0.06042720629567173,
      "grad_norm": 4.206593036651611,
      "learning_rate": 2.4170882518268693e-05,
      "loss": 1.2806,
      "step": 215
    },
    {
      "epoch": 0.061832490163012926,
      "grad_norm": 3.243504762649536,
      "learning_rate": 2.4732996065205173e-05,
      "loss": 1.2596,
      "step": 220
    },
    {
      "epoch": 0.06323777403035413,
      "grad_norm": 3.7753634452819824,
      "learning_rate": 2.5295109612141654e-05,
      "loss": 1.2309,
      "step": 225
    },
    {
      "epoch": 0.06464305789769534,
      "grad_norm": 2.576805353164673,
      "learning_rate": 2.5857223159078137e-05,
      "loss": 1.2858,
      "step": 230
    },
    {
      "epoch": 0.06604834176503653,
      "grad_norm": 4.091485023498535,
      "learning_rate": 2.6419336706014614e-05,
      "loss": 1.2199,
      "step": 235
    },
    {
      "epoch": 0.06745362563237774,
      "grad_norm": 2.256748676300049,
      "learning_rate": 2.6981450252951095e-05,
      "loss": 1.1955,
      "step": 240
    },
    {
      "epoch": 0.06885890949971894,
      "grad_norm": 2.6037867069244385,
      "learning_rate": 2.754356379988758e-05,
      "loss": 1.1988,
      "step": 245
    },
    {
      "epoch": 0.07026419336706015,
      "grad_norm": 4.143032073974609,
      "learning_rate": 2.8105677346824062e-05,
      "loss": 1.2066,
      "step": 250
    },
    {
      "epoch": 0.07166947723440135,
      "grad_norm": 3.437196731567383,
      "learning_rate": 2.8667790893760543e-05,
      "loss": 1.1911,
      "step": 255
    },
    {
      "epoch": 0.07307476110174255,
      "grad_norm": 2.532320022583008,
      "learning_rate": 2.922990444069702e-05,
      "loss": 1.169,
      "step": 260
    },
    {
      "epoch": 0.07448004496908375,
      "grad_norm": 2.057971954345703,
      "learning_rate": 2.9792017987633503e-05,
      "loss": 1.1756,
      "step": 265
    },
    {
      "epoch": 0.07588532883642496,
      "grad_norm": 6.0402984619140625,
      "learning_rate": 3.0354131534569984e-05,
      "loss": 1.172,
      "step": 270
    },
    {
      "epoch": 0.07729061270376616,
      "grad_norm": 11.470712661743164,
      "learning_rate": 3.091624508150647e-05,
      "loss": 1.1545,
      "step": 275
    },
    {
      "epoch": 0.07869589657110736,
      "grad_norm": 3.5605509281158447,
      "learning_rate": 3.147835862844295e-05,
      "loss": 1.1581,
      "step": 280
    },
    {
      "epoch": 0.08010118043844856,
      "grad_norm": 10.173096656799316,
      "learning_rate": 3.204047217537943e-05,
      "loss": 1.1358,
      "step": 285
    },
    {
      "epoch": 0.08150646430578977,
      "grad_norm": 6.1135945320129395,
      "learning_rate": 3.260258572231591e-05,
      "loss": 1.1421,
      "step": 290
    },
    {
      "epoch": 0.08291174817313098,
      "grad_norm": 2.5845179557800293,
      "learning_rate": 3.316469926925239e-05,
      "loss": 1.1333,
      "step": 295
    },
    {
      "epoch": 0.08431703204047218,
      "grad_norm": 3.8196470737457275,
      "learning_rate": 3.3726812816188876e-05,
      "loss": 1.1352,
      "step": 300
    },
    {
      "epoch": 0.08572231590781337,
      "grad_norm": 1.9211612939834595,
      "learning_rate": 3.428892636312535e-05,
      "loss": 1.1224,
      "step": 305
    },
    {
      "epoch": 0.08712759977515458,
      "grad_norm": 7.396655559539795,
      "learning_rate": 3.485103991006184e-05,
      "loss": 1.1093,
      "step": 310
    },
    {
      "epoch": 0.08853288364249579,
      "grad_norm": 8.079272270202637,
      "learning_rate": 3.541315345699832e-05,
      "loss": 1.1327,
      "step": 315
    },
    {
      "epoch": 0.08993816750983699,
      "grad_norm": 1.7263989448547363,
      "learning_rate": 3.59752670039348e-05,
      "loss": 1.1074,
      "step": 320
    },
    {
      "epoch": 0.09134345137717818,
      "grad_norm": 6.237440586090088,
      "learning_rate": 3.653738055087128e-05,
      "loss": 1.1028,
      "step": 325
    },
    {
      "epoch": 0.09274873524451939,
      "grad_norm": 7.730904579162598,
      "learning_rate": 3.709949409780776e-05,
      "loss": 1.1231,
      "step": 330
    },
    {
      "epoch": 0.0941540191118606,
      "grad_norm": 7.369045734405518,
      "learning_rate": 3.766160764474424e-05,
      "loss": 1.1023,
      "step": 335
    },
    {
      "epoch": 0.0955593029792018,
      "grad_norm": 8.358131408691406,
      "learning_rate": 3.8223721191680726e-05,
      "loss": 1.1214,
      "step": 340
    },
    {
      "epoch": 0.09696458684654301,
      "grad_norm": 10.260603904724121,
      "learning_rate": 3.8785834738617206e-05,
      "loss": 1.1068,
      "step": 345
    },
    {
      "epoch": 0.0983698707138842,
      "grad_norm": 2.3183040618896484,
      "learning_rate": 3.934794828555368e-05,
      "loss": 1.0982,
      "step": 350
    },
    {
      "epoch": 0.0997751545812254,
      "grad_norm": 6.748996734619141,
      "learning_rate": 3.991006183249017e-05,
      "loss": 1.1149,
      "step": 355
    },
    {
      "epoch": 0.10118043844856661,
      "grad_norm": 3.9615724086761475,
      "learning_rate": 4.047217537942665e-05,
      "loss": 1.0757,
      "step": 360
    },
    {
      "epoch": 0.10258572231590782,
      "grad_norm": 6.453889846801758,
      "learning_rate": 4.103428892636313e-05,
      "loss": 1.0829,
      "step": 365
    },
    {
      "epoch": 0.10399100618324901,
      "grad_norm": 2.474149227142334,
      "learning_rate": 4.159640247329961e-05,
      "loss": 1.0964,
      "step": 370
    },
    {
      "epoch": 0.10539629005059022,
      "grad_norm": 4.072728633880615,
      "learning_rate": 4.215851602023609e-05,
      "loss": 1.0866,
      "step": 375
    },
    {
      "epoch": 0.10680157391793142,
      "grad_norm": 3.009608745574951,
      "learning_rate": 4.272062956717257e-05,
      "loss": 1.0807,
      "step": 380
    },
    {
      "epoch": 0.10820685778527263,
      "grad_norm": 8.37180233001709,
      "learning_rate": 4.3282743114109056e-05,
      "loss": 1.0558,
      "step": 385
    },
    {
      "epoch": 0.10961214165261383,
      "grad_norm": 5.847390651702881,
      "learning_rate": 4.3844856661045536e-05,
      "loss": 1.0805,
      "step": 390
    },
    {
      "epoch": 0.11101742551995503,
      "grad_norm": 5.546202182769775,
      "learning_rate": 4.440697020798201e-05,
      "loss": 1.0752,
      "step": 395
    },
    {
      "epoch": 0.11242270938729623,
      "grad_norm": 5.740989685058594,
      "learning_rate": 4.49690837549185e-05,
      "loss": 1.0631,
      "step": 400
    },
    {
      "epoch": 0.11382799325463744,
      "grad_norm": 2.483320713043213,
      "learning_rate": 4.553119730185498e-05,
      "loss": 1.0556,
      "step": 405
    },
    {
      "epoch": 0.11523327712197864,
      "grad_norm": 157.97256469726562,
      "learning_rate": 4.609331084879146e-05,
      "loss": 1.0953,
      "step": 410
    },
    {
      "epoch": 0.11663856098931984,
      "grad_norm": 6.035214900970459,
      "learning_rate": 4.665542439572794e-05,
      "loss": 1.0634,
      "step": 415
    },
    {
      "epoch": 0.11804384485666104,
      "grad_norm": 5.549474239349365,
      "learning_rate": 4.721753794266442e-05,
      "loss": 1.1014,
      "step": 420
    },
    {
      "epoch": 0.11944912872400225,
      "grad_norm": 7.824397563934326,
      "learning_rate": 4.77796514896009e-05,
      "loss": 1.0848,
      "step": 425
    },
    {
      "epoch": 0.12085441259134345,
      "grad_norm": 5.783429145812988,
      "learning_rate": 4.8341765036537386e-05,
      "loss": 1.0663,
      "step": 430
    },
    {
      "epoch": 0.12225969645868466,
      "grad_norm": 2.586413621902466,
      "learning_rate": 4.8903878583473866e-05,
      "loss": 1.0761,
      "step": 435
    },
    {
      "epoch": 0.12366498032602585,
      "grad_norm": 1.9556853771209717,
      "learning_rate": 4.9465992130410346e-05,
      "loss": 1.0505,
      "step": 440
    },
    {
      "epoch": 0.12507026419336706,
      "grad_norm": 2.7155544757843018,
      "learning_rate": 5.002810567734683e-05,
      "loss": 1.0534,
      "step": 445
    },
    {
      "epoch": 0.12647554806070826,
      "grad_norm": 2.0361337661743164,
      "learning_rate": 5.059021922428331e-05,
      "loss": 1.0671,
      "step": 450
    },
    {
      "epoch": 0.12788083192804947,
      "grad_norm": 3.4316651821136475,
      "learning_rate": 5.115233277121979e-05,
      "loss": 1.0728,
      "step": 455
    },
    {
      "epoch": 0.12928611579539068,
      "grad_norm": 10.915202140808105,
      "learning_rate": 5.1714446318156275e-05,
      "loss": 1.0571,
      "step": 460
    },
    {
      "epoch": 0.13069139966273188,
      "grad_norm": 8.766178131103516,
      "learning_rate": 5.2276559865092755e-05,
      "loss": 1.0604,
      "step": 465
    },
    {
      "epoch": 0.13209668353007306,
      "grad_norm": 6.660830497741699,
      "learning_rate": 5.283867341202923e-05,
      "loss": 1.0545,
      "step": 470
    },
    {
      "epoch": 0.13350196739741427,
      "grad_norm": 3.1207127571105957,
      "learning_rate": 5.340078695896571e-05,
      "loss": 1.0424,
      "step": 475
    },
    {
      "epoch": 0.13490725126475547,
      "grad_norm": 5.818568229675293,
      "learning_rate": 5.396290050590219e-05,
      "loss": 1.0461,
      "step": 480
    },
    {
      "epoch": 0.13631253513209668,
      "grad_norm": 5.6313910484313965,
      "learning_rate": 5.4525014052838676e-05,
      "loss": 1.0602,
      "step": 485
    },
    {
      "epoch": 0.13771781899943789,
      "grad_norm": 8.082501411437988,
      "learning_rate": 5.508712759977516e-05,
      "loss": 1.0362,
      "step": 490
    },
    {
      "epoch": 0.1391231028667791,
      "grad_norm": 5.104698181152344,
      "learning_rate": 5.564924114671164e-05,
      "loss": 1.0365,
      "step": 495
    },
    {
      "epoch": 0.1405283867341203,
      "grad_norm": 2.717695951461792,
      "learning_rate": 5.6211354693648124e-05,
      "loss": 1.0217,
      "step": 500
    },
    {
      "epoch": 0.1419336706014615,
      "grad_norm": 7.406291961669922,
      "learning_rate": 5.6773468240584605e-05,
      "loss": 1.0462,
      "step": 505
    },
    {
      "epoch": 0.1433389544688027,
      "grad_norm": 12.639225006103516,
      "learning_rate": 5.7335581787521085e-05,
      "loss": 1.0307,
      "step": 510
    },
    {
      "epoch": 0.1447442383361439,
      "grad_norm": 2.30743670463562,
      "learning_rate": 5.789769533445756e-05,
      "loss": 1.121,
      "step": 515
    },
    {
      "epoch": 0.1461495222034851,
      "grad_norm": 6.556262016296387,
      "learning_rate": 5.845980888139404e-05,
      "loss": 1.0203,
      "step": 520
    },
    {
      "epoch": 0.1475548060708263,
      "grad_norm": 7.790770530700684,
      "learning_rate": 5.9021922428330526e-05,
      "loss": 1.0727,
      "step": 525
    },
    {
      "epoch": 0.1489600899381675,
      "grad_norm": 2.6849281787872314,
      "learning_rate": 5.9584035975267006e-05,
      "loss": 1.0281,
      "step": 530
    },
    {
      "epoch": 0.1503653738055087,
      "grad_norm": 2.0983083248138428,
      "learning_rate": 6.014614952220349e-05,
      "loss": 1.0341,
      "step": 535
    },
    {
      "epoch": 0.15177065767284992,
      "grad_norm": 3.3170852661132812,
      "learning_rate": 6.070826306913997e-05,
      "loss": 1.049,
      "step": 540
    },
    {
      "epoch": 0.15317594154019112,
      "grad_norm": 2.881958484649658,
      "learning_rate": 6.127037661607645e-05,
      "loss": 1.0369,
      "step": 545
    },
    {
      "epoch": 0.15458122540753233,
      "grad_norm": 1.7803939580917358,
      "learning_rate": 6.183249016301293e-05,
      "loss": 1.048,
      "step": 550
    },
    {
      "epoch": 0.15598650927487354,
      "grad_norm": 5.816596984863281,
      "learning_rate": 6.239460370994942e-05,
      "loss": 1.0305,
      "step": 555
    },
    {
      "epoch": 0.15739179314221471,
      "grad_norm": 10.560365676879883,
      "learning_rate": 6.29567172568859e-05,
      "loss": 1.0275,
      "step": 560
    },
    {
      "epoch": 0.15879707700955592,
      "grad_norm": 7.882879734039307,
      "learning_rate": 6.351883080382237e-05,
      "loss": 1.0507,
      "step": 565
    },
    {
      "epoch": 0.16020236087689713,
      "grad_norm": 2.731837034225464,
      "learning_rate": 6.408094435075886e-05,
      "loss": 1.0264,
      "step": 570
    },
    {
      "epoch": 0.16160764474423833,
      "grad_norm": 12.15988540649414,
      "learning_rate": 6.464305789769533e-05,
      "loss": 1.0295,
      "step": 575
    },
    {
      "epoch": 0.16301292861157954,
      "grad_norm": 5.824089527130127,
      "learning_rate": 6.520517144463182e-05,
      "loss": 1.0193,
      "step": 580
    },
    {
      "epoch": 0.16441821247892074,
      "grad_norm": 5.727222442626953,
      "learning_rate": 6.57672849915683e-05,
      "loss": 1.0167,
      "step": 585
    },
    {
      "epoch": 0.16582349634626195,
      "grad_norm": 5.926555633544922,
      "learning_rate": 6.632939853850478e-05,
      "loss": 1.017,
      "step": 590
    },
    {
      "epoch": 0.16722878021360316,
      "grad_norm": 5.063398838043213,
      "learning_rate": 6.689151208544126e-05,
      "loss": 0.9991,
      "step": 595
    },
    {
      "epoch": 0.16863406408094436,
      "grad_norm": 3.315136671066284,
      "learning_rate": 6.745362563237775e-05,
      "loss": 1.027,
      "step": 600
    },
    {
      "epoch": 0.17003934794828554,
      "grad_norm": 8.511547088623047,
      "learning_rate": 6.801573917931423e-05,
      "loss": 1.0229,
      "step": 605
    },
    {
      "epoch": 0.17144463181562675,
      "grad_norm": 3.2332677841186523,
      "learning_rate": 6.85778527262507e-05,
      "loss": 1.0153,
      "step": 610
    },
    {
      "epoch": 0.17284991568296795,
      "grad_norm": 4.09757661819458,
      "learning_rate": 6.913996627318719e-05,
      "loss": 1.0119,
      "step": 615
    },
    {
      "epoch": 0.17425519955030916,
      "grad_norm": 40.794124603271484,
      "learning_rate": 6.970207982012367e-05,
      "loss": 1.0135,
      "step": 620
    },
    {
      "epoch": 0.17566048341765036,
      "grad_norm": 3.564362049102783,
      "learning_rate": 7.026419336706015e-05,
      "loss": 1.0159,
      "step": 625
    },
    {
      "epoch": 0.17706576728499157,
      "grad_norm": 3.3208792209625244,
      "learning_rate": 7.082630691399663e-05,
      "loss": 1.0091,
      "step": 630
    },
    {
      "epoch": 0.17847105115233278,
      "grad_norm": 2.5637195110321045,
      "learning_rate": 7.138842046093311e-05,
      "loss": 1.0092,
      "step": 635
    },
    {
      "epoch": 0.17987633501967398,
      "grad_norm": 4.020427703857422,
      "learning_rate": 7.19505340078696e-05,
      "loss": 1.0202,
      "step": 640
    },
    {
      "epoch": 0.1812816188870152,
      "grad_norm": 4.912067413330078,
      "learning_rate": 7.251264755480608e-05,
      "loss": 0.9985,
      "step": 645
    },
    {
      "epoch": 0.18268690275435637,
      "grad_norm": 2.0089938640594482,
      "learning_rate": 7.307476110174256e-05,
      "loss": 1.0217,
      "step": 650
    },
    {
      "epoch": 0.18409218662169757,
      "grad_norm": 10.243757247924805,
      "learning_rate": 7.363687464867903e-05,
      "loss": 1.0169,
      "step": 655
    },
    {
      "epoch": 0.18549747048903878,
      "grad_norm": 4.835886001586914,
      "learning_rate": 7.419898819561552e-05,
      "loss": 1.017,
      "step": 660
    },
    {
      "epoch": 0.18690275435637999,
      "grad_norm": 3.1483078002929688,
      "learning_rate": 7.4761101742552e-05,
      "loss": 1.0273,
      "step": 665
    },
    {
      "epoch": 0.1883080382237212,
      "grad_norm": 4.911044597625732,
      "learning_rate": 7.532321528948848e-05,
      "loss": 1.0251,
      "step": 670
    },
    {
      "epoch": 0.1897133220910624,
      "grad_norm": 3.3334503173828125,
      "learning_rate": 7.588532883642496e-05,
      "loss": 1.0054,
      "step": 675
    },
    {
      "epoch": 0.1911186059584036,
      "grad_norm": 9.200891494750977,
      "learning_rate": 7.644744238336145e-05,
      "loss": 1.0488,
      "step": 680
    },
    {
      "epoch": 0.1925238898257448,
      "grad_norm": 1.7150282859802246,
      "learning_rate": 7.700955593029792e-05,
      "loss": 1.0303,
      "step": 685
    },
    {
      "epoch": 0.19392917369308602,
      "grad_norm": 4.619067668914795,
      "learning_rate": 7.757166947723441e-05,
      "loss": 1.0174,
      "step": 690
    },
    {
      "epoch": 0.1953344575604272,
      "grad_norm": 2.54264760017395,
      "learning_rate": 7.813378302417089e-05,
      "loss": 1.0034,
      "step": 695
    },
    {
      "epoch": 0.1967397414277684,
      "grad_norm": 1.559621810913086,
      "learning_rate": 7.869589657110736e-05,
      "loss": 0.995,
      "step": 700
    },
    {
      "epoch": 0.1981450252951096,
      "grad_norm": 3.9408676624298096,
      "learning_rate": 7.925801011804385e-05,
      "loss": 1.006,
      "step": 705
    },
    {
      "epoch": 0.1995503091624508,
      "grad_norm": 5.564051151275635,
      "learning_rate": 7.982012366498033e-05,
      "loss": 1.0215,
      "step": 710
    },
    {
      "epoch": 0.20095559302979202,
      "grad_norm": 3.5051417350769043,
      "learning_rate": 8.038223721191681e-05,
      "loss": 1.0055,
      "step": 715
    },
    {
      "epoch": 0.20236087689713322,
      "grad_norm": 5.896759510040283,
      "learning_rate": 8.09443507588533e-05,
      "loss": 1.0162,
      "step": 720
    },
    {
      "epoch": 0.20376616076447443,
      "grad_norm": 2.0476253032684326,
      "learning_rate": 8.150646430578978e-05,
      "loss": 1.0338,
      "step": 725
    },
    {
      "epoch": 0.20517144463181564,
      "grad_norm": 4.3316545486450195,
      "learning_rate": 8.206857785272625e-05,
      "loss": 1.0276,
      "step": 730
    },
    {
      "epoch": 0.20657672849915684,
      "grad_norm": 3.6653268337249756,
      "learning_rate": 8.263069139966274e-05,
      "loss": 0.9991,
      "step": 735
    },
    {
      "epoch": 0.20798201236649802,
      "grad_norm": 2.070199966430664,
      "learning_rate": 8.319280494659922e-05,
      "loss": 0.999,
      "step": 740
    },
    {
      "epoch": 0.20938729623383923,
      "grad_norm": 5.528500556945801,
      "learning_rate": 8.375491849353569e-05,
      "loss": 0.9958,
      "step": 745
    },
    {
      "epoch": 0.21079258010118043,
      "grad_norm": 5.513944149017334,
      "learning_rate": 8.431703204047218e-05,
      "loss": 0.9946,
      "step": 750
    },
    {
      "epoch": 0.21219786396852164,
      "grad_norm": 2.3669729232788086,
      "learning_rate": 8.487914558740866e-05,
      "loss": 0.9898,
      "step": 755
    },
    {
      "epoch": 0.21360314783586284,
      "grad_norm": 6.63640022277832,
      "learning_rate": 8.544125913434514e-05,
      "loss": 1.0013,
      "step": 760
    },
    {
      "epoch": 0.21500843170320405,
      "grad_norm": 4.0495219230651855,
      "learning_rate": 8.600337268128162e-05,
      "loss": 1.0017,
      "step": 765
    },
    {
      "epoch": 0.21641371557054526,
      "grad_norm": 2.7395238876342773,
      "learning_rate": 8.656548622821811e-05,
      "loss": 0.999,
      "step": 770
    },
    {
      "epoch": 0.21781899943788646,
      "grad_norm": 3.2042696475982666,
      "learning_rate": 8.712759977515458e-05,
      "loss": 0.9674,
      "step": 775
    },
    {
      "epoch": 0.21922428330522767,
      "grad_norm": 4.542521953582764,
      "learning_rate": 8.768971332209107e-05,
      "loss": 0.9904,
      "step": 780
    },
    {
      "epoch": 0.22062956717256885,
      "grad_norm": 2.6508102416992188,
      "learning_rate": 8.825182686902755e-05,
      "loss": 0.9767,
      "step": 785
    },
    {
      "epoch": 0.22203485103991005,
      "grad_norm": 2.0267133712768555,
      "learning_rate": 8.881394041596402e-05,
      "loss": 0.9847,
      "step": 790
    },
    {
      "epoch": 0.22344013490725126,
      "grad_norm": 1.6072683334350586,
      "learning_rate": 8.93760539629005e-05,
      "loss": 0.9746,
      "step": 795
    },
    {
      "epoch": 0.22484541877459246,
      "grad_norm": 17.596202850341797,
      "learning_rate": 8.9938167509837e-05,
      "loss": 0.9854,
      "step": 800
    },
    {
      "epoch": 0.22625070264193367,
      "grad_norm": 3.559966564178467,
      "learning_rate": 9.050028105677347e-05,
      "loss": 1.0136,
      "step": 805
    },
    {
      "epoch": 0.22765598650927488,
      "grad_norm": 4.577120780944824,
      "learning_rate": 9.106239460370995e-05,
      "loss": 1.0072,
      "step": 810
    },
    {
      "epoch": 0.22906127037661608,
      "grad_norm": 5.16311502456665,
      "learning_rate": 9.162450815064644e-05,
      "loss": 1.0027,
      "step": 815
    },
    {
      "epoch": 0.2304665542439573,
      "grad_norm": 6.010768413543701,
      "learning_rate": 9.218662169758291e-05,
      "loss": 1.0,
      "step": 820
    },
    {
      "epoch": 0.2318718381112985,
      "grad_norm": 3.002239942550659,
      "learning_rate": 9.27487352445194e-05,
      "loss": 0.9899,
      "step": 825
    },
    {
      "epoch": 0.23327712197863967,
      "grad_norm": 4.010767459869385,
      "learning_rate": 9.331084879145588e-05,
      "loss": 0.9793,
      "step": 830
    },
    {
      "epoch": 0.23468240584598088,
      "grad_norm": 2.6313583850860596,
      "learning_rate": 9.387296233839236e-05,
      "loss": 0.9974,
      "step": 835
    },
    {
      "epoch": 0.23608768971332209,
      "grad_norm": 2.2778332233428955,
      "learning_rate": 9.443507588532884e-05,
      "loss": 0.9862,
      "step": 840
    },
    {
      "epoch": 0.2374929735806633,
      "grad_norm": 2.9743850231170654,
      "learning_rate": 9.499718943226532e-05,
      "loss": 0.988,
      "step": 845
    },
    {
      "epoch": 0.2388982574480045,
      "grad_norm": 38.51722717285156,
      "learning_rate": 9.55593029792018e-05,
      "loss": 1.0115,
      "step": 850
    },
    {
      "epoch": 0.2403035413153457,
      "grad_norm": 2.403254508972168,
      "learning_rate": 9.612141652613828e-05,
      "loss": 1.0015,
      "step": 855
    },
    {
      "epoch": 0.2417088251826869,
      "grad_norm": 5.464376449584961,
      "learning_rate": 9.668353007307477e-05,
      "loss": 1.0364,
      "step": 860
    },
    {
      "epoch": 0.24311410905002812,
      "grad_norm": 3.9224069118499756,
      "learning_rate": 9.724564362001124e-05,
      "loss": 0.996,
      "step": 865
    },
    {
      "epoch": 0.24451939291736932,
      "grad_norm": 3.181598424911499,
      "learning_rate": 9.780775716694773e-05,
      "loss": 0.9846,
      "step": 870
    },
    {
      "epoch": 0.2459246767847105,
      "grad_norm": 3.1867926120758057,
      "learning_rate": 9.83698707138842e-05,
      "loss": 0.976,
      "step": 875
    },
    {
      "epoch": 0.2473299606520517,
      "grad_norm": 3.488450765609741,
      "learning_rate": 9.893198426082069e-05,
      "loss": 0.9664,
      "step": 880
    },
    {
      "epoch": 0.2487352445193929,
      "grad_norm": 3.7854244709014893,
      "learning_rate": 9.949409780775717e-05,
      "loss": 0.9857,
      "step": 885
    },
    {
      "epoch": 0.2501405283867341,
      "grad_norm": 23.101707458496094,
      "learning_rate": 0.00010005621135469365,
      "loss": 0.9901,
      "step": 890
    },
    {
      "epoch": 0.2515458122540753,
      "grad_norm": 1.0758060216903687,
      "learning_rate": 0.00010061832490163013,
      "loss": 0.9794,
      "step": 895
    },
    {
      "epoch": 0.25295109612141653,
      "grad_norm": 2.6756820678710938,
      "learning_rate": 0.00010118043844856661,
      "loss": 1.006,
      "step": 900
    },
    {
      "epoch": 0.2543563799887577,
      "grad_norm": 3.2079479694366455,
      "learning_rate": 0.00010174255199550309,
      "loss": 0.9816,
      "step": 905
    },
    {
      "epoch": 0.25576166385609894,
      "grad_norm": 2.79168963432312,
      "learning_rate": 0.00010230466554243957,
      "loss": 0.9726,
      "step": 910
    },
    {
      "epoch": 0.2571669477234401,
      "grad_norm": 3.4075028896331787,
      "learning_rate": 0.00010286677908937605,
      "loss": 1.0016,
      "step": 915
    },
    {
      "epoch": 0.25857223159078135,
      "grad_norm": 2.6918251514434814,
      "learning_rate": 0.00010342889263631255,
      "loss": 0.9852,
      "step": 920
    },
    {
      "epoch": 0.25997751545812253,
      "grad_norm": 1.3752806186676025,
      "learning_rate": 0.00010399100618324902,
      "loss": 0.997,
      "step": 925
    },
    {
      "epoch": 0.26138279932546377,
      "grad_norm": 1.351408839225769,
      "learning_rate": 0.00010455311973018551,
      "loss": 0.9827,
      "step": 930
    },
    {
      "epoch": 0.26278808319280494,
      "grad_norm": 2.1732945442199707,
      "learning_rate": 0.00010511523327712198,
      "loss": 0.9762,
      "step": 935
    },
    {
      "epoch": 0.2641933670601461,
      "grad_norm": 2.2650272846221924,
      "learning_rate": 0.00010567734682405846,
      "loss": 0.9887,
      "step": 940
    },
    {
      "epoch": 0.26559865092748736,
      "grad_norm": 1.916999101638794,
      "learning_rate": 0.00010623946037099494,
      "loss": 0.9847,
      "step": 945
    },
    {
      "epoch": 0.26700393479482853,
      "grad_norm": 2.3614659309387207,
      "learning_rate": 0.00010680157391793142,
      "loss": 0.9843,
      "step": 950
    },
    {
      "epoch": 0.26840921866216977,
      "grad_norm": 0.9917680621147156,
      "learning_rate": 0.00010736368746486792,
      "loss": 0.9775,
      "step": 955
    },
    {
      "epoch": 0.26981450252951095,
      "grad_norm": 5.091005802154541,
      "learning_rate": 0.00010792580101180438,
      "loss": 0.9921,
      "step": 960
    },
    {
      "epoch": 0.2712197863968522,
      "grad_norm": 4.194843769073486,
      "learning_rate": 0.00010848791455874088,
      "loss": 0.9906,
      "step": 965
    },
    {
      "epoch": 0.27262507026419336,
      "grad_norm": 12.555365562438965,
      "learning_rate": 0.00010905002810567735,
      "loss": 0.983,
      "step": 970
    },
    {
      "epoch": 0.2740303541315346,
      "grad_norm": 6.976730823516846,
      "learning_rate": 0.00010961214165261384,
      "loss": 0.9863,
      "step": 975
    },
    {
      "epoch": 0.27543563799887577,
      "grad_norm": 5.128290176391602,
      "learning_rate": 0.00011017425519955031,
      "loss": 0.9908,
      "step": 980
    },
    {
      "epoch": 0.27684092186621695,
      "grad_norm": 1.3355897665023804,
      "learning_rate": 0.00011073636874648679,
      "loss": 0.9693,
      "step": 985
    },
    {
      "epoch": 0.2782462057335582,
      "grad_norm": 5.7703680992126465,
      "learning_rate": 0.00011129848229342327,
      "loss": 0.9829,
      "step": 990
    },
    {
      "epoch": 0.27965148960089936,
      "grad_norm": 3.747619867324829,
      "learning_rate": 0.00011186059584035975,
      "loss": 0.9776,
      "step": 995
    },
    {
      "epoch": 0.2810567734682406,
      "grad_norm": 3.929070472717285,
      "learning_rate": 0.00011242270938729625,
      "loss": 0.9846,
      "step": 1000
    },
    {
      "epoch": 0.2824620573355818,
      "grad_norm": 1.7742552757263184,
      "learning_rate": 0.00011298482293423271,
      "loss": 0.9908,
      "step": 1005
    },
    {
      "epoch": 0.283867341202923,
      "grad_norm": 6.783897399902344,
      "learning_rate": 0.00011354693648116921,
      "loss": 0.963,
      "step": 1010
    },
    {
      "epoch": 0.2852726250702642,
      "grad_norm": 1.7316850423812866,
      "learning_rate": 0.00011410905002810568,
      "loss": 0.9804,
      "step": 1015
    },
    {
      "epoch": 0.2866779089376054,
      "grad_norm": 1.1217191219329834,
      "learning_rate": 0.00011467116357504217,
      "loss": 0.9709,
      "step": 1020
    },
    {
      "epoch": 0.2880831928049466,
      "grad_norm": 5.882577896118164,
      "learning_rate": 0.00011523327712197864,
      "loss": 1.0065,
      "step": 1025
    },
    {
      "epoch": 0.2894884766722878,
      "grad_norm": 2.939326524734497,
      "learning_rate": 0.00011579539066891512,
      "loss": 0.9849,
      "step": 1030
    },
    {
      "epoch": 0.290893760539629,
      "grad_norm": 3.1381020545959473,
      "learning_rate": 0.0001163575042158516,
      "loss": 1.0186,
      "step": 1035
    },
    {
      "epoch": 0.2922990444069702,
      "grad_norm": 0.9430264234542847,
      "learning_rate": 0.00011691961776278808,
      "loss": 0.9546,
      "step": 1040
    },
    {
      "epoch": 0.2937043282743114,
      "grad_norm": 1.3062149286270142,
      "learning_rate": 0.00011748173130972458,
      "loss": 0.9848,
      "step": 1045
    },
    {
      "epoch": 0.2951096121416526,
      "grad_norm": 1.2571016550064087,
      "learning_rate": 0.00011804384485666105,
      "loss": 0.9755,
      "step": 1050
    },
    {
      "epoch": 0.29651489600899383,
      "grad_norm": 0.9796061515808105,
      "learning_rate": 0.00011860595840359754,
      "loss": 0.984,
      "step": 1055
    },
    {
      "epoch": 0.297920179876335,
      "grad_norm": 1.193790316581726,
      "learning_rate": 0.00011916807195053401,
      "loss": 0.964,
      "step": 1060
    },
    {
      "epoch": 0.29932546374367625,
      "grad_norm": 4.409701347351074,
      "learning_rate": 0.0001197301854974705,
      "loss": 0.9847,
      "step": 1065
    },
    {
      "epoch": 0.3007307476110174,
      "grad_norm": 1.948830246925354,
      "learning_rate": 0.00012029229904440697,
      "loss": 1.0549,
      "step": 1070
    },
    {
      "epoch": 0.3021360314783586,
      "grad_norm": 3.3599255084991455,
      "learning_rate": 0.00012085441259134345,
      "loss": 0.9967,
      "step": 1075
    },
    {
      "epoch": 0.30354131534569984,
      "grad_norm": 2.4269542694091797,
      "learning_rate": 0.00012141652613827993,
      "loss": 0.983,
      "step": 1080
    },
    {
      "epoch": 0.304946599213041,
      "grad_norm": 0.9867331385612488,
      "learning_rate": 0.00012197863968521641,
      "loss": 0.9672,
      "step": 1085
    },
    {
      "epoch": 0.30635188308038225,
      "grad_norm": 1.6596901416778564,
      "learning_rate": 0.0001225407532321529,
      "loss": 0.9783,
      "step": 1090
    },
    {
      "epoch": 0.3077571669477234,
      "grad_norm": 3.4487783908843994,
      "learning_rate": 0.00012310286677908938,
      "loss": 0.9693,
      "step": 1095
    },
    {
      "epoch": 0.30916245081506466,
      "grad_norm": 1.8991751670837402,
      "learning_rate": 0.00012366498032602587,
      "loss": 0.9756,
      "step": 1100
    },
    {
      "epoch": 0.31056773468240584,
      "grad_norm": 1.1805132627487183,
      "learning_rate": 0.00012422709387296233,
      "loss": 0.9629,
      "step": 1105
    },
    {
      "epoch": 0.31197301854974707,
      "grad_norm": 2.110664129257202,
      "learning_rate": 0.00012478920741989884,
      "loss": 0.9692,
      "step": 1110
    },
    {
      "epoch": 0.31337830241708825,
      "grad_norm": 1.4803400039672852,
      "learning_rate": 0.0001253513209668353,
      "loss": 0.9595,
      "step": 1115
    },
    {
      "epoch": 0.31478358628442943,
      "grad_norm": 4.135382652282715,
      "learning_rate": 0.0001259134345137718,
      "loss": 0.9667,
      "step": 1120
    },
    {
      "epoch": 0.31618887015177066,
      "grad_norm": 1.764575719833374,
      "learning_rate": 0.00012647554806070828,
      "loss": 0.9922,
      "step": 1125
    },
    {
      "epoch": 0.31759415401911184,
      "grad_norm": 1.1572849750518799,
      "learning_rate": 0.00012703766160764474,
      "loss": 0.9887,
      "step": 1130
    },
    {
      "epoch": 0.3189994378864531,
      "grad_norm": 1.766047477722168,
      "learning_rate": 0.00012759977515458123,
      "loss": 0.9518,
      "step": 1135
    },
    {
      "epoch": 0.32040472175379425,
      "grad_norm": 0.9366393685340881,
      "learning_rate": 0.0001281618887015177,
      "loss": 0.9745,
      "step": 1140
    },
    {
      "epoch": 0.3218100056211355,
      "grad_norm": 2.0685994625091553,
      "learning_rate": 0.0001287240022484542,
      "loss": 0.9711,
      "step": 1145
    },
    {
      "epoch": 0.32321528948847666,
      "grad_norm": 1.1759870052337646,
      "learning_rate": 0.00012928611579539066,
      "loss": 0.9752,
      "step": 1150
    },
    {
      "epoch": 0.3246205733558179,
      "grad_norm": 1.212844967842102,
      "learning_rate": 0.00012984822934232717,
      "loss": 0.9411,
      "step": 1155
    },
    {
      "epoch": 0.3260258572231591,
      "grad_norm": 3.3538694381713867,
      "learning_rate": 0.00013041034288926363,
      "loss": 0.9753,
      "step": 1160
    },
    {
      "epoch": 0.32743114109050026,
      "grad_norm": 2.4922068119049072,
      "learning_rate": 0.00013097245643620012,
      "loss": 0.9722,
      "step": 1165
    },
    {
      "epoch": 0.3288364249578415,
      "grad_norm": 38.42106628417969,
      "learning_rate": 0.0001315345699831366,
      "loss": 0.9676,
      "step": 1170
    },
    {
      "epoch": 0.33024170882518267,
      "grad_norm": 1.5579969882965088,
      "learning_rate": 0.00013209668353007307,
      "loss": 1.0097,
      "step": 1175
    },
    {
      "epoch": 0.3316469926925239,
      "grad_norm": 4.966186046600342,
      "learning_rate": 0.00013265879707700956,
      "loss": 0.9647,
      "step": 1180
    },
    {
      "epoch": 0.3330522765598651,
      "grad_norm": 3.0127198696136475,
      "learning_rate": 0.00013322091062394604,
      "loss": 0.9513,
      "step": 1185
    },
    {
      "epoch": 0.3344575604272063,
      "grad_norm": 1.1111674308776855,
      "learning_rate": 0.00013378302417088253,
      "loss": 0.9383,
      "step": 1190
    },
    {
      "epoch": 0.3358628442945475,
      "grad_norm": 1.101528525352478,
      "learning_rate": 0.000134345137717819,
      "loss": 0.9508,
      "step": 1195
    },
    {
      "epoch": 0.3372681281618887,
      "grad_norm": 4.363356113433838,
      "learning_rate": 0.0001349072512647555,
      "loss": 0.9496,
      "step": 1200
    },
    {
      "epoch": 0.3386734120292299,
      "grad_norm": 1.4446074962615967,
      "learning_rate": 0.00013546936481169196,
      "loss": 0.9652,
      "step": 1205
    },
    {
      "epoch": 0.3400786958965711,
      "grad_norm": 5.832637786865234,
      "learning_rate": 0.00013603147835862845,
      "loss": 0.976,
      "step": 1210
    },
    {
      "epoch": 0.3414839797639123,
      "grad_norm": 3.8764944076538086,
      "learning_rate": 0.00013659359190556494,
      "loss": 0.9526,
      "step": 1215
    },
    {
      "epoch": 0.3428892636312535,
      "grad_norm": 6.050147533416748,
      "learning_rate": 0.0001371557054525014,
      "loss": 0.9262,
      "step": 1220
    },
    {
      "epoch": 0.3442945474985947,
      "grad_norm": 2.3198440074920654,
      "learning_rate": 0.00013771781899943789,
      "loss": 0.9513,
      "step": 1225
    },
    {
      "epoch": 0.3456998313659359,
      "grad_norm": 10.792181015014648,
      "learning_rate": 0.00013827993254637437,
      "loss": 0.9447,
      "step": 1230
    },
    {
      "epoch": 0.34710511523327714,
      "grad_norm": 2.717088222503662,
      "learning_rate": 0.00013884204609331086,
      "loss": 0.9519,
      "step": 1235
    },
    {
      "epoch": 0.3485103991006183,
      "grad_norm": 1.6102532148361206,
      "learning_rate": 0.00013940415964024735,
      "loss": 0.9635,
      "step": 1240
    },
    {
      "epoch": 0.34991568296795955,
      "grad_norm": 8.83648681640625,
      "learning_rate": 0.00013996627318718383,
      "loss": 1.0115,
      "step": 1245
    },
    {
      "epoch": 0.35132096683530073,
      "grad_norm": 2.410649299621582,
      "learning_rate": 0.0001405283867341203,
      "loss": 0.9562,
      "step": 1250
    },
    {
      "epoch": 0.3527262507026419,
      "grad_norm": 1.7195380926132202,
      "learning_rate": 0.00014109050028105678,
      "loss": 0.9423,
      "step": 1255
    },
    {
      "epoch": 0.35413153456998314,
      "grad_norm": 2.283205270767212,
      "learning_rate": 0.00014165261382799327,
      "loss": 0.9423,
      "step": 1260
    },
    {
      "epoch": 0.3555368184373243,
      "grad_norm": 1.9860799312591553,
      "learning_rate": 0.00014221472737492973,
      "loss": 0.9557,
      "step": 1265
    },
    {
      "epoch": 0.35694210230466555,
      "grad_norm": 2.0284266471862793,
      "learning_rate": 0.00014277684092186622,
      "loss": 0.9422,
      "step": 1270
    },
    {
      "epoch": 0.35834738617200673,
      "grad_norm": 2.489513635635376,
      "learning_rate": 0.0001433389544688027,
      "loss": 0.9776,
      "step": 1275
    },
    {
      "epoch": 0.35975267003934797,
      "grad_norm": 1.5750313997268677,
      "learning_rate": 0.0001439010680157392,
      "loss": 0.9573,
      "step": 1280
    },
    {
      "epoch": 0.36115795390668914,
      "grad_norm": 3.0681686401367188,
      "learning_rate": 0.00014446318156267568,
      "loss": 0.9514,
      "step": 1285
    },
    {
      "epoch": 0.3625632377740304,
      "grad_norm": 3.1011362075805664,
      "learning_rate": 0.00014502529510961216,
      "loss": 0.9721,
      "step": 1290
    },
    {
      "epoch": 0.36396852164137156,
      "grad_norm": 1.5543673038482666,
      "learning_rate": 0.00014558740865654862,
      "loss": 0.9467,
      "step": 1295
    },
    {
      "epoch": 0.36537380550871273,
      "grad_norm": 2.299344539642334,
      "learning_rate": 0.0001461495222034851,
      "loss": 0.9351,
      "step": 1300
    },
    {
      "epoch": 0.36677908937605397,
      "grad_norm": 3.671466588973999,
      "learning_rate": 0.0001467116357504216,
      "loss": 0.967,
      "step": 1305
    },
    {
      "epoch": 0.36818437324339515,
      "grad_norm": 1.6322294473648071,
      "learning_rate": 0.00014727374929735806,
      "loss": 0.9511,
      "step": 1310
    },
    {
      "epoch": 0.3695896571107364,
      "grad_norm": 1.7458432912826538,
      "learning_rate": 0.00014783586284429457,
      "loss": 1.0559,
      "step": 1315
    },
    {
      "epoch": 0.37099494097807756,
      "grad_norm": 1.6651675701141357,
      "learning_rate": 0.00014839797639123103,
      "loss": 0.9355,
      "step": 1320
    },
    {
      "epoch": 0.3724002248454188,
      "grad_norm": 3.2860312461853027,
      "learning_rate": 0.00014896008993816752,
      "loss": 0.9926,
      "step": 1325
    },
    {
      "epoch": 0.37380550871275997,
      "grad_norm": 1.6739774942398071,
      "learning_rate": 0.000149522203485104,
      "loss": 0.9617,
      "step": 1330
    },
    {
      "epoch": 0.3752107925801012,
      "grad_norm": 1.423102855682373,
      "learning_rate": 0.0001500843170320405,
      "loss": 0.9352,
      "step": 1335
    },
    {
      "epoch": 0.3766160764474424,
      "grad_norm": 4.285151958465576,
      "learning_rate": 0.00015064643057897695,
      "loss": 0.9744,
      "step": 1340
    },
    {
      "epoch": 0.37802136031478356,
      "grad_norm": 0.7504451870918274,
      "learning_rate": 0.00015120854412591344,
      "loss": 0.9643,
      "step": 1345
    },
    {
      "epoch": 0.3794266441821248,
      "grad_norm": 2.98614764213562,
      "learning_rate": 0.00015177065767284993,
      "loss": 0.941,
      "step": 1350
    },
    {
      "epoch": 0.380831928049466,
      "grad_norm": 2.592519521713257,
      "learning_rate": 0.0001523327712197864,
      "loss": 1.0328,
      "step": 1355
    },
    {
      "epoch": 0.3822372119168072,
      "grad_norm": 1.2264142036437988,
      "learning_rate": 0.0001528948847667229,
      "loss": 0.9446,
      "step": 1360
    },
    {
      "epoch": 0.3836424957841484,
      "grad_norm": 1.3170299530029297,
      "learning_rate": 0.00015345699831365936,
      "loss": 0.9687,
      "step": 1365
    },
    {
      "epoch": 0.3850477796514896,
      "grad_norm": 1.306921362876892,
      "learning_rate": 0.00015401911186059585,
      "loss": 0.9547,
      "step": 1370
    },
    {
      "epoch": 0.3864530635188308,
      "grad_norm": 1.5683096647262573,
      "learning_rate": 0.00015458122540753234,
      "loss": 0.9549,
      "step": 1375
    },
    {
      "epoch": 0.38785834738617203,
      "grad_norm": 1.1363381147384644,
      "learning_rate": 0.00015514333895446882,
      "loss": 0.9694,
      "step": 1380
    },
    {
      "epoch": 0.3892636312535132,
      "grad_norm": 1.0660864114761353,
      "learning_rate": 0.00015570545250140528,
      "loss": 0.9643,
      "step": 1385
    },
    {
      "epoch": 0.3906689151208544,
      "grad_norm": 1.0931308269500732,
      "learning_rate": 0.00015626756604834177,
      "loss": 0.9533,
      "step": 1390
    },
    {
      "epoch": 0.3920741989881956,
      "grad_norm": 1.278201937675476,
      "learning_rate": 0.00015682967959527826,
      "loss": 0.9554,
      "step": 1395
    },
    {
      "epoch": 0.3934794828555368,
      "grad_norm": 0.7979600429534912,
      "learning_rate": 0.00015739179314221472,
      "loss": 0.9313,
      "step": 1400
    },
    {
      "epoch": 0.39488476672287803,
      "grad_norm": 2.599208116531372,
      "learning_rate": 0.00015795390668915123,
      "loss": 0.942,
      "step": 1405
    },
    {
      "epoch": 0.3962900505902192,
      "grad_norm": 1.375581979751587,
      "learning_rate": 0.0001585160202360877,
      "loss": 0.9513,
      "step": 1410
    },
    {
      "epoch": 0.39769533445756045,
      "grad_norm": 1.6056209802627563,
      "learning_rate": 0.00015907813378302418,
      "loss": 0.9683,
      "step": 1415
    },
    {
      "epoch": 0.3991006183249016,
      "grad_norm": 2.402937173843384,
      "learning_rate": 0.00015964024732996067,
      "loss": 0.9309,
      "step": 1420
    },
    {
      "epoch": 0.40050590219224286,
      "grad_norm": 4.955761909484863,
      "learning_rate": 0.00016020236087689715,
      "loss": 0.9383,
      "step": 1425
    },
    {
      "epoch": 0.40191118605958404,
      "grad_norm": 0.937556266784668,
      "learning_rate": 0.00016076447442383361,
      "loss": 0.9778,
      "step": 1430
    },
    {
      "epoch": 0.4033164699269252,
      "grad_norm": 0.992476761341095,
      "learning_rate": 0.0001613265879707701,
      "loss": 0.9425,
      "step": 1435
    },
    {
      "epoch": 0.40472175379426645,
      "grad_norm": 0.7373234629631042,
      "learning_rate": 0.0001618887015177066,
      "loss": 0.9552,
      "step": 1440
    },
    {
      "epoch": 0.4061270376616076,
      "grad_norm": 2.2464535236358643,
      "learning_rate": 0.00016245081506464305,
      "loss": 0.9572,
      "step": 1445
    },
    {
      "epoch": 0.40753232152894886,
      "grad_norm": 2.751627206802368,
      "learning_rate": 0.00016301292861157956,
      "loss": 0.9556,
      "step": 1450
    },
    {
      "epoch": 0.40893760539629004,
      "grad_norm": 0.9762445688247681,
      "learning_rate": 0.00016357504215851602,
      "loss": 0.9433,
      "step": 1455
    },
    {
      "epoch": 0.41034288926363127,
      "grad_norm": 2.10992431640625,
      "learning_rate": 0.0001641371557054525,
      "loss": 0.95,
      "step": 1460
    },
    {
      "epoch": 0.41174817313097245,
      "grad_norm": 2.1031289100646973,
      "learning_rate": 0.000164699269252389,
      "loss": 0.9414,
      "step": 1465
    },
    {
      "epoch": 0.4131534569983137,
      "grad_norm": 1.6147500276565552,
      "learning_rate": 0.00016526138279932548,
      "loss": 0.9454,
      "step": 1470
    },
    {
      "epoch": 0.41455874086565486,
      "grad_norm": 2.729752779006958,
      "learning_rate": 0.00016582349634626194,
      "loss": 0.9483,
      "step": 1475
    },
    {
      "epoch": 0.41596402473299604,
      "grad_norm": 0.8766548037528992,
      "learning_rate": 0.00016638560989319843,
      "loss": 0.9263,
      "step": 1480
    },
    {
      "epoch": 0.4173693086003373,
      "grad_norm": 1.7011388540267944,
      "learning_rate": 0.00016694772344013492,
      "loss": 0.985,
      "step": 1485
    },
    {
      "epoch": 0.41877459246767845,
      "grad_norm": 1.1635528802871704,
      "learning_rate": 0.00016750983698707138,
      "loss": 0.9511,
      "step": 1490
    },
    {
      "epoch": 0.4201798763350197,
      "grad_norm": 1.0107743740081787,
      "learning_rate": 0.0001680719505340079,
      "loss": 0.9545,
      "step": 1495
    },
    {
      "epoch": 0.42158516020236086,
      "grad_norm": 1.4937266111373901,
      "learning_rate": 0.00016863406408094435,
      "loss": 0.9632,
      "step": 1500
    },
    {
      "epoch": 0.4229904440697021,
      "grad_norm": 0.9577686190605164,
      "learning_rate": 0.00016919617762788084,
      "loss": 0.9236,
      "step": 1505
    },
    {
      "epoch": 0.4243957279370433,
      "grad_norm": 1.1507699489593506,
      "learning_rate": 0.00016975829117481733,
      "loss": 0.9276,
      "step": 1510
    },
    {
      "epoch": 0.4258010118043845,
      "grad_norm": 1.34715735912323,
      "learning_rate": 0.00017032040472175381,
      "loss": 0.9482,
      "step": 1515
    },
    {
      "epoch": 0.4272062956717257,
      "grad_norm": 1.894851803779602,
      "learning_rate": 0.00017088251826869027,
      "loss": 0.9504,
      "step": 1520
    },
    {
      "epoch": 0.42861157953906687,
      "grad_norm": 2.9545974731445312,
      "learning_rate": 0.00017144463181562676,
      "loss": 0.9504,
      "step": 1525
    },
    {
      "epoch": 0.4300168634064081,
      "grad_norm": 1.8173575401306152,
      "learning_rate": 0.00017200674536256325,
      "loss": 0.9181,
      "step": 1530
    },
    {
      "epoch": 0.4314221472737493,
      "grad_norm": 0.6965020298957825,
      "learning_rate": 0.0001725688589094997,
      "loss": 0.9325,
      "step": 1535
    },
    {
      "epoch": 0.4328274311410905,
      "grad_norm": 0.732700526714325,
      "learning_rate": 0.00017313097245643622,
      "loss": 0.963,
      "step": 1540
    },
    {
      "epoch": 0.4342327150084317,
      "grad_norm": 0.7434464693069458,
      "learning_rate": 0.00017369308600337268,
      "loss": 0.9319,
      "step": 1545
    },
    {
      "epoch": 0.4356379988757729,
      "grad_norm": 0.7588825821876526,
      "learning_rate": 0.00017425519955030917,
      "loss": 0.9438,
      "step": 1550
    },
    {
      "epoch": 0.4370432827431141,
      "grad_norm": 1.028361201286316,
      "learning_rate": 0.00017481731309724566,
      "loss": 0.9543,
      "step": 1555
    },
    {
      "epoch": 0.43844856661045534,
      "grad_norm": 0.9480350613594055,
      "learning_rate": 0.00017537942664418214,
      "loss": 0.9612,
      "step": 1560
    },
    {
      "epoch": 0.4398538504777965,
      "grad_norm": 0.8604362607002258,
      "learning_rate": 0.0001759415401911186,
      "loss": 0.9319,
      "step": 1565
    },
    {
      "epoch": 0.4412591343451377,
      "grad_norm": 1.40911066532135,
      "learning_rate": 0.0001765036537380551,
      "loss": 0.9613,
      "step": 1570
    },
    {
      "epoch": 0.4426644182124789,
      "grad_norm": 0.7593716979026794,
      "learning_rate": 0.00017706576728499158,
      "loss": 0.9468,
      "step": 1575
    },
    {
      "epoch": 0.4440697020798201,
      "grad_norm": 1.2685699462890625,
      "learning_rate": 0.00017762788083192804,
      "loss": 0.9466,
      "step": 1580
    },
    {
      "epoch": 0.44547498594716134,
      "grad_norm": 0.813873291015625,
      "learning_rate": 0.00017818999437886455,
      "loss": 0.9277,
      "step": 1585
    },
    {
      "epoch": 0.4468802698145025,
      "grad_norm": 2.493716239929199,
      "learning_rate": 0.000178752107925801,
      "loss": 0.9486,
      "step": 1590
    },
    {
      "epoch": 0.44828555368184375,
      "grad_norm": 4.12313175201416,
      "learning_rate": 0.0001793142214727375,
      "loss": 0.9498,
      "step": 1595
    },
    {
      "epoch": 0.44969083754918493,
      "grad_norm": 2.0010995864868164,
      "learning_rate": 0.000179876335019674,
      "loss": 0.9319,
      "step": 1600
    },
    {
      "epoch": 0.45109612141652616,
      "grad_norm": 1.3007354736328125,
      "learning_rate": 0.00018043844856661047,
      "loss": 0.9427,
      "step": 1605
    },
    {
      "epoch": 0.45250140528386734,
      "grad_norm": 2.6882872581481934,
      "learning_rate": 0.00018100056211354693,
      "loss": 0.9784,
      "step": 1610
    },
    {
      "epoch": 0.4539066891512085,
      "grad_norm": 0.7096717953681946,
      "learning_rate": 0.00018156267566048342,
      "loss": 0.9584,
      "step": 1615
    },
    {
      "epoch": 0.45531197301854975,
      "grad_norm": 0.6443789601325989,
      "learning_rate": 0.0001821247892074199,
      "loss": 0.939,
      "step": 1620
    },
    {
      "epoch": 0.45671725688589093,
      "grad_norm": 0.7829910516738892,
      "learning_rate": 0.00018268690275435637,
      "loss": 0.9273,
      "step": 1625
    },
    {
      "epoch": 0.45812254075323217,
      "grad_norm": 1.2124425172805786,
      "learning_rate": 0.00018324901630129288,
      "loss": 0.9321,
      "step": 1630
    },
    {
      "epoch": 0.45952782462057334,
      "grad_norm": 7.380026340484619,
      "learning_rate": 0.00018381112984822934,
      "loss": 1.0049,
      "step": 1635
    },
    {
      "epoch": 0.4609331084879146,
      "grad_norm": 1.8242307901382446,
      "learning_rate": 0.00018437324339516583,
      "loss": 0.9445,
      "step": 1640
    },
    {
      "epoch": 0.46233839235525576,
      "grad_norm": 1.4180879592895508,
      "learning_rate": 0.00018493535694210232,
      "loss": 0.946,
      "step": 1645
    },
    {
      "epoch": 0.463743676222597,
      "grad_norm": 2.3717854022979736,
      "learning_rate": 0.0001854974704890388,
      "loss": 0.9333,
      "step": 1650
    },
    {
      "epoch": 0.46514896008993817,
      "grad_norm": 4.793551921844482,
      "learning_rate": 0.00018605958403597526,
      "loss": 0.9427,
      "step": 1655
    },
    {
      "epoch": 0.46655424395727935,
      "grad_norm": 1.434067726135254,
      "learning_rate": 0.00018662169758291175,
      "loss": 0.9378,
      "step": 1660
    },
    {
      "epoch": 0.4679595278246206,
      "grad_norm": 1.8079577684402466,
      "learning_rate": 0.00018718381112984824,
      "loss": 0.9234,
      "step": 1665
    },
    {
      "epoch": 0.46936481169196176,
      "grad_norm": 2.289116144180298,
      "learning_rate": 0.00018774592467678473,
      "loss": 0.9345,
      "step": 1670
    },
    {
      "epoch": 0.470770095559303,
      "grad_norm": 1.7819665670394897,
      "learning_rate": 0.0001883080382237212,
      "loss": 0.9202,
      "step": 1675
    },
    {
      "epoch": 0.47217537942664417,
      "grad_norm": 1.405173897743225,
      "learning_rate": 0.00018887015177065767,
      "loss": 0.9222,
      "step": 1680
    },
    {
      "epoch": 0.4735806632939854,
      "grad_norm": 1.151394009590149,
      "learning_rate": 0.00018943226531759416,
      "loss": 0.9552,
      "step": 1685
    },
    {
      "epoch": 0.4749859471613266,
      "grad_norm": 1.7987416982650757,
      "learning_rate": 0.00018999437886453065,
      "loss": 0.9193,
      "step": 1690
    },
    {
      "epoch": 0.4763912310286678,
      "grad_norm": 3.1489951610565186,
      "learning_rate": 0.00019055649241146713,
      "loss": 0.973,
      "step": 1695
    },
    {
      "epoch": 0.477796514896009,
      "grad_norm": 1.2289990186691284,
      "learning_rate": 0.0001911186059584036,
      "loss": 0.9227,
      "step": 1700
    },
    {
      "epoch": 0.4792017987633502,
      "grad_norm": 1.305069923400879,
      "learning_rate": 0.00019168071950534008,
      "loss": 0.9307,
      "step": 1705
    },
    {
      "epoch": 0.4806070826306914,
      "grad_norm": 1.7787383794784546,
      "learning_rate": 0.00019224283305227657,
      "loss": 0.922,
      "step": 1710
    },
    {
      "epoch": 0.4820123664980326,
      "grad_norm": 1.4966998100280762,
      "learning_rate": 0.00019280494659921306,
      "loss": 0.9326,
      "step": 1715
    },
    {
      "epoch": 0.4834176503653738,
      "grad_norm": 1.7701624631881714,
      "learning_rate": 0.00019336706014614954,
      "loss": 0.95,
      "step": 1720
    },
    {
      "epoch": 0.484822934232715,
      "grad_norm": 3.242124319076538,
      "learning_rate": 0.000193929173693086,
      "loss": 1.0283,
      "step": 1725
    },
    {
      "epoch": 0.48622821810005623,
      "grad_norm": 1.4034018516540527,
      "learning_rate": 0.0001944912872400225,
      "loss": 0.9584,
      "step": 1730
    },
    {
      "epoch": 0.4876335019673974,
      "grad_norm": 1.9737906455993652,
      "learning_rate": 0.00019505340078695898,
      "loss": 0.9374,
      "step": 1735
    },
    {
      "epoch": 0.48903878583473864,
      "grad_norm": 0.9555981159210205,
      "learning_rate": 0.00019561551433389546,
      "loss": 0.9341,
      "step": 1740
    },
    {
      "epoch": 0.4904440697020798,
      "grad_norm": 1.5899726152420044,
      "learning_rate": 0.00019617762788083195,
      "loss": 0.9383,
      "step": 1745
    },
    {
      "epoch": 0.491849353569421,
      "grad_norm": 0.6762555837631226,
      "learning_rate": 0.0001967397414277684,
      "loss": 0.9336,
      "step": 1750
    },
    {
      "epoch": 0.49325463743676223,
      "grad_norm": 0.9318333268165588,
      "learning_rate": 0.0001973018549747049,
      "loss": 0.9246,
      "step": 1755
    },
    {
      "epoch": 0.4946599213041034,
      "grad_norm": 1.3311002254486084,
      "learning_rate": 0.00019786396852164139,
      "loss": 0.9285,
      "step": 1760
    },
    {
      "epoch": 0.49606520517144465,
      "grad_norm": 2.389007806777954,
      "learning_rate": 0.00019842608206857787,
      "loss": 0.923,
      "step": 1765
    },
    {
      "epoch": 0.4974704890387858,
      "grad_norm": 2.03153657913208,
      "learning_rate": 0.00019898819561551433,
      "loss": 0.9424,
      "step": 1770
    },
    {
      "epoch": 0.49887577290612706,
      "grad_norm": 2.2828330993652344,
      "learning_rate": 0.00019955030916245082,
      "loss": 0.9245,
      "step": 1775
    },
    {
      "epoch": 0.5002810567734682,
      "grad_norm": 0.7001500129699707,
      "learning_rate": 0.0001999999980749907,
      "loss": 1.002,
      "step": 1780
    },
    {
      "epoch": 0.5016863406408094,
      "grad_norm": 0.7825088500976562,
      "learning_rate": 0.0001999999306996728,
      "loss": 0.9381,
      "step": 1785
    },
    {
      "epoch": 0.5030916245081506,
      "grad_norm": 1.0128787755966187,
      "learning_rate": 0.0001999997670739637,
      "loss": 0.9247,
      "step": 1790
    },
    {
      "epoch": 0.5044969083754919,
      "grad_norm": 5.00535249710083,
      "learning_rate": 0.0001999995071980209,
      "loss": 0.9202,
      "step": 1795
    },
    {
      "epoch": 0.5059021922428331,
      "grad_norm": 1.0808401107788086,
      "learning_rate": 0.00019999915107209458,
      "loss": 0.9324,
      "step": 1800
    },
    {
      "epoch": 0.5073074761101742,
      "grad_norm": 2.2071526050567627,
      "learning_rate": 0.00019999869869652748,
      "loss": 0.9561,
      "step": 1805
    },
    {
      "epoch": 0.5087127599775154,
      "grad_norm": 3.5948903560638428,
      "learning_rate": 0.00019999815007175502,
      "loss": 0.937,
      "step": 1810
    },
    {
      "epoch": 0.5101180438448567,
      "grad_norm": 0.7620937824249268,
      "learning_rate": 0.00019999750519830522,
      "loss": 0.9281,
      "step": 1815
    },
    {
      "epoch": 0.5115233277121979,
      "grad_norm": 2.3497204780578613,
      "learning_rate": 0.00019999676407679885,
      "loss": 0.9289,
      "step": 1820
    },
    {
      "epoch": 0.5129286115795391,
      "grad_norm": 1.0564337968826294,
      "learning_rate": 0.00019999592670794916,
      "loss": 0.9256,
      "step": 1825
    },
    {
      "epoch": 0.5143338954468802,
      "grad_norm": 0.8735659122467041,
      "learning_rate": 0.00019999499309256215,
      "loss": 0.9301,
      "step": 1830
    },
    {
      "epoch": 0.5157391793142214,
      "grad_norm": 1.4924262762069702,
      "learning_rate": 0.00019999396323153645,
      "loss": 0.9426,
      "step": 1835
    },
    {
      "epoch": 0.5171444631815627,
      "grad_norm": 2.184305429458618,
      "learning_rate": 0.00019999283712586328,
      "loss": 0.9199,
      "step": 1840
    },
    {
      "epoch": 0.5185497470489039,
      "grad_norm": 0.9658164381980896,
      "learning_rate": 0.00019999161477662653,
      "loss": 0.9344,
      "step": 1845
    },
    {
      "epoch": 0.5199550309162451,
      "grad_norm": 0.8796664476394653,
      "learning_rate": 0.00019999029618500273,
      "loss": 0.9335,
      "step": 1850
    },
    {
      "epoch": 0.5213603147835862,
      "grad_norm": 1.1257429122924805,
      "learning_rate": 0.00019998888135226104,
      "loss": 0.9265,
      "step": 1855
    },
    {
      "epoch": 0.5227655986509275,
      "grad_norm": 1.2149803638458252,
      "learning_rate": 0.00019998737027976323,
      "loss": 0.9255,
      "step": 1860
    },
    {
      "epoch": 0.5241708825182687,
      "grad_norm": 1.6585772037506104,
      "learning_rate": 0.00019998576296896366,
      "loss": 0.9232,
      "step": 1865
    },
    {
      "epoch": 0.5255761663856099,
      "grad_norm": 1.0203379392623901,
      "learning_rate": 0.00019998405942140942,
      "loss": 0.9343,
      "step": 1870
    },
    {
      "epoch": 0.5269814502529511,
      "grad_norm": 1.212179183959961,
      "learning_rate": 0.00019998225963874022,
      "loss": 0.9311,
      "step": 1875
    },
    {
      "epoch": 0.5283867341202922,
      "grad_norm": 2.862008571624756,
      "learning_rate": 0.00019998036362268832,
      "loss": 0.9289,
      "step": 1880
    },
    {
      "epoch": 0.5297920179876335,
      "grad_norm": 1.0918841361999512,
      "learning_rate": 0.00019997837137507865,
      "loss": 0.948,
      "step": 1885
    },
    {
      "epoch": 0.5311973018549747,
      "grad_norm": 1.3842644691467285,
      "learning_rate": 0.00019997628289782874,
      "loss": 0.9262,
      "step": 1890
    },
    {
      "epoch": 0.5326025857223159,
      "grad_norm": 1.543491244316101,
      "learning_rate": 0.0001999740981929488,
      "loss": 0.9329,
      "step": 1895
    },
    {
      "epoch": 0.5340078695896571,
      "grad_norm": 1.2093950510025024,
      "learning_rate": 0.00019997181726254154,
      "loss": 0.9714,
      "step": 1900
    },
    {
      "epoch": 0.5354131534569984,
      "grad_norm": 0.9563074111938477,
      "learning_rate": 0.00019996944010880247,
      "loss": 0.9445,
      "step": 1905
    },
    {
      "epoch": 0.5368184373243395,
      "grad_norm": 0.952589750289917,
      "learning_rate": 0.00019996696673401954,
      "loss": 0.9308,
      "step": 1910
    },
    {
      "epoch": 0.5382237211916807,
      "grad_norm": 1.4094700813293457,
      "learning_rate": 0.0001999643971405734,
      "loss": 0.9215,
      "step": 1915
    },
    {
      "epoch": 0.5396290050590219,
      "grad_norm": 0.819480299949646,
      "learning_rate": 0.0001999617313309373,
      "loss": 0.9293,
      "step": 1920
    },
    {
      "epoch": 0.5410342889263631,
      "grad_norm": 0.8585676550865173,
      "learning_rate": 0.0001999589693076771,
      "loss": 0.9155,
      "step": 1925
    },
    {
      "epoch": 0.5424395727937044,
      "grad_norm": 1.154471755027771,
      "learning_rate": 0.00019995611107345127,
      "loss": 0.9389,
      "step": 1930
    },
    {
      "epoch": 0.5438448566610455,
      "grad_norm": 1.1028889417648315,
      "learning_rate": 0.00019995315663101082,
      "loss": 0.9127,
      "step": 1935
    },
    {
      "epoch": 0.5452501405283867,
      "grad_norm": 0.968929648399353,
      "learning_rate": 0.00019995010598319947,
      "loss": 0.9408,
      "step": 1940
    },
    {
      "epoch": 0.5466554243957279,
      "grad_norm": 1.0982134342193604,
      "learning_rate": 0.00019994695913295348,
      "loss": 0.9062,
      "step": 1945
    },
    {
      "epoch": 0.5480607082630692,
      "grad_norm": 0.6998859643936157,
      "learning_rate": 0.00019994371608330166,
      "loss": 0.9204,
      "step": 1950
    },
    {
      "epoch": 0.5494659921304104,
      "grad_norm": 1.3865907192230225,
      "learning_rate": 0.0001999403768373655,
      "loss": 0.9142,
      "step": 1955
    },
    {
      "epoch": 0.5508712759977515,
      "grad_norm": 1.1197766065597534,
      "learning_rate": 0.00019993694139835904,
      "loss": 0.9232,
      "step": 1960
    },
    {
      "epoch": 0.5522765598650927,
      "grad_norm": 0.7113555669784546,
      "learning_rate": 0.0001999334097695889,
      "loss": 0.9303,
      "step": 1965
    },
    {
      "epoch": 0.5536818437324339,
      "grad_norm": 0.5910270810127258,
      "learning_rate": 0.0001999297819544543,
      "loss": 0.9236,
      "step": 1970
    },
    {
      "epoch": 0.5550871275997752,
      "grad_norm": 0.5368698835372925,
      "learning_rate": 0.000199926057956447,
      "loss": 0.932,
      "step": 1975
    },
    {
      "epoch": 0.5564924114671164,
      "grad_norm": 2.1805481910705566,
      "learning_rate": 0.00019992223777915132,
      "loss": 0.9225,
      "step": 1980
    },
    {
      "epoch": 0.5578976953344575,
      "grad_norm": 2.1168668270111084,
      "learning_rate": 0.00019991832142624434,
      "loss": 0.9148,
      "step": 1985
    },
    {
      "epoch": 0.5593029792017987,
      "grad_norm": 1.4886637926101685,
      "learning_rate": 0.00019991430890149549,
      "loss": 0.9378,
      "step": 1990
    },
    {
      "epoch": 0.56070826306914,
      "grad_norm": 0.647662878036499,
      "learning_rate": 0.00019991020020876675,
      "loss": 0.9283,
      "step": 1995
    },
    {
      "epoch": 0.5621135469364812,
      "grad_norm": 1.1095281839370728,
      "learning_rate": 0.00019990599535201292,
      "loss": 0.9062,
      "step": 2000
    },
    {
      "epoch": 0.5635188308038224,
      "grad_norm": 0.6007787585258484,
      "learning_rate": 0.0001999016943352811,
      "loss": 0.9207,
      "step": 2005
    },
    {
      "epoch": 0.5649241146711635,
      "grad_norm": 1.0246546268463135,
      "learning_rate": 0.00019989729716271106,
      "loss": 0.9243,
      "step": 2010
    },
    {
      "epoch": 0.5663293985385047,
      "grad_norm": 0.9852647185325623,
      "learning_rate": 0.0001998928038385351,
      "loss": 0.9044,
      "step": 2015
    },
    {
      "epoch": 0.567734682405846,
      "grad_norm": 0.6966019868850708,
      "learning_rate": 0.00019988821436707805,
      "loss": 0.9343,
      "step": 2020
    },
    {
      "epoch": 0.5691399662731872,
      "grad_norm": 1.1939654350280762,
      "learning_rate": 0.0001998835287527573,
      "loss": 0.9349,
      "step": 2025
    },
    {
      "epoch": 0.5705452501405284,
      "grad_norm": 0.592685341835022,
      "learning_rate": 0.00019987874700008282,
      "loss": 0.939,
      "step": 2030
    },
    {
      "epoch": 0.5719505340078695,
      "grad_norm": 0.609995424747467,
      "learning_rate": 0.000199873869113657,
      "loss": 0.9275,
      "step": 2035
    },
    {
      "epoch": 0.5733558178752108,
      "grad_norm": 1.3798760175704956,
      "learning_rate": 0.00019986889509817485,
      "loss": 0.9265,
      "step": 2040
    },
    {
      "epoch": 0.574761101742552,
      "grad_norm": 0.6716069579124451,
      "learning_rate": 0.00019986382495842394,
      "loss": 0.918,
      "step": 2045
    },
    {
      "epoch": 0.5761663856098932,
      "grad_norm": 2.2000043392181396,
      "learning_rate": 0.0001998586586992842,
      "loss": 0.9108,
      "step": 2050
    },
    {
      "epoch": 0.5775716694772344,
      "grad_norm": 0.9085923433303833,
      "learning_rate": 0.00019985339632572826,
      "loss": 0.9138,
      "step": 2055
    },
    {
      "epoch": 0.5789769533445756,
      "grad_norm": 0.5850256085395813,
      "learning_rate": 0.00019984803784282116,
      "loss": 0.9131,
      "step": 2060
    },
    {
      "epoch": 0.5803822372119168,
      "grad_norm": 0.6118605136871338,
      "learning_rate": 0.00019984258325572043,
      "loss": 0.9301,
      "step": 2065
    },
    {
      "epoch": 0.581787521079258,
      "grad_norm": 0.936461865901947,
      "learning_rate": 0.0001998370325696762,
      "loss": 0.9257,
      "step": 2070
    },
    {
      "epoch": 0.5831928049465992,
      "grad_norm": 1.579474687576294,
      "learning_rate": 0.00019983138579003095,
      "loss": 1.0169,
      "step": 2075
    },
    {
      "epoch": 0.5845980888139404,
      "grad_norm": 1.5684884786605835,
      "learning_rate": 0.0001998256429222198,
      "loss": 0.932,
      "step": 2080
    },
    {
      "epoch": 0.5860033726812817,
      "grad_norm": 0.9524665474891663,
      "learning_rate": 0.00019981980397177024,
      "loss": 0.9334,
      "step": 2085
    },
    {
      "epoch": 0.5874086565486228,
      "grad_norm": 1.5445525646209717,
      "learning_rate": 0.00019981386894430233,
      "loss": 0.9331,
      "step": 2090
    },
    {
      "epoch": 0.588813940415964,
      "grad_norm": 0.8801035284996033,
      "learning_rate": 0.00019980783784552853,
      "loss": 0.9187,
      "step": 2095
    },
    {
      "epoch": 0.5902192242833052,
      "grad_norm": 1.2468377351760864,
      "learning_rate": 0.0001998017106812538,
      "loss": 0.9256,
      "step": 2100
    },
    {
      "epoch": 0.5916245081506464,
      "grad_norm": 1.2207618951797485,
      "learning_rate": 0.00019979548745737558,
      "loss": 0.9445,
      "step": 2105
    },
    {
      "epoch": 0.5930297920179877,
      "grad_norm": 0.573489785194397,
      "learning_rate": 0.00019978916817988375,
      "loss": 0.9224,
      "step": 2110
    },
    {
      "epoch": 0.5944350758853288,
      "grad_norm": 0.6274116635322571,
      "learning_rate": 0.00019978275285486064,
      "loss": 0.9221,
      "step": 2115
    },
    {
      "epoch": 0.59584035975267,
      "grad_norm": 0.6306831240653992,
      "learning_rate": 0.000199776241488481,
      "loss": 0.8899,
      "step": 2120
    },
    {
      "epoch": 0.5972456436200112,
      "grad_norm": 0.6260164976119995,
      "learning_rate": 0.00019976963408701207,
      "loss": 0.9108,
      "step": 2125
    },
    {
      "epoch": 0.5986509274873525,
      "grad_norm": 1.0233713388442993,
      "learning_rate": 0.00019976293065681353,
      "loss": 0.9266,
      "step": 2130
    },
    {
      "epoch": 0.6000562113546937,
      "grad_norm": 0.7524725198745728,
      "learning_rate": 0.00019975613120433745,
      "loss": 0.9121,
      "step": 2135
    },
    {
      "epoch": 0.6014614952220348,
      "grad_norm": 0.9613284468650818,
      "learning_rate": 0.0001997492357361283,
      "loss": 0.9071,
      "step": 2140
    },
    {
      "epoch": 0.602866779089376,
      "grad_norm": 1.719527244567871,
      "learning_rate": 0.00019974224425882306,
      "loss": 0.9086,
      "step": 2145
    },
    {
      "epoch": 0.6042720629567172,
      "grad_norm": 0.6603277921676636,
      "learning_rate": 0.00019973515677915103,
      "loss": 0.9066,
      "step": 2150
    },
    {
      "epoch": 0.6056773468240585,
      "grad_norm": 0.7629093527793884,
      "learning_rate": 0.0001997279733039339,
      "loss": 0.9185,
      "step": 2155
    },
    {
      "epoch": 0.6070826306913997,
      "grad_norm": 1.0620144605636597,
      "learning_rate": 0.00019972069384008588,
      "loss": 0.9076,
      "step": 2160
    },
    {
      "epoch": 0.6084879145587408,
      "grad_norm": 0.8223029971122742,
      "learning_rate": 0.00019971331839461337,
      "loss": 0.9276,
      "step": 2165
    },
    {
      "epoch": 0.609893198426082,
      "grad_norm": 0.9483692049980164,
      "learning_rate": 0.00019970584697461542,
      "loss": 0.8958,
      "step": 2170
    },
    {
      "epoch": 0.6112984822934233,
      "grad_norm": 1.230619192123413,
      "learning_rate": 0.00019969827958728317,
      "loss": 0.9166,
      "step": 2175
    },
    {
      "epoch": 0.6127037661607645,
      "grad_norm": 0.8268842697143555,
      "learning_rate": 0.00019969061623990037,
      "loss": 0.9181,
      "step": 2180
    },
    {
      "epoch": 0.6141090500281057,
      "grad_norm": 1.1452637910842896,
      "learning_rate": 0.00019968285693984297,
      "loss": 0.9154,
      "step": 2185
    },
    {
      "epoch": 0.6155143338954469,
      "grad_norm": 0.6976009607315063,
      "learning_rate": 0.0001996750016945793,
      "loss": 0.905,
      "step": 2190
    },
    {
      "epoch": 0.616919617762788,
      "grad_norm": 1.1496474742889404,
      "learning_rate": 0.00019966705051167015,
      "loss": 0.9063,
      "step": 2195
    },
    {
      "epoch": 0.6183249016301293,
      "grad_norm": 1.9882307052612305,
      "learning_rate": 0.0001996590033987685,
      "loss": 0.9169,
      "step": 2200
    },
    {
      "epoch": 0.6197301854974705,
      "grad_norm": 0.7984516620635986,
      "learning_rate": 0.00019965086036361979,
      "loss": 0.9283,
      "step": 2205
    },
    {
      "epoch": 0.6211354693648117,
      "grad_norm": 0.7119618654251099,
      "learning_rate": 0.0001996426214140617,
      "loss": 0.922,
      "step": 2210
    },
    {
      "epoch": 0.6225407532321529,
      "grad_norm": 1.7469879388809204,
      "learning_rate": 0.00019963428655802426,
      "loss": 0.9503,
      "step": 2215
    },
    {
      "epoch": 0.6239460370994941,
      "grad_norm": 1.0765151977539062,
      "learning_rate": 0.0001996258558035298,
      "loss": 0.9176,
      "step": 2220
    },
    {
      "epoch": 0.6253513209668353,
      "grad_norm": 1.2708961963653564,
      "learning_rate": 0.000199617329158693,
      "loss": 0.9053,
      "step": 2225
    },
    {
      "epoch": 0.6267566048341765,
      "grad_norm": 1.076857566833496,
      "learning_rate": 0.00019960870663172074,
      "loss": 0.9068,
      "step": 2230
    },
    {
      "epoch": 0.6281618887015177,
      "grad_norm": 2.503718614578247,
      "learning_rate": 0.00019959998823091226,
      "loss": 0.9279,
      "step": 2235
    },
    {
      "epoch": 0.6295671725688589,
      "grad_norm": 0.9719533920288086,
      "learning_rate": 0.00019959117396465905,
      "loss": 0.9305,
      "step": 2240
    },
    {
      "epoch": 0.6309724564362001,
      "grad_norm": 1.205297827720642,
      "learning_rate": 0.00019958226384144488,
      "loss": 0.9147,
      "step": 2245
    },
    {
      "epoch": 0.6323777403035413,
      "grad_norm": 1.4823075532913208,
      "learning_rate": 0.00019957325786984585,
      "loss": 0.9127,
      "step": 2250
    },
    {
      "epoch": 0.6337830241708825,
      "grad_norm": 0.7677815556526184,
      "learning_rate": 0.0001995641560585302,
      "loss": 0.9114,
      "step": 2255
    },
    {
      "epoch": 0.6351883080382237,
      "grad_norm": 0.8798638582229614,
      "learning_rate": 0.00019955495841625842,
      "loss": 0.8995,
      "step": 2260
    },
    {
      "epoch": 0.636593591905565,
      "grad_norm": 1.329459309577942,
      "learning_rate": 0.00019954566495188332,
      "loss": 0.9068,
      "step": 2265
    },
    {
      "epoch": 0.6379988757729061,
      "grad_norm": 0.6494102478027344,
      "learning_rate": 0.00019953627567434996,
      "loss": 0.8993,
      "step": 2270
    },
    {
      "epoch": 0.6394041596402473,
      "grad_norm": 1.5372142791748047,
      "learning_rate": 0.00019952679059269545,
      "loss": 0.9268,
      "step": 2275
    },
    {
      "epoch": 0.6408094435075885,
      "grad_norm": 0.6430231928825378,
      "learning_rate": 0.00019951720971604932,
      "loss": 0.9612,
      "step": 2280
    },
    {
      "epoch": 0.6422147273749297,
      "grad_norm": 0.6014605760574341,
      "learning_rate": 0.0001995075330536332,
      "loss": 0.8996,
      "step": 2285
    },
    {
      "epoch": 0.643620011242271,
      "grad_norm": 0.5937612652778625,
      "learning_rate": 0.00019949776061476088,
      "loss": 0.9167,
      "step": 2290
    },
    {
      "epoch": 0.6450252951096122,
      "grad_norm": 3.626796245574951,
      "learning_rate": 0.00019948789240883835,
      "loss": 0.9205,
      "step": 2295
    },
    {
      "epoch": 0.6464305789769533,
      "grad_norm": 0.9523254036903381,
      "learning_rate": 0.00019947792844536387,
      "loss": 0.9335,
      "step": 2300
    },
    {
      "epoch": 0.6478358628442945,
      "grad_norm": 1.2352759838104248,
      "learning_rate": 0.0001994678687339278,
      "loss": 0.9045,
      "step": 2305
    },
    {
      "epoch": 0.6492411467116358,
      "grad_norm": 0.8574492931365967,
      "learning_rate": 0.00019945771328421262,
      "loss": 0.9146,
      "step": 2310
    },
    {
      "epoch": 0.650646430578977,
      "grad_norm": 0.9836457967758179,
      "learning_rate": 0.00019944746210599301,
      "loss": 0.9032,
      "step": 2315
    },
    {
      "epoch": 0.6520517144463182,
      "grad_norm": 0.8709744811058044,
      "learning_rate": 0.00019943711520913575,
      "loss": 0.9209,
      "step": 2320
    },
    {
      "epoch": 0.6534569983136593,
      "grad_norm": 1.0688056945800781,
      "learning_rate": 0.00019942667260359985,
      "loss": 0.9149,
      "step": 2325
    },
    {
      "epoch": 0.6548622821810005,
      "grad_norm": 1.94585120677948,
      "learning_rate": 0.0001994161342994363,
      "loss": 0.9644,
      "step": 2330
    },
    {
      "epoch": 0.6562675660483418,
      "grad_norm": 1.7208789587020874,
      "learning_rate": 0.00019940550030678826,
      "loss": 0.9037,
      "step": 2335
    },
    {
      "epoch": 0.657672849915683,
      "grad_norm": 1.9082733392715454,
      "learning_rate": 0.00019939477063589105,
      "loss": 0.9122,
      "step": 2340
    },
    {
      "epoch": 0.6590781337830242,
      "grad_norm": 0.9194398522377014,
      "learning_rate": 0.00019938394529707198,
      "loss": 0.9127,
      "step": 2345
    },
    {
      "epoch": 0.6604834176503653,
      "grad_norm": 4.11295223236084,
      "learning_rate": 0.00019937302430075052,
      "loss": 0.9152,
      "step": 2350
    },
    {
      "epoch": 0.6618887015177066,
      "grad_norm": 1.0777262449264526,
      "learning_rate": 0.00019936200765743815,
      "loss": 0.8949,
      "step": 2355
    },
    {
      "epoch": 0.6632939853850478,
      "grad_norm": 0.7004035711288452,
      "learning_rate": 0.00019935089537773847,
      "loss": 0.9256,
      "step": 2360
    },
    {
      "epoch": 0.664699269252389,
      "grad_norm": 0.8367961049079895,
      "learning_rate": 0.00019933968747234707,
      "loss": 0.9059,
      "step": 2365
    },
    {
      "epoch": 0.6661045531197302,
      "grad_norm": 0.8426024317741394,
      "learning_rate": 0.00019932838395205166,
      "loss": 0.8936,
      "step": 2370
    },
    {
      "epoch": 0.6675098369870713,
      "grad_norm": 1.0446916818618774,
      "learning_rate": 0.00019931698482773187,
      "loss": 0.9106,
      "step": 2375
    },
    {
      "epoch": 0.6689151208544126,
      "grad_norm": 0.6767590045928955,
      "learning_rate": 0.00019930549011035943,
      "loss": 0.9095,
      "step": 2380
    },
    {
      "epoch": 0.6703204047217538,
      "grad_norm": 0.834368884563446,
      "learning_rate": 0.00019929389981099806,
      "loss": 0.8967,
      "step": 2385
    },
    {
      "epoch": 0.671725688589095,
      "grad_norm": 0.6920280456542969,
      "learning_rate": 0.0001992822139408035,
      "loss": 0.9059,
      "step": 2390
    },
    {
      "epoch": 0.6731309724564362,
      "grad_norm": 0.7495502233505249,
      "learning_rate": 0.00019927043251102342,
      "loss": 1.0464,
      "step": 2395
    },
    {
      "epoch": 0.6745362563237775,
      "grad_norm": 0.8813453316688538,
      "learning_rate": 0.00019925855553299752,
      "loss": 0.9374,
      "step": 2400
    },
    {
      "epoch": 0.6759415401911186,
      "grad_norm": 0.6671605110168457,
      "learning_rate": 0.00019924658301815744,
      "loss": 0.9172,
      "step": 2405
    },
    {
      "epoch": 0.6773468240584598,
      "grad_norm": 1.1641546487808228,
      "learning_rate": 0.00019923451497802676,
      "loss": 0.9068,
      "step": 2410
    },
    {
      "epoch": 0.678752107925801,
      "grad_norm": 1.446001648902893,
      "learning_rate": 0.0001992223514242211,
      "loss": 0.9051,
      "step": 2415
    },
    {
      "epoch": 0.6801573917931422,
      "grad_norm": 0.8141966462135315,
      "learning_rate": 0.0001992100923684478,
      "loss": 0.9603,
      "step": 2420
    },
    {
      "epoch": 0.6815626756604835,
      "grad_norm": 0.8345810174942017,
      "learning_rate": 0.00019919773782250638,
      "loss": 0.9314,
      "step": 2425
    },
    {
      "epoch": 0.6829679595278246,
      "grad_norm": 3.239243268966675,
      "learning_rate": 0.0001991852877982881,
      "loss": 0.9192,
      "step": 2430
    },
    {
      "epoch": 0.6843732433951658,
      "grad_norm": 1.1311649084091187,
      "learning_rate": 0.00019917274230777618,
      "loss": 0.8953,
      "step": 2435
    },
    {
      "epoch": 0.685778527262507,
      "grad_norm": 1.2739644050598145,
      "learning_rate": 0.00019916010136304565,
      "loss": 0.9324,
      "step": 2440
    },
    {
      "epoch": 0.6871838111298483,
      "grad_norm": 1.0117378234863281,
      "learning_rate": 0.0001991473649762636,
      "loss": 0.8924,
      "step": 2445
    },
    {
      "epoch": 0.6885890949971895,
      "grad_norm": 0.7392624616622925,
      "learning_rate": 0.00019913453315968874,
      "loss": 0.9007,
      "step": 2450
    },
    {
      "epoch": 0.6899943788645306,
      "grad_norm": 0.6434834599494934,
      "learning_rate": 0.00019912160592567183,
      "loss": 0.9242,
      "step": 2455
    },
    {
      "epoch": 0.6913996627318718,
      "grad_norm": 0.6763484477996826,
      "learning_rate": 0.0001991085832866553,
      "loss": 0.8886,
      "step": 2460
    },
    {
      "epoch": 0.692804946599213,
      "grad_norm": 0.5814651250839233,
      "learning_rate": 0.00019909546525517365,
      "loss": 0.9238,
      "step": 2465
    },
    {
      "epoch": 0.6942102304665543,
      "grad_norm": 0.5961918234825134,
      "learning_rate": 0.00019908225184385293,
      "loss": 0.9185,
      "step": 2470
    },
    {
      "epoch": 0.6956155143338955,
      "grad_norm": 0.7034104466438293,
      "learning_rate": 0.00019906894306541108,
      "loss": 0.9004,
      "step": 2475
    },
    {
      "epoch": 0.6970207982012366,
      "grad_norm": 1.2677810192108154,
      "learning_rate": 0.00019905553893265798,
      "loss": 0.9185,
      "step": 2480
    },
    {
      "epoch": 0.6984260820685778,
      "grad_norm": 0.9194796085357666,
      "learning_rate": 0.0001990420394584951,
      "loss": 0.9258,
      "step": 2485
    },
    {
      "epoch": 0.6998313659359191,
      "grad_norm": 0.6065542697906494,
      "learning_rate": 0.00019902844465591573,
      "loss": 0.9223,
      "step": 2490
    },
    {
      "epoch": 0.7012366498032603,
      "grad_norm": 0.5745678544044495,
      "learning_rate": 0.00019901475453800496,
      "loss": 0.918,
      "step": 2495
    },
    {
      "epoch": 0.7026419336706015,
      "grad_norm": 0.8046496510505676,
      "learning_rate": 0.00019900096911793958,
      "loss": 0.9037,
      "step": 2500
    },
    {
      "epoch": 0.7040472175379426,
      "grad_norm": 0.7904361486434937,
      "learning_rate": 0.0001989870884089881,
      "loss": 0.9048,
      "step": 2505
    },
    {
      "epoch": 0.7054525014052838,
      "grad_norm": 1.3024152517318726,
      "learning_rate": 0.00019897311242451086,
      "loss": 0.9052,
      "step": 2510
    },
    {
      "epoch": 0.7068577852726251,
      "grad_norm": 1.1783087253570557,
      "learning_rate": 0.00019895904117795966,
      "loss": 0.9047,
      "step": 2515
    },
    {
      "epoch": 0.7082630691399663,
      "grad_norm": 1.1952921152114868,
      "learning_rate": 0.00019894487468287826,
      "loss": 0.9032,
      "step": 2520
    },
    {
      "epoch": 0.7096683530073075,
      "grad_norm": 1.389844536781311,
      "learning_rate": 0.00019893061295290192,
      "loss": 0.9283,
      "step": 2525
    },
    {
      "epoch": 0.7110736368746486,
      "grad_norm": 1.3843488693237305,
      "learning_rate": 0.00019891625600175763,
      "loss": 0.8915,
      "step": 2530
    },
    {
      "epoch": 0.7124789207419899,
      "grad_norm": 1.0372074842453003,
      "learning_rate": 0.00019890180384326403,
      "loss": 0.9074,
      "step": 2535
    },
    {
      "epoch": 0.7138842046093311,
      "grad_norm": 0.9712080359458923,
      "learning_rate": 0.00019888725649133137,
      "loss": 0.9281,
      "step": 2540
    },
    {
      "epoch": 0.7152894884766723,
      "grad_norm": 0.5139784812927246,
      "learning_rate": 0.00019887261395996157,
      "loss": 0.9132,
      "step": 2545
    },
    {
      "epoch": 0.7166947723440135,
      "grad_norm": 1.2253260612487793,
      "learning_rate": 0.00019885787626324812,
      "loss": 0.899,
      "step": 2550
    },
    {
      "epoch": 0.7181000562113546,
      "grad_norm": 0.992481529712677,
      "learning_rate": 0.00019884304341537615,
      "loss": 0.9,
      "step": 2555
    },
    {
      "epoch": 0.7195053400786959,
      "grad_norm": 0.6645762324333191,
      "learning_rate": 0.00019882811543062227,
      "loss": 0.9244,
      "step": 2560
    },
    {
      "epoch": 0.7209106239460371,
      "grad_norm": 0.7887224555015564,
      "learning_rate": 0.0001988130923233548,
      "loss": 0.9072,
      "step": 2565
    },
    {
      "epoch": 0.7223159078133783,
      "grad_norm": 0.7903003692626953,
      "learning_rate": 0.0001987979741080335,
      "loss": 0.9112,
      "step": 2570
    },
    {
      "epoch": 0.7237211916807195,
      "grad_norm": 0.5832539796829224,
      "learning_rate": 0.00019878276079920979,
      "loss": 0.915,
      "step": 2575
    },
    {
      "epoch": 0.7251264755480608,
      "grad_norm": 0.7505447864532471,
      "learning_rate": 0.00019876745241152648,
      "loss": 0.9067,
      "step": 2580
    },
    {
      "epoch": 0.7265317594154019,
      "grad_norm": 1.1223492622375488,
      "learning_rate": 0.00019875204895971802,
      "loss": 0.9111,
      "step": 2585
    },
    {
      "epoch": 0.7279370432827431,
      "grad_norm": 1.6072221994400024,
      "learning_rate": 0.00019873655045861023,
      "loss": 0.8981,
      "step": 2590
    },
    {
      "epoch": 0.7293423271500843,
      "grad_norm": 0.6952162384986877,
      "learning_rate": 0.00019872095692312057,
      "loss": 0.9382,
      "step": 2595
    },
    {
      "epoch": 0.7307476110174255,
      "grad_norm": 0.7969158291816711,
      "learning_rate": 0.00019870526836825785,
      "loss": 0.9029,
      "step": 2600
    },
    {
      "epoch": 0.7321528948847668,
      "grad_norm": 0.9538679718971252,
      "learning_rate": 0.00019868948480912234,
      "loss": 0.9033,
      "step": 2605
    },
    {
      "epoch": 0.7335581787521079,
      "grad_norm": 1.2282218933105469,
      "learning_rate": 0.00019867360626090586,
      "loss": 0.8992,
      "step": 2610
    },
    {
      "epoch": 0.7349634626194491,
      "grad_norm": 0.6724523305892944,
      "learning_rate": 0.00019865763273889156,
      "loss": 0.9907,
      "step": 2615
    },
    {
      "epoch": 0.7363687464867903,
      "grad_norm": 1.1378065347671509,
      "learning_rate": 0.000198641564258454,
      "loss": 0.9116,
      "step": 2620
    },
    {
      "epoch": 0.7377740303541316,
      "grad_norm": 1.1677615642547607,
      "learning_rate": 0.00019862540083505917,
      "loss": 0.9044,
      "step": 2625
    },
    {
      "epoch": 0.7391793142214728,
      "grad_norm": 0.8698350787162781,
      "learning_rate": 0.00019860914248426447,
      "loss": 0.9051,
      "step": 2630
    },
    {
      "epoch": 0.7405845980888139,
      "grad_norm": 2.929654598236084,
      "learning_rate": 0.00019859278922171864,
      "loss": 0.9174,
      "step": 2635
    },
    {
      "epoch": 0.7419898819561551,
      "grad_norm": 0.8416985869407654,
      "learning_rate": 0.00019857634106316174,
      "loss": 0.9164,
      "step": 2640
    },
    {
      "epoch": 0.7433951658234963,
      "grad_norm": 0.8570914268493652,
      "learning_rate": 0.00019855979802442522,
      "loss": 0.9186,
      "step": 2645
    },
    {
      "epoch": 0.7448004496908376,
      "grad_norm": 1.2417738437652588,
      "learning_rate": 0.00019854316012143182,
      "loss": 0.898,
      "step": 2650
    },
    {
      "epoch": 0.7462057335581788,
      "grad_norm": 1.1438087224960327,
      "learning_rate": 0.00019852642737019558,
      "loss": 0.9072,
      "step": 2655
    },
    {
      "epoch": 0.7476110174255199,
      "grad_norm": 0.5742961764335632,
      "learning_rate": 0.00019850959978682186,
      "loss": 0.8934,
      "step": 2660
    },
    {
      "epoch": 0.7490163012928611,
      "grad_norm": 0.7306331396102905,
      "learning_rate": 0.00019849267738750732,
      "loss": 0.9112,
      "step": 2665
    },
    {
      "epoch": 0.7504215851602024,
      "grad_norm": 0.6199610233306885,
      "learning_rate": 0.0001984756601885398,
      "loss": 0.8977,
      "step": 2670
    },
    {
      "epoch": 0.7518268690275436,
      "grad_norm": 1.0061787366867065,
      "learning_rate": 0.00019845854820629846,
      "loss": 0.91,
      "step": 2675
    },
    {
      "epoch": 0.7532321528948848,
      "grad_norm": 0.7906721830368042,
      "learning_rate": 0.00019844134145725363,
      "loss": 0.9033,
      "step": 2680
    },
    {
      "epoch": 0.7546374367622259,
      "grad_norm": 0.9115892052650452,
      "learning_rate": 0.00019842403995796697,
      "loss": 0.9001,
      "step": 2685
    },
    {
      "epoch": 0.7560427206295671,
      "grad_norm": 0.7275146245956421,
      "learning_rate": 0.00019840664372509115,
      "loss": 0.927,
      "step": 2690
    },
    {
      "epoch": 0.7574480044969084,
      "grad_norm": 1.2060136795043945,
      "learning_rate": 0.00019838915277537017,
      "loss": 0.9192,
      "step": 2695
    },
    {
      "epoch": 0.7588532883642496,
      "grad_norm": 1.1314098834991455,
      "learning_rate": 0.00019837156712563912,
      "loss": 0.8778,
      "step": 2700
    },
    {
      "epoch": 0.7602585722315908,
      "grad_norm": 0.6868102550506592,
      "learning_rate": 0.00019835388679282433,
      "loss": 0.9251,
      "step": 2705
    },
    {
      "epoch": 0.761663856098932,
      "grad_norm": 1.133839726448059,
      "learning_rate": 0.00019833611179394313,
      "loss": 0.9157,
      "step": 2710
    },
    {
      "epoch": 0.7630691399662732,
      "grad_norm": 1.597622036933899,
      "learning_rate": 0.0001983182421461041,
      "loss": 0.9015,
      "step": 2715
    },
    {
      "epoch": 0.7644744238336144,
      "grad_norm": 1.8652737140655518,
      "learning_rate": 0.0001983002778665068,
      "loss": 0.904,
      "step": 2720
    },
    {
      "epoch": 0.7658797077009556,
      "grad_norm": 0.7736433148384094,
      "learning_rate": 0.000198282218972442,
      "loss": 0.9139,
      "step": 2725
    },
    {
      "epoch": 0.7672849915682968,
      "grad_norm": 0.6170510053634644,
      "learning_rate": 0.0001982640654812914,
      "loss": 0.9061,
      "step": 2730
    },
    {
      "epoch": 0.768690275435638,
      "grad_norm": 0.88065505027771,
      "learning_rate": 0.00019824581741052785,
      "loss": 0.897,
      "step": 2735
    },
    {
      "epoch": 0.7700955593029792,
      "grad_norm": 0.6491146683692932,
      "learning_rate": 0.0001982274747777152,
      "loss": 0.9058,
      "step": 2740
    },
    {
      "epoch": 0.7715008431703204,
      "grad_norm": 0.5739811062812805,
      "learning_rate": 0.00019820903760050832,
      "loss": 0.8854,
      "step": 2745
    },
    {
      "epoch": 0.7729061270376616,
      "grad_norm": 0.8427909016609192,
      "learning_rate": 0.00019819050589665307,
      "loss": 0.8983,
      "step": 2750
    },
    {
      "epoch": 0.7743114109050028,
      "grad_norm": 0.6211098432540894,
      "learning_rate": 0.0001981718796839863,
      "loss": 0.8902,
      "step": 2755
    },
    {
      "epoch": 0.7757166947723441,
      "grad_norm": 1.1936466693878174,
      "learning_rate": 0.00019815315898043582,
      "loss": 0.9144,
      "step": 2760
    },
    {
      "epoch": 0.7771219786396852,
      "grad_norm": 0.8049221038818359,
      "learning_rate": 0.00019813434380402045,
      "loss": 0.8962,
      "step": 2765
    },
    {
      "epoch": 0.7785272625070264,
      "grad_norm": 0.6825897097587585,
      "learning_rate": 0.00019811543417284978,
      "loss": 0.8929,
      "step": 2770
    },
    {
      "epoch": 0.7799325463743676,
      "grad_norm": 0.7605122923851013,
      "learning_rate": 0.0001980964301051245,
      "loss": 0.8846,
      "step": 2775
    },
    {
      "epoch": 0.7813378302417088,
      "grad_norm": 0.6697847843170166,
      "learning_rate": 0.00019807733161913608,
      "loss": 0.9627,
      "step": 2780
    },
    {
      "epoch": 0.7827431141090501,
      "grad_norm": 0.7112193703651428,
      "learning_rate": 0.0001980581387332669,
      "loss": 0.9225,
      "step": 2785
    },
    {
      "epoch": 0.7841483979763912,
      "grad_norm": 0.9057043790817261,
      "learning_rate": 0.0001980388514659902,
      "loss": 0.9215,
      "step": 2790
    },
    {
      "epoch": 0.7855536818437324,
      "grad_norm": 0.7113947868347168,
      "learning_rate": 0.00019801946983587007,
      "loss": 0.9021,
      "step": 2795
    },
    {
      "epoch": 0.7869589657110736,
      "grad_norm": 0.5814126133918762,
      "learning_rate": 0.00019799999386156146,
      "loss": 0.9,
      "step": 2800
    },
    {
      "epoch": 0.7883642495784149,
      "grad_norm": 0.7124185562133789,
      "learning_rate": 0.00019798042356181,
      "loss": 0.8951,
      "step": 2805
    },
    {
      "epoch": 0.7897695334457561,
      "grad_norm": 0.7653499841690063,
      "learning_rate": 0.00019796075895545223,
      "loss": 0.9109,
      "step": 2810
    },
    {
      "epoch": 0.7911748173130972,
      "grad_norm": 0.5366164445877075,
      "learning_rate": 0.00019794100006141543,
      "loss": 0.8895,
      "step": 2815
    },
    {
      "epoch": 0.7925801011804384,
      "grad_norm": 0.949338436126709,
      "learning_rate": 0.0001979211468987176,
      "loss": 0.9306,
      "step": 2820
    },
    {
      "epoch": 0.7939853850477796,
      "grad_norm": 0.5122391581535339,
      "learning_rate": 0.00019790119948646755,
      "loss": 0.8965,
      "step": 2825
    },
    {
      "epoch": 0.7953906689151209,
      "grad_norm": 0.8764130473136902,
      "learning_rate": 0.00019788115784386473,
      "loss": 0.9049,
      "step": 2830
    },
    {
      "epoch": 0.7967959527824621,
      "grad_norm": 0.5787503719329834,
      "learning_rate": 0.00019786102199019932,
      "loss": 0.9092,
      "step": 2835
    },
    {
      "epoch": 0.7982012366498032,
      "grad_norm": 0.7566272616386414,
      "learning_rate": 0.00019784079194485213,
      "loss": 0.8874,
      "step": 2840
    },
    {
      "epoch": 0.7996065205171444,
      "grad_norm": 0.8594691157341003,
      "learning_rate": 0.00019782046772729475,
      "loss": 0.9042,
      "step": 2845
    },
    {
      "epoch": 0.8010118043844857,
      "grad_norm": 1.1679846048355103,
      "learning_rate": 0.00019780004935708925,
      "loss": 0.9016,
      "step": 2850
    },
    {
      "epoch": 0.8024170882518269,
      "grad_norm": 1.2150373458862305,
      "learning_rate": 0.00019777953685388844,
      "loss": 0.9022,
      "step": 2855
    },
    {
      "epoch": 0.8038223721191681,
      "grad_norm": 0.7408333420753479,
      "learning_rate": 0.00019775893023743572,
      "loss": 0.9224,
      "step": 2860
    },
    {
      "epoch": 0.8052276559865092,
      "grad_norm": 1.2019459009170532,
      "learning_rate": 0.00019773822952756501,
      "loss": 0.8983,
      "step": 2865
    },
    {
      "epoch": 0.8066329398538504,
      "grad_norm": 0.6662552356719971,
      "learning_rate": 0.00019771743474420088,
      "loss": 0.906,
      "step": 2870
    },
    {
      "epoch": 0.8080382237211917,
      "grad_norm": 0.6724129319190979,
      "learning_rate": 0.00019769654590735838,
      "loss": 0.8981,
      "step": 2875
    },
    {
      "epoch": 0.8094435075885329,
      "grad_norm": 2.850919246673584,
      "learning_rate": 0.0001976755630371431,
      "loss": 0.9585,
      "step": 2880
    },
    {
      "epoch": 0.8108487914558741,
      "grad_norm": 1.244378924369812,
      "learning_rate": 0.00019765448615375117,
      "loss": 0.9517,
      "step": 2885
    },
    {
      "epoch": 0.8122540753232153,
      "grad_norm": 0.7747802138328552,
      "learning_rate": 0.0001976333152774692,
      "loss": 0.8957,
      "step": 2890
    },
    {
      "epoch": 0.8136593591905565,
      "grad_norm": 0.544975221157074,
      "learning_rate": 0.00019761205042867423,
      "loss": 0.9665,
      "step": 2895
    },
    {
      "epoch": 0.8150646430578977,
      "grad_norm": 0.5579664707183838,
      "learning_rate": 0.00019759069162783376,
      "loss": 0.9038,
      "step": 2900
    },
    {
      "epoch": 0.8164699269252389,
      "grad_norm": 10.984373092651367,
      "learning_rate": 0.00019756923889550579,
      "loss": 0.9749,
      "step": 2905
    },
    {
      "epoch": 0.8178752107925801,
      "grad_norm": 1.2287412881851196,
      "learning_rate": 0.00019754769225233863,
      "loss": 0.8992,
      "step": 2910
    },
    {
      "epoch": 0.8192804946599213,
      "grad_norm": 0.8453310132026672,
      "learning_rate": 0.00019752605171907098,
      "loss": 0.9039,
      "step": 2915
    },
    {
      "epoch": 0.8206857785272625,
      "grad_norm": 0.6380810141563416,
      "learning_rate": 0.00019750431731653206,
      "loss": 0.9129,
      "step": 2920
    },
    {
      "epoch": 0.8220910623946037,
      "grad_norm": 0.7804015278816223,
      "learning_rate": 0.00019748248906564125,
      "loss": 0.8979,
      "step": 2925
    },
    {
      "epoch": 0.8234963462619449,
      "grad_norm": 6.066610813140869,
      "learning_rate": 0.00019746056698740835,
      "loss": 0.8857,
      "step": 2930
    },
    {
      "epoch": 0.8249016301292861,
      "grad_norm": 0.9343296885490417,
      "learning_rate": 0.00019743855110293353,
      "loss": 0.8939,
      "step": 2935
    },
    {
      "epoch": 0.8263069139966274,
      "grad_norm": 0.5537464022636414,
      "learning_rate": 0.00019741644143340706,
      "loss": 0.9067,
      "step": 2940
    },
    {
      "epoch": 0.8277121978639685,
      "grad_norm": 0.8258460164070129,
      "learning_rate": 0.0001973942380001097,
      "loss": 0.8952,
      "step": 2945
    },
    {
      "epoch": 0.8291174817313097,
      "grad_norm": 0.9482399225234985,
      "learning_rate": 0.0001973719408244123,
      "loss": 0.9046,
      "step": 2950
    },
    {
      "epoch": 0.8305227655986509,
      "grad_norm": 1.2801766395568848,
      "learning_rate": 0.00019734954992777604,
      "loss": 0.8946,
      "step": 2955
    },
    {
      "epoch": 0.8319280494659921,
      "grad_norm": 1.2622408866882324,
      "learning_rate": 0.00019732706533175223,
      "loss": 0.9179,
      "step": 2960
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 1.2289254665374756,
      "learning_rate": 0.00019730448705798239,
      "loss": 0.8904,
      "step": 2965
    },
    {
      "epoch": 0.8347386172006745,
      "grad_norm": 0.6249719262123108,
      "learning_rate": 0.00019728181512819823,
      "loss": 0.9245,
      "step": 2970
    },
    {
      "epoch": 0.8361439010680157,
      "grad_norm": 0.7410894632339478,
      "learning_rate": 0.00019725904956422157,
      "loss": 0.8945,
      "step": 2975
    },
    {
      "epoch": 0.8375491849353569,
      "grad_norm": 0.609602689743042,
      "learning_rate": 0.0001972361903879644,
      "loss": 0.9139,
      "step": 2980
    },
    {
      "epoch": 0.8389544688026982,
      "grad_norm": 0.763276219367981,
      "learning_rate": 0.00019721323762142873,
      "loss": 0.8985,
      "step": 2985
    },
    {
      "epoch": 0.8403597526700394,
      "grad_norm": 0.5820448994636536,
      "learning_rate": 0.00019719019128670677,
      "loss": 0.9034,
      "step": 2990
    },
    {
      "epoch": 0.8417650365373806,
      "grad_norm": 0.8953343033790588,
      "learning_rate": 0.00019716705140598067,
      "loss": 0.9607,
      "step": 2995
    },
    {
      "epoch": 0.8431703204047217,
      "grad_norm": 0.5899667143821716,
      "learning_rate": 0.00019714381800152268,
      "loss": 0.8925,
      "step": 3000
    },
    {
      "epoch": 0.8445756042720629,
      "grad_norm": 0.7428545951843262,
      "learning_rate": 0.00019712049109569507,
      "loss": 0.8898,
      "step": 3005
    },
    {
      "epoch": 0.8459808881394042,
      "grad_norm": 0.5695815682411194,
      "learning_rate": 0.00019709707071095006,
      "loss": 0.8838,
      "step": 3010
    },
    {
      "epoch": 0.8473861720067454,
      "grad_norm": 0.6270226836204529,
      "learning_rate": 0.00019707355686982995,
      "loss": 0.9089,
      "step": 3015
    },
    {
      "epoch": 0.8487914558740866,
      "grad_norm": 0.5706961750984192,
      "learning_rate": 0.00019704994959496687,
      "loss": 0.8895,
      "step": 3020
    },
    {
      "epoch": 0.8501967397414277,
      "grad_norm": 0.6293683052062988,
      "learning_rate": 0.00019702624890908293,
      "loss": 0.899,
      "step": 3025
    },
    {
      "epoch": 0.851602023608769,
      "grad_norm": 0.8228982090950012,
      "learning_rate": 0.00019700245483499017,
      "loss": 0.8767,
      "step": 3030
    },
    {
      "epoch": 0.8530073074761102,
      "grad_norm": 0.7604111433029175,
      "learning_rate": 0.00019697856739559044,
      "loss": 0.8926,
      "step": 3035
    },
    {
      "epoch": 0.8544125913434514,
      "grad_norm": 1.0019569396972656,
      "learning_rate": 0.00019695458661387558,
      "loss": 0.9085,
      "step": 3040
    },
    {
      "epoch": 0.8558178752107926,
      "grad_norm": 0.6671732664108276,
      "learning_rate": 0.00019693051251292717,
      "loss": 0.8854,
      "step": 3045
    },
    {
      "epoch": 0.8572231590781337,
      "grad_norm": 0.9283923506736755,
      "learning_rate": 0.00019690634511591664,
      "loss": 0.9482,
      "step": 3050
    },
    {
      "epoch": 0.858628442945475,
      "grad_norm": 1.4978197813034058,
      "learning_rate": 0.00019688208444610522,
      "loss": 0.9213,
      "step": 3055
    },
    {
      "epoch": 0.8600337268128162,
      "grad_norm": 1.9540295600891113,
      "learning_rate": 0.00019685773052684392,
      "loss": 0.9069,
      "step": 3060
    },
    {
      "epoch": 0.8614390106801574,
      "grad_norm": 0.8743392825126648,
      "learning_rate": 0.00019683328338157354,
      "loss": 0.9185,
      "step": 3065
    },
    {
      "epoch": 0.8628442945474986,
      "grad_norm": 0.5670438408851624,
      "learning_rate": 0.0001968087430338245,
      "loss": 0.9206,
      "step": 3070
    },
    {
      "epoch": 0.8642495784148398,
      "grad_norm": 1.187651515007019,
      "learning_rate": 0.00019678410950721702,
      "loss": 0.9046,
      "step": 3075
    },
    {
      "epoch": 0.865654862282181,
      "grad_norm": 0.6912389397621155,
      "learning_rate": 0.000196759382825461,
      "loss": 0.9016,
      "step": 3080
    },
    {
      "epoch": 0.8670601461495222,
      "grad_norm": 0.7588440775871277,
      "learning_rate": 0.00019673456301235595,
      "loss": 0.8968,
      "step": 3085
    },
    {
      "epoch": 0.8684654300168634,
      "grad_norm": 0.5993475317955017,
      "learning_rate": 0.0001967096500917911,
      "loss": 0.9102,
      "step": 3090
    },
    {
      "epoch": 0.8698707138842046,
      "grad_norm": 0.815632164478302,
      "learning_rate": 0.00019668464408774522,
      "loss": 0.9001,
      "step": 3095
    },
    {
      "epoch": 0.8712759977515458,
      "grad_norm": 0.8173598647117615,
      "learning_rate": 0.0001966595450242867,
      "loss": 0.8948,
      "step": 3100
    },
    {
      "epoch": 0.872681281618887,
      "grad_norm": 0.6007003784179688,
      "learning_rate": 0.00019663435292557356,
      "loss": 0.8947,
      "step": 3105
    },
    {
      "epoch": 0.8740865654862282,
      "grad_norm": 0.5965673923492432,
      "learning_rate": 0.0001966090678158532,
      "loss": 0.9009,
      "step": 3110
    },
    {
      "epoch": 0.8754918493535694,
      "grad_norm": 0.7668277621269226,
      "learning_rate": 0.00019658368971946276,
      "loss": 0.9043,
      "step": 3115
    },
    {
      "epoch": 0.8768971332209107,
      "grad_norm": 0.6460701823234558,
      "learning_rate": 0.0001965582186608287,
      "loss": 0.9009,
      "step": 3120
    },
    {
      "epoch": 0.8783024170882519,
      "grad_norm": 0.6337530612945557,
      "learning_rate": 0.00019653265466446708,
      "loss": 0.944,
      "step": 3125
    },
    {
      "epoch": 0.879707700955593,
      "grad_norm": 0.8764949440956116,
      "learning_rate": 0.00019650699775498334,
      "loss": 0.9032,
      "step": 3130
    },
    {
      "epoch": 0.8811129848229342,
      "grad_norm": 0.5437586307525635,
      "learning_rate": 0.0001964812479570724,
      "loss": 0.8881,
      "step": 3135
    },
    {
      "epoch": 0.8825182686902754,
      "grad_norm": 1.294757604598999,
      "learning_rate": 0.0001964554052955185,
      "loss": 0.9081,
      "step": 3140
    },
    {
      "epoch": 0.8839235525576167,
      "grad_norm": 0.7840426564216614,
      "learning_rate": 0.0001964294697951954,
      "loss": 0.9523,
      "step": 3145
    },
    {
      "epoch": 0.8853288364249579,
      "grad_norm": 1.4854894876480103,
      "learning_rate": 0.00019640344148106606,
      "loss": 0.9636,
      "step": 3150
    },
    {
      "epoch": 0.886734120292299,
      "grad_norm": 0.5375068187713623,
      "learning_rate": 0.0001963773203781829,
      "loss": 0.9046,
      "step": 3155
    },
    {
      "epoch": 0.8881394041596402,
      "grad_norm": 0.5959550738334656,
      "learning_rate": 0.0001963511065116876,
      "loss": 0.9146,
      "step": 3160
    },
    {
      "epoch": 0.8895446880269815,
      "grad_norm": 0.5686089396476746,
      "learning_rate": 0.0001963247999068111,
      "loss": 0.8997,
      "step": 3165
    },
    {
      "epoch": 0.8909499718943227,
      "grad_norm": 0.491180956363678,
      "learning_rate": 0.00019629840058887362,
      "loss": 0.8926,
      "step": 3170
    },
    {
      "epoch": 0.8923552557616639,
      "grad_norm": 0.9157688021659851,
      "learning_rate": 0.0001962719085832847,
      "loss": 0.891,
      "step": 3175
    },
    {
      "epoch": 0.893760539629005,
      "grad_norm": 0.5665452480316162,
      "learning_rate": 0.00019624532391554294,
      "loss": 0.8966,
      "step": 3180
    },
    {
      "epoch": 0.8951658234963462,
      "grad_norm": 0.5333157777786255,
      "learning_rate": 0.00019621864661123622,
      "loss": 0.909,
      "step": 3185
    },
    {
      "epoch": 0.8965711073636875,
      "grad_norm": 0.5403627157211304,
      "learning_rate": 0.00019619187669604155,
      "loss": 0.8982,
      "step": 3190
    },
    {
      "epoch": 0.8979763912310287,
      "grad_norm": 0.5004389882087708,
      "learning_rate": 0.00019616501419572515,
      "loss": 0.8957,
      "step": 3195
    },
    {
      "epoch": 0.8993816750983699,
      "grad_norm": 0.6234089732170105,
      "learning_rate": 0.00019613805913614227,
      "loss": 0.8909,
      "step": 3200
    },
    {
      "epoch": 0.900786958965711,
      "grad_norm": 0.5197539925575256,
      "learning_rate": 0.00019611101154323727,
      "loss": 0.8762,
      "step": 3205
    },
    {
      "epoch": 0.9021922428330523,
      "grad_norm": 0.8515856266021729,
      "learning_rate": 0.00019608387144304362,
      "loss": 0.9055,
      "step": 3210
    },
    {
      "epoch": 0.9035975267003935,
      "grad_norm": 0.5120190382003784,
      "learning_rate": 0.0001960566388616837,
      "loss": 0.8963,
      "step": 3215
    },
    {
      "epoch": 0.9050028105677347,
      "grad_norm": 0.6141207218170166,
      "learning_rate": 0.0001960293138253691,
      "loss": 0.8766,
      "step": 3220
    },
    {
      "epoch": 0.9064080944350759,
      "grad_norm": 0.6554030179977417,
      "learning_rate": 0.00019600189636040025,
      "loss": 0.8858,
      "step": 3225
    },
    {
      "epoch": 0.907813378302417,
      "grad_norm": 1.2978062629699707,
      "learning_rate": 0.00019597438649316656,
      "loss": 0.8902,
      "step": 3230
    },
    {
      "epoch": 0.9092186621697583,
      "grad_norm": 0.5541256666183472,
      "learning_rate": 0.00019594678425014644,
      "loss": 0.9083,
      "step": 3235
    },
    {
      "epoch": 0.9106239460370995,
      "grad_norm": 0.6985655426979065,
      "learning_rate": 0.0001959190896579072,
      "loss": 0.8784,
      "step": 3240
    },
    {
      "epoch": 0.9120292299044407,
      "grad_norm": 0.6532580852508545,
      "learning_rate": 0.00019589130274310493,
      "loss": 0.9066,
      "step": 3245
    },
    {
      "epoch": 0.9134345137717819,
      "grad_norm": 0.9649792909622192,
      "learning_rate": 0.0001958634235324847,
      "loss": 0.8924,
      "step": 3250
    },
    {
      "epoch": 0.9148397976391232,
      "grad_norm": 0.8815706968307495,
      "learning_rate": 0.0001958354520528804,
      "loss": 0.9032,
      "step": 3255
    },
    {
      "epoch": 0.9162450815064643,
      "grad_norm": 1.1214852333068848,
      "learning_rate": 0.00019580738833121467,
      "loss": 0.8915,
      "step": 3260
    },
    {
      "epoch": 0.9176503653738055,
      "grad_norm": 0.9138022661209106,
      "learning_rate": 0.00019577923239449905,
      "loss": 0.9029,
      "step": 3265
    },
    {
      "epoch": 0.9190556492411467,
      "grad_norm": 0.6111552715301514,
      "learning_rate": 0.00019575098426983365,
      "loss": 0.9131,
      "step": 3270
    },
    {
      "epoch": 0.9204609331084879,
      "grad_norm": 0.8963708877563477,
      "learning_rate": 0.0001957226439844075,
      "loss": 0.8875,
      "step": 3275
    },
    {
      "epoch": 0.9218662169758292,
      "grad_norm": 0.6350470781326294,
      "learning_rate": 0.0001956942115654982,
      "loss": 0.8741,
      "step": 3280
    },
    {
      "epoch": 0.9232715008431703,
      "grad_norm": 1.011514663696289,
      "learning_rate": 0.0001956656870404721,
      "loss": 0.8876,
      "step": 3285
    },
    {
      "epoch": 0.9246767847105115,
      "grad_norm": 0.5660095810890198,
      "learning_rate": 0.0001956370704367842,
      "loss": 0.9137,
      "step": 3290
    },
    {
      "epoch": 0.9260820685778527,
      "grad_norm": 0.5166494250297546,
      "learning_rate": 0.00019560836178197813,
      "loss": 0.8968,
      "step": 3295
    },
    {
      "epoch": 0.927487352445194,
      "grad_norm": 0.7625125050544739,
      "learning_rate": 0.00019557956110368606,
      "loss": 0.8933,
      "step": 3300
    },
    {
      "epoch": 0.9288926363125352,
      "grad_norm": 0.6975813508033752,
      "learning_rate": 0.0001955506684296288,
      "loss": 0.8998,
      "step": 3305
    },
    {
      "epoch": 0.9302979201798763,
      "grad_norm": 0.719968318939209,
      "learning_rate": 0.00019552168378761565,
      "loss": 0.913,
      "step": 3310
    },
    {
      "epoch": 0.9317032040472175,
      "grad_norm": 0.5239852070808411,
      "learning_rate": 0.00019549260720554452,
      "loss": 0.9068,
      "step": 3315
    },
    {
      "epoch": 0.9331084879145587,
      "grad_norm": 0.5270377993583679,
      "learning_rate": 0.0001954634387114017,
      "loss": 0.8918,
      "step": 3320
    },
    {
      "epoch": 0.9345137717819,
      "grad_norm": 0.49702611565589905,
      "learning_rate": 0.000195434178333262,
      "loss": 0.8893,
      "step": 3325
    },
    {
      "epoch": 0.9359190556492412,
      "grad_norm": 0.590557873249054,
      "learning_rate": 0.0001954048260992887,
      "loss": 0.8901,
      "step": 3330
    },
    {
      "epoch": 0.9373243395165823,
      "grad_norm": 0.5189043283462524,
      "learning_rate": 0.00019537538203773344,
      "loss": 0.8826,
      "step": 3335
    },
    {
      "epoch": 0.9387296233839235,
      "grad_norm": 0.6192963719367981,
      "learning_rate": 0.0001953458461769363,
      "loss": 0.8801,
      "step": 3340
    },
    {
      "epoch": 0.9401349072512648,
      "grad_norm": 0.5685038566589355,
      "learning_rate": 0.00019531621854532562,
      "loss": 0.8944,
      "step": 3345
    },
    {
      "epoch": 0.941540191118606,
      "grad_norm": 0.49915850162506104,
      "learning_rate": 0.00019528649917141815,
      "loss": 0.8948,
      "step": 3350
    },
    {
      "epoch": 0.9429454749859472,
      "grad_norm": 0.5442516207695007,
      "learning_rate": 0.00019525668808381897,
      "loss": 0.9089,
      "step": 3355
    },
    {
      "epoch": 0.9443507588532883,
      "grad_norm": 0.5647527575492859,
      "learning_rate": 0.0001952267853112213,
      "loss": 0.9057,
      "step": 3360
    },
    {
      "epoch": 0.9457560427206295,
      "grad_norm": 0.7556710243225098,
      "learning_rate": 0.00019519679088240679,
      "loss": 0.8948,
      "step": 3365
    },
    {
      "epoch": 0.9471613265879708,
      "grad_norm": 0.578629195690155,
      "learning_rate": 0.00019516670482624515,
      "loss": 0.9092,
      "step": 3370
    },
    {
      "epoch": 0.948566610455312,
      "grad_norm": 0.6450733542442322,
      "learning_rate": 0.00019513652717169437,
      "loss": 0.8919,
      "step": 3375
    },
    {
      "epoch": 0.9499718943226532,
      "grad_norm": 1.024837613105774,
      "learning_rate": 0.0001951062579478006,
      "loss": 0.8965,
      "step": 3380
    },
    {
      "epoch": 0.9513771781899943,
      "grad_norm": 0.6713098287582397,
      "learning_rate": 0.0001950758971836981,
      "loss": 0.9124,
      "step": 3385
    },
    {
      "epoch": 0.9527824620573356,
      "grad_norm": 0.5165430903434753,
      "learning_rate": 0.00019504544490860917,
      "loss": 0.9042,
      "step": 3390
    },
    {
      "epoch": 0.9541877459246768,
      "grad_norm": 0.7243596911430359,
      "learning_rate": 0.0001950149011518444,
      "loss": 0.8922,
      "step": 3395
    },
    {
      "epoch": 0.955593029792018,
      "grad_norm": 0.5755138993263245,
      "learning_rate": 0.00019498426594280214,
      "loss": 0.8886,
      "step": 3400
    },
    {
      "epoch": 0.9569983136593592,
      "grad_norm": 0.5063710808753967,
      "learning_rate": 0.00019495353931096908,
      "loss": 0.8976,
      "step": 3405
    },
    {
      "epoch": 0.9584035975267003,
      "grad_norm": 0.9493336081504822,
      "learning_rate": 0.0001949227212859196,
      "loss": 0.897,
      "step": 3410
    },
    {
      "epoch": 0.9598088813940416,
      "grad_norm": 0.6509354710578918,
      "learning_rate": 0.0001948918118973163,
      "loss": 0.8797,
      "step": 3415
    },
    {
      "epoch": 0.9612141652613828,
      "grad_norm": 1.024837613105774,
      "learning_rate": 0.0001948608111749095,
      "loss": 0.8834,
      "step": 3420
    },
    {
      "epoch": 0.962619449128724,
      "grad_norm": 0.8351315855979919,
      "learning_rate": 0.00019482971914853766,
      "loss": 0.9357,
      "step": 3425
    },
    {
      "epoch": 0.9640247329960652,
      "grad_norm": 0.6943332552909851,
      "learning_rate": 0.00019479853584812693,
      "loss": 0.897,
      "step": 3430
    },
    {
      "epoch": 0.9654300168634065,
      "grad_norm": 0.8710034489631653,
      "learning_rate": 0.00019476726130369137,
      "loss": 0.8862,
      "step": 3435
    },
    {
      "epoch": 0.9668353007307476,
      "grad_norm": 1.5271203517913818,
      "learning_rate": 0.0001947358955453329,
      "loss": 0.9136,
      "step": 3440
    },
    {
      "epoch": 0.9682405845980888,
      "grad_norm": 0.9000211358070374,
      "learning_rate": 0.00019470443860324118,
      "loss": 0.9069,
      "step": 3445
    },
    {
      "epoch": 0.96964586846543,
      "grad_norm": 0.8771162033081055,
      "learning_rate": 0.0001946728905076937,
      "loss": 0.9507,
      "step": 3450
    },
    {
      "epoch": 0.9710511523327712,
      "grad_norm": 0.5751308798789978,
      "learning_rate": 0.0001946412512890556,
      "loss": 0.9047,
      "step": 3455
    },
    {
      "epoch": 0.9724564362001125,
      "grad_norm": 1.101432204246521,
      "learning_rate": 0.0001946095209777798,
      "loss": 0.9037,
      "step": 3460
    },
    {
      "epoch": 0.9738617200674536,
      "grad_norm": 0.8276761174201965,
      "learning_rate": 0.00019457769960440685,
      "loss": 0.8907,
      "step": 3465
    },
    {
      "epoch": 0.9752670039347948,
      "grad_norm": 1.6798173189163208,
      "learning_rate": 0.00019454578719956502,
      "loss": 0.9013,
      "step": 3470
    },
    {
      "epoch": 0.976672287802136,
      "grad_norm": 0.5530985593795776,
      "learning_rate": 0.0001945137837939701,
      "loss": 0.8757,
      "step": 3475
    },
    {
      "epoch": 0.9780775716694773,
      "grad_norm": 0.9847890138626099,
      "learning_rate": 0.00019448168941842552,
      "loss": 0.8859,
      "step": 3480
    },
    {
      "epoch": 0.9794828555368185,
      "grad_norm": 0.7510066628456116,
      "learning_rate": 0.00019444950410382226,
      "loss": 0.8919,
      "step": 3485
    },
    {
      "epoch": 0.9808881394041596,
      "grad_norm": 0.5439227819442749,
      "learning_rate": 0.00019441722788113882,
      "loss": 0.9016,
      "step": 3490
    },
    {
      "epoch": 0.9822934232715008,
      "grad_norm": 0.5271068811416626,
      "learning_rate": 0.00019438486078144124,
      "loss": 0.8879,
      "step": 3495
    },
    {
      "epoch": 0.983698707138842,
      "grad_norm": 0.6192128658294678,
      "learning_rate": 0.00019435240283588302,
      "loss": 0.889,
      "step": 3500
    },
    {
      "epoch": 0.9851039910061833,
      "grad_norm": 0.838772177696228,
      "learning_rate": 0.00019431985407570502,
      "loss": 0.895,
      "step": 3505
    },
    {
      "epoch": 0.9865092748735245,
      "grad_norm": 0.5664814710617065,
      "learning_rate": 0.0001942872145322356,
      "loss": 0.8984,
      "step": 3510
    },
    {
      "epoch": 0.9879145587408656,
      "grad_norm": 0.617550253868103,
      "learning_rate": 0.0001942544842368905,
      "loss": 0.9046,
      "step": 3515
    },
    {
      "epoch": 0.9893198426082068,
      "grad_norm": 0.690051794052124,
      "learning_rate": 0.00019422166322117276,
      "loss": 0.8811,
      "step": 3520
    },
    {
      "epoch": 0.9907251264755481,
      "grad_norm": 0.896776020526886,
      "learning_rate": 0.00019418875151667276,
      "loss": 0.8935,
      "step": 3525
    },
    {
      "epoch": 0.9921304103428893,
      "grad_norm": 0.8291999697685242,
      "learning_rate": 0.0001941557491550681,
      "loss": 0.9204,
      "step": 3530
    },
    {
      "epoch": 0.9935356942102305,
      "grad_norm": 0.6615550518035889,
      "learning_rate": 0.0001941226561681238,
      "loss": 0.8859,
      "step": 3535
    },
    {
      "epoch": 0.9949409780775716,
      "grad_norm": 0.469942569732666,
      "learning_rate": 0.00019408947258769198,
      "loss": 0.8806,
      "step": 3540
    },
    {
      "epoch": 0.9963462619449128,
      "grad_norm": 0.6039283275604248,
      "learning_rate": 0.00019405619844571197,
      "loss": 0.8735,
      "step": 3545
    },
    {
      "epoch": 0.9977515458122541,
      "grad_norm": 0.6278392672538757,
      "learning_rate": 0.0001940228337742103,
      "loss": 0.8934,
      "step": 3550
    },
    {
      "epoch": 0.9991568296795953,
      "grad_norm": 0.4628046452999115,
      "learning_rate": 0.0001939893786053006,
      "loss": 0.8916,
      "step": 3555
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.8928501009941101,
      "eval_runtime": 641.2073,
      "eval_samples_per_second": 7.013,
      "eval_steps_per_second": 0.585,
      "step": 3558
    },
    {
      "epoch": 1.0005621135469365,
      "grad_norm": 0.7678861021995544,
      "learning_rate": 0.00019395583297118367,
      "loss": 0.885,
      "step": 3560
    },
    {
      "epoch": 1.0019673974142778,
      "grad_norm": 0.6337043642997742,
      "learning_rate": 0.00019392219690414727,
      "loss": 0.8735,
      "step": 3565
    },
    {
      "epoch": 1.0033726812816188,
      "grad_norm": 0.5203379988670349,
      "learning_rate": 0.00019388847043656633,
      "loss": 0.8724,
      "step": 3570
    },
    {
      "epoch": 1.0047779651489601,
      "grad_norm": 0.5606422424316406,
      "learning_rate": 0.00019385465360090268,
      "loss": 0.8682,
      "step": 3575
    },
    {
      "epoch": 1.0061832490163012,
      "grad_norm": 0.5460006594657898,
      "learning_rate": 0.00019382074642970522,
      "loss": 0.8634,
      "step": 3580
    },
    {
      "epoch": 1.0075885328836425,
      "grad_norm": 0.616584062576294,
      "learning_rate": 0.00019378674895560973,
      "loss": 0.8573,
      "step": 3585
    },
    {
      "epoch": 1.0089938167509838,
      "grad_norm": 0.5995298624038696,
      "learning_rate": 0.00019375266121133896,
      "loss": 0.8787,
      "step": 3590
    },
    {
      "epoch": 1.0103991006183248,
      "grad_norm": 0.5080907940864563,
      "learning_rate": 0.00019371848322970249,
      "loss": 0.8603,
      "step": 3595
    },
    {
      "epoch": 1.0118043844856661,
      "grad_norm": 0.5254037976264954,
      "learning_rate": 0.00019368421504359676,
      "loss": 0.8648,
      "step": 3600
    },
    {
      "epoch": 1.0132096683530074,
      "grad_norm": 0.7366768717765808,
      "learning_rate": 0.00019364985668600515,
      "loss": 0.8591,
      "step": 3605
    },
    {
      "epoch": 1.0146149522203485,
      "grad_norm": 0.9682057499885559,
      "learning_rate": 0.00019361540818999765,
      "loss": 0.8659,
      "step": 3610
    },
    {
      "epoch": 1.0160202360876898,
      "grad_norm": 0.572347104549408,
      "learning_rate": 0.00019358086958873113,
      "loss": 0.8721,
      "step": 3615
    },
    {
      "epoch": 1.0174255199550308,
      "grad_norm": 0.5305222868919373,
      "learning_rate": 0.00019354624091544916,
      "loss": 0.884,
      "step": 3620
    },
    {
      "epoch": 1.0188308038223721,
      "grad_norm": 0.4776608347892761,
      "learning_rate": 0.00019351152220348198,
      "loss": 0.8733,
      "step": 3625
    },
    {
      "epoch": 1.0202360876897134,
      "grad_norm": 0.6774052977561951,
      "learning_rate": 0.0001934767134862465,
      "loss": 0.9416,
      "step": 3630
    },
    {
      "epoch": 1.0216413715570545,
      "grad_norm": 0.6616668701171875,
      "learning_rate": 0.00019344181479724628,
      "loss": 0.8712,
      "step": 3635
    },
    {
      "epoch": 1.0230466554243958,
      "grad_norm": 0.6705074310302734,
      "learning_rate": 0.00019340682617007148,
      "loss": 0.8677,
      "step": 3640
    },
    {
      "epoch": 1.0244519392917368,
      "grad_norm": 0.5559976100921631,
      "learning_rate": 0.0001933717476383988,
      "loss": 0.8658,
      "step": 3645
    },
    {
      "epoch": 1.0258572231590781,
      "grad_norm": 0.5060365796089172,
      "learning_rate": 0.00019333657923599148,
      "loss": 0.8655,
      "step": 3650
    },
    {
      "epoch": 1.0272625070264194,
      "grad_norm": 0.5650534629821777,
      "learning_rate": 0.0001933013209966993,
      "loss": 0.8694,
      "step": 3655
    },
    {
      "epoch": 1.0286677908937605,
      "grad_norm": 0.6062630414962769,
      "learning_rate": 0.00019326597295445848,
      "loss": 0.8927,
      "step": 3660
    },
    {
      "epoch": 1.0300730747611018,
      "grad_norm": 0.7026441693305969,
      "learning_rate": 0.00019323053514329162,
      "loss": 0.8869,
      "step": 3665
    },
    {
      "epoch": 1.0314783586284428,
      "grad_norm": 0.6207774877548218,
      "learning_rate": 0.0001931950075973078,
      "loss": 0.883,
      "step": 3670
    },
    {
      "epoch": 1.0328836424957841,
      "grad_norm": 0.5569882988929749,
      "learning_rate": 0.00019315939035070246,
      "loss": 0.8604,
      "step": 3675
    },
    {
      "epoch": 1.0342889263631254,
      "grad_norm": 0.8226413726806641,
      "learning_rate": 0.00019312368343775733,
      "loss": 0.8624,
      "step": 3680
    },
    {
      "epoch": 1.0356942102304665,
      "grad_norm": 0.7316590547561646,
      "learning_rate": 0.00019308788689284052,
      "loss": 0.8663,
      "step": 3685
    },
    {
      "epoch": 1.0370994940978078,
      "grad_norm": 0.7959001064300537,
      "learning_rate": 0.00019305200075040634,
      "loss": 0.8617,
      "step": 3690
    },
    {
      "epoch": 1.038504777965149,
      "grad_norm": 0.7762426733970642,
      "learning_rate": 0.0001930160250449954,
      "loss": 0.8708,
      "step": 3695
    },
    {
      "epoch": 1.0399100618324901,
      "grad_norm": 1.2157001495361328,
      "learning_rate": 0.00019297995981123442,
      "loss": 0.8887,
      "step": 3700
    },
    {
      "epoch": 1.0413153456998314,
      "grad_norm": 0.8485284447669983,
      "learning_rate": 0.00019294380508383643,
      "loss": 0.8734,
      "step": 3705
    },
    {
      "epoch": 1.0427206295671725,
      "grad_norm": 0.6756306886672974,
      "learning_rate": 0.00019290756089760045,
      "loss": 0.8539,
      "step": 3710
    },
    {
      "epoch": 1.0441259134345138,
      "grad_norm": 0.6776533126831055,
      "learning_rate": 0.00019287122728741171,
      "loss": 0.8867,
      "step": 3715
    },
    {
      "epoch": 1.045531197301855,
      "grad_norm": 1.2012450695037842,
      "learning_rate": 0.00019283480428824147,
      "loss": 0.8837,
      "step": 3720
    },
    {
      "epoch": 1.0469364811691961,
      "grad_norm": 0.7898838520050049,
      "learning_rate": 0.00019279829193514706,
      "loss": 0.871,
      "step": 3725
    },
    {
      "epoch": 1.0483417650365374,
      "grad_norm": 0.6537933349609375,
      "learning_rate": 0.0001927616902632717,
      "loss": 0.8774,
      "step": 3730
    },
    {
      "epoch": 1.0497470489038785,
      "grad_norm": 0.8855597376823425,
      "learning_rate": 0.00019272499930784477,
      "loss": 0.8722,
      "step": 3735
    },
    {
      "epoch": 1.0511523327712198,
      "grad_norm": 1.1047497987747192,
      "learning_rate": 0.00019268821910418146,
      "loss": 0.8883,
      "step": 3740
    },
    {
      "epoch": 1.052557616638561,
      "grad_norm": 0.6723616719245911,
      "learning_rate": 0.00019265134968768285,
      "loss": 0.8685,
      "step": 3745
    },
    {
      "epoch": 1.0539629005059021,
      "grad_norm": 0.5157325863838196,
      "learning_rate": 0.00019261439109383591,
      "loss": 0.8733,
      "step": 3750
    },
    {
      "epoch": 1.0553681843732434,
      "grad_norm": 0.5530291199684143,
      "learning_rate": 0.0001925773433582135,
      "loss": 0.868,
      "step": 3755
    },
    {
      "epoch": 1.0567734682405847,
      "grad_norm": 0.47156935930252075,
      "learning_rate": 0.00019254020651647427,
      "loss": 0.868,
      "step": 3760
    },
    {
      "epoch": 1.0581787521079258,
      "grad_norm": 0.9338856935501099,
      "learning_rate": 0.00019250298060436246,
      "loss": 0.8783,
      "step": 3765
    },
    {
      "epoch": 1.059584035975267,
      "grad_norm": 0.5520219206809998,
      "learning_rate": 0.00019246566565770835,
      "loss": 0.8671,
      "step": 3770
    },
    {
      "epoch": 1.0609893198426081,
      "grad_norm": 0.5825735926628113,
      "learning_rate": 0.0001924282617124276,
      "loss": 0.8721,
      "step": 3775
    },
    {
      "epoch": 1.0623946037099494,
      "grad_norm": 0.9456936120986938,
      "learning_rate": 0.0001923907688045218,
      "loss": 0.8805,
      "step": 3780
    },
    {
      "epoch": 1.0637998875772907,
      "grad_norm": 0.8601976037025452,
      "learning_rate": 0.00019235318697007796,
      "loss": 0.8667,
      "step": 3785
    },
    {
      "epoch": 1.0652051714446318,
      "grad_norm": 0.6083686351776123,
      "learning_rate": 0.00019231551624526881,
      "loss": 0.8591,
      "step": 3790
    },
    {
      "epoch": 1.066610455311973,
      "grad_norm": 0.5371261239051819,
      "learning_rate": 0.00019227775666635257,
      "loss": 0.8917,
      "step": 3795
    },
    {
      "epoch": 1.0680157391793141,
      "grad_norm": 0.481442391872406,
      "learning_rate": 0.00019223990826967304,
      "loss": 0.8697,
      "step": 3800
    },
    {
      "epoch": 1.0694210230466554,
      "grad_norm": 0.5719467401504517,
      "learning_rate": 0.00019220197109165942,
      "loss": 0.8707,
      "step": 3805
    },
    {
      "epoch": 1.0708263069139967,
      "grad_norm": 0.7797774076461792,
      "learning_rate": 0.0001921639451688265,
      "loss": 0.8836,
      "step": 3810
    },
    {
      "epoch": 1.0722315907813378,
      "grad_norm": 0.6479620337486267,
      "learning_rate": 0.00019212583053777432,
      "loss": 0.88,
      "step": 3815
    },
    {
      "epoch": 1.073636874648679,
      "grad_norm": 0.5058332085609436,
      "learning_rate": 0.00019208762723518845,
      "loss": 0.8698,
      "step": 3820
    },
    {
      "epoch": 1.0750421585160201,
      "grad_norm": 0.7176181077957153,
      "learning_rate": 0.00019204933529783972,
      "loss": 0.8777,
      "step": 3825
    },
    {
      "epoch": 1.0764474423833614,
      "grad_norm": 0.8829546570777893,
      "learning_rate": 0.0001920109547625843,
      "loss": 0.8804,
      "step": 3830
    },
    {
      "epoch": 1.0778527262507027,
      "grad_norm": 0.6037710905075073,
      "learning_rate": 0.00019197248566636362,
      "loss": 0.8713,
      "step": 3835
    },
    {
      "epoch": 1.0792580101180438,
      "grad_norm": 0.8079067468643188,
      "learning_rate": 0.00019193392804620434,
      "loss": 0.8765,
      "step": 3840
    },
    {
      "epoch": 1.080663293985385,
      "grad_norm": 0.5948657989501953,
      "learning_rate": 0.0001918952819392184,
      "loss": 0.8682,
      "step": 3845
    },
    {
      "epoch": 1.0820685778527261,
      "grad_norm": 0.5095741748809814,
      "learning_rate": 0.0001918565473826028,
      "loss": 0.8579,
      "step": 3850
    },
    {
      "epoch": 1.0834738617200674,
      "grad_norm": 0.6867838501930237,
      "learning_rate": 0.00019181772441363978,
      "loss": 0.8651,
      "step": 3855
    },
    {
      "epoch": 1.0848791455874087,
      "grad_norm": 0.7166353464126587,
      "learning_rate": 0.0001917788130696966,
      "loss": 0.8687,
      "step": 3860
    },
    {
      "epoch": 1.0862844294547498,
      "grad_norm": 0.8581190705299377,
      "learning_rate": 0.0001917398133882256,
      "loss": 0.8882,
      "step": 3865
    },
    {
      "epoch": 1.087689713322091,
      "grad_norm": 0.6934407353401184,
      "learning_rate": 0.00019170072540676417,
      "loss": 0.8818,
      "step": 3870
    },
    {
      "epoch": 1.0890949971894324,
      "grad_norm": 0.5701424479484558,
      "learning_rate": 0.00019166154916293464,
      "loss": 0.8772,
      "step": 3875
    },
    {
      "epoch": 1.0905002810567734,
      "grad_norm": 0.6658897995948792,
      "learning_rate": 0.00019162228469444433,
      "loss": 0.8657,
      "step": 3880
    },
    {
      "epoch": 1.0919055649241147,
      "grad_norm": 0.5013540983200073,
      "learning_rate": 0.00019158293203908551,
      "loss": 0.8829,
      "step": 3885
    },
    {
      "epoch": 1.0933108487914558,
      "grad_norm": 0.5879671573638916,
      "learning_rate": 0.00019154349123473528,
      "loss": 0.8775,
      "step": 3890
    },
    {
      "epoch": 1.094716132658797,
      "grad_norm": 0.580640971660614,
      "learning_rate": 0.0001915039623193556,
      "loss": 0.8547,
      "step": 3895
    },
    {
      "epoch": 1.0961214165261384,
      "grad_norm": 0.5695182085037231,
      "learning_rate": 0.00019146434533099318,
      "loss": 0.8644,
      "step": 3900
    },
    {
      "epoch": 1.0975267003934794,
      "grad_norm": 0.5622773766517639,
      "learning_rate": 0.00019142464030777958,
      "loss": 0.868,
      "step": 3905
    },
    {
      "epoch": 1.0989319842608207,
      "grad_norm": 0.5157364010810852,
      "learning_rate": 0.00019138484728793107,
      "loss": 0.8713,
      "step": 3910
    },
    {
      "epoch": 1.1003372681281618,
      "grad_norm": 0.6461440920829773,
      "learning_rate": 0.00019134496630974864,
      "loss": 0.8821,
      "step": 3915
    },
    {
      "epoch": 1.101742551995503,
      "grad_norm": 0.5542477965354919,
      "learning_rate": 0.0001913049974116179,
      "loss": 0.8773,
      "step": 3920
    },
    {
      "epoch": 1.1031478358628444,
      "grad_norm": 0.5190820097923279,
      "learning_rate": 0.00019126494063200907,
      "loss": 0.8856,
      "step": 3925
    },
    {
      "epoch": 1.1045531197301854,
      "grad_norm": 0.4922696053981781,
      "learning_rate": 0.00019122479600947699,
      "loss": 0.8911,
      "step": 3930
    },
    {
      "epoch": 1.1059584035975267,
      "grad_norm": 0.6155653595924377,
      "learning_rate": 0.00019118456358266107,
      "loss": 0.8843,
      "step": 3935
    },
    {
      "epoch": 1.107363687464868,
      "grad_norm": 0.8045044541358948,
      "learning_rate": 0.00019114424339028516,
      "loss": 0.8715,
      "step": 3940
    },
    {
      "epoch": 1.108768971332209,
      "grad_norm": 0.5465171933174133,
      "learning_rate": 0.0001911038354711577,
      "loss": 0.8755,
      "step": 3945
    },
    {
      "epoch": 1.1101742551995504,
      "grad_norm": 0.5287275314331055,
      "learning_rate": 0.00019106333986417142,
      "loss": 0.8802,
      "step": 3950
    },
    {
      "epoch": 1.1115795390668914,
      "grad_norm": 0.7569435834884644,
      "learning_rate": 0.0001910227566083036,
      "loss": 0.8553,
      "step": 3955
    },
    {
      "epoch": 1.1129848229342327,
      "grad_norm": 0.9262810945510864,
      "learning_rate": 0.00019098208574261575,
      "loss": 0.8663,
      "step": 3960
    },
    {
      "epoch": 1.114390106801574,
      "grad_norm": 1.0431913137435913,
      "learning_rate": 0.00019094132730625377,
      "loss": 0.9002,
      "step": 3965
    },
    {
      "epoch": 1.115795390668915,
      "grad_norm": 0.577294111251831,
      "learning_rate": 0.0001909004813384479,
      "loss": 0.8888,
      "step": 3970
    },
    {
      "epoch": 1.1172006745362564,
      "grad_norm": 0.6003448963165283,
      "learning_rate": 0.0001908595478785125,
      "loss": 0.8726,
      "step": 3975
    },
    {
      "epoch": 1.1186059584035974,
      "grad_norm": 0.5789997577667236,
      "learning_rate": 0.00019081852696584627,
      "loss": 0.8669,
      "step": 3980
    },
    {
      "epoch": 1.1200112422709387,
      "grad_norm": 0.5823659896850586,
      "learning_rate": 0.00019077741863993199,
      "loss": 0.8812,
      "step": 3985
    },
    {
      "epoch": 1.12141652613828,
      "grad_norm": 0.8449714183807373,
      "learning_rate": 0.00019073622294033663,
      "loss": 0.8714,
      "step": 3990
    },
    {
      "epoch": 1.122821810005621,
      "grad_norm": 1.3145679235458374,
      "learning_rate": 0.00019069493990671118,
      "loss": 0.9059,
      "step": 3995
    },
    {
      "epoch": 1.1242270938729624,
      "grad_norm": 0.8457211852073669,
      "learning_rate": 0.00019065356957879086,
      "loss": 0.8518,
      "step": 4000
    },
    {
      "epoch": 1.1256323777403034,
      "grad_norm": 1.5860358476638794,
      "learning_rate": 0.00019061211199639474,
      "loss": 0.8808,
      "step": 4005
    },
    {
      "epoch": 1.1270376616076447,
      "grad_norm": 0.7184464335441589,
      "learning_rate": 0.00019057056719942587,
      "loss": 0.8753,
      "step": 4010
    },
    {
      "epoch": 1.128442945474986,
      "grad_norm": 0.585971474647522,
      "learning_rate": 0.00019052893522787144,
      "loss": 0.8672,
      "step": 4015
    },
    {
      "epoch": 1.129848229342327,
      "grad_norm": 0.551020622253418,
      "learning_rate": 0.00019048721612180232,
      "loss": 0.8659,
      "step": 4020
    },
    {
      "epoch": 1.1312535132096684,
      "grad_norm": 0.5553367137908936,
      "learning_rate": 0.00019044540992137337,
      "loss": 0.8753,
      "step": 4025
    },
    {
      "epoch": 1.1326587970770094,
      "grad_norm": 0.5330313444137573,
      "learning_rate": 0.00019040351666682322,
      "loss": 0.8818,
      "step": 4030
    },
    {
      "epoch": 1.1340640809443507,
      "grad_norm": 1.2441614866256714,
      "learning_rate": 0.00019036153639847433,
      "loss": 0.8695,
      "step": 4035
    },
    {
      "epoch": 1.135469364811692,
      "grad_norm": 0.6410412192344666,
      "learning_rate": 0.00019031946915673293,
      "loss": 0.8793,
      "step": 4040
    },
    {
      "epoch": 1.136874648679033,
      "grad_norm": 0.4777545928955078,
      "learning_rate": 0.00019027731498208895,
      "loss": 0.8738,
      "step": 4045
    },
    {
      "epoch": 1.1382799325463744,
      "grad_norm": 0.5763461589813232,
      "learning_rate": 0.00019023507391511591,
      "loss": 0.8783,
      "step": 4050
    },
    {
      "epoch": 1.1396852164137155,
      "grad_norm": 0.5704853534698486,
      "learning_rate": 0.00019019274599647106,
      "loss": 0.8611,
      "step": 4055
    },
    {
      "epoch": 1.1410905002810567,
      "grad_norm": 0.4842630922794342,
      "learning_rate": 0.00019015033126689522,
      "loss": 0.8543,
      "step": 4060
    },
    {
      "epoch": 1.142495784148398,
      "grad_norm": 0.6521385312080383,
      "learning_rate": 0.00019010782976721277,
      "loss": 0.8695,
      "step": 4065
    },
    {
      "epoch": 1.143901068015739,
      "grad_norm": 0.7000013589859009,
      "learning_rate": 0.00019006524153833158,
      "loss": 0.8659,
      "step": 4070
    },
    {
      "epoch": 1.1453063518830804,
      "grad_norm": 0.6031846404075623,
      "learning_rate": 0.000190022566621243,
      "loss": 0.8598,
      "step": 4075
    },
    {
      "epoch": 1.1467116357504217,
      "grad_norm": 0.5788977742195129,
      "learning_rate": 0.0001899798050570219,
      "loss": 0.8682,
      "step": 4080
    },
    {
      "epoch": 1.1481169196177627,
      "grad_norm": 0.6468327641487122,
      "learning_rate": 0.00018993695688682643,
      "loss": 0.874,
      "step": 4085
    },
    {
      "epoch": 1.149522203485104,
      "grad_norm": 0.5688002109527588,
      "learning_rate": 0.00018989402215189812,
      "loss": 0.8514,
      "step": 4090
    },
    {
      "epoch": 1.150927487352445,
      "grad_norm": 0.6317186951637268,
      "learning_rate": 0.00018985100089356194,
      "loss": 0.8683,
      "step": 4095
    },
    {
      "epoch": 1.1523327712197864,
      "grad_norm": 0.8182606101036072,
      "learning_rate": 0.00018980789315322595,
      "loss": 0.8882,
      "step": 4100
    },
    {
      "epoch": 1.1537380550871277,
      "grad_norm": 0.615042507648468,
      "learning_rate": 0.00018976469897238158,
      "loss": 0.88,
      "step": 4105
    },
    {
      "epoch": 1.1551433389544687,
      "grad_norm": 0.5782546401023865,
      "learning_rate": 0.00018972141839260348,
      "loss": 0.8685,
      "step": 4110
    },
    {
      "epoch": 1.15654862282181,
      "grad_norm": 1.0364948511123657,
      "learning_rate": 0.00018967805145554936,
      "loss": 0.8658,
      "step": 4115
    },
    {
      "epoch": 1.1579539066891513,
      "grad_norm": 0.7199181914329529,
      "learning_rate": 0.0001896345982029601,
      "loss": 0.8865,
      "step": 4120
    },
    {
      "epoch": 1.1593591905564924,
      "grad_norm": 0.8875741958618164,
      "learning_rate": 0.0001895910586766596,
      "loss": 0.878,
      "step": 4125
    },
    {
      "epoch": 1.1607644744238337,
      "grad_norm": 0.6755944490432739,
      "learning_rate": 0.00018954743291855496,
      "loss": 0.8666,
      "step": 4130
    },
    {
      "epoch": 1.1621697582911747,
      "grad_norm": 0.516245424747467,
      "learning_rate": 0.0001895037209706361,
      "loss": 0.8648,
      "step": 4135
    },
    {
      "epoch": 1.163575042158516,
      "grad_norm": 0.5283127427101135,
      "learning_rate": 0.00018945992287497601,
      "loss": 0.8684,
      "step": 4140
    },
    {
      "epoch": 1.1649803260258573,
      "grad_norm": 0.8151612281799316,
      "learning_rate": 0.00018941603867373054,
      "loss": 0.8662,
      "step": 4145
    },
    {
      "epoch": 1.1663856098931984,
      "grad_norm": 0.8823288083076477,
      "learning_rate": 0.00018937206840913842,
      "loss": 0.8853,
      "step": 4150
    },
    {
      "epoch": 1.1677908937605397,
      "grad_norm": 0.6180223822593689,
      "learning_rate": 0.00018932801212352124,
      "loss": 0.874,
      "step": 4155
    },
    {
      "epoch": 1.1691961776278808,
      "grad_norm": 0.531110942363739,
      "learning_rate": 0.00018928386985928337,
      "loss": 0.8658,
      "step": 4160
    },
    {
      "epoch": 1.170601461495222,
      "grad_norm": 0.4815613925457001,
      "learning_rate": 0.00018923964165891197,
      "loss": 0.8654,
      "step": 4165
    },
    {
      "epoch": 1.1720067453625633,
      "grad_norm": 0.5188429951667786,
      "learning_rate": 0.00018919532756497687,
      "loss": 0.8691,
      "step": 4170
    },
    {
      "epoch": 1.1734120292299044,
      "grad_norm": 0.5245307087898254,
      "learning_rate": 0.00018915092762013055,
      "loss": 0.8914,
      "step": 4175
    },
    {
      "epoch": 1.1748173130972457,
      "grad_norm": 0.6985578536987305,
      "learning_rate": 0.00018910644186710825,
      "loss": 0.8538,
      "step": 4180
    },
    {
      "epoch": 1.1762225969645868,
      "grad_norm": 0.8950197100639343,
      "learning_rate": 0.00018906187034872763,
      "loss": 0.8551,
      "step": 4185
    },
    {
      "epoch": 1.177627880831928,
      "grad_norm": 0.5236636996269226,
      "learning_rate": 0.00018901721310788898,
      "loss": 0.8947,
      "step": 4190
    },
    {
      "epoch": 1.1790331646992693,
      "grad_norm": 0.5204771161079407,
      "learning_rate": 0.00018897247018757516,
      "loss": 0.8729,
      "step": 4195
    },
    {
      "epoch": 1.1804384485666104,
      "grad_norm": 0.47652414441108704,
      "learning_rate": 0.0001889276416308514,
      "loss": 0.8644,
      "step": 4200
    },
    {
      "epoch": 1.1818437324339517,
      "grad_norm": 0.5452648401260376,
      "learning_rate": 0.00018888272748086537,
      "loss": 0.87,
      "step": 4205
    },
    {
      "epoch": 1.1832490163012928,
      "grad_norm": 0.7756819725036621,
      "learning_rate": 0.0001888377277808472,
      "loss": 0.8555,
      "step": 4210
    },
    {
      "epoch": 1.184654300168634,
      "grad_norm": 0.5953937768936157,
      "learning_rate": 0.00018879264257410926,
      "loss": 0.8778,
      "step": 4215
    },
    {
      "epoch": 1.1860595840359753,
      "grad_norm": 0.6475582718849182,
      "learning_rate": 0.00018874747190404624,
      "loss": 0.8938,
      "step": 4220
    },
    {
      "epoch": 1.1874648679033164,
      "grad_norm": 0.5985028743743896,
      "learning_rate": 0.0001887022158141352,
      "loss": 0.8741,
      "step": 4225
    },
    {
      "epoch": 1.1888701517706577,
      "grad_norm": 0.5418311953544617,
      "learning_rate": 0.0001886568743479353,
      "loss": 0.8513,
      "step": 4230
    },
    {
      "epoch": 1.1902754356379988,
      "grad_norm": 0.5689842104911804,
      "learning_rate": 0.0001886114475490879,
      "loss": 0.8737,
      "step": 4235
    },
    {
      "epoch": 1.19168071950534,
      "grad_norm": 0.5415789484977722,
      "learning_rate": 0.00018856593546131648,
      "loss": 0.8721,
      "step": 4240
    },
    {
      "epoch": 1.1930860033726813,
      "grad_norm": 0.6934672594070435,
      "learning_rate": 0.0001885203381284267,
      "loss": 0.85,
      "step": 4245
    },
    {
      "epoch": 1.1944912872400224,
      "grad_norm": 0.7670535445213318,
      "learning_rate": 0.00018847465559430614,
      "loss": 0.8853,
      "step": 4250
    },
    {
      "epoch": 1.1958965711073637,
      "grad_norm": 0.5853431224822998,
      "learning_rate": 0.00018842888790292442,
      "loss": 0.8842,
      "step": 4255
    },
    {
      "epoch": 1.197301854974705,
      "grad_norm": 0.5499680638313293,
      "learning_rate": 0.00018838303509833323,
      "loss": 0.8941,
      "step": 4260
    },
    {
      "epoch": 1.198707138842046,
      "grad_norm": 0.5514938235282898,
      "learning_rate": 0.00018833709722466607,
      "loss": 0.8641,
      "step": 4265
    },
    {
      "epoch": 1.2001124227093873,
      "grad_norm": 0.560611367225647,
      "learning_rate": 0.0001882910743261384,
      "loss": 0.872,
      "step": 4270
    },
    {
      "epoch": 1.2015177065767284,
      "grad_norm": 0.601483166217804,
      "learning_rate": 0.00018824496644704737,
      "loss": 0.8838,
      "step": 4275
    },
    {
      "epoch": 1.2029229904440697,
      "grad_norm": 0.46491849422454834,
      "learning_rate": 0.00018819877363177213,
      "loss": 0.8916,
      "step": 4280
    },
    {
      "epoch": 1.204328274311411,
      "grad_norm": 0.588778018951416,
      "learning_rate": 0.00018815249592477338,
      "loss": 0.864,
      "step": 4285
    },
    {
      "epoch": 1.205733558178752,
      "grad_norm": 0.5212571620941162,
      "learning_rate": 0.0001881061333705937,
      "loss": 0.8707,
      "step": 4290
    },
    {
      "epoch": 1.2071388420460933,
      "grad_norm": 0.9074152112007141,
      "learning_rate": 0.00018805968601385724,
      "loss": 0.8632,
      "step": 4295
    },
    {
      "epoch": 1.2085441259134346,
      "grad_norm": 0.5140102505683899,
      "learning_rate": 0.0001880131538992698,
      "loss": 0.8635,
      "step": 4300
    },
    {
      "epoch": 1.2099494097807757,
      "grad_norm": 0.7265368103981018,
      "learning_rate": 0.0001879665370716187,
      "loss": 0.8657,
      "step": 4305
    },
    {
      "epoch": 1.211354693648117,
      "grad_norm": 0.5487323999404907,
      "learning_rate": 0.00018791983557577292,
      "loss": 0.8694,
      "step": 4310
    },
    {
      "epoch": 1.212759977515458,
      "grad_norm": 0.5688353180885315,
      "learning_rate": 0.00018787304945668283,
      "loss": 0.8744,
      "step": 4315
    },
    {
      "epoch": 1.2141652613827993,
      "grad_norm": 0.6270791888237,
      "learning_rate": 0.00018782617875938028,
      "loss": 0.8752,
      "step": 4320
    },
    {
      "epoch": 1.2155705452501406,
      "grad_norm": 0.5649317502975464,
      "learning_rate": 0.00018777922352897854,
      "loss": 0.8689,
      "step": 4325
    },
    {
      "epoch": 1.2169758291174817,
      "grad_norm": 0.4823704957962036,
      "learning_rate": 0.00018773218381067225,
      "loss": 0.8728,
      "step": 4330
    },
    {
      "epoch": 1.218381112984823,
      "grad_norm": 0.5369855761528015,
      "learning_rate": 0.00018768505964973731,
      "loss": 0.8642,
      "step": 4335
    },
    {
      "epoch": 1.219786396852164,
      "grad_norm": 0.684241533279419,
      "learning_rate": 0.000187637851091531,
      "loss": 0.8687,
      "step": 4340
    },
    {
      "epoch": 1.2211916807195053,
      "grad_norm": 0.664493978023529,
      "learning_rate": 0.0001875905581814917,
      "loss": 0.8733,
      "step": 4345
    },
    {
      "epoch": 1.2225969645868466,
      "grad_norm": 0.6049327254295349,
      "learning_rate": 0.00018754318096513917,
      "loss": 0.8762,
      "step": 4350
    },
    {
      "epoch": 1.2240022484541877,
      "grad_norm": 0.6041503548622131,
      "learning_rate": 0.00018749571948807405,
      "loss": 0.8768,
      "step": 4355
    },
    {
      "epoch": 1.225407532321529,
      "grad_norm": 0.528701901435852,
      "learning_rate": 0.00018744817379597834,
      "loss": 0.8695,
      "step": 4360
    },
    {
      "epoch": 1.22681281618887,
      "grad_norm": 0.8402326703071594,
      "learning_rate": 0.00018740054393461493,
      "loss": 0.8739,
      "step": 4365
    },
    {
      "epoch": 1.2282181000562113,
      "grad_norm": 0.5691919922828674,
      "learning_rate": 0.00018735282994982778,
      "loss": 0.8692,
      "step": 4370
    },
    {
      "epoch": 1.2296233839235526,
      "grad_norm": 0.4437329173088074,
      "learning_rate": 0.00018730503188754187,
      "loss": 0.8728,
      "step": 4375
    },
    {
      "epoch": 1.2310286677908937,
      "grad_norm": 0.778617262840271,
      "learning_rate": 0.000187257149793763,
      "loss": 0.8746,
      "step": 4380
    },
    {
      "epoch": 1.232433951658235,
      "grad_norm": 0.5112653970718384,
      "learning_rate": 0.00018720918371457792,
      "loss": 0.869,
      "step": 4385
    },
    {
      "epoch": 1.233839235525576,
      "grad_norm": 0.5120835900306702,
      "learning_rate": 0.00018716113369615425,
      "loss": 0.8802,
      "step": 4390
    },
    {
      "epoch": 1.2352445193929174,
      "grad_norm": 0.6164501309394836,
      "learning_rate": 0.00018711299978474023,
      "loss": 0.877,
      "step": 4395
    },
    {
      "epoch": 1.2366498032602586,
      "grad_norm": 0.7080835103988647,
      "learning_rate": 0.0001870647820266651,
      "loss": 0.8633,
      "step": 4400
    },
    {
      "epoch": 1.2380550871275997,
      "grad_norm": 0.5996346473693848,
      "learning_rate": 0.00018701648046833862,
      "loss": 0.8751,
      "step": 4405
    },
    {
      "epoch": 1.239460370994941,
      "grad_norm": 0.5956994891166687,
      "learning_rate": 0.00018696809515625126,
      "loss": 0.944,
      "step": 4410
    },
    {
      "epoch": 1.240865654862282,
      "grad_norm": 0.6986256837844849,
      "learning_rate": 0.0001869196261369741,
      "loss": 0.868,
      "step": 4415
    },
    {
      "epoch": 1.2422709387296234,
      "grad_norm": 0.7284836173057556,
      "learning_rate": 0.0001868710734571588,
      "loss": 0.8701,
      "step": 4420
    },
    {
      "epoch": 1.2436762225969646,
      "grad_norm": 0.5841744542121887,
      "learning_rate": 0.00018682243716353753,
      "loss": 0.8731,
      "step": 4425
    },
    {
      "epoch": 1.2450815064643057,
      "grad_norm": 0.5694195032119751,
      "learning_rate": 0.00018677371730292297,
      "loss": 0.8674,
      "step": 4430
    },
    {
      "epoch": 1.246486790331647,
      "grad_norm": 0.6393140554428101,
      "learning_rate": 0.00018672491392220816,
      "loss": 0.8758,
      "step": 4435
    },
    {
      "epoch": 1.2478920741989883,
      "grad_norm": 1.008575201034546,
      "learning_rate": 0.00018667602706836663,
      "loss": 0.8647,
      "step": 4440
    },
    {
      "epoch": 1.2492973580663294,
      "grad_norm": 0.8124025464057922,
      "learning_rate": 0.00018662705678845217,
      "loss": 0.887,
      "step": 4445
    },
    {
      "epoch": 1.2507026419336706,
      "grad_norm": 0.8950420618057251,
      "learning_rate": 0.0001865780031295989,
      "loss": 0.8758,
      "step": 4450
    },
    {
      "epoch": 1.252107925801012,
      "grad_norm": 0.533831000328064,
      "learning_rate": 0.0001865288661390212,
      "loss": 0.8801,
      "step": 4455
    },
    {
      "epoch": 1.253513209668353,
      "grad_norm": 0.5837879776954651,
      "learning_rate": 0.00018647964586401367,
      "loss": 0.8838,
      "step": 4460
    },
    {
      "epoch": 1.2549184935356943,
      "grad_norm": 0.5391103029251099,
      "learning_rate": 0.00018643034235195103,
      "loss": 0.8636,
      "step": 4465
    },
    {
      "epoch": 1.2563237774030354,
      "grad_norm": 0.7856371402740479,
      "learning_rate": 0.0001863809556502881,
      "loss": 0.857,
      "step": 4470
    },
    {
      "epoch": 1.2577290612703766,
      "grad_norm": 0.6522362232208252,
      "learning_rate": 0.00018633148580655986,
      "loss": 0.8684,
      "step": 4475
    },
    {
      "epoch": 1.259134345137718,
      "grad_norm": 0.5931859612464905,
      "learning_rate": 0.00018628193286838123,
      "loss": 0.9331,
      "step": 4480
    },
    {
      "epoch": 1.260539629005059,
      "grad_norm": 0.8700698614120483,
      "learning_rate": 0.00018623229688344715,
      "loss": 0.8859,
      "step": 4485
    },
    {
      "epoch": 1.2619449128724003,
      "grad_norm": 1.0133111476898193,
      "learning_rate": 0.0001861825778995325,
      "loss": 0.878,
      "step": 4490
    },
    {
      "epoch": 1.2633501967397414,
      "grad_norm": 0.7472015619277954,
      "learning_rate": 0.00018613277596449197,
      "loss": 0.866,
      "step": 4495
    },
    {
      "epoch": 1.2647554806070826,
      "grad_norm": 0.7217004895210266,
      "learning_rate": 0.00018608289112626025,
      "loss": 0.8696,
      "step": 4500
    },
    {
      "epoch": 1.266160764474424,
      "grad_norm": 0.5697894096374512,
      "learning_rate": 0.00018603292343285163,
      "loss": 0.8879,
      "step": 4505
    },
    {
      "epoch": 1.267566048341765,
      "grad_norm": 0.5211483836174011,
      "learning_rate": 0.00018598287293236028,
      "loss": 0.8811,
      "step": 4510
    },
    {
      "epoch": 1.2689713322091063,
      "grad_norm": 0.5396711230278015,
      "learning_rate": 0.00018593273967296004,
      "loss": 0.8725,
      "step": 4515
    },
    {
      "epoch": 1.2703766160764474,
      "grad_norm": 0.46194639801979065,
      "learning_rate": 0.00018588252370290443,
      "loss": 0.8657,
      "step": 4520
    },
    {
      "epoch": 1.2717818999437887,
      "grad_norm": 0.6557855606079102,
      "learning_rate": 0.00018583222507052649,
      "loss": 0.8573,
      "step": 4525
    },
    {
      "epoch": 1.27318718381113,
      "grad_norm": 0.5345513820648193,
      "learning_rate": 0.00018578184382423893,
      "loss": 0.8807,
      "step": 4530
    },
    {
      "epoch": 1.274592467678471,
      "grad_norm": 0.5782117247581482,
      "learning_rate": 0.0001857313800125339,
      "loss": 0.8643,
      "step": 4535
    },
    {
      "epoch": 1.2759977515458123,
      "grad_norm": 0.7175543904304504,
      "learning_rate": 0.0001856808336839831,
      "loss": 0.8682,
      "step": 4540
    },
    {
      "epoch": 1.2774030354131534,
      "grad_norm": 0.518791675567627,
      "learning_rate": 0.00018563020488723752,
      "loss": 0.9229,
      "step": 4545
    },
    {
      "epoch": 1.2788083192804947,
      "grad_norm": 0.7908794283866882,
      "learning_rate": 0.0001855794936710277,
      "loss": 0.869,
      "step": 4550
    },
    {
      "epoch": 1.280213603147836,
      "grad_norm": 1.0072758197784424,
      "learning_rate": 0.00018552870008416335,
      "loss": 0.8675,
      "step": 4555
    },
    {
      "epoch": 1.281618887015177,
      "grad_norm": 0.505922794342041,
      "learning_rate": 0.00018547782417553354,
      "loss": 0.8683,
      "step": 4560
    },
    {
      "epoch": 1.2830241708825183,
      "grad_norm": 0.8561394810676575,
      "learning_rate": 0.00018542686599410662,
      "loss": 0.8777,
      "step": 4565
    },
    {
      "epoch": 1.2844294547498594,
      "grad_norm": 0.6131189465522766,
      "learning_rate": 0.00018537582558892998,
      "loss": 0.8396,
      "step": 4570
    },
    {
      "epoch": 1.2858347386172007,
      "grad_norm": 0.5617784857749939,
      "learning_rate": 0.00018532470300913035,
      "loss": 0.8692,
      "step": 4575
    },
    {
      "epoch": 1.287240022484542,
      "grad_norm": 0.5197677612304688,
      "learning_rate": 0.00018527349830391336,
      "loss": 0.8841,
      "step": 4580
    },
    {
      "epoch": 1.288645306351883,
      "grad_norm": 0.572418212890625,
      "learning_rate": 0.00018522221152256378,
      "loss": 0.8794,
      "step": 4585
    },
    {
      "epoch": 1.2900505902192243,
      "grad_norm": 0.5657436847686768,
      "learning_rate": 0.00018517084271444544,
      "loss": 0.9167,
      "step": 4590
    },
    {
      "epoch": 1.2914558740865654,
      "grad_norm": 0.8176900744438171,
      "learning_rate": 0.00018511939192900097,
      "loss": 0.8716,
      "step": 4595
    },
    {
      "epoch": 1.2928611579539067,
      "grad_norm": 0.7152751088142395,
      "learning_rate": 0.0001850678592157521,
      "loss": 0.8831,
      "step": 4600
    },
    {
      "epoch": 1.294266441821248,
      "grad_norm": 0.7368189096450806,
      "learning_rate": 0.00018501624462429918,
      "loss": 0.8747,
      "step": 4605
    },
    {
      "epoch": 1.295671725688589,
      "grad_norm": 0.6734061241149902,
      "learning_rate": 0.00018496454820432154,
      "loss": 0.8758,
      "step": 4610
    },
    {
      "epoch": 1.2970770095559303,
      "grad_norm": 1.670920968055725,
      "learning_rate": 0.00018491277000557722,
      "loss": 0.9219,
      "step": 4615
    },
    {
      "epoch": 1.2984822934232714,
      "grad_norm": 0.8343753814697266,
      "learning_rate": 0.00018486091007790297,
      "loss": 0.8635,
      "step": 4620
    },
    {
      "epoch": 1.2998875772906127,
      "grad_norm": 0.5270551443099976,
      "learning_rate": 0.00018480896847121426,
      "loss": 0.8667,
      "step": 4625
    },
    {
      "epoch": 1.301292861157954,
      "grad_norm": 0.9477784633636475,
      "learning_rate": 0.00018475694523550505,
      "loss": 0.8679,
      "step": 4630
    },
    {
      "epoch": 1.3026981450252952,
      "grad_norm": 0.4685782194137573,
      "learning_rate": 0.00018470484042084796,
      "loss": 0.8784,
      "step": 4635
    },
    {
      "epoch": 1.3041034288926363,
      "grad_norm": 0.7531285881996155,
      "learning_rate": 0.00018465265407739413,
      "loss": 0.8775,
      "step": 4640
    },
    {
      "epoch": 1.3055087127599776,
      "grad_norm": 0.6527368426322937,
      "learning_rate": 0.00018460038625537313,
      "loss": 0.9371,
      "step": 4645
    },
    {
      "epoch": 1.3069139966273187,
      "grad_norm": 0.5861037373542786,
      "learning_rate": 0.000184548037005093,
      "loss": 0.8715,
      "step": 4650
    },
    {
      "epoch": 1.30831928049466,
      "grad_norm": 0.6117671728134155,
      "learning_rate": 0.00018449560637694013,
      "loss": 0.8816,
      "step": 4655
    },
    {
      "epoch": 1.3097245643620012,
      "grad_norm": 0.5061159729957581,
      "learning_rate": 0.00018444309442137923,
      "loss": 0.8773,
      "step": 4660
    },
    {
      "epoch": 1.3111298482293423,
      "grad_norm": 0.4746789038181305,
      "learning_rate": 0.00018439050118895334,
      "loss": 0.8711,
      "step": 4665
    },
    {
      "epoch": 1.3125351320966836,
      "grad_norm": 0.5956834554672241,
      "learning_rate": 0.00018433782673028362,
      "loss": 0.8697,
      "step": 4670
    },
    {
      "epoch": 1.3139404159640247,
      "grad_norm": 0.5160255432128906,
      "learning_rate": 0.0001842850710960695,
      "loss": 0.8804,
      "step": 4675
    },
    {
      "epoch": 1.315345699831366,
      "grad_norm": 0.5058934688568115,
      "learning_rate": 0.00018423223433708857,
      "loss": 0.8785,
      "step": 4680
    },
    {
      "epoch": 1.3167509836987072,
      "grad_norm": 0.735233724117279,
      "learning_rate": 0.00018417931650419639,
      "loss": 0.8582,
      "step": 4685
    },
    {
      "epoch": 1.3181562675660483,
      "grad_norm": 0.6757941246032715,
      "learning_rate": 0.00018412631764832662,
      "loss": 0.8829,
      "step": 4690
    },
    {
      "epoch": 1.3195615514333896,
      "grad_norm": 0.808224081993103,
      "learning_rate": 0.00018407323782049093,
      "loss": 0.8721,
      "step": 4695
    },
    {
      "epoch": 1.3209668353007307,
      "grad_norm": 0.69460529088974,
      "learning_rate": 0.0001840200770717789,
      "loss": 0.8762,
      "step": 4700
    },
    {
      "epoch": 1.322372119168072,
      "grad_norm": 0.576110303401947,
      "learning_rate": 0.00018396683545335798,
      "loss": 0.8686,
      "step": 4705
    },
    {
      "epoch": 1.3237774030354132,
      "grad_norm": 0.69572913646698,
      "learning_rate": 0.00018391351301647344,
      "loss": 0.8768,
      "step": 4710
    },
    {
      "epoch": 1.3251826869027543,
      "grad_norm": 0.6554480195045471,
      "learning_rate": 0.00018386010981244843,
      "loss": 0.8863,
      "step": 4715
    },
    {
      "epoch": 1.3265879707700956,
      "grad_norm": 0.6453247666358948,
      "learning_rate": 0.00018380662589268377,
      "loss": 0.8602,
      "step": 4720
    },
    {
      "epoch": 1.3279932546374367,
      "grad_norm": 0.6823776364326477,
      "learning_rate": 0.00018375306130865793,
      "loss": 0.8771,
      "step": 4725
    },
    {
      "epoch": 1.329398538504778,
      "grad_norm": 0.8525605797767639,
      "learning_rate": 0.00018369941611192712,
      "loss": 0.8788,
      "step": 4730
    },
    {
      "epoch": 1.3308038223721192,
      "grad_norm": 0.9090725183486938,
      "learning_rate": 0.00018364569035412502,
      "loss": 0.9202,
      "step": 4735
    },
    {
      "epoch": 1.3322091062394603,
      "grad_norm": 0.6051588654518127,
      "learning_rate": 0.000183591884086963,
      "loss": 0.8619,
      "step": 4740
    },
    {
      "epoch": 1.3336143901068016,
      "grad_norm": 0.4923397898674011,
      "learning_rate": 0.00018353799736222975,
      "loss": 0.8691,
      "step": 4745
    },
    {
      "epoch": 1.3350196739741427,
      "grad_norm": 0.5829062461853027,
      "learning_rate": 0.0001834840302317916,
      "loss": 0.8534,
      "step": 4750
    },
    {
      "epoch": 1.336424957841484,
      "grad_norm": 0.5169609785079956,
      "learning_rate": 0.00018342998274759208,
      "loss": 0.8565,
      "step": 4755
    },
    {
      "epoch": 1.3378302417088253,
      "grad_norm": 0.5367736220359802,
      "learning_rate": 0.00018337585496165215,
      "loss": 0.9078,
      "step": 4760
    },
    {
      "epoch": 1.3392355255761663,
      "grad_norm": 0.5715290904045105,
      "learning_rate": 0.00018332164692607008,
      "loss": 0.8706,
      "step": 4765
    },
    {
      "epoch": 1.3406408094435076,
      "grad_norm": 0.5462102293968201,
      "learning_rate": 0.0001832673586930213,
      "loss": 0.8705,
      "step": 4770
    },
    {
      "epoch": 1.3420460933108487,
      "grad_norm": 0.5522609353065491,
      "learning_rate": 0.00018321299031475854,
      "loss": 0.867,
      "step": 4775
    },
    {
      "epoch": 1.34345137717819,
      "grad_norm": 0.5512838363647461,
      "learning_rate": 0.00018315854184361156,
      "loss": 0.8865,
      "step": 4780
    },
    {
      "epoch": 1.3448566610455313,
      "grad_norm": 0.6370877623558044,
      "learning_rate": 0.00018310401333198733,
      "loss": 0.8747,
      "step": 4785
    },
    {
      "epoch": 1.3462619449128723,
      "grad_norm": 0.47698330879211426,
      "learning_rate": 0.00018304940483236974,
      "loss": 0.8586,
      "step": 4790
    },
    {
      "epoch": 1.3476672287802136,
      "grad_norm": 0.49109163880348206,
      "learning_rate": 0.00018299471639731977,
      "loss": 0.8797,
      "step": 4795
    },
    {
      "epoch": 1.3490725126475547,
      "grad_norm": 0.5076658129692078,
      "learning_rate": 0.00018293994807947522,
      "loss": 0.8777,
      "step": 4800
    },
    {
      "epoch": 1.350477796514896,
      "grad_norm": 0.5216749310493469,
      "learning_rate": 0.00018288509993155086,
      "loss": 0.8673,
      "step": 4805
    },
    {
      "epoch": 1.3518830803822373,
      "grad_norm": 0.5092537999153137,
      "learning_rate": 0.00018283017200633833,
      "loss": 0.8536,
      "step": 4810
    },
    {
      "epoch": 1.3532883642495785,
      "grad_norm": 0.6097123622894287,
      "learning_rate": 0.000182775164356706,
      "loss": 0.8653,
      "step": 4815
    },
    {
      "epoch": 1.3546936481169196,
      "grad_norm": 0.6337655186653137,
      "learning_rate": 0.00018272007703559894,
      "loss": 0.8663,
      "step": 4820
    },
    {
      "epoch": 1.356098931984261,
      "grad_norm": 0.6192864775657654,
      "learning_rate": 0.000182664910096039,
      "loss": 0.8686,
      "step": 4825
    },
    {
      "epoch": 1.357504215851602,
      "grad_norm": 0.46798455715179443,
      "learning_rate": 0.0001826096635911246,
      "loss": 0.8611,
      "step": 4830
    },
    {
      "epoch": 1.3589094997189433,
      "grad_norm": 0.5440805554389954,
      "learning_rate": 0.00018255433757403071,
      "loss": 0.8682,
      "step": 4835
    },
    {
      "epoch": 1.3603147835862845,
      "grad_norm": 0.5112454295158386,
      "learning_rate": 0.00018249893209800892,
      "loss": 0.886,
      "step": 4840
    },
    {
      "epoch": 1.3617200674536256,
      "grad_norm": 1.0984801054000854,
      "learning_rate": 0.00018244344721638726,
      "loss": 0.8734,
      "step": 4845
    },
    {
      "epoch": 1.363125351320967,
      "grad_norm": 0.5598686933517456,
      "learning_rate": 0.00018238788298257014,
      "loss": 0.8729,
      "step": 4850
    },
    {
      "epoch": 1.364530635188308,
      "grad_norm": 0.7339655160903931,
      "learning_rate": 0.00018233223945003844,
      "loss": 0.8681,
      "step": 4855
    },
    {
      "epoch": 1.3659359190556493,
      "grad_norm": 0.6859006881713867,
      "learning_rate": 0.0001822765166723493,
      "loss": 0.8687,
      "step": 4860
    },
    {
      "epoch": 1.3673412029229906,
      "grad_norm": 0.5756944417953491,
      "learning_rate": 0.0001822207147031361,
      "loss": 0.8754,
      "step": 4865
    },
    {
      "epoch": 1.3687464867903316,
      "grad_norm": 0.45417851209640503,
      "learning_rate": 0.00018216483359610855,
      "loss": 0.8683,
      "step": 4870
    },
    {
      "epoch": 1.370151770657673,
      "grad_norm": 0.45869648456573486,
      "learning_rate": 0.00018210887340505244,
      "loss": 0.883,
      "step": 4875
    },
    {
      "epoch": 1.371557054525014,
      "grad_norm": 0.5835374593734741,
      "learning_rate": 0.00018205283418382972,
      "loss": 0.8745,
      "step": 4880
    },
    {
      "epoch": 1.3729623383923553,
      "grad_norm": 0.5375783443450928,
      "learning_rate": 0.00018199671598637842,
      "loss": 0.8668,
      "step": 4885
    },
    {
      "epoch": 1.3743676222596966,
      "grad_norm": 0.5502672791481018,
      "learning_rate": 0.00018194051886671252,
      "loss": 0.8679,
      "step": 4890
    },
    {
      "epoch": 1.3757729061270376,
      "grad_norm": 0.9193586111068726,
      "learning_rate": 0.00018188424287892202,
      "loss": 0.8848,
      "step": 4895
    },
    {
      "epoch": 1.377178189994379,
      "grad_norm": 0.5418441891670227,
      "learning_rate": 0.00018182788807717285,
      "loss": 0.8614,
      "step": 4900
    },
    {
      "epoch": 1.37858347386172,
      "grad_norm": 0.6423871517181396,
      "learning_rate": 0.0001817714545157067,
      "loss": 0.9258,
      "step": 4905
    },
    {
      "epoch": 1.3799887577290613,
      "grad_norm": 0.9485771059989929,
      "learning_rate": 0.0001817149422488412,
      "loss": 0.8672,
      "step": 4910
    },
    {
      "epoch": 1.3813940415964026,
      "grad_norm": 0.49273064732551575,
      "learning_rate": 0.00018165835133096962,
      "loss": 0.861,
      "step": 4915
    },
    {
      "epoch": 1.3827993254637436,
      "grad_norm": 0.7470026016235352,
      "learning_rate": 0.00018160168181656099,
      "loss": 0.8763,
      "step": 4920
    },
    {
      "epoch": 1.384204609331085,
      "grad_norm": 0.5558050870895386,
      "learning_rate": 0.00018154493376015997,
      "loss": 0.8754,
      "step": 4925
    },
    {
      "epoch": 1.385609893198426,
      "grad_norm": 0.7394638657569885,
      "learning_rate": 0.00018148810721638686,
      "loss": 0.8764,
      "step": 4930
    },
    {
      "epoch": 1.3870151770657673,
      "grad_norm": 0.6907084584236145,
      "learning_rate": 0.0001814312022399374,
      "loss": 0.886,
      "step": 4935
    },
    {
      "epoch": 1.3884204609331086,
      "grad_norm": 0.6553033590316772,
      "learning_rate": 0.00018137421888558296,
      "loss": 0.8617,
      "step": 4940
    },
    {
      "epoch": 1.3898257448004496,
      "grad_norm": 0.5968340635299683,
      "learning_rate": 0.00018131715720817024,
      "loss": 0.8673,
      "step": 4945
    },
    {
      "epoch": 1.391231028667791,
      "grad_norm": 0.5546401739120483,
      "learning_rate": 0.00018126001726262135,
      "loss": 0.8698,
      "step": 4950
    },
    {
      "epoch": 1.392636312535132,
      "grad_norm": 0.4818323254585266,
      "learning_rate": 0.00018120279910393384,
      "loss": 0.853,
      "step": 4955
    },
    {
      "epoch": 1.3940415964024733,
      "grad_norm": 0.7312915921211243,
      "learning_rate": 0.0001811455027871803,
      "loss": 0.8815,
      "step": 4960
    },
    {
      "epoch": 1.3954468802698146,
      "grad_norm": 0.6379952430725098,
      "learning_rate": 0.00018108812836750885,
      "loss": 0.8611,
      "step": 4965
    },
    {
      "epoch": 1.3968521641371556,
      "grad_norm": 0.547435998916626,
      "learning_rate": 0.00018103067590014254,
      "loss": 0.8794,
      "step": 4970
    },
    {
      "epoch": 1.398257448004497,
      "grad_norm": 0.6002300381660461,
      "learning_rate": 0.00018097314544037967,
      "loss": 0.8679,
      "step": 4975
    },
    {
      "epoch": 1.399662731871838,
      "grad_norm": 0.4890361726284027,
      "learning_rate": 0.00018091553704359354,
      "loss": 0.927,
      "step": 4980
    },
    {
      "epoch": 1.4010680157391793,
      "grad_norm": 0.7528916597366333,
      "learning_rate": 0.0001808578507652325,
      "loss": 0.869,
      "step": 4985
    },
    {
      "epoch": 1.4024732996065206,
      "grad_norm": 0.5764560103416443,
      "learning_rate": 0.00018080008666081988,
      "loss": 0.8734,
      "step": 4990
    },
    {
      "epoch": 1.4038785834738619,
      "grad_norm": 0.5067351460456848,
      "learning_rate": 0.00018074224478595392,
      "loss": 0.8641,
      "step": 4995
    },
    {
      "epoch": 1.405283867341203,
      "grad_norm": 0.617499053478241,
      "learning_rate": 0.0001806843251963076,
      "loss": 0.8697,
      "step": 5000
    },
    {
      "epoch": 1.4066891512085442,
      "grad_norm": 0.7035298347473145,
      "learning_rate": 0.00018062632794762888,
      "loss": 0.8745,
      "step": 5005
    },
    {
      "epoch": 1.4080944350758853,
      "grad_norm": 0.6594152450561523,
      "learning_rate": 0.0001805682530957403,
      "loss": 0.8636,
      "step": 5010
    },
    {
      "epoch": 1.4094997189432266,
      "grad_norm": 0.8443005681037903,
      "learning_rate": 0.0001805101006965393,
      "loss": 0.8699,
      "step": 5015
    },
    {
      "epoch": 1.4109050028105679,
      "grad_norm": 0.643632173538208,
      "learning_rate": 0.0001804518708059977,
      "loss": 0.8853,
      "step": 5020
    },
    {
      "epoch": 1.412310286677909,
      "grad_norm": 0.5515636801719666,
      "learning_rate": 0.00018039356348016202,
      "loss": 0.874,
      "step": 5025
    },
    {
      "epoch": 1.4137155705452502,
      "grad_norm": 0.5284005999565125,
      "learning_rate": 0.00018033517877515345,
      "loss": 0.8645,
      "step": 5030
    },
    {
      "epoch": 1.4151208544125913,
      "grad_norm": 0.652372419834137,
      "learning_rate": 0.00018027671674716747,
      "loss": 0.8675,
      "step": 5035
    },
    {
      "epoch": 1.4165261382799326,
      "grad_norm": 0.8631367087364197,
      "learning_rate": 0.00018021817745247402,
      "loss": 0.8673,
      "step": 5040
    },
    {
      "epoch": 1.4179314221472739,
      "grad_norm": 1.9220738410949707,
      "learning_rate": 0.0001801595609474175,
      "loss": 0.8711,
      "step": 5045
    },
    {
      "epoch": 1.419336706014615,
      "grad_norm": 0.6992930173873901,
      "learning_rate": 0.00018010086728841653,
      "loss": 0.8634,
      "step": 5050
    },
    {
      "epoch": 1.4207419898819562,
      "grad_norm": 0.5778381824493408,
      "learning_rate": 0.00018004209653196403,
      "loss": 0.8788,
      "step": 5055
    },
    {
      "epoch": 1.4221472737492973,
      "grad_norm": 0.5597308874130249,
      "learning_rate": 0.00017998324873462712,
      "loss": 0.8531,
      "step": 5060
    },
    {
      "epoch": 1.4235525576166386,
      "grad_norm": 0.5867209434509277,
      "learning_rate": 0.0001799243239530471,
      "loss": 0.8763,
      "step": 5065
    },
    {
      "epoch": 1.4249578414839799,
      "grad_norm": 0.9052205681800842,
      "learning_rate": 0.0001798653222439393,
      "loss": 0.8581,
      "step": 5070
    },
    {
      "epoch": 1.426363125351321,
      "grad_norm": 0.7517098784446716,
      "learning_rate": 0.00017980624366409318,
      "loss": 0.8549,
      "step": 5075
    },
    {
      "epoch": 1.4277684092186622,
      "grad_norm": 0.5015307664871216,
      "learning_rate": 0.0001797470882703721,
      "loss": 0.8756,
      "step": 5080
    },
    {
      "epoch": 1.4291736930860033,
      "grad_norm": 0.5503989458084106,
      "learning_rate": 0.00017968785611971344,
      "loss": 0.9157,
      "step": 5085
    },
    {
      "epoch": 1.4305789769533446,
      "grad_norm": 0.7186044454574585,
      "learning_rate": 0.00017962854726912838,
      "loss": 0.8578,
      "step": 5090
    },
    {
      "epoch": 1.4319842608206859,
      "grad_norm": 0.6403028964996338,
      "learning_rate": 0.00017956916177570197,
      "loss": 0.8621,
      "step": 5095
    },
    {
      "epoch": 1.433389544688027,
      "grad_norm": 0.7702693343162537,
      "learning_rate": 0.00017950969969659302,
      "loss": 0.8796,
      "step": 5100
    },
    {
      "epoch": 1.4347948285553682,
      "grad_norm": 0.5178045034408569,
      "learning_rate": 0.00017945016108903406,
      "loss": 0.8437,
      "step": 5105
    },
    {
      "epoch": 1.4362001124227093,
      "grad_norm": 0.5101531147956848,
      "learning_rate": 0.00017939054601033124,
      "loss": 0.874,
      "step": 5110
    },
    {
      "epoch": 1.4376053962900506,
      "grad_norm": 0.6295761466026306,
      "learning_rate": 0.00017933085451786443,
      "loss": 0.8793,
      "step": 5115
    },
    {
      "epoch": 1.4390106801573919,
      "grad_norm": 0.6453625559806824,
      "learning_rate": 0.00017927108666908686,
      "loss": 0.9023,
      "step": 5120
    },
    {
      "epoch": 1.440415964024733,
      "grad_norm": 0.6669296622276306,
      "learning_rate": 0.00017921124252152546,
      "loss": 0.8664,
      "step": 5125
    },
    {
      "epoch": 1.4418212478920742,
      "grad_norm": 0.555692732334137,
      "learning_rate": 0.0001791513221327804,
      "loss": 0.9177,
      "step": 5130
    },
    {
      "epoch": 1.4432265317594153,
      "grad_norm": 0.9209009408950806,
      "learning_rate": 0.00017909132556052538,
      "loss": 0.8641,
      "step": 5135
    },
    {
      "epoch": 1.4446318156267566,
      "grad_norm": 0.5858955979347229,
      "learning_rate": 0.00017903125286250737,
      "loss": 0.9361,
      "step": 5140
    },
    {
      "epoch": 1.4460370994940979,
      "grad_norm": 0.7377965450286865,
      "learning_rate": 0.00017897110409654661,
      "loss": 0.8703,
      "step": 5145
    },
    {
      "epoch": 1.447442383361439,
      "grad_norm": 0.6079499125480652,
      "learning_rate": 0.0001789108793205366,
      "loss": 0.8642,
      "step": 5150
    },
    {
      "epoch": 1.4488476672287802,
      "grad_norm": 0.8482330441474915,
      "learning_rate": 0.0001788505785924439,
      "loss": 0.86,
      "step": 5155
    },
    {
      "epoch": 1.4502529510961213,
      "grad_norm": 0.8403465151786804,
      "learning_rate": 0.0001787902019703083,
      "loss": 0.8705,
      "step": 5160
    },
    {
      "epoch": 1.4516582349634626,
      "grad_norm": 0.8771752715110779,
      "learning_rate": 0.0001787297495122425,
      "loss": 0.854,
      "step": 5165
    },
    {
      "epoch": 1.4530635188308039,
      "grad_norm": 1.1639344692230225,
      "learning_rate": 0.00017866922127643232,
      "loss": 0.9359,
      "step": 5170
    },
    {
      "epoch": 1.4544688026981452,
      "grad_norm": 0.7973338961601257,
      "learning_rate": 0.00017860861732113648,
      "loss": 0.8632,
      "step": 5175
    },
    {
      "epoch": 1.4558740865654862,
      "grad_norm": 0.6689393520355225,
      "learning_rate": 0.0001785479377046865,
      "loss": 0.8706,
      "step": 5180
    },
    {
      "epoch": 1.4572793704328275,
      "grad_norm": 1.694438099861145,
      "learning_rate": 0.00017848718248548686,
      "loss": 0.8738,
      "step": 5185
    },
    {
      "epoch": 1.4586846543001686,
      "grad_norm": 0.636578381061554,
      "learning_rate": 0.0001784263517220147,
      "loss": 0.8705,
      "step": 5190
    },
    {
      "epoch": 1.4600899381675099,
      "grad_norm": 0.9136772751808167,
      "learning_rate": 0.0001783654454728199,
      "loss": 0.8528,
      "step": 5195
    },
    {
      "epoch": 1.4614952220348512,
      "grad_norm": 0.623679518699646,
      "learning_rate": 0.00017830446379652504,
      "loss": 0.8694,
      "step": 5200
    },
    {
      "epoch": 1.4629005059021922,
      "grad_norm": 0.7054007649421692,
      "learning_rate": 0.0001782434067518252,
      "loss": 0.869,
      "step": 5205
    },
    {
      "epoch": 1.4643057897695335,
      "grad_norm": 1.3509278297424316,
      "learning_rate": 0.00017818227439748814,
      "loss": 0.8816,
      "step": 5210
    },
    {
      "epoch": 1.4657110736368746,
      "grad_norm": 0.787777841091156,
      "learning_rate": 0.00017812106679235395,
      "loss": 0.8716,
      "step": 5215
    },
    {
      "epoch": 1.4671163575042159,
      "grad_norm": 0.6147462129592896,
      "learning_rate": 0.0001780597839953353,
      "loss": 0.8798,
      "step": 5220
    },
    {
      "epoch": 1.4685216413715572,
      "grad_norm": 0.6231019496917725,
      "learning_rate": 0.00017799842606541714,
      "loss": 0.8933,
      "step": 5225
    },
    {
      "epoch": 1.4699269252388982,
      "grad_norm": 0.669661819934845,
      "learning_rate": 0.0001779369930616568,
      "loss": 0.8615,
      "step": 5230
    },
    {
      "epoch": 1.4713322091062395,
      "grad_norm": 0.5519552230834961,
      "learning_rate": 0.00017787548504318373,
      "loss": 0.8696,
      "step": 5235
    },
    {
      "epoch": 1.4727374929735806,
      "grad_norm": 0.5218339562416077,
      "learning_rate": 0.00017781390206919975,
      "loss": 0.8734,
      "step": 5240
    },
    {
      "epoch": 1.4741427768409219,
      "grad_norm": 0.5840299725532532,
      "learning_rate": 0.0001777522441989787,
      "loss": 0.8815,
      "step": 5245
    },
    {
      "epoch": 1.4755480607082632,
      "grad_norm": 1.1502022743225098,
      "learning_rate": 0.00017769051149186664,
      "loss": 0.8753,
      "step": 5250
    },
    {
      "epoch": 1.4769533445756042,
      "grad_norm": 0.6379995942115784,
      "learning_rate": 0.00017762870400728152,
      "loss": 0.8736,
      "step": 5255
    },
    {
      "epoch": 1.4783586284429455,
      "grad_norm": 0.5611156821250916,
      "learning_rate": 0.00017756682180471338,
      "loss": 0.8603,
      "step": 5260
    },
    {
      "epoch": 1.4797639123102866,
      "grad_norm": 0.6152333617210388,
      "learning_rate": 0.00017750486494372408,
      "loss": 0.8793,
      "step": 5265
    },
    {
      "epoch": 1.4811691961776279,
      "grad_norm": 0.6901036500930786,
      "learning_rate": 0.0001774428334839474,
      "loss": 0.8594,
      "step": 5270
    },
    {
      "epoch": 1.4825744800449692,
      "grad_norm": 0.8560009598731995,
      "learning_rate": 0.0001773807274850889,
      "loss": 0.8688,
      "step": 5275
    },
    {
      "epoch": 1.4839797639123102,
      "grad_norm": 0.5628454685211182,
      "learning_rate": 0.00017731854700692588,
      "loss": 0.8718,
      "step": 5280
    },
    {
      "epoch": 1.4853850477796515,
      "grad_norm": 0.46983468532562256,
      "learning_rate": 0.00017725629210930732,
      "loss": 0.8741,
      "step": 5285
    },
    {
      "epoch": 1.4867903316469926,
      "grad_norm": 0.592480480670929,
      "learning_rate": 0.0001771939628521539,
      "loss": 0.8663,
      "step": 5290
    },
    {
      "epoch": 1.4881956155143339,
      "grad_norm": 0.5054726600646973,
      "learning_rate": 0.00017713155929545776,
      "loss": 0.8711,
      "step": 5295
    },
    {
      "epoch": 1.4896008993816752,
      "grad_norm": 0.5680946111679077,
      "learning_rate": 0.00017706908149928266,
      "loss": 0.864,
      "step": 5300
    },
    {
      "epoch": 1.4910061832490162,
      "grad_norm": 0.670420229434967,
      "learning_rate": 0.00017700652952376374,
      "loss": 0.8774,
      "step": 5305
    },
    {
      "epoch": 1.4924114671163575,
      "grad_norm": 0.5870426893234253,
      "learning_rate": 0.00017694390342910753,
      "loss": 0.8723,
      "step": 5310
    },
    {
      "epoch": 1.4938167509836986,
      "grad_norm": 0.7351912260055542,
      "learning_rate": 0.00017688120327559196,
      "loss": 0.8652,
      "step": 5315
    },
    {
      "epoch": 1.4952220348510399,
      "grad_norm": 0.5224320888519287,
      "learning_rate": 0.00017681842912356624,
      "loss": 0.8831,
      "step": 5320
    },
    {
      "epoch": 1.4966273187183812,
      "grad_norm": 0.5571185946464539,
      "learning_rate": 0.00017675558103345075,
      "loss": 0.8747,
      "step": 5325
    },
    {
      "epoch": 1.4980326025857222,
      "grad_norm": 1.3304951190948486,
      "learning_rate": 0.00017669265906573704,
      "loss": 0.9177,
      "step": 5330
    },
    {
      "epoch": 1.4994378864530635,
      "grad_norm": 0.48752954602241516,
      "learning_rate": 0.00017662966328098784,
      "loss": 0.8519,
      "step": 5335
    },
    {
      "epoch": 1.5008431703204046,
      "grad_norm": 0.6318584084510803,
      "learning_rate": 0.0001765665937398369,
      "loss": 0.8763,
      "step": 5340
    },
    {
      "epoch": 1.5022484541877459,
      "grad_norm": 0.5569983124732971,
      "learning_rate": 0.00017650345050298886,
      "loss": 0.8781,
      "step": 5345
    },
    {
      "epoch": 1.5036537380550872,
      "grad_norm": 0.5715246796607971,
      "learning_rate": 0.00017644023363121947,
      "loss": 0.8548,
      "step": 5350
    },
    {
      "epoch": 1.5050590219224285,
      "grad_norm": 0.5527710914611816,
      "learning_rate": 0.00017637694318537513,
      "loss": 0.8537,
      "step": 5355
    },
    {
      "epoch": 1.5064643057897695,
      "grad_norm": 0.5467240810394287,
      "learning_rate": 0.00017631357922637332,
      "loss": 0.8709,
      "step": 5360
    },
    {
      "epoch": 1.5078695896571106,
      "grad_norm": 0.49403300881385803,
      "learning_rate": 0.0001762501418152021,
      "loss": 0.8675,
      "step": 5365
    },
    {
      "epoch": 1.5092748735244519,
      "grad_norm": 0.5923786163330078,
      "learning_rate": 0.00017618663101292022,
      "loss": 0.8837,
      "step": 5370
    },
    {
      "epoch": 1.5106801573917932,
      "grad_norm": 0.5225454568862915,
      "learning_rate": 0.00017612304688065713,
      "loss": 0.8664,
      "step": 5375
    },
    {
      "epoch": 1.5120854412591345,
      "grad_norm": 0.589855432510376,
      "learning_rate": 0.00017605938947961292,
      "loss": 0.8517,
      "step": 5380
    },
    {
      "epoch": 1.5134907251264755,
      "grad_norm": 0.5037773847579956,
      "learning_rate": 0.00017599565887105803,
      "loss": 0.88,
      "step": 5385
    },
    {
      "epoch": 1.5148960089938166,
      "grad_norm": 0.4747573435306549,
      "learning_rate": 0.00017593185511633356,
      "loss": 0.9105,
      "step": 5390
    },
    {
      "epoch": 1.516301292861158,
      "grad_norm": 0.4647715985774994,
      "learning_rate": 0.00017586797827685082,
      "loss": 0.8597,
      "step": 5395
    },
    {
      "epoch": 1.5177065767284992,
      "grad_norm": 0.5436646938323975,
      "learning_rate": 0.0001758040284140916,
      "loss": 0.8565,
      "step": 5400
    },
    {
      "epoch": 1.5191118605958405,
      "grad_norm": 0.7642892003059387,
      "learning_rate": 0.00017574000558960798,
      "loss": 0.8635,
      "step": 5405
    },
    {
      "epoch": 1.5205171444631815,
      "grad_norm": 1.8821611404418945,
      "learning_rate": 0.0001756759098650222,
      "loss": 0.9264,
      "step": 5410
    },
    {
      "epoch": 1.5219224283305226,
      "grad_norm": 0.5186439752578735,
      "learning_rate": 0.00017561174130202662,
      "loss": 0.8599,
      "step": 5415
    },
    {
      "epoch": 1.523327712197864,
      "grad_norm": 0.5758541226387024,
      "learning_rate": 0.00017554749996238388,
      "loss": 0.8523,
      "step": 5420
    },
    {
      "epoch": 1.5247329960652052,
      "grad_norm": 0.6760127544403076,
      "learning_rate": 0.00017548318590792646,
      "loss": 0.9273,
      "step": 5425
    },
    {
      "epoch": 1.5261382799325465,
      "grad_norm": 0.4689038097858429,
      "learning_rate": 0.00017541879920055702,
      "loss": 0.8731,
      "step": 5430
    },
    {
      "epoch": 1.5275435637998875,
      "grad_norm": 0.5448866486549377,
      "learning_rate": 0.00017535433990224802,
      "loss": 0.8888,
      "step": 5435
    },
    {
      "epoch": 1.5289488476672288,
      "grad_norm": 0.5650317668914795,
      "learning_rate": 0.00017528980807504185,
      "loss": 0.8654,
      "step": 5440
    },
    {
      "epoch": 1.53035413153457,
      "grad_norm": 0.5792402625083923,
      "learning_rate": 0.00017522520378105064,
      "loss": 0.858,
      "step": 5445
    },
    {
      "epoch": 1.5317594154019112,
      "grad_norm": 0.5568968057632446,
      "learning_rate": 0.00017516052708245637,
      "loss": 0.8609,
      "step": 5450
    },
    {
      "epoch": 1.5331646992692525,
      "grad_norm": 0.7661116123199463,
      "learning_rate": 0.00017509577804151064,
      "loss": 0.8754,
      "step": 5455
    },
    {
      "epoch": 1.5345699831365935,
      "grad_norm": 0.5771020650863647,
      "learning_rate": 0.0001750309567205347,
      "loss": 0.8595,
      "step": 5460
    },
    {
      "epoch": 1.5359752670039348,
      "grad_norm": 0.8583388328552246,
      "learning_rate": 0.0001749660631819194,
      "loss": 0.8652,
      "step": 5465
    },
    {
      "epoch": 1.537380550871276,
      "grad_norm": 0.6303179264068604,
      "learning_rate": 0.00017490109748812498,
      "loss": 0.8611,
      "step": 5470
    },
    {
      "epoch": 1.5387858347386172,
      "grad_norm": 1.0614551305770874,
      "learning_rate": 0.00017483605970168128,
      "loss": 0.8662,
      "step": 5475
    },
    {
      "epoch": 1.5401911186059585,
      "grad_norm": 0.7186009287834167,
      "learning_rate": 0.0001747709498851875,
      "loss": 0.8712,
      "step": 5480
    },
    {
      "epoch": 1.5415964024732998,
      "grad_norm": 1.02847158908844,
      "learning_rate": 0.0001747057681013121,
      "loss": 0.8588,
      "step": 5485
    },
    {
      "epoch": 1.5430016863406408,
      "grad_norm": 0.6516429781913757,
      "learning_rate": 0.00017464051441279282,
      "loss": 0.8482,
      "step": 5490
    },
    {
      "epoch": 1.544406970207982,
      "grad_norm": 0.4523126184940338,
      "learning_rate": 0.00017457518888243666,
      "loss": 0.8673,
      "step": 5495
    },
    {
      "epoch": 1.5458122540753232,
      "grad_norm": 0.6133017539978027,
      "learning_rate": 0.0001745097915731197,
      "loss": 0.8715,
      "step": 5500
    },
    {
      "epoch": 1.5472175379426645,
      "grad_norm": 0.9278969168663025,
      "learning_rate": 0.00017444432254778725,
      "loss": 0.8513,
      "step": 5505
    },
    {
      "epoch": 1.5486228218100058,
      "grad_norm": 0.831170380115509,
      "learning_rate": 0.00017437878186945348,
      "loss": 0.8677,
      "step": 5510
    },
    {
      "epoch": 1.5500281056773468,
      "grad_norm": 0.588657557964325,
      "learning_rate": 0.00017431316960120157,
      "loss": 0.8634,
      "step": 5515
    },
    {
      "epoch": 1.551433389544688,
      "grad_norm": 0.5699366927146912,
      "learning_rate": 0.00017424748580618365,
      "loss": 0.8804,
      "step": 5520
    },
    {
      "epoch": 1.5528386734120292,
      "grad_norm": 0.7426570653915405,
      "learning_rate": 0.0001741817305476207,
      "loss": 0.9172,
      "step": 5525
    },
    {
      "epoch": 1.5542439572793705,
      "grad_norm": 0.5099422335624695,
      "learning_rate": 0.00017411590388880242,
      "loss": 0.8437,
      "step": 5530
    },
    {
      "epoch": 1.5556492411467118,
      "grad_norm": 0.5340549349784851,
      "learning_rate": 0.00017405000589308732,
      "loss": 0.8754,
      "step": 5535
    },
    {
      "epoch": 1.5570545250140528,
      "grad_norm": 0.5274845957756042,
      "learning_rate": 0.00017398403662390244,
      "loss": 0.8667,
      "step": 5540
    },
    {
      "epoch": 1.558459808881394,
      "grad_norm": 0.45988380908966064,
      "learning_rate": 0.0001739179961447436,
      "loss": 0.8749,
      "step": 5545
    },
    {
      "epoch": 1.5598650927487352,
      "grad_norm": 0.7324682474136353,
      "learning_rate": 0.000173851884519175,
      "loss": 0.8513,
      "step": 5550
    },
    {
      "epoch": 1.5612703766160765,
      "grad_norm": 0.5798319578170776,
      "learning_rate": 0.00017378570181082943,
      "loss": 0.8748,
      "step": 5555
    },
    {
      "epoch": 1.5626756604834178,
      "grad_norm": 0.47172462940216064,
      "learning_rate": 0.000173719448083408,
      "loss": 0.8588,
      "step": 5560
    },
    {
      "epoch": 1.5640809443507588,
      "grad_norm": 0.567803144454956,
      "learning_rate": 0.00017365312340068023,
      "loss": 0.8879,
      "step": 5565
    },
    {
      "epoch": 1.5654862282181,
      "grad_norm": 0.48772311210632324,
      "learning_rate": 0.00017358672782648397,
      "loss": 0.8682,
      "step": 5570
    },
    {
      "epoch": 1.5668915120854412,
      "grad_norm": 0.47581541538238525,
      "learning_rate": 0.00017352026142472524,
      "loss": 0.8811,
      "step": 5575
    },
    {
      "epoch": 1.5682967959527825,
      "grad_norm": 0.4929133653640747,
      "learning_rate": 0.00017345372425937825,
      "loss": 0.8517,
      "step": 5580
    },
    {
      "epoch": 1.5697020798201238,
      "grad_norm": 0.5461881160736084,
      "learning_rate": 0.0001733871163944853,
      "loss": 0.8556,
      "step": 5585
    },
    {
      "epoch": 1.5711073636874648,
      "grad_norm": 0.6379693746566772,
      "learning_rate": 0.00017332043789415684,
      "loss": 0.8469,
      "step": 5590
    },
    {
      "epoch": 1.572512647554806,
      "grad_norm": 0.5068411231040955,
      "learning_rate": 0.00017325368882257117,
      "loss": 0.8714,
      "step": 5595
    },
    {
      "epoch": 1.5739179314221472,
      "grad_norm": 0.5322855710983276,
      "learning_rate": 0.00017318686924397463,
      "loss": 0.8757,
      "step": 5600
    },
    {
      "epoch": 1.5753232152894885,
      "grad_norm": 0.5769434571266174,
      "learning_rate": 0.00017311997922268133,
      "loss": 0.8715,
      "step": 5605
    },
    {
      "epoch": 1.5767284991568298,
      "grad_norm": 0.6665701270103455,
      "learning_rate": 0.0001730530188230732,
      "loss": 0.8703,
      "step": 5610
    },
    {
      "epoch": 1.5781337830241708,
      "grad_norm": 0.8959527015686035,
      "learning_rate": 0.00017298598810959996,
      "loss": 0.8805,
      "step": 5615
    },
    {
      "epoch": 1.5795390668915121,
      "grad_norm": 0.7809743285179138,
      "learning_rate": 0.000172918887146779,
      "loss": 0.8761,
      "step": 5620
    },
    {
      "epoch": 1.5809443507588532,
      "grad_norm": 0.888780951499939,
      "learning_rate": 0.0001728517159991953,
      "loss": 0.8699,
      "step": 5625
    },
    {
      "epoch": 1.5823496346261945,
      "grad_norm": 0.6694765686988831,
      "learning_rate": 0.00017278447473150133,
      "loss": 0.8659,
      "step": 5630
    },
    {
      "epoch": 1.5837549184935358,
      "grad_norm": 0.5062331557273865,
      "learning_rate": 0.00017271716340841722,
      "loss": 0.8545,
      "step": 5635
    },
    {
      "epoch": 1.5851602023608768,
      "grad_norm": 0.6682859659194946,
      "learning_rate": 0.00017264978209473035,
      "loss": 0.8799,
      "step": 5640
    },
    {
      "epoch": 1.5865654862282181,
      "grad_norm": 0.4775022268295288,
      "learning_rate": 0.0001725823308552956,
      "loss": 0.869,
      "step": 5645
    },
    {
      "epoch": 1.5879707700955592,
      "grad_norm": 0.5014980435371399,
      "learning_rate": 0.00017251480975503502,
      "loss": 0.8747,
      "step": 5650
    },
    {
      "epoch": 1.5893760539629005,
      "grad_norm": 0.5155623555183411,
      "learning_rate": 0.00017244721885893802,
      "loss": 0.8792,
      "step": 5655
    },
    {
      "epoch": 1.5907813378302418,
      "grad_norm": 0.6804929375648499,
      "learning_rate": 0.00017237955823206117,
      "loss": 0.8792,
      "step": 5660
    },
    {
      "epoch": 1.592186621697583,
      "grad_norm": 0.47904977202415466,
      "learning_rate": 0.0001723118279395281,
      "loss": 0.8696,
      "step": 5665
    },
    {
      "epoch": 1.5935919055649241,
      "grad_norm": 0.6762366890907288,
      "learning_rate": 0.00017224402804652956,
      "loss": 0.851,
      "step": 5670
    },
    {
      "epoch": 1.5949971894322652,
      "grad_norm": 0.5059134364128113,
      "learning_rate": 0.00017217615861832327,
      "loss": 0.8803,
      "step": 5675
    },
    {
      "epoch": 1.5964024732996065,
      "grad_norm": 0.6604952216148376,
      "learning_rate": 0.00017210821972023376,
      "loss": 0.8655,
      "step": 5680
    },
    {
      "epoch": 1.5978077571669478,
      "grad_norm": 0.6762186288833618,
      "learning_rate": 0.00017204021141765266,
      "loss": 0.8874,
      "step": 5685
    },
    {
      "epoch": 1.599213041034289,
      "grad_norm": 0.6892690658569336,
      "learning_rate": 0.00017197213377603818,
      "loss": 0.8747,
      "step": 5690
    },
    {
      "epoch": 1.6006183249016301,
      "grad_norm": 0.507866621017456,
      "learning_rate": 0.0001719039868609154,
      "loss": 0.8735,
      "step": 5695
    },
    {
      "epoch": 1.6020236087689712,
      "grad_norm": 0.6932044625282288,
      "learning_rate": 0.00017183577073787607,
      "loss": 0.88,
      "step": 5700
    },
    {
      "epoch": 1.6034288926363125,
      "grad_norm": 0.820203423500061,
      "learning_rate": 0.00017176748547257845,
      "loss": 0.8689,
      "step": 5705
    },
    {
      "epoch": 1.6048341765036538,
      "grad_norm": 0.7642927169799805,
      "learning_rate": 0.00017169913113074747,
      "loss": 0.874,
      "step": 5710
    },
    {
      "epoch": 1.606239460370995,
      "grad_norm": 0.5460090041160583,
      "learning_rate": 0.00017163070777817448,
      "loss": 0.8566,
      "step": 5715
    },
    {
      "epoch": 1.6076447442383361,
      "grad_norm": 0.597949743270874,
      "learning_rate": 0.00017156221548071728,
      "loss": 0.9107,
      "step": 5720
    },
    {
      "epoch": 1.6090500281056772,
      "grad_norm": 0.733345627784729,
      "learning_rate": 0.00017149365430430002,
      "loss": 0.8565,
      "step": 5725
    },
    {
      "epoch": 1.6104553119730185,
      "grad_norm": 0.5471769571304321,
      "learning_rate": 0.00017142502431491313,
      "loss": 0.8591,
      "step": 5730
    },
    {
      "epoch": 1.6118605958403598,
      "grad_norm": 0.5078122615814209,
      "learning_rate": 0.0001713563255786133,
      "loss": 0.9085,
      "step": 5735
    },
    {
      "epoch": 1.613265879707701,
      "grad_norm": 0.7095496654510498,
      "learning_rate": 0.00017128755816152338,
      "loss": 0.8743,
      "step": 5740
    },
    {
      "epoch": 1.6146711635750421,
      "grad_norm": 0.7899284362792969,
      "learning_rate": 0.0001712187221298323,
      "loss": 0.881,
      "step": 5745
    },
    {
      "epoch": 1.6160764474423832,
      "grad_norm": 0.5304770469665527,
      "learning_rate": 0.0001711498175497951,
      "loss": 0.8609,
      "step": 5750
    },
    {
      "epoch": 1.6174817313097245,
      "grad_norm": 0.4922604560852051,
      "learning_rate": 0.00017108084448773272,
      "loss": 0.9403,
      "step": 5755
    },
    {
      "epoch": 1.6188870151770658,
      "grad_norm": 0.6193071007728577,
      "learning_rate": 0.00017101180301003205,
      "loss": 0.8504,
      "step": 5760
    },
    {
      "epoch": 1.620292299044407,
      "grad_norm": 0.49560025334358215,
      "learning_rate": 0.00017094269318314588,
      "loss": 0.8619,
      "step": 5765
    },
    {
      "epoch": 1.6216975829117481,
      "grad_norm": 0.5417090654373169,
      "learning_rate": 0.00017087351507359263,
      "loss": 0.9363,
      "step": 5770
    },
    {
      "epoch": 1.6231028667790892,
      "grad_norm": 0.48260176181793213,
      "learning_rate": 0.00017080426874795665,
      "loss": 0.8597,
      "step": 5775
    },
    {
      "epoch": 1.6245081506464305,
      "grad_norm": 0.5174697041511536,
      "learning_rate": 0.0001707349542728878,
      "loss": 0.8719,
      "step": 5780
    },
    {
      "epoch": 1.6259134345137718,
      "grad_norm": 0.48467588424682617,
      "learning_rate": 0.0001706655717151016,
      "loss": 0.8736,
      "step": 5785
    },
    {
      "epoch": 1.627318718381113,
      "grad_norm": 0.4969995319843292,
      "learning_rate": 0.00017059612114137904,
      "loss": 0.8659,
      "step": 5790
    },
    {
      "epoch": 1.6287240022484542,
      "grad_norm": 0.8451137542724609,
      "learning_rate": 0.00017052660261856662,
      "loss": 0.8671,
      "step": 5795
    },
    {
      "epoch": 1.6301292861157954,
      "grad_norm": 0.6291013956069946,
      "learning_rate": 0.0001704570162135763,
      "loss": 0.868,
      "step": 5800
    },
    {
      "epoch": 1.6315345699831365,
      "grad_norm": 0.6090657711029053,
      "learning_rate": 0.00017038736199338525,
      "loss": 0.8794,
      "step": 5805
    },
    {
      "epoch": 1.6329398538504778,
      "grad_norm": 0.4956682026386261,
      "learning_rate": 0.000170317640025036,
      "loss": 0.8652,
      "step": 5810
    },
    {
      "epoch": 1.634345137717819,
      "grad_norm": 0.5881335139274597,
      "learning_rate": 0.00017024785037563625,
      "loss": 0.8664,
      "step": 5815
    },
    {
      "epoch": 1.6357504215851602,
      "grad_norm": 0.6143118143081665,
      "learning_rate": 0.00017017799311235885,
      "loss": 0.8806,
      "step": 5820
    },
    {
      "epoch": 1.6371557054525014,
      "grad_norm": 0.5305077433586121,
      "learning_rate": 0.00017010806830244178,
      "loss": 0.8679,
      "step": 5825
    },
    {
      "epoch": 1.6385609893198425,
      "grad_norm": 0.47793033719062805,
      "learning_rate": 0.00017003807601318792,
      "loss": 0.8736,
      "step": 5830
    },
    {
      "epoch": 1.6399662731871838,
      "grad_norm": 0.8925052881240845,
      "learning_rate": 0.00016996801631196526,
      "loss": 0.8488,
      "step": 5835
    },
    {
      "epoch": 1.641371557054525,
      "grad_norm": 1.0212877988815308,
      "learning_rate": 0.0001698978892662065,
      "loss": 0.8608,
      "step": 5840
    },
    {
      "epoch": 1.6427768409218664,
      "grad_norm": 0.8234551548957825,
      "learning_rate": 0.00016982769494340932,
      "loss": 0.8797,
      "step": 5845
    },
    {
      "epoch": 1.6441821247892074,
      "grad_norm": 0.49760153889656067,
      "learning_rate": 0.00016975743341113598,
      "loss": 0.8597,
      "step": 5850
    },
    {
      "epoch": 1.6455874086565485,
      "grad_norm": 0.575788676738739,
      "learning_rate": 0.00016968710473701362,
      "loss": 0.9101,
      "step": 5855
    },
    {
      "epoch": 1.6469926925238898,
      "grad_norm": 0.5459645390510559,
      "learning_rate": 0.00016961670898873386,
      "loss": 0.8635,
      "step": 5860
    },
    {
      "epoch": 1.648397976391231,
      "grad_norm": 0.5154797434806824,
      "learning_rate": 0.00016954624623405292,
      "loss": 0.8677,
      "step": 5865
    },
    {
      "epoch": 1.6498032602585724,
      "grad_norm": 0.481139600276947,
      "learning_rate": 0.00016947571654079156,
      "loss": 0.8608,
      "step": 5870
    },
    {
      "epoch": 1.6512085441259134,
      "grad_norm": 0.5141423940658569,
      "learning_rate": 0.0001694051199768349,
      "loss": 0.8565,
      "step": 5875
    },
    {
      "epoch": 1.6526138279932545,
      "grad_norm": 0.5673717260360718,
      "learning_rate": 0.00016933445661013248,
      "loss": 0.8603,
      "step": 5880
    },
    {
      "epoch": 1.6540191118605958,
      "grad_norm": 0.6446055769920349,
      "learning_rate": 0.00016926372650869813,
      "loss": 0.9708,
      "step": 5885
    },
    {
      "epoch": 1.655424395727937,
      "grad_norm": 0.6382657289505005,
      "learning_rate": 0.00016919292974060986,
      "loss": 0.8699,
      "step": 5890
    },
    {
      "epoch": 1.6568296795952784,
      "grad_norm": 0.5263247489929199,
      "learning_rate": 0.0001691220663740099,
      "loss": 0.8638,
      "step": 5895
    },
    {
      "epoch": 1.6582349634626194,
      "grad_norm": 0.5247864127159119,
      "learning_rate": 0.00016905113647710452,
      "loss": 0.8787,
      "step": 5900
    },
    {
      "epoch": 1.6596402473299605,
      "grad_norm": 0.5854396224021912,
      "learning_rate": 0.00016898014011816414,
      "loss": 0.8849,
      "step": 5905
    },
    {
      "epoch": 1.6610455311973018,
      "grad_norm": 0.46041086316108704,
      "learning_rate": 0.00016890907736552308,
      "loss": 0.8677,
      "step": 5910
    },
    {
      "epoch": 1.662450815064643,
      "grad_norm": 0.509297788143158,
      "learning_rate": 0.00016883794828757953,
      "loss": 0.8579,
      "step": 5915
    },
    {
      "epoch": 1.6638560989319844,
      "grad_norm": 0.5629145503044128,
      "learning_rate": 0.00016876675295279553,
      "loss": 0.8699,
      "step": 5920
    },
    {
      "epoch": 1.6652613827993255,
      "grad_norm": 0.6250473856925964,
      "learning_rate": 0.000168695491429697,
      "loss": 0.8679,
      "step": 5925
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 0.5854396224021912,
      "learning_rate": 0.0001686241637868734,
      "loss": 0.8662,
      "step": 5930
    },
    {
      "epoch": 1.6680719505340078,
      "grad_norm": 0.5250497460365295,
      "learning_rate": 0.0001685527700929779,
      "loss": 0.8886,
      "step": 5935
    },
    {
      "epoch": 1.669477234401349,
      "grad_norm": 0.552189826965332,
      "learning_rate": 0.00016848131041672735,
      "loss": 0.8784,
      "step": 5940
    },
    {
      "epoch": 1.6708825182686904,
      "grad_norm": 0.612978458404541,
      "learning_rate": 0.00016840978482690196,
      "loss": 0.8735,
      "step": 5945
    },
    {
      "epoch": 1.6722878021360315,
      "grad_norm": 0.7062554955482483,
      "learning_rate": 0.0001683381933923454,
      "loss": 0.9183,
      "step": 5950
    },
    {
      "epoch": 1.6736930860033725,
      "grad_norm": 0.5159368515014648,
      "learning_rate": 0.00016826653618196485,
      "loss": 0.8606,
      "step": 5955
    },
    {
      "epoch": 1.6750983698707138,
      "grad_norm": 0.5322017669677734,
      "learning_rate": 0.00016819481326473063,
      "loss": 0.8482,
      "step": 5960
    },
    {
      "epoch": 1.676503653738055,
      "grad_norm": 0.5021723508834839,
      "learning_rate": 0.0001681230247096764,
      "loss": 0.8741,
      "step": 5965
    },
    {
      "epoch": 1.6779089376053964,
      "grad_norm": 0.5162078738212585,
      "learning_rate": 0.00016805117058589893,
      "loss": 0.8628,
      "step": 5970
    },
    {
      "epoch": 1.6793142214727375,
      "grad_norm": 0.6757156252861023,
      "learning_rate": 0.00016797925096255823,
      "loss": 0.8788,
      "step": 5975
    },
    {
      "epoch": 1.6807195053400787,
      "grad_norm": 0.5981365442276001,
      "learning_rate": 0.0001679072659088772,
      "loss": 0.8685,
      "step": 5980
    },
    {
      "epoch": 1.6821247892074198,
      "grad_norm": 0.4823263883590698,
      "learning_rate": 0.0001678352154941418,
      "loss": 0.8558,
      "step": 5985
    },
    {
      "epoch": 1.683530073074761,
      "grad_norm": 0.4609000086784363,
      "learning_rate": 0.00016776309978770092,
      "loss": 0.8645,
      "step": 5990
    },
    {
      "epoch": 1.6849353569421024,
      "grad_norm": 0.6788878440856934,
      "learning_rate": 0.00016769091885896617,
      "loss": 0.8625,
      "step": 5995
    },
    {
      "epoch": 1.6863406408094435,
      "grad_norm": 0.5988131761550903,
      "learning_rate": 0.0001676186727774121,
      "loss": 0.8695,
      "step": 6000
    },
    {
      "epoch": 1.6877459246767847,
      "grad_norm": 0.6434552669525146,
      "learning_rate": 0.00016754636161257587,
      "loss": 0.8588,
      "step": 6005
    },
    {
      "epoch": 1.6891512085441258,
      "grad_norm": 0.48518407344818115,
      "learning_rate": 0.0001674739854340573,
      "loss": 0.8488,
      "step": 6010
    },
    {
      "epoch": 1.690556492411467,
      "grad_norm": 0.515152096748352,
      "learning_rate": 0.0001674015443115188,
      "loss": 0.8626,
      "step": 6015
    },
    {
      "epoch": 1.6919617762788084,
      "grad_norm": 0.50271075963974,
      "learning_rate": 0.00016732903831468532,
      "loss": 0.8681,
      "step": 6020
    },
    {
      "epoch": 1.6933670601461497,
      "grad_norm": 0.7227884531021118,
      "learning_rate": 0.00016725646751334416,
      "loss": 0.8623,
      "step": 6025
    },
    {
      "epoch": 1.6947723440134908,
      "grad_norm": 0.5185177326202393,
      "learning_rate": 0.00016718383197734506,
      "loss": 0.9226,
      "step": 6030
    },
    {
      "epoch": 1.6961776278808318,
      "grad_norm": 0.46443167328834534,
      "learning_rate": 0.00016711113177660008,
      "loss": 0.8738,
      "step": 6035
    },
    {
      "epoch": 1.697582911748173,
      "grad_norm": 0.7292987108230591,
      "learning_rate": 0.00016703836698108346,
      "loss": 0.8619,
      "step": 6040
    },
    {
      "epoch": 1.6989881956155144,
      "grad_norm": 0.7463960647583008,
      "learning_rate": 0.00016696553766083167,
      "loss": 0.8702,
      "step": 6045
    },
    {
      "epoch": 1.7003934794828557,
      "grad_norm": 1.0327949523925781,
      "learning_rate": 0.0001668926438859433,
      "loss": 0.8756,
      "step": 6050
    },
    {
      "epoch": 1.7017987633501968,
      "grad_norm": 0.5928092002868652,
      "learning_rate": 0.00016681968572657886,
      "loss": 0.8703,
      "step": 6055
    },
    {
      "epoch": 1.7032040472175378,
      "grad_norm": 0.7332513332366943,
      "learning_rate": 0.00016674666325296097,
      "loss": 0.8628,
      "step": 6060
    },
    {
      "epoch": 1.704609331084879,
      "grad_norm": 0.6123602390289307,
      "learning_rate": 0.00016667357653537407,
      "loss": 0.8604,
      "step": 6065
    },
    {
      "epoch": 1.7060146149522204,
      "grad_norm": 0.6254216432571411,
      "learning_rate": 0.00016660042564416448,
      "loss": 0.8634,
      "step": 6070
    },
    {
      "epoch": 1.7074198988195617,
      "grad_norm": 0.6215693354606628,
      "learning_rate": 0.00016652721064974027,
      "loss": 0.8586,
      "step": 6075
    },
    {
      "epoch": 1.7088251826869028,
      "grad_norm": 0.5529701113700867,
      "learning_rate": 0.00016645393162257122,
      "loss": 0.8586,
      "step": 6080
    },
    {
      "epoch": 1.7102304665542438,
      "grad_norm": 0.6067641377449036,
      "learning_rate": 0.00016638058863318865,
      "loss": 0.8579,
      "step": 6085
    },
    {
      "epoch": 1.7116357504215851,
      "grad_norm": 0.5234971046447754,
      "learning_rate": 0.00016630718175218565,
      "loss": 0.8686,
      "step": 6090
    },
    {
      "epoch": 1.7130410342889264,
      "grad_norm": 0.5197697281837463,
      "learning_rate": 0.00016623371105021654,
      "loss": 0.8561,
      "step": 6095
    },
    {
      "epoch": 1.7144463181562677,
      "grad_norm": 0.5112480521202087,
      "learning_rate": 0.00016616017659799732,
      "loss": 0.8715,
      "step": 6100
    },
    {
      "epoch": 1.7158516020236088,
      "grad_norm": 0.5215193629264832,
      "learning_rate": 0.00016608657846630518,
      "loss": 0.8542,
      "step": 6105
    },
    {
      "epoch": 1.7172568858909498,
      "grad_norm": 0.4877747893333435,
      "learning_rate": 0.00016601291672597865,
      "loss": 0.8755,
      "step": 6110
    },
    {
      "epoch": 1.7186621697582911,
      "grad_norm": 0.6159438490867615,
      "learning_rate": 0.00016593919144791754,
      "loss": 0.8752,
      "step": 6115
    },
    {
      "epoch": 1.7200674536256324,
      "grad_norm": 0.48233506083488464,
      "learning_rate": 0.00016586540270308275,
      "loss": 0.8641,
      "step": 6120
    },
    {
      "epoch": 1.7214727374929737,
      "grad_norm": 0.5225429534912109,
      "learning_rate": 0.00016579155056249627,
      "loss": 0.8409,
      "step": 6125
    },
    {
      "epoch": 1.7228780213603148,
      "grad_norm": 0.5753469467163086,
      "learning_rate": 0.00016571763509724109,
      "loss": 0.8601,
      "step": 6130
    },
    {
      "epoch": 1.7242833052276558,
      "grad_norm": 0.685015082359314,
      "learning_rate": 0.00016564365637846125,
      "loss": 0.8675,
      "step": 6135
    },
    {
      "epoch": 1.7256885890949971,
      "grad_norm": 0.5576612949371338,
      "learning_rate": 0.0001655696144773616,
      "loss": 0.8848,
      "step": 6140
    },
    {
      "epoch": 1.7270938729623384,
      "grad_norm": 0.5092169046401978,
      "learning_rate": 0.0001654955094652078,
      "loss": 0.8648,
      "step": 6145
    },
    {
      "epoch": 1.7284991568296797,
      "grad_norm": 0.4944714903831482,
      "learning_rate": 0.00016542134141332623,
      "loss": 0.8733,
      "step": 6150
    },
    {
      "epoch": 1.7299044406970208,
      "grad_norm": 0.47435131669044495,
      "learning_rate": 0.000165347110393104,
      "loss": 0.8629,
      "step": 6155
    },
    {
      "epoch": 1.731309724564362,
      "grad_norm": 0.6226024627685547,
      "learning_rate": 0.00016527281647598881,
      "loss": 0.8616,
      "step": 6160
    },
    {
      "epoch": 1.7327150084317031,
      "grad_norm": 0.5335372090339661,
      "learning_rate": 0.00016519845973348888,
      "loss": 0.8588,
      "step": 6165
    },
    {
      "epoch": 1.7341202922990444,
      "grad_norm": 0.4744356870651245,
      "learning_rate": 0.00016512404023717294,
      "loss": 0.8632,
      "step": 6170
    },
    {
      "epoch": 1.7355255761663857,
      "grad_norm": 0.5560280084609985,
      "learning_rate": 0.0001650495580586701,
      "loss": 0.8601,
      "step": 6175
    },
    {
      "epoch": 1.7369308600337268,
      "grad_norm": 0.6050639152526855,
      "learning_rate": 0.00016497501326966974,
      "loss": 0.8506,
      "step": 6180
    },
    {
      "epoch": 1.738336143901068,
      "grad_norm": 0.5097574591636658,
      "learning_rate": 0.00016490040594192165,
      "loss": 0.8942,
      "step": 6185
    },
    {
      "epoch": 1.7397414277684091,
      "grad_norm": 1.0571644306182861,
      "learning_rate": 0.00016482573614723563,
      "loss": 0.8579,
      "step": 6190
    },
    {
      "epoch": 1.7411467116357504,
      "grad_norm": 0.5297767519950867,
      "learning_rate": 0.00016475100395748178,
      "loss": 0.8646,
      "step": 6195
    },
    {
      "epoch": 1.7425519955030917,
      "grad_norm": 0.6666551232337952,
      "learning_rate": 0.00016467620944459014,
      "loss": 0.8746,
      "step": 6200
    },
    {
      "epoch": 1.743957279370433,
      "grad_norm": 0.726667582988739,
      "learning_rate": 0.00016460135268055076,
      "loss": 0.8708,
      "step": 6205
    },
    {
      "epoch": 1.745362563237774,
      "grad_norm": 0.5243112444877625,
      "learning_rate": 0.00016452643373741365,
      "loss": 0.8806,
      "step": 6210
    },
    {
      "epoch": 1.7467678471051151,
      "grad_norm": 0.5628378391265869,
      "learning_rate": 0.0001644514526872886,
      "loss": 0.8617,
      "step": 6215
    },
    {
      "epoch": 1.7481731309724564,
      "grad_norm": 0.49396875500679016,
      "learning_rate": 0.00016437640960234525,
      "loss": 0.8651,
      "step": 6220
    },
    {
      "epoch": 1.7495784148397977,
      "grad_norm": 0.7454749941825867,
      "learning_rate": 0.00016430130455481287,
      "loss": 0.8682,
      "step": 6225
    },
    {
      "epoch": 1.750983698707139,
      "grad_norm": 0.5046769976615906,
      "learning_rate": 0.00016422613761698046,
      "loss": 0.8449,
      "step": 6230
    },
    {
      "epoch": 1.75238898257448,
      "grad_norm": 0.5620967745780945,
      "learning_rate": 0.00016415090886119649,
      "loss": 0.9277,
      "step": 6235
    },
    {
      "epoch": 1.7537942664418211,
      "grad_norm": 0.5239649415016174,
      "learning_rate": 0.00016407561835986902,
      "loss": 0.9241,
      "step": 6240
    },
    {
      "epoch": 1.7551995503091624,
      "grad_norm": 0.5544788837432861,
      "learning_rate": 0.00016400026618546552,
      "loss": 0.8676,
      "step": 6245
    },
    {
      "epoch": 1.7566048341765037,
      "grad_norm": 0.5142987370491028,
      "learning_rate": 0.00016392485241051272,
      "loss": 0.8683,
      "step": 6250
    },
    {
      "epoch": 1.758010118043845,
      "grad_norm": 0.5113844275474548,
      "learning_rate": 0.00016384937710759681,
      "loss": 0.8682,
      "step": 6255
    },
    {
      "epoch": 1.759415401911186,
      "grad_norm": 0.6806747317314148,
      "learning_rate": 0.0001637738403493631,
      "loss": 0.8582,
      "step": 6260
    },
    {
      "epoch": 1.7608206857785271,
      "grad_norm": 0.688289225101471,
      "learning_rate": 0.00016369824220851604,
      "loss": 0.9636,
      "step": 6265
    },
    {
      "epoch": 1.7622259696458684,
      "grad_norm": 0.817633330821991,
      "learning_rate": 0.00016362258275781917,
      "loss": 0.8833,
      "step": 6270
    },
    {
      "epoch": 1.7636312535132097,
      "grad_norm": 0.7489690184593201,
      "learning_rate": 0.00016354686207009511,
      "loss": 0.872,
      "step": 6275
    },
    {
      "epoch": 1.765036537380551,
      "grad_norm": 0.5069727301597595,
      "learning_rate": 0.0001634710802182253,
      "loss": 0.8629,
      "step": 6280
    },
    {
      "epoch": 1.766441821247892,
      "grad_norm": 0.48243850469589233,
      "learning_rate": 0.00016339523727515023,
      "loss": 0.8687,
      "step": 6285
    },
    {
      "epoch": 1.7678471051152331,
      "grad_norm": 0.5298514366149902,
      "learning_rate": 0.00016331933331386898,
      "loss": 0.855,
      "step": 6290
    },
    {
      "epoch": 1.7692523889825744,
      "grad_norm": 0.6091026067733765,
      "learning_rate": 0.00016324336840743947,
      "loss": 0.8624,
      "step": 6295
    },
    {
      "epoch": 1.7706576728499157,
      "grad_norm": 0.7348006367683411,
      "learning_rate": 0.00016316734262897834,
      "loss": 0.8552,
      "step": 6300
    },
    {
      "epoch": 1.772062956717257,
      "grad_norm": 0.5105456709861755,
      "learning_rate": 0.00016309125605166064,
      "loss": 0.871,
      "step": 6305
    },
    {
      "epoch": 1.773468240584598,
      "grad_norm": 0.5031980872154236,
      "learning_rate": 0.00016301510874872015,
      "loss": 0.8582,
      "step": 6310
    },
    {
      "epoch": 1.7748735244519391,
      "grad_norm": 0.7760841250419617,
      "learning_rate": 0.00016293890079344892,
      "loss": 0.8663,
      "step": 6315
    },
    {
      "epoch": 1.7762788083192804,
      "grad_norm": 0.6499359607696533,
      "learning_rate": 0.00016286263225919755,
      "loss": 0.8555,
      "step": 6320
    },
    {
      "epoch": 1.7776840921866217,
      "grad_norm": 0.4855884611606598,
      "learning_rate": 0.0001627863032193748,
      "loss": 0.8681,
      "step": 6325
    },
    {
      "epoch": 1.779089376053963,
      "grad_norm": 0.4679391384124756,
      "learning_rate": 0.00016270991374744766,
      "loss": 0.8635,
      "step": 6330
    },
    {
      "epoch": 1.780494659921304,
      "grad_norm": 0.4512339234352112,
      "learning_rate": 0.00016263346391694143,
      "loss": 0.8475,
      "step": 6335
    },
    {
      "epoch": 1.7818999437886454,
      "grad_norm": 0.7599973082542419,
      "learning_rate": 0.0001625569538014394,
      "loss": 0.8658,
      "step": 6340
    },
    {
      "epoch": 1.7833052276559864,
      "grad_norm": 0.4672722816467285,
      "learning_rate": 0.0001624803834745829,
      "loss": 0.8744,
      "step": 6345
    },
    {
      "epoch": 1.7847105115233277,
      "grad_norm": 0.6698594689369202,
      "learning_rate": 0.00016240375301007122,
      "loss": 0.8644,
      "step": 6350
    },
    {
      "epoch": 1.786115795390669,
      "grad_norm": 0.803933322429657,
      "learning_rate": 0.00016232706248166156,
      "loss": 0.8648,
      "step": 6355
    },
    {
      "epoch": 1.78752107925801,
      "grad_norm": 0.4994564950466156,
      "learning_rate": 0.00016225031196316885,
      "loss": 0.8635,
      "step": 6360
    },
    {
      "epoch": 1.7889263631253514,
      "grad_norm": 0.6590884327888489,
      "learning_rate": 0.00016217350152846586,
      "loss": 0.863,
      "step": 6365
    },
    {
      "epoch": 1.7903316469926924,
      "grad_norm": 0.5152565240859985,
      "learning_rate": 0.00016209663125148297,
      "loss": 0.8706,
      "step": 6370
    },
    {
      "epoch": 1.7917369308600337,
      "grad_norm": 0.607562780380249,
      "learning_rate": 0.00016201970120620818,
      "loss": 0.8638,
      "step": 6375
    },
    {
      "epoch": 1.793142214727375,
      "grad_norm": 0.5181170701980591,
      "learning_rate": 0.000161942711466687,
      "loss": 0.8551,
      "step": 6380
    },
    {
      "epoch": 1.7945474985947163,
      "grad_norm": 0.8388688564300537,
      "learning_rate": 0.00016186566210702244,
      "loss": 0.8834,
      "step": 6385
    },
    {
      "epoch": 1.7959527824620574,
      "grad_norm": 0.8339769244194031,
      "learning_rate": 0.0001617885532013748,
      "loss": 0.8562,
      "step": 6390
    },
    {
      "epoch": 1.7973580663293984,
      "grad_norm": 0.5623959302902222,
      "learning_rate": 0.0001617113848239618,
      "loss": 0.8492,
      "step": 6395
    },
    {
      "epoch": 1.7987633501967397,
      "grad_norm": 0.5428623557090759,
      "learning_rate": 0.00016163415704905835,
      "loss": 0.8539,
      "step": 6400
    },
    {
      "epoch": 1.800168634064081,
      "grad_norm": 0.5430036783218384,
      "learning_rate": 0.00016155686995099653,
      "loss": 0.8651,
      "step": 6405
    },
    {
      "epoch": 1.8015739179314223,
      "grad_norm": 0.6421025991439819,
      "learning_rate": 0.00016147952360416552,
      "loss": 0.8671,
      "step": 6410
    },
    {
      "epoch": 1.8029792017987634,
      "grad_norm": 0.44885459542274475,
      "learning_rate": 0.00016140211808301155,
      "loss": 0.8575,
      "step": 6415
    },
    {
      "epoch": 1.8043844856661044,
      "grad_norm": 0.4630917012691498,
      "learning_rate": 0.00016132465346203775,
      "loss": 0.8611,
      "step": 6420
    },
    {
      "epoch": 1.8057897695334457,
      "grad_norm": 0.473455011844635,
      "learning_rate": 0.00016124712981580426,
      "loss": 0.879,
      "step": 6425
    },
    {
      "epoch": 1.807195053400787,
      "grad_norm": 0.5540944933891296,
      "learning_rate": 0.00016116954721892785,
      "loss": 0.8766,
      "step": 6430
    },
    {
      "epoch": 1.8086003372681283,
      "grad_norm": 0.7242889404296875,
      "learning_rate": 0.00016109190574608215,
      "loss": 0.8432,
      "step": 6435
    },
    {
      "epoch": 1.8100056211354694,
      "grad_norm": 0.5389221906661987,
      "learning_rate": 0.00016101420547199745,
      "loss": 0.8511,
      "step": 6440
    },
    {
      "epoch": 1.8114109050028104,
      "grad_norm": 0.48298758268356323,
      "learning_rate": 0.0001609364464714606,
      "loss": 0.8775,
      "step": 6445
    },
    {
      "epoch": 1.8128161888701517,
      "grad_norm": 0.595059335231781,
      "learning_rate": 0.00016085862881931508,
      "loss": 0.8528,
      "step": 6450
    },
    {
      "epoch": 1.814221472737493,
      "grad_norm": 0.47923505306243896,
      "learning_rate": 0.0001607807525904606,
      "loss": 0.8674,
      "step": 6455
    },
    {
      "epoch": 1.8156267566048343,
      "grad_norm": 0.7948163151741028,
      "learning_rate": 0.00016070281785985347,
      "loss": 0.8582,
      "step": 6460
    },
    {
      "epoch": 1.8170320404721754,
      "grad_norm": 0.676969051361084,
      "learning_rate": 0.0001606248247025062,
      "loss": 0.877,
      "step": 6465
    },
    {
      "epoch": 1.8184373243395164,
      "grad_norm": 0.6349411010742188,
      "learning_rate": 0.00016054677319348758,
      "loss": 0.8593,
      "step": 6470
    },
    {
      "epoch": 1.8198426082068577,
      "grad_norm": 0.4888991415500641,
      "learning_rate": 0.00016046866340792252,
      "loss": 0.8573,
      "step": 6475
    },
    {
      "epoch": 1.821247892074199,
      "grad_norm": 0.5025482773780823,
      "learning_rate": 0.00016039049542099207,
      "loss": 0.8644,
      "step": 6480
    },
    {
      "epoch": 1.8226531759415403,
      "grad_norm": 0.47073066234588623,
      "learning_rate": 0.00016031226930793326,
      "loss": 0.9275,
      "step": 6485
    },
    {
      "epoch": 1.8240584598088814,
      "grad_norm": 0.6219227910041809,
      "learning_rate": 0.0001602339851440391,
      "loss": 0.8498,
      "step": 6490
    },
    {
      "epoch": 1.8254637436762224,
      "grad_norm": 0.6504101157188416,
      "learning_rate": 0.00016015564300465843,
      "loss": 0.8494,
      "step": 6495
    },
    {
      "epoch": 1.8268690275435637,
      "grad_norm": 0.887282133102417,
      "learning_rate": 0.0001600772429651959,
      "loss": 0.857,
      "step": 6500
    },
    {
      "epoch": 1.828274311410905,
      "grad_norm": 0.9653581380844116,
      "learning_rate": 0.00015999878510111195,
      "loss": 0.858,
      "step": 6505
    },
    {
      "epoch": 1.8296795952782463,
      "grad_norm": 0.6541657447814941,
      "learning_rate": 0.00015992026948792267,
      "loss": 0.8747,
      "step": 6510
    },
    {
      "epoch": 1.8310848791455874,
      "grad_norm": 0.5337585210800171,
      "learning_rate": 0.00015984169620119959,
      "loss": 0.8528,
      "step": 6515
    },
    {
      "epoch": 1.8324901630129287,
      "grad_norm": 0.6224717497825623,
      "learning_rate": 0.00015976306531656994,
      "loss": 0.8653,
      "step": 6520
    },
    {
      "epoch": 1.8338954468802697,
      "grad_norm": 0.6390025019645691,
      "learning_rate": 0.00015968437690971627,
      "loss": 0.8612,
      "step": 6525
    },
    {
      "epoch": 1.835300730747611,
      "grad_norm": 0.6096919775009155,
      "learning_rate": 0.00015960563105637653,
      "loss": 0.8634,
      "step": 6530
    },
    {
      "epoch": 1.8367060146149523,
      "grad_norm": 0.8214162588119507,
      "learning_rate": 0.00015952682783234402,
      "loss": 0.8735,
      "step": 6535
    },
    {
      "epoch": 1.8381112984822934,
      "grad_norm": 0.7625783681869507,
      "learning_rate": 0.00015944796731346713,
      "loss": 0.8654,
      "step": 6540
    },
    {
      "epoch": 1.8395165823496347,
      "grad_norm": 0.5845365524291992,
      "learning_rate": 0.00015936904957564955,
      "loss": 0.8705,
      "step": 6545
    },
    {
      "epoch": 1.8409218662169757,
      "grad_norm": 0.5661296844482422,
      "learning_rate": 0.00015929007469484986,
      "loss": 0.851,
      "step": 6550
    },
    {
      "epoch": 1.842327150084317,
      "grad_norm": 0.5553953647613525,
      "learning_rate": 0.00015921104274708184,
      "loss": 0.8592,
      "step": 6555
    },
    {
      "epoch": 1.8437324339516583,
      "grad_norm": 0.46224939823150635,
      "learning_rate": 0.00015913195380841402,
      "loss": 0.8698,
      "step": 6560
    },
    {
      "epoch": 1.8451377178189996,
      "grad_norm": 0.5660232305526733,
      "learning_rate": 0.00015905280795496999,
      "loss": 0.8666,
      "step": 6565
    },
    {
      "epoch": 1.8465430016863407,
      "grad_norm": 0.5240206718444824,
      "learning_rate": 0.00015897360526292783,
      "loss": 0.8637,
      "step": 6570
    },
    {
      "epoch": 1.8479482855536817,
      "grad_norm": 0.4983304738998413,
      "learning_rate": 0.0001588943458085206,
      "loss": 0.8826,
      "step": 6575
    },
    {
      "epoch": 1.849353569421023,
      "grad_norm": 0.6366061568260193,
      "learning_rate": 0.00015881502966803588,
      "loss": 0.8531,
      "step": 6580
    },
    {
      "epoch": 1.8507588532883643,
      "grad_norm": 0.5946831703186035,
      "learning_rate": 0.0001587356569178158,
      "loss": 0.8601,
      "step": 6585
    },
    {
      "epoch": 1.8521641371557056,
      "grad_norm": 0.45793646574020386,
      "learning_rate": 0.000158656227634257,
      "loss": 0.8618,
      "step": 6590
    },
    {
      "epoch": 1.8535694210230467,
      "grad_norm": 0.5167106986045837,
      "learning_rate": 0.00015857674189381053,
      "loss": 0.8604,
      "step": 6595
    },
    {
      "epoch": 1.8549747048903877,
      "grad_norm": 0.5827748775482178,
      "learning_rate": 0.00015849719977298178,
      "loss": 0.8646,
      "step": 6600
    },
    {
      "epoch": 1.856379988757729,
      "grad_norm": 0.8314809799194336,
      "learning_rate": 0.00015841760134833042,
      "loss": 0.8668,
      "step": 6605
    },
    {
      "epoch": 1.8577852726250703,
      "grad_norm": 0.8763378858566284,
      "learning_rate": 0.00015833794669647025,
      "loss": 0.9796,
      "step": 6610
    },
    {
      "epoch": 1.8591905564924116,
      "grad_norm": 0.7995222210884094,
      "learning_rate": 0.0001582582358940693,
      "loss": 0.8516,
      "step": 6615
    },
    {
      "epoch": 1.8605958403597527,
      "grad_norm": 0.5486027598381042,
      "learning_rate": 0.00015817846901784952,
      "loss": 0.8541,
      "step": 6620
    },
    {
      "epoch": 1.8620011242270937,
      "grad_norm": 0.484632670879364,
      "learning_rate": 0.00015809864614458694,
      "loss": 0.8738,
      "step": 6625
    },
    {
      "epoch": 1.863406408094435,
      "grad_norm": 0.8042235374450684,
      "learning_rate": 0.00015801876735111142,
      "loss": 0.9362,
      "step": 6630
    },
    {
      "epoch": 1.8648116919617763,
      "grad_norm": 0.7505983710289001,
      "learning_rate": 0.0001579388327143067,
      "loss": 0.8639,
      "step": 6635
    },
    {
      "epoch": 1.8662169758291176,
      "grad_norm": 0.48971813917160034,
      "learning_rate": 0.00015785884231111016,
      "loss": 0.8397,
      "step": 6640
    },
    {
      "epoch": 1.8676222596964587,
      "grad_norm": 0.5034501552581787,
      "learning_rate": 0.00015777879621851302,
      "loss": 0.8685,
      "step": 6645
    },
    {
      "epoch": 1.8690275435637997,
      "grad_norm": 0.5677081346511841,
      "learning_rate": 0.00015769869451355995,
      "loss": 0.883,
      "step": 6650
    },
    {
      "epoch": 1.870432827431141,
      "grad_norm": 0.5018690824508667,
      "learning_rate": 0.00015761853727334918,
      "loss": 0.8672,
      "step": 6655
    },
    {
      "epoch": 1.8718381112984823,
      "grad_norm": 0.45845654606819153,
      "learning_rate": 0.0001575383245750325,
      "loss": 0.8566,
      "step": 6660
    },
    {
      "epoch": 1.8732433951658236,
      "grad_norm": 0.7619211673736572,
      "learning_rate": 0.00015745805649581497,
      "loss": 0.8681,
      "step": 6665
    },
    {
      "epoch": 1.8746486790331647,
      "grad_norm": 0.45849132537841797,
      "learning_rate": 0.000157377733112955,
      "loss": 0.8668,
      "step": 6670
    },
    {
      "epoch": 1.8760539629005057,
      "grad_norm": 0.45092353224754333,
      "learning_rate": 0.0001572973545037641,
      "loss": 0.873,
      "step": 6675
    },
    {
      "epoch": 1.877459246767847,
      "grad_norm": 0.5022733807563782,
      "learning_rate": 0.0001572169207456072,
      "loss": 0.8598,
      "step": 6680
    },
    {
      "epoch": 1.8788645306351883,
      "grad_norm": 0.670704185962677,
      "learning_rate": 0.00015713643191590213,
      "loss": 0.8547,
      "step": 6685
    },
    {
      "epoch": 1.8802698145025296,
      "grad_norm": 0.5759883522987366,
      "learning_rate": 0.00015705588809211967,
      "loss": 0.8592,
      "step": 6690
    },
    {
      "epoch": 1.8816750983698707,
      "grad_norm": 0.7777016162872314,
      "learning_rate": 0.00015697528935178372,
      "loss": 0.8674,
      "step": 6695
    },
    {
      "epoch": 1.883080382237212,
      "grad_norm": 0.5708914995193481,
      "learning_rate": 0.00015689463577247086,
      "loss": 0.8668,
      "step": 6700
    },
    {
      "epoch": 1.884485666104553,
      "grad_norm": 0.7172780632972717,
      "learning_rate": 0.00015681392743181058,
      "loss": 0.8627,
      "step": 6705
    },
    {
      "epoch": 1.8858909499718943,
      "grad_norm": 0.8145684003829956,
      "learning_rate": 0.00015673316440748499,
      "loss": 0.8605,
      "step": 6710
    },
    {
      "epoch": 1.8872962338392356,
      "grad_norm": 0.5138697624206543,
      "learning_rate": 0.0001566523467772289,
      "loss": 0.8655,
      "step": 6715
    },
    {
      "epoch": 1.8887015177065767,
      "grad_norm": 0.5821599364280701,
      "learning_rate": 0.00015657147461882963,
      "loss": 0.8761,
      "step": 6720
    },
    {
      "epoch": 1.890106801573918,
      "grad_norm": 0.5124241709709167,
      "learning_rate": 0.00015649054801012704,
      "loss": 0.8593,
      "step": 6725
    },
    {
      "epoch": 1.891512085441259,
      "grad_norm": 0.47895899415016174,
      "learning_rate": 0.00015640956702901336,
      "loss": 0.8492,
      "step": 6730
    },
    {
      "epoch": 1.8929173693086003,
      "grad_norm": 0.5329840779304504,
      "learning_rate": 0.00015632853175343305,
      "loss": 0.87,
      "step": 6735
    },
    {
      "epoch": 1.8943226531759416,
      "grad_norm": 0.5166814923286438,
      "learning_rate": 0.00015624744226138307,
      "loss": 0.8625,
      "step": 6740
    },
    {
      "epoch": 1.895727937043283,
      "grad_norm": 0.5631088614463806,
      "learning_rate": 0.00015616629863091235,
      "loss": 0.8552,
      "step": 6745
    },
    {
      "epoch": 1.897133220910624,
      "grad_norm": 3.2275400161743164,
      "learning_rate": 0.00015608510094012202,
      "loss": 0.9232,
      "step": 6750
    },
    {
      "epoch": 1.898538504777965,
      "grad_norm": 0.4875563085079193,
      "learning_rate": 0.00015600384926716524,
      "loss": 0.8574,
      "step": 6755
    },
    {
      "epoch": 1.8999437886453063,
      "grad_norm": 0.7080219388008118,
      "learning_rate": 0.00015592254369024714,
      "loss": 0.8426,
      "step": 6760
    },
    {
      "epoch": 1.9013490725126476,
      "grad_norm": 0.44725337624549866,
      "learning_rate": 0.00015584118428762467,
      "loss": 0.8487,
      "step": 6765
    },
    {
      "epoch": 1.902754356379989,
      "grad_norm": 0.4571017622947693,
      "learning_rate": 0.0001557597711376066,
      "loss": 0.8535,
      "step": 6770
    },
    {
      "epoch": 1.90415964024733,
      "grad_norm": 0.4571828246116638,
      "learning_rate": 0.00015567830431855353,
      "loss": 0.8665,
      "step": 6775
    },
    {
      "epoch": 1.905564924114671,
      "grad_norm": 0.49334484338760376,
      "learning_rate": 0.00015559678390887762,
      "loss": 0.8681,
      "step": 6780
    },
    {
      "epoch": 1.9069702079820123,
      "grad_norm": 0.5663411617279053,
      "learning_rate": 0.00015551520998704262,
      "loss": 0.8603,
      "step": 6785
    },
    {
      "epoch": 1.9083754918493536,
      "grad_norm": 0.4837425947189331,
      "learning_rate": 0.00015543358263156383,
      "loss": 0.8656,
      "step": 6790
    },
    {
      "epoch": 1.909780775716695,
      "grad_norm": 0.5288939476013184,
      "learning_rate": 0.00015535190192100793,
      "loss": 0.8806,
      "step": 6795
    },
    {
      "epoch": 1.911186059584036,
      "grad_norm": 0.4623594284057617,
      "learning_rate": 0.00015527016793399301,
      "loss": 0.8595,
      "step": 6800
    },
    {
      "epoch": 1.912591343451377,
      "grad_norm": 0.5188004374504089,
      "learning_rate": 0.00015518838074918834,
      "loss": 0.8453,
      "step": 6805
    },
    {
      "epoch": 1.9139966273187183,
      "grad_norm": 0.6675241589546204,
      "learning_rate": 0.00015510654044531452,
      "loss": 0.844,
      "step": 6810
    },
    {
      "epoch": 1.9154019111860596,
      "grad_norm": 0.590613603591919,
      "learning_rate": 0.00015502464710114323,
      "loss": 0.8849,
      "step": 6815
    },
    {
      "epoch": 1.916807195053401,
      "grad_norm": 0.4679904580116272,
      "learning_rate": 0.00015494270079549716,
      "loss": 0.9126,
      "step": 6820
    },
    {
      "epoch": 1.918212478920742,
      "grad_norm": 0.5950139760971069,
      "learning_rate": 0.00015486070160724994,
      "loss": 0.8569,
      "step": 6825
    },
    {
      "epoch": 1.919617762788083,
      "grad_norm": 0.5042142271995544,
      "learning_rate": 0.00015477864961532632,
      "loss": 0.8616,
      "step": 6830
    },
    {
      "epoch": 1.9210230466554243,
      "grad_norm": 0.5188416242599487,
      "learning_rate": 0.00015469654489870158,
      "loss": 0.8647,
      "step": 6835
    },
    {
      "epoch": 1.9224283305227656,
      "grad_norm": 0.5152612924575806,
      "learning_rate": 0.00015461438753640194,
      "loss": 0.8778,
      "step": 6840
    },
    {
      "epoch": 1.923833614390107,
      "grad_norm": 0.4704989492893219,
      "learning_rate": 0.00015453217760750426,
      "loss": 0.8673,
      "step": 6845
    },
    {
      "epoch": 1.925238898257448,
      "grad_norm": 0.6396019458770752,
      "learning_rate": 0.00015444991519113587,
      "loss": 0.8623,
      "step": 6850
    },
    {
      "epoch": 1.926644182124789,
      "grad_norm": 0.49166232347488403,
      "learning_rate": 0.00015436760036647483,
      "loss": 0.8774,
      "step": 6855
    },
    {
      "epoch": 1.9280494659921303,
      "grad_norm": 0.49419841170310974,
      "learning_rate": 0.00015428523321274953,
      "loss": 0.8567,
      "step": 6860
    },
    {
      "epoch": 1.9294547498594716,
      "grad_norm": 0.5003138184547424,
      "learning_rate": 0.00015420281380923868,
      "loss": 0.8531,
      "step": 6865
    },
    {
      "epoch": 1.930860033726813,
      "grad_norm": 0.5504321455955505,
      "learning_rate": 0.00015412034223527137,
      "loss": 0.8519,
      "step": 6870
    },
    {
      "epoch": 1.932265317594154,
      "grad_norm": 0.6040800213813782,
      "learning_rate": 0.00015403781857022684,
      "loss": 0.8731,
      "step": 6875
    },
    {
      "epoch": 1.9336706014614953,
      "grad_norm": 0.43906983733177185,
      "learning_rate": 0.00015395524289353452,
      "loss": 0.8671,
      "step": 6880
    },
    {
      "epoch": 1.9350758853288363,
      "grad_norm": 0.4836946427822113,
      "learning_rate": 0.00015387261528467384,
      "loss": 0.8584,
      "step": 6885
    },
    {
      "epoch": 1.9364811691961776,
      "grad_norm": 0.6498724222183228,
      "learning_rate": 0.00015378993582317428,
      "loss": 0.8575,
      "step": 6890
    },
    {
      "epoch": 1.937886453063519,
      "grad_norm": 0.5695022344589233,
      "learning_rate": 0.00015370720458861525,
      "loss": 0.8753,
      "step": 6895
    },
    {
      "epoch": 1.93929173693086,
      "grad_norm": 0.6768786907196045,
      "learning_rate": 0.00015362442166062587,
      "loss": 0.8501,
      "step": 6900
    },
    {
      "epoch": 1.9406970207982013,
      "grad_norm": 0.4813404977321625,
      "learning_rate": 0.0001535415871188851,
      "loss": 0.8499,
      "step": 6905
    },
    {
      "epoch": 1.9421023046655423,
      "grad_norm": 0.5628063082695007,
      "learning_rate": 0.00015345870104312154,
      "loss": 0.8837,
      "step": 6910
    },
    {
      "epoch": 1.9435075885328836,
      "grad_norm": 0.5575454831123352,
      "learning_rate": 0.0001533757635131135,
      "loss": 0.8557,
      "step": 6915
    },
    {
      "epoch": 1.944912872400225,
      "grad_norm": 0.5431692004203796,
      "learning_rate": 0.00015329277460868868,
      "loss": 0.8626,
      "step": 6920
    },
    {
      "epoch": 1.9463181562675662,
      "grad_norm": 0.5444419384002686,
      "learning_rate": 0.00015320973440972427,
      "loss": 0.8512,
      "step": 6925
    },
    {
      "epoch": 1.9477234401349073,
      "grad_norm": 0.6308559775352478,
      "learning_rate": 0.00015312664299614684,
      "loss": 0.8664,
      "step": 6930
    },
    {
      "epoch": 1.9491287240022483,
      "grad_norm": 0.5455043315887451,
      "learning_rate": 0.00015304350044793229,
      "loss": 0.8664,
      "step": 6935
    },
    {
      "epoch": 1.9505340078695896,
      "grad_norm": 0.609303891658783,
      "learning_rate": 0.0001529603068451057,
      "loss": 0.8577,
      "step": 6940
    },
    {
      "epoch": 1.951939291736931,
      "grad_norm": 0.5833176374435425,
      "learning_rate": 0.00015287706226774125,
      "loss": 0.9236,
      "step": 6945
    },
    {
      "epoch": 1.9533445756042722,
      "grad_norm": 0.8206747174263,
      "learning_rate": 0.00015279376679596228,
      "loss": 0.8897,
      "step": 6950
    },
    {
      "epoch": 1.9547498594716133,
      "grad_norm": 0.6312025189399719,
      "learning_rate": 0.00015271042050994104,
      "loss": 0.8609,
      "step": 6955
    },
    {
      "epoch": 1.9561551433389543,
      "grad_norm": 0.5285316109657288,
      "learning_rate": 0.0001526270234898987,
      "loss": 0.8649,
      "step": 6960
    },
    {
      "epoch": 1.9575604272062956,
      "grad_norm": 0.5742127299308777,
      "learning_rate": 0.0001525435758161053,
      "loss": 0.8497,
      "step": 6965
    },
    {
      "epoch": 1.958965711073637,
      "grad_norm": 0.5842658877372742,
      "learning_rate": 0.00015246007756887958,
      "loss": 0.8666,
      "step": 6970
    },
    {
      "epoch": 1.9603709949409782,
      "grad_norm": 0.4612257182598114,
      "learning_rate": 0.00015237652882858898,
      "loss": 0.8673,
      "step": 6975
    },
    {
      "epoch": 1.9617762788083193,
      "grad_norm": 0.5715500116348267,
      "learning_rate": 0.00015229292967564959,
      "loss": 0.8635,
      "step": 6980
    },
    {
      "epoch": 1.9631815626756604,
      "grad_norm": 0.5319962501525879,
      "learning_rate": 0.00015220928019052594,
      "loss": 0.8713,
      "step": 6985
    },
    {
      "epoch": 1.9645868465430016,
      "grad_norm": 0.5012961030006409,
      "learning_rate": 0.00015212558045373106,
      "loss": 0.8628,
      "step": 6990
    },
    {
      "epoch": 1.965992130410343,
      "grad_norm": 0.5424646139144897,
      "learning_rate": 0.00015204183054582632,
      "loss": 0.8636,
      "step": 6995
    },
    {
      "epoch": 1.9673974142776842,
      "grad_norm": 0.4767484664916992,
      "learning_rate": 0.0001519580305474214,
      "loss": 0.8557,
      "step": 7000
    },
    {
      "epoch": 1.9688026981450253,
      "grad_norm": 0.5038319826126099,
      "learning_rate": 0.00015187418053917416,
      "loss": 0.8672,
      "step": 7005
    },
    {
      "epoch": 1.9702079820123664,
      "grad_norm": 0.5484253764152527,
      "learning_rate": 0.00015179028060179062,
      "loss": 0.8588,
      "step": 7010
    },
    {
      "epoch": 1.9716132658797076,
      "grad_norm": 0.6408220529556274,
      "learning_rate": 0.00015170633081602487,
      "loss": 0.8636,
      "step": 7015
    },
    {
      "epoch": 1.973018549747049,
      "grad_norm": 0.6039834022521973,
      "learning_rate": 0.00015162233126267898,
      "loss": 0.8569,
      "step": 7020
    },
    {
      "epoch": 1.9744238336143902,
      "grad_norm": 0.6858595609664917,
      "learning_rate": 0.0001515382820226029,
      "loss": 0.8581,
      "step": 7025
    },
    {
      "epoch": 1.9758291174817313,
      "grad_norm": 0.5240831971168518,
      "learning_rate": 0.00015145418317669438,
      "loss": 0.8515,
      "step": 7030
    },
    {
      "epoch": 1.9772344013490724,
      "grad_norm": 0.5512087345123291,
      "learning_rate": 0.0001513700348058989,
      "loss": 0.8528,
      "step": 7035
    },
    {
      "epoch": 1.9786396852164136,
      "grad_norm": 0.5006879568099976,
      "learning_rate": 0.00015128583699120977,
      "loss": 0.8623,
      "step": 7040
    },
    {
      "epoch": 1.980044969083755,
      "grad_norm": 0.49987828731536865,
      "learning_rate": 0.0001512015898136677,
      "loss": 0.8451,
      "step": 7045
    },
    {
      "epoch": 1.9814502529510962,
      "grad_norm": 0.5027675628662109,
      "learning_rate": 0.00015111729335436097,
      "loss": 0.8563,
      "step": 7050
    },
    {
      "epoch": 1.9828555368184373,
      "grad_norm": 0.4932931959629059,
      "learning_rate": 0.00015103294769442535,
      "loss": 0.8731,
      "step": 7055
    },
    {
      "epoch": 1.9842608206857786,
      "grad_norm": 0.4976560175418854,
      "learning_rate": 0.00015094855291504391,
      "loss": 0.871,
      "step": 7060
    },
    {
      "epoch": 1.9856661045531196,
      "grad_norm": 0.4505634605884552,
      "learning_rate": 0.00015086410909744702,
      "loss": 0.8559,
      "step": 7065
    },
    {
      "epoch": 1.987071388420461,
      "grad_norm": 0.5050502419471741,
      "learning_rate": 0.0001507796163229122,
      "loss": 0.8527,
      "step": 7070
    },
    {
      "epoch": 1.9884766722878022,
      "grad_norm": 0.8051122426986694,
      "learning_rate": 0.00015069507467276418,
      "loss": 0.8406,
      "step": 7075
    },
    {
      "epoch": 1.9898819561551433,
      "grad_norm": 0.6766129732131958,
      "learning_rate": 0.00015061048422837468,
      "loss": 0.8581,
      "step": 7080
    },
    {
      "epoch": 1.9912872400224846,
      "grad_norm": 0.5113483667373657,
      "learning_rate": 0.00015052584507116234,
      "loss": 0.8441,
      "step": 7085
    },
    {
      "epoch": 1.9926925238898257,
      "grad_norm": 0.5204007029533386,
      "learning_rate": 0.0001504411572825928,
      "loss": 0.8424,
      "step": 7090
    },
    {
      "epoch": 1.994097807757167,
      "grad_norm": 0.7898670434951782,
      "learning_rate": 0.00015035642094417842,
      "loss": 0.8468,
      "step": 7095
    },
    {
      "epoch": 1.9955030916245082,
      "grad_norm": 0.5176621675491333,
      "learning_rate": 0.0001502716361374783,
      "loss": 0.8494,
      "step": 7100
    },
    {
      "epoch": 1.9969083754918495,
      "grad_norm": 0.6323757171630859,
      "learning_rate": 0.00015018680294409822,
      "loss": 0.8556,
      "step": 7105
    },
    {
      "epoch": 1.9983136593591906,
      "grad_norm": 1.1276917457580566,
      "learning_rate": 0.0001501019214456905,
      "loss": 0.8694,
      "step": 7110
    },
    {
      "epoch": 1.9997189432265317,
      "grad_norm": 0.4633052349090576,
      "learning_rate": 0.000150016991723954,
      "loss": 0.8691,
      "step": 7115
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.8701068758964539,
      "eval_runtime": 638.3523,
      "eval_samples_per_second": 7.045,
      "eval_steps_per_second": 0.587,
      "step": 7116
    },
    {
      "epoch": 2.001124227093873,
      "grad_norm": 0.5140894651412964,
      "learning_rate": 0.00014993201386063394,
      "loss": 0.8231,
      "step": 7120
    },
    {
      "epoch": 2.0025295109612142,
      "grad_norm": 0.4917040169239044,
      "learning_rate": 0.0001498469879375219,
      "loss": 0.8273,
      "step": 7125
    },
    {
      "epoch": 2.0039347948285555,
      "grad_norm": 0.5956901907920837,
      "learning_rate": 0.00014976191403645578,
      "loss": 0.8191,
      "step": 7130
    },
    {
      "epoch": 2.0053400786958964,
      "grad_norm": 0.46787455677986145,
      "learning_rate": 0.0001496767922393195,
      "loss": 0.8201,
      "step": 7135
    },
    {
      "epoch": 2.0067453625632377,
      "grad_norm": 0.4993847608566284,
      "learning_rate": 0.00014959162262804328,
      "loss": 0.8115,
      "step": 7140
    },
    {
      "epoch": 2.008150646430579,
      "grad_norm": 0.4391626715660095,
      "learning_rate": 0.00014950640528460317,
      "loss": 0.8185,
      "step": 7145
    },
    {
      "epoch": 2.0095559302979202,
      "grad_norm": 0.5829885005950928,
      "learning_rate": 0.00014942114029102132,
      "loss": 0.8289,
      "step": 7150
    },
    {
      "epoch": 2.0109612141652615,
      "grad_norm": 0.5417194962501526,
      "learning_rate": 0.00014933582772936569,
      "loss": 0.8132,
      "step": 7155
    },
    {
      "epoch": 2.0123664980326024,
      "grad_norm": 0.4647194445133209,
      "learning_rate": 0.00014925046768174988,
      "loss": 0.8201,
      "step": 7160
    },
    {
      "epoch": 2.0137717818999437,
      "grad_norm": 0.6761397123336792,
      "learning_rate": 0.00014916506023033348,
      "loss": 0.8059,
      "step": 7165
    },
    {
      "epoch": 2.015177065767285,
      "grad_norm": 0.5787038207054138,
      "learning_rate": 0.0001490796054573215,
      "loss": 0.8342,
      "step": 7170
    },
    {
      "epoch": 2.0165823496346262,
      "grad_norm": 0.4988895356655121,
      "learning_rate": 0.0001489941034449645,
      "loss": 0.8208,
      "step": 7175
    },
    {
      "epoch": 2.0179876335019675,
      "grad_norm": 0.4594341516494751,
      "learning_rate": 0.00014890855427555864,
      "loss": 0.8325,
      "step": 7180
    },
    {
      "epoch": 2.019392917369309,
      "grad_norm": 0.48250508308410645,
      "learning_rate": 0.00014882295803144536,
      "loss": 0.835,
      "step": 7185
    },
    {
      "epoch": 2.0207982012366497,
      "grad_norm": 0.5917899012565613,
      "learning_rate": 0.0001487373147950114,
      "loss": 0.8172,
      "step": 7190
    },
    {
      "epoch": 2.022203485103991,
      "grad_norm": 0.4929775297641754,
      "learning_rate": 0.00014865162464868878,
      "loss": 0.8457,
      "step": 7195
    },
    {
      "epoch": 2.0236087689713322,
      "grad_norm": 0.6094657182693481,
      "learning_rate": 0.0001485658876749547,
      "loss": 0.8277,
      "step": 7200
    },
    {
      "epoch": 2.0250140528386735,
      "grad_norm": 0.5238663554191589,
      "learning_rate": 0.00014848010395633135,
      "loss": 0.8271,
      "step": 7205
    },
    {
      "epoch": 2.026419336706015,
      "grad_norm": 0.6498709321022034,
      "learning_rate": 0.00014839427357538597,
      "loss": 0.8383,
      "step": 7210
    },
    {
      "epoch": 2.0278246205733557,
      "grad_norm": 0.8006538152694702,
      "learning_rate": 0.00014830839661473069,
      "loss": 0.8162,
      "step": 7215
    },
    {
      "epoch": 2.029229904440697,
      "grad_norm": 0.5685740113258362,
      "learning_rate": 0.00014822247315702245,
      "loss": 0.834,
      "step": 7220
    },
    {
      "epoch": 2.0306351883080382,
      "grad_norm": 0.5004500150680542,
      "learning_rate": 0.00014813650328496301,
      "loss": 0.8248,
      "step": 7225
    },
    {
      "epoch": 2.0320404721753795,
      "grad_norm": 0.8819432854652405,
      "learning_rate": 0.0001480504870812988,
      "loss": 0.8773,
      "step": 7230
    },
    {
      "epoch": 2.033445756042721,
      "grad_norm": 0.6650183200836182,
      "learning_rate": 0.0001479644246288207,
      "loss": 0.8298,
      "step": 7235
    },
    {
      "epoch": 2.0348510399100617,
      "grad_norm": 0.5287449359893799,
      "learning_rate": 0.0001478783160103643,
      "loss": 0.8332,
      "step": 7240
    },
    {
      "epoch": 2.036256323777403,
      "grad_norm": 0.5206481218338013,
      "learning_rate": 0.00014779216130880951,
      "loss": 0.82,
      "step": 7245
    },
    {
      "epoch": 2.0376616076447442,
      "grad_norm": 0.5504814982414246,
      "learning_rate": 0.00014770596060708065,
      "loss": 0.8312,
      "step": 7250
    },
    {
      "epoch": 2.0390668915120855,
      "grad_norm": 0.4870205223560333,
      "learning_rate": 0.00014761971398814626,
      "loss": 0.8228,
      "step": 7255
    },
    {
      "epoch": 2.040472175379427,
      "grad_norm": 0.5187703371047974,
      "learning_rate": 0.00014753342153501912,
      "loss": 0.8155,
      "step": 7260
    },
    {
      "epoch": 2.0418774592467677,
      "grad_norm": 0.6017442941665649,
      "learning_rate": 0.00014744708333075607,
      "loss": 0.8356,
      "step": 7265
    },
    {
      "epoch": 2.043282743114109,
      "grad_norm": 0.48812979459762573,
      "learning_rate": 0.0001473606994584581,
      "loss": 0.8223,
      "step": 7270
    },
    {
      "epoch": 2.0446880269814502,
      "grad_norm": 0.5613028407096863,
      "learning_rate": 0.0001472742700012701,
      "loss": 0.8292,
      "step": 7275
    },
    {
      "epoch": 2.0460933108487915,
      "grad_norm": 0.48770609498023987,
      "learning_rate": 0.00014718779504238068,
      "loss": 0.8303,
      "step": 7280
    },
    {
      "epoch": 2.047498594716133,
      "grad_norm": 0.6781899929046631,
      "learning_rate": 0.00014710127466502254,
      "loss": 0.8322,
      "step": 7285
    },
    {
      "epoch": 2.0489038785834737,
      "grad_norm": 0.7580632567405701,
      "learning_rate": 0.00014701470895247181,
      "loss": 0.8407,
      "step": 7290
    },
    {
      "epoch": 2.050309162450815,
      "grad_norm": 0.5731956362724304,
      "learning_rate": 0.00014692809798804847,
      "loss": 0.8423,
      "step": 7295
    },
    {
      "epoch": 2.0517144463181562,
      "grad_norm": 0.48143020272254944,
      "learning_rate": 0.00014684144185511596,
      "loss": 0.8206,
      "step": 7300
    },
    {
      "epoch": 2.0531197301854975,
      "grad_norm": 0.589863121509552,
      "learning_rate": 0.00014675474063708118,
      "loss": 0.8379,
      "step": 7305
    },
    {
      "epoch": 2.054525014052839,
      "grad_norm": 0.5739490985870361,
      "learning_rate": 0.00014666799441739444,
      "loss": 0.8297,
      "step": 7310
    },
    {
      "epoch": 2.0559302979201797,
      "grad_norm": 0.6172598600387573,
      "learning_rate": 0.00014658120327954937,
      "loss": 0.88,
      "step": 7315
    },
    {
      "epoch": 2.057335581787521,
      "grad_norm": 0.725379228591919,
      "learning_rate": 0.0001464943673070829,
      "loss": 0.8385,
      "step": 7320
    },
    {
      "epoch": 2.0587408656548623,
      "grad_norm": 0.7602442502975464,
      "learning_rate": 0.000146407486583575,
      "loss": 0.836,
      "step": 7325
    },
    {
      "epoch": 2.0601461495222035,
      "grad_norm": 0.6438000798225403,
      "learning_rate": 0.0001463205611926488,
      "loss": 0.8323,
      "step": 7330
    },
    {
      "epoch": 2.061551433389545,
      "grad_norm": 0.5296928286552429,
      "learning_rate": 0.00014623359121797034,
      "loss": 0.8391,
      "step": 7335
    },
    {
      "epoch": 2.0629567172568857,
      "grad_norm": 0.6987408399581909,
      "learning_rate": 0.00014614657674324864,
      "loss": 0.8278,
      "step": 7340
    },
    {
      "epoch": 2.064362001124227,
      "grad_norm": 0.5474652647972107,
      "learning_rate": 0.00014605951785223552,
      "loss": 0.84,
      "step": 7345
    },
    {
      "epoch": 2.0657672849915683,
      "grad_norm": 0.573154866695404,
      "learning_rate": 0.00014597241462872558,
      "loss": 0.8188,
      "step": 7350
    },
    {
      "epoch": 2.0671725688589095,
      "grad_norm": 0.6933932900428772,
      "learning_rate": 0.00014588526715655608,
      "loss": 0.8346,
      "step": 7355
    },
    {
      "epoch": 2.068577852726251,
      "grad_norm": 0.5106696486473083,
      "learning_rate": 0.00014579807551960683,
      "loss": 0.8275,
      "step": 7360
    },
    {
      "epoch": 2.069983136593592,
      "grad_norm": 0.48654860258102417,
      "learning_rate": 0.0001457108398018002,
      "loss": 0.8261,
      "step": 7365
    },
    {
      "epoch": 2.071388420460933,
      "grad_norm": 0.5113856196403503,
      "learning_rate": 0.00014562356008710094,
      "loss": 0.8258,
      "step": 7370
    },
    {
      "epoch": 2.0727937043282743,
      "grad_norm": 0.5285763144493103,
      "learning_rate": 0.00014553623645951623,
      "loss": 0.8463,
      "step": 7375
    },
    {
      "epoch": 2.0741989881956155,
      "grad_norm": 0.6509875059127808,
      "learning_rate": 0.00014544886900309537,
      "loss": 0.8306,
      "step": 7380
    },
    {
      "epoch": 2.075604272062957,
      "grad_norm": 0.5188117623329163,
      "learning_rate": 0.00014536145780193007,
      "loss": 0.8144,
      "step": 7385
    },
    {
      "epoch": 2.077009555930298,
      "grad_norm": 0.5361855626106262,
      "learning_rate": 0.0001452740029401539,
      "loss": 0.8233,
      "step": 7390
    },
    {
      "epoch": 2.078414839797639,
      "grad_norm": 0.4979911148548126,
      "learning_rate": 0.0001451865045019426,
      "loss": 0.8183,
      "step": 7395
    },
    {
      "epoch": 2.0798201236649803,
      "grad_norm": 0.5529736280441284,
      "learning_rate": 0.00014509896257151384,
      "loss": 0.8261,
      "step": 7400
    },
    {
      "epoch": 2.0812254075323215,
      "grad_norm": 0.6499199867248535,
      "learning_rate": 0.00014501137723312707,
      "loss": 0.8303,
      "step": 7405
    },
    {
      "epoch": 2.082630691399663,
      "grad_norm": 0.4609116017818451,
      "learning_rate": 0.00014492374857108365,
      "loss": 0.8321,
      "step": 7410
    },
    {
      "epoch": 2.084035975267004,
      "grad_norm": 0.4897962808609009,
      "learning_rate": 0.00014483607666972652,
      "loss": 0.8317,
      "step": 7415
    },
    {
      "epoch": 2.085441259134345,
      "grad_norm": 0.4982929825782776,
      "learning_rate": 0.0001447483616134403,
      "loss": 0.8786,
      "step": 7420
    },
    {
      "epoch": 2.0868465430016863,
      "grad_norm": 0.5008282661437988,
      "learning_rate": 0.00014466060348665116,
      "loss": 0.8353,
      "step": 7425
    },
    {
      "epoch": 2.0882518268690275,
      "grad_norm": 0.5852164626121521,
      "learning_rate": 0.00014457280237382665,
      "loss": 0.834,
      "step": 7430
    },
    {
      "epoch": 2.089657110736369,
      "grad_norm": 0.6624243855476379,
      "learning_rate": 0.00014448495835947577,
      "loss": 0.8287,
      "step": 7435
    },
    {
      "epoch": 2.09106239460371,
      "grad_norm": 0.583459734916687,
      "learning_rate": 0.0001443970715281488,
      "loss": 0.8281,
      "step": 7440
    },
    {
      "epoch": 2.092467678471051,
      "grad_norm": 0.5138252973556519,
      "learning_rate": 0.00014430914196443716,
      "loss": 0.8379,
      "step": 7445
    },
    {
      "epoch": 2.0938729623383923,
      "grad_norm": 0.5127986669540405,
      "learning_rate": 0.00014422116975297352,
      "loss": 0.8355,
      "step": 7450
    },
    {
      "epoch": 2.0952782462057336,
      "grad_norm": 0.4994426369667053,
      "learning_rate": 0.00014413315497843152,
      "loss": 0.8117,
      "step": 7455
    },
    {
      "epoch": 2.096683530073075,
      "grad_norm": 0.5604568719863892,
      "learning_rate": 0.00014404509772552579,
      "loss": 0.8552,
      "step": 7460
    },
    {
      "epoch": 2.098088813940416,
      "grad_norm": 0.5112202763557434,
      "learning_rate": 0.00014395699807901181,
      "loss": 0.8389,
      "step": 7465
    },
    {
      "epoch": 2.099494097807757,
      "grad_norm": 0.564951479434967,
      "learning_rate": 0.0001438688561236859,
      "loss": 0.8199,
      "step": 7470
    },
    {
      "epoch": 2.1008993816750983,
      "grad_norm": 0.5867136120796204,
      "learning_rate": 0.00014378067194438513,
      "loss": 0.8436,
      "step": 7475
    },
    {
      "epoch": 2.1023046655424396,
      "grad_norm": 0.4982098340988159,
      "learning_rate": 0.00014369244562598715,
      "loss": 0.8185,
      "step": 7480
    },
    {
      "epoch": 2.103709949409781,
      "grad_norm": 0.5959761738777161,
      "learning_rate": 0.00014360417725341017,
      "loss": 0.8435,
      "step": 7485
    },
    {
      "epoch": 2.105115233277122,
      "grad_norm": 0.5060386061668396,
      "learning_rate": 0.00014351586691161298,
      "loss": 0.8206,
      "step": 7490
    },
    {
      "epoch": 2.106520517144463,
      "grad_norm": 0.5006709694862366,
      "learning_rate": 0.0001434275146855946,
      "loss": 0.8228,
      "step": 7495
    },
    {
      "epoch": 2.1079258010118043,
      "grad_norm": 0.5913635492324829,
      "learning_rate": 0.0001433391206603945,
      "loss": 0.8263,
      "step": 7500
    },
    {
      "epoch": 2.1093310848791456,
      "grad_norm": 0.6296173334121704,
      "learning_rate": 0.00014325068492109235,
      "loss": 0.8259,
      "step": 7505
    },
    {
      "epoch": 2.110736368746487,
      "grad_norm": 0.5836814045906067,
      "learning_rate": 0.0001431622075528079,
      "loss": 0.8324,
      "step": 7510
    },
    {
      "epoch": 2.112141652613828,
      "grad_norm": 0.577974259853363,
      "learning_rate": 0.00014307368864070104,
      "loss": 0.842,
      "step": 7515
    },
    {
      "epoch": 2.1135469364811694,
      "grad_norm": 0.501258134841919,
      "learning_rate": 0.00014298512826997164,
      "loss": 0.8301,
      "step": 7520
    },
    {
      "epoch": 2.1149522203485103,
      "grad_norm": 0.5843653678894043,
      "learning_rate": 0.0001428965265258595,
      "loss": 0.8339,
      "step": 7525
    },
    {
      "epoch": 2.1163575042158516,
      "grad_norm": 0.584141194820404,
      "learning_rate": 0.00014280788349364413,
      "loss": 0.8257,
      "step": 7530
    },
    {
      "epoch": 2.117762788083193,
      "grad_norm": 0.6166936159133911,
      "learning_rate": 0.00014271919925864493,
      "loss": 0.8396,
      "step": 7535
    },
    {
      "epoch": 2.119168071950534,
      "grad_norm": 0.5409911870956421,
      "learning_rate": 0.0001426304739062208,
      "loss": 0.8152,
      "step": 7540
    },
    {
      "epoch": 2.120573355817875,
      "grad_norm": 0.48173296451568604,
      "learning_rate": 0.00014254170752177035,
      "loss": 0.8239,
      "step": 7545
    },
    {
      "epoch": 2.1219786396852163,
      "grad_norm": 0.5484293103218079,
      "learning_rate": 0.00014245290019073166,
      "loss": 0.8364,
      "step": 7550
    },
    {
      "epoch": 2.1233839235525576,
      "grad_norm": 0.6085915565490723,
      "learning_rate": 0.00014236405199858208,
      "loss": 0.8374,
      "step": 7555
    },
    {
      "epoch": 2.124789207419899,
      "grad_norm": 0.49279549717903137,
      "learning_rate": 0.00014227516303083856,
      "loss": 0.8442,
      "step": 7560
    },
    {
      "epoch": 2.12619449128724,
      "grad_norm": 0.4717954695224762,
      "learning_rate": 0.000142186233373057,
      "loss": 0.8289,
      "step": 7565
    },
    {
      "epoch": 2.1275997751545814,
      "grad_norm": 0.4982221722602844,
      "learning_rate": 0.0001420972631108327,
      "loss": 0.8382,
      "step": 7570
    },
    {
      "epoch": 2.1290050590219223,
      "grad_norm": 0.472332626581192,
      "learning_rate": 0.00014200825232979985,
      "loss": 0.8262,
      "step": 7575
    },
    {
      "epoch": 2.1304103428892636,
      "grad_norm": 0.6342632174491882,
      "learning_rate": 0.00014191920111563183,
      "loss": 0.8333,
      "step": 7580
    },
    {
      "epoch": 2.131815626756605,
      "grad_norm": 0.5856114029884338,
      "learning_rate": 0.0001418301095540408,
      "loss": 0.8315,
      "step": 7585
    },
    {
      "epoch": 2.133220910623946,
      "grad_norm": 0.505497932434082,
      "learning_rate": 0.00014174097773077778,
      "loss": 0.827,
      "step": 7590
    },
    {
      "epoch": 2.1346261944912874,
      "grad_norm": 0.6035593748092651,
      "learning_rate": 0.00014165180573163255,
      "loss": 0.8263,
      "step": 7595
    },
    {
      "epoch": 2.1360314783586283,
      "grad_norm": 0.5006420612335205,
      "learning_rate": 0.0001415625936424336,
      "loss": 0.8215,
      "step": 7600
    },
    {
      "epoch": 2.1374367622259696,
      "grad_norm": 0.5282971858978271,
      "learning_rate": 0.000141473341549048,
      "loss": 0.842,
      "step": 7605
    },
    {
      "epoch": 2.138842046093311,
      "grad_norm": 0.6301651000976562,
      "learning_rate": 0.00014138404953738124,
      "loss": 0.8265,
      "step": 7610
    },
    {
      "epoch": 2.140247329960652,
      "grad_norm": 0.5510733127593994,
      "learning_rate": 0.0001412947176933773,
      "loss": 0.8365,
      "step": 7615
    },
    {
      "epoch": 2.1416526138279934,
      "grad_norm": 0.5520345568656921,
      "learning_rate": 0.0001412053461030185,
      "loss": 0.8347,
      "step": 7620
    },
    {
      "epoch": 2.1430578976953343,
      "grad_norm": 0.5838735103607178,
      "learning_rate": 0.00014111593485232541,
      "loss": 0.8367,
      "step": 7625
    },
    {
      "epoch": 2.1444631815626756,
      "grad_norm": 0.5474026799201965,
      "learning_rate": 0.00014102648402735678,
      "loss": 0.8313,
      "step": 7630
    },
    {
      "epoch": 2.145868465430017,
      "grad_norm": 0.5024116635322571,
      "learning_rate": 0.0001409369937142094,
      "loss": 0.8667,
      "step": 7635
    },
    {
      "epoch": 2.147273749297358,
      "grad_norm": 0.996107280254364,
      "learning_rate": 0.00014084746399901818,
      "loss": 0.8323,
      "step": 7640
    },
    {
      "epoch": 2.1486790331646994,
      "grad_norm": 0.6177522540092468,
      "learning_rate": 0.00014075789496795576,
      "loss": 0.8362,
      "step": 7645
    },
    {
      "epoch": 2.1500843170320403,
      "grad_norm": 0.500321090221405,
      "learning_rate": 0.00014066828670723287,
      "loss": 0.824,
      "step": 7650
    },
    {
      "epoch": 2.1514896008993816,
      "grad_norm": 0.49639928340911865,
      "learning_rate": 0.0001405786393030978,
      "loss": 0.8277,
      "step": 7655
    },
    {
      "epoch": 2.152894884766723,
      "grad_norm": 0.5836513042449951,
      "learning_rate": 0.00014048895284183657,
      "loss": 0.8489,
      "step": 7660
    },
    {
      "epoch": 2.154300168634064,
      "grad_norm": 0.5478304624557495,
      "learning_rate": 0.0001403992274097729,
      "loss": 0.8407,
      "step": 7665
    },
    {
      "epoch": 2.1557054525014054,
      "grad_norm": 0.5731642842292786,
      "learning_rate": 0.00014030946309326784,
      "loss": 0.8414,
      "step": 7670
    },
    {
      "epoch": 2.1571107363687463,
      "grad_norm": 0.5691959261894226,
      "learning_rate": 0.00014021965997871994,
      "loss": 0.8802,
      "step": 7675
    },
    {
      "epoch": 2.1585160202360876,
      "grad_norm": 0.683435320854187,
      "learning_rate": 0.00014012981815256524,
      "loss": 0.829,
      "step": 7680
    },
    {
      "epoch": 2.159921304103429,
      "grad_norm": 0.5545250773429871,
      "learning_rate": 0.00014003993770127674,
      "loss": 0.8352,
      "step": 7685
    },
    {
      "epoch": 2.16132658797077,
      "grad_norm": 0.5581771731376648,
      "learning_rate": 0.00013995001871136494,
      "loss": 0.8342,
      "step": 7690
    },
    {
      "epoch": 2.1627318718381114,
      "grad_norm": 0.5403844714164734,
      "learning_rate": 0.00013986006126937716,
      "loss": 0.8255,
      "step": 7695
    },
    {
      "epoch": 2.1641371557054523,
      "grad_norm": 0.6160198450088501,
      "learning_rate": 0.0001397700654618979,
      "loss": 0.8862,
      "step": 7700
    },
    {
      "epoch": 2.1655424395727936,
      "grad_norm": 0.5250822901725769,
      "learning_rate": 0.00013968003137554855,
      "loss": 0.8255,
      "step": 7705
    },
    {
      "epoch": 2.166947723440135,
      "grad_norm": 0.49927449226379395,
      "learning_rate": 0.00013958995909698734,
      "loss": 0.8179,
      "step": 7710
    },
    {
      "epoch": 2.168353007307476,
      "grad_norm": 0.5784510374069214,
      "learning_rate": 0.0001394998487129092,
      "loss": 0.8234,
      "step": 7715
    },
    {
      "epoch": 2.1697582911748174,
      "grad_norm": 0.6177597641944885,
      "learning_rate": 0.0001394097003100458,
      "loss": 0.8204,
      "step": 7720
    },
    {
      "epoch": 2.1711635750421587,
      "grad_norm": 0.5223365426063538,
      "learning_rate": 0.00013931951397516543,
      "loss": 0.8366,
      "step": 7725
    },
    {
      "epoch": 2.1725688589094996,
      "grad_norm": 0.5713496208190918,
      "learning_rate": 0.0001392292897950728,
      "loss": 0.8324,
      "step": 7730
    },
    {
      "epoch": 2.173974142776841,
      "grad_norm": 0.4760042726993561,
      "learning_rate": 0.00013913902785660915,
      "loss": 0.8312,
      "step": 7735
    },
    {
      "epoch": 2.175379426644182,
      "grad_norm": 0.5172589421272278,
      "learning_rate": 0.00013904872824665196,
      "loss": 0.8131,
      "step": 7740
    },
    {
      "epoch": 2.1767847105115234,
      "grad_norm": 0.4978179633617401,
      "learning_rate": 0.00013895839105211504,
      "loss": 0.8284,
      "step": 7745
    },
    {
      "epoch": 2.1781899943788647,
      "grad_norm": 0.5030422806739807,
      "learning_rate": 0.00013886801635994836,
      "loss": 0.8335,
      "step": 7750
    },
    {
      "epoch": 2.1795952782462056,
      "grad_norm": 0.48498082160949707,
      "learning_rate": 0.00013877760425713795,
      "loss": 0.8212,
      "step": 7755
    },
    {
      "epoch": 2.181000562113547,
      "grad_norm": 0.5106037855148315,
      "learning_rate": 0.00013868715483070592,
      "loss": 0.8278,
      "step": 7760
    },
    {
      "epoch": 2.182405845980888,
      "grad_norm": 0.6524946689605713,
      "learning_rate": 0.0001385966681677102,
      "loss": 0.83,
      "step": 7765
    },
    {
      "epoch": 2.1838111298482294,
      "grad_norm": 0.5415175557136536,
      "learning_rate": 0.00013850614435524465,
      "loss": 0.8262,
      "step": 7770
    },
    {
      "epoch": 2.1852164137155707,
      "grad_norm": 0.5459450483322144,
      "learning_rate": 0.00013841558348043885,
      "loss": 0.8351,
      "step": 7775
    },
    {
      "epoch": 2.1866216975829116,
      "grad_norm": 0.7794672846794128,
      "learning_rate": 0.00013832498563045803,
      "loss": 0.8317,
      "step": 7780
    },
    {
      "epoch": 2.188026981450253,
      "grad_norm": 0.6935010552406311,
      "learning_rate": 0.0001382343508925031,
      "loss": 0.8486,
      "step": 7785
    },
    {
      "epoch": 2.189432265317594,
      "grad_norm": 0.7011950612068176,
      "learning_rate": 0.00013814367935381037,
      "loss": 0.8469,
      "step": 7790
    },
    {
      "epoch": 2.1908375491849355,
      "grad_norm": 0.49912673234939575,
      "learning_rate": 0.0001380529711016516,
      "loss": 0.8299,
      "step": 7795
    },
    {
      "epoch": 2.1922428330522767,
      "grad_norm": 0.6039685606956482,
      "learning_rate": 0.00013796222622333387,
      "loss": 0.8326,
      "step": 7800
    },
    {
      "epoch": 2.1936481169196176,
      "grad_norm": 0.48364749550819397,
      "learning_rate": 0.00013787144480619963,
      "loss": 0.8317,
      "step": 7805
    },
    {
      "epoch": 2.195053400786959,
      "grad_norm": 0.5642433762550354,
      "learning_rate": 0.00013778062693762632,
      "loss": 0.8197,
      "step": 7810
    },
    {
      "epoch": 2.1964586846543,
      "grad_norm": 0.5070891976356506,
      "learning_rate": 0.00013768977270502665,
      "loss": 0.8285,
      "step": 7815
    },
    {
      "epoch": 2.1978639685216415,
      "grad_norm": 0.6507462859153748,
      "learning_rate": 0.00013759888219584814,
      "loss": 0.8448,
      "step": 7820
    },
    {
      "epoch": 2.1992692523889827,
      "grad_norm": 0.5021531581878662,
      "learning_rate": 0.00013750795549757335,
      "loss": 0.8283,
      "step": 7825
    },
    {
      "epoch": 2.2006745362563236,
      "grad_norm": 0.5254793167114258,
      "learning_rate": 0.0001374169926977197,
      "loss": 0.8204,
      "step": 7830
    },
    {
      "epoch": 2.202079820123665,
      "grad_norm": 0.6183409690856934,
      "learning_rate": 0.0001373259938838392,
      "loss": 0.8293,
      "step": 7835
    },
    {
      "epoch": 2.203485103991006,
      "grad_norm": 0.5064858198165894,
      "learning_rate": 0.00013723495914351873,
      "loss": 0.848,
      "step": 7840
    },
    {
      "epoch": 2.2048903878583475,
      "grad_norm": 0.493991881608963,
      "learning_rate": 0.00013714388856437957,
      "loss": 0.8439,
      "step": 7845
    },
    {
      "epoch": 2.2062956717256887,
      "grad_norm": 0.685554027557373,
      "learning_rate": 0.0001370527822340776,
      "loss": 0.8522,
      "step": 7850
    },
    {
      "epoch": 2.2077009555930296,
      "grad_norm": 0.899387538433075,
      "learning_rate": 0.00013696164024030305,
      "loss": 0.8252,
      "step": 7855
    },
    {
      "epoch": 2.209106239460371,
      "grad_norm": 0.4570039212703705,
      "learning_rate": 0.00013687046267078055,
      "loss": 0.8402,
      "step": 7860
    },
    {
      "epoch": 2.210511523327712,
      "grad_norm": 0.47615373134613037,
      "learning_rate": 0.00013677924961326892,
      "loss": 0.8196,
      "step": 7865
    },
    {
      "epoch": 2.2119168071950535,
      "grad_norm": 0.5366038680076599,
      "learning_rate": 0.00013668800115556112,
      "loss": 0.8256,
      "step": 7870
    },
    {
      "epoch": 2.2133220910623947,
      "grad_norm": 0.49585074186325073,
      "learning_rate": 0.00013659671738548422,
      "loss": 0.83,
      "step": 7875
    },
    {
      "epoch": 2.214727374929736,
      "grad_norm": 0.6336633563041687,
      "learning_rate": 0.00013650539839089927,
      "loss": 0.8379,
      "step": 7880
    },
    {
      "epoch": 2.216132658797077,
      "grad_norm": 0.5373387932777405,
      "learning_rate": 0.00013641404425970122,
      "loss": 0.8177,
      "step": 7885
    },
    {
      "epoch": 2.217537942664418,
      "grad_norm": 0.6974838376045227,
      "learning_rate": 0.00013632265507981884,
      "loss": 0.832,
      "step": 7890
    },
    {
      "epoch": 2.2189432265317595,
      "grad_norm": 0.5168099403381348,
      "learning_rate": 0.00013623123093921464,
      "loss": 0.84,
      "step": 7895
    },
    {
      "epoch": 2.2203485103991007,
      "grad_norm": 0.519497275352478,
      "learning_rate": 0.00013613977192588475,
      "loss": 0.8298,
      "step": 7900
    },
    {
      "epoch": 2.2217537942664416,
      "grad_norm": 0.582837700843811,
      "learning_rate": 0.00013604827812785893,
      "loss": 0.8315,
      "step": 7905
    },
    {
      "epoch": 2.223159078133783,
      "grad_norm": 0.5562664866447449,
      "learning_rate": 0.00013595674963320036,
      "loss": 0.8313,
      "step": 7910
    },
    {
      "epoch": 2.224564362001124,
      "grad_norm": 0.4701671898365021,
      "learning_rate": 0.0001358651865300056,
      "loss": 0.834,
      "step": 7915
    },
    {
      "epoch": 2.2259696458684655,
      "grad_norm": 0.511581540107727,
      "learning_rate": 0.0001357735889064046,
      "loss": 0.8407,
      "step": 7920
    },
    {
      "epoch": 2.2273749297358068,
      "grad_norm": 0.4707708954811096,
      "learning_rate": 0.0001356819568505605,
      "loss": 0.8304,
      "step": 7925
    },
    {
      "epoch": 2.228780213603148,
      "grad_norm": 0.6949105858802795,
      "learning_rate": 0.0001355902904506695,
      "loss": 0.825,
      "step": 7930
    },
    {
      "epoch": 2.230185497470489,
      "grad_norm": 0.6631535291671753,
      "learning_rate": 0.00013549858979496103,
      "loss": 0.8237,
      "step": 7935
    },
    {
      "epoch": 2.23159078133783,
      "grad_norm": 0.6554431915283203,
      "learning_rate": 0.0001354068549716973,
      "loss": 0.8445,
      "step": 7940
    },
    {
      "epoch": 2.2329960652051715,
      "grad_norm": 0.7375605702400208,
      "learning_rate": 0.0001353150860691736,
      "loss": 0.8313,
      "step": 7945
    },
    {
      "epoch": 2.2344013490725128,
      "grad_norm": 0.6232590079307556,
      "learning_rate": 0.0001352232831757178,
      "loss": 0.8802,
      "step": 7950
    },
    {
      "epoch": 2.235806632939854,
      "grad_norm": 0.517485499382019,
      "learning_rate": 0.0001351314463796907,
      "loss": 0.8358,
      "step": 7955
    },
    {
      "epoch": 2.237211916807195,
      "grad_norm": 0.5201471447944641,
      "learning_rate": 0.0001350395757694856,
      "loss": 0.8418,
      "step": 7960
    },
    {
      "epoch": 2.238617200674536,
      "grad_norm": 0.6151214838027954,
      "learning_rate": 0.00013494767143352839,
      "loss": 0.8203,
      "step": 7965
    },
    {
      "epoch": 2.2400224845418775,
      "grad_norm": 0.650915801525116,
      "learning_rate": 0.00013485573346027737,
      "loss": 0.8394,
      "step": 7970
    },
    {
      "epoch": 2.2414277684092188,
      "grad_norm": 0.6588686108589172,
      "learning_rate": 0.00013476376193822333,
      "loss": 0.8874,
      "step": 7975
    },
    {
      "epoch": 2.24283305227656,
      "grad_norm": 0.5741196274757385,
      "learning_rate": 0.00013467175695588924,
      "loss": 0.8298,
      "step": 7980
    },
    {
      "epoch": 2.244238336143901,
      "grad_norm": 0.7275395393371582,
      "learning_rate": 0.00013457971860183034,
      "loss": 0.8242,
      "step": 7985
    },
    {
      "epoch": 2.245643620011242,
      "grad_norm": 0.5300001502037048,
      "learning_rate": 0.00013448764696463394,
      "loss": 0.8333,
      "step": 7990
    },
    {
      "epoch": 2.2470489038785835,
      "grad_norm": 0.7253971099853516,
      "learning_rate": 0.00013439554213291944,
      "loss": 0.8215,
      "step": 7995
    },
    {
      "epoch": 2.2484541877459248,
      "grad_norm": 0.8704935312271118,
      "learning_rate": 0.00013430340419533812,
      "loss": 0.8268,
      "step": 8000
    },
    {
      "epoch": 2.249859471613266,
      "grad_norm": 0.5288625955581665,
      "learning_rate": 0.0001342112332405732,
      "loss": 0.8351,
      "step": 8005
    },
    {
      "epoch": 2.251264755480607,
      "grad_norm": 0.6613404154777527,
      "learning_rate": 0.00013411902935733967,
      "loss": 0.8311,
      "step": 8010
    },
    {
      "epoch": 2.252670039347948,
      "grad_norm": 0.5405227541923523,
      "learning_rate": 0.00013402679263438416,
      "loss": 0.825,
      "step": 8015
    },
    {
      "epoch": 2.2540753232152895,
      "grad_norm": 0.6060507893562317,
      "learning_rate": 0.00013393452316048493,
      "loss": 0.8174,
      "step": 8020
    },
    {
      "epoch": 2.2554806070826308,
      "grad_norm": 0.7184122204780579,
      "learning_rate": 0.00013384222102445174,
      "loss": 0.8349,
      "step": 8025
    },
    {
      "epoch": 2.256885890949972,
      "grad_norm": 0.6472933292388916,
      "learning_rate": 0.0001337498863151259,
      "loss": 0.8354,
      "step": 8030
    },
    {
      "epoch": 2.2582911748173133,
      "grad_norm": 0.5747955441474915,
      "learning_rate": 0.00013365751912137997,
      "loss": 0.8286,
      "step": 8035
    },
    {
      "epoch": 2.259696458684654,
      "grad_norm": 0.5253928899765015,
      "learning_rate": 0.00013356511953211772,
      "loss": 0.8343,
      "step": 8040
    },
    {
      "epoch": 2.2611017425519955,
      "grad_norm": 0.6021010279655457,
      "learning_rate": 0.00013347268763627427,
      "loss": 0.8414,
      "step": 8045
    },
    {
      "epoch": 2.2625070264193368,
      "grad_norm": 0.5077487826347351,
      "learning_rate": 0.0001333802235228157,
      "loss": 0.849,
      "step": 8050
    },
    {
      "epoch": 2.263912310286678,
      "grad_norm": 0.6178883910179138,
      "learning_rate": 0.00013328772728073913,
      "loss": 0.8167,
      "step": 8055
    },
    {
      "epoch": 2.265317594154019,
      "grad_norm": 0.5458176732063293,
      "learning_rate": 0.00013319519899907266,
      "loss": 0.8406,
      "step": 8060
    },
    {
      "epoch": 2.26672287802136,
      "grad_norm": 0.5361412763595581,
      "learning_rate": 0.00013310263876687516,
      "loss": 0.8466,
      "step": 8065
    },
    {
      "epoch": 2.2681281618887015,
      "grad_norm": 0.5330038666725159,
      "learning_rate": 0.0001330100466732363,
      "loss": 0.8307,
      "step": 8070
    },
    {
      "epoch": 2.2695334457560428,
      "grad_norm": 0.4969392418861389,
      "learning_rate": 0.00013291742280727633,
      "loss": 0.8324,
      "step": 8075
    },
    {
      "epoch": 2.270938729623384,
      "grad_norm": 0.5226914286613464,
      "learning_rate": 0.00013282476725814618,
      "loss": 0.8258,
      "step": 8080
    },
    {
      "epoch": 2.2723440134907253,
      "grad_norm": 0.6179579496383667,
      "learning_rate": 0.00013273208011502729,
      "loss": 0.8349,
      "step": 8085
    },
    {
      "epoch": 2.273749297358066,
      "grad_norm": 0.5634477138519287,
      "learning_rate": 0.0001326393614671314,
      "loss": 0.844,
      "step": 8090
    },
    {
      "epoch": 2.2751545812254075,
      "grad_norm": 0.6256124973297119,
      "learning_rate": 0.00013254661140370063,
      "loss": 0.8242,
      "step": 8095
    },
    {
      "epoch": 2.2765598650927488,
      "grad_norm": 0.7047878503799438,
      "learning_rate": 0.00013245383001400738,
      "loss": 0.8308,
      "step": 8100
    },
    {
      "epoch": 2.27796514896009,
      "grad_norm": 0.6778944134712219,
      "learning_rate": 0.00013236101738735415,
      "loss": 0.8253,
      "step": 8105
    },
    {
      "epoch": 2.279370432827431,
      "grad_norm": 0.5102590918540955,
      "learning_rate": 0.0001322681736130735,
      "loss": 0.8302,
      "step": 8110
    },
    {
      "epoch": 2.280775716694772,
      "grad_norm": 0.6197868585586548,
      "learning_rate": 0.000132175298780528,
      "loss": 0.8562,
      "step": 8115
    },
    {
      "epoch": 2.2821810005621135,
      "grad_norm": 0.500946044921875,
      "learning_rate": 0.0001320823929791101,
      "loss": 0.831,
      "step": 8120
    },
    {
      "epoch": 2.2835862844294548,
      "grad_norm": 0.9998027086257935,
      "learning_rate": 0.00013198945629824206,
      "loss": 0.8666,
      "step": 8125
    },
    {
      "epoch": 2.284991568296796,
      "grad_norm": 0.4757716953754425,
      "learning_rate": 0.00013189648882737587,
      "loss": 0.8212,
      "step": 8130
    },
    {
      "epoch": 2.2863968521641373,
      "grad_norm": 0.5170575380325317,
      "learning_rate": 0.0001318034906559931,
      "loss": 0.8198,
      "step": 8135
    },
    {
      "epoch": 2.287802136031478,
      "grad_norm": 0.5440870523452759,
      "learning_rate": 0.000131710461873605,
      "loss": 0.8358,
      "step": 8140
    },
    {
      "epoch": 2.2892074198988195,
      "grad_norm": 0.5350499749183655,
      "learning_rate": 0.00013161740256975213,
      "loss": 0.8351,
      "step": 8145
    },
    {
      "epoch": 2.2906127037661608,
      "grad_norm": 0.563395082950592,
      "learning_rate": 0.00013152431283400455,
      "loss": 0.8113,
      "step": 8150
    },
    {
      "epoch": 2.292017987633502,
      "grad_norm": 0.528664231300354,
      "learning_rate": 0.00013143119275596147,
      "loss": 0.8172,
      "step": 8155
    },
    {
      "epoch": 2.2934232715008434,
      "grad_norm": 0.5736309885978699,
      "learning_rate": 0.00013133804242525149,
      "loss": 0.8333,
      "step": 8160
    },
    {
      "epoch": 2.294828555368184,
      "grad_norm": 0.5612114071846008,
      "learning_rate": 0.00013124486193153215,
      "loss": 0.8242,
      "step": 8165
    },
    {
      "epoch": 2.2962338392355255,
      "grad_norm": 0.5501384735107422,
      "learning_rate": 0.00013115165136449018,
      "loss": 0.8321,
      "step": 8170
    },
    {
      "epoch": 2.2976391231028668,
      "grad_norm": 0.4476981461048126,
      "learning_rate": 0.00013105841081384112,
      "loss": 0.8179,
      "step": 8175
    },
    {
      "epoch": 2.299044406970208,
      "grad_norm": 0.5093850493431091,
      "learning_rate": 0.00013096514036932942,
      "loss": 0.8278,
      "step": 8180
    },
    {
      "epoch": 2.3004496908375494,
      "grad_norm": 0.5036539435386658,
      "learning_rate": 0.00013087184012072834,
      "loss": 0.8367,
      "step": 8185
    },
    {
      "epoch": 2.30185497470489,
      "grad_norm": 0.48888805508613586,
      "learning_rate": 0.00013077851015783981,
      "loss": 0.8652,
      "step": 8190
    },
    {
      "epoch": 2.3032602585722315,
      "grad_norm": 0.5186904668807983,
      "learning_rate": 0.00013068515057049432,
      "loss": 0.8339,
      "step": 8195
    },
    {
      "epoch": 2.304665542439573,
      "grad_norm": 0.524110734462738,
      "learning_rate": 0.0001305917614485509,
      "loss": 0.8292,
      "step": 8200
    },
    {
      "epoch": 2.306070826306914,
      "grad_norm": 0.6958850622177124,
      "learning_rate": 0.00013049834288189703,
      "loss": 0.8248,
      "step": 8205
    },
    {
      "epoch": 2.3074761101742554,
      "grad_norm": 0.6316078305244446,
      "learning_rate": 0.00013040489496044848,
      "loss": 0.8177,
      "step": 8210
    },
    {
      "epoch": 2.308881394041596,
      "grad_norm": 0.590225100517273,
      "learning_rate": 0.0001303114177741493,
      "loss": 0.8442,
      "step": 8215
    },
    {
      "epoch": 2.3102866779089375,
      "grad_norm": 0.6090141534805298,
      "learning_rate": 0.00013021791141297175,
      "loss": 0.8388,
      "step": 8220
    },
    {
      "epoch": 2.311691961776279,
      "grad_norm": 0.4995424449443817,
      "learning_rate": 0.0001301243759669161,
      "loss": 0.8403,
      "step": 8225
    },
    {
      "epoch": 2.31309724564362,
      "grad_norm": 0.5057527422904968,
      "learning_rate": 0.00013003081152601062,
      "loss": 0.8797,
      "step": 8230
    },
    {
      "epoch": 2.3145025295109614,
      "grad_norm": 0.5079846382141113,
      "learning_rate": 0.00012993721818031157,
      "loss": 0.823,
      "step": 8235
    },
    {
      "epoch": 2.3159078133783026,
      "grad_norm": 0.510495126247406,
      "learning_rate": 0.00012984359601990293,
      "loss": 0.8365,
      "step": 8240
    },
    {
      "epoch": 2.3173130972456435,
      "grad_norm": 0.5368478298187256,
      "learning_rate": 0.0001297499451348965,
      "loss": 0.8303,
      "step": 8245
    },
    {
      "epoch": 2.318718381112985,
      "grad_norm": 0.7168470621109009,
      "learning_rate": 0.00012965626561543163,
      "loss": 0.8357,
      "step": 8250
    },
    {
      "epoch": 2.320123664980326,
      "grad_norm": 0.5443083047866821,
      "learning_rate": 0.00012956255755167532,
      "loss": 0.8393,
      "step": 8255
    },
    {
      "epoch": 2.3215289488476674,
      "grad_norm": 0.5011507272720337,
      "learning_rate": 0.000129468821033822,
      "loss": 0.8418,
      "step": 8260
    },
    {
      "epoch": 2.322934232715008,
      "grad_norm": 0.4719626307487488,
      "learning_rate": 0.00012937505615209354,
      "loss": 0.841,
      "step": 8265
    },
    {
      "epoch": 2.3243395165823495,
      "grad_norm": 0.5777681469917297,
      "learning_rate": 0.00012928126299673902,
      "loss": 0.8114,
      "step": 8270
    },
    {
      "epoch": 2.325744800449691,
      "grad_norm": 0.573501467704773,
      "learning_rate": 0.00012918744165803478,
      "loss": 0.8154,
      "step": 8275
    },
    {
      "epoch": 2.327150084317032,
      "grad_norm": 0.5786929130554199,
      "learning_rate": 0.0001290935922262843,
      "loss": 0.8295,
      "step": 8280
    },
    {
      "epoch": 2.3285553681843734,
      "grad_norm": 0.4540880620479584,
      "learning_rate": 0.0001289997147918181,
      "loss": 0.8205,
      "step": 8285
    },
    {
      "epoch": 2.3299606520517147,
      "grad_norm": 0.6141250133514404,
      "learning_rate": 0.00012890580944499363,
      "loss": 0.8146,
      "step": 8290
    },
    {
      "epoch": 2.3313659359190555,
      "grad_norm": 0.49410197138786316,
      "learning_rate": 0.0001288118762761952,
      "loss": 0.8392,
      "step": 8295
    },
    {
      "epoch": 2.332771219786397,
      "grad_norm": 0.568728506565094,
      "learning_rate": 0.00012871791537583398,
      "loss": 0.8272,
      "step": 8300
    },
    {
      "epoch": 2.334176503653738,
      "grad_norm": 0.5576150417327881,
      "learning_rate": 0.00012862392683434765,
      "loss": 0.833,
      "step": 8305
    },
    {
      "epoch": 2.3355817875210794,
      "grad_norm": 0.5507808327674866,
      "learning_rate": 0.0001285299107422007,
      "loss": 0.8801,
      "step": 8310
    },
    {
      "epoch": 2.33698707138842,
      "grad_norm": 0.4993041455745697,
      "learning_rate": 0.00012843586718988407,
      "loss": 0.835,
      "step": 8315
    },
    {
      "epoch": 2.3383923552557615,
      "grad_norm": 0.6031385064125061,
      "learning_rate": 0.00012834179626791504,
      "loss": 0.8278,
      "step": 8320
    },
    {
      "epoch": 2.339797639123103,
      "grad_norm": 0.5530450344085693,
      "learning_rate": 0.00012824769806683736,
      "loss": 0.8371,
      "step": 8325
    },
    {
      "epoch": 2.341202922990444,
      "grad_norm": 0.5847424268722534,
      "learning_rate": 0.0001281535726772209,
      "loss": 0.8263,
      "step": 8330
    },
    {
      "epoch": 2.3426082068577854,
      "grad_norm": 0.6169049739837646,
      "learning_rate": 0.00012805942018966185,
      "loss": 0.858,
      "step": 8335
    },
    {
      "epoch": 2.3440134907251267,
      "grad_norm": 0.5686418414115906,
      "learning_rate": 0.00012796524069478242,
      "loss": 0.819,
      "step": 8340
    },
    {
      "epoch": 2.3454187745924675,
      "grad_norm": 0.4670698940753937,
      "learning_rate": 0.00012787103428323074,
      "loss": 0.8771,
      "step": 8345
    },
    {
      "epoch": 2.346824058459809,
      "grad_norm": 0.6976507306098938,
      "learning_rate": 0.00012777680104568098,
      "loss": 0.8355,
      "step": 8350
    },
    {
      "epoch": 2.34822934232715,
      "grad_norm": 1.8245209455490112,
      "learning_rate": 0.000127682541072833,
      "loss": 0.884,
      "step": 8355
    },
    {
      "epoch": 2.3496346261944914,
      "grad_norm": 0.469552218914032,
      "learning_rate": 0.00012758825445541248,
      "loss": 0.8433,
      "step": 8360
    },
    {
      "epoch": 2.3510399100618327,
      "grad_norm": 0.5654560327529907,
      "learning_rate": 0.00012749394128417073,
      "loss": 0.8521,
      "step": 8365
    },
    {
      "epoch": 2.3524451939291735,
      "grad_norm": 0.5556405186653137,
      "learning_rate": 0.00012739960164988463,
      "loss": 0.8344,
      "step": 8370
    },
    {
      "epoch": 2.353850477796515,
      "grad_norm": 0.47403526306152344,
      "learning_rate": 0.00012730523564335645,
      "loss": 0.8248,
      "step": 8375
    },
    {
      "epoch": 2.355255761663856,
      "grad_norm": 0.6413717269897461,
      "learning_rate": 0.000127210843355414,
      "loss": 0.8386,
      "step": 8380
    },
    {
      "epoch": 2.3566610455311974,
      "grad_norm": 0.5786089301109314,
      "learning_rate": 0.0001271164248769102,
      "loss": 0.8139,
      "step": 8385
    },
    {
      "epoch": 2.3580663293985387,
      "grad_norm": 0.813075602054596,
      "learning_rate": 0.00012702198029872325,
      "loss": 0.8247,
      "step": 8390
    },
    {
      "epoch": 2.35947161326588,
      "grad_norm": 0.5730018019676208,
      "learning_rate": 0.0001269275097117566,
      "loss": 0.8141,
      "step": 8395
    },
    {
      "epoch": 2.360876897133221,
      "grad_norm": 0.6490610837936401,
      "learning_rate": 0.0001268330132069385,
      "loss": 0.8265,
      "step": 8400
    },
    {
      "epoch": 2.362282181000562,
      "grad_norm": 0.6267738938331604,
      "learning_rate": 0.00012673849087522238,
      "loss": 0.8281,
      "step": 8405
    },
    {
      "epoch": 2.3636874648679034,
      "grad_norm": 0.49770984053611755,
      "learning_rate": 0.0001266439428075863,
      "loss": 0.8287,
      "step": 8410
    },
    {
      "epoch": 2.3650927487352447,
      "grad_norm": 0.49454283714294434,
      "learning_rate": 0.00012654936909503334,
      "loss": 0.8916,
      "step": 8415
    },
    {
      "epoch": 2.3664980326025855,
      "grad_norm": 0.5877101421356201,
      "learning_rate": 0.00012645476982859103,
      "loss": 0.8296,
      "step": 8420
    },
    {
      "epoch": 2.367903316469927,
      "grad_norm": 0.49286267161369324,
      "learning_rate": 0.00012636014509931164,
      "loss": 0.8411,
      "step": 8425
    },
    {
      "epoch": 2.369308600337268,
      "grad_norm": 0.5770230889320374,
      "learning_rate": 0.00012626549499827192,
      "loss": 0.8398,
      "step": 8430
    },
    {
      "epoch": 2.3707138842046094,
      "grad_norm": 0.7743233442306519,
      "learning_rate": 0.00012617081961657293,
      "loss": 0.8281,
      "step": 8435
    },
    {
      "epoch": 2.3721191680719507,
      "grad_norm": 2.1253602504730225,
      "learning_rate": 0.0001260761190453403,
      "loss": 0.8958,
      "step": 8440
    },
    {
      "epoch": 2.373524451939292,
      "grad_norm": 0.5004022121429443,
      "learning_rate": 0.00012598139337572362,
      "loss": 0.8387,
      "step": 8445
    },
    {
      "epoch": 2.374929735806633,
      "grad_norm": 0.5295646786689758,
      "learning_rate": 0.00012588664269889694,
      "loss": 0.8281,
      "step": 8450
    },
    {
      "epoch": 2.376335019673974,
      "grad_norm": 0.5158722400665283,
      "learning_rate": 0.00012579186710605811,
      "loss": 0.8308,
      "step": 8455
    },
    {
      "epoch": 2.3777403035413154,
      "grad_norm": 0.6024075746536255,
      "learning_rate": 0.00012569706668842906,
      "loss": 0.8334,
      "step": 8460
    },
    {
      "epoch": 2.3791455874086567,
      "grad_norm": 0.5735334753990173,
      "learning_rate": 0.00012560224153725566,
      "loss": 0.8347,
      "step": 8465
    },
    {
      "epoch": 2.3805508712759975,
      "grad_norm": 0.641499400138855,
      "learning_rate": 0.00012550739174380758,
      "loss": 0.845,
      "step": 8470
    },
    {
      "epoch": 2.381956155143339,
      "grad_norm": 0.49807512760162354,
      "learning_rate": 0.00012541251739937813,
      "loss": 0.8269,
      "step": 8475
    },
    {
      "epoch": 2.38336143901068,
      "grad_norm": 0.48801207542419434,
      "learning_rate": 0.00012531761859528435,
      "loss": 0.8376,
      "step": 8480
    },
    {
      "epoch": 2.3847667228780214,
      "grad_norm": 0.6413435339927673,
      "learning_rate": 0.00012522269542286673,
      "loss": 0.8223,
      "step": 8485
    },
    {
      "epoch": 2.3861720067453627,
      "grad_norm": 0.49994173645973206,
      "learning_rate": 0.0001251277479734893,
      "loss": 0.8438,
      "step": 8490
    },
    {
      "epoch": 2.387577290612704,
      "grad_norm": 0.5298976302146912,
      "learning_rate": 0.00012503277633853937,
      "loss": 0.8348,
      "step": 8495
    },
    {
      "epoch": 2.388982574480045,
      "grad_norm": 0.5225727558135986,
      "learning_rate": 0.00012493778060942762,
      "loss": 0.8215,
      "step": 8500
    },
    {
      "epoch": 2.390387858347386,
      "grad_norm": 0.760673999786377,
      "learning_rate": 0.0001248427608775878,
      "loss": 0.8427,
      "step": 8505
    },
    {
      "epoch": 2.3917931422147274,
      "grad_norm": 0.8451589345932007,
      "learning_rate": 0.00012474771723447695,
      "loss": 0.8329,
      "step": 8510
    },
    {
      "epoch": 2.3931984260820687,
      "grad_norm": 0.5589327812194824,
      "learning_rate": 0.0001246526497715749,
      "loss": 0.8293,
      "step": 8515
    },
    {
      "epoch": 2.39460370994941,
      "grad_norm": 0.5081980228424072,
      "learning_rate": 0.0001245575585803846,
      "loss": 0.8299,
      "step": 8520
    },
    {
      "epoch": 2.396008993816751,
      "grad_norm": 0.46641793847084045,
      "learning_rate": 0.00012446244375243173,
      "loss": 0.8253,
      "step": 8525
    },
    {
      "epoch": 2.397414277684092,
      "grad_norm": 0.4882664680480957,
      "learning_rate": 0.00012436730537926472,
      "loss": 0.862,
      "step": 8530
    },
    {
      "epoch": 2.3988195615514334,
      "grad_norm": 0.5426138043403625,
      "learning_rate": 0.00012427214355245468,
      "loss": 0.8386,
      "step": 8535
    },
    {
      "epoch": 2.4002248454187747,
      "grad_norm": 0.5505515933036804,
      "learning_rate": 0.00012417695836359536,
      "loss": 0.8249,
      "step": 8540
    },
    {
      "epoch": 2.401630129286116,
      "grad_norm": 0.5572278499603271,
      "learning_rate": 0.00012408174990430291,
      "loss": 0.8337,
      "step": 8545
    },
    {
      "epoch": 2.403035413153457,
      "grad_norm": 0.531121551990509,
      "learning_rate": 0.00012398651826621591,
      "loss": 0.8207,
      "step": 8550
    },
    {
      "epoch": 2.404440697020798,
      "grad_norm": 0.5037003755569458,
      "learning_rate": 0.00012389126354099528,
      "loss": 0.8337,
      "step": 8555
    },
    {
      "epoch": 2.4058459808881394,
      "grad_norm": 0.4792149066925049,
      "learning_rate": 0.00012379598582032409,
      "loss": 0.8284,
      "step": 8560
    },
    {
      "epoch": 2.4072512647554807,
      "grad_norm": 0.714592456817627,
      "learning_rate": 0.00012370068519590755,
      "loss": 0.833,
      "step": 8565
    },
    {
      "epoch": 2.408656548622822,
      "grad_norm": 0.53053879737854,
      "learning_rate": 0.00012360536175947307,
      "loss": 0.8248,
      "step": 8570
    },
    {
      "epoch": 2.410061832490163,
      "grad_norm": 0.49217337369918823,
      "learning_rate": 0.00012351001560276974,
      "loss": 0.8436,
      "step": 8575
    },
    {
      "epoch": 2.411467116357504,
      "grad_norm": 0.505234956741333,
      "learning_rate": 0.0001234146468175688,
      "loss": 0.828,
      "step": 8580
    },
    {
      "epoch": 2.4128724002248454,
      "grad_norm": 0.5867990851402283,
      "learning_rate": 0.00012331925549566303,
      "loss": 0.8234,
      "step": 8585
    },
    {
      "epoch": 2.4142776840921867,
      "grad_norm": 0.49216267466545105,
      "learning_rate": 0.0001232238417288671,
      "loss": 0.8372,
      "step": 8590
    },
    {
      "epoch": 2.415682967959528,
      "grad_norm": 0.48326772451400757,
      "learning_rate": 0.00012312840560901715,
      "loss": 0.8278,
      "step": 8595
    },
    {
      "epoch": 2.4170882518268693,
      "grad_norm": 0.5994096398353577,
      "learning_rate": 0.00012303294722797094,
      "loss": 0.8541,
      "step": 8600
    },
    {
      "epoch": 2.41849353569421,
      "grad_norm": 0.7651786208152771,
      "learning_rate": 0.00012293746667760757,
      "loss": 0.843,
      "step": 8605
    },
    {
      "epoch": 2.4198988195615514,
      "grad_norm": 0.5067122578620911,
      "learning_rate": 0.00012284196404982746,
      "loss": 0.8182,
      "step": 8610
    },
    {
      "epoch": 2.4213041034288927,
      "grad_norm": 0.6482433676719666,
      "learning_rate": 0.00012274643943655238,
      "loss": 0.8396,
      "step": 8615
    },
    {
      "epoch": 2.422709387296234,
      "grad_norm": 0.5103670358657837,
      "learning_rate": 0.00012265089292972517,
      "loss": 0.8315,
      "step": 8620
    },
    {
      "epoch": 2.424114671163575,
      "grad_norm": 0.4920249283313751,
      "learning_rate": 0.00012255532462130984,
      "loss": 0.8371,
      "step": 8625
    },
    {
      "epoch": 2.425519955030916,
      "grad_norm": 0.5247959494590759,
      "learning_rate": 0.00012245973460329123,
      "loss": 0.8413,
      "step": 8630
    },
    {
      "epoch": 2.4269252388982574,
      "grad_norm": 0.5151497721672058,
      "learning_rate": 0.0001223641229676753,
      "loss": 0.8212,
      "step": 8635
    },
    {
      "epoch": 2.4283305227655987,
      "grad_norm": 0.4917905032634735,
      "learning_rate": 0.00012226848980648856,
      "loss": 0.8403,
      "step": 8640
    },
    {
      "epoch": 2.42973580663294,
      "grad_norm": 0.46560433506965637,
      "learning_rate": 0.00012217283521177844,
      "loss": 0.8267,
      "step": 8645
    },
    {
      "epoch": 2.4311410905002813,
      "grad_norm": 0.47513896226882935,
      "learning_rate": 0.00012207715927561288,
      "loss": 0.83,
      "step": 8650
    },
    {
      "epoch": 2.432546374367622,
      "grad_norm": 0.7692919373512268,
      "learning_rate": 0.00012198146209008046,
      "loss": 0.814,
      "step": 8655
    },
    {
      "epoch": 2.4339516582349634,
      "grad_norm": 0.5252566337585449,
      "learning_rate": 0.00012188574374729014,
      "loss": 0.8461,
      "step": 8660
    },
    {
      "epoch": 2.4353569421023047,
      "grad_norm": 0.5163432359695435,
      "learning_rate": 0.0001217900043393712,
      "loss": 0.8415,
      "step": 8665
    },
    {
      "epoch": 2.436762225969646,
      "grad_norm": 0.4713330566883087,
      "learning_rate": 0.00012169424395847335,
      "loss": 0.8203,
      "step": 8670
    },
    {
      "epoch": 2.438167509836987,
      "grad_norm": 0.5539212226867676,
      "learning_rate": 0.00012159846269676638,
      "loss": 0.8269,
      "step": 8675
    },
    {
      "epoch": 2.439572793704328,
      "grad_norm": 0.5295586585998535,
      "learning_rate": 0.00012150266064644013,
      "loss": 0.8373,
      "step": 8680
    },
    {
      "epoch": 2.4409780775716694,
      "grad_norm": 0.6764742136001587,
      "learning_rate": 0.0001214068378997046,
      "loss": 0.8362,
      "step": 8685
    },
    {
      "epoch": 2.4423833614390107,
      "grad_norm": 0.5514596700668335,
      "learning_rate": 0.00012131099454878952,
      "loss": 0.819,
      "step": 8690
    },
    {
      "epoch": 2.443788645306352,
      "grad_norm": 0.5967371463775635,
      "learning_rate": 0.00012121513068594463,
      "loss": 0.8281,
      "step": 8695
    },
    {
      "epoch": 2.4451939291736933,
      "grad_norm": 0.5824768543243408,
      "learning_rate": 0.0001211192464034393,
      "loss": 0.8334,
      "step": 8700
    },
    {
      "epoch": 2.446599213041034,
      "grad_norm": 0.5651732087135315,
      "learning_rate": 0.00012102334179356265,
      "loss": 0.8314,
      "step": 8705
    },
    {
      "epoch": 2.4480044969083754,
      "grad_norm": 0.4605449438095093,
      "learning_rate": 0.00012092741694862324,
      "loss": 0.8284,
      "step": 8710
    },
    {
      "epoch": 2.4494097807757167,
      "grad_norm": 0.6510748267173767,
      "learning_rate": 0.00012083147196094917,
      "loss": 0.8355,
      "step": 8715
    },
    {
      "epoch": 2.450815064643058,
      "grad_norm": 0.5636357069015503,
      "learning_rate": 0.000120735506922888,
      "loss": 0.8297,
      "step": 8720
    },
    {
      "epoch": 2.4522203485103993,
      "grad_norm": 0.5459914207458496,
      "learning_rate": 0.00012063952192680643,
      "loss": 0.8466,
      "step": 8725
    },
    {
      "epoch": 2.45362563237774,
      "grad_norm": 0.5210451483726501,
      "learning_rate": 0.00012054351706509055,
      "loss": 0.8223,
      "step": 8730
    },
    {
      "epoch": 2.4550309162450814,
      "grad_norm": 0.4928796589374542,
      "learning_rate": 0.00012044749243014539,
      "loss": 0.8372,
      "step": 8735
    },
    {
      "epoch": 2.4564362001124227,
      "grad_norm": 0.47832977771759033,
      "learning_rate": 0.00012035144811439516,
      "loss": 0.8335,
      "step": 8740
    },
    {
      "epoch": 2.457841483979764,
      "grad_norm": 0.46908292174339294,
      "learning_rate": 0.00012025538421028293,
      "loss": 0.835,
      "step": 8745
    },
    {
      "epoch": 2.4592467678471053,
      "grad_norm": 0.4670230448246002,
      "learning_rate": 0.00012015930081027065,
      "loss": 0.8326,
      "step": 8750
    },
    {
      "epoch": 2.4606520517144466,
      "grad_norm": 0.47103649377822876,
      "learning_rate": 0.00012006319800683905,
      "loss": 0.8179,
      "step": 8755
    },
    {
      "epoch": 2.4620573355817874,
      "grad_norm": 0.46885553002357483,
      "learning_rate": 0.0001199670758924875,
      "loss": 0.8179,
      "step": 8760
    },
    {
      "epoch": 2.4634626194491287,
      "grad_norm": 0.5258293151855469,
      "learning_rate": 0.00011987093455973397,
      "loss": 0.8278,
      "step": 8765
    },
    {
      "epoch": 2.46486790331647,
      "grad_norm": 0.47522857785224915,
      "learning_rate": 0.00011977477410111492,
      "loss": 0.8351,
      "step": 8770
    },
    {
      "epoch": 2.4662731871838113,
      "grad_norm": 0.8046366572380066,
      "learning_rate": 0.00011967859460918531,
      "loss": 0.8654,
      "step": 8775
    },
    {
      "epoch": 2.467678471051152,
      "grad_norm": 0.7628329396247864,
      "learning_rate": 0.00011958239617651825,
      "loss": 0.8776,
      "step": 8780
    },
    {
      "epoch": 2.4690837549184934,
      "grad_norm": 0.6719951629638672,
      "learning_rate": 0.00011948617889570522,
      "loss": 0.831,
      "step": 8785
    },
    {
      "epoch": 2.4704890387858347,
      "grad_norm": 0.4928390383720398,
      "learning_rate": 0.00011938994285935577,
      "loss": 0.8218,
      "step": 8790
    },
    {
      "epoch": 2.471894322653176,
      "grad_norm": 0.6428622603416443,
      "learning_rate": 0.00011929368816009752,
      "loss": 0.8246,
      "step": 8795
    },
    {
      "epoch": 2.4732996065205173,
      "grad_norm": 0.49304255843162537,
      "learning_rate": 0.00011919741489057612,
      "loss": 0.84,
      "step": 8800
    },
    {
      "epoch": 2.4747048903878586,
      "grad_norm": 0.4877597987651825,
      "learning_rate": 0.00011910112314345494,
      "loss": 0.8731,
      "step": 8805
    },
    {
      "epoch": 2.4761101742551994,
      "grad_norm": 0.5115801692008972,
      "learning_rate": 0.00011900481301141531,
      "loss": 0.8181,
      "step": 8810
    },
    {
      "epoch": 2.4775154581225407,
      "grad_norm": 0.5835259556770325,
      "learning_rate": 0.00011890848458715611,
      "loss": 0.8265,
      "step": 8815
    },
    {
      "epoch": 2.478920741989882,
      "grad_norm": 0.5181130170822144,
      "learning_rate": 0.00011881213796339392,
      "loss": 0.8356,
      "step": 8820
    },
    {
      "epoch": 2.4803260258572233,
      "grad_norm": 0.7785965800285339,
      "learning_rate": 0.00011871577323286285,
      "loss": 0.8406,
      "step": 8825
    },
    {
      "epoch": 2.481731309724564,
      "grad_norm": 0.5177497863769531,
      "learning_rate": 0.0001186193904883143,
      "loss": 0.8282,
      "step": 8830
    },
    {
      "epoch": 2.4831365935919054,
      "grad_norm": 0.5036835074424744,
      "learning_rate": 0.00011852298982251718,
      "loss": 0.8379,
      "step": 8835
    },
    {
      "epoch": 2.4845418774592467,
      "grad_norm": 0.5061383247375488,
      "learning_rate": 0.00011842657132825752,
      "loss": 0.8269,
      "step": 8840
    },
    {
      "epoch": 2.485947161326588,
      "grad_norm": 0.543797492980957,
      "learning_rate": 0.00011833013509833859,
      "loss": 0.8327,
      "step": 8845
    },
    {
      "epoch": 2.4873524451939293,
      "grad_norm": 0.48197653889656067,
      "learning_rate": 0.0001182336812255807,
      "loss": 0.8245,
      "step": 8850
    },
    {
      "epoch": 2.4887577290612706,
      "grad_norm": 0.7762760519981384,
      "learning_rate": 0.00011813720980282115,
      "loss": 0.8118,
      "step": 8855
    },
    {
      "epoch": 2.4901630129286114,
      "grad_norm": 0.5974884033203125,
      "learning_rate": 0.00011804072092291414,
      "loss": 0.8398,
      "step": 8860
    },
    {
      "epoch": 2.4915682967959527,
      "grad_norm": 0.5197319984436035,
      "learning_rate": 0.0001179442146787306,
      "loss": 0.8331,
      "step": 8865
    },
    {
      "epoch": 2.492973580663294,
      "grad_norm": 0.5598850846290588,
      "learning_rate": 0.00011784769116315825,
      "loss": 0.8247,
      "step": 8870
    },
    {
      "epoch": 2.4943788645306353,
      "grad_norm": 0.5612096786499023,
      "learning_rate": 0.00011775115046910148,
      "loss": 0.8329,
      "step": 8875
    },
    {
      "epoch": 2.4957841483979766,
      "grad_norm": 0.48368990421295166,
      "learning_rate": 0.00011765459268948111,
      "loss": 0.8363,
      "step": 8880
    },
    {
      "epoch": 2.4971894322653174,
      "grad_norm": 0.4993809163570404,
      "learning_rate": 0.00011755801791723442,
      "loss": 0.8422,
      "step": 8885
    },
    {
      "epoch": 2.4985947161326587,
      "grad_norm": 0.6153314113616943,
      "learning_rate": 0.00011746142624531509,
      "loss": 0.8329,
      "step": 8890
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.6654748320579529,
      "learning_rate": 0.00011736481776669306,
      "loss": 0.8187,
      "step": 8895
    },
    {
      "epoch": 2.5014052838673413,
      "grad_norm": 0.9411234855651855,
      "learning_rate": 0.0001172681925743544,
      "loss": 0.841,
      "step": 8900
    },
    {
      "epoch": 2.5028105677346826,
      "grad_norm": 0.5395568013191223,
      "learning_rate": 0.00011717155076130133,
      "loss": 0.8348,
      "step": 8905
    },
    {
      "epoch": 2.504215851602024,
      "grad_norm": 0.6058316826820374,
      "learning_rate": 0.00011707489242055203,
      "loss": 0.8215,
      "step": 8910
    },
    {
      "epoch": 2.5056211354693647,
      "grad_norm": 0.5065362453460693,
      "learning_rate": 0.0001169782176451406,
      "loss": 0.8298,
      "step": 8915
    },
    {
      "epoch": 2.507026419336706,
      "grad_norm": 0.5494495034217834,
      "learning_rate": 0.00011688152652811692,
      "loss": 0.8375,
      "step": 8920
    },
    {
      "epoch": 2.5084317032040473,
      "grad_norm": 0.648129940032959,
      "learning_rate": 0.00011678481916254669,
      "loss": 0.8225,
      "step": 8925
    },
    {
      "epoch": 2.5098369870713886,
      "grad_norm": 0.5400100946426392,
      "learning_rate": 0.00011668809564151117,
      "loss": 0.8286,
      "step": 8930
    },
    {
      "epoch": 2.5112422709387294,
      "grad_norm": 0.5004889369010925,
      "learning_rate": 0.00011659135605810716,
      "loss": 0.8301,
      "step": 8935
    },
    {
      "epoch": 2.5126475548060707,
      "grad_norm": 0.4907006621360779,
      "learning_rate": 0.00011649460050544698,
      "loss": 0.8297,
      "step": 8940
    },
    {
      "epoch": 2.514052838673412,
      "grad_norm": 0.4957001507282257,
      "learning_rate": 0.00011639782907665828,
      "loss": 0.822,
      "step": 8945
    },
    {
      "epoch": 2.5154581225407533,
      "grad_norm": 0.555509090423584,
      "learning_rate": 0.00011630104186488405,
      "loss": 0.8272,
      "step": 8950
    },
    {
      "epoch": 2.5168634064080946,
      "grad_norm": 0.4936651587486267,
      "learning_rate": 0.00011620423896328234,
      "loss": 0.8339,
      "step": 8955
    },
    {
      "epoch": 2.518268690275436,
      "grad_norm": 0.515689492225647,
      "learning_rate": 0.00011610742046502648,
      "loss": 0.8274,
      "step": 8960
    },
    {
      "epoch": 2.5196739741427767,
      "grad_norm": 0.48193639516830444,
      "learning_rate": 0.00011601058646330463,
      "loss": 0.8298,
      "step": 8965
    },
    {
      "epoch": 2.521079258010118,
      "grad_norm": 0.5371465682983398,
      "learning_rate": 0.00011591373705132,
      "loss": 0.8789,
      "step": 8970
    },
    {
      "epoch": 2.5224845418774593,
      "grad_norm": 0.5975221395492554,
      "learning_rate": 0.00011581687232229062,
      "loss": 0.8154,
      "step": 8975
    },
    {
      "epoch": 2.5238898257448006,
      "grad_norm": 0.5806734561920166,
      "learning_rate": 0.0001157199923694492,
      "loss": 0.838,
      "step": 8980
    },
    {
      "epoch": 2.5252951096121414,
      "grad_norm": 0.5201011300086975,
      "learning_rate": 0.00011562309728604314,
      "loss": 0.8465,
      "step": 8985
    },
    {
      "epoch": 2.5267003934794827,
      "grad_norm": 0.45833903551101685,
      "learning_rate": 0.00011552618716533441,
      "loss": 0.8104,
      "step": 8990
    },
    {
      "epoch": 2.528105677346824,
      "grad_norm": 0.48683154582977295,
      "learning_rate": 0.00011542926210059944,
      "loss": 0.8418,
      "step": 8995
    },
    {
      "epoch": 2.5295109612141653,
      "grad_norm": 0.49923381209373474,
      "learning_rate": 0.00011533232218512904,
      "loss": 0.8247,
      "step": 9000
    },
    {
      "epoch": 2.5309162450815066,
      "grad_norm": 0.4724583625793457,
      "learning_rate": 0.00011523536751222836,
      "loss": 0.8475,
      "step": 9005
    },
    {
      "epoch": 2.532321528948848,
      "grad_norm": 0.4973272681236267,
      "learning_rate": 0.00011513839817521668,
      "loss": 0.8373,
      "step": 9010
    },
    {
      "epoch": 2.5337268128161887,
      "grad_norm": 0.5687633752822876,
      "learning_rate": 0.00011504141426742742,
      "loss": 0.8362,
      "step": 9015
    },
    {
      "epoch": 2.53513209668353,
      "grad_norm": 0.5235709547996521,
      "learning_rate": 0.00011494441588220808,
      "loss": 0.8448,
      "step": 9020
    },
    {
      "epoch": 2.5365373805508713,
      "grad_norm": 0.5844621062278748,
      "learning_rate": 0.00011484740311292002,
      "loss": 0.8388,
      "step": 9025
    },
    {
      "epoch": 2.5379426644182126,
      "grad_norm": 0.5028629302978516,
      "learning_rate": 0.0001147503760529385,
      "loss": 0.8249,
      "step": 9030
    },
    {
      "epoch": 2.5393479482855534,
      "grad_norm": 0.7003517746925354,
      "learning_rate": 0.00011465333479565248,
      "loss": 0.8491,
      "step": 9035
    },
    {
      "epoch": 2.5407532321528947,
      "grad_norm": 0.5840672254562378,
      "learning_rate": 0.00011455627943446461,
      "loss": 0.8363,
      "step": 9040
    },
    {
      "epoch": 2.542158516020236,
      "grad_norm": 0.6555740237236023,
      "learning_rate": 0.00011445921006279115,
      "loss": 0.8065,
      "step": 9045
    },
    {
      "epoch": 2.5435637998875773,
      "grad_norm": 0.5312934517860413,
      "learning_rate": 0.00011436212677406178,
      "loss": 0.8258,
      "step": 9050
    },
    {
      "epoch": 2.5449690837549186,
      "grad_norm": 0.49611037969589233,
      "learning_rate": 0.00011426502966171966,
      "loss": 0.8276,
      "step": 9055
    },
    {
      "epoch": 2.54637436762226,
      "grad_norm": 0.4570031464099884,
      "learning_rate": 0.00011416791881922115,
      "loss": 0.8228,
      "step": 9060
    },
    {
      "epoch": 2.547779651489601,
      "grad_norm": 0.5568541884422302,
      "learning_rate": 0.00011407079434003591,
      "loss": 0.8441,
      "step": 9065
    },
    {
      "epoch": 2.549184935356942,
      "grad_norm": 0.5997769832611084,
      "learning_rate": 0.00011397365631764669,
      "loss": 0.8447,
      "step": 9070
    },
    {
      "epoch": 2.5505902192242833,
      "grad_norm": 0.5078825354576111,
      "learning_rate": 0.00011387650484554928,
      "loss": 0.8321,
      "step": 9075
    },
    {
      "epoch": 2.5519955030916246,
      "grad_norm": 0.6241423487663269,
      "learning_rate": 0.00011377934001725243,
      "loss": 0.8353,
      "step": 9080
    },
    {
      "epoch": 2.5534007869589654,
      "grad_norm": 0.5224640369415283,
      "learning_rate": 0.00011368216192627773,
      "loss": 0.8359,
      "step": 9085
    },
    {
      "epoch": 2.5548060708263067,
      "grad_norm": 0.5141652822494507,
      "learning_rate": 0.00011358497066615951,
      "loss": 0.8559,
      "step": 9090
    },
    {
      "epoch": 2.556211354693648,
      "grad_norm": 0.4821389615535736,
      "learning_rate": 0.00011348776633044483,
      "loss": 0.8179,
      "step": 9095
    },
    {
      "epoch": 2.5576166385609893,
      "grad_norm": 0.5298710465431213,
      "learning_rate": 0.00011339054901269328,
      "loss": 0.8222,
      "step": 9100
    },
    {
      "epoch": 2.5590219224283306,
      "grad_norm": 0.5908434391021729,
      "learning_rate": 0.000113293318806477,
      "loss": 0.8311,
      "step": 9105
    },
    {
      "epoch": 2.560427206295672,
      "grad_norm": 0.6493870615959167,
      "learning_rate": 0.00011319607580538055,
      "loss": 0.8314,
      "step": 9110
    },
    {
      "epoch": 2.561832490163013,
      "grad_norm": 0.6728103756904602,
      "learning_rate": 0.00011309882010300068,
      "loss": 0.8411,
      "step": 9115
    },
    {
      "epoch": 2.563237774030354,
      "grad_norm": 0.8559635877609253,
      "learning_rate": 0.00011300155179294647,
      "loss": 0.828,
      "step": 9120
    },
    {
      "epoch": 2.5646430578976953,
      "grad_norm": 0.5684670209884644,
      "learning_rate": 0.00011290427096883914,
      "loss": 0.8408,
      "step": 9125
    },
    {
      "epoch": 2.5660483417650366,
      "grad_norm": 0.5541810393333435,
      "learning_rate": 0.0001128069777243119,
      "loss": 0.8161,
      "step": 9130
    },
    {
      "epoch": 2.567453625632378,
      "grad_norm": 0.7451156973838806,
      "learning_rate": 0.00011270967215300998,
      "loss": 0.8366,
      "step": 9135
    },
    {
      "epoch": 2.5688589094997187,
      "grad_norm": 0.5444964170455933,
      "learning_rate": 0.00011261235434859041,
      "loss": 0.8494,
      "step": 9140
    },
    {
      "epoch": 2.57026419336706,
      "grad_norm": 0.6006889343261719,
      "learning_rate": 0.00011251502440472206,
      "loss": 0.8353,
      "step": 9145
    },
    {
      "epoch": 2.5716694772344013,
      "grad_norm": 0.5542729496955872,
      "learning_rate": 0.00011241768241508537,
      "loss": 0.8183,
      "step": 9150
    },
    {
      "epoch": 2.5730747611017426,
      "grad_norm": 0.5589856505393982,
      "learning_rate": 0.00011232032847337252,
      "loss": 0.822,
      "step": 9155
    },
    {
      "epoch": 2.574480044969084,
      "grad_norm": 0.8560423254966736,
      "learning_rate": 0.00011222296267328711,
      "loss": 0.8365,
      "step": 9160
    },
    {
      "epoch": 2.575885328836425,
      "grad_norm": 0.5394550561904907,
      "learning_rate": 0.00011212558510854416,
      "loss": 0.8757,
      "step": 9165
    },
    {
      "epoch": 2.577290612703766,
      "grad_norm": 0.4776645600795746,
      "learning_rate": 0.00011202819587287001,
      "loss": 0.8282,
      "step": 9170
    },
    {
      "epoch": 2.5786958965711073,
      "grad_norm": 0.5669585466384888,
      "learning_rate": 0.00011193079506000226,
      "loss": 0.8186,
      "step": 9175
    },
    {
      "epoch": 2.5801011804384486,
      "grad_norm": 0.5108312964439392,
      "learning_rate": 0.00011183338276368964,
      "loss": 0.8433,
      "step": 9180
    },
    {
      "epoch": 2.58150646430579,
      "grad_norm": 0.5904492735862732,
      "learning_rate": 0.00011173595907769193,
      "loss": 0.8166,
      "step": 9185
    },
    {
      "epoch": 2.5829117481731307,
      "grad_norm": 0.8534032702445984,
      "learning_rate": 0.00011163852409577988,
      "loss": 0.8372,
      "step": 9190
    },
    {
      "epoch": 2.584317032040472,
      "grad_norm": 0.6443466544151306,
      "learning_rate": 0.0001115410779117351,
      "loss": 0.8292,
      "step": 9195
    },
    {
      "epoch": 2.5857223159078133,
      "grad_norm": 0.5002859234809875,
      "learning_rate": 0.00011144362061934996,
      "loss": 0.8433,
      "step": 9200
    },
    {
      "epoch": 2.5871275997751546,
      "grad_norm": 0.463712215423584,
      "learning_rate": 0.0001113461523124276,
      "loss": 0.8074,
      "step": 9205
    },
    {
      "epoch": 2.588532883642496,
      "grad_norm": 0.6767189502716064,
      "learning_rate": 0.00011124867308478167,
      "loss": 0.8422,
      "step": 9210
    },
    {
      "epoch": 2.589938167509837,
      "grad_norm": 0.6467483043670654,
      "learning_rate": 0.00011115118303023641,
      "loss": 0.8301,
      "step": 9215
    },
    {
      "epoch": 2.591343451377178,
      "grad_norm": 0.730329692363739,
      "learning_rate": 0.00011105368224262642,
      "loss": 0.8292,
      "step": 9220
    },
    {
      "epoch": 2.5927487352445193,
      "grad_norm": 0.6260877251625061,
      "learning_rate": 0.00011095617081579663,
      "loss": 0.8136,
      "step": 9225
    },
    {
      "epoch": 2.5941540191118606,
      "grad_norm": 0.5238600373268127,
      "learning_rate": 0.0001108586488436023,
      "loss": 0.8227,
      "step": 9230
    },
    {
      "epoch": 2.595559302979202,
      "grad_norm": 0.5659443736076355,
      "learning_rate": 0.00011076111641990874,
      "loss": 0.8446,
      "step": 9235
    },
    {
      "epoch": 2.5969645868465427,
      "grad_norm": 0.5455038547515869,
      "learning_rate": 0.00011066357363859135,
      "loss": 0.83,
      "step": 9240
    },
    {
      "epoch": 2.598369870713884,
      "grad_norm": 0.5853575468063354,
      "learning_rate": 0.00011056602059353549,
      "loss": 0.8279,
      "step": 9245
    },
    {
      "epoch": 2.5997751545812253,
      "grad_norm": 0.5086763501167297,
      "learning_rate": 0.00011046845737863643,
      "loss": 0.8341,
      "step": 9250
    },
    {
      "epoch": 2.6011804384485666,
      "grad_norm": 0.511115550994873,
      "learning_rate": 0.00011037088408779921,
      "loss": 0.8431,
      "step": 9255
    },
    {
      "epoch": 2.602585722315908,
      "grad_norm": 0.46000874042510986,
      "learning_rate": 0.00011027330081493858,
      "loss": 0.8333,
      "step": 9260
    },
    {
      "epoch": 2.603991006183249,
      "grad_norm": 0.5504966974258423,
      "learning_rate": 0.0001101757076539789,
      "loss": 0.8325,
      "step": 9265
    },
    {
      "epoch": 2.6053962900505905,
      "grad_norm": 0.4799230992794037,
      "learning_rate": 0.00011007810469885398,
      "loss": 0.811,
      "step": 9270
    },
    {
      "epoch": 2.6068015739179313,
      "grad_norm": 0.49556994438171387,
      "learning_rate": 0.00010998049204350714,
      "loss": 0.8431,
      "step": 9275
    },
    {
      "epoch": 2.6082068577852726,
      "grad_norm": 0.4762093722820282,
      "learning_rate": 0.00010988286978189099,
      "loss": 0.8237,
      "step": 9280
    },
    {
      "epoch": 2.609612141652614,
      "grad_norm": 0.5235040187835693,
      "learning_rate": 0.00010978523800796747,
      "loss": 0.8327,
      "step": 9285
    },
    {
      "epoch": 2.611017425519955,
      "grad_norm": 0.4617610573768616,
      "learning_rate": 0.00010968759681570755,
      "loss": 0.8218,
      "step": 9290
    },
    {
      "epoch": 2.612422709387296,
      "grad_norm": 0.5892154574394226,
      "learning_rate": 0.00010958994629909134,
      "loss": 0.8135,
      "step": 9295
    },
    {
      "epoch": 2.6138279932546373,
      "grad_norm": 1.155057668685913,
      "learning_rate": 0.00010949228655210788,
      "loss": 0.8801,
      "step": 9300
    },
    {
      "epoch": 2.6152332771219786,
      "grad_norm": 0.4898678958415985,
      "learning_rate": 0.00010939461766875519,
      "loss": 0.8188,
      "step": 9305
    },
    {
      "epoch": 2.61663856098932,
      "grad_norm": 0.8992647528648376,
      "learning_rate": 0.00010929693974303995,
      "loss": 0.8842,
      "step": 9310
    },
    {
      "epoch": 2.618043844856661,
      "grad_norm": 0.5119013786315918,
      "learning_rate": 0.00010919925286897765,
      "loss": 0.8279,
      "step": 9315
    },
    {
      "epoch": 2.6194491287240025,
      "grad_norm": 0.6281744241714478,
      "learning_rate": 0.00010910155714059235,
      "loss": 0.8224,
      "step": 9320
    },
    {
      "epoch": 2.6208544125913433,
      "grad_norm": 0.500538170337677,
      "learning_rate": 0.00010900385265191661,
      "loss": 0.8267,
      "step": 9325
    },
    {
      "epoch": 2.6222596964586846,
      "grad_norm": 0.5560206174850464,
      "learning_rate": 0.00010890613949699146,
      "loss": 0.8277,
      "step": 9330
    },
    {
      "epoch": 2.623664980326026,
      "grad_norm": 0.5452876687049866,
      "learning_rate": 0.0001088084177698663,
      "loss": 0.8385,
      "step": 9335
    },
    {
      "epoch": 2.625070264193367,
      "grad_norm": 0.5131661891937256,
      "learning_rate": 0.00010871068756459867,
      "loss": 0.8291,
      "step": 9340
    },
    {
      "epoch": 2.626475548060708,
      "grad_norm": 0.5180448889732361,
      "learning_rate": 0.0001086129489752544,
      "loss": 0.809,
      "step": 9345
    },
    {
      "epoch": 2.6278808319280493,
      "grad_norm": 0.4731772243976593,
      "learning_rate": 0.00010851520209590728,
      "loss": 0.8358,
      "step": 9350
    },
    {
      "epoch": 2.6292861157953906,
      "grad_norm": 0.4596666991710663,
      "learning_rate": 0.00010841744702063916,
      "loss": 0.8344,
      "step": 9355
    },
    {
      "epoch": 2.630691399662732,
      "grad_norm": 0.5088164806365967,
      "learning_rate": 0.00010831968384353974,
      "loss": 0.8811,
      "step": 9360
    },
    {
      "epoch": 2.632096683530073,
      "grad_norm": 0.5293578505516052,
      "learning_rate": 0.00010822191265870656,
      "loss": 0.8207,
      "step": 9365
    },
    {
      "epoch": 2.6335019673974145,
      "grad_norm": 0.5973764657974243,
      "learning_rate": 0.0001081241335602448,
      "loss": 0.8295,
      "step": 9370
    },
    {
      "epoch": 2.6349072512647553,
      "grad_norm": 0.48312100768089294,
      "learning_rate": 0.00010802634664226723,
      "loss": 0.8355,
      "step": 9375
    },
    {
      "epoch": 2.6363125351320966,
      "grad_norm": 0.8425654768943787,
      "learning_rate": 0.00010792855199889431,
      "loss": 0.8285,
      "step": 9380
    },
    {
      "epoch": 2.637717818999438,
      "grad_norm": 5.292347431182861,
      "learning_rate": 0.00010783074972425378,
      "loss": 0.8939,
      "step": 9385
    },
    {
      "epoch": 2.639123102866779,
      "grad_norm": 0.9256778359413147,
      "learning_rate": 0.00010773293991248079,
      "loss": 0.8325,
      "step": 9390
    },
    {
      "epoch": 2.64052838673412,
      "grad_norm": 0.5325646996498108,
      "learning_rate": 0.00010763512265771772,
      "loss": 0.8237,
      "step": 9395
    },
    {
      "epoch": 2.6419336706014613,
      "grad_norm": 0.5501137971878052,
      "learning_rate": 0.00010753729805411412,
      "loss": 0.8207,
      "step": 9400
    },
    {
      "epoch": 2.6433389544688026,
      "grad_norm": 0.6308431625366211,
      "learning_rate": 0.00010743946619582664,
      "loss": 0.8215,
      "step": 9405
    },
    {
      "epoch": 2.644744238336144,
      "grad_norm": 0.48350989818573,
      "learning_rate": 0.0001073416271770189,
      "loss": 0.8325,
      "step": 9410
    },
    {
      "epoch": 2.646149522203485,
      "grad_norm": 0.5262377262115479,
      "learning_rate": 0.0001072437810918614,
      "loss": 0.8252,
      "step": 9415
    },
    {
      "epoch": 2.6475548060708265,
      "grad_norm": 0.5404006838798523,
      "learning_rate": 0.00010714592803453139,
      "loss": 0.8764,
      "step": 9420
    },
    {
      "epoch": 2.648960089938168,
      "grad_norm": 0.4993666410446167,
      "learning_rate": 0.00010704806809921292,
      "loss": 0.8264,
      "step": 9425
    },
    {
      "epoch": 2.6503653738055086,
      "grad_norm": 0.5050360560417175,
      "learning_rate": 0.00010695020138009666,
      "loss": 0.8199,
      "step": 9430
    },
    {
      "epoch": 2.65177065767285,
      "grad_norm": 0.9669815301895142,
      "learning_rate": 0.00010685232797137976,
      "loss": 0.8458,
      "step": 9435
    },
    {
      "epoch": 2.653175941540191,
      "grad_norm": 1.6229816675186157,
      "learning_rate": 0.0001067544479672658,
      "loss": 0.8799,
      "step": 9440
    },
    {
      "epoch": 2.654581225407532,
      "grad_norm": 0.4925650954246521,
      "learning_rate": 0.00010665656146196475,
      "loss": 0.8317,
      "step": 9445
    },
    {
      "epoch": 2.6559865092748733,
      "grad_norm": 0.4756428599357605,
      "learning_rate": 0.00010655866854969278,
      "loss": 0.8261,
      "step": 9450
    },
    {
      "epoch": 2.6573917931422146,
      "grad_norm": 0.4711908996105194,
      "learning_rate": 0.00010646076932467232,
      "loss": 0.8339,
      "step": 9455
    },
    {
      "epoch": 2.658797077009556,
      "grad_norm": 0.49394097924232483,
      "learning_rate": 0.00010636286388113184,
      "loss": 0.8682,
      "step": 9460
    },
    {
      "epoch": 2.660202360876897,
      "grad_norm": 0.5166155695915222,
      "learning_rate": 0.00010626495231330568,
      "loss": 0.8346,
      "step": 9465
    },
    {
      "epoch": 2.6616076447442385,
      "grad_norm": 0.49899595975875854,
      "learning_rate": 0.0001061670347154343,
      "loss": 0.8214,
      "step": 9470
    },
    {
      "epoch": 2.66301292861158,
      "grad_norm": 0.477092444896698,
      "learning_rate": 0.00010606911118176372,
      "loss": 0.8332,
      "step": 9475
    },
    {
      "epoch": 2.6644182124789206,
      "grad_norm": 0.4610581696033478,
      "learning_rate": 0.00010597118180654584,
      "loss": 0.8328,
      "step": 9480
    },
    {
      "epoch": 2.665823496346262,
      "grad_norm": 0.5816370844841003,
      "learning_rate": 0.00010587324668403815,
      "loss": 0.8287,
      "step": 9485
    },
    {
      "epoch": 2.667228780213603,
      "grad_norm": 0.5491670966148376,
      "learning_rate": 0.00010577530590850362,
      "loss": 0.8295,
      "step": 9490
    },
    {
      "epoch": 2.6686340640809445,
      "grad_norm": 0.6526244282722473,
      "learning_rate": 0.00010567735957421072,
      "loss": 0.8422,
      "step": 9495
    },
    {
      "epoch": 2.6700393479482853,
      "grad_norm": 0.48390865325927734,
      "learning_rate": 0.00010557940777543323,
      "loss": 0.8362,
      "step": 9500
    },
    {
      "epoch": 2.6714446318156266,
      "grad_norm": 0.7073639631271362,
      "learning_rate": 0.0001054814506064502,
      "loss": 0.8279,
      "step": 9505
    },
    {
      "epoch": 2.672849915682968,
      "grad_norm": 0.5461817383766174,
      "learning_rate": 0.00010538348816154586,
      "loss": 0.811,
      "step": 9510
    },
    {
      "epoch": 2.674255199550309,
      "grad_norm": 0.7969082593917847,
      "learning_rate": 0.00010528552053500955,
      "loss": 0.8468,
      "step": 9515
    },
    {
      "epoch": 2.6756604834176505,
      "grad_norm": 0.5298776030540466,
      "learning_rate": 0.00010518754782113551,
      "loss": 0.8451,
      "step": 9520
    },
    {
      "epoch": 2.677065767284992,
      "grad_norm": 0.5770809054374695,
      "learning_rate": 0.00010508957011422292,
      "loss": 0.8535,
      "step": 9525
    },
    {
      "epoch": 2.6784710511523326,
      "grad_norm": 0.5948202610015869,
      "learning_rate": 0.0001049915875085758,
      "loss": 0.8389,
      "step": 9530
    },
    {
      "epoch": 2.679876335019674,
      "grad_norm": 0.6123007535934448,
      "learning_rate": 0.00010489360009850285,
      "loss": 0.8212,
      "step": 9535
    },
    {
      "epoch": 2.681281618887015,
      "grad_norm": 0.49327778816223145,
      "learning_rate": 0.0001047956079783174,
      "loss": 0.8323,
      "step": 9540
    },
    {
      "epoch": 2.6826869027543565,
      "grad_norm": 0.5005403161048889,
      "learning_rate": 0.00010469761124233731,
      "loss": 0.8243,
      "step": 9545
    },
    {
      "epoch": 2.6840921866216974,
      "grad_norm": 0.48121288418769836,
      "learning_rate": 0.00010459960998488489,
      "loss": 0.8327,
      "step": 9550
    },
    {
      "epoch": 2.6854974704890386,
      "grad_norm": 0.4431888163089752,
      "learning_rate": 0.0001045016043002868,
      "loss": 0.8167,
      "step": 9555
    },
    {
      "epoch": 2.68690275435638,
      "grad_norm": 0.5393797159194946,
      "learning_rate": 0.00010440359428287394,
      "loss": 0.8376,
      "step": 9560
    },
    {
      "epoch": 2.688308038223721,
      "grad_norm": 0.6236591339111328,
      "learning_rate": 0.00010430558002698145,
      "loss": 0.8155,
      "step": 9565
    },
    {
      "epoch": 2.6897133220910625,
      "grad_norm": 0.6783631443977356,
      "learning_rate": 0.00010420756162694847,
      "loss": 0.8437,
      "step": 9570
    },
    {
      "epoch": 2.691118605958404,
      "grad_norm": 0.6380444765090942,
      "learning_rate": 0.00010410953917711814,
      "loss": 0.8262,
      "step": 9575
    },
    {
      "epoch": 2.6925238898257446,
      "grad_norm": 0.5137963891029358,
      "learning_rate": 0.00010401151277183754,
      "loss": 0.8265,
      "step": 9580
    },
    {
      "epoch": 2.693929173693086,
      "grad_norm": 0.49073556065559387,
      "learning_rate": 0.00010391348250545754,
      "loss": 0.8279,
      "step": 9585
    },
    {
      "epoch": 2.695334457560427,
      "grad_norm": 0.5200812220573425,
      "learning_rate": 0.00010381544847233271,
      "loss": 0.8291,
      "step": 9590
    },
    {
      "epoch": 2.6967397414277685,
      "grad_norm": 0.47082236409187317,
      "learning_rate": 0.00010371741076682124,
      "loss": 0.8221,
      "step": 9595
    },
    {
      "epoch": 2.6981450252951094,
      "grad_norm": 0.47038206458091736,
      "learning_rate": 0.0001036193694832849,
      "loss": 0.8269,
      "step": 9600
    },
    {
      "epoch": 2.6995503091624506,
      "grad_norm": 0.5449730753898621,
      "learning_rate": 0.00010352132471608882,
      "loss": 0.8582,
      "step": 9605
    },
    {
      "epoch": 2.700955593029792,
      "grad_norm": 0.5254364013671875,
      "learning_rate": 0.00010342327655960162,
      "loss": 0.8257,
      "step": 9610
    },
    {
      "epoch": 2.7023608768971332,
      "grad_norm": 1.2105820178985596,
      "learning_rate": 0.00010332522510819504,
      "loss": 0.8759,
      "step": 9615
    },
    {
      "epoch": 2.7037661607644745,
      "grad_norm": 0.5029426217079163,
      "learning_rate": 0.00010322717045624411,
      "loss": 0.8339,
      "step": 9620
    },
    {
      "epoch": 2.705171444631816,
      "grad_norm": 0.5147092938423157,
      "learning_rate": 0.00010312911269812677,
      "loss": 0.8317,
      "step": 9625
    },
    {
      "epoch": 2.706576728499157,
      "grad_norm": 0.5242138504981995,
      "learning_rate": 0.00010303105192822418,
      "loss": 0.8138,
      "step": 9630
    },
    {
      "epoch": 2.707982012366498,
      "grad_norm": 0.49158915877342224,
      "learning_rate": 0.00010293298824092022,
      "loss": 0.8231,
      "step": 9635
    },
    {
      "epoch": 2.7093872962338392,
      "grad_norm": 0.5118553042411804,
      "learning_rate": 0.00010283492173060163,
      "loss": 0.8274,
      "step": 9640
    },
    {
      "epoch": 2.7107925801011805,
      "grad_norm": 0.4782494008541107,
      "learning_rate": 0.00010273685249165791,
      "loss": 0.8295,
      "step": 9645
    },
    {
      "epoch": 2.712197863968522,
      "grad_norm": 0.5195757150650024,
      "learning_rate": 0.00010263878061848115,
      "loss": 0.8161,
      "step": 9650
    },
    {
      "epoch": 2.7136031478358627,
      "grad_norm": 0.6431840658187866,
      "learning_rate": 0.00010254070620546594,
      "loss": 0.8534,
      "step": 9655
    },
    {
      "epoch": 2.715008431703204,
      "grad_norm": 0.5024916529655457,
      "learning_rate": 0.00010244262934700937,
      "loss": 0.828,
      "step": 9660
    },
    {
      "epoch": 2.7164137155705452,
      "grad_norm": 0.7182828187942505,
      "learning_rate": 0.0001023445501375109,
      "loss": 0.8114,
      "step": 9665
    },
    {
      "epoch": 2.7178189994378865,
      "grad_norm": 0.4836368262767792,
      "learning_rate": 0.00010224646867137217,
      "loss": 0.8295,
      "step": 9670
    },
    {
      "epoch": 2.719224283305228,
      "grad_norm": 0.5435917377471924,
      "learning_rate": 0.00010214838504299704,
      "loss": 0.8316,
      "step": 9675
    },
    {
      "epoch": 2.720629567172569,
      "grad_norm": 0.4549933671951294,
      "learning_rate": 0.0001020502993467915,
      "loss": 0.8427,
      "step": 9680
    },
    {
      "epoch": 2.72203485103991,
      "grad_norm": 0.4845207631587982,
      "learning_rate": 0.0001019522116771634,
      "loss": 0.8404,
      "step": 9685
    },
    {
      "epoch": 2.7234401349072512,
      "grad_norm": 0.4945378303527832,
      "learning_rate": 0.00010185412212852267,
      "loss": 0.834,
      "step": 9690
    },
    {
      "epoch": 2.7248454187745925,
      "grad_norm": 0.5254123210906982,
      "learning_rate": 0.00010175603079528088,
      "loss": 0.8175,
      "step": 9695
    },
    {
      "epoch": 2.726250702641934,
      "grad_norm": 0.47113460302352905,
      "learning_rate": 0.00010165793777185144,
      "loss": 0.8417,
      "step": 9700
    },
    {
      "epoch": 2.7276559865092747,
      "grad_norm": 0.5494838356971741,
      "learning_rate": 0.00010155984315264928,
      "loss": 0.83,
      "step": 9705
    },
    {
      "epoch": 2.729061270376616,
      "grad_norm": 0.5023055672645569,
      "learning_rate": 0.00010146174703209093,
      "loss": 0.8283,
      "step": 9710
    },
    {
      "epoch": 2.7304665542439572,
      "grad_norm": 0.5280230045318604,
      "learning_rate": 0.0001013636495045944,
      "loss": 0.8179,
      "step": 9715
    },
    {
      "epoch": 2.7318718381112985,
      "grad_norm": 0.5023404359817505,
      "learning_rate": 0.00010126555066457895,
      "loss": 0.8158,
      "step": 9720
    },
    {
      "epoch": 2.73327712197864,
      "grad_norm": 0.6119568943977356,
      "learning_rate": 0.00010116745060646522,
      "loss": 0.8223,
      "step": 9725
    },
    {
      "epoch": 2.734682405845981,
      "grad_norm": 0.5735021829605103,
      "learning_rate": 0.00010106934942467492,
      "loss": 0.8365,
      "step": 9730
    },
    {
      "epoch": 2.736087689713322,
      "grad_norm": 0.7572410106658936,
      "learning_rate": 0.00010097124721363087,
      "loss": 0.8502,
      "step": 9735
    },
    {
      "epoch": 2.7374929735806632,
      "grad_norm": 0.5611612200737,
      "learning_rate": 0.00010087314406775699,
      "loss": 0.8203,
      "step": 9740
    },
    {
      "epoch": 2.7388982574480045,
      "grad_norm": 0.5417007207870483,
      "learning_rate": 0.00010077504008147791,
      "loss": 0.8276,
      "step": 9745
    },
    {
      "epoch": 2.740303541315346,
      "grad_norm": 0.5647456645965576,
      "learning_rate": 0.00010067693534921923,
      "loss": 0.8128,
      "step": 9750
    },
    {
      "epoch": 2.7417088251826867,
      "grad_norm": 0.5229110717773438,
      "learning_rate": 0.00010057882996540712,
      "loss": 0.8161,
      "step": 9755
    },
    {
      "epoch": 2.743114109050028,
      "grad_norm": 0.642594575881958,
      "learning_rate": 0.00010048072402446855,
      "loss": 0.8396,
      "step": 9760
    },
    {
      "epoch": 2.7445193929173692,
      "grad_norm": 0.641390860080719,
      "learning_rate": 0.00010038261762083092,
      "loss": 0.8289,
      "step": 9765
    },
    {
      "epoch": 2.7459246767847105,
      "grad_norm": 0.641538143157959,
      "learning_rate": 0.00010028451084892207,
      "loss": 0.8648,
      "step": 9770
    },
    {
      "epoch": 2.747329960652052,
      "grad_norm": 0.6418523192405701,
      "learning_rate": 0.00010018640380317024,
      "loss": 0.8176,
      "step": 9775
    },
    {
      "epoch": 2.748735244519393,
      "grad_norm": 0.5464898943901062,
      "learning_rate": 0.00010008829657800388,
      "loss": 0.8263,
      "step": 9780
    },
    {
      "epoch": 2.7501405283867344,
      "grad_norm": 0.5861994028091431,
      "learning_rate": 9.999018926785165e-05,
      "loss": 0.8119,
      "step": 9785
    },
    {
      "epoch": 2.7515458122540752,
      "grad_norm": 0.526378333568573,
      "learning_rate": 9.989208196714231e-05,
      "loss": 0.826,
      "step": 9790
    },
    {
      "epoch": 2.7529510961214165,
      "grad_norm": 0.46394699811935425,
      "learning_rate": 9.979397477030455e-05,
      "loss": 0.8186,
      "step": 9795
    },
    {
      "epoch": 2.754356379988758,
      "grad_norm": 0.6000794172286987,
      "learning_rate": 9.969586777176703e-05,
      "loss": 0.8306,
      "step": 9800
    },
    {
      "epoch": 2.7557616638560987,
      "grad_norm": 0.6603537797927856,
      "learning_rate": 9.959776106595817e-05,
      "loss": 0.8328,
      "step": 9805
    },
    {
      "epoch": 2.75716694772344,
      "grad_norm": 0.5899302363395691,
      "learning_rate": 9.949965474730614e-05,
      "loss": 0.8117,
      "step": 9810
    },
    {
      "epoch": 2.7585722315907812,
      "grad_norm": 0.5155988335609436,
      "learning_rate": 9.940154891023868e-05,
      "loss": 0.8241,
      "step": 9815
    },
    {
      "epoch": 2.7599775154581225,
      "grad_norm": 0.5121949315071106,
      "learning_rate": 9.930344364918315e-05,
      "loss": 0.8168,
      "step": 9820
    },
    {
      "epoch": 2.761382799325464,
      "grad_norm": 0.5585014820098877,
      "learning_rate": 9.920533905856632e-05,
      "loss": 0.827,
      "step": 9825
    },
    {
      "epoch": 2.762788083192805,
      "grad_norm": 0.5437676906585693,
      "learning_rate": 9.910723523281425e-05,
      "loss": 0.835,
      "step": 9830
    },
    {
      "epoch": 2.7641933670601464,
      "grad_norm": 0.4653185307979584,
      "learning_rate": 9.900913226635234e-05,
      "loss": 0.8105,
      "step": 9835
    },
    {
      "epoch": 2.7655986509274872,
      "grad_norm": 0.6214808821678162,
      "learning_rate": 9.891103025360519e-05,
      "loss": 0.8285,
      "step": 9840
    },
    {
      "epoch": 2.7670039347948285,
      "grad_norm": 0.5066220760345459,
      "learning_rate": 9.881292928899644e-05,
      "loss": 0.8395,
      "step": 9845
    },
    {
      "epoch": 2.76840921866217,
      "grad_norm": 0.4878048896789551,
      "learning_rate": 9.871482946694864e-05,
      "loss": 0.8348,
      "step": 9850
    },
    {
      "epoch": 2.769814502529511,
      "grad_norm": 0.4976538121700287,
      "learning_rate": 9.861673088188337e-05,
      "loss": 0.8206,
      "step": 9855
    },
    {
      "epoch": 2.771219786396852,
      "grad_norm": 0.47465085983276367,
      "learning_rate": 9.851863362822097e-05,
      "loss": 0.8196,
      "step": 9860
    },
    {
      "epoch": 2.7726250702641932,
      "grad_norm": 0.5739564895629883,
      "learning_rate": 9.842053780038046e-05,
      "loss": 0.8219,
      "step": 9865
    },
    {
      "epoch": 2.7740303541315345,
      "grad_norm": 0.5575153827667236,
      "learning_rate": 9.832244349277957e-05,
      "loss": 0.8424,
      "step": 9870
    },
    {
      "epoch": 2.775435637998876,
      "grad_norm": 0.4867574870586395,
      "learning_rate": 9.822435079983448e-05,
      "loss": 0.8182,
      "step": 9875
    },
    {
      "epoch": 2.776840921866217,
      "grad_norm": 0.6961890459060669,
      "learning_rate": 9.812625981595993e-05,
      "loss": 0.8261,
      "step": 9880
    },
    {
      "epoch": 2.7782462057335584,
      "grad_norm": 0.5481508374214172,
      "learning_rate": 9.802817063556882e-05,
      "loss": 0.8243,
      "step": 9885
    },
    {
      "epoch": 2.7796514896008993,
      "grad_norm": 0.5110167264938354,
      "learning_rate": 9.793008335307252e-05,
      "loss": 0.8308,
      "step": 9890
    },
    {
      "epoch": 2.7810567734682405,
      "grad_norm": 0.524135947227478,
      "learning_rate": 9.783199806288052e-05,
      "loss": 0.8249,
      "step": 9895
    },
    {
      "epoch": 2.782462057335582,
      "grad_norm": 0.5294224619865417,
      "learning_rate": 9.773391485940025e-05,
      "loss": 0.8272,
      "step": 9900
    },
    {
      "epoch": 2.783867341202923,
      "grad_norm": 0.5189291834831238,
      "learning_rate": 9.763583383703732e-05,
      "loss": 0.8342,
      "step": 9905
    },
    {
      "epoch": 2.785272625070264,
      "grad_norm": 0.5350440144538879,
      "learning_rate": 9.753775509019515e-05,
      "loss": 0.8275,
      "step": 9910
    },
    {
      "epoch": 2.7866779089376053,
      "grad_norm": 0.5991389751434326,
      "learning_rate": 9.7439678713275e-05,
      "loss": 0.8254,
      "step": 9915
    },
    {
      "epoch": 2.7880831928049465,
      "grad_norm": 0.5112239122390747,
      "learning_rate": 9.734160480067578e-05,
      "loss": 0.8376,
      "step": 9920
    },
    {
      "epoch": 2.789488476672288,
      "grad_norm": 0.5403845906257629,
      "learning_rate": 9.724353344679412e-05,
      "loss": 0.8288,
      "step": 9925
    },
    {
      "epoch": 2.790893760539629,
      "grad_norm": 0.5947050452232361,
      "learning_rate": 9.714546474602415e-05,
      "loss": 0.8397,
      "step": 9930
    },
    {
      "epoch": 2.7922990444069704,
      "grad_norm": 0.6373836994171143,
      "learning_rate": 9.704739879275742e-05,
      "loss": 0.8318,
      "step": 9935
    },
    {
      "epoch": 2.7937043282743113,
      "grad_norm": 0.5135063529014587,
      "learning_rate": 9.694933568138287e-05,
      "loss": 0.9294,
      "step": 9940
    },
    {
      "epoch": 2.7951096121416525,
      "grad_norm": 0.5365307331085205,
      "learning_rate": 9.68512755062867e-05,
      "loss": 0.8305,
      "step": 9945
    },
    {
      "epoch": 2.796514896008994,
      "grad_norm": 0.6203733086585999,
      "learning_rate": 9.675321836185231e-05,
      "loss": 0.8291,
      "step": 9950
    },
    {
      "epoch": 2.797920179876335,
      "grad_norm": 0.7198903560638428,
      "learning_rate": 9.665516434246005e-05,
      "loss": 0.8435,
      "step": 9955
    },
    {
      "epoch": 2.799325463743676,
      "grad_norm": 0.5159870982170105,
      "learning_rate": 9.655711354248745e-05,
      "loss": 0.8361,
      "step": 9960
    },
    {
      "epoch": 2.8007307476110173,
      "grad_norm": 0.47406426072120667,
      "learning_rate": 9.645906605630885e-05,
      "loss": 0.8475,
      "step": 9965
    },
    {
      "epoch": 2.8021360314783585,
      "grad_norm": 0.5789318680763245,
      "learning_rate": 9.636102197829536e-05,
      "loss": 0.824,
      "step": 9970
    },
    {
      "epoch": 2.8035413153457,
      "grad_norm": 0.5253451466560364,
      "learning_rate": 9.626298140281488e-05,
      "loss": 0.822,
      "step": 9975
    },
    {
      "epoch": 2.804946599213041,
      "grad_norm": 0.48433390259742737,
      "learning_rate": 9.61649444242319e-05,
      "loss": 0.8304,
      "step": 9980
    },
    {
      "epoch": 2.8063518830803824,
      "grad_norm": 0.4688815176486969,
      "learning_rate": 9.60669111369075e-05,
      "loss": 0.8241,
      "step": 9985
    },
    {
      "epoch": 2.8077571669477237,
      "grad_norm": 0.4556552767753601,
      "learning_rate": 9.596888163519912e-05,
      "loss": 0.8197,
      "step": 9990
    },
    {
      "epoch": 2.8091624508150645,
      "grad_norm": 0.6112373471260071,
      "learning_rate": 9.587085601346062e-05,
      "loss": 0.8722,
      "step": 9995
    },
    {
      "epoch": 2.810567734682406,
      "grad_norm": 0.4816291630268097,
      "learning_rate": 9.577283436604216e-05,
      "loss": 0.8149,
      "step": 10000
    },
    {
      "epoch": 2.811973018549747,
      "grad_norm": 0.49455633759498596,
      "learning_rate": 9.567481678728994e-05,
      "loss": 0.8923,
      "step": 10005
    },
    {
      "epoch": 2.8133783024170884,
      "grad_norm": 0.6647067070007324,
      "learning_rate": 9.557680337154635e-05,
      "loss": 0.8188,
      "step": 10010
    },
    {
      "epoch": 2.8147835862844293,
      "grad_norm": 0.4788486659526825,
      "learning_rate": 9.547879421314976e-05,
      "loss": 0.8133,
      "step": 10015
    },
    {
      "epoch": 2.8161888701517706,
      "grad_norm": 0.529371976852417,
      "learning_rate": 9.538078940643449e-05,
      "loss": 0.8359,
      "step": 10020
    },
    {
      "epoch": 2.817594154019112,
      "grad_norm": 0.4806664288043976,
      "learning_rate": 9.528278904573054e-05,
      "loss": 0.8163,
      "step": 10025
    },
    {
      "epoch": 2.818999437886453,
      "grad_norm": 0.600096583366394,
      "learning_rate": 9.518479322536372e-05,
      "loss": 0.832,
      "step": 10030
    },
    {
      "epoch": 2.8204047217537944,
      "grad_norm": 0.5037655830383301,
      "learning_rate": 9.508680203965549e-05,
      "loss": 0.8428,
      "step": 10035
    },
    {
      "epoch": 2.8218100056211357,
      "grad_norm": 0.4676769971847534,
      "learning_rate": 9.498881558292279e-05,
      "loss": 0.8422,
      "step": 10040
    },
    {
      "epoch": 2.8232152894884766,
      "grad_norm": 0.5020796060562134,
      "learning_rate": 9.489083394947802e-05,
      "loss": 0.8177,
      "step": 10045
    },
    {
      "epoch": 2.824620573355818,
      "grad_norm": 0.5521809458732605,
      "learning_rate": 9.479285723362897e-05,
      "loss": 0.8494,
      "step": 10050
    },
    {
      "epoch": 2.826025857223159,
      "grad_norm": 0.8967176079750061,
      "learning_rate": 9.469488552967872e-05,
      "loss": 0.831,
      "step": 10055
    },
    {
      "epoch": 2.8274311410905004,
      "grad_norm": 0.6544962525367737,
      "learning_rate": 9.45969189319254e-05,
      "loss": 0.8201,
      "step": 10060
    },
    {
      "epoch": 2.8288364249578413,
      "grad_norm": 0.6380069255828857,
      "learning_rate": 9.449895753466231e-05,
      "loss": 0.818,
      "step": 10065
    },
    {
      "epoch": 2.8302417088251826,
      "grad_norm": 0.5384354591369629,
      "learning_rate": 9.440100143217786e-05,
      "loss": 0.8233,
      "step": 10070
    },
    {
      "epoch": 2.831646992692524,
      "grad_norm": 0.47108352184295654,
      "learning_rate": 9.430305071875513e-05,
      "loss": 0.8183,
      "step": 10075
    },
    {
      "epoch": 2.833052276559865,
      "grad_norm": 0.5076127052307129,
      "learning_rate": 9.420510548867216e-05,
      "loss": 0.8208,
      "step": 10080
    },
    {
      "epoch": 2.8344575604272064,
      "grad_norm": 0.5295957326889038,
      "learning_rate": 9.410716583620168e-05,
      "loss": 0.821,
      "step": 10085
    },
    {
      "epoch": 2.8358628442945477,
      "grad_norm": 0.5895918607711792,
      "learning_rate": 9.40092318556111e-05,
      "loss": 0.8119,
      "step": 10090
    },
    {
      "epoch": 2.8372681281618886,
      "grad_norm": 0.498048335313797,
      "learning_rate": 9.391130364116227e-05,
      "loss": 0.8261,
      "step": 10095
    },
    {
      "epoch": 2.83867341202923,
      "grad_norm": 0.4717422127723694,
      "learning_rate": 9.381338128711155e-05,
      "loss": 0.8282,
      "step": 10100
    },
    {
      "epoch": 2.840078695896571,
      "grad_norm": 0.5129121541976929,
      "learning_rate": 9.371546488770973e-05,
      "loss": 0.8409,
      "step": 10105
    },
    {
      "epoch": 2.8414839797639124,
      "grad_norm": 0.5050500631332397,
      "learning_rate": 9.361755453720166e-05,
      "loss": 0.8281,
      "step": 10110
    },
    {
      "epoch": 2.8428892636312533,
      "grad_norm": 0.45085790753364563,
      "learning_rate": 9.351965032982657e-05,
      "loss": 0.819,
      "step": 10115
    },
    {
      "epoch": 2.8442945474985946,
      "grad_norm": 0.5280387997627258,
      "learning_rate": 9.342175235981773e-05,
      "loss": 0.8242,
      "step": 10120
    },
    {
      "epoch": 2.845699831365936,
      "grad_norm": 0.5300459861755371,
      "learning_rate": 9.33238607214024e-05,
      "loss": 0.8261,
      "step": 10125
    },
    {
      "epoch": 2.847105115233277,
      "grad_norm": 0.4671616852283478,
      "learning_rate": 9.322597550880167e-05,
      "loss": 0.8289,
      "step": 10130
    },
    {
      "epoch": 2.8485103991006184,
      "grad_norm": 0.5089605450630188,
      "learning_rate": 9.312809681623051e-05,
      "loss": 0.8258,
      "step": 10135
    },
    {
      "epoch": 2.8499156829679597,
      "grad_norm": 0.5198221802711487,
      "learning_rate": 9.303022473789763e-05,
      "loss": 0.8245,
      "step": 10140
    },
    {
      "epoch": 2.851320966835301,
      "grad_norm": 0.49434977769851685,
      "learning_rate": 9.293235936800539e-05,
      "loss": 0.8719,
      "step": 10145
    },
    {
      "epoch": 2.852726250702642,
      "grad_norm": 0.5880835652351379,
      "learning_rate": 9.283450080074958e-05,
      "loss": 0.8309,
      "step": 10150
    },
    {
      "epoch": 2.854131534569983,
      "grad_norm": 0.5376434922218323,
      "learning_rate": 9.273664913031957e-05,
      "loss": 0.8724,
      "step": 10155
    },
    {
      "epoch": 2.8555368184373244,
      "grad_norm": 0.47303545475006104,
      "learning_rate": 9.263880445089803e-05,
      "loss": 0.8168,
      "step": 10160
    },
    {
      "epoch": 2.8569421023046653,
      "grad_norm": 0.5622953176498413,
      "learning_rate": 9.254096685666091e-05,
      "loss": 0.8086,
      "step": 10165
    },
    {
      "epoch": 2.8583473861720066,
      "grad_norm": 0.5584001541137695,
      "learning_rate": 9.244313644177733e-05,
      "loss": 0.8264,
      "step": 10170
    },
    {
      "epoch": 2.859752670039348,
      "grad_norm": 0.5387102365493774,
      "learning_rate": 9.234531330040954e-05,
      "loss": 0.82,
      "step": 10175
    },
    {
      "epoch": 2.861157953906689,
      "grad_norm": 0.508496880531311,
      "learning_rate": 9.22474975267128e-05,
      "loss": 0.8267,
      "step": 10180
    },
    {
      "epoch": 2.8625632377740304,
      "grad_norm": 0.5421685576438904,
      "learning_rate": 9.214968921483512e-05,
      "loss": 0.8235,
      "step": 10185
    },
    {
      "epoch": 2.8639685216413717,
      "grad_norm": 0.5567712783813477,
      "learning_rate": 9.205188845891752e-05,
      "loss": 0.8799,
      "step": 10190
    },
    {
      "epoch": 2.865373805508713,
      "grad_norm": 0.5794306993484497,
      "learning_rate": 9.19540953530937e-05,
      "loss": 0.8315,
      "step": 10195
    },
    {
      "epoch": 2.866779089376054,
      "grad_norm": 0.5016175508499146,
      "learning_rate": 9.185630999148993e-05,
      "loss": 0.8304,
      "step": 10200
    },
    {
      "epoch": 2.868184373243395,
      "grad_norm": 0.4961920976638794,
      "learning_rate": 9.175853246822505e-05,
      "loss": 0.8354,
      "step": 10205
    },
    {
      "epoch": 2.8695896571107364,
      "grad_norm": 0.5245194435119629,
      "learning_rate": 9.166076287741044e-05,
      "loss": 0.8231,
      "step": 10210
    },
    {
      "epoch": 2.8709949409780777,
      "grad_norm": 0.4738057255744934,
      "learning_rate": 9.156300131314975e-05,
      "loss": 0.8353,
      "step": 10215
    },
    {
      "epoch": 2.8724002248454186,
      "grad_norm": 0.605202853679657,
      "learning_rate": 9.146524786953889e-05,
      "loss": 0.8662,
      "step": 10220
    },
    {
      "epoch": 2.87380550871276,
      "grad_norm": 0.5499880909919739,
      "learning_rate": 9.136750264066606e-05,
      "loss": 0.8283,
      "step": 10225
    },
    {
      "epoch": 2.875210792580101,
      "grad_norm": 0.5103596448898315,
      "learning_rate": 9.12697657206115e-05,
      "loss": 0.8298,
      "step": 10230
    },
    {
      "epoch": 2.8766160764474424,
      "grad_norm": 0.5948740243911743,
      "learning_rate": 9.117203720344735e-05,
      "loss": 0.8234,
      "step": 10235
    },
    {
      "epoch": 2.8780213603147837,
      "grad_norm": 0.5047176480293274,
      "learning_rate": 9.107431718323782e-05,
      "loss": 0.8308,
      "step": 10240
    },
    {
      "epoch": 2.879426644182125,
      "grad_norm": 0.5018946528434753,
      "learning_rate": 9.097660575403888e-05,
      "loss": 0.826,
      "step": 10245
    },
    {
      "epoch": 2.880831928049466,
      "grad_norm": 0.482831209897995,
      "learning_rate": 9.087890300989823e-05,
      "loss": 0.8181,
      "step": 10250
    },
    {
      "epoch": 2.882237211916807,
      "grad_norm": 0.4997115135192871,
      "learning_rate": 9.078120904485518e-05,
      "loss": 0.8149,
      "step": 10255
    },
    {
      "epoch": 2.8836424957841484,
      "grad_norm": 0.6287440657615662,
      "learning_rate": 9.068352395294063e-05,
      "loss": 0.8261,
      "step": 10260
    },
    {
      "epoch": 2.8850477796514897,
      "grad_norm": 0.6290640234947205,
      "learning_rate": 9.058584782817697e-05,
      "loss": 0.828,
      "step": 10265
    },
    {
      "epoch": 2.8864530635188306,
      "grad_norm": 0.5529760122299194,
      "learning_rate": 9.048818076457783e-05,
      "loss": 0.8261,
      "step": 10270
    },
    {
      "epoch": 2.887858347386172,
      "grad_norm": 0.5642558932304382,
      "learning_rate": 9.039052285614828e-05,
      "loss": 0.8132,
      "step": 10275
    },
    {
      "epoch": 2.889263631253513,
      "grad_norm": 0.5524906516075134,
      "learning_rate": 9.029287419688447e-05,
      "loss": 0.8321,
      "step": 10280
    },
    {
      "epoch": 2.8906689151208544,
      "grad_norm": 0.5699685215950012,
      "learning_rate": 9.019523488077374e-05,
      "loss": 0.8184,
      "step": 10285
    },
    {
      "epoch": 2.8920741989881957,
      "grad_norm": 0.4990377724170685,
      "learning_rate": 9.009760500179428e-05,
      "loss": 0.8376,
      "step": 10290
    },
    {
      "epoch": 2.893479482855537,
      "grad_norm": 0.5495244860649109,
      "learning_rate": 8.999998465391533e-05,
      "loss": 0.8281,
      "step": 10295
    },
    {
      "epoch": 2.894884766722878,
      "grad_norm": 0.6051164865493774,
      "learning_rate": 8.990237393109702e-05,
      "loss": 0.8264,
      "step": 10300
    },
    {
      "epoch": 2.896290050590219,
      "grad_norm": 0.514365553855896,
      "learning_rate": 8.980477292728997e-05,
      "loss": 0.8352,
      "step": 10305
    },
    {
      "epoch": 2.8976953344575604,
      "grad_norm": 0.6341189742088318,
      "learning_rate": 8.970718173643566e-05,
      "loss": 0.833,
      "step": 10310
    },
    {
      "epoch": 2.8991006183249017,
      "grad_norm": 0.5225892066955566,
      "learning_rate": 8.960960045246607e-05,
      "loss": 0.8237,
      "step": 10315
    },
    {
      "epoch": 2.9005059021922426,
      "grad_norm": 0.4649873971939087,
      "learning_rate": 8.951202916930363e-05,
      "loss": 0.8346,
      "step": 10320
    },
    {
      "epoch": 2.901911186059584,
      "grad_norm": 0.6664191484451294,
      "learning_rate": 8.941446798086112e-05,
      "loss": 0.8207,
      "step": 10325
    },
    {
      "epoch": 2.903316469926925,
      "grad_norm": 0.4937977194786072,
      "learning_rate": 8.931691698104165e-05,
      "loss": 0.8227,
      "step": 10330
    },
    {
      "epoch": 2.9047217537942664,
      "grad_norm": 0.5461153388023376,
      "learning_rate": 8.921937626373852e-05,
      "loss": 0.8236,
      "step": 10335
    },
    {
      "epoch": 2.9061270376616077,
      "grad_norm": 0.4974842071533203,
      "learning_rate": 8.912184592283509e-05,
      "loss": 0.8239,
      "step": 10340
    },
    {
      "epoch": 2.907532321528949,
      "grad_norm": 0.6387110948562622,
      "learning_rate": 8.902432605220472e-05,
      "loss": 0.8279,
      "step": 10345
    },
    {
      "epoch": 2.9089376053962903,
      "grad_norm": 0.5886310338973999,
      "learning_rate": 8.892681674571081e-05,
      "loss": 0.8218,
      "step": 10350
    },
    {
      "epoch": 2.910342889263631,
      "grad_norm": 0.6844249963760376,
      "learning_rate": 8.882931809720653e-05,
      "loss": 0.864,
      "step": 10355
    },
    {
      "epoch": 2.9117481731309725,
      "grad_norm": 0.6174393892288208,
      "learning_rate": 8.873183020053469e-05,
      "loss": 0.8297,
      "step": 10360
    },
    {
      "epoch": 2.9131534569983137,
      "grad_norm": 0.4688795208930969,
      "learning_rate": 8.863435314952787e-05,
      "loss": 0.8364,
      "step": 10365
    },
    {
      "epoch": 2.914558740865655,
      "grad_norm": 0.5638646483421326,
      "learning_rate": 8.853688703800821e-05,
      "loss": 0.8313,
      "step": 10370
    },
    {
      "epoch": 2.915964024732996,
      "grad_norm": 0.4737720191478729,
      "learning_rate": 8.843943195978723e-05,
      "loss": 0.8266,
      "step": 10375
    },
    {
      "epoch": 2.917369308600337,
      "grad_norm": 0.4690057337284088,
      "learning_rate": 8.834198800866593e-05,
      "loss": 0.8173,
      "step": 10380
    },
    {
      "epoch": 2.9187745924676785,
      "grad_norm": 0.5100188255310059,
      "learning_rate": 8.824455527843457e-05,
      "loss": 0.825,
      "step": 10385
    },
    {
      "epoch": 2.9201798763350197,
      "grad_norm": 0.5377050042152405,
      "learning_rate": 8.814713386287256e-05,
      "loss": 0.8473,
      "step": 10390
    },
    {
      "epoch": 2.921585160202361,
      "grad_norm": 0.5518834590911865,
      "learning_rate": 8.804972385574849e-05,
      "loss": 0.8107,
      "step": 10395
    },
    {
      "epoch": 2.9229904440697023,
      "grad_norm": 0.4859972596168518,
      "learning_rate": 8.795232535081991e-05,
      "loss": 0.8423,
      "step": 10400
    },
    {
      "epoch": 2.924395727937043,
      "grad_norm": 0.46924281120300293,
      "learning_rate": 8.785493844183339e-05,
      "loss": 0.8339,
      "step": 10405
    },
    {
      "epoch": 2.9258010118043845,
      "grad_norm": 0.4706718623638153,
      "learning_rate": 8.77575632225242e-05,
      "loss": 0.8273,
      "step": 10410
    },
    {
      "epoch": 2.9272062956717257,
      "grad_norm": 0.5642886161804199,
      "learning_rate": 8.766019978661643e-05,
      "loss": 0.8043,
      "step": 10415
    },
    {
      "epoch": 2.928611579539067,
      "grad_norm": 0.5513885617256165,
      "learning_rate": 8.75628482278229e-05,
      "loss": 0.8301,
      "step": 10420
    },
    {
      "epoch": 2.930016863406408,
      "grad_norm": 0.7229651808738708,
      "learning_rate": 8.74655086398449e-05,
      "loss": 0.8206,
      "step": 10425
    },
    {
      "epoch": 2.931422147273749,
      "grad_norm": 0.5825774073600769,
      "learning_rate": 8.736818111637222e-05,
      "loss": 0.8292,
      "step": 10430
    },
    {
      "epoch": 2.9328274311410905,
      "grad_norm": 0.5014123320579529,
      "learning_rate": 8.727086575108304e-05,
      "loss": 0.8367,
      "step": 10435
    },
    {
      "epoch": 2.9342327150084317,
      "grad_norm": 0.508975625038147,
      "learning_rate": 8.717356263764389e-05,
      "loss": 0.8179,
      "step": 10440
    },
    {
      "epoch": 2.935637998875773,
      "grad_norm": 0.5300000309944153,
      "learning_rate": 8.70762718697094e-05,
      "loss": 0.8632,
      "step": 10445
    },
    {
      "epoch": 2.9370432827431143,
      "grad_norm": 0.5742501020431519,
      "learning_rate": 8.69789935409224e-05,
      "loss": 0.838,
      "step": 10450
    },
    {
      "epoch": 2.938448566610455,
      "grad_norm": 0.5139035582542419,
      "learning_rate": 8.688172774491377e-05,
      "loss": 0.8241,
      "step": 10455
    },
    {
      "epoch": 2.9398538504777965,
      "grad_norm": 0.4528915286064148,
      "learning_rate": 8.678447457530226e-05,
      "loss": 0.8363,
      "step": 10460
    },
    {
      "epoch": 2.9412591343451377,
      "grad_norm": 0.5604382157325745,
      "learning_rate": 8.668723412569446e-05,
      "loss": 0.8262,
      "step": 10465
    },
    {
      "epoch": 2.942664418212479,
      "grad_norm": 0.5670209527015686,
      "learning_rate": 8.659000648968476e-05,
      "loss": 0.832,
      "step": 10470
    },
    {
      "epoch": 2.94406970207982,
      "grad_norm": 0.504100501537323,
      "learning_rate": 8.649279176085524e-05,
      "loss": 0.8415,
      "step": 10475
    },
    {
      "epoch": 2.945474985947161,
      "grad_norm": 0.4927058815956116,
      "learning_rate": 8.639559003277548e-05,
      "loss": 0.8201,
      "step": 10480
    },
    {
      "epoch": 2.9468802698145025,
      "grad_norm": 0.5819506049156189,
      "learning_rate": 8.62984013990026e-05,
      "loss": 0.833,
      "step": 10485
    },
    {
      "epoch": 2.9482855536818438,
      "grad_norm": 0.512156069278717,
      "learning_rate": 8.62012259530811e-05,
      "loss": 0.8363,
      "step": 10490
    },
    {
      "epoch": 2.949690837549185,
      "grad_norm": 0.5958979725837708,
      "learning_rate": 8.610406378854284e-05,
      "loss": 0.8338,
      "step": 10495
    },
    {
      "epoch": 2.9510961214165263,
      "grad_norm": 0.46423158049583435,
      "learning_rate": 8.600691499890678e-05,
      "loss": 0.8148,
      "step": 10500
    },
    {
      "epoch": 2.952501405283867,
      "grad_norm": 0.48579469323158264,
      "learning_rate": 8.590977967767909e-05,
      "loss": 0.8345,
      "step": 10505
    },
    {
      "epoch": 2.9539066891512085,
      "grad_norm": 0.5756452083587646,
      "learning_rate": 8.581265791835303e-05,
      "loss": 0.8228,
      "step": 10510
    },
    {
      "epoch": 2.9553119730185498,
      "grad_norm": 0.4622665047645569,
      "learning_rate": 8.571554981440864e-05,
      "loss": 0.8306,
      "step": 10515
    },
    {
      "epoch": 2.956717256885891,
      "grad_norm": 0.473634272813797,
      "learning_rate": 8.561845545931297e-05,
      "loss": 0.8174,
      "step": 10520
    },
    {
      "epoch": 2.958122540753232,
      "grad_norm": 0.5003515481948853,
      "learning_rate": 8.552137494651975e-05,
      "loss": 0.8762,
      "step": 10525
    },
    {
      "epoch": 2.959527824620573,
      "grad_norm": 0.4785330891609192,
      "learning_rate": 8.542430836946949e-05,
      "loss": 0.8322,
      "step": 10530
    },
    {
      "epoch": 2.9609331084879145,
      "grad_norm": 0.4684329628944397,
      "learning_rate": 8.532725582158912e-05,
      "loss": 0.8216,
      "step": 10535
    },
    {
      "epoch": 2.9623383923552558,
      "grad_norm": 0.6075077056884766,
      "learning_rate": 8.523021739629221e-05,
      "loss": 0.8183,
      "step": 10540
    },
    {
      "epoch": 2.963743676222597,
      "grad_norm": 0.4996055066585541,
      "learning_rate": 8.513319318697868e-05,
      "loss": 0.831,
      "step": 10545
    },
    {
      "epoch": 2.9651489600899383,
      "grad_norm": 0.48926323652267456,
      "learning_rate": 8.50361832870348e-05,
      "loss": 0.8103,
      "step": 10550
    },
    {
      "epoch": 2.9665542439572796,
      "grad_norm": 0.5758883953094482,
      "learning_rate": 8.493918778983301e-05,
      "loss": 0.819,
      "step": 10555
    },
    {
      "epoch": 2.9679595278246205,
      "grad_norm": 0.5499513745307922,
      "learning_rate": 8.484220678873192e-05,
      "loss": 0.8314,
      "step": 10560
    },
    {
      "epoch": 2.9693648116919618,
      "grad_norm": 0.49780401587486267,
      "learning_rate": 8.474524037707625e-05,
      "loss": 0.8338,
      "step": 10565
    },
    {
      "epoch": 2.970770095559303,
      "grad_norm": 0.5025187134742737,
      "learning_rate": 8.464828864819651e-05,
      "loss": 0.8234,
      "step": 10570
    },
    {
      "epoch": 2.9721753794266443,
      "grad_norm": 0.45492085814476013,
      "learning_rate": 8.455135169540923e-05,
      "loss": 0.8225,
      "step": 10575
    },
    {
      "epoch": 2.973580663293985,
      "grad_norm": 0.4675244987010956,
      "learning_rate": 8.44544296120167e-05,
      "loss": 0.8211,
      "step": 10580
    },
    {
      "epoch": 2.9749859471613265,
      "grad_norm": 0.4467267394065857,
      "learning_rate": 8.435752249130689e-05,
      "loss": 0.8309,
      "step": 10585
    },
    {
      "epoch": 2.9763912310286678,
      "grad_norm": 0.46692943572998047,
      "learning_rate": 8.426063042655326e-05,
      "loss": 0.818,
      "step": 10590
    },
    {
      "epoch": 2.977796514896009,
      "grad_norm": 0.4968641698360443,
      "learning_rate": 8.41637535110149e-05,
      "loss": 0.8167,
      "step": 10595
    },
    {
      "epoch": 2.9792017987633503,
      "grad_norm": 0.5355315804481506,
      "learning_rate": 8.406689183793632e-05,
      "loss": 0.8164,
      "step": 10600
    },
    {
      "epoch": 2.9806070826306916,
      "grad_norm": 0.5333823561668396,
      "learning_rate": 8.39700455005473e-05,
      "loss": 0.8276,
      "step": 10605
    },
    {
      "epoch": 2.9820123664980325,
      "grad_norm": 0.5763578414916992,
      "learning_rate": 8.387321459206287e-05,
      "loss": 0.8282,
      "step": 10610
    },
    {
      "epoch": 2.9834176503653738,
      "grad_norm": 0.4867122769355774,
      "learning_rate": 8.377639920568323e-05,
      "loss": 0.8334,
      "step": 10615
    },
    {
      "epoch": 2.984822934232715,
      "grad_norm": 0.5764617323875427,
      "learning_rate": 8.367959943459366e-05,
      "loss": 0.8135,
      "step": 10620
    },
    {
      "epoch": 2.9862282181000563,
      "grad_norm": 0.6305127143859863,
      "learning_rate": 8.358281537196435e-05,
      "loss": 0.8757,
      "step": 10625
    },
    {
      "epoch": 2.987633501967397,
      "grad_norm": 0.5238044261932373,
      "learning_rate": 8.34860471109504e-05,
      "loss": 0.8323,
      "step": 10630
    },
    {
      "epoch": 2.9890387858347385,
      "grad_norm": 0.6222472786903381,
      "learning_rate": 8.338929474469177e-05,
      "loss": 0.8144,
      "step": 10635
    },
    {
      "epoch": 2.9904440697020798,
      "grad_norm": 0.6870152950286865,
      "learning_rate": 8.329255836631297e-05,
      "loss": 0.8293,
      "step": 10640
    },
    {
      "epoch": 2.991849353569421,
      "grad_norm": 0.6090087890625,
      "learning_rate": 8.319583806892324e-05,
      "loss": 0.8272,
      "step": 10645
    },
    {
      "epoch": 2.9932546374367623,
      "grad_norm": 0.5994482636451721,
      "learning_rate": 8.30991339456163e-05,
      "loss": 0.8229,
      "step": 10650
    },
    {
      "epoch": 2.9946599213041036,
      "grad_norm": 0.7766991853713989,
      "learning_rate": 8.300244608947034e-05,
      "loss": 0.8365,
      "step": 10655
    },
    {
      "epoch": 2.9960652051714445,
      "grad_norm": 0.4686606824398041,
      "learning_rate": 8.290577459354785e-05,
      "loss": 0.8189,
      "step": 10660
    },
    {
      "epoch": 2.9974704890387858,
      "grad_norm": 0.5815786123275757,
      "learning_rate": 8.280911955089556e-05,
      "loss": 0.8192,
      "step": 10665
    },
    {
      "epoch": 2.998875772906127,
      "grad_norm": 0.46798279881477356,
      "learning_rate": 8.271248105454444e-05,
      "loss": 0.8141,
      "step": 10670
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.8574034571647644,
      "eval_runtime": 642.7609,
      "eval_samples_per_second": 6.996,
      "eval_steps_per_second": 0.583,
      "step": 10674
    },
    {
      "epoch": 3.0002810567734683,
      "grad_norm": 0.5984673500061035,
      "learning_rate": 8.261585919750945e-05,
      "loss": 0.8314,
      "step": 10675
    },
    {
      "epoch": 3.0016863406408096,
      "grad_norm": 0.4934682250022888,
      "learning_rate": 8.251925407278958e-05,
      "loss": 0.7858,
      "step": 10680
    },
    {
      "epoch": 3.0030916245081505,
      "grad_norm": 0.5943470001220703,
      "learning_rate": 8.242266577336769e-05,
      "loss": 0.7909,
      "step": 10685
    },
    {
      "epoch": 3.0044969083754918,
      "grad_norm": 0.491205632686615,
      "learning_rate": 8.232609439221053e-05,
      "loss": 0.7954,
      "step": 10690
    },
    {
      "epoch": 3.005902192242833,
      "grad_norm": 0.538317859172821,
      "learning_rate": 8.222954002226839e-05,
      "loss": 0.7859,
      "step": 10695
    },
    {
      "epoch": 3.0073074761101743,
      "grad_norm": 0.5916352868080139,
      "learning_rate": 8.213300275647535e-05,
      "loss": 0.7962,
      "step": 10700
    },
    {
      "epoch": 3.0087127599775156,
      "grad_norm": 0.5188941359519958,
      "learning_rate": 8.2036482687749e-05,
      "loss": 0.7802,
      "step": 10705
    },
    {
      "epoch": 3.0101180438448565,
      "grad_norm": 0.579635500907898,
      "learning_rate": 8.193997990899027e-05,
      "loss": 0.7976,
      "step": 10710
    },
    {
      "epoch": 3.0115233277121978,
      "grad_norm": 0.5322796702384949,
      "learning_rate": 8.184349451308358e-05,
      "loss": 0.7759,
      "step": 10715
    },
    {
      "epoch": 3.012928611579539,
      "grad_norm": 0.6273241639137268,
      "learning_rate": 8.174702659289656e-05,
      "loss": 0.7876,
      "step": 10720
    },
    {
      "epoch": 3.0143338954468804,
      "grad_norm": 0.5047946572303772,
      "learning_rate": 8.165057624128004e-05,
      "loss": 0.8025,
      "step": 10725
    },
    {
      "epoch": 3.0157391793142216,
      "grad_norm": 0.5199359655380249,
      "learning_rate": 8.155414355106787e-05,
      "loss": 0.7995,
      "step": 10730
    },
    {
      "epoch": 3.0171444631815625,
      "grad_norm": 0.4892846643924713,
      "learning_rate": 8.145772861507701e-05,
      "loss": 0.7916,
      "step": 10735
    },
    {
      "epoch": 3.0185497470489038,
      "grad_norm": 0.5490760207176208,
      "learning_rate": 8.13613315261073e-05,
      "loss": 0.7872,
      "step": 10740
    },
    {
      "epoch": 3.019955030916245,
      "grad_norm": 0.6799941062927246,
      "learning_rate": 8.126495237694128e-05,
      "loss": 0.7925,
      "step": 10745
    },
    {
      "epoch": 3.0213603147835864,
      "grad_norm": 0.5038847327232361,
      "learning_rate": 8.11685912603444e-05,
      "loss": 0.8293,
      "step": 10750
    },
    {
      "epoch": 3.0227655986509276,
      "grad_norm": 0.5515377521514893,
      "learning_rate": 8.107224826906462e-05,
      "loss": 0.7846,
      "step": 10755
    },
    {
      "epoch": 3.0241708825182685,
      "grad_norm": 0.5621849894523621,
      "learning_rate": 8.097592349583261e-05,
      "loss": 0.7921,
      "step": 10760
    },
    {
      "epoch": 3.0255761663856098,
      "grad_norm": 0.5750982165336609,
      "learning_rate": 8.08796170333613e-05,
      "loss": 0.781,
      "step": 10765
    },
    {
      "epoch": 3.026981450252951,
      "grad_norm": 0.5427119135856628,
      "learning_rate": 8.078332897434617e-05,
      "loss": 0.7927,
      "step": 10770
    },
    {
      "epoch": 3.0283867341202924,
      "grad_norm": 0.5602450966835022,
      "learning_rate": 8.068705941146488e-05,
      "loss": 0.8031,
      "step": 10775
    },
    {
      "epoch": 3.0297920179876336,
      "grad_norm": 0.5285422205924988,
      "learning_rate": 8.059080843737732e-05,
      "loss": 0.8123,
      "step": 10780
    },
    {
      "epoch": 3.0311973018549745,
      "grad_norm": 0.5058332085609436,
      "learning_rate": 8.049457614472552e-05,
      "loss": 0.7873,
      "step": 10785
    },
    {
      "epoch": 3.032602585722316,
      "grad_norm": 0.5054517984390259,
      "learning_rate": 8.039836262613348e-05,
      "loss": 0.7725,
      "step": 10790
    },
    {
      "epoch": 3.034007869589657,
      "grad_norm": 0.5090791583061218,
      "learning_rate": 8.030216797420721e-05,
      "loss": 0.7995,
      "step": 10795
    },
    {
      "epoch": 3.0354131534569984,
      "grad_norm": 0.5371220707893372,
      "learning_rate": 8.020599228153441e-05,
      "loss": 0.7901,
      "step": 10800
    },
    {
      "epoch": 3.0368184373243396,
      "grad_norm": 0.4719543159008026,
      "learning_rate": 8.010983564068466e-05,
      "loss": 0.7903,
      "step": 10805
    },
    {
      "epoch": 3.038223721191681,
      "grad_norm": 0.5546330213546753,
      "learning_rate": 8.001369814420924e-05,
      "loss": 0.7912,
      "step": 10810
    },
    {
      "epoch": 3.039629005059022,
      "grad_norm": 0.4600013494491577,
      "learning_rate": 7.991757988464081e-05,
      "loss": 0.7809,
      "step": 10815
    },
    {
      "epoch": 3.041034288926363,
      "grad_norm": 0.5031879544258118,
      "learning_rate": 7.982148095449371e-05,
      "loss": 0.7994,
      "step": 10820
    },
    {
      "epoch": 3.0424395727937044,
      "grad_norm": 0.4986834228038788,
      "learning_rate": 7.972540144626359e-05,
      "loss": 0.7846,
      "step": 10825
    },
    {
      "epoch": 3.0438448566610457,
      "grad_norm": 0.5494052767753601,
      "learning_rate": 7.962934145242741e-05,
      "loss": 0.7892,
      "step": 10830
    },
    {
      "epoch": 3.045250140528387,
      "grad_norm": 0.5115951299667358,
      "learning_rate": 7.953330106544334e-05,
      "loss": 0.7771,
      "step": 10835
    },
    {
      "epoch": 3.046655424395728,
      "grad_norm": 0.5679638385772705,
      "learning_rate": 7.943728037775071e-05,
      "loss": 0.7988,
      "step": 10840
    },
    {
      "epoch": 3.048060708263069,
      "grad_norm": 0.50021892786026,
      "learning_rate": 7.934127948176992e-05,
      "loss": 0.7859,
      "step": 10845
    },
    {
      "epoch": 3.0494659921304104,
      "grad_norm": 0.5187581777572632,
      "learning_rate": 7.924529846990215e-05,
      "loss": 0.7894,
      "step": 10850
    },
    {
      "epoch": 3.0508712759977517,
      "grad_norm": 0.4814312160015106,
      "learning_rate": 7.914933743452964e-05,
      "loss": 0.7976,
      "step": 10855
    },
    {
      "epoch": 3.052276559865093,
      "grad_norm": 0.49869638681411743,
      "learning_rate": 7.905339646801534e-05,
      "loss": 0.8099,
      "step": 10860
    },
    {
      "epoch": 3.053681843732434,
      "grad_norm": 0.5544595718383789,
      "learning_rate": 7.895747566270288e-05,
      "loss": 0.7911,
      "step": 10865
    },
    {
      "epoch": 3.055087127599775,
      "grad_norm": 0.4815824627876282,
      "learning_rate": 7.886157511091641e-05,
      "loss": 0.7901,
      "step": 10870
    },
    {
      "epoch": 3.0564924114671164,
      "grad_norm": 0.632993221282959,
      "learning_rate": 7.876569490496068e-05,
      "loss": 0.78,
      "step": 10875
    },
    {
      "epoch": 3.0578976953344577,
      "grad_norm": 0.5329408049583435,
      "learning_rate": 7.866983513712084e-05,
      "loss": 0.8005,
      "step": 10880
    },
    {
      "epoch": 3.059302979201799,
      "grad_norm": 0.5657827258110046,
      "learning_rate": 7.857399589966233e-05,
      "loss": 0.7902,
      "step": 10885
    },
    {
      "epoch": 3.06070826306914,
      "grad_norm": 0.47787678241729736,
      "learning_rate": 7.847817728483088e-05,
      "loss": 0.7819,
      "step": 10890
    },
    {
      "epoch": 3.062113546936481,
      "grad_norm": 0.5120819807052612,
      "learning_rate": 7.838237938485233e-05,
      "loss": 0.7855,
      "step": 10895
    },
    {
      "epoch": 3.0635188308038224,
      "grad_norm": 0.5875343084335327,
      "learning_rate": 7.828660229193262e-05,
      "loss": 0.7848,
      "step": 10900
    },
    {
      "epoch": 3.0649241146711637,
      "grad_norm": 0.5025877952575684,
      "learning_rate": 7.819084609825761e-05,
      "loss": 0.7896,
      "step": 10905
    },
    {
      "epoch": 3.066329398538505,
      "grad_norm": 0.5116874575614929,
      "learning_rate": 7.809511089599309e-05,
      "loss": 0.7804,
      "step": 10910
    },
    {
      "epoch": 3.067734682405846,
      "grad_norm": 0.4917939305305481,
      "learning_rate": 7.799939677728469e-05,
      "loss": 0.7897,
      "step": 10915
    },
    {
      "epoch": 3.069139966273187,
      "grad_norm": 0.5206174254417419,
      "learning_rate": 7.790370383425756e-05,
      "loss": 0.7964,
      "step": 10920
    },
    {
      "epoch": 3.0705452501405284,
      "grad_norm": 0.6208391785621643,
      "learning_rate": 7.780803215901666e-05,
      "loss": 0.7778,
      "step": 10925
    },
    {
      "epoch": 3.0719505340078697,
      "grad_norm": 0.6187405586242676,
      "learning_rate": 7.771238184364643e-05,
      "loss": 0.8014,
      "step": 10930
    },
    {
      "epoch": 3.073355817875211,
      "grad_norm": 0.5639635920524597,
      "learning_rate": 7.761675298021075e-05,
      "loss": 0.7861,
      "step": 10935
    },
    {
      "epoch": 3.074761101742552,
      "grad_norm": 0.5515173673629761,
      "learning_rate": 7.752114566075279e-05,
      "loss": 0.7903,
      "step": 10940
    },
    {
      "epoch": 3.076166385609893,
      "grad_norm": 0.6317364573478699,
      "learning_rate": 7.742555997729504e-05,
      "loss": 0.8106,
      "step": 10945
    },
    {
      "epoch": 3.0775716694772344,
      "grad_norm": 0.49566203355789185,
      "learning_rate": 7.732999602183919e-05,
      "loss": 0.8006,
      "step": 10950
    },
    {
      "epoch": 3.0789769533445757,
      "grad_norm": 0.5118474364280701,
      "learning_rate": 7.7234453886366e-05,
      "loss": 0.7936,
      "step": 10955
    },
    {
      "epoch": 3.080382237211917,
      "grad_norm": 0.5737119913101196,
      "learning_rate": 7.713893366283515e-05,
      "loss": 0.7785,
      "step": 10960
    },
    {
      "epoch": 3.081787521079258,
      "grad_norm": 0.5119444727897644,
      "learning_rate": 7.704343544318532e-05,
      "loss": 0.7899,
      "step": 10965
    },
    {
      "epoch": 3.083192804946599,
      "grad_norm": 0.6276198625564575,
      "learning_rate": 7.694795931933403e-05,
      "loss": 0.7935,
      "step": 10970
    },
    {
      "epoch": 3.0845980888139404,
      "grad_norm": 0.5036231279373169,
      "learning_rate": 7.685250538317741e-05,
      "loss": 0.8082,
      "step": 10975
    },
    {
      "epoch": 3.0860033726812817,
      "grad_norm": 0.49849963188171387,
      "learning_rate": 7.675707372659035e-05,
      "loss": 0.7793,
      "step": 10980
    },
    {
      "epoch": 3.087408656548623,
      "grad_norm": 0.5414336323738098,
      "learning_rate": 7.666166444142621e-05,
      "loss": 0.7803,
      "step": 10985
    },
    {
      "epoch": 3.088813940415964,
      "grad_norm": 0.49839186668395996,
      "learning_rate": 7.6566277619517e-05,
      "loss": 0.8002,
      "step": 10990
    },
    {
      "epoch": 3.090219224283305,
      "grad_norm": 0.48829421401023865,
      "learning_rate": 7.647091335267281e-05,
      "loss": 0.7972,
      "step": 10995
    },
    {
      "epoch": 3.0916245081506464,
      "grad_norm": 0.5062868595123291,
      "learning_rate": 7.637557173268227e-05,
      "loss": 0.794,
      "step": 11000
    },
    {
      "epoch": 3.0930297920179877,
      "grad_norm": 0.4741092622280121,
      "learning_rate": 7.628025285131212e-05,
      "loss": 0.7878,
      "step": 11005
    },
    {
      "epoch": 3.094435075885329,
      "grad_norm": 0.5686512589454651,
      "learning_rate": 7.618495680030718e-05,
      "loss": 0.7966,
      "step": 11010
    },
    {
      "epoch": 3.0958403597526702,
      "grad_norm": 0.5308302044868469,
      "learning_rate": 7.608968367139038e-05,
      "loss": 0.7893,
      "step": 11015
    },
    {
      "epoch": 3.097245643620011,
      "grad_norm": 0.5594052672386169,
      "learning_rate": 7.599443355626251e-05,
      "loss": 0.7939,
      "step": 11020
    },
    {
      "epoch": 3.0986509274873524,
      "grad_norm": 0.4811815917491913,
      "learning_rate": 7.58992065466023e-05,
      "loss": 0.7954,
      "step": 11025
    },
    {
      "epoch": 3.1000562113546937,
      "grad_norm": 0.566451370716095,
      "learning_rate": 7.580400273406611e-05,
      "loss": 0.7763,
      "step": 11030
    },
    {
      "epoch": 3.101461495222035,
      "grad_norm": 0.6021385192871094,
      "learning_rate": 7.570882221028805e-05,
      "loss": 0.7923,
      "step": 11035
    },
    {
      "epoch": 3.1028667790893762,
      "grad_norm": 0.5467907190322876,
      "learning_rate": 7.56136650668799e-05,
      "loss": 0.7989,
      "step": 11040
    },
    {
      "epoch": 3.104272062956717,
      "grad_norm": 0.5167945623397827,
      "learning_rate": 7.551853139543074e-05,
      "loss": 0.8013,
      "step": 11045
    },
    {
      "epoch": 3.1056773468240584,
      "grad_norm": 0.523460865020752,
      "learning_rate": 7.54234212875072e-05,
      "loss": 0.8119,
      "step": 11050
    },
    {
      "epoch": 3.1070826306913997,
      "grad_norm": 0.47800132632255554,
      "learning_rate": 7.532833483465322e-05,
      "loss": 0.7958,
      "step": 11055
    },
    {
      "epoch": 3.108487914558741,
      "grad_norm": 0.4831900894641876,
      "learning_rate": 7.523327212838993e-05,
      "loss": 0.782,
      "step": 11060
    },
    {
      "epoch": 3.1098931984260823,
      "grad_norm": 0.4977155327796936,
      "learning_rate": 7.51382332602156e-05,
      "loss": 0.7959,
      "step": 11065
    },
    {
      "epoch": 3.111298482293423,
      "grad_norm": 0.5085614323616028,
      "learning_rate": 7.504321832160559e-05,
      "loss": 0.7913,
      "step": 11070
    },
    {
      "epoch": 3.1127037661607644,
      "grad_norm": 0.5714178681373596,
      "learning_rate": 7.494822740401226e-05,
      "loss": 0.7831,
      "step": 11075
    },
    {
      "epoch": 3.1141090500281057,
      "grad_norm": 0.5226171016693115,
      "learning_rate": 7.485326059886471e-05,
      "loss": 0.7732,
      "step": 11080
    },
    {
      "epoch": 3.115514333895447,
      "grad_norm": 0.6232420802116394,
      "learning_rate": 7.475831799756897e-05,
      "loss": 0.8024,
      "step": 11085
    },
    {
      "epoch": 3.1169196177627883,
      "grad_norm": 0.5652778744697571,
      "learning_rate": 7.466339969150776e-05,
      "loss": 0.7916,
      "step": 11090
    },
    {
      "epoch": 3.118324901630129,
      "grad_norm": 0.4653576612472534,
      "learning_rate": 7.456850577204037e-05,
      "loss": 0.7967,
      "step": 11095
    },
    {
      "epoch": 3.1197301854974704,
      "grad_norm": 0.5993885397911072,
      "learning_rate": 7.447363633050259e-05,
      "loss": 0.7994,
      "step": 11100
    },
    {
      "epoch": 3.1211354693648117,
      "grad_norm": 0.6875457763671875,
      "learning_rate": 7.437879145820674e-05,
      "loss": 0.7762,
      "step": 11105
    },
    {
      "epoch": 3.122540753232153,
      "grad_norm": 0.6032678484916687,
      "learning_rate": 7.428397124644142e-05,
      "loss": 0.8443,
      "step": 11110
    },
    {
      "epoch": 3.1239460370994943,
      "grad_norm": 0.5008665919303894,
      "learning_rate": 7.418917578647151e-05,
      "loss": 0.8003,
      "step": 11115
    },
    {
      "epoch": 3.125351320966835,
      "grad_norm": 0.5989721417427063,
      "learning_rate": 7.40944051695381e-05,
      "loss": 0.7874,
      "step": 11120
    },
    {
      "epoch": 3.1267566048341764,
      "grad_norm": 0.6037493348121643,
      "learning_rate": 7.399965948685832e-05,
      "loss": 0.7908,
      "step": 11125
    },
    {
      "epoch": 3.1281618887015177,
      "grad_norm": 0.5136246681213379,
      "learning_rate": 7.390493882962534e-05,
      "loss": 0.7868,
      "step": 11130
    },
    {
      "epoch": 3.129567172568859,
      "grad_norm": 0.5260531902313232,
      "learning_rate": 7.38102432890082e-05,
      "loss": 0.7865,
      "step": 11135
    },
    {
      "epoch": 3.1309724564362003,
      "grad_norm": 0.6083421111106873,
      "learning_rate": 7.371557295615184e-05,
      "loss": 0.8002,
      "step": 11140
    },
    {
      "epoch": 3.132377740303541,
      "grad_norm": 0.5094545483589172,
      "learning_rate": 7.362092792217687e-05,
      "loss": 0.7901,
      "step": 11145
    },
    {
      "epoch": 3.1337830241708824,
      "grad_norm": 0.6604807376861572,
      "learning_rate": 7.352630827817954e-05,
      "loss": 0.7975,
      "step": 11150
    },
    {
      "epoch": 3.1351883080382237,
      "grad_norm": 0.5617685914039612,
      "learning_rate": 7.343171411523169e-05,
      "loss": 0.8134,
      "step": 11155
    },
    {
      "epoch": 3.136593591905565,
      "grad_norm": 0.5299571752548218,
      "learning_rate": 7.333714552438067e-05,
      "loss": 0.7944,
      "step": 11160
    },
    {
      "epoch": 3.1379988757729063,
      "grad_norm": 0.5054970383644104,
      "learning_rate": 7.324260259664918e-05,
      "loss": 0.7887,
      "step": 11165
    },
    {
      "epoch": 3.1394041596402475,
      "grad_norm": 0.4990473985671997,
      "learning_rate": 7.314808542303518e-05,
      "loss": 0.7882,
      "step": 11170
    },
    {
      "epoch": 3.1408094435075884,
      "grad_norm": 0.6480944752693176,
      "learning_rate": 7.305359409451191e-05,
      "loss": 0.8355,
      "step": 11175
    },
    {
      "epoch": 3.1422147273749297,
      "grad_norm": 0.49085888266563416,
      "learning_rate": 7.295912870202773e-05,
      "loss": 0.7944,
      "step": 11180
    },
    {
      "epoch": 3.143620011242271,
      "grad_norm": 0.5148762464523315,
      "learning_rate": 7.286468933650597e-05,
      "loss": 0.7887,
      "step": 11185
    },
    {
      "epoch": 3.1450252951096123,
      "grad_norm": 0.5787303447723389,
      "learning_rate": 7.277027608884497e-05,
      "loss": 0.7783,
      "step": 11190
    },
    {
      "epoch": 3.1464305789769536,
      "grad_norm": 0.4866994023323059,
      "learning_rate": 7.267588904991792e-05,
      "loss": 0.7936,
      "step": 11195
    },
    {
      "epoch": 3.1478358628442944,
      "grad_norm": 0.6382277607917786,
      "learning_rate": 7.25815283105728e-05,
      "loss": 0.781,
      "step": 11200
    },
    {
      "epoch": 3.1492411467116357,
      "grad_norm": 0.5190382599830627,
      "learning_rate": 7.248719396163217e-05,
      "loss": 0.8067,
      "step": 11205
    },
    {
      "epoch": 3.150646430578977,
      "grad_norm": 0.5271970629692078,
      "learning_rate": 7.239288609389334e-05,
      "loss": 0.8089,
      "step": 11210
    },
    {
      "epoch": 3.1520517144463183,
      "grad_norm": 0.6586330533027649,
      "learning_rate": 7.229860479812806e-05,
      "loss": 0.7886,
      "step": 11215
    },
    {
      "epoch": 3.1534569983136596,
      "grad_norm": 0.5243818163871765,
      "learning_rate": 7.220435016508249e-05,
      "loss": 0.8002,
      "step": 11220
    },
    {
      "epoch": 3.1548622821810004,
      "grad_norm": 0.5264410376548767,
      "learning_rate": 7.211012228547715e-05,
      "loss": 0.7902,
      "step": 11225
    },
    {
      "epoch": 3.1562675660483417,
      "grad_norm": 0.6128091216087341,
      "learning_rate": 7.201592125000683e-05,
      "loss": 0.7887,
      "step": 11230
    },
    {
      "epoch": 3.157672849915683,
      "grad_norm": 0.5602926015853882,
      "learning_rate": 7.192174714934043e-05,
      "loss": 0.8016,
      "step": 11235
    },
    {
      "epoch": 3.1590781337830243,
      "grad_norm": 0.4874815046787262,
      "learning_rate": 7.182760007412097e-05,
      "loss": 0.7918,
      "step": 11240
    },
    {
      "epoch": 3.1604834176503656,
      "grad_norm": 0.5565770268440247,
      "learning_rate": 7.173348011496542e-05,
      "loss": 0.8176,
      "step": 11245
    },
    {
      "epoch": 3.1618887015177064,
      "grad_norm": 0.5076163411140442,
      "learning_rate": 7.163938736246472e-05,
      "loss": 0.8053,
      "step": 11250
    },
    {
      "epoch": 3.1632939853850477,
      "grad_norm": 0.5580770373344421,
      "learning_rate": 7.15453219071835e-05,
      "loss": 0.7897,
      "step": 11255
    },
    {
      "epoch": 3.164699269252389,
      "grad_norm": 0.468325138092041,
      "learning_rate": 7.145128383966022e-05,
      "loss": 0.7865,
      "step": 11260
    },
    {
      "epoch": 3.1661045531197303,
      "grad_norm": 0.5661777257919312,
      "learning_rate": 7.135727325040698e-05,
      "loss": 0.7794,
      "step": 11265
    },
    {
      "epoch": 3.1675098369870716,
      "grad_norm": 0.47927501797676086,
      "learning_rate": 7.126329022990943e-05,
      "loss": 0.8006,
      "step": 11270
    },
    {
      "epoch": 3.1689151208544124,
      "grad_norm": 0.5229851007461548,
      "learning_rate": 7.116933486862656e-05,
      "loss": 0.7988,
      "step": 11275
    },
    {
      "epoch": 3.1703204047217537,
      "grad_norm": 0.5205390453338623,
      "learning_rate": 7.107540725699089e-05,
      "loss": 0.7854,
      "step": 11280
    },
    {
      "epoch": 3.171725688589095,
      "grad_norm": 0.5371643304824829,
      "learning_rate": 7.098150748540819e-05,
      "loss": 0.7936,
      "step": 11285
    },
    {
      "epoch": 3.1731309724564363,
      "grad_norm": 0.473400741815567,
      "learning_rate": 7.08876356442574e-05,
      "loss": 0.7815,
      "step": 11290
    },
    {
      "epoch": 3.1745362563237776,
      "grad_norm": 0.4691773056983948,
      "learning_rate": 7.079379182389059e-05,
      "loss": 0.7762,
      "step": 11295
    },
    {
      "epoch": 3.1759415401911184,
      "grad_norm": 0.486338347196579,
      "learning_rate": 7.069997611463288e-05,
      "loss": 0.7904,
      "step": 11300
    },
    {
      "epoch": 3.1773468240584597,
      "grad_norm": 0.5320138931274414,
      "learning_rate": 7.060618860678237e-05,
      "loss": 0.7984,
      "step": 11305
    },
    {
      "epoch": 3.178752107925801,
      "grad_norm": 0.48783358931541443,
      "learning_rate": 7.051242939060983e-05,
      "loss": 0.875,
      "step": 11310
    },
    {
      "epoch": 3.1801573917931423,
      "grad_norm": 0.519739031791687,
      "learning_rate": 7.041869855635904e-05,
      "loss": 0.781,
      "step": 11315
    },
    {
      "epoch": 3.1815626756604836,
      "grad_norm": 0.5107063055038452,
      "learning_rate": 7.03249961942464e-05,
      "loss": 0.7877,
      "step": 11320
    },
    {
      "epoch": 3.182967959527825,
      "grad_norm": 0.5117635726928711,
      "learning_rate": 7.023132239446074e-05,
      "loss": 0.793,
      "step": 11325
    },
    {
      "epoch": 3.1843732433951657,
      "grad_norm": 0.5889191031455994,
      "learning_rate": 7.013767724716356e-05,
      "loss": 0.7894,
      "step": 11330
    },
    {
      "epoch": 3.185778527262507,
      "grad_norm": 0.5487093329429626,
      "learning_rate": 7.004406084248878e-05,
      "loss": 0.7881,
      "step": 11335
    },
    {
      "epoch": 3.1871838111298483,
      "grad_norm": 0.4754599928855896,
      "learning_rate": 6.995047327054262e-05,
      "loss": 0.7883,
      "step": 11340
    },
    {
      "epoch": 3.1885890949971896,
      "grad_norm": 0.5368080139160156,
      "learning_rate": 6.985691462140352e-05,
      "loss": 0.7889,
      "step": 11345
    },
    {
      "epoch": 3.1899943788645304,
      "grad_norm": 0.5230497717857361,
      "learning_rate": 6.97633849851221e-05,
      "loss": 0.7838,
      "step": 11350
    },
    {
      "epoch": 3.1913996627318717,
      "grad_norm": 0.4732072651386261,
      "learning_rate": 6.966988445172112e-05,
      "loss": 0.8018,
      "step": 11355
    },
    {
      "epoch": 3.192804946599213,
      "grad_norm": 0.7186647653579712,
      "learning_rate": 6.957641311119517e-05,
      "loss": 0.8392,
      "step": 11360
    },
    {
      "epoch": 3.1942102304665543,
      "grad_norm": 0.5402554273605347,
      "learning_rate": 6.948297105351091e-05,
      "loss": 0.7886,
      "step": 11365
    },
    {
      "epoch": 3.1956155143338956,
      "grad_norm": 0.550915539264679,
      "learning_rate": 6.938955836860677e-05,
      "loss": 0.7775,
      "step": 11370
    },
    {
      "epoch": 3.197020798201237,
      "grad_norm": 0.499353289604187,
      "learning_rate": 6.929617514639288e-05,
      "loss": 0.7933,
      "step": 11375
    },
    {
      "epoch": 3.1984260820685777,
      "grad_norm": 0.495159775018692,
      "learning_rate": 6.920282147675095e-05,
      "loss": 0.789,
      "step": 11380
    },
    {
      "epoch": 3.199831365935919,
      "grad_norm": 0.5102311372756958,
      "learning_rate": 6.910949744953438e-05,
      "loss": 0.7843,
      "step": 11385
    },
    {
      "epoch": 3.2012366498032603,
      "grad_norm": 0.5126308798789978,
      "learning_rate": 6.901620315456793e-05,
      "loss": 0.7959,
      "step": 11390
    },
    {
      "epoch": 3.2026419336706016,
      "grad_norm": 0.6104722023010254,
      "learning_rate": 6.892293868164785e-05,
      "loss": 0.8012,
      "step": 11395
    },
    {
      "epoch": 3.204047217537943,
      "grad_norm": 0.49474766850471497,
      "learning_rate": 6.882970412054158e-05,
      "loss": 0.8101,
      "step": 11400
    },
    {
      "epoch": 3.2054525014052837,
      "grad_norm": 0.520660400390625,
      "learning_rate": 6.873649956098778e-05,
      "loss": 0.8117,
      "step": 11405
    },
    {
      "epoch": 3.206857785272625,
      "grad_norm": 0.6017019748687744,
      "learning_rate": 6.864332509269633e-05,
      "loss": 0.8399,
      "step": 11410
    },
    {
      "epoch": 3.2082630691399663,
      "grad_norm": 0.4822956621646881,
      "learning_rate": 6.855018080534804e-05,
      "loss": 0.7875,
      "step": 11415
    },
    {
      "epoch": 3.2096683530073076,
      "grad_norm": 0.537238359451294,
      "learning_rate": 6.845706678859473e-05,
      "loss": 0.797,
      "step": 11420
    },
    {
      "epoch": 3.211073636874649,
      "grad_norm": 0.5192193388938904,
      "learning_rate": 6.836398313205905e-05,
      "loss": 0.8137,
      "step": 11425
    },
    {
      "epoch": 3.2124789207419897,
      "grad_norm": 0.5809474587440491,
      "learning_rate": 6.82709299253345e-05,
      "loss": 0.7843,
      "step": 11430
    },
    {
      "epoch": 3.213884204609331,
      "grad_norm": 0.5197656750679016,
      "learning_rate": 6.817790725798515e-05,
      "loss": 0.7871,
      "step": 11435
    },
    {
      "epoch": 3.2152894884766723,
      "grad_norm": 0.6199994683265686,
      "learning_rate": 6.808491521954576e-05,
      "loss": 0.8056,
      "step": 11440
    },
    {
      "epoch": 3.2166947723440136,
      "grad_norm": 0.5630050897598267,
      "learning_rate": 6.799195389952162e-05,
      "loss": 0.7921,
      "step": 11445
    },
    {
      "epoch": 3.218100056211355,
      "grad_norm": 0.5317882895469666,
      "learning_rate": 6.78990233873884e-05,
      "loss": 0.7821,
      "step": 11450
    },
    {
      "epoch": 3.2195053400786957,
      "grad_norm": 0.4947078227996826,
      "learning_rate": 6.780612377259215e-05,
      "loss": 0.7922,
      "step": 11455
    },
    {
      "epoch": 3.220910623946037,
      "grad_norm": 0.5854290127754211,
      "learning_rate": 6.771325514454917e-05,
      "loss": 0.7796,
      "step": 11460
    },
    {
      "epoch": 3.2223159078133783,
      "grad_norm": 0.5860560536384583,
      "learning_rate": 6.762041759264596e-05,
      "loss": 0.7837,
      "step": 11465
    },
    {
      "epoch": 3.2237211916807196,
      "grad_norm": 0.5405226349830627,
      "learning_rate": 6.752761120623907e-05,
      "loss": 0.7883,
      "step": 11470
    },
    {
      "epoch": 3.225126475548061,
      "grad_norm": 0.6428171396255493,
      "learning_rate": 6.743483607465508e-05,
      "loss": 0.7898,
      "step": 11475
    },
    {
      "epoch": 3.2265317594154017,
      "grad_norm": 0.5037165284156799,
      "learning_rate": 6.734209228719051e-05,
      "loss": 0.7974,
      "step": 11480
    },
    {
      "epoch": 3.227937043282743,
      "grad_norm": 0.5832943320274353,
      "learning_rate": 6.72493799331116e-05,
      "loss": 0.8045,
      "step": 11485
    },
    {
      "epoch": 3.2293423271500843,
      "grad_norm": 0.5611792802810669,
      "learning_rate": 6.715669910165445e-05,
      "loss": 0.7885,
      "step": 11490
    },
    {
      "epoch": 3.2307476110174256,
      "grad_norm": 0.49044761061668396,
      "learning_rate": 6.70640498820248e-05,
      "loss": 0.7896,
      "step": 11495
    },
    {
      "epoch": 3.232152894884767,
      "grad_norm": 0.4808729887008667,
      "learning_rate": 6.6971432363398e-05,
      "loss": 0.7872,
      "step": 11500
    },
    {
      "epoch": 3.2335581787521077,
      "grad_norm": 0.49567148089408875,
      "learning_rate": 6.687884663491874e-05,
      "loss": 0.7877,
      "step": 11505
    },
    {
      "epoch": 3.234963462619449,
      "grad_norm": 0.5091308951377869,
      "learning_rate": 6.678629278570125e-05,
      "loss": 0.8029,
      "step": 11510
    },
    {
      "epoch": 3.2363687464867903,
      "grad_norm": 0.4903222322463989,
      "learning_rate": 6.669377090482903e-05,
      "loss": 0.799,
      "step": 11515
    },
    {
      "epoch": 3.2377740303541316,
      "grad_norm": 0.4882327616214752,
      "learning_rate": 6.660128108135481e-05,
      "loss": 0.7867,
      "step": 11520
    },
    {
      "epoch": 3.239179314221473,
      "grad_norm": 0.5147338509559631,
      "learning_rate": 6.650882340430048e-05,
      "loss": 0.8014,
      "step": 11525
    },
    {
      "epoch": 3.240584598088814,
      "grad_norm": 0.4741876721382141,
      "learning_rate": 6.641639796265696e-05,
      "loss": 0.7802,
      "step": 11530
    },
    {
      "epoch": 3.241989881956155,
      "grad_norm": 0.5736861228942871,
      "learning_rate": 6.632400484538422e-05,
      "loss": 0.8062,
      "step": 11535
    },
    {
      "epoch": 3.2433951658234963,
      "grad_norm": 0.5249696373939514,
      "learning_rate": 6.623164414141094e-05,
      "loss": 0.8,
      "step": 11540
    },
    {
      "epoch": 3.2448004496908376,
      "grad_norm": 0.5339483022689819,
      "learning_rate": 6.613931593963483e-05,
      "loss": 0.7891,
      "step": 11545
    },
    {
      "epoch": 3.246205733558179,
      "grad_norm": 0.6143501996994019,
      "learning_rate": 6.604702032892221e-05,
      "loss": 0.8513,
      "step": 11550
    },
    {
      "epoch": 3.2476110174255197,
      "grad_norm": 0.5576615929603577,
      "learning_rate": 6.595475739810792e-05,
      "loss": 0.7899,
      "step": 11555
    },
    {
      "epoch": 3.249016301292861,
      "grad_norm": 0.5930474996566772,
      "learning_rate": 6.586252723599553e-05,
      "loss": 0.797,
      "step": 11560
    },
    {
      "epoch": 3.2504215851602023,
      "grad_norm": 0.5222381949424744,
      "learning_rate": 6.577032993135699e-05,
      "loss": 0.7892,
      "step": 11565
    },
    {
      "epoch": 3.2518268690275436,
      "grad_norm": 0.5676156282424927,
      "learning_rate": 6.567816557293262e-05,
      "loss": 0.7971,
      "step": 11570
    },
    {
      "epoch": 3.253232152894885,
      "grad_norm": 0.5307049751281738,
      "learning_rate": 6.558603424943104e-05,
      "loss": 0.7918,
      "step": 11575
    },
    {
      "epoch": 3.254637436762226,
      "grad_norm": 0.5155093669891357,
      "learning_rate": 6.549393604952906e-05,
      "loss": 0.7893,
      "step": 11580
    },
    {
      "epoch": 3.256042720629567,
      "grad_norm": 0.4945572018623352,
      "learning_rate": 6.540187106187167e-05,
      "loss": 0.7838,
      "step": 11585
    },
    {
      "epoch": 3.2574480044969083,
      "grad_norm": 0.570228099822998,
      "learning_rate": 6.530983937507173e-05,
      "loss": 0.7912,
      "step": 11590
    },
    {
      "epoch": 3.2588532883642496,
      "grad_norm": 0.5457921028137207,
      "learning_rate": 6.521784107771027e-05,
      "loss": 0.7801,
      "step": 11595
    },
    {
      "epoch": 3.260258572231591,
      "grad_norm": 0.5242686867713928,
      "learning_rate": 6.512587625833602e-05,
      "loss": 0.7951,
      "step": 11600
    },
    {
      "epoch": 3.261663856098932,
      "grad_norm": 0.5158045291900635,
      "learning_rate": 6.503394500546558e-05,
      "loss": 0.7868,
      "step": 11605
    },
    {
      "epoch": 3.263069139966273,
      "grad_norm": 0.5344769358634949,
      "learning_rate": 6.494204740758314e-05,
      "loss": 0.8,
      "step": 11610
    },
    {
      "epoch": 3.2644744238336143,
      "grad_norm": 0.5959330797195435,
      "learning_rate": 6.48501835531406e-05,
      "loss": 0.7896,
      "step": 11615
    },
    {
      "epoch": 3.2658797077009556,
      "grad_norm": 0.518044114112854,
      "learning_rate": 6.475835353055735e-05,
      "loss": 0.7932,
      "step": 11620
    },
    {
      "epoch": 3.267284991568297,
      "grad_norm": 0.5310397148132324,
      "learning_rate": 6.466655742822017e-05,
      "loss": 0.7983,
      "step": 11625
    },
    {
      "epoch": 3.268690275435638,
      "grad_norm": 0.5325920581817627,
      "learning_rate": 6.457479533448323e-05,
      "loss": 0.7946,
      "step": 11630
    },
    {
      "epoch": 3.270095559302979,
      "grad_norm": 0.7155055999755859,
      "learning_rate": 6.4483067337668e-05,
      "loss": 0.7785,
      "step": 11635
    },
    {
      "epoch": 3.2715008431703203,
      "grad_norm": 0.5178682208061218,
      "learning_rate": 6.439137352606311e-05,
      "loss": 0.7872,
      "step": 11640
    },
    {
      "epoch": 3.2729061270376616,
      "grad_norm": 0.4876868426799774,
      "learning_rate": 6.42997139879242e-05,
      "loss": 0.8078,
      "step": 11645
    },
    {
      "epoch": 3.274311410905003,
      "grad_norm": 0.5107020735740662,
      "learning_rate": 6.420808881147406e-05,
      "loss": 0.8063,
      "step": 11650
    },
    {
      "epoch": 3.275716694772344,
      "grad_norm": 0.49932652711868286,
      "learning_rate": 6.411649808490236e-05,
      "loss": 0.7829,
      "step": 11655
    },
    {
      "epoch": 3.277121978639685,
      "grad_norm": 0.552949845790863,
      "learning_rate": 6.40249418963655e-05,
      "loss": 0.7929,
      "step": 11660
    },
    {
      "epoch": 3.2785272625070263,
      "grad_norm": 0.5141507983207703,
      "learning_rate": 6.393342033398681e-05,
      "loss": 0.8005,
      "step": 11665
    },
    {
      "epoch": 3.2799325463743676,
      "grad_norm": 0.541842520236969,
      "learning_rate": 6.384193348585617e-05,
      "loss": 0.7945,
      "step": 11670
    },
    {
      "epoch": 3.281337830241709,
      "grad_norm": 0.53905189037323,
      "learning_rate": 6.375048144003013e-05,
      "loss": 0.7838,
      "step": 11675
    },
    {
      "epoch": 3.28274311410905,
      "grad_norm": 0.49840521812438965,
      "learning_rate": 6.365906428453167e-05,
      "loss": 0.7822,
      "step": 11680
    },
    {
      "epoch": 3.2841483979763915,
      "grad_norm": 0.5327207446098328,
      "learning_rate": 6.356768210735023e-05,
      "loss": 0.7924,
      "step": 11685
    },
    {
      "epoch": 3.2855536818437323,
      "grad_norm": 0.507588803768158,
      "learning_rate": 6.347633499644158e-05,
      "loss": 0.8038,
      "step": 11690
    },
    {
      "epoch": 3.2869589657110736,
      "grad_norm": 0.5312201976776123,
      "learning_rate": 6.338502303972774e-05,
      "loss": 0.8128,
      "step": 11695
    },
    {
      "epoch": 3.288364249578415,
      "grad_norm": 0.5181981325149536,
      "learning_rate": 6.329374632509686e-05,
      "loss": 0.7969,
      "step": 11700
    },
    {
      "epoch": 3.289769533445756,
      "grad_norm": 0.6399694681167603,
      "learning_rate": 6.32025049404032e-05,
      "loss": 0.7881,
      "step": 11705
    },
    {
      "epoch": 3.291174817313097,
      "grad_norm": 0.6674182415008545,
      "learning_rate": 6.311129897346708e-05,
      "loss": 0.7811,
      "step": 11710
    },
    {
      "epoch": 3.2925801011804383,
      "grad_norm": 0.492032527923584,
      "learning_rate": 6.302012851207455e-05,
      "loss": 0.7961,
      "step": 11715
    },
    {
      "epoch": 3.2939853850477796,
      "grad_norm": 0.5350286364555359,
      "learning_rate": 6.292899364397764e-05,
      "loss": 0.7901,
      "step": 11720
    },
    {
      "epoch": 3.295390668915121,
      "grad_norm": 0.5172516107559204,
      "learning_rate": 6.283789445689414e-05,
      "loss": 0.7946,
      "step": 11725
    },
    {
      "epoch": 3.296795952782462,
      "grad_norm": 0.5363070964813232,
      "learning_rate": 6.274683103850734e-05,
      "loss": 0.7859,
      "step": 11730
    },
    {
      "epoch": 3.2982012366498035,
      "grad_norm": 0.5336095094680786,
      "learning_rate": 6.265580347646624e-05,
      "loss": 0.7982,
      "step": 11735
    },
    {
      "epoch": 3.2996065205171443,
      "grad_norm": 0.5102813839912415,
      "learning_rate": 6.256481185838528e-05,
      "loss": 0.7899,
      "step": 11740
    },
    {
      "epoch": 3.3010118043844856,
      "grad_norm": 0.5685641169548035,
      "learning_rate": 6.247385627184435e-05,
      "loss": 0.7871,
      "step": 11745
    },
    {
      "epoch": 3.302417088251827,
      "grad_norm": 0.534301221370697,
      "learning_rate": 6.238293680438854e-05,
      "loss": 0.7816,
      "step": 11750
    },
    {
      "epoch": 3.303822372119168,
      "grad_norm": 0.52571702003479,
      "learning_rate": 6.22920535435283e-05,
      "loss": 0.8023,
      "step": 11755
    },
    {
      "epoch": 3.305227655986509,
      "grad_norm": 0.5615999102592468,
      "learning_rate": 6.220120657673922e-05,
      "loss": 0.7981,
      "step": 11760
    },
    {
      "epoch": 3.3066329398538503,
      "grad_norm": 0.5294827818870544,
      "learning_rate": 6.211039599146184e-05,
      "loss": 0.8098,
      "step": 11765
    },
    {
      "epoch": 3.3080382237211916,
      "grad_norm": 0.5420514941215515,
      "learning_rate": 6.201962187510179e-05,
      "loss": 0.7976,
      "step": 11770
    },
    {
      "epoch": 3.309443507588533,
      "grad_norm": 0.505811333656311,
      "learning_rate": 6.192888431502962e-05,
      "loss": 0.8005,
      "step": 11775
    },
    {
      "epoch": 3.310848791455874,
      "grad_norm": 0.49639084935188293,
      "learning_rate": 6.183818339858064e-05,
      "loss": 0.8182,
      "step": 11780
    },
    {
      "epoch": 3.3122540753232155,
      "grad_norm": 0.7370562553405762,
      "learning_rate": 6.174751921305484e-05,
      "loss": 0.7969,
      "step": 11785
    },
    {
      "epoch": 3.3136593591905563,
      "grad_norm": 0.4888727366924286,
      "learning_rate": 6.165689184571694e-05,
      "loss": 0.808,
      "step": 11790
    },
    {
      "epoch": 3.3150646430578976,
      "grad_norm": 0.535828173160553,
      "learning_rate": 6.15663013837962e-05,
      "loss": 0.7836,
      "step": 11795
    },
    {
      "epoch": 3.316469926925239,
      "grad_norm": 0.5442777276039124,
      "learning_rate": 6.147574791448638e-05,
      "loss": 0.795,
      "step": 11800
    },
    {
      "epoch": 3.31787521079258,
      "grad_norm": 0.5069718360900879,
      "learning_rate": 6.138523152494557e-05,
      "loss": 0.8176,
      "step": 11805
    },
    {
      "epoch": 3.3192804946599215,
      "grad_norm": 0.5373730659484863,
      "learning_rate": 6.129475230229622e-05,
      "loss": 0.8136,
      "step": 11810
    },
    {
      "epoch": 3.3206857785272623,
      "grad_norm": 0.5106169581413269,
      "learning_rate": 6.120431033362503e-05,
      "loss": 0.7883,
      "step": 11815
    },
    {
      "epoch": 3.3220910623946036,
      "grad_norm": 0.5332974195480347,
      "learning_rate": 6.111390570598274e-05,
      "loss": 0.8001,
      "step": 11820
    },
    {
      "epoch": 3.323496346261945,
      "grad_norm": 0.5583652257919312,
      "learning_rate": 6.102353850638428e-05,
      "loss": 0.7907,
      "step": 11825
    },
    {
      "epoch": 3.324901630129286,
      "grad_norm": 0.56987065076828,
      "learning_rate": 6.0933208821808465e-05,
      "loss": 0.7943,
      "step": 11830
    },
    {
      "epoch": 3.3263069139966275,
      "grad_norm": 0.5036487579345703,
      "learning_rate": 6.084291673919806e-05,
      "loss": 0.7914,
      "step": 11835
    },
    {
      "epoch": 3.3277121978639688,
      "grad_norm": 0.5210623741149902,
      "learning_rate": 6.075266234545956e-05,
      "loss": 0.8094,
      "step": 11840
    },
    {
      "epoch": 3.3291174817313096,
      "grad_norm": 0.5213614106178284,
      "learning_rate": 6.066244572746327e-05,
      "loss": 0.7805,
      "step": 11845
    },
    {
      "epoch": 3.330522765598651,
      "grad_norm": 0.5487878918647766,
      "learning_rate": 6.057226697204308e-05,
      "loss": 0.7955,
      "step": 11850
    },
    {
      "epoch": 3.331928049465992,
      "grad_norm": 0.6446609497070312,
      "learning_rate": 6.048212616599645e-05,
      "loss": 0.7966,
      "step": 11855
    },
    {
      "epoch": 3.3333333333333335,
      "grad_norm": 0.5579555034637451,
      "learning_rate": 6.039202339608432e-05,
      "loss": 0.7979,
      "step": 11860
    },
    {
      "epoch": 3.3347386172006743,
      "grad_norm": 0.5008167028427124,
      "learning_rate": 6.030195874903104e-05,
      "loss": 0.7986,
      "step": 11865
    },
    {
      "epoch": 3.3361439010680156,
      "grad_norm": 0.6530143022537231,
      "learning_rate": 6.021193231152424e-05,
      "loss": 0.7906,
      "step": 11870
    },
    {
      "epoch": 3.337549184935357,
      "grad_norm": 0.5034095644950867,
      "learning_rate": 6.012194417021475e-05,
      "loss": 0.8082,
      "step": 11875
    },
    {
      "epoch": 3.338954468802698,
      "grad_norm": 0.7468091249465942,
      "learning_rate": 6.0031994411716594e-05,
      "loss": 0.8013,
      "step": 11880
    },
    {
      "epoch": 3.3403597526700395,
      "grad_norm": 0.5912469029426575,
      "learning_rate": 5.9942083122606864e-05,
      "loss": 0.8076,
      "step": 11885
    },
    {
      "epoch": 3.3417650365373808,
      "grad_norm": 0.5136885046958923,
      "learning_rate": 5.98522103894255e-05,
      "loss": 0.792,
      "step": 11890
    },
    {
      "epoch": 3.3431703204047216,
      "grad_norm": 0.5261921882629395,
      "learning_rate": 5.976237629867545e-05,
      "loss": 0.7974,
      "step": 11895
    },
    {
      "epoch": 3.344575604272063,
      "grad_norm": 0.5408470630645752,
      "learning_rate": 5.9672580936822465e-05,
      "loss": 0.7946,
      "step": 11900
    },
    {
      "epoch": 3.345980888139404,
      "grad_norm": 0.5385935306549072,
      "learning_rate": 5.9582824390295e-05,
      "loss": 0.7778,
      "step": 11905
    },
    {
      "epoch": 3.3473861720067455,
      "grad_norm": 0.5939579010009766,
      "learning_rate": 5.9493106745484096e-05,
      "loss": 0.8341,
      "step": 11910
    },
    {
      "epoch": 3.3487914558740863,
      "grad_norm": 0.5419361591339111,
      "learning_rate": 5.9403428088743416e-05,
      "loss": 0.8016,
      "step": 11915
    },
    {
      "epoch": 3.3501967397414276,
      "grad_norm": 0.6431915760040283,
      "learning_rate": 5.9313788506389115e-05,
      "loss": 0.7874,
      "step": 11920
    },
    {
      "epoch": 3.351602023608769,
      "grad_norm": 0.5178044438362122,
      "learning_rate": 5.922418808469963e-05,
      "loss": 0.7925,
      "step": 11925
    },
    {
      "epoch": 3.35300730747611,
      "grad_norm": 0.5566776394844055,
      "learning_rate": 5.9134626909915825e-05,
      "loss": 0.7794,
      "step": 11930
    },
    {
      "epoch": 3.3544125913434515,
      "grad_norm": 0.529966413974762,
      "learning_rate": 5.904510506824074e-05,
      "loss": 0.7994,
      "step": 11935
    },
    {
      "epoch": 3.3558178752107928,
      "grad_norm": 0.5139265656471252,
      "learning_rate": 5.895562264583958e-05,
      "loss": 0.8013,
      "step": 11940
    },
    {
      "epoch": 3.3572231590781336,
      "grad_norm": 0.5273216962814331,
      "learning_rate": 5.886617972883952e-05,
      "loss": 0.7953,
      "step": 11945
    },
    {
      "epoch": 3.358628442945475,
      "grad_norm": 0.5380910634994507,
      "learning_rate": 5.877677640332979e-05,
      "loss": 0.7986,
      "step": 11950
    },
    {
      "epoch": 3.360033726812816,
      "grad_norm": 0.4941082298755646,
      "learning_rate": 5.868741275536158e-05,
      "loss": 0.7855,
      "step": 11955
    },
    {
      "epoch": 3.3614390106801575,
      "grad_norm": 0.4764517545700073,
      "learning_rate": 5.859808887094771e-05,
      "loss": 0.7901,
      "step": 11960
    },
    {
      "epoch": 3.362844294547499,
      "grad_norm": 0.587144672870636,
      "learning_rate": 5.850880483606289e-05,
      "loss": 0.7967,
      "step": 11965
    },
    {
      "epoch": 3.3642495784148396,
      "grad_norm": 0.5278950929641724,
      "learning_rate": 5.8419560736643374e-05,
      "loss": 0.7887,
      "step": 11970
    },
    {
      "epoch": 3.365654862282181,
      "grad_norm": 0.5130148530006409,
      "learning_rate": 5.833035665858706e-05,
      "loss": 0.784,
      "step": 11975
    },
    {
      "epoch": 3.367060146149522,
      "grad_norm": 0.5496023893356323,
      "learning_rate": 5.8241192687753245e-05,
      "loss": 0.7911,
      "step": 11980
    },
    {
      "epoch": 3.3684654300168635,
      "grad_norm": 0.5216452479362488,
      "learning_rate": 5.8152068909962665e-05,
      "loss": 0.811,
      "step": 11985
    },
    {
      "epoch": 3.369870713884205,
      "grad_norm": 0.49990314245224,
      "learning_rate": 5.806298541099742e-05,
      "loss": 0.8043,
      "step": 11990
    },
    {
      "epoch": 3.3712759977515456,
      "grad_norm": 0.5206735134124756,
      "learning_rate": 5.797394227660068e-05,
      "loss": 0.8125,
      "step": 11995
    },
    {
      "epoch": 3.372681281618887,
      "grad_norm": 0.5224774479866028,
      "learning_rate": 5.788493959247694e-05,
      "loss": 0.7833,
      "step": 12000
    },
    {
      "epoch": 3.374086565486228,
      "grad_norm": 0.5243582725524902,
      "learning_rate": 5.779597744429166e-05,
      "loss": 0.8006,
      "step": 12005
    },
    {
      "epoch": 3.3754918493535695,
      "grad_norm": 0.5527240037918091,
      "learning_rate": 5.770705591767135e-05,
      "loss": 0.7957,
      "step": 12010
    },
    {
      "epoch": 3.376897133220911,
      "grad_norm": 0.5153176784515381,
      "learning_rate": 5.7618175098203355e-05,
      "loss": 0.78,
      "step": 12015
    },
    {
      "epoch": 3.3783024170882516,
      "grad_norm": 0.604854166507721,
      "learning_rate": 5.75293350714359e-05,
      "loss": 0.7887,
      "step": 12020
    },
    {
      "epoch": 3.379707700955593,
      "grad_norm": 0.6152995228767395,
      "learning_rate": 5.7440535922877926e-05,
      "loss": 0.8006,
      "step": 12025
    },
    {
      "epoch": 3.381112984822934,
      "grad_norm": 0.5055761337280273,
      "learning_rate": 5.7351777737998955e-05,
      "loss": 0.8001,
      "step": 12030
    },
    {
      "epoch": 3.3825182686902755,
      "grad_norm": 0.5889893770217896,
      "learning_rate": 5.72630606022292e-05,
      "loss": 0.7956,
      "step": 12035
    },
    {
      "epoch": 3.383923552557617,
      "grad_norm": 0.5556915402412415,
      "learning_rate": 5.7174384600959294e-05,
      "loss": 0.7849,
      "step": 12040
    },
    {
      "epoch": 3.385328836424958,
      "grad_norm": 0.7165861129760742,
      "learning_rate": 5.708574981954033e-05,
      "loss": 0.7997,
      "step": 12045
    },
    {
      "epoch": 3.386734120292299,
      "grad_norm": 0.6431218981742859,
      "learning_rate": 5.699715634328362e-05,
      "loss": 0.7927,
      "step": 12050
    },
    {
      "epoch": 3.38813940415964,
      "grad_norm": 0.5332899689674377,
      "learning_rate": 5.690860425746084e-05,
      "loss": 0.7962,
      "step": 12055
    },
    {
      "epoch": 3.3895446880269815,
      "grad_norm": 0.5417184233665466,
      "learning_rate": 5.682009364730376e-05,
      "loss": 0.7856,
      "step": 12060
    },
    {
      "epoch": 3.390949971894323,
      "grad_norm": 0.5324541330337524,
      "learning_rate": 5.6731624598004254e-05,
      "loss": 0.7881,
      "step": 12065
    },
    {
      "epoch": 3.3923552557616636,
      "grad_norm": 0.544225811958313,
      "learning_rate": 5.6643197194714184e-05,
      "loss": 0.8035,
      "step": 12070
    },
    {
      "epoch": 3.393760539629005,
      "grad_norm": 0.5472251176834106,
      "learning_rate": 5.655481152254534e-05,
      "loss": 0.834,
      "step": 12075
    },
    {
      "epoch": 3.395165823496346,
      "grad_norm": 0.5320506691932678,
      "learning_rate": 5.646646766656938e-05,
      "loss": 0.7933,
      "step": 12080
    },
    {
      "epoch": 3.3965711073636875,
      "grad_norm": 0.5282317996025085,
      "learning_rate": 5.6378165711817554e-05,
      "loss": 0.7776,
      "step": 12085
    },
    {
      "epoch": 3.397976391231029,
      "grad_norm": 0.5126026272773743,
      "learning_rate": 5.628990574328099e-05,
      "loss": 0.792,
      "step": 12090
    },
    {
      "epoch": 3.39938167509837,
      "grad_norm": 0.49469032883644104,
      "learning_rate": 5.620168784591031e-05,
      "loss": 0.789,
      "step": 12095
    },
    {
      "epoch": 3.400786958965711,
      "grad_norm": 0.5256794095039368,
      "learning_rate": 5.611351210461559e-05,
      "loss": 0.7976,
      "step": 12100
    },
    {
      "epoch": 3.402192242833052,
      "grad_norm": 0.5282547473907471,
      "learning_rate": 5.602537860426641e-05,
      "loss": 0.7873,
      "step": 12105
    },
    {
      "epoch": 3.4035975267003935,
      "grad_norm": 0.5821418166160583,
      "learning_rate": 5.5937287429691664e-05,
      "loss": 0.7942,
      "step": 12110
    },
    {
      "epoch": 3.405002810567735,
      "grad_norm": 0.5334835648536682,
      "learning_rate": 5.584923866567954e-05,
      "loss": 0.7937,
      "step": 12115
    },
    {
      "epoch": 3.4064080944350756,
      "grad_norm": 0.5103002786636353,
      "learning_rate": 5.576123239697735e-05,
      "loss": 0.7858,
      "step": 12120
    },
    {
      "epoch": 3.407813378302417,
      "grad_norm": 0.5318170189857483,
      "learning_rate": 5.5673268708291526e-05,
      "loss": 0.8589,
      "step": 12125
    },
    {
      "epoch": 3.409218662169758,
      "grad_norm": 0.5624529719352722,
      "learning_rate": 5.558534768428758e-05,
      "loss": 0.7899,
      "step": 12130
    },
    {
      "epoch": 3.4106239460370995,
      "grad_norm": 0.5814023613929749,
      "learning_rate": 5.54974694095898e-05,
      "loss": 0.794,
      "step": 12135
    },
    {
      "epoch": 3.412029229904441,
      "grad_norm": 0.5576233863830566,
      "learning_rate": 5.5409633968781495e-05,
      "loss": 0.7996,
      "step": 12140
    },
    {
      "epoch": 3.413434513771782,
      "grad_norm": 0.5190870761871338,
      "learning_rate": 5.532184144640464e-05,
      "loss": 0.7997,
      "step": 12145
    },
    {
      "epoch": 3.414839797639123,
      "grad_norm": 0.5696029663085938,
      "learning_rate": 5.523409192696003e-05,
      "loss": 0.7931,
      "step": 12150
    },
    {
      "epoch": 3.416245081506464,
      "grad_norm": 0.5543867349624634,
      "learning_rate": 5.514638549490686e-05,
      "loss": 0.7841,
      "step": 12155
    },
    {
      "epoch": 3.4176503653738055,
      "grad_norm": 0.5611448884010315,
      "learning_rate": 5.5058722234663026e-05,
      "loss": 0.8076,
      "step": 12160
    },
    {
      "epoch": 3.419055649241147,
      "grad_norm": 0.7647613286972046,
      "learning_rate": 5.497110223060482e-05,
      "loss": 0.8003,
      "step": 12165
    },
    {
      "epoch": 3.420460933108488,
      "grad_norm": 0.5532127022743225,
      "learning_rate": 5.488352556706689e-05,
      "loss": 0.7898,
      "step": 12170
    },
    {
      "epoch": 3.421866216975829,
      "grad_norm": 0.6410893797874451,
      "learning_rate": 5.4795992328342185e-05,
      "loss": 0.799,
      "step": 12175
    },
    {
      "epoch": 3.4232715008431702,
      "grad_norm": 0.48902472853660583,
      "learning_rate": 5.4708502598681835e-05,
      "loss": 0.7883,
      "step": 12180
    },
    {
      "epoch": 3.4246767847105115,
      "grad_norm": 0.495099276304245,
      "learning_rate": 5.462105646229515e-05,
      "loss": 0.7942,
      "step": 12185
    },
    {
      "epoch": 3.426082068577853,
      "grad_norm": 0.5260860919952393,
      "learning_rate": 5.453365400334937e-05,
      "loss": 0.7951,
      "step": 12190
    },
    {
      "epoch": 3.427487352445194,
      "grad_norm": 0.5531935691833496,
      "learning_rate": 5.444629530596979e-05,
      "loss": 0.7798,
      "step": 12195
    },
    {
      "epoch": 3.4288926363125354,
      "grad_norm": 0.5121034383773804,
      "learning_rate": 5.435898045423955e-05,
      "loss": 0.833,
      "step": 12200
    },
    {
      "epoch": 3.4302979201798762,
      "grad_norm": 0.5159273743629456,
      "learning_rate": 5.4271709532199646e-05,
      "loss": 0.7916,
      "step": 12205
    },
    {
      "epoch": 3.4317032040472175,
      "grad_norm": 0.6313301920890808,
      "learning_rate": 5.418448262384864e-05,
      "loss": 0.7915,
      "step": 12210
    },
    {
      "epoch": 3.433108487914559,
      "grad_norm": 0.5238364934921265,
      "learning_rate": 5.409729981314291e-05,
      "loss": 0.7942,
      "step": 12215
    },
    {
      "epoch": 3.4345137717819,
      "grad_norm": 0.5627838969230652,
      "learning_rate": 5.4010161183996264e-05,
      "loss": 0.7899,
      "step": 12220
    },
    {
      "epoch": 3.435919055649241,
      "grad_norm": 0.5431748032569885,
      "learning_rate": 5.3923066820280075e-05,
      "loss": 0.7829,
      "step": 12225
    },
    {
      "epoch": 3.4373243395165822,
      "grad_norm": 0.5447311401367188,
      "learning_rate": 5.383601680582304e-05,
      "loss": 0.793,
      "step": 12230
    },
    {
      "epoch": 3.4387296233839235,
      "grad_norm": 0.47594648599624634,
      "learning_rate": 5.374901122441121e-05,
      "loss": 0.7832,
      "step": 12235
    },
    {
      "epoch": 3.440134907251265,
      "grad_norm": 0.5034043788909912,
      "learning_rate": 5.366205015978787e-05,
      "loss": 0.7853,
      "step": 12240
    },
    {
      "epoch": 3.441540191118606,
      "grad_norm": 0.5102277398109436,
      "learning_rate": 5.35751336956534e-05,
      "loss": 0.7788,
      "step": 12245
    },
    {
      "epoch": 3.4429454749859474,
      "grad_norm": 0.5406153798103333,
      "learning_rate": 5.348826191566532e-05,
      "loss": 0.7859,
      "step": 12250
    },
    {
      "epoch": 3.4443507588532882,
      "grad_norm": 0.4994388222694397,
      "learning_rate": 5.340143490343813e-05,
      "loss": 0.7925,
      "step": 12255
    },
    {
      "epoch": 3.4457560427206295,
      "grad_norm": 0.6013630032539368,
      "learning_rate": 5.331465274254318e-05,
      "loss": 0.8095,
      "step": 12260
    },
    {
      "epoch": 3.447161326587971,
      "grad_norm": 0.5830052495002747,
      "learning_rate": 5.322791551650872e-05,
      "loss": 0.7978,
      "step": 12265
    },
    {
      "epoch": 3.448566610455312,
      "grad_norm": 0.5456408858299255,
      "learning_rate": 5.3141223308819696e-05,
      "loss": 0.7905,
      "step": 12270
    },
    {
      "epoch": 3.449971894322653,
      "grad_norm": 0.5100853443145752,
      "learning_rate": 5.3054576202917896e-05,
      "loss": 0.777,
      "step": 12275
    },
    {
      "epoch": 3.4513771781899942,
      "grad_norm": 0.5169028639793396,
      "learning_rate": 5.296797428220138e-05,
      "loss": 0.8264,
      "step": 12280
    },
    {
      "epoch": 3.4527824620573355,
      "grad_norm": 0.5377866625785828,
      "learning_rate": 5.288141763002499e-05,
      "loss": 0.787,
      "step": 12285
    },
    {
      "epoch": 3.454187745924677,
      "grad_norm": 0.5345072150230408,
      "learning_rate": 5.27949063296999e-05,
      "loss": 0.7897,
      "step": 12290
    },
    {
      "epoch": 3.455593029792018,
      "grad_norm": 0.515516459941864,
      "learning_rate": 5.270844046449357e-05,
      "loss": 0.7872,
      "step": 12295
    },
    {
      "epoch": 3.4569983136593594,
      "grad_norm": 0.5370394587516785,
      "learning_rate": 5.2622020117629824e-05,
      "loss": 0.8009,
      "step": 12300
    },
    {
      "epoch": 3.4584035975267002,
      "grad_norm": 0.5313602089881897,
      "learning_rate": 5.2535645372288655e-05,
      "loss": 0.7967,
      "step": 12305
    },
    {
      "epoch": 3.4598088813940415,
      "grad_norm": 0.4978809058666229,
      "learning_rate": 5.244931631160619e-05,
      "loss": 0.7815,
      "step": 12310
    },
    {
      "epoch": 3.461214165261383,
      "grad_norm": 0.5503342151641846,
      "learning_rate": 5.2363033018674444e-05,
      "loss": 0.7922,
      "step": 12315
    },
    {
      "epoch": 3.462619449128724,
      "grad_norm": 0.5449639558792114,
      "learning_rate": 5.227679557654151e-05,
      "loss": 0.8062,
      "step": 12320
    },
    {
      "epoch": 3.4640247329960654,
      "grad_norm": 0.5676690936088562,
      "learning_rate": 5.219060406821141e-05,
      "loss": 0.8049,
      "step": 12325
    },
    {
      "epoch": 3.4654300168634062,
      "grad_norm": 0.49038565158843994,
      "learning_rate": 5.210445857664377e-05,
      "loss": 0.7954,
      "step": 12330
    },
    {
      "epoch": 3.4668353007307475,
      "grad_norm": 0.582527756690979,
      "learning_rate": 5.2018359184754065e-05,
      "loss": 0.7865,
      "step": 12335
    },
    {
      "epoch": 3.468240584598089,
      "grad_norm": 0.6153197884559631,
      "learning_rate": 5.193230597541332e-05,
      "loss": 0.7932,
      "step": 12340
    },
    {
      "epoch": 3.46964586846543,
      "grad_norm": 0.5249049067497253,
      "learning_rate": 5.184629903144821e-05,
      "loss": 0.7931,
      "step": 12345
    },
    {
      "epoch": 3.4710511523327714,
      "grad_norm": 0.5095629096031189,
      "learning_rate": 5.176033843564072e-05,
      "loss": 0.7963,
      "step": 12350
    },
    {
      "epoch": 3.4724564362001122,
      "grad_norm": 0.4929661750793457,
      "learning_rate": 5.167442427072837e-05,
      "loss": 0.7887,
      "step": 12355
    },
    {
      "epoch": 3.4738617200674535,
      "grad_norm": 0.5074275732040405,
      "learning_rate": 5.1588556619403985e-05,
      "loss": 0.7862,
      "step": 12360
    },
    {
      "epoch": 3.475267003934795,
      "grad_norm": 0.5591232776641846,
      "learning_rate": 5.150273556431542e-05,
      "loss": 0.7957,
      "step": 12365
    },
    {
      "epoch": 3.476672287802136,
      "grad_norm": 0.575605571269989,
      "learning_rate": 5.141696118806598e-05,
      "loss": 0.794,
      "step": 12370
    },
    {
      "epoch": 3.4780775716694774,
      "grad_norm": 0.5372434258460999,
      "learning_rate": 5.1331233573213844e-05,
      "loss": 0.8056,
      "step": 12375
    },
    {
      "epoch": 3.4794828555368182,
      "grad_norm": 0.5141233801841736,
      "learning_rate": 5.1245552802272275e-05,
      "loss": 0.7907,
      "step": 12380
    },
    {
      "epoch": 3.4808881394041595,
      "grad_norm": 0.511116087436676,
      "learning_rate": 5.115991895770933e-05,
      "loss": 0.7909,
      "step": 12385
    },
    {
      "epoch": 3.482293423271501,
      "grad_norm": 0.4942467212677002,
      "learning_rate": 5.107433212194801e-05,
      "loss": 0.7964,
      "step": 12390
    },
    {
      "epoch": 3.483698707138842,
      "grad_norm": 0.5006674528121948,
      "learning_rate": 5.098879237736609e-05,
      "loss": 0.7924,
      "step": 12395
    },
    {
      "epoch": 3.4851039910061834,
      "grad_norm": 0.47983628511428833,
      "learning_rate": 5.090329980629587e-05,
      "loss": 0.7918,
      "step": 12400
    },
    {
      "epoch": 3.4865092748735247,
      "grad_norm": 0.5198928713798523,
      "learning_rate": 5.0817854491024384e-05,
      "loss": 0.837,
      "step": 12405
    },
    {
      "epoch": 3.4879145587408655,
      "grad_norm": 0.5449215769767761,
      "learning_rate": 5.073245651379316e-05,
      "loss": 0.7916,
      "step": 12410
    },
    {
      "epoch": 3.489319842608207,
      "grad_norm": 0.49125024676322937,
      "learning_rate": 5.0647105956798116e-05,
      "loss": 0.7824,
      "step": 12415
    },
    {
      "epoch": 3.490725126475548,
      "grad_norm": 0.5600565671920776,
      "learning_rate": 5.056180290218957e-05,
      "loss": 0.8073,
      "step": 12420
    },
    {
      "epoch": 3.4921304103428894,
      "grad_norm": 0.49226316809654236,
      "learning_rate": 5.047654743207209e-05,
      "loss": 0.7841,
      "step": 12425
    },
    {
      "epoch": 3.4935356942102302,
      "grad_norm": 0.5037206411361694,
      "learning_rate": 5.03913396285045e-05,
      "loss": 0.803,
      "step": 12430
    },
    {
      "epoch": 3.4949409780775715,
      "grad_norm": 0.5516871213912964,
      "learning_rate": 5.030617957349962e-05,
      "loss": 0.7919,
      "step": 12435
    },
    {
      "epoch": 3.496346261944913,
      "grad_norm": 0.5346094965934753,
      "learning_rate": 5.022106734902444e-05,
      "loss": 0.7907,
      "step": 12440
    },
    {
      "epoch": 3.497751545812254,
      "grad_norm": 0.49881866574287415,
      "learning_rate": 5.013600303699987e-05,
      "loss": 0.8021,
      "step": 12445
    },
    {
      "epoch": 3.4991568296795954,
      "grad_norm": 0.5102651715278625,
      "learning_rate": 5.005098671930073e-05,
      "loss": 0.7913,
      "step": 12450
    },
    {
      "epoch": 3.5005621135469367,
      "grad_norm": 0.5428999662399292,
      "learning_rate": 4.996601847775553e-05,
      "loss": 0.8005,
      "step": 12455
    },
    {
      "epoch": 3.5019673974142775,
      "grad_norm": 0.6149014830589294,
      "learning_rate": 4.988109839414664e-05,
      "loss": 0.797,
      "step": 12460
    },
    {
      "epoch": 3.503372681281619,
      "grad_norm": 0.4979318678379059,
      "learning_rate": 4.979622655021003e-05,
      "loss": 0.7881,
      "step": 12465
    },
    {
      "epoch": 3.50477796514896,
      "grad_norm": 0.5156809687614441,
      "learning_rate": 4.971140302763524e-05,
      "loss": 0.7809,
      "step": 12470
    },
    {
      "epoch": 3.5061832490163014,
      "grad_norm": 0.5653836131095886,
      "learning_rate": 4.9626627908065294e-05,
      "loss": 0.8515,
      "step": 12475
    },
    {
      "epoch": 3.5075885328836423,
      "grad_norm": 0.6203331351280212,
      "learning_rate": 4.9541901273096626e-05,
      "loss": 0.7845,
      "step": 12480
    },
    {
      "epoch": 3.5089938167509835,
      "grad_norm": 0.5102115273475647,
      "learning_rate": 4.9457223204279066e-05,
      "loss": 0.7831,
      "step": 12485
    },
    {
      "epoch": 3.510399100618325,
      "grad_norm": 0.5312532782554626,
      "learning_rate": 4.937259378311555e-05,
      "loss": 0.7937,
      "step": 12490
    },
    {
      "epoch": 3.511804384485666,
      "grad_norm": 0.5389629602432251,
      "learning_rate": 4.928801309106233e-05,
      "loss": 0.7994,
      "step": 12495
    },
    {
      "epoch": 3.5132096683530074,
      "grad_norm": 0.5493090748786926,
      "learning_rate": 4.920348120952874e-05,
      "loss": 0.787,
      "step": 12500
    },
    {
      "epoch": 3.5146149522203487,
      "grad_norm": 0.49603936076164246,
      "learning_rate": 4.9118998219877044e-05,
      "loss": 0.841,
      "step": 12505
    },
    {
      "epoch": 3.51602023608769,
      "grad_norm": 0.5134543180465698,
      "learning_rate": 4.9034564203422526e-05,
      "loss": 0.7841,
      "step": 12510
    },
    {
      "epoch": 3.517425519955031,
      "grad_norm": 0.508040726184845,
      "learning_rate": 4.8950179241433325e-05,
      "loss": 0.803,
      "step": 12515
    },
    {
      "epoch": 3.518830803822372,
      "grad_norm": 0.52872633934021,
      "learning_rate": 4.886584341513035e-05,
      "loss": 0.7982,
      "step": 12520
    },
    {
      "epoch": 3.5202360876897134,
      "grad_norm": 0.49528661370277405,
      "learning_rate": 4.878155680568721e-05,
      "loss": 0.7869,
      "step": 12525
    },
    {
      "epoch": 3.5216413715570543,
      "grad_norm": 0.5169901251792908,
      "learning_rate": 4.8697319494230176e-05,
      "loss": 0.7899,
      "step": 12530
    },
    {
      "epoch": 3.5230466554243955,
      "grad_norm": 0.553735077381134,
      "learning_rate": 4.8613131561838054e-05,
      "loss": 0.7899,
      "step": 12535
    },
    {
      "epoch": 3.524451939291737,
      "grad_norm": 0.5353341698646545,
      "learning_rate": 4.8528993089542065e-05,
      "loss": 0.7942,
      "step": 12540
    },
    {
      "epoch": 3.525857223159078,
      "grad_norm": 0.5440569519996643,
      "learning_rate": 4.84449041583259e-05,
      "loss": 0.797,
      "step": 12545
    },
    {
      "epoch": 3.5272625070264194,
      "grad_norm": 0.4897547960281372,
      "learning_rate": 4.836086484912553e-05,
      "loss": 0.808,
      "step": 12550
    },
    {
      "epoch": 3.5286677908937607,
      "grad_norm": 0.49370864033699036,
      "learning_rate": 4.827687524282923e-05,
      "loss": 0.7879,
      "step": 12555
    },
    {
      "epoch": 3.530073074761102,
      "grad_norm": 0.5558463931083679,
      "learning_rate": 4.81929354202773e-05,
      "loss": 0.7819,
      "step": 12560
    },
    {
      "epoch": 3.531478358628443,
      "grad_norm": 0.5049279928207397,
      "learning_rate": 4.8109045462262233e-05,
      "loss": 0.7829,
      "step": 12565
    },
    {
      "epoch": 3.532883642495784,
      "grad_norm": 0.5592412948608398,
      "learning_rate": 4.802520544952852e-05,
      "loss": 0.8016,
      "step": 12570
    },
    {
      "epoch": 3.5342889263631254,
      "grad_norm": 0.5813235640525818,
      "learning_rate": 4.794141546277253e-05,
      "loss": 0.7847,
      "step": 12575
    },
    {
      "epoch": 3.5356942102304667,
      "grad_norm": 0.5444972515106201,
      "learning_rate": 4.785767558264252e-05,
      "loss": 0.801,
      "step": 12580
    },
    {
      "epoch": 3.5370994940978076,
      "grad_norm": 0.550809383392334,
      "learning_rate": 4.777398588973852e-05,
      "loss": 0.7921,
      "step": 12585
    },
    {
      "epoch": 3.538504777965149,
      "grad_norm": 0.5555300116539001,
      "learning_rate": 4.7690346464612255e-05,
      "loss": 0.8072,
      "step": 12590
    },
    {
      "epoch": 3.53991006183249,
      "grad_norm": 0.5757794976234436,
      "learning_rate": 4.7606757387766996e-05,
      "loss": 0.7817,
      "step": 12595
    },
    {
      "epoch": 3.5413153456998314,
      "grad_norm": 0.5164437890052795,
      "learning_rate": 4.752321873965765e-05,
      "loss": 0.7962,
      "step": 12600
    },
    {
      "epoch": 3.5427206295671727,
      "grad_norm": 0.5786922574043274,
      "learning_rate": 4.743973060069058e-05,
      "loss": 0.7916,
      "step": 12605
    },
    {
      "epoch": 3.544125913434514,
      "grad_norm": 0.5284855961799622,
      "learning_rate": 4.735629305122343e-05,
      "loss": 0.7832,
      "step": 12610
    },
    {
      "epoch": 3.545531197301855,
      "grad_norm": 0.5422257781028748,
      "learning_rate": 4.7272906171565266e-05,
      "loss": 0.8164,
      "step": 12615
    },
    {
      "epoch": 3.546936481169196,
      "grad_norm": 0.5167434811592102,
      "learning_rate": 4.718957004197634e-05,
      "loss": 0.7981,
      "step": 12620
    },
    {
      "epoch": 3.5483417650365374,
      "grad_norm": 0.5607612133026123,
      "learning_rate": 4.710628474266805e-05,
      "loss": 0.7916,
      "step": 12625
    },
    {
      "epoch": 3.5497470489038787,
      "grad_norm": 0.5161816477775574,
      "learning_rate": 4.702305035380288e-05,
      "loss": 0.7861,
      "step": 12630
    },
    {
      "epoch": 3.5511523327712196,
      "grad_norm": 0.6233710646629333,
      "learning_rate": 4.693986695549432e-05,
      "loss": 0.7981,
      "step": 12635
    },
    {
      "epoch": 3.552557616638561,
      "grad_norm": 0.5273126363754272,
      "learning_rate": 4.6856734627806754e-05,
      "loss": 0.8011,
      "step": 12640
    },
    {
      "epoch": 3.553962900505902,
      "grad_norm": 0.5350555181503296,
      "learning_rate": 4.677365345075548e-05,
      "loss": 0.7916,
      "step": 12645
    },
    {
      "epoch": 3.5553681843732434,
      "grad_norm": 0.5276046991348267,
      "learning_rate": 4.669062350430644e-05,
      "loss": 0.7841,
      "step": 12650
    },
    {
      "epoch": 3.5567734682405847,
      "grad_norm": 0.6036867499351501,
      "learning_rate": 4.660764486837635e-05,
      "loss": 0.8028,
      "step": 12655
    },
    {
      "epoch": 3.558178752107926,
      "grad_norm": 0.5013184547424316,
      "learning_rate": 4.65247176228326e-05,
      "loss": 0.794,
      "step": 12660
    },
    {
      "epoch": 3.559584035975267,
      "grad_norm": 0.5366091728210449,
      "learning_rate": 4.644184184749295e-05,
      "loss": 0.8003,
      "step": 12665
    },
    {
      "epoch": 3.560989319842608,
      "grad_norm": 0.48761817812919617,
      "learning_rate": 4.6359017622125745e-05,
      "loss": 0.7976,
      "step": 12670
    },
    {
      "epoch": 3.5623946037099494,
      "grad_norm": 0.556034505367279,
      "learning_rate": 4.6276245026449694e-05,
      "loss": 0.7858,
      "step": 12675
    },
    {
      "epoch": 3.5637998875772907,
      "grad_norm": 0.5266842842102051,
      "learning_rate": 4.6193524140133784e-05,
      "loss": 0.8347,
      "step": 12680
    },
    {
      "epoch": 3.5652051714446316,
      "grad_norm": 0.5990260243415833,
      "learning_rate": 4.6110855042797264e-05,
      "loss": 0.7882,
      "step": 12685
    },
    {
      "epoch": 3.566610455311973,
      "grad_norm": 0.5775483250617981,
      "learning_rate": 4.602823781400951e-05,
      "loss": 0.7857,
      "step": 12690
    },
    {
      "epoch": 3.568015739179314,
      "grad_norm": 0.5805662870407104,
      "learning_rate": 4.594567253329002e-05,
      "loss": 0.7875,
      "step": 12695
    },
    {
      "epoch": 3.5694210230466554,
      "grad_norm": 0.5316618084907532,
      "learning_rate": 4.586315928010817e-05,
      "loss": 0.7848,
      "step": 12700
    },
    {
      "epoch": 3.5708263069139967,
      "grad_norm": 0.5092722177505493,
      "learning_rate": 4.5780698133883405e-05,
      "loss": 0.7953,
      "step": 12705
    },
    {
      "epoch": 3.572231590781338,
      "grad_norm": 0.48918163776397705,
      "learning_rate": 4.5698289173984944e-05,
      "loss": 0.7876,
      "step": 12710
    },
    {
      "epoch": 3.5736368746486793,
      "grad_norm": 0.5924161672592163,
      "learning_rate": 4.561593247973181e-05,
      "loss": 0.7996,
      "step": 12715
    },
    {
      "epoch": 3.57504215851602,
      "grad_norm": 0.6285293102264404,
      "learning_rate": 4.5533628130392656e-05,
      "loss": 0.7904,
      "step": 12720
    },
    {
      "epoch": 3.5764474423833614,
      "grad_norm": 0.6160907745361328,
      "learning_rate": 4.54513762051858e-05,
      "loss": 0.7892,
      "step": 12725
    },
    {
      "epoch": 3.5778527262507027,
      "grad_norm": 0.5645490288734436,
      "learning_rate": 4.536917678327912e-05,
      "loss": 0.8048,
      "step": 12730
    },
    {
      "epoch": 3.579258010118044,
      "grad_norm": 0.5955885052680969,
      "learning_rate": 4.5287029943789935e-05,
      "loss": 0.7905,
      "step": 12735
    },
    {
      "epoch": 3.580663293985385,
      "grad_norm": 0.5727905035018921,
      "learning_rate": 4.520493576578494e-05,
      "loss": 0.8024,
      "step": 12740
    },
    {
      "epoch": 3.582068577852726,
      "grad_norm": 0.592326819896698,
      "learning_rate": 4.512289432828015e-05,
      "loss": 0.7992,
      "step": 12745
    },
    {
      "epoch": 3.5834738617200674,
      "grad_norm": 0.5737560987472534,
      "learning_rate": 4.5040905710240885e-05,
      "loss": 0.792,
      "step": 12750
    },
    {
      "epoch": 3.5848791455874087,
      "grad_norm": 0.4912492632865906,
      "learning_rate": 4.495896999058149e-05,
      "loss": 0.7914,
      "step": 12755
    },
    {
      "epoch": 3.58628442945475,
      "grad_norm": 0.4853997230529785,
      "learning_rate": 4.48770872481655e-05,
      "loss": 0.7819,
      "step": 12760
    },
    {
      "epoch": 3.5876897133220913,
      "grad_norm": 0.5390718579292297,
      "learning_rate": 4.479525756180545e-05,
      "loss": 0.7925,
      "step": 12765
    },
    {
      "epoch": 3.589094997189432,
      "grad_norm": 0.5622910261154175,
      "learning_rate": 4.471348101026274e-05,
      "loss": 0.7948,
      "step": 12770
    },
    {
      "epoch": 3.5905002810567734,
      "grad_norm": 0.5547574758529663,
      "learning_rate": 4.463175767224769e-05,
      "loss": 0.7913,
      "step": 12775
    },
    {
      "epoch": 3.5919055649241147,
      "grad_norm": 0.5188785791397095,
      "learning_rate": 4.455008762641937e-05,
      "loss": 0.793,
      "step": 12780
    },
    {
      "epoch": 3.593310848791456,
      "grad_norm": 0.4869183599948883,
      "learning_rate": 4.4468470951385666e-05,
      "loss": 0.7952,
      "step": 12785
    },
    {
      "epoch": 3.594716132658797,
      "grad_norm": 0.5184937119483948,
      "learning_rate": 4.438690772570291e-05,
      "loss": 0.7909,
      "step": 12790
    },
    {
      "epoch": 3.596121416526138,
      "grad_norm": 0.5637416243553162,
      "learning_rate": 4.43053980278761e-05,
      "loss": 0.8009,
      "step": 12795
    },
    {
      "epoch": 3.5975267003934794,
      "grad_norm": 0.48426154255867004,
      "learning_rate": 4.4223941936358745e-05,
      "loss": 0.8062,
      "step": 12800
    },
    {
      "epoch": 3.5989319842608207,
      "grad_norm": 0.48236146569252014,
      "learning_rate": 4.414253952955262e-05,
      "loss": 0.7967,
      "step": 12805
    },
    {
      "epoch": 3.600337268128162,
      "grad_norm": 0.5122547745704651,
      "learning_rate": 4.406119088580796e-05,
      "loss": 0.801,
      "step": 12810
    },
    {
      "epoch": 3.6017425519955033,
      "grad_norm": 0.5660848617553711,
      "learning_rate": 4.397989608342319e-05,
      "loss": 0.7827,
      "step": 12815
    },
    {
      "epoch": 3.603147835862844,
      "grad_norm": 0.5280227065086365,
      "learning_rate": 4.389865520064499e-05,
      "loss": 0.7914,
      "step": 12820
    },
    {
      "epoch": 3.6045531197301854,
      "grad_norm": 0.4934808015823364,
      "learning_rate": 4.3817468315667954e-05,
      "loss": 0.7792,
      "step": 12825
    },
    {
      "epoch": 3.6059584035975267,
      "grad_norm": 0.52223801612854,
      "learning_rate": 4.373633550663495e-05,
      "loss": 0.7834,
      "step": 12830
    },
    {
      "epoch": 3.607363687464868,
      "grad_norm": 0.5264943838119507,
      "learning_rate": 4.365525685163668e-05,
      "loss": 0.7889,
      "step": 12835
    },
    {
      "epoch": 3.608768971332209,
      "grad_norm": 0.5082861185073853,
      "learning_rate": 4.357423242871164e-05,
      "loss": 0.7941,
      "step": 12840
    },
    {
      "epoch": 3.61017425519955,
      "grad_norm": 0.5219237804412842,
      "learning_rate": 4.349326231584624e-05,
      "loss": 0.7887,
      "step": 12845
    },
    {
      "epoch": 3.6115795390668914,
      "grad_norm": 0.48076823353767395,
      "learning_rate": 4.341234659097459e-05,
      "loss": 0.7818,
      "step": 12850
    },
    {
      "epoch": 3.6129848229342327,
      "grad_norm": 0.4861706793308258,
      "learning_rate": 4.333148533197849e-05,
      "loss": 0.7901,
      "step": 12855
    },
    {
      "epoch": 3.614390106801574,
      "grad_norm": 0.515626072883606,
      "learning_rate": 4.32506786166872e-05,
      "loss": 0.7808,
      "step": 12860
    },
    {
      "epoch": 3.6157953906689153,
      "grad_norm": 0.5344110131263733,
      "learning_rate": 4.316992652287758e-05,
      "loss": 0.7773,
      "step": 12865
    },
    {
      "epoch": 3.6172006745362566,
      "grad_norm": 0.5144559741020203,
      "learning_rate": 4.3089229128273924e-05,
      "loss": 0.8073,
      "step": 12870
    },
    {
      "epoch": 3.6186059584035974,
      "grad_norm": 0.5614593029022217,
      "learning_rate": 4.300858651054774e-05,
      "loss": 0.7971,
      "step": 12875
    },
    {
      "epoch": 3.6200112422709387,
      "grad_norm": 0.4780723750591278,
      "learning_rate": 4.2927998747318034e-05,
      "loss": 0.7915,
      "step": 12880
    },
    {
      "epoch": 3.62141652613828,
      "grad_norm": 0.533393919467926,
      "learning_rate": 4.284746591615084e-05,
      "loss": 0.7805,
      "step": 12885
    },
    {
      "epoch": 3.622821810005621,
      "grad_norm": 0.5291486382484436,
      "learning_rate": 4.276698809455944e-05,
      "loss": 0.7906,
      "step": 12890
    },
    {
      "epoch": 3.624227093872962,
      "grad_norm": 0.4856104552745819,
      "learning_rate": 4.2686565360004e-05,
      "loss": 0.7866,
      "step": 12895
    },
    {
      "epoch": 3.6256323777403034,
      "grad_norm": 0.5947971940040588,
      "learning_rate": 4.260619778989183e-05,
      "loss": 0.8074,
      "step": 12900
    },
    {
      "epoch": 3.6270376616076447,
      "grad_norm": 0.5176420211791992,
      "learning_rate": 4.252588546157713e-05,
      "loss": 0.7776,
      "step": 12905
    },
    {
      "epoch": 3.628442945474986,
      "grad_norm": 0.4930765926837921,
      "learning_rate": 4.244562845236079e-05,
      "loss": 0.7908,
      "step": 12910
    },
    {
      "epoch": 3.6298482293423273,
      "grad_norm": 0.5033612847328186,
      "learning_rate": 4.2365426839490583e-05,
      "loss": 0.7942,
      "step": 12915
    },
    {
      "epoch": 3.6312535132096686,
      "grad_norm": 0.5101487636566162,
      "learning_rate": 4.228528070016094e-05,
      "loss": 0.7964,
      "step": 12920
    },
    {
      "epoch": 3.6326587970770094,
      "grad_norm": 0.48270928859710693,
      "learning_rate": 4.220519011151289e-05,
      "loss": 0.784,
      "step": 12925
    },
    {
      "epoch": 3.6340640809443507,
      "grad_norm": 0.5077676177024841,
      "learning_rate": 4.2125155150633986e-05,
      "loss": 0.7954,
      "step": 12930
    },
    {
      "epoch": 3.635469364811692,
      "grad_norm": 0.5236606001853943,
      "learning_rate": 4.204517589455825e-05,
      "loss": 0.7917,
      "step": 12935
    },
    {
      "epoch": 3.6368746486790333,
      "grad_norm": 0.503719687461853,
      "learning_rate": 4.19652524202661e-05,
      "loss": 0.7852,
      "step": 12940
    },
    {
      "epoch": 3.638279932546374,
      "grad_norm": 0.5560110807418823,
      "learning_rate": 4.18853848046842e-05,
      "loss": 0.7773,
      "step": 12945
    },
    {
      "epoch": 3.6396852164137155,
      "grad_norm": 0.5376433730125427,
      "learning_rate": 4.1805573124685504e-05,
      "loss": 0.7888,
      "step": 12950
    },
    {
      "epoch": 3.6410905002810567,
      "grad_norm": 0.5354559421539307,
      "learning_rate": 4.1725817457089146e-05,
      "loss": 0.7928,
      "step": 12955
    },
    {
      "epoch": 3.642495784148398,
      "grad_norm": 0.5309024453163147,
      "learning_rate": 4.164611787866034e-05,
      "loss": 0.8369,
      "step": 12960
    },
    {
      "epoch": 3.6439010680157393,
      "grad_norm": 0.48679086565971375,
      "learning_rate": 4.156647446611023e-05,
      "loss": 0.7882,
      "step": 12965
    },
    {
      "epoch": 3.6453063518830806,
      "grad_norm": 0.5110394954681396,
      "learning_rate": 4.148688729609599e-05,
      "loss": 0.7856,
      "step": 12970
    },
    {
      "epoch": 3.6467116357504215,
      "grad_norm": 0.5115504264831543,
      "learning_rate": 4.1407356445220655e-05,
      "loss": 0.8106,
      "step": 12975
    },
    {
      "epoch": 3.6481169196177627,
      "grad_norm": 0.5382710695266724,
      "learning_rate": 4.132788199003302e-05,
      "loss": 0.7896,
      "step": 12980
    },
    {
      "epoch": 3.649522203485104,
      "grad_norm": 0.5351971983909607,
      "learning_rate": 4.124846400702761e-05,
      "loss": 0.7915,
      "step": 12985
    },
    {
      "epoch": 3.6509274873524453,
      "grad_norm": 0.5722103118896484,
      "learning_rate": 4.116910257264461e-05,
      "loss": 0.8012,
      "step": 12990
    },
    {
      "epoch": 3.652332771219786,
      "grad_norm": 0.4893995225429535,
      "learning_rate": 4.108979776326979e-05,
      "loss": 0.7898,
      "step": 12995
    },
    {
      "epoch": 3.6537380550871275,
      "grad_norm": 0.5160402059555054,
      "learning_rate": 4.101054965523432e-05,
      "loss": 0.7932,
      "step": 13000
    },
    {
      "epoch": 3.6551433389544687,
      "grad_norm": 0.4884108901023865,
      "learning_rate": 4.093135832481491e-05,
      "loss": 0.7948,
      "step": 13005
    },
    {
      "epoch": 3.65654862282181,
      "grad_norm": 0.48411718010902405,
      "learning_rate": 4.0852223848233604e-05,
      "loss": 0.7861,
      "step": 13010
    },
    {
      "epoch": 3.6579539066891513,
      "grad_norm": 0.4931657016277313,
      "learning_rate": 4.077314630165763e-05,
      "loss": 0.7853,
      "step": 13015
    },
    {
      "epoch": 3.6593591905564926,
      "grad_norm": 0.5150357484817505,
      "learning_rate": 4.069412576119953e-05,
      "loss": 0.7864,
      "step": 13020
    },
    {
      "epoch": 3.6607644744238335,
      "grad_norm": 0.530714213848114,
      "learning_rate": 4.061516230291693e-05,
      "loss": 0.7786,
      "step": 13025
    },
    {
      "epoch": 3.6621697582911747,
      "grad_norm": 0.5072176456451416,
      "learning_rate": 4.053625600281252e-05,
      "loss": 0.776,
      "step": 13030
    },
    {
      "epoch": 3.663575042158516,
      "grad_norm": 0.49845072627067566,
      "learning_rate": 4.0457406936833984e-05,
      "loss": 0.8124,
      "step": 13035
    },
    {
      "epoch": 3.6649803260258573,
      "grad_norm": 0.5175867080688477,
      "learning_rate": 4.0378615180873905e-05,
      "loss": 0.7966,
      "step": 13040
    },
    {
      "epoch": 3.666385609893198,
      "grad_norm": 0.49814727902412415,
      "learning_rate": 4.0299880810769694e-05,
      "loss": 0.7934,
      "step": 13045
    },
    {
      "epoch": 3.6677908937605395,
      "grad_norm": 0.5344975590705872,
      "learning_rate": 4.02212039023036e-05,
      "loss": 0.7882,
      "step": 13050
    },
    {
      "epoch": 3.6691961776278808,
      "grad_norm": 0.5497987866401672,
      "learning_rate": 4.014258453120242e-05,
      "loss": 0.7858,
      "step": 13055
    },
    {
      "epoch": 3.670601461495222,
      "grad_norm": 0.5829319953918457,
      "learning_rate": 4.006402277313771e-05,
      "loss": 0.7908,
      "step": 13060
    },
    {
      "epoch": 3.6720067453625633,
      "grad_norm": 0.4865242838859558,
      "learning_rate": 3.9985518703725545e-05,
      "loss": 0.7913,
      "step": 13065
    },
    {
      "epoch": 3.6734120292299046,
      "grad_norm": 0.53497314453125,
      "learning_rate": 3.990707239852638e-05,
      "loss": 0.7808,
      "step": 13070
    },
    {
      "epoch": 3.674817313097246,
      "grad_norm": 0.5179198980331421,
      "learning_rate": 3.9828683933045186e-05,
      "loss": 0.7808,
      "step": 13075
    },
    {
      "epoch": 3.6762225969645868,
      "grad_norm": 0.5384912490844727,
      "learning_rate": 3.975035338273121e-05,
      "loss": 0.8018,
      "step": 13080
    },
    {
      "epoch": 3.677627880831928,
      "grad_norm": 0.5635836124420166,
      "learning_rate": 3.9672080822977985e-05,
      "loss": 0.7842,
      "step": 13085
    },
    {
      "epoch": 3.6790331646992693,
      "grad_norm": 0.5859927535057068,
      "learning_rate": 3.9593866329123184e-05,
      "loss": 0.7991,
      "step": 13090
    },
    {
      "epoch": 3.6804384485666106,
      "grad_norm": 0.5475590229034424,
      "learning_rate": 3.951570997644862e-05,
      "loss": 0.8046,
      "step": 13095
    },
    {
      "epoch": 3.6818437324339515,
      "grad_norm": 0.5195204019546509,
      "learning_rate": 3.94376118401802e-05,
      "loss": 0.7896,
      "step": 13100
    },
    {
      "epoch": 3.6832490163012928,
      "grad_norm": 0.5426797270774841,
      "learning_rate": 3.9359571995487644e-05,
      "loss": 0.79,
      "step": 13105
    },
    {
      "epoch": 3.684654300168634,
      "grad_norm": 0.49161791801452637,
      "learning_rate": 3.928159051748469e-05,
      "loss": 0.7918,
      "step": 13110
    },
    {
      "epoch": 3.6860595840359753,
      "grad_norm": 0.5060259699821472,
      "learning_rate": 3.9203667481228876e-05,
      "loss": 0.7774,
      "step": 13115
    },
    {
      "epoch": 3.6874648679033166,
      "grad_norm": 0.4989676773548126,
      "learning_rate": 3.9125802961721536e-05,
      "loss": 0.7903,
      "step": 13120
    },
    {
      "epoch": 3.688870151770658,
      "grad_norm": 0.7031319737434387,
      "learning_rate": 3.904799703390752e-05,
      "loss": 0.793,
      "step": 13125
    },
    {
      "epoch": 3.6902754356379988,
      "grad_norm": 0.6419675946235657,
      "learning_rate": 3.897024977267546e-05,
      "loss": 0.7808,
      "step": 13130
    },
    {
      "epoch": 3.69168071950534,
      "grad_norm": 0.5509933829307556,
      "learning_rate": 3.8892561252857415e-05,
      "loss": 0.8033,
      "step": 13135
    },
    {
      "epoch": 3.6930860033726813,
      "grad_norm": 0.5243677496910095,
      "learning_rate": 3.881493154922898e-05,
      "loss": 0.7917,
      "step": 13140
    },
    {
      "epoch": 3.6944912872400226,
      "grad_norm": 0.5301499366760254,
      "learning_rate": 3.873736073650906e-05,
      "loss": 0.8439,
      "step": 13145
    },
    {
      "epoch": 3.6958965711073635,
      "grad_norm": 0.5213268995285034,
      "learning_rate": 3.865984888935996e-05,
      "loss": 0.7959,
      "step": 13150
    },
    {
      "epoch": 3.6973018549747048,
      "grad_norm": 0.49913641810417175,
      "learning_rate": 3.858239608238718e-05,
      "loss": 0.7911,
      "step": 13155
    },
    {
      "epoch": 3.698707138842046,
      "grad_norm": 0.5024813413619995,
      "learning_rate": 3.850500239013937e-05,
      "loss": 0.7901,
      "step": 13160
    },
    {
      "epoch": 3.7001124227093873,
      "grad_norm": 0.5601816773414612,
      "learning_rate": 3.842766788710832e-05,
      "loss": 0.793,
      "step": 13165
    },
    {
      "epoch": 3.7015177065767286,
      "grad_norm": 0.49501365423202515,
      "learning_rate": 3.8350392647728896e-05,
      "loss": 0.794,
      "step": 13170
    },
    {
      "epoch": 3.70292299044407,
      "grad_norm": 0.5512212514877319,
      "learning_rate": 3.827317674637878e-05,
      "loss": 0.7949,
      "step": 13175
    },
    {
      "epoch": 3.7043282743114108,
      "grad_norm": 0.5936423540115356,
      "learning_rate": 3.8196020257378686e-05,
      "loss": 0.7948,
      "step": 13180
    },
    {
      "epoch": 3.705733558178752,
      "grad_norm": 0.5287837386131287,
      "learning_rate": 3.811892325499208e-05,
      "loss": 0.7922,
      "step": 13185
    },
    {
      "epoch": 3.7071388420460933,
      "grad_norm": 0.6881371736526489,
      "learning_rate": 3.804188581342517e-05,
      "loss": 0.8122,
      "step": 13190
    },
    {
      "epoch": 3.7085441259134346,
      "grad_norm": 0.5230666995048523,
      "learning_rate": 3.796490800682687e-05,
      "loss": 0.8076,
      "step": 13195
    },
    {
      "epoch": 3.7099494097807755,
      "grad_norm": 0.5672796368598938,
      "learning_rate": 3.7887989909288646e-05,
      "loss": 0.7956,
      "step": 13200
    },
    {
      "epoch": 3.7113546936481168,
      "grad_norm": 0.48925039172172546,
      "learning_rate": 3.7811131594844574e-05,
      "loss": 0.841,
      "step": 13205
    },
    {
      "epoch": 3.712759977515458,
      "grad_norm": 0.5012375712394714,
      "learning_rate": 3.773433313747105e-05,
      "loss": 0.7958,
      "step": 13210
    },
    {
      "epoch": 3.7141652613827993,
      "grad_norm": 0.5377150177955627,
      "learning_rate": 3.765759461108698e-05,
      "loss": 0.8373,
      "step": 13215
    },
    {
      "epoch": 3.7155705452501406,
      "grad_norm": 0.5510297417640686,
      "learning_rate": 3.758091608955354e-05,
      "loss": 0.7929,
      "step": 13220
    },
    {
      "epoch": 3.716975829117482,
      "grad_norm": 0.4897007346153259,
      "learning_rate": 3.750429764667421e-05,
      "loss": 0.7859,
      "step": 13225
    },
    {
      "epoch": 3.718381112984823,
      "grad_norm": 0.5488338470458984,
      "learning_rate": 3.742773935619451e-05,
      "loss": 0.7972,
      "step": 13230
    },
    {
      "epoch": 3.719786396852164,
      "grad_norm": 0.5065768957138062,
      "learning_rate": 3.735124129180219e-05,
      "loss": 0.7969,
      "step": 13235
    },
    {
      "epoch": 3.7211916807195053,
      "grad_norm": 0.4968884289264679,
      "learning_rate": 3.7274803527126966e-05,
      "loss": 0.7883,
      "step": 13240
    },
    {
      "epoch": 3.7225969645868466,
      "grad_norm": 0.5004639625549316,
      "learning_rate": 3.719842613574056e-05,
      "loss": 0.7893,
      "step": 13245
    },
    {
      "epoch": 3.7240022484541875,
      "grad_norm": 0.6538251042366028,
      "learning_rate": 3.712210919115655e-05,
      "loss": 0.7924,
      "step": 13250
    },
    {
      "epoch": 3.7254075323215288,
      "grad_norm": 0.5038894414901733,
      "learning_rate": 3.7045852766830344e-05,
      "loss": 0.7805,
      "step": 13255
    },
    {
      "epoch": 3.72681281618887,
      "grad_norm": 0.5190169215202332,
      "learning_rate": 3.696965693615915e-05,
      "loss": 0.8538,
      "step": 13260
    },
    {
      "epoch": 3.7282181000562113,
      "grad_norm": 0.5098369717597961,
      "learning_rate": 3.68935217724817e-05,
      "loss": 0.7711,
      "step": 13265
    },
    {
      "epoch": 3.7296233839235526,
      "grad_norm": 0.5032815933227539,
      "learning_rate": 3.6817447349078506e-05,
      "loss": 0.8048,
      "step": 13270
    },
    {
      "epoch": 3.731028667790894,
      "grad_norm": 0.5347372889518738,
      "learning_rate": 3.674143373917157e-05,
      "loss": 0.7887,
      "step": 13275
    },
    {
      "epoch": 3.732433951658235,
      "grad_norm": 0.5623759031295776,
      "learning_rate": 3.6665481015924265e-05,
      "loss": 0.7976,
      "step": 13280
    },
    {
      "epoch": 3.733839235525576,
      "grad_norm": 0.5230100750923157,
      "learning_rate": 3.658958925244145e-05,
      "loss": 0.7769,
      "step": 13285
    },
    {
      "epoch": 3.7352445193929174,
      "grad_norm": 0.6419785618782043,
      "learning_rate": 3.651375852176935e-05,
      "loss": 0.7898,
      "step": 13290
    },
    {
      "epoch": 3.7366498032602586,
      "grad_norm": 0.5499880313873291,
      "learning_rate": 3.643798889689539e-05,
      "loss": 0.793,
      "step": 13295
    },
    {
      "epoch": 3.7380550871276,
      "grad_norm": 0.57011479139328,
      "learning_rate": 3.636228045074812e-05,
      "loss": 0.7857,
      "step": 13300
    },
    {
      "epoch": 3.7394603709949408,
      "grad_norm": 0.4934341013431549,
      "learning_rate": 3.62866332561973e-05,
      "loss": 0.8345,
      "step": 13305
    },
    {
      "epoch": 3.740865654862282,
      "grad_norm": 0.47908616065979004,
      "learning_rate": 3.621104738605373e-05,
      "loss": 0.786,
      "step": 13310
    },
    {
      "epoch": 3.7422709387296234,
      "grad_norm": 0.5038416385650635,
      "learning_rate": 3.61355229130691e-05,
      "loss": 0.7937,
      "step": 13315
    },
    {
      "epoch": 3.7436762225969646,
      "grad_norm": 0.5455382466316223,
      "learning_rate": 3.606005990993607e-05,
      "loss": 0.7889,
      "step": 13320
    },
    {
      "epoch": 3.745081506464306,
      "grad_norm": 0.48870712518692017,
      "learning_rate": 3.5984658449288155e-05,
      "loss": 0.7941,
      "step": 13325
    },
    {
      "epoch": 3.746486790331647,
      "grad_norm": 0.49450066685676575,
      "learning_rate": 3.590931860369963e-05,
      "loss": 0.7958,
      "step": 13330
    },
    {
      "epoch": 3.747892074198988,
      "grad_norm": 0.47441360354423523,
      "learning_rate": 3.5834040445685324e-05,
      "loss": 0.7849,
      "step": 13335
    },
    {
      "epoch": 3.7492973580663294,
      "grad_norm": 0.49870437383651733,
      "learning_rate": 3.575882404770093e-05,
      "loss": 0.7893,
      "step": 13340
    },
    {
      "epoch": 3.7507026419336706,
      "grad_norm": 0.4865383505821228,
      "learning_rate": 3.5683669482142565e-05,
      "loss": 0.7878,
      "step": 13345
    },
    {
      "epoch": 3.752107925801012,
      "grad_norm": 0.4772275388240814,
      "learning_rate": 3.5608576821346786e-05,
      "loss": 0.7967,
      "step": 13350
    },
    {
      "epoch": 3.753513209668353,
      "grad_norm": 0.5540780425071716,
      "learning_rate": 3.553354613759064e-05,
      "loss": 0.8323,
      "step": 13355
    },
    {
      "epoch": 3.754918493535694,
      "grad_norm": 0.5324468016624451,
      "learning_rate": 3.545857750309153e-05,
      "loss": 0.7924,
      "step": 13360
    },
    {
      "epoch": 3.7563237774030354,
      "grad_norm": 0.5483556985855103,
      "learning_rate": 3.538367099000711e-05,
      "loss": 0.7986,
      "step": 13365
    },
    {
      "epoch": 3.7577290612703766,
      "grad_norm": 0.5705327391624451,
      "learning_rate": 3.530882667043519e-05,
      "loss": 0.7925,
      "step": 13370
    },
    {
      "epoch": 3.759134345137718,
      "grad_norm": 0.5296326279640198,
      "learning_rate": 3.5234044616413816e-05,
      "loss": 0.7952,
      "step": 13375
    },
    {
      "epoch": 3.7605396290050592,
      "grad_norm": 0.5047909021377563,
      "learning_rate": 3.515932489992104e-05,
      "loss": 0.812,
      "step": 13380
    },
    {
      "epoch": 3.7619449128724,
      "grad_norm": 0.4710387885570526,
      "learning_rate": 3.508466759287494e-05,
      "loss": 0.7827,
      "step": 13385
    },
    {
      "epoch": 3.7633501967397414,
      "grad_norm": 0.5809809565544128,
      "learning_rate": 3.5010072767133504e-05,
      "loss": 0.7874,
      "step": 13390
    },
    {
      "epoch": 3.7647554806070826,
      "grad_norm": 0.5445647835731506,
      "learning_rate": 3.493554049449461e-05,
      "loss": 0.7853,
      "step": 13395
    },
    {
      "epoch": 3.766160764474424,
      "grad_norm": 0.586715042591095,
      "learning_rate": 3.4861070846695945e-05,
      "loss": 0.7984,
      "step": 13400
    },
    {
      "epoch": 3.767566048341765,
      "grad_norm": 0.5245351195335388,
      "learning_rate": 3.478666389541481e-05,
      "loss": 0.7974,
      "step": 13405
    },
    {
      "epoch": 3.768971332209106,
      "grad_norm": 0.5032010674476624,
      "learning_rate": 3.471231971226826e-05,
      "loss": 0.78,
      "step": 13410
    },
    {
      "epoch": 3.7703766160764474,
      "grad_norm": 0.5067157745361328,
      "learning_rate": 3.4638038368812965e-05,
      "loss": 0.7949,
      "step": 13415
    },
    {
      "epoch": 3.7717818999437887,
      "grad_norm": 0.5336160063743591,
      "learning_rate": 3.4563819936544985e-05,
      "loss": 0.7977,
      "step": 13420
    },
    {
      "epoch": 3.77318718381113,
      "grad_norm": 0.5501193404197693,
      "learning_rate": 3.448966448689992e-05,
      "loss": 0.7966,
      "step": 13425
    },
    {
      "epoch": 3.7745924676784712,
      "grad_norm": 0.5124372839927673,
      "learning_rate": 3.4415572091252756e-05,
      "loss": 0.7849,
      "step": 13430
    },
    {
      "epoch": 3.7759977515458125,
      "grad_norm": 0.5596308708190918,
      "learning_rate": 3.434154282091775e-05,
      "loss": 0.7827,
      "step": 13435
    },
    {
      "epoch": 3.7774030354131534,
      "grad_norm": 0.5546366572380066,
      "learning_rate": 3.42675767471484e-05,
      "loss": 0.7851,
      "step": 13440
    },
    {
      "epoch": 3.7788083192804947,
      "grad_norm": 0.5042093396186829,
      "learning_rate": 3.419367394113742e-05,
      "loss": 0.7815,
      "step": 13445
    },
    {
      "epoch": 3.780213603147836,
      "grad_norm": 0.49269187450408936,
      "learning_rate": 3.411983447401656e-05,
      "loss": 0.7876,
      "step": 13450
    },
    {
      "epoch": 3.781618887015177,
      "grad_norm": 0.4880962073802948,
      "learning_rate": 3.404605841685672e-05,
      "loss": 0.7881,
      "step": 13455
    },
    {
      "epoch": 3.783024170882518,
      "grad_norm": 0.5173097252845764,
      "learning_rate": 3.3972345840667606e-05,
      "loss": 0.7966,
      "step": 13460
    },
    {
      "epoch": 3.7844294547498594,
      "grad_norm": 0.5115389823913574,
      "learning_rate": 3.3898696816397944e-05,
      "loss": 0.7871,
      "step": 13465
    },
    {
      "epoch": 3.7858347386172007,
      "grad_norm": 0.49383237957954407,
      "learning_rate": 3.3825111414935284e-05,
      "loss": 0.8044,
      "step": 13470
    },
    {
      "epoch": 3.787240022484542,
      "grad_norm": 0.5096961259841919,
      "learning_rate": 3.3751589707105856e-05,
      "loss": 0.7931,
      "step": 13475
    },
    {
      "epoch": 3.7886453063518832,
      "grad_norm": 0.5193392038345337,
      "learning_rate": 3.367813176367467e-05,
      "loss": 0.8222,
      "step": 13480
    },
    {
      "epoch": 3.7900505902192245,
      "grad_norm": 0.6020642518997192,
      "learning_rate": 3.360473765534534e-05,
      "loss": 0.79,
      "step": 13485
    },
    {
      "epoch": 3.7914558740865654,
      "grad_norm": 0.679407000541687,
      "learning_rate": 3.353140745276002e-05,
      "loss": 0.7824,
      "step": 13490
    },
    {
      "epoch": 3.7928611579539067,
      "grad_norm": 0.5894041061401367,
      "learning_rate": 3.345814122649937e-05,
      "loss": 0.8103,
      "step": 13495
    },
    {
      "epoch": 3.794266441821248,
      "grad_norm": 0.4902842938899994,
      "learning_rate": 3.338493904708246e-05,
      "loss": 0.7874,
      "step": 13500
    },
    {
      "epoch": 3.7956717256885892,
      "grad_norm": 0.5133805871009827,
      "learning_rate": 3.3311800984966776e-05,
      "loss": 0.8053,
      "step": 13505
    },
    {
      "epoch": 3.79707700955593,
      "grad_norm": 0.5681965351104736,
      "learning_rate": 3.323872711054796e-05,
      "loss": 0.8309,
      "step": 13510
    },
    {
      "epoch": 3.7984822934232714,
      "grad_norm": 0.5490449666976929,
      "learning_rate": 3.316571749415998e-05,
      "loss": 0.7835,
      "step": 13515
    },
    {
      "epoch": 3.7998875772906127,
      "grad_norm": 0.5362129211425781,
      "learning_rate": 3.309277220607493e-05,
      "loss": 0.8493,
      "step": 13520
    },
    {
      "epoch": 3.801292861157954,
      "grad_norm": 0.5412881374359131,
      "learning_rate": 3.3019891316503016e-05,
      "loss": 0.7863,
      "step": 13525
    },
    {
      "epoch": 3.8026981450252952,
      "grad_norm": 0.5421592593193054,
      "learning_rate": 3.294707489559237e-05,
      "loss": 0.802,
      "step": 13530
    },
    {
      "epoch": 3.8041034288926365,
      "grad_norm": 0.5225282311439514,
      "learning_rate": 3.287432301342914e-05,
      "loss": 0.7921,
      "step": 13535
    },
    {
      "epoch": 3.8055087127599774,
      "grad_norm": 0.49669602513313293,
      "learning_rate": 3.2801635740037375e-05,
      "loss": 0.7848,
      "step": 13540
    },
    {
      "epoch": 3.8069139966273187,
      "grad_norm": 0.5054916143417358,
      "learning_rate": 3.2729013145378894e-05,
      "loss": 0.7869,
      "step": 13545
    },
    {
      "epoch": 3.80831928049466,
      "grad_norm": 0.5187205076217651,
      "learning_rate": 3.265645529935327e-05,
      "loss": 0.8285,
      "step": 13550
    },
    {
      "epoch": 3.8097245643620012,
      "grad_norm": 0.5249623656272888,
      "learning_rate": 3.2583962271797776e-05,
      "loss": 0.7834,
      "step": 13555
    },
    {
      "epoch": 3.811129848229342,
      "grad_norm": 0.5045213103294373,
      "learning_rate": 3.251153413248731e-05,
      "loss": 0.8339,
      "step": 13560
    },
    {
      "epoch": 3.8125351320966834,
      "grad_norm": 0.49637290835380554,
      "learning_rate": 3.243917095113422e-05,
      "loss": 0.7872,
      "step": 13565
    },
    {
      "epoch": 3.8139404159640247,
      "grad_norm": 0.49728259444236755,
      "learning_rate": 3.2366872797388434e-05,
      "loss": 0.787,
      "step": 13570
    },
    {
      "epoch": 3.815345699831366,
      "grad_norm": 0.5004485249519348,
      "learning_rate": 3.22946397408373e-05,
      "loss": 0.7981,
      "step": 13575
    },
    {
      "epoch": 3.8167509836987072,
      "grad_norm": 0.5126949548721313,
      "learning_rate": 3.2222471851005375e-05,
      "loss": 0.8253,
      "step": 13580
    },
    {
      "epoch": 3.8181562675660485,
      "grad_norm": 0.5166481137275696,
      "learning_rate": 3.2150369197354636e-05,
      "loss": 0.8044,
      "step": 13585
    },
    {
      "epoch": 3.8195615514333894,
      "grad_norm": 0.4981909394264221,
      "learning_rate": 3.2078331849284204e-05,
      "loss": 0.7918,
      "step": 13590
    },
    {
      "epoch": 3.8209668353007307,
      "grad_norm": 0.5490385890007019,
      "learning_rate": 3.200635987613038e-05,
      "loss": 0.7832,
      "step": 13595
    },
    {
      "epoch": 3.822372119168072,
      "grad_norm": 0.4970734119415283,
      "learning_rate": 3.1934453347166484e-05,
      "loss": 0.7895,
      "step": 13600
    },
    {
      "epoch": 3.8237774030354132,
      "grad_norm": 0.5258076190948486,
      "learning_rate": 3.1862612331602904e-05,
      "loss": 0.8058,
      "step": 13605
    },
    {
      "epoch": 3.825182686902754,
      "grad_norm": 0.6316525936126709,
      "learning_rate": 3.1790836898586976e-05,
      "loss": 0.7868,
      "step": 13610
    },
    {
      "epoch": 3.8265879707700954,
      "grad_norm": 0.5370373725891113,
      "learning_rate": 3.171912711720281e-05,
      "loss": 0.8033,
      "step": 13615
    },
    {
      "epoch": 3.8279932546374367,
      "grad_norm": 0.5009310841560364,
      "learning_rate": 3.164748305647144e-05,
      "loss": 0.7875,
      "step": 13620
    },
    {
      "epoch": 3.829398538504778,
      "grad_norm": 0.5032376646995544,
      "learning_rate": 3.1575904785350586e-05,
      "loss": 0.7787,
      "step": 13625
    },
    {
      "epoch": 3.8308038223721192,
      "grad_norm": 0.4967799484729767,
      "learning_rate": 3.1504392372734715e-05,
      "loss": 0.786,
      "step": 13630
    },
    {
      "epoch": 3.8322091062394605,
      "grad_norm": 0.5378280282020569,
      "learning_rate": 3.143294588745478e-05,
      "loss": 0.797,
      "step": 13635
    },
    {
      "epoch": 3.833614390106802,
      "grad_norm": 0.517133355140686,
      "learning_rate": 3.136156539827837e-05,
      "loss": 0.8284,
      "step": 13640
    },
    {
      "epoch": 3.8350196739741427,
      "grad_norm": 0.53566974401474,
      "learning_rate": 3.129025097390955e-05,
      "loss": 0.7862,
      "step": 13645
    },
    {
      "epoch": 3.836424957841484,
      "grad_norm": 0.5064863562583923,
      "learning_rate": 3.1219002682988774e-05,
      "loss": 0.79,
      "step": 13650
    },
    {
      "epoch": 3.8378302417088253,
      "grad_norm": 0.49409398436546326,
      "learning_rate": 3.114782059409284e-05,
      "loss": 0.8419,
      "step": 13655
    },
    {
      "epoch": 3.8392355255761665,
      "grad_norm": 0.551094651222229,
      "learning_rate": 3.107670477573484e-05,
      "loss": 0.7942,
      "step": 13660
    },
    {
      "epoch": 3.8406408094435074,
      "grad_norm": 0.4756724238395691,
      "learning_rate": 3.100565529636412e-05,
      "loss": 0.7884,
      "step": 13665
    },
    {
      "epoch": 3.8420460933108487,
      "grad_norm": 0.5304540395736694,
      "learning_rate": 3.093467222436605e-05,
      "loss": 0.8083,
      "step": 13670
    },
    {
      "epoch": 3.84345137717819,
      "grad_norm": 0.4981602728366852,
      "learning_rate": 3.0863755628062196e-05,
      "loss": 0.7893,
      "step": 13675
    },
    {
      "epoch": 3.8448566610455313,
      "grad_norm": 0.48895543813705444,
      "learning_rate": 3.079290557571014e-05,
      "loss": 0.7858,
      "step": 13680
    },
    {
      "epoch": 3.8462619449128725,
      "grad_norm": 0.5378122329711914,
      "learning_rate": 3.072212213550332e-05,
      "loss": 0.7804,
      "step": 13685
    },
    {
      "epoch": 3.847667228780214,
      "grad_norm": 0.5221678614616394,
      "learning_rate": 3.065140537557114e-05,
      "loss": 0.7889,
      "step": 13690
    },
    {
      "epoch": 3.8490725126475547,
      "grad_norm": 0.5159165859222412,
      "learning_rate": 3.05807553639788e-05,
      "loss": 0.7954,
      "step": 13695
    },
    {
      "epoch": 3.850477796514896,
      "grad_norm": 0.5093886852264404,
      "learning_rate": 3.0510172168727325e-05,
      "loss": 0.7932,
      "step": 13700
    },
    {
      "epoch": 3.8518830803822373,
      "grad_norm": 0.5088807344436646,
      "learning_rate": 3.043965585775329e-05,
      "loss": 0.7728,
      "step": 13705
    },
    {
      "epoch": 3.8532883642495785,
      "grad_norm": 0.5359891653060913,
      "learning_rate": 3.0369206498928993e-05,
      "loss": 0.7857,
      "step": 13710
    },
    {
      "epoch": 3.8546936481169194,
      "grad_norm": 0.5391300320625305,
      "learning_rate": 3.02988241600623e-05,
      "loss": 0.7837,
      "step": 13715
    },
    {
      "epoch": 3.8560989319842607,
      "grad_norm": 0.5096182227134705,
      "learning_rate": 3.0228508908896458e-05,
      "loss": 0.8227,
      "step": 13720
    },
    {
      "epoch": 3.857504215851602,
      "grad_norm": 0.4783703088760376,
      "learning_rate": 3.0158260813110263e-05,
      "loss": 0.7892,
      "step": 13725
    },
    {
      "epoch": 3.8589094997189433,
      "grad_norm": 0.535466194152832,
      "learning_rate": 3.0088079940317814e-05,
      "loss": 0.7936,
      "step": 13730
    },
    {
      "epoch": 3.8603147835862845,
      "grad_norm": 0.5733150243759155,
      "learning_rate": 3.0017966358068572e-05,
      "loss": 0.7844,
      "step": 13735
    },
    {
      "epoch": 3.861720067453626,
      "grad_norm": 0.5078558921813965,
      "learning_rate": 2.9947920133847106e-05,
      "loss": 0.7834,
      "step": 13740
    },
    {
      "epoch": 3.8631253513209667,
      "grad_norm": 0.5028983950614929,
      "learning_rate": 2.9877941335073233e-05,
      "loss": 0.7907,
      "step": 13745
    },
    {
      "epoch": 3.864530635188308,
      "grad_norm": 0.4952241778373718,
      "learning_rate": 2.9808030029101964e-05,
      "loss": 0.7852,
      "step": 13750
    },
    {
      "epoch": 3.8659359190556493,
      "grad_norm": 0.5080897808074951,
      "learning_rate": 2.9738186283223146e-05,
      "loss": 0.7885,
      "step": 13755
    },
    {
      "epoch": 3.8673412029229906,
      "grad_norm": 0.47834813594818115,
      "learning_rate": 2.966841016466174e-05,
      "loss": 0.805,
      "step": 13760
    },
    {
      "epoch": 3.8687464867903314,
      "grad_norm": 0.5171611905097961,
      "learning_rate": 2.9598701740577593e-05,
      "loss": 0.8313,
      "step": 13765
    },
    {
      "epoch": 3.8701517706576727,
      "grad_norm": 0.5075935125350952,
      "learning_rate": 2.95290610780654e-05,
      "loss": 0.7942,
      "step": 13770
    },
    {
      "epoch": 3.871557054525014,
      "grad_norm": 0.5939414501190186,
      "learning_rate": 2.9459488244154555e-05,
      "loss": 0.7886,
      "step": 13775
    },
    {
      "epoch": 3.8729623383923553,
      "grad_norm": 0.5912177562713623,
      "learning_rate": 2.9389983305809253e-05,
      "loss": 0.7975,
      "step": 13780
    },
    {
      "epoch": 3.8743676222596966,
      "grad_norm": 0.6902967095375061,
      "learning_rate": 2.9320546329928366e-05,
      "loss": 0.7856,
      "step": 13785
    },
    {
      "epoch": 3.875772906127038,
      "grad_norm": 0.5265038013458252,
      "learning_rate": 2.925117738334523e-05,
      "loss": 0.7851,
      "step": 13790
    },
    {
      "epoch": 3.877178189994379,
      "grad_norm": 0.5476034879684448,
      "learning_rate": 2.9181876532827766e-05,
      "loss": 0.7903,
      "step": 13795
    },
    {
      "epoch": 3.87858347386172,
      "grad_norm": 0.5170614719390869,
      "learning_rate": 2.9112643845078436e-05,
      "loss": 0.7874,
      "step": 13800
    },
    {
      "epoch": 3.8799887577290613,
      "grad_norm": 0.483660489320755,
      "learning_rate": 2.9043479386734007e-05,
      "loss": 0.787,
      "step": 13805
    },
    {
      "epoch": 3.8813940415964026,
      "grad_norm": 0.5684224963188171,
      "learning_rate": 2.897438322436554e-05,
      "loss": 0.7852,
      "step": 13810
    },
    {
      "epoch": 3.8827993254637434,
      "grad_norm": 0.509792685508728,
      "learning_rate": 2.8905355424478418e-05,
      "loss": 0.8273,
      "step": 13815
    },
    {
      "epoch": 3.8842046093310847,
      "grad_norm": 0.5174958109855652,
      "learning_rate": 2.883639605351226e-05,
      "loss": 0.7799,
      "step": 13820
    },
    {
      "epoch": 3.885609893198426,
      "grad_norm": 0.5141977071762085,
      "learning_rate": 2.8767505177840716e-05,
      "loss": 0.8001,
      "step": 13825
    },
    {
      "epoch": 3.8870151770657673,
      "grad_norm": 0.5077630281448364,
      "learning_rate": 2.8698682863771586e-05,
      "loss": 0.7866,
      "step": 13830
    },
    {
      "epoch": 3.8884204609331086,
      "grad_norm": 0.46954312920570374,
      "learning_rate": 2.8629929177546665e-05,
      "loss": 0.7822,
      "step": 13835
    },
    {
      "epoch": 3.88982574480045,
      "grad_norm": 0.5315179824829102,
      "learning_rate": 2.8561244185341706e-05,
      "loss": 0.7972,
      "step": 13840
    },
    {
      "epoch": 3.891231028667791,
      "grad_norm": 0.4870656132698059,
      "learning_rate": 2.8492627953266328e-05,
      "loss": 0.7788,
      "step": 13845
    },
    {
      "epoch": 3.892636312535132,
      "grad_norm": 0.5004348754882812,
      "learning_rate": 2.8424080547363952e-05,
      "loss": 0.7958,
      "step": 13850
    },
    {
      "epoch": 3.8940415964024733,
      "grad_norm": 0.5151705741882324,
      "learning_rate": 2.8355602033611774e-05,
      "loss": 0.7857,
      "step": 13855
    },
    {
      "epoch": 3.8954468802698146,
      "grad_norm": 0.505160391330719,
      "learning_rate": 2.828719247792071e-05,
      "loss": 0.7884,
      "step": 13860
    },
    {
      "epoch": 3.896852164137156,
      "grad_norm": 0.49165022373199463,
      "learning_rate": 2.8218851946135217e-05,
      "loss": 0.7811,
      "step": 13865
    },
    {
      "epoch": 3.8982574480044967,
      "grad_norm": 0.5374483466148376,
      "learning_rate": 2.8150580504033396e-05,
      "loss": 0.7831,
      "step": 13870
    },
    {
      "epoch": 3.899662731871838,
      "grad_norm": 0.5054756999015808,
      "learning_rate": 2.8082378217326842e-05,
      "loss": 0.7998,
      "step": 13875
    },
    {
      "epoch": 3.9010680157391793,
      "grad_norm": 0.5778508186340332,
      "learning_rate": 2.8014245151660524e-05,
      "loss": 0.7787,
      "step": 13880
    },
    {
      "epoch": 3.9024732996065206,
      "grad_norm": 0.5403375625610352,
      "learning_rate": 2.7946181372612858e-05,
      "loss": 0.7901,
      "step": 13885
    },
    {
      "epoch": 3.903878583473862,
      "grad_norm": 0.49616947770118713,
      "learning_rate": 2.7878186945695526e-05,
      "loss": 0.7848,
      "step": 13890
    },
    {
      "epoch": 3.905283867341203,
      "grad_norm": 0.4898039698600769,
      "learning_rate": 2.7810261936353487e-05,
      "loss": 0.7883,
      "step": 13895
    },
    {
      "epoch": 3.906689151208544,
      "grad_norm": 0.5719556212425232,
      "learning_rate": 2.7742406409964882e-05,
      "loss": 0.7943,
      "step": 13900
    },
    {
      "epoch": 3.9080944350758853,
      "grad_norm": 0.5483629107475281,
      "learning_rate": 2.767462043184096e-05,
      "loss": 0.7846,
      "step": 13905
    },
    {
      "epoch": 3.9094997189432266,
      "grad_norm": 0.5457851886749268,
      "learning_rate": 2.7606904067226046e-05,
      "loss": 0.7911,
      "step": 13910
    },
    {
      "epoch": 3.910905002810568,
      "grad_norm": 0.5014635324478149,
      "learning_rate": 2.7539257381297422e-05,
      "loss": 0.8347,
      "step": 13915
    },
    {
      "epoch": 3.9123102866779087,
      "grad_norm": 0.5336162447929382,
      "learning_rate": 2.7471680439165348e-05,
      "loss": 0.7902,
      "step": 13920
    },
    {
      "epoch": 3.91371557054525,
      "grad_norm": 0.5272439122200012,
      "learning_rate": 2.740417330587294e-05,
      "loss": 0.7884,
      "step": 13925
    },
    {
      "epoch": 3.9151208544125913,
      "grad_norm": 0.5460571050643921,
      "learning_rate": 2.733673604639615e-05,
      "loss": 0.7974,
      "step": 13930
    },
    {
      "epoch": 3.9165261382799326,
      "grad_norm": 0.5119529366493225,
      "learning_rate": 2.7269368725643594e-05,
      "loss": 0.7927,
      "step": 13935
    },
    {
      "epoch": 3.917931422147274,
      "grad_norm": 0.5041726231575012,
      "learning_rate": 2.7202071408456654e-05,
      "loss": 0.788,
      "step": 13940
    },
    {
      "epoch": 3.919336706014615,
      "grad_norm": 0.5189741849899292,
      "learning_rate": 2.713484415960931e-05,
      "loss": 0.7826,
      "step": 13945
    },
    {
      "epoch": 3.920741989881956,
      "grad_norm": 0.5067157745361328,
      "learning_rate": 2.7067687043808086e-05,
      "loss": 0.782,
      "step": 13950
    },
    {
      "epoch": 3.9221472737492973,
      "grad_norm": 0.50437992811203,
      "learning_rate": 2.7000600125692033e-05,
      "loss": 0.7965,
      "step": 13955
    },
    {
      "epoch": 3.9235525576166386,
      "grad_norm": 0.5085188746452332,
      "learning_rate": 2.69335834698326e-05,
      "loss": 0.7952,
      "step": 13960
    },
    {
      "epoch": 3.92495784148398,
      "grad_norm": 0.5374967455863953,
      "learning_rate": 2.6866637140733663e-05,
      "loss": 0.7938,
      "step": 13965
    },
    {
      "epoch": 3.9263631253513207,
      "grad_norm": 0.5251932740211487,
      "learning_rate": 2.679976120283131e-05,
      "loss": 0.7813,
      "step": 13970
    },
    {
      "epoch": 3.927768409218662,
      "grad_norm": 0.49533960223197937,
      "learning_rate": 2.6732955720493957e-05,
      "loss": 0.805,
      "step": 13975
    },
    {
      "epoch": 3.9291736930860033,
      "grad_norm": 0.4887608289718628,
      "learning_rate": 2.6666220758022233e-05,
      "loss": 0.7844,
      "step": 13980
    },
    {
      "epoch": 3.9305789769533446,
      "grad_norm": 0.4844372570514679,
      "learning_rate": 2.659955637964877e-05,
      "loss": 0.7866,
      "step": 13985
    },
    {
      "epoch": 3.931984260820686,
      "grad_norm": 0.4718281626701355,
      "learning_rate": 2.6532962649538384e-05,
      "loss": 0.7938,
      "step": 13990
    },
    {
      "epoch": 3.933389544688027,
      "grad_norm": 0.5602251291275024,
      "learning_rate": 2.6466439631787833e-05,
      "loss": 0.7871,
      "step": 13995
    },
    {
      "epoch": 3.9347948285553684,
      "grad_norm": 0.535500168800354,
      "learning_rate": 2.6399987390425816e-05,
      "loss": 0.7922,
      "step": 14000
    },
    {
      "epoch": 3.9362001124227093,
      "grad_norm": 0.5144920945167542,
      "learning_rate": 2.6333605989412947e-05,
      "loss": 0.7763,
      "step": 14005
    },
    {
      "epoch": 3.9376053962900506,
      "grad_norm": 0.5025665163993835,
      "learning_rate": 2.6267295492641607e-05,
      "loss": 0.7908,
      "step": 14010
    },
    {
      "epoch": 3.939010680157392,
      "grad_norm": 0.49400579929351807,
      "learning_rate": 2.6201055963935995e-05,
      "loss": 0.7964,
      "step": 14015
    },
    {
      "epoch": 3.940415964024733,
      "grad_norm": 0.5128166675567627,
      "learning_rate": 2.6134887467051894e-05,
      "loss": 0.8326,
      "step": 14020
    },
    {
      "epoch": 3.941821247892074,
      "grad_norm": 0.54892897605896,
      "learning_rate": 2.6068790065676806e-05,
      "loss": 0.7847,
      "step": 14025
    },
    {
      "epoch": 3.9432265317594153,
      "grad_norm": 0.5115255117416382,
      "learning_rate": 2.6002763823429808e-05,
      "loss": 0.7959,
      "step": 14030
    },
    {
      "epoch": 3.9446318156267566,
      "grad_norm": 0.49871450662612915,
      "learning_rate": 2.5936808803861468e-05,
      "loss": 0.7917,
      "step": 14035
    },
    {
      "epoch": 3.946037099494098,
      "grad_norm": 0.4911504089832306,
      "learning_rate": 2.5870925070453746e-05,
      "loss": 0.7823,
      "step": 14040
    },
    {
      "epoch": 3.947442383361439,
      "grad_norm": 0.5270660519599915,
      "learning_rate": 2.5805112686620046e-05,
      "loss": 0.7865,
      "step": 14045
    },
    {
      "epoch": 3.9488476672287804,
      "grad_norm": 0.501278817653656,
      "learning_rate": 2.5739371715705117e-05,
      "loss": 0.7967,
      "step": 14050
    },
    {
      "epoch": 3.9502529510961213,
      "grad_norm": 0.5706464648246765,
      "learning_rate": 2.567370222098494e-05,
      "loss": 0.7918,
      "step": 14055
    },
    {
      "epoch": 3.9516582349634626,
      "grad_norm": 0.5234654545783997,
      "learning_rate": 2.5608104265666687e-05,
      "loss": 0.797,
      "step": 14060
    },
    {
      "epoch": 3.953063518830804,
      "grad_norm": 0.555103600025177,
      "learning_rate": 2.554257791288871e-05,
      "loss": 0.8022,
      "step": 14065
    },
    {
      "epoch": 3.954468802698145,
      "grad_norm": 0.5400828719139099,
      "learning_rate": 2.5477123225720433e-05,
      "loss": 0.7918,
      "step": 14070
    },
    {
      "epoch": 3.955874086565486,
      "grad_norm": 0.5535014271736145,
      "learning_rate": 2.5411740267162254e-05,
      "loss": 0.7976,
      "step": 14075
    },
    {
      "epoch": 3.9572793704328273,
      "grad_norm": 0.49540334939956665,
      "learning_rate": 2.5346429100145584e-05,
      "loss": 0.798,
      "step": 14080
    },
    {
      "epoch": 3.9586846543001686,
      "grad_norm": 0.5534090995788574,
      "learning_rate": 2.5281189787532756e-05,
      "loss": 0.7798,
      "step": 14085
    },
    {
      "epoch": 3.96008993816751,
      "grad_norm": 0.503334105014801,
      "learning_rate": 2.5216022392116844e-05,
      "loss": 0.7877,
      "step": 14090
    },
    {
      "epoch": 3.961495222034851,
      "grad_norm": 0.5218794941902161,
      "learning_rate": 2.5150926976621803e-05,
      "loss": 0.7796,
      "step": 14095
    },
    {
      "epoch": 3.9629005059021924,
      "grad_norm": 0.488923579454422,
      "learning_rate": 2.5085903603702267e-05,
      "loss": 0.7821,
      "step": 14100
    },
    {
      "epoch": 3.9643057897695333,
      "grad_norm": 0.5786020159721375,
      "learning_rate": 2.5020952335943514e-05,
      "loss": 0.8033,
      "step": 14105
    },
    {
      "epoch": 3.9657110736368746,
      "grad_norm": 0.5032353401184082,
      "learning_rate": 2.4956073235861453e-05,
      "loss": 0.7795,
      "step": 14110
    },
    {
      "epoch": 3.967116357504216,
      "grad_norm": 0.504985511302948,
      "learning_rate": 2.489126636590251e-05,
      "loss": 0.8391,
      "step": 14115
    },
    {
      "epoch": 3.968521641371557,
      "grad_norm": 0.5181665420532227,
      "learning_rate": 2.482653178844363e-05,
      "loss": 0.7793,
      "step": 14120
    },
    {
      "epoch": 3.969926925238898,
      "grad_norm": 0.46679332852363586,
      "learning_rate": 2.4761869565792062e-05,
      "loss": 0.7851,
      "step": 14125
    },
    {
      "epoch": 3.9713322091062393,
      "grad_norm": 0.5379502177238464,
      "learning_rate": 2.4697279760185542e-05,
      "loss": 0.8018,
      "step": 14130
    },
    {
      "epoch": 3.9727374929735806,
      "grad_norm": 0.543921947479248,
      "learning_rate": 2.4632762433792046e-05,
      "loss": 0.7999,
      "step": 14135
    },
    {
      "epoch": 3.974142776840922,
      "grad_norm": 0.5470147728919983,
      "learning_rate": 2.4568317648709825e-05,
      "loss": 0.7912,
      "step": 14140
    },
    {
      "epoch": 3.975548060708263,
      "grad_norm": 0.5134099721908569,
      "learning_rate": 2.4503945466967225e-05,
      "loss": 0.7833,
      "step": 14145
    },
    {
      "epoch": 3.9769533445756045,
      "grad_norm": 0.557580828666687,
      "learning_rate": 2.4439645950522784e-05,
      "loss": 0.7919,
      "step": 14150
    },
    {
      "epoch": 3.9783586284429457,
      "grad_norm": 0.4935877025127411,
      "learning_rate": 2.437541916126509e-05,
      "loss": 0.7874,
      "step": 14155
    },
    {
      "epoch": 3.9797639123102866,
      "grad_norm": 0.5110975503921509,
      "learning_rate": 2.431126516101272e-05,
      "loss": 0.7974,
      "step": 14160
    },
    {
      "epoch": 3.981169196177628,
      "grad_norm": 0.4988139271736145,
      "learning_rate": 2.42471840115142e-05,
      "loss": 0.7843,
      "step": 14165
    },
    {
      "epoch": 3.982574480044969,
      "grad_norm": 0.5011380314826965,
      "learning_rate": 2.418317577444792e-05,
      "loss": 0.7945,
      "step": 14170
    },
    {
      "epoch": 3.98397976391231,
      "grad_norm": 0.51650071144104,
      "learning_rate": 2.4119240511422126e-05,
      "loss": 0.7769,
      "step": 14175
    },
    {
      "epoch": 3.9853850477796513,
      "grad_norm": 0.5402264595031738,
      "learning_rate": 2.405537828397475e-05,
      "loss": 0.8084,
      "step": 14180
    },
    {
      "epoch": 3.9867903316469926,
      "grad_norm": 0.6527597904205322,
      "learning_rate": 2.3991589153573513e-05,
      "loss": 0.79,
      "step": 14185
    },
    {
      "epoch": 3.988195615514334,
      "grad_norm": 0.6313776969909668,
      "learning_rate": 2.3927873181615768e-05,
      "loss": 0.8267,
      "step": 14190
    },
    {
      "epoch": 3.989600899381675,
      "grad_norm": 0.5356908440589905,
      "learning_rate": 2.386423042942837e-05,
      "loss": 0.7881,
      "step": 14195
    },
    {
      "epoch": 3.9910061832490165,
      "grad_norm": 0.5451526045799255,
      "learning_rate": 2.3800660958267795e-05,
      "loss": 0.7883,
      "step": 14200
    },
    {
      "epoch": 3.9924114671163577,
      "grad_norm": 0.5183944702148438,
      "learning_rate": 2.3737164829319915e-05,
      "loss": 0.7769,
      "step": 14205
    },
    {
      "epoch": 3.9938167509836986,
      "grad_norm": 0.5416154265403748,
      "learning_rate": 2.3673742103700135e-05,
      "loss": 0.7842,
      "step": 14210
    },
    {
      "epoch": 3.99522203485104,
      "grad_norm": 0.5259328484535217,
      "learning_rate": 2.361039284245302e-05,
      "loss": 0.7908,
      "step": 14215
    },
    {
      "epoch": 3.996627318718381,
      "grad_norm": 0.49881279468536377,
      "learning_rate": 2.3547117106552574e-05,
      "loss": 0.7924,
      "step": 14220
    },
    {
      "epoch": 3.9980326025857225,
      "grad_norm": 0.5812293291091919,
      "learning_rate": 2.3483914956901996e-05,
      "loss": 0.7961,
      "step": 14225
    },
    {
      "epoch": 3.9994378864530633,
      "grad_norm": 0.5475685000419617,
      "learning_rate": 2.34207864543336e-05,
      "loss": 0.838,
      "step": 14230
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.8493756055831909,
      "eval_runtime": 627.4844,
      "eval_samples_per_second": 7.167,
      "eval_steps_per_second": 0.598,
      "step": 14232
    },
    {
      "epoch": 4.000843170320405,
      "grad_norm": 0.5091008543968201,
      "learning_rate": 2.3357731659608872e-05,
      "loss": 0.7801,
      "step": 14235
    },
    {
      "epoch": 4.002248454187746,
      "grad_norm": 0.5158987045288086,
      "learning_rate": 2.329475063341835e-05,
      "loss": 0.7712,
      "step": 14240
    },
    {
      "epoch": 4.003653738055087,
      "grad_norm": 0.6087315678596497,
      "learning_rate": 2.3231843436381594e-05,
      "loss": 0.7642,
      "step": 14245
    },
    {
      "epoch": 4.0050590219224285,
      "grad_norm": 0.5059085488319397,
      "learning_rate": 2.3169010129046966e-05,
      "loss": 0.7619,
      "step": 14250
    },
    {
      "epoch": 4.00646430578977,
      "grad_norm": 0.5402360558509827,
      "learning_rate": 2.3106250771891912e-05,
      "loss": 0.7601,
      "step": 14255
    },
    {
      "epoch": 4.007869589657111,
      "grad_norm": 0.5491556525230408,
      "learning_rate": 2.304356542532259e-05,
      "loss": 0.8093,
      "step": 14260
    },
    {
      "epoch": 4.009274873524452,
      "grad_norm": 0.5153757929801941,
      "learning_rate": 2.2980954149673893e-05,
      "loss": 0.7517,
      "step": 14265
    },
    {
      "epoch": 4.010680157391793,
      "grad_norm": 0.5505621433258057,
      "learning_rate": 2.291841700520947e-05,
      "loss": 0.7519,
      "step": 14270
    },
    {
      "epoch": 4.012085441259134,
      "grad_norm": 0.5181359052658081,
      "learning_rate": 2.285595405212162e-05,
      "loss": 0.7579,
      "step": 14275
    },
    {
      "epoch": 4.013490725126475,
      "grad_norm": 0.5227612257003784,
      "learning_rate": 2.2793565350531242e-05,
      "loss": 0.7562,
      "step": 14280
    },
    {
      "epoch": 4.014896008993817,
      "grad_norm": 0.5462778210639954,
      "learning_rate": 2.2731250960487705e-05,
      "loss": 0.76,
      "step": 14285
    },
    {
      "epoch": 4.016301292861158,
      "grad_norm": 0.5738673806190491,
      "learning_rate": 2.2669010941968905e-05,
      "loss": 0.7577,
      "step": 14290
    },
    {
      "epoch": 4.017706576728499,
      "grad_norm": 0.5354070663452148,
      "learning_rate": 2.2606845354881155e-05,
      "loss": 0.769,
      "step": 14295
    },
    {
      "epoch": 4.0191118605958405,
      "grad_norm": 0.5066848397254944,
      "learning_rate": 2.254475425905912e-05,
      "loss": 0.7566,
      "step": 14300
    },
    {
      "epoch": 4.020517144463182,
      "grad_norm": 0.5838959813117981,
      "learning_rate": 2.2482737714265756e-05,
      "loss": 0.7689,
      "step": 14305
    },
    {
      "epoch": 4.021922428330523,
      "grad_norm": 0.5021213293075562,
      "learning_rate": 2.2420795780192283e-05,
      "loss": 0.7606,
      "step": 14310
    },
    {
      "epoch": 4.023327712197864,
      "grad_norm": 0.5116584897041321,
      "learning_rate": 2.23589285164581e-05,
      "loss": 0.7675,
      "step": 14315
    },
    {
      "epoch": 4.024732996065205,
      "grad_norm": 0.5282134413719177,
      "learning_rate": 2.2297135982610707e-05,
      "loss": 0.758,
      "step": 14320
    },
    {
      "epoch": 4.026138279932546,
      "grad_norm": 0.5259456038475037,
      "learning_rate": 2.2235418238125705e-05,
      "loss": 0.7679,
      "step": 14325
    },
    {
      "epoch": 4.027543563799887,
      "grad_norm": 0.4868493974208832,
      "learning_rate": 2.2173775342406712e-05,
      "loss": 0.7529,
      "step": 14330
    },
    {
      "epoch": 4.028948847667229,
      "grad_norm": 0.6483232378959656,
      "learning_rate": 2.211220735478533e-05,
      "loss": 0.755,
      "step": 14335
    },
    {
      "epoch": 4.03035413153457,
      "grad_norm": 0.505325436592102,
      "learning_rate": 2.2050714334520972e-05,
      "loss": 0.7598,
      "step": 14340
    },
    {
      "epoch": 4.031759415401911,
      "grad_norm": 0.5150840282440186,
      "learning_rate": 2.198929634080098e-05,
      "loss": 0.7528,
      "step": 14345
    },
    {
      "epoch": 4.0331646992692525,
      "grad_norm": 0.5273594260215759,
      "learning_rate": 2.1927953432740444e-05,
      "loss": 0.7509,
      "step": 14350
    },
    {
      "epoch": 4.034569983136594,
      "grad_norm": 0.5245232582092285,
      "learning_rate": 2.1866685669382204e-05,
      "loss": 0.7609,
      "step": 14355
    },
    {
      "epoch": 4.035975267003935,
      "grad_norm": 0.5435845851898193,
      "learning_rate": 2.180549310969676e-05,
      "loss": 0.7657,
      "step": 14360
    },
    {
      "epoch": 4.037380550871276,
      "grad_norm": 0.578952431678772,
      "learning_rate": 2.174437581258224e-05,
      "loss": 0.7708,
      "step": 14365
    },
    {
      "epoch": 4.038785834738618,
      "grad_norm": 0.5136585235595703,
      "learning_rate": 2.168333383686434e-05,
      "loss": 0.7654,
      "step": 14370
    },
    {
      "epoch": 4.040191118605958,
      "grad_norm": 0.5169129967689514,
      "learning_rate": 2.162236724129618e-05,
      "loss": 0.7583,
      "step": 14375
    },
    {
      "epoch": 4.041596402473299,
      "grad_norm": 0.4992331564426422,
      "learning_rate": 2.156147608455843e-05,
      "loss": 0.7746,
      "step": 14380
    },
    {
      "epoch": 4.043001686340641,
      "grad_norm": 0.5378484129905701,
      "learning_rate": 2.150066042525912e-05,
      "loss": 0.765,
      "step": 14385
    },
    {
      "epoch": 4.044406970207982,
      "grad_norm": 0.5266295075416565,
      "learning_rate": 2.143992032193356e-05,
      "loss": 0.759,
      "step": 14390
    },
    {
      "epoch": 4.045812254075323,
      "grad_norm": 0.5418915152549744,
      "learning_rate": 2.137925583304439e-05,
      "loss": 0.7647,
      "step": 14395
    },
    {
      "epoch": 4.0472175379426645,
      "grad_norm": 0.5171228647232056,
      "learning_rate": 2.1318667016981465e-05,
      "loss": 0.7654,
      "step": 14400
    },
    {
      "epoch": 4.048622821810006,
      "grad_norm": 0.5772991180419922,
      "learning_rate": 2.1258153932061808e-05,
      "loss": 0.7505,
      "step": 14405
    },
    {
      "epoch": 4.050028105677347,
      "grad_norm": 0.5454348921775818,
      "learning_rate": 2.1197716636529518e-05,
      "loss": 0.7525,
      "step": 14410
    },
    {
      "epoch": 4.051433389544688,
      "grad_norm": 0.5749043226242065,
      "learning_rate": 2.1137355188555798e-05,
      "loss": 0.7605,
      "step": 14415
    },
    {
      "epoch": 4.05283867341203,
      "grad_norm": 0.6449677348136902,
      "learning_rate": 2.1077069646238824e-05,
      "loss": 0.7761,
      "step": 14420
    },
    {
      "epoch": 4.05424395727937,
      "grad_norm": 0.5155068039894104,
      "learning_rate": 2.101686006760365e-05,
      "loss": 0.7587,
      "step": 14425
    },
    {
      "epoch": 4.055649241146711,
      "grad_norm": 0.5196123719215393,
      "learning_rate": 2.095672651060231e-05,
      "loss": 0.7678,
      "step": 14430
    },
    {
      "epoch": 4.057054525014053,
      "grad_norm": 0.5773482322692871,
      "learning_rate": 2.0896669033113626e-05,
      "loss": 0.7604,
      "step": 14435
    },
    {
      "epoch": 4.058459808881394,
      "grad_norm": 0.5296126008033752,
      "learning_rate": 2.083668769294321e-05,
      "loss": 0.7573,
      "step": 14440
    },
    {
      "epoch": 4.059865092748735,
      "grad_norm": 0.563080906867981,
      "learning_rate": 2.0776782547823337e-05,
      "loss": 0.7622,
      "step": 14445
    },
    {
      "epoch": 4.0612703766160765,
      "grad_norm": 0.5182024240493774,
      "learning_rate": 2.0716953655413007e-05,
      "loss": 0.759,
      "step": 14450
    },
    {
      "epoch": 4.062675660483418,
      "grad_norm": 0.5270100235939026,
      "learning_rate": 2.0657201073297805e-05,
      "loss": 0.7609,
      "step": 14455
    },
    {
      "epoch": 4.064080944350759,
      "grad_norm": 0.5340908169746399,
      "learning_rate": 2.0597524858989857e-05,
      "loss": 0.7647,
      "step": 14460
    },
    {
      "epoch": 4.0654862282181,
      "grad_norm": 0.5522120594978333,
      "learning_rate": 2.0537925069927798e-05,
      "loss": 0.7605,
      "step": 14465
    },
    {
      "epoch": 4.066891512085442,
      "grad_norm": 0.5666811466217041,
      "learning_rate": 2.0478401763476694e-05,
      "loss": 0.7582,
      "step": 14470
    },
    {
      "epoch": 4.068296795952782,
      "grad_norm": 0.5185369253158569,
      "learning_rate": 2.041895499692804e-05,
      "loss": 0.7639,
      "step": 14475
    },
    {
      "epoch": 4.069702079820123,
      "grad_norm": 0.5280120968818665,
      "learning_rate": 2.0359584827499544e-05,
      "loss": 0.7737,
      "step": 14480
    },
    {
      "epoch": 4.071107363687465,
      "grad_norm": 0.5524877905845642,
      "learning_rate": 2.0300291312335317e-05,
      "loss": 0.8033,
      "step": 14485
    },
    {
      "epoch": 4.072512647554806,
      "grad_norm": 0.5730440616607666,
      "learning_rate": 2.0241074508505643e-05,
      "loss": 0.7678,
      "step": 14490
    },
    {
      "epoch": 4.073917931422147,
      "grad_norm": 0.5500258207321167,
      "learning_rate": 2.0181934473006935e-05,
      "loss": 0.7645,
      "step": 14495
    },
    {
      "epoch": 4.0753232152894885,
      "grad_norm": 0.4928179681301117,
      "learning_rate": 2.0122871262761754e-05,
      "loss": 0.7624,
      "step": 14500
    },
    {
      "epoch": 4.07672849915683,
      "grad_norm": 0.5159469842910767,
      "learning_rate": 2.0063884934618728e-05,
      "loss": 0.8052,
      "step": 14505
    },
    {
      "epoch": 4.078133783024171,
      "grad_norm": 0.5040550231933594,
      "learning_rate": 2.0004975545352457e-05,
      "loss": 0.7689,
      "step": 14510
    },
    {
      "epoch": 4.079539066891512,
      "grad_norm": 0.5132153034210205,
      "learning_rate": 1.99461431516635e-05,
      "loss": 0.755,
      "step": 14515
    },
    {
      "epoch": 4.080944350758854,
      "grad_norm": 0.49842506647109985,
      "learning_rate": 1.9887387810178317e-05,
      "loss": 0.7574,
      "step": 14520
    },
    {
      "epoch": 4.082349634626194,
      "grad_norm": 0.5168430209159851,
      "learning_rate": 1.98287095774492e-05,
      "loss": 0.7632,
      "step": 14525
    },
    {
      "epoch": 4.083754918493535,
      "grad_norm": 0.5256431102752686,
      "learning_rate": 1.9770108509954167e-05,
      "loss": 0.7644,
      "step": 14530
    },
    {
      "epoch": 4.085160202360877,
      "grad_norm": 0.5269330739974976,
      "learning_rate": 1.971158466409706e-05,
      "loss": 0.7577,
      "step": 14535
    },
    {
      "epoch": 4.086565486228218,
      "grad_norm": 0.615835964679718,
      "learning_rate": 1.9653138096207324e-05,
      "loss": 0.7582,
      "step": 14540
    },
    {
      "epoch": 4.087970770095559,
      "grad_norm": 0.590116560459137,
      "learning_rate": 1.959476886254009e-05,
      "loss": 0.759,
      "step": 14545
    },
    {
      "epoch": 4.0893760539629005,
      "grad_norm": 0.6698099374771118,
      "learning_rate": 1.9536477019275955e-05,
      "loss": 0.753,
      "step": 14550
    },
    {
      "epoch": 4.090781337830242,
      "grad_norm": 0.49944236874580383,
      "learning_rate": 1.9478262622521114e-05,
      "loss": 0.7655,
      "step": 14555
    },
    {
      "epoch": 4.092186621697583,
      "grad_norm": 0.5291188955307007,
      "learning_rate": 1.942012572830719e-05,
      "loss": 0.7573,
      "step": 14560
    },
    {
      "epoch": 4.093591905564924,
      "grad_norm": 0.5382302403450012,
      "learning_rate": 1.9362066392591205e-05,
      "loss": 0.7569,
      "step": 14565
    },
    {
      "epoch": 4.094997189432266,
      "grad_norm": 0.531326413154602,
      "learning_rate": 1.9304084671255542e-05,
      "loss": 0.7542,
      "step": 14570
    },
    {
      "epoch": 4.096402473299607,
      "grad_norm": 0.5258771777153015,
      "learning_rate": 1.9246180620107858e-05,
      "loss": 0.7661,
      "step": 14575
    },
    {
      "epoch": 4.097807757166947,
      "grad_norm": 0.5160670876502991,
      "learning_rate": 1.9188354294881115e-05,
      "loss": 0.7618,
      "step": 14580
    },
    {
      "epoch": 4.099213041034289,
      "grad_norm": 0.531150758266449,
      "learning_rate": 1.9130605751233355e-05,
      "loss": 0.8067,
      "step": 14585
    },
    {
      "epoch": 4.10061832490163,
      "grad_norm": 0.4959104657173157,
      "learning_rate": 1.9072935044747843e-05,
      "loss": 0.7663,
      "step": 14590
    },
    {
      "epoch": 4.102023608768971,
      "grad_norm": 0.5348832607269287,
      "learning_rate": 1.901534223093291e-05,
      "loss": 0.7668,
      "step": 14595
    },
    {
      "epoch": 4.1034288926363125,
      "grad_norm": 0.5030957460403442,
      "learning_rate": 1.895782736522187e-05,
      "loss": 0.7525,
      "step": 14600
    },
    {
      "epoch": 4.104834176503654,
      "grad_norm": 0.5333991646766663,
      "learning_rate": 1.8900390502973065e-05,
      "loss": 0.7675,
      "step": 14605
    },
    {
      "epoch": 4.106239460370995,
      "grad_norm": 0.5182572603225708,
      "learning_rate": 1.884303169946974e-05,
      "loss": 0.7722,
      "step": 14610
    },
    {
      "epoch": 4.107644744238336,
      "grad_norm": 0.6066407561302185,
      "learning_rate": 1.8785751009919994e-05,
      "loss": 0.7678,
      "step": 14615
    },
    {
      "epoch": 4.109050028105678,
      "grad_norm": 0.5684912800788879,
      "learning_rate": 1.8728548489456765e-05,
      "loss": 0.7594,
      "step": 14620
    },
    {
      "epoch": 4.110455311973019,
      "grad_norm": 0.5319727659225464,
      "learning_rate": 1.8671424193137733e-05,
      "loss": 0.7567,
      "step": 14625
    },
    {
      "epoch": 4.111860595840359,
      "grad_norm": 0.5175413489341736,
      "learning_rate": 1.8614378175945334e-05,
      "loss": 0.7609,
      "step": 14630
    },
    {
      "epoch": 4.113265879707701,
      "grad_norm": 0.5334261655807495,
      "learning_rate": 1.8557410492786554e-05,
      "loss": 0.7608,
      "step": 14635
    },
    {
      "epoch": 4.114671163575042,
      "grad_norm": 0.5544094443321228,
      "learning_rate": 1.8500521198493082e-05,
      "loss": 0.7675,
      "step": 14640
    },
    {
      "epoch": 4.116076447442383,
      "grad_norm": 0.5419462323188782,
      "learning_rate": 1.844371034782112e-05,
      "loss": 0.761,
      "step": 14645
    },
    {
      "epoch": 4.1174817313097245,
      "grad_norm": 0.5597703456878662,
      "learning_rate": 1.8386977995451394e-05,
      "loss": 0.759,
      "step": 14650
    },
    {
      "epoch": 4.118887015177066,
      "grad_norm": 0.5377607941627502,
      "learning_rate": 1.8330324195989023e-05,
      "loss": 0.7761,
      "step": 14655
    },
    {
      "epoch": 4.120292299044407,
      "grad_norm": 0.523078441619873,
      "learning_rate": 1.8273749003963547e-05,
      "loss": 0.7736,
      "step": 14660
    },
    {
      "epoch": 4.121697582911748,
      "grad_norm": 0.5491028428077698,
      "learning_rate": 1.821725247382886e-05,
      "loss": 0.7631,
      "step": 14665
    },
    {
      "epoch": 4.12310286677909,
      "grad_norm": 0.5564166307449341,
      "learning_rate": 1.8160834659963143e-05,
      "loss": 0.766,
      "step": 14670
    },
    {
      "epoch": 4.124508150646431,
      "grad_norm": 0.5371769070625305,
      "learning_rate": 1.810449561666877e-05,
      "loss": 0.7734,
      "step": 14675
    },
    {
      "epoch": 4.125913434513771,
      "grad_norm": 0.5102625489234924,
      "learning_rate": 1.8048235398172354e-05,
      "loss": 0.7551,
      "step": 14680
    },
    {
      "epoch": 4.127318718381113,
      "grad_norm": 0.5413872599601746,
      "learning_rate": 1.799205405862463e-05,
      "loss": 0.7703,
      "step": 14685
    },
    {
      "epoch": 4.128724002248454,
      "grad_norm": 0.7608216404914856,
      "learning_rate": 1.7935951652100347e-05,
      "loss": 0.7821,
      "step": 14690
    },
    {
      "epoch": 4.130129286115795,
      "grad_norm": 0.4896598160266876,
      "learning_rate": 1.7879928232598342e-05,
      "loss": 0.7622,
      "step": 14695
    },
    {
      "epoch": 4.1315345699831365,
      "grad_norm": 0.5583370923995972,
      "learning_rate": 1.7823983854041428e-05,
      "loss": 0.7617,
      "step": 14700
    },
    {
      "epoch": 4.132939853850478,
      "grad_norm": 0.5344766974449158,
      "learning_rate": 1.776811857027635e-05,
      "loss": 0.7666,
      "step": 14705
    },
    {
      "epoch": 4.134345137717819,
      "grad_norm": 0.5449461936950684,
      "learning_rate": 1.771233243507361e-05,
      "loss": 0.7723,
      "step": 14710
    },
    {
      "epoch": 4.13575042158516,
      "grad_norm": 0.5095350742340088,
      "learning_rate": 1.765662550212769e-05,
      "loss": 0.7694,
      "step": 14715
    },
    {
      "epoch": 4.137155705452502,
      "grad_norm": 0.566623330116272,
      "learning_rate": 1.7600997825056798e-05,
      "loss": 0.7864,
      "step": 14720
    },
    {
      "epoch": 4.138560989319843,
      "grad_norm": 0.520380973815918,
      "learning_rate": 1.7545449457402752e-05,
      "loss": 0.7662,
      "step": 14725
    },
    {
      "epoch": 4.139966273187184,
      "grad_norm": 0.5430235266685486,
      "learning_rate": 1.748998045263114e-05,
      "loss": 0.7603,
      "step": 14730
    },
    {
      "epoch": 4.141371557054525,
      "grad_norm": 0.5299351215362549,
      "learning_rate": 1.743459086413114e-05,
      "loss": 0.7691,
      "step": 14735
    },
    {
      "epoch": 4.142776840921866,
      "grad_norm": 0.5721263289451599,
      "learning_rate": 1.7379280745215498e-05,
      "loss": 0.783,
      "step": 14740
    },
    {
      "epoch": 4.144182124789207,
      "grad_norm": 0.5396542549133301,
      "learning_rate": 1.732405014912042e-05,
      "loss": 0.7685,
      "step": 14745
    },
    {
      "epoch": 4.1455874086565485,
      "grad_norm": 0.5272538065910339,
      "learning_rate": 1.7268899129005622e-05,
      "loss": 0.7782,
      "step": 14750
    },
    {
      "epoch": 4.14699269252389,
      "grad_norm": 0.5426777601242065,
      "learning_rate": 1.7213827737954248e-05,
      "loss": 0.7727,
      "step": 14755
    },
    {
      "epoch": 4.148397976391231,
      "grad_norm": 0.5160602927207947,
      "learning_rate": 1.715883602897268e-05,
      "loss": 0.7647,
      "step": 14760
    },
    {
      "epoch": 4.149803260258572,
      "grad_norm": 0.5610844492912292,
      "learning_rate": 1.710392405499077e-05,
      "loss": 0.7588,
      "step": 14765
    },
    {
      "epoch": 4.151208544125914,
      "grad_norm": 0.5141729116439819,
      "learning_rate": 1.7049091868861523e-05,
      "loss": 0.7676,
      "step": 14770
    },
    {
      "epoch": 4.152613827993255,
      "grad_norm": 0.5534763336181641,
      "learning_rate": 1.699433952336118e-05,
      "loss": 0.7607,
      "step": 14775
    },
    {
      "epoch": 4.154019111860596,
      "grad_norm": 0.5679239630699158,
      "learning_rate": 1.693966707118909e-05,
      "loss": 0.7701,
      "step": 14780
    },
    {
      "epoch": 4.155424395727937,
      "grad_norm": 0.5184338092803955,
      "learning_rate": 1.688507456496776e-05,
      "loss": 0.7668,
      "step": 14785
    },
    {
      "epoch": 4.156829679595278,
      "grad_norm": 0.6242445707321167,
      "learning_rate": 1.6830562057242738e-05,
      "loss": 0.7614,
      "step": 14790
    },
    {
      "epoch": 4.158234963462619,
      "grad_norm": 0.5235223174095154,
      "learning_rate": 1.6776129600482537e-05,
      "loss": 0.7554,
      "step": 14795
    },
    {
      "epoch": 4.1596402473299605,
      "grad_norm": 0.5166909098625183,
      "learning_rate": 1.672177724707865e-05,
      "loss": 0.753,
      "step": 14800
    },
    {
      "epoch": 4.161045531197302,
      "grad_norm": 0.5642342567443848,
      "learning_rate": 1.6667505049345476e-05,
      "loss": 0.7636,
      "step": 14805
    },
    {
      "epoch": 4.162450815064643,
      "grad_norm": 0.5586567521095276,
      "learning_rate": 1.661331305952025e-05,
      "loss": 0.767,
      "step": 14810
    },
    {
      "epoch": 4.163856098931984,
      "grad_norm": 0.5883127450942993,
      "learning_rate": 1.6559201329763006e-05,
      "loss": 0.7709,
      "step": 14815
    },
    {
      "epoch": 4.165261382799326,
      "grad_norm": 0.5193496942520142,
      "learning_rate": 1.6505169912156548e-05,
      "loss": 0.7653,
      "step": 14820
    },
    {
      "epoch": 4.166666666666667,
      "grad_norm": 0.5206704139709473,
      "learning_rate": 1.6451218858706374e-05,
      "loss": 0.7685,
      "step": 14825
    },
    {
      "epoch": 4.168071950534008,
      "grad_norm": 0.5586879253387451,
      "learning_rate": 1.639734822134057e-05,
      "loss": 0.757,
      "step": 14830
    },
    {
      "epoch": 4.169477234401349,
      "grad_norm": 0.5711727738380432,
      "learning_rate": 1.634355805190989e-05,
      "loss": 0.7513,
      "step": 14835
    },
    {
      "epoch": 4.17088251826869,
      "grad_norm": 0.5114162564277649,
      "learning_rate": 1.6289848402187648e-05,
      "loss": 0.7684,
      "step": 14840
    },
    {
      "epoch": 4.172287802136031,
      "grad_norm": 0.5075726509094238,
      "learning_rate": 1.6236219323869618e-05,
      "loss": 0.7433,
      "step": 14845
    },
    {
      "epoch": 4.1736930860033725,
      "grad_norm": 0.5122740864753723,
      "learning_rate": 1.6182670868574003e-05,
      "loss": 0.7636,
      "step": 14850
    },
    {
      "epoch": 4.175098369870714,
      "grad_norm": 0.5521731972694397,
      "learning_rate": 1.612920308784145e-05,
      "loss": 0.7657,
      "step": 14855
    },
    {
      "epoch": 4.176503653738055,
      "grad_norm": 0.5010851621627808,
      "learning_rate": 1.607581603313495e-05,
      "loss": 0.7655,
      "step": 14860
    },
    {
      "epoch": 4.177908937605396,
      "grad_norm": 0.5235019326210022,
      "learning_rate": 1.6022509755839797e-05,
      "loss": 0.7776,
      "step": 14865
    },
    {
      "epoch": 4.179314221472738,
      "grad_norm": 0.5572540760040283,
      "learning_rate": 1.596928430726351e-05,
      "loss": 0.7525,
      "step": 14870
    },
    {
      "epoch": 4.180719505340079,
      "grad_norm": 0.5532335638999939,
      "learning_rate": 1.5916139738635825e-05,
      "loss": 0.7541,
      "step": 14875
    },
    {
      "epoch": 4.18212478920742,
      "grad_norm": 0.5307499766349792,
      "learning_rate": 1.5863076101108675e-05,
      "loss": 0.7653,
      "step": 14880
    },
    {
      "epoch": 4.1835300730747615,
      "grad_norm": 0.5084945559501648,
      "learning_rate": 1.5810093445756002e-05,
      "loss": 0.76,
      "step": 14885
    },
    {
      "epoch": 4.184935356942102,
      "grad_norm": 0.5106390714645386,
      "learning_rate": 1.575719182357386e-05,
      "loss": 0.7607,
      "step": 14890
    },
    {
      "epoch": 4.186340640809443,
      "grad_norm": 0.5546446442604065,
      "learning_rate": 1.5704371285480345e-05,
      "loss": 0.7616,
      "step": 14895
    },
    {
      "epoch": 4.1877459246767845,
      "grad_norm": 0.5922245383262634,
      "learning_rate": 1.565163188231541e-05,
      "loss": 0.7512,
      "step": 14900
    },
    {
      "epoch": 4.189151208544126,
      "grad_norm": 0.5253076553344727,
      "learning_rate": 1.5598973664841e-05,
      "loss": 0.7547,
      "step": 14905
    },
    {
      "epoch": 4.190556492411467,
      "grad_norm": 0.5457403659820557,
      "learning_rate": 1.5546396683740892e-05,
      "loss": 0.7719,
      "step": 14910
    },
    {
      "epoch": 4.191961776278808,
      "grad_norm": 0.5217248201370239,
      "learning_rate": 1.549390098962067e-05,
      "loss": 0.7663,
      "step": 14915
    },
    {
      "epoch": 4.19336706014615,
      "grad_norm": 0.5782322883605957,
      "learning_rate": 1.5441486633007674e-05,
      "loss": 0.7631,
      "step": 14920
    },
    {
      "epoch": 4.194772344013491,
      "grad_norm": 0.557896614074707,
      "learning_rate": 1.5389153664350963e-05,
      "loss": 0.7501,
      "step": 14925
    },
    {
      "epoch": 4.196177627880832,
      "grad_norm": 0.5055593848228455,
      "learning_rate": 1.533690213402129e-05,
      "loss": 0.7582,
      "step": 14930
    },
    {
      "epoch": 4.1975829117481736,
      "grad_norm": 0.5190460085868835,
      "learning_rate": 1.528473209231095e-05,
      "loss": 0.7647,
      "step": 14935
    },
    {
      "epoch": 4.198988195615514,
      "grad_norm": 0.5824263095855713,
      "learning_rate": 1.5232643589433848e-05,
      "loss": 0.7549,
      "step": 14940
    },
    {
      "epoch": 4.200393479482855,
      "grad_norm": 0.535484790802002,
      "learning_rate": 1.5180636675525428e-05,
      "loss": 0.7728,
      "step": 14945
    },
    {
      "epoch": 4.2017987633501965,
      "grad_norm": 0.5152845978736877,
      "learning_rate": 1.5128711400642593e-05,
      "loss": 0.7504,
      "step": 14950
    },
    {
      "epoch": 4.203204047217538,
      "grad_norm": 0.5120229125022888,
      "learning_rate": 1.5076867814763629e-05,
      "loss": 0.7655,
      "step": 14955
    },
    {
      "epoch": 4.204609331084879,
      "grad_norm": 0.5439524054527283,
      "learning_rate": 1.5025105967788222e-05,
      "loss": 0.7572,
      "step": 14960
    },
    {
      "epoch": 4.20601461495222,
      "grad_norm": 0.5316490530967712,
      "learning_rate": 1.4973425909537409e-05,
      "loss": 0.7708,
      "step": 14965
    },
    {
      "epoch": 4.207419898819562,
      "grad_norm": 0.5262763500213623,
      "learning_rate": 1.4921827689753465e-05,
      "loss": 0.7703,
      "step": 14970
    },
    {
      "epoch": 4.208825182686903,
      "grad_norm": 0.5419159531593323,
      "learning_rate": 1.4870311358099932e-05,
      "loss": 0.7453,
      "step": 14975
    },
    {
      "epoch": 4.210230466554244,
      "grad_norm": 0.5065816640853882,
      "learning_rate": 1.4818876964161499e-05,
      "loss": 0.7686,
      "step": 14980
    },
    {
      "epoch": 4.2116357504215856,
      "grad_norm": 0.5062248706817627,
      "learning_rate": 1.4767524557444034e-05,
      "loss": 0.8098,
      "step": 14985
    },
    {
      "epoch": 4.213041034288926,
      "grad_norm": 0.5111182332038879,
      "learning_rate": 1.4716254187374422e-05,
      "loss": 0.7687,
      "step": 14990
    },
    {
      "epoch": 4.214446318156267,
      "grad_norm": 0.5519953966140747,
      "learning_rate": 1.4665065903300645e-05,
      "loss": 0.7524,
      "step": 14995
    },
    {
      "epoch": 4.2158516020236085,
      "grad_norm": 0.5973950028419495,
      "learning_rate": 1.4613959754491691e-05,
      "loss": 0.7635,
      "step": 15000
    },
    {
      "epoch": 4.21725688589095,
      "grad_norm": 0.558695912361145,
      "learning_rate": 1.456293579013741e-05,
      "loss": 0.7794,
      "step": 15005
    },
    {
      "epoch": 4.218662169758291,
      "grad_norm": 0.5113961696624756,
      "learning_rate": 1.4511994059348622e-05,
      "loss": 0.7664,
      "step": 15010
    },
    {
      "epoch": 4.220067453625632,
      "grad_norm": 0.517517626285553,
      "learning_rate": 1.446113461115698e-05,
      "loss": 0.7711,
      "step": 15015
    },
    {
      "epoch": 4.221472737492974,
      "grad_norm": 0.5033154487609863,
      "learning_rate": 1.4410357494514947e-05,
      "loss": 0.7702,
      "step": 15020
    },
    {
      "epoch": 4.222878021360315,
      "grad_norm": 0.5211993455886841,
      "learning_rate": 1.435966275829571e-05,
      "loss": 0.7544,
      "step": 15025
    },
    {
      "epoch": 4.224283305227656,
      "grad_norm": 0.5256550908088684,
      "learning_rate": 1.4309050451293205e-05,
      "loss": 0.769,
      "step": 15030
    },
    {
      "epoch": 4.225688589094998,
      "grad_norm": 0.5042311549186707,
      "learning_rate": 1.4258520622222004e-05,
      "loss": 0.7731,
      "step": 15035
    },
    {
      "epoch": 4.227093872962339,
      "grad_norm": 0.5198147892951965,
      "learning_rate": 1.4208073319717285e-05,
      "loss": 0.7614,
      "step": 15040
    },
    {
      "epoch": 4.228499156829679,
      "grad_norm": 0.5736011862754822,
      "learning_rate": 1.4157708592334818e-05,
      "loss": 0.7517,
      "step": 15045
    },
    {
      "epoch": 4.2299044406970205,
      "grad_norm": 0.5595988035202026,
      "learning_rate": 1.4107426488550868e-05,
      "loss": 0.7668,
      "step": 15050
    },
    {
      "epoch": 4.231309724564362,
      "grad_norm": 0.5281693935394287,
      "learning_rate": 1.4057227056762235e-05,
      "loss": 0.7587,
      "step": 15055
    },
    {
      "epoch": 4.232715008431703,
      "grad_norm": 0.4986502230167389,
      "learning_rate": 1.4007110345286056e-05,
      "loss": 0.767,
      "step": 15060
    },
    {
      "epoch": 4.234120292299044,
      "grad_norm": 0.5474176406860352,
      "learning_rate": 1.3957076402359903e-05,
      "loss": 0.7629,
      "step": 15065
    },
    {
      "epoch": 4.235525576166386,
      "grad_norm": 0.5422942042350769,
      "learning_rate": 1.3907125276141675e-05,
      "loss": 0.7607,
      "step": 15070
    },
    {
      "epoch": 4.236930860033727,
      "grad_norm": 0.5286979675292969,
      "learning_rate": 1.3857257014709579e-05,
      "loss": 0.7523,
      "step": 15075
    },
    {
      "epoch": 4.238336143901068,
      "grad_norm": 0.5423422455787659,
      "learning_rate": 1.3807471666062022e-05,
      "loss": 0.7587,
      "step": 15080
    },
    {
      "epoch": 4.23974142776841,
      "grad_norm": 0.5019691586494446,
      "learning_rate": 1.375776927811765e-05,
      "loss": 0.7666,
      "step": 15085
    },
    {
      "epoch": 4.24114671163575,
      "grad_norm": 0.6004632115364075,
      "learning_rate": 1.370814989871525e-05,
      "loss": 0.7947,
      "step": 15090
    },
    {
      "epoch": 4.242551995503091,
      "grad_norm": 0.5509012937545776,
      "learning_rate": 1.3658613575613654e-05,
      "loss": 0.7564,
      "step": 15095
    },
    {
      "epoch": 4.2439572793704325,
      "grad_norm": 0.5599709749221802,
      "learning_rate": 1.3609160356491835e-05,
      "loss": 0.7566,
      "step": 15100
    },
    {
      "epoch": 4.245362563237774,
      "grad_norm": 0.5406939387321472,
      "learning_rate": 1.3559790288948737e-05,
      "loss": 0.7611,
      "step": 15105
    },
    {
      "epoch": 4.246767847105115,
      "grad_norm": 0.532951831817627,
      "learning_rate": 1.3510503420503295e-05,
      "loss": 0.7601,
      "step": 15110
    },
    {
      "epoch": 4.248173130972456,
      "grad_norm": 0.5300197601318359,
      "learning_rate": 1.3461299798594296e-05,
      "loss": 0.7572,
      "step": 15115
    },
    {
      "epoch": 4.249578414839798,
      "grad_norm": 0.5183244943618774,
      "learning_rate": 1.3412179470580488e-05,
      "loss": 0.8164,
      "step": 15120
    },
    {
      "epoch": 4.250983698707139,
      "grad_norm": 0.5541335940361023,
      "learning_rate": 1.3363142483740398e-05,
      "loss": 0.7659,
      "step": 15125
    },
    {
      "epoch": 4.25238898257448,
      "grad_norm": 0.544763445854187,
      "learning_rate": 1.331418888527236e-05,
      "loss": 0.7673,
      "step": 15130
    },
    {
      "epoch": 4.253794266441822,
      "grad_norm": 0.566593587398529,
      "learning_rate": 1.326531872229444e-05,
      "loss": 0.7693,
      "step": 15135
    },
    {
      "epoch": 4.255199550309163,
      "grad_norm": 0.5759837031364441,
      "learning_rate": 1.3216532041844377e-05,
      "loss": 0.7653,
      "step": 15140
    },
    {
      "epoch": 4.256604834176503,
      "grad_norm": 0.5467374920845032,
      "learning_rate": 1.3167828890879607e-05,
      "loss": 0.7719,
      "step": 15145
    },
    {
      "epoch": 4.2580101180438445,
      "grad_norm": 0.5818522572517395,
      "learning_rate": 1.3119209316277092e-05,
      "loss": 0.7721,
      "step": 15150
    },
    {
      "epoch": 4.259415401911186,
      "grad_norm": 0.49862098693847656,
      "learning_rate": 1.3070673364833419e-05,
      "loss": 0.7689,
      "step": 15155
    },
    {
      "epoch": 4.260820685778527,
      "grad_norm": 0.5164232850074768,
      "learning_rate": 1.3022221083264685e-05,
      "loss": 0.7648,
      "step": 15160
    },
    {
      "epoch": 4.262225969645868,
      "grad_norm": 0.5314116477966309,
      "learning_rate": 1.2973852518206375e-05,
      "loss": 0.7673,
      "step": 15165
    },
    {
      "epoch": 4.26363125351321,
      "grad_norm": 0.521121084690094,
      "learning_rate": 1.292556771621347e-05,
      "loss": 0.7661,
      "step": 15170
    },
    {
      "epoch": 4.265036537380551,
      "grad_norm": 0.5553752183914185,
      "learning_rate": 1.2877366723760365e-05,
      "loss": 0.7547,
      "step": 15175
    },
    {
      "epoch": 4.266441821247892,
      "grad_norm": 0.5141292810440063,
      "learning_rate": 1.282924958724071e-05,
      "loss": 0.7715,
      "step": 15180
    },
    {
      "epoch": 4.267847105115234,
      "grad_norm": 0.554672360420227,
      "learning_rate": 1.278121635296744e-05,
      "loss": 0.7729,
      "step": 15185
    },
    {
      "epoch": 4.269252388982575,
      "grad_norm": 0.5302976965904236,
      "learning_rate": 1.2733267067172794e-05,
      "loss": 0.7554,
      "step": 15190
    },
    {
      "epoch": 4.270657672849916,
      "grad_norm": 0.5027261972427368,
      "learning_rate": 1.2685401776008188e-05,
      "loss": 0.767,
      "step": 15195
    },
    {
      "epoch": 4.2720629567172566,
      "grad_norm": 0.5278288722038269,
      "learning_rate": 1.2637620525544135e-05,
      "loss": 0.7617,
      "step": 15200
    },
    {
      "epoch": 4.273468240584598,
      "grad_norm": 0.5126884579658508,
      "learning_rate": 1.2589923361770339e-05,
      "loss": 0.7729,
      "step": 15205
    },
    {
      "epoch": 4.274873524451939,
      "grad_norm": 0.5113247036933899,
      "learning_rate": 1.254231033059554e-05,
      "loss": 0.7678,
      "step": 15210
    },
    {
      "epoch": 4.27627880831928,
      "grad_norm": 0.531362771987915,
      "learning_rate": 1.2494781477847517e-05,
      "loss": 0.813,
      "step": 15215
    },
    {
      "epoch": 4.277684092186622,
      "grad_norm": 0.5442110896110535,
      "learning_rate": 1.244733684927294e-05,
      "loss": 0.7555,
      "step": 15220
    },
    {
      "epoch": 4.279089376053963,
      "grad_norm": 0.5352432131767273,
      "learning_rate": 1.2399976490537557e-05,
      "loss": 0.7686,
      "step": 15225
    },
    {
      "epoch": 4.280494659921304,
      "grad_norm": 0.5032948851585388,
      "learning_rate": 1.2352700447225918e-05,
      "loss": 0.7678,
      "step": 15230
    },
    {
      "epoch": 4.281899943788646,
      "grad_norm": 0.5116592645645142,
      "learning_rate": 1.230550876484139e-05,
      "loss": 0.7748,
      "step": 15235
    },
    {
      "epoch": 4.283305227655987,
      "grad_norm": 0.5569871664047241,
      "learning_rate": 1.2258401488806214e-05,
      "loss": 0.7693,
      "step": 15240
    },
    {
      "epoch": 4.284710511523327,
      "grad_norm": 0.5144664645195007,
      "learning_rate": 1.2211378664461348e-05,
      "loss": 0.7605,
      "step": 15245
    },
    {
      "epoch": 4.286115795390669,
      "grad_norm": 0.5427828431129456,
      "learning_rate": 1.2164440337066496e-05,
      "loss": 0.7653,
      "step": 15250
    },
    {
      "epoch": 4.28752107925801,
      "grad_norm": 0.5294144749641418,
      "learning_rate": 1.2117586551799987e-05,
      "loss": 0.7809,
      "step": 15255
    },
    {
      "epoch": 4.288926363125351,
      "grad_norm": 0.5116956233978271,
      "learning_rate": 1.2070817353758812e-05,
      "loss": 0.7702,
      "step": 15260
    },
    {
      "epoch": 4.290331646992692,
      "grad_norm": 0.5081368088722229,
      "learning_rate": 1.2024132787958532e-05,
      "loss": 0.7511,
      "step": 15265
    },
    {
      "epoch": 4.291736930860034,
      "grad_norm": 0.5898987650871277,
      "learning_rate": 1.1977532899333265e-05,
      "loss": 0.7767,
      "step": 15270
    },
    {
      "epoch": 4.293142214727375,
      "grad_norm": 0.4957788586616516,
      "learning_rate": 1.1931017732735627e-05,
      "loss": 0.7618,
      "step": 15275
    },
    {
      "epoch": 4.294547498594716,
      "grad_norm": 0.5281771421432495,
      "learning_rate": 1.1884587332936658e-05,
      "loss": 0.7715,
      "step": 15280
    },
    {
      "epoch": 4.295952782462058,
      "grad_norm": 0.5147823095321655,
      "learning_rate": 1.1838241744625866e-05,
      "loss": 0.7568,
      "step": 15285
    },
    {
      "epoch": 4.297358066329399,
      "grad_norm": 0.5159749984741211,
      "learning_rate": 1.1791981012411047e-05,
      "loss": 0.7559,
      "step": 15290
    },
    {
      "epoch": 4.29876335019674,
      "grad_norm": 0.5492069125175476,
      "learning_rate": 1.174580518081838e-05,
      "loss": 0.7648,
      "step": 15295
    },
    {
      "epoch": 4.300168634064081,
      "grad_norm": 0.5118445754051208,
      "learning_rate": 1.169971429429234e-05,
      "loss": 0.7675,
      "step": 15300
    },
    {
      "epoch": 4.301573917931422,
      "grad_norm": 0.5533217787742615,
      "learning_rate": 1.1653708397195584e-05,
      "loss": 0.7551,
      "step": 15305
    },
    {
      "epoch": 4.302979201798763,
      "grad_norm": 0.5249292850494385,
      "learning_rate": 1.1607787533809012e-05,
      "loss": 0.7705,
      "step": 15310
    },
    {
      "epoch": 4.304384485666104,
      "grad_norm": 0.5463940501213074,
      "learning_rate": 1.1561951748331657e-05,
      "loss": 0.7654,
      "step": 15315
    },
    {
      "epoch": 4.305789769533446,
      "grad_norm": 0.5173946022987366,
      "learning_rate": 1.1516201084880685e-05,
      "loss": 0.7684,
      "step": 15320
    },
    {
      "epoch": 4.307195053400787,
      "grad_norm": 0.5160226225852966,
      "learning_rate": 1.1470535587491316e-05,
      "loss": 0.7517,
      "step": 15325
    },
    {
      "epoch": 4.308600337268128,
      "grad_norm": 0.5239101648330688,
      "learning_rate": 1.1424955300116802e-05,
      "loss": 0.764,
      "step": 15330
    },
    {
      "epoch": 4.31000562113547,
      "grad_norm": 0.49685534834861755,
      "learning_rate": 1.1379460266628395e-05,
      "loss": 0.7719,
      "step": 15335
    },
    {
      "epoch": 4.311410905002811,
      "grad_norm": 0.5540210604667664,
      "learning_rate": 1.1334050530815221e-05,
      "loss": 0.767,
      "step": 15340
    },
    {
      "epoch": 4.312816188870152,
      "grad_norm": 0.5216269493103027,
      "learning_rate": 1.1288726136384397e-05,
      "loss": 0.7611,
      "step": 15345
    },
    {
      "epoch": 4.314221472737493,
      "grad_norm": 0.5204557180404663,
      "learning_rate": 1.1243487126960862e-05,
      "loss": 0.7632,
      "step": 15350
    },
    {
      "epoch": 4.315626756604834,
      "grad_norm": 0.5199287533760071,
      "learning_rate": 1.119833354608738e-05,
      "loss": 0.7522,
      "step": 15355
    },
    {
      "epoch": 4.317032040472175,
      "grad_norm": 0.5106371641159058,
      "learning_rate": 1.1153265437224436e-05,
      "loss": 0.754,
      "step": 15360
    },
    {
      "epoch": 4.318437324339516,
      "grad_norm": 0.5255478620529175,
      "learning_rate": 1.1108282843750318e-05,
      "loss": 0.7619,
      "step": 15365
    },
    {
      "epoch": 4.319842608206858,
      "grad_norm": 0.540982186794281,
      "learning_rate": 1.106338580896098e-05,
      "loss": 0.7582,
      "step": 15370
    },
    {
      "epoch": 4.321247892074199,
      "grad_norm": 0.5933972597122192,
      "learning_rate": 1.1018574376070012e-05,
      "loss": 0.7546,
      "step": 15375
    },
    {
      "epoch": 4.32265317594154,
      "grad_norm": 0.5034326314926147,
      "learning_rate": 1.0973848588208635e-05,
      "loss": 0.7602,
      "step": 15380
    },
    {
      "epoch": 4.324058459808882,
      "grad_norm": 0.5004976391792297,
      "learning_rate": 1.0929208488425624e-05,
      "loss": 0.7574,
      "step": 15385
    },
    {
      "epoch": 4.325463743676223,
      "grad_norm": 0.5454297661781311,
      "learning_rate": 1.0884654119687287e-05,
      "loss": 0.7643,
      "step": 15390
    },
    {
      "epoch": 4.326869027543564,
      "grad_norm": 0.5362131595611572,
      "learning_rate": 1.0840185524877388e-05,
      "loss": 0.7601,
      "step": 15395
    },
    {
      "epoch": 4.328274311410905,
      "grad_norm": 0.5598523020744324,
      "learning_rate": 1.0795802746797157e-05,
      "loss": 0.7513,
      "step": 15400
    },
    {
      "epoch": 4.329679595278246,
      "grad_norm": 0.5776387453079224,
      "learning_rate": 1.0751505828165253e-05,
      "loss": 0.7659,
      "step": 15405
    },
    {
      "epoch": 4.331084879145587,
      "grad_norm": 0.5143082737922668,
      "learning_rate": 1.0707294811617607e-05,
      "loss": 0.7705,
      "step": 15410
    },
    {
      "epoch": 4.332490163012928,
      "grad_norm": 0.5051475763320923,
      "learning_rate": 1.0663169739707557e-05,
      "loss": 0.7609,
      "step": 15415
    },
    {
      "epoch": 4.33389544688027,
      "grad_norm": 0.5366913080215454,
      "learning_rate": 1.0619130654905695e-05,
      "loss": 0.7665,
      "step": 15420
    },
    {
      "epoch": 4.335300730747611,
      "grad_norm": 0.5204769968986511,
      "learning_rate": 1.0575177599599818e-05,
      "loss": 0.7678,
      "step": 15425
    },
    {
      "epoch": 4.336706014614952,
      "grad_norm": 0.5569886565208435,
      "learning_rate": 1.0531310616094958e-05,
      "loss": 0.7628,
      "step": 15430
    },
    {
      "epoch": 4.338111298482294,
      "grad_norm": 0.6373873949050903,
      "learning_rate": 1.048752974661329e-05,
      "loss": 0.7731,
      "step": 15435
    },
    {
      "epoch": 4.339516582349635,
      "grad_norm": 0.5396785140037537,
      "learning_rate": 1.0443835033294113e-05,
      "loss": 0.7491,
      "step": 15440
    },
    {
      "epoch": 4.340921866216976,
      "grad_norm": 0.5870918035507202,
      "learning_rate": 1.0400226518193756e-05,
      "loss": 0.768,
      "step": 15445
    },
    {
      "epoch": 4.3423271500843175,
      "grad_norm": 0.5297492742538452,
      "learning_rate": 1.0356704243285631e-05,
      "loss": 0.7622,
      "step": 15450
    },
    {
      "epoch": 4.343732433951658,
      "grad_norm": 0.5458266735076904,
      "learning_rate": 1.0313268250460118e-05,
      "loss": 0.7672,
      "step": 15455
    },
    {
      "epoch": 4.345137717818999,
      "grad_norm": 0.5890789031982422,
      "learning_rate": 1.0269918581524596e-05,
      "loss": 0.7483,
      "step": 15460
    },
    {
      "epoch": 4.3465430016863404,
      "grad_norm": 0.5684923529624939,
      "learning_rate": 1.0226655278203267e-05,
      "loss": 0.7606,
      "step": 15465
    },
    {
      "epoch": 4.347948285553682,
      "grad_norm": 0.533937931060791,
      "learning_rate": 1.0183478382137291e-05,
      "loss": 0.7585,
      "step": 15470
    },
    {
      "epoch": 4.349353569421023,
      "grad_norm": 0.5563515424728394,
      "learning_rate": 1.0140387934884609e-05,
      "loss": 0.7688,
      "step": 15475
    },
    {
      "epoch": 4.350758853288364,
      "grad_norm": 0.5250877737998962,
      "learning_rate": 1.0097383977919995e-05,
      "loss": 0.7556,
      "step": 15480
    },
    {
      "epoch": 4.352164137155706,
      "grad_norm": 0.5379632711410522,
      "learning_rate": 1.0054466552634934e-05,
      "loss": 0.773,
      "step": 15485
    },
    {
      "epoch": 4.353569421023047,
      "grad_norm": 0.5072575807571411,
      "learning_rate": 1.0011635700337662e-05,
      "loss": 0.7553,
      "step": 15490
    },
    {
      "epoch": 4.354974704890388,
      "grad_norm": 0.5212836861610413,
      "learning_rate": 9.968891462253083e-06,
      "loss": 0.76,
      "step": 15495
    },
    {
      "epoch": 4.3563799887577295,
      "grad_norm": 0.5077595114707947,
      "learning_rate": 9.926233879522683e-06,
      "loss": 0.762,
      "step": 15500
    },
    {
      "epoch": 4.35778527262507,
      "grad_norm": 0.5250112414360046,
      "learning_rate": 9.8836629932046e-06,
      "loss": 0.7732,
      "step": 15505
    },
    {
      "epoch": 4.359190556492411,
      "grad_norm": 0.49983668327331543,
      "learning_rate": 9.841178844273502e-06,
      "loss": 0.7607,
      "step": 15510
    },
    {
      "epoch": 4.3605958403597525,
      "grad_norm": 0.5391385555267334,
      "learning_rate": 9.79878147362061e-06,
      "loss": 0.7721,
      "step": 15515
    },
    {
      "epoch": 4.362001124227094,
      "grad_norm": 0.5197238326072693,
      "learning_rate": 9.756470922053529e-06,
      "loss": 0.7712,
      "step": 15520
    },
    {
      "epoch": 4.363406408094435,
      "grad_norm": 0.5797452330589294,
      "learning_rate": 9.714247230296391e-06,
      "loss": 0.7648,
      "step": 15525
    },
    {
      "epoch": 4.364811691961776,
      "grad_norm": 0.6094208359718323,
      "learning_rate": 9.672110438989701e-06,
      "loss": 0.7616,
      "step": 15530
    },
    {
      "epoch": 4.366216975829118,
      "grad_norm": 0.5721893310546875,
      "learning_rate": 9.630060588690292e-06,
      "loss": 0.7581,
      "step": 15535
    },
    {
      "epoch": 4.367622259696459,
      "grad_norm": 0.5357393026351929,
      "learning_rate": 9.588097719871347e-06,
      "loss": 0.7634,
      "step": 15540
    },
    {
      "epoch": 4.3690275435638,
      "grad_norm": 0.5096330642700195,
      "learning_rate": 9.546221872922322e-06,
      "loss": 0.7596,
      "step": 15545
    },
    {
      "epoch": 4.3704328274311415,
      "grad_norm": 0.5289176106452942,
      "learning_rate": 9.504433088148934e-06,
      "loss": 0.7496,
      "step": 15550
    },
    {
      "epoch": 4.371838111298482,
      "grad_norm": 0.5366396903991699,
      "learning_rate": 9.462731405773039e-06,
      "loss": 0.7589,
      "step": 15555
    },
    {
      "epoch": 4.373243395165823,
      "grad_norm": 0.5225094556808472,
      "learning_rate": 9.421116865932711e-06,
      "loss": 0.7554,
      "step": 15560
    },
    {
      "epoch": 4.3746486790331645,
      "grad_norm": 0.5416147112846375,
      "learning_rate": 9.379589508682152e-06,
      "loss": 0.7664,
      "step": 15565
    },
    {
      "epoch": 4.376053962900506,
      "grad_norm": 0.541254460811615,
      "learning_rate": 9.338149373991611e-06,
      "loss": 0.7587,
      "step": 15570
    },
    {
      "epoch": 4.377459246767847,
      "grad_norm": 0.522843599319458,
      "learning_rate": 9.296796501747406e-06,
      "loss": 0.8008,
      "step": 15575
    },
    {
      "epoch": 4.378864530635188,
      "grad_norm": 0.5551980137825012,
      "learning_rate": 9.255530931751866e-06,
      "loss": 0.771,
      "step": 15580
    },
    {
      "epoch": 4.38026981450253,
      "grad_norm": 0.5036432147026062,
      "learning_rate": 9.214352703723327e-06,
      "loss": 0.7634,
      "step": 15585
    },
    {
      "epoch": 4.381675098369871,
      "grad_norm": 0.49579012393951416,
      "learning_rate": 9.173261857295989e-06,
      "loss": 0.7678,
      "step": 15590
    },
    {
      "epoch": 4.383080382237212,
      "grad_norm": 0.5740176439285278,
      "learning_rate": 9.13225843201998e-06,
      "loss": 0.7705,
      "step": 15595
    },
    {
      "epoch": 4.3844856661045535,
      "grad_norm": 0.5432335138320923,
      "learning_rate": 9.091342467361308e-06,
      "loss": 0.7656,
      "step": 15600
    },
    {
      "epoch": 4.385890949971895,
      "grad_norm": 0.515156090259552,
      "learning_rate": 9.050514002701748e-06,
      "loss": 0.7561,
      "step": 15605
    },
    {
      "epoch": 4.387296233839235,
      "grad_norm": 0.5516624450683594,
      "learning_rate": 9.00977307733889e-06,
      "loss": 0.7573,
      "step": 15610
    },
    {
      "epoch": 4.3887015177065765,
      "grad_norm": 0.5266974568367004,
      "learning_rate": 8.969119730486075e-06,
      "loss": 0.7579,
      "step": 15615
    },
    {
      "epoch": 4.390106801573918,
      "grad_norm": 0.5234085321426392,
      "learning_rate": 8.928554001272337e-06,
      "loss": 0.7637,
      "step": 15620
    },
    {
      "epoch": 4.391512085441259,
      "grad_norm": 0.49571606516838074,
      "learning_rate": 8.888075928742357e-06,
      "loss": 0.7528,
      "step": 15625
    },
    {
      "epoch": 4.3929173693086,
      "grad_norm": 0.5543531775474548,
      "learning_rate": 8.847685551856455e-06,
      "loss": 0.7663,
      "step": 15630
    },
    {
      "epoch": 4.394322653175942,
      "grad_norm": 0.5438440442085266,
      "learning_rate": 8.807382909490603e-06,
      "loss": 0.7748,
      "step": 15635
    },
    {
      "epoch": 4.395727937043283,
      "grad_norm": 0.5622188448905945,
      "learning_rate": 8.767168040436235e-06,
      "loss": 0.7497,
      "step": 15640
    },
    {
      "epoch": 4.397133220910624,
      "grad_norm": 0.5074348449707031,
      "learning_rate": 8.72704098340037e-06,
      "loss": 0.7475,
      "step": 15645
    },
    {
      "epoch": 4.3985385047779655,
      "grad_norm": 0.5234601497650146,
      "learning_rate": 8.687001777005465e-06,
      "loss": 0.7619,
      "step": 15650
    },
    {
      "epoch": 4.399943788645306,
      "grad_norm": 0.5129987597465515,
      "learning_rate": 8.647050459789474e-06,
      "loss": 0.7652,
      "step": 15655
    },
    {
      "epoch": 4.401349072512647,
      "grad_norm": 0.5113297700881958,
      "learning_rate": 8.60718707020567e-06,
      "loss": 0.7547,
      "step": 15660
    },
    {
      "epoch": 4.4027543563799885,
      "grad_norm": 0.7454274892807007,
      "learning_rate": 8.56741164662278e-06,
      "loss": 0.764,
      "step": 15665
    },
    {
      "epoch": 4.40415964024733,
      "grad_norm": 0.5510764718055725,
      "learning_rate": 8.527724227324851e-06,
      "loss": 0.767,
      "step": 15670
    },
    {
      "epoch": 4.405564924114671,
      "grad_norm": 0.63843834400177,
      "learning_rate": 8.48812485051116e-06,
      "loss": 0.7677,
      "step": 15675
    },
    {
      "epoch": 4.406970207982012,
      "grad_norm": 0.5484469532966614,
      "learning_rate": 8.448613554296304e-06,
      "loss": 0.7667,
      "step": 15680
    },
    {
      "epoch": 4.408375491849354,
      "grad_norm": 0.6233168840408325,
      "learning_rate": 8.409190376710097e-06,
      "loss": 0.7638,
      "step": 15685
    },
    {
      "epoch": 4.409780775716695,
      "grad_norm": 0.6250582933425903,
      "learning_rate": 8.369855355697554e-06,
      "loss": 0.7635,
      "step": 15690
    },
    {
      "epoch": 4.411186059584036,
      "grad_norm": 0.5301230549812317,
      "learning_rate": 8.330608529118756e-06,
      "loss": 0.7624,
      "step": 15695
    },
    {
      "epoch": 4.4125913434513775,
      "grad_norm": 0.5399486422538757,
      "learning_rate": 8.291449934748985e-06,
      "loss": 0.7563,
      "step": 15700
    },
    {
      "epoch": 4.413996627318719,
      "grad_norm": 0.530448853969574,
      "learning_rate": 8.252379610278582e-06,
      "loss": 0.8029,
      "step": 15705
    },
    {
      "epoch": 4.415401911186059,
      "grad_norm": 0.5428824424743652,
      "learning_rate": 8.213397593312866e-06,
      "loss": 0.7827,
      "step": 15710
    },
    {
      "epoch": 4.4168071950534005,
      "grad_norm": 0.688864529132843,
      "learning_rate": 8.174503921372246e-06,
      "loss": 0.7641,
      "step": 15715
    },
    {
      "epoch": 4.418212478920742,
      "grad_norm": 0.5148605704307556,
      "learning_rate": 8.13569863189204e-06,
      "loss": 0.7754,
      "step": 15720
    },
    {
      "epoch": 4.419617762788083,
      "grad_norm": 0.509868323802948,
      "learning_rate": 8.096981762222534e-06,
      "loss": 0.7706,
      "step": 15725
    },
    {
      "epoch": 4.421023046655424,
      "grad_norm": 0.5653720498085022,
      "learning_rate": 8.058353349628877e-06,
      "loss": 0.7484,
      "step": 15730
    },
    {
      "epoch": 4.422428330522766,
      "grad_norm": 0.524649441242218,
      "learning_rate": 8.0198134312911e-06,
      "loss": 0.7613,
      "step": 15735
    },
    {
      "epoch": 4.423833614390107,
      "grad_norm": 0.5777130722999573,
      "learning_rate": 7.981362044304074e-06,
      "loss": 0.7628,
      "step": 15740
    },
    {
      "epoch": 4.425238898257448,
      "grad_norm": 0.4954436123371124,
      "learning_rate": 7.942999225677394e-06,
      "loss": 0.7615,
      "step": 15745
    },
    {
      "epoch": 4.4266441821247895,
      "grad_norm": 0.5425832867622375,
      "learning_rate": 7.904725012335457e-06,
      "loss": 0.7525,
      "step": 15750
    },
    {
      "epoch": 4.428049465992131,
      "grad_norm": 0.607391893863678,
      "learning_rate": 7.866539441117383e-06,
      "loss": 0.7587,
      "step": 15755
    },
    {
      "epoch": 4.429454749859472,
      "grad_norm": 0.5088168382644653,
      "learning_rate": 7.82844254877697e-06,
      "loss": 0.7697,
      "step": 15760
    },
    {
      "epoch": 4.4308600337268125,
      "grad_norm": 0.5161483287811279,
      "learning_rate": 7.790434371982624e-06,
      "loss": 0.7699,
      "step": 15765
    },
    {
      "epoch": 4.432265317594154,
      "grad_norm": 0.5147720575332642,
      "learning_rate": 7.75251494731739e-06,
      "loss": 0.7515,
      "step": 15770
    },
    {
      "epoch": 4.433670601461495,
      "grad_norm": 0.519303023815155,
      "learning_rate": 7.714684311278908e-06,
      "loss": 0.7599,
      "step": 15775
    },
    {
      "epoch": 4.435075885328836,
      "grad_norm": 0.5051060318946838,
      "learning_rate": 7.676942500279316e-06,
      "loss": 0.7647,
      "step": 15780
    },
    {
      "epoch": 4.436481169196178,
      "grad_norm": 0.5474300384521484,
      "learning_rate": 7.6392895506453e-06,
      "loss": 0.7635,
      "step": 15785
    },
    {
      "epoch": 4.437886453063519,
      "grad_norm": 0.5233967304229736,
      "learning_rate": 7.601725498617985e-06,
      "loss": 0.7639,
      "step": 15790
    },
    {
      "epoch": 4.43929173693086,
      "grad_norm": 0.5138359665870667,
      "learning_rate": 7.564250380352966e-06,
      "loss": 0.7558,
      "step": 15795
    },
    {
      "epoch": 4.4406970207982015,
      "grad_norm": 0.5092753171920776,
      "learning_rate": 7.526864231920183e-06,
      "loss": 0.7662,
      "step": 15800
    },
    {
      "epoch": 4.442102304665543,
      "grad_norm": 0.5444674491882324,
      "learning_rate": 7.489567089303984e-06,
      "loss": 0.763,
      "step": 15805
    },
    {
      "epoch": 4.443507588532883,
      "grad_norm": 0.5607460141181946,
      "learning_rate": 7.452358988403063e-06,
      "loss": 0.7821,
      "step": 15810
    },
    {
      "epoch": 4.4449128724002245,
      "grad_norm": 0.5317428112030029,
      "learning_rate": 7.4152399650303515e-06,
      "loss": 0.7736,
      "step": 15815
    },
    {
      "epoch": 4.446318156267566,
      "grad_norm": 0.5346295833587646,
      "learning_rate": 7.378210054913104e-06,
      "loss": 0.7567,
      "step": 15820
    },
    {
      "epoch": 4.447723440134907,
      "grad_norm": 0.5458088517189026,
      "learning_rate": 7.341269293692765e-06,
      "loss": 0.7599,
      "step": 15825
    },
    {
      "epoch": 4.449128724002248,
      "grad_norm": 0.5532664656639099,
      "learning_rate": 7.304417716924994e-06,
      "loss": 0.7883,
      "step": 15830
    },
    {
      "epoch": 4.45053400786959,
      "grad_norm": 0.6144757866859436,
      "learning_rate": 7.267655360079595e-06,
      "loss": 0.7551,
      "step": 15835
    },
    {
      "epoch": 4.451939291736931,
      "grad_norm": 0.5150646567344666,
      "learning_rate": 7.230982258540508e-06,
      "loss": 0.7669,
      "step": 15840
    },
    {
      "epoch": 4.453344575604272,
      "grad_norm": 0.5442806482315063,
      "learning_rate": 7.194398447605788e-06,
      "loss": 0.761,
      "step": 15845
    },
    {
      "epoch": 4.4547498594716135,
      "grad_norm": 0.5361512899398804,
      "learning_rate": 7.157903962487489e-06,
      "loss": 0.7558,
      "step": 15850
    },
    {
      "epoch": 4.456155143338955,
      "grad_norm": 0.506851851940155,
      "learning_rate": 7.121498838311713e-06,
      "loss": 0.7636,
      "step": 15855
    },
    {
      "epoch": 4.457560427206296,
      "grad_norm": 0.5137316584587097,
      "learning_rate": 7.0851831101185764e-06,
      "loss": 0.766,
      "step": 15860
    },
    {
      "epoch": 4.4589657110736365,
      "grad_norm": 0.5157634615898132,
      "learning_rate": 7.048956812862151e-06,
      "loss": 0.7577,
      "step": 15865
    },
    {
      "epoch": 4.460370994940978,
      "grad_norm": 0.6385879516601562,
      "learning_rate": 7.012819981410379e-06,
      "loss": 0.7752,
      "step": 15870
    },
    {
      "epoch": 4.461776278808319,
      "grad_norm": 0.5130940675735474,
      "learning_rate": 6.976772650545138e-06,
      "loss": 0.7726,
      "step": 15875
    },
    {
      "epoch": 4.46318156267566,
      "grad_norm": 0.5212185382843018,
      "learning_rate": 6.940814854962141e-06,
      "loss": 0.7591,
      "step": 15880
    },
    {
      "epoch": 4.464586846543002,
      "grad_norm": 0.5533390045166016,
      "learning_rate": 6.9049466292709584e-06,
      "loss": 0.8123,
      "step": 15885
    },
    {
      "epoch": 4.465992130410343,
      "grad_norm": 0.5034365653991699,
      "learning_rate": 6.869168007994897e-06,
      "loss": 0.7644,
      "step": 15890
    },
    {
      "epoch": 4.467397414277684,
      "grad_norm": 0.5371700525283813,
      "learning_rate": 6.833479025571044e-06,
      "loss": 0.7671,
      "step": 15895
    },
    {
      "epoch": 4.4688026981450255,
      "grad_norm": 0.5383287072181702,
      "learning_rate": 6.797879716350242e-06,
      "loss": 0.7615,
      "step": 15900
    },
    {
      "epoch": 4.470207982012367,
      "grad_norm": 0.5258095264434814,
      "learning_rate": 6.7623701145969495e-06,
      "loss": 0.7732,
      "step": 15905
    },
    {
      "epoch": 4.471613265879708,
      "grad_norm": 0.5264952778816223,
      "learning_rate": 6.726950254489328e-06,
      "loss": 0.7599,
      "step": 15910
    },
    {
      "epoch": 4.4730185497470485,
      "grad_norm": 0.557831883430481,
      "learning_rate": 6.691620170119173e-06,
      "loss": 0.7707,
      "step": 15915
    },
    {
      "epoch": 4.47442383361439,
      "grad_norm": 0.5028601884841919,
      "learning_rate": 6.656379895491826e-06,
      "loss": 0.7693,
      "step": 15920
    },
    {
      "epoch": 4.475829117481731,
      "grad_norm": 0.5211167931556702,
      "learning_rate": 6.621229464526235e-06,
      "loss": 0.7557,
      "step": 15925
    },
    {
      "epoch": 4.477234401349072,
      "grad_norm": 0.5280676484107971,
      "learning_rate": 6.586168911054835e-06,
      "loss": 0.7496,
      "step": 15930
    },
    {
      "epoch": 4.478639685216414,
      "grad_norm": 0.5282173156738281,
      "learning_rate": 6.551198268823588e-06,
      "loss": 0.7588,
      "step": 15935
    },
    {
      "epoch": 4.480044969083755,
      "grad_norm": 0.5561384558677673,
      "learning_rate": 6.51631757149187e-06,
      "loss": 0.8083,
      "step": 15940
    },
    {
      "epoch": 4.481450252951096,
      "grad_norm": 0.5195598006248474,
      "learning_rate": 6.4815268526325465e-06,
      "loss": 0.7791,
      "step": 15945
    },
    {
      "epoch": 4.4828555368184375,
      "grad_norm": 0.5344316363334656,
      "learning_rate": 6.44682614573181e-06,
      "loss": 0.7518,
      "step": 15950
    },
    {
      "epoch": 4.484260820685779,
      "grad_norm": 0.5257565975189209,
      "learning_rate": 6.412215484189288e-06,
      "loss": 0.7538,
      "step": 15955
    },
    {
      "epoch": 4.48566610455312,
      "grad_norm": 0.4952334463596344,
      "learning_rate": 6.377694901317865e-06,
      "loss": 0.7582,
      "step": 15960
    },
    {
      "epoch": 4.4870713884204605,
      "grad_norm": 0.5309807062149048,
      "learning_rate": 6.343264430343776e-06,
      "loss": 0.7604,
      "step": 15965
    },
    {
      "epoch": 4.488476672287802,
      "grad_norm": 0.5829389691352844,
      "learning_rate": 6.308924104406511e-06,
      "loss": 0.8324,
      "step": 15970
    },
    {
      "epoch": 4.489881956155143,
      "grad_norm": 0.5203680992126465,
      "learning_rate": 6.274673956558774e-06,
      "loss": 0.7751,
      "step": 15975
    },
    {
      "epoch": 4.491287240022484,
      "grad_norm": 0.5616741180419922,
      "learning_rate": 6.240514019766497e-06,
      "loss": 0.7647,
      "step": 15980
    },
    {
      "epoch": 4.492692523889826,
      "grad_norm": 0.5452646613121033,
      "learning_rate": 6.206444326908778e-06,
      "loss": 0.7675,
      "step": 15985
    },
    {
      "epoch": 4.494097807757167,
      "grad_norm": 0.5431065559387207,
      "learning_rate": 6.172464910777853e-06,
      "loss": 0.7525,
      "step": 15990
    },
    {
      "epoch": 4.495503091624508,
      "grad_norm": 0.550834596157074,
      "learning_rate": 6.138575804079072e-06,
      "loss": 0.7584,
      "step": 15995
    },
    {
      "epoch": 4.4969083754918495,
      "grad_norm": 0.5268097519874573,
      "learning_rate": 6.104777039430842e-06,
      "loss": 0.7632,
      "step": 16000
    },
    {
      "epoch": 4.498313659359191,
      "grad_norm": 0.5832206606864929,
      "learning_rate": 6.071068649364642e-06,
      "loss": 0.7646,
      "step": 16005
    },
    {
      "epoch": 4.499718943226532,
      "grad_norm": 0.5139409303665161,
      "learning_rate": 6.037450666324939e-06,
      "loss": 0.8085,
      "step": 16010
    },
    {
      "epoch": 4.501124227093873,
      "grad_norm": 0.6051554679870605,
      "learning_rate": 6.0039231226691976e-06,
      "loss": 0.755,
      "step": 16015
    },
    {
      "epoch": 4.502529510961214,
      "grad_norm": 0.586642324924469,
      "learning_rate": 5.970486050667834e-06,
      "loss": 0.7656,
      "step": 16020
    },
    {
      "epoch": 4.503934794828555,
      "grad_norm": 0.5114891529083252,
      "learning_rate": 5.937139482504206e-06,
      "loss": 0.7529,
      "step": 16025
    },
    {
      "epoch": 4.505340078695896,
      "grad_norm": 0.5245956778526306,
      "learning_rate": 5.903883450274506e-06,
      "loss": 0.752,
      "step": 16030
    },
    {
      "epoch": 4.506745362563238,
      "grad_norm": 0.5149771571159363,
      "learning_rate": 5.870717985987817e-06,
      "loss": 0.7527,
      "step": 16035
    },
    {
      "epoch": 4.508150646430579,
      "grad_norm": 0.5146948099136353,
      "learning_rate": 5.837643121566072e-06,
      "loss": 0.757,
      "step": 16040
    },
    {
      "epoch": 4.50955593029792,
      "grad_norm": 0.5637561678886414,
      "learning_rate": 5.804658888843961e-06,
      "loss": 0.7752,
      "step": 16045
    },
    {
      "epoch": 4.5109612141652615,
      "grad_norm": 0.57065349817276,
      "learning_rate": 5.771765319568967e-06,
      "loss": 0.7598,
      "step": 16050
    },
    {
      "epoch": 4.512366498032603,
      "grad_norm": 0.6010316014289856,
      "learning_rate": 5.738962445401308e-06,
      "loss": 0.8019,
      "step": 16055
    },
    {
      "epoch": 4.513771781899944,
      "grad_norm": 0.5177924036979675,
      "learning_rate": 5.7062502979138955e-06,
      "loss": 0.7554,
      "step": 16060
    },
    {
      "epoch": 4.5151770657672845,
      "grad_norm": 0.5299884080886841,
      "learning_rate": 5.673628908592321e-06,
      "loss": 0.7604,
      "step": 16065
    },
    {
      "epoch": 4.516582349634627,
      "grad_norm": 0.535160481929779,
      "learning_rate": 5.641098308834802e-06,
      "loss": 0.7599,
      "step": 16070
    },
    {
      "epoch": 4.517987633501967,
      "grad_norm": 0.5174487829208374,
      "learning_rate": 5.608658529952238e-06,
      "loss": 0.7566,
      "step": 16075
    },
    {
      "epoch": 4.519392917369308,
      "grad_norm": 0.510744035243988,
      "learning_rate": 5.576309603168017e-06,
      "loss": 0.7561,
      "step": 16080
    },
    {
      "epoch": 4.52079820123665,
      "grad_norm": 0.5133626461029053,
      "learning_rate": 5.544051559618135e-06,
      "loss": 0.7474,
      "step": 16085
    },
    {
      "epoch": 4.522203485103991,
      "grad_norm": 0.5179252624511719,
      "learning_rate": 5.511884430351111e-06,
      "loss": 0.7613,
      "step": 16090
    },
    {
      "epoch": 4.523608768971332,
      "grad_norm": 0.5430054664611816,
      "learning_rate": 5.479808246327989e-06,
      "loss": 0.761,
      "step": 16095
    },
    {
      "epoch": 4.5250140528386735,
      "grad_norm": 0.530794084072113,
      "learning_rate": 5.447823038422206e-06,
      "loss": 0.7604,
      "step": 16100
    },
    {
      "epoch": 4.526419336706015,
      "grad_norm": 0.5674378871917725,
      "learning_rate": 5.4159288374196705e-06,
      "loss": 0.774,
      "step": 16105
    },
    {
      "epoch": 4.527824620573356,
      "grad_norm": 0.5048345327377319,
      "learning_rate": 5.384125674018725e-06,
      "loss": 0.8147,
      "step": 16110
    },
    {
      "epoch": 4.529229904440697,
      "grad_norm": 0.5289924144744873,
      "learning_rate": 5.352413578830029e-06,
      "loss": 0.7554,
      "step": 16115
    },
    {
      "epoch": 4.530635188308038,
      "grad_norm": 0.5061425566673279,
      "learning_rate": 5.320792582376622e-06,
      "loss": 0.7521,
      "step": 16120
    },
    {
      "epoch": 4.532040472175379,
      "grad_norm": 0.5277791023254395,
      "learning_rate": 5.289262715093879e-06,
      "loss": 0.7567,
      "step": 16125
    },
    {
      "epoch": 4.53344575604272,
      "grad_norm": 0.5125671625137329,
      "learning_rate": 5.257824007329437e-06,
      "loss": 0.7617,
      "step": 16130
    },
    {
      "epoch": 4.534851039910062,
      "grad_norm": 0.5968344807624817,
      "learning_rate": 5.226476489343168e-06,
      "loss": 0.7655,
      "step": 16135
    },
    {
      "epoch": 4.536256323777403,
      "grad_norm": 0.5773058533668518,
      "learning_rate": 5.195220191307226e-06,
      "loss": 0.7576,
      "step": 16140
    },
    {
      "epoch": 4.537661607644744,
      "grad_norm": 0.5427569150924683,
      "learning_rate": 5.1640551433059685e-06,
      "loss": 0.771,
      "step": 16145
    },
    {
      "epoch": 4.5390668915120855,
      "grad_norm": 0.5206560492515564,
      "learning_rate": 5.132981375335843e-06,
      "loss": 0.7581,
      "step": 16150
    },
    {
      "epoch": 4.540472175379427,
      "grad_norm": 0.5196772813796997,
      "learning_rate": 5.101998917305517e-06,
      "loss": 0.768,
      "step": 16155
    },
    {
      "epoch": 4.541877459246768,
      "grad_norm": 0.5120740532875061,
      "learning_rate": 5.071107799035746e-06,
      "loss": 0.745,
      "step": 16160
    },
    {
      "epoch": 4.543282743114109,
      "grad_norm": 0.5352237820625305,
      "learning_rate": 5.04030805025939e-06,
      "loss": 0.7531,
      "step": 16165
    },
    {
      "epoch": 4.544688026981451,
      "grad_norm": 0.5228151082992554,
      "learning_rate": 5.009599700621314e-06,
      "loss": 0.7602,
      "step": 16170
    },
    {
      "epoch": 4.546093310848791,
      "grad_norm": 0.5458948612213135,
      "learning_rate": 4.978982779678476e-06,
      "loss": 0.7561,
      "step": 16175
    },
    {
      "epoch": 4.547498594716132,
      "grad_norm": 0.5529809594154358,
      "learning_rate": 4.94845731689979e-06,
      "loss": 0.8094,
      "step": 16180
    },
    {
      "epoch": 4.548903878583474,
      "grad_norm": 0.5764121413230896,
      "learning_rate": 4.918023341666145e-06,
      "loss": 0.7596,
      "step": 16185
    },
    {
      "epoch": 4.550309162450815,
      "grad_norm": 0.5057691335678101,
      "learning_rate": 4.887680883270385e-06,
      "loss": 0.7736,
      "step": 16190
    },
    {
      "epoch": 4.551714446318156,
      "grad_norm": 0.5106030106544495,
      "learning_rate": 4.857429970917282e-06,
      "loss": 0.7563,
      "step": 16195
    },
    {
      "epoch": 4.5531197301854975,
      "grad_norm": 0.49489980936050415,
      "learning_rate": 4.82727063372348e-06,
      "loss": 0.7626,
      "step": 16200
    },
    {
      "epoch": 4.554525014052839,
      "grad_norm": 0.5165312886238098,
      "learning_rate": 4.797202900717457e-06,
      "loss": 0.756,
      "step": 16205
    },
    {
      "epoch": 4.55593029792018,
      "grad_norm": 0.5062229633331299,
      "learning_rate": 4.7672268008395415e-06,
      "loss": 0.7612,
      "step": 16210
    },
    {
      "epoch": 4.557335581787521,
      "grad_norm": 0.5089703798294067,
      "learning_rate": 4.737342362941899e-06,
      "loss": 0.7699,
      "step": 16215
    },
    {
      "epoch": 4.558740865654862,
      "grad_norm": 0.5935402512550354,
      "learning_rate": 4.707549615788398e-06,
      "loss": 0.7597,
      "step": 16220
    },
    {
      "epoch": 4.560146149522204,
      "grad_norm": 0.55452561378479,
      "learning_rate": 4.6778485880547115e-06,
      "loss": 0.7709,
      "step": 16225
    },
    {
      "epoch": 4.561551433389544,
      "grad_norm": 0.5225324034690857,
      "learning_rate": 4.648239308328228e-06,
      "loss": 0.762,
      "step": 16230
    },
    {
      "epoch": 4.562956717256886,
      "grad_norm": 0.519692063331604,
      "learning_rate": 4.618721805107995e-06,
      "loss": 0.7638,
      "step": 16235
    },
    {
      "epoch": 4.564362001124227,
      "grad_norm": 0.5288437008857727,
      "learning_rate": 4.589296106804753e-06,
      "loss": 0.76,
      "step": 16240
    },
    {
      "epoch": 4.565767284991568,
      "grad_norm": 0.5157180428504944,
      "learning_rate": 4.559962241740867e-06,
      "loss": 0.7628,
      "step": 16245
    },
    {
      "epoch": 4.5671725688589095,
      "grad_norm": 0.5235899686813354,
      "learning_rate": 4.530720238150332e-06,
      "loss": 0.7634,
      "step": 16250
    },
    {
      "epoch": 4.568577852726251,
      "grad_norm": 0.5051559805870056,
      "learning_rate": 4.501570124178689e-06,
      "loss": 0.7726,
      "step": 16255
    },
    {
      "epoch": 4.569983136593592,
      "grad_norm": 0.5335587859153748,
      "learning_rate": 4.472511927883072e-06,
      "loss": 0.763,
      "step": 16260
    },
    {
      "epoch": 4.571388420460933,
      "grad_norm": 0.5303399562835693,
      "learning_rate": 4.4435456772321085e-06,
      "loss": 0.7794,
      "step": 16265
    },
    {
      "epoch": 4.572793704328275,
      "grad_norm": 0.5151443481445312,
      "learning_rate": 4.414671400105985e-06,
      "loss": 0.7641,
      "step": 16270
    },
    {
      "epoch": 4.574198988195615,
      "grad_norm": 0.5121344327926636,
      "learning_rate": 4.3858891242962825e-06,
      "loss": 0.7451,
      "step": 16275
    },
    {
      "epoch": 4.575604272062956,
      "grad_norm": 0.5286407470703125,
      "learning_rate": 4.357198877506086e-06,
      "loss": 0.7743,
      "step": 16280
    },
    {
      "epoch": 4.577009555930298,
      "grad_norm": 0.5202147364616394,
      "learning_rate": 4.328600687349904e-06,
      "loss": 0.764,
      "step": 16285
    },
    {
      "epoch": 4.578414839797639,
      "grad_norm": 0.5142115354537964,
      "learning_rate": 4.30009458135362e-06,
      "loss": 0.7661,
      "step": 16290
    },
    {
      "epoch": 4.57982012366498,
      "grad_norm": 0.5535638332366943,
      "learning_rate": 4.271680586954474e-06,
      "loss": 0.7589,
      "step": 16295
    },
    {
      "epoch": 4.5812254075323215,
      "grad_norm": 0.5315068960189819,
      "learning_rate": 4.2433587315010905e-06,
      "loss": 0.7642,
      "step": 16300
    },
    {
      "epoch": 4.582630691399663,
      "grad_norm": 0.5588862299919128,
      "learning_rate": 4.2151290422533855e-06,
      "loss": 0.7632,
      "step": 16305
    },
    {
      "epoch": 4.584035975267004,
      "grad_norm": 0.5527219772338867,
      "learning_rate": 4.186991546382535e-06,
      "loss": 0.7657,
      "step": 16310
    },
    {
      "epoch": 4.585441259134345,
      "grad_norm": 0.5488102436065674,
      "learning_rate": 4.158946270971031e-06,
      "loss": 0.7743,
      "step": 16315
    },
    {
      "epoch": 4.586846543001687,
      "grad_norm": 0.5361760258674622,
      "learning_rate": 4.130993243012582e-06,
      "loss": 0.7533,
      "step": 16320
    },
    {
      "epoch": 4.588251826869028,
      "grad_norm": 0.4912406802177429,
      "learning_rate": 4.103132489412087e-06,
      "loss": 0.7597,
      "step": 16325
    },
    {
      "epoch": 4.589657110736368,
      "grad_norm": 0.5646995902061462,
      "learning_rate": 4.075364036985663e-06,
      "loss": 0.7607,
      "step": 16330
    },
    {
      "epoch": 4.59106239460371,
      "grad_norm": 0.5502670407295227,
      "learning_rate": 4.047687912460563e-06,
      "loss": 0.7692,
      "step": 16335
    },
    {
      "epoch": 4.592467678471051,
      "grad_norm": 0.5219792127609253,
      "learning_rate": 4.0201041424752006e-06,
      "loss": 0.8079,
      "step": 16340
    },
    {
      "epoch": 4.593872962338392,
      "grad_norm": 0.5074188709259033,
      "learning_rate": 3.992612753579061e-06,
      "loss": 0.8031,
      "step": 16345
    },
    {
      "epoch": 4.5952782462057336,
      "grad_norm": 0.5782954096794128,
      "learning_rate": 3.965213772232745e-06,
      "loss": 0.7648,
      "step": 16350
    },
    {
      "epoch": 4.596683530073075,
      "grad_norm": 0.5999060869216919,
      "learning_rate": 3.93790722480788e-06,
      "loss": 0.7574,
      "step": 16355
    },
    {
      "epoch": 4.598088813940416,
      "grad_norm": 0.5622010827064514,
      "learning_rate": 3.910693137587185e-06,
      "loss": 0.777,
      "step": 16360
    },
    {
      "epoch": 4.599494097807757,
      "grad_norm": 0.5037088990211487,
      "learning_rate": 3.883571536764297e-06,
      "loss": 0.7594,
      "step": 16365
    },
    {
      "epoch": 4.600899381675099,
      "grad_norm": 0.5346062779426575,
      "learning_rate": 3.856542448443889e-06,
      "loss": 0.7749,
      "step": 16370
    },
    {
      "epoch": 4.602304665542439,
      "grad_norm": 0.5186667442321777,
      "learning_rate": 3.8296058986416196e-06,
      "loss": 0.7553,
      "step": 16375
    },
    {
      "epoch": 4.60370994940978,
      "grad_norm": 0.5188003778457642,
      "learning_rate": 3.802761913283992e-06,
      "loss": 0.771,
      "step": 16380
    },
    {
      "epoch": 4.605115233277122,
      "grad_norm": 0.5112647414207458,
      "learning_rate": 3.7760105182084838e-06,
      "loss": 0.7583,
      "step": 16385
    },
    {
      "epoch": 4.606520517144463,
      "grad_norm": 0.5383247137069702,
      "learning_rate": 3.7493517391634426e-06,
      "loss": 0.7726,
      "step": 16390
    },
    {
      "epoch": 4.607925801011804,
      "grad_norm": 0.5470320582389832,
      "learning_rate": 3.7227856018080655e-06,
      "loss": 0.7592,
      "step": 16395
    },
    {
      "epoch": 4.609331084879146,
      "grad_norm": 0.5709768533706665,
      "learning_rate": 3.696312131712376e-06,
      "loss": 0.7748,
      "step": 16400
    },
    {
      "epoch": 4.610736368746487,
      "grad_norm": 0.5174989104270935,
      "learning_rate": 3.6699313543572034e-06,
      "loss": 0.7677,
      "step": 16405
    },
    {
      "epoch": 4.612141652613828,
      "grad_norm": 0.5489538311958313,
      "learning_rate": 3.6436432951341914e-06,
      "loss": 0.7547,
      "step": 16410
    },
    {
      "epoch": 4.613546936481169,
      "grad_norm": 0.565827488899231,
      "learning_rate": 3.6174479793456894e-06,
      "loss": 0.77,
      "step": 16415
    },
    {
      "epoch": 4.614952220348511,
      "grad_norm": 0.5434299111366272,
      "learning_rate": 3.591345432204807e-06,
      "loss": 0.7526,
      "step": 16420
    },
    {
      "epoch": 4.616357504215852,
      "grad_norm": 0.5446549654006958,
      "learning_rate": 3.565335678835391e-06,
      "loss": 0.7752,
      "step": 16425
    },
    {
      "epoch": 4.617762788083192,
      "grad_norm": 0.5367806553840637,
      "learning_rate": 3.5394187442719494e-06,
      "loss": 0.7712,
      "step": 16430
    },
    {
      "epoch": 4.619168071950534,
      "grad_norm": 0.5381150245666504,
      "learning_rate": 3.5135946534596175e-06,
      "loss": 0.81,
      "step": 16435
    },
    {
      "epoch": 4.620573355817875,
      "grad_norm": 0.5752198100090027,
      "learning_rate": 3.4878634312542125e-06,
      "loss": 0.7584,
      "step": 16440
    },
    {
      "epoch": 4.621978639685216,
      "grad_norm": 0.5078454613685608,
      "learning_rate": 3.4622251024221674e-06,
      "loss": 0.7533,
      "step": 16445
    },
    {
      "epoch": 4.623383923552558,
      "grad_norm": 0.5012295842170715,
      "learning_rate": 3.4366796916404875e-06,
      "loss": 0.753,
      "step": 16450
    },
    {
      "epoch": 4.624789207419899,
      "grad_norm": 0.507514476776123,
      "learning_rate": 3.411227223496749e-06,
      "loss": 0.7937,
      "step": 16455
    },
    {
      "epoch": 4.62619449128724,
      "grad_norm": 0.49216654896736145,
      "learning_rate": 3.3858677224890557e-06,
      "loss": 0.7496,
      "step": 16460
    },
    {
      "epoch": 4.627599775154581,
      "grad_norm": 0.5676223039627075,
      "learning_rate": 3.36060121302606e-06,
      "loss": 0.77,
      "step": 16465
    },
    {
      "epoch": 4.629005059021923,
      "grad_norm": 0.5882121920585632,
      "learning_rate": 3.3354277194268755e-06,
      "loss": 0.7718,
      "step": 16470
    },
    {
      "epoch": 4.630410342889264,
      "grad_norm": 0.5617997646331787,
      "learning_rate": 3.310347265921121e-06,
      "loss": 0.7586,
      "step": 16475
    },
    {
      "epoch": 4.631815626756605,
      "grad_norm": 0.5538641214370728,
      "learning_rate": 3.2853598766488523e-06,
      "loss": 0.7652,
      "step": 16480
    },
    {
      "epoch": 4.633220910623946,
      "grad_norm": 0.5209192633628845,
      "learning_rate": 3.2604655756605206e-06,
      "loss": 0.768,
      "step": 16485
    },
    {
      "epoch": 4.634626194491287,
      "grad_norm": 0.6011056303977966,
      "learning_rate": 3.2356643869170254e-06,
      "loss": 0.7707,
      "step": 16490
    },
    {
      "epoch": 4.636031478358628,
      "grad_norm": 0.5389314889907837,
      "learning_rate": 3.2109563342896053e-06,
      "loss": 0.7543,
      "step": 16495
    },
    {
      "epoch": 4.63743676222597,
      "grad_norm": 0.541045069694519,
      "learning_rate": 3.1863414415598923e-06,
      "loss": 0.7532,
      "step": 16500
    },
    {
      "epoch": 4.638842046093311,
      "grad_norm": 0.5399346351623535,
      "learning_rate": 3.1618197324198352e-06,
      "loss": 0.7775,
      "step": 16505
    },
    {
      "epoch": 4.640247329960652,
      "grad_norm": 0.5333517789840698,
      "learning_rate": 3.1373912304716758e-06,
      "loss": 0.7716,
      "step": 16510
    },
    {
      "epoch": 4.641652613827993,
      "grad_norm": 0.5069199800491333,
      "learning_rate": 3.113055959227984e-06,
      "loss": 0.7525,
      "step": 16515
    },
    {
      "epoch": 4.643057897695335,
      "grad_norm": 0.5578119158744812,
      "learning_rate": 3.0888139421115347e-06,
      "loss": 0.7621,
      "step": 16520
    },
    {
      "epoch": 4.644463181562676,
      "grad_norm": 0.5046953558921814,
      "learning_rate": 3.064665202455408e-06,
      "loss": 0.7598,
      "step": 16525
    },
    {
      "epoch": 4.645868465430016,
      "grad_norm": 0.5236426591873169,
      "learning_rate": 3.040609763502866e-06,
      "loss": 0.7749,
      "step": 16530
    },
    {
      "epoch": 4.647273749297358,
      "grad_norm": 0.5197000503540039,
      "learning_rate": 3.016647648407389e-06,
      "loss": 0.7596,
      "step": 16535
    },
    {
      "epoch": 4.648679033164699,
      "grad_norm": 0.5232204794883728,
      "learning_rate": 2.992778880232594e-06,
      "loss": 0.8043,
      "step": 16540
    },
    {
      "epoch": 4.65008431703204,
      "grad_norm": 0.506645679473877,
      "learning_rate": 2.969003481952315e-06,
      "loss": 0.7428,
      "step": 16545
    },
    {
      "epoch": 4.651489600899382,
      "grad_norm": 0.5069267749786377,
      "learning_rate": 2.9453214764504576e-06,
      "loss": 0.7707,
      "step": 16550
    },
    {
      "epoch": 4.652894884766723,
      "grad_norm": 0.5028716325759888,
      "learning_rate": 2.921732886521067e-06,
      "loss": 0.7609,
      "step": 16555
    },
    {
      "epoch": 4.654300168634064,
      "grad_norm": 0.5739719867706299,
      "learning_rate": 2.8982377348682697e-06,
      "loss": 0.7573,
      "step": 16560
    },
    {
      "epoch": 4.655705452501405,
      "grad_norm": 0.5546180009841919,
      "learning_rate": 2.874836044106266e-06,
      "loss": 0.762,
      "step": 16565
    },
    {
      "epoch": 4.657110736368747,
      "grad_norm": 0.5253195762634277,
      "learning_rate": 2.8515278367592823e-06,
      "loss": 0.7555,
      "step": 16570
    },
    {
      "epoch": 4.658516020236088,
      "grad_norm": 0.5367646217346191,
      "learning_rate": 2.828313135261573e-06,
      "loss": 0.7514,
      "step": 16575
    },
    {
      "epoch": 4.659921304103429,
      "grad_norm": 0.5407754778862,
      "learning_rate": 2.8051919619573986e-06,
      "loss": 0.7546,
      "step": 16580
    },
    {
      "epoch": 4.66132658797077,
      "grad_norm": 0.5303405523300171,
      "learning_rate": 2.782164339101001e-06,
      "loss": 0.7582,
      "step": 16585
    },
    {
      "epoch": 4.662731871838111,
      "grad_norm": 0.5342952609062195,
      "learning_rate": 2.7592302888565514e-06,
      "loss": 0.7662,
      "step": 16590
    },
    {
      "epoch": 4.664137155705452,
      "grad_norm": 0.5367206335067749,
      "learning_rate": 2.7363898332981696e-06,
      "loss": 0.7698,
      "step": 16595
    },
    {
      "epoch": 4.665542439572794,
      "grad_norm": 0.5240176916122437,
      "learning_rate": 2.7136429944099483e-06,
      "loss": 0.8022,
      "step": 16600
    },
    {
      "epoch": 4.666947723440135,
      "grad_norm": 0.5152767300605774,
      "learning_rate": 2.6909897940857966e-06,
      "loss": 0.7678,
      "step": 16605
    },
    {
      "epoch": 4.668353007307476,
      "grad_norm": 0.551674485206604,
      "learning_rate": 2.668430254129506e-06,
      "loss": 0.7693,
      "step": 16610
    },
    {
      "epoch": 4.669758291174817,
      "grad_norm": 0.5180559754371643,
      "learning_rate": 2.645964396254763e-06,
      "loss": 0.7608,
      "step": 16615
    },
    {
      "epoch": 4.671163575042159,
      "grad_norm": 0.5306651592254639,
      "learning_rate": 2.62359224208506e-06,
      "loss": 0.7579,
      "step": 16620
    },
    {
      "epoch": 4.6725688589095,
      "grad_norm": 0.5154826045036316,
      "learning_rate": 2.6013138131536717e-06,
      "loss": 0.7538,
      "step": 16625
    },
    {
      "epoch": 4.67397414277684,
      "grad_norm": 0.5123304724693298,
      "learning_rate": 2.579129130903701e-06,
      "loss": 0.7611,
      "step": 16630
    },
    {
      "epoch": 4.675379426644183,
      "grad_norm": 0.5388631224632263,
      "learning_rate": 2.5570382166880126e-06,
      "loss": 0.7551,
      "step": 16635
    },
    {
      "epoch": 4.676784710511523,
      "grad_norm": 0.5057893991470337,
      "learning_rate": 2.535041091769219e-06,
      "loss": 0.7537,
      "step": 16640
    },
    {
      "epoch": 4.678189994378864,
      "grad_norm": 0.5687735080718994,
      "learning_rate": 2.5131377773196184e-06,
      "loss": 0.7613,
      "step": 16645
    },
    {
      "epoch": 4.679595278246206,
      "grad_norm": 0.5253791809082031,
      "learning_rate": 2.4913282944212914e-06,
      "loss": 0.8126,
      "step": 16650
    },
    {
      "epoch": 4.681000562113547,
      "grad_norm": 0.5506506562232971,
      "learning_rate": 2.4696126640659566e-06,
      "loss": 0.7567,
      "step": 16655
    },
    {
      "epoch": 4.682405845980888,
      "grad_norm": 0.5498470664024353,
      "learning_rate": 2.4479909071549954e-06,
      "loss": 0.7759,
      "step": 16660
    },
    {
      "epoch": 4.6838111298482294,
      "grad_norm": 0.6022109389305115,
      "learning_rate": 2.4264630444994498e-06,
      "loss": 0.8043,
      "step": 16665
    },
    {
      "epoch": 4.685216413715571,
      "grad_norm": 0.5312721729278564,
      "learning_rate": 2.4050290968199884e-06,
      "loss": 0.7698,
      "step": 16670
    },
    {
      "epoch": 4.686621697582912,
      "grad_norm": 0.6051803827285767,
      "learning_rate": 2.3836890847468873e-06,
      "loss": 0.7579,
      "step": 16675
    },
    {
      "epoch": 4.688026981450253,
      "grad_norm": 0.5202509164810181,
      "learning_rate": 2.362443028820005e-06,
      "loss": 0.763,
      "step": 16680
    },
    {
      "epoch": 4.689432265317594,
      "grad_norm": 0.5393158793449402,
      "learning_rate": 2.3412909494887613e-06,
      "loss": 0.769,
      "step": 16685
    },
    {
      "epoch": 4.690837549184935,
      "grad_norm": 0.5265124440193176,
      "learning_rate": 2.3202328671121376e-06,
      "loss": 0.7679,
      "step": 16690
    },
    {
      "epoch": 4.692242833052276,
      "grad_norm": 0.5325393080711365,
      "learning_rate": 2.2992688019586206e-06,
      "loss": 0.7597,
      "step": 16695
    },
    {
      "epoch": 4.693648116919618,
      "grad_norm": 0.5232254266738892,
      "learning_rate": 2.2783987742062475e-06,
      "loss": 0.7658,
      "step": 16700
    },
    {
      "epoch": 4.695053400786959,
      "grad_norm": 0.579431414604187,
      "learning_rate": 2.257622803942483e-06,
      "loss": 0.7673,
      "step": 16705
    },
    {
      "epoch": 4.6964586846543,
      "grad_norm": 0.5365939736366272,
      "learning_rate": 2.23694091116432e-06,
      "loss": 0.7516,
      "step": 16710
    },
    {
      "epoch": 4.6978639685216415,
      "grad_norm": 0.5156635046005249,
      "learning_rate": 2.216353115778158e-06,
      "loss": 0.7662,
      "step": 16715
    },
    {
      "epoch": 4.699269252388983,
      "grad_norm": 0.5380986928939819,
      "learning_rate": 2.195859437599845e-06,
      "loss": 0.7537,
      "step": 16720
    },
    {
      "epoch": 4.700674536256324,
      "grad_norm": 0.5444085001945496,
      "learning_rate": 2.175459896354659e-06,
      "loss": 0.7707,
      "step": 16725
    },
    {
      "epoch": 4.702079820123665,
      "grad_norm": 0.5282695293426514,
      "learning_rate": 2.1551545116772265e-06,
      "loss": 0.7597,
      "step": 16730
    },
    {
      "epoch": 4.703485103991007,
      "grad_norm": 0.5188681483268738,
      "learning_rate": 2.1349433031115807e-06,
      "loss": 0.7615,
      "step": 16735
    },
    {
      "epoch": 4.704890387858347,
      "grad_norm": 0.5118913650512695,
      "learning_rate": 2.114826290111116e-06,
      "loss": 0.766,
      "step": 16740
    },
    {
      "epoch": 4.706295671725688,
      "grad_norm": 0.5214533805847168,
      "learning_rate": 2.094803492038533e-06,
      "loss": 0.75,
      "step": 16745
    },
    {
      "epoch": 4.70770095559303,
      "grad_norm": 0.5205959677696228,
      "learning_rate": 2.074874928165871e-06,
      "loss": 0.7511,
      "step": 16750
    },
    {
      "epoch": 4.709106239460371,
      "grad_norm": 0.5414964556694031,
      "learning_rate": 2.055040617674464e-06,
      "loss": 0.7702,
      "step": 16755
    },
    {
      "epoch": 4.710511523327712,
      "grad_norm": 0.5163237452507019,
      "learning_rate": 2.035300579654931e-06,
      "loss": 0.7565,
      "step": 16760
    },
    {
      "epoch": 4.7119168071950535,
      "grad_norm": 0.5173978805541992,
      "learning_rate": 2.015654833107161e-06,
      "loss": 0.7656,
      "step": 16765
    },
    {
      "epoch": 4.713322091062395,
      "grad_norm": 0.5129281282424927,
      "learning_rate": 1.996103396940252e-06,
      "loss": 0.752,
      "step": 16770
    },
    {
      "epoch": 4.714727374929736,
      "grad_norm": 0.5729324221611023,
      "learning_rate": 1.97664628997255e-06,
      "loss": 0.7571,
      "step": 16775
    },
    {
      "epoch": 4.716132658797077,
      "grad_norm": 0.5637750625610352,
      "learning_rate": 1.957283530931631e-06,
      "loss": 0.7708,
      "step": 16780
    },
    {
      "epoch": 4.717537942664418,
      "grad_norm": 0.5214433073997498,
      "learning_rate": 1.938015138454219e-06,
      "loss": 0.7593,
      "step": 16785
    },
    {
      "epoch": 4.71894322653176,
      "grad_norm": 0.5149796605110168,
      "learning_rate": 1.9188411310862466e-06,
      "loss": 0.7578,
      "step": 16790
    },
    {
      "epoch": 4.7203485103991,
      "grad_norm": 0.5418787002563477,
      "learning_rate": 1.8997615272827617e-06,
      "loss": 0.7822,
      "step": 16795
    },
    {
      "epoch": 4.721753794266442,
      "grad_norm": 0.49819424748420715,
      "learning_rate": 1.8807763454079975e-06,
      "loss": 0.7682,
      "step": 16800
    },
    {
      "epoch": 4.723159078133783,
      "grad_norm": 0.5212000012397766,
      "learning_rate": 1.8618856037352584e-06,
      "loss": 0.769,
      "step": 16805
    },
    {
      "epoch": 4.724564362001124,
      "grad_norm": 0.5406200289726257,
      "learning_rate": 1.843089320446978e-06,
      "loss": 0.7569,
      "step": 16810
    },
    {
      "epoch": 4.7259696458684655,
      "grad_norm": 0.5158624649047852,
      "learning_rate": 1.8243875136346623e-06,
      "loss": 0.7518,
      "step": 16815
    },
    {
      "epoch": 4.727374929735807,
      "grad_norm": 0.532293975353241,
      "learning_rate": 1.80578020129889e-06,
      "loss": 0.7533,
      "step": 16820
    },
    {
      "epoch": 4.728780213603148,
      "grad_norm": 0.5103762745857239,
      "learning_rate": 1.7872674013492796e-06,
      "loss": 0.7643,
      "step": 16825
    },
    {
      "epoch": 4.730185497470489,
      "grad_norm": 0.5502645969390869,
      "learning_rate": 1.7688491316044776e-06,
      "loss": 0.7552,
      "step": 16830
    },
    {
      "epoch": 4.731590781337831,
      "grad_norm": 0.4943491816520691,
      "learning_rate": 1.7505254097921807e-06,
      "loss": 0.7571,
      "step": 16835
    },
    {
      "epoch": 4.732996065205171,
      "grad_norm": 0.5176577568054199,
      "learning_rate": 1.7322962535490262e-06,
      "loss": 0.7556,
      "step": 16840
    },
    {
      "epoch": 4.734401349072512,
      "grad_norm": 0.534931480884552,
      "learning_rate": 1.7141616804206784e-06,
      "loss": 0.7638,
      "step": 16845
    },
    {
      "epoch": 4.735806632939854,
      "grad_norm": 0.530157744884491,
      "learning_rate": 1.696121707861731e-06,
      "loss": 0.7522,
      "step": 16850
    },
    {
      "epoch": 4.737211916807195,
      "grad_norm": 0.49656274914741516,
      "learning_rate": 1.6781763532357498e-06,
      "loss": 0.7595,
      "step": 16855
    },
    {
      "epoch": 4.738617200674536,
      "grad_norm": 0.5944429636001587,
      "learning_rate": 1.6603256338152295e-06,
      "loss": 0.7526,
      "step": 16860
    },
    {
      "epoch": 4.7400224845418775,
      "grad_norm": 0.5462916493415833,
      "learning_rate": 1.642569566781549e-06,
      "loss": 0.7656,
      "step": 16865
    },
    {
      "epoch": 4.741427768409219,
      "grad_norm": 0.5489064455032349,
      "learning_rate": 1.6249081692250257e-06,
      "loss": 0.7569,
      "step": 16870
    },
    {
      "epoch": 4.74283305227656,
      "grad_norm": 0.5147420763969421,
      "learning_rate": 1.6073414581448288e-06,
      "loss": 0.7659,
      "step": 16875
    },
    {
      "epoch": 4.744238336143901,
      "grad_norm": 0.5433098673820496,
      "learning_rate": 1.5898694504489776e-06,
      "loss": 0.7681,
      "step": 16880
    },
    {
      "epoch": 4.745643620011243,
      "grad_norm": 0.5174381732940674,
      "learning_rate": 1.5724921629543977e-06,
      "loss": 0.7943,
      "step": 16885
    },
    {
      "epoch": 4.747048903878584,
      "grad_norm": 0.6421141624450684,
      "learning_rate": 1.5552096123867655e-06,
      "loss": 0.7744,
      "step": 16890
    },
    {
      "epoch": 4.748454187745924,
      "grad_norm": 0.5340020656585693,
      "learning_rate": 1.5380218153806526e-06,
      "loss": 0.7633,
      "step": 16895
    },
    {
      "epoch": 4.749859471613266,
      "grad_norm": 0.536148190498352,
      "learning_rate": 1.5209287884793588e-06,
      "loss": 0.758,
      "step": 16900
    },
    {
      "epoch": 4.751264755480607,
      "grad_norm": 0.49202027916908264,
      "learning_rate": 1.503930548135024e-06,
      "loss": 0.7628,
      "step": 16905
    },
    {
      "epoch": 4.752670039347948,
      "grad_norm": 0.5275793671607971,
      "learning_rate": 1.4870271107085388e-06,
      "loss": 0.7621,
      "step": 16910
    },
    {
      "epoch": 4.7540753232152895,
      "grad_norm": 0.5963732004165649,
      "learning_rate": 1.4702184924695107e-06,
      "loss": 0.7658,
      "step": 16915
    },
    {
      "epoch": 4.755480607082631,
      "grad_norm": 0.5299362540245056,
      "learning_rate": 1.4535047095963427e-06,
      "loss": 0.8079,
      "step": 16920
    },
    {
      "epoch": 4.756885890949972,
      "grad_norm": 0.5836244821548462,
      "learning_rate": 1.4368857781761003e-06,
      "loss": 0.7688,
      "step": 16925
    },
    {
      "epoch": 4.758291174817313,
      "grad_norm": 0.5270640254020691,
      "learning_rate": 1.420361714204599e-06,
      "loss": 0.7769,
      "step": 16930
    },
    {
      "epoch": 4.759696458684655,
      "grad_norm": 0.5261182188987732,
      "learning_rate": 1.4039325335863162e-06,
      "loss": 0.7707,
      "step": 16935
    },
    {
      "epoch": 4.761101742551995,
      "grad_norm": 0.5504453182220459,
      "learning_rate": 1.3875982521344145e-06,
      "loss": 0.7674,
      "step": 16940
    },
    {
      "epoch": 4.762507026419336,
      "grad_norm": 0.5098614692687988,
      "learning_rate": 1.3713588855707282e-06,
      "loss": 0.7963,
      "step": 16945
    },
    {
      "epoch": 4.763912310286678,
      "grad_norm": 0.5301799178123474,
      "learning_rate": 1.355214449525699e-06,
      "loss": 0.7508,
      "step": 16950
    },
    {
      "epoch": 4.765317594154019,
      "grad_norm": 0.5289101600646973,
      "learning_rate": 1.3391649595384303e-06,
      "loss": 0.7491,
      "step": 16955
    },
    {
      "epoch": 4.76672287802136,
      "grad_norm": 0.5116060376167297,
      "learning_rate": 1.323210431056643e-06,
      "loss": 0.7704,
      "step": 16960
    },
    {
      "epoch": 4.7681281618887015,
      "grad_norm": 0.5461031198501587,
      "learning_rate": 1.3073508794366306e-06,
      "loss": 0.7646,
      "step": 16965
    },
    {
      "epoch": 4.769533445756043,
      "grad_norm": 0.550803542137146,
      "learning_rate": 1.291586319943283e-06,
      "loss": 0.7597,
      "step": 16970
    },
    {
      "epoch": 4.770938729623384,
      "grad_norm": 0.5380566120147705,
      "learning_rate": 1.2759167677500738e-06,
      "loss": 0.8017,
      "step": 16975
    },
    {
      "epoch": 4.772344013490725,
      "grad_norm": 0.5564086437225342,
      "learning_rate": 1.260342237938994e-06,
      "loss": 0.7696,
      "step": 16980
    },
    {
      "epoch": 4.773749297358067,
      "grad_norm": 0.543734610080719,
      "learning_rate": 1.244862745500619e-06,
      "loss": 0.851,
      "step": 16985
    },
    {
      "epoch": 4.775154581225408,
      "grad_norm": 0.5428884625434875,
      "learning_rate": 1.2294783053340419e-06,
      "loss": 0.7553,
      "step": 16990
    },
    {
      "epoch": 4.776559865092748,
      "grad_norm": 0.5188602209091187,
      "learning_rate": 1.2141889322468293e-06,
      "loss": 0.7577,
      "step": 16995
    },
    {
      "epoch": 4.77796514896009,
      "grad_norm": 0.5654597282409668,
      "learning_rate": 1.1989946409550867e-06,
      "loss": 0.7672,
      "step": 17000
    },
    {
      "epoch": 4.779370432827431,
      "grad_norm": 0.5196244716644287,
      "learning_rate": 1.1838954460833828e-06,
      "loss": 0.7977,
      "step": 17005
    },
    {
      "epoch": 4.780775716694772,
      "grad_norm": 0.4975441098213196,
      "learning_rate": 1.16889136216477e-06,
      "loss": 0.7551,
      "step": 17010
    },
    {
      "epoch": 4.7821810005621135,
      "grad_norm": 0.553006649017334,
      "learning_rate": 1.1539824036407522e-06,
      "loss": 0.7525,
      "step": 17015
    },
    {
      "epoch": 4.783586284429455,
      "grad_norm": 0.5071210861206055,
      "learning_rate": 1.13916858486125e-06,
      "loss": 0.766,
      "step": 17020
    },
    {
      "epoch": 4.784991568296796,
      "grad_norm": 0.49615663290023804,
      "learning_rate": 1.1244499200846582e-06,
      "loss": 0.7962,
      "step": 17025
    },
    {
      "epoch": 4.786396852164137,
      "grad_norm": 0.5509187579154968,
      "learning_rate": 1.1098264234777446e-06,
      "loss": 0.7916,
      "step": 17030
    },
    {
      "epoch": 4.787802136031479,
      "grad_norm": 0.5368015766143799,
      "learning_rate": 1.0952981091156833e-06,
      "loss": 0.7638,
      "step": 17035
    },
    {
      "epoch": 4.78920741989882,
      "grad_norm": 0.5518297553062439,
      "learning_rate": 1.0808649909820557e-06,
      "loss": 0.7621,
      "step": 17040
    },
    {
      "epoch": 4.790612703766161,
      "grad_norm": 0.56369948387146,
      "learning_rate": 1.0665270829688046e-06,
      "loss": 0.7639,
      "step": 17045
    },
    {
      "epoch": 4.792017987633502,
      "grad_norm": 0.5085920095443726,
      "learning_rate": 1.0522843988762355e-06,
      "loss": 0.7602,
      "step": 17050
    },
    {
      "epoch": 4.793423271500843,
      "grad_norm": 0.5725511312484741,
      "learning_rate": 1.0381369524129714e-06,
      "loss": 0.7746,
      "step": 17055
    },
    {
      "epoch": 4.794828555368184,
      "grad_norm": 0.5052986145019531,
      "learning_rate": 1.0240847571960199e-06,
      "loss": 0.7486,
      "step": 17060
    },
    {
      "epoch": 4.7962338392355255,
      "grad_norm": 0.5754660964012146,
      "learning_rate": 1.0101278267506842e-06,
      "loss": 0.7621,
      "step": 17065
    },
    {
      "epoch": 4.797639123102867,
      "grad_norm": 0.5558141469955444,
      "learning_rate": 9.962661745105517e-07,
      "loss": 0.7641,
      "step": 17070
    },
    {
      "epoch": 4.799044406970208,
      "grad_norm": 0.6065548658370972,
      "learning_rate": 9.8249981381755e-07,
      "loss": 0.7843,
      "step": 17075
    },
    {
      "epoch": 4.800449690837549,
      "grad_norm": 0.5406213998794556,
      "learning_rate": 9.688287579218581e-07,
      "loss": 0.7666,
      "step": 17080
    },
    {
      "epoch": 4.801854974704891,
      "grad_norm": 0.5441781878471375,
      "learning_rate": 9.552530199819165e-07,
      "loss": 0.7665,
      "step": 17085
    },
    {
      "epoch": 4.803260258572232,
      "grad_norm": 0.5460792779922485,
      "learning_rate": 9.41772613064451e-07,
      "loss": 0.7691,
      "step": 17090
    },
    {
      "epoch": 4.804665542439572,
      "grad_norm": 0.5391635894775391,
      "learning_rate": 9.283875501444162e-07,
      "loss": 0.7603,
      "step": 17095
    },
    {
      "epoch": 4.806070826306914,
      "grad_norm": 0.5086688995361328,
      "learning_rate": 9.15097844104984e-07,
      "loss": 0.7548,
      "step": 17100
    },
    {
      "epoch": 4.807476110174255,
      "grad_norm": 0.5172122716903687,
      "learning_rate": 9.019035077375448e-07,
      "loss": 0.7606,
      "step": 17105
    },
    {
      "epoch": 4.808881394041596,
      "grad_norm": 0.5639137625694275,
      "learning_rate": 8.888045537417399e-07,
      "loss": 0.7713,
      "step": 17110
    },
    {
      "epoch": 4.8102866779089375,
      "grad_norm": 0.5356950163841248,
      "learning_rate": 8.75800994725362e-07,
      "loss": 0.7661,
      "step": 17115
    },
    {
      "epoch": 4.811691961776279,
      "grad_norm": 0.5866556167602539,
      "learning_rate": 8.628928432043881e-07,
      "loss": 0.7602,
      "step": 17120
    },
    {
      "epoch": 4.81309724564362,
      "grad_norm": 0.5141949653625488,
      "learning_rate": 8.5008011160298e-07,
      "loss": 0.7577,
      "step": 17125
    },
    {
      "epoch": 4.814502529510961,
      "grad_norm": 0.5439788699150085,
      "learning_rate": 8.373628122534505e-07,
      "loss": 0.7715,
      "step": 17130
    },
    {
      "epoch": 4.815907813378303,
      "grad_norm": 0.5305047035217285,
      "learning_rate": 8.247409573962527e-07,
      "loss": 0.7718,
      "step": 17135
    },
    {
      "epoch": 4.817313097245644,
      "grad_norm": 0.565777063369751,
      "learning_rate": 8.122145591799801e-07,
      "loss": 0.7725,
      "step": 17140
    },
    {
      "epoch": 4.818718381112985,
      "grad_norm": 0.5218404531478882,
      "learning_rate": 7.997836296613437e-07,
      "loss": 0.7636,
      "step": 17145
    },
    {
      "epoch": 4.820123664980326,
      "grad_norm": 0.552274763584137,
      "learning_rate": 7.874481808051836e-07,
      "loss": 0.7678,
      "step": 17150
    },
    {
      "epoch": 4.821528948847667,
      "grad_norm": 0.524766206741333,
      "learning_rate": 7.75208224484425e-07,
      "loss": 0.7554,
      "step": 17155
    },
    {
      "epoch": 4.822934232715008,
      "grad_norm": 0.5318744778633118,
      "learning_rate": 7.630637724800771e-07,
      "loss": 0.7619,
      "step": 17160
    },
    {
      "epoch": 4.8243395165823495,
      "grad_norm": 0.5492753982543945,
      "learning_rate": 7.51014836481223e-07,
      "loss": 0.8131,
      "step": 17165
    },
    {
      "epoch": 4.825744800449691,
      "grad_norm": 0.5176735520362854,
      "learning_rate": 7.390614280850306e-07,
      "loss": 0.7591,
      "step": 17170
    },
    {
      "epoch": 4.827150084317032,
      "grad_norm": 0.554851233959198,
      "learning_rate": 7.272035587967074e-07,
      "loss": 0.7583,
      "step": 17175
    },
    {
      "epoch": 4.828555368184373,
      "grad_norm": 0.6030601263046265,
      "learning_rate": 7.154412400294908e-07,
      "loss": 0.7663,
      "step": 17180
    },
    {
      "epoch": 4.829960652051715,
      "grad_norm": 0.5561255216598511,
      "learning_rate": 7.037744831047022e-07,
      "loss": 0.7515,
      "step": 17185
    },
    {
      "epoch": 4.831365935919056,
      "grad_norm": 0.5055898427963257,
      "learning_rate": 6.922032992516148e-07,
      "loss": 0.7699,
      "step": 17190
    },
    {
      "epoch": 4.832771219786397,
      "grad_norm": 0.5240102410316467,
      "learning_rate": 6.807276996075529e-07,
      "loss": 0.7639,
      "step": 17195
    },
    {
      "epoch": 4.8341765036537385,
      "grad_norm": 0.530353844165802,
      "learning_rate": 6.693476952178479e-07,
      "loss": 0.7516,
      "step": 17200
    },
    {
      "epoch": 4.835581787521079,
      "grad_norm": 0.521259605884552,
      "learning_rate": 6.580632970357937e-07,
      "loss": 0.7587,
      "step": 17205
    },
    {
      "epoch": 4.83698707138842,
      "grad_norm": 0.5423803329467773,
      "learning_rate": 6.468745159226796e-07,
      "loss": 0.7535,
      "step": 17210
    },
    {
      "epoch": 4.8383923552557615,
      "grad_norm": 0.5381777286529541,
      "learning_rate": 6.357813626477471e-07,
      "loss": 0.7516,
      "step": 17215
    },
    {
      "epoch": 4.839797639123103,
      "grad_norm": 0.5275468826293945,
      "learning_rate": 6.247838478882328e-07,
      "loss": 0.7552,
      "step": 17220
    },
    {
      "epoch": 4.841202922990444,
      "grad_norm": 0.49720627069473267,
      "learning_rate": 6.138819822292807e-07,
      "loss": 0.767,
      "step": 17225
    },
    {
      "epoch": 4.842608206857785,
      "grad_norm": 0.564487636089325,
      "learning_rate": 6.030757761639749e-07,
      "loss": 0.7639,
      "step": 17230
    },
    {
      "epoch": 4.844013490725127,
      "grad_norm": 0.5002022385597229,
      "learning_rate": 5.92365240093351e-07,
      "loss": 0.7498,
      "step": 17235
    },
    {
      "epoch": 4.845418774592468,
      "grad_norm": 0.5189710855484009,
      "learning_rate": 5.817503843263516e-07,
      "loss": 0.7557,
      "step": 17240
    },
    {
      "epoch": 4.846824058459809,
      "grad_norm": 0.5181299448013306,
      "learning_rate": 5.712312190798264e-07,
      "loss": 0.7734,
      "step": 17245
    },
    {
      "epoch": 4.84822934232715,
      "grad_norm": 0.5273287892341614,
      "learning_rate": 5.608077544784984e-07,
      "loss": 0.7593,
      "step": 17250
    },
    {
      "epoch": 4.849634626194491,
      "grad_norm": 0.5158643126487732,
      "learning_rate": 5.504800005550204e-07,
      "loss": 0.765,
      "step": 17255
    },
    {
      "epoch": 4.851039910061832,
      "grad_norm": 0.5436376929283142,
      "learning_rate": 5.402479672498961e-07,
      "loss": 0.7596,
      "step": 17260
    },
    {
      "epoch": 4.8524451939291735,
      "grad_norm": 0.5546701550483704,
      "learning_rate": 5.301116644115034e-07,
      "loss": 0.7546,
      "step": 17265
    },
    {
      "epoch": 4.853850477796515,
      "grad_norm": 0.5897644758224487,
      "learning_rate": 5.200711017960824e-07,
      "loss": 0.7785,
      "step": 17270
    },
    {
      "epoch": 4.855255761663856,
      "grad_norm": 0.5387979745864868,
      "learning_rate": 5.10126289067725e-07,
      "loss": 0.7668,
      "step": 17275
    },
    {
      "epoch": 4.856661045531197,
      "grad_norm": 0.5134298801422119,
      "learning_rate": 5.002772357983521e-07,
      "loss": 0.7686,
      "step": 17280
    },
    {
      "epoch": 4.858066329398539,
      "grad_norm": 0.5166084170341492,
      "learning_rate": 4.905239514677251e-07,
      "loss": 0.7672,
      "step": 17285
    },
    {
      "epoch": 4.85947161326588,
      "grad_norm": 0.5076904296875,
      "learning_rate": 4.808664454634238e-07,
      "loss": 0.7592,
      "step": 17290
    },
    {
      "epoch": 4.860876897133221,
      "grad_norm": 0.5120134353637695,
      "learning_rate": 4.7130472708084573e-07,
      "loss": 0.7596,
      "step": 17295
    },
    {
      "epoch": 4.8622821810005625,
      "grad_norm": 0.5964426398277283,
      "learning_rate": 4.618388055231848e-07,
      "loss": 0.7614,
      "step": 17300
    },
    {
      "epoch": 4.863687464867903,
      "grad_norm": 0.5426851511001587,
      "learning_rate": 4.5246868990143076e-07,
      "loss": 0.8021,
      "step": 17305
    },
    {
      "epoch": 4.865092748735244,
      "grad_norm": 0.5391839742660522,
      "learning_rate": 4.431943892343693e-07,
      "loss": 0.7574,
      "step": 17310
    },
    {
      "epoch": 4.8664980326025855,
      "grad_norm": 0.5277245044708252,
      "learning_rate": 4.340159124485488e-07,
      "loss": 0.836,
      "step": 17315
    },
    {
      "epoch": 4.867903316469927,
      "grad_norm": 0.5153777003288269,
      "learning_rate": 4.249332683783025e-07,
      "loss": 0.7579,
      "step": 17320
    },
    {
      "epoch": 4.869308600337268,
      "grad_norm": 0.5122922658920288,
      "learning_rate": 4.1594646576572636e-07,
      "loss": 0.7648,
      "step": 17325
    },
    {
      "epoch": 4.870713884204609,
      "grad_norm": 0.5131497979164124,
      "learning_rate": 4.070555132606346e-07,
      "loss": 0.7643,
      "step": 17330
    },
    {
      "epoch": 4.872119168071951,
      "grad_norm": 0.5253489017486572,
      "learning_rate": 3.982604194206263e-07,
      "loss": 0.7559,
      "step": 17335
    },
    {
      "epoch": 4.873524451939292,
      "grad_norm": 0.5008916258811951,
      "learning_rate": 3.8956119271101876e-07,
      "loss": 0.7966,
      "step": 17340
    },
    {
      "epoch": 4.874929735806633,
      "grad_norm": 0.5834038853645325,
      "learning_rate": 3.809578415048587e-07,
      "loss": 0.7749,
      "step": 17345
    },
    {
      "epoch": 4.876335019673974,
      "grad_norm": 0.5124272108078003,
      "learning_rate": 3.7245037408291104e-07,
      "loss": 0.7509,
      "step": 17350
    },
    {
      "epoch": 4.877740303541316,
      "grad_norm": 0.5225289463996887,
      "learning_rate": 3.6403879863363687e-07,
      "loss": 0.7534,
      "step": 17355
    },
    {
      "epoch": 4.879145587408656,
      "grad_norm": 0.5116289854049683,
      "learning_rate": 3.5572312325323766e-07,
      "loss": 0.7674,
      "step": 17360
    },
    {
      "epoch": 4.8805508712759975,
      "grad_norm": 0.5568757653236389,
      "learning_rate": 3.475033559455665e-07,
      "loss": 0.7473,
      "step": 17365
    },
    {
      "epoch": 4.881956155143339,
      "grad_norm": 0.5169786810874939,
      "learning_rate": 3.393795046222059e-07,
      "loss": 0.7732,
      "step": 17370
    },
    {
      "epoch": 4.88336143901068,
      "grad_norm": 0.5193935036659241,
      "learning_rate": 3.3135157710240116e-07,
      "loss": 0.7599,
      "step": 17375
    },
    {
      "epoch": 4.884766722878021,
      "grad_norm": 0.49602463841438293,
      "learning_rate": 3.2341958111306025e-07,
      "loss": 0.7547,
      "step": 17380
    },
    {
      "epoch": 4.886172006745363,
      "grad_norm": 0.518610954284668,
      "learning_rate": 3.15583524288765e-07,
      "loss": 0.7663,
      "step": 17385
    },
    {
      "epoch": 4.887577290612704,
      "grad_norm": 0.4968721568584442,
      "learning_rate": 3.07843414171749e-07,
      "loss": 0.7559,
      "step": 17390
    },
    {
      "epoch": 4.888982574480045,
      "grad_norm": 0.541677713394165,
      "learning_rate": 3.001992582119306e-07,
      "loss": 0.7627,
      "step": 17395
    },
    {
      "epoch": 4.8903878583473865,
      "grad_norm": 0.5368949770927429,
      "learning_rate": 2.926510637668134e-07,
      "loss": 0.7738,
      "step": 17400
    },
    {
      "epoch": 4.891793142214727,
      "grad_norm": 0.5182912945747375,
      "learning_rate": 2.851988381015858e-07,
      "loss": 0.7614,
      "step": 17405
    },
    {
      "epoch": 4.893198426082068,
      "grad_norm": 0.5060423612594604,
      "learning_rate": 2.7784258838905454e-07,
      "loss": 0.755,
      "step": 17410
    },
    {
      "epoch": 4.8946037099494095,
      "grad_norm": 0.4969828724861145,
      "learning_rate": 2.705823217096226e-07,
      "loss": 0.7437,
      "step": 17415
    },
    {
      "epoch": 4.896008993816751,
      "grad_norm": 0.5251031517982483,
      "learning_rate": 2.634180450513446e-07,
      "loss": 0.7609,
      "step": 17420
    },
    {
      "epoch": 4.897414277684092,
      "grad_norm": 0.5827402472496033,
      "learning_rate": 2.5634976530988233e-07,
      "loss": 0.7557,
      "step": 17425
    },
    {
      "epoch": 4.898819561551433,
      "grad_norm": 0.5410636067390442,
      "learning_rate": 2.493774892884604e-07,
      "loss": 0.7615,
      "step": 17430
    },
    {
      "epoch": 4.900224845418775,
      "grad_norm": 0.5140436887741089,
      "learning_rate": 2.4250122369794403e-07,
      "loss": 0.7606,
      "step": 17435
    },
    {
      "epoch": 4.901630129286116,
      "grad_norm": 0.5037211775779724,
      "learning_rate": 2.3572097515676127e-07,
      "loss": 0.7665,
      "step": 17440
    },
    {
      "epoch": 4.903035413153457,
      "grad_norm": 0.5306110978126526,
      "learning_rate": 2.290367501909363e-07,
      "loss": 0.7716,
      "step": 17445
    },
    {
      "epoch": 4.9044406970207985,
      "grad_norm": 0.5421374440193176,
      "learning_rate": 2.2244855523406716e-07,
      "loss": 0.7621,
      "step": 17450
    },
    {
      "epoch": 4.90584598088814,
      "grad_norm": 0.5292686223983765,
      "learning_rate": 2.1595639662732593e-07,
      "loss": 0.758,
      "step": 17455
    },
    {
      "epoch": 4.90725126475548,
      "grad_norm": 0.530255138874054,
      "learning_rate": 2.0956028061942523e-07,
      "loss": 0.7678,
      "step": 17460
    },
    {
      "epoch": 4.9086565486228215,
      "grad_norm": 0.4856848418712616,
      "learning_rate": 2.03260213366685e-07,
      "loss": 0.7534,
      "step": 17465
    },
    {
      "epoch": 4.910061832490163,
      "grad_norm": 0.5324820876121521,
      "learning_rate": 1.970562009329324e-07,
      "loss": 0.7562,
      "step": 17470
    },
    {
      "epoch": 4.911467116357504,
      "grad_norm": 0.5274339318275452,
      "learning_rate": 1.9094824928954646e-07,
      "loss": 0.7672,
      "step": 17475
    },
    {
      "epoch": 4.912872400224845,
      "grad_norm": 0.5117448568344116,
      "learning_rate": 1.849363643154911e-07,
      "loss": 0.7625,
      "step": 17480
    },
    {
      "epoch": 4.914277684092187,
      "grad_norm": 0.4951476454734802,
      "learning_rate": 1.7902055179720434e-07,
      "loss": 0.7583,
      "step": 17485
    },
    {
      "epoch": 4.915682967959528,
      "grad_norm": 0.6083093285560608,
      "learning_rate": 1.7320081742869808e-07,
      "loss": 0.7563,
      "step": 17490
    },
    {
      "epoch": 4.917088251826869,
      "grad_norm": 0.504711389541626,
      "learning_rate": 1.6747716681148052e-07,
      "loss": 0.7667,
      "step": 17495
    },
    {
      "epoch": 4.9184935356942105,
      "grad_norm": 0.5517157316207886,
      "learning_rate": 1.6184960545460037e-07,
      "loss": 0.7644,
      "step": 17500
    },
    {
      "epoch": 4.919898819561551,
      "grad_norm": 0.5390472412109375,
      "learning_rate": 1.5631813877461376e-07,
      "loss": 0.7691,
      "step": 17505
    },
    {
      "epoch": 4.921304103428893,
      "grad_norm": 0.5268747210502625,
      "learning_rate": 1.50882772095573e-07,
      "loss": 0.7622,
      "step": 17510
    },
    {
      "epoch": 4.9227093872962335,
      "grad_norm": 0.5161256790161133,
      "learning_rate": 1.455435106490488e-07,
      "loss": 0.7602,
      "step": 17515
    },
    {
      "epoch": 4.924114671163575,
      "grad_norm": 0.5164275169372559,
      "learning_rate": 1.4030035957410814e-07,
      "loss": 0.7576,
      "step": 17520
    },
    {
      "epoch": 4.925519955030916,
      "grad_norm": 0.5255956649780273,
      "learning_rate": 1.3515332391730306e-07,
      "loss": 0.7661,
      "step": 17525
    },
    {
      "epoch": 4.926925238898257,
      "grad_norm": 0.527873694896698,
      "learning_rate": 1.3010240863268187e-07,
      "loss": 0.7529,
      "step": 17530
    },
    {
      "epoch": 4.928330522765599,
      "grad_norm": 0.5253483653068542,
      "learning_rate": 1.2514761858177793e-07,
      "loss": 0.7727,
      "step": 17535
    },
    {
      "epoch": 4.92973580663294,
      "grad_norm": 0.5246861577033997,
      "learning_rate": 1.2028895853358756e-07,
      "loss": 0.763,
      "step": 17540
    },
    {
      "epoch": 4.931141090500281,
      "grad_norm": 0.5083796977996826,
      "learning_rate": 1.1552643316459222e-07,
      "loss": 0.7606,
      "step": 17545
    },
    {
      "epoch": 4.9325463743676226,
      "grad_norm": 0.49852460622787476,
      "learning_rate": 1.1086004705875841e-07,
      "loss": 0.755,
      "step": 17550
    },
    {
      "epoch": 4.933951658234964,
      "grad_norm": 0.5587095022201538,
      "learning_rate": 1.0628980470750449e-07,
      "loss": 0.7587,
      "step": 17555
    },
    {
      "epoch": 4.935356942102304,
      "grad_norm": 0.5445096492767334,
      "learning_rate": 1.0181571050968953e-07,
      "loss": 0.764,
      "step": 17560
    },
    {
      "epoch": 4.9367622259696455,
      "grad_norm": 0.5117522478103638,
      "learning_rate": 9.743776877166877e-08,
      "loss": 0.7614,
      "step": 17565
    },
    {
      "epoch": 4.938167509836987,
      "grad_norm": 0.5934692025184631,
      "learning_rate": 9.315598370722711e-08,
      "loss": 0.8096,
      "step": 17570
    },
    {
      "epoch": 4.939572793704328,
      "grad_norm": 0.5090463161468506,
      "learning_rate": 8.897035943760124e-08,
      "loss": 0.7632,
      "step": 17575
    },
    {
      "epoch": 4.940978077571669,
      "grad_norm": 0.5313629508018494,
      "learning_rate": 8.488089999146854e-08,
      "loss": 0.7703,
      "step": 17580
    },
    {
      "epoch": 4.942383361439011,
      "grad_norm": 0.5156370401382446,
      "learning_rate": 8.08876093049582e-08,
      "loss": 0.7559,
      "step": 17585
    },
    {
      "epoch": 4.943788645306352,
      "grad_norm": 0.505222737789154,
      "learning_rate": 7.699049122162904e-08,
      "loss": 0.7479,
      "step": 17590
    },
    {
      "epoch": 4.945193929173693,
      "grad_norm": 0.5333053469657898,
      "learning_rate": 7.318954949248059e-08,
      "loss": 0.764,
      "step": 17595
    },
    {
      "epoch": 4.946599213041035,
      "grad_norm": 0.5412734150886536,
      "learning_rate": 6.948478777591971e-08,
      "loss": 0.7544,
      "step": 17600
    },
    {
      "epoch": 4.948004496908376,
      "grad_norm": 0.5213012099266052,
      "learning_rate": 6.587620963781626e-08,
      "loss": 0.7571,
      "step": 17605
    },
    {
      "epoch": 4.949409780775717,
      "grad_norm": 0.514471173286438,
      "learning_rate": 6.236381855143636e-08,
      "loss": 0.7522,
      "step": 17610
    },
    {
      "epoch": 4.9508150646430575,
      "grad_norm": 0.5372034907341003,
      "learning_rate": 5.8947617897464614e-08,
      "loss": 0.7646,
      "step": 17615
    },
    {
      "epoch": 4.952220348510399,
      "grad_norm": 0.5589454174041748,
      "learning_rate": 5.562761096402636e-08,
      "loss": 0.7702,
      "step": 17620
    },
    {
      "epoch": 4.95362563237774,
      "grad_norm": 0.4952004551887512,
      "learning_rate": 5.2403800946621006e-08,
      "loss": 0.7715,
      "step": 17625
    },
    {
      "epoch": 4.955030916245081,
      "grad_norm": 0.5731898546218872,
      "learning_rate": 4.9276190948199795e-08,
      "loss": 0.7667,
      "step": 17630
    },
    {
      "epoch": 4.956436200112423,
      "grad_norm": 0.5195038318634033,
      "learning_rate": 4.624478397909915e-08,
      "loss": 0.7522,
      "step": 17635
    },
    {
      "epoch": 4.957841483979764,
      "grad_norm": 0.5436112284660339,
      "learning_rate": 4.330958295705179e-08,
      "loss": 0.7555,
      "step": 17640
    },
    {
      "epoch": 4.959246767847105,
      "grad_norm": 0.5222539901733398,
      "learning_rate": 4.047059070720893e-08,
      "loss": 0.7617,
      "step": 17645
    },
    {
      "epoch": 4.960652051714447,
      "grad_norm": 0.5810261964797974,
      "learning_rate": 3.772780996211811e-08,
      "loss": 0.7628,
      "step": 17650
    },
    {
      "epoch": 4.962057335581788,
      "grad_norm": 0.49842745065689087,
      "learning_rate": 3.508124336170093e-08,
      "loss": 0.7526,
      "step": 17655
    },
    {
      "epoch": 4.963462619449128,
      "grad_norm": 0.5396468043327332,
      "learning_rate": 3.253089345331972e-08,
      "loss": 0.7686,
      "step": 17660
    },
    {
      "epoch": 4.9648679033164695,
      "grad_norm": 0.5041926503181458,
      "learning_rate": 3.007676269166648e-08,
      "loss": 0.7646,
      "step": 17665
    },
    {
      "epoch": 4.966273187183811,
      "grad_norm": 0.5142960548400879,
      "learning_rate": 2.7718853438873926e-08,
      "loss": 0.7689,
      "step": 17670
    },
    {
      "epoch": 4.967678471051152,
      "grad_norm": 0.5062931180000305,
      "learning_rate": 2.5457167964426653e-08,
      "loss": 0.7526,
      "step": 17675
    },
    {
      "epoch": 4.969083754918493,
      "grad_norm": 0.5020378232002258,
      "learning_rate": 2.3291708445216664e-08,
      "loss": 0.7528,
      "step": 17680
    },
    {
      "epoch": 4.970489038785835,
      "grad_norm": 0.6097744703292847,
      "learning_rate": 2.1222476965510052e-08,
      "loss": 0.7603,
      "step": 17685
    },
    {
      "epoch": 4.971894322653176,
      "grad_norm": 0.5038420557975769,
      "learning_rate": 1.9249475516947002e-08,
      "loss": 0.7533,
      "step": 17690
    },
    {
      "epoch": 4.973299606520517,
      "grad_norm": 0.5745223760604858,
      "learning_rate": 1.7372705998552896e-08,
      "loss": 0.7661,
      "step": 17695
    },
    {
      "epoch": 4.974704890387859,
      "grad_norm": 0.5366801023483276,
      "learning_rate": 1.5592170216716105e-08,
      "loss": 0.7602,
      "step": 17700
    },
    {
      "epoch": 4.9761101742552,
      "grad_norm": 0.5210517048835754,
      "learning_rate": 1.3907869885232405e-08,
      "loss": 0.7607,
      "step": 17705
    },
    {
      "epoch": 4.977515458122541,
      "grad_norm": 0.5279877781867981,
      "learning_rate": 1.2319806625227248e-08,
      "loss": 0.8017,
      "step": 17710
    },
    {
      "epoch": 4.9789207419898815,
      "grad_norm": 0.517043948173523,
      "learning_rate": 1.0827981965233492e-08,
      "loss": 0.777,
      "step": 17715
    },
    {
      "epoch": 4.980326025857223,
      "grad_norm": 0.527386486530304,
      "learning_rate": 9.432397341124777e-09,
      "loss": 0.7605,
      "step": 17720
    },
    {
      "epoch": 4.981731309724564,
      "grad_norm": 0.5199275016784668,
      "learning_rate": 8.133054096171044e-09,
      "loss": 0.7482,
      "step": 17725
    },
    {
      "epoch": 4.983136593591905,
      "grad_norm": 0.5189145803451538,
      "learning_rate": 6.9299534809941224e-09,
      "loss": 0.7552,
      "step": 17730
    },
    {
      "epoch": 4.984541877459247,
      "grad_norm": 0.5272063612937927,
      "learning_rate": 5.82309665357883e-09,
      "loss": 0.75,
      "step": 17735
    },
    {
      "epoch": 4.985947161326588,
      "grad_norm": 0.5504851937294006,
      "learning_rate": 4.81248467928408e-09,
      "loss": 0.7652,
      "step": 17740
    },
    {
      "epoch": 4.987352445193929,
      "grad_norm": 0.5174840688705444,
      "learning_rate": 3.898118530820671e-09,
      "loss": 0.7568,
      "step": 17745
    },
    {
      "epoch": 4.988757729061271,
      "grad_norm": 0.5700994729995728,
      "learning_rate": 3.0799990882734995e-09,
      "loss": 0.7614,
      "step": 17750
    },
    {
      "epoch": 4.990163012928612,
      "grad_norm": 0.507964015007019,
      "learning_rate": 2.3581271391015512e-09,
      "loss": 0.7574,
      "step": 17755
    },
    {
      "epoch": 4.991568296795953,
      "grad_norm": 0.5576549172401428,
      "learning_rate": 1.7325033780934973e-09,
      "loss": 0.7551,
      "step": 17760
    },
    {
      "epoch": 4.992973580663294,
      "grad_norm": 0.5367130041122437,
      "learning_rate": 1.2031284074121019e-09,
      "loss": 0.7534,
      "step": 17765
    },
    {
      "epoch": 4.994378864530635,
      "grad_norm": 0.5104110836982727,
      "learning_rate": 7.70002736594222e-10,
      "loss": 0.7582,
      "step": 17770
    },
    {
      "epoch": 4.995784148397976,
      "grad_norm": 0.5402113795280457,
      "learning_rate": 4.3312678251750115e-10,
      "loss": 0.7562,
      "step": 17775
    },
    {
      "epoch": 4.997189432265317,
      "grad_norm": 0.5511764287948608,
      "learning_rate": 1.9250086943367607e-10,
      "loss": 0.7579,
      "step": 17780
    },
    {
      "epoch": 4.998594716132659,
      "grad_norm": 0.5197888612747192,
      "learning_rate": 4.8125228935269606e-11,
      "loss": 0.7629,
      "step": 17785
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.5132002234458923,
      "learning_rate": 0.0,
      "loss": 0.7613,
      "step": 17790
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.8528754711151123,
      "eval_runtime": 646.1588,
      "eval_samples_per_second": 6.96,
      "eval_steps_per_second": 0.58,
      "step": 17790
    },
    {
      "epoch": 5.0,
      "step": 17790,
      "total_flos": 2.1191156679948894e+19,
      "train_loss": 0.9175008542622121,
      "train_runtime": 172180.6812,
      "train_samples_per_second": 2.48,
      "train_steps_per_second": 0.103
    }
  ],
  "logging_steps": 5,
  "max_steps": 17790,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.1191156679948894e+19,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}