{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 8795,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 0.6929430365562439,
      "learning_rate": 0.0001999998405083484,
      "loss": 1.6838,
      "step": 5
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5281903743743896,
      "learning_rate": 0.00019999936203390236,
      "loss": 1.4915,
      "step": 10
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6337839961051941,
      "learning_rate": 0.0001999985645781881,
      "loss": 1.2662,
      "step": 15
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6382477879524231,
      "learning_rate": 0.00019999744814374942,
      "loss": 1.1492,
      "step": 20
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6016966104507446,
      "learning_rate": 0.0001999960127341475,
      "loss": 1.189,
      "step": 25
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.48733243346214294,
      "learning_rate": 0.00019999425835396113,
      "loss": 1.2111,
      "step": 30
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.4674977660179138,
      "learning_rate": 0.0001999921850087864,
      "loss": 1.0462,
      "step": 35
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.48517295718193054,
      "learning_rate": 0.00019998979270523704,
      "loss": 0.9653,
      "step": 40
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5423499345779419,
      "learning_rate": 0.000199987081450944,
      "loss": 1.0769,
      "step": 45
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5383784174919128,
      "learning_rate": 0.0001999840512545558,
      "loss": 1.0471,
      "step": 50
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.37535467743873596,
      "learning_rate": 0.00019998070212573824,
      "loss": 1.1045,
      "step": 55
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5334364175796509,
      "learning_rate": 0.00019997703407517443,
      "loss": 0.9794,
      "step": 60
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5327732563018799,
      "learning_rate": 0.0001999730471145649,
      "loss": 1.0664,
      "step": 65
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.4103855788707733,
      "learning_rate": 0.0001999687412566274,
      "loss": 1.0529,
      "step": 70
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5592668652534485,
      "learning_rate": 0.00019996411651509684,
      "loss": 0.9661,
      "step": 75
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5078962445259094,
      "learning_rate": 0.0001999591729047254,
      "loss": 1.0836,
      "step": 80
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5035337209701538,
      "learning_rate": 0.0001999539104412824,
      "loss": 1.0011,
      "step": 85
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5704492330551147,
      "learning_rate": 0.00019994832914155416,
      "loss": 0.9957,
      "step": 90
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.4775836169719696,
      "learning_rate": 0.00019994242902334416,
      "loss": 1.0125,
      "step": 95
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.4531150460243225,
      "learning_rate": 0.00019993621010547277,
      "loss": 0.9085,
      "step": 100
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5194448232650757,
      "learning_rate": 0.00019992967240777727,
      "loss": 0.9871,
      "step": 105
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.44902467727661133,
      "learning_rate": 0.00019992281595111185,
      "loss": 0.912,
      "step": 110
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5913822650909424,
      "learning_rate": 0.00019991564075734744,
      "loss": 0.941,
      "step": 115
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.524722158908844,
      "learning_rate": 0.00019990814684937174,
      "loss": 1.0098,
      "step": 120
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6472988724708557,
      "learning_rate": 0.00019990033425108905,
      "loss": 0.9765,
      "step": 125
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5326634049415588,
      "learning_rate": 0.00019989220298742026,
      "loss": 0.9342,
      "step": 130
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5234887599945068,
      "learning_rate": 0.00019988375308430275,
      "loss": 0.9872,
      "step": 135
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.49476635456085205,
      "learning_rate": 0.00019987498456869025,
      "loss": 0.9218,
      "step": 140
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4328886568546295,
      "learning_rate": 0.00019986589746855295,
      "loss": 1.0531,
      "step": 145
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5224207639694214,
      "learning_rate": 0.0001998564918128771,
      "loss": 0.9544,
      "step": 150
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4767338037490845,
      "learning_rate": 0.0001998467676316652,
      "loss": 0.9498,
      "step": 155
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4608069658279419,
      "learning_rate": 0.00019983672495593578,
      "loss": 0.8847,
      "step": 160
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4687401056289673,
      "learning_rate": 0.00019982636381772327,
      "loss": 0.938,
      "step": 165
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5468807220458984,
      "learning_rate": 0.000199815684250078,
      "loss": 0.9296,
      "step": 170
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5462723970413208,
      "learning_rate": 0.00019980468628706604,
      "loss": 0.9583,
      "step": 175
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5237535834312439,
      "learning_rate": 0.00019979336996376893,
      "loss": 0.9683,
      "step": 180
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5200393199920654,
      "learning_rate": 0.000199781735316284,
      "loss": 0.9164,
      "step": 185
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.480398565530777,
      "learning_rate": 0.00019976978238172373,
      "loss": 0.9312,
      "step": 190
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.49409154057502747,
      "learning_rate": 0.000199757511198216,
      "loss": 0.9962,
      "step": 195
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5906654000282288,
      "learning_rate": 0.00019974492180490388,
      "loss": 0.9277,
      "step": 200
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5397025346755981,
      "learning_rate": 0.00019973201424194542,
      "loss": 0.8953,
      "step": 205
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5493289232254028,
      "learning_rate": 0.00019971878855051358,
      "loss": 0.9364,
      "step": 210
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5614472031593323,
      "learning_rate": 0.0001997052447727961,
      "loss": 0.9419,
      "step": 215
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5108090043067932,
      "learning_rate": 0.0001996913829519954,
      "loss": 0.9203,
      "step": 220
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5096657872200012,
      "learning_rate": 0.0001996772031323283,
      "loss": 0.8614,
      "step": 225
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.541383683681488,
      "learning_rate": 0.00019966270535902618,
      "loss": 0.9603,
      "step": 230
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5045916438102722,
      "learning_rate": 0.00019964788967833438,
      "loss": 0.9289,
      "step": 235
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5646150708198547,
      "learning_rate": 0.00019963275613751256,
      "loss": 0.8575,
      "step": 240
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.4511927664279938,
      "learning_rate": 0.0001996173047848341,
      "loss": 0.9126,
      "step": 245
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5300033688545227,
      "learning_rate": 0.0001996015356695863,
      "loss": 0.9346,
      "step": 250
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5646623969078064,
      "learning_rate": 0.00019958544884207,
      "loss": 0.8818,
      "step": 255
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.4728727638721466,
      "learning_rate": 0.00019956904435359943,
      "loss": 0.9771,
      "step": 260
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.49879685044288635,
      "learning_rate": 0.00019955232225650225,
      "loss": 0.8741,
      "step": 265
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.47209540009498596,
      "learning_rate": 0.00019953528260411912,
      "loss": 0.8946,
      "step": 270
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5027173161506653,
      "learning_rate": 0.00019951792545080369,
      "loss": 0.8586,
      "step": 275
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5433563590049744,
      "learning_rate": 0.00019950025085192232,
      "loss": 0.9146,
      "step": 280
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.493032306432724,
      "learning_rate": 0.00019948225886385414,
      "loss": 0.9083,
      "step": 285
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5513988733291626,
      "learning_rate": 0.00019946394954399054,
      "loss": 0.8455,
      "step": 290
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.588005542755127,
      "learning_rate": 0.00019944532295073516,
      "loss": 1.0188,
      "step": 295
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5429700016975403,
      "learning_rate": 0.00019942637914350378,
      "loss": 0.9102,
      "step": 300
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.6686177849769592,
      "learning_rate": 0.00019940711818272394,
      "loss": 1.0241,
      "step": 305
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5708988308906555,
      "learning_rate": 0.00019938754012983488,
      "loss": 0.9757,
      "step": 310
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5050585865974426,
      "learning_rate": 0.0001993676450472874,
      "loss": 1.0048,
      "step": 315
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5606969594955444,
      "learning_rate": 0.00019934743299854338,
      "loss": 0.9737,
      "step": 320
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5190075635910034,
      "learning_rate": 0.00019932690404807598,
      "loss": 0.9677,
      "step": 325
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5396589040756226,
      "learning_rate": 0.00019930605826136904,
      "loss": 0.9119,
      "step": 330
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5063149333000183,
      "learning_rate": 0.0001992848957049172,
      "loss": 0.9627,
      "step": 335
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5704573392868042,
      "learning_rate": 0.00019926341644622544,
      "loss": 0.9782,
      "step": 340
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.542542040348053,
      "learning_rate": 0.00019924162055380903,
      "loss": 0.9253,
      "step": 345
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.4711320102214813,
      "learning_rate": 0.00019921950809719324,
      "loss": 0.9039,
      "step": 350
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5174899697303772,
      "learning_rate": 0.00019919707914691311,
      "loss": 0.8753,
      "step": 355
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5371158719062805,
      "learning_rate": 0.0001991743337745132,
      "loss": 0.9531,
      "step": 360
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.49162808060646057,
      "learning_rate": 0.00019915127205254751,
      "loss": 0.9007,
      "step": 365
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5364812016487122,
      "learning_rate": 0.00019912789405457905,
      "loss": 0.8619,
      "step": 370
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5108892321586609,
      "learning_rate": 0.00019910419985517977,
      "loss": 0.9384,
      "step": 375
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.6169962286949158,
      "learning_rate": 0.00019908018952993016,
      "loss": 1.0248,
      "step": 380
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5570533275604248,
      "learning_rate": 0.00019905586315541917,
      "loss": 0.9526,
      "step": 385
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5810699462890625,
      "learning_rate": 0.00019903122080924387,
      "loss": 0.9722,
      "step": 390
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.4718081057071686,
      "learning_rate": 0.00019900626257000922,
      "loss": 0.8801,
      "step": 395
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.6052469611167908,
      "learning_rate": 0.00019898098851732786,
      "loss": 0.8618,
      "step": 400
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.491576611995697,
      "learning_rate": 0.0001989553987318198,
      "loss": 0.8347,
      "step": 405
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.4725711941719055,
      "learning_rate": 0.00019892949329511212,
      "loss": 0.8782,
      "step": 410
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.4833574891090393,
      "learning_rate": 0.00019890327228983893,
      "loss": 0.9227,
      "step": 415
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.4949423670768738,
      "learning_rate": 0.0001988767357996408,
      "loss": 0.9076,
      "step": 420
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.4801913797855377,
      "learning_rate": 0.0001988498839091647,
      "loss": 0.8838,
      "step": 425
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5127063393592834,
      "learning_rate": 0.00019882271670406372,
      "loss": 0.7314,
      "step": 430
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.524604320526123,
      "learning_rate": 0.00019879523427099665,
      "loss": 0.8783,
      "step": 435
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.6428730487823486,
      "learning_rate": 0.00019876743669762793,
      "loss": 0.977,
      "step": 440
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5448722243309021,
      "learning_rate": 0.00019873932407262715,
      "loss": 0.9373,
      "step": 445
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5859642028808594,
      "learning_rate": 0.00019871089648566885,
      "loss": 0.9717,
      "step": 450
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5077968835830688,
      "learning_rate": 0.00019868215402743235,
      "loss": 0.8836,
      "step": 455
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5130951404571533,
      "learning_rate": 0.00019865309678960123,
      "loss": 1.012,
      "step": 460
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5490102767944336,
      "learning_rate": 0.0001986237248648633,
      "loss": 0.8743,
      "step": 465
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5080798864364624,
      "learning_rate": 0.00019859403834691003,
      "loss": 0.9109,
      "step": 470
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5114970207214355,
      "learning_rate": 0.0001985640373304365,
      "loss": 0.9453,
      "step": 475
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.500872790813446,
      "learning_rate": 0.0001985337219111409,
      "loss": 0.9275,
      "step": 480
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.4827217757701874,
      "learning_rate": 0.00019850309218572438,
      "loss": 0.9394,
      "step": 485
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5236514210700989,
      "learning_rate": 0.00019847214825189066,
      "loss": 0.9253,
      "step": 490
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.49005958437919617,
      "learning_rate": 0.0001984408902083457,
      "loss": 0.8693,
      "step": 495
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5737020373344421,
      "learning_rate": 0.00019840931815479746,
      "loss": 0.9076,
      "step": 500
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5236185193061829,
      "learning_rate": 0.00019837743219195552,
      "loss": 0.9429,
      "step": 505
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5625522136688232,
      "learning_rate": 0.00019834523242153078,
      "loss": 0.9305,
      "step": 510
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5392923951148987,
      "learning_rate": 0.0001983127189462351,
      "loss": 0.8803,
      "step": 515
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5529817938804626,
      "learning_rate": 0.00019827989186978103,
      "loss": 0.9182,
      "step": 520
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5279693007469177,
      "learning_rate": 0.00019824675129688152,
      "loss": 0.9022,
      "step": 525
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.6373478174209595,
      "learning_rate": 0.00019821329733324942,
      "loss": 0.9551,
      "step": 530
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5089979767799377,
      "learning_rate": 0.00019817953008559734,
      "loss": 0.8277,
      "step": 535
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5426749587059021,
      "learning_rate": 0.00019814544966163708,
      "loss": 1.012,
      "step": 540
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5283740758895874,
      "learning_rate": 0.0001981110561700796,
      "loss": 0.8224,
      "step": 545
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.4337727725505829,
      "learning_rate": 0.00019807634972063428,
      "loss": 0.858,
      "step": 550
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.4819512963294983,
      "learning_rate": 0.000198041330424009,
      "loss": 0.7776,
      "step": 555
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.550744891166687,
      "learning_rate": 0.00019800599839190941,
      "loss": 0.8895,
      "step": 560
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.4993669092655182,
      "learning_rate": 0.0001979703537370388,
      "loss": 0.9043,
      "step": 565
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.618319571018219,
      "learning_rate": 0.00019793439657309772,
      "loss": 0.8229,
      "step": 570
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5560276508331299,
      "learning_rate": 0.00019789812701478346,
      "loss": 0.9208,
      "step": 575
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5499486327171326,
      "learning_rate": 0.00019786154517778987,
      "loss": 0.8309,
      "step": 580
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.8018636703491211,
      "learning_rate": 0.00019782465117880693,
      "loss": 0.9529,
      "step": 585
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5488550066947937,
      "learning_rate": 0.0001977874451355203,
      "loss": 0.7879,
      "step": 590
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5369092226028442,
      "learning_rate": 0.00019774992716661106,
      "loss": 0.8819,
      "step": 595
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5121927857398987,
      "learning_rate": 0.00019771209739175523,
      "loss": 0.8949,
      "step": 600
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5077289938926697,
      "learning_rate": 0.00019767395593162353,
      "loss": 0.9174,
      "step": 605
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.6287967562675476,
      "learning_rate": 0.00019763550290788085,
      "loss": 0.8388,
      "step": 610
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5971408486366272,
      "learning_rate": 0.0001975967384431859,
      "loss": 0.8899,
      "step": 615
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5257498025894165,
      "learning_rate": 0.00019755766266119085,
      "loss": 1.0072,
      "step": 620
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5270594954490662,
      "learning_rate": 0.00019751827568654089,
      "loss": 0.9276,
      "step": 625
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5067614912986755,
      "learning_rate": 0.00019747857764487395,
      "loss": 0.8488,
      "step": 630
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5074208378791809,
      "learning_rate": 0.0001974385686628201,
      "loss": 0.7905,
      "step": 635
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5764243602752686,
      "learning_rate": 0.00019739824886800134,
      "loss": 0.8907,
      "step": 640
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.6378028392791748,
      "learning_rate": 0.00019735761838903106,
      "loss": 0.9187,
      "step": 645
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.8019906282424927,
      "learning_rate": 0.00019731667735551375,
      "loss": 0.9371,
      "step": 650
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5774128437042236,
      "learning_rate": 0.00019727542589804444,
      "loss": 0.9012,
      "step": 655
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6190884709358215,
      "learning_rate": 0.00019723386414820842,
      "loss": 0.941,
      "step": 660
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.526430070400238,
      "learning_rate": 0.00019719199223858068,
      "loss": 0.77,
      "step": 665
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.4773986041545868,
      "learning_rate": 0.00019714981030272567,
      "loss": 0.8433,
      "step": 670
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5461030006408691,
      "learning_rate": 0.00019710731847519665,
      "loss": 0.8035,
      "step": 675
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6205869317054749,
      "learning_rate": 0.00019706451689153556,
      "loss": 0.9234,
      "step": 680
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5920631885528564,
      "learning_rate": 0.00019702140568827222,
      "loss": 0.884,
      "step": 685
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5703709721565247,
      "learning_rate": 0.0001969779850029242,
      "loss": 0.833,
      "step": 690
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6153601408004761,
      "learning_rate": 0.00019693425497399627,
      "loss": 0.9611,
      "step": 695
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.4773414433002472,
      "learning_rate": 0.00019689021574097987,
      "loss": 0.8674,
      "step": 700
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5657919645309448,
      "learning_rate": 0.00019684586744435283,
      "loss": 0.9631,
      "step": 705
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5606719255447388,
      "learning_rate": 0.0001968012102255788,
      "loss": 0.8937,
      "step": 710
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.510236382484436,
      "learning_rate": 0.00019675624422710682,
      "loss": 0.8685,
      "step": 715
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6055552363395691,
      "learning_rate": 0.000196710969592371,
      "loss": 0.9136,
      "step": 720
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.52703857421875,
      "learning_rate": 0.0001966653864657898,
      "loss": 0.8531,
      "step": 725
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5948337316513062,
      "learning_rate": 0.00019661949499276578,
      "loss": 0.9152,
      "step": 730
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5188962817192078,
      "learning_rate": 0.00019657329531968512,
      "loss": 0.8868,
      "step": 735
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5457183718681335,
      "learning_rate": 0.000196526787593917,
      "loss": 0.8716,
      "step": 740
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5485244393348694,
      "learning_rate": 0.0001964799719638134,
      "loss": 0.8099,
      "step": 745
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5232064723968506,
      "learning_rate": 0.00019643284857870822,
      "loss": 0.8874,
      "step": 750
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5872951149940491,
      "learning_rate": 0.00019638541758891734,
      "loss": 0.7815,
      "step": 755
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5641792416572571,
      "learning_rate": 0.0001963376791457376,
      "loss": 0.8868,
      "step": 760
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.058152198791504,
      "learning_rate": 0.0001962896334014467,
      "loss": 0.8355,
      "step": 765
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5059372782707214,
      "learning_rate": 0.00019624128050930252,
      "loss": 0.7938,
      "step": 770
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5510187745094299,
      "learning_rate": 0.00019619262062354275,
      "loss": 0.8468,
      "step": 775
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.4972304105758667,
      "learning_rate": 0.00019614365389938426,
      "loss": 0.8065,
      "step": 780
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.7013474106788635,
      "learning_rate": 0.00019609438049302273,
      "loss": 0.91,
      "step": 785
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5787790417671204,
      "learning_rate": 0.00019604480056163213,
      "loss": 0.9695,
      "step": 790
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.45666056871414185,
      "learning_rate": 0.00019599491426336413,
      "loss": 0.9191,
      "step": 795
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5539554953575134,
      "learning_rate": 0.00019594472175734774,
      "loss": 0.9278,
      "step": 800
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5579492449760437,
      "learning_rate": 0.0001958942232036886,
      "loss": 0.9074,
      "step": 805
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.48329582810401917,
      "learning_rate": 0.00019584341876346874,
      "loss": 0.773,
      "step": 810
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5779743790626526,
      "learning_rate": 0.0001957923085987458,
      "loss": 0.7915,
      "step": 815
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5726707577705383,
      "learning_rate": 0.0001957408928725527,
      "loss": 0.8683,
      "step": 820
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5140533447265625,
      "learning_rate": 0.00019568917174889693,
      "loss": 0.8599,
      "step": 825
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5206133723258972,
      "learning_rate": 0.00019563714539276036,
      "loss": 0.8629,
      "step": 830
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.6327289342880249,
      "learning_rate": 0.0001955848139700983,
      "loss": 0.8577,
      "step": 835
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5038033127784729,
      "learning_rate": 0.00019553217764783928,
      "loss": 0.8652,
      "step": 840
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5202915072441101,
      "learning_rate": 0.0001954792365938844,
      "loss": 0.9021,
      "step": 845
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5957011580467224,
      "learning_rate": 0.00019542599097710676,
      "loss": 0.8404,
      "step": 850
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.6331242918968201,
      "learning_rate": 0.00019537244096735096,
      "loss": 1.0052,
      "step": 855
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.43145543336868286,
      "learning_rate": 0.00019531858673543266,
      "loss": 0.8814,
      "step": 860
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5859599709510803,
      "learning_rate": 0.0001952644284531378,
      "loss": 0.8131,
      "step": 865
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5870697498321533,
      "learning_rate": 0.00019520996629322228,
      "loss": 0.8458,
      "step": 870
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.4881855845451355,
      "learning_rate": 0.00019515520042941132,
      "loss": 0.808,
      "step": 875
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5687686204910278,
      "learning_rate": 0.00019510013103639883,
      "loss": 0.8764,
      "step": 880
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.49425217509269714,
      "learning_rate": 0.00019504475828984705,
      "loss": 0.9531,
      "step": 885
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.6400182247161865,
      "learning_rate": 0.00019498908236638572,
      "loss": 0.9817,
      "step": 890
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.50630122423172,
      "learning_rate": 0.0001949331034436118,
      "loss": 0.8061,
      "step": 895
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.6477893590927124,
      "learning_rate": 0.00019487682170008866,
      "loss": 0.8433,
      "step": 900
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5080977082252502,
      "learning_rate": 0.0001948202373153457,
      "loss": 0.753,
      "step": 905
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.6288018226623535,
      "learning_rate": 0.00019476335046987763,
      "loss": 0.8719,
      "step": 910
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5954068899154663,
      "learning_rate": 0.00019470616134514406,
      "loss": 0.9141,
      "step": 915
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9248217940330505,
      "learning_rate": 0.00019464867012356865,
      "loss": 0.8477,
      "step": 920
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5251208543777466,
      "learning_rate": 0.00019459087698853883,
      "loss": 0.805,
      "step": 925
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5578038692474365,
      "learning_rate": 0.0001945327821244051,
      "loss": 0.9431,
      "step": 930
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5071999430656433,
      "learning_rate": 0.0001944743857164803,
      "loss": 0.8176,
      "step": 935
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5080613493919373,
      "learning_rate": 0.00019441568795103932,
      "loss": 0.9004,
      "step": 940
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5936622619628906,
      "learning_rate": 0.00019435668901531813,
      "loss": 0.9215,
      "step": 945
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5861743092536926,
      "learning_rate": 0.00019429738909751353,
      "loss": 0.8413,
      "step": 950
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.561229407787323,
      "learning_rate": 0.00019423778838678236,
      "loss": 0.8808,
      "step": 955
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.6140787601470947,
      "learning_rate": 0.00019417788707324095,
      "loss": 0.8038,
      "step": 960
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.4852558672428131,
      "learning_rate": 0.00019411768534796444,
      "loss": 0.7745,
      "step": 965
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.6961116194725037,
      "learning_rate": 0.00019405718340298632,
      "loss": 0.9561,
      "step": 970
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5008209943771362,
      "learning_rate": 0.00019399638143129767,
      "loss": 0.8497,
      "step": 975
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.6141425967216492,
      "learning_rate": 0.00019393527962684664,
      "loss": 0.8182,
      "step": 980
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.7052502036094666,
      "learning_rate": 0.0001938738781845378,
      "loss": 0.8945,
      "step": 985
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5405805110931396,
      "learning_rate": 0.00019381217730023146,
      "loss": 0.9554,
      "step": 990
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.6514587998390198,
      "learning_rate": 0.00019375017717074318,
      "loss": 0.811,
      "step": 995
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5553276538848877,
      "learning_rate": 0.000193687877993843,
      "loss": 0.8157,
      "step": 1000
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5811892151832581,
      "learning_rate": 0.00019362527996825488,
      "loss": 0.8352,
      "step": 1005
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.4772842526435852,
      "learning_rate": 0.00019356238329365613,
      "loss": 0.8563,
      "step": 1010
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.6040914058685303,
      "learning_rate": 0.00019349918817067655,
      "loss": 0.8884,
      "step": 1015
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.638569712638855,
      "learning_rate": 0.0001934356948008981,
      "loss": 0.938,
      "step": 1020
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.47892439365386963,
      "learning_rate": 0.00019337190338685397,
      "loss": 0.9725,
      "step": 1025
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5647065043449402,
      "learning_rate": 0.0001933078141320282,
      "loss": 0.8063,
      "step": 1030
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.4954369068145752,
      "learning_rate": 0.0001932434272408547,
      "loss": 0.8535,
      "step": 1035
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.655193567276001,
      "learning_rate": 0.00019317874291871704,
      "loss": 0.876,
      "step": 1040
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.6297211050987244,
      "learning_rate": 0.0001931137613719473,
      "loss": 0.9351,
      "step": 1045
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5889206528663635,
      "learning_rate": 0.0001930484828078258,
      "loss": 0.918,
      "step": 1050
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5167111158370972,
      "learning_rate": 0.00019298290743458027,
      "loss": 1.1183,
      "step": 1055
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.44684621691703796,
      "learning_rate": 0.0001929170354613852,
      "loss": 0.8091,
      "step": 1060
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5272998213768005,
      "learning_rate": 0.00019285086709836116,
      "loss": 0.8537,
      "step": 1065
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5813104510307312,
      "learning_rate": 0.0001927844025565742,
      "loss": 0.8221,
      "step": 1070
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.507824718952179,
      "learning_rate": 0.00019271764204803512,
      "loss": 0.9199,
      "step": 1075
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5733903050422668,
      "learning_rate": 0.00019265058578569878,
      "loss": 0.7459,
      "step": 1080
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.6304961442947388,
      "learning_rate": 0.00019258323398346346,
      "loss": 0.8584,
      "step": 1085
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5296971797943115,
      "learning_rate": 0.00019251558685617014,
      "loss": 0.8552,
      "step": 1090
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5370798707008362,
      "learning_rate": 0.00019244764461960191,
      "loss": 0.9183,
      "step": 1095
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5226066708564758,
      "learning_rate": 0.00019237940749048318,
      "loss": 0.883,
      "step": 1100
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6162168383598328,
      "learning_rate": 0.00019231087568647893,
      "loss": 0.924,
      "step": 1105
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6270351409912109,
      "learning_rate": 0.00019224204942619417,
      "loss": 0.9861,
      "step": 1110
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6001037955284119,
      "learning_rate": 0.00019217292892917325,
      "loss": 0.8134,
      "step": 1115
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5420514345169067,
      "learning_rate": 0.00019210351441589896,
      "loss": 0.7618,
      "step": 1120
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6640905141830444,
      "learning_rate": 0.000192033806107792,
      "loss": 0.9157,
      "step": 1125
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5504305362701416,
      "learning_rate": 0.00019196380422721026,
      "loss": 0.8995,
      "step": 1130
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6820715069770813,
      "learning_rate": 0.00019189350899744806,
      "loss": 0.9161,
      "step": 1135
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5563843250274658,
      "learning_rate": 0.00019182292064273544,
      "loss": 0.7748,
      "step": 1140
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6278269290924072,
      "learning_rate": 0.00019175203938823744,
      "loss": 0.7787,
      "step": 1145
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5549231171607971,
      "learning_rate": 0.00019168086546005346,
      "loss": 0.9038,
      "step": 1150
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5321454405784607,
      "learning_rate": 0.0001916093990852164,
      "loss": 0.7601,
      "step": 1155
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5376914143562317,
      "learning_rate": 0.0001915376404916921,
      "loss": 0.8395,
      "step": 1160
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5487528443336487,
      "learning_rate": 0.00019146558990837853,
      "loss": 0.7653,
      "step": 1165
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.54075688123703,
      "learning_rate": 0.00019139324756510496,
      "loss": 0.8811,
      "step": 1170
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5763316750526428,
      "learning_rate": 0.00019132061369263136,
      "loss": 0.9043,
      "step": 1175
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5854605436325073,
      "learning_rate": 0.00019124768852264774,
      "loss": 0.9339,
      "step": 1180
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.533819854259491,
      "learning_rate": 0.00019117447228777316,
      "loss": 0.8174,
      "step": 1185
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.48118382692337036,
      "learning_rate": 0.00019110096522155523,
      "loss": 0.8992,
      "step": 1190
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5746036171913147,
      "learning_rate": 0.00019102716755846913,
      "loss": 0.7809,
      "step": 1195
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5010830163955688,
      "learning_rate": 0.00019095307953391718,
      "loss": 0.834,
      "step": 1200
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5111698508262634,
      "learning_rate": 0.00019087870138422775,
      "loss": 0.7903,
      "step": 1205
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5492734909057617,
      "learning_rate": 0.00019080403334665474,
      "loss": 0.8529,
      "step": 1210
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5762624144554138,
      "learning_rate": 0.00019072907565937674,
      "loss": 0.8261,
      "step": 1215
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.7704640030860901,
      "learning_rate": 0.00019065382856149623,
      "loss": 0.8578,
      "step": 1220
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5472645163536072,
      "learning_rate": 0.0001905782922930389,
      "loss": 0.7221,
      "step": 1225
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.4950571656227112,
      "learning_rate": 0.0001905024670949528,
      "loss": 0.8874,
      "step": 1230
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.6059962511062622,
      "learning_rate": 0.00019042635320910768,
      "loss": 0.8667,
      "step": 1235
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5352611541748047,
      "learning_rate": 0.00019034995087829416,
      "loss": 0.8767,
      "step": 1240
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5850544571876526,
      "learning_rate": 0.00019027326034622288,
      "loss": 0.8335,
      "step": 1245
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.6183121204376221,
      "learning_rate": 0.00019019628185752382,
      "loss": 0.866,
      "step": 1250
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5667844414710999,
      "learning_rate": 0.00019011901565774554,
      "loss": 0.8816,
      "step": 1255
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.6229298114776611,
      "learning_rate": 0.0001900414619933543,
      "loss": 0.7841,
      "step": 1260
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5172335505485535,
      "learning_rate": 0.00018996362111173336,
      "loss": 0.841,
      "step": 1265
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5118042230606079,
      "learning_rate": 0.00018988549326118208,
      "loss": 0.8585,
      "step": 1270
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5746064782142639,
      "learning_rate": 0.0001898070786909153,
      "loss": 0.8849,
      "step": 1275
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5497964024543762,
      "learning_rate": 0.00018972837765106245,
      "loss": 0.8754,
      "step": 1280
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.4933513402938843,
      "learning_rate": 0.0001896493903926666,
      "loss": 0.8773,
      "step": 1285
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5292490124702454,
      "learning_rate": 0.00018957011716768402,
      "loss": 0.825,
      "step": 1290
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5365048050880432,
      "learning_rate": 0.00018949055822898298,
      "loss": 0.8519,
      "step": 1295
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5695987939834595,
      "learning_rate": 0.00018941071383034327,
      "loss": 0.9198,
      "step": 1300
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5951317548751831,
      "learning_rate": 0.00018933058422645514,
      "loss": 0.8947,
      "step": 1305
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5998417139053345,
      "learning_rate": 0.00018925016967291872,
      "loss": 0.9372,
      "step": 1310
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5713790655136108,
      "learning_rate": 0.00018916947042624293,
      "loss": 0.9231,
      "step": 1315
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5592827796936035,
      "learning_rate": 0.00018908848674384493,
      "loss": 0.8792,
      "step": 1320
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6018208265304565,
      "learning_rate": 0.00018900721888404917,
      "loss": 0.8533,
      "step": 1325
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5641770958900452,
      "learning_rate": 0.0001889256671060865,
      "loss": 0.8096,
      "step": 1330
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.49681103229522705,
      "learning_rate": 0.00018884383167009348,
      "loss": 0.7558,
      "step": 1335
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6027383208274841,
      "learning_rate": 0.0001887617128371115,
      "loss": 0.7801,
      "step": 1340
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5480598211288452,
      "learning_rate": 0.00018867931086908598,
      "loss": 0.9335,
      "step": 1345
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5374003648757935,
      "learning_rate": 0.00018859662602886538,
      "loss": 0.8229,
      "step": 1350
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5359901785850525,
      "learning_rate": 0.00018851365858020054,
      "loss": 0.888,
      "step": 1355
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6158584356307983,
      "learning_rate": 0.0001884304087877438,
      "loss": 0.8434,
      "step": 1360
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.6495197415351868,
      "learning_rate": 0.00018834687691704805,
      "loss": 0.902,
      "step": 1365
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5516675710678101,
      "learning_rate": 0.000188263063234566,
      "loss": 0.8055,
      "step": 1370
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5624775886535645,
      "learning_rate": 0.00018817896800764938,
      "loss": 0.8212,
      "step": 1375
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.47556230425834656,
      "learning_rate": 0.00018809459150454788,
      "loss": 0.7061,
      "step": 1380
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5220008492469788,
      "learning_rate": 0.00018800993399440845,
      "loss": 0.7378,
      "step": 1385
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5644752979278564,
      "learning_rate": 0.00018792499574727441,
      "loss": 0.8245,
      "step": 1390
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.6211121678352356,
      "learning_rate": 0.0001878397770340846,
      "loss": 0.9034,
      "step": 1395
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5424923896789551,
      "learning_rate": 0.00018775427812667248,
      "loss": 0.9151,
      "step": 1400
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5865684151649475,
      "learning_rate": 0.00018766849929776532,
      "loss": 0.8795,
      "step": 1405
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.6018354892730713,
      "learning_rate": 0.0001875824408209832,
      "loss": 0.6693,
      "step": 1410
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5678966045379639,
      "learning_rate": 0.0001874961029708383,
      "loss": 0.8994,
      "step": 1415
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.4987218677997589,
      "learning_rate": 0.000187409486022734,
      "loss": 0.9152,
      "step": 1420
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5679898858070374,
      "learning_rate": 0.00018732259025296388,
      "loss": 0.7408,
      "step": 1425
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.46406781673431396,
      "learning_rate": 0.0001872354159387109,
      "loss": 0.7725,
      "step": 1430
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5870163440704346,
      "learning_rate": 0.00018714796335804663,
      "loss": 0.9934,
      "step": 1435
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5287370681762695,
      "learning_rate": 0.00018706023278993014,
      "loss": 0.8331,
      "step": 1440
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.6170235872268677,
      "learning_rate": 0.00018697222451420734,
      "loss": 0.8614,
      "step": 1445
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5744518041610718,
      "learning_rate": 0.00018688393881160993,
      "loss": 0.8481,
      "step": 1450
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.6607945561408997,
      "learning_rate": 0.0001867953759637545,
      "loss": 0.7717,
      "step": 1455
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.6847087740898132,
      "learning_rate": 0.00018670653625314185,
      "loss": 0.9084,
      "step": 1460
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5219533443450928,
      "learning_rate": 0.00018661741996315573,
      "loss": 0.7862,
      "step": 1465
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.541492223739624,
      "learning_rate": 0.00018652802737806226,
      "loss": 0.7658,
      "step": 1470
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5432409048080444,
      "learning_rate": 0.00018643835878300887,
      "loss": 0.7273,
      "step": 1475
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5351329445838928,
      "learning_rate": 0.00018634841446402343,
      "loss": 0.7721,
      "step": 1480
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5327515602111816,
      "learning_rate": 0.0001862581947080132,
      "loss": 0.9053,
      "step": 1485
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5060054063796997,
      "learning_rate": 0.00018616769980276426,
      "loss": 0.8919,
      "step": 1490
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.6146079897880554,
      "learning_rate": 0.0001860769300369402,
      "loss": 0.8494,
      "step": 1495
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5177258849143982,
      "learning_rate": 0.0001859858857000814,
      "loss": 0.8473,
      "step": 1500
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5560600757598877,
      "learning_rate": 0.0001858945670826041,
      "loss": 0.8193,
      "step": 1505
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.6341832876205444,
      "learning_rate": 0.00018580297447579947,
      "loss": 0.8413,
      "step": 1510
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5502546429634094,
      "learning_rate": 0.0001857111081718326,
      "loss": 0.8251,
      "step": 1515
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.6432173252105713,
      "learning_rate": 0.00018561896846374168,
      "loss": 0.879,
      "step": 1520
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5981735587120056,
      "learning_rate": 0.00018552655564543695,
      "loss": 0.8699,
      "step": 1525
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5953419804573059,
      "learning_rate": 0.00018543387001169993,
      "loss": 0.8163,
      "step": 1530
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5923714637756348,
      "learning_rate": 0.0001853409118581823,
      "loss": 0.9091,
      "step": 1535
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9148262739181519,
      "learning_rate": 0.00018524768148140504,
      "loss": 0.7836,
      "step": 1540
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5257108807563782,
      "learning_rate": 0.00018515417917875748,
      "loss": 0.7875,
      "step": 1545
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6084693074226379,
      "learning_rate": 0.00018506040524849637,
      "loss": 0.8409,
      "step": 1550
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.49939265847206116,
      "learning_rate": 0.00018496635998974489,
      "loss": 0.8631,
      "step": 1555
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5126424431800842,
      "learning_rate": 0.00018487204370249167,
      "loss": 0.8033,
      "step": 1560
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.4886222183704376,
      "learning_rate": 0.00018477745668758996,
      "loss": 0.8473,
      "step": 1565
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5652774572372437,
      "learning_rate": 0.00018468259924675655,
      "loss": 0.9041,
      "step": 1570
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5118491053581238,
      "learning_rate": 0.00018458747168257085,
      "loss": 0.8749,
      "step": 1575
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5526809692382812,
      "learning_rate": 0.00018449207429847384,
      "loss": 0.915,
      "step": 1580
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6415011882781982,
      "learning_rate": 0.0001843964073987673,
      "loss": 0.878,
      "step": 1585
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5006015300750732,
      "learning_rate": 0.00018430047128861266,
      "loss": 0.7848,
      "step": 1590
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5490383505821228,
      "learning_rate": 0.0001842042662740301,
      "loss": 0.7836,
      "step": 1595
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5240172743797302,
      "learning_rate": 0.00018410779266189752,
      "loss": 0.7642,
      "step": 1600
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6275254487991333,
      "learning_rate": 0.00018401105075994967,
      "loss": 0.8773,
      "step": 1605
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5745741724967957,
      "learning_rate": 0.00018391404087677704,
      "loss": 0.904,
      "step": 1610
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6686891913414001,
      "learning_rate": 0.00018381676332182497,
      "loss": 0.8057,
      "step": 1615
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.501735508441925,
      "learning_rate": 0.00018371921840539264,
      "loss": 0.8361,
      "step": 1620
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6197385191917419,
      "learning_rate": 0.000183621406438632,
      "loss": 0.9193,
      "step": 1625
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5199822783470154,
      "learning_rate": 0.00018352332773354695,
      "loss": 0.8461,
      "step": 1630
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.48266005516052246,
      "learning_rate": 0.00018342498260299212,
      "loss": 0.7336,
      "step": 1635
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.7070842981338501,
      "learning_rate": 0.0001833263713606721,
      "loss": 0.9886,
      "step": 1640
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5649372339248657,
      "learning_rate": 0.00018322749432114028,
      "loss": 0.8146,
      "step": 1645
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5961683988571167,
      "learning_rate": 0.00018312835179979788,
      "loss": 0.7934,
      "step": 1650
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5285751819610596,
      "learning_rate": 0.00018302894411289304,
      "loss": 0.9225,
      "step": 1655
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5439310669898987,
      "learning_rate": 0.0001829292715775196,
      "loss": 0.9477,
      "step": 1660
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5521465539932251,
      "learning_rate": 0.00018282933451161643,
      "loss": 0.8531,
      "step": 1665
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.6291643381118774,
      "learning_rate": 0.00018272913323396598,
      "loss": 0.9441,
      "step": 1670
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5861973762512207,
      "learning_rate": 0.00018262866806419362,
      "loss": 0.7543,
      "step": 1675
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.6347075700759888,
      "learning_rate": 0.0001825279393227665,
      "loss": 0.8737,
      "step": 1680
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.6788772344589233,
      "learning_rate": 0.00018242694733099245,
      "loss": 0.8511,
      "step": 1685
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5907087326049805,
      "learning_rate": 0.0001823256924110191,
      "loss": 0.8505,
      "step": 1690
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.6607640385627747,
      "learning_rate": 0.0001822241748858327,
      "loss": 0.856,
      "step": 1695
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.6193135976791382,
      "learning_rate": 0.0001821223950792572,
      "loss": 0.8215,
      "step": 1700
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.6171255707740784,
      "learning_rate": 0.00018202035331595323,
      "loss": 0.7666,
      "step": 1705
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.49476027488708496,
      "learning_rate": 0.00018191804992141695,
      "loss": 0.8192,
      "step": 1710
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.6600732803344727,
      "learning_rate": 0.0001818154852219791,
      "loss": 0.8839,
      "step": 1715
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6551568508148193,
      "learning_rate": 0.00018171265954480394,
      "loss": 0.7813,
      "step": 1720
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.539866030216217,
      "learning_rate": 0.00018160957321788828,
      "loss": 0.8957,
      "step": 1725
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5746687054634094,
      "learning_rate": 0.00018150622657006016,
      "loss": 0.9047,
      "step": 1730
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.642020583152771,
      "learning_rate": 0.0001814026199309783,
      "loss": 0.7867,
      "step": 1735
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.47608810663223267,
      "learning_rate": 0.00018129875363113044,
      "loss": 0.9095,
      "step": 1740
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5929521918296814,
      "learning_rate": 0.0001811946280018328,
      "loss": 0.8587,
      "step": 1745
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6232489347457886,
      "learning_rate": 0.00018109024337522876,
      "loss": 0.8188,
      "step": 1750
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.47936391830444336,
      "learning_rate": 0.00018098560008428778,
      "loss": 0.8164,
      "step": 1755
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6068043112754822,
      "learning_rate": 0.00018088069846280456,
      "loss": 0.8273,
      "step": 1760
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5103864073753357,
      "learning_rate": 0.00018077553884539773,
      "loss": 0.8453,
      "step": 1765
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5663210153579712,
      "learning_rate": 0.0001806701215675089,
      "loss": 0.8745,
      "step": 1770
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6410555243492126,
      "learning_rate": 0.00018056444696540162,
      "loss": 0.8698,
      "step": 1775
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.7071842551231384,
      "learning_rate": 0.00018045851537616016,
      "loss": 0.7438,
      "step": 1780
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.551302969455719,
      "learning_rate": 0.0001803523271376887,
      "loss": 0.8576,
      "step": 1785
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5493254065513611,
      "learning_rate": 0.0001802458825887099,
      "loss": 0.7339,
      "step": 1790
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6084312200546265,
      "learning_rate": 0.00018013918206876415,
      "loss": 0.7465,
      "step": 1795
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.52370685338974,
      "learning_rate": 0.00018003222591820824,
      "loss": 0.8574,
      "step": 1800
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.46578991413116455,
      "learning_rate": 0.00017992501447821452,
      "loss": 0.7609,
      "step": 1805
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.6183673143386841,
      "learning_rate": 0.00017981754809076952,
      "loss": 0.8273,
      "step": 1810
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.6610841751098633,
      "learning_rate": 0.0001797098270986731,
      "loss": 0.9363,
      "step": 1815
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5758525729179382,
      "learning_rate": 0.00017960185184553716,
      "loss": 0.7438,
      "step": 1820
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5575788021087646,
      "learning_rate": 0.00017949362267578485,
      "loss": 0.8472,
      "step": 1825
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5445650815963745,
      "learning_rate": 0.0001793851399346491,
      "loss": 0.895,
      "step": 1830
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.509607195854187,
      "learning_rate": 0.0001792764039681717,
      "loss": 0.8065,
      "step": 1835
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5268949270248413,
      "learning_rate": 0.00017916741512320227,
      "loss": 0.7979,
      "step": 1840
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5786017775535583,
      "learning_rate": 0.00017905817374739704,
      "loss": 0.8833,
      "step": 1845
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5071285963058472,
      "learning_rate": 0.0001789486801892177,
      "loss": 0.7521,
      "step": 1850
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.620481550693512,
      "learning_rate": 0.0001788389347979305,
      "loss": 0.8354,
      "step": 1855
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5200830698013306,
      "learning_rate": 0.00017872893792360484,
      "loss": 0.9292,
      "step": 1860
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.594330370426178,
      "learning_rate": 0.00017861868991711247,
      "loss": 0.9231,
      "step": 1865
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.542568564414978,
      "learning_rate": 0.00017850819113012601,
      "loss": 0.6837,
      "step": 1870
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.6251922845840454,
      "learning_rate": 0.0001783974419151182,
      "loss": 0.7409,
      "step": 1875
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5484223365783691,
      "learning_rate": 0.0001782864426253606,
      "loss": 0.824,
      "step": 1880
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.7240644693374634,
      "learning_rate": 0.00017817519361492228,
      "loss": 0.7806,
      "step": 1885
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5933576822280884,
      "learning_rate": 0.00017806369523866913,
      "loss": 0.8518,
      "step": 1890
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6882241368293762,
      "learning_rate": 0.00017795194785226229,
      "loss": 0.9101,
      "step": 1895
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5205492973327637,
      "learning_rate": 0.00017783995181215728,
      "loss": 0.8973,
      "step": 1900
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6640832424163818,
      "learning_rate": 0.00017772770747560273,
      "loss": 0.9665,
      "step": 1905
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6470832228660583,
      "learning_rate": 0.00017761521520063945,
      "loss": 0.8718,
      "step": 1910
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.704579770565033,
      "learning_rate": 0.0001775024753460989,
      "loss": 0.9091,
      "step": 1915
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.7193452715873718,
      "learning_rate": 0.00017738948827160242,
      "loss": 0.7795,
      "step": 1920
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5544724464416504,
      "learning_rate": 0.0001772762543375599,
      "loss": 0.8755,
      "step": 1925
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5948014855384827,
      "learning_rate": 0.00017716277390516876,
      "loss": 0.8527,
      "step": 1930
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.7045935392379761,
      "learning_rate": 0.00017704904733641255,
      "loss": 0.9894,
      "step": 1935
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5722846388816833,
      "learning_rate": 0.00017693507499406,
      "loss": 0.8367,
      "step": 1940
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5934826135635376,
      "learning_rate": 0.000176820857241664,
      "loss": 0.96,
      "step": 1945
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6156487464904785,
      "learning_rate": 0.00017670639444355998,
      "loss": 0.8629,
      "step": 1950
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6569040417671204,
      "learning_rate": 0.0001765916869648652,
      "loss": 0.8539,
      "step": 1955
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5664108395576477,
      "learning_rate": 0.0001764767351714774,
      "loss": 0.8514,
      "step": 1960
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.681502640247345,
      "learning_rate": 0.0001763615394300735,
      "loss": 0.9149,
      "step": 1965
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5791369080543518,
      "learning_rate": 0.00017624610010810878,
      "loss": 0.8566,
      "step": 1970
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5944411158561707,
      "learning_rate": 0.00017613041757381538,
      "loss": 0.8279,
      "step": 1975
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5867791175842285,
      "learning_rate": 0.00017601449219620125,
      "loss": 0.8742,
      "step": 1980
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5641552805900574,
      "learning_rate": 0.00017589832434504902,
      "loss": 0.7705,
      "step": 1985
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5856841206550598,
      "learning_rate": 0.0001757819143909147,
      "loss": 0.8156,
      "step": 1990
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5216368436813354,
      "learning_rate": 0.00017566526270512665,
      "loss": 0.965,
      "step": 1995
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.6632785797119141,
      "learning_rate": 0.0001755483696597842,
      "loss": 0.88,
      "step": 2000
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5716277360916138,
      "learning_rate": 0.0001754312356277567,
      "loss": 0.8717,
      "step": 2005
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5452391505241394,
      "learning_rate": 0.0001753138609826822,
      "loss": 0.7268,
      "step": 2010
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5985669493675232,
      "learning_rate": 0.00017519624609896615,
      "loss": 0.8528,
      "step": 2015
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.605197548866272,
      "learning_rate": 0.0001750783913517804,
      "loss": 0.9327,
      "step": 2020
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.7269711494445801,
      "learning_rate": 0.0001749602971170619,
      "loss": 0.9022,
      "step": 2025
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.6030486226081848,
      "learning_rate": 0.00017484196377151161,
      "loss": 0.851,
      "step": 2030
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5089486837387085,
      "learning_rate": 0.00017472339169259307,
      "loss": 0.7455,
      "step": 2035
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5635782480239868,
      "learning_rate": 0.00017460458125853143,
      "loss": 1.0043,
      "step": 2040
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5171327590942383,
      "learning_rate": 0.0001744855328483122,
      "loss": 0.8556,
      "step": 2045
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5177988409996033,
      "learning_rate": 0.00017436624684167984,
      "loss": 0.8203,
      "step": 2050
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.702873170375824,
      "learning_rate": 0.00017424672361913686,
      "loss": 0.8809,
      "step": 2055
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5576356649398804,
      "learning_rate": 0.00017412696356194235,
      "loss": 0.8858,
      "step": 2060
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.6151427030563354,
      "learning_rate": 0.0001740069670521109,
      "loss": 0.9176,
      "step": 2065
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5806076526641846,
      "learning_rate": 0.00017388673447241138,
      "loss": 1.0125,
      "step": 2070
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6101603507995605,
      "learning_rate": 0.00017376626620636557,
      "loss": 0.8164,
      "step": 2075
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6350337266921997,
      "learning_rate": 0.00017364556263824719,
      "loss": 0.86,
      "step": 2080
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5311822891235352,
      "learning_rate": 0.00017352462415308044,
      "loss": 0.8738,
      "step": 2085
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5520626306533813,
      "learning_rate": 0.0001734034511366389,
      "loss": 0.8186,
      "step": 2090
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6877513527870178,
      "learning_rate": 0.00017328204397544424,
      "loss": 0.8194,
      "step": 2095
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6582801342010498,
      "learning_rate": 0.00017316040305676508,
      "loss": 0.9177,
      "step": 2100
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.637827455997467,
      "learning_rate": 0.0001730385287686156,
      "loss": 0.8896,
      "step": 2105
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5763481855392456,
      "learning_rate": 0.00017291642149975446,
      "loss": 0.8431,
      "step": 2110
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5631943941116333,
      "learning_rate": 0.00017279408163968342,
      "loss": 0.8405,
      "step": 2115
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5558026432991028,
      "learning_rate": 0.00017267150957864623,
      "loss": 0.8788,
      "step": 2120
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6178169250488281,
      "learning_rate": 0.00017254870570762733,
      "loss": 0.8176,
      "step": 2125
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5574104189872742,
      "learning_rate": 0.0001724256704183505,
      "loss": 0.8623,
      "step": 2130
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6056390404701233,
      "learning_rate": 0.00017230240410327782,
      "loss": 0.8526,
      "step": 2135
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.584563136100769,
      "learning_rate": 0.00017217890715560822,
      "loss": 0.8111,
      "step": 2140
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5519852042198181,
      "learning_rate": 0.0001720551799692764,
      "loss": 0.767,
      "step": 2145
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5955138206481934,
      "learning_rate": 0.00017193122293895138,
      "loss": 0.9088,
      "step": 2150
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.637936532497406,
      "learning_rate": 0.00017180703646003535,
      "loss": 0.7683,
      "step": 2155
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.5777458548545837,
      "learning_rate": 0.0001716826209286625,
      "loss": 0.8103,
      "step": 2160
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.5864526629447937,
      "learning_rate": 0.0001715579767416976,
      "loss": 0.8297,
      "step": 2165
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.5473954677581787,
      "learning_rate": 0.0001714331042967348,
      "loss": 0.8161,
      "step": 2170
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.5501269102096558,
      "learning_rate": 0.00017130800399209632,
      "loss": 0.774,
      "step": 2175
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.622527539730072,
      "learning_rate": 0.00017118267622683123,
      "loss": 0.9349,
      "step": 2180
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6729717254638672,
      "learning_rate": 0.00017105712140071426,
      "loss": 0.8568,
      "step": 2185
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.5891861319541931,
      "learning_rate": 0.00017093133991424425,
      "loss": 0.787,
      "step": 2190
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.683480978012085,
      "learning_rate": 0.00017080533216864318,
      "loss": 0.8124,
      "step": 2195
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.7241565585136414,
      "learning_rate": 0.00017067909856585472,
      "loss": 0.6895,
      "step": 2200
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6606466770172119,
      "learning_rate": 0.00017055263950854297,
      "loss": 0.8192,
      "step": 2205
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.5163353681564331,
      "learning_rate": 0.00017042595540009124,
      "loss": 0.8085,
      "step": 2210
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6061686277389526,
      "learning_rate": 0.00017029904664460065,
      "loss": 0.9215,
      "step": 2215
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.5943601727485657,
      "learning_rate": 0.00017017191364688896,
      "loss": 0.8554,
      "step": 2220
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6568538546562195,
      "learning_rate": 0.00017004455681248918,
      "loss": 0.8472,
      "step": 2225
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.554853081703186,
      "learning_rate": 0.0001699169765476484,
      "loss": 0.8969,
      "step": 2230
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.4984689950942993,
      "learning_rate": 0.0001697891732593263,
      "loss": 0.8548,
      "step": 2235
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6138368248939514,
      "learning_rate": 0.00016966114735519406,
      "loss": 0.8888,
      "step": 2240
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.8124855756759644,
      "learning_rate": 0.00016953289924363297,
      "loss": 0.9703,
      "step": 2245
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6956800222396851,
      "learning_rate": 0.00016940442933373304,
      "loss": 0.7584,
      "step": 2250
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.7704644799232483,
      "learning_rate": 0.00016927573803529185,
      "loss": 0.7784,
      "step": 2255
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5658566355705261,
      "learning_rate": 0.00016914682575881314,
      "loss": 0.8646,
      "step": 2260
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6220143437385559,
      "learning_rate": 0.00016901769291550558,
      "loss": 0.7348,
      "step": 2265
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6346325278282166,
      "learning_rate": 0.00016888833991728137,
      "loss": 0.8734,
      "step": 2270
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5919114947319031,
      "learning_rate": 0.00016875876717675496,
      "loss": 0.781,
      "step": 2275
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5826617479324341,
      "learning_rate": 0.00016862897510724176,
      "loss": 0.8185,
      "step": 2280
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5474236607551575,
      "learning_rate": 0.00016849896412275683,
      "loss": 0.7699,
      "step": 2285
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.619706928730011,
      "learning_rate": 0.0001683687346380135,
      "loss": 0.852,
      "step": 2290
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.8853769302368164,
      "learning_rate": 0.000168238287068422,
      "loss": 0.8513,
      "step": 2295
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6231085062026978,
      "learning_rate": 0.00016810762183008845,
      "loss": 0.7905,
      "step": 2300
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5759189128875732,
      "learning_rate": 0.00016797673933981297,
      "loss": 0.8566,
      "step": 2305
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5591956973075867,
      "learning_rate": 0.000167845640015089,
      "loss": 0.9151,
      "step": 2310
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.7117490172386169,
      "learning_rate": 0.00016771432427410137,
      "loss": 0.7662,
      "step": 2315
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5831469893455505,
      "learning_rate": 0.00016758279253572546,
      "loss": 0.8582,
      "step": 2320
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.7675443291664124,
      "learning_rate": 0.00016745104521952552,
      "loss": 0.8713,
      "step": 2325
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5224565863609314,
      "learning_rate": 0.0001673190827457535,
      "loss": 0.784,
      "step": 2330
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6027007102966309,
      "learning_rate": 0.00016718690553534766,
      "loss": 0.8347,
      "step": 2335
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.5675181746482849,
      "learning_rate": 0.0001670545140099312,
      "loss": 0.7134,
      "step": 2340
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6312824487686157,
      "learning_rate": 0.00016692190859181102,
      "loss": 0.8298,
      "step": 2345
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.5071117281913757,
      "learning_rate": 0.00016678908970397624,
      "loss": 0.845,
      "step": 2350
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6821117997169495,
      "learning_rate": 0.00016665605777009697,
      "loss": 0.8077,
      "step": 2355
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.5504366755485535,
      "learning_rate": 0.00016652281321452282,
      "loss": 0.8021,
      "step": 2360
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.5457181930541992,
      "learning_rate": 0.0001663893564622817,
      "loss": 0.7769,
      "step": 2365
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6445764899253845,
      "learning_rate": 0.00016625568793907834,
      "loss": 0.7976,
      "step": 2370
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6524176001548767,
      "learning_rate": 0.00016612180807129304,
      "loss": 0.7837,
      "step": 2375
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.5401653051376343,
      "learning_rate": 0.00016598771728598024,
      "loss": 0.6942,
      "step": 2380
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.5231527090072632,
      "learning_rate": 0.00016585341601086712,
      "loss": 0.7986,
      "step": 2385
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6786569952964783,
      "learning_rate": 0.0001657189046743523,
      "loss": 0.8576,
      "step": 2390
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6088935732841492,
      "learning_rate": 0.0001655841837055046,
      "loss": 0.8909,
      "step": 2395
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6915789842605591,
      "learning_rate": 0.00016544925353406125,
      "loss": 0.7604,
      "step": 2400
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.5768089890480042,
      "learning_rate": 0.0001653141145904271,
      "loss": 0.8903,
      "step": 2405
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6887524127960205,
      "learning_rate": 0.0001651787673056728,
      "loss": 0.8363,
      "step": 2410
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6299374103546143,
      "learning_rate": 0.00016504321211153355,
      "loss": 0.8185,
      "step": 2415
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6226490139961243,
      "learning_rate": 0.00016490744944040777,
      "loss": 0.8176,
      "step": 2420
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6668693423271179,
      "learning_rate": 0.00016477147972535577,
      "loss": 0.8422,
      "step": 2425
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.7947617769241333,
      "learning_rate": 0.00016463530340009817,
      "loss": 0.8986,
      "step": 2430
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.5857140421867371,
      "learning_rate": 0.00016449892089901477,
      "loss": 0.75,
      "step": 2435
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.658028244972229,
      "learning_rate": 0.00016436233265714297,
      "loss": 0.8304,
      "step": 2440
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.5767120122909546,
      "learning_rate": 0.00016422553911017642,
      "loss": 0.8077,
      "step": 2445
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.607893705368042,
      "learning_rate": 0.00016408854069446374,
      "loss": 0.8232,
      "step": 2450
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6129177212715149,
      "learning_rate": 0.00016395133784700695,
      "loss": 0.8561,
      "step": 2455
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.5294451117515564,
      "learning_rate": 0.00016381393100546026,
      "loss": 0.7024,
      "step": 2460
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.5510571002960205,
      "learning_rate": 0.00016367632060812856,
      "loss": 0.8306,
      "step": 2465
    },
    {
      "epoch": 0.28,
      "grad_norm": 5.814347267150879,
      "learning_rate": 0.00016353850709396604,
      "loss": 0.8465,
      "step": 2470
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.5485466718673706,
      "learning_rate": 0.00016340049090257476,
      "loss": 0.741,
      "step": 2475
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6270123720169067,
      "learning_rate": 0.00016326227247420337,
      "loss": 0.9109,
      "step": 2480
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6270929574966431,
      "learning_rate": 0.00016312385224974554,
      "loss": 0.8672,
      "step": 2485
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.5627338886260986,
      "learning_rate": 0.0001629852306707387,
      "loss": 0.9262,
      "step": 2490
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6023781299591064,
      "learning_rate": 0.00016284640817936254,
      "loss": 0.7498,
      "step": 2495
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.5960550308227539,
      "learning_rate": 0.00016270738521843763,
      "loss": 0.7668,
      "step": 2500
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.5488481521606445,
      "learning_rate": 0.000162568162231424,
      "loss": 0.8611,
      "step": 2505
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.547410786151886,
      "learning_rate": 0.00016242873966241974,
      "loss": 0.7795,
      "step": 2510
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.5622233748435974,
      "learning_rate": 0.00016228911795615952,
      "loss": 0.8837,
      "step": 2515
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.8012298941612244,
      "learning_rate": 0.00016214929755801335,
      "loss": 0.895,
      "step": 2520
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6051672101020813,
      "learning_rate": 0.00016200927891398489,
      "loss": 0.7937,
      "step": 2525
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6143134832382202,
      "learning_rate": 0.00016186906247071025,
      "loss": 0.8751,
      "step": 2530
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6904672980308533,
      "learning_rate": 0.0001617286486754565,
      "loss": 0.7504,
      "step": 2535
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.5270385146141052,
      "learning_rate": 0.00016158803797612019,
      "loss": 0.8145,
      "step": 2540
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6666483879089355,
      "learning_rate": 0.00016144723082122596,
      "loss": 0.778,
      "step": 2545
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.5860428214073181,
      "learning_rate": 0.0001613062276599251,
      "loss": 0.8231,
      "step": 2550
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6345640420913696,
      "learning_rate": 0.00016116502894199418,
      "loss": 0.8982,
      "step": 2555
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.590721845626831,
      "learning_rate": 0.00016102363511783362,
      "loss": 0.833,
      "step": 2560
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.5986994504928589,
      "learning_rate": 0.00016088204663846595,
      "loss": 0.8326,
      "step": 2565
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.5553504824638367,
      "learning_rate": 0.00016074026395553487,
      "loss": 0.7604,
      "step": 2570
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6035515666007996,
      "learning_rate": 0.00016059828752130345,
      "loss": 0.7755,
      "step": 2575
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6856073141098022,
      "learning_rate": 0.0001604561177886528,
      "loss": 0.7277,
      "step": 2580
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.5378929972648621,
      "learning_rate": 0.00016031375521108066,
      "loss": 0.8081,
      "step": 2585
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6315340399742126,
      "learning_rate": 0.00016017120024269986,
      "loss": 0.9446,
      "step": 2590
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5610125660896301,
      "learning_rate": 0.00016002845333823695,
      "loss": 0.7239,
      "step": 2595
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6176686882972717,
      "learning_rate": 0.00015988551495303073,
      "loss": 0.8375,
      "step": 2600
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5788987278938293,
      "learning_rate": 0.00015974238554303076,
      "loss": 0.8433,
      "step": 2605
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6443178057670593,
      "learning_rate": 0.00015959906556479596,
      "loss": 0.9211,
      "step": 2610
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6430835723876953,
      "learning_rate": 0.00015945555547549315,
      "loss": 0.8475,
      "step": 2615
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6127652525901794,
      "learning_rate": 0.00015931185573289555,
      "loss": 0.8167,
      "step": 2620
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5990703105926514,
      "learning_rate": 0.00015916796679538134,
      "loss": 0.7095,
      "step": 2625
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6349881887435913,
      "learning_rate": 0.00015902388912193222,
      "loss": 0.8973,
      "step": 2630
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5582308173179626,
      "learning_rate": 0.0001588796231721319,
      "loss": 0.7662,
      "step": 2635
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5647494792938232,
      "learning_rate": 0.00015873516940616468,
      "loss": 0.7906,
      "step": 2640
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.630150556564331,
      "learning_rate": 0.00015859052828481394,
      "loss": 0.8536,
      "step": 2645
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6978116035461426,
      "learning_rate": 0.0001584457002694607,
      "loss": 0.7525,
      "step": 2650
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6089206337928772,
      "learning_rate": 0.00015830068582208217,
      "loss": 0.849,
      "step": 2655
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5681561231613159,
      "learning_rate": 0.0001581554854052502,
      "loss": 0.8956,
      "step": 2660
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6889328956604004,
      "learning_rate": 0.0001580100994821299,
      "loss": 0.8592,
      "step": 2665
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5611298084259033,
      "learning_rate": 0.0001578645285164781,
      "loss": 0.8436,
      "step": 2670
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6381964087486267,
      "learning_rate": 0.00015771877297264184,
      "loss": 0.7636,
      "step": 2675
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6424821615219116,
      "learning_rate": 0.00015757283331555697,
      "loss": 0.8919,
      "step": 2680
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5920271873474121,
      "learning_rate": 0.00015742671001074668,
      "loss": 0.9166,
      "step": 2685
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6440754532814026,
      "learning_rate": 0.00015728040352431982,
      "loss": 0.8435,
      "step": 2690
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6160061359405518,
      "learning_rate": 0.00015713391432296977,
      "loss": 0.7851,
      "step": 2695
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5635654926300049,
      "learning_rate": 0.00015698724287397254,
      "loss": 0.8102,
      "step": 2700
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5842839479446411,
      "learning_rate": 0.00015684038964518558,
      "loss": 0.878,
      "step": 2705
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6162332892417908,
      "learning_rate": 0.00015669335510504618,
      "loss": 0.8285,
      "step": 2710
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5869714617729187,
      "learning_rate": 0.00015654613972256997,
      "loss": 0.8949,
      "step": 2715
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6483595371246338,
      "learning_rate": 0.00015639874396734943,
      "loss": 0.739,
      "step": 2720
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6540059447288513,
      "learning_rate": 0.00015625116830955243,
      "loss": 0.9031,
      "step": 2725
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5853153467178345,
      "learning_rate": 0.00015610341321992068,
      "loss": 0.7753,
      "step": 2730
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.521800696849823,
      "learning_rate": 0.0001559554791697682,
      "loss": 0.8599,
      "step": 2735
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5266906023025513,
      "learning_rate": 0.00015580736663097996,
      "loss": 0.8546,
      "step": 2740
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.585526168346405,
      "learning_rate": 0.00015565907607601023,
      "loss": 0.7511,
      "step": 2745
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5523586273193359,
      "learning_rate": 0.00015551060797788107,
      "loss": 0.8193,
      "step": 2750
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6577056050300598,
      "learning_rate": 0.00015536196281018097,
      "loss": 0.8619,
      "step": 2755
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5628554224967957,
      "learning_rate": 0.00015521314104706318,
      "loss": 0.7435,
      "step": 2760
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5676989555358887,
      "learning_rate": 0.00015506414316324426,
      "loss": 0.8461,
      "step": 2765
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6753340363502502,
      "learning_rate": 0.0001549149696340026,
      "loss": 0.8576,
      "step": 2770
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6714786887168884,
      "learning_rate": 0.00015476562093517688,
      "loss": 0.8376,
      "step": 2775
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.5616926550865173,
      "learning_rate": 0.00015461609754316446,
      "loss": 0.7985,
      "step": 2780
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.5463203191757202,
      "learning_rate": 0.00015446639993492003,
      "loss": 0.844,
      "step": 2785
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6310843229293823,
      "learning_rate": 0.00015431652858795394,
      "loss": 0.8265,
      "step": 2790
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6531693935394287,
      "learning_rate": 0.00015416648398033076,
      "loss": 0.9024,
      "step": 2795
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.4960426688194275,
      "learning_rate": 0.00015401626659066774,
      "loss": 0.8993,
      "step": 2800
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6042740345001221,
      "learning_rate": 0.0001538658768981333,
      "loss": 0.8909,
      "step": 2805
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.579511284828186,
      "learning_rate": 0.00015371531538244546,
      "loss": 0.7696,
      "step": 2810
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6731118559837341,
      "learning_rate": 0.00015356458252387025,
      "loss": 0.7309,
      "step": 2815
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.5301114916801453,
      "learning_rate": 0.00015341367880322042,
      "loss": 0.7494,
      "step": 2820
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.7461742162704468,
      "learning_rate": 0.00015326260470185352,
      "loss": 0.8429,
      "step": 2825
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6235002875328064,
      "learning_rate": 0.00015311136070167075,
      "loss": 0.7959,
      "step": 2830
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.5418492555618286,
      "learning_rate": 0.00015295994728511532,
      "loss": 0.8112,
      "step": 2835
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6243062019348145,
      "learning_rate": 0.0001528083649351706,
      "loss": 0.8134,
      "step": 2840
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.5746551156044006,
      "learning_rate": 0.00015265661413535906,
      "loss": 0.8692,
      "step": 2845
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.7506961226463318,
      "learning_rate": 0.00015250469536974042,
      "loss": 0.8399,
      "step": 2850
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6415050625801086,
      "learning_rate": 0.00015235260912291012,
      "loss": 0.7829,
      "step": 2855
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6047353148460388,
      "learning_rate": 0.00015220035587999796,
      "loss": 0.8918,
      "step": 2860
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.573059618473053,
      "learning_rate": 0.00015204793612666627,
      "loss": 0.817,
      "step": 2865
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.5482955574989319,
      "learning_rate": 0.00015189535034910873,
      "loss": 0.7738,
      "step": 2870
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.5009284615516663,
      "learning_rate": 0.00015174259903404845,
      "loss": 0.7723,
      "step": 2875
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.5775097012519836,
      "learning_rate": 0.00015158968266873658,
      "loss": 0.804,
      "step": 2880
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.5707810521125793,
      "learning_rate": 0.00015143660174095081,
      "loss": 0.7024,
      "step": 2885
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.5989297032356262,
      "learning_rate": 0.00015128335673899375,
      "loss": 0.8147,
      "step": 2890
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.5856032967567444,
      "learning_rate": 0.00015112994815169142,
      "loss": 0.8361,
      "step": 2895
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6352725028991699,
      "learning_rate": 0.0001509763764683915,
      "loss": 0.8242,
      "step": 2900
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.613037645816803,
      "learning_rate": 0.00015082264217896208,
      "loss": 0.9165,
      "step": 2905
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.555920422077179,
      "learning_rate": 0.00015066874577378988,
      "loss": 0.8409,
      "step": 2910
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.616894006729126,
      "learning_rate": 0.00015051468774377868,
      "loss": 0.7981,
      "step": 2915
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6403358578681946,
      "learning_rate": 0.00015036046858034796,
      "loss": 0.9592,
      "step": 2920
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6272974014282227,
      "learning_rate": 0.00015020608877543102,
      "loss": 0.7743,
      "step": 2925
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6145214438438416,
      "learning_rate": 0.00015005154882147373,
      "loss": 0.7876,
      "step": 2930
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.5387482047080994,
      "learning_rate": 0.00014989684921143268,
      "loss": 0.8426,
      "step": 2935
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6402955651283264,
      "learning_rate": 0.0001497419904387738,
      "loss": 0.8531,
      "step": 2940
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6462345719337463,
      "learning_rate": 0.0001495869729974708,
      "loss": 0.8089,
      "step": 2945
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.5510848760604858,
      "learning_rate": 0.00014943179738200333,
      "loss": 0.7983,
      "step": 2950
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6539138555526733,
      "learning_rate": 0.00014927646408735576,
      "loss": 0.7698,
      "step": 2955
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.5367149710655212,
      "learning_rate": 0.00014912097360901533,
      "loss": 0.7783,
      "step": 2960
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.5643497109413147,
      "learning_rate": 0.0001489653264429707,
      "loss": 0.7887,
      "step": 2965
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6941254138946533,
      "learning_rate": 0.0001488095230857104,
      "loss": 0.9146,
      "step": 2970
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.5869201421737671,
      "learning_rate": 0.00014865356403422105,
      "loss": 0.8394,
      "step": 2975
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.8639833331108093,
      "learning_rate": 0.00014849744978598603,
      "loss": 0.7952,
      "step": 2980
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6019383668899536,
      "learning_rate": 0.00014834118083898373,
      "loss": 0.8434,
      "step": 2985
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6566647887229919,
      "learning_rate": 0.00014818475769168594,
      "loss": 0.7786,
      "step": 2990
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6752328276634216,
      "learning_rate": 0.00014802818084305646,
      "loss": 0.7453,
      "step": 2995
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.5767747759819031,
      "learning_rate": 0.00014787145079254925,
      "loss": 0.8015,
      "step": 3000
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6346129775047302,
      "learning_rate": 0.00014771456804010702,
      "loss": 0.9022,
      "step": 3005
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6062515377998352,
      "learning_rate": 0.0001475575330861595,
      "loss": 0.887,
      "step": 3010
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.5885165929794312,
      "learning_rate": 0.00014740034643162208,
      "loss": 0.8497,
      "step": 3015
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.5770689845085144,
      "learning_rate": 0.00014724300857789385,
      "loss": 0.7701,
      "step": 3020
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6336144804954529,
      "learning_rate": 0.00014708552002685633,
      "loss": 0.7648,
      "step": 3025
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.614093005657196,
      "learning_rate": 0.00014692788128087175,
      "loss": 0.7606,
      "step": 3030
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6177241206169128,
      "learning_rate": 0.00014677009284278127,
      "loss": 0.7948,
      "step": 3035
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6105360984802246,
      "learning_rate": 0.00014661215521590375,
      "loss": 0.6969,
      "step": 3040
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.5755805373191833,
      "learning_rate": 0.00014645406890403384,
      "loss": 0.912,
      "step": 3045
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6132418513298035,
      "learning_rate": 0.00014629583441144042,
      "loss": 0.8358,
      "step": 3050
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.5401825308799744,
      "learning_rate": 0.00014613745224286524,
      "loss": 0.8461,
      "step": 3055
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6283666491508484,
      "learning_rate": 0.0001459789229035208,
      "loss": 0.8416,
      "step": 3060
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.5837119221687317,
      "learning_rate": 0.00014582024689908932,
      "loss": 0.8082,
      "step": 3065
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6010891199111938,
      "learning_rate": 0.0001456614247357208,
      "loss": 0.7427,
      "step": 3070
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.625624418258667,
      "learning_rate": 0.00014550245692003132,
      "loss": 0.8802,
      "step": 3075
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.5869577527046204,
      "learning_rate": 0.00014534334395910171,
      "loss": 0.8265,
      "step": 3080
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6369906067848206,
      "learning_rate": 0.0001451840863604758,
      "loss": 0.7906,
      "step": 3085
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.5773164629936218,
      "learning_rate": 0.00014502468463215866,
      "loss": 0.7897,
      "step": 3090
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.646680474281311,
      "learning_rate": 0.00014486513928261524,
      "loss": 0.8279,
      "step": 3095
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6799820065498352,
      "learning_rate": 0.00014470545082076854,
      "loss": 0.8948,
      "step": 3100
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.5638763904571533,
      "learning_rate": 0.0001445456197559981,
      "loss": 0.8377,
      "step": 3105
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.5281423330307007,
      "learning_rate": 0.00014438564659813833,
      "loss": 0.8436,
      "step": 3110
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.54918372631073,
      "learning_rate": 0.00014422553185747692,
      "loss": 0.7828,
      "step": 3115
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.5749617218971252,
      "learning_rate": 0.00014406527604475308,
      "loss": 0.7934,
      "step": 3120
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.5960623025894165,
      "learning_rate": 0.00014390487967115619,
      "loss": 0.8148,
      "step": 3125
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.5716930031776428,
      "learning_rate": 0.00014374434324832385,
      "loss": 0.9293,
      "step": 3130
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6069075465202332,
      "learning_rate": 0.00014358366728834044,
      "loss": 0.7865,
      "step": 3135
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6393450498580933,
      "learning_rate": 0.0001434228523037355,
      "loss": 0.8179,
      "step": 3140
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6741149425506592,
      "learning_rate": 0.00014326189880748186,
      "loss": 0.867,
      "step": 3145
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.5840499997138977,
      "learning_rate": 0.00014310080731299443,
      "loss": 0.8286,
      "step": 3150
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.5931133031845093,
      "learning_rate": 0.0001429395783341281,
      "loss": 0.9023,
      "step": 3155
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.7968230843544006,
      "learning_rate": 0.00014277821238517643,
      "loss": 0.754,
      "step": 3160
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.56842041015625,
      "learning_rate": 0.00014261670998086986,
      "loss": 0.6804,
      "step": 3165
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6133948564529419,
      "learning_rate": 0.00014245507163637407,
      "loss": 0.8501,
      "step": 3170
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.5714240670204163,
      "learning_rate": 0.00014229329786728839,
      "loss": 0.8027,
      "step": 3175
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.8033362627029419,
      "learning_rate": 0.00014213138918964415,
      "loss": 0.8119,
      "step": 3180
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.7656739950180054,
      "learning_rate": 0.00014196934611990296,
      "loss": 0.8129,
      "step": 3185
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.5456616282463074,
      "learning_rate": 0.0001418071691749552,
      "loss": 0.6827,
      "step": 3190
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.5840951204299927,
      "learning_rate": 0.00014164485887211824,
      "loss": 0.67,
      "step": 3195
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.7157981395721436,
      "learning_rate": 0.0001414824157291348,
      "loss": 0.861,
      "step": 3200
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6238237619400024,
      "learning_rate": 0.00014131984026417147,
      "loss": 0.8524,
      "step": 3205
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.7766329646110535,
      "learning_rate": 0.00014115713299581677,
      "loss": 0.7376,
      "step": 3210
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5822386741638184,
      "learning_rate": 0.00014099429444307973,
      "loss": 0.9006,
      "step": 3215
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6553748250007629,
      "learning_rate": 0.00014083132512538815,
      "loss": 0.781,
      "step": 3220
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5938107967376709,
      "learning_rate": 0.00014066822556258693,
      "loss": 0.7423,
      "step": 3225
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6933189630508423,
      "learning_rate": 0.00014050499627493647,
      "loss": 0.8366,
      "step": 3230
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5675535202026367,
      "learning_rate": 0.00014034163778311095,
      "loss": 0.8206,
      "step": 3235
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.49278539419174194,
      "learning_rate": 0.00014017815060819665,
      "loss": 0.7769,
      "step": 3240
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5603547096252441,
      "learning_rate": 0.00014001453527169035,
      "loss": 0.815,
      "step": 3245
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6954237818717957,
      "learning_rate": 0.00013985079229549772,
      "loss": 0.8185,
      "step": 3250
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5948113799095154,
      "learning_rate": 0.00013968692220193144,
      "loss": 0.7734,
      "step": 3255
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5735976696014404,
      "learning_rate": 0.00013952292551370978,
      "loss": 0.7777,
      "step": 3260
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6098427176475525,
      "learning_rate": 0.00013935880275395482,
      "loss": 0.689,
      "step": 3265
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5526130199432373,
      "learning_rate": 0.00013919455444619074,
      "loss": 0.7506,
      "step": 3270
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.585938572883606,
      "learning_rate": 0.0001390301811143422,
      "loss": 0.794,
      "step": 3275
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.737772524356842,
      "learning_rate": 0.00013886568328273267,
      "loss": 0.8794,
      "step": 3280
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5786934494972229,
      "learning_rate": 0.00013870106147608282,
      "loss": 0.8145,
      "step": 3285
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6087589859962463,
      "learning_rate": 0.0001385363162195087,
      "loss": 0.898,
      "step": 3290
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6845444440841675,
      "learning_rate": 0.00013837144803852016,
      "loss": 0.9058,
      "step": 3295
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6652143001556396,
      "learning_rate": 0.00013820645745901916,
      "loss": 0.7903,
      "step": 3300
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.5612785816192627,
      "learning_rate": 0.00013804134500729816,
      "loss": 0.8815,
      "step": 3305
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.5807976126670837,
      "learning_rate": 0.00013787611121003824,
      "loss": 0.7484,
      "step": 3310
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6199260950088501,
      "learning_rate": 0.0001377107565943077,
      "loss": 0.7713,
      "step": 3315
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.5932952761650085,
      "learning_rate": 0.00013754528168756006,
      "loss": 0.8288,
      "step": 3320
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.5941212773323059,
      "learning_rate": 0.00013737968701763275,
      "loss": 0.8343,
      "step": 3325
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.5822454690933228,
      "learning_rate": 0.00013721397311274505,
      "loss": 0.7255,
      "step": 3330
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6364741921424866,
      "learning_rate": 0.00013704814050149663,
      "loss": 0.8083,
      "step": 3335
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.707168459892273,
      "learning_rate": 0.0001368821897128659,
      "loss": 0.7845,
      "step": 3340
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6789791584014893,
      "learning_rate": 0.0001367161212762081,
      "loss": 0.8041,
      "step": 3345
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6019969582557678,
      "learning_rate": 0.00013654993572125384,
      "loss": 0.7461,
      "step": 3350
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9661110639572144,
      "learning_rate": 0.00013638363357810734,
      "loss": 0.7196,
      "step": 3355
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6087985038757324,
      "learning_rate": 0.00013621721537724458,
      "loss": 0.7691,
      "step": 3360
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.628359854221344,
      "learning_rate": 0.00013605068164951193,
      "loss": 0.8378,
      "step": 3365
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6273655891418457,
      "learning_rate": 0.00013588403292612408,
      "loss": 0.7873,
      "step": 3370
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6866421103477478,
      "learning_rate": 0.00013571726973866274,
      "loss": 0.7953,
      "step": 3375
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6045382022857666,
      "learning_rate": 0.00013555039261907453,
      "loss": 0.8285,
      "step": 3380
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.5913712978363037,
      "learning_rate": 0.00013538340209966966,
      "loss": 0.775,
      "step": 3385
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.669894278049469,
      "learning_rate": 0.00013521629871311995,
      "loss": 0.7326,
      "step": 3390
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6073742508888245,
      "learning_rate": 0.00013504908299245738,
      "loss": 0.8209,
      "step": 3395
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.7493303418159485,
      "learning_rate": 0.00013488175547107215,
      "loss": 0.7391,
      "step": 3400
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.5117560625076294,
      "learning_rate": 0.00013471431668271103,
      "loss": 0.7678,
      "step": 3405
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.5908418297767639,
      "learning_rate": 0.00013454676716147593,
      "loss": 0.8567,
      "step": 3410
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.4934523403644562,
      "learning_rate": 0.00013437910744182178,
      "loss": 0.9218,
      "step": 3415
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6765584349632263,
      "learning_rate": 0.0001342113380585551,
      "loss": 0.7872,
      "step": 3420
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.5264098048210144,
      "learning_rate": 0.0001340434595468322,
      "loss": 0.7688,
      "step": 3425
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.621438205242157,
      "learning_rate": 0.00013387547244215754,
      "loss": 0.8054,
      "step": 3430
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.8711406588554382,
      "learning_rate": 0.0001337073772803819,
      "loss": 0.8414,
      "step": 3435
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.5573263764381409,
      "learning_rate": 0.00013353917459770078,
      "loss": 0.7817,
      "step": 3440
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.5504346489906311,
      "learning_rate": 0.00013337086493065266,
      "loss": 0.7979,
      "step": 3445
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6491450071334839,
      "learning_rate": 0.00013320244881611726,
      "loss": 0.8133,
      "step": 3450
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.5762600898742676,
      "learning_rate": 0.00013303392679131393,
      "loss": 0.7396,
      "step": 3455
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.606731116771698,
      "learning_rate": 0.00013286529939379968,
      "loss": 0.8597,
      "step": 3460
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.583909273147583,
      "learning_rate": 0.00013269656716146785,
      "loss": 0.7119,
      "step": 3465
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.5781696438789368,
      "learning_rate": 0.0001325277306325461,
      "loss": 0.7583,
      "step": 3470
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6569092273712158,
      "learning_rate": 0.00013235879034559467,
      "loss": 0.7816,
      "step": 3475
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.5959500670433044,
      "learning_rate": 0.0001321897468395049,
      "loss": 0.7365,
      "step": 3480
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6647725701332092,
      "learning_rate": 0.0001320206006534974,
      "loss": 0.8132,
      "step": 3485
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.7699481248855591,
      "learning_rate": 0.00013185135232712022,
      "loss": 0.7308,
      "step": 3490
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.5117635130882263,
      "learning_rate": 0.00013168200240024728,
      "loss": 0.7804,
      "step": 3495
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6221270561218262,
      "learning_rate": 0.00013151255141307657,
      "loss": 0.8068,
      "step": 3500
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6052716374397278,
      "learning_rate": 0.0001313429999061284,
      "loss": 0.7677,
      "step": 3505
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6011590361595154,
      "learning_rate": 0.00013117334842024385,
      "loss": 0.8228,
      "step": 3510
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6260893940925598,
      "learning_rate": 0.0001310035974965828,
      "loss": 0.8167,
      "step": 3515
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.5733935832977295,
      "learning_rate": 0.0001308337476766223,
      "loss": 0.8315,
      "step": 3520
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6916394233703613,
      "learning_rate": 0.00013066379950215498,
      "loss": 0.735,
      "step": 3525
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.7169948816299438,
      "learning_rate": 0.0001304937535152871,
      "loss": 0.7433,
      "step": 3530
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.5684279203414917,
      "learning_rate": 0.00013032361025843705,
      "loss": 0.7539,
      "step": 3535
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.7120059728622437,
      "learning_rate": 0.0001301533702743333,
      "loss": 0.913,
      "step": 3540
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6004089713096619,
      "learning_rate": 0.000129983034106013,
      "loss": 0.7089,
      "step": 3545
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.5413163900375366,
      "learning_rate": 0.00012981260229682018,
      "loss": 0.7601,
      "step": 3550
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.5603556036949158,
      "learning_rate": 0.0001296420753904037,
      "loss": 0.8225,
      "step": 3555
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6073338389396667,
      "learning_rate": 0.00012947145393071608,
      "loss": 0.8175,
      "step": 3560
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5353379249572754,
      "learning_rate": 0.00012930073846201116,
      "loss": 0.7516,
      "step": 3565
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5886621475219727,
      "learning_rate": 0.00012912992952884283,
      "loss": 0.8678,
      "step": 3570
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5950932502746582,
      "learning_rate": 0.0001289590276760631,
      "loss": 0.7118,
      "step": 3575
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6099336743354797,
      "learning_rate": 0.00012878803344882028,
      "loss": 0.909,
      "step": 3580
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.662491500377655,
      "learning_rate": 0.00012861694739255746,
      "loss": 0.8293,
      "step": 3585
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.49604055285453796,
      "learning_rate": 0.00012844577005301054,
      "loss": 0.7738,
      "step": 3590
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5976107716560364,
      "learning_rate": 0.00012827450197620672,
      "loss": 0.7678,
      "step": 3595
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6851217150688171,
      "learning_rate": 0.00012810314370846252,
      "loss": 0.7946,
      "step": 3600
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6138992309570312,
      "learning_rate": 0.00012793169579638223,
      "loss": 0.7757,
      "step": 3605
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5814104080200195,
      "learning_rate": 0.00012776015878685604,
      "loss": 0.8763,
      "step": 3610
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.7025929689407349,
      "learning_rate": 0.00012758853322705836,
      "loss": 0.7614,
      "step": 3615
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.7081409692764282,
      "learning_rate": 0.00012741681966444609,
      "loss": 0.9598,
      "step": 3620
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5396503210067749,
      "learning_rate": 0.0001272450186467568,
      "loss": 0.7691,
      "step": 3625
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5764672160148621,
      "learning_rate": 0.0001270731307220071,
      "loss": 0.7361,
      "step": 3630
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5809180736541748,
      "learning_rate": 0.00012690115643849078,
      "loss": 0.7498,
      "step": 3635
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.8152880668640137,
      "learning_rate": 0.000126729096344777,
      "loss": 0.8225,
      "step": 3640
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6310808658599854,
      "learning_rate": 0.0001265569509897088,
      "loss": 0.9078,
      "step": 3645
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.540705144405365,
      "learning_rate": 0.00012638472092240112,
      "loss": 0.7529,
      "step": 3650
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.5447615385055542,
      "learning_rate": 0.00012621240669223905,
      "loss": 0.8246,
      "step": 3655
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.5487338900566101,
      "learning_rate": 0.00012604000884887634,
      "loss": 0.8896,
      "step": 3660
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.5725840926170349,
      "learning_rate": 0.0001258675279422332,
      "loss": 0.8124,
      "step": 3665
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.5746605396270752,
      "learning_rate": 0.00012569496452249497,
      "loss": 0.8167,
      "step": 3670
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6197704672813416,
      "learning_rate": 0.00012552231914011015,
      "loss": 0.7547,
      "step": 3675
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.5929337739944458,
      "learning_rate": 0.0001253495923457887,
      "loss": 0.8175,
      "step": 3680
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6313364505767822,
      "learning_rate": 0.00012517678469050022,
      "loss": 0.8266,
      "step": 3685
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6353349685668945,
      "learning_rate": 0.00012500389672547233,
      "loss": 0.7939,
      "step": 3690
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.7956532835960388,
      "learning_rate": 0.00012483092900218872,
      "loss": 0.7978,
      "step": 3695
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6404840350151062,
      "learning_rate": 0.00012465788207238754,
      "loss": 0.8911,
      "step": 3700
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6020888686180115,
      "learning_rate": 0.00012448475648805965,
      "loss": 0.8003,
      "step": 3705
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.5578533411026001,
      "learning_rate": 0.0001243115528014467,
      "loss": 0.7608,
      "step": 3710
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.5616059899330139,
      "learning_rate": 0.0001241382715650396,
      "loss": 0.8111,
      "step": 3715
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.5208618640899658,
      "learning_rate": 0.00012396491333157653,
      "loss": 0.7848,
      "step": 3720
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6344425082206726,
      "learning_rate": 0.00012379147865404126,
      "loss": 0.82,
      "step": 3725
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6129960417747498,
      "learning_rate": 0.00012361796808566154,
      "loss": 0.8048,
      "step": 3730
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.5904735922813416,
      "learning_rate": 0.00012344438217990706,
      "loss": 0.8064,
      "step": 3735
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6212365031242371,
      "learning_rate": 0.00012327072149048785,
      "loss": 0.7793,
      "step": 3740
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.5697383284568787,
      "learning_rate": 0.00012309698657135264,
      "loss": 0.8082,
      "step": 3745
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.5733943581581116,
      "learning_rate": 0.00012292317797668665,
      "loss": 0.8163,
      "step": 3750
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.7434484362602234,
      "learning_rate": 0.00012274929626091035,
      "loss": 0.8446,
      "step": 3755
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.5016825795173645,
      "learning_rate": 0.00012257534197867743,
      "loss": 0.731,
      "step": 3760
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6584892868995667,
      "learning_rate": 0.00012240131568487292,
      "loss": 0.7483,
      "step": 3765
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.5814744234085083,
      "learning_rate": 0.0001222272179346117,
      "loss": 0.718,
      "step": 3770
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.7187736630439758,
      "learning_rate": 0.00012205304928323649,
      "loss": 0.8388,
      "step": 3775
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6147611141204834,
      "learning_rate": 0.00012187881028631621,
      "loss": 0.8159,
      "step": 3780
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.5736737251281738,
      "learning_rate": 0.0001217045014996442,
      "loss": 0.6625,
      "step": 3785
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6499507427215576,
      "learning_rate": 0.00012153012347923634,
      "loss": 0.8721,
      "step": 3790
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6190776824951172,
      "learning_rate": 0.00012135567678132942,
      "loss": 0.7648,
      "step": 3795
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6103724837303162,
      "learning_rate": 0.0001211811619623793,
      "loss": 0.7944,
      "step": 3800
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.5723038911819458,
      "learning_rate": 0.00012100657957905908,
      "loss": 0.7289,
      "step": 3805
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.5608327984809875,
      "learning_rate": 0.00012083193018825744,
      "loss": 0.8117,
      "step": 3810
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.5121841430664062,
      "learning_rate": 0.00012065721434707677,
      "loss": 0.9014,
      "step": 3815
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6049484014511108,
      "learning_rate": 0.00012048243261283143,
      "loss": 0.7161,
      "step": 3820
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6480629444122314,
      "learning_rate": 0.00012030758554304593,
      "loss": 0.8718,
      "step": 3825
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6752771735191345,
      "learning_rate": 0.00012013267369545329,
      "loss": 0.8241,
      "step": 3830
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6116204261779785,
      "learning_rate": 0.00011995769762799307,
      "loss": 0.8426,
      "step": 3835
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6204956769943237,
      "learning_rate": 0.00011978265789880973,
      "loss": 0.8223,
      "step": 3840
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6253282427787781,
      "learning_rate": 0.00011960755506625077,
      "loss": 0.7238,
      "step": 3845
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.7981095314025879,
      "learning_rate": 0.00011943238968886492,
      "loss": 0.7958,
      "step": 3850
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.67615807056427,
      "learning_rate": 0.00011925716232540061,
      "loss": 0.8668,
      "step": 3855
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6348207592964172,
      "learning_rate": 0.0001190818735348038,
      "loss": 0.8649,
      "step": 3860
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.8550826907157898,
      "learning_rate": 0.00011890652387621643,
      "loss": 0.7417,
      "step": 3865
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.638140082359314,
      "learning_rate": 0.00011873111390897475,
      "loss": 0.8436,
      "step": 3870
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.7016919255256653,
      "learning_rate": 0.00011855564419260714,
      "loss": 0.7805,
      "step": 3875
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6126405000686646,
      "learning_rate": 0.00011838011528683279,
      "loss": 0.8705,
      "step": 3880
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6007192730903625,
      "learning_rate": 0.00011820452775155957,
      "loss": 0.7607,
      "step": 3885
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.5800730586051941,
      "learning_rate": 0.00011802888214688235,
      "loss": 0.8891,
      "step": 3890
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6240999102592468,
      "learning_rate": 0.00011785317903308137,
      "loss": 0.729,
      "step": 3895
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.5820502638816833,
      "learning_rate": 0.00011767741897062017,
      "loss": 0.7398,
      "step": 3900
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.7410991787910461,
      "learning_rate": 0.00011750160252014402,
      "loss": 0.7372,
      "step": 3905
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6214268207550049,
      "learning_rate": 0.00011732573024247804,
      "loss": 0.7226,
      "step": 3910
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.7160118222236633,
      "learning_rate": 0.00011714980269862538,
      "loss": 0.8079,
      "step": 3915
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6754052042961121,
      "learning_rate": 0.00011697382044976564,
      "loss": 0.7681,
      "step": 3920
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6616350412368774,
      "learning_rate": 0.00011679778405725274,
      "loss": 0.8621,
      "step": 3925
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.452217698097229,
      "learning_rate": 0.00011662169408261339,
      "loss": 0.7792,
      "step": 3930
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6552872061729431,
      "learning_rate": 0.00011644555108754517,
      "loss": 0.8509,
      "step": 3935
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6112470626831055,
      "learning_rate": 0.0001162693556339149,
      "loss": 0.852,
      "step": 3940
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6591911315917969,
      "learning_rate": 0.00011609310828375661,
      "loss": 0.8543,
      "step": 3945
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6253312826156616,
      "learning_rate": 0.00011591680959926994,
      "loss": 0.79,
      "step": 3950
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.5114595890045166,
      "learning_rate": 0.00011574046014281823,
      "loss": 0.8251,
      "step": 3955
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.5724785923957825,
      "learning_rate": 0.0001155640604769268,
      "loss": 0.8662,
      "step": 3960
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6301324963569641,
      "learning_rate": 0.00011538761116428118,
      "loss": 0.7555,
      "step": 3965
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.74462890625,
      "learning_rate": 0.00011521111276772518,
      "loss": 0.9435,
      "step": 3970
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.7058918476104736,
      "learning_rate": 0.00011503456585025918,
      "loss": 0.8144,
      "step": 3975
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.5204983949661255,
      "learning_rate": 0.00011485797097503848,
      "loss": 0.894,
      "step": 3980
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6754153370857239,
      "learning_rate": 0.00011468132870537112,
      "loss": 0.8955,
      "step": 3985
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.5927032232284546,
      "learning_rate": 0.00011450463960471651,
      "loss": 0.82,
      "step": 3990
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.5471856594085693,
      "learning_rate": 0.00011432790423668338,
      "loss": 0.7896,
      "step": 3995
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.5610169768333435,
      "learning_rate": 0.00011415112316502803,
      "loss": 0.6074,
      "step": 4000
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6560962200164795,
      "learning_rate": 0.0001139742969536526,
      "loss": 0.7843,
      "step": 4005
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6780028939247131,
      "learning_rate": 0.0001137974261666031,
      "loss": 0.851,
      "step": 4010
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6535429954528809,
      "learning_rate": 0.00011362051136806789,
      "loss": 0.7268,
      "step": 4015
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.7623457908630371,
      "learning_rate": 0.0001134435531223756,
      "loss": 0.8387,
      "step": 4020
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.5498214364051819,
      "learning_rate": 0.00011326655199399345,
      "loss": 0.7551,
      "step": 4025
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6058107614517212,
      "learning_rate": 0.00011308950854752558,
      "loss": 0.765,
      "step": 4030
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.5960306525230408,
      "learning_rate": 0.00011291242334771095,
      "loss": 0.8633,
      "step": 4035
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6701599359512329,
      "learning_rate": 0.00011273529695942183,
      "loss": 0.7851,
      "step": 4040
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.5869520902633667,
      "learning_rate": 0.00011255812994766175,
      "loss": 0.8756,
      "step": 4045
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6017047762870789,
      "learning_rate": 0.00011238092287756397,
      "loss": 0.6697,
      "step": 4050
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.7067684531211853,
      "learning_rate": 0.00011220367631438942,
      "loss": 0.7732,
      "step": 4055
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6048575043678284,
      "learning_rate": 0.00011202639082352506,
      "loss": 0.7937,
      "step": 4060
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6676318049430847,
      "learning_rate": 0.00011184906697048201,
      "loss": 0.8011,
      "step": 4065
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.5223115086555481,
      "learning_rate": 0.00011167170532089369,
      "loss": 0.7453,
      "step": 4070
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.5543930530548096,
      "learning_rate": 0.00011149430644051424,
      "loss": 0.8195,
      "step": 4075
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.8007245063781738,
      "learning_rate": 0.0001113168708952164,
      "loss": 0.889,
      "step": 4080
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.5854726433753967,
      "learning_rate": 0.00011113939925098997,
      "loss": 0.8129,
      "step": 4085
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.5896696448326111,
      "learning_rate": 0.00011096189207393987,
      "loss": 0.7341,
      "step": 4090
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6035794019699097,
      "learning_rate": 0.00011078434993028431,
      "loss": 0.7217,
      "step": 4095
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.5358508825302124,
      "learning_rate": 0.0001106067733863531,
      "loss": 0.767,
      "step": 4100
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6229228973388672,
      "learning_rate": 0.00011042916300858583,
      "loss": 0.7915,
      "step": 4105
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.5339498519897461,
      "learning_rate": 0.00011025151936352987,
      "loss": 0.7711,
      "step": 4110
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.8428621292114258,
      "learning_rate": 0.00011007384301783883,
      "loss": 0.8321,
      "step": 4115
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6193743348121643,
      "learning_rate": 0.00010989613453827057,
      "loss": 0.7195,
      "step": 4120
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.7001969814300537,
      "learning_rate": 0.00010971839449168543,
      "loss": 0.7381,
      "step": 4125
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6056939959526062,
      "learning_rate": 0.00010954062344504458,
      "loss": 0.8131,
      "step": 4130
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.7164072394371033,
      "learning_rate": 0.00010936282196540788,
      "loss": 0.8643,
      "step": 4135
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6174270510673523,
      "learning_rate": 0.00010918499061993241,
      "loss": 0.7746,
      "step": 4140
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.602906346321106,
      "learning_rate": 0.00010900712997587047,
      "loss": 0.8276,
      "step": 4145
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6625570058822632,
      "learning_rate": 0.0001088292406005678,
      "loss": 0.8349,
      "step": 4150
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6430180072784424,
      "learning_rate": 0.00010865132306146182,
      "loss": 0.911,
      "step": 4155
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6040902733802795,
      "learning_rate": 0.00010847337792607978,
      "loss": 0.7391,
      "step": 4160
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6236419081687927,
      "learning_rate": 0.00010829540576203695,
      "loss": 0.7698,
      "step": 4165
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.704279363155365,
      "learning_rate": 0.00010811740713703476,
      "loss": 0.7582,
      "step": 4170
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.5443447232246399,
      "learning_rate": 0.00010793938261885916,
      "loss": 0.6971,
      "step": 4175
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5762811303138733,
      "learning_rate": 0.00010776133277537865,
      "loss": 0.7751,
      "step": 4180
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5704807639122009,
      "learning_rate": 0.00010758325817454248,
      "loss": 0.7032,
      "step": 4185
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5983087420463562,
      "learning_rate": 0.0001074051593843789,
      "loss": 0.676,
      "step": 4190
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5941609740257263,
      "learning_rate": 0.00010722703697299328,
      "loss": 0.7671,
      "step": 4195
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6191790103912354,
      "learning_rate": 0.0001070488915085664,
      "loss": 0.7947,
      "step": 4200
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6260554194450378,
      "learning_rate": 0.00010687072355935257,
      "loss": 0.88,
      "step": 4205
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5160613059997559,
      "learning_rate": 0.00010669253369367775,
      "loss": 0.7526,
      "step": 4210
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5811184644699097,
      "learning_rate": 0.00010651432247993794,
      "loss": 0.7775,
      "step": 4215
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5606276392936707,
      "learning_rate": 0.00010633609048659705,
      "loss": 0.8119,
      "step": 4220
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6187065243721008,
      "learning_rate": 0.00010615783828218547,
      "loss": 0.8063,
      "step": 4225
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5912905931472778,
      "learning_rate": 0.0001059795664352979,
      "loss": 0.7661,
      "step": 4230
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.7005530595779419,
      "learning_rate": 0.00010580127551459178,
      "loss": 0.7361,
      "step": 4235
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.58009934425354,
      "learning_rate": 0.00010562296608878545,
      "loss": 0.7438,
      "step": 4240
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.7159207463264465,
      "learning_rate": 0.00010544463872665611,
      "loss": 0.8424,
      "step": 4245
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5975012183189392,
      "learning_rate": 0.00010526629399703833,
      "loss": 0.8002,
      "step": 4250
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.7247589230537415,
      "learning_rate": 0.00010508793246882202,
      "loss": 0.7255,
      "step": 4255
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6801584959030151,
      "learning_rate": 0.0001049095547109506,
      "loss": 0.8668,
      "step": 4260
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.596948504447937,
      "learning_rate": 0.00010473116129241944,
      "loss": 0.7466,
      "step": 4265
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.8162465691566467,
      "learning_rate": 0.0001045527527822737,
      "loss": 0.7843,
      "step": 4270
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6837438941001892,
      "learning_rate": 0.00010437432974960674,
      "loss": 0.8472,
      "step": 4275
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6489061713218689,
      "learning_rate": 0.00010419589276355826,
      "loss": 0.8139,
      "step": 4280
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6814378499984741,
      "learning_rate": 0.00010401744239331243,
      "loss": 0.7912,
      "step": 4285
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6457976698875427,
      "learning_rate": 0.00010383897920809618,
      "loss": 0.9099,
      "step": 4290
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.547829806804657,
      "learning_rate": 0.00010366050377717722,
      "loss": 0.7389,
      "step": 4295
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6295444965362549,
      "learning_rate": 0.00010348201666986241,
      "loss": 0.8012,
      "step": 4300
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6654611825942993,
      "learning_rate": 0.00010330351845549578,
      "loss": 0.9024,
      "step": 4305
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.5944251418113708,
      "learning_rate": 0.00010312500970345688,
      "loss": 0.7721,
      "step": 4310
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6546233296394348,
      "learning_rate": 0.0001029464909831588,
      "loss": 0.7065,
      "step": 4315
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6331318020820618,
      "learning_rate": 0.00010276796286404644,
      "loss": 0.7166,
      "step": 4320
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9102218151092529,
      "learning_rate": 0.00010258942591559475,
      "loss": 0.8608,
      "step": 4325
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.5956624150276184,
      "learning_rate": 0.00010241088070730669,
      "loss": 0.7382,
      "step": 4330
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6455515623092651,
      "learning_rate": 0.00010223232780871173,
      "loss": 0.8699,
      "step": 4335
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6276233196258545,
      "learning_rate": 0.00010205376778936379,
      "loss": 0.8406,
      "step": 4340
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6347047090530396,
      "learning_rate": 0.0001018752012188395,
      "loss": 0.7482,
      "step": 4345
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6462671756744385,
      "learning_rate": 0.00010169662866673646,
      "loss": 0.7888,
      "step": 4350
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7501237392425537,
      "learning_rate": 0.00010151805070267121,
      "loss": 0.7205,
      "step": 4355
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6664027571678162,
      "learning_rate": 0.00010133946789627773,
      "loss": 0.7484,
      "step": 4360
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.5977053642272949,
      "learning_rate": 0.00010116088081720527,
      "loss": 0.7813,
      "step": 4365
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.5906143188476562,
      "learning_rate": 0.00010098229003511683,
      "loss": 0.7464,
      "step": 4370
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6777781844139099,
      "learning_rate": 0.00010080369611968723,
      "loss": 0.7871,
      "step": 4375
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6508293747901917,
      "learning_rate": 0.00010062509964060118,
      "loss": 0.7998,
      "step": 4380
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6403629779815674,
      "learning_rate": 0.00010044650116755165,
      "loss": 0.7488,
      "step": 4385
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.5627826452255249,
      "learning_rate": 0.00010026790127023793,
      "loss": 0.7261,
      "step": 4390
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.5952783823013306,
      "learning_rate": 0.0001000893005183639,
      "loss": 0.7672,
      "step": 4395
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.5621878504753113,
      "learning_rate": 9.991069948163614e-05,
      "loss": 0.7098,
      "step": 4400
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7619518041610718,
      "learning_rate": 9.97320987297621e-05,
      "loss": 0.8246,
      "step": 4405
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6683063507080078,
      "learning_rate": 9.955349883244837e-05,
      "loss": 0.7404,
      "step": 4410
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.5906397104263306,
      "learning_rate": 9.937490035939885e-05,
      "loss": 0.7627,
      "step": 4415
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6680077910423279,
      "learning_rate": 9.919630388031278e-05,
      "loss": 0.7825,
      "step": 4420
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7676994800567627,
      "learning_rate": 9.901770996488315e-05,
      "loss": 0.8636,
      "step": 4425
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7993883490562439,
      "learning_rate": 9.883911918279476e-05,
      "loss": 0.8637,
      "step": 4430
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6810579895973206,
      "learning_rate": 9.86605321037223e-05,
      "loss": 0.7691,
      "step": 4435
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6630615592002869,
      "learning_rate": 9.84819492973288e-05,
      "loss": 0.8173,
      "step": 4440
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6403682231903076,
      "learning_rate": 9.830337133326355e-05,
      "loss": 0.9035,
      "step": 4445
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.5860075354576111,
      "learning_rate": 9.81247987811605e-05,
      "loss": 0.8773,
      "step": 4450
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.5799443125724792,
      "learning_rate": 9.794623221063625e-05,
      "loss": 0.8335,
      "step": 4455
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6200720071792603,
      "learning_rate": 9.776767219128828e-05,
      "loss": 0.7708,
      "step": 4460
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6964035630226135,
      "learning_rate": 9.758911929269334e-05,
      "loss": 0.8915,
      "step": 4465
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.5871273875236511,
      "learning_rate": 9.741057408440528e-05,
      "loss": 0.7691,
      "step": 4470
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.5692684054374695,
      "learning_rate": 9.723203713595355e-05,
      "loss": 0.7232,
      "step": 4475
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6924221515655518,
      "learning_rate": 9.705350901684119e-05,
      "loss": 0.819,
      "step": 4480
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6366986036300659,
      "learning_rate": 9.687499029654314e-05,
      "loss": 0.6907,
      "step": 4485
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6119223237037659,
      "learning_rate": 9.669648154450425e-05,
      "loss": 0.8127,
      "step": 4490
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6606696248054504,
      "learning_rate": 9.651798333013762e-05,
      "loss": 0.7755,
      "step": 4495
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.566644549369812,
      "learning_rate": 9.63394962228228e-05,
      "loss": 0.9107,
      "step": 4500
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6656118035316467,
      "learning_rate": 9.616102079190382e-05,
      "loss": 0.7586,
      "step": 4505
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.5353782773017883,
      "learning_rate": 9.598255760668758e-05,
      "loss": 0.6815,
      "step": 4510
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.7555747032165527,
      "learning_rate": 9.580410723644177e-05,
      "loss": 0.9274,
      "step": 4515
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.5931302905082703,
      "learning_rate": 9.562567025039327e-05,
      "loss": 0.7938,
      "step": 4520
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6406871676445007,
      "learning_rate": 9.544724721772631e-05,
      "loss": 0.8135,
      "step": 4525
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.5799956321716309,
      "learning_rate": 9.526883870758056e-05,
      "loss": 0.7286,
      "step": 4530
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6960060596466064,
      "learning_rate": 9.50904452890494e-05,
      "loss": 0.8437,
      "step": 4535
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.5997190475463867,
      "learning_rate": 9.491206753117803e-05,
      "loss": 0.769,
      "step": 4540
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.5737845301628113,
      "learning_rate": 9.473370600296169e-05,
      "loss": 0.7596,
      "step": 4545
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7565434575080872,
      "learning_rate": 9.45553612733439e-05,
      "loss": 0.7319,
      "step": 4550
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6088534593582153,
      "learning_rate": 9.437703391121456e-05,
      "loss": 0.7568,
      "step": 4555
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6066818833351135,
      "learning_rate": 9.419872448540821e-05,
      "loss": 0.6767,
      "step": 4560
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.5788107514381409,
      "learning_rate": 9.402043356470215e-05,
      "loss": 0.7041,
      "step": 4565
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7555676102638245,
      "learning_rate": 9.384216171781457e-05,
      "loss": 0.6752,
      "step": 4570
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.5866352915763855,
      "learning_rate": 9.366390951340297e-05,
      "loss": 0.8096,
      "step": 4575
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.5765191912651062,
      "learning_rate": 9.348567752006207e-05,
      "loss": 0.7834,
      "step": 4580
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.5642898082733154,
      "learning_rate": 9.330746630632224e-05,
      "loss": 0.8147,
      "step": 4585
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7535125017166138,
      "learning_rate": 9.312927644064741e-05,
      "loss": 0.8679,
      "step": 4590
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6822525262832642,
      "learning_rate": 9.295110849143361e-05,
      "loss": 0.8211,
      "step": 4595
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6119083762168884,
      "learning_rate": 9.277296302700676e-05,
      "loss": 0.7726,
      "step": 4600
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6004748344421387,
      "learning_rate": 9.259484061562113e-05,
      "loss": 0.7189,
      "step": 4605
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.5561325550079346,
      "learning_rate": 9.241674182545754e-05,
      "loss": 0.7632,
      "step": 4610
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.674069344997406,
      "learning_rate": 9.223866722462134e-05,
      "loss": 0.734,
      "step": 4615
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6486625075340271,
      "learning_rate": 9.206061738114086e-05,
      "loss": 0.7448,
      "step": 4620
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.585586428642273,
      "learning_rate": 9.188259286296528e-05,
      "loss": 0.6513,
      "step": 4625
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6953662037849426,
      "learning_rate": 9.170459423796309e-05,
      "loss": 0.7195,
      "step": 4630
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6233338117599487,
      "learning_rate": 9.152662207392024e-05,
      "loss": 0.8018,
      "step": 4635
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6842007637023926,
      "learning_rate": 9.134867693853816e-05,
      "loss": 0.7632,
      "step": 4640
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6602983474731445,
      "learning_rate": 9.117075939943221e-05,
      "loss": 0.7252,
      "step": 4645
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.5861719846725464,
      "learning_rate": 9.099287002412956e-05,
      "loss": 0.6509,
      "step": 4650
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6650002002716064,
      "learning_rate": 9.081500938006761e-05,
      "loss": 0.8915,
      "step": 4655
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.5285857915878296,
      "learning_rate": 9.063717803459213e-05,
      "loss": 0.7512,
      "step": 4660
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.5826857089996338,
      "learning_rate": 9.045937655495544e-05,
      "loss": 0.9319,
      "step": 4665
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6033085584640503,
      "learning_rate": 9.028160550831458e-05,
      "loss": 0.7979,
      "step": 4670
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.7138378620147705,
      "learning_rate": 9.010386546172949e-05,
      "loss": 0.763,
      "step": 4675
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6795177459716797,
      "learning_rate": 8.99261569821612e-05,
      "loss": 0.7867,
      "step": 4680
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6258403658866882,
      "learning_rate": 8.974848063647015e-05,
      "loss": 0.7436,
      "step": 4685
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6418249607086182,
      "learning_rate": 8.957083699141419e-05,
      "loss": 0.9125,
      "step": 4690
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.5499231815338135,
      "learning_rate": 8.939322661364689e-05,
      "loss": 0.7395,
      "step": 4695
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.5704962015151978,
      "learning_rate": 8.92156500697157e-05,
      "loss": 0.7699,
      "step": 4700
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6621077060699463,
      "learning_rate": 8.903810792606018e-05,
      "loss": 0.7511,
      "step": 4705
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.49777495861053467,
      "learning_rate": 8.886060074901005e-05,
      "loss": 0.6749,
      "step": 4710
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.609890878200531,
      "learning_rate": 8.868312910478362e-05,
      "loss": 0.6744,
      "step": 4715
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6777181625366211,
      "learning_rate": 8.85056935594858e-05,
      "loss": 0.8498,
      "step": 4720
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6590617895126343,
      "learning_rate": 8.832829467910631e-05,
      "loss": 0.7196,
      "step": 4725
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.7174111604690552,
      "learning_rate": 8.815093302951804e-05,
      "loss": 0.7889,
      "step": 4730
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.7540302276611328,
      "learning_rate": 8.797360917647498e-05,
      "loss": 0.8266,
      "step": 4735
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6515617966651917,
      "learning_rate": 8.77963236856106e-05,
      "loss": 0.8768,
      "step": 4740
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.5930551290512085,
      "learning_rate": 8.761907712243606e-05,
      "loss": 0.7096,
      "step": 4745
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6492053866386414,
      "learning_rate": 8.744187005233826e-05,
      "loss": 0.7805,
      "step": 4750
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.5830654501914978,
      "learning_rate": 8.72647030405782e-05,
      "loss": 0.8618,
      "step": 4755
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.5837696194648743,
      "learning_rate": 8.708757665228909e-05,
      "loss": 0.8436,
      "step": 4760
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6426911354064941,
      "learning_rate": 8.691049145247445e-05,
      "loss": 0.8142,
      "step": 4765
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.5991452932357788,
      "learning_rate": 8.673344800600657e-05,
      "loss": 0.8628,
      "step": 4770
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.5630876421928406,
      "learning_rate": 8.655644687762443e-05,
      "loss": 0.6788,
      "step": 4775
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.692578911781311,
      "learning_rate": 8.637948863193214e-05,
      "loss": 0.8166,
      "step": 4780
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.7014926075935364,
      "learning_rate": 8.620257383339694e-05,
      "loss": 0.8809,
      "step": 4785
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.717907190322876,
      "learning_rate": 8.602570304634745e-05,
      "loss": 0.6915,
      "step": 4790
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6019871234893799,
      "learning_rate": 8.584887683497199e-05,
      "loss": 0.7664,
      "step": 4795
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.5950486063957214,
      "learning_rate": 8.567209576331663e-05,
      "loss": 0.7824,
      "step": 4800
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.5407964587211609,
      "learning_rate": 8.54953603952835e-05,
      "loss": 0.7581,
      "step": 4805
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6220264434814453,
      "learning_rate": 8.531867129462888e-05,
      "loss": 0.7908,
      "step": 4810
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6367149353027344,
      "learning_rate": 8.514202902496157e-05,
      "loss": 0.8258,
      "step": 4815
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.5935396552085876,
      "learning_rate": 8.496543414974083e-05,
      "loss": 0.6965,
      "step": 4820
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6432780027389526,
      "learning_rate": 8.478888723227485e-05,
      "loss": 0.8545,
      "step": 4825
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6651439666748047,
      "learning_rate": 8.461238883571885e-05,
      "loss": 0.8435,
      "step": 4830
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6177381873130798,
      "learning_rate": 8.443593952307319e-05,
      "loss": 0.8476,
      "step": 4835
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.647454559803009,
      "learning_rate": 8.42595398571818e-05,
      "loss": 0.7337,
      "step": 4840
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6528860330581665,
      "learning_rate": 8.408319040073011e-05,
      "loss": 0.8445,
      "step": 4845
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.5049680471420288,
      "learning_rate": 8.390689171624341e-05,
      "loss": 0.8322,
      "step": 4850
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.7274385094642639,
      "learning_rate": 8.373064436608512e-05,
      "loss": 0.9198,
      "step": 4855
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6594643592834473,
      "learning_rate": 8.355444891245482e-05,
      "loss": 0.7995,
      "step": 4860
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6585742235183716,
      "learning_rate": 8.337830591738664e-05,
      "loss": 0.7419,
      "step": 4865
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.7104743719100952,
      "learning_rate": 8.32022159427473e-05,
      "loss": 0.7447,
      "step": 4870
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.7039633393287659,
      "learning_rate": 8.302617955023437e-05,
      "loss": 0.8402,
      "step": 4875
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.5455465912818909,
      "learning_rate": 8.285019730137463e-05,
      "loss": 0.8199,
      "step": 4880
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.5330617427825928,
      "learning_rate": 8.2674269757522e-05,
      "loss": 0.6672,
      "step": 4885
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6174395084381104,
      "learning_rate": 8.2498397479856e-05,
      "loss": 0.8038,
      "step": 4890
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.7968682646751404,
      "learning_rate": 8.232258102937987e-05,
      "loss": 0.7963,
      "step": 4895
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6184393763542175,
      "learning_rate": 8.214682096691866e-05,
      "loss": 0.7699,
      "step": 4900
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6042989492416382,
      "learning_rate": 8.197111785311768e-05,
      "loss": 0.7207,
      "step": 4905
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6476695537567139,
      "learning_rate": 8.179547224844047e-05,
      "loss": 0.8862,
      "step": 4910
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.7073302865028381,
      "learning_rate": 8.161988471316723e-05,
      "loss": 0.6839,
      "step": 4915
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.7182896733283997,
      "learning_rate": 8.144435580739284e-05,
      "loss": 0.8197,
      "step": 4920
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.5961290001869202,
      "learning_rate": 8.126888609102528e-05,
      "loss": 0.7861,
      "step": 4925
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.621379554271698,
      "learning_rate": 8.109347612378358e-05,
      "loss": 0.8238,
      "step": 4930
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.5518468618392944,
      "learning_rate": 8.091812646519623e-05,
      "loss": 0.8338,
      "step": 4935
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.715250551700592,
      "learning_rate": 8.074283767459941e-05,
      "loss": 0.8835,
      "step": 4940
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6616021394729614,
      "learning_rate": 8.056761031113506e-05,
      "loss": 0.7993,
      "step": 4945
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6410152316093445,
      "learning_rate": 8.03924449337493e-05,
      "loss": 0.7862,
      "step": 4950
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.5611416101455688,
      "learning_rate": 8.02173421011903e-05,
      "loss": 0.8749,
      "step": 4955
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.5541871786117554,
      "learning_rate": 8.004230237200694e-05,
      "loss": 0.6462,
      "step": 4960
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.594853401184082,
      "learning_rate": 7.986732630454674e-05,
      "loss": 0.8506,
      "step": 4965
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6446029543876648,
      "learning_rate": 7.969241445695406e-05,
      "loss": 0.8062,
      "step": 4970
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.5990138649940491,
      "learning_rate": 7.95175673871686e-05,
      "loss": 0.8053,
      "step": 4975
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.7167456746101379,
      "learning_rate": 7.934278565292328e-05,
      "loss": 0.8316,
      "step": 4980
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.8522974848747253,
      "learning_rate": 7.916806981174258e-05,
      "loss": 0.9252,
      "step": 4985
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.703740119934082,
      "learning_rate": 7.899342042094095e-05,
      "loss": 0.8845,
      "step": 4990
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6203125715255737,
      "learning_rate": 7.88188380376207e-05,
      "loss": 0.6643,
      "step": 4995
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6809445023536682,
      "learning_rate": 7.864432321867057e-05,
      "loss": 0.8035,
      "step": 5000
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.5899534821510315,
      "learning_rate": 7.846987652076372e-05,
      "loss": 0.8727,
      "step": 5005
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6520495414733887,
      "learning_rate": 7.829549850035584e-05,
      "loss": 0.792,
      "step": 5010
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.4904904067516327,
      "learning_rate": 7.812118971368383e-05,
      "loss": 0.7416,
      "step": 5015
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6947556734085083,
      "learning_rate": 7.794695071676355e-05,
      "loss": 0.7412,
      "step": 5020
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6248174905776978,
      "learning_rate": 7.777278206538832e-05,
      "loss": 0.8281,
      "step": 5025
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6942163109779358,
      "learning_rate": 7.759868431512709e-05,
      "loss": 0.8064,
      "step": 5030
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.8450883626937866,
      "learning_rate": 7.742465802132262e-05,
      "loss": 0.7073,
      "step": 5035
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6221974492073059,
      "learning_rate": 7.725070373908967e-05,
      "loss": 0.7276,
      "step": 5040
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.5618094205856323,
      "learning_rate": 7.707682202331338e-05,
      "loss": 0.7289,
      "step": 5045
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.5586840510368347,
      "learning_rate": 7.690301342864739e-05,
      "loss": 0.7277,
      "step": 5050
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6269596815109253,
      "learning_rate": 7.672927850951213e-05,
      "loss": 0.8247,
      "step": 5055
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.5967304110527039,
      "learning_rate": 7.655561782009298e-05,
      "loss": 0.7651,
      "step": 5060
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.5993629097938538,
      "learning_rate": 7.638203191433848e-05,
      "loss": 0.8346,
      "step": 5065
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6899064779281616,
      "learning_rate": 7.620852134595875e-05,
      "loss": 0.8915,
      "step": 5070
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6052946448326111,
      "learning_rate": 7.60350866684235e-05,
      "loss": 0.7582,
      "step": 5075
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6006975173950195,
      "learning_rate": 7.586172843496042e-05,
      "loss": 0.7252,
      "step": 5080
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7047126293182373,
      "learning_rate": 7.568844719855328e-05,
      "loss": 0.7539,
      "step": 5085
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.529346227645874,
      "learning_rate": 7.551524351194039e-05,
      "loss": 0.7305,
      "step": 5090
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.870637834072113,
      "learning_rate": 7.534211792761248e-05,
      "loss": 0.7647,
      "step": 5095
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6663516163825989,
      "learning_rate": 7.51690709978113e-05,
      "loss": 0.8185,
      "step": 5100
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.688782811164856,
      "learning_rate": 7.49961032745277e-05,
      "loss": 0.7494,
      "step": 5105
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7706353068351746,
      "learning_rate": 7.482321530949976e-05,
      "loss": 0.825,
      "step": 5110
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6962013840675354,
      "learning_rate": 7.465040765421132e-05,
      "loss": 0.9311,
      "step": 5115
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6817395091056824,
      "learning_rate": 7.447768085988987e-05,
      "loss": 0.8245,
      "step": 5120
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.5578342080116272,
      "learning_rate": 7.430503547750505e-05,
      "loss": 0.872,
      "step": 5125
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6477236151695251,
      "learning_rate": 7.413247205776683e-05,
      "loss": 0.7653,
      "step": 5130
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6161476373672485,
      "learning_rate": 7.395999115112369e-05,
      "loss": 0.7479,
      "step": 5135
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6497325897216797,
      "learning_rate": 7.378759330776093e-05,
      "loss": 0.704,
      "step": 5140
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6440129280090332,
      "learning_rate": 7.361527907759893e-05,
      "loss": 0.7506,
      "step": 5145
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.8600013256072998,
      "learning_rate": 7.344304901029121e-05,
      "loss": 0.792,
      "step": 5150
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.7030536532402039,
      "learning_rate": 7.327090365522302e-05,
      "loss": 0.8071,
      "step": 5155
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.7202965617179871,
      "learning_rate": 7.309884356150924e-05,
      "loss": 0.6725,
      "step": 5160
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6689905524253845,
      "learning_rate": 7.292686927799288e-05,
      "loss": 0.7843,
      "step": 5165
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6386666297912598,
      "learning_rate": 7.275498135324322e-05,
      "loss": 0.7353,
      "step": 5170
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.8615363836288452,
      "learning_rate": 7.258318033555394e-05,
      "loss": 0.8075,
      "step": 5175
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.5758625864982605,
      "learning_rate": 7.241146677294168e-05,
      "loss": 0.7186,
      "step": 5180
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6153453588485718,
      "learning_rate": 7.2239841213144e-05,
      "loss": 0.8593,
      "step": 5185
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.728493869304657,
      "learning_rate": 7.20683042036178e-05,
      "loss": 0.7179,
      "step": 5190
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.7267651557922363,
      "learning_rate": 7.189685629153749e-05,
      "loss": 0.8491,
      "step": 5195
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6467040777206421,
      "learning_rate": 7.17254980237933e-05,
      "loss": 0.7203,
      "step": 5200
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6471399664878845,
      "learning_rate": 7.155422994698948e-05,
      "loss": 0.8021,
      "step": 5205
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.5818991661071777,
      "learning_rate": 7.138305260744256e-05,
      "loss": 0.8434,
      "step": 5210
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.5848895311355591,
      "learning_rate": 7.121196655117974e-05,
      "loss": 0.7041,
      "step": 5215
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6077134609222412,
      "learning_rate": 7.104097232393691e-05,
      "loss": 0.718,
      "step": 5220
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6883412599563599,
      "learning_rate": 7.08700704711572e-05,
      "loss": 0.8343,
      "step": 5225
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.7491132020950317,
      "learning_rate": 7.069926153798888e-05,
      "loss": 0.7679,
      "step": 5230
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.740841805934906,
      "learning_rate": 7.052854606928396e-05,
      "loss": 0.8734,
      "step": 5235
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.5949041247367859,
      "learning_rate": 7.03579246095963e-05,
      "loss": 0.7022,
      "step": 5240
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.8379162549972534,
      "learning_rate": 7.018739770317985e-05,
      "loss": 0.7814,
      "step": 5245
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6472388505935669,
      "learning_rate": 7.001696589398699e-05,
      "loss": 0.8437,
      "step": 5250
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6293101906776428,
      "learning_rate": 6.984662972566674e-05,
      "loss": 0.825,
      "step": 5255
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6227039694786072,
      "learning_rate": 6.967638974156299e-05,
      "loss": 0.8873,
      "step": 5260
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.5379999279975891,
      "learning_rate": 6.950624648471288e-05,
      "loss": 0.7832,
      "step": 5265
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6406290531158447,
      "learning_rate": 6.933620049784501e-05,
      "loss": 0.808,
      "step": 5270
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.5745694041252136,
      "learning_rate": 6.91662523233777e-05,
      "loss": 0.8721,
      "step": 5275
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.5971238017082214,
      "learning_rate": 6.899640250341726e-05,
      "loss": 0.8064,
      "step": 5280
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6286386847496033,
      "learning_rate": 6.882665157975617e-05,
      "loss": 0.7531,
      "step": 5285
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.7099630236625671,
      "learning_rate": 6.865700009387161e-05,
      "loss": 0.8179,
      "step": 5290
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6734933853149414,
      "learning_rate": 6.848744858692344e-05,
      "loss": 0.7745,
      "step": 5295
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6301453709602356,
      "learning_rate": 6.831799759975273e-05,
      "loss": 0.7436,
      "step": 5300
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6269943118095398,
      "learning_rate": 6.814864767287978e-05,
      "loss": 0.845,
      "step": 5305
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6703981757164001,
      "learning_rate": 6.797939934650262e-05,
      "loss": 0.6949,
      "step": 5310
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6487662196159363,
      "learning_rate": 6.781025316049512e-05,
      "loss": 0.8129,
      "step": 5315
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6541668772697449,
      "learning_rate": 6.764120965440537e-05,
      "loss": 0.7955,
      "step": 5320
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.5929297208786011,
      "learning_rate": 6.747226936745394e-05,
      "loss": 0.733,
      "step": 5325
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6727938652038574,
      "learning_rate": 6.730343283853214e-05,
      "loss": 0.7599,
      "step": 5330
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6011897325515747,
      "learning_rate": 6.713470060620033e-05,
      "loss": 0.7038,
      "step": 5335
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.5427403450012207,
      "learning_rate": 6.696607320868612e-05,
      "loss": 0.6685,
      "step": 5340
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6301597952842712,
      "learning_rate": 6.679755118388275e-05,
      "loss": 0.7247,
      "step": 5345
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.7286877036094666,
      "learning_rate": 6.662913506934736e-05,
      "loss": 0.7292,
      "step": 5350
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.5117044448852539,
      "learning_rate": 6.646082540229923e-05,
      "loss": 0.687,
      "step": 5355
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.5101413726806641,
      "learning_rate": 6.629262271961811e-05,
      "loss": 0.6912,
      "step": 5360
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.5790354609489441,
      "learning_rate": 6.61245275578425e-05,
      "loss": 0.7624,
      "step": 5365
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6759084463119507,
      "learning_rate": 6.595654045316782e-05,
      "loss": 0.7712,
      "step": 5370
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6083055138587952,
      "learning_rate": 6.578866194144492e-05,
      "loss": 0.7512,
      "step": 5375
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6502549648284912,
      "learning_rate": 6.562089255817823e-05,
      "loss": 0.8051,
      "step": 5380
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.5895970463752747,
      "learning_rate": 6.545323283852407e-05,
      "loss": 0.6741,
      "step": 5385
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.630634069442749,
      "learning_rate": 6.528568331728895e-05,
      "loss": 0.8695,
      "step": 5390
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6064544320106506,
      "learning_rate": 6.51182445289279e-05,
      "loss": 0.7345,
      "step": 5395
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6453479528427124,
      "learning_rate": 6.495091700754266e-05,
      "loss": 0.743,
      "step": 5400
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6371870040893555,
      "learning_rate": 6.478370128688005e-05,
      "loss": 0.7806,
      "step": 5405
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6676410436630249,
      "learning_rate": 6.461659790033038e-05,
      "loss": 0.767,
      "step": 5410
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6216074824333191,
      "learning_rate": 6.444960738092548e-05,
      "loss": 0.6315,
      "step": 5415
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6729696989059448,
      "learning_rate": 6.428273026133731e-05,
      "loss": 0.7697,
      "step": 5420
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.5996483564376831,
      "learning_rate": 6.411596707387594e-05,
      "loss": 0.7897,
      "step": 5425
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9583771824836731,
      "learning_rate": 6.39493183504881e-05,
      "loss": 0.7393,
      "step": 5430
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.62912917137146,
      "learning_rate": 6.378278462275542e-05,
      "loss": 0.8387,
      "step": 5435
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.617304801940918,
      "learning_rate": 6.361636642189269e-05,
      "loss": 0.8835,
      "step": 5440
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6186046004295349,
      "learning_rate": 6.345006427874615e-05,
      "loss": 0.6838,
      "step": 5445
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.5584391355514526,
      "learning_rate": 6.328387872379193e-05,
      "loss": 0.6263,
      "step": 5450
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.49611517786979675,
      "learning_rate": 6.311781028713414e-05,
      "loss": 0.7853,
      "step": 5455
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.5180690288543701,
      "learning_rate": 6.295185949850339e-05,
      "loss": 0.7376,
      "step": 5460
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6937770247459412,
      "learning_rate": 6.278602688725497e-05,
      "loss": 0.7639,
      "step": 5465
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6713787317276001,
      "learning_rate": 6.262031298236728e-05,
      "loss": 0.7227,
      "step": 5470
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7440084218978882,
      "learning_rate": 6.245471831243996e-05,
      "loss": 0.7153,
      "step": 5475
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6448591947555542,
      "learning_rate": 6.228924340569233e-05,
      "loss": 0.7611,
      "step": 5480
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6704011559486389,
      "learning_rate": 6.212388878996177e-05,
      "loss": 0.8411,
      "step": 5485
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6402199864387512,
      "learning_rate": 6.195865499270186e-05,
      "loss": 0.6718,
      "step": 5490
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6440407037734985,
      "learning_rate": 6.179354254098085e-05,
      "loss": 0.6699,
      "step": 5495
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6990440487861633,
      "learning_rate": 6.162855196147986e-05,
      "loss": 0.7178,
      "step": 5500
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6173884272575378,
      "learning_rate": 6.146368378049134e-05,
      "loss": 0.8273,
      "step": 5505
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6001347303390503,
      "learning_rate": 6.129893852391721e-05,
      "loss": 0.8893,
      "step": 5510
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.5927189588546753,
      "learning_rate": 6.113431671726735e-05,
      "loss": 0.7978,
      "step": 5515
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7115824222564697,
      "learning_rate": 6.0969818885657835e-05,
      "loss": 0.8715,
      "step": 5520
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6806285977363586,
      "learning_rate": 6.080544555380927e-05,
      "loss": 0.6983,
      "step": 5525
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6034073829650879,
      "learning_rate": 6.06411972460452e-05,
      "loss": 0.7861,
      "step": 5530
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6072666645050049,
      "learning_rate": 6.047707448629023e-05,
      "loss": 0.796,
      "step": 5535
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6448633670806885,
      "learning_rate": 6.0313077798068575e-05,
      "loss": 0.7532,
      "step": 5540
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6306453943252563,
      "learning_rate": 6.014920770450232e-05,
      "loss": 0.7327,
      "step": 5545
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.8060635328292847,
      "learning_rate": 5.998546472830965e-05,
      "loss": 0.7468,
      "step": 5550
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6441696286201477,
      "learning_rate": 5.9821849391803375e-05,
      "loss": 0.6766,
      "step": 5555
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7230235934257507,
      "learning_rate": 5.9658362216889095e-05,
      "loss": 0.7493,
      "step": 5560
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7482134103775024,
      "learning_rate": 5.949500372506354e-05,
      "loss": 0.6559,
      "step": 5565
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.5693408846855164,
      "learning_rate": 5.933177443741309e-05,
      "loss": 0.674,
      "step": 5570
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6371012330055237,
      "learning_rate": 5.916867487461186e-05,
      "loss": 0.7717,
      "step": 5575
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.600836455821991,
      "learning_rate": 5.900570555692029e-05,
      "loss": 0.8248,
      "step": 5580
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.748927891254425,
      "learning_rate": 5.884286700418328e-05,
      "loss": 0.7426,
      "step": 5585
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6669610142707825,
      "learning_rate": 5.8680159735828555e-05,
      "loss": 0.7964,
      "step": 5590
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6355113387107849,
      "learning_rate": 5.85175842708652e-05,
      "loss": 0.8308,
      "step": 5595
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6880905628204346,
      "learning_rate": 5.835514112788179e-05,
      "loss": 0.711,
      "step": 5600
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.7189605236053467,
      "learning_rate": 5.819283082504482e-05,
      "loss": 0.7994,
      "step": 5605
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.521282970905304,
      "learning_rate": 5.8030653880097066e-05,
      "loss": 0.6762,
      "step": 5610
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6705846786499023,
      "learning_rate": 5.7868610810355896e-05,
      "loss": 0.7619,
      "step": 5615
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.7209597826004028,
      "learning_rate": 5.770670213271165e-05,
      "loss": 0.8654,
      "step": 5620
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6950438022613525,
      "learning_rate": 5.7544928363625974e-05,
      "loss": 0.7806,
      "step": 5625
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.7177894711494446,
      "learning_rate": 5.738329001913014e-05,
      "loss": 0.8453,
      "step": 5630
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6125010848045349,
      "learning_rate": 5.722178761482356e-05,
      "loss": 0.7242,
      "step": 5635
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.650780975818634,
      "learning_rate": 5.706042166587193e-05,
      "loss": 0.8344,
      "step": 5640
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6594287157058716,
      "learning_rate": 5.6899192687005585e-05,
      "loss": 0.8212,
      "step": 5645
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.8201553225517273,
      "learning_rate": 5.673810119251814e-05,
      "loss": 0.9035,
      "step": 5650
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.5799230933189392,
      "learning_rate": 5.657714769626455e-05,
      "loss": 0.7603,
      "step": 5655
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.7373571395874023,
      "learning_rate": 5.641633271165955e-05,
      "loss": 0.8696,
      "step": 5660
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6818183660507202,
      "learning_rate": 5.6255656751676143e-05,
      "loss": 0.7146,
      "step": 5665
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.640631377696991,
      "learning_rate": 5.609512032884385e-05,
      "loss": 0.8767,
      "step": 5670
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6422764658927917,
      "learning_rate": 5.5934723955246917e-05,
      "loss": 0.7608,
      "step": 5675
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.633758544921875,
      "learning_rate": 5.5774468142523104e-05,
      "loss": 0.7595,
      "step": 5680
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6388438940048218,
      "learning_rate": 5.5614353401861675e-05,
      "loss": 0.8263,
      "step": 5685
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6727738976478577,
      "learning_rate": 5.545438024400192e-05,
      "loss": 0.7927,
      "step": 5690
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6814836263656616,
      "learning_rate": 5.529454917923149e-05,
      "loss": 0.8103,
      "step": 5695
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.772027850151062,
      "learning_rate": 5.513486071738481e-05,
      "loss": 0.707,
      "step": 5700
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6700000166893005,
      "learning_rate": 5.4975315367841374e-05,
      "loss": 0.8225,
      "step": 5705
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.7606449723243713,
      "learning_rate": 5.481591363952421e-05,
      "loss": 0.7547,
      "step": 5710
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6637645363807678,
      "learning_rate": 5.465665604089829e-05,
      "loss": 0.8537,
      "step": 5715
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6221731901168823,
      "learning_rate": 5.449754307996871e-05,
      "loss": 0.6966,
      "step": 5720
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6201137900352478,
      "learning_rate": 5.433857526427923e-05,
      "loss": 0.7586,
      "step": 5725
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.715356171131134,
      "learning_rate": 5.417975310091068e-05,
      "loss": 0.9557,
      "step": 5730
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.7795970439910889,
      "learning_rate": 5.402107709647921e-05,
      "loss": 0.8289,
      "step": 5735
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6747726798057556,
      "learning_rate": 5.3862547757134816e-05,
      "loss": 0.6959,
      "step": 5740
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6822933554649353,
      "learning_rate": 5.370416558855955e-05,
      "loss": 0.7804,
      "step": 5745
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6174326539039612,
      "learning_rate": 5.354593109596621e-05,
      "loss": 0.8149,
      "step": 5750
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6532416939735413,
      "learning_rate": 5.338784478409628e-05,
      "loss": 0.8078,
      "step": 5755
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6764138340950012,
      "learning_rate": 5.3229907157218737e-05,
      "loss": 0.802,
      "step": 5760
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.6659644842147827,
      "learning_rate": 5.307211871912828e-05,
      "loss": 0.7593,
      "step": 5765
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.8311240077018738,
      "learning_rate": 5.291447997314367e-05,
      "loss": 0.7216,
      "step": 5770
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.6325226426124573,
      "learning_rate": 5.275699142210615e-05,
      "loss": 0.7994,
      "step": 5775
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.678072988986969,
      "learning_rate": 5.259965356837795e-05,
      "loss": 0.8148,
      "step": 5780
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.6537745594978333,
      "learning_rate": 5.244246691384051e-05,
      "loss": 0.8272,
      "step": 5785
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.6158615350723267,
      "learning_rate": 5.228543195989303e-05,
      "loss": 0.6634,
      "step": 5790
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.5966333746910095,
      "learning_rate": 5.212854920745075e-05,
      "loss": 0.733,
      "step": 5795
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.6967484951019287,
      "learning_rate": 5.1971819156943545e-05,
      "loss": 0.855,
      "step": 5800
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.6322739124298096,
      "learning_rate": 5.181524230831409e-05,
      "loss": 0.8315,
      "step": 5805
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.6847050786018372,
      "learning_rate": 5.1658819161016294e-05,
      "loss": 0.8198,
      "step": 5810
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.6578314304351807,
      "learning_rate": 5.150255021401399e-05,
      "loss": 0.8202,
      "step": 5815
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.6339473128318787,
      "learning_rate": 5.134643596577897e-05,
      "loss": 0.7877,
      "step": 5820
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.5576701760292053,
      "learning_rate": 5.1190476914289645e-05,
      "loss": 0.7409,
      "step": 5825
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.082842469215393,
      "learning_rate": 5.103467355702928e-05,
      "loss": 0.6842,
      "step": 5830
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7405625581741333,
      "learning_rate": 5.087902639098472e-05,
      "loss": 0.7636,
      "step": 5835
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.5836653709411621,
      "learning_rate": 5.0723535912644294e-05,
      "loss": 0.7134,
      "step": 5840
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7886473536491394,
      "learning_rate": 5.0568202617996675e-05,
      "loss": 0.8099,
      "step": 5845
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6854684948921204,
      "learning_rate": 5.0413027002529214e-05,
      "loss": 0.7903,
      "step": 5850
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6613681316375732,
      "learning_rate": 5.025800956122619e-05,
      "loss": 0.7311,
      "step": 5855
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6116787791252136,
      "learning_rate": 5.010315078856733e-05,
      "loss": 0.7632,
      "step": 5860
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.74680095911026,
      "learning_rate": 4.99484511785263e-05,
      "loss": 0.8084,
      "step": 5865
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6365654468536377,
      "learning_rate": 4.979391122456899e-05,
      "loss": 0.6616,
      "step": 5870
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.7090327143669128,
      "learning_rate": 4.9639531419652075e-05,
      "loss": 0.8372,
      "step": 5875
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.7884336709976196,
      "learning_rate": 4.948531225622129e-05,
      "loss": 0.7197,
      "step": 5880
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.7464672923088074,
      "learning_rate": 4.933125422621013e-05,
      "loss": 0.7625,
      "step": 5885
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.5780450701713562,
      "learning_rate": 4.9177357821037964e-05,
      "loss": 0.7627,
      "step": 5890
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6157823801040649,
      "learning_rate": 4.902362353160851e-05,
      "loss": 0.8363,
      "step": 5895
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6948485374450684,
      "learning_rate": 4.8870051848308603e-05,
      "loss": 0.7677,
      "step": 5900
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.7331840395927429,
      "learning_rate": 4.871664326100625e-05,
      "loss": 0.887,
      "step": 5905
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6237366199493408,
      "learning_rate": 4.856339825904921e-05,
      "loss": 0.7934,
      "step": 5910
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6936962008476257,
      "learning_rate": 4.841031733126345e-05,
      "loss": 0.7613,
      "step": 5915
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6001924276351929,
      "learning_rate": 4.825740096595159e-05,
      "loss": 0.7355,
      "step": 5920
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.5404167771339417,
      "learning_rate": 4.8104649650891295e-05,
      "loss": 0.7993,
      "step": 5925
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.656347930431366,
      "learning_rate": 4.795206387333371e-05,
      "loss": 0.7538,
      "step": 5930
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6526111364364624,
      "learning_rate": 4.779964412000206e-05,
      "loss": 0.7349,
      "step": 5935
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.7296586632728577,
      "learning_rate": 4.7647390877089884e-05,
      "loss": 0.7385,
      "step": 5940
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6850858330726624,
      "learning_rate": 4.749530463025961e-05,
      "loss": 0.7774,
      "step": 5945
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.7608144283294678,
      "learning_rate": 4.734338586464096e-05,
      "loss": 0.7299,
      "step": 5950
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6235141158103943,
      "learning_rate": 4.719163506482942e-05,
      "loss": 0.734,
      "step": 5955
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6358844637870789,
      "learning_rate": 4.704005271488472e-05,
      "loss": 0.78,
      "step": 5960
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.7749031186103821,
      "learning_rate": 4.6888639298329216e-05,
      "loss": 0.8022,
      "step": 5965
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.5940665006637573,
      "learning_rate": 4.673739529814653e-05,
      "loss": 0.7931,
      "step": 5970
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.674860417842865,
      "learning_rate": 4.658632119677965e-05,
      "loss": 0.8207,
      "step": 5975
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.5767741799354553,
      "learning_rate": 4.643541747612974e-05,
      "loss": 0.6727,
      "step": 5980
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6625251770019531,
      "learning_rate": 4.6284684617554555e-05,
      "loss": 0.7103,
      "step": 5985
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6491448879241943,
      "learning_rate": 4.613412310186669e-05,
      "loss": 0.7724,
      "step": 5990
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.8284974098205566,
      "learning_rate": 4.5983733409332265e-05,
      "loss": 0.8283,
      "step": 5995
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6128905415534973,
      "learning_rate": 4.5833516019669275e-05,
      "loss": 0.7541,
      "step": 6000
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.642086923122406,
      "learning_rate": 4.568347141204611e-05,
      "loss": 0.7939,
      "step": 6005
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6303368806838989,
      "learning_rate": 4.553360006508003e-05,
      "loss": 0.7202,
      "step": 6010
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.5753061771392822,
      "learning_rate": 4.538390245683555e-05,
      "loss": 0.7782,
      "step": 6015
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6998262405395508,
      "learning_rate": 4.523437906482313e-05,
      "loss": 0.723,
      "step": 6020
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.5152105689048767,
      "learning_rate": 4.508503036599743e-05,
      "loss": 0.7258,
      "step": 6025
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7657172679901123,
      "learning_rate": 4.493585683675575e-05,
      "loss": 0.7904,
      "step": 6030
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7116373777389526,
      "learning_rate": 4.478685895293685e-05,
      "loss": 0.7479,
      "step": 6035
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.6210662722587585,
      "learning_rate": 4.463803718981905e-05,
      "loss": 0.7604,
      "step": 6040
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.6628497242927551,
      "learning_rate": 4.448939202211896e-05,
      "loss": 0.7996,
      "step": 6045
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7364223599433899,
      "learning_rate": 4.434092392398978e-05,
      "loss": 0.8187,
      "step": 6050
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7144873142242432,
      "learning_rate": 4.4192633369020066e-05,
      "loss": 0.8995,
      "step": 6055
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7300914525985718,
      "learning_rate": 4.404452083023183e-05,
      "loss": 0.755,
      "step": 6060
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7752556800842285,
      "learning_rate": 4.389658678007933e-05,
      "loss": 0.835,
      "step": 6065
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.631405770778656,
      "learning_rate": 4.3748831690447565e-05,
      "loss": 0.769,
      "step": 6070
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.6509888172149658,
      "learning_rate": 4.360125603265057e-05,
      "loss": 0.7765,
      "step": 6075
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.6003565788269043,
      "learning_rate": 4.345386027743005e-05,
      "loss": 0.6942,
      "step": 6080
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7362748384475708,
      "learning_rate": 4.330664489495385e-05,
      "loss": 0.803,
      "step": 6085
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7295613288879395,
      "learning_rate": 4.315961035481445e-05,
      "loss": 0.7736,
      "step": 6090
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7188911437988281,
      "learning_rate": 4.30127571260275e-05,
      "loss": 0.7708,
      "step": 6095
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.6692444086074829,
      "learning_rate": 4.286608567703024e-05,
      "loss": 0.8271,
      "step": 6100
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.8137506246566772,
      "learning_rate": 4.271959647568017e-05,
      "loss": 0.7915,
      "step": 6105
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7544528245925903,
      "learning_rate": 4.257328998925338e-05,
      "loss": 0.7633,
      "step": 6110
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7398918271064758,
      "learning_rate": 4.242716668444304e-05,
      "loss": 0.7613,
      "step": 6115
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.6789968609809875,
      "learning_rate": 4.2281227027358187e-05,
      "loss": 0.8473,
      "step": 6120
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.5836973190307617,
      "learning_rate": 4.2135471483521925e-05,
      "loss": 0.7006,
      "step": 6125
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.6515111327171326,
      "learning_rate": 4.198990051787012e-05,
      "loss": 0.7468,
      "step": 6130
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.5469412207603455,
      "learning_rate": 4.184451459474983e-05,
      "loss": 0.7166,
      "step": 6135
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.6362948417663574,
      "learning_rate": 4.169931417791788e-05,
      "loss": 0.6851,
      "step": 6140
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7324738502502441,
      "learning_rate": 4.155429973053935e-05,
      "loss": 0.6982,
      "step": 6145
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.6752232313156128,
      "learning_rate": 4.140947171518609e-05,
      "loss": 0.7343,
      "step": 6150
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7206586003303528,
      "learning_rate": 4.126483059383534e-05,
      "loss": 0.7781,
      "step": 6155
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.6868919134140015,
      "learning_rate": 4.112037682786811e-05,
      "loss": 0.8156,
      "step": 6160
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.709814190864563,
      "learning_rate": 4.0976110878067783e-05,
      "loss": 0.8395,
      "step": 6165
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.6500702500343323,
      "learning_rate": 4.083203320461867e-05,
      "loss": 0.7951,
      "step": 6170
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7838833332061768,
      "learning_rate": 4.068814426710447e-05,
      "loss": 0.8203,
      "step": 6175
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.6506848335266113,
      "learning_rate": 4.0544444524506875e-05,
      "loss": 0.7022,
      "step": 6180
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.8521379828453064,
      "learning_rate": 4.040093443520404e-05,
      "loss": 0.7542,
      "step": 6185
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.5580353736877441,
      "learning_rate": 4.025761445696929e-05,
      "loss": 0.7818,
      "step": 6190
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.8889886736869812,
      "learning_rate": 4.011448504696933e-05,
      "loss": 0.7498,
      "step": 6195
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.6307147145271301,
      "learning_rate": 3.997154666176306e-05,
      "loss": 0.8169,
      "step": 6200
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.740788459777832,
      "learning_rate": 3.982879975730015e-05,
      "loss": 0.6286,
      "step": 6205
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.7221429347991943,
      "learning_rate": 3.9686244788919345e-05,
      "loss": 0.8662,
      "step": 6210
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.578336238861084,
      "learning_rate": 3.9543882211347206e-05,
      "loss": 0.7928,
      "step": 6215
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.6942074298858643,
      "learning_rate": 3.940171247869658e-05,
      "loss": 0.7358,
      "step": 6220
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.8117511868476868,
      "learning_rate": 3.925973604446517e-05,
      "loss": 0.699,
      "step": 6225
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.6607906818389893,
      "learning_rate": 3.91179533615341e-05,
      "loss": 0.7651,
      "step": 6230
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.705653190612793,
      "learning_rate": 3.8976364882166414e-05,
      "loss": 0.8838,
      "step": 6235
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.6348661780357361,
      "learning_rate": 3.8834971058005796e-05,
      "loss": 0.7253,
      "step": 6240
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.7973907589912415,
      "learning_rate": 3.869377234007494e-05,
      "loss": 0.7247,
      "step": 6245
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.7183629274368286,
      "learning_rate": 3.855276917877407e-05,
      "loss": 0.7184,
      "step": 6250
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.6137698292732239,
      "learning_rate": 3.8411962023879844e-05,
      "loss": 0.8104,
      "step": 6255
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.753842294216156,
      "learning_rate": 3.827135132454351e-05,
      "loss": 0.7628,
      "step": 6260
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.658284068107605,
      "learning_rate": 3.813093752928973e-05,
      "loss": 0.7889,
      "step": 6265
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.5758240818977356,
      "learning_rate": 3.799072108601511e-05,
      "loss": 0.7978,
      "step": 6270
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.6841059327125549,
      "learning_rate": 3.78507024419867e-05,
      "loss": 0.8056,
      "step": 6275
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.6589455604553223,
      "learning_rate": 3.771088204384051e-05,
      "loss": 0.7676,
      "step": 6280
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.5679621696472168,
      "learning_rate": 3.757126033758028e-05,
      "loss": 0.7095,
      "step": 6285
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.6563994288444519,
      "learning_rate": 3.7431837768576017e-05,
      "loss": 0.7954,
      "step": 6290
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.6838070750236511,
      "learning_rate": 3.7292614781562384e-05,
      "loss": 0.8108,
      "step": 6295
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7321529984474182,
      "learning_rate": 3.715359182063748e-05,
      "loss": 0.8576,
      "step": 6300
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7272433042526245,
      "learning_rate": 3.701476932926132e-05,
      "loss": 0.7887,
      "step": 6305
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7071683406829834,
      "learning_rate": 3.68761477502545e-05,
      "loss": 0.8411,
      "step": 6310
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.5736758708953857,
      "learning_rate": 3.673772752579665e-05,
      "loss": 0.7584,
      "step": 6315
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.656456708908081,
      "learning_rate": 3.659950909742525e-05,
      "loss": 0.8634,
      "step": 6320
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.6820427775382996,
      "learning_rate": 3.646149290603398e-05,
      "loss": 0.862,
      "step": 6325
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.8717942833900452,
      "learning_rate": 3.6323679391871446e-05,
      "loss": 0.7477,
      "step": 6330
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7242932915687561,
      "learning_rate": 3.6186068994539745e-05,
      "loss": 0.762,
      "step": 6335
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7470153570175171,
      "learning_rate": 3.6048662152993065e-05,
      "loss": 0.7616,
      "step": 6340
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.725097119808197,
      "learning_rate": 3.59114593055363e-05,
      "loss": 0.808,
      "step": 6345
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.6063842177391052,
      "learning_rate": 3.5774460889823566e-05,
      "loss": 0.8324,
      "step": 6350
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7853096723556519,
      "learning_rate": 3.563766734285704e-05,
      "loss": 0.8145,
      "step": 6355
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.6266723871231079,
      "learning_rate": 3.5501079100985254e-05,
      "loss": 0.7249,
      "step": 6360
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.6518904566764832,
      "learning_rate": 3.5364696599901835e-05,
      "loss": 0.8258,
      "step": 6365
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.6777641177177429,
      "learning_rate": 3.522852027464426e-05,
      "loss": 0.8226,
      "step": 6370
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.6241174936294556,
      "learning_rate": 3.509255055959224e-05,
      "loss": 0.7909,
      "step": 6375
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.7724244594573975,
      "learning_rate": 3.495678788846648e-05,
      "loss": 0.6656,
      "step": 6380
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.6773399710655212,
      "learning_rate": 3.4821232694327224e-05,
      "loss": 0.704,
      "step": 6385
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.6904076337814331,
      "learning_rate": 3.4685885409572893e-05,
      "loss": 0.7869,
      "step": 6390
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.6653614044189453,
      "learning_rate": 3.455074646593876e-05,
      "loss": 0.8249,
      "step": 6395
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.871178925037384,
      "learning_rate": 3.441581629449542e-05,
      "loss": 0.8137,
      "step": 6400
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.5883834362030029,
      "learning_rate": 3.4281095325647684e-05,
      "loss": 0.798,
      "step": 6405
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.6861564517021179,
      "learning_rate": 3.41465839891329e-05,
      "loss": 0.7559,
      "step": 6410
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.6617395877838135,
      "learning_rate": 3.401228271401978e-05,
      "loss": 0.782,
      "step": 6415
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.6430420279502869,
      "learning_rate": 3.387819192870697e-05,
      "loss": 0.7349,
      "step": 6420
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.7476750612258911,
      "learning_rate": 3.374431206092168e-05,
      "loss": 0.8074,
      "step": 6425
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.6503930687904358,
      "learning_rate": 3.3610643537718345e-05,
      "loss": 0.7641,
      "step": 6430
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.5740126967430115,
      "learning_rate": 3.3477186785477186e-05,
      "loss": 0.6907,
      "step": 6435
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.613012433052063,
      "learning_rate": 3.334394222990307e-05,
      "loss": 0.7404,
      "step": 6440
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.7166250348091125,
      "learning_rate": 3.3210910296023776e-05,
      "loss": 0.8843,
      "step": 6445
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.7031468152999878,
      "learning_rate": 3.3078091408188985e-05,
      "loss": 0.7878,
      "step": 6450
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.8461940288543701,
      "learning_rate": 3.29454859900688e-05,
      "loss": 0.8152,
      "step": 6455
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.7095286846160889,
      "learning_rate": 3.281309446465236e-05,
      "loss": 0.7507,
      "step": 6460
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.5577227473258972,
      "learning_rate": 3.2680917254246515e-05,
      "loss": 0.6388,
      "step": 6465
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.6764439940452576,
      "learning_rate": 3.2548954780474484e-05,
      "loss": 0.8344,
      "step": 6470
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.7055917978286743,
      "learning_rate": 3.241720746427456e-05,
      "loss": 0.813,
      "step": 6475
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.7273811101913452,
      "learning_rate": 3.228567572589864e-05,
      "loss": 0.8289,
      "step": 6480
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.8205636739730835,
      "learning_rate": 3.215435998491102e-05,
      "loss": 0.8189,
      "step": 6485
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.5480372309684753,
      "learning_rate": 3.202326066018701e-05,
      "loss": 0.8921,
      "step": 6490
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.6462122797966003,
      "learning_rate": 3.189237816991161e-05,
      "loss": 0.7161,
      "step": 6495
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.678535521030426,
      "learning_rate": 3.176171293157798e-05,
      "loss": 0.6587,
      "step": 6500
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.6733066439628601,
      "learning_rate": 3.163126536198653e-05,
      "loss": 0.7188,
      "step": 6505
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.5796012878417969,
      "learning_rate": 3.150103587724318e-05,
      "loss": 0.7414,
      "step": 6510
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.6976703405380249,
      "learning_rate": 3.137102489275824e-05,
      "loss": 0.6708,
      "step": 6515
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.7101731300354004,
      "learning_rate": 3.1241232823245026e-05,
      "loss": 0.6906,
      "step": 6520
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.7651215195655823,
      "learning_rate": 3.111166008271866e-05,
      "loss": 0.8167,
      "step": 6525
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.7553005218505859,
      "learning_rate": 3.098230708449445e-05,
      "loss": 0.6965,
      "step": 6530
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.6705849766731262,
      "learning_rate": 3.0853174241186865e-05,
      "loss": 0.7017,
      "step": 6535
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.6687391996383667,
      "learning_rate": 3.072426196470818e-05,
      "loss": 0.7522,
      "step": 6540
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.5514757633209229,
      "learning_rate": 3.0595570666266996e-05,
      "loss": 0.6664,
      "step": 6545
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.7751938700675964,
      "learning_rate": 3.046710075636706e-05,
      "loss": 0.7551,
      "step": 6550
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.6046425700187683,
      "learning_rate": 3.033885264480595e-05,
      "loss": 0.7579,
      "step": 6555
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.6243056058883667,
      "learning_rate": 3.0210826740673727e-05,
      "loss": 0.7391,
      "step": 6560
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.6286137700080872,
      "learning_rate": 3.0083023452351633e-05,
      "loss": 0.8608,
      "step": 6565
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.6316589117050171,
      "learning_rate": 2.99554431875108e-05,
      "loss": 0.7529,
      "step": 6570
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.6141951084136963,
      "learning_rate": 2.982808635311104e-05,
      "loss": 0.7676,
      "step": 6575
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7614745497703552,
      "learning_rate": 2.9700953355399386e-05,
      "loss": 0.77,
      "step": 6580
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.6968342661857605,
      "learning_rate": 2.9574044599908766e-05,
      "loss": 0.836,
      "step": 6585
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.6154257655143738,
      "learning_rate": 2.9447360491457033e-05,
      "loss": 0.8214,
      "step": 6590
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7290199398994446,
      "learning_rate": 2.93209014341453e-05,
      "loss": 0.7725,
      "step": 6595
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.5743769407272339,
      "learning_rate": 2.9194667831356837e-05,
      "loss": 0.7704,
      "step": 6600
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.6820549964904785,
      "learning_rate": 2.9068660085755773e-05,
      "loss": 0.7054,
      "step": 6605
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.5897756814956665,
      "learning_rate": 2.894287859928577e-05,
      "loss": 0.7094,
      "step": 6610
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7251617908477783,
      "learning_rate": 2.881732377316878e-05,
      "loss": 0.9164,
      "step": 6615
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.749259889125824,
      "learning_rate": 2.8691996007903686e-05,
      "loss": 0.8373,
      "step": 6620
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.680907666683197,
      "learning_rate": 2.8566895703265217e-05,
      "loss": 0.8164,
      "step": 6625
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7473598122596741,
      "learning_rate": 2.844202325830241e-05,
      "loss": 0.8554,
      "step": 6630
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.8276415467262268,
      "learning_rate": 2.831737907133751e-05,
      "loss": 0.7296,
      "step": 6635
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.6819781064987183,
      "learning_rate": 2.8192963539964677e-05,
      "loss": 0.7994,
      "step": 6640
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.6347323060035706,
      "learning_rate": 2.8068777061048668e-05,
      "loss": 0.7408,
      "step": 6645
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.6867766976356506,
      "learning_rate": 2.794482003072364e-05,
      "loss": 0.8205,
      "step": 6650
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.6473438143730164,
      "learning_rate": 2.782109284439176e-05,
      "loss": 0.7703,
      "step": 6655
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7229429483413696,
      "learning_rate": 2.7697595896722207e-05,
      "loss": 0.747,
      "step": 6660
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.6416674852371216,
      "learning_rate": 2.7574329581649526e-05,
      "loss": 0.714,
      "step": 6665
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.6123269200325012,
      "learning_rate": 2.7451294292372686e-05,
      "loss": 0.7289,
      "step": 6670
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7136829495429993,
      "learning_rate": 2.732849042135377e-05,
      "loss": 0.7079,
      "step": 6675
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.6285436153411865,
      "learning_rate": 2.7205918360316597e-05,
      "loss": 0.7023,
      "step": 6680
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.6968997716903687,
      "learning_rate": 2.7083578500245566e-05,
      "loss": 0.6881,
      "step": 6685
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9395476579666138,
      "learning_rate": 2.6961471231384417e-05,
      "loss": 0.761,
      "step": 6690
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7061296105384827,
      "learning_rate": 2.6839596943234947e-05,
      "loss": 0.8281,
      "step": 6695
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.6853705644607544,
      "learning_rate": 2.671795602455578e-05,
      "loss": 0.7588,
      "step": 6700
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7715938687324524,
      "learning_rate": 2.6596548863361117e-05,
      "loss": 0.8728,
      "step": 6705
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.588621199131012,
      "learning_rate": 2.647537584691957e-05,
      "loss": 0.7497,
      "step": 6710
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7696636319160461,
      "learning_rate": 2.6354437361752848e-05,
      "loss": 0.803,
      "step": 6715
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7940670251846313,
      "learning_rate": 2.623373379363444e-05,
      "loss": 0.8015,
      "step": 6720
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.679672360420227,
      "learning_rate": 2.6113265527588648e-05,
      "loss": 0.72,
      "step": 6725
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.6845086812973022,
      "learning_rate": 2.5993032947889117e-05,
      "loss": 0.6869,
      "step": 6730
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.7502850890159607,
      "learning_rate": 2.5873036438057674e-05,
      "loss": 0.677,
      "step": 6735
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.6978954672813416,
      "learning_rate": 2.5753276380863144e-05,
      "loss": 0.7409,
      "step": 6740
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.624478816986084,
      "learning_rate": 2.5633753158320185e-05,
      "loss": 0.8546,
      "step": 6745
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.8565587401390076,
      "learning_rate": 2.551446715168785e-05,
      "loss": 0.7651,
      "step": 6750
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.7041921019554138,
      "learning_rate": 2.539541874146857e-05,
      "loss": 0.8322,
      "step": 6755
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.6443396806716919,
      "learning_rate": 2.5276608307406945e-05,
      "loss": 0.7984,
      "step": 6760
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.6369118094444275,
      "learning_rate": 2.5158036228488426e-05,
      "loss": 0.7303,
      "step": 6765
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9061431884765625,
      "learning_rate": 2.503970288293811e-05,
      "loss": 0.7423,
      "step": 6770
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.6243317723274231,
      "learning_rate": 2.492160864821964e-05,
      "loss": 0.7388,
      "step": 6775
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.8769336938858032,
      "learning_rate": 2.480375390103389e-05,
      "loss": 0.8951,
      "step": 6780
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.7571940422058105,
      "learning_rate": 2.4686139017317833e-05,
      "loss": 0.6837,
      "step": 6785
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.7043862342834473,
      "learning_rate": 2.4568764372243268e-05,
      "loss": 0.6231,
      "step": 6790
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.6525396108627319,
      "learning_rate": 2.4451630340215805e-05,
      "loss": 0.7283,
      "step": 6795
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.8056162595748901,
      "learning_rate": 2.433473729487341e-05,
      "loss": 0.7733,
      "step": 6800
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.7512674927711487,
      "learning_rate": 2.4218085609085316e-05,
      "loss": 0.7004,
      "step": 6805
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.6620556712150574,
      "learning_rate": 2.4101675654951006e-05,
      "loss": 0.7546,
      "step": 6810
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.6622219085693359,
      "learning_rate": 2.3985507803798768e-05,
      "loss": 0.7237,
      "step": 6815
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.6944047212600708,
      "learning_rate": 2.3869582426184644e-05,
      "loss": 0.7919,
      "step": 6820
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.8888928890228271,
      "learning_rate": 2.375389989189124e-05,
      "loss": 0.7315,
      "step": 6825
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.6875692009925842,
      "learning_rate": 2.3638460569926523e-05,
      "loss": 0.8011,
      "step": 6830
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.6747264266014099,
      "learning_rate": 2.3523264828522662e-05,
      "loss": 0.8427,
      "step": 6835
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.7636617422103882,
      "learning_rate": 2.3408313035134798e-05,
      "loss": 0.8938,
      "step": 6840
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.7376787066459656,
      "learning_rate": 2.3293605556440033e-05,
      "loss": 0.7261,
      "step": 6845
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.6401947736740112,
      "learning_rate": 2.3179142758336026e-05,
      "loss": 0.8163,
      "step": 6850
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.5118803977966309,
      "learning_rate": 2.3064925005939986e-05,
      "loss": 0.6642,
      "step": 6855
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.7035655379295349,
      "learning_rate": 2.2950952663587498e-05,
      "loss": 0.8234,
      "step": 6860
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.5912356376647949,
      "learning_rate": 2.2837226094831278e-05,
      "loss": 0.6674,
      "step": 6865
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.6668829917907715,
      "learning_rate": 2.272374566244011e-05,
      "loss": 0.747,
      "step": 6870
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.674974262714386,
      "learning_rate": 2.2610511728397587e-05,
      "loss": 0.8882,
      "step": 6875
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.8864641785621643,
      "learning_rate": 2.2497524653901146e-05,
      "loss": 0.7622,
      "step": 6880
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.7429625391960144,
      "learning_rate": 2.238478479936059e-05,
      "loss": 0.7334,
      "step": 6885
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.71225905418396,
      "learning_rate": 2.2272292524397252e-05,
      "loss": 0.7476,
      "step": 6890
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.7212709188461304,
      "learning_rate": 2.2160048187842742e-05,
      "loss": 0.8207,
      "step": 6895
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.7402102947235107,
      "learning_rate": 2.204805214773774e-05,
      "loss": 0.8319,
      "step": 6900
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.847400426864624,
      "learning_rate": 2.193630476133087e-05,
      "loss": 0.7596,
      "step": 6905
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6712285876274109,
      "learning_rate": 2.1824806385077744e-05,
      "loss": 0.791,
      "step": 6910
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.7666999697685242,
      "learning_rate": 2.1713557374639458e-05,
      "loss": 0.7542,
      "step": 6915
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9645623564720154,
      "learning_rate": 2.1602558084881796e-05,
      "loss": 0.8158,
      "step": 6920
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6309619545936584,
      "learning_rate": 2.149180886987401e-05,
      "loss": 0.7656,
      "step": 6925
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6789581775665283,
      "learning_rate": 2.1381310082887563e-05,
      "loss": 0.801,
      "step": 6930
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.7283372282981873,
      "learning_rate": 2.127106207639519e-05,
      "loss": 0.8794,
      "step": 6935
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6633392572402954,
      "learning_rate": 2.116106520206952e-05,
      "loss": 0.9224,
      "step": 6940
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6747632622718811,
      "learning_rate": 2.10513198107823e-05,
      "loss": 0.789,
      "step": 6945
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.5490332841873169,
      "learning_rate": 2.0941826252602993e-05,
      "loss": 0.7228,
      "step": 6950
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6821791529655457,
      "learning_rate": 2.0832584876797723e-05,
      "loss": 0.7467,
      "step": 6955
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.5666481852531433,
      "learning_rate": 2.0723596031828295e-05,
      "loss": 0.7156,
      "step": 6960
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.631689727306366,
      "learning_rate": 2.061486006535095e-05,
      "loss": 0.7043,
      "step": 6965
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6669164896011353,
      "learning_rate": 2.0506377324215153e-05,
      "loss": 0.6862,
      "step": 6970
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6554184556007385,
      "learning_rate": 2.0398148154462826e-05,
      "loss": 0.7031,
      "step": 6975
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6377161145210266,
      "learning_rate": 2.029017290132693e-05,
      "loss": 0.7723,
      "step": 6980
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6968027353286743,
      "learning_rate": 2.0182451909230493e-05,
      "loss": 0.7634,
      "step": 6985
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6753907799720764,
      "learning_rate": 2.0074985521785495e-05,
      "loss": 0.8347,
      "step": 6990
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6418029069900513,
      "learning_rate": 1.9967774081791756e-05,
      "loss": 0.731,
      "step": 6995
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.630171537399292,
      "learning_rate": 1.9860817931235877e-05,
      "loss": 0.8453,
      "step": 7000
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7392057180404663,
      "learning_rate": 1.9754117411290096e-05,
      "loss": 0.6955,
      "step": 7005
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7004316449165344,
      "learning_rate": 1.9647672862311316e-05,
      "loss": 0.7702,
      "step": 7010
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6444839239120483,
      "learning_rate": 1.9541484623839836e-05,
      "loss": 0.7155,
      "step": 7015
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6129523515701294,
      "learning_rate": 1.9435553034598398e-05,
      "loss": 0.7409,
      "step": 7020
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.8170189261436462,
      "learning_rate": 1.9329878432491112e-05,
      "loss": 0.752,
      "step": 7025
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6246253252029419,
      "learning_rate": 1.9224461154602292e-05,
      "loss": 0.7082,
      "step": 7030
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7174789309501648,
      "learning_rate": 1.9119301537195455e-05,
      "loss": 0.8013,
      "step": 7035
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7129815220832825,
      "learning_rate": 1.901439991571221e-05,
      "loss": 0.8366,
      "step": 7040
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.696826159954071,
      "learning_rate": 1.890975662477128e-05,
      "loss": 0.6921,
      "step": 7045
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6623396277427673,
      "learning_rate": 1.8805371998167222e-05,
      "loss": 0.8734,
      "step": 7050
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7665203809738159,
      "learning_rate": 1.8701246368869563e-05,
      "loss": 0.826,
      "step": 7055
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6396629810333252,
      "learning_rate": 1.859738006902172e-05,
      "loss": 0.7085,
      "step": 7060
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6628134846687317,
      "learning_rate": 1.849377342993982e-05,
      "loss": 0.7313,
      "step": 7065
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6394834518432617,
      "learning_rate": 1.839042678211176e-05,
      "loss": 0.7605,
      "step": 7070
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6791489124298096,
      "learning_rate": 1.8287340455196068e-05,
      "loss": 0.8351,
      "step": 7075
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.6268406510353088,
      "learning_rate": 1.8184514778020935e-05,
      "loss": 0.753,
      "step": 7080
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7322360873222351,
      "learning_rate": 1.80819500785831e-05,
      "loss": 0.7054,
      "step": 7085
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.6677795052528381,
      "learning_rate": 1.7979646684046782e-05,
      "loss": 0.8602,
      "step": 7090
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.6794170141220093,
      "learning_rate": 1.787760492074281e-05,
      "loss": 0.6957,
      "step": 7095
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7083120942115784,
      "learning_rate": 1.7775825114167344e-05,
      "loss": 0.8359,
      "step": 7100
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7114768624305725,
      "learning_rate": 1.767430758898092e-05,
      "loss": 0.8783,
      "step": 7105
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.6977761387825012,
      "learning_rate": 1.7573052669007552e-05,
      "loss": 0.8449,
      "step": 7110
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7091259360313416,
      "learning_rate": 1.7472060677233503e-05,
      "loss": 0.7588,
      "step": 7115
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.8661299347877502,
      "learning_rate": 1.737133193580638e-05,
      "loss": 0.7614,
      "step": 7120
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.6567466855049133,
      "learning_rate": 1.727086676603401e-05,
      "loss": 0.8487,
      "step": 7125
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.637890100479126,
      "learning_rate": 1.7170665488383597e-05,
      "loss": 0.8408,
      "step": 7130
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.6123327612876892,
      "learning_rate": 1.70707284224804e-05,
      "loss": 0.7182,
      "step": 7135
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7284800410270691,
      "learning_rate": 1.697105588710698e-05,
      "loss": 0.6767,
      "step": 7140
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.8092273473739624,
      "learning_rate": 1.6871648200202127e-05,
      "loss": 0.8872,
      "step": 7145
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.6525725722312927,
      "learning_rate": 1.677250567885974e-05,
      "loss": 0.8425,
      "step": 7150
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7781700491905212,
      "learning_rate": 1.667362863932792e-05,
      "loss": 0.8548,
      "step": 7155
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.6901320815086365,
      "learning_rate": 1.6575017397007896e-05,
      "loss": 0.7168,
      "step": 7160
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.637550413608551,
      "learning_rate": 1.6476672266453087e-05,
      "loss": 0.8974,
      "step": 7165
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.6621235609054565,
      "learning_rate": 1.6378593561368016e-05,
      "loss": 0.7816,
      "step": 7170
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.6375472545623779,
      "learning_rate": 1.6280781594607364e-05,
      "loss": 0.7895,
      "step": 7175
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.6751498579978943,
      "learning_rate": 1.6183236678175028e-05,
      "loss": 0.8145,
      "step": 7180
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.6726610064506531,
      "learning_rate": 1.6085959123222995e-05,
      "loss": 0.7143,
      "step": 7185
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7486894130706787,
      "learning_rate": 1.5988949240050343e-05,
      "loss": 0.8543,
      "step": 7190
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.6508118510246277,
      "learning_rate": 1.5892207338102494e-05,
      "loss": 0.7921,
      "step": 7195
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.6627808809280396,
      "learning_rate": 1.579573372596993e-05,
      "loss": 0.8042,
      "step": 7200
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.6512479782104492,
      "learning_rate": 1.5699528711387357e-05,
      "loss": 0.7605,
      "step": 7205
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.6039304137229919,
      "learning_rate": 1.560359260123272e-05,
      "loss": 0.7278,
      "step": 7210
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7306320667266846,
      "learning_rate": 1.550792570152618e-05,
      "loss": 0.7868,
      "step": 7215
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7047690153121948,
      "learning_rate": 1.5412528317429197e-05,
      "loss": 0.6986,
      "step": 7220
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7422201037406921,
      "learning_rate": 1.531740075324345e-05,
      "loss": 0.6588,
      "step": 7225
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7010562419891357,
      "learning_rate": 1.5222543312410042e-05,
      "loss": 0.78,
      "step": 7230
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7242764234542847,
      "learning_rate": 1.5127956297508338e-05,
      "loss": 0.6492,
      "step": 7235
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.5822871923446655,
      "learning_rate": 1.5033640010255145e-05,
      "loss": 0.6479,
      "step": 7240
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7780561447143555,
      "learning_rate": 1.493959475150365e-05,
      "loss": 0.7035,
      "step": 7245
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7585102319717407,
      "learning_rate": 1.484582082124254e-05,
      "loss": 0.7921,
      "step": 7250
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.6618209481239319,
      "learning_rate": 1.4752318518594987e-05,
      "loss": 0.6766,
      "step": 7255
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.6074077486991882,
      "learning_rate": 1.46590881418177e-05,
      "loss": 0.7541,
      "step": 7260
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7971552610397339,
      "learning_rate": 1.4566129988300093e-05,
      "loss": 0.935,
      "step": 7265
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.6890114545822144,
      "learning_rate": 1.4473444354563082e-05,
      "loss": 0.6824,
      "step": 7270
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.6999330520629883,
      "learning_rate": 1.438103153625835e-05,
      "loss": 0.7515,
      "step": 7275
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7287675738334656,
      "learning_rate": 1.4288891828167428e-05,
      "loss": 0.7248,
      "step": 7280
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.6378064155578613,
      "learning_rate": 1.4197025524200547e-05,
      "loss": 0.7629,
      "step": 7285
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.6283854842185974,
      "learning_rate": 1.4105432917395911e-05,
      "loss": 0.7033,
      "step": 7290
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7032335996627808,
      "learning_rate": 1.4014114299918612e-05,
      "loss": 0.8074,
      "step": 7295
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.6264139413833618,
      "learning_rate": 1.3923069963059821e-05,
      "loss": 0.7572,
      "step": 7300
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7607216835021973,
      "learning_rate": 1.3832300197235748e-05,
      "loss": 0.6808,
      "step": 7305
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7733168005943298,
      "learning_rate": 1.3741805291986787e-05,
      "loss": 0.7818,
      "step": 7310
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7297649383544922,
      "learning_rate": 1.3651585535976596e-05,
      "loss": 0.7182,
      "step": 7315
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7502012848854065,
      "learning_rate": 1.3561641216991162e-05,
      "loss": 0.7778,
      "step": 7320
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7216570377349854,
      "learning_rate": 1.3471972621937756e-05,
      "loss": 0.7803,
      "step": 7325
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7050399780273438,
      "learning_rate": 1.3382580036844295e-05,
      "loss": 0.8175,
      "step": 7330
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.8511192798614502,
      "learning_rate": 1.3293463746858182e-05,
      "loss": 0.8151,
      "step": 7335
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7982873320579529,
      "learning_rate": 1.3204624036245505e-05,
      "loss": 0.7518,
      "step": 7340
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.6801825761795044,
      "learning_rate": 1.3116061188390083e-05,
      "loss": 0.7761,
      "step": 7345
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.6566172242164612,
      "learning_rate": 1.3027775485792681e-05,
      "loss": 0.7077,
      "step": 7350
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.6745688319206238,
      "learning_rate": 1.2939767210069876e-05,
      "loss": 0.7668,
      "step": 7355
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.5891917943954468,
      "learning_rate": 1.285203664195338e-05,
      "loss": 0.7246,
      "step": 7360
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7064673900604248,
      "learning_rate": 1.2764584061289098e-05,
      "loss": 0.734,
      "step": 7365
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7701008915901184,
      "learning_rate": 1.267740974703614e-05,
      "loss": 0.8342,
      "step": 7370
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.6421898007392883,
      "learning_rate": 1.2590513977266006e-05,
      "loss": 0.7911,
      "step": 7375
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7210098505020142,
      "learning_rate": 1.2503897029161715e-05,
      "loss": 0.8654,
      "step": 7380
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.8242705464363098,
      "learning_rate": 1.2417559179016836e-05,
      "loss": 0.798,
      "step": 7385
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7865302562713623,
      "learning_rate": 1.2331500702234722e-05,
      "loss": 0.8011,
      "step": 7390
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.6898683309555054,
      "learning_rate": 1.2245721873327521e-05,
      "loss": 0.8357,
      "step": 7395
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.6212425827980042,
      "learning_rate": 1.2160222965915401e-05,
      "loss": 0.7448,
      "step": 7400
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7410294413566589,
      "learning_rate": 1.2075004252725619e-05,
      "loss": 0.7546,
      "step": 7405
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.6324216723442078,
      "learning_rate": 1.199006600559156e-05,
      "loss": 0.774,
      "step": 7410
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.8635458946228027,
      "learning_rate": 1.190540849545213e-05,
      "loss": 0.697,
      "step": 7415
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.6437505483627319,
      "learning_rate": 1.1821031992350628e-05,
      "loss": 0.797,
      "step": 7420
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7493045330047607,
      "learning_rate": 1.1736936765434004e-05,
      "loss": 0.749,
      "step": 7425
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.6989094614982605,
      "learning_rate": 1.1653123082951966e-05,
      "loss": 0.8166,
      "step": 7430
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.7406939268112183,
      "learning_rate": 1.1569591212256237e-05,
      "loss": 0.7769,
      "step": 7435
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.6417839527130127,
      "learning_rate": 1.1486341419799474e-05,
      "loss": 0.7865,
      "step": 7440
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.6294339299201965,
      "learning_rate": 1.1403373971134624e-05,
      "loss": 0.7634,
      "step": 7445
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.615867018699646,
      "learning_rate": 1.1320689130914019e-05,
      "loss": 0.689,
      "step": 7450
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.6019276976585388,
      "learning_rate": 1.1238287162888483e-05,
      "loss": 0.7225,
      "step": 7455
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1533637046813965,
      "learning_rate": 1.1156168329906535e-05,
      "loss": 0.7035,
      "step": 7460
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.6268231272697449,
      "learning_rate": 1.1074332893913542e-05,
      "loss": 0.7861,
      "step": 7465
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.7267367243766785,
      "learning_rate": 1.0992781115950868e-05,
      "loss": 0.7226,
      "step": 7470
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.6262809634208679,
      "learning_rate": 1.0911513256155092e-05,
      "loss": 0.7548,
      "step": 7475
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.6905179023742676,
      "learning_rate": 1.0830529573757076e-05,
      "loss": 0.7447,
      "step": 7480
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.7611731290817261,
      "learning_rate": 1.074983032708129e-05,
      "loss": 0.7509,
      "step": 7485
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.708593487739563,
      "learning_rate": 1.0669415773544866e-05,
      "loss": 0.7352,
      "step": 7490
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.6629523038864136,
      "learning_rate": 1.0589286169656742e-05,
      "loss": 0.7969,
      "step": 7495
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.6062517166137695,
      "learning_rate": 1.0509441771017026e-05,
      "loss": 0.7943,
      "step": 7500
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.8772883415222168,
      "learning_rate": 1.0429882832316006e-05,
      "loss": 0.7385,
      "step": 7505
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.7119115591049194,
      "learning_rate": 1.0350609607333384e-05,
      "loss": 0.7723,
      "step": 7510
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.6540088653564453,
      "learning_rate": 1.0271622348937581e-05,
      "loss": 0.713,
      "step": 7515
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.6274105906486511,
      "learning_rate": 1.0192921309084702e-05,
      "loss": 0.7161,
      "step": 7520
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.6521838307380676,
      "learning_rate": 1.0114506738817942e-05,
      "loss": 0.6904,
      "step": 7525
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.5840886235237122,
      "learning_rate": 1.0036378888266663e-05,
      "loss": 0.736,
      "step": 7530
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7056083679199219,
      "learning_rate": 9.9585380066457e-06,
      "loss": 0.7663,
      "step": 7535
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7185655236244202,
      "learning_rate": 9.880984342254462e-06,
      "loss": 0.7682,
      "step": 7540
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7891202569007874,
      "learning_rate": 9.803718142476181e-06,
      "loss": 0.8107,
      "step": 7545
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.8266674876213074,
      "learning_rate": 9.72673965377714e-06,
      "loss": 0.6771,
      "step": 7550
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.6623797416687012,
      "learning_rate": 9.650049121705851e-06,
      "loss": 0.774,
      "step": 7555
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7579045295715332,
      "learning_rate": 9.573646790892298e-06,
      "loss": 0.8548,
      "step": 7560
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7818818688392639,
      "learning_rate": 9.497532905047202e-06,
      "loss": 0.7678,
      "step": 7565
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.6764331459999084,
      "learning_rate": 9.421707706961136e-06,
      "loss": 0.7864,
      "step": 7570
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.6758101582527161,
      "learning_rate": 9.34617143850378e-06,
      "loss": 0.7407,
      "step": 7575
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7731313109397888,
      "learning_rate": 9.270924340623267e-06,
      "loss": 0.6977,
      "step": 7580
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7126657366752625,
      "learning_rate": 9.195966653345255e-06,
      "loss": 0.7612,
      "step": 7585
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.8116534948348999,
      "learning_rate": 9.121298615772256e-06,
      "loss": 0.85,
      "step": 7590
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7227126359939575,
      "learning_rate": 9.04692046608281e-06,
      "loss": 0.7488,
      "step": 7595
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7809383273124695,
      "learning_rate": 8.972832441530876e-06,
      "loss": 0.8398,
      "step": 7600
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7499304413795471,
      "learning_rate": 8.899034778444804e-06,
      "loss": 0.7438,
      "step": 7605
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7327824234962463,
      "learning_rate": 8.825527712226833e-06,
      "loss": 0.8292,
      "step": 7610
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7105828523635864,
      "learning_rate": 8.752311477352259e-06,
      "loss": 0.6284,
      "step": 7615
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7823231220245361,
      "learning_rate": 8.679386307368631e-06,
      "loss": 0.7742,
      "step": 7620
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.6463223099708557,
      "learning_rate": 8.606752434895061e-06,
      "loss": 0.6789,
      "step": 7625
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.6511633396148682,
      "learning_rate": 8.53441009162148e-06,
      "loss": 0.706,
      "step": 7630
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.6864632368087769,
      "learning_rate": 8.462359508307882e-06,
      "loss": 0.7008,
      "step": 7635
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7921543717384338,
      "learning_rate": 8.390600914783598e-06,
      "loss": 0.7399,
      "step": 7640
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.6895166039466858,
      "learning_rate": 8.319134539946549e-06,
      "loss": 0.8166,
      "step": 7645
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7610728144645691,
      "learning_rate": 8.247960611762562e-06,
      "loss": 0.7897,
      "step": 7650
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.566939115524292,
      "learning_rate": 8.177079357264583e-06,
      "loss": 0.744,
      "step": 7655
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7656039595603943,
      "learning_rate": 8.10649100255194e-06,
      "loss": 0.8224,
      "step": 7660
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7149258852005005,
      "learning_rate": 8.036195772789734e-06,
      "loss": 0.76,
      "step": 7665
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7486068606376648,
      "learning_rate": 7.966193892208007e-06,
      "loss": 0.718,
      "step": 7670
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.674845278263092,
      "learning_rate": 7.896485584101066e-06,
      "loss": 0.7689,
      "step": 7675
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.74075847864151,
      "learning_rate": 7.827071070826775e-06,
      "loss": 0.8496,
      "step": 7680
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.6453339457511902,
      "learning_rate": 7.757950573805839e-06,
      "loss": 0.7359,
      "step": 7685
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7150562405586243,
      "learning_rate": 7.689124313521112e-06,
      "loss": 0.7677,
      "step": 7690
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.6305781602859497,
      "learning_rate": 7.620592509516844e-06,
      "loss": 0.7111,
      "step": 7695
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.639238178730011,
      "learning_rate": 7.5523553803980795e-06,
      "loss": 0.7908,
      "step": 7700
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.6141027808189392,
      "learning_rate": 7.48441314382986e-06,
      "loss": 0.7716,
      "step": 7705
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.732420802116394,
      "learning_rate": 7.416766016536569e-06,
      "loss": 0.7698,
      "step": 7710
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7163684368133545,
      "learning_rate": 7.349414214301243e-06,
      "loss": 0.7367,
      "step": 7715
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7079850435256958,
      "learning_rate": 7.282357951964902e-06,
      "loss": 0.7766,
      "step": 7720
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.6404287815093994,
      "learning_rate": 7.215597443425815e-06,
      "loss": 0.7758,
      "step": 7725
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.66849285364151,
      "learning_rate": 7.149132901638844e-06,
      "loss": 0.7765,
      "step": 7730
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.6282253265380859,
      "learning_rate": 7.082964538614823e-06,
      "loss": 0.7917,
      "step": 7735
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7129911184310913,
      "learning_rate": 7.017092565419747e-06,
      "loss": 0.7986,
      "step": 7740
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.6497909426689148,
      "learning_rate": 6.951517192174195e-06,
      "loss": 0.7719,
      "step": 7745
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7094148397445679,
      "learning_rate": 6.88623862805271e-06,
      "loss": 0.7931,
      "step": 7750
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.5958633422851562,
      "learning_rate": 6.821257081282972e-06,
      "loss": 0.8135,
      "step": 7755
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.6889587044715881,
      "learning_rate": 6.756572759145285e-06,
      "loss": 0.7241,
      "step": 7760
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.6314862370491028,
      "learning_rate": 6.6921858679718345e-06,
      "loss": 0.598,
      "step": 7765
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.6266872882843018,
      "learning_rate": 6.62809661314604e-06,
      "loss": 0.8213,
      "step": 7770
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.8507253527641296,
      "learning_rate": 6.564305199101939e-06,
      "loss": 0.8546,
      "step": 7775
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.6322931051254272,
      "learning_rate": 6.500811829323461e-06,
      "loss": 0.8104,
      "step": 7780
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.5790343880653381,
      "learning_rate": 6.4376167063438965e-06,
      "loss": 0.6864,
      "step": 7785
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.7141791582107544,
      "learning_rate": 6.3747200317451294e-06,
      "loss": 0.7184,
      "step": 7790
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.7355352640151978,
      "learning_rate": 6.3121220061570065e-06,
      "loss": 0.7836,
      "step": 7795
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.6871746778488159,
      "learning_rate": 6.249822829256835e-06,
      "loss": 0.7652,
      "step": 7800
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8354716897010803,
      "learning_rate": 6.1878226997685525e-06,
      "loss": 0.7652,
      "step": 7805
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.6610546708106995,
      "learning_rate": 6.1261218154622264e-06,
      "loss": 0.7605,
      "step": 7810
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.6841945648193359,
      "learning_rate": 6.064720373153365e-06,
      "loss": 0.6656,
      "step": 7815
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.7112994194030762,
      "learning_rate": 6.003618568702351e-06,
      "loss": 0.7717,
      "step": 7820
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.6608765721321106,
      "learning_rate": 5.942816597013712e-06,
      "loss": 0.7229,
      "step": 7825
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.6349248886108398,
      "learning_rate": 5.882314652035581e-06,
      "loss": 0.7397,
      "step": 7830
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.7428655624389648,
      "learning_rate": 5.822112926759071e-06,
      "loss": 0.7451,
      "step": 7835
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.6516420245170593,
      "learning_rate": 5.7622116132176495e-06,
      "loss": 0.6874,
      "step": 7840
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.6775025129318237,
      "learning_rate": 5.7026109024864716e-06,
      "loss": 0.8085,
      "step": 7845
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.6347958445549011,
      "learning_rate": 5.643310984681882e-06,
      "loss": 0.7767,
      "step": 7850
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.7689727544784546,
      "learning_rate": 5.5843120489607045e-06,
      "loss": 0.7772,
      "step": 7855
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.6171656250953674,
      "learning_rate": 5.525614283519697e-06,
      "loss": 0.7042,
      "step": 7860
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.7996339797973633,
      "learning_rate": 5.46721787559491e-06,
      "loss": 0.8328,
      "step": 7865
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9136884212493896,
      "learning_rate": 5.409123011461159e-06,
      "loss": 0.7864,
      "step": 7870
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7024335265159607,
      "learning_rate": 5.351329876431377e-06,
      "loss": 0.7926,
      "step": 7875
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.8358083367347717,
      "learning_rate": 5.293838654855965e-06,
      "loss": 0.7301,
      "step": 7880
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.6557454466819763,
      "learning_rate": 5.236649530122361e-06,
      "loss": 0.8171,
      "step": 7885
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7085033059120178,
      "learning_rate": 5.17976268465431e-06,
      "loss": 0.8429,
      "step": 7890
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7088157534599304,
      "learning_rate": 5.123178299911357e-06,
      "loss": 0.8049,
      "step": 7895
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7027326822280884,
      "learning_rate": 5.0668965563882235e-06,
      "loss": 0.8121,
      "step": 7900
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.5936857461929321,
      "learning_rate": 5.0109176336142984e-06,
      "loss": 0.6958,
      "step": 7905
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7407312989234924,
      "learning_rate": 4.95524171015298e-06,
      "loss": 0.6585,
      "step": 7910
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.8003597855567932,
      "learning_rate": 4.899868963601173e-06,
      "loss": 0.6724,
      "step": 7915
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.6756457090377808,
      "learning_rate": 4.844799570588699e-06,
      "loss": 0.722,
      "step": 7920
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7574281096458435,
      "learning_rate": 4.79003370677773e-06,
      "loss": 0.8335,
      "step": 7925
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.71590656042099,
      "learning_rate": 4.735571546862217e-06,
      "loss": 0.7708,
      "step": 7930
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.5873979330062866,
      "learning_rate": 4.681413264567358e-06,
      "loss": 0.6377,
      "step": 7935
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7385571599006653,
      "learning_rate": 4.627559032649031e-06,
      "loss": 0.7705,
      "step": 7940
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.5803728103637695,
      "learning_rate": 4.574009022893255e-06,
      "loss": 0.7057,
      "step": 7945
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.6368845105171204,
      "learning_rate": 4.520763406115592e-06,
      "loss": 0.7599,
      "step": 7950
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7460334897041321,
      "learning_rate": 4.467822352160722e-06,
      "loss": 0.7941,
      "step": 7955
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.6857527494430542,
      "learning_rate": 4.415186029901719e-06,
      "loss": 0.7451,
      "step": 7960
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.7004684209823608,
      "learning_rate": 4.362854607239652e-06,
      "loss": 0.7884,
      "step": 7965
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.7447669506072998,
      "learning_rate": 4.310828251103072e-06,
      "loss": 0.7734,
      "step": 7970
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.6418143510818481,
      "learning_rate": 4.259107127447348e-06,
      "loss": 0.7259,
      "step": 7975
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.680642306804657,
      "learning_rate": 4.20769140125421e-06,
      "loss": 0.7835,
      "step": 7980
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.6749563217163086,
      "learning_rate": 4.156581236531265e-06,
      "loss": 0.7962,
      "step": 7985
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.6090324521064758,
      "learning_rate": 4.1057767963113895e-06,
      "loss": 0.7743,
      "step": 7990
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.5747905969619751,
      "learning_rate": 4.055278242652272e-06,
      "loss": 0.7332,
      "step": 7995
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.59056156873703,
      "learning_rate": 4.00508573663585e-06,
      "loss": 0.796,
      "step": 8000
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.7366644740104675,
      "learning_rate": 3.955199438367874e-06,
      "loss": 0.6988,
      "step": 8005
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.8058913946151733,
      "learning_rate": 3.905619506977287e-06,
      "loss": 0.7612,
      "step": 8010
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.696850061416626,
      "learning_rate": 3.85634610061576e-06,
      "loss": 0.7504,
      "step": 8015
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.636525571346283,
      "learning_rate": 3.807379376457276e-06,
      "loss": 0.8115,
      "step": 8020
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.6842799186706543,
      "learning_rate": 3.7587194906974934e-06,
      "loss": 0.7669,
      "step": 8025
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.665556788444519,
      "learning_rate": 3.7103665985533275e-06,
      "loss": 0.8176,
      "step": 8030
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.7209974527359009,
      "learning_rate": 3.662320854262413e-06,
      "loss": 0.7631,
      "step": 8035
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.6841431856155396,
      "learning_rate": 3.61458241108269e-06,
      "loss": 0.8565,
      "step": 8040
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.6740476489067078,
      "learning_rate": 3.567151421291781e-06,
      "loss": 0.7445,
      "step": 8045
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.6454640626907349,
      "learning_rate": 3.5200280361866287e-06,
      "loss": 0.7506,
      "step": 8050
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.696540355682373,
      "learning_rate": 3.473212406082993e-06,
      "loss": 0.7865,
      "step": 8055
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.6765589118003845,
      "learning_rate": 3.426704680314896e-06,
      "loss": 0.7273,
      "step": 8060
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.7231236100196838,
      "learning_rate": 3.3805050072342246e-06,
      "loss": 0.7769,
      "step": 8065
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.7834519147872925,
      "learning_rate": 3.334613534210218e-06,
      "loss": 0.7718,
      "step": 8070
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.7390478253364563,
      "learning_rate": 3.2890304076290122e-06,
      "loss": 0.7633,
      "step": 8075
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.6524391174316406,
      "learning_rate": 3.2437557728931643e-06,
      "loss": 0.7352,
      "step": 8080
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.7138876914978027,
      "learning_rate": 3.1987897744212068e-06,
      "loss": 0.7841,
      "step": 8085
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.6502974033355713,
      "learning_rate": 3.1541325556471713e-06,
      "loss": 0.8611,
      "step": 8090
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.7619243860244751,
      "learning_rate": 3.1097842590201433e-06,
      "loss": 0.8266,
      "step": 8095
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.6988756060600281,
      "learning_rate": 3.06574502600373e-06,
      "loss": 0.7378,
      "step": 8100
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.6371370553970337,
      "learning_rate": 3.0220149970757947e-06,
      "loss": 0.7703,
      "step": 8105
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.6489354372024536,
      "learning_rate": 2.9785943117277893e-06,
      "loss": 0.7766,
      "step": 8110
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.757959246635437,
      "learning_rate": 2.9354831084644652e-06,
      "loss": 0.8003,
      "step": 8115
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.7314863204956055,
      "learning_rate": 2.8926815248033533e-06,
      "loss": 0.8064,
      "step": 8120
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.6922609210014343,
      "learning_rate": 2.8501896972743748e-06,
      "loss": 0.6391,
      "step": 8125
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.7072837352752686,
      "learning_rate": 2.8080077614193513e-06,
      "loss": 0.6651,
      "step": 8130
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.6854616403579712,
      "learning_rate": 2.766135851791607e-06,
      "loss": 0.8429,
      "step": 8135
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.6851674318313599,
      "learning_rate": 2.724574101955557e-06,
      "loss": 0.7833,
      "step": 8140
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7848556041717529,
      "learning_rate": 2.6833226444862526e-06,
      "loss": 0.7674,
      "step": 8145
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.6435510516166687,
      "learning_rate": 2.6423816109689357e-06,
      "loss": 0.6399,
      "step": 8150
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.6673296689987183,
      "learning_rate": 2.6017511319986752e-06,
      "loss": 0.7375,
      "step": 8155
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7312006950378418,
      "learning_rate": 2.56143133717992e-06,
      "loss": 0.7741,
      "step": 8160
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.6996206641197205,
      "learning_rate": 2.5214223551260686e-06,
      "loss": 0.7002,
      "step": 8165
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7395245432853699,
      "learning_rate": 2.481724313459111e-06,
      "loss": 0.7389,
      "step": 8170
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.8081804513931274,
      "learning_rate": 2.4423373388091753e-06,
      "loss": 0.7366,
      "step": 8175
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7896558046340942,
      "learning_rate": 2.4032615568141183e-06,
      "loss": 0.7466,
      "step": 8180
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.6015843152999878,
      "learning_rate": 2.3644970921191445e-06,
      "loss": 0.701,
      "step": 8185
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.8499304056167603,
      "learning_rate": 2.326044068376465e-06,
      "loss": 0.7778,
      "step": 8190
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.5991072058677673,
      "learning_rate": 2.287902608244774e-06,
      "loss": 0.7927,
      "step": 8195
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.6677555441856384,
      "learning_rate": 2.250072833388972e-06,
      "loss": 0.7374,
      "step": 8200
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1476563215255737,
      "learning_rate": 2.2125548644797323e-06,
      "loss": 0.6659,
      "step": 8205
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.6608490347862244,
      "learning_rate": 2.1753488211931016e-06,
      "loss": 0.8061,
      "step": 8210
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7551729679107666,
      "learning_rate": 2.1384548222101342e-06,
      "loss": 0.7791,
      "step": 8215
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7040895819664001,
      "learning_rate": 2.1018729852165574e-06,
      "loss": 0.7368,
      "step": 8220
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.8961385488510132,
      "learning_rate": 2.065603426902296e-06,
      "loss": 0.8227,
      "step": 8225
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.5928700566291809,
      "learning_rate": 2.0296462629611934e-06,
      "loss": 0.7019,
      "step": 8230
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6719995737075806,
      "learning_rate": 1.994001608090612e-06,
      "loss": 0.6937,
      "step": 8235
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6805534362792969,
      "learning_rate": 1.9586695759910233e-06,
      "loss": 0.6968,
      "step": 8240
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6675357818603516,
      "learning_rate": 1.92365027936573e-06,
      "loss": 0.79,
      "step": 8245
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.7165231704711914,
      "learning_rate": 1.888943829920431e-06,
      "loss": 0.7975,
      "step": 8250
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.663772463798523,
      "learning_rate": 1.8545503383629147e-06,
      "loss": 0.64,
      "step": 8255
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6648406982421875,
      "learning_rate": 1.8204699144026893e-06,
      "loss": 0.7558,
      "step": 8260
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.707249641418457,
      "learning_rate": 1.7867026667505725e-06,
      "loss": 0.8406,
      "step": 8265
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6816351413726807,
      "learning_rate": 1.7532487031184819e-06,
      "loss": 0.8215,
      "step": 8270
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.7135679721832275,
      "learning_rate": 1.720108130218967e-06,
      "loss": 0.7175,
      "step": 8275
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6640385389328003,
      "learning_rate": 1.6872810537649331e-06,
      "loss": 0.7476,
      "step": 8280
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6773399114608765,
      "learning_rate": 1.6547675784692517e-06,
      "loss": 0.6793,
      "step": 8285
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6559042930603027,
      "learning_rate": 1.6225678080444951e-06,
      "loss": 0.7233,
      "step": 8290
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.8715654015541077,
      "learning_rate": 1.5906818452025463e-06,
      "loss": 0.8084,
      "step": 8295
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6095733046531677,
      "learning_rate": 1.5591097916543006e-06,
      "loss": 0.6426,
      "step": 8300
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.669862687587738,
      "learning_rate": 1.5278517481093436e-06,
      "loss": 0.6873,
      "step": 8305
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6891058683395386,
      "learning_rate": 1.4969078142756277e-06,
      "loss": 0.8274,
      "step": 8310
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.6286734938621521,
      "learning_rate": 1.4662780888591076e-06,
      "loss": 0.7603,
      "step": 8315
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.6344169974327087,
      "learning_rate": 1.4359626695635176e-06,
      "loss": 0.7516,
      "step": 8320
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.7188591361045837,
      "learning_rate": 1.405961653089971e-06,
      "loss": 0.7821,
      "step": 8325
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.6320931911468506,
      "learning_rate": 1.3762751351367064e-06,
      "loss": 0.8255,
      "step": 8330
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.6263067126274109,
      "learning_rate": 1.3469032103987534e-06,
      "loss": 0.8558,
      "step": 8335
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.7200011014938354,
      "learning_rate": 1.317845972567655e-06,
      "loss": 0.7277,
      "step": 8340
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.8542875647544861,
      "learning_rate": 1.289103514331147e-06,
      "loss": 0.7618,
      "step": 8345
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.6276410818099976,
      "learning_rate": 1.2606759273728564e-06,
      "loss": 0.7309,
      "step": 8350
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.7522603273391724,
      "learning_rate": 1.2325633023720695e-06,
      "loss": 0.7636,
      "step": 8355
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.7600675225257874,
      "learning_rate": 1.204765729003332e-06,
      "loss": 0.8992,
      "step": 8360
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.6711683869361877,
      "learning_rate": 1.1772832959362933e-06,
      "loss": 0.806,
      "step": 8365
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.5430208444595337,
      "learning_rate": 1.150116090835307e-06,
      "loss": 0.7333,
      "step": 8370
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.7371984720230103,
      "learning_rate": 1.1232642003592197e-06,
      "loss": 0.772,
      "step": 8375
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.6525667905807495,
      "learning_rate": 1.096727710161094e-06,
      "loss": 0.7436,
      "step": 8380
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.7685186862945557,
      "learning_rate": 1.070506704887886e-06,
      "loss": 0.829,
      "step": 8385
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.6457349061965942,
      "learning_rate": 1.0446012681802343e-06,
      "loss": 0.7674,
      "step": 8390
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.5994265079498291,
      "learning_rate": 1.0190114826721497e-06,
      "loss": 0.7031,
      "step": 8395
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7038202881813049,
      "learning_rate": 9.937374299907931e-07,
      "loss": 0.7039,
      "step": 8400
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.593492329120636,
      "learning_rate": 9.687791907561527e-07,
      "loss": 0.6959,
      "step": 8405
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.60867840051651,
      "learning_rate": 9.441368445808451e-07,
      "loss": 0.6178,
      "step": 8410
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7278956174850464,
      "learning_rate": 9.198104700698595e-07,
      "loss": 0.8133,
      "step": 8415
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0709000825881958,
      "learning_rate": 8.958001448202357e-07,
      "loss": 0.912,
      "step": 8420
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7433140277862549,
      "learning_rate": 8.721059454209424e-07,
      "loss": 0.7567,
      "step": 8425
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7910411953926086,
      "learning_rate": 8.487279474524989e-07,
      "loss": 0.8209,
      "step": 8430
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.6875026226043701,
      "learning_rate": 8.256662254867986e-07,
      "loss": 0.7594,
      "step": 8435
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.8132088780403137,
      "learning_rate": 8.029208530869081e-07,
      "loss": 0.9099,
      "step": 8440
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7695709466934204,
      "learning_rate": 7.804919028067681e-07,
      "loss": 0.7659,
      "step": 8445
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.8422206044197083,
      "learning_rate": 7.58379446190971e-07,
      "loss": 0.7784,
      "step": 8450
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.6463632583618164,
      "learning_rate": 7.365835537745725e-07,
      "loss": 0.7772,
      "step": 8455
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.6620475649833679,
      "learning_rate": 7.151042950828246e-07,
      "loss": 0.7338,
      "step": 8460
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.6927075386047363,
      "learning_rate": 6.939417386309766e-07,
      "loss": 0.7169,
      "step": 8465
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.6566410064697266,
      "learning_rate": 6.730959519240409e-07,
      "loss": 0.7196,
      "step": 8470
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.6217153072357178,
      "learning_rate": 6.525670014566166e-07,
      "loss": 0.6497,
      "step": 8475
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.6876888871192932,
      "learning_rate": 6.323549527126216e-07,
      "loss": 0.7666,
      "step": 8480
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.736946702003479,
      "learning_rate": 6.124598701651052e-07,
      "loss": 0.8501,
      "step": 8485
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.5957123041152954,
      "learning_rate": 5.928818172760697e-07,
      "loss": 0.7375,
      "step": 8490
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.6869728565216064,
      "learning_rate": 5.736208564962265e-07,
      "loss": 0.7024,
      "step": 8495
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.7970830798149109,
      "learning_rate": 5.546770492648401e-07,
      "loss": 0.7898,
      "step": 8500
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.7111850380897522,
      "learning_rate": 5.360504560094736e-07,
      "loss": 0.7083,
      "step": 8505
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.6761876344680786,
      "learning_rate": 5.177411361458661e-07,
      "loss": 0.7067,
      "step": 8510
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.6835991144180298,
      "learning_rate": 4.997491480776773e-07,
      "loss": 0.695,
      "step": 8515
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.6578661203384399,
      "learning_rate": 4.82074549196343e-07,
      "loss": 0.7607,
      "step": 8520
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.684698224067688,
      "learning_rate": 4.6471739588089814e-07,
      "loss": 0.7665,
      "step": 8525
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.7736423015594482,
      "learning_rate": 4.476777434977653e-07,
      "loss": 0.8594,
      "step": 8530
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.6832173466682434,
      "learning_rate": 4.30955646400566e-07,
      "loss": 0.6579,
      "step": 8535
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.731162965297699,
      "learning_rate": 4.14551157930021e-07,
      "loss": 0.8462,
      "step": 8540
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.7852889895439148,
      "learning_rate": 3.984643304136948e-07,
      "loss": 0.8533,
      "step": 8545
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.8519006967544556,
      "learning_rate": 3.826952151658958e-07,
      "loss": 0.6815,
      "step": 8550
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.6394432783126831,
      "learning_rate": 3.6724386248745415e-07,
      "loss": 0.7208,
      "step": 8555
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.602070152759552,
      "learning_rate": 3.5211032166561077e-07,
      "loss": 0.7506,
      "step": 8560
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.6670829653739929,
      "learning_rate": 3.372946409738398e-07,
      "loss": 0.7156,
      "step": 8565
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.7526081204414368,
      "learning_rate": 3.2279686767168196e-07,
      "loss": 0.8094,
      "step": 8570
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.68255215883255,
      "learning_rate": 3.086170480046113e-07,
      "loss": 0.7417,
      "step": 8575
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.6421539187431335,
      "learning_rate": 2.947552272038911e-07,
      "loss": 0.7067,
      "step": 8580
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.7133721113204956,
      "learning_rate": 2.812114494864182e-07,
      "loss": 0.7699,
      "step": 8585
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.7376065254211426,
      "learning_rate": 2.679857580545786e-07,
      "loss": 0.8313,
      "step": 8590
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.7178567051887512,
      "learning_rate": 2.550781950961034e-07,
      "loss": 0.7967,
      "step": 8595
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.7155488133430481,
      "learning_rate": 2.42488801783991e-07,
      "loss": 0.7509,
      "step": 8600
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.7433626055717468,
      "learning_rate": 2.3021761827628496e-07,
      "loss": 0.7372,
      "step": 8605
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.7234955430030823,
      "learning_rate": 2.182646837160185e-07,
      "loss": 0.6896,
      "step": 8610
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.790782630443573,
      "learning_rate": 2.0663003623105914e-07,
      "loss": 0.7696,
      "step": 8615
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.852401614189148,
      "learning_rate": 1.953137129339977e-07,
      "loss": 0.7851,
      "step": 8620
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.7792094349861145,
      "learning_rate": 1.8431574992199275e-07,
      "loss": 0.7594,
      "step": 8625
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.7497335076332092,
      "learning_rate": 1.7363618227672628e-07,
      "loss": 0.7123,
      "step": 8630
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.612318217754364,
      "learning_rate": 1.632750440642261e-07,
      "loss": 0.7277,
      "step": 8635
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.6249239444732666,
      "learning_rate": 1.5323236833479916e-07,
      "loss": 0.7046,
      "step": 8640
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.5679818987846375,
      "learning_rate": 1.4350818712292048e-07,
      "loss": 0.6912,
      "step": 8645
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.6267695426940918,
      "learning_rate": 1.3410253144707785e-07,
      "loss": 0.6389,
      "step": 8650
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.7306914925575256,
      "learning_rate": 1.2501543130974959e-07,
      "loss": 0.7294,
      "step": 8655
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.7982718348503113,
      "learning_rate": 1.162469156972712e-07,
      "loss": 0.753,
      "step": 8660
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.658090353012085,
      "learning_rate": 1.0779701257974672e-07,
      "loss": 0.6063,
      "step": 8665
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6100469827651978,
      "learning_rate": 9.96657489109487e-08,
      "loss": 0.8564,
      "step": 8670
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1241860389709473,
      "learning_rate": 9.185315062826272e-08,
      "loss": 0.7727,
      "step": 8675
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6518518328666687,
      "learning_rate": 8.435924265256523e-08,
      "loss": 0.7119,
      "step": 8680
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6026988625526428,
      "learning_rate": 7.718404888816811e-08,
      "loss": 0.7094,
      "step": 8685
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6683136820793152,
      "learning_rate": 7.032759222274087e-08,
      "loss": 0.7913,
      "step": 8690
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6700591444969177,
      "learning_rate": 6.378989452724416e-08,
      "loss": 0.794,
      "step": 8695
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6720278859138489,
      "learning_rate": 5.757097665584077e-08,
      "loss": 0.6934,
      "step": 8700
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.7080987691879272,
      "learning_rate": 5.1670858445829195e-08,
      "loss": 0.8342,
      "step": 8705
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.7693775296211243,
      "learning_rate": 4.6089558717610226e-08,
      "loss": 0.7174,
      "step": 8710
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.664599597454071,
      "learning_rate": 4.082709527459816e-08,
      "loss": 0.7675,
      "step": 8715
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6770430207252502,
      "learning_rate": 3.588348490317639e-08,
      "loss": 0.7371,
      "step": 8720
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.7018636465072632,
      "learning_rate": 3.125874337261969e-08,
      "loss": 0.7199,
      "step": 8725
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6643563508987427,
      "learning_rate": 2.6952885435105323e-08,
      "loss": 0.6487,
      "step": 8730
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6865366697311401,
      "learning_rate": 2.2965924825579797e-08,
      "loss": 0.7733,
      "step": 8735
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6612882614135742,
      "learning_rate": 1.9297874261792193e-08,
      "loss": 0.737,
      "step": 8740
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.62270188331604,
      "learning_rate": 1.5948745444216428e-08,
      "loss": 0.7636,
      "step": 8745
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6600854396820068,
      "learning_rate": 1.2918549056006867e-08,
      "loss": 0.6694,
      "step": 8750
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.7353793382644653,
      "learning_rate": 1.0207294762987208e-08,
      "loss": 0.7041,
      "step": 8755
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.7038155794143677,
      "learning_rate": 7.81499121359497e-09,
      "loss": 0.689,
      "step": 8760
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.8333893418312073,
      "learning_rate": 5.7416460388926004e-09,
      "loss": 0.8068,
      "step": 8765
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.6727174520492554,
      "learning_rate": 3.9872658525008655e-09,
      "loss": 0.7088,
      "step": 8770
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.6914416551589966,
      "learning_rate": 2.5518562505988386e-09,
      "loss": 0.6869,
      "step": 8775
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.6037778258323669,
      "learning_rate": 1.435421811901705e-09,
      "loss": 0.7188,
      "step": 8780
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.7393903732299805,
      "learning_rate": 6.37966097649656e-10,
      "loss": 0.7448,
      "step": 8785
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.6320146322250366,
      "learning_rate": 1.5949165159678813e-10,
      "loss": 0.7623,
      "step": 8790
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.6219347715377808,
      "learning_rate": 0.0,
      "loss": 0.7163,
      "step": 8795
    },
    {
      "epoch": 1.0,
      "step": 8795,
      "total_flos": 4.313969195378278e+16,
      "train_loss": 0.0,
      "train_runtime": 0.0081,
      "train_samples_per_second": 1092292.541,
      "train_steps_per_second": 1092292.541
    }
  ],
  "logging_steps": 5,
  "max_steps": 8795,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 4.313969195378278e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}