{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 11838,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 0.7875872850418091,
      "learning_rate": 0.0001999999119654754,
      "loss": 1.7457,
      "step": 5
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.4465181827545166,
      "learning_rate": 0.00019999964786205653,
      "loss": 1.5438,
      "step": 10
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5769105553627014,
      "learning_rate": 0.00019999920769020845,
      "loss": 1.4644,
      "step": 15
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5974473357200623,
      "learning_rate": 0.00019999859145070615,
      "loss": 1.2951,
      "step": 20
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5096644163131714,
      "learning_rate": 0.00019999779914463462,
      "loss": 1.1236,
      "step": 25
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.49290400743484497,
      "learning_rate": 0.0001999968307733889,
      "loss": 1.0382,
      "step": 30
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.39245930314064026,
      "learning_rate": 0.000199995686338674,
      "loss": 1.1353,
      "step": 35
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6507896184921265,
      "learning_rate": 0.00019999436584250483,
      "loss": 1.1689,
      "step": 40
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5615530014038086,
      "learning_rate": 0.00019999286928720647,
      "loss": 1.1778,
      "step": 45
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5074531435966492,
      "learning_rate": 0.00019999119667541386,
      "loss": 1.1218,
      "step": 50
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5020948648452759,
      "learning_rate": 0.00019998934801007193,
      "loss": 1.0081,
      "step": 55
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5447856783866882,
      "learning_rate": 0.00019998732329443562,
      "loss": 1.0737,
      "step": 60
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.4655187129974365,
      "learning_rate": 0.00019998512253206982,
      "loss": 1.1562,
      "step": 65
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5390958189964294,
      "learning_rate": 0.0001999827457268494,
      "loss": 1.0221,
      "step": 70
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.47219589352607727,
      "learning_rate": 0.00019998019288295922,
      "loss": 1.1061,
      "step": 75
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5623837113380432,
      "learning_rate": 0.00019997746400489397,
      "loss": 1.0978,
      "step": 80
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.4483198821544647,
      "learning_rate": 0.00019997455909745844,
      "loss": 1.1252,
      "step": 85
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5483498573303223,
      "learning_rate": 0.00019997147816576717,
      "loss": 1.1272,
      "step": 90
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5134523510932922,
      "learning_rate": 0.00019996822121524485,
      "loss": 1.1525,
      "step": 95
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.51983642578125,
      "learning_rate": 0.00019996478825162585,
      "loss": 1.3265,
      "step": 100
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5366414189338684,
      "learning_rate": 0.00019996117928095463,
      "loss": 1.0426,
      "step": 105
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5997601747512817,
      "learning_rate": 0.00019995739430958545,
      "loss": 1.1507,
      "step": 110
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.4979299008846283,
      "learning_rate": 0.00019995343334418245,
      "loss": 1.048,
      "step": 115
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.4658752381801605,
      "learning_rate": 0.0001999492963917197,
      "loss": 1.1218,
      "step": 120
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5081388354301453,
      "learning_rate": 0.00019994498345948108,
      "loss": 1.0929,
      "step": 125
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5739812850952148,
      "learning_rate": 0.00019994049455506033,
      "loss": 1.1785,
      "step": 130
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5141093134880066,
      "learning_rate": 0.00019993582968636097,
      "loss": 0.9686,
      "step": 135
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.4595348834991455,
      "learning_rate": 0.0001999309888615965,
      "loss": 1.1072,
      "step": 140
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.44480034708976746,
      "learning_rate": 0.00019992597208929,
      "loss": 1.0815,
      "step": 145
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.583330512046814,
      "learning_rate": 0.00019992077937827456,
      "loss": 1.1567,
      "step": 150
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.42396825551986694,
      "learning_rate": 0.00019991541073769283,
      "loss": 1.0406,
      "step": 155
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.46554988622665405,
      "learning_rate": 0.0001999098661769974,
      "loss": 1.0028,
      "step": 160
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.41337451338768005,
      "learning_rate": 0.0001999041457059505,
      "loss": 1.0591,
      "step": 165
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.47287315130233765,
      "learning_rate": 0.0001998982493346241,
      "loss": 1.0964,
      "step": 170
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5164761543273926,
      "learning_rate": 0.0001998921770733999,
      "loss": 1.0439,
      "step": 175
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.7466127872467041,
      "learning_rate": 0.00019988592893296927,
      "loss": 0.9453,
      "step": 180
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5365219712257385,
      "learning_rate": 0.00019987950492433325,
      "loss": 0.9862,
      "step": 185
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.43139711022377014,
      "learning_rate": 0.0001998729050588025,
      "loss": 1.0837,
      "step": 190
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.49741968512535095,
      "learning_rate": 0.0001998661293479974,
      "loss": 0.9904,
      "step": 195
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5120337009429932,
      "learning_rate": 0.00019985917780384786,
      "loss": 1.0652,
      "step": 200
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5979030728340149,
      "learning_rate": 0.00019985205043859336,
      "loss": 1.0907,
      "step": 205
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.7089561223983765,
      "learning_rate": 0.00019984474726478303,
      "loss": 1.1065,
      "step": 210
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5356090068817139,
      "learning_rate": 0.00019983726829527547,
      "loss": 1.027,
      "step": 215
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5716044902801514,
      "learning_rate": 0.00019982961354323887,
      "loss": 1.0789,
      "step": 220
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4484700560569763,
      "learning_rate": 0.00019982178302215082,
      "loss": 1.0366,
      "step": 225
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5676905512809753,
      "learning_rate": 0.00019981377674579845,
      "loss": 0.9822,
      "step": 230
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.6980101466178894,
      "learning_rate": 0.00019980559472827843,
      "loss": 1.143,
      "step": 235
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5166971683502197,
      "learning_rate": 0.00019979723698399665,
      "loss": 1.0075,
      "step": 240
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5025323629379272,
      "learning_rate": 0.00019978870352766853,
      "loss": 0.944,
      "step": 245
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.49045535922050476,
      "learning_rate": 0.0001997799943743189,
      "loss": 1.033,
      "step": 250
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5475656986236572,
      "learning_rate": 0.00019977110953928182,
      "loss": 0.9334,
      "step": 255
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5707181096076965,
      "learning_rate": 0.0001997620490382008,
      "loss": 0.9527,
      "step": 260
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5227631330490112,
      "learning_rate": 0.0001997528128870285,
      "loss": 1.0197,
      "step": 265
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.5836647152900696,
      "learning_rate": 0.00019974340110202697,
      "loss": 1.0444,
      "step": 270
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.6727388501167297,
      "learning_rate": 0.00019973381369976746,
      "loss": 1.0267,
      "step": 275
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4579056203365326,
      "learning_rate": 0.00019972405069713041,
      "loss": 0.9956,
      "step": 280
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.608708918094635,
      "learning_rate": 0.00019971411211130543,
      "loss": 1.1209,
      "step": 285
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.44869670271873474,
      "learning_rate": 0.00019970399795979132,
      "loss": 1.089,
      "step": 290
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.47517067193984985,
      "learning_rate": 0.00019969370826039592,
      "loss": 0.9034,
      "step": 295
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.509774386882782,
      "learning_rate": 0.00019968324303123625,
      "loss": 0.9518,
      "step": 300
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5976845622062683,
      "learning_rate": 0.00019967260229073836,
      "loss": 0.9457,
      "step": 305
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.6182131171226501,
      "learning_rate": 0.00019966178605763726,
      "loss": 1.1329,
      "step": 310
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5523433089256287,
      "learning_rate": 0.00019965079435097698,
      "loss": 1.0727,
      "step": 315
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.6128446459770203,
      "learning_rate": 0.00019963962719011055,
      "loss": 1.0578,
      "step": 320
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5032752752304077,
      "learning_rate": 0.00019962828459469984,
      "loss": 0.9942,
      "step": 325
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5268670916557312,
      "learning_rate": 0.0001996167665847157,
      "loss": 1.0418,
      "step": 330
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5722517371177673,
      "learning_rate": 0.00019960507318043775,
      "loss": 1.0697,
      "step": 335
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5031557083129883,
      "learning_rate": 0.00019959320440245443,
      "loss": 1.0137,
      "step": 340
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5025811791419983,
      "learning_rate": 0.00019958116027166307,
      "loss": 1.0486,
      "step": 345
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5244653224945068,
      "learning_rate": 0.00019956894080926958,
      "loss": 1.0232,
      "step": 350
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.6231172680854797,
      "learning_rate": 0.00019955654603678866,
      "loss": 1.0067,
      "step": 355
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5865558385848999,
      "learning_rate": 0.0001995439759760437,
      "loss": 1.0994,
      "step": 360
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5528464913368225,
      "learning_rate": 0.00019953123064916665,
      "loss": 1.0448,
      "step": 365
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5767375230789185,
      "learning_rate": 0.00019951831007859814,
      "loss": 1.0204,
      "step": 370
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.4730927348136902,
      "learning_rate": 0.00019950521428708723,
      "loss": 0.9448,
      "step": 375
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.6184219121932983,
      "learning_rate": 0.0001994919432976916,
      "loss": 1.0811,
      "step": 380
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.451190322637558,
      "learning_rate": 0.00019947849713377734,
      "loss": 1.0114,
      "step": 385
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5863661766052246,
      "learning_rate": 0.00019946487581901895,
      "loss": 0.9805,
      "step": 390
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.6007524132728577,
      "learning_rate": 0.00019945107937739944,
      "loss": 1.0675,
      "step": 395
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5558364987373352,
      "learning_rate": 0.00019943710783320998,
      "loss": 0.9698,
      "step": 400
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5028205513954163,
      "learning_rate": 0.00019942296121105017,
      "loss": 0.9619,
      "step": 405
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.5114047527313232,
      "learning_rate": 0.00019940863953582787,
      "loss": 0.963,
      "step": 410
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.4985787570476532,
      "learning_rate": 0.00019939414283275906,
      "loss": 0.972,
      "step": 415
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5882110595703125,
      "learning_rate": 0.00019937947112736796,
      "loss": 1.1282,
      "step": 420
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.6093044877052307,
      "learning_rate": 0.00019936462444548693,
      "loss": 0.931,
      "step": 425
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5328900814056396,
      "learning_rate": 0.00019934960281325635,
      "loss": 1.0776,
      "step": 430
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.6070789694786072,
      "learning_rate": 0.0001993344062571247,
      "loss": 0.9918,
      "step": 435
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.47850194573402405,
      "learning_rate": 0.00019931903480384838,
      "loss": 0.9318,
      "step": 440
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.47907599806785583,
      "learning_rate": 0.00019930348848049177,
      "loss": 0.9338,
      "step": 445
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.4980204999446869,
      "learning_rate": 0.00019928776731442712,
      "loss": 0.9255,
      "step": 450
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5016769766807556,
      "learning_rate": 0.00019927187133333456,
      "loss": 0.9518,
      "step": 455
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5496203899383545,
      "learning_rate": 0.00019925580056520198,
      "loss": 0.8733,
      "step": 460
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.4883849024772644,
      "learning_rate": 0.00019923955503832504,
      "loss": 0.9664,
      "step": 465
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5501581430435181,
      "learning_rate": 0.00019922313478130713,
      "loss": 0.9563,
      "step": 470
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5573701858520508,
      "learning_rate": 0.00019920653982305911,
      "loss": 1.1285,
      "step": 475
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.63383948802948,
      "learning_rate": 0.0001991897701927997,
      "loss": 1.0254,
      "step": 480
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.6332881450653076,
      "learning_rate": 0.00019917282592005496,
      "loss": 1.1536,
      "step": 485
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8212701678276062,
      "learning_rate": 0.0001991557070346585,
      "loss": 0.89,
      "step": 490
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5946714878082275,
      "learning_rate": 0.00019913841356675142,
      "loss": 1.0984,
      "step": 495
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.46487969160079956,
      "learning_rate": 0.00019912094554678215,
      "loss": 0.8758,
      "step": 500
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5632352828979492,
      "learning_rate": 0.00019910330300550646,
      "loss": 1.0401,
      "step": 505
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.6485393047332764,
      "learning_rate": 0.00019908548597398742,
      "loss": 0.7612,
      "step": 510
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.45863208174705505,
      "learning_rate": 0.0001990674944835953,
      "loss": 0.9827,
      "step": 515
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.4923437833786011,
      "learning_rate": 0.00019904932856600752,
      "loss": 1.0867,
      "step": 520
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5223394632339478,
      "learning_rate": 0.00019903098825320867,
      "loss": 1.0333,
      "step": 525
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.5169032216072083,
      "learning_rate": 0.00019901247357749036,
      "loss": 1.0153,
      "step": 530
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.7526834607124329,
      "learning_rate": 0.0001989937845714512,
      "loss": 1.0064,
      "step": 535
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.45596638321876526,
      "learning_rate": 0.00019897492126799674,
      "loss": 0.8658,
      "step": 540
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5975187420845032,
      "learning_rate": 0.00019895588370033942,
      "loss": 0.9575,
      "step": 545
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.534826934337616,
      "learning_rate": 0.00019893667190199848,
      "loss": 1.0665,
      "step": 550
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5490930676460266,
      "learning_rate": 0.00019891728590680003,
      "loss": 1.0273,
      "step": 555
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5129736661911011,
      "learning_rate": 0.00019889772574887673,
      "loss": 1.0674,
      "step": 560
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.6879858374595642,
      "learning_rate": 0.000198877991462668,
      "loss": 1.0637,
      "step": 565
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5870321393013,
      "learning_rate": 0.00019885808308291977,
      "loss": 0.9868,
      "step": 570
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.6196910738945007,
      "learning_rate": 0.00019883800064468462,
      "loss": 1.0895,
      "step": 575
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.529233992099762,
      "learning_rate": 0.0001988177441833214,
      "loss": 0.9474,
      "step": 580
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5826104283332825,
      "learning_rate": 0.00019879731373449554,
      "loss": 1.1702,
      "step": 585
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.6717869639396667,
      "learning_rate": 0.00019877670933417872,
      "loss": 1.0355,
      "step": 590
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5537705421447754,
      "learning_rate": 0.0001987559310186489,
      "loss": 0.9953,
      "step": 595
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5874431729316711,
      "learning_rate": 0.0001987349788244903,
      "loss": 1.1586,
      "step": 600
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5384119749069214,
      "learning_rate": 0.0001987138527885932,
      "loss": 1.0429,
      "step": 605
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.764053225517273,
      "learning_rate": 0.00019869255294815402,
      "loss": 1.0185,
      "step": 610
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.6233411431312561,
      "learning_rate": 0.00019867107934067523,
      "loss": 1.0977,
      "step": 615
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5031682252883911,
      "learning_rate": 0.00019864943200396517,
      "loss": 0.9116,
      "step": 620
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.6120206117630005,
      "learning_rate": 0.0001986276109761381,
      "loss": 1.0817,
      "step": 625
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.542709231376648,
      "learning_rate": 0.0001986056162956141,
      "loss": 1.0411,
      "step": 630
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.4971280097961426,
      "learning_rate": 0.00019858344800111898,
      "loss": 1.1277,
      "step": 635
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.6049294471740723,
      "learning_rate": 0.0001985611061316843,
      "loss": 0.9203,
      "step": 640
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.6006289124488831,
      "learning_rate": 0.0001985385907266471,
      "loss": 1.1261,
      "step": 645
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.5616739392280579,
      "learning_rate": 0.00019851590182565012,
      "loss": 1.0205,
      "step": 650
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5229505300521851,
      "learning_rate": 0.0001984930394686414,
      "loss": 0.8954,
      "step": 655
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5182190537452698,
      "learning_rate": 0.00019847000369587457,
      "loss": 0.9065,
      "step": 660
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.6021913290023804,
      "learning_rate": 0.00019844679454790844,
      "loss": 1.1373,
      "step": 665
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.7754777669906616,
      "learning_rate": 0.00019842341206560712,
      "loss": 1.0343,
      "step": 670
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5829971432685852,
      "learning_rate": 0.00019839985629013999,
      "loss": 0.9031,
      "step": 675
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5609190464019775,
      "learning_rate": 0.00019837612726298143,
      "loss": 1.0791,
      "step": 680
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5505595207214355,
      "learning_rate": 0.0001983522250259109,
      "loss": 1.0283,
      "step": 685
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.8407588601112366,
      "learning_rate": 0.0001983281496210129,
      "loss": 1.0979,
      "step": 690
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.6268724203109741,
      "learning_rate": 0.00019830390109067673,
      "loss": 1.0197,
      "step": 695
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.6348004341125488,
      "learning_rate": 0.00019827947947759653,
      "loss": 0.9533,
      "step": 700
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.7127957344055176,
      "learning_rate": 0.0001982548848247712,
      "loss": 1.0689,
      "step": 705
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.6464601159095764,
      "learning_rate": 0.00019823011717550438,
      "loss": 1.0081,
      "step": 710
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5490895509719849,
      "learning_rate": 0.0001982051765734042,
      "loss": 0.9213,
      "step": 715
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.6216040253639221,
      "learning_rate": 0.00019818006306238328,
      "loss": 0.9828,
      "step": 720
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5060167908668518,
      "learning_rate": 0.0001981547766866588,
      "loss": 1.0694,
      "step": 725
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5995177030563354,
      "learning_rate": 0.00019812931749075223,
      "loss": 0.9466,
      "step": 730
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.6505825519561768,
      "learning_rate": 0.00019810368551948936,
      "loss": 0.7913,
      "step": 735
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5346279144287109,
      "learning_rate": 0.00019807788081800012,
      "loss": 0.9923,
      "step": 740
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5277034044265747,
      "learning_rate": 0.00019805190343171857,
      "loss": 1.0233,
      "step": 745
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.6092624068260193,
      "learning_rate": 0.00019802575340638296,
      "loss": 0.8504,
      "step": 750
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5947295427322388,
      "learning_rate": 0.0001979994307880353,
      "loss": 1.0926,
      "step": 755
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.6714326739311218,
      "learning_rate": 0.00019797293562302158,
      "loss": 0.9823,
      "step": 760
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.569905698299408,
      "learning_rate": 0.00019794626795799158,
      "loss": 1.0132,
      "step": 765
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5079753398895264,
      "learning_rate": 0.00019791942783989889,
      "loss": 0.9252,
      "step": 770
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.7192429900169373,
      "learning_rate": 0.00019789241531600053,
      "loss": 1.0005,
      "step": 775
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5635434985160828,
      "learning_rate": 0.00019786523043385727,
      "loss": 1.0082,
      "step": 780
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.6172094941139221,
      "learning_rate": 0.00019783787324133324,
      "loss": 0.8479,
      "step": 785
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5081483721733093,
      "learning_rate": 0.00019781034378659604,
      "loss": 0.8188,
      "step": 790
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.49087268114089966,
      "learning_rate": 0.00019778264211811646,
      "loss": 0.8061,
      "step": 795
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5902777910232544,
      "learning_rate": 0.0001977547682846686,
      "loss": 1.0909,
      "step": 800
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5288147926330566,
      "learning_rate": 0.00019772672233532964,
      "loss": 1.0024,
      "step": 805
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.46710124611854553,
      "learning_rate": 0.0001976985043194798,
      "loss": 0.9985,
      "step": 810
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.7614003419876099,
      "learning_rate": 0.00019767011428680227,
      "loss": 0.8788,
      "step": 815
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.6387910842895508,
      "learning_rate": 0.00019764155228728315,
      "loss": 0.8908,
      "step": 820
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.7001696228981018,
      "learning_rate": 0.0001976128183712113,
      "loss": 0.9774,
      "step": 825
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.6360479593276978,
      "learning_rate": 0.00019758391258917814,
      "loss": 1.001,
      "step": 830
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5724257826805115,
      "learning_rate": 0.0001975548349920779,
      "loss": 1.0208,
      "step": 835
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5296745300292969,
      "learning_rate": 0.00019752558563110724,
      "loss": 1.0957,
      "step": 840
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.6365566253662109,
      "learning_rate": 0.0001974961645577652,
      "loss": 1.0577,
      "step": 845
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5774630308151245,
      "learning_rate": 0.00019746657182385314,
      "loss": 0.9223,
      "step": 850
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.7029066681861877,
      "learning_rate": 0.00019743680748147478,
      "loss": 0.8853,
      "step": 855
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5141474604606628,
      "learning_rate": 0.00019740687158303585,
      "loss": 0.8765,
      "step": 860
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.7042635083198547,
      "learning_rate": 0.0001973767641812443,
      "loss": 1.0361,
      "step": 865
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.6724997162818909,
      "learning_rate": 0.00019734648532910982,
      "loss": 0.9077,
      "step": 870
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5841765403747559,
      "learning_rate": 0.00019731603507994416,
      "loss": 1.0122,
      "step": 875
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.797713577747345,
      "learning_rate": 0.00019728541348736084,
      "loss": 0.9423,
      "step": 880
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.6127451062202454,
      "learning_rate": 0.00019725462060527489,
      "loss": 1.0396,
      "step": 885
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5570659637451172,
      "learning_rate": 0.00019722365648790313,
      "loss": 0.88,
      "step": 890
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5685396790504456,
      "learning_rate": 0.00019719252118976374,
      "loss": 0.9511,
      "step": 895
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.48790881037712097,
      "learning_rate": 0.00019716121476567639,
      "loss": 0.9494,
      "step": 900
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.7598410844802856,
      "learning_rate": 0.00019712973727076195,
      "loss": 1.0273,
      "step": 905
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.49358242750167847,
      "learning_rate": 0.0001970980887604426,
      "loss": 0.9141,
      "step": 910
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5327233672142029,
      "learning_rate": 0.0001970662692904415,
      "loss": 0.9731,
      "step": 915
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.665021538734436,
      "learning_rate": 0.000197034278916783,
      "loss": 1.0352,
      "step": 920
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6691067218780518,
      "learning_rate": 0.00019700211769579213,
      "loss": 0.9288,
      "step": 925
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6341536641120911,
      "learning_rate": 0.00019696978568409495,
      "loss": 1.0483,
      "step": 930
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6046646237373352,
      "learning_rate": 0.000196937282938618,
      "loss": 0.9216,
      "step": 935
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6167765259742737,
      "learning_rate": 0.0001969046095165887,
      "loss": 1.036,
      "step": 940
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5134046077728271,
      "learning_rate": 0.0001968717654755347,
      "loss": 0.9485,
      "step": 945
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.556530773639679,
      "learning_rate": 0.00019683875087328427,
      "loss": 0.9117,
      "step": 950
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.7202883958816528,
      "learning_rate": 0.0001968055657679659,
      "loss": 0.9137,
      "step": 955
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6756083369255066,
      "learning_rate": 0.00019677221021800824,
      "loss": 0.9523,
      "step": 960
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6849576830863953,
      "learning_rate": 0.00019673868428214016,
      "loss": 1.0056,
      "step": 965
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5652838945388794,
      "learning_rate": 0.00019670498801939044,
      "loss": 0.9335,
      "step": 970
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6338204741477966,
      "learning_rate": 0.0001966711214890877,
      "loss": 1.0252,
      "step": 975
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5528327226638794,
      "learning_rate": 0.0001966370847508605,
      "loss": 1.0104,
      "step": 980
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5702161192893982,
      "learning_rate": 0.00019660287786463698,
      "loss": 0.8076,
      "step": 985
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5875357389450073,
      "learning_rate": 0.00019656850089064484,
      "loss": 1.1024,
      "step": 990
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5919451117515564,
      "learning_rate": 0.00019653395388941137,
      "loss": 0.9424,
      "step": 995
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5543453097343445,
      "learning_rate": 0.00019649923692176304,
      "loss": 0.9896,
      "step": 1000
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.5521298050880432,
      "learning_rate": 0.00019646435004882576,
      "loss": 1.0355,
      "step": 1005
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5741308927536011,
      "learning_rate": 0.00019642929333202452,
      "loss": 0.989,
      "step": 1010
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5588503479957581,
      "learning_rate": 0.00019639406683308336,
      "loss": 0.9998,
      "step": 1015
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5201951861381531,
      "learning_rate": 0.00019635867061402516,
      "loss": 0.9533,
      "step": 1020
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5763158202171326,
      "learning_rate": 0.00019632310473717172,
      "loss": 0.9033,
      "step": 1025
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.6396028399467468,
      "learning_rate": 0.00019628736926514365,
      "loss": 0.9325,
      "step": 1030
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5742698907852173,
      "learning_rate": 0.00019625146426085994,
      "loss": 1.0342,
      "step": 1035
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5504851937294006,
      "learning_rate": 0.00019621538978753823,
      "loss": 0.9261,
      "step": 1040
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.6293525099754333,
      "learning_rate": 0.00019617914590869452,
      "loss": 0.9576,
      "step": 1045
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.548366367816925,
      "learning_rate": 0.00019614273268814305,
      "loss": 0.8242,
      "step": 1050
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.564641535282135,
      "learning_rate": 0.00019610615018999622,
      "loss": 1.0472,
      "step": 1055
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.6370099782943726,
      "learning_rate": 0.0001960693984786645,
      "loss": 0.98,
      "step": 1060
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5116264820098877,
      "learning_rate": 0.00019603247761885629,
      "loss": 1.0122,
      "step": 1065
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.529659628868103,
      "learning_rate": 0.00019599538767557775,
      "loss": 0.8735,
      "step": 1070
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.6025382280349731,
      "learning_rate": 0.00019595812871413281,
      "loss": 1.0549,
      "step": 1075
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5760678648948669,
      "learning_rate": 0.00019592070080012302,
      "loss": 0.9554,
      "step": 1080
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.6323814988136292,
      "learning_rate": 0.00019588310399944726,
      "loss": 0.9444,
      "step": 1085
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.6250112652778625,
      "learning_rate": 0.00019584533837830196,
      "loss": 0.9244,
      "step": 1090
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.6357011198997498,
      "learning_rate": 0.00019580740400318062,
      "loss": 0.8238,
      "step": 1095
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.625298023223877,
      "learning_rate": 0.00019576930094087396,
      "loss": 1.0092,
      "step": 1100
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.568483829498291,
      "learning_rate": 0.00019573102925846968,
      "loss": 0.8903,
      "step": 1105
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5354487895965576,
      "learning_rate": 0.00019569258902335236,
      "loss": 0.9904,
      "step": 1110
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.5910528302192688,
      "learning_rate": 0.00019565398030320336,
      "loss": 0.9412,
      "step": 1115
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.6233749389648438,
      "learning_rate": 0.0001956152031660007,
      "loss": 0.9314,
      "step": 1120
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5822467803955078,
      "learning_rate": 0.00019557625768001886,
      "loss": 0.8303,
      "step": 1125
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.6500090956687927,
      "learning_rate": 0.00019553714391382887,
      "loss": 0.9304,
      "step": 1130
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5523923635482788,
      "learning_rate": 0.0001954978619362979,
      "loss": 1.006,
      "step": 1135
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5451334118843079,
      "learning_rate": 0.00019545841181658943,
      "loss": 1.0762,
      "step": 1140
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5570092797279358,
      "learning_rate": 0.0001954187936241628,
      "loss": 0.9606,
      "step": 1145
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.688922643661499,
      "learning_rate": 0.00019537900742877344,
      "loss": 0.9992,
      "step": 1150
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5290732383728027,
      "learning_rate": 0.00019533905330047256,
      "loss": 0.9519,
      "step": 1155
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5557504296302795,
      "learning_rate": 0.000195298931309607,
      "loss": 1.0104,
      "step": 1160
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.6165986657142639,
      "learning_rate": 0.00019525864152681913,
      "loss": 0.8128,
      "step": 1165
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.6277154088020325,
      "learning_rate": 0.00019521818402304681,
      "loss": 0.9949,
      "step": 1170
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5798670053482056,
      "learning_rate": 0.0001951775588695232,
      "loss": 1.0184,
      "step": 1175
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5742266774177551,
      "learning_rate": 0.0001951367661377766,
      "loss": 1.0086,
      "step": 1180
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5187317728996277,
      "learning_rate": 0.00019509580589963034,
      "loss": 0.9316,
      "step": 1185
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5465469360351562,
      "learning_rate": 0.0001950546782272028,
      "loss": 0.9607,
      "step": 1190
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5568990707397461,
      "learning_rate": 0.00019501338319290708,
      "loss": 0.8688,
      "step": 1195
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.6503220200538635,
      "learning_rate": 0.00019497192086945093,
      "loss": 1.0604,
      "step": 1200
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5293739438056946,
      "learning_rate": 0.00019493029132983662,
      "loss": 1.0046,
      "step": 1205
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.4872817099094391,
      "learning_rate": 0.00019488849464736096,
      "loss": 1.0668,
      "step": 1210
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5833202600479126,
      "learning_rate": 0.00019484653089561494,
      "loss": 1.0663,
      "step": 1215
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.6370205879211426,
      "learning_rate": 0.00019480440014848377,
      "loss": 0.8139,
      "step": 1220
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.52309250831604,
      "learning_rate": 0.00019476210248014656,
      "loss": 0.9474,
      "step": 1225
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.7012914419174194,
      "learning_rate": 0.0001947196379650765,
      "loss": 0.9654,
      "step": 1230
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.5470362305641174,
      "learning_rate": 0.00019467700667804048,
      "loss": 0.9098,
      "step": 1235
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.4781595766544342,
      "learning_rate": 0.00019463420869409893,
      "loss": 1.0829,
      "step": 1240
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.6375689506530762,
      "learning_rate": 0.00019459124408860586,
      "loss": 0.9418,
      "step": 1245
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.4911366403102875,
      "learning_rate": 0.0001945481129372087,
      "loss": 0.985,
      "step": 1250
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5354143381118774,
      "learning_rate": 0.000194504815315848,
      "loss": 0.9878,
      "step": 1255
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5379586815834045,
      "learning_rate": 0.0001944613513007575,
      "loss": 0.9631,
      "step": 1260
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5477179884910583,
      "learning_rate": 0.00019441772096846384,
      "loss": 0.9537,
      "step": 1265
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.6477294564247131,
      "learning_rate": 0.0001943739243957866,
      "loss": 0.867,
      "step": 1270
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.4842207729816437,
      "learning_rate": 0.00019432996165983797,
      "loss": 0.9455,
      "step": 1275
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.6251997351646423,
      "learning_rate": 0.00019428583283802265,
      "loss": 1.0139,
      "step": 1280
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5363724827766418,
      "learning_rate": 0.0001942415380080379,
      "loss": 0.955,
      "step": 1285
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5772451758384705,
      "learning_rate": 0.00019419707724787323,
      "loss": 0.9396,
      "step": 1290
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5976244211196899,
      "learning_rate": 0.00019415245063581025,
      "loss": 0.9639,
      "step": 1295
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5342226624488831,
      "learning_rate": 0.00019410765825042257,
      "loss": 0.9478,
      "step": 1300
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.7162676453590393,
      "learning_rate": 0.00019406270017057576,
      "loss": 0.91,
      "step": 1305
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.543129026889801,
      "learning_rate": 0.00019401757647542707,
      "loss": 0.874,
      "step": 1310
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.6252813339233398,
      "learning_rate": 0.00019397228724442537,
      "loss": 0.8728,
      "step": 1315
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5755243301391602,
      "learning_rate": 0.00019392683255731096,
      "loss": 1.0385,
      "step": 1320
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.7668480277061462,
      "learning_rate": 0.00019388121249411553,
      "loss": 1.0515,
      "step": 1325
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5681257247924805,
      "learning_rate": 0.0001938354271351618,
      "loss": 0.9672,
      "step": 1330
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.6426078081130981,
      "learning_rate": 0.00019378947656106373,
      "loss": 0.9659,
      "step": 1335
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.596076250076294,
      "learning_rate": 0.00019374336085272595,
      "loss": 0.9441,
      "step": 1340
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5348164439201355,
      "learning_rate": 0.000193697080091344,
      "loss": 1.008,
      "step": 1345
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.7003368139266968,
      "learning_rate": 0.000193650634358404,
      "loss": 1.0318,
      "step": 1350
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.5743355751037598,
      "learning_rate": 0.00019360402373568247,
      "loss": 1.0015,
      "step": 1355
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.6081267595291138,
      "learning_rate": 0.0001935572483052463,
      "loss": 1.037,
      "step": 1360
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.513016402721405,
      "learning_rate": 0.00019351030814945255,
      "loss": 0.9706,
      "step": 1365
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5697603821754456,
      "learning_rate": 0.0001934632033509483,
      "loss": 0.7802,
      "step": 1370
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.6547832489013672,
      "learning_rate": 0.00019341593399267053,
      "loss": 0.8814,
      "step": 1375
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.656122088432312,
      "learning_rate": 0.00019336850015784594,
      "loss": 1.0709,
      "step": 1380
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.6056241989135742,
      "learning_rate": 0.00019332090192999087,
      "loss": 0.9867,
      "step": 1385
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5010951161384583,
      "learning_rate": 0.000193273139392911,
      "loss": 0.8737,
      "step": 1390
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5506241917610168,
      "learning_rate": 0.0001932252126307014,
      "loss": 0.9104,
      "step": 1395
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5468207597732544,
      "learning_rate": 0.00019317712172774632,
      "loss": 0.8749,
      "step": 1400
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5140394568443298,
      "learning_rate": 0.00019312886676871888,
      "loss": 0.8969,
      "step": 1405
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5933979153633118,
      "learning_rate": 0.00019308044783858115,
      "loss": 0.9699,
      "step": 1410
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.6451619267463684,
      "learning_rate": 0.0001930318650225839,
      "loss": 1.0721,
      "step": 1415
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.6029821634292603,
      "learning_rate": 0.0001929831184062664,
      "loss": 1.0622,
      "step": 1420
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5566766262054443,
      "learning_rate": 0.0001929342080754564,
      "loss": 1.0093,
      "step": 1425
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5888077616691589,
      "learning_rate": 0.00019288513411626983,
      "loss": 0.972,
      "step": 1430
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.6099488735198975,
      "learning_rate": 0.00019283589661511072,
      "loss": 0.8556,
      "step": 1435
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.6205644011497498,
      "learning_rate": 0.0001927864956586711,
      "loss": 0.9261,
      "step": 1440
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.6054074764251709,
      "learning_rate": 0.00019273693133393076,
      "loss": 1.0162,
      "step": 1445
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.538075864315033,
      "learning_rate": 0.00019268720372815713,
      "loss": 0.9089,
      "step": 1450
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5652662515640259,
      "learning_rate": 0.00019263731292890515,
      "loss": 0.959,
      "step": 1455
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.6320458650588989,
      "learning_rate": 0.00019258725902401703,
      "loss": 1.1716,
      "step": 1460
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5600968599319458,
      "learning_rate": 0.00019253704210162224,
      "loss": 0.8629,
      "step": 1465
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.6069446206092834,
      "learning_rate": 0.00019248666225013726,
      "loss": 1.0024,
      "step": 1470
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.5121304988861084,
      "learning_rate": 0.00019243611955826537,
      "loss": 0.8568,
      "step": 1475
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5882970094680786,
      "learning_rate": 0.00019238541411499663,
      "loss": 0.9581,
      "step": 1480
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5393926501274109,
      "learning_rate": 0.0001923345460096076,
      "loss": 0.9318,
      "step": 1485
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6565856337547302,
      "learning_rate": 0.00019228351533166134,
      "loss": 1.0293,
      "step": 1490
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6271263360977173,
      "learning_rate": 0.000192232322171007,
      "loss": 0.9848,
      "step": 1495
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5515784025192261,
      "learning_rate": 0.00019218096661777992,
      "loss": 0.9167,
      "step": 1500
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6837130188941956,
      "learning_rate": 0.00019212944876240137,
      "loss": 0.9197,
      "step": 1505
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5938873887062073,
      "learning_rate": 0.00019207776869557833,
      "loss": 0.9345,
      "step": 1510
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.7142723798751831,
      "learning_rate": 0.00019202592650830337,
      "loss": 0.9954,
      "step": 1515
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.500342071056366,
      "learning_rate": 0.00019197392229185453,
      "loss": 0.9401,
      "step": 1520
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.643747091293335,
      "learning_rate": 0.0001919217561377952,
      "loss": 0.8676,
      "step": 1525
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6643776297569275,
      "learning_rate": 0.0001918694281379738,
      "loss": 1.0773,
      "step": 1530
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6194174885749817,
      "learning_rate": 0.0001918169383845237,
      "loss": 1.0095,
      "step": 1535
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5246191024780273,
      "learning_rate": 0.0001917642869698632,
      "loss": 0.9783,
      "step": 1540
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5903621912002563,
      "learning_rate": 0.0001917114739866951,
      "loss": 1.0164,
      "step": 1545
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5673597455024719,
      "learning_rate": 0.00019165849952800667,
      "loss": 0.7935,
      "step": 1550
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6583690643310547,
      "learning_rate": 0.0001916053636870696,
      "loss": 1.0241,
      "step": 1555
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.629821240901947,
      "learning_rate": 0.00019155206655743965,
      "loss": 0.9434,
      "step": 1560
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6511532068252563,
      "learning_rate": 0.00019149860823295656,
      "loss": 1.1081,
      "step": 1565
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.517203152179718,
      "learning_rate": 0.00019144498880774386,
      "loss": 0.8622,
      "step": 1570
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6585999727249146,
      "learning_rate": 0.00019139120837620882,
      "loss": 1.0642,
      "step": 1575
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.7876483798027039,
      "learning_rate": 0.00019133726703304208,
      "loss": 0.9939,
      "step": 1580
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.608589231967926,
      "learning_rate": 0.00019128316487321772,
      "loss": 0.9917,
      "step": 1585
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5026513934135437,
      "learning_rate": 0.00019122890199199284,
      "loss": 0.8672,
      "step": 1590
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.5934941172599792,
      "learning_rate": 0.0001911744784849076,
      "loss": 0.8947,
      "step": 1595
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5116546154022217,
      "learning_rate": 0.00019111989444778492,
      "loss": 1.0184,
      "step": 1600
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.7126646637916565,
      "learning_rate": 0.00019106514997673047,
      "loss": 0.9383,
      "step": 1605
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.6720495223999023,
      "learning_rate": 0.00019101024516813224,
      "loss": 1.0264,
      "step": 1610
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5357206463813782,
      "learning_rate": 0.00019095518011866063,
      "loss": 1.0058,
      "step": 1615
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.6155744791030884,
      "learning_rate": 0.0001908999549252682,
      "loss": 1.0121,
      "step": 1620
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.6566094160079956,
      "learning_rate": 0.0001908445696851893,
      "loss": 0.9829,
      "step": 1625
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5650537610054016,
      "learning_rate": 0.00019078902449594032,
      "loss": 0.923,
      "step": 1630
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5771694779396057,
      "learning_rate": 0.00019073331945531908,
      "loss": 0.9511,
      "step": 1635
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.6931601166725159,
      "learning_rate": 0.00019067745466140495,
      "loss": 0.8401,
      "step": 1640
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5769656300544739,
      "learning_rate": 0.0001906214302125586,
      "loss": 0.9956,
      "step": 1645
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5963305830955505,
      "learning_rate": 0.00019056524620742157,
      "loss": 0.9114,
      "step": 1650
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.4932982325553894,
      "learning_rate": 0.00019050890274491665,
      "loss": 0.9108,
      "step": 1655
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.8321012854576111,
      "learning_rate": 0.00019045239992424717,
      "loss": 1.0043,
      "step": 1660
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5227043032646179,
      "learning_rate": 0.00019039573784489716,
      "loss": 0.9348,
      "step": 1665
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.5955290794372559,
      "learning_rate": 0.00019033891660663098,
      "loss": 1.0376,
      "step": 1670
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.532315731048584,
      "learning_rate": 0.00019028193630949323,
      "loss": 0.9035,
      "step": 1675
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.6716378927230835,
      "learning_rate": 0.00019022479705380857,
      "loss": 0.9297,
      "step": 1680
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.7406010031700134,
      "learning_rate": 0.0001901674989401816,
      "loss": 0.8579,
      "step": 1685
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.7114114761352539,
      "learning_rate": 0.00019011004206949652,
      "loss": 0.9276,
      "step": 1690
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.6022518873214722,
      "learning_rate": 0.00019005242654291708,
      "loss": 1.0488,
      "step": 1695
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.6278284788131714,
      "learning_rate": 0.00018999465246188644,
      "loss": 1.0768,
      "step": 1700
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.579444408416748,
      "learning_rate": 0.00018993671992812683,
      "loss": 0.9125,
      "step": 1705
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.6062266826629639,
      "learning_rate": 0.00018987862904363954,
      "loss": 1.0356,
      "step": 1710
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.7265365123748779,
      "learning_rate": 0.00018982037991070462,
      "loss": 0.9464,
      "step": 1715
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5000131130218506,
      "learning_rate": 0.00018976197263188079,
      "loss": 0.9396,
      "step": 1720
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.555634617805481,
      "learning_rate": 0.00018970340731000516,
      "loss": 0.9729,
      "step": 1725
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6329852938652039,
      "learning_rate": 0.00018964468404819313,
      "loss": 0.8783,
      "step": 1730
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6367356777191162,
      "learning_rate": 0.00018958580294983822,
      "loss": 1.0654,
      "step": 1735
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5371712446212769,
      "learning_rate": 0.00018952676411861184,
      "loss": 0.9471,
      "step": 1740
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5458500385284424,
      "learning_rate": 0.00018946756765846304,
      "loss": 0.9233,
      "step": 1745
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6498488187789917,
      "learning_rate": 0.00018940821367361847,
      "loss": 0.9091,
      "step": 1750
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6993901133537292,
      "learning_rate": 0.00018934870226858217,
      "loss": 0.9276,
      "step": 1755
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6266675591468811,
      "learning_rate": 0.0001892890335481353,
      "loss": 0.8598,
      "step": 1760
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5299180150032043,
      "learning_rate": 0.00018922920761733596,
      "loss": 0.925,
      "step": 1765
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5607171654701233,
      "learning_rate": 0.00018916922458151914,
      "loss": 0.9816,
      "step": 1770
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5217439532279968,
      "learning_rate": 0.0001891090845462964,
      "loss": 0.8764,
      "step": 1775
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5624527931213379,
      "learning_rate": 0.00018904878761755569,
      "loss": 0.8804,
      "step": 1780
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6318154335021973,
      "learning_rate": 0.0001889883339014613,
      "loss": 0.9651,
      "step": 1785
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6358602046966553,
      "learning_rate": 0.00018892772350445345,
      "loss": 0.9051,
      "step": 1790
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6364490389823914,
      "learning_rate": 0.00018886695653324832,
      "loss": 0.993,
      "step": 1795
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.64936363697052,
      "learning_rate": 0.00018880603309483776,
      "loss": 0.866,
      "step": 1800
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6414142847061157,
      "learning_rate": 0.00018874495329648908,
      "loss": 1.0359,
      "step": 1805
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6995022892951965,
      "learning_rate": 0.00018868371724574488,
      "loss": 0.8347,
      "step": 1810
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5469136238098145,
      "learning_rate": 0.00018862232505042288,
      "loss": 0.9802,
      "step": 1815
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.7556516528129578,
      "learning_rate": 0.00018856077681861578,
      "loss": 0.9124,
      "step": 1820
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.6139054298400879,
      "learning_rate": 0.0001884990726586909,
      "loss": 0.9399,
      "step": 1825
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.5691984295845032,
      "learning_rate": 0.00018843721267929023,
      "loss": 0.8927,
      "step": 1830
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.7042884826660156,
      "learning_rate": 0.00018837519698933002,
      "loss": 1.0038,
      "step": 1835
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.6047410368919373,
      "learning_rate": 0.00018831302569800073,
      "loss": 0.879,
      "step": 1840
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.6905311346054077,
      "learning_rate": 0.00018825069891476671,
      "loss": 0.9217,
      "step": 1845
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5696797966957092,
      "learning_rate": 0.00018818821674936623,
      "loss": 0.9863,
      "step": 1850
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5839678645133972,
      "learning_rate": 0.00018812557931181093,
      "loss": 0.931,
      "step": 1855
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.6562358736991882,
      "learning_rate": 0.000188062786712386,
      "loss": 0.9697,
      "step": 1860
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.49899277091026306,
      "learning_rate": 0.00018799983906164983,
      "loss": 0.9136,
      "step": 1865
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.6238011121749878,
      "learning_rate": 0.00018793673647043364,
      "loss": 0.9229,
      "step": 1870
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.6438485383987427,
      "learning_rate": 0.00018787347904984165,
      "loss": 1.0229,
      "step": 1875
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.546998918056488,
      "learning_rate": 0.00018781006691125053,
      "loss": 0.8177,
      "step": 1880
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.6244615316390991,
      "learning_rate": 0.0001877465001663095,
      "loss": 1.0081,
      "step": 1885
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.6127185821533203,
      "learning_rate": 0.0001876827789269399,
      "loss": 0.9392,
      "step": 1890
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.7033048272132874,
      "learning_rate": 0.0001876189033053351,
      "loss": 1.0329,
      "step": 1895
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.7424870133399963,
      "learning_rate": 0.00018755487341396028,
      "loss": 1.0154,
      "step": 1900
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5857945680618286,
      "learning_rate": 0.00018749068936555228,
      "loss": 1.0817,
      "step": 1905
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.6423769593238831,
      "learning_rate": 0.00018742635127311935,
      "loss": 0.9001,
      "step": 1910
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5362260341644287,
      "learning_rate": 0.00018736185924994096,
      "loss": 0.8897,
      "step": 1915
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5578159093856812,
      "learning_rate": 0.00018729721340956758,
      "loss": 0.9347,
      "step": 1920
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5923319458961487,
      "learning_rate": 0.0001872324138658206,
      "loss": 0.9671,
      "step": 1925
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5793907046318054,
      "learning_rate": 0.00018716746073279184,
      "loss": 0.8519,
      "step": 1930
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5267802476882935,
      "learning_rate": 0.00018710235412484373,
      "loss": 0.9927,
      "step": 1935
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.5435567498207092,
      "learning_rate": 0.00018703709415660887,
      "loss": 0.8592,
      "step": 1940
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.7846719622612,
      "learning_rate": 0.00018697168094298984,
      "loss": 0.9461,
      "step": 1945
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.7979434728622437,
      "learning_rate": 0.00018690611459915908,
      "loss": 0.9974,
      "step": 1950
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.6998960375785828,
      "learning_rate": 0.00018684039524055862,
      "loss": 1.0231,
      "step": 1955
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.6466115117073059,
      "learning_rate": 0.0001867745229828999,
      "loss": 0.9781,
      "step": 1960
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.7325738072395325,
      "learning_rate": 0.00018670849794216355,
      "loss": 0.8436,
      "step": 1965
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5265980362892151,
      "learning_rate": 0.00018664232023459933,
      "loss": 1.0024,
      "step": 1970
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.6271637082099915,
      "learning_rate": 0.00018657598997672562,
      "loss": 1.1172,
      "step": 1975
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.664203405380249,
      "learning_rate": 0.00018650950728532948,
      "loss": 1.0014,
      "step": 1980
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.6504744291305542,
      "learning_rate": 0.00018644287227746636,
      "loss": 0.9328,
      "step": 1985
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.6602086424827576,
      "learning_rate": 0.0001863760850704599,
      "loss": 1.1187,
      "step": 1990
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5959579348564148,
      "learning_rate": 0.0001863091457819017,
      "loss": 0.8691,
      "step": 1995
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5921610593795776,
      "learning_rate": 0.00018624205452965112,
      "loss": 1.0163,
      "step": 2000
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5723516345024109,
      "learning_rate": 0.00018617481143183508,
      "loss": 1.004,
      "step": 2005
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5717443823814392,
      "learning_rate": 0.00018610741660684784,
      "loss": 0.9705,
      "step": 2010
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.6303788423538208,
      "learning_rate": 0.00018603987017335092,
      "loss": 0.9824,
      "step": 2015
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.7067974805831909,
      "learning_rate": 0.0001859721722502726,
      "loss": 0.9011,
      "step": 2020
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5747745633125305,
      "learning_rate": 0.000185904322956808,
      "loss": 0.9616,
      "step": 2025
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5667222142219543,
      "learning_rate": 0.0001858363224124187,
      "loss": 0.8805,
      "step": 2030
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.7209914326667786,
      "learning_rate": 0.0001857681707368326,
      "loss": 1.0061,
      "step": 2035
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5965637564659119,
      "learning_rate": 0.0001856998680500438,
      "loss": 0.9908,
      "step": 2040
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5698437094688416,
      "learning_rate": 0.00018563141447231211,
      "loss": 1.0224,
      "step": 2045
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.5468728542327881,
      "learning_rate": 0.0001855628101241631,
      "loss": 0.9173,
      "step": 2050
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.7616835832595825,
      "learning_rate": 0.00018549405512638783,
      "loss": 1.0199,
      "step": 2055
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.6229009032249451,
      "learning_rate": 0.00018542514960004253,
      "loss": 0.9385,
      "step": 2060
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.6399763226509094,
      "learning_rate": 0.0001853560936664485,
      "loss": 0.9203,
      "step": 2065
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.6714853048324585,
      "learning_rate": 0.00018528688744719193,
      "loss": 0.8532,
      "step": 2070
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6095345616340637,
      "learning_rate": 0.0001852175310641235,
      "loss": 0.9489,
      "step": 2075
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6012184619903564,
      "learning_rate": 0.00018514802463935834,
      "loss": 1.0208,
      "step": 2080
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.4830770492553711,
      "learning_rate": 0.00018507836829527574,
      "loss": 0.939,
      "step": 2085
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6476888656616211,
      "learning_rate": 0.000185008562154519,
      "loss": 0.9238,
      "step": 2090
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6635558009147644,
      "learning_rate": 0.00018493860633999508,
      "loss": 0.8019,
      "step": 2095
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5985074639320374,
      "learning_rate": 0.00018486850097487457,
      "loss": 0.9802,
      "step": 2100
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6918281316757202,
      "learning_rate": 0.00018479824618259128,
      "loss": 1.0577,
      "step": 2105
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.617668867111206,
      "learning_rate": 0.0001847278420868422,
      "loss": 0.9448,
      "step": 2110
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5378293991088867,
      "learning_rate": 0.00018465728881158708,
      "loss": 0.8489,
      "step": 2115
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5690131187438965,
      "learning_rate": 0.00018458658648104844,
      "loss": 1.0106,
      "step": 2120
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5910131335258484,
      "learning_rate": 0.00018451573521971123,
      "loss": 0.9338,
      "step": 2125
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6318804025650024,
      "learning_rate": 0.00018444473515232256,
      "loss": 0.9011,
      "step": 2130
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.527715265750885,
      "learning_rate": 0.00018437358640389158,
      "loss": 0.8679,
      "step": 2135
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5651088953018188,
      "learning_rate": 0.00018430228909968921,
      "loss": 0.9532,
      "step": 2140
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5673348903656006,
      "learning_rate": 0.00018423084336524793,
      "loss": 0.9157,
      "step": 2145
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6541638374328613,
      "learning_rate": 0.00018415924932636157,
      "loss": 0.7572,
      "step": 2150
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6314670443534851,
      "learning_rate": 0.0001840875071090851,
      "loss": 0.917,
      "step": 2155
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5921071171760559,
      "learning_rate": 0.00018401561683973434,
      "loss": 0.939,
      "step": 2160
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6067410707473755,
      "learning_rate": 0.0001839435786448858,
      "loss": 0.9877,
      "step": 2165
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6221805214881897,
      "learning_rate": 0.00018387139265137642,
      "loss": 0.9306,
      "step": 2170
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5830126404762268,
      "learning_rate": 0.00018379905898630345,
      "loss": 0.8507,
      "step": 2175
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.5757225751876831,
      "learning_rate": 0.00018372657777702406,
      "loss": 0.969,
      "step": 2180
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.7756751775741577,
      "learning_rate": 0.00018365394915115517,
      "loss": 1.1542,
      "step": 2185
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.6099016666412354,
      "learning_rate": 0.0001835811732365734,
      "loss": 1.0071,
      "step": 2190
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.7187824249267578,
      "learning_rate": 0.00018350825016141457,
      "loss": 0.9009,
      "step": 2195
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.7240955233573914,
      "learning_rate": 0.00018343518005407367,
      "loss": 0.8871,
      "step": 2200
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.7031527757644653,
      "learning_rate": 0.0001833619630432045,
      "loss": 1.0222,
      "step": 2205
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.7623906135559082,
      "learning_rate": 0.00018328859925771958,
      "loss": 0.9689,
      "step": 2210
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.6677159667015076,
      "learning_rate": 0.0001832150888267898,
      "loss": 0.8876,
      "step": 2215
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.579214334487915,
      "learning_rate": 0.00018314143187984433,
      "loss": 0.9057,
      "step": 2220
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.7031520009040833,
      "learning_rate": 0.00018306762854657023,
      "loss": 1.0086,
      "step": 2225
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.6051443219184875,
      "learning_rate": 0.00018299367895691234,
      "loss": 0.8878,
      "step": 2230
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.614453911781311,
      "learning_rate": 0.00018291958324107298,
      "loss": 0.9012,
      "step": 2235
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.7160709500312805,
      "learning_rate": 0.00018284534152951176,
      "loss": 0.9625,
      "step": 2240
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5451361536979675,
      "learning_rate": 0.00018277095395294538,
      "loss": 0.9954,
      "step": 2245
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5429083704948425,
      "learning_rate": 0.00018269642064234733,
      "loss": 0.8744,
      "step": 2250
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.6032977104187012,
      "learning_rate": 0.0001826217417289477,
      "loss": 0.93,
      "step": 2255
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5320645570755005,
      "learning_rate": 0.00018254691734423295,
      "loss": 0.8629,
      "step": 2260
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5685572028160095,
      "learning_rate": 0.00018247194761994567,
      "loss": 1.0152,
      "step": 2265
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5646812319755554,
      "learning_rate": 0.00018239683268808432,
      "loss": 1.0437,
      "step": 2270
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5840547680854797,
      "learning_rate": 0.00018232157268090307,
      "loss": 1.0193,
      "step": 2275
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5709146857261658,
      "learning_rate": 0.00018224616773091147,
      "loss": 0.9139,
      "step": 2280
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.7946630120277405,
      "learning_rate": 0.00018217061797087434,
      "loss": 0.8473,
      "step": 2285
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.6987430453300476,
      "learning_rate": 0.00018209492353381138,
      "loss": 0.9721,
      "step": 2290
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.7423457503318787,
      "learning_rate": 0.00018201908455299707,
      "loss": 0.9289,
      "step": 2295
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.6933215856552124,
      "learning_rate": 0.00018194310116196043,
      "loss": 0.9983,
      "step": 2300
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.5887870192527771,
      "learning_rate": 0.00018186697349448463,
      "loss": 0.9486,
      "step": 2305
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6001654267311096,
      "learning_rate": 0.000181790701684607,
      "loss": 0.9821,
      "step": 2310
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6377297043800354,
      "learning_rate": 0.0001817142858666185,
      "loss": 1.0207,
      "step": 2315
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6929865479469299,
      "learning_rate": 0.00018163772617506383,
      "loss": 0.9276,
      "step": 2320
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6459087133407593,
      "learning_rate": 0.00018156102274474086,
      "loss": 0.9748,
      "step": 2325
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5593891143798828,
      "learning_rate": 0.00018148417571070056,
      "loss": 0.8451,
      "step": 2330
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.7829484939575195,
      "learning_rate": 0.00018140718520824684,
      "loss": 0.8557,
      "step": 2335
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6354033350944519,
      "learning_rate": 0.0001813300513729361,
      "loss": 1.0291,
      "step": 2340
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5774961709976196,
      "learning_rate": 0.0001812527743405772,
      "loss": 0.917,
      "step": 2345
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.632072925567627,
      "learning_rate": 0.00018117535424723102,
      "loss": 0.9833,
      "step": 2350
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.55246901512146,
      "learning_rate": 0.0001810977912292104,
      "loss": 1.0615,
      "step": 2355
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6602877974510193,
      "learning_rate": 0.00018102008542307982,
      "loss": 1.0171,
      "step": 2360
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.487744003534317,
      "learning_rate": 0.00018094223696565512,
      "loss": 0.7264,
      "step": 2365
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6929968595504761,
      "learning_rate": 0.0001808642459940034,
      "loss": 0.9428,
      "step": 2370
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.7799659371376038,
      "learning_rate": 0.0001807861126454426,
      "loss": 0.945,
      "step": 2375
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5674405694007874,
      "learning_rate": 0.00018070783705754134,
      "loss": 1.0994,
      "step": 2380
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5485437512397766,
      "learning_rate": 0.00018062941936811868,
      "loss": 0.9951,
      "step": 2385
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5618184804916382,
      "learning_rate": 0.00018055085971524398,
      "loss": 0.9063,
      "step": 2390
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6102981567382812,
      "learning_rate": 0.0001804721582372364,
      "loss": 0.9701,
      "step": 2395
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.506664514541626,
      "learning_rate": 0.00018039331507266492,
      "loss": 0.9649,
      "step": 2400
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.7159459590911865,
      "learning_rate": 0.00018031433036034793,
      "loss": 0.9289,
      "step": 2405
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9469946026802063,
      "learning_rate": 0.0001802352042393531,
      "loss": 0.8951,
      "step": 2410
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5196167230606079,
      "learning_rate": 0.00018015593684899702,
      "loss": 0.9079,
      "step": 2415
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5103448033332825,
      "learning_rate": 0.000180076528328845,
      "loss": 0.9119,
      "step": 2420
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5514436364173889,
      "learning_rate": 0.0001799969788187109,
      "loss": 0.9128,
      "step": 2425
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.609643280506134,
      "learning_rate": 0.0001799172884586568,
      "loss": 0.8515,
      "step": 2430
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.82509446144104,
      "learning_rate": 0.0001798374573889927,
      "loss": 0.901,
      "step": 2435
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.563224196434021,
      "learning_rate": 0.00017975748575027646,
      "loss": 0.9706,
      "step": 2440
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5628690123558044,
      "learning_rate": 0.00017967737368331337,
      "loss": 0.9594,
      "step": 2445
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5404948592185974,
      "learning_rate": 0.00017959712132915599,
      "loss": 0.903,
      "step": 2450
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.6303789615631104,
      "learning_rate": 0.00017951672882910385,
      "loss": 0.9149,
      "step": 2455
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.8092197179794312,
      "learning_rate": 0.0001794361963247033,
      "loss": 0.9326,
      "step": 2460
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5375030040740967,
      "learning_rate": 0.00017935552395774708,
      "loss": 0.973,
      "step": 2465
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.6692082285881042,
      "learning_rate": 0.00017927471187027436,
      "loss": 0.9744,
      "step": 2470
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.6109797358512878,
      "learning_rate": 0.00017919376020457018,
      "loss": 0.9621,
      "step": 2475
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5392331480979919,
      "learning_rate": 0.0001791126691031653,
      "loss": 0.9281,
      "step": 2480
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.7013811469078064,
      "learning_rate": 0.00017903143870883615,
      "loss": 0.9098,
      "step": 2485
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.700690507888794,
      "learning_rate": 0.00017895006916460426,
      "loss": 0.9465,
      "step": 2490
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.6423932313919067,
      "learning_rate": 0.00017886856061373623,
      "loss": 0.891,
      "step": 2495
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5714556574821472,
      "learning_rate": 0.00017878691319974337,
      "loss": 0.8927,
      "step": 2500
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.6294008493423462,
      "learning_rate": 0.00017870512706638148,
      "loss": 0.976,
      "step": 2505
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.6031877398490906,
      "learning_rate": 0.0001786232023576507,
      "loss": 0.9927,
      "step": 2510
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5919615626335144,
      "learning_rate": 0.00017854113921779509,
      "loss": 1.0071,
      "step": 2515
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5988855957984924,
      "learning_rate": 0.00017845893779130237,
      "loss": 0.8991,
      "step": 2520
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.7165807485580444,
      "learning_rate": 0.00017837659822290386,
      "loss": 1.0104,
      "step": 2525
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5668060183525085,
      "learning_rate": 0.00017829412065757398,
      "loss": 1.0519,
      "step": 2530
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.5884309411048889,
      "learning_rate": 0.0001782115052405303,
      "loss": 0.9941,
      "step": 2535
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.6587290167808533,
      "learning_rate": 0.00017812875211723291,
      "loss": 0.936,
      "step": 2540
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.615807056427002,
      "learning_rate": 0.00017804586143338455,
      "loss": 0.9558,
      "step": 2545
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6964926719665527,
      "learning_rate": 0.00017796283333492997,
      "loss": 1.0988,
      "step": 2550
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6390440464019775,
      "learning_rate": 0.00017787966796805596,
      "loss": 0.979,
      "step": 2555
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6075397729873657,
      "learning_rate": 0.00017779636547919102,
      "loss": 0.9705,
      "step": 2560
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.65053391456604,
      "learning_rate": 0.00017771292601500505,
      "loss": 1.0245,
      "step": 2565
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5904353857040405,
      "learning_rate": 0.00017762934972240913,
      "loss": 0.8593,
      "step": 2570
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6183270215988159,
      "learning_rate": 0.0001775456367485552,
      "loss": 0.8164,
      "step": 2575
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5901939272880554,
      "learning_rate": 0.00017746178724083593,
      "loss": 0.8847,
      "step": 2580
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6754553318023682,
      "learning_rate": 0.00017737780134688435,
      "loss": 1.0066,
      "step": 2585
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5526779890060425,
      "learning_rate": 0.00017729367921457363,
      "loss": 0.9569,
      "step": 2590
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.618124783039093,
      "learning_rate": 0.00017720942099201678,
      "loss": 0.9416,
      "step": 2595
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5879096388816833,
      "learning_rate": 0.00017712502682756646,
      "loss": 0.8886,
      "step": 2600
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5963704586029053,
      "learning_rate": 0.00017704049686981471,
      "loss": 1.0272,
      "step": 2605
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6259241104125977,
      "learning_rate": 0.0001769558312675926,
      "loss": 0.9514,
      "step": 2610
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5682948231697083,
      "learning_rate": 0.00017687103016997003,
      "loss": 0.8188,
      "step": 2615
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.7589099407196045,
      "learning_rate": 0.0001767860937262555,
      "loss": 0.9704,
      "step": 2620
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6059669256210327,
      "learning_rate": 0.0001767010220859958,
      "loss": 0.9102,
      "step": 2625
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6229948997497559,
      "learning_rate": 0.00017661581539897577,
      "loss": 0.8566,
      "step": 2630
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5671799778938293,
      "learning_rate": 0.000176530473815218,
      "loss": 0.9599,
      "step": 2635
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.593555748462677,
      "learning_rate": 0.00017644499748498263,
      "loss": 0.8292,
      "step": 2640
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6068690419197083,
      "learning_rate": 0.000176359386558767,
      "loss": 0.8347,
      "step": 2645
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.4839940667152405,
      "learning_rate": 0.00017627364118730544,
      "loss": 0.7297,
      "step": 2650
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.6615285873413086,
      "learning_rate": 0.00017618776152156901,
      "loss": 0.989,
      "step": 2655
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.5995057821273804,
      "learning_rate": 0.00017610174771276525,
      "loss": 0.9066,
      "step": 2660
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.6151632070541382,
      "learning_rate": 0.0001760155999123378,
      "loss": 0.9305,
      "step": 2665
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.6653651595115662,
      "learning_rate": 0.0001759293182719664,
      "loss": 1.0602,
      "step": 2670
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.6078524589538574,
      "learning_rate": 0.00017584290294356616,
      "loss": 0.9534,
      "step": 2675
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.7024226188659668,
      "learning_rate": 0.00017575635407928784,
      "loss": 0.9684,
      "step": 2680
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5981400012969971,
      "learning_rate": 0.00017566967183151714,
      "loss": 0.9116,
      "step": 2685
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5973444581031799,
      "learning_rate": 0.00017558285635287465,
      "loss": 0.9494,
      "step": 2690
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.7531886696815491,
      "learning_rate": 0.00017549590779621563,
      "loss": 0.9766,
      "step": 2695
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5628442168235779,
      "learning_rate": 0.00017540882631462954,
      "loss": 0.9382,
      "step": 2700
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.6290925741195679,
      "learning_rate": 0.00017532161206143993,
      "loss": 0.9072,
      "step": 2705
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.6227972507476807,
      "learning_rate": 0.0001752342651902041,
      "loss": 0.9144,
      "step": 2710
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.48317110538482666,
      "learning_rate": 0.00017514678585471284,
      "loss": 0.8375,
      "step": 2715
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.8648179769515991,
      "learning_rate": 0.00017505917420899018,
      "loss": 0.8783,
      "step": 2720
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5906129479408264,
      "learning_rate": 0.00017497143040729314,
      "loss": 0.9017,
      "step": 2725
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.7248625159263611,
      "learning_rate": 0.0001748835546041114,
      "loss": 0.9519,
      "step": 2730
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5871652364730835,
      "learning_rate": 0.000174795546954167,
      "loss": 0.9223,
      "step": 2735
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.6332212686538696,
      "learning_rate": 0.00017470740761241422,
      "loss": 0.8887,
      "step": 2740
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.6930819749832153,
      "learning_rate": 0.00017461913673403915,
      "loss": 1.0195,
      "step": 2745
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.4964215159416199,
      "learning_rate": 0.00017453073447445952,
      "loss": 0.8916,
      "step": 2750
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.6864855885505676,
      "learning_rate": 0.0001744422009893243,
      "loss": 0.8499,
      "step": 2755
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.6707122921943665,
      "learning_rate": 0.00017435353643451357,
      "loss": 0.9833,
      "step": 2760
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.6430404782295227,
      "learning_rate": 0.00017426474096613812,
      "loss": 0.9741,
      "step": 2765
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5402218103408813,
      "learning_rate": 0.00017417581474053938,
      "loss": 0.8927,
      "step": 2770
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.6882984638214111,
      "learning_rate": 0.00017408675791428886,
      "loss": 1.0262,
      "step": 2775
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.5932244062423706,
      "learning_rate": 0.00017399757064418805,
      "loss": 0.8567,
      "step": 2780
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6488876938819885,
      "learning_rate": 0.00017390825308726817,
      "loss": 1.122,
      "step": 2785
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6389744281768799,
      "learning_rate": 0.00017381880540078974,
      "loss": 1.0426,
      "step": 2790
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.615460991859436,
      "learning_rate": 0.0001737292277422425,
      "loss": 0.9403,
      "step": 2795
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5837079882621765,
      "learning_rate": 0.000173639520269345,
      "loss": 0.8836,
      "step": 2800
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.7517130374908447,
      "learning_rate": 0.0001735496831400443,
      "loss": 0.873,
      "step": 2805
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.7326067686080933,
      "learning_rate": 0.00017345971651251576,
      "loss": 1.0026,
      "step": 2810
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6494262218475342,
      "learning_rate": 0.00017336962054516277,
      "loss": 0.9344,
      "step": 2815
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.7557472586631775,
      "learning_rate": 0.0001732793953966165,
      "loss": 0.8161,
      "step": 2820
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6807621717453003,
      "learning_rate": 0.00017318904122573542,
      "loss": 0.952,
      "step": 2825
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6098347902297974,
      "learning_rate": 0.00017309855819160535,
      "loss": 0.8668,
      "step": 2830
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.4934307038784027,
      "learning_rate": 0.00017300794645353884,
      "loss": 0.914,
      "step": 2835
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6054341197013855,
      "learning_rate": 0.00017291720617107516,
      "loss": 0.9653,
      "step": 2840
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5854093432426453,
      "learning_rate": 0.00017282633750397984,
      "loss": 0.9033,
      "step": 2845
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6354621648788452,
      "learning_rate": 0.0001727353406122445,
      "loss": 1.0004,
      "step": 2850
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.7072866559028625,
      "learning_rate": 0.00017264421565608648,
      "loss": 1.042,
      "step": 2855
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6686625480651855,
      "learning_rate": 0.00017255296279594862,
      "loss": 0.984,
      "step": 2860
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6095872521400452,
      "learning_rate": 0.000172461582192499,
      "loss": 0.9247,
      "step": 2865
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5870845317840576,
      "learning_rate": 0.00017237007400663053,
      "loss": 0.8918,
      "step": 2870
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6446424722671509,
      "learning_rate": 0.0001722784383994608,
      "loss": 0.9891,
      "step": 2875
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.598726212978363,
      "learning_rate": 0.00017218667553233182,
      "loss": 0.9481,
      "step": 2880
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.5482841730117798,
      "learning_rate": 0.00017209478556680957,
      "loss": 0.9311,
      "step": 2885
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6305568814277649,
      "learning_rate": 0.00017200276866468375,
      "loss": 0.8996,
      "step": 2890
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.4958514869213104,
      "learning_rate": 0.0001719106249879678,
      "loss": 0.8383,
      "step": 2895
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.7220568060874939,
      "learning_rate": 0.00017181835469889812,
      "loss": 0.935,
      "step": 2900
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6705941557884216,
      "learning_rate": 0.00017172595795993413,
      "loss": 0.9062,
      "step": 2905
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.5938014984130859,
      "learning_rate": 0.0001716334349337579,
      "loss": 0.8727,
      "step": 2910
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.7060703039169312,
      "learning_rate": 0.00017154078578327387,
      "loss": 0.9586,
      "step": 2915
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6116676926612854,
      "learning_rate": 0.00017144801067160844,
      "loss": 0.9642,
      "step": 2920
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.5843990445137024,
      "learning_rate": 0.00017135510976211,
      "loss": 0.8538,
      "step": 2925
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6299535632133484,
      "learning_rate": 0.0001712620832183482,
      "loss": 0.9251,
      "step": 2930
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.4945293664932251,
      "learning_rate": 0.00017116893120411398,
      "loss": 0.952,
      "step": 2935
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6320123672485352,
      "learning_rate": 0.00017107565388341925,
      "loss": 0.8861,
      "step": 2940
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.7203616499900818,
      "learning_rate": 0.0001709822514204965,
      "loss": 0.9179,
      "step": 2945
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6014286279678345,
      "learning_rate": 0.00017088872397979854,
      "loss": 0.9257,
      "step": 2950
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6653793454170227,
      "learning_rate": 0.00017079507172599828,
      "loss": 0.971,
      "step": 2955
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.5894360542297363,
      "learning_rate": 0.00017070129482398832,
      "loss": 0.9119,
      "step": 2960
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.662100076675415,
      "learning_rate": 0.00017060739343888076,
      "loss": 0.935,
      "step": 2965
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.8383188247680664,
      "learning_rate": 0.00017051336773600686,
      "loss": 0.9977,
      "step": 2970
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6464200019836426,
      "learning_rate": 0.00017041921788091684,
      "loss": 0.9875,
      "step": 2975
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.670562744140625,
      "learning_rate": 0.0001703249440393794,
      "loss": 1.0091,
      "step": 2980
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6573236584663391,
      "learning_rate": 0.0001702305463773816,
      "loss": 0.8405,
      "step": 2985
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6789873838424683,
      "learning_rate": 0.00017013602506112853,
      "loss": 0.8046,
      "step": 2990
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6313668489456177,
      "learning_rate": 0.00017004138025704298,
      "loss": 0.9618,
      "step": 2995
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6663994193077087,
      "learning_rate": 0.00016994661213176512,
      "loss": 0.9682,
      "step": 3000
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6564815044403076,
      "learning_rate": 0.00016985172085215235,
      "loss": 0.9292,
      "step": 3005
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.5784175992012024,
      "learning_rate": 0.00016975670658527875,
      "loss": 0.9443,
      "step": 3010
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.6862292289733887,
      "learning_rate": 0.00016966156949843513,
      "loss": 0.8598,
      "step": 3015
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5390055179595947,
      "learning_rate": 0.0001695663097591284,
      "loss": 0.8773,
      "step": 3020
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6282461881637573,
      "learning_rate": 0.00016947092753508147,
      "loss": 0.9229,
      "step": 3025
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6688674688339233,
      "learning_rate": 0.00016937542299423294,
      "loss": 0.9077,
      "step": 3030
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5949118733406067,
      "learning_rate": 0.00016927979630473677,
      "loss": 0.9493,
      "step": 3035
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5177481174468994,
      "learning_rate": 0.0001691840476349619,
      "loss": 0.9389,
      "step": 3040
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.7047688364982605,
      "learning_rate": 0.00016908817715349217,
      "loss": 0.9878,
      "step": 3045
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5326054096221924,
      "learning_rate": 0.00016899218502912578,
      "loss": 0.8119,
      "step": 3050
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6114538908004761,
      "learning_rate": 0.00016889607143087516,
      "loss": 1.0068,
      "step": 3055
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6147065758705139,
      "learning_rate": 0.0001687998365279666,
      "loss": 1.0051,
      "step": 3060
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6280825138092041,
      "learning_rate": 0.00016870348048984,
      "loss": 0.9755,
      "step": 3065
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.7574710845947266,
      "learning_rate": 0.0001686070034861485,
      "loss": 0.7589,
      "step": 3070
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6277915835380554,
      "learning_rate": 0.0001685104056867583,
      "loss": 0.9468,
      "step": 3075
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6549091935157776,
      "learning_rate": 0.00016841368726174812,
      "loss": 0.9123,
      "step": 3080
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6778223514556885,
      "learning_rate": 0.00016831684838140927,
      "loss": 0.9532,
      "step": 3085
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5268427133560181,
      "learning_rate": 0.00016821988921624499,
      "loss": 0.8781,
      "step": 3090
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6663500070571899,
      "learning_rate": 0.00016812280993697037,
      "loss": 0.9117,
      "step": 3095
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6826052665710449,
      "learning_rate": 0.000168025610714512,
      "loss": 0.965,
      "step": 3100
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6532934904098511,
      "learning_rate": 0.0001679282917200076,
      "loss": 0.9292,
      "step": 3105
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6209408640861511,
      "learning_rate": 0.00016783085312480585,
      "loss": 1.0588,
      "step": 3110
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6071512699127197,
      "learning_rate": 0.00016773329510046586,
      "loss": 1.0328,
      "step": 3115
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.6279392242431641,
      "learning_rate": 0.0001676356178187572,
      "loss": 0.9559,
      "step": 3120
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5424745678901672,
      "learning_rate": 0.0001675378214516593,
      "loss": 0.9844,
      "step": 3125
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.74262535572052,
      "learning_rate": 0.00016743990617136128,
      "loss": 1.0874,
      "step": 3130
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.5871965885162354,
      "learning_rate": 0.00016734187215026167,
      "loss": 1.0176,
      "step": 3135
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.5088030099868774,
      "learning_rate": 0.000167243719560968,
      "loss": 0.8139,
      "step": 3140
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6284034252166748,
      "learning_rate": 0.00016714544857629666,
      "loss": 0.9315,
      "step": 3145
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.7349436283111572,
      "learning_rate": 0.00016704705936927244,
      "loss": 0.8626,
      "step": 3150
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0078330039978027,
      "learning_rate": 0.00016694855211312818,
      "loss": 1.0201,
      "step": 3155
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.7132492065429688,
      "learning_rate": 0.00016684992698130476,
      "loss": 0.938,
      "step": 3160
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6642095446586609,
      "learning_rate": 0.00016675118414745052,
      "loss": 0.897,
      "step": 3165
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.5875717401504517,
      "learning_rate": 0.000166652323785421,
      "loss": 0.9306,
      "step": 3170
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.5877960324287415,
      "learning_rate": 0.00016655334606927865,
      "loss": 0.9407,
      "step": 3175
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.7100574374198914,
      "learning_rate": 0.00016645425117329268,
      "loss": 1.0241,
      "step": 3180
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.564652144908905,
      "learning_rate": 0.0001663550392719385,
      "loss": 0.9337,
      "step": 3185
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.5287343263626099,
      "learning_rate": 0.00016625571053989754,
      "loss": 0.8411,
      "step": 3190
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.5721025466918945,
      "learning_rate": 0.00016615626515205695,
      "loss": 0.8796,
      "step": 3195
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.8463550209999084,
      "learning_rate": 0.00016605670328350932,
      "loss": 0.9973,
      "step": 3200
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6993775367736816,
      "learning_rate": 0.00016595702510955227,
      "loss": 1.0342,
      "step": 3205
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.7446513175964355,
      "learning_rate": 0.00016585723080568817,
      "loss": 0.874,
      "step": 3210
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6518576145172119,
      "learning_rate": 0.00016575732054762397,
      "loss": 0.8751,
      "step": 3215
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6227362155914307,
      "learning_rate": 0.00016565729451127067,
      "loss": 0.8898,
      "step": 3220
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6903109550476074,
      "learning_rate": 0.00016555715287274318,
      "loss": 0.8295,
      "step": 3225
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.5956553816795349,
      "learning_rate": 0.00016545689580835994,
      "loss": 0.9709,
      "step": 3230
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.633112370967865,
      "learning_rate": 0.00016535652349464254,
      "loss": 0.8406,
      "step": 3235
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.5930719375610352,
      "learning_rate": 0.00016525603610831566,
      "loss": 0.857,
      "step": 3240
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.717291533946991,
      "learning_rate": 0.0001651554338263064,
      "loss": 1.0251,
      "step": 3245
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.7223893404006958,
      "learning_rate": 0.0001650547168257443,
      "loss": 0.8636,
      "step": 3250
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.6896404027938843,
      "learning_rate": 0.0001649538852839608,
      "loss": 0.9304,
      "step": 3255
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.7068783640861511,
      "learning_rate": 0.00016485293937848903,
      "loss": 0.9012,
      "step": 3260
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.8663097620010376,
      "learning_rate": 0.0001647518792870635,
      "loss": 1.0295,
      "step": 3265
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.7634738683700562,
      "learning_rate": 0.00016465070518761977,
      "loss": 0.9565,
      "step": 3270
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6867614388465881,
      "learning_rate": 0.00016454941725829405,
      "loss": 0.9136,
      "step": 3275
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6606219410896301,
      "learning_rate": 0.0001644480156774231,
      "loss": 0.8807,
      "step": 3280
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6782506108283997,
      "learning_rate": 0.0001643465006235437,
      "loss": 0.9843,
      "step": 3285
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6689923405647278,
      "learning_rate": 0.00016424487227539243,
      "loss": 0.9309,
      "step": 3290
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.5364828705787659,
      "learning_rate": 0.00016414313081190537,
      "loss": 0.8986,
      "step": 3295
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6805548071861267,
      "learning_rate": 0.00016404127641221774,
      "loss": 0.8756,
      "step": 3300
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6093565821647644,
      "learning_rate": 0.00016393930925566358,
      "loss": 0.9902,
      "step": 3305
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6762187480926514,
      "learning_rate": 0.00016383722952177557,
      "loss": 0.9862,
      "step": 3310
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6144864559173584,
      "learning_rate": 0.00016373503739028448,
      "loss": 1.0414,
      "step": 3315
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.8119445443153381,
      "learning_rate": 0.00016363273304111902,
      "loss": 0.9946,
      "step": 3320
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6898847818374634,
      "learning_rate": 0.00016353031665440547,
      "loss": 0.8921,
      "step": 3325
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6010476350784302,
      "learning_rate": 0.00016342778841046745,
      "loss": 0.9369,
      "step": 3330
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.4694141149520874,
      "learning_rate": 0.00016332514848982542,
      "loss": 0.8308,
      "step": 3335
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6777420043945312,
      "learning_rate": 0.00016322239707319648,
      "loss": 0.9351,
      "step": 3340
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.770054280757904,
      "learning_rate": 0.00016311953434149413,
      "loss": 0.8852,
      "step": 3345
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6395520567893982,
      "learning_rate": 0.0001630165604758278,
      "loss": 0.8646,
      "step": 3350
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6607555150985718,
      "learning_rate": 0.00016291347565750255,
      "loss": 0.8932,
      "step": 3355
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.5776294469833374,
      "learning_rate": 0.00016281028006801887,
      "loss": 0.9063,
      "step": 3360
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.5314105153083801,
      "learning_rate": 0.0001627069738890723,
      "loss": 0.8283,
      "step": 3365
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.6023496389389038,
      "learning_rate": 0.00016260355730255297,
      "loss": 0.8207,
      "step": 3370
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.714431881904602,
      "learning_rate": 0.0001625000304905455,
      "loss": 0.9015,
      "step": 3375
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6060982346534729,
      "learning_rate": 0.00016239639363532858,
      "loss": 0.9208,
      "step": 3380
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6184629797935486,
      "learning_rate": 0.00016229264691937462,
      "loss": 0.9031,
      "step": 3385
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6056995391845703,
      "learning_rate": 0.00016218879052534949,
      "loss": 1.0191,
      "step": 3390
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6588426232337952,
      "learning_rate": 0.0001620848246361122,
      "loss": 0.9528,
      "step": 3395
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6407586932182312,
      "learning_rate": 0.0001619807494347144,
      "loss": 0.8039,
      "step": 3400
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6428894400596619,
      "learning_rate": 0.0001618765651044004,
      "loss": 0.8567,
      "step": 3405
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.7074903249740601,
      "learning_rate": 0.00016177227182860647,
      "loss": 0.8954,
      "step": 3410
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6333289742469788,
      "learning_rate": 0.00016166786979096088,
      "loss": 0.8844,
      "step": 3415
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.5690221786499023,
      "learning_rate": 0.00016156335917528325,
      "loss": 0.8021,
      "step": 3420
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6526438593864441,
      "learning_rate": 0.00016145874016558443,
      "loss": 0.9289,
      "step": 3425
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6392170786857605,
      "learning_rate": 0.00016135401294606618,
      "loss": 0.9413,
      "step": 3430
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.8693280816078186,
      "learning_rate": 0.0001612491777011206,
      "loss": 0.7817,
      "step": 3435
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.7930571436882019,
      "learning_rate": 0.00016114423461533026,
      "loss": 0.9878,
      "step": 3440
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.5343663692474365,
      "learning_rate": 0.00016103918387346732,
      "loss": 0.8839,
      "step": 3445
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.718345046043396,
      "learning_rate": 0.00016093402566049367,
      "loss": 1.0465,
      "step": 3450
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.5717639327049255,
      "learning_rate": 0.0001608287601615604,
      "loss": 0.8313,
      "step": 3455
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6372225880622864,
      "learning_rate": 0.00016072338756200746,
      "loss": 0.9464,
      "step": 3460
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.5836284756660461,
      "learning_rate": 0.00016061790804736332,
      "loss": 0.9665,
      "step": 3465
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.7392018437385559,
      "learning_rate": 0.00016051232180334485,
      "loss": 0.8797,
      "step": 3470
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6039625406265259,
      "learning_rate": 0.00016040662901585674,
      "loss": 0.9242,
      "step": 3475
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.5853233337402344,
      "learning_rate": 0.00016030082987099123,
      "loss": 0.9494,
      "step": 3480
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6518459320068359,
      "learning_rate": 0.00016019492455502787,
      "loss": 0.9992,
      "step": 3485
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.6406946182250977,
      "learning_rate": 0.00016008891325443317,
      "loss": 0.9105,
      "step": 3490
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.7694531083106995,
      "learning_rate": 0.0001599827961558602,
      "loss": 0.9901,
      "step": 3495
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.8471524715423584,
      "learning_rate": 0.00015987657344614835,
      "loss": 0.8733,
      "step": 3500
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6725741028785706,
      "learning_rate": 0.0001597702453123229,
      "loss": 0.9278,
      "step": 3505
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6549644470214844,
      "learning_rate": 0.00015966381194159482,
      "loss": 0.9492,
      "step": 3510
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6407148838043213,
      "learning_rate": 0.0001595572735213603,
      "loss": 0.8639,
      "step": 3515
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.677624523639679,
      "learning_rate": 0.00015945063023920056,
      "loss": 1.0831,
      "step": 3520
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5648879408836365,
      "learning_rate": 0.00015934388228288138,
      "loss": 0.8247,
      "step": 3525
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.7800204753875732,
      "learning_rate": 0.00015923702984035288,
      "loss": 0.9129,
      "step": 3530
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.624596357345581,
      "learning_rate": 0.00015913007309974916,
      "loss": 0.9534,
      "step": 3535
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.7033581733703613,
      "learning_rate": 0.00015902301224938792,
      "loss": 0.7902,
      "step": 3540
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.7332955598831177,
      "learning_rate": 0.00015891584747777018,
      "loss": 0.9685,
      "step": 3545
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.7487533092498779,
      "learning_rate": 0.00015880857897357994,
      "loss": 0.9143,
      "step": 3550
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6748859286308289,
      "learning_rate": 0.00015870120692568383,
      "loss": 0.9375,
      "step": 3555
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6206909418106079,
      "learning_rate": 0.00015859373152313078,
      "loss": 0.9535,
      "step": 3560
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6313974857330322,
      "learning_rate": 0.00015848615295515175,
      "loss": 0.8325,
      "step": 3565
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5907221436500549,
      "learning_rate": 0.00015837847141115927,
      "loss": 0.8503,
      "step": 3570
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6586530804634094,
      "learning_rate": 0.00015827068708074724,
      "loss": 0.8813,
      "step": 3575
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.8002377152442932,
      "learning_rate": 0.00015816280015369045,
      "loss": 0.9446,
      "step": 3580
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6264663934707642,
      "learning_rate": 0.00015805481081994444,
      "loss": 1.0207,
      "step": 3585
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6522766351699829,
      "learning_rate": 0.00015794671926964497,
      "loss": 0.8568,
      "step": 3590
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5466406941413879,
      "learning_rate": 0.00015783852569310785,
      "loss": 0.8827,
      "step": 3595
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.5301129817962646,
      "learning_rate": 0.00015773023028082842,
      "loss": 0.8395,
      "step": 3600
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6852278709411621,
      "learning_rate": 0.00015762183322348144,
      "loss": 0.9331,
      "step": 3605
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.7991815209388733,
      "learning_rate": 0.0001575133347119205,
      "loss": 1.0256,
      "step": 3610
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6975600123405457,
      "learning_rate": 0.00015740473493717802,
      "loss": 0.8432,
      "step": 3615
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5848995447158813,
      "learning_rate": 0.00015729603409046447,
      "loss": 0.9775,
      "step": 3620
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5751985311508179,
      "learning_rate": 0.00015718723236316846,
      "loss": 0.9224,
      "step": 3625
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.7802300453186035,
      "learning_rate": 0.0001570783299468562,
      "loss": 0.9733,
      "step": 3630
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5258293747901917,
      "learning_rate": 0.000156969327033271,
      "loss": 0.8185,
      "step": 3635
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6269134283065796,
      "learning_rate": 0.00015686022381433337,
      "loss": 0.899,
      "step": 3640
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5452288389205933,
      "learning_rate": 0.00015675102048214027,
      "loss": 0.8696,
      "step": 3645
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6281285285949707,
      "learning_rate": 0.000156641717228965,
      "loss": 0.9534,
      "step": 3650
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.8480727076530457,
      "learning_rate": 0.00015653231424725671,
      "loss": 0.9262,
      "step": 3655
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.603663444519043,
      "learning_rate": 0.00015642281172964024,
      "loss": 0.8537,
      "step": 3660
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5591794848442078,
      "learning_rate": 0.0001563132098689156,
      "loss": 0.8872,
      "step": 3665
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6126306056976318,
      "learning_rate": 0.00015620350885805774,
      "loss": 0.833,
      "step": 3670
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6492719054222107,
      "learning_rate": 0.00015609370889021617,
      "loss": 0.923,
      "step": 3675
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6563048958778381,
      "learning_rate": 0.00015598381015871472,
      "loss": 0.8586,
      "step": 3680
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.602532684803009,
      "learning_rate": 0.000155873812857051,
      "loss": 0.81,
      "step": 3685
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.5984077453613281,
      "learning_rate": 0.0001557637171788962,
      "loss": 0.8112,
      "step": 3690
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6583290100097656,
      "learning_rate": 0.00015565352331809473,
      "loss": 0.8635,
      "step": 3695
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6902117729187012,
      "learning_rate": 0.0001555432314686639,
      "loss": 0.8655,
      "step": 3700
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6870086789131165,
      "learning_rate": 0.00015543284182479352,
      "loss": 0.9489,
      "step": 3705
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6190910935401917,
      "learning_rate": 0.00015532235458084554,
      "loss": 0.8847,
      "step": 3710
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6703643798828125,
      "learning_rate": 0.00015521176993135388,
      "loss": 0.9021,
      "step": 3715
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6639297604560852,
      "learning_rate": 0.00015510108807102383,
      "loss": 0.9294,
      "step": 3720
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.6397198438644409,
      "learning_rate": 0.00015499030919473186,
      "loss": 0.9062,
      "step": 3725
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.7036738991737366,
      "learning_rate": 0.00015487943349752533,
      "loss": 0.922,
      "step": 3730
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.5168409943580627,
      "learning_rate": 0.00015476846117462204,
      "loss": 0.9515,
      "step": 3735
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6692652106285095,
      "learning_rate": 0.00015465739242140987,
      "loss": 0.8807,
      "step": 3740
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6560524702072144,
      "learning_rate": 0.0001545462274334465,
      "loss": 0.9095,
      "step": 3745
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.5958166122436523,
      "learning_rate": 0.00015443496640645915,
      "loss": 0.9193,
      "step": 3750
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6260406970977783,
      "learning_rate": 0.00015432360953634397,
      "loss": 0.9808,
      "step": 3755
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.8512209057807922,
      "learning_rate": 0.00015421215701916596,
      "loss": 0.964,
      "step": 3760
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.8193923234939575,
      "learning_rate": 0.00015410060905115852,
      "loss": 0.8987,
      "step": 3765
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6861628890037537,
      "learning_rate": 0.0001539889658287231,
      "loss": 0.9304,
      "step": 3770
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6048279404640198,
      "learning_rate": 0.00015387722754842885,
      "loss": 0.9073,
      "step": 3775
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.7512372732162476,
      "learning_rate": 0.0001537653944070123,
      "loss": 0.979,
      "step": 3780
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6055505871772766,
      "learning_rate": 0.00015365346660137702,
      "loss": 0.9774,
      "step": 3785
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6355717778205872,
      "learning_rate": 0.0001535414443285932,
      "loss": 0.8838,
      "step": 3790
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.7207841873168945,
      "learning_rate": 0.0001534293277858974,
      "loss": 0.8993,
      "step": 3795
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6495758891105652,
      "learning_rate": 0.00015331711717069216,
      "loss": 0.9091,
      "step": 3800
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6666922569274902,
      "learning_rate": 0.0001532048126805456,
      "loss": 1.0268,
      "step": 3805
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6077538132667542,
      "learning_rate": 0.00015309241451319126,
      "loss": 0.8616,
      "step": 3810
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6129001975059509,
      "learning_rate": 0.00015297992286652745,
      "loss": 0.93,
      "step": 3815
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.7638548612594604,
      "learning_rate": 0.0001528673379386172,
      "loss": 1.01,
      "step": 3820
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.5867222547531128,
      "learning_rate": 0.0001527546599276876,
      "loss": 0.9305,
      "step": 3825
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.5881127119064331,
      "learning_rate": 0.00015264188903212991,
      "loss": 0.83,
      "step": 3830
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.8406863808631897,
      "learning_rate": 0.00015252902545049866,
      "loss": 0.9555,
      "step": 3835
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.6948667764663696,
      "learning_rate": 0.00015241606938151177,
      "loss": 0.8267,
      "step": 3840
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.7978765368461609,
      "learning_rate": 0.00015230302102404986,
      "loss": 0.9554,
      "step": 3845
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6019850969314575,
      "learning_rate": 0.0001521898805771561,
      "loss": 1.0175,
      "step": 3850
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6698493957519531,
      "learning_rate": 0.0001520766482400358,
      "loss": 0.8467,
      "step": 3855
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.5901235342025757,
      "learning_rate": 0.0001519633242120561,
      "loss": 0.8262,
      "step": 3860
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6284335851669312,
      "learning_rate": 0.0001518499086927455,
      "loss": 0.6594,
      "step": 3865
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6574525833129883,
      "learning_rate": 0.00015173640188179363,
      "loss": 0.8159,
      "step": 3870
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6073183417320251,
      "learning_rate": 0.00015162280397905086,
      "loss": 0.9547,
      "step": 3875
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.5954556465148926,
      "learning_rate": 0.00015150911518452793,
      "loss": 0.9168,
      "step": 3880
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.734402596950531,
      "learning_rate": 0.00015139533569839565,
      "loss": 0.8784,
      "step": 3885
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.7888867259025574,
      "learning_rate": 0.00015128146572098442,
      "loss": 0.9391,
      "step": 3890
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.5263239145278931,
      "learning_rate": 0.00015116750545278408,
      "loss": 0.8745,
      "step": 3895
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.7067481279373169,
      "learning_rate": 0.00015105345509444336,
      "loss": 0.8533,
      "step": 3900
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6724989414215088,
      "learning_rate": 0.00015093931484676967,
      "loss": 0.889,
      "step": 3905
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6885275840759277,
      "learning_rate": 0.00015082508491072864,
      "loss": 0.9317,
      "step": 3910
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6939470767974854,
      "learning_rate": 0.00015071076548744386,
      "loss": 0.9193,
      "step": 3915
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.781311571598053,
      "learning_rate": 0.00015059635677819636,
      "loss": 1.0144,
      "step": 3920
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6873858571052551,
      "learning_rate": 0.00015048185898442463,
      "loss": 0.8466,
      "step": 3925
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.7110685110092163,
      "learning_rate": 0.00015036727230772367,
      "loss": 0.9595,
      "step": 3930
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.7505444288253784,
      "learning_rate": 0.00015025259694984524,
      "loss": 0.9217,
      "step": 3935
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.5211585164070129,
      "learning_rate": 0.0001501378331126972,
      "loss": 0.9427,
      "step": 3940
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6653823256492615,
      "learning_rate": 0.00015002298099834303,
      "loss": 0.8937,
      "step": 3945
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.7119855880737305,
      "learning_rate": 0.00014990804080900185,
      "loss": 0.9794,
      "step": 3950
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.6810693144798279,
      "learning_rate": 0.0001497930127470477,
      "loss": 1.0353,
      "step": 3955
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.5851398706436157,
      "learning_rate": 0.00014967789701500944,
      "loss": 0.9098,
      "step": 3960
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.7086507678031921,
      "learning_rate": 0.00014956269381557024,
      "loss": 0.8536,
      "step": 3965
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.595748782157898,
      "learning_rate": 0.00014944740335156724,
      "loss": 0.9451,
      "step": 3970
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.7851556539535522,
      "learning_rate": 0.0001493320258259913,
      "loss": 1.0031,
      "step": 3975
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6311838626861572,
      "learning_rate": 0.00014921656144198652,
      "loss": 0.8833,
      "step": 3980
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.5700263977050781,
      "learning_rate": 0.00014910101040284992,
      "loss": 0.9191,
      "step": 3985
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.7289249897003174,
      "learning_rate": 0.00014898537291203117,
      "loss": 0.849,
      "step": 3990
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6655135154724121,
      "learning_rate": 0.00014886964917313207,
      "loss": 1.0639,
      "step": 3995
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6108108758926392,
      "learning_rate": 0.00014875383938990627,
      "loss": 0.8746,
      "step": 4000
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.7380669713020325,
      "learning_rate": 0.00014863794376625904,
      "loss": 0.8703,
      "step": 4005
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6507090330123901,
      "learning_rate": 0.00014852196250624662,
      "loss": 0.9557,
      "step": 4010
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6174106001853943,
      "learning_rate": 0.00014840589581407616,
      "loss": 0.9274,
      "step": 4015
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6442661881446838,
      "learning_rate": 0.00014828974389410516,
      "loss": 0.9409,
      "step": 4020
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.8616316318511963,
      "learning_rate": 0.0001481735069508412,
      "loss": 0.8272,
      "step": 4025
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6512661576271057,
      "learning_rate": 0.00014805718518894157,
      "loss": 0.8498,
      "step": 4030
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6582963466644287,
      "learning_rate": 0.00014794077881321292,
      "loss": 0.9465,
      "step": 4035
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6269610524177551,
      "learning_rate": 0.0001478242880286108,
      "loss": 0.9631,
      "step": 4040
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.5856170058250427,
      "learning_rate": 0.00014770771304023942,
      "loss": 0.8131,
      "step": 4045
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.5877073407173157,
      "learning_rate": 0.00014759105405335132,
      "loss": 0.8252,
      "step": 4050
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.6415336728096008,
      "learning_rate": 0.00014747431127334678,
      "loss": 0.9773,
      "step": 4055
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.7174964547157288,
      "learning_rate": 0.0001473574849057738,
      "loss": 1.0136,
      "step": 4060
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.5549941062927246,
      "learning_rate": 0.00014724057515632738,
      "loss": 0.9079,
      "step": 4065
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.7842785716056824,
      "learning_rate": 0.00014712358223084942,
      "loss": 0.8031,
      "step": 4070
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.5719451308250427,
      "learning_rate": 0.00014700650633532827,
      "loss": 0.8293,
      "step": 4075
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.742992639541626,
      "learning_rate": 0.00014688934767589833,
      "loss": 0.9294,
      "step": 4080
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.608527421951294,
      "learning_rate": 0.00014677210645883977,
      "loss": 0.9394,
      "step": 4085
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.533574104309082,
      "learning_rate": 0.00014665478289057805,
      "loss": 0.8738,
      "step": 4090
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.7454668283462524,
      "learning_rate": 0.00014653737717768367,
      "loss": 0.983,
      "step": 4095
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6168544888496399,
      "learning_rate": 0.00014641988952687177,
      "loss": 0.9008,
      "step": 4100
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6677118539810181,
      "learning_rate": 0.0001463023201450017,
      "loss": 0.8957,
      "step": 4105
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6591238379478455,
      "learning_rate": 0.00014618466923907678,
      "loss": 0.9938,
      "step": 4110
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.5012399554252625,
      "learning_rate": 0.00014606693701624385,
      "loss": 0.855,
      "step": 4115
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6333223581314087,
      "learning_rate": 0.0001459491236837929,
      "loss": 0.8756,
      "step": 4120
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.513618528842926,
      "learning_rate": 0.00014583122944915672,
      "loss": 0.9169,
      "step": 4125
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6848164796829224,
      "learning_rate": 0.00014571325451991066,
      "loss": 0.9924,
      "step": 4130
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6335467100143433,
      "learning_rate": 0.00014559519910377193,
      "loss": 0.7958,
      "step": 4135
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6131249666213989,
      "learning_rate": 0.0001454770634085997,
      "loss": 0.9621,
      "step": 4140
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6990880370140076,
      "learning_rate": 0.00014535884764239424,
      "loss": 1.0121,
      "step": 4145
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.7077513933181763,
      "learning_rate": 0.00014524055201329704,
      "loss": 0.9293,
      "step": 4150
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.533998966217041,
      "learning_rate": 0.00014512217672959003,
      "loss": 0.8922,
      "step": 4155
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6833199858665466,
      "learning_rate": 0.00014500372199969546,
      "loss": 0.9573,
      "step": 4160
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6293190717697144,
      "learning_rate": 0.00014488518803217542,
      "loss": 1.0084,
      "step": 4165
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6474047303199768,
      "learning_rate": 0.0001447665750357316,
      "loss": 0.8068,
      "step": 4170
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.7276792526245117,
      "learning_rate": 0.00014464788321920472,
      "loss": 0.9225,
      "step": 4175
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.63521808385849,
      "learning_rate": 0.00014452911279157435,
      "loss": 0.8718,
      "step": 4180
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.720392644405365,
      "learning_rate": 0.0001444102639619585,
      "loss": 0.9408,
      "step": 4185
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.707635223865509,
      "learning_rate": 0.00014429133693961304,
      "loss": 0.9199,
      "step": 4190
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.5724509954452515,
      "learning_rate": 0.0001441723319339318,
      "loss": 1.0271,
      "step": 4195
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6615992784500122,
      "learning_rate": 0.00014405324915444572,
      "loss": 0.858,
      "step": 4200
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.533299446105957,
      "learning_rate": 0.00014393408881082265,
      "loss": 0.9178,
      "step": 4205
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6693369746208191,
      "learning_rate": 0.00014381485111286714,
      "loss": 0.8711,
      "step": 4210
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6453130841255188,
      "learning_rate": 0.00014369553627051982,
      "loss": 0.9083,
      "step": 4215
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.7167688608169556,
      "learning_rate": 0.0001435761444938573,
      "loss": 0.9752,
      "step": 4220
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.602178156375885,
      "learning_rate": 0.00014345667599309142,
      "loss": 0.9038,
      "step": 4225
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6880233883857727,
      "learning_rate": 0.0001433371309785693,
      "loss": 0.9242,
      "step": 4230
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.8023783564567566,
      "learning_rate": 0.0001432175096607727,
      "loss": 0.9589,
      "step": 4235
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6405156254768372,
      "learning_rate": 0.00014309781225031778,
      "loss": 0.9669,
      "step": 4240
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.47613513469696045,
      "learning_rate": 0.00014297803895795455,
      "loss": 0.9187,
      "step": 4245
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.7372537851333618,
      "learning_rate": 0.00014285818999456676,
      "loss": 0.8633,
      "step": 4250
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.7306585311889648,
      "learning_rate": 0.0001427382655711713,
      "loss": 0.96,
      "step": 4255
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6008553504943848,
      "learning_rate": 0.000142618265898918,
      "loss": 0.8579,
      "step": 4260
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6557427048683167,
      "learning_rate": 0.00014249819118908915,
      "loss": 0.9197,
      "step": 4265
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6179531216621399,
      "learning_rate": 0.00014237804165309913,
      "loss": 0.9992,
      "step": 4270
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.5417391061782837,
      "learning_rate": 0.0001422578175024941,
      "loss": 0.9273,
      "step": 4275
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.5395805835723877,
      "learning_rate": 0.00014213751894895154,
      "loss": 0.9608,
      "step": 4280
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6742745041847229,
      "learning_rate": 0.00014201714620428,
      "loss": 0.9618,
      "step": 4285
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.7993083596229553,
      "learning_rate": 0.00014189669948041863,
      "loss": 0.8944,
      "step": 4290
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0645782947540283,
      "learning_rate": 0.00014177617898943683,
      "loss": 0.8406,
      "step": 4295
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6639379262924194,
      "learning_rate": 0.00014165558494353385,
      "loss": 0.9971,
      "step": 4300
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6995848417282104,
      "learning_rate": 0.00014153491755503853,
      "loss": 1.0182,
      "step": 4305
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.762792706489563,
      "learning_rate": 0.00014141417703640875,
      "loss": 0.98,
      "step": 4310
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.7014017701148987,
      "learning_rate": 0.0001412933636002312,
      "loss": 0.895,
      "step": 4315
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6615109443664551,
      "learning_rate": 0.00014117247745922101,
      "loss": 0.9504,
      "step": 4320
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.7112694978713989,
      "learning_rate": 0.00014105151882622122,
      "loss": 0.8517,
      "step": 4325
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6270447373390198,
      "learning_rate": 0.00014093048791420252,
      "loss": 0.7386,
      "step": 4330
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.7246109247207642,
      "learning_rate": 0.00014080938493626286,
      "loss": 0.9284,
      "step": 4335
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.7187846899032593,
      "learning_rate": 0.00014068821010562718,
      "loss": 0.9285,
      "step": 4340
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.8331519365310669,
      "learning_rate": 0.00014056696363564682,
      "loss": 0.9343,
      "step": 4345
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6256982684135437,
      "learning_rate": 0.00014044564573979925,
      "loss": 0.9124,
      "step": 4350
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.7178502678871155,
      "learning_rate": 0.0001403242566316878,
      "loss": 0.9675,
      "step": 4355
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5572247505187988,
      "learning_rate": 0.0001402027965250411,
      "loss": 0.9588,
      "step": 4360
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.7772032022476196,
      "learning_rate": 0.00014008126563371274,
      "loss": 0.9215,
      "step": 4365
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.7644178867340088,
      "learning_rate": 0.0001399596641716811,
      "loss": 1.0078,
      "step": 4370
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5885058641433716,
      "learning_rate": 0.0001398379923530487,
      "loss": 0.8546,
      "step": 4375
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6888991594314575,
      "learning_rate": 0.0001397162503920419,
      "loss": 0.9105,
      "step": 4380
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.7413958311080933,
      "learning_rate": 0.00013959443850301061,
      "loss": 0.8351,
      "step": 4385
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6878228783607483,
      "learning_rate": 0.00013947255690042795,
      "loss": 0.9956,
      "step": 4390
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.7583858370780945,
      "learning_rate": 0.00013935060579888962,
      "loss": 0.8068,
      "step": 4395
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5721178650856018,
      "learning_rate": 0.00013922858541311382,
      "loss": 0.8582,
      "step": 4400
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6023159623146057,
      "learning_rate": 0.00013910649595794058,
      "loss": 0.9859,
      "step": 4405
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5773957967758179,
      "learning_rate": 0.00013898433764833178,
      "loss": 0.8051,
      "step": 4410
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.5671504735946655,
      "learning_rate": 0.00013886211069937034,
      "loss": 1.0797,
      "step": 4415
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6403985619544983,
      "learning_rate": 0.00013873981532626007,
      "loss": 0.8315,
      "step": 4420
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9393098950386047,
      "learning_rate": 0.00013861745174432525,
      "loss": 0.9213,
      "step": 4425
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6221628785133362,
      "learning_rate": 0.00013849502016901035,
      "loss": 0.8995,
      "step": 4430
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.6712929606437683,
      "learning_rate": 0.00013837252081587938,
      "loss": 0.9216,
      "step": 4435
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.7631377577781677,
      "learning_rate": 0.0001382499539006159,
      "loss": 0.8936,
      "step": 4440
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6782535314559937,
      "learning_rate": 0.00013812731963902224,
      "loss": 0.8619,
      "step": 4445
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.8166306614875793,
      "learning_rate": 0.0001380046182470194,
      "loss": 0.8772,
      "step": 4450
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6431621313095093,
      "learning_rate": 0.0001378818499406465,
      "loss": 0.9215,
      "step": 4455
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6358498930931091,
      "learning_rate": 0.00013775901493606063,
      "loss": 0.8781,
      "step": 4460
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.5732253193855286,
      "learning_rate": 0.0001376361134495361,
      "loss": 0.9351,
      "step": 4465
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.7262488007545471,
      "learning_rate": 0.0001375131456974645,
      "loss": 0.8956,
      "step": 4470
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.7199252843856812,
      "learning_rate": 0.0001373901118963539,
      "loss": 0.952,
      "step": 4475
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.5929174423217773,
      "learning_rate": 0.00013726701226282885,
      "loss": 0.9268,
      "step": 4480
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.8617218136787415,
      "learning_rate": 0.00013714384701362956,
      "loss": 0.9656,
      "step": 4485
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.5977094173431396,
      "learning_rate": 0.000137020616365612,
      "loss": 0.7784,
      "step": 4490
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.7792064547538757,
      "learning_rate": 0.0001368973205357472,
      "loss": 0.932,
      "step": 4495
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.7215954065322876,
      "learning_rate": 0.00013677395974112094,
      "loss": 0.9451,
      "step": 4500
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.8179596066474915,
      "learning_rate": 0.00013665053419893337,
      "loss": 0.8776,
      "step": 4505
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6614099144935608,
      "learning_rate": 0.0001365270441264987,
      "loss": 0.8245,
      "step": 4510
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6148139834403992,
      "learning_rate": 0.00013640348974124474,
      "loss": 0.8813,
      "step": 4515
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.7663241624832153,
      "learning_rate": 0.0001362798712607125,
      "loss": 0.981,
      "step": 4520
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6683364510536194,
      "learning_rate": 0.00013615618890255589,
      "loss": 0.9146,
      "step": 4525
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.644741952419281,
      "learning_rate": 0.0001360324428845412,
      "loss": 0.9912,
      "step": 4530
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6618791818618774,
      "learning_rate": 0.00013590863342454693,
      "loss": 0.8832,
      "step": 4535
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.5592582821846008,
      "learning_rate": 0.0001357847607405632,
      "loss": 0.8504,
      "step": 4540
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.7591463327407837,
      "learning_rate": 0.00013566082505069143,
      "loss": 1.0021,
      "step": 4545
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.5851222276687622,
      "learning_rate": 0.00013553682657314412,
      "loss": 0.8535,
      "step": 4550
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.6739622354507446,
      "learning_rate": 0.00013541276552624405,
      "loss": 0.8448,
      "step": 4555
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6252307295799255,
      "learning_rate": 0.00013528864212842444,
      "loss": 0.8135,
      "step": 4560
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9975850582122803,
      "learning_rate": 0.00013516445659822815,
      "loss": 0.9283,
      "step": 4565
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6981391310691833,
      "learning_rate": 0.00013504020915430746,
      "loss": 0.9259,
      "step": 4570
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.5520308017730713,
      "learning_rate": 0.00013491590001542367,
      "loss": 0.8799,
      "step": 4575
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6810752153396606,
      "learning_rate": 0.00013479152940044665,
      "loss": 0.9626,
      "step": 4580
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.7570480108261108,
      "learning_rate": 0.00013466709752835466,
      "loss": 0.9614,
      "step": 4585
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.7146151661872864,
      "learning_rate": 0.00013454260461823365,
      "loss": 0.8655,
      "step": 4590
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6674631834030151,
      "learning_rate": 0.00013441805088927706,
      "loss": 0.9221,
      "step": 4595
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6004106998443604,
      "learning_rate": 0.00013429343656078555,
      "loss": 0.8091,
      "step": 4600
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.5462713241577148,
      "learning_rate": 0.0001341687618521663,
      "loss": 0.8159,
      "step": 4605
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.7340354323387146,
      "learning_rate": 0.00013404402698293294,
      "loss": 0.8762,
      "step": 4610
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6642255783081055,
      "learning_rate": 0.00013391923217270497,
      "loss": 0.9129,
      "step": 4615
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.7021158933639526,
      "learning_rate": 0.00013379437764120738,
      "loss": 0.8938,
      "step": 4620
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6833433508872986,
      "learning_rate": 0.00013366946360827037,
      "loss": 0.9181,
      "step": 4625
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.7175443172454834,
      "learning_rate": 0.00013354449029382893,
      "loss": 0.7676,
      "step": 4630
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6878382563591003,
      "learning_rate": 0.00013341945791792238,
      "loss": 0.9573,
      "step": 4635
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.7180898189544678,
      "learning_rate": 0.00013329436670069395,
      "loss": 0.8508,
      "step": 4640
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6059539914131165,
      "learning_rate": 0.0001331692168623907,
      "loss": 0.9017,
      "step": 4645
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6651197671890259,
      "learning_rate": 0.00013304400862336263,
      "loss": 0.8017,
      "step": 4650
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.7838413119316101,
      "learning_rate": 0.00013291874220406274,
      "loss": 0.988,
      "step": 4655
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6739104986190796,
      "learning_rate": 0.00013279341782504645,
      "loss": 0.9072,
      "step": 4660
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6881901025772095,
      "learning_rate": 0.00013266803570697116,
      "loss": 0.8914,
      "step": 4665
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6005337834358215,
      "learning_rate": 0.00013254259607059605,
      "loss": 0.9813,
      "step": 4670
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.6260716319084167,
      "learning_rate": 0.0001324170991367814,
      "loss": 0.8676,
      "step": 4675
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.644090473651886,
      "learning_rate": 0.0001322915451264885,
      "loss": 0.8814,
      "step": 4680
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6466627717018127,
      "learning_rate": 0.00013216593426077918,
      "loss": 0.9372,
      "step": 4685
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6953723430633545,
      "learning_rate": 0.00013204026676081517,
      "loss": 0.9277,
      "step": 4690
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.8439871668815613,
      "learning_rate": 0.0001319145428478581,
      "loss": 0.8426,
      "step": 4695
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6791027784347534,
      "learning_rate": 0.0001317887627432689,
      "loss": 0.7959,
      "step": 4700
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6005025506019592,
      "learning_rate": 0.00013166292666850734,
      "loss": 0.8866,
      "step": 4705
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.754767894744873,
      "learning_rate": 0.00013153703484513186,
      "loss": 0.9494,
      "step": 4710
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6382832527160645,
      "learning_rate": 0.00013141108749479898,
      "loss": 0.889,
      "step": 4715
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6113407611846924,
      "learning_rate": 0.00013128508483926298,
      "loss": 0.8678,
      "step": 4720
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6342490315437317,
      "learning_rate": 0.00013115902710037554,
      "loss": 0.8763,
      "step": 4725
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6291064620018005,
      "learning_rate": 0.00013103291450008533,
      "loss": 0.8743,
      "step": 4730
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.796549379825592,
      "learning_rate": 0.00013090674726043766,
      "loss": 0.8185,
      "step": 4735
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6640483140945435,
      "learning_rate": 0.0001307805256035739,
      "loss": 0.8382,
      "step": 4740
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.7306240797042847,
      "learning_rate": 0.00013065424975173135,
      "loss": 0.8412,
      "step": 4745
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.8053780794143677,
      "learning_rate": 0.00013052791992724275,
      "loss": 0.9946,
      "step": 4750
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6016142964363098,
      "learning_rate": 0.00013040153635253575,
      "loss": 0.9382,
      "step": 4755
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6706305146217346,
      "learning_rate": 0.00013027509925013275,
      "loss": 0.9322,
      "step": 4760
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.7635506391525269,
      "learning_rate": 0.00013014860884265036,
      "loss": 0.9591,
      "step": 4765
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.7298561930656433,
      "learning_rate": 0.000130022065352799,
      "loss": 0.8639,
      "step": 4770
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6929640173912048,
      "learning_rate": 0.00012989546900338264,
      "loss": 0.9269,
      "step": 4775
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.7712711691856384,
      "learning_rate": 0.00012976882001729823,
      "loss": 0.8759,
      "step": 4780
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6714412569999695,
      "learning_rate": 0.00012964211861753543,
      "loss": 0.8264,
      "step": 4785
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.7652319669723511,
      "learning_rate": 0.00012951536502717623,
      "loss": 0.8953,
      "step": 4790
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6817525029182434,
      "learning_rate": 0.00012938855946939443,
      "loss": 0.9328,
      "step": 4795
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6716194748878479,
      "learning_rate": 0.0001292617021674554,
      "loss": 0.9282,
      "step": 4800
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.7245244383811951,
      "learning_rate": 0.00012913479334471557,
      "loss": 0.9207,
      "step": 4805
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6501569151878357,
      "learning_rate": 0.0001290078332246221,
      "loss": 0.9289,
      "step": 4810
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5875827074050903,
      "learning_rate": 0.0001288808220307125,
      "loss": 0.9092,
      "step": 4815
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6515260338783264,
      "learning_rate": 0.0001287537599866141,
      "loss": 0.7633,
      "step": 4820
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6986538767814636,
      "learning_rate": 0.00012862664731604388,
      "loss": 0.8686,
      "step": 4825
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5599557161331177,
      "learning_rate": 0.0001284994842428079,
      "loss": 0.8821,
      "step": 4830
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.7143694758415222,
      "learning_rate": 0.00012837227099080098,
      "loss": 1.0317,
      "step": 4835
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6995121836662292,
      "learning_rate": 0.00012824500778400627,
      "loss": 0.897,
      "step": 4840
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6644828915596008,
      "learning_rate": 0.00012811769484649492,
      "loss": 0.9869,
      "step": 4845
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.7595239877700806,
      "learning_rate": 0.0001279903324024256,
      "loss": 0.9084,
      "step": 4850
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6890538334846497,
      "learning_rate": 0.0001278629206760441,
      "loss": 0.8771,
      "step": 4855
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.7247397303581238,
      "learning_rate": 0.0001277354598916831,
      "loss": 0.9808,
      "step": 4860
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6517183780670166,
      "learning_rate": 0.00012760795027376158,
      "loss": 0.8585,
      "step": 4865
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.7327473163604736,
      "learning_rate": 0.00012748039204678446,
      "loss": 0.9243,
      "step": 4870
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.5720110535621643,
      "learning_rate": 0.00012735278543534243,
      "loss": 0.8099,
      "step": 4875
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.7129859924316406,
      "learning_rate": 0.00012722513066411103,
      "loss": 0.9101,
      "step": 4880
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6797164678573608,
      "learning_rate": 0.00012709742795785097,
      "loss": 0.903,
      "step": 4885
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6795310974121094,
      "learning_rate": 0.00012696967754140714,
      "loss": 0.9116,
      "step": 4890
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.8723884224891663,
      "learning_rate": 0.00012684187963970847,
      "loss": 0.9222,
      "step": 4895
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6401852965354919,
      "learning_rate": 0.00012671403447776753,
      "loss": 0.8375,
      "step": 4900
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6854673027992249,
      "learning_rate": 0.00012658614228068003,
      "loss": 0.8835,
      "step": 4905
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6760403513908386,
      "learning_rate": 0.00012645820327362466,
      "loss": 1.0362,
      "step": 4910
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.8172838687896729,
      "learning_rate": 0.0001263302176818623,
      "loss": 1.0059,
      "step": 4915
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6979231834411621,
      "learning_rate": 0.000126202185730736,
      "loss": 1.0049,
      "step": 4920
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.675632894039154,
      "learning_rate": 0.00012607410764567045,
      "loss": 0.8314,
      "step": 4925
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.7185483574867249,
      "learning_rate": 0.00012594598365217144,
      "loss": 0.8724,
      "step": 4930
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.7700892686843872,
      "learning_rate": 0.00012581781397582567,
      "loss": 0.932,
      "step": 4935
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6535851359367371,
      "learning_rate": 0.00012568959884230036,
      "loss": 0.8531,
      "step": 4940
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6171019673347473,
      "learning_rate": 0.0001255613384773426,
      "loss": 0.8942,
      "step": 4945
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.5885823369026184,
      "learning_rate": 0.0001254330331067792,
      "loss": 1.0151,
      "step": 4950
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.7443267703056335,
      "learning_rate": 0.00012530468295651617,
      "loss": 1.0425,
      "step": 4955
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6809262633323669,
      "learning_rate": 0.00012517628825253852,
      "loss": 0.9637,
      "step": 4960
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.5984592437744141,
      "learning_rate": 0.00012504784922090945,
      "loss": 0.9221,
      "step": 4965
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6745832562446594,
      "learning_rate": 0.00012491936608777045,
      "loss": 0.7759,
      "step": 4970
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6799601912498474,
      "learning_rate": 0.00012479083907934052,
      "loss": 0.8416,
      "step": 4975
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.658454954624176,
      "learning_rate": 0.00012466226842191587,
      "loss": 0.8171,
      "step": 4980
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6778995990753174,
      "learning_rate": 0.00012453365434186975,
      "loss": 0.9652,
      "step": 4985
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.7929224371910095,
      "learning_rate": 0.00012440499706565164,
      "loss": 0.8463,
      "step": 4990
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6529220342636108,
      "learning_rate": 0.00012427629681978724,
      "loss": 0.7954,
      "step": 4995
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6256465315818787,
      "learning_rate": 0.00012414755383087785,
      "loss": 0.887,
      "step": 5000
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6464981436729431,
      "learning_rate": 0.0001240187683256,
      "loss": 0.8077,
      "step": 5005
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6251599788665771,
      "learning_rate": 0.00012388994053070512,
      "loss": 0.7699,
      "step": 5010
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6544944643974304,
      "learning_rate": 0.00012376107067301912,
      "loss": 0.9582,
      "step": 5015
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.6476804614067078,
      "learning_rate": 0.00012363215897944187,
      "loss": 0.9299,
      "step": 5020
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.679531455039978,
      "learning_rate": 0.000123503205676947,
      "loss": 0.8737,
      "step": 5025
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.7165660858154297,
      "learning_rate": 0.00012337421099258133,
      "loss": 0.8456,
      "step": 5030
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6283479332923889,
      "learning_rate": 0.00012324517515346467,
      "loss": 0.8646,
      "step": 5035
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.687667191028595,
      "learning_rate": 0.00012311609838678905,
      "loss": 0.9228,
      "step": 5040
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.7233263254165649,
      "learning_rate": 0.0001229869809198188,
      "loss": 0.9077,
      "step": 5045
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.8381845951080322,
      "learning_rate": 0.00012285782297988984,
      "loss": 0.8916,
      "step": 5050
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.5955893397331238,
      "learning_rate": 0.00012272862479440922,
      "loss": 0.8171,
      "step": 5055
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.7041948437690735,
      "learning_rate": 0.00012259938659085504,
      "loss": 0.941,
      "step": 5060
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6682184338569641,
      "learning_rate": 0.00012247010859677576,
      "loss": 0.8761,
      "step": 5065
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6222808361053467,
      "learning_rate": 0.00012234079103978993,
      "loss": 0.9773,
      "step": 5070
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6586690545082092,
      "learning_rate": 0.00012221143414758572,
      "loss": 0.9188,
      "step": 5075
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6669284105300903,
      "learning_rate": 0.00012208203814792056,
      "loss": 0.8918,
      "step": 5080
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6407530903816223,
      "learning_rate": 0.00012195260326862081,
      "loss": 0.91,
      "step": 5085
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9201768040657043,
      "learning_rate": 0.00012182312973758118,
      "loss": 0.9377,
      "step": 5090
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6031652688980103,
      "learning_rate": 0.00012169361778276451,
      "loss": 0.9322,
      "step": 5095
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.5742199420928955,
      "learning_rate": 0.00012156406763220128,
      "loss": 1.0405,
      "step": 5100
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.8060709238052368,
      "learning_rate": 0.0001214344795139892,
      "loss": 0.8857,
      "step": 5105
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.7517329454421997,
      "learning_rate": 0.0001213048536562928,
      "loss": 0.9292,
      "step": 5110
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.5800279974937439,
      "learning_rate": 0.00012117519028734317,
      "loss": 0.9284,
      "step": 5115
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.5874794721603394,
      "learning_rate": 0.00012104548963543729,
      "loss": 0.9274,
      "step": 5120
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9434084296226501,
      "learning_rate": 0.00012091575192893789,
      "loss": 0.8461,
      "step": 5125
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6450409889221191,
      "learning_rate": 0.00012078597739627297,
      "loss": 0.9097,
      "step": 5130
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.7026113867759705,
      "learning_rate": 0.00012065616626593528,
      "loss": 0.9492,
      "step": 5135
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.742713451385498,
      "learning_rate": 0.00012052631876648199,
      "loss": 1.0525,
      "step": 5140
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.6497244238853455,
      "learning_rate": 0.00012039643512653444,
      "loss": 0.8872,
      "step": 5145
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.7265699505805969,
      "learning_rate": 0.00012026651557477745,
      "loss": 1.0148,
      "step": 5150
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6718622446060181,
      "learning_rate": 0.00012013656033995921,
      "loss": 1.0182,
      "step": 5155
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.7481784820556641,
      "learning_rate": 0.00012000656965089063,
      "loss": 0.8686,
      "step": 5160
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6820610165596008,
      "learning_rate": 0.00011987654373644506,
      "loss": 0.826,
      "step": 5165
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.5447041392326355,
      "learning_rate": 0.00011974648282555794,
      "loss": 0.8705,
      "step": 5170
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.7161911129951477,
      "learning_rate": 0.00011961638714722623,
      "loss": 0.9256,
      "step": 5175
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9855062365531921,
      "learning_rate": 0.00011948625693050816,
      "loss": 0.8967,
      "step": 5180
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6545083522796631,
      "learning_rate": 0.00011935609240452281,
      "loss": 0.7265,
      "step": 5185
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.7910086512565613,
      "learning_rate": 0.00011922589379844961,
      "loss": 0.914,
      "step": 5190
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.7386326193809509,
      "learning_rate": 0.00011909566134152794,
      "loss": 0.9195,
      "step": 5195
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6439299583435059,
      "learning_rate": 0.00011896539526305694,
      "loss": 0.8454,
      "step": 5200
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.7418578267097473,
      "learning_rate": 0.00011883509579239482,
      "loss": 0.9424,
      "step": 5205
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6000198125839233,
      "learning_rate": 0.0001187047631589586,
      "loss": 0.898,
      "step": 5210
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.7546732425689697,
      "learning_rate": 0.00011857439759222373,
      "loss": 0.9509,
      "step": 5215
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6965680122375488,
      "learning_rate": 0.00011844399932172362,
      "loss": 0.9157,
      "step": 5220
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.7454926371574402,
      "learning_rate": 0.00011831356857704927,
      "loss": 0.9386,
      "step": 5225
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6428052186965942,
      "learning_rate": 0.00011818310558784882,
      "loss": 0.8267,
      "step": 5230
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.7606145143508911,
      "learning_rate": 0.00011805261058382723,
      "loss": 0.9663,
      "step": 5235
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.7600546479225159,
      "learning_rate": 0.0001179220837947459,
      "loss": 1.0242,
      "step": 5240
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6770562529563904,
      "learning_rate": 0.00011779152545042195,
      "loss": 0.9582,
      "step": 5245
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.7594524025917053,
      "learning_rate": 0.00011766093578072832,
      "loss": 0.9125,
      "step": 5250
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.6377832293510437,
      "learning_rate": 0.00011753031501559298,
      "loss": 0.9992,
      "step": 5255
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.8343694806098938,
      "learning_rate": 0.00011739966338499866,
      "loss": 0.9534,
      "step": 5260
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.645725667476654,
      "learning_rate": 0.00011726898111898246,
      "loss": 0.9256,
      "step": 5265
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.670018196105957,
      "learning_rate": 0.00011713826844763538,
      "loss": 0.8773,
      "step": 5270
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.7871105074882507,
      "learning_rate": 0.000117007525601102,
      "loss": 0.982,
      "step": 5275
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6718564629554749,
      "learning_rate": 0.00011687675280958,
      "loss": 0.8211,
      "step": 5280
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6720396280288696,
      "learning_rate": 0.00011674595030331974,
      "loss": 0.9162,
      "step": 5285
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.7001306414604187,
      "learning_rate": 0.00011661511831262401,
      "loss": 0.9542,
      "step": 5290
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6401751637458801,
      "learning_rate": 0.0001164842570678475,
      "loss": 0.8765,
      "step": 5295
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6717837452888489,
      "learning_rate": 0.00011635336679939624,
      "loss": 0.8623,
      "step": 5300
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.7279519438743591,
      "learning_rate": 0.00011622244773772755,
      "loss": 1.1157,
      "step": 5305
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6708661913871765,
      "learning_rate": 0.00011609150011334937,
      "loss": 0.9421,
      "step": 5310
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.7468719482421875,
      "learning_rate": 0.00011596052415681992,
      "loss": 0.9219,
      "step": 5315
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.7473458647727966,
      "learning_rate": 0.00011582952009874737,
      "loss": 1.0115,
      "step": 5320
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.666793167591095,
      "learning_rate": 0.00011569848816978924,
      "loss": 0.8903,
      "step": 5325
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.7159398198127747,
      "learning_rate": 0.00011556742860065226,
      "loss": 0.8547,
      "step": 5330
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6559115648269653,
      "learning_rate": 0.00011543634162209178,
      "loss": 0.9685,
      "step": 5335
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6398196816444397,
      "learning_rate": 0.00011530522746491132,
      "loss": 0.8261,
      "step": 5340
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6815221309661865,
      "learning_rate": 0.00011517408635996241,
      "loss": 0.8332,
      "step": 5345
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6399449706077576,
      "learning_rate": 0.00011504291853814393,
      "loss": 0.9185,
      "step": 5350
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6674028635025024,
      "learning_rate": 0.00011491172423040178,
      "loss": 0.8802,
      "step": 5355
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6427600979804993,
      "learning_rate": 0.00011478050366772855,
      "loss": 0.9533,
      "step": 5360
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.8196895718574524,
      "learning_rate": 0.00011464925708116306,
      "loss": 0.9565,
      "step": 5365
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.675538957118988,
      "learning_rate": 0.00011451798470178988,
      "loss": 0.92,
      "step": 5370
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6670119762420654,
      "learning_rate": 0.0001143866867607391,
      "loss": 0.8504,
      "step": 5375
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.6218076944351196,
      "learning_rate": 0.0001142553634891857,
      "loss": 0.8803,
      "step": 5380
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.7193536162376404,
      "learning_rate": 0.00011412401511834934,
      "loss": 0.9395,
      "step": 5385
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6328010559082031,
      "learning_rate": 0.00011399264187949385,
      "loss": 0.9048,
      "step": 5390
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6608994007110596,
      "learning_rate": 0.00011386124400392686,
      "loss": 0.8393,
      "step": 5395
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6986910700798035,
      "learning_rate": 0.0001137298217229993,
      "loss": 0.9332,
      "step": 5400
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9013976454734802,
      "learning_rate": 0.00011359837526810521,
      "loss": 0.9066,
      "step": 5405
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.7434113025665283,
      "learning_rate": 0.00011346690487068103,
      "loss": 0.9425,
      "step": 5410
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6475106477737427,
      "learning_rate": 0.00011333541076220555,
      "loss": 0.8998,
      "step": 5415
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.5834239721298218,
      "learning_rate": 0.00011320389317419908,
      "loss": 0.8483,
      "step": 5420
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.786338746547699,
      "learning_rate": 0.00011307235233822345,
      "loss": 0.9344,
      "step": 5425
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6710865497589111,
      "learning_rate": 0.00011294078848588136,
      "loss": 0.8631,
      "step": 5430
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.5884045958518982,
      "learning_rate": 0.00011280920184881598,
      "loss": 0.8094,
      "step": 5435
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6520205140113831,
      "learning_rate": 0.0001126775926587107,
      "loss": 0.873,
      "step": 5440
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6478862166404724,
      "learning_rate": 0.00011254596114728859,
      "loss": 0.8687,
      "step": 5445
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.7314843535423279,
      "learning_rate": 0.00011241430754631194,
      "loss": 0.9825,
      "step": 5450
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6119475364685059,
      "learning_rate": 0.00011228263208758206,
      "loss": 0.8239,
      "step": 5455
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6662307977676392,
      "learning_rate": 0.0001121509350029386,
      "loss": 0.775,
      "step": 5460
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.7896751165390015,
      "learning_rate": 0.00011201921652425945,
      "loss": 0.8355,
      "step": 5465
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.597769558429718,
      "learning_rate": 0.00011188747688346002,
      "loss": 0.9079,
      "step": 5470
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.7042331099510193,
      "learning_rate": 0.00011175571631249305,
      "loss": 0.9043,
      "step": 5475
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.7097181081771851,
      "learning_rate": 0.00011162393504334814,
      "loss": 0.8301,
      "step": 5480
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.773230791091919,
      "learning_rate": 0.00011149213330805135,
      "loss": 0.9735,
      "step": 5485
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6911418437957764,
      "learning_rate": 0.00011136031133866467,
      "loss": 0.9265,
      "step": 5490
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.7387533783912659,
      "learning_rate": 0.00011122846936728584,
      "loss": 0.9248,
      "step": 5495
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.6310597658157349,
      "learning_rate": 0.00011109660762604774,
      "loss": 0.8217,
      "step": 5500
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.7292531728744507,
      "learning_rate": 0.0001109647263471181,
      "loss": 0.7904,
      "step": 5505
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6988025903701782,
      "learning_rate": 0.00011083282576269905,
      "loss": 0.945,
      "step": 5510
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.7716386914253235,
      "learning_rate": 0.00011070090610502663,
      "loss": 1.0456,
      "step": 5515
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6976988315582275,
      "learning_rate": 0.00011056896760637063,
      "loss": 0.9231,
      "step": 5520
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.5705205202102661,
      "learning_rate": 0.00011043701049903381,
      "loss": 0.9198,
      "step": 5525
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6474895477294922,
      "learning_rate": 0.00011030503501535186,
      "loss": 0.9076,
      "step": 5530
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.8015474081039429,
      "learning_rate": 0.00011017304138769272,
      "loss": 1.0548,
      "step": 5535
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.7305599451065063,
      "learning_rate": 0.00011004102984845635,
      "loss": 0.8504,
      "step": 5540
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.735675573348999,
      "learning_rate": 0.00010990900063007414,
      "loss": 0.8588,
      "step": 5545
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.7288222312927246,
      "learning_rate": 0.00010977695396500878,
      "loss": 0.9225,
      "step": 5550
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6950204372406006,
      "learning_rate": 0.00010964489008575354,
      "loss": 0.9049,
      "step": 5555
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.7171755433082581,
      "learning_rate": 0.00010951280922483198,
      "loss": 0.8445,
      "step": 5560
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6393475532531738,
      "learning_rate": 0.0001093807116147977,
      "loss": 0.8743,
      "step": 5565
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6673814058303833,
      "learning_rate": 0.00010924859748823366,
      "loss": 1.0077,
      "step": 5570
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.7003934979438782,
      "learning_rate": 0.00010911646707775194,
      "loss": 0.9263,
      "step": 5575
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.7368084788322449,
      "learning_rate": 0.00010898432061599333,
      "loss": 0.9174,
      "step": 5580
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.7135486602783203,
      "learning_rate": 0.00010885215833562683,
      "loss": 0.9149,
      "step": 5585
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.7837123274803162,
      "learning_rate": 0.00010871998046934928,
      "loss": 0.972,
      "step": 5590
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.7325451374053955,
      "learning_rate": 0.00010858778724988506,
      "loss": 0.9518,
      "step": 5595
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.725795328617096,
      "learning_rate": 0.00010845557890998545,
      "loss": 0.7896,
      "step": 5600
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6287091374397278,
      "learning_rate": 0.00010832335568242851,
      "loss": 1.0537,
      "step": 5605
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.7267513871192932,
      "learning_rate": 0.0001081911178000183,
      "loss": 0.8986,
      "step": 5610
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.7996358871459961,
      "learning_rate": 0.00010805886549558484,
      "loss": 0.7822,
      "step": 5615
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.6031549572944641,
      "learning_rate": 0.00010792659900198359,
      "loss": 0.9068,
      "step": 5620
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6607316136360168,
      "learning_rate": 0.00010779431855209478,
      "loss": 0.8688,
      "step": 5625
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.7166876196861267,
      "learning_rate": 0.0001076620243788234,
      "loss": 0.9289,
      "step": 5630
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6892978549003601,
      "learning_rate": 0.00010752971671509857,
      "loss": 0.9349,
      "step": 5635
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6517727375030518,
      "learning_rate": 0.00010739739579387311,
      "loss": 0.995,
      "step": 5640
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.7273731231689453,
      "learning_rate": 0.00010726506184812322,
      "loss": 0.9097,
      "step": 5645
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5607343912124634,
      "learning_rate": 0.00010713271511084797,
      "loss": 0.8307,
      "step": 5650
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6832279562950134,
      "learning_rate": 0.00010700035581506908,
      "loss": 0.9201,
      "step": 5655
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.7223308086395264,
      "learning_rate": 0.00010686798419383027,
      "loss": 0.8605,
      "step": 5660
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6218956708908081,
      "learning_rate": 0.00010673560048019693,
      "loss": 0.8124,
      "step": 5665
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5892270803451538,
      "learning_rate": 0.0001066032049072559,
      "loss": 0.882,
      "step": 5670
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.7258059978485107,
      "learning_rate": 0.00010647079770811479,
      "loss": 0.868,
      "step": 5675
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.8001238703727722,
      "learning_rate": 0.00010633837911590163,
      "loss": 0.8023,
      "step": 5680
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.7664066553115845,
      "learning_rate": 0.00010620594936376466,
      "loss": 0.9067,
      "step": 5685
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5850561857223511,
      "learning_rate": 0.00010607350868487165,
      "loss": 0.824,
      "step": 5690
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.8648871183395386,
      "learning_rate": 0.00010594105731240961,
      "loss": 0.871,
      "step": 5695
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6848756074905396,
      "learning_rate": 0.00010580859547958448,
      "loss": 0.7997,
      "step": 5700
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9481160640716553,
      "learning_rate": 0.00010567612341962048,
      "loss": 0.9955,
      "step": 5705
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.7015474438667297,
      "learning_rate": 0.00010554364136575998,
      "loss": 0.9107,
      "step": 5710
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.7029824256896973,
      "learning_rate": 0.00010541114955126284,
      "loss": 0.9192,
      "step": 5715
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6626318097114563,
      "learning_rate": 0.00010527864820940608,
      "loss": 0.8277,
      "step": 5720
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.7007662653923035,
      "learning_rate": 0.00010514613757348364,
      "loss": 0.9554,
      "step": 5725
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5854273438453674,
      "learning_rate": 0.0001050136178768057,
      "loss": 0.8745,
      "step": 5730
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.8346757292747498,
      "learning_rate": 0.00010488108935269843,
      "loss": 1.0342,
      "step": 5735
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6574100255966187,
      "learning_rate": 0.00010474855223450355,
      "loss": 0.9006,
      "step": 5740
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.5388402938842773,
      "learning_rate": 0.0001046160067555779,
      "loss": 0.9565,
      "step": 5745
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6622834205627441,
      "learning_rate": 0.00010448345314929301,
      "loss": 0.9538,
      "step": 5750
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6827812194824219,
      "learning_rate": 0.00010435089164903484,
      "loss": 0.9606,
      "step": 5755
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.7946076989173889,
      "learning_rate": 0.00010421832248820309,
      "loss": 0.9556,
      "step": 5760
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6617308855056763,
      "learning_rate": 0.00010408574590021101,
      "loss": 0.9374,
      "step": 5765
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.8348833918571472,
      "learning_rate": 0.000103953162118485,
      "loss": 0.9182,
      "step": 5770
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.7646685838699341,
      "learning_rate": 0.00010382057137646401,
      "loss": 0.9838,
      "step": 5775
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.605670154094696,
      "learning_rate": 0.00010368797390759937,
      "loss": 0.7536,
      "step": 5780
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.7083485126495361,
      "learning_rate": 0.0001035553699453541,
      "loss": 0.9704,
      "step": 5785
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6487541794776917,
      "learning_rate": 0.00010342275972320276,
      "loss": 0.9805,
      "step": 5790
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6678948402404785,
      "learning_rate": 0.00010329014347463097,
      "loss": 0.9252,
      "step": 5795
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.7373467087745667,
      "learning_rate": 0.00010315752143313479,
      "loss": 0.8942,
      "step": 5800
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6852657198905945,
      "learning_rate": 0.00010302489383222065,
      "loss": 0.8262,
      "step": 5805
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.7030515670776367,
      "learning_rate": 0.00010289226090540473,
      "loss": 0.7714,
      "step": 5810
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.7813947796821594,
      "learning_rate": 0.00010275962288621251,
      "loss": 0.9961,
      "step": 5815
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.7376470565795898,
      "learning_rate": 0.00010262698000817852,
      "loss": 0.8246,
      "step": 5820
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6423072814941406,
      "learning_rate": 0.00010249433250484579,
      "loss": 0.8495,
      "step": 5825
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.703114926815033,
      "learning_rate": 0.00010236168060976555,
      "loss": 0.8891,
      "step": 5830
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.8897769451141357,
      "learning_rate": 0.00010222902455649673,
      "loss": 0.8899,
      "step": 5835
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.7237744927406311,
      "learning_rate": 0.00010209636457860552,
      "loss": 0.9736,
      "step": 5840
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6191496253013611,
      "learning_rate": 0.00010196370090966516,
      "loss": 0.8626,
      "step": 5845
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.6924605965614319,
      "learning_rate": 0.0001018310337832553,
      "loss": 0.8597,
      "step": 5850
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9236262440681458,
      "learning_rate": 0.00010169836343296162,
      "loss": 0.887,
      "step": 5855
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.792788565158844,
      "learning_rate": 0.0001015656900923756,
      "loss": 0.8942,
      "step": 5860
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.84331214427948,
      "learning_rate": 0.00010143301399509395,
      "loss": 0.9444,
      "step": 5865
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.5621626973152161,
      "learning_rate": 0.00010130033537471815,
      "loss": 0.9003,
      "step": 5870
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6908104419708252,
      "learning_rate": 0.00010116765446485423,
      "loss": 0.8987,
      "step": 5875
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6341535449028015,
      "learning_rate": 0.0001010349714991122,
      "loss": 0.7817,
      "step": 5880
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7346782088279724,
      "learning_rate": 0.00010090228671110568,
      "loss": 1.0713,
      "step": 5885
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6268035173416138,
      "learning_rate": 0.00010076960033445155,
      "loss": 0.8851,
      "step": 5890
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7278366684913635,
      "learning_rate": 0.0001006369126027694,
      "loss": 0.8986,
      "step": 5895
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7147312164306641,
      "learning_rate": 0.00010050422374968131,
      "loss": 0.7817,
      "step": 5900
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6873770356178284,
      "learning_rate": 0.00010037153400881126,
      "loss": 0.9511,
      "step": 5905
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6804821491241455,
      "learning_rate": 0.00010023884361378477,
      "loss": 0.9003,
      "step": 5910
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7105894684791565,
      "learning_rate": 0.00010010615279822865,
      "loss": 0.9026,
      "step": 5915
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7469001412391663,
      "learning_rate": 9.99734617957703e-05,
      "loss": 0.8842,
      "step": 5920
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6915943026542664,
      "learning_rate": 9.984077084003752e-05,
      "loss": 0.9516,
      "step": 5925
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7061002254486084,
      "learning_rate": 9.970808016465797e-05,
      "loss": 1.0175,
      "step": 5930
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6731154322624207,
      "learning_rate": 9.957539000325893e-05,
      "loss": 1.0213,
      "step": 5935
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.8961304426193237,
      "learning_rate": 9.944270058946666e-05,
      "loss": 1.0686,
      "step": 5940
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7615440487861633,
      "learning_rate": 9.931001215690616e-05,
      "loss": 0.9474,
      "step": 5945
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6351935863494873,
      "learning_rate": 9.917732493920071e-05,
      "loss": 0.7973,
      "step": 5950
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6528865694999695,
      "learning_rate": 9.90446391699714e-05,
      "loss": 1.0574,
      "step": 5955
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.8191664814949036,
      "learning_rate": 9.891195508283684e-05,
      "loss": 0.8424,
      "step": 5960
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6079810261726379,
      "learning_rate": 9.877927291141261e-05,
      "loss": 0.8154,
      "step": 5965
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7904455065727234,
      "learning_rate": 9.864659288931095e-05,
      "loss": 0.8835,
      "step": 5970
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.704140305519104,
      "learning_rate": 9.851391525014035e-05,
      "loss": 0.96,
      "step": 5975
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.714992880821228,
      "learning_rate": 9.838124022750502e-05,
      "loss": 0.9106,
      "step": 5980
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.5958659052848816,
      "learning_rate": 9.824856805500462e-05,
      "loss": 0.9694,
      "step": 5985
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.7285751104354858,
      "learning_rate": 9.811589896623382e-05,
      "loss": 0.9694,
      "step": 5990
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.8631846904754639,
      "learning_rate": 9.798323319478178e-05,
      "loss": 0.7295,
      "step": 5995
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6519853472709656,
      "learning_rate": 9.785057097423186e-05,
      "loss": 0.8604,
      "step": 6000
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6751505732536316,
      "learning_rate": 9.771791253816123e-05,
      "loss": 0.8958,
      "step": 6005
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6848597526550293,
      "learning_rate": 9.758525812014029e-05,
      "loss": 0.8678,
      "step": 6010
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.8685235381126404,
      "learning_rate": 9.745260795373239e-05,
      "loss": 0.9,
      "step": 6015
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.660624086856842,
      "learning_rate": 9.731996227249347e-05,
      "loss": 0.9081,
      "step": 6020
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.8399864435195923,
      "learning_rate": 9.718732130997148e-05,
      "loss": 0.9256,
      "step": 6025
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6387921571731567,
      "learning_rate": 9.705468529970613e-05,
      "loss": 0.9444,
      "step": 6030
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.7852475047111511,
      "learning_rate": 9.692205447522837e-05,
      "loss": 1.0471,
      "step": 6035
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.7889160513877869,
      "learning_rate": 9.678942907006002e-05,
      "loss": 0.8986,
      "step": 6040
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.7621517181396484,
      "learning_rate": 9.665680931771341e-05,
      "loss": 0.9484,
      "step": 6045
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6188884973526001,
      "learning_rate": 9.652419545169083e-05,
      "loss": 0.8797,
      "step": 6050
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.7267559766769409,
      "learning_rate": 9.639158770548426e-05,
      "loss": 0.8975,
      "step": 6055
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.8356955051422119,
      "learning_rate": 9.625898631257492e-05,
      "loss": 0.9743,
      "step": 6060
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.7362446188926697,
      "learning_rate": 9.612639150643282e-05,
      "loss": 0.8549,
      "step": 6065
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.7240428328514099,
      "learning_rate": 9.599380352051633e-05,
      "loss": 0.9337,
      "step": 6070
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.6012915968894958,
      "learning_rate": 9.586122258827193e-05,
      "loss": 0.9229,
      "step": 6075
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.8492560982704163,
      "learning_rate": 9.572864894313357e-05,
      "loss": 0.8502,
      "step": 6080
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.8213250041007996,
      "learning_rate": 9.559608281852238e-05,
      "loss": 0.9357,
      "step": 6085
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.7091009616851807,
      "learning_rate": 9.546352444784632e-05,
      "loss": 0.8587,
      "step": 6090
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.7153491973876953,
      "learning_rate": 9.533097406449962e-05,
      "loss": 0.9584,
      "step": 6095
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7168654203414917,
      "learning_rate": 9.519843190186249e-05,
      "loss": 0.9244,
      "step": 6100
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.692661464214325,
      "learning_rate": 9.506589819330069e-05,
      "loss": 0.8989,
      "step": 6105
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6731054186820984,
      "learning_rate": 9.493337317216498e-05,
      "loss": 0.8934,
      "step": 6110
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.665674090385437,
      "learning_rate": 9.4800857071791e-05,
      "loss": 0.8809,
      "step": 6115
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6796616911888123,
      "learning_rate": 9.466835012549855e-05,
      "loss": 0.777,
      "step": 6120
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7332762479782104,
      "learning_rate": 9.453585256659127e-05,
      "loss": 0.8262,
      "step": 6125
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6969581246376038,
      "learning_rate": 9.440336462835648e-05,
      "loss": 0.898,
      "step": 6130
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7141453623771667,
      "learning_rate": 9.42708865440644e-05,
      "loss": 0.9318,
      "step": 6135
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6876282691955566,
      "learning_rate": 9.413841854696785e-05,
      "loss": 0.8526,
      "step": 6140
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7855693697929382,
      "learning_rate": 9.400596087030207e-05,
      "loss": 0.8262,
      "step": 6145
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7895783185958862,
      "learning_rate": 9.387351374728403e-05,
      "loss": 0.8961,
      "step": 6150
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6923259496688843,
      "learning_rate": 9.3741077411112e-05,
      "loss": 0.9048,
      "step": 6155
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.789193868637085,
      "learning_rate": 9.360865209496554e-05,
      "loss": 0.8957,
      "step": 6160
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7600136995315552,
      "learning_rate": 9.347623803200456e-05,
      "loss": 1.0295,
      "step": 6165
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.5362326502799988,
      "learning_rate": 9.334383545536918e-05,
      "loss": 0.8746,
      "step": 6170
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7427657842636108,
      "learning_rate": 9.321144459817952e-05,
      "loss": 0.993,
      "step": 6175
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7276807427406311,
      "learning_rate": 9.307906569353474e-05,
      "loss": 0.9435,
      "step": 6180
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7534928917884827,
      "learning_rate": 9.294669897451324e-05,
      "loss": 0.9096,
      "step": 6185
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6962252855300903,
      "learning_rate": 9.281434467417181e-05,
      "loss": 0.8884,
      "step": 6190
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.6648967862129211,
      "learning_rate": 9.268200302554533e-05,
      "loss": 0.881,
      "step": 6195
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7699589729309082,
      "learning_rate": 9.254967426164661e-05,
      "loss": 0.9009,
      "step": 6200
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7004358172416687,
      "learning_rate": 9.241735861546555e-05,
      "loss": 0.9127,
      "step": 6205
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.7334666848182678,
      "learning_rate": 9.228505631996905e-05,
      "loss": 1.0146,
      "step": 6210
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.7301573753356934,
      "learning_rate": 9.215276760810061e-05,
      "loss": 0.9073,
      "step": 6215
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6607963442802429,
      "learning_rate": 9.202049271277961e-05,
      "loss": 0.8612,
      "step": 6220
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.759436845779419,
      "learning_rate": 9.188823186690117e-05,
      "loss": 0.8869,
      "step": 6225
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.5646474957466125,
      "learning_rate": 9.175598530333582e-05,
      "loss": 0.8658,
      "step": 6230
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.654805600643158,
      "learning_rate": 9.162375325492875e-05,
      "loss": 1.0176,
      "step": 6235
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6620368361473083,
      "learning_rate": 9.149153595449968e-05,
      "loss": 0.8632,
      "step": 6240
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.7386505007743835,
      "learning_rate": 9.135933363484236e-05,
      "loss": 0.9617,
      "step": 6245
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.8237467408180237,
      "learning_rate": 9.122714652872412e-05,
      "loss": 0.9263,
      "step": 6250
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.701260507106781,
      "learning_rate": 9.109497486888564e-05,
      "loss": 0.8149,
      "step": 6255
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6147468090057373,
      "learning_rate": 9.096281888804022e-05,
      "loss": 0.972,
      "step": 6260
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.7591169476509094,
      "learning_rate": 9.083067881887365e-05,
      "loss": 0.8234,
      "step": 6265
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.7122942805290222,
      "learning_rate": 9.069855489404372e-05,
      "loss": 0.8351,
      "step": 6270
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.8314396739006042,
      "learning_rate": 9.056644734617975e-05,
      "loss": 0.9396,
      "step": 6275
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0538718700408936,
      "learning_rate": 9.043435640788222e-05,
      "loss": 0.9465,
      "step": 6280
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.7261043190956116,
      "learning_rate": 9.030228231172245e-05,
      "loss": 0.935,
      "step": 6285
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.7041414976119995,
      "learning_rate": 9.0170225290242e-05,
      "loss": 0.895,
      "step": 6290
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.7024123668670654,
      "learning_rate": 9.003818557595241e-05,
      "loss": 0.8386,
      "step": 6295
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6621682047843933,
      "learning_rate": 8.990616340133478e-05,
      "loss": 0.8501,
      "step": 6300
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.7288509011268616,
      "learning_rate": 8.977415899883928e-05,
      "loss": 0.8765,
      "step": 6305
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.7058336138725281,
      "learning_rate": 8.964217260088479e-05,
      "loss": 0.8252,
      "step": 6310
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6799612045288086,
      "learning_rate": 8.951020443985854e-05,
      "loss": 0.9172,
      "step": 6315
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.6987103223800659,
      "learning_rate": 8.937825474811558e-05,
      "loss": 0.8024,
      "step": 6320
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.7850658893585205,
      "learning_rate": 8.924632375797852e-05,
      "loss": 0.8931,
      "step": 6325
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.5284181833267212,
      "learning_rate": 8.911441170173698e-05,
      "loss": 0.732,
      "step": 6330
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6844927668571472,
      "learning_rate": 8.898251881164723e-05,
      "loss": 0.9311,
      "step": 6335
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.692069947719574,
      "learning_rate": 8.88506453199319e-05,
      "loss": 0.8758,
      "step": 6340
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6895598769187927,
      "learning_rate": 8.871879145877933e-05,
      "loss": 0.8757,
      "step": 6345
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6951631307601929,
      "learning_rate": 8.858695746034336e-05,
      "loss": 0.8794,
      "step": 6350
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6704897284507751,
      "learning_rate": 8.84551435567429e-05,
      "loss": 0.9,
      "step": 6355
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6468834280967712,
      "learning_rate": 8.832334998006143e-05,
      "loss": 0.8502,
      "step": 6360
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6603518128395081,
      "learning_rate": 8.819157696234659e-05,
      "loss": 0.8134,
      "step": 6365
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.8900942802429199,
      "learning_rate": 8.805982473560996e-05,
      "loss": 0.8732,
      "step": 6370
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.7345215082168579,
      "learning_rate": 8.792809353182638e-05,
      "loss": 0.9701,
      "step": 6375
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6759326457977295,
      "learning_rate": 8.779638358293374e-05,
      "loss": 0.9155,
      "step": 6380
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6166871786117554,
      "learning_rate": 8.766469512083251e-05,
      "loss": 0.8308,
      "step": 6385
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.7100403308868408,
      "learning_rate": 8.753302837738527e-05,
      "loss": 0.8684,
      "step": 6390
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.7487990260124207,
      "learning_rate": 8.740138358441648e-05,
      "loss": 0.9515,
      "step": 6395
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.7730888724327087,
      "learning_rate": 8.72697609737118e-05,
      "loss": 0.8772,
      "step": 6400
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.643085241317749,
      "learning_rate": 8.713816077701792e-05,
      "loss": 0.8361,
      "step": 6405
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.7037286758422852,
      "learning_rate": 8.700658322604211e-05,
      "loss": 0.8065,
      "step": 6410
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.7489007711410522,
      "learning_rate": 8.687502855245169e-05,
      "loss": 0.9188,
      "step": 6415
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6815351843833923,
      "learning_rate": 8.674349698787366e-05,
      "loss": 0.9211,
      "step": 6420
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.7705895900726318,
      "learning_rate": 8.661198876389448e-05,
      "loss": 0.9211,
      "step": 6425
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6026811003684998,
      "learning_rate": 8.64805041120594e-05,
      "loss": 0.9545,
      "step": 6430
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.725632905960083,
      "learning_rate": 8.634904326387216e-05,
      "loss": 0.8984,
      "step": 6435
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.8397646546363831,
      "learning_rate": 8.621760645079468e-05,
      "loss": 0.9554,
      "step": 6440
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.6730992197990417,
      "learning_rate": 8.608619390424648e-05,
      "loss": 0.6669,
      "step": 6445
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.7317922711372375,
      "learning_rate": 8.595480585560438e-05,
      "loss": 0.8063,
      "step": 6450
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.7895035743713379,
      "learning_rate": 8.582344253620208e-05,
      "loss": 0.9371,
      "step": 6455
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.605285108089447,
      "learning_rate": 8.569210417732975e-05,
      "loss": 0.6815,
      "step": 6460
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.689128577709198,
      "learning_rate": 8.556079101023348e-05,
      "loss": 0.8454,
      "step": 6465
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9090960025787354,
      "learning_rate": 8.542950326611525e-05,
      "loss": 0.862,
      "step": 6470
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.7995282411575317,
      "learning_rate": 8.529824117613208e-05,
      "loss": 0.9217,
      "step": 6475
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.821121096611023,
      "learning_rate": 8.516700497139589e-05,
      "loss": 0.954,
      "step": 6480
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.7355401515960693,
      "learning_rate": 8.503579488297304e-05,
      "loss": 0.7894,
      "step": 6485
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.7227281928062439,
      "learning_rate": 8.490461114188383e-05,
      "loss": 0.965,
      "step": 6490
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.5704253315925598,
      "learning_rate": 8.477345397910229e-05,
      "loss": 0.9453,
      "step": 6495
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6398398876190186,
      "learning_rate": 8.464232362555557e-05,
      "loss": 0.8524,
      "step": 6500
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6368110775947571,
      "learning_rate": 8.451122031212357e-05,
      "loss": 0.9498,
      "step": 6505
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6170530915260315,
      "learning_rate": 8.438014426963874e-05,
      "loss": 0.9573,
      "step": 6510
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6524907946586609,
      "learning_rate": 8.424909572888542e-05,
      "loss": 0.8642,
      "step": 6515
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6852608323097229,
      "learning_rate": 8.411807492059944e-05,
      "loss": 0.8879,
      "step": 6520
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6991391777992249,
      "learning_rate": 8.398708207546797e-05,
      "loss": 0.9535,
      "step": 6525
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.5600320100784302,
      "learning_rate": 8.385611742412887e-05,
      "loss": 0.8784,
      "step": 6530
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.8736658692359924,
      "learning_rate": 8.372518119717027e-05,
      "loss": 1.0701,
      "step": 6535
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6932893991470337,
      "learning_rate": 8.359427362513046e-05,
      "loss": 0.8919,
      "step": 6540
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.7107034921646118,
      "learning_rate": 8.346339493849704e-05,
      "loss": 0.8767,
      "step": 6545
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.7890344262123108,
      "learning_rate": 8.333254536770696e-05,
      "loss": 0.8633,
      "step": 6550
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6766764521598816,
      "learning_rate": 8.320172514314581e-05,
      "loss": 0.8874,
      "step": 6555
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.7544351816177368,
      "learning_rate": 8.307093449514743e-05,
      "loss": 0.8401,
      "step": 6560
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.6718683838844299,
      "learning_rate": 8.294017365399377e-05,
      "loss": 0.9419,
      "step": 6565
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.667589545249939,
      "learning_rate": 8.280944284991418e-05,
      "loss": 0.9142,
      "step": 6570
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.7825703620910645,
      "learning_rate": 8.267874231308506e-05,
      "loss": 1.0079,
      "step": 6575
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6643834114074707,
      "learning_rate": 8.254807227362973e-05,
      "loss": 0.7752,
      "step": 6580
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6645832657814026,
      "learning_rate": 8.241743296161759e-05,
      "loss": 0.8748,
      "step": 6585
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6274651885032654,
      "learning_rate": 8.228682460706403e-05,
      "loss": 1.0167,
      "step": 6590
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.8026042580604553,
      "learning_rate": 8.215624743993003e-05,
      "loss": 0.8749,
      "step": 6595
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.7438853979110718,
      "learning_rate": 8.20257016901215e-05,
      "loss": 0.8102,
      "step": 6600
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.8356721997261047,
      "learning_rate": 8.189518758748908e-05,
      "loss": 0.884,
      "step": 6605
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.7413963675498962,
      "learning_rate": 8.176470536182777e-05,
      "loss": 0.8673,
      "step": 6610
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.8624594211578369,
      "learning_rate": 8.163425524287628e-05,
      "loss": 0.905,
      "step": 6615
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.7963995933532715,
      "learning_rate": 8.150383746031707e-05,
      "loss": 0.9699,
      "step": 6620
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6565963625907898,
      "learning_rate": 8.137345224377536e-05,
      "loss": 0.9647,
      "step": 6625
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.827908992767334,
      "learning_rate": 8.124309982281914e-05,
      "loss": 0.9212,
      "step": 6630
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6607983708381653,
      "learning_rate": 8.111278042695881e-05,
      "loss": 0.7614,
      "step": 6635
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.7446150779724121,
      "learning_rate": 8.098249428564635e-05,
      "loss": 0.8997,
      "step": 6640
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6742003560066223,
      "learning_rate": 8.08522416282754e-05,
      "loss": 0.9889,
      "step": 6645
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6623156666755676,
      "learning_rate": 8.072202268418057e-05,
      "loss": 0.8356,
      "step": 6650
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0455914735794067,
      "learning_rate": 8.059183768263712e-05,
      "loss": 0.7725,
      "step": 6655
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.6387537121772766,
      "learning_rate": 8.046168685286052e-05,
      "loss": 0.8654,
      "step": 6660
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.7108795642852783,
      "learning_rate": 8.033157042400613e-05,
      "loss": 0.7349,
      "step": 6665
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.7971534729003906,
      "learning_rate": 8.02014886251687e-05,
      "loss": 0.8774,
      "step": 6670
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.7332680821418762,
      "learning_rate": 8.007144168538198e-05,
      "loss": 0.8998,
      "step": 6675
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.7816454768180847,
      "learning_rate": 7.994142983361843e-05,
      "loss": 0.8344,
      "step": 6680
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.7597826719284058,
      "learning_rate": 7.981145329878867e-05,
      "loss": 0.9178,
      "step": 6685
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6635742783546448,
      "learning_rate": 7.96815123097411e-05,
      "loss": 0.79,
      "step": 6690
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6543397307395935,
      "learning_rate": 7.955160709526167e-05,
      "loss": 0.9132,
      "step": 6695
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.7143847346305847,
      "learning_rate": 7.942173788407318e-05,
      "loss": 0.8708,
      "step": 6700
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.8613307476043701,
      "learning_rate": 7.929190490483517e-05,
      "loss": 0.9289,
      "step": 6705
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.7165137529373169,
      "learning_rate": 7.916210838614331e-05,
      "loss": 0.9139,
      "step": 6710
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.7924880385398865,
      "learning_rate": 7.903234855652907e-05,
      "loss": 0.9247,
      "step": 6715
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.7915977835655212,
      "learning_rate": 7.890262564445939e-05,
      "loss": 0.8209,
      "step": 6720
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6632764339447021,
      "learning_rate": 7.877293987833617e-05,
      "loss": 0.8967,
      "step": 6725
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.759326159954071,
      "learning_rate": 7.864329148649584e-05,
      "loss": 0.8654,
      "step": 6730
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6976078748703003,
      "learning_rate": 7.851368069720917e-05,
      "loss": 0.8969,
      "step": 6735
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.8202966451644897,
      "learning_rate": 7.838410773868061e-05,
      "loss": 0.7885,
      "step": 6740
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.7911727428436279,
      "learning_rate": 7.825457283904802e-05,
      "loss": 0.9239,
      "step": 6745
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.7848120331764221,
      "learning_rate": 7.81250762263823e-05,
      "loss": 0.9427,
      "step": 6750
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6924152970314026,
      "learning_rate": 7.799561812868691e-05,
      "loss": 0.8516,
      "step": 6755
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.8438791036605835,
      "learning_rate": 7.786619877389742e-05,
      "loss": 0.9296,
      "step": 6760
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.7251960635185242,
      "learning_rate": 7.773681838988136e-05,
      "loss": 0.7171,
      "step": 6765
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.8076329231262207,
      "learning_rate": 7.760747720443744e-05,
      "loss": 0.8795,
      "step": 6770
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.7533569931983948,
      "learning_rate": 7.747817544529555e-05,
      "loss": 0.8433,
      "step": 6775
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6897385716438293,
      "learning_rate": 7.7348913340116e-05,
      "loss": 0.7796,
      "step": 6780
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.7647858262062073,
      "learning_rate": 7.721969111648936e-05,
      "loss": 0.9444,
      "step": 6785
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.8716790676116943,
      "learning_rate": 7.709050900193601e-05,
      "loss": 0.9598,
      "step": 6790
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6649170517921448,
      "learning_rate": 7.696136722390566e-05,
      "loss": 0.7195,
      "step": 6795
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.8289276361465454,
      "learning_rate": 7.683226600977695e-05,
      "loss": 1.0056,
      "step": 6800
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.6001815795898438,
      "learning_rate": 7.670320558685724e-05,
      "loss": 0.739,
      "step": 6805
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7629383206367493,
      "learning_rate": 7.657418618238196e-05,
      "loss": 0.871,
      "step": 6810
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7336935997009277,
      "learning_rate": 7.644520802351431e-05,
      "loss": 0.9111,
      "step": 6815
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6389930248260498,
      "learning_rate": 7.631627133734497e-05,
      "loss": 0.8693,
      "step": 6820
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7814512848854065,
      "learning_rate": 7.61873763508915e-05,
      "loss": 0.9354,
      "step": 6825
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.8012021780014038,
      "learning_rate": 7.605852329109808e-05,
      "loss": 0.9172,
      "step": 6830
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7072709798812866,
      "learning_rate": 7.592971238483508e-05,
      "loss": 0.7958,
      "step": 6835
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7154051065444946,
      "learning_rate": 7.580094385889862e-05,
      "loss": 0.9477,
      "step": 6840
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7569812536239624,
      "learning_rate": 7.567221794001025e-05,
      "loss": 0.8842,
      "step": 6845
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7594531178474426,
      "learning_rate": 7.554353485481646e-05,
      "loss": 0.8449,
      "step": 6850
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.712028443813324,
      "learning_rate": 7.54148948298883e-05,
      "loss": 0.9318,
      "step": 6855
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.8297650814056396,
      "learning_rate": 7.528629809172109e-05,
      "loss": 0.8338,
      "step": 6860
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6936589479446411,
      "learning_rate": 7.515774486673386e-05,
      "loss": 0.9181,
      "step": 6865
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7724419832229614,
      "learning_rate": 7.502923538126903e-05,
      "loss": 1.0482,
      "step": 6870
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7306141257286072,
      "learning_rate": 7.490076986159207e-05,
      "loss": 0.9175,
      "step": 6875
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.8186717629432678,
      "learning_rate": 7.477234853389099e-05,
      "loss": 0.9299,
      "step": 6880
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7106767892837524,
      "learning_rate": 7.464397162427595e-05,
      "loss": 0.8991,
      "step": 6885
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7082087993621826,
      "learning_rate": 7.451563935877901e-05,
      "loss": 0.9664,
      "step": 6890
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.6625487804412842,
      "learning_rate": 7.438735196335361e-05,
      "loss": 0.9274,
      "step": 6895
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.683401346206665,
      "learning_rate": 7.425910966387399e-05,
      "loss": 0.8408,
      "step": 6900
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7105583548545837,
      "learning_rate": 7.413091268613535e-05,
      "loss": 0.8892,
      "step": 6905
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7126787900924683,
      "learning_rate": 7.400276125585275e-05,
      "loss": 0.8939,
      "step": 6910
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7547131776809692,
      "learning_rate": 7.387465559866118e-05,
      "loss": 0.9486,
      "step": 6915
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9549170732498169,
      "learning_rate": 7.374659594011519e-05,
      "loss": 0.741,
      "step": 6920
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.7775087356567383,
      "learning_rate": 7.361858250568805e-05,
      "loss": 0.9265,
      "step": 6925
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6937190294265747,
      "learning_rate": 7.34906155207719e-05,
      "loss": 1.0223,
      "step": 6930
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.803637683391571,
      "learning_rate": 7.3362695210677e-05,
      "loss": 0.8736,
      "step": 6935
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.7149904370307922,
      "learning_rate": 7.32348218006313e-05,
      "loss": 0.8665,
      "step": 6940
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6032792925834656,
      "learning_rate": 7.310699551578045e-05,
      "loss": 0.842,
      "step": 6945
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9613990783691406,
      "learning_rate": 7.29792165811869e-05,
      "loss": 0.8327,
      "step": 6950
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.8656706809997559,
      "learning_rate": 7.285148522182975e-05,
      "loss": 0.9984,
      "step": 6955
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.8221157789230347,
      "learning_rate": 7.272380166260453e-05,
      "loss": 0.8664,
      "step": 6960
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6553401350975037,
      "learning_rate": 7.259616612832237e-05,
      "loss": 0.8861,
      "step": 6965
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.8584771156311035,
      "learning_rate": 7.24685788437099e-05,
      "loss": 0.9434,
      "step": 6970
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6871647834777832,
      "learning_rate": 7.234104003340898e-05,
      "loss": 0.9134,
      "step": 6975
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6781166195869446,
      "learning_rate": 7.221354992197587e-05,
      "loss": 0.8003,
      "step": 6980
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.7665917873382568,
      "learning_rate": 7.208610873388122e-05,
      "loss": 0.9455,
      "step": 6985
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.7046342492103577,
      "learning_rate": 7.195871669350953e-05,
      "loss": 0.9578,
      "step": 6990
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.709830105304718,
      "learning_rate": 7.183137402515872e-05,
      "loss": 0.8432,
      "step": 6995
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.8972019553184509,
      "learning_rate": 7.170408095303992e-05,
      "loss": 0.9268,
      "step": 7000
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6620240211486816,
      "learning_rate": 7.157683770127671e-05,
      "loss": 0.8903,
      "step": 7005
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6928107738494873,
      "learning_rate": 7.14496444939051e-05,
      "loss": 0.9334,
      "step": 7010
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.7520967125892639,
      "learning_rate": 7.132250155487304e-05,
      "loss": 0.8403,
      "step": 7015
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.7167927622795105,
      "learning_rate": 7.119540910803982e-05,
      "loss": 0.7793,
      "step": 7020
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.713470995426178,
      "learning_rate": 7.106836737717589e-05,
      "loss": 0.8981,
      "step": 7025
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.7497614026069641,
      "learning_rate": 7.094137658596247e-05,
      "loss": 0.8394,
      "step": 7030
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.6969075798988342,
      "learning_rate": 7.081443695799102e-05,
      "loss": 0.7917,
      "step": 7035
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.7626899480819702,
      "learning_rate": 7.068754871676291e-05,
      "loss": 0.9156,
      "step": 7040
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6913230419158936,
      "learning_rate": 7.056071208568911e-05,
      "loss": 0.9365,
      "step": 7045
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.719185471534729,
      "learning_rate": 7.043392728808962e-05,
      "loss": 0.7711,
      "step": 7050
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6523585319519043,
      "learning_rate": 7.030719454719325e-05,
      "loss": 0.7625,
      "step": 7055
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.7925283312797546,
      "learning_rate": 7.018051408613715e-05,
      "loss": 0.9242,
      "step": 7060
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.7551404237747192,
      "learning_rate": 7.005388612796635e-05,
      "loss": 0.8112,
      "step": 7065
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.7069532871246338,
      "learning_rate": 6.992731089563356e-05,
      "loss": 0.9597,
      "step": 7070
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.76814204454422,
      "learning_rate": 6.980078861199854e-05,
      "loss": 0.8544,
      "step": 7075
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6964544057846069,
      "learning_rate": 6.967431949982789e-05,
      "loss": 0.9857,
      "step": 7080
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.7238839268684387,
      "learning_rate": 6.954790378179459e-05,
      "loss": 0.8608,
      "step": 7085
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.7430681586265564,
      "learning_rate": 6.942154168047756e-05,
      "loss": 0.8633,
      "step": 7090
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.7325025200843811,
      "learning_rate": 6.929523341836133e-05,
      "loss": 0.9935,
      "step": 7095
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.8399198055267334,
      "learning_rate": 6.916897921783574e-05,
      "loss": 0.7856,
      "step": 7100
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.7091554999351501,
      "learning_rate": 6.904277930119529e-05,
      "loss": 0.8994,
      "step": 7105
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.8715316653251648,
      "learning_rate": 6.891663389063898e-05,
      "loss": 0.8977,
      "step": 7110
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.7023261189460754,
      "learning_rate": 6.879054320826988e-05,
      "loss": 0.881,
      "step": 7115
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6556528210639954,
      "learning_rate": 6.866450747609461e-05,
      "loss": 0.8441,
      "step": 7120
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.7134454250335693,
      "learning_rate": 6.853852691602309e-05,
      "loss": 0.8711,
      "step": 7125
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.7662861943244934,
      "learning_rate": 6.841260174986811e-05,
      "loss": 0.9114,
      "step": 7130
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.8190237283706665,
      "learning_rate": 6.828673219934491e-05,
      "loss": 0.9327,
      "step": 7135
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.7545244693756104,
      "learning_rate": 6.816091848607081e-05,
      "loss": 0.826,
      "step": 7140
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.8296646475791931,
      "learning_rate": 6.80351608315648e-05,
      "loss": 0.6987,
      "step": 7145
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6238453984260559,
      "learning_rate": 6.790945945724721e-05,
      "loss": 0.9342,
      "step": 7150
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.7644151449203491,
      "learning_rate": 6.778381458443925e-05,
      "loss": 0.8402,
      "step": 7155
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.7168214321136475,
      "learning_rate": 6.765822643436267e-05,
      "loss": 0.8579,
      "step": 7160
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6701621413230896,
      "learning_rate": 6.753269522813929e-05,
      "loss": 0.7196,
      "step": 7165
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.7104114890098572,
      "learning_rate": 6.740722118679075e-05,
      "loss": 0.7927,
      "step": 7170
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.7076807022094727,
      "learning_rate": 6.728180453123798e-05,
      "loss": 0.8925,
      "step": 7175
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.7029675841331482,
      "learning_rate": 6.715644548230086e-05,
      "loss": 0.8258,
      "step": 7180
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.7940952777862549,
      "learning_rate": 6.703114426069797e-05,
      "loss": 0.8566,
      "step": 7185
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.8184251189231873,
      "learning_rate": 6.69059010870459e-05,
      "loss": 0.878,
      "step": 7190
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.7338846921920776,
      "learning_rate": 6.678071618185913e-05,
      "loss": 0.7983,
      "step": 7195
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6665210723876953,
      "learning_rate": 6.665558976554957e-05,
      "loss": 0.8469,
      "step": 7200
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.7321475744247437,
      "learning_rate": 6.653052205842609e-05,
      "loss": 0.8938,
      "step": 7205
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6931696534156799,
      "learning_rate": 6.640551328069414e-05,
      "loss": 0.9283,
      "step": 7210
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.7098008990287781,
      "learning_rate": 6.628056365245561e-05,
      "loss": 0.8951,
      "step": 7215
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.8372482061386108,
      "learning_rate": 6.615567339370803e-05,
      "loss": 0.8581,
      "step": 7220
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.763494610786438,
      "learning_rate": 6.603084272434455e-05,
      "loss": 0.8382,
      "step": 7225
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.8865066766738892,
      "learning_rate": 6.59060718641533e-05,
      "loss": 0.978,
      "step": 7230
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.811994194984436,
      "learning_rate": 6.578136103281717e-05,
      "loss": 0.94,
      "step": 7235
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.8629786372184753,
      "learning_rate": 6.565671044991335e-05,
      "loss": 0.9999,
      "step": 7240
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.7018327713012695,
      "learning_rate": 6.553212033491291e-05,
      "loss": 0.8501,
      "step": 7245
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6607480049133301,
      "learning_rate": 6.540759090718047e-05,
      "loss": 0.7967,
      "step": 7250
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.8655691146850586,
      "learning_rate": 6.528312238597382e-05,
      "loss": 0.8571,
      "step": 7255
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.705172598361969,
      "learning_rate": 6.515871499044358e-05,
      "loss": 0.7804,
      "step": 7260
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.7695091962814331,
      "learning_rate": 6.50343689396325e-05,
      "loss": 0.8561,
      "step": 7265
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.7393696904182434,
      "learning_rate": 6.491008445247563e-05,
      "loss": 0.8283,
      "step": 7270
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.7259432077407837,
      "learning_rate": 6.478586174779947e-05,
      "loss": 0.761,
      "step": 7275
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.8482978343963623,
      "learning_rate": 6.466170104432166e-05,
      "loss": 0.9335,
      "step": 7280
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7087788581848145,
      "learning_rate": 6.453760256065091e-05,
      "loss": 0.9627,
      "step": 7285
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.857541024684906,
      "learning_rate": 6.441356651528609e-05,
      "loss": 0.9501,
      "step": 7290
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.803997278213501,
      "learning_rate": 6.428959312661642e-05,
      "loss": 0.8953,
      "step": 7295
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7093678116798401,
      "learning_rate": 6.416568261292062e-05,
      "loss": 0.8817,
      "step": 7300
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7989717125892639,
      "learning_rate": 6.404183519236669e-05,
      "loss": 0.8569,
      "step": 7305
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6285958290100098,
      "learning_rate": 6.391805108301167e-05,
      "loss": 0.9482,
      "step": 7310
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7950562238693237,
      "learning_rate": 6.37943305028011e-05,
      "loss": 0.933,
      "step": 7315
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7158616781234741,
      "learning_rate": 6.367067366956854e-05,
      "loss": 0.96,
      "step": 7320
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7947881817817688,
      "learning_rate": 6.354708080103548e-05,
      "loss": 1.0352,
      "step": 7325
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7674684524536133,
      "learning_rate": 6.342355211481065e-05,
      "loss": 0.922,
      "step": 7330
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7878246307373047,
      "learning_rate": 6.33000878283898e-05,
      "loss": 0.9961,
      "step": 7335
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6970924735069275,
      "learning_rate": 6.317668815915547e-05,
      "loss": 0.8092,
      "step": 7340
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7438830733299255,
      "learning_rate": 6.305335332437617e-05,
      "loss": 0.7657,
      "step": 7345
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6238775253295898,
      "learning_rate": 6.293008354120635e-05,
      "loss": 0.9087,
      "step": 7350
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7835497260093689,
      "learning_rate": 6.280687902668604e-05,
      "loss": 0.9097,
      "step": 7355
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.726040244102478,
      "learning_rate": 6.26837399977402e-05,
      "loss": 0.8469,
      "step": 7360
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7236318588256836,
      "learning_rate": 6.256066667117855e-05,
      "loss": 0.8242,
      "step": 7365
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7460520267486572,
      "learning_rate": 6.243765926369513e-05,
      "loss": 1.0041,
      "step": 7370
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6667692065238953,
      "learning_rate": 6.231471799186788e-05,
      "loss": 0.8797,
      "step": 7375
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.7931992411613464,
      "learning_rate": 6.219184307215843e-05,
      "loss": 0.8165,
      "step": 7380
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.8295156955718994,
      "learning_rate": 6.206903472091139e-05,
      "loss": 0.9236,
      "step": 7385
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6679932475090027,
      "learning_rate": 6.194629315435426e-05,
      "loss": 0.9222,
      "step": 7390
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6346510052680969,
      "learning_rate": 6.182361858859699e-05,
      "loss": 0.9591,
      "step": 7395
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7934569716453552,
      "learning_rate": 6.170101123963152e-05,
      "loss": 0.8165,
      "step": 7400
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.8610015511512756,
      "learning_rate": 6.157847132333138e-05,
      "loss": 0.934,
      "step": 7405
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6814960837364197,
      "learning_rate": 6.145599905545151e-05,
      "loss": 0.9619,
      "step": 7410
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7562384009361267,
      "learning_rate": 6.133359465162767e-05,
      "loss": 0.8535,
      "step": 7415
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9794740080833435,
      "learning_rate": 6.121125832737605e-05,
      "loss": 0.8925,
      "step": 7420
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6807798147201538,
      "learning_rate": 6.108899029809313e-05,
      "loss": 0.8478,
      "step": 7425
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6749770641326904,
      "learning_rate": 6.0966790779055036e-05,
      "loss": 0.7792,
      "step": 7430
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7428116202354431,
      "learning_rate": 6.0844659985417285e-05,
      "loss": 0.8334,
      "step": 7435
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7202659845352173,
      "learning_rate": 6.0722598132214445e-05,
      "loss": 0.8558,
      "step": 7440
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7060097455978394,
      "learning_rate": 6.060060543435961e-05,
      "loss": 0.9009,
      "step": 7445
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.658271849155426,
      "learning_rate": 6.0478682106644225e-05,
      "loss": 0.8167,
      "step": 7450
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7464290857315063,
      "learning_rate": 6.0356828363737484e-05,
      "loss": 0.7015,
      "step": 7455
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7930641174316406,
      "learning_rate": 6.0235044420186125e-05,
      "loss": 0.8402,
      "step": 7460
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7233032584190369,
      "learning_rate": 6.0113330490413985e-05,
      "loss": 0.8989,
      "step": 7465
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6508310437202454,
      "learning_rate": 5.9991686788721646e-05,
      "loss": 0.7937,
      "step": 7470
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.5942479372024536,
      "learning_rate": 5.9870113529285956e-05,
      "loss": 0.9154,
      "step": 7475
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7045297622680664,
      "learning_rate": 5.974861092615985e-05,
      "loss": 0.8383,
      "step": 7480
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7324946522712708,
      "learning_rate": 5.96271791932718e-05,
      "loss": 0.9954,
      "step": 7485
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7979293465614319,
      "learning_rate": 5.950581854442547e-05,
      "loss": 0.8141,
      "step": 7490
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7801807522773743,
      "learning_rate": 5.9384529193299444e-05,
      "loss": 0.9454,
      "step": 7495
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.7280653119087219,
      "learning_rate": 5.926331135344671e-05,
      "loss": 0.8436,
      "step": 7500
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6937660574913025,
      "learning_rate": 5.9142165238294344e-05,
      "loss": 0.8859,
      "step": 7505
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.8108348846435547,
      "learning_rate": 5.9021091061143194e-05,
      "loss": 0.9237,
      "step": 7510
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.6278057098388672,
      "learning_rate": 5.89000890351674e-05,
      "loss": 0.9174,
      "step": 7515
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.8261899352073669,
      "learning_rate": 5.877915937341407e-05,
      "loss": 0.9808,
      "step": 7520
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6049155592918396,
      "learning_rate": 5.865830228880294e-05,
      "loss": 0.859,
      "step": 7525
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9032023549079895,
      "learning_rate": 5.8537517994125876e-05,
      "loss": 0.786,
      "step": 7530
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.7123483419418335,
      "learning_rate": 5.84168067020467e-05,
      "loss": 0.9726,
      "step": 7535
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.8480584621429443,
      "learning_rate": 5.829616862510059e-05,
      "loss": 0.8336,
      "step": 7540
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.730718731880188,
      "learning_rate": 5.817560397569385e-05,
      "loss": 0.8618,
      "step": 7545
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6453015208244324,
      "learning_rate": 5.805511296610362e-05,
      "loss": 0.8167,
      "step": 7550
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.5834646821022034,
      "learning_rate": 5.793469580847714e-05,
      "loss": 0.8459,
      "step": 7555
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.7204859256744385,
      "learning_rate": 5.7814352714831774e-05,
      "loss": 0.9851,
      "step": 7560
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.7948461771011353,
      "learning_rate": 5.769408389705453e-05,
      "loss": 0.8953,
      "step": 7565
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.742241382598877,
      "learning_rate": 5.757388956690155e-05,
      "loss": 0.9261,
      "step": 7570
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6826033592224121,
      "learning_rate": 5.7453769935997825e-05,
      "loss": 0.93,
      "step": 7575
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.7635707259178162,
      "learning_rate": 5.733372521583686e-05,
      "loss": 1.0025,
      "step": 7580
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.7816930413246155,
      "learning_rate": 5.721375561778026e-05,
      "loss": 0.8832,
      "step": 7585
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6665587425231934,
      "learning_rate": 5.70938613530573e-05,
      "loss": 0.8808,
      "step": 7590
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.8372485041618347,
      "learning_rate": 5.697404263276476e-05,
      "loss": 0.8198,
      "step": 7595
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.7937222123146057,
      "learning_rate": 5.685429966786628e-05,
      "loss": 0.8799,
      "step": 7600
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.8681680560112,
      "learning_rate": 5.673463266919216e-05,
      "loss": 0.7796,
      "step": 7605
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.8229097723960876,
      "learning_rate": 5.661504184743895e-05,
      "loss": 0.9202,
      "step": 7610
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.710150957107544,
      "learning_rate": 5.6495527413169026e-05,
      "loss": 0.9788,
      "step": 7615
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.8372032046318054,
      "learning_rate": 5.6376089576810396e-05,
      "loss": 0.8809,
      "step": 7620
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.6738128066062927,
      "learning_rate": 5.625672854865609e-05,
      "loss": 0.8339,
      "step": 7625
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.8087076544761658,
      "learning_rate": 5.613744453886394e-05,
      "loss": 0.9022,
      "step": 7630
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.7368395328521729,
      "learning_rate": 5.6018237757456163e-05,
      "loss": 0.8785,
      "step": 7635
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.8094408512115479,
      "learning_rate": 5.5899108414318994e-05,
      "loss": 0.8625,
      "step": 7640
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.7902326583862305,
      "learning_rate": 5.5780056719202304e-05,
      "loss": 0.8728,
      "step": 7645
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.8153887987136841,
      "learning_rate": 5.566108288171936e-05,
      "loss": 0.8901,
      "step": 7650
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.7384223937988281,
      "learning_rate": 5.5542187111346224e-05,
      "loss": 0.844,
      "step": 7655
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6871650218963623,
      "learning_rate": 5.5423369617421564e-05,
      "loss": 0.7374,
      "step": 7660
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.714224100112915,
      "learning_rate": 5.530463060914619e-05,
      "loss": 0.8958,
      "step": 7665
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6976300477981567,
      "learning_rate": 5.5185970295582726e-05,
      "loss": 0.7616,
      "step": 7670
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.7711216807365417,
      "learning_rate": 5.50673888856553e-05,
      "loss": 0.8756,
      "step": 7675
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6878015398979187,
      "learning_rate": 5.494888658814907e-05,
      "loss": 0.8061,
      "step": 7680
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.8739657998085022,
      "learning_rate": 5.483046361170992e-05,
      "loss": 0.9096,
      "step": 7685
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.7206617593765259,
      "learning_rate": 5.471212016484399e-05,
      "loss": 0.9847,
      "step": 7690
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.8055949211120605,
      "learning_rate": 5.4593856455917536e-05,
      "loss": 0.879,
      "step": 7695
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.816685676574707,
      "learning_rate": 5.447567269315627e-05,
      "loss": 0.9028,
      "step": 7700
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.7042024731636047,
      "learning_rate": 5.435756908464529e-05,
      "loss": 0.8437,
      "step": 7705
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9031636118888855,
      "learning_rate": 5.4239545838328475e-05,
      "loss": 0.8619,
      "step": 7710
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.8082189559936523,
      "learning_rate": 5.4121603162008226e-05,
      "loss": 0.9048,
      "step": 7715
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.7807760238647461,
      "learning_rate": 5.400374126334511e-05,
      "loss": 0.8588,
      "step": 7720
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6958518028259277,
      "learning_rate": 5.388596034985742e-05,
      "loss": 0.9614,
      "step": 7725
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.7661911845207214,
      "learning_rate": 5.376826062892086e-05,
      "loss": 0.8568,
      "step": 7730
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.773129403591156,
      "learning_rate": 5.365064230776831e-05,
      "loss": 0.8852,
      "step": 7735
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.7705147862434387,
      "learning_rate": 5.3533105593489163e-05,
      "loss": 0.9321,
      "step": 7740
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.6738101840019226,
      "learning_rate": 5.3415650693029205e-05,
      "loss": 0.7993,
      "step": 7745
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9420228600502014,
      "learning_rate": 5.329827781319018e-05,
      "loss": 0.952,
      "step": 7750
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7898505330085754,
      "learning_rate": 5.318098716062934e-05,
      "loss": 0.9361,
      "step": 7755
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7509401440620422,
      "learning_rate": 5.30637789418593e-05,
      "loss": 0.908,
      "step": 7760
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7303262948989868,
      "learning_rate": 5.294665336324742e-05,
      "loss": 0.9535,
      "step": 7765
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.755186140537262,
      "learning_rate": 5.2829610631015606e-05,
      "loss": 0.8986,
      "step": 7770
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.8424975872039795,
      "learning_rate": 5.271265095123987e-05,
      "loss": 0.9281,
      "step": 7775
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7621006965637207,
      "learning_rate": 5.2595774529850006e-05,
      "loss": 0.8352,
      "step": 7780
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7491013407707214,
      "learning_rate": 5.24789815726292e-05,
      "loss": 0.8101,
      "step": 7785
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.8187841773033142,
      "learning_rate": 5.2362272285213756e-05,
      "loss": 0.8814,
      "step": 7790
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7539845705032349,
      "learning_rate": 5.224564687309261e-05,
      "loss": 1.0235,
      "step": 7795
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7542448043823242,
      "learning_rate": 5.2129105541606916e-05,
      "loss": 0.9453,
      "step": 7800
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7108261585235596,
      "learning_rate": 5.2012648495949976e-05,
      "loss": 0.8537,
      "step": 7805
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.8113353848457336,
      "learning_rate": 5.189627594116657e-05,
      "loss": 0.8769,
      "step": 7810
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.8466603755950928,
      "learning_rate": 5.1779988082152786e-05,
      "loss": 0.899,
      "step": 7815
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7004279494285583,
      "learning_rate": 5.166378512365552e-05,
      "loss": 0.8884,
      "step": 7820
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7048774361610413,
      "learning_rate": 5.1547667270272226e-05,
      "loss": 0.9008,
      "step": 7825
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7400707602500916,
      "learning_rate": 5.143163472645049e-05,
      "loss": 0.8443,
      "step": 7830
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.8226672410964966,
      "learning_rate": 5.131568769648775e-05,
      "loss": 0.7586,
      "step": 7835
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7571192979812622,
      "learning_rate": 5.119982638453075e-05,
      "loss": 0.8673,
      "step": 7840
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.58736652135849,
      "learning_rate": 5.108405099457549e-05,
      "loss": 0.7977,
      "step": 7845
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7114312052726746,
      "learning_rate": 5.096836173046663e-05,
      "loss": 0.9531,
      "step": 7850
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9230597615242004,
      "learning_rate": 5.0852758795897006e-05,
      "loss": 0.8671,
      "step": 7855
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.6817287802696228,
      "learning_rate": 5.073724239440773e-05,
      "loss": 0.8198,
      "step": 7860
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7963387370109558,
      "learning_rate": 5.06218127293874e-05,
      "loss": 0.9242,
      "step": 7865
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.7177277207374573,
      "learning_rate": 5.050647000407189e-05,
      "loss": 0.8446,
      "step": 7870
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.7984820008277893,
      "learning_rate": 5.039121442154415e-05,
      "loss": 0.9517,
      "step": 7875
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.8220101594924927,
      "learning_rate": 5.027604618473347e-05,
      "loss": 0.8199,
      "step": 7880
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6571751236915588,
      "learning_rate": 5.016096549641549e-05,
      "loss": 0.7767,
      "step": 7885
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.7824209332466125,
      "learning_rate": 5.004597255921174e-05,
      "loss": 0.962,
      "step": 7890
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.723919689655304,
      "learning_rate": 4.993106757558912e-05,
      "loss": 0.931,
      "step": 7895
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6097091436386108,
      "learning_rate": 4.981625074785986e-05,
      "loss": 0.8089,
      "step": 7900
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.8885555863380432,
      "learning_rate": 4.9701522278180736e-05,
      "loss": 0.941,
      "step": 7905
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.211531162261963,
      "learning_rate": 4.958688236855308e-05,
      "loss": 0.8999,
      "step": 7910
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9506832957267761,
      "learning_rate": 4.9472331220822366e-05,
      "loss": 1.0478,
      "step": 7915
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6763548254966736,
      "learning_rate": 4.935786903667767e-05,
      "loss": 0.8329,
      "step": 7920
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.8154675960540771,
      "learning_rate": 4.9243496017651434e-05,
      "loss": 0.8818,
      "step": 7925
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.8322145938873291,
      "learning_rate": 4.912921236511927e-05,
      "loss": 0.8129,
      "step": 7930
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.7868553996086121,
      "learning_rate": 4.901501828029919e-05,
      "loss": 0.9957,
      "step": 7935
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.7912847399711609,
      "learning_rate": 4.890091396425163e-05,
      "loss": 0.8805,
      "step": 7940
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.7965796589851379,
      "learning_rate": 4.878689961787907e-05,
      "loss": 0.9009,
      "step": 7945
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6940076351165771,
      "learning_rate": 4.8672975441925425e-05,
      "loss": 0.9712,
      "step": 7950
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.8586145043373108,
      "learning_rate": 4.8559141636975925e-05,
      "loss": 0.7787,
      "step": 7955
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.6977418661117554,
      "learning_rate": 4.844539840345666e-05,
      "loss": 0.9263,
      "step": 7960
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.7754257321357727,
      "learning_rate": 4.8331745941634235e-05,
      "loss": 0.8932,
      "step": 7965
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.7636638283729553,
      "learning_rate": 4.821818445161551e-05,
      "loss": 0.9223,
      "step": 7970
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.7074651718139648,
      "learning_rate": 4.810471413334711e-05,
      "loss": 0.9669,
      "step": 7975
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.7296220064163208,
      "learning_rate": 4.7991335186615126e-05,
      "loss": 0.912,
      "step": 7980
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9196190237998962,
      "learning_rate": 4.78780478110448e-05,
      "loss": 0.9151,
      "step": 7985
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.638798713684082,
      "learning_rate": 4.776485220610014e-05,
      "loss": 0.8709,
      "step": 7990
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.8261780142784119,
      "learning_rate": 4.765174857108352e-05,
      "loss": 0.892,
      "step": 7995
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6537724733352661,
      "learning_rate": 4.7538737105135526e-05,
      "loss": 0.8884,
      "step": 8000
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6304785013198853,
      "learning_rate": 4.7425818007234324e-05,
      "loss": 0.8929,
      "step": 8005
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9420642852783203,
      "learning_rate": 4.73129914761955e-05,
      "loss": 0.7801,
      "step": 8010
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6773472428321838,
      "learning_rate": 4.720025771067166e-05,
      "loss": 0.7125,
      "step": 8015
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6679495573043823,
      "learning_rate": 4.708761690915206e-05,
      "loss": 0.9065,
      "step": 8020
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.7421685457229614,
      "learning_rate": 4.697506926996226e-05,
      "loss": 0.9077,
      "step": 8025
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.7675222158432007,
      "learning_rate": 4.686261499126389e-05,
      "loss": 0.9164,
      "step": 8030
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6961898803710938,
      "learning_rate": 4.6750254271054087e-05,
      "loss": 0.7967,
      "step": 8035
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9274904727935791,
      "learning_rate": 4.663798730716532e-05,
      "loss": 0.9743,
      "step": 8040
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.760764479637146,
      "learning_rate": 4.6525814297264945e-05,
      "loss": 0.9513,
      "step": 8045
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.7536008358001709,
      "learning_rate": 4.641373543885489e-05,
      "loss": 0.8118,
      "step": 8050
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.8215941190719604,
      "learning_rate": 4.6301750929271404e-05,
      "loss": 0.8853,
      "step": 8055
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.7154656648635864,
      "learning_rate": 4.61898609656845e-05,
      "loss": 0.845,
      "step": 8060
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.8202133774757385,
      "learning_rate": 4.607806574509781e-05,
      "loss": 0.8672,
      "step": 8065
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.7498393654823303,
      "learning_rate": 4.596636546434807e-05,
      "loss": 0.9539,
      "step": 8070
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.775325357913971,
      "learning_rate": 4.585476032010494e-05,
      "loss": 0.9314,
      "step": 8075
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9472012519836426,
      "learning_rate": 4.5743250508870475e-05,
      "loss": 0.9524,
      "step": 8080
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.8268347978591919,
      "learning_rate": 4.5631836226979017e-05,
      "loss": 0.9405,
      "step": 8085
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.7666311860084534,
      "learning_rate": 4.5520517670596607e-05,
      "loss": 0.8873,
      "step": 8090
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.783304750919342,
      "learning_rate": 4.540929503572077e-05,
      "loss": 0.8744,
      "step": 8095
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6756754517555237,
      "learning_rate": 4.5298168518180115e-05,
      "loss": 0.8438,
      "step": 8100
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.6557435989379883,
      "learning_rate": 4.518713831363408e-05,
      "loss": 0.8878,
      "step": 8105
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9128133654594421,
      "learning_rate": 4.5076204617572425e-05,
      "loss": 0.911,
      "step": 8110
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7338535189628601,
      "learning_rate": 4.4965367625315146e-05,
      "loss": 0.8907,
      "step": 8115
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.901493489742279,
      "learning_rate": 4.4854627532011836e-05,
      "loss": 0.9086,
      "step": 8120
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.834770679473877,
      "learning_rate": 4.474398453264154e-05,
      "loss": 0.8569,
      "step": 8125
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.773137092590332,
      "learning_rate": 4.463343882201231e-05,
      "loss": 0.9045,
      "step": 8130
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.893915593624115,
      "learning_rate": 4.452299059476091e-05,
      "loss": 0.9175,
      "step": 8135
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7731142044067383,
      "learning_rate": 4.441264004535254e-05,
      "loss": 0.9611,
      "step": 8140
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.8787577748298645,
      "learning_rate": 4.430238736808033e-05,
      "loss": 0.9501,
      "step": 8145
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7855997681617737,
      "learning_rate": 4.419223275706515e-05,
      "loss": 0.8213,
      "step": 8150
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7477709650993347,
      "learning_rate": 4.408217640625514e-05,
      "loss": 0.808,
      "step": 8155
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.884198784828186,
      "learning_rate": 4.397221850942549e-05,
      "loss": 0.8188,
      "step": 8160
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.755005955696106,
      "learning_rate": 4.386235926017798e-05,
      "loss": 0.9438,
      "step": 8165
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7178514003753662,
      "learning_rate": 4.3752598851940805e-05,
      "loss": 1.0384,
      "step": 8170
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.6809802651405334,
      "learning_rate": 4.3642937477968105e-05,
      "loss": 0.8595,
      "step": 8175
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.6554874777793884,
      "learning_rate": 4.3533375331339486e-05,
      "loss": 0.8091,
      "step": 8180
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.6444360017776489,
      "learning_rate": 4.3423912604960095e-05,
      "loss": 0.7865,
      "step": 8185
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7334080934524536,
      "learning_rate": 4.331454949155983e-05,
      "loss": 0.9078,
      "step": 8190
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7476295232772827,
      "learning_rate": 4.320528618369337e-05,
      "loss": 0.8777,
      "step": 8195
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7522503733634949,
      "learning_rate": 4.309612287373957e-05,
      "loss": 0.9077,
      "step": 8200
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7205644845962524,
      "learning_rate": 4.298705975390115e-05,
      "loss": 0.8997,
      "step": 8205
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.6863880753517151,
      "learning_rate": 4.287809701620459e-05,
      "loss": 0.8168,
      "step": 8210
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.7311332821846008,
      "learning_rate": 4.2769234852499505e-05,
      "loss": 0.9752,
      "step": 8215
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.8853205442428589,
      "learning_rate": 4.266047345445846e-05,
      "loss": 0.8139,
      "step": 8220
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.6810586452484131,
      "learning_rate": 4.255181301357668e-05,
      "loss": 0.8455,
      "step": 8225
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.8352945446968079,
      "learning_rate": 4.244325372117156e-05,
      "loss": 0.8777,
      "step": 8230
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.6936649680137634,
      "learning_rate": 4.2334795768382306e-05,
      "loss": 0.7469,
      "step": 8235
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.8308777213096619,
      "learning_rate": 4.2226439346169924e-05,
      "loss": 0.8074,
      "step": 8240
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7798436880111694,
      "learning_rate": 4.211818464531649e-05,
      "loss": 0.892,
      "step": 8245
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.8142618536949158,
      "learning_rate": 4.2010031856425e-05,
      "loss": 0.8976,
      "step": 8250
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.8766167759895325,
      "learning_rate": 4.190198116991915e-05,
      "loss": 0.9761,
      "step": 8255
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7710538506507874,
      "learning_rate": 4.179403277604259e-05,
      "loss": 0.9142,
      "step": 8260
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.800112247467041,
      "learning_rate": 4.168618686485916e-05,
      "loss": 0.8007,
      "step": 8265
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7750797271728516,
      "learning_rate": 4.1578443626252094e-05,
      "loss": 0.7865,
      "step": 8270
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.6965195536613464,
      "learning_rate": 4.147080324992384e-05,
      "loss": 0.8434,
      "step": 8275
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.8158455491065979,
      "learning_rate": 4.136326592539591e-05,
      "loss": 0.7183,
      "step": 8280
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7494208812713623,
      "learning_rate": 4.125583184200812e-05,
      "loss": 0.8158,
      "step": 8285
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.8211252093315125,
      "learning_rate": 4.114850118891866e-05,
      "loss": 0.9652,
      "step": 8290
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7634385824203491,
      "learning_rate": 4.104127415510365e-05,
      "loss": 0.9349,
      "step": 8295
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7225372195243835,
      "learning_rate": 4.093415092935667e-05,
      "loss": 0.8758,
      "step": 8300
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7113853096961975,
      "learning_rate": 4.082713170028858e-05,
      "loss": 0.8378,
      "step": 8305
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7211659550666809,
      "learning_rate": 4.0720216656327105e-05,
      "loss": 0.9371,
      "step": 8310
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.763894259929657,
      "learning_rate": 4.0613405985716554e-05,
      "loss": 0.8955,
      "step": 8315
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7050108909606934,
      "learning_rate": 4.050669987651742e-05,
      "loss": 0.9218,
      "step": 8320
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7349695563316345,
      "learning_rate": 4.04000985166062e-05,
      "loss": 0.8825,
      "step": 8325
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.7132920622825623,
      "learning_rate": 4.029360209367487e-05,
      "loss": 0.8753,
      "step": 8330
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.6986986994743347,
      "learning_rate": 4.0187210795230677e-05,
      "loss": 0.8414,
      "step": 8335
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.8510329127311707,
      "learning_rate": 4.008092480859574e-05,
      "loss": 0.8665,
      "step": 8340
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.8111036419868469,
      "learning_rate": 3.997474432090679e-05,
      "loss": 0.8516,
      "step": 8345
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.7777669429779053,
      "learning_rate": 3.986866951911483e-05,
      "loss": 0.8469,
      "step": 8350
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.8348636627197266,
      "learning_rate": 3.9762700589984744e-05,
      "loss": 0.9188,
      "step": 8355
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.8833033442497253,
      "learning_rate": 3.965683772009502e-05,
      "loss": 0.956,
      "step": 8360
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.7732721567153931,
      "learning_rate": 3.95510810958374e-05,
      "loss": 0.9024,
      "step": 8365
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.7568506598472595,
      "learning_rate": 3.944543090341656e-05,
      "loss": 0.9141,
      "step": 8370
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.7183312773704529,
      "learning_rate": 3.933988732884976e-05,
      "loss": 0.8308,
      "step": 8375
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.8897679448127747,
      "learning_rate": 3.923445055796664e-05,
      "loss": 0.9499,
      "step": 8380
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.8179068565368652,
      "learning_rate": 3.912912077640869e-05,
      "loss": 1.0211,
      "step": 8385
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9936897158622742,
      "learning_rate": 3.9023898169629046e-05,
      "loss": 0.9061,
      "step": 8390
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.6856409907341003,
      "learning_rate": 3.891878292289216e-05,
      "loss": 0.9331,
      "step": 8395
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.7015929222106934,
      "learning_rate": 3.881377522127343e-05,
      "loss": 0.7606,
      "step": 8400
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.8607639074325562,
      "learning_rate": 3.8708875249658905e-05,
      "loss": 0.918,
      "step": 8405
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0351639986038208,
      "learning_rate": 3.8604083192745036e-05,
      "loss": 0.8172,
      "step": 8410
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.8130463361740112,
      "learning_rate": 3.849939923503815e-05,
      "loss": 0.9112,
      "step": 8415
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.7744996547698975,
      "learning_rate": 3.83948235608543e-05,
      "loss": 0.9202,
      "step": 8420
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.8210654258728027,
      "learning_rate": 3.829035635431889e-05,
      "loss": 0.8436,
      "step": 8425
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.7811559438705444,
      "learning_rate": 3.818599779936629e-05,
      "loss": 0.9175,
      "step": 8430
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.7337599992752075,
      "learning_rate": 3.80817480797397e-05,
      "loss": 0.8021,
      "step": 8435
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.7291502356529236,
      "learning_rate": 3.7977607378990574e-05,
      "loss": 0.8785,
      "step": 8440
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.6997067928314209,
      "learning_rate": 3.787357588047844e-05,
      "loss": 0.7704,
      "step": 8445
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.8448328375816345,
      "learning_rate": 3.7769653767370586e-05,
      "loss": 0.9154,
      "step": 8450
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.8886889815330505,
      "learning_rate": 3.766584122264166e-05,
      "loss": 0.9185,
      "step": 8455
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.7532009482383728,
      "learning_rate": 3.7562138429073424e-05,
      "loss": 0.795,
      "step": 8460
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7099334001541138,
      "learning_rate": 3.7458545569254445e-05,
      "loss": 0.84,
      "step": 8465
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.6124157905578613,
      "learning_rate": 3.735506282557967e-05,
      "loss": 0.7536,
      "step": 8470
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7885011434555054,
      "learning_rate": 3.725169038025016e-05,
      "loss": 0.8933,
      "step": 8475
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7972614765167236,
      "learning_rate": 3.714842841527282e-05,
      "loss": 0.8322,
      "step": 8480
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7810702323913574,
      "learning_rate": 3.7045277112459954e-05,
      "loss": 0.8997,
      "step": 8485
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7650167942047119,
      "learning_rate": 3.694223665342915e-05,
      "loss": 0.8807,
      "step": 8490
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.8026538491249084,
      "learning_rate": 3.683930721960276e-05,
      "loss": 0.8664,
      "step": 8495
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9043586254119873,
      "learning_rate": 3.6736488992207615e-05,
      "loss": 0.8305,
      "step": 8500
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.6823323965072632,
      "learning_rate": 3.663378215227483e-05,
      "loss": 0.7567,
      "step": 8505
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2210302352905273,
      "learning_rate": 3.653118688063935e-05,
      "loss": 0.9452,
      "step": 8510
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7662010192871094,
      "learning_rate": 3.6428703357939644e-05,
      "loss": 0.9422,
      "step": 8515
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7560374736785889,
      "learning_rate": 3.632633176461755e-05,
      "loss": 0.8106,
      "step": 8520
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0334359407424927,
      "learning_rate": 3.622407228091774e-05,
      "loss": 0.93,
      "step": 8525
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7211924195289612,
      "learning_rate": 3.612192508688751e-05,
      "loss": 0.8359,
      "step": 8530
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7545579075813293,
      "learning_rate": 3.601989036237644e-05,
      "loss": 0.9147,
      "step": 8535
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.8421271443367004,
      "learning_rate": 3.5917968287036104e-05,
      "loss": 0.7863,
      "step": 8540
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.8393056392669678,
      "learning_rate": 3.5816159040319716e-05,
      "loss": 0.9036,
      "step": 8545
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9179325699806213,
      "learning_rate": 3.5714462801481895e-05,
      "loss": 0.8731,
      "step": 8550
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.6770288348197937,
      "learning_rate": 3.5612879749578244e-05,
      "loss": 0.7885,
      "step": 8555
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7247023582458496,
      "learning_rate": 3.551141006346499e-05,
      "loss": 0.9884,
      "step": 8560
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.8267160058021545,
      "learning_rate": 3.5410053921798926e-05,
      "loss": 0.7846,
      "step": 8565
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.8529921770095825,
      "learning_rate": 3.530881150303679e-05,
      "loss": 0.844,
      "step": 8570
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.7341428995132446,
      "learning_rate": 3.5207682985435206e-05,
      "loss": 0.912,
      "step": 8575
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.6659154891967773,
      "learning_rate": 3.510666854705021e-05,
      "loss": 0.9415,
      "step": 8580
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.7431322932243347,
      "learning_rate": 3.5005768365736855e-05,
      "loss": 0.8272,
      "step": 8585
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9032961130142212,
      "learning_rate": 3.490498261914923e-05,
      "loss": 0.8756,
      "step": 8590
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.8175019025802612,
      "learning_rate": 3.48043114847398e-05,
      "loss": 0.9391,
      "step": 8595
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.667407751083374,
      "learning_rate": 3.470375513975925e-05,
      "loss": 0.8181,
      "step": 8600
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.7949620485305786,
      "learning_rate": 3.460331376125624e-05,
      "loss": 0.8459,
      "step": 8605
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.8054453134536743,
      "learning_rate": 3.450298752607696e-05,
      "loss": 0.9848,
      "step": 8610
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.7551806569099426,
      "learning_rate": 3.440277661086475e-05,
      "loss": 0.8592,
      "step": 8615
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.8027676343917847,
      "learning_rate": 3.4302681192060114e-05,
      "loss": 0.9916,
      "step": 8620
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0340180397033691,
      "learning_rate": 3.4202701445900085e-05,
      "loss": 0.8464,
      "step": 8625
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.6809491515159607,
      "learning_rate": 3.410283754841801e-05,
      "loss": 0.8435,
      "step": 8630
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.7135958075523376,
      "learning_rate": 3.40030896754434e-05,
      "loss": 0.8993,
      "step": 8635
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.7542569041252136,
      "learning_rate": 3.390345800260125e-05,
      "loss": 0.8111,
      "step": 8640
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9424663186073303,
      "learning_rate": 3.380394270531221e-05,
      "loss": 0.8995,
      "step": 8645
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.8005475401878357,
      "learning_rate": 3.370454395879188e-05,
      "loss": 0.8029,
      "step": 8650
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.6959512233734131,
      "learning_rate": 3.360526193805065e-05,
      "loss": 0.9035,
      "step": 8655
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.7621459364891052,
      "learning_rate": 3.3506096817893526e-05,
      "loss": 0.9833,
      "step": 8660
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.8192619681358337,
      "learning_rate": 3.3407048772919514e-05,
      "loss": 0.9324,
      "step": 8665
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.8644863963127136,
      "learning_rate": 3.3308117977521544e-05,
      "loss": 0.8136,
      "step": 8670
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.6707440614700317,
      "learning_rate": 3.32093046058862e-05,
      "loss": 0.845,
      "step": 8675
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.6646841764450073,
      "learning_rate": 3.311060883199323e-05,
      "loss": 0.907,
      "step": 8680
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.8453482985496521,
      "learning_rate": 3.301203082961532e-05,
      "loss": 0.8255,
      "step": 8685
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9667664766311646,
      "learning_rate": 3.291357077231781e-05,
      "loss": 0.8206,
      "step": 8690
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.8043253421783447,
      "learning_rate": 3.281522883345843e-05,
      "loss": 0.7825,
      "step": 8695
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9271131157875061,
      "learning_rate": 3.271700518618683e-05,
      "loss": 0.8616,
      "step": 8700
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.8352097272872925,
      "learning_rate": 3.261890000344453e-05,
      "loss": 0.9039,
      "step": 8705
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.8335424065589905,
      "learning_rate": 3.252091345796432e-05,
      "loss": 0.8746,
      "step": 8710
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.693943440914154,
      "learning_rate": 3.2423045722270294e-05,
      "loss": 0.8702,
      "step": 8715
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.8505630493164062,
      "learning_rate": 3.232529696867712e-05,
      "loss": 0.8461,
      "step": 8720
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.6869897842407227,
      "learning_rate": 3.222766736929013e-05,
      "loss": 0.7622,
      "step": 8725
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.82327800989151,
      "learning_rate": 3.2130157096004864e-05,
      "loss": 0.845,
      "step": 8730
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.6616914868354797,
      "learning_rate": 3.203276632050671e-05,
      "loss": 0.7665,
      "step": 8735
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.8684021830558777,
      "learning_rate": 3.1935495214270705e-05,
      "loss": 0.9294,
      "step": 8740
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9236558675765991,
      "learning_rate": 3.1838343948561136e-05,
      "loss": 0.8502,
      "step": 8745
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.7163612246513367,
      "learning_rate": 3.1741312694431315e-05,
      "loss": 0.8604,
      "step": 8750
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.6481351256370544,
      "learning_rate": 3.164440162272322e-05,
      "loss": 0.9637,
      "step": 8755
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.8455091118812561,
      "learning_rate": 3.1547610904067325e-05,
      "loss": 0.7494,
      "step": 8760
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9111529588699341,
      "learning_rate": 3.145094070888208e-05,
      "loss": 0.9955,
      "step": 8765
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9329970479011536,
      "learning_rate": 3.13543912073738e-05,
      "loss": 0.9213,
      "step": 8770
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.8602619767189026,
      "learning_rate": 3.125796256953625e-05,
      "loss": 1.0095,
      "step": 8775
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.703950047492981,
      "learning_rate": 3.1161654965150436e-05,
      "loss": 0.7685,
      "step": 8780
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.7916018962860107,
      "learning_rate": 3.1065468563784196e-05,
      "loss": 0.9332,
      "step": 8785
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.8622072339057922,
      "learning_rate": 3.096940353479208e-05,
      "loss": 0.8634,
      "step": 8790
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.8001255989074707,
      "learning_rate": 3.087346004731485e-05,
      "loss": 0.9707,
      "step": 8795
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.8370776176452637,
      "learning_rate": 3.077763827027929e-05,
      "loss": 0.8443,
      "step": 8800
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.5743516683578491,
      "learning_rate": 3.0681938372397865e-05,
      "loss": 0.9002,
      "step": 8805
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.7769607901573181,
      "learning_rate": 3.0586360522168476e-05,
      "loss": 1.0137,
      "step": 8810
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.825899600982666,
      "learning_rate": 3.0490904887874183e-05,
      "loss": 0.9284,
      "step": 8815
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7428541779518127,
      "learning_rate": 3.039557163758279e-05,
      "loss": 0.9896,
      "step": 8820
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.94400954246521,
      "learning_rate": 3.030036093914663e-05,
      "loss": 0.9276,
      "step": 8825
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7103738188743591,
      "learning_rate": 3.0205272960202292e-05,
      "loss": 0.8383,
      "step": 8830
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.8435823321342468,
      "learning_rate": 3.0110307868170263e-05,
      "loss": 1.0178,
      "step": 8835
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.891716718673706,
      "learning_rate": 3.0015465830254663e-05,
      "loss": 0.9146,
      "step": 8840
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7224240303039551,
      "learning_rate": 2.9920747013443007e-05,
      "loss": 0.9252,
      "step": 8845
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.8182008862495422,
      "learning_rate": 2.98261515845058e-05,
      "loss": 0.8569,
      "step": 8850
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.8286107182502747,
      "learning_rate": 2.9731679709996306e-05,
      "loss": 0.8813,
      "step": 8855
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7946951985359192,
      "learning_rate": 2.963733155625026e-05,
      "loss": 0.7828,
      "step": 8860
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7622092366218567,
      "learning_rate": 2.954310728938553e-05,
      "loss": 0.7407,
      "step": 8865
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.790444016456604,
      "learning_rate": 2.944900707530195e-05,
      "loss": 0.7441,
      "step": 8870
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7887465357780457,
      "learning_rate": 2.9355031079680827e-05,
      "loss": 0.9499,
      "step": 8875
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.8154371976852417,
      "learning_rate": 2.9261179467984822e-05,
      "loss": 0.948,
      "step": 8880
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.8430317640304565,
      "learning_rate": 2.9167452405457562e-05,
      "loss": 0.8144,
      "step": 8885
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.72621750831604,
      "learning_rate": 2.907385005712341e-05,
      "loss": 0.8755,
      "step": 8890
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.8740549683570862,
      "learning_rate": 2.8980372587787087e-05,
      "loss": 0.8555,
      "step": 8895
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.6749491095542908,
      "learning_rate": 2.888702016203354e-05,
      "loss": 0.9325,
      "step": 8900
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7589474320411682,
      "learning_rate": 2.879379294422748e-05,
      "loss": 0.7657,
      "step": 8905
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.8777940273284912,
      "learning_rate": 2.8700691098513188e-05,
      "loss": 0.855,
      "step": 8910
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7081353664398193,
      "learning_rate": 2.8607714788814176e-05,
      "loss": 0.8862,
      "step": 8915
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7634384036064148,
      "learning_rate": 2.8514864178832967e-05,
      "loss": 0.8565,
      "step": 8920
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.7505052089691162,
      "learning_rate": 2.842213943205072e-05,
      "loss": 0.8969,
      "step": 8925
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.8570383787155151,
      "learning_rate": 2.8329540711727054e-05,
      "loss": 0.8267,
      "step": 8930
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9586119055747986,
      "learning_rate": 2.823706818089965e-05,
      "loss": 0.8691,
      "step": 8935
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7504484057426453,
      "learning_rate": 2.8144722002383993e-05,
      "loss": 0.8925,
      "step": 8940
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7530816197395325,
      "learning_rate": 2.8052502338773146e-05,
      "loss": 0.8073,
      "step": 8945
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9067265391349792,
      "learning_rate": 2.7960409352437333e-05,
      "loss": 0.9125,
      "step": 8950
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7983117699623108,
      "learning_rate": 2.7868443205523888e-05,
      "loss": 0.8934,
      "step": 8955
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.8042848706245422,
      "learning_rate": 2.777660405995671e-05,
      "loss": 0.7423,
      "step": 8960
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.6886625289916992,
      "learning_rate": 2.768489207743603e-05,
      "loss": 0.8258,
      "step": 8965
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7667304873466492,
      "learning_rate": 2.7593307419438354e-05,
      "loss": 0.842,
      "step": 8970
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7944421172142029,
      "learning_rate": 2.7501850247215878e-05,
      "loss": 0.8514,
      "step": 8975
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.8095190525054932,
      "learning_rate": 2.741052072179636e-05,
      "loss": 0.8196,
      "step": 8980
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.6305115818977356,
      "learning_rate": 2.7319319003982925e-05,
      "loss": 0.8992,
      "step": 8985
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.8835801482200623,
      "learning_rate": 2.7228245254353444e-05,
      "loss": 0.8358,
      "step": 8990
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7960600256919861,
      "learning_rate": 2.7137299633260638e-05,
      "loss": 0.8111,
      "step": 8995
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.8967324495315552,
      "learning_rate": 2.7046482300831642e-05,
      "loss": 1.0048,
      "step": 9000
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7498697638511658,
      "learning_rate": 2.6955793416967646e-05,
      "loss": 0.735,
      "step": 9005
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.8737277388572693,
      "learning_rate": 2.686523314134367e-05,
      "loss": 0.85,
      "step": 9010
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.8953958749771118,
      "learning_rate": 2.6774801633408418e-05,
      "loss": 0.886,
      "step": 9015
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.5604196190834045,
      "learning_rate": 2.668449905238367e-05,
      "loss": 0.8173,
      "step": 9020
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7240638136863708,
      "learning_rate": 2.659432555726441e-05,
      "loss": 0.8973,
      "step": 9025
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7774072885513306,
      "learning_rate": 2.6504281306818225e-05,
      "loss": 0.8,
      "step": 9030
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9128009676933289,
      "learning_rate": 2.641436645958515e-05,
      "loss": 0.8173,
      "step": 9035
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7718796730041504,
      "learning_rate": 2.6324581173877473e-05,
      "loss": 0.9214,
      "step": 9040
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7290648818016052,
      "learning_rate": 2.6234925607779215e-05,
      "loss": 0.8464,
      "step": 9045
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.7356486320495605,
      "learning_rate": 2.6145399919146086e-05,
      "loss": 0.8361,
      "step": 9050
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.8046448826789856,
      "learning_rate": 2.6056004265605148e-05,
      "loss": 0.9615,
      "step": 9055
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.667546272277832,
      "learning_rate": 2.596673880455448e-05,
      "loss": 0.8548,
      "step": 9060
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.8502492904663086,
      "learning_rate": 2.587760369316291e-05,
      "loss": 0.8098,
      "step": 9065
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.8480196595191956,
      "learning_rate": 2.578859908836979e-05,
      "loss": 0.8643,
      "step": 9070
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.7749778628349304,
      "learning_rate": 2.569972514688468e-05,
      "loss": 0.8322,
      "step": 9075
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.7703043222427368,
      "learning_rate": 2.5610982025187046e-05,
      "loss": 0.8639,
      "step": 9080
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9232176542282104,
      "learning_rate": 2.552236987952612e-05,
      "loss": 0.8019,
      "step": 9085
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.7729461193084717,
      "learning_rate": 2.543388886592045e-05,
      "loss": 1.0313,
      "step": 9090
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.7758802771568298,
      "learning_rate": 2.5345539140157705e-05,
      "loss": 0.812,
      "step": 9095
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.7040194272994995,
      "learning_rate": 2.5257320857794397e-05,
      "loss": 0.7539,
      "step": 9100
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.7888084650039673,
      "learning_rate": 2.5169234174155608e-05,
      "loss": 0.7946,
      "step": 9105
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9183518886566162,
      "learning_rate": 2.5081279244334764e-05,
      "loss": 0.865,
      "step": 9110
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.7420482039451599,
      "learning_rate": 2.4993456223193266e-05,
      "loss": 0.9083,
      "step": 9115
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9450098872184753,
      "learning_rate": 2.490576526536025e-05,
      "loss": 0.9594,
      "step": 9120
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.798826277256012,
      "learning_rate": 2.4818206525232356e-05,
      "loss": 0.8737,
      "step": 9125
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.8134889602661133,
      "learning_rate": 2.4730780156973442e-05,
      "loss": 0.9898,
      "step": 9130
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.8726826310157776,
      "learning_rate": 2.464348631451424e-05,
      "loss": 0.8145,
      "step": 9135
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1030023097991943,
      "learning_rate": 2.455632515155224e-05,
      "loss": 0.8348,
      "step": 9140
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.6278375387191772,
      "learning_rate": 2.4469296821551257e-05,
      "loss": 0.7682,
      "step": 9145
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.7514384388923645,
      "learning_rate": 2.4382401477741244e-05,
      "loss": 0.9087,
      "step": 9150
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.000822901725769,
      "learning_rate": 2.429563927311801e-05,
      "loss": 1.0172,
      "step": 9155
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.7753348350524902,
      "learning_rate": 2.4209010360442896e-05,
      "loss": 0.9404,
      "step": 9160
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.7746703028678894,
      "learning_rate": 2.4122514892242677e-05,
      "loss": 0.818,
      "step": 9165
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.6258317232131958,
      "learning_rate": 2.4036153020809072e-05,
      "loss": 0.8354,
      "step": 9170
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9577354192733765,
      "learning_rate": 2.3949924898198604e-05,
      "loss": 0.8962,
      "step": 9175
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.7608239650726318,
      "learning_rate": 2.3863830676232313e-05,
      "loss": 0.8366,
      "step": 9180
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.7837699055671692,
      "learning_rate": 2.377787050649547e-05,
      "loss": 0.9145,
      "step": 9185
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.7647613286972046,
      "learning_rate": 2.36920445403373e-05,
      "loss": 0.9256,
      "step": 9190
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.7220770716667175,
      "learning_rate": 2.3606352928870835e-05,
      "loss": 0.8174,
      "step": 9195
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.8374908566474915,
      "learning_rate": 2.352079582297244e-05,
      "loss": 0.939,
      "step": 9200
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.8317983150482178,
      "learning_rate": 2.34353733732817e-05,
      "loss": 0.8696,
      "step": 9205
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.8166177272796631,
      "learning_rate": 2.335008573020111e-05,
      "loss": 0.9865,
      "step": 9210
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.7623334527015686,
      "learning_rate": 2.326493304389582e-05,
      "loss": 0.9127,
      "step": 9215
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.8136252164840698,
      "learning_rate": 2.3179915464293323e-05,
      "loss": 0.8295,
      "step": 9220
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9206348061561584,
      "learning_rate": 2.309503314108331e-05,
      "loss": 0.8645,
      "step": 9225
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.8517584800720215,
      "learning_rate": 2.301028622371726e-05,
      "loss": 0.8817,
      "step": 9230
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.7885266542434692,
      "learning_rate": 2.2925674861408264e-05,
      "loss": 0.8897,
      "step": 9235
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.8661707639694214,
      "learning_rate": 2.2841199203130747e-05,
      "loss": 0.9675,
      "step": 9240
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.8700675964355469,
      "learning_rate": 2.2756859397620156e-05,
      "loss": 0.9892,
      "step": 9245
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.7101337909698486,
      "learning_rate": 2.267265559337286e-05,
      "loss": 0.8835,
      "step": 9250
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.6243454217910767,
      "learning_rate": 2.2588587938645656e-05,
      "loss": 0.8324,
      "step": 9255
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.7496815323829651,
      "learning_rate": 2.2504656581455665e-05,
      "loss": 0.9837,
      "step": 9260
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.8095391392707825,
      "learning_rate": 2.242086166958004e-05,
      "loss": 0.8328,
      "step": 9265
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9355162978172302,
      "learning_rate": 2.233720335055567e-05,
      "loss": 0.8582,
      "step": 9270
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.7414233684539795,
      "learning_rate": 2.2253681771678946e-05,
      "loss": 0.9332,
      "step": 9275
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.6823673844337463,
      "learning_rate": 2.2170297080005564e-05,
      "loss": 0.8942,
      "step": 9280
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.8288149833679199,
      "learning_rate": 2.208704942235017e-05,
      "loss": 0.768,
      "step": 9285
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.8060517311096191,
      "learning_rate": 2.200393894528603e-05,
      "loss": 0.79,
      "step": 9290
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.7582494020462036,
      "learning_rate": 2.1920965795145054e-05,
      "loss": 0.9492,
      "step": 9295
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6934027075767517,
      "learning_rate": 2.1838130118017252e-05,
      "loss": 0.8737,
      "step": 9300
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.7684122323989868,
      "learning_rate": 2.175543205975059e-05,
      "loss": 0.8799,
      "step": 9305
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6786194443702698,
      "learning_rate": 2.1672871765950808e-05,
      "loss": 0.8591,
      "step": 9310
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9380123615264893,
      "learning_rate": 2.1590449381980993e-05,
      "loss": 0.9141,
      "step": 9315
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.7513065338134766,
      "learning_rate": 2.150816505296147e-05,
      "loss": 0.8632,
      "step": 9320
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.7867124676704407,
      "learning_rate": 2.1426018923769464e-05,
      "loss": 0.8689,
      "step": 9325
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.7759522199630737,
      "learning_rate": 2.1344011139038843e-05,
      "loss": 0.9019,
      "step": 9330
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9177059531211853,
      "learning_rate": 2.126214184316002e-05,
      "loss": 0.8543,
      "step": 9335
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.7387694716453552,
      "learning_rate": 2.1180411180279458e-05,
      "loss": 0.9069,
      "step": 9340
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.7109723091125488,
      "learning_rate": 2.1098819294299498e-05,
      "loss": 0.9439,
      "step": 9345
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6802003979682922,
      "learning_rate": 2.101736632887825e-05,
      "loss": 0.772,
      "step": 9350
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.7871384024620056,
      "learning_rate": 2.0936052427429186e-05,
      "loss": 0.8521,
      "step": 9355
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.7216570377349854,
      "learning_rate": 2.085487773312086e-05,
      "loss": 0.9107,
      "step": 9360
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.7913159728050232,
      "learning_rate": 2.0773842388876884e-05,
      "loss": 0.8631,
      "step": 9365
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.7265539765357971,
      "learning_rate": 2.0692946537375336e-05,
      "loss": 0.8232,
      "step": 9370
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.7547101378440857,
      "learning_rate": 2.0612190321048762e-05,
      "loss": 0.8404,
      "step": 9375
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9315720796585083,
      "learning_rate": 2.053157388208393e-05,
      "loss": 0.8242,
      "step": 9380
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.8665992021560669,
      "learning_rate": 2.0451097362421366e-05,
      "loss": 0.9618,
      "step": 9385
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.8100541830062866,
      "learning_rate": 2.037076090375539e-05,
      "loss": 0.8903,
      "step": 9390
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.8045421838760376,
      "learning_rate": 2.029056464753363e-05,
      "loss": 0.9265,
      "step": 9395
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.6738159656524658,
      "learning_rate": 2.021050873495679e-05,
      "loss": 0.7856,
      "step": 9400
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.8198833465576172,
      "learning_rate": 2.013059330697864e-05,
      "loss": 0.9533,
      "step": 9405
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.853940486907959,
      "learning_rate": 2.005081850430548e-05,
      "loss": 0.836,
      "step": 9410
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7123111486434937,
      "learning_rate": 1.9971184467396022e-05,
      "loss": 0.9173,
      "step": 9415
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7735269069671631,
      "learning_rate": 1.989169133646124e-05,
      "loss": 0.8409,
      "step": 9420
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7102484107017517,
      "learning_rate": 1.981233925146385e-05,
      "loss": 0.8676,
      "step": 9425
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.8679525852203369,
      "learning_rate": 1.9733128352118324e-05,
      "loss": 0.9388,
      "step": 9430
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7913849949836731,
      "learning_rate": 1.9654058777890573e-05,
      "loss": 0.9927,
      "step": 9435
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7318495512008667,
      "learning_rate": 1.9575130667997643e-05,
      "loss": 0.8995,
      "step": 9440
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.8687650561332703,
      "learning_rate": 1.9496344161407487e-05,
      "loss": 1.0084,
      "step": 9445
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7607772350311279,
      "learning_rate": 1.9417699396838764e-05,
      "loss": 0.8664,
      "step": 9450
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7554779052734375,
      "learning_rate": 1.9339196512760538e-05,
      "loss": 0.8402,
      "step": 9455
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7785469889640808,
      "learning_rate": 1.926083564739215e-05,
      "loss": 0.8458,
      "step": 9460
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7814673781394958,
      "learning_rate": 1.9182616938702792e-05,
      "loss": 0.858,
      "step": 9465
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7215919494628906,
      "learning_rate": 1.910454052441141e-05,
      "loss": 0.883,
      "step": 9470
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6685999035835266,
      "learning_rate": 1.9026606541986393e-05,
      "loss": 0.8643,
      "step": 9475
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6879013776779175,
      "learning_rate": 1.894881512864537e-05,
      "loss": 0.8209,
      "step": 9480
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6500614285469055,
      "learning_rate": 1.8871166421354924e-05,
      "loss": 0.8513,
      "step": 9485
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.8063387274742126,
      "learning_rate": 1.879366055683044e-05,
      "loss": 0.9353,
      "step": 9490
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.7360300421714783,
      "learning_rate": 1.871629767153573e-05,
      "loss": 0.9331,
      "step": 9495
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.8143420815467834,
      "learning_rate": 1.863907790168289e-05,
      "loss": 0.7772,
      "step": 9500
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.8035261034965515,
      "learning_rate": 1.8562001383232043e-05,
      "loss": 0.7737,
      "step": 9505
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.8917401432991028,
      "learning_rate": 1.848506825189107e-05,
      "loss": 0.8535,
      "step": 9510
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.862570583820343,
      "learning_rate": 1.8408278643115384e-05,
      "loss": 0.9848,
      "step": 9515
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.8407866358757019,
      "learning_rate": 1.833163269210777e-05,
      "loss": 0.8791,
      "step": 9520
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6715019941329956,
      "learning_rate": 1.825513053381801e-05,
      "loss": 0.7603,
      "step": 9525
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7662861943244934,
      "learning_rate": 1.8178772302942705e-05,
      "loss": 0.9243,
      "step": 9530
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.8278026580810547,
      "learning_rate": 1.8102558133925084e-05,
      "loss": 0.8764,
      "step": 9535
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.8157891035079956,
      "learning_rate": 1.802648816095468e-05,
      "loss": 0.8869,
      "step": 9540
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.718010663986206,
      "learning_rate": 1.7950562517967217e-05,
      "loss": 0.9088,
      "step": 9545
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.6439600586891174,
      "learning_rate": 1.787478133864423e-05,
      "loss": 0.7075,
      "step": 9550
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.823449969291687,
      "learning_rate": 1.779914475641292e-05,
      "loss": 0.957,
      "step": 9555
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7597917318344116,
      "learning_rate": 1.7723652904445907e-05,
      "loss": 0.8781,
      "step": 9560
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7375972867012024,
      "learning_rate": 1.7648305915660968e-05,
      "loss": 0.883,
      "step": 9565
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7715105414390564,
      "learning_rate": 1.75731039227208e-05,
      "loss": 0.8056,
      "step": 9570
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7677664756774902,
      "learning_rate": 1.7498047058032896e-05,
      "loss": 0.937,
      "step": 9575
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.8163573145866394,
      "learning_rate": 1.742313545374914e-05,
      "loss": 0.9381,
      "step": 9580
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7601113319396973,
      "learning_rate": 1.7348369241765683e-05,
      "loss": 0.8025,
      "step": 9585
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7613367438316345,
      "learning_rate": 1.7273748553722668e-05,
      "loss": 0.8191,
      "step": 9590
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.689087450504303,
      "learning_rate": 1.7199273521004046e-05,
      "loss": 0.8272,
      "step": 9595
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.8311107754707336,
      "learning_rate": 1.7124944274737274e-05,
      "loss": 1.0169,
      "step": 9600
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.8512225151062012,
      "learning_rate": 1.7050760945793187e-05,
      "loss": 0.9597,
      "step": 9605
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7289577126502991,
      "learning_rate": 1.6976723664785653e-05,
      "loss": 0.7989,
      "step": 9610
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7322782874107361,
      "learning_rate": 1.6902832562071404e-05,
      "loss": 0.7628,
      "step": 9615
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.895979642868042,
      "learning_rate": 1.682908776774981e-05,
      "loss": 0.816,
      "step": 9620
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7974205017089844,
      "learning_rate": 1.6755489411662595e-05,
      "loss": 0.9064,
      "step": 9625
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7401018142700195,
      "learning_rate": 1.668203762339373e-05,
      "loss": 0.7754,
      "step": 9630
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.812610387802124,
      "learning_rate": 1.6608732532269077e-05,
      "loss": 0.778,
      "step": 9635
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7918111085891724,
      "learning_rate": 1.6535574267356192e-05,
      "loss": 0.9172,
      "step": 9640
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.7501040101051331,
      "learning_rate": 1.6462562957464132e-05,
      "loss": 0.8736,
      "step": 9645
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7926919460296631,
      "learning_rate": 1.6389698731143242e-05,
      "loss": 0.8152,
      "step": 9650
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7895399928092957,
      "learning_rate": 1.631698171668483e-05,
      "loss": 0.8256,
      "step": 9655
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9749563336372375,
      "learning_rate": 1.6244412042121105e-05,
      "loss": 0.9638,
      "step": 9660
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.8345392346382141,
      "learning_rate": 1.61719898352248e-05,
      "loss": 0.9168,
      "step": 9665
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9674035906791687,
      "learning_rate": 1.6099715223508937e-05,
      "loss": 0.9306,
      "step": 9670
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.8595812320709229,
      "learning_rate": 1.6027588334226807e-05,
      "loss": 0.9261,
      "step": 9675
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.8274486064910889,
      "learning_rate": 1.59556092943715e-05,
      "loss": 0.7433,
      "step": 9680
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7736295461654663,
      "learning_rate": 1.5883778230675862e-05,
      "loss": 0.8638,
      "step": 9685
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7375960350036621,
      "learning_rate": 1.5812095269612136e-05,
      "loss": 0.9223,
      "step": 9690
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.6871909499168396,
      "learning_rate": 1.5740560537391858e-05,
      "loss": 0.8969,
      "step": 9695
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9287179708480835,
      "learning_rate": 1.5669174159965517e-05,
      "loss": 1.0913,
      "step": 9700
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7640308737754822,
      "learning_rate": 1.559793626302245e-05,
      "loss": 0.7871,
      "step": 9705
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9685657024383545,
      "learning_rate": 1.5526846971990505e-05,
      "loss": 0.7518,
      "step": 9710
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.8307138085365295,
      "learning_rate": 1.545590641203599e-05,
      "loss": 0.8653,
      "step": 9715
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.839181125164032,
      "learning_rate": 1.5385114708063265e-05,
      "loss": 0.8401,
      "step": 9720
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.729323148727417,
      "learning_rate": 1.531447198471453e-05,
      "loss": 0.8694,
      "step": 9725
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.8462833166122437,
      "learning_rate": 1.5243978366369837e-05,
      "loss": 0.8935,
      "step": 9730
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.8905854225158691,
      "learning_rate": 1.5173633977146595e-05,
      "loss": 0.8879,
      "step": 9735
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7661150097846985,
      "learning_rate": 1.5103438940899494e-05,
      "loss": 0.8359,
      "step": 9740
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.8612456917762756,
      "learning_rate": 1.5033393381220329e-05,
      "loss": 0.8787,
      "step": 9745
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.8852258920669556,
      "learning_rate": 1.4963497421437577e-05,
      "loss": 1.0159,
      "step": 9750
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.8979580402374268,
      "learning_rate": 1.48937511846164e-05,
      "loss": 0.7945,
      "step": 9755
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.7696619033813477,
      "learning_rate": 1.4824154793558375e-05,
      "loss": 0.8359,
      "step": 9760
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.8621139526367188,
      "learning_rate": 1.4754708370801151e-05,
      "loss": 0.9384,
      "step": 9765
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7909301519393921,
      "learning_rate": 1.4685412038618473e-05,
      "loss": 0.7223,
      "step": 9770
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.8770456314086914,
      "learning_rate": 1.4616265919019645e-05,
      "loss": 0.7893,
      "step": 9775
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7560000419616699,
      "learning_rate": 1.454727013374959e-05,
      "loss": 0.8974,
      "step": 9780
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.8642236590385437,
      "learning_rate": 1.4478424804288582e-05,
      "loss": 0.9128,
      "step": 9785
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9646701216697693,
      "learning_rate": 1.440973005185191e-05,
      "loss": 0.8999,
      "step": 9790
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.790399432182312,
      "learning_rate": 1.434118599738975e-05,
      "loss": 0.8739,
      "step": 9795
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.8744881749153137,
      "learning_rate": 1.427279276158704e-05,
      "loss": 1.0425,
      "step": 9800
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7287259697914124,
      "learning_rate": 1.4204550464863021e-05,
      "loss": 0.9056,
      "step": 9805
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.8026889562606812,
      "learning_rate": 1.4136459227371269e-05,
      "loss": 0.9259,
      "step": 9810
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.8606771230697632,
      "learning_rate": 1.4068519168999405e-05,
      "loss": 0.8604,
      "step": 9815
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.8293012976646423,
      "learning_rate": 1.4000730409368845e-05,
      "loss": 0.8262,
      "step": 9820
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.039216160774231,
      "learning_rate": 1.3933093067834601e-05,
      "loss": 0.9428,
      "step": 9825
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.8637626767158508,
      "learning_rate": 1.3865607263485091e-05,
      "loss": 0.9074,
      "step": 9830
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1418802738189697,
      "learning_rate": 1.3798273115141912e-05,
      "loss": 0.8704,
      "step": 9835
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7517408728599548,
      "learning_rate": 1.373109074135972e-05,
      "loss": 0.8379,
      "step": 9840
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9394586682319641,
      "learning_rate": 1.3664060260425827e-05,
      "loss": 0.8223,
      "step": 9845
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.8001432418823242,
      "learning_rate": 1.359718179036019e-05,
      "loss": 0.8278,
      "step": 9850
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7592292428016663,
      "learning_rate": 1.353045544891508e-05,
      "loss": 0.9601,
      "step": 9855
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.752763569355011,
      "learning_rate": 1.3463881353574947e-05,
      "loss": 0.7738,
      "step": 9860
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.8289079666137695,
      "learning_rate": 1.339745962155613e-05,
      "loss": 0.7855,
      "step": 9865
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.8047125339508057,
      "learning_rate": 1.33311903698068e-05,
      "loss": 0.8015,
      "step": 9870
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.701745331287384,
      "learning_rate": 1.326507371500656e-05,
      "loss": 0.9886,
      "step": 9875
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.7644436955451965,
      "learning_rate": 1.3199109773566387e-05,
      "loss": 0.9931,
      "step": 9880
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7744791507720947,
      "learning_rate": 1.3133298661628368e-05,
      "loss": 0.8211,
      "step": 9885
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7323466539382935,
      "learning_rate": 1.3067640495065492e-05,
      "loss": 0.8264,
      "step": 9890
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.008718729019165,
      "learning_rate": 1.3002135389481451e-05,
      "loss": 0.9723,
      "step": 9895
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.8565289378166199,
      "learning_rate": 1.29367834602105e-05,
      "loss": 0.8958,
      "step": 9900
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.6596556901931763,
      "learning_rate": 1.2871584822317151e-05,
      "loss": 0.8743,
      "step": 9905
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7944501638412476,
      "learning_rate": 1.2806539590596023e-05,
      "loss": 0.8661,
      "step": 9910
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.8090604543685913,
      "learning_rate": 1.2741647879571627e-05,
      "loss": 0.9232,
      "step": 9915
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7964836955070496,
      "learning_rate": 1.2676909803498161e-05,
      "loss": 0.9486,
      "step": 9920
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0404587984085083,
      "learning_rate": 1.2612325476359388e-05,
      "loss": 1.0251,
      "step": 9925
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.8059197068214417,
      "learning_rate": 1.2547895011868304e-05,
      "loss": 0.7066,
      "step": 9930
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7185767292976379,
      "learning_rate": 1.2483618523467e-05,
      "loss": 0.8999,
      "step": 9935
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7250891327857971,
      "learning_rate": 1.241949612432649e-05,
      "loss": 0.898,
      "step": 9940
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7541928887367249,
      "learning_rate": 1.2355527927346478e-05,
      "loss": 0.8629,
      "step": 9945
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.006060004234314,
      "learning_rate": 1.229171404515511e-05,
      "loss": 0.9575,
      "step": 9950
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.737918496131897,
      "learning_rate": 1.2228054590108962e-05,
      "loss": 0.9269,
      "step": 9955
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.8843221068382263,
      "learning_rate": 1.2164549674292581e-05,
      "loss": 0.9613,
      "step": 9960
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7975506782531738,
      "learning_rate": 1.2101199409518483e-05,
      "loss": 0.8365,
      "step": 9965
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9914053678512573,
      "learning_rate": 1.2038003907326867e-05,
      "loss": 0.8665,
      "step": 9970
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.678312361240387,
      "learning_rate": 1.1974963278985463e-05,
      "loss": 0.8762,
      "step": 9975
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.8215805292129517,
      "learning_rate": 1.1912077635489282e-05,
      "loss": 0.957,
      "step": 9980
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.8267842531204224,
      "learning_rate": 1.1849347087560525e-05,
      "loss": 0.9234,
      "step": 9985
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7704806923866272,
      "learning_rate": 1.1786771745648229e-05,
      "loss": 0.8277,
      "step": 9990
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9222090244293213,
      "learning_rate": 1.1724351719928228e-05,
      "loss": 0.8743,
      "step": 9995
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.7938253879547119,
      "learning_rate": 1.1662087120302867e-05,
      "loss": 0.8359,
      "step": 10000
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.8336005210876465,
      "learning_rate": 1.1599978056400796e-05,
      "loss": 0.8933,
      "step": 10005
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.847213864326477,
      "learning_rate": 1.1538024637576905e-05,
      "loss": 0.9159,
      "step": 10010
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.7625183463096619,
      "learning_rate": 1.1476226972911974e-05,
      "loss": 0.8969,
      "step": 10015
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.8129417300224304,
      "learning_rate": 1.1414585171212555e-05,
      "loss": 0.8956,
      "step": 10020
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.7780897617340088,
      "learning_rate": 1.1353099341010786e-05,
      "loss": 0.8503,
      "step": 10025
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.6232269406318665,
      "learning_rate": 1.1291769590564182e-05,
      "loss": 0.8895,
      "step": 10030
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.7388679385185242,
      "learning_rate": 1.1230596027855434e-05,
      "loss": 0.862,
      "step": 10035
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9394962787628174,
      "learning_rate": 1.1169578760592292e-05,
      "loss": 0.9012,
      "step": 10040
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0249824523925781,
      "learning_rate": 1.1108717896207276e-05,
      "loss": 0.9584,
      "step": 10045
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.6979790329933167,
      "learning_rate": 1.1048013541857472e-05,
      "loss": 0.8473,
      "step": 10050
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.8743270039558411,
      "learning_rate": 1.0987465804424512e-05,
      "loss": 0.8449,
      "step": 10055
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.7989283800125122,
      "learning_rate": 1.0927074790514203e-05,
      "loss": 0.8792,
      "step": 10060
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.7869884371757507,
      "learning_rate": 1.0866840606456452e-05,
      "loss": 1.0367,
      "step": 10065
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9411396384239197,
      "learning_rate": 1.0806763358305005e-05,
      "loss": 0.7985,
      "step": 10070
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.7178574204444885,
      "learning_rate": 1.074684315183727e-05,
      "loss": 0.8868,
      "step": 10075
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.6789056062698364,
      "learning_rate": 1.0687080092554225e-05,
      "loss": 0.8484,
      "step": 10080
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0155320167541504,
      "learning_rate": 1.0627474285680105e-05,
      "loss": 0.8272,
      "step": 10085
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.7251559495925903,
      "learning_rate": 1.0568025836162265e-05,
      "loss": 0.8303,
      "step": 10090
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.6140643358230591,
      "learning_rate": 1.0508734848671064e-05,
      "loss": 0.7968,
      "step": 10095
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.6832136511802673,
      "learning_rate": 1.0449601427599588e-05,
      "loss": 0.8966,
      "step": 10100
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.7631263732910156,
      "learning_rate": 1.0390625677063415e-05,
      "loss": 0.8881,
      "step": 10105
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9187348484992981,
      "learning_rate": 1.0331807700900664e-05,
      "loss": 0.9736,
      "step": 10110
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.8617235422134399,
      "learning_rate": 1.0273147602671562e-05,
      "loss": 0.9088,
      "step": 10115
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.6641935110092163,
      "learning_rate": 1.0214645485658358e-05,
      "loss": 0.8002,
      "step": 10120
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7935595512390137,
      "learning_rate": 1.0156301452865246e-05,
      "loss": 0.7914,
      "step": 10125
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9449029564857483,
      "learning_rate": 1.0098115607017922e-05,
      "loss": 0.7394,
      "step": 10130
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.8909382224082947,
      "learning_rate": 1.00400880505637e-05,
      "loss": 0.7768,
      "step": 10135
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7915505766868591,
      "learning_rate": 9.982218885671158e-06,
      "loss": 0.8899,
      "step": 10140
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7059157490730286,
      "learning_rate": 9.924508214229933e-06,
      "loss": 0.8906,
      "step": 10145
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.8084311485290527,
      "learning_rate": 9.866956137850736e-06,
      "loss": 0.8016,
      "step": 10150
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.8688581585884094,
      "learning_rate": 9.809562757864887e-06,
      "loss": 0.9325,
      "step": 10155
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7226961851119995,
      "learning_rate": 9.752328175324366e-06,
      "loss": 0.7376,
      "step": 10160
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9018610119819641,
      "learning_rate": 9.695252491001617e-06,
      "loss": 0.9814,
      "step": 10165
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.6705411672592163,
      "learning_rate": 9.638335805389209e-06,
      "loss": 0.9008,
      "step": 10170
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.798916220664978,
      "learning_rate": 9.581578218699805e-06,
      "loss": 0.969,
      "step": 10175
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.748852550983429,
      "learning_rate": 9.524979830865999e-06,
      "loss": 0.8754,
      "step": 10180
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7891843914985657,
      "learning_rate": 9.468540741539988e-06,
      "loss": 0.844,
      "step": 10185
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7447881102561951,
      "learning_rate": 9.41226105009353e-06,
      "loss": 0.8591,
      "step": 10190
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.8944937586784363,
      "learning_rate": 9.356140855617778e-06,
      "loss": 0.8099,
      "step": 10195
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.8635166883468628,
      "learning_rate": 9.30018025692302e-06,
      "loss": 0.8817,
      "step": 10200
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7399436831474304,
      "learning_rate": 9.244379352538535e-06,
      "loss": 0.8345,
      "step": 10205
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.8072450757026672,
      "learning_rate": 9.188738240712447e-06,
      "loss": 0.8074,
      "step": 10210
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7312385439872742,
      "learning_rate": 9.133257019411524e-06,
      "loss": 0.7803,
      "step": 10215
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.90677410364151,
      "learning_rate": 9.077935786321045e-06,
      "loss": 0.8638,
      "step": 10220
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.8683536648750305,
      "learning_rate": 9.022774638844588e-06,
      "loss": 0.9492,
      "step": 10225
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7896186113357544,
      "learning_rate": 8.96777367410383e-06,
      "loss": 0.9377,
      "step": 10230
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.7445749640464783,
      "learning_rate": 8.912932988938472e-06,
      "loss": 0.7302,
      "step": 10235
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7277476191520691,
      "learning_rate": 8.858252679905966e-06,
      "loss": 0.7507,
      "step": 10240
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.8207641243934631,
      "learning_rate": 8.803732843281409e-06,
      "loss": 0.8242,
      "step": 10245
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.8514395952224731,
      "learning_rate": 8.749373575057384e-06,
      "loss": 1.0068,
      "step": 10250
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.8273018598556519,
      "learning_rate": 8.695174970943732e-06,
      "loss": 0.927,
      "step": 10255
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7913432121276855,
      "learning_rate": 8.641137126367416e-06,
      "loss": 0.8793,
      "step": 10260
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.8091998100280762,
      "learning_rate": 8.587260136472353e-06,
      "loss": 0.8632,
      "step": 10265
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.8871718645095825,
      "learning_rate": 8.53354409611924e-06,
      "loss": 0.7877,
      "step": 10270
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.8100157976150513,
      "learning_rate": 8.479989099885388e-06,
      "loss": 0.8755,
      "step": 10275
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.8143556118011475,
      "learning_rate": 8.426595242064606e-06,
      "loss": 0.9769,
      "step": 10280
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.8220342993736267,
      "learning_rate": 8.373362616666936e-06,
      "loss": 1.0094,
      "step": 10285
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.774535596370697,
      "learning_rate": 8.320291317418549e-06,
      "loss": 0.9654,
      "step": 10290
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7471094727516174,
      "learning_rate": 8.26738143776159e-06,
      "loss": 0.9107,
      "step": 10295
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7431105375289917,
      "learning_rate": 8.214633070853938e-06,
      "loss": 0.8253,
      "step": 10300
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7525473237037659,
      "learning_rate": 8.162046309569205e-06,
      "loss": 0.7647,
      "step": 10305
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.8467829823493958,
      "learning_rate": 8.109621246496368e-06,
      "loss": 0.9366,
      "step": 10310
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.852018415927887,
      "learning_rate": 8.057357973939727e-06,
      "loss": 0.9855,
      "step": 10315
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9554034471511841,
      "learning_rate": 8.005256583918763e-06,
      "loss": 1.0396,
      "step": 10320
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7318937182426453,
      "learning_rate": 7.953317168167862e-06,
      "loss": 0.8658,
      "step": 10325
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7739142179489136,
      "learning_rate": 7.901539818136261e-06,
      "loss": 0.8594,
      "step": 10330
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.8797909617424011,
      "learning_rate": 7.849924624987881e-06,
      "loss": 1.0571,
      "step": 10335
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.8140009045600891,
      "learning_rate": 7.798471679601082e-06,
      "loss": 0.8524,
      "step": 10340
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.8369308114051819,
      "learning_rate": 7.747181072568576e-06,
      "loss": 0.8995,
      "step": 10345
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7931288480758667,
      "learning_rate": 7.696052894197247e-06,
      "loss": 0.7742,
      "step": 10350
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.7234024405479431,
      "learning_rate": 7.645087234507975e-06,
      "loss": 0.9128,
      "step": 10355
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7736112475395203,
      "learning_rate": 7.594284183235556e-06,
      "loss": 0.8915,
      "step": 10360
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9592683911323547,
      "learning_rate": 7.543643829828406e-06,
      "loss": 0.9485,
      "step": 10365
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9321366548538208,
      "learning_rate": 7.493166263448515e-06,
      "loss": 0.9154,
      "step": 10370
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7174406051635742,
      "learning_rate": 7.442851572971265e-06,
      "loss": 0.924,
      "step": 10375
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.8121606707572937,
      "learning_rate": 7.392699846985263e-06,
      "loss": 0.8669,
      "step": 10380
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7170925736427307,
      "learning_rate": 7.342711173792127e-06,
      "loss": 0.8111,
      "step": 10385
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7287190556526184,
      "learning_rate": 7.2928856414064996e-06,
      "loss": 0.8309,
      "step": 10390
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7759504914283752,
      "learning_rate": 7.243223337555693e-06,
      "loss": 0.9299,
      "step": 10395
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7308063507080078,
      "learning_rate": 7.193724349679654e-06,
      "loss": 0.8408,
      "step": 10400
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.8811477422714233,
      "learning_rate": 7.144388764930788e-06,
      "loss": 0.9797,
      "step": 10405
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7261849641799927,
      "learning_rate": 7.095216670173776e-06,
      "loss": 0.9345,
      "step": 10410
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.6926263570785522,
      "learning_rate": 7.046208151985456e-06,
      "loss": 0.7932,
      "step": 10415
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7097862958908081,
      "learning_rate": 6.997363296654691e-06,
      "loss": 0.8278,
      "step": 10420
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.8528007864952087,
      "learning_rate": 6.9486821901821435e-06,
      "loss": 0.7973,
      "step": 10425
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.8164514899253845,
      "learning_rate": 6.900164918280128e-06,
      "loss": 1.0044,
      "step": 10430
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.8210064768791199,
      "learning_rate": 6.851811566372601e-06,
      "loss": 0.8487,
      "step": 10435
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.8617835640907288,
      "learning_rate": 6.8036222195948075e-06,
      "loss": 0.8885,
      "step": 10440
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9348113536834717,
      "learning_rate": 6.755596962793309e-06,
      "loss": 0.8039,
      "step": 10445
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.8518204689025879,
      "learning_rate": 6.707735880525723e-06,
      "loss": 0.8798,
      "step": 10450
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.6845257878303528,
      "learning_rate": 6.660039057060552e-06,
      "loss": 0.8902,
      "step": 10455
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.6822859048843384,
      "learning_rate": 6.612506576377175e-06,
      "loss": 0.9559,
      "step": 10460
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.8563945889472961,
      "learning_rate": 6.565138522165581e-06,
      "loss": 0.8785,
      "step": 10465
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.7450621724128723,
      "learning_rate": 6.517934977826223e-06,
      "loss": 0.7979,
      "step": 10470
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9026947021484375,
      "learning_rate": 6.4708960264699745e-06,
      "loss": 0.9681,
      "step": 10475
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8436296582221985,
      "learning_rate": 6.424021750917864e-06,
      "loss": 0.9227,
      "step": 10480
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8324162364006042,
      "learning_rate": 6.377312233700938e-06,
      "loss": 0.8586,
      "step": 10485
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.7005321979522705,
      "learning_rate": 6.3307675570602354e-06,
      "loss": 0.8165,
      "step": 10490
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.7949735522270203,
      "learning_rate": 6.284387802946534e-06,
      "loss": 0.8962,
      "step": 10495
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8014736175537109,
      "learning_rate": 6.238173053020191e-06,
      "loss": 0.8234,
      "step": 10500
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8914954662322998,
      "learning_rate": 6.192123388651128e-06,
      "loss": 0.8268,
      "step": 10505
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8167476654052734,
      "learning_rate": 6.146238890918488e-06,
      "loss": 0.8591,
      "step": 10510
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.7341588139533997,
      "learning_rate": 6.100519640610725e-06,
      "loss": 0.841,
      "step": 10515
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.6818287968635559,
      "learning_rate": 6.054965718225258e-06,
      "loss": 0.7915,
      "step": 10520
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.7893515825271606,
      "learning_rate": 6.009577203968453e-06,
      "loss": 0.8643,
      "step": 10525
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8433229923248291,
      "learning_rate": 5.964354177755449e-06,
      "loss": 0.8659,
      "step": 10530
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8310180902481079,
      "learning_rate": 5.919296719209988e-06,
      "loss": 0.8752,
      "step": 10535
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.6668066382408142,
      "learning_rate": 5.874404907664277e-06,
      "loss": 0.812,
      "step": 10540
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.7034140229225159,
      "learning_rate": 5.8296788221589575e-06,
      "loss": 0.8203,
      "step": 10545
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8013192415237427,
      "learning_rate": 5.785118541442791e-06,
      "loss": 0.8858,
      "step": 10550
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8860305547714233,
      "learning_rate": 5.740724143972642e-06,
      "loss": 0.8153,
      "step": 10555
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8523777723312378,
      "learning_rate": 5.6964957079133186e-06,
      "loss": 0.8841,
      "step": 10560
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8978105187416077,
      "learning_rate": 5.652433311137384e-06,
      "loss": 0.8173,
      "step": 10565
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8412472605705261,
      "learning_rate": 5.608537031225092e-06,
      "loss": 0.8595,
      "step": 10570
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.767458975315094,
      "learning_rate": 5.564806945464218e-06,
      "loss": 0.7613,
      "step": 10575
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.7909321784973145,
      "learning_rate": 5.521243130849873e-06,
      "loss": 0.9713,
      "step": 10580
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.975814700126648,
      "learning_rate": 5.4778456640845135e-06,
      "loss": 0.8448,
      "step": 10585
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.8089463710784912,
      "learning_rate": 5.434614621577594e-06,
      "loss": 0.8023,
      "step": 10590
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.876457691192627,
      "learning_rate": 5.391550079445606e-06,
      "loss": 0.961,
      "step": 10595
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.852300763130188,
      "learning_rate": 5.348652113511898e-06,
      "loss": 0.8673,
      "step": 10600
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.857282280921936,
      "learning_rate": 5.305920799306496e-06,
      "loss": 0.7921,
      "step": 10605
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.715785026550293,
      "learning_rate": 5.263356212066028e-06,
      "loss": 0.8206,
      "step": 10610
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.8727372884750366,
      "learning_rate": 5.220958426733558e-06,
      "loss": 0.9199,
      "step": 10615
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9024982452392578,
      "learning_rate": 5.178727517958459e-06,
      "loss": 0.9166,
      "step": 10620
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7073826789855957,
      "learning_rate": 5.136663560096277e-06,
      "loss": 0.7656,
      "step": 10625
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7534424662590027,
      "learning_rate": 5.094766627208647e-06,
      "loss": 0.8346,
      "step": 10630
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9219887256622314,
      "learning_rate": 5.053036793063093e-06,
      "loss": 0.935,
      "step": 10635
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.8320545554161072,
      "learning_rate": 5.011474131132931e-06,
      "loss": 0.8014,
      "step": 10640
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.6234966516494751,
      "learning_rate": 4.970078714597149e-06,
      "loss": 0.8109,
      "step": 10645
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7838702201843262,
      "learning_rate": 4.928850616340252e-06,
      "loss": 0.9262,
      "step": 10650
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.717758297920227,
      "learning_rate": 4.887789908952178e-06,
      "loss": 0.9535,
      "step": 10655
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.8258304595947266,
      "learning_rate": 4.846896664728118e-06,
      "loss": 0.8687,
      "step": 10660
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7627803087234497,
      "learning_rate": 4.806170955668421e-06,
      "loss": 0.7835,
      "step": 10665
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.8373433947563171,
      "learning_rate": 4.765612853478451e-06,
      "loss": 0.8576,
      "step": 10670
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0411616563796997,
      "learning_rate": 4.725222429568477e-06,
      "loss": 0.8517,
      "step": 10675
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.8107260465621948,
      "learning_rate": 4.68499975505351e-06,
      "loss": 0.9144,
      "step": 10680
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9633600115776062,
      "learning_rate": 4.644944900753278e-06,
      "loss": 0.9668,
      "step": 10685
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7689321041107178,
      "learning_rate": 4.605057937191947e-06,
      "loss": 1.0446,
      "step": 10690
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7285904884338379,
      "learning_rate": 4.565338934598129e-06,
      "loss": 0.9144,
      "step": 10695
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.8424079418182373,
      "learning_rate": 4.525787962904682e-06,
      "loss": 0.7431,
      "step": 10700
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9980275630950928,
      "learning_rate": 4.4864050917486355e-06,
      "loss": 0.972,
      "step": 10705
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.8323859572410583,
      "learning_rate": 4.447190390471024e-06,
      "loss": 0.9221,
      "step": 10710
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.8923182487487793,
      "learning_rate": 4.408143928116815e-06,
      "loss": 0.9211,
      "step": 10715
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.8041788935661316,
      "learning_rate": 4.369265773434739e-06,
      "loss": 0.7976,
      "step": 10720
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.7583939433097839,
      "learning_rate": 4.330555994877195e-06,
      "loss": 0.8283,
      "step": 10725
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.8751514554023743,
      "learning_rate": 4.292014660600119e-06,
      "loss": 0.8757,
      "step": 10730
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.8682853579521179,
      "learning_rate": 4.253641838462852e-06,
      "loss": 0.9022,
      "step": 10735
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.7690489292144775,
      "learning_rate": 4.2154375960280935e-06,
      "loss": 0.8395,
      "step": 10740
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.8148819208145142,
      "learning_rate": 4.17740200056167e-06,
      "loss": 0.9871,
      "step": 10745
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.759973406791687,
      "learning_rate": 4.139535119032501e-06,
      "loss": 0.8988,
      "step": 10750
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.7451571822166443,
      "learning_rate": 4.1018370181124424e-06,
      "loss": 0.9156,
      "step": 10755
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.6766397356987,
      "learning_rate": 4.064307764176168e-06,
      "loss": 0.8459,
      "step": 10760
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.6828172206878662,
      "learning_rate": 4.0269474233010865e-06,
      "loss": 0.8727,
      "step": 10765
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.8449336886405945,
      "learning_rate": 3.9897560612672136e-06,
      "loss": 0.83,
      "step": 10770
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9624890089035034,
      "learning_rate": 3.9527337435570025e-06,
      "loss": 0.8118,
      "step": 10775
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9531586170196533,
      "learning_rate": 3.915880535355298e-06,
      "loss": 0.9792,
      "step": 10780
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0197491645812988,
      "learning_rate": 3.879196501549209e-06,
      "loss": 0.8994,
      "step": 10785
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.8006296157836914,
      "learning_rate": 3.842681706727957e-06,
      "loss": 0.9336,
      "step": 10790
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.7368683218955994,
      "learning_rate": 3.806336215182782e-06,
      "loss": 0.7437,
      "step": 10795
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.7810891270637512,
      "learning_rate": 3.7701600909068714e-06,
      "loss": 0.9383,
      "step": 10800
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.8878799080848694,
      "learning_rate": 3.734153397595164e-06,
      "loss": 0.8256,
      "step": 10805
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.8740106225013733,
      "learning_rate": 3.6983161986443027e-06,
      "loss": 0.8673,
      "step": 10810
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9431246519088745,
      "learning_rate": 3.662648557152515e-06,
      "loss": 0.8343,
      "step": 10815
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.7177392840385437,
      "learning_rate": 3.6271505359194547e-06,
      "loss": 0.8108,
      "step": 10820
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.8011447191238403,
      "learning_rate": 3.591822197446182e-06,
      "loss": 0.8757,
      "step": 10825
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.8898225426673889,
      "learning_rate": 3.556663603934951e-06,
      "loss": 0.8689,
      "step": 10830
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.8089674115180969,
      "learning_rate": 3.5216748172891446e-06,
      "loss": 0.9874,
      "step": 10835
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.8062359690666199,
      "learning_rate": 3.486855899113217e-06,
      "loss": 0.8446,
      "step": 10840
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.7981634140014648,
      "learning_rate": 3.4522069107124966e-06,
      "loss": 0.871,
      "step": 10845
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.6979275345802307,
      "learning_rate": 3.4177279130931163e-06,
      "loss": 0.8024,
      "step": 10850
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0290021896362305,
      "learning_rate": 3.3834189669619377e-06,
      "loss": 0.8975,
      "step": 10855
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.7928755879402161,
      "learning_rate": 3.3492801327263843e-06,
      "loss": 0.8094,
      "step": 10860
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.7724294662475586,
      "learning_rate": 3.3153114704943756e-06,
      "loss": 1.0105,
      "step": 10865
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1200708150863647,
      "learning_rate": 3.2815130400742133e-06,
      "loss": 0.8083,
      "step": 10870
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9042993187904358,
      "learning_rate": 3.247884900974474e-06,
      "loss": 0.7872,
      "step": 10875
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.6799546480178833,
      "learning_rate": 3.214427112403906e-06,
      "loss": 0.8935,
      "step": 10880
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0057941675186157,
      "learning_rate": 3.181139733271332e-06,
      "loss": 0.8105,
      "step": 10885
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.8763043880462646,
      "learning_rate": 3.1480228221854923e-06,
      "loss": 0.7507,
      "step": 10890
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.6605707406997681,
      "learning_rate": 3.1150764374550443e-06,
      "loss": 0.8297,
      "step": 10895
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.8191012740135193,
      "learning_rate": 3.0823006370883534e-06,
      "loss": 0.8562,
      "step": 10900
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.8007928729057312,
      "learning_rate": 3.0496954787934684e-06,
      "loss": 0.8344,
      "step": 10905
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.7685137987136841,
      "learning_rate": 3.0172610199780017e-06,
      "loss": 0.7817,
      "step": 10910
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9101439714431763,
      "learning_rate": 2.984997317748972e-06,
      "loss": 0.926,
      "step": 10915
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.7492097020149231,
      "learning_rate": 2.9529044289127726e-06,
      "loss": 0.6755,
      "step": 10920
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9658187031745911,
      "learning_rate": 2.9209824099750595e-06,
      "loss": 0.8649,
      "step": 10925
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.8430820107460022,
      "learning_rate": 2.889231317140617e-06,
      "loss": 0.9128,
      "step": 10930
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.8035411834716797,
      "learning_rate": 2.857651206313305e-06,
      "loss": 0.8344,
      "step": 10935
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.7803851366043091,
      "learning_rate": 2.8262421330959244e-06,
      "loss": 0.9662,
      "step": 10940
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.8594342470169067,
      "learning_rate": 2.795004152790115e-06,
      "loss": 0.8618,
      "step": 10945
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.937749445438385,
      "learning_rate": 2.7639373203963036e-06,
      "loss": 0.9277,
      "step": 10950
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.787311315536499,
      "learning_rate": 2.7330416906135582e-06,
      "loss": 0.8844,
      "step": 10955
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7901485562324524,
      "learning_rate": 2.702317317839531e-06,
      "loss": 0.7633,
      "step": 10960
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7819672226905823,
      "learning_rate": 2.6717642561703505e-06,
      "loss": 0.8706,
      "step": 10965
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.719790518283844,
      "learning_rate": 2.6413825594004625e-06,
      "loss": 0.8467,
      "step": 10970
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7518222332000732,
      "learning_rate": 2.611172281022645e-06,
      "loss": 0.807,
      "step": 10975
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.837917149066925,
      "learning_rate": 2.5811334742278593e-06,
      "loss": 0.9382,
      "step": 10980
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7309818267822266,
      "learning_rate": 2.551266191905133e-06,
      "loss": 0.8529,
      "step": 10985
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7382307648658752,
      "learning_rate": 2.5215704866415224e-06,
      "loss": 0.709,
      "step": 10990
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.8184390068054199,
      "learning_rate": 2.492046410721971e-06,
      "loss": 0.8557,
      "step": 10995
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9037685394287109,
      "learning_rate": 2.4626940161292187e-06,
      "loss": 1.1225,
      "step": 11000
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.8128904104232788,
      "learning_rate": 2.4335133545437596e-06,
      "loss": 0.8341,
      "step": 11005
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7361746430397034,
      "learning_rate": 2.4045044773437163e-06,
      "loss": 0.8179,
      "step": 11010
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.8453391194343567,
      "learning_rate": 2.3756674356047338e-06,
      "loss": 0.8738,
      "step": 11015
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.8270261287689209,
      "learning_rate": 2.3470022800999193e-06,
      "loss": 0.8572,
      "step": 11020
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7221106290817261,
      "learning_rate": 2.318509061299745e-06,
      "loss": 0.8055,
      "step": 11025
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.8942533731460571,
      "learning_rate": 2.2901878293719257e-06,
      "loss": 0.933,
      "step": 11030
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7586672306060791,
      "learning_rate": 2.2620386341814182e-06,
      "loss": 0.8081,
      "step": 11035
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9604291915893555,
      "learning_rate": 2.234061525290232e-06,
      "loss": 0.8651,
      "step": 11040
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.8790186047554016,
      "learning_rate": 2.2062565519573865e-06,
      "loss": 0.9874,
      "step": 11045
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.8522576093673706,
      "learning_rate": 2.1786237631388428e-06,
      "loss": 0.9064,
      "step": 11050
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.6957898139953613,
      "learning_rate": 2.1511632074873835e-06,
      "loss": 0.8559,
      "step": 11055
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2120394706726074,
      "learning_rate": 2.1238749333525543e-06,
      "loss": 0.8321,
      "step": 11060
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7582048177719116,
      "learning_rate": 2.096758988780556e-06,
      "loss": 0.9462,
      "step": 11065
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.7362944483757019,
      "learning_rate": 2.069815421514176e-06,
      "loss": 0.7978,
      "step": 11070
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.7996507287025452,
      "learning_rate": 2.0430442789927007e-06,
      "loss": 0.8505,
      "step": 11075
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.8071786165237427,
      "learning_rate": 2.0164456083518246e-06,
      "loss": 0.8272,
      "step": 11080
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9571120738983154,
      "learning_rate": 1.990019456423564e-06,
      "loss": 0.8708,
      "step": 11085
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9792050719261169,
      "learning_rate": 1.9637658697362003e-06,
      "loss": 0.9641,
      "step": 11090
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6688560247421265,
      "learning_rate": 1.93768489451418e-06,
      "loss": 0.7361,
      "step": 11095
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6988593339920044,
      "learning_rate": 1.911776576678015e-06,
      "loss": 0.806,
      "step": 11100
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.8763333559036255,
      "learning_rate": 1.8860409618442488e-06,
      "loss": 0.8753,
      "step": 11105
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.8303632736206055,
      "learning_rate": 1.8604780953253353e-06,
      "loss": 0.8662,
      "step": 11110
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.8031095862388611,
      "learning_rate": 1.8350880221295496e-06,
      "loss": 0.7915,
      "step": 11115
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.7835865616798401,
      "learning_rate": 1.8098707869609654e-06,
      "loss": 0.8461,
      "step": 11120
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.7007308006286621,
      "learning_rate": 1.7848264342193333e-06,
      "loss": 1.0948,
      "step": 11125
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.8703559041023254,
      "learning_rate": 1.7599550080000027e-06,
      "loss": 0.8635,
      "step": 11130
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.87107914686203,
      "learning_rate": 1.7352565520938558e-06,
      "loss": 0.8839,
      "step": 11135
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.6888677477836609,
      "learning_rate": 1.7107311099872403e-06,
      "loss": 0.858,
      "step": 11140
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.7759958505630493,
      "learning_rate": 1.6863787248618367e-06,
      "loss": 1.0134,
      "step": 11145
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.7978636026382446,
      "learning_rate": 1.6621994395946916e-06,
      "loss": 0.8704,
      "step": 11150
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.716876208782196,
      "learning_rate": 1.6381932967580505e-06,
      "loss": 0.9225,
      "step": 11155
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.7568153142929077,
      "learning_rate": 1.6143603386192474e-06,
      "loss": 0.8241,
      "step": 11160
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.8619349598884583,
      "learning_rate": 1.5907006071408049e-06,
      "loss": 0.9491,
      "step": 11165
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.7976325750350952,
      "learning_rate": 1.5672141439801446e-06,
      "loss": 0.8012,
      "step": 11170
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.7966151237487793,
      "learning_rate": 1.5439009904896773e-06,
      "loss": 0.9116,
      "step": 11175
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.7527498602867126,
      "learning_rate": 1.5207611877166573e-06,
      "loss": 0.853,
      "step": 11180
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.7213166356086731,
      "learning_rate": 1.4977947764031053e-06,
      "loss": 0.7906,
      "step": 11185
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.8000203371047974,
      "learning_rate": 1.4750017969857643e-06,
      "loss": 0.805,
      "step": 11190
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.8495139479637146,
      "learning_rate": 1.4523822895960216e-06,
      "loss": 0.7696,
      "step": 11195
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.8969099521636963,
      "learning_rate": 1.4299362940598194e-06,
      "loss": 0.8998,
      "step": 11200
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.8963848948478699,
      "learning_rate": 1.4076638498976113e-06,
      "loss": 0.8154,
      "step": 11205
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.816973090171814,
      "learning_rate": 1.3855649963242957e-06,
      "loss": 0.8815,
      "step": 11210
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.6730329990386963,
      "learning_rate": 1.3636397722490813e-06,
      "loss": 0.8261,
      "step": 11215
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.7854148745536804,
      "learning_rate": 1.3418882162755219e-06,
      "loss": 0.8484,
      "step": 11220
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.8766478300094604,
      "learning_rate": 1.3203103667013827e-06,
      "loss": 0.9359,
      "step": 11225
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9398655891418457,
      "learning_rate": 1.298906261518551e-06,
      "loss": 0.716,
      "step": 11230
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9365181922912598,
      "learning_rate": 1.2776759384130698e-06,
      "loss": 0.9044,
      "step": 11235
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.782059371471405,
      "learning_rate": 1.2566194347649385e-06,
      "loss": 0.9254,
      "step": 11240
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.7425671815872192,
      "learning_rate": 1.2357367876481452e-06,
      "loss": 0.8266,
      "step": 11245
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.8430447578430176,
      "learning_rate": 1.2150280338305787e-06,
      "loss": 0.8314,
      "step": 11250
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.7275057435035706,
      "learning_rate": 1.194493209773928e-06,
      "loss": 0.7711,
      "step": 11255
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.758685827255249,
      "learning_rate": 1.1741323516336832e-06,
      "loss": 0.8514,
      "step": 11260
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.6766619086265564,
      "learning_rate": 1.1539454952590123e-06,
      "loss": 0.835,
      "step": 11265
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.7155946493148804,
      "learning_rate": 1.133932676192695e-06,
      "loss": 0.8194,
      "step": 11270
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.7995068430900574,
      "learning_rate": 1.114093929671145e-06,
      "loss": 0.9196,
      "step": 11275
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.7955594062805176,
      "learning_rate": 1.0944292906242326e-06,
      "loss": 0.8761,
      "step": 11280
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.7788894772529602,
      "learning_rate": 1.0749387936753064e-06,
      "loss": 0.9241,
      "step": 11285
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.790854275226593,
      "learning_rate": 1.0556224731411157e-06,
      "loss": 0.8612,
      "step": 11290
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.7776708602905273,
      "learning_rate": 1.0364803630316887e-06,
      "loss": 0.8391,
      "step": 11295
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.8150311708450317,
      "learning_rate": 1.017512497050377e-06,
      "loss": 0.7578,
      "step": 11300
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.6092957854270935,
      "learning_rate": 9.98718908593732e-07,
      "loss": 0.8103,
      "step": 11305
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9293496012687683,
      "learning_rate": 9.8009963075143e-07,
      "loss": 0.8924,
      "step": 11310
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9524035453796387,
      "learning_rate": 9.61654696306258e-07,
      "loss": 0.8824,
      "step": 11315
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.8459351062774658,
      "learning_rate": 9.43384137734038e-07,
      "loss": 0.8594,
      "step": 11320
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.8590577840805054,
      "learning_rate": 9.252879872035713e-07,
      "loss": 0.7377,
      "step": 11325
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7393367290496826,
      "learning_rate": 9.073662765765823e-07,
      "loss": 0.7557,
      "step": 11330
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9165522456169128,
      "learning_rate": 8.896190374076518e-07,
      "loss": 0.9628,
      "step": 11335
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.8148025870323181,
      "learning_rate": 8.720463009441626e-07,
      "loss": 0.8027,
      "step": 11340
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.8349050879478455,
      "learning_rate": 8.546480981262872e-07,
      "loss": 0.9549,
      "step": 11345
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.8241772651672363,
      "learning_rate": 8.374244595868664e-07,
      "loss": 0.9792,
      "step": 11350
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.815220057964325,
      "learning_rate": 8.203754156513865e-07,
      "loss": 0.9256,
      "step": 11355
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7551809549331665,
      "learning_rate": 8.03500996337958e-07,
      "loss": 0.8234,
      "step": 11360
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0061076879501343,
      "learning_rate": 7.868012313571927e-07,
      "loss": 0.8945,
      "step": 11365
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9475626945495605,
      "learning_rate": 7.702761501122147e-07,
      "loss": 0.8082,
      "step": 11370
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9202659726142883,
      "learning_rate": 7.539257816985835e-07,
      "loss": 0.8168,
      "step": 11375
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7858511805534363,
      "learning_rate": 7.377501549042265e-07,
      "loss": 0.8055,
      "step": 11380
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.8680155873298645,
      "learning_rate": 7.217492982094176e-07,
      "loss": 0.8777,
      "step": 11385
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7543894648551941,
      "learning_rate": 7.059232397867099e-07,
      "loss": 0.8607,
      "step": 11390
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7095973491668701,
      "learning_rate": 6.902720075009139e-07,
      "loss": 0.8256,
      "step": 11395
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7466055750846863,
      "learning_rate": 6.747956289089863e-07,
      "loss": 0.9458,
      "step": 11400
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7379010319709778,
      "learning_rate": 6.594941312600411e-07,
      "loss": 0.9594,
      "step": 11405
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7673978805541992,
      "learning_rate": 6.443675414952833e-07,
      "loss": 0.9929,
      "step": 11410
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0383026599884033,
      "learning_rate": 6.294158862479527e-07,
      "loss": 0.8045,
      "step": 11415
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7899762988090515,
      "learning_rate": 6.146391918433026e-07,
      "loss": 0.8108,
      "step": 11420
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.889194667339325,
      "learning_rate": 6.000374842984991e-07,
      "loss": 1.0163,
      "step": 11425
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.7975661754608154,
      "learning_rate": 5.856107893226325e-07,
      "loss": 0.8313,
      "step": 11430
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.87502521276474,
      "learning_rate": 5.713591323166622e-07,
      "loss": 0.8928,
      "step": 11435
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1591724157333374,
      "learning_rate": 5.57282538373316e-07,
      "loss": 0.9258,
      "step": 11440
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.7677488923072815,
      "learning_rate": 5.433810322771571e-07,
      "loss": 0.8604,
      "step": 11445
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.7682551145553589,
      "learning_rate": 5.296546385044065e-07,
      "loss": 0.8226,
      "step": 11450
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.8181256651878357,
      "learning_rate": 5.161033812229987e-07,
      "loss": 1.0031,
      "step": 11455
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.7503376007080078,
      "learning_rate": 5.027272842925146e-07,
      "loss": 0.8191,
      "step": 11460
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.8176915049552917,
      "learning_rate": 4.895263712641151e-07,
      "loss": 0.9518,
      "step": 11465
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.8447414636611938,
      "learning_rate": 4.7650066538051927e-07,
      "loss": 0.8723,
      "step": 11470
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.8194155693054199,
      "learning_rate": 4.636501895759704e-07,
      "loss": 0.8594,
      "step": 11475
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.8020860552787781,
      "learning_rate": 4.5097496647616977e-07,
      "loss": 0.8447,
      "step": 11480
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0051360130310059,
      "learning_rate": 4.3847501839827666e-07,
      "loss": 0.9012,
      "step": 11485
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.7807974219322205,
      "learning_rate": 4.261503673508194e-07,
      "loss": 0.8135,
      "step": 11490
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.769763708114624,
      "learning_rate": 4.1400103503368425e-07,
      "loss": 0.8548,
      "step": 11495
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.161005973815918,
      "learning_rate": 4.0202704283810456e-07,
      "loss": 0.8857,
      "step": 11500
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.8864768743515015,
      "learning_rate": 3.9022841184657155e-07,
      "loss": 0.7329,
      "step": 11505
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.6769604086875916,
      "learning_rate": 3.7860516283282355e-07,
      "loss": 0.859,
      "step": 11510
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.6905530095100403,
      "learning_rate": 3.6715731626179027e-07,
      "loss": 0.8472,
      "step": 11515
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.7534658312797546,
      "learning_rate": 3.55884892289593e-07,
      "loss": 0.7692,
      "step": 11520
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.6859395503997803,
      "learning_rate": 3.447879107634888e-07,
      "loss": 0.7706,
      "step": 11525
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.847043514251709,
      "learning_rate": 3.338663912218265e-07,
      "loss": 0.7983,
      "step": 11530
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.7656576037406921,
      "learning_rate": 3.23120352894013e-07,
      "loss": 0.9127,
      "step": 11535
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.7100486159324646,
      "learning_rate": 3.1254981470049126e-07,
      "loss": 0.8365,
      "step": 11540
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9396764636039734,
      "learning_rate": 3.021547952527293e-07,
      "loss": 0.822,
      "step": 11545
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9337235689163208,
      "learning_rate": 2.9193531285311993e-07,
      "loss": 0.9381,
      "step": 11550
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9499861001968384,
      "learning_rate": 2.818913854950256e-07,
      "loss": 0.891,
      "step": 11555
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.6093025803565979,
      "learning_rate": 2.720230308626781e-07,
      "loss": 0.7486,
      "step": 11560
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.7417643666267395,
      "learning_rate": 2.6233026633118994e-07,
      "loss": 0.7791,
      "step": 11565
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.7814109921455383,
      "learning_rate": 2.528131089665431e-07,
      "loss": 0.7943,
      "step": 11570
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.8193375468254089,
      "learning_rate": 2.4347157552548907e-07,
      "loss": 0.982,
      "step": 11575
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.8868688344955444,
      "learning_rate": 2.3430568245558227e-07,
      "loss": 1.0064,
      "step": 11580
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0020816326141357,
      "learning_rate": 2.2531544589512454e-07,
      "loss": 0.8038,
      "step": 11585
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.6277792453765869,
      "learning_rate": 2.1650088167313177e-07,
      "loss": 0.6911,
      "step": 11590
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.8221533894538879,
      "learning_rate": 2.0786200530933387e-07,
      "loss": 0.9594,
      "step": 11595
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.751361072063446,
      "learning_rate": 1.9939883201410826e-07,
      "loss": 0.8091,
      "step": 11600
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.835889458656311,
      "learning_rate": 1.911113766884909e-07,
      "loss": 0.803,
      "step": 11605
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.8068159818649292,
      "learning_rate": 1.8299965392413187e-07,
      "loss": 0.8209,
      "step": 11610
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.8454260230064392,
      "learning_rate": 1.7506367800325108e-07,
      "loss": 0.8195,
      "step": 11615
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.6896167993545532,
      "learning_rate": 1.6730346289864918e-07,
      "loss": 0.9279,
      "step": 11620
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.8151208162307739,
      "learning_rate": 1.597190222736633e-07,
      "loss": 0.8927,
      "step": 11625
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.7260826230049133,
      "learning_rate": 1.5231036948215594e-07,
      "loss": 0.8088,
      "step": 11630
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.191455364227295,
      "learning_rate": 1.4507751756845934e-07,
      "loss": 0.9637,
      "step": 11635
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.6957030892372131,
      "learning_rate": 1.380204792673867e-07,
      "loss": 0.9675,
      "step": 11640
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9041639566421509,
      "learning_rate": 1.3113926700420998e-07,
      "loss": 0.9778,
      "step": 11645
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.7494191527366638,
      "learning_rate": 1.2443389289460427e-07,
      "loss": 0.7918,
      "step": 11650
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.8155191540718079,
      "learning_rate": 1.1790436874465904e-07,
      "loss": 0.8427,
      "step": 11655
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.8684887886047363,
      "learning_rate": 1.1155070605085583e-07,
      "loss": 0.85,
      "step": 11660
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9394787549972534,
      "learning_rate": 1.0537291600000165e-07,
      "loss": 0.82,
      "step": 11665
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.829444169998169,
      "learning_rate": 9.937100946930677e-08,
      "loss": 0.739,
      "step": 11670
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.7598903775215149,
      "learning_rate": 9.354499702625141e-08,
      "loss": 0.7957,
      "step": 11675
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6364439725875854,
      "learning_rate": 8.789488892864129e-08,
      "loss": 0.7983,
      "step": 11680
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.8551909923553467,
      "learning_rate": 8.242069512456318e-08,
      "loss": 0.8569,
      "step": 11685
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.8468892574310303,
      "learning_rate": 7.71224252523961e-08,
      "loss": 0.8113,
      "step": 11690
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.7935190200805664,
      "learning_rate": 7.200008864073349e-08,
      "loss": 0.909,
      "step": 11695
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.8427583575248718,
      "learning_rate": 6.705369430843878e-08,
      "loss": 0.8116,
      "step": 11700
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6899325847625732,
      "learning_rate": 6.228325096457876e-08,
      "loss": 0.6711,
      "step": 11705
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1237952709197998,
      "learning_rate": 5.7688767008423627e-08,
      "loss": 0.9356,
      "step": 11710
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6556943655014038,
      "learning_rate": 5.327025052943579e-08,
      "loss": 0.7901,
      "step": 11715
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.796808660030365,
      "learning_rate": 4.902770930725886e-08,
      "loss": 0.8344,
      "step": 11720
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.8386369943618774,
      "learning_rate": 4.4961150811695384e-08,
      "loss": 0.9573,
      "step": 11725
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.8265208005905151,
      "learning_rate": 4.107058220270687e-08,
      "loss": 0.9486,
      "step": 11730
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.7974259257316589,
      "learning_rate": 3.735601033035829e-08,
      "loss": 0.8705,
      "step": 11735
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.97637540102005,
      "learning_rate": 3.3817441734862455e-08,
      "loss": 0.8278,
      "step": 11740
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.6852146983146667,
      "learning_rate": 3.045488264656893e-08,
      "loss": 0.7963,
      "step": 11745
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.8473973274230957,
      "learning_rate": 2.7268338985875218e-08,
      "loss": 0.843,
      "step": 11750
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.7919003367424011,
      "learning_rate": 2.4257816363326692e-08,
      "loss": 0.8883,
      "step": 11755
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.7065966129302979,
      "learning_rate": 2.1423320079494435e-08,
      "loss": 0.8417,
      "step": 11760
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.7676900625228882,
      "learning_rate": 1.8764855125052993e-08,
      "loss": 0.9791,
      "step": 11765
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.8119387626647949,
      "learning_rate": 1.6282426180758148e-08,
      "loss": 0.9255,
      "step": 11770
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.060429573059082,
      "learning_rate": 1.3976037617380311e-08,
      "loss": 1.0282,
      "step": 11775
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.7375126481056213,
      "learning_rate": 1.1845693495760035e-08,
      "loss": 0.7632,
      "step": 11780
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.090340495109558,
      "learning_rate": 9.891397566774708e-09,
      "loss": 0.7868,
      "step": 11785
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.8043749928474426,
      "learning_rate": 8.113153271327446e-09,
      "loss": 0.8244,
      "step": 11790
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.781341552734375,
      "learning_rate": 6.510963740369303e-09,
      "loss": 0.7928,
      "step": 11795
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.6983230710029602,
      "learning_rate": 5.08483179485486e-09,
      "loss": 0.9473,
      "step": 11800
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.7805584669113159,
      "learning_rate": 3.83475994575333e-09,
      "loss": 0.825,
      "step": 11805
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.7910088300704956,
      "learning_rate": 2.7607503940707546e-09,
      "loss": 0.8383,
      "step": 11810
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.7440575361251831,
      "learning_rate": 1.862805030783399e-09,
      "loss": 0.899,
      "step": 11815
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.762624204158783,
      "learning_rate": 1.1409254369154632e-09,
      "loss": 0.8825,
      "step": 11820
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.8033168315887451,
      "learning_rate": 5.951128834613684e-10,
      "loss": 0.8778,
      "step": 11825
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.8624210953712463,
      "learning_rate": 2.2536833143016467e-10,
      "loss": 0.8045,
      "step": 11830
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.7830139398574829,
      "learning_rate": 3.169243183442916e-11,
      "loss": 1.0231,
      "step": 11835
    },
    {
      "epoch": 1.0,
      "step": 11838,
      "total_flos": 6.314593069983334e+16,
      "train_loss": 0.0,
      "train_runtime": 0.0111,
      "train_samples_per_second": 1063397.814,
      "train_steps_per_second": 1063397.814
    }
  ],
  "logging_steps": 5,
  "max_steps": 11838,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 6.314593069983334e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}