{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9991577765300392,
  "eval_steps": 500,
  "global_step": 3561,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008422234699606962,
      "grad_norm": 5.789886951446533,
      "learning_rate": 2.8011204481792718e-08,
      "loss": 0.8115,
      "step": 1
    },
    {
      "epoch": 0.0016844469399213925,
      "grad_norm": 5.740841865539551,
      "learning_rate": 5.6022408963585437e-08,
      "loss": 0.8182,
      "step": 2
    },
    {
      "epoch": 0.0025266704098820887,
      "grad_norm": 5.808021545410156,
      "learning_rate": 8.403361344537815e-08,
      "loss": 0.8416,
      "step": 3
    },
    {
      "epoch": 0.003368893879842785,
      "grad_norm": 5.797684192657471,
      "learning_rate": 1.1204481792717087e-07,
      "loss": 0.8424,
      "step": 4
    },
    {
      "epoch": 0.004211117349803481,
      "grad_norm": 5.925669193267822,
      "learning_rate": 1.400560224089636e-07,
      "loss": 0.8598,
      "step": 5
    },
    {
      "epoch": 0.0050533408197641775,
      "grad_norm": 5.973628044128418,
      "learning_rate": 1.680672268907563e-07,
      "loss": 0.8674,
      "step": 6
    },
    {
      "epoch": 0.005895564289724873,
      "grad_norm": 6.019296646118164,
      "learning_rate": 1.9607843137254904e-07,
      "loss": 0.8991,
      "step": 7
    },
    {
      "epoch": 0.00673778775968557,
      "grad_norm": 5.99725866317749,
      "learning_rate": 2.2408963585434175e-07,
      "loss": 0.864,
      "step": 8
    },
    {
      "epoch": 0.007580011229646266,
      "grad_norm": 5.903975009918213,
      "learning_rate": 2.5210084033613445e-07,
      "loss": 0.8529,
      "step": 9
    },
    {
      "epoch": 0.008422234699606962,
      "grad_norm": 5.64990234375,
      "learning_rate": 2.801120448179272e-07,
      "loss": 0.8443,
      "step": 10
    },
    {
      "epoch": 0.009264458169567658,
      "grad_norm": 5.645129203796387,
      "learning_rate": 3.081232492997199e-07,
      "loss": 0.8497,
      "step": 11
    },
    {
      "epoch": 0.010106681639528355,
      "grad_norm": 5.82932186126709,
      "learning_rate": 3.361344537815126e-07,
      "loss": 0.8511,
      "step": 12
    },
    {
      "epoch": 0.010948905109489052,
      "grad_norm": 5.648269176483154,
      "learning_rate": 3.641456582633054e-07,
      "loss": 0.8614,
      "step": 13
    },
    {
      "epoch": 0.011791128579449747,
      "grad_norm": 5.631472110748291,
      "learning_rate": 3.921568627450981e-07,
      "loss": 0.871,
      "step": 14
    },
    {
      "epoch": 0.012633352049410443,
      "grad_norm": 5.537994384765625,
      "learning_rate": 4.201680672268908e-07,
      "loss": 0.863,
      "step": 15
    },
    {
      "epoch": 0.01347557551937114,
      "grad_norm": 5.148649215698242,
      "learning_rate": 4.481792717086835e-07,
      "loss": 0.7878,
      "step": 16
    },
    {
      "epoch": 0.014317798989331837,
      "grad_norm": 5.496405601501465,
      "learning_rate": 4.7619047619047623e-07,
      "loss": 0.867,
      "step": 17
    },
    {
      "epoch": 0.015160022459292532,
      "grad_norm": 5.026359558105469,
      "learning_rate": 5.042016806722689e-07,
      "loss": 0.8244,
      "step": 18
    },
    {
      "epoch": 0.016002245929253228,
      "grad_norm": 4.364068031311035,
      "learning_rate": 5.322128851540616e-07,
      "loss": 0.8214,
      "step": 19
    },
    {
      "epoch": 0.016844469399213923,
      "grad_norm": 4.4918904304504395,
      "learning_rate": 5.602240896358544e-07,
      "loss": 0.8024,
      "step": 20
    },
    {
      "epoch": 0.01768669286917462,
      "grad_norm": 4.242900371551514,
      "learning_rate": 5.882352941176471e-07,
      "loss": 0.8066,
      "step": 21
    },
    {
      "epoch": 0.018528916339135316,
      "grad_norm": 4.09971284866333,
      "learning_rate": 6.162464985994398e-07,
      "loss": 0.7916,
      "step": 22
    },
    {
      "epoch": 0.019371139809096015,
      "grad_norm": 4.071753025054932,
      "learning_rate": 6.442577030812325e-07,
      "loss": 0.7761,
      "step": 23
    },
    {
      "epoch": 0.02021336327905671,
      "grad_norm": 4.0924296379089355,
      "learning_rate": 6.722689075630252e-07,
      "loss": 0.8152,
      "step": 24
    },
    {
      "epoch": 0.021055586749017405,
      "grad_norm": 2.9275577068328857,
      "learning_rate": 7.002801120448179e-07,
      "loss": 0.7976,
      "step": 25
    },
    {
      "epoch": 0.021897810218978103,
      "grad_norm": 2.3993306159973145,
      "learning_rate": 7.282913165266108e-07,
      "loss": 0.8081,
      "step": 26
    },
    {
      "epoch": 0.022740033688938798,
      "grad_norm": 2.2805216312408447,
      "learning_rate": 7.563025210084034e-07,
      "loss": 0.7708,
      "step": 27
    },
    {
      "epoch": 0.023582257158899493,
      "grad_norm": 2.2916431427001953,
      "learning_rate": 7.843137254901962e-07,
      "loss": 0.7625,
      "step": 28
    },
    {
      "epoch": 0.02442448062886019,
      "grad_norm": 2.189636468887329,
      "learning_rate": 8.123249299719889e-07,
      "loss": 0.7308,
      "step": 29
    },
    {
      "epoch": 0.025266704098820886,
      "grad_norm": 2.1304352283477783,
      "learning_rate": 8.403361344537816e-07,
      "loss": 0.7876,
      "step": 30
    },
    {
      "epoch": 0.026108927568781585,
      "grad_norm": 2.0298783779144287,
      "learning_rate": 8.683473389355742e-07,
      "loss": 0.75,
      "step": 31
    },
    {
      "epoch": 0.02695115103874228,
      "grad_norm": 1.9074889421463013,
      "learning_rate": 8.96358543417367e-07,
      "loss": 0.7543,
      "step": 32
    },
    {
      "epoch": 0.027793374508702975,
      "grad_norm": 1.7056124210357666,
      "learning_rate": 9.243697478991598e-07,
      "loss": 0.739,
      "step": 33
    },
    {
      "epoch": 0.028635597978663673,
      "grad_norm": 1.4362610578536987,
      "learning_rate": 9.523809523809525e-07,
      "loss": 0.7185,
      "step": 34
    },
    {
      "epoch": 0.029477821448624368,
      "grad_norm": 1.774840235710144,
      "learning_rate": 9.80392156862745e-07,
      "loss": 0.7459,
      "step": 35
    },
    {
      "epoch": 0.030320044918585063,
      "grad_norm": 2.0444815158843994,
      "learning_rate": 1.0084033613445378e-06,
      "loss": 0.7009,
      "step": 36
    },
    {
      "epoch": 0.03116226838854576,
      "grad_norm": 2.1377086639404297,
      "learning_rate": 1.0364145658263308e-06,
      "loss": 0.6827,
      "step": 37
    },
    {
      "epoch": 0.032004491858506456,
      "grad_norm": 2.285421371459961,
      "learning_rate": 1.0644257703081233e-06,
      "loss": 0.7261,
      "step": 38
    },
    {
      "epoch": 0.032846715328467155,
      "grad_norm": 2.213207244873047,
      "learning_rate": 1.092436974789916e-06,
      "loss": 0.7263,
      "step": 39
    },
    {
      "epoch": 0.033688938798427846,
      "grad_norm": 1.962202787399292,
      "learning_rate": 1.1204481792717088e-06,
      "loss": 0.6892,
      "step": 40
    },
    {
      "epoch": 0.034531162268388545,
      "grad_norm": 1.8724396228790283,
      "learning_rate": 1.1484593837535015e-06,
      "loss": 0.7065,
      "step": 41
    },
    {
      "epoch": 0.03537338573834924,
      "grad_norm": 1.599852442741394,
      "learning_rate": 1.1764705882352942e-06,
      "loss": 0.6902,
      "step": 42
    },
    {
      "epoch": 0.03621560920830994,
      "grad_norm": 1.5018494129180908,
      "learning_rate": 1.204481792717087e-06,
      "loss": 0.7039,
      "step": 43
    },
    {
      "epoch": 0.03705783267827063,
      "grad_norm": 1.1314431428909302,
      "learning_rate": 1.2324929971988797e-06,
      "loss": 0.673,
      "step": 44
    },
    {
      "epoch": 0.03790005614823133,
      "grad_norm": 0.9867624044418335,
      "learning_rate": 1.2605042016806724e-06,
      "loss": 0.6989,
      "step": 45
    },
    {
      "epoch": 0.03874227961819203,
      "grad_norm": 0.874235212802887,
      "learning_rate": 1.288515406162465e-06,
      "loss": 0.6687,
      "step": 46
    },
    {
      "epoch": 0.03958450308815272,
      "grad_norm": 0.9184491038322449,
      "learning_rate": 1.316526610644258e-06,
      "loss": 0.6974,
      "step": 47
    },
    {
      "epoch": 0.04042672655811342,
      "grad_norm": 1.1418858766555786,
      "learning_rate": 1.3445378151260504e-06,
      "loss": 0.6473,
      "step": 48
    },
    {
      "epoch": 0.04126895002807412,
      "grad_norm": 1.0575696229934692,
      "learning_rate": 1.3725490196078434e-06,
      "loss": 0.6673,
      "step": 49
    },
    {
      "epoch": 0.04211117349803481,
      "grad_norm": 1.0372340679168701,
      "learning_rate": 1.4005602240896359e-06,
      "loss": 0.6159,
      "step": 50
    },
    {
      "epoch": 0.04295339696799551,
      "grad_norm": 0.8753142952919006,
      "learning_rate": 1.4285714285714286e-06,
      "loss": 0.6496,
      "step": 51
    },
    {
      "epoch": 0.043795620437956206,
      "grad_norm": 0.7866713404655457,
      "learning_rate": 1.4565826330532216e-06,
      "loss": 0.6484,
      "step": 52
    },
    {
      "epoch": 0.0446378439079169,
      "grad_norm": 0.902646541595459,
      "learning_rate": 1.484593837535014e-06,
      "loss": 0.6761,
      "step": 53
    },
    {
      "epoch": 0.045480067377877596,
      "grad_norm": 0.7898141741752625,
      "learning_rate": 1.5126050420168068e-06,
      "loss": 0.6644,
      "step": 54
    },
    {
      "epoch": 0.046322290847838295,
      "grad_norm": 0.8646948337554932,
      "learning_rate": 1.5406162464985996e-06,
      "loss": 0.6655,
      "step": 55
    },
    {
      "epoch": 0.047164514317798986,
      "grad_norm": 0.6505382061004639,
      "learning_rate": 1.5686274509803923e-06,
      "loss": 0.6467,
      "step": 56
    },
    {
      "epoch": 0.048006737787759685,
      "grad_norm": 0.5825474262237549,
      "learning_rate": 1.5966386554621848e-06,
      "loss": 0.6392,
      "step": 57
    },
    {
      "epoch": 0.04884896125772038,
      "grad_norm": 0.6559675931930542,
      "learning_rate": 1.6246498599439778e-06,
      "loss": 0.6254,
      "step": 58
    },
    {
      "epoch": 0.04969118472768108,
      "grad_norm": 0.5996388792991638,
      "learning_rate": 1.6526610644257705e-06,
      "loss": 0.5868,
      "step": 59
    },
    {
      "epoch": 0.05053340819764177,
      "grad_norm": 0.6724130511283875,
      "learning_rate": 1.6806722689075632e-06,
      "loss": 0.6636,
      "step": 60
    },
    {
      "epoch": 0.05137563166760247,
      "grad_norm": 0.6920154690742493,
      "learning_rate": 1.708683473389356e-06,
      "loss": 0.6199,
      "step": 61
    },
    {
      "epoch": 0.05221785513756317,
      "grad_norm": 0.540673017501831,
      "learning_rate": 1.7366946778711485e-06,
      "loss": 0.6095,
      "step": 62
    },
    {
      "epoch": 0.05306007860752386,
      "grad_norm": 0.5125397443771362,
      "learning_rate": 1.7647058823529414e-06,
      "loss": 0.6205,
      "step": 63
    },
    {
      "epoch": 0.05390230207748456,
      "grad_norm": 0.5275012850761414,
      "learning_rate": 1.792717086834734e-06,
      "loss": 0.5832,
      "step": 64
    },
    {
      "epoch": 0.05474452554744526,
      "grad_norm": 0.5311269760131836,
      "learning_rate": 1.8207282913165267e-06,
      "loss": 0.5914,
      "step": 65
    },
    {
      "epoch": 0.05558674901740595,
      "grad_norm": 0.5057007670402527,
      "learning_rate": 1.8487394957983196e-06,
      "loss": 0.6166,
      "step": 66
    },
    {
      "epoch": 0.05642897248736665,
      "grad_norm": 0.481393426656723,
      "learning_rate": 1.8767507002801122e-06,
      "loss": 0.5915,
      "step": 67
    },
    {
      "epoch": 0.057271195957327346,
      "grad_norm": 0.4921214282512665,
      "learning_rate": 1.904761904761905e-06,
      "loss": 0.5782,
      "step": 68
    },
    {
      "epoch": 0.05811341942728804,
      "grad_norm": 0.4925113916397095,
      "learning_rate": 1.932773109243698e-06,
      "loss": 0.6009,
      "step": 69
    },
    {
      "epoch": 0.058955642897248736,
      "grad_norm": 0.44187214970588684,
      "learning_rate": 1.96078431372549e-06,
      "loss": 0.5834,
      "step": 70
    },
    {
      "epoch": 0.059797866367209435,
      "grad_norm": 0.4849870502948761,
      "learning_rate": 1.988795518207283e-06,
      "loss": 0.6063,
      "step": 71
    },
    {
      "epoch": 0.060640089837170126,
      "grad_norm": 0.4766691327095032,
      "learning_rate": 2.0168067226890756e-06,
      "loss": 0.6041,
      "step": 72
    },
    {
      "epoch": 0.061482313307130824,
      "grad_norm": 0.43484383821487427,
      "learning_rate": 2.0448179271708684e-06,
      "loss": 0.601,
      "step": 73
    },
    {
      "epoch": 0.06232453677709152,
      "grad_norm": 0.4051569104194641,
      "learning_rate": 2.0728291316526615e-06,
      "loss": 0.5901,
      "step": 74
    },
    {
      "epoch": 0.06316676024705221,
      "grad_norm": 0.4978720247745514,
      "learning_rate": 2.100840336134454e-06,
      "loss": 0.5714,
      "step": 75
    },
    {
      "epoch": 0.06400898371701291,
      "grad_norm": 0.4194071292877197,
      "learning_rate": 2.1288515406162466e-06,
      "loss": 0.5577,
      "step": 76
    },
    {
      "epoch": 0.06485120718697361,
      "grad_norm": 0.41055306792259216,
      "learning_rate": 2.1568627450980393e-06,
      "loss": 0.5773,
      "step": 77
    },
    {
      "epoch": 0.06569343065693431,
      "grad_norm": 0.4237929582595825,
      "learning_rate": 2.184873949579832e-06,
      "loss": 0.6204,
      "step": 78
    },
    {
      "epoch": 0.06653565412689501,
      "grad_norm": 0.4256727695465088,
      "learning_rate": 2.2128851540616248e-06,
      "loss": 0.5684,
      "step": 79
    },
    {
      "epoch": 0.06737787759685569,
      "grad_norm": 0.43531617522239685,
      "learning_rate": 2.2408963585434175e-06,
      "loss": 0.5994,
      "step": 80
    },
    {
      "epoch": 0.06822010106681639,
      "grad_norm": 0.489454060792923,
      "learning_rate": 2.2689075630252102e-06,
      "loss": 0.5825,
      "step": 81
    },
    {
      "epoch": 0.06906232453677709,
      "grad_norm": 0.46862828731536865,
      "learning_rate": 2.296918767507003e-06,
      "loss": 0.5829,
      "step": 82
    },
    {
      "epoch": 0.06990454800673779,
      "grad_norm": 0.3990698754787445,
      "learning_rate": 2.3249299719887957e-06,
      "loss": 0.5526,
      "step": 83
    },
    {
      "epoch": 0.07074677147669849,
      "grad_norm": 0.41348353028297424,
      "learning_rate": 2.3529411764705885e-06,
      "loss": 0.58,
      "step": 84
    },
    {
      "epoch": 0.07158899494665918,
      "grad_norm": 0.4134857952594757,
      "learning_rate": 2.380952380952381e-06,
      "loss": 0.5564,
      "step": 85
    },
    {
      "epoch": 0.07243121841661988,
      "grad_norm": 0.4596283733844757,
      "learning_rate": 2.408963585434174e-06,
      "loss": 0.6039,
      "step": 86
    },
    {
      "epoch": 0.07327344188658057,
      "grad_norm": 0.43277508020401,
      "learning_rate": 2.4369747899159667e-06,
      "loss": 0.577,
      "step": 87
    },
    {
      "epoch": 0.07411566535654127,
      "grad_norm": 0.4344329237937927,
      "learning_rate": 2.4649859943977594e-06,
      "loss": 0.5984,
      "step": 88
    },
    {
      "epoch": 0.07495788882650196,
      "grad_norm": 0.4119739830493927,
      "learning_rate": 2.492997198879552e-06,
      "loss": 0.5857,
      "step": 89
    },
    {
      "epoch": 0.07580011229646266,
      "grad_norm": 0.40994206070899963,
      "learning_rate": 2.521008403361345e-06,
      "loss": 0.6026,
      "step": 90
    },
    {
      "epoch": 0.07664233576642336,
      "grad_norm": 0.4059639573097229,
      "learning_rate": 2.549019607843137e-06,
      "loss": 0.5647,
      "step": 91
    },
    {
      "epoch": 0.07748455923638406,
      "grad_norm": 0.39020976424217224,
      "learning_rate": 2.57703081232493e-06,
      "loss": 0.5605,
      "step": 92
    },
    {
      "epoch": 0.07832678270634474,
      "grad_norm": 0.44847968220710754,
      "learning_rate": 2.605042016806723e-06,
      "loss": 0.5663,
      "step": 93
    },
    {
      "epoch": 0.07916900617630544,
      "grad_norm": 0.4272569715976715,
      "learning_rate": 2.633053221288516e-06,
      "loss": 0.5949,
      "step": 94
    },
    {
      "epoch": 0.08001122964626614,
      "grad_norm": 0.44790881872177124,
      "learning_rate": 2.6610644257703085e-06,
      "loss": 0.564,
      "step": 95
    },
    {
      "epoch": 0.08085345311622684,
      "grad_norm": 0.37310442328453064,
      "learning_rate": 2.689075630252101e-06,
      "loss": 0.557,
      "step": 96
    },
    {
      "epoch": 0.08169567658618754,
      "grad_norm": 0.45554637908935547,
      "learning_rate": 2.7170868347338936e-06,
      "loss": 0.5886,
      "step": 97
    },
    {
      "epoch": 0.08253790005614824,
      "grad_norm": 0.3916391432285309,
      "learning_rate": 2.7450980392156867e-06,
      "loss": 0.5487,
      "step": 98
    },
    {
      "epoch": 0.08338012352610892,
      "grad_norm": 0.4407063126564026,
      "learning_rate": 2.7731092436974795e-06,
      "loss": 0.5438,
      "step": 99
    },
    {
      "epoch": 0.08422234699606962,
      "grad_norm": 0.4014075994491577,
      "learning_rate": 2.8011204481792718e-06,
      "loss": 0.5848,
      "step": 100
    },
    {
      "epoch": 0.08506457046603032,
      "grad_norm": 0.42044365406036377,
      "learning_rate": 2.8291316526610645e-06,
      "loss": 0.5648,
      "step": 101
    },
    {
      "epoch": 0.08590679393599102,
      "grad_norm": 0.392103374004364,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 0.5917,
      "step": 102
    },
    {
      "epoch": 0.08674901740595171,
      "grad_norm": 0.4008248448371887,
      "learning_rate": 2.88515406162465e-06,
      "loss": 0.5539,
      "step": 103
    },
    {
      "epoch": 0.08759124087591241,
      "grad_norm": 0.4122604727745056,
      "learning_rate": 2.913165266106443e-06,
      "loss": 0.5861,
      "step": 104
    },
    {
      "epoch": 0.08843346434587311,
      "grad_norm": 0.4432787597179413,
      "learning_rate": 2.9411764705882355e-06,
      "loss": 0.5663,
      "step": 105
    },
    {
      "epoch": 0.0892756878158338,
      "grad_norm": 0.38645657896995544,
      "learning_rate": 2.969187675070028e-06,
      "loss": 0.5403,
      "step": 106
    },
    {
      "epoch": 0.0901179112857945,
      "grad_norm": 0.41515976190567017,
      "learning_rate": 2.997198879551821e-06,
      "loss": 0.547,
      "step": 107
    },
    {
      "epoch": 0.09096013475575519,
      "grad_norm": 0.423387348651886,
      "learning_rate": 3.0252100840336137e-06,
      "loss": 0.5629,
      "step": 108
    },
    {
      "epoch": 0.09180235822571589,
      "grad_norm": 0.4305417835712433,
      "learning_rate": 3.053221288515407e-06,
      "loss": 0.5629,
      "step": 109
    },
    {
      "epoch": 0.09264458169567659,
      "grad_norm": 0.4247176945209503,
      "learning_rate": 3.081232492997199e-06,
      "loss": 0.5575,
      "step": 110
    },
    {
      "epoch": 0.09348680516563729,
      "grad_norm": 0.41834551095962524,
      "learning_rate": 3.109243697478992e-06,
      "loss": 0.5771,
      "step": 111
    },
    {
      "epoch": 0.09432902863559797,
      "grad_norm": 0.413619726896286,
      "learning_rate": 3.1372549019607846e-06,
      "loss": 0.5402,
      "step": 112
    },
    {
      "epoch": 0.09517125210555867,
      "grad_norm": 0.46302708983421326,
      "learning_rate": 3.1652661064425773e-06,
      "loss": 0.561,
      "step": 113
    },
    {
      "epoch": 0.09601347557551937,
      "grad_norm": 0.416839063167572,
      "learning_rate": 3.1932773109243696e-06,
      "loss": 0.5635,
      "step": 114
    },
    {
      "epoch": 0.09685569904548007,
      "grad_norm": 0.3974224328994751,
      "learning_rate": 3.221288515406163e-06,
      "loss": 0.5441,
      "step": 115
    },
    {
      "epoch": 0.09769792251544077,
      "grad_norm": 0.4101355969905853,
      "learning_rate": 3.2492997198879555e-06,
      "loss": 0.5425,
      "step": 116
    },
    {
      "epoch": 0.09854014598540146,
      "grad_norm": 0.39879581332206726,
      "learning_rate": 3.2773109243697483e-06,
      "loss": 0.5573,
      "step": 117
    },
    {
      "epoch": 0.09938236945536216,
      "grad_norm": 0.4118677079677582,
      "learning_rate": 3.305322128851541e-06,
      "loss": 0.5518,
      "step": 118
    },
    {
      "epoch": 0.10022459292532285,
      "grad_norm": 0.41027089953422546,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.5556,
      "step": 119
    },
    {
      "epoch": 0.10106681639528355,
      "grad_norm": 0.3836262822151184,
      "learning_rate": 3.3613445378151265e-06,
      "loss": 0.5415,
      "step": 120
    },
    {
      "epoch": 0.10190903986524424,
      "grad_norm": 0.3906749188899994,
      "learning_rate": 3.3893557422969192e-06,
      "loss": 0.5761,
      "step": 121
    },
    {
      "epoch": 0.10275126333520494,
      "grad_norm": 0.38707324862480164,
      "learning_rate": 3.417366946778712e-06,
      "loss": 0.5404,
      "step": 122
    },
    {
      "epoch": 0.10359348680516564,
      "grad_norm": 0.37047573924064636,
      "learning_rate": 3.4453781512605043e-06,
      "loss": 0.5323,
      "step": 123
    },
    {
      "epoch": 0.10443571027512634,
      "grad_norm": 0.3858558237552643,
      "learning_rate": 3.473389355742297e-06,
      "loss": 0.5157,
      "step": 124
    },
    {
      "epoch": 0.10527793374508702,
      "grad_norm": 0.3803700804710388,
      "learning_rate": 3.5014005602240897e-06,
      "loss": 0.5336,
      "step": 125
    },
    {
      "epoch": 0.10612015721504772,
      "grad_norm": 0.43880030512809753,
      "learning_rate": 3.529411764705883e-06,
      "loss": 0.5863,
      "step": 126
    },
    {
      "epoch": 0.10696238068500842,
      "grad_norm": 0.3891538679599762,
      "learning_rate": 3.5574229691876756e-06,
      "loss": 0.5462,
      "step": 127
    },
    {
      "epoch": 0.10780460415496912,
      "grad_norm": 0.40231987833976746,
      "learning_rate": 3.585434173669468e-06,
      "loss": 0.5239,
      "step": 128
    },
    {
      "epoch": 0.10864682762492982,
      "grad_norm": 0.3876766562461853,
      "learning_rate": 3.6134453781512607e-06,
      "loss": 0.5374,
      "step": 129
    },
    {
      "epoch": 0.10948905109489052,
      "grad_norm": 0.40061232447624207,
      "learning_rate": 3.6414565826330534e-06,
      "loss": 0.5677,
      "step": 130
    },
    {
      "epoch": 0.1103312745648512,
      "grad_norm": 0.3698410391807556,
      "learning_rate": 3.669467787114846e-06,
      "loss": 0.5452,
      "step": 131
    },
    {
      "epoch": 0.1111734980348119,
      "grad_norm": 0.3980056941509247,
      "learning_rate": 3.6974789915966393e-06,
      "loss": 0.5272,
      "step": 132
    },
    {
      "epoch": 0.1120157215047726,
      "grad_norm": 0.45054540038108826,
      "learning_rate": 3.7254901960784316e-06,
      "loss": 0.5525,
      "step": 133
    },
    {
      "epoch": 0.1128579449747333,
      "grad_norm": 0.38893014192581177,
      "learning_rate": 3.7535014005602243e-06,
      "loss": 0.5519,
      "step": 134
    },
    {
      "epoch": 0.113700168444694,
      "grad_norm": 0.39634814858436584,
      "learning_rate": 3.781512605042017e-06,
      "loss": 0.515,
      "step": 135
    },
    {
      "epoch": 0.11454239191465469,
      "grad_norm": 0.4353926181793213,
      "learning_rate": 3.80952380952381e-06,
      "loss": 0.5606,
      "step": 136
    },
    {
      "epoch": 0.11538461538461539,
      "grad_norm": 0.4090767204761505,
      "learning_rate": 3.8375350140056026e-06,
      "loss": 0.5165,
      "step": 137
    },
    {
      "epoch": 0.11622683885457608,
      "grad_norm": 0.42739954590797424,
      "learning_rate": 3.865546218487396e-06,
      "loss": 0.5409,
      "step": 138
    },
    {
      "epoch": 0.11706906232453677,
      "grad_norm": 0.3913635313510895,
      "learning_rate": 3.893557422969188e-06,
      "loss": 0.5,
      "step": 139
    },
    {
      "epoch": 0.11791128579449747,
      "grad_norm": 0.41620737314224243,
      "learning_rate": 3.92156862745098e-06,
      "loss": 0.5424,
      "step": 140
    },
    {
      "epoch": 0.11875350926445817,
      "grad_norm": 0.43139201402664185,
      "learning_rate": 3.9495798319327735e-06,
      "loss": 0.5438,
      "step": 141
    },
    {
      "epoch": 0.11959573273441887,
      "grad_norm": 0.44021180272102356,
      "learning_rate": 3.977591036414566e-06,
      "loss": 0.5302,
      "step": 142
    },
    {
      "epoch": 0.12043795620437957,
      "grad_norm": 0.38332462310791016,
      "learning_rate": 4.005602240896359e-06,
      "loss": 0.5179,
      "step": 143
    },
    {
      "epoch": 0.12128017967434025,
      "grad_norm": 0.4462721049785614,
      "learning_rate": 4.033613445378151e-06,
      "loss": 0.5536,
      "step": 144
    },
    {
      "epoch": 0.12212240314430095,
      "grad_norm": 0.4713367819786072,
      "learning_rate": 4.0616246498599444e-06,
      "loss": 0.5378,
      "step": 145
    },
    {
      "epoch": 0.12296462661426165,
      "grad_norm": 0.395902544260025,
      "learning_rate": 4.089635854341737e-06,
      "loss": 0.5401,
      "step": 146
    },
    {
      "epoch": 0.12380685008422235,
      "grad_norm": 0.40437230467796326,
      "learning_rate": 4.11764705882353e-06,
      "loss": 0.5353,
      "step": 147
    },
    {
      "epoch": 0.12464907355418305,
      "grad_norm": 0.45917031168937683,
      "learning_rate": 4.145658263305323e-06,
      "loss": 0.5313,
      "step": 148
    },
    {
      "epoch": 0.12549129702414374,
      "grad_norm": 0.4625679552555084,
      "learning_rate": 4.173669467787115e-06,
      "loss": 0.5394,
      "step": 149
    },
    {
      "epoch": 0.12633352049410443,
      "grad_norm": 0.4416869580745697,
      "learning_rate": 4.201680672268908e-06,
      "loss": 0.5639,
      "step": 150
    },
    {
      "epoch": 0.12717574396406514,
      "grad_norm": 0.4602159559726715,
      "learning_rate": 4.229691876750701e-06,
      "loss": 0.5028,
      "step": 151
    },
    {
      "epoch": 0.12801796743402583,
      "grad_norm": 0.4193742871284485,
      "learning_rate": 4.257703081232493e-06,
      "loss": 0.5306,
      "step": 152
    },
    {
      "epoch": 0.12886019090398654,
      "grad_norm": 0.42938506603240967,
      "learning_rate": 4.2857142857142855e-06,
      "loss": 0.5476,
      "step": 153
    },
    {
      "epoch": 0.12970241437394722,
      "grad_norm": 0.5189563035964966,
      "learning_rate": 4.313725490196079e-06,
      "loss": 0.5414,
      "step": 154
    },
    {
      "epoch": 0.1305446378439079,
      "grad_norm": 0.4153963625431061,
      "learning_rate": 4.341736694677872e-06,
      "loss": 0.561,
      "step": 155
    },
    {
      "epoch": 0.13138686131386862,
      "grad_norm": 0.41574448347091675,
      "learning_rate": 4.369747899159664e-06,
      "loss": 0.5249,
      "step": 156
    },
    {
      "epoch": 0.1322290847838293,
      "grad_norm": 0.5603359937667847,
      "learning_rate": 4.397759103641457e-06,
      "loss": 0.5569,
      "step": 157
    },
    {
      "epoch": 0.13307130825379002,
      "grad_norm": 0.4055115580558777,
      "learning_rate": 4.4257703081232496e-06,
      "loss": 0.5235,
      "step": 158
    },
    {
      "epoch": 0.1339135317237507,
      "grad_norm": 0.4988480508327484,
      "learning_rate": 4.453781512605043e-06,
      "loss": 0.5554,
      "step": 159
    },
    {
      "epoch": 0.13475575519371139,
      "grad_norm": 0.5111923813819885,
      "learning_rate": 4.481792717086835e-06,
      "loss": 0.5339,
      "step": 160
    },
    {
      "epoch": 0.1355979786636721,
      "grad_norm": 0.4302005469799042,
      "learning_rate": 4.509803921568628e-06,
      "loss": 0.555,
      "step": 161
    },
    {
      "epoch": 0.13644020213363278,
      "grad_norm": 0.4389697313308716,
      "learning_rate": 4.5378151260504205e-06,
      "loss": 0.5192,
      "step": 162
    },
    {
      "epoch": 0.1372824256035935,
      "grad_norm": 0.4716448187828064,
      "learning_rate": 4.565826330532213e-06,
      "loss": 0.5197,
      "step": 163
    },
    {
      "epoch": 0.13812464907355418,
      "grad_norm": 0.47341540455818176,
      "learning_rate": 4.593837535014006e-06,
      "loss": 0.5695,
      "step": 164
    },
    {
      "epoch": 0.1389668725435149,
      "grad_norm": 0.48273712396621704,
      "learning_rate": 4.621848739495799e-06,
      "loss": 0.5186,
      "step": 165
    },
    {
      "epoch": 0.13980909601347558,
      "grad_norm": 0.4246666431427002,
      "learning_rate": 4.6498599439775914e-06,
      "loss": 0.5599,
      "step": 166
    },
    {
      "epoch": 0.14065131948343626,
      "grad_norm": 0.41264545917510986,
      "learning_rate": 4.677871148459384e-06,
      "loss": 0.5305,
      "step": 167
    },
    {
      "epoch": 0.14149354295339697,
      "grad_norm": 0.44070708751678467,
      "learning_rate": 4.705882352941177e-06,
      "loss": 0.5527,
      "step": 168
    },
    {
      "epoch": 0.14233576642335766,
      "grad_norm": 0.44124457240104675,
      "learning_rate": 4.733893557422969e-06,
      "loss": 0.5331,
      "step": 169
    },
    {
      "epoch": 0.14317798989331837,
      "grad_norm": 0.39877748489379883,
      "learning_rate": 4.761904761904762e-06,
      "loss": 0.5315,
      "step": 170
    },
    {
      "epoch": 0.14402021336327905,
      "grad_norm": 0.41009920835494995,
      "learning_rate": 4.7899159663865555e-06,
      "loss": 0.5144,
      "step": 171
    },
    {
      "epoch": 0.14486243683323977,
      "grad_norm": 0.4817536473274231,
      "learning_rate": 4.817927170868348e-06,
      "loss": 0.5478,
      "step": 172
    },
    {
      "epoch": 0.14570466030320045,
      "grad_norm": 0.4315141439437866,
      "learning_rate": 4.84593837535014e-06,
      "loss": 0.5296,
      "step": 173
    },
    {
      "epoch": 0.14654688377316114,
      "grad_norm": 0.3853152096271515,
      "learning_rate": 4.873949579831933e-06,
      "loss": 0.4998,
      "step": 174
    },
    {
      "epoch": 0.14738910724312185,
      "grad_norm": 0.5280141830444336,
      "learning_rate": 4.901960784313726e-06,
      "loss": 0.5358,
      "step": 175
    },
    {
      "epoch": 0.14823133071308253,
      "grad_norm": 0.4142664968967438,
      "learning_rate": 4.929971988795519e-06,
      "loss": 0.5176,
      "step": 176
    },
    {
      "epoch": 0.14907355418304324,
      "grad_norm": 0.4857929050922394,
      "learning_rate": 4.957983193277311e-06,
      "loss": 0.5266,
      "step": 177
    },
    {
      "epoch": 0.14991577765300393,
      "grad_norm": 0.4851177930831909,
      "learning_rate": 4.985994397759104e-06,
      "loss": 0.5706,
      "step": 178
    },
    {
      "epoch": 0.1507580011229646,
      "grad_norm": 0.44848182797431946,
      "learning_rate": 5.0140056022408966e-06,
      "loss": 0.5159,
      "step": 179
    },
    {
      "epoch": 0.15160022459292533,
      "grad_norm": 0.44422101974487305,
      "learning_rate": 5.04201680672269e-06,
      "loss": 0.5314,
      "step": 180
    },
    {
      "epoch": 0.152442448062886,
      "grad_norm": 0.4084372818470001,
      "learning_rate": 5.070028011204482e-06,
      "loss": 0.492,
      "step": 181
    },
    {
      "epoch": 0.15328467153284672,
      "grad_norm": 0.42204394936561584,
      "learning_rate": 5.098039215686274e-06,
      "loss": 0.5325,
      "step": 182
    },
    {
      "epoch": 0.1541268950028074,
      "grad_norm": 0.4422619640827179,
      "learning_rate": 5.1260504201680675e-06,
      "loss": 0.5245,
      "step": 183
    },
    {
      "epoch": 0.15496911847276812,
      "grad_norm": 0.44035136699676514,
      "learning_rate": 5.15406162464986e-06,
      "loss": 0.5426,
      "step": 184
    },
    {
      "epoch": 0.1558113419427288,
      "grad_norm": 0.4643510580062866,
      "learning_rate": 5.182072829131654e-06,
      "loss": 0.5566,
      "step": 185
    },
    {
      "epoch": 0.1566535654126895,
      "grad_norm": 0.4454789161682129,
      "learning_rate": 5.210084033613446e-06,
      "loss": 0.5156,
      "step": 186
    },
    {
      "epoch": 0.1574957888826502,
      "grad_norm": 0.4297592043876648,
      "learning_rate": 5.2380952380952384e-06,
      "loss": 0.4917,
      "step": 187
    },
    {
      "epoch": 0.15833801235261089,
      "grad_norm": 0.44328317046165466,
      "learning_rate": 5.266106442577032e-06,
      "loss": 0.5169,
      "step": 188
    },
    {
      "epoch": 0.1591802358225716,
      "grad_norm": 0.47328364849090576,
      "learning_rate": 5.294117647058824e-06,
      "loss": 0.563,
      "step": 189
    },
    {
      "epoch": 0.16002245929253228,
      "grad_norm": 0.4263771176338196,
      "learning_rate": 5.322128851540617e-06,
      "loss": 0.519,
      "step": 190
    },
    {
      "epoch": 0.160864682762493,
      "grad_norm": 0.4457511305809021,
      "learning_rate": 5.350140056022409e-06,
      "loss": 0.4977,
      "step": 191
    },
    {
      "epoch": 0.16170690623245368,
      "grad_norm": 0.443337082862854,
      "learning_rate": 5.378151260504202e-06,
      "loss": 0.5109,
      "step": 192
    },
    {
      "epoch": 0.16254912970241436,
      "grad_norm": 0.40707576274871826,
      "learning_rate": 5.406162464985995e-06,
      "loss": 0.5108,
      "step": 193
    },
    {
      "epoch": 0.16339135317237508,
      "grad_norm": 0.38914766907691956,
      "learning_rate": 5.434173669467787e-06,
      "loss": 0.5126,
      "step": 194
    },
    {
      "epoch": 0.16423357664233576,
      "grad_norm": 0.4302234947681427,
      "learning_rate": 5.4621848739495795e-06,
      "loss": 0.5152,
      "step": 195
    },
    {
      "epoch": 0.16507580011229647,
      "grad_norm": 0.44681474566459656,
      "learning_rate": 5.4901960784313735e-06,
      "loss": 0.4944,
      "step": 196
    },
    {
      "epoch": 0.16591802358225716,
      "grad_norm": 0.41282978653907776,
      "learning_rate": 5.518207282913166e-06,
      "loss": 0.5329,
      "step": 197
    },
    {
      "epoch": 0.16676024705221784,
      "grad_norm": 0.3922782838344574,
      "learning_rate": 5.546218487394959e-06,
      "loss": 0.5343,
      "step": 198
    },
    {
      "epoch": 0.16760247052217855,
      "grad_norm": 0.4058583676815033,
      "learning_rate": 5.574229691876751e-06,
      "loss": 0.5162,
      "step": 199
    },
    {
      "epoch": 0.16844469399213924,
      "grad_norm": 0.4978298544883728,
      "learning_rate": 5.6022408963585436e-06,
      "loss": 0.5363,
      "step": 200
    },
    {
      "epoch": 0.16928691746209995,
      "grad_norm": 0.4164210855960846,
      "learning_rate": 5.630252100840337e-06,
      "loss": 0.5221,
      "step": 201
    },
    {
      "epoch": 0.17012914093206064,
      "grad_norm": 0.4599871337413788,
      "learning_rate": 5.658263305322129e-06,
      "loss": 0.4968,
      "step": 202
    },
    {
      "epoch": 0.17097136440202135,
      "grad_norm": 0.4404202699661255,
      "learning_rate": 5.686274509803922e-06,
      "loss": 0.505,
      "step": 203
    },
    {
      "epoch": 0.17181358787198203,
      "grad_norm": 0.4543462097644806,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 0.5492,
      "step": 204
    },
    {
      "epoch": 0.17265581134194272,
      "grad_norm": 0.4060283899307251,
      "learning_rate": 5.742296918767507e-06,
      "loss": 0.5051,
      "step": 205
    },
    {
      "epoch": 0.17349803481190343,
      "grad_norm": 0.47507861256599426,
      "learning_rate": 5.7703081232493e-06,
      "loss": 0.504,
      "step": 206
    },
    {
      "epoch": 0.1743402582818641,
      "grad_norm": 0.39485087990760803,
      "learning_rate": 5.798319327731093e-06,
      "loss": 0.5057,
      "step": 207
    },
    {
      "epoch": 0.17518248175182483,
      "grad_norm": 0.4873639941215515,
      "learning_rate": 5.826330532212886e-06,
      "loss": 0.5347,
      "step": 208
    },
    {
      "epoch": 0.1760247052217855,
      "grad_norm": 0.44983264803886414,
      "learning_rate": 5.854341736694679e-06,
      "loss": 0.5141,
      "step": 209
    },
    {
      "epoch": 0.17686692869174622,
      "grad_norm": 0.4460515081882477,
      "learning_rate": 5.882352941176471e-06,
      "loss": 0.5224,
      "step": 210
    },
    {
      "epoch": 0.1777091521617069,
      "grad_norm": 0.4756495952606201,
      "learning_rate": 5.910364145658264e-06,
      "loss": 0.5249,
      "step": 211
    },
    {
      "epoch": 0.1785513756316676,
      "grad_norm": 0.40957435965538025,
      "learning_rate": 5.938375350140056e-06,
      "loss": 0.5044,
      "step": 212
    },
    {
      "epoch": 0.1793935991016283,
      "grad_norm": 0.44416600465774536,
      "learning_rate": 5.9663865546218495e-06,
      "loss": 0.5232,
      "step": 213
    },
    {
      "epoch": 0.180235822571589,
      "grad_norm": 0.5136521458625793,
      "learning_rate": 5.994397759103642e-06,
      "loss": 0.5418,
      "step": 214
    },
    {
      "epoch": 0.1810780460415497,
      "grad_norm": 0.42955899238586426,
      "learning_rate": 6.022408963585434e-06,
      "loss": 0.529,
      "step": 215
    },
    {
      "epoch": 0.18192026951151039,
      "grad_norm": 0.4613424837589264,
      "learning_rate": 6.050420168067227e-06,
      "loss": 0.5242,
      "step": 216
    },
    {
      "epoch": 0.1827624929814711,
      "grad_norm": 0.4596075117588043,
      "learning_rate": 6.07843137254902e-06,
      "loss": 0.5152,
      "step": 217
    },
    {
      "epoch": 0.18360471645143178,
      "grad_norm": 0.43743905425071716,
      "learning_rate": 6.106442577030814e-06,
      "loss": 0.4966,
      "step": 218
    },
    {
      "epoch": 0.18444693992139247,
      "grad_norm": 0.3950223922729492,
      "learning_rate": 6.134453781512606e-06,
      "loss": 0.5195,
      "step": 219
    },
    {
      "epoch": 0.18528916339135318,
      "grad_norm": 0.4254920184612274,
      "learning_rate": 6.162464985994398e-06,
      "loss": 0.5152,
      "step": 220
    },
    {
      "epoch": 0.18613138686131386,
      "grad_norm": 0.4298346936702728,
      "learning_rate": 6.1904761904761914e-06,
      "loss": 0.5096,
      "step": 221
    },
    {
      "epoch": 0.18697361033127458,
      "grad_norm": 0.43528175354003906,
      "learning_rate": 6.218487394957984e-06,
      "loss": 0.5225,
      "step": 222
    },
    {
      "epoch": 0.18781583380123526,
      "grad_norm": 0.4190683364868164,
      "learning_rate": 6.246498599439776e-06,
      "loss": 0.5111,
      "step": 223
    },
    {
      "epoch": 0.18865805727119594,
      "grad_norm": 0.4517914056777954,
      "learning_rate": 6.274509803921569e-06,
      "loss": 0.5037,
      "step": 224
    },
    {
      "epoch": 0.18950028074115666,
      "grad_norm": 0.4035191535949707,
      "learning_rate": 6.3025210084033615e-06,
      "loss": 0.4895,
      "step": 225
    },
    {
      "epoch": 0.19034250421111734,
      "grad_norm": 0.4649275243282318,
      "learning_rate": 6.330532212885155e-06,
      "loss": 0.5278,
      "step": 226
    },
    {
      "epoch": 0.19118472768107805,
      "grad_norm": 0.43215686082839966,
      "learning_rate": 6.358543417366947e-06,
      "loss": 0.5262,
      "step": 227
    },
    {
      "epoch": 0.19202695115103874,
      "grad_norm": 0.4206549823284149,
      "learning_rate": 6.386554621848739e-06,
      "loss": 0.4995,
      "step": 228
    },
    {
      "epoch": 0.19286917462099945,
      "grad_norm": 0.45951709151268005,
      "learning_rate": 6.414565826330533e-06,
      "loss": 0.5051,
      "step": 229
    },
    {
      "epoch": 0.19371139809096014,
      "grad_norm": 0.5290578603744507,
      "learning_rate": 6.442577030812326e-06,
      "loss": 0.518,
      "step": 230
    },
    {
      "epoch": 0.19455362156092082,
      "grad_norm": 0.4648697078227997,
      "learning_rate": 6.470588235294119e-06,
      "loss": 0.508,
      "step": 231
    },
    {
      "epoch": 0.19539584503088153,
      "grad_norm": 0.4382575750350952,
      "learning_rate": 6.498599439775911e-06,
      "loss": 0.4854,
      "step": 232
    },
    {
      "epoch": 0.19623806850084222,
      "grad_norm": 0.4559900164604187,
      "learning_rate": 6.526610644257703e-06,
      "loss": 0.5501,
      "step": 233
    },
    {
      "epoch": 0.19708029197080293,
      "grad_norm": 0.44203758239746094,
      "learning_rate": 6.5546218487394966e-06,
      "loss": 0.5187,
      "step": 234
    },
    {
      "epoch": 0.1979225154407636,
      "grad_norm": 0.4503888487815857,
      "learning_rate": 6.582633053221289e-06,
      "loss": 0.4992,
      "step": 235
    },
    {
      "epoch": 0.19876473891072433,
      "grad_norm": 0.4187995493412018,
      "learning_rate": 6.610644257703082e-06,
      "loss": 0.4861,
      "step": 236
    },
    {
      "epoch": 0.199606962380685,
      "grad_norm": 0.44586825370788574,
      "learning_rate": 6.638655462184874e-06,
      "loss": 0.5192,
      "step": 237
    },
    {
      "epoch": 0.2004491858506457,
      "grad_norm": 0.4666300117969513,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.5005,
      "step": 238
    },
    {
      "epoch": 0.2012914093206064,
      "grad_norm": 0.46491876244544983,
      "learning_rate": 6.69467787114846e-06,
      "loss": 0.4966,
      "step": 239
    },
    {
      "epoch": 0.2021336327905671,
      "grad_norm": 0.4384893774986267,
      "learning_rate": 6.722689075630253e-06,
      "loss": 0.5097,
      "step": 240
    },
    {
      "epoch": 0.2029758562605278,
      "grad_norm": 0.5161104202270508,
      "learning_rate": 6.750700280112046e-06,
      "loss": 0.5101,
      "step": 241
    },
    {
      "epoch": 0.2038180797304885,
      "grad_norm": 0.4923076033592224,
      "learning_rate": 6.7787114845938384e-06,
      "loss": 0.5049,
      "step": 242
    },
    {
      "epoch": 0.20466030320044917,
      "grad_norm": 0.44914063811302185,
      "learning_rate": 6.806722689075631e-06,
      "loss": 0.548,
      "step": 243
    },
    {
      "epoch": 0.20550252667040989,
      "grad_norm": 0.508477509021759,
      "learning_rate": 6.834733893557424e-06,
      "loss": 0.5041,
      "step": 244
    },
    {
      "epoch": 0.20634475014037057,
      "grad_norm": 0.5231375694274902,
      "learning_rate": 6.862745098039216e-06,
      "loss": 0.4967,
      "step": 245
    },
    {
      "epoch": 0.20718697361033128,
      "grad_norm": 0.44574159383773804,
      "learning_rate": 6.8907563025210085e-06,
      "loss": 0.5164,
      "step": 246
    },
    {
      "epoch": 0.20802919708029197,
      "grad_norm": 0.5294585824012756,
      "learning_rate": 6.918767507002802e-06,
      "loss": 0.5301,
      "step": 247
    },
    {
      "epoch": 0.20887142055025268,
      "grad_norm": 0.5535175204277039,
      "learning_rate": 6.946778711484594e-06,
      "loss": 0.5001,
      "step": 248
    },
    {
      "epoch": 0.20971364402021336,
      "grad_norm": 0.46661141514778137,
      "learning_rate": 6.974789915966387e-06,
      "loss": 0.5012,
      "step": 249
    },
    {
      "epoch": 0.21055586749017405,
      "grad_norm": 0.497941792011261,
      "learning_rate": 7.0028011204481795e-06,
      "loss": 0.4794,
      "step": 250
    },
    {
      "epoch": 0.21139809096013476,
      "grad_norm": 0.5753109455108643,
      "learning_rate": 7.030812324929972e-06,
      "loss": 0.5049,
      "step": 251
    },
    {
      "epoch": 0.21224031443009544,
      "grad_norm": 0.5328729748725891,
      "learning_rate": 7.058823529411766e-06,
      "loss": 0.5011,
      "step": 252
    },
    {
      "epoch": 0.21308253790005616,
      "grad_norm": 0.4552242159843445,
      "learning_rate": 7.086834733893558e-06,
      "loss": 0.5064,
      "step": 253
    },
    {
      "epoch": 0.21392476137001684,
      "grad_norm": 0.5044989585876465,
      "learning_rate": 7.114845938375351e-06,
      "loss": 0.5345,
      "step": 254
    },
    {
      "epoch": 0.21476698483997755,
      "grad_norm": 0.5246854424476624,
      "learning_rate": 7.1428571428571436e-06,
      "loss": 0.5084,
      "step": 255
    },
    {
      "epoch": 0.21560920830993824,
      "grad_norm": 0.43248867988586426,
      "learning_rate": 7.170868347338936e-06,
      "loss": 0.5147,
      "step": 256
    },
    {
      "epoch": 0.21645143177989892,
      "grad_norm": 0.45621806383132935,
      "learning_rate": 7.198879551820729e-06,
      "loss": 0.5099,
      "step": 257
    },
    {
      "epoch": 0.21729365524985964,
      "grad_norm": 0.5003649592399597,
      "learning_rate": 7.226890756302521e-06,
      "loss": 0.4899,
      "step": 258
    },
    {
      "epoch": 0.21813587871982032,
      "grad_norm": 0.3974325358867645,
      "learning_rate": 7.2549019607843145e-06,
      "loss": 0.4981,
      "step": 259
    },
    {
      "epoch": 0.21897810218978103,
      "grad_norm": 0.6125016808509827,
      "learning_rate": 7.282913165266107e-06,
      "loss": 0.4948,
      "step": 260
    },
    {
      "epoch": 0.21982032565974172,
      "grad_norm": 0.4158560335636139,
      "learning_rate": 7.310924369747899e-06,
      "loss": 0.4849,
      "step": 261
    },
    {
      "epoch": 0.2206625491297024,
      "grad_norm": 0.5047378540039062,
      "learning_rate": 7.338935574229692e-06,
      "loss": 0.4955,
      "step": 262
    },
    {
      "epoch": 0.2215047725996631,
      "grad_norm": 0.47575536370277405,
      "learning_rate": 7.3669467787114854e-06,
      "loss": 0.5471,
      "step": 263
    },
    {
      "epoch": 0.2223469960696238,
      "grad_norm": 0.4500392973423004,
      "learning_rate": 7.394957983193279e-06,
      "loss": 0.5124,
      "step": 264
    },
    {
      "epoch": 0.2231892195395845,
      "grad_norm": 0.5025386214256287,
      "learning_rate": 7.422969187675071e-06,
      "loss": 0.512,
      "step": 265
    },
    {
      "epoch": 0.2240314430095452,
      "grad_norm": 0.48745909333229065,
      "learning_rate": 7.450980392156863e-06,
      "loss": 0.5132,
      "step": 266
    },
    {
      "epoch": 0.2248736664795059,
      "grad_norm": 0.45900121331214905,
      "learning_rate": 7.478991596638656e-06,
      "loss": 0.5144,
      "step": 267
    },
    {
      "epoch": 0.2257158899494666,
      "grad_norm": 0.520552396774292,
      "learning_rate": 7.507002801120449e-06,
      "loss": 0.491,
      "step": 268
    },
    {
      "epoch": 0.22655811341942728,
      "grad_norm": 0.4891297221183777,
      "learning_rate": 7.535014005602241e-06,
      "loss": 0.4787,
      "step": 269
    },
    {
      "epoch": 0.227400336889388,
      "grad_norm": 0.44976291060447693,
      "learning_rate": 7.563025210084034e-06,
      "loss": 0.4965,
      "step": 270
    },
    {
      "epoch": 0.22824256035934867,
      "grad_norm": 0.5266569256782532,
      "learning_rate": 7.5910364145658265e-06,
      "loss": 0.519,
      "step": 271
    },
    {
      "epoch": 0.22908478382930939,
      "grad_norm": 0.5453179478645325,
      "learning_rate": 7.61904761904762e-06,
      "loss": 0.5155,
      "step": 272
    },
    {
      "epoch": 0.22992700729927007,
      "grad_norm": 0.503067135810852,
      "learning_rate": 7.647058823529411e-06,
      "loss": 0.5184,
      "step": 273
    },
    {
      "epoch": 0.23076923076923078,
      "grad_norm": 0.47775349020957947,
      "learning_rate": 7.675070028011205e-06,
      "loss": 0.4659,
      "step": 274
    },
    {
      "epoch": 0.23161145423919147,
      "grad_norm": 0.4767557382583618,
      "learning_rate": 7.703081232492997e-06,
      "loss": 0.5038,
      "step": 275
    },
    {
      "epoch": 0.23245367770915215,
      "grad_norm": 0.5389216542243958,
      "learning_rate": 7.731092436974791e-06,
      "loss": 0.515,
      "step": 276
    },
    {
      "epoch": 0.23329590117911286,
      "grad_norm": 0.508139431476593,
      "learning_rate": 7.759103641456584e-06,
      "loss": 0.5057,
      "step": 277
    },
    {
      "epoch": 0.23413812464907355,
      "grad_norm": 0.5528637170791626,
      "learning_rate": 7.787114845938376e-06,
      "loss": 0.5388,
      "step": 278
    },
    {
      "epoch": 0.23498034811903426,
      "grad_norm": 0.47812914848327637,
      "learning_rate": 7.815126050420168e-06,
      "loss": 0.5114,
      "step": 279
    },
    {
      "epoch": 0.23582257158899494,
      "grad_norm": 0.5382859706878662,
      "learning_rate": 7.84313725490196e-06,
      "loss": 0.4841,
      "step": 280
    },
    {
      "epoch": 0.23666479505895563,
      "grad_norm": 0.45208120346069336,
      "learning_rate": 7.871148459383755e-06,
      "loss": 0.5154,
      "step": 281
    },
    {
      "epoch": 0.23750701852891634,
      "grad_norm": 0.5657650232315063,
      "learning_rate": 7.899159663865547e-06,
      "loss": 0.4961,
      "step": 282
    },
    {
      "epoch": 0.23834924199887703,
      "grad_norm": 0.5089389085769653,
      "learning_rate": 7.92717086834734e-06,
      "loss": 0.5129,
      "step": 283
    },
    {
      "epoch": 0.23919146546883774,
      "grad_norm": 0.5579858422279358,
      "learning_rate": 7.955182072829132e-06,
      "loss": 0.4853,
      "step": 284
    },
    {
      "epoch": 0.24003368893879842,
      "grad_norm": 0.6371762752532959,
      "learning_rate": 7.983193277310926e-06,
      "loss": 0.5347,
      "step": 285
    },
    {
      "epoch": 0.24087591240875914,
      "grad_norm": 0.4672877788543701,
      "learning_rate": 8.011204481792718e-06,
      "loss": 0.4756,
      "step": 286
    },
    {
      "epoch": 0.24171813587871982,
      "grad_norm": 0.5201910138130188,
      "learning_rate": 8.03921568627451e-06,
      "loss": 0.5125,
      "step": 287
    },
    {
      "epoch": 0.2425603593486805,
      "grad_norm": 0.48488157987594604,
      "learning_rate": 8.067226890756303e-06,
      "loss": 0.5117,
      "step": 288
    },
    {
      "epoch": 0.24340258281864122,
      "grad_norm": 0.48401185870170593,
      "learning_rate": 8.095238095238097e-06,
      "loss": 0.4741,
      "step": 289
    },
    {
      "epoch": 0.2442448062886019,
      "grad_norm": 0.4772760570049286,
      "learning_rate": 8.123249299719889e-06,
      "loss": 0.4786,
      "step": 290
    },
    {
      "epoch": 0.2450870297585626,
      "grad_norm": 0.4510549306869507,
      "learning_rate": 8.151260504201681e-06,
      "loss": 0.5188,
      "step": 291
    },
    {
      "epoch": 0.2459292532285233,
      "grad_norm": 0.4280686676502228,
      "learning_rate": 8.179271708683473e-06,
      "loss": 0.4688,
      "step": 292
    },
    {
      "epoch": 0.246771476698484,
      "grad_norm": 0.46486014127731323,
      "learning_rate": 8.207282913165266e-06,
      "loss": 0.5018,
      "step": 293
    },
    {
      "epoch": 0.2476137001684447,
      "grad_norm": 0.4601709544658661,
      "learning_rate": 8.23529411764706e-06,
      "loss": 0.5108,
      "step": 294
    },
    {
      "epoch": 0.24845592363840538,
      "grad_norm": 0.4319095015525818,
      "learning_rate": 8.263305322128852e-06,
      "loss": 0.4769,
      "step": 295
    },
    {
      "epoch": 0.2492981471083661,
      "grad_norm": 0.4812179207801819,
      "learning_rate": 8.291316526610646e-06,
      "loss": 0.4994,
      "step": 296
    },
    {
      "epoch": 0.2501403705783268,
      "grad_norm": 0.4166523218154907,
      "learning_rate": 8.319327731092438e-06,
      "loss": 0.4929,
      "step": 297
    },
    {
      "epoch": 0.2509825940482875,
      "grad_norm": 0.4598024785518646,
      "learning_rate": 8.34733893557423e-06,
      "loss": 0.496,
      "step": 298
    },
    {
      "epoch": 0.2518248175182482,
      "grad_norm": 0.47016629576683044,
      "learning_rate": 8.375350140056023e-06,
      "loss": 0.508,
      "step": 299
    },
    {
      "epoch": 0.25266704098820886,
      "grad_norm": 0.4304528534412384,
      "learning_rate": 8.403361344537815e-06,
      "loss": 0.4959,
      "step": 300
    },
    {
      "epoch": 0.25350926445816957,
      "grad_norm": 0.42471399903297424,
      "learning_rate": 8.43137254901961e-06,
      "loss": 0.5169,
      "step": 301
    },
    {
      "epoch": 0.2543514879281303,
      "grad_norm": 0.4087314307689667,
      "learning_rate": 8.459383753501402e-06,
      "loss": 0.4662,
      "step": 302
    },
    {
      "epoch": 0.25519371139809094,
      "grad_norm": 0.46059122681617737,
      "learning_rate": 8.487394957983194e-06,
      "loss": 0.5131,
      "step": 303
    },
    {
      "epoch": 0.25603593486805165,
      "grad_norm": 0.457307904958725,
      "learning_rate": 8.515406162464986e-06,
      "loss": 0.5044,
      "step": 304
    },
    {
      "epoch": 0.25687815833801236,
      "grad_norm": 0.4389522671699524,
      "learning_rate": 8.543417366946779e-06,
      "loss": 0.5144,
      "step": 305
    },
    {
      "epoch": 0.2577203818079731,
      "grad_norm": 0.40469279885292053,
      "learning_rate": 8.571428571428571e-06,
      "loss": 0.4686,
      "step": 306
    },
    {
      "epoch": 0.25856260527793373,
      "grad_norm": 0.4874703288078308,
      "learning_rate": 8.599439775910365e-06,
      "loss": 0.5075,
      "step": 307
    },
    {
      "epoch": 0.25940482874789444,
      "grad_norm": 0.4197627305984497,
      "learning_rate": 8.627450980392157e-06,
      "loss": 0.4863,
      "step": 308
    },
    {
      "epoch": 0.26024705221785516,
      "grad_norm": 0.4971052408218384,
      "learning_rate": 8.655462184873951e-06,
      "loss": 0.4905,
      "step": 309
    },
    {
      "epoch": 0.2610892756878158,
      "grad_norm": 0.5043051838874817,
      "learning_rate": 8.683473389355744e-06,
      "loss": 0.4885,
      "step": 310
    },
    {
      "epoch": 0.2619314991577765,
      "grad_norm": 0.4140719771385193,
      "learning_rate": 8.711484593837536e-06,
      "loss": 0.4958,
      "step": 311
    },
    {
      "epoch": 0.26277372262773724,
      "grad_norm": 0.6389642953872681,
      "learning_rate": 8.739495798319328e-06,
      "loss": 0.515,
      "step": 312
    },
    {
      "epoch": 0.2636159460976979,
      "grad_norm": 0.4518273174762726,
      "learning_rate": 8.76750700280112e-06,
      "loss": 0.5092,
      "step": 313
    },
    {
      "epoch": 0.2644581695676586,
      "grad_norm": 0.4997215270996094,
      "learning_rate": 8.795518207282914e-06,
      "loss": 0.5223,
      "step": 314
    },
    {
      "epoch": 0.2653003930376193,
      "grad_norm": 0.6010345220565796,
      "learning_rate": 8.823529411764707e-06,
      "loss": 0.479,
      "step": 315
    },
    {
      "epoch": 0.26614261650758003,
      "grad_norm": 0.49202221632003784,
      "learning_rate": 8.851540616246499e-06,
      "loss": 0.5174,
      "step": 316
    },
    {
      "epoch": 0.2669848399775407,
      "grad_norm": 0.4887734353542328,
      "learning_rate": 8.879551820728291e-06,
      "loss": 0.4766,
      "step": 317
    },
    {
      "epoch": 0.2678270634475014,
      "grad_norm": 0.4960244297981262,
      "learning_rate": 8.907563025210085e-06,
      "loss": 0.5005,
      "step": 318
    },
    {
      "epoch": 0.2686692869174621,
      "grad_norm": 0.5670393109321594,
      "learning_rate": 8.935574229691878e-06,
      "loss": 0.5198,
      "step": 319
    },
    {
      "epoch": 0.26951151038742277,
      "grad_norm": 0.4601237177848816,
      "learning_rate": 8.96358543417367e-06,
      "loss": 0.4817,
      "step": 320
    },
    {
      "epoch": 0.2703537338573835,
      "grad_norm": 0.571388840675354,
      "learning_rate": 8.991596638655462e-06,
      "loss": 0.5016,
      "step": 321
    },
    {
      "epoch": 0.2711959573273442,
      "grad_norm": 0.4512246251106262,
      "learning_rate": 9.019607843137256e-06,
      "loss": 0.4981,
      "step": 322
    },
    {
      "epoch": 0.2720381807973049,
      "grad_norm": 0.5007657408714294,
      "learning_rate": 9.047619047619049e-06,
      "loss": 0.4713,
      "step": 323
    },
    {
      "epoch": 0.27288040426726556,
      "grad_norm": 0.46815231442451477,
      "learning_rate": 9.075630252100841e-06,
      "loss": 0.4826,
      "step": 324
    },
    {
      "epoch": 0.2737226277372263,
      "grad_norm": 0.4824390411376953,
      "learning_rate": 9.103641456582633e-06,
      "loss": 0.4915,
      "step": 325
    },
    {
      "epoch": 0.274564851207187,
      "grad_norm": 0.48989179730415344,
      "learning_rate": 9.131652661064426e-06,
      "loss": 0.4875,
      "step": 326
    },
    {
      "epoch": 0.27540707467714765,
      "grad_norm": 0.4540961980819702,
      "learning_rate": 9.15966386554622e-06,
      "loss": 0.4958,
      "step": 327
    },
    {
      "epoch": 0.27624929814710836,
      "grad_norm": 0.440052330493927,
      "learning_rate": 9.187675070028012e-06,
      "loss": 0.4849,
      "step": 328
    },
    {
      "epoch": 0.27709152161706907,
      "grad_norm": 0.4710492491722107,
      "learning_rate": 9.215686274509804e-06,
      "loss": 0.4765,
      "step": 329
    },
    {
      "epoch": 0.2779337450870298,
      "grad_norm": 0.47432830929756165,
      "learning_rate": 9.243697478991598e-06,
      "loss": 0.506,
      "step": 330
    },
    {
      "epoch": 0.27877596855699044,
      "grad_norm": 0.4526480436325073,
      "learning_rate": 9.27170868347339e-06,
      "loss": 0.5079,
      "step": 331
    },
    {
      "epoch": 0.27961819202695115,
      "grad_norm": 0.48656177520751953,
      "learning_rate": 9.299719887955183e-06,
      "loss": 0.4694,
      "step": 332
    },
    {
      "epoch": 0.28046041549691186,
      "grad_norm": 0.4747089445590973,
      "learning_rate": 9.327731092436975e-06,
      "loss": 0.5279,
      "step": 333
    },
    {
      "epoch": 0.2813026389668725,
      "grad_norm": 0.4273201525211334,
      "learning_rate": 9.355742296918767e-06,
      "loss": 0.5096,
      "step": 334
    },
    {
      "epoch": 0.28214486243683323,
      "grad_norm": 0.534050703048706,
      "learning_rate": 9.383753501400561e-06,
      "loss": 0.5007,
      "step": 335
    },
    {
      "epoch": 0.28298708590679394,
      "grad_norm": 0.5876249670982361,
      "learning_rate": 9.411764705882354e-06,
      "loss": 0.5339,
      "step": 336
    },
    {
      "epoch": 0.28382930937675466,
      "grad_norm": 0.5101175904273987,
      "learning_rate": 9.439775910364146e-06,
      "loss": 0.5041,
      "step": 337
    },
    {
      "epoch": 0.2846715328467153,
      "grad_norm": 0.5168517231941223,
      "learning_rate": 9.467787114845938e-06,
      "loss": 0.4668,
      "step": 338
    },
    {
      "epoch": 0.285513756316676,
      "grad_norm": 0.5346341729164124,
      "learning_rate": 9.49579831932773e-06,
      "loss": 0.4813,
      "step": 339
    },
    {
      "epoch": 0.28635597978663674,
      "grad_norm": 0.47977036237716675,
      "learning_rate": 9.523809523809525e-06,
      "loss": 0.514,
      "step": 340
    },
    {
      "epoch": 0.2871982032565974,
      "grad_norm": 0.5431166291236877,
      "learning_rate": 9.551820728291317e-06,
      "loss": 0.492,
      "step": 341
    },
    {
      "epoch": 0.2880404267265581,
      "grad_norm": 0.639439582824707,
      "learning_rate": 9.579831932773111e-06,
      "loss": 0.4832,
      "step": 342
    },
    {
      "epoch": 0.2888826501965188,
      "grad_norm": 0.48333168029785156,
      "learning_rate": 9.607843137254903e-06,
      "loss": 0.5173,
      "step": 343
    },
    {
      "epoch": 0.28972487366647953,
      "grad_norm": 0.5966715216636658,
      "learning_rate": 9.635854341736696e-06,
      "loss": 0.507,
      "step": 344
    },
    {
      "epoch": 0.2905670971364402,
      "grad_norm": 0.5623136162757874,
      "learning_rate": 9.663865546218488e-06,
      "loss": 0.5025,
      "step": 345
    },
    {
      "epoch": 0.2914093206064009,
      "grad_norm": 0.4420316815376282,
      "learning_rate": 9.69187675070028e-06,
      "loss": 0.4624,
      "step": 346
    },
    {
      "epoch": 0.2922515440763616,
      "grad_norm": 0.502076268196106,
      "learning_rate": 9.719887955182074e-06,
      "loss": 0.4693,
      "step": 347
    },
    {
      "epoch": 0.29309376754632227,
      "grad_norm": 0.479105144739151,
      "learning_rate": 9.747899159663867e-06,
      "loss": 0.4757,
      "step": 348
    },
    {
      "epoch": 0.293935991016283,
      "grad_norm": 0.4886276125907898,
      "learning_rate": 9.775910364145659e-06,
      "loss": 0.5094,
      "step": 349
    },
    {
      "epoch": 0.2947782144862437,
      "grad_norm": 0.5077254772186279,
      "learning_rate": 9.803921568627451e-06,
      "loss": 0.4887,
      "step": 350
    },
    {
      "epoch": 0.2956204379562044,
      "grad_norm": 0.48335137963294983,
      "learning_rate": 9.831932773109244e-06,
      "loss": 0.4646,
      "step": 351
    },
    {
      "epoch": 0.29646266142616506,
      "grad_norm": 0.600748598575592,
      "learning_rate": 9.859943977591038e-06,
      "loss": 0.4674,
      "step": 352
    },
    {
      "epoch": 0.2973048848961258,
      "grad_norm": 0.5315221548080444,
      "learning_rate": 9.88795518207283e-06,
      "loss": 0.5176,
      "step": 353
    },
    {
      "epoch": 0.2981471083660865,
      "grad_norm": 0.5835525989532471,
      "learning_rate": 9.915966386554622e-06,
      "loss": 0.5078,
      "step": 354
    },
    {
      "epoch": 0.29898933183604715,
      "grad_norm": 0.4719061851501465,
      "learning_rate": 9.943977591036416e-06,
      "loss": 0.4761,
      "step": 355
    },
    {
      "epoch": 0.29983155530600786,
      "grad_norm": 0.4878394901752472,
      "learning_rate": 9.971988795518209e-06,
      "loss": 0.4906,
      "step": 356
    },
    {
      "epoch": 0.30067377877596857,
      "grad_norm": 0.45169317722320557,
      "learning_rate": 1e-05,
      "loss": 0.5048,
      "step": 357
    },
    {
      "epoch": 0.3015160022459292,
      "grad_norm": 0.4769243597984314,
      "learning_rate": 9.99999759644146e-06,
      "loss": 0.4938,
      "step": 358
    },
    {
      "epoch": 0.30235822571588994,
      "grad_norm": 0.4975832402706146,
      "learning_rate": 9.999990385768144e-06,
      "loss": 0.5119,
      "step": 359
    },
    {
      "epoch": 0.30320044918585065,
      "grad_norm": 0.5521783232688904,
      "learning_rate": 9.999978367986988e-06,
      "loss": 0.4584,
      "step": 360
    },
    {
      "epoch": 0.30404267265581136,
      "grad_norm": 0.47074005007743835,
      "learning_rate": 9.999961543109546e-06,
      "loss": 0.4817,
      "step": 361
    },
    {
      "epoch": 0.304884896125772,
      "grad_norm": 0.5094561576843262,
      "learning_rate": 9.999939911151992e-06,
      "loss": 0.505,
      "step": 362
    },
    {
      "epoch": 0.30572711959573273,
      "grad_norm": 0.47599393129348755,
      "learning_rate": 9.999913472135126e-06,
      "loss": 0.4879,
      "step": 363
    },
    {
      "epoch": 0.30656934306569344,
      "grad_norm": 0.45121443271636963,
      "learning_rate": 9.999882226084366e-06,
      "loss": 0.4988,
      "step": 364
    },
    {
      "epoch": 0.3074115665356541,
      "grad_norm": 0.4143451154232025,
      "learning_rate": 9.999846173029752e-06,
      "loss": 0.4988,
      "step": 365
    },
    {
      "epoch": 0.3082537900056148,
      "grad_norm": 0.4973858594894409,
      "learning_rate": 9.999805313005946e-06,
      "loss": 0.4943,
      "step": 366
    },
    {
      "epoch": 0.3090960134755755,
      "grad_norm": 0.5556269288063049,
      "learning_rate": 9.999759646052234e-06,
      "loss": 0.4836,
      "step": 367
    },
    {
      "epoch": 0.30993823694553624,
      "grad_norm": 0.44980430603027344,
      "learning_rate": 9.99970917221252e-06,
      "loss": 0.4836,
      "step": 368
    },
    {
      "epoch": 0.3107804604154969,
      "grad_norm": 0.5114539265632629,
      "learning_rate": 9.99965389153533e-06,
      "loss": 0.485,
      "step": 369
    },
    {
      "epoch": 0.3116226838854576,
      "grad_norm": 0.48699915409088135,
      "learning_rate": 9.999593804073812e-06,
      "loss": 0.489,
      "step": 370
    },
    {
      "epoch": 0.3124649073554183,
      "grad_norm": 0.5407917499542236,
      "learning_rate": 9.999528909885738e-06,
      "loss": 0.4719,
      "step": 371
    },
    {
      "epoch": 0.313307130825379,
      "grad_norm": 0.5700762867927551,
      "learning_rate": 9.999459209033495e-06,
      "loss": 0.5079,
      "step": 372
    },
    {
      "epoch": 0.3141493542953397,
      "grad_norm": 0.6078160405158997,
      "learning_rate": 9.999384701584098e-06,
      "loss": 0.4828,
      "step": 373
    },
    {
      "epoch": 0.3149915777653004,
      "grad_norm": 0.47569355368614197,
      "learning_rate": 9.99930538760918e-06,
      "loss": 0.4904,
      "step": 374
    },
    {
      "epoch": 0.3158338012352611,
      "grad_norm": 0.6140485405921936,
      "learning_rate": 9.999221267184993e-06,
      "loss": 0.4453,
      "step": 375
    },
    {
      "epoch": 0.31667602470522177,
      "grad_norm": 0.5686110854148865,
      "learning_rate": 9.999132340392416e-06,
      "loss": 0.5064,
      "step": 376
    },
    {
      "epoch": 0.3175182481751825,
      "grad_norm": 0.5290204882621765,
      "learning_rate": 9.999038607316942e-06,
      "loss": 0.4881,
      "step": 377
    },
    {
      "epoch": 0.3183604716451432,
      "grad_norm": 0.5961858630180359,
      "learning_rate": 9.998940068048688e-06,
      "loss": 0.496,
      "step": 378
    },
    {
      "epoch": 0.31920269511510385,
      "grad_norm": 0.5859911441802979,
      "learning_rate": 9.998836722682397e-06,
      "loss": 0.4929,
      "step": 379
    },
    {
      "epoch": 0.32004491858506456,
      "grad_norm": 0.6150991916656494,
      "learning_rate": 9.998728571317422e-06,
      "loss": 0.4952,
      "step": 380
    },
    {
      "epoch": 0.3208871420550253,
      "grad_norm": 0.6253373026847839,
      "learning_rate": 9.998615614057743e-06,
      "loss": 0.5128,
      "step": 381
    },
    {
      "epoch": 0.321729365524986,
      "grad_norm": 0.6281008124351501,
      "learning_rate": 9.998497851011963e-06,
      "loss": 0.5046,
      "step": 382
    },
    {
      "epoch": 0.32257158899494665,
      "grad_norm": 0.5089046955108643,
      "learning_rate": 9.998375282293298e-06,
      "loss": 0.4949,
      "step": 383
    },
    {
      "epoch": 0.32341381246490736,
      "grad_norm": 0.7248775959014893,
      "learning_rate": 9.998247908019594e-06,
      "loss": 0.5239,
      "step": 384
    },
    {
      "epoch": 0.32425603593486807,
      "grad_norm": 0.44915470480918884,
      "learning_rate": 9.998115728313305e-06,
      "loss": 0.4611,
      "step": 385
    },
    {
      "epoch": 0.3250982594048287,
      "grad_norm": 0.6267629265785217,
      "learning_rate": 9.997978743301516e-06,
      "loss": 0.497,
      "step": 386
    },
    {
      "epoch": 0.32594048287478944,
      "grad_norm": 0.4745340347290039,
      "learning_rate": 9.997836953115927e-06,
      "loss": 0.4964,
      "step": 387
    },
    {
      "epoch": 0.32678270634475015,
      "grad_norm": 0.5183274745941162,
      "learning_rate": 9.997690357892857e-06,
      "loss": 0.5168,
      "step": 388
    },
    {
      "epoch": 0.32762492981471086,
      "grad_norm": 0.5363770723342896,
      "learning_rate": 9.997538957773248e-06,
      "loss": 0.4806,
      "step": 389
    },
    {
      "epoch": 0.3284671532846715,
      "grad_norm": 0.5159518718719482,
      "learning_rate": 9.997382752902658e-06,
      "loss": 0.4508,
      "step": 390
    },
    {
      "epoch": 0.32930937675463223,
      "grad_norm": 0.5477460622787476,
      "learning_rate": 9.997221743431267e-06,
      "loss": 0.4834,
      "step": 391
    },
    {
      "epoch": 0.33015160022459294,
      "grad_norm": 0.5853870511054993,
      "learning_rate": 9.997055929513873e-06,
      "loss": 0.5239,
      "step": 392
    },
    {
      "epoch": 0.3309938236945536,
      "grad_norm": 0.5335326194763184,
      "learning_rate": 9.996885311309892e-06,
      "loss": 0.5029,
      "step": 393
    },
    {
      "epoch": 0.3318360471645143,
      "grad_norm": 0.5111095905303955,
      "learning_rate": 9.996709888983362e-06,
      "loss": 0.4892,
      "step": 394
    },
    {
      "epoch": 0.332678270634475,
      "grad_norm": 0.6140020489692688,
      "learning_rate": 9.99652966270294e-06,
      "loss": 0.4924,
      "step": 395
    },
    {
      "epoch": 0.3335204941044357,
      "grad_norm": 0.4898150563240051,
      "learning_rate": 9.996344632641895e-06,
      "loss": 0.4892,
      "step": 396
    },
    {
      "epoch": 0.3343627175743964,
      "grad_norm": 0.603682816028595,
      "learning_rate": 9.996154798978122e-06,
      "loss": 0.4847,
      "step": 397
    },
    {
      "epoch": 0.3352049410443571,
      "grad_norm": 0.5086454749107361,
      "learning_rate": 9.995960161894132e-06,
      "loss": 0.5158,
      "step": 398
    },
    {
      "epoch": 0.3360471645143178,
      "grad_norm": 0.6561251878738403,
      "learning_rate": 9.995760721577053e-06,
      "loss": 0.5111,
      "step": 399
    },
    {
      "epoch": 0.3368893879842785,
      "grad_norm": 0.44787344336509705,
      "learning_rate": 9.99555647821863e-06,
      "loss": 0.4891,
      "step": 400
    },
    {
      "epoch": 0.3377316114542392,
      "grad_norm": 0.5048717856407166,
      "learning_rate": 9.99534743201523e-06,
      "loss": 0.4446,
      "step": 401
    },
    {
      "epoch": 0.3385738349241999,
      "grad_norm": 0.5360075235366821,
      "learning_rate": 9.995133583167833e-06,
      "loss": 0.5148,
      "step": 402
    },
    {
      "epoch": 0.33941605839416056,
      "grad_norm": 0.5445719361305237,
      "learning_rate": 9.99491493188204e-06,
      "loss": 0.4884,
      "step": 403
    },
    {
      "epoch": 0.34025828186412127,
      "grad_norm": 0.41982364654541016,
      "learning_rate": 9.994691478368067e-06,
      "loss": 0.4543,
      "step": 404
    },
    {
      "epoch": 0.341100505334082,
      "grad_norm": 0.6245936751365662,
      "learning_rate": 9.994463222840748e-06,
      "loss": 0.4967,
      "step": 405
    },
    {
      "epoch": 0.3419427288040427,
      "grad_norm": 0.47641661763191223,
      "learning_rate": 9.994230165519529e-06,
      "loss": 0.507,
      "step": 406
    },
    {
      "epoch": 0.34278495227400335,
      "grad_norm": 0.5001744031906128,
      "learning_rate": 9.993992306628481e-06,
      "loss": 0.5005,
      "step": 407
    },
    {
      "epoch": 0.34362717574396406,
      "grad_norm": 0.45452240109443665,
      "learning_rate": 9.993749646396286e-06,
      "loss": 0.4821,
      "step": 408
    },
    {
      "epoch": 0.3444693992139248,
      "grad_norm": 0.5004611015319824,
      "learning_rate": 9.993502185056244e-06,
      "loss": 0.5026,
      "step": 409
    },
    {
      "epoch": 0.34531162268388543,
      "grad_norm": 0.47744011878967285,
      "learning_rate": 9.993249922846269e-06,
      "loss": 0.4907,
      "step": 410
    },
    {
      "epoch": 0.34615384615384615,
      "grad_norm": 0.4894237518310547,
      "learning_rate": 9.992992860008893e-06,
      "loss": 0.5021,
      "step": 411
    },
    {
      "epoch": 0.34699606962380686,
      "grad_norm": 0.4898819923400879,
      "learning_rate": 9.99273099679126e-06,
      "loss": 0.4945,
      "step": 412
    },
    {
      "epoch": 0.34783829309376757,
      "grad_norm": 0.5017625093460083,
      "learning_rate": 9.992464333445134e-06,
      "loss": 0.479,
      "step": 413
    },
    {
      "epoch": 0.3486805165637282,
      "grad_norm": 0.45786091685295105,
      "learning_rate": 9.99219287022689e-06,
      "loss": 0.4839,
      "step": 414
    },
    {
      "epoch": 0.34952274003368894,
      "grad_norm": 0.4322526752948761,
      "learning_rate": 9.99191660739752e-06,
      "loss": 0.454,
      "step": 415
    },
    {
      "epoch": 0.35036496350364965,
      "grad_norm": 0.45267030596733093,
      "learning_rate": 9.991635545222628e-06,
      "loss": 0.4906,
      "step": 416
    },
    {
      "epoch": 0.3512071869736103,
      "grad_norm": 0.43833205103874207,
      "learning_rate": 9.991349683972435e-06,
      "loss": 0.4719,
      "step": 417
    },
    {
      "epoch": 0.352049410443571,
      "grad_norm": 0.43331077694892883,
      "learning_rate": 9.991059023921773e-06,
      "loss": 0.4898,
      "step": 418
    },
    {
      "epoch": 0.35289163391353173,
      "grad_norm": 0.4749586582183838,
      "learning_rate": 9.990763565350092e-06,
      "loss": 0.5026,
      "step": 419
    },
    {
      "epoch": 0.35373385738349244,
      "grad_norm": 0.4278826117515564,
      "learning_rate": 9.990463308541452e-06,
      "loss": 0.5052,
      "step": 420
    },
    {
      "epoch": 0.3545760808534531,
      "grad_norm": 0.48825404047966003,
      "learning_rate": 9.990158253784525e-06,
      "loss": 0.4926,
      "step": 421
    },
    {
      "epoch": 0.3554183043234138,
      "grad_norm": 0.42613685131073,
      "learning_rate": 9.989848401372602e-06,
      "loss": 0.4651,
      "step": 422
    },
    {
      "epoch": 0.3562605277933745,
      "grad_norm": 0.46916916966438293,
      "learning_rate": 9.989533751603578e-06,
      "loss": 0.5072,
      "step": 423
    },
    {
      "epoch": 0.3571027512633352,
      "grad_norm": 0.4925563633441925,
      "learning_rate": 9.989214304779965e-06,
      "loss": 0.5162,
      "step": 424
    },
    {
      "epoch": 0.3579449747332959,
      "grad_norm": 0.4225377142429352,
      "learning_rate": 9.988890061208889e-06,
      "loss": 0.4729,
      "step": 425
    },
    {
      "epoch": 0.3587871982032566,
      "grad_norm": 0.45179373025894165,
      "learning_rate": 9.988561021202083e-06,
      "loss": 0.4918,
      "step": 426
    },
    {
      "epoch": 0.3596294216732173,
      "grad_norm": 0.4606793522834778,
      "learning_rate": 9.988227185075897e-06,
      "loss": 0.4828,
      "step": 427
    },
    {
      "epoch": 0.360471645143178,
      "grad_norm": 0.43328404426574707,
      "learning_rate": 9.987888553151285e-06,
      "loss": 0.5061,
      "step": 428
    },
    {
      "epoch": 0.3613138686131387,
      "grad_norm": 0.636332094669342,
      "learning_rate": 9.987545125753818e-06,
      "loss": 0.4961,
      "step": 429
    },
    {
      "epoch": 0.3621560920830994,
      "grad_norm": 0.4785216748714447,
      "learning_rate": 9.987196903213677e-06,
      "loss": 0.478,
      "step": 430
    },
    {
      "epoch": 0.36299831555306006,
      "grad_norm": 0.5257418751716614,
      "learning_rate": 9.986843885865649e-06,
      "loss": 0.4904,
      "step": 431
    },
    {
      "epoch": 0.36384053902302077,
      "grad_norm": 0.42186710238456726,
      "learning_rate": 9.986486074049131e-06,
      "loss": 0.4761,
      "step": 432
    },
    {
      "epoch": 0.3646827624929815,
      "grad_norm": 0.5078808069229126,
      "learning_rate": 9.986123468108134e-06,
      "loss": 0.49,
      "step": 433
    },
    {
      "epoch": 0.3655249859629422,
      "grad_norm": 0.4745253622531891,
      "learning_rate": 9.985756068391276e-06,
      "loss": 0.5014,
      "step": 434
    },
    {
      "epoch": 0.36636720943290285,
      "grad_norm": 0.46514925360679626,
      "learning_rate": 9.985383875251783e-06,
      "loss": 0.4862,
      "step": 435
    },
    {
      "epoch": 0.36720943290286356,
      "grad_norm": 0.4329011142253876,
      "learning_rate": 9.985006889047492e-06,
      "loss": 0.4953,
      "step": 436
    },
    {
      "epoch": 0.3680516563728243,
      "grad_norm": 0.5294397473335266,
      "learning_rate": 9.984625110140844e-06,
      "loss": 0.508,
      "step": 437
    },
    {
      "epoch": 0.36889387984278493,
      "grad_norm": 0.40423932671546936,
      "learning_rate": 9.98423853889889e-06,
      "loss": 0.4862,
      "step": 438
    },
    {
      "epoch": 0.36973610331274565,
      "grad_norm": 0.44111934304237366,
      "learning_rate": 9.983847175693291e-06,
      "loss": 0.4571,
      "step": 439
    },
    {
      "epoch": 0.37057832678270636,
      "grad_norm": 0.42973461747169495,
      "learning_rate": 9.983451020900312e-06,
      "loss": 0.4927,
      "step": 440
    },
    {
      "epoch": 0.371420550252667,
      "grad_norm": 0.4894910156726837,
      "learning_rate": 9.983050074900824e-06,
      "loss": 0.4949,
      "step": 441
    },
    {
      "epoch": 0.3722627737226277,
      "grad_norm": 0.49671176075935364,
      "learning_rate": 9.982644338080308e-06,
      "loss": 0.5023,
      "step": 442
    },
    {
      "epoch": 0.37310499719258844,
      "grad_norm": 0.5294812917709351,
      "learning_rate": 9.982233810828846e-06,
      "loss": 0.4976,
      "step": 443
    },
    {
      "epoch": 0.37394722066254915,
      "grad_norm": 0.520665168762207,
      "learning_rate": 9.98181849354113e-06,
      "loss": 0.4929,
      "step": 444
    },
    {
      "epoch": 0.3747894441325098,
      "grad_norm": 0.5076466798782349,
      "learning_rate": 9.98139838661646e-06,
      "loss": 0.4934,
      "step": 445
    },
    {
      "epoch": 0.3756316676024705,
      "grad_norm": 0.4853319823741913,
      "learning_rate": 9.980973490458728e-06,
      "loss": 0.4794,
      "step": 446
    },
    {
      "epoch": 0.37647389107243123,
      "grad_norm": 0.523567259311676,
      "learning_rate": 9.980543805476447e-06,
      "loss": 0.4759,
      "step": 447
    },
    {
      "epoch": 0.3773161145423919,
      "grad_norm": 0.41669684648513794,
      "learning_rate": 9.980109332082722e-06,
      "loss": 0.4886,
      "step": 448
    },
    {
      "epoch": 0.3781583380123526,
      "grad_norm": 0.49894437193870544,
      "learning_rate": 9.979670070695265e-06,
      "loss": 0.5037,
      "step": 449
    },
    {
      "epoch": 0.3790005614823133,
      "grad_norm": 0.524468183517456,
      "learning_rate": 9.979226021736396e-06,
      "loss": 0.51,
      "step": 450
    },
    {
      "epoch": 0.379842784952274,
      "grad_norm": 0.4367826282978058,
      "learning_rate": 9.978777185633032e-06,
      "loss": 0.4695,
      "step": 451
    },
    {
      "epoch": 0.3806850084222347,
      "grad_norm": 0.43843579292297363,
      "learning_rate": 9.978323562816693e-06,
      "loss": 0.4728,
      "step": 452
    },
    {
      "epoch": 0.3815272318921954,
      "grad_norm": 0.4599330723285675,
      "learning_rate": 9.977865153723508e-06,
      "loss": 0.4897,
      "step": 453
    },
    {
      "epoch": 0.3823694553621561,
      "grad_norm": 0.46640512347221375,
      "learning_rate": 9.977401958794194e-06,
      "loss": 0.4852,
      "step": 454
    },
    {
      "epoch": 0.38321167883211676,
      "grad_norm": 0.4817606806755066,
      "learning_rate": 9.976933978474085e-06,
      "loss": 0.477,
      "step": 455
    },
    {
      "epoch": 0.3840539023020775,
      "grad_norm": 0.40368732810020447,
      "learning_rate": 9.976461213213104e-06,
      "loss": 0.4574,
      "step": 456
    },
    {
      "epoch": 0.3848961257720382,
      "grad_norm": 0.4457208812236786,
      "learning_rate": 9.97598366346578e-06,
      "loss": 0.4565,
      "step": 457
    },
    {
      "epoch": 0.3857383492419989,
      "grad_norm": 0.43082210421562195,
      "learning_rate": 9.975501329691241e-06,
      "loss": 0.4979,
      "step": 458
    },
    {
      "epoch": 0.38658057271195956,
      "grad_norm": 0.4862743020057678,
      "learning_rate": 9.975014212353212e-06,
      "loss": 0.4918,
      "step": 459
    },
    {
      "epoch": 0.38742279618192027,
      "grad_norm": 0.4110148251056671,
      "learning_rate": 9.974522311920021e-06,
      "loss": 0.5015,
      "step": 460
    },
    {
      "epoch": 0.388265019651881,
      "grad_norm": 0.4098787307739258,
      "learning_rate": 9.974025628864592e-06,
      "loss": 0.4656,
      "step": 461
    },
    {
      "epoch": 0.38910724312184164,
      "grad_norm": 0.4174119830131531,
      "learning_rate": 9.973524163664447e-06,
      "loss": 0.4427,
      "step": 462
    },
    {
      "epoch": 0.38994946659180235,
      "grad_norm": 0.46027931571006775,
      "learning_rate": 9.973017916801708e-06,
      "loss": 0.486,
      "step": 463
    },
    {
      "epoch": 0.39079169006176306,
      "grad_norm": 0.44078320264816284,
      "learning_rate": 9.972506888763092e-06,
      "loss": 0.4759,
      "step": 464
    },
    {
      "epoch": 0.3916339135317238,
      "grad_norm": 0.44045937061309814,
      "learning_rate": 9.971991080039912e-06,
      "loss": 0.4839,
      "step": 465
    },
    {
      "epoch": 0.39247613700168443,
      "grad_norm": 0.4973815381526947,
      "learning_rate": 9.971470491128077e-06,
      "loss": 0.4654,
      "step": 466
    },
    {
      "epoch": 0.39331836047164515,
      "grad_norm": 0.4320937395095825,
      "learning_rate": 9.9709451225281e-06,
      "loss": 0.4915,
      "step": 467
    },
    {
      "epoch": 0.39416058394160586,
      "grad_norm": 0.41419434547424316,
      "learning_rate": 9.970414974745077e-06,
      "loss": 0.4838,
      "step": 468
    },
    {
      "epoch": 0.3950028074115665,
      "grad_norm": 0.40357041358947754,
      "learning_rate": 9.969880048288704e-06,
      "loss": 0.4841,
      "step": 469
    },
    {
      "epoch": 0.3958450308815272,
      "grad_norm": 0.42220407724380493,
      "learning_rate": 9.969340343673277e-06,
      "loss": 0.4704,
      "step": 470
    },
    {
      "epoch": 0.39668725435148794,
      "grad_norm": 0.39903971552848816,
      "learning_rate": 9.968795861417676e-06,
      "loss": 0.4956,
      "step": 471
    },
    {
      "epoch": 0.39752947782144865,
      "grad_norm": 0.39897122979164124,
      "learning_rate": 9.96824660204538e-06,
      "loss": 0.4839,
      "step": 472
    },
    {
      "epoch": 0.3983717012914093,
      "grad_norm": 0.44095873832702637,
      "learning_rate": 9.96769256608446e-06,
      "loss": 0.4998,
      "step": 473
    },
    {
      "epoch": 0.39921392476137,
      "grad_norm": 0.4241701066493988,
      "learning_rate": 9.967133754067581e-06,
      "loss": 0.4682,
      "step": 474
    },
    {
      "epoch": 0.40005614823133073,
      "grad_norm": 0.4067305624485016,
      "learning_rate": 9.966570166531997e-06,
      "loss": 0.4917,
      "step": 475
    },
    {
      "epoch": 0.4008983717012914,
      "grad_norm": 0.4195798337459564,
      "learning_rate": 9.966001804019552e-06,
      "loss": 0.506,
      "step": 476
    },
    {
      "epoch": 0.4017405951712521,
      "grad_norm": 0.4321221113204956,
      "learning_rate": 9.965428667076687e-06,
      "loss": 0.4841,
      "step": 477
    },
    {
      "epoch": 0.4025828186412128,
      "grad_norm": 0.372313529253006,
      "learning_rate": 9.964850756254426e-06,
      "loss": 0.4491,
      "step": 478
    },
    {
      "epoch": 0.40342504211117347,
      "grad_norm": 0.44276925921440125,
      "learning_rate": 9.964268072108385e-06,
      "loss": 0.4717,
      "step": 479
    },
    {
      "epoch": 0.4042672655811342,
      "grad_norm": 0.40842404961586,
      "learning_rate": 9.963680615198774e-06,
      "loss": 0.498,
      "step": 480
    },
    {
      "epoch": 0.4051094890510949,
      "grad_norm": 0.3724912703037262,
      "learning_rate": 9.963088386090386e-06,
      "loss": 0.4508,
      "step": 481
    },
    {
      "epoch": 0.4059517125210556,
      "grad_norm": 0.4156785011291504,
      "learning_rate": 9.962491385352601e-06,
      "loss": 0.4916,
      "step": 482
    },
    {
      "epoch": 0.40679393599101626,
      "grad_norm": 0.43704017996788025,
      "learning_rate": 9.961889613559396e-06,
      "loss": 0.489,
      "step": 483
    },
    {
      "epoch": 0.407636159460977,
      "grad_norm": 0.4346965253353119,
      "learning_rate": 9.961283071289323e-06,
      "loss": 0.4993,
      "step": 484
    },
    {
      "epoch": 0.4084783829309377,
      "grad_norm": 0.48395463824272156,
      "learning_rate": 9.960671759125529e-06,
      "loss": 0.5004,
      "step": 485
    },
    {
      "epoch": 0.40932060640089835,
      "grad_norm": 0.46597611904144287,
      "learning_rate": 9.960055677655743e-06,
      "loss": 0.4726,
      "step": 486
    },
    {
      "epoch": 0.41016282987085906,
      "grad_norm": 0.4586118459701538,
      "learning_rate": 9.959434827472278e-06,
      "loss": 0.4833,
      "step": 487
    },
    {
      "epoch": 0.41100505334081977,
      "grad_norm": 0.46129271388053894,
      "learning_rate": 9.958809209172038e-06,
      "loss": 0.4859,
      "step": 488
    },
    {
      "epoch": 0.4118472768107805,
      "grad_norm": 0.43223148584365845,
      "learning_rate": 9.958178823356503e-06,
      "loss": 0.46,
      "step": 489
    },
    {
      "epoch": 0.41268950028074114,
      "grad_norm": 0.4992099404335022,
      "learning_rate": 9.957543670631743e-06,
      "loss": 0.472,
      "step": 490
    },
    {
      "epoch": 0.41353172375070185,
      "grad_norm": 0.4142053723335266,
      "learning_rate": 9.956903751608409e-06,
      "loss": 0.4957,
      "step": 491
    },
    {
      "epoch": 0.41437394722066256,
      "grad_norm": 0.47864967584609985,
      "learning_rate": 9.956259066901733e-06,
      "loss": 0.4658,
      "step": 492
    },
    {
      "epoch": 0.4152161706906232,
      "grad_norm": 0.4330647587776184,
      "learning_rate": 9.95560961713153e-06,
      "loss": 0.4776,
      "step": 493
    },
    {
      "epoch": 0.41605839416058393,
      "grad_norm": 0.4518888592720032,
      "learning_rate": 9.954955402922195e-06,
      "loss": 0.4535,
      "step": 494
    },
    {
      "epoch": 0.41690061763054465,
      "grad_norm": 0.4487229585647583,
      "learning_rate": 9.954296424902709e-06,
      "loss": 0.4773,
      "step": 495
    },
    {
      "epoch": 0.41774284110050536,
      "grad_norm": 0.5818378925323486,
      "learning_rate": 9.953632683706624e-06,
      "loss": 0.5117,
      "step": 496
    },
    {
      "epoch": 0.418585064570466,
      "grad_norm": 0.4880503714084625,
      "learning_rate": 9.95296417997208e-06,
      "loss": 0.4914,
      "step": 497
    },
    {
      "epoch": 0.4194272880404267,
      "grad_norm": 0.5512215495109558,
      "learning_rate": 9.95229091434179e-06,
      "loss": 0.4724,
      "step": 498
    },
    {
      "epoch": 0.42026951151038744,
      "grad_norm": 0.5884513258934021,
      "learning_rate": 9.95161288746305e-06,
      "loss": 0.5026,
      "step": 499
    },
    {
      "epoch": 0.4211117349803481,
      "grad_norm": 0.47172412276268005,
      "learning_rate": 9.950930099987728e-06,
      "loss": 0.4836,
      "step": 500
    },
    {
      "epoch": 0.4219539584503088,
      "grad_norm": 0.6776641011238098,
      "learning_rate": 9.950242552572272e-06,
      "loss": 0.4917,
      "step": 501
    },
    {
      "epoch": 0.4227961819202695,
      "grad_norm": 0.43849074840545654,
      "learning_rate": 9.949550245877708e-06,
      "loss": 0.4718,
      "step": 502
    },
    {
      "epoch": 0.42363840539023023,
      "grad_norm": 0.6114814281463623,
      "learning_rate": 9.948853180569635e-06,
      "loss": 0.4905,
      "step": 503
    },
    {
      "epoch": 0.4244806288601909,
      "grad_norm": 0.6134074926376343,
      "learning_rate": 9.948151357318228e-06,
      "loss": 0.476,
      "step": 504
    },
    {
      "epoch": 0.4253228523301516,
      "grad_norm": 0.5051089525222778,
      "learning_rate": 9.947444776798235e-06,
      "loss": 0.4883,
      "step": 505
    },
    {
      "epoch": 0.4261650758001123,
      "grad_norm": 0.525304913520813,
      "learning_rate": 9.946733439688982e-06,
      "loss": 0.4675,
      "step": 506
    },
    {
      "epoch": 0.42700729927007297,
      "grad_norm": 0.5966299176216125,
      "learning_rate": 9.946017346674362e-06,
      "loss": 0.5013,
      "step": 507
    },
    {
      "epoch": 0.4278495227400337,
      "grad_norm": 0.45351046323776245,
      "learning_rate": 9.945296498442845e-06,
      "loss": 0.4739,
      "step": 508
    },
    {
      "epoch": 0.4286917462099944,
      "grad_norm": 0.5280798077583313,
      "learning_rate": 9.944570895687471e-06,
      "loss": 0.4672,
      "step": 509
    },
    {
      "epoch": 0.4295339696799551,
      "grad_norm": 0.5529831051826477,
      "learning_rate": 9.943840539105853e-06,
      "loss": 0.4654,
      "step": 510
    },
    {
      "epoch": 0.43037619314991576,
      "grad_norm": 0.3804203271865845,
      "learning_rate": 9.943105429400171e-06,
      "loss": 0.4649,
      "step": 511
    },
    {
      "epoch": 0.4312184166198765,
      "grad_norm": 0.5179672241210938,
      "learning_rate": 9.942365567277178e-06,
      "loss": 0.4628,
      "step": 512
    },
    {
      "epoch": 0.4320606400898372,
      "grad_norm": 0.4906690716743469,
      "learning_rate": 9.941620953448195e-06,
      "loss": 0.4899,
      "step": 513
    },
    {
      "epoch": 0.43290286355979785,
      "grad_norm": 0.4158898591995239,
      "learning_rate": 9.940871588629108e-06,
      "loss": 0.4818,
      "step": 514
    },
    {
      "epoch": 0.43374508702975856,
      "grad_norm": 0.5043514966964722,
      "learning_rate": 9.940117473540377e-06,
      "loss": 0.4988,
      "step": 515
    },
    {
      "epoch": 0.43458731049971927,
      "grad_norm": 0.4220544099807739,
      "learning_rate": 9.939358608907026e-06,
      "loss": 0.476,
      "step": 516
    },
    {
      "epoch": 0.43542953396968,
      "grad_norm": 0.42255619168281555,
      "learning_rate": 9.938594995458644e-06,
      "loss": 0.4851,
      "step": 517
    },
    {
      "epoch": 0.43627175743964064,
      "grad_norm": 0.44102704524993896,
      "learning_rate": 9.937826633929388e-06,
      "loss": 0.4757,
      "step": 518
    },
    {
      "epoch": 0.43711398090960135,
      "grad_norm": 0.42664554715156555,
      "learning_rate": 9.937053525057977e-06,
      "loss": 0.4824,
      "step": 519
    },
    {
      "epoch": 0.43795620437956206,
      "grad_norm": 0.46016159653663635,
      "learning_rate": 9.936275669587697e-06,
      "loss": 0.4622,
      "step": 520
    },
    {
      "epoch": 0.4387984278495227,
      "grad_norm": 0.46572792530059814,
      "learning_rate": 9.935493068266396e-06,
      "loss": 0.499,
      "step": 521
    },
    {
      "epoch": 0.43964065131948343,
      "grad_norm": 0.44388529658317566,
      "learning_rate": 9.934705721846487e-06,
      "loss": 0.495,
      "step": 522
    },
    {
      "epoch": 0.44048287478944415,
      "grad_norm": 0.4244205057621002,
      "learning_rate": 9.933913631084942e-06,
      "loss": 0.4702,
      "step": 523
    },
    {
      "epoch": 0.4413250982594048,
      "grad_norm": 0.45764875411987305,
      "learning_rate": 9.933116796743294e-06,
      "loss": 0.4644,
      "step": 524
    },
    {
      "epoch": 0.4421673217293655,
      "grad_norm": 0.42388972640037537,
      "learning_rate": 9.932315219587641e-06,
      "loss": 0.4611,
      "step": 525
    },
    {
      "epoch": 0.4430095451993262,
      "grad_norm": 0.4502813220024109,
      "learning_rate": 9.931508900388635e-06,
      "loss": 0.4522,
      "step": 526
    },
    {
      "epoch": 0.44385176866928694,
      "grad_norm": 0.42205312848091125,
      "learning_rate": 9.930697839921496e-06,
      "loss": 0.4687,
      "step": 527
    },
    {
      "epoch": 0.4446939921392476,
      "grad_norm": 0.4211876094341278,
      "learning_rate": 9.92988203896599e-06,
      "loss": 0.473,
      "step": 528
    },
    {
      "epoch": 0.4455362156092083,
      "grad_norm": 0.4038360118865967,
      "learning_rate": 9.929061498306448e-06,
      "loss": 0.483,
      "step": 529
    },
    {
      "epoch": 0.446378439079169,
      "grad_norm": 0.4315629303455353,
      "learning_rate": 9.92823621873176e-06,
      "loss": 0.4527,
      "step": 530
    },
    {
      "epoch": 0.4472206625491297,
      "grad_norm": 0.4485095143318176,
      "learning_rate": 9.927406201035368e-06,
      "loss": 0.4739,
      "step": 531
    },
    {
      "epoch": 0.4480628860190904,
      "grad_norm": 0.4254825711250305,
      "learning_rate": 9.926571446015271e-06,
      "loss": 0.4732,
      "step": 532
    },
    {
      "epoch": 0.4489051094890511,
      "grad_norm": 0.3976924419403076,
      "learning_rate": 9.92573195447402e-06,
      "loss": 0.4706,
      "step": 533
    },
    {
      "epoch": 0.4497473329590118,
      "grad_norm": 0.40657350420951843,
      "learning_rate": 9.924887727218724e-06,
      "loss": 0.4732,
      "step": 534
    },
    {
      "epoch": 0.45058955642897247,
      "grad_norm": 0.4227825701236725,
      "learning_rate": 9.924038765061042e-06,
      "loss": 0.4636,
      "step": 535
    },
    {
      "epoch": 0.4514317798989332,
      "grad_norm": 0.4066850543022156,
      "learning_rate": 9.923185068817184e-06,
      "loss": 0.4766,
      "step": 536
    },
    {
      "epoch": 0.4522740033688939,
      "grad_norm": 0.3809363543987274,
      "learning_rate": 9.922326639307918e-06,
      "loss": 0.4894,
      "step": 537
    },
    {
      "epoch": 0.45311622683885455,
      "grad_norm": 0.40280285477638245,
      "learning_rate": 9.921463477358555e-06,
      "loss": 0.5081,
      "step": 538
    },
    {
      "epoch": 0.45395845030881526,
      "grad_norm": 0.4289223849773407,
      "learning_rate": 9.920595583798959e-06,
      "loss": 0.5045,
      "step": 539
    },
    {
      "epoch": 0.454800673778776,
      "grad_norm": 0.45048588514328003,
      "learning_rate": 9.919722959463545e-06,
      "loss": 0.4868,
      "step": 540
    },
    {
      "epoch": 0.4556428972487367,
      "grad_norm": 0.4473317265510559,
      "learning_rate": 9.918845605191274e-06,
      "loss": 0.4783,
      "step": 541
    },
    {
      "epoch": 0.45648512071869735,
      "grad_norm": 0.4320186376571655,
      "learning_rate": 9.917963521825653e-06,
      "loss": 0.4732,
      "step": 542
    },
    {
      "epoch": 0.45732734418865806,
      "grad_norm": 0.45947572588920593,
      "learning_rate": 9.917076710214739e-06,
      "loss": 0.471,
      "step": 543
    },
    {
      "epoch": 0.45816956765861877,
      "grad_norm": 0.4281526207923889,
      "learning_rate": 9.916185171211135e-06,
      "loss": 0.4918,
      "step": 544
    },
    {
      "epoch": 0.4590117911285794,
      "grad_norm": 0.38934028148651123,
      "learning_rate": 9.915288905671986e-06,
      "loss": 0.4353,
      "step": 545
    },
    {
      "epoch": 0.45985401459854014,
      "grad_norm": 0.4092000424861908,
      "learning_rate": 9.914387914458983e-06,
      "loss": 0.4779,
      "step": 546
    },
    {
      "epoch": 0.46069623806850085,
      "grad_norm": 0.4408314526081085,
      "learning_rate": 9.913482198438357e-06,
      "loss": 0.4964,
      "step": 547
    },
    {
      "epoch": 0.46153846153846156,
      "grad_norm": 0.41076087951660156,
      "learning_rate": 9.912571758480892e-06,
      "loss": 0.5018,
      "step": 548
    },
    {
      "epoch": 0.4623806850084222,
      "grad_norm": 0.4389832019805908,
      "learning_rate": 9.911656595461899e-06,
      "loss": 0.4728,
      "step": 549
    },
    {
      "epoch": 0.46322290847838293,
      "grad_norm": 0.42833855748176575,
      "learning_rate": 9.910736710261238e-06,
      "loss": 0.4731,
      "step": 550
    },
    {
      "epoch": 0.46406513194834365,
      "grad_norm": 0.4394788146018982,
      "learning_rate": 9.909812103763312e-06,
      "loss": 0.4555,
      "step": 551
    },
    {
      "epoch": 0.4649073554183043,
      "grad_norm": 0.4729691445827484,
      "learning_rate": 9.908882776857057e-06,
      "loss": 0.483,
      "step": 552
    },
    {
      "epoch": 0.465749578888265,
      "grad_norm": 0.44986599683761597,
      "learning_rate": 9.90794873043595e-06,
      "loss": 0.4712,
      "step": 553
    },
    {
      "epoch": 0.4665918023582257,
      "grad_norm": 0.458845853805542,
      "learning_rate": 9.907009965398005e-06,
      "loss": 0.4679,
      "step": 554
    },
    {
      "epoch": 0.46743402582818644,
      "grad_norm": 0.502507209777832,
      "learning_rate": 9.906066482645774e-06,
      "loss": 0.4809,
      "step": 555
    },
    {
      "epoch": 0.4682762492981471,
      "grad_norm": 0.5294026732444763,
      "learning_rate": 9.905118283086341e-06,
      "loss": 0.4739,
      "step": 556
    },
    {
      "epoch": 0.4691184727681078,
      "grad_norm": 0.46517646312713623,
      "learning_rate": 9.904165367631329e-06,
      "loss": 0.4524,
      "step": 557
    },
    {
      "epoch": 0.4699606962380685,
      "grad_norm": 0.49281468987464905,
      "learning_rate": 9.903207737196892e-06,
      "loss": 0.4669,
      "step": 558
    },
    {
      "epoch": 0.4708029197080292,
      "grad_norm": 0.4838407635688782,
      "learning_rate": 9.902245392703719e-06,
      "loss": 0.4598,
      "step": 559
    },
    {
      "epoch": 0.4716451431779899,
      "grad_norm": 0.36874690651893616,
      "learning_rate": 9.901278335077031e-06,
      "loss": 0.4704,
      "step": 560
    },
    {
      "epoch": 0.4724873666479506,
      "grad_norm": 0.4916326701641083,
      "learning_rate": 9.900306565246579e-06,
      "loss": 0.4647,
      "step": 561
    },
    {
      "epoch": 0.47332959011791126,
      "grad_norm": 0.4747673571109772,
      "learning_rate": 9.899330084146646e-06,
      "loss": 0.486,
      "step": 562
    },
    {
      "epoch": 0.47417181358787197,
      "grad_norm": 0.4297882616519928,
      "learning_rate": 9.898348892716042e-06,
      "loss": 0.4681,
      "step": 563
    },
    {
      "epoch": 0.4750140370578327,
      "grad_norm": 0.4894065260887146,
      "learning_rate": 9.89736299189811e-06,
      "loss": 0.4754,
      "step": 564
    },
    {
      "epoch": 0.4758562605277934,
      "grad_norm": 0.39257797598838806,
      "learning_rate": 9.896372382640718e-06,
      "loss": 0.4662,
      "step": 565
    },
    {
      "epoch": 0.47669848399775405,
      "grad_norm": 0.43928053975105286,
      "learning_rate": 9.895377065896259e-06,
      "loss": 0.4838,
      "step": 566
    },
    {
      "epoch": 0.47754070746771476,
      "grad_norm": 0.4472607374191284,
      "learning_rate": 9.894377042621654e-06,
      "loss": 0.4757,
      "step": 567
    },
    {
      "epoch": 0.4783829309376755,
      "grad_norm": 0.4279944896697998,
      "learning_rate": 9.89337231377835e-06,
      "loss": 0.4874,
      "step": 568
    },
    {
      "epoch": 0.47922515440763613,
      "grad_norm": 0.4780694544315338,
      "learning_rate": 9.892362880332316e-06,
      "loss": 0.4513,
      "step": 569
    },
    {
      "epoch": 0.48006737787759685,
      "grad_norm": 0.4149211347103119,
      "learning_rate": 9.891348743254046e-06,
      "loss": 0.4569,
      "step": 570
    },
    {
      "epoch": 0.48090960134755756,
      "grad_norm": 0.4400824308395386,
      "learning_rate": 9.890329903518554e-06,
      "loss": 0.4805,
      "step": 571
    },
    {
      "epoch": 0.48175182481751827,
      "grad_norm": 0.46859511733055115,
      "learning_rate": 9.889306362105377e-06,
      "loss": 0.4742,
      "step": 572
    },
    {
      "epoch": 0.4825940482874789,
      "grad_norm": 0.4134506285190582,
      "learning_rate": 9.888278119998573e-06,
      "loss": 0.4805,
      "step": 573
    },
    {
      "epoch": 0.48343627175743964,
      "grad_norm": 0.44347694516181946,
      "learning_rate": 9.887245178186715e-06,
      "loss": 0.4932,
      "step": 574
    },
    {
      "epoch": 0.48427849522740035,
      "grad_norm": 0.4355633556842804,
      "learning_rate": 9.886207537662899e-06,
      "loss": 0.479,
      "step": 575
    },
    {
      "epoch": 0.485120718697361,
      "grad_norm": 0.4236640930175781,
      "learning_rate": 9.885165199424738e-06,
      "loss": 0.4549,
      "step": 576
    },
    {
      "epoch": 0.4859629421673217,
      "grad_norm": 0.514371395111084,
      "learning_rate": 9.884118164474359e-06,
      "loss": 0.5032,
      "step": 577
    },
    {
      "epoch": 0.48680516563728243,
      "grad_norm": 0.4017777442932129,
      "learning_rate": 9.883066433818404e-06,
      "loss": 0.4748,
      "step": 578
    },
    {
      "epoch": 0.48764738910724315,
      "grad_norm": 0.43886151909828186,
      "learning_rate": 9.882010008468038e-06,
      "loss": 0.4481,
      "step": 579
    },
    {
      "epoch": 0.4884896125772038,
      "grad_norm": 0.44533196091651917,
      "learning_rate": 9.880948889438923e-06,
      "loss": 0.4986,
      "step": 580
    },
    {
      "epoch": 0.4893318360471645,
      "grad_norm": 0.5212573409080505,
      "learning_rate": 9.879883077751255e-06,
      "loss": 0.4859,
      "step": 581
    },
    {
      "epoch": 0.4901740595171252,
      "grad_norm": 0.41685357689857483,
      "learning_rate": 9.878812574429722e-06,
      "loss": 0.488,
      "step": 582
    },
    {
      "epoch": 0.4910162829870859,
      "grad_norm": 0.4222809374332428,
      "learning_rate": 9.877737380503534e-06,
      "loss": 0.4729,
      "step": 583
    },
    {
      "epoch": 0.4918585064570466,
      "grad_norm": 0.45227038860321045,
      "learning_rate": 9.876657497006408e-06,
      "loss": 0.4745,
      "step": 584
    },
    {
      "epoch": 0.4927007299270073,
      "grad_norm": 0.4947114586830139,
      "learning_rate": 9.875572924976568e-06,
      "loss": 0.4538,
      "step": 585
    },
    {
      "epoch": 0.493542953396968,
      "grad_norm": 0.45535486936569214,
      "learning_rate": 9.874483665456746e-06,
      "loss": 0.4828,
      "step": 586
    },
    {
      "epoch": 0.4943851768669287,
      "grad_norm": 0.4382510483264923,
      "learning_rate": 9.873389719494186e-06,
      "loss": 0.4753,
      "step": 587
    },
    {
      "epoch": 0.4952274003368894,
      "grad_norm": 0.49378299713134766,
      "learning_rate": 9.87229108814063e-06,
      "loss": 0.4703,
      "step": 588
    },
    {
      "epoch": 0.4960696238068501,
      "grad_norm": 0.4653967618942261,
      "learning_rate": 9.871187772452327e-06,
      "loss": 0.5005,
      "step": 589
    },
    {
      "epoch": 0.49691184727681076,
      "grad_norm": 0.4653383195400238,
      "learning_rate": 9.870079773490033e-06,
      "loss": 0.4806,
      "step": 590
    },
    {
      "epoch": 0.49775407074677147,
      "grad_norm": 0.4653652608394623,
      "learning_rate": 9.868967092319003e-06,
      "loss": 0.4666,
      "step": 591
    },
    {
      "epoch": 0.4985962942167322,
      "grad_norm": 0.3888656198978424,
      "learning_rate": 9.867849730008994e-06,
      "loss": 0.4755,
      "step": 592
    },
    {
      "epoch": 0.4994385176866929,
      "grad_norm": 0.46171125769615173,
      "learning_rate": 9.866727687634266e-06,
      "loss": 0.4805,
      "step": 593
    },
    {
      "epoch": 0.5002807411566536,
      "grad_norm": 0.4365234673023224,
      "learning_rate": 9.865600966273576e-06,
      "loss": 0.4333,
      "step": 594
    },
    {
      "epoch": 0.5011229646266142,
      "grad_norm": 0.3933454751968384,
      "learning_rate": 9.86446956701018e-06,
      "loss": 0.4902,
      "step": 595
    },
    {
      "epoch": 0.501965188096575,
      "grad_norm": 0.4024716913700104,
      "learning_rate": 9.86333349093183e-06,
      "loss": 0.4843,
      "step": 596
    },
    {
      "epoch": 0.5028074115665356,
      "grad_norm": 0.4218408763408661,
      "learning_rate": 9.86219273913078e-06,
      "loss": 0.4657,
      "step": 597
    },
    {
      "epoch": 0.5036496350364964,
      "grad_norm": 0.4528622031211853,
      "learning_rate": 9.861047312703772e-06,
      "loss": 0.4503,
      "step": 598
    },
    {
      "epoch": 0.5044918585064571,
      "grad_norm": 0.4423084259033203,
      "learning_rate": 9.859897212752049e-06,
      "loss": 0.4975,
      "step": 599
    },
    {
      "epoch": 0.5053340819764177,
      "grad_norm": 0.4201880693435669,
      "learning_rate": 9.858742440381343e-06,
      "loss": 0.4775,
      "step": 600
    },
    {
      "epoch": 0.5061763054463785,
      "grad_norm": 0.450891375541687,
      "learning_rate": 9.857582996701878e-06,
      "loss": 0.4989,
      "step": 601
    },
    {
      "epoch": 0.5070185289163391,
      "grad_norm": 0.39716994762420654,
      "learning_rate": 9.856418882828368e-06,
      "loss": 0.4935,
      "step": 602
    },
    {
      "epoch": 0.5078607523862998,
      "grad_norm": 0.5274701118469238,
      "learning_rate": 9.855250099880026e-06,
      "loss": 0.47,
      "step": 603
    },
    {
      "epoch": 0.5087029758562606,
      "grad_norm": 0.4461921155452728,
      "learning_rate": 9.854076648980543e-06,
      "loss": 0.4931,
      "step": 604
    },
    {
      "epoch": 0.5095451993262212,
      "grad_norm": 0.4966074228286743,
      "learning_rate": 9.852898531258102e-06,
      "loss": 0.4866,
      "step": 605
    },
    {
      "epoch": 0.5103874227961819,
      "grad_norm": 0.4233178198337555,
      "learning_rate": 9.851715747845372e-06,
      "loss": 0.4819,
      "step": 606
    },
    {
      "epoch": 0.5112296462661426,
      "grad_norm": 0.4426640272140503,
      "learning_rate": 9.850528299879513e-06,
      "loss": 0.4678,
      "step": 607
    },
    {
      "epoch": 0.5120718697361033,
      "grad_norm": 0.45874670147895813,
      "learning_rate": 9.84933618850216e-06,
      "loss": 0.4657,
      "step": 608
    },
    {
      "epoch": 0.512914093206064,
      "grad_norm": 0.4922625422477722,
      "learning_rate": 9.848139414859441e-06,
      "loss": 0.4811,
      "step": 609
    },
    {
      "epoch": 0.5137563166760247,
      "grad_norm": 0.5029134154319763,
      "learning_rate": 9.84693798010196e-06,
      "loss": 0.4743,
      "step": 610
    },
    {
      "epoch": 0.5145985401459854,
      "grad_norm": 0.45070111751556396,
      "learning_rate": 9.845731885384806e-06,
      "loss": 0.4663,
      "step": 611
    },
    {
      "epoch": 0.5154407636159462,
      "grad_norm": 0.4123421907424927,
      "learning_rate": 9.844521131867546e-06,
      "loss": 0.4555,
      "step": 612
    },
    {
      "epoch": 0.5162829870859068,
      "grad_norm": 0.4319826662540436,
      "learning_rate": 9.843305720714227e-06,
      "loss": 0.4646,
      "step": 613
    },
    {
      "epoch": 0.5171252105558675,
      "grad_norm": 0.44951531291007996,
      "learning_rate": 9.842085653093372e-06,
      "loss": 0.4897,
      "step": 614
    },
    {
      "epoch": 0.5179674340258282,
      "grad_norm": 0.41780415177345276,
      "learning_rate": 9.840860930177984e-06,
      "loss": 0.4823,
      "step": 615
    },
    {
      "epoch": 0.5188096574957889,
      "grad_norm": 0.4116463363170624,
      "learning_rate": 9.83963155314554e-06,
      "loss": 0.4504,
      "step": 616
    },
    {
      "epoch": 0.5196518809657495,
      "grad_norm": 0.45759081840515137,
      "learning_rate": 9.838397523177993e-06,
      "loss": 0.4978,
      "step": 617
    },
    {
      "epoch": 0.5204941044357103,
      "grad_norm": 0.41809770464897156,
      "learning_rate": 9.837158841461767e-06,
      "loss": 0.4787,
      "step": 618
    },
    {
      "epoch": 0.521336327905671,
      "grad_norm": 0.4195670783519745,
      "learning_rate": 9.835915509187759e-06,
      "loss": 0.4829,
      "step": 619
    },
    {
      "epoch": 0.5221785513756316,
      "grad_norm": 0.39847707748413086,
      "learning_rate": 9.834667527551341e-06,
      "loss": 0.4612,
      "step": 620
    },
    {
      "epoch": 0.5230207748455924,
      "grad_norm": 0.45414745807647705,
      "learning_rate": 9.833414897752346e-06,
      "loss": 0.4607,
      "step": 621
    },
    {
      "epoch": 0.523862998315553,
      "grad_norm": 0.4302876591682434,
      "learning_rate": 9.832157620995088e-06,
      "loss": 0.4803,
      "step": 622
    },
    {
      "epoch": 0.5247052217855137,
      "grad_norm": 0.43650370836257935,
      "learning_rate": 9.830895698488341e-06,
      "loss": 0.4666,
      "step": 623
    },
    {
      "epoch": 0.5255474452554745,
      "grad_norm": 0.45703884959220886,
      "learning_rate": 9.829629131445342e-06,
      "loss": 0.4875,
      "step": 624
    },
    {
      "epoch": 0.5263896687254351,
      "grad_norm": 0.4050588309764862,
      "learning_rate": 9.828357921083803e-06,
      "loss": 0.4495,
      "step": 625
    },
    {
      "epoch": 0.5272318921953958,
      "grad_norm": 0.4459334909915924,
      "learning_rate": 9.827082068625893e-06,
      "loss": 0.4669,
      "step": 626
    },
    {
      "epoch": 0.5280741156653566,
      "grad_norm": 0.4960978627204895,
      "learning_rate": 9.825801575298248e-06,
      "loss": 0.5046,
      "step": 627
    },
    {
      "epoch": 0.5289163391353172,
      "grad_norm": 0.5348564982414246,
      "learning_rate": 9.824516442331963e-06,
      "loss": 0.501,
      "step": 628
    },
    {
      "epoch": 0.529758562605278,
      "grad_norm": 0.5745136737823486,
      "learning_rate": 9.823226670962598e-06,
      "loss": 0.4724,
      "step": 629
    },
    {
      "epoch": 0.5306007860752386,
      "grad_norm": 0.44388797879219055,
      "learning_rate": 9.821932262430164e-06,
      "loss": 0.4805,
      "step": 630
    },
    {
      "epoch": 0.5314430095451993,
      "grad_norm": 0.5274997353553772,
      "learning_rate": 9.82063321797914e-06,
      "loss": 0.4728,
      "step": 631
    },
    {
      "epoch": 0.5322852330151601,
      "grad_norm": 0.44491055607795715,
      "learning_rate": 9.819329538858458e-06,
      "loss": 0.4769,
      "step": 632
    },
    {
      "epoch": 0.5331274564851207,
      "grad_norm": 0.48145592212677,
      "learning_rate": 9.818021226321502e-06,
      "loss": 0.451,
      "step": 633
    },
    {
      "epoch": 0.5339696799550814,
      "grad_norm": 0.478351354598999,
      "learning_rate": 9.816708281626116e-06,
      "loss": 0.4578,
      "step": 634
    },
    {
      "epoch": 0.5348119034250421,
      "grad_norm": 0.43778157234191895,
      "learning_rate": 9.815390706034598e-06,
      "loss": 0.4946,
      "step": 635
    },
    {
      "epoch": 0.5356541268950028,
      "grad_norm": 0.5605493187904358,
      "learning_rate": 9.814068500813692e-06,
      "loss": 0.4915,
      "step": 636
    },
    {
      "epoch": 0.5364963503649635,
      "grad_norm": 0.42805150151252747,
      "learning_rate": 9.812741667234599e-06,
      "loss": 0.4588,
      "step": 637
    },
    {
      "epoch": 0.5373385738349242,
      "grad_norm": 0.5211917757987976,
      "learning_rate": 9.811410206572972e-06,
      "loss": 0.4367,
      "step": 638
    },
    {
      "epoch": 0.5381807973048849,
      "grad_norm": 0.5190474987030029,
      "learning_rate": 9.8100741201089e-06,
      "loss": 0.4801,
      "step": 639
    },
    {
      "epoch": 0.5390230207748455,
      "grad_norm": 0.4237360954284668,
      "learning_rate": 9.808733409126934e-06,
      "loss": 0.4862,
      "step": 640
    },
    {
      "epoch": 0.5398652442448063,
      "grad_norm": 0.4595288336277008,
      "learning_rate": 9.807388074916064e-06,
      "loss": 0.472,
      "step": 641
    },
    {
      "epoch": 0.540707467714767,
      "grad_norm": 0.4949067533016205,
      "learning_rate": 9.806038118769724e-06,
      "loss": 0.4686,
      "step": 642
    },
    {
      "epoch": 0.5415496911847277,
      "grad_norm": 0.4276716709136963,
      "learning_rate": 9.804683541985796e-06,
      "loss": 0.505,
      "step": 643
    },
    {
      "epoch": 0.5423919146546884,
      "grad_norm": 0.45827215909957886,
      "learning_rate": 9.803324345866599e-06,
      "loss": 0.4666,
      "step": 644
    },
    {
      "epoch": 0.543234138124649,
      "grad_norm": 0.39234694838523865,
      "learning_rate": 9.801960531718898e-06,
      "loss": 0.479,
      "step": 645
    },
    {
      "epoch": 0.5440763615946098,
      "grad_norm": 0.40568074584007263,
      "learning_rate": 9.800592100853894e-06,
      "loss": 0.4745,
      "step": 646
    },
    {
      "epoch": 0.5449185850645705,
      "grad_norm": 0.44964584708213806,
      "learning_rate": 9.79921905458723e-06,
      "loss": 0.4589,
      "step": 647
    },
    {
      "epoch": 0.5457608085345311,
      "grad_norm": 0.4884679317474365,
      "learning_rate": 9.797841394238987e-06,
      "loss": 0.4812,
      "step": 648
    },
    {
      "epoch": 0.5466030320044919,
      "grad_norm": 0.39494121074676514,
      "learning_rate": 9.796459121133675e-06,
      "loss": 0.4828,
      "step": 649
    },
    {
      "epoch": 0.5474452554744526,
      "grad_norm": 0.42912235856056213,
      "learning_rate": 9.795072236600247e-06,
      "loss": 0.4572,
      "step": 650
    },
    {
      "epoch": 0.5482874789444132,
      "grad_norm": 0.47243809700012207,
      "learning_rate": 9.793680741972084e-06,
      "loss": 0.4764,
      "step": 651
    },
    {
      "epoch": 0.549129702414374,
      "grad_norm": 0.3971119523048401,
      "learning_rate": 9.792284638587005e-06,
      "loss": 0.4619,
      "step": 652
    },
    {
      "epoch": 0.5499719258843346,
      "grad_norm": 0.43544715642929077,
      "learning_rate": 9.790883927787254e-06,
      "loss": 0.4539,
      "step": 653
    },
    {
      "epoch": 0.5508141493542953,
      "grad_norm": 0.45029494166374207,
      "learning_rate": 9.789478610919508e-06,
      "loss": 0.4348,
      "step": 654
    },
    {
      "epoch": 0.5516563728242561,
      "grad_norm": 0.4610249698162079,
      "learning_rate": 9.78806868933487e-06,
      "loss": 0.4704,
      "step": 655
    },
    {
      "epoch": 0.5524985962942167,
      "grad_norm": 0.379874587059021,
      "learning_rate": 9.786654164388873e-06,
      "loss": 0.4645,
      "step": 656
    },
    {
      "epoch": 0.5533408197641775,
      "grad_norm": 0.45725110173225403,
      "learning_rate": 9.785235037441473e-06,
      "loss": 0.4719,
      "step": 657
    },
    {
      "epoch": 0.5541830432341381,
      "grad_norm": 0.4846012592315674,
      "learning_rate": 9.783811309857057e-06,
      "loss": 0.4779,
      "step": 658
    },
    {
      "epoch": 0.5550252667040988,
      "grad_norm": 0.45493122935295105,
      "learning_rate": 9.782382983004424e-06,
      "loss": 0.4706,
      "step": 659
    },
    {
      "epoch": 0.5558674901740596,
      "grad_norm": 0.4952054023742676,
      "learning_rate": 9.780950058256802e-06,
      "loss": 0.4799,
      "step": 660
    },
    {
      "epoch": 0.5567097136440202,
      "grad_norm": 0.5058280825614929,
      "learning_rate": 9.779512536991839e-06,
      "loss": 0.5033,
      "step": 661
    },
    {
      "epoch": 0.5575519371139809,
      "grad_norm": 0.4578472673892975,
      "learning_rate": 9.778070420591603e-06,
      "loss": 0.4787,
      "step": 662
    },
    {
      "epoch": 0.5583941605839416,
      "grad_norm": 0.4706355631351471,
      "learning_rate": 9.77662371044258e-06,
      "loss": 0.444,
      "step": 663
    },
    {
      "epoch": 0.5592363840539023,
      "grad_norm": 0.48192644119262695,
      "learning_rate": 9.775172407935664e-06,
      "loss": 0.4935,
      "step": 664
    },
    {
      "epoch": 0.560078607523863,
      "grad_norm": 0.5187606811523438,
      "learning_rate": 9.773716514466179e-06,
      "loss": 0.5276,
      "step": 665
    },
    {
      "epoch": 0.5609208309938237,
      "grad_norm": 0.49187198281288147,
      "learning_rate": 9.77225603143385e-06,
      "loss": 0.4518,
      "step": 666
    },
    {
      "epoch": 0.5617630544637844,
      "grad_norm": 0.48891404271125793,
      "learning_rate": 9.770790960242821e-06,
      "loss": 0.4717,
      "step": 667
    },
    {
      "epoch": 0.562605277933745,
      "grad_norm": 0.41650575399398804,
      "learning_rate": 9.769321302301648e-06,
      "loss": 0.4562,
      "step": 668
    },
    {
      "epoch": 0.5634475014037058,
      "grad_norm": 0.45043256878852844,
      "learning_rate": 9.767847059023292e-06,
      "loss": 0.4698,
      "step": 669
    },
    {
      "epoch": 0.5642897248736665,
      "grad_norm": 0.4680410623550415,
      "learning_rate": 9.766368231825126e-06,
      "loss": 0.4951,
      "step": 670
    },
    {
      "epoch": 0.5651319483436271,
      "grad_norm": 0.4385649263858795,
      "learning_rate": 9.764884822128928e-06,
      "loss": 0.443,
      "step": 671
    },
    {
      "epoch": 0.5659741718135879,
      "grad_norm": 0.4595573842525482,
      "learning_rate": 9.763396831360884e-06,
      "loss": 0.4441,
      "step": 672
    },
    {
      "epoch": 0.5668163952835485,
      "grad_norm": 0.3920586109161377,
      "learning_rate": 9.761904260951583e-06,
      "loss": 0.4591,
      "step": 673
    },
    {
      "epoch": 0.5676586187535093,
      "grad_norm": 0.47144290804862976,
      "learning_rate": 9.760407112336016e-06,
      "loss": 0.4506,
      "step": 674
    },
    {
      "epoch": 0.56850084222347,
      "grad_norm": 0.539079487323761,
      "learning_rate": 9.75890538695358e-06,
      "loss": 0.4787,
      "step": 675
    },
    {
      "epoch": 0.5693430656934306,
      "grad_norm": 0.44467487931251526,
      "learning_rate": 9.757399086248062e-06,
      "loss": 0.4594,
      "step": 676
    },
    {
      "epoch": 0.5701852891633914,
      "grad_norm": 0.4511784613132477,
      "learning_rate": 9.755888211667663e-06,
      "loss": 0.4471,
      "step": 677
    },
    {
      "epoch": 0.571027512633352,
      "grad_norm": 0.40628981590270996,
      "learning_rate": 9.75437276466497e-06,
      "loss": 0.4928,
      "step": 678
    },
    {
      "epoch": 0.5718697361033127,
      "grad_norm": 0.42661377787590027,
      "learning_rate": 9.752852746696968e-06,
      "loss": 0.4636,
      "step": 679
    },
    {
      "epoch": 0.5727119595732735,
      "grad_norm": 0.42927929759025574,
      "learning_rate": 9.751328159225037e-06,
      "loss": 0.4651,
      "step": 680
    },
    {
      "epoch": 0.5735541830432341,
      "grad_norm": 0.4115970730781555,
      "learning_rate": 9.749799003714954e-06,
      "loss": 0.486,
      "step": 681
    },
    {
      "epoch": 0.5743964065131948,
      "grad_norm": 0.48273158073425293,
      "learning_rate": 9.748265281636885e-06,
      "loss": 0.4806,
      "step": 682
    },
    {
      "epoch": 0.5752386299831556,
      "grad_norm": 0.4109310209751129,
      "learning_rate": 9.746726994465383e-06,
      "loss": 0.4558,
      "step": 683
    },
    {
      "epoch": 0.5760808534531162,
      "grad_norm": 0.4456495940685272,
      "learning_rate": 9.745184143679398e-06,
      "loss": 0.4713,
      "step": 684
    },
    {
      "epoch": 0.5769230769230769,
      "grad_norm": 0.3993374705314636,
      "learning_rate": 9.743636730762259e-06,
      "loss": 0.4802,
      "step": 685
    },
    {
      "epoch": 0.5777653003930376,
      "grad_norm": 0.5037569403648376,
      "learning_rate": 9.742084757201684e-06,
      "loss": 0.4723,
      "step": 686
    },
    {
      "epoch": 0.5786075238629983,
      "grad_norm": 0.42744243144989014,
      "learning_rate": 9.74052822448978e-06,
      "loss": 0.4714,
      "step": 687
    },
    {
      "epoch": 0.5794497473329591,
      "grad_norm": 0.47391781210899353,
      "learning_rate": 9.738967134123035e-06,
      "loss": 0.4531,
      "step": 688
    },
    {
      "epoch": 0.5802919708029197,
      "grad_norm": 0.5145685076713562,
      "learning_rate": 9.737401487602314e-06,
      "loss": 0.4717,
      "step": 689
    },
    {
      "epoch": 0.5811341942728804,
      "grad_norm": 0.4967431128025055,
      "learning_rate": 9.735831286432869e-06,
      "loss": 0.4784,
      "step": 690
    },
    {
      "epoch": 0.5819764177428411,
      "grad_norm": 0.4457142651081085,
      "learning_rate": 9.734256532124326e-06,
      "loss": 0.4511,
      "step": 691
    },
    {
      "epoch": 0.5828186412128018,
      "grad_norm": 0.4701949656009674,
      "learning_rate": 9.732677226190692e-06,
      "loss": 0.4352,
      "step": 692
    },
    {
      "epoch": 0.5836608646827625,
      "grad_norm": 0.4558423161506653,
      "learning_rate": 9.731093370150349e-06,
      "loss": 0.4731,
      "step": 693
    },
    {
      "epoch": 0.5845030881527232,
      "grad_norm": 0.38747063279151917,
      "learning_rate": 9.729504965526053e-06,
      "loss": 0.4704,
      "step": 694
    },
    {
      "epoch": 0.5853453116226839,
      "grad_norm": 0.42901259660720825,
      "learning_rate": 9.727912013844933e-06,
      "loss": 0.4913,
      "step": 695
    },
    {
      "epoch": 0.5861875350926445,
      "grad_norm": 0.3792288303375244,
      "learning_rate": 9.72631451663849e-06,
      "loss": 0.4602,
      "step": 696
    },
    {
      "epoch": 0.5870297585626053,
      "grad_norm": 0.42459696531295776,
      "learning_rate": 9.724712475442597e-06,
      "loss": 0.4597,
      "step": 697
    },
    {
      "epoch": 0.587871982032566,
      "grad_norm": 0.44218334555625916,
      "learning_rate": 9.72310589179749e-06,
      "loss": 0.4922,
      "step": 698
    },
    {
      "epoch": 0.5887142055025266,
      "grad_norm": 0.4462646543979645,
      "learning_rate": 9.721494767247779e-06,
      "loss": 0.4617,
      "step": 699
    },
    {
      "epoch": 0.5895564289724874,
      "grad_norm": 0.40931814908981323,
      "learning_rate": 9.719879103342438e-06,
      "loss": 0.4413,
      "step": 700
    },
    {
      "epoch": 0.590398652442448,
      "grad_norm": 0.42423343658447266,
      "learning_rate": 9.718258901634802e-06,
      "loss": 0.466,
      "step": 701
    },
    {
      "epoch": 0.5912408759124088,
      "grad_norm": 0.42817845940589905,
      "learning_rate": 9.71663416368257e-06,
      "loss": 0.4556,
      "step": 702
    },
    {
      "epoch": 0.5920830993823695,
      "grad_norm": 0.4088610112667084,
      "learning_rate": 9.715004891047805e-06,
      "loss": 0.4443,
      "step": 703
    },
    {
      "epoch": 0.5929253228523301,
      "grad_norm": 0.4101558327674866,
      "learning_rate": 9.71337108529693e-06,
      "loss": 0.446,
      "step": 704
    },
    {
      "epoch": 0.5937675463222909,
      "grad_norm": 0.4155373275279999,
      "learning_rate": 9.71173274800072e-06,
      "loss": 0.4665,
      "step": 705
    },
    {
      "epoch": 0.5946097697922516,
      "grad_norm": 0.423970490694046,
      "learning_rate": 9.71008988073431e-06,
      "loss": 0.4708,
      "step": 706
    },
    {
      "epoch": 0.5954519932622122,
      "grad_norm": 0.4092732071876526,
      "learning_rate": 9.708442485077197e-06,
      "loss": 0.4691,
      "step": 707
    },
    {
      "epoch": 0.596294216732173,
      "grad_norm": 0.4421065151691437,
      "learning_rate": 9.70679056261322e-06,
      "loss": 0.4516,
      "step": 708
    },
    {
      "epoch": 0.5971364402021336,
      "grad_norm": 0.4051957428455353,
      "learning_rate": 9.70513411493058e-06,
      "loss": 0.4523,
      "step": 709
    },
    {
      "epoch": 0.5979786636720943,
      "grad_norm": 0.4165245294570923,
      "learning_rate": 9.70347314362182e-06,
      "loss": 0.473,
      "step": 710
    },
    {
      "epoch": 0.5988208871420551,
      "grad_norm": 0.5137622952461243,
      "learning_rate": 9.70180765028384e-06,
      "loss": 0.4823,
      "step": 711
    },
    {
      "epoch": 0.5996631106120157,
      "grad_norm": 0.41691482067108154,
      "learning_rate": 9.700137636517884e-06,
      "loss": 0.4758,
      "step": 712
    },
    {
      "epoch": 0.6005053340819764,
      "grad_norm": 0.48247072100639343,
      "learning_rate": 9.698463103929542e-06,
      "loss": 0.435,
      "step": 713
    },
    {
      "epoch": 0.6013475575519371,
      "grad_norm": 0.4419487714767456,
      "learning_rate": 9.696784054128749e-06,
      "loss": 0.4664,
      "step": 714
    },
    {
      "epoch": 0.6021897810218978,
      "grad_norm": 0.42793411016464233,
      "learning_rate": 9.695100488729784e-06,
      "loss": 0.487,
      "step": 715
    },
    {
      "epoch": 0.6030320044918585,
      "grad_norm": 0.4619417190551758,
      "learning_rate": 9.693412409351264e-06,
      "loss": 0.4762,
      "step": 716
    },
    {
      "epoch": 0.6038742279618192,
      "grad_norm": 0.49830713868141174,
      "learning_rate": 9.691719817616148e-06,
      "loss": 0.4725,
      "step": 717
    },
    {
      "epoch": 0.6047164514317799,
      "grad_norm": 0.4128069579601288,
      "learning_rate": 9.690022715151734e-06,
      "loss": 0.4812,
      "step": 718
    },
    {
      "epoch": 0.6055586749017406,
      "grad_norm": 0.43731868267059326,
      "learning_rate": 9.688321103589659e-06,
      "loss": 0.4595,
      "step": 719
    },
    {
      "epoch": 0.6064008983717013,
      "grad_norm": 0.440223753452301,
      "learning_rate": 9.686614984565888e-06,
      "loss": 0.4362,
      "step": 720
    },
    {
      "epoch": 0.607243121841662,
      "grad_norm": 0.4079315960407257,
      "learning_rate": 9.684904359720724e-06,
      "loss": 0.4765,
      "step": 721
    },
    {
      "epoch": 0.6080853453116227,
      "grad_norm": 0.40085849165916443,
      "learning_rate": 9.683189230698804e-06,
      "loss": 0.4911,
      "step": 722
    },
    {
      "epoch": 0.6089275687815834,
      "grad_norm": 0.39583781361579895,
      "learning_rate": 9.681469599149093e-06,
      "loss": 0.4639,
      "step": 723
    },
    {
      "epoch": 0.609769792251544,
      "grad_norm": 0.4038863480091095,
      "learning_rate": 9.679745466724884e-06,
      "loss": 0.4515,
      "step": 724
    },
    {
      "epoch": 0.6106120157215048,
      "grad_norm": 0.433700829744339,
      "learning_rate": 9.678016835083798e-06,
      "loss": 0.473,
      "step": 725
    },
    {
      "epoch": 0.6114542391914655,
      "grad_norm": 0.43355733156204224,
      "learning_rate": 9.676283705887783e-06,
      "loss": 0.4356,
      "step": 726
    },
    {
      "epoch": 0.6122964626614261,
      "grad_norm": 0.4053589105606079,
      "learning_rate": 9.674546080803109e-06,
      "loss": 0.4592,
      "step": 727
    },
    {
      "epoch": 0.6131386861313869,
      "grad_norm": 0.43963006138801575,
      "learning_rate": 9.67280396150037e-06,
      "loss": 0.4779,
      "step": 728
    },
    {
      "epoch": 0.6139809096013475,
      "grad_norm": 0.43531569838523865,
      "learning_rate": 9.671057349654481e-06,
      "loss": 0.4657,
      "step": 729
    },
    {
      "epoch": 0.6148231330713082,
      "grad_norm": 0.3931327164173126,
      "learning_rate": 9.669306246944674e-06,
      "loss": 0.4615,
      "step": 730
    },
    {
      "epoch": 0.615665356541269,
      "grad_norm": 0.4679105877876282,
      "learning_rate": 9.6675506550545e-06,
      "loss": 0.4901,
      "step": 731
    },
    {
      "epoch": 0.6165075800112296,
      "grad_norm": 0.4076639711856842,
      "learning_rate": 9.66579057567183e-06,
      "loss": 0.4719,
      "step": 732
    },
    {
      "epoch": 0.6173498034811904,
      "grad_norm": 0.4001845121383667,
      "learning_rate": 9.66402601048884e-06,
      "loss": 0.4529,
      "step": 733
    },
    {
      "epoch": 0.618192026951151,
      "grad_norm": 0.44772279262542725,
      "learning_rate": 9.662256961202028e-06,
      "loss": 0.4625,
      "step": 734
    },
    {
      "epoch": 0.6190342504211117,
      "grad_norm": 0.4648893177509308,
      "learning_rate": 9.660483429512198e-06,
      "loss": 0.4642,
      "step": 735
    },
    {
      "epoch": 0.6198764738910725,
      "grad_norm": 0.48027050495147705,
      "learning_rate": 9.658705417124466e-06,
      "loss": 0.4538,
      "step": 736
    },
    {
      "epoch": 0.6207186973610331,
      "grad_norm": 0.4630196988582611,
      "learning_rate": 9.656922925748254e-06,
      "loss": 0.4692,
      "step": 737
    },
    {
      "epoch": 0.6215609208309938,
      "grad_norm": 0.4299001395702362,
      "learning_rate": 9.65513595709729e-06,
      "loss": 0.458,
      "step": 738
    },
    {
      "epoch": 0.6224031443009546,
      "grad_norm": 0.4649445116519928,
      "learning_rate": 9.653344512889608e-06,
      "loss": 0.4861,
      "step": 739
    },
    {
      "epoch": 0.6232453677709152,
      "grad_norm": 0.4226287305355072,
      "learning_rate": 9.651548594847546e-06,
      "loss": 0.4691,
      "step": 740
    },
    {
      "epoch": 0.6240875912408759,
      "grad_norm": 0.4448240101337433,
      "learning_rate": 9.649748204697741e-06,
      "loss": 0.461,
      "step": 741
    },
    {
      "epoch": 0.6249298147108366,
      "grad_norm": 0.5007437467575073,
      "learning_rate": 9.647943344171129e-06,
      "loss": 0.456,
      "step": 742
    },
    {
      "epoch": 0.6257720381807973,
      "grad_norm": 0.4428342282772064,
      "learning_rate": 9.646134015002946e-06,
      "loss": 0.4657,
      "step": 743
    },
    {
      "epoch": 0.626614261650758,
      "grad_norm": 0.4932103455066681,
      "learning_rate": 9.644320218932723e-06,
      "loss": 0.482,
      "step": 744
    },
    {
      "epoch": 0.6274564851207187,
      "grad_norm": 0.4722062349319458,
      "learning_rate": 9.642501957704287e-06,
      "loss": 0.44,
      "step": 745
    },
    {
      "epoch": 0.6282987085906794,
      "grad_norm": 0.42569971084594727,
      "learning_rate": 9.640679233065755e-06,
      "loss": 0.4652,
      "step": 746
    },
    {
      "epoch": 0.62914093206064,
      "grad_norm": 0.5427646636962891,
      "learning_rate": 9.63885204676954e-06,
      "loss": 0.4408,
      "step": 747
    },
    {
      "epoch": 0.6299831555306008,
      "grad_norm": 0.4014931917190552,
      "learning_rate": 9.637020400572339e-06,
      "loss": 0.4766,
      "step": 748
    },
    {
      "epoch": 0.6308253790005615,
      "grad_norm": 0.49479010701179504,
      "learning_rate": 9.63518429623514e-06,
      "loss": 0.4903,
      "step": 749
    },
    {
      "epoch": 0.6316676024705222,
      "grad_norm": 0.46152299642562866,
      "learning_rate": 9.63334373552322e-06,
      "loss": 0.469,
      "step": 750
    },
    {
      "epoch": 0.6325098259404829,
      "grad_norm": 0.429309606552124,
      "learning_rate": 9.631498720206132e-06,
      "loss": 0.4756,
      "step": 751
    },
    {
      "epoch": 0.6333520494104435,
      "grad_norm": 0.4430297911167145,
      "learning_rate": 9.62964925205772e-06,
      "loss": 0.4495,
      "step": 752
    },
    {
      "epoch": 0.6341942728804043,
      "grad_norm": 0.4491497576236725,
      "learning_rate": 9.627795332856107e-06,
      "loss": 0.4656,
      "step": 753
    },
    {
      "epoch": 0.635036496350365,
      "grad_norm": 0.3757000267505646,
      "learning_rate": 9.625936964383691e-06,
      "loss": 0.4461,
      "step": 754
    },
    {
      "epoch": 0.6358787198203256,
      "grad_norm": 0.4083307981491089,
      "learning_rate": 9.624074148427154e-06,
      "loss": 0.4696,
      "step": 755
    },
    {
      "epoch": 0.6367209432902864,
      "grad_norm": 0.43609675765037537,
      "learning_rate": 9.622206886777448e-06,
      "loss": 0.4485,
      "step": 756
    },
    {
      "epoch": 0.637563166760247,
      "grad_norm": 0.4111359417438507,
      "learning_rate": 9.620335181229805e-06,
      "loss": 0.4689,
      "step": 757
    },
    {
      "epoch": 0.6384053902302077,
      "grad_norm": 0.44706836342811584,
      "learning_rate": 9.618459033583725e-06,
      "loss": 0.472,
      "step": 758
    },
    {
      "epoch": 0.6392476137001685,
      "grad_norm": 0.4235398471355438,
      "learning_rate": 9.616578445642982e-06,
      "loss": 0.4485,
      "step": 759
    },
    {
      "epoch": 0.6400898371701291,
      "grad_norm": 0.44726794958114624,
      "learning_rate": 9.614693419215613e-06,
      "loss": 0.4672,
      "step": 760
    },
    {
      "epoch": 0.6409320606400898,
      "grad_norm": 0.4394606649875641,
      "learning_rate": 9.612803956113932e-06,
      "loss": 0.423,
      "step": 761
    },
    {
      "epoch": 0.6417742841100506,
      "grad_norm": 0.459474116563797,
      "learning_rate": 9.61091005815451e-06,
      "loss": 0.4648,
      "step": 762
    },
    {
      "epoch": 0.6426165075800112,
      "grad_norm": 0.471973180770874,
      "learning_rate": 9.609011727158184e-06,
      "loss": 0.4758,
      "step": 763
    },
    {
      "epoch": 0.643458731049972,
      "grad_norm": 0.3927355110645294,
      "learning_rate": 9.607108964950056e-06,
      "loss": 0.4283,
      "step": 764
    },
    {
      "epoch": 0.6443009545199326,
      "grad_norm": 0.5064581632614136,
      "learning_rate": 9.605201773359485e-06,
      "loss": 0.4599,
      "step": 765
    },
    {
      "epoch": 0.6451431779898933,
      "grad_norm": 0.383443146944046,
      "learning_rate": 9.603290154220091e-06,
      "loss": 0.4659,
      "step": 766
    },
    {
      "epoch": 0.6459854014598541,
      "grad_norm": 0.4793812930583954,
      "learning_rate": 9.601374109369746e-06,
      "loss": 0.4672,
      "step": 767
    },
    {
      "epoch": 0.6468276249298147,
      "grad_norm": 0.4801420569419861,
      "learning_rate": 9.599453640650585e-06,
      "loss": 0.4711,
      "step": 768
    },
    {
      "epoch": 0.6476698483997754,
      "grad_norm": 0.44653087854385376,
      "learning_rate": 9.59752874990899e-06,
      "loss": 0.4664,
      "step": 769
    },
    {
      "epoch": 0.6485120718697361,
      "grad_norm": 0.436435729265213,
      "learning_rate": 9.595599438995593e-06,
      "loss": 0.4808,
      "step": 770
    },
    {
      "epoch": 0.6493542953396968,
      "grad_norm": 0.4917363226413727,
      "learning_rate": 9.59366570976528e-06,
      "loss": 0.4717,
      "step": 771
    },
    {
      "epoch": 0.6501965188096575,
      "grad_norm": 0.49018749594688416,
      "learning_rate": 9.591727564077189e-06,
      "loss": 0.4633,
      "step": 772
    },
    {
      "epoch": 0.6510387422796182,
      "grad_norm": 0.44837015867233276,
      "learning_rate": 9.589785003794692e-06,
      "loss": 0.4728,
      "step": 773
    },
    {
      "epoch": 0.6518809657495789,
      "grad_norm": 0.4217755198478699,
      "learning_rate": 9.587838030785413e-06,
      "loss": 0.4637,
      "step": 774
    },
    {
      "epoch": 0.6527231892195395,
      "grad_norm": 0.4415951073169708,
      "learning_rate": 9.585886646921221e-06,
      "loss": 0.4523,
      "step": 775
    },
    {
      "epoch": 0.6535654126895003,
      "grad_norm": 0.468799889087677,
      "learning_rate": 9.583930854078219e-06,
      "loss": 0.4976,
      "step": 776
    },
    {
      "epoch": 0.654407636159461,
      "grad_norm": 0.41982370615005493,
      "learning_rate": 9.581970654136752e-06,
      "loss": 0.458,
      "step": 777
    },
    {
      "epoch": 0.6552498596294217,
      "grad_norm": 0.4833631217479706,
      "learning_rate": 9.580006048981403e-06,
      "loss": 0.446,
      "step": 778
    },
    {
      "epoch": 0.6560920830993824,
      "grad_norm": 0.42399242520332336,
      "learning_rate": 9.578037040500992e-06,
      "loss": 0.4677,
      "step": 779
    },
    {
      "epoch": 0.656934306569343,
      "grad_norm": 0.48352256417274475,
      "learning_rate": 9.576063630588563e-06,
      "loss": 0.4457,
      "step": 780
    },
    {
      "epoch": 0.6577765300393038,
      "grad_norm": 0.40869513154029846,
      "learning_rate": 9.574085821141406e-06,
      "loss": 0.4655,
      "step": 781
    },
    {
      "epoch": 0.6586187535092645,
      "grad_norm": 0.5032451748847961,
      "learning_rate": 9.572103614061029e-06,
      "loss": 0.4539,
      "step": 782
    },
    {
      "epoch": 0.6594609769792251,
      "grad_norm": 0.44678542017936707,
      "learning_rate": 9.570117011253173e-06,
      "loss": 0.4753,
      "step": 783
    },
    {
      "epoch": 0.6603032004491859,
      "grad_norm": 0.4785044193267822,
      "learning_rate": 9.568126014627805e-06,
      "loss": 0.4842,
      "step": 784
    },
    {
      "epoch": 0.6611454239191465,
      "grad_norm": 0.4557432234287262,
      "learning_rate": 9.566130626099118e-06,
      "loss": 0.4608,
      "step": 785
    },
    {
      "epoch": 0.6619876473891072,
      "grad_norm": 0.48894616961479187,
      "learning_rate": 9.56413084758552e-06,
      "loss": 0.4592,
      "step": 786
    },
    {
      "epoch": 0.662829870859068,
      "grad_norm": 0.4260677695274353,
      "learning_rate": 9.562126681009649e-06,
      "loss": 0.4421,
      "step": 787
    },
    {
      "epoch": 0.6636720943290286,
      "grad_norm": 0.49780043959617615,
      "learning_rate": 9.560118128298355e-06,
      "loss": 0.4906,
      "step": 788
    },
    {
      "epoch": 0.6645143177989893,
      "grad_norm": 0.44772857427597046,
      "learning_rate": 9.55810519138271e-06,
      "loss": 0.4707,
      "step": 789
    },
    {
      "epoch": 0.66535654126895,
      "grad_norm": 0.4370916187763214,
      "learning_rate": 9.556087872197997e-06,
      "loss": 0.4378,
      "step": 790
    },
    {
      "epoch": 0.6661987647389107,
      "grad_norm": 0.4832700788974762,
      "learning_rate": 9.554066172683715e-06,
      "loss": 0.4909,
      "step": 791
    },
    {
      "epoch": 0.6670409882088714,
      "grad_norm": 0.4568367004394531,
      "learning_rate": 9.552040094783575e-06,
      "loss": 0.4839,
      "step": 792
    },
    {
      "epoch": 0.6678832116788321,
      "grad_norm": 0.46530836820602417,
      "learning_rate": 9.550009640445492e-06,
      "loss": 0.4479,
      "step": 793
    },
    {
      "epoch": 0.6687254351487928,
      "grad_norm": 0.40158531069755554,
      "learning_rate": 9.547974811621594e-06,
      "loss": 0.4712,
      "step": 794
    },
    {
      "epoch": 0.6695676586187536,
      "grad_norm": 0.4917837977409363,
      "learning_rate": 9.545935610268213e-06,
      "loss": 0.4631,
      "step": 795
    },
    {
      "epoch": 0.6704098820887142,
      "grad_norm": 0.435910701751709,
      "learning_rate": 9.543892038345885e-06,
      "loss": 0.4577,
      "step": 796
    },
    {
      "epoch": 0.6712521055586749,
      "grad_norm": 0.41109734773635864,
      "learning_rate": 9.541844097819347e-06,
      "loss": 0.4622,
      "step": 797
    },
    {
      "epoch": 0.6720943290286356,
      "grad_norm": 0.4491436183452606,
      "learning_rate": 9.53979179065754e-06,
      "loss": 0.4769,
      "step": 798
    },
    {
      "epoch": 0.6729365524985963,
      "grad_norm": 0.41438376903533936,
      "learning_rate": 9.537735118833595e-06,
      "loss": 0.4613,
      "step": 799
    },
    {
      "epoch": 0.673778775968557,
      "grad_norm": 0.4075932204723358,
      "learning_rate": 9.53567408432485e-06,
      "loss": 0.4423,
      "step": 800
    },
    {
      "epoch": 0.6746209994385177,
      "grad_norm": 0.41781115531921387,
      "learning_rate": 9.533608689112827e-06,
      "loss": 0.4724,
      "step": 801
    },
    {
      "epoch": 0.6754632229084784,
      "grad_norm": 0.46994319558143616,
      "learning_rate": 9.531538935183252e-06,
      "loss": 0.4879,
      "step": 802
    },
    {
      "epoch": 0.676305446378439,
      "grad_norm": 0.4228660464286804,
      "learning_rate": 9.529464824526027e-06,
      "loss": 0.4744,
      "step": 803
    },
    {
      "epoch": 0.6771476698483998,
      "grad_norm": 0.45590391755104065,
      "learning_rate": 9.527386359135254e-06,
      "loss": 0.4786,
      "step": 804
    },
    {
      "epoch": 0.6779898933183605,
      "grad_norm": 0.3855375349521637,
      "learning_rate": 9.525303541009218e-06,
      "loss": 0.4486,
      "step": 805
    },
    {
      "epoch": 0.6788321167883211,
      "grad_norm": 0.4994388818740845,
      "learning_rate": 9.523216372150393e-06,
      "loss": 0.4943,
      "step": 806
    },
    {
      "epoch": 0.6796743402582819,
      "grad_norm": 0.4074350595474243,
      "learning_rate": 9.521124854565425e-06,
      "loss": 0.4799,
      "step": 807
    },
    {
      "epoch": 0.6805165637282425,
      "grad_norm": 0.4292895197868347,
      "learning_rate": 9.519028990265153e-06,
      "loss": 0.4907,
      "step": 808
    },
    {
      "epoch": 0.6813587871982033,
      "grad_norm": 0.4532092213630676,
      "learning_rate": 9.516928781264588e-06,
      "loss": 0.4651,
      "step": 809
    },
    {
      "epoch": 0.682201010668164,
      "grad_norm": 0.4018387496471405,
      "learning_rate": 9.514824229582922e-06,
      "loss": 0.467,
      "step": 810
    },
    {
      "epoch": 0.6830432341381246,
      "grad_norm": 0.37878182530403137,
      "learning_rate": 9.512715337243517e-06,
      "loss": 0.4689,
      "step": 811
    },
    {
      "epoch": 0.6838854576080854,
      "grad_norm": 0.47483229637145996,
      "learning_rate": 9.510602106273914e-06,
      "loss": 0.4597,
      "step": 812
    },
    {
      "epoch": 0.684727681078046,
      "grad_norm": 0.4345182180404663,
      "learning_rate": 9.508484538705823e-06,
      "loss": 0.4614,
      "step": 813
    },
    {
      "epoch": 0.6855699045480067,
      "grad_norm": 0.41564348340034485,
      "learning_rate": 9.506362636575122e-06,
      "loss": 0.4745,
      "step": 814
    },
    {
      "epoch": 0.6864121280179675,
      "grad_norm": 0.47813597321510315,
      "learning_rate": 9.504236401921856e-06,
      "loss": 0.4815,
      "step": 815
    },
    {
      "epoch": 0.6872543514879281,
      "grad_norm": 0.3826410472393036,
      "learning_rate": 9.50210583679024e-06,
      "loss": 0.4844,
      "step": 816
    },
    {
      "epoch": 0.6880965749578888,
      "grad_norm": 0.39698269963264465,
      "learning_rate": 9.499970943228646e-06,
      "loss": 0.4551,
      "step": 817
    },
    {
      "epoch": 0.6889387984278496,
      "grad_norm": 0.40185999870300293,
      "learning_rate": 9.497831723289615e-06,
      "loss": 0.4691,
      "step": 818
    },
    {
      "epoch": 0.6897810218978102,
      "grad_norm": 0.3988841474056244,
      "learning_rate": 9.495688179029838e-06,
      "loss": 0.4456,
      "step": 819
    },
    {
      "epoch": 0.6906232453677709,
      "grad_norm": 0.37957483530044556,
      "learning_rate": 9.493540312510173e-06,
      "loss": 0.4762,
      "step": 820
    },
    {
      "epoch": 0.6914654688377316,
      "grad_norm": 0.3653427064418793,
      "learning_rate": 9.491388125795623e-06,
      "loss": 0.4801,
      "step": 821
    },
    {
      "epoch": 0.6923076923076923,
      "grad_norm": 0.4166845977306366,
      "learning_rate": 9.48923162095536e-06,
      "loss": 0.4737,
      "step": 822
    },
    {
      "epoch": 0.6931499157776531,
      "grad_norm": 0.42433661222457886,
      "learning_rate": 9.487070800062689e-06,
      "loss": 0.4754,
      "step": 823
    },
    {
      "epoch": 0.6939921392476137,
      "grad_norm": 0.42195984721183777,
      "learning_rate": 9.48490566519508e-06,
      "loss": 0.4922,
      "step": 824
    },
    {
      "epoch": 0.6948343627175744,
      "grad_norm": 0.3958542048931122,
      "learning_rate": 9.482736218434144e-06,
      "loss": 0.4573,
      "step": 825
    },
    {
      "epoch": 0.6956765861875351,
      "grad_norm": 0.40321555733680725,
      "learning_rate": 9.480562461865634e-06,
      "loss": 0.435,
      "step": 826
    },
    {
      "epoch": 0.6965188096574958,
      "grad_norm": 0.3904893696308136,
      "learning_rate": 9.478384397579452e-06,
      "loss": 0.4616,
      "step": 827
    },
    {
      "epoch": 0.6973610331274565,
      "grad_norm": 0.4160102307796478,
      "learning_rate": 9.476202027669644e-06,
      "loss": 0.4719,
      "step": 828
    },
    {
      "epoch": 0.6982032565974172,
      "grad_norm": 0.3703167736530304,
      "learning_rate": 9.474015354234385e-06,
      "loss": 0.4722,
      "step": 829
    },
    {
      "epoch": 0.6990454800673779,
      "grad_norm": 0.42957794666290283,
      "learning_rate": 9.471824379375998e-06,
      "loss": 0.4772,
      "step": 830
    },
    {
      "epoch": 0.6998877035373385,
      "grad_norm": 0.3805701434612274,
      "learning_rate": 9.469629105200937e-06,
      "loss": 0.4418,
      "step": 831
    },
    {
      "epoch": 0.7007299270072993,
      "grad_norm": 0.41370725631713867,
      "learning_rate": 9.46742953381979e-06,
      "loss": 0.4553,
      "step": 832
    },
    {
      "epoch": 0.70157215047726,
      "grad_norm": 0.3840523958206177,
      "learning_rate": 9.465225667347275e-06,
      "loss": 0.4616,
      "step": 833
    },
    {
      "epoch": 0.7024143739472206,
      "grad_norm": 0.3980235457420349,
      "learning_rate": 9.463017507902245e-06,
      "loss": 0.4605,
      "step": 834
    },
    {
      "epoch": 0.7032565974171814,
      "grad_norm": 0.42854639887809753,
      "learning_rate": 9.460805057607671e-06,
      "loss": 0.4881,
      "step": 835
    },
    {
      "epoch": 0.704098820887142,
      "grad_norm": 0.38847824931144714,
      "learning_rate": 9.458588318590659e-06,
      "loss": 0.4306,
      "step": 836
    },
    {
      "epoch": 0.7049410443571027,
      "grad_norm": 0.4005127251148224,
      "learning_rate": 9.45636729298243e-06,
      "loss": 0.4514,
      "step": 837
    },
    {
      "epoch": 0.7057832678270635,
      "grad_norm": 0.4101458489894867,
      "learning_rate": 9.45414198291833e-06,
      "loss": 0.4628,
      "step": 838
    },
    {
      "epoch": 0.7066254912970241,
      "grad_norm": 0.3847208619117737,
      "learning_rate": 9.451912390537828e-06,
      "loss": 0.4631,
      "step": 839
    },
    {
      "epoch": 0.7074677147669849,
      "grad_norm": 0.4354744553565979,
      "learning_rate": 9.449678517984503e-06,
      "loss": 0.4516,
      "step": 840
    },
    {
      "epoch": 0.7083099382369455,
      "grad_norm": 0.3677626848220825,
      "learning_rate": 9.447440367406053e-06,
      "loss": 0.4481,
      "step": 841
    },
    {
      "epoch": 0.7091521617069062,
      "grad_norm": 0.43919214606285095,
      "learning_rate": 9.445197940954292e-06,
      "loss": 0.4667,
      "step": 842
    },
    {
      "epoch": 0.709994385176867,
      "grad_norm": 0.4164602756500244,
      "learning_rate": 9.442951240785135e-06,
      "loss": 0.4933,
      "step": 843
    },
    {
      "epoch": 0.7108366086468276,
      "grad_norm": 0.39373573660850525,
      "learning_rate": 9.440700269058617e-06,
      "loss": 0.4479,
      "step": 844
    },
    {
      "epoch": 0.7116788321167883,
      "grad_norm": 0.4473947584629059,
      "learning_rate": 9.438445027938873e-06,
      "loss": 0.4794,
      "step": 845
    },
    {
      "epoch": 0.712521055586749,
      "grad_norm": 0.4795666038990021,
      "learning_rate": 9.436185519594145e-06,
      "loss": 0.4629,
      "step": 846
    },
    {
      "epoch": 0.7133632790567097,
      "grad_norm": 0.3871944844722748,
      "learning_rate": 9.433921746196777e-06,
      "loss": 0.4772,
      "step": 847
    },
    {
      "epoch": 0.7142055025266704,
      "grad_norm": 0.41736915707588196,
      "learning_rate": 9.431653709923214e-06,
      "loss": 0.4923,
      "step": 848
    },
    {
      "epoch": 0.7150477259966311,
      "grad_norm": 0.41171911358833313,
      "learning_rate": 9.429381412954e-06,
      "loss": 0.4697,
      "step": 849
    },
    {
      "epoch": 0.7158899494665918,
      "grad_norm": 0.45949098467826843,
      "learning_rate": 9.427104857473773e-06,
      "loss": 0.4522,
      "step": 850
    },
    {
      "epoch": 0.7167321729365524,
      "grad_norm": 0.3666028082370758,
      "learning_rate": 9.424824045671267e-06,
      "loss": 0.4964,
      "step": 851
    },
    {
      "epoch": 0.7175743964065132,
      "grad_norm": 0.4497719705104828,
      "learning_rate": 9.422538979739307e-06,
      "loss": 0.4813,
      "step": 852
    },
    {
      "epoch": 0.7184166198764739,
      "grad_norm": 0.4401196241378784,
      "learning_rate": 9.420249661874812e-06,
      "loss": 0.4768,
      "step": 853
    },
    {
      "epoch": 0.7192588433464346,
      "grad_norm": 0.400215744972229,
      "learning_rate": 9.417956094278784e-06,
      "loss": 0.4383,
      "step": 854
    },
    {
      "epoch": 0.7201010668163953,
      "grad_norm": 0.41766366362571716,
      "learning_rate": 9.415658279156312e-06,
      "loss": 0.4707,
      "step": 855
    },
    {
      "epoch": 0.720943290286356,
      "grad_norm": 0.42597004771232605,
      "learning_rate": 9.41335621871657e-06,
      "loss": 0.4722,
      "step": 856
    },
    {
      "epoch": 0.7217855137563167,
      "grad_norm": 0.39556196331977844,
      "learning_rate": 9.41104991517281e-06,
      "loss": 0.4486,
      "step": 857
    },
    {
      "epoch": 0.7226277372262774,
      "grad_norm": 0.45901235938072205,
      "learning_rate": 9.408739370742372e-06,
      "loss": 0.4587,
      "step": 858
    },
    {
      "epoch": 0.723469960696238,
      "grad_norm": 0.43159759044647217,
      "learning_rate": 9.406424587646664e-06,
      "loss": 0.4743,
      "step": 859
    },
    {
      "epoch": 0.7243121841661988,
      "grad_norm": 0.4248340427875519,
      "learning_rate": 9.404105568111173e-06,
      "loss": 0.4609,
      "step": 860
    },
    {
      "epoch": 0.7251544076361595,
      "grad_norm": 0.45012742280960083,
      "learning_rate": 9.401782314365458e-06,
      "loss": 0.4466,
      "step": 861
    },
    {
      "epoch": 0.7259966311061201,
      "grad_norm": 0.3587284982204437,
      "learning_rate": 9.39945482864315e-06,
      "loss": 0.4723,
      "step": 862
    },
    {
      "epoch": 0.7268388545760809,
      "grad_norm": 0.4223197400569916,
      "learning_rate": 9.39712311318195e-06,
      "loss": 0.4613,
      "step": 863
    },
    {
      "epoch": 0.7276810780460415,
      "grad_norm": 0.3823569416999817,
      "learning_rate": 9.39478717022362e-06,
      "loss": 0.4649,
      "step": 864
    },
    {
      "epoch": 0.7285233015160022,
      "grad_norm": 0.40637442469596863,
      "learning_rate": 9.392447002013996e-06,
      "loss": 0.4523,
      "step": 865
    },
    {
      "epoch": 0.729365524985963,
      "grad_norm": 0.3997270464897156,
      "learning_rate": 9.390102610802965e-06,
      "loss": 0.4577,
      "step": 866
    },
    {
      "epoch": 0.7302077484559236,
      "grad_norm": 0.37728753685951233,
      "learning_rate": 9.387753998844482e-06,
      "loss": 0.4395,
      "step": 867
    },
    {
      "epoch": 0.7310499719258844,
      "grad_norm": 0.4092571437358856,
      "learning_rate": 9.385401168396558e-06,
      "loss": 0.4515,
      "step": 868
    },
    {
      "epoch": 0.731892195395845,
      "grad_norm": 0.3990737199783325,
      "learning_rate": 9.383044121721257e-06,
      "loss": 0.439,
      "step": 869
    },
    {
      "epoch": 0.7327344188658057,
      "grad_norm": 0.4278537631034851,
      "learning_rate": 9.380682861084703e-06,
      "loss": 0.4703,
      "step": 870
    },
    {
      "epoch": 0.7335766423357665,
      "grad_norm": 0.4430762827396393,
      "learning_rate": 9.378317388757062e-06,
      "loss": 0.464,
      "step": 871
    },
    {
      "epoch": 0.7344188658057271,
      "grad_norm": 0.4137156009674072,
      "learning_rate": 9.375947707012558e-06,
      "loss": 0.4292,
      "step": 872
    },
    {
      "epoch": 0.7352610892756878,
      "grad_norm": 0.4613038897514343,
      "learning_rate": 9.37357381812946e-06,
      "loss": 0.4579,
      "step": 873
    },
    {
      "epoch": 0.7361033127456486,
      "grad_norm": 0.4336634874343872,
      "learning_rate": 9.371195724390075e-06,
      "loss": 0.4413,
      "step": 874
    },
    {
      "epoch": 0.7369455362156092,
      "grad_norm": 0.4090328514575958,
      "learning_rate": 9.368813428080763e-06,
      "loss": 0.4639,
      "step": 875
    },
    {
      "epoch": 0.7377877596855699,
      "grad_norm": 0.4172559380531311,
      "learning_rate": 9.366426931491917e-06,
      "loss": 0.4448,
      "step": 876
    },
    {
      "epoch": 0.7386299831555306,
      "grad_norm": 0.4983735680580139,
      "learning_rate": 9.364036236917972e-06,
      "loss": 0.4791,
      "step": 877
    },
    {
      "epoch": 0.7394722066254913,
      "grad_norm": 0.4172636568546295,
      "learning_rate": 9.361641346657396e-06,
      "loss": 0.4629,
      "step": 878
    },
    {
      "epoch": 0.740314430095452,
      "grad_norm": 0.49173107743263245,
      "learning_rate": 9.359242263012693e-06,
      "loss": 0.4733,
      "step": 879
    },
    {
      "epoch": 0.7411566535654127,
      "grad_norm": 0.46523916721343994,
      "learning_rate": 9.356838988290401e-06,
      "loss": 0.459,
      "step": 880
    },
    {
      "epoch": 0.7419988770353734,
      "grad_norm": 0.4529389441013336,
      "learning_rate": 9.354431524801082e-06,
      "loss": 0.4805,
      "step": 881
    },
    {
      "epoch": 0.742841100505334,
      "grad_norm": 0.46101611852645874,
      "learning_rate": 9.352019874859326e-06,
      "loss": 0.4584,
      "step": 882
    },
    {
      "epoch": 0.7436833239752948,
      "grad_norm": 0.4039270579814911,
      "learning_rate": 9.349604040783754e-06,
      "loss": 0.4506,
      "step": 883
    },
    {
      "epoch": 0.7445255474452555,
      "grad_norm": 0.45948031544685364,
      "learning_rate": 9.347184024897003e-06,
      "loss": 0.4715,
      "step": 884
    },
    {
      "epoch": 0.7453677709152162,
      "grad_norm": 0.43411338329315186,
      "learning_rate": 9.344759829525734e-06,
      "loss": 0.4793,
      "step": 885
    },
    {
      "epoch": 0.7462099943851769,
      "grad_norm": 0.46341773867607117,
      "learning_rate": 9.342331457000621e-06,
      "loss": 0.4822,
      "step": 886
    },
    {
      "epoch": 0.7470522178551375,
      "grad_norm": 0.36343854665756226,
      "learning_rate": 9.339898909656364e-06,
      "loss": 0.4736,
      "step": 887
    },
    {
      "epoch": 0.7478944413250983,
      "grad_norm": 0.45923298597335815,
      "learning_rate": 9.33746218983167e-06,
      "loss": 0.4736,
      "step": 888
    },
    {
      "epoch": 0.748736664795059,
      "grad_norm": 0.43028751015663147,
      "learning_rate": 9.335021299869256e-06,
      "loss": 0.506,
      "step": 889
    },
    {
      "epoch": 0.7495788882650196,
      "grad_norm": 0.425796777009964,
      "learning_rate": 9.332576242115852e-06,
      "loss": 0.4468,
      "step": 890
    },
    {
      "epoch": 0.7504211117349804,
      "grad_norm": 0.37941044569015503,
      "learning_rate": 9.330127018922195e-06,
      "loss": 0.4455,
      "step": 891
    },
    {
      "epoch": 0.751263335204941,
      "grad_norm": 0.3719845712184906,
      "learning_rate": 9.327673632643021e-06,
      "loss": 0.4791,
      "step": 892
    },
    {
      "epoch": 0.7521055586749017,
      "grad_norm": 0.3852885365486145,
      "learning_rate": 9.32521608563708e-06,
      "loss": 0.4546,
      "step": 893
    },
    {
      "epoch": 0.7529477821448625,
      "grad_norm": 0.4158075451850891,
      "learning_rate": 9.32275438026711e-06,
      "loss": 0.4486,
      "step": 894
    },
    {
      "epoch": 0.7537900056148231,
      "grad_norm": 0.4150233566761017,
      "learning_rate": 9.320288518899853e-06,
      "loss": 0.4511,
      "step": 895
    },
    {
      "epoch": 0.7546322290847838,
      "grad_norm": 0.39781054854393005,
      "learning_rate": 9.317818503906046e-06,
      "loss": 0.4523,
      "step": 896
    },
    {
      "epoch": 0.7554744525547445,
      "grad_norm": 0.4574238061904907,
      "learning_rate": 9.315344337660422e-06,
      "loss": 0.4569,
      "step": 897
    },
    {
      "epoch": 0.7563166760247052,
      "grad_norm": 0.3971196711063385,
      "learning_rate": 9.312866022541697e-06,
      "loss": 0.4526,
      "step": 898
    },
    {
      "epoch": 0.757158899494666,
      "grad_norm": 0.332452654838562,
      "learning_rate": 9.310383560932587e-06,
      "loss": 0.452,
      "step": 899
    },
    {
      "epoch": 0.7580011229646266,
      "grad_norm": 0.39753177762031555,
      "learning_rate": 9.307896955219787e-06,
      "loss": 0.441,
      "step": 900
    },
    {
      "epoch": 0.7588433464345873,
      "grad_norm": 0.4081959128379822,
      "learning_rate": 9.305406207793974e-06,
      "loss": 0.4903,
      "step": 901
    },
    {
      "epoch": 0.759685569904548,
      "grad_norm": 0.40721839666366577,
      "learning_rate": 9.302911321049818e-06,
      "loss": 0.449,
      "step": 902
    },
    {
      "epoch": 0.7605277933745087,
      "grad_norm": 0.4468439221382141,
      "learning_rate": 9.300412297385954e-06,
      "loss": 0.4616,
      "step": 903
    },
    {
      "epoch": 0.7613700168444694,
      "grad_norm": 0.4262661635875702,
      "learning_rate": 9.297909139205005e-06,
      "loss": 0.4861,
      "step": 904
    },
    {
      "epoch": 0.7622122403144301,
      "grad_norm": 0.4479835629463196,
      "learning_rate": 9.295401848913569e-06,
      "loss": 0.4537,
      "step": 905
    },
    {
      "epoch": 0.7630544637843908,
      "grad_norm": 0.40566354990005493,
      "learning_rate": 9.29289042892221e-06,
      "loss": 0.4576,
      "step": 906
    },
    {
      "epoch": 0.7638966872543514,
      "grad_norm": 0.40772324800491333,
      "learning_rate": 9.290374881645465e-06,
      "loss": 0.4706,
      "step": 907
    },
    {
      "epoch": 0.7647389107243122,
      "grad_norm": 0.4057879149913788,
      "learning_rate": 9.287855209501844e-06,
      "loss": 0.4807,
      "step": 908
    },
    {
      "epoch": 0.7655811341942729,
      "grad_norm": 0.4381915032863617,
      "learning_rate": 9.285331414913816e-06,
      "loss": 0.4881,
      "step": 909
    },
    {
      "epoch": 0.7664233576642335,
      "grad_norm": 0.39544057846069336,
      "learning_rate": 9.282803500307818e-06,
      "loss": 0.4302,
      "step": 910
    },
    {
      "epoch": 0.7672655811341943,
      "grad_norm": 0.38724854588508606,
      "learning_rate": 9.280271468114243e-06,
      "loss": 0.4621,
      "step": 911
    },
    {
      "epoch": 0.768107804604155,
      "grad_norm": 0.4674939811229706,
      "learning_rate": 9.277735320767449e-06,
      "loss": 0.4799,
      "step": 912
    },
    {
      "epoch": 0.7689500280741156,
      "grad_norm": 0.4303678572177887,
      "learning_rate": 9.275195060705749e-06,
      "loss": 0.446,
      "step": 913
    },
    {
      "epoch": 0.7697922515440764,
      "grad_norm": 0.48434701561927795,
      "learning_rate": 9.272650690371403e-06,
      "loss": 0.4606,
      "step": 914
    },
    {
      "epoch": 0.770634475014037,
      "grad_norm": 0.4362103044986725,
      "learning_rate": 9.270102212210632e-06,
      "loss": 0.4602,
      "step": 915
    },
    {
      "epoch": 0.7714766984839978,
      "grad_norm": 0.40923336148262024,
      "learning_rate": 9.267549628673603e-06,
      "loss": 0.4653,
      "step": 916
    },
    {
      "epoch": 0.7723189219539585,
      "grad_norm": 0.42429372668266296,
      "learning_rate": 9.264992942214427e-06,
      "loss": 0.4815,
      "step": 917
    },
    {
      "epoch": 0.7731611454239191,
      "grad_norm": 0.36550015211105347,
      "learning_rate": 9.262432155291167e-06,
      "loss": 0.4416,
      "step": 918
    },
    {
      "epoch": 0.7740033688938799,
      "grad_norm": 0.39116111397743225,
      "learning_rate": 9.25986727036582e-06,
      "loss": 0.4354,
      "step": 919
    },
    {
      "epoch": 0.7748455923638405,
      "grad_norm": 0.41847333312034607,
      "learning_rate": 9.257298289904324e-06,
      "loss": 0.4419,
      "step": 920
    },
    {
      "epoch": 0.7756878158338012,
      "grad_norm": 0.41732004284858704,
      "learning_rate": 9.254725216376562e-06,
      "loss": 0.4563,
      "step": 921
    },
    {
      "epoch": 0.776530039303762,
      "grad_norm": 0.4943685233592987,
      "learning_rate": 9.252148052256343e-06,
      "loss": 0.4652,
      "step": 922
    },
    {
      "epoch": 0.7773722627737226,
      "grad_norm": 0.39155876636505127,
      "learning_rate": 9.249566800021417e-06,
      "loss": 0.4839,
      "step": 923
    },
    {
      "epoch": 0.7782144862436833,
      "grad_norm": 0.44800281524658203,
      "learning_rate": 9.246981462153456e-06,
      "loss": 0.504,
      "step": 924
    },
    {
      "epoch": 0.779056709713644,
      "grad_norm": 0.42213407158851624,
      "learning_rate": 9.244392041138068e-06,
      "loss": 0.4284,
      "step": 925
    },
    {
      "epoch": 0.7798989331836047,
      "grad_norm": 0.47037097811698914,
      "learning_rate": 9.24179853946478e-06,
      "loss": 0.4826,
      "step": 926
    },
    {
      "epoch": 0.7807411566535654,
      "grad_norm": 0.39834311604499817,
      "learning_rate": 9.239200959627048e-06,
      "loss": 0.4498,
      "step": 927
    },
    {
      "epoch": 0.7815833801235261,
      "grad_norm": 0.3979833126068115,
      "learning_rate": 9.236599304122246e-06,
      "loss": 0.4465,
      "step": 928
    },
    {
      "epoch": 0.7824256035934868,
      "grad_norm": 0.4140772223472595,
      "learning_rate": 9.233993575451663e-06,
      "loss": 0.4603,
      "step": 929
    },
    {
      "epoch": 0.7832678270634476,
      "grad_norm": 0.3961040675640106,
      "learning_rate": 9.231383776120512e-06,
      "loss": 0.4544,
      "step": 930
    },
    {
      "epoch": 0.7841100505334082,
      "grad_norm": 0.41685524582862854,
      "learning_rate": 9.228769908637912e-06,
      "loss": 0.4812,
      "step": 931
    },
    {
      "epoch": 0.7849522740033689,
      "grad_norm": 0.4022108018398285,
      "learning_rate": 9.226151975516897e-06,
      "loss": 0.476,
      "step": 932
    },
    {
      "epoch": 0.7857944974733296,
      "grad_norm": 0.41138097643852234,
      "learning_rate": 9.223529979274411e-06,
      "loss": 0.4571,
      "step": 933
    },
    {
      "epoch": 0.7866367209432903,
      "grad_norm": 0.35879647731781006,
      "learning_rate": 9.220903922431302e-06,
      "loss": 0.4623,
      "step": 934
    },
    {
      "epoch": 0.787478944413251,
      "grad_norm": 0.4125921130180359,
      "learning_rate": 9.218273807512318e-06,
      "loss": 0.4783,
      "step": 935
    },
    {
      "epoch": 0.7883211678832117,
      "grad_norm": 0.3944634795188904,
      "learning_rate": 9.215639637046121e-06,
      "loss": 0.4609,
      "step": 936
    },
    {
      "epoch": 0.7891633913531724,
      "grad_norm": 0.35888588428497314,
      "learning_rate": 9.213001413565259e-06,
      "loss": 0.4734,
      "step": 937
    },
    {
      "epoch": 0.790005614823133,
      "grad_norm": 0.4384128153324127,
      "learning_rate": 9.210359139606183e-06,
      "loss": 0.462,
      "step": 938
    },
    {
      "epoch": 0.7908478382930938,
      "grad_norm": 0.4521356225013733,
      "learning_rate": 9.207712817709237e-06,
      "loss": 0.4561,
      "step": 939
    },
    {
      "epoch": 0.7916900617630545,
      "grad_norm": 0.4022276699542999,
      "learning_rate": 9.205062450418655e-06,
      "loss": 0.4348,
      "step": 940
    },
    {
      "epoch": 0.7925322852330151,
      "grad_norm": 0.42767754197120667,
      "learning_rate": 9.202408040282567e-06,
      "loss": 0.431,
      "step": 941
    },
    {
      "epoch": 0.7933745087029759,
      "grad_norm": 0.4460948407649994,
      "learning_rate": 9.19974958985298e-06,
      "loss": 0.4636,
      "step": 942
    },
    {
      "epoch": 0.7942167321729365,
      "grad_norm": 0.396727979183197,
      "learning_rate": 9.197087101685794e-06,
      "loss": 0.4612,
      "step": 943
    },
    {
      "epoch": 0.7950589556428973,
      "grad_norm": 0.4548892676830292,
      "learning_rate": 9.194420578340785e-06,
      "loss": 0.4629,
      "step": 944
    },
    {
      "epoch": 0.795901179112858,
      "grad_norm": 0.40557631850242615,
      "learning_rate": 9.191750022381613e-06,
      "loss": 0.4597,
      "step": 945
    },
    {
      "epoch": 0.7967434025828186,
      "grad_norm": 0.41193124651908875,
      "learning_rate": 9.189075436375813e-06,
      "loss": 0.4896,
      "step": 946
    },
    {
      "epoch": 0.7975856260527794,
      "grad_norm": 0.4075430929660797,
      "learning_rate": 9.186396822894792e-06,
      "loss": 0.4567,
      "step": 947
    },
    {
      "epoch": 0.79842784952274,
      "grad_norm": 0.39221125841140747,
      "learning_rate": 9.183714184513832e-06,
      "loss": 0.4432,
      "step": 948
    },
    {
      "epoch": 0.7992700729927007,
      "grad_norm": 0.39167267084121704,
      "learning_rate": 9.181027523812088e-06,
      "loss": 0.4499,
      "step": 949
    },
    {
      "epoch": 0.8001122964626615,
      "grad_norm": 0.41120946407318115,
      "learning_rate": 9.178336843372576e-06,
      "loss": 0.4605,
      "step": 950
    },
    {
      "epoch": 0.8009545199326221,
      "grad_norm": 0.3553692102432251,
      "learning_rate": 9.175642145782179e-06,
      "loss": 0.4546,
      "step": 951
    },
    {
      "epoch": 0.8017967434025828,
      "grad_norm": 0.4419460594654083,
      "learning_rate": 9.172943433631642e-06,
      "loss": 0.4632,
      "step": 952
    },
    {
      "epoch": 0.8026389668725435,
      "grad_norm": 0.40987521409988403,
      "learning_rate": 9.170240709515573e-06,
      "loss": 0.4632,
      "step": 953
    },
    {
      "epoch": 0.8034811903425042,
      "grad_norm": 0.41108250617980957,
      "learning_rate": 9.16753397603243e-06,
      "loss": 0.4584,
      "step": 954
    },
    {
      "epoch": 0.8043234138124649,
      "grad_norm": 0.4647172689437866,
      "learning_rate": 9.164823235784535e-06,
      "loss": 0.4623,
      "step": 955
    },
    {
      "epoch": 0.8051656372824256,
      "grad_norm": 0.48335736989974976,
      "learning_rate": 9.162108491378051e-06,
      "loss": 0.4864,
      "step": 956
    },
    {
      "epoch": 0.8060078607523863,
      "grad_norm": 0.4012032151222229,
      "learning_rate": 9.159389745423003e-06,
      "loss": 0.4678,
      "step": 957
    },
    {
      "epoch": 0.8068500842223469,
      "grad_norm": 0.4807227551937103,
      "learning_rate": 9.156667000533251e-06,
      "loss": 0.4567,
      "step": 958
    },
    {
      "epoch": 0.8076923076923077,
      "grad_norm": 0.4906511902809143,
      "learning_rate": 9.153940259326511e-06,
      "loss": 0.4812,
      "step": 959
    },
    {
      "epoch": 0.8085345311622684,
      "grad_norm": 0.40440821647644043,
      "learning_rate": 9.151209524424333e-06,
      "loss": 0.4348,
      "step": 960
    },
    {
      "epoch": 0.8093767546322291,
      "grad_norm": 0.4814048409461975,
      "learning_rate": 9.14847479845211e-06,
      "loss": 0.485,
      "step": 961
    },
    {
      "epoch": 0.8102189781021898,
      "grad_norm": 0.40049949288368225,
      "learning_rate": 9.145736084039073e-06,
      "loss": 0.4651,
      "step": 962
    },
    {
      "epoch": 0.8110612015721504,
      "grad_norm": 0.4023774266242981,
      "learning_rate": 9.142993383818284e-06,
      "loss": 0.456,
      "step": 963
    },
    {
      "epoch": 0.8119034250421112,
      "grad_norm": 0.4292305111885071,
      "learning_rate": 9.14024670042664e-06,
      "loss": 0.5059,
      "step": 964
    },
    {
      "epoch": 0.8127456485120719,
      "grad_norm": 0.39510244131088257,
      "learning_rate": 9.137496036504868e-06,
      "loss": 0.4392,
      "step": 965
    },
    {
      "epoch": 0.8135878719820325,
      "grad_norm": 0.42599359154701233,
      "learning_rate": 9.134741394697517e-06,
      "loss": 0.4452,
      "step": 966
    },
    {
      "epoch": 0.8144300954519933,
      "grad_norm": 0.39583104848861694,
      "learning_rate": 9.131982777652967e-06,
      "loss": 0.4643,
      "step": 967
    },
    {
      "epoch": 0.815272318921954,
      "grad_norm": 0.3690897524356842,
      "learning_rate": 9.129220188023419e-06,
      "loss": 0.4633,
      "step": 968
    },
    {
      "epoch": 0.8161145423919146,
      "grad_norm": 0.38295769691467285,
      "learning_rate": 9.126453628464889e-06,
      "loss": 0.4517,
      "step": 969
    },
    {
      "epoch": 0.8169567658618754,
      "grad_norm": 0.4499504864215851,
      "learning_rate": 9.12368310163721e-06,
      "loss": 0.4727,
      "step": 970
    },
    {
      "epoch": 0.817798989331836,
      "grad_norm": 0.4269801378250122,
      "learning_rate": 9.120908610204036e-06,
      "loss": 0.4808,
      "step": 971
    },
    {
      "epoch": 0.8186412128017967,
      "grad_norm": 0.39932724833488464,
      "learning_rate": 9.118130156832823e-06,
      "loss": 0.4537,
      "step": 972
    },
    {
      "epoch": 0.8194834362717575,
      "grad_norm": 0.450878769159317,
      "learning_rate": 9.115347744194844e-06,
      "loss": 0.4689,
      "step": 973
    },
    {
      "epoch": 0.8203256597417181,
      "grad_norm": 0.41747966408729553,
      "learning_rate": 9.112561374965177e-06,
      "loss": 0.4755,
      "step": 974
    },
    {
      "epoch": 0.8211678832116789,
      "grad_norm": 0.39759355783462524,
      "learning_rate": 9.109771051822702e-06,
      "loss": 0.4296,
      "step": 975
    },
    {
      "epoch": 0.8220101066816395,
      "grad_norm": 0.41313496232032776,
      "learning_rate": 9.106976777450099e-06,
      "loss": 0.4735,
      "step": 976
    },
    {
      "epoch": 0.8228523301516002,
      "grad_norm": 0.4374113380908966,
      "learning_rate": 9.10417855453385e-06,
      "loss": 0.4563,
      "step": 977
    },
    {
      "epoch": 0.823694553621561,
      "grad_norm": 0.4076191186904907,
      "learning_rate": 9.10137638576423e-06,
      "loss": 0.4728,
      "step": 978
    },
    {
      "epoch": 0.8245367770915216,
      "grad_norm": 0.3480476438999176,
      "learning_rate": 9.098570273835314e-06,
      "loss": 0.4551,
      "step": 979
    },
    {
      "epoch": 0.8253790005614823,
      "grad_norm": 0.3971334993839264,
      "learning_rate": 9.09576022144496e-06,
      "loss": 0.4508,
      "step": 980
    },
    {
      "epoch": 0.826221224031443,
      "grad_norm": 0.4569026827812195,
      "learning_rate": 9.09294623129482e-06,
      "loss": 0.4262,
      "step": 981
    },
    {
      "epoch": 0.8270634475014037,
      "grad_norm": 0.4090694785118103,
      "learning_rate": 9.090128306090329e-06,
      "loss": 0.4741,
      "step": 982
    },
    {
      "epoch": 0.8279056709713644,
      "grad_norm": 0.4151981770992279,
      "learning_rate": 9.087306448540707e-06,
      "loss": 0.4702,
      "step": 983
    },
    {
      "epoch": 0.8287478944413251,
      "grad_norm": 0.3829687237739563,
      "learning_rate": 9.084480661358954e-06,
      "loss": 0.4469,
      "step": 984
    },
    {
      "epoch": 0.8295901179112858,
      "grad_norm": 0.3807797133922577,
      "learning_rate": 9.081650947261847e-06,
      "loss": 0.4291,
      "step": 985
    },
    {
      "epoch": 0.8304323413812464,
      "grad_norm": 0.39150989055633545,
      "learning_rate": 9.07881730896994e-06,
      "loss": 0.4624,
      "step": 986
    },
    {
      "epoch": 0.8312745648512072,
      "grad_norm": 0.37471431493759155,
      "learning_rate": 9.07597974920756e-06,
      "loss": 0.4663,
      "step": 987
    },
    {
      "epoch": 0.8321167883211679,
      "grad_norm": 0.40285855531692505,
      "learning_rate": 9.073138270702804e-06,
      "loss": 0.4484,
      "step": 988
    },
    {
      "epoch": 0.8329590117911286,
      "grad_norm": 0.37800702452659607,
      "learning_rate": 9.070292876187532e-06,
      "loss": 0.4404,
      "step": 989
    },
    {
      "epoch": 0.8338012352610893,
      "grad_norm": 0.4276757538318634,
      "learning_rate": 9.067443568397378e-06,
      "loss": 0.4802,
      "step": 990
    },
    {
      "epoch": 0.83464345873105,
      "grad_norm": 0.42309343814849854,
      "learning_rate": 9.06459035007173e-06,
      "loss": 0.4784,
      "step": 991
    },
    {
      "epoch": 0.8354856822010107,
      "grad_norm": 0.3896193206310272,
      "learning_rate": 9.061733223953738e-06,
      "loss": 0.4536,
      "step": 992
    },
    {
      "epoch": 0.8363279056709714,
      "grad_norm": 0.33523300290107727,
      "learning_rate": 9.058872192790314e-06,
      "loss": 0.4466,
      "step": 993
    },
    {
      "epoch": 0.837170129140932,
      "grad_norm": 0.37372490763664246,
      "learning_rate": 9.056007259332115e-06,
      "loss": 0.4878,
      "step": 994
    },
    {
      "epoch": 0.8380123526108928,
      "grad_norm": 0.3746437430381775,
      "learning_rate": 9.053138426333562e-06,
      "loss": 0.4747,
      "step": 995
    },
    {
      "epoch": 0.8388545760808535,
      "grad_norm": 0.43201062083244324,
      "learning_rate": 9.05026569655281e-06,
      "loss": 0.4644,
      "step": 996
    },
    {
      "epoch": 0.8396967995508141,
      "grad_norm": 0.4160325825214386,
      "learning_rate": 9.047389072751777e-06,
      "loss": 0.4374,
      "step": 997
    },
    {
      "epoch": 0.8405390230207749,
      "grad_norm": 0.4333237111568451,
      "learning_rate": 9.044508557696111e-06,
      "loss": 0.4411,
      "step": 998
    },
    {
      "epoch": 0.8413812464907355,
      "grad_norm": 0.4789202809333801,
      "learning_rate": 9.041624154155208e-06,
      "loss": 0.4718,
      "step": 999
    },
    {
      "epoch": 0.8422234699606962,
      "grad_norm": 0.48608896136283875,
      "learning_rate": 9.038735864902201e-06,
      "loss": 0.4535,
      "step": 1000
    },
    {
      "epoch": 0.843065693430657,
      "grad_norm": 0.4236295521259308,
      "learning_rate": 9.035843692713961e-06,
      "loss": 0.4647,
      "step": 1001
    },
    {
      "epoch": 0.8439079169006176,
      "grad_norm": 0.42146241664886475,
      "learning_rate": 9.032947640371086e-06,
      "loss": 0.4644,
      "step": 1002
    },
    {
      "epoch": 0.8447501403705783,
      "grad_norm": 0.40867170691490173,
      "learning_rate": 9.030047710657912e-06,
      "loss": 0.4585,
      "step": 1003
    },
    {
      "epoch": 0.845592363840539,
      "grad_norm": 0.4693366587162018,
      "learning_rate": 9.027143906362499e-06,
      "loss": 0.4493,
      "step": 1004
    },
    {
      "epoch": 0.8464345873104997,
      "grad_norm": 0.36974403262138367,
      "learning_rate": 9.02423623027663e-06,
      "loss": 0.4508,
      "step": 1005
    },
    {
      "epoch": 0.8472768107804605,
      "grad_norm": 0.38446521759033203,
      "learning_rate": 9.021324685195814e-06,
      "loss": 0.4599,
      "step": 1006
    },
    {
      "epoch": 0.8481190342504211,
      "grad_norm": 0.39433109760284424,
      "learning_rate": 9.018409273919279e-06,
      "loss": 0.4437,
      "step": 1007
    },
    {
      "epoch": 0.8489612577203818,
      "grad_norm": 0.388526976108551,
      "learning_rate": 9.01548999924997e-06,
      "loss": 0.4721,
      "step": 1008
    },
    {
      "epoch": 0.8498034811903425,
      "grad_norm": 0.379374623298645,
      "learning_rate": 9.012566863994548e-06,
      "loss": 0.4686,
      "step": 1009
    },
    {
      "epoch": 0.8506457046603032,
      "grad_norm": 0.424532413482666,
      "learning_rate": 9.00963987096338e-06,
      "loss": 0.466,
      "step": 1010
    },
    {
      "epoch": 0.8514879281302639,
      "grad_norm": 0.41608524322509766,
      "learning_rate": 9.006709022970547e-06,
      "loss": 0.4789,
      "step": 1011
    },
    {
      "epoch": 0.8523301516002246,
      "grad_norm": 0.40420427918434143,
      "learning_rate": 9.003774322833835e-06,
      "loss": 0.467,
      "step": 1012
    },
    {
      "epoch": 0.8531723750701853,
      "grad_norm": 0.41449832916259766,
      "learning_rate": 9.000835773374733e-06,
      "loss": 0.4666,
      "step": 1013
    },
    {
      "epoch": 0.8540145985401459,
      "grad_norm": 0.4272162616252899,
      "learning_rate": 8.997893377418432e-06,
      "loss": 0.4702,
      "step": 1014
    },
    {
      "epoch": 0.8548568220101067,
      "grad_norm": 0.41371074318885803,
      "learning_rate": 8.99494713779382e-06,
      "loss": 0.4379,
      "step": 1015
    },
    {
      "epoch": 0.8556990454800674,
      "grad_norm": 0.38848748803138733,
      "learning_rate": 8.991997057333481e-06,
      "loss": 0.4479,
      "step": 1016
    },
    {
      "epoch": 0.856541268950028,
      "grad_norm": 0.4220898449420929,
      "learning_rate": 8.98904313887369e-06,
      "loss": 0.4671,
      "step": 1017
    },
    {
      "epoch": 0.8573834924199888,
      "grad_norm": 0.45767658948898315,
      "learning_rate": 8.986085385254417e-06,
      "loss": 0.5026,
      "step": 1018
    },
    {
      "epoch": 0.8582257158899494,
      "grad_norm": 0.3832434117794037,
      "learning_rate": 8.983123799319312e-06,
      "loss": 0.4699,
      "step": 1019
    },
    {
      "epoch": 0.8590679393599102,
      "grad_norm": 0.38593629002571106,
      "learning_rate": 8.980158383915714e-06,
      "loss": 0.4542,
      "step": 1020
    },
    {
      "epoch": 0.8599101628298709,
      "grad_norm": 0.43415534496307373,
      "learning_rate": 8.977189141894645e-06,
      "loss": 0.4983,
      "step": 1021
    },
    {
      "epoch": 0.8607523862998315,
      "grad_norm": 0.4050995111465454,
      "learning_rate": 8.9742160761108e-06,
      "loss": 0.446,
      "step": 1022
    },
    {
      "epoch": 0.8615946097697923,
      "grad_norm": 0.38833823800086975,
      "learning_rate": 8.971239189422555e-06,
      "loss": 0.4393,
      "step": 1023
    },
    {
      "epoch": 0.862436833239753,
      "grad_norm": 0.3762548863887787,
      "learning_rate": 8.968258484691961e-06,
      "loss": 0.4738,
      "step": 1024
    },
    {
      "epoch": 0.8632790567097136,
      "grad_norm": 0.4080943763256073,
      "learning_rate": 8.965273964784735e-06,
      "loss": 0.4466,
      "step": 1025
    },
    {
      "epoch": 0.8641212801796744,
      "grad_norm": 0.4030582010746002,
      "learning_rate": 8.962285632570266e-06,
      "loss": 0.4612,
      "step": 1026
    },
    {
      "epoch": 0.864963503649635,
      "grad_norm": 0.40284284949302673,
      "learning_rate": 8.959293490921606e-06,
      "loss": 0.4563,
      "step": 1027
    },
    {
      "epoch": 0.8658057271195957,
      "grad_norm": 0.3684934079647064,
      "learning_rate": 8.956297542715469e-06,
      "loss": 0.4586,
      "step": 1028
    },
    {
      "epoch": 0.8666479505895565,
      "grad_norm": 0.47546106576919556,
      "learning_rate": 8.953297790832231e-06,
      "loss": 0.4695,
      "step": 1029
    },
    {
      "epoch": 0.8674901740595171,
      "grad_norm": 0.35507622361183167,
      "learning_rate": 8.950294238155924e-06,
      "loss": 0.4482,
      "step": 1030
    },
    {
      "epoch": 0.8683323975294778,
      "grad_norm": 0.3462589383125305,
      "learning_rate": 8.947286887574234e-06,
      "loss": 0.4704,
      "step": 1031
    },
    {
      "epoch": 0.8691746209994385,
      "grad_norm": 0.38888853788375854,
      "learning_rate": 8.944275741978495e-06,
      "loss": 0.4641,
      "step": 1032
    },
    {
      "epoch": 0.8700168444693992,
      "grad_norm": 0.41295167803764343,
      "learning_rate": 8.941260804263697e-06,
      "loss": 0.5019,
      "step": 1033
    },
    {
      "epoch": 0.87085906793936,
      "grad_norm": 0.40598276257514954,
      "learning_rate": 8.938242077328469e-06,
      "loss": 0.4794,
      "step": 1034
    },
    {
      "epoch": 0.8717012914093206,
      "grad_norm": 0.4022899866104126,
      "learning_rate": 8.935219564075087e-06,
      "loss": 0.4529,
      "step": 1035
    },
    {
      "epoch": 0.8725435148792813,
      "grad_norm": 0.40545353293418884,
      "learning_rate": 8.932193267409465e-06,
      "loss": 0.4626,
      "step": 1036
    },
    {
      "epoch": 0.873385738349242,
      "grad_norm": 0.3948993384838104,
      "learning_rate": 8.929163190241157e-06,
      "loss": 0.4526,
      "step": 1037
    },
    {
      "epoch": 0.8742279618192027,
      "grad_norm": 0.4164413809776306,
      "learning_rate": 8.92612933548335e-06,
      "loss": 0.4499,
      "step": 1038
    },
    {
      "epoch": 0.8750701852891634,
      "grad_norm": 0.3526324927806854,
      "learning_rate": 8.923091706052863e-06,
      "loss": 0.4312,
      "step": 1039
    },
    {
      "epoch": 0.8759124087591241,
      "grad_norm": 0.3945640027523041,
      "learning_rate": 8.920050304870142e-06,
      "loss": 0.4494,
      "step": 1040
    },
    {
      "epoch": 0.8767546322290848,
      "grad_norm": 0.39838308095932007,
      "learning_rate": 8.917005134859263e-06,
      "loss": 0.4554,
      "step": 1041
    },
    {
      "epoch": 0.8775968556990454,
      "grad_norm": 0.36966654658317566,
      "learning_rate": 8.913956198947923e-06,
      "loss": 0.444,
      "step": 1042
    },
    {
      "epoch": 0.8784390791690062,
      "grad_norm": 0.43054866790771484,
      "learning_rate": 8.910903500067443e-06,
      "loss": 0.4591,
      "step": 1043
    },
    {
      "epoch": 0.8792813026389669,
      "grad_norm": 0.4573085606098175,
      "learning_rate": 8.907847041152757e-06,
      "loss": 0.458,
      "step": 1044
    },
    {
      "epoch": 0.8801235261089275,
      "grad_norm": 0.41495639085769653,
      "learning_rate": 8.904786825142416e-06,
      "loss": 0.4396,
      "step": 1045
    },
    {
      "epoch": 0.8809657495788883,
      "grad_norm": 0.43662676215171814,
      "learning_rate": 8.901722854978582e-06,
      "loss": 0.4724,
      "step": 1046
    },
    {
      "epoch": 0.881807973048849,
      "grad_norm": 0.4008462727069855,
      "learning_rate": 8.89865513360703e-06,
      "loss": 0.4529,
      "step": 1047
    },
    {
      "epoch": 0.8826501965188096,
      "grad_norm": 0.4239480793476105,
      "learning_rate": 8.89558366397714e-06,
      "loss": 0.4453,
      "step": 1048
    },
    {
      "epoch": 0.8834924199887704,
      "grad_norm": 0.4037556052207947,
      "learning_rate": 8.892508449041893e-06,
      "loss": 0.4632,
      "step": 1049
    },
    {
      "epoch": 0.884334643458731,
      "grad_norm": 0.3921174705028534,
      "learning_rate": 8.889429491757872e-06,
      "loss": 0.4653,
      "step": 1050
    },
    {
      "epoch": 0.8851768669286918,
      "grad_norm": 0.42982763051986694,
      "learning_rate": 8.88634679508526e-06,
      "loss": 0.4576,
      "step": 1051
    },
    {
      "epoch": 0.8860190903986525,
      "grad_norm": 0.3991256356239319,
      "learning_rate": 8.883260361987833e-06,
      "loss": 0.4496,
      "step": 1052
    },
    {
      "epoch": 0.8868613138686131,
      "grad_norm": 0.4120168089866638,
      "learning_rate": 8.88017019543296e-06,
      "loss": 0.4525,
      "step": 1053
    },
    {
      "epoch": 0.8877035373385739,
      "grad_norm": 0.43512123823165894,
      "learning_rate": 8.8770762983916e-06,
      "loss": 0.4938,
      "step": 1054
    },
    {
      "epoch": 0.8885457608085345,
      "grad_norm": 0.4584600627422333,
      "learning_rate": 8.8739786738383e-06,
      "loss": 0.4608,
      "step": 1055
    },
    {
      "epoch": 0.8893879842784952,
      "grad_norm": 0.44547486305236816,
      "learning_rate": 8.870877324751186e-06,
      "loss": 0.4787,
      "step": 1056
    },
    {
      "epoch": 0.890230207748456,
      "grad_norm": 0.3823145925998688,
      "learning_rate": 8.867772254111966e-06,
      "loss": 0.4345,
      "step": 1057
    },
    {
      "epoch": 0.8910724312184166,
      "grad_norm": 0.41858601570129395,
      "learning_rate": 8.864663464905933e-06,
      "loss": 0.4492,
      "step": 1058
    },
    {
      "epoch": 0.8919146546883773,
      "grad_norm": 0.4234440326690674,
      "learning_rate": 8.861550960121946e-06,
      "loss": 0.4524,
      "step": 1059
    },
    {
      "epoch": 0.892756878158338,
      "grad_norm": 0.39730560779571533,
      "learning_rate": 8.85843474275244e-06,
      "loss": 0.4785,
      "step": 1060
    },
    {
      "epoch": 0.8935991016282987,
      "grad_norm": 0.39776748418807983,
      "learning_rate": 8.85531481579342e-06,
      "loss": 0.4665,
      "step": 1061
    },
    {
      "epoch": 0.8944413250982594,
      "grad_norm": 0.3698873221874237,
      "learning_rate": 8.852191182244456e-06,
      "loss": 0.4577,
      "step": 1062
    },
    {
      "epoch": 0.8952835485682201,
      "grad_norm": 0.3293462097644806,
      "learning_rate": 8.849063845108685e-06,
      "loss": 0.4463,
      "step": 1063
    },
    {
      "epoch": 0.8961257720381808,
      "grad_norm": 0.43715614080429077,
      "learning_rate": 8.8459328073928e-06,
      "loss": 0.4337,
      "step": 1064
    },
    {
      "epoch": 0.8969679955081415,
      "grad_norm": 0.39426177740097046,
      "learning_rate": 8.842798072107055e-06,
      "loss": 0.4526,
      "step": 1065
    },
    {
      "epoch": 0.8978102189781022,
      "grad_norm": 0.3865719437599182,
      "learning_rate": 8.839659642265259e-06,
      "loss": 0.4508,
      "step": 1066
    },
    {
      "epoch": 0.8986524424480629,
      "grad_norm": 0.3680262565612793,
      "learning_rate": 8.836517520884768e-06,
      "loss": 0.4285,
      "step": 1067
    },
    {
      "epoch": 0.8994946659180236,
      "grad_norm": 0.3942987024784088,
      "learning_rate": 8.833371710986493e-06,
      "loss": 0.4395,
      "step": 1068
    },
    {
      "epoch": 0.9003368893879843,
      "grad_norm": 0.38288164138793945,
      "learning_rate": 8.83022221559489e-06,
      "loss": 0.4642,
      "step": 1069
    },
    {
      "epoch": 0.9011791128579449,
      "grad_norm": 0.38613536953926086,
      "learning_rate": 8.827069037737958e-06,
      "loss": 0.463,
      "step": 1070
    },
    {
      "epoch": 0.9020213363279057,
      "grad_norm": 0.369083434343338,
      "learning_rate": 8.823912180447237e-06,
      "loss": 0.4321,
      "step": 1071
    },
    {
      "epoch": 0.9028635597978664,
      "grad_norm": 0.4185395836830139,
      "learning_rate": 8.820751646757798e-06,
      "loss": 0.4657,
      "step": 1072
    },
    {
      "epoch": 0.903705783267827,
      "grad_norm": 0.37915700674057007,
      "learning_rate": 8.81758743970826e-06,
      "loss": 0.464,
      "step": 1073
    },
    {
      "epoch": 0.9045480067377878,
      "grad_norm": 0.41680216789245605,
      "learning_rate": 8.81441956234076e-06,
      "loss": 0.4925,
      "step": 1074
    },
    {
      "epoch": 0.9053902302077484,
      "grad_norm": 0.4257161617279053,
      "learning_rate": 8.81124801770097e-06,
      "loss": 0.4749,
      "step": 1075
    },
    {
      "epoch": 0.9062324536777091,
      "grad_norm": 0.4024163782596588,
      "learning_rate": 8.80807280883809e-06,
      "loss": 0.457,
      "step": 1076
    },
    {
      "epoch": 0.9070746771476699,
      "grad_norm": 0.4715385138988495,
      "learning_rate": 8.804893938804839e-06,
      "loss": 0.4533,
      "step": 1077
    },
    {
      "epoch": 0.9079169006176305,
      "grad_norm": 0.39870283007621765,
      "learning_rate": 8.801711410657456e-06,
      "loss": 0.4756,
      "step": 1078
    },
    {
      "epoch": 0.9087591240875912,
      "grad_norm": 0.4305819571018219,
      "learning_rate": 8.7985252274557e-06,
      "loss": 0.4522,
      "step": 1079
    },
    {
      "epoch": 0.909601347557552,
      "grad_norm": 0.4091406762599945,
      "learning_rate": 8.795335392262841e-06,
      "loss": 0.4584,
      "step": 1080
    },
    {
      "epoch": 0.9104435710275126,
      "grad_norm": 0.4165360629558563,
      "learning_rate": 8.79214190814566e-06,
      "loss": 0.4448,
      "step": 1081
    },
    {
      "epoch": 0.9112857944974734,
      "grad_norm": 0.3934219181537628,
      "learning_rate": 8.78894477817445e-06,
      "loss": 0.4291,
      "step": 1082
    },
    {
      "epoch": 0.912128017967434,
      "grad_norm": 0.41224405169487,
      "learning_rate": 8.785744005423003e-06,
      "loss": 0.4538,
      "step": 1083
    },
    {
      "epoch": 0.9129702414373947,
      "grad_norm": 0.5300390124320984,
      "learning_rate": 8.78253959296862e-06,
      "loss": 0.4475,
      "step": 1084
    },
    {
      "epoch": 0.9138124649073555,
      "grad_norm": 0.4256507456302643,
      "learning_rate": 8.779331543892097e-06,
      "loss": 0.4592,
      "step": 1085
    },
    {
      "epoch": 0.9146546883773161,
      "grad_norm": 0.41994404792785645,
      "learning_rate": 8.77611986127773e-06,
      "loss": 0.449,
      "step": 1086
    },
    {
      "epoch": 0.9154969118472768,
      "grad_norm": 0.4447322189807892,
      "learning_rate": 8.772904548213301e-06,
      "loss": 0.4419,
      "step": 1087
    },
    {
      "epoch": 0.9163391353172375,
      "grad_norm": 0.4261287748813629,
      "learning_rate": 8.769685607790091e-06,
      "loss": 0.4663,
      "step": 1088
    },
    {
      "epoch": 0.9171813587871982,
      "grad_norm": 0.42361849546432495,
      "learning_rate": 8.766463043102864e-06,
      "loss": 0.4523,
      "step": 1089
    },
    {
      "epoch": 0.9180235822571589,
      "grad_norm": 0.4180759787559509,
      "learning_rate": 8.76323685724987e-06,
      "loss": 0.4437,
      "step": 1090
    },
    {
      "epoch": 0.9188658057271196,
      "grad_norm": 0.45390963554382324,
      "learning_rate": 8.760007053332837e-06,
      "loss": 0.4619,
      "step": 1091
    },
    {
      "epoch": 0.9197080291970803,
      "grad_norm": 0.41265130043029785,
      "learning_rate": 8.756773634456975e-06,
      "loss": 0.4731,
      "step": 1092
    },
    {
      "epoch": 0.9205502526670409,
      "grad_norm": 0.471906453371048,
      "learning_rate": 8.75353660373097e-06,
      "loss": 0.4764,
      "step": 1093
    },
    {
      "epoch": 0.9213924761370017,
      "grad_norm": 0.4105421304702759,
      "learning_rate": 8.750295964266979e-06,
      "loss": 0.4648,
      "step": 1094
    },
    {
      "epoch": 0.9222346996069624,
      "grad_norm": 0.4072258770465851,
      "learning_rate": 8.747051719180626e-06,
      "loss": 0.4801,
      "step": 1095
    },
    {
      "epoch": 0.9230769230769231,
      "grad_norm": 0.4603798985481262,
      "learning_rate": 8.743803871591008e-06,
      "loss": 0.4638,
      "step": 1096
    },
    {
      "epoch": 0.9239191465468838,
      "grad_norm": 0.41771745681762695,
      "learning_rate": 8.740552424620679e-06,
      "loss": 0.4539,
      "step": 1097
    },
    {
      "epoch": 0.9247613700168444,
      "grad_norm": 0.4121268689632416,
      "learning_rate": 8.737297381395657e-06,
      "loss": 0.4544,
      "step": 1098
    },
    {
      "epoch": 0.9256035934868052,
      "grad_norm": 0.3926859200000763,
      "learning_rate": 8.734038745045419e-06,
      "loss": 0.4546,
      "step": 1099
    },
    {
      "epoch": 0.9264458169567659,
      "grad_norm": 0.371877521276474,
      "learning_rate": 8.730776518702891e-06,
      "loss": 0.4515,
      "step": 1100
    },
    {
      "epoch": 0.9272880404267265,
      "grad_norm": 0.3774634301662445,
      "learning_rate": 8.727510705504453e-06,
      "loss": 0.4422,
      "step": 1101
    },
    {
      "epoch": 0.9281302638966873,
      "grad_norm": 0.3900839686393738,
      "learning_rate": 8.72424130858994e-06,
      "loss": 0.485,
      "step": 1102
    },
    {
      "epoch": 0.928972487366648,
      "grad_norm": 0.3436884582042694,
      "learning_rate": 8.72096833110262e-06,
      "loss": 0.4292,
      "step": 1103
    },
    {
      "epoch": 0.9298147108366086,
      "grad_norm": 0.3713052570819855,
      "learning_rate": 8.717691776189214e-06,
      "loss": 0.4601,
      "step": 1104
    },
    {
      "epoch": 0.9306569343065694,
      "grad_norm": 0.3754946291446686,
      "learning_rate": 8.714411646999878e-06,
      "loss": 0.4357,
      "step": 1105
    },
    {
      "epoch": 0.93149915777653,
      "grad_norm": 0.3695276379585266,
      "learning_rate": 8.711127946688207e-06,
      "loss": 0.4765,
      "step": 1106
    },
    {
      "epoch": 0.9323413812464907,
      "grad_norm": 0.37371736764907837,
      "learning_rate": 8.707840678411223e-06,
      "loss": 0.45,
      "step": 1107
    },
    {
      "epoch": 0.9331836047164515,
      "grad_norm": 0.36904841661453247,
      "learning_rate": 8.704549845329386e-06,
      "loss": 0.4567,
      "step": 1108
    },
    {
      "epoch": 0.9340258281864121,
      "grad_norm": 0.3684450685977936,
      "learning_rate": 8.701255450606579e-06,
      "loss": 0.4395,
      "step": 1109
    },
    {
      "epoch": 0.9348680516563729,
      "grad_norm": 0.37789633870124817,
      "learning_rate": 8.69795749741011e-06,
      "loss": 0.4806,
      "step": 1110
    },
    {
      "epoch": 0.9357102751263335,
      "grad_norm": 0.4207715392112732,
      "learning_rate": 8.694655988910707e-06,
      "loss": 0.488,
      "step": 1111
    },
    {
      "epoch": 0.9365524985962942,
      "grad_norm": 0.35959914326667786,
      "learning_rate": 8.69135092828252e-06,
      "loss": 0.4566,
      "step": 1112
    },
    {
      "epoch": 0.937394722066255,
      "grad_norm": 0.40567758679389954,
      "learning_rate": 8.688042318703111e-06,
      "loss": 0.4551,
      "step": 1113
    },
    {
      "epoch": 0.9382369455362156,
      "grad_norm": 0.4115789234638214,
      "learning_rate": 8.684730163353457e-06,
      "loss": 0.4406,
      "step": 1114
    },
    {
      "epoch": 0.9390791690061763,
      "grad_norm": 0.37650081515312195,
      "learning_rate": 8.681414465417936e-06,
      "loss": 0.4455,
      "step": 1115
    },
    {
      "epoch": 0.939921392476137,
      "grad_norm": 0.4170813262462616,
      "learning_rate": 8.678095228084343e-06,
      "loss": 0.4806,
      "step": 1116
    },
    {
      "epoch": 0.9407636159460977,
      "grad_norm": 0.36706608533859253,
      "learning_rate": 8.674772454543869e-06,
      "loss": 0.473,
      "step": 1117
    },
    {
      "epoch": 0.9416058394160584,
      "grad_norm": 0.3832426369190216,
      "learning_rate": 8.671446147991103e-06,
      "loss": 0.4241,
      "step": 1118
    },
    {
      "epoch": 0.9424480628860191,
      "grad_norm": 0.47756582498550415,
      "learning_rate": 8.66811631162404e-06,
      "loss": 0.4554,
      "step": 1119
    },
    {
      "epoch": 0.9432902863559798,
      "grad_norm": 0.39938387274742126,
      "learning_rate": 8.664782948644058e-06,
      "loss": 0.4737,
      "step": 1120
    },
    {
      "epoch": 0.9441325098259404,
      "grad_norm": 0.3752983510494232,
      "learning_rate": 8.661446062255931e-06,
      "loss": 0.4487,
      "step": 1121
    },
    {
      "epoch": 0.9449747332959012,
      "grad_norm": 0.4420255720615387,
      "learning_rate": 8.65810565566782e-06,
      "loss": 0.4434,
      "step": 1122
    },
    {
      "epoch": 0.9458169567658619,
      "grad_norm": 0.4701543152332306,
      "learning_rate": 8.654761732091271e-06,
      "loss": 0.4776,
      "step": 1123
    },
    {
      "epoch": 0.9466591802358225,
      "grad_norm": 0.39837703108787537,
      "learning_rate": 8.65141429474121e-06,
      "loss": 0.4489,
      "step": 1124
    },
    {
      "epoch": 0.9475014037057833,
      "grad_norm": 0.4727233350276947,
      "learning_rate": 8.648063346835943e-06,
      "loss": 0.4667,
      "step": 1125
    },
    {
      "epoch": 0.9483436271757439,
      "grad_norm": 0.46260952949523926,
      "learning_rate": 8.644708891597147e-06,
      "loss": 0.4506,
      "step": 1126
    },
    {
      "epoch": 0.9491858506457047,
      "grad_norm": 0.36124905943870544,
      "learning_rate": 8.641350932249876e-06,
      "loss": 0.4431,
      "step": 1127
    },
    {
      "epoch": 0.9500280741156654,
      "grad_norm": 0.45104044675827026,
      "learning_rate": 8.637989472022548e-06,
      "loss": 0.4562,
      "step": 1128
    },
    {
      "epoch": 0.950870297585626,
      "grad_norm": 0.3556206226348877,
      "learning_rate": 8.634624514146954e-06,
      "loss": 0.4586,
      "step": 1129
    },
    {
      "epoch": 0.9517125210555868,
      "grad_norm": 0.39410850405693054,
      "learning_rate": 8.631256061858238e-06,
      "loss": 0.4349,
      "step": 1130
    },
    {
      "epoch": 0.9525547445255474,
      "grad_norm": 0.37242305278778076,
      "learning_rate": 8.627884118394913e-06,
      "loss": 0.4795,
      "step": 1131
    },
    {
      "epoch": 0.9533969679955081,
      "grad_norm": 0.3766724169254303,
      "learning_rate": 8.624508686998846e-06,
      "loss": 0.4573,
      "step": 1132
    },
    {
      "epoch": 0.9542391914654689,
      "grad_norm": 0.37467536330223083,
      "learning_rate": 8.621129770915248e-06,
      "loss": 0.4197,
      "step": 1133
    },
    {
      "epoch": 0.9550814149354295,
      "grad_norm": 0.4079817831516266,
      "learning_rate": 8.617747373392697e-06,
      "loss": 0.4435,
      "step": 1134
    },
    {
      "epoch": 0.9559236384053902,
      "grad_norm": 0.35279250144958496,
      "learning_rate": 8.614361497683102e-06,
      "loss": 0.4396,
      "step": 1135
    },
    {
      "epoch": 0.956765861875351,
      "grad_norm": 0.3883397579193115,
      "learning_rate": 8.61097214704173e-06,
      "loss": 0.4647,
      "step": 1136
    },
    {
      "epoch": 0.9576080853453116,
      "grad_norm": 0.4372005760669708,
      "learning_rate": 8.607579324727175e-06,
      "loss": 0.4681,
      "step": 1137
    },
    {
      "epoch": 0.9584503088152723,
      "grad_norm": 0.40236788988113403,
      "learning_rate": 8.60418303400138e-06,
      "loss": 0.458,
      "step": 1138
    },
    {
      "epoch": 0.959292532285233,
      "grad_norm": 0.406724750995636,
      "learning_rate": 8.600783278129617e-06,
      "loss": 0.4797,
      "step": 1139
    },
    {
      "epoch": 0.9601347557551937,
      "grad_norm": 0.40580326318740845,
      "learning_rate": 8.597380060380493e-06,
      "loss": 0.4795,
      "step": 1140
    },
    {
      "epoch": 0.9609769792251545,
      "grad_norm": 0.44473904371261597,
      "learning_rate": 8.59397338402594e-06,
      "loss": 0.4345,
      "step": 1141
    },
    {
      "epoch": 0.9618192026951151,
      "grad_norm": 0.3798663318157196,
      "learning_rate": 8.590563252341216e-06,
      "loss": 0.4319,
      "step": 1142
    },
    {
      "epoch": 0.9626614261650758,
      "grad_norm": 0.3430033028125763,
      "learning_rate": 8.5871496686049e-06,
      "loss": 0.4205,
      "step": 1143
    },
    {
      "epoch": 0.9635036496350365,
      "grad_norm": 0.4199627935886383,
      "learning_rate": 8.583732636098895e-06,
      "loss": 0.4685,
      "step": 1144
    },
    {
      "epoch": 0.9643458731049972,
      "grad_norm": 0.4430454969406128,
      "learning_rate": 8.580312158108413e-06,
      "loss": 0.4429,
      "step": 1145
    },
    {
      "epoch": 0.9651880965749579,
      "grad_norm": 0.3590855300426483,
      "learning_rate": 8.576888237921983e-06,
      "loss": 0.4325,
      "step": 1146
    },
    {
      "epoch": 0.9660303200449186,
      "grad_norm": 0.4192885160446167,
      "learning_rate": 8.57346087883144e-06,
      "loss": 0.4432,
      "step": 1147
    },
    {
      "epoch": 0.9668725435148793,
      "grad_norm": 0.36155906319618225,
      "learning_rate": 8.570030084131933e-06,
      "loss": 0.456,
      "step": 1148
    },
    {
      "epoch": 0.9677147669848399,
      "grad_norm": 0.36353155970573425,
      "learning_rate": 8.566595857121902e-06,
      "loss": 0.4558,
      "step": 1149
    },
    {
      "epoch": 0.9685569904548007,
      "grad_norm": 0.391216903924942,
      "learning_rate": 8.563158201103096e-06,
      "loss": 0.4544,
      "step": 1150
    },
    {
      "epoch": 0.9693992139247614,
      "grad_norm": 0.4157159626483917,
      "learning_rate": 8.559717119380558e-06,
      "loss": 0.455,
      "step": 1151
    },
    {
      "epoch": 0.970241437394722,
      "grad_norm": 0.4182007908821106,
      "learning_rate": 8.556272615262623e-06,
      "loss": 0.4353,
      "step": 1152
    },
    {
      "epoch": 0.9710836608646828,
      "grad_norm": 0.37532564997673035,
      "learning_rate": 8.55282469206092e-06,
      "loss": 0.4354,
      "step": 1153
    },
    {
      "epoch": 0.9719258843346434,
      "grad_norm": 0.38201865553855896,
      "learning_rate": 8.549373353090362e-06,
      "loss": 0.4407,
      "step": 1154
    },
    {
      "epoch": 0.9727681078046042,
      "grad_norm": 0.41272106766700745,
      "learning_rate": 8.545918601669147e-06,
      "loss": 0.4236,
      "step": 1155
    },
    {
      "epoch": 0.9736103312745649,
      "grad_norm": 0.419068843126297,
      "learning_rate": 8.542460441118756e-06,
      "loss": 0.4669,
      "step": 1156
    },
    {
      "epoch": 0.9744525547445255,
      "grad_norm": 0.3942708969116211,
      "learning_rate": 8.538998874763942e-06,
      "loss": 0.4786,
      "step": 1157
    },
    {
      "epoch": 0.9752947782144863,
      "grad_norm": 0.4102627635002136,
      "learning_rate": 8.535533905932739e-06,
      "loss": 0.4635,
      "step": 1158
    },
    {
      "epoch": 0.976137001684447,
      "grad_norm": 0.37368085980415344,
      "learning_rate": 8.532065537956446e-06,
      "loss": 0.4466,
      "step": 1159
    },
    {
      "epoch": 0.9769792251544076,
      "grad_norm": 0.43076467514038086,
      "learning_rate": 8.528593774169637e-06,
      "loss": 0.4746,
      "step": 1160
    },
    {
      "epoch": 0.9778214486243684,
      "grad_norm": 0.3629462718963623,
      "learning_rate": 8.525118617910144e-06,
      "loss": 0.4524,
      "step": 1161
    },
    {
      "epoch": 0.978663672094329,
      "grad_norm": 0.42330315709114075,
      "learning_rate": 8.521640072519066e-06,
      "loss": 0.4476,
      "step": 1162
    },
    {
      "epoch": 0.9795058955642897,
      "grad_norm": 0.43509650230407715,
      "learning_rate": 8.518158141340755e-06,
      "loss": 0.4731,
      "step": 1163
    },
    {
      "epoch": 0.9803481190342505,
      "grad_norm": 0.3956202268600464,
      "learning_rate": 8.514672827722824e-06,
      "loss": 0.4534,
      "step": 1164
    },
    {
      "epoch": 0.9811903425042111,
      "grad_norm": 0.39042437076568604,
      "learning_rate": 8.511184135016134e-06,
      "loss": 0.4584,
      "step": 1165
    },
    {
      "epoch": 0.9820325659741718,
      "grad_norm": 0.3678750991821289,
      "learning_rate": 8.507692066574795e-06,
      "loss": 0.4438,
      "step": 1166
    },
    {
      "epoch": 0.9828747894441325,
      "grad_norm": 0.3873535990715027,
      "learning_rate": 8.504196625756166e-06,
      "loss": 0.449,
      "step": 1167
    },
    {
      "epoch": 0.9837170129140932,
      "grad_norm": 0.41667866706848145,
      "learning_rate": 8.500697815920843e-06,
      "loss": 0.4479,
      "step": 1168
    },
    {
      "epoch": 0.9845592363840538,
      "grad_norm": 0.38302674889564514,
      "learning_rate": 8.497195640432664e-06,
      "loss": 0.441,
      "step": 1169
    },
    {
      "epoch": 0.9854014598540146,
      "grad_norm": 0.400989830493927,
      "learning_rate": 8.493690102658703e-06,
      "loss": 0.4746,
      "step": 1170
    },
    {
      "epoch": 0.9862436833239753,
      "grad_norm": 0.44950640201568604,
      "learning_rate": 8.490181205969268e-06,
      "loss": 0.4813,
      "step": 1171
    },
    {
      "epoch": 0.987085906793936,
      "grad_norm": 0.385809987783432,
      "learning_rate": 8.486668953737891e-06,
      "loss": 0.4575,
      "step": 1172
    },
    {
      "epoch": 0.9879281302638967,
      "grad_norm": 0.3932558596134186,
      "learning_rate": 8.483153349341336e-06,
      "loss": 0.473,
      "step": 1173
    },
    {
      "epoch": 0.9887703537338574,
      "grad_norm": 0.36911508440971375,
      "learning_rate": 8.479634396159587e-06,
      "loss": 0.4721,
      "step": 1174
    },
    {
      "epoch": 0.9896125772038181,
      "grad_norm": 0.3659111559391022,
      "learning_rate": 8.476112097575845e-06,
      "loss": 0.4434,
      "step": 1175
    },
    {
      "epoch": 0.9904548006737788,
      "grad_norm": 0.4058414101600647,
      "learning_rate": 8.472586456976534e-06,
      "loss": 0.4344,
      "step": 1176
    },
    {
      "epoch": 0.9912970241437394,
      "grad_norm": 0.37387171387672424,
      "learning_rate": 8.46905747775129e-06,
      "loss": 0.4526,
      "step": 1177
    },
    {
      "epoch": 0.9921392476137002,
      "grad_norm": 0.3868712782859802,
      "learning_rate": 8.465525163292948e-06,
      "loss": 0.4563,
      "step": 1178
    },
    {
      "epoch": 0.9929814710836609,
      "grad_norm": 0.3896998167037964,
      "learning_rate": 8.461989516997565e-06,
      "loss": 0.4735,
      "step": 1179
    },
    {
      "epoch": 0.9938236945536215,
      "grad_norm": 0.36210012435913086,
      "learning_rate": 8.458450542264391e-06,
      "loss": 0.4668,
      "step": 1180
    },
    {
      "epoch": 0.9946659180235823,
      "grad_norm": 0.3653891980648041,
      "learning_rate": 8.45490824249588e-06,
      "loss": 0.444,
      "step": 1181
    },
    {
      "epoch": 0.9955081414935429,
      "grad_norm": 0.3270474970340729,
      "learning_rate": 8.45136262109768e-06,
      "loss": 0.4252,
      "step": 1182
    },
    {
      "epoch": 0.9963503649635036,
      "grad_norm": 0.4117894768714905,
      "learning_rate": 8.447813681478638e-06,
      "loss": 0.4697,
      "step": 1183
    },
    {
      "epoch": 0.9971925884334644,
      "grad_norm": 0.34525200724601746,
      "learning_rate": 8.444261427050786e-06,
      "loss": 0.4757,
      "step": 1184
    },
    {
      "epoch": 0.998034811903425,
      "grad_norm": 0.36067771911621094,
      "learning_rate": 8.440705861229344e-06,
      "loss": 0.4396,
      "step": 1185
    },
    {
      "epoch": 0.9988770353733858,
      "grad_norm": 0.411179780960083,
      "learning_rate": 8.437146987432717e-06,
      "loss": 0.4651,
      "step": 1186
    },
    {
      "epoch": 0.9997192588433464,
      "grad_norm": 0.4003877341747284,
      "learning_rate": 8.43358480908249e-06,
      "loss": 0.4313,
      "step": 1187
    },
    {
      "epoch": 1.000561482313307,
      "grad_norm": 0.6488905549049377,
      "learning_rate": 8.430019329603423e-06,
      "loss": 0.7031,
      "step": 1188
    },
    {
      "epoch": 1.0014037057832679,
      "grad_norm": 0.47764989733695984,
      "learning_rate": 8.426450552423451e-06,
      "loss": 0.4329,
      "step": 1189
    },
    {
      "epoch": 1.0022459292532284,
      "grad_norm": 0.4527723789215088,
      "learning_rate": 8.422878480973681e-06,
      "loss": 0.4049,
      "step": 1190
    },
    {
      "epoch": 1.0030881527231892,
      "grad_norm": 0.3832196593284607,
      "learning_rate": 8.41930311868839e-06,
      "loss": 0.4293,
      "step": 1191
    },
    {
      "epoch": 1.00393037619315,
      "grad_norm": 0.40137726068496704,
      "learning_rate": 8.41572446900501e-06,
      "loss": 0.4507,
      "step": 1192
    },
    {
      "epoch": 1.0047725996631107,
      "grad_norm": 0.39267903566360474,
      "learning_rate": 8.412142535364139e-06,
      "loss": 0.3669,
      "step": 1193
    },
    {
      "epoch": 1.0056148231330713,
      "grad_norm": 0.38908353447914124,
      "learning_rate": 8.408557321209534e-06,
      "loss": 0.4299,
      "step": 1194
    },
    {
      "epoch": 1.006457046603032,
      "grad_norm": 0.41140037775039673,
      "learning_rate": 8.404968829988102e-06,
      "loss": 0.4435,
      "step": 1195
    },
    {
      "epoch": 1.0072992700729928,
      "grad_norm": 0.40239667892456055,
      "learning_rate": 8.401377065149904e-06,
      "loss": 0.4135,
      "step": 1196
    },
    {
      "epoch": 1.0081414935429533,
      "grad_norm": 0.3912915885448456,
      "learning_rate": 8.397782030148147e-06,
      "loss": 0.4351,
      "step": 1197
    },
    {
      "epoch": 1.0089837170129141,
      "grad_norm": 0.36936837434768677,
      "learning_rate": 8.39418372843918e-06,
      "loss": 0.3913,
      "step": 1198
    },
    {
      "epoch": 1.0098259404828749,
      "grad_norm": 0.40344229340553284,
      "learning_rate": 8.390582163482497e-06,
      "loss": 0.4113,
      "step": 1199
    },
    {
      "epoch": 1.0106681639528354,
      "grad_norm": 0.36977246403694153,
      "learning_rate": 8.386977338740724e-06,
      "loss": 0.4135,
      "step": 1200
    },
    {
      "epoch": 1.0115103874227962,
      "grad_norm": 0.4381195902824402,
      "learning_rate": 8.383369257679625e-06,
      "loss": 0.4688,
      "step": 1201
    },
    {
      "epoch": 1.012352610892757,
      "grad_norm": 0.3668336570262909,
      "learning_rate": 8.379757923768094e-06,
      "loss": 0.3949,
      "step": 1202
    },
    {
      "epoch": 1.0131948343627175,
      "grad_norm": 0.42817869782447815,
      "learning_rate": 8.376143340478153e-06,
      "loss": 0.4511,
      "step": 1203
    },
    {
      "epoch": 1.0140370578326783,
      "grad_norm": 0.41879692673683167,
      "learning_rate": 8.372525511284945e-06,
      "loss": 0.4295,
      "step": 1204
    },
    {
      "epoch": 1.014879281302639,
      "grad_norm": 0.41177067160606384,
      "learning_rate": 8.368904439666739e-06,
      "loss": 0.4242,
      "step": 1205
    },
    {
      "epoch": 1.0157215047725996,
      "grad_norm": 0.3899869918823242,
      "learning_rate": 8.365280129104912e-06,
      "loss": 0.411,
      "step": 1206
    },
    {
      "epoch": 1.0165637282425604,
      "grad_norm": 0.36402463912963867,
      "learning_rate": 8.361652583083968e-06,
      "loss": 0.3896,
      "step": 1207
    },
    {
      "epoch": 1.0174059517125211,
      "grad_norm": 0.4431062638759613,
      "learning_rate": 8.358021805091509e-06,
      "loss": 0.4212,
      "step": 1208
    },
    {
      "epoch": 1.0182481751824817,
      "grad_norm": 0.40490418672561646,
      "learning_rate": 8.354387798618254e-06,
      "loss": 0.4067,
      "step": 1209
    },
    {
      "epoch": 1.0190903986524424,
      "grad_norm": 0.3966013789176941,
      "learning_rate": 8.35075056715802e-06,
      "loss": 0.4046,
      "step": 1210
    },
    {
      "epoch": 1.0199326221224032,
      "grad_norm": 0.46774059534072876,
      "learning_rate": 8.347110114207727e-06,
      "loss": 0.468,
      "step": 1211
    },
    {
      "epoch": 1.0207748455923638,
      "grad_norm": 0.3611885905265808,
      "learning_rate": 8.34346644326739e-06,
      "loss": 0.3872,
      "step": 1212
    },
    {
      "epoch": 1.0216170690623245,
      "grad_norm": 0.467472642660141,
      "learning_rate": 8.339819557840124e-06,
      "loss": 0.4614,
      "step": 1213
    },
    {
      "epoch": 1.0224592925322853,
      "grad_norm": 0.39163926243782043,
      "learning_rate": 8.336169461432125e-06,
      "loss": 0.4209,
      "step": 1214
    },
    {
      "epoch": 1.0233015160022458,
      "grad_norm": 0.41308629512786865,
      "learning_rate": 8.332516157552684e-06,
      "loss": 0.4688,
      "step": 1215
    },
    {
      "epoch": 1.0241437394722066,
      "grad_norm": 0.3505336046218872,
      "learning_rate": 8.328859649714171e-06,
      "loss": 0.3947,
      "step": 1216
    },
    {
      "epoch": 1.0249859629421674,
      "grad_norm": 0.41953250765800476,
      "learning_rate": 8.32519994143204e-06,
      "loss": 0.4585,
      "step": 1217
    },
    {
      "epoch": 1.025828186412128,
      "grad_norm": 0.3449431359767914,
      "learning_rate": 8.321537036224822e-06,
      "loss": 0.3855,
      "step": 1218
    },
    {
      "epoch": 1.0266704098820887,
      "grad_norm": 0.40997442603111267,
      "learning_rate": 8.317870937614115e-06,
      "loss": 0.4377,
      "step": 1219
    },
    {
      "epoch": 1.0275126333520495,
      "grad_norm": 0.35637742280960083,
      "learning_rate": 8.314201649124595e-06,
      "loss": 0.4239,
      "step": 1220
    },
    {
      "epoch": 1.02835485682201,
      "grad_norm": 0.3721654713153839,
      "learning_rate": 8.310529174284004e-06,
      "loss": 0.4035,
      "step": 1221
    },
    {
      "epoch": 1.0291970802919708,
      "grad_norm": 0.3627740740776062,
      "learning_rate": 8.30685351662314e-06,
      "loss": 0.4151,
      "step": 1222
    },
    {
      "epoch": 1.0300393037619315,
      "grad_norm": 0.4615969657897949,
      "learning_rate": 8.30317467967587e-06,
      "loss": 0.4384,
      "step": 1223
    },
    {
      "epoch": 1.0308815272318923,
      "grad_norm": 0.40656593441963196,
      "learning_rate": 8.299492666979114e-06,
      "loss": 0.4765,
      "step": 1224
    },
    {
      "epoch": 1.0317237507018528,
      "grad_norm": 0.35116055607795715,
      "learning_rate": 8.295807482072842e-06,
      "loss": 0.4239,
      "step": 1225
    },
    {
      "epoch": 1.0325659741718136,
      "grad_norm": 0.41410645842552185,
      "learning_rate": 8.292119128500082e-06,
      "loss": 0.4355,
      "step": 1226
    },
    {
      "epoch": 1.0334081976417744,
      "grad_norm": 0.37771251797676086,
      "learning_rate": 8.288427609806899e-06,
      "loss": 0.4256,
      "step": 1227
    },
    {
      "epoch": 1.034250421111735,
      "grad_norm": 0.4420938193798065,
      "learning_rate": 8.28473292954241e-06,
      "loss": 0.4439,
      "step": 1228
    },
    {
      "epoch": 1.0350926445816957,
      "grad_norm": 0.3761724829673767,
      "learning_rate": 8.281035091258762e-06,
      "loss": 0.4361,
      "step": 1229
    },
    {
      "epoch": 1.0359348680516565,
      "grad_norm": 0.4022856652736664,
      "learning_rate": 8.277334098511147e-06,
      "loss": 0.4429,
      "step": 1230
    },
    {
      "epoch": 1.036777091521617,
      "grad_norm": 0.36874333024024963,
      "learning_rate": 8.273629954857784e-06,
      "loss": 0.4096,
      "step": 1231
    },
    {
      "epoch": 1.0376193149915778,
      "grad_norm": 0.3649839758872986,
      "learning_rate": 8.269922663859926e-06,
      "loss": 0.4085,
      "step": 1232
    },
    {
      "epoch": 1.0384615384615385,
      "grad_norm": 0.4348989725112915,
      "learning_rate": 8.266212229081846e-06,
      "loss": 0.4703,
      "step": 1233
    },
    {
      "epoch": 1.039303761931499,
      "grad_norm": 0.3483625054359436,
      "learning_rate": 8.262498654090846e-06,
      "loss": 0.4128,
      "step": 1234
    },
    {
      "epoch": 1.0401459854014599,
      "grad_norm": 0.4563485085964203,
      "learning_rate": 8.258781942457244e-06,
      "loss": 0.4344,
      "step": 1235
    },
    {
      "epoch": 1.0409882088714206,
      "grad_norm": 0.3762305974960327,
      "learning_rate": 8.255062097754371e-06,
      "loss": 0.3968,
      "step": 1236
    },
    {
      "epoch": 1.0418304323413812,
      "grad_norm": 0.45995399355888367,
      "learning_rate": 8.251339123558573e-06,
      "loss": 0.4349,
      "step": 1237
    },
    {
      "epoch": 1.042672655811342,
      "grad_norm": 0.36983850598335266,
      "learning_rate": 8.247613023449209e-06,
      "loss": 0.4061,
      "step": 1238
    },
    {
      "epoch": 1.0435148792813027,
      "grad_norm": 0.407674103975296,
      "learning_rate": 8.243883801008632e-06,
      "loss": 0.4803,
      "step": 1239
    },
    {
      "epoch": 1.0443571027512633,
      "grad_norm": 0.34497344493865967,
      "learning_rate": 8.240151459822207e-06,
      "loss": 0.4008,
      "step": 1240
    },
    {
      "epoch": 1.045199326221224,
      "grad_norm": 0.3719845116138458,
      "learning_rate": 8.236416003478295e-06,
      "loss": 0.4425,
      "step": 1241
    },
    {
      "epoch": 1.0460415496911848,
      "grad_norm": 0.4024560749530792,
      "learning_rate": 8.232677435568252e-06,
      "loss": 0.4379,
      "step": 1242
    },
    {
      "epoch": 1.0468837731611453,
      "grad_norm": 0.33986011147499084,
      "learning_rate": 8.228935759686424e-06,
      "loss": 0.3761,
      "step": 1243
    },
    {
      "epoch": 1.047725996631106,
      "grad_norm": 0.40530988574028015,
      "learning_rate": 8.225190979430145e-06,
      "loss": 0.452,
      "step": 1244
    },
    {
      "epoch": 1.0485682201010669,
      "grad_norm": 0.3530568480491638,
      "learning_rate": 8.221443098399733e-06,
      "loss": 0.4277,
      "step": 1245
    },
    {
      "epoch": 1.0494104435710274,
      "grad_norm": 0.5181214809417725,
      "learning_rate": 8.217692120198492e-06,
      "loss": 0.4434,
      "step": 1246
    },
    {
      "epoch": 1.0502526670409882,
      "grad_norm": 0.34518226981163025,
      "learning_rate": 8.213938048432697e-06,
      "loss": 0.431,
      "step": 1247
    },
    {
      "epoch": 1.051094890510949,
      "grad_norm": 0.3970169126987457,
      "learning_rate": 8.210180886711603e-06,
      "loss": 0.4473,
      "step": 1248
    },
    {
      "epoch": 1.0519371139809095,
      "grad_norm": 0.40059694647789,
      "learning_rate": 8.206420638647433e-06,
      "loss": 0.4275,
      "step": 1249
    },
    {
      "epoch": 1.0527793374508703,
      "grad_norm": 0.3753350079059601,
      "learning_rate": 8.202657307855376e-06,
      "loss": 0.3924,
      "step": 1250
    },
    {
      "epoch": 1.053621560920831,
      "grad_norm": 0.40452030301094055,
      "learning_rate": 8.198890897953586e-06,
      "loss": 0.4318,
      "step": 1251
    },
    {
      "epoch": 1.0544637843907916,
      "grad_norm": 0.4010884463787079,
      "learning_rate": 8.19512141256318e-06,
      "loss": 0.4657,
      "step": 1252
    },
    {
      "epoch": 1.0553060078607523,
      "grad_norm": 0.3719276487827301,
      "learning_rate": 8.191348855308229e-06,
      "loss": 0.4136,
      "step": 1253
    },
    {
      "epoch": 1.0561482313307131,
      "grad_norm": 0.3698195517063141,
      "learning_rate": 8.187573229815757e-06,
      "loss": 0.4013,
      "step": 1254
    },
    {
      "epoch": 1.0569904548006739,
      "grad_norm": 0.4454539120197296,
      "learning_rate": 8.18379453971574e-06,
      "loss": 0.4708,
      "step": 1255
    },
    {
      "epoch": 1.0578326782706344,
      "grad_norm": 0.3695486783981323,
      "learning_rate": 8.180012788641097e-06,
      "loss": 0.4086,
      "step": 1256
    },
    {
      "epoch": 1.0586749017405952,
      "grad_norm": 0.4094073474407196,
      "learning_rate": 8.176227980227693e-06,
      "loss": 0.4321,
      "step": 1257
    },
    {
      "epoch": 1.059517125210556,
      "grad_norm": 0.4454232156276703,
      "learning_rate": 8.172440118114332e-06,
      "loss": 0.4681,
      "step": 1258
    },
    {
      "epoch": 1.0603593486805165,
      "grad_norm": 0.37586653232574463,
      "learning_rate": 8.168649205942753e-06,
      "loss": 0.404,
      "step": 1259
    },
    {
      "epoch": 1.0612015721504773,
      "grad_norm": 0.3777793049812317,
      "learning_rate": 8.164855247357628e-06,
      "loss": 0.4325,
      "step": 1260
    },
    {
      "epoch": 1.062043795620438,
      "grad_norm": 0.41256600618362427,
      "learning_rate": 8.161058246006558e-06,
      "loss": 0.4215,
      "step": 1261
    },
    {
      "epoch": 1.0628860190903986,
      "grad_norm": 0.38000229001045227,
      "learning_rate": 8.157258205540069e-06,
      "loss": 0.412,
      "step": 1262
    },
    {
      "epoch": 1.0637282425603594,
      "grad_norm": 0.39581769704818726,
      "learning_rate": 8.153455129611605e-06,
      "loss": 0.4304,
      "step": 1263
    },
    {
      "epoch": 1.0645704660303201,
      "grad_norm": 0.42407965660095215,
      "learning_rate": 8.14964902187754e-06,
      "loss": 0.4313,
      "step": 1264
    },
    {
      "epoch": 1.0654126895002807,
      "grad_norm": 0.40285012125968933,
      "learning_rate": 8.145839885997146e-06,
      "loss": 0.3922,
      "step": 1265
    },
    {
      "epoch": 1.0662549129702414,
      "grad_norm": 0.4410504996776581,
      "learning_rate": 8.142027725632622e-06,
      "loss": 0.4248,
      "step": 1266
    },
    {
      "epoch": 1.0670971364402022,
      "grad_norm": 0.379028856754303,
      "learning_rate": 8.138212544449067e-06,
      "loss": 0.4453,
      "step": 1267
    },
    {
      "epoch": 1.0679393599101628,
      "grad_norm": 0.3946175277233124,
      "learning_rate": 8.134394346114486e-06,
      "loss": 0.4263,
      "step": 1268
    },
    {
      "epoch": 1.0687815833801235,
      "grad_norm": 0.421234667301178,
      "learning_rate": 8.130573134299782e-06,
      "loss": 0.4308,
      "step": 1269
    },
    {
      "epoch": 1.0696238068500843,
      "grad_norm": 0.4083137512207031,
      "learning_rate": 8.126748912678757e-06,
      "loss": 0.4647,
      "step": 1270
    },
    {
      "epoch": 1.0704660303200448,
      "grad_norm": 0.3470613360404968,
      "learning_rate": 8.122921684928111e-06,
      "loss": 0.371,
      "step": 1271
    },
    {
      "epoch": 1.0713082537900056,
      "grad_norm": 0.36055633425712585,
      "learning_rate": 8.119091454727427e-06,
      "loss": 0.4282,
      "step": 1272
    },
    {
      "epoch": 1.0721504772599664,
      "grad_norm": 0.4025020897388458,
      "learning_rate": 8.11525822575918e-06,
      "loss": 0.4421,
      "step": 1273
    },
    {
      "epoch": 1.072992700729927,
      "grad_norm": 0.36994054913520813,
      "learning_rate": 8.111422001708725e-06,
      "loss": 0.4092,
      "step": 1274
    },
    {
      "epoch": 1.0738349241998877,
      "grad_norm": 0.36701568961143494,
      "learning_rate": 8.107582786264299e-06,
      "loss": 0.4206,
      "step": 1275
    },
    {
      "epoch": 1.0746771476698485,
      "grad_norm": 0.4554266631603241,
      "learning_rate": 8.10374058311701e-06,
      "loss": 0.4541,
      "step": 1276
    },
    {
      "epoch": 1.075519371139809,
      "grad_norm": 0.3917986750602722,
      "learning_rate": 8.099895395960847e-06,
      "loss": 0.4285,
      "step": 1277
    },
    {
      "epoch": 1.0763615946097698,
      "grad_norm": 0.43072110414505005,
      "learning_rate": 8.09604722849266e-06,
      "loss": 0.4239,
      "step": 1278
    },
    {
      "epoch": 1.0772038180797305,
      "grad_norm": 0.39785557985305786,
      "learning_rate": 8.092196084412167e-06,
      "loss": 0.4409,
      "step": 1279
    },
    {
      "epoch": 1.078046041549691,
      "grad_norm": 0.40647009015083313,
      "learning_rate": 8.08834196742195e-06,
      "loss": 0.415,
      "step": 1280
    },
    {
      "epoch": 1.0788882650196518,
      "grad_norm": 0.4710695147514343,
      "learning_rate": 8.084484881227449e-06,
      "loss": 0.4115,
      "step": 1281
    },
    {
      "epoch": 1.0797304884896126,
      "grad_norm": 0.3842746615409851,
      "learning_rate": 8.080624829536949e-06,
      "loss": 0.4371,
      "step": 1282
    },
    {
      "epoch": 1.0805727119595732,
      "grad_norm": 0.4094592332839966,
      "learning_rate": 8.076761816061603e-06,
      "loss": 0.3902,
      "step": 1283
    },
    {
      "epoch": 1.081414935429534,
      "grad_norm": 0.4105948507785797,
      "learning_rate": 8.072895844515398e-06,
      "loss": 0.4356,
      "step": 1284
    },
    {
      "epoch": 1.0822571588994947,
      "grad_norm": 0.3876434862613678,
      "learning_rate": 8.069026918615173e-06,
      "loss": 0.4233,
      "step": 1285
    },
    {
      "epoch": 1.0830993823694555,
      "grad_norm": 0.407326340675354,
      "learning_rate": 8.065155042080599e-06,
      "loss": 0.4253,
      "step": 1286
    },
    {
      "epoch": 1.083941605839416,
      "grad_norm": 0.3539896011352539,
      "learning_rate": 8.061280218634192e-06,
      "loss": 0.41,
      "step": 1287
    },
    {
      "epoch": 1.0847838293093768,
      "grad_norm": 0.5342922806739807,
      "learning_rate": 8.057402452001298e-06,
      "loss": 0.4504,
      "step": 1288
    },
    {
      "epoch": 1.0856260527793375,
      "grad_norm": 0.3579134941101074,
      "learning_rate": 8.05352174591009e-06,
      "loss": 0.4353,
      "step": 1289
    },
    {
      "epoch": 1.086468276249298,
      "grad_norm": 0.510752260684967,
      "learning_rate": 8.049638104091575e-06,
      "loss": 0.4288,
      "step": 1290
    },
    {
      "epoch": 1.0873104997192589,
      "grad_norm": 0.5128939747810364,
      "learning_rate": 8.04575153027957e-06,
      "loss": 0.4579,
      "step": 1291
    },
    {
      "epoch": 1.0881527231892196,
      "grad_norm": 0.35150885581970215,
      "learning_rate": 8.041862028210725e-06,
      "loss": 0.3968,
      "step": 1292
    },
    {
      "epoch": 1.0889949466591802,
      "grad_norm": 0.4233764708042145,
      "learning_rate": 8.037969601624495e-06,
      "loss": 0.4322,
      "step": 1293
    },
    {
      "epoch": 1.089837170129141,
      "grad_norm": 0.4583946764469147,
      "learning_rate": 8.034074254263152e-06,
      "loss": 0.3938,
      "step": 1294
    },
    {
      "epoch": 1.0906793935991017,
      "grad_norm": 0.45826274156570435,
      "learning_rate": 8.030175989871769e-06,
      "loss": 0.4367,
      "step": 1295
    },
    {
      "epoch": 1.0915216170690623,
      "grad_norm": 0.39332109689712524,
      "learning_rate": 8.026274812198235e-06,
      "loss": 0.4157,
      "step": 1296
    },
    {
      "epoch": 1.092363840539023,
      "grad_norm": 0.4982011914253235,
      "learning_rate": 8.022370724993229e-06,
      "loss": 0.4159,
      "step": 1297
    },
    {
      "epoch": 1.0932060640089838,
      "grad_norm": 0.3935185372829437,
      "learning_rate": 8.018463732010235e-06,
      "loss": 0.4826,
      "step": 1298
    },
    {
      "epoch": 1.0940482874789443,
      "grad_norm": 0.31662988662719727,
      "learning_rate": 8.014553837005527e-06,
      "loss": 0.3602,
      "step": 1299
    },
    {
      "epoch": 1.094890510948905,
      "grad_norm": 0.4854174256324768,
      "learning_rate": 8.010641043738167e-06,
      "loss": 0.4365,
      "step": 1300
    },
    {
      "epoch": 1.0957327344188659,
      "grad_norm": 0.42826494574546814,
      "learning_rate": 8.006725355970008e-06,
      "loss": 0.4568,
      "step": 1301
    },
    {
      "epoch": 1.0965749578888264,
      "grad_norm": 0.3695499300956726,
      "learning_rate": 8.002806777465685e-06,
      "loss": 0.4184,
      "step": 1302
    },
    {
      "epoch": 1.0974171813587872,
      "grad_norm": 0.40694260597229004,
      "learning_rate": 7.99888531199261e-06,
      "loss": 0.3898,
      "step": 1303
    },
    {
      "epoch": 1.098259404828748,
      "grad_norm": 0.4248018264770508,
      "learning_rate": 7.99496096332097e-06,
      "loss": 0.4316,
      "step": 1304
    },
    {
      "epoch": 1.0991016282987085,
      "grad_norm": 0.36612677574157715,
      "learning_rate": 7.99103373522373e-06,
      "loss": 0.4395,
      "step": 1305
    },
    {
      "epoch": 1.0999438517686693,
      "grad_norm": 0.3682686388492584,
      "learning_rate": 7.987103631476615e-06,
      "loss": 0.399,
      "step": 1306
    },
    {
      "epoch": 1.10078607523863,
      "grad_norm": 0.4918938875198364,
      "learning_rate": 7.98317065585812e-06,
      "loss": 0.4379,
      "step": 1307
    },
    {
      "epoch": 1.1016282987085906,
      "grad_norm": 0.32616811990737915,
      "learning_rate": 7.9792348121495e-06,
      "loss": 0.4065,
      "step": 1308
    },
    {
      "epoch": 1.1024705221785513,
      "grad_norm": 0.4267609417438507,
      "learning_rate": 7.975296104134768e-06,
      "loss": 0.4858,
      "step": 1309
    },
    {
      "epoch": 1.1033127456485121,
      "grad_norm": 0.38445374369621277,
      "learning_rate": 7.97135453560069e-06,
      "loss": 0.4318,
      "step": 1310
    },
    {
      "epoch": 1.1041549691184729,
      "grad_norm": 0.37487271428108215,
      "learning_rate": 7.967410110336782e-06,
      "loss": 0.3934,
      "step": 1311
    },
    {
      "epoch": 1.1049971925884334,
      "grad_norm": 0.40322422981262207,
      "learning_rate": 7.963462832135307e-06,
      "loss": 0.4725,
      "step": 1312
    },
    {
      "epoch": 1.1058394160583942,
      "grad_norm": 0.3529769480228424,
      "learning_rate": 7.959512704791269e-06,
      "loss": 0.3986,
      "step": 1313
    },
    {
      "epoch": 1.1066816395283547,
      "grad_norm": 0.40558871626853943,
      "learning_rate": 7.955559732102414e-06,
      "loss": 0.4764,
      "step": 1314
    },
    {
      "epoch": 1.1075238629983155,
      "grad_norm": 0.37975025177001953,
      "learning_rate": 7.951603917869223e-06,
      "loss": 0.4198,
      "step": 1315
    },
    {
      "epoch": 1.1083660864682763,
      "grad_norm": 0.3445699214935303,
      "learning_rate": 7.94764526589491e-06,
      "loss": 0.409,
      "step": 1316
    },
    {
      "epoch": 1.109208309938237,
      "grad_norm": 0.37430766224861145,
      "learning_rate": 7.943683779985412e-06,
      "loss": 0.3965,
      "step": 1317
    },
    {
      "epoch": 1.1100505334081976,
      "grad_norm": 0.38708019256591797,
      "learning_rate": 7.939719463949398e-06,
      "loss": 0.4014,
      "step": 1318
    },
    {
      "epoch": 1.1108927568781584,
      "grad_norm": 0.3656475841999054,
      "learning_rate": 7.93575232159825e-06,
      "loss": 0.4155,
      "step": 1319
    },
    {
      "epoch": 1.1117349803481191,
      "grad_norm": 0.37086746096611023,
      "learning_rate": 7.931782356746076e-06,
      "loss": 0.4272,
      "step": 1320
    },
    {
      "epoch": 1.1125772038180797,
      "grad_norm": 0.4064827859401703,
      "learning_rate": 7.927809573209691e-06,
      "loss": 0.4208,
      "step": 1321
    },
    {
      "epoch": 1.1134194272880404,
      "grad_norm": 0.36401882767677307,
      "learning_rate": 7.923833974808622e-06,
      "loss": 0.3967,
      "step": 1322
    },
    {
      "epoch": 1.1142616507580012,
      "grad_norm": 0.39197808504104614,
      "learning_rate": 7.919855565365102e-06,
      "loss": 0.4603,
      "step": 1323
    },
    {
      "epoch": 1.1151038742279618,
      "grad_norm": 0.38889172673225403,
      "learning_rate": 7.91587434870407e-06,
      "loss": 0.4306,
      "step": 1324
    },
    {
      "epoch": 1.1159460976979225,
      "grad_norm": 0.3529651463031769,
      "learning_rate": 7.911890328653156e-06,
      "loss": 0.4095,
      "step": 1325
    },
    {
      "epoch": 1.1167883211678833,
      "grad_norm": 0.3491688668727875,
      "learning_rate": 7.907903509042696e-06,
      "loss": 0.4479,
      "step": 1326
    },
    {
      "epoch": 1.1176305446378438,
      "grad_norm": 0.33706966042518616,
      "learning_rate": 7.903913893705706e-06,
      "loss": 0.4084,
      "step": 1327
    },
    {
      "epoch": 1.1184727681078046,
      "grad_norm": 0.4077783226966858,
      "learning_rate": 7.899921486477899e-06,
      "loss": 0.4489,
      "step": 1328
    },
    {
      "epoch": 1.1193149915777654,
      "grad_norm": 0.3613165020942688,
      "learning_rate": 7.895926291197667e-06,
      "loss": 0.4431,
      "step": 1329
    },
    {
      "epoch": 1.120157215047726,
      "grad_norm": 0.346597284078598,
      "learning_rate": 7.891928311706088e-06,
      "loss": 0.4095,
      "step": 1330
    },
    {
      "epoch": 1.1209994385176867,
      "grad_norm": 0.33216577768325806,
      "learning_rate": 7.887927551846908e-06,
      "loss": 0.4205,
      "step": 1331
    },
    {
      "epoch": 1.1218416619876475,
      "grad_norm": 0.3293364644050598,
      "learning_rate": 7.883924015466554e-06,
      "loss": 0.4058,
      "step": 1332
    },
    {
      "epoch": 1.122683885457608,
      "grad_norm": 0.3669161796569824,
      "learning_rate": 7.87991770641412e-06,
      "loss": 0.4526,
      "step": 1333
    },
    {
      "epoch": 1.1235261089275688,
      "grad_norm": 0.3779022693634033,
      "learning_rate": 7.875908628541363e-06,
      "loss": 0.4499,
      "step": 1334
    },
    {
      "epoch": 1.1243683323975295,
      "grad_norm": 0.37117263674736023,
      "learning_rate": 7.871896785702707e-06,
      "loss": 0.43,
      "step": 1335
    },
    {
      "epoch": 1.12521055586749,
      "grad_norm": 0.36794567108154297,
      "learning_rate": 7.86788218175523e-06,
      "loss": 0.4028,
      "step": 1336
    },
    {
      "epoch": 1.1260527793374508,
      "grad_norm": 0.3738362491130829,
      "learning_rate": 7.863864820558669e-06,
      "loss": 0.3937,
      "step": 1337
    },
    {
      "epoch": 1.1268950028074116,
      "grad_norm": 0.3773650825023651,
      "learning_rate": 7.859844705975405e-06,
      "loss": 0.4727,
      "step": 1338
    },
    {
      "epoch": 1.1277372262773722,
      "grad_norm": 0.3684482276439667,
      "learning_rate": 7.855821841870472e-06,
      "loss": 0.4178,
      "step": 1339
    },
    {
      "epoch": 1.128579449747333,
      "grad_norm": 0.3587445616722107,
      "learning_rate": 7.851796232111546e-06,
      "loss": 0.4343,
      "step": 1340
    },
    {
      "epoch": 1.1294216732172937,
      "grad_norm": 0.3918401300907135,
      "learning_rate": 7.847767880568944e-06,
      "loss": 0.4254,
      "step": 1341
    },
    {
      "epoch": 1.1302638966872545,
      "grad_norm": 0.3758191168308258,
      "learning_rate": 7.843736791115614e-06,
      "loss": 0.4224,
      "step": 1342
    },
    {
      "epoch": 1.131106120157215,
      "grad_norm": 0.387981116771698,
      "learning_rate": 7.839702967627145e-06,
      "loss": 0.4406,
      "step": 1343
    },
    {
      "epoch": 1.1319483436271758,
      "grad_norm": 0.3716709315776825,
      "learning_rate": 7.835666413981744e-06,
      "loss": 0.3869,
      "step": 1344
    },
    {
      "epoch": 1.1327905670971363,
      "grad_norm": 0.38252949714660645,
      "learning_rate": 7.831627134060249e-06,
      "loss": 0.4225,
      "step": 1345
    },
    {
      "epoch": 1.133632790567097,
      "grad_norm": 0.44802263379096985,
      "learning_rate": 7.827585131746122e-06,
      "loss": 0.4597,
      "step": 1346
    },
    {
      "epoch": 1.1344750140370579,
      "grad_norm": 0.3646650016307831,
      "learning_rate": 7.823540410925434e-06,
      "loss": 0.4523,
      "step": 1347
    },
    {
      "epoch": 1.1353172375070186,
      "grad_norm": 0.3642771542072296,
      "learning_rate": 7.81949297548688e-06,
      "loss": 0.4304,
      "step": 1348
    },
    {
      "epoch": 1.1361594609769792,
      "grad_norm": 0.426010400056839,
      "learning_rate": 7.815442829321754e-06,
      "loss": 0.4169,
      "step": 1349
    },
    {
      "epoch": 1.13700168444694,
      "grad_norm": 0.4515875279903412,
      "learning_rate": 7.811389976323963e-06,
      "loss": 0.4763,
      "step": 1350
    },
    {
      "epoch": 1.1378439079169007,
      "grad_norm": 0.37319886684417725,
      "learning_rate": 7.807334420390014e-06,
      "loss": 0.4247,
      "step": 1351
    },
    {
      "epoch": 1.1386861313868613,
      "grad_norm": 0.378501296043396,
      "learning_rate": 7.803276165419015e-06,
      "loss": 0.4333,
      "step": 1352
    },
    {
      "epoch": 1.139528354856822,
      "grad_norm": 0.4014640152454376,
      "learning_rate": 7.799215215312667e-06,
      "loss": 0.4574,
      "step": 1353
    },
    {
      "epoch": 1.1403705783267828,
      "grad_norm": 0.361619770526886,
      "learning_rate": 7.795151573975262e-06,
      "loss": 0.4045,
      "step": 1354
    },
    {
      "epoch": 1.1412128017967433,
      "grad_norm": 0.4067947566509247,
      "learning_rate": 7.79108524531368e-06,
      "loss": 0.4026,
      "step": 1355
    },
    {
      "epoch": 1.142055025266704,
      "grad_norm": 0.3749244213104248,
      "learning_rate": 7.787016233237387e-06,
      "loss": 0.4062,
      "step": 1356
    },
    {
      "epoch": 1.1428972487366649,
      "grad_norm": 0.3807445764541626,
      "learning_rate": 7.782944541658423e-06,
      "loss": 0.4153,
      "step": 1357
    },
    {
      "epoch": 1.1437394722066254,
      "grad_norm": 0.3841903507709503,
      "learning_rate": 7.778870174491408e-06,
      "loss": 0.4282,
      "step": 1358
    },
    {
      "epoch": 1.1445816956765862,
      "grad_norm": 0.34361204504966736,
      "learning_rate": 7.774793135653537e-06,
      "loss": 0.4184,
      "step": 1359
    },
    {
      "epoch": 1.145423919146547,
      "grad_norm": 0.3764127492904663,
      "learning_rate": 7.770713429064567e-06,
      "loss": 0.4107,
      "step": 1360
    },
    {
      "epoch": 1.1462661426165075,
      "grad_norm": 0.39367419481277466,
      "learning_rate": 7.766631058646826e-06,
      "loss": 0.4318,
      "step": 1361
    },
    {
      "epoch": 1.1471083660864683,
      "grad_norm": 0.39291778206825256,
      "learning_rate": 7.7625460283252e-06,
      "loss": 0.4433,
      "step": 1362
    },
    {
      "epoch": 1.147950589556429,
      "grad_norm": 0.3617228865623474,
      "learning_rate": 7.75845834202713e-06,
      "loss": 0.4347,
      "step": 1363
    },
    {
      "epoch": 1.1487928130263896,
      "grad_norm": 0.34056851267814636,
      "learning_rate": 7.754368003682617e-06,
      "loss": 0.373,
      "step": 1364
    },
    {
      "epoch": 1.1496350364963503,
      "grad_norm": 0.35771188139915466,
      "learning_rate": 7.750275017224208e-06,
      "loss": 0.4585,
      "step": 1365
    },
    {
      "epoch": 1.1504772599663111,
      "grad_norm": 0.3744412064552307,
      "learning_rate": 7.746179386586994e-06,
      "loss": 0.4058,
      "step": 1366
    },
    {
      "epoch": 1.1513194834362717,
      "grad_norm": 0.3890160322189331,
      "learning_rate": 7.74208111570861e-06,
      "loss": 0.4502,
      "step": 1367
    },
    {
      "epoch": 1.1521617069062324,
      "grad_norm": 0.4113486707210541,
      "learning_rate": 7.737980208529232e-06,
      "loss": 0.4372,
      "step": 1368
    },
    {
      "epoch": 1.1530039303761932,
      "grad_norm": 0.36418694257736206,
      "learning_rate": 7.733876668991565e-06,
      "loss": 0.4242,
      "step": 1369
    },
    {
      "epoch": 1.1538461538461537,
      "grad_norm": 0.32672932744026184,
      "learning_rate": 7.72977050104085e-06,
      "loss": 0.4203,
      "step": 1370
    },
    {
      "epoch": 1.1546883773161145,
      "grad_norm": 0.3589940071105957,
      "learning_rate": 7.725661708624855e-06,
      "loss": 0.4268,
      "step": 1371
    },
    {
      "epoch": 1.1555306007860753,
      "grad_norm": 0.36602362990379333,
      "learning_rate": 7.721550295693865e-06,
      "loss": 0.4489,
      "step": 1372
    },
    {
      "epoch": 1.156372824256036,
      "grad_norm": 0.38566896319389343,
      "learning_rate": 7.71743626620069e-06,
      "loss": 0.4428,
      "step": 1373
    },
    {
      "epoch": 1.1572150477259966,
      "grad_norm": 0.3983108103275299,
      "learning_rate": 7.713319624100657e-06,
      "loss": 0.3863,
      "step": 1374
    },
    {
      "epoch": 1.1580572711959574,
      "grad_norm": 0.4003891050815582,
      "learning_rate": 7.7092003733516e-06,
      "loss": 0.3979,
      "step": 1375
    },
    {
      "epoch": 1.158899494665918,
      "grad_norm": 0.44121235609054565,
      "learning_rate": 7.705078517913862e-06,
      "loss": 0.4611,
      "step": 1376
    },
    {
      "epoch": 1.1597417181358787,
      "grad_norm": 0.40486371517181396,
      "learning_rate": 7.700954061750295e-06,
      "loss": 0.4349,
      "step": 1377
    },
    {
      "epoch": 1.1605839416058394,
      "grad_norm": 0.38473987579345703,
      "learning_rate": 7.696827008826242e-06,
      "loss": 0.451,
      "step": 1378
    },
    {
      "epoch": 1.1614261650758002,
      "grad_norm": 0.3996846079826355,
      "learning_rate": 7.692697363109553e-06,
      "loss": 0.4609,
      "step": 1379
    },
    {
      "epoch": 1.1622683885457608,
      "grad_norm": 0.3721959590911865,
      "learning_rate": 7.688565128570564e-06,
      "loss": 0.4175,
      "step": 1380
    },
    {
      "epoch": 1.1631106120157215,
      "grad_norm": 0.35757309198379517,
      "learning_rate": 7.684430309182106e-06,
      "loss": 0.3973,
      "step": 1381
    },
    {
      "epoch": 1.1639528354856823,
      "grad_norm": 0.4051101803779602,
      "learning_rate": 7.680292908919485e-06,
      "loss": 0.4598,
      "step": 1382
    },
    {
      "epoch": 1.1647950589556428,
      "grad_norm": 0.36632901430130005,
      "learning_rate": 7.676152931760496e-06,
      "loss": 0.411,
      "step": 1383
    },
    {
      "epoch": 1.1656372824256036,
      "grad_norm": 0.38474225997924805,
      "learning_rate": 7.672010381685416e-06,
      "loss": 0.4433,
      "step": 1384
    },
    {
      "epoch": 1.1664795058955644,
      "grad_norm": 0.38605427742004395,
      "learning_rate": 7.667865262676981e-06,
      "loss": 0.3975,
      "step": 1385
    },
    {
      "epoch": 1.167321729365525,
      "grad_norm": 0.3581254780292511,
      "learning_rate": 7.663717578720412e-06,
      "loss": 0.4431,
      "step": 1386
    },
    {
      "epoch": 1.1681639528354857,
      "grad_norm": 0.33589276671409607,
      "learning_rate": 7.659567333803386e-06,
      "loss": 0.3841,
      "step": 1387
    },
    {
      "epoch": 1.1690061763054465,
      "grad_norm": 0.3760092258453369,
      "learning_rate": 7.655414531916048e-06,
      "loss": 0.4294,
      "step": 1388
    },
    {
      "epoch": 1.169848399775407,
      "grad_norm": 0.3705093264579773,
      "learning_rate": 7.651259177050996e-06,
      "loss": 0.3869,
      "step": 1389
    },
    {
      "epoch": 1.1706906232453678,
      "grad_norm": 0.39357978105545044,
      "learning_rate": 7.647101273203289e-06,
      "loss": 0.4116,
      "step": 1390
    },
    {
      "epoch": 1.1715328467153285,
      "grad_norm": 0.36840763688087463,
      "learning_rate": 7.642940824370429e-06,
      "loss": 0.4015,
      "step": 1391
    },
    {
      "epoch": 1.172375070185289,
      "grad_norm": 0.37624263763427734,
      "learning_rate": 7.638777834552372e-06,
      "loss": 0.4504,
      "step": 1392
    },
    {
      "epoch": 1.1732172936552498,
      "grad_norm": 0.3869098424911499,
      "learning_rate": 7.634612307751513e-06,
      "loss": 0.3934,
      "step": 1393
    },
    {
      "epoch": 1.1740595171252106,
      "grad_norm": 0.4052045941352844,
      "learning_rate": 7.630444247972688e-06,
      "loss": 0.4512,
      "step": 1394
    },
    {
      "epoch": 1.1749017405951712,
      "grad_norm": 0.3574765920639038,
      "learning_rate": 7.626273659223166e-06,
      "loss": 0.4283,
      "step": 1395
    },
    {
      "epoch": 1.175743964065132,
      "grad_norm": 0.4295235276222229,
      "learning_rate": 7.622100545512648e-06,
      "loss": 0.452,
      "step": 1396
    },
    {
      "epoch": 1.1765861875350927,
      "grad_norm": 0.3482303023338318,
      "learning_rate": 7.617924910853266e-06,
      "loss": 0.4437,
      "step": 1397
    },
    {
      "epoch": 1.1774284110050532,
      "grad_norm": 0.3524567484855652,
      "learning_rate": 7.61374675925957e-06,
      "loss": 0.4161,
      "step": 1398
    },
    {
      "epoch": 1.178270634475014,
      "grad_norm": 0.4181055724620819,
      "learning_rate": 7.609566094748535e-06,
      "loss": 0.4265,
      "step": 1399
    },
    {
      "epoch": 1.1791128579449748,
      "grad_norm": 0.39104053378105164,
      "learning_rate": 7.605382921339548e-06,
      "loss": 0.4261,
      "step": 1400
    },
    {
      "epoch": 1.1799550814149353,
      "grad_norm": 0.39233243465423584,
      "learning_rate": 7.601197243054411e-06,
      "loss": 0.4465,
      "step": 1401
    },
    {
      "epoch": 1.180797304884896,
      "grad_norm": 0.3726870119571686,
      "learning_rate": 7.597009063917333e-06,
      "loss": 0.3936,
      "step": 1402
    },
    {
      "epoch": 1.1816395283548569,
      "grad_norm": 0.3764479160308838,
      "learning_rate": 7.5928183879549274e-06,
      "loss": 0.4518,
      "step": 1403
    },
    {
      "epoch": 1.1824817518248176,
      "grad_norm": 0.42297065258026123,
      "learning_rate": 7.588625219196208e-06,
      "loss": 0.4579,
      "step": 1404
    },
    {
      "epoch": 1.1833239752947782,
      "grad_norm": 0.3672788441181183,
      "learning_rate": 7.584429561672586e-06,
      "loss": 0.422,
      "step": 1405
    },
    {
      "epoch": 1.184166198764739,
      "grad_norm": 0.36268627643585205,
      "learning_rate": 7.580231419417863e-06,
      "loss": 0.4523,
      "step": 1406
    },
    {
      "epoch": 1.1850084222346995,
      "grad_norm": 0.38749584555625916,
      "learning_rate": 7.576030796468233e-06,
      "loss": 0.4274,
      "step": 1407
    },
    {
      "epoch": 1.1858506457046603,
      "grad_norm": 0.4002215564250946,
      "learning_rate": 7.571827696862274e-06,
      "loss": 0.3926,
      "step": 1408
    },
    {
      "epoch": 1.186692869174621,
      "grad_norm": 0.36324864625930786,
      "learning_rate": 7.567622124640942e-06,
      "loss": 0.4263,
      "step": 1409
    },
    {
      "epoch": 1.1875350926445818,
      "grad_norm": 0.437010258436203,
      "learning_rate": 7.563414083847573e-06,
      "loss": 0.4864,
      "step": 1410
    },
    {
      "epoch": 1.1883773161145423,
      "grad_norm": 0.31606045365333557,
      "learning_rate": 7.55920357852788e-06,
      "loss": 0.3919,
      "step": 1411
    },
    {
      "epoch": 1.189219539584503,
      "grad_norm": 0.38336682319641113,
      "learning_rate": 7.554990612729936e-06,
      "loss": 0.4331,
      "step": 1412
    },
    {
      "epoch": 1.1900617630544639,
      "grad_norm": 0.3902300298213959,
      "learning_rate": 7.5507751905041885e-06,
      "loss": 0.4182,
      "step": 1413
    },
    {
      "epoch": 1.1909039865244244,
      "grad_norm": 0.4050475060939789,
      "learning_rate": 7.5465573159034396e-06,
      "loss": 0.4353,
      "step": 1414
    },
    {
      "epoch": 1.1917462099943852,
      "grad_norm": 0.41302910447120667,
      "learning_rate": 7.542336992982857e-06,
      "loss": 0.4171,
      "step": 1415
    },
    {
      "epoch": 1.192588433464346,
      "grad_norm": 0.38498425483703613,
      "learning_rate": 7.538114225799955e-06,
      "loss": 0.4429,
      "step": 1416
    },
    {
      "epoch": 1.1934306569343065,
      "grad_norm": 0.459703266620636,
      "learning_rate": 7.533889018414602e-06,
      "loss": 0.4551,
      "step": 1417
    },
    {
      "epoch": 1.1942728804042673,
      "grad_norm": 0.37605124711990356,
      "learning_rate": 7.529661374889011e-06,
      "loss": 0.3814,
      "step": 1418
    },
    {
      "epoch": 1.195115103874228,
      "grad_norm": 0.3843555748462677,
      "learning_rate": 7.525431299287737e-06,
      "loss": 0.4318,
      "step": 1419
    },
    {
      "epoch": 1.1959573273441886,
      "grad_norm": 0.3674013018608093,
      "learning_rate": 7.5211987956776755e-06,
      "loss": 0.4263,
      "step": 1420
    },
    {
      "epoch": 1.1967995508141493,
      "grad_norm": 0.39034512639045715,
      "learning_rate": 7.516963868128054e-06,
      "loss": 0.4517,
      "step": 1421
    },
    {
      "epoch": 1.1976417742841101,
      "grad_norm": 0.38654136657714844,
      "learning_rate": 7.512726520710429e-06,
      "loss": 0.4206,
      "step": 1422
    },
    {
      "epoch": 1.1984839977540707,
      "grad_norm": 0.4439588785171509,
      "learning_rate": 7.508486757498687e-06,
      "loss": 0.4076,
      "step": 1423
    },
    {
      "epoch": 1.1993262212240314,
      "grad_norm": 0.33879712224006653,
      "learning_rate": 7.5042445825690344e-06,
      "loss": 0.3935,
      "step": 1424
    },
    {
      "epoch": 1.2001684446939922,
      "grad_norm": 0.4665505886077881,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.4401,
      "step": 1425
    },
    {
      "epoch": 1.2010106681639527,
      "grad_norm": 0.3511357605457306,
      "learning_rate": 7.4957530138724245e-06,
      "loss": 0.406,
      "step": 1426
    },
    {
      "epoch": 1.2018528916339135,
      "grad_norm": 0.3784853219985962,
      "learning_rate": 7.491503628269458e-06,
      "loss": 0.437,
      "step": 1427
    },
    {
      "epoch": 1.2026951151038743,
      "grad_norm": 0.4115026891231537,
      "learning_rate": 7.4872518472765594e-06,
      "loss": 0.4792,
      "step": 1428
    },
    {
      "epoch": 1.203537338573835,
      "grad_norm": 0.36362993717193604,
      "learning_rate": 7.4829976749814935e-06,
      "loss": 0.3558,
      "step": 1429
    },
    {
      "epoch": 1.2043795620437956,
      "grad_norm": 0.35372886061668396,
      "learning_rate": 7.4787411154743175e-06,
      "loss": 0.3977,
      "step": 1430
    },
    {
      "epoch": 1.2052217855137564,
      "grad_norm": 0.42046433687210083,
      "learning_rate": 7.474482172847391e-06,
      "loss": 0.4503,
      "step": 1431
    },
    {
      "epoch": 1.206064008983717,
      "grad_norm": 0.38461464643478394,
      "learning_rate": 7.470220851195356e-06,
      "loss": 0.4319,
      "step": 1432
    },
    {
      "epoch": 1.2069062324536777,
      "grad_norm": 0.3675577938556671,
      "learning_rate": 7.46595715461515e-06,
      "loss": 0.4287,
      "step": 1433
    },
    {
      "epoch": 1.2077484559236384,
      "grad_norm": 0.3817160129547119,
      "learning_rate": 7.461691087205993e-06,
      "loss": 0.4078,
      "step": 1434
    },
    {
      "epoch": 1.2085906793935992,
      "grad_norm": 0.39037024974823,
      "learning_rate": 7.457422653069379e-06,
      "loss": 0.4478,
      "step": 1435
    },
    {
      "epoch": 1.2094329028635598,
      "grad_norm": 0.3335110545158386,
      "learning_rate": 7.45315185630908e-06,
      "loss": 0.4177,
      "step": 1436
    },
    {
      "epoch": 1.2102751263335205,
      "grad_norm": 0.3990948796272278,
      "learning_rate": 7.4488787010311425e-06,
      "loss": 0.42,
      "step": 1437
    },
    {
      "epoch": 1.211117349803481,
      "grad_norm": 0.39929258823394775,
      "learning_rate": 7.444603191343878e-06,
      "loss": 0.4011,
      "step": 1438
    },
    {
      "epoch": 1.2119595732734418,
      "grad_norm": 0.35615503787994385,
      "learning_rate": 7.440325331357858e-06,
      "loss": 0.4042,
      "step": 1439
    },
    {
      "epoch": 1.2128017967434026,
      "grad_norm": 0.38169369101524353,
      "learning_rate": 7.436045125185923e-06,
      "loss": 0.4416,
      "step": 1440
    },
    {
      "epoch": 1.2136440202133634,
      "grad_norm": 0.3634074926376343,
      "learning_rate": 7.431762576943157e-06,
      "loss": 0.4153,
      "step": 1441
    },
    {
      "epoch": 1.214486243683324,
      "grad_norm": 0.3879089653491974,
      "learning_rate": 7.427477690746906e-06,
      "loss": 0.4057,
      "step": 1442
    },
    {
      "epoch": 1.2153284671532847,
      "grad_norm": 0.38335829973220825,
      "learning_rate": 7.423190470716761e-06,
      "loss": 0.4027,
      "step": 1443
    },
    {
      "epoch": 1.2161706906232455,
      "grad_norm": 0.40403735637664795,
      "learning_rate": 7.418900920974552e-06,
      "loss": 0.436,
      "step": 1444
    },
    {
      "epoch": 1.217012914093206,
      "grad_norm": 0.38219738006591797,
      "learning_rate": 7.414609045644356e-06,
      "loss": 0.4577,
      "step": 1445
    },
    {
      "epoch": 1.2178551375631668,
      "grad_norm": 0.3973982334136963,
      "learning_rate": 7.4103148488524824e-06,
      "loss": 0.409,
      "step": 1446
    },
    {
      "epoch": 1.2186973610331275,
      "grad_norm": 0.39287805557250977,
      "learning_rate": 7.40601833472747e-06,
      "loss": 0.445,
      "step": 1447
    },
    {
      "epoch": 1.219539584503088,
      "grad_norm": 0.35651451349258423,
      "learning_rate": 7.401719507400088e-06,
      "loss": 0.3882,
      "step": 1448
    },
    {
      "epoch": 1.2203818079730488,
      "grad_norm": 0.44902580976486206,
      "learning_rate": 7.3974183710033334e-06,
      "loss": 0.4266,
      "step": 1449
    },
    {
      "epoch": 1.2212240314430096,
      "grad_norm": 0.4864282011985779,
      "learning_rate": 7.393114929672414e-06,
      "loss": 0.4648,
      "step": 1450
    },
    {
      "epoch": 1.2220662549129702,
      "grad_norm": 0.3779141902923584,
      "learning_rate": 7.388809187544764e-06,
      "loss": 0.424,
      "step": 1451
    },
    {
      "epoch": 1.222908478382931,
      "grad_norm": 0.42011797428131104,
      "learning_rate": 7.384501148760024e-06,
      "loss": 0.4223,
      "step": 1452
    },
    {
      "epoch": 1.2237507018528917,
      "grad_norm": 0.41561511158943176,
      "learning_rate": 7.38019081746004e-06,
      "loss": 0.4163,
      "step": 1453
    },
    {
      "epoch": 1.2245929253228522,
      "grad_norm": 0.39888399839401245,
      "learning_rate": 7.3758781977888684e-06,
      "loss": 0.4172,
      "step": 1454
    },
    {
      "epoch": 1.225435148792813,
      "grad_norm": 0.3473283350467682,
      "learning_rate": 7.371563293892761e-06,
      "loss": 0.4033,
      "step": 1455
    },
    {
      "epoch": 1.2262773722627738,
      "grad_norm": 0.38621172308921814,
      "learning_rate": 7.367246109920171e-06,
      "loss": 0.3909,
      "step": 1456
    },
    {
      "epoch": 1.2271195957327343,
      "grad_norm": 0.43647679686546326,
      "learning_rate": 7.362926650021736e-06,
      "loss": 0.4534,
      "step": 1457
    },
    {
      "epoch": 1.227961819202695,
      "grad_norm": 0.4088779091835022,
      "learning_rate": 7.3586049183502875e-06,
      "loss": 0.4594,
      "step": 1458
    },
    {
      "epoch": 1.2288040426726559,
      "grad_norm": 0.3764493763446808,
      "learning_rate": 7.354280919060839e-06,
      "loss": 0.4214,
      "step": 1459
    },
    {
      "epoch": 1.2296462661426166,
      "grad_norm": 0.4383613169193268,
      "learning_rate": 7.349954656310585e-06,
      "loss": 0.3797,
      "step": 1460
    },
    {
      "epoch": 1.2304884896125772,
      "grad_norm": 0.4090350270271301,
      "learning_rate": 7.345626134258897e-06,
      "loss": 0.4704,
      "step": 1461
    },
    {
      "epoch": 1.231330713082538,
      "grad_norm": 0.393676221370697,
      "learning_rate": 7.341295357067315e-06,
      "loss": 0.4766,
      "step": 1462
    },
    {
      "epoch": 1.2321729365524985,
      "grad_norm": 0.3755529522895813,
      "learning_rate": 7.336962328899553e-06,
      "loss": 0.3688,
      "step": 1463
    },
    {
      "epoch": 1.2330151600224593,
      "grad_norm": 0.4419655203819275,
      "learning_rate": 7.3326270539214826e-06,
      "loss": 0.4517,
      "step": 1464
    },
    {
      "epoch": 1.23385738349242,
      "grad_norm": 0.3655077815055847,
      "learning_rate": 7.3282895363011405e-06,
      "loss": 0.4045,
      "step": 1465
    },
    {
      "epoch": 1.2346996069623808,
      "grad_norm": 0.41435039043426514,
      "learning_rate": 7.323949780208717e-06,
      "loss": 0.4268,
      "step": 1466
    },
    {
      "epoch": 1.2355418304323413,
      "grad_norm": 0.41059809923171997,
      "learning_rate": 7.319607789816555e-06,
      "loss": 0.4517,
      "step": 1467
    },
    {
      "epoch": 1.236384053902302,
      "grad_norm": 0.3652295768260956,
      "learning_rate": 7.315263569299147e-06,
      "loss": 0.4153,
      "step": 1468
    },
    {
      "epoch": 1.2372262773722627,
      "grad_norm": 0.382811039686203,
      "learning_rate": 7.310917122833127e-06,
      "loss": 0.4838,
      "step": 1469
    },
    {
      "epoch": 1.2380685008422234,
      "grad_norm": 0.34275102615356445,
      "learning_rate": 7.306568454597269e-06,
      "loss": 0.4118,
      "step": 1470
    },
    {
      "epoch": 1.2389107243121842,
      "grad_norm": 0.40695521235466003,
      "learning_rate": 7.302217568772488e-06,
      "loss": 0.415,
      "step": 1471
    },
    {
      "epoch": 1.239752947782145,
      "grad_norm": 0.3750225603580475,
      "learning_rate": 7.297864469541826e-06,
      "loss": 0.4265,
      "step": 1472
    },
    {
      "epoch": 1.2405951712521055,
      "grad_norm": 0.3629060983657837,
      "learning_rate": 7.293509161090453e-06,
      "loss": 0.4403,
      "step": 1473
    },
    {
      "epoch": 1.2414373947220663,
      "grad_norm": 0.3366106152534485,
      "learning_rate": 7.289151647605668e-06,
      "loss": 0.3831,
      "step": 1474
    },
    {
      "epoch": 1.242279618192027,
      "grad_norm": 0.39573776721954346,
      "learning_rate": 7.284791933276883e-06,
      "loss": 0.4307,
      "step": 1475
    },
    {
      "epoch": 1.2431218416619876,
      "grad_norm": 0.35405707359313965,
      "learning_rate": 7.28043002229563e-06,
      "loss": 0.4211,
      "step": 1476
    },
    {
      "epoch": 1.2439640651319483,
      "grad_norm": 0.382311075925827,
      "learning_rate": 7.276065918855554e-06,
      "loss": 0.4253,
      "step": 1477
    },
    {
      "epoch": 1.2448062886019091,
      "grad_norm": 0.3669031262397766,
      "learning_rate": 7.271699627152406e-06,
      "loss": 0.4042,
      "step": 1478
    },
    {
      "epoch": 1.2456485120718697,
      "grad_norm": 0.33937758207321167,
      "learning_rate": 7.2673311513840395e-06,
      "loss": 0.4198,
      "step": 1479
    },
    {
      "epoch": 1.2464907355418304,
      "grad_norm": 0.41505059599876404,
      "learning_rate": 7.26296049575041e-06,
      "loss": 0.456,
      "step": 1480
    },
    {
      "epoch": 1.2473329590117912,
      "grad_norm": 0.3546706736087799,
      "learning_rate": 7.2585876644535705e-06,
      "loss": 0.46,
      "step": 1481
    },
    {
      "epoch": 1.2481751824817517,
      "grad_norm": 0.3180731534957886,
      "learning_rate": 7.2542126616976596e-06,
      "loss": 0.3893,
      "step": 1482
    },
    {
      "epoch": 1.2490174059517125,
      "grad_norm": 0.3644465506076813,
      "learning_rate": 7.24983549168891e-06,
      "loss": 0.4121,
      "step": 1483
    },
    {
      "epoch": 1.2498596294216733,
      "grad_norm": 0.3657407760620117,
      "learning_rate": 7.2454561586356355e-06,
      "loss": 0.4037,
      "step": 1484
    },
    {
      "epoch": 1.250701852891634,
      "grad_norm": 0.3584146201610565,
      "learning_rate": 7.241074666748228e-06,
      "loss": 0.4092,
      "step": 1485
    },
    {
      "epoch": 1.2515440763615946,
      "grad_norm": 0.3337010145187378,
      "learning_rate": 7.236691020239157e-06,
      "loss": 0.4291,
      "step": 1486
    },
    {
      "epoch": 1.2523862998315554,
      "grad_norm": 0.37202006578445435,
      "learning_rate": 7.232305223322963e-06,
      "loss": 0.433,
      "step": 1487
    },
    {
      "epoch": 1.253228523301516,
      "grad_norm": 0.32051676511764526,
      "learning_rate": 7.227917280216254e-06,
      "loss": 0.3889,
      "step": 1488
    },
    {
      "epoch": 1.2540707467714767,
      "grad_norm": 0.3897875249385834,
      "learning_rate": 7.2235271951377005e-06,
      "loss": 0.4696,
      "step": 1489
    },
    {
      "epoch": 1.2549129702414374,
      "grad_norm": 0.35171809792518616,
      "learning_rate": 7.219134972308035e-06,
      "loss": 0.4184,
      "step": 1490
    },
    {
      "epoch": 1.2557551937113982,
      "grad_norm": 0.34497132897377014,
      "learning_rate": 7.214740615950041e-06,
      "loss": 0.4213,
      "step": 1491
    },
    {
      "epoch": 1.2565974171813588,
      "grad_norm": 0.34568437933921814,
      "learning_rate": 7.210344130288558e-06,
      "loss": 0.3975,
      "step": 1492
    },
    {
      "epoch": 1.2574396406513195,
      "grad_norm": 0.3618040680885315,
      "learning_rate": 7.205945519550467e-06,
      "loss": 0.4421,
      "step": 1493
    },
    {
      "epoch": 1.25828186412128,
      "grad_norm": 0.35818609595298767,
      "learning_rate": 7.201544787964698e-06,
      "loss": 0.4294,
      "step": 1494
    },
    {
      "epoch": 1.2591240875912408,
      "grad_norm": 0.3691999018192291,
      "learning_rate": 7.197141939762217e-06,
      "loss": 0.3808,
      "step": 1495
    },
    {
      "epoch": 1.2599663110612016,
      "grad_norm": 0.3811958432197571,
      "learning_rate": 7.192736979176025e-06,
      "loss": 0.4564,
      "step": 1496
    },
    {
      "epoch": 1.2608085345311624,
      "grad_norm": 0.39242181181907654,
      "learning_rate": 7.188329910441154e-06,
      "loss": 0.4269,
      "step": 1497
    },
    {
      "epoch": 1.261650758001123,
      "grad_norm": 0.35562315583229065,
      "learning_rate": 7.183920737794663e-06,
      "loss": 0.4089,
      "step": 1498
    },
    {
      "epoch": 1.2624929814710837,
      "grad_norm": 0.4188075065612793,
      "learning_rate": 7.179509465475636e-06,
      "loss": 0.4192,
      "step": 1499
    },
    {
      "epoch": 1.2633352049410442,
      "grad_norm": 0.34099283814430237,
      "learning_rate": 7.175096097725169e-06,
      "loss": 0.4453,
      "step": 1500
    },
    {
      "epoch": 1.264177428411005,
      "grad_norm": 0.4382229149341583,
      "learning_rate": 7.170680638786383e-06,
      "loss": 0.415,
      "step": 1501
    },
    {
      "epoch": 1.2650196518809658,
      "grad_norm": 0.3906000256538391,
      "learning_rate": 7.166263092904399e-06,
      "loss": 0.411,
      "step": 1502
    },
    {
      "epoch": 1.2658618753509265,
      "grad_norm": 0.3964785635471344,
      "learning_rate": 7.161843464326349e-06,
      "loss": 0.4352,
      "step": 1503
    },
    {
      "epoch": 1.266704098820887,
      "grad_norm": 0.3900376260280609,
      "learning_rate": 7.157421757301371e-06,
      "loss": 0.4209,
      "step": 1504
    },
    {
      "epoch": 1.2675463222908478,
      "grad_norm": 0.38399258255958557,
      "learning_rate": 7.1529979760805946e-06,
      "loss": 0.4156,
      "step": 1505
    },
    {
      "epoch": 1.2683885457608086,
      "grad_norm": 0.4026244878768921,
      "learning_rate": 7.148572124917148e-06,
      "loss": 0.4121,
      "step": 1506
    },
    {
      "epoch": 1.2692307692307692,
      "grad_norm": 0.3550397753715515,
      "learning_rate": 7.144144208066148e-06,
      "loss": 0.4002,
      "step": 1507
    },
    {
      "epoch": 1.27007299270073,
      "grad_norm": 0.41013965010643005,
      "learning_rate": 7.1397142297846975e-06,
      "loss": 0.4246,
      "step": 1508
    },
    {
      "epoch": 1.2709152161706907,
      "grad_norm": 0.39515209197998047,
      "learning_rate": 7.135282194331881e-06,
      "loss": 0.4521,
      "step": 1509
    },
    {
      "epoch": 1.2717574396406512,
      "grad_norm": 0.39057475328445435,
      "learning_rate": 7.130848105968762e-06,
      "loss": 0.4175,
      "step": 1510
    },
    {
      "epoch": 1.272599663110612,
      "grad_norm": 0.3986159563064575,
      "learning_rate": 7.126411968958374e-06,
      "loss": 0.4137,
      "step": 1511
    },
    {
      "epoch": 1.2734418865805728,
      "grad_norm": 0.3891482651233673,
      "learning_rate": 7.121973787565727e-06,
      "loss": 0.4547,
      "step": 1512
    },
    {
      "epoch": 1.2742841100505333,
      "grad_norm": 0.34719160199165344,
      "learning_rate": 7.1175335660577906e-06,
      "loss": 0.4339,
      "step": 1513
    },
    {
      "epoch": 1.275126333520494,
      "grad_norm": 0.39721041917800903,
      "learning_rate": 7.113091308703498e-06,
      "loss": 0.4387,
      "step": 1514
    },
    {
      "epoch": 1.2759685569904549,
      "grad_norm": 0.3556055426597595,
      "learning_rate": 7.1086470197737405e-06,
      "loss": 0.4161,
      "step": 1515
    },
    {
      "epoch": 1.2768107804604156,
      "grad_norm": 0.389474093914032,
      "learning_rate": 7.104200703541358e-06,
      "loss": 0.4341,
      "step": 1516
    },
    {
      "epoch": 1.2776530039303762,
      "grad_norm": 0.3703227639198303,
      "learning_rate": 7.099752364281147e-06,
      "loss": 0.4251,
      "step": 1517
    },
    {
      "epoch": 1.278495227400337,
      "grad_norm": 0.34580498933792114,
      "learning_rate": 7.095302006269842e-06,
      "loss": 0.4051,
      "step": 1518
    },
    {
      "epoch": 1.2793374508702975,
      "grad_norm": 0.3894367218017578,
      "learning_rate": 7.090849633786125e-06,
      "loss": 0.4247,
      "step": 1519
    },
    {
      "epoch": 1.2801796743402583,
      "grad_norm": 0.3620089888572693,
      "learning_rate": 7.0863952511106075e-06,
      "loss": 0.4496,
      "step": 1520
    },
    {
      "epoch": 1.281021897810219,
      "grad_norm": 0.3373377025127411,
      "learning_rate": 7.0819388625258385e-06,
      "loss": 0.3647,
      "step": 1521
    },
    {
      "epoch": 1.2818641212801798,
      "grad_norm": 0.3958834111690521,
      "learning_rate": 7.077480472316296e-06,
      "loss": 0.4873,
      "step": 1522
    },
    {
      "epoch": 1.2827063447501403,
      "grad_norm": 0.32268860936164856,
      "learning_rate": 7.0730200847683795e-06,
      "loss": 0.39,
      "step": 1523
    },
    {
      "epoch": 1.283548568220101,
      "grad_norm": 0.3884601593017578,
      "learning_rate": 7.06855770417041e-06,
      "loss": 0.462,
      "step": 1524
    },
    {
      "epoch": 1.2843907916900617,
      "grad_norm": 0.3518340587615967,
      "learning_rate": 7.0640933348126235e-06,
      "loss": 0.4213,
      "step": 1525
    },
    {
      "epoch": 1.2852330151600224,
      "grad_norm": 0.36375266313552856,
      "learning_rate": 7.059626980987172e-06,
      "loss": 0.4027,
      "step": 1526
    },
    {
      "epoch": 1.2860752386299832,
      "grad_norm": 0.4300069808959961,
      "learning_rate": 7.05515864698811e-06,
      "loss": 0.4461,
      "step": 1527
    },
    {
      "epoch": 1.286917462099944,
      "grad_norm": 0.35067275166511536,
      "learning_rate": 7.0506883371114e-06,
      "loss": 0.4224,
      "step": 1528
    },
    {
      "epoch": 1.2877596855699045,
      "grad_norm": 0.39170798659324646,
      "learning_rate": 7.046216055654902e-06,
      "loss": 0.4202,
      "step": 1529
    },
    {
      "epoch": 1.2886019090398653,
      "grad_norm": 0.38898545503616333,
      "learning_rate": 7.041741806918372e-06,
      "loss": 0.4491,
      "step": 1530
    },
    {
      "epoch": 1.2894441325098258,
      "grad_norm": 0.3366841971874237,
      "learning_rate": 7.0372655952034575e-06,
      "loss": 0.3811,
      "step": 1531
    },
    {
      "epoch": 1.2902863559797866,
      "grad_norm": 0.4407299757003784,
      "learning_rate": 7.032787424813694e-06,
      "loss": 0.4499,
      "step": 1532
    },
    {
      "epoch": 1.2911285794497473,
      "grad_norm": 0.37221354246139526,
      "learning_rate": 7.028307300054499e-06,
      "loss": 0.4285,
      "step": 1533
    },
    {
      "epoch": 1.2919708029197081,
      "grad_norm": 0.3929399847984314,
      "learning_rate": 7.023825225233169e-06,
      "loss": 0.4004,
      "step": 1534
    },
    {
      "epoch": 1.2928130263896687,
      "grad_norm": 0.3935064971446991,
      "learning_rate": 7.019341204658876e-06,
      "loss": 0.4517,
      "step": 1535
    },
    {
      "epoch": 1.2936552498596294,
      "grad_norm": 0.3892635703086853,
      "learning_rate": 7.014855242642662e-06,
      "loss": 0.4457,
      "step": 1536
    },
    {
      "epoch": 1.2944974733295902,
      "grad_norm": 0.3636402487754822,
      "learning_rate": 7.0103673434974375e-06,
      "loss": 0.4035,
      "step": 1537
    },
    {
      "epoch": 1.2953396967995507,
      "grad_norm": 0.4014008641242981,
      "learning_rate": 7.0058775115379705e-06,
      "loss": 0.4489,
      "step": 1538
    },
    {
      "epoch": 1.2961819202695115,
      "grad_norm": 0.3572685420513153,
      "learning_rate": 7.0013857510808934e-06,
      "loss": 0.393,
      "step": 1539
    },
    {
      "epoch": 1.2970241437394723,
      "grad_norm": 0.42483413219451904,
      "learning_rate": 6.99689206644469e-06,
      "loss": 0.3827,
      "step": 1540
    },
    {
      "epoch": 1.2978663672094328,
      "grad_norm": 0.3836512565612793,
      "learning_rate": 6.992396461949693e-06,
      "loss": 0.4421,
      "step": 1541
    },
    {
      "epoch": 1.2987085906793936,
      "grad_norm": 0.37512412667274475,
      "learning_rate": 6.987898941918082e-06,
      "loss": 0.4297,
      "step": 1542
    },
    {
      "epoch": 1.2995508141493544,
      "grad_norm": 0.4176206588745117,
      "learning_rate": 6.9833995106738774e-06,
      "loss": 0.4086,
      "step": 1543
    },
    {
      "epoch": 1.300393037619315,
      "grad_norm": 0.42080193758010864,
      "learning_rate": 6.978898172542939e-06,
      "loss": 0.4344,
      "step": 1544
    },
    {
      "epoch": 1.3012352610892757,
      "grad_norm": 0.3870827555656433,
      "learning_rate": 6.974394931852957e-06,
      "loss": 0.3924,
      "step": 1545
    },
    {
      "epoch": 1.3020774845592364,
      "grad_norm": 0.40513163805007935,
      "learning_rate": 6.969889792933454e-06,
      "loss": 0.4325,
      "step": 1546
    },
    {
      "epoch": 1.3029197080291972,
      "grad_norm": 0.3556414842605591,
      "learning_rate": 6.965382760115775e-06,
      "loss": 0.391,
      "step": 1547
    },
    {
      "epoch": 1.3037619314991578,
      "grad_norm": 0.41622236371040344,
      "learning_rate": 6.960873837733089e-06,
      "loss": 0.4621,
      "step": 1548
    },
    {
      "epoch": 1.3046041549691185,
      "grad_norm": 0.36453986167907715,
      "learning_rate": 6.956363030120377e-06,
      "loss": 0.443,
      "step": 1549
    },
    {
      "epoch": 1.305446378439079,
      "grad_norm": 0.357077419757843,
      "learning_rate": 6.951850341614436e-06,
      "loss": 0.4244,
      "step": 1550
    },
    {
      "epoch": 1.3062886019090398,
      "grad_norm": 0.39760202169418335,
      "learning_rate": 6.94733577655387e-06,
      "loss": 0.428,
      "step": 1551
    },
    {
      "epoch": 1.3071308253790006,
      "grad_norm": 0.32102200388908386,
      "learning_rate": 6.942819339279089e-06,
      "loss": 0.408,
      "step": 1552
    },
    {
      "epoch": 1.3079730488489614,
      "grad_norm": 0.3639855682849884,
      "learning_rate": 6.9383010341323e-06,
      "loss": 0.4128,
      "step": 1553
    },
    {
      "epoch": 1.308815272318922,
      "grad_norm": 0.33166563510894775,
      "learning_rate": 6.933780865457508e-06,
      "loss": 0.3916,
      "step": 1554
    },
    {
      "epoch": 1.3096574957888827,
      "grad_norm": 0.3716481328010559,
      "learning_rate": 6.9292588376005095e-06,
      "loss": 0.4633,
      "step": 1555
    },
    {
      "epoch": 1.3104997192588432,
      "grad_norm": 0.32041388750076294,
      "learning_rate": 6.924734954908887e-06,
      "loss": 0.3857,
      "step": 1556
    },
    {
      "epoch": 1.311341942728804,
      "grad_norm": 0.4038199782371521,
      "learning_rate": 6.920209221732007e-06,
      "loss": 0.4693,
      "step": 1557
    },
    {
      "epoch": 1.3121841661987648,
      "grad_norm": 0.32170677185058594,
      "learning_rate": 6.9156816424210175e-06,
      "loss": 0.3654,
      "step": 1558
    },
    {
      "epoch": 1.3130263896687255,
      "grad_norm": 0.3601906895637512,
      "learning_rate": 6.911152221328837e-06,
      "loss": 0.417,
      "step": 1559
    },
    {
      "epoch": 1.313868613138686,
      "grad_norm": 0.42985957860946655,
      "learning_rate": 6.90662096281016e-06,
      "loss": 0.4384,
      "step": 1560
    },
    {
      "epoch": 1.3147108366086468,
      "grad_norm": 0.365299254655838,
      "learning_rate": 6.902087871221439e-06,
      "loss": 0.4127,
      "step": 1561
    },
    {
      "epoch": 1.3155530600786074,
      "grad_norm": 0.39565566182136536,
      "learning_rate": 6.897552950920898e-06,
      "loss": 0.4226,
      "step": 1562
    },
    {
      "epoch": 1.3163952835485682,
      "grad_norm": 0.46542924642562866,
      "learning_rate": 6.893016206268518e-06,
      "loss": 0.4316,
      "step": 1563
    },
    {
      "epoch": 1.317237507018529,
      "grad_norm": 0.36687660217285156,
      "learning_rate": 6.888477641626027e-06,
      "loss": 0.3871,
      "step": 1564
    },
    {
      "epoch": 1.3180797304884897,
      "grad_norm": 0.3794815242290497,
      "learning_rate": 6.88393726135691e-06,
      "loss": 0.4388,
      "step": 1565
    },
    {
      "epoch": 1.3189219539584502,
      "grad_norm": 0.4375244081020355,
      "learning_rate": 6.879395069826394e-06,
      "loss": 0.4423,
      "step": 1566
    },
    {
      "epoch": 1.319764177428411,
      "grad_norm": 0.35745230317115784,
      "learning_rate": 6.874851071401448e-06,
      "loss": 0.4091,
      "step": 1567
    },
    {
      "epoch": 1.3206064008983718,
      "grad_norm": 0.36516058444976807,
      "learning_rate": 6.870305270450779e-06,
      "loss": 0.4286,
      "step": 1568
    },
    {
      "epoch": 1.3214486243683323,
      "grad_norm": 0.3890894949436188,
      "learning_rate": 6.865757671344827e-06,
      "loss": 0.4295,
      "step": 1569
    },
    {
      "epoch": 1.322290847838293,
      "grad_norm": 0.3535374104976654,
      "learning_rate": 6.861208278455759e-06,
      "loss": 0.482,
      "step": 1570
    },
    {
      "epoch": 1.3231330713082539,
      "grad_norm": 0.38371479511260986,
      "learning_rate": 6.856657096157469e-06,
      "loss": 0.3727,
      "step": 1571
    },
    {
      "epoch": 1.3239752947782144,
      "grad_norm": 0.36519044637680054,
      "learning_rate": 6.85210412882557e-06,
      "loss": 0.444,
      "step": 1572
    },
    {
      "epoch": 1.3248175182481752,
      "grad_norm": 0.39519909024238586,
      "learning_rate": 6.8475493808373895e-06,
      "loss": 0.436,
      "step": 1573
    },
    {
      "epoch": 1.325659741718136,
      "grad_norm": 0.42742523550987244,
      "learning_rate": 6.8429928565719724e-06,
      "loss": 0.4386,
      "step": 1574
    },
    {
      "epoch": 1.3265019651880965,
      "grad_norm": 0.39204517006874084,
      "learning_rate": 6.838434560410064e-06,
      "loss": 0.4239,
      "step": 1575
    },
    {
      "epoch": 1.3273441886580573,
      "grad_norm": 0.3770383298397064,
      "learning_rate": 6.833874496734122e-06,
      "loss": 0.4421,
      "step": 1576
    },
    {
      "epoch": 1.328186412128018,
      "grad_norm": 0.37053924798965454,
      "learning_rate": 6.829312669928293e-06,
      "loss": 0.4127,
      "step": 1577
    },
    {
      "epoch": 1.3290286355979788,
      "grad_norm": 0.3323088586330414,
      "learning_rate": 6.824749084378428e-06,
      "loss": 0.4063,
      "step": 1578
    },
    {
      "epoch": 1.3298708590679393,
      "grad_norm": 0.37581774592399597,
      "learning_rate": 6.820183744472062e-06,
      "loss": 0.438,
      "step": 1579
    },
    {
      "epoch": 1.3307130825379,
      "grad_norm": 0.34488150477409363,
      "learning_rate": 6.81561665459842e-06,
      "loss": 0.4424,
      "step": 1580
    },
    {
      "epoch": 1.3315553060078607,
      "grad_norm": 0.4256086051464081,
      "learning_rate": 6.811047819148413e-06,
      "loss": 0.4347,
      "step": 1581
    },
    {
      "epoch": 1.3323975294778214,
      "grad_norm": 0.35691678524017334,
      "learning_rate": 6.806477242514623e-06,
      "loss": 0.3678,
      "step": 1582
    },
    {
      "epoch": 1.3332397529477822,
      "grad_norm": 0.3559122085571289,
      "learning_rate": 6.801904929091311e-06,
      "loss": 0.4488,
      "step": 1583
    },
    {
      "epoch": 1.334081976417743,
      "grad_norm": 0.39617833495140076,
      "learning_rate": 6.7973308832744035e-06,
      "loss": 0.4676,
      "step": 1584
    },
    {
      "epoch": 1.3349241998877035,
      "grad_norm": 0.3526609539985657,
      "learning_rate": 6.792755109461498e-06,
      "loss": 0.4073,
      "step": 1585
    },
    {
      "epoch": 1.3357664233576643,
      "grad_norm": 0.3588114380836487,
      "learning_rate": 6.78817761205185e-06,
      "loss": 0.4136,
      "step": 1586
    },
    {
      "epoch": 1.3366086468276248,
      "grad_norm": 0.335822731256485,
      "learning_rate": 6.783598395446371e-06,
      "loss": 0.4092,
      "step": 1587
    },
    {
      "epoch": 1.3374508702975856,
      "grad_norm": 0.33908867835998535,
      "learning_rate": 6.779017464047629e-06,
      "loss": 0.4033,
      "step": 1588
    },
    {
      "epoch": 1.3382930937675463,
      "grad_norm": 0.367220014333725,
      "learning_rate": 6.7744348222598386e-06,
      "loss": 0.459,
      "step": 1589
    },
    {
      "epoch": 1.3391353172375071,
      "grad_norm": 0.37219202518463135,
      "learning_rate": 6.769850474488859e-06,
      "loss": 0.4345,
      "step": 1590
    },
    {
      "epoch": 1.3399775407074677,
      "grad_norm": 0.31587153673171997,
      "learning_rate": 6.7652644251421875e-06,
      "loss": 0.3701,
      "step": 1591
    },
    {
      "epoch": 1.3408197641774284,
      "grad_norm": 0.37061807513237,
      "learning_rate": 6.7606766786289624e-06,
      "loss": 0.4208,
      "step": 1592
    },
    {
      "epoch": 1.341661987647389,
      "grad_norm": 0.3675846755504608,
      "learning_rate": 6.756087239359948e-06,
      "loss": 0.4171,
      "step": 1593
    },
    {
      "epoch": 1.3425042111173497,
      "grad_norm": 0.35385891795158386,
      "learning_rate": 6.75149611174754e-06,
      "loss": 0.4221,
      "step": 1594
    },
    {
      "epoch": 1.3433464345873105,
      "grad_norm": 0.3574049174785614,
      "learning_rate": 6.746903300205756e-06,
      "loss": 0.4383,
      "step": 1595
    },
    {
      "epoch": 1.3441886580572713,
      "grad_norm": 0.34832295775413513,
      "learning_rate": 6.742308809150232e-06,
      "loss": 0.4462,
      "step": 1596
    },
    {
      "epoch": 1.3450308815272318,
      "grad_norm": 0.35711613297462463,
      "learning_rate": 6.737712642998219e-06,
      "loss": 0.4058,
      "step": 1597
    },
    {
      "epoch": 1.3458731049971926,
      "grad_norm": 0.3570874333381653,
      "learning_rate": 6.7331148061685796e-06,
      "loss": 0.4459,
      "step": 1598
    },
    {
      "epoch": 1.3467153284671534,
      "grad_norm": 0.3233488202095032,
      "learning_rate": 6.728515303081782e-06,
      "loss": 0.3988,
      "step": 1599
    },
    {
      "epoch": 1.347557551937114,
      "grad_norm": 0.3794422745704651,
      "learning_rate": 6.723914138159895e-06,
      "loss": 0.4351,
      "step": 1600
    },
    {
      "epoch": 1.3483997754070747,
      "grad_norm": 0.3693790137767792,
      "learning_rate": 6.719311315826589e-06,
      "loss": 0.4447,
      "step": 1601
    },
    {
      "epoch": 1.3492419988770354,
      "grad_norm": 0.34348899126052856,
      "learning_rate": 6.714706840507122e-06,
      "loss": 0.3812,
      "step": 1602
    },
    {
      "epoch": 1.350084222346996,
      "grad_norm": 0.3924219012260437,
      "learning_rate": 6.710100716628345e-06,
      "loss": 0.4324,
      "step": 1603
    },
    {
      "epoch": 1.3509264458169568,
      "grad_norm": 0.3579223155975342,
      "learning_rate": 6.705492948618694e-06,
      "loss": 0.4474,
      "step": 1604
    },
    {
      "epoch": 1.3517686692869175,
      "grad_norm": 0.34730345010757446,
      "learning_rate": 6.700883540908185e-06,
      "loss": 0.414,
      "step": 1605
    },
    {
      "epoch": 1.352610892756878,
      "grad_norm": 0.38468047976493835,
      "learning_rate": 6.696272497928411e-06,
      "loss": 0.474,
      "step": 1606
    },
    {
      "epoch": 1.3534531162268388,
      "grad_norm": 0.34957948327064514,
      "learning_rate": 6.691659824112535e-06,
      "loss": 0.3912,
      "step": 1607
    },
    {
      "epoch": 1.3542953396967996,
      "grad_norm": 0.32915937900543213,
      "learning_rate": 6.687045523895292e-06,
      "loss": 0.3784,
      "step": 1608
    },
    {
      "epoch": 1.3551375631667604,
      "grad_norm": 0.4034005403518677,
      "learning_rate": 6.682429601712976e-06,
      "loss": 0.4362,
      "step": 1609
    },
    {
      "epoch": 1.355979786636721,
      "grad_norm": 0.3841288983821869,
      "learning_rate": 6.6778120620034455e-06,
      "loss": 0.4091,
      "step": 1610
    },
    {
      "epoch": 1.3568220101066817,
      "grad_norm": 0.40738019347190857,
      "learning_rate": 6.673192909206109e-06,
      "loss": 0.4686,
      "step": 1611
    },
    {
      "epoch": 1.3576642335766422,
      "grad_norm": 0.36011794209480286,
      "learning_rate": 6.668572147761929e-06,
      "loss": 0.3981,
      "step": 1612
    },
    {
      "epoch": 1.358506457046603,
      "grad_norm": 0.35476770997047424,
      "learning_rate": 6.663949782113413e-06,
      "loss": 0.4082,
      "step": 1613
    },
    {
      "epoch": 1.3593486805165638,
      "grad_norm": 0.41307199001312256,
      "learning_rate": 6.6593258167046115e-06,
      "loss": 0.4297,
      "step": 1614
    },
    {
      "epoch": 1.3601909039865245,
      "grad_norm": 0.32547488808631897,
      "learning_rate": 6.654700255981115e-06,
      "loss": 0.3866,
      "step": 1615
    },
    {
      "epoch": 1.361033127456485,
      "grad_norm": 0.3770720660686493,
      "learning_rate": 6.6500731043900425e-06,
      "loss": 0.4153,
      "step": 1616
    },
    {
      "epoch": 1.3618753509264458,
      "grad_norm": 0.3707250654697418,
      "learning_rate": 6.64544436638005e-06,
      "loss": 0.4236,
      "step": 1617
    },
    {
      "epoch": 1.3627175743964064,
      "grad_norm": 0.39401713013648987,
      "learning_rate": 6.640814046401312e-06,
      "loss": 0.4815,
      "step": 1618
    },
    {
      "epoch": 1.3635597978663672,
      "grad_norm": 0.38815611600875854,
      "learning_rate": 6.6361821489055275e-06,
      "loss": 0.4266,
      "step": 1619
    },
    {
      "epoch": 1.364402021336328,
      "grad_norm": 0.39932483434677124,
      "learning_rate": 6.63154867834591e-06,
      "loss": 0.4487,
      "step": 1620
    },
    {
      "epoch": 1.3652442448062887,
      "grad_norm": 0.35332930088043213,
      "learning_rate": 6.626913639177189e-06,
      "loss": 0.3813,
      "step": 1621
    },
    {
      "epoch": 1.3660864682762492,
      "grad_norm": 0.3855513334274292,
      "learning_rate": 6.622277035855596e-06,
      "loss": 0.4489,
      "step": 1622
    },
    {
      "epoch": 1.36692869174621,
      "grad_norm": 0.3434603810310364,
      "learning_rate": 6.617638872838874e-06,
      "loss": 0.3877,
      "step": 1623
    },
    {
      "epoch": 1.3677709152161706,
      "grad_norm": 0.392008900642395,
      "learning_rate": 6.61299915458626e-06,
      "loss": 0.3992,
      "step": 1624
    },
    {
      "epoch": 1.3686131386861313,
      "grad_norm": 0.3973487317562103,
      "learning_rate": 6.608357885558485e-06,
      "loss": 0.4231,
      "step": 1625
    },
    {
      "epoch": 1.369455362156092,
      "grad_norm": 0.36207062005996704,
      "learning_rate": 6.603715070217779e-06,
      "loss": 0.4321,
      "step": 1626
    },
    {
      "epoch": 1.3702975856260529,
      "grad_norm": 0.3674567937850952,
      "learning_rate": 6.599070713027849e-06,
      "loss": 0.405,
      "step": 1627
    },
    {
      "epoch": 1.3711398090960134,
      "grad_norm": 0.38346025347709656,
      "learning_rate": 6.594424818453891e-06,
      "loss": 0.4096,
      "step": 1628
    },
    {
      "epoch": 1.3719820325659742,
      "grad_norm": 0.3886934518814087,
      "learning_rate": 6.589777390962575e-06,
      "loss": 0.4268,
      "step": 1629
    },
    {
      "epoch": 1.372824256035935,
      "grad_norm": 0.35428667068481445,
      "learning_rate": 6.58512843502205e-06,
      "loss": 0.4169,
      "step": 1630
    },
    {
      "epoch": 1.3736664795058955,
      "grad_norm": 0.3394586443901062,
      "learning_rate": 6.580477955101927e-06,
      "loss": 0.3929,
      "step": 1631
    },
    {
      "epoch": 1.3745087029758563,
      "grad_norm": 0.40521562099456787,
      "learning_rate": 6.5758259556732896e-06,
      "loss": 0.4213,
      "step": 1632
    },
    {
      "epoch": 1.375350926445817,
      "grad_norm": 0.41278350353240967,
      "learning_rate": 6.571172441208678e-06,
      "loss": 0.4334,
      "step": 1633
    },
    {
      "epoch": 1.3761931499157778,
      "grad_norm": 0.31815940141677856,
      "learning_rate": 6.566517416182088e-06,
      "loss": 0.3953,
      "step": 1634
    },
    {
      "epoch": 1.3770353733857383,
      "grad_norm": 0.3535056710243225,
      "learning_rate": 6.561860885068972e-06,
      "loss": 0.3888,
      "step": 1635
    },
    {
      "epoch": 1.377877596855699,
      "grad_norm": 0.4176722466945648,
      "learning_rate": 6.5572028523462275e-06,
      "loss": 0.4397,
      "step": 1636
    },
    {
      "epoch": 1.3787198203256597,
      "grad_norm": 0.34319019317626953,
      "learning_rate": 6.552543322492195e-06,
      "loss": 0.4015,
      "step": 1637
    },
    {
      "epoch": 1.3795620437956204,
      "grad_norm": 0.34834474325180054,
      "learning_rate": 6.547882299986658e-06,
      "loss": 0.413,
      "step": 1638
    },
    {
      "epoch": 1.3804042672655812,
      "grad_norm": 0.3871834874153137,
      "learning_rate": 6.54321978931083e-06,
      "loss": 0.428,
      "step": 1639
    },
    {
      "epoch": 1.381246490735542,
      "grad_norm": 0.34911099076271057,
      "learning_rate": 6.53855579494736e-06,
      "loss": 0.4189,
      "step": 1640
    },
    {
      "epoch": 1.3820887142055025,
      "grad_norm": 0.34846383333206177,
      "learning_rate": 6.53389032138032e-06,
      "loss": 0.4448,
      "step": 1641
    },
    {
      "epoch": 1.3829309376754633,
      "grad_norm": 0.31004661321640015,
      "learning_rate": 6.5292233730952074e-06,
      "loss": 0.4155,
      "step": 1642
    },
    {
      "epoch": 1.3837731611454238,
      "grad_norm": 0.41421273350715637,
      "learning_rate": 6.5245549545789335e-06,
      "loss": 0.4351,
      "step": 1643
    },
    {
      "epoch": 1.3846153846153846,
      "grad_norm": 0.34194040298461914,
      "learning_rate": 6.519885070319827e-06,
      "loss": 0.3897,
      "step": 1644
    },
    {
      "epoch": 1.3854576080853453,
      "grad_norm": 0.3616725504398346,
      "learning_rate": 6.515213724807621e-06,
      "loss": 0.4307,
      "step": 1645
    },
    {
      "epoch": 1.3862998315553061,
      "grad_norm": 0.4153498709201813,
      "learning_rate": 6.51054092253346e-06,
      "loss": 0.4535,
      "step": 1646
    },
    {
      "epoch": 1.3871420550252667,
      "grad_norm": 0.3441748023033142,
      "learning_rate": 6.505866667989884e-06,
      "loss": 0.3877,
      "step": 1647
    },
    {
      "epoch": 1.3879842784952274,
      "grad_norm": 0.38224121928215027,
      "learning_rate": 6.5011909656708305e-06,
      "loss": 0.4434,
      "step": 1648
    },
    {
      "epoch": 1.388826501965188,
      "grad_norm": 0.3421577215194702,
      "learning_rate": 6.49651382007163e-06,
      "loss": 0.3765,
      "step": 1649
    },
    {
      "epoch": 1.3896687254351487,
      "grad_norm": 0.3455880582332611,
      "learning_rate": 6.491835235688999e-06,
      "loss": 0.4241,
      "step": 1650
    },
    {
      "epoch": 1.3905109489051095,
      "grad_norm": 0.32430604100227356,
      "learning_rate": 6.487155217021039e-06,
      "loss": 0.4138,
      "step": 1651
    },
    {
      "epoch": 1.3913531723750703,
      "grad_norm": 0.3752616047859192,
      "learning_rate": 6.482473768567228e-06,
      "loss": 0.4073,
      "step": 1652
    },
    {
      "epoch": 1.3921953958450308,
      "grad_norm": 0.3761264979839325,
      "learning_rate": 6.477790894828422e-06,
      "loss": 0.4177,
      "step": 1653
    },
    {
      "epoch": 1.3930376193149916,
      "grad_norm": 0.347831666469574,
      "learning_rate": 6.473106600306842e-06,
      "loss": 0.4318,
      "step": 1654
    },
    {
      "epoch": 1.3938798427849521,
      "grad_norm": 0.36201292276382446,
      "learning_rate": 6.468420889506084e-06,
      "loss": 0.3991,
      "step": 1655
    },
    {
      "epoch": 1.394722066254913,
      "grad_norm": 0.3363351821899414,
      "learning_rate": 6.463733766931096e-06,
      "loss": 0.4189,
      "step": 1656
    },
    {
      "epoch": 1.3955642897248737,
      "grad_norm": 0.4006088674068451,
      "learning_rate": 6.459045237088189e-06,
      "loss": 0.4357,
      "step": 1657
    },
    {
      "epoch": 1.3964065131948344,
      "grad_norm": 0.37804391980171204,
      "learning_rate": 6.454355304485024e-06,
      "loss": 0.3964,
      "step": 1658
    },
    {
      "epoch": 1.397248736664795,
      "grad_norm": 0.34107768535614014,
      "learning_rate": 6.449663973630613e-06,
      "loss": 0.3935,
      "step": 1659
    },
    {
      "epoch": 1.3980909601347558,
      "grad_norm": 0.3647027611732483,
      "learning_rate": 6.444971249035312e-06,
      "loss": 0.4261,
      "step": 1660
    },
    {
      "epoch": 1.3989331836047165,
      "grad_norm": 0.3586445748806,
      "learning_rate": 6.440277135210815e-06,
      "loss": 0.4408,
      "step": 1661
    },
    {
      "epoch": 1.399775407074677,
      "grad_norm": 0.3841874897480011,
      "learning_rate": 6.435581636670154e-06,
      "loss": 0.4186,
      "step": 1662
    },
    {
      "epoch": 1.4006176305446378,
      "grad_norm": 0.3352213501930237,
      "learning_rate": 6.43088475792769e-06,
      "loss": 0.4178,
      "step": 1663
    },
    {
      "epoch": 1.4014598540145986,
      "grad_norm": 0.3513410687446594,
      "learning_rate": 6.426186503499114e-06,
      "loss": 0.4098,
      "step": 1664
    },
    {
      "epoch": 1.4023020774845594,
      "grad_norm": 0.36197108030319214,
      "learning_rate": 6.421486877901436e-06,
      "loss": 0.406,
      "step": 1665
    },
    {
      "epoch": 1.40314430095452,
      "grad_norm": 0.3798445761203766,
      "learning_rate": 6.4167858856529875e-06,
      "loss": 0.449,
      "step": 1666
    },
    {
      "epoch": 1.4039865244244807,
      "grad_norm": 0.3281676769256592,
      "learning_rate": 6.412083531273411e-06,
      "loss": 0.427,
      "step": 1667
    },
    {
      "epoch": 1.4048287478944412,
      "grad_norm": 0.3604828119277954,
      "learning_rate": 6.407379819283661e-06,
      "loss": 0.4119,
      "step": 1668
    },
    {
      "epoch": 1.405670971364402,
      "grad_norm": 0.40924736857414246,
      "learning_rate": 6.402674754205998e-06,
      "loss": 0.4207,
      "step": 1669
    },
    {
      "epoch": 1.4065131948343628,
      "grad_norm": 0.3530431091785431,
      "learning_rate": 6.397968340563978e-06,
      "loss": 0.4354,
      "step": 1670
    },
    {
      "epoch": 1.4073554183043235,
      "grad_norm": 0.3318769335746765,
      "learning_rate": 6.393260582882462e-06,
      "loss": 0.3769,
      "step": 1671
    },
    {
      "epoch": 1.408197641774284,
      "grad_norm": 0.6089239716529846,
      "learning_rate": 6.3885514856875945e-06,
      "loss": 0.4704,
      "step": 1672
    },
    {
      "epoch": 1.4090398652442448,
      "grad_norm": 0.392598420381546,
      "learning_rate": 6.383841053506813e-06,
      "loss": 0.4505,
      "step": 1673
    },
    {
      "epoch": 1.4098820887142054,
      "grad_norm": 0.3316216468811035,
      "learning_rate": 6.379129290868837e-06,
      "loss": 0.4007,
      "step": 1674
    },
    {
      "epoch": 1.4107243121841662,
      "grad_norm": 0.38152143359184265,
      "learning_rate": 6.3744162023036685e-06,
      "loss": 0.4557,
      "step": 1675
    },
    {
      "epoch": 1.411566535654127,
      "grad_norm": 0.3643946349620819,
      "learning_rate": 6.369701792342576e-06,
      "loss": 0.42,
      "step": 1676
    },
    {
      "epoch": 1.4124087591240877,
      "grad_norm": 0.3469342291355133,
      "learning_rate": 6.364986065518106e-06,
      "loss": 0.3939,
      "step": 1677
    },
    {
      "epoch": 1.4132509825940482,
      "grad_norm": 0.35626712441444397,
      "learning_rate": 6.360269026364071e-06,
      "loss": 0.461,
      "step": 1678
    },
    {
      "epoch": 1.414093206064009,
      "grad_norm": 0.40261387825012207,
      "learning_rate": 6.35555067941554e-06,
      "loss": 0.4269,
      "step": 1679
    },
    {
      "epoch": 1.4149354295339696,
      "grad_norm": 0.41415682435035706,
      "learning_rate": 6.350831029208844e-06,
      "loss": 0.3978,
      "step": 1680
    },
    {
      "epoch": 1.4157776530039303,
      "grad_norm": 0.34618473052978516,
      "learning_rate": 6.3461100802815625e-06,
      "loss": 0.4225,
      "step": 1681
    },
    {
      "epoch": 1.416619876473891,
      "grad_norm": 0.3744317889213562,
      "learning_rate": 6.34138783717253e-06,
      "loss": 0.4137,
      "step": 1682
    },
    {
      "epoch": 1.4174620999438519,
      "grad_norm": 0.3310936987400055,
      "learning_rate": 6.336664304421818e-06,
      "loss": 0.3625,
      "step": 1683
    },
    {
      "epoch": 1.4183043234138124,
      "grad_norm": 0.4007769227027893,
      "learning_rate": 6.331939486570745e-06,
      "loss": 0.4517,
      "step": 1684
    },
    {
      "epoch": 1.4191465468837732,
      "grad_norm": 0.3379608690738678,
      "learning_rate": 6.3272133881618596e-06,
      "loss": 0.4097,
      "step": 1685
    },
    {
      "epoch": 1.4199887703537337,
      "grad_norm": 0.35915008187294006,
      "learning_rate": 6.322486013738942e-06,
      "loss": 0.4097,
      "step": 1686
    },
    {
      "epoch": 1.4208309938236945,
      "grad_norm": 0.4049501121044159,
      "learning_rate": 6.317757367847005e-06,
      "loss": 0.3791,
      "step": 1687
    },
    {
      "epoch": 1.4216732172936553,
      "grad_norm": 0.479805052280426,
      "learning_rate": 6.313027455032274e-06,
      "loss": 0.4822,
      "step": 1688
    },
    {
      "epoch": 1.422515440763616,
      "grad_norm": 0.37043845653533936,
      "learning_rate": 6.308296279842204e-06,
      "loss": 0.3952,
      "step": 1689
    },
    {
      "epoch": 1.4233576642335766,
      "grad_norm": 0.5009329319000244,
      "learning_rate": 6.303563846825453e-06,
      "loss": 0.4435,
      "step": 1690
    },
    {
      "epoch": 1.4241998877035373,
      "grad_norm": 0.384129136800766,
      "learning_rate": 6.298830160531895e-06,
      "loss": 0.4327,
      "step": 1691
    },
    {
      "epoch": 1.425042111173498,
      "grad_norm": 0.3468339443206787,
      "learning_rate": 6.294095225512604e-06,
      "loss": 0.4116,
      "step": 1692
    },
    {
      "epoch": 1.4258843346434587,
      "grad_norm": 0.4745643734931946,
      "learning_rate": 6.289359046319862e-06,
      "loss": 0.4898,
      "step": 1693
    },
    {
      "epoch": 1.4267265581134194,
      "grad_norm": 0.36639225482940674,
      "learning_rate": 6.2846216275071395e-06,
      "loss": 0.3756,
      "step": 1694
    },
    {
      "epoch": 1.4275687815833802,
      "grad_norm": 0.3968314826488495,
      "learning_rate": 6.279882973629101e-06,
      "loss": 0.4254,
      "step": 1695
    },
    {
      "epoch": 1.428411005053341,
      "grad_norm": 0.3875367045402527,
      "learning_rate": 6.275143089241603e-06,
      "loss": 0.3883,
      "step": 1696
    },
    {
      "epoch": 1.4292532285233015,
      "grad_norm": 0.46734973788261414,
      "learning_rate": 6.270401978901678e-06,
      "loss": 0.4804,
      "step": 1697
    },
    {
      "epoch": 1.4300954519932623,
      "grad_norm": 0.404204398393631,
      "learning_rate": 6.265659647167542e-06,
      "loss": 0.4309,
      "step": 1698
    },
    {
      "epoch": 1.4309376754632228,
      "grad_norm": 0.33319371938705444,
      "learning_rate": 6.260916098598584e-06,
      "loss": 0.3725,
      "step": 1699
    },
    {
      "epoch": 1.4317798989331836,
      "grad_norm": 0.381969153881073,
      "learning_rate": 6.256171337755362e-06,
      "loss": 0.4001,
      "step": 1700
    },
    {
      "epoch": 1.4326221224031443,
      "grad_norm": 0.4366336762905121,
      "learning_rate": 6.2514253691996e-06,
      "loss": 0.4483,
      "step": 1701
    },
    {
      "epoch": 1.4334643458731051,
      "grad_norm": 0.3863450884819031,
      "learning_rate": 6.246678197494185e-06,
      "loss": 0.4626,
      "step": 1702
    },
    {
      "epoch": 1.4343065693430657,
      "grad_norm": 0.34888941049575806,
      "learning_rate": 6.241929827203156e-06,
      "loss": 0.4198,
      "step": 1703
    },
    {
      "epoch": 1.4351487928130264,
      "grad_norm": 0.41581615805625916,
      "learning_rate": 6.237180262891709e-06,
      "loss": 0.4636,
      "step": 1704
    },
    {
      "epoch": 1.435991016282987,
      "grad_norm": 0.3597257733345032,
      "learning_rate": 6.2324295091261885e-06,
      "loss": 0.4201,
      "step": 1705
    },
    {
      "epoch": 1.4368332397529477,
      "grad_norm": 0.3542833924293518,
      "learning_rate": 6.227677570474077e-06,
      "loss": 0.4305,
      "step": 1706
    },
    {
      "epoch": 1.4376754632229085,
      "grad_norm": 0.3412033021450043,
      "learning_rate": 6.222924451504001e-06,
      "loss": 0.4166,
      "step": 1707
    },
    {
      "epoch": 1.4385176866928693,
      "grad_norm": 0.3602859675884247,
      "learning_rate": 6.21817015678572e-06,
      "loss": 0.3943,
      "step": 1708
    },
    {
      "epoch": 1.4393599101628298,
      "grad_norm": 0.3602357506752014,
      "learning_rate": 6.213414690890125e-06,
      "loss": 0.4628,
      "step": 1709
    },
    {
      "epoch": 1.4402021336327906,
      "grad_norm": 0.31991007924079895,
      "learning_rate": 6.208658058389232e-06,
      "loss": 0.3811,
      "step": 1710
    },
    {
      "epoch": 1.4410443571027511,
      "grad_norm": 0.35336634516716003,
      "learning_rate": 6.203900263856177e-06,
      "loss": 0.4381,
      "step": 1711
    },
    {
      "epoch": 1.441886580572712,
      "grad_norm": 0.3043598234653473,
      "learning_rate": 6.19914131186522e-06,
      "loss": 0.3902,
      "step": 1712
    },
    {
      "epoch": 1.4427288040426727,
      "grad_norm": 0.331346720457077,
      "learning_rate": 6.194381206991723e-06,
      "loss": 0.4348,
      "step": 1713
    },
    {
      "epoch": 1.4435710275126334,
      "grad_norm": 0.3434552252292633,
      "learning_rate": 6.189619953812167e-06,
      "loss": 0.4302,
      "step": 1714
    },
    {
      "epoch": 1.444413250982594,
      "grad_norm": 0.3755924701690674,
      "learning_rate": 6.184857556904129e-06,
      "loss": 0.4126,
      "step": 1715
    },
    {
      "epoch": 1.4452554744525548,
      "grad_norm": 0.3912715017795563,
      "learning_rate": 6.180094020846291e-06,
      "loss": 0.4453,
      "step": 1716
    },
    {
      "epoch": 1.4460976979225155,
      "grad_norm": 0.31278514862060547,
      "learning_rate": 6.175329350218426e-06,
      "loss": 0.3871,
      "step": 1717
    },
    {
      "epoch": 1.446939921392476,
      "grad_norm": 0.3584263026714325,
      "learning_rate": 6.170563549601402e-06,
      "loss": 0.4281,
      "step": 1718
    },
    {
      "epoch": 1.4477821448624368,
      "grad_norm": 0.3482597768306732,
      "learning_rate": 6.165796623577171e-06,
      "loss": 0.4206,
      "step": 1719
    },
    {
      "epoch": 1.4486243683323976,
      "grad_norm": 0.43736737966537476,
      "learning_rate": 6.161028576728767e-06,
      "loss": 0.4244,
      "step": 1720
    },
    {
      "epoch": 1.4494665918023582,
      "grad_norm": 0.36675095558166504,
      "learning_rate": 6.156259413640302e-06,
      "loss": 0.4653,
      "step": 1721
    },
    {
      "epoch": 1.450308815272319,
      "grad_norm": 0.3223220705986023,
      "learning_rate": 6.15148913889696e-06,
      "loss": 0.4124,
      "step": 1722
    },
    {
      "epoch": 1.4511510387422797,
      "grad_norm": 0.37512096762657166,
      "learning_rate": 6.146717757084995e-06,
      "loss": 0.48,
      "step": 1723
    },
    {
      "epoch": 1.4519932622122402,
      "grad_norm": 0.369476854801178,
      "learning_rate": 6.141945272791727e-06,
      "loss": 0.4211,
      "step": 1724
    },
    {
      "epoch": 1.452835485682201,
      "grad_norm": 0.3191913962364197,
      "learning_rate": 6.1371716906055336e-06,
      "loss": 0.4145,
      "step": 1725
    },
    {
      "epoch": 1.4536777091521618,
      "grad_norm": 0.3159056603908539,
      "learning_rate": 6.132397015115846e-06,
      "loss": 0.3934,
      "step": 1726
    },
    {
      "epoch": 1.4545199326221225,
      "grad_norm": 0.35988160967826843,
      "learning_rate": 6.127621250913152e-06,
      "loss": 0.4571,
      "step": 1727
    },
    {
      "epoch": 1.455362156092083,
      "grad_norm": 0.37037160992622375,
      "learning_rate": 6.122844402588982e-06,
      "loss": 0.4296,
      "step": 1728
    },
    {
      "epoch": 1.4562043795620438,
      "grad_norm": 0.3710545301437378,
      "learning_rate": 6.11806647473591e-06,
      "loss": 0.4272,
      "step": 1729
    },
    {
      "epoch": 1.4570466030320044,
      "grad_norm": 0.33392849564552307,
      "learning_rate": 6.113287471947547e-06,
      "loss": 0.4217,
      "step": 1730
    },
    {
      "epoch": 1.4578888265019652,
      "grad_norm": 0.33905646204948425,
      "learning_rate": 6.10850739881854e-06,
      "loss": 0.4227,
      "step": 1731
    },
    {
      "epoch": 1.458731049971926,
      "grad_norm": 0.33890268206596375,
      "learning_rate": 6.103726259944562e-06,
      "loss": 0.4179,
      "step": 1732
    },
    {
      "epoch": 1.4595732734418867,
      "grad_norm": 0.3401795029640198,
      "learning_rate": 6.098944059922311e-06,
      "loss": 0.3962,
      "step": 1733
    },
    {
      "epoch": 1.4604154969118472,
      "grad_norm": 0.3709690272808075,
      "learning_rate": 6.094160803349508e-06,
      "loss": 0.4184,
      "step": 1734
    },
    {
      "epoch": 1.461257720381808,
      "grad_norm": 0.4229232966899872,
      "learning_rate": 6.089376494824886e-06,
      "loss": 0.4712,
      "step": 1735
    },
    {
      "epoch": 1.4620999438517686,
      "grad_norm": 0.3296484649181366,
      "learning_rate": 6.084591138948192e-06,
      "loss": 0.3886,
      "step": 1736
    },
    {
      "epoch": 1.4629421673217293,
      "grad_norm": 0.3299495577812195,
      "learning_rate": 6.079804740320181e-06,
      "loss": 0.3851,
      "step": 1737
    },
    {
      "epoch": 1.46378439079169,
      "grad_norm": 0.3509671688079834,
      "learning_rate": 6.075017303542605e-06,
      "loss": 0.4765,
      "step": 1738
    },
    {
      "epoch": 1.4646266142616509,
      "grad_norm": 0.32099881768226624,
      "learning_rate": 6.070228833218221e-06,
      "loss": 0.3809,
      "step": 1739
    },
    {
      "epoch": 1.4654688377316114,
      "grad_norm": 0.39293694496154785,
      "learning_rate": 6.065439333950776e-06,
      "loss": 0.4612,
      "step": 1740
    },
    {
      "epoch": 1.4663110612015722,
      "grad_norm": 0.3401546776294708,
      "learning_rate": 6.060648810345006e-06,
      "loss": 0.3919,
      "step": 1741
    },
    {
      "epoch": 1.4671532846715327,
      "grad_norm": 0.36801353096961975,
      "learning_rate": 6.055857267006631e-06,
      "loss": 0.4288,
      "step": 1742
    },
    {
      "epoch": 1.4679955081414935,
      "grad_norm": 0.4008091390132904,
      "learning_rate": 6.051064708542357e-06,
      "loss": 0.444,
      "step": 1743
    },
    {
      "epoch": 1.4688377316114543,
      "grad_norm": 0.33927202224731445,
      "learning_rate": 6.046271139559859e-06,
      "loss": 0.4031,
      "step": 1744
    },
    {
      "epoch": 1.469679955081415,
      "grad_norm": 0.3647124469280243,
      "learning_rate": 6.041476564667785e-06,
      "loss": 0.417,
      "step": 1745
    },
    {
      "epoch": 1.4705221785513756,
      "grad_norm": 0.32976004481315613,
      "learning_rate": 6.036680988475756e-06,
      "loss": 0.3815,
      "step": 1746
    },
    {
      "epoch": 1.4713644020213363,
      "grad_norm": 0.354787677526474,
      "learning_rate": 6.031884415594347e-06,
      "loss": 0.4395,
      "step": 1747
    },
    {
      "epoch": 1.472206625491297,
      "grad_norm": 0.3639145791530609,
      "learning_rate": 6.0270868506351e-06,
      "loss": 0.3909,
      "step": 1748
    },
    {
      "epoch": 1.4730488489612577,
      "grad_norm": 0.4057386815547943,
      "learning_rate": 6.022288298210502e-06,
      "loss": 0.4903,
      "step": 1749
    },
    {
      "epoch": 1.4738910724312184,
      "grad_norm": 0.3805537819862366,
      "learning_rate": 6.017488762933996e-06,
      "loss": 0.4474,
      "step": 1750
    },
    {
      "epoch": 1.4747332959011792,
      "grad_norm": 0.3587068021297455,
      "learning_rate": 6.012688249419966e-06,
      "loss": 0.4271,
      "step": 1751
    },
    {
      "epoch": 1.4755755193711397,
      "grad_norm": 0.37675634026527405,
      "learning_rate": 6.00788676228374e-06,
      "loss": 0.4406,
      "step": 1752
    },
    {
      "epoch": 1.4764177428411005,
      "grad_norm": 0.3774729073047638,
      "learning_rate": 6.003084306141579e-06,
      "loss": 0.4001,
      "step": 1753
    },
    {
      "epoch": 1.4772599663110613,
      "grad_norm": 0.33632904291152954,
      "learning_rate": 5.998280885610677e-06,
      "loss": 0.3967,
      "step": 1754
    },
    {
      "epoch": 1.4781021897810218,
      "grad_norm": 0.37056559324264526,
      "learning_rate": 5.993476505309154e-06,
      "loss": 0.479,
      "step": 1755
    },
    {
      "epoch": 1.4789444132509826,
      "grad_norm": 0.3666709065437317,
      "learning_rate": 5.988671169856056e-06,
      "loss": 0.3864,
      "step": 1756
    },
    {
      "epoch": 1.4797866367209433,
      "grad_norm": 0.35843202471733093,
      "learning_rate": 5.983864883871344e-06,
      "loss": 0.4113,
      "step": 1757
    },
    {
      "epoch": 1.4806288601909041,
      "grad_norm": 0.36991503834724426,
      "learning_rate": 5.979057651975893e-06,
      "loss": 0.4413,
      "step": 1758
    },
    {
      "epoch": 1.4814710836608647,
      "grad_norm": 0.36879754066467285,
      "learning_rate": 5.974249478791489e-06,
      "loss": 0.4196,
      "step": 1759
    },
    {
      "epoch": 1.4823133071308254,
      "grad_norm": 0.33545348048210144,
      "learning_rate": 5.969440368940823e-06,
      "loss": 0.4095,
      "step": 1760
    },
    {
      "epoch": 1.483155530600786,
      "grad_norm": 0.3630067706108093,
      "learning_rate": 5.964630327047485e-06,
      "loss": 0.4306,
      "step": 1761
    },
    {
      "epoch": 1.4839977540707467,
      "grad_norm": 0.35270413756370544,
      "learning_rate": 5.9598193577359606e-06,
      "loss": 0.4163,
      "step": 1762
    },
    {
      "epoch": 1.4848399775407075,
      "grad_norm": 0.3227611482143402,
      "learning_rate": 5.955007465631632e-06,
      "loss": 0.4084,
      "step": 1763
    },
    {
      "epoch": 1.4856822010106683,
      "grad_norm": 0.3537072539329529,
      "learning_rate": 5.9501946553607615e-06,
      "loss": 0.4356,
      "step": 1764
    },
    {
      "epoch": 1.4865244244806288,
      "grad_norm": 0.37992537021636963,
      "learning_rate": 5.945380931550497e-06,
      "loss": 0.4663,
      "step": 1765
    },
    {
      "epoch": 1.4873666479505896,
      "grad_norm": 0.34261927008628845,
      "learning_rate": 5.940566298828871e-06,
      "loss": 0.4182,
      "step": 1766
    },
    {
      "epoch": 1.4882088714205501,
      "grad_norm": 0.3496706783771515,
      "learning_rate": 5.935750761824777e-06,
      "loss": 0.4323,
      "step": 1767
    },
    {
      "epoch": 1.489051094890511,
      "grad_norm": 0.41907596588134766,
      "learning_rate": 5.93093432516799e-06,
      "loss": 0.4091,
      "step": 1768
    },
    {
      "epoch": 1.4898933183604717,
      "grad_norm": 0.3818108141422272,
      "learning_rate": 5.926116993489143e-06,
      "loss": 0.4102,
      "step": 1769
    },
    {
      "epoch": 1.4907355418304324,
      "grad_norm": 0.3289741575717926,
      "learning_rate": 5.921298771419731e-06,
      "loss": 0.4136,
      "step": 1770
    },
    {
      "epoch": 1.491577765300393,
      "grad_norm": 0.3686586022377014,
      "learning_rate": 5.916479663592107e-06,
      "loss": 0.3911,
      "step": 1771
    },
    {
      "epoch": 1.4924199887703538,
      "grad_norm": 0.3653160035610199,
      "learning_rate": 5.911659674639473e-06,
      "loss": 0.4257,
      "step": 1772
    },
    {
      "epoch": 1.4932622122403143,
      "grad_norm": 0.37425723671913147,
      "learning_rate": 5.906838809195879e-06,
      "loss": 0.45,
      "step": 1773
    },
    {
      "epoch": 1.494104435710275,
      "grad_norm": 0.36310699582099915,
      "learning_rate": 5.90201707189622e-06,
      "loss": 0.4143,
      "step": 1774
    },
    {
      "epoch": 1.4949466591802358,
      "grad_norm": 0.34949183464050293,
      "learning_rate": 5.897194467376226e-06,
      "loss": 0.4091,
      "step": 1775
    },
    {
      "epoch": 1.4957888826501966,
      "grad_norm": 0.37286433577537537,
      "learning_rate": 5.8923710002724595e-06,
      "loss": 0.4406,
      "step": 1776
    },
    {
      "epoch": 1.4966311061201572,
      "grad_norm": 0.32669660449028015,
      "learning_rate": 5.887546675222319e-06,
      "loss": 0.418,
      "step": 1777
    },
    {
      "epoch": 1.497473329590118,
      "grad_norm": 0.3827909231185913,
      "learning_rate": 5.8827214968640215e-06,
      "loss": 0.4602,
      "step": 1778
    },
    {
      "epoch": 1.4983155530600787,
      "grad_norm": 0.37040790915489197,
      "learning_rate": 5.877895469836604e-06,
      "loss": 0.4192,
      "step": 1779
    },
    {
      "epoch": 1.4991577765300392,
      "grad_norm": 0.3382098972797394,
      "learning_rate": 5.873068598779926e-06,
      "loss": 0.419,
      "step": 1780
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.32661136984825134,
      "learning_rate": 5.8682408883346535e-06,
      "loss": 0.4077,
      "step": 1781
    },
    {
      "epoch": 1.5008422234699608,
      "grad_norm": 0.4062345623970032,
      "learning_rate": 5.863412343142258e-06,
      "loss": 0.4195,
      "step": 1782
    },
    {
      "epoch": 1.5016844469399215,
      "grad_norm": 0.37449854612350464,
      "learning_rate": 5.858582967845018e-06,
      "loss": 0.4345,
      "step": 1783
    },
    {
      "epoch": 1.502526670409882,
      "grad_norm": 0.3559536337852478,
      "learning_rate": 5.853752767086007e-06,
      "loss": 0.4362,
      "step": 1784
    },
    {
      "epoch": 1.5033688938798426,
      "grad_norm": 0.3291703462600708,
      "learning_rate": 5.848921745509094e-06,
      "loss": 0.4215,
      "step": 1785
    },
    {
      "epoch": 1.5042111173498034,
      "grad_norm": 0.36910104751586914,
      "learning_rate": 5.844089907758935e-06,
      "loss": 0.4245,
      "step": 1786
    },
    {
      "epoch": 1.5050533408197642,
      "grad_norm": 0.40321680903434753,
      "learning_rate": 5.839257258480974e-06,
      "loss": 0.4564,
      "step": 1787
    },
    {
      "epoch": 1.505895564289725,
      "grad_norm": 0.362263560295105,
      "learning_rate": 5.8344238023214305e-06,
      "loss": 0.3892,
      "step": 1788
    },
    {
      "epoch": 1.5067377877596857,
      "grad_norm": 0.35872048139572144,
      "learning_rate": 5.829589543927305e-06,
      "loss": 0.4665,
      "step": 1789
    },
    {
      "epoch": 1.5075800112296462,
      "grad_norm": 0.37036749720573425,
      "learning_rate": 5.824754487946366e-06,
      "loss": 0.3963,
      "step": 1790
    },
    {
      "epoch": 1.508422234699607,
      "grad_norm": 0.3882778584957123,
      "learning_rate": 5.819918639027149e-06,
      "loss": 0.4588,
      "step": 1791
    },
    {
      "epoch": 1.5092644581695676,
      "grad_norm": 0.30543699860572815,
      "learning_rate": 5.815082001818951e-06,
      "loss": 0.378,
      "step": 1792
    },
    {
      "epoch": 1.5101066816395283,
      "grad_norm": 0.35238638520240784,
      "learning_rate": 5.8102445809718325e-06,
      "loss": 0.449,
      "step": 1793
    },
    {
      "epoch": 1.510948905109489,
      "grad_norm": 0.37585923075675964,
      "learning_rate": 5.805406381136598e-06,
      "loss": 0.4295,
      "step": 1794
    },
    {
      "epoch": 1.5117911285794499,
      "grad_norm": 0.3508363366127014,
      "learning_rate": 5.80056740696481e-06,
      "loss": 0.4211,
      "step": 1795
    },
    {
      "epoch": 1.5126333520494104,
      "grad_norm": 0.30608731508255005,
      "learning_rate": 5.79572766310877e-06,
      "loss": 0.3471,
      "step": 1796
    },
    {
      "epoch": 1.5134755755193712,
      "grad_norm": 0.35021746158599854,
      "learning_rate": 5.790887154221521e-06,
      "loss": 0.4499,
      "step": 1797
    },
    {
      "epoch": 1.5143177989893317,
      "grad_norm": 0.3389970064163208,
      "learning_rate": 5.7860458849568425e-06,
      "loss": 0.4447,
      "step": 1798
    },
    {
      "epoch": 1.5151600224592925,
      "grad_norm": 0.33112776279449463,
      "learning_rate": 5.781203859969242e-06,
      "loss": 0.4137,
      "step": 1799
    },
    {
      "epoch": 1.5160022459292533,
      "grad_norm": 0.34750548005104065,
      "learning_rate": 5.776361083913959e-06,
      "loss": 0.4111,
      "step": 1800
    },
    {
      "epoch": 1.516844469399214,
      "grad_norm": 0.3387426435947418,
      "learning_rate": 5.771517561446949e-06,
      "loss": 0.3766,
      "step": 1801
    },
    {
      "epoch": 1.5176866928691746,
      "grad_norm": 0.32886838912963867,
      "learning_rate": 5.766673297224889e-06,
      "loss": 0.4043,
      "step": 1802
    },
    {
      "epoch": 1.5185289163391353,
      "grad_norm": 0.35596925020217896,
      "learning_rate": 5.7618282959051685e-06,
      "loss": 0.4596,
      "step": 1803
    },
    {
      "epoch": 1.5193711398090959,
      "grad_norm": 0.33357831835746765,
      "learning_rate": 5.756982562145884e-06,
      "loss": 0.4286,
      "step": 1804
    },
    {
      "epoch": 1.5202133632790567,
      "grad_norm": 0.31267881393432617,
      "learning_rate": 5.75213610060584e-06,
      "loss": 0.4224,
      "step": 1805
    },
    {
      "epoch": 1.5210555867490174,
      "grad_norm": 0.3210352063179016,
      "learning_rate": 5.747288915944533e-06,
      "loss": 0.3842,
      "step": 1806
    },
    {
      "epoch": 1.5218978102189782,
      "grad_norm": 0.3441021740436554,
      "learning_rate": 5.742441012822166e-06,
      "loss": 0.4452,
      "step": 1807
    },
    {
      "epoch": 1.522740033688939,
      "grad_norm": 0.3384980261325836,
      "learning_rate": 5.737592395899623e-06,
      "loss": 0.4376,
      "step": 1808
    },
    {
      "epoch": 1.5235822571588995,
      "grad_norm": 0.3519379198551178,
      "learning_rate": 5.7327430698384775e-06,
      "loss": 0.4076,
      "step": 1809
    },
    {
      "epoch": 1.52442448062886,
      "grad_norm": 0.3666609525680542,
      "learning_rate": 5.727893039300987e-06,
      "loss": 0.4285,
      "step": 1810
    },
    {
      "epoch": 1.5252667040988208,
      "grad_norm": 0.34244969487190247,
      "learning_rate": 5.7230423089500845e-06,
      "loss": 0.423,
      "step": 1811
    },
    {
      "epoch": 1.5261089275687816,
      "grad_norm": 0.35458362102508545,
      "learning_rate": 5.718190883449373e-06,
      "loss": 0.4455,
      "step": 1812
    },
    {
      "epoch": 1.5269511510387423,
      "grad_norm": 0.3420352339744568,
      "learning_rate": 5.713338767463129e-06,
      "loss": 0.3855,
      "step": 1813
    },
    {
      "epoch": 1.5277933745087031,
      "grad_norm": 0.36626195907592773,
      "learning_rate": 5.708485965656291e-06,
      "loss": 0.4388,
      "step": 1814
    },
    {
      "epoch": 1.5286355979786637,
      "grad_norm": 0.32654279470443726,
      "learning_rate": 5.703632482694453e-06,
      "loss": 0.4093,
      "step": 1815
    },
    {
      "epoch": 1.5294778214486242,
      "grad_norm": 0.39345836639404297,
      "learning_rate": 5.698778323243871e-06,
      "loss": 0.4597,
      "step": 1816
    },
    {
      "epoch": 1.530320044918585,
      "grad_norm": 0.344894140958786,
      "learning_rate": 5.693923491971445e-06,
      "loss": 0.4206,
      "step": 1817
    },
    {
      "epoch": 1.5311622683885457,
      "grad_norm": 0.3713409900665283,
      "learning_rate": 5.689067993544726e-06,
      "loss": 0.4265,
      "step": 1818
    },
    {
      "epoch": 1.5320044918585065,
      "grad_norm": 0.3840521275997162,
      "learning_rate": 5.6842118326318996e-06,
      "loss": 0.45,
      "step": 1819
    },
    {
      "epoch": 1.5328467153284673,
      "grad_norm": 0.41773805022239685,
      "learning_rate": 5.679355013901797e-06,
      "loss": 0.4527,
      "step": 1820
    },
    {
      "epoch": 1.5336889387984278,
      "grad_norm": 0.365517258644104,
      "learning_rate": 5.674497542023875e-06,
      "loss": 0.4075,
      "step": 1821
    },
    {
      "epoch": 1.5345311622683886,
      "grad_norm": 0.4107154905796051,
      "learning_rate": 5.669639421668221e-06,
      "loss": 0.441,
      "step": 1822
    },
    {
      "epoch": 1.5353733857383491,
      "grad_norm": 0.3592158257961273,
      "learning_rate": 5.664780657505547e-06,
      "loss": 0.4088,
      "step": 1823
    },
    {
      "epoch": 1.53621560920831,
      "grad_norm": 0.3922053873538971,
      "learning_rate": 5.659921254207183e-06,
      "loss": 0.4422,
      "step": 1824
    },
    {
      "epoch": 1.5370578326782707,
      "grad_norm": 0.3290477991104126,
      "learning_rate": 5.65506121644507e-06,
      "loss": 0.3746,
      "step": 1825
    },
    {
      "epoch": 1.5379000561482314,
      "grad_norm": 0.338505357503891,
      "learning_rate": 5.650200548891764e-06,
      "loss": 0.4319,
      "step": 1826
    },
    {
      "epoch": 1.538742279618192,
      "grad_norm": 0.33921101689338684,
      "learning_rate": 5.645339256220427e-06,
      "loss": 0.4096,
      "step": 1827
    },
    {
      "epoch": 1.5395845030881528,
      "grad_norm": 0.3518275022506714,
      "learning_rate": 5.640477343104815e-06,
      "loss": 0.4388,
      "step": 1828
    },
    {
      "epoch": 1.5404267265581133,
      "grad_norm": 0.34286606311798096,
      "learning_rate": 5.635614814219289e-06,
      "loss": 0.4192,
      "step": 1829
    },
    {
      "epoch": 1.541268950028074,
      "grad_norm": 0.33660122752189636,
      "learning_rate": 5.630751674238796e-06,
      "loss": 0.3952,
      "step": 1830
    },
    {
      "epoch": 1.5421111734980348,
      "grad_norm": 0.3654322028160095,
      "learning_rate": 5.625887927838872e-06,
      "loss": 0.4453,
      "step": 1831
    },
    {
      "epoch": 1.5429533969679956,
      "grad_norm": 0.3303602635860443,
      "learning_rate": 5.6210235796956395e-06,
      "loss": 0.3892,
      "step": 1832
    },
    {
      "epoch": 1.5437956204379562,
      "grad_norm": 0.3896334171295166,
      "learning_rate": 5.616158634485793e-06,
      "loss": 0.4662,
      "step": 1833
    },
    {
      "epoch": 1.544637843907917,
      "grad_norm": 0.36496981978416443,
      "learning_rate": 5.61129309688661e-06,
      "loss": 0.4095,
      "step": 1834
    },
    {
      "epoch": 1.5454800673778775,
      "grad_norm": 0.3529205024242401,
      "learning_rate": 5.606426971575926e-06,
      "loss": 0.4546,
      "step": 1835
    },
    {
      "epoch": 1.5463222908478382,
      "grad_norm": 0.3384782671928406,
      "learning_rate": 5.601560263232153e-06,
      "loss": 0.414,
      "step": 1836
    },
    {
      "epoch": 1.547164514317799,
      "grad_norm": 0.3420189917087555,
      "learning_rate": 5.596692976534256e-06,
      "loss": 0.4333,
      "step": 1837
    },
    {
      "epoch": 1.5480067377877598,
      "grad_norm": 0.35726824402809143,
      "learning_rate": 5.591825116161758e-06,
      "loss": 0.4488,
      "step": 1838
    },
    {
      "epoch": 1.5488489612577205,
      "grad_norm": 0.35384902358055115,
      "learning_rate": 5.5869566867947344e-06,
      "loss": 0.4267,
      "step": 1839
    },
    {
      "epoch": 1.549691184727681,
      "grad_norm": 0.3589079976081848,
      "learning_rate": 5.582087693113808e-06,
      "loss": 0.4295,
      "step": 1840
    },
    {
      "epoch": 1.5505334081976416,
      "grad_norm": 0.3449692130088806,
      "learning_rate": 5.577218139800143e-06,
      "loss": 0.4041,
      "step": 1841
    },
    {
      "epoch": 1.5513756316676024,
      "grad_norm": 0.3510948717594147,
      "learning_rate": 5.572348031535442e-06,
      "loss": 0.4085,
      "step": 1842
    },
    {
      "epoch": 1.5522178551375632,
      "grad_norm": 0.3463253080844879,
      "learning_rate": 5.567477373001942e-06,
      "loss": 0.4114,
      "step": 1843
    },
    {
      "epoch": 1.553060078607524,
      "grad_norm": 0.37737277150154114,
      "learning_rate": 5.562606168882404e-06,
      "loss": 0.4486,
      "step": 1844
    },
    {
      "epoch": 1.5539023020774847,
      "grad_norm": 0.3449036777019501,
      "learning_rate": 5.557734423860122e-06,
      "loss": 0.4237,
      "step": 1845
    },
    {
      "epoch": 1.5547445255474452,
      "grad_norm": 0.32605794072151184,
      "learning_rate": 5.552862142618906e-06,
      "loss": 0.3612,
      "step": 1846
    },
    {
      "epoch": 1.5555867490174058,
      "grad_norm": 0.39051705598831177,
      "learning_rate": 5.547989329843079e-06,
      "loss": 0.4368,
      "step": 1847
    },
    {
      "epoch": 1.5564289724873666,
      "grad_norm": 0.3814125657081604,
      "learning_rate": 5.543115990217478e-06,
      "loss": 0.4501,
      "step": 1848
    },
    {
      "epoch": 1.5572711959573273,
      "grad_norm": 0.36002981662750244,
      "learning_rate": 5.538242128427444e-06,
      "loss": 0.425,
      "step": 1849
    },
    {
      "epoch": 1.558113419427288,
      "grad_norm": 0.39874234795570374,
      "learning_rate": 5.533367749158829e-06,
      "loss": 0.4476,
      "step": 1850
    },
    {
      "epoch": 1.5589556428972489,
      "grad_norm": 0.41082867980003357,
      "learning_rate": 5.528492857097966e-06,
      "loss": 0.4206,
      "step": 1851
    },
    {
      "epoch": 1.5597978663672094,
      "grad_norm": 0.3912190794944763,
      "learning_rate": 5.523617456931696e-06,
      "loss": 0.3827,
      "step": 1852
    },
    {
      "epoch": 1.5606400898371702,
      "grad_norm": 0.37453386187553406,
      "learning_rate": 5.518741553347341e-06,
      "loss": 0.4599,
      "step": 1853
    },
    {
      "epoch": 1.5614823133071307,
      "grad_norm": 0.3898167014122009,
      "learning_rate": 5.513865151032709e-06,
      "loss": 0.396,
      "step": 1854
    },
    {
      "epoch": 1.5623245367770915,
      "grad_norm": 0.34454745054244995,
      "learning_rate": 5.508988254676087e-06,
      "loss": 0.4113,
      "step": 1855
    },
    {
      "epoch": 1.5631667602470523,
      "grad_norm": 0.38040435314178467,
      "learning_rate": 5.504110868966239e-06,
      "loss": 0.398,
      "step": 1856
    },
    {
      "epoch": 1.564008983717013,
      "grad_norm": 0.33942267298698425,
      "learning_rate": 5.499232998592399e-06,
      "loss": 0.4031,
      "step": 1857
    },
    {
      "epoch": 1.5648512071869736,
      "grad_norm": 0.3592524230480194,
      "learning_rate": 5.49435464824426e-06,
      "loss": 0.4283,
      "step": 1858
    },
    {
      "epoch": 1.5656934306569343,
      "grad_norm": 0.35318848490715027,
      "learning_rate": 5.489475822611988e-06,
      "loss": 0.3964,
      "step": 1859
    },
    {
      "epoch": 1.5665356541268949,
      "grad_norm": 0.34393244981765747,
      "learning_rate": 5.484596526386198e-06,
      "loss": 0.4419,
      "step": 1860
    },
    {
      "epoch": 1.5673778775968557,
      "grad_norm": 0.34705081582069397,
      "learning_rate": 5.479716764257961e-06,
      "loss": 0.4264,
      "step": 1861
    },
    {
      "epoch": 1.5682201010668164,
      "grad_norm": 0.38587233424186707,
      "learning_rate": 5.474836540918791e-06,
      "loss": 0.4168,
      "step": 1862
    },
    {
      "epoch": 1.5690623245367772,
      "grad_norm": 0.36382216215133667,
      "learning_rate": 5.469955861060653e-06,
      "loss": 0.4171,
      "step": 1863
    },
    {
      "epoch": 1.5699045480067377,
      "grad_norm": 0.3396497666835785,
      "learning_rate": 5.465074729375944e-06,
      "loss": 0.4297,
      "step": 1864
    },
    {
      "epoch": 1.5707467714766985,
      "grad_norm": 0.3795713484287262,
      "learning_rate": 5.4601931505575e-06,
      "loss": 0.4355,
      "step": 1865
    },
    {
      "epoch": 1.571588994946659,
      "grad_norm": 0.33509719371795654,
      "learning_rate": 5.455311129298586e-06,
      "loss": 0.3789,
      "step": 1866
    },
    {
      "epoch": 1.5724312184166198,
      "grad_norm": 0.319144070148468,
      "learning_rate": 5.450428670292889e-06,
      "loss": 0.4237,
      "step": 1867
    },
    {
      "epoch": 1.5732734418865806,
      "grad_norm": 0.37985891103744507,
      "learning_rate": 5.445545778234522e-06,
      "loss": 0.4591,
      "step": 1868
    },
    {
      "epoch": 1.5741156653565413,
      "grad_norm": 0.34583935141563416,
      "learning_rate": 5.44066245781801e-06,
      "loss": 0.4426,
      "step": 1869
    },
    {
      "epoch": 1.5749578888265021,
      "grad_norm": 0.32699471712112427,
      "learning_rate": 5.435778713738292e-06,
      "loss": 0.3861,
      "step": 1870
    },
    {
      "epoch": 1.5758001122964627,
      "grad_norm": 0.3062248229980469,
      "learning_rate": 5.430894550690714e-06,
      "loss": 0.4212,
      "step": 1871
    },
    {
      "epoch": 1.5766423357664232,
      "grad_norm": 0.32369568943977356,
      "learning_rate": 5.426009973371026e-06,
      "loss": 0.4156,
      "step": 1872
    },
    {
      "epoch": 1.577484559236384,
      "grad_norm": 0.3235737979412079,
      "learning_rate": 5.421124986475371e-06,
      "loss": 0.4213,
      "step": 1873
    },
    {
      "epoch": 1.5783267827063447,
      "grad_norm": 0.3202875554561615,
      "learning_rate": 5.416239594700294e-06,
      "loss": 0.439,
      "step": 1874
    },
    {
      "epoch": 1.5791690061763055,
      "grad_norm": 0.31741461157798767,
      "learning_rate": 5.4113538027427245e-06,
      "loss": 0.4535,
      "step": 1875
    },
    {
      "epoch": 1.5800112296462663,
      "grad_norm": 0.3218853175640106,
      "learning_rate": 5.4064676152999765e-06,
      "loss": 0.3796,
      "step": 1876
    },
    {
      "epoch": 1.5808534531162268,
      "grad_norm": 0.3223547339439392,
      "learning_rate": 5.4015810370697445e-06,
      "loss": 0.3971,
      "step": 1877
    },
    {
      "epoch": 1.5816956765861874,
      "grad_norm": 0.3761754631996155,
      "learning_rate": 5.396694072750099e-06,
      "loss": 0.4385,
      "step": 1878
    },
    {
      "epoch": 1.5825379000561481,
      "grad_norm": 0.3272077739238739,
      "learning_rate": 5.391806727039484e-06,
      "loss": 0.4484,
      "step": 1879
    },
    {
      "epoch": 1.583380123526109,
      "grad_norm": 0.3572317659854889,
      "learning_rate": 5.386919004636706e-06,
      "loss": 0.4045,
      "step": 1880
    },
    {
      "epoch": 1.5842223469960697,
      "grad_norm": 0.3417137563228607,
      "learning_rate": 5.382030910240936e-06,
      "loss": 0.44,
      "step": 1881
    },
    {
      "epoch": 1.5850645704660304,
      "grad_norm": 0.3452729284763336,
      "learning_rate": 5.3771424485517034e-06,
      "loss": 0.384,
      "step": 1882
    },
    {
      "epoch": 1.585906793935991,
      "grad_norm": 0.3340751826763153,
      "learning_rate": 5.3722536242688895e-06,
      "loss": 0.4013,
      "step": 1883
    },
    {
      "epoch": 1.5867490174059518,
      "grad_norm": 0.3218977451324463,
      "learning_rate": 5.367364442092724e-06,
      "loss": 0.4016,
      "step": 1884
    },
    {
      "epoch": 1.5875912408759123,
      "grad_norm": 0.3342333436012268,
      "learning_rate": 5.362474906723781e-06,
      "loss": 0.4191,
      "step": 1885
    },
    {
      "epoch": 1.588433464345873,
      "grad_norm": 0.35919150710105896,
      "learning_rate": 5.357585022862977e-06,
      "loss": 0.4421,
      "step": 1886
    },
    {
      "epoch": 1.5892756878158338,
      "grad_norm": 0.3553474247455597,
      "learning_rate": 5.352694795211555e-06,
      "loss": 0.4395,
      "step": 1887
    },
    {
      "epoch": 1.5901179112857946,
      "grad_norm": 0.3446367681026459,
      "learning_rate": 5.347804228471101e-06,
      "loss": 0.4134,
      "step": 1888
    },
    {
      "epoch": 1.5909601347557552,
      "grad_norm": 0.3193166255950928,
      "learning_rate": 5.342913327343515e-06,
      "loss": 0.4012,
      "step": 1889
    },
    {
      "epoch": 1.591802358225716,
      "grad_norm": 0.37085726857185364,
      "learning_rate": 5.338022096531028e-06,
      "loss": 0.4339,
      "step": 1890
    },
    {
      "epoch": 1.5926445816956765,
      "grad_norm": 0.3162636160850525,
      "learning_rate": 5.33313054073618e-06,
      "loss": 0.385,
      "step": 1891
    },
    {
      "epoch": 1.5934868051656372,
      "grad_norm": 0.3616180419921875,
      "learning_rate": 5.32823866466183e-06,
      "loss": 0.4827,
      "step": 1892
    },
    {
      "epoch": 1.594329028635598,
      "grad_norm": 0.3515593707561493,
      "learning_rate": 5.3233464730111426e-06,
      "loss": 0.4156,
      "step": 1893
    },
    {
      "epoch": 1.5951712521055588,
      "grad_norm": 0.34745118021965027,
      "learning_rate": 5.318453970487582e-06,
      "loss": 0.3886,
      "step": 1894
    },
    {
      "epoch": 1.5960134755755195,
      "grad_norm": 0.35733386874198914,
      "learning_rate": 5.31356116179492e-06,
      "loss": 0.4231,
      "step": 1895
    },
    {
      "epoch": 1.59685569904548,
      "grad_norm": 0.3735392391681671,
      "learning_rate": 5.308668051637213e-06,
      "loss": 0.4028,
      "step": 1896
    },
    {
      "epoch": 1.5976979225154406,
      "grad_norm": 0.35290831327438354,
      "learning_rate": 5.303774644718813e-06,
      "loss": 0.4176,
      "step": 1897
    },
    {
      "epoch": 1.5985401459854014,
      "grad_norm": 0.325775682926178,
      "learning_rate": 5.298880945744356e-06,
      "loss": 0.3825,
      "step": 1898
    },
    {
      "epoch": 1.5993823694553622,
      "grad_norm": 0.388995498418808,
      "learning_rate": 5.29398695941876e-06,
      "loss": 0.4394,
      "step": 1899
    },
    {
      "epoch": 1.600224592925323,
      "grad_norm": 0.3513931930065155,
      "learning_rate": 5.289092690447215e-06,
      "loss": 0.4221,
      "step": 1900
    },
    {
      "epoch": 1.6010668163952837,
      "grad_norm": 0.3548142611980438,
      "learning_rate": 5.284198143535188e-06,
      "loss": 0.4184,
      "step": 1901
    },
    {
      "epoch": 1.6019090398652442,
      "grad_norm": 0.3796072006225586,
      "learning_rate": 5.279303323388413e-06,
      "loss": 0.3986,
      "step": 1902
    },
    {
      "epoch": 1.6027512633352048,
      "grad_norm": 0.38613030314445496,
      "learning_rate": 5.274408234712881e-06,
      "loss": 0.4213,
      "step": 1903
    },
    {
      "epoch": 1.6035934868051656,
      "grad_norm": 0.3198748826980591,
      "learning_rate": 5.2695128822148466e-06,
      "loss": 0.3925,
      "step": 1904
    },
    {
      "epoch": 1.6044357102751263,
      "grad_norm": 0.3678312301635742,
      "learning_rate": 5.2646172706008154e-06,
      "loss": 0.4713,
      "step": 1905
    },
    {
      "epoch": 1.605277933745087,
      "grad_norm": 0.371604859828949,
      "learning_rate": 5.259721404577546e-06,
      "loss": 0.4432,
      "step": 1906
    },
    {
      "epoch": 1.6061201572150479,
      "grad_norm": 0.41272178292274475,
      "learning_rate": 5.254825288852033e-06,
      "loss": 0.4116,
      "step": 1907
    },
    {
      "epoch": 1.6069623806850084,
      "grad_norm": 0.34213462471961975,
      "learning_rate": 5.249928928131523e-06,
      "loss": 0.4136,
      "step": 1908
    },
    {
      "epoch": 1.607804604154969,
      "grad_norm": 0.401568204164505,
      "learning_rate": 5.245032327123488e-06,
      "loss": 0.465,
      "step": 1909
    },
    {
      "epoch": 1.6086468276249297,
      "grad_norm": 0.377538800239563,
      "learning_rate": 5.240135490535635e-06,
      "loss": 0.4114,
      "step": 1910
    },
    {
      "epoch": 1.6094890510948905,
      "grad_norm": 0.3539682626724243,
      "learning_rate": 5.235238423075899e-06,
      "loss": 0.4079,
      "step": 1911
    },
    {
      "epoch": 1.6103312745648513,
      "grad_norm": 0.32989129424095154,
      "learning_rate": 5.230341129452434e-06,
      "loss": 0.4299,
      "step": 1912
    },
    {
      "epoch": 1.611173498034812,
      "grad_norm": 0.37851524353027344,
      "learning_rate": 5.225443614373614e-06,
      "loss": 0.4403,
      "step": 1913
    },
    {
      "epoch": 1.6120157215047726,
      "grad_norm": 0.4279498755931854,
      "learning_rate": 5.220545882548024e-06,
      "loss": 0.446,
      "step": 1914
    },
    {
      "epoch": 1.6128579449747333,
      "grad_norm": 0.33705705404281616,
      "learning_rate": 5.215647938684458e-06,
      "loss": 0.4312,
      "step": 1915
    },
    {
      "epoch": 1.6137001684446939,
      "grad_norm": 0.40331628918647766,
      "learning_rate": 5.210749787491913e-06,
      "loss": 0.4058,
      "step": 1916
    },
    {
      "epoch": 1.6145423919146547,
      "grad_norm": 0.38453754782676697,
      "learning_rate": 5.20585143367959e-06,
      "loss": 0.4075,
      "step": 1917
    },
    {
      "epoch": 1.6153846153846154,
      "grad_norm": 0.3300616443157196,
      "learning_rate": 5.200952881956875e-06,
      "loss": 0.4041,
      "step": 1918
    },
    {
      "epoch": 1.6162268388545762,
      "grad_norm": 0.35155466198921204,
      "learning_rate": 5.196054137033354e-06,
      "loss": 0.395,
      "step": 1919
    },
    {
      "epoch": 1.6170690623245367,
      "grad_norm": 0.38718655705451965,
      "learning_rate": 5.191155203618796e-06,
      "loss": 0.4298,
      "step": 1920
    },
    {
      "epoch": 1.6179112857944975,
      "grad_norm": 0.32645291090011597,
      "learning_rate": 5.186256086423148e-06,
      "loss": 0.4107,
      "step": 1921
    },
    {
      "epoch": 1.618753509264458,
      "grad_norm": 0.3717871308326721,
      "learning_rate": 5.181356790156539e-06,
      "loss": 0.4557,
      "step": 1922
    },
    {
      "epoch": 1.6195957327344188,
      "grad_norm": 0.3418821394443512,
      "learning_rate": 5.176457319529264e-06,
      "loss": 0.4079,
      "step": 1923
    },
    {
      "epoch": 1.6204379562043796,
      "grad_norm": 0.35428687930107117,
      "learning_rate": 5.171557679251788e-06,
      "loss": 0.4317,
      "step": 1924
    },
    {
      "epoch": 1.6212801796743403,
      "grad_norm": 0.3820854723453522,
      "learning_rate": 5.166657874034745e-06,
      "loss": 0.4351,
      "step": 1925
    },
    {
      "epoch": 1.6221224031443011,
      "grad_norm": 0.3292276859283447,
      "learning_rate": 5.161757908588917e-06,
      "loss": 0.4156,
      "step": 1926
    },
    {
      "epoch": 1.6229646266142617,
      "grad_norm": 0.3266378343105316,
      "learning_rate": 5.156857787625249e-06,
      "loss": 0.388,
      "step": 1927
    },
    {
      "epoch": 1.6238068500842222,
      "grad_norm": 0.3950997292995453,
      "learning_rate": 5.15195751585483e-06,
      "loss": 0.4573,
      "step": 1928
    },
    {
      "epoch": 1.624649073554183,
      "grad_norm": 0.3299473226070404,
      "learning_rate": 5.147057097988898e-06,
      "loss": 0.362,
      "step": 1929
    },
    {
      "epoch": 1.6254912970241437,
      "grad_norm": 0.3683773875236511,
      "learning_rate": 5.142156538738827e-06,
      "loss": 0.4547,
      "step": 1930
    },
    {
      "epoch": 1.6263335204941045,
      "grad_norm": 0.3501223623752594,
      "learning_rate": 5.137255842816132e-06,
      "loss": 0.3841,
      "step": 1931
    },
    {
      "epoch": 1.6271757439640653,
      "grad_norm": 0.3782899081707001,
      "learning_rate": 5.132355014932455e-06,
      "loss": 0.4174,
      "step": 1932
    },
    {
      "epoch": 1.6280179674340258,
      "grad_norm": 0.3692985475063324,
      "learning_rate": 5.127454059799567e-06,
      "loss": 0.4467,
      "step": 1933
    },
    {
      "epoch": 1.6288601909039864,
      "grad_norm": 0.3853273391723633,
      "learning_rate": 5.122552982129362e-06,
      "loss": 0.4286,
      "step": 1934
    },
    {
      "epoch": 1.6297024143739471,
      "grad_norm": 0.3312138319015503,
      "learning_rate": 5.1176517866338495e-06,
      "loss": 0.3862,
      "step": 1935
    },
    {
      "epoch": 1.630544637843908,
      "grad_norm": 0.35506272315979004,
      "learning_rate": 5.112750478025156e-06,
      "loss": 0.4061,
      "step": 1936
    },
    {
      "epoch": 1.6313868613138687,
      "grad_norm": 0.33168190717697144,
      "learning_rate": 5.1078490610155105e-06,
      "loss": 0.3652,
      "step": 1937
    },
    {
      "epoch": 1.6322290847838294,
      "grad_norm": 0.3665081262588501,
      "learning_rate": 5.102947540317254e-06,
      "loss": 0.46,
      "step": 1938
    },
    {
      "epoch": 1.63307130825379,
      "grad_norm": 0.33602267503738403,
      "learning_rate": 5.09804592064282e-06,
      "loss": 0.3974,
      "step": 1939
    },
    {
      "epoch": 1.6339135317237508,
      "grad_norm": 0.38545602560043335,
      "learning_rate": 5.093144206704743e-06,
      "loss": 0.432,
      "step": 1940
    },
    {
      "epoch": 1.6347557551937113,
      "grad_norm": 0.3584495782852173,
      "learning_rate": 5.088242403215644e-06,
      "loss": 0.4134,
      "step": 1941
    },
    {
      "epoch": 1.635597978663672,
      "grad_norm": 0.34780940413475037,
      "learning_rate": 5.083340514888232e-06,
      "loss": 0.4093,
      "step": 1942
    },
    {
      "epoch": 1.6364402021336328,
      "grad_norm": 0.4163903594017029,
      "learning_rate": 5.078438546435298e-06,
      "loss": 0.4732,
      "step": 1943
    },
    {
      "epoch": 1.6372824256035936,
      "grad_norm": 0.3261132538318634,
      "learning_rate": 5.073536502569708e-06,
      "loss": 0.3668,
      "step": 1944
    },
    {
      "epoch": 1.6381246490735542,
      "grad_norm": 0.3966606557369232,
      "learning_rate": 5.0686343880044044e-06,
      "loss": 0.4649,
      "step": 1945
    },
    {
      "epoch": 1.638966872543515,
      "grad_norm": 0.3614017963409424,
      "learning_rate": 5.063732207452391e-06,
      "loss": 0.4235,
      "step": 1946
    },
    {
      "epoch": 1.6398090960134755,
      "grad_norm": 0.3801117539405823,
      "learning_rate": 5.058829965626742e-06,
      "loss": 0.3902,
      "step": 1947
    },
    {
      "epoch": 1.6406513194834362,
      "grad_norm": 0.3918428421020508,
      "learning_rate": 5.053927667240585e-06,
      "loss": 0.3812,
      "step": 1948
    },
    {
      "epoch": 1.641493542953397,
      "grad_norm": 0.3896355628967285,
      "learning_rate": 5.049025317007108e-06,
      "loss": 0.4262,
      "step": 1949
    },
    {
      "epoch": 1.6423357664233578,
      "grad_norm": 0.35838058590888977,
      "learning_rate": 5.0441229196395416e-06,
      "loss": 0.4378,
      "step": 1950
    },
    {
      "epoch": 1.6431779898933183,
      "grad_norm": 0.3511332869529724,
      "learning_rate": 5.039220479851167e-06,
      "loss": 0.4002,
      "step": 1951
    },
    {
      "epoch": 1.644020213363279,
      "grad_norm": 0.399654746055603,
      "learning_rate": 5.034318002355305e-06,
      "loss": 0.4274,
      "step": 1952
    },
    {
      "epoch": 1.6448624368332396,
      "grad_norm": 0.3877694010734558,
      "learning_rate": 5.029415491865311e-06,
      "loss": 0.4296,
      "step": 1953
    },
    {
      "epoch": 1.6457046603032004,
      "grad_norm": 0.3473438620567322,
      "learning_rate": 5.024512953094577e-06,
      "loss": 0.3951,
      "step": 1954
    },
    {
      "epoch": 1.6465468837731612,
      "grad_norm": 0.3119718134403229,
      "learning_rate": 5.019610390756513e-06,
      "loss": 0.3586,
      "step": 1955
    },
    {
      "epoch": 1.647389107243122,
      "grad_norm": 0.3869885504245758,
      "learning_rate": 5.014707809564562e-06,
      "loss": 0.4536,
      "step": 1956
    },
    {
      "epoch": 1.6482313307130827,
      "grad_norm": 0.34311357140541077,
      "learning_rate": 5.009805214232177e-06,
      "loss": 0.3827,
      "step": 1957
    },
    {
      "epoch": 1.6490735541830432,
      "grad_norm": 0.3768558204174042,
      "learning_rate": 5.004902609472831e-06,
      "loss": 0.4295,
      "step": 1958
    },
    {
      "epoch": 1.6499157776530038,
      "grad_norm": 0.33412107825279236,
      "learning_rate": 5e-06,
      "loss": 0.4384,
      "step": 1959
    },
    {
      "epoch": 1.6507580011229646,
      "grad_norm": 0.39377856254577637,
      "learning_rate": 4.995097390527171e-06,
      "loss": 0.4399,
      "step": 1960
    },
    {
      "epoch": 1.6516002245929253,
      "grad_norm": 0.4128464162349701,
      "learning_rate": 4.990194785767824e-06,
      "loss": 0.4235,
      "step": 1961
    },
    {
      "epoch": 1.652442448062886,
      "grad_norm": 0.32967013120651245,
      "learning_rate": 4.98529219043544e-06,
      "loss": 0.3958,
      "step": 1962
    },
    {
      "epoch": 1.6532846715328469,
      "grad_norm": 0.38035550713539124,
      "learning_rate": 4.980389609243488e-06,
      "loss": 0.4544,
      "step": 1963
    },
    {
      "epoch": 1.6541268950028074,
      "grad_norm": 0.38238686323165894,
      "learning_rate": 4.975487046905426e-06,
      "loss": 0.3813,
      "step": 1964
    },
    {
      "epoch": 1.654969118472768,
      "grad_norm": 0.43175530433654785,
      "learning_rate": 4.97058450813469e-06,
      "loss": 0.4247,
      "step": 1965
    },
    {
      "epoch": 1.6558113419427287,
      "grad_norm": 0.3457217812538147,
      "learning_rate": 4.9656819976446975e-06,
      "loss": 0.434,
      "step": 1966
    },
    {
      "epoch": 1.6566535654126895,
      "grad_norm": 0.4377143681049347,
      "learning_rate": 4.960779520148835e-06,
      "loss": 0.4582,
      "step": 1967
    },
    {
      "epoch": 1.6574957888826503,
      "grad_norm": 0.3563629686832428,
      "learning_rate": 4.955877080360462e-06,
      "loss": 0.3729,
      "step": 1968
    },
    {
      "epoch": 1.658338012352611,
      "grad_norm": 0.37348672747612,
      "learning_rate": 4.950974682992894e-06,
      "loss": 0.4694,
      "step": 1969
    },
    {
      "epoch": 1.6591802358225716,
      "grad_norm": 0.3545929193496704,
      "learning_rate": 4.9460723327594175e-06,
      "loss": 0.4046,
      "step": 1970
    },
    {
      "epoch": 1.6600224592925323,
      "grad_norm": 0.346150666475296,
      "learning_rate": 4.94117003437326e-06,
      "loss": 0.4027,
      "step": 1971
    },
    {
      "epoch": 1.6608646827624929,
      "grad_norm": 0.35138726234436035,
      "learning_rate": 4.9362677925476124e-06,
      "loss": 0.4042,
      "step": 1972
    },
    {
      "epoch": 1.6617069062324537,
      "grad_norm": 0.356486439704895,
      "learning_rate": 4.931365611995598e-06,
      "loss": 0.438,
      "step": 1973
    },
    {
      "epoch": 1.6625491297024144,
      "grad_norm": 0.3417152166366577,
      "learning_rate": 4.926463497430293e-06,
      "loss": 0.3962,
      "step": 1974
    },
    {
      "epoch": 1.6633913531723752,
      "grad_norm": 0.3611716628074646,
      "learning_rate": 4.921561453564704e-06,
      "loss": 0.4166,
      "step": 1975
    },
    {
      "epoch": 1.6642335766423357,
      "grad_norm": 0.37337633967399597,
      "learning_rate": 4.9166594851117696e-06,
      "loss": 0.4082,
      "step": 1976
    },
    {
      "epoch": 1.6650758001122965,
      "grad_norm": 0.38721075654029846,
      "learning_rate": 4.911757596784358e-06,
      "loss": 0.4391,
      "step": 1977
    },
    {
      "epoch": 1.665918023582257,
      "grad_norm": 0.3553026020526886,
      "learning_rate": 4.906855793295259e-06,
      "loss": 0.4188,
      "step": 1978
    },
    {
      "epoch": 1.6667602470522178,
      "grad_norm": 0.37549564242362976,
      "learning_rate": 4.901954079357182e-06,
      "loss": 0.3445,
      "step": 1979
    },
    {
      "epoch": 1.6676024705221786,
      "grad_norm": 0.3521761894226074,
      "learning_rate": 4.897052459682749e-06,
      "loss": 0.4528,
      "step": 1980
    },
    {
      "epoch": 1.6684446939921393,
      "grad_norm": 0.3409058451652527,
      "learning_rate": 4.892150938984491e-06,
      "loss": 0.4385,
      "step": 1981
    },
    {
      "epoch": 1.6692869174621,
      "grad_norm": 0.371194064617157,
      "learning_rate": 4.887249521974848e-06,
      "loss": 0.4417,
      "step": 1982
    },
    {
      "epoch": 1.6701291409320607,
      "grad_norm": 0.32014748454093933,
      "learning_rate": 4.882348213366152e-06,
      "loss": 0.4008,
      "step": 1983
    },
    {
      "epoch": 1.6709713644020212,
      "grad_norm": 0.3325524628162384,
      "learning_rate": 4.8774470178706405e-06,
      "loss": 0.4377,
      "step": 1984
    },
    {
      "epoch": 1.671813587871982,
      "grad_norm": 0.31678506731987,
      "learning_rate": 4.872545940200435e-06,
      "loss": 0.3969,
      "step": 1985
    },
    {
      "epoch": 1.6726558113419427,
      "grad_norm": 0.3358134627342224,
      "learning_rate": 4.867644985067548e-06,
      "loss": 0.4347,
      "step": 1986
    },
    {
      "epoch": 1.6734980348119035,
      "grad_norm": 0.40612342953681946,
      "learning_rate": 4.862744157183869e-06,
      "loss": 0.4441,
      "step": 1987
    },
    {
      "epoch": 1.6743402582818643,
      "grad_norm": 0.36498767137527466,
      "learning_rate": 4.857843461261176e-06,
      "loss": 0.4571,
      "step": 1988
    },
    {
      "epoch": 1.6751824817518248,
      "grad_norm": 0.3464157283306122,
      "learning_rate": 4.8529429020111035e-06,
      "loss": 0.4055,
      "step": 1989
    },
    {
      "epoch": 1.6760247052217854,
      "grad_norm": 0.3412296175956726,
      "learning_rate": 4.8480424841451725e-06,
      "loss": 0.443,
      "step": 1990
    },
    {
      "epoch": 1.6768669286917461,
      "grad_norm": 0.39738065004348755,
      "learning_rate": 4.8431422123747524e-06,
      "loss": 0.4301,
      "step": 1991
    },
    {
      "epoch": 1.677709152161707,
      "grad_norm": 0.3822493851184845,
      "learning_rate": 4.838242091411085e-06,
      "loss": 0.3763,
      "step": 1992
    },
    {
      "epoch": 1.6785513756316677,
      "grad_norm": 0.357477068901062,
      "learning_rate": 4.833342125965257e-06,
      "loss": 0.4202,
      "step": 1993
    },
    {
      "epoch": 1.6793935991016284,
      "grad_norm": 0.3294481039047241,
      "learning_rate": 4.828442320748213e-06,
      "loss": 0.389,
      "step": 1994
    },
    {
      "epoch": 1.680235822571589,
      "grad_norm": 0.33039289712905884,
      "learning_rate": 4.823542680470738e-06,
      "loss": 0.382,
      "step": 1995
    },
    {
      "epoch": 1.6810780460415495,
      "grad_norm": 0.4039980471134186,
      "learning_rate": 4.818643209843463e-06,
      "loss": 0.4605,
      "step": 1996
    },
    {
      "epoch": 1.6819202695115103,
      "grad_norm": 0.3374316394329071,
      "learning_rate": 4.813743913576852e-06,
      "loss": 0.4002,
      "step": 1997
    },
    {
      "epoch": 1.682762492981471,
      "grad_norm": 0.3417966067790985,
      "learning_rate": 4.808844796381205e-06,
      "loss": 0.4432,
      "step": 1998
    },
    {
      "epoch": 1.6836047164514318,
      "grad_norm": 0.3111039996147156,
      "learning_rate": 4.803945862966646e-06,
      "loss": 0.3766,
      "step": 1999
    },
    {
      "epoch": 1.6844469399213926,
      "grad_norm": 0.3267902731895447,
      "learning_rate": 4.799047118043126e-06,
      "loss": 0.4382,
      "step": 2000
    },
    {
      "epoch": 1.6852891633913532,
      "grad_norm": 0.3291420042514801,
      "learning_rate": 4.794148566320412e-06,
      "loss": 0.4324,
      "step": 2001
    },
    {
      "epoch": 1.686131386861314,
      "grad_norm": 0.33393216133117676,
      "learning_rate": 4.789250212508088e-06,
      "loss": 0.4004,
      "step": 2002
    },
    {
      "epoch": 1.6869736103312745,
      "grad_norm": 0.34935852885246277,
      "learning_rate": 4.7843520613155434e-06,
      "loss": 0.443,
      "step": 2003
    },
    {
      "epoch": 1.6878158338012352,
      "grad_norm": 0.3145216703414917,
      "learning_rate": 4.779454117451978e-06,
      "loss": 0.4244,
      "step": 2004
    },
    {
      "epoch": 1.688658057271196,
      "grad_norm": 0.3191610872745514,
      "learning_rate": 4.774556385626386e-06,
      "loss": 0.4303,
      "step": 2005
    },
    {
      "epoch": 1.6895002807411568,
      "grad_norm": 0.31468063592910767,
      "learning_rate": 4.769658870547567e-06,
      "loss": 0.3777,
      "step": 2006
    },
    {
      "epoch": 1.6903425042111173,
      "grad_norm": 0.3177923262119293,
      "learning_rate": 4.7647615769241e-06,
      "loss": 0.412,
      "step": 2007
    },
    {
      "epoch": 1.691184727681078,
      "grad_norm": 0.31874868273735046,
      "learning_rate": 4.759864509464366e-06,
      "loss": 0.4023,
      "step": 2008
    },
    {
      "epoch": 1.6920269511510386,
      "grad_norm": 0.32656773924827576,
      "learning_rate": 4.754967672876513e-06,
      "loss": 0.4196,
      "step": 2009
    },
    {
      "epoch": 1.6928691746209994,
      "grad_norm": 0.3959745764732361,
      "learning_rate": 4.750071071868478e-06,
      "loss": 0.4334,
      "step": 2010
    },
    {
      "epoch": 1.6937113980909602,
      "grad_norm": 0.33357009291648865,
      "learning_rate": 4.745174711147967e-06,
      "loss": 0.4164,
      "step": 2011
    },
    {
      "epoch": 1.694553621560921,
      "grad_norm": 0.3512556254863739,
      "learning_rate": 4.7402785954224565e-06,
      "loss": 0.4303,
      "step": 2012
    },
    {
      "epoch": 1.6953958450308815,
      "grad_norm": 0.3446783721446991,
      "learning_rate": 4.7353827293991845e-06,
      "loss": 0.4306,
      "step": 2013
    },
    {
      "epoch": 1.6962380685008422,
      "grad_norm": 0.34245941042900085,
      "learning_rate": 4.730487117785155e-06,
      "loss": 0.4017,
      "step": 2014
    },
    {
      "epoch": 1.6970802919708028,
      "grad_norm": 0.3586832582950592,
      "learning_rate": 4.725591765287119e-06,
      "loss": 0.4424,
      "step": 2015
    },
    {
      "epoch": 1.6979225154407636,
      "grad_norm": 0.31148597598075867,
      "learning_rate": 4.720696676611589e-06,
      "loss": 0.3764,
      "step": 2016
    },
    {
      "epoch": 1.6987647389107243,
      "grad_norm": 0.3436831533908844,
      "learning_rate": 4.715801856464812e-06,
      "loss": 0.4567,
      "step": 2017
    },
    {
      "epoch": 1.699606962380685,
      "grad_norm": 0.3282856345176697,
      "learning_rate": 4.710907309552787e-06,
      "loss": 0.3949,
      "step": 2018
    },
    {
      "epoch": 1.7004491858506459,
      "grad_norm": 0.3740271329879761,
      "learning_rate": 4.706013040581242e-06,
      "loss": 0.442,
      "step": 2019
    },
    {
      "epoch": 1.7012914093206064,
      "grad_norm": 0.3330669403076172,
      "learning_rate": 4.701119054255646e-06,
      "loss": 0.4147,
      "step": 2020
    },
    {
      "epoch": 1.702133632790567,
      "grad_norm": 0.3463244140148163,
      "learning_rate": 4.6962253552811885e-06,
      "loss": 0.4105,
      "step": 2021
    },
    {
      "epoch": 1.7029758562605277,
      "grad_norm": 0.3253532350063324,
      "learning_rate": 4.691331948362789e-06,
      "loss": 0.3641,
      "step": 2022
    },
    {
      "epoch": 1.7038180797304885,
      "grad_norm": 0.3608870506286621,
      "learning_rate": 4.6864388382050804e-06,
      "loss": 0.4509,
      "step": 2023
    },
    {
      "epoch": 1.7046603032004493,
      "grad_norm": 0.34802815318107605,
      "learning_rate": 4.6815460295124185e-06,
      "loss": 0.4412,
      "step": 2024
    },
    {
      "epoch": 1.70550252667041,
      "grad_norm": 0.32237696647644043,
      "learning_rate": 4.676653526988858e-06,
      "loss": 0.3984,
      "step": 2025
    },
    {
      "epoch": 1.7063447501403706,
      "grad_norm": 0.3641161620616913,
      "learning_rate": 4.671761335338171e-06,
      "loss": 0.4223,
      "step": 2026
    },
    {
      "epoch": 1.7071869736103311,
      "grad_norm": 0.3260505795478821,
      "learning_rate": 4.666869459263821e-06,
      "loss": 0.4426,
      "step": 2027
    },
    {
      "epoch": 1.7080291970802919,
      "grad_norm": 0.339491605758667,
      "learning_rate": 4.661977903468974e-06,
      "loss": 0.4133,
      "step": 2028
    },
    {
      "epoch": 1.7088714205502527,
      "grad_norm": 0.3772306740283966,
      "learning_rate": 4.657086672656486e-06,
      "loss": 0.3773,
      "step": 2029
    },
    {
      "epoch": 1.7097136440202134,
      "grad_norm": 0.33272403478622437,
      "learning_rate": 4.652195771528901e-06,
      "loss": 0.4239,
      "step": 2030
    },
    {
      "epoch": 1.7105558674901742,
      "grad_norm": 0.3325572609901428,
      "learning_rate": 4.647305204788445e-06,
      "loss": 0.4246,
      "step": 2031
    },
    {
      "epoch": 1.7113980909601347,
      "grad_norm": 0.376950204372406,
      "learning_rate": 4.642414977137026e-06,
      "loss": 0.4427,
      "step": 2032
    },
    {
      "epoch": 1.7122403144300955,
      "grad_norm": 0.338232159614563,
      "learning_rate": 4.63752509327622e-06,
      "loss": 0.4008,
      "step": 2033
    },
    {
      "epoch": 1.713082537900056,
      "grad_norm": 0.3506542444229126,
      "learning_rate": 4.632635557907277e-06,
      "loss": 0.4138,
      "step": 2034
    },
    {
      "epoch": 1.7139247613700168,
      "grad_norm": 0.3496381342411041,
      "learning_rate": 4.627746375731112e-06,
      "loss": 0.4702,
      "step": 2035
    },
    {
      "epoch": 1.7147669848399776,
      "grad_norm": 0.35456255078315735,
      "learning_rate": 4.622857551448297e-06,
      "loss": 0.4028,
      "step": 2036
    },
    {
      "epoch": 1.7156092083099383,
      "grad_norm": 0.32382795214653015,
      "learning_rate": 4.617969089759066e-06,
      "loss": 0.4026,
      "step": 2037
    },
    {
      "epoch": 1.716451431779899,
      "grad_norm": 0.3537888824939728,
      "learning_rate": 4.613080995363296e-06,
      "loss": 0.4566,
      "step": 2038
    },
    {
      "epoch": 1.7172936552498597,
      "grad_norm": 0.3191777765750885,
      "learning_rate": 4.608193272960519e-06,
      "loss": 0.4054,
      "step": 2039
    },
    {
      "epoch": 1.7181358787198202,
      "grad_norm": 0.31682664155960083,
      "learning_rate": 4.603305927249902e-06,
      "loss": 0.4104,
      "step": 2040
    },
    {
      "epoch": 1.718978102189781,
      "grad_norm": 0.3352941572666168,
      "learning_rate": 4.598418962930258e-06,
      "loss": 0.4362,
      "step": 2041
    },
    {
      "epoch": 1.7198203256597417,
      "grad_norm": 0.3795085847377777,
      "learning_rate": 4.593532384700026e-06,
      "loss": 0.4732,
      "step": 2042
    },
    {
      "epoch": 1.7206625491297025,
      "grad_norm": 0.36198845505714417,
      "learning_rate": 4.588646197257278e-06,
      "loss": 0.4579,
      "step": 2043
    },
    {
      "epoch": 1.721504772599663,
      "grad_norm": 0.30162474513053894,
      "learning_rate": 4.583760405299707e-06,
      "loss": 0.3952,
      "step": 2044
    },
    {
      "epoch": 1.7223469960696238,
      "grad_norm": 0.3591868579387665,
      "learning_rate": 4.57887501352463e-06,
      "loss": 0.3909,
      "step": 2045
    },
    {
      "epoch": 1.7231892195395844,
      "grad_norm": 0.4012768566608429,
      "learning_rate": 4.573990026628976e-06,
      "loss": 0.4274,
      "step": 2046
    },
    {
      "epoch": 1.7240314430095451,
      "grad_norm": 0.34333470463752747,
      "learning_rate": 4.569105449309289e-06,
      "loss": 0.4356,
      "step": 2047
    },
    {
      "epoch": 1.724873666479506,
      "grad_norm": 0.3085940480232239,
      "learning_rate": 4.564221286261709e-06,
      "loss": 0.3846,
      "step": 2048
    },
    {
      "epoch": 1.7257158899494667,
      "grad_norm": 0.4123508334159851,
      "learning_rate": 4.559337542181993e-06,
      "loss": 0.4646,
      "step": 2049
    },
    {
      "epoch": 1.7265581134194274,
      "grad_norm": 0.32943204045295715,
      "learning_rate": 4.554454221765479e-06,
      "loss": 0.3829,
      "step": 2050
    },
    {
      "epoch": 1.727400336889388,
      "grad_norm": 0.31788432598114014,
      "learning_rate": 4.549571329707113e-06,
      "loss": 0.4062,
      "step": 2051
    },
    {
      "epoch": 1.7282425603593485,
      "grad_norm": 0.35175997018814087,
      "learning_rate": 4.544688870701416e-06,
      "loss": 0.4022,
      "step": 2052
    },
    {
      "epoch": 1.7290847838293093,
      "grad_norm": 0.4073474109172821,
      "learning_rate": 4.539806849442501e-06,
      "loss": 0.4036,
      "step": 2053
    },
    {
      "epoch": 1.72992700729927,
      "grad_norm": 0.3784160017967224,
      "learning_rate": 4.534925270624057e-06,
      "loss": 0.4543,
      "step": 2054
    },
    {
      "epoch": 1.7307692307692308,
      "grad_norm": 0.32001420855522156,
      "learning_rate": 4.53004413893935e-06,
      "loss": 0.4031,
      "step": 2055
    },
    {
      "epoch": 1.7316114542391916,
      "grad_norm": 0.3638020157814026,
      "learning_rate": 4.52516345908121e-06,
      "loss": 0.4485,
      "step": 2056
    },
    {
      "epoch": 1.7324536777091522,
      "grad_norm": 0.4148275852203369,
      "learning_rate": 4.520283235742042e-06,
      "loss": 0.4591,
      "step": 2057
    },
    {
      "epoch": 1.7332959011791127,
      "grad_norm": 0.31319859623908997,
      "learning_rate": 4.5154034736138035e-06,
      "loss": 0.3919,
      "step": 2058
    },
    {
      "epoch": 1.7341381246490735,
      "grad_norm": 0.33549177646636963,
      "learning_rate": 4.510524177388014e-06,
      "loss": 0.4361,
      "step": 2059
    },
    {
      "epoch": 1.7349803481190342,
      "grad_norm": 0.3380624055862427,
      "learning_rate": 4.505645351755741e-06,
      "loss": 0.4228,
      "step": 2060
    },
    {
      "epoch": 1.735822571588995,
      "grad_norm": 0.3501165211200714,
      "learning_rate": 4.500767001407604e-06,
      "loss": 0.457,
      "step": 2061
    },
    {
      "epoch": 1.7366647950589558,
      "grad_norm": 0.33711856603622437,
      "learning_rate": 4.495889131033762e-06,
      "loss": 0.4093,
      "step": 2062
    },
    {
      "epoch": 1.7375070185289163,
      "grad_norm": 0.3633987605571747,
      "learning_rate": 4.491011745323914e-06,
      "loss": 0.4017,
      "step": 2063
    },
    {
      "epoch": 1.738349241998877,
      "grad_norm": 0.34409981966018677,
      "learning_rate": 4.486134848967292e-06,
      "loss": 0.3993,
      "step": 2064
    },
    {
      "epoch": 1.7391914654688376,
      "grad_norm": 0.37137487530708313,
      "learning_rate": 4.481258446652662e-06,
      "loss": 0.4339,
      "step": 2065
    },
    {
      "epoch": 1.7400336889387984,
      "grad_norm": 0.3434106707572937,
      "learning_rate": 4.4763825430683055e-06,
      "loss": 0.4219,
      "step": 2066
    },
    {
      "epoch": 1.7408759124087592,
      "grad_norm": 0.31746235489845276,
      "learning_rate": 4.471507142902036e-06,
      "loss": 0.4045,
      "step": 2067
    },
    {
      "epoch": 1.74171813587872,
      "grad_norm": 0.3365199863910675,
      "learning_rate": 4.466632250841173e-06,
      "loss": 0.4172,
      "step": 2068
    },
    {
      "epoch": 1.7425603593486805,
      "grad_norm": 0.36368420720100403,
      "learning_rate": 4.4617578715725565e-06,
      "loss": 0.4302,
      "step": 2069
    },
    {
      "epoch": 1.7434025828186412,
      "grad_norm": 0.32088297605514526,
      "learning_rate": 4.4568840097825225e-06,
      "loss": 0.4297,
      "step": 2070
    },
    {
      "epoch": 1.7442448062886018,
      "grad_norm": 0.30506211519241333,
      "learning_rate": 4.452010670156922e-06,
      "loss": 0.3799,
      "step": 2071
    },
    {
      "epoch": 1.7450870297585626,
      "grad_norm": 0.3472638726234436,
      "learning_rate": 4.447137857381095e-06,
      "loss": 0.4358,
      "step": 2072
    },
    {
      "epoch": 1.7459292532285233,
      "grad_norm": 0.31622374057769775,
      "learning_rate": 4.4422655761398785e-06,
      "loss": 0.4135,
      "step": 2073
    },
    {
      "epoch": 1.746771476698484,
      "grad_norm": 0.36380907893180847,
      "learning_rate": 4.437393831117596e-06,
      "loss": 0.4702,
      "step": 2074
    },
    {
      "epoch": 1.7476137001684446,
      "grad_norm": 0.35127002000808716,
      "learning_rate": 4.432522626998061e-06,
      "loss": 0.4372,
      "step": 2075
    },
    {
      "epoch": 1.7484559236384054,
      "grad_norm": 0.3164195716381073,
      "learning_rate": 4.427651968464559e-06,
      "loss": 0.4189,
      "step": 2076
    },
    {
      "epoch": 1.749298147108366,
      "grad_norm": 0.28680703043937683,
      "learning_rate": 4.4227818601998575e-06,
      "loss": 0.3745,
      "step": 2077
    },
    {
      "epoch": 1.7501403705783267,
      "grad_norm": 0.3419021666049957,
      "learning_rate": 4.417912306886192e-06,
      "loss": 0.4187,
      "step": 2078
    },
    {
      "epoch": 1.7509825940482875,
      "grad_norm": 0.36882203817367554,
      "learning_rate": 4.413043313205266e-06,
      "loss": 0.4318,
      "step": 2079
    },
    {
      "epoch": 1.7518248175182483,
      "grad_norm": 0.3037567436695099,
      "learning_rate": 4.408174883838243e-06,
      "loss": 0.3859,
      "step": 2080
    },
    {
      "epoch": 1.752667040988209,
      "grad_norm": 0.3385636508464813,
      "learning_rate": 4.403307023465746e-06,
      "loss": 0.4087,
      "step": 2081
    },
    {
      "epoch": 1.7535092644581696,
      "grad_norm": 0.3466944694519043,
      "learning_rate": 4.3984397367678475e-06,
      "loss": 0.444,
      "step": 2082
    },
    {
      "epoch": 1.7543514879281301,
      "grad_norm": 0.3154969811439514,
      "learning_rate": 4.393573028424075e-06,
      "loss": 0.4228,
      "step": 2083
    },
    {
      "epoch": 1.7551937113980909,
      "grad_norm": 0.3383405804634094,
      "learning_rate": 4.388706903113391e-06,
      "loss": 0.4181,
      "step": 2084
    },
    {
      "epoch": 1.7560359348680517,
      "grad_norm": 0.3285348415374756,
      "learning_rate": 4.383841365514208e-06,
      "loss": 0.4359,
      "step": 2085
    },
    {
      "epoch": 1.7568781583380124,
      "grad_norm": 0.3323192894458771,
      "learning_rate": 4.378976420304361e-06,
      "loss": 0.4625,
      "step": 2086
    },
    {
      "epoch": 1.7577203818079732,
      "grad_norm": 0.33605578541755676,
      "learning_rate": 4.374112072161129e-06,
      "loss": 0.4173,
      "step": 2087
    },
    {
      "epoch": 1.7585626052779337,
      "grad_norm": 0.2948352098464966,
      "learning_rate": 4.369248325761205e-06,
      "loss": 0.3863,
      "step": 2088
    },
    {
      "epoch": 1.7594048287478943,
      "grad_norm": 0.35610172152519226,
      "learning_rate": 4.364385185780712e-06,
      "loss": 0.423,
      "step": 2089
    },
    {
      "epoch": 1.760247052217855,
      "grad_norm": 0.3229409158229828,
      "learning_rate": 4.359522656895185e-06,
      "loss": 0.3494,
      "step": 2090
    },
    {
      "epoch": 1.7610892756878158,
      "grad_norm": 0.3547365069389343,
      "learning_rate": 4.354660743779575e-06,
      "loss": 0.4673,
      "step": 2091
    },
    {
      "epoch": 1.7619314991577766,
      "grad_norm": 0.33307409286499023,
      "learning_rate": 4.349799451108236e-06,
      "loss": 0.4282,
      "step": 2092
    },
    {
      "epoch": 1.7627737226277373,
      "grad_norm": 0.33791491389274597,
      "learning_rate": 4.3449387835549305e-06,
      "loss": 0.4206,
      "step": 2093
    },
    {
      "epoch": 1.763615946097698,
      "grad_norm": 0.3400745391845703,
      "learning_rate": 4.340078745792818e-06,
      "loss": 0.4029,
      "step": 2094
    },
    {
      "epoch": 1.7644581695676587,
      "grad_norm": 0.3194737434387207,
      "learning_rate": 4.3352193424944535e-06,
      "loss": 0.4251,
      "step": 2095
    },
    {
      "epoch": 1.7653003930376192,
      "grad_norm": 0.3281265199184418,
      "learning_rate": 4.3303605783317794e-06,
      "loss": 0.4168,
      "step": 2096
    },
    {
      "epoch": 1.76614261650758,
      "grad_norm": 0.33552688360214233,
      "learning_rate": 4.325502457976126e-06,
      "loss": 0.4075,
      "step": 2097
    },
    {
      "epoch": 1.7669848399775407,
      "grad_norm": 0.34261763095855713,
      "learning_rate": 4.320644986098204e-06,
      "loss": 0.4658,
      "step": 2098
    },
    {
      "epoch": 1.7678270634475015,
      "grad_norm": 0.32754063606262207,
      "learning_rate": 4.315788167368102e-06,
      "loss": 0.3986,
      "step": 2099
    },
    {
      "epoch": 1.768669286917462,
      "grad_norm": 0.31225332617759705,
      "learning_rate": 4.310932006455276e-06,
      "loss": 0.3743,
      "step": 2100
    },
    {
      "epoch": 1.7695115103874228,
      "grad_norm": 0.3372456133365631,
      "learning_rate": 4.306076508028557e-06,
      "loss": 0.4037,
      "step": 2101
    },
    {
      "epoch": 1.7703537338573834,
      "grad_norm": 0.3676266670227051,
      "learning_rate": 4.301221676756129e-06,
      "loss": 0.4136,
      "step": 2102
    },
    {
      "epoch": 1.7711959573273441,
      "grad_norm": 0.33974653482437134,
      "learning_rate": 4.296367517305548e-06,
      "loss": 0.4147,
      "step": 2103
    },
    {
      "epoch": 1.772038180797305,
      "grad_norm": 0.31625646352767944,
      "learning_rate": 4.29151403434371e-06,
      "loss": 0.3752,
      "step": 2104
    },
    {
      "epoch": 1.7728804042672657,
      "grad_norm": 0.37204375863075256,
      "learning_rate": 4.286661232536873e-06,
      "loss": 0.409,
      "step": 2105
    },
    {
      "epoch": 1.7737226277372264,
      "grad_norm": 0.32173147797584534,
      "learning_rate": 4.281809116550629e-06,
      "loss": 0.4216,
      "step": 2106
    },
    {
      "epoch": 1.774564851207187,
      "grad_norm": 0.295701265335083,
      "learning_rate": 4.276957691049917e-06,
      "loss": 0.3715,
      "step": 2107
    },
    {
      "epoch": 1.7754070746771475,
      "grad_norm": 0.3636719286441803,
      "learning_rate": 4.272106960699015e-06,
      "loss": 0.4599,
      "step": 2108
    },
    {
      "epoch": 1.7762492981471083,
      "grad_norm": 0.31267687678337097,
      "learning_rate": 4.267256930161523e-06,
      "loss": 0.3782,
      "step": 2109
    },
    {
      "epoch": 1.777091521617069,
      "grad_norm": 0.3446705639362335,
      "learning_rate": 4.2624076041003794e-06,
      "loss": 0.4267,
      "step": 2110
    },
    {
      "epoch": 1.7779337450870298,
      "grad_norm": 0.32158637046813965,
      "learning_rate": 4.257558987177835e-06,
      "loss": 0.3996,
      "step": 2111
    },
    {
      "epoch": 1.7787759685569906,
      "grad_norm": 0.3701116442680359,
      "learning_rate": 4.252711084055468e-06,
      "loss": 0.4149,
      "step": 2112
    },
    {
      "epoch": 1.7796181920269512,
      "grad_norm": 0.32340797781944275,
      "learning_rate": 4.247863899394162e-06,
      "loss": 0.4028,
      "step": 2113
    },
    {
      "epoch": 1.7804604154969117,
      "grad_norm": 0.3442474901676178,
      "learning_rate": 4.243017437854117e-06,
      "loss": 0.4446,
      "step": 2114
    },
    {
      "epoch": 1.7813026389668725,
      "grad_norm": 0.3432466685771942,
      "learning_rate": 4.238171704094833e-06,
      "loss": 0.396,
      "step": 2115
    },
    {
      "epoch": 1.7821448624368332,
      "grad_norm": 0.3357381522655487,
      "learning_rate": 4.2333267027751125e-06,
      "loss": 0.3967,
      "step": 2116
    },
    {
      "epoch": 1.782987085906794,
      "grad_norm": 0.3396158516407013,
      "learning_rate": 4.228482438553052e-06,
      "loss": 0.4329,
      "step": 2117
    },
    {
      "epoch": 1.7838293093767548,
      "grad_norm": 0.3326312005519867,
      "learning_rate": 4.223638916086044e-06,
      "loss": 0.4166,
      "step": 2118
    },
    {
      "epoch": 1.7846715328467153,
      "grad_norm": 0.3201029300689697,
      "learning_rate": 4.218796140030759e-06,
      "loss": 0.3801,
      "step": 2119
    },
    {
      "epoch": 1.7855137563166759,
      "grad_norm": 0.35324305295944214,
      "learning_rate": 4.21395411504316e-06,
      "loss": 0.4221,
      "step": 2120
    },
    {
      "epoch": 1.7863559797866366,
      "grad_norm": 0.3589279353618622,
      "learning_rate": 4.209112845778481e-06,
      "loss": 0.4123,
      "step": 2121
    },
    {
      "epoch": 1.7871982032565974,
      "grad_norm": 0.33615368604660034,
      "learning_rate": 4.204272336891232e-06,
      "loss": 0.4102,
      "step": 2122
    },
    {
      "epoch": 1.7880404267265582,
      "grad_norm": 0.37369590997695923,
      "learning_rate": 4.199432593035192e-06,
      "loss": 0.4446,
      "step": 2123
    },
    {
      "epoch": 1.788882650196519,
      "grad_norm": 0.3222702741622925,
      "learning_rate": 4.194593618863404e-06,
      "loss": 0.3946,
      "step": 2124
    },
    {
      "epoch": 1.7897248736664795,
      "grad_norm": 0.37022969126701355,
      "learning_rate": 4.189755419028169e-06,
      "loss": 0.4583,
      "step": 2125
    },
    {
      "epoch": 1.7905670971364402,
      "grad_norm": 0.32242074608802795,
      "learning_rate": 4.1849179981810506e-06,
      "loss": 0.4411,
      "step": 2126
    },
    {
      "epoch": 1.7914093206064008,
      "grad_norm": 0.3568630814552307,
      "learning_rate": 4.180081360972852e-06,
      "loss": 0.384,
      "step": 2127
    },
    {
      "epoch": 1.7922515440763616,
      "grad_norm": 0.3601033389568329,
      "learning_rate": 4.175245512053637e-06,
      "loss": 0.4149,
      "step": 2128
    },
    {
      "epoch": 1.7930937675463223,
      "grad_norm": 0.3268354535102844,
      "learning_rate": 4.1704104560726955e-06,
      "loss": 0.4172,
      "step": 2129
    },
    {
      "epoch": 1.793935991016283,
      "grad_norm": 0.3503991365432739,
      "learning_rate": 4.165576197678571e-06,
      "loss": 0.4412,
      "step": 2130
    },
    {
      "epoch": 1.7947782144862436,
      "grad_norm": 0.3087463676929474,
      "learning_rate": 4.160742741519028e-06,
      "loss": 0.4014,
      "step": 2131
    },
    {
      "epoch": 1.7956204379562044,
      "grad_norm": 0.37419605255126953,
      "learning_rate": 4.1559100922410665e-06,
      "loss": 0.4076,
      "step": 2132
    },
    {
      "epoch": 1.796462661426165,
      "grad_norm": 0.3523031771183014,
      "learning_rate": 4.151078254490908e-06,
      "loss": 0.4169,
      "step": 2133
    },
    {
      "epoch": 1.7973048848961257,
      "grad_norm": 0.3644320070743561,
      "learning_rate": 4.146247232913996e-06,
      "loss": 0.4115,
      "step": 2134
    },
    {
      "epoch": 1.7981471083660865,
      "grad_norm": 0.3902427554130554,
      "learning_rate": 4.141417032154984e-06,
      "loss": 0.44,
      "step": 2135
    },
    {
      "epoch": 1.7989893318360473,
      "grad_norm": 0.37764886021614075,
      "learning_rate": 4.136587656857744e-06,
      "loss": 0.4311,
      "step": 2136
    },
    {
      "epoch": 1.799831555306008,
      "grad_norm": 0.32081881165504456,
      "learning_rate": 4.131759111665349e-06,
      "loss": 0.4061,
      "step": 2137
    },
    {
      "epoch": 1.8006737787759686,
      "grad_norm": 0.35331636667251587,
      "learning_rate": 4.126931401220075e-06,
      "loss": 0.429,
      "step": 2138
    },
    {
      "epoch": 1.8015160022459291,
      "grad_norm": 0.3510521352291107,
      "learning_rate": 4.122104530163397e-06,
      "loss": 0.423,
      "step": 2139
    },
    {
      "epoch": 1.8023582257158899,
      "grad_norm": 0.3521886169910431,
      "learning_rate": 4.117278503135981e-06,
      "loss": 0.4161,
      "step": 2140
    },
    {
      "epoch": 1.8032004491858507,
      "grad_norm": 0.315483957529068,
      "learning_rate": 4.112453324777683e-06,
      "loss": 0.3806,
      "step": 2141
    },
    {
      "epoch": 1.8040426726558114,
      "grad_norm": 0.38652729988098145,
      "learning_rate": 4.107628999727542e-06,
      "loss": 0.4765,
      "step": 2142
    },
    {
      "epoch": 1.8048848961257722,
      "grad_norm": 0.34711435437202454,
      "learning_rate": 4.102805532623775e-06,
      "loss": 0.3922,
      "step": 2143
    },
    {
      "epoch": 1.8057271195957327,
      "grad_norm": 0.3769338130950928,
      "learning_rate": 4.097982928103782e-06,
      "loss": 0.4249,
      "step": 2144
    },
    {
      "epoch": 1.8065693430656933,
      "grad_norm": 0.3669670820236206,
      "learning_rate": 4.09316119080412e-06,
      "loss": 0.4555,
      "step": 2145
    },
    {
      "epoch": 1.807411566535654,
      "grad_norm": 0.31039881706237793,
      "learning_rate": 4.088340325360529e-06,
      "loss": 0.3555,
      "step": 2146
    },
    {
      "epoch": 1.8082537900056148,
      "grad_norm": 0.3554658591747284,
      "learning_rate": 4.083520336407894e-06,
      "loss": 0.4275,
      "step": 2147
    },
    {
      "epoch": 1.8090960134755756,
      "grad_norm": 0.32819443941116333,
      "learning_rate": 4.0787012285802695e-06,
      "loss": 0.4052,
      "step": 2148
    },
    {
      "epoch": 1.8099382369455363,
      "grad_norm": 0.31903257966041565,
      "learning_rate": 4.073883006510858e-06,
      "loss": 0.4303,
      "step": 2149
    },
    {
      "epoch": 1.810780460415497,
      "grad_norm": 0.34762516617774963,
      "learning_rate": 4.069065674832011e-06,
      "loss": 0.3979,
      "step": 2150
    },
    {
      "epoch": 1.8116226838854577,
      "grad_norm": 0.3634417653083801,
      "learning_rate": 4.064249238175223e-06,
      "loss": 0.4565,
      "step": 2151
    },
    {
      "epoch": 1.8124649073554182,
      "grad_norm": 0.3426850736141205,
      "learning_rate": 4.059433701171131e-06,
      "loss": 0.446,
      "step": 2152
    },
    {
      "epoch": 1.813307130825379,
      "grad_norm": 0.32439306378364563,
      "learning_rate": 4.054619068449502e-06,
      "loss": 0.4234,
      "step": 2153
    },
    {
      "epoch": 1.8141493542953397,
      "grad_norm": 0.34646233916282654,
      "learning_rate": 4.04980534463924e-06,
      "loss": 0.4592,
      "step": 2154
    },
    {
      "epoch": 1.8149915777653005,
      "grad_norm": 0.36668625473976135,
      "learning_rate": 4.044992534368369e-06,
      "loss": 0.4026,
      "step": 2155
    },
    {
      "epoch": 1.815833801235261,
      "grad_norm": 0.33941563963890076,
      "learning_rate": 4.04018064226404e-06,
      "loss": 0.3895,
      "step": 2156
    },
    {
      "epoch": 1.8166760247052218,
      "grad_norm": 0.3179699182510376,
      "learning_rate": 4.035369672952516e-06,
      "loss": 0.4044,
      "step": 2157
    },
    {
      "epoch": 1.8175182481751824,
      "grad_norm": 0.31479278206825256,
      "learning_rate": 4.030559631059179e-06,
      "loss": 0.4242,
      "step": 2158
    },
    {
      "epoch": 1.8183604716451431,
      "grad_norm": 0.3610108494758606,
      "learning_rate": 4.025750521208512e-06,
      "loss": 0.43,
      "step": 2159
    },
    {
      "epoch": 1.819202695115104,
      "grad_norm": 0.36555030941963196,
      "learning_rate": 4.020942348024108e-06,
      "loss": 0.4642,
      "step": 2160
    },
    {
      "epoch": 1.8200449185850647,
      "grad_norm": 0.3154170513153076,
      "learning_rate": 4.016135116128656e-06,
      "loss": 0.4,
      "step": 2161
    },
    {
      "epoch": 1.8208871420550252,
      "grad_norm": 0.3227609395980835,
      "learning_rate": 4.011328830143945e-06,
      "loss": 0.3805,
      "step": 2162
    },
    {
      "epoch": 1.821729365524986,
      "grad_norm": 0.2951693832874298,
      "learning_rate": 4.0065234946908456e-06,
      "loss": 0.39,
      "step": 2163
    },
    {
      "epoch": 1.8225715889949465,
      "grad_norm": 0.35953858494758606,
      "learning_rate": 4.001719114389325e-06,
      "loss": 0.4261,
      "step": 2164
    },
    {
      "epoch": 1.8234138124649073,
      "grad_norm": 0.3715936839580536,
      "learning_rate": 3.996915693858422e-06,
      "loss": 0.4562,
      "step": 2165
    },
    {
      "epoch": 1.824256035934868,
      "grad_norm": 0.3132725954055786,
      "learning_rate": 3.992113237716261e-06,
      "loss": 0.4014,
      "step": 2166
    },
    {
      "epoch": 1.8250982594048288,
      "grad_norm": 0.34752848744392395,
      "learning_rate": 3.987311750580035e-06,
      "loss": 0.4045,
      "step": 2167
    },
    {
      "epoch": 1.8259404828747896,
      "grad_norm": 0.3774251639842987,
      "learning_rate": 3.9825112370660055e-06,
      "loss": 0.3936,
      "step": 2168
    },
    {
      "epoch": 1.8267827063447502,
      "grad_norm": 0.3653651475906372,
      "learning_rate": 3.977711701789499e-06,
      "loss": 0.4263,
      "step": 2169
    },
    {
      "epoch": 1.8276249298147107,
      "grad_norm": 0.3409895896911621,
      "learning_rate": 3.972913149364902e-06,
      "loss": 0.4469,
      "step": 2170
    },
    {
      "epoch": 1.8284671532846715,
      "grad_norm": 0.3000805377960205,
      "learning_rate": 3.9681155844056525e-06,
      "loss": 0.4036,
      "step": 2171
    },
    {
      "epoch": 1.8293093767546322,
      "grad_norm": 0.3259512782096863,
      "learning_rate": 3.963319011524246e-06,
      "loss": 0.4075,
      "step": 2172
    },
    {
      "epoch": 1.830151600224593,
      "grad_norm": 0.36544784903526306,
      "learning_rate": 3.9585234353322155e-06,
      "loss": 0.4272,
      "step": 2173
    },
    {
      "epoch": 1.8309938236945538,
      "grad_norm": 0.33963972330093384,
      "learning_rate": 3.953728860440144e-06,
      "loss": 0.398,
      "step": 2174
    },
    {
      "epoch": 1.8318360471645143,
      "grad_norm": 0.30871880054473877,
      "learning_rate": 3.948935291457645e-06,
      "loss": 0.3656,
      "step": 2175
    },
    {
      "epoch": 1.8326782706344749,
      "grad_norm": 0.3258126974105835,
      "learning_rate": 3.94414273299337e-06,
      "loss": 0.4107,
      "step": 2176
    },
    {
      "epoch": 1.8335204941044356,
      "grad_norm": 0.3361981511116028,
      "learning_rate": 3.939351189654996e-06,
      "loss": 0.4219,
      "step": 2177
    },
    {
      "epoch": 1.8343627175743964,
      "grad_norm": 0.33787086606025696,
      "learning_rate": 3.934560666049226e-06,
      "loss": 0.4132,
      "step": 2178
    },
    {
      "epoch": 1.8352049410443572,
      "grad_norm": 0.29300960898399353,
      "learning_rate": 3.929771166781781e-06,
      "loss": 0.3739,
      "step": 2179
    },
    {
      "epoch": 1.836047164514318,
      "grad_norm": 0.35240232944488525,
      "learning_rate": 3.9249826964573965e-06,
      "loss": 0.4487,
      "step": 2180
    },
    {
      "epoch": 1.8368893879842785,
      "grad_norm": 0.3224708139896393,
      "learning_rate": 3.920195259679822e-06,
      "loss": 0.4249,
      "step": 2181
    },
    {
      "epoch": 1.8377316114542392,
      "grad_norm": 0.32709088921546936,
      "learning_rate": 3.915408861051809e-06,
      "loss": 0.4067,
      "step": 2182
    },
    {
      "epoch": 1.8385738349241998,
      "grad_norm": 0.3238849639892578,
      "learning_rate": 3.910623505175116e-06,
      "loss": 0.4008,
      "step": 2183
    },
    {
      "epoch": 1.8394160583941606,
      "grad_norm": 0.3151823580265045,
      "learning_rate": 3.905839196650494e-06,
      "loss": 0.4276,
      "step": 2184
    },
    {
      "epoch": 1.8402582818641213,
      "grad_norm": 0.37266623973846436,
      "learning_rate": 3.901055940077691e-06,
      "loss": 0.4137,
      "step": 2185
    },
    {
      "epoch": 1.841100505334082,
      "grad_norm": 0.33263522386550903,
      "learning_rate": 3.8962737400554395e-06,
      "loss": 0.4305,
      "step": 2186
    },
    {
      "epoch": 1.8419427288040426,
      "grad_norm": 0.3094389736652374,
      "learning_rate": 3.891492601181462e-06,
      "loss": 0.4252,
      "step": 2187
    },
    {
      "epoch": 1.8427849522740034,
      "grad_norm": 0.3080383539199829,
      "learning_rate": 3.8867125280524535e-06,
      "loss": 0.4137,
      "step": 2188
    },
    {
      "epoch": 1.843627175743964,
      "grad_norm": 0.32397541403770447,
      "learning_rate": 3.881933525264092e-06,
      "loss": 0.3943,
      "step": 2189
    },
    {
      "epoch": 1.8444693992139247,
      "grad_norm": 0.3626363277435303,
      "learning_rate": 3.877155597411019e-06,
      "loss": 0.4694,
      "step": 2190
    },
    {
      "epoch": 1.8453116226838855,
      "grad_norm": 0.3122662305831909,
      "learning_rate": 3.87237874908685e-06,
      "loss": 0.3888,
      "step": 2191
    },
    {
      "epoch": 1.8461538461538463,
      "grad_norm": 0.41452518105506897,
      "learning_rate": 3.867602984884155e-06,
      "loss": 0.4565,
      "step": 2192
    },
    {
      "epoch": 1.8469960696238068,
      "grad_norm": 0.32591354846954346,
      "learning_rate": 3.862828309394469e-06,
      "loss": 0.3962,
      "step": 2193
    },
    {
      "epoch": 1.8478382930937676,
      "grad_norm": 0.3238661587238312,
      "learning_rate": 3.8580547272082746e-06,
      "loss": 0.3807,
      "step": 2194
    },
    {
      "epoch": 1.8486805165637281,
      "grad_norm": 0.38796621561050415,
      "learning_rate": 3.853282242915007e-06,
      "loss": 0.4156,
      "step": 2195
    },
    {
      "epoch": 1.8495227400336889,
      "grad_norm": 0.434707373380661,
      "learning_rate": 3.8485108611030415e-06,
      "loss": 0.4335,
      "step": 2196
    },
    {
      "epoch": 1.8503649635036497,
      "grad_norm": 0.35717305541038513,
      "learning_rate": 3.843740586359701e-06,
      "loss": 0.4098,
      "step": 2197
    },
    {
      "epoch": 1.8512071869736104,
      "grad_norm": 0.3260836899280548,
      "learning_rate": 3.8389714232712346e-06,
      "loss": 0.4089,
      "step": 2198
    },
    {
      "epoch": 1.8520494104435712,
      "grad_norm": 0.3993750512599945,
      "learning_rate": 3.834203376422831e-06,
      "loss": 0.3849,
      "step": 2199
    },
    {
      "epoch": 1.8528916339135317,
      "grad_norm": 0.41325613856315613,
      "learning_rate": 3.829436450398599e-06,
      "loss": 0.4099,
      "step": 2200
    },
    {
      "epoch": 1.8537338573834923,
      "grad_norm": 0.3419803977012634,
      "learning_rate": 3.824670649781576e-06,
      "loss": 0.4394,
      "step": 2201
    },
    {
      "epoch": 1.854576080853453,
      "grad_norm": 0.32242095470428467,
      "learning_rate": 3.8199059791537105e-06,
      "loss": 0.3949,
      "step": 2202
    },
    {
      "epoch": 1.8554183043234138,
      "grad_norm": 0.3225153386592865,
      "learning_rate": 3.815142443095873e-06,
      "loss": 0.4251,
      "step": 2203
    },
    {
      "epoch": 1.8562605277933746,
      "grad_norm": 0.36200666427612305,
      "learning_rate": 3.8103800461878344e-06,
      "loss": 0.4152,
      "step": 2204
    },
    {
      "epoch": 1.8571027512633353,
      "grad_norm": 0.3551524579524994,
      "learning_rate": 3.805618793008279e-06,
      "loss": 0.4133,
      "step": 2205
    },
    {
      "epoch": 1.857944974733296,
      "grad_norm": 0.36505594849586487,
      "learning_rate": 3.8008586881347815e-06,
      "loss": 0.4207,
      "step": 2206
    },
    {
      "epoch": 1.8587871982032564,
      "grad_norm": 0.3326520323753357,
      "learning_rate": 3.7960997361438235e-06,
      "loss": 0.3814,
      "step": 2207
    },
    {
      "epoch": 1.8596294216732172,
      "grad_norm": 0.3274734914302826,
      "learning_rate": 3.7913419416107692e-06,
      "loss": 0.4184,
      "step": 2208
    },
    {
      "epoch": 1.860471645143178,
      "grad_norm": 0.31664031744003296,
      "learning_rate": 3.786585309109877e-06,
      "loss": 0.3971,
      "step": 2209
    },
    {
      "epoch": 1.8613138686131387,
      "grad_norm": 0.38508856296539307,
      "learning_rate": 3.7818298432142814e-06,
      "loss": 0.4413,
      "step": 2210
    },
    {
      "epoch": 1.8621560920830995,
      "grad_norm": 0.38103389739990234,
      "learning_rate": 3.777075548496001e-06,
      "loss": 0.4359,
      "step": 2211
    },
    {
      "epoch": 1.86299831555306,
      "grad_norm": 0.3253352642059326,
      "learning_rate": 3.7723224295259247e-06,
      "loss": 0.4028,
      "step": 2212
    },
    {
      "epoch": 1.8638405390230208,
      "grad_norm": 0.34315067529678345,
      "learning_rate": 3.7675704908738136e-06,
      "loss": 0.4348,
      "step": 2213
    },
    {
      "epoch": 1.8646827624929814,
      "grad_norm": 0.3814580738544464,
      "learning_rate": 3.7628197371082916e-06,
      "loss": 0.3785,
      "step": 2214
    },
    {
      "epoch": 1.8655249859629421,
      "grad_norm": 0.3312060832977295,
      "learning_rate": 3.758070172796846e-06,
      "loss": 0.4336,
      "step": 2215
    },
    {
      "epoch": 1.866367209432903,
      "grad_norm": 0.34449654817581177,
      "learning_rate": 3.753321802505817e-06,
      "loss": 0.4709,
      "step": 2216
    },
    {
      "epoch": 1.8672094329028637,
      "grad_norm": 0.34758204221725464,
      "learning_rate": 3.7485746308004013e-06,
      "loss": 0.3993,
      "step": 2217
    },
    {
      "epoch": 1.8680516563728242,
      "grad_norm": 0.35430437326431274,
      "learning_rate": 3.743828662244639e-06,
      "loss": 0.4577,
      "step": 2218
    },
    {
      "epoch": 1.868893879842785,
      "grad_norm": 0.30245599150657654,
      "learning_rate": 3.739083901401418e-06,
      "loss": 0.3791,
      "step": 2219
    },
    {
      "epoch": 1.8697361033127455,
      "grad_norm": 0.32237744331359863,
      "learning_rate": 3.7343403528324574e-06,
      "loss": 0.4255,
      "step": 2220
    },
    {
      "epoch": 1.8705783267827063,
      "grad_norm": 0.38111838698387146,
      "learning_rate": 3.7295980210983233e-06,
      "loss": 0.4489,
      "step": 2221
    },
    {
      "epoch": 1.871420550252667,
      "grad_norm": 0.3468860685825348,
      "learning_rate": 3.7248569107583976e-06,
      "loss": 0.419,
      "step": 2222
    },
    {
      "epoch": 1.8722627737226278,
      "grad_norm": 0.30807003378868103,
      "learning_rate": 3.7201170263709004e-06,
      "loss": 0.4118,
      "step": 2223
    },
    {
      "epoch": 1.8731049971925884,
      "grad_norm": 0.30873218178749084,
      "learning_rate": 3.7153783724928617e-06,
      "loss": 0.3951,
      "step": 2224
    },
    {
      "epoch": 1.8739472206625492,
      "grad_norm": 0.3643030822277069,
      "learning_rate": 3.71064095368014e-06,
      "loss": 0.4504,
      "step": 2225
    },
    {
      "epoch": 1.8747894441325097,
      "grad_norm": 0.33644217252731323,
      "learning_rate": 3.705904774487396e-06,
      "loss": 0.3748,
      "step": 2226
    },
    {
      "epoch": 1.8756316676024705,
      "grad_norm": 0.3861021399497986,
      "learning_rate": 3.7011698394681075e-06,
      "loss": 0.4566,
      "step": 2227
    },
    {
      "epoch": 1.8764738910724312,
      "grad_norm": 0.3075239956378937,
      "learning_rate": 3.696436153174548e-06,
      "loss": 0.4041,
      "step": 2228
    },
    {
      "epoch": 1.877316114542392,
      "grad_norm": 0.3207765221595764,
      "learning_rate": 3.6917037201577977e-06,
      "loss": 0.4014,
      "step": 2229
    },
    {
      "epoch": 1.8781583380123528,
      "grad_norm": 0.34298422932624817,
      "learning_rate": 3.6869725449677254e-06,
      "loss": 0.4201,
      "step": 2230
    },
    {
      "epoch": 1.8790005614823133,
      "grad_norm": 0.3269258439540863,
      "learning_rate": 3.6822426321529967e-06,
      "loss": 0.4316,
      "step": 2231
    },
    {
      "epoch": 1.8798427849522739,
      "grad_norm": 0.37192609906196594,
      "learning_rate": 3.6775139862610577e-06,
      "loss": 0.4165,
      "step": 2232
    },
    {
      "epoch": 1.8806850084222346,
      "grad_norm": 0.311799019575119,
      "learning_rate": 3.672786611838142e-06,
      "loss": 0.3756,
      "step": 2233
    },
    {
      "epoch": 1.8815272318921954,
      "grad_norm": 0.33450427651405334,
      "learning_rate": 3.668060513429256e-06,
      "loss": 0.4369,
      "step": 2234
    },
    {
      "epoch": 1.8823694553621562,
      "grad_norm": 0.3473842144012451,
      "learning_rate": 3.6633356955781827e-06,
      "loss": 0.4421,
      "step": 2235
    },
    {
      "epoch": 1.883211678832117,
      "grad_norm": 0.3228735327720642,
      "learning_rate": 3.658612162827472e-06,
      "loss": 0.4139,
      "step": 2236
    },
    {
      "epoch": 1.8840539023020775,
      "grad_norm": 0.3483162820339203,
      "learning_rate": 3.653889919718439e-06,
      "loss": 0.3813,
      "step": 2237
    },
    {
      "epoch": 1.884896125772038,
      "grad_norm": 0.3416324853897095,
      "learning_rate": 3.649168970791157e-06,
      "loss": 0.3995,
      "step": 2238
    },
    {
      "epoch": 1.8857383492419988,
      "grad_norm": 0.32762500643730164,
      "learning_rate": 3.644449320584462e-06,
      "loss": 0.4069,
      "step": 2239
    },
    {
      "epoch": 1.8865805727119596,
      "grad_norm": 0.33794930577278137,
      "learning_rate": 3.639730973635929e-06,
      "loss": 0.4665,
      "step": 2240
    },
    {
      "epoch": 1.8874227961819203,
      "grad_norm": 0.31257081031799316,
      "learning_rate": 3.635013934481895e-06,
      "loss": 0.4054,
      "step": 2241
    },
    {
      "epoch": 1.888265019651881,
      "grad_norm": 0.3177803158760071,
      "learning_rate": 3.6302982076574244e-06,
      "loss": 0.389,
      "step": 2242
    },
    {
      "epoch": 1.8891072431218416,
      "grad_norm": 0.3513704836368561,
      "learning_rate": 3.6255837976963336e-06,
      "loss": 0.4095,
      "step": 2243
    },
    {
      "epoch": 1.8899494665918024,
      "grad_norm": 0.3811786472797394,
      "learning_rate": 3.620870709131163e-06,
      "loss": 0.4205,
      "step": 2244
    },
    {
      "epoch": 1.890791690061763,
      "grad_norm": 0.3606407940387726,
      "learning_rate": 3.616158946493188e-06,
      "loss": 0.4366,
      "step": 2245
    },
    {
      "epoch": 1.8916339135317237,
      "grad_norm": 0.3215019702911377,
      "learning_rate": 3.6114485143124068e-06,
      "loss": 0.3815,
      "step": 2246
    },
    {
      "epoch": 1.8924761370016845,
      "grad_norm": 0.35493236780166626,
      "learning_rate": 3.6067394171175397e-06,
      "loss": 0.4182,
      "step": 2247
    },
    {
      "epoch": 1.8933183604716453,
      "grad_norm": 0.3606269359588623,
      "learning_rate": 3.602031659436022e-06,
      "loss": 0.4215,
      "step": 2248
    },
    {
      "epoch": 1.8941605839416058,
      "grad_norm": 0.34096285700798035,
      "learning_rate": 3.5973252457940034e-06,
      "loss": 0.4092,
      "step": 2249
    },
    {
      "epoch": 1.8950028074115666,
      "grad_norm": 0.3313191831111908,
      "learning_rate": 3.5926201807163384e-06,
      "loss": 0.4354,
      "step": 2250
    },
    {
      "epoch": 1.8958450308815271,
      "grad_norm": 0.3833111524581909,
      "learning_rate": 3.58791646872659e-06,
      "loss": 0.4349,
      "step": 2251
    },
    {
      "epoch": 1.8966872543514879,
      "grad_norm": 0.3477966785430908,
      "learning_rate": 3.5832141143470146e-06,
      "loss": 0.4123,
      "step": 2252
    },
    {
      "epoch": 1.8975294778214487,
      "grad_norm": 0.293789803981781,
      "learning_rate": 3.578513122098566e-06,
      "loss": 0.3791,
      "step": 2253
    },
    {
      "epoch": 1.8983717012914094,
      "grad_norm": 0.3719714879989624,
      "learning_rate": 3.5738134965008885e-06,
      "loss": 0.4575,
      "step": 2254
    },
    {
      "epoch": 1.89921392476137,
      "grad_norm": 0.3821752667427063,
      "learning_rate": 3.5691152420723115e-06,
      "loss": 0.4227,
      "step": 2255
    },
    {
      "epoch": 1.9000561482313307,
      "grad_norm": 0.3281887471675873,
      "learning_rate": 3.564418363329848e-06,
      "loss": 0.4095,
      "step": 2256
    },
    {
      "epoch": 1.9008983717012913,
      "grad_norm": 0.3229450583457947,
      "learning_rate": 3.559722864789187e-06,
      "loss": 0.3806,
      "step": 2257
    },
    {
      "epoch": 1.901740595171252,
      "grad_norm": 0.3254774212837219,
      "learning_rate": 3.5550287509646902e-06,
      "loss": 0.4436,
      "step": 2258
    },
    {
      "epoch": 1.9025828186412128,
      "grad_norm": 0.3382726013660431,
      "learning_rate": 3.5503360263693887e-06,
      "loss": 0.4009,
      "step": 2259
    },
    {
      "epoch": 1.9034250421111736,
      "grad_norm": 0.37322717905044556,
      "learning_rate": 3.5456446955149783e-06,
      "loss": 0.4446,
      "step": 2260
    },
    {
      "epoch": 1.9042672655811343,
      "grad_norm": 0.30604061484336853,
      "learning_rate": 3.5409547629118124e-06,
      "loss": 0.3834,
      "step": 2261
    },
    {
      "epoch": 1.905109489051095,
      "grad_norm": 0.303580105304718,
      "learning_rate": 3.5362662330689067e-06,
      "loss": 0.4206,
      "step": 2262
    },
    {
      "epoch": 1.9059517125210554,
      "grad_norm": 0.33803531527519226,
      "learning_rate": 3.531579110493917e-06,
      "loss": 0.4383,
      "step": 2263
    },
    {
      "epoch": 1.9067939359910162,
      "grad_norm": 0.35076847672462463,
      "learning_rate": 3.5268933996931596e-06,
      "loss": 0.3699,
      "step": 2264
    },
    {
      "epoch": 1.907636159460977,
      "grad_norm": 0.33669257164001465,
      "learning_rate": 3.5222091051715803e-06,
      "loss": 0.412,
      "step": 2265
    },
    {
      "epoch": 1.9084783829309377,
      "grad_norm": 0.3304939568042755,
      "learning_rate": 3.517526231432775e-06,
      "loss": 0.4422,
      "step": 2266
    },
    {
      "epoch": 1.9093206064008985,
      "grad_norm": 0.3294644355773926,
      "learning_rate": 3.512844782978963e-06,
      "loss": 0.3998,
      "step": 2267
    },
    {
      "epoch": 1.910162829870859,
      "grad_norm": 0.3414252996444702,
      "learning_rate": 3.5081647643110028e-06,
      "loss": 0.4387,
      "step": 2268
    },
    {
      "epoch": 1.9110050533408196,
      "grad_norm": 0.2990201711654663,
      "learning_rate": 3.5034861799283713e-06,
      "loss": 0.3905,
      "step": 2269
    },
    {
      "epoch": 1.9118472768107804,
      "grad_norm": 0.31908953189849854,
      "learning_rate": 3.498809034329171e-06,
      "loss": 0.4025,
      "step": 2270
    },
    {
      "epoch": 1.9126895002807411,
      "grad_norm": 0.3192228078842163,
      "learning_rate": 3.4941333320101173e-06,
      "loss": 0.3991,
      "step": 2271
    },
    {
      "epoch": 1.913531723750702,
      "grad_norm": 0.34117114543914795,
      "learning_rate": 3.4894590774665414e-06,
      "loss": 0.4343,
      "step": 2272
    },
    {
      "epoch": 1.9143739472206627,
      "grad_norm": 0.3385365605354309,
      "learning_rate": 3.48478627519238e-06,
      "loss": 0.3972,
      "step": 2273
    },
    {
      "epoch": 1.9152161706906232,
      "grad_norm": 0.343700110912323,
      "learning_rate": 3.480114929680176e-06,
      "loss": 0.4213,
      "step": 2274
    },
    {
      "epoch": 1.916058394160584,
      "grad_norm": 0.3982769250869751,
      "learning_rate": 3.4754450454210686e-06,
      "loss": 0.4538,
      "step": 2275
    },
    {
      "epoch": 1.9169006176305445,
      "grad_norm": 0.34075817465782166,
      "learning_rate": 3.470776626904795e-06,
      "loss": 0.4128,
      "step": 2276
    },
    {
      "epoch": 1.9177428411005053,
      "grad_norm": 0.35588014125823975,
      "learning_rate": 3.466109678619681e-06,
      "loss": 0.4281,
      "step": 2277
    },
    {
      "epoch": 1.918585064570466,
      "grad_norm": 0.36886054277420044,
      "learning_rate": 3.4614442050526424e-06,
      "loss": 0.3873,
      "step": 2278
    },
    {
      "epoch": 1.9194272880404268,
      "grad_norm": 0.39788997173309326,
      "learning_rate": 3.4567802106891724e-06,
      "loss": 0.4543,
      "step": 2279
    },
    {
      "epoch": 1.9202695115103874,
      "grad_norm": 0.3277210295200348,
      "learning_rate": 3.4521177000133456e-06,
      "loss": 0.4105,
      "step": 2280
    },
    {
      "epoch": 1.9211117349803482,
      "grad_norm": 0.34242963790893555,
      "learning_rate": 3.4474566775078055e-06,
      "loss": 0.419,
      "step": 2281
    },
    {
      "epoch": 1.9219539584503087,
      "grad_norm": 0.31894755363464355,
      "learning_rate": 3.442797147653776e-06,
      "loss": 0.3675,
      "step": 2282
    },
    {
      "epoch": 1.9227961819202695,
      "grad_norm": 0.34929659962654114,
      "learning_rate": 3.4381391149310294e-06,
      "loss": 0.4458,
      "step": 2283
    },
    {
      "epoch": 1.9236384053902302,
      "grad_norm": 0.3380218744277954,
      "learning_rate": 3.4334825838179143e-06,
      "loss": 0.4356,
      "step": 2284
    },
    {
      "epoch": 1.924480628860191,
      "grad_norm": 0.34835129976272583,
      "learning_rate": 3.4288275587913235e-06,
      "loss": 0.4236,
      "step": 2285
    },
    {
      "epoch": 1.9253228523301515,
      "grad_norm": 0.3273218870162964,
      "learning_rate": 3.4241740443267112e-06,
      "loss": 0.4017,
      "step": 2286
    },
    {
      "epoch": 1.9261650758001123,
      "grad_norm": 0.3632931709289551,
      "learning_rate": 3.419522044898073e-06,
      "loss": 0.4008,
      "step": 2287
    },
    {
      "epoch": 1.9270072992700729,
      "grad_norm": 0.32571175694465637,
      "learning_rate": 3.414871564977951e-06,
      "loss": 0.4437,
      "step": 2288
    },
    {
      "epoch": 1.9278495227400336,
      "grad_norm": 0.3315962255001068,
      "learning_rate": 3.4102226090374246e-06,
      "loss": 0.445,
      "step": 2289
    },
    {
      "epoch": 1.9286917462099944,
      "grad_norm": 0.34038984775543213,
      "learning_rate": 3.4055751815461102e-06,
      "loss": 0.397,
      "step": 2290
    },
    {
      "epoch": 1.9295339696799552,
      "grad_norm": 0.36829593777656555,
      "learning_rate": 3.4009292869721516e-06,
      "loss": 0.4172,
      "step": 2291
    },
    {
      "epoch": 1.930376193149916,
      "grad_norm": 0.3408529758453369,
      "learning_rate": 3.3962849297822225e-06,
      "loss": 0.4271,
      "step": 2292
    },
    {
      "epoch": 1.9312184166198765,
      "grad_norm": 0.33486056327819824,
      "learning_rate": 3.3916421144415146e-06,
      "loss": 0.4178,
      "step": 2293
    },
    {
      "epoch": 1.932060640089837,
      "grad_norm": 0.33972930908203125,
      "learning_rate": 3.387000845413742e-06,
      "loss": 0.3605,
      "step": 2294
    },
    {
      "epoch": 1.9329028635597978,
      "grad_norm": 0.35704052448272705,
      "learning_rate": 3.3823611271611266e-06,
      "loss": 0.4603,
      "step": 2295
    },
    {
      "epoch": 1.9337450870297586,
      "grad_norm": 0.3475121557712555,
      "learning_rate": 3.377722964144405e-06,
      "loss": 0.3871,
      "step": 2296
    },
    {
      "epoch": 1.9345873104997193,
      "grad_norm": 0.34563231468200684,
      "learning_rate": 3.3730863608228125e-06,
      "loss": 0.4461,
      "step": 2297
    },
    {
      "epoch": 1.93542953396968,
      "grad_norm": 0.3667762875556946,
      "learning_rate": 3.368451321654091e-06,
      "loss": 0.43,
      "step": 2298
    },
    {
      "epoch": 1.9362717574396406,
      "grad_norm": 0.3395666778087616,
      "learning_rate": 3.363817851094473e-06,
      "loss": 0.3766,
      "step": 2299
    },
    {
      "epoch": 1.9371139809096012,
      "grad_norm": 0.3399890065193176,
      "learning_rate": 3.3591859535986894e-06,
      "loss": 0.4256,
      "step": 2300
    },
    {
      "epoch": 1.937956204379562,
      "grad_norm": 0.36868178844451904,
      "learning_rate": 3.35455563361995e-06,
      "loss": 0.4304,
      "step": 2301
    },
    {
      "epoch": 1.9387984278495227,
      "grad_norm": 0.3355114161968231,
      "learning_rate": 3.3499268956099583e-06,
      "loss": 0.4077,
      "step": 2302
    },
    {
      "epoch": 1.9396406513194835,
      "grad_norm": 0.33046525716781616,
      "learning_rate": 3.345299744018886e-06,
      "loss": 0.4297,
      "step": 2303
    },
    {
      "epoch": 1.9404828747894443,
      "grad_norm": 0.3666151762008667,
      "learning_rate": 3.3406741832953893e-06,
      "loss": 0.4215,
      "step": 2304
    },
    {
      "epoch": 1.9413250982594048,
      "grad_norm": 0.38225436210632324,
      "learning_rate": 3.336050217886588e-06,
      "loss": 0.4253,
      "step": 2305
    },
    {
      "epoch": 1.9421673217293656,
      "grad_norm": 0.30344218015670776,
      "learning_rate": 3.331427852238073e-06,
      "loss": 0.4251,
      "step": 2306
    },
    {
      "epoch": 1.9430095451993261,
      "grad_norm": 0.2834741771221161,
      "learning_rate": 3.3268070907938915e-06,
      "loss": 0.3983,
      "step": 2307
    },
    {
      "epoch": 1.9438517686692869,
      "grad_norm": 0.3373931348323822,
      "learning_rate": 3.3221879379965553e-06,
      "loss": 0.4074,
      "step": 2308
    },
    {
      "epoch": 1.9446939921392477,
      "grad_norm": 0.4025129973888397,
      "learning_rate": 3.3175703982870232e-06,
      "loss": 0.4093,
      "step": 2309
    },
    {
      "epoch": 1.9455362156092084,
      "grad_norm": 0.3170529007911682,
      "learning_rate": 3.3129544761047093e-06,
      "loss": 0.3663,
      "step": 2310
    },
    {
      "epoch": 1.946378439079169,
      "grad_norm": 0.34305307269096375,
      "learning_rate": 3.3083401758874655e-06,
      "loss": 0.4289,
      "step": 2311
    },
    {
      "epoch": 1.9472206625491297,
      "grad_norm": 0.3554058372974396,
      "learning_rate": 3.303727502071591e-06,
      "loss": 0.4393,
      "step": 2312
    },
    {
      "epoch": 1.9480628860190903,
      "grad_norm": 0.300304114818573,
      "learning_rate": 3.2991164590918162e-06,
      "loss": 0.3873,
      "step": 2313
    },
    {
      "epoch": 1.948905109489051,
      "grad_norm": 0.31696784496307373,
      "learning_rate": 3.2945070513813082e-06,
      "loss": 0.4125,
      "step": 2314
    },
    {
      "epoch": 1.9497473329590118,
      "grad_norm": 0.3465268611907959,
      "learning_rate": 3.289899283371657e-06,
      "loss": 0.4449,
      "step": 2315
    },
    {
      "epoch": 1.9505895564289726,
      "grad_norm": 0.3022516071796417,
      "learning_rate": 3.2852931594928804e-06,
      "loss": 0.4001,
      "step": 2316
    },
    {
      "epoch": 1.9514317798989333,
      "grad_norm": 0.3349062502384186,
      "learning_rate": 3.280688684173412e-06,
      "loss": 0.4286,
      "step": 2317
    },
    {
      "epoch": 1.952274003368894,
      "grad_norm": 0.3136197626590729,
      "learning_rate": 3.276085861840106e-06,
      "loss": 0.4151,
      "step": 2318
    },
    {
      "epoch": 1.9531162268388544,
      "grad_norm": 0.32434752583503723,
      "learning_rate": 3.271484696918218e-06,
      "loss": 0.3734,
      "step": 2319
    },
    {
      "epoch": 1.9539584503088152,
      "grad_norm": 0.35659119486808777,
      "learning_rate": 3.2668851938314217e-06,
      "loss": 0.4409,
      "step": 2320
    },
    {
      "epoch": 1.954800673778776,
      "grad_norm": 0.33717280626296997,
      "learning_rate": 3.262287357001781e-06,
      "loss": 0.4275,
      "step": 2321
    },
    {
      "epoch": 1.9556428972487367,
      "grad_norm": 0.3311217129230499,
      "learning_rate": 3.2576911908497695e-06,
      "loss": 0.4164,
      "step": 2322
    },
    {
      "epoch": 1.9564851207186975,
      "grad_norm": 0.3252238631248474,
      "learning_rate": 3.253096699794245e-06,
      "loss": 0.4295,
      "step": 2323
    },
    {
      "epoch": 1.957327344188658,
      "grad_norm": 0.33747342228889465,
      "learning_rate": 3.248503888252461e-06,
      "loss": 0.4073,
      "step": 2324
    },
    {
      "epoch": 1.9581695676586186,
      "grad_norm": 0.34092071652412415,
      "learning_rate": 3.2439127606400546e-06,
      "loss": 0.3982,
      "step": 2325
    },
    {
      "epoch": 1.9590117911285794,
      "grad_norm": 0.3187452256679535,
      "learning_rate": 3.239323321371039e-06,
      "loss": 0.4084,
      "step": 2326
    },
    {
      "epoch": 1.9598540145985401,
      "grad_norm": 0.3614373207092285,
      "learning_rate": 3.2347355748578134e-06,
      "loss": 0.4713,
      "step": 2327
    },
    {
      "epoch": 1.960696238068501,
      "grad_norm": 0.32135477662086487,
      "learning_rate": 3.2301495255111426e-06,
      "loss": 0.3862,
      "step": 2328
    },
    {
      "epoch": 1.9615384615384617,
      "grad_norm": 0.3258622884750366,
      "learning_rate": 3.225565177740163e-06,
      "loss": 0.4295,
      "step": 2329
    },
    {
      "epoch": 1.9623806850084222,
      "grad_norm": 0.32498592138290405,
      "learning_rate": 3.2209825359523717e-06,
      "loss": 0.3939,
      "step": 2330
    },
    {
      "epoch": 1.9632229084783828,
      "grad_norm": 0.34039250016212463,
      "learning_rate": 3.2164016045536306e-06,
      "loss": 0.4205,
      "step": 2331
    },
    {
      "epoch": 1.9640651319483435,
      "grad_norm": 0.3229815661907196,
      "learning_rate": 3.2118223879481525e-06,
      "loss": 0.4132,
      "step": 2332
    },
    {
      "epoch": 1.9649073554183043,
      "grad_norm": 0.32059547305107117,
      "learning_rate": 3.2072448905385046e-06,
      "loss": 0.4179,
      "step": 2333
    },
    {
      "epoch": 1.965749578888265,
      "grad_norm": 0.3339865803718567,
      "learning_rate": 3.202669116725598e-06,
      "loss": 0.4227,
      "step": 2334
    },
    {
      "epoch": 1.9665918023582258,
      "grad_norm": 0.34816163778305054,
      "learning_rate": 3.1980950709086923e-06,
      "loss": 0.4248,
      "step": 2335
    },
    {
      "epoch": 1.9674340258281864,
      "grad_norm": 0.3554445505142212,
      "learning_rate": 3.193522757485378e-06,
      "loss": 0.4289,
      "step": 2336
    },
    {
      "epoch": 1.9682762492981472,
      "grad_norm": 0.3246705234050751,
      "learning_rate": 3.1889521808515888e-06,
      "loss": 0.3813,
      "step": 2337
    },
    {
      "epoch": 1.9691184727681077,
      "grad_norm": 0.3335620164871216,
      "learning_rate": 3.1843833454015804e-06,
      "loss": 0.4183,
      "step": 2338
    },
    {
      "epoch": 1.9699606962380685,
      "grad_norm": 0.3261442184448242,
      "learning_rate": 3.179816255527941e-06,
      "loss": 0.3888,
      "step": 2339
    },
    {
      "epoch": 1.9708029197080292,
      "grad_norm": 0.3700886070728302,
      "learning_rate": 3.1752509156215738e-06,
      "loss": 0.4223,
      "step": 2340
    },
    {
      "epoch": 1.97164514317799,
      "grad_norm": 0.3582298457622528,
      "learning_rate": 3.1706873300717094e-06,
      "loss": 0.4054,
      "step": 2341
    },
    {
      "epoch": 1.9724873666479505,
      "grad_norm": 0.2966936230659485,
      "learning_rate": 3.16612550326588e-06,
      "loss": 0.4062,
      "step": 2342
    },
    {
      "epoch": 1.9733295901179113,
      "grad_norm": 0.37263983488082886,
      "learning_rate": 3.1615654395899377e-06,
      "loss": 0.4294,
      "step": 2343
    },
    {
      "epoch": 1.9741718135878719,
      "grad_norm": 0.33031609654426575,
      "learning_rate": 3.1570071434280292e-06,
      "loss": 0.393,
      "step": 2344
    },
    {
      "epoch": 1.9750140370578326,
      "grad_norm": 0.33011654019355774,
      "learning_rate": 3.152450619162612e-06,
      "loss": 0.3965,
      "step": 2345
    },
    {
      "epoch": 1.9758562605277934,
      "grad_norm": 0.34393349289894104,
      "learning_rate": 3.1478958711744324e-06,
      "loss": 0.4331,
      "step": 2346
    },
    {
      "epoch": 1.9766984839977542,
      "grad_norm": 0.30916979908943176,
      "learning_rate": 3.1433429038425334e-06,
      "loss": 0.3933,
      "step": 2347
    },
    {
      "epoch": 1.977540707467715,
      "grad_norm": 0.32264065742492676,
      "learning_rate": 3.1387917215442427e-06,
      "loss": 0.4379,
      "step": 2348
    },
    {
      "epoch": 1.9783829309376755,
      "grad_norm": 0.34119778871536255,
      "learning_rate": 3.1342423286551756e-06,
      "loss": 0.4294,
      "step": 2349
    },
    {
      "epoch": 1.979225154407636,
      "grad_norm": 0.34695371985435486,
      "learning_rate": 3.1296947295492226e-06,
      "loss": 0.4227,
      "step": 2350
    },
    {
      "epoch": 1.9800673778775968,
      "grad_norm": 0.34029725193977356,
      "learning_rate": 3.125148928598554e-06,
      "loss": 0.4385,
      "step": 2351
    },
    {
      "epoch": 1.9809096013475576,
      "grad_norm": 0.32337313890457153,
      "learning_rate": 3.120604930173608e-06,
      "loss": 0.3693,
      "step": 2352
    },
    {
      "epoch": 1.9817518248175183,
      "grad_norm": 0.3419548273086548,
      "learning_rate": 3.116062738643092e-06,
      "loss": 0.4561,
      "step": 2353
    },
    {
      "epoch": 1.982594048287479,
      "grad_norm": 0.3310522139072418,
      "learning_rate": 3.1115223583739746e-06,
      "loss": 0.3872,
      "step": 2354
    },
    {
      "epoch": 1.9834362717574396,
      "grad_norm": 0.3474218547344208,
      "learning_rate": 3.1069837937314846e-06,
      "loss": 0.4181,
      "step": 2355
    },
    {
      "epoch": 1.9842784952274002,
      "grad_norm": 0.3278506100177765,
      "learning_rate": 3.1024470490791027e-06,
      "loss": 0.4372,
      "step": 2356
    },
    {
      "epoch": 1.985120718697361,
      "grad_norm": 0.31884655356407166,
      "learning_rate": 3.097912128778563e-06,
      "loss": 0.3855,
      "step": 2357
    },
    {
      "epoch": 1.9859629421673217,
      "grad_norm": 0.3464679419994354,
      "learning_rate": 3.093379037189842e-06,
      "loss": 0.4009,
      "step": 2358
    },
    {
      "epoch": 1.9868051656372825,
      "grad_norm": 0.333181768655777,
      "learning_rate": 3.0888477786711646e-06,
      "loss": 0.4123,
      "step": 2359
    },
    {
      "epoch": 1.9876473891072433,
      "grad_norm": 0.3394389748573303,
      "learning_rate": 3.0843183575789824e-06,
      "loss": 0.457,
      "step": 2360
    },
    {
      "epoch": 1.9884896125772038,
      "grad_norm": 0.2993851602077484,
      "learning_rate": 3.0797907782679944e-06,
      "loss": 0.386,
      "step": 2361
    },
    {
      "epoch": 1.9893318360471643,
      "grad_norm": 0.29469501972198486,
      "learning_rate": 3.075265045091114e-06,
      "loss": 0.3782,
      "step": 2362
    },
    {
      "epoch": 1.9901740595171251,
      "grad_norm": 0.36533671617507935,
      "learning_rate": 3.070741162399492e-06,
      "loss": 0.4869,
      "step": 2363
    },
    {
      "epoch": 1.9910162829870859,
      "grad_norm": 0.34510818123817444,
      "learning_rate": 3.0662191345424925e-06,
      "loss": 0.4216,
      "step": 2364
    },
    {
      "epoch": 1.9918585064570467,
      "grad_norm": 0.3161218464374542,
      "learning_rate": 3.061698965867701e-06,
      "loss": 0.4304,
      "step": 2365
    },
    {
      "epoch": 1.9927007299270074,
      "grad_norm": 0.3247872591018677,
      "learning_rate": 3.057180660720912e-06,
      "loss": 0.4071,
      "step": 2366
    },
    {
      "epoch": 1.993542953396968,
      "grad_norm": 0.2996198534965515,
      "learning_rate": 3.0526642234461313e-06,
      "loss": 0.3856,
      "step": 2367
    },
    {
      "epoch": 1.9943851768669287,
      "grad_norm": 0.36659252643585205,
      "learning_rate": 3.048149658385565e-06,
      "loss": 0.4326,
      "step": 2368
    },
    {
      "epoch": 1.9952274003368893,
      "grad_norm": 0.33244216442108154,
      "learning_rate": 3.043636969879625e-06,
      "loss": 0.4199,
      "step": 2369
    },
    {
      "epoch": 1.99606962380685,
      "grad_norm": 0.34029316902160645,
      "learning_rate": 3.039126162266912e-06,
      "loss": 0.4253,
      "step": 2370
    },
    {
      "epoch": 1.9969118472768108,
      "grad_norm": 0.3124338686466217,
      "learning_rate": 3.0346172398842254e-06,
      "loss": 0.4165,
      "step": 2371
    },
    {
      "epoch": 1.9977540707467716,
      "grad_norm": 0.3335796892642975,
      "learning_rate": 3.0301102070665466e-06,
      "loss": 0.4274,
      "step": 2372
    },
    {
      "epoch": 1.9985962942167321,
      "grad_norm": 0.3069957196712494,
      "learning_rate": 3.0256050681470446e-06,
      "loss": 0.3827,
      "step": 2373
    },
    {
      "epoch": 1.999438517686693,
      "grad_norm": 0.3421737849712372,
      "learning_rate": 3.0211018274570625e-06,
      "loss": 0.4283,
      "step": 2374
    },
    {
      "epoch": 2.0002807411566534,
      "grad_norm": 0.6979325413703918,
      "learning_rate": 3.0166004893261247e-06,
      "loss": 0.6577,
      "step": 2375
    },
    {
      "epoch": 2.001122964626614,
      "grad_norm": 0.2916451394557953,
      "learning_rate": 3.012101058081919e-06,
      "loss": 0.3507,
      "step": 2376
    },
    {
      "epoch": 2.001965188096575,
      "grad_norm": 0.33503973484039307,
      "learning_rate": 3.007603538050309e-06,
      "loss": 0.4168,
      "step": 2377
    },
    {
      "epoch": 2.0028074115665357,
      "grad_norm": 0.3224446773529053,
      "learning_rate": 3.0031079335553097e-06,
      "loss": 0.3417,
      "step": 2378
    },
    {
      "epoch": 2.0036496350364965,
      "grad_norm": 0.3372901678085327,
      "learning_rate": 2.9986142489191074e-06,
      "loss": 0.4373,
      "step": 2379
    },
    {
      "epoch": 2.004491858506457,
      "grad_norm": 0.3095046877861023,
      "learning_rate": 2.994122488462029e-06,
      "loss": 0.3856,
      "step": 2380
    },
    {
      "epoch": 2.0053340819764176,
      "grad_norm": 0.30631381273269653,
      "learning_rate": 2.989632656502564e-06,
      "loss": 0.3697,
      "step": 2381
    },
    {
      "epoch": 2.0061763054463784,
      "grad_norm": 0.3306558132171631,
      "learning_rate": 2.9851447573573383e-06,
      "loss": 0.402,
      "step": 2382
    },
    {
      "epoch": 2.007018528916339,
      "grad_norm": 0.3114640712738037,
      "learning_rate": 2.980658795341125e-06,
      "loss": 0.3799,
      "step": 2383
    },
    {
      "epoch": 2.0078607523863,
      "grad_norm": 0.3301217257976532,
      "learning_rate": 2.9761747747668314e-06,
      "loss": 0.4128,
      "step": 2384
    },
    {
      "epoch": 2.0087029758562607,
      "grad_norm": 0.33894869685173035,
      "learning_rate": 2.971692699945502e-06,
      "loss": 0.4233,
      "step": 2385
    },
    {
      "epoch": 2.0095451993262214,
      "grad_norm": 0.3198185861110687,
      "learning_rate": 2.9672125751863067e-06,
      "loss": 0.3784,
      "step": 2386
    },
    {
      "epoch": 2.0103874227961818,
      "grad_norm": 0.3345125615596771,
      "learning_rate": 2.9627344047965433e-06,
      "loss": 0.3579,
      "step": 2387
    },
    {
      "epoch": 2.0112296462661425,
      "grad_norm": 0.3559136390686035,
      "learning_rate": 2.958258193081629e-06,
      "loss": 0.4647,
      "step": 2388
    },
    {
      "epoch": 2.0120718697361033,
      "grad_norm": 0.30635929107666016,
      "learning_rate": 2.9537839443451e-06,
      "loss": 0.3463,
      "step": 2389
    },
    {
      "epoch": 2.012914093206064,
      "grad_norm": 0.33349528908729553,
      "learning_rate": 2.949311662888601e-06,
      "loss": 0.3812,
      "step": 2390
    },
    {
      "epoch": 2.013756316676025,
      "grad_norm": 0.3638151288032532,
      "learning_rate": 2.9448413530118912e-06,
      "loss": 0.3969,
      "step": 2391
    },
    {
      "epoch": 2.0145985401459856,
      "grad_norm": 0.34058448672294617,
      "learning_rate": 2.94037301901283e-06,
      "loss": 0.3986,
      "step": 2392
    },
    {
      "epoch": 2.015440763615946,
      "grad_norm": 0.2870912253856659,
      "learning_rate": 2.935906665187378e-06,
      "loss": 0.3532,
      "step": 2393
    },
    {
      "epoch": 2.0162829870859067,
      "grad_norm": 0.3189200460910797,
      "learning_rate": 2.9314422958295906e-06,
      "loss": 0.3981,
      "step": 2394
    },
    {
      "epoch": 2.0171252105558675,
      "grad_norm": 0.3521755635738373,
      "learning_rate": 2.9269799152316226e-06,
      "loss": 0.4105,
      "step": 2395
    },
    {
      "epoch": 2.0179674340258282,
      "grad_norm": 0.32815036177635193,
      "learning_rate": 2.922519527683706e-06,
      "loss": 0.4169,
      "step": 2396
    },
    {
      "epoch": 2.018809657495789,
      "grad_norm": 0.30524566769599915,
      "learning_rate": 2.9180611374741623e-06,
      "loss": 0.3608,
      "step": 2397
    },
    {
      "epoch": 2.0196518809657498,
      "grad_norm": 0.29507043957710266,
      "learning_rate": 2.913604748889395e-06,
      "loss": 0.3588,
      "step": 2398
    },
    {
      "epoch": 2.02049410443571,
      "grad_norm": 0.33840590715408325,
      "learning_rate": 2.9091503662138764e-06,
      "loss": 0.436,
      "step": 2399
    },
    {
      "epoch": 2.021336327905671,
      "grad_norm": 0.3247450888156891,
      "learning_rate": 2.904697993730159e-06,
      "loss": 0.3918,
      "step": 2400
    },
    {
      "epoch": 2.0221785513756316,
      "grad_norm": 0.35062626004219055,
      "learning_rate": 2.900247635718856e-06,
      "loss": 0.405,
      "step": 2401
    },
    {
      "epoch": 2.0230207748455924,
      "grad_norm": 0.28334277868270874,
      "learning_rate": 2.8957992964586445e-06,
      "loss": 0.3459,
      "step": 2402
    },
    {
      "epoch": 2.023862998315553,
      "grad_norm": 0.3211638927459717,
      "learning_rate": 2.891352980226262e-06,
      "loss": 0.402,
      "step": 2403
    },
    {
      "epoch": 2.024705221785514,
      "grad_norm": 0.28249630331993103,
      "learning_rate": 2.886908691296504e-06,
      "loss": 0.3375,
      "step": 2404
    },
    {
      "epoch": 2.0255474452554743,
      "grad_norm": 0.29784122109413147,
      "learning_rate": 2.8824664339422115e-06,
      "loss": 0.3809,
      "step": 2405
    },
    {
      "epoch": 2.026389668725435,
      "grad_norm": 0.32199567556381226,
      "learning_rate": 2.8780262124342755e-06,
      "loss": 0.3398,
      "step": 2406
    },
    {
      "epoch": 2.027231892195396,
      "grad_norm": 0.33898991346359253,
      "learning_rate": 2.873588031041627e-06,
      "loss": 0.4446,
      "step": 2407
    },
    {
      "epoch": 2.0280741156653566,
      "grad_norm": 0.3014601469039917,
      "learning_rate": 2.8691518940312413e-06,
      "loss": 0.42,
      "step": 2408
    },
    {
      "epoch": 2.0289163391353173,
      "grad_norm": 0.3043738603591919,
      "learning_rate": 2.8647178056681197e-06,
      "loss": 0.3787,
      "step": 2409
    },
    {
      "epoch": 2.029758562605278,
      "grad_norm": 0.387542724609375,
      "learning_rate": 2.8602857702153054e-06,
      "loss": 0.405,
      "step": 2410
    },
    {
      "epoch": 2.0306007860752384,
      "grad_norm": 0.35983800888061523,
      "learning_rate": 2.8558557919338537e-06,
      "loss": 0.3902,
      "step": 2411
    },
    {
      "epoch": 2.031443009545199,
      "grad_norm": 0.3184964060783386,
      "learning_rate": 2.8514278750828537e-06,
      "loss": 0.3661,
      "step": 2412
    },
    {
      "epoch": 2.03228523301516,
      "grad_norm": 0.3535210192203522,
      "learning_rate": 2.847002023919406e-06,
      "loss": 0.4287,
      "step": 2413
    },
    {
      "epoch": 2.0331274564851207,
      "grad_norm": 0.37853676080703735,
      "learning_rate": 2.8425782426986304e-06,
      "loss": 0.4244,
      "step": 2414
    },
    {
      "epoch": 2.0339696799550815,
      "grad_norm": 0.3390139937400818,
      "learning_rate": 2.838156535673652e-06,
      "loss": 0.3879,
      "step": 2415
    },
    {
      "epoch": 2.0348119034250423,
      "grad_norm": 0.3345479965209961,
      "learning_rate": 2.833736907095604e-06,
      "loss": 0.3977,
      "step": 2416
    },
    {
      "epoch": 2.035654126895003,
      "grad_norm": 0.33563995361328125,
      "learning_rate": 2.8293193612136183e-06,
      "loss": 0.3943,
      "step": 2417
    },
    {
      "epoch": 2.0364963503649633,
      "grad_norm": 0.35232239961624146,
      "learning_rate": 2.8249039022748315e-06,
      "loss": 0.3999,
      "step": 2418
    },
    {
      "epoch": 2.037338573834924,
      "grad_norm": 0.34022974967956543,
      "learning_rate": 2.8204905345243664e-06,
      "loss": 0.3919,
      "step": 2419
    },
    {
      "epoch": 2.038180797304885,
      "grad_norm": 0.30851611495018005,
      "learning_rate": 2.816079262205339e-06,
      "loss": 0.3746,
      "step": 2420
    },
    {
      "epoch": 2.0390230207748457,
      "grad_norm": 0.3183887004852295,
      "learning_rate": 2.8116700895588473e-06,
      "loss": 0.3975,
      "step": 2421
    },
    {
      "epoch": 2.0398652442448064,
      "grad_norm": 0.3174850642681122,
      "learning_rate": 2.807263020823977e-06,
      "loss": 0.3897,
      "step": 2422
    },
    {
      "epoch": 2.040707467714767,
      "grad_norm": 0.30304619669914246,
      "learning_rate": 2.8028580602377852e-06,
      "loss": 0.3788,
      "step": 2423
    },
    {
      "epoch": 2.0415496911847275,
      "grad_norm": 0.317258358001709,
      "learning_rate": 2.798455212035305e-06,
      "loss": 0.4023,
      "step": 2424
    },
    {
      "epoch": 2.0423919146546883,
      "grad_norm": 0.3165973424911499,
      "learning_rate": 2.7940544804495345e-06,
      "loss": 0.3788,
      "step": 2425
    },
    {
      "epoch": 2.043234138124649,
      "grad_norm": 0.3063352406024933,
      "learning_rate": 2.789655869711445e-06,
      "loss": 0.3491,
      "step": 2426
    },
    {
      "epoch": 2.04407636159461,
      "grad_norm": 0.3284105360507965,
      "learning_rate": 2.785259384049959e-06,
      "loss": 0.4223,
      "step": 2427
    },
    {
      "epoch": 2.0449185850645706,
      "grad_norm": 0.33078718185424805,
      "learning_rate": 2.780865027691968e-06,
      "loss": 0.367,
      "step": 2428
    },
    {
      "epoch": 2.0457608085345313,
      "grad_norm": 0.313933789730072,
      "learning_rate": 2.7764728048623003e-06,
      "loss": 0.4117,
      "step": 2429
    },
    {
      "epoch": 2.0466030320044917,
      "grad_norm": 0.31020480394363403,
      "learning_rate": 2.7720827197837475e-06,
      "loss": 0.4125,
      "step": 2430
    },
    {
      "epoch": 2.0474452554744524,
      "grad_norm": 0.3092164695262909,
      "learning_rate": 2.7676947766770367e-06,
      "loss": 0.3861,
      "step": 2431
    },
    {
      "epoch": 2.048287478944413,
      "grad_norm": 0.31776222586631775,
      "learning_rate": 2.7633089797608435e-06,
      "loss": 0.4174,
      "step": 2432
    },
    {
      "epoch": 2.049129702414374,
      "grad_norm": 0.2995806038379669,
      "learning_rate": 2.7589253332517736e-06,
      "loss": 0.3681,
      "step": 2433
    },
    {
      "epoch": 2.0499719258843347,
      "grad_norm": 0.33012688159942627,
      "learning_rate": 2.7545438413643666e-06,
      "loss": 0.3927,
      "step": 2434
    },
    {
      "epoch": 2.0508141493542955,
      "grad_norm": 0.345049649477005,
      "learning_rate": 2.7501645083110893e-06,
      "loss": 0.3975,
      "step": 2435
    },
    {
      "epoch": 2.051656372824256,
      "grad_norm": 0.3037142753601074,
      "learning_rate": 2.745787338302341e-06,
      "loss": 0.3983,
      "step": 2436
    },
    {
      "epoch": 2.0524985962942166,
      "grad_norm": 0.31277570128440857,
      "learning_rate": 2.741412335546431e-06,
      "loss": 0.4003,
      "step": 2437
    },
    {
      "epoch": 2.0533408197641774,
      "grad_norm": 0.31544387340545654,
      "learning_rate": 2.7370395042495913e-06,
      "loss": 0.3778,
      "step": 2438
    },
    {
      "epoch": 2.054183043234138,
      "grad_norm": 0.2784262001514435,
      "learning_rate": 2.7326688486159613e-06,
      "loss": 0.3572,
      "step": 2439
    },
    {
      "epoch": 2.055025266704099,
      "grad_norm": 0.30272725224494934,
      "learning_rate": 2.7283003728475952e-06,
      "loss": 0.3816,
      "step": 2440
    },
    {
      "epoch": 2.0558674901740597,
      "grad_norm": 0.33540278673171997,
      "learning_rate": 2.7239340811444476e-06,
      "loss": 0.4126,
      "step": 2441
    },
    {
      "epoch": 2.05670971364402,
      "grad_norm": 0.3455903232097626,
      "learning_rate": 2.7195699777043723e-06,
      "loss": 0.4193,
      "step": 2442
    },
    {
      "epoch": 2.0575519371139808,
      "grad_norm": 0.30142679810523987,
      "learning_rate": 2.7152080667231185e-06,
      "loss": 0.3836,
      "step": 2443
    },
    {
      "epoch": 2.0583941605839415,
      "grad_norm": 0.3111274242401123,
      "learning_rate": 2.710848352394334e-06,
      "loss": 0.3975,
      "step": 2444
    },
    {
      "epoch": 2.0592363840539023,
      "grad_norm": 0.2949243187904358,
      "learning_rate": 2.706490838909547e-06,
      "loss": 0.3465,
      "step": 2445
    },
    {
      "epoch": 2.060078607523863,
      "grad_norm": 0.3270438313484192,
      "learning_rate": 2.7021355304581765e-06,
      "loss": 0.3967,
      "step": 2446
    },
    {
      "epoch": 2.060920830993824,
      "grad_norm": 0.31983643770217896,
      "learning_rate": 2.6977824312275123e-06,
      "loss": 0.3926,
      "step": 2447
    },
    {
      "epoch": 2.0617630544637846,
      "grad_norm": 0.30299797654151917,
      "learning_rate": 2.6934315454027323e-06,
      "loss": 0.4122,
      "step": 2448
    },
    {
      "epoch": 2.062605277933745,
      "grad_norm": 0.31638631224632263,
      "learning_rate": 2.6890828771668742e-06,
      "loss": 0.417,
      "step": 2449
    },
    {
      "epoch": 2.0634475014037057,
      "grad_norm": 0.3103678822517395,
      "learning_rate": 2.684736430700854e-06,
      "loss": 0.3922,
      "step": 2450
    },
    {
      "epoch": 2.0642897248736665,
      "grad_norm": 0.29586607217788696,
      "learning_rate": 2.680392210183446e-06,
      "loss": 0.3811,
      "step": 2451
    },
    {
      "epoch": 2.0651319483436272,
      "grad_norm": 0.31447890400886536,
      "learning_rate": 2.6760502197912842e-06,
      "loss": 0.3913,
      "step": 2452
    },
    {
      "epoch": 2.065974171813588,
      "grad_norm": 0.3338525593280792,
      "learning_rate": 2.671710463698859e-06,
      "loss": 0.3955,
      "step": 2453
    },
    {
      "epoch": 2.0668163952835488,
      "grad_norm": 0.3043944239616394,
      "learning_rate": 2.6673729460785174e-06,
      "loss": 0.3466,
      "step": 2454
    },
    {
      "epoch": 2.067658618753509,
      "grad_norm": 0.3382345139980316,
      "learning_rate": 2.663037671100448e-06,
      "loss": 0.4254,
      "step": 2455
    },
    {
      "epoch": 2.06850084222347,
      "grad_norm": 0.31396129727363586,
      "learning_rate": 2.6587046429326855e-06,
      "loss": 0.3716,
      "step": 2456
    },
    {
      "epoch": 2.0693430656934306,
      "grad_norm": 0.28630709648132324,
      "learning_rate": 2.6543738657411033e-06,
      "loss": 0.3597,
      "step": 2457
    },
    {
      "epoch": 2.0701852891633914,
      "grad_norm": 0.3135938048362732,
      "learning_rate": 2.6500453436894157e-06,
      "loss": 0.4028,
      "step": 2458
    },
    {
      "epoch": 2.071027512633352,
      "grad_norm": 0.2959445118904114,
      "learning_rate": 2.6457190809391627e-06,
      "loss": 0.3658,
      "step": 2459
    },
    {
      "epoch": 2.071869736103313,
      "grad_norm": 0.29808199405670166,
      "learning_rate": 2.6413950816497146e-06,
      "loss": 0.4042,
      "step": 2460
    },
    {
      "epoch": 2.0727119595732733,
      "grad_norm": 0.3044230341911316,
      "learning_rate": 2.6370733499782654e-06,
      "loss": 0.3952,
      "step": 2461
    },
    {
      "epoch": 2.073554183043234,
      "grad_norm": 0.2965955138206482,
      "learning_rate": 2.6327538900798306e-06,
      "loss": 0.3502,
      "step": 2462
    },
    {
      "epoch": 2.074396406513195,
      "grad_norm": 0.3124077022075653,
      "learning_rate": 2.628436706107238e-06,
      "loss": 0.4125,
      "step": 2463
    },
    {
      "epoch": 2.0752386299831556,
      "grad_norm": 0.30966681241989136,
      "learning_rate": 2.6241218022111336e-06,
      "loss": 0.4028,
      "step": 2464
    },
    {
      "epoch": 2.0760808534531163,
      "grad_norm": 0.31093326210975647,
      "learning_rate": 2.6198091825399606e-06,
      "loss": 0.3908,
      "step": 2465
    },
    {
      "epoch": 2.076923076923077,
      "grad_norm": 0.3148452639579773,
      "learning_rate": 2.6154988512399784e-06,
      "loss": 0.3651,
      "step": 2466
    },
    {
      "epoch": 2.0777653003930374,
      "grad_norm": 0.31576019525527954,
      "learning_rate": 2.6111908124552355e-06,
      "loss": 0.3727,
      "step": 2467
    },
    {
      "epoch": 2.078607523862998,
      "grad_norm": 0.31384697556495667,
      "learning_rate": 2.6068850703275856e-06,
      "loss": 0.3756,
      "step": 2468
    },
    {
      "epoch": 2.079449747332959,
      "grad_norm": 0.32771238684654236,
      "learning_rate": 2.6025816289966703e-06,
      "loss": 0.4306,
      "step": 2469
    },
    {
      "epoch": 2.0802919708029197,
      "grad_norm": 0.2917294204235077,
      "learning_rate": 2.598280492599913e-06,
      "loss": 0.3702,
      "step": 2470
    },
    {
      "epoch": 2.0811341942728805,
      "grad_norm": 0.30116191506385803,
      "learning_rate": 2.5939816652725324e-06,
      "loss": 0.3963,
      "step": 2471
    },
    {
      "epoch": 2.0819764177428413,
      "grad_norm": 0.3046509027481079,
      "learning_rate": 2.5896851511475184e-06,
      "loss": 0.4043,
      "step": 2472
    },
    {
      "epoch": 2.0828186412128016,
      "grad_norm": 0.29755252599716187,
      "learning_rate": 2.5853909543556444e-06,
      "loss": 0.3448,
      "step": 2473
    },
    {
      "epoch": 2.0836608646827623,
      "grad_norm": 0.3319643437862396,
      "learning_rate": 2.5810990790254486e-06,
      "loss": 0.4152,
      "step": 2474
    },
    {
      "epoch": 2.084503088152723,
      "grad_norm": 0.30071091651916504,
      "learning_rate": 2.5768095292832412e-06,
      "loss": 0.3743,
      "step": 2475
    },
    {
      "epoch": 2.085345311622684,
      "grad_norm": 0.30836713314056396,
      "learning_rate": 2.5725223092530937e-06,
      "loss": 0.3881,
      "step": 2476
    },
    {
      "epoch": 2.0861875350926447,
      "grad_norm": 0.30034932494163513,
      "learning_rate": 2.568237423056844e-06,
      "loss": 0.3679,
      "step": 2477
    },
    {
      "epoch": 2.0870297585626054,
      "grad_norm": 0.33954447507858276,
      "learning_rate": 2.5639548748140803e-06,
      "loss": 0.3818,
      "step": 2478
    },
    {
      "epoch": 2.087871982032566,
      "grad_norm": 0.321036696434021,
      "learning_rate": 2.5596746686421436e-06,
      "loss": 0.3879,
      "step": 2479
    },
    {
      "epoch": 2.0887142055025265,
      "grad_norm": 0.34285712242126465,
      "learning_rate": 2.5553968086561244e-06,
      "loss": 0.4307,
      "step": 2480
    },
    {
      "epoch": 2.0895564289724873,
      "grad_norm": 0.2989185154438019,
      "learning_rate": 2.5511212989688587e-06,
      "loss": 0.3521,
      "step": 2481
    },
    {
      "epoch": 2.090398652442448,
      "grad_norm": 0.3268737196922302,
      "learning_rate": 2.546848143690922e-06,
      "loss": 0.3981,
      "step": 2482
    },
    {
      "epoch": 2.091240875912409,
      "grad_norm": 0.32022973895072937,
      "learning_rate": 2.5425773469306247e-06,
      "loss": 0.3619,
      "step": 2483
    },
    {
      "epoch": 2.0920830993823696,
      "grad_norm": 0.33728864789009094,
      "learning_rate": 2.5383089127940087e-06,
      "loss": 0.3724,
      "step": 2484
    },
    {
      "epoch": 2.0929253228523303,
      "grad_norm": 0.28090474009513855,
      "learning_rate": 2.534042845384851e-06,
      "loss": 0.3508,
      "step": 2485
    },
    {
      "epoch": 2.0937675463222907,
      "grad_norm": 0.34956198930740356,
      "learning_rate": 2.5297791488046445e-06,
      "loss": 0.384,
      "step": 2486
    },
    {
      "epoch": 2.0946097697922514,
      "grad_norm": 0.3604210615158081,
      "learning_rate": 2.525517827152614e-06,
      "loss": 0.4196,
      "step": 2487
    },
    {
      "epoch": 2.095451993262212,
      "grad_norm": 0.31995895504951477,
      "learning_rate": 2.5212588845256837e-06,
      "loss": 0.3649,
      "step": 2488
    },
    {
      "epoch": 2.096294216732173,
      "grad_norm": 0.3351212441921234,
      "learning_rate": 2.517002325018508e-06,
      "loss": 0.3753,
      "step": 2489
    },
    {
      "epoch": 2.0971364402021337,
      "grad_norm": 0.30865800380706787,
      "learning_rate": 2.5127481527234397e-06,
      "loss": 0.3754,
      "step": 2490
    },
    {
      "epoch": 2.0979786636720945,
      "grad_norm": 0.35669586062431335,
      "learning_rate": 2.508496371730543e-06,
      "loss": 0.472,
      "step": 2491
    },
    {
      "epoch": 2.098820887142055,
      "grad_norm": 0.2910619080066681,
      "learning_rate": 2.5042469861275768e-06,
      "loss": 0.332,
      "step": 2492
    },
    {
      "epoch": 2.0996631106120156,
      "grad_norm": 0.3080119788646698,
      "learning_rate": 2.5000000000000015e-06,
      "loss": 0.3562,
      "step": 2493
    },
    {
      "epoch": 2.1005053340819764,
      "grad_norm": 0.32427725195884705,
      "learning_rate": 2.4957554174309655e-06,
      "loss": 0.4094,
      "step": 2494
    },
    {
      "epoch": 2.101347557551937,
      "grad_norm": 0.319868266582489,
      "learning_rate": 2.491513242501315e-06,
      "loss": 0.3954,
      "step": 2495
    },
    {
      "epoch": 2.102189781021898,
      "grad_norm": 0.3312855362892151,
      "learning_rate": 2.487273479289574e-06,
      "loss": 0.3649,
      "step": 2496
    },
    {
      "epoch": 2.1030320044918587,
      "grad_norm": 0.31714558601379395,
      "learning_rate": 2.4830361318719493e-06,
      "loss": 0.4038,
      "step": 2497
    },
    {
      "epoch": 2.103874227961819,
      "grad_norm": 0.2809481620788574,
      "learning_rate": 2.4788012043223253e-06,
      "loss": 0.3505,
      "step": 2498
    },
    {
      "epoch": 2.1047164514317798,
      "grad_norm": 0.3540138304233551,
      "learning_rate": 2.4745687007122636e-06,
      "loss": 0.4494,
      "step": 2499
    },
    {
      "epoch": 2.1055586749017405,
      "grad_norm": 0.3482866585254669,
      "learning_rate": 2.470338625110991e-06,
      "loss": 0.4007,
      "step": 2500
    },
    {
      "epoch": 2.1064008983717013,
      "grad_norm": 0.3097721040248871,
      "learning_rate": 2.4661109815854005e-06,
      "loss": 0.3862,
      "step": 2501
    },
    {
      "epoch": 2.107243121841662,
      "grad_norm": 0.3179652690887451,
      "learning_rate": 2.4618857742000463e-06,
      "loss": 0.3762,
      "step": 2502
    },
    {
      "epoch": 2.108085345311623,
      "grad_norm": 0.33774158358573914,
      "learning_rate": 2.4576630070171447e-06,
      "loss": 0.3795,
      "step": 2503
    },
    {
      "epoch": 2.108927568781583,
      "grad_norm": 0.32899728417396545,
      "learning_rate": 2.4534426840965604e-06,
      "loss": 0.3739,
      "step": 2504
    },
    {
      "epoch": 2.109769792251544,
      "grad_norm": 0.3247832953929901,
      "learning_rate": 2.449224809495815e-06,
      "loss": 0.4114,
      "step": 2505
    },
    {
      "epoch": 2.1106120157215047,
      "grad_norm": 0.2894893288612366,
      "learning_rate": 2.4450093872700648e-06,
      "loss": 0.3683,
      "step": 2506
    },
    {
      "epoch": 2.1114542391914655,
      "grad_norm": 0.3101656436920166,
      "learning_rate": 2.440796421472122e-06,
      "loss": 0.4012,
      "step": 2507
    },
    {
      "epoch": 2.1122964626614262,
      "grad_norm": 0.3071208894252777,
      "learning_rate": 2.436585916152426e-06,
      "loss": 0.394,
      "step": 2508
    },
    {
      "epoch": 2.113138686131387,
      "grad_norm": 0.30802860856056213,
      "learning_rate": 2.4323778753590582e-06,
      "loss": 0.4173,
      "step": 2509
    },
    {
      "epoch": 2.1139809096013478,
      "grad_norm": 0.30354928970336914,
      "learning_rate": 2.4281723031377275e-06,
      "loss": 0.3737,
      "step": 2510
    },
    {
      "epoch": 2.114823133071308,
      "grad_norm": 0.3168238699436188,
      "learning_rate": 2.423969203531768e-06,
      "loss": 0.3902,
      "step": 2511
    },
    {
      "epoch": 2.115665356541269,
      "grad_norm": 0.30449843406677246,
      "learning_rate": 2.419768580582137e-06,
      "loss": 0.3685,
      "step": 2512
    },
    {
      "epoch": 2.1165075800112296,
      "grad_norm": 0.3029516637325287,
      "learning_rate": 2.4155704383274154e-06,
      "loss": 0.4033,
      "step": 2513
    },
    {
      "epoch": 2.1173498034811904,
      "grad_norm": 0.35437849164009094,
      "learning_rate": 2.411374780803793e-06,
      "loss": 0.425,
      "step": 2514
    },
    {
      "epoch": 2.118192026951151,
      "grad_norm": 0.3117205798625946,
      "learning_rate": 2.4071816120450742e-06,
      "loss": 0.4056,
      "step": 2515
    },
    {
      "epoch": 2.119034250421112,
      "grad_norm": 0.29806411266326904,
      "learning_rate": 2.402990936082667e-06,
      "loss": 0.3552,
      "step": 2516
    },
    {
      "epoch": 2.1198764738910723,
      "grad_norm": 0.31308627128601074,
      "learning_rate": 2.3988027569455895e-06,
      "loss": 0.419,
      "step": 2517
    },
    {
      "epoch": 2.120718697361033,
      "grad_norm": 0.2987107038497925,
      "learning_rate": 2.3946170786604526e-06,
      "loss": 0.3937,
      "step": 2518
    },
    {
      "epoch": 2.121560920830994,
      "grad_norm": 0.3022991418838501,
      "learning_rate": 2.390433905251467e-06,
      "loss": 0.4134,
      "step": 2519
    },
    {
      "epoch": 2.1224031443009546,
      "grad_norm": 0.32321813702583313,
      "learning_rate": 2.3862532407404306e-06,
      "loss": 0.397,
      "step": 2520
    },
    {
      "epoch": 2.1232453677709153,
      "grad_norm": 0.30526119470596313,
      "learning_rate": 2.3820750891467355e-06,
      "loss": 0.3839,
      "step": 2521
    },
    {
      "epoch": 2.124087591240876,
      "grad_norm": 0.32280218601226807,
      "learning_rate": 2.377899454487351e-06,
      "loss": 0.3866,
      "step": 2522
    },
    {
      "epoch": 2.1249298147108364,
      "grad_norm": 0.30442073941230774,
      "learning_rate": 2.373726340776837e-06,
      "loss": 0.3925,
      "step": 2523
    },
    {
      "epoch": 2.125772038180797,
      "grad_norm": 0.36591842770576477,
      "learning_rate": 2.369555752027313e-06,
      "loss": 0.414,
      "step": 2524
    },
    {
      "epoch": 2.126614261650758,
      "grad_norm": 0.3297121524810791,
      "learning_rate": 2.365387692248488e-06,
      "loss": 0.4171,
      "step": 2525
    },
    {
      "epoch": 2.1274564851207187,
      "grad_norm": 0.30282390117645264,
      "learning_rate": 2.361222165447628e-06,
      "loss": 0.3633,
      "step": 2526
    },
    {
      "epoch": 2.1282987085906795,
      "grad_norm": 0.3258180022239685,
      "learning_rate": 2.3570591756295717e-06,
      "loss": 0.4034,
      "step": 2527
    },
    {
      "epoch": 2.1291409320606403,
      "grad_norm": 0.3188164234161377,
      "learning_rate": 2.3528987267967135e-06,
      "loss": 0.347,
      "step": 2528
    },
    {
      "epoch": 2.1299831555306006,
      "grad_norm": 0.3422144949436188,
      "learning_rate": 2.348740822949006e-06,
      "loss": 0.4135,
      "step": 2529
    },
    {
      "epoch": 2.1308253790005613,
      "grad_norm": 0.31077733635902405,
      "learning_rate": 2.3445854680839534e-06,
      "loss": 0.3686,
      "step": 2530
    },
    {
      "epoch": 2.131667602470522,
      "grad_norm": 0.27454495429992676,
      "learning_rate": 2.3404326661966148e-06,
      "loss": 0.3612,
      "step": 2531
    },
    {
      "epoch": 2.132509825940483,
      "grad_norm": 0.3350626528263092,
      "learning_rate": 2.33628242127959e-06,
      "loss": 0.4198,
      "step": 2532
    },
    {
      "epoch": 2.1333520494104437,
      "grad_norm": 0.31338322162628174,
      "learning_rate": 2.33213473732302e-06,
      "loss": 0.408,
      "step": 2533
    },
    {
      "epoch": 2.1341942728804044,
      "grad_norm": 0.3182331621646881,
      "learning_rate": 2.3279896183145857e-06,
      "loss": 0.3581,
      "step": 2534
    },
    {
      "epoch": 2.1350364963503647,
      "grad_norm": 0.30024346709251404,
      "learning_rate": 2.323847068239504e-06,
      "loss": 0.3804,
      "step": 2535
    },
    {
      "epoch": 2.1358787198203255,
      "grad_norm": 0.3235088288784027,
      "learning_rate": 2.319707091080517e-06,
      "loss": 0.4173,
      "step": 2536
    },
    {
      "epoch": 2.1367209432902863,
      "grad_norm": 0.3326219618320465,
      "learning_rate": 2.3155696908178974e-06,
      "loss": 0.3852,
      "step": 2537
    },
    {
      "epoch": 2.137563166760247,
      "grad_norm": 0.3088136911392212,
      "learning_rate": 2.3114348714294355e-06,
      "loss": 0.3709,
      "step": 2538
    },
    {
      "epoch": 2.138405390230208,
      "grad_norm": 0.35366424918174744,
      "learning_rate": 2.3073026368904478e-06,
      "loss": 0.3734,
      "step": 2539
    },
    {
      "epoch": 2.1392476137001686,
      "grad_norm": 0.2992304265499115,
      "learning_rate": 2.3031729911737576e-06,
      "loss": 0.3713,
      "step": 2540
    },
    {
      "epoch": 2.1400898371701293,
      "grad_norm": 0.31421947479248047,
      "learning_rate": 2.2990459382497086e-06,
      "loss": 0.4093,
      "step": 2541
    },
    {
      "epoch": 2.1409320606400897,
      "grad_norm": 0.30632284283638,
      "learning_rate": 2.2949214820861403e-06,
      "loss": 0.4064,
      "step": 2542
    },
    {
      "epoch": 2.1417742841100504,
      "grad_norm": 0.3092319667339325,
      "learning_rate": 2.290799626648402e-06,
      "loss": 0.3567,
      "step": 2543
    },
    {
      "epoch": 2.142616507580011,
      "grad_norm": 0.34475943446159363,
      "learning_rate": 2.2866803758993446e-06,
      "loss": 0.4249,
      "step": 2544
    },
    {
      "epoch": 2.143458731049972,
      "grad_norm": 0.30626732110977173,
      "learning_rate": 2.2825637337993094e-06,
      "loss": 0.3805,
      "step": 2545
    },
    {
      "epoch": 2.1443009545199327,
      "grad_norm": 0.340401291847229,
      "learning_rate": 2.2784497043061384e-06,
      "loss": 0.4077,
      "step": 2546
    },
    {
      "epoch": 2.1451431779898935,
      "grad_norm": 0.29327282309532166,
      "learning_rate": 2.274338291375147e-06,
      "loss": 0.3699,
      "step": 2547
    },
    {
      "epoch": 2.145985401459854,
      "grad_norm": 0.3318997621536255,
      "learning_rate": 2.2702294989591513e-06,
      "loss": 0.4256,
      "step": 2548
    },
    {
      "epoch": 2.1468276249298146,
      "grad_norm": 0.3067331910133362,
      "learning_rate": 2.266123331008436e-06,
      "loss": 0.3715,
      "step": 2549
    },
    {
      "epoch": 2.1476698483997754,
      "grad_norm": 0.3534662425518036,
      "learning_rate": 2.262019791470772e-06,
      "loss": 0.4175,
      "step": 2550
    },
    {
      "epoch": 2.148512071869736,
      "grad_norm": 0.3536500036716461,
      "learning_rate": 2.257918884291392e-06,
      "loss": 0.3711,
      "step": 2551
    },
    {
      "epoch": 2.149354295339697,
      "grad_norm": 0.3196483850479126,
      "learning_rate": 2.253820613413009e-06,
      "loss": 0.4055,
      "step": 2552
    },
    {
      "epoch": 2.1501965188096577,
      "grad_norm": 0.31786462664604187,
      "learning_rate": 2.2497249827757933e-06,
      "loss": 0.3946,
      "step": 2553
    },
    {
      "epoch": 2.151038742279618,
      "grad_norm": 0.288513720035553,
      "learning_rate": 2.245631996317384e-06,
      "loss": 0.3339,
      "step": 2554
    },
    {
      "epoch": 2.1518809657495788,
      "grad_norm": 0.359544962644577,
      "learning_rate": 2.2415416579728714e-06,
      "loss": 0.4342,
      "step": 2555
    },
    {
      "epoch": 2.1527231892195395,
      "grad_norm": 0.3170144259929657,
      "learning_rate": 2.2374539716748034e-06,
      "loss": 0.3958,
      "step": 2556
    },
    {
      "epoch": 2.1535654126895003,
      "grad_norm": 0.3131515681743622,
      "learning_rate": 2.233368941353175e-06,
      "loss": 0.37,
      "step": 2557
    },
    {
      "epoch": 2.154407636159461,
      "grad_norm": 0.3091438114643097,
      "learning_rate": 2.2292865709354346e-06,
      "loss": 0.3927,
      "step": 2558
    },
    {
      "epoch": 2.155249859629422,
      "grad_norm": 0.29642829298973083,
      "learning_rate": 2.225206864346465e-06,
      "loss": 0.3883,
      "step": 2559
    },
    {
      "epoch": 2.156092083099382,
      "grad_norm": 0.34121206402778625,
      "learning_rate": 2.221129825508593e-06,
      "loss": 0.404,
      "step": 2560
    },
    {
      "epoch": 2.156934306569343,
      "grad_norm": 0.2781011462211609,
      "learning_rate": 2.2170554583415782e-06,
      "loss": 0.3271,
      "step": 2561
    },
    {
      "epoch": 2.1577765300393037,
      "grad_norm": 0.3075297176837921,
      "learning_rate": 2.2129837667626147e-06,
      "loss": 0.4093,
      "step": 2562
    },
    {
      "epoch": 2.1586187535092645,
      "grad_norm": 0.3256165385246277,
      "learning_rate": 2.2089147546863187e-06,
      "loss": 0.4264,
      "step": 2563
    },
    {
      "epoch": 2.1594609769792252,
      "grad_norm": 0.31064149737358093,
      "learning_rate": 2.20484842602474e-06,
      "loss": 0.3803,
      "step": 2564
    },
    {
      "epoch": 2.160303200449186,
      "grad_norm": 0.3152274191379547,
      "learning_rate": 2.2007847846873342e-06,
      "loss": 0.4155,
      "step": 2565
    },
    {
      "epoch": 2.1611454239191463,
      "grad_norm": 0.300730437040329,
      "learning_rate": 2.196723834580987e-06,
      "loss": 0.3607,
      "step": 2566
    },
    {
      "epoch": 2.161987647389107,
      "grad_norm": 0.2975861728191376,
      "learning_rate": 2.1926655796099873e-06,
      "loss": 0.404,
      "step": 2567
    },
    {
      "epoch": 2.162829870859068,
      "grad_norm": 0.31291040778160095,
      "learning_rate": 2.188610023676041e-06,
      "loss": 0.3695,
      "step": 2568
    },
    {
      "epoch": 2.1636720943290286,
      "grad_norm": 0.3340233266353607,
      "learning_rate": 2.1845571706782486e-06,
      "loss": 0.4028,
      "step": 2569
    },
    {
      "epoch": 2.1645143177989894,
      "grad_norm": 0.3235953450202942,
      "learning_rate": 2.1805070245131234e-06,
      "loss": 0.3913,
      "step": 2570
    },
    {
      "epoch": 2.16535654126895,
      "grad_norm": 0.32769548892974854,
      "learning_rate": 2.176459589074566e-06,
      "loss": 0.346,
      "step": 2571
    },
    {
      "epoch": 2.166198764738911,
      "grad_norm": 0.34479454159736633,
      "learning_rate": 2.17241486825388e-06,
      "loss": 0.4176,
      "step": 2572
    },
    {
      "epoch": 2.1670409882088713,
      "grad_norm": 0.2936125099658966,
      "learning_rate": 2.1683728659397517e-06,
      "loss": 0.3476,
      "step": 2573
    },
    {
      "epoch": 2.167883211678832,
      "grad_norm": 0.2912256717681885,
      "learning_rate": 2.164333586018259e-06,
      "loss": 0.38,
      "step": 2574
    },
    {
      "epoch": 2.168725435148793,
      "grad_norm": 0.34022608399391174,
      "learning_rate": 2.160297032372857e-06,
      "loss": 0.406,
      "step": 2575
    },
    {
      "epoch": 2.1695676586187536,
      "grad_norm": 0.350392609834671,
      "learning_rate": 2.156263208884386e-06,
      "loss": 0.4436,
      "step": 2576
    },
    {
      "epoch": 2.1704098820887143,
      "grad_norm": 0.3116767406463623,
      "learning_rate": 2.1522321194310577e-06,
      "loss": 0.374,
      "step": 2577
    },
    {
      "epoch": 2.171252105558675,
      "grad_norm": 0.3407011032104492,
      "learning_rate": 2.148203767888455e-06,
      "loss": 0.4155,
      "step": 2578
    },
    {
      "epoch": 2.1720943290286354,
      "grad_norm": 0.3086830675601959,
      "learning_rate": 2.1441781581295286e-06,
      "loss": 0.3765,
      "step": 2579
    },
    {
      "epoch": 2.172936552498596,
      "grad_norm": 0.3208644390106201,
      "learning_rate": 2.1401552940245962e-06,
      "loss": 0.3866,
      "step": 2580
    },
    {
      "epoch": 2.173778775968557,
      "grad_norm": 0.309477835893631,
      "learning_rate": 2.1361351794413334e-06,
      "loss": 0.3877,
      "step": 2581
    },
    {
      "epoch": 2.1746209994385177,
      "grad_norm": 0.3050850033760071,
      "learning_rate": 2.132117818244771e-06,
      "loss": 0.3831,
      "step": 2582
    },
    {
      "epoch": 2.1754632229084785,
      "grad_norm": 0.32747694849967957,
      "learning_rate": 2.1281032142972933e-06,
      "loss": 0.4231,
      "step": 2583
    },
    {
      "epoch": 2.1763054463784393,
      "grad_norm": 0.3177086114883423,
      "learning_rate": 2.124091371458638e-06,
      "loss": 0.4043,
      "step": 2584
    },
    {
      "epoch": 2.1771476698483996,
      "grad_norm": 0.33712029457092285,
      "learning_rate": 2.1200822935858807e-06,
      "loss": 0.4409,
      "step": 2585
    },
    {
      "epoch": 2.1779898933183603,
      "grad_norm": 0.2812432050704956,
      "learning_rate": 2.1160759845334483e-06,
      "loss": 0.3874,
      "step": 2586
    },
    {
      "epoch": 2.178832116788321,
      "grad_norm": 0.3003031313419342,
      "learning_rate": 2.1120724481530937e-06,
      "loss": 0.3661,
      "step": 2587
    },
    {
      "epoch": 2.179674340258282,
      "grad_norm": 0.3174550533294678,
      "learning_rate": 2.1080716882939145e-06,
      "loss": 0.4105,
      "step": 2588
    },
    {
      "epoch": 2.1805165637282427,
      "grad_norm": 0.3217324912548065,
      "learning_rate": 2.1040737088023323e-06,
      "loss": 0.3991,
      "step": 2589
    },
    {
      "epoch": 2.1813587871982034,
      "grad_norm": 0.29086801409721375,
      "learning_rate": 2.100078513522102e-06,
      "loss": 0.3392,
      "step": 2590
    },
    {
      "epoch": 2.182201010668164,
      "grad_norm": 0.31397268176078796,
      "learning_rate": 2.0960861062942956e-06,
      "loss": 0.3768,
      "step": 2591
    },
    {
      "epoch": 2.1830432341381245,
      "grad_norm": 0.3941654562950134,
      "learning_rate": 2.0920964909573065e-06,
      "loss": 0.4091,
      "step": 2592
    },
    {
      "epoch": 2.1838854576080853,
      "grad_norm": 0.29949766397476196,
      "learning_rate": 2.0881096713468435e-06,
      "loss": 0.3831,
      "step": 2593
    },
    {
      "epoch": 2.184727681078046,
      "grad_norm": 0.3455720841884613,
      "learning_rate": 2.0841256512959314e-06,
      "loss": 0.4304,
      "step": 2594
    },
    {
      "epoch": 2.185569904548007,
      "grad_norm": 0.30586519837379456,
      "learning_rate": 2.080144434634898e-06,
      "loss": 0.3684,
      "step": 2595
    },
    {
      "epoch": 2.1864121280179676,
      "grad_norm": 0.33201974630355835,
      "learning_rate": 2.0761660251913795e-06,
      "loss": 0.388,
      "step": 2596
    },
    {
      "epoch": 2.187254351487928,
      "grad_norm": 0.3201391398906708,
      "learning_rate": 2.0721904267903097e-06,
      "loss": 0.363,
      "step": 2597
    },
    {
      "epoch": 2.1880965749578887,
      "grad_norm": 0.3645334541797638,
      "learning_rate": 2.068217643253925e-06,
      "loss": 0.3867,
      "step": 2598
    },
    {
      "epoch": 2.1889387984278494,
      "grad_norm": 0.3313852548599243,
      "learning_rate": 2.0642476784017507e-06,
      "loss": 0.4138,
      "step": 2599
    },
    {
      "epoch": 2.18978102189781,
      "grad_norm": 0.3227449357509613,
      "learning_rate": 2.0602805360506044e-06,
      "loss": 0.4125,
      "step": 2600
    },
    {
      "epoch": 2.190623245367771,
      "grad_norm": 0.3119596242904663,
      "learning_rate": 2.056316220014588e-06,
      "loss": 0.3631,
      "step": 2601
    },
    {
      "epoch": 2.1914654688377317,
      "grad_norm": 0.3037448227405548,
      "learning_rate": 2.0523547341050913e-06,
      "loss": 0.3754,
      "step": 2602
    },
    {
      "epoch": 2.1923076923076925,
      "grad_norm": 0.3211354613304138,
      "learning_rate": 2.0483960821307757e-06,
      "loss": 0.381,
      "step": 2603
    },
    {
      "epoch": 2.193149915777653,
      "grad_norm": 0.3336183428764343,
      "learning_rate": 2.0444402678975876e-06,
      "loss": 0.438,
      "step": 2604
    },
    {
      "epoch": 2.1939921392476136,
      "grad_norm": 0.3205857276916504,
      "learning_rate": 2.040487295208732e-06,
      "loss": 0.4043,
      "step": 2605
    },
    {
      "epoch": 2.1948343627175744,
      "grad_norm": 0.30986538529396057,
      "learning_rate": 2.036537167864695e-06,
      "loss": 0.4249,
      "step": 2606
    },
    {
      "epoch": 2.195676586187535,
      "grad_norm": 0.2801513969898224,
      "learning_rate": 2.0325898896632178e-06,
      "loss": 0.3748,
      "step": 2607
    },
    {
      "epoch": 2.196518809657496,
      "grad_norm": 0.2988492548465729,
      "learning_rate": 2.0286454643993097e-06,
      "loss": 0.4055,
      "step": 2608
    },
    {
      "epoch": 2.1973610331274567,
      "grad_norm": 0.33293262124061584,
      "learning_rate": 2.024703895865232e-06,
      "loss": 0.3901,
      "step": 2609
    },
    {
      "epoch": 2.198203256597417,
      "grad_norm": 0.31317999958992004,
      "learning_rate": 2.0207651878505e-06,
      "loss": 0.3736,
      "step": 2610
    },
    {
      "epoch": 2.1990454800673778,
      "grad_norm": 0.30600878596305847,
      "learning_rate": 2.0168293441418798e-06,
      "loss": 0.3791,
      "step": 2611
    },
    {
      "epoch": 2.1998877035373385,
      "grad_norm": 0.30101341009140015,
      "learning_rate": 2.012896368523386e-06,
      "loss": 0.3585,
      "step": 2612
    },
    {
      "epoch": 2.2007299270072993,
      "grad_norm": 0.32162153720855713,
      "learning_rate": 2.0089662647762716e-06,
      "loss": 0.4169,
      "step": 2613
    },
    {
      "epoch": 2.20157215047726,
      "grad_norm": 0.3242132365703583,
      "learning_rate": 2.0050390366790307e-06,
      "loss": 0.3696,
      "step": 2614
    },
    {
      "epoch": 2.202414373947221,
      "grad_norm": 0.31240808963775635,
      "learning_rate": 2.001114688007393e-06,
      "loss": 0.3751,
      "step": 2615
    },
    {
      "epoch": 2.203256597417181,
      "grad_norm": 0.30991238355636597,
      "learning_rate": 1.997193222534316e-06,
      "loss": 0.3999,
      "step": 2616
    },
    {
      "epoch": 2.204098820887142,
      "grad_norm": 0.3069349527359009,
      "learning_rate": 1.9932746440299926e-06,
      "loss": 0.3731,
      "step": 2617
    },
    {
      "epoch": 2.2049410443571027,
      "grad_norm": 0.30552878975868225,
      "learning_rate": 1.989358956261835e-06,
      "loss": 0.3724,
      "step": 2618
    },
    {
      "epoch": 2.2057832678270635,
      "grad_norm": 0.32413092255592346,
      "learning_rate": 1.9854461629944764e-06,
      "loss": 0.3908,
      "step": 2619
    },
    {
      "epoch": 2.2066254912970242,
      "grad_norm": 0.32390186190605164,
      "learning_rate": 1.981536267989766e-06,
      "loss": 0.4029,
      "step": 2620
    },
    {
      "epoch": 2.207467714766985,
      "grad_norm": 0.3163199722766876,
      "learning_rate": 1.977629275006772e-06,
      "loss": 0.3977,
      "step": 2621
    },
    {
      "epoch": 2.2083099382369458,
      "grad_norm": 0.29891249537467957,
      "learning_rate": 1.9737251878017678e-06,
      "loss": 0.3658,
      "step": 2622
    },
    {
      "epoch": 2.209152161706906,
      "grad_norm": 0.3165622651576996,
      "learning_rate": 1.969824010128233e-06,
      "loss": 0.3901,
      "step": 2623
    },
    {
      "epoch": 2.209994385176867,
      "grad_norm": 0.32791003584861755,
      "learning_rate": 1.9659257457368503e-06,
      "loss": 0.3882,
      "step": 2624
    },
    {
      "epoch": 2.2108366086468276,
      "grad_norm": 0.32387205958366394,
      "learning_rate": 1.962030398375506e-06,
      "loss": 0.4168,
      "step": 2625
    },
    {
      "epoch": 2.2116788321167884,
      "grad_norm": 0.3419168293476105,
      "learning_rate": 1.9581379717892748e-06,
      "loss": 0.4043,
      "step": 2626
    },
    {
      "epoch": 2.212521055586749,
      "grad_norm": 0.3095424473285675,
      "learning_rate": 1.954248469720431e-06,
      "loss": 0.3654,
      "step": 2627
    },
    {
      "epoch": 2.2133632790567095,
      "grad_norm": 0.31504231691360474,
      "learning_rate": 1.950361895908427e-06,
      "loss": 0.3912,
      "step": 2628
    },
    {
      "epoch": 2.2142055025266703,
      "grad_norm": 0.30399057269096375,
      "learning_rate": 1.946478254089911e-06,
      "loss": 0.378,
      "step": 2629
    },
    {
      "epoch": 2.215047725996631,
      "grad_norm": 0.3031809329986572,
      "learning_rate": 1.942597547998703e-06,
      "loss": 0.3536,
      "step": 2630
    },
    {
      "epoch": 2.215889949466592,
      "grad_norm": 0.33480167388916016,
      "learning_rate": 1.9387197813658092e-06,
      "loss": 0.4365,
      "step": 2631
    },
    {
      "epoch": 2.2167321729365526,
      "grad_norm": 0.3504936993122101,
      "learning_rate": 1.934844957919403e-06,
      "loss": 0.3868,
      "step": 2632
    },
    {
      "epoch": 2.2175743964065133,
      "grad_norm": 0.31473788619041443,
      "learning_rate": 1.9309730813848302e-06,
      "loss": 0.3834,
      "step": 2633
    },
    {
      "epoch": 2.218416619876474,
      "grad_norm": 0.28943800926208496,
      "learning_rate": 1.927104155484602e-06,
      "loss": 0.3848,
      "step": 2634
    },
    {
      "epoch": 2.2192588433464344,
      "grad_norm": 0.2858538031578064,
      "learning_rate": 1.923238183938398e-06,
      "loss": 0.3579,
      "step": 2635
    },
    {
      "epoch": 2.220101066816395,
      "grad_norm": 0.34496554732322693,
      "learning_rate": 1.919375170463052e-06,
      "loss": 0.4186,
      "step": 2636
    },
    {
      "epoch": 2.220943290286356,
      "grad_norm": 0.3537115752696991,
      "learning_rate": 1.915515118772555e-06,
      "loss": 0.3843,
      "step": 2637
    },
    {
      "epoch": 2.2217855137563167,
      "grad_norm": 0.324553519487381,
      "learning_rate": 1.9116580325780505e-06,
      "loss": 0.381,
      "step": 2638
    },
    {
      "epoch": 2.2226277372262775,
      "grad_norm": 0.32668453454971313,
      "learning_rate": 1.9078039155878338e-06,
      "loss": 0.3717,
      "step": 2639
    },
    {
      "epoch": 2.2234699606962383,
      "grad_norm": 0.3009384870529175,
      "learning_rate": 1.9039527715073424e-06,
      "loss": 0.3479,
      "step": 2640
    },
    {
      "epoch": 2.2243121841661986,
      "grad_norm": 0.2981441617012024,
      "learning_rate": 1.9001046040391558e-06,
      "loss": 0.3883,
      "step": 2641
    },
    {
      "epoch": 2.2251544076361593,
      "grad_norm": 0.3143802583217621,
      "learning_rate": 1.8962594168829907e-06,
      "loss": 0.4046,
      "step": 2642
    },
    {
      "epoch": 2.22599663110612,
      "grad_norm": 0.315255731344223,
      "learning_rate": 1.8924172137357038e-06,
      "loss": 0.3886,
      "step": 2643
    },
    {
      "epoch": 2.226838854576081,
      "grad_norm": 0.2861502766609192,
      "learning_rate": 1.8885779982912756e-06,
      "loss": 0.3607,
      "step": 2644
    },
    {
      "epoch": 2.2276810780460417,
      "grad_norm": 0.29868990182876587,
      "learning_rate": 1.884741774240823e-06,
      "loss": 0.3806,
      "step": 2645
    },
    {
      "epoch": 2.2285233015160024,
      "grad_norm": 0.28815537691116333,
      "learning_rate": 1.8809085452725744e-06,
      "loss": 0.4001,
      "step": 2646
    },
    {
      "epoch": 2.2293655249859627,
      "grad_norm": 0.3021093010902405,
      "learning_rate": 1.8770783150718913e-06,
      "loss": 0.3725,
      "step": 2647
    },
    {
      "epoch": 2.2302077484559235,
      "grad_norm": 0.29392096400260925,
      "learning_rate": 1.8732510873212428e-06,
      "loss": 0.3736,
      "step": 2648
    },
    {
      "epoch": 2.2310499719258843,
      "grad_norm": 0.29127538204193115,
      "learning_rate": 1.8694268657002197e-06,
      "loss": 0.3499,
      "step": 2649
    },
    {
      "epoch": 2.231892195395845,
      "grad_norm": 0.3140545189380646,
      "learning_rate": 1.865605653885516e-06,
      "loss": 0.3833,
      "step": 2650
    },
    {
      "epoch": 2.232734418865806,
      "grad_norm": 0.3265913724899292,
      "learning_rate": 1.8617874555509342e-06,
      "loss": 0.3919,
      "step": 2651
    },
    {
      "epoch": 2.2335766423357666,
      "grad_norm": 0.3079654574394226,
      "learning_rate": 1.8579722743673773e-06,
      "loss": 0.3726,
      "step": 2652
    },
    {
      "epoch": 2.2344188658057273,
      "grad_norm": 0.29007747769355774,
      "learning_rate": 1.8541601140028542e-06,
      "loss": 0.4181,
      "step": 2653
    },
    {
      "epoch": 2.2352610892756877,
      "grad_norm": 0.2822338938713074,
      "learning_rate": 1.8503509781224627e-06,
      "loss": 0.3618,
      "step": 2654
    },
    {
      "epoch": 2.2361033127456484,
      "grad_norm": 0.3207438290119171,
      "learning_rate": 1.8465448703883959e-06,
      "loss": 0.403,
      "step": 2655
    },
    {
      "epoch": 2.236945536215609,
      "grad_norm": 0.31586772203445435,
      "learning_rate": 1.8427417944599325e-06,
      "loss": 0.4091,
      "step": 2656
    },
    {
      "epoch": 2.23778775968557,
      "grad_norm": 0.2693243622779846,
      "learning_rate": 1.8389417539934428e-06,
      "loss": 0.3512,
      "step": 2657
    },
    {
      "epoch": 2.2386299831555307,
      "grad_norm": 0.31845182180404663,
      "learning_rate": 1.8351447526423728e-06,
      "loss": 0.4079,
      "step": 2658
    },
    {
      "epoch": 2.239472206625491,
      "grad_norm": 0.3414849638938904,
      "learning_rate": 1.8313507940572477e-06,
      "loss": 0.4035,
      "step": 2659
    },
    {
      "epoch": 2.240314430095452,
      "grad_norm": 0.3171975612640381,
      "learning_rate": 1.8275598818856682e-06,
      "loss": 0.406,
      "step": 2660
    },
    {
      "epoch": 2.2411566535654126,
      "grad_norm": 0.3242760896682739,
      "learning_rate": 1.8237720197723075e-06,
      "loss": 0.3859,
      "step": 2661
    },
    {
      "epoch": 2.2419988770353734,
      "grad_norm": 0.2889457643032074,
      "learning_rate": 1.819987211358903e-06,
      "loss": 0.3889,
      "step": 2662
    },
    {
      "epoch": 2.242841100505334,
      "grad_norm": 0.30771684646606445,
      "learning_rate": 1.8162054602842621e-06,
      "loss": 0.3891,
      "step": 2663
    },
    {
      "epoch": 2.243683323975295,
      "grad_norm": 0.33391228318214417,
      "learning_rate": 1.812426770184243e-06,
      "loss": 0.3415,
      "step": 2664
    },
    {
      "epoch": 2.2445255474452557,
      "grad_norm": 0.28734102845191956,
      "learning_rate": 1.8086511446917715e-06,
      "loss": 0.3815,
      "step": 2665
    },
    {
      "epoch": 2.245367770915216,
      "grad_norm": 0.306974321603775,
      "learning_rate": 1.8048785874368191e-06,
      "loss": 0.3674,
      "step": 2666
    },
    {
      "epoch": 2.2462099943851768,
      "grad_norm": 0.32439813017845154,
      "learning_rate": 1.8011091020464138e-06,
      "loss": 0.4234,
      "step": 2667
    },
    {
      "epoch": 2.2470522178551375,
      "grad_norm": 0.30479106307029724,
      "learning_rate": 1.7973426921446258e-06,
      "loss": 0.3774,
      "step": 2668
    },
    {
      "epoch": 2.2478944413250983,
      "grad_norm": 0.33403682708740234,
      "learning_rate": 1.7935793613525693e-06,
      "loss": 0.3957,
      "step": 2669
    },
    {
      "epoch": 2.248736664795059,
      "grad_norm": 0.292681485414505,
      "learning_rate": 1.789819113288397e-06,
      "loss": 0.3786,
      "step": 2670
    },
    {
      "epoch": 2.24957888826502,
      "grad_norm": 0.30174657702445984,
      "learning_rate": 1.7860619515673034e-06,
      "loss": 0.3992,
      "step": 2671
    },
    {
      "epoch": 2.25042111173498,
      "grad_norm": 0.32006698846817017,
      "learning_rate": 1.7823078798015098e-06,
      "loss": 0.4088,
      "step": 2672
    },
    {
      "epoch": 2.251263335204941,
      "grad_norm": 0.29294347763061523,
      "learning_rate": 1.7785569016002686e-06,
      "loss": 0.3293,
      "step": 2673
    },
    {
      "epoch": 2.2521055586749017,
      "grad_norm": 0.33983707427978516,
      "learning_rate": 1.7748090205698565e-06,
      "loss": 0.419,
      "step": 2674
    },
    {
      "epoch": 2.2529477821448625,
      "grad_norm": 0.31440821290016174,
      "learning_rate": 1.7710642403135768e-06,
      "loss": 0.3983,
      "step": 2675
    },
    {
      "epoch": 2.2537900056148232,
      "grad_norm": 0.2985384464263916,
      "learning_rate": 1.7673225644317487e-06,
      "loss": 0.4008,
      "step": 2676
    },
    {
      "epoch": 2.254632229084784,
      "grad_norm": 0.31571680307388306,
      "learning_rate": 1.7635839965217055e-06,
      "loss": 0.379,
      "step": 2677
    },
    {
      "epoch": 2.2554744525547443,
      "grad_norm": 0.3235429525375366,
      "learning_rate": 1.7598485401777932e-06,
      "loss": 0.3879,
      "step": 2678
    },
    {
      "epoch": 2.256316676024705,
      "grad_norm": 0.33716055750846863,
      "learning_rate": 1.75611619899137e-06,
      "loss": 0.4339,
      "step": 2679
    },
    {
      "epoch": 2.257158899494666,
      "grad_norm": 0.28918740153312683,
      "learning_rate": 1.7523869765507928e-06,
      "loss": 0.3759,
      "step": 2680
    },
    {
      "epoch": 2.2580011229646266,
      "grad_norm": 0.3062315881252289,
      "learning_rate": 1.748660876441428e-06,
      "loss": 0.4209,
      "step": 2681
    },
    {
      "epoch": 2.2588433464345874,
      "grad_norm": 0.301724374294281,
      "learning_rate": 1.7449379022456297e-06,
      "loss": 0.3938,
      "step": 2682
    },
    {
      "epoch": 2.259685569904548,
      "grad_norm": 0.3291449248790741,
      "learning_rate": 1.7412180575427572e-06,
      "loss": 0.4115,
      "step": 2683
    },
    {
      "epoch": 2.260527793374509,
      "grad_norm": 0.3217688798904419,
      "learning_rate": 1.7375013459091529e-06,
      "loss": 0.4102,
      "step": 2684
    },
    {
      "epoch": 2.2613700168444693,
      "grad_norm": 0.2935628890991211,
      "learning_rate": 1.7337877709181527e-06,
      "loss": 0.3646,
      "step": 2685
    },
    {
      "epoch": 2.26221224031443,
      "grad_norm": 0.332753449678421,
      "learning_rate": 1.7300773361400746e-06,
      "loss": 0.3557,
      "step": 2686
    },
    {
      "epoch": 2.263054463784391,
      "grad_norm": 0.322719007730484,
      "learning_rate": 1.7263700451422166e-06,
      "loss": 0.419,
      "step": 2687
    },
    {
      "epoch": 2.2638966872543516,
      "grad_norm": 0.31832337379455566,
      "learning_rate": 1.7226659014888548e-06,
      "loss": 0.3922,
      "step": 2688
    },
    {
      "epoch": 2.2647389107243123,
      "grad_norm": 0.30217939615249634,
      "learning_rate": 1.7189649087412385e-06,
      "loss": 0.4177,
      "step": 2689
    },
    {
      "epoch": 2.2655811341942727,
      "grad_norm": 0.31510043144226074,
      "learning_rate": 1.7152670704575919e-06,
      "loss": 0.369,
      "step": 2690
    },
    {
      "epoch": 2.2664233576642334,
      "grad_norm": 0.374496191740036,
      "learning_rate": 1.711572390193102e-06,
      "loss": 0.3996,
      "step": 2691
    },
    {
      "epoch": 2.267265581134194,
      "grad_norm": 0.32443323731422424,
      "learning_rate": 1.7078808714999207e-06,
      "loss": 0.4325,
      "step": 2692
    },
    {
      "epoch": 2.268107804604155,
      "grad_norm": 0.3127228319644928,
      "learning_rate": 1.7041925179271584e-06,
      "loss": 0.3765,
      "step": 2693
    },
    {
      "epoch": 2.2689500280741157,
      "grad_norm": 0.2999798655509949,
      "learning_rate": 1.7005073330208881e-06,
      "loss": 0.403,
      "step": 2694
    },
    {
      "epoch": 2.2697922515440765,
      "grad_norm": 0.3041866421699524,
      "learning_rate": 1.696825320324132e-06,
      "loss": 0.3999,
      "step": 2695
    },
    {
      "epoch": 2.2706344750140373,
      "grad_norm": 0.3059066832065582,
      "learning_rate": 1.6931464833768624e-06,
      "loss": 0.3495,
      "step": 2696
    },
    {
      "epoch": 2.2714766984839976,
      "grad_norm": 0.3430868685245514,
      "learning_rate": 1.689470825715998e-06,
      "loss": 0.406,
      "step": 2697
    },
    {
      "epoch": 2.2723189219539583,
      "grad_norm": 0.31429359316825867,
      "learning_rate": 1.6857983508754056e-06,
      "loss": 0.3784,
      "step": 2698
    },
    {
      "epoch": 2.273161145423919,
      "grad_norm": 0.31196680665016174,
      "learning_rate": 1.6821290623858865e-06,
      "loss": 0.3854,
      "step": 2699
    },
    {
      "epoch": 2.27400336889388,
      "grad_norm": 0.3459366261959076,
      "learning_rate": 1.6784629637751814e-06,
      "loss": 0.4211,
      "step": 2700
    },
    {
      "epoch": 2.2748455923638407,
      "grad_norm": 0.3201557397842407,
      "learning_rate": 1.6748000585679602e-06,
      "loss": 0.3878,
      "step": 2701
    },
    {
      "epoch": 2.2756878158338014,
      "grad_norm": 0.3116450011730194,
      "learning_rate": 1.6711403502858302e-06,
      "loss": 0.3711,
      "step": 2702
    },
    {
      "epoch": 2.2765300393037617,
      "grad_norm": 0.3142700493335724,
      "learning_rate": 1.6674838424473172e-06,
      "loss": 0.3755,
      "step": 2703
    },
    {
      "epoch": 2.2773722627737225,
      "grad_norm": 0.33677130937576294,
      "learning_rate": 1.6638305385678783e-06,
      "loss": 0.3987,
      "step": 2704
    },
    {
      "epoch": 2.2782144862436833,
      "grad_norm": 0.3058329224586487,
      "learning_rate": 1.6601804421598787e-06,
      "loss": 0.3512,
      "step": 2705
    },
    {
      "epoch": 2.279056709713644,
      "grad_norm": 0.3271533250808716,
      "learning_rate": 1.6565335567326112e-06,
      "loss": 0.4368,
      "step": 2706
    },
    {
      "epoch": 2.279898933183605,
      "grad_norm": 0.29440590739250183,
      "learning_rate": 1.6528898857922747e-06,
      "loss": 0.3552,
      "step": 2707
    },
    {
      "epoch": 2.2807411566535656,
      "grad_norm": 0.31064683198928833,
      "learning_rate": 1.6492494328419816e-06,
      "loss": 0.4153,
      "step": 2708
    },
    {
      "epoch": 2.281583380123526,
      "grad_norm": 0.31292927265167236,
      "learning_rate": 1.6456122013817477e-06,
      "loss": 0.4365,
      "step": 2709
    },
    {
      "epoch": 2.2824256035934867,
      "grad_norm": 0.2937861979007721,
      "learning_rate": 1.6419781949084928e-06,
      "loss": 0.4039,
      "step": 2710
    },
    {
      "epoch": 2.2832678270634474,
      "grad_norm": 0.2949824631214142,
      "learning_rate": 1.6383474169160334e-06,
      "loss": 0.4164,
      "step": 2711
    },
    {
      "epoch": 2.284110050533408,
      "grad_norm": 0.32580703496932983,
      "learning_rate": 1.6347198708950884e-06,
      "loss": 0.3834,
      "step": 2712
    },
    {
      "epoch": 2.284952274003369,
      "grad_norm": 0.2845863401889801,
      "learning_rate": 1.631095560333264e-06,
      "loss": 0.3835,
      "step": 2713
    },
    {
      "epoch": 2.2857944974733297,
      "grad_norm": 0.3103090822696686,
      "learning_rate": 1.6274744887150562e-06,
      "loss": 0.405,
      "step": 2714
    },
    {
      "epoch": 2.2866367209432905,
      "grad_norm": 0.3113895356655121,
      "learning_rate": 1.6238566595218475e-06,
      "loss": 0.3966,
      "step": 2715
    },
    {
      "epoch": 2.287478944413251,
      "grad_norm": 0.2905803918838501,
      "learning_rate": 1.6202420762319065e-06,
      "loss": 0.3875,
      "step": 2716
    },
    {
      "epoch": 2.2883211678832116,
      "grad_norm": 0.30843886733055115,
      "learning_rate": 1.6166307423203765e-06,
      "loss": 0.3829,
      "step": 2717
    },
    {
      "epoch": 2.2891633913531724,
      "grad_norm": 0.31146448850631714,
      "learning_rate": 1.6130226612592787e-06,
      "loss": 0.3971,
      "step": 2718
    },
    {
      "epoch": 2.290005614823133,
      "grad_norm": 0.3174664080142975,
      "learning_rate": 1.6094178365175044e-06,
      "loss": 0.4067,
      "step": 2719
    },
    {
      "epoch": 2.290847838293094,
      "grad_norm": 0.3080785572528839,
      "learning_rate": 1.6058162715608205e-06,
      "loss": 0.3742,
      "step": 2720
    },
    {
      "epoch": 2.2916900617630542,
      "grad_norm": 0.29765498638153076,
      "learning_rate": 1.6022179698518525e-06,
      "loss": 0.4071,
      "step": 2721
    },
    {
      "epoch": 2.292532285233015,
      "grad_norm": 0.33761483430862427,
      "learning_rate": 1.598622934850097e-06,
      "loss": 0.4335,
      "step": 2722
    },
    {
      "epoch": 2.2933745087029758,
      "grad_norm": 0.3177092671394348,
      "learning_rate": 1.595031170011898e-06,
      "loss": 0.3945,
      "step": 2723
    },
    {
      "epoch": 2.2942167321729365,
      "grad_norm": 0.29376649856567383,
      "learning_rate": 1.591442678790467e-06,
      "loss": 0.3744,
      "step": 2724
    },
    {
      "epoch": 2.2950589556428973,
      "grad_norm": 0.3084278702735901,
      "learning_rate": 1.5878574646358608e-06,
      "loss": 0.4486,
      "step": 2725
    },
    {
      "epoch": 2.295901179112858,
      "grad_norm": 0.29154255986213684,
      "learning_rate": 1.584275530994991e-06,
      "loss": 0.3723,
      "step": 2726
    },
    {
      "epoch": 2.296743402582819,
      "grad_norm": 0.31140437722206116,
      "learning_rate": 1.580696881311611e-06,
      "loss": 0.428,
      "step": 2727
    },
    {
      "epoch": 2.297585626052779,
      "grad_norm": 0.3041135370731354,
      "learning_rate": 1.5771215190263183e-06,
      "loss": 0.3547,
      "step": 2728
    },
    {
      "epoch": 2.29842784952274,
      "grad_norm": 0.30441126227378845,
      "learning_rate": 1.573549447576549e-06,
      "loss": 0.3838,
      "step": 2729
    },
    {
      "epoch": 2.2992700729927007,
      "grad_norm": 0.3177395462989807,
      "learning_rate": 1.5699806703965787e-06,
      "loss": 0.3988,
      "step": 2730
    },
    {
      "epoch": 2.3001122964626615,
      "grad_norm": 0.30048099160194397,
      "learning_rate": 1.5664151909175124e-06,
      "loss": 0.4068,
      "step": 2731
    },
    {
      "epoch": 2.3009545199326222,
      "grad_norm": 0.31564056873321533,
      "learning_rate": 1.5628530125672848e-06,
      "loss": 0.4108,
      "step": 2732
    },
    {
      "epoch": 2.301796743402583,
      "grad_norm": 0.2874499261379242,
      "learning_rate": 1.5592941387706562e-06,
      "loss": 0.3729,
      "step": 2733
    },
    {
      "epoch": 2.3026389668725433,
      "grad_norm": 0.2925226390361786,
      "learning_rate": 1.555738572949214e-06,
      "loss": 0.3935,
      "step": 2734
    },
    {
      "epoch": 2.303481190342504,
      "grad_norm": 0.3430479168891907,
      "learning_rate": 1.5521863185213626e-06,
      "loss": 0.4741,
      "step": 2735
    },
    {
      "epoch": 2.304323413812465,
      "grad_norm": 0.28215762972831726,
      "learning_rate": 1.5486373789023206e-06,
      "loss": 0.3485,
      "step": 2736
    },
    {
      "epoch": 2.3051656372824256,
      "grad_norm": 0.30106109380722046,
      "learning_rate": 1.5450917575041209e-06,
      "loss": 0.3942,
      "step": 2737
    },
    {
      "epoch": 2.3060078607523864,
      "grad_norm": 0.2865884006023407,
      "learning_rate": 1.54154945773561e-06,
      "loss": 0.3663,
      "step": 2738
    },
    {
      "epoch": 2.306850084222347,
      "grad_norm": 0.29752078652381897,
      "learning_rate": 1.538010483002435e-06,
      "loss": 0.4006,
      "step": 2739
    },
    {
      "epoch": 2.3076923076923075,
      "grad_norm": 0.2793341279029846,
      "learning_rate": 1.5344748367070534e-06,
      "loss": 0.381,
      "step": 2740
    },
    {
      "epoch": 2.3085345311622683,
      "grad_norm": 0.32022103667259216,
      "learning_rate": 1.5309425222487119e-06,
      "loss": 0.4441,
      "step": 2741
    },
    {
      "epoch": 2.309376754632229,
      "grad_norm": 0.3207767903804779,
      "learning_rate": 1.5274135430234654e-06,
      "loss": 0.38,
      "step": 2742
    },
    {
      "epoch": 2.31021897810219,
      "grad_norm": 0.30871689319610596,
      "learning_rate": 1.5238879024241544e-06,
      "loss": 0.3716,
      "step": 2743
    },
    {
      "epoch": 2.3110612015721506,
      "grad_norm": 0.289203941822052,
      "learning_rate": 1.5203656038404146e-06,
      "loss": 0.369,
      "step": 2744
    },
    {
      "epoch": 2.3119034250421113,
      "grad_norm": 0.2959163784980774,
      "learning_rate": 1.5168466506586654e-06,
      "loss": 0.3789,
      "step": 2745
    },
    {
      "epoch": 2.312745648512072,
      "grad_norm": 0.3246634006500244,
      "learning_rate": 1.5133310462621103e-06,
      "loss": 0.4428,
      "step": 2746
    },
    {
      "epoch": 2.3135878719820324,
      "grad_norm": 0.31475815176963806,
      "learning_rate": 1.509818794030733e-06,
      "loss": 0.4145,
      "step": 2747
    },
    {
      "epoch": 2.314430095451993,
      "grad_norm": 0.2952810823917389,
      "learning_rate": 1.506309897341297e-06,
      "loss": 0.3398,
      "step": 2748
    },
    {
      "epoch": 2.315272318921954,
      "grad_norm": 0.2998516261577606,
      "learning_rate": 1.502804359567337e-06,
      "loss": 0.4025,
      "step": 2749
    },
    {
      "epoch": 2.3161145423919147,
      "grad_norm": 0.3099167048931122,
      "learning_rate": 1.499302184079159e-06,
      "loss": 0.3773,
      "step": 2750
    },
    {
      "epoch": 2.3169567658618755,
      "grad_norm": 0.3260098099708557,
      "learning_rate": 1.4958033742438348e-06,
      "loss": 0.3821,
      "step": 2751
    },
    {
      "epoch": 2.317798989331836,
      "grad_norm": 0.28313449025154114,
      "learning_rate": 1.492307933425205e-06,
      "loss": 0.3517,
      "step": 2752
    },
    {
      "epoch": 2.3186412128017966,
      "grad_norm": 0.30141666531562805,
      "learning_rate": 1.4888158649838675e-06,
      "loss": 0.3918,
      "step": 2753
    },
    {
      "epoch": 2.3194834362717573,
      "grad_norm": 0.3240821063518524,
      "learning_rate": 1.4853271722771772e-06,
      "loss": 0.3928,
      "step": 2754
    },
    {
      "epoch": 2.320325659741718,
      "grad_norm": 0.28797823190689087,
      "learning_rate": 1.4818418586592448e-06,
      "loss": 0.3771,
      "step": 2755
    },
    {
      "epoch": 2.321167883211679,
      "grad_norm": 0.3126569986343384,
      "learning_rate": 1.478359927480935e-06,
      "loss": 0.4028,
      "step": 2756
    },
    {
      "epoch": 2.3220101066816397,
      "grad_norm": 0.2850261926651001,
      "learning_rate": 1.4748813820898554e-06,
      "loss": 0.3692,
      "step": 2757
    },
    {
      "epoch": 2.3228523301516004,
      "grad_norm": 0.31068480014801025,
      "learning_rate": 1.4714062258303653e-06,
      "loss": 0.4186,
      "step": 2758
    },
    {
      "epoch": 2.3236945536215607,
      "grad_norm": 0.3109278976917267,
      "learning_rate": 1.4679344620435543e-06,
      "loss": 0.4161,
      "step": 2759
    },
    {
      "epoch": 2.3245367770915215,
      "grad_norm": 0.30922266840934753,
      "learning_rate": 1.4644660940672628e-06,
      "loss": 0.3902,
      "step": 2760
    },
    {
      "epoch": 2.3253790005614823,
      "grad_norm": 0.3063334822654724,
      "learning_rate": 1.4610011252360594e-06,
      "loss": 0.4141,
      "step": 2761
    },
    {
      "epoch": 2.326221224031443,
      "grad_norm": 0.27713248133659363,
      "learning_rate": 1.4575395588812452e-06,
      "loss": 0.3581,
      "step": 2762
    },
    {
      "epoch": 2.327063447501404,
      "grad_norm": 0.3043242394924164,
      "learning_rate": 1.454081398330855e-06,
      "loss": 0.4112,
      "step": 2763
    },
    {
      "epoch": 2.3279056709713646,
      "grad_norm": 0.2933207154273987,
      "learning_rate": 1.450626646909639e-06,
      "loss": 0.3617,
      "step": 2764
    },
    {
      "epoch": 2.328747894441325,
      "grad_norm": 0.2946457862854004,
      "learning_rate": 1.4471753079390815e-06,
      "loss": 0.3771,
      "step": 2765
    },
    {
      "epoch": 2.3295901179112857,
      "grad_norm": 0.30994439125061035,
      "learning_rate": 1.4437273847373778e-06,
      "loss": 0.4126,
      "step": 2766
    },
    {
      "epoch": 2.3304323413812464,
      "grad_norm": 0.2955210208892822,
      "learning_rate": 1.4402828806194436e-06,
      "loss": 0.3728,
      "step": 2767
    },
    {
      "epoch": 2.331274564851207,
      "grad_norm": 0.2960677444934845,
      "learning_rate": 1.4368417988969058e-06,
      "loss": 0.3356,
      "step": 2768
    },
    {
      "epoch": 2.332116788321168,
      "grad_norm": 0.34130629897117615,
      "learning_rate": 1.4334041428781003e-06,
      "loss": 0.4493,
      "step": 2769
    },
    {
      "epoch": 2.3329590117911287,
      "grad_norm": 0.2894655168056488,
      "learning_rate": 1.429969915868068e-06,
      "loss": 0.371,
      "step": 2770
    },
    {
      "epoch": 2.333801235261089,
      "grad_norm": 0.3008657991886139,
      "learning_rate": 1.4265391211685597e-06,
      "loss": 0.3886,
      "step": 2771
    },
    {
      "epoch": 2.33464345873105,
      "grad_norm": 0.3144446909427643,
      "learning_rate": 1.4231117620780188e-06,
      "loss": 0.3757,
      "step": 2772
    },
    {
      "epoch": 2.3354856822010106,
      "grad_norm": 0.31135794520378113,
      "learning_rate": 1.4196878418915894e-06,
      "loss": 0.3801,
      "step": 2773
    },
    {
      "epoch": 2.3363279056709714,
      "grad_norm": 0.32514965534210205,
      "learning_rate": 1.4162673639011065e-06,
      "loss": 0.4115,
      "step": 2774
    },
    {
      "epoch": 2.337170129140932,
      "grad_norm": 0.33493971824645996,
      "learning_rate": 1.4128503313951008e-06,
      "loss": 0.435,
      "step": 2775
    },
    {
      "epoch": 2.338012352610893,
      "grad_norm": 0.31175974011421204,
      "learning_rate": 1.4094367476587867e-06,
      "loss": 0.3917,
      "step": 2776
    },
    {
      "epoch": 2.3388545760808537,
      "grad_norm": 0.33174020051956177,
      "learning_rate": 1.4060266159740627e-06,
      "loss": 0.3685,
      "step": 2777
    },
    {
      "epoch": 2.339696799550814,
      "grad_norm": 0.3015550971031189,
      "learning_rate": 1.4026199396195078e-06,
      "loss": 0.3627,
      "step": 2778
    },
    {
      "epoch": 2.3405390230207748,
      "grad_norm": 0.32105711102485657,
      "learning_rate": 1.399216721870384e-06,
      "loss": 0.4279,
      "step": 2779
    },
    {
      "epoch": 2.3413812464907355,
      "grad_norm": 0.2911979556083679,
      "learning_rate": 1.3958169659986204e-06,
      "loss": 0.3816,
      "step": 2780
    },
    {
      "epoch": 2.3422234699606963,
      "grad_norm": 0.30800798535346985,
      "learning_rate": 1.3924206752728282e-06,
      "loss": 0.3855,
      "step": 2781
    },
    {
      "epoch": 2.343065693430657,
      "grad_norm": 0.6257643699645996,
      "learning_rate": 1.389027852958273e-06,
      "loss": 0.4651,
      "step": 2782
    },
    {
      "epoch": 2.3439079169006174,
      "grad_norm": 0.28801101446151733,
      "learning_rate": 1.385638502316899e-06,
      "loss": 0.3545,
      "step": 2783
    },
    {
      "epoch": 2.344750140370578,
      "grad_norm": 0.33417609333992004,
      "learning_rate": 1.3822526266073044e-06,
      "loss": 0.4119,
      "step": 2784
    },
    {
      "epoch": 2.345592363840539,
      "grad_norm": 0.3260952830314636,
      "learning_rate": 1.3788702290847517e-06,
      "loss": 0.324,
      "step": 2785
    },
    {
      "epoch": 2.3464345873104997,
      "grad_norm": 0.3526674807071686,
      "learning_rate": 1.3754913130011566e-06,
      "loss": 0.4659,
      "step": 2786
    },
    {
      "epoch": 2.3472768107804605,
      "grad_norm": 0.2934742867946625,
      "learning_rate": 1.3721158816050872e-06,
      "loss": 0.3451,
      "step": 2787
    },
    {
      "epoch": 2.3481190342504212,
      "grad_norm": 0.3071657717227936,
      "learning_rate": 1.3687439381417616e-06,
      "loss": 0.3931,
      "step": 2788
    },
    {
      "epoch": 2.348961257720382,
      "grad_norm": 0.2955377995967865,
      "learning_rate": 1.3653754858530477e-06,
      "loss": 0.4118,
      "step": 2789
    },
    {
      "epoch": 2.3498034811903423,
      "grad_norm": 0.329490065574646,
      "learning_rate": 1.3620105279774532e-06,
      "loss": 0.3859,
      "step": 2790
    },
    {
      "epoch": 2.350645704660303,
      "grad_norm": 0.3248009979724884,
      "learning_rate": 1.3586490677501269e-06,
      "loss": 0.4108,
      "step": 2791
    },
    {
      "epoch": 2.351487928130264,
      "grad_norm": 0.31018805503845215,
      "learning_rate": 1.3552911084028536e-06,
      "loss": 0.3655,
      "step": 2792
    },
    {
      "epoch": 2.3523301516002246,
      "grad_norm": 0.3027118742465973,
      "learning_rate": 1.3519366531640589e-06,
      "loss": 0.3907,
      "step": 2793
    },
    {
      "epoch": 2.3531723750701854,
      "grad_norm": 0.32811105251312256,
      "learning_rate": 1.3485857052587908e-06,
      "loss": 0.4206,
      "step": 2794
    },
    {
      "epoch": 2.354014598540146,
      "grad_norm": 0.29739895462989807,
      "learning_rate": 1.3452382679087307e-06,
      "loss": 0.3984,
      "step": 2795
    },
    {
      "epoch": 2.3548568220101065,
      "grad_norm": 0.3401264548301697,
      "learning_rate": 1.3418943443321807e-06,
      "loss": 0.4015,
      "step": 2796
    },
    {
      "epoch": 2.3556990454800673,
      "grad_norm": 0.3162270486354828,
      "learning_rate": 1.3385539377440709e-06,
      "loss": 0.3314,
      "step": 2797
    },
    {
      "epoch": 2.356541268950028,
      "grad_norm": 0.31521472334861755,
      "learning_rate": 1.3352170513559432e-06,
      "loss": 0.423,
      "step": 2798
    },
    {
      "epoch": 2.357383492419989,
      "grad_norm": 0.3120950162410736,
      "learning_rate": 1.3318836883759634e-06,
      "loss": 0.3973,
      "step": 2799
    },
    {
      "epoch": 2.3582257158899496,
      "grad_norm": 0.29730677604675293,
      "learning_rate": 1.3285538520088976e-06,
      "loss": 0.3713,
      "step": 2800
    },
    {
      "epoch": 2.3590679393599103,
      "grad_norm": 0.3355885148048401,
      "learning_rate": 1.3252275454561337e-06,
      "loss": 0.4218,
      "step": 2801
    },
    {
      "epoch": 2.3599101628298707,
      "grad_norm": 0.2868945300579071,
      "learning_rate": 1.3219047719156575e-06,
      "loss": 0.3848,
      "step": 2802
    },
    {
      "epoch": 2.3607523862998314,
      "grad_norm": 0.2839897572994232,
      "learning_rate": 1.318585534582064e-06,
      "loss": 0.3624,
      "step": 2803
    },
    {
      "epoch": 2.361594609769792,
      "grad_norm": 0.32167497277259827,
      "learning_rate": 1.3152698366465449e-06,
      "loss": 0.4234,
      "step": 2804
    },
    {
      "epoch": 2.362436833239753,
      "grad_norm": 0.2925056219100952,
      "learning_rate": 1.3119576812968893e-06,
      "loss": 0.4029,
      "step": 2805
    },
    {
      "epoch": 2.3632790567097137,
      "grad_norm": 0.2922198474407196,
      "learning_rate": 1.30864907171748e-06,
      "loss": 0.3528,
      "step": 2806
    },
    {
      "epoch": 2.3641212801796745,
      "grad_norm": 0.30218738317489624,
      "learning_rate": 1.305344011089294e-06,
      "loss": 0.387,
      "step": 2807
    },
    {
      "epoch": 2.3649635036496353,
      "grad_norm": 0.32325610518455505,
      "learning_rate": 1.3020425025898926e-06,
      "loss": 0.3861,
      "step": 2808
    },
    {
      "epoch": 2.3658057271195956,
      "grad_norm": 0.33478403091430664,
      "learning_rate": 1.2987445493934236e-06,
      "loss": 0.4036,
      "step": 2809
    },
    {
      "epoch": 2.3666479505895563,
      "grad_norm": 0.3017295300960541,
      "learning_rate": 1.295450154670615e-06,
      "loss": 0.4115,
      "step": 2810
    },
    {
      "epoch": 2.367490174059517,
      "grad_norm": 0.3010775148868561,
      "learning_rate": 1.292159321588778e-06,
      "loss": 0.4109,
      "step": 2811
    },
    {
      "epoch": 2.368332397529478,
      "grad_norm": 0.27966195344924927,
      "learning_rate": 1.288872053311795e-06,
      "loss": 0.36,
      "step": 2812
    },
    {
      "epoch": 2.3691746209994387,
      "grad_norm": 0.2948685586452484,
      "learning_rate": 1.2855883530001228e-06,
      "loss": 0.3851,
      "step": 2813
    },
    {
      "epoch": 2.370016844469399,
      "grad_norm": 0.32642823457717896,
      "learning_rate": 1.282308223810786e-06,
      "loss": 0.3492,
      "step": 2814
    },
    {
      "epoch": 2.3708590679393597,
      "grad_norm": 0.31677156686782837,
      "learning_rate": 1.2790316688973809e-06,
      "loss": 0.3915,
      "step": 2815
    },
    {
      "epoch": 2.3717012914093205,
      "grad_norm": 0.30423635244369507,
      "learning_rate": 1.2757586914100612e-06,
      "loss": 0.4302,
      "step": 2816
    },
    {
      "epoch": 2.3725435148792813,
      "grad_norm": 0.2971431612968445,
      "learning_rate": 1.272489294495548e-06,
      "loss": 0.4011,
      "step": 2817
    },
    {
      "epoch": 2.373385738349242,
      "grad_norm": 0.30449995398521423,
      "learning_rate": 1.2692234812971106e-06,
      "loss": 0.3971,
      "step": 2818
    },
    {
      "epoch": 2.374227961819203,
      "grad_norm": 0.30180802941322327,
      "learning_rate": 1.265961254954583e-06,
      "loss": 0.3825,
      "step": 2819
    },
    {
      "epoch": 2.3750701852891636,
      "grad_norm": 0.30260175466537476,
      "learning_rate": 1.2627026186043423e-06,
      "loss": 0.3806,
      "step": 2820
    },
    {
      "epoch": 2.375912408759124,
      "grad_norm": 0.3002779483795166,
      "learning_rate": 1.2594475753793211e-06,
      "loss": 0.3786,
      "step": 2821
    },
    {
      "epoch": 2.3767546322290847,
      "grad_norm": 0.31379565596580505,
      "learning_rate": 1.256196128408993e-06,
      "loss": 0.3899,
      "step": 2822
    },
    {
      "epoch": 2.3775968556990454,
      "grad_norm": 0.30502650141716003,
      "learning_rate": 1.252948280819375e-06,
      "loss": 0.3956,
      "step": 2823
    },
    {
      "epoch": 2.378439079169006,
      "grad_norm": 0.31214070320129395,
      "learning_rate": 1.249704035733022e-06,
      "loss": 0.3937,
      "step": 2824
    },
    {
      "epoch": 2.379281302638967,
      "grad_norm": 0.3387082815170288,
      "learning_rate": 1.2464633962690304e-06,
      "loss": 0.4032,
      "step": 2825
    },
    {
      "epoch": 2.3801235261089277,
      "grad_norm": 0.3070423901081085,
      "learning_rate": 1.243226365543026e-06,
      "loss": 0.4125,
      "step": 2826
    },
    {
      "epoch": 2.3809657495788885,
      "grad_norm": 0.30641090869903564,
      "learning_rate": 1.239992946667165e-06,
      "loss": 0.3953,
      "step": 2827
    },
    {
      "epoch": 2.381807973048849,
      "grad_norm": 0.309529572725296,
      "learning_rate": 1.2367631427501308e-06,
      "loss": 0.3985,
      "step": 2828
    },
    {
      "epoch": 2.3826501965188096,
      "grad_norm": 0.3128246068954468,
      "learning_rate": 1.2335369568971362e-06,
      "loss": 0.4013,
      "step": 2829
    },
    {
      "epoch": 2.3834924199887704,
      "grad_norm": 0.30300572514533997,
      "learning_rate": 1.2303143922099092e-06,
      "loss": 0.428,
      "step": 2830
    },
    {
      "epoch": 2.384334643458731,
      "grad_norm": 0.3170304298400879,
      "learning_rate": 1.2270954517867e-06,
      "loss": 0.3884,
      "step": 2831
    },
    {
      "epoch": 2.385176866928692,
      "grad_norm": 0.30888524651527405,
      "learning_rate": 1.2238801387222716e-06,
      "loss": 0.3764,
      "step": 2832
    },
    {
      "epoch": 2.3860190903986522,
      "grad_norm": 0.30731016397476196,
      "learning_rate": 1.2206684561079035e-06,
      "loss": 0.3529,
      "step": 2833
    },
    {
      "epoch": 2.386861313868613,
      "grad_norm": 0.3136957883834839,
      "learning_rate": 1.2174604070313811e-06,
      "loss": 0.371,
      "step": 2834
    },
    {
      "epoch": 2.3877035373385738,
      "grad_norm": 0.2912660539150238,
      "learning_rate": 1.2142559945769995e-06,
      "loss": 0.3792,
      "step": 2835
    },
    {
      "epoch": 2.3885457608085345,
      "grad_norm": 0.2876724898815155,
      "learning_rate": 1.211055221825554e-06,
      "loss": 0.4283,
      "step": 2836
    },
    {
      "epoch": 2.3893879842784953,
      "grad_norm": 0.3053809106349945,
      "learning_rate": 1.207858091854342e-06,
      "loss": 0.4355,
      "step": 2837
    },
    {
      "epoch": 2.390230207748456,
      "grad_norm": 0.290591299533844,
      "learning_rate": 1.2046646077371615e-06,
      "loss": 0.3498,
      "step": 2838
    },
    {
      "epoch": 2.391072431218417,
      "grad_norm": 0.31885483860969543,
      "learning_rate": 1.2014747725443004e-06,
      "loss": 0.3936,
      "step": 2839
    },
    {
      "epoch": 2.391914654688377,
      "grad_norm": 0.30447638034820557,
      "learning_rate": 1.1982885893425455e-06,
      "loss": 0.428,
      "step": 2840
    },
    {
      "epoch": 2.392756878158338,
      "grad_norm": 0.281002402305603,
      "learning_rate": 1.1951060611951615e-06,
      "loss": 0.3767,
      "step": 2841
    },
    {
      "epoch": 2.3935991016282987,
      "grad_norm": 0.3152581751346588,
      "learning_rate": 1.1919271911619106e-06,
      "loss": 0.3952,
      "step": 2842
    },
    {
      "epoch": 2.3944413250982595,
      "grad_norm": 0.3094266653060913,
      "learning_rate": 1.1887519822990296e-06,
      "loss": 0.4065,
      "step": 2843
    },
    {
      "epoch": 2.3952835485682202,
      "grad_norm": 0.29169324040412903,
      "learning_rate": 1.185580437659241e-06,
      "loss": 0.3786,
      "step": 2844
    },
    {
      "epoch": 2.3961257720381806,
      "grad_norm": 0.3131087124347687,
      "learning_rate": 1.1824125602917414e-06,
      "loss": 0.4091,
      "step": 2845
    },
    {
      "epoch": 2.3969679955081413,
      "grad_norm": 0.3096350431442261,
      "learning_rate": 1.1792483532422021e-06,
      "loss": 0.3833,
      "step": 2846
    },
    {
      "epoch": 2.397810218978102,
      "grad_norm": 0.3159383237361908,
      "learning_rate": 1.1760878195527642e-06,
      "loss": 0.4174,
      "step": 2847
    },
    {
      "epoch": 2.398652442448063,
      "grad_norm": 0.29435330629348755,
      "learning_rate": 1.1729309622620422e-06,
      "loss": 0.3574,
      "step": 2848
    },
    {
      "epoch": 2.3994946659180236,
      "grad_norm": 0.3100670576095581,
      "learning_rate": 1.1697777844051105e-06,
      "loss": 0.3656,
      "step": 2849
    },
    {
      "epoch": 2.4003368893879844,
      "grad_norm": 0.3192143738269806,
      "learning_rate": 1.1666282890135083e-06,
      "loss": 0.4077,
      "step": 2850
    },
    {
      "epoch": 2.401179112857945,
      "grad_norm": 0.30785679817199707,
      "learning_rate": 1.1634824791152334e-06,
      "loss": 0.4177,
      "step": 2851
    },
    {
      "epoch": 2.4020213363279055,
      "grad_norm": 0.30079424381256104,
      "learning_rate": 1.1603403577347434e-06,
      "loss": 0.3489,
      "step": 2852
    },
    {
      "epoch": 2.4028635597978663,
      "grad_norm": 0.29950305819511414,
      "learning_rate": 1.1572019278929457e-06,
      "loss": 0.3856,
      "step": 2853
    },
    {
      "epoch": 2.403705783267827,
      "grad_norm": 0.2962183952331543,
      "learning_rate": 1.1540671926072012e-06,
      "loss": 0.4087,
      "step": 2854
    },
    {
      "epoch": 2.404548006737788,
      "grad_norm": 0.30312973260879517,
      "learning_rate": 1.1509361548913151e-06,
      "loss": 0.4088,
      "step": 2855
    },
    {
      "epoch": 2.4053902302077486,
      "grad_norm": 0.2955353558063507,
      "learning_rate": 1.147808817755544e-06,
      "loss": 0.3787,
      "step": 2856
    },
    {
      "epoch": 2.4062324536777093,
      "grad_norm": 0.3006346821784973,
      "learning_rate": 1.1446851842065804e-06,
      "loss": 0.3961,
      "step": 2857
    },
    {
      "epoch": 2.40707467714767,
      "grad_norm": 0.3063647449016571,
      "learning_rate": 1.1415652572475628e-06,
      "loss": 0.3745,
      "step": 2858
    },
    {
      "epoch": 2.4079169006176304,
      "grad_norm": 0.31248560547828674,
      "learning_rate": 1.1384490398780563e-06,
      "loss": 0.4119,
      "step": 2859
    },
    {
      "epoch": 2.408759124087591,
      "grad_norm": 0.31569498777389526,
      "learning_rate": 1.1353365350940688e-06,
      "loss": 0.3621,
      "step": 2860
    },
    {
      "epoch": 2.409601347557552,
      "grad_norm": 0.29618898034095764,
      "learning_rate": 1.1322277458880337e-06,
      "loss": 0.3834,
      "step": 2861
    },
    {
      "epoch": 2.4104435710275127,
      "grad_norm": 0.2961718440055847,
      "learning_rate": 1.129122675248816e-06,
      "loss": 0.4028,
      "step": 2862
    },
    {
      "epoch": 2.4112857944974735,
      "grad_norm": 0.277557373046875,
      "learning_rate": 1.1260213261617015e-06,
      "loss": 0.3726,
      "step": 2863
    },
    {
      "epoch": 2.412128017967434,
      "grad_norm": 0.333768367767334,
      "learning_rate": 1.1229237016084005e-06,
      "loss": 0.4347,
      "step": 2864
    },
    {
      "epoch": 2.4129702414373946,
      "grad_norm": 0.2747149169445038,
      "learning_rate": 1.1198298045670402e-06,
      "loss": 0.3498,
      "step": 2865
    },
    {
      "epoch": 2.4138124649073553,
      "grad_norm": 0.3095918297767639,
      "learning_rate": 1.116739638012168e-06,
      "loss": 0.39,
      "step": 2866
    },
    {
      "epoch": 2.414654688377316,
      "grad_norm": 0.30287206172943115,
      "learning_rate": 1.113653204914742e-06,
      "loss": 0.3921,
      "step": 2867
    },
    {
      "epoch": 2.415496911847277,
      "grad_norm": 0.2812071442604065,
      "learning_rate": 1.1105705082421303e-06,
      "loss": 0.3819,
      "step": 2868
    },
    {
      "epoch": 2.4163391353172377,
      "grad_norm": 0.3028387427330017,
      "learning_rate": 1.1074915509581086e-06,
      "loss": 0.3834,
      "step": 2869
    },
    {
      "epoch": 2.4171813587871984,
      "grad_norm": 0.3085595965385437,
      "learning_rate": 1.104416336022861e-06,
      "loss": 0.4062,
      "step": 2870
    },
    {
      "epoch": 2.4180235822571587,
      "grad_norm": 0.3044005334377289,
      "learning_rate": 1.1013448663929704e-06,
      "loss": 0.4239,
      "step": 2871
    },
    {
      "epoch": 2.4188658057271195,
      "grad_norm": 0.2942149341106415,
      "learning_rate": 1.0982771450214197e-06,
      "loss": 0.4002,
      "step": 2872
    },
    {
      "epoch": 2.4197080291970803,
      "grad_norm": 0.29830029606819153,
      "learning_rate": 1.0952131748575855e-06,
      "loss": 0.3679,
      "step": 2873
    },
    {
      "epoch": 2.420550252667041,
      "grad_norm": 0.31245356798171997,
      "learning_rate": 1.0921529588472446e-06,
      "loss": 0.3892,
      "step": 2874
    },
    {
      "epoch": 2.421392476137002,
      "grad_norm": 0.32598012685775757,
      "learning_rate": 1.0890964999325566e-06,
      "loss": 0.3898,
      "step": 2875
    },
    {
      "epoch": 2.422234699606962,
      "grad_norm": 0.2951027452945709,
      "learning_rate": 1.0860438010520773e-06,
      "loss": 0.3575,
      "step": 2876
    },
    {
      "epoch": 2.423076923076923,
      "grad_norm": 0.31239113211631775,
      "learning_rate": 1.0829948651407374e-06,
      "loss": 0.4011,
      "step": 2877
    },
    {
      "epoch": 2.4239191465468837,
      "grad_norm": 0.2757001519203186,
      "learning_rate": 1.0799496951298595e-06,
      "loss": 0.3733,
      "step": 2878
    },
    {
      "epoch": 2.4247613700168444,
      "grad_norm": 0.2911476492881775,
      "learning_rate": 1.0769082939471382e-06,
      "loss": 0.3941,
      "step": 2879
    },
    {
      "epoch": 2.425603593486805,
      "grad_norm": 0.30155235528945923,
      "learning_rate": 1.0738706645166508e-06,
      "loss": 0.4062,
      "step": 2880
    },
    {
      "epoch": 2.426445816956766,
      "grad_norm": 0.2781033515930176,
      "learning_rate": 1.0708368097588435e-06,
      "loss": 0.388,
      "step": 2881
    },
    {
      "epoch": 2.4272880404267267,
      "grad_norm": 0.30575472116470337,
      "learning_rate": 1.0678067325905362e-06,
      "loss": 0.4055,
      "step": 2882
    },
    {
      "epoch": 2.428130263896687,
      "grad_norm": 0.3014995753765106,
      "learning_rate": 1.0647804359249143e-06,
      "loss": 0.3769,
      "step": 2883
    },
    {
      "epoch": 2.428972487366648,
      "grad_norm": 0.28821301460266113,
      "learning_rate": 1.0617579226715324e-06,
      "loss": 0.3648,
      "step": 2884
    },
    {
      "epoch": 2.4298147108366086,
      "grad_norm": 0.3214152157306671,
      "learning_rate": 1.0587391957363053e-06,
      "loss": 0.4408,
      "step": 2885
    },
    {
      "epoch": 2.4306569343065694,
      "grad_norm": 0.27706223726272583,
      "learning_rate": 1.0557242580215066e-06,
      "loss": 0.3835,
      "step": 2886
    },
    {
      "epoch": 2.43149915777653,
      "grad_norm": 0.29877641797065735,
      "learning_rate": 1.0527131124257677e-06,
      "loss": 0.3571,
      "step": 2887
    },
    {
      "epoch": 2.432341381246491,
      "grad_norm": 0.2910982668399811,
      "learning_rate": 1.0497057618440765e-06,
      "loss": 0.3809,
      "step": 2888
    },
    {
      "epoch": 2.4331836047164517,
      "grad_norm": 0.30788475275039673,
      "learning_rate": 1.0467022091677692e-06,
      "loss": 0.3873,
      "step": 2889
    },
    {
      "epoch": 2.434025828186412,
      "grad_norm": 0.3104749023914337,
      "learning_rate": 1.0437024572845317e-06,
      "loss": 0.3832,
      "step": 2890
    },
    {
      "epoch": 2.4348680516563728,
      "grad_norm": 0.31269872188568115,
      "learning_rate": 1.040706509078394e-06,
      "loss": 0.403,
      "step": 2891
    },
    {
      "epoch": 2.4357102751263335,
      "grad_norm": 0.3350343704223633,
      "learning_rate": 1.037714367429734e-06,
      "loss": 0.4075,
      "step": 2892
    },
    {
      "epoch": 2.4365524985962943,
      "grad_norm": 0.2805357277393341,
      "learning_rate": 1.0347260352152644e-06,
      "loss": 0.3532,
      "step": 2893
    },
    {
      "epoch": 2.437394722066255,
      "grad_norm": 0.29471975564956665,
      "learning_rate": 1.0317415153080406e-06,
      "loss": 0.409,
      "step": 2894
    },
    {
      "epoch": 2.4382369455362154,
      "grad_norm": 0.296909898519516,
      "learning_rate": 1.0287608105774456e-06,
      "loss": 0.4209,
      "step": 2895
    },
    {
      "epoch": 2.439079169006176,
      "grad_norm": 0.3039299547672272,
      "learning_rate": 1.025783923889202e-06,
      "loss": 0.3653,
      "step": 2896
    },
    {
      "epoch": 2.439921392476137,
      "grad_norm": 0.30187898874282837,
      "learning_rate": 1.0228108581053565e-06,
      "loss": 0.4159,
      "step": 2897
    },
    {
      "epoch": 2.4407636159460977,
      "grad_norm": 0.2929585874080658,
      "learning_rate": 1.019841616084286e-06,
      "loss": 0.3813,
      "step": 2898
    },
    {
      "epoch": 2.4416058394160585,
      "grad_norm": 0.2924494445323944,
      "learning_rate": 1.0168762006806886e-06,
      "loss": 0.3643,
      "step": 2899
    },
    {
      "epoch": 2.4424480628860192,
      "grad_norm": 0.3118704557418823,
      "learning_rate": 1.0139146147455842e-06,
      "loss": 0.4059,
      "step": 2900
    },
    {
      "epoch": 2.44329028635598,
      "grad_norm": 0.30575382709503174,
      "learning_rate": 1.0109568611263094e-06,
      "loss": 0.3598,
      "step": 2901
    },
    {
      "epoch": 2.4441325098259403,
      "grad_norm": 0.2921285927295685,
      "learning_rate": 1.0080029426665194e-06,
      "loss": 0.3665,
      "step": 2902
    },
    {
      "epoch": 2.444974733295901,
      "grad_norm": 0.30365264415740967,
      "learning_rate": 1.0050528622061805e-06,
      "loss": 0.3633,
      "step": 2903
    },
    {
      "epoch": 2.445816956765862,
      "grad_norm": 0.3068585991859436,
      "learning_rate": 1.002106622581569e-06,
      "loss": 0.4319,
      "step": 2904
    },
    {
      "epoch": 2.4466591802358226,
      "grad_norm": 0.3038271367549896,
      "learning_rate": 9.991642266252672e-07,
      "loss": 0.3548,
      "step": 2905
    },
    {
      "epoch": 2.4475014037057834,
      "grad_norm": 0.3318277895450592,
      "learning_rate": 9.96225677166166e-07,
      "loss": 0.4157,
      "step": 2906
    },
    {
      "epoch": 2.4483436271757437,
      "grad_norm": 0.2949073612689972,
      "learning_rate": 9.932909770294542e-07,
      "loss": 0.3731,
      "step": 2907
    },
    {
      "epoch": 2.4491858506457045,
      "grad_norm": 0.28573328256607056,
      "learning_rate": 9.903601290366217e-07,
      "loss": 0.3762,
      "step": 2908
    },
    {
      "epoch": 2.4500280741156653,
      "grad_norm": 0.3032919466495514,
      "learning_rate": 9.87433136005454e-07,
      "loss": 0.3884,
      "step": 2909
    },
    {
      "epoch": 2.450870297585626,
      "grad_norm": 0.31727156043052673,
      "learning_rate": 9.845100007500292e-07,
      "loss": 0.4501,
      "step": 2910
    },
    {
      "epoch": 2.451712521055587,
      "grad_norm": 0.2684254050254822,
      "learning_rate": 9.81590726080721e-07,
      "loss": 0.317,
      "step": 2911
    },
    {
      "epoch": 2.4525547445255476,
      "grad_norm": 0.31349310278892517,
      "learning_rate": 9.786753148041871e-07,
      "loss": 0.4017,
      "step": 2912
    },
    {
      "epoch": 2.4533969679955083,
      "grad_norm": 0.30170711874961853,
      "learning_rate": 9.757637697233723e-07,
      "loss": 0.3951,
      "step": 2913
    },
    {
      "epoch": 2.4542391914654687,
      "grad_norm": 0.3405531942844391,
      "learning_rate": 9.728560936375032e-07,
      "loss": 0.4038,
      "step": 2914
    },
    {
      "epoch": 2.4550814149354294,
      "grad_norm": 0.33685559034347534,
      "learning_rate": 9.699522893420894e-07,
      "loss": 0.3904,
      "step": 2915
    },
    {
      "epoch": 2.45592363840539,
      "grad_norm": 0.31055864691734314,
      "learning_rate": 9.670523596289138e-07,
      "loss": 0.3669,
      "step": 2916
    },
    {
      "epoch": 2.456765861875351,
      "grad_norm": 0.296867698431015,
      "learning_rate": 9.641563072860416e-07,
      "loss": 0.385,
      "step": 2917
    },
    {
      "epoch": 2.4576080853453117,
      "grad_norm": 0.3118743300437927,
      "learning_rate": 9.61264135097799e-07,
      "loss": 0.4035,
      "step": 2918
    },
    {
      "epoch": 2.4584503088152725,
      "grad_norm": 0.30530011653900146,
      "learning_rate": 9.58375845844793e-07,
      "loss": 0.378,
      "step": 2919
    },
    {
      "epoch": 2.4592925322852333,
      "grad_norm": 0.2869267761707306,
      "learning_rate": 9.55491442303889e-07,
      "loss": 0.387,
      "step": 2920
    },
    {
      "epoch": 2.4601347557551936,
      "grad_norm": 0.3432108759880066,
      "learning_rate": 9.526109272482237e-07,
      "loss": 0.3852,
      "step": 2921
    },
    {
      "epoch": 2.4609769792251543,
      "grad_norm": 0.3084927499294281,
      "learning_rate": 9.497343034471896e-07,
      "loss": 0.436,
      "step": 2922
    },
    {
      "epoch": 2.461819202695115,
      "grad_norm": 0.27186495065689087,
      "learning_rate": 9.468615736664405e-07,
      "loss": 0.3766,
      "step": 2923
    },
    {
      "epoch": 2.462661426165076,
      "grad_norm": 0.3263488709926605,
      "learning_rate": 9.439927406678845e-07,
      "loss": 0.4382,
      "step": 2924
    },
    {
      "epoch": 2.4635036496350367,
      "grad_norm": 0.29395371675491333,
      "learning_rate": 9.41127807209688e-07,
      "loss": 0.4123,
      "step": 2925
    },
    {
      "epoch": 2.464345873104997,
      "grad_norm": 0.34496888518333435,
      "learning_rate": 9.382667760462633e-07,
      "loss": 0.3272,
      "step": 2926
    },
    {
      "epoch": 2.4651880965749577,
      "grad_norm": 0.3057279884815216,
      "learning_rate": 9.354096499282728e-07,
      "loss": 0.4185,
      "step": 2927
    },
    {
      "epoch": 2.4660303200449185,
      "grad_norm": 0.2811923921108246,
      "learning_rate": 9.325564316026236e-07,
      "loss": 0.3365,
      "step": 2928
    },
    {
      "epoch": 2.4668725435148793,
      "grad_norm": 0.331850528717041,
      "learning_rate": 9.297071238124683e-07,
      "loss": 0.3978,
      "step": 2929
    },
    {
      "epoch": 2.46771476698484,
      "grad_norm": 0.3954698443412781,
      "learning_rate": 9.268617292971982e-07,
      "loss": 0.4123,
      "step": 2930
    },
    {
      "epoch": 2.468556990454801,
      "grad_norm": 0.2983318865299225,
      "learning_rate": 9.240202507924412e-07,
      "loss": 0.385,
      "step": 2931
    },
    {
      "epoch": 2.4693992139247616,
      "grad_norm": 0.30436816811561584,
      "learning_rate": 9.211826910300598e-07,
      "loss": 0.3922,
      "step": 2932
    },
    {
      "epoch": 2.470241437394722,
      "grad_norm": 0.29220616817474365,
      "learning_rate": 9.183490527381539e-07,
      "loss": 0.417,
      "step": 2933
    },
    {
      "epoch": 2.4710836608646827,
      "grad_norm": 0.27598142623901367,
      "learning_rate": 9.155193386410466e-07,
      "loss": 0.3814,
      "step": 2934
    },
    {
      "epoch": 2.4719258843346434,
      "grad_norm": 0.2866694927215576,
      "learning_rate": 9.126935514592949e-07,
      "loss": 0.3843,
      "step": 2935
    },
    {
      "epoch": 2.472768107804604,
      "grad_norm": 0.3091675043106079,
      "learning_rate": 9.098716939096719e-07,
      "loss": 0.3952,
      "step": 2936
    },
    {
      "epoch": 2.473610331274565,
      "grad_norm": 0.31456586718559265,
      "learning_rate": 9.070537687051817e-07,
      "loss": 0.4447,
      "step": 2937
    },
    {
      "epoch": 2.4744525547445253,
      "grad_norm": 0.31526079773902893,
      "learning_rate": 9.042397785550405e-07,
      "loss": 0.3947,
      "step": 2938
    },
    {
      "epoch": 2.475294778214486,
      "grad_norm": 0.31181979179382324,
      "learning_rate": 9.014297261646876e-07,
      "loss": 0.386,
      "step": 2939
    },
    {
      "epoch": 2.476137001684447,
      "grad_norm": 0.30453747510910034,
      "learning_rate": 8.986236142357707e-07,
      "loss": 0.367,
      "step": 2940
    },
    {
      "epoch": 2.4769792251544076,
      "grad_norm": 0.3037295937538147,
      "learning_rate": 8.958214454661529e-07,
      "loss": 0.4205,
      "step": 2941
    },
    {
      "epoch": 2.4778214486243684,
      "grad_norm": 0.302885502576828,
      "learning_rate": 8.930232225499025e-07,
      "loss": 0.4057,
      "step": 2942
    },
    {
      "epoch": 2.478663672094329,
      "grad_norm": 0.30855268239974976,
      "learning_rate": 8.902289481772996e-07,
      "loss": 0.3934,
      "step": 2943
    },
    {
      "epoch": 2.47950589556429,
      "grad_norm": 0.301862508058548,
      "learning_rate": 8.874386250348232e-07,
      "loss": 0.3954,
      "step": 2944
    },
    {
      "epoch": 2.4803481190342502,
      "grad_norm": 0.28800830245018005,
      "learning_rate": 8.846522558051563e-07,
      "loss": 0.401,
      "step": 2945
    },
    {
      "epoch": 2.481190342504211,
      "grad_norm": 0.3113436996936798,
      "learning_rate": 8.818698431671774e-07,
      "loss": 0.4023,
      "step": 2946
    },
    {
      "epoch": 2.4820325659741718,
      "grad_norm": 0.3344839811325073,
      "learning_rate": 8.790913897959663e-07,
      "loss": 0.3703,
      "step": 2947
    },
    {
      "epoch": 2.4828747894441325,
      "grad_norm": 0.33744433522224426,
      "learning_rate": 8.763168983627912e-07,
      "loss": 0.4065,
      "step": 2948
    },
    {
      "epoch": 2.4837170129140933,
      "grad_norm": 0.30251064896583557,
      "learning_rate": 8.735463715351139e-07,
      "loss": 0.3543,
      "step": 2949
    },
    {
      "epoch": 2.484559236384054,
      "grad_norm": 0.32029077410697937,
      "learning_rate": 8.70779811976582e-07,
      "loss": 0.4094,
      "step": 2950
    },
    {
      "epoch": 2.485401459854015,
      "grad_norm": 0.30513691902160645,
      "learning_rate": 8.680172223470329e-07,
      "loss": 0.3933,
      "step": 2951
    },
    {
      "epoch": 2.486243683323975,
      "grad_norm": 0.2997528910636902,
      "learning_rate": 8.652586053024836e-07,
      "loss": 0.3748,
      "step": 2952
    },
    {
      "epoch": 2.487085906793936,
      "grad_norm": 0.34260571002960205,
      "learning_rate": 8.625039634951354e-07,
      "loss": 0.4124,
      "step": 2953
    },
    {
      "epoch": 2.4879281302638967,
      "grad_norm": 0.2949962019920349,
      "learning_rate": 8.597532995733615e-07,
      "loss": 0.3495,
      "step": 2954
    },
    {
      "epoch": 2.4887703537338575,
      "grad_norm": 0.30550286173820496,
      "learning_rate": 8.570066161817176e-07,
      "loss": 0.3985,
      "step": 2955
    },
    {
      "epoch": 2.4896125772038182,
      "grad_norm": 0.3095935583114624,
      "learning_rate": 8.542639159609278e-07,
      "loss": 0.3887,
      "step": 2956
    },
    {
      "epoch": 2.4904548006737786,
      "grad_norm": 0.28934234380722046,
      "learning_rate": 8.515252015478915e-07,
      "loss": 0.3757,
      "step": 2957
    },
    {
      "epoch": 2.4912970241437393,
      "grad_norm": 0.3104151487350464,
      "learning_rate": 8.487904755756676e-07,
      "loss": 0.3971,
      "step": 2958
    },
    {
      "epoch": 2.4921392476137,
      "grad_norm": 0.2909107804298401,
      "learning_rate": 8.460597406734905e-07,
      "loss": 0.39,
      "step": 2959
    },
    {
      "epoch": 2.492981471083661,
      "grad_norm": 0.3045305609703064,
      "learning_rate": 8.433329994667488e-07,
      "loss": 0.3814,
      "step": 2960
    },
    {
      "epoch": 2.4938236945536216,
      "grad_norm": 0.3271944522857666,
      "learning_rate": 8.406102545769989e-07,
      "loss": 0.4229,
      "step": 2961
    },
    {
      "epoch": 2.4946659180235824,
      "grad_norm": 0.27755120396614075,
      "learning_rate": 8.378915086219497e-07,
      "loss": 0.3426,
      "step": 2962
    },
    {
      "epoch": 2.495508141493543,
      "grad_norm": 0.3123335540294647,
      "learning_rate": 8.351767642154673e-07,
      "loss": 0.3973,
      "step": 2963
    },
    {
      "epoch": 2.4963503649635035,
      "grad_norm": 0.31231802701950073,
      "learning_rate": 8.324660239675697e-07,
      "loss": 0.3975,
      "step": 2964
    },
    {
      "epoch": 2.4971925884334643,
      "grad_norm": 0.28957825899124146,
      "learning_rate": 8.297592904844282e-07,
      "loss": 0.3657,
      "step": 2965
    },
    {
      "epoch": 2.498034811903425,
      "grad_norm": 0.31576716899871826,
      "learning_rate": 8.270565663683583e-07,
      "loss": 0.3862,
      "step": 2966
    },
    {
      "epoch": 2.498877035373386,
      "grad_norm": 0.3018133342266083,
      "learning_rate": 8.243578542178227e-07,
      "loss": 0.3566,
      "step": 2967
    },
    {
      "epoch": 2.4997192588433466,
      "grad_norm": 0.32084065675735474,
      "learning_rate": 8.216631566274252e-07,
      "loss": 0.388,
      "step": 2968
    },
    {
      "epoch": 2.500561482313307,
      "grad_norm": 0.3015078604221344,
      "learning_rate": 8.189724761879131e-07,
      "loss": 0.4133,
      "step": 2969
    },
    {
      "epoch": 2.501403705783268,
      "grad_norm": 0.3065216541290283,
      "learning_rate": 8.16285815486168e-07,
      "loss": 0.3962,
      "step": 2970
    },
    {
      "epoch": 2.5022459292532284,
      "grad_norm": 0.29474735260009766,
      "learning_rate": 8.13603177105211e-07,
      "loss": 0.4094,
      "step": 2971
    },
    {
      "epoch": 2.503088152723189,
      "grad_norm": 0.3073674142360687,
      "learning_rate": 8.109245636241892e-07,
      "loss": 0.4,
      "step": 2972
    },
    {
      "epoch": 2.50393037619315,
      "grad_norm": 0.2732267379760742,
      "learning_rate": 8.082499776183883e-07,
      "loss": 0.3194,
      "step": 2973
    },
    {
      "epoch": 2.5047725996631107,
      "grad_norm": 0.3273344337940216,
      "learning_rate": 8.05579421659215e-07,
      "loss": 0.4361,
      "step": 2974
    },
    {
      "epoch": 2.5056148231330715,
      "grad_norm": 0.3004530072212219,
      "learning_rate": 8.029128983142076e-07,
      "loss": 0.3729,
      "step": 2975
    },
    {
      "epoch": 2.506457046603032,
      "grad_norm": 0.2926609516143799,
      "learning_rate": 8.002504101470204e-07,
      "loss": 0.3546,
      "step": 2976
    },
    {
      "epoch": 2.5072992700729926,
      "grad_norm": 0.2894236147403717,
      "learning_rate": 7.975919597174342e-07,
      "loss": 0.3772,
      "step": 2977
    },
    {
      "epoch": 2.5081414935429533,
      "grad_norm": 0.29511478543281555,
      "learning_rate": 7.949375495813439e-07,
      "loss": 0.3823,
      "step": 2978
    },
    {
      "epoch": 2.508983717012914,
      "grad_norm": 0.28661513328552246,
      "learning_rate": 7.922871822907641e-07,
      "loss": 0.3797,
      "step": 2979
    },
    {
      "epoch": 2.509825940482875,
      "grad_norm": 0.26862290501594543,
      "learning_rate": 7.896408603938194e-07,
      "loss": 0.3472,
      "step": 2980
    },
    {
      "epoch": 2.5106681639528357,
      "grad_norm": 0.32024142146110535,
      "learning_rate": 7.869985864347424e-07,
      "loss": 0.4356,
      "step": 2981
    },
    {
      "epoch": 2.5115103874227964,
      "grad_norm": 0.29275330901145935,
      "learning_rate": 7.843603629538804e-07,
      "loss": 0.4065,
      "step": 2982
    },
    {
      "epoch": 2.5123526108927567,
      "grad_norm": 0.2851814925670624,
      "learning_rate": 7.817261924876812e-07,
      "loss": 0.3493,
      "step": 2983
    },
    {
      "epoch": 2.5131948343627175,
      "grad_norm": 0.2936204671859741,
      "learning_rate": 7.790960775687001e-07,
      "loss": 0.4019,
      "step": 2984
    },
    {
      "epoch": 2.5140370578326783,
      "grad_norm": 0.32532504200935364,
      "learning_rate": 7.764700207255904e-07,
      "loss": 0.3997,
      "step": 2985
    },
    {
      "epoch": 2.514879281302639,
      "grad_norm": 0.2882044017314911,
      "learning_rate": 7.738480244831042e-07,
      "loss": 0.3956,
      "step": 2986
    },
    {
      "epoch": 2.5157215047726,
      "grad_norm": 0.284999817609787,
      "learning_rate": 7.71230091362089e-07,
      "loss": 0.3894,
      "step": 2987
    },
    {
      "epoch": 2.51656372824256,
      "grad_norm": 0.2829105854034424,
      "learning_rate": 7.686162238794898e-07,
      "loss": 0.3696,
      "step": 2988
    },
    {
      "epoch": 2.517405951712521,
      "grad_norm": 0.30537793040275574,
      "learning_rate": 7.660064245483384e-07,
      "loss": 0.3937,
      "step": 2989
    },
    {
      "epoch": 2.5182481751824817,
      "grad_norm": 0.2793121337890625,
      "learning_rate": 7.634006958777568e-07,
      "loss": 0.3731,
      "step": 2990
    },
    {
      "epoch": 2.5190903986524424,
      "grad_norm": 0.29884031414985657,
      "learning_rate": 7.607990403729526e-07,
      "loss": 0.4013,
      "step": 2991
    },
    {
      "epoch": 2.519932622122403,
      "grad_norm": 0.28855738043785095,
      "learning_rate": 7.582014605352206e-07,
      "loss": 0.3728,
      "step": 2992
    },
    {
      "epoch": 2.520774845592364,
      "grad_norm": 0.29164955019950867,
      "learning_rate": 7.556079588619341e-07,
      "loss": 0.3883,
      "step": 2993
    },
    {
      "epoch": 2.5216170690623247,
      "grad_norm": 0.2730679512023926,
      "learning_rate": 7.530185378465459e-07,
      "loss": 0.3625,
      "step": 2994
    },
    {
      "epoch": 2.522459292532285,
      "grad_norm": 0.3318729102611542,
      "learning_rate": 7.504331999785852e-07,
      "loss": 0.4187,
      "step": 2995
    },
    {
      "epoch": 2.523301516002246,
      "grad_norm": 0.2912776470184326,
      "learning_rate": 7.47851947743658e-07,
      "loss": 0.3673,
      "step": 2996
    },
    {
      "epoch": 2.5241437394722066,
      "grad_norm": 0.3005622625350952,
      "learning_rate": 7.452747836234392e-07,
      "loss": 0.3904,
      "step": 2997
    },
    {
      "epoch": 2.5249859629421674,
      "grad_norm": 0.27534130215644836,
      "learning_rate": 7.427017100956779e-07,
      "loss": 0.3363,
      "step": 2998
    },
    {
      "epoch": 2.525828186412128,
      "grad_norm": 0.3041474521160126,
      "learning_rate": 7.401327296341826e-07,
      "loss": 0.3815,
      "step": 2999
    },
    {
      "epoch": 2.5266704098820885,
      "grad_norm": 0.3153543174266815,
      "learning_rate": 7.375678447088347e-07,
      "loss": 0.4161,
      "step": 3000
    },
    {
      "epoch": 2.5275126333520497,
      "grad_norm": 0.29368677735328674,
      "learning_rate": 7.350070577855716e-07,
      "loss": 0.4007,
      "step": 3001
    },
    {
      "epoch": 2.52835485682201,
      "grad_norm": 0.30028706789016724,
      "learning_rate": 7.324503713263975e-07,
      "loss": 0.3951,
      "step": 3002
    },
    {
      "epoch": 2.5291970802919708,
      "grad_norm": 0.2897226810455322,
      "learning_rate": 7.298977877893688e-07,
      "loss": 0.4017,
      "step": 3003
    },
    {
      "epoch": 2.5300393037619315,
      "grad_norm": 0.2840642035007477,
      "learning_rate": 7.273493096285989e-07,
      "loss": 0.3779,
      "step": 3004
    },
    {
      "epoch": 2.5308815272318923,
      "grad_norm": 0.30169549584388733,
      "learning_rate": 7.24804939294253e-07,
      "loss": 0.369,
      "step": 3005
    },
    {
      "epoch": 2.531723750701853,
      "grad_norm": 0.2769645154476166,
      "learning_rate": 7.222646792325516e-07,
      "loss": 0.3761,
      "step": 3006
    },
    {
      "epoch": 2.5325659741718134,
      "grad_norm": 0.3068864047527313,
      "learning_rate": 7.197285318857584e-07,
      "loss": 0.4157,
      "step": 3007
    },
    {
      "epoch": 2.533408197641774,
      "grad_norm": 0.3362139165401459,
      "learning_rate": 7.171964996921848e-07,
      "loss": 0.4031,
      "step": 3008
    },
    {
      "epoch": 2.534250421111735,
      "grad_norm": 0.2942652106285095,
      "learning_rate": 7.146685850861851e-07,
      "loss": 0.3875,
      "step": 3009
    },
    {
      "epoch": 2.5350926445816957,
      "grad_norm": 0.31760233640670776,
      "learning_rate": 7.121447904981571e-07,
      "loss": 0.4139,
      "step": 3010
    },
    {
      "epoch": 2.5359348680516565,
      "grad_norm": 0.2902313768863678,
      "learning_rate": 7.096251183545355e-07,
      "loss": 0.3798,
      "step": 3011
    },
    {
      "epoch": 2.5367770915216172,
      "grad_norm": 0.30539512634277344,
      "learning_rate": 7.071095710777925e-07,
      "loss": 0.4149,
      "step": 3012
    },
    {
      "epoch": 2.537619314991578,
      "grad_norm": 0.32333505153656006,
      "learning_rate": 7.045981510864319e-07,
      "loss": 0.4317,
      "step": 3013
    },
    {
      "epoch": 2.5384615384615383,
      "grad_norm": 0.2925693094730377,
      "learning_rate": 7.02090860794995e-07,
      "loss": 0.3915,
      "step": 3014
    },
    {
      "epoch": 2.539303761931499,
      "grad_norm": 0.27622848749160767,
      "learning_rate": 6.995877026140468e-07,
      "loss": 0.3953,
      "step": 3015
    },
    {
      "epoch": 2.54014598540146,
      "grad_norm": 0.28096166253089905,
      "learning_rate": 6.970886789501851e-07,
      "loss": 0.3629,
      "step": 3016
    },
    {
      "epoch": 2.5409882088714206,
      "grad_norm": 0.3282982110977173,
      "learning_rate": 6.945937922060259e-07,
      "loss": 0.4301,
      "step": 3017
    },
    {
      "epoch": 2.5418304323413814,
      "grad_norm": 0.31540486216545105,
      "learning_rate": 6.921030447802146e-07,
      "loss": 0.3957,
      "step": 3018
    },
    {
      "epoch": 2.5426726558113417,
      "grad_norm": 0.31668004393577576,
      "learning_rate": 6.896164390674125e-07,
      "loss": 0.3891,
      "step": 3019
    },
    {
      "epoch": 2.5435148792813025,
      "grad_norm": 0.29793673753738403,
      "learning_rate": 6.871339774583025e-07,
      "loss": 0.3741,
      "step": 3020
    },
    {
      "epoch": 2.5443571027512633,
      "grad_norm": 0.29583069682121277,
      "learning_rate": 6.846556623395795e-07,
      "loss": 0.4056,
      "step": 3021
    },
    {
      "epoch": 2.545199326221224,
      "grad_norm": 0.293761283159256,
      "learning_rate": 6.821814960939549e-07,
      "loss": 0.3876,
      "step": 3022
    },
    {
      "epoch": 2.546041549691185,
      "grad_norm": 0.5133453607559204,
      "learning_rate": 6.797114811001482e-07,
      "loss": 0.3571,
      "step": 3023
    },
    {
      "epoch": 2.5468837731611456,
      "grad_norm": 0.3155250549316406,
      "learning_rate": 6.772456197328919e-07,
      "loss": 0.4378,
      "step": 3024
    },
    {
      "epoch": 2.5477259966311063,
      "grad_norm": 0.2979055345058441,
      "learning_rate": 6.74783914362922e-07,
      "loss": 0.3904,
      "step": 3025
    },
    {
      "epoch": 2.5485682201010667,
      "grad_norm": 0.28110602498054504,
      "learning_rate": 6.723263673569796e-07,
      "loss": 0.3436,
      "step": 3026
    },
    {
      "epoch": 2.5494104435710274,
      "grad_norm": 0.30146723985671997,
      "learning_rate": 6.698729810778065e-07,
      "loss": 0.3919,
      "step": 3027
    },
    {
      "epoch": 2.550252667040988,
      "grad_norm": 0.30941635370254517,
      "learning_rate": 6.674237578841486e-07,
      "loss": 0.4264,
      "step": 3028
    },
    {
      "epoch": 2.551094890510949,
      "grad_norm": 0.29012003540992737,
      "learning_rate": 6.649787001307451e-07,
      "loss": 0.3403,
      "step": 3029
    },
    {
      "epoch": 2.5519371139809097,
      "grad_norm": 0.30784356594085693,
      "learning_rate": 6.625378101683317e-07,
      "loss": 0.3955,
      "step": 3030
    },
    {
      "epoch": 2.55277933745087,
      "grad_norm": 0.29527172446250916,
      "learning_rate": 6.601010903436355e-07,
      "loss": 0.379,
      "step": 3031
    },
    {
      "epoch": 2.5536215609208313,
      "grad_norm": 0.2922913432121277,
      "learning_rate": 6.57668542999379e-07,
      "loss": 0.3632,
      "step": 3032
    },
    {
      "epoch": 2.5544637843907916,
      "grad_norm": 0.3036286532878876,
      "learning_rate": 6.552401704742678e-07,
      "loss": 0.3842,
      "step": 3033
    },
    {
      "epoch": 2.5553060078607523,
      "grad_norm": 0.31427401304244995,
      "learning_rate": 6.528159751029988e-07,
      "loss": 0.446,
      "step": 3034
    },
    {
      "epoch": 2.556148231330713,
      "grad_norm": 0.30401694774627686,
      "learning_rate": 6.503959592162468e-07,
      "loss": 0.3834,
      "step": 3035
    },
    {
      "epoch": 2.556990454800674,
      "grad_norm": 0.3343844413757324,
      "learning_rate": 6.479801251406748e-07,
      "loss": 0.4675,
      "step": 3036
    },
    {
      "epoch": 2.5578326782706347,
      "grad_norm": 0.28221985697746277,
      "learning_rate": 6.455684751989194e-07,
      "loss": 0.3873,
      "step": 3037
    },
    {
      "epoch": 2.558674901740595,
      "grad_norm": 0.3119698464870453,
      "learning_rate": 6.431610117095999e-07,
      "loss": 0.4035,
      "step": 3038
    },
    {
      "epoch": 2.5595171252105557,
      "grad_norm": 0.30893653631210327,
      "learning_rate": 6.40757736987307e-07,
      "loss": 0.3879,
      "step": 3039
    },
    {
      "epoch": 2.5603593486805165,
      "grad_norm": 0.30957165360450745,
      "learning_rate": 6.383586533426051e-07,
      "loss": 0.3859,
      "step": 3040
    },
    {
      "epoch": 2.5612015721504773,
      "grad_norm": 0.30730268359184265,
      "learning_rate": 6.359637630820292e-07,
      "loss": 0.4098,
      "step": 3041
    },
    {
      "epoch": 2.562043795620438,
      "grad_norm": 0.29576143622398376,
      "learning_rate": 6.335730685080838e-07,
      "loss": 0.3799,
      "step": 3042
    },
    {
      "epoch": 2.562886019090399,
      "grad_norm": 0.29629090428352356,
      "learning_rate": 6.311865719192384e-07,
      "loss": 0.4204,
      "step": 3043
    },
    {
      "epoch": 2.5637282425603596,
      "grad_norm": 0.27148982882499695,
      "learning_rate": 6.28804275609926e-07,
      "loss": 0.3535,
      "step": 3044
    },
    {
      "epoch": 2.56457046603032,
      "grad_norm": 0.2988394498825073,
      "learning_rate": 6.26426181870542e-07,
      "loss": 0.3939,
      "step": 3045
    },
    {
      "epoch": 2.5654126895002807,
      "grad_norm": 0.3135700821876526,
      "learning_rate": 6.24052292987442e-07,
      "loss": 0.3894,
      "step": 3046
    },
    {
      "epoch": 2.5662549129702414,
      "grad_norm": 0.2991756796836853,
      "learning_rate": 6.216826112429391e-07,
      "loss": 0.3988,
      "step": 3047
    },
    {
      "epoch": 2.567097136440202,
      "grad_norm": 0.3013117015361786,
      "learning_rate": 6.193171389152996e-07,
      "loss": 0.3973,
      "step": 3048
    },
    {
      "epoch": 2.567939359910163,
      "grad_norm": 0.29117438197135925,
      "learning_rate": 6.169558782787438e-07,
      "loss": 0.3953,
      "step": 3049
    },
    {
      "epoch": 2.5687815833801233,
      "grad_norm": 0.3161605894565582,
      "learning_rate": 6.145988316034441e-07,
      "loss": 0.3992,
      "step": 3050
    },
    {
      "epoch": 2.569623806850084,
      "grad_norm": 0.2794417142868042,
      "learning_rate": 6.122460011555187e-07,
      "loss": 0.402,
      "step": 3051
    },
    {
      "epoch": 2.570466030320045,
      "grad_norm": 0.29506421089172363,
      "learning_rate": 6.098973891970373e-07,
      "loss": 0.3744,
      "step": 3052
    },
    {
      "epoch": 2.5713082537900056,
      "grad_norm": 0.28356239199638367,
      "learning_rate": 6.075529979860068e-07,
      "loss": 0.3653,
      "step": 3053
    },
    {
      "epoch": 2.5721504772599664,
      "grad_norm": 0.31786295771598816,
      "learning_rate": 6.052128297763804e-07,
      "loss": 0.393,
      "step": 3054
    },
    {
      "epoch": 2.572992700729927,
      "grad_norm": 0.3052363395690918,
      "learning_rate": 6.028768868180523e-07,
      "loss": 0.3924,
      "step": 3055
    },
    {
      "epoch": 2.573834924199888,
      "grad_norm": 0.28932759165763855,
      "learning_rate": 6.005451713568505e-07,
      "loss": 0.3842,
      "step": 3056
    },
    {
      "epoch": 2.5746771476698482,
      "grad_norm": 0.3112121820449829,
      "learning_rate": 5.982176856345445e-07,
      "loss": 0.3901,
      "step": 3057
    },
    {
      "epoch": 2.575519371139809,
      "grad_norm": 0.30743566155433655,
      "learning_rate": 5.958944318888287e-07,
      "loss": 0.3926,
      "step": 3058
    },
    {
      "epoch": 2.5763615946097698,
      "grad_norm": 0.32021230459213257,
      "learning_rate": 5.935754123533378e-07,
      "loss": 0.3806,
      "step": 3059
    },
    {
      "epoch": 2.5772038180797305,
      "grad_norm": 0.2895594537258148,
      "learning_rate": 5.912606292576284e-07,
      "loss": 0.3829,
      "step": 3060
    },
    {
      "epoch": 2.5780460415496913,
      "grad_norm": 0.3041628301143646,
      "learning_rate": 5.889500848271901e-07,
      "loss": 0.3584,
      "step": 3061
    },
    {
      "epoch": 2.5788882650196516,
      "grad_norm": 0.28974097967147827,
      "learning_rate": 5.866437812834325e-07,
      "loss": 0.419,
      "step": 3062
    },
    {
      "epoch": 2.579730488489613,
      "grad_norm": 0.29705438017845154,
      "learning_rate": 5.843417208436908e-07,
      "loss": 0.4142,
      "step": 3063
    },
    {
      "epoch": 2.580572711959573,
      "grad_norm": 0.2942309081554413,
      "learning_rate": 5.82043905721218e-07,
      "loss": 0.4074,
      "step": 3064
    },
    {
      "epoch": 2.581414935429534,
      "grad_norm": 0.3029472529888153,
      "learning_rate": 5.797503381251896e-07,
      "loss": 0.3938,
      "step": 3065
    },
    {
      "epoch": 2.5822571588994947,
      "grad_norm": 0.29329630732536316,
      "learning_rate": 5.774610202606939e-07,
      "loss": 0.4032,
      "step": 3066
    },
    {
      "epoch": 2.5830993823694555,
      "grad_norm": 0.30351969599723816,
      "learning_rate": 5.751759543287355e-07,
      "loss": 0.4146,
      "step": 3067
    },
    {
      "epoch": 2.5839416058394162,
      "grad_norm": 0.2931276857852936,
      "learning_rate": 5.728951425262292e-07,
      "loss": 0.3494,
      "step": 3068
    },
    {
      "epoch": 2.5847838293093766,
      "grad_norm": 0.28105103969573975,
      "learning_rate": 5.706185870460018e-07,
      "loss": 0.3765,
      "step": 3069
    },
    {
      "epoch": 2.5856260527793373,
      "grad_norm": 0.29818084836006165,
      "learning_rate": 5.683462900767873e-07,
      "loss": 0.3903,
      "step": 3070
    },
    {
      "epoch": 2.586468276249298,
      "grad_norm": 0.28325164318084717,
      "learning_rate": 5.660782538032245e-07,
      "loss": 0.3856,
      "step": 3071
    },
    {
      "epoch": 2.587310499719259,
      "grad_norm": 0.30016422271728516,
      "learning_rate": 5.63814480405856e-07,
      "loss": 0.3844,
      "step": 3072
    },
    {
      "epoch": 2.5881527231892196,
      "grad_norm": 0.2865643799304962,
      "learning_rate": 5.61554972061128e-07,
      "loss": 0.3759,
      "step": 3073
    },
    {
      "epoch": 2.5889949466591804,
      "grad_norm": 0.27734076976776123,
      "learning_rate": 5.592997309413834e-07,
      "loss": 0.3718,
      "step": 3074
    },
    {
      "epoch": 2.589837170129141,
      "grad_norm": 0.3068936765193939,
      "learning_rate": 5.570487592148666e-07,
      "loss": 0.4128,
      "step": 3075
    },
    {
      "epoch": 2.5906793935991015,
      "grad_norm": 0.27739664912223816,
      "learning_rate": 5.548020590457098e-07,
      "loss": 0.3893,
      "step": 3076
    },
    {
      "epoch": 2.5915216170690623,
      "grad_norm": 0.3018111288547516,
      "learning_rate": 5.525596325939469e-07,
      "loss": 0.4024,
      "step": 3077
    },
    {
      "epoch": 2.592363840539023,
      "grad_norm": 0.2942950427532196,
      "learning_rate": 5.503214820154979e-07,
      "loss": 0.36,
      "step": 3078
    },
    {
      "epoch": 2.593206064008984,
      "grad_norm": 0.29665935039520264,
      "learning_rate": 5.480876094621734e-07,
      "loss": 0.3776,
      "step": 3079
    },
    {
      "epoch": 2.5940482874789446,
      "grad_norm": 0.2938675880432129,
      "learning_rate": 5.458580170816713e-07,
      "loss": 0.4164,
      "step": 3080
    },
    {
      "epoch": 2.594890510948905,
      "grad_norm": 0.2764010727405548,
      "learning_rate": 5.436327070175729e-07,
      "loss": 0.3514,
      "step": 3081
    },
    {
      "epoch": 2.5957327344188657,
      "grad_norm": 0.33839163184165955,
      "learning_rate": 5.414116814093434e-07,
      "loss": 0.4242,
      "step": 3082
    },
    {
      "epoch": 2.5965749578888264,
      "grad_norm": 0.35481876134872437,
      "learning_rate": 5.391949423923298e-07,
      "loss": 0.3554,
      "step": 3083
    },
    {
      "epoch": 2.597417181358787,
      "grad_norm": 0.313858300447464,
      "learning_rate": 5.369824920977567e-07,
      "loss": 0.3696,
      "step": 3084
    },
    {
      "epoch": 2.598259404828748,
      "grad_norm": 0.3032216429710388,
      "learning_rate": 5.347743326527255e-07,
      "loss": 0.4036,
      "step": 3085
    },
    {
      "epoch": 2.5991016282987087,
      "grad_norm": 0.31062746047973633,
      "learning_rate": 5.325704661802106e-07,
      "loss": 0.416,
      "step": 3086
    },
    {
      "epoch": 2.5999438517686695,
      "grad_norm": 0.3095996081829071,
      "learning_rate": 5.303708947990638e-07,
      "loss": 0.4085,
      "step": 3087
    },
    {
      "epoch": 2.60078607523863,
      "grad_norm": 0.28702718019485474,
      "learning_rate": 5.281756206240035e-07,
      "loss": 0.3567,
      "step": 3088
    },
    {
      "epoch": 2.6016282987085906,
      "grad_norm": 0.2956518232822418,
      "learning_rate": 5.25984645765617e-07,
      "loss": 0.4074,
      "step": 3089
    },
    {
      "epoch": 2.6024705221785513,
      "grad_norm": 0.28210586309432983,
      "learning_rate": 5.237979723303582e-07,
      "loss": 0.3889,
      "step": 3090
    },
    {
      "epoch": 2.603312745648512,
      "grad_norm": 0.28807681798934937,
      "learning_rate": 5.216156024205482e-07,
      "loss": 0.3955,
      "step": 3091
    },
    {
      "epoch": 2.604154969118473,
      "grad_norm": 0.27729231119155884,
      "learning_rate": 5.194375381343664e-07,
      "loss": 0.3728,
      "step": 3092
    },
    {
      "epoch": 2.604997192588433,
      "grad_norm": 0.3012796640396118,
      "learning_rate": 5.172637815658583e-07,
      "loss": 0.4075,
      "step": 3093
    },
    {
      "epoch": 2.6058394160583944,
      "grad_norm": 0.2991304397583008,
      "learning_rate": 5.150943348049198e-07,
      "loss": 0.3708,
      "step": 3094
    },
    {
      "epoch": 2.6066816395283547,
      "grad_norm": 0.315244197845459,
      "learning_rate": 5.129291999373109e-07,
      "loss": 0.4513,
      "step": 3095
    },
    {
      "epoch": 2.6075238629983155,
      "grad_norm": 0.2940312623977661,
      "learning_rate": 5.107683790446411e-07,
      "loss": 0.3727,
      "step": 3096
    },
    {
      "epoch": 2.6083660864682763,
      "grad_norm": 0.30001065135002136,
      "learning_rate": 5.086118742043761e-07,
      "loss": 0.4187,
      "step": 3097
    },
    {
      "epoch": 2.609208309938237,
      "grad_norm": 0.28274279832839966,
      "learning_rate": 5.064596874898292e-07,
      "loss": 0.3469,
      "step": 3098
    },
    {
      "epoch": 2.610050533408198,
      "grad_norm": 0.3021934926509857,
      "learning_rate": 5.04311820970163e-07,
      "loss": 0.4251,
      "step": 3099
    },
    {
      "epoch": 2.610892756878158,
      "grad_norm": 0.28122419118881226,
      "learning_rate": 5.021682767103858e-07,
      "loss": 0.3942,
      "step": 3100
    },
    {
      "epoch": 2.611734980348119,
      "grad_norm": 0.291301429271698,
      "learning_rate": 5.000290567713533e-07,
      "loss": 0.3926,
      "step": 3101
    },
    {
      "epoch": 2.6125772038180797,
      "grad_norm": 0.3115138113498688,
      "learning_rate": 4.978941632097612e-07,
      "loss": 0.3735,
      "step": 3102
    },
    {
      "epoch": 2.6134194272880404,
      "grad_norm": 0.3022382855415344,
      "learning_rate": 4.957635980781445e-07,
      "loss": 0.3767,
      "step": 3103
    },
    {
      "epoch": 2.614261650758001,
      "grad_norm": 0.28915268182754517,
      "learning_rate": 4.936373634248792e-07,
      "loss": 0.3694,
      "step": 3104
    },
    {
      "epoch": 2.615103874227962,
      "grad_norm": 0.29075250029563904,
      "learning_rate": 4.915154612941781e-07,
      "loss": 0.3672,
      "step": 3105
    },
    {
      "epoch": 2.6159460976979227,
      "grad_norm": 0.3000990152359009,
      "learning_rate": 4.893978937260868e-07,
      "loss": 0.4019,
      "step": 3106
    },
    {
      "epoch": 2.616788321167883,
      "grad_norm": 0.30624130368232727,
      "learning_rate": 4.872846627564842e-07,
      "loss": 0.4195,
      "step": 3107
    },
    {
      "epoch": 2.617630544637844,
      "grad_norm": 0.3011264503002167,
      "learning_rate": 4.851757704170796e-07,
      "loss": 0.4125,
      "step": 3108
    },
    {
      "epoch": 2.6184727681078046,
      "grad_norm": 0.33022674918174744,
      "learning_rate": 4.830712187354125e-07,
      "loss": 0.4128,
      "step": 3109
    },
    {
      "epoch": 2.6193149915777654,
      "grad_norm": 0.289302796125412,
      "learning_rate": 4.809710097348469e-07,
      "loss": 0.3477,
      "step": 3110
    },
    {
      "epoch": 2.620157215047726,
      "grad_norm": 0.29445400834083557,
      "learning_rate": 4.788751454345763e-07,
      "loss": 0.3705,
      "step": 3111
    },
    {
      "epoch": 2.6209994385176865,
      "grad_norm": 0.3250124454498291,
      "learning_rate": 4.767836278496085e-07,
      "loss": 0.3976,
      "step": 3112
    },
    {
      "epoch": 2.6218416619876472,
      "grad_norm": 0.2844984829425812,
      "learning_rate": 4.7469645899078153e-07,
      "loss": 0.3766,
      "step": 3113
    },
    {
      "epoch": 2.622683885457608,
      "grad_norm": 0.29596519470214844,
      "learning_rate": 4.726136408647464e-07,
      "loss": 0.3921,
      "step": 3114
    },
    {
      "epoch": 2.6235261089275688,
      "grad_norm": 0.2799502909183502,
      "learning_rate": 4.7053517547397454e-07,
      "loss": 0.3801,
      "step": 3115
    },
    {
      "epoch": 2.6243683323975295,
      "grad_norm": 0.28478875756263733,
      "learning_rate": 4.6846106481675035e-07,
      "loss": 0.3936,
      "step": 3116
    },
    {
      "epoch": 2.6252105558674903,
      "grad_norm": 0.28143298625946045,
      "learning_rate": 4.663913108871726e-07,
      "loss": 0.3779,
      "step": 3117
    },
    {
      "epoch": 2.626052779337451,
      "grad_norm": 0.29976803064346313,
      "learning_rate": 4.643259156751506e-07,
      "loss": 0.4057,
      "step": 3118
    },
    {
      "epoch": 2.6268950028074114,
      "grad_norm": 0.3093685507774353,
      "learning_rate": 4.622648811664049e-07,
      "loss": 0.4188,
      "step": 3119
    },
    {
      "epoch": 2.627737226277372,
      "grad_norm": 0.2811008095741272,
      "learning_rate": 4.60208209342462e-07,
      "loss": 0.3733,
      "step": 3120
    },
    {
      "epoch": 2.628579449747333,
      "grad_norm": 0.2941335439682007,
      "learning_rate": 4.581559021806542e-07,
      "loss": 0.4047,
      "step": 3121
    },
    {
      "epoch": 2.6294216732172937,
      "grad_norm": 0.29169759154319763,
      "learning_rate": 4.561079616541164e-07,
      "loss": 0.3635,
      "step": 3122
    },
    {
      "epoch": 2.6302638966872545,
      "grad_norm": 0.31558889150619507,
      "learning_rate": 4.540643897317887e-07,
      "loss": 0.3837,
      "step": 3123
    },
    {
      "epoch": 2.631106120157215,
      "grad_norm": 0.28248462080955505,
      "learning_rate": 4.520251883784077e-07,
      "loss": 0.3604,
      "step": 3124
    },
    {
      "epoch": 2.631948343627176,
      "grad_norm": 0.305896520614624,
      "learning_rate": 4.4999035955450964e-07,
      "loss": 0.4455,
      "step": 3125
    },
    {
      "epoch": 2.6327905670971363,
      "grad_norm": 0.2827717661857605,
      "learning_rate": 4.4795990521642684e-07,
      "loss": 0.3791,
      "step": 3126
    },
    {
      "epoch": 2.633632790567097,
      "grad_norm": 0.303053617477417,
      "learning_rate": 4.459338273162844e-07,
      "loss": 0.4217,
      "step": 3127
    },
    {
      "epoch": 2.634475014037058,
      "grad_norm": 0.29254698753356934,
      "learning_rate": 4.439121278020031e-07,
      "loss": 0.4088,
      "step": 3128
    },
    {
      "epoch": 2.6353172375070186,
      "grad_norm": 0.27057260274887085,
      "learning_rate": 4.4189480861729137e-07,
      "loss": 0.3602,
      "step": 3129
    },
    {
      "epoch": 2.6361594609769794,
      "grad_norm": 0.27644434571266174,
      "learning_rate": 4.3988187170164673e-07,
      "loss": 0.3679,
      "step": 3130
    },
    {
      "epoch": 2.6370016844469397,
      "grad_norm": 0.2863173484802246,
      "learning_rate": 4.378733189903528e-07,
      "loss": 0.3975,
      "step": 3131
    },
    {
      "epoch": 2.6378439079169005,
      "grad_norm": 0.31703850626945496,
      "learning_rate": 4.35869152414482e-07,
      "loss": 0.4062,
      "step": 3132
    },
    {
      "epoch": 2.6386861313868613,
      "grad_norm": 0.3080425560474396,
      "learning_rate": 4.3386937390088366e-07,
      "loss": 0.4346,
      "step": 3133
    },
    {
      "epoch": 2.639528354856822,
      "grad_norm": 0.2867372930049896,
      "learning_rate": 4.3187398537219593e-07,
      "loss": 0.3737,
      "step": 3134
    },
    {
      "epoch": 2.640370578326783,
      "grad_norm": 0.2886494994163513,
      "learning_rate": 4.2988298874682754e-07,
      "loss": 0.3686,
      "step": 3135
    },
    {
      "epoch": 2.6412128017967436,
      "grad_norm": 0.2857321798801422,
      "learning_rate": 4.278963859389723e-07,
      "loss": 0.4124,
      "step": 3136
    },
    {
      "epoch": 2.6420550252667043,
      "grad_norm": 0.2849075198173523,
      "learning_rate": 4.259141788585947e-07,
      "loss": 0.4,
      "step": 3137
    },
    {
      "epoch": 2.6428972487366647,
      "grad_norm": 0.2891564667224884,
      "learning_rate": 4.239363694114368e-07,
      "loss": 0.3929,
      "step": 3138
    },
    {
      "epoch": 2.6437394722066254,
      "grad_norm": 0.2954115569591522,
      "learning_rate": 4.2196295949901044e-07,
      "loss": 0.3739,
      "step": 3139
    },
    {
      "epoch": 2.644581695676586,
      "grad_norm": 0.2846229672431946,
      "learning_rate": 4.1999395101859796e-07,
      "loss": 0.3738,
      "step": 3140
    },
    {
      "epoch": 2.645423919146547,
      "grad_norm": 0.2996189296245575,
      "learning_rate": 4.1802934586324897e-07,
      "loss": 0.4137,
      "step": 3141
    },
    {
      "epoch": 2.6462661426165077,
      "grad_norm": 0.3471488058567047,
      "learning_rate": 4.160691459217825e-07,
      "loss": 0.376,
      "step": 3142
    },
    {
      "epoch": 2.647108366086468,
      "grad_norm": 0.33924970030784607,
      "learning_rate": 4.1411335307878056e-07,
      "loss": 0.383,
      "step": 3143
    },
    {
      "epoch": 2.647950589556429,
      "grad_norm": 0.30116960406303406,
      "learning_rate": 4.1216196921458786e-07,
      "loss": 0.3976,
      "step": 3144
    },
    {
      "epoch": 2.6487928130263896,
      "grad_norm": 0.2909619212150574,
      "learning_rate": 4.102149962053098e-07,
      "loss": 0.3892,
      "step": 3145
    },
    {
      "epoch": 2.6496350364963503,
      "grad_norm": 0.2809937298297882,
      "learning_rate": 4.0827243592281294e-07,
      "loss": 0.3823,
      "step": 3146
    },
    {
      "epoch": 2.650477259966311,
      "grad_norm": 0.30525973439216614,
      "learning_rate": 4.0633429023472004e-07,
      "loss": 0.4219,
      "step": 3147
    },
    {
      "epoch": 2.651319483436272,
      "grad_norm": 0.265893816947937,
      "learning_rate": 4.044005610044094e-07,
      "loss": 0.3803,
      "step": 3148
    },
    {
      "epoch": 2.6521617069062327,
      "grad_norm": 0.29206013679504395,
      "learning_rate": 4.0247125009101275e-07,
      "loss": 0.4126,
      "step": 3149
    },
    {
      "epoch": 2.653003930376193,
      "grad_norm": 0.2840431332588196,
      "learning_rate": 4.0054635934941633e-07,
      "loss": 0.403,
      "step": 3150
    },
    {
      "epoch": 2.6538461538461537,
      "grad_norm": 0.27510398626327515,
      "learning_rate": 3.986258906302543e-07,
      "loss": 0.3754,
      "step": 3151
    },
    {
      "epoch": 2.6546883773161145,
      "grad_norm": 0.30163827538490295,
      "learning_rate": 3.967098457799118e-07,
      "loss": 0.4216,
      "step": 3152
    },
    {
      "epoch": 2.6555306007860753,
      "grad_norm": 0.28304561972618103,
      "learning_rate": 3.947982266405159e-07,
      "loss": 0.3947,
      "step": 3153
    },
    {
      "epoch": 2.656372824256036,
      "grad_norm": 0.3140842020511627,
      "learning_rate": 3.928910350499454e-07,
      "loss": 0.4072,
      "step": 3154
    },
    {
      "epoch": 2.6572150477259964,
      "grad_norm": 0.273867130279541,
      "learning_rate": 3.9098827284181683e-07,
      "loss": 0.3598,
      "step": 3155
    },
    {
      "epoch": 2.6580572711959576,
      "grad_norm": 0.29585766792297363,
      "learning_rate": 3.890899418454913e-07,
      "loss": 0.3928,
      "step": 3156
    },
    {
      "epoch": 2.658899494665918,
      "grad_norm": 0.3688523471355438,
      "learning_rate": 3.871960438860689e-07,
      "loss": 0.4155,
      "step": 3157
    },
    {
      "epoch": 2.6597417181358787,
      "grad_norm": 0.2967906594276428,
      "learning_rate": 3.8530658078438754e-07,
      "loss": 0.3844,
      "step": 3158
    },
    {
      "epoch": 2.6605839416058394,
      "grad_norm": 0.295522004365921,
      "learning_rate": 3.834215543570191e-07,
      "loss": 0.3869,
      "step": 3159
    },
    {
      "epoch": 2.6614261650758,
      "grad_norm": 0.2936214804649353,
      "learning_rate": 3.81540966416275e-07,
      "loss": 0.3889,
      "step": 3160
    },
    {
      "epoch": 2.662268388545761,
      "grad_norm": 0.27925553917884827,
      "learning_rate": 3.796648187701957e-07,
      "loss": 0.3743,
      "step": 3161
    },
    {
      "epoch": 2.6631106120157213,
      "grad_norm": 0.3312590420246124,
      "learning_rate": 3.777931132225526e-07,
      "loss": 0.3971,
      "step": 3162
    },
    {
      "epoch": 2.663952835485682,
      "grad_norm": 0.30617785453796387,
      "learning_rate": 3.75925851572847e-07,
      "loss": 0.3847,
      "step": 3163
    },
    {
      "epoch": 2.664795058955643,
      "grad_norm": 0.2943297326564789,
      "learning_rate": 3.7406303561630996e-07,
      "loss": 0.3842,
      "step": 3164
    },
    {
      "epoch": 2.6656372824256036,
      "grad_norm": 0.28235912322998047,
      "learning_rate": 3.72204667143895e-07,
      "loss": 0.3646,
      "step": 3165
    },
    {
      "epoch": 2.6664795058955644,
      "grad_norm": 0.2815081775188446,
      "learning_rate": 3.703507479422813e-07,
      "loss": 0.3999,
      "step": 3166
    },
    {
      "epoch": 2.667321729365525,
      "grad_norm": 0.3042549192905426,
      "learning_rate": 3.6850127979386917e-07,
      "loss": 0.3971,
      "step": 3167
    },
    {
      "epoch": 2.668163952835486,
      "grad_norm": 0.31222057342529297,
      "learning_rate": 3.666562644767824e-07,
      "loss": 0.4074,
      "step": 3168
    },
    {
      "epoch": 2.6690061763054462,
      "grad_norm": 0.2914198935031891,
      "learning_rate": 3.648157037648598e-07,
      "loss": 0.4203,
      "step": 3169
    },
    {
      "epoch": 2.669848399775407,
      "grad_norm": 0.31163159012794495,
      "learning_rate": 3.6297959942766303e-07,
      "loss": 0.4015,
      "step": 3170
    },
    {
      "epoch": 2.6706906232453678,
      "grad_norm": 0.2960785925388336,
      "learning_rate": 3.611479532304618e-07,
      "loss": 0.3533,
      "step": 3171
    },
    {
      "epoch": 2.6715328467153285,
      "grad_norm": 0.28464391827583313,
      "learning_rate": 3.593207669342463e-07,
      "loss": 0.3775,
      "step": 3172
    },
    {
      "epoch": 2.6723750701852893,
      "grad_norm": 0.3112133741378784,
      "learning_rate": 3.574980422957147e-07,
      "loss": 0.4194,
      "step": 3173
    },
    {
      "epoch": 2.6732172936552496,
      "grad_norm": 0.30644652247428894,
      "learning_rate": 3.556797810672785e-07,
      "loss": 0.4058,
      "step": 3174
    },
    {
      "epoch": 2.6740595171252104,
      "grad_norm": 0.28767129778862,
      "learning_rate": 3.538659849970555e-07,
      "loss": 0.3821,
      "step": 3175
    },
    {
      "epoch": 2.674901740595171,
      "grad_norm": 0.3072955906391144,
      "learning_rate": 3.5205665582887296e-07,
      "loss": 0.4297,
      "step": 3176
    },
    {
      "epoch": 2.675743964065132,
      "grad_norm": 0.2869326174259186,
      "learning_rate": 3.5025179530225995e-07,
      "loss": 0.3615,
      "step": 3177
    },
    {
      "epoch": 2.6765861875350927,
      "grad_norm": 0.30249276757240295,
      "learning_rate": 3.484514051524546e-07,
      "loss": 0.3664,
      "step": 3178
    },
    {
      "epoch": 2.6774284110050535,
      "grad_norm": 0.28222936391830444,
      "learning_rate": 3.466554871103922e-07,
      "loss": 0.3735,
      "step": 3179
    },
    {
      "epoch": 2.6782706344750142,
      "grad_norm": 0.2997955083847046,
      "learning_rate": 3.4486404290271115e-07,
      "loss": 0.4144,
      "step": 3180
    },
    {
      "epoch": 2.6791128579449746,
      "grad_norm": 0.2840504050254822,
      "learning_rate": 3.43077074251747e-07,
      "loss": 0.3774,
      "step": 3181
    },
    {
      "epoch": 2.6799550814149353,
      "grad_norm": 0.3122518062591553,
      "learning_rate": 3.4129458287553487e-07,
      "loss": 0.4014,
      "step": 3182
    },
    {
      "epoch": 2.680797304884896,
      "grad_norm": 0.30824050307273865,
      "learning_rate": 3.395165704878023e-07,
      "loss": 0.4053,
      "step": 3183
    },
    {
      "epoch": 2.681639528354857,
      "grad_norm": 0.28457245230674744,
      "learning_rate": 3.3774303879797297e-07,
      "loss": 0.3704,
      "step": 3184
    },
    {
      "epoch": 2.6824817518248176,
      "grad_norm": 0.2803898751735687,
      "learning_rate": 3.359739895111602e-07,
      "loss": 0.3707,
      "step": 3185
    },
    {
      "epoch": 2.683323975294778,
      "grad_norm": 0.29929593205451965,
      "learning_rate": 3.3420942432817127e-07,
      "loss": 0.3862,
      "step": 3186
    },
    {
      "epoch": 2.684166198764739,
      "grad_norm": 0.31750839948654175,
      "learning_rate": 3.324493449454991e-07,
      "loss": 0.4079,
      "step": 3187
    },
    {
      "epoch": 2.6850084222346995,
      "grad_norm": 0.29784905910491943,
      "learning_rate": 3.3069375305532725e-07,
      "loss": 0.3829,
      "step": 3188
    },
    {
      "epoch": 2.6858506457046603,
      "grad_norm": 0.30293020606040955,
      "learning_rate": 3.289426503455201e-07,
      "loss": 0.3988,
      "step": 3189
    },
    {
      "epoch": 2.686692869174621,
      "grad_norm": 0.28821542859077454,
      "learning_rate": 3.271960384996309e-07,
      "loss": 0.3656,
      "step": 3190
    },
    {
      "epoch": 2.687535092644582,
      "grad_norm": 0.30006521940231323,
      "learning_rate": 3.2545391919689193e-07,
      "loss": 0.3926,
      "step": 3191
    },
    {
      "epoch": 2.6883773161145426,
      "grad_norm": 0.31322476267814636,
      "learning_rate": 3.237162941122185e-07,
      "loss": 0.4009,
      "step": 3192
    },
    {
      "epoch": 2.689219539584503,
      "grad_norm": 0.275764137506485,
      "learning_rate": 3.2198316491620305e-07,
      "loss": 0.3554,
      "step": 3193
    },
    {
      "epoch": 2.6900617630544637,
      "grad_norm": 0.3070089519023895,
      "learning_rate": 3.202545332751178e-07,
      "loss": 0.4067,
      "step": 3194
    },
    {
      "epoch": 2.6909039865244244,
      "grad_norm": 0.285894513130188,
      "learning_rate": 3.185304008509077e-07,
      "loss": 0.3645,
      "step": 3195
    },
    {
      "epoch": 2.691746209994385,
      "grad_norm": 0.2769801616668701,
      "learning_rate": 3.1681076930119626e-07,
      "loss": 0.3945,
      "step": 3196
    },
    {
      "epoch": 2.692588433464346,
      "grad_norm": 0.3104212284088135,
      "learning_rate": 3.150956402792765e-07,
      "loss": 0.4115,
      "step": 3197
    },
    {
      "epoch": 2.6934306569343067,
      "grad_norm": 0.2742215096950531,
      "learning_rate": 3.133850154341139e-07,
      "loss": 0.3722,
      "step": 3198
    },
    {
      "epoch": 2.6942728804042675,
      "grad_norm": 0.2855996787548065,
      "learning_rate": 3.116788964103429e-07,
      "loss": 0.3872,
      "step": 3199
    },
    {
      "epoch": 2.695115103874228,
      "grad_norm": 0.3099382519721985,
      "learning_rate": 3.099772848482657e-07,
      "loss": 0.3831,
      "step": 3200
    },
    {
      "epoch": 2.6959573273441886,
      "grad_norm": 0.2815803289413452,
      "learning_rate": 3.082801823838527e-07,
      "loss": 0.3698,
      "step": 3201
    },
    {
      "epoch": 2.6967995508141493,
      "grad_norm": 0.30173245072364807,
      "learning_rate": 3.0658759064873755e-07,
      "loss": 0.4318,
      "step": 3202
    },
    {
      "epoch": 2.69764177428411,
      "grad_norm": 0.2969933748245239,
      "learning_rate": 3.0489951127021744e-07,
      "loss": 0.3846,
      "step": 3203
    },
    {
      "epoch": 2.698483997754071,
      "grad_norm": 0.27855950593948364,
      "learning_rate": 3.0321594587125083e-07,
      "loss": 0.342,
      "step": 3204
    },
    {
      "epoch": 2.699326221224031,
      "grad_norm": 0.26119405031204224,
      "learning_rate": 3.015368960704584e-07,
      "loss": 0.3879,
      "step": 3205
    },
    {
      "epoch": 2.700168444693992,
      "grad_norm": 0.2974548935890198,
      "learning_rate": 2.9986236348211684e-07,
      "loss": 0.4138,
      "step": 3206
    },
    {
      "epoch": 2.7010106681639527,
      "grad_norm": 0.2644493281841278,
      "learning_rate": 2.9819234971616154e-07,
      "loss": 0.3685,
      "step": 3207
    },
    {
      "epoch": 2.7018528916339135,
      "grad_norm": 0.2956278622150421,
      "learning_rate": 2.9652685637818147e-07,
      "loss": 0.3805,
      "step": 3208
    },
    {
      "epoch": 2.7026951151038743,
      "grad_norm": 0.30897256731987,
      "learning_rate": 2.9486588506942303e-07,
      "loss": 0.4338,
      "step": 3209
    },
    {
      "epoch": 2.703537338573835,
      "grad_norm": 0.29765596985816956,
      "learning_rate": 2.932094373867811e-07,
      "loss": 0.4002,
      "step": 3210
    },
    {
      "epoch": 2.704379562043796,
      "grad_norm": 0.30194181203842163,
      "learning_rate": 2.915575149228056e-07,
      "loss": 0.3826,
      "step": 3211
    },
    {
      "epoch": 2.705221785513756,
      "grad_norm": 0.3106936514377594,
      "learning_rate": 2.8991011926569003e-07,
      "loss": 0.379,
      "step": 3212
    },
    {
      "epoch": 2.706064008983717,
      "grad_norm": 0.2682586908340454,
      "learning_rate": 2.882672519992824e-07,
      "loss": 0.366,
      "step": 3213
    },
    {
      "epoch": 2.7069062324536777,
      "grad_norm": 0.2907557785511017,
      "learning_rate": 2.8662891470307154e-07,
      "loss": 0.4046,
      "step": 3214
    },
    {
      "epoch": 2.7077484559236384,
      "grad_norm": 0.28720754384994507,
      "learning_rate": 2.8499510895219464e-07,
      "loss": 0.365,
      "step": 3215
    },
    {
      "epoch": 2.708590679393599,
      "grad_norm": 0.26736098527908325,
      "learning_rate": 2.833658363174302e-07,
      "loss": 0.3777,
      "step": 3216
    },
    {
      "epoch": 2.7094329028635595,
      "grad_norm": 0.2920697331428528,
      "learning_rate": 2.817410983651997e-07,
      "loss": 0.3949,
      "step": 3217
    },
    {
      "epoch": 2.7102751263335207,
      "grad_norm": 0.2991887032985687,
      "learning_rate": 2.80120896657563e-07,
      "loss": 0.4049,
      "step": 3218
    },
    {
      "epoch": 2.711117349803481,
      "grad_norm": 0.28289687633514404,
      "learning_rate": 2.785052327522214e-07,
      "loss": 0.3796,
      "step": 3219
    },
    {
      "epoch": 2.711959573273442,
      "grad_norm": 0.29251065850257874,
      "learning_rate": 2.768941082025112e-07,
      "loss": 0.4015,
      "step": 3220
    },
    {
      "epoch": 2.7128017967434026,
      "grad_norm": 0.2925495207309723,
      "learning_rate": 2.7528752455740606e-07,
      "loss": 0.3926,
      "step": 3221
    },
    {
      "epoch": 2.7136440202133634,
      "grad_norm": 0.29327860474586487,
      "learning_rate": 2.73685483361511e-07,
      "loss": 0.4037,
      "step": 3222
    },
    {
      "epoch": 2.714486243683324,
      "grad_norm": 0.27673694491386414,
      "learning_rate": 2.720879861550685e-07,
      "loss": 0.361,
      "step": 3223
    },
    {
      "epoch": 2.7153284671532845,
      "grad_norm": 0.30695652961730957,
      "learning_rate": 2.7049503447394874e-07,
      "loss": 0.3886,
      "step": 3224
    },
    {
      "epoch": 2.7161706906232452,
      "grad_norm": 0.30627840757369995,
      "learning_rate": 2.6890662984965234e-07,
      "loss": 0.4114,
      "step": 3225
    },
    {
      "epoch": 2.717012914093206,
      "grad_norm": 0.3095587193965912,
      "learning_rate": 2.6732277380930873e-07,
      "loss": 0.3952,
      "step": 3226
    },
    {
      "epoch": 2.7178551375631668,
      "grad_norm": 0.3086182773113251,
      "learning_rate": 2.657434678756754e-07,
      "loss": 0.3539,
      "step": 3227
    },
    {
      "epoch": 2.7186973610331275,
      "grad_norm": 0.3078022301197052,
      "learning_rate": 2.6416871356713224e-07,
      "loss": 0.412,
      "step": 3228
    },
    {
      "epoch": 2.7195395845030883,
      "grad_norm": 0.31640195846557617,
      "learning_rate": 2.625985123976876e-07,
      "loss": 0.4013,
      "step": 3229
    },
    {
      "epoch": 2.720381807973049,
      "grad_norm": 0.3044123649597168,
      "learning_rate": 2.6103286587696674e-07,
      "loss": 0.419,
      "step": 3230
    },
    {
      "epoch": 2.7212240314430094,
      "grad_norm": 0.2738416790962219,
      "learning_rate": 2.594717755102205e-07,
      "loss": 0.3966,
      "step": 3231
    },
    {
      "epoch": 2.72206625491297,
      "grad_norm": 0.29159486293792725,
      "learning_rate": 2.5791524279831613e-07,
      "loss": 0.3927,
      "step": 3232
    },
    {
      "epoch": 2.722908478382931,
      "grad_norm": 0.29908058047294617,
      "learning_rate": 2.5636326923774325e-07,
      "loss": 0.3734,
      "step": 3233
    },
    {
      "epoch": 2.7237507018528917,
      "grad_norm": 0.292209655046463,
      "learning_rate": 2.548158563206038e-07,
      "loss": 0.3521,
      "step": 3234
    },
    {
      "epoch": 2.7245929253228525,
      "grad_norm": 0.2941299080848694,
      "learning_rate": 2.532730055346172e-07,
      "loss": 0.3772,
      "step": 3235
    },
    {
      "epoch": 2.725435148792813,
      "grad_norm": 0.30169984698295593,
      "learning_rate": 2.517347183631158e-07,
      "loss": 0.3844,
      "step": 3236
    },
    {
      "epoch": 2.7262773722627736,
      "grad_norm": 0.2929541766643524,
      "learning_rate": 2.5020099628504603e-07,
      "loss": 0.4061,
      "step": 3237
    },
    {
      "epoch": 2.7271195957327343,
      "grad_norm": 0.2816338837146759,
      "learning_rate": 2.4867184077496333e-07,
      "loss": 0.3571,
      "step": 3238
    },
    {
      "epoch": 2.727961819202695,
      "grad_norm": 0.2811914384365082,
      "learning_rate": 2.471472533030339e-07,
      "loss": 0.4027,
      "step": 3239
    },
    {
      "epoch": 2.728804042672656,
      "grad_norm": 0.2766009569168091,
      "learning_rate": 2.4562723533503084e-07,
      "loss": 0.3474,
      "step": 3240
    },
    {
      "epoch": 2.7296462661426166,
      "grad_norm": 0.30303955078125,
      "learning_rate": 2.441117883323374e-07,
      "loss": 0.4082,
      "step": 3241
    },
    {
      "epoch": 2.7304884896125774,
      "grad_norm": 0.28383496403694153,
      "learning_rate": 2.426009137519375e-07,
      "loss": 0.4017,
      "step": 3242
    },
    {
      "epoch": 2.7313307130825377,
      "grad_norm": 0.27790775895118713,
      "learning_rate": 2.4109461304642254e-07,
      "loss": 0.389,
      "step": 3243
    },
    {
      "epoch": 2.7321729365524985,
      "grad_norm": 0.2835022509098053,
      "learning_rate": 2.395928876639847e-07,
      "loss": 0.3685,
      "step": 3244
    },
    {
      "epoch": 2.7330151600224593,
      "grad_norm": 0.2761072516441345,
      "learning_rate": 2.3809573904841844e-07,
      "loss": 0.3609,
      "step": 3245
    },
    {
      "epoch": 2.73385738349242,
      "grad_norm": 0.28051328659057617,
      "learning_rate": 2.3660316863911682e-07,
      "loss": 0.3816,
      "step": 3246
    },
    {
      "epoch": 2.734699606962381,
      "grad_norm": 0.29827699065208435,
      "learning_rate": 2.3511517787107363e-07,
      "loss": 0.4174,
      "step": 3247
    },
    {
      "epoch": 2.735541830432341,
      "grad_norm": 0.28237438201904297,
      "learning_rate": 2.336317681748751e-07,
      "loss": 0.3702,
      "step": 3248
    },
    {
      "epoch": 2.7363840539023023,
      "grad_norm": 0.2822381854057312,
      "learning_rate": 2.3215294097670927e-07,
      "loss": 0.3534,
      "step": 3249
    },
    {
      "epoch": 2.7372262773722627,
      "grad_norm": 0.2802215814590454,
      "learning_rate": 2.3067869769835215e-07,
      "loss": 0.3958,
      "step": 3250
    },
    {
      "epoch": 2.7380685008422234,
      "grad_norm": 0.30941593647003174,
      "learning_rate": 2.292090397571789e-07,
      "loss": 0.419,
      "step": 3251
    },
    {
      "epoch": 2.738910724312184,
      "grad_norm": 0.2879559099674225,
      "learning_rate": 2.277439685661509e-07,
      "loss": 0.3524,
      "step": 3252
    },
    {
      "epoch": 2.739752947782145,
      "grad_norm": 0.29783523082733154,
      "learning_rate": 2.262834855338225e-07,
      "loss": 0.362,
      "step": 3253
    },
    {
      "epoch": 2.7405951712521057,
      "grad_norm": 0.2959594130516052,
      "learning_rate": 2.2482759206433613e-07,
      "loss": 0.3989,
      "step": 3254
    },
    {
      "epoch": 2.741437394722066,
      "grad_norm": 0.31286874413490295,
      "learning_rate": 2.2337628955742263e-07,
      "loss": 0.3982,
      "step": 3255
    },
    {
      "epoch": 2.742279618192027,
      "grad_norm": 0.29451850056648254,
      "learning_rate": 2.21929579408397e-07,
      "loss": 0.4137,
      "step": 3256
    },
    {
      "epoch": 2.7431218416619876,
      "grad_norm": 0.2793000042438507,
      "learning_rate": 2.204874630081616e-07,
      "loss": 0.377,
      "step": 3257
    },
    {
      "epoch": 2.7439640651319483,
      "grad_norm": 0.28668496012687683,
      "learning_rate": 2.1904994174319903e-07,
      "loss": 0.4174,
      "step": 3258
    },
    {
      "epoch": 2.744806288601909,
      "grad_norm": 0.2973991930484772,
      "learning_rate": 2.1761701699557824e-07,
      "loss": 0.3681,
      "step": 3259
    },
    {
      "epoch": 2.74564851207187,
      "grad_norm": 0.314039409160614,
      "learning_rate": 2.1618869014294498e-07,
      "loss": 0.3858,
      "step": 3260
    },
    {
      "epoch": 2.7464907355418307,
      "grad_norm": 0.32014885544776917,
      "learning_rate": 2.1476496255852685e-07,
      "loss": 0.3825,
      "step": 3261
    },
    {
      "epoch": 2.747332959011791,
      "grad_norm": 0.29545527696609497,
      "learning_rate": 2.1334583561112786e-07,
      "loss": 0.3944,
      "step": 3262
    },
    {
      "epoch": 2.7481751824817517,
      "grad_norm": 0.2890028655529022,
      "learning_rate": 2.1193131066513107e-07,
      "loss": 0.357,
      "step": 3263
    },
    {
      "epoch": 2.7490174059517125,
      "grad_norm": 0.2741541862487793,
      "learning_rate": 2.1052138908049303e-07,
      "loss": 0.3911,
      "step": 3264
    },
    {
      "epoch": 2.7498596294216733,
      "grad_norm": 0.2633139193058014,
      "learning_rate": 2.091160722127472e-07,
      "loss": 0.3547,
      "step": 3265
    },
    {
      "epoch": 2.750701852891634,
      "grad_norm": 0.28986993432044983,
      "learning_rate": 2.0771536141299565e-07,
      "loss": 0.4007,
      "step": 3266
    },
    {
      "epoch": 2.7515440763615944,
      "grad_norm": 0.2848016917705536,
      "learning_rate": 2.0631925802791608e-07,
      "loss": 0.3753,
      "step": 3267
    },
    {
      "epoch": 2.7523862998315556,
      "grad_norm": 0.29773396253585815,
      "learning_rate": 2.0492776339975374e-07,
      "loss": 0.4114,
      "step": 3268
    },
    {
      "epoch": 2.753228523301516,
      "grad_norm": 0.30697107315063477,
      "learning_rate": 2.0354087886632623e-07,
      "loss": 0.381,
      "step": 3269
    },
    {
      "epoch": 2.7540707467714767,
      "grad_norm": 0.2689322531223297,
      "learning_rate": 2.0215860576101532e-07,
      "loss": 0.3796,
      "step": 3270
    },
    {
      "epoch": 2.7549129702414374,
      "grad_norm": 0.2802405059337616,
      "learning_rate": 2.0078094541277016e-07,
      "loss": 0.3777,
      "step": 3271
    },
    {
      "epoch": 2.755755193711398,
      "grad_norm": 0.2826237380504608,
      "learning_rate": 1.9940789914610682e-07,
      "loss": 0.3994,
      "step": 3272
    },
    {
      "epoch": 2.756597417181359,
      "grad_norm": 0.27338993549346924,
      "learning_rate": 1.9803946828110376e-07,
      "loss": 0.3771,
      "step": 3273
    },
    {
      "epoch": 2.7574396406513193,
      "grad_norm": 0.29778552055358887,
      "learning_rate": 1.966756541334025e-07,
      "loss": 0.4042,
      "step": 3274
    },
    {
      "epoch": 2.75828186412128,
      "grad_norm": 0.30525827407836914,
      "learning_rate": 1.953164580142064e-07,
      "loss": 0.405,
      "step": 3275
    },
    {
      "epoch": 2.759124087591241,
      "grad_norm": 0.2855517864227295,
      "learning_rate": 1.9396188123027736e-07,
      "loss": 0.3737,
      "step": 3276
    },
    {
      "epoch": 2.7599663110612016,
      "grad_norm": 0.27432820200920105,
      "learning_rate": 1.9261192508393755e-07,
      "loss": 0.3466,
      "step": 3277
    },
    {
      "epoch": 2.7608085345311624,
      "grad_norm": 0.3135077655315399,
      "learning_rate": 1.912665908730671e-07,
      "loss": 0.4093,
      "step": 3278
    },
    {
      "epoch": 2.7616507580011227,
      "grad_norm": 0.31187331676483154,
      "learning_rate": 1.8992587989110133e-07,
      "loss": 0.3982,
      "step": 3279
    },
    {
      "epoch": 2.762492981471084,
      "grad_norm": 0.28547269105911255,
      "learning_rate": 1.8858979342703088e-07,
      "loss": 0.3905,
      "step": 3280
    },
    {
      "epoch": 2.7633352049410442,
      "grad_norm": 0.28984692692756653,
      "learning_rate": 1.8725833276540095e-07,
      "loss": 0.376,
      "step": 3281
    },
    {
      "epoch": 2.764177428411005,
      "grad_norm": 0.3001108467578888,
      "learning_rate": 1.8593149918630927e-07,
      "loss": 0.4287,
      "step": 3282
    },
    {
      "epoch": 2.7650196518809658,
      "grad_norm": 0.2894747853279114,
      "learning_rate": 1.8460929396540428e-07,
      "loss": 0.3724,
      "step": 3283
    },
    {
      "epoch": 2.7658618753509265,
      "grad_norm": 0.29921725392341614,
      "learning_rate": 1.8329171837388527e-07,
      "loss": 0.3857,
      "step": 3284
    },
    {
      "epoch": 2.7667040988208873,
      "grad_norm": 0.2954384386539459,
      "learning_rate": 1.8197877367849948e-07,
      "loss": 0.3938,
      "step": 3285
    },
    {
      "epoch": 2.7675463222908476,
      "grad_norm": 0.2904924750328064,
      "learning_rate": 1.8067046114154386e-07,
      "loss": 0.3569,
      "step": 3286
    },
    {
      "epoch": 2.7683885457608084,
      "grad_norm": 0.31355270743370056,
      "learning_rate": 1.7936678202085945e-07,
      "loss": 0.4153,
      "step": 3287
    },
    {
      "epoch": 2.769230769230769,
      "grad_norm": 0.311679482460022,
      "learning_rate": 1.7806773756983641e-07,
      "loss": 0.353,
      "step": 3288
    },
    {
      "epoch": 2.77007299270073,
      "grad_norm": 0.31044232845306396,
      "learning_rate": 1.7677332903740296e-07,
      "loss": 0.398,
      "step": 3289
    },
    {
      "epoch": 2.7709152161706907,
      "grad_norm": 0.291362464427948,
      "learning_rate": 1.7548355766803638e-07,
      "loss": 0.3858,
      "step": 3290
    },
    {
      "epoch": 2.7717574396406515,
      "grad_norm": 0.29565948247909546,
      "learning_rate": 1.7419842470175196e-07,
      "loss": 0.4086,
      "step": 3291
    },
    {
      "epoch": 2.7725996631106122,
      "grad_norm": 0.2893791198730469,
      "learning_rate": 1.7291793137410695e-07,
      "loss": 0.4119,
      "step": 3292
    },
    {
      "epoch": 2.7734418865805726,
      "grad_norm": 0.2730446755886078,
      "learning_rate": 1.7164207891619823e-07,
      "loss": 0.3877,
      "step": 3293
    },
    {
      "epoch": 2.7742841100505333,
      "grad_norm": 0.29212260246276855,
      "learning_rate": 1.7037086855465902e-07,
      "loss": 0.3927,
      "step": 3294
    },
    {
      "epoch": 2.775126333520494,
      "grad_norm": 0.28400468826293945,
      "learning_rate": 1.6910430151166058e-07,
      "loss": 0.3886,
      "step": 3295
    },
    {
      "epoch": 2.775968556990455,
      "grad_norm": 0.2780841886997223,
      "learning_rate": 1.6784237900491163e-07,
      "loss": 0.4031,
      "step": 3296
    },
    {
      "epoch": 2.7768107804604156,
      "grad_norm": 0.27444252371788025,
      "learning_rate": 1.6658510224765333e-07,
      "loss": 0.4003,
      "step": 3297
    },
    {
      "epoch": 2.777653003930376,
      "grad_norm": 0.268703430891037,
      "learning_rate": 1.6533247244866102e-07,
      "loss": 0.3497,
      "step": 3298
    },
    {
      "epoch": 2.778495227400337,
      "grad_norm": 0.28543129563331604,
      "learning_rate": 1.6408449081224131e-07,
      "loss": 0.4082,
      "step": 3299
    },
    {
      "epoch": 2.7793374508702975,
      "grad_norm": 0.2921662926673889,
      "learning_rate": 1.6284115853823445e-07,
      "loss": 0.3758,
      "step": 3300
    },
    {
      "epoch": 2.7801796743402583,
      "grad_norm": 0.2752985954284668,
      "learning_rate": 1.6160247682200813e-07,
      "loss": 0.3813,
      "step": 3301
    },
    {
      "epoch": 2.781021897810219,
      "grad_norm": 0.3052251636981964,
      "learning_rate": 1.6036844685446084e-07,
      "loss": 0.3959,
      "step": 3302
    },
    {
      "epoch": 2.78186412128018,
      "grad_norm": 0.2685442566871643,
      "learning_rate": 1.5913906982201744e-07,
      "loss": 0.3458,
      "step": 3303
    },
    {
      "epoch": 2.7827063447501406,
      "grad_norm": 0.31400832533836365,
      "learning_rate": 1.5791434690662966e-07,
      "loss": 0.4279,
      "step": 3304
    },
    {
      "epoch": 2.783548568220101,
      "grad_norm": 0.3062984347343445,
      "learning_rate": 1.566942792857745e-07,
      "loss": 0.3951,
      "step": 3305
    },
    {
      "epoch": 2.7843907916900617,
      "grad_norm": 0.39688393473625183,
      "learning_rate": 1.554788681324554e-07,
      "loss": 0.4151,
      "step": 3306
    },
    {
      "epoch": 2.7852330151600224,
      "grad_norm": 0.2895534038543701,
      "learning_rate": 1.5426811461519419e-07,
      "loss": 0.3587,
      "step": 3307
    },
    {
      "epoch": 2.786075238629983,
      "grad_norm": 0.28139156103134155,
      "learning_rate": 1.530620198980398e-07,
      "loss": 0.3703,
      "step": 3308
    },
    {
      "epoch": 2.786917462099944,
      "grad_norm": 0.2979883849620819,
      "learning_rate": 1.5186058514055912e-07,
      "loss": 0.4057,
      "step": 3309
    },
    {
      "epoch": 2.7877596855699043,
      "grad_norm": 0.2908453047275543,
      "learning_rate": 1.506638114978398e-07,
      "loss": 0.3477,
      "step": 3310
    },
    {
      "epoch": 2.7886019090398655,
      "grad_norm": 0.3034881353378296,
      "learning_rate": 1.4947170012048872e-07,
      "loss": 0.3927,
      "step": 3311
    },
    {
      "epoch": 2.789444132509826,
      "grad_norm": 0.2815414369106293,
      "learning_rate": 1.482842521546285e-07,
      "loss": 0.3557,
      "step": 3312
    },
    {
      "epoch": 2.7902863559797866,
      "grad_norm": 0.30734312534332275,
      "learning_rate": 1.471014687418998e-07,
      "loss": 0.4232,
      "step": 3313
    },
    {
      "epoch": 2.7911285794497473,
      "grad_norm": 0.3076149523258209,
      "learning_rate": 1.4592335101945855e-07,
      "loss": 0.4073,
      "step": 3314
    },
    {
      "epoch": 2.791970802919708,
      "grad_norm": 0.2859514355659485,
      "learning_rate": 1.447499001199748e-07,
      "loss": 0.357,
      "step": 3315
    },
    {
      "epoch": 2.792813026389669,
      "grad_norm": 0.3154550790786743,
      "learning_rate": 1.435811171716317e-07,
      "loss": 0.4267,
      "step": 3316
    },
    {
      "epoch": 2.793655249859629,
      "grad_norm": 0.2935892939567566,
      "learning_rate": 1.4241700329812368e-07,
      "loss": 0.4009,
      "step": 3317
    },
    {
      "epoch": 2.79449747332959,
      "grad_norm": 0.28923261165618896,
      "learning_rate": 1.4125755961865827e-07,
      "loss": 0.3739,
      "step": 3318
    },
    {
      "epoch": 2.7953396967995507,
      "grad_norm": 0.29516056180000305,
      "learning_rate": 1.4010278724795157e-07,
      "loss": 0.3581,
      "step": 3319
    },
    {
      "epoch": 2.7961819202695115,
      "grad_norm": 0.3272443115711212,
      "learning_rate": 1.3895268729622824e-07,
      "loss": 0.4383,
      "step": 3320
    },
    {
      "epoch": 2.7970241437394723,
      "grad_norm": 0.2827593982219696,
      "learning_rate": 1.3780726086922103e-07,
      "loss": 0.3733,
      "step": 3321
    },
    {
      "epoch": 2.797866367209433,
      "grad_norm": 0.2935652732849121,
      "learning_rate": 1.366665090681707e-07,
      "loss": 0.3765,
      "step": 3322
    },
    {
      "epoch": 2.798708590679394,
      "grad_norm": 0.3121344745159149,
      "learning_rate": 1.355304329898216e-07,
      "loss": 0.4074,
      "step": 3323
    },
    {
      "epoch": 2.799550814149354,
      "grad_norm": 0.2910635769367218,
      "learning_rate": 1.3439903372642615e-07,
      "loss": 0.3545,
      "step": 3324
    },
    {
      "epoch": 2.800393037619315,
      "grad_norm": 0.27796366810798645,
      "learning_rate": 1.332723123657348e-07,
      "loss": 0.4028,
      "step": 3325
    },
    {
      "epoch": 2.8012352610892757,
      "grad_norm": 0.2855234444141388,
      "learning_rate": 1.3215026999100655e-07,
      "loss": 0.4232,
      "step": 3326
    },
    {
      "epoch": 2.8020774845592364,
      "grad_norm": 0.2606448531150818,
      "learning_rate": 1.3103290768099796e-07,
      "loss": 0.3599,
      "step": 3327
    },
    {
      "epoch": 2.802919708029197,
      "grad_norm": 0.32101741433143616,
      "learning_rate": 1.299202265099675e-07,
      "loss": 0.3963,
      "step": 3328
    },
    {
      "epoch": 2.8037619314991575,
      "grad_norm": 0.3057144284248352,
      "learning_rate": 1.288122275476733e-07,
      "loss": 0.3965,
      "step": 3329
    },
    {
      "epoch": 2.8046041549691187,
      "grad_norm": 0.2921451926231384,
      "learning_rate": 1.2770891185937106e-07,
      "loss": 0.3913,
      "step": 3330
    },
    {
      "epoch": 2.805446378439079,
      "grad_norm": 0.2783379852771759,
      "learning_rate": 1.2661028050581446e-07,
      "loss": 0.4123,
      "step": 3331
    },
    {
      "epoch": 2.80628860190904,
      "grad_norm": 0.294175386428833,
      "learning_rate": 1.2551633454325362e-07,
      "loss": 0.3902,
      "step": 3332
    },
    {
      "epoch": 2.8071308253790006,
      "grad_norm": 0.28449007868766785,
      "learning_rate": 1.244270750234333e-07,
      "loss": 0.3995,
      "step": 3333
    },
    {
      "epoch": 2.8079730488489614,
      "grad_norm": 0.27176418900489807,
      "learning_rate": 1.2334250299359362e-07,
      "loss": 0.356,
      "step": 3334
    },
    {
      "epoch": 2.808815272318922,
      "grad_norm": 0.2867431342601776,
      "learning_rate": 1.2226261949646656e-07,
      "loss": 0.4175,
      "step": 3335
    },
    {
      "epoch": 2.8096574957888825,
      "grad_norm": 0.2806062400341034,
      "learning_rate": 1.2118742557027885e-07,
      "loss": 0.3764,
      "step": 3336
    },
    {
      "epoch": 2.8104997192588432,
      "grad_norm": 0.29476258158683777,
      "learning_rate": 1.201169222487464e-07,
      "loss": 0.3867,
      "step": 3337
    },
    {
      "epoch": 2.811341942728804,
      "grad_norm": 0.27478858828544617,
      "learning_rate": 1.1905111056107644e-07,
      "loss": 0.3971,
      "step": 3338
    },
    {
      "epoch": 2.8121841661987648,
      "grad_norm": 0.2992803454399109,
      "learning_rate": 1.1798999153196433e-07,
      "loss": 0.395,
      "step": 3339
    },
    {
      "epoch": 2.8130263896687255,
      "grad_norm": 0.29927507042884827,
      "learning_rate": 1.1693356618159568e-07,
      "loss": 0.3687,
      "step": 3340
    },
    {
      "epoch": 2.813868613138686,
      "grad_norm": 0.30943048000335693,
      "learning_rate": 1.1588183552564247e-07,
      "loss": 0.3969,
      "step": 3341
    },
    {
      "epoch": 2.814710836608647,
      "grad_norm": 0.283802330493927,
      "learning_rate": 1.1483480057526364e-07,
      "loss": 0.4029,
      "step": 3342
    },
    {
      "epoch": 2.8155530600786074,
      "grad_norm": 0.273685097694397,
      "learning_rate": 1.1379246233710172e-07,
      "loss": 0.3954,
      "step": 3343
    },
    {
      "epoch": 2.816395283548568,
      "grad_norm": 0.31139498949050903,
      "learning_rate": 1.1275482181328568e-07,
      "loss": 0.4282,
      "step": 3344
    },
    {
      "epoch": 2.817237507018529,
      "grad_norm": 0.3079608380794525,
      "learning_rate": 1.1172188000142803e-07,
      "loss": 0.3855,
      "step": 3345
    },
    {
      "epoch": 2.8180797304884897,
      "grad_norm": 0.2971174716949463,
      "learning_rate": 1.1069363789462273e-07,
      "loss": 0.391,
      "step": 3346
    },
    {
      "epoch": 2.8189219539584505,
      "grad_norm": 0.27324238419532776,
      "learning_rate": 1.0967009648144621e-07,
      "loss": 0.3313,
      "step": 3347
    },
    {
      "epoch": 2.819764177428411,
      "grad_norm": 0.29714807868003845,
      "learning_rate": 1.0865125674595467e-07,
      "loss": 0.4045,
      "step": 3348
    },
    {
      "epoch": 2.8206064008983716,
      "grad_norm": 0.29939818382263184,
      "learning_rate": 1.0763711966768453e-07,
      "loss": 0.4089,
      "step": 3349
    },
    {
      "epoch": 2.8214486243683323,
      "grad_norm": 0.28114888072013855,
      "learning_rate": 1.0662768622165087e-07,
      "loss": 0.3683,
      "step": 3350
    },
    {
      "epoch": 2.822290847838293,
      "grad_norm": 0.2907692790031433,
      "learning_rate": 1.0562295737834738e-07,
      "loss": 0.4149,
      "step": 3351
    },
    {
      "epoch": 2.823133071308254,
      "grad_norm": 0.3068651258945465,
      "learning_rate": 1.0462293410374303e-07,
      "loss": 0.4019,
      "step": 3352
    },
    {
      "epoch": 2.8239752947782146,
      "grad_norm": 0.29543134570121765,
      "learning_rate": 1.0362761735928372e-07,
      "loss": 0.3919,
      "step": 3353
    },
    {
      "epoch": 2.8248175182481754,
      "grad_norm": 0.27781108021736145,
      "learning_rate": 1.026370081018907e-07,
      "loss": 0.3468,
      "step": 3354
    },
    {
      "epoch": 2.8256597417181357,
      "grad_norm": 0.3062828779220581,
      "learning_rate": 1.0165110728395878e-07,
      "loss": 0.3838,
      "step": 3355
    },
    {
      "epoch": 2.8265019651880965,
      "grad_norm": 0.28775954246520996,
      "learning_rate": 1.0066991585335583e-07,
      "loss": 0.3927,
      "step": 3356
    },
    {
      "epoch": 2.8273441886580573,
      "grad_norm": 0.2726040482521057,
      "learning_rate": 9.969343475342285e-08,
      "loss": 0.3918,
      "step": 3357
    },
    {
      "epoch": 2.828186412128018,
      "grad_norm": 0.2910308539867401,
      "learning_rate": 9.872166492297052e-08,
      "loss": 0.4017,
      "step": 3358
    },
    {
      "epoch": 2.829028635597979,
      "grad_norm": 0.2825460433959961,
      "learning_rate": 9.775460729628262e-08,
      "loss": 0.3734,
      "step": 3359
    },
    {
      "epoch": 2.829870859067939,
      "grad_norm": 0.28228306770324707,
      "learning_rate": 9.679226280310982e-08,
      "loss": 0.3738,
      "step": 3360
    },
    {
      "epoch": 2.8307130825379003,
      "grad_norm": 0.28247764706611633,
      "learning_rate": 9.583463236867318e-08,
      "loss": 0.386,
      "step": 3361
    },
    {
      "epoch": 2.8315553060078607,
      "grad_norm": 0.2911040484905243,
      "learning_rate": 9.48817169136601e-08,
      "loss": 0.3772,
      "step": 3362
    },
    {
      "epoch": 2.8323975294778214,
      "grad_norm": 0.31065770983695984,
      "learning_rate": 9.393351735422773e-08,
      "loss": 0.4152,
      "step": 3363
    },
    {
      "epoch": 2.833239752947782,
      "grad_norm": 0.2593594789505005,
      "learning_rate": 9.299003460199519e-08,
      "loss": 0.3779,
      "step": 3364
    },
    {
      "epoch": 2.834081976417743,
      "grad_norm": 0.2864275872707367,
      "learning_rate": 9.205126956405075e-08,
      "loss": 0.389,
      "step": 3365
    },
    {
      "epoch": 2.8349241998877037,
      "grad_norm": 0.2880719304084778,
      "learning_rate": 9.111722314294358e-08,
      "loss": 0.406,
      "step": 3366
    },
    {
      "epoch": 2.835766423357664,
      "grad_norm": 0.3075088858604431,
      "learning_rate": 9.018789623668866e-08,
      "loss": 0.4299,
      "step": 3367
    },
    {
      "epoch": 2.836608646827625,
      "grad_norm": 0.2832067608833313,
      "learning_rate": 8.926328973876242e-08,
      "loss": 0.3651,
      "step": 3368
    },
    {
      "epoch": 2.8374508702975856,
      "grad_norm": 0.31438612937927246,
      "learning_rate": 8.834340453810375e-08,
      "loss": 0.4272,
      "step": 3369
    },
    {
      "epoch": 2.8382930937675463,
      "grad_norm": 0.2937398850917816,
      "learning_rate": 8.742824151911022e-08,
      "loss": 0.363,
      "step": 3370
    },
    {
      "epoch": 2.839135317237507,
      "grad_norm": 0.29449522495269775,
      "learning_rate": 8.651780156164302e-08,
      "loss": 0.3769,
      "step": 3371
    },
    {
      "epoch": 2.8399775407074674,
      "grad_norm": 0.29911506175994873,
      "learning_rate": 8.561208554101863e-08,
      "loss": 0.41,
      "step": 3372
    },
    {
      "epoch": 2.8408197641774287,
      "grad_norm": 0.2797853946685791,
      "learning_rate": 8.471109432801494e-08,
      "loss": 0.3834,
      "step": 3373
    },
    {
      "epoch": 2.841661987647389,
      "grad_norm": 0.2563020586967468,
      "learning_rate": 8.381482878886571e-08,
      "loss": 0.352,
      "step": 3374
    },
    {
      "epoch": 2.8425042111173497,
      "grad_norm": 0.3109523355960846,
      "learning_rate": 8.29232897852611e-08,
      "loss": 0.4654,
      "step": 3375
    },
    {
      "epoch": 2.8433464345873105,
      "grad_norm": 0.2774870693683624,
      "learning_rate": 8.203647817434823e-08,
      "loss": 0.3544,
      "step": 3376
    },
    {
      "epoch": 2.8441886580572713,
      "grad_norm": 0.2891528904438019,
      "learning_rate": 8.11543948087279e-08,
      "loss": 0.3449,
      "step": 3377
    },
    {
      "epoch": 2.845030881527232,
      "grad_norm": 0.3129079341888428,
      "learning_rate": 8.027704053645613e-08,
      "loss": 0.421,
      "step": 3378
    },
    {
      "epoch": 2.8458731049971924,
      "grad_norm": 0.27477723360061646,
      "learning_rate": 7.94044162010421e-08,
      "loss": 0.3735,
      "step": 3379
    },
    {
      "epoch": 2.846715328467153,
      "grad_norm": 0.2987320125102997,
      "learning_rate": 7.85365226414464e-08,
      "loss": 0.404,
      "step": 3380
    },
    {
      "epoch": 2.847557551937114,
      "grad_norm": 0.2968631982803345,
      "learning_rate": 7.76733606920832e-08,
      "loss": 0.4405,
      "step": 3381
    },
    {
      "epoch": 2.8483997754070747,
      "grad_norm": 0.29105231165885925,
      "learning_rate": 7.681493118281646e-08,
      "loss": 0.3765,
      "step": 3382
    },
    {
      "epoch": 2.8492419988770354,
      "grad_norm": 0.26250529289245605,
      "learning_rate": 7.59612349389599e-08,
      "loss": 0.3473,
      "step": 3383
    },
    {
      "epoch": 2.850084222346996,
      "grad_norm": 0.3028121590614319,
      "learning_rate": 7.511227278127697e-08,
      "loss": 0.3998,
      "step": 3384
    },
    {
      "epoch": 2.850926445816957,
      "grad_norm": 0.3030623495578766,
      "learning_rate": 7.426804552598088e-08,
      "loss": 0.429,
      "step": 3385
    },
    {
      "epoch": 2.8517686692869173,
      "grad_norm": 0.30548834800720215,
      "learning_rate": 7.342855398472958e-08,
      "loss": 0.4069,
      "step": 3386
    },
    {
      "epoch": 2.852610892756878,
      "grad_norm": 0.27196255326271057,
      "learning_rate": 7.259379896463248e-08,
      "loss": 0.3775,
      "step": 3387
    },
    {
      "epoch": 2.853453116226839,
      "grad_norm": 0.2903123199939728,
      "learning_rate": 7.176378126824035e-08,
      "loss": 0.4032,
      "step": 3388
    },
    {
      "epoch": 2.8542953396967996,
      "grad_norm": 0.2756079137325287,
      "learning_rate": 7.093850169355266e-08,
      "loss": 0.3634,
      "step": 3389
    },
    {
      "epoch": 2.8551375631667604,
      "grad_norm": 0.28325939178466797,
      "learning_rate": 7.011796103401192e-08,
      "loss": 0.3952,
      "step": 3390
    },
    {
      "epoch": 2.8559797866367207,
      "grad_norm": 0.3056941628456116,
      "learning_rate": 6.930216007850598e-08,
      "loss": 0.3984,
      "step": 3391
    },
    {
      "epoch": 2.856822010106682,
      "grad_norm": 0.3059186041355133,
      "learning_rate": 6.849109961136468e-08,
      "loss": 0.3803,
      "step": 3392
    },
    {
      "epoch": 2.8576642335766422,
      "grad_norm": 0.2844845652580261,
      "learning_rate": 6.768478041236037e-08,
      "loss": 0.3684,
      "step": 3393
    },
    {
      "epoch": 2.858506457046603,
      "grad_norm": 0.2670156955718994,
      "learning_rate": 6.688320325670628e-08,
      "loss": 0.3833,
      "step": 3394
    },
    {
      "epoch": 2.8593486805165638,
      "grad_norm": 0.2882593870162964,
      "learning_rate": 6.608636891505982e-08,
      "loss": 0.3851,
      "step": 3395
    },
    {
      "epoch": 2.8601909039865245,
      "grad_norm": 0.35692736506462097,
      "learning_rate": 6.529427815351374e-08,
      "loss": 0.3842,
      "step": 3396
    },
    {
      "epoch": 2.8610331274564853,
      "grad_norm": 0.3008839190006256,
      "learning_rate": 6.450693173360445e-08,
      "loss": 0.3851,
      "step": 3397
    },
    {
      "epoch": 2.8618753509264456,
      "grad_norm": 0.28466638922691345,
      "learning_rate": 6.372433041230364e-08,
      "loss": 0.3958,
      "step": 3398
    },
    {
      "epoch": 2.8627175743964064,
      "grad_norm": 0.27058398723602295,
      "learning_rate": 6.294647494202444e-08,
      "loss": 0.3821,
      "step": 3399
    },
    {
      "epoch": 2.863559797866367,
      "grad_norm": 0.2802475392818451,
      "learning_rate": 6.217336607061364e-08,
      "loss": 0.3749,
      "step": 3400
    },
    {
      "epoch": 2.864402021336328,
      "grad_norm": 0.29606035351753235,
      "learning_rate": 6.140500454135668e-08,
      "loss": 0.3805,
      "step": 3401
    },
    {
      "epoch": 2.8652442448062887,
      "grad_norm": 0.3239705264568329,
      "learning_rate": 6.064139109297485e-08,
      "loss": 0.4597,
      "step": 3402
    },
    {
      "epoch": 2.866086468276249,
      "grad_norm": 0.2795093059539795,
      "learning_rate": 5.988252645962367e-08,
      "loss": 0.367,
      "step": 3403
    },
    {
      "epoch": 2.8669286917462102,
      "grad_norm": 0.29333585500717163,
      "learning_rate": 5.912841137089287e-08,
      "loss": 0.3937,
      "step": 3404
    },
    {
      "epoch": 2.8677709152161706,
      "grad_norm": 0.28539228439331055,
      "learning_rate": 5.8379046551807486e-08,
      "loss": 0.3666,
      "step": 3405
    },
    {
      "epoch": 2.8686131386861313,
      "grad_norm": 0.2850067913532257,
      "learning_rate": 5.7634432722822875e-08,
      "loss": 0.3921,
      "step": 3406
    },
    {
      "epoch": 2.869455362156092,
      "grad_norm": 0.27624449133872986,
      "learning_rate": 5.6894570599829726e-08,
      "loss": 0.362,
      "step": 3407
    },
    {
      "epoch": 2.870297585626053,
      "grad_norm": 0.2974869906902313,
      "learning_rate": 5.615946089414737e-08,
      "loss": 0.4038,
      "step": 3408
    },
    {
      "epoch": 2.8711398090960136,
      "grad_norm": 0.2942722737789154,
      "learning_rate": 5.542910431252935e-08,
      "loss": 0.3727,
      "step": 3409
    },
    {
      "epoch": 2.871982032565974,
      "grad_norm": 0.29509642720222473,
      "learning_rate": 5.470350155715565e-08,
      "loss": 0.413,
      "step": 3410
    },
    {
      "epoch": 2.8728242560359347,
      "grad_norm": 0.27400821447372437,
      "learning_rate": 5.398265332563935e-08,
      "loss": 0.3966,
      "step": 3411
    },
    {
      "epoch": 2.8736664795058955,
      "grad_norm": 0.2971052825450897,
      "learning_rate": 5.32665603110194e-08,
      "loss": 0.4039,
      "step": 3412
    },
    {
      "epoch": 2.8745087029758563,
      "grad_norm": 0.2669042646884918,
      "learning_rate": 5.255522320176565e-08,
      "loss": 0.3572,
      "step": 3413
    },
    {
      "epoch": 2.875350926445817,
      "grad_norm": 0.2769604027271271,
      "learning_rate": 5.1848642681773254e-08,
      "loss": 0.3647,
      "step": 3414
    },
    {
      "epoch": 2.876193149915778,
      "grad_norm": 0.28757867217063904,
      "learning_rate": 5.114681943036603e-08,
      "loss": 0.3777,
      "step": 3415
    },
    {
      "epoch": 2.8770353733857386,
      "grad_norm": 0.28947725892066956,
      "learning_rate": 5.0449754122292585e-08,
      "loss": 0.4312,
      "step": 3416
    },
    {
      "epoch": 2.877877596855699,
      "grad_norm": 0.27639245986938477,
      "learning_rate": 4.975744742772848e-08,
      "loss": 0.3881,
      "step": 3417
    },
    {
      "epoch": 2.8787198203256597,
      "grad_norm": 0.31110498309135437,
      "learning_rate": 4.906990001227296e-08,
      "loss": 0.3683,
      "step": 3418
    },
    {
      "epoch": 2.8795620437956204,
      "grad_norm": 0.2987087666988373,
      "learning_rate": 4.838711253695061e-08,
      "loss": 0.4207,
      "step": 3419
    },
    {
      "epoch": 2.880404267265581,
      "grad_norm": 0.29568350315093994,
      "learning_rate": 4.770908565820964e-08,
      "loss": 0.3817,
      "step": 3420
    },
    {
      "epoch": 2.881246490735542,
      "grad_norm": 0.3078053891658783,
      "learning_rate": 4.7035820027920284e-08,
      "loss": 0.3474,
      "step": 3421
    },
    {
      "epoch": 2.8820887142055023,
      "grad_norm": 0.310678094625473,
      "learning_rate": 4.636731629337587e-08,
      "loss": 0.4056,
      "step": 3422
    },
    {
      "epoch": 2.8829309376754635,
      "grad_norm": 0.28565657138824463,
      "learning_rate": 4.5703575097292286e-08,
      "loss": 0.3874,
      "step": 3423
    },
    {
      "epoch": 2.883773161145424,
      "grad_norm": 0.26623526215553284,
      "learning_rate": 4.5044597077805175e-08,
      "loss": 0.3787,
      "step": 3424
    },
    {
      "epoch": 2.8846153846153846,
      "grad_norm": 0.2779085040092468,
      "learning_rate": 4.439038286847164e-08,
      "loss": 0.3788,
      "step": 3425
    },
    {
      "epoch": 2.8854576080853453,
      "grad_norm": 0.28012943267822266,
      "learning_rate": 4.37409330982691e-08,
      "loss": 0.4114,
      "step": 3426
    },
    {
      "epoch": 2.886299831555306,
      "grad_norm": 0.2860434949398041,
      "learning_rate": 4.309624839159254e-08,
      "loss": 0.4191,
      "step": 3427
    },
    {
      "epoch": 2.887142055025267,
      "grad_norm": 0.2905972898006439,
      "learning_rate": 4.245632936825783e-08,
      "loss": 0.4064,
      "step": 3428
    },
    {
      "epoch": 2.887984278495227,
      "grad_norm": 0.2894047200679779,
      "learning_rate": 4.182117664349783e-08,
      "loss": 0.3463,
      "step": 3429
    },
    {
      "epoch": 2.888826501965188,
      "grad_norm": 0.312477707862854,
      "learning_rate": 4.119079082796351e-08,
      "loss": 0.4275,
      "step": 3430
    },
    {
      "epoch": 2.8896687254351487,
      "grad_norm": 0.28114959597587585,
      "learning_rate": 4.056517252772229e-08,
      "loss": 0.3899,
      "step": 3431
    },
    {
      "epoch": 2.8905109489051095,
      "grad_norm": 0.2802673876285553,
      "learning_rate": 3.99443223442586e-08,
      "loss": 0.3805,
      "step": 3432
    },
    {
      "epoch": 2.8913531723750703,
      "grad_norm": 0.3070076107978821,
      "learning_rate": 3.9328240874471624e-08,
      "loss": 0.4144,
      "step": 3433
    },
    {
      "epoch": 2.892195395845031,
      "grad_norm": 0.2813200056552887,
      "learning_rate": 3.871692871067756e-08,
      "loss": 0.3595,
      "step": 3434
    },
    {
      "epoch": 2.893037619314992,
      "grad_norm": 0.3033865690231323,
      "learning_rate": 3.8110386440605164e-08,
      "loss": 0.4033,
      "step": 3435
    },
    {
      "epoch": 2.893879842784952,
      "grad_norm": 0.2972250282764435,
      "learning_rate": 3.750861464739908e-08,
      "loss": 0.3817,
      "step": 3436
    },
    {
      "epoch": 2.894722066254913,
      "grad_norm": 0.2964242994785309,
      "learning_rate": 3.6911613909616505e-08,
      "loss": 0.3827,
      "step": 3437
    },
    {
      "epoch": 2.8955642897248737,
      "grad_norm": 0.3003658354282379,
      "learning_rate": 3.631938480122777e-08,
      "loss": 0.4309,
      "step": 3438
    },
    {
      "epoch": 2.8964065131948344,
      "grad_norm": 0.2702142000198364,
      "learning_rate": 3.573192789161628e-08,
      "loss": 0.3475,
      "step": 3439
    },
    {
      "epoch": 2.897248736664795,
      "grad_norm": 0.29008176922798157,
      "learning_rate": 3.514924374557638e-08,
      "loss": 0.4001,
      "step": 3440
    },
    {
      "epoch": 2.8980909601347555,
      "grad_norm": 0.31470492482185364,
      "learning_rate": 3.457133292331494e-08,
      "loss": 0.4413,
      "step": 3441
    },
    {
      "epoch": 2.8989331836047163,
      "grad_norm": 0.304623007774353,
      "learning_rate": 3.3998195980448065e-08,
      "loss": 0.3796,
      "step": 3442
    },
    {
      "epoch": 2.899775407074677,
      "grad_norm": 0.284498393535614,
      "learning_rate": 3.342983346800388e-08,
      "loss": 0.383,
      "step": 3443
    },
    {
      "epoch": 2.900617630544638,
      "grad_norm": 0.2605248689651489,
      "learning_rate": 3.2866245932418606e-08,
      "loss": 0.3447,
      "step": 3444
    },
    {
      "epoch": 2.9014598540145986,
      "grad_norm": 0.28592634201049805,
      "learning_rate": 3.230743391553881e-08,
      "loss": 0.4144,
      "step": 3445
    },
    {
      "epoch": 2.9023020774845594,
      "grad_norm": 0.2774827778339386,
      "learning_rate": 3.175339795462029e-08,
      "loss": 0.3655,
      "step": 3446
    },
    {
      "epoch": 2.90314430095452,
      "grad_norm": 0.2848191559314728,
      "learning_rate": 3.120413858232474e-08,
      "loss": 0.4,
      "step": 3447
    },
    {
      "epoch": 2.9039865244244805,
      "grad_norm": 0.3046318292617798,
      "learning_rate": 3.0659656326724186e-08,
      "loss": 0.4001,
      "step": 3448
    },
    {
      "epoch": 2.9048287478944412,
      "grad_norm": 0.3002498745918274,
      "learning_rate": 3.011995171129545e-08,
      "loss": 0.4012,
      "step": 3449
    },
    {
      "epoch": 2.905670971364402,
      "grad_norm": 0.2788611352443695,
      "learning_rate": 2.9585025254924572e-08,
      "loss": 0.3943,
      "step": 3450
    },
    {
      "epoch": 2.9065131948343628,
      "grad_norm": 0.2840082049369812,
      "learning_rate": 2.9054877471901277e-08,
      "loss": 0.3929,
      "step": 3451
    },
    {
      "epoch": 2.9073554183043235,
      "grad_norm": 0.28479117155075073,
      "learning_rate": 2.852950887192285e-08,
      "loss": 0.3494,
      "step": 3452
    },
    {
      "epoch": 2.908197641774284,
      "grad_norm": 0.30263206362724304,
      "learning_rate": 2.8008919960090253e-08,
      "loss": 0.4201,
      "step": 3453
    },
    {
      "epoch": 2.909039865244245,
      "grad_norm": 0.27019354701042175,
      "learning_rate": 2.7493111236909787e-08,
      "loss": 0.3472,
      "step": 3454
    },
    {
      "epoch": 2.9098820887142054,
      "grad_norm": 0.2872403562068939,
      "learning_rate": 2.6982083198293096e-08,
      "loss": 0.3783,
      "step": 3455
    },
    {
      "epoch": 2.910724312184166,
      "grad_norm": 0.2734415531158447,
      "learning_rate": 2.6475836335553838e-08,
      "loss": 0.3866,
      "step": 3456
    },
    {
      "epoch": 2.911566535654127,
      "grad_norm": 0.2893427610397339,
      "learning_rate": 2.5974371135408792e-08,
      "loss": 0.4002,
      "step": 3457
    },
    {
      "epoch": 2.9124087591240877,
      "grad_norm": 0.2905668020248413,
      "learning_rate": 2.5477688079979522e-08,
      "loss": 0.3959,
      "step": 3458
    },
    {
      "epoch": 2.9132509825940485,
      "grad_norm": 0.2682143747806549,
      "learning_rate": 2.4985787646788497e-08,
      "loss": 0.3908,
      "step": 3459
    },
    {
      "epoch": 2.914093206064009,
      "grad_norm": 0.27443888783454895,
      "learning_rate": 2.4498670308760742e-08,
      "loss": 0.3548,
      "step": 3460
    },
    {
      "epoch": 2.9149354295339696,
      "grad_norm": 0.2960400879383087,
      "learning_rate": 2.401633653422053e-08,
      "loss": 0.3803,
      "step": 3461
    },
    {
      "epoch": 2.9157776530039303,
      "grad_norm": 0.2993108332157135,
      "learning_rate": 2.3538786786896918e-08,
      "loss": 0.4128,
      "step": 3462
    },
    {
      "epoch": 2.916619876473891,
      "grad_norm": 0.2882375121116638,
      "learning_rate": 2.306602152591597e-08,
      "loss": 0.3909,
      "step": 3463
    },
    {
      "epoch": 2.917462099943852,
      "grad_norm": 0.2693561315536499,
      "learning_rate": 2.2598041205806333e-08,
      "loss": 0.3671,
      "step": 3464
    },
    {
      "epoch": 2.9183043234138126,
      "grad_norm": 0.2874128818511963,
      "learning_rate": 2.2134846276494205e-08,
      "loss": 0.4528,
      "step": 3465
    },
    {
      "epoch": 2.9191465468837734,
      "grad_norm": 0.30712515115737915,
      "learning_rate": 2.1676437183306697e-08,
      "loss": 0.3812,
      "step": 3466
    },
    {
      "epoch": 2.9199887703537337,
      "grad_norm": 0.2785978615283966,
      "learning_rate": 2.1222814366969048e-08,
      "loss": 0.3907,
      "step": 3467
    },
    {
      "epoch": 2.9208309938236945,
      "grad_norm": 0.2888335883617401,
      "learning_rate": 2.0773978263605164e-08,
      "loss": 0.4061,
      "step": 3468
    },
    {
      "epoch": 2.9216732172936553,
      "grad_norm": 0.3339776396751404,
      "learning_rate": 2.032992930473543e-08,
      "loss": 0.4365,
      "step": 3469
    },
    {
      "epoch": 2.922515440763616,
      "grad_norm": 0.27806806564331055,
      "learning_rate": 1.9890667917280006e-08,
      "loss": 0.3533,
      "step": 3470
    },
    {
      "epoch": 2.923357664233577,
      "grad_norm": 0.30185332894325256,
      "learning_rate": 1.9456194523554404e-08,
      "loss": 0.4108,
      "step": 3471
    },
    {
      "epoch": 2.924199887703537,
      "grad_norm": 0.2948153018951416,
      "learning_rate": 1.9026509541272276e-08,
      "loss": 0.3961,
      "step": 3472
    },
    {
      "epoch": 2.925042111173498,
      "grad_norm": 0.2840941250324249,
      "learning_rate": 1.860161338354205e-08,
      "loss": 0.4024,
      "step": 3473
    },
    {
      "epoch": 2.9258843346434587,
      "grad_norm": 0.29426267743110657,
      "learning_rate": 1.8181506458869735e-08,
      "loss": 0.3765,
      "step": 3474
    },
    {
      "epoch": 2.9267265581134194,
      "grad_norm": 0.2829715311527252,
      "learning_rate": 1.7766189171154468e-08,
      "loss": 0.3598,
      "step": 3475
    },
    {
      "epoch": 2.92756878158338,
      "grad_norm": 0.28451642394065857,
      "learning_rate": 1.7355661919693513e-08,
      "loss": 0.3883,
      "step": 3476
    },
    {
      "epoch": 2.928411005053341,
      "grad_norm": 0.2689417600631714,
      "learning_rate": 1.69499250991767e-08,
      "loss": 0.3917,
      "step": 3477
    },
    {
      "epoch": 2.9292532285233017,
      "grad_norm": 0.2871907949447632,
      "learning_rate": 1.654897909968922e-08,
      "loss": 0.3646,
      "step": 3478
    },
    {
      "epoch": 2.930095451993262,
      "grad_norm": 0.2818618714809418,
      "learning_rate": 1.6152824306709392e-08,
      "loss": 0.3947,
      "step": 3479
    },
    {
      "epoch": 2.930937675463223,
      "grad_norm": 0.3034173250198364,
      "learning_rate": 1.576146110111032e-08,
      "loss": 0.4033,
      "step": 3480
    },
    {
      "epoch": 2.9317798989331836,
      "grad_norm": 0.28176578879356384,
      "learning_rate": 1.5374889859157137e-08,
      "loss": 0.3737,
      "step": 3481
    },
    {
      "epoch": 2.9326221224031443,
      "grad_norm": 0.293709397315979,
      "learning_rate": 1.4993110952509215e-08,
      "loss": 0.3755,
      "step": 3482
    },
    {
      "epoch": 2.933464345873105,
      "grad_norm": 0.2953304052352905,
      "learning_rate": 1.4616124748217387e-08,
      "loss": 0.4311,
      "step": 3483
    },
    {
      "epoch": 2.9343065693430654,
      "grad_norm": 0.2836214303970337,
      "learning_rate": 1.424393160872506e-08,
      "loss": 0.3971,
      "step": 3484
    },
    {
      "epoch": 2.9351487928130267,
      "grad_norm": 0.27746909856796265,
      "learning_rate": 1.3876531891867106e-08,
      "loss": 0.3501,
      "step": 3485
    },
    {
      "epoch": 2.935991016282987,
      "grad_norm": 0.28175079822540283,
      "learning_rate": 1.351392595087042e-08,
      "loss": 0.3498,
      "step": 3486
    },
    {
      "epoch": 2.9368332397529477,
      "grad_norm": 0.29197704792022705,
      "learning_rate": 1.3156114134352805e-08,
      "loss": 0.3965,
      "step": 3487
    },
    {
      "epoch": 2.9376754632229085,
      "grad_norm": 0.29810264706611633,
      "learning_rate": 1.2803096786323521e-08,
      "loss": 0.3778,
      "step": 3488
    },
    {
      "epoch": 2.9385176866928693,
      "grad_norm": 0.29269251227378845,
      "learning_rate": 1.2454874246181081e-08,
      "loss": 0.3937,
      "step": 3489
    },
    {
      "epoch": 2.93935991016283,
      "grad_norm": 0.29089727997779846,
      "learning_rate": 1.2111446848714347e-08,
      "loss": 0.3786,
      "step": 3490
    },
    {
      "epoch": 2.9402021336327904,
      "grad_norm": 0.30490759015083313,
      "learning_rate": 1.1772814924103649e-08,
      "loss": 0.4118,
      "step": 3491
    },
    {
      "epoch": 2.941044357102751,
      "grad_norm": 0.2733815312385559,
      "learning_rate": 1.1438978797916888e-08,
      "loss": 0.3549,
      "step": 3492
    },
    {
      "epoch": 2.941886580572712,
      "grad_norm": 0.2806861400604248,
      "learning_rate": 1.1109938791112328e-08,
      "loss": 0.3894,
      "step": 3493
    },
    {
      "epoch": 2.9427288040426727,
      "grad_norm": 0.28716614842414856,
      "learning_rate": 1.0785695220035809e-08,
      "loss": 0.3867,
      "step": 3494
    },
    {
      "epoch": 2.9435710275126334,
      "grad_norm": 0.30406150221824646,
      "learning_rate": 1.0466248396424072e-08,
      "loss": 0.3761,
      "step": 3495
    },
    {
      "epoch": 2.944413250982594,
      "grad_norm": 0.28483232855796814,
      "learning_rate": 1.0151598627399784e-08,
      "loss": 0.3751,
      "step": 3496
    },
    {
      "epoch": 2.945255474452555,
      "grad_norm": 0.30256643891334534,
      "learning_rate": 9.841746215474845e-09,
      "loss": 0.3636,
      "step": 3497
    },
    {
      "epoch": 2.9460976979225153,
      "grad_norm": 0.2928366959095001,
      "learning_rate": 9.536691458548741e-09,
      "loss": 0.3889,
      "step": 3498
    },
    {
      "epoch": 2.946939921392476,
      "grad_norm": 0.2842877507209778,
      "learning_rate": 9.236434649908532e-09,
      "loss": 0.396,
      "step": 3499
    },
    {
      "epoch": 2.947782144862437,
      "grad_norm": 0.2947302460670471,
      "learning_rate": 8.940976078227193e-09,
      "loss": 0.4274,
      "step": 3500
    },
    {
      "epoch": 2.9486243683323976,
      "grad_norm": 0.28269490599632263,
      "learning_rate": 8.650316027566386e-09,
      "loss": 0.3566,
      "step": 3501
    },
    {
      "epoch": 2.9494665918023584,
      "grad_norm": 0.2996935546398163,
      "learning_rate": 8.364454777373132e-09,
      "loss": 0.4205,
      "step": 3502
    },
    {
      "epoch": 2.9503088152723187,
      "grad_norm": 0.2875880300998688,
      "learning_rate": 8.083392602481477e-09,
      "loss": 0.3853,
      "step": 3503
    },
    {
      "epoch": 2.9511510387422795,
      "grad_norm": 0.28591272234916687,
      "learning_rate": 7.807129773110822e-09,
      "loss": 0.3899,
      "step": 3504
    },
    {
      "epoch": 2.9519932622122402,
      "grad_norm": 0.2874554693698883,
      "learning_rate": 7.535666554866483e-09,
      "loss": 0.3891,
      "step": 3505
    },
    {
      "epoch": 2.952835485682201,
      "grad_norm": 0.31434497237205505,
      "learning_rate": 7.269003208740244e-09,
      "loss": 0.3895,
      "step": 3506
    },
    {
      "epoch": 2.9536777091521618,
      "grad_norm": 0.2705330550670624,
      "learning_rate": 7.007139991108136e-09,
      "loss": 0.3721,
      "step": 3507
    },
    {
      "epoch": 2.9545199326221225,
      "grad_norm": 0.30703678727149963,
      "learning_rate": 6.750077153731549e-09,
      "loss": 0.4012,
      "step": 3508
    },
    {
      "epoch": 2.9553621560920833,
      "grad_norm": 0.28903019428253174,
      "learning_rate": 6.497814943756675e-09,
      "loss": 0.3837,
      "step": 3509
    },
    {
      "epoch": 2.9562043795620436,
      "grad_norm": 0.28872746229171753,
      "learning_rate": 6.25035360371451e-09,
      "loss": 0.4119,
      "step": 3510
    },
    {
      "epoch": 2.9570466030320044,
      "grad_norm": 0.3056761622428894,
      "learning_rate": 6.00769337151974e-09,
      "loss": 0.4078,
      "step": 3511
    },
    {
      "epoch": 2.957888826501965,
      "grad_norm": 0.28955385088920593,
      "learning_rate": 5.769834480472414e-09,
      "loss": 0.3523,
      "step": 3512
    },
    {
      "epoch": 2.958731049971926,
      "grad_norm": 0.2924606204032898,
      "learning_rate": 5.536777159254603e-09,
      "loss": 0.3853,
      "step": 3513
    },
    {
      "epoch": 2.9595732734418867,
      "grad_norm": 0.3044460415840149,
      "learning_rate": 5.308521631934294e-09,
      "loss": 0.4426,
      "step": 3514
    },
    {
      "epoch": 2.960415496911847,
      "grad_norm": 0.27884146571159363,
      "learning_rate": 5.08506811796039e-09,
      "loss": 0.3684,
      "step": 3515
    },
    {
      "epoch": 2.9612577203818082,
      "grad_norm": 0.2903974652290344,
      "learning_rate": 4.866416832167153e-09,
      "loss": 0.3978,
      "step": 3516
    },
    {
      "epoch": 2.9620999438517686,
      "grad_norm": 0.2976093888282776,
      "learning_rate": 4.652567984770873e-09,
      "loss": 0.4047,
      "step": 3517
    },
    {
      "epoch": 2.9629421673217293,
      "grad_norm": 0.3074796795845032,
      "learning_rate": 4.443521781370974e-09,
      "loss": 0.3751,
      "step": 3518
    },
    {
      "epoch": 2.96378439079169,
      "grad_norm": 0.3143027424812317,
      "learning_rate": 4.239278422948911e-09,
      "loss": 0.4199,
      "step": 3519
    },
    {
      "epoch": 2.964626614261651,
      "grad_norm": 0.2914693355560303,
      "learning_rate": 4.0398381058692755e-09,
      "loss": 0.3612,
      "step": 3520
    },
    {
      "epoch": 2.9654688377316116,
      "grad_norm": 0.27022919058799744,
      "learning_rate": 3.845201021879241e-09,
      "loss": 0.376,
      "step": 3521
    },
    {
      "epoch": 2.966311061201572,
      "grad_norm": 0.29003021121025085,
      "learning_rate": 3.655367358106343e-09,
      "loss": 0.3834,
      "step": 3522
    },
    {
      "epoch": 2.9671532846715327,
      "grad_norm": 0.2981721758842468,
      "learning_rate": 3.470337297062365e-09,
      "loss": 0.4069,
      "step": 3523
    },
    {
      "epoch": 2.9679955081414935,
      "grad_norm": 0.31446948647499084,
      "learning_rate": 3.290111016638342e-09,
      "loss": 0.3782,
      "step": 3524
    },
    {
      "epoch": 2.9688377316114543,
      "grad_norm": 0.27396610379219055,
      "learning_rate": 3.1146886901090024e-09,
      "loss": 0.3787,
      "step": 3525
    },
    {
      "epoch": 2.969679955081415,
      "grad_norm": 0.27903494238853455,
      "learning_rate": 2.9440704861288804e-09,
      "loss": 0.37,
      "step": 3526
    },
    {
      "epoch": 2.970522178551376,
      "grad_norm": 0.28733524680137634,
      "learning_rate": 2.7782565687339836e-09,
      "loss": 0.3992,
      "step": 3527
    },
    {
      "epoch": 2.9713644020213366,
      "grad_norm": 0.2920469343662262,
      "learning_rate": 2.617247097342901e-09,
      "loss": 0.3884,
      "step": 3528
    },
    {
      "epoch": 2.972206625491297,
      "grad_norm": 0.29937198758125305,
      "learning_rate": 2.461042226752919e-09,
      "loss": 0.365,
      "step": 3529
    },
    {
      "epoch": 2.9730488489612577,
      "grad_norm": 0.2849864959716797,
      "learning_rate": 2.3096421071433508e-09,
      "loss": 0.4103,
      "step": 3530
    },
    {
      "epoch": 2.9738910724312184,
      "grad_norm": 0.31153324246406555,
      "learning_rate": 2.1630468840738716e-09,
      "loss": 0.3789,
      "step": 3531
    },
    {
      "epoch": 2.974733295901179,
      "grad_norm": 0.29115402698516846,
      "learning_rate": 2.0212566984845194e-09,
      "loss": 0.389,
      "step": 3532
    },
    {
      "epoch": 2.97557551937114,
      "grad_norm": 0.27107152342796326,
      "learning_rate": 1.8842716866956935e-09,
      "loss": 0.3667,
      "step": 3533
    },
    {
      "epoch": 2.9764177428411003,
      "grad_norm": 0.2972150146961212,
      "learning_rate": 1.7520919804075997e-09,
      "loss": 0.4202,
      "step": 3534
    },
    {
      "epoch": 2.977259966311061,
      "grad_norm": 0.2807147204875946,
      "learning_rate": 1.624717706701917e-09,
      "loss": 0.3371,
      "step": 3535
    },
    {
      "epoch": 2.978102189781022,
      "grad_norm": 0.29070064425468445,
      "learning_rate": 1.5021489880384653e-09,
      "loss": 0.4112,
      "step": 3536
    },
    {
      "epoch": 2.9789444132509826,
      "grad_norm": 0.26367712020874023,
      "learning_rate": 1.3843859422574269e-09,
      "loss": 0.3768,
      "step": 3537
    },
    {
      "epoch": 2.9797866367209433,
      "grad_norm": 0.29596856236457825,
      "learning_rate": 1.2714286825793453e-09,
      "loss": 0.4226,
      "step": 3538
    },
    {
      "epoch": 2.980628860190904,
      "grad_norm": 0.2962256669998169,
      "learning_rate": 1.163277317604572e-09,
      "loss": 0.3991,
      "step": 3539
    },
    {
      "epoch": 2.981471083660865,
      "grad_norm": 0.2822709083557129,
      "learning_rate": 1.0599319513115992e-09,
      "loss": 0.3532,
      "step": 3540
    },
    {
      "epoch": 2.982313307130825,
      "grad_norm": 0.2720504701137543,
      "learning_rate": 9.613926830587262e-10,
      "loss": 0.337,
      "step": 3541
    },
    {
      "epoch": 2.983155530600786,
      "grad_norm": 0.32348233461380005,
      "learning_rate": 8.676596075851696e-10,
      "loss": 0.4365,
      "step": 3542
    },
    {
      "epoch": 2.9839977540707467,
      "grad_norm": 0.270984411239624,
      "learning_rate": 7.787328150071771e-10,
      "loss": 0.3759,
      "step": 3543
    },
    {
      "epoch": 2.9848399775407075,
      "grad_norm": 0.2950161397457123,
      "learning_rate": 6.946123908208036e-10,
      "loss": 0.4245,
      "step": 3544
    },
    {
      "epoch": 2.9856822010106683,
      "grad_norm": 0.273258775472641,
      "learning_rate": 6.152984159024655e-10,
      "loss": 0.3555,
      "step": 3545
    },
    {
      "epoch": 2.9865244244806286,
      "grad_norm": 0.2963271141052246,
      "learning_rate": 5.40790966505611e-10,
      "loss": 0.4007,
      "step": 3546
    },
    {
      "epoch": 2.98736664795059,
      "grad_norm": 0.4032110273838043,
      "learning_rate": 4.710901142634949e-10,
      "loss": 0.3522,
      "step": 3547
    },
    {
      "epoch": 2.98820887142055,
      "grad_norm": 0.29503610730171204,
      "learning_rate": 4.061959261886239e-10,
      "loss": 0.4172,
      "step": 3548
    },
    {
      "epoch": 2.989051094890511,
      "grad_norm": 0.307991087436676,
      "learning_rate": 3.4610846467109106e-10,
      "loss": 0.4197,
      "step": 3549
    },
    {
      "epoch": 2.9898933183604717,
      "grad_norm": 0.28625157475471497,
      "learning_rate": 2.9082778748135146e-10,
      "loss": 0.3749,
      "step": 3550
    },
    {
      "epoch": 2.9907355418304324,
      "grad_norm": 0.30051809549331665,
      "learning_rate": 2.403539477668915e-10,
      "loss": 0.3636,
      "step": 3551
    },
    {
      "epoch": 2.991577765300393,
      "grad_norm": 0.3061937093734741,
      "learning_rate": 1.9468699405444936e-10,
      "loss": 0.41,
      "step": 3552
    },
    {
      "epoch": 2.9924199887703535,
      "grad_norm": 0.3031589090824127,
      "learning_rate": 1.538269702494599e-10,
      "loss": 0.4029,
      "step": 3553
    },
    {
      "epoch": 2.9932622122403143,
      "grad_norm": 0.2543588876724243,
      "learning_rate": 1.1777391563549956e-10,
      "loss": 0.3442,
      "step": 3554
    },
    {
      "epoch": 2.994104435710275,
      "grad_norm": 0.2951052188873291,
      "learning_rate": 8.652786487484133e-11,
      "loss": 0.3797,
      "step": 3555
    },
    {
      "epoch": 2.994946659180236,
      "grad_norm": 0.31477051973342896,
      "learning_rate": 6.008884800845494e-11,
      "loss": 0.434,
      "step": 3556
    },
    {
      "epoch": 2.9957888826501966,
      "grad_norm": 0.27592024207115173,
      "learning_rate": 3.8456890455451646e-11,
      "loss": 0.3726,
      "step": 3557
    },
    {
      "epoch": 2.9966311061201574,
      "grad_norm": 0.27286410331726074,
      "learning_rate": 2.1632013013084265e-11,
      "loss": 0.3561,
      "step": 3558
    },
    {
      "epoch": 2.997473329590118,
      "grad_norm": 0.29373499751091003,
      "learning_rate": 9.614231857302258e-12,
      "loss": 0.4033,
      "step": 3559
    },
    {
      "epoch": 2.9983155530600785,
      "grad_norm": 0.274093359708786,
      "learning_rate": 2.403558542196649e-12,
      "loss": 0.3676,
      "step": 3560
    },
    {
      "epoch": 2.9991577765300392,
      "grad_norm": 0.28862297534942627,
      "learning_rate": 0.0,
      "loss": 0.3956,
      "step": 3561
    },
    {
      "epoch": 2.9991577765300392,
      "step": 3561,
      "total_flos": 4618853558517760.0,
      "train_loss": 0.43624040107819445,
      "train_runtime": 72959.568,
      "train_samples_per_second": 4.686,
      "train_steps_per_second": 0.049
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 3561,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4618853558517760.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}